/*-
 * Copyright (c) 2004, David Xu <davidxu@freebsd.org>
 * Copyright (c) 2002, Jeffrey Roberson <jeff@freebsd.org>
 * All rights reserved.
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 * 1. Redistributions of source code must retain the above copyright
 *    notice unmodified, this list of conditions, and the following
 *    disclaimer.
 * 2. Redistributions in binary form must reproduce the above copyright
 *    notice, this list of conditions and the following disclaimer in the
 *    documentation and/or other materials provided with the distribution.
 *
 * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR
 * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
 * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.
 * IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT,
 * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT
 * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
 * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
 * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
 * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF
 * THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
 */

#include <sys/cdefs.h>
__FBSDID("$FreeBSD$");

#include "opt_compat.h"
#include "opt_umtx_profiling.h"

#include <sys/param.h>
#include <sys/kernel.h>
#include <sys/limits.h>
#include <sys/lock.h>
#include <sys/malloc.h>
#include <sys/mutex.h>
#include <sys/priv.h>
#include <sys/proc.h>
#include <sys/sbuf.h>
#include <sys/sched.h>
#include <sys/smp.h>
#include <sys/sysctl.h>
#include <sys/sysent.h>
#include <sys/systm.h>
#include <sys/sysproto.h>
#include <sys/syscallsubr.h>
#include <sys/eventhandler.h>
#include <sys/umtx.h>

#include <vm/vm.h>
#include <vm/vm_param.h>
#include <vm/pmap.h>
#include <vm/vm_map.h>
#include <vm/vm_object.h>

#include <machine/cpu.h>

#ifdef COMPAT_FREEBSD32
#include <compat/freebsd32/freebsd32_proto.h>
#endif

#define _UMUTEX_TRY		1
#define _UMUTEX_WAIT		2

#ifdef UMTX_PROFILING
#define	UPROF_PERC_BIGGER(w, f, sw, sf)					\
	(((w) > (sw)) || ((w) == (sw) && (f) > (sf)))
#endif

/* Priority inheritance mutex info. */
struct umtx_pi {
	/* Owner thread */
	struct thread		*pi_owner;

	/* Reference count */
	int			pi_refcount;

	/* List entry to link umtx mutexes held by a thread */
	TAILQ_ENTRY(umtx_pi)	pi_link;

	/* List entry in hash */
	TAILQ_ENTRY(umtx_pi)	pi_hashlink;

	/* List for waiters */
	TAILQ_HEAD(,umtx_q)	pi_blocked;

	/* Identify a userland lock object */
	struct umtx_key		pi_key;
};

/* A user of a userland synchronization object. */
struct umtx_q {
	/* Linked list for the hash. */
	TAILQ_ENTRY(umtx_q)	uq_link;

	/* Umtx key. */
	struct umtx_key		uq_key;

	/* Umtx flags. */
	int			uq_flags;
#define UQF_UMTXQ	0x0001

	/* The waiting thread. */
	struct thread		*uq_thread;

	/*
	 * Blocked-on PI mutex.  Reads may be done under either the
	 * chain lock or umtx_lock; writes must hold both.
	 */
	struct umtx_pi		*uq_pi_blocked;

	/* On blocked list */
	TAILQ_ENTRY(umtx_q)	uq_lockq;

	/* PI mutexes we own on which other threads are contending */
	TAILQ_HEAD(,umtx_pi)	uq_pi_contested;

	/* Inherited priority from PP mutex */
	u_char			uq_inherited_pri;

	/* Spare queue ready to be reused */
	struct umtxq_queue	*uq_spare_queue;

	/* The queue we are on */
	struct umtxq_queue	*uq_cur_queue;
};

TAILQ_HEAD(umtxq_head, umtx_q);

/* Per-key wait-queue */
struct umtxq_queue {
	struct umtxq_head	head;
	struct umtx_key		key;
	LIST_ENTRY(umtxq_queue)	link;
	int			length;
};

LIST_HEAD(umtxq_list, umtxq_queue);

/* Userland lock object's wait-queue chain */
struct umtxq_chain {
	/* Lock for this chain. */
	struct mtx		uc_lock;

	/* List of sleep queues. */
	struct umtxq_list	uc_queue[2];
#define UMTX_SHARED_QUEUE	0
#define UMTX_EXCLUSIVE_QUEUE	1

	LIST_HEAD(, umtxq_queue) uc_spare_queue;

	/* Busy flag */
	char			uc_busy;

	/* Chain lock waiters */
	int			uc_waiters;

	/* All PIs in the list */
	TAILQ_HEAD(,umtx_pi)	uc_pi_list;

#ifdef UMTX_PROFILING
	u_int			length;
	u_int			max_length;
#endif
};

#define	UMTXQ_LOCKED_ASSERT(uc)	mtx_assert(&(uc)->uc_lock, MA_OWNED)
#define	UMTXQ_BUSY_ASSERT(uc)	KASSERT((uc)->uc_busy != 0, ("umtx chain is not busy"))

/*
 * Don't propagate time-sharing priority; there is a security reason.
 * A user could simply create a PI mutex, let thread A lock it, and let
 * another thread B block on it.  Because B is sleeping, its priority
 * would be boosted, and that boost would propagate to A.  A's priority
 * would then never be lowered, even while it consumes 100% CPU, which
 * is unfair to other processes.
 */
#define UPRI(td)	(((td)->td_user_pri >= PRI_MIN_TIMESHARE &&\
			  (td)->td_user_pri <= PRI_MAX_TIMESHARE) ?\
			 PRI_MAX_TIMESHARE : (td)->td_user_pri)
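/*
 * Worked example of the UPRI() clamp above (illustrative only; the
 * concrete numbers assume the stock priority ranges from
 * <sys/priority.h>, where PRI_MIN_TIMESHARE..PRI_MAX_TIMESHARE is
 * 120..223 and lower values mean higher priority):
 *
 *	td_user_pri = 100 (real-time range)  -> UPRI(td) = 100
 *	td_user_pri = 130 (time-sharing)     -> UPRI(td) = 223 (clamped)
 *	td_user_pri = 223 (time-sharing)     -> UPRI(td) = 223
 *
 * Clamping every time-sharing thread to PRI_MAX_TIMESHARE means a
 * sleeping time-sharing waiter can never lend a boosted priority to a
 * PI mutex owner, which closes the starvation trick described above.
 */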
#define	GOLDEN_RATIO_PRIME	2654404609U
#define	UMTX_CHAINS		512
#define	UMTX_SHIFTS		(__WORD_BIT - 9)

#define	GET_SHARE(flags)	\
    (((flags) & USYNC_PROCESS_SHARED) == 0 ? THREAD_SHARE : PROCESS_SHARE)

#define BUSY_SPINS		200

struct abs_timeout {
	int clockid;
	struct timespec cur;
	struct timespec end;
};

static uma_zone_t		umtx_pi_zone;
static struct umtxq_chain	umtxq_chains[2][UMTX_CHAINS];
static MALLOC_DEFINE(M_UMTX, "umtx", "UMTX queue memory");
static int			umtx_pi_allocated;

static SYSCTL_NODE(_debug, OID_AUTO, umtx, CTLFLAG_RW, 0, "umtx debug");
SYSCTL_INT(_debug_umtx, OID_AUTO, umtx_pi_allocated, CTLFLAG_RD,
    &umtx_pi_allocated, 0, "Allocated umtx_pi");

#ifdef UMTX_PROFILING
static long max_length;
SYSCTL_LONG(_debug_umtx, OID_AUTO, max_length, CTLFLAG_RD, &max_length, 0, "max_length");
static SYSCTL_NODE(_debug_umtx, OID_AUTO, chains, CTLFLAG_RD, 0, "umtx chain stats");
#endif

static void umtxq_sysinit(void *);
static void umtxq_hash(struct umtx_key *key);
static struct umtxq_chain *umtxq_getchain(struct umtx_key *key);
static void umtxq_lock(struct umtx_key *key);
static void umtxq_unlock(struct umtx_key *key);
static void umtxq_busy(struct umtx_key *key);
static void umtxq_unbusy(struct umtx_key *key);
static void umtxq_insert_queue(struct umtx_q *uq, int q);
static void umtxq_remove_queue(struct umtx_q *uq, int q);
static int umtxq_sleep(struct umtx_q *uq, const char *wmesg, struct abs_timeout *);
static int umtxq_count(struct umtx_key *key);
static struct umtx_pi *umtx_pi_alloc(int);
static void umtx_pi_free(struct umtx_pi *pi);
static int do_unlock_pp(struct thread *td, struct umutex *m, uint32_t flags);
static void umtx_thread_cleanup(struct thread *td);
static void umtx_exec_hook(void *arg __unused, struct proc *p __unused,
    struct image_params *imgp __unused);
SYSINIT(umtx, SI_SUB_EVENTHANDLER+1, SI_ORDER_MIDDLE, umtxq_sysinit, NULL);

#define umtxq_signal(key, nwake)	umtxq_signal_queue((key), (nwake), UMTX_SHARED_QUEUE)
#define umtxq_insert(uq)	umtxq_insert_queue((uq), UMTX_SHARED_QUEUE)
#define umtxq_remove(uq)	umtxq_remove_queue((uq), UMTX_SHARED_QUEUE)

static struct mtx umtx_lock;

#ifdef UMTX_PROFILING
static void
umtx_init_profiling(void)
{
	struct sysctl_oid *chain_oid;
	char chain_name[10];
	int i;

	for (i = 0; i < UMTX_CHAINS; ++i) {
		snprintf(chain_name, sizeof(chain_name), "%d", i);
		chain_oid = SYSCTL_ADD_NODE(NULL,
		    SYSCTL_STATIC_CHILDREN(_debug_umtx_chains), OID_AUTO,
		    chain_name, CTLFLAG_RD, NULL, "umtx hash stats");
		SYSCTL_ADD_INT(NULL, SYSCTL_CHILDREN(chain_oid), OID_AUTO,
		    "max_length0", CTLFLAG_RD, &umtxq_chains[0][i].max_length, 0, NULL);
		SYSCTL_ADD_INT(NULL, SYSCTL_CHILDREN(chain_oid), OID_AUTO,
		    "max_length1", CTLFLAG_RD, &umtxq_chains[1][i].max_length, 0, NULL);
	}
}

static int
sysctl_debug_umtx_chains_peaks(SYSCTL_HANDLER_ARGS)
{
	char buf[512];
	struct sbuf sb;
	struct umtxq_chain *uc;
	u_int fract, i, j, tot, whole;
	u_int sf0, sf1, sf2, sf3, sf4;
	u_int si0, si1, si2, si3, si4;
	u_int sw0, sw1, sw2, sw3, sw4;

	sbuf_new(&sb, buf, sizeof(buf), SBUF_FIXEDLEN);
	for (i = 0; i < 2; i++) {
		tot = 0;
		for (j = 0; j < UMTX_CHAINS; ++j) {
			uc = &umtxq_chains[i][j];
			mtx_lock(&uc->uc_lock);
			tot += uc->max_length;
			mtx_unlock(&uc->uc_lock);
		}
		if (tot == 0)
			sbuf_printf(&sb, "%u) Empty ", i);
		else {
			sf0 = sf1 = sf2 = sf3 = sf4 = 0;
			si0 = si1 = si2 = si3 = si4 = 0;
			sw0 = sw1 = sw2 = sw3 = sw4 = 0;
			for (j = 0; j < UMTX_CHAINS; j++) {
				uc = &umtxq_chains[i][j];
				mtx_lock(&uc->uc_lock);
				whole = uc->max_length * 100;
				mtx_unlock(&uc->uc_lock);
				fract = (whole % tot) * 100;
				if (UPROF_PERC_BIGGER(whole, fract, sw0, sf0)) {
					sf0 = fract;
					si0 = j;
					sw0 = whole;
				} else if (UPROF_PERC_BIGGER(whole, fract, sw1,
				    sf1)) {
					sf1 = fract;
					si1 = j;
					sw1 = whole;
				} else if (UPROF_PERC_BIGGER(whole, fract, sw2,
				    sf2)) {
					sf2 = fract;
					si2 = j;
					sw2 = whole;
				} else if (UPROF_PERC_BIGGER(whole, fract, sw3,
				    sf3)) {
					sf3 = fract;
					si3 = j;
					sw3 = whole;
				} else if (UPROF_PERC_BIGGER(whole, fract, sw4,
				    sf4)) {
					sf4 = fract;
					si4 = j;
					sw4 = whole;
				}
			}
			sbuf_printf(&sb, "queue %u:\n", i);
			sbuf_printf(&sb, "1st: %u.%u%% idx: %u\n", sw0 / tot,
			    sf0 / tot, si0);
			sbuf_printf(&sb, "2nd: %u.%u%% idx: %u\n", sw1 / tot,
			    sf1 / tot, si1);
			sbuf_printf(&sb, "3rd: %u.%u%% idx: %u\n", sw2 / tot,
			    sf2 / tot, si2);
			sbuf_printf(&sb, "4th: %u.%u%% idx: %u\n", sw3 / tot,
			    sf3 / tot, si3);
			sbuf_printf(&sb, "5th: %u.%u%% idx: %u\n", sw4 / tot,
			    sf4 / tot, si4);
		}
	}
	sbuf_trim(&sb);
	sbuf_finish(&sb);
	sysctl_handle_string(oidp, sbuf_data(&sb), sbuf_len(&sb), req);
	sbuf_delete(&sb);
	return (0);
}

static int
sysctl_debug_umtx_chains_clear(SYSCTL_HANDLER_ARGS)
{
	struct umtxq_chain *uc;
	u_int i, j;
	int clear, error;

	clear = 0;
	error = sysctl_handle_int(oidp, &clear, 0, req);
	if (error != 0 || req->newptr == NULL)
		return (error);

	if (clear != 0) {
		for (i = 0; i < 2; ++i) {
			for (j = 0; j < UMTX_CHAINS; ++j) {
				uc = &umtxq_chains[i][j];
				mtx_lock(&uc->uc_lock);
				uc->length = 0;
				uc->max_length = 0;
				mtx_unlock(&uc->uc_lock);
			}
		}
	}
	return (0);
}

SYSCTL_PROC(_debug_umtx_chains, OID_AUTO, clear,
    CTLTYPE_INT | CTLFLAG_RW | CTLFLAG_MPSAFE, 0, 0,
    sysctl_debug_umtx_chains_clear, "I", "Clear umtx chains statistics");
SYSCTL_PROC(_debug_umtx_chains, OID_AUTO, peaks,
    CTLTYPE_STRING | CTLFLAG_RD | CTLFLAG_MPSAFE, 0, 0,
    sysctl_debug_umtx_chains_peaks, "A", "Highest peaks in chains max length");
#endif

static void
umtxq_sysinit(void *arg __unused)
{
	int i, j;

	umtx_pi_zone = uma_zcreate("umtx pi", sizeof(struct umtx_pi),
	    NULL, NULL, NULL, NULL, UMA_ALIGN_PTR, 0);
	for (i = 0; i < 2; ++i) {
		for (j = 0; j < UMTX_CHAINS; ++j) {
			mtx_init(&umtxq_chains[i][j].uc_lock, "umtxql", NULL,
			    MTX_DEF | MTX_DUPOK);
			LIST_INIT(&umtxq_chains[i][j].uc_queue[0]);
			LIST_INIT(&umtxq_chains[i][j].uc_queue[1]);
			LIST_INIT(&umtxq_chains[i][j].uc_spare_queue);
			TAILQ_INIT(&umtxq_chains[i][j].uc_pi_list);
			umtxq_chains[i][j].uc_busy = 0;
			umtxq_chains[i][j].uc_waiters = 0;
#ifdef UMTX_PROFILING
			umtxq_chains[i][j].length = 0;
			umtxq_chains[i][j].max_length = 0;
#endif
		}
	}
#ifdef UMTX_PROFILING
	umtx_init_profiling();
#endif
	mtx_init(&umtx_lock, "umtx lock", NULL, MTX_SPIN);
	EVENTHANDLER_REGISTER(process_exec, umtx_exec_hook, NULL,
	    EVENTHANDLER_PRI_ANY);
}

struct umtx_q *
umtxq_alloc(void)
{
	struct umtx_q *uq;

	uq = malloc(sizeof(struct umtx_q), M_UMTX, M_WAITOK | M_ZERO);
	uq->uq_spare_queue = malloc(sizeof(struct umtxq_queue), M_UMTX,
	    M_WAITOK | M_ZERO);
	TAILQ_INIT(&uq->uq_spare_queue->head);
	TAILQ_INIT(&uq->uq_pi_contested);
	uq->uq_inherited_pri = PRI_MAX;
	return (uq);
}
void
umtxq_free(struct umtx_q *uq)
{
	MPASS(uq->uq_spare_queue != NULL);
	free(uq->uq_spare_queue, M_UMTX);
	free(uq, M_UMTX);
}

static inline void
umtxq_hash(struct umtx_key *key)
{
	unsigned n = (uintptr_t)key->info.both.a + key->info.both.b;
	key->hash = ((n * GOLDEN_RATIO_PRIME) >> UMTX_SHIFTS) % UMTX_CHAINS;
}

static inline struct umtxq_chain *
umtxq_getchain(struct umtx_key *key)
{
	if (key->type <= TYPE_SEM)
		return (&umtxq_chains[1][key->hash]);
	return (&umtxq_chains[0][key->hash]);
}
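/*
 * Illustrative sketch of how a key is mapped to a chain (not part of
 * the original source).  umtxq_hash() is a multiplicative ("golden
 * ratio") hash: the key's address material is scrambled by a constant
 * derived from 2^32/phi, the well-mixed high-order bits are selected
 * by the shift, and the result is folded into one of UMTX_CHAINS
 * buckets:
 *
 *	unsigned n = (uintptr_t)key->info.both.a + key->info.both.b;
 *	n *= GOLDEN_RATIO_PRIME;	// scramble; high bits mix best
 *	n >>= UMTX_SHIFTS;		// __WORD_BIT - 9 = 23 on ILP32
 *	n %= UMTX_CHAINS;		// 0..511, index into umtxq_chains[]
 *
 * umtxq_getchain() then picks row 1 for the blocking object types
 * (key->type <= TYPE_SEM) and row 0 for everything else, so the two
 * classes never share a chain lock.
 */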
/*
 * Lock a chain.
 */
static inline void
umtxq_lock(struct umtx_key *key)
{
	struct umtxq_chain *uc;

	uc = umtxq_getchain(key);
	mtx_lock(&uc->uc_lock);
}

/*
 * Unlock a chain.
 */
static inline void
umtxq_unlock(struct umtx_key *key)
{
	struct umtxq_chain *uc;

	uc = umtxq_getchain(key);
	mtx_unlock(&uc->uc_lock);
}

/*
 * Mark the chain busy when a subsequent operation may block
 * (the chain's kernel mutex can not be held across it).
 */
static inline void
umtxq_busy(struct umtx_key *key)
{
	struct umtxq_chain *uc;

	uc = umtxq_getchain(key);
	mtx_assert(&uc->uc_lock, MA_OWNED);
	if (uc->uc_busy) {
#ifdef SMP
		if (smp_cpus > 1) {
			int count = BUSY_SPINS;
			if (count > 0) {
				umtxq_unlock(key);
				while (uc->uc_busy && --count > 0)
					cpu_spinwait();
				umtxq_lock(key);
			}
		}
#endif
		while (uc->uc_busy) {
			uc->uc_waiters++;
			msleep(uc, &uc->uc_lock, 0, "umtxqb", 0);
			uc->uc_waiters--;
		}
	}
	uc->uc_busy = 1;
}

/*
 * Unbusy a chain.
 */
static inline void
umtxq_unbusy(struct umtx_key *key)
{
	struct umtxq_chain *uc;

	uc = umtxq_getchain(key);
	mtx_assert(&uc->uc_lock, MA_OWNED);
	KASSERT(uc->uc_busy != 0, ("not busy"));
	uc->uc_busy = 0;
	if (uc->uc_waiters)
		wakeup_one(uc);
}

static struct umtxq_queue *
umtxq_queue_lookup(struct umtx_key *key, int q)
{
	struct umtxq_queue *uh;
	struct umtxq_chain *uc;

	uc = umtxq_getchain(key);
	UMTXQ_LOCKED_ASSERT(uc);
	LIST_FOREACH(uh, &uc->uc_queue[q], link) {
		if (umtx_key_match(&uh->key, key))
			return (uh);
	}

	return (NULL);
}

static inline void
umtxq_insert_queue(struct umtx_q *uq, int q)
{
	struct umtxq_queue *uh;
	struct umtxq_chain *uc;

	uc = umtxq_getchain(&uq->uq_key);
	UMTXQ_LOCKED_ASSERT(uc);
	KASSERT((uq->uq_flags & UQF_UMTXQ) == 0, ("umtx_q is already on queue"));
	uh = umtxq_queue_lookup(&uq->uq_key, q);
	if (uh != NULL) {
		LIST_INSERT_HEAD(&uc->uc_spare_queue, uq->uq_spare_queue, link);
	} else {
		uh = uq->uq_spare_queue;
		uh->key = uq->uq_key;
		LIST_INSERT_HEAD(&uc->uc_queue[q], uh, link);
#ifdef UMTX_PROFILING
		uc->length++;
		if (uc->length > uc->max_length) {
			uc->max_length = uc->length;
			if (uc->max_length > max_length)
				max_length = uc->max_length;
		}
#endif
	}
	uq->uq_spare_queue = NULL;

	TAILQ_INSERT_TAIL(&uh->head, uq, uq_link);
	uh->length++;
	uq->uq_flags |= UQF_UMTXQ;
	uq->uq_cur_queue = uh;
	return;
}

static inline void
umtxq_remove_queue(struct umtx_q *uq, int q)
{
	struct umtxq_chain *uc;
	struct umtxq_queue *uh;

	uc = umtxq_getchain(&uq->uq_key);
	UMTXQ_LOCKED_ASSERT(uc);
	if (uq->uq_flags & UQF_UMTXQ) {
		uh = uq->uq_cur_queue;
		TAILQ_REMOVE(&uh->head, uq, uq_link);
		uh->length--;
		uq->uq_flags &= ~UQF_UMTXQ;
		if (TAILQ_EMPTY(&uh->head)) {
			KASSERT(uh->length == 0,
			    ("inconsistent umtxq_queue length"));
#ifdef UMTX_PROFILING
			uc->length--;
#endif
			LIST_REMOVE(uh, link);
		} else {
			uh = LIST_FIRST(&uc->uc_spare_queue);
			KASSERT(uh != NULL, ("uc_spare_queue is empty"));
			LIST_REMOVE(uh, link);
		}
		uq->uq_spare_queue = uh;
		uq->uq_cur_queue = NULL;
	}
}
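/*
 * Design note (illustrative, not from the original source): every
 * umtx_q owns exactly one spare umtxq_queue, so per-key queue heads
 * never have to be allocated while a chain lock is held.  The
 * invariant maintained by umtxq_insert_queue()/umtxq_remove_queue()
 * is:
 *
 *	insert, first waiter:  the waiter's spare becomes the per-key
 *	                       queue head;
 *	insert, later waiter:  the waiter's spare is parked on
 *	                       uc_spare_queue;
 *	remove, last waiter:   the waiter takes the now-empty queue
 *	                       head back as its spare;
 *	remove, otherwise:     the waiter takes any parked spare back.
 *
 * Either way a thread always leaves with one spare queue, keeping the
 * wait/wake fast paths allocation-free.
 */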
/*
 * Check if there are multiple waiters.
 */
static int
umtxq_count(struct umtx_key *key)
{
	struct umtxq_chain *uc;
	struct umtxq_queue *uh;

	uc = umtxq_getchain(key);
	UMTXQ_LOCKED_ASSERT(uc);
	uh = umtxq_queue_lookup(key, UMTX_SHARED_QUEUE);
	if (uh != NULL)
		return (uh->length);
	return (0);
}

/*
 * Check if there are multiple PI waiters and return the first
 * waiter.
 */
static int
umtxq_count_pi(struct umtx_key *key, struct umtx_q **first)
{
	struct umtxq_chain *uc;
	struct umtxq_queue *uh;

	*first = NULL;
	uc = umtxq_getchain(key);
	UMTXQ_LOCKED_ASSERT(uc);
	uh = umtxq_queue_lookup(key, UMTX_SHARED_QUEUE);
	if (uh != NULL) {
		*first = TAILQ_FIRST(&uh->head);
		return (uh->length);
	}
	return (0);
}

static int
umtxq_check_susp(struct thread *td)
{
	struct proc *p;
	int error;

	/*
	 * The check for TDF_NEEDSUSPCHK is racy, but it is enough to
	 * eventually break the lockstep loop.
	 */
	if ((td->td_flags & TDF_NEEDSUSPCHK) == 0)
		return (0);
	error = 0;
	p = td->td_proc;
	PROC_LOCK(p);
	if (P_SHOULDSTOP(p) ||
	    ((p->p_flag & P_TRACED) && (td->td_dbgflags & TDB_SUSPEND))) {
		if (p->p_flag & P_SINGLE_EXIT)
			error = EINTR;
		else
			error = ERESTART;
	}
	PROC_UNLOCK(p);
	return (error);
}

/*
 * Wake up threads waiting on a userland object.
 */
static int
umtxq_signal_queue(struct umtx_key *key, int n_wake, int q)
{
	struct umtxq_chain *uc;
	struct umtxq_queue *uh;
	struct umtx_q *uq;
	int ret;

	ret = 0;
	uc = umtxq_getchain(key);
	UMTXQ_LOCKED_ASSERT(uc);
	uh = umtxq_queue_lookup(key, q);
	if (uh != NULL) {
		while ((uq = TAILQ_FIRST(&uh->head)) != NULL) {
			umtxq_remove_queue(uq, q);
			wakeup(uq);
			if (++ret >= n_wake)
				return (ret);
		}
	}
	return (ret);
}

/*
 * Wake up the specified thread.
 */
static inline void
umtxq_signal_thread(struct umtx_q *uq)
{
	struct umtxq_chain *uc;

	uc = umtxq_getchain(&uq->uq_key);
	UMTXQ_LOCKED_ASSERT(uc);
	umtxq_remove(uq);
	wakeup(uq);
}

static inline int
tstohz(const struct timespec *tsp)
{
	struct timeval tv;

	TIMESPEC_TO_TIMEVAL(&tv, tsp);
	return tvtohz(&tv);
}

static void
abs_timeout_init(struct abs_timeout *timo, int clockid, int absolute,
    const struct timespec *timeout)
{

	timo->clockid = clockid;
	if (!absolute) {
		kern_clock_gettime(curthread, clockid, &timo->end);
		timo->cur = timo->end;
		timespecadd(&timo->end, timeout);
	} else {
		timo->end = *timeout;
		kern_clock_gettime(curthread, clockid, &timo->cur);
	}
}

static void
abs_timeout_init2(struct abs_timeout *timo, const struct _umtx_time *umtxtime)
{

	abs_timeout_init(timo, umtxtime->_clockid,
	    (umtxtime->_flags & UMTX_ABSTIME) != 0,
	    &umtxtime->_timeout);
}

static inline void
abs_timeout_update(struct abs_timeout *timo)
{
	kern_clock_gettime(curthread, timo->clockid, &timo->cur);
}

static int
abs_timeout_gethz(struct abs_timeout *timo)
{
	struct timespec tts;

	if (timespeccmp(&timo->end, &timo->cur, <=))
		return (-1);
	tts = timo->end;
	timespecsub(&tts, &timo->cur);
	return (tstohz(&tts));
}
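/*
 * Worked example of the abs_timeout conversion above (illustrative
 * only).  A relative 1.5 s timeout against CLOCK_MONOTONIC, taken when
 * "now" reads 100.0 s, becomes:
 *
 *	struct timespec ts = { 1, 500000000 };
 *	abs_timeout_init(&timo, CLOCK_MONOTONIC, 0, &ts);
 *	// timo.cur = {100, 0}, timo.end = {101, 500000000}
 *
 * Each trip through a sleep loop re-reads the clock with
 * abs_timeout_update() and asks abs_timeout_gethz() for the remaining
 * time in ticks: at cur = {101, 0} it returns tstohz() of the 0.5 s
 * remainder, and once cur reaches end it returns -1, which the sleep
 * paths map to ETIMEDOUT.  Absolute timeouts skip the addition and
 * use the caller's deadline as given.
 */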
/*
 * Put the thread into a sleep state.  Before sleeping, check whether
 * the thread was removed from the umtx queue.
 */
static inline int
umtxq_sleep(struct umtx_q *uq, const char *wmesg, struct abs_timeout *abstime)
{
	struct umtxq_chain *uc;
	int error, timo;

	uc = umtxq_getchain(&uq->uq_key);
	UMTXQ_LOCKED_ASSERT(uc);
	for (;;) {
		if (!(uq->uq_flags & UQF_UMTXQ))
			return (0);
		if (abstime != NULL) {
			timo = abs_timeout_gethz(abstime);
			if (timo < 0)
				return (ETIMEDOUT);
		} else
			timo = 0;
		error = msleep(uq, &uc->uc_lock, PCATCH | PDROP, wmesg, timo);
		if (error != EWOULDBLOCK) {
			umtxq_lock(&uq->uq_key);
			break;
		}
		if (abstime != NULL)
			abs_timeout_update(abstime);
		umtxq_lock(&uq->uq_key);
	}
	return (error);
}

/*
 * Convert a userspace address into a unique logical address.
 */
int
umtx_key_get(void *addr, int type, int share, struct umtx_key *key)
{
	struct thread *td = curthread;
	vm_map_t map;
	vm_map_entry_t entry;
	vm_pindex_t pindex;
	vm_prot_t prot;
	boolean_t wired;

	key->type = type;
	if (share == THREAD_SHARE) {
		key->shared = 0;
		key->info.private.vs = td->td_proc->p_vmspace;
		key->info.private.addr = (uintptr_t)addr;
	} else {
		MPASS(share == PROCESS_SHARE || share == AUTO_SHARE);
		map = &td->td_proc->p_vmspace->vm_map;
		if (vm_map_lookup(&map, (vm_offset_t)addr, VM_PROT_WRITE,
		    &entry, &key->info.shared.object, &pindex, &prot,
		    &wired) != KERN_SUCCESS) {
			return EFAULT;
		}

		if ((share == PROCESS_SHARE) ||
		    (share == AUTO_SHARE &&
		     VM_INHERIT_SHARE == entry->inheritance)) {
			key->shared = 1;
			key->info.shared.offset = entry->offset + entry->start -
				(vm_offset_t)addr;
			vm_object_reference(key->info.shared.object);
		} else {
			key->shared = 0;
			key->info.private.vs = td->td_proc->p_vmspace;
			key->info.private.addr = (uintptr_t)addr;
		}
		vm_map_lookup_done(map, entry);
	}

	umtxq_hash(key);
	return (0);
}

/*
 * Release key.
 */
void
umtx_key_release(struct umtx_key *key)
{
	if (key->shared)
		vm_object_deallocate(key->info.shared.object);
}

/*
 * Lock a umtx object.
 */
static int
do_lock_umtx(struct thread *td, struct umtx *umtx, u_long id,
    const struct timespec *timeout)
{
	struct abs_timeout timo;
	struct umtx_q *uq;
	u_long owner;
	u_long old;
	int error = 0;

	uq = td->td_umtxq;
	if (timeout != NULL)
		abs_timeout_init(&timo, CLOCK_REALTIME, 0, timeout);

	/*
	 * Care must be exercised when dealing with the umtx structure.
	 * It can fault on any access.
	 */
	for (;;) {
		/*
		 * Try the uncontested case.  This should be done in userland.
		 */
		owner = casuword(&umtx->u_owner, UMTX_UNOWNED, id);

		/* The acquire succeeded. */
		if (owner == UMTX_UNOWNED)
			return (0);

		/* The address was invalid. */
		if (owner == -1)
			return (EFAULT);

		/* If no one owns it but it is contested, try to acquire it. */
		if (owner == UMTX_CONTESTED) {
			owner = casuword(&umtx->u_owner,
			    UMTX_CONTESTED, id | UMTX_CONTESTED);

			if (owner == UMTX_CONTESTED)
				return (0);

			/* The address was invalid. */
			if (owner == -1)
				return (EFAULT);

			error = umtxq_check_susp(td);
			if (error != 0)
				break;

			/* If this failed, the lock has changed; restart. */
			continue;
		}

		/*
		 * If we caught a signal, we have retried and now
		 * exit immediately.
		 */
		if (error != 0)
			break;

		if ((error = umtx_key_get(umtx, TYPE_SIMPLE_LOCK,
		    AUTO_SHARE, &uq->uq_key)) != 0)
			return (error);

		umtxq_lock(&uq->uq_key);
		umtxq_busy(&uq->uq_key);
		umtxq_insert(uq);
		umtxq_unbusy(&uq->uq_key);
		umtxq_unlock(&uq->uq_key);

		/*
		 * Set the contested bit so that a release in user space
		 * knows to use the system call for unlock.  If this fails,
		 * either someone else has acquired the lock or it has been
		 * released.
		 */
		old = casuword(&umtx->u_owner, owner, owner | UMTX_CONTESTED);

		/* The address was invalid. */
		if (old == -1) {
			umtxq_lock(&uq->uq_key);
			umtxq_remove(uq);
			umtxq_unlock(&uq->uq_key);
			umtx_key_release(&uq->uq_key);
			return (EFAULT);
		}

		/*
		 * If we set the contested bit, sleep.  Otherwise the lock
		 * changed and we need to retry, or we lost a race to the
		 * thread unlocking the umtx.
		 */
		umtxq_lock(&uq->uq_key);
		if (old == owner)
			error = umtxq_sleep(uq, "umtx", timeout == NULL ? NULL :
			    &timo);
		umtxq_remove(uq);
		umtxq_unlock(&uq->uq_key);
		umtx_key_release(&uq->uq_key);

		if (error == 0)
			error = umtxq_check_susp(td);
	}

	if (timeout == NULL) {
		/* Mutex locking is restarted if it is interrupted. */
		if (error == EINTR)
			error = ERESTART;
	} else {
		/* Timed locking is not restarted. */
		if (error == ERESTART)
			error = EINTR;
	}
	return (error);
}
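/*
 * Illustrative sketch of the protocol do_lock_umtx() and its unlock
 * counterpart implement (not part of the original source; the userland
 * side shown here is an assumption about the caller, not a quote of
 * libthr).  Userland is expected to try the lock with an atomic
 * compare-and-set and enter the kernel only on contention:
 *
 *	if (atomic_cmpset_acq_long(&umtx->u_owner, UMTX_UNOWNED, tid))
 *		return (0);		// fast path, no syscall
 *	return (_umtx_lock(umtx));	// contested: kernel slow path
 *
 * and symmetrically on unlock, making the system call only when the
 * owner word carries UMTX_CONTESTED.  The kernel's job is the slow
 * path alone: publish the contested bit, queue the thread, and sleep
 * until the owner's unlock call wakes it.
 */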
/*
 * Unlock a umtx object.
 */
static int
do_unlock_umtx(struct thread *td, struct umtx *umtx, u_long id)
{
	struct umtx_key key;
	u_long owner;
	u_long old;
	int error;
	int count;

	/*
	 * Make sure we own this mtx.
	 */
	owner = fuword(__DEVOLATILE(u_long *, &umtx->u_owner));
	if (owner == -1)
		return (EFAULT);

	if ((owner & ~UMTX_CONTESTED) != id)
		return (EPERM);

	/* This should be done in userland. */
	if ((owner & UMTX_CONTESTED) == 0) {
		old = casuword(&umtx->u_owner, owner, UMTX_UNOWNED);
		if (old == -1)
			return (EFAULT);
		if (old == owner)
			return (0);
		owner = old;
	}

	/* We should only ever be in here for contested locks. */
	if ((error = umtx_key_get(umtx, TYPE_SIMPLE_LOCK, AUTO_SHARE,
	    &key)) != 0)
		return (error);

	umtxq_lock(&key);
	umtxq_busy(&key);
	count = umtxq_count(&key);
	umtxq_unlock(&key);

	/*
	 * When unlocking the umtx, it must be marked as unowned if
	 * at most one thread is waiting on it.  Otherwise, it must
	 * be marked as contested.
	 */
	old = casuword(&umtx->u_owner, owner,
	    count <= 1 ? UMTX_UNOWNED : UMTX_CONTESTED);
	umtxq_lock(&key);
	umtxq_signal(&key, 1);
	umtxq_unbusy(&key);
	umtxq_unlock(&key);
	umtx_key_release(&key);
	if (old == -1)
		return (EFAULT);
	if (old != owner)
		return (EINVAL);
	return (0);
}
#ifdef COMPAT_FREEBSD32

/*
 * Lock a umtx object.
 */
static int
do_lock_umtx32(struct thread *td, uint32_t *m, uint32_t id,
    const struct timespec *timeout)
{
	struct abs_timeout timo;
	struct umtx_q *uq;
	uint32_t owner;
	uint32_t old;
	int error = 0;

	uq = td->td_umtxq;

	if (timeout != NULL)
		abs_timeout_init(&timo, CLOCK_REALTIME, 0, timeout);

	/*
	 * Care must be exercised when dealing with the umtx structure.
	 * It can fault on any access.
	 */
	for (;;) {
		/*
		 * Try the uncontested case.  This should be done in userland.
		 */
		owner = casuword32(m, UMUTEX_UNOWNED, id);

		/* The acquire succeeded. */
		if (owner == UMUTEX_UNOWNED)
			return (0);

		/* The address was invalid. */
		if (owner == -1)
			return (EFAULT);

		/* If no one owns it but it is contested, try to acquire it. */
		if (owner == UMUTEX_CONTESTED) {
			owner = casuword32(m,
			    UMUTEX_CONTESTED, id | UMUTEX_CONTESTED);
			if (owner == UMUTEX_CONTESTED)
				return (0);

			/* The address was invalid. */
			if (owner == -1)
				return (EFAULT);

			error = umtxq_check_susp(td);
			if (error != 0)
				break;

			/* If this failed, the lock has changed; restart. */
			continue;
		}

		/*
		 * If we caught a signal, we have retried and now
		 * exit immediately.
		 */
		if (error != 0)
			return (error);

		if ((error = umtx_key_get(m, TYPE_SIMPLE_LOCK,
		    AUTO_SHARE, &uq->uq_key)) != 0)
			return (error);

		umtxq_lock(&uq->uq_key);
		umtxq_busy(&uq->uq_key);
		umtxq_insert(uq);
		umtxq_unbusy(&uq->uq_key);
		umtxq_unlock(&uq->uq_key);

		/*
		 * Set the contested bit so that a release in user space
		 * knows to use the system call for unlock.  If this fails,
		 * either someone else has acquired the lock or it has been
		 * released.
		 */
		old = casuword32(m, owner, owner | UMUTEX_CONTESTED);

		/* The address was invalid. */
		if (old == -1) {
			umtxq_lock(&uq->uq_key);
			umtxq_remove(uq);
			umtxq_unlock(&uq->uq_key);
			umtx_key_release(&uq->uq_key);
			return (EFAULT);
		}

		/*
		 * If we set the contested bit, sleep.  Otherwise the lock
		 * changed and we need to retry, or we lost a race to the
		 * thread unlocking the umtx.
		 */
		umtxq_lock(&uq->uq_key);
		if (old == owner)
			error = umtxq_sleep(uq, "umtx", timeout == NULL ?
			    NULL : &timo);
		umtxq_remove(uq);
		umtxq_unlock(&uq->uq_key);
		umtx_key_release(&uq->uq_key);

		if (error == 0)
			error = umtxq_check_susp(td);
	}

	if (timeout == NULL) {
		/* Mutex locking is restarted if it is interrupted. */
		if (error == EINTR)
			error = ERESTART;
	} else {
		/* Timed locking is not restarted. */
		if (error == ERESTART)
			error = EINTR;
	}
	return (error);
}

/*
 * Unlock a umtx object.
 */
static int
do_unlock_umtx32(struct thread *td, uint32_t *m, uint32_t id)
{
	struct umtx_key key;
	uint32_t owner;
	uint32_t old;
	int error;
	int count;

	/*
	 * Make sure we own this mtx.
	 */
	owner = fuword32(m);
	if (owner == -1)
		return (EFAULT);

	if ((owner & ~UMUTEX_CONTESTED) != id)
		return (EPERM);

	/* This should be done in userland. */
	if ((owner & UMUTEX_CONTESTED) == 0) {
		old = casuword32(m, owner, UMUTEX_UNOWNED);
		if (old == -1)
			return (EFAULT);
		if (old == owner)
			return (0);
		owner = old;
	}

	/* We should only ever be in here for contested locks. */
	if ((error = umtx_key_get(m, TYPE_SIMPLE_LOCK, AUTO_SHARE,
	    &key)) != 0)
		return (error);

	umtxq_lock(&key);
	umtxq_busy(&key);
	count = umtxq_count(&key);
	umtxq_unlock(&key);

	/*
	 * When unlocking the umtx, it must be marked as unowned if
	 * at most one thread is waiting on it.  Otherwise, it must
	 * be marked as contested.
	 */
	old = casuword32(m, owner,
	    count <= 1 ? UMUTEX_UNOWNED : UMUTEX_CONTESTED);
	umtxq_lock(&key);
	umtxq_signal(&key, 1);
	umtxq_unbusy(&key);
	umtxq_unlock(&key);
	umtx_key_release(&key);
	if (old == -1)
		return (EFAULT);
	if (old != owner)
		return (EINVAL);
	return (0);
}
#endif

/*
 * Fetch and compare a value; sleep on the address if the value has
 * not changed.
 */
static int
do_wait(struct thread *td, void *addr, u_long id,
    struct _umtx_time *timeout, int compat32, int is_private)
{
	struct abs_timeout timo;
	struct umtx_q *uq;
	u_long tmp;
	int error = 0;

	uq = td->td_umtxq;
	if ((error = umtx_key_get(addr, TYPE_SIMPLE_WAIT,
	    is_private ? THREAD_SHARE : AUTO_SHARE, &uq->uq_key)) != 0)
		return (error);

	if (timeout != NULL)
		abs_timeout_init2(&timo, timeout);

	umtxq_lock(&uq->uq_key);
	umtxq_insert(uq);
	umtxq_unlock(&uq->uq_key);
	if (compat32 == 0)
		tmp = fuword(addr);
	else
		tmp = (unsigned int)fuword32(addr);
	umtxq_lock(&uq->uq_key);
	if (tmp == id)
		error = umtxq_sleep(uq, "uwait", timeout == NULL ?
		    NULL : &timo);
	if ((uq->uq_flags & UQF_UMTXQ) == 0)
		error = 0;
	else
		umtxq_remove(uq);
	umtxq_unlock(&uq->uq_key);
	umtx_key_release(&uq->uq_key);
	if (error == ERESTART)
		error = EINTR;
	return (error);
}
/*
 * Wake up threads sleeping on the specified address.
 */
int
kern_umtx_wake(struct thread *td, void *uaddr, int n_wake, int is_private)
{
	struct umtx_key key;
	int ret;

	if ((ret = umtx_key_get(uaddr, TYPE_SIMPLE_WAIT,
	    is_private ? THREAD_SHARE : AUTO_SHARE, &key)) != 0)
		return (ret);
	umtxq_lock(&key);
	ret = umtxq_signal(&key, n_wake);
	umtxq_unlock(&key);
	umtx_key_release(&key);
	return (0);
}
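/*
 * Illustrative userland sketch of the futex-style protocol served by
 * do_wait()/kern_umtx_wake() above (not part of the original source;
 * "ev", EV_CLEAR and EV_SET are hypothetical names).  A simple event
 * flag could be built as:
 *
 *	// waiter: sleep while ev->state still holds the value we saw
 *	while (atomic_load_acq_long(&ev->state) == EV_CLEAR)
 *		_umtx_op(&ev->state, UMTX_OP_WAIT, EV_CLEAR, NULL, NULL);
 *
 *	// poster: change the value, then wake any sleepers
 *	atomic_store_rel_long(&ev->state, EV_SET);
 *	_umtx_op(&ev->state, UMTX_OP_WAKE, INT_MAX, NULL, NULL);
 *
 * do_wait() re-reads the word after queueing the thread, so a wake
 * that races with the waiter's load is never lost: either the waiter
 * sees the new value and returns, or it is already on the queue when
 * the poster's UMTX_OP_WAKE scans it.
 */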
/*
 * Lock a PTHREAD_PRIO_NONE protocol POSIX mutex.
 */
static int
do_lock_normal(struct thread *td, struct umutex *m, uint32_t flags,
    struct _umtx_time *timeout, int mode)
{
	struct abs_timeout timo;
	struct umtx_q *uq;
	uint32_t owner, old, id;
	int error = 0;

	id = td->td_tid;
	uq = td->td_umtxq;

	if (timeout != NULL)
		abs_timeout_init2(&timo, timeout);

	/*
	 * Care must be exercised when dealing with the umtx structure.
	 * It can fault on any access.
	 */
	for (;;) {
		owner = fuword32(__DEVOLATILE(void *, &m->m_owner));
		if (mode == _UMUTEX_WAIT) {
			if (owner == UMUTEX_UNOWNED || owner == UMUTEX_CONTESTED)
				return (0);
		} else {
			/*
			 * Try the uncontested case.  This should be done
			 * in userland.
			 */
			owner = casuword32(&m->m_owner, UMUTEX_UNOWNED, id);

			/* The acquire succeeded. */
			if (owner == UMUTEX_UNOWNED)
				return (0);

			/* The address was invalid. */
			if (owner == -1)
				return (EFAULT);

			/* If no one owns it but it is contested, try to acquire it. */
			if (owner == UMUTEX_CONTESTED) {
				owner = casuword32(&m->m_owner,
				    UMUTEX_CONTESTED, id | UMUTEX_CONTESTED);

				if (owner == UMUTEX_CONTESTED)
					return (0);

				/* The address was invalid. */
				if (owner == -1)
					return (EFAULT);

				error = umtxq_check_susp(td);
				if (error != 0)
					return (error);

				/* If this failed, the lock has changed; restart. */
				continue;
			}
		}

		if ((flags & UMUTEX_ERROR_CHECK) != 0 &&
		    (owner & ~UMUTEX_CONTESTED) == id)
			return (EDEADLK);

		if (mode == _UMUTEX_TRY)
			return (EBUSY);

		/*
		 * If we caught a signal, we have retried and now
		 * exit immediately.
		 */
		if (error != 0)
			return (error);

		if ((error = umtx_key_get(m, TYPE_NORMAL_UMUTEX,
		    GET_SHARE(flags), &uq->uq_key)) != 0)
			return (error);

		umtxq_lock(&uq->uq_key);
		umtxq_busy(&uq->uq_key);
		umtxq_insert(uq);
		umtxq_unlock(&uq->uq_key);

		/*
		 * Set the contested bit so that a release in user space
		 * knows to use the system call for unlock.  If this fails,
		 * either someone else has acquired the lock or it has been
		 * released.
		 */
		old = casuword32(&m->m_owner, owner, owner | UMUTEX_CONTESTED);

		/* The address was invalid. */
		if (old == -1) {
			umtxq_lock(&uq->uq_key);
			umtxq_remove(uq);
			umtxq_unbusy(&uq->uq_key);
			umtxq_unlock(&uq->uq_key);
			umtx_key_release(&uq->uq_key);
			return (EFAULT);
		}

		/*
		 * If we set the contested bit, sleep.  Otherwise the lock
		 * changed and we need to retry, or we lost a race to the
		 * thread unlocking the umtx.
		 */
		umtxq_lock(&uq->uq_key);
		umtxq_unbusy(&uq->uq_key);
		if (old == owner)
			error = umtxq_sleep(uq, "umtxn", timeout == NULL ?
			    NULL : &timo);
		umtxq_remove(uq);
		umtxq_unlock(&uq->uq_key);
		umtx_key_release(&uq->uq_key);

		if (error == 0)
			error = umtxq_check_susp(td);
	}

	return (0);
}

/*
 * Unlock a PTHREAD_PRIO_NONE protocol POSIX mutex.
 */
static int
do_unlock_normal(struct thread *td, struct umutex *m, uint32_t flags)
{
	struct umtx_key key;
	uint32_t owner, old, id;
	int error;
	int count;

	id = td->td_tid;
	/*
	 * Make sure we own this mtx.
	 */
	owner = fuword32(__DEVOLATILE(uint32_t *, &m->m_owner));
	if (owner == -1)
		return (EFAULT);

	if ((owner & ~UMUTEX_CONTESTED) != id)
		return (EPERM);

	if ((owner & UMUTEX_CONTESTED) == 0) {
		old = casuword32(&m->m_owner, owner, UMUTEX_UNOWNED);
		if (old == -1)
			return (EFAULT);
		if (old == owner)
			return (0);
		owner = old;
	}

	/* We should only ever be in here for contested locks. */
	if ((error = umtx_key_get(m, TYPE_NORMAL_UMUTEX, GET_SHARE(flags),
	    &key)) != 0)
		return (error);

	umtxq_lock(&key);
	umtxq_busy(&key);
	count = umtxq_count(&key);
	umtxq_unlock(&key);

	/*
	 * When unlocking the umtx, it must be marked as unowned if
	 * at most one thread is waiting on it.  Otherwise, it must
	 * be marked as contested.
	 */
	old = casuword32(&m->m_owner, owner,
	    count <= 1 ? UMUTEX_UNOWNED : UMUTEX_CONTESTED);
	umtxq_lock(&key);
	umtxq_signal(&key, 1);
	umtxq_unbusy(&key);
	umtxq_unlock(&key);
	umtx_key_release(&key);
	if (old == -1)
		return (EFAULT);
	if (old != owner)
		return (EINVAL);
	return (0);
}

/*
 * Check if the mutex is available and wake up a waiter; this is
 * only for simple (PTHREAD_PRIO_NONE) mutexes.
 */
static int
do_wake_umutex(struct thread *td, struct umutex *m)
{
	struct umtx_key key;
	uint32_t owner;
	uint32_t flags;
	int error;
	int count;

	owner = fuword32(__DEVOLATILE(uint32_t *, &m->m_owner));
	if (owner == -1)
		return (EFAULT);

	if ((owner & ~UMUTEX_CONTESTED) != 0)
		return (0);

	flags = fuword32(&m->m_flags);

	/* We should only ever be in here for contested locks. */
	if ((error = umtx_key_get(m, TYPE_NORMAL_UMUTEX, GET_SHARE(flags),
	    &key)) != 0)
		return (error);

	umtxq_lock(&key);
	umtxq_busy(&key);
	count = umtxq_count(&key);
	umtxq_unlock(&key);

	if (count <= 1)
		owner = casuword32(&m->m_owner, UMUTEX_CONTESTED, UMUTEX_UNOWNED);

	umtxq_lock(&key);
	if (count != 0 && (owner & ~UMUTEX_CONTESTED) == 0)
		umtxq_signal(&key, 1);
	umtxq_unbusy(&key);
	umtxq_unlock(&key);
	umtx_key_release(&key);
	return (0);
}
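/*
 * Worked example of the m_owner encoding the functions above rely on
 * (illustrative only; it assumes the umutex constants from
 * <sys/umtx.h>, where UMUTEX_CONTESTED is the high bit).  The owner
 * word packs the owner's thread id with the contested flag:
 *
 *	UMUTEX_UNOWNED		0x00000000  unlocked, no waiters
 *	tid			locked, uncontested
 *	tid | UMUTEX_CONTESTED	locked, waiters queued in the kernel
 *	UMUTEX_CONTESTED	unlocked, but waiters may remain
 *
 * So "(owner & ~UMUTEX_CONTESTED) != id" means "not owned by this
 * thread", and an unlock that finds the contested bit set must go
 * through the kernel so a queued waiter can be signalled.
 */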
/*
 * Check if the mutex has waiters and try to fix the contention bit.
 */
static int
do_wake2_umutex(struct thread *td, struct umutex *m, uint32_t flags)
{
	struct umtx_key key;
	uint32_t owner, old;
	int type;
	int error;
	int count;

	switch (flags & (UMUTEX_PRIO_INHERIT | UMUTEX_PRIO_PROTECT)) {
	case 0:
		type = TYPE_NORMAL_UMUTEX;
		break;
	case UMUTEX_PRIO_INHERIT:
		type = TYPE_PI_UMUTEX;
		break;
	case UMUTEX_PRIO_PROTECT:
		type = TYPE_PP_UMUTEX;
		break;
	default:
		return (EINVAL);
	}
	if ((error = umtx_key_get(m, type, GET_SHARE(flags),
	    &key)) != 0)
		return (error);

	owner = 0;
	umtxq_lock(&key);
	umtxq_busy(&key);
	count = umtxq_count(&key);
	umtxq_unlock(&key);
	/*
	 * Only repair the contention bit if there is a waiter; that
	 * means the mutex is still being referenced by userland code.
	 * Otherwise don't update any memory.
	 */
	if (count > 1) {
		owner = fuword32(__DEVOLATILE(uint32_t *, &m->m_owner));
		while ((owner & UMUTEX_CONTESTED) == 0) {
			old = casuword32(&m->m_owner, owner,
			    owner | UMUTEX_CONTESTED);
			if (old == owner)
				break;
			owner = old;
			if (old == -1)
				break;
			error = umtxq_check_susp(td);
			if (error != 0)
				break;
		}
	} else if (count == 1) {
		owner = fuword32(__DEVOLATILE(uint32_t *, &m->m_owner));
		while ((owner & ~UMUTEX_CONTESTED) != 0 &&
		       (owner & UMUTEX_CONTESTED) == 0) {
			old = casuword32(&m->m_owner, owner,
			    owner | UMUTEX_CONTESTED);
			if (old == owner)
				break;
			owner = old;
			if (old == -1)
				break;
			error = umtxq_check_susp(td);
			if (error != 0)
				break;
		}
	}
	umtxq_lock(&key);
	if (owner == -1) {
		error = EFAULT;
		umtxq_signal(&key, INT_MAX);
	} else if (count != 0 && (owner & ~UMUTEX_CONTESTED) == 0)
		umtxq_signal(&key, 1);
	umtxq_unbusy(&key);
	umtxq_unlock(&key);
	umtx_key_release(&key);
	return (error);
}

static inline struct umtx_pi *
umtx_pi_alloc(int flags)
{
	struct umtx_pi *pi;

	pi = uma_zalloc(umtx_pi_zone, M_ZERO | flags);
	TAILQ_INIT(&pi->pi_blocked);
	atomic_add_int(&umtx_pi_allocated, 1);
	return (pi);
}

static inline void
umtx_pi_free(struct umtx_pi *pi)
{
	uma_zfree(umtx_pi_zone, pi);
	atomic_add_int(&umtx_pi_allocated, -1);
}

/*
 * Adjust the thread's position on the PI mutex's blocked list after
 * its priority has been changed.
 */
static int
umtx_pi_adjust_thread(struct umtx_pi *pi, struct thread *td)
{
	struct umtx_q *uq, *uq1, *uq2;
	struct thread *td1;

	mtx_assert(&umtx_lock, MA_OWNED);
	if (pi == NULL)
		return (0);

	uq = td->td_umtxq;

	/*
	 * Check if the thread needs to be moved on the blocked chain.
	 * It needs to be moved if either its priority is lower than
	 * the previous thread or higher than the next thread.
	 */
	uq1 = TAILQ_PREV(uq, umtxq_head, uq_lockq);
	uq2 = TAILQ_NEXT(uq, uq_lockq);
	if ((uq1 != NULL && UPRI(td) < UPRI(uq1->uq_thread)) ||
	    (uq2 != NULL && UPRI(td) > UPRI(uq2->uq_thread))) {
		/*
		 * Remove the thread from the blocked chain and determine
		 * where it should be moved to.
		 */
		TAILQ_REMOVE(&pi->pi_blocked, uq, uq_lockq);
		TAILQ_FOREACH(uq1, &pi->pi_blocked, uq_lockq) {
			td1 = uq1->uq_thread;
			MPASS(td1->td_proc->p_magic == P_MAGIC);
			if (UPRI(td1) > UPRI(td))
				break;
		}

		if (uq1 == NULL)
			TAILQ_INSERT_TAIL(&pi->pi_blocked, uq, uq_lockq);
		else
			TAILQ_INSERT_BEFORE(uq1, uq, uq_lockq);
	}
	return (1);
}
1618 */ 1619 TAILQ_REMOVE(&pi->pi_blocked, uq, uq_lockq); 1620 TAILQ_FOREACH(uq1, &pi->pi_blocked, uq_lockq) { 1621 td1 = uq1->uq_thread; 1622 MPASS(td1->td_proc->p_magic == P_MAGIC); 1623 if (UPRI(td1) > UPRI(td)) 1624 break; 1625 } 1626 1627 if (uq1 == NULL) 1628 TAILQ_INSERT_TAIL(&pi->pi_blocked, uq, uq_lockq); 1629 else 1630 TAILQ_INSERT_BEFORE(uq1, uq, uq_lockq); 1631 } 1632 return (1); 1633} 1634 1635/* 1636 * Propagate priority when a thread is blocked on POSIX 1637 * PI mutex. 1638 */ 1639static void 1640umtx_propagate_priority(struct thread *td) 1641{ 1642 struct umtx_q *uq; 1643 struct umtx_pi *pi; 1644 int pri; 1645 1646 mtx_assert(&umtx_lock, MA_OWNED); 1647 pri = UPRI(td); 1648 uq = td->td_umtxq; 1649 pi = uq->uq_pi_blocked; 1650 if (pi == NULL) 1651 return; 1652 1653 for (;;) { 1654 td = pi->pi_owner; 1655 if (td == NULL || td == curthread) 1656 return; 1657 1658 MPASS(td->td_proc != NULL); 1659 MPASS(td->td_proc->p_magic == P_MAGIC); 1660 1661 thread_lock(td); 1662 if (td->td_lend_user_pri > pri) 1663 sched_lend_user_prio(td, pri); 1664 else { 1665 thread_unlock(td); 1666 break; 1667 } 1668 thread_unlock(td); 1669 1670 /* 1671 * Pick up the lock that td is blocked on. 1672 */ 1673 uq = td->td_umtxq; 1674 pi = uq->uq_pi_blocked; 1675 if (pi == NULL) 1676 break; 1677 /* Resort td on the list if needed. */ 1678 umtx_pi_adjust_thread(pi, td); 1679 } 1680} 1681 1682/* 1683 * Unpropagate priority for a PI mutex when a thread blocked on 1684 * it is interrupted by signal or resumed by others. 1685 */ 1686static void 1687umtx_repropagate_priority(struct umtx_pi *pi) 1688{ 1689 struct umtx_q *uq, *uq_owner; 1690 struct umtx_pi *pi2; 1691 int pri; 1692 1693 mtx_assert(&umtx_lock, MA_OWNED); 1694 1695 while (pi != NULL && pi->pi_owner != NULL) { 1696 pri = PRI_MAX; 1697 uq_owner = pi->pi_owner->td_umtxq; 1698 1699 TAILQ_FOREACH(pi2, &uq_owner->uq_pi_contested, pi_link) { 1700 uq = TAILQ_FIRST(&pi2->pi_blocked); 1701 if (uq != NULL) { 1702 if (pri > UPRI(uq->uq_thread)) 1703 pri = UPRI(uq->uq_thread); 1704 } 1705 } 1706 1707 if (pri > uq_owner->uq_inherited_pri) 1708 pri = uq_owner->uq_inherited_pri; 1709 thread_lock(pi->pi_owner); 1710 sched_lend_user_prio(pi->pi_owner, pri); 1711 thread_unlock(pi->pi_owner); 1712 if ((pi = uq_owner->uq_pi_blocked) != NULL) 1713 umtx_pi_adjust_thread(pi, uq_owner->uq_thread); 1714 } 1715} 1716 1717/* 1718 * Insert a PI mutex into owned list. 1719 */ 1720static void 1721umtx_pi_setowner(struct umtx_pi *pi, struct thread *owner) 1722{ 1723 struct umtx_q *uq_owner; 1724 1725 uq_owner = owner->td_umtxq; 1726 mtx_assert(&umtx_lock, MA_OWNED); 1727 if (pi->pi_owner != NULL) 1728 panic("pi_ower != NULL"); 1729 pi->pi_owner = owner; 1730 TAILQ_INSERT_TAIL(&uq_owner->uq_pi_contested, pi, pi_link); 1731} 1732 1733/* 1734 * Claim ownership of a PI mutex. 1735 */ 1736static int 1737umtx_pi_claim(struct umtx_pi *pi, struct thread *owner) 1738{ 1739 struct umtx_q *uq, *uq_owner; 1740 1741 uq_owner = owner->td_umtxq; 1742 mtx_lock_spin(&umtx_lock); 1743 if (pi->pi_owner == owner) { 1744 mtx_unlock_spin(&umtx_lock); 1745 return (0); 1746 } 1747 1748 if (pi->pi_owner != NULL) { 1749 /* 1750 * userland may have already messed the mutex, sigh. 
1751 */ 1752 mtx_unlock_spin(&umtx_lock); 1753 return (EPERM); 1754 } 1755 umtx_pi_setowner(pi, owner); 1756 uq = TAILQ_FIRST(&pi->pi_blocked); 1757 if (uq != NULL) { 1758 int pri; 1759 1760 pri = UPRI(uq->uq_thread); 1761 thread_lock(owner); 1762 if (pri < UPRI(owner)) 1763 sched_lend_user_prio(owner, pri); 1764 thread_unlock(owner); 1765 } 1766 mtx_unlock_spin(&umtx_lock); 1767 return (0); 1768} 1769 1770/* 1771 * Adjust a thread's order position in its blocked PI mutex, 1772 * this may result new priority propagating process. 1773 */ 1774void 1775umtx_pi_adjust(struct thread *td, u_char oldpri) 1776{ 1777 struct umtx_q *uq; 1778 struct umtx_pi *pi; 1779 1780 uq = td->td_umtxq; 1781 mtx_lock_spin(&umtx_lock); 1782 /* 1783 * Pick up the lock that td is blocked on. 1784 */ 1785 pi = uq->uq_pi_blocked; 1786 if (pi != NULL) { 1787 umtx_pi_adjust_thread(pi, td); 1788 umtx_repropagate_priority(pi); 1789 } 1790 mtx_unlock_spin(&umtx_lock); 1791} 1792 1793/* 1794 * Sleep on a PI mutex. 1795 */ 1796static int 1797umtxq_sleep_pi(struct umtx_q *uq, struct umtx_pi *pi, 1798 uint32_t owner, const char *wmesg, struct abs_timeout *timo) 1799{ 1800 struct umtxq_chain *uc; 1801 struct thread *td, *td1; 1802 struct umtx_q *uq1; 1803 int pri; 1804 int error = 0; 1805 1806 td = uq->uq_thread; 1807 KASSERT(td == curthread, ("inconsistent uq_thread")); 1808 uc = umtxq_getchain(&uq->uq_key); 1809 UMTXQ_LOCKED_ASSERT(uc); 1810 UMTXQ_BUSY_ASSERT(uc); 1811 umtxq_insert(uq); 1812 mtx_lock_spin(&umtx_lock); 1813 if (pi->pi_owner == NULL) { 1814 mtx_unlock_spin(&umtx_lock); 1815 /* XXX Only look up thread in current process. */ 1816 td1 = tdfind(owner, curproc->p_pid); 1817 mtx_lock_spin(&umtx_lock); 1818 if (td1 != NULL) { 1819 if (pi->pi_owner == NULL) 1820 umtx_pi_setowner(pi, td1); 1821 PROC_UNLOCK(td1->td_proc); 1822 } 1823 } 1824 1825 TAILQ_FOREACH(uq1, &pi->pi_blocked, uq_lockq) { 1826 pri = UPRI(uq1->uq_thread); 1827 if (pri > UPRI(td)) 1828 break; 1829 } 1830 1831 if (uq1 != NULL) 1832 TAILQ_INSERT_BEFORE(uq1, uq, uq_lockq); 1833 else 1834 TAILQ_INSERT_TAIL(&pi->pi_blocked, uq, uq_lockq); 1835 1836 uq->uq_pi_blocked = pi; 1837 thread_lock(td); 1838 td->td_flags |= TDF_UPIBLOCKED; 1839 thread_unlock(td); 1840 umtx_propagate_priority(td); 1841 mtx_unlock_spin(&umtx_lock); 1842 umtxq_unbusy(&uq->uq_key); 1843 1844 error = umtxq_sleep(uq, wmesg, timo); 1845 umtxq_remove(uq); 1846 1847 mtx_lock_spin(&umtx_lock); 1848 uq->uq_pi_blocked = NULL; 1849 thread_lock(td); 1850 td->td_flags &= ~TDF_UPIBLOCKED; 1851 thread_unlock(td); 1852 TAILQ_REMOVE(&pi->pi_blocked, uq, uq_lockq); 1853 umtx_repropagate_priority(pi); 1854 mtx_unlock_spin(&umtx_lock); 1855 umtxq_unlock(&uq->uq_key); 1856 1857 return (error); 1858} 1859 1860/* 1861 * Add reference count for a PI mutex. 1862 */ 1863static void 1864umtx_pi_ref(struct umtx_pi *pi) 1865{ 1866 struct umtxq_chain *uc; 1867 1868 uc = umtxq_getchain(&pi->pi_key); 1869 UMTXQ_LOCKED_ASSERT(uc); 1870 pi->pi_refcount++; 1871} 1872 1873/* 1874 * Decrease reference count for a PI mutex, if the counter 1875 * is decreased to zero, its memory space is freed. 
/*
 * Decrease the reference count of a PI mutex; when the counter
 * drops to zero, its memory is freed.
 */
static void
umtx_pi_unref(struct umtx_pi *pi)
{
	struct umtxq_chain *uc;

	uc = umtxq_getchain(&pi->pi_key);
	UMTXQ_LOCKED_ASSERT(uc);
	KASSERT(pi->pi_refcount > 0, ("invalid reference count"));
	if (--pi->pi_refcount == 0) {
		mtx_lock_spin(&umtx_lock);
		if (pi->pi_owner != NULL) {
			TAILQ_REMOVE(&pi->pi_owner->td_umtxq->uq_pi_contested,
			    pi, pi_link);
			pi->pi_owner = NULL;
		}
		KASSERT(TAILQ_EMPTY(&pi->pi_blocked),
		    ("blocked queue not empty"));
		mtx_unlock_spin(&umtx_lock);
		TAILQ_REMOVE(&uc->uc_pi_list, pi, pi_hashlink);
		umtx_pi_free(pi);
	}
}

/*
 * Find a PI mutex in the hash table.
 */
static struct umtx_pi *
umtx_pi_lookup(struct umtx_key *key)
{
	struct umtxq_chain *uc;
	struct umtx_pi *pi;

	uc = umtxq_getchain(key);
	UMTXQ_LOCKED_ASSERT(uc);

	TAILQ_FOREACH(pi, &uc->uc_pi_list, pi_hashlink) {
		if (umtx_key_match(&pi->pi_key, key)) {
			return (pi);
		}
	}
	return (NULL);
}

/*
 * Insert a PI mutex into the hash table.
 */
static inline void
umtx_pi_insert(struct umtx_pi *pi)
{
	struct umtxq_chain *uc;

	uc = umtxq_getchain(&pi->pi_key);
	UMTXQ_LOCKED_ASSERT(uc);
	TAILQ_INSERT_TAIL(&uc->uc_pi_list, pi, pi_hashlink);
}
/*
 * Lock a PI mutex.
 */
static int
do_lock_pi(struct thread *td, struct umutex *m, uint32_t flags,
    struct _umtx_time *timeout, int try)
{
	struct abs_timeout timo;
	struct umtx_q *uq;
	struct umtx_pi *pi, *new_pi;
	uint32_t id, owner, old;
	int error;

	id = td->td_tid;
	uq = td->td_umtxq;

	if ((error = umtx_key_get(m, TYPE_PI_UMUTEX, GET_SHARE(flags),
	    &uq->uq_key)) != 0)
		return (error);

	if (timeout != NULL)
		abs_timeout_init2(&timo, timeout);

	umtxq_lock(&uq->uq_key);
	pi = umtx_pi_lookup(&uq->uq_key);
	if (pi == NULL) {
		new_pi = umtx_pi_alloc(M_NOWAIT);
		if (new_pi == NULL) {
			umtxq_unlock(&uq->uq_key);
			new_pi = umtx_pi_alloc(M_WAITOK);
			umtxq_lock(&uq->uq_key);
			pi = umtx_pi_lookup(&uq->uq_key);
			if (pi != NULL) {
				umtx_pi_free(new_pi);
				new_pi = NULL;
			}
		}
		if (new_pi != NULL) {
			new_pi->pi_key = uq->uq_key;
			umtx_pi_insert(new_pi);
			pi = new_pi;
		}
	}
	umtx_pi_ref(pi);
	umtxq_unlock(&uq->uq_key);

	/*
	 * Care must be exercised when dealing with the umtx structure.
	 * It can fault on any access.
	 */
	for (;;) {
		/*
		 * Try the uncontested case.  This should be done in userland.
		 */
		owner = casuword32(&m->m_owner, UMUTEX_UNOWNED, id);

		/* The acquire succeeded. */
		if (owner == UMUTEX_UNOWNED) {
			error = 0;
			break;
		}

		/* The address was invalid. */
		if (owner == -1) {
			error = EFAULT;
			break;
		}

		/* If no one owns it but it is contested, try to acquire it. */
		if (owner == UMUTEX_CONTESTED) {
			owner = casuword32(&m->m_owner,
			    UMUTEX_CONTESTED, id | UMUTEX_CONTESTED);

			if (owner == UMUTEX_CONTESTED) {
				umtxq_lock(&uq->uq_key);
				umtxq_busy(&uq->uq_key);
				error = umtx_pi_claim(pi, td);
				umtxq_unbusy(&uq->uq_key);
				umtxq_unlock(&uq->uq_key);
				break;
			}

			/* The address was invalid. */
			if (owner == -1) {
				error = EFAULT;
				break;
			}

			error = umtxq_check_susp(td);
			if (error != 0)
				break;

			/* If this failed, the lock has changed; restart. */
			continue;
		}

		if ((flags & UMUTEX_ERROR_CHECK) != 0 &&
		    (owner & ~UMUTEX_CONTESTED) == id) {
			error = EDEADLK;
			break;
		}

		if (try != 0) {
			error = EBUSY;
			break;
		}

		/*
		 * If we caught a signal, we have retried and now
		 * exit immediately.
		 */
		if (error != 0)
			break;

		umtxq_lock(&uq->uq_key);
		umtxq_busy(&uq->uq_key);
		umtxq_unlock(&uq->uq_key);

		/*
		 * Set the contested bit so that a release in user space
		 * knows to use the system call for unlock.  If this fails,
		 * either someone else has acquired the lock or it has been
		 * released.
		 */
		old = casuword32(&m->m_owner, owner, owner | UMUTEX_CONTESTED);

		/* The address was invalid. */
		if (old == -1) {
			umtxq_lock(&uq->uq_key);
			umtxq_unbusy(&uq->uq_key);
			umtxq_unlock(&uq->uq_key);
			error = EFAULT;
			break;
		}

		umtxq_lock(&uq->uq_key);
		/*
		 * If we set the contested bit, sleep.  Otherwise the lock
		 * changed and we need to retry, or we lost a race to the
		 * thread unlocking the umtx.
		 */
		if (old == owner) {
			error = umtxq_sleep_pi(uq, pi, owner & ~UMUTEX_CONTESTED,
			    "umtxpi", timeout == NULL ? NULL : &timo);
			if (error != 0)
				continue;
		} else {
			umtxq_unbusy(&uq->uq_key);
			umtxq_unlock(&uq->uq_key);
		}

		error = umtxq_check_susp(td);
		if (error != 0)
			break;
	}

	umtxq_lock(&uq->uq_key);
	umtx_pi_unref(pi);
	umtxq_unlock(&uq->uq_key);

	umtx_key_release(&uq->uq_key);
	return (error);
}
/*
 * Unlock a PI mutex.
 */
static int
do_unlock_pi(struct thread *td, struct umutex *m, uint32_t flags)
{
	struct umtx_key key;
	struct umtx_q *uq_first, *uq_first2, *uq_me;
	struct umtx_pi *pi, *pi2;
	uint32_t owner, old, id;
	int error;
	int count;
	int pri;

	id = td->td_tid;
	/*
	 * Make sure we own this mtx.
	 */
	owner = fuword32(__DEVOLATILE(uint32_t *, &m->m_owner));
	if (owner == -1)
		return (EFAULT);

	if ((owner & ~UMUTEX_CONTESTED) != id)
		return (EPERM);

	/* This should be done in userland. */
	if ((owner & UMUTEX_CONTESTED) == 0) {
		old = casuword32(&m->m_owner, owner, UMUTEX_UNOWNED);
		if (old == -1)
			return (EFAULT);
		if (old == owner)
			return (0);
		owner = old;
	}

	/* We should only ever be in here for contested locks. */
	if ((error = umtx_key_get(m, TYPE_PI_UMUTEX, GET_SHARE(flags),
	    &key)) != 0)
		return (error);

	umtxq_lock(&key);
	umtxq_busy(&key);
	count = umtxq_count_pi(&key, &uq_first);
	if (uq_first != NULL) {
		mtx_lock_spin(&umtx_lock);
		pi = uq_first->uq_pi_blocked;
		KASSERT(pi != NULL, ("pi == NULL?"));
		if (pi->pi_owner != curthread) {
			mtx_unlock_spin(&umtx_lock);
			umtxq_unbusy(&key);
			umtxq_unlock(&key);
			umtx_key_release(&key);
			/* Userland messed up the mutex. */
			return (EPERM);
		}
		uq_me = curthread->td_umtxq;
		pi->pi_owner = NULL;
		TAILQ_REMOVE(&uq_me->uq_pi_contested, pi, pi_link);
		/* Find the highest-priority thread that is still sleeping. */
		uq_first = TAILQ_FIRST(&pi->pi_blocked);
		while (uq_first != NULL &&
		       (uq_first->uq_flags & UQF_UMTXQ) == 0) {
			uq_first = TAILQ_NEXT(uq_first, uq_lockq);
		}
		pri = PRI_MAX;
		TAILQ_FOREACH(pi2, &uq_me->uq_pi_contested, pi_link) {
			uq_first2 = TAILQ_FIRST(&pi2->pi_blocked);
			if (uq_first2 != NULL) {
				if (pri > UPRI(uq_first2->uq_thread))
					pri = UPRI(uq_first2->uq_thread);
			}
		}
		thread_lock(curthread);
		sched_lend_user_prio(curthread, pri);
		thread_unlock(curthread);
		mtx_unlock_spin(&umtx_lock);
		if (uq_first)
			umtxq_signal_thread(uq_first);
	}
	umtxq_unlock(&key);

	/*
	 * When unlocking the umtx, it must be marked as unowned if
	 * at most one thread is waiting on it.  Otherwise, it must
	 * be marked as contested.
	 */
	old = casuword32(&m->m_owner, owner,
	    count <= 1 ? UMUTEX_UNOWNED : UMUTEX_CONTESTED);

	umtxq_lock(&key);
	umtxq_unbusy(&key);
	umtxq_unlock(&key);
	umtx_key_release(&key);
	if (old == -1)
		return (EFAULT);
	if (old != owner)
		return (EINVAL);
	return (0);
}
2284 NULL : &timo); 2285 umtxq_remove(uq); 2286 umtxq_unlock(&uq->uq_key); 2287 2288 mtx_lock_spin(&umtx_lock); 2289 uq->uq_inherited_pri = old_inherited_pri; 2290 pri = PRI_MAX; 2291 TAILQ_FOREACH(pi, &uq->uq_pi_contested, pi_link) { 2292 uq2 = TAILQ_FIRST(&pi->pi_blocked); 2293 if (uq2 != NULL) { 2294 if (pri > UPRI(uq2->uq_thread)) 2295 pri = UPRI(uq2->uq_thread); 2296 } 2297 } 2298 if (pri > uq->uq_inherited_pri) 2299 pri = uq->uq_inherited_pri; 2300 thread_lock(td); 2301 sched_lend_user_prio(td, pri); 2302 thread_unlock(td); 2303 mtx_unlock_spin(&umtx_lock); 2304 } 2305 2306 if (error != 0) { 2307 mtx_lock_spin(&umtx_lock); 2308 uq->uq_inherited_pri = old_inherited_pri; 2309 pri = PRI_MAX; 2310 TAILQ_FOREACH(pi, &uq->uq_pi_contested, pi_link) { 2311 uq2 = TAILQ_FIRST(&pi->pi_blocked); 2312 if (uq2 != NULL) { 2313 if (pri > UPRI(uq2->uq_thread)) 2314 pri = UPRI(uq2->uq_thread); 2315 } 2316 } 2317 if (pri > uq->uq_inherited_pri) 2318 pri = uq->uq_inherited_pri; 2319 thread_lock(td); 2320 sched_lend_user_prio(td, pri); 2321 thread_unlock(td); 2322 mtx_unlock_spin(&umtx_lock); 2323 } 2324 2325out: 2326 umtxq_lock(&uq->uq_key); 2327 umtxq_unbusy(&uq->uq_key); 2328 umtxq_unlock(&uq->uq_key); 2329 umtx_key_release(&uq->uq_key); 2330 return (error); 2331} 2332 2333/* 2334 * Unlock a PP mutex. 2335 */ 2336static int 2337do_unlock_pp(struct thread *td, struct umutex *m, uint32_t flags) 2338{ 2339 struct umtx_key key; 2340 struct umtx_q *uq, *uq2; 2341 struct umtx_pi *pi; 2342 uint32_t owner, id; 2343 uint32_t rceiling; 2344 int error, pri, new_inherited_pri, su; 2345 2346 id = td->td_tid; 2347 uq = td->td_umtxq; 2348 su = (priv_check(td, PRIV_SCHED_RTPRIO) == 0); 2349 2350 /* 2351 * Make sure we own this mtx. 2352 */ 2353 owner = fuword32(__DEVOLATILE(uint32_t *, &m->m_owner)); 2354 if (owner == -1) 2355 return (EFAULT); 2356 2357 if ((owner & ~UMUTEX_CONTESTED) != id) 2358 return (EPERM); 2359 2360 error = copyin(&m->m_ceilings[1], &rceiling, sizeof(uint32_t)); 2361 if (error != 0) 2362 return (error); 2363 2364 if (rceiling == -1) 2365 new_inherited_pri = PRI_MAX; 2366 else { 2367 rceiling = RTP_PRIO_MAX - rceiling; 2368 if (rceiling > RTP_PRIO_MAX) 2369 return (EINVAL); 2370 new_inherited_pri = PRI_MIN_REALTIME + rceiling; 2371 } 2372 2373 if ((error = umtx_key_get(m, TYPE_PP_UMUTEX, GET_SHARE(flags), 2374 &key)) != 0) 2375 return (error); 2376 umtxq_lock(&key); 2377 umtxq_busy(&key); 2378 umtxq_unlock(&key); 2379 /* 2380 * For priority protected mutex, always set unlocked state 2381 * to UMUTEX_CONTESTED, so that userland always enters kernel 2382 * to lock the mutex, it is necessary because thread priority 2383 * has to be adjusted for such mutex. 
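 *
 * As a concrete example (assuming the stock RTP_PRIO_MAX of 31): a
 * userland ceiling of 31 stored in m_ceilings[0] maps to the kernel
 * priority PRI_MIN_REALTIME + 0, the strongest realtime priority,
 * while a ceiling of 0 maps to PRI_MIN_REALTIME + 31, the weakest.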
2384 */ 2385 error = suword32(__DEVOLATILE(uint32_t *, &m->m_owner), 2386 UMUTEX_CONTESTED); 2387 2388 umtxq_lock(&key); 2389 if (error == 0) 2390 umtxq_signal(&key, 1); 2391 umtxq_unbusy(&key); 2392 umtxq_unlock(&key); 2393 2394 if (error == -1) 2395 error = EFAULT; 2396 else { 2397 mtx_lock_spin(&umtx_lock); 2398 if (su != 0) 2399 uq->uq_inherited_pri = new_inherited_pri; 2400 pri = PRI_MAX; 2401 TAILQ_FOREACH(pi, &uq->uq_pi_contested, pi_link) { 2402 uq2 = TAILQ_FIRST(&pi->pi_blocked); 2403 if (uq2 != NULL) { 2404 if (pri > UPRI(uq2->uq_thread)) 2405 pri = UPRI(uq2->uq_thread); 2406 } 2407 } 2408 if (pri > uq->uq_inherited_pri) 2409 pri = uq->uq_inherited_pri; 2410 thread_lock(td); 2411 sched_lend_user_prio(td, pri); 2412 thread_unlock(td); 2413 mtx_unlock_spin(&umtx_lock); 2414 } 2415 umtx_key_release(&key); 2416 return (error); 2417} 2418 2419static int 2420do_set_ceiling(struct thread *td, struct umutex *m, uint32_t ceiling, 2421 uint32_t *old_ceiling) 2422{ 2423 struct umtx_q *uq; 2424 uint32_t save_ceiling; 2425 uint32_t owner, id; 2426 uint32_t flags; 2427 int error; 2428 2429 flags = fuword32(&m->m_flags); 2430 if ((flags & UMUTEX_PRIO_PROTECT) == 0) 2431 return (EINVAL); 2432 if (ceiling > RTP_PRIO_MAX) 2433 return (EINVAL); 2434 id = td->td_tid; 2435 uq = td->td_umtxq; 2436 if ((error = umtx_key_get(m, TYPE_PP_UMUTEX, GET_SHARE(flags), 2437 &uq->uq_key)) != 0) 2438 return (error); 2439 for (;;) { 2440 umtxq_lock(&uq->uq_key); 2441 umtxq_busy(&uq->uq_key); 2442 umtxq_unlock(&uq->uq_key); 2443 2444 save_ceiling = fuword32(&m->m_ceilings[0]); 2445 2446 owner = casuword32(&m->m_owner, 2447 UMUTEX_CONTESTED, id | UMUTEX_CONTESTED); 2448 2449 if (owner == UMUTEX_CONTESTED) { 2450 suword32(&m->m_ceilings[0], ceiling); 2451 suword32(__DEVOLATILE(uint32_t *, &m->m_owner), 2452 UMUTEX_CONTESTED); 2453 error = 0; 2454 break; 2455 } 2456 2457 /* The address was invalid. */ 2458 if (owner == -1) { 2459 error = EFAULT; 2460 break; 2461 } 2462 2463 if ((owner & ~UMUTEX_CONTESTED) == id) { 2464 suword32(&m->m_ceilings[0], ceiling); 2465 error = 0; 2466 break; 2467 } 2468 2469 /* 2470 * If we caught a signal, we have retried and now 2471 * exit immediately. 2472 */ 2473 if (error != 0) 2474 break; 2475 2476 /* 2477 * We set the contested bit, sleep. Otherwise the lock changed 2478 * and we need to retry or we lost a race to the thread 2479 * unlocking the umtx. 2480 */ 2481 umtxq_lock(&uq->uq_key); 2482 umtxq_insert(uq); 2483 umtxq_unbusy(&uq->uq_key); 2484 error = umtxq_sleep(uq, "umtxpp", NULL); 2485 umtxq_remove(uq); 2486 umtxq_unlock(&uq->uq_key); 2487 } 2488 umtxq_lock(&uq->uq_key); 2489 if (error == 0) 2490 umtxq_signal(&uq->uq_key, INT_MAX); 2491 umtxq_unbusy(&uq->uq_key); 2492 umtxq_unlock(&uq->uq_key); 2493 umtx_key_release(&uq->uq_key); 2494 if (error == 0 && old_ceiling != NULL) 2495 suword32(old_ceiling, save_ceiling); 2496 return (error); 2497} 2498 2499/* 2500 * Lock a userland POSIX mutex. 
2501 */ 2502static int 2503do_lock_umutex(struct thread *td, struct umutex *m, 2504 struct _umtx_time *timeout, int mode) 2505{ 2506 uint32_t flags; 2507 int error; 2508 2509 flags = fuword32(&m->m_flags); 2510 if (flags == -1) 2511 return (EFAULT); 2512 2513 switch(flags & (UMUTEX_PRIO_INHERIT | UMUTEX_PRIO_PROTECT)) { 2514 case 0: 2515 error = do_lock_normal(td, m, flags, timeout, mode); 2516 break; 2517 case UMUTEX_PRIO_INHERIT: 2518 error = do_lock_pi(td, m, flags, timeout, mode); 2519 break; 2520 case UMUTEX_PRIO_PROTECT: 2521 error = do_lock_pp(td, m, flags, timeout, mode); 2522 break; 2523 default: 2524 return (EINVAL); 2525 } 2526 if (timeout == NULL) { 2527 if (error == EINTR && mode != _UMUTEX_WAIT) 2528 error = ERESTART; 2529 } else { 2530 /* Timed-locking is not restarted. */ 2531 if (error == ERESTART) 2532 error = EINTR; 2533 } 2534 return (error); 2535} 2536 2537/* 2538 * Unlock a userland POSIX mutex. 2539 */ 2540static int 2541do_unlock_umutex(struct thread *td, struct umutex *m) 2542{ 2543 uint32_t flags; 2544 2545 flags = fuword32(&m->m_flags); 2546 if (flags == -1) 2547 return (EFAULT); 2548 2549 switch(flags & (UMUTEX_PRIO_INHERIT | UMUTEX_PRIO_PROTECT)) { 2550 case 0: 2551 return (do_unlock_normal(td, m, flags)); 2552 case UMUTEX_PRIO_INHERIT: 2553 return (do_unlock_pi(td, m, flags)); 2554 case UMUTEX_PRIO_PROTECT: 2555 return (do_unlock_pp(td, m, flags)); 2556 } 2557 2558 return (EINVAL); 2559} 2560 2561static int 2562do_cv_wait(struct thread *td, struct ucond *cv, struct umutex *m, 2563 struct timespec *timeout, u_long wflags) 2564{ 2565 struct abs_timeout timo; 2566 struct umtx_q *uq; 2567 uint32_t flags; 2568 uint32_t clockid; 2569 int error; 2570 2571 uq = td->td_umtxq; 2572 flags = fuword32(&cv->c_flags); 2573 error = umtx_key_get(cv, TYPE_CV, GET_SHARE(flags), &uq->uq_key); 2574 if (error != 0) 2575 return (error); 2576 2577 if ((wflags & CVWAIT_CLOCKID) != 0) { 2578 clockid = fuword32(&cv->c_clockid); 2579 if (clockid < CLOCK_REALTIME || 2580 clockid >= CLOCK_THREAD_CPUTIME_ID) { 2581 /* hmm, only HW clock id will work. */ 2582 return (EINVAL); 2583 } 2584 } else { 2585 clockid = CLOCK_REALTIME; 2586 } 2587 2588 umtxq_lock(&uq->uq_key); 2589 umtxq_busy(&uq->uq_key); 2590 umtxq_insert(uq); 2591 umtxq_unlock(&uq->uq_key); 2592 2593 /* 2594 * Set c_has_waiters to 1 before releasing user mutex, also 2595 * don't modify cache line when unnecessary. 2596 */ 2597 if (fuword32(__DEVOLATILE(uint32_t *, &cv->c_has_waiters)) == 0) 2598 suword32(__DEVOLATILE(uint32_t *, &cv->c_has_waiters), 1); 2599 2600 umtxq_lock(&uq->uq_key); 2601 umtxq_unbusy(&uq->uq_key); 2602 umtxq_unlock(&uq->uq_key); 2603 2604 error = do_unlock_umutex(td, m); 2605 2606 if (timeout != NULL) 2607 abs_timeout_init(&timo, clockid, ((wflags & CVWAIT_ABSTIME) != 0), 2608 timeout); 2609 2610 umtxq_lock(&uq->uq_key); 2611 if (error == 0) { 2612 error = umtxq_sleep(uq, "ucond", timeout == NULL ? 2613 NULL : &timo); 2614 } 2615 2616 if ((uq->uq_flags & UQF_UMTXQ) == 0) 2617 error = 0; 2618 else { 2619 /* 2620 * This must be timeout,interrupted by signal or 2621 * surprious wakeup, clear c_has_waiter flag when 2622 * necessary. 
2623 */ 2624 umtxq_busy(&uq->uq_key); 2625 if ((uq->uq_flags & UQF_UMTXQ) != 0) { 2626 int oldlen = uq->uq_cur_queue->length; 2627 umtxq_remove(uq); 2628 if (oldlen == 1) { 2629 umtxq_unlock(&uq->uq_key); 2630 suword32( 2631 __DEVOLATILE(uint32_t *, 2632 &cv->c_has_waiters), 0); 2633 umtxq_lock(&uq->uq_key); 2634 } 2635 } 2636 umtxq_unbusy(&uq->uq_key); 2637 if (error == ERESTART) 2638 error = EINTR; 2639 } 2640 2641 umtxq_unlock(&uq->uq_key); 2642 umtx_key_release(&uq->uq_key); 2643 return (error); 2644} 2645 2646/* 2647 * Signal a userland condition variable. 2648 */ 2649static int 2650do_cv_signal(struct thread *td, struct ucond *cv) 2651{ 2652 struct umtx_key key; 2653 int error, cnt, nwake; 2654 uint32_t flags; 2655 2656 flags = fuword32(&cv->c_flags); 2657 if ((error = umtx_key_get(cv, TYPE_CV, GET_SHARE(flags), &key)) != 0) 2658 return (error); 2659 umtxq_lock(&key); 2660 umtxq_busy(&key); 2661 cnt = umtxq_count(&key); 2662 nwake = umtxq_signal(&key, 1); 2663 if (cnt <= nwake) { 2664 umtxq_unlock(&key); 2665 error = suword32( 2666 __DEVOLATILE(uint32_t *, &cv->c_has_waiters), 0); 2667 umtxq_lock(&key); 2668 } 2669 umtxq_unbusy(&key); 2670 umtxq_unlock(&key); 2671 umtx_key_release(&key); 2672 return (error); 2673} 2674 2675static int 2676do_cv_broadcast(struct thread *td, struct ucond *cv) 2677{ 2678 struct umtx_key key; 2679 int error; 2680 uint32_t flags; 2681 2682 flags = fuword32(&cv->c_flags); 2683 if ((error = umtx_key_get(cv, TYPE_CV, GET_SHARE(flags), &key)) != 0) 2684 return (error); 2685 2686 umtxq_lock(&key); 2687 umtxq_busy(&key); 2688 umtxq_signal(&key, INT_MAX); 2689 umtxq_unlock(&key); 2690 2691 error = suword32(__DEVOLATILE(uint32_t *, &cv->c_has_waiters), 0); 2692 2693 umtxq_lock(&key); 2694 umtxq_unbusy(&key); 2695 umtxq_unlock(&key); 2696 2697 umtx_key_release(&key); 2698 return (error); 2699} 2700 2701static int 2702do_rw_rdlock(struct thread *td, struct urwlock *rwlock, long fflag, struct _umtx_time *timeout) 2703{ 2704 struct abs_timeout timo; 2705 struct umtx_q *uq; 2706 uint32_t flags, wrflags; 2707 int32_t state, oldstate; 2708 int32_t blocked_readers; 2709 int error; 2710 2711 uq = td->td_umtxq; 2712 flags = fuword32(&rwlock->rw_flags); 2713 error = umtx_key_get(rwlock, TYPE_RWLOCK, GET_SHARE(flags), &uq->uq_key); 2714 if (error != 0) 2715 return (error); 2716 2717 if (timeout != NULL) 2718 abs_timeout_init2(&timo, timeout); 2719 2720 wrflags = URWLOCK_WRITE_OWNER; 2721 if (!(fflag & URWLOCK_PREFER_READER) && !(flags & URWLOCK_PREFER_READER)) 2722 wrflags |= URWLOCK_WRITE_WAITERS; 2723 2724 for (;;) { 2725 state = fuword32(__DEVOLATILE(int32_t *, &rwlock->rw_state)); 2726 /* try to lock it */ 2727 while (!(state & wrflags)) { 2728 if (__predict_false(URWLOCK_READER_COUNT(state) == URWLOCK_MAX_READERS)) { 2729 umtx_key_release(&uq->uq_key); 2730 return (EAGAIN); 2731 } 2732 oldstate = casuword32(&rwlock->rw_state, state, state + 1); 2733 if (oldstate == -1) { 2734 umtx_key_release(&uq->uq_key); 2735 return (EFAULT); 2736 } 2737 if (oldstate == state) { 2738 umtx_key_release(&uq->uq_key); 2739 return (0); 2740 } 2741 error = umtxq_check_susp(td); 2742 if (error != 0) 2743 break; 2744 state = oldstate; 2745 } 2746 2747 if (error) 2748 break; 2749 2750 /* grab monitor lock */ 2751 umtxq_lock(&uq->uq_key); 2752 umtxq_busy(&uq->uq_key); 2753 umtxq_unlock(&uq->uq_key); 2754 2755 /* 2756 * re-read the state, in case it changed between the try-lock above 2757 * and the check below 2758 */ 2759 state = fuword32(__DEVOLATILE(int32_t *, &rwlock->rw_state)); 2760 2761 /* 
set read contention bit */ 2762 while ((state & wrflags) && !(state & URWLOCK_READ_WAITERS)) { 2763 oldstate = casuword32(&rwlock->rw_state, state, state | URWLOCK_READ_WAITERS); 2764 if (oldstate == -1) { 2765 error = EFAULT; 2766 break; 2767 } 2768 if (oldstate == state) 2769 goto sleep; 2770 state = oldstate; 2771 error = umtxq_check_susp(td); 2772 if (error != 0) 2773 break; 2774 } 2775 if (error != 0) { 2776 umtxq_lock(&uq->uq_key); 2777 umtxq_unbusy(&uq->uq_key); 2778 umtxq_unlock(&uq->uq_key); 2779 break; 2780 } 2781 2782 /* state is changed while setting flags, restart */ 2783 if (!(state & wrflags)) { 2784 umtxq_lock(&uq->uq_key); 2785 umtxq_unbusy(&uq->uq_key); 2786 umtxq_unlock(&uq->uq_key); 2787 error = umtxq_check_susp(td); 2788 if (error != 0) 2789 break; 2790 continue; 2791 } 2792 2793sleep: 2794 /* contention bit is set, before sleeping, increase read waiter count */ 2795 blocked_readers = fuword32(&rwlock->rw_blocked_readers); 2796 suword32(&rwlock->rw_blocked_readers, blocked_readers+1); 2797 2798 while (state & wrflags) { 2799 umtxq_lock(&uq->uq_key); 2800 umtxq_insert(uq); 2801 umtxq_unbusy(&uq->uq_key); 2802 2803 error = umtxq_sleep(uq, "urdlck", timeout == NULL ? 2804 NULL : &timo); 2805 2806 umtxq_busy(&uq->uq_key); 2807 umtxq_remove(uq); 2808 umtxq_unlock(&uq->uq_key); 2809 if (error) 2810 break; 2811 state = fuword32(__DEVOLATILE(int32_t *, &rwlock->rw_state)); 2812 } 2813 2814 /* decrease read waiter count, and may clear read contention bit */ 2815 blocked_readers = fuword32(&rwlock->rw_blocked_readers); 2816 suword32(&rwlock->rw_blocked_readers, blocked_readers-1); 2817 if (blocked_readers == 1) { 2818 state = fuword32(__DEVOLATILE(int32_t *, &rwlock->rw_state)); 2819 for (;;) { 2820 oldstate = casuword32(&rwlock->rw_state, state, 2821 state & ~URWLOCK_READ_WAITERS); 2822 if (oldstate == -1) { 2823 error = EFAULT; 2824 break; 2825 } 2826 if (oldstate == state) 2827 break; 2828 state = oldstate; 2829 error = umtxq_check_susp(td); 2830 if (error != 0) 2831 break; 2832 } 2833 } 2834 2835 umtxq_lock(&uq->uq_key); 2836 umtxq_unbusy(&uq->uq_key); 2837 umtxq_unlock(&uq->uq_key); 2838 if (error != 0) 2839 break; 2840 } 2841 umtx_key_release(&uq->uq_key); 2842 if (error == ERESTART) 2843 error = EINTR; 2844 return (error); 2845} 2846 2847static int 2848do_rw_wrlock(struct thread *td, struct urwlock *rwlock, struct _umtx_time *timeout) 2849{ 2850 struct abs_timeout timo; 2851 struct umtx_q *uq; 2852 uint32_t flags; 2853 int32_t state, oldstate; 2854 int32_t blocked_writers; 2855 int32_t blocked_readers; 2856 int error; 2857 2858 uq = td->td_umtxq; 2859 flags = fuword32(&rwlock->rw_flags); 2860 error = umtx_key_get(rwlock, TYPE_RWLOCK, GET_SHARE(flags), &uq->uq_key); 2861 if (error != 0) 2862 return (error); 2863 2864 if (timeout != NULL) 2865 abs_timeout_init2(&timo, timeout); 2866 2867 blocked_readers = 0; 2868 for (;;) { 2869 state = fuword32(__DEVOLATILE(int32_t *, &rwlock->rw_state)); 2870 while (!(state & URWLOCK_WRITE_OWNER) && URWLOCK_READER_COUNT(state) == 0) { 2871 oldstate = casuword32(&rwlock->rw_state, state, state | URWLOCK_WRITE_OWNER); 2872 if (oldstate == -1) { 2873 umtx_key_release(&uq->uq_key); 2874 return (EFAULT); 2875 } 2876 if (oldstate == state) { 2877 umtx_key_release(&uq->uq_key); 2878 return (0); 2879 } 2880 state = oldstate; 2881 error = umtxq_check_susp(td); 2882 if (error != 0) 2883 break; 2884 } 2885 2886 if (error) { 2887 if (!(state & (URWLOCK_WRITE_OWNER|URWLOCK_WRITE_WAITERS)) && 2888 blocked_readers != 0) { 2889 umtxq_lock(&uq->uq_key); 
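			/*
			 * A writer bailing out on error here may itself have
			 * set URWLOCK_WRITE_WAITERS and thereby blocked
			 * incoming readers; if no writer owns or waits on the
			 * lock any more, wake all blocked readers so they can
			 * re-evaluate the state.
			 */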
2890 umtxq_busy(&uq->uq_key); 2891 umtxq_signal_queue(&uq->uq_key, INT_MAX, UMTX_SHARED_QUEUE); 2892 umtxq_unbusy(&uq->uq_key); 2893 umtxq_unlock(&uq->uq_key); 2894 } 2895 2896 break; 2897 } 2898 2899 /* grab monitor lock */ 2900 umtxq_lock(&uq->uq_key); 2901 umtxq_busy(&uq->uq_key); 2902 umtxq_unlock(&uq->uq_key); 2903 2904 /* 2905 * re-read the state, in case it changed between the try-lock above 2906 * and the check below 2907 */ 2908 state = fuword32(__DEVOLATILE(int32_t *, &rwlock->rw_state)); 2909 2910 while (((state & URWLOCK_WRITE_OWNER) || URWLOCK_READER_COUNT(state) != 0) && 2911 (state & URWLOCK_WRITE_WAITERS) == 0) { 2912 oldstate = casuword32(&rwlock->rw_state, state, state | URWLOCK_WRITE_WAITERS); 2913 if (oldstate == -1) { 2914 error = EFAULT; 2915 break; 2916 } 2917 if (oldstate == state) 2918 goto sleep; 2919 state = oldstate; 2920 error = umtxq_check_susp(td); 2921 if (error != 0) 2922 break; 2923 } 2924 if (error != 0) { 2925 umtxq_lock(&uq->uq_key); 2926 umtxq_unbusy(&uq->uq_key); 2927 umtxq_unlock(&uq->uq_key); 2928 break; 2929 } 2930 2931 if (!(state & URWLOCK_WRITE_OWNER) && URWLOCK_READER_COUNT(state) == 0) { 2932 umtxq_lock(&uq->uq_key); 2933 umtxq_unbusy(&uq->uq_key); 2934 umtxq_unlock(&uq->uq_key); 2935 error = umtxq_check_susp(td); 2936 if (error != 0) 2937 break; 2938 continue; 2939 } 2940sleep: 2941 blocked_writers = fuword32(&rwlock->rw_blocked_writers); 2942 suword32(&rwlock->rw_blocked_writers, blocked_writers+1); 2943 2944 while ((state & URWLOCK_WRITE_OWNER) || URWLOCK_READER_COUNT(state) != 0) { 2945 umtxq_lock(&uq->uq_key); 2946 umtxq_insert_queue(uq, UMTX_EXCLUSIVE_QUEUE); 2947 umtxq_unbusy(&uq->uq_key); 2948 2949 error = umtxq_sleep(uq, "uwrlck", timeout == NULL ? 2950 NULL : &timo); 2951 2952 umtxq_busy(&uq->uq_key); 2953 umtxq_remove_queue(uq, UMTX_EXCLUSIVE_QUEUE); 2954 umtxq_unlock(&uq->uq_key); 2955 if (error) 2956 break; 2957 state = fuword32(__DEVOLATILE(int32_t *, &rwlock->rw_state)); 2958 } 2959 2960 blocked_writers = fuword32(&rwlock->rw_blocked_writers); 2961 suword32(&rwlock->rw_blocked_writers, blocked_writers-1); 2962 if (blocked_writers == 1) { 2963 state = fuword32(__DEVOLATILE(int32_t *, &rwlock->rw_state)); 2964 for (;;) { 2965 oldstate = casuword32(&rwlock->rw_state, state, 2966 state & ~URWLOCK_WRITE_WAITERS); 2967 if (oldstate == -1) { 2968 error = EFAULT; 2969 break; 2970 } 2971 if (oldstate == state) 2972 break; 2973 state = oldstate; 2974 error = umtxq_check_susp(td); 2975 /* 2976 * We are leaving the URWLOCK_WRITE_WAITERS 2977 * behind, but this should not harm the 2978 * correctness. 
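				 * At worst the stale bit sends a later
				 * unlock into the kernel for a wakeup pass
				 * that finds no writer to wake.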
2979 */ 2980 if (error != 0) 2981 break; 2982 } 2983 blocked_readers = fuword32(&rwlock->rw_blocked_readers); 2984 } else 2985 blocked_readers = 0; 2986 2987 umtxq_lock(&uq->uq_key); 2988 umtxq_unbusy(&uq->uq_key); 2989 umtxq_unlock(&uq->uq_key); 2990 } 2991 2992 umtx_key_release(&uq->uq_key); 2993 if (error == ERESTART) 2994 error = EINTR; 2995 return (error); 2996} 2997 2998static int 2999do_rw_unlock(struct thread *td, struct urwlock *rwlock) 3000{ 3001 struct umtx_q *uq; 3002 uint32_t flags; 3003 int32_t state, oldstate; 3004 int error, q, count; 3005 3006 uq = td->td_umtxq; 3007 flags = fuword32(&rwlock->rw_flags); 3008 error = umtx_key_get(rwlock, TYPE_RWLOCK, GET_SHARE(flags), &uq->uq_key); 3009 if (error != 0) 3010 return (error); 3011 3012 state = fuword32(__DEVOLATILE(int32_t *, &rwlock->rw_state)); 3013 if (state & URWLOCK_WRITE_OWNER) { 3014 for (;;) { 3015 oldstate = casuword32(&rwlock->rw_state, state, 3016 state & ~URWLOCK_WRITE_OWNER); 3017 if (oldstate == -1) { 3018 error = EFAULT; 3019 goto out; 3020 } 3021 if (oldstate != state) { 3022 state = oldstate; 3023 if (!(oldstate & URWLOCK_WRITE_OWNER)) { 3024 error = EPERM; 3025 goto out; 3026 } 3027 error = umtxq_check_susp(td); 3028 if (error != 0) 3029 goto out; 3030 } else 3031 break; 3032 } 3033 } else if (URWLOCK_READER_COUNT(state) != 0) { 3034 for (;;) { 3035 oldstate = casuword32(&rwlock->rw_state, state, 3036 state - 1); 3037 if (oldstate == -1) { 3038 error = EFAULT; 3039 goto out; 3040 } 3041 if (oldstate != state) { 3042 state = oldstate; 3043 if (URWLOCK_READER_COUNT(oldstate) == 0) { 3044 error = EPERM; 3045 goto out; 3046 } 3047 error = umtxq_check_susp(td); 3048 if (error != 0) 3049 goto out; 3050 } else 3051 break; 3052 } 3053 } else { 3054 error = EPERM; 3055 goto out; 3056 } 3057 3058 count = 0; 3059 3060 if (!(flags & URWLOCK_PREFER_READER)) { 3061 if (state & URWLOCK_WRITE_WAITERS) { 3062 count = 1; 3063 q = UMTX_EXCLUSIVE_QUEUE; 3064 } else if (state & URWLOCK_READ_WAITERS) { 3065 count = INT_MAX; 3066 q = UMTX_SHARED_QUEUE; 3067 } 3068 } else { 3069 if (state & URWLOCK_READ_WAITERS) { 3070 count = INT_MAX; 3071 q = UMTX_SHARED_QUEUE; 3072 } else if (state & URWLOCK_WRITE_WAITERS) { 3073 count = 1; 3074 q = UMTX_EXCLUSIVE_QUEUE; 3075 } 3076 } 3077 3078 if (count) { 3079 umtxq_lock(&uq->uq_key); 3080 umtxq_busy(&uq->uq_key); 3081 umtxq_signal_queue(&uq->uq_key, count, q); 3082 umtxq_unbusy(&uq->uq_key); 3083 umtxq_unlock(&uq->uq_key); 3084 } 3085out: 3086 umtx_key_release(&uq->uq_key); 3087 return (error); 3088} 3089 3090static int 3091do_sem_wait(struct thread *td, struct _usem *sem, struct _umtx_time *timeout) 3092{ 3093 struct abs_timeout timo; 3094 struct umtx_q *uq; 3095 uint32_t flags, count; 3096 int error; 3097 3098 uq = td->td_umtxq; 3099 flags = fuword32(&sem->_flags); 3100 error = umtx_key_get(sem, TYPE_SEM, GET_SHARE(flags), &uq->uq_key); 3101 if (error != 0) 3102 return (error); 3103 3104 if (timeout != NULL) 3105 abs_timeout_init2(&timo, timeout); 3106 3107 umtxq_lock(&uq->uq_key); 3108 umtxq_busy(&uq->uq_key); 3109 umtxq_insert(uq); 3110 umtxq_unlock(&uq->uq_key); 3111 casuword32(__DEVOLATILE(uint32_t *, &sem->_has_waiters), 0, 1); 3112 count = fuword32(__DEVOLATILE(uint32_t *, &sem->_count)); 3113 if (count != 0) { 3114 umtxq_lock(&uq->uq_key); 3115 umtxq_unbusy(&uq->uq_key); 3116 umtxq_remove(uq); 3117 umtxq_unlock(&uq->uq_key); 3118 umtx_key_release(&uq->uq_key); 3119 return (0); 3120 } 3121 umtxq_lock(&uq->uq_key); 3122 umtxq_unbusy(&uq->uq_key); 3123 3124 error = umtxq_sleep(uq, 
"usem", timeout == NULL ? NULL : &timo); 3125 3126 if ((uq->uq_flags & UQF_UMTXQ) == 0) 3127 error = 0; 3128 else { 3129 umtxq_remove(uq); 3130 /* A relative timeout cannot be restarted. */ 3131 if (error == ERESTART && timeout != NULL && 3132 (timeout->_flags & UMTX_ABSTIME) == 0) 3133 error = EINTR; 3134 } 3135 umtxq_unlock(&uq->uq_key); 3136 umtx_key_release(&uq->uq_key); 3137 return (error); 3138} 3139 3140/* 3141 * Signal a userland condition variable. 3142 */ 3143static int 3144do_sem_wake(struct thread *td, struct _usem *sem) 3145{ 3146 struct umtx_key key; 3147 int error, cnt; 3148 uint32_t flags; 3149 3150 flags = fuword32(&sem->_flags); 3151 if ((error = umtx_key_get(sem, TYPE_SEM, GET_SHARE(flags), &key)) != 0) 3152 return (error); 3153 umtxq_lock(&key); 3154 umtxq_busy(&key); 3155 cnt = umtxq_count(&key); 3156 if (cnt > 0) { 3157 umtxq_signal(&key, 1); 3158 /* 3159 * Check if count is greater than 0, this means the memory is 3160 * still being referenced by user code, so we can safely 3161 * update _has_waiters flag. 3162 */ 3163 if (cnt == 1) { 3164 umtxq_unlock(&key); 3165 error = suword32( 3166 __DEVOLATILE(uint32_t *, &sem->_has_waiters), 0); 3167 umtxq_lock(&key); 3168 } 3169 } 3170 umtxq_unbusy(&key); 3171 umtxq_unlock(&key); 3172 umtx_key_release(&key); 3173 return (error); 3174} 3175 3176int 3177sys__umtx_lock(struct thread *td, struct _umtx_lock_args *uap) 3178 /* struct umtx *umtx */ 3179{ 3180 return do_lock_umtx(td, uap->umtx, td->td_tid, 0); 3181} 3182 3183int 3184sys__umtx_unlock(struct thread *td, struct _umtx_unlock_args *uap) 3185 /* struct umtx *umtx */ 3186{ 3187 return do_unlock_umtx(td, uap->umtx, td->td_tid); 3188} 3189 3190inline int 3191umtx_copyin_timeout(const void *addr, struct timespec *tsp) 3192{ 3193 int error; 3194 3195 error = copyin(addr, tsp, sizeof(struct timespec)); 3196 if (error == 0) { 3197 if (tsp->tv_sec < 0 || 3198 tsp->tv_nsec >= 1000000000 || 3199 tsp->tv_nsec < 0) 3200 error = EINVAL; 3201 } 3202 return (error); 3203} 3204 3205static inline int 3206umtx_copyin_umtx_time(const void *addr, size_t size, struct _umtx_time *tp) 3207{ 3208 int error; 3209 3210 if (size <= sizeof(struct timespec)) { 3211 tp->_clockid = CLOCK_REALTIME; 3212 tp->_flags = 0; 3213 error = copyin(addr, &tp->_timeout, sizeof(struct timespec)); 3214 } else 3215 error = copyin(addr, tp, sizeof(struct _umtx_time)); 3216 if (error != 0) 3217 return (error); 3218 if (tp->_timeout.tv_sec < 0 || 3219 tp->_timeout.tv_nsec >= 1000000000 || tp->_timeout.tv_nsec < 0) 3220 return (EINVAL); 3221 return (0); 3222} 3223 3224static int 3225__umtx_op_lock_umtx(struct thread *td, struct _umtx_op_args *uap) 3226{ 3227 struct timespec *ts, timeout; 3228 int error; 3229 3230 /* Allow a null timespec (wait forever). 
*/ 3231 if (uap->uaddr2 == NULL) 3232 ts = NULL; 3233 else { 3234 error = umtx_copyin_timeout(uap->uaddr2, &timeout); 3235 if (error != 0) 3236 return (error); 3237 ts = &timeout; 3238 } 3239 return (do_lock_umtx(td, uap->obj, uap->val, ts)); 3240} 3241 3242static int 3243__umtx_op_unlock_umtx(struct thread *td, struct _umtx_op_args *uap) 3244{ 3245 return (do_unlock_umtx(td, uap->obj, uap->val)); 3246} 3247 3248static int 3249__umtx_op_wait(struct thread *td, struct _umtx_op_args *uap) 3250{ 3251 struct _umtx_time timeout, *tm_p; 3252 int error; 3253 3254 if (uap->uaddr2 == NULL) 3255 tm_p = NULL; 3256 else { 3257 error = umtx_copyin_umtx_time( 3258 uap->uaddr2, (size_t)uap->uaddr1, &timeout); 3259 if (error != 0) 3260 return (error); 3261 tm_p = &timeout; 3262 } 3263 return do_wait(td, uap->obj, uap->val, tm_p, 0, 0); 3264} 3265 3266static int 3267__umtx_op_wait_uint(struct thread *td, struct _umtx_op_args *uap) 3268{ 3269 struct _umtx_time timeout, *tm_p; 3270 int error; 3271 3272 if (uap->uaddr2 == NULL) 3273 tm_p = NULL; 3274 else { 3275 error = umtx_copyin_umtx_time( 3276 uap->uaddr2, (size_t)uap->uaddr1, &timeout); 3277 if (error != 0) 3278 return (error); 3279 tm_p = &timeout; 3280 } 3281 return do_wait(td, uap->obj, uap->val, tm_p, 1, 0); 3282} 3283 3284static int 3285__umtx_op_wait_uint_private(struct thread *td, struct _umtx_op_args *uap) 3286{ 3287 struct _umtx_time *tm_p, timeout; 3288 int error; 3289 3290 if (uap->uaddr2 == NULL) 3291 tm_p = NULL; 3292 else { 3293 error = umtx_copyin_umtx_time( 3294 uap->uaddr2, (size_t)uap->uaddr1, &timeout); 3295 if (error != 0) 3296 return (error); 3297 tm_p = &timeout; 3298 } 3299 return do_wait(td, uap->obj, uap->val, tm_p, 1, 1); 3300} 3301 3302static int 3303__umtx_op_wake(struct thread *td, struct _umtx_op_args *uap) 3304{ 3305 return (kern_umtx_wake(td, uap->obj, uap->val, 0)); 3306} 3307 3308#define BATCH_SIZE 128 3309static int 3310__umtx_op_nwake_private(struct thread *td, struct _umtx_op_args *uap) 3311{ 3312 int count = uap->val; 3313 void *uaddrs[BATCH_SIZE]; 3314 char **upp = (char **)uap->obj; 3315 int tocopy; 3316 int error = 0; 3317 int i, pos = 0; 3318 3319 while (count > 0) { 3320 tocopy = count; 3321 if (tocopy > BATCH_SIZE) 3322 tocopy = BATCH_SIZE; 3323 error = copyin(upp+pos, uaddrs, tocopy * sizeof(char *)); 3324 if (error != 0) 3325 break; 3326 for (i = 0; i < tocopy; ++i) 3327 kern_umtx_wake(td, uaddrs[i], INT_MAX, 1); 3328 count -= tocopy; 3329 pos += tocopy; 3330 } 3331 return (error); 3332} 3333 3334static int 3335__umtx_op_wake_private(struct thread *td, struct _umtx_op_args *uap) 3336{ 3337 return (kern_umtx_wake(td, uap->obj, uap->val, 1)); 3338} 3339 3340static int 3341__umtx_op_lock_umutex(struct thread *td, struct _umtx_op_args *uap) 3342{ 3343 struct _umtx_time *tm_p, timeout; 3344 int error; 3345 3346 /* Allow a null timespec (wait forever). */ 3347 if (uap->uaddr2 == NULL) 3348 tm_p = NULL; 3349 else { 3350 error = umtx_copyin_umtx_time( 3351 uap->uaddr2, (size_t)uap->uaddr1, &timeout); 3352 if (error != 0) 3353 return (error); 3354 tm_p = &timeout; 3355 } 3356 return do_lock_umutex(td, uap->obj, tm_p, 0); 3357} 3358 3359static int 3360__umtx_op_trylock_umutex(struct thread *td, struct _umtx_op_args *uap) 3361{ 3362 return do_lock_umutex(td, uap->obj, NULL, _UMUTEX_TRY); 3363} 3364 3365static int 3366__umtx_op_wait_umutex(struct thread *td, struct _umtx_op_args *uap) 3367{ 3368 struct _umtx_time *tm_p, timeout; 3369 int error; 3370 3371 /* Allow a null timespec (wait forever). 
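	 *
	 * _UMUTEX_WAIT mode sleeps until the mutex word changes or another
	 * thread posts a wake via __umtx_op_wake_umutex(); it never
	 * acquires ownership, so userland retries the lock itself
	 * afterwards (the sleep half of a spin-then-wait lock loop).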
*/ 3372 if (uap->uaddr2 == NULL) 3373 tm_p = NULL; 3374 else { 3375 error = umtx_copyin_umtx_time( 3376 uap->uaddr2, (size_t)uap->uaddr1, &timeout); 3377 if (error != 0) 3378 return (error); 3379 tm_p = &timeout; 3380 } 3381 return do_lock_umutex(td, uap->obj, tm_p, _UMUTEX_WAIT); 3382} 3383 3384static int 3385__umtx_op_wake_umutex(struct thread *td, struct _umtx_op_args *uap) 3386{ 3387 return do_wake_umutex(td, uap->obj); 3388} 3389 3390static int 3391__umtx_op_unlock_umutex(struct thread *td, struct _umtx_op_args *uap) 3392{ 3393 return do_unlock_umutex(td, uap->obj); 3394} 3395 3396static int 3397__umtx_op_set_ceiling(struct thread *td, struct _umtx_op_args *uap) 3398{ 3399 return do_set_ceiling(td, uap->obj, uap->val, uap->uaddr1); 3400} 3401 3402static int 3403__umtx_op_cv_wait(struct thread *td, struct _umtx_op_args *uap) 3404{ 3405 struct timespec *ts, timeout; 3406 int error; 3407 3408 /* Allow a null timespec (wait forever). */ 3409 if (uap->uaddr2 == NULL) 3410 ts = NULL; 3411 else { 3412 error = umtx_copyin_timeout(uap->uaddr2, &timeout); 3413 if (error != 0) 3414 return (error); 3415 ts = &timeout; 3416 } 3417 return (do_cv_wait(td, uap->obj, uap->uaddr1, ts, uap->val)); 3418} 3419 3420static int 3421__umtx_op_cv_signal(struct thread *td, struct _umtx_op_args *uap) 3422{ 3423 return do_cv_signal(td, uap->obj); 3424} 3425 3426static int 3427__umtx_op_cv_broadcast(struct thread *td, struct _umtx_op_args *uap) 3428{ 3429 return do_cv_broadcast(td, uap->obj); 3430} 3431 3432static int 3433__umtx_op_rw_rdlock(struct thread *td, struct _umtx_op_args *uap) 3434{ 3435 struct _umtx_time timeout; 3436 int error; 3437 3438 /* Allow a null timespec (wait forever). */ 3439 if (uap->uaddr2 == NULL) { 3440 error = do_rw_rdlock(td, uap->obj, uap->val, 0); 3441 } else { 3442 error = umtx_copyin_umtx_time(uap->uaddr2, 3443 (size_t)uap->uaddr1, &timeout); 3444 if (error != 0) 3445 return (error); 3446 error = do_rw_rdlock(td, uap->obj, uap->val, &timeout); 3447 } 3448 return (error); 3449} 3450 3451static int 3452__umtx_op_rw_wrlock(struct thread *td, struct _umtx_op_args *uap) 3453{ 3454 struct _umtx_time timeout; 3455 int error; 3456 3457 /* Allow a null timespec (wait forever). */ 3458 if (uap->uaddr2 == NULL) { 3459 error = do_rw_wrlock(td, uap->obj, 0); 3460 } else { 3461 error = umtx_copyin_umtx_time(uap->uaddr2, 3462 (size_t)uap->uaddr1, &timeout); 3463 if (error != 0) 3464 return (error); 3465 3466 error = do_rw_wrlock(td, uap->obj, &timeout); 3467 } 3468 return (error); 3469} 3470 3471static int 3472__umtx_op_rw_unlock(struct thread *td, struct _umtx_op_args *uap) 3473{ 3474 return do_rw_unlock(td, uap->obj); 3475} 3476 3477static int 3478__umtx_op_sem_wait(struct thread *td, struct _umtx_op_args *uap) 3479{ 3480 struct _umtx_time *tm_p, timeout; 3481 int error; 3482 3483 /* Allow a null timespec (wait forever). 
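	 *
	 * Userland normally falls back to this op only after observing
	 * _count == 0.  do_sem_wait() above re-checks _count after
	 * setting _has_waiters and queueing the thread, so a sem_post()
	 * racing with that check cannot result in a lost wakeup.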
*/ 3484 if (uap->uaddr2 == NULL) 3485 tm_p = NULL; 3486 else { 3487 error = umtx_copyin_umtx_time( 3488 uap->uaddr2, (size_t)uap->uaddr1, &timeout); 3489 if (error != 0) 3490 return (error); 3491 tm_p = &timeout; 3492 } 3493 return (do_sem_wait(td, uap->obj, tm_p)); 3494} 3495 3496static int 3497__umtx_op_sem_wake(struct thread *td, struct _umtx_op_args *uap) 3498{ 3499 return do_sem_wake(td, uap->obj); 3500} 3501 3502static int 3503__umtx_op_wake2_umutex(struct thread *td, struct _umtx_op_args *uap) 3504{ 3505 return do_wake2_umutex(td, uap->obj, uap->val); 3506} 3507 3508typedef int (*_umtx_op_func)(struct thread *td, struct _umtx_op_args *uap); 3509 3510static _umtx_op_func op_table[] = { 3511 __umtx_op_lock_umtx, /* UMTX_OP_LOCK */ 3512 __umtx_op_unlock_umtx, /* UMTX_OP_UNLOCK */ 3513 __umtx_op_wait, /* UMTX_OP_WAIT */ 3514 __umtx_op_wake, /* UMTX_OP_WAKE */ 3515 __umtx_op_trylock_umutex, /* UMTX_OP_MUTEX_TRYLOCK */ 3516 __umtx_op_lock_umutex, /* UMTX_OP_MUTEX_LOCK */ 3517 __umtx_op_unlock_umutex, /* UMTX_OP_MUTEX_UNLOCK */ 3518 __umtx_op_set_ceiling, /* UMTX_OP_SET_CEILING */ 3519 __umtx_op_cv_wait, /* UMTX_OP_CV_WAIT*/ 3520 __umtx_op_cv_signal, /* UMTX_OP_CV_SIGNAL */ 3521 __umtx_op_cv_broadcast, /* UMTX_OP_CV_BROADCAST */ 3522 __umtx_op_wait_uint, /* UMTX_OP_WAIT_UINT */ 3523 __umtx_op_rw_rdlock, /* UMTX_OP_RW_RDLOCK */ 3524 __umtx_op_rw_wrlock, /* UMTX_OP_RW_WRLOCK */ 3525 __umtx_op_rw_unlock, /* UMTX_OP_RW_UNLOCK */ 3526 __umtx_op_wait_uint_private, /* UMTX_OP_WAIT_UINT_PRIVATE */ 3527 __umtx_op_wake_private, /* UMTX_OP_WAKE_PRIVATE */ 3528 __umtx_op_wait_umutex, /* UMTX_OP_UMUTEX_WAIT */ 3529 __umtx_op_wake_umutex, /* UMTX_OP_UMUTEX_WAKE */ 3530 __umtx_op_sem_wait, /* UMTX_OP_SEM_WAIT */ 3531 __umtx_op_sem_wake, /* UMTX_OP_SEM_WAKE */ 3532 __umtx_op_nwake_private, /* UMTX_OP_NWAKE_PRIVATE */ 3533 __umtx_op_wake2_umutex /* UMTX_OP_UMUTEX_WAKE2 */ 3534}; 3535 3536int 3537sys__umtx_op(struct thread *td, struct _umtx_op_args *uap) 3538{ 3539 if ((unsigned)uap->op < UMTX_OP_MAX) 3540 return (*op_table[uap->op])(td, uap); 3541 return (EINVAL); 3542} 3543 3544#ifdef COMPAT_FREEBSD32 3545int 3546freebsd32_umtx_lock(struct thread *td, struct freebsd32_umtx_lock_args *uap) 3547 /* struct umtx *umtx */ 3548{ 3549 return (do_lock_umtx32(td, (uint32_t *)uap->umtx, td->td_tid, NULL)); 3550} 3551 3552int 3553freebsd32_umtx_unlock(struct thread *td, struct freebsd32_umtx_unlock_args *uap) 3554 /* struct umtx *umtx */ 3555{ 3556 return (do_unlock_umtx32(td, (uint32_t *)uap->umtx, td->td_tid)); 3557} 3558 3559struct timespec32 { 3560 int32_t tv_sec; 3561 int32_t tv_nsec; 3562}; 3563 3564struct umtx_time32 { 3565 struct timespec32 timeout; 3566 uint32_t flags; 3567 uint32_t clockid; 3568}; 3569 3570static inline int 3571umtx_copyin_timeout32(void *addr, struct timespec *tsp) 3572{ 3573 struct timespec32 ts32; 3574 int error; 3575 3576 error = copyin(addr, &ts32, sizeof(struct timespec32)); 3577 if (error == 0) { 3578 if (ts32.tv_sec < 0 || 3579 ts32.tv_nsec >= 1000000000 || 3580 ts32.tv_nsec < 0) 3581 error = EINVAL; 3582 else { 3583 tsp->tv_sec = ts32.tv_sec; 3584 tsp->tv_nsec = ts32.tv_nsec; 3585 } 3586 } 3587 return (error); 3588} 3589 3590static inline int 3591umtx_copyin_umtx_time32(const void *addr, size_t size, struct _umtx_time *tp) 3592{ 3593 struct umtx_time32 t32; 3594 int error; 3595 3596 t32.clockid = CLOCK_REALTIME; 3597 t32.flags = 0; 3598 if (size <= sizeof(struct timespec32)) 3599 error = copyin(addr, &t32.timeout, sizeof(struct timespec32)); 3600 else 3601 error = copyin(addr, 
&t32, sizeof(struct umtx_time32));
	if (error != 0)
		return (error);
	if (t32.timeout.tv_sec < 0 ||
	    t32.timeout.tv_nsec >= 1000000000 || t32.timeout.tv_nsec < 0)
		return (EINVAL);
	tp->_timeout.tv_sec = t32.timeout.tv_sec;
	tp->_timeout.tv_nsec = t32.timeout.tv_nsec;
	tp->_flags = t32.flags;
	tp->_clockid = t32.clockid;
	return (0);
}

static int
__umtx_op_lock_umtx_compat32(struct thread *td, struct _umtx_op_args *uap)
{
	struct timespec *ts, timeout;
	int error;

	/* Allow a null timespec (wait forever). */
	if (uap->uaddr2 == NULL)
		ts = NULL;
	else {
		error = umtx_copyin_timeout32(uap->uaddr2, &timeout);
		if (error != 0)
			return (error);
		ts = &timeout;
	}
	return (do_lock_umtx32(td, uap->obj, uap->val, ts));
}

static int
__umtx_op_unlock_umtx_compat32(struct thread *td, struct _umtx_op_args *uap)
{
	return (do_unlock_umtx32(td, uap->obj, (uint32_t)uap->val));
}

static int
__umtx_op_wait_compat32(struct thread *td, struct _umtx_op_args *uap)
{
	struct _umtx_time *tm_p, timeout;
	int error;

	if (uap->uaddr2 == NULL)
		tm_p = NULL;
	else {
		error = umtx_copyin_umtx_time32(uap->uaddr2,
		    (size_t)uap->uaddr1, &timeout);
		if (error != 0)
			return (error);
		tm_p = &timeout;
	}
	return (do_wait(td, uap->obj, uap->val, tm_p, 1, 0));
}

static int
__umtx_op_lock_umutex_compat32(struct thread *td, struct _umtx_op_args *uap)
{
	struct _umtx_time *tm_p, timeout;
	int error;

	/* Allow a null timespec (wait forever). */
	if (uap->uaddr2 == NULL)
		tm_p = NULL;
	else {
		/*
		 * A 32-bit caller passes a 32-bit time structure; decode
		 * it with the compat32 helper, not the native one.
		 */
		error = umtx_copyin_umtx_time32(uap->uaddr2,
		    (size_t)uap->uaddr1, &timeout);
		if (error != 0)
			return (error);
		tm_p = &timeout;
	}
	return (do_lock_umutex(td, uap->obj, tm_p, 0));
}

static int
__umtx_op_wait_umutex_compat32(struct thread *td, struct _umtx_op_args *uap)
{
	struct _umtx_time *tm_p, timeout;
	int error;

	/* Allow a null timespec (wait forever). */
	if (uap->uaddr2 == NULL)
		tm_p = NULL;
	else {
		error = umtx_copyin_umtx_time32(uap->uaddr2,
		    (size_t)uap->uaddr1, &timeout);
		if (error != 0)
			return (error);
		tm_p = &timeout;
	}
	return (do_lock_umutex(td, uap->obj, tm_p, _UMUTEX_WAIT));
}

static int
__umtx_op_cv_wait_compat32(struct thread *td, struct _umtx_op_args *uap)
{
	struct timespec *ts, timeout;
	int error;

	/* Allow a null timespec (wait forever). */
	if (uap->uaddr2 == NULL)
		ts = NULL;
	else {
		error = umtx_copyin_timeout32(uap->uaddr2, &timeout);
		if (error != 0)
			return (error);
		ts = &timeout;
	}
	return (do_cv_wait(td, uap->obj, uap->uaddr1, ts, uap->val));
}

static int
__umtx_op_rw_rdlock_compat32(struct thread *td, struct _umtx_op_args *uap)
{
	struct _umtx_time timeout;
	int error;

	/* Allow a null timespec (wait forever).
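	 *
	 * The compat32 handlers differ from their native counterparts
	 * essentially only in timeout decoding: a 32-bit process passes
	 * struct timespec32 or struct umtx_time32, which
	 * umtx_copyin_umtx_time32() widens into the native struct
	 * _umtx_time before the common code runs.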
 */
	if (uap->uaddr2 == NULL) {
		error = do_rw_rdlock(td, uap->obj, uap->val, 0);
	} else {
		error = umtx_copyin_umtx_time32(uap->uaddr2,
		    (size_t)uap->uaddr1, &timeout);
		if (error != 0)
			return (error);
		error = do_rw_rdlock(td, uap->obj, uap->val, &timeout);
	}
	return (error);
}

static int
__umtx_op_rw_wrlock_compat32(struct thread *td, struct _umtx_op_args *uap)
{
	struct _umtx_time timeout;
	int error;

	/* Allow a null timespec (wait forever). */
	if (uap->uaddr2 == NULL) {
		error = do_rw_wrlock(td, uap->obj, 0);
	} else {
		error = umtx_copyin_umtx_time32(uap->uaddr2,
		    (size_t)uap->uaddr1, &timeout);
		if (error != 0)
			return (error);
		error = do_rw_wrlock(td, uap->obj, &timeout);
	}
	return (error);
}

static int
__umtx_op_wait_uint_private_compat32(struct thread *td, struct _umtx_op_args *uap)
{
	struct _umtx_time *tm_p, timeout;
	int error;

	if (uap->uaddr2 == NULL)
		tm_p = NULL;
	else {
		error = umtx_copyin_umtx_time32(
		    uap->uaddr2, (size_t)uap->uaddr1, &timeout);
		if (error != 0)
			return (error);
		tm_p = &timeout;
	}
	return (do_wait(td, uap->obj, uap->val, tm_p, 1, 1));
}

static int
__umtx_op_sem_wait_compat32(struct thread *td, struct _umtx_op_args *uap)
{
	struct _umtx_time *tm_p, timeout;
	int error;

	/* Allow a null timespec (wait forever). */
	if (uap->uaddr2 == NULL)
		tm_p = NULL;
	else {
		error = umtx_copyin_umtx_time32(uap->uaddr2,
		    (size_t)uap->uaddr1, &timeout);
		if (error != 0)
			return (error);
		tm_p = &timeout;
	}
	return (do_sem_wait(td, uap->obj, tm_p));
}

static int
__umtx_op_nwake_private32(struct thread *td, struct _umtx_op_args *uap)
{
	int count = uap->val;
	uint32_t uaddrs[BATCH_SIZE];
	uint32_t **upp = (uint32_t **)uap->obj;
	int tocopy;
	int error = 0;
	int i, pos = 0;

	while (count > 0) {
		tocopy = count;
		if (tocopy > BATCH_SIZE)
			tocopy = BATCH_SIZE;
		error = copyin(upp + pos, uaddrs, tocopy * sizeof(uint32_t));
		if (error != 0)
			break;
		for (i = 0; i < tocopy; ++i)
			kern_umtx_wake(td, (void *)(intptr_t)uaddrs[i],
			    INT_MAX, 1);
		count -= tocopy;
		pos += tocopy;
	}
	return (error);
}

static _umtx_op_func op_table_compat32[] = {
	__umtx_op_lock_umtx_compat32,	/* UMTX_OP_LOCK */
	__umtx_op_unlock_umtx_compat32,	/* UMTX_OP_UNLOCK */
	__umtx_op_wait_compat32,	/* UMTX_OP_WAIT */
	__umtx_op_wake,			/* UMTX_OP_WAKE */
	__umtx_op_trylock_umutex,	/* UMTX_OP_MUTEX_TRYLOCK */
	__umtx_op_lock_umutex_compat32,	/* UMTX_OP_MUTEX_LOCK */
	__umtx_op_unlock_umutex,	/* UMTX_OP_MUTEX_UNLOCK */
	__umtx_op_set_ceiling,		/* UMTX_OP_SET_CEILING */
	__umtx_op_cv_wait_compat32,	/* UMTX_OP_CV_WAIT */
	__umtx_op_cv_signal,		/* UMTX_OP_CV_SIGNAL */
	__umtx_op_cv_broadcast,		/* UMTX_OP_CV_BROADCAST */
	__umtx_op_wait_compat32,	/* UMTX_OP_WAIT_UINT */
	__umtx_op_rw_rdlock_compat32,	/* UMTX_OP_RW_RDLOCK */
	__umtx_op_rw_wrlock_compat32,	/* UMTX_OP_RW_WRLOCK */
	__umtx_op_rw_unlock,		/* UMTX_OP_RW_UNLOCK */
	__umtx_op_wait_uint_private_compat32, /* UMTX_OP_WAIT_UINT_PRIVATE */
	__umtx_op_wake_private,		/* UMTX_OP_WAKE_PRIVATE */
	__umtx_op_wait_umutex_compat32,	/* UMTX_OP_UMUTEX_WAIT */
	__umtx_op_wake_umutex,		/* UMTX_OP_UMUTEX_WAKE */
	__umtx_op_sem_wait_compat32,	/* UMTX_OP_SEM_WAIT */
	__umtx_op_sem_wake,		/* UMTX_OP_SEM_WAKE */
	__umtx_op_nwake_private32,	/* UMTX_OP_NWAKE_PRIVATE */
	__umtx_op_wake2_umutex		/* UMTX_OP_UMUTEX_WAKE2 */
};

int
freebsd32_umtx_op(struct thread *td, struct freebsd32_umtx_op_args *uap)
{
	if ((unsigned)uap->op < UMTX_OP_MAX)
		return (*op_table_compat32[uap->op])(td,
		    (struct _umtx_op_args *)uap);
	return (EINVAL);
}
#endif

void
umtx_thread_init(struct thread *td)
{
	td->td_umtxq = umtxq_alloc();
	td->td_umtxq->uq_thread = td;
}

void
umtx_thread_fini(struct thread *td)
{
	umtxq_free(td->td_umtxq);
}

/*
 * Called when a new thread is created, e.g. during fork().
 */
void
umtx_thread_alloc(struct thread *td)
{
	struct umtx_q *uq;

	uq = td->td_umtxq;
	uq->uq_inherited_pri = PRI_MAX;

	KASSERT(uq->uq_flags == 0, ("uq_flags != 0"));
	KASSERT(uq->uq_thread == td, ("uq_thread != td"));
	KASSERT(uq->uq_pi_blocked == NULL, ("uq_pi_blocked != NULL"));
	KASSERT(TAILQ_EMPTY(&uq->uq_pi_contested), ("uq_pi_contested is not empty"));
}

/*
 * exec() hook.
 */
static void
umtx_exec_hook(void *arg __unused, struct proc *p __unused,
    struct image_params *imgp __unused)
{
	umtx_thread_cleanup(curthread);
}

/*
 * thread_exit() hook.
 */
void
umtx_thread_exit(struct thread *td)
{
	umtx_thread_cleanup(td);
}

/*
 * Clean up umtx data.
 */
static void
umtx_thread_cleanup(struct thread *td)
{
	struct umtx_q *uq;
	struct umtx_pi *pi;

	if ((uq = td->td_umtxq) == NULL)
		return;

	mtx_lock_spin(&umtx_lock);
	uq->uq_inherited_pri = PRI_MAX;
	while ((pi = TAILQ_FIRST(&uq->uq_pi_contested)) != NULL) {
		pi->pi_owner = NULL;
		TAILQ_REMOVE(&uq->uq_pi_contested, pi, pi_link);
	}
	mtx_unlock_spin(&umtx_lock);
	thread_lock(td);
	sched_lend_user_prio(td, PRI_MAX);
	thread_unlock(td);
}
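
/*
 * For reference, a minimal userland use of _umtx_op(2).  This is an
 * illustrative sketch, not part of the kernel: one thread parks on a
 * 32-bit word with UMTX_OP_WAIT_UINT_PRIVATE while it reads 0, and
 * another thread stores a new value and posts UMTX_OP_WAKE_PRIVATE,
 * much like a futex pair.  The kernel re-checks the word against the
 * val argument before sleeping, so a wake that lands between the load
 * and the syscall is not lost.
 *
 *	#include <sys/types.h>
 *	#include <sys/umtx.h>
 *
 *	volatile u_int word = 0;
 *
 *	void
 *	wait_for_nonzero(void)
 *	{
 *		while (word == 0)
 *			_umtx_op(__DEVOLATILE(void *, &word),
 *			    UMTX_OP_WAIT_UINT_PRIVATE, 0, NULL, NULL);
 *	}
 *
 *	void
 *	wake_one(void)
 *	{
 *		word = 1;
 *		_umtx_op(__DEVOLATILE(void *, &word),
 *		    UMTX_OP_WAKE_PRIVATE, 1, NULL, NULL);
 *	}
 */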