kern_umtx.c revision 167232
1/*- 2 * Copyright (c) 2004, David Xu <davidxu@freebsd.org> 3 * Copyright (c) 2002, Jeffrey Roberson <jeff@freebsd.org> 4 * All rights reserved. 5 * 6 * Redistribution and use in source and binary forms, with or without 7 * modification, are permitted provided that the following conditions 8 * are met: 9 * 1. Redistributions of source code must retain the above copyright 10 * notice unmodified, this list of conditions, and the following 11 * disclaimer. 12 * 2. Redistributions in binary form must reproduce the above copyright 13 * notice, this list of conditions and the following disclaimer in the 14 * documentation and/or other materials provided with the distribution. 15 * 16 * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR 17 * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES 18 * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. 19 * IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT, 20 * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT 21 * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, 22 * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY 23 * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT 24 * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF 25 * THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
 */

#include <sys/cdefs.h>
__FBSDID("$FreeBSD: head/sys/kern/kern_umtx.c 167232 2007-03-05 13:10:58Z rwatson $");

#include "opt_compat.h"
#include <sys/param.h>
#include <sys/kernel.h>
#include <sys/limits.h>
#include <sys/lock.h>
#include <sys/malloc.h>
#include <sys/mutex.h>
#include <sys/priv.h>
#include <sys/proc.h>
#include <sys/sched.h>
#include <sys/smp.h>
#include <sys/sysctl.h>
#include <sys/sysent.h>
#include <sys/systm.h>
#include <sys/sysproto.h>
#include <sys/eventhandler.h>
#include <sys/umtx.h>

#include <vm/vm.h>
#include <vm/vm_param.h>
#include <vm/pmap.h>
#include <vm/vm_map.h>
#include <vm/vm_object.h>

#include <machine/cpu.h>

#ifdef COMPAT_IA32
#include <compat/freebsd32/freebsd32_proto.h>
#endif

/*
 * Types of userland synchronization objects that a umtx_key can
 * represent; stored in umtx_key.type so that keys for different
 * object kinds hashing to the same chain never compare equal.
 */
#define TYPE_SIMPLE_LOCK	0
#define TYPE_SIMPLE_WAIT	1
#define TYPE_NORMAL_UMUTEX	2
#define TYPE_PI_UMUTEX		3
#define TYPE_PP_UMUTEX		4
#define TYPE_CV			5

/* Key to represent a unique userland synchronous object */
struct umtx_key {
	int	hash;		/* precomputed chain index, see umtxq_hash() */
	int	type;		/* one of the TYPE_* constants above */
	int	shared;		/* non-zero: 'shared' member of the union is valid */
	union {
		/* Process-shared object: identified by backing VM object. */
		struct {
			vm_object_t	object;
			uintptr_t	offset;
		} shared;
		/* Process-private object: identified by vmspace + address. */
		struct {
			struct vmspace	*vs;
			uintptr_t	addr;
		} private;
		/*
		 * Type-punned view used for hashing/comparison; overlays
		 * whichever of the two members above is active.
		 */
		struct {
			void		*a;
			uintptr_t	b;
		} both;
	} info;
};

/* Priority inheritance mutex info. */
struct umtx_pi {
	/* Owner thread */
	struct thread		*pi_owner;

	/* Reference count */
	int			pi_refcount;

	/* List entry to link umtx holding by thread */
	TAILQ_ENTRY(umtx_pi)	pi_link;

	/* List entry in hash */
	TAILQ_ENTRY(umtx_pi)	pi_hashlink;

	/* List for waiters */
	TAILQ_HEAD(,umtx_q)	pi_blocked;

	/* Identify a userland lock object */
	struct umtx_key		pi_key;
};

/* A userland synchronous object user. */
struct umtx_q {
	/* Linked list for the hash. */
	TAILQ_ENTRY(umtx_q)	uq_link;

	/* Umtx key. */
	struct umtx_key		uq_key;

	/* Umtx flags. */
	int			uq_flags;
#define UQF_UMTXQ	0x0001	/* thread is on a umtxq chain's sleep queue */

	/* The thread waits on. */
	struct thread		*uq_thread;

	/*
	 * Blocked on PI mutex. read can use chain lock
	 * or sched_lock, write must have both chain lock and
	 * sched_lock being hold.
	 */
	struct umtx_pi		*uq_pi_blocked;

	/* On blocked list */
	TAILQ_ENTRY(umtx_q)	uq_lockq;

	/* Thread contending with us */
	TAILQ_HEAD(,umtx_pi)	uq_pi_contested;

	/* Inherited priority from PP mutex */
	u_char			uq_inherited_pri;
};

TAILQ_HEAD(umtxq_head, umtx_q);

/* Userland lock object's wait-queue chain */
struct umtxq_chain {
	/* Lock for this chain. */
	struct mtx		uc_lock;

	/* List of sleep queues. */
	struct umtxq_head	uc_queue;

	/* Busy flag */
	char			uc_busy;

	/* Chain lock waiters */
	int			uc_waiters;

	/* All PI in the list */
	TAILQ_HEAD(,umtx_pi)	uc_pi_list;
};

#define UMTXQ_LOCKED_ASSERT(uc)		mtx_assert(&(uc)->uc_lock, MA_OWNED)

/*
 * Don't propagate time-sharing priority, there is a security reason,
 * a user can simply introduce PI-mutex, let thread A lock the mutex,
 * and let another thread B block on the mutex, because B is
 * sleeping, its priority will be boosted, this causes A's priority to
 * be boosted via priority propagating too and will never be lowered even
 * if it is using 100%CPU, this is unfair to other processes.
 */

#define UPRI(td)	(((td)->td_user_pri >= PRI_MIN_TIMESHARE &&\
			  (td)->td_user_pri <= PRI_MAX_TIMESHARE) ?\
			 PRI_MAX_TIMESHARE : (td)->td_user_pri)

/* Multiplicative hash parameters (Fibonacci hashing over both key words). */
#define GOLDEN_RATIO_PRIME	2654404609U
#define UMTX_CHAINS		128
#define UMTX_SHIFTS		(__WORD_BIT - 7)

/* Sharing modes accepted by umtx_key_get(). */
#define THREAD_SHARE		0	/* private to this process */
#define PROCESS_SHARE		1	/* shared across processes */
#define AUTO_SHARE		2	/* decide from the VM map entry */

#define GET_SHARE(flags)	\
    (((flags) & USYNC_PROCESS_SHARED) == 0 ? THREAD_SHARE : PROCESS_SHARE)

static uma_zone_t		umtx_pi_zone;
static struct umtxq_chain	umtxq_chains[UMTX_CHAINS];
static MALLOC_DEFINE(M_UMTX, "umtx", "UMTX queue memory");
static int			umtx_pi_allocated;

SYSCTL_NODE(_debug, OID_AUTO, umtx, CTLFLAG_RW, 0, "umtx debug");
SYSCTL_INT(_debug_umtx, OID_AUTO, umtx_pi_allocated, CTLFLAG_RD,
    &umtx_pi_allocated, 0, "Allocated umtx_pi");
SYSCTL_DECL(_kern_threads);
static int umtx_dflt_spins = 0;
SYSCTL_INT(_kern_threads, OID_AUTO, umtx_dflt_spins, CTLFLAG_RW,
    &umtx_dflt_spins, 0, "default umtx spin count");
static int umtx_max_spins = 3000;
SYSCTL_INT(_kern_threads, OID_AUTO, umtx_max_spins, CTLFLAG_RW,
    &umtx_max_spins, 0, "max umtx spin count");

static void umtxq_sysinit(void *);
static void umtxq_hash(struct umtx_key *key);
static struct umtxq_chain *umtxq_getchain(struct umtx_key *key);
static void umtxq_lock(struct umtx_key *key);
static void umtxq_unlock(struct umtx_key *key);
static void umtxq_busy(struct umtx_key *key);
static void umtxq_unbusy(struct umtx_key *key);
static void umtxq_insert(struct umtx_q *uq);
static void umtxq_remove(struct umtx_q *uq);
static int umtxq_sleep(struct umtx_q *uq, const char *wmesg, int timo);
static int umtxq_count(struct umtx_key *key);
static int umtxq_signal(struct umtx_key *key, int nr_wakeup);
static int umtx_key_match(const struct umtx_key *k1, const struct umtx_key *k2);
static int umtx_key_get(void *addr, int type, int share,
	struct umtx_key *key);
static void umtx_key_release(struct umtx_key *key);
static struct umtx_pi *umtx_pi_alloc(int);
static void umtx_pi_free(struct umtx_pi *pi);
static int do_unlock_pp(struct thread *td, struct umutex *m, uint32_t flags);
static void umtx_thread_cleanup(struct thread *td);
static void umtx_exec_hook(void *arg __unused, struct proc *p __unused,
	struct image_params *imgp __unused);
SYSINIT(umtx,
    SI_SUB_EVENTHANDLER+1, SI_ORDER_MIDDLE, umtxq_sysinit, NULL);

/*
 * Boot-time initialization: create the umtx_pi UMA zone, initialize
 * every wait-queue chain, and hook process exec so umtx state is
 * cleaned up when a process replaces its image.
 */
static void
umtxq_sysinit(void *arg __unused)
{
	int i;

	umtx_pi_zone = uma_zcreate("umtx pi", sizeof(struct umtx_pi),
		NULL, NULL, NULL, NULL, UMA_ALIGN_PTR, 0);
	for (i = 0; i < UMTX_CHAINS; ++i) {
		/* MTX_DUPOK: two chains may be held at once (e.g. requeue). */
		mtx_init(&umtxq_chains[i].uc_lock, "umtxql", NULL,
		    MTX_DEF | MTX_DUPOK);
		TAILQ_INIT(&umtxq_chains[i].uc_queue);
		TAILQ_INIT(&umtxq_chains[i].uc_pi_list);
		umtxq_chains[i].uc_busy = 0;
		umtxq_chains[i].uc_waiters = 0;
	}
	EVENTHANDLER_REGISTER(process_exec, umtx_exec_hook, NULL,
	    EVENTHANDLER_PRI_ANY);
}

/*
 * Allocate a per-thread umtx queue entry (caller owns it; free with
 * umtxq_free()).  M_WAITOK: may sleep, never returns NULL.
 */
struct umtx_q *
umtxq_alloc(void)
{
	struct umtx_q *uq;

	uq = malloc(sizeof(struct umtx_q), M_UMTX, M_WAITOK | M_ZERO);
	TAILQ_INIT(&uq->uq_pi_contested);
	uq->uq_inherited_pri = PRI_MAX;
	return (uq);
}

/* Release a umtx queue entry allocated by umtxq_alloc(). */
void
umtxq_free(struct umtx_q *uq)
{
	free(uq, M_UMTX);
}

/*
 * Compute the chain index for a key from both words of its identity
 * (object/vmspace pointer plus offset/address) via multiplicative
 * hashing.
 */
static inline void
umtxq_hash(struct umtx_key *key)
{
	unsigned n = (uintptr_t)key->info.both.a + key->info.both.b;
	key->hash = ((n * GOLDEN_RATIO_PRIME) >> UMTX_SHIFTS) % UMTX_CHAINS;
}

/*
 * Two keys match when they name the same object kind and the same
 * underlying identity words.
 */
static inline int
umtx_key_match(const struct umtx_key *k1, const struct umtx_key *k2)
{
	return (k1->type == k2->type &&
		k1->info.both.a == k2->info.both.a &&
		k1->info.both.b == k2->info.both.b);
}

/* Map a (hashed) key to its wait-queue chain. */
static inline struct umtxq_chain *
umtxq_getchain(struct umtx_key *key)
{
	return (&umtxq_chains[key->hash]);
}

/*
 * Set chain to busy state when following operation
 * may be blocked (kernel mutex can not be used).
 */
static inline void
umtxq_busy(struct umtx_key *key)
{
	struct umtxq_chain *uc;

	uc = umtxq_getchain(key);
	mtx_assert(&uc->uc_lock, MA_OWNED);
	/* Sleep (dropping uc_lock inside msleep) until the chain is free. */
	while (uc->uc_busy != 0) {
		uc->uc_waiters++;
		msleep(uc, &uc->uc_lock, 0, "umtxqb", 0);
		uc->uc_waiters--;
	}
	uc->uc_busy = 1;
}

/*
 * Unbusy a chain.
 */
static inline void
umtxq_unbusy(struct umtx_key *key)
{
	struct umtxq_chain *uc;

	uc = umtxq_getchain(key);
	mtx_assert(&uc->uc_lock, MA_OWNED);
	KASSERT(uc->uc_busy != 0, ("not busy"));
	uc->uc_busy = 0;
	/* Hand the busy token to one waiter, if any. */
	if (uc->uc_waiters)
		wakeup_one(uc);
}

/*
 * Lock a chain.
 */
static inline void
umtxq_lock(struct umtx_key *key)
{
	struct umtxq_chain *uc;

	uc = umtxq_getchain(key);
	mtx_lock(&uc->uc_lock);
}

/*
 * Unlock a chain.
 */
static inline void
umtxq_unlock(struct umtx_key *key)
{
	struct umtxq_chain *uc;

	uc = umtxq_getchain(key);
	mtx_unlock(&uc->uc_lock);
}

/*
 * Insert a thread onto the umtx queue.  Caller holds the chain lock;
 * UQF_UMTXQ marks the entry as queued for umtxq_remove()/umtxq_sleep().
 */
static inline void
umtxq_insert(struct umtx_q *uq)
{
	struct umtxq_chain *uc;

	uc = umtxq_getchain(&uq->uq_key);
	UMTXQ_LOCKED_ASSERT(uc);
	TAILQ_INSERT_TAIL(&uc->uc_queue, uq, uq_link);
	uq->uq_flags |= UQF_UMTXQ;
}

/*
 * Remove thread from the umtx queue.  Idempotent: a no-op when the
 * entry was already removed (e.g. by umtxq_signal()).
 */
static inline void
umtxq_remove(struct umtx_q *uq)
{
	struct umtxq_chain *uc;

	uc = umtxq_getchain(&uq->uq_key);
	UMTXQ_LOCKED_ASSERT(uc);
	if (uq->uq_flags & UQF_UMTXQ) {
		TAILQ_REMOVE(&uc->uc_queue, uq, uq_link);
		uq->uq_flags &= ~UQF_UMTXQ;
	}
}

/*
 * Check if there are multiple waiters.  Returns at most 2: callers
 * only need to distinguish "zero or one" from "more than one", so the
 * scan stops early.
 */
static int
umtxq_count(struct umtx_key *key)
{
	struct umtxq_chain *uc;
	struct umtx_q *uq;
	int count = 0;

	uc = umtxq_getchain(key);
	UMTXQ_LOCKED_ASSERT(uc);
	TAILQ_FOREACH(uq, &uc->uc_queue, uq_link) {
		if (umtx_key_match(&uq->uq_key, key)) {
			if (++count > 1)
				break;
		}
	}
	return (count);
}

/*
 * Check if there are multiple PI waiters and returns first
 * waiter.
 */
static int
umtxq_count_pi(struct umtx_key *key, struct umtx_q **first)
{
	struct umtxq_chain *uc;
	struct umtx_q *uq;
	int count = 0;

	*first = NULL;
	uc = umtxq_getchain(key);
	UMTXQ_LOCKED_ASSERT(uc);
	TAILQ_FOREACH(uq, &uc->uc_queue, uq_link) {
		if (umtx_key_match(&uq->uq_key, key)) {
			/* Stop at 2: callers only test count against 1. */
			if (++count > 1)
				break;
			*first = uq;
		}
	}
	return (count);
}

/*
 * Wake up threads waiting on an userland object.  Returns the number
 * of threads actually awakened (at most n_wake).
 */
static int
umtxq_signal(struct umtx_key *key, int n_wake)
{
	struct umtxq_chain *uc;
	struct umtx_q *uq, *next;
	int ret;

	ret = 0;
	uc = umtxq_getchain(key);
	UMTXQ_LOCKED_ASSERT(uc);
	/* _SAFE: umtxq_remove() unlinks uq while we iterate. */
	TAILQ_FOREACH_SAFE(uq, &uc->uc_queue, uq_link, next) {
		if (umtx_key_match(&uq->uq_key, key)) {
			umtxq_remove(uq);
			wakeup(uq);
			if (++ret >= n_wake)
				break;
		}
	}
	return (ret);
}

/*
 * Wake up specified thread.
 */
static inline void
umtxq_signal_thread(struct umtx_q *uq)
{
	struct umtxq_chain *uc;

	uc = umtxq_getchain(&uq->uq_key);
	UMTXQ_LOCKED_ASSERT(uc);
	umtxq_remove(uq);
	wakeup(uq);
}

/*
 * Put thread into sleep state, before sleeping, check if
 * thread was removed from umtx queue.  Returns 0 immediately when a
 * wakeup already dequeued us; maps msleep()'s EWOULDBLOCK to
 * ETIMEDOUT for the timed-lock retry loops.
 */
static inline int
umtxq_sleep(struct umtx_q *uq, const char *wmesg, int timo)
{
	struct umtxq_chain *uc;
	int error;

	uc = umtxq_getchain(&uq->uq_key);
	UMTXQ_LOCKED_ASSERT(uc);
	if (!(uq->uq_flags & UQF_UMTXQ))
		return (0);
	/* PCATCH: the sleep is interruptible by signals. */
	error = msleep(uq, &uc->uc_lock, PCATCH, wmesg, timo);
	if (error == EWOULDBLOCK)
		error = ETIMEDOUT;
	return (error);
}

/*
 * Convert userspace address into unique logical address.
 */
static int
umtx_key_get(void *addr, int type, int share, struct umtx_key *key)
{
	struct thread *td = curthread;
	vm_map_t map;
	vm_map_entry_t entry;
	vm_pindex_t pindex;
	vm_prot_t prot;
	boolean_t wired;

	key->type = type;
	if (share == THREAD_SHARE) {
		/* Process-private: vmspace + virtual address identify it. */
		key->shared = 0;
		key->info.private.vs = td->td_proc->p_vmspace;
		key->info.private.addr = (uintptr_t)addr;
	} else {
		MPASS(share == PROCESS_SHARE || share == AUTO_SHARE);
		map = &td->td_proc->p_vmspace->vm_map;
		if (vm_map_lookup(&map, (vm_offset_t)addr, VM_PROT_WRITE,
		    &entry, &key->info.shared.object, &pindex, &prot,
		    &wired) != KERN_SUCCESS) {
			return EFAULT;
		}

		if ((share == PROCESS_SHARE) ||
		    (share == AUTO_SHARE &&
		     VM_INHERIT_SHARE == entry->inheritance)) {
			/*
			 * Shared mapping: backing object + offset is stable
			 * across address spaces.  Hold a reference on the
			 * object until umtx_key_release().
			 */
			key->shared = 1;
			key->info.shared.offset = entry->offset + entry->start -
				(vm_offset_t)addr;
			vm_object_reference(key->info.shared.object);
		} else {
			key->shared = 0;
			key->info.private.vs = td->td_proc->p_vmspace;
			key->info.private.addr = (uintptr_t)addr;
		}
		vm_map_lookup_done(map, entry);
	}

	umtxq_hash(key);
	return (0);
}

/*
 * Release key.  Drops the VM object reference taken for shared keys.
 */
static inline void
umtx_key_release(struct umtx_key *key)
{
	if (key->shared)
		vm_object_deallocate(key->info.shared.object);
}

/*
 * Lock a umtx object.  CAS-on-userland loop: every access to *umtx may
 * fault, and casuword() reports a fault as (u_long)-1, so the fault
 * value is checked after every operation.
 */
static int
_do_lock_umtx(struct thread *td, struct umtx *umtx, u_long id, int timo)
{
	struct umtx_q *uq;
	u_long owner;
	u_long old;
	int error = 0;

	uq = td->td_umtxq;

	/*
	 * Care must be exercised when dealing with umtx structure. It
	 * can fault on any access.
	 */
	for (;;) {
		/*
		 * Try the uncontested case.  This should be done in userland.
		 */
		owner = casuword(&umtx->u_owner, UMTX_UNOWNED, id);

		/* The acquire succeeded. */
		if (owner == UMTX_UNOWNED)
			return (0);

		/* The address was invalid. */
		if (owner == -1)
			return (EFAULT);

		/* If no one owns it but it is contested try to acquire it. */
		if (owner == UMTX_CONTESTED) {
			owner = casuword(&umtx->u_owner,
			    UMTX_CONTESTED, id | UMTX_CONTESTED);

			if (owner == UMTX_CONTESTED)
				return (0);

			/* The address was invalid. */
			if (owner == -1)
				return (EFAULT);

			/* If this failed the lock has changed, restart. */
			continue;
		}

		/*
		 * If we caught a signal, we have retried and now
		 * exit immediately.
		 */
		if (error != 0)
			return (error);

		if ((error = umtx_key_get(umtx, TYPE_SIMPLE_LOCK,
		    AUTO_SHARE, &uq->uq_key)) != 0)
			return (error);

		/*
		 * Enqueue before publishing the contested bit so a
		 * concurrent unlocker's wakeup cannot be missed.
		 */
		umtxq_lock(&uq->uq_key);
		umtxq_busy(&uq->uq_key);
		umtxq_insert(uq);
		umtxq_unbusy(&uq->uq_key);
		umtxq_unlock(&uq->uq_key);

		/*
		 * Set the contested bit so that a release in user space
		 * knows to use the system call for unlock.  If this fails
		 * either some one else has acquired the lock or it has been
		 * released.
		 */
		old = casuword(&umtx->u_owner, owner, owner | UMTX_CONTESTED);

		/* The address was invalid. */
		if (old == -1) {
			umtxq_lock(&uq->uq_key);
			umtxq_remove(uq);
			umtxq_unlock(&uq->uq_key);
			umtx_key_release(&uq->uq_key);
			return (EFAULT);
		}

		/*
		 * We set the contested bit, sleep.  Otherwise the lock changed
		 * and we need to retry or we lost a race to the thread
		 * unlocking the umtx.
		 */
		umtxq_lock(&uq->uq_key);
		if (old == owner)
			error = umtxq_sleep(uq, "umtx", timo);
		umtxq_remove(uq);
		umtxq_unlock(&uq->uq_key);
		umtx_key_release(&uq->uq_key);
	}

	return (0);
}

/*
 * Lock a umtx object.
 */
static int
do_lock_umtx(struct thread *td, struct umtx *umtx, u_long id,
	struct timespec *timeout)
{
	struct timespec ts, ts2, ts3;
	struct timeval tv;
	int error;

	if (timeout == NULL) {
		error = _do_lock_umtx(td, umtx, id, 0);
		/* Mutex locking is restarted if it is interrupted. */
		if (error == EINTR)
			error = ERESTART;
	} else {
		/* Absolute deadline = now + relative timeout. */
		getnanouptime(&ts);
		timespecadd(&ts, timeout);
		TIMESPEC_TO_TIMEVAL(&tv, timeout);
		for (;;) {
			error = _do_lock_umtx(td, umtx, id, tvtohz(&tv));
			if (error != ETIMEDOUT)
				break;
			/* Recompute the remaining time and retry. */
			getnanouptime(&ts2);
			if (timespeccmp(&ts2, &ts, >=)) {
				error = ETIMEDOUT;
				break;
			}
			ts3 = ts;
			timespecsub(&ts3, &ts2);
			TIMESPEC_TO_TIMEVAL(&tv, &ts3);
		}
		/* Timed-locking is not restarted. */
		if (error == ERESTART)
			error = EINTR;
	}
	return (error);
}

/*
 * Unlock a umtx object.
 */
static int
do_unlock_umtx(struct thread *td, struct umtx *umtx, u_long id)
{
	struct umtx_key key;
	u_long owner;
	u_long old;
	int error;
	int count;

	/*
	 * Make sure we own this mtx.
	 */
	owner = fuword(__DEVOLATILE(u_long *, &umtx->u_owner));
	if (owner == -1)
		return (EFAULT);

	if ((owner & ~UMTX_CONTESTED) != id)
		return (EPERM);

	/* This should be done in userland */
	if ((owner & UMTX_CONTESTED) == 0) {
		old = casuword(&umtx->u_owner, owner, UMTX_UNOWNED);
		if (old == -1)
			return (EFAULT);
		if (old == owner)
			return (0);
		owner = old;
	}

	/* We should only ever be in here for contested locks */
	if ((error = umtx_key_get(umtx, TYPE_SIMPLE_LOCK, AUTO_SHARE,
	    &key)) != 0)
		return (error);

	umtxq_lock(&key);
	umtxq_busy(&key);
	count = umtxq_count(&key);
	umtxq_unlock(&key);

	/*
	 * When unlocking the umtx, it must be marked as unowned if
	 * there is zero or one thread only waiting for it.
	 * Otherwise, it must be marked as contested.
	 */
	old = casuword(&umtx->u_owner, owner,
	    count <= 1 ? UMTX_UNOWNED : UMTX_CONTESTED);
	umtxq_lock(&key);
	umtxq_signal(&key,1);
	umtxq_unbusy(&key);
	umtxq_unlock(&key);
	umtx_key_release(&key);
	if (old == -1)
		return (EFAULT);
	if (old != owner)
		return (EINVAL);
	return (0);
}

#ifdef COMPAT_IA32

/*
 * Lock a umtx object (32-bit ABI variant of _do_lock_umtx; same
 * algorithm over a 32-bit word using casuword32()).
 */
static int
_do_lock_umtx32(struct thread *td, uint32_t *m, uint32_t id, int timo)
{
	struct umtx_q *uq;
	uint32_t owner;
	uint32_t old;
	int error = 0;

	uq = td->td_umtxq;

	/*
	 * Care must be exercised when dealing with umtx structure. It
	 * can fault on any access.
	 */
	for (;;) {
		/*
		 * Try the uncontested case.  This should be done in userland.
		 */
		owner = casuword32(m, UMUTEX_UNOWNED, id);

		/* The acquire succeeded. */
		if (owner == UMUTEX_UNOWNED)
			return (0);

		/* The address was invalid. */
		if (owner == -1)
			return (EFAULT);

		/* If no one owns it but it is contested try to acquire it. */
		if (owner == UMUTEX_CONTESTED) {
			owner = casuword32(m,
			    UMUTEX_CONTESTED, id | UMUTEX_CONTESTED);
			if (owner == UMUTEX_CONTESTED)
				return (0);

			/* The address was invalid. */
			if (owner == -1)
				return (EFAULT);

			/* If this failed the lock has changed, restart. */
			continue;
		}

		/*
		 * If we caught a signal, we have retried and now
		 * exit immediately.
		 */
		if (error != 0)
			return (error);

		if ((error = umtx_key_get(m, TYPE_SIMPLE_LOCK,
		    AUTO_SHARE, &uq->uq_key)) != 0)
			return (error);

		umtxq_lock(&uq->uq_key);
		umtxq_busy(&uq->uq_key);
		umtxq_insert(uq);
		umtxq_unbusy(&uq->uq_key);
		umtxq_unlock(&uq->uq_key);

		/*
		 * Set the contested bit so that a release in user space
		 * knows to use the system call for unlock.  If this fails
		 * either some one else has acquired the lock or it has been
		 * released.
		 */
		old = casuword32(m, owner, owner | UMUTEX_CONTESTED);

		/* The address was invalid. */
		if (old == -1) {
			umtxq_lock(&uq->uq_key);
			umtxq_remove(uq);
			umtxq_unlock(&uq->uq_key);
			umtx_key_release(&uq->uq_key);
			return (EFAULT);
		}

		/*
		 * We set the contested bit, sleep.  Otherwise the lock changed
		 * and we need to retry or we lost a race to the thread
		 * unlocking the umtx.
		 */
		umtxq_lock(&uq->uq_key);
		if (old == owner)
			error = umtxq_sleep(uq, "umtx", timo);
		umtxq_remove(uq);
		umtxq_unlock(&uq->uq_key);
		umtx_key_release(&uq->uq_key);
	}

	return (0);
}

/*
 * Lock a umtx object (32-bit ABI timed wrapper; mirrors do_lock_umtx).
 */
static int
do_lock_umtx32(struct thread *td, void *m, uint32_t id,
	struct timespec *timeout)
{
	struct timespec ts, ts2, ts3;
	struct timeval tv;
	int error;

	if (timeout == NULL) {
		error = _do_lock_umtx32(td, m, id, 0);
		/* Mutex locking is restarted if it is interrupted. */
		if (error == EINTR)
			error = ERESTART;
	} else {
		getnanouptime(&ts);
		timespecadd(&ts, timeout);
		TIMESPEC_TO_TIMEVAL(&tv, timeout);
		for (;;) {
			error = _do_lock_umtx32(td, m, id, tvtohz(&tv));
			if (error != ETIMEDOUT)
				break;
			getnanouptime(&ts2);
			if (timespeccmp(&ts2, &ts, >=)) {
				error = ETIMEDOUT;
				break;
			}
			ts3 = ts;
			timespecsub(&ts3, &ts2);
			TIMESPEC_TO_TIMEVAL(&tv, &ts3);
		}
		/* Timed-locking is not restarted. */
		if (error == ERESTART)
			error = EINTR;
	}
	return (error);
}

/*
 * Unlock a umtx object (32-bit ABI variant of do_unlock_umtx).
 */
static int
do_unlock_umtx32(struct thread *td, uint32_t *m, uint32_t id)
{
	struct umtx_key key;
	uint32_t owner;
	uint32_t old;
	int error;
	int count;

	/*
	 * Make sure we own this mtx.
	 */
	owner = fuword32(m);
	if (owner == -1)
		return (EFAULT);

	if ((owner & ~UMUTEX_CONTESTED) != id)
		return (EPERM);

	/* This should be done in userland */
	if ((owner & UMUTEX_CONTESTED) == 0) {
		old = casuword32(m, owner, UMUTEX_UNOWNED);
		if (old == -1)
			return (EFAULT);
		if (old == owner)
			return (0);
		owner = old;
	}

	/* We should only ever be in here for contested locks */
	if ((error = umtx_key_get(m, TYPE_SIMPLE_LOCK, AUTO_SHARE,
	    &key)) != 0)
		return (error);

	umtxq_lock(&key);
	umtxq_busy(&key);
	count = umtxq_count(&key);
	umtxq_unlock(&key);

	/*
	 * When unlocking the umtx, it must be marked as unowned if
	 * there is zero or one thread only waiting for it.
	 * Otherwise, it must be marked as contested.
	 */
	old = casuword32(m, owner,
	    count <= 1 ? UMUTEX_UNOWNED : UMUTEX_CONTESTED);
	umtxq_lock(&key);
	umtxq_signal(&key,1);
	umtxq_unbusy(&key);
	umtxq_unlock(&key);
	umtx_key_release(&key);
	if (old == -1)
		return (EFAULT);
	if (old != owner)
		return (EINVAL);
	return (0);
}
#endif

/*
 * Fetch and compare value, sleep on the address if value is not changed.
 */
static int
do_wait(struct thread *td, void *addr, u_long id,
	struct timespec *timeout, int compat32)
{
	struct umtx_q *uq;
	struct timespec ts, ts2, ts3;
	struct timeval tv;
	u_long tmp;
	int error = 0;

	uq = td->td_umtxq;
	if ((error = umtx_key_get(addr, TYPE_SIMPLE_WAIT, AUTO_SHARE,
	    &uq->uq_key)) != 0)
		return (error);

	/*
	 * Queue first, then re-read the word: a wakeup between the read
	 * and the enqueue would otherwise be lost.
	 */
	umtxq_lock(&uq->uq_key);
	umtxq_insert(uq);
	umtxq_unlock(&uq->uq_key);
	if (compat32 == 0)
		tmp = fuword(addr);
	else
		tmp = fuword32(addr);
	/*
	 * NOTE(review): a fuword fault returns -1, which is
	 * indistinguishable here from a stored value of -1; such a fault
	 * is treated as "value changed" and returns 0 — confirm intended.
	 */
	if (tmp != id) {
		/* Value already changed; do not sleep. */
		umtxq_lock(&uq->uq_key);
		umtxq_remove(uq);
		umtxq_unlock(&uq->uq_key);
	} else if (timeout == NULL) {
		umtxq_lock(&uq->uq_key);
		error = umtxq_sleep(uq, "uwait", 0);
		umtxq_remove(uq);
		umtxq_unlock(&uq->uq_key);
	} else {
		/* Timed wait: absolute deadline, re-armed after each wakeup. */
		getnanouptime(&ts);
		timespecadd(&ts, timeout);
		TIMESPEC_TO_TIMEVAL(&tv, timeout);
		umtxq_lock(&uq->uq_key);
		for (;;) {
			error = umtxq_sleep(uq, "uwait", tvtohz(&tv));
			if (!(uq->uq_flags & UQF_UMTXQ))
				break;
			if (error != ETIMEDOUT)
				break;
			umtxq_unlock(&uq->uq_key);
			getnanouptime(&ts2);
			if (timespeccmp(&ts2, &ts, >=)) {
				error = ETIMEDOUT;
				umtxq_lock(&uq->uq_key);
				break;
			}
			ts3 = ts;
			timespecsub(&ts3, &ts2);
			TIMESPEC_TO_TIMEVAL(&tv, &ts3);
			umtxq_lock(&uq->uq_key);
		}
		umtxq_remove(uq);
		umtxq_unlock(&uq->uq_key);
	}
	umtx_key_release(&uq->uq_key);
	if (error == ERESTART)
		error = EINTR;
	return (error);
}

/*
 * Wake up threads sleeping on the specified address.
 */
int
kern_umtx_wake(struct thread *td, void *uaddr, int n_wake)
{
	struct umtx_key key;
	int ret;

	if ((ret = umtx_key_get(uaddr, TYPE_SIMPLE_WAIT, AUTO_SHARE,
	   &key)) != 0)
		return (ret);
	umtxq_lock(&key);
	/*
	 * NOTE(review): the number of threads awakened is stored in ret
	 * but never reported to the caller (dead store) — confirm whether
	 * it should be returned via td->td_retval[0].
	 */
	ret = umtxq_signal(&key, n_wake);
	umtxq_unlock(&key);
	umtx_key_release(&key);
	return (0);
}

/*
 * Lock PTHREAD_PRIO_NONE protocol POSIX mutex.  Same CAS loop as
 * _do_lock_umtx, plus EDEADLK detection for error-checking mutexes and
 * an optional SMP adaptive-spin phase before sleeping.
 */
static int
_do_lock_normal(struct thread *td, struct umutex *m, uint32_t flags, int timo,
	int try)
{
	struct umtx_q *uq;
	uint32_t owner, old, id;
#ifdef SMP
	int spincount;
#endif
	int error = 0;

	id = td->td_tid;
	uq = td->td_umtxq;

#ifdef SMP
	/* Spin budget: per-mutex hint, else the sysctl default, capped. */
	if (smp_cpus > 1) {
		spincount = fuword32(&m->m_spincount);
		if (spincount == 0)
			spincount = umtx_dflt_spins;
		if (spincount > umtx_max_spins)
			spincount = umtx_max_spins;
	} else
		spincount = 0;
#endif

	/*
	 * Care must be exercised when dealing with umtx structure. It
	 * can fault on any access.
	 */
	for (;;) {
#ifdef SMP
try_unowned:
#endif
		/*
		 * Try the uncontested case.  This should be done in userland.
		 */
		owner = casuword32(&m->m_owner, UMUTEX_UNOWNED, id);

		/* The acquire succeeded. */
		if (owner == UMUTEX_UNOWNED)
			return (0);

		/* The address was invalid. */
		if (owner == -1)
			return (EFAULT);

		/* If no one owns it but it is contested try to acquire it. */
		if (owner == UMUTEX_CONTESTED) {
#ifdef SMP
try_contested:
#endif
			owner = casuword32(&m->m_owner,
			    UMUTEX_CONTESTED, id | UMUTEX_CONTESTED);

			if (owner == UMUTEX_CONTESTED)
				return (0);

			/* The address was invalid. */
			if (owner == -1)
				return (EFAULT);

			/* If this failed the lock has changed, restart. */
			continue;
		}

		if ((flags & UMUTEX_ERROR_CHECK) != 0 &&
		    (owner & ~UMUTEX_CONTESTED) == id)
			return (EDEADLK);

		if (try != 0)
			return (EBUSY);

#ifdef SMP
		/*
		 * Adaptive spin: only worthwhile while the owner is
		 * currently running on some CPU (checked via pc_curtid).
		 */
		if (spincount > 0 && (owner & ~UMUTEX_CONTESTED) != id) {
			int i, found = 0;
			struct pcpu *pcpu = NULL;

			/* Look for a cpu the owner is running on */
			for (i = 0; i < MAXCPU; i++) {
				if (CPU_ABSENT(i))
					continue;
				pcpu = pcpu_find(i);
				if ((owner & ~UMUTEX_CONTESTED) == pcpu->pc_curtid) {
					found = 1;
					break;
				}
			}

			if (__predict_false(!found))
				goto end_spin;

			while ((owner & ~UMUTEX_CONTESTED) == pcpu->pc_curtid &&
			    (owner & ~UMUTEX_CONTESTED) != id) {
				if (--spincount <= 0)
					break;
				/* Give up spinning on pending events. */
				if ((td->td_flags &
				    (TDF_NEEDRESCHED|TDF_ASTPENDING|TDF_NEEDSIGCHK)) ||
				    P_SHOULDSTOP(td->td_proc))
					break;
				owner = fuword32(__DEVOLATILE(uint32_t *, &m->m_owner));
				if (owner == UMUTEX_UNOWNED)
					goto try_unowned;
				if (owner == UMUTEX_CONTESTED)
					goto try_contested;
				cpu_spinwait();
			}
		}
end_spin:
		/* Spin at most once; subsequent iterations go to sleep. */
		spincount = 0;

#endif

		/*
		 * If we caught a signal, we have retried and now
		 * exit immediately.
		 */
		if (error != 0)
			return (error);

		if ((error = umtx_key_get(m, TYPE_NORMAL_UMUTEX,
		    GET_SHARE(flags), &uq->uq_key)) != 0)
			return (error);

		umtxq_lock(&uq->uq_key);
		umtxq_busy(&uq->uq_key);
		umtxq_insert(uq);
		umtxq_unbusy(&uq->uq_key);
		umtxq_unlock(&uq->uq_key);

		/*
		 * Set the contested bit so that a release in user space
		 * knows to use the system call for unlock.  If this fails
		 * either some one else has acquired the lock or it has been
		 * released.
		 */
		old = casuword32(&m->m_owner, owner, owner | UMUTEX_CONTESTED);

		/* The address was invalid. */
		if (old == -1) {
			umtxq_lock(&uq->uq_key);
			umtxq_remove(uq);
			umtxq_unlock(&uq->uq_key);
			umtx_key_release(&uq->uq_key);
			return (EFAULT);
		}

		/*
		 * We set the contested bit, sleep.  Otherwise the lock changed
		 * and we need to retry or we lost a race to the thread
		 * unlocking the umtx.
		 */
		umtxq_lock(&uq->uq_key);
		if (old == owner)
			error = umtxq_sleep(uq, "umtxn", timo);
		umtxq_remove(uq);
		umtxq_unlock(&uq->uq_key);
		umtx_key_release(&uq->uq_key);
	}

	return (0);
}

/*
 * Unlock PTHREAD_PRIO_NONE protocol POSIX mutex.
 */
static int
do_unlock_normal(struct thread *td, struct umutex *m, uint32_t flags)
{
	struct umtx_key key;
	uint32_t owner, old, id;
	int error;
	int count;

	id = td->td_tid;
	/*
	 * Make sure we own this mtx.
	 */
	owner = fuword32(__DEVOLATILE(uint32_t *, &m->m_owner));
	if (owner == -1)
		return (EFAULT);

	if ((owner & ~UMUTEX_CONTESTED) != id)
		return (EPERM);

	/* This should be done in userland */
	if ((owner & UMUTEX_CONTESTED) == 0) {
		old = casuword32(&m->m_owner, owner, UMUTEX_UNOWNED);
		if (old == -1)
			return (EFAULT);
		if (old == owner)
			return (0);
		owner = old;
	}

	/* We should only ever be in here for contested locks */
	if ((error = umtx_key_get(m, TYPE_NORMAL_UMUTEX, GET_SHARE(flags),
	    &key)) != 0)
		return (error);

	umtxq_lock(&key);
	umtxq_busy(&key);
	count = umtxq_count(&key);
	umtxq_unlock(&key);

	/*
	 * When unlocking the umtx, it must be marked as unowned if
	 * there is zero or one thread only waiting for it.
	 * Otherwise, it must be marked as contested.
	 */
	old = casuword32(&m->m_owner, owner,
	    count <= 1 ? UMUTEX_UNOWNED : UMUTEX_CONTESTED);
	umtxq_lock(&key);
	umtxq_signal(&key,1);
	umtxq_unbusy(&key);
	umtxq_unlock(&key);
	umtx_key_release(&key);
	if (old == -1)
		return (EFAULT);
	if (old != owner)
		return (EINVAL);
	return (0);
}

/*
 * Allocate a umtx_pi record.  NOTE(review): with M_NOWAIT uma_zalloc()
 * may return NULL, and TAILQ_INIT would then dereference it — confirm
 * all callers either pass M_WAITOK or cannot hit the NULL path.
 */
static inline struct umtx_pi *
umtx_pi_alloc(int flags)
{
	struct umtx_pi *pi;

	pi = uma_zalloc(umtx_pi_zone, M_ZERO | flags);
	TAILQ_INIT(&pi->pi_blocked);
	atomic_add_int(&umtx_pi_allocated, 1);
	return (pi);
}

/* Free a umtx_pi record and maintain the allocation counter. */
static inline void
umtx_pi_free(struct umtx_pi *pi)
{
	uma_zfree(umtx_pi_zone, pi);
	atomic_add_int(&umtx_pi_allocated, -1);
}

/*
 * Adjust the thread's position on a pi_state after its priority has been
 * changed.  Returns 0 only when pi is NULL (nothing to resort);
 * otherwise 1.  Requires sched_lock.
 */
static int
umtx_pi_adjust_thread(struct umtx_pi *pi, struct thread *td)
{
	struct umtx_q *uq, *uq1, *uq2;
	struct thread *td1;

	mtx_assert(&sched_lock, MA_OWNED);
	if (pi == NULL)
		return (0);

	uq = td->td_umtxq;

	/*
	 * Check if the thread needs to be moved on the blocked chain.
	 * It needs to be moved if either its priority is lower than
	 * the previous thread or higher than the next thread.
	 */
	uq1 = TAILQ_PREV(uq, umtxq_head, uq_lockq);
	uq2 = TAILQ_NEXT(uq, uq_lockq);
	if ((uq1 != NULL && UPRI(td) < UPRI(uq1->uq_thread)) ||
	    (uq2 != NULL && UPRI(td) > UPRI(uq2->uq_thread))) {
		/*
		 * Remove thread from blocked chain and determine where
		 * it should be moved to.
		 */
		TAILQ_REMOVE(&pi->pi_blocked, uq, uq_lockq);
		TAILQ_FOREACH(uq1, &pi->pi_blocked, uq_lockq) {
			td1 = uq1->uq_thread;
			MPASS(td1->td_proc->p_magic == P_MAGIC);
			if (UPRI(td1) > UPRI(td))
				break;
		}

		if (uq1 == NULL)
			TAILQ_INSERT_TAIL(&pi->pi_blocked, uq, uq_lockq);
		else
			TAILQ_INSERT_BEFORE(uq1, uq, uq_lockq);
	}
	return (1);
}

/*
 * Propagate priority when a thread is blocked on POSIX
 * PI mutex.  Walks the owner chain lending td's user priority until an
 * owner already runs at or above it.  Requires sched_lock.
 */
static void
umtx_propagate_priority(struct thread *td)
{
	struct umtx_q *uq;
	struct umtx_pi *pi;
	int pri;

	mtx_assert(&sched_lock, MA_OWNED);
	pri = UPRI(td);
	uq = td->td_umtxq;
	pi = uq->uq_pi_blocked;
	if (pi == NULL)
		return;

	for (;;) {
		td = pi->pi_owner;
		if (td == NULL)
			return;

		MPASS(td->td_proc != NULL);
		MPASS(td->td_proc->p_magic == P_MAGIC);

		/* Owner already at least as urgent; chain ends here. */
		if (UPRI(td) <= pri)
			return;

		sched_lend_user_prio(td, pri);

		/*
		 * Pick up the lock that td is blocked on.
		 */
		uq = td->td_umtxq;
		pi = uq->uq_pi_blocked;
		/* Resort td on the list if needed. */
		if (!umtx_pi_adjust_thread(pi, td))
			break;
	}
}

/*
 * Unpropagate priority for a PI mutex when a thread blocked on
 * it is interrupted by signal or resumed by others.
1353 */ 1354static void 1355umtx_unpropagate_priority(struct umtx_pi *pi) 1356{ 1357 struct umtx_q *uq, *uq_owner; 1358 struct umtx_pi *pi2; 1359 int pri; 1360 1361 mtx_assert(&sched_lock, MA_OWNED); 1362 1363 while (pi != NULL && pi->pi_owner != NULL) { 1364 pri = PRI_MAX; 1365 uq_owner = pi->pi_owner->td_umtxq; 1366 1367 TAILQ_FOREACH(pi2, &uq_owner->uq_pi_contested, pi_link) { 1368 uq = TAILQ_FIRST(&pi2->pi_blocked); 1369 if (uq != NULL) { 1370 if (pri > UPRI(uq->uq_thread)) 1371 pri = UPRI(uq->uq_thread); 1372 } 1373 } 1374 1375 if (pri > uq_owner->uq_inherited_pri) 1376 pri = uq_owner->uq_inherited_pri; 1377 sched_unlend_user_prio(pi->pi_owner, pri); 1378 pi = uq_owner->uq_pi_blocked; 1379 } 1380} 1381 1382/* 1383 * Insert a PI mutex into owned list. 1384 */ 1385static void 1386umtx_pi_setowner(struct umtx_pi *pi, struct thread *owner) 1387{ 1388 struct umtx_q *uq_owner; 1389 1390 uq_owner = owner->td_umtxq; 1391 mtx_assert(&sched_lock, MA_OWNED); 1392 if (pi->pi_owner != NULL) 1393 panic("pi_ower != NULL"); 1394 pi->pi_owner = owner; 1395 TAILQ_INSERT_TAIL(&uq_owner->uq_pi_contested, pi, pi_link); 1396} 1397 1398/* 1399 * Claim ownership of a PI mutex. 1400 */ 1401static int 1402umtx_pi_claim(struct umtx_pi *pi, struct thread *owner) 1403{ 1404 struct umtx_q *uq, *uq_owner; 1405 1406 uq_owner = owner->td_umtxq; 1407 mtx_lock_spin(&sched_lock); 1408 if (pi->pi_owner == owner) { 1409 mtx_unlock_spin(&sched_lock); 1410 return (0); 1411 } 1412 1413 if (pi->pi_owner != NULL) { 1414 /* 1415 * userland may have already messed the mutex, sigh. 
1416 */ 1417 mtx_unlock_spin(&sched_lock); 1418 return (EPERM); 1419 } 1420 umtx_pi_setowner(pi, owner); 1421 uq = TAILQ_FIRST(&pi->pi_blocked); 1422 if (uq != NULL) { 1423 int pri; 1424 1425 pri = UPRI(uq->uq_thread); 1426 if (pri < UPRI(owner)) 1427 sched_lend_user_prio(owner, pri); 1428 } 1429 mtx_unlock_spin(&sched_lock); 1430 return (0); 1431} 1432 1433/* 1434 * Adjust a thread's order position in its blocked PI mutex, 1435 * this may result new priority propagating process. 1436 */ 1437void 1438umtx_pi_adjust(struct thread *td, u_char oldpri) 1439{ 1440 struct umtx_q *uq; 1441 struct umtx_pi *pi; 1442 1443 uq = td->td_umtxq; 1444 1445 mtx_assert(&sched_lock, MA_OWNED); 1446 MPASS(TD_ON_UPILOCK(td)); 1447 1448 /* 1449 * Pick up the lock that td is blocked on. 1450 */ 1451 pi = uq->uq_pi_blocked; 1452 MPASS(pi != NULL); 1453 1454 /* Resort the turnstile on the list. */ 1455 if (!umtx_pi_adjust_thread(pi, td)) 1456 return; 1457 1458 /* 1459 * If our priority was lowered and we are at the head of the 1460 * turnstile, then propagate our new priority up the chain. 1461 */ 1462 if (uq == TAILQ_FIRST(&pi->pi_blocked) && UPRI(td) < oldpri) 1463 umtx_propagate_priority(td); 1464} 1465 1466/* 1467 * Sleep on a PI mutex. 1468 */ 1469static int 1470umtxq_sleep_pi(struct umtx_q *uq, struct umtx_pi *pi, 1471 uint32_t owner, const char *wmesg, int timo) 1472{ 1473 struct umtxq_chain *uc; 1474 struct thread *td, *td1; 1475 struct umtx_q *uq1; 1476 int pri; 1477 int error = 0; 1478 1479 td = uq->uq_thread; 1480 KASSERT(td == curthread, ("inconsistent uq_thread")); 1481 uc = umtxq_getchain(&uq->uq_key); 1482 UMTXQ_LOCKED_ASSERT(uc); 1483 umtxq_insert(uq); 1484 if (pi->pi_owner == NULL) { 1485 /* XXX 1486 * Current, We only support process private PI-mutex, 1487 * non-contended PI-mutexes are locked in userland. 
1488 * Process shared PI-mutex should always be initialized 1489 * by kernel and be registered in kernel, locking should 1490 * always be done by kernel to avoid security problems. 1491 * For process private PI-mutex, we can find owner 1492 * thread and boost its priority safely. 1493 */ 1494 PROC_LOCK(curproc); 1495 td1 = thread_find(curproc, owner); 1496 mtx_lock_spin(&sched_lock); 1497 if (td1 != NULL && pi->pi_owner == NULL) { 1498 uq1 = td1->td_umtxq; 1499 umtx_pi_setowner(pi, td1); 1500 } 1501 PROC_UNLOCK(curproc); 1502 } else { 1503 mtx_lock_spin(&sched_lock); 1504 } 1505 1506 TAILQ_FOREACH(uq1, &pi->pi_blocked, uq_lockq) { 1507 pri = UPRI(uq1->uq_thread); 1508 if (pri > UPRI(td)) 1509 break; 1510 } 1511 1512 if (uq1 != NULL) 1513 TAILQ_INSERT_BEFORE(uq1, uq, uq_lockq); 1514 else 1515 TAILQ_INSERT_TAIL(&pi->pi_blocked, uq, uq_lockq); 1516 1517 uq->uq_pi_blocked = pi; 1518 td->td_flags |= TDF_UPIBLOCKED; 1519 mtx_unlock_spin(&sched_lock); 1520 umtxq_unlock(&uq->uq_key); 1521 1522 mtx_lock_spin(&sched_lock); 1523 umtx_propagate_priority(td); 1524 mtx_unlock_spin(&sched_lock); 1525 1526 umtxq_lock(&uq->uq_key); 1527 if (uq->uq_flags & UQF_UMTXQ) { 1528 error = msleep(uq, &uc->uc_lock, PCATCH, wmesg, timo); 1529 if (error == EWOULDBLOCK) 1530 error = ETIMEDOUT; 1531 if (uq->uq_flags & UQF_UMTXQ) { 1532 umtxq_busy(&uq->uq_key); 1533 umtxq_remove(uq); 1534 umtxq_unbusy(&uq->uq_key); 1535 } 1536 } 1537 umtxq_unlock(&uq->uq_key); 1538 1539 mtx_lock_spin(&sched_lock); 1540 uq->uq_pi_blocked = NULL; 1541 td->td_flags &= ~TDF_UPIBLOCKED; 1542 TAILQ_REMOVE(&pi->pi_blocked, uq, uq_lockq); 1543 umtx_unpropagate_priority(pi); 1544 mtx_unlock_spin(&sched_lock); 1545 1546 umtxq_lock(&uq->uq_key); 1547 1548 return (error); 1549} 1550 1551/* 1552 * Add reference count for a PI mutex. 
1553 */ 1554static void 1555umtx_pi_ref(struct umtx_pi *pi) 1556{ 1557 struct umtxq_chain *uc; 1558 1559 uc = umtxq_getchain(&pi->pi_key); 1560 UMTXQ_LOCKED_ASSERT(uc); 1561 pi->pi_refcount++; 1562} 1563 1564/* 1565 * Decrease reference count for a PI mutex, if the counter 1566 * is decreased to zero, its memory space is freed. 1567 */ 1568static void 1569umtx_pi_unref(struct umtx_pi *pi) 1570{ 1571 struct umtxq_chain *uc; 1572 int free = 0; 1573 1574 uc = umtxq_getchain(&pi->pi_key); 1575 UMTXQ_LOCKED_ASSERT(uc); 1576 KASSERT(pi->pi_refcount > 0, ("invalid reference count")); 1577 if (--pi->pi_refcount == 0) { 1578 mtx_lock_spin(&sched_lock); 1579 if (pi->pi_owner != NULL) { 1580 TAILQ_REMOVE(&pi->pi_owner->td_umtxq->uq_pi_contested, 1581 pi, pi_link); 1582 pi->pi_owner = NULL; 1583 } 1584 KASSERT(TAILQ_EMPTY(&pi->pi_blocked), 1585 ("blocked queue not empty")); 1586 mtx_unlock_spin(&sched_lock); 1587 TAILQ_REMOVE(&uc->uc_pi_list, pi, pi_hashlink); 1588 free = 1; 1589 } 1590 if (free) 1591 umtx_pi_free(pi); 1592} 1593 1594/* 1595 * Find a PI mutex in hash table. 1596 */ 1597static struct umtx_pi * 1598umtx_pi_lookup(struct umtx_key *key) 1599{ 1600 struct umtxq_chain *uc; 1601 struct umtx_pi *pi; 1602 1603 uc = umtxq_getchain(key); 1604 UMTXQ_LOCKED_ASSERT(uc); 1605 1606 TAILQ_FOREACH(pi, &uc->uc_pi_list, pi_hashlink) { 1607 if (umtx_key_match(&pi->pi_key, key)) { 1608 return (pi); 1609 } 1610 } 1611 return (NULL); 1612} 1613 1614/* 1615 * Insert a PI mutex into hash table. 1616 */ 1617static inline void 1618umtx_pi_insert(struct umtx_pi *pi) 1619{ 1620 struct umtxq_chain *uc; 1621 1622 uc = umtxq_getchain(&pi->pi_key); 1623 UMTXQ_LOCKED_ASSERT(uc); 1624 TAILQ_INSERT_TAIL(&uc->uc_pi_list, pi, pi_hashlink); 1625} 1626 1627/* 1628 * Lock a PI mutex. 
1629 */ 1630static int 1631_do_lock_pi(struct thread *td, struct umutex *m, uint32_t flags, int timo, 1632 int try) 1633{ 1634 struct umtx_q *uq; 1635 struct umtx_pi *pi, *new_pi; 1636 uint32_t id, owner, old; 1637 int error; 1638 1639 id = td->td_tid; 1640 uq = td->td_umtxq; 1641 1642 if ((error = umtx_key_get(m, TYPE_PI_UMUTEX, GET_SHARE(flags), 1643 &uq->uq_key)) != 0) 1644 return (error); 1645 umtxq_lock(&uq->uq_key); 1646 pi = umtx_pi_lookup(&uq->uq_key); 1647 if (pi == NULL) { 1648 new_pi = umtx_pi_alloc(M_NOWAIT); 1649 if (new_pi == NULL) { 1650 umtxq_unlock(&uq->uq_key); 1651 new_pi = umtx_pi_alloc(M_WAITOK); 1652 new_pi->pi_key = uq->uq_key; 1653 umtxq_lock(&uq->uq_key); 1654 pi = umtx_pi_lookup(&uq->uq_key); 1655 if (pi != NULL) { 1656 umtx_pi_free(new_pi); 1657 new_pi = NULL; 1658 } 1659 } 1660 if (new_pi != NULL) { 1661 new_pi->pi_key = uq->uq_key; 1662 umtx_pi_insert(new_pi); 1663 pi = new_pi; 1664 } 1665 } 1666 umtx_pi_ref(pi); 1667 umtxq_unlock(&uq->uq_key); 1668 1669 /* 1670 * Care must be exercised when dealing with umtx structure. It 1671 * can fault on any access. 1672 */ 1673 for (;;) { 1674 /* 1675 * Try the uncontested case. This should be done in userland. 1676 */ 1677 owner = casuword32(&m->m_owner, UMUTEX_UNOWNED, id); 1678 1679 /* The acquire succeeded. */ 1680 if (owner == UMUTEX_UNOWNED) { 1681 error = 0; 1682 break; 1683 } 1684 1685 /* The address was invalid. */ 1686 if (owner == -1) { 1687 error = EFAULT; 1688 break; 1689 } 1690 1691 /* If no one owns it but it is contested try to acquire it. */ 1692 if (owner == UMUTEX_CONTESTED) { 1693 owner = casuword32(&m->m_owner, 1694 UMUTEX_CONTESTED, id | UMUTEX_CONTESTED); 1695 1696 if (owner == UMUTEX_CONTESTED) { 1697 umtxq_lock(&uq->uq_key); 1698 error = umtx_pi_claim(pi, td); 1699 umtxq_unlock(&uq->uq_key); 1700 break; 1701 } 1702 1703 /* The address was invalid. */ 1704 if (owner == -1) { 1705 error = EFAULT; 1706 break; 1707 } 1708 1709 /* If this failed the lock has changed, restart. 
*/ 1710 continue; 1711 } 1712 1713 if ((flags & UMUTEX_ERROR_CHECK) != 0 && 1714 (owner & ~UMUTEX_CONTESTED) == id) { 1715 error = EDEADLK; 1716 break; 1717 } 1718 1719 if (try != 0) { 1720 error = EBUSY; 1721 break; 1722 } 1723 1724 /* 1725 * If we caught a signal, we have retried and now 1726 * exit immediately. 1727 */ 1728 if (error != 0) 1729 break; 1730 1731 umtxq_lock(&uq->uq_key); 1732 umtxq_busy(&uq->uq_key); 1733 umtxq_unlock(&uq->uq_key); 1734 1735 /* 1736 * Set the contested bit so that a release in user space 1737 * knows to use the system call for unlock. If this fails 1738 * either some one else has acquired the lock or it has been 1739 * released. 1740 */ 1741 old = casuword32(&m->m_owner, owner, owner | UMUTEX_CONTESTED); 1742 1743 /* The address was invalid. */ 1744 if (old == -1) { 1745 umtxq_lock(&uq->uq_key); 1746 umtxq_unbusy(&uq->uq_key); 1747 umtxq_unlock(&uq->uq_key); 1748 error = EFAULT; 1749 break; 1750 } 1751 1752 umtxq_lock(&uq->uq_key); 1753 umtxq_unbusy(&uq->uq_key); 1754 /* 1755 * We set the contested bit, sleep. Otherwise the lock changed 1756 * and we need to retry or we lost a race to the thread 1757 * unlocking the umtx. 1758 */ 1759 if (old == owner) 1760 error = umtxq_sleep_pi(uq, pi, owner & ~UMUTEX_CONTESTED, 1761 "umtxpi", timo); 1762 umtxq_unlock(&uq->uq_key); 1763 } 1764 1765 umtxq_lock(&uq->uq_key); 1766 umtx_pi_unref(pi); 1767 umtxq_unlock(&uq->uq_key); 1768 1769 umtx_key_release(&uq->uq_key); 1770 return (error); 1771} 1772 1773/* 1774 * Unlock a PI mutex. 1775 */ 1776static int 1777do_unlock_pi(struct thread *td, struct umutex *m, uint32_t flags) 1778{ 1779 struct umtx_key key; 1780 struct umtx_q *uq_first, *uq_first2, *uq_me; 1781 struct umtx_pi *pi, *pi2; 1782 uint32_t owner, old, id; 1783 int error; 1784 int count; 1785 int pri; 1786 1787 id = td->td_tid; 1788 /* 1789 * Make sure we own this mtx. 
1790 */ 1791 owner = fuword32(__DEVOLATILE(uint32_t *, &m->m_owner)); 1792 if (owner == -1) 1793 return (EFAULT); 1794 1795 if ((owner & ~UMUTEX_CONTESTED) != id) 1796 return (EPERM); 1797 1798 /* This should be done in userland */ 1799 if ((owner & UMUTEX_CONTESTED) == 0) { 1800 old = casuword32(&m->m_owner, owner, UMUTEX_UNOWNED); 1801 if (old == -1) 1802 return (EFAULT); 1803 if (old == owner) 1804 return (0); 1805 owner = old; 1806 } 1807 1808 /* We should only ever be in here for contested locks */ 1809 if ((error = umtx_key_get(m, TYPE_PI_UMUTEX, GET_SHARE(flags), 1810 &key)) != 0) 1811 return (error); 1812 1813 umtxq_lock(&key); 1814 umtxq_busy(&key); 1815 count = umtxq_count_pi(&key, &uq_first); 1816 if (uq_first != NULL) { 1817 pi = uq_first->uq_pi_blocked; 1818 if (pi->pi_owner != curthread) { 1819 umtxq_unbusy(&key); 1820 umtxq_unlock(&key); 1821 /* userland messed the mutex */ 1822 return (EPERM); 1823 } 1824 uq_me = curthread->td_umtxq; 1825 mtx_lock_spin(&sched_lock); 1826 pi->pi_owner = NULL; 1827 TAILQ_REMOVE(&uq_me->uq_pi_contested, pi, pi_link); 1828 uq_first = TAILQ_FIRST(&pi->pi_blocked); 1829 pri = PRI_MAX; 1830 TAILQ_FOREACH(pi2, &uq_me->uq_pi_contested, pi_link) { 1831 uq_first2 = TAILQ_FIRST(&pi2->pi_blocked); 1832 if (uq_first2 != NULL) { 1833 if (pri > UPRI(uq_first2->uq_thread)) 1834 pri = UPRI(uq_first2->uq_thread); 1835 } 1836 } 1837 sched_unlend_user_prio(curthread, pri); 1838 mtx_unlock_spin(&sched_lock); 1839 } 1840 umtxq_unlock(&key); 1841 1842 /* 1843 * When unlocking the umtx, it must be marked as unowned if 1844 * there is zero or one thread only waiting for it. 1845 * Otherwise, it must be marked as contested. 1846 */ 1847 old = casuword32(&m->m_owner, owner, 1848 count <= 1 ? 
UMUTEX_UNOWNED : UMUTEX_CONTESTED); 1849 1850 umtxq_lock(&key); 1851 if (uq_first != NULL) 1852 umtxq_signal_thread(uq_first); 1853 umtxq_unbusy(&key); 1854 umtxq_unlock(&key); 1855 umtx_key_release(&key); 1856 if (old == -1) 1857 return (EFAULT); 1858 if (old != owner) 1859 return (EINVAL); 1860 return (0); 1861} 1862 1863/* 1864 * Lock a PP mutex. 1865 */ 1866static int 1867_do_lock_pp(struct thread *td, struct umutex *m, uint32_t flags, int timo, 1868 int try) 1869{ 1870 struct umtx_q *uq, *uq2; 1871 struct umtx_pi *pi; 1872 uint32_t ceiling; 1873 uint32_t owner, id; 1874 int error, pri, old_inherited_pri, su; 1875 1876 id = td->td_tid; 1877 uq = td->td_umtxq; 1878 if ((error = umtx_key_get(m, TYPE_PP_UMUTEX, GET_SHARE(flags), 1879 &uq->uq_key)) != 0) 1880 return (error); 1881 su = (priv_check(td, PRIV_SCHED_RTPRIO) == 0); 1882 for (;;) { 1883 old_inherited_pri = uq->uq_inherited_pri; 1884 umtxq_lock(&uq->uq_key); 1885 umtxq_busy(&uq->uq_key); 1886 umtxq_unlock(&uq->uq_key); 1887 1888 ceiling = RTP_PRIO_MAX - fuword32(&m->m_ceilings[0]); 1889 if (ceiling > RTP_PRIO_MAX) { 1890 error = EINVAL; 1891 goto out; 1892 } 1893 1894 mtx_lock_spin(&sched_lock); 1895 if (UPRI(td) < PRI_MIN_REALTIME + ceiling) { 1896 mtx_unlock_spin(&sched_lock); 1897 error = EINVAL; 1898 goto out; 1899 } 1900 if (su && PRI_MIN_REALTIME + ceiling < uq->uq_inherited_pri) { 1901 uq->uq_inherited_pri = PRI_MIN_REALTIME + ceiling; 1902 if (uq->uq_inherited_pri < UPRI(td)) 1903 sched_lend_user_prio(td, uq->uq_inherited_pri); 1904 } 1905 mtx_unlock_spin(&sched_lock); 1906 1907 owner = casuword32(&m->m_owner, 1908 UMUTEX_CONTESTED, id | UMUTEX_CONTESTED); 1909 1910 if (owner == UMUTEX_CONTESTED) { 1911 error = 0; 1912 break; 1913 } 1914 1915 /* The address was invalid. 
*/ 1916 if (owner == -1) { 1917 error = EFAULT; 1918 break; 1919 } 1920 1921 if ((flags & UMUTEX_ERROR_CHECK) != 0 && 1922 (owner & ~UMUTEX_CONTESTED) == id) { 1923 error = EDEADLK; 1924 break; 1925 } 1926 1927 if (try != 0) { 1928 error = EBUSY; 1929 break; 1930 } 1931 1932 /* 1933 * If we caught a signal, we have retried and now 1934 * exit immediately. 1935 */ 1936 if (error != 0) 1937 break; 1938 1939 umtxq_lock(&uq->uq_key); 1940 umtxq_insert(uq); 1941 umtxq_unbusy(&uq->uq_key); 1942 error = umtxq_sleep(uq, "umtxpp", timo); 1943 umtxq_remove(uq); 1944 umtxq_unlock(&uq->uq_key); 1945 1946 mtx_lock_spin(&sched_lock); 1947 uq->uq_inherited_pri = old_inherited_pri; 1948 pri = PRI_MAX; 1949 TAILQ_FOREACH(pi, &uq->uq_pi_contested, pi_link) { 1950 uq2 = TAILQ_FIRST(&pi->pi_blocked); 1951 if (uq2 != NULL) { 1952 if (pri > UPRI(uq2->uq_thread)) 1953 pri = UPRI(uq2->uq_thread); 1954 } 1955 } 1956 if (pri > uq->uq_inherited_pri) 1957 pri = uq->uq_inherited_pri; 1958 sched_unlend_user_prio(td, pri); 1959 mtx_unlock_spin(&sched_lock); 1960 } 1961 1962 if (error != 0) { 1963 mtx_lock_spin(&sched_lock); 1964 uq->uq_inherited_pri = old_inherited_pri; 1965 pri = PRI_MAX; 1966 TAILQ_FOREACH(pi, &uq->uq_pi_contested, pi_link) { 1967 uq2 = TAILQ_FIRST(&pi->pi_blocked); 1968 if (uq2 != NULL) { 1969 if (pri > UPRI(uq2->uq_thread)) 1970 pri = UPRI(uq2->uq_thread); 1971 } 1972 } 1973 if (pri > uq->uq_inherited_pri) 1974 pri = uq->uq_inherited_pri; 1975 sched_unlend_user_prio(td, pri); 1976 mtx_unlock_spin(&sched_lock); 1977 } 1978 1979out: 1980 umtxq_lock(&uq->uq_key); 1981 umtxq_unbusy(&uq->uq_key); 1982 umtxq_unlock(&uq->uq_key); 1983 umtx_key_release(&uq->uq_key); 1984 return (error); 1985} 1986 1987/* 1988 * Unlock a PP mutex. 
1989 */ 1990static int 1991do_unlock_pp(struct thread *td, struct umutex *m, uint32_t flags) 1992{ 1993 struct umtx_key key; 1994 struct umtx_q *uq, *uq2; 1995 struct umtx_pi *pi; 1996 uint32_t owner, id; 1997 uint32_t rceiling; 1998 int error, pri, new_inherited_pri, su; 1999 2000 id = td->td_tid; 2001 uq = td->td_umtxq; 2002 su = (priv_check(td, PRIV_SCHED_RTPRIO) == 0); 2003 2004 /* 2005 * Make sure we own this mtx. 2006 */ 2007 owner = fuword32(__DEVOLATILE(uint32_t *, &m->m_owner)); 2008 if (owner == -1) 2009 return (EFAULT); 2010 2011 if ((owner & ~UMUTEX_CONTESTED) != id) 2012 return (EPERM); 2013 2014 error = copyin(&m->m_ceilings[1], &rceiling, sizeof(uint32_t)); 2015 if (error != 0) 2016 return (error); 2017 2018 if (rceiling == -1) 2019 new_inherited_pri = PRI_MAX; 2020 else { 2021 rceiling = RTP_PRIO_MAX - rceiling; 2022 if (rceiling > RTP_PRIO_MAX) 2023 return (EINVAL); 2024 new_inherited_pri = PRI_MIN_REALTIME + rceiling; 2025 } 2026 2027 if ((error = umtx_key_get(m, TYPE_PP_UMUTEX, GET_SHARE(flags), 2028 &key)) != 0) 2029 return (error); 2030 umtxq_lock(&key); 2031 umtxq_busy(&key); 2032 umtxq_unlock(&key); 2033 /* 2034 * For priority protected mutex, always set unlocked state 2035 * to UMUTEX_CONTESTED, so that userland always enters kernel 2036 * to lock the mutex, it is necessary because thread priority 2037 * has to be adjusted for such mutex. 
2038 */ 2039 error = suword32(__DEVOLATILE(uint32_t *, &m->m_owner), 2040 UMUTEX_CONTESTED); 2041 2042 umtxq_lock(&key); 2043 if (error == 0) 2044 umtxq_signal(&key, 1); 2045 umtxq_unbusy(&key); 2046 umtxq_unlock(&key); 2047 2048 if (error == -1) 2049 error = EFAULT; 2050 else { 2051 mtx_lock_spin(&sched_lock); 2052 if (su != 0) 2053 uq->uq_inherited_pri = new_inherited_pri; 2054 pri = PRI_MAX; 2055 TAILQ_FOREACH(pi, &uq->uq_pi_contested, pi_link) { 2056 uq2 = TAILQ_FIRST(&pi->pi_blocked); 2057 if (uq2 != NULL) { 2058 if (pri > UPRI(uq2->uq_thread)) 2059 pri = UPRI(uq2->uq_thread); 2060 } 2061 } 2062 if (pri > uq->uq_inherited_pri) 2063 pri = uq->uq_inherited_pri; 2064 sched_unlend_user_prio(td, pri); 2065 mtx_unlock_spin(&sched_lock); 2066 } 2067 umtx_key_release(&key); 2068 return (error); 2069} 2070 2071static int 2072do_set_ceiling(struct thread *td, struct umutex *m, uint32_t ceiling, 2073 uint32_t *old_ceiling) 2074{ 2075 struct umtx_q *uq; 2076 uint32_t save_ceiling; 2077 uint32_t owner, id; 2078 uint32_t flags; 2079 int error; 2080 2081 flags = fuword32(&m->m_flags); 2082 if ((flags & UMUTEX_PRIO_PROTECT) == 0) 2083 return (EINVAL); 2084 if (ceiling > RTP_PRIO_MAX) 2085 return (EINVAL); 2086 id = td->td_tid; 2087 uq = td->td_umtxq; 2088 if ((error = umtx_key_get(m, TYPE_PP_UMUTEX, GET_SHARE(flags), 2089 &uq->uq_key)) != 0) 2090 return (error); 2091 for (;;) { 2092 umtxq_lock(&uq->uq_key); 2093 umtxq_busy(&uq->uq_key); 2094 umtxq_unlock(&uq->uq_key); 2095 2096 save_ceiling = fuword32(&m->m_ceilings[0]); 2097 2098 owner = casuword32(&m->m_owner, 2099 UMUTEX_CONTESTED, id | UMUTEX_CONTESTED); 2100 2101 if (owner == UMUTEX_CONTESTED) { 2102 suword32(&m->m_ceilings[0], ceiling); 2103 suword32(__DEVOLATILE(uint32_t *, &m->m_owner), 2104 UMUTEX_CONTESTED); 2105 error = 0; 2106 break; 2107 } 2108 2109 /* The address was invalid. 
*/ 2110 if (owner == -1) { 2111 error = EFAULT; 2112 break; 2113 } 2114 2115 if ((owner & ~UMUTEX_CONTESTED) == id) { 2116 suword32(&m->m_ceilings[0], ceiling); 2117 error = 0; 2118 break; 2119 } 2120 2121 /* 2122 * If we caught a signal, we have retried and now 2123 * exit immediately. 2124 */ 2125 if (error != 0) 2126 break; 2127 2128 /* 2129 * We set the contested bit, sleep. Otherwise the lock changed 2130 * and we need to retry or we lost a race to the thread 2131 * unlocking the umtx. 2132 */ 2133 umtxq_lock(&uq->uq_key); 2134 umtxq_insert(uq); 2135 umtxq_unbusy(&uq->uq_key); 2136 error = umtxq_sleep(uq, "umtxpp", 0); 2137 umtxq_remove(uq); 2138 umtxq_unlock(&uq->uq_key); 2139 } 2140 umtxq_lock(&uq->uq_key); 2141 if (error == 0) 2142 umtxq_signal(&uq->uq_key, INT_MAX); 2143 umtxq_unbusy(&uq->uq_key); 2144 umtxq_unlock(&uq->uq_key); 2145 umtx_key_release(&uq->uq_key); 2146 if (error == 0 && old_ceiling != NULL) 2147 suword32(old_ceiling, save_ceiling); 2148 return (error); 2149} 2150 2151static int 2152_do_lock_umutex(struct thread *td, struct umutex *m, int flags, int timo, 2153 int try) 2154{ 2155 switch(flags & (UMUTEX_PRIO_INHERIT | UMUTEX_PRIO_PROTECT)) { 2156 case 0: 2157 return (_do_lock_normal(td, m, flags, timo, try)); 2158 case UMUTEX_PRIO_INHERIT: 2159 return (_do_lock_pi(td, m, flags, timo, try)); 2160 case UMUTEX_PRIO_PROTECT: 2161 return (_do_lock_pp(td, m, flags, timo, try)); 2162 } 2163 return (EINVAL); 2164} 2165 2166/* 2167 * Lock a userland POSIX mutex. 2168 */ 2169static int 2170do_lock_umutex(struct thread *td, struct umutex *m, 2171 struct timespec *timeout, int try) 2172{ 2173 struct timespec ts, ts2, ts3; 2174 struct timeval tv; 2175 uint32_t flags; 2176 int error; 2177 2178 flags = fuword32(&m->m_flags); 2179 if (flags == -1) 2180 return (EFAULT); 2181 2182 if (timeout == NULL) { 2183 error = _do_lock_umutex(td, m, flags, 0, try); 2184 /* Mutex locking is restarted if it is interrupted. 
*/ 2185 if (error == EINTR) 2186 error = ERESTART; 2187 } else { 2188 getnanouptime(&ts); 2189 timespecadd(&ts, timeout); 2190 TIMESPEC_TO_TIMEVAL(&tv, timeout); 2191 for (;;) { 2192 error = _do_lock_umutex(td, m, flags, tvtohz(&tv), try); 2193 if (error != ETIMEDOUT) 2194 break; 2195 getnanouptime(&ts2); 2196 if (timespeccmp(&ts2, &ts, >=)) { 2197 error = ETIMEDOUT; 2198 break; 2199 } 2200 ts3 = ts; 2201 timespecsub(&ts3, &ts2); 2202 TIMESPEC_TO_TIMEVAL(&tv, &ts3); 2203 } 2204 /* Timed-locking is not restarted. */ 2205 if (error == ERESTART) 2206 error = EINTR; 2207 } 2208 return (error); 2209} 2210 2211/* 2212 * Unlock a userland POSIX mutex. 2213 */ 2214static int 2215do_unlock_umutex(struct thread *td, struct umutex *m) 2216{ 2217 uint32_t flags; 2218 2219 flags = fuword32(&m->m_flags); 2220 if (flags == -1) 2221 return (EFAULT); 2222 2223 switch(flags & (UMUTEX_PRIO_INHERIT | UMUTEX_PRIO_PROTECT)) { 2224 case 0: 2225 return (do_unlock_normal(td, m, flags)); 2226 case UMUTEX_PRIO_INHERIT: 2227 return (do_unlock_pi(td, m, flags)); 2228 case UMUTEX_PRIO_PROTECT: 2229 return (do_unlock_pp(td, m, flags)); 2230 } 2231 2232 return (EINVAL); 2233} 2234 2235static int 2236do_cv_wait(struct thread *td, struct ucond *cv, struct umutex *m, 2237 struct timespec *timeout, u_long wflags) 2238{ 2239 struct umtx_q *uq; 2240 struct timeval tv; 2241 struct timespec cts, ets, tts; 2242 uint32_t flags; 2243 int error; 2244 2245 uq = td->td_umtxq; 2246 flags = fuword32(&cv->c_flags); 2247 error = umtx_key_get(cv, TYPE_CV, GET_SHARE(flags), &uq->uq_key); 2248 if (error != 0) 2249 return (error); 2250 umtxq_lock(&uq->uq_key); 2251 umtxq_busy(&uq->uq_key); 2252 umtxq_insert(uq); 2253 umtxq_unlock(&uq->uq_key); 2254 2255 /* 2256 * The magic thing is we should set c_has_waiters to 1 before 2257 * releasing user mutex. 
 */
	suword32(__DEVOLATILE(uint32_t *, &cv->c_has_waiters), 1);

	umtxq_lock(&uq->uq_key);
	umtxq_unbusy(&uq->uq_key);
	umtxq_unlock(&uq->uq_key);

	/* Atomically (w.r.t. the queue) drop the associated mutex. */
	error = do_unlock_umutex(td, m);

	umtxq_lock(&uq->uq_key);
	if (error == 0) {
		if ((wflags & UMTX_CHECK_UNPARKING) &&
		    (td->td_pflags & TDP_WAKEUP)) {
			/* Consumed a pending unpark request. */
			td->td_pflags &= ~TDP_WAKEUP;
			error = EINTR;
		} else if (timeout == NULL) {
			error = umtxq_sleep(uq, "ucond", 0);
		} else {
			/* Convert the relative timeout to an absolute
			 * deadline and re-sleep until it expires. */
			getnanouptime(&ets);
			timespecadd(&ets, timeout);
			TIMESPEC_TO_TIMEVAL(&tv, timeout);
			for (;;) {
				error = umtxq_sleep(uq, "ucond", tvtohz(&tv));
				if (error != ETIMEDOUT)
					break;
				getnanouptime(&cts);
				if (timespeccmp(&cts, &ets, >=)) {
					error = ETIMEDOUT;
					break;
				}
				tts = ets;
				timespecsub(&tts, &cts);
				TIMESPEC_TO_TIMEVAL(&tv, &tts);
			}
		}
	}

	if (error != 0) {
		if ((uq->uq_flags & UQF_UMTXQ) == 0) {
			/*
			 * If we concurrently got do_cv_signal()d
			 * and we got an error or UNIX signals or a timeout,
			 * then, perform another umtxq_signal to avoid
			 * consuming the wakeup. This may cause spurious
			 * wakeup for another thread which was just queued,
			 * but SUSV3 explicitly allows spurious wakeup to
			 * occur, and indeed a kernel based implementation
			 * can not avoid it.
			 */
			if (!umtxq_signal(&uq->uq_key, 1))
				error = 0;
		}
		if (error == ERESTART)
			error = EINTR;
	}
	umtxq_remove(uq);
	umtxq_unlock(&uq->uq_key);
	umtx_key_release(&uq->uq_key);
	return (error);
}

/*
 * Signal a userland condition variable.
2321 */ 2322static int 2323do_cv_signal(struct thread *td, struct ucond *cv) 2324{ 2325 struct umtx_key key; 2326 int error, cnt, nwake; 2327 uint32_t flags; 2328 2329 flags = fuword32(&cv->c_flags); 2330 if ((error = umtx_key_get(cv, TYPE_CV, GET_SHARE(flags), &key)) != 0) 2331 return (error); 2332 umtxq_lock(&key); 2333 umtxq_busy(&key); 2334 cnt = umtxq_count(&key); 2335 nwake = umtxq_signal(&key, 1); 2336 if (cnt <= nwake) { 2337 umtxq_unlock(&key); 2338 error = suword32( 2339 __DEVOLATILE(uint32_t *, &cv->c_has_waiters), 0); 2340 umtxq_lock(&key); 2341 } 2342 umtxq_unbusy(&key); 2343 umtxq_unlock(&key); 2344 umtx_key_release(&key); 2345 return (error); 2346} 2347 2348static int 2349do_cv_broadcast(struct thread *td, struct ucond *cv) 2350{ 2351 struct umtx_key key; 2352 int error; 2353 uint32_t flags; 2354 2355 flags = fuword32(&cv->c_flags); 2356 if ((error = umtx_key_get(cv, TYPE_CV, GET_SHARE(flags), &key)) != 0) 2357 return (error); 2358 2359 umtxq_lock(&key); 2360 umtxq_busy(&key); 2361 umtxq_signal(&key, INT_MAX); 2362 umtxq_unlock(&key); 2363 2364 error = suword32(__DEVOLATILE(uint32_t *, &cv->c_has_waiters), 0); 2365 2366 umtxq_lock(&key); 2367 umtxq_unbusy(&key); 2368 umtxq_unlock(&key); 2369 2370 umtx_key_release(&key); 2371 return (error); 2372} 2373 2374int 2375_umtx_lock(struct thread *td, struct _umtx_lock_args *uap) 2376 /* struct umtx *umtx */ 2377{ 2378 return _do_lock_umtx(td, uap->umtx, td->td_tid, 0); 2379} 2380 2381int 2382_umtx_unlock(struct thread *td, struct _umtx_unlock_args *uap) 2383 /* struct umtx *umtx */ 2384{ 2385 return do_unlock_umtx(td, uap->umtx, td->td_tid); 2386} 2387 2388static int 2389__umtx_op_lock_umtx(struct thread *td, struct _umtx_op_args *uap) 2390{ 2391 struct timespec *ts, timeout; 2392 int error; 2393 2394 /* Allow a null timespec (wait forever). 
*/ 2395 if (uap->uaddr2 == NULL) 2396 ts = NULL; 2397 else { 2398 error = copyin(uap->uaddr2, &timeout, sizeof(timeout)); 2399 if (error != 0) 2400 return (error); 2401 if (timeout.tv_nsec >= 1000000000 || 2402 timeout.tv_nsec < 0) { 2403 return (EINVAL); 2404 } 2405 ts = &timeout; 2406 } 2407 return (do_lock_umtx(td, uap->obj, uap->val, ts)); 2408} 2409 2410static int 2411__umtx_op_unlock_umtx(struct thread *td, struct _umtx_op_args *uap) 2412{ 2413 return (do_unlock_umtx(td, uap->obj, uap->val)); 2414} 2415 2416static int 2417__umtx_op_wait(struct thread *td, struct _umtx_op_args *uap) 2418{ 2419 struct timespec *ts, timeout; 2420 int error; 2421 2422 if (uap->uaddr2 == NULL) 2423 ts = NULL; 2424 else { 2425 error = copyin(uap->uaddr2, &timeout, sizeof(timeout)); 2426 if (error != 0) 2427 return (error); 2428 if (timeout.tv_nsec >= 1000000000 || 2429 timeout.tv_nsec < 0) 2430 return (EINVAL); 2431 ts = &timeout; 2432 } 2433 return do_wait(td, uap->obj, uap->val, ts, 0); 2434} 2435 2436static int 2437__umtx_op_wake(struct thread *td, struct _umtx_op_args *uap) 2438{ 2439 return (kern_umtx_wake(td, uap->obj, uap->val)); 2440} 2441 2442static int 2443__umtx_op_lock_umutex(struct thread *td, struct _umtx_op_args *uap) 2444{ 2445 struct timespec *ts, timeout; 2446 int error; 2447 2448 /* Allow a null timespec (wait forever). 
 */
	if (uap->uaddr2 == NULL)
		ts = NULL;
	else {
		error = copyin(uap->uaddr2, &timeout,
		    sizeof(timeout));
		if (error != 0)
			return (error);
		/* Reject a timespec with an out-of-range nanosecond field. */
		if (timeout.tv_nsec >= 1000000000 ||
		    timeout.tv_nsec < 0) {
			return (EINVAL);
		}
		ts = &timeout;
	}
	return do_lock_umutex(td, uap->obj, ts, 0);
}

/*
 * Dispatch handler for UMTX_OP_MUTEX_TRYLOCK: non-blocking mutex
 * acquisition (the "try" flag to do_lock_umutex is 1, no timeout).
 */
static int
__umtx_op_trylock_umutex(struct thread *td, struct _umtx_op_args *uap)
{
	return do_lock_umutex(td, uap->obj, NULL, 1);
}

/* Dispatch handler for UMTX_OP_MUTEX_UNLOCK. */
static int
__umtx_op_unlock_umutex(struct thread *td, struct _umtx_op_args *uap)
{
	return do_unlock_umutex(td, uap->obj);
}

/*
 * Dispatch handler for UMTX_OP_SET_CEILING: set the priority-protection
 * ceiling of a PP mutex; uaddr1 optionally receives the old ceiling.
 */
static int
__umtx_op_set_ceiling(struct thread *td, struct _umtx_op_args *uap)
{
	return do_set_ceiling(td, uap->obj, uap->val, uap->uaddr1);
}

/*
 * Dispatch handler for UMTX_OP_CV_WAIT: wait on a userland condition
 * variable.  uaddr1 is the associated mutex; uaddr2 is an optional
 * timeout copied in from userland and validated here.
 */
static int
__umtx_op_cv_wait(struct thread *td, struct _umtx_op_args *uap)
{
	struct timespec *ts, timeout;
	int error;

	/* Allow a null timespec (wait forever). */
	if (uap->uaddr2 == NULL)
		ts = NULL;
	else {
		error = copyin(uap->uaddr2, &timeout,
		    sizeof(timeout));
		if (error != 0)
			return (error);
		/* Reject a timespec with an out-of-range nanosecond field. */
		if (timeout.tv_nsec >= 1000000000 ||
		    timeout.tv_nsec < 0) {
			return (EINVAL);
		}
		ts = &timeout;
	}
	return (do_cv_wait(td, uap->obj, uap->uaddr1, ts, uap->val));
}

/* Dispatch handler for UMTX_OP_CV_SIGNAL: wake one waiter. */
static int
__umtx_op_cv_signal(struct thread *td, struct _umtx_op_args *uap)
{
	return do_cv_signal(td, uap->obj);
}

/* Dispatch handler for UMTX_OP_CV_BROADCAST: wake all waiters. */
static int
__umtx_op_cv_broadcast(struct thread *td, struct _umtx_op_args *uap)
{
	return do_cv_broadcast(td, uap->obj);
}

typedef int (*_umtx_op_func)(struct thread *td, struct _umtx_op_args *uap);

/*
 * _umtx_op(2) dispatch table, indexed by uap->op (the UMTX_OP_* value).
 * The order of entries must match the UMTX_OP_* numbering.
 */
static _umtx_op_func op_table[] = {
	__umtx_op_lock_umtx,		/* UMTX_OP_LOCK */
	__umtx_op_unlock_umtx,		/* UMTX_OP_UNLOCK */
	__umtx_op_wait,			/* UMTX_OP_WAIT */
	__umtx_op_wake,			/* UMTX_OP_WAKE */
	__umtx_op_trylock_umutex,	/* UMTX_OP_MUTEX_TRYLOCK */
	__umtx_op_lock_umutex,		/* UMTX_OP_MUTEX_LOCK */
	__umtx_op_unlock_umutex,	/* UMTX_OP_MUTEX_UNLOCK */
	__umtx_op_set_ceiling,		/* UMTX_OP_SET_CEILING */
	__umtx_op_cv_wait,		/* UMTX_OP_CV_WAIT */
	__umtx_op_cv_signal,		/* UMTX_OP_CV_SIGNAL */
	__umtx_op_cv_broadcast		/* UMTX_OP_CV_BROADCAST */
};

/*
 * The _umtx_op(2) system call: validate the opcode and dispatch to the
 * matching handler.
 */
int
_umtx_op(struct thread *td, struct _umtx_op_args *uap)
{
	if ((unsigned)uap->op < UMTX_OP_MAX)
		return (*op_table[uap->op])(td, uap);
	return (EINVAL);
}

#ifdef COMPAT_IA32
/*
 * 32-bit compat shim for _umtx_lock(2): lock the umtx on behalf of the
 * calling thread (owner id is the thread's tid), blocking indefinitely.
 */
int
freebsd32_umtx_lock(struct thread *td, struct freebsd32_umtx_lock_args *uap)
    /* struct umtx *umtx */
{
	return (do_lock_umtx32(td, (uint32_t *)uap->umtx, td->td_tid, NULL));
}

/* 32-bit compat shim for _umtx_unlock(2). */
int
freebsd32_umtx_unlock(struct thread *td, struct freebsd32_umtx_unlock_args *uap)
    /* struct umtx *umtx */
{
	return (do_unlock_umtx32(td, (uint32_t *)uap->umtx, td->td_tid));
}

/* 32-bit layout of struct timespec as seen by ia32 userland. */
struct timespec32 {
	u_int32_t tv_sec;
	u_int32_t tv_nsec;
};

/*
 * Copy a 32-bit timespec in from userland address 'addr' and widen it
 * into the native *tsp.  Returns the copyin(9) error, if any.
 */
static inline int
copyin_timeout32(void *addr, struct timespec *tsp)
{
	struct timespec32 ts32;
	int error;

	error = copyin(addr, &ts32, sizeof(struct timespec32));
	if (error == 0) {
		tsp->tv_sec = ts32.tv_sec;
		tsp->tv_nsec = ts32.tv_nsec;
	}
	return (error);
}

/* 32-bit compat dispatch handler for UMTX_OP_LOCK. */
static int
__umtx_op_lock_umtx_compat32(struct thread *td, struct _umtx_op_args *uap)
{
	struct timespec *ts, timeout;
	int error;

	/* Allow a null timespec (wait forever). */
	if (uap->uaddr2 == NULL)
		ts = NULL;
	else {
		error = copyin_timeout32(uap->uaddr2, &timeout);
		if (error != 0)
			return (error);
		/* Reject a timespec with an out-of-range nanosecond field. */
		if (timeout.tv_nsec >= 1000000000 ||
		    timeout.tv_nsec < 0) {
			return (EINVAL);
		}
		ts = &timeout;
	}
	return (do_lock_umtx32(td, uap->obj, uap->val, ts));
}

/* 32-bit compat dispatch handler for UMTX_OP_UNLOCK. */
static int
__umtx_op_unlock_umtx_compat32(struct thread *td, struct _umtx_op_args *uap)
{
	return (do_unlock_umtx32(td, uap->obj, (uint32_t)uap->val));
}

/*
 * 32-bit compat dispatch handler for UMTX_OP_WAIT; the final argument
 * to do_wait() marks the value as a 32-bit word.
 */
static int
__umtx_op_wait_compat32(struct thread *td, struct _umtx_op_args *uap)
{
	struct timespec *ts, timeout;
	int error;

	/* Allow a null timespec (wait forever). */
	if (uap->uaddr2 == NULL)
		ts = NULL;
	else {
		error = copyin_timeout32(uap->uaddr2, &timeout);
		if (error != 0)
			return (error);
		if (timeout.tv_nsec >= 1000000000 ||
		    timeout.tv_nsec < 0)
			return (EINVAL);
		ts = &timeout;
	}
	return do_wait(td, uap->obj, uap->val, ts, 1);
}

/*
 * 32-bit compat dispatch handler for UMTX_OP_MUTEX_LOCK; only the
 * timeout needs conversion, the umutex handling itself is shared with
 * the native path.
 */
static int
__umtx_op_lock_umutex_compat32(struct thread *td, struct _umtx_op_args *uap)
{
	struct timespec *ts, timeout;
	int error;

	/* Allow a null timespec (wait forever). */
	if (uap->uaddr2 == NULL)
		ts = NULL;
	else {
		error = copyin_timeout32(uap->uaddr2, &timeout);
		if (error != 0)
			return (error);
		if (timeout.tv_nsec >= 1000000000 ||
		    timeout.tv_nsec < 0)
			return (EINVAL);
		ts = &timeout;
	}
	return do_lock_umutex(td, uap->obj, ts, 0);
}

/* 32-bit compat dispatch handler for UMTX_OP_CV_WAIT. */
static int
__umtx_op_cv_wait_compat32(struct thread *td, struct _umtx_op_args *uap)
{
	struct timespec *ts, timeout;
	int error;

	/* Allow a null timespec (wait forever). */
	if (uap->uaddr2 == NULL)
		ts = NULL;
	else {
		error = copyin_timeout32(uap->uaddr2, &timeout);
		if (error != 0)
			return (error);
		if (timeout.tv_nsec >= 1000000000 ||
		    timeout.tv_nsec < 0)
			return (EINVAL);
		ts = &timeout;
	}
	return (do_cv_wait(td, uap->obj, uap->uaddr1, ts, uap->val));
}

/*
 * 32-bit compat dispatch table; must stay in the same UMTX_OP_* order
 * as the native op_table above.  Entries without a timeout or other
 * 32-bit-specific argument reuse the native handlers directly.
 */
static _umtx_op_func op_table_compat32[] = {
	__umtx_op_lock_umtx_compat32,	/* UMTX_OP_LOCK */
	__umtx_op_unlock_umtx_compat32,	/* UMTX_OP_UNLOCK */
	__umtx_op_wait_compat32,	/* UMTX_OP_WAIT */
	__umtx_op_wake,			/* UMTX_OP_WAKE */
	__umtx_op_trylock_umutex,	/* UMTX_OP_MUTEX_TRYLOCK */
	__umtx_op_lock_umutex_compat32,	/* UMTX_OP_MUTEX_LOCK */
	__umtx_op_unlock_umutex,	/* UMTX_OP_MUTEX_UNLOCK */
	__umtx_op_set_ceiling,		/* UMTX_OP_SET_CEILING */
	__umtx_op_cv_wait_compat32,	/* UMTX_OP_CV_WAIT */
	__umtx_op_cv_signal,		/* UMTX_OP_CV_SIGNAL */
	__umtx_op_cv_broadcast		/* UMTX_OP_CV_BROADCAST */
};

/*
 * 32-bit compat entry point for _umtx_op(2).
 * NOTE(review): the cast assumes struct freebsd32_umtx_op_args has the
 * same layout as struct _umtx_op_args — confirm against the generated
 * sysproto headers.
 */
int
freebsd32_umtx_op(struct thread *td, struct freebsd32_umtx_op_args *uap)
{
	if ((unsigned)uap->op < UMTX_OP_MAX)
		return (*op_table_compat32[uap->op])(td,
			(struct _umtx_op_args *)uap);
	return (EINVAL);
}
#endif

/*
 * Thread-construction hook: allocate the per-thread umtx queue and
 * link it back to its owning thread.
 */
void
umtx_thread_init(struct thread *td)
{
	td->td_umtxq = umtxq_alloc();
	td->td_umtxq->uq_thread = td;
}

/* Thread-destruction hook: release the per-thread umtx queue. */
void
umtx_thread_fini(struct thread *td)
{
	umtxq_free(td->td_umtxq);
}

/*
 * It will be called when new thread is created, e.g fork().
 * Resets the inherited priority and asserts the queue is in its
 * pristine (unqueued, uncontested) state.
 */
void
umtx_thread_alloc(struct thread *td)
{
	struct umtx_q *uq;

	uq = td->td_umtxq;
	uq->uq_inherited_pri = PRI_MAX;

	KASSERT(uq->uq_flags == 0, ("uq_flags != 0"));
	KASSERT(uq->uq_thread == td, ("uq_thread != td"));
	KASSERT(uq->uq_pi_blocked == NULL, ("uq_pi_blocked != NULL"));
	KASSERT(TAILQ_EMPTY(&uq->uq_pi_contested), ("uq_pi_contested is not empty"));
}

/*
 * exec() hook.
 */
static void
umtx_exec_hook(void *arg __unused, struct proc *p __unused,
	struct image_params *imgp __unused)
{
	umtx_thread_cleanup(curthread);
}

/*
 * thread_exit() hook.
 */
void
umtx_thread_exit(struct thread *td)
{
	umtx_thread_cleanup(td);
}

/*
 * clean up umtx data.  Disowns any priority-inheritance mutexes the
 * thread still has contested and drops any priority borrowed through
 * them; runs under sched_lock.
 */
static void
umtx_thread_cleanup(struct thread *td)
{
	struct umtx_q *uq;
	struct umtx_pi *pi;

	if ((uq = td->td_umtxq) == NULL)
		return;

	mtx_lock_spin(&sched_lock);
	uq->uq_inherited_pri = PRI_MAX;
	/* Detach every contested PI mutex from this thread. */
	while ((pi = TAILQ_FIRST(&uq->uq_pi_contested)) != NULL) {
		pi->pi_owner = NULL;
		TAILQ_REMOVE(&uq->uq_pi_contested, pi, pi_link);
	}
	/* The thread no longer borrows priority from any waiter. */
	td->td_flags &= ~TDF_UBORROWING;
	mtx_unlock_spin(&sched_lock);
}