/* kern_umtx.c — FreeBSD revision 158718 */
/*-
 * Copyright (c) 2004, David Xu <davidxu@freebsd.org>
 * Copyright (c) 2002, Jeffrey Roberson <jeff@freebsd.org>
 * All rights reserved.
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 * 1. Redistributions of source code must retain the above copyright
 *    notice unmodified, this list of conditions, and the following
 *    disclaimer.
 * 2. Redistributions in binary form must reproduce the above copyright
 *    notice, this list of conditions and the following disclaimer in the
 *    documentation and/or other materials provided with the distribution.
 *
 * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR
 * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
 * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.
 * IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT,
 * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT
 * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
 * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
 * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
 * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF
 * THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
 */

#include <sys/cdefs.h>
__FBSDID("$FreeBSD: head/sys/kern/kern_umtx.c 158718 2006-05-18 08:43:46Z davidxu $");

#include <sys/param.h>
#include <sys/kernel.h>
#include <sys/limits.h>
#include <sys/lock.h>
#include <sys/malloc.h>
#include <sys/mutex.h>
#include <sys/proc.h>
#include <sys/sysent.h>
#include <sys/systm.h>
#include <sys/sysproto.h>
#include <sys/eventhandler.h>
#include <sys/thr.h>
#include <sys/umtx.h>

#include <vm/vm.h>
#include <vm/vm_param.h>
#include <vm/pmap.h>
#include <vm/vm_map.h>
#include <vm/vm_object.h>

/* How a umtx address is identified: per-process or via a shared VM object. */
#define UMTX_PRIVATE	0
#define UMTX_SHARED	1

/*
 * Key that names a umtx independently of the mapping address.  For a
 * shared mapping the (object, offset) pair is used; for a private one,
 * the (userland address, pid) pair.  The "both" arm overlays the two
 * representations so hashing and comparison can treat them uniformly.
 */
struct umtx_key {
	int	type;			/* UMTX_PRIVATE or UMTX_SHARED. */
	union {
		struct {
			vm_object_t	object;
			long		offset;
		} shared;
		struct {
			struct umtx	*umtx;
			long		pid;
		} private;
		struct {
			void		*ptr;	/* Overlays object/umtx. */
			long		word;	/* Overlays offset/pid. */
		} both;
	} info;
};

/*
 * Per-thread sleep queue entry, hung off td_umtxq and linked into a
 * hash chain while the thread is blocked on a umtx.
 */
struct umtx_q {
	LIST_ENTRY(umtx_q)	uq_next;	/* Linked list for the hash. */
	struct umtx_key		uq_key;		/* Umtx key. */
	int			uq_flags;	/* Umtx flags. */
#define UQF_UMTXQ	0x0001			/* Entry is on a hash chain. */
	struct thread		*uq_thread;	/* The thread waits on. */
	vm_offset_t		uq_addr;	/* Umtx's virtual address. */
};

LIST_HEAD(umtx_head, umtx_q);

/*
 * One hash bucket.  uc_lock protects the queue; UCF_BUSY serializes
 * lock/unlock protocol sections that must not interleave on the same
 * chain (waiters on the busy flag sleep on the chain itself).
 */
struct umtxq_chain {
	struct mtx	uc_lock;	/* Lock for this chain. */
	struct umtx_head uc_queue;	/* List of sleep queues. */
#define UCF_BUSY	0x01
	int		uc_flags;
	int		uc_waiters;	/* Threads sleeping on UCF_BUSY. */
};

#define GOLDEN_RATIO_PRIME	2654404609U
#define UMTX_CHAINS		128
/* Keep the top 7 bits of the multiplicative hash (2^7 = UMTX_CHAINS). */
#define UMTX_SHIFTS		(__WORD_BIT - 7)

static struct umtxq_chain umtxq_chains[UMTX_CHAINS];
static MALLOC_DEFINE(M_UMTX, "umtx", "UMTX queue memory");

static void umtxq_init_chains(void *);
static int umtxq_hash(struct umtx_key *key);
static struct mtx *umtxq_mtx(int chain);
static void umtxq_lock(struct umtx_key *key);
static void umtxq_unlock(struct umtx_key *key);
static void umtxq_busy(struct umtx_key *key);
static void umtxq_unbusy(struct umtx_key *key);
static void umtxq_insert(struct umtx_q *uq);
static void umtxq_remove(struct umtx_q *uq);
static int umtxq_sleep(struct thread *td, struct umtx_key *key,
	int prio, const char *wmesg, int timo);
static int umtxq_count(struct umtx_key *key);
static int umtxq_signal(struct umtx_key *key, int nr_wakeup);
static int umtx_key_match(const struct umtx_key *k1, const struct umtx_key *k2);
static int umtx_key_get(struct thread *td, struct umtx *umtx,
	struct umtx_key *key);
static void umtx_key_release(struct umtx_key *key);

SYSINIT(umtx, SI_SUB_EVENTHANDLER+1, SI_ORDER_MIDDLE, umtxq_init_chains, NULL);

/*
 * Allocate a zeroed queue entry; may sleep (M_WAITOK).
 */
struct umtx_q *
umtxq_alloc(void)
{
	return (malloc(sizeof(struct umtx_q), M_UMTX, M_WAITOK|M_ZERO));
}

void
umtxq_free(struct umtx_q *uq)
{
	free(uq, M_UMTX);
}

/*
 * SYSINIT hook: initialize every hash chain's mutex and queue.
 * MTX_DUPOK because two distinct chain locks may be held at once.
 */
static void
umtxq_init_chains(void *arg __unused)
{
	int i;

	for (i = 0; i < UMTX_CHAINS; ++i) {
		mtx_init(&umtxq_chains[i].uc_lock, "umtxq_lock", NULL,
			 MTX_DEF | MTX_DUPOK);
		LIST_INIT(&umtxq_chains[i].uc_queue);
		umtxq_chains[i].uc_flags = 0;
		umtxq_chains[i].uc_waiters = 0;
	}
}

/*
 * Multiplicative (golden-ratio) hash of the key's pointer+word pair
 * down to a chain index in [0, UMTX_CHAINS).
 */
static inline int
umtxq_hash(struct umtx_key *key)
{
	unsigned n = (uintptr_t)key->info.both.ptr + key->info.both.word;
	return (((n * GOLDEN_RATIO_PRIME) >> UMTX_SHIFTS) % UMTX_CHAINS);
}

/*
 * Two keys match when their type and both overlay words are equal.
 */
static inline int
umtx_key_match(const struct umtx_key *k1, const struct umtx_key *k2)
{
	return (k1->type == k2->type &&
		k1->info.both.ptr == k2->info.both.ptr &&
		k1->info.both.word == k2->info.both.word);
}

static inline struct mtx *
umtxq_mtx(int chain)
{
	return (&umtxq_chains[chain].uc_lock);
}

/*
 * Mark the key's chain busy, sleeping (chain lock held, dropped across
 * msleep) until any previous owner clears UCF_BUSY.
 */
static inline void
umtxq_busy(struct umtx_key *key)
{
	int chain = umtxq_hash(key);

	mtx_assert(umtxq_mtx(chain), MA_OWNED);
	while (umtxq_chains[chain].uc_flags & UCF_BUSY) {
		umtxq_chains[chain].uc_waiters++;
		msleep(&umtxq_chains[chain], umtxq_mtx(chain),
		       0, "umtxq_busy", 0);
		umtxq_chains[chain].uc_waiters--;
	}
	umtxq_chains[chain].uc_flags |= UCF_BUSY;
}

/*
 * Clear the busy flag and wake one thread waiting to set it.
 */
static inline void
umtxq_unbusy(struct umtx_key *key)
{
	int chain = umtxq_hash(key);

	mtx_assert(umtxq_mtx(chain), MA_OWNED);
	KASSERT(umtxq_chains[chain].uc_flags & UCF_BUSY, ("not busy"));
	umtxq_chains[chain].uc_flags &= ~UCF_BUSY;
	if (umtxq_chains[chain].uc_waiters)
		wakeup_one(&umtxq_chains[chain]);
}

static inline void
umtxq_lock(struct umtx_key *key)
{
	int chain = umtxq_hash(key);
	mtx_lock(umtxq_mtx(chain));
}

static inline void
umtxq_unlock(struct umtx_key *key)
{
	int chain = umtxq_hash(key);
	mtx_unlock(umtxq_mtx(chain));
}

/*
 * Insert a thread onto the umtx queue.  Chain lock must be held.
 */
static inline void
umtxq_insert(struct umtx_q *uq)
{
	struct umtx_head *head;
	int chain = umtxq_hash(&uq->uq_key);

	mtx_assert(umtxq_mtx(chain), MA_OWNED);
	head = &umtxq_chains[chain].uc_queue;
	LIST_INSERT_HEAD(head, uq, uq_next);
	uq->uq_flags |= UQF_UMTXQ;
}

/*
 * Remove thread from the umtx queue.  Safe to call when the entry has
 * already been removed by a waker (UQF_UMTXQ clear).
 */
static inline void
umtxq_remove(struct umtx_q *uq)
{
	mtx_assert(umtxq_mtx(umtxq_hash(&uq->uq_key)), MA_OWNED);
	if (uq->uq_flags & UQF_UMTXQ) {
		LIST_REMOVE(uq, uq_next);
		/* turning off UQF_UMTXQ should be the last thing. */
		uq->uq_flags &= ~UQF_UMTXQ;
	}
}

/*
 * Count waiters whose key matches.  Callers only distinguish 0, 1, and
 * "more than one", so the scan stops as soon as two matches are found.
 */
static int
umtxq_count(struct umtx_key *key)
{
	struct umtx_q *uq;
	struct umtx_head *head;
	int chain, count = 0;

	chain = umtxq_hash(key);
	mtx_assert(umtxq_mtx(chain), MA_OWNED);
	head = &umtxq_chains[chain].uc_queue;
	LIST_FOREACH(uq, head, uq_next) {
		if (umtx_key_match(&uq->uq_key, key)) {
			if (++count > 1)
				break;
		}
	}
	return (count);
}

/*
 * Dequeue and wake up to n_wake matching waiters; returns the number
 * actually woken.  NOTE(review): because the check is "++ret >= n_wake"
 * after the wakeup, n_wake <= 1 wakes at most one thread (do_unlock
 * relies on this by passing 0), while n_wake == INT_MAX wakes all.
 */
static int
umtxq_signal(struct umtx_key *key, int n_wake)
{
	struct umtx_q *uq, *next;
	struct umtx_head *head;
	struct thread *blocked = NULL;
	int chain, ret;

	ret = 0;
	chain = umtxq_hash(key);
	mtx_assert(umtxq_mtx(chain), MA_OWNED);
	head = &umtxq_chains[chain].uc_queue;
	for (uq = LIST_FIRST(head); uq; uq = next) {
		next = LIST_NEXT(uq, uq_next);
		if (umtx_key_match(&uq->uq_key, key)) {
			blocked = uq->uq_thread;
			umtxq_remove(uq);
			wakeup(blocked);
			if (++ret >= n_wake)
				break;
		}
	}
	return (ret);
}

/*
 * Sleep on the thread pointer with the chain lock as interlock,
 * mapping msleep's EWOULDBLOCK to the user-visible ETIMEDOUT.
 */
static inline int
umtxq_sleep(struct thread *td, struct umtx_key *key, int priority,
	    const char *wmesg, int timo)
{
	int chain = umtxq_hash(key);
	int error = msleep(td, umtxq_mtx(chain), priority, wmesg, timo);
	if (error == EWOULDBLOCK)
		error = ETIMEDOUT;
	return (error);
}

/*
 * Build the canonical key for a umtx address: look the address up in
 * the process map; a VM_INHERIT_SHARE entry yields a shared key (with
 * a reference held on the backing object, released by
 * umtx_key_release()), anything else a private (address, pid) key.
 * Returns EFAULT when the address is not writably mapped.
 *
 * NOTE(review): the shared offset is computed as
 * entry->offset + entry->start - umtx rather than the conventional
 * umtx - entry->start + entry->offset; since every party derives the
 * key the same way, matching still works — confirm against later
 * revisions before changing.
 */
static int
umtx_key_get(struct thread *td, struct umtx *umtx, struct umtx_key *key)
{
	vm_map_t map;
	vm_map_entry_t entry;
	vm_pindex_t pindex;
	vm_prot_t prot;
	boolean_t wired;

	map = &td->td_proc->p_vmspace->vm_map;
	if (vm_map_lookup(&map, (vm_offset_t)umtx, VM_PROT_WRITE,
	    &entry, &key->info.shared.object, &pindex, &prot,
	    &wired) != KERN_SUCCESS) {
		return EFAULT;
	}

	if (VM_INHERIT_SHARE == entry->inheritance) {
		key->type = UMTX_SHARED;
		key->info.shared.offset = entry->offset + entry->start -
			(vm_offset_t)umtx;
		vm_object_reference(key->info.shared.object);
	} else {
		key->type = UMTX_PRIVATE;
		key->info.private.umtx = umtx;
		key->info.private.pid = td->td_proc->p_pid;
	}
	vm_map_lookup_done(map, entry);
	return (0);
}

/*
 * Drop the object reference taken by umtx_key_get() for shared keys.
 */
static inline void
umtx_key_release(struct umtx_key *key)
{
	if (key->type == UMTX_SHARED)
		vm_object_deallocate(key->info.shared.object);
}

/*
 * Resolve the key for umtx and enqueue uq on its chain under the busy
 * flag.  On success the caller owns a key reference (release with
 * umtx_key_release()).
 */
static inline int
umtxq_queue_me(struct thread *td, struct umtx *umtx, struct umtx_q *uq)
{
	int error;

	if ((error = umtx_key_get(td, umtx, &uq->uq_key)) != 0)
		return (error);

	uq->uq_addr = (vm_offset_t)umtx;
	uq->uq_thread = td;
	umtxq_lock(&uq->uq_key);
	/* hmm, for condition variable, we don't need busy flag. */
	umtxq_busy(&uq->uq_key);
	umtxq_insert(uq);
	umtxq_unbusy(&uq->uq_key);
	umtxq_unlock(&uq->uq_key);
	return (0);
}

/*
 * Acquire the userland mutex for owner id, sleeping up to timo ticks
 * per attempt (0 = forever).  Implements the contested-bit handshake:
 * CAS UNOWNED->id, else CAS in UMTX_CONTESTED and sleep until woken by
 * do_unlock().  Returns 0, EFAULT on a bad address, or a sleep error
 * (ETIMEDOUT / signal).
 */
static int
_do_lock(struct thread *td, struct umtx *umtx, long id, int timo)
{
	struct umtx_q *uq;
	intptr_t owner;
	intptr_t old;
	int error = 0;

	uq = td->td_umtxq;
	/*
	 * Care must be exercised when dealing with umtx structure. It
	 * can fault on any access.
	 */

	for (;;) {
		/*
		 * Try the uncontested case.  This should be done in userland.
		 */
		owner = casuptr((intptr_t *)&umtx->u_owner,
				UMTX_UNOWNED, id);

		/* The acquire succeeded. */
		if (owner == UMTX_UNOWNED)
			return (0);

		/* The address was invalid. */
		if (owner == -1)
			return (EFAULT);

		/* If no one owns it but it is contested try to acquire it. */
		if (owner == UMTX_CONTESTED) {
			owner = casuptr((intptr_t *)&umtx->u_owner,
					UMTX_CONTESTED, id | UMTX_CONTESTED);

			if (owner == UMTX_CONTESTED)
				return (0);

			/* The address was invalid. */
			if (owner == -1)
				return (EFAULT);

			/* If this failed the lock has changed, restart. */
			continue;
		}

		/*
		 * If we caught a signal, we have retried and now
		 * exit immediately.
		 */
		if (error || (error = umtxq_queue_me(td, umtx, uq)) != 0)
			return (error);

		/*
		 * Set the contested bit so that a release in user space
		 * knows to use the system call for unlock.  If this fails
		 * either some one else has acquired the lock or it has been
		 * released.
		 */
		old = casuptr((intptr_t *)&umtx->u_owner, owner,
			      owner | UMTX_CONTESTED);

		/* The address was invalid; undo the queueing and bail. */
		if (old == -1) {
			umtxq_lock(&uq->uq_key);
			umtxq_busy(&uq->uq_key);
			umtxq_remove(uq);
			umtxq_unbusy(&uq->uq_key);
			umtxq_unlock(&uq->uq_key);
			umtx_key_release(&uq->uq_key);
			return (EFAULT);
		}

		/*
		 * We set the contested bit, sleep.  Otherwise the lock changed
		 * and we need to retry or we lost a race to the thread
		 * unlocking the umtx.
		 */
		umtxq_lock(&uq->uq_key);
		if (old == owner && (uq->uq_flags & UQF_UMTXQ)) {
			error = umtxq_sleep(td, &uq->uq_key, PCATCH,
					    "umtx", timo);
		}
		umtxq_busy(&uq->uq_key);
		umtxq_remove(uq);
		umtxq_unbusy(&uq->uq_key);
		umtxq_unlock(&uq->uq_key);
		umtx_key_release(&uq->uq_key);
	}

	return (0);	/* Not reached: the loop only exits via return. */
}

/*
 * Timeout wrapper around _do_lock(): with a timeout, retry after each
 * ETIMEDOUT until the absolute deadline (uptime-based) passes,
 * recomputing the remaining interval each round.  ERESTART is mapped
 * to EINTR so userland can back off its critical region.
 */
static int
do_lock(struct thread *td, struct umtx *umtx, long id,
	struct timespec *timeout)
{
	struct timespec ts, ts2, ts3;
	struct timeval tv;
	int error;

	if (timeout == NULL) {
		error = _do_lock(td, umtx, id, 0);
	} else {
		getnanouptime(&ts);
		timespecadd(&ts, timeout);	/* ts = absolute deadline. */
		TIMESPEC_TO_TIMEVAL(&tv, timeout);
		for (;;) {
			error = _do_lock(td, umtx, id, tvtohz(&tv));
			if (error != ETIMEDOUT)
				break;
			getnanouptime(&ts2);
			if (timespeccmp(&ts2, &ts, >=)) {
				error = ETIMEDOUT;
				break;
			}
			/* Sleep again for the time remaining. */
			ts3 = ts;
			timespecsub(&ts3, &ts2);
			TIMESPEC_TO_TIMEVAL(&tv, &ts3);
		}
	}
	/*
	 * This lets userland back off critical region if needed.
	 */
	if (error == ERESTART)
		error = EINTR;
	return (error);
}

/*
 * Release a contested userland mutex owned by id: store UNOWNED (or
 * CONTESTED when more than one waiter remains) and wake one waiter.
 * Returns EFAULT on a bad address, EPERM if not the owner, EINVAL if
 * the lock was not contested or changed underfoot.
 */
static int
do_unlock(struct thread *td, struct umtx *umtx, long id)
{
	struct umtx_key key;
	intptr_t owner;
	intptr_t old;
	int error;
	int count;

	/*
	 * Make sure we own this mtx.
	 *
	 * XXX Need a {fu,su}ptr this is not correct on arch where
	 * sizeof(intptr_t) != sizeof(long).
	 */
	if ((owner = fuword(&umtx->u_owner)) == -1)
		return (EFAULT);

	if ((owner & ~UMTX_CONTESTED) != id)
		return (EPERM);

	/* We should only ever be in here for contested locks */
	if ((owner & UMTX_CONTESTED) == 0)
		return (EINVAL);

	if ((error = umtx_key_get(td, umtx, &key)) != 0)
		return (error);

	umtxq_lock(&key);
	umtxq_busy(&key);	/* Hold the chain busy across the store. */
	count = umtxq_count(&key);
	umtxq_unlock(&key);

	/*
	 * When unlocking the umtx, it must be marked as unowned if
	 * there is zero or one thread only waiting for it.
	 * Otherwise, it must be marked as contested.
	 */
	old = casuptr((intptr_t *)&umtx->u_owner, owner,
		      count <= 1 ? UMTX_UNOWNED : UMTX_CONTESTED);
	umtxq_lock(&key);
	umtxq_signal(&key, 0);	/* Wakes one waiter; see umtxq_signal(). */
	umtxq_unbusy(&key);
	umtxq_unlock(&key);
	umtx_key_release(&key);
	if (old == -1)
		return (EFAULT);
	if (old != owner)
		return (EINVAL);
	return (0);
}

/*
 * UMTX_OP_WAIT: sleep until the umtx word changes from id or a wakeup
 * arrives, with optional absolute-deadline retry like do_lock().  If
 * the word already differs from id, return immediately with 0.
 * UQF_UMTXQ being clear after a sleep means we were woken, so any
 * sleep error is discarded.
 */
static int
do_wait(struct thread *td, struct umtx *umtx, long id, struct timespec *timeout)
{
	struct umtx_q *uq;
	struct timespec ts, ts2, ts3;
	struct timeval tv;
	long tmp;
	int error = 0;

	uq = td->td_umtxq;
	if ((error = umtxq_queue_me(td, umtx, uq)) != 0)
		return (error);
	/* Re-read after queueing so a concurrent change is not missed. */
	tmp = fuword(&umtx->u_owner);
	if (tmp != id) {
		umtxq_lock(&uq->uq_key);
		umtxq_remove(uq);
		umtxq_unlock(&uq->uq_key);
	} else if (timeout == NULL) {
		umtxq_lock(&uq->uq_key);
		if (uq->uq_flags & UQF_UMTXQ)
			error = umtxq_sleep(td, &uq->uq_key,
					    PCATCH, "ucond", 0);
		if (!(uq->uq_flags & UQF_UMTXQ))
			error = 0;	/* Woken: success regardless. */
		else
			umtxq_remove(uq);
		umtxq_unlock(&uq->uq_key);
	} else {
		getnanouptime(&ts);
		timespecadd(&ts, timeout);	/* Absolute deadline. */
		TIMESPEC_TO_TIMEVAL(&tv, timeout);
		for (;;) {
			umtxq_lock(&uq->uq_key);
			if (uq->uq_flags & UQF_UMTXQ) {
				error = umtxq_sleep(td, &uq->uq_key, PCATCH,
						    "ucond", tvtohz(&tv));
			}
			if (!(uq->uq_flags & UQF_UMTXQ)) {
				/* Woken up: already dequeued. */
				umtxq_unlock(&uq->uq_key);
				goto out;
			}
			umtxq_unlock(&uq->uq_key);
			if (error != ETIMEDOUT)
				break;
			getnanouptime(&ts2);
			if (timespeccmp(&ts2, &ts, >=)) {
				error = ETIMEDOUT;
				break;
			}
			ts3 = ts;
			timespecsub(&ts3, &ts2);
			TIMESPEC_TO_TIMEVAL(&tv, &ts3);
		}
		umtxq_lock(&uq->uq_key);
		umtxq_remove(uq);
		umtxq_unlock(&uq->uq_key);
	}
out:
	umtx_key_release(&uq->uq_key);
	if (error == ERESTART)
		error = EINTR;
	return (error);
}

/*
 * Wake up to n_wake threads waiting on uaddr.  Note the wake count
 * from umtxq_signal() is intentionally discarded: this always returns
 * 0 on success, or the umtx_key_get() error.
 */
int
kern_umtx_wake(struct thread *td, void *uaddr, int n_wake)
{
	struct umtx_key key;
	int ret;

	if ((ret = umtx_key_get(td, uaddr, &key)) != 0)
		return (ret);
	umtxq_lock(&key);
	ret = umtxq_signal(&key, n_wake);
	umtxq_unlock(&key);
	umtx_key_release(&key);
	return (0);
}

/* _umtx_lock(2): lock a umtx for the calling thread, no timeout. */
int
_umtx_lock(struct thread *td, struct _umtx_lock_args *uap)
    /* struct umtx *umtx */
{
	return _do_lock(td, uap->umtx, td->td_tid, 0);
}

/* _umtx_unlock(2): unlock a umtx owned by the calling thread. */
int
_umtx_unlock(struct thread *td, struct _umtx_unlock_args *uap)
    /* struct umtx *umtx */
{
	return do_unlock(td, uap->umtx, td->td_tid);
}

/*
 * _umtx_op(2): multiplexed umtx syscall.  uaddr2, when non-NULL for
 * LOCK/WAIT, points at a relative struct timespec which is validated
 * (0 <= tv_nsec < 1e9) before use.
 */
int
_umtx_op(struct thread *td, struct _umtx_op_args *uap)
{
	struct timespec timeout;
	struct timespec *ts;
	int error;

	switch(uap->op) {
	case UMTX_OP_LOCK:
		/* Allow a null timespec (wait forever). */
		if (uap->uaddr2 == NULL)
			ts = NULL;
		else {
			error = copyin(uap->uaddr2, &timeout, sizeof(timeout));
			if (error != 0)
				break;
			if (timeout.tv_nsec >= 1000000000 ||
			    timeout.tv_nsec < 0) {
				error = EINVAL;
				break;
			}
			ts = &timeout;
		}
		error = do_lock(td, uap->umtx, uap->id, ts);
		break;
	case UMTX_OP_UNLOCK:
		error = do_unlock(td, uap->umtx, uap->id);
		break;
	case UMTX_OP_WAIT:
		/* Allow a null timespec (wait forever). */
		if (uap->uaddr2 == NULL)
			ts = NULL;
		else {
			error = copyin(uap->uaddr2, &timeout, sizeof(timeout));
			if (error != 0)
				break;
			if (timeout.tv_nsec >= 1000000000 ||
			    timeout.tv_nsec < 0) {
				error = EINVAL;
				break;
			}
			ts = &timeout;
		}
		error = do_wait(td, uap->umtx, uap->id, ts);
		break;
	case UMTX_OP_WAKE:
		error = kern_umtx_wake(td, uap->umtx, uap->id);
		break;
	default:
		error = EINVAL;
		break;
	}
	return (error);
}