kern_mutex.c revision 68790
1/*- 2 * Copyright (c) 1998 Berkeley Software Design, Inc. All rights reserved. 3 * 4 * Redistribution and use in source and binary forms, with or without 5 * modification, are permitted provided that the following conditions 6 * are met: 7 * 1. Redistributions of source code must retain the above copyright 8 * notice, this list of conditions and the following disclaimer. 9 * 2. Redistributions in binary form must reproduce the above copyright 10 * notice, this list of conditions and the following disclaimer in the 11 * documentation and/or other materials provided with the distribution. 12 * 3. Berkeley Software Design Inc's name may not be used to endorse or 13 * promote products derived from this software without specific prior 14 * written permission. 15 * 16 * THIS SOFTWARE IS PROVIDED BY BERKELEY SOFTWARE DESIGN INC ``AS IS'' AND 17 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE 18 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE 19 * ARE DISCLAIMED. IN NO EVENT SHALL BERKELEY SOFTWARE DESIGN INC BE LIABLE 20 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL 21 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS 22 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) 23 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT 24 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY 25 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF 26 * SUCH DAMAGE. 27 * 28 * from BSDI $Id: mutex_witness.c,v 1.1.2.20 2000/04/27 03:10:27 cp Exp $ 29 * and BSDI $Id: synch_machdep.c,v 2.3.2.39 2000/04/27 03:10:25 cp Exp $ 30 * $FreeBSD: head/sys/kern/kern_mutex.c 68790 2000-11-15 22:08:16Z jhb $ 31 */ 32 33/* 34 * Main Entry: witness 35 * Pronunciation: 'wit-n&s 36 * Function: noun 37 * Etymology: Middle English witnesse, from Old English witnes knowledge, 38 * testimony, witness, from 2wit 39 * Date: before 12th century 40 * 1 : attestation of a fact or event : TESTIMONY 41 * 2 : one that gives evidence; specifically : one who testifies in 42 * a cause or before a judicial tribunal 43 * 3 : one asked to be present at a transaction so as to be able to 44 * testify to its having taken place 45 * 4 : one who has personal knowledge of something 46 * 5 a : something serving as evidence or proof : SIGN 47 * b : public affirmation by word or example of usually 48 * religious faith or conviction <the heroic witness to divine 49 * life -- Pilot> 50 * 6 capitalized : a member of the Jehovah's Witnesses 51 */ 52 53#include "opt_ddb.h" 54#include "opt_witness.h" 55 56#include <sys/param.h> 57#include <sys/bus.h> 58#include <sys/kernel.h> 59#include <sys/malloc.h> 60#include <sys/proc.h> 61#include <sys/sysctl.h> 62#include <sys/systm.h> 63#include <sys/vmmeter.h> 64#include <sys/ktr.h> 65 66#include <machine/atomic.h> 67#include <machine/bus.h> 68#include <machine/clock.h> 69#include <machine/cpu.h> 70 71#include <ddb/ddb.h> 72 73#include <vm/vm.h> 74#include <vm/vm_extern.h> 75 76#define _KERN_MUTEX_C_ /* Cause non-inlined mtx_*() to be compiled. */ 77#include <sys/mutex.h> 78 79/* 80 * Machine independent bits of the mutex implementation 81 */ 82/* All mutexes in system (used for debug/panic) */ 83#ifdef MUTEX_DEBUG 84static struct mtx_debug all_mtx_debug = { NULL, {NULL, NULL}, NULL, 0, 85 "All mutexes queue head" }; 86static struct mtx all_mtx = { MTX_UNOWNED, 0, 0, &all_mtx_debug, 87 TAILQ_HEAD_INITIALIZER(all_mtx.mtx_blocked), 88 { NULL, NULL }, &all_mtx, &all_mtx }; 89#else /* MUTEX_DEBUG */ 90static struct mtx all_mtx = { MTX_UNOWNED, 0, 0, "All mutexes queue head", 91 TAILQ_HEAD_INITIALIZER(all_mtx.mtx_blocked), 92 { NULL, NULL }, &all_mtx, &all_mtx }; 93#endif /* MUTEX_DEBUG */ 94 95static int mtx_cur_cnt; 96static int mtx_max_cnt; 97 98void _mtx_enter_giant_def(void); 99void _mtx_exit_giant_def(void); 100static void propagate_priority(struct proc *) __unused; 101 102#define mtx_unowned(m) ((m)->mtx_lock == MTX_UNOWNED) 103#define mtx_owner(m) (mtx_unowned(m) ? NULL \ 104 : (struct proc *)((m)->mtx_lock & MTX_FLAGMASK)) 105 106#define RETIP(x) *(((uintptr_t *)(&x)) - 1) 107#define SET_PRIO(p, pri) (p)->p_priority = (pri) 108 109/* 110 * XXX Temporary, for use from assembly language 111 */ 112 113void 114_mtx_enter_giant_def(void) 115{ 116 117 mtx_enter(&Giant, MTX_DEF); 118} 119 120void 121_mtx_exit_giant_def(void) 122{ 123 124 mtx_exit(&Giant, MTX_DEF); 125} 126 127static void 128propagate_priority(struct proc *p) 129{ 130 int pri = p->p_priority; 131 struct mtx *m = p->p_blocked; 132 133 for (;;) { 134 struct proc *p1; 135 136 p = mtx_owner(m); 137 138 if (p == NULL) { 139 /* 140 * This really isn't quite right. Really 141 * ought to bump priority of process that 142 * next acquires the mutex. 143 */ 144 MPASS(m->mtx_lock == MTX_CONTESTED); 145 return; 146 } 147 MPASS(p->p_magic == P_MAGIC); 148 if (p->p_priority <= pri) 149 return; 150 /* 151 * If lock holder is actually running, just bump priority. 152 */ 153 if (TAILQ_NEXT(p, p_procq) == NULL) { 154 MPASS(p->p_stat == SRUN || p->p_stat == SZOMB); 155 SET_PRIO(p, pri); 156 return; 157 } 158 /* 159 * If on run queue move to new run queue, and 160 * quit. 161 */ 162 if (p->p_stat == SRUN) { 163 MPASS(p->p_blocked == NULL); 164 remrunqueue(p); 165 SET_PRIO(p, pri); 166 setrunqueue(p); 167 return; 168 } 169 170 /* 171 * If we aren't blocked on a mutex, give up and quit. 172 */ 173 if (p->p_stat != SMTX) { 174 printf( 175 "XXX: process %d(%s):%d holds %s but isn't blocked on a mutex\n", 176 p->p_pid, p->p_comm, p->p_stat, m->mtx_description); 177 return; 178 } 179 180 /* 181 * Pick up the mutex that p is blocked on. 182 */ 183 m = p->p_blocked; 184 MPASS(m != NULL); 185 186 printf("XXX: process %d(%s) is blocked on %s\n", p->p_pid, 187 p->p_comm, m->mtx_description); 188 /* 189 * Check if the proc needs to be moved up on 190 * the blocked chain 191 */ 192 if ((p1 = TAILQ_PREV(p, rq, p_procq)) == NULL || 193 p1->p_priority <= pri) { 194 if (p1) 195 printf( 196 "XXX: previous process %d(%s) has higher priority\n", 197 p->p_pid, p->p_comm); 198 else 199 printf("XXX: process at head of run queue\n"); 200 continue; 201 } 202 203 /* 204 * Remove proc from blocked chain 205 */ 206 TAILQ_REMOVE(&m->mtx_blocked, p, p_procq); 207 TAILQ_FOREACH(p1, &m->mtx_blocked, p_procq) { 208 MPASS(p1->p_magic == P_MAGIC); 209 if (p1->p_priority > pri) 210 break; 211 } 212 if (p1) 213 TAILQ_INSERT_BEFORE(p1, p, p_procq); 214 else 215 TAILQ_INSERT_TAIL(&m->mtx_blocked, p, p_procq); 216 CTR4(KTR_LOCK, 217 "propagate priority: p 0x%p moved before 0x%p on [0x%p] %s", 218 p, p1, m, m->mtx_description); 219 } 220} 221 222void 223mtx_enter_hard(struct mtx *m, int type, int saveintr) 224{ 225 struct proc *p = CURPROC; 226 struct timeval new_switchtime; 227 228 KASSERT(p != NULL, ("curproc is NULL in mutex")); 229 230 switch (type) { 231 case MTX_DEF: 232 if ((m->mtx_lock & MTX_FLAGMASK) == (uintptr_t)p) { 233 m->mtx_recurse++; 234 atomic_set_ptr(&m->mtx_lock, MTX_RECURSE); 235 CTR1(KTR_LOCK, "mtx_enter: 0x%p recurse", m); 236 return; 237 } 238 CTR3(KTR_LOCK, "mtx_enter: 0x%p contested (lock=%p) [0x%p]", 239 m, (void *)m->mtx_lock, (void *)RETIP(m)); 240 while (!_obtain_lock(m, p)) { 241 uintptr_t v; 242 struct proc *p1; 243 244 mtx_enter(&sched_lock, MTX_SPIN | MTX_RLIKELY); 245 /* 246 * check if the lock has been released while 247 * waiting for the schedlock. 248 */ 249 if ((v = m->mtx_lock) == MTX_UNOWNED) { 250 mtx_exit(&sched_lock, MTX_SPIN); 251 continue; 252 } 253 /* 254 * The mutex was marked contested on release. This 255 * means that there are processes blocked on it. 256 */ 257 if (v == MTX_CONTESTED) { 258 p1 = TAILQ_FIRST(&m->mtx_blocked); 259 KASSERT(p1 != NULL, ("contested mutex has no contesters")); 260 KASSERT(p != NULL, ("curproc is NULL for contested mutex")); 261 m->mtx_lock = (uintptr_t)p | MTX_CONTESTED; 262 if (p1->p_priority < p->p_priority) { 263 SET_PRIO(p, p1->p_priority); 264 } 265 mtx_exit(&sched_lock, MTX_SPIN); 266 return; 267 } 268 /* 269 * If the mutex isn't already contested and 270 * a failure occurs setting the contested bit the 271 * mutex was either release or the 272 * state of the RECURSION bit changed. 273 */ 274 if ((v & MTX_CONTESTED) == 0 && 275 !atomic_cmpset_ptr(&m->mtx_lock, (void *)v, 276 (void *)(v | MTX_CONTESTED))) { 277 mtx_exit(&sched_lock, MTX_SPIN); 278 continue; 279 } 280 281 /* We definitely have to sleep for this lock */ 282 mtx_assert(m, MA_NOTOWNED); 283 284#ifdef notyet 285 /* 286 * If we're borrowing an interrupted thread's VM 287 * context must clean up before going to sleep. 288 */ 289 if (p->p_flag & (P_ITHD | P_SITHD)) { 290 ithd_t *it = (ithd_t *)p; 291 292 if (it->it_interrupted) { 293 CTR2(KTR_LOCK, 294 "mtx_enter: 0x%x interrupted 0x%x", 295 it, it->it_interrupted); 296 intr_thd_fixup(it); 297 } 298 } 299#endif 300 301 /* Put us on the list of procs blocked on this mutex */ 302 if (TAILQ_EMPTY(&m->mtx_blocked)) { 303 p1 = (struct proc *)(m->mtx_lock & 304 MTX_FLAGMASK); 305 LIST_INSERT_HEAD(&p1->p_contested, m, 306 mtx_contested); 307 TAILQ_INSERT_TAIL(&m->mtx_blocked, p, p_procq); 308 } else { 309 TAILQ_FOREACH(p1, &m->mtx_blocked, p_procq) 310 if (p1->p_priority > p->p_priority) 311 break; 312 if (p1) 313 TAILQ_INSERT_BEFORE(p1, p, p_procq); 314 else 315 TAILQ_INSERT_TAIL(&m->mtx_blocked, p, 316 p_procq); 317 } 318 319 p->p_blocked = m; /* Who we're blocked on */ 320 p->p_stat = SMTX; 321#if 0 322 propagate_priority(p); 323#endif 324 CTR3(KTR_LOCK, "mtx_enter: p 0x%p blocked on [0x%p] %s", 325 p, m, m->mtx_description); 326 /* 327 * Blatantly copied from mi_switch nearly verbatim. 328 * When Giant goes away and we stop dinking with it 329 * in mi_switch, we can go back to calling mi_switch 330 * directly here. 331 */ 332 333 /* 334 * Compute the amount of time during which the current 335 * process was running, and add that to its total so 336 * far. 337 */ 338 microuptime(&new_switchtime); 339 if (timevalcmp(&new_switchtime, &switchtime, <)) { 340 printf( 341 "microuptime() went backwards (%ld.%06ld -> %ld.%06ld)\n", 342 switchtime.tv_sec, switchtime.tv_usec, 343 new_switchtime.tv_sec, 344 new_switchtime.tv_usec); 345 new_switchtime = switchtime; 346 } else { 347 p->p_runtime += (new_switchtime.tv_usec - 348 switchtime.tv_usec) + 349 (new_switchtime.tv_sec - switchtime.tv_sec) * 350 (int64_t)1000000; 351 } 352 353 /* 354 * Pick a new current process and record its start time. 355 */ 356 cnt.v_swtch++; 357 switchtime = new_switchtime; 358 cpu_switch(); 359 if (switchtime.tv_sec == 0) 360 microuptime(&switchtime); 361 switchticks = ticks; 362 CTR3(KTR_LOCK, 363 "mtx_enter: p 0x%p free from blocked on [0x%p] %s", 364 p, m, m->mtx_description); 365 mtx_exit(&sched_lock, MTX_SPIN); 366 } 367 return; 368 case MTX_SPIN: 369 case MTX_SPIN | MTX_FIRST: 370 case MTX_SPIN | MTX_TOPHALF: 371 { 372 int i = 0; 373 374 if (m->mtx_lock == (uintptr_t)p) { 375 m->mtx_recurse++; 376 return; 377 } 378 CTR1(KTR_LOCK, "mtx_enter: %p spinning", m); 379 for (;;) { 380 if (_obtain_lock(m, p)) 381 break; 382 while (m->mtx_lock != MTX_UNOWNED) { 383 if (i++ < 1000000) 384 continue; 385 if (i++ < 6000000) 386 DELAY (1); 387#ifdef DDB 388 else if (!db_active) 389#else 390 else 391#endif 392 panic( 393 "spin lock %s held by 0x%p for > 5 seconds", 394 m->mtx_description, 395 (void *)m->mtx_lock); 396 } 397 } 398 399#ifdef MUTEX_DEBUG 400 if (type != MTX_SPIN) 401 m->mtx_saveintr = 0xbeefface; 402 else 403#endif 404 m->mtx_saveintr = saveintr; 405 CTR1(KTR_LOCK, "mtx_enter: 0x%p spin done", m); 406 return; 407 } 408 } 409} 410 411void 412mtx_exit_hard(struct mtx *m, int type) 413{ 414 struct proc *p, *p1; 415 struct mtx *m1; 416 int pri; 417 418 p = CURPROC; 419 switch (type) { 420 case MTX_DEF: 421 case MTX_DEF | MTX_NOSWITCH: 422 if (m->mtx_recurse != 0) { 423 if (--(m->mtx_recurse) == 0) 424 atomic_clear_ptr(&m->mtx_lock, MTX_RECURSE); 425 CTR1(KTR_LOCK, "mtx_exit: 0x%p unrecurse", m); 426 return; 427 } 428 mtx_enter(&sched_lock, MTX_SPIN); 429 CTR1(KTR_LOCK, "mtx_exit: 0x%p contested", m); 430 p1 = TAILQ_FIRST(&m->mtx_blocked); 431 MPASS(p->p_magic == P_MAGIC); 432 MPASS(p1->p_magic == P_MAGIC); 433 TAILQ_REMOVE(&m->mtx_blocked, p1, p_procq); 434 if (TAILQ_EMPTY(&m->mtx_blocked)) { 435 LIST_REMOVE(m, mtx_contested); 436 _release_lock_quick(m); 437 CTR1(KTR_LOCK, "mtx_exit: 0x%p not held", m); 438 } else 439 m->mtx_lock = MTX_CONTESTED; 440 pri = MAXPRI; 441 LIST_FOREACH(m1, &p->p_contested, mtx_contested) { 442 int cp = TAILQ_FIRST(&m1->mtx_blocked)->p_priority; 443 if (cp < pri) 444 pri = cp; 445 } 446 if (pri > p->p_nativepri) 447 pri = p->p_nativepri; 448 SET_PRIO(p, pri); 449 CTR2(KTR_LOCK, "mtx_exit: 0x%p contested setrunqueue 0x%p", 450 m, p1); 451 p1->p_blocked = NULL; 452 p1->p_stat = SRUN; 453 setrunqueue(p1); 454 if ((type & MTX_NOSWITCH) == 0 && p1->p_priority < pri) { 455#ifdef notyet 456 if (p->p_flag & (P_ITHD | P_SITHD)) { 457 ithd_t *it = (ithd_t *)p; 458 459 if (it->it_interrupted) { 460 CTR2(KTR_LOCK, 461 "mtx_exit: 0x%x interruped 0x%x", 462 it, it->it_interrupted); 463 intr_thd_fixup(it); 464 } 465 } 466#endif 467 setrunqueue(p); 468 CTR2(KTR_LOCK, "mtx_exit: 0x%p switching out lock=0x%p", 469 m, (void *)m->mtx_lock); 470 mi_switch(); 471 CTR2(KTR_LOCK, "mtx_exit: 0x%p resuming lock=0x%p", 472 m, (void *)m->mtx_lock); 473 } 474 mtx_exit(&sched_lock, MTX_SPIN); 475 break; 476 case MTX_SPIN: 477 case MTX_SPIN | MTX_FIRST: 478 if (m->mtx_recurse != 0) { 479 m->mtx_recurse--; 480 return; 481 } 482 MPASS(mtx_owned(m)); 483 _release_lock_quick(m); 484 if (type & MTX_FIRST) 485 enable_intr(); /* XXX is this kosher? */ 486 else { 487 MPASS(m->mtx_saveintr != 0xbeefface); 488 restore_intr(m->mtx_saveintr); 489 } 490 break; 491 case MTX_SPIN | MTX_TOPHALF: 492 if (m->mtx_recurse != 0) { 493 m->mtx_recurse--; 494 return; 495 } 496 MPASS(mtx_owned(m)); 497 _release_lock_quick(m); 498 break; 499 default: 500 panic("mtx_exit_hard: unsupported type 0x%x\n", type); 501 } 502} 503 504#define MV_DESTROY 0 /* validate before destory */ 505#define MV_INIT 1 /* validate before init */ 506 507#ifdef MUTEX_DEBUG 508 509int mtx_validate __P((struct mtx *, int)); 510 511int 512mtx_validate(struct mtx *m, int when) 513{ 514 struct mtx *mp; 515 int i; 516 int retval = 0; 517 518 if (m == &all_mtx || cold) 519 return 0; 520 521 mtx_enter(&all_mtx, MTX_DEF); 522/* 523 * XXX - When kernacc() is fixed on the alpha to handle K0_SEG memory properly 524 * we can re-enable the kernacc() checks. 525 */ 526#ifndef __alpha__ 527 MPASS(kernacc((caddr_t)all_mtx.mtx_next, sizeof(uintptr_t), 528 VM_PROT_READ) == 1); 529#endif 530 MPASS(all_mtx.mtx_next->mtx_prev == &all_mtx); 531 for (i = 0, mp = all_mtx.mtx_next; mp != &all_mtx; mp = mp->mtx_next) { 532#ifndef __alpha__ 533 if (kernacc((caddr_t)mp->mtx_next, sizeof(uintptr_t), 534 VM_PROT_READ) != 1) { 535 panic("mtx_validate: mp=%p mp->mtx_next=%p", 536 mp, mp->mtx_next); 537 } 538#endif 539 i++; 540 if (i > mtx_cur_cnt) { 541 panic("mtx_validate: too many in chain, known=%d\n", 542 mtx_cur_cnt); 543 } 544 } 545 MPASS(i == mtx_cur_cnt); 546 switch (when) { 547 case MV_DESTROY: 548 for (mp = all_mtx.mtx_next; mp != &all_mtx; mp = mp->mtx_next) 549 if (mp == m) 550 break; 551 MPASS(mp == m); 552 break; 553 case MV_INIT: 554 for (mp = all_mtx.mtx_next; mp != &all_mtx; mp = mp->mtx_next) 555 if (mp == m) { 556 /* 557 * Not good. This mutex already exists. 558 */ 559 printf("re-initing existing mutex %s\n", 560 m->mtx_description); 561 MPASS(m->mtx_lock == MTX_UNOWNED); 562 retval = 1; 563 } 564 } 565 mtx_exit(&all_mtx, MTX_DEF); 566 return (retval); 567} 568#endif 569 570void 571mtx_init(struct mtx *m, const char *t, int flag) 572{ 573#ifdef MUTEX_DEBUG 574 struct mtx_debug *debug; 575#endif 576 577 CTR2(KTR_LOCK, "mtx_init 0x%p (%s)", m, t); 578#ifdef MUTEX_DEBUG 579 if (mtx_validate(m, MV_INIT)) /* diagnostic and error correction */ 580 return; 581 if (flag & MTX_COLD) 582 debug = m->mtx_debug; 583 else 584 debug = NULL; 585 if (debug == NULL) { 586#ifdef DIAGNOSTIC 587 if(cold && bootverbose) 588 printf("malloc'ing mtx_debug while cold for %s\n", t); 589#endif 590 591 /* XXX - should not use DEVBUF */ 592 debug = malloc(sizeof(struct mtx_debug), M_DEVBUF, M_NOWAIT); 593 MPASS(debug != NULL); 594 bzero(debug, sizeof(struct mtx_debug)); 595 } 596#endif 597 bzero((void *)m, sizeof *m); 598 TAILQ_INIT(&m->mtx_blocked); 599#ifdef MUTEX_DEBUG 600 m->mtx_debug = debug; 601#endif 602 m->mtx_description = t; 603 m->mtx_lock = MTX_UNOWNED; 604 /* Put on all mutex queue */ 605 mtx_enter(&all_mtx, MTX_DEF); 606 m->mtx_next = &all_mtx; 607 m->mtx_prev = all_mtx.mtx_prev; 608 m->mtx_prev->mtx_next = m; 609 all_mtx.mtx_prev = m; 610 if (++mtx_cur_cnt > mtx_max_cnt) 611 mtx_max_cnt = mtx_cur_cnt; 612 mtx_exit(&all_mtx, MTX_DEF); 613 witness_init(m, flag); 614} 615 616void 617mtx_destroy(struct mtx *m) 618{ 619 620 CTR2(KTR_LOCK, "mtx_destroy 0x%p (%s)", m, m->mtx_description); 621#ifdef MUTEX_DEBUG 622 if (m->mtx_next == NULL) 623 panic("mtx_destroy: %p (%s) already destroyed", 624 m, m->mtx_description); 625 626 if (!mtx_owned(m)) { 627 MPASS(m->mtx_lock == MTX_UNOWNED); 628 } else { 629 MPASS((m->mtx_lock & (MTX_RECURSE|MTX_CONTESTED)) == 0); 630 } 631 mtx_validate(m, MV_DESTROY); /* diagnostic */ 632#endif 633 634#ifdef WITNESS 635 if (m->mtx_witness) 636 witness_destroy(m); 637#endif /* WITNESS */ 638 639 /* Remove from the all mutex queue */ 640 mtx_enter(&all_mtx, MTX_DEF); 641 m->mtx_next->mtx_prev = m->mtx_prev; 642 m->mtx_prev->mtx_next = m->mtx_next; 643#ifdef MUTEX_DEBUG 644 m->mtx_next = m->mtx_prev = NULL; 645 free(m->mtx_debug, M_DEVBUF); 646 m->mtx_debug = NULL; 647#endif 648 mtx_cur_cnt--; 649 mtx_exit(&all_mtx, MTX_DEF); 650} 651 652/* 653 * The non-inlined versions of the mtx_*() functions are always built (above), 654 * but the witness code depends on the MUTEX_DEBUG and WITNESS kernel options 655 * being specified. 656 */ 657#if (defined(MUTEX_DEBUG) && defined(WITNESS)) 658 659#define WITNESS_COUNT 200 660#define WITNESS_NCHILDREN 2 661 662int witness_watch = 1; 663 664struct witness { 665 struct witness *w_next; 666 const char *w_description; 667 const char *w_file; 668 int w_line; 669 struct witness *w_morechildren; 670 u_char w_childcnt; 671 u_char w_Giant_squawked:1; 672 u_char w_other_squawked:1; 673 u_char w_same_squawked:1; 674 u_char w_sleep:1; 675 u_char w_spin:1; /* this is a spin mutex */ 676 u_int w_level; 677 struct witness *w_children[WITNESS_NCHILDREN]; 678}; 679 680struct witness_blessed { 681 char *b_lock1; 682 char *b_lock2; 683}; 684 685#ifdef DDB 686/* 687 * When DDB is enabled and witness_ddb is set to 1, it will cause the system to 688 * drop into kdebug() when: 689 * - a lock heirarchy violation occurs 690 * - locks are held when going to sleep. 691 */ 692#ifdef WITNESS_DDB 693int witness_ddb = 1; 694#else 695int witness_ddb = 0; 696#endif 697SYSCTL_INT(_debug, OID_AUTO, witness_ddb, CTLFLAG_RW, &witness_ddb, 0, ""); 698#endif /* DDB */ 699 700#ifdef WITNESS_SKIPSPIN 701int witness_skipspin = 1; 702#else 703int witness_skipspin = 0; 704#endif 705SYSCTL_INT(_debug, OID_AUTO, witness_skipspin, CTLFLAG_RD, &witness_skipspin, 0, 706 ""); 707 708MUTEX_DECLARE(static,w_mtx); 709static struct witness *w_free; 710static struct witness *w_all; 711static int w_inited; 712static int witness_dead; /* fatal error, probably no memory */ 713 714static struct witness w_data[WITNESS_COUNT]; 715 716static struct witness *enroll __P((const char *description, int flag)); 717static int itismychild __P((struct witness *parent, struct witness *child)); 718static void removechild __P((struct witness *parent, struct witness *child)); 719static int isitmychild __P((struct witness *parent, struct witness *child)); 720static int isitmydescendant __P((struct witness *parent, struct witness *child)); 721static int dup_ok __P((struct witness *)); 722static int blessed __P((struct witness *, struct witness *)); 723static void witness_displaydescendants 724 __P((void(*)(const char *fmt, ...), struct witness *)); 725static void witness_leveldescendents __P((struct witness *parent, int level)); 726static void witness_levelall __P((void)); 727static struct witness * witness_get __P((void)); 728static void witness_free __P((struct witness *m)); 729 730 731static char *ignore_list[] = { 732 "witness lock", 733 NULL 734}; 735 736static char *spin_order_list[] = { 737 "sched lock", 738 "clk", 739 "sio", 740 /* 741 * leaf locks 742 */ 743 NULL 744}; 745 746static char *order_list[] = { 747 NULL 748}; 749 750static char *dup_list[] = { 751 NULL 752}; 753 754static char *sleep_list[] = { 755 "Giant lock", 756 NULL 757}; 758 759/* 760 * Pairs of locks which have been blessed 761 * Don't complain about order problems with blessed locks 762 */ 763static struct witness_blessed blessed_list[] = { 764}; 765static int blessed_count = sizeof(blessed_list) / sizeof(struct witness_blessed); 766 767void 768witness_init(struct mtx *m, int flag) 769{ 770 m->mtx_witness = enroll(m->mtx_description, flag); 771} 772 773void 774witness_destroy(struct mtx *m) 775{ 776 struct mtx *m1; 777 struct proc *p; 778 p = CURPROC; 779 for ((m1 = LIST_FIRST(&p->p_heldmtx)); m1 != NULL; 780 m1 = LIST_NEXT(m1, mtx_held)) { 781 if (m1 == m) { 782 LIST_REMOVE(m, mtx_held); 783 break; 784 } 785 } 786 return; 787 788} 789 790void 791witness_enter(struct mtx *m, int flags, const char *file, int line) 792{ 793 struct witness *w, *w1; 794 struct mtx *m1; 795 struct proc *p; 796 int i; 797#ifdef DDB 798 int go_into_ddb = 0; 799#endif /* DDB */ 800 801 w = m->mtx_witness; 802 p = CURPROC; 803 804 if (flags & MTX_SPIN) { 805 if (!w->w_spin) 806 panic("mutex_enter: MTX_SPIN on MTX_DEF mutex %s @" 807 " %s:%d", m->mtx_description, file, line); 808 if (m->mtx_recurse != 0) 809 return; 810 mtx_enter(&w_mtx, MTX_SPIN); 811 i = witness_spin_check; 812 if (i != 0 && w->w_level < i) { 813 mtx_exit(&w_mtx, MTX_SPIN); 814 panic("mutex_enter(%s:%x, MTX_SPIN) out of order @" 815 " %s:%d already holding %s:%x", 816 m->mtx_description, w->w_level, file, line, 817 spin_order_list[ffs(i)-1], i); 818 } 819 PCPU_SET(witness_spin_check, i | w->w_level); 820 mtx_exit(&w_mtx, MTX_SPIN); 821 return; 822 } 823 if (w->w_spin) 824 panic("mutex_enter: MTX_DEF on MTX_SPIN mutex %s @ %s:%d", 825 m->mtx_description, file, line); 826 827 if (m->mtx_recurse != 0) 828 return; 829 if (witness_dead) 830 goto out; 831 if (cold || panicstr) 832 goto out; 833 834 if (!mtx_legal2block()) 835 panic("blockable mtx_enter() of %s when not legal @ %s:%d", 836 m->mtx_description, file, line); 837 /* 838 * Is this the first mutex acquired 839 */ 840 if ((m1 = LIST_FIRST(&p->p_heldmtx)) == NULL) 841 goto out; 842 843 if ((w1 = m1->mtx_witness) == w) { 844 if (w->w_same_squawked || dup_ok(w)) 845 goto out; 846 w->w_same_squawked = 1; 847 printf("acquring duplicate lock of same type: \"%s\"\n", 848 m->mtx_description); 849 printf(" 1st @ %s:%d\n", w->w_file, w->w_line); 850 printf(" 2nd @ %s:%d\n", file, line); 851#ifdef DDB 852 go_into_ddb = 1; 853#endif /* DDB */ 854 goto out; 855 } 856 MPASS(!mtx_owned(&w_mtx)); 857 mtx_enter(&w_mtx, MTX_SPIN); 858 /* 859 * If we have a known higher number just say ok 860 */ 861 if (witness_watch > 1 && w->w_level > w1->w_level) { 862 mtx_exit(&w_mtx, MTX_SPIN); 863 goto out; 864 } 865 if (isitmydescendant(m1->mtx_witness, w)) { 866 mtx_exit(&w_mtx, MTX_SPIN); 867 goto out; 868 } 869 for (i = 0; m1 != NULL; m1 = LIST_NEXT(m1, mtx_held), i++) { 870 871 MPASS(i < 200); 872 w1 = m1->mtx_witness; 873 if (isitmydescendant(w, w1)) { 874 mtx_exit(&w_mtx, MTX_SPIN); 875 if (blessed(w, w1)) 876 goto out; 877 if (m1 == &Giant) { 878 if (w1->w_Giant_squawked) 879 goto out; 880 else 881 w1->w_Giant_squawked = 1; 882 } else { 883 if (w1->w_other_squawked) 884 goto out; 885 else 886 w1->w_other_squawked = 1; 887 } 888 printf("lock order reversal\n"); 889 printf(" 1st %s last acquired @ %s:%d\n", 890 w->w_description, w->w_file, w->w_line); 891 printf(" 2nd %p %s @ %s:%d\n", 892 m1, w1->w_description, w1->w_file, w1->w_line); 893 printf(" 3rd %p %s @ %s:%d\n", 894 m, w->w_description, file, line); 895#ifdef DDB 896 go_into_ddb = 1; 897#endif /* DDB */ 898 goto out; 899 } 900 } 901 m1 = LIST_FIRST(&p->p_heldmtx); 902 if (!itismychild(m1->mtx_witness, w)) 903 mtx_exit(&w_mtx, MTX_SPIN); 904 905out: 906#ifdef DDB 907 if (witness_ddb && go_into_ddb) 908 Debugger("witness_enter"); 909#endif /* DDB */ 910 w->w_file = file; 911 w->w_line = line; 912 m->mtx_line = line; 913 m->mtx_file = file; 914 915 /* 916 * If this pays off it likely means that a mutex being witnessed 917 * is acquired in hardclock. Put it in the ignore list. It is 918 * likely not the mutex this assert fails on. 919 */ 920 MPASS(m->mtx_held.le_prev == NULL); 921 LIST_INSERT_HEAD(&p->p_heldmtx, (struct mtx*)m, mtx_held); 922} 923 924void 925witness_exit(struct mtx *m, int flags, const char *file, int line) 926{ 927 struct witness *w; 928 929 w = m->mtx_witness; 930 931 if (flags & MTX_SPIN) { 932 if (!w->w_spin) 933 panic("mutex_exit: MTX_SPIN on MTX_DEF mutex %s @" 934 " %s:%d", m->mtx_description, file, line); 935 if (m->mtx_recurse != 0) 936 return; 937 mtx_enter(&w_mtx, MTX_SPIN); 938 PCPU_SET(witness_spin_check, witness_spin_check & ~w->w_level); 939 mtx_exit(&w_mtx, MTX_SPIN); 940 return; 941 } 942 if (w->w_spin) 943 panic("mutex_exit: MTX_DEF on MTX_SPIN mutex %s @ %s:%d", 944 m->mtx_description, file, line); 945 946 if (m->mtx_recurse != 0) 947 return; 948 949 if ((flags & MTX_NOSWITCH) == 0 && !mtx_legal2block() && !cold) 950 panic("switchable mtx_exit() of %s when not legal @ %s:%d", 951 m->mtx_description, file, line); 952 LIST_REMOVE(m, mtx_held); 953 m->mtx_held.le_prev = NULL; 954} 955 956void 957witness_try_enter(struct mtx *m, int flags, const char *file, int line) 958{ 959 struct proc *p; 960 struct witness *w = m->mtx_witness; 961 962 if (flags & MTX_SPIN) { 963 if (!w->w_spin) 964 panic("mutex_try_enter: " 965 "MTX_SPIN on MTX_DEF mutex %s @ %s:%d", 966 m->mtx_description, file, line); 967 if (m->mtx_recurse != 0) 968 return; 969 mtx_enter(&w_mtx, MTX_SPIN); 970 PCPU_SET(witness_spin_check, witness_spin_check | w->w_level); 971 mtx_exit(&w_mtx, MTX_SPIN); 972 return; 973 } 974 975 if (w->w_spin) 976 panic("mutex_try_enter: MTX_DEF on MTX_SPIN mutex %s @ %s:%d", 977 m->mtx_description, file, line); 978 979 if (m->mtx_recurse != 0) 980 return; 981 982 w->w_file = file; 983 w->w_line = line; 984 m->mtx_line = line; 985 m->mtx_file = file; 986 p = CURPROC; 987 MPASS(m->mtx_held.le_prev == NULL); 988 LIST_INSERT_HEAD(&p->p_heldmtx, (struct mtx*)m, mtx_held); 989} 990 991void 992witness_display(void(*prnt)(const char *fmt, ...)) 993{ 994 struct witness *w, *w1; 995 996 witness_levelall(); 997 998 for (w = w_all; w; w = w->w_next) { 999 if (w->w_file == NULL) 1000 continue; 1001 for (w1 = w_all; w1; w1 = w1->w_next) { 1002 if (isitmychild(w1, w)) 1003 break; 1004 } 1005 if (w1 != NULL) 1006 continue; 1007 /* 1008 * This lock has no anscestors, display its descendants. 1009 */ 1010 witness_displaydescendants(prnt, w); 1011 } 1012 prnt("\nMutex which were never acquired\n"); 1013 for (w = w_all; w; w = w->w_next) { 1014 if (w->w_file != NULL) 1015 continue; 1016 prnt("%s\n", w->w_description); 1017 } 1018} 1019 1020int 1021witness_sleep(int check_only, struct mtx *mtx, const char *file, int line) 1022{ 1023 struct mtx *m; 1024 struct proc *p; 1025 char **sleep; 1026 int n = 0; 1027 1028 p = CURPROC; 1029 for ((m = LIST_FIRST(&p->p_heldmtx)); m != NULL; 1030 m = LIST_NEXT(m, mtx_held)) { 1031 if (m == mtx) 1032 continue; 1033 for (sleep = sleep_list; *sleep!= NULL; sleep++) 1034 if (strcmp(m->mtx_description, *sleep) == 0) 1035 goto next; 1036 printf("%s:%d: %s with \"%s\" locked from %s:%d\n", 1037 file, line, check_only ? "could sleep" : "sleeping", 1038 m->mtx_description, 1039 m->mtx_witness->w_file, m->mtx_witness->w_line); 1040 n++; 1041 next: 1042 } 1043#ifdef DDB 1044 if (witness_ddb && n) 1045 Debugger("witness_sleep"); 1046#endif /* DDB */ 1047 return (n); 1048} 1049 1050static struct witness * 1051enroll(const char *description, int flag) 1052{ 1053 int i; 1054 struct witness *w, *w1; 1055 char **ignore; 1056 char **order; 1057 1058 if (!witness_watch) 1059 return (NULL); 1060 for (ignore = ignore_list; *ignore != NULL; ignore++) 1061 if (strcmp(description, *ignore) == 0) 1062 return (NULL); 1063 1064 if (w_inited == 0) { 1065 mtx_init(&w_mtx, "witness lock", MTX_COLD | MTX_DEF); 1066 for (i = 0; i < WITNESS_COUNT; i++) { 1067 w = &w_data[i]; 1068 witness_free(w); 1069 } 1070 w_inited = 1; 1071 for (order = order_list; *order != NULL; order++) { 1072 w = enroll(*order, MTX_DEF); 1073 w->w_file = "order list"; 1074 for (order++; *order != NULL; order++) { 1075 w1 = enroll(*order, MTX_DEF); 1076 w1->w_file = "order list"; 1077 itismychild(w, w1); 1078 w = w1; 1079 } 1080 } 1081 } 1082 if ((flag & MTX_SPIN) && witness_skipspin) 1083 return (NULL); 1084 mtx_enter(&w_mtx, MTX_SPIN); 1085 for (w = w_all; w; w = w->w_next) { 1086 if (strcmp(description, w->w_description) == 0) { 1087 mtx_exit(&w_mtx, MTX_SPIN); 1088 return (w); 1089 } 1090 } 1091 if ((w = witness_get()) == NULL) 1092 return (NULL); 1093 w->w_next = w_all; 1094 w_all = w; 1095 w->w_description = description; 1096 mtx_exit(&w_mtx, MTX_SPIN); 1097 if (flag & MTX_SPIN) { 1098 w->w_spin = 1; 1099 1100 i = 1; 1101 for (order = spin_order_list; *order != NULL; order++) { 1102 if (strcmp(description, *order) == 0) 1103 break; 1104 i <<= 1; 1105 } 1106 if (*order == NULL) 1107 panic("spin lock %s not in order list", description); 1108 w->w_level = i; 1109 } 1110 return (w); 1111} 1112 1113static int 1114itismychild(struct witness *parent, struct witness *child) 1115{ 1116 static int recursed; 1117 1118 /* 1119 * Insert "child" after "parent" 1120 */ 1121 while (parent->w_morechildren) 1122 parent = parent->w_morechildren; 1123 1124 if (parent->w_childcnt == WITNESS_NCHILDREN) { 1125 if ((parent->w_morechildren = witness_get()) == NULL) 1126 return (1); 1127 parent = parent->w_morechildren; 1128 } 1129 MPASS(child != NULL); 1130 parent->w_children[parent->w_childcnt++] = child; 1131 /* 1132 * now prune whole tree 1133 */ 1134 if (recursed) 1135 return (0); 1136 recursed = 1; 1137 for (child = w_all; child != NULL; child = child->w_next) { 1138 for (parent = w_all; parent != NULL; 1139 parent = parent->w_next) { 1140 if (!isitmychild(parent, child)) 1141 continue; 1142 removechild(parent, child); 1143 if (isitmydescendant(parent, child)) 1144 continue; 1145 itismychild(parent, child); 1146 } 1147 } 1148 recursed = 0; 1149 witness_levelall(); 1150 return (0); 1151} 1152 1153static void 1154removechild(struct witness *parent, struct witness *child) 1155{ 1156 struct witness *w, *w1; 1157 int i; 1158 1159 for (w = parent; w != NULL; w = w->w_morechildren) 1160 for (i = 0; i < w->w_childcnt; i++) 1161 if (w->w_children[i] == child) 1162 goto found; 1163 return; 1164found: 1165 for (w1 = w; w1->w_morechildren != NULL; w1 = w1->w_morechildren) 1166 continue; 1167 w->w_children[i] = w1->w_children[--w1->w_childcnt]; 1168 MPASS(w->w_children[i] != NULL); 1169 1170 if (w1->w_childcnt != 0) 1171 return; 1172 1173 if (w1 == parent) 1174 return; 1175 for (w = parent; w->w_morechildren != w1; w = w->w_morechildren) 1176 continue; 1177 w->w_morechildren = 0; 1178 witness_free(w1); 1179} 1180 1181static int 1182isitmychild(struct witness *parent, struct witness *child) 1183{ 1184 struct witness *w; 1185 int i; 1186 1187 for (w = parent; w != NULL; w = w->w_morechildren) { 1188 for (i = 0; i < w->w_childcnt; i++) { 1189 if (w->w_children[i] == child) 1190 return (1); 1191 } 1192 } 1193 return (0); 1194} 1195 1196static int 1197isitmydescendant(struct witness *parent, struct witness *child) 1198{ 1199 struct witness *w; 1200 int i; 1201 int j; 1202 1203 for (j = 0, w = parent; w != NULL; w = w->w_morechildren, j++) { 1204 MPASS(j < 1000); 1205 for (i = 0; i < w->w_childcnt; i++) { 1206 if (w->w_children[i] == child) 1207 return (1); 1208 } 1209 for (i = 0; i < w->w_childcnt; i++) { 1210 if (isitmydescendant(w->w_children[i], child)) 1211 return (1); 1212 } 1213 } 1214 return (0); 1215} 1216 1217void 1218witness_levelall (void) 1219{ 1220 struct witness *w, *w1; 1221 1222 for (w = w_all; w; w = w->w_next) 1223 if (!w->w_spin) 1224 w->w_level = 0; 1225 for (w = w_all; w; w = w->w_next) { 1226 if (w->w_spin) 1227 continue; 1228 for (w1 = w_all; w1; w1 = w1->w_next) { 1229 if (isitmychild(w1, w)) 1230 break; 1231 } 1232 if (w1 != NULL) 1233 continue; 1234 witness_leveldescendents(w, 0); 1235 } 1236} 1237 1238static void 1239witness_leveldescendents(struct witness *parent, int level) 1240{ 1241 int i; 1242 struct witness *w; 1243 1244 if (parent->w_level < level) 1245 parent->w_level = level; 1246 level++; 1247 for (w = parent; w != NULL; w = w->w_morechildren) 1248 for (i = 0; i < w->w_childcnt; i++) 1249 witness_leveldescendents(w->w_children[i], level); 1250} 1251 1252static void 1253witness_displaydescendants(void(*prnt)(const char *fmt, ...), 1254 struct witness *parent) 1255{ 1256 struct witness *w; 1257 int i; 1258 int level = parent->w_level; 1259 1260 prnt("%d", level); 1261 if (level < 10) 1262 prnt(" "); 1263 for (i = 0; i < level; i++) 1264 prnt(" "); 1265 prnt("%s", parent->w_description); 1266 if (parent->w_file != NULL) { 1267 prnt(" -- last acquired @ %s", parent->w_file); 1268#ifndef W_USE_WHERE 1269 prnt(":%d", parent->w_line); 1270#endif 1271 prnt("\n"); 1272 } 1273 1274 for (w = parent; w != NULL; w = w->w_morechildren) 1275 for (i = 0; i < w->w_childcnt; i++) 1276 witness_displaydescendants(prnt, w->w_children[i]); 1277 } 1278 1279static int 1280dup_ok(struct witness *w) 1281{ 1282 char **dup; 1283 1284 for (dup = dup_list; *dup!= NULL; dup++) 1285 if (strcmp(w->w_description, *dup) == 0) 1286 return (1); 1287 return (0); 1288} 1289 1290static int 1291blessed(struct witness *w1, struct witness *w2) 1292{ 1293 int i; 1294 struct witness_blessed *b; 1295 1296 for (i = 0; i < blessed_count; i++) { 1297 b = &blessed_list[i]; 1298 if (strcmp(w1->w_description, b->b_lock1) == 0) { 1299 if (strcmp(w2->w_description, b->b_lock2) == 0) 1300 return (1); 1301 continue; 1302 } 1303 if (strcmp(w1->w_description, b->b_lock2) == 0) 1304 if (strcmp(w2->w_description, b->b_lock1) == 0) 1305 return (1); 1306 } 1307 return (0); 1308} 1309 1310static struct witness * 1311witness_get() 1312{ 1313 struct witness *w; 1314 1315 if ((w = w_free) == NULL) { 1316 witness_dead = 1; 1317 mtx_exit(&w_mtx, MTX_SPIN); 1318 printf("witness exhausted\n"); 1319 return (NULL); 1320 } 1321 w_free = w->w_next; 1322 bzero(w, sizeof(*w)); 1323 return (w); 1324} 1325 1326static void 1327witness_free(struct witness *w) 1328{ 1329 w->w_next = w_free; 1330 w_free = w; 1331} 1332 1333void 1334witness_list(struct proc *p) 1335{ 1336 struct mtx *m; 1337 1338 for ((m = LIST_FIRST(&p->p_heldmtx)); m != NULL; 1339 m = LIST_NEXT(m, mtx_held)) { 1340 printf("\t\"%s\" (%p) locked at %s:%d\n", 1341 m->mtx_description, m, 1342 m->mtx_witness->w_file, m->mtx_witness->w_line); 1343 } 1344} 1345 1346void 1347witness_save(struct mtx *m, const char **filep, int *linep) 1348{ 1349 *filep = m->mtx_witness->w_file; 1350 *linep = m->mtx_witness->w_line; 1351} 1352 1353void 1354witness_restore(struct mtx *m, const char *file, int line) 1355{ 1356 m->mtx_witness->w_file = file; 1357 m->mtx_witness->w_line = line; 1358} 1359 1360#endif /* (defined(MUTEX_DEBUG) && defined(WITNESS)) */ 1361