kern_mutex.c revision 69208
1/*- 2 * Copyright (c) 1998 Berkeley Software Design, Inc. All rights reserved. 3 * 4 * Redistribution and use in source and binary forms, with or without 5 * modification, are permitted provided that the following conditions 6 * are met: 7 * 1. Redistributions of source code must retain the above copyright 8 * notice, this list of conditions and the following disclaimer. 9 * 2. Redistributions in binary form must reproduce the above copyright 10 * notice, this list of conditions and the following disclaimer in the 11 * documentation and/or other materials provided with the distribution. 12 * 3. Berkeley Software Design Inc's name may not be used to endorse or 13 * promote products derived from this software without specific prior 14 * written permission. 15 * 16 * THIS SOFTWARE IS PROVIDED BY BERKELEY SOFTWARE DESIGN INC ``AS IS'' AND 17 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE 18 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE 19 * ARE DISCLAIMED. IN NO EVENT SHALL BERKELEY SOFTWARE DESIGN INC BE LIABLE 20 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL 21 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS 22 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) 23 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT 24 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY 25 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF 26 * SUCH DAMAGE. 27 * 28 * from BSDI $Id: mutex_witness.c,v 1.1.2.20 2000/04/27 03:10:27 cp Exp $ 29 * and BSDI $Id: synch_machdep.c,v 2.3.2.39 2000/04/27 03:10:25 cp Exp $ 30 * $FreeBSD: head/sys/kern/kern_mutex.c 69208 2000-11-26 15:05:46Z jake $ 31 */ 32 33/* 34 * Main Entry: witness 35 * Pronunciation: 'wit-n&s 36 * Function: noun 37 * Etymology: Middle English witnesse, from Old English witnes knowledge, 38 * testimony, witness, from 2wit 39 * Date: before 12th century 40 * 1 : attestation of a fact or event : TESTIMONY 41 * 2 : one that gives evidence; specifically : one who testifies in 42 * a cause or before a judicial tribunal 43 * 3 : one asked to be present at a transaction so as to be able to 44 * testify to its having taken place 45 * 4 : one who has personal knowledge of something 46 * 5 a : something serving as evidence or proof : SIGN 47 * b : public affirmation by word or example of usually 48 * religious faith or conviction <the heroic witness to divine 49 * life -- Pilot> 50 * 6 capitalized : a member of the Jehovah's Witnesses 51 */ 52 53#include "opt_ddb.h" 54#include "opt_witness.h" 55 56#include <sys/param.h> 57#include <sys/bus.h> 58#include <sys/kernel.h> 59#include <sys/malloc.h> 60#include <sys/proc.h> 61#include <sys/sysctl.h> 62#include <sys/systm.h> 63#include <sys/vmmeter.h> 64#include <sys/ktr.h> 65 66#include <machine/atomic.h> 67#include <machine/bus.h> 68#include <machine/clock.h> 69#include <machine/cpu.h> 70 71#include <ddb/ddb.h> 72 73#include <vm/vm.h> 74#include <vm/vm_extern.h> 75 76#define _KERN_MUTEX_C_ /* Cause non-inlined mtx_*() to be compiled. */ 77#include <sys/mutex.h> 78 79/* 80 * Machine independent bits of the mutex implementation 81 */ 82/* All mutexes in system (used for debug/panic) */ 83#ifdef MUTEX_DEBUG 84static struct mtx_debug all_mtx_debug = { NULL, {NULL, NULL}, NULL, 0, 85 "All mutexes queue head" }; 86static struct mtx all_mtx = { MTX_UNOWNED, 0, 0, &all_mtx_debug, 87 TAILQ_HEAD_INITIALIZER(all_mtx.mtx_blocked), 88 { NULL, NULL }, &all_mtx, &all_mtx }; 89#else /* MUTEX_DEBUG */ 90static struct mtx all_mtx = { MTX_UNOWNED, 0, 0, "All mutexes queue head", 91 TAILQ_HEAD_INITIALIZER(all_mtx.mtx_blocked), 92 { NULL, NULL }, &all_mtx, &all_mtx }; 93#endif /* MUTEX_DEBUG */ 94 95static int mtx_cur_cnt; 96static int mtx_max_cnt; 97 98void _mtx_enter_giant_def(void); 99void _mtx_exit_giant_def(void); 100static void propagate_priority(struct proc *) __unused; 101 102#define mtx_unowned(m) ((m)->mtx_lock == MTX_UNOWNED) 103#define mtx_owner(m) (mtx_unowned(m) ? NULL \ 104 : (struct proc *)((m)->mtx_lock & MTX_FLAGMASK)) 105 106#define RETIP(x) *(((uintptr_t *)(&x)) - 1) 107#define SET_PRIO(p, pri) (p)->p_priority = (pri) 108 109/* 110 * XXX Temporary, for use from assembly language 111 */ 112 113void 114_mtx_enter_giant_def(void) 115{ 116 117 mtx_enter(&Giant, MTX_DEF); 118} 119 120void 121_mtx_exit_giant_def(void) 122{ 123 124 mtx_exit(&Giant, MTX_DEF); 125} 126 127static void 128propagate_priority(struct proc *p) 129{ 130 int pri = p->p_priority; 131 struct mtx *m = p->p_blocked; 132 133 for (;;) { 134 struct proc *p1; 135 136 p = mtx_owner(m); 137 138 if (p == NULL) { 139 /* 140 * This really isn't quite right. Really 141 * ought to bump priority of process that 142 * next acquires the mutex. 143 */ 144 MPASS(m->mtx_lock == MTX_CONTESTED); 145 return; 146 } 147 MPASS(p->p_magic == P_MAGIC); 148 if (p->p_priority <= pri) 149 return; 150 /* 151 * If lock holder is actually running, just bump priority. 152 */ 153 if (TAILQ_NEXT(p, p_procq) == NULL) { 154 MPASS(p->p_stat == SRUN || p->p_stat == SZOMB); 155 SET_PRIO(p, pri); 156 return; 157 } 158 /* 159 * If on run queue move to new run queue, and 160 * quit. 161 */ 162 if (p->p_stat == SRUN) { 163 MPASS(p->p_blocked == NULL); 164 remrunqueue(p); 165 SET_PRIO(p, pri); 166 setrunqueue(p); 167 return; 168 } 169 170 /* 171 * If we aren't blocked on a mutex, give up and quit. 172 */ 173 if (p->p_stat != SMTX) { 174 printf( 175 "XXX: process %d(%s):%d holds %s but isn't blocked on a mutex\n", 176 p->p_pid, p->p_comm, p->p_stat, m->mtx_description); 177 return; 178 } 179 180 /* 181 * Pick up the mutex that p is blocked on. 182 */ 183 m = p->p_blocked; 184 MPASS(m != NULL); 185 186 printf("XXX: process %d(%s) is blocked on %s\n", p->p_pid, 187 p->p_comm, m->mtx_description); 188 /* 189 * Check if the proc needs to be moved up on 190 * the blocked chain 191 */ 192 if ((p1 = TAILQ_PREV(p, rq, p_procq)) == NULL || 193 p1->p_priority <= pri) { 194 if (p1) 195 printf( 196 "XXX: previous process %d(%s) has higher priority\n", 197 p->p_pid, p->p_comm); 198 else 199 printf("XXX: process at head of run queue\n"); 200 continue; 201 } 202 203 /* 204 * Remove proc from blocked chain 205 */ 206 TAILQ_REMOVE(&m->mtx_blocked, p, p_procq); 207 TAILQ_FOREACH(p1, &m->mtx_blocked, p_procq) { 208 MPASS(p1->p_magic == P_MAGIC); 209 if (p1->p_priority > pri) 210 break; 211 } 212 if (p1) 213 TAILQ_INSERT_BEFORE(p1, p, p_procq); 214 else 215 TAILQ_INSERT_TAIL(&m->mtx_blocked, p, p_procq); 216 CTR4(KTR_LOCK, 217 "propagate priority: p 0x%p moved before 0x%p on [0x%p] %s", 218 p, p1, m, m->mtx_description); 219 } 220} 221 222void 223mtx_enter_hard(struct mtx *m, int type, int saveintr) 224{ 225 struct proc *p = CURPROC; 226 227 KASSERT(p != NULL, ("curproc is NULL in mutex")); 228 229 switch (type) { 230 case MTX_DEF: 231 if ((m->mtx_lock & MTX_FLAGMASK) == (uintptr_t)p) { 232 m->mtx_recurse++; 233 atomic_set_ptr(&m->mtx_lock, MTX_RECURSE); 234 CTR1(KTR_LOCK, "mtx_enter: 0x%p recurse", m); 235 return; 236 } 237 CTR3(KTR_LOCK, "mtx_enter: 0x%p contested (lock=%p) [0x%p]", 238 m, (void *)m->mtx_lock, (void *)RETIP(m)); 239 while (!_obtain_lock(m, p)) { 240 uintptr_t v; 241 struct proc *p1; 242 243 mtx_enter(&sched_lock, MTX_SPIN | MTX_RLIKELY); 244 /* 245 * check if the lock has been released while 246 * waiting for the schedlock. 247 */ 248 if ((v = m->mtx_lock) == MTX_UNOWNED) { 249 mtx_exit(&sched_lock, MTX_SPIN); 250 continue; 251 } 252 /* 253 * The mutex was marked contested on release. This 254 * means that there are processes blocked on it. 255 */ 256 if (v == MTX_CONTESTED) { 257 p1 = TAILQ_FIRST(&m->mtx_blocked); 258 KASSERT(p1 != NULL, ("contested mutex has no contesters")); 259 KASSERT(p != NULL, ("curproc is NULL for contested mutex")); 260 m->mtx_lock = (uintptr_t)p | MTX_CONTESTED; 261 if (p1->p_priority < p->p_priority) { 262 SET_PRIO(p, p1->p_priority); 263 } 264 mtx_exit(&sched_lock, MTX_SPIN); 265 return; 266 } 267 /* 268 * If the mutex isn't already contested and 269 * a failure occurs setting the contested bit the 270 * mutex was either release or the 271 * state of the RECURSION bit changed. 272 */ 273 if ((v & MTX_CONTESTED) == 0 && 274 !atomic_cmpset_ptr(&m->mtx_lock, (void *)v, 275 (void *)(v | MTX_CONTESTED))) { 276 mtx_exit(&sched_lock, MTX_SPIN); 277 continue; 278 } 279 280 /* We definitely have to sleep for this lock */ 281 mtx_assert(m, MA_NOTOWNED); 282 283#ifdef notyet 284 /* 285 * If we're borrowing an interrupted thread's VM 286 * context must clean up before going to sleep. 287 */ 288 if (p->p_flag & (P_ITHD | P_SITHD)) { 289 ithd_t *it = (ithd_t *)p; 290 291 if (it->it_interrupted) { 292 CTR2(KTR_LOCK, 293 "mtx_enter: 0x%x interrupted 0x%x", 294 it, it->it_interrupted); 295 intr_thd_fixup(it); 296 } 297 } 298#endif 299 300 /* Put us on the list of procs blocked on this mutex */ 301 if (TAILQ_EMPTY(&m->mtx_blocked)) { 302 p1 = (struct proc *)(m->mtx_lock & 303 MTX_FLAGMASK); 304 LIST_INSERT_HEAD(&p1->p_contested, m, 305 mtx_contested); 306 TAILQ_INSERT_TAIL(&m->mtx_blocked, p, p_procq); 307 } else { 308 TAILQ_FOREACH(p1, &m->mtx_blocked, p_procq) 309 if (p1->p_priority > p->p_priority) 310 break; 311 if (p1) 312 TAILQ_INSERT_BEFORE(p1, p, p_procq); 313 else 314 TAILQ_INSERT_TAIL(&m->mtx_blocked, p, 315 p_procq); 316 } 317 318 p->p_blocked = m; /* Who we're blocked on */ 319 p->p_stat = SMTX; 320#if 0 321 propagate_priority(p); 322#endif 323 CTR3(KTR_LOCK, "mtx_enter: p 0x%p blocked on [0x%p] %s", 324 p, m, m->mtx_description); 325 mi_switch(); 326 CTR3(KTR_LOCK, 327 "mtx_enter: p 0x%p free from blocked on [0x%p] %s", 328 p, m, m->mtx_description); 329 mtx_exit(&sched_lock, MTX_SPIN); 330 } 331 return; 332 case MTX_SPIN: 333 case MTX_SPIN | MTX_FIRST: 334 case MTX_SPIN | MTX_TOPHALF: 335 { 336 int i = 0; 337 338 if (m->mtx_lock == (uintptr_t)p) { 339 m->mtx_recurse++; 340 return; 341 } 342 CTR1(KTR_LOCK, "mtx_enter: %p spinning", m); 343 for (;;) { 344 if (_obtain_lock(m, p)) 345 break; 346 while (m->mtx_lock != MTX_UNOWNED) { 347 if (i++ < 1000000) 348 continue; 349 if (i++ < 6000000) 350 DELAY (1); 351#ifdef DDB 352 else if (!db_active) 353#else 354 else 355#endif 356 panic( 357 "spin lock %s held by 0x%p for > 5 seconds", 358 m->mtx_description, 359 (void *)m->mtx_lock); 360 } 361 } 362 363#ifdef MUTEX_DEBUG 364 if (type != MTX_SPIN) 365 m->mtx_saveintr = 0xbeefface; 366 else 367#endif 368 m->mtx_saveintr = saveintr; 369 CTR1(KTR_LOCK, "mtx_enter: 0x%p spin done", m); 370 return; 371 } 372 } 373} 374 375void 376mtx_exit_hard(struct mtx *m, int type) 377{ 378 struct proc *p, *p1; 379 struct mtx *m1; 380 int pri; 381 382 p = CURPROC; 383 switch (type) { 384 case MTX_DEF: 385 case MTX_DEF | MTX_NOSWITCH: 386 if (m->mtx_recurse != 0) { 387 if (--(m->mtx_recurse) == 0) 388 atomic_clear_ptr(&m->mtx_lock, MTX_RECURSE); 389 CTR1(KTR_LOCK, "mtx_exit: 0x%p unrecurse", m); 390 return; 391 } 392 mtx_enter(&sched_lock, MTX_SPIN); 393 CTR1(KTR_LOCK, "mtx_exit: 0x%p contested", m); 394 p1 = TAILQ_FIRST(&m->mtx_blocked); 395 MPASS(p->p_magic == P_MAGIC); 396 MPASS(p1->p_magic == P_MAGIC); 397 TAILQ_REMOVE(&m->mtx_blocked, p1, p_procq); 398 if (TAILQ_EMPTY(&m->mtx_blocked)) { 399 LIST_REMOVE(m, mtx_contested); 400 _release_lock_quick(m); 401 CTR1(KTR_LOCK, "mtx_exit: 0x%p not held", m); 402 } else 403 m->mtx_lock = MTX_CONTESTED; 404 pri = MAXPRI; 405 LIST_FOREACH(m1, &p->p_contested, mtx_contested) { 406 int cp = TAILQ_FIRST(&m1->mtx_blocked)->p_priority; 407 if (cp < pri) 408 pri = cp; 409 } 410 if (pri > p->p_nativepri) 411 pri = p->p_nativepri; 412 SET_PRIO(p, pri); 413 CTR2(KTR_LOCK, "mtx_exit: 0x%p contested setrunqueue 0x%p", 414 m, p1); 415 p1->p_blocked = NULL; 416 p1->p_stat = SRUN; 417 setrunqueue(p1); 418 if ((type & MTX_NOSWITCH) == 0 && p1->p_priority < pri) { 419#ifdef notyet 420 if (p->p_flag & (P_ITHD | P_SITHD)) { 421 ithd_t *it = (ithd_t *)p; 422 423 if (it->it_interrupted) { 424 CTR2(KTR_LOCK, 425 "mtx_exit: 0x%x interruped 0x%x", 426 it, it->it_interrupted); 427 intr_thd_fixup(it); 428 } 429 } 430#endif 431 setrunqueue(p); 432 CTR2(KTR_LOCK, "mtx_exit: 0x%p switching out lock=0x%p", 433 m, (void *)m->mtx_lock); 434 mi_switch(); 435 CTR2(KTR_LOCK, "mtx_exit: 0x%p resuming lock=0x%p", 436 m, (void *)m->mtx_lock); 437 } 438 mtx_exit(&sched_lock, MTX_SPIN); 439 break; 440 case MTX_SPIN: 441 case MTX_SPIN | MTX_FIRST: 442 if (m->mtx_recurse != 0) { 443 m->mtx_recurse--; 444 return; 445 } 446 MPASS(mtx_owned(m)); 447 _release_lock_quick(m); 448 if (type & MTX_FIRST) 449 enable_intr(); /* XXX is this kosher? */ 450 else { 451 MPASS(m->mtx_saveintr != 0xbeefface); 452 restore_intr(m->mtx_saveintr); 453 } 454 break; 455 case MTX_SPIN | MTX_TOPHALF: 456 if (m->mtx_recurse != 0) { 457 m->mtx_recurse--; 458 return; 459 } 460 MPASS(mtx_owned(m)); 461 _release_lock_quick(m); 462 break; 463 default: 464 panic("mtx_exit_hard: unsupported type 0x%x\n", type); 465 } 466} 467 468#define MV_DESTROY 0 /* validate before destory */ 469#define MV_INIT 1 /* validate before init */ 470 471#ifdef MUTEX_DEBUG 472 473int mtx_validate __P((struct mtx *, int)); 474 475int 476mtx_validate(struct mtx *m, int when) 477{ 478 struct mtx *mp; 479 int i; 480 int retval = 0; 481 482 if (m == &all_mtx || cold) 483 return 0; 484 485 mtx_enter(&all_mtx, MTX_DEF); 486/* 487 * XXX - When kernacc() is fixed on the alpha to handle K0_SEG memory properly 488 * we can re-enable the kernacc() checks. 489 */ 490#ifndef __alpha__ 491 MPASS(kernacc((caddr_t)all_mtx.mtx_next, sizeof(uintptr_t), 492 VM_PROT_READ) == 1); 493#endif 494 MPASS(all_mtx.mtx_next->mtx_prev == &all_mtx); 495 for (i = 0, mp = all_mtx.mtx_next; mp != &all_mtx; mp = mp->mtx_next) { 496#ifndef __alpha__ 497 if (kernacc((caddr_t)mp->mtx_next, sizeof(uintptr_t), 498 VM_PROT_READ) != 1) { 499 panic("mtx_validate: mp=%p mp->mtx_next=%p", 500 mp, mp->mtx_next); 501 } 502#endif 503 i++; 504 if (i > mtx_cur_cnt) { 505 panic("mtx_validate: too many in chain, known=%d\n", 506 mtx_cur_cnt); 507 } 508 } 509 MPASS(i == mtx_cur_cnt); 510 switch (when) { 511 case MV_DESTROY: 512 for (mp = all_mtx.mtx_next; mp != &all_mtx; mp = mp->mtx_next) 513 if (mp == m) 514 break; 515 MPASS(mp == m); 516 break; 517 case MV_INIT: 518 for (mp = all_mtx.mtx_next; mp != &all_mtx; mp = mp->mtx_next) 519 if (mp == m) { 520 /* 521 * Not good. This mutex already exists. 522 */ 523 printf("re-initing existing mutex %s\n", 524 m->mtx_description); 525 MPASS(m->mtx_lock == MTX_UNOWNED); 526 retval = 1; 527 } 528 } 529 mtx_exit(&all_mtx, MTX_DEF); 530 return (retval); 531} 532#endif 533 534void 535mtx_init(struct mtx *m, const char *t, int flag) 536{ 537#ifdef MUTEX_DEBUG 538 struct mtx_debug *debug; 539#endif 540 541 CTR2(KTR_LOCK, "mtx_init 0x%p (%s)", m, t); 542#ifdef MUTEX_DEBUG 543 if (mtx_validate(m, MV_INIT)) /* diagnostic and error correction */ 544 return; 545 if (flag & MTX_COLD) 546 debug = m->mtx_debug; 547 else 548 debug = NULL; 549 if (debug == NULL) { 550#ifdef DIAGNOSTIC 551 if(cold && bootverbose) 552 printf("malloc'ing mtx_debug while cold for %s\n", t); 553#endif 554 555 /* XXX - should not use DEVBUF */ 556 debug = malloc(sizeof(struct mtx_debug), M_DEVBUF, M_NOWAIT); 557 MPASS(debug != NULL); 558 bzero(debug, sizeof(struct mtx_debug)); 559 } 560#endif 561 bzero((void *)m, sizeof *m); 562 TAILQ_INIT(&m->mtx_blocked); 563#ifdef MUTEX_DEBUG 564 m->mtx_debug = debug; 565#endif 566 m->mtx_description = t; 567 m->mtx_lock = MTX_UNOWNED; 568 /* Put on all mutex queue */ 569 mtx_enter(&all_mtx, MTX_DEF); 570 m->mtx_next = &all_mtx; 571 m->mtx_prev = all_mtx.mtx_prev; 572 m->mtx_prev->mtx_next = m; 573 all_mtx.mtx_prev = m; 574 if (++mtx_cur_cnt > mtx_max_cnt) 575 mtx_max_cnt = mtx_cur_cnt; 576 mtx_exit(&all_mtx, MTX_DEF); 577 witness_init(m, flag); 578} 579 580void 581mtx_destroy(struct mtx *m) 582{ 583 584 CTR2(KTR_LOCK, "mtx_destroy 0x%p (%s)", m, m->mtx_description); 585#ifdef MUTEX_DEBUG 586 if (m->mtx_next == NULL) 587 panic("mtx_destroy: %p (%s) already destroyed", 588 m, m->mtx_description); 589 590 if (!mtx_owned(m)) { 591 MPASS(m->mtx_lock == MTX_UNOWNED); 592 } else { 593 MPASS((m->mtx_lock & (MTX_RECURSE|MTX_CONTESTED)) == 0); 594 } 595 mtx_validate(m, MV_DESTROY); /* diagnostic */ 596#endif 597 598#ifdef WITNESS 599 if (m->mtx_witness) 600 witness_destroy(m); 601#endif /* WITNESS */ 602 603 /* Remove from the all mutex queue */ 604 mtx_enter(&all_mtx, MTX_DEF); 605 m->mtx_next->mtx_prev = m->mtx_prev; 606 m->mtx_prev->mtx_next = m->mtx_next; 607#ifdef MUTEX_DEBUG 608 m->mtx_next = m->mtx_prev = NULL; 609 free(m->mtx_debug, M_DEVBUF); 610 m->mtx_debug = NULL; 611#endif 612 mtx_cur_cnt--; 613 mtx_exit(&all_mtx, MTX_DEF); 614} 615 616/* 617 * The non-inlined versions of the mtx_*() functions are always built (above), 618 * but the witness code depends on the MUTEX_DEBUG and WITNESS kernel options 619 * being specified. 620 */ 621#if (defined(MUTEX_DEBUG) && defined(WITNESS)) 622 623#define WITNESS_COUNT 200 624#define WITNESS_NCHILDREN 2 625 626int witness_watch = 1; 627 628struct witness { 629 struct witness *w_next; 630 const char *w_description; 631 const char *w_file; 632 int w_line; 633 struct witness *w_morechildren; 634 u_char w_childcnt; 635 u_char w_Giant_squawked:1; 636 u_char w_other_squawked:1; 637 u_char w_same_squawked:1; 638 u_char w_sleep:1; 639 u_char w_spin:1; /* this is a spin mutex */ 640 u_int w_level; 641 struct witness *w_children[WITNESS_NCHILDREN]; 642}; 643 644struct witness_blessed { 645 char *b_lock1; 646 char *b_lock2; 647}; 648 649#ifdef DDB 650/* 651 * When DDB is enabled and witness_ddb is set to 1, it will cause the system to 652 * drop into kdebug() when: 653 * - a lock heirarchy violation occurs 654 * - locks are held when going to sleep. 655 */ 656#ifdef WITNESS_DDB 657int witness_ddb = 1; 658#else 659int witness_ddb = 0; 660#endif 661SYSCTL_INT(_debug, OID_AUTO, witness_ddb, CTLFLAG_RW, &witness_ddb, 0, ""); 662#endif /* DDB */ 663 664#ifdef WITNESS_SKIPSPIN 665int witness_skipspin = 1; 666#else 667int witness_skipspin = 0; 668#endif 669SYSCTL_INT(_debug, OID_AUTO, witness_skipspin, CTLFLAG_RD, &witness_skipspin, 0, 670 ""); 671 672MUTEX_DECLARE(static,w_mtx); 673static struct witness *w_free; 674static struct witness *w_all; 675static int w_inited; 676static int witness_dead; /* fatal error, probably no memory */ 677 678static struct witness w_data[WITNESS_COUNT]; 679 680static struct witness *enroll __P((const char *description, int flag)); 681static int itismychild __P((struct witness *parent, struct witness *child)); 682static void removechild __P((struct witness *parent, struct witness *child)); 683static int isitmychild __P((struct witness *parent, struct witness *child)); 684static int isitmydescendant __P((struct witness *parent, struct witness *child)); 685static int dup_ok __P((struct witness *)); 686static int blessed __P((struct witness *, struct witness *)); 687static void witness_displaydescendants 688 __P((void(*)(const char *fmt, ...), struct witness *)); 689static void witness_leveldescendents __P((struct witness *parent, int level)); 690static void witness_levelall __P((void)); 691static struct witness * witness_get __P((void)); 692static void witness_free __P((struct witness *m)); 693 694 695static char *ignore_list[] = { 696 "witness lock", 697 NULL 698}; 699 700static char *spin_order_list[] = { 701 "sched lock", 702 "sio", 703#ifdef __i386__ 704 "clk", 705#endif 706 "callout", 707 /* 708 * leaf locks 709 */ 710 NULL 711}; 712 713static char *order_list[] = { 714 "uidinfo hash", "uidinfo struct", NULL, 715 NULL 716}; 717 718static char *dup_list[] = { 719 NULL 720}; 721 722static char *sleep_list[] = { 723 "Giant", 724 NULL 725}; 726 727/* 728 * Pairs of locks which have been blessed 729 * Don't complain about order problems with blessed locks 730 */ 731static struct witness_blessed blessed_list[] = { 732}; 733static int blessed_count = sizeof(blessed_list) / sizeof(struct witness_blessed); 734 735void 736witness_init(struct mtx *m, int flag) 737{ 738 m->mtx_witness = enroll(m->mtx_description, flag); 739} 740 741void 742witness_destroy(struct mtx *m) 743{ 744 struct mtx *m1; 745 struct proc *p; 746 p = CURPROC; 747 for ((m1 = LIST_FIRST(&p->p_heldmtx)); m1 != NULL; 748 m1 = LIST_NEXT(m1, mtx_held)) { 749 if (m1 == m) { 750 LIST_REMOVE(m, mtx_held); 751 break; 752 } 753 } 754 return; 755 756} 757 758void 759witness_enter(struct mtx *m, int flags, const char *file, int line) 760{ 761 struct witness *w, *w1; 762 struct mtx *m1; 763 struct proc *p; 764 int i; 765#ifdef DDB 766 int go_into_ddb = 0; 767#endif /* DDB */ 768 769 w = m->mtx_witness; 770 p = CURPROC; 771 772 if (flags & MTX_SPIN) { 773 if (!w->w_spin) 774 panic("mutex_enter: MTX_SPIN on MTX_DEF mutex %s @" 775 " %s:%d", m->mtx_description, file, line); 776 if (m->mtx_recurse != 0) 777 return; 778 mtx_enter(&w_mtx, MTX_SPIN); 779 i = witness_spin_check; 780 if (i != 0 && w->w_level < i) { 781 mtx_exit(&w_mtx, MTX_SPIN); 782 panic("mutex_enter(%s:%x, MTX_SPIN) out of order @" 783 " %s:%d already holding %s:%x", 784 m->mtx_description, w->w_level, file, line, 785 spin_order_list[ffs(i)-1], i); 786 } 787 PCPU_SET(witness_spin_check, i | w->w_level); 788 mtx_exit(&w_mtx, MTX_SPIN); 789 return; 790 } 791 if (w->w_spin) 792 panic("mutex_enter: MTX_DEF on MTX_SPIN mutex %s @ %s:%d", 793 m->mtx_description, file, line); 794 795 if (m->mtx_recurse != 0) 796 return; 797 if (witness_dead) 798 goto out; 799 if (cold || panicstr) 800 goto out; 801 802 if (!mtx_legal2block()) 803 panic("blockable mtx_enter() of %s when not legal @ %s:%d", 804 m->mtx_description, file, line); 805 /* 806 * Is this the first mutex acquired 807 */ 808 if ((m1 = LIST_FIRST(&p->p_heldmtx)) == NULL) 809 goto out; 810 811 if ((w1 = m1->mtx_witness) == w) { 812 if (w->w_same_squawked || dup_ok(w)) 813 goto out; 814 w->w_same_squawked = 1; 815 printf("acquring duplicate lock of same type: \"%s\"\n", 816 m->mtx_description); 817 printf(" 1st @ %s:%d\n", w->w_file, w->w_line); 818 printf(" 2nd @ %s:%d\n", file, line); 819#ifdef DDB 820 go_into_ddb = 1; 821#endif /* DDB */ 822 goto out; 823 } 824 MPASS(!mtx_owned(&w_mtx)); 825 mtx_enter(&w_mtx, MTX_SPIN); 826 /* 827 * If we have a known higher number just say ok 828 */ 829 if (witness_watch > 1 && w->w_level > w1->w_level) { 830 mtx_exit(&w_mtx, MTX_SPIN); 831 goto out; 832 } 833 if (isitmydescendant(m1->mtx_witness, w)) { 834 mtx_exit(&w_mtx, MTX_SPIN); 835 goto out; 836 } 837 for (i = 0; m1 != NULL; m1 = LIST_NEXT(m1, mtx_held), i++) { 838 839 MPASS(i < 200); 840 w1 = m1->mtx_witness; 841 if (isitmydescendant(w, w1)) { 842 mtx_exit(&w_mtx, MTX_SPIN); 843 if (blessed(w, w1)) 844 goto out; 845 if (m1 == &Giant) { 846 if (w1->w_Giant_squawked) 847 goto out; 848 else 849 w1->w_Giant_squawked = 1; 850 } else { 851 if (w1->w_other_squawked) 852 goto out; 853 else 854 w1->w_other_squawked = 1; 855 } 856 printf("lock order reversal\n"); 857 printf(" 1st %s last acquired @ %s:%d\n", 858 w->w_description, w->w_file, w->w_line); 859 printf(" 2nd %p %s @ %s:%d\n", 860 m1, w1->w_description, w1->w_file, w1->w_line); 861 printf(" 3rd %p %s @ %s:%d\n", 862 m, w->w_description, file, line); 863#ifdef DDB 864 go_into_ddb = 1; 865#endif /* DDB */ 866 goto out; 867 } 868 } 869 m1 = LIST_FIRST(&p->p_heldmtx); 870 if (!itismychild(m1->mtx_witness, w)) 871 mtx_exit(&w_mtx, MTX_SPIN); 872 873out: 874#ifdef DDB 875 if (witness_ddb && go_into_ddb) 876 Debugger("witness_enter"); 877#endif /* DDB */ 878 w->w_file = file; 879 w->w_line = line; 880 m->mtx_line = line; 881 m->mtx_file = file; 882 883 /* 884 * If this pays off it likely means that a mutex being witnessed 885 * is acquired in hardclock. Put it in the ignore list. It is 886 * likely not the mutex this assert fails on. 887 */ 888 MPASS(m->mtx_held.le_prev == NULL); 889 LIST_INSERT_HEAD(&p->p_heldmtx, (struct mtx*)m, mtx_held); 890} 891 892void 893witness_exit(struct mtx *m, int flags, const char *file, int line) 894{ 895 struct witness *w; 896 897 w = m->mtx_witness; 898 899 if (flags & MTX_SPIN) { 900 if (!w->w_spin) 901 panic("mutex_exit: MTX_SPIN on MTX_DEF mutex %s @" 902 " %s:%d", m->mtx_description, file, line); 903 if (m->mtx_recurse != 0) 904 return; 905 mtx_enter(&w_mtx, MTX_SPIN); 906 PCPU_SET(witness_spin_check, witness_spin_check & ~w->w_level); 907 mtx_exit(&w_mtx, MTX_SPIN); 908 return; 909 } 910 if (w->w_spin) 911 panic("mutex_exit: MTX_DEF on MTX_SPIN mutex %s @ %s:%d", 912 m->mtx_description, file, line); 913 914 if (m->mtx_recurse != 0) 915 return; 916 917 if ((flags & MTX_NOSWITCH) == 0 && !mtx_legal2block() && !cold) 918 panic("switchable mtx_exit() of %s when not legal @ %s:%d", 919 m->mtx_description, file, line); 920 LIST_REMOVE(m, mtx_held); 921 m->mtx_held.le_prev = NULL; 922} 923 924void 925witness_try_enter(struct mtx *m, int flags, const char *file, int line) 926{ 927 struct proc *p; 928 struct witness *w = m->mtx_witness; 929 930 if (flags & MTX_SPIN) { 931 if (!w->w_spin) 932 panic("mutex_try_enter: " 933 "MTX_SPIN on MTX_DEF mutex %s @ %s:%d", 934 m->mtx_description, file, line); 935 if (m->mtx_recurse != 0) 936 return; 937 mtx_enter(&w_mtx, MTX_SPIN); 938 PCPU_SET(witness_spin_check, witness_spin_check | w->w_level); 939 mtx_exit(&w_mtx, MTX_SPIN); 940 return; 941 } 942 943 if (w->w_spin) 944 panic("mutex_try_enter: MTX_DEF on MTX_SPIN mutex %s @ %s:%d", 945 m->mtx_description, file, line); 946 947 if (m->mtx_recurse != 0) 948 return; 949 950 w->w_file = file; 951 w->w_line = line; 952 m->mtx_line = line; 953 m->mtx_file = file; 954 p = CURPROC; 955 MPASS(m->mtx_held.le_prev == NULL); 956 LIST_INSERT_HEAD(&p->p_heldmtx, (struct mtx*)m, mtx_held); 957} 958 959void 960witness_display(void(*prnt)(const char *fmt, ...)) 961{ 962 struct witness *w, *w1; 963 964 witness_levelall(); 965 966 for (w = w_all; w; w = w->w_next) { 967 if (w->w_file == NULL) 968 continue; 969 for (w1 = w_all; w1; w1 = w1->w_next) { 970 if (isitmychild(w1, w)) 971 break; 972 } 973 if (w1 != NULL) 974 continue; 975 /* 976 * This lock has no anscestors, display its descendants. 977 */ 978 witness_displaydescendants(prnt, w); 979 } 980 prnt("\nMutex which were never acquired\n"); 981 for (w = w_all; w; w = w->w_next) { 982 if (w->w_file != NULL) 983 continue; 984 prnt("%s\n", w->w_description); 985 } 986} 987 988int 989witness_sleep(int check_only, struct mtx *mtx, const char *file, int line) 990{ 991 struct mtx *m; 992 struct proc *p; 993 char **sleep; 994 int n = 0; 995 996 p = CURPROC; 997 for ((m = LIST_FIRST(&p->p_heldmtx)); m != NULL; 998 m = LIST_NEXT(m, mtx_held)) { 999 if (m == mtx) 1000 continue; 1001 for (sleep = sleep_list; *sleep!= NULL; sleep++) 1002 if (strcmp(m->mtx_description, *sleep) == 0) 1003 goto next; 1004 printf("%s:%d: %s with \"%s\" locked from %s:%d\n", 1005 file, line, check_only ? "could sleep" : "sleeping", 1006 m->mtx_description, 1007 m->mtx_witness->w_file, m->mtx_witness->w_line); 1008 n++; 1009 next: 1010 } 1011#ifdef DDB 1012 if (witness_ddb && n) 1013 Debugger("witness_sleep"); 1014#endif /* DDB */ 1015 return (n); 1016} 1017 1018static struct witness * 1019enroll(const char *description, int flag) 1020{ 1021 int i; 1022 struct witness *w, *w1; 1023 char **ignore; 1024 char **order; 1025 1026 if (!witness_watch) 1027 return (NULL); 1028 for (ignore = ignore_list; *ignore != NULL; ignore++) 1029 if (strcmp(description, *ignore) == 0) 1030 return (NULL); 1031 1032 if (w_inited == 0) { 1033 mtx_init(&w_mtx, "witness lock", MTX_COLD | MTX_DEF); 1034 for (i = 0; i < WITNESS_COUNT; i++) { 1035 w = &w_data[i]; 1036 witness_free(w); 1037 } 1038 w_inited = 1; 1039 for (order = order_list; *order != NULL; order++) { 1040 w = enroll(*order, MTX_DEF); 1041 w->w_file = "order list"; 1042 for (order++; *order != NULL; order++) { 1043 w1 = enroll(*order, MTX_DEF); 1044 w1->w_file = "order list"; 1045 itismychild(w, w1); 1046 w = w1; 1047 } 1048 } 1049 } 1050 if ((flag & MTX_SPIN) && witness_skipspin) 1051 return (NULL); 1052 mtx_enter(&w_mtx, MTX_SPIN); 1053 for (w = w_all; w; w = w->w_next) { 1054 if (strcmp(description, w->w_description) == 0) { 1055 mtx_exit(&w_mtx, MTX_SPIN); 1056 return (w); 1057 } 1058 } 1059 if ((w = witness_get()) == NULL) 1060 return (NULL); 1061 w->w_next = w_all; 1062 w_all = w; 1063 w->w_description = description; 1064 mtx_exit(&w_mtx, MTX_SPIN); 1065 if (flag & MTX_SPIN) { 1066 w->w_spin = 1; 1067 1068 i = 1; 1069 for (order = spin_order_list; *order != NULL; order++) { 1070 if (strcmp(description, *order) == 0) 1071 break; 1072 i <<= 1; 1073 } 1074 if (*order == NULL) 1075 panic("spin lock %s not in order list", description); 1076 w->w_level = i; 1077 } 1078 return (w); 1079} 1080 1081static int 1082itismychild(struct witness *parent, struct witness *child) 1083{ 1084 static int recursed; 1085 1086 /* 1087 * Insert "child" after "parent" 1088 */ 1089 while (parent->w_morechildren) 1090 parent = parent->w_morechildren; 1091 1092 if (parent->w_childcnt == WITNESS_NCHILDREN) { 1093 if ((parent->w_morechildren = witness_get()) == NULL) 1094 return (1); 1095 parent = parent->w_morechildren; 1096 } 1097 MPASS(child != NULL); 1098 parent->w_children[parent->w_childcnt++] = child; 1099 /* 1100 * now prune whole tree 1101 */ 1102 if (recursed) 1103 return (0); 1104 recursed = 1; 1105 for (child = w_all; child != NULL; child = child->w_next) { 1106 for (parent = w_all; parent != NULL; 1107 parent = parent->w_next) { 1108 if (!isitmychild(parent, child)) 1109 continue; 1110 removechild(parent, child); 1111 if (isitmydescendant(parent, child)) 1112 continue; 1113 itismychild(parent, child); 1114 } 1115 } 1116 recursed = 0; 1117 witness_levelall(); 1118 return (0); 1119} 1120 1121static void 1122removechild(struct witness *parent, struct witness *child) 1123{ 1124 struct witness *w, *w1; 1125 int i; 1126 1127 for (w = parent; w != NULL; w = w->w_morechildren) 1128 for (i = 0; i < w->w_childcnt; i++) 1129 if (w->w_children[i] == child) 1130 goto found; 1131 return; 1132found: 1133 for (w1 = w; w1->w_morechildren != NULL; w1 = w1->w_morechildren) 1134 continue; 1135 w->w_children[i] = w1->w_children[--w1->w_childcnt]; 1136 MPASS(w->w_children[i] != NULL); 1137 1138 if (w1->w_childcnt != 0) 1139 return; 1140 1141 if (w1 == parent) 1142 return; 1143 for (w = parent; w->w_morechildren != w1; w = w->w_morechildren) 1144 continue; 1145 w->w_morechildren = 0; 1146 witness_free(w1); 1147} 1148 1149static int 1150isitmychild(struct witness *parent, struct witness *child) 1151{ 1152 struct witness *w; 1153 int i; 1154 1155 for (w = parent; w != NULL; w = w->w_morechildren) { 1156 for (i = 0; i < w->w_childcnt; i++) { 1157 if (w->w_children[i] == child) 1158 return (1); 1159 } 1160 } 1161 return (0); 1162} 1163 1164static int 1165isitmydescendant(struct witness *parent, struct witness *child) 1166{ 1167 struct witness *w; 1168 int i; 1169 int j; 1170 1171 for (j = 0, w = parent; w != NULL; w = w->w_morechildren, j++) { 1172 MPASS(j < 1000); 1173 for (i = 0; i < w->w_childcnt; i++) { 1174 if (w->w_children[i] == child) 1175 return (1); 1176 } 1177 for (i = 0; i < w->w_childcnt; i++) { 1178 if (isitmydescendant(w->w_children[i], child)) 1179 return (1); 1180 } 1181 } 1182 return (0); 1183} 1184 1185void 1186witness_levelall (void) 1187{ 1188 struct witness *w, *w1; 1189 1190 for (w = w_all; w; w = w->w_next) 1191 if (!w->w_spin) 1192 w->w_level = 0; 1193 for (w = w_all; w; w = w->w_next) { 1194 if (w->w_spin) 1195 continue; 1196 for (w1 = w_all; w1; w1 = w1->w_next) { 1197 if (isitmychild(w1, w)) 1198 break; 1199 } 1200 if (w1 != NULL) 1201 continue; 1202 witness_leveldescendents(w, 0); 1203 } 1204} 1205 1206static void 1207witness_leveldescendents(struct witness *parent, int level) 1208{ 1209 int i; 1210 struct witness *w; 1211 1212 if (parent->w_level < level) 1213 parent->w_level = level; 1214 level++; 1215 for (w = parent; w != NULL; w = w->w_morechildren) 1216 for (i = 0; i < w->w_childcnt; i++) 1217 witness_leveldescendents(w->w_children[i], level); 1218} 1219 1220static void 1221witness_displaydescendants(void(*prnt)(const char *fmt, ...), 1222 struct witness *parent) 1223{ 1224 struct witness *w; 1225 int i; 1226 int level = parent->w_level; 1227 1228 prnt("%d", level); 1229 if (level < 10) 1230 prnt(" "); 1231 for (i = 0; i < level; i++) 1232 prnt(" "); 1233 prnt("%s", parent->w_description); 1234 if (parent->w_file != NULL) { 1235 prnt(" -- last acquired @ %s", parent->w_file); 1236#ifndef W_USE_WHERE 1237 prnt(":%d", parent->w_line); 1238#endif 1239 prnt("\n"); 1240 } 1241 1242 for (w = parent; w != NULL; w = w->w_morechildren) 1243 for (i = 0; i < w->w_childcnt; i++) 1244 witness_displaydescendants(prnt, w->w_children[i]); 1245 } 1246 1247static int 1248dup_ok(struct witness *w) 1249{ 1250 char **dup; 1251 1252 for (dup = dup_list; *dup!= NULL; dup++) 1253 if (strcmp(w->w_description, *dup) == 0) 1254 return (1); 1255 return (0); 1256} 1257 1258static int 1259blessed(struct witness *w1, struct witness *w2) 1260{ 1261 int i; 1262 struct witness_blessed *b; 1263 1264 for (i = 0; i < blessed_count; i++) { 1265 b = &blessed_list[i]; 1266 if (strcmp(w1->w_description, b->b_lock1) == 0) { 1267 if (strcmp(w2->w_description, b->b_lock2) == 0) 1268 return (1); 1269 continue; 1270 } 1271 if (strcmp(w1->w_description, b->b_lock2) == 0) 1272 if (strcmp(w2->w_description, b->b_lock1) == 0) 1273 return (1); 1274 } 1275 return (0); 1276} 1277 1278static struct witness * 1279witness_get() 1280{ 1281 struct witness *w; 1282 1283 if ((w = w_free) == NULL) { 1284 witness_dead = 1; 1285 mtx_exit(&w_mtx, MTX_SPIN); 1286 printf("witness exhausted\n"); 1287 return (NULL); 1288 } 1289 w_free = w->w_next; 1290 bzero(w, sizeof(*w)); 1291 return (w); 1292} 1293 1294static void 1295witness_free(struct witness *w) 1296{ 1297 w->w_next = w_free; 1298 w_free = w; 1299} 1300 1301void 1302witness_list(struct proc *p) 1303{ 1304 struct mtx *m; 1305 1306 for ((m = LIST_FIRST(&p->p_heldmtx)); m != NULL; 1307 m = LIST_NEXT(m, mtx_held)) { 1308 printf("\t\"%s\" (%p) locked at %s:%d\n", 1309 m->mtx_description, m, 1310 m->mtx_witness->w_file, m->mtx_witness->w_line); 1311 } 1312} 1313 1314void 1315witness_save(struct mtx *m, const char **filep, int *linep) 1316{ 1317 *filep = m->mtx_witness->w_file; 1318 *linep = m->mtx_witness->w_line; 1319} 1320 1321void 1322witness_restore(struct mtx *m, const char *file, int line) 1323{ 1324 m->mtx_witness->w_file = file; 1325 m->mtx_witness->w_line = line; 1326} 1327 1328#endif /* (defined(MUTEX_DEBUG) && defined(WITNESS)) */ 1329