kern_mutex.c revision 74016
1/*-
2 * Copyright (c) 1998 Berkeley Software Design, Inc. All rights reserved.
3 *
4 * Redistribution and use in source and binary forms, with or without
5 * modification, are permitted provided that the following conditions
6 * are met:
7 * 1. Redistributions of source code must retain the above copyright
8 *    notice, this list of conditions and the following disclaimer.
9 * 2. Redistributions in binary form must reproduce the above copyright
10 *    notice, this list of conditions and the following disclaimer in the
11 *    documentation and/or other materials provided with the distribution.
12 * 3. Berkeley Software Design Inc's name may not be used to endorse or
13 *    promote products derived from this software without specific prior
14 *    written permission.
15 *
16 * THIS SOFTWARE IS PROVIDED BY BERKELEY SOFTWARE DESIGN INC ``AS IS'' AND
17 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
18 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
19 * ARE DISCLAIMED.  IN NO EVENT SHALL BERKELEY SOFTWARE DESIGN INC BE LIABLE
20 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
21 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
22 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
23 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
24 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
25 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
26 * SUCH DAMAGE.
27 *
28 *	from BSDI $Id: mutex_witness.c,v 1.1.2.20 2000/04/27 03:10:27 cp Exp $
29 *	and BSDI $Id: synch_machdep.c,v 2.3.2.39 2000/04/27 03:10:25 cp Exp $
30 * $FreeBSD: head/sys/kern/kern_mutex.c 74016 2001-03-09 07:24:17Z jhb $
31 */
32
33/*
34 * Machine independent bits of mutex implementation and implementation of
35 * `witness' structure & related debugging routines.
36 */
37
38/*
39 *	Main Entry: witness
40 *	Pronunciation: 'wit-n&s
41 *	Function: noun
42 *	Etymology: Middle English witnesse, from Old English witnes knowledge,
43 *	    testimony, witness, from 2wit
44 *	Date: before 12th century
45 *	1 : attestation of a fact or event : TESTIMONY
46 *	2 : one that gives evidence; specifically : one who testifies in
47 *	    a cause or before a judicial tribunal
48 *	3 : one asked to be present at a transaction so as to be able to
49 *	    testify to its having taken place
50 *	4 : one who has personal knowledge of something
51 *	5 a : something serving as evidence or proof : SIGN
52 *	  b : public affirmation by word or example of usually
53 *	      religious faith or conviction <the heroic witness to divine
54 *	      life -- Pilot>
55 *	6 capitalized : a member of the Jehovah's Witnesses
56 */
57
58#include "opt_ddb.h"
59#include "opt_witness.h"
60
61#include <sys/param.h>
62#include <sys/bus.h>
63#include <sys/kernel.h>
64#include <sys/malloc.h>
65#include <sys/proc.h>
66#include <sys/sysctl.h>
67#include <sys/systm.h>
68#include <sys/vmmeter.h>
69#include <sys/ktr.h>
70
71#include <machine/atomic.h>
72#include <machine/bus.h>
73#include <machine/clock.h>
74#include <machine/cpu.h>
75
76#include <ddb/ddb.h>
77
78#include <vm/vm.h>
79#include <vm/vm_extern.h>
80
81#include <sys/mutex.h>
82
83/*
84 * The WITNESS-enabled mutex debug structure.
85 */
86#ifdef WITNESS
87struct mtx_debug {
88	struct witness	*mtxd_witness;
89	LIST_ENTRY(mtx)	mtxd_held;
90	const char	*mtxd_file;
91	int		mtxd_line;
92};
93
94#define mtx_held	mtx_debug->mtxd_held
95#define	mtx_file	mtx_debug->mtxd_file
96#define	mtx_line	mtx_debug->mtxd_line
97#define	mtx_witness	mtx_debug->mtxd_witness
98#endif	/* WITNESS */
99
100/*
101 * Internal utility macros.
102 */
103#define mtx_unowned(m)	((m)->mtx_lock == MTX_UNOWNED)
104
105#define mtx_owner(m)	(mtx_unowned((m)) ? NULL \
106	: (struct proc *)((m)->mtx_lock & MTX_FLAGMASK))
107
108#define SET_PRIO(p, pri)	(p)->p_pri.pri_level = (pri)
109
110/*
111 * Early WITNESS-enabled declarations.
112 */
113#ifdef WITNESS
114
115/*
116 * Internal WITNESS routines which must be prototyped early.
117 *
118 * XXX: When/if witness code is cleaned up, it would be wise to place all
119 *	witness prototyping early in this file.
120 */
121static void witness_init(struct mtx *, int flag);
122static void witness_destroy(struct mtx *);
123static void witness_display(void(*)(const char *fmt, ...));
124
125MALLOC_DEFINE(M_WITNESS, "witness", "witness mtx_debug structure");
126
127/* All mutexes in system (used for debug/panic) */
128static struct mtx_debug all_mtx_debug = { NULL, {NULL, NULL}, NULL, 0 };
129
130/*
131 * This global is set to 0 once it becomes safe to use the witness code.
132 */
133static int witness_cold = 1;
134
135#else	/* WITNESS */
136
137/* XXX XXX XXX
138 * flag++ is sleazoid way of shuting up warning
139 */
140#define witness_init(m, flag) flag++
141#define witness_destroy(m)
142#define witness_try_enter(m, t, f, l)
143#endif	/* WITNESS */
144
145/*
146 * All mutex locks in system are kept on the all_mtx list.
147 */
148static struct mtx all_mtx = { MTX_UNOWNED, 0, 0, 0, "All mutexes queue head",
149	TAILQ_HEAD_INITIALIZER(all_mtx.mtx_blocked),
150	{ NULL, NULL }, &all_mtx, &all_mtx,
151#ifdef WITNESS
152	&all_mtx_debug
153#else
154	NULL
155#endif
156	 };
157
158/*
159 * Global variables for book keeping.
160 */
161static int	mtx_cur_cnt;
162static int	mtx_max_cnt;
163
164/*
165 * Couple of strings for KTR_LOCK tracing in order to avoid duplicates.
166 */
167char	STR_mtx_lock_slp[] = "GOT (sleep) %s [%p] r=%d at %s:%d";
168char	STR_mtx_unlock_slp[] = "REL (sleep) %s [%p] r=%d at %s:%d";
169char	STR_mtx_lock_spn[] = "GOT (spin) %s [%p] r=%d at %s:%d";
170char	STR_mtx_unlock_spn[] = "REL (spin) %s [%p] r=%d at %s:%d";
171
172/*
173 * Prototypes for non-exported routines.
174 *
175 * NOTE: Prototypes for witness routines are placed at the bottom of the file.
176 */
177static void	propagate_priority(struct proc *);
178
179static void
180propagate_priority(struct proc *p)
181{
182	int pri = p->p_pri.pri_level;
183	struct mtx *m = p->p_blocked;
184
185	mtx_assert(&sched_lock, MA_OWNED);
186	for (;;) {
187		struct proc *p1;
188
189		p = mtx_owner(m);
190
191		if (p == NULL) {
192			/*
193			 * This really isn't quite right. Really
194			 * ought to bump priority of process that
195			 * next acquires the mutex.
196			 */
197			MPASS(m->mtx_lock == MTX_CONTESTED);
198			return;
199		}
200
201		MPASS(p->p_magic == P_MAGIC);
202		KASSERT(p->p_stat != SSLEEP, ("sleeping process owns a mutex"));
203		if (p->p_pri.pri_level <= pri)
204			return;
205
206		/*
207		 * Bump this process' priority.
208		 */
209		SET_PRIO(p, pri);
210
211		/*
212		 * If lock holder is actually running, just bump priority.
213		 */
214		if (p->p_oncpu != NOCPU) {
215			MPASS(p->p_stat == SRUN || p->p_stat == SZOMB);
216			return;
217		}
218
219#ifndef SMP
220		/*
221		 * For UP, we check to see if p is curproc (this shouldn't
222		 * ever happen however as it would mean we are in a deadlock.)
223		 */
224		KASSERT(p != curproc, ("Deadlock detected"));
225#endif
226
227		/*
228		 * If on run queue move to new run queue, and
229		 * quit.
230		 */
231		if (p->p_stat == SRUN) {
232			MPASS(p->p_blocked == NULL);
233			remrunqueue(p);
234			setrunqueue(p);
235			return;
236		}
237
238		/*
239		 * If we aren't blocked on a mutex, we should be.
240		 */
241		KASSERT(p->p_stat == SMTX, (
242		    "process %d(%s):%d holds %s but isn't blocked on a mutex\n",
243		    p->p_pid, p->p_comm, p->p_stat,
244		    m->mtx_description));
245
246		/*
247		 * Pick up the mutex that p is blocked on.
248		 */
249		m = p->p_blocked;
250		MPASS(m != NULL);
251
252		/*
253		 * Check if the proc needs to be moved up on
254		 * the blocked chain
255		 */
256		if (p == TAILQ_FIRST(&m->mtx_blocked)) {
257			continue;
258		}
259
260		p1 = TAILQ_PREV(p, procqueue, p_procq);
261		if (p1->p_pri.pri_level <= pri) {
262			continue;
263		}
264
265		/*
266		 * Remove proc from blocked chain and determine where
267		 * it should be moved up to.  Since we know that p1 has
268		 * a lower priority than p, we know that at least one
269		 * process in the chain has a lower priority and that
270		 * p1 will thus not be NULL after the loop.
271		 */
272		TAILQ_REMOVE(&m->mtx_blocked, p, p_procq);
273		TAILQ_FOREACH(p1, &m->mtx_blocked, p_procq) {
274			MPASS(p1->p_magic == P_MAGIC);
275			if (p1->p_pri.pri_level > pri)
276				break;
277		}
278
279		MPASS(p1 != NULL);
280		TAILQ_INSERT_BEFORE(p1, p, p_procq);
281		CTR4(KTR_LOCK,
282		    "propagate_priority: p %p moved before %p on [%p] %s",
283		    p, p1, m, m->mtx_description);
284	}
285}
286
287/*
288 * The important part of mtx_trylock{,_flags}()
289 * Tries to acquire lock `m.' We do NOT handle recursion here; we assume that
290 * if we're called, it's because we know we don't already own this lock.
291 */
292int
293_mtx_trylock(struct mtx *m, int opts, const char *file, int line)
294{
295	int rval;
296
297	MPASS(curproc != NULL);
298
299	/*
300	 * _mtx_trylock does not accept MTX_NOSWITCH option.
301	 */
302	KASSERT((opts & MTX_NOSWITCH) == 0,
303	    ("mtx_trylock() called with invalid option flag(s) %d", opts));
304
305	rval = _obtain_lock(m, curproc);
306
307#ifdef WITNESS
308	if (rval && m->mtx_witness != NULL) {
309		/*
310		 * We do not handle recursion in _mtx_trylock; see the
311		 * note at the top of the routine.
312		 */
313		KASSERT(!mtx_recursed(m),
314		    ("mtx_trylock() called on a recursed mutex"));
315		witness_try_enter(m, (opts | m->mtx_flags), file, line);
316	}
317#endif	/* WITNESS */
318
319	if ((opts & MTX_QUIET) == 0)
320		CTR5(KTR_LOCK, "TRY_LOCK %s [%p] result=%d at %s:%d",
321		    m->mtx_description, m, rval, file, line);
322
323	return rval;
324}
325
326/*
327 * _mtx_lock_sleep: the tougher part of acquiring an MTX_DEF lock.
328 *
329 * We call this if the lock is either contested (i.e. we need to go to
330 * sleep waiting for it), or if we need to recurse on it.
331 */
332void
333_mtx_lock_sleep(struct mtx *m, int opts, const char *file, int line)
334{
335	struct proc *p = curproc;
336
337	if ((m->mtx_lock & MTX_FLAGMASK) == (uintptr_t)p) {
338		m->mtx_recurse++;
339		atomic_set_ptr(&m->mtx_lock, MTX_RECURSED);
340		if ((opts & MTX_QUIET) == 0)
341			CTR1(KTR_LOCK, "_mtx_lock_sleep: %p recursing", m);
342		return;
343	}
344
345	if ((opts & MTX_QUIET) == 0)
346		CTR4(KTR_LOCK,
347		    "_mtx_lock_sleep: %s contested (lock=%p) at %s:%d",
348		    m->mtx_description, (void *)m->mtx_lock, file, line);
349
350	while (!_obtain_lock(m, p)) {
351		uintptr_t v;
352		struct proc *p1;
353
354		mtx_lock_spin(&sched_lock);
355		/*
356		 * Check if the lock has been released while spinning for
357		 * the sched_lock.
358		 */
359		if ((v = m->mtx_lock) == MTX_UNOWNED) {
360			mtx_unlock_spin(&sched_lock);
361			continue;
362		}
363
364		/*
365		 * The mutex was marked contested on release. This means that
366		 * there are processes blocked on it.
367		 */
368		if (v == MTX_CONTESTED) {
369			p1 = TAILQ_FIRST(&m->mtx_blocked);
370			MPASS(p1 != NULL);
371			m->mtx_lock = (uintptr_t)p | MTX_CONTESTED;
372
373			if (p1->p_pri.pri_level < p->p_pri.pri_level)
374				SET_PRIO(p, p1->p_pri.pri_level);
375			mtx_unlock_spin(&sched_lock);
376			return;
377		}
378
379		/*
380		 * If the mutex isn't already contested and a failure occurs
381		 * setting the contested bit, the mutex was either released
382		 * or the state of the MTX_RECURSED bit changed.
383		 */
384		if ((v & MTX_CONTESTED) == 0 &&
385		    !atomic_cmpset_ptr(&m->mtx_lock, (void *)v,
386			(void *)(v | MTX_CONTESTED))) {
387			mtx_unlock_spin(&sched_lock);
388			continue;
389		}
390
391		/*
392		 * We deffinately must sleep for this lock.
393		 */
394		mtx_assert(m, MA_NOTOWNED);
395
396#ifdef notyet
397		/*
398		 * If we're borrowing an interrupted thread's VM context, we
399		 * must clean up before going to sleep.
400		 */
401		if (p->p_ithd != NULL) {
402			struct ithd *it = p->p_ithd;
403
404			if (it->it_interrupted) {
405				if ((opts & MTX_QUIET) == 0)
406					CTR2(KTR_LOCK,
407				    "_mtx_lock_sleep: %p interrupted %p",
408					    it, it->it_interrupted);
409				intr_thd_fixup(it);
410			}
411		}
412#endif
413
414		/*
415		 * Put us on the list of threads blocked on this mutex.
416		 */
417		if (TAILQ_EMPTY(&m->mtx_blocked)) {
418			p1 = (struct proc *)(m->mtx_lock & MTX_FLAGMASK);
419			LIST_INSERT_HEAD(&p1->p_contested, m, mtx_contested);
420			TAILQ_INSERT_TAIL(&m->mtx_blocked, p, p_procq);
421		} else {
422			TAILQ_FOREACH(p1, &m->mtx_blocked, p_procq)
423				if (p1->p_pri.pri_level > p->p_pri.pri_level)
424					break;
425			if (p1)
426				TAILQ_INSERT_BEFORE(p1, p, p_procq);
427			else
428				TAILQ_INSERT_TAIL(&m->mtx_blocked, p, p_procq);
429		}
430
431		/*
432		 * Save who we're blocked on.
433		 */
434		p->p_blocked = m;
435		p->p_mtxname = m->mtx_description;
436		p->p_stat = SMTX;
437		propagate_priority(p);
438
439		if ((opts & MTX_QUIET) == 0)
440			CTR3(KTR_LOCK,
441			    "_mtx_lock_sleep: p %p blocked on [%p] %s", p, m,
442			    m->mtx_description);
443
444		mi_switch();
445
446		if ((opts & MTX_QUIET) == 0)
447			CTR3(KTR_LOCK,
448			  "_mtx_lock_sleep: p %p free from blocked on [%p] %s",
449			  p, m, m->mtx_description);
450
451		mtx_unlock_spin(&sched_lock);
452	}
453
454	return;
455}
456
457/*
458 * _mtx_lock_spin: the tougher part of acquiring an MTX_SPIN lock.
459 *
460 * This is only called if we need to actually spin for the lock. Recursion
461 * is handled inline.
462 */
463void
464_mtx_lock_spin(struct mtx *m, int opts, u_int mtx_intr, const char *file,
465	       int line)
466{
467	int i = 0;
468
469	if ((opts & MTX_QUIET) == 0)
470		CTR1(KTR_LOCK, "_mtx_lock_spin: %p spinning", m);
471
472	for (;;) {
473		if (_obtain_lock(m, curproc))
474			break;
475
476		while (m->mtx_lock != MTX_UNOWNED) {
477			if (i++ < 1000000)
478				continue;
479			if (i++ < 6000000)
480				DELAY(1);
481#ifdef DDB
482			else if (!db_active)
483#else
484			else
485#endif
486			panic("spin lock %s held by %p for > 5 seconds",
487			    m->mtx_description, (void *)m->mtx_lock);
488		}
489	}
490
491	m->mtx_saveintr = mtx_intr;
492	if ((opts & MTX_QUIET) == 0)
493		CTR1(KTR_LOCK, "_mtx_lock_spin: %p spin done", m);
494
495	return;
496}
497
498/*
499 * _mtx_unlock_sleep: the tougher part of releasing an MTX_DEF lock.
500 *
501 * We are only called here if the lock is recursed or contested (i.e. we
502 * need to wake up a blocked thread).
503 */
504void
505_mtx_unlock_sleep(struct mtx *m, int opts, const char *file, int line)
506{
507	struct proc *p, *p1;
508	struct mtx *m1;
509	int pri;
510
511	p = curproc;
512
513	if (mtx_recursed(m)) {
514		if (--(m->mtx_recurse) == 0)
515			atomic_clear_ptr(&m->mtx_lock, MTX_RECURSED);
516		if ((opts & MTX_QUIET) == 0)
517			CTR1(KTR_LOCK, "_mtx_unlock_sleep: %p unrecurse", m);
518		return;
519	}
520
521	mtx_lock_spin(&sched_lock);
522	if ((opts & MTX_QUIET) == 0)
523		CTR1(KTR_LOCK, "_mtx_unlock_sleep: %p contested", m);
524
525	p1 = TAILQ_FIRST(&m->mtx_blocked);
526	MPASS(p->p_magic == P_MAGIC);
527	MPASS(p1->p_magic == P_MAGIC);
528
529	TAILQ_REMOVE(&m->mtx_blocked, p1, p_procq);
530
531	if (TAILQ_EMPTY(&m->mtx_blocked)) {
532		LIST_REMOVE(m, mtx_contested);
533		_release_lock_quick(m);
534		if ((opts & MTX_QUIET) == 0)
535			CTR1(KTR_LOCK, "_mtx_unlock_sleep: %p not held", m);
536	} else
537		atomic_store_rel_ptr(&m->mtx_lock, (void *)MTX_CONTESTED);
538
539	pri = PRI_MAX;
540	LIST_FOREACH(m1, &p->p_contested, mtx_contested) {
541		int cp = TAILQ_FIRST(&m1->mtx_blocked)->p_pri.pri_level;
542		if (cp < pri)
543			pri = cp;
544	}
545
546	if (pri > p->p_pri.pri_native)
547		pri = p->p_pri.pri_native;
548	SET_PRIO(p, pri);
549
550	if ((opts & MTX_QUIET) == 0)
551		CTR2(KTR_LOCK, "_mtx_unlock_sleep: %p contested setrunqueue %p",
552		    m, p1);
553
554	p1->p_blocked = NULL;
555	p1->p_stat = SRUN;
556	setrunqueue(p1);
557
558	if ((opts & MTX_NOSWITCH) == 0 && p1->p_pri.pri_level < pri) {
559#ifdef notyet
560		if (p->p_ithd != NULL) {
561			struct ithd *it = p->p_ithd;
562
563			if (it->it_interrupted) {
564				if ((opts & MTX_QUIET) == 0)
565					CTR2(KTR_LOCK,
566				    "_mtx_unlock_sleep: %p interrupted %p",
567					    it, it->it_interrupted);
568				intr_thd_fixup(it);
569			}
570		}
571#endif
572		setrunqueue(p);
573		if ((opts & MTX_QUIET) == 0)
574			CTR2(KTR_LOCK,
575			    "_mtx_unlock_sleep: %p switching out lock=%p", m,
576			    (void *)m->mtx_lock);
577
578		mi_switch();
579		if ((opts & MTX_QUIET) == 0)
580			CTR2(KTR_LOCK, "_mtx_unlock_sleep: %p resuming lock=%p",
581			    m, (void *)m->mtx_lock);
582	}
583
584	mtx_unlock_spin(&sched_lock);
585
586	return;
587}
588
589/*
590 * All the unlocking of MTX_SPIN locks is done inline.
591 * See the _rel_spin_lock() macro for the details.
592 */
593
594/*
595 * The backing function for the INVARIANTS-enabled mtx_assert()
596 */
597#ifdef INVARIANT_SUPPORT
598void
599_mtx_assert(struct mtx *m, int what, const char *file, int line)
600{
601	switch (what) {
602	case MA_OWNED:
603	case MA_OWNED | MA_RECURSED:
604	case MA_OWNED | MA_NOTRECURSED:
605		if (!mtx_owned(m))
606			panic("mutex %s not owned at %s:%d",
607			    m->mtx_description, file, line);
608		if (mtx_recursed(m)) {
609			if ((what & MA_NOTRECURSED) != 0)
610				panic("mutex %s recursed at %s:%d",
611				    m->mtx_description, file, line);
612		} else if ((what & MA_RECURSED) != 0) {
613			panic("mutex %s unrecursed at %s:%d",
614			    m->mtx_description, file, line);
615		}
616		break;
617	case MA_NOTOWNED:
618		if (mtx_owned(m))
619			panic("mutex %s owned at %s:%d",
620			    m->mtx_description, file, line);
621		break;
622	default:
623		panic("unknown mtx_assert at %s:%d", file, line);
624	}
625}
626#endif
627
628/*
629 * The MUTEX_DEBUG-enabled mtx_validate()
630 */
631#define MV_DESTROY	0	/* validate before destory */
632#define MV_INIT		1	/* validate before init */
633
634#ifdef MUTEX_DEBUG
635
636int mtx_validate __P((struct mtx *, int));
637
638int
639mtx_validate(struct mtx *m, int when)
640{
641	struct mtx *mp;
642	int i;
643	int retval = 0;
644
645#ifdef WITNESS
646	if (witness_cold)
647		return 0;
648#endif
649	if (m == &all_mtx || cold)
650		return 0;
651
652	mtx_lock(&all_mtx);
653/*
654 * XXX - When kernacc() is fixed on the alpha to handle K0_SEG memory properly
655 * we can re-enable the kernacc() checks.
656 */
657#ifndef __alpha__
658	MPASS(kernacc((caddr_t)all_mtx.mtx_next, sizeof(uintptr_t),
659	    VM_PROT_READ) == 1);
660#endif
661	MPASS(all_mtx.mtx_next->mtx_prev == &all_mtx);
662	for (i = 0, mp = all_mtx.mtx_next; mp != &all_mtx; mp = mp->mtx_next) {
663#ifndef __alpha__
664		if (kernacc((caddr_t)mp->mtx_next, sizeof(uintptr_t),
665		    VM_PROT_READ) != 1) {
666			panic("mtx_validate: mp=%p mp->mtx_next=%p",
667			    mp, mp->mtx_next);
668		}
669#endif
670		i++;
671		if (i > mtx_cur_cnt) {
672			panic("mtx_validate: too many in chain, known=%d\n",
673			    mtx_cur_cnt);
674		}
675	}
676	MPASS(i == mtx_cur_cnt);
677	switch (when) {
678	case MV_DESTROY:
679		for (mp = all_mtx.mtx_next; mp != &all_mtx; mp = mp->mtx_next)
680			if (mp == m)
681				break;
682		MPASS(mp == m);
683		break;
684	case MV_INIT:
685		for (mp = all_mtx.mtx_next; mp != &all_mtx; mp = mp->mtx_next)
686		if (mp == m) {
687			/*
688			 * Not good. This mutex already exists.
689			 */
690			printf("re-initing existing mutex %s\n",
691			    m->mtx_description);
692			MPASS(m->mtx_lock == MTX_UNOWNED);
693			retval = 1;
694		}
695	}
696	mtx_unlock(&all_mtx);
697	return (retval);
698}
699#endif
700
701/*
702 * Mutex initialization routine; initialize lock `m' of type contained in
703 * `opts' with options contained in `opts' and description `description.'
704 * Place on "all_mtx" queue.
705 */
706void
707mtx_init(struct mtx *m, const char *description, int opts)
708{
709
710	if ((opts & MTX_QUIET) == 0)
711		CTR2(KTR_LOCK, "mtx_init %p (%s)", m, description);
712
713#ifdef MUTEX_DEBUG
714	/* Diagnostic and error correction */
715	if (mtx_validate(m, MV_INIT))
716		return;
717#endif
718
719	bzero((void *)m, sizeof *m);
720	TAILQ_INIT(&m->mtx_blocked);
721
722#ifdef WITNESS
723	if (!witness_cold) {
724		m->mtx_debug = malloc(sizeof(struct mtx_debug),
725		    M_WITNESS, M_NOWAIT | M_ZERO);
726		MPASS(m->mtx_debug != NULL);
727	}
728#endif
729
730	m->mtx_description = description;
731	m->mtx_flags = opts;
732	m->mtx_lock = MTX_UNOWNED;
733
734	/* Put on all mutex queue */
735	mtx_lock(&all_mtx);
736	m->mtx_next = &all_mtx;
737	m->mtx_prev = all_mtx.mtx_prev;
738	m->mtx_prev->mtx_next = m;
739	all_mtx.mtx_prev = m;
740	if (++mtx_cur_cnt > mtx_max_cnt)
741		mtx_max_cnt = mtx_cur_cnt;
742	mtx_unlock(&all_mtx);
743
744#ifdef WITNESS
745	if (!witness_cold)
746		witness_init(m, opts);
747#endif
748}
749
750/*
751 * Remove lock `m' from all_mtx queue.
752 */
753void
754mtx_destroy(struct mtx *m)
755{
756
757#ifdef WITNESS
758	KASSERT(!witness_cold, ("%s: Cannot destroy while still cold\n",
759	    __FUNCTION__));
760#endif
761
762	CTR2(KTR_LOCK, "mtx_destroy %p (%s)", m, m->mtx_description);
763
764#ifdef MUTEX_DEBUG
765	if (m->mtx_next == NULL)
766		panic("mtx_destroy: %p (%s) already destroyed",
767		    m, m->mtx_description);
768
769	if (!mtx_owned(m)) {
770		MPASS(m->mtx_lock == MTX_UNOWNED);
771	} else {
772		MPASS((m->mtx_lock & (MTX_RECURSED|MTX_CONTESTED)) == 0);
773	}
774
775	/* diagnostic */
776	mtx_validate(m, MV_DESTROY);
777#endif
778
779#ifdef WITNESS
780	if (m->mtx_witness)
781		witness_destroy(m);
782#endif /* WITNESS */
783
784	/* Remove from the all mutex queue */
785	mtx_lock(&all_mtx);
786	m->mtx_next->mtx_prev = m->mtx_prev;
787	m->mtx_prev->mtx_next = m->mtx_next;
788
789#ifdef MUTEX_DEBUG
790	m->mtx_next = m->mtx_prev = NULL;
791#endif
792
793#ifdef WITNESS
794	free(m->mtx_debug, M_WITNESS);
795	m->mtx_debug = NULL;
796#endif
797
798	mtx_cur_cnt--;
799	mtx_unlock(&all_mtx);
800}
801
802
803/*
804 * The WITNESS-enabled diagnostic code.
805 */
806#ifdef WITNESS
807static void
808witness_fixup(void *dummy __unused)
809{
810	struct mtx *mp;
811
812	/*
813	 * We have to release Giant before initializing its witness
814	 * structure so that WITNESS doesn't get confused.
815	 */
816	mtx_unlock(&Giant);
817	mtx_assert(&Giant, MA_NOTOWNED);
818
819	mtx_lock(&all_mtx);
820
821	/* Iterate through all mutexes and finish up mutex initialization. */
822	for (mp = all_mtx.mtx_next; mp != &all_mtx; mp = mp->mtx_next) {
823
824		mp->mtx_debug = malloc(sizeof(struct mtx_debug),
825		    M_WITNESS, M_NOWAIT | M_ZERO);
826		MPASS(mp->mtx_debug != NULL);
827
828		witness_init(mp, mp->mtx_flags);
829	}
830	mtx_unlock(&all_mtx);
831
832	/* Mark the witness code as being ready for use. */
833	atomic_store_rel_int(&witness_cold, 0);
834
835	mtx_lock(&Giant);
836}
837SYSINIT(wtnsfxup, SI_SUB_MUTEX, SI_ORDER_FIRST, witness_fixup, NULL)
838
839#define WITNESS_COUNT 200
840#define	WITNESS_NCHILDREN 2
841
842int witness_watch = 1;
843
844struct witness {
845	struct witness	*w_next;
846	const char	*w_description;
847	const char	*w_file;
848	int		 w_line;
849	struct witness	*w_morechildren;
850	u_char		 w_childcnt;
851	u_char		 w_Giant_squawked:1;
852	u_char		 w_other_squawked:1;
853	u_char		 w_same_squawked:1;
854	u_char		 w_spin:1;	/* MTX_SPIN type mutex. */
855	u_int		 w_level;
856	struct witness	*w_children[WITNESS_NCHILDREN];
857};
858
859struct witness_blessed {
860	char 	*b_lock1;
861	char	*b_lock2;
862};
863
864#ifdef DDB
865/*
866 * When DDB is enabled and witness_ddb is set to 1, it will cause the system to
867 * drop into kdebug() when:
868 *	- a lock heirarchy violation occurs
869 *	- locks are held when going to sleep.
870 */
871int	witness_ddb;
872#ifdef WITNESS_DDB
873TUNABLE_INT_DECL("debug.witness_ddb", 1, witness_ddb);
874#else
875TUNABLE_INT_DECL("debug.witness_ddb", 0, witness_ddb);
876#endif
877SYSCTL_INT(_debug, OID_AUTO, witness_ddb, CTLFLAG_RW, &witness_ddb, 0, "");
878#endif /* DDB */
879
880int	witness_skipspin;
881#ifdef WITNESS_SKIPSPIN
882TUNABLE_INT_DECL("debug.witness_skipspin", 1, witness_skipspin);
883#else
884TUNABLE_INT_DECL("debug.witness_skipspin", 0, witness_skipspin);
885#endif
886SYSCTL_INT(_debug, OID_AUTO, witness_skipspin, CTLFLAG_RD, &witness_skipspin, 0,
887    "");
888
889/*
890 * Witness-enabled globals
891 */
892static struct mtx	w_mtx;
893static struct witness	*w_free;
894static struct witness	*w_all;
895static int		 w_inited;
896static int		 witness_dead;	/* fatal error, probably no memory */
897
898static struct witness	 w_data[WITNESS_COUNT];
899
900/*
901 * Internal witness routine prototypes
902 */
903static struct witness *enroll(const char *description, int flag);
904static int itismychild(struct witness *parent, struct witness *child);
905static void removechild(struct witness *parent, struct witness *child);
906static int isitmychild(struct witness *parent, struct witness *child);
907static int isitmydescendant(struct witness *parent, struct witness *child);
908static int dup_ok(struct witness *);
909static int blessed(struct witness *, struct witness *);
910static void
911    witness_displaydescendants(void(*)(const char *fmt, ...), struct witness *);
912static void witness_leveldescendents(struct witness *parent, int level);
913static void witness_levelall(void);
914static struct witness * witness_get(void);
915static void witness_free(struct witness *m);
916
917static char *ignore_list[] = {
918	"witness lock",
919	NULL
920};
921
922static char *spin_order_list[] = {
923#if defined(__i386__) && defined (SMP)
924	"com",
925#endif
926	"sio",
927#ifdef __i386__
928	"cy",
929#endif
930	"ng_node",
931	"ng_worklist",
932	"ithread table lock",
933	"ithread list lock",
934	"sched lock",
935#ifdef __i386__
936	"clk",
937#endif
938	"callout",
939	/*
940	 * leaf locks
941	 */
942#ifdef SMP
943#ifdef __i386__
944	"ap boot",
945	"imen",
946#endif
947	"smp rendezvous",
948#endif
949	NULL
950};
951
952static char *order_list[] = {
953	"Giant", "proctree", "allproc", "process lock", "uidinfo hash",
954	    "uidinfo struct", NULL,
955	NULL
956};
957
958static char *dup_list[] = {
959	"process lock",
960	NULL
961};
962
963static char *sleep_list[] = {
964	"Giant",
965	NULL
966};
967
968/*
969 * Pairs of locks which have been blessed
970 * Don't complain about order problems with blessed locks
971 */
972static struct witness_blessed blessed_list[] = {
973};
974static int blessed_count =
975	sizeof(blessed_list) / sizeof(struct witness_blessed);
976
977static void
978witness_init(struct mtx *m, int flag)
979{
980	m->mtx_witness = enroll(m->mtx_description, flag);
981}
982
983static void
984witness_destroy(struct mtx *m)
985{
986	struct mtx *m1;
987	struct proc *p;
988	p = curproc;
989	LIST_FOREACH(m1, &p->p_heldmtx, mtx_held) {
990		if (m1 == m) {
991			LIST_REMOVE(m, mtx_held);
992			break;
993		}
994	}
995	return;
996
997}
998
999static void
1000witness_display(void(*prnt)(const char *fmt, ...))
1001{
1002	struct witness *w, *w1;
1003	int level, found;
1004
1005	KASSERT(!witness_cold, ("%s: witness_cold\n", __FUNCTION__));
1006	witness_levelall();
1007
1008	/*
1009	 * First, handle sleep mutexes which have been acquired at least
1010	 * once.
1011	 */
1012	prnt("Sleep mutexes:\n");
1013	for (w = w_all; w; w = w->w_next) {
1014		if (w->w_file == NULL || w->w_spin)
1015			continue;
1016		for (w1 = w_all; w1; w1 = w1->w_next) {
1017			if (isitmychild(w1, w))
1018				break;
1019		}
1020		if (w1 != NULL)
1021			continue;
1022		/*
1023		 * This lock has no anscestors, display its descendants.
1024		 */
1025		witness_displaydescendants(prnt, w);
1026	}
1027
1028	/*
1029	 * Now do spin mutexes which have been acquired at least once.
1030	 */
1031	prnt("\nSpin mutexes:\n");
1032	level = 0;
1033	while (level < sizeof(spin_order_list) / sizeof(char *)) {
1034		found = 0;
1035		for (w = w_all; w; w = w->w_next) {
1036			if (w->w_file == NULL || !w->w_spin)
1037				continue;
1038			if (w->w_level == 1 << level) {
1039				witness_displaydescendants(prnt, w);
1040				level++;
1041				found = 1;
1042			}
1043		}
1044		if (found == 0)
1045			level++;
1046	}
1047
1048	/*
1049	 * Finally, any mutexes which have not been acquired yet.
1050	 */
1051	prnt("\nMutexes which were never acquired:\n");
1052	for (w = w_all; w; w = w->w_next) {
1053		if (w->w_file != NULL)
1054			continue;
1055		prnt("%s\n", w->w_description);
1056	}
1057}
1058
1059void
1060witness_enter(struct mtx *m, int flags, const char *file, int line)
1061{
1062	struct witness *w, *w1;
1063	struct mtx *m1;
1064	struct proc *p;
1065	int i;
1066#ifdef DDB
1067	int go_into_ddb = 0;
1068#endif /* DDB */
1069
1070	if (witness_cold || m->mtx_witness == NULL || panicstr)
1071		return;
1072	w = m->mtx_witness;
1073	p = curproc;
1074
1075	if (flags & MTX_SPIN) {
1076		if ((m->mtx_flags & MTX_SPIN) == 0)
1077			panic("mutex_enter: MTX_SPIN on MTX_DEF mutex %s @"
1078			    " %s:%d", m->mtx_description, file, line);
1079		if (mtx_recursed(m)) {
1080			if ((m->mtx_flags & MTX_RECURSE) == 0)
1081				panic("mutex_enter: recursion on non-recursive"
1082				    " mutex %s @ %s:%d", m->mtx_description,
1083				    file, line);
1084			return;
1085		}
1086		mtx_lock_spin_flags(&w_mtx, MTX_QUIET);
1087		i = PCPU_GET(witness_spin_check);
1088		if (i != 0 && w->w_level < i) {
1089			mtx_unlock_spin_flags(&w_mtx, MTX_QUIET);
1090			panic("mutex_enter(%s:%x, MTX_SPIN) out of order @"
1091			    " %s:%d already holding %s:%x",
1092			    m->mtx_description, w->w_level, file, line,
1093			    spin_order_list[ffs(i)-1], i);
1094		}
1095		PCPU_SET(witness_spin_check, i | w->w_level);
1096		mtx_unlock_spin_flags(&w_mtx, MTX_QUIET);
1097		p->p_spinlocks++;
1098		MPASS(p->p_spinlocks > 0);
1099		w->w_file = file;
1100		w->w_line = line;
1101		m->mtx_line = line;
1102		m->mtx_file = file;
1103		return;
1104	}
1105	if ((m->mtx_flags & MTX_SPIN) != 0)
1106		panic("mutex_enter: MTX_DEF on MTX_SPIN mutex %s @ %s:%d",
1107		    m->mtx_description, file, line);
1108
1109	if (mtx_recursed(m)) {
1110		if ((m->mtx_flags & MTX_RECURSE) == 0)
1111			panic("mutex_enter: recursion on non-recursive"
1112			    " mutex %s @ %s:%d", m->mtx_description,
1113			    file, line);
1114		return;
1115	}
1116	if (witness_dead)
1117		goto out;
1118	if (cold)
1119		goto out;
1120
1121	if (p->p_spinlocks != 0)
1122		panic("blockable mtx_lock() of %s when not legal @ %s:%d",
1123			    m->mtx_description, file, line);
1124	/*
1125	 * Is this the first mutex acquired
1126	 */
1127	if ((m1 = LIST_FIRST(&p->p_heldmtx)) == NULL)
1128		goto out;
1129
1130	if ((w1 = m1->mtx_witness) == w) {
1131		if (w->w_same_squawked || dup_ok(w))
1132			goto out;
1133		w->w_same_squawked = 1;
1134		printf("acquring duplicate lock of same type: \"%s\"\n",
1135			m->mtx_description);
1136		printf(" 1st @ %s:%d\n", w->w_file, w->w_line);
1137		printf(" 2nd @ %s:%d\n", file, line);
1138#ifdef DDB
1139		go_into_ddb = 1;
1140#endif /* DDB */
1141		goto out;
1142	}
1143	MPASS(!mtx_owned(&w_mtx));
1144	mtx_lock_spin_flags(&w_mtx, MTX_QUIET);
1145	/*
1146	 * If we have a known higher number just say ok
1147	 */
1148	if (witness_watch > 1 && w->w_level > w1->w_level) {
1149		mtx_unlock_spin_flags(&w_mtx, MTX_QUIET);
1150		goto out;
1151	}
1152	if (isitmydescendant(m1->mtx_witness, w)) {
1153		mtx_unlock_spin_flags(&w_mtx, MTX_QUIET);
1154		goto out;
1155	}
1156	for (i = 0; m1 != NULL; m1 = LIST_NEXT(m1, mtx_held), i++) {
1157
1158		MPASS(i < 200);
1159		w1 = m1->mtx_witness;
1160		if (isitmydescendant(w, w1)) {
1161			mtx_unlock_spin_flags(&w_mtx, MTX_QUIET);
1162			if (blessed(w, w1))
1163				goto out;
1164			if (m1 == &Giant) {
1165				if (w1->w_Giant_squawked)
1166					goto out;
1167				else
1168					w1->w_Giant_squawked = 1;
1169			} else {
1170				if (w1->w_other_squawked)
1171					goto out;
1172				else
1173					w1->w_other_squawked = 1;
1174			}
1175			printf("lock order reversal\n");
1176			printf(" 1st %s last acquired @ %s:%d\n",
1177			    w->w_description, w->w_file, w->w_line);
1178			printf(" 2nd %p %s @ %s:%d\n",
1179			    m1, w1->w_description, w1->w_file, w1->w_line);
1180			printf(" 3rd %p %s @ %s:%d\n",
1181			    m, w->w_description, file, line);
1182#ifdef DDB
1183			go_into_ddb = 1;
1184#endif /* DDB */
1185			goto out;
1186		}
1187	}
1188	m1 = LIST_FIRST(&p->p_heldmtx);
1189	if (!itismychild(m1->mtx_witness, w))
1190		mtx_unlock_spin_flags(&w_mtx, MTX_QUIET);
1191
1192out:
1193#ifdef DDB
1194	if (witness_ddb && go_into_ddb)
1195		Debugger("witness_enter");
1196#endif /* DDB */
1197	w->w_file = file;
1198	w->w_line = line;
1199	m->mtx_line = line;
1200	m->mtx_file = file;
1201
1202	/*
1203	 * If this pays off it likely means that a mutex being witnessed
1204	 * is acquired in hardclock. Put it in the ignore list. It is
1205	 * likely not the mutex this assert fails on.
1206	 */
1207	MPASS(m->mtx_held.le_prev == NULL);
1208	LIST_INSERT_HEAD(&p->p_heldmtx, (struct mtx*)m, mtx_held);
1209}
1210
1211void
1212witness_try_enter(struct mtx *m, int flags, const char *file, int line)
1213{
1214	struct proc *p;
1215	struct witness *w = m->mtx_witness;
1216
1217	if (witness_cold)
1218		return;
1219	if (panicstr)
1220		return;
1221	if (flags & MTX_SPIN) {
1222		if ((m->mtx_flags & MTX_SPIN) == 0)
1223			panic("mutex_try_enter: "
1224			    "MTX_SPIN on MTX_DEF mutex %s @ %s:%d",
1225			    m->mtx_description, file, line);
1226		if (mtx_recursed(m)) {
1227			if ((m->mtx_flags & MTX_RECURSE) == 0)
1228				panic("mutex_try_enter: recursion on"
1229				    " non-recursive mutex %s @ %s:%d",
1230				    m->mtx_description, file, line);
1231			return;
1232		}
1233		mtx_lock_spin_flags(&w_mtx, MTX_QUIET);
1234		PCPU_SET(witness_spin_check,
1235		    PCPU_GET(witness_spin_check) | w->w_level);
1236		mtx_unlock_spin_flags(&w_mtx, MTX_QUIET);
1237		w->w_file = file;
1238		w->w_line = line;
1239		m->mtx_line = line;
1240		m->mtx_file = file;
1241		return;
1242	}
1243
1244	if ((m->mtx_flags & MTX_SPIN) != 0)
1245		panic("mutex_try_enter: MTX_DEF on MTX_SPIN mutex %s @ %s:%d",
1246		    m->mtx_description, file, line);
1247
1248	if (mtx_recursed(m)) {
1249		if ((m->mtx_flags & MTX_RECURSE) == 0)
1250			panic("mutex_try_enter: recursion on non-recursive"
1251			    " mutex %s @ %s:%d", m->mtx_description, file,
1252			    line);
1253		return;
1254	}
1255	w->w_file = file;
1256	w->w_line = line;
1257	m->mtx_line = line;
1258	m->mtx_file = file;
1259	p = curproc;
1260	MPASS(m->mtx_held.le_prev == NULL);
1261	LIST_INSERT_HEAD(&p->p_heldmtx, (struct mtx*)m, mtx_held);
1262}
1263
1264void
1265witness_exit(struct mtx *m, int flags, const char *file, int line)
1266{
1267	struct witness *w;
1268	struct proc *p;
1269
1270	if (witness_cold || m->mtx_witness == NULL || panicstr)
1271		return;
1272	w = m->mtx_witness;
1273	p = curproc;
1274
1275	if (flags & MTX_SPIN) {
1276		if ((m->mtx_flags & MTX_SPIN) == 0)
1277			panic("mutex_exit: MTX_SPIN on MTX_DEF mutex %s @"
1278			    " %s:%d", m->mtx_description, file, line);
1279		if (mtx_recursed(m)) {
1280			if ((m->mtx_flags & MTX_RECURSE) == 0)
1281				panic("mutex_exit: recursion on non-recursive"
1282				    " mutex %s @ %s:%d", m->mtx_description,
1283				    file, line);
1284			return;
1285		}
1286		mtx_lock_spin_flags(&w_mtx, MTX_QUIET);
1287		PCPU_SET(witness_spin_check,
1288		    PCPU_GET(witness_spin_check) & ~w->w_level);
1289		mtx_unlock_spin_flags(&w_mtx, MTX_QUIET);
1290		MPASS(p->p_spinlocks > 0);
1291		p->p_spinlocks--;
1292		return;
1293	}
1294	if ((m->mtx_flags & MTX_SPIN) != 0)
1295		panic("mutex_exit: MTX_DEF on MTX_SPIN mutex %s @ %s:%d",
1296		    m->mtx_description, file, line);
1297
1298	if (mtx_recursed(m)) {
1299		if ((m->mtx_flags & MTX_RECURSE) == 0)
1300			panic("mutex_exit: recursion on non-recursive"
1301			    " mutex %s @ %s:%d", m->mtx_description,
1302			    file, line);
1303		return;
1304	}
1305
1306	if ((flags & MTX_NOSWITCH) == 0 && p->p_spinlocks != 0 && !cold)
1307		panic("switchable mtx_unlock() of %s when not legal @ %s:%d",
1308			    m->mtx_description, file, line);
1309	LIST_REMOVE(m, mtx_held);
1310	m->mtx_held.le_prev = NULL;
1311}
1312
1313int
1314witness_sleep(int check_only, struct mtx *mtx, const char *file, int line)
1315{
1316	struct mtx *m;
1317	struct proc *p;
1318	char **sleep;
1319	int n = 0;
1320
1321	KASSERT(!witness_cold, ("%s: witness_cold\n", __FUNCTION__));
1322	p = curproc;
1323	LIST_FOREACH(m, &p->p_heldmtx, mtx_held) {
1324		if (m == mtx)
1325			continue;
1326		for (sleep = sleep_list; *sleep!= NULL; sleep++)
1327			if (strcmp(m->mtx_description, *sleep) == 0)
1328				goto next;
1329		if (n == 0)
1330			printf("Whee!\n");
1331		printf("%s:%d: %s with \"%s\" locked from %s:%d\n",
1332			file, line, check_only ? "could sleep" : "sleeping",
1333			m->mtx_description,
1334			m->mtx_witness->w_file, m->mtx_witness->w_line);
1335		n++;
1336	next:
1337	}
1338#ifdef DDB
1339	if (witness_ddb && n)
1340		Debugger("witness_sleep");
1341#endif /* DDB */
1342	return (n);
1343}
1344
1345static struct witness *
1346enroll(const char *description, int flag)
1347{
1348	int i;
1349	struct witness *w, *w1;
1350	char **ignore;
1351	char **order;
1352
1353	if (!witness_watch)
1354		return (NULL);
1355	for (ignore = ignore_list; *ignore != NULL; ignore++)
1356		if (strcmp(description, *ignore) == 0)
1357			return (NULL);
1358
1359	if (w_inited == 0) {
1360		mtx_init(&w_mtx, "witness lock", MTX_SPIN);
1361		for (i = 0; i < WITNESS_COUNT; i++) {
1362			w = &w_data[i];
1363			witness_free(w);
1364		}
1365		w_inited = 1;
1366		for (order = order_list; *order != NULL; order++) {
1367			w = enroll(*order, MTX_DEF);
1368			w->w_file = "order list";
1369			for (order++; *order != NULL; order++) {
1370				w1 = enroll(*order, MTX_DEF);
1371				w1->w_file = "order list";
1372				itismychild(w, w1);
1373				w = w1;
1374    	    	    	}
1375		}
1376	}
1377	if ((flag & MTX_SPIN) && witness_skipspin)
1378		return (NULL);
1379	mtx_lock_spin_flags(&w_mtx, MTX_QUIET);
1380	for (w = w_all; w; w = w->w_next) {
1381		if (strcmp(description, w->w_description) == 0) {
1382			mtx_unlock_spin_flags(&w_mtx, MTX_QUIET);
1383			return (w);
1384		}
1385	}
1386	if ((w = witness_get()) == NULL)
1387		return (NULL);
1388	w->w_next = w_all;
1389	w_all = w;
1390	w->w_description = description;
1391	mtx_unlock_spin_flags(&w_mtx, MTX_QUIET);
1392	if (flag & MTX_SPIN) {
1393		w->w_spin = 1;
1394
1395		i = 1;
1396		for (order = spin_order_list; *order != NULL; order++) {
1397			if (strcmp(description, *order) == 0)
1398				break;
1399			i <<= 1;
1400		}
1401		if (*order == NULL)
1402			panic("spin lock %s not in order list", description);
1403		w->w_level = i;
1404	}
1405
1406	return (w);
1407}
1408
1409static int
1410itismychild(struct witness *parent, struct witness *child)
1411{
1412	static int recursed;
1413
1414	/*
1415	 * Insert "child" after "parent"
1416	 */
1417	while (parent->w_morechildren)
1418		parent = parent->w_morechildren;
1419
1420	if (parent->w_childcnt == WITNESS_NCHILDREN) {
1421		if ((parent->w_morechildren = witness_get()) == NULL)
1422			return (1);
1423		parent = parent->w_morechildren;
1424	}
1425	MPASS(child != NULL);
1426	parent->w_children[parent->w_childcnt++] = child;
1427	/*
1428	 * now prune whole tree
1429	 */
1430	if (recursed)
1431		return (0);
1432	recursed = 1;
1433	for (child = w_all; child != NULL; child = child->w_next) {
1434		for (parent = w_all; parent != NULL;
1435		    parent = parent->w_next) {
1436			if (!isitmychild(parent, child))
1437				continue;
1438			removechild(parent, child);
1439			if (isitmydescendant(parent, child))
1440				continue;
1441			itismychild(parent, child);
1442		}
1443	}
1444	recursed = 0;
1445	witness_levelall();
1446	return (0);
1447}
1448
1449static void
1450removechild(struct witness *parent, struct witness *child)
1451{
1452	struct witness *w, *w1;
1453	int i;
1454
1455	for (w = parent; w != NULL; w = w->w_morechildren)
1456		for (i = 0; i < w->w_childcnt; i++)
1457			if (w->w_children[i] == child)
1458				goto found;
1459	return;
1460found:
1461	for (w1 = w; w1->w_morechildren != NULL; w1 = w1->w_morechildren)
1462		continue;
1463	w->w_children[i] = w1->w_children[--w1->w_childcnt];
1464	MPASS(w->w_children[i] != NULL);
1465
1466	if (w1->w_childcnt != 0)
1467		return;
1468
1469	if (w1 == parent)
1470		return;
1471	for (w = parent; w->w_morechildren != w1; w = w->w_morechildren)
1472		continue;
1473	w->w_morechildren = 0;
1474	witness_free(w1);
1475}
1476
1477static int
1478isitmychild(struct witness *parent, struct witness *child)
1479{
1480	struct witness *w;
1481	int i;
1482
1483	for (w = parent; w != NULL; w = w->w_morechildren) {
1484		for (i = 0; i < w->w_childcnt; i++) {
1485			if (w->w_children[i] == child)
1486				return (1);
1487		}
1488	}
1489	return (0);
1490}
1491
1492static int
1493isitmydescendant(struct witness *parent, struct witness *child)
1494{
1495	struct witness *w;
1496	int i;
1497	int j;
1498
1499	for (j = 0, w = parent; w != NULL; w = w->w_morechildren, j++) {
1500		MPASS(j < 1000);
1501		for (i = 0; i < w->w_childcnt; i++) {
1502			if (w->w_children[i] == child)
1503				return (1);
1504		}
1505		for (i = 0; i < w->w_childcnt; i++) {
1506			if (isitmydescendant(w->w_children[i], child))
1507				return (1);
1508		}
1509	}
1510	return (0);
1511}
1512
1513void
1514witness_levelall (void)
1515{
1516	struct witness *w, *w1;
1517
1518	for (w = w_all; w; w = w->w_next)
1519		if (!(w->w_spin))
1520			w->w_level = 0;
1521	for (w = w_all; w; w = w->w_next) {
1522		if (w->w_spin)
1523			continue;
1524		for (w1 = w_all; w1; w1 = w1->w_next) {
1525			if (isitmychild(w1, w))
1526				break;
1527		}
1528		if (w1 != NULL)
1529			continue;
1530		witness_leveldescendents(w, 0);
1531	}
1532}
1533
1534static void
1535witness_leveldescendents(struct witness *parent, int level)
1536{
1537	int i;
1538	struct witness *w;
1539
1540	if (parent->w_level < level)
1541		parent->w_level = level;
1542	level++;
1543	for (w = parent; w != NULL; w = w->w_morechildren)
1544		for (i = 0; i < w->w_childcnt; i++)
1545			witness_leveldescendents(w->w_children[i], level);
1546}
1547
1548static void
1549witness_displaydescendants(void(*prnt)(const char *fmt, ...),
1550			   struct witness *parent)
1551{
1552	struct witness *w;
1553	int i;
1554	int level;
1555
1556	level = parent->w_spin ? ffs(parent->w_level) : parent->w_level;
1557
1558	prnt("%d", level);
1559	if (level < 10)
1560		prnt(" ");
1561	for (i = 0; i < level; i++)
1562		prnt(" ");
1563	prnt("%s", parent->w_description);
1564	if (parent->w_file != NULL)
1565		prnt(" -- last acquired @ %s:%d\n", parent->w_file,
1566		    parent->w_line);
1567
1568	for (w = parent; w != NULL; w = w->w_morechildren)
1569		for (i = 0; i < w->w_childcnt; i++)
1570			    witness_displaydescendants(prnt, w->w_children[i]);
1571    }
1572
1573static int
1574dup_ok(struct witness *w)
1575{
1576	char **dup;
1577
1578	for (dup = dup_list; *dup!= NULL; dup++)
1579		if (strcmp(w->w_description, *dup) == 0)
1580			return (1);
1581	return (0);
1582}
1583
1584static int
1585blessed(struct witness *w1, struct witness *w2)
1586{
1587	int i;
1588	struct witness_blessed *b;
1589
1590	for (i = 0; i < blessed_count; i++) {
1591		b = &blessed_list[i];
1592		if (strcmp(w1->w_description, b->b_lock1) == 0) {
1593			if (strcmp(w2->w_description, b->b_lock2) == 0)
1594				return (1);
1595			continue;
1596		}
1597		if (strcmp(w1->w_description, b->b_lock2) == 0)
1598			if (strcmp(w2->w_description, b->b_lock1) == 0)
1599				return (1);
1600	}
1601	return (0);
1602}
1603
1604static struct witness *
1605witness_get()
1606{
1607	struct witness *w;
1608
1609	if ((w = w_free) == NULL) {
1610		witness_dead = 1;
1611		mtx_unlock_spin_flags(&w_mtx, MTX_QUIET);
1612		printf("witness exhausted\n");
1613		return (NULL);
1614	}
1615	w_free = w->w_next;
1616	bzero(w, sizeof(*w));
1617	return (w);
1618}
1619
1620static void
1621witness_free(struct witness *w)
1622{
1623	w->w_next = w_free;
1624	w_free = w;
1625}
1626
1627int
1628witness_list(struct proc *p)
1629{
1630	struct mtx *m;
1631	int nheld;
1632
1633	KASSERT(!witness_cold, ("%s: witness_cold\n", __FUNCTION__));
1634	nheld = 0;
1635	LIST_FOREACH(m, &p->p_heldmtx, mtx_held) {
1636		printf("\t\"%s\" (%p) locked at %s:%d\n",
1637		    m->mtx_description, m,
1638		    m->mtx_witness->w_file, m->mtx_witness->w_line);
1639		nheld++;
1640	}
1641
1642	return (nheld);
1643}
1644
1645#ifdef DDB
1646
1647DB_SHOW_COMMAND(mutexes, db_witness_list)
1648{
1649
1650	witness_list(curproc);
1651}
1652
1653DB_SHOW_COMMAND(witness, db_witness_display)
1654{
1655
1656	witness_display(db_printf);
1657}
1658#endif
1659
1660void
1661witness_save(struct mtx *m, const char **filep, int *linep)
1662{
1663
1664	KASSERT(!witness_cold, ("%s: witness_cold\n", __FUNCTION__));
1665	if (m->mtx_witness == NULL)
1666		return;
1667
1668	*filep = m->mtx_witness->w_file;
1669	*linep = m->mtx_witness->w_line;
1670}
1671
1672void
1673witness_restore(struct mtx *m, const char *file, int line)
1674{
1675
1676	KASSERT(!witness_cold, ("%s: witness_cold\n", __FUNCTION__));
1677	if (m->mtx_witness == NULL)
1678		return;
1679
1680	m->mtx_witness->w_file = file;
1681	m->mtx_witness->w_line = line;
1682}
1683
1684#endif	/* WITNESS */
1685