kern_mutex.c revision 72200
1/*-
2 * Copyright (c) 1998 Berkeley Software Design, Inc. All rights reserved.
3 *
4 * Redistribution and use in source and binary forms, with or without
5 * modification, are permitted provided that the following conditions
6 * are met:
7 * 1. Redistributions of source code must retain the above copyright
8 *    notice, this list of conditions and the following disclaimer.
9 * 2. Redistributions in binary form must reproduce the above copyright
10 *    notice, this list of conditions and the following disclaimer in the
11 *    documentation and/or other materials provided with the distribution.
12 * 3. Berkeley Software Design Inc's name may not be used to endorse or
13 *    promote products derived from this software without specific prior
14 *    written permission.
15 *
16 * THIS SOFTWARE IS PROVIDED BY BERKELEY SOFTWARE DESIGN INC ``AS IS'' AND
17 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
18 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
19 * ARE DISCLAIMED.  IN NO EVENT SHALL BERKELEY SOFTWARE DESIGN INC BE LIABLE
20 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
21 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
22 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
23 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
24 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
25 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
26 * SUCH DAMAGE.
27 *
28 *	from BSDI $Id: mutex_witness.c,v 1.1.2.20 2000/04/27 03:10:27 cp Exp $
29 *	and BSDI $Id: synch_machdep.c,v 2.3.2.39 2000/04/27 03:10:25 cp Exp $
30 * $FreeBSD: head/sys/kern/kern_mutex.c 72200 2001-02-09 06:11:45Z bmilekic $
31 */
32
33/*
34 * Machine independent bits of mutex implementation and implementation of
35 * `witness' structure & related debugging routines.
36 */
37
38/*
39 *	Main Entry: witness
40 *	Pronunciation: 'wit-n&s
41 *	Function: noun
42 *	Etymology: Middle English witnesse, from Old English witnes knowledge,
43 *	    testimony, witness, from 2wit
44 *	Date: before 12th century
45 *	1 : attestation of a fact or event : TESTIMONY
46 *	2 : one that gives evidence; specifically : one who testifies in
47 *	    a cause or before a judicial tribunal
48 *	3 : one asked to be present at a transaction so as to be able to
49 *	    testify to its having taken place
50 *	4 : one who has personal knowledge of something
51 *	5 a : something serving as evidence or proof : SIGN
52 *	  b : public affirmation by word or example of usually
53 *	      religious faith or conviction <the heroic witness to divine
54 *	      life -- Pilot>
55 *	6 capitalized : a member of the Jehovah's Witnesses
56 */
57
58#include "opt_ddb.h"
59#include "opt_witness.h"
60
61#include <sys/param.h>
62#include <sys/bus.h>
63#include <sys/kernel.h>
64#include <sys/malloc.h>
65#include <sys/proc.h>
66#include <sys/sysctl.h>
67#include <sys/systm.h>
68#include <sys/vmmeter.h>
69#include <sys/ktr.h>
70
71#include <machine/atomic.h>
72#include <machine/bus.h>
73#include <machine/clock.h>
74#include <machine/cpu.h>
75
76#include <ddb/ddb.h>
77
78#include <vm/vm.h>
79#include <vm/vm_extern.h>
80
81#include <sys/mutex.h>
82
83/*
84 * The WITNESS-enabled mutex debug structure.
85 */
86#ifdef WITNESS
87struct mtx_debug {
88	struct witness	*mtxd_witness;
89	LIST_ENTRY(mtx)	mtxd_held;
90	const char	*mtxd_file;
91	int		mtxd_line;
92};
93
94#define mtx_held	mtx_debug->mtxd_held
95#define	mtx_file	mtx_debug->mtxd_file
96#define	mtx_line	mtx_debug->mtxd_line
97#define	mtx_witness	mtx_debug->mtxd_witness
98#endif	/* WITNESS */
99
100/*
101 * Internal utility macros.
102 */
103#define mtx_unowned(m)	((m)->mtx_lock == MTX_UNOWNED)
104
105#define mtx_owner(m)	(mtx_unowned((m)) ? NULL \
106	: (struct proc *)((m)->mtx_lock & MTX_FLAGMASK))
107
108#define RETIP(x)		*(((uintptr_t *)(&x)) - 1)
109#define SET_PRIO(p, pri)	(p)->p_priority = (pri)
110
111/*
112 * Early WITNESS-enabled declarations.
113 */
114#ifdef WITNESS
115
116/*
117 * Internal WITNESS routines which must be prototyped early.
118 *
119 * XXX: When/if witness code is cleaned up, it would be wise to place all
120 *	witness prototyping early in this file.
121 */
122static void witness_init(struct mtx *, int flag);
123static void witness_destroy(struct mtx *);
124static void witness_display(void(*)(const char *fmt, ...));
125
126MALLOC_DEFINE(M_WITNESS, "witness", "witness mtx_debug structure");
127
128/* All mutexes in system (used for debug/panic) */
129static struct mtx_debug all_mtx_debug = { NULL, {NULL, NULL}, NULL, 0 };
130
131/*
132 * This global is set to 0 once it becomes safe to use the witness code.
133 */
134static int witness_cold = 1;
135
136#else	/* WITNESS */
137
138/* XXX XXX XXX
139 * flag++ is sleazoid way of shuting up warning
140 */
141#define witness_init(m, flag) flag++
142#define witness_destroy(m)
143#define witness_try_enter(m, t, f, l)
144#endif	/* WITNESS */
145
146/*
147 * All mutex locks in system are kept on the all_mtx list.
148 */
149static struct mtx all_mtx = { MTX_UNOWNED, 0, 0, 0, "All mutexes queue head",
150	TAILQ_HEAD_INITIALIZER(all_mtx.mtx_blocked),
151	{ NULL, NULL }, &all_mtx, &all_mtx,
152#ifdef WITNESS
153	&all_mtx_debug
154#else
155	NULL
156#endif
157	 };
158
159/*
160 * Global variables for book keeping.
161 */
162static int	mtx_cur_cnt;
163static int	mtx_max_cnt;
164
165/*
166 * Prototypes for non-exported routines.
167 *
168 * NOTE: Prototypes for witness routines are placed at the bottom of the file.
169 */
170static void	propagate_priority(struct proc *);
171
172static void
173propagate_priority(struct proc *p)
174{
175	int pri = p->p_priority;
176	struct mtx *m = p->p_blocked;
177
178	mtx_assert(&sched_lock, MA_OWNED);
179	for (;;) {
180		struct proc *p1;
181
182		p = mtx_owner(m);
183
184		if (p == NULL) {
185			/*
186			 * This really isn't quite right. Really
187			 * ought to bump priority of process that
188			 * next acquires the mutex.
189			 */
190			MPASS(m->mtx_lock == MTX_CONTESTED);
191			return;
192		}
193
194		MPASS(p->p_magic == P_MAGIC);
195		KASSERT(p->p_stat != SSLEEP, ("sleeping process owns a mutex"));
196		if (p->p_priority <= pri)
197			return;
198
199		/*
200		 * Bump this process' priority.
201		 */
202		SET_PRIO(p, pri);
203
204		/*
205		 * If lock holder is actually running, just bump priority.
206		 */
207#ifdef SMP
208		/*
209		 * For SMP, we can check the p_oncpu field to see if we are
210		 * running.
211		 */
212		if (p->p_oncpu != 0xff) {
213			MPASS(p->p_stat == SRUN || p->p_stat == SZOMB);
214			return;
215		}
216#else
217		/*
218		 * For UP, we check to see if p is curproc (this shouldn't
219		 * ever happen however as it would mean we are in a deadlock.)
220		 */
221		if (p == curproc) {
222			panic("Deadlock detected");
223			return;
224		}
225#endif
226		/*
227		 * If on run queue move to new run queue, and
228		 * quit.
229		 */
230		if (p->p_stat == SRUN) {
231			printf("XXX: moving proc %d(%s) to a new run queue\n",
232			       p->p_pid, p->p_comm);
233			MPASS(p->p_blocked == NULL);
234			remrunqueue(p);
235			setrunqueue(p);
236			return;
237		}
238
239		/*
240		 * If we aren't blocked on a mutex, we should be.
241		 */
242		KASSERT(p->p_stat == SMTX, (
243		    "process %d(%s):%d holds %s but isn't blocked on a mutex\n",
244		    p->p_pid, p->p_comm, p->p_stat,
245		    m->mtx_description));
246
247		/*
248		 * Pick up the mutex that p is blocked on.
249		 */
250		m = p->p_blocked;
251		MPASS(m != NULL);
252
253		printf("XXX: process %d(%s) is blocked on %s\n", p->p_pid,
254		    p->p_comm, m->mtx_description);
255
256		/*
257		 * Check if the proc needs to be moved up on
258		 * the blocked chain
259		 */
260		if (p == TAILQ_FIRST(&m->mtx_blocked)) {
261			printf("XXX: process at head of run queue\n");
262			continue;
263		}
264
265		p1 = TAILQ_PREV(p, rq, p_procq);
266		if (p1->p_priority <= pri) {
267			printf(
268			   "XXX: previous process %d(%s) has higher priority\n",
269	                    p->p_pid, p->p_comm);
270			continue;
271		}
272
273		/*
274		 * Remove proc from blocked chain and determine where
275		 * it should be moved up to.  Since we know that p1 has
276		 * a lower priority than p, we know that at least one
277		 * process in the chain has a lower priority and that
278		 * p1 will thus not be NULL after the loop.
279		 */
280		TAILQ_REMOVE(&m->mtx_blocked, p, p_procq);
281		TAILQ_FOREACH(p1, &m->mtx_blocked, p_procq) {
282			MPASS(p1->p_magic == P_MAGIC);
283			if (p1->p_priority > pri)
284				break;
285		}
286
287		MPASS(p1 != NULL);
288		TAILQ_INSERT_BEFORE(p1, p, p_procq);
289		CTR4(KTR_LOCK,
290		    "propagate_priority: p %p moved before %p on [%p] %s",
291		    p, p1, m, m->mtx_description);
292	}
293}
294
295/*
296 * The important part of mtx_trylock{,_flags}()
297 * Tries to acquire lock `m.' We do NOT handle recursion here; we assume that
298 * if we're called, it's because we know we don't already own this lock.
299 */
300int
301_mtx_trylock(struct mtx *m, int opts, const char *file, int line)
302{
303	int rval;
304
305	KASSERT(CURPROC != NULL, ("curproc is NULL in _mtx_trylock"));
306
307	/*
308	 * _mtx_trylock does not accept MTX_NOSWITCH option.
309	 */
310	MPASS((opts & MTX_NOSWITCH) == 0);
311
312	rval = _obtain_lock(m, CURTHD);
313
314#ifdef WITNESS
315	if (rval && m->mtx_witness != NULL) {
316		/*
317		 * We do not handle recursion in _mtx_trylock; see the
318		 * note at the top of the routine.
319		 */
320		MPASS(!mtx_recursed(m));
321		witness_try_enter(m, (opts | m->mtx_flags), file, line);
322	}
323#endif	/* WITNESS */
324
325	if ((opts & MTX_QUIET) == 0)
326		CTR5(KTR_LOCK, "TRY_ENTER %s [%p] result=%d at %s:%d",
327		    m->mtx_description, m, rval, file, line);
328
329	return rval;
330}
331
332/*
333 * _mtx_lock_sleep: the tougher part of acquiring an MTX_DEF lock.
334 *
335 * We call this if the lock is either contested (i.e. we need to go to
336 * sleep waiting for it), or if we need to recurse on it.
337 */
338void
339_mtx_lock_sleep(struct mtx *m, int opts, const char *file, int line)
340{
341	struct proc *p = CURPROC;
342
343	if ((m->mtx_lock & MTX_FLAGMASK) == (uintptr_t)p) {
344		m->mtx_recurse++;
345		atomic_set_ptr(&m->mtx_lock, MTX_RECURSED);
346		if ((opts & MTX_QUIET) == 0)
347			CTR1(KTR_LOCK, "_mtx_lock_sleep: %p recurse", m);
348		return;
349	}
350
351	if ((opts & MTX_QUIET) == 0)
352		CTR3(KTR_LOCK, "mtx_lock: %p contested (lock=%p) [%p]", m,
353		    (void *)m->mtx_lock, (void *)RETIP(m));
354
355	/*
356	 * Save our priority. Even though p_nativepri is protected by
357	 * sched_lock, we don't obtain it here as it can be expensive.
358	 * Since this is the only place p_nativepri is set, and since two
359	 * CPUs will not be executing the same process concurrently, we know
360	 * that no other CPU is going to be messing with this. Also,
361	 * p_nativepri is only read when we are blocked on a mutex, so that
362	 * can't be happening right now either.
363	 */
364	p->p_nativepri = p->p_priority;
365
366	while (!_obtain_lock(m, p)) {
367		uintptr_t v;
368		struct proc *p1;
369
370		mtx_lock_spin(&sched_lock);
371		/*
372		 * Check if the lock has been released while spinning for
373		 * the sched_lock.
374		 */
375		if ((v = m->mtx_lock) == MTX_UNOWNED) {
376			mtx_unlock_spin(&sched_lock);
377			continue;
378		}
379
380		/*
381		 * The mutex was marked contested on release. This means that
382		 * there are processes blocked on it.
383		 */
384		if (v == MTX_CONTESTED) {
385			p1 = TAILQ_FIRST(&m->mtx_blocked);
386			KASSERT(p1 != NULL,
387			    ("contested mutex has no contesters"));
388			m->mtx_lock = (uintptr_t)p | MTX_CONTESTED;
389
390			if (p1->p_priority < p->p_priority)
391				SET_PRIO(p, p1->p_priority);
392			mtx_unlock_spin(&sched_lock);
393			return;
394		}
395
396		/*
397		 * If the mutex isn't already contested and a failure occurs
398		 * setting the contested bit, the mutex was either released
399		 * or the state of the MTX_RECURSED bit changed.
400		 */
401		if ((v & MTX_CONTESTED) == 0 &&
402		    !atomic_cmpset_ptr(&m->mtx_lock, (void *)v,
403			(void *)(v | MTX_CONTESTED))) {
404			mtx_unlock_spin(&sched_lock);
405			continue;
406		}
407
408		/*
409		 * We deffinately must sleep for this lock.
410		 */
411		mtx_assert(m, MA_NOTOWNED);
412
413#ifdef notyet
414		/*
415		 * If we're borrowing an interrupted thread's VM context, we
416		 * must clean up before going to sleep.
417		 */
418		if (p->p_flag & (P_ITHD | P_SITHD)) {
419			ithd_t *it = (ithd_t *)p;
420
421			if (it->it_interrupted) {
422				if ((opts & MTX_QUIET) == 0)
423					CTR2(KTR_LOCK,
424					    "mtx_lock: 0x%x interrupted 0x%x",
425					    it, it->it_interrupted);
426				intr_thd_fixup(it);
427			}
428		}
429#endif
430
431		/*
432		 * Put us on the list of threads blocked on this mutex.
433		 */
434		if (TAILQ_EMPTY(&m->mtx_blocked)) {
435			p1 = (struct proc *)(m->mtx_lock & MTX_FLAGMASK);
436			LIST_INSERT_HEAD(&p1->p_contested, m, mtx_contested);
437			TAILQ_INSERT_TAIL(&m->mtx_blocked, p, p_procq);
438		} else {
439			TAILQ_FOREACH(p1, &m->mtx_blocked, p_procq)
440				if (p1->p_priority > p->p_priority)
441					break;
442			if (p1)
443				TAILQ_INSERT_BEFORE(p1, p, p_procq);
444			else
445				TAILQ_INSERT_TAIL(&m->mtx_blocked, p, p_procq);
446		}
447
448		/*
449		 * Save who we're blocked on.
450		 */
451		p->p_blocked = m;
452		p->p_mtxname = m->mtx_description;
453		p->p_stat = SMTX;
454#if 0
455		propagate_priority(p);
456#endif
457
458		if ((opts & MTX_QUIET) == 0)
459			CTR3(KTR_LOCK,
460			    "_mtx_lock_sleep: p %p blocked on [%p] %s", p, m,
461			    m->mtx_description);
462
463		mi_switch();
464
465		if ((opts & MTX_QUIET) == 0)
466			CTR3(KTR_LOCK,
467			  "_mtx_lock_sleep: p %p free from blocked on [%p] %s",
468			  p, m, m->mtx_description);
469
470		mtx_unlock_spin(&sched_lock);
471	}
472
473	return;
474}
475
476/*
477 * _mtx_lock_spin: the tougher part of acquiring an MTX_SPIN lock.
478 *
479 * This is only called if we need to actually spin for the lock. Recursion
480 * is handled inline.
481 */
482void
483_mtx_lock_spin(struct mtx *m, int opts, u_int mtx_intr, const char *file,
484	       int line)
485{
486	int i = 0;
487
488	if ((opts & MTX_QUIET) == 0)
489		CTR1(KTR_LOCK, "mtx_lock_spin: %p spinning", m);
490
491	for (;;) {
492		if (_obtain_lock(m, CURPROC))
493			break;
494
495		while (m->mtx_lock != MTX_UNOWNED) {
496			if (i++ < 1000000)
497				continue;
498			if (i++ < 6000000)
499				DELAY(1);
500#ifdef DDB
501			else if (!db_active)
502#else
503			else
504#endif
505			panic("spin lock %s held by %p for > 5 seconds",
506			    m->mtx_description, (void *)m->mtx_lock);
507		}
508	}
509
510	m->mtx_saveintr = mtx_intr;
511	if ((opts & MTX_QUIET) == 0)
512		CTR1(KTR_LOCK, "_mtx_lock_spin: %p spin done", m);
513
514	return;
515}
516
517/*
518 * _mtx_unlock_sleep: the tougher part of releasing an MTX_DEF lock.
519 *
520 * We are only called here if the lock is recursed or contested (i.e. we
521 * need to wake up a blocked thread).
522 */
523void
524_mtx_unlock_sleep(struct mtx *m, int opts, const char *file, int line)
525{
526	struct proc *p, *p1;
527	struct mtx *m1;
528	int pri;
529
530	p = CURPROC;
531	MPASS4(mtx_owned(m), "mtx_owned(mpp)", file, line);
532
533	if ((opts & MTX_QUIET) == 0)
534		CTR5(KTR_LOCK, "REL %s [%p] r=%d at %s:%d", m->mtx_description,
535		    m, m->mtx_recurse, file, line);
536
537	if (mtx_recursed(m)) {
538		if (--(m->mtx_recurse) == 0)
539			atomic_clear_ptr(&m->mtx_lock, MTX_RECURSED);
540		if ((opts & MTX_QUIET) == 0)
541			CTR1(KTR_LOCK, "_mtx_unlock_sleep: %p unrecurse", m);
542		return;
543	}
544
545	mtx_lock_spin(&sched_lock);
546	if ((opts & MTX_QUIET) == 0)
547		CTR1(KTR_LOCK, "_mtx_unlock_sleep: %p contested", m);
548
549	p1 = TAILQ_FIRST(&m->mtx_blocked);
550	MPASS(p->p_magic == P_MAGIC);
551	MPASS(p1->p_magic == P_MAGIC);
552
553	TAILQ_REMOVE(&m->mtx_blocked, p1, p_procq);
554
555	if (TAILQ_EMPTY(&m->mtx_blocked)) {
556		LIST_REMOVE(m, mtx_contested);
557		_release_lock_quick(m);
558		if ((opts & MTX_QUIET) == 0)
559			CTR1(KTR_LOCK, "_mtx_unlock_sleep: %p not held", m);
560	} else
561		atomic_store_rel_ptr(&m->mtx_lock, (void *)MTX_CONTESTED);
562
563	pri = MAXPRI;
564	LIST_FOREACH(m1, &p->p_contested, mtx_contested) {
565		int cp = TAILQ_FIRST(&m1->mtx_blocked)->p_priority;
566		if (cp < pri)
567			pri = cp;
568	}
569
570	if (pri > p->p_nativepri)
571		pri = p->p_nativepri;
572	SET_PRIO(p, pri);
573
574	if ((opts & MTX_QUIET) == 0)
575		CTR2(KTR_LOCK, "_mtx_unlock_sleep: %p contested setrunqueue %p",
576		    m, p1);
577
578	p1->p_blocked = NULL;
579	p1->p_mtxname = NULL;
580	p1->p_stat = SRUN;
581	setrunqueue(p1);
582
583	if ((opts & MTX_NOSWITCH) == 0 && p1->p_priority < pri) {
584#ifdef notyet
585		if (p->p_flag & (P_ITHD | P_SITHD)) {
586			ithd_t *it = (ithd_t *)p;
587
588			if (it->it_interrupted) {
589				if ((opts & MTX_QUIET) == 0)
590					CTR2(KTR_LOCK,
591				    "_mtx_unlock_sleep: 0x%x interrupted 0x%x",
592					    it, it->it_interrupted);
593				intr_thd_fixup(it);
594			}
595		}
596#endif
597		setrunqueue(p);
598		if ((opts & MTX_QUIET) == 0)
599			CTR2(KTR_LOCK,
600			    "_mtx_unlock_sleep: %p switching out lock=%p", m,
601			    (void *)m->mtx_lock);
602
603		mi_switch();
604		if ((opts & MTX_QUIET) == 0)
605			CTR2(KTR_LOCK, "_mtx_unlock_sleep: %p resuming lock=%p",
606			    m, (void *)m->mtx_lock);
607	}
608
609	mtx_unlock_spin(&sched_lock);
610
611	return;
612}
613
614/*
615 * All the unlocking of MTX_SPIN locks is done inline.
616 * See the _rel_spin_lock() macro for the details.
617 */
618
619/*
620 * The INVARIANTS-enabled mtx_assert()
621 */
622#ifdef INVARIANTS
623void
624_mtx_assert(struct mtx *m, int what, const char *file, int line)
625{
626	switch ((what)) {
627	case MA_OWNED:
628	case MA_OWNED | MA_RECURSED:
629	case MA_OWNED | MA_NOTRECURSED:
630		if (!mtx_owned((m)))
631			panic("mutex %s not owned at %s:%d",
632			    (m)->mtx_description, file, line);
633		if (mtx_recursed((m))) {
634			if (((what) & MA_NOTRECURSED) != 0)
635				panic("mutex %s recursed at %s:%d",
636				    (m)->mtx_description, file, line);
637		} else if (((what) & MA_RECURSED) != 0) {
638			panic("mutex %s unrecursed at %s:%d",
639			    (m)->mtx_description, file, line);
640		}
641		break;
642	case MA_NOTOWNED:
643		if (mtx_owned((m)))
644			panic("mutex %s owned at %s:%d",
645			    (m)->mtx_description, file, line);
646		break;
647	default:
648		panic("unknown mtx_assert at %s:%d", file, line);
649	}
650}
651#endif
652
653/*
654 * The MUTEX_DEBUG-enabled mtx_validate()
655 */
656#define MV_DESTROY	0	/* validate before destory */
657#define MV_INIT		1	/* validate before init */
658
659#ifdef MUTEX_DEBUG
660
661int mtx_validate __P((struct mtx *, int));
662
663int
664mtx_validate(struct mtx *m, int when)
665{
666	struct mtx *mp;
667	int i;
668	int retval = 0;
669
670#ifdef WITNESS
671	if (witness_cold)
672		return 0;
673#endif
674	if (m == &all_mtx || cold)
675		return 0;
676
677	mtx_lock(&all_mtx);
678/*
679 * XXX - When kernacc() is fixed on the alpha to handle K0_SEG memory properly
680 * we can re-enable the kernacc() checks.
681 */
682#ifndef __alpha__
683	MPASS(kernacc((caddr_t)all_mtx.mtx_next, sizeof(uintptr_t),
684	    VM_PROT_READ) == 1);
685#endif
686	MPASS(all_mtx.mtx_next->mtx_prev == &all_mtx);
687	for (i = 0, mp = all_mtx.mtx_next; mp != &all_mtx; mp = mp->mtx_next) {
688#ifndef __alpha__
689		if (kernacc((caddr_t)mp->mtx_next, sizeof(uintptr_t),
690		    VM_PROT_READ) != 1) {
691			panic("mtx_validate: mp=%p mp->mtx_next=%p",
692			    mp, mp->mtx_next);
693		}
694#endif
695		i++;
696		if (i > mtx_cur_cnt) {
697			panic("mtx_validate: too many in chain, known=%d\n",
698			    mtx_cur_cnt);
699		}
700	}
701	MPASS(i == mtx_cur_cnt);
702	switch (when) {
703	case MV_DESTROY:
704		for (mp = all_mtx.mtx_next; mp != &all_mtx; mp = mp->mtx_next)
705			if (mp == m)
706				break;
707		MPASS(mp == m);
708		break;
709	case MV_INIT:
710		for (mp = all_mtx.mtx_next; mp != &all_mtx; mp = mp->mtx_next)
711		if (mp == m) {
712			/*
713			 * Not good. This mutex already exists.
714			 */
715			printf("re-initing existing mutex %s\n",
716			    m->mtx_description);
717			MPASS(m->mtx_lock == MTX_UNOWNED);
718			retval = 1;
719		}
720	}
721	mtx_unlock(&all_mtx);
722	return (retval);
723}
724#endif
725
726/*
727 * Mutex initialization routine; initialize lock `m' of type contained in
728 * `opts' with options contained in `opts' and description `description.'
729 * Place on "all_mtx" queue.
730 */
731void
732mtx_init(struct mtx *m, const char *description, int opts)
733{
734
735	if ((opts & MTX_QUIET) == 0)
736		CTR2(KTR_LOCK, "mtx_init %p (%s)", m, description);
737
738#ifdef MUTEX_DEBUG
739	/* Diagnostic and error correction */
740	if (mtx_validate(m, MV_INIT))
741		return;
742#endif
743
744	bzero((void *)m, sizeof *m);
745	TAILQ_INIT(&m->mtx_blocked);
746
747#ifdef WITNESS
748	if (!witness_cold) {
749		m->mtx_debug = malloc(sizeof(struct mtx_debug),
750		    M_WITNESS, M_NOWAIT | M_ZERO);
751		MPASS(m->mtx_debug != NULL);
752	}
753#endif
754
755	m->mtx_description = description;
756	m->mtx_flags = opts;
757	m->mtx_lock = MTX_UNOWNED;
758
759	/* Put on all mutex queue */
760	mtx_lock(&all_mtx);
761	m->mtx_next = &all_mtx;
762	m->mtx_prev = all_mtx.mtx_prev;
763	m->mtx_prev->mtx_next = m;
764	all_mtx.mtx_prev = m;
765	if (++mtx_cur_cnt > mtx_max_cnt)
766		mtx_max_cnt = mtx_cur_cnt;
767	mtx_unlock(&all_mtx);
768
769#ifdef WITNESS
770	if (!witness_cold)
771		witness_init(m, opts);
772#endif
773}
774
775/*
776 * Remove lock `m' from all_mtx queue.
777 */
778void
779mtx_destroy(struct mtx *m)
780{
781
782#ifdef WITNESS
783	KASSERT(!witness_cold, ("%s: Cannot destroy while still cold\n",
784	    __FUNCTION__));
785#endif
786
787	CTR2(KTR_LOCK, "mtx_destroy %p (%s)", m, m->mtx_description);
788
789#ifdef MUTEX_DEBUG
790	if (m->mtx_next == NULL)
791		panic("mtx_destroy: %p (%s) already destroyed",
792		    m, m->mtx_description);
793
794	if (!mtx_owned(m)) {
795		MPASS(m->mtx_lock == MTX_UNOWNED);
796	} else {
797		MPASS((m->mtx_lock & (MTX_RECURSED|MTX_CONTESTED)) == 0);
798	}
799
800	/* diagnostic */
801	mtx_validate(m, MV_DESTROY);
802#endif
803
804#ifdef WITNESS
805	if (m->mtx_witness)
806		witness_destroy(m);
807#endif /* WITNESS */
808
809	/* Remove from the all mutex queue */
810	mtx_lock(&all_mtx);
811	m->mtx_next->mtx_prev = m->mtx_prev;
812	m->mtx_prev->mtx_next = m->mtx_next;
813
814#ifdef MUTEX_DEBUG
815	m->mtx_next = m->mtx_prev = NULL;
816#endif
817
818#ifdef WITNESS
819	free(m->mtx_debug, M_WITNESS);
820	m->mtx_debug = NULL;
821#endif
822
823	mtx_cur_cnt--;
824	mtx_unlock(&all_mtx);
825}
826
827
828/*
829 * The WITNESS-enabled diagnostic code.
830 */
831#ifdef WITNESS
832static void
833witness_fixup(void *dummy __unused)
834{
835	struct mtx *mp;
836
837	/*
838	 * We have to release Giant before initializing its witness
839	 * structure so that WITNESS doesn't get confused.
840	 */
841	mtx_unlock(&Giant);
842	mtx_assert(&Giant, MA_NOTOWNED);
843
844	mtx_lock(&all_mtx);
845
846	/* Iterate through all mutexes and finish up mutex initialization. */
847	for (mp = all_mtx.mtx_next; mp != &all_mtx; mp = mp->mtx_next) {
848
849		mp->mtx_debug = malloc(sizeof(struct mtx_debug),
850		    M_WITNESS, M_NOWAIT | M_ZERO);
851		MPASS(mp->mtx_debug != NULL);
852
853		witness_init(mp, mp->mtx_flags);
854	}
855	mtx_unlock(&all_mtx);
856
857	/* Mark the witness code as being ready for use. */
858	atomic_store_rel_int(&witness_cold, 0);
859
860	mtx_lock(&Giant);
861}
862SYSINIT(wtnsfxup, SI_SUB_MUTEX, SI_ORDER_FIRST, witness_fixup, NULL)
863
864#define WITNESS_COUNT 200
865#define	WITNESS_NCHILDREN 2
866
867int witness_watch = 1;
868
869struct witness {
870	struct witness	*w_next;
871	const char	*w_description;
872	const char	*w_file;
873	int		 w_line;
874	struct witness	*w_morechildren;
875	u_char		 w_childcnt;
876	u_char		 w_Giant_squawked:1;
877	u_char		 w_other_squawked:1;
878	u_char		 w_same_squawked:1;
879	u_char		 w_spin:1;	/* MTX_SPIN type mutex. */
880	u_int		 w_level;
881	struct witness	*w_children[WITNESS_NCHILDREN];
882};
883
884struct witness_blessed {
885	char 	*b_lock1;
886	char	*b_lock2;
887};
888
889#ifdef DDB
890/*
891 * When DDB is enabled and witness_ddb is set to 1, it will cause the system to
892 * drop into kdebug() when:
893 *	- a lock heirarchy violation occurs
894 *	- locks are held when going to sleep.
895 */
896int	witness_ddb;
897#ifdef WITNESS_DDB
898TUNABLE_INT_DECL("debug.witness_ddb", 1, witness_ddb);
899#else
900TUNABLE_INT_DECL("debug.witness_ddb", 0, witness_ddb);
901#endif
902SYSCTL_INT(_debug, OID_AUTO, witness_ddb, CTLFLAG_RW, &witness_ddb, 0, "");
903#endif /* DDB */
904
905int	witness_skipspin;
906#ifdef WITNESS_SKIPSPIN
907TUNABLE_INT_DECL("debug.witness_skipspin", 1, witness_skipspin);
908#else
909TUNABLE_INT_DECL("debug.witness_skipspin", 0, witness_skipspin);
910#endif
911SYSCTL_INT(_debug, OID_AUTO, witness_skipspin, CTLFLAG_RD, &witness_skipspin, 0,
912    "");
913
914/*
915 * Witness-enabled globals
916 */
917static struct mtx	w_mtx;
918static struct witness	*w_free;
919static struct witness	*w_all;
920static int		 w_inited;
921static int		 witness_dead;	/* fatal error, probably no memory */
922
923static struct witness	 w_data[WITNESS_COUNT];
924
925/*
926 * Internal witness routine prototypes
927 */
928static struct witness *enroll(const char *description, int flag);
929static int itismychild(struct witness *parent, struct witness *child);
930static void removechild(struct witness *parent, struct witness *child);
931static int isitmychild(struct witness *parent, struct witness *child);
932static int isitmydescendant(struct witness *parent, struct witness *child);
933static int dup_ok(struct witness *);
934static int blessed(struct witness *, struct witness *);
935static void
936    witness_displaydescendants(void(*)(const char *fmt, ...), struct witness *);
937static void witness_leveldescendents(struct witness *parent, int level);
938static void witness_levelall(void);
939static struct witness * witness_get(void);
940static void witness_free(struct witness *m);
941
942static char *ignore_list[] = {
943	"witness lock",
944	NULL
945};
946
947static char *spin_order_list[] = {
948	"sio",
949	"sched lock",
950#ifdef __i386__
951	"clk",
952#endif
953	"callout",
954	/*
955	 * leaf locks
956	 */
957#ifdef __i386__
958	"ap boot",
959	"imen",
960#endif
961	"com",
962	"smp rendezvous",
963	NULL
964};
965
966static char *order_list[] = {
967	"Giant", "uidinfo hash", "uidinfo struct", NULL,
968	"Giant", "proctree", "allproc", "process lock", NULL,
969	NULL
970};
971
972static char *dup_list[] = {
973	NULL
974};
975
976static char *sleep_list[] = {
977	"Giant",
978	NULL
979};
980
981/*
982 * Pairs of locks which have been blessed
983 * Don't complain about order problems with blessed locks
984 */
985static struct witness_blessed blessed_list[] = {
986};
987static int blessed_count =
988	sizeof(blessed_list) / sizeof(struct witness_blessed);
989
990static void
991witness_init(struct mtx *m, int flag)
992{
993	m->mtx_witness = enroll(m->mtx_description, flag);
994}
995
996static void
997witness_destroy(struct mtx *m)
998{
999	struct mtx *m1;
1000	struct proc *p;
1001	p = CURPROC;
1002	for ((m1 = LIST_FIRST(&p->p_heldmtx)); m1 != NULL;
1003		m1 = LIST_NEXT(m1, mtx_held)) {
1004		if (m1 == m) {
1005			LIST_REMOVE(m, mtx_held);
1006			break;
1007		}
1008	}
1009	return;
1010
1011}
1012
1013static void
1014witness_display(void(*prnt)(const char *fmt, ...))
1015{
1016	struct witness *w, *w1;
1017
1018	KASSERT(!witness_cold, ("%s: witness_cold\n", __FUNCTION__));
1019	witness_levelall();
1020
1021	for (w = w_all; w; w = w->w_next) {
1022		if (w->w_file == NULL)
1023			continue;
1024		for (w1 = w_all; w1; w1 = w1->w_next) {
1025			if (isitmychild(w1, w))
1026				break;
1027		}
1028		if (w1 != NULL)
1029			continue;
1030		/*
1031		 * This lock has no anscestors, display its descendants.
1032		 */
1033		witness_displaydescendants(prnt, w);
1034	}
1035	prnt("\nMutex which were never acquired\n");
1036	for (w = w_all; w; w = w->w_next) {
1037		if (w->w_file != NULL)
1038			continue;
1039		prnt("%s\n", w->w_description);
1040	}
1041}
1042
1043void
1044witness_enter(struct mtx *m, int flags, const char *file, int line)
1045{
1046	struct witness *w, *w1;
1047	struct mtx *m1;
1048	struct proc *p;
1049	int i;
1050#ifdef DDB
1051	int go_into_ddb = 0;
1052#endif /* DDB */
1053
1054	if (witness_cold || m->mtx_witness == NULL || panicstr)
1055		return;
1056	w = m->mtx_witness;
1057	p = CURPROC;
1058
1059	if (flags & MTX_SPIN) {
1060		if ((m->mtx_flags & MTX_SPIN) == 0)
1061			panic("mutex_enter: MTX_SPIN on MTX_DEF mutex %s @"
1062			    " %s:%d", m->mtx_description, file, line);
1063		if (mtx_recursed(m)) {
1064			if ((m->mtx_flags & MTX_RECURSE) == 0)
1065				panic("mutex_enter: recursion on non-recursive"
1066				    " mutex %s @ %s:%d", m->mtx_description,
1067				    file, line);
1068			return;
1069		}
1070		mtx_lock_spin_flags(&w_mtx, MTX_QUIET);
1071		i = PCPU_GET(witness_spin_check);
1072		if (i != 0 && w->w_level < i) {
1073			mtx_unlock_spin_flags(&w_mtx, MTX_QUIET);
1074			panic("mutex_enter(%s:%x, MTX_SPIN) out of order @"
1075			    " %s:%d already holding %s:%x",
1076			    m->mtx_description, w->w_level, file, line,
1077			    spin_order_list[ffs(i)-1], i);
1078		}
1079		PCPU_SET(witness_spin_check, i | w->w_level);
1080		mtx_unlock_spin_flags(&w_mtx, MTX_QUIET);
1081		w->w_file = file;
1082		w->w_line = line;
1083		m->mtx_line = line;
1084		m->mtx_file = file;
1085		return;
1086	}
1087	if ((m->mtx_flags & MTX_SPIN) != 0)
1088		panic("mutex_enter: MTX_DEF on MTX_SPIN mutex %s @ %s:%d",
1089		    m->mtx_description, file, line);
1090
1091	if (mtx_recursed(m)) {
1092		if ((m->mtx_flags & MTX_RECURSE) == 0)
1093			panic("mutex_enter: recursion on non-recursive"
1094			    " mutex %s @ %s:%d", m->mtx_description,
1095			    file, line);
1096		return;
1097	}
1098	if (witness_dead)
1099		goto out;
1100	if (cold)
1101		goto out;
1102
1103	if (!mtx_legal2block())
1104		panic("blockable mtx_lock() of %s when not legal @ %s:%d",
1105			    m->mtx_description, file, line);
1106	/*
1107	 * Is this the first mutex acquired
1108	 */
1109	if ((m1 = LIST_FIRST(&p->p_heldmtx)) == NULL)
1110		goto out;
1111
1112	if ((w1 = m1->mtx_witness) == w) {
1113		if (w->w_same_squawked || dup_ok(w))
1114			goto out;
1115		w->w_same_squawked = 1;
1116		printf("acquring duplicate lock of same type: \"%s\"\n",
1117			m->mtx_description);
1118		printf(" 1st @ %s:%d\n", w->w_file, w->w_line);
1119		printf(" 2nd @ %s:%d\n", file, line);
1120#ifdef DDB
1121		go_into_ddb = 1;
1122#endif /* DDB */
1123		goto out;
1124	}
1125	MPASS(!mtx_owned(&w_mtx));
1126	mtx_lock_spin_flags(&w_mtx, MTX_QUIET);
1127	/*
1128	 * If we have a known higher number just say ok
1129	 */
1130	if (witness_watch > 1 && w->w_level > w1->w_level) {
1131		mtx_unlock_spin_flags(&w_mtx, MTX_QUIET);
1132		goto out;
1133	}
1134	if (isitmydescendant(m1->mtx_witness, w)) {
1135		mtx_unlock_spin_flags(&w_mtx, MTX_QUIET);
1136		goto out;
1137	}
1138	for (i = 0; m1 != NULL; m1 = LIST_NEXT(m1, mtx_held), i++) {
1139
1140		MPASS(i < 200);
1141		w1 = m1->mtx_witness;
1142		if (isitmydescendant(w, w1)) {
1143			mtx_unlock_spin_flags(&w_mtx, MTX_QUIET);
1144			if (blessed(w, w1))
1145				goto out;
1146			if (m1 == &Giant) {
1147				if (w1->w_Giant_squawked)
1148					goto out;
1149				else
1150					w1->w_Giant_squawked = 1;
1151			} else {
1152				if (w1->w_other_squawked)
1153					goto out;
1154				else
1155					w1->w_other_squawked = 1;
1156			}
1157			printf("lock order reversal\n");
1158			printf(" 1st %s last acquired @ %s:%d\n",
1159			    w->w_description, w->w_file, w->w_line);
1160			printf(" 2nd %p %s @ %s:%d\n",
1161			    m1, w1->w_description, w1->w_file, w1->w_line);
1162			printf(" 3rd %p %s @ %s:%d\n",
1163			    m, w->w_description, file, line);
1164#ifdef DDB
1165			go_into_ddb = 1;
1166#endif /* DDB */
1167			goto out;
1168		}
1169	}
1170	m1 = LIST_FIRST(&p->p_heldmtx);
1171	if (!itismychild(m1->mtx_witness, w))
1172		mtx_unlock_spin_flags(&w_mtx, MTX_QUIET);
1173
1174out:
1175#ifdef DDB
1176	if (witness_ddb && go_into_ddb)
1177		Debugger("witness_enter");
1178#endif /* DDB */
1179	w->w_file = file;
1180	w->w_line = line;
1181	m->mtx_line = line;
1182	m->mtx_file = file;
1183
1184	/*
1185	 * If this pays off it likely means that a mutex being witnessed
1186	 * is acquired in hardclock. Put it in the ignore list. It is
1187	 * likely not the mutex this assert fails on.
1188	 */
1189	MPASS(m->mtx_held.le_prev == NULL);
1190	LIST_INSERT_HEAD(&p->p_heldmtx, (struct mtx*)m, mtx_held);
1191}
1192
1193void
1194witness_try_enter(struct mtx *m, int flags, const char *file, int line)
1195{
1196	struct proc *p;
1197	struct witness *w = m->mtx_witness;
1198
1199	if (witness_cold)
1200		return;
1201	if (panicstr)
1202		return;
1203	if (flags & MTX_SPIN) {
1204		if ((m->mtx_flags & MTX_SPIN) == 0)
1205			panic("mutex_try_enter: "
1206			    "MTX_SPIN on MTX_DEF mutex %s @ %s:%d",
1207			    m->mtx_description, file, line);
1208		if (mtx_recursed(m)) {
1209			if ((m->mtx_flags & MTX_RECURSE) == 0)
1210				panic("mutex_try_enter: recursion on"
1211				    " non-recursive mutex %s @ %s:%d",
1212				    m->mtx_description, file, line);
1213			return;
1214		}
1215		mtx_lock_spin_flags(&w_mtx, MTX_QUIET);
1216		PCPU_SET(witness_spin_check,
1217		    PCPU_GET(witness_spin_check) | w->w_level);
1218		mtx_unlock_spin_flags(&w_mtx, MTX_QUIET);
1219		w->w_file = file;
1220		w->w_line = line;
1221		m->mtx_line = line;
1222		m->mtx_file = file;
1223		return;
1224	}
1225
1226	if ((m->mtx_flags & MTX_SPIN) != 0)
1227		panic("mutex_try_enter: MTX_DEF on MTX_SPIN mutex %s @ %s:%d",
1228		    m->mtx_description, file, line);
1229
1230	if (mtx_recursed(m)) {
1231		if ((m->mtx_flags & MTX_RECURSE) == 0)
1232			panic("mutex_try_enter: recursion on non-recursive"
1233			    " mutex %s @ %s:%d", m->mtx_description, file,
1234			    line);
1235		return;
1236	}
1237	w->w_file = file;
1238	w->w_line = line;
1239	m->mtx_line = line;
1240	m->mtx_file = file;
1241	p = CURPROC;
1242	MPASS(m->mtx_held.le_prev == NULL);
1243	LIST_INSERT_HEAD(&p->p_heldmtx, (struct mtx*)m, mtx_held);
1244}
1245
1246void
1247witness_exit(struct mtx *m, int flags, const char *file, int line)
1248{
1249	struct witness *w;
1250
1251	if (witness_cold || m->mtx_witness == NULL || panicstr)
1252		return;
1253	w = m->mtx_witness;
1254
1255	if (flags & MTX_SPIN) {
1256		if ((m->mtx_flags & MTX_SPIN) == 0)
1257			panic("mutex_exit: MTX_SPIN on MTX_DEF mutex %s @"
1258			    " %s:%d", m->mtx_description, file, line);
1259		if (mtx_recursed(m)) {
1260			if ((m->mtx_flags & MTX_RECURSE) == 0)
1261				panic("mutex_exit: recursion on non-recursive"
1262				    " mutex %s @ %s:%d", m->mtx_description,
1263				    file, line);
1264			return;
1265		}
1266		mtx_lock_spin_flags(&w_mtx, MTX_QUIET);
1267		PCPU_SET(witness_spin_check,
1268		    PCPU_GET(witness_spin_check) & ~w->w_level);
1269		mtx_unlock_spin_flags(&w_mtx, MTX_QUIET);
1270		return;
1271	}
1272	if ((m->mtx_flags & MTX_SPIN) != 0)
1273		panic("mutex_exit: MTX_DEF on MTX_SPIN mutex %s @ %s:%d",
1274		    m->mtx_description, file, line);
1275
1276	if (mtx_recursed(m)) {
1277		if ((m->mtx_flags & MTX_RECURSE) == 0)
1278			panic("mutex_exit: recursion on non-recursive"
1279			    " mutex %s @ %s:%d", m->mtx_description,
1280			    file, line);
1281		return;
1282	}
1283
1284	if ((flags & MTX_NOSWITCH) == 0 && !mtx_legal2block() && !cold)
1285		panic("switchable mtx_unlock() of %s when not legal @ %s:%d",
1286			    m->mtx_description, file, line);
1287	LIST_REMOVE(m, mtx_held);
1288	m->mtx_held.le_prev = NULL;
1289}
1290
1291int
1292witness_sleep(int check_only, struct mtx *mtx, const char *file, int line)
1293{
1294	struct mtx *m;
1295	struct proc *p;
1296	char **sleep;
1297	int n = 0;
1298
1299	KASSERT(!witness_cold, ("%s: witness_cold\n", __FUNCTION__));
1300	p = CURPROC;
1301	for ((m = LIST_FIRST(&p->p_heldmtx)); m != NULL;
1302	    m = LIST_NEXT(m, mtx_held)) {
1303		if (m == mtx)
1304			continue;
1305		for (sleep = sleep_list; *sleep!= NULL; sleep++)
1306			if (strcmp(m->mtx_description, *sleep) == 0)
1307				goto next;
1308		printf("%s:%d: %s with \"%s\" locked from %s:%d\n",
1309			file, line, check_only ? "could sleep" : "sleeping",
1310			m->mtx_description,
1311			m->mtx_witness->w_file, m->mtx_witness->w_line);
1312		n++;
1313	next:
1314	}
1315#ifdef DDB
1316	if (witness_ddb && n)
1317		Debugger("witness_sleep");
1318#endif /* DDB */
1319	return (n);
1320}
1321
1322static struct witness *
1323enroll(const char *description, int flag)
1324{
1325	int i;
1326	struct witness *w, *w1;
1327	char **ignore;
1328	char **order;
1329
1330	if (!witness_watch)
1331		return (NULL);
1332	for (ignore = ignore_list; *ignore != NULL; ignore++)
1333		if (strcmp(description, *ignore) == 0)
1334			return (NULL);
1335
1336	if (w_inited == 0) {
1337		mtx_init(&w_mtx, "witness lock", MTX_SPIN);
1338		for (i = 0; i < WITNESS_COUNT; i++) {
1339			w = &w_data[i];
1340			witness_free(w);
1341		}
1342		w_inited = 1;
1343		for (order = order_list; *order != NULL; order++) {
1344			w = enroll(*order, MTX_DEF);
1345			w->w_file = "order list";
1346			for (order++; *order != NULL; order++) {
1347				w1 = enroll(*order, MTX_DEF);
1348				w1->w_file = "order list";
1349				itismychild(w, w1);
1350				w = w1;
1351    	    	    	}
1352		}
1353	}
1354	if ((flag & MTX_SPIN) && witness_skipspin)
1355		return (NULL);
1356	mtx_lock_spin_flags(&w_mtx, MTX_QUIET);
1357	for (w = w_all; w; w = w->w_next) {
1358		if (strcmp(description, w->w_description) == 0) {
1359			mtx_unlock_spin_flags(&w_mtx, MTX_QUIET);
1360			return (w);
1361		}
1362	}
1363	if ((w = witness_get()) == NULL)
1364		return (NULL);
1365	w->w_next = w_all;
1366	w_all = w;
1367	w->w_description = description;
1368	mtx_unlock_spin_flags(&w_mtx, MTX_QUIET);
1369	if (flag & MTX_SPIN) {
1370		w->w_spin = 1;
1371
1372		i = 1;
1373		for (order = spin_order_list; *order != NULL; order++) {
1374			if (strcmp(description, *order) == 0)
1375				break;
1376			i <<= 1;
1377		}
1378		if (*order == NULL)
1379			panic("spin lock %s not in order list", description);
1380		w->w_level = i;
1381	}
1382
1383	return (w);
1384}
1385
1386static int
1387itismychild(struct witness *parent, struct witness *child)
1388{
1389	static int recursed;
1390
1391	/*
1392	 * Insert "child" after "parent"
1393	 */
1394	while (parent->w_morechildren)
1395		parent = parent->w_morechildren;
1396
1397	if (parent->w_childcnt == WITNESS_NCHILDREN) {
1398		if ((parent->w_morechildren = witness_get()) == NULL)
1399			return (1);
1400		parent = parent->w_morechildren;
1401	}
1402	MPASS(child != NULL);
1403	parent->w_children[parent->w_childcnt++] = child;
1404	/*
1405	 * now prune whole tree
1406	 */
1407	if (recursed)
1408		return (0);
1409	recursed = 1;
1410	for (child = w_all; child != NULL; child = child->w_next) {
1411		for (parent = w_all; parent != NULL;
1412		    parent = parent->w_next) {
1413			if (!isitmychild(parent, child))
1414				continue;
1415			removechild(parent, child);
1416			if (isitmydescendant(parent, child))
1417				continue;
1418			itismychild(parent, child);
1419		}
1420	}
1421	recursed = 0;
1422	witness_levelall();
1423	return (0);
1424}
1425
1426static void
1427removechild(struct witness *parent, struct witness *child)
1428{
1429	struct witness *w, *w1;
1430	int i;
1431
1432	for (w = parent; w != NULL; w = w->w_morechildren)
1433		for (i = 0; i < w->w_childcnt; i++)
1434			if (w->w_children[i] == child)
1435				goto found;
1436	return;
1437found:
1438	for (w1 = w; w1->w_morechildren != NULL; w1 = w1->w_morechildren)
1439		continue;
1440	w->w_children[i] = w1->w_children[--w1->w_childcnt];
1441	MPASS(w->w_children[i] != NULL);
1442
1443	if (w1->w_childcnt != 0)
1444		return;
1445
1446	if (w1 == parent)
1447		return;
1448	for (w = parent; w->w_morechildren != w1; w = w->w_morechildren)
1449		continue;
1450	w->w_morechildren = 0;
1451	witness_free(w1);
1452}
1453
1454static int
1455isitmychild(struct witness *parent, struct witness *child)
1456{
1457	struct witness *w;
1458	int i;
1459
1460	for (w = parent; w != NULL; w = w->w_morechildren) {
1461		for (i = 0; i < w->w_childcnt; i++) {
1462			if (w->w_children[i] == child)
1463				return (1);
1464		}
1465	}
1466	return (0);
1467}
1468
1469static int
1470isitmydescendant(struct witness *parent, struct witness *child)
1471{
1472	struct witness *w;
1473	int i;
1474	int j;
1475
1476	for (j = 0, w = parent; w != NULL; w = w->w_morechildren, j++) {
1477		MPASS(j < 1000);
1478		for (i = 0; i < w->w_childcnt; i++) {
1479			if (w->w_children[i] == child)
1480				return (1);
1481		}
1482		for (i = 0; i < w->w_childcnt; i++) {
1483			if (isitmydescendant(w->w_children[i], child))
1484				return (1);
1485		}
1486	}
1487	return (0);
1488}
1489
1490void
1491witness_levelall (void)
1492{
1493	struct witness *w, *w1;
1494
1495	for (w = w_all; w; w = w->w_next)
1496		if (!(w->w_spin))
1497			w->w_level = 0;
1498	for (w = w_all; w; w = w->w_next) {
1499		if (w->w_spin)
1500			continue;
1501		for (w1 = w_all; w1; w1 = w1->w_next) {
1502			if (isitmychild(w1, w))
1503				break;
1504		}
1505		if (w1 != NULL)
1506			continue;
1507		witness_leveldescendents(w, 0);
1508	}
1509}
1510
1511static void
1512witness_leveldescendents(struct witness *parent, int level)
1513{
1514	int i;
1515	struct witness *w;
1516
1517	if (parent->w_level < level)
1518		parent->w_level = level;
1519	level++;
1520	for (w = parent; w != NULL; w = w->w_morechildren)
1521		for (i = 0; i < w->w_childcnt; i++)
1522			witness_leveldescendents(w->w_children[i], level);
1523}
1524
1525static void
1526witness_displaydescendants(void(*prnt)(const char *fmt, ...),
1527			   struct witness *parent)
1528{
1529	struct witness *w;
1530	int i;
1531	int level = parent->w_level;
1532
1533	prnt("%d", level);
1534	if (level < 10)
1535		prnt(" ");
1536	for (i = 0; i < level; i++)
1537		prnt(" ");
1538	prnt("%s", parent->w_description);
1539	if (parent->w_file != NULL) {
1540		prnt(" -- last acquired @ %s", parent->w_file);
1541#ifndef W_USE_WHERE
1542		prnt(":%d", parent->w_line);
1543#endif
1544		prnt("\n");
1545	}
1546
1547	for (w = parent; w != NULL; w = w->w_morechildren)
1548		for (i = 0; i < w->w_childcnt; i++)
1549			    witness_displaydescendants(prnt, w->w_children[i]);
1550    }
1551
1552static int
1553dup_ok(struct witness *w)
1554{
1555	char **dup;
1556
1557	for (dup = dup_list; *dup!= NULL; dup++)
1558		if (strcmp(w->w_description, *dup) == 0)
1559			return (1);
1560	return (0);
1561}
1562
1563static int
1564blessed(struct witness *w1, struct witness *w2)
1565{
1566	int i;
1567	struct witness_blessed *b;
1568
1569	for (i = 0; i < blessed_count; i++) {
1570		b = &blessed_list[i];
1571		if (strcmp(w1->w_description, b->b_lock1) == 0) {
1572			if (strcmp(w2->w_description, b->b_lock2) == 0)
1573				return (1);
1574			continue;
1575		}
1576		if (strcmp(w1->w_description, b->b_lock2) == 0)
1577			if (strcmp(w2->w_description, b->b_lock1) == 0)
1578				return (1);
1579	}
1580	return (0);
1581}
1582
1583static struct witness *
1584witness_get()
1585{
1586	struct witness *w;
1587
1588	if ((w = w_free) == NULL) {
1589		witness_dead = 1;
1590		mtx_unlock_spin_flags(&w_mtx, MTX_QUIET);
1591		printf("witness exhausted\n");
1592		return (NULL);
1593	}
1594	w_free = w->w_next;
1595	bzero(w, sizeof(*w));
1596	return (w);
1597}
1598
1599static void
1600witness_free(struct witness *w)
1601{
1602	w->w_next = w_free;
1603	w_free = w;
1604}
1605
1606int
1607witness_list(struct proc *p)
1608{
1609	struct mtx *m;
1610	int nheld;
1611
1612	KASSERT(!witness_cold, ("%s: witness_cold\n", __FUNCTION__));
1613	nheld = 0;
1614	for ((m = LIST_FIRST(&p->p_heldmtx)); m != NULL;
1615	    m = LIST_NEXT(m, mtx_held)) {
1616		printf("\t\"%s\" (%p) locked at %s:%d\n",
1617		    m->mtx_description, m,
1618		    m->mtx_witness->w_file, m->mtx_witness->w_line);
1619		nheld++;
1620	}
1621
1622	return (nheld);
1623}
1624
1625#ifdef DDB
1626
1627DB_COMMAND(witness_list, db_witness_list)
1628{
1629
1630	witness_list(CURPROC);
1631}
1632
1633#endif
1634
1635void
1636witness_save(struct mtx *m, const char **filep, int *linep)
1637{
1638
1639	KASSERT(!witness_cold, ("%s: witness_cold\n", __FUNCTION__));
1640	if (m->mtx_witness == NULL)
1641		return;
1642
1643	*filep = m->mtx_witness->w_file;
1644	*linep = m->mtx_witness->w_line;
1645}
1646
1647void
1648witness_restore(struct mtx *m, const char *file, int line)
1649{
1650
1651	KASSERT(!witness_cold, ("%s: witness_cold\n", __FUNCTION__));
1652	if (m->mtx_witness == NULL)
1653		return;
1654
1655	m->mtx_witness->w_file = file;
1656	m->mtx_witness->w_line = line;
1657}
1658
1659#endif	/* WITNESS */
1660