kern_mutex.c revision 69208
1/*-
2 * Copyright (c) 1998 Berkeley Software Design, Inc. All rights reserved.
3 *
4 * Redistribution and use in source and binary forms, with or without
5 * modification, are permitted provided that the following conditions
6 * are met:
7 * 1. Redistributions of source code must retain the above copyright
8 *    notice, this list of conditions and the following disclaimer.
9 * 2. Redistributions in binary form must reproduce the above copyright
10 *    notice, this list of conditions and the following disclaimer in the
11 *    documentation and/or other materials provided with the distribution.
12 * 3. Berkeley Software Design Inc's name may not be used to endorse or
13 *    promote products derived from this software without specific prior
14 *    written permission.
15 *
16 * THIS SOFTWARE IS PROVIDED BY BERKELEY SOFTWARE DESIGN INC ``AS IS'' AND
17 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
18 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
19 * ARE DISCLAIMED.  IN NO EVENT SHALL BERKELEY SOFTWARE DESIGN INC BE LIABLE
20 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
21 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
22 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
23 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
24 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
25 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
26 * SUCH DAMAGE.
27 *
28 *	from BSDI $Id: mutex_witness.c,v 1.1.2.20 2000/04/27 03:10:27 cp Exp $
29 *	and BSDI $Id: synch_machdep.c,v 2.3.2.39 2000/04/27 03:10:25 cp Exp $
30 * $FreeBSD: head/sys/kern/kern_mutex.c 69208 2000-11-26 15:05:46Z jake $
31 */
32
33/*
34 *	Main Entry: witness
35 *	Pronunciation: 'wit-n&s
36 *	Function: noun
37 *	Etymology: Middle English witnesse, from Old English witnes knowledge,
38 *	    testimony, witness, from 2wit
39 *	Date: before 12th century
40 *	1 : attestation of a fact or event : TESTIMONY
41 *	2 : one that gives evidence; specifically : one who testifies in
42 *	    a cause or before a judicial tribunal
43 *	3 : one asked to be present at a transaction so as to be able to
44 *	    testify to its having taken place
45 *	4 : one who has personal knowledge of something
46 *	5 a : something serving as evidence or proof : SIGN
47 *	  b : public affirmation by word or example of usually
48 *	      religious faith or conviction <the heroic witness to divine
49 *	      life -- Pilot>
50 *	6 capitalized : a member of the Jehovah's Witnesses
51 */
52
53#include "opt_ddb.h"
54#include "opt_witness.h"
55
56#include <sys/param.h>
57#include <sys/bus.h>
58#include <sys/kernel.h>
59#include <sys/malloc.h>
60#include <sys/proc.h>
61#include <sys/sysctl.h>
62#include <sys/systm.h>
63#include <sys/vmmeter.h>
64#include <sys/ktr.h>
65
66#include <machine/atomic.h>
67#include <machine/bus.h>
68#include <machine/clock.h>
69#include <machine/cpu.h>
70
71#include <ddb/ddb.h>
72
73#include <vm/vm.h>
74#include <vm/vm_extern.h>
75
76#define _KERN_MUTEX_C_		/* Cause non-inlined mtx_*() to be compiled. */
77#include <sys/mutex.h>
78
79/*
80 * Machine independent bits of the mutex implementation
81 */
82/* All mutexes in system (used for debug/panic) */
83#ifdef MUTEX_DEBUG
84static struct mtx_debug all_mtx_debug = { NULL, {NULL, NULL}, NULL, 0,
85	"All mutexes queue head" };
86static struct mtx all_mtx = { MTX_UNOWNED, 0, 0, &all_mtx_debug,
87	TAILQ_HEAD_INITIALIZER(all_mtx.mtx_blocked),
88	{ NULL, NULL }, &all_mtx, &all_mtx };
89#else	/* MUTEX_DEBUG */
90static struct mtx all_mtx = { MTX_UNOWNED, 0, 0, "All mutexes queue head",
91	TAILQ_HEAD_INITIALIZER(all_mtx.mtx_blocked),
92	{ NULL, NULL }, &all_mtx, &all_mtx };
93#endif	/* MUTEX_DEBUG */
94
95static int	mtx_cur_cnt;
96static int	mtx_max_cnt;
97
98void	_mtx_enter_giant_def(void);
99void	_mtx_exit_giant_def(void);
100static void propagate_priority(struct proc *) __unused;
101
102#define	mtx_unowned(m)	((m)->mtx_lock == MTX_UNOWNED)
103#define	mtx_owner(m)	(mtx_unowned(m) ? NULL \
104			    : (struct proc *)((m)->mtx_lock & MTX_FLAGMASK))
105
106#define RETIP(x)		*(((uintptr_t *)(&x)) - 1)
107#define	SET_PRIO(p, pri)	(p)->p_priority = (pri)
108
109/*
110 * XXX Temporary, for use from assembly language
111 */
112
113void
114_mtx_enter_giant_def(void)
115{
116
117	mtx_enter(&Giant, MTX_DEF);
118}
119
120void
121_mtx_exit_giant_def(void)
122{
123
124	mtx_exit(&Giant, MTX_DEF);
125}
126
127static void
128propagate_priority(struct proc *p)
129{
130	int pri = p->p_priority;
131	struct mtx *m = p->p_blocked;
132
133	for (;;) {
134		struct proc *p1;
135
136		p = mtx_owner(m);
137
138		if (p == NULL) {
139			/*
140			 * This really isn't quite right. Really
141			 * ought to bump priority of process that
142			 * next acquires the mutex.
143			 */
144			MPASS(m->mtx_lock == MTX_CONTESTED);
145			return;
146		}
147		MPASS(p->p_magic == P_MAGIC);
148		if (p->p_priority <= pri)
149			return;
150		/*
151		 * If lock holder is actually running, just bump priority.
152		 */
153		if (TAILQ_NEXT(p, p_procq) == NULL) {
154			MPASS(p->p_stat == SRUN || p->p_stat == SZOMB);
155			SET_PRIO(p, pri);
156			return;
157		}
158		/*
159		 * If on run queue move to new run queue, and
160		 * quit.
161		 */
162		if (p->p_stat == SRUN) {
163			MPASS(p->p_blocked == NULL);
164			remrunqueue(p);
165			SET_PRIO(p, pri);
166			setrunqueue(p);
167			return;
168		}
169
170		/*
171		 * If we aren't blocked on a mutex, give up and quit.
172		 */
173		if (p->p_stat != SMTX) {
174			printf(
175	"XXX: process %d(%s):%d holds %s but isn't blocked on a mutex\n",
176			    p->p_pid, p->p_comm, p->p_stat, m->mtx_description);
177			return;
178		}
179
180		/*
181		 * Pick up the mutex that p is blocked on.
182		 */
183		m = p->p_blocked;
184		MPASS(m != NULL);
185
186		printf("XXX: process %d(%s) is blocked on %s\n", p->p_pid,
187		    p->p_comm, m->mtx_description);
188		/*
189		 * Check if the proc needs to be moved up on
190		 * the blocked chain
191		 */
192		if ((p1 = TAILQ_PREV(p, rq, p_procq)) == NULL ||
193		    p1->p_priority <= pri) {
194			if (p1)
195				printf(
196	"XXX: previous process %d(%s) has higher priority\n",
197				    p->p_pid, p->p_comm);
198			else
199				printf("XXX: process at head of run queue\n");
200			continue;
201		}
202
203		/*
204		 * Remove proc from blocked chain
205		 */
206		TAILQ_REMOVE(&m->mtx_blocked, p, p_procq);
207		TAILQ_FOREACH(p1, &m->mtx_blocked, p_procq) {
208			MPASS(p1->p_magic == P_MAGIC);
209			if (p1->p_priority > pri)
210				break;
211		}
212		if (p1)
213			TAILQ_INSERT_BEFORE(p1, p, p_procq);
214		else
215			TAILQ_INSERT_TAIL(&m->mtx_blocked, p, p_procq);
216		CTR4(KTR_LOCK,
217		    "propagate priority: p 0x%p moved before 0x%p on [0x%p] %s",
218		    p, p1, m, m->mtx_description);
219	}
220}
221
222void
223mtx_enter_hard(struct mtx *m, int type, int saveintr)
224{
225	struct proc *p = CURPROC;
226
227	KASSERT(p != NULL, ("curproc is NULL in mutex"));
228
229	switch (type) {
230	case MTX_DEF:
231		if ((m->mtx_lock & MTX_FLAGMASK) == (uintptr_t)p) {
232			m->mtx_recurse++;
233			atomic_set_ptr(&m->mtx_lock, MTX_RECURSE);
234			CTR1(KTR_LOCK, "mtx_enter: 0x%p recurse", m);
235			return;
236		}
237		CTR3(KTR_LOCK, "mtx_enter: 0x%p contested (lock=%p) [0x%p]",
238		    m, (void *)m->mtx_lock, (void *)RETIP(m));
239		while (!_obtain_lock(m, p)) {
240			uintptr_t v;
241			struct proc *p1;
242
243			mtx_enter(&sched_lock, MTX_SPIN | MTX_RLIKELY);
244			/*
245			 * check if the lock has been released while
246			 * waiting for the schedlock.
247			 */
248			if ((v = m->mtx_lock) == MTX_UNOWNED) {
249				mtx_exit(&sched_lock, MTX_SPIN);
250				continue;
251			}
252			/*
253			 * The mutex was marked contested on release. This
254			 * means that there are processes blocked on it.
255			 */
256			if (v == MTX_CONTESTED) {
257				p1 = TAILQ_FIRST(&m->mtx_blocked);
258				KASSERT(p1 != NULL, ("contested mutex has no contesters"));
259				KASSERT(p != NULL, ("curproc is NULL for contested mutex"));
260				m->mtx_lock = (uintptr_t)p | MTX_CONTESTED;
261				if (p1->p_priority < p->p_priority) {
262					SET_PRIO(p, p1->p_priority);
263				}
264				mtx_exit(&sched_lock, MTX_SPIN);
265				return;
266			}
267			/*
268			 * If the mutex isn't already contested and
269			 * a failure occurs setting the contested bit the
270			 * mutex was either release or the
271			 * state of the RECURSION bit changed.
272			 */
273			if ((v & MTX_CONTESTED) == 0 &&
274			    !atomic_cmpset_ptr(&m->mtx_lock, (void *)v,
275				               (void *)(v | MTX_CONTESTED))) {
276				mtx_exit(&sched_lock, MTX_SPIN);
277				continue;
278			}
279
280			/* We definitely have to sleep for this lock */
281			mtx_assert(m, MA_NOTOWNED);
282
283#ifdef notyet
284			/*
285			 * If we're borrowing an interrupted thread's VM
286			 * context must clean up before going to sleep.
287			 */
288			if (p->p_flag & (P_ITHD | P_SITHD)) {
289				ithd_t *it = (ithd_t *)p;
290
291				if (it->it_interrupted) {
292					CTR2(KTR_LOCK,
293					    "mtx_enter: 0x%x interrupted 0x%x",
294					    it, it->it_interrupted);
295					intr_thd_fixup(it);
296				}
297			}
298#endif
299
300			/* Put us on the list of procs blocked on this mutex */
301			if (TAILQ_EMPTY(&m->mtx_blocked)) {
302				p1 = (struct proc *)(m->mtx_lock &
303						     MTX_FLAGMASK);
304				LIST_INSERT_HEAD(&p1->p_contested, m,
305						 mtx_contested);
306				TAILQ_INSERT_TAIL(&m->mtx_blocked, p, p_procq);
307			} else {
308				TAILQ_FOREACH(p1, &m->mtx_blocked, p_procq)
309					if (p1->p_priority > p->p_priority)
310						break;
311				if (p1)
312					TAILQ_INSERT_BEFORE(p1, p, p_procq);
313				else
314					TAILQ_INSERT_TAIL(&m->mtx_blocked, p,
315							  p_procq);
316			}
317
318			p->p_blocked = m;	/* Who we're blocked on */
319			p->p_stat = SMTX;
320#if 0
321			propagate_priority(p);
322#endif
323			CTR3(KTR_LOCK, "mtx_enter: p 0x%p blocked on [0x%p] %s",
324			    p, m, m->mtx_description);
325			mi_switch();
326			CTR3(KTR_LOCK,
327			    "mtx_enter: p 0x%p free from blocked on [0x%p] %s",
328			    p, m, m->mtx_description);
329			mtx_exit(&sched_lock, MTX_SPIN);
330		}
331		return;
332	case MTX_SPIN:
333	case MTX_SPIN | MTX_FIRST:
334	case MTX_SPIN | MTX_TOPHALF:
335	    {
336		int i = 0;
337
338		if (m->mtx_lock == (uintptr_t)p) {
339			m->mtx_recurse++;
340			return;
341		}
342		CTR1(KTR_LOCK, "mtx_enter: %p spinning", m);
343		for (;;) {
344			if (_obtain_lock(m, p))
345				break;
346			while (m->mtx_lock != MTX_UNOWNED) {
347				if (i++ < 1000000)
348					continue;
349				if (i++ < 6000000)
350					DELAY (1);
351#ifdef DDB
352				else if (!db_active)
353#else
354				else
355#endif
356					panic(
357				"spin lock %s held by 0x%p for > 5 seconds",
358					    m->mtx_description,
359					    (void *)m->mtx_lock);
360			}
361		}
362
363#ifdef MUTEX_DEBUG
364		if (type != MTX_SPIN)
365			m->mtx_saveintr = 0xbeefface;
366		else
367#endif
368			m->mtx_saveintr = saveintr;
369		CTR1(KTR_LOCK, "mtx_enter: 0x%p spin done", m);
370		return;
371	    }
372	}
373}
374
375void
376mtx_exit_hard(struct mtx *m, int type)
377{
378	struct proc *p, *p1;
379	struct mtx *m1;
380	int pri;
381
382	p = CURPROC;
383	switch (type) {
384	case MTX_DEF:
385	case MTX_DEF | MTX_NOSWITCH:
386		if (m->mtx_recurse != 0) {
387			if (--(m->mtx_recurse) == 0)
388				atomic_clear_ptr(&m->mtx_lock, MTX_RECURSE);
389			CTR1(KTR_LOCK, "mtx_exit: 0x%p unrecurse", m);
390			return;
391		}
392		mtx_enter(&sched_lock, MTX_SPIN);
393		CTR1(KTR_LOCK, "mtx_exit: 0x%p contested", m);
394		p1 = TAILQ_FIRST(&m->mtx_blocked);
395		MPASS(p->p_magic == P_MAGIC);
396		MPASS(p1->p_magic == P_MAGIC);
397		TAILQ_REMOVE(&m->mtx_blocked, p1, p_procq);
398		if (TAILQ_EMPTY(&m->mtx_blocked)) {
399			LIST_REMOVE(m, mtx_contested);
400			_release_lock_quick(m);
401			CTR1(KTR_LOCK, "mtx_exit: 0x%p not held", m);
402		} else
403			m->mtx_lock = MTX_CONTESTED;
404		pri = MAXPRI;
405		LIST_FOREACH(m1, &p->p_contested, mtx_contested) {
406			int cp = TAILQ_FIRST(&m1->mtx_blocked)->p_priority;
407			if (cp < pri)
408				pri = cp;
409		}
410		if (pri > p->p_nativepri)
411			pri = p->p_nativepri;
412		SET_PRIO(p, pri);
413		CTR2(KTR_LOCK, "mtx_exit: 0x%p contested setrunqueue 0x%p",
414		    m, p1);
415		p1->p_blocked = NULL;
416		p1->p_stat = SRUN;
417		setrunqueue(p1);
418		if ((type & MTX_NOSWITCH) == 0 && p1->p_priority < pri) {
419#ifdef notyet
420			if (p->p_flag & (P_ITHD | P_SITHD)) {
421				ithd_t *it = (ithd_t *)p;
422
423				if (it->it_interrupted) {
424					CTR2(KTR_LOCK,
425					    "mtx_exit: 0x%x interruped 0x%x",
426					    it, it->it_interrupted);
427					intr_thd_fixup(it);
428				}
429			}
430#endif
431			setrunqueue(p);
432			CTR2(KTR_LOCK, "mtx_exit: 0x%p switching out lock=0x%p",
433			    m, (void *)m->mtx_lock);
434			mi_switch();
435			CTR2(KTR_LOCK, "mtx_exit: 0x%p resuming lock=0x%p",
436			    m, (void *)m->mtx_lock);
437		}
438		mtx_exit(&sched_lock, MTX_SPIN);
439		break;
440	case MTX_SPIN:
441	case MTX_SPIN | MTX_FIRST:
442		if (m->mtx_recurse != 0) {
443			m->mtx_recurse--;
444			return;
445		}
446		MPASS(mtx_owned(m));
447		_release_lock_quick(m);
448		if (type & MTX_FIRST)
449			enable_intr();	/* XXX is this kosher? */
450		else {
451			MPASS(m->mtx_saveintr != 0xbeefface);
452			restore_intr(m->mtx_saveintr);
453		}
454		break;
455	case MTX_SPIN | MTX_TOPHALF:
456		if (m->mtx_recurse != 0) {
457			m->mtx_recurse--;
458			return;
459		}
460		MPASS(mtx_owned(m));
461		_release_lock_quick(m);
462		break;
463	default:
464		panic("mtx_exit_hard: unsupported type 0x%x\n", type);
465	}
466}
467
468#define MV_DESTROY	0	/* validate before destory */
469#define MV_INIT		1	/* validate before init */
470
471#ifdef MUTEX_DEBUG
472
473int mtx_validate __P((struct mtx *, int));
474
475int
476mtx_validate(struct mtx *m, int when)
477{
478	struct mtx *mp;
479	int i;
480	int retval = 0;
481
482	if (m == &all_mtx || cold)
483		return 0;
484
485	mtx_enter(&all_mtx, MTX_DEF);
486/*
487 * XXX - When kernacc() is fixed on the alpha to handle K0_SEG memory properly
488 * we can re-enable the kernacc() checks.
489 */
490#ifndef __alpha__
491	MPASS(kernacc((caddr_t)all_mtx.mtx_next, sizeof(uintptr_t),
492	    VM_PROT_READ) == 1);
493#endif
494	MPASS(all_mtx.mtx_next->mtx_prev == &all_mtx);
495	for (i = 0, mp = all_mtx.mtx_next; mp != &all_mtx; mp = mp->mtx_next) {
496#ifndef __alpha__
497		if (kernacc((caddr_t)mp->mtx_next, sizeof(uintptr_t),
498		    VM_PROT_READ) != 1) {
499			panic("mtx_validate: mp=%p mp->mtx_next=%p",
500			    mp, mp->mtx_next);
501		}
502#endif
503		i++;
504		if (i > mtx_cur_cnt) {
505			panic("mtx_validate: too many in chain, known=%d\n",
506			    mtx_cur_cnt);
507		}
508	}
509	MPASS(i == mtx_cur_cnt);
510	switch (when) {
511	case MV_DESTROY:
512		for (mp = all_mtx.mtx_next; mp != &all_mtx; mp = mp->mtx_next)
513			if (mp == m)
514				break;
515		MPASS(mp == m);
516		break;
517	case MV_INIT:
518		for (mp = all_mtx.mtx_next; mp != &all_mtx; mp = mp->mtx_next)
519		if (mp == m) {
520			/*
521			 * Not good. This mutex already exists.
522			 */
523			printf("re-initing existing mutex %s\n",
524			    m->mtx_description);
525			MPASS(m->mtx_lock == MTX_UNOWNED);
526			retval = 1;
527		}
528	}
529	mtx_exit(&all_mtx, MTX_DEF);
530	return (retval);
531}
532#endif
533
534void
535mtx_init(struct mtx *m, const char *t, int flag)
536{
537#ifdef MUTEX_DEBUG
538	struct mtx_debug *debug;
539#endif
540
541	CTR2(KTR_LOCK, "mtx_init 0x%p (%s)", m, t);
542#ifdef MUTEX_DEBUG
543	if (mtx_validate(m, MV_INIT))	/* diagnostic and error correction */
544		return;
545	if (flag & MTX_COLD)
546		debug = m->mtx_debug;
547	else
548		debug = NULL;
549	if (debug == NULL) {
550#ifdef DIAGNOSTIC
551		if(cold && bootverbose)
552			printf("malloc'ing mtx_debug while cold for %s\n", t);
553#endif
554
555		/* XXX - should not use DEVBUF */
556		debug = malloc(sizeof(struct mtx_debug), M_DEVBUF, M_NOWAIT);
557		MPASS(debug != NULL);
558		bzero(debug, sizeof(struct mtx_debug));
559	}
560#endif
561	bzero((void *)m, sizeof *m);
562	TAILQ_INIT(&m->mtx_blocked);
563#ifdef MUTEX_DEBUG
564	m->mtx_debug = debug;
565#endif
566	m->mtx_description = t;
567	m->mtx_lock = MTX_UNOWNED;
568	/* Put on all mutex queue */
569	mtx_enter(&all_mtx, MTX_DEF);
570	m->mtx_next = &all_mtx;
571	m->mtx_prev = all_mtx.mtx_prev;
572	m->mtx_prev->mtx_next = m;
573	all_mtx.mtx_prev = m;
574	if (++mtx_cur_cnt > mtx_max_cnt)
575		mtx_max_cnt = mtx_cur_cnt;
576	mtx_exit(&all_mtx, MTX_DEF);
577	witness_init(m, flag);
578}
579
580void
581mtx_destroy(struct mtx *m)
582{
583
584	CTR2(KTR_LOCK, "mtx_destroy 0x%p (%s)", m, m->mtx_description);
585#ifdef MUTEX_DEBUG
586	if (m->mtx_next == NULL)
587		panic("mtx_destroy: %p (%s) already destroyed",
588		    m, m->mtx_description);
589
590	if (!mtx_owned(m)) {
591		MPASS(m->mtx_lock == MTX_UNOWNED);
592	} else {
593		MPASS((m->mtx_lock & (MTX_RECURSE|MTX_CONTESTED)) == 0);
594	}
595	mtx_validate(m, MV_DESTROY);		/* diagnostic */
596#endif
597
598#ifdef WITNESS
599	if (m->mtx_witness)
600		witness_destroy(m);
601#endif /* WITNESS */
602
603	/* Remove from the all mutex queue */
604	mtx_enter(&all_mtx, MTX_DEF);
605	m->mtx_next->mtx_prev = m->mtx_prev;
606	m->mtx_prev->mtx_next = m->mtx_next;
607#ifdef MUTEX_DEBUG
608	m->mtx_next = m->mtx_prev = NULL;
609	free(m->mtx_debug, M_DEVBUF);
610	m->mtx_debug = NULL;
611#endif
612	mtx_cur_cnt--;
613	mtx_exit(&all_mtx, MTX_DEF);
614}
615
616/*
617 * The non-inlined versions of the mtx_*() functions are always built (above),
618 * but the witness code depends on the MUTEX_DEBUG and WITNESS kernel options
619 * being specified.
620 */
621#if (defined(MUTEX_DEBUG) && defined(WITNESS))
622
623#define WITNESS_COUNT 200
624#define	WITNESS_NCHILDREN 2
625
626int witness_watch = 1;
627
628struct witness {
629	struct witness	*w_next;
630	const char	*w_description;
631	const char	*w_file;
632	int		 w_line;
633	struct witness	*w_morechildren;
634	u_char		 w_childcnt;
635	u_char		 w_Giant_squawked:1;
636	u_char		 w_other_squawked:1;
637	u_char		 w_same_squawked:1;
638	u_char		 w_sleep:1;
639	u_char		 w_spin:1;	/* this is a spin mutex */
640	u_int		 w_level;
641	struct witness	*w_children[WITNESS_NCHILDREN];
642};
643
644struct witness_blessed {
645	char 	*b_lock1;
646	char	*b_lock2;
647};
648
649#ifdef DDB
650/*
651 * When DDB is enabled and witness_ddb is set to 1, it will cause the system to
652 * drop into kdebug() when:
653 *	- a lock heirarchy violation occurs
654 *	- locks are held when going to sleep.
655 */
656#ifdef WITNESS_DDB
657int	witness_ddb = 1;
658#else
659int	witness_ddb = 0;
660#endif
661SYSCTL_INT(_debug, OID_AUTO, witness_ddb, CTLFLAG_RW, &witness_ddb, 0, "");
662#endif /* DDB */
663
664#ifdef WITNESS_SKIPSPIN
665int	witness_skipspin = 1;
666#else
667int	witness_skipspin = 0;
668#endif
669SYSCTL_INT(_debug, OID_AUTO, witness_skipspin, CTLFLAG_RD, &witness_skipspin, 0,
670    "");
671
672MUTEX_DECLARE(static,w_mtx);
673static struct witness	*w_free;
674static struct witness	*w_all;
675static int		 w_inited;
676static int		 witness_dead;	/* fatal error, probably no memory */
677
678static struct witness	 w_data[WITNESS_COUNT];
679
680static struct witness	 *enroll __P((const char *description, int flag));
681static int itismychild __P((struct witness *parent, struct witness *child));
682static void removechild __P((struct witness *parent, struct witness *child));
683static int isitmychild __P((struct witness *parent, struct witness *child));
684static int isitmydescendant __P((struct witness *parent, struct witness *child));
685static int dup_ok __P((struct witness *));
686static int blessed __P((struct witness *, struct witness *));
687static void witness_displaydescendants
688    __P((void(*)(const char *fmt, ...), struct witness *));
689static void witness_leveldescendents __P((struct witness *parent, int level));
690static void witness_levelall __P((void));
691static struct witness * witness_get __P((void));
692static void witness_free __P((struct witness *m));
693
694
695static char *ignore_list[] = {
696	"witness lock",
697	NULL
698};
699
700static char *spin_order_list[] = {
701	"sched lock",
702	"sio",
703#ifdef __i386__
704	"clk",
705#endif
706	"callout",
707	/*
708	 * leaf locks
709	 */
710	NULL
711};
712
713static char *order_list[] = {
714	"uidinfo hash", "uidinfo struct", NULL,
715	NULL
716};
717
718static char *dup_list[] = {
719	NULL
720};
721
722static char *sleep_list[] = {
723	"Giant",
724	NULL
725};
726
727/*
728 * Pairs of locks which have been blessed
729 * Don't complain about order problems with blessed locks
730 */
731static struct witness_blessed blessed_list[] = {
732};
733static int blessed_count = sizeof(blessed_list) / sizeof(struct witness_blessed);
734
735void
736witness_init(struct mtx *m, int flag)
737{
738	m->mtx_witness = enroll(m->mtx_description, flag);
739}
740
741void
742witness_destroy(struct mtx *m)
743{
744	struct mtx *m1;
745	struct proc *p;
746	p = CURPROC;
747	for ((m1 = LIST_FIRST(&p->p_heldmtx)); m1 != NULL;
748		m1 = LIST_NEXT(m1, mtx_held)) {
749		if (m1 == m) {
750			LIST_REMOVE(m, mtx_held);
751			break;
752		}
753	}
754	return;
755
756}
757
758void
759witness_enter(struct mtx *m, int flags, const char *file, int line)
760{
761	struct witness *w, *w1;
762	struct mtx *m1;
763	struct proc *p;
764	int i;
765#ifdef DDB
766	int go_into_ddb = 0;
767#endif /* DDB */
768
769	w = m->mtx_witness;
770	p = CURPROC;
771
772	if (flags & MTX_SPIN) {
773		if (!w->w_spin)
774			panic("mutex_enter: MTX_SPIN on MTX_DEF mutex %s @"
775			    " %s:%d", m->mtx_description, file, line);
776		if (m->mtx_recurse != 0)
777			return;
778		mtx_enter(&w_mtx, MTX_SPIN);
779		i = witness_spin_check;
780		if (i != 0 && w->w_level < i) {
781			mtx_exit(&w_mtx, MTX_SPIN);
782			panic("mutex_enter(%s:%x, MTX_SPIN) out of order @"
783			    " %s:%d already holding %s:%x",
784			    m->mtx_description, w->w_level, file, line,
785			    spin_order_list[ffs(i)-1], i);
786		}
787		PCPU_SET(witness_spin_check, i | w->w_level);
788		mtx_exit(&w_mtx, MTX_SPIN);
789		return;
790	}
791	if (w->w_spin)
792		panic("mutex_enter: MTX_DEF on MTX_SPIN mutex %s @ %s:%d",
793		    m->mtx_description, file, line);
794
795	if (m->mtx_recurse != 0)
796		return;
797	if (witness_dead)
798		goto out;
799	if (cold || panicstr)
800		goto out;
801
802	if (!mtx_legal2block())
803		panic("blockable mtx_enter() of %s when not legal @ %s:%d",
804			    m->mtx_description, file, line);
805	/*
806	 * Is this the first mutex acquired
807	 */
808	if ((m1 = LIST_FIRST(&p->p_heldmtx)) == NULL)
809		goto out;
810
811	if ((w1 = m1->mtx_witness) == w) {
812		if (w->w_same_squawked || dup_ok(w))
813			goto out;
814		w->w_same_squawked = 1;
815		printf("acquring duplicate lock of same type: \"%s\"\n",
816			m->mtx_description);
817		printf(" 1st @ %s:%d\n", w->w_file, w->w_line);
818		printf(" 2nd @ %s:%d\n", file, line);
819#ifdef DDB
820		go_into_ddb = 1;
821#endif /* DDB */
822		goto out;
823	}
824	MPASS(!mtx_owned(&w_mtx));
825	mtx_enter(&w_mtx, MTX_SPIN);
826	/*
827	 * If we have a known higher number just say ok
828	 */
829	if (witness_watch > 1 && w->w_level > w1->w_level) {
830		mtx_exit(&w_mtx, MTX_SPIN);
831		goto out;
832	}
833	if (isitmydescendant(m1->mtx_witness, w)) {
834		mtx_exit(&w_mtx, MTX_SPIN);
835		goto out;
836	}
837	for (i = 0; m1 != NULL; m1 = LIST_NEXT(m1, mtx_held), i++) {
838
839		MPASS(i < 200);
840		w1 = m1->mtx_witness;
841		if (isitmydescendant(w, w1)) {
842			mtx_exit(&w_mtx, MTX_SPIN);
843			if (blessed(w, w1))
844				goto out;
845			if (m1 == &Giant) {
846				if (w1->w_Giant_squawked)
847					goto out;
848				else
849					w1->w_Giant_squawked = 1;
850			} else {
851				if (w1->w_other_squawked)
852					goto out;
853				else
854					w1->w_other_squawked = 1;
855			}
856			printf("lock order reversal\n");
857			printf(" 1st %s last acquired @ %s:%d\n",
858			    w->w_description, w->w_file, w->w_line);
859			printf(" 2nd %p %s @ %s:%d\n",
860			    m1, w1->w_description, w1->w_file, w1->w_line);
861			printf(" 3rd %p %s @ %s:%d\n",
862			    m, w->w_description, file, line);
863#ifdef DDB
864			go_into_ddb = 1;
865#endif /* DDB */
866			goto out;
867		}
868	}
869	m1 = LIST_FIRST(&p->p_heldmtx);
870	if (!itismychild(m1->mtx_witness, w))
871		mtx_exit(&w_mtx, MTX_SPIN);
872
873out:
874#ifdef DDB
875	if (witness_ddb && go_into_ddb)
876		Debugger("witness_enter");
877#endif /* DDB */
878	w->w_file = file;
879	w->w_line = line;
880	m->mtx_line = line;
881	m->mtx_file = file;
882
883	/*
884	 * If this pays off it likely means that a mutex being witnessed
885	 * is acquired in hardclock. Put it in the ignore list. It is
886	 * likely not the mutex this assert fails on.
887	 */
888	MPASS(m->mtx_held.le_prev == NULL);
889	LIST_INSERT_HEAD(&p->p_heldmtx, (struct mtx*)m, mtx_held);
890}
891
892void
893witness_exit(struct mtx *m, int flags, const char *file, int line)
894{
895	struct witness *w;
896
897	w = m->mtx_witness;
898
899	if (flags & MTX_SPIN) {
900		if (!w->w_spin)
901			panic("mutex_exit: MTX_SPIN on MTX_DEF mutex %s @"
902			    " %s:%d", m->mtx_description, file, line);
903		if (m->mtx_recurse != 0)
904			return;
905		mtx_enter(&w_mtx, MTX_SPIN);
906		PCPU_SET(witness_spin_check, witness_spin_check & ~w->w_level);
907		mtx_exit(&w_mtx, MTX_SPIN);
908		return;
909	}
910	if (w->w_spin)
911		panic("mutex_exit: MTX_DEF on MTX_SPIN mutex %s @ %s:%d",
912		    m->mtx_description, file, line);
913
914	if (m->mtx_recurse != 0)
915		return;
916
917	if ((flags & MTX_NOSWITCH) == 0 && !mtx_legal2block() && !cold)
918		panic("switchable mtx_exit() of %s when not legal @ %s:%d",
919			    m->mtx_description, file, line);
920	LIST_REMOVE(m, mtx_held);
921	m->mtx_held.le_prev = NULL;
922}
923
924void
925witness_try_enter(struct mtx *m, int flags, const char *file, int line)
926{
927	struct proc *p;
928	struct witness *w = m->mtx_witness;
929
930	if (flags & MTX_SPIN) {
931		if (!w->w_spin)
932			panic("mutex_try_enter: "
933			    "MTX_SPIN on MTX_DEF mutex %s @ %s:%d",
934			    m->mtx_description, file, line);
935		if (m->mtx_recurse != 0)
936			return;
937		mtx_enter(&w_mtx, MTX_SPIN);
938		PCPU_SET(witness_spin_check, witness_spin_check | w->w_level);
939		mtx_exit(&w_mtx, MTX_SPIN);
940		return;
941	}
942
943	if (w->w_spin)
944		panic("mutex_try_enter: MTX_DEF on MTX_SPIN mutex %s @ %s:%d",
945		    m->mtx_description, file, line);
946
947	if (m->mtx_recurse != 0)
948		return;
949
950	w->w_file = file;
951	w->w_line = line;
952	m->mtx_line = line;
953	m->mtx_file = file;
954	p = CURPROC;
955	MPASS(m->mtx_held.le_prev == NULL);
956	LIST_INSERT_HEAD(&p->p_heldmtx, (struct mtx*)m, mtx_held);
957}
958
959void
960witness_display(void(*prnt)(const char *fmt, ...))
961{
962	struct witness *w, *w1;
963
964	witness_levelall();
965
966	for (w = w_all; w; w = w->w_next) {
967		if (w->w_file == NULL)
968			continue;
969		for (w1 = w_all; w1; w1 = w1->w_next) {
970			if (isitmychild(w1, w))
971				break;
972		}
973		if (w1 != NULL)
974			continue;
975		/*
976		 * This lock has no anscestors, display its descendants.
977		 */
978		witness_displaydescendants(prnt, w);
979	}
980	prnt("\nMutex which were never acquired\n");
981	for (w = w_all; w; w = w->w_next) {
982		if (w->w_file != NULL)
983			continue;
984		prnt("%s\n", w->w_description);
985	}
986}
987
988int
989witness_sleep(int check_only, struct mtx *mtx, const char *file, int line)
990{
991	struct mtx *m;
992	struct proc *p;
993	char **sleep;
994	int n = 0;
995
996	p = CURPROC;
997	for ((m = LIST_FIRST(&p->p_heldmtx)); m != NULL;
998	    m = LIST_NEXT(m, mtx_held)) {
999		if (m == mtx)
1000			continue;
1001		for (sleep = sleep_list; *sleep!= NULL; sleep++)
1002			if (strcmp(m->mtx_description, *sleep) == 0)
1003				goto next;
1004		printf("%s:%d: %s with \"%s\" locked from %s:%d\n",
1005			file, line, check_only ? "could sleep" : "sleeping",
1006			m->mtx_description,
1007			m->mtx_witness->w_file, m->mtx_witness->w_line);
1008		n++;
1009	next:
1010	}
1011#ifdef DDB
1012	if (witness_ddb && n)
1013		Debugger("witness_sleep");
1014#endif /* DDB */
1015	return (n);
1016}
1017
1018static struct witness *
1019enroll(const char *description, int flag)
1020{
1021	int i;
1022	struct witness *w, *w1;
1023	char **ignore;
1024	char **order;
1025
1026	if (!witness_watch)
1027		return (NULL);
1028	for (ignore = ignore_list; *ignore != NULL; ignore++)
1029		if (strcmp(description, *ignore) == 0)
1030			return (NULL);
1031
1032	if (w_inited == 0) {
1033		mtx_init(&w_mtx, "witness lock", MTX_COLD | MTX_DEF);
1034		for (i = 0; i < WITNESS_COUNT; i++) {
1035			w = &w_data[i];
1036			witness_free(w);
1037		}
1038		w_inited = 1;
1039		for (order = order_list; *order != NULL; order++) {
1040			w = enroll(*order, MTX_DEF);
1041			w->w_file = "order list";
1042			for (order++; *order != NULL; order++) {
1043				w1 = enroll(*order, MTX_DEF);
1044				w1->w_file = "order list";
1045				itismychild(w, w1);
1046				w = w1;
1047    	    	    	}
1048		}
1049	}
1050	if ((flag & MTX_SPIN) && witness_skipspin)
1051		return (NULL);
1052	mtx_enter(&w_mtx, MTX_SPIN);
1053	for (w = w_all; w; w = w->w_next) {
1054		if (strcmp(description, w->w_description) == 0) {
1055			mtx_exit(&w_mtx, MTX_SPIN);
1056			return (w);
1057		}
1058	}
1059	if ((w = witness_get()) == NULL)
1060		return (NULL);
1061	w->w_next = w_all;
1062	w_all = w;
1063	w->w_description = description;
1064	mtx_exit(&w_mtx, MTX_SPIN);
1065	if (flag & MTX_SPIN) {
1066		w->w_spin = 1;
1067
1068		i = 1;
1069		for (order = spin_order_list; *order != NULL; order++) {
1070			if (strcmp(description, *order) == 0)
1071				break;
1072			i <<= 1;
1073		}
1074		if (*order == NULL)
1075			panic("spin lock %s not in order list", description);
1076		w->w_level = i;
1077	}
1078	return (w);
1079}
1080
1081static int
1082itismychild(struct witness *parent, struct witness *child)
1083{
1084	static int recursed;
1085
1086	/*
1087	 * Insert "child" after "parent"
1088	 */
1089	while (parent->w_morechildren)
1090		parent = parent->w_morechildren;
1091
1092	if (parent->w_childcnt == WITNESS_NCHILDREN) {
1093		if ((parent->w_morechildren = witness_get()) == NULL)
1094			return (1);
1095		parent = parent->w_morechildren;
1096	}
1097	MPASS(child != NULL);
1098	parent->w_children[parent->w_childcnt++] = child;
1099	/*
1100	 * now prune whole tree
1101	 */
1102	if (recursed)
1103		return (0);
1104	recursed = 1;
1105	for (child = w_all; child != NULL; child = child->w_next) {
1106		for (parent = w_all; parent != NULL;
1107		    parent = parent->w_next) {
1108			if (!isitmychild(parent, child))
1109				continue;
1110			removechild(parent, child);
1111			if (isitmydescendant(parent, child))
1112				continue;
1113			itismychild(parent, child);
1114		}
1115	}
1116	recursed = 0;
1117	witness_levelall();
1118	return (0);
1119}
1120
1121static void
1122removechild(struct witness *parent, struct witness *child)
1123{
1124	struct witness *w, *w1;
1125	int i;
1126
1127	for (w = parent; w != NULL; w = w->w_morechildren)
1128		for (i = 0; i < w->w_childcnt; i++)
1129			if (w->w_children[i] == child)
1130				goto found;
1131	return;
1132found:
1133	for (w1 = w; w1->w_morechildren != NULL; w1 = w1->w_morechildren)
1134		continue;
1135	w->w_children[i] = w1->w_children[--w1->w_childcnt];
1136	MPASS(w->w_children[i] != NULL);
1137
1138	if (w1->w_childcnt != 0)
1139		return;
1140
1141	if (w1 == parent)
1142		return;
1143	for (w = parent; w->w_morechildren != w1; w = w->w_morechildren)
1144		continue;
1145	w->w_morechildren = 0;
1146	witness_free(w1);
1147}
1148
1149static int
1150isitmychild(struct witness *parent, struct witness *child)
1151{
1152	struct witness *w;
1153	int i;
1154
1155	for (w = parent; w != NULL; w = w->w_morechildren) {
1156		for (i = 0; i < w->w_childcnt; i++) {
1157			if (w->w_children[i] == child)
1158				return (1);
1159		}
1160	}
1161	return (0);
1162}
1163
1164static int
1165isitmydescendant(struct witness *parent, struct witness *child)
1166{
1167	struct witness *w;
1168	int i;
1169	int j;
1170
1171	for (j = 0, w = parent; w != NULL; w = w->w_morechildren, j++) {
1172		MPASS(j < 1000);
1173		for (i = 0; i < w->w_childcnt; i++) {
1174			if (w->w_children[i] == child)
1175				return (1);
1176		}
1177		for (i = 0; i < w->w_childcnt; i++) {
1178			if (isitmydescendant(w->w_children[i], child))
1179				return (1);
1180		}
1181	}
1182	return (0);
1183}
1184
1185void
1186witness_levelall (void)
1187{
1188	struct witness *w, *w1;
1189
1190	for (w = w_all; w; w = w->w_next)
1191		if (!w->w_spin)
1192			w->w_level = 0;
1193	for (w = w_all; w; w = w->w_next) {
1194		if (w->w_spin)
1195			continue;
1196		for (w1 = w_all; w1; w1 = w1->w_next) {
1197			if (isitmychild(w1, w))
1198				break;
1199		}
1200		if (w1 != NULL)
1201			continue;
1202		witness_leveldescendents(w, 0);
1203	}
1204}
1205
1206static void
1207witness_leveldescendents(struct witness *parent, int level)
1208{
1209	int i;
1210	struct witness *w;
1211
1212	if (parent->w_level < level)
1213		parent->w_level = level;
1214	level++;
1215	for (w = parent; w != NULL; w = w->w_morechildren)
1216		for (i = 0; i < w->w_childcnt; i++)
1217			witness_leveldescendents(w->w_children[i], level);
1218}
1219
1220static void
1221witness_displaydescendants(void(*prnt)(const char *fmt, ...),
1222			   struct witness *parent)
1223{
1224	struct witness *w;
1225	int i;
1226	int level = parent->w_level;
1227
1228	prnt("%d", level);
1229	if (level < 10)
1230		prnt(" ");
1231	for (i = 0; i < level; i++)
1232		prnt(" ");
1233	prnt("%s", parent->w_description);
1234	if (parent->w_file != NULL) {
1235		prnt(" -- last acquired @ %s", parent->w_file);
1236#ifndef W_USE_WHERE
1237		prnt(":%d", parent->w_line);
1238#endif
1239		prnt("\n");
1240	}
1241
1242	for (w = parent; w != NULL; w = w->w_morechildren)
1243		for (i = 0; i < w->w_childcnt; i++)
1244			    witness_displaydescendants(prnt, w->w_children[i]);
1245    }
1246
1247static int
1248dup_ok(struct witness *w)
1249{
1250	char **dup;
1251
1252	for (dup = dup_list; *dup!= NULL; dup++)
1253		if (strcmp(w->w_description, *dup) == 0)
1254			return (1);
1255	return (0);
1256}
1257
1258static int
1259blessed(struct witness *w1, struct witness *w2)
1260{
1261	int i;
1262	struct witness_blessed *b;
1263
1264	for (i = 0; i < blessed_count; i++) {
1265		b = &blessed_list[i];
1266		if (strcmp(w1->w_description, b->b_lock1) == 0) {
1267			if (strcmp(w2->w_description, b->b_lock2) == 0)
1268				return (1);
1269			continue;
1270		}
1271		if (strcmp(w1->w_description, b->b_lock2) == 0)
1272			if (strcmp(w2->w_description, b->b_lock1) == 0)
1273				return (1);
1274	}
1275	return (0);
1276}
1277
1278static struct witness *
1279witness_get()
1280{
1281	struct witness *w;
1282
1283	if ((w = w_free) == NULL) {
1284		witness_dead = 1;
1285		mtx_exit(&w_mtx, MTX_SPIN);
1286		printf("witness exhausted\n");
1287		return (NULL);
1288	}
1289	w_free = w->w_next;
1290	bzero(w, sizeof(*w));
1291	return (w);
1292}
1293
1294static void
1295witness_free(struct witness *w)
1296{
1297	w->w_next = w_free;
1298	w_free = w;
1299}
1300
1301void
1302witness_list(struct proc *p)
1303{
1304	struct mtx *m;
1305
1306	for ((m = LIST_FIRST(&p->p_heldmtx)); m != NULL;
1307	    m = LIST_NEXT(m, mtx_held)) {
1308		printf("\t\"%s\" (%p) locked at %s:%d\n",
1309		    m->mtx_description, m,
1310		    m->mtx_witness->w_file, m->mtx_witness->w_line);
1311	}
1312}
1313
1314void
1315witness_save(struct mtx *m, const char **filep, int *linep)
1316{
1317	*filep = m->mtx_witness->w_file;
1318	*linep = m->mtx_witness->w_line;
1319}
1320
1321void
1322witness_restore(struct mtx *m, const char *file, int line)
1323{
1324	m->mtx_witness->w_file = file;
1325	m->mtx_witness->w_line = line;
1326}
1327
1328#endif	/* (defined(MUTEX_DEBUG) && defined(WITNESS)) */
1329