kern_mutex.c revision 68790
1/*-
2 * Copyright (c) 1998 Berkeley Software Design, Inc. All rights reserved.
3 *
4 * Redistribution and use in source and binary forms, with or without
5 * modification, are permitted provided that the following conditions
6 * are met:
7 * 1. Redistributions of source code must retain the above copyright
8 *    notice, this list of conditions and the following disclaimer.
9 * 2. Redistributions in binary form must reproduce the above copyright
10 *    notice, this list of conditions and the following disclaimer in the
11 *    documentation and/or other materials provided with the distribution.
12 * 3. Berkeley Software Design Inc's name may not be used to endorse or
13 *    promote products derived from this software without specific prior
14 *    written permission.
15 *
16 * THIS SOFTWARE IS PROVIDED BY BERKELEY SOFTWARE DESIGN INC ``AS IS'' AND
17 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
18 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
19 * ARE DISCLAIMED.  IN NO EVENT SHALL BERKELEY SOFTWARE DESIGN INC BE LIABLE
20 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
21 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
22 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
23 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
24 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
25 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
26 * SUCH DAMAGE.
27 *
28 *	from BSDI $Id: mutex_witness.c,v 1.1.2.20 2000/04/27 03:10:27 cp Exp $
29 *	and BSDI $Id: synch_machdep.c,v 2.3.2.39 2000/04/27 03:10:25 cp Exp $
30 * $FreeBSD: head/sys/kern/kern_mutex.c 68790 2000-11-15 22:08:16Z jhb $
31 */
32
33/*
34 *	Main Entry: witness
35 *	Pronunciation: 'wit-n&s
36 *	Function: noun
37 *	Etymology: Middle English witnesse, from Old English witnes knowledge,
38 *	    testimony, witness, from 2wit
39 *	Date: before 12th century
40 *	1 : attestation of a fact or event : TESTIMONY
41 *	2 : one that gives evidence; specifically : one who testifies in
42 *	    a cause or before a judicial tribunal
43 *	3 : one asked to be present at a transaction so as to be able to
44 *	    testify to its having taken place
45 *	4 : one who has personal knowledge of something
46 *	5 a : something serving as evidence or proof : SIGN
47 *	  b : public affirmation by word or example of usually
48 *	      religious faith or conviction <the heroic witness to divine
49 *	      life -- Pilot>
50 *	6 capitalized : a member of the Jehovah's Witnesses
51 */
52
53#include "opt_ddb.h"
54#include "opt_witness.h"
55
56#include <sys/param.h>
57#include <sys/bus.h>
58#include <sys/kernel.h>
59#include <sys/malloc.h>
60#include <sys/proc.h>
61#include <sys/sysctl.h>
62#include <sys/systm.h>
63#include <sys/vmmeter.h>
64#include <sys/ktr.h>
65
66#include <machine/atomic.h>
67#include <machine/bus.h>
68#include <machine/clock.h>
69#include <machine/cpu.h>
70
71#include <ddb/ddb.h>
72
73#include <vm/vm.h>
74#include <vm/vm_extern.h>
75
76#define _KERN_MUTEX_C_		/* Cause non-inlined mtx_*() to be compiled. */
77#include <sys/mutex.h>
78
79/*
80 * Machine independent bits of the mutex implementation
81 */
82/* All mutexes in system (used for debug/panic) */
83#ifdef MUTEX_DEBUG
84static struct mtx_debug all_mtx_debug = { NULL, {NULL, NULL}, NULL, 0,
85	"All mutexes queue head" };
86static struct mtx all_mtx = { MTX_UNOWNED, 0, 0, &all_mtx_debug,
87	TAILQ_HEAD_INITIALIZER(all_mtx.mtx_blocked),
88	{ NULL, NULL }, &all_mtx, &all_mtx };
89#else	/* MUTEX_DEBUG */
90static struct mtx all_mtx = { MTX_UNOWNED, 0, 0, "All mutexes queue head",
91	TAILQ_HEAD_INITIALIZER(all_mtx.mtx_blocked),
92	{ NULL, NULL }, &all_mtx, &all_mtx };
93#endif	/* MUTEX_DEBUG */
94
95static int	mtx_cur_cnt;
96static int	mtx_max_cnt;
97
98void	_mtx_enter_giant_def(void);
99void	_mtx_exit_giant_def(void);
100static void propagate_priority(struct proc *) __unused;
101
102#define	mtx_unowned(m)	((m)->mtx_lock == MTX_UNOWNED)
103#define	mtx_owner(m)	(mtx_unowned(m) ? NULL \
104			    : (struct proc *)((m)->mtx_lock & MTX_FLAGMASK))
105
106#define RETIP(x)		*(((uintptr_t *)(&x)) - 1)
107#define	SET_PRIO(p, pri)	(p)->p_priority = (pri)
108
109/*
110 * XXX Temporary, for use from assembly language
111 */
112
113void
114_mtx_enter_giant_def(void)
115{
116
117	mtx_enter(&Giant, MTX_DEF);
118}
119
120void
121_mtx_exit_giant_def(void)
122{
123
124	mtx_exit(&Giant, MTX_DEF);
125}
126
127static void
128propagate_priority(struct proc *p)
129{
130	int pri = p->p_priority;
131	struct mtx *m = p->p_blocked;
132
133	for (;;) {
134		struct proc *p1;
135
136		p = mtx_owner(m);
137
138		if (p == NULL) {
139			/*
140			 * This really isn't quite right. Really
141			 * ought to bump priority of process that
142			 * next acquires the mutex.
143			 */
144			MPASS(m->mtx_lock == MTX_CONTESTED);
145			return;
146		}
147		MPASS(p->p_magic == P_MAGIC);
148		if (p->p_priority <= pri)
149			return;
150		/*
151		 * If lock holder is actually running, just bump priority.
152		 */
153		if (TAILQ_NEXT(p, p_procq) == NULL) {
154			MPASS(p->p_stat == SRUN || p->p_stat == SZOMB);
155			SET_PRIO(p, pri);
156			return;
157		}
158		/*
159		 * If on run queue move to new run queue, and
160		 * quit.
161		 */
162		if (p->p_stat == SRUN) {
163			MPASS(p->p_blocked == NULL);
164			remrunqueue(p);
165			SET_PRIO(p, pri);
166			setrunqueue(p);
167			return;
168		}
169
170		/*
171		 * If we aren't blocked on a mutex, give up and quit.
172		 */
173		if (p->p_stat != SMTX) {
174			printf(
175	"XXX: process %d(%s):%d holds %s but isn't blocked on a mutex\n",
176			    p->p_pid, p->p_comm, p->p_stat, m->mtx_description);
177			return;
178		}
179
180		/*
181		 * Pick up the mutex that p is blocked on.
182		 */
183		m = p->p_blocked;
184		MPASS(m != NULL);
185
186		printf("XXX: process %d(%s) is blocked on %s\n", p->p_pid,
187		    p->p_comm, m->mtx_description);
188		/*
189		 * Check if the proc needs to be moved up on
190		 * the blocked chain
191		 */
192		if ((p1 = TAILQ_PREV(p, rq, p_procq)) == NULL ||
193		    p1->p_priority <= pri) {
194			if (p1)
195				printf(
196	"XXX: previous process %d(%s) has higher priority\n",
197				    p->p_pid, p->p_comm);
198			else
199				printf("XXX: process at head of run queue\n");
200			continue;
201		}
202
203		/*
204		 * Remove proc from blocked chain
205		 */
206		TAILQ_REMOVE(&m->mtx_blocked, p, p_procq);
207		TAILQ_FOREACH(p1, &m->mtx_blocked, p_procq) {
208			MPASS(p1->p_magic == P_MAGIC);
209			if (p1->p_priority > pri)
210				break;
211		}
212		if (p1)
213			TAILQ_INSERT_BEFORE(p1, p, p_procq);
214		else
215			TAILQ_INSERT_TAIL(&m->mtx_blocked, p, p_procq);
216		CTR4(KTR_LOCK,
217		    "propagate priority: p 0x%p moved before 0x%p on [0x%p] %s",
218		    p, p1, m, m->mtx_description);
219	}
220}
221
222void
223mtx_enter_hard(struct mtx *m, int type, int saveintr)
224{
225	struct proc *p = CURPROC;
226	struct timeval new_switchtime;
227
228	KASSERT(p != NULL, ("curproc is NULL in mutex"));
229
230	switch (type) {
231	case MTX_DEF:
232		if ((m->mtx_lock & MTX_FLAGMASK) == (uintptr_t)p) {
233			m->mtx_recurse++;
234			atomic_set_ptr(&m->mtx_lock, MTX_RECURSE);
235			CTR1(KTR_LOCK, "mtx_enter: 0x%p recurse", m);
236			return;
237		}
238		CTR3(KTR_LOCK, "mtx_enter: 0x%p contested (lock=%p) [0x%p]",
239		    m, (void *)m->mtx_lock, (void *)RETIP(m));
240		while (!_obtain_lock(m, p)) {
241			uintptr_t v;
242			struct proc *p1;
243
244			mtx_enter(&sched_lock, MTX_SPIN | MTX_RLIKELY);
245			/*
246			 * check if the lock has been released while
247			 * waiting for the schedlock.
248			 */
249			if ((v = m->mtx_lock) == MTX_UNOWNED) {
250				mtx_exit(&sched_lock, MTX_SPIN);
251				continue;
252			}
253			/*
254			 * The mutex was marked contested on release. This
255			 * means that there are processes blocked on it.
256			 */
257			if (v == MTX_CONTESTED) {
258				p1 = TAILQ_FIRST(&m->mtx_blocked);
259				KASSERT(p1 != NULL, ("contested mutex has no contesters"));
260				KASSERT(p != NULL, ("curproc is NULL for contested mutex"));
261				m->mtx_lock = (uintptr_t)p | MTX_CONTESTED;
262				if (p1->p_priority < p->p_priority) {
263					SET_PRIO(p, p1->p_priority);
264				}
265				mtx_exit(&sched_lock, MTX_SPIN);
266				return;
267			}
268			/*
269			 * If the mutex isn't already contested and
270			 * a failure occurs setting the contested bit the
271			 * mutex was either release or the
272			 * state of the RECURSION bit changed.
273			 */
274			if ((v & MTX_CONTESTED) == 0 &&
275			    !atomic_cmpset_ptr(&m->mtx_lock, (void *)v,
276				               (void *)(v | MTX_CONTESTED))) {
277				mtx_exit(&sched_lock, MTX_SPIN);
278				continue;
279			}
280
281			/* We definitely have to sleep for this lock */
282			mtx_assert(m, MA_NOTOWNED);
283
284#ifdef notyet
285			/*
286			 * If we're borrowing an interrupted thread's VM
287			 * context must clean up before going to sleep.
288			 */
289			if (p->p_flag & (P_ITHD | P_SITHD)) {
290				ithd_t *it = (ithd_t *)p;
291
292				if (it->it_interrupted) {
293					CTR2(KTR_LOCK,
294					    "mtx_enter: 0x%x interrupted 0x%x",
295					    it, it->it_interrupted);
296					intr_thd_fixup(it);
297				}
298			}
299#endif
300
301			/* Put us on the list of procs blocked on this mutex */
302			if (TAILQ_EMPTY(&m->mtx_blocked)) {
303				p1 = (struct proc *)(m->mtx_lock &
304						     MTX_FLAGMASK);
305				LIST_INSERT_HEAD(&p1->p_contested, m,
306						 mtx_contested);
307				TAILQ_INSERT_TAIL(&m->mtx_blocked, p, p_procq);
308			} else {
309				TAILQ_FOREACH(p1, &m->mtx_blocked, p_procq)
310					if (p1->p_priority > p->p_priority)
311						break;
312				if (p1)
313					TAILQ_INSERT_BEFORE(p1, p, p_procq);
314				else
315					TAILQ_INSERT_TAIL(&m->mtx_blocked, p,
316							  p_procq);
317			}
318
319			p->p_blocked = m;	/* Who we're blocked on */
320			p->p_stat = SMTX;
321#if 0
322			propagate_priority(p);
323#endif
324			CTR3(KTR_LOCK, "mtx_enter: p 0x%p blocked on [0x%p] %s",
325			    p, m, m->mtx_description);
326			/*
327			 * Blatantly copied from mi_switch nearly verbatim.
328			 * When Giant goes away and we stop dinking with it
329			 * in mi_switch, we can go back to calling mi_switch
330			 * directly here.
331			 */
332
333			/*
334			 * Compute the amount of time during which the current
335			 * process was running, and add that to its total so
336			 * far.
337			 */
338			microuptime(&new_switchtime);
339			if (timevalcmp(&new_switchtime, &switchtime, <)) {
340				printf(
341		    "microuptime() went backwards (%ld.%06ld -> %ld.%06ld)\n",
342		    		    switchtime.tv_sec, switchtime.tv_usec,
343		    		    new_switchtime.tv_sec,
344		    		    new_switchtime.tv_usec);
345				new_switchtime = switchtime;
346			} else {
347				p->p_runtime += (new_switchtime.tv_usec -
348				    switchtime.tv_usec) +
349				    (new_switchtime.tv_sec - switchtime.tv_sec) *
350				    (int64_t)1000000;
351			}
352
353			/*
354			 * Pick a new current process and record its start time.
355			 */
356			cnt.v_swtch++;
357			switchtime = new_switchtime;
358			cpu_switch();
359			if (switchtime.tv_sec == 0)
360				microuptime(&switchtime);
361			switchticks = ticks;
362			CTR3(KTR_LOCK,
363			    "mtx_enter: p 0x%p free from blocked on [0x%p] %s",
364			    p, m, m->mtx_description);
365			mtx_exit(&sched_lock, MTX_SPIN);
366		}
367		return;
368	case MTX_SPIN:
369	case MTX_SPIN | MTX_FIRST:
370	case MTX_SPIN | MTX_TOPHALF:
371	    {
372		int i = 0;
373
374		if (m->mtx_lock == (uintptr_t)p) {
375			m->mtx_recurse++;
376			return;
377		}
378		CTR1(KTR_LOCK, "mtx_enter: %p spinning", m);
379		for (;;) {
380			if (_obtain_lock(m, p))
381				break;
382			while (m->mtx_lock != MTX_UNOWNED) {
383				if (i++ < 1000000)
384					continue;
385				if (i++ < 6000000)
386					DELAY (1);
387#ifdef DDB
388				else if (!db_active)
389#else
390				else
391#endif
392					panic(
393				"spin lock %s held by 0x%p for > 5 seconds",
394					    m->mtx_description,
395					    (void *)m->mtx_lock);
396			}
397		}
398
399#ifdef MUTEX_DEBUG
400		if (type != MTX_SPIN)
401			m->mtx_saveintr = 0xbeefface;
402		else
403#endif
404			m->mtx_saveintr = saveintr;
405		CTR1(KTR_LOCK, "mtx_enter: 0x%p spin done", m);
406		return;
407	    }
408	}
409}
410
411void
412mtx_exit_hard(struct mtx *m, int type)
413{
414	struct proc *p, *p1;
415	struct mtx *m1;
416	int pri;
417
418	p = CURPROC;
419	switch (type) {
420	case MTX_DEF:
421	case MTX_DEF | MTX_NOSWITCH:
422		if (m->mtx_recurse != 0) {
423			if (--(m->mtx_recurse) == 0)
424				atomic_clear_ptr(&m->mtx_lock, MTX_RECURSE);
425			CTR1(KTR_LOCK, "mtx_exit: 0x%p unrecurse", m);
426			return;
427		}
428		mtx_enter(&sched_lock, MTX_SPIN);
429		CTR1(KTR_LOCK, "mtx_exit: 0x%p contested", m);
430		p1 = TAILQ_FIRST(&m->mtx_blocked);
431		MPASS(p->p_magic == P_MAGIC);
432		MPASS(p1->p_magic == P_MAGIC);
433		TAILQ_REMOVE(&m->mtx_blocked, p1, p_procq);
434		if (TAILQ_EMPTY(&m->mtx_blocked)) {
435			LIST_REMOVE(m, mtx_contested);
436			_release_lock_quick(m);
437			CTR1(KTR_LOCK, "mtx_exit: 0x%p not held", m);
438		} else
439			m->mtx_lock = MTX_CONTESTED;
440		pri = MAXPRI;
441		LIST_FOREACH(m1, &p->p_contested, mtx_contested) {
442			int cp = TAILQ_FIRST(&m1->mtx_blocked)->p_priority;
443			if (cp < pri)
444				pri = cp;
445		}
446		if (pri > p->p_nativepri)
447			pri = p->p_nativepri;
448		SET_PRIO(p, pri);
449		CTR2(KTR_LOCK, "mtx_exit: 0x%p contested setrunqueue 0x%p",
450		    m, p1);
451		p1->p_blocked = NULL;
452		p1->p_stat = SRUN;
453		setrunqueue(p1);
454		if ((type & MTX_NOSWITCH) == 0 && p1->p_priority < pri) {
455#ifdef notyet
456			if (p->p_flag & (P_ITHD | P_SITHD)) {
457				ithd_t *it = (ithd_t *)p;
458
459				if (it->it_interrupted) {
460					CTR2(KTR_LOCK,
461					    "mtx_exit: 0x%x interruped 0x%x",
462					    it, it->it_interrupted);
463					intr_thd_fixup(it);
464				}
465			}
466#endif
467			setrunqueue(p);
468			CTR2(KTR_LOCK, "mtx_exit: 0x%p switching out lock=0x%p",
469			    m, (void *)m->mtx_lock);
470			mi_switch();
471			CTR2(KTR_LOCK, "mtx_exit: 0x%p resuming lock=0x%p",
472			    m, (void *)m->mtx_lock);
473		}
474		mtx_exit(&sched_lock, MTX_SPIN);
475		break;
476	case MTX_SPIN:
477	case MTX_SPIN | MTX_FIRST:
478		if (m->mtx_recurse != 0) {
479			m->mtx_recurse--;
480			return;
481		}
482		MPASS(mtx_owned(m));
483		_release_lock_quick(m);
484		if (type & MTX_FIRST)
485			enable_intr();	/* XXX is this kosher? */
486		else {
487			MPASS(m->mtx_saveintr != 0xbeefface);
488			restore_intr(m->mtx_saveintr);
489		}
490		break;
491	case MTX_SPIN | MTX_TOPHALF:
492		if (m->mtx_recurse != 0) {
493			m->mtx_recurse--;
494			return;
495		}
496		MPASS(mtx_owned(m));
497		_release_lock_quick(m);
498		break;
499	default:
500		panic("mtx_exit_hard: unsupported type 0x%x\n", type);
501	}
502}
503
504#define MV_DESTROY	0	/* validate before destory */
505#define MV_INIT		1	/* validate before init */
506
507#ifdef MUTEX_DEBUG
508
509int mtx_validate __P((struct mtx *, int));
510
511int
512mtx_validate(struct mtx *m, int when)
513{
514	struct mtx *mp;
515	int i;
516	int retval = 0;
517
518	if (m == &all_mtx || cold)
519		return 0;
520
521	mtx_enter(&all_mtx, MTX_DEF);
522/*
523 * XXX - When kernacc() is fixed on the alpha to handle K0_SEG memory properly
524 * we can re-enable the kernacc() checks.
525 */
526#ifndef __alpha__
527	MPASS(kernacc((caddr_t)all_mtx.mtx_next, sizeof(uintptr_t),
528	    VM_PROT_READ) == 1);
529#endif
530	MPASS(all_mtx.mtx_next->mtx_prev == &all_mtx);
531	for (i = 0, mp = all_mtx.mtx_next; mp != &all_mtx; mp = mp->mtx_next) {
532#ifndef __alpha__
533		if (kernacc((caddr_t)mp->mtx_next, sizeof(uintptr_t),
534		    VM_PROT_READ) != 1) {
535			panic("mtx_validate: mp=%p mp->mtx_next=%p",
536			    mp, mp->mtx_next);
537		}
538#endif
539		i++;
540		if (i > mtx_cur_cnt) {
541			panic("mtx_validate: too many in chain, known=%d\n",
542			    mtx_cur_cnt);
543		}
544	}
545	MPASS(i == mtx_cur_cnt);
546	switch (when) {
547	case MV_DESTROY:
548		for (mp = all_mtx.mtx_next; mp != &all_mtx; mp = mp->mtx_next)
549			if (mp == m)
550				break;
551		MPASS(mp == m);
552		break;
553	case MV_INIT:
554		for (mp = all_mtx.mtx_next; mp != &all_mtx; mp = mp->mtx_next)
555		if (mp == m) {
556			/*
557			 * Not good. This mutex already exists.
558			 */
559			printf("re-initing existing mutex %s\n",
560			    m->mtx_description);
561			MPASS(m->mtx_lock == MTX_UNOWNED);
562			retval = 1;
563		}
564	}
565	mtx_exit(&all_mtx, MTX_DEF);
566	return (retval);
567}
568#endif
569
570void
571mtx_init(struct mtx *m, const char *t, int flag)
572{
573#ifdef MUTEX_DEBUG
574	struct mtx_debug *debug;
575#endif
576
577	CTR2(KTR_LOCK, "mtx_init 0x%p (%s)", m, t);
578#ifdef MUTEX_DEBUG
579	if (mtx_validate(m, MV_INIT))	/* diagnostic and error correction */
580		return;
581	if (flag & MTX_COLD)
582		debug = m->mtx_debug;
583	else
584		debug = NULL;
585	if (debug == NULL) {
586#ifdef DIAGNOSTIC
587		if(cold && bootverbose)
588			printf("malloc'ing mtx_debug while cold for %s\n", t);
589#endif
590
591		/* XXX - should not use DEVBUF */
592		debug = malloc(sizeof(struct mtx_debug), M_DEVBUF, M_NOWAIT);
593		MPASS(debug != NULL);
594		bzero(debug, sizeof(struct mtx_debug));
595	}
596#endif
597	bzero((void *)m, sizeof *m);
598	TAILQ_INIT(&m->mtx_blocked);
599#ifdef MUTEX_DEBUG
600	m->mtx_debug = debug;
601#endif
602	m->mtx_description = t;
603	m->mtx_lock = MTX_UNOWNED;
604	/* Put on all mutex queue */
605	mtx_enter(&all_mtx, MTX_DEF);
606	m->mtx_next = &all_mtx;
607	m->mtx_prev = all_mtx.mtx_prev;
608	m->mtx_prev->mtx_next = m;
609	all_mtx.mtx_prev = m;
610	if (++mtx_cur_cnt > mtx_max_cnt)
611		mtx_max_cnt = mtx_cur_cnt;
612	mtx_exit(&all_mtx, MTX_DEF);
613	witness_init(m, flag);
614}
615
616void
617mtx_destroy(struct mtx *m)
618{
619
620	CTR2(KTR_LOCK, "mtx_destroy 0x%p (%s)", m, m->mtx_description);
621#ifdef MUTEX_DEBUG
622	if (m->mtx_next == NULL)
623		panic("mtx_destroy: %p (%s) already destroyed",
624		    m, m->mtx_description);
625
626	if (!mtx_owned(m)) {
627		MPASS(m->mtx_lock == MTX_UNOWNED);
628	} else {
629		MPASS((m->mtx_lock & (MTX_RECURSE|MTX_CONTESTED)) == 0);
630	}
631	mtx_validate(m, MV_DESTROY);		/* diagnostic */
632#endif
633
634#ifdef WITNESS
635	if (m->mtx_witness)
636		witness_destroy(m);
637#endif /* WITNESS */
638
639	/* Remove from the all mutex queue */
640	mtx_enter(&all_mtx, MTX_DEF);
641	m->mtx_next->mtx_prev = m->mtx_prev;
642	m->mtx_prev->mtx_next = m->mtx_next;
643#ifdef MUTEX_DEBUG
644	m->mtx_next = m->mtx_prev = NULL;
645	free(m->mtx_debug, M_DEVBUF);
646	m->mtx_debug = NULL;
647#endif
648	mtx_cur_cnt--;
649	mtx_exit(&all_mtx, MTX_DEF);
650}
651
652/*
653 * The non-inlined versions of the mtx_*() functions are always built (above),
654 * but the witness code depends on the MUTEX_DEBUG and WITNESS kernel options
655 * being specified.
656 */
657#if (defined(MUTEX_DEBUG) && defined(WITNESS))
658
659#define WITNESS_COUNT 200
660#define	WITNESS_NCHILDREN 2
661
662int witness_watch = 1;
663
664struct witness {
665	struct witness	*w_next;
666	const char	*w_description;
667	const char	*w_file;
668	int		 w_line;
669	struct witness	*w_morechildren;
670	u_char		 w_childcnt;
671	u_char		 w_Giant_squawked:1;
672	u_char		 w_other_squawked:1;
673	u_char		 w_same_squawked:1;
674	u_char		 w_sleep:1;
675	u_char		 w_spin:1;	/* this is a spin mutex */
676	u_int		 w_level;
677	struct witness	*w_children[WITNESS_NCHILDREN];
678};
679
680struct witness_blessed {
681	char 	*b_lock1;
682	char	*b_lock2;
683};
684
685#ifdef DDB
686/*
687 * When DDB is enabled and witness_ddb is set to 1, it will cause the system to
688 * drop into kdebug() when:
689 *	- a lock heirarchy violation occurs
690 *	- locks are held when going to sleep.
691 */
692#ifdef WITNESS_DDB
693int	witness_ddb = 1;
694#else
695int	witness_ddb = 0;
696#endif
697SYSCTL_INT(_debug, OID_AUTO, witness_ddb, CTLFLAG_RW, &witness_ddb, 0, "");
698#endif /* DDB */
699
700#ifdef WITNESS_SKIPSPIN
701int	witness_skipspin = 1;
702#else
703int	witness_skipspin = 0;
704#endif
705SYSCTL_INT(_debug, OID_AUTO, witness_skipspin, CTLFLAG_RD, &witness_skipspin, 0,
706    "");
707
708MUTEX_DECLARE(static,w_mtx);
709static struct witness	*w_free;
710static struct witness	*w_all;
711static int		 w_inited;
712static int		 witness_dead;	/* fatal error, probably no memory */
713
714static struct witness	 w_data[WITNESS_COUNT];
715
716static struct witness	 *enroll __P((const char *description, int flag));
717static int itismychild __P((struct witness *parent, struct witness *child));
718static void removechild __P((struct witness *parent, struct witness *child));
719static int isitmychild __P((struct witness *parent, struct witness *child));
720static int isitmydescendant __P((struct witness *parent, struct witness *child));
721static int dup_ok __P((struct witness *));
722static int blessed __P((struct witness *, struct witness *));
723static void witness_displaydescendants
724    __P((void(*)(const char *fmt, ...), struct witness *));
725static void witness_leveldescendents __P((struct witness *parent, int level));
726static void witness_levelall __P((void));
727static struct witness * witness_get __P((void));
728static void witness_free __P((struct witness *m));
729
730
731static char *ignore_list[] = {
732	"witness lock",
733	NULL
734};
735
736static char *spin_order_list[] = {
737	"sched lock",
738	"clk",
739	"sio",
740	/*
741	 * leaf locks
742	 */
743	NULL
744};
745
746static char *order_list[] = {
747	NULL
748};
749
750static char *dup_list[] = {
751	NULL
752};
753
754static char *sleep_list[] = {
755	"Giant lock",
756	NULL
757};
758
759/*
760 * Pairs of locks which have been blessed
761 * Don't complain about order problems with blessed locks
762 */
763static struct witness_blessed blessed_list[] = {
764};
765static int blessed_count = sizeof(blessed_list) / sizeof(struct witness_blessed);
766
767void
768witness_init(struct mtx *m, int flag)
769{
770	m->mtx_witness = enroll(m->mtx_description, flag);
771}
772
773void
774witness_destroy(struct mtx *m)
775{
776	struct mtx *m1;
777	struct proc *p;
778	p = CURPROC;
779	for ((m1 = LIST_FIRST(&p->p_heldmtx)); m1 != NULL;
780		m1 = LIST_NEXT(m1, mtx_held)) {
781		if (m1 == m) {
782			LIST_REMOVE(m, mtx_held);
783			break;
784		}
785	}
786	return;
787
788}
789
790void
791witness_enter(struct mtx *m, int flags, const char *file, int line)
792{
793	struct witness *w, *w1;
794	struct mtx *m1;
795	struct proc *p;
796	int i;
797#ifdef DDB
798	int go_into_ddb = 0;
799#endif /* DDB */
800
801	w = m->mtx_witness;
802	p = CURPROC;
803
804	if (flags & MTX_SPIN) {
805		if (!w->w_spin)
806			panic("mutex_enter: MTX_SPIN on MTX_DEF mutex %s @"
807			    " %s:%d", m->mtx_description, file, line);
808		if (m->mtx_recurse != 0)
809			return;
810		mtx_enter(&w_mtx, MTX_SPIN);
811		i = witness_spin_check;
812		if (i != 0 && w->w_level < i) {
813			mtx_exit(&w_mtx, MTX_SPIN);
814			panic("mutex_enter(%s:%x, MTX_SPIN) out of order @"
815			    " %s:%d already holding %s:%x",
816			    m->mtx_description, w->w_level, file, line,
817			    spin_order_list[ffs(i)-1], i);
818		}
819		PCPU_SET(witness_spin_check, i | w->w_level);
820		mtx_exit(&w_mtx, MTX_SPIN);
821		return;
822	}
823	if (w->w_spin)
824		panic("mutex_enter: MTX_DEF on MTX_SPIN mutex %s @ %s:%d",
825		    m->mtx_description, file, line);
826
827	if (m->mtx_recurse != 0)
828		return;
829	if (witness_dead)
830		goto out;
831	if (cold || panicstr)
832		goto out;
833
834	if (!mtx_legal2block())
835		panic("blockable mtx_enter() of %s when not legal @ %s:%d",
836			    m->mtx_description, file, line);
837	/*
838	 * Is this the first mutex acquired
839	 */
840	if ((m1 = LIST_FIRST(&p->p_heldmtx)) == NULL)
841		goto out;
842
843	if ((w1 = m1->mtx_witness) == w) {
844		if (w->w_same_squawked || dup_ok(w))
845			goto out;
846		w->w_same_squawked = 1;
847		printf("acquring duplicate lock of same type: \"%s\"\n",
848			m->mtx_description);
849		printf(" 1st @ %s:%d\n", w->w_file, w->w_line);
850		printf(" 2nd @ %s:%d\n", file, line);
851#ifdef DDB
852		go_into_ddb = 1;
853#endif /* DDB */
854		goto out;
855	}
856	MPASS(!mtx_owned(&w_mtx));
857	mtx_enter(&w_mtx, MTX_SPIN);
858	/*
859	 * If we have a known higher number just say ok
860	 */
861	if (witness_watch > 1 && w->w_level > w1->w_level) {
862		mtx_exit(&w_mtx, MTX_SPIN);
863		goto out;
864	}
865	if (isitmydescendant(m1->mtx_witness, w)) {
866		mtx_exit(&w_mtx, MTX_SPIN);
867		goto out;
868	}
869	for (i = 0; m1 != NULL; m1 = LIST_NEXT(m1, mtx_held), i++) {
870
871		MPASS(i < 200);
872		w1 = m1->mtx_witness;
873		if (isitmydescendant(w, w1)) {
874			mtx_exit(&w_mtx, MTX_SPIN);
875			if (blessed(w, w1))
876				goto out;
877			if (m1 == &Giant) {
878				if (w1->w_Giant_squawked)
879					goto out;
880				else
881					w1->w_Giant_squawked = 1;
882			} else {
883				if (w1->w_other_squawked)
884					goto out;
885				else
886					w1->w_other_squawked = 1;
887			}
888			printf("lock order reversal\n");
889			printf(" 1st %s last acquired @ %s:%d\n",
890			    w->w_description, w->w_file, w->w_line);
891			printf(" 2nd %p %s @ %s:%d\n",
892			    m1, w1->w_description, w1->w_file, w1->w_line);
893			printf(" 3rd %p %s @ %s:%d\n",
894			    m, w->w_description, file, line);
895#ifdef DDB
896			go_into_ddb = 1;
897#endif /* DDB */
898			goto out;
899		}
900	}
901	m1 = LIST_FIRST(&p->p_heldmtx);
902	if (!itismychild(m1->mtx_witness, w))
903		mtx_exit(&w_mtx, MTX_SPIN);
904
905out:
906#ifdef DDB
907	if (witness_ddb && go_into_ddb)
908		Debugger("witness_enter");
909#endif /* DDB */
910	w->w_file = file;
911	w->w_line = line;
912	m->mtx_line = line;
913	m->mtx_file = file;
914
915	/*
916	 * If this pays off it likely means that a mutex being witnessed
917	 * is acquired in hardclock. Put it in the ignore list. It is
918	 * likely not the mutex this assert fails on.
919	 */
920	MPASS(m->mtx_held.le_prev == NULL);
921	LIST_INSERT_HEAD(&p->p_heldmtx, (struct mtx*)m, mtx_held);
922}
923
924void
925witness_exit(struct mtx *m, int flags, const char *file, int line)
926{
927	struct witness *w;
928
929	w = m->mtx_witness;
930
931	if (flags & MTX_SPIN) {
932		if (!w->w_spin)
933			panic("mutex_exit: MTX_SPIN on MTX_DEF mutex %s @"
934			    " %s:%d", m->mtx_description, file, line);
935		if (m->mtx_recurse != 0)
936			return;
937		mtx_enter(&w_mtx, MTX_SPIN);
938		PCPU_SET(witness_spin_check, witness_spin_check & ~w->w_level);
939		mtx_exit(&w_mtx, MTX_SPIN);
940		return;
941	}
942	if (w->w_spin)
943		panic("mutex_exit: MTX_DEF on MTX_SPIN mutex %s @ %s:%d",
944		    m->mtx_description, file, line);
945
946	if (m->mtx_recurse != 0)
947		return;
948
949	if ((flags & MTX_NOSWITCH) == 0 && !mtx_legal2block() && !cold)
950		panic("switchable mtx_exit() of %s when not legal @ %s:%d",
951			    m->mtx_description, file, line);
952	LIST_REMOVE(m, mtx_held);
953	m->mtx_held.le_prev = NULL;
954}
955
956void
957witness_try_enter(struct mtx *m, int flags, const char *file, int line)
958{
959	struct proc *p;
960	struct witness *w = m->mtx_witness;
961
962	if (flags & MTX_SPIN) {
963		if (!w->w_spin)
964			panic("mutex_try_enter: "
965			    "MTX_SPIN on MTX_DEF mutex %s @ %s:%d",
966			    m->mtx_description, file, line);
967		if (m->mtx_recurse != 0)
968			return;
969		mtx_enter(&w_mtx, MTX_SPIN);
970		PCPU_SET(witness_spin_check, witness_spin_check | w->w_level);
971		mtx_exit(&w_mtx, MTX_SPIN);
972		return;
973	}
974
975	if (w->w_spin)
976		panic("mutex_try_enter: MTX_DEF on MTX_SPIN mutex %s @ %s:%d",
977		    m->mtx_description, file, line);
978
979	if (m->mtx_recurse != 0)
980		return;
981
982	w->w_file = file;
983	w->w_line = line;
984	m->mtx_line = line;
985	m->mtx_file = file;
986	p = CURPROC;
987	MPASS(m->mtx_held.le_prev == NULL);
988	LIST_INSERT_HEAD(&p->p_heldmtx, (struct mtx*)m, mtx_held);
989}
990
991void
992witness_display(void(*prnt)(const char *fmt, ...))
993{
994	struct witness *w, *w1;
995
996	witness_levelall();
997
998	for (w = w_all; w; w = w->w_next) {
999		if (w->w_file == NULL)
1000			continue;
1001		for (w1 = w_all; w1; w1 = w1->w_next) {
1002			if (isitmychild(w1, w))
1003				break;
1004		}
1005		if (w1 != NULL)
1006			continue;
1007		/*
1008		 * This lock has no anscestors, display its descendants.
1009		 */
1010		witness_displaydescendants(prnt, w);
1011	}
1012	prnt("\nMutex which were never acquired\n");
1013	for (w = w_all; w; w = w->w_next) {
1014		if (w->w_file != NULL)
1015			continue;
1016		prnt("%s\n", w->w_description);
1017	}
1018}
1019
1020int
1021witness_sleep(int check_only, struct mtx *mtx, const char *file, int line)
1022{
1023	struct mtx *m;
1024	struct proc *p;
1025	char **sleep;
1026	int n = 0;
1027
1028	p = CURPROC;
1029	for ((m = LIST_FIRST(&p->p_heldmtx)); m != NULL;
1030	    m = LIST_NEXT(m, mtx_held)) {
1031		if (m == mtx)
1032			continue;
1033		for (sleep = sleep_list; *sleep!= NULL; sleep++)
1034			if (strcmp(m->mtx_description, *sleep) == 0)
1035				goto next;
1036		printf("%s:%d: %s with \"%s\" locked from %s:%d\n",
1037			file, line, check_only ? "could sleep" : "sleeping",
1038			m->mtx_description,
1039			m->mtx_witness->w_file, m->mtx_witness->w_line);
1040		n++;
1041	next:
1042	}
1043#ifdef DDB
1044	if (witness_ddb && n)
1045		Debugger("witness_sleep");
1046#endif /* DDB */
1047	return (n);
1048}
1049
1050static struct witness *
1051enroll(const char *description, int flag)
1052{
1053	int i;
1054	struct witness *w, *w1;
1055	char **ignore;
1056	char **order;
1057
1058	if (!witness_watch)
1059		return (NULL);
1060	for (ignore = ignore_list; *ignore != NULL; ignore++)
1061		if (strcmp(description, *ignore) == 0)
1062			return (NULL);
1063
1064	if (w_inited == 0) {
1065		mtx_init(&w_mtx, "witness lock", MTX_COLD | MTX_DEF);
1066		for (i = 0; i < WITNESS_COUNT; i++) {
1067			w = &w_data[i];
1068			witness_free(w);
1069		}
1070		w_inited = 1;
1071		for (order = order_list; *order != NULL; order++) {
1072			w = enroll(*order, MTX_DEF);
1073			w->w_file = "order list";
1074			for (order++; *order != NULL; order++) {
1075				w1 = enroll(*order, MTX_DEF);
1076				w1->w_file = "order list";
1077				itismychild(w, w1);
1078				w = w1;
1079    	    	    	}
1080		}
1081	}
1082	if ((flag & MTX_SPIN) && witness_skipspin)
1083		return (NULL);
1084	mtx_enter(&w_mtx, MTX_SPIN);
1085	for (w = w_all; w; w = w->w_next) {
1086		if (strcmp(description, w->w_description) == 0) {
1087			mtx_exit(&w_mtx, MTX_SPIN);
1088			return (w);
1089		}
1090	}
1091	if ((w = witness_get()) == NULL)
1092		return (NULL);
1093	w->w_next = w_all;
1094	w_all = w;
1095	w->w_description = description;
1096	mtx_exit(&w_mtx, MTX_SPIN);
1097	if (flag & MTX_SPIN) {
1098		w->w_spin = 1;
1099
1100		i = 1;
1101		for (order = spin_order_list; *order != NULL; order++) {
1102			if (strcmp(description, *order) == 0)
1103				break;
1104			i <<= 1;
1105		}
1106		if (*order == NULL)
1107			panic("spin lock %s not in order list", description);
1108		w->w_level = i;
1109	}
1110	return (w);
1111}
1112
1113static int
1114itismychild(struct witness *parent, struct witness *child)
1115{
1116	static int recursed;
1117
1118	/*
1119	 * Insert "child" after "parent"
1120	 */
1121	while (parent->w_morechildren)
1122		parent = parent->w_morechildren;
1123
1124	if (parent->w_childcnt == WITNESS_NCHILDREN) {
1125		if ((parent->w_morechildren = witness_get()) == NULL)
1126			return (1);
1127		parent = parent->w_morechildren;
1128	}
1129	MPASS(child != NULL);
1130	parent->w_children[parent->w_childcnt++] = child;
1131	/*
1132	 * now prune whole tree
1133	 */
1134	if (recursed)
1135		return (0);
1136	recursed = 1;
1137	for (child = w_all; child != NULL; child = child->w_next) {
1138		for (parent = w_all; parent != NULL;
1139		    parent = parent->w_next) {
1140			if (!isitmychild(parent, child))
1141				continue;
1142			removechild(parent, child);
1143			if (isitmydescendant(parent, child))
1144				continue;
1145			itismychild(parent, child);
1146		}
1147	}
1148	recursed = 0;
1149	witness_levelall();
1150	return (0);
1151}
1152
1153static void
1154removechild(struct witness *parent, struct witness *child)
1155{
1156	struct witness *w, *w1;
1157	int i;
1158
1159	for (w = parent; w != NULL; w = w->w_morechildren)
1160		for (i = 0; i < w->w_childcnt; i++)
1161			if (w->w_children[i] == child)
1162				goto found;
1163	return;
1164found:
1165	for (w1 = w; w1->w_morechildren != NULL; w1 = w1->w_morechildren)
1166		continue;
1167	w->w_children[i] = w1->w_children[--w1->w_childcnt];
1168	MPASS(w->w_children[i] != NULL);
1169
1170	if (w1->w_childcnt != 0)
1171		return;
1172
1173	if (w1 == parent)
1174		return;
1175	for (w = parent; w->w_morechildren != w1; w = w->w_morechildren)
1176		continue;
1177	w->w_morechildren = 0;
1178	witness_free(w1);
1179}
1180
1181static int
1182isitmychild(struct witness *parent, struct witness *child)
1183{
1184	struct witness *w;
1185	int i;
1186
1187	for (w = parent; w != NULL; w = w->w_morechildren) {
1188		for (i = 0; i < w->w_childcnt; i++) {
1189			if (w->w_children[i] == child)
1190				return (1);
1191		}
1192	}
1193	return (0);
1194}
1195
1196static int
1197isitmydescendant(struct witness *parent, struct witness *child)
1198{
1199	struct witness *w;
1200	int i;
1201	int j;
1202
1203	for (j = 0, w = parent; w != NULL; w = w->w_morechildren, j++) {
1204		MPASS(j < 1000);
1205		for (i = 0; i < w->w_childcnt; i++) {
1206			if (w->w_children[i] == child)
1207				return (1);
1208		}
1209		for (i = 0; i < w->w_childcnt; i++) {
1210			if (isitmydescendant(w->w_children[i], child))
1211				return (1);
1212		}
1213	}
1214	return (0);
1215}
1216
1217void
1218witness_levelall (void)
1219{
1220	struct witness *w, *w1;
1221
1222	for (w = w_all; w; w = w->w_next)
1223		if (!w->w_spin)
1224			w->w_level = 0;
1225	for (w = w_all; w; w = w->w_next) {
1226		if (w->w_spin)
1227			continue;
1228		for (w1 = w_all; w1; w1 = w1->w_next) {
1229			if (isitmychild(w1, w))
1230				break;
1231		}
1232		if (w1 != NULL)
1233			continue;
1234		witness_leveldescendents(w, 0);
1235	}
1236}
1237
1238static void
1239witness_leveldescendents(struct witness *parent, int level)
1240{
1241	int i;
1242	struct witness *w;
1243
1244	if (parent->w_level < level)
1245		parent->w_level = level;
1246	level++;
1247	for (w = parent; w != NULL; w = w->w_morechildren)
1248		for (i = 0; i < w->w_childcnt; i++)
1249			witness_leveldescendents(w->w_children[i], level);
1250}
1251
1252static void
1253witness_displaydescendants(void(*prnt)(const char *fmt, ...),
1254			   struct witness *parent)
1255{
1256	struct witness *w;
1257	int i;
1258	int level = parent->w_level;
1259
1260	prnt("%d", level);
1261	if (level < 10)
1262		prnt(" ");
1263	for (i = 0; i < level; i++)
1264		prnt(" ");
1265	prnt("%s", parent->w_description);
1266	if (parent->w_file != NULL) {
1267		prnt(" -- last acquired @ %s", parent->w_file);
1268#ifndef W_USE_WHERE
1269		prnt(":%d", parent->w_line);
1270#endif
1271		prnt("\n");
1272	}
1273
1274	for (w = parent; w != NULL; w = w->w_morechildren)
1275		for (i = 0; i < w->w_childcnt; i++)
1276			    witness_displaydescendants(prnt, w->w_children[i]);
1277    }
1278
1279static int
1280dup_ok(struct witness *w)
1281{
1282	char **dup;
1283
1284	for (dup = dup_list; *dup!= NULL; dup++)
1285		if (strcmp(w->w_description, *dup) == 0)
1286			return (1);
1287	return (0);
1288}
1289
1290static int
1291blessed(struct witness *w1, struct witness *w2)
1292{
1293	int i;
1294	struct witness_blessed *b;
1295
1296	for (i = 0; i < blessed_count; i++) {
1297		b = &blessed_list[i];
1298		if (strcmp(w1->w_description, b->b_lock1) == 0) {
1299			if (strcmp(w2->w_description, b->b_lock2) == 0)
1300				return (1);
1301			continue;
1302		}
1303		if (strcmp(w1->w_description, b->b_lock2) == 0)
1304			if (strcmp(w2->w_description, b->b_lock1) == 0)
1305				return (1);
1306	}
1307	return (0);
1308}
1309
1310static struct witness *
1311witness_get()
1312{
1313	struct witness *w;
1314
1315	if ((w = w_free) == NULL) {
1316		witness_dead = 1;
1317		mtx_exit(&w_mtx, MTX_SPIN);
1318		printf("witness exhausted\n");
1319		return (NULL);
1320	}
1321	w_free = w->w_next;
1322	bzero(w, sizeof(*w));
1323	return (w);
1324}
1325
1326static void
1327witness_free(struct witness *w)
1328{
1329	w->w_next = w_free;
1330	w_free = w;
1331}
1332
1333void
1334witness_list(struct proc *p)
1335{
1336	struct mtx *m;
1337
1338	for ((m = LIST_FIRST(&p->p_heldmtx)); m != NULL;
1339	    m = LIST_NEXT(m, mtx_held)) {
1340		printf("\t\"%s\" (%p) locked at %s:%d\n",
1341		    m->mtx_description, m,
1342		    m->mtx_witness->w_file, m->mtx_witness->w_line);
1343	}
1344}
1345
1346void
1347witness_save(struct mtx *m, const char **filep, int *linep)
1348{
1349	*filep = m->mtx_witness->w_file;
1350	*linep = m->mtx_witness->w_line;
1351}
1352
1353void
1354witness_restore(struct mtx *m, const char *file, int line)
1355{
1356	m->mtx_witness->w_file = file;
1357	m->mtx_witness->w_line = line;
1358}
1359
1360#endif	/* (defined(MUTEX_DEBUG) && defined(WITNESS)) */
1361