kern_mutex.c revision 240424
1/*-
2 * Copyright (c) 1998 Berkeley Software Design, Inc. All rights reserved.
3 *
4 * Redistribution and use in source and binary forms, with or without
5 * modification, are permitted provided that the following conditions
6 * are met:
7 * 1. Redistributions of source code must retain the above copyright
8 *    notice, this list of conditions and the following disclaimer.
9 * 2. Redistributions in binary form must reproduce the above copyright
10 *    notice, this list of conditions and the following disclaimer in the
11 *    documentation and/or other materials provided with the distribution.
12 * 3. Berkeley Software Design Inc's name may not be used to endorse or
13 *    promote products derived from this software without specific prior
14 *    written permission.
15 *
16 * THIS SOFTWARE IS PROVIDED BY BERKELEY SOFTWARE DESIGN INC ``AS IS'' AND
17 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
18 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
19 * ARE DISCLAIMED.  IN NO EVENT SHALL BERKELEY SOFTWARE DESIGN INC BE LIABLE
20 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
21 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
22 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
23 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
24 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
25 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
26 * SUCH DAMAGE.
27 *
28 *	from BSDI $Id: mutex_witness.c,v 1.1.2.20 2000/04/27 03:10:27 cp Exp $
29 *	and BSDI $Id: synch_machdep.c,v 2.3.2.39 2000/04/27 03:10:25 cp Exp $
30 */
31
32/*
33 * Machine independent bits of mutex implementation.
34 */
35
36#include <sys/cdefs.h>
37__FBSDID("$FreeBSD: head/sys/kern/kern_mutex.c 240424 2012-09-12 22:10:53Z attilio $");
38
39#include "opt_adaptive_mutexes.h"
40#include "opt_ddb.h"
41#include "opt_global.h"
42#include "opt_hwpmc_hooks.h"
43#include "opt_kdtrace.h"
44#include "opt_sched.h"
45
46#include <sys/param.h>
47#include <sys/systm.h>
48#include <sys/bus.h>
49#include <sys/conf.h>
50#include <sys/kdb.h>
51#include <sys/kernel.h>
52#include <sys/ktr.h>
53#include <sys/lock.h>
54#include <sys/malloc.h>
55#include <sys/mutex.h>
56#include <sys/proc.h>
57#include <sys/resourcevar.h>
58#include <sys/sched.h>
59#include <sys/sbuf.h>
60#include <sys/sysctl.h>
61#include <sys/turnstile.h>
62#include <sys/vmmeter.h>
63#include <sys/lock_profile.h>
64
65#include <machine/atomic.h>
66#include <machine/bus.h>
67#include <machine/cpu.h>
68
69#include <ddb/ddb.h>
70
71#include <fs/devfs/devfs_int.h>
72
73#include <vm/vm.h>
74#include <vm/vm_extern.h>
75
76#if defined(SMP) && !defined(NO_ADAPTIVE_MUTEXES)
77#define	ADAPTIVE_MUTEXES
78#endif
79
80#ifdef HWPMC_HOOKS
81#include <sys/pmckern.h>
82PMC_SOFT_DEFINE( , , lock, failed);
83#endif
84
85/*
86 * Internal utility macros.
87 */
/* True when the lock word equals MTX_UNOWNED, i.e. no owner is recorded. */
88#define mtx_unowned(m)	((m)->mtx_lock == MTX_UNOWNED)
89
/* True when the lock word equals MTX_DESTROYED (stored by mtx_destroy()). */
90#define	mtx_destroyed(m) ((m)->mtx_lock == MTX_DESTROYED)
91
/* Owner thread pointer: the lock word with the MTX_FLAGMASK bits masked off. */
92#define	mtx_owner(m)	((struct thread *)((m)->mtx_lock & ~MTX_FLAGMASK))
93
94static void	assert_mtx(const struct lock_object *lock, int what);
95#ifdef DDB
96static void	db_show_mtx(const struct lock_object *lock);
97#endif
98static void	lock_mtx(struct lock_object *lock, int how);
99static void	lock_spin(struct lock_object *lock, int how);
100#ifdef KDTRACE_HOOKS
101static int	owner_mtx(const struct lock_object *lock,
102		    struct thread **owner);
103#endif
104static int	unlock_mtx(struct lock_object *lock);
105static int	unlock_spin(struct lock_object *lock);
106
107/*
108 * Lock classes for sleep and spin mutexes.
109 */
/*
 * Class descriptor for sleep mutexes.  Flagged as a recursable sleep lock;
 * the hooks point at the helper functions defined later in this file.  The
 * DDB and KDTRACE hooks are only present when those options are compiled in.
 */
110struct lock_class lock_class_mtx_sleep = {
111	.lc_name = "sleep mutex",
112	.lc_flags = LC_SLEEPLOCK | LC_RECURSABLE,
113	.lc_assert = assert_mtx,
114#ifdef DDB
115	.lc_ddb_show = db_show_mtx,
116#endif
117	.lc_lock = lock_mtx,
118	.lc_unlock = unlock_mtx,
119#ifdef KDTRACE_HOOKS
120	.lc_owner = owner_mtx,
121#endif
122};
/*
 * Class descriptor for spin mutexes.  Shares the assert, DDB and owner hooks
 * with the sleep class, but its lc_lock/lc_unlock hooks (lock_spin() and
 * unlock_spin()) unconditionally panic.
 */
123struct lock_class lock_class_mtx_spin = {
124	.lc_name = "spin mutex",
125	.lc_flags = LC_SPINLOCK | LC_RECURSABLE,
126	.lc_assert = assert_mtx,
127#ifdef DDB
128	.lc_ddb_show = db_show_mtx,
129#endif
130	.lc_lock = lock_spin,
131	.lc_unlock = unlock_spin,
132#ifdef KDTRACE_HOOKS
133	.lc_owner = owner_mtx,
134#endif
135};
136
137/*
138 * System-wide mutexes
139 */
/*
 * blocked_lock: spin mutex whose lock word is forced to a non-free value in
 * mutex_init(); thread_lock_block() parks td_lock on it.
 */
140struct mtx blocked_lock;
/* Giant: recursable sleep mutex, initialized and acquired in mutex_init(). */
141struct mtx Giant;
142
/*
 * lc_assert hook for both mutex classes: view the generic lock object as a
 * mutex and hand the requested assertion to mtx_assert().
 */
static void
assert_mtx(const struct lock_object *lock, int what)
{

	mtx_assert((const struct mtx *)lock, what);
}
149
/*
 * lc_lock hook for sleep mutexes.  The `how' argument is not consulted; the
 * mutex is always acquired with a plain mtx_lock().
 */
static void
lock_mtx(struct lock_object *lock, int how)
{

	mtx_lock((struct mtx *)lock);
}
156
/*
 * lc_lock hook for spin mutexes.  Reaching this hook is treated as a fatal
 * error: it panics without touching the lock.
 */
static void
lock_spin(struct lock_object *lock, int how)
{

	panic("spin locks can only use msleep_spin");
}
163
164int
165unlock_mtx(struct lock_object *lock)
166{
167	struct mtx *m;
168
169	m = (struct mtx *)lock;
170	mtx_assert(m, MA_OWNED | MA_NOTRECURSED);
171	mtx_unlock(m);
172	return (0);
173}
174
/*
 * lc_unlock hook for spin mutexes.  Like lock_spin(), this hook only
 * panics; it never returns a value.
 */
static int
unlock_spin(struct lock_object *lock)
{

	panic("spin locks can only use msleep_spin");
}
181
182#ifdef KDTRACE_HOOKS
/*
 * lc_owner hook: store the thread recorded in the lock word into *owner and
 * return non-zero when the lock word is anything other than MTX_UNOWNED.
 */
static int
owner_mtx(const struct lock_object *lock, struct thread **owner)
{
	const struct mtx *mtx;

	mtx = (const struct mtx *)lock;
	*owner = mtx_owner(mtx);

	return (!mtx_unowned(mtx));
}
191#endif
192
193/*
194 * Function versions of the inlined __mtx_* macros.  These are used by
195 * modules and can also be called from assembly language if needed.
196 */
197void
198_mtx_lock_flags(struct mtx *m, int opts, const char *file, int line)
199{
200
201	if (SCHEDULER_STOPPED())
202		return;
203	MPASS(curthread != NULL);
204	KASSERT(!TD_IS_IDLETHREAD(curthread),
205	    ("mtx_lock() by idle thread %p on sleep mutex %s @ %s:%d",
206	    curthread, m->lock_object.lo_name, file, line));
207	KASSERT(m->mtx_lock != MTX_DESTROYED,
208	    ("mtx_lock() of destroyed mutex @ %s:%d", file, line));
209	KASSERT(LOCK_CLASS(&m->lock_object) == &lock_class_mtx_sleep,
210	    ("mtx_lock() of spin mutex %s @ %s:%d", m->lock_object.lo_name,
211	    file, line));
212	WITNESS_CHECKORDER(&m->lock_object, opts | LOP_NEWORDER | LOP_EXCLUSIVE,
213	    file, line, NULL);
214
215	__mtx_lock(m, curthread, opts, file, line);
216	LOCK_LOG_LOCK("LOCK", &m->lock_object, opts, m->mtx_recurse, file,
217	    line);
218	WITNESS_LOCK(&m->lock_object, opts | LOP_EXCLUSIVE, file, line);
219	curthread->td_locks++;
220}
221
222void
223_mtx_unlock_flags(struct mtx *m, int opts, const char *file, int line)
224{
225
226	if (SCHEDULER_STOPPED())
227		return;
228	MPASS(curthread != NULL);
229	KASSERT(m->mtx_lock != MTX_DESTROYED,
230	    ("mtx_unlock() of destroyed mutex @ %s:%d", file, line));
231	KASSERT(LOCK_CLASS(&m->lock_object) == &lock_class_mtx_sleep,
232	    ("mtx_unlock() of spin mutex %s @ %s:%d", m->lock_object.lo_name,
233	    file, line));
234	curthread->td_locks--;
235	WITNESS_UNLOCK(&m->lock_object, opts | LOP_EXCLUSIVE, file, line);
236	LOCK_LOG_LOCK("UNLOCK", &m->lock_object, opts, m->mtx_recurse, file,
237	    line);
238	mtx_assert(m, MA_OWNED);
239
240	if (m->mtx_recurse == 0)
241		LOCKSTAT_PROFILE_RELEASE_LOCK(LS_MTX_UNLOCK_RELEASE, m);
242	__mtx_unlock(m, curthread, opts, file, line);
243}
244
245void
246_mtx_lock_spin_flags(struct mtx *m, int opts, const char *file, int line)
247{
248
249	if (SCHEDULER_STOPPED())
250		return;
251	MPASS(curthread != NULL);
252	KASSERT(m->mtx_lock != MTX_DESTROYED,
253	    ("mtx_lock_spin() of destroyed mutex @ %s:%d", file, line));
254	KASSERT(LOCK_CLASS(&m->lock_object) == &lock_class_mtx_spin,
255	    ("mtx_lock_spin() of sleep mutex %s @ %s:%d",
256	    m->lock_object.lo_name, file, line));
257	if (mtx_owned(m))
258		KASSERT((m->lock_object.lo_flags & LO_RECURSABLE) != 0,
259	    ("mtx_lock_spin: recursed on non-recursive mutex %s @ %s:%d\n",
260		    m->lock_object.lo_name, file, line));
261	WITNESS_CHECKORDER(&m->lock_object, opts | LOP_NEWORDER | LOP_EXCLUSIVE,
262	    file, line, NULL);
263	__mtx_lock_spin(m, curthread, opts, file, line);
264	LOCK_LOG_LOCK("LOCK", &m->lock_object, opts, m->mtx_recurse, file,
265	    line);
266	WITNESS_LOCK(&m->lock_object, opts | LOP_EXCLUSIVE, file, line);
267}
268
269void
270_mtx_unlock_spin_flags(struct mtx *m, int opts, const char *file, int line)
271{
272
273	if (SCHEDULER_STOPPED())
274		return;
275	MPASS(curthread != NULL);
276	KASSERT(m->mtx_lock != MTX_DESTROYED,
277	    ("mtx_unlock_spin() of destroyed mutex @ %s:%d", file, line));
278	KASSERT(LOCK_CLASS(&m->lock_object) == &lock_class_mtx_spin,
279	    ("mtx_unlock_spin() of sleep mutex %s @ %s:%d",
280	    m->lock_object.lo_name, file, line));
281	WITNESS_UNLOCK(&m->lock_object, opts | LOP_EXCLUSIVE, file, line);
282	LOCK_LOG_LOCK("UNLOCK", &m->lock_object, opts, m->mtx_recurse, file,
283	    line);
284	mtx_assert(m, MA_OWNED);
285
286	__mtx_unlock_spin(m);
287}
288
289/*
290 * The important part of mtx_trylock{,_flags}()
291 * Tries to acquire lock `m.'  If this function is called on a mutex that
292 * is already owned, it will recursively acquire the lock.
293 */
294int
295mtx_trylock_flags_(struct mtx *m, int opts, const char *file, int line)
296{
297#ifdef LOCK_PROFILING
298	uint64_t waittime = 0;
299	int contested = 0;
300#endif
301	int rval;
302
303	if (SCHEDULER_STOPPED())
304		return (1);
305
306	MPASS(curthread != NULL);
307	KASSERT(!TD_IS_IDLETHREAD(curthread),
308	    ("mtx_trylock() by idle thread %p on sleep mutex %s @ %s:%d",
309	    curthread, m->lock_object.lo_name, file, line));
310	KASSERT(m->mtx_lock != MTX_DESTROYED,
311	    ("mtx_trylock() of destroyed mutex @ %s:%d", file, line));
312	KASSERT(LOCK_CLASS(&m->lock_object) == &lock_class_mtx_sleep,
313	    ("mtx_trylock() of spin mutex %s @ %s:%d", m->lock_object.lo_name,
314	    file, line));
315
316	if (mtx_owned(m) && (m->lock_object.lo_flags & LO_RECURSABLE) != 0) {
317		m->mtx_recurse++;
318		atomic_set_ptr(&m->mtx_lock, MTX_RECURSED);
319		rval = 1;
320	} else
321		rval = _mtx_obtain_lock(m, (uintptr_t)curthread);
322
323	LOCK_LOG_TRY("LOCK", &m->lock_object, opts, rval, file, line);
324	if (rval) {
325		WITNESS_LOCK(&m->lock_object, opts | LOP_EXCLUSIVE | LOP_TRYLOCK,
326		    file, line);
327		curthread->td_locks++;
328		if (m->mtx_recurse == 0)
329			LOCKSTAT_PROFILE_OBTAIN_LOCK_SUCCESS(LS_MTX_LOCK_ACQUIRE,
330			    m, contested, waittime, file, line);
331
332	}
333
334	return (rval);
335}
336
337/*
338 * _mtx_lock_sleep: the tougher part of acquiring an MTX_DEF lock.
339 *
340 * We call this if the lock is either contested (i.e. we need to go to
341 * sleep waiting for it), or if we need to recurse on it.
342 */
/*
 * Parameters:
 *   m     - the sleep mutex to acquire.
 *   tid   - value handed to _mtx_obtain_lock(); presumably the calling
 *           thread's pointer cast to uintptr_t -- confirm against callers.
 *   opts  - logging options tested with LOCK_LOG_TEST().
 *   file, line - call site, used only in diagnostics and profiling records.
 *
 * Returns immediately when the scheduler is stopped.  A recursive
 * acquisition only bumps mtx_recurse and sets MTX_RECURSED.  Otherwise the
 * function loops until _mtx_obtain_lock() succeeds, spinning (when
 * ADAPTIVE_MUTEXES is defined) while the owner is running and blocking on a
 * turnstile otherwise.
 */
343void
344_mtx_lock_sleep(struct mtx *m, uintptr_t tid, int opts, const char *file,
345    int line)
346{
347	struct turnstile *ts;
348	uintptr_t v;
349#ifdef ADAPTIVE_MUTEXES
350	volatile struct thread *owner;
351#endif
352#ifdef KTR
353	int cont_logged = 0;
354#endif
355#ifdef LOCK_PROFILING
356	int contested = 0;
357	uint64_t waittime = 0;
358#endif
359#ifdef KDTRACE_HOOKS
360	uint64_t spin_cnt = 0;
361	uint64_t sleep_cnt = 0;
362	int64_t sleep_time = 0;
363#endif
364
365	if (SCHEDULER_STOPPED())
366		return;
367
	/* Recursion: we already own the mutex, so no waiting is involved. */
368	if (mtx_owned(m)) {
369		KASSERT((m->lock_object.lo_flags & LO_RECURSABLE) != 0,
370	    ("_mtx_lock_sleep: recursed on non-recursive mutex %s @ %s:%d\n",
371		    m->lock_object.lo_name, file, line));
372		m->mtx_recurse++;
373		atomic_set_ptr(&m->mtx_lock, MTX_RECURSED);
374		if (LOCK_LOG_TEST(&m->lock_object, opts))
375			CTR1(KTR_LOCK, "_mtx_lock_sleep: %p recursing", m);
376		return;
377	}
378
379#ifdef HWPMC_HOOKS
380	PMC_SOFT_CALL( , , lock, failed);
381#endif
382	lock_profile_obtain_lock_failed(&m->lock_object,
383		    &contested, &waittime);
384	if (LOCK_LOG_TEST(&m->lock_object, opts))
385		CTR4(KTR_LOCK,
386		    "_mtx_lock_sleep: %s contested (lock=%p) at %s:%d",
387		    m->lock_object.lo_name, (void *)m->mtx_lock, file, line);
388
	/* Each pass either spins, retries, or sleeps once on the turnstile. */
389	while (!_mtx_obtain_lock(m, tid)) {
390#ifdef KDTRACE_HOOKS
391		spin_cnt++;
392#endif
393#ifdef ADAPTIVE_MUTEXES
394		/*
395		 * If the owner is running on another CPU, spin until the
396		 * owner stops running or the state of the lock changes.
397		 */
398		v = m->mtx_lock;
399		if (v != MTX_UNOWNED) {
400			owner = (struct thread *)(v & ~MTX_FLAGMASK);
401			if (TD_IS_RUNNING(owner)) {
402				if (LOCK_LOG_TEST(&m->lock_object, 0))
403					CTR3(KTR_LOCK,
404					    "%s: spinning on %p held by %p",
405					    __func__, m, owner);
406				while (mtx_owner(m) == owner &&
407				    TD_IS_RUNNING(owner)) {
408					cpu_spinwait();
409#ifdef KDTRACE_HOOKS
410					spin_cnt++;
411#endif
412				}
413				continue;
414			}
415		}
416#endif
417
		/* Take the turnstile chain lock, then re-sample the lock word. */
418		ts = turnstile_trywait(&m->lock_object);
419		v = m->mtx_lock;
420
421		/*
422		 * Check if the lock has been released while spinning for
423		 * the turnstile chain lock.
424		 */
425		if (v == MTX_UNOWNED) {
426			turnstile_cancel(ts);
427			continue;
428		}
429
430#ifdef ADAPTIVE_MUTEXES
431		/*
432		 * The current lock owner might have started executing
433		 * on another CPU (or the lock could have changed
434		 * owners) while we were waiting on the turnstile
435		 * chain lock.  If so, drop the turnstile lock and try
436		 * again.
437		 */
438		owner = (struct thread *)(v & ~MTX_FLAGMASK);
439		if (TD_IS_RUNNING(owner)) {
440			turnstile_cancel(ts);
441			continue;
442		}
443#endif
444
445		/*
446		 * If the mutex isn't already contested and a failure occurs
447		 * setting the contested bit, the mutex was either released
448		 * or the state of the MTX_RECURSED bit changed.
449		 */
450		if ((v & MTX_CONTESTED) == 0 &&
451		    !atomic_cmpset_ptr(&m->mtx_lock, v, v | MTX_CONTESTED)) {
452			turnstile_cancel(ts);
453			continue;
454		}
455
456		/*
457		 * We definitely must sleep for this lock.
458		 */
459		mtx_assert(m, MA_NOTOWNED);
460
461#ifdef KTR
		/* Log the start of contention only once per call. */
462		if (!cont_logged) {
463			CTR6(KTR_CONTENTION,
464			    "contention: %p at %s:%d wants %s, taken by %s:%d",
465			    (void *)tid, file, line, m->lock_object.lo_name,
466			    WITNESS_FILE(&m->lock_object),
467			    WITNESS_LINE(&m->lock_object));
468			cont_logged = 1;
469		}
470#endif
471
472		/*
473		 * Block on the turnstile.
474		 */
475#ifdef KDTRACE_HOOKS
		/* sleep_time accumulates the nanoseconds spent in turnstile_wait(). */
476		sleep_time -= lockstat_nsecs();
477#endif
478		turnstile_wait(ts, mtx_owner(m), TS_EXCLUSIVE_QUEUE);
479#ifdef KDTRACE_HOOKS
480		sleep_time += lockstat_nsecs();
481		sleep_cnt++;
482#endif
483	}
484#ifdef KTR
485	if (cont_logged) {
486		CTR4(KTR_CONTENTION,
487		    "contention end: %s acquired by %p at %s:%d",
488		    m->lock_object.lo_name, (void *)tid, file, line);
489	}
490#endif
491	LOCKSTAT_PROFILE_OBTAIN_LOCK_SUCCESS(LS_MTX_LOCK_ACQUIRE, m, contested,
492	    waittime, file, line);
493#ifdef KDTRACE_HOOKS
494	if (sleep_time)
495		LOCKSTAT_RECORD1(LS_MTX_LOCK_BLOCK, m, sleep_time);
496
497	/*
498	 * Only record the loops spinning and not sleeping.
499	 */
500	if (spin_cnt > sleep_cnt)
501		LOCKSTAT_RECORD1(LS_MTX_LOCK_SPIN, m, (spin_cnt - sleep_cnt));
502#endif
503}
504
505static void
506_mtx_lock_spin_failed(struct mtx *m)
507{
508	struct thread *td;
509
510	td = mtx_owner(m);
511
512	/* If the mutex is unlocked, try again. */
513	if (td == NULL)
514		return;
515
516	printf( "spin lock %p (%s) held by %p (tid %d) too long\n",
517	    m, m->lock_object.lo_name, td, td->td_tid);
518#ifdef WITNESS
519	witness_display_spinlock(&m->lock_object, td, printf);
520#endif
521	panic("spin lock held too long");
522}
523
524#ifdef SMP
525/*
526 * _mtx_lock_spin: the tougher part of acquiring an MTX_SPIN lock.
527 *
528 * This is only called if we need to actually spin for the lock. Recursion
529 * is handled inline.
530 */
/*
 * Parameters:
 *   m     - the spin mutex to acquire.
 *   tid   - value handed to _mtx_obtain_lock(); presumably the calling
 *           thread's pointer cast to uintptr_t -- confirm against callers.
 *   opts  - logging options tested with LOCK_LOG_TEST().
 *   file, line - call site passed to the lockstat/profiling record.
 *
 * Returns immediately when the scheduler is stopped.  The counter `i' is
 * never reset, so the back-off thresholds below apply to the total number
 * of wait iterations in this call.
 */
531void
532_mtx_lock_spin(struct mtx *m, uintptr_t tid, int opts, const char *file,
533    int line)
534{
535	int i = 0;
536#ifdef LOCK_PROFILING
537	int contested = 0;
538	uint64_t waittime = 0;
539#endif
540
541	if (SCHEDULER_STOPPED())
542		return;
543
544	if (LOCK_LOG_TEST(&m->lock_object, opts))
545		CTR1(KTR_LOCK, "_mtx_lock_spin: %p spinning", m);
546
547#ifdef HWPMC_HOOKS
548	PMC_SOFT_CALL( , , lock, failed);
549#endif
550	lock_profile_obtain_lock_failed(&m->lock_object, &contested, &waittime);
551	while (!_mtx_obtain_lock(m, tid)) {
552
553		/* Give interrupts a chance while we spin. */
554		spinlock_exit();
		/*
		 * Wait for the lock word to read MTX_UNOWNED: plain
		 * cpu_spinwait() for the first 10000000 iterations, then
		 * DELAY(1) per iteration; from 60000000 on, unless the
		 * debugger is active or a panic is in progress,
		 * _mtx_lock_spin_failed() is called.
		 */
555		while (m->mtx_lock != MTX_UNOWNED) {
556			if (i++ < 10000000) {
557				cpu_spinwait();
558				continue;
559			}
560			if (i < 60000000 || kdb_active || panicstr != NULL)
561				DELAY(1);
562			else
563				_mtx_lock_spin_failed(m);
564			cpu_spinwait();
565		}
		/* Re-enter the spinlock section before retrying the acquire. */
566		spinlock_enter();
567	}
568
569	if (LOCK_LOG_TEST(&m->lock_object, opts))
570		CTR1(KTR_LOCK, "_mtx_lock_spin: %p spin done", m);
571
572	LOCKSTAT_PROFILE_OBTAIN_LOCK_SUCCESS(LS_MTX_SPIN_LOCK_ACQUIRE, m,
573	    contested, waittime, (file), (line));
574	LOCKSTAT_RECORD1(LS_MTX_SPIN_LOCK_SPIN, m, i);
575}
576#endif /* SMP */
577
/*
 * Acquire the spin mutex that td->td_lock currently points to.
 *
 * td->td_lock can be changed by other code (see thread_lock_block() and
 * thread_lock_set()), so the pointer is re-read on every attempt and the
 * acquisition is retried whenever the mutex obtained no longer matches
 * td->td_lock.  Returns immediately when the scheduler is stopped.
 *
 *   td    - thread whose lock is to be taken.
 *   opts  - options forwarded to WITNESS and the lock log.
 *   file, line - call site used in diagnostics.
 */
578void
579thread_lock_flags_(struct thread *td, int opts, const char *file, int line)
580{
581	struct mtx *m;
582	uintptr_t tid;
583	int i;
584#ifdef LOCK_PROFILING
585	int contested = 0;
586	uint64_t waittime = 0;
587#endif
588#ifdef KDTRACE_HOOKS
589	uint64_t spin_cnt = 0;
590#endif
591
592	i = 0;
593	tid = (uintptr_t)curthread;
594
595	if (SCHEDULER_STOPPED())
596		return;
597
598	for (;;) {
599retry:
		/* Snapshot td_lock; it is validated again after acquisition. */
600		spinlock_enter();
601		m = td->td_lock;
602		KASSERT(m->mtx_lock != MTX_DESTROYED,
603		    ("thread_lock() of destroyed mutex @ %s:%d", file, line));
604		KASSERT(LOCK_CLASS(&m->lock_object) == &lock_class_mtx_spin,
605		    ("thread_lock() of sleep mutex %s @ %s:%d",
606		    m->lock_object.lo_name, file, line));
607		if (mtx_owned(m))
608			KASSERT((m->lock_object.lo_flags & LO_RECURSABLE) != 0,
609	    ("thread_lock: recursed on non-recursive mutex %s @ %s:%d\n",
610			    m->lock_object.lo_name, file, line));
611		WITNESS_CHECKORDER(&m->lock_object,
612		    opts | LOP_NEWORDER | LOP_EXCLUSIVE, file, line, NULL);
613		while (!_mtx_obtain_lock(m, tid)) {
614#ifdef KDTRACE_HOOKS
615			spin_cnt++;
616#endif
			/* Lock word is exactly our tid: we hold it, so recurse. */
617			if (m->mtx_lock == tid) {
618				m->mtx_recurse++;
619				break;
620			}
621#ifdef HWPMC_HOOKS
622			PMC_SOFT_CALL( , , lock, failed);
623#endif
624			lock_profile_obtain_lock_failed(&m->lock_object,
625			    &contested, &waittime);
626			/* Give interrupts a chance while we spin. */
627			spinlock_exit();
628			while (m->mtx_lock != MTX_UNOWNED) {
629				if (i++ < 10000000)
630					cpu_spinwait();
631				else if (i < 60000000 ||
632				    kdb_active || panicstr != NULL)
633					DELAY(1);
634				else
635					_mtx_lock_spin_failed(m);
636				cpu_spinwait();
				/*
				 * td_lock moved while we waited: start over.
				 * spinlock_exit() has already run, and the
				 * retry label re-enters.
				 */
637				if (m != td->td_lock)
638					goto retry;
639			}
640			spinlock_enter();
641		}
		/* Done if the mutex we hold is still the thread's lock. */
642		if (m == td->td_lock)
643			break;
644		__mtx_unlock_spin(m);	/* does spinlock_exit() */
645#ifdef KDTRACE_HOOKS
646		spin_cnt++;
647#endif
648	}
649	if (m->mtx_recurse == 0)
650		LOCKSTAT_PROFILE_OBTAIN_LOCK_SUCCESS(LS_MTX_SPIN_LOCK_ACQUIRE,
651		    m, contested, waittime, (file), (line));
652	LOCK_LOG_LOCK("LOCK", &m->lock_object, opts, m->mtx_recurse, file,
653	    line);
654	WITNESS_LOCK(&m->lock_object, opts | LOP_EXCLUSIVE, file, line);
655	LOCKSTAT_RECORD1(LS_THREAD_LOCK_SPIN, m, spin_cnt);
656}
657
658struct mtx *
659thread_lock_block(struct thread *td)
660{
661	struct mtx *lock;
662
663	THREAD_LOCK_ASSERT(td, MA_OWNED);
664	lock = td->td_lock;
665	td->td_lock = &blocked_lock;
666	mtx_unlock_spin(lock);
667
668	return (lock);
669}
670
671void
672thread_lock_unblock(struct thread *td, struct mtx *new)
673{
674	mtx_assert(new, MA_OWNED);
675	MPASS(td->td_lock == &blocked_lock);
676	atomic_store_rel_ptr((volatile void *)&td->td_lock, (uintptr_t)new);
677}
678
679void
680thread_lock_set(struct thread *td, struct mtx *new)
681{
682	struct mtx *lock;
683
684	mtx_assert(new, MA_OWNED);
685	THREAD_LOCK_ASSERT(td, MA_OWNED);
686	lock = td->td_lock;
687	td->td_lock = new;
688	mtx_unlock_spin(lock);
689}
690
691/*
692 * _mtx_unlock_sleep: the tougher part of releasing an MTX_DEF lock.
693 *
694 * We are only called here if the lock is recursed or contested (i.e. we
695 * need to wake up a blocked thread).
696 */
697void
698_mtx_unlock_sleep(struct mtx *m, int opts, const char *file, int line)
699{
700	struct turnstile *ts;
701
702	if (SCHEDULER_STOPPED())
703		return;
704
705	if (mtx_recursed(m)) {
706		if (--(m->mtx_recurse) == 0)
707			atomic_clear_ptr(&m->mtx_lock, MTX_RECURSED);
708		if (LOCK_LOG_TEST(&m->lock_object, opts))
709			CTR1(KTR_LOCK, "_mtx_unlock_sleep: %p unrecurse", m);
710		return;
711	}
712
713	/*
714	 * We have to lock the chain before the turnstile so this turnstile
715	 * can be removed from the hash list if it is empty.
716	 */
717	turnstile_chain_lock(&m->lock_object);
718	ts = turnstile_lookup(&m->lock_object);
719	if (LOCK_LOG_TEST(&m->lock_object, opts))
720		CTR1(KTR_LOCK, "_mtx_unlock_sleep: %p contested", m);
721	MPASS(ts != NULL);
722	turnstile_broadcast(ts, TS_EXCLUSIVE_QUEUE);
723	_mtx_release_lock_quick(m);
724
725	/*
726	 * This turnstile is now no longer associated with the mutex.  We can
727	 * unlock the chain lock so a new turnstile may take it's place.
728	 */
729	turnstile_unpend(ts, TS_EXCLUSIVE_LOCK);
730	turnstile_chain_unlock(&m->lock_object);
731}
732
733/*
734 * All the unlocking of MTX_SPIN locks is done inline.
735 * See the __mtx_unlock_spin() macro for the details.
736 */
737
738/*
739 * The backing function for the INVARIANTS-enabled mtx_assert()
740 */
741#ifdef INVARIANT_SUPPORT
742void
743_mtx_assert(const struct mtx *m, int what, const char *file, int line)
744{
745
746	if (panicstr != NULL || dumping)
747		return;
748	switch (what) {
749	case MA_OWNED:
750	case MA_OWNED | MA_RECURSED:
751	case MA_OWNED | MA_NOTRECURSED:
752		if (!mtx_owned(m))
753			panic("mutex %s not owned at %s:%d",
754			    m->lock_object.lo_name, file, line);
755		if (mtx_recursed(m)) {
756			if ((what & MA_NOTRECURSED) != 0)
757				panic("mutex %s recursed at %s:%d",
758				    m->lock_object.lo_name, file, line);
759		} else if ((what & MA_RECURSED) != 0) {
760			panic("mutex %s unrecursed at %s:%d",
761			    m->lock_object.lo_name, file, line);
762		}
763		break;
764	case MA_NOTOWNED:
765		if (mtx_owned(m))
766			panic("mutex %s owned at %s:%d",
767			    m->lock_object.lo_name, file, line);
768		break;
769	default:
770		panic("unknown mtx_assert at %s:%d", file, line);
771	}
772}
773#endif
774
775/*
776 * The MUTEX_DEBUG-enabled mtx_validate()
777 *
778 * Most of these checks have been moved off into the LO_INITIALIZED flag
779 * maintained by the witness code.
780 */
781#ifdef MUTEX_DEBUG
782
783void	mtx_validate(struct mtx *);
784
785void
786mtx_validate(struct mtx *m)
787{
788
789/*
790 * XXX: When kernacc() does not require Giant we can reenable this check
791 */
792#ifdef notyet
793	/*
794	 * Can't call kernacc() from early init386(), especially when
795	 * initializing Giant mutex, because some stuff in kernacc()
796	 * requires Giant itself.
797	 */
798	if (!cold)
799		if (!kernacc((caddr_t)m, sizeof(m),
800		    VM_PROT_READ | VM_PROT_WRITE))
801			panic("Can't read and write to mutex %p", m);
802#endif
803}
804#endif
805
806/*
807 * General init routine used by the MTX_SYSINIT() macro.
808 */
809void
810mtx_sysinit(void *arg)
811{
812	struct mtx_args *margs = arg;
813
814	mtx_init(margs->ma_mtx, margs->ma_desc, NULL, margs->ma_opts);
815}
816
817/*
818 * Mutex initialization routine; initialize lock `m' of type contained in
819 * `opts' with options contained in `opts' and name `name.'  The optional
820 * lock type `type' is used as a general lock category name for use with
821 * witness.
822 */
823void
824mtx_init(struct mtx *m, const char *name, const char *type, int opts)
825{
826	struct lock_class *class;
827	int flags;
828
829	MPASS((opts & ~(MTX_SPIN | MTX_QUIET | MTX_RECURSE |
830		MTX_NOWITNESS | MTX_DUPOK | MTX_NOPROFILE)) == 0);
831	ASSERT_ATOMIC_LOAD_PTR(m->mtx_lock,
832	    ("%s: mtx_lock not aligned for %s: %p", __func__, name,
833	    &m->mtx_lock));
834
835#ifdef MUTEX_DEBUG
836	/* Diagnostic and error correction */
837	mtx_validate(m);
838#endif
839
840	/* Determine lock class and lock flags. */
841	if (opts & MTX_SPIN)
842		class = &lock_class_mtx_spin;
843	else
844		class = &lock_class_mtx_sleep;
845	flags = 0;
846	if (opts & MTX_QUIET)
847		flags |= LO_QUIET;
848	if (opts & MTX_RECURSE)
849		flags |= LO_RECURSABLE;
850	if ((opts & MTX_NOWITNESS) == 0)
851		flags |= LO_WITNESS;
852	if (opts & MTX_DUPOK)
853		flags |= LO_DUPOK;
854	if (opts & MTX_NOPROFILE)
855		flags |= LO_NOPROFILE;
856
857	/* Initialize mutex. */
858	m->mtx_lock = MTX_UNOWNED;
859	m->mtx_recurse = 0;
860
861	lock_init(&m->lock_object, class, name, type, flags);
862}
863
864/*
865 * Remove lock `m' from all_mtx queue.  We don't allow MTX_QUIET to be
866 * passed in as a flag here because if the corresponding mtx_init() was
867 * called with MTX_QUIET set, then it will already be set in the mutex's
868 * flags.
869 */
870void
871mtx_destroy(struct mtx *m)
872{
873
874	if (!mtx_owned(m))
875		MPASS(mtx_unowned(m));
876	else {
877		MPASS((m->mtx_lock & (MTX_RECURSED|MTX_CONTESTED)) == 0);
878
879		/* Perform the non-mtx related part of mtx_unlock_spin(). */
880		if (LOCK_CLASS(&m->lock_object) == &lock_class_mtx_spin)
881			spinlock_exit();
882		else
883			curthread->td_locks--;
884
885		lock_profile_release_lock(&m->lock_object);
886		/* Tell witness this isn't locked to make it happy. */
887		WITNESS_UNLOCK(&m->lock_object, LOP_EXCLUSIVE, __FILE__,
888		    __LINE__);
889	}
890
891	m->mtx_lock = MTX_DESTROYED;
892	lock_destroy(&m->lock_object);
893}
894
895/*
896 * Intialize the mutex code and system mutexes.  This is called from the MD
897 * startup code prior to mi_startup().  The per-CPU data space needs to be
898 * setup before this is called.
899 */
900void
901mutex_init(void)
902{
903
904	/* Setup turnstiles so that sleep mutexes work. */
905	init_turnstiles();
906
907	/*
908	 * Initialize mutexes.
909	 */
910	mtx_init(&Giant, "Giant", NULL, MTX_DEF | MTX_RECURSE);
911	mtx_init(&blocked_lock, "blocked lock", NULL, MTX_SPIN);
912	blocked_lock.mtx_lock = 0xdeadc0de;	/* Always blocked. */
913	mtx_init(&proc0.p_mtx, "process lock", NULL, MTX_DEF | MTX_DUPOK);
914	mtx_init(&proc0.p_slock, "process slock", NULL, MTX_SPIN | MTX_RECURSE);
915	mtx_init(&devmtx, "cdev", NULL, MTX_DEF);
916	mtx_lock(&Giant);
917}
918
919#ifdef DDB
920void
921db_show_mtx(const struct lock_object *lock)
922{
923	struct thread *td;
924	const struct mtx *m;
925
926	m = (const struct mtx *)lock;
927
928	db_printf(" flags: {");
929	if (LOCK_CLASS(lock) == &lock_class_mtx_spin)
930		db_printf("SPIN");
931	else
932		db_printf("DEF");
933	if (m->lock_object.lo_flags & LO_RECURSABLE)
934		db_printf(", RECURSE");
935	if (m->lock_object.lo_flags & LO_DUPOK)
936		db_printf(", DUPOK");
937	db_printf("}\n");
938	db_printf(" state: {");
939	if (mtx_unowned(m))
940		db_printf("UNOWNED");
941	else if (mtx_destroyed(m))
942		db_printf("DESTROYED");
943	else {
944		db_printf("OWNED");
945		if (m->mtx_lock & MTX_CONTESTED)
946			db_printf(", CONTESTED");
947		if (m->mtx_lock & MTX_RECURSED)
948			db_printf(", RECURSED");
949	}
950	db_printf("}\n");
951	if (!mtx_unowned(m) && !mtx_destroyed(m)) {
952		td = mtx_owner(m);
953		db_printf(" owner: %p (tid %d, pid %d, \"%s\")\n", td,
954		    td->td_tid, td->td_proc->p_pid, td->td_name);
955		if (mtx_recursed(m))
956			db_printf(" recursed: %d\n", m->mtx_recurse);
957	}
958}
959#endif
960