1/*-
2 * SPDX-License-Identifier: BSD-2-Clause
3 *
4 * Copyright (c) 2008 Attilio Rao <attilio@FreeBSD.org>
5 * All rights reserved.
6 *
7 * Redistribution and use in source and binary forms, with or without
8 * modification, are permitted provided that the following conditions
9 * are met:
10 * 1. Redistributions of source code must retain the above copyright
11 *    notice(s), this list of conditions and the following disclaimer as
12 *    the first lines of this file unmodified other than the possible
13 *    addition of one or more copyright notices.
14 * 2. Redistributions in binary form must reproduce the above copyright
15 *    notice(s), this list of conditions and the following disclaimer in the
16 *    documentation and/or other materials provided with the distribution.
17 *
18 * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDER(S) ``AS IS'' AND ANY
19 * EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
20 * WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
21 * DISCLAIMED.  IN NO EVENT SHALL THE COPYRIGHT HOLDER(S) BE LIABLE FOR ANY
22 * DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
23 * (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
24 * SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
25 * CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
26 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
27 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH
28 * DAMAGE.
29 */
30
31#include "opt_ddb.h"
32#include "opt_hwpmc_hooks.h"
33
34#include <sys/param.h>
35#include <sys/kdb.h>
36#include <sys/ktr.h>
37#include <sys/limits.h>
38#include <sys/lock.h>
39#include <sys/lock_profile.h>
40#include <sys/lockmgr.h>
41#include <sys/lockstat.h>
42#include <sys/mutex.h>
43#include <sys/proc.h>
44#include <sys/sleepqueue.h>
45#ifdef DEBUG_LOCKS
46#include <sys/stack.h>
47#endif
48#include <sys/sysctl.h>
49#include <sys/systm.h>
50
51#include <machine/cpu.h>
52
53#ifdef DDB
54#include <ddb/ddb.h>
55#endif
56
57#ifdef HWPMC_HOOKS
58#include <sys/pmckern.h>
59PMC_SOFT_DECLARE( , , lock, failed);
60#endif
61
62/*
63 * Hack. There should be prio_t or similar so that this is not necessary.
64 */
_Static_assert((PRILASTFLAG * 2) - 1 <= USHRT_MAX,
    "prio flags won't fit in u_short pri in struct lock");
67
68CTASSERT(LK_UNLOCKED == (LK_UNLOCKED &
69    ~(LK_ALL_WAITERS | LK_EXCLUSIVE_SPINNERS)));
70
71#define	SQ_EXCLUSIVE_QUEUE	0
72#define	SQ_SHARED_QUEUE		1
73
74#ifndef INVARIANTS
75#define	_lockmgr_assert(lk, what, file, line)
76#endif
77
78#define	TD_SLOCKS_INC(td)	((td)->td_lk_slocks++)
79#define	TD_SLOCKS_DEC(td)	((td)->td_lk_slocks--)
80
81#ifndef DEBUG_LOCKS
82#define	STACK_PRINT(lk)
83#define	STACK_SAVE(lk)
84#define	STACK_ZERO(lk)
85#else
86#define	STACK_PRINT(lk)	stack_print_ddb(&(lk)->lk_stack)
87#define	STACK_SAVE(lk)	stack_save(&(lk)->lk_stack)
88#define	STACK_ZERO(lk)	stack_zero(&(lk)->lk_stack)
89#endif
90
91#define	LOCK_LOG2(lk, string, arg1, arg2)				\
92	if (LOCK_LOG_TEST(&(lk)->lock_object, 0))			\
93		CTR2(KTR_LOCK, (string), (arg1), (arg2))
94#define	LOCK_LOG3(lk, string, arg1, arg2, arg3)				\
95	if (LOCK_LOG_TEST(&(lk)->lock_object, 0))			\
96		CTR3(KTR_LOCK, (string), (arg1), (arg2), (arg3))
97
98#define	GIANT_DECLARE							\
99	int _i = 0;							\
100	WITNESS_SAVE_DECL(Giant)
101#define	GIANT_RESTORE() do {						\
102	if (__predict_false(_i > 0)) {					\
103		while (_i--)						\
104			mtx_lock(&Giant);				\
105		WITNESS_RESTORE(&Giant.lock_object, Giant);		\
106	}								\
107} while (0)
108#define	GIANT_SAVE() do {						\
109	if (__predict_false(mtx_owned(&Giant))) {			\
110		WITNESS_SAVE(&Giant.lock_object, Giant);		\
111		while (mtx_owned(&Giant)) {				\
112			_i++;						\
113			mtx_unlock(&Giant);				\
114		}							\
115	}								\
116} while (0)
117
118static bool __always_inline
119LK_CAN_SHARE(uintptr_t x, int flags, bool fp)
120{
121
122	if ((x & (LK_SHARE | LK_EXCLUSIVE_WAITERS | LK_EXCLUSIVE_SPINNERS)) ==
123	    LK_SHARE)
124		return (true);
125	if (fp || (!(x & LK_SHARE)))
126		return (false);
127	if ((curthread->td_lk_slocks != 0 && !(flags & LK_NODDLKTREAT)) ||
128	    (curthread->td_pflags & TDP_DEADLKTREAT))
129		return (true);
130	return (false);
131}
132
133#define	LK_TRYOP(x)							\
134	((x) & LK_NOWAIT)
135
136#define	LK_CAN_WITNESS(x)						\
137	(((x) & LK_NOWITNESS) == 0 && !LK_TRYOP(x))
138#define	LK_TRYWIT(x)							\
139	(LK_TRYOP(x) ? LOP_TRYLOCK : 0)
140
141#define	lockmgr_xlocked_v(v)						\
142	(((v) & ~(LK_FLAGMASK & ~LK_SHARE)) == (uintptr_t)curthread)
143
144#define	lockmgr_xlocked(lk) lockmgr_xlocked_v(lockmgr_read_value(lk))
145
146static void	assert_lockmgr(const struct lock_object *lock, int how);
147#ifdef DDB
148static void	db_show_lockmgr(const struct lock_object *lock);
149#endif
150static void	lock_lockmgr(struct lock_object *lock, uintptr_t how);
151#ifdef KDTRACE_HOOKS
152static int	owner_lockmgr(const struct lock_object *lock,
153		    struct thread **owner);
154#endif
155static uintptr_t unlock_lockmgr(struct lock_object *lock);
156
157struct lock_class lock_class_lockmgr = {
158	.lc_name = "lockmgr",
159	.lc_flags = LC_RECURSABLE | LC_SLEEPABLE | LC_SLEEPLOCK | LC_UPGRADABLE,
160	.lc_assert = assert_lockmgr,
161#ifdef DDB
162	.lc_ddb_show = db_show_lockmgr,
163#endif
164	.lc_lock = lock_lockmgr,
165	.lc_unlock = unlock_lockmgr,
166#ifdef KDTRACE_HOOKS
167	.lc_owner = owner_lockmgr,
168#endif
169};
170
171static __read_mostly bool lk_adaptive = true;
172static SYSCTL_NODE(_debug, OID_AUTO, lockmgr, CTLFLAG_RD, NULL, "lockmgr debugging");
173SYSCTL_BOOL(_debug_lockmgr, OID_AUTO, adaptive_spinning, CTLFLAG_RW, &lk_adaptive,
174    0, "");
175#define lockmgr_delay  locks_delay
176
177struct lockmgr_wait {
178	const char *iwmesg;
179	int ipri;
180	int itimo;
181};
182
183static bool __always_inline lockmgr_slock_try(struct lock *lk, uintptr_t *xp,
184    int flags, bool fp);
185static bool __always_inline lockmgr_sunlock_try(struct lock *lk, uintptr_t *xp);
186
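/*
 * Common exit path: drop the interlock if the caller passed LK_INTERLOCK
 * and kick proc0 if a swapper wakeup was requested by the sleepqueue code.
 */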
187static void
188lockmgr_exit(u_int flags, struct lock_object *ilk, int wakeup_swapper)
189{
190	struct lock_class *class;
191
192	if (flags & LK_INTERLOCK) {
193		class = LOCK_CLASS(ilk);
194		class->lc_unlock(ilk);
195	}
196
197	if (__predict_false(wakeup_swapper))
198		kick_proc0();
199}
200
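/*
 * Bookkeeping helpers for lock acquisition and release: update lock
 * profiling and lockstat state, log the operation to KTR, and adjust
 * WITNESS state and the per-thread lock counters.
 */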
201static void
202lockmgr_note_shared_acquire(struct lock *lk, int contested,
203    uint64_t waittime, const char *file, int line, int flags)
204{
205
206	LOCKSTAT_PROFILE_OBTAIN_RWLOCK_SUCCESS(lockmgr__acquire, lk, contested,
207	    waittime, file, line, LOCKSTAT_READER);
208	LOCK_LOG_LOCK("SLOCK", &lk->lock_object, 0, 0, file, line);
209	WITNESS_LOCK(&lk->lock_object, LK_TRYWIT(flags), file, line);
210	TD_LOCKS_INC(curthread);
211	TD_SLOCKS_INC(curthread);
212	STACK_SAVE(lk);
213}
214
215static void
216lockmgr_note_shared_release(struct lock *lk, const char *file, int line)
217{
218
219	WITNESS_UNLOCK(&lk->lock_object, 0, file, line);
220	LOCK_LOG_LOCK("SUNLOCK", &lk->lock_object, 0, 0, file, line);
221	TD_LOCKS_DEC(curthread);
222	TD_SLOCKS_DEC(curthread);
223}
224
225static void
226lockmgr_note_exclusive_acquire(struct lock *lk, int contested,
227    uint64_t waittime, const char *file, int line, int flags)
228{
229
230	LOCKSTAT_PROFILE_OBTAIN_RWLOCK_SUCCESS(lockmgr__acquire, lk, contested,
231	    waittime, file, line, LOCKSTAT_WRITER);
232	LOCK_LOG_LOCK("XLOCK", &lk->lock_object, 0, lk->lk_recurse, file, line);
233	WITNESS_LOCK(&lk->lock_object, LOP_EXCLUSIVE | LK_TRYWIT(flags), file,
234	    line);
235	TD_LOCKS_INC(curthread);
236	STACK_SAVE(lk);
237}
238
239static void
240lockmgr_note_exclusive_release(struct lock *lk, const char *file, int line)
241{
242
243	if (!lockmgr_disowned(lk)) {
244		WITNESS_UNLOCK(&lk->lock_object, LOP_EXCLUSIVE, file, line);
245		TD_LOCKS_DEC(curthread);
246	}
247	LOCK_LOG_LOCK("XUNLOCK", &lk->lock_object, 0, lk->lk_recurse, file,
248	    line);
249}
250
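/*
 * Return the thread owning the lock exclusively, or NULL if the lock is
 * unlocked or share-locked.  A disowned lock reports LK_KERNPROC.
 */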
251static __inline struct thread *
252lockmgr_xholder(const struct lock *lk)
253{
254	uintptr_t x;
255
256	x = lockmgr_read_value(lk);
257	return ((x & LK_SHARE) ? NULL : (struct thread *)LK_HOLDER(x));
258}
259
/*
 * Assumes the sleepqueue chain lock is held on entry and returns with it
 * released.  Also assumes the generic interlock is sane and has already
 * been checked by the caller.  If LK_INTERLOCK is specified, the interlock
 * is not reacquired after the sleep.
 */
266static __inline int
267sleeplk(struct lock *lk, u_int flags, struct lock_object *ilk,
268    const char *wmesg, int pri, int timo, int queue)
269{
270	GIANT_DECLARE;
271	struct lock_class *class;
272	int catch, error;
273
274	class = (flags & LK_INTERLOCK) ? LOCK_CLASS(ilk) : NULL;
275	catch = pri & PCATCH;
276	pri &= PRIMASK;
277	error = 0;
278
279	LOCK_LOG3(lk, "%s: %p blocking on the %s sleepqueue", __func__, lk,
280	    (queue == SQ_EXCLUSIVE_QUEUE) ? "exclusive" : "shared");
281
282	if (flags & LK_INTERLOCK)
283		class->lc_unlock(ilk);
284	if (queue == SQ_EXCLUSIVE_QUEUE && (flags & LK_SLEEPFAIL) != 0) {
285		if (lk->lk_exslpfail < USHRT_MAX)
286			lk->lk_exslpfail++;
287	}
288	GIANT_SAVE();
289	sleepq_add(&lk->lock_object, NULL, wmesg, SLEEPQ_LK | (catch ?
290	    SLEEPQ_INTERRUPTIBLE : 0), queue);
291	if ((flags & LK_TIMELOCK) && timo)
292		sleepq_set_timeout(&lk->lock_object, timo);
293
	/*
	 * Choose the appropriate sleep variant.
	 */
297	if ((flags & LK_TIMELOCK) && timo && catch)
298		error = sleepq_timedwait_sig(&lk->lock_object, pri);
299	else if ((flags & LK_TIMELOCK) && timo)
300		error = sleepq_timedwait(&lk->lock_object, pri);
301	else if (catch)
302		error = sleepq_wait_sig(&lk->lock_object, pri);
303	else
304		sleepq_wait(&lk->lock_object, pri);
305	GIANT_RESTORE();
306	if ((flags & LK_SLEEPFAIL) && error == 0)
307		error = ENOLCK;
308
309	return (error);
310}
311
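/*
 * Release a shared lock reference.  If the fast path fails because waiters
 * are queued, take the sleepqueue chain lock and wake up the appropriate
 * queue, preferring exclusive waiters over shared ones.  Returns non-zero
 * if proc0 (the swapper) must be kicked by the caller.
 */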
312static __inline int
313wakeupshlk(struct lock *lk, const char *file, int line)
314{
315	uintptr_t v, x, orig_x;
316	u_int realexslp;
317	int queue, wakeup_swapper;
318
319	wakeup_swapper = 0;
320	for (;;) {
321		x = lockmgr_read_value(lk);
322		if (lockmgr_sunlock_try(lk, &x))
323			break;
324
325		/*
326		 * We should have a sharer with waiters, so enter the hard
327		 * path in order to handle wakeups correctly.
328		 */
329		sleepq_lock(&lk->lock_object);
330		orig_x = lockmgr_read_value(lk);
331retry_sleepq:
332		x = orig_x & (LK_ALL_WAITERS | LK_EXCLUSIVE_SPINNERS);
333		v = LK_UNLOCKED;
334
		/*
		 * If the lock has exclusive waiters, give them preference in
		 * order to avoid a deadlock with the shared runners-up.
		 * If interruptible sleeps left the exclusive queue empty,
		 * avoid starving the threads sleeping on the shared queue by
		 * giving them precedence and clearing the exclusive waiters
		 * bit anyway.
		 * Note that the lk_exslpfail count may not reflect the real
		 * number of waiters with the LK_SLEEPFAIL flag on because
		 * such waiters may also use interruptible sleeps, so
		 * lk_exslpfail should be considered an upper bound, edge
		 * cases included.
		 */
348		realexslp = sleepq_sleepcnt(&lk->lock_object,
349		    SQ_EXCLUSIVE_QUEUE);
350		if ((x & LK_EXCLUSIVE_WAITERS) != 0 && realexslp != 0) {
351			if (lk->lk_exslpfail != USHRT_MAX && lk->lk_exslpfail < realexslp) {
352				lk->lk_exslpfail = 0;
353				queue = SQ_EXCLUSIVE_QUEUE;
354				v |= (x & LK_SHARED_WAITERS);
355			} else {
356				lk->lk_exslpfail = 0;
357				LOCK_LOG2(lk,
358				    "%s: %p has only LK_SLEEPFAIL sleepers",
359				    __func__, lk);
360				LOCK_LOG2(lk,
361			    "%s: %p waking up threads on the exclusive queue",
362				    __func__, lk);
363				wakeup_swapper =
364				    sleepq_broadcast(&lk->lock_object,
365				    SLEEPQ_LK, 0, SQ_EXCLUSIVE_QUEUE);
366				queue = SQ_SHARED_QUEUE;
367			}
368		} else {
			/*
			 * Exclusive waiters sleeping with LK_SLEEPFAIL on
			 * and using interruptible sleeps/timeouts may have
			 * left spurious lk_exslpfail counts on, so clean
			 * them up anyway.
			 */
375			lk->lk_exslpfail = 0;
376			queue = SQ_SHARED_QUEUE;
377		}
378
379		if (lockmgr_sunlock_try(lk, &orig_x)) {
380			sleepq_release(&lk->lock_object);
381			break;
382		}
383
384		x |= LK_SHARERS_LOCK(1);
385		if (!atomic_fcmpset_rel_ptr(&lk->lk_lock, &x, v)) {
386			orig_x = x;
387			goto retry_sleepq;
388		}
389		LOCK_LOG3(lk, "%s: %p waking up threads on the %s queue",
390		    __func__, lk, queue == SQ_SHARED_QUEUE ? "shared" :
391		    "exclusive");
392		wakeup_swapper |= sleepq_broadcast(&lk->lock_object, SLEEPQ_LK,
393		    0, queue);
394		sleepq_release(&lk->lock_object);
395		break;
396	}
397
398	LOCKSTAT_PROFILE_RELEASE_RWLOCK(lockmgr__release, lk, LOCKSTAT_READER);
399	return (wakeup_swapper);
400}
401
402static void
403assert_lockmgr(const struct lock_object *lock, int what)
404{
405
406	panic("lockmgr locks do not support assertions");
407}
408
409static void
410lock_lockmgr(struct lock_object *lock, uintptr_t how)
411{
412
413	panic("lockmgr locks do not support sleep interlocking");
414}
415
416static uintptr_t
417unlock_lockmgr(struct lock_object *lock)
418{
419
420	panic("lockmgr locks do not support sleep interlocking");
421}
422
423#ifdef KDTRACE_HOOKS
424static int
425owner_lockmgr(const struct lock_object *lock, struct thread **owner)
426{
427
428	panic("lockmgr locks do not support owner inquiring");
429}
430#endif
431
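/*
 * Initialize a lockmgr lock: translate the LK_* initialization flags into
 * lock_object flags and record the default priority and timeout used for
 * sleeps.
 */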
432void
433lockinit(struct lock *lk, int pri, const char *wmesg, int timo, int flags)
434{
435	int iflags;
436
437	MPASS((flags & ~LK_INIT_MASK) == 0);
438	ASSERT_ATOMIC_LOAD_PTR(lk->lk_lock,
439            ("%s: lockmgr not aligned for %s: %p", __func__, wmesg,
440            &lk->lk_lock));
441
442	iflags = LO_SLEEPABLE | LO_UPGRADABLE;
443	if (flags & LK_CANRECURSE)
444		iflags |= LO_RECURSABLE;
445	if ((flags & LK_NODUP) == 0)
446		iflags |= LO_DUPOK;
447	if (flags & LK_NOPROFILE)
448		iflags |= LO_NOPROFILE;
449	if ((flags & LK_NOWITNESS) == 0)
450		iflags |= LO_WITNESS;
451	if (flags & LK_QUIET)
452		iflags |= LO_QUIET;
453	if (flags & LK_IS_VNODE)
454		iflags |= LO_IS_VNODE;
455	if (flags & LK_NEW)
456		iflags |= LO_NEW;
457	iflags |= flags & LK_NOSHARE;
458
459	lock_init(&lk->lock_object, &lock_class_lockmgr, wmesg, NULL, iflags);
460	lk->lk_lock = LK_UNLOCKED;
461	lk->lk_recurse = 0;
462	lk->lk_exslpfail = 0;
463	lk->lk_timo = timo;
464	lk->lk_pri = pri;
465	STACK_ZERO(lk);
466}
467
468/*
469 * XXX: Gross hacks to manipulate external lock flags after
470 * initialization.  Used for certain vnode and buf locks.
471 */
472void
473lockallowshare(struct lock *lk)
474{
475
476	lockmgr_assert(lk, KA_XLOCKED);
477	lk->lock_object.lo_flags &= ~LK_NOSHARE;
478}
479
480void
481lockdisableshare(struct lock *lk)
482{
483
484	lockmgr_assert(lk, KA_XLOCKED);
485	lk->lock_object.lo_flags |= LK_NOSHARE;
486}
487
488void
489lockallowrecurse(struct lock *lk)
490{
491
492	lockmgr_assert(lk, KA_XLOCKED);
493	lk->lock_object.lo_flags |= LO_RECURSABLE;
494}
495
496void
497lockdisablerecurse(struct lock *lk)
498{
499
500	lockmgr_assert(lk, KA_XLOCKED);
501	lk->lock_object.lo_flags &= ~LO_RECURSABLE;
502}
503
504void
505lockdestroy(struct lock *lk)
506{
507
508	KASSERT(lk->lk_lock == LK_UNLOCKED, ("lockmgr still held"));
509	KASSERT(lk->lk_recurse == 0, ("lockmgr still recursed"));
510	KASSERT(lk->lk_exslpfail == 0, ("lockmgr still exclusive waiters"));
511	lock_destroy(&lk->lock_object);
512}
513
514static bool __always_inline
515lockmgr_slock_try(struct lock *lk, uintptr_t *xp, int flags, bool fp)
516{
517
	/*
	 * If no other thread has an exclusive lock and no exclusive
	 * waiter is present, bump the count of sharers.  Since we have
	 * to preserve the state of waiters, if we fail to acquire the
	 * shared lock, loop back and retry.
	 */
525	while (LK_CAN_SHARE(*xp, flags, fp)) {
526		if (atomic_fcmpset_acq_ptr(&lk->lk_lock, xp,
527		    *xp + LK_ONE_SHARER)) {
528			return (true);
529		}
530	}
531	return (false);
532}
533
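/*
 * Drop one shared reference, but only while other sharers remain or no
 * waiters need to be awakened; otherwise fail so that the caller falls
 * back to the slow path.
 */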
534static bool __always_inline
535lockmgr_sunlock_try(struct lock *lk, uintptr_t *xp)
536{
537
538	for (;;) {
539		if (LK_SHARERS(*xp) > 1 || !(*xp & LK_ALL_WAITERS)) {
540			if (atomic_fcmpset_rel_ptr(&lk->lk_lock, xp,
541			    *xp - LK_ONE_SHARER))
542				return (true);
543			continue;
544		}
545		break;
546	}
547	return (false);
548}
549
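/*
 * Adaptive spinning for shared acquisition: spin while the exclusive owner
 * is running on another CPU, giving up if the lock is disowned, becomes
 * share-locked, the owner goes off CPU or waiters queue up.  Returns true
 * once the lock appears acquirable in shared mode, with the fresh lock
 * value stored in *xp.
 */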
550static bool
551lockmgr_slock_adaptive(struct lock_delay_arg *lda, struct lock *lk, uintptr_t *xp,
552    int flags)
553{
554	struct thread *owner;
555	uintptr_t x;
556
557	x = *xp;
558	MPASS(x != LK_UNLOCKED);
559	owner = (struct thread *)LK_HOLDER(x);
560	for (;;) {
561		MPASS(owner != curthread);
562		if (owner == (struct thread *)LK_KERNPROC)
563			return (false);
564		if ((x & LK_SHARE) && LK_SHARERS(x) > 0)
565			return (false);
566		if (owner == NULL)
567			return (false);
568		if (!TD_IS_RUNNING(owner))
569			return (false);
570		if ((x & LK_ALL_WAITERS) != 0)
571			return (false);
572		lock_delay(lda);
573		x = lockmgr_read_value(lk);
574		if (LK_CAN_SHARE(x, flags, false)) {
575			*xp = x;
576			return (true);
577		}
578		owner = (struct thread *)LK_HOLDER(x);
579	}
580}
581
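/*
 * Slow path for shared acquisition: handles adaptive spinning, setting the
 * shared waiters flag and sleeping on the shared queue.
 */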
582static __noinline int
583lockmgr_slock_hard(struct lock *lk, u_int flags, struct lock_object *ilk,
584    const char *file, int line, struct lockmgr_wait *lwa)
585{
586	uintptr_t tid, x;
587	int error = 0;
588	const char *iwmesg;
589	int ipri, itimo;
590
591#ifdef KDTRACE_HOOKS
592	uint64_t sleep_time = 0;
593#endif
594#ifdef LOCK_PROFILING
595	uint64_t waittime = 0;
596	int contested = 0;
597#endif
598	struct lock_delay_arg lda;
599
600	if (SCHEDULER_STOPPED())
601		goto out;
602
603	tid = (uintptr_t)curthread;
604
605	if (LK_CAN_WITNESS(flags))
606		WITNESS_CHECKORDER(&lk->lock_object, LOP_NEWORDER,
607		    file, line, flags & LK_INTERLOCK ? ilk : NULL);
608	x = lockmgr_read_value(lk);
609	lock_delay_arg_init(&lda, &lockmgr_delay);
610	if (!lk_adaptive)
611		flags &= ~LK_ADAPTIVE;
612	/*
613	 * The lock may already be locked exclusive by curthread,
614	 * avoid deadlock.
615	 */
616	if (LK_HOLDER(x) == tid) {
617		LOCK_LOG2(lk,
618		    "%s: %p already held in exclusive mode",
619		    __func__, lk);
620		error = EDEADLK;
621		goto out;
622	}
623
624	for (;;) {
625		if (lockmgr_slock_try(lk, &x, flags, false))
626			break;
627
628		lock_profile_obtain_lock_failed(&lk->lock_object, false,
629		    &contested, &waittime);
630
631		if ((flags & (LK_ADAPTIVE | LK_INTERLOCK)) == LK_ADAPTIVE) {
632			if (lockmgr_slock_adaptive(&lda, lk, &x, flags))
633				continue;
634		}
635
636#ifdef HWPMC_HOOKS
637		PMC_SOFT_CALL( , , lock, failed);
638#endif
639
		/*
		 * If the caller does not want to sleep, just give up
		 * and return.
		 */
644		if (LK_TRYOP(flags)) {
645			LOCK_LOG2(lk, "%s: %p fails the try operation",
646			    __func__, lk);
647			error = EBUSY;
648			break;
649		}
650
		/*
		 * Acquire the sleepqueue chain lock because we
		 * probably will need to manipulate waiters flags.
		 */
655		sleepq_lock(&lk->lock_object);
656		x = lockmgr_read_value(lk);
657retry_sleepq:
658
		/*
		 * If the lock can be acquired in shared mode, try
		 * again.
		 */
663		if (LK_CAN_SHARE(x, flags, false)) {
664			sleepq_release(&lk->lock_object);
665			continue;
666		}
667
668		/*
669		 * Try to set the LK_SHARED_WAITERS flag.  If we fail,
670		 * loop back and retry.
671		 */
672		if ((x & LK_SHARED_WAITERS) == 0) {
673			if (!atomic_fcmpset_acq_ptr(&lk->lk_lock, &x,
674			    x | LK_SHARED_WAITERS)) {
675				goto retry_sleepq;
676			}
677			LOCK_LOG2(lk, "%s: %p set shared waiters flag",
678			    __func__, lk);
679		}
680
681		if (lwa == NULL) {
682			iwmesg = lk->lock_object.lo_name;
683			ipri = lk->lk_pri;
684			itimo = lk->lk_timo;
685		} else {
686			iwmesg = lwa->iwmesg;
687			ipri = lwa->ipri;
688			itimo = lwa->itimo;
689		}
690
		/*
		 * Since we have been unable to acquire the shared lock
		 * and the shared waiters flag is set, we will sleep.
		 */
696#ifdef KDTRACE_HOOKS
697		sleep_time -= lockstat_nsecs(&lk->lock_object);
698#endif
699		error = sleeplk(lk, flags, ilk, iwmesg, ipri, itimo,
700		    SQ_SHARED_QUEUE);
701#ifdef KDTRACE_HOOKS
702		sleep_time += lockstat_nsecs(&lk->lock_object);
703#endif
704		flags &= ~LK_INTERLOCK;
705		if (error) {
706			LOCK_LOG3(lk,
707			    "%s: interrupted sleep for %p with %d",
708			    __func__, lk, error);
709			break;
710		}
711		LOCK_LOG2(lk, "%s: %p resuming from the sleep queue",
712		    __func__, lk);
713		x = lockmgr_read_value(lk);
714	}
715	if (error == 0) {
716#ifdef KDTRACE_HOOKS
717		if (sleep_time != 0)
718			LOCKSTAT_RECORD4(lockmgr__block, lk, sleep_time,
719			    LOCKSTAT_READER, (x & LK_SHARE) == 0,
720			    (x & LK_SHARE) == 0 ? 0 : LK_SHARERS(x));
721#endif
722#ifdef LOCK_PROFILING
723		lockmgr_note_shared_acquire(lk, contested, waittime,
724		    file, line, flags);
725#else
726		lockmgr_note_shared_acquire(lk, 0, 0, file, line,
727		    flags);
728#endif
729	}
730
731out:
732	lockmgr_exit(flags, ilk, 0);
733	return (error);
734}
735
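/*
 * Adaptive spinning for exclusive acquisition: spin while the owner is
 * running on another CPU and no waiters are queued, returning true once
 * the lock is observed unlocked.
 */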
736static bool
737lockmgr_xlock_adaptive(struct lock_delay_arg *lda, struct lock *lk, uintptr_t *xp)
738{
739	struct thread *owner;
740	uintptr_t x;
741
742	x = *xp;
743	MPASS(x != LK_UNLOCKED);
744	owner = (struct thread *)LK_HOLDER(x);
745	for (;;) {
746		MPASS(owner != curthread);
747		if (owner == NULL)
748			return (false);
749		if ((x & LK_SHARE) && LK_SHARERS(x) > 0)
750			return (false);
751		if (owner == (struct thread *)LK_KERNPROC)
752			return (false);
753		if (!TD_IS_RUNNING(owner))
754			return (false);
755		if ((x & LK_ALL_WAITERS) != 0)
756			return (false);
757		lock_delay(lda);
758		x = lockmgr_read_value(lk);
759		if (x == LK_UNLOCKED) {
760			*xp = x;
761			return (true);
762		}
763		owner = (struct thread *)LK_HOLDER(x);
764	}
765}
766
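/*
 * Slow path for exclusive acquisition: handles recursion, adaptive
 * spinning, setting the exclusive waiters flag and sleeping on the
 * exclusive queue.
 */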
767static __noinline int
768lockmgr_xlock_hard(struct lock *lk, u_int flags, struct lock_object *ilk,
769    const char *file, int line, struct lockmgr_wait *lwa)
770{
771	struct lock_class *class;
772	uintptr_t tid, x, v;
773	int error = 0;
774	const char *iwmesg;
775	int ipri, itimo;
776
777#ifdef KDTRACE_HOOKS
778	uint64_t sleep_time = 0;
779#endif
780#ifdef LOCK_PROFILING
781	uint64_t waittime = 0;
782	int contested = 0;
783#endif
784	struct lock_delay_arg lda;
785
786	if (SCHEDULER_STOPPED())
787		goto out;
788
789	tid = (uintptr_t)curthread;
790
791	if (LK_CAN_WITNESS(flags))
792		WITNESS_CHECKORDER(&lk->lock_object, LOP_NEWORDER |
793		    LOP_EXCLUSIVE, file, line, flags & LK_INTERLOCK ?
794		    ilk : NULL);
795
796	/*
797	 * If curthread already holds the lock and this one is
798	 * allowed to recurse, simply recurse on it.
799	 */
800	if (lockmgr_xlocked(lk)) {
801		if ((flags & LK_CANRECURSE) == 0 &&
802		    (lk->lock_object.lo_flags & LO_RECURSABLE) == 0) {
			/*
			 * If this is a try operation, just give up and
			 * return instead of panicking.
			 */
807			if (LK_TRYOP(flags)) {
808				LOCK_LOG2(lk,
809				    "%s: %p fails the try operation",
810				    __func__, lk);
811				error = EBUSY;
812				goto out;
813			}
814			if (flags & LK_INTERLOCK) {
815				class = LOCK_CLASS(ilk);
816				class->lc_unlock(ilk);
817			}
818			STACK_PRINT(lk);
819			panic("%s: recursing on non recursive lockmgr %p "
820			    "@ %s:%d\n", __func__, lk, file, line);
821		}
822		atomic_set_ptr(&lk->lk_lock, LK_WRITER_RECURSED);
823		lk->lk_recurse++;
824		LOCK_LOG2(lk, "%s: %p recursing", __func__, lk);
825		LOCK_LOG_LOCK("XLOCK", &lk->lock_object, 0,
826		    lk->lk_recurse, file, line);
827		WITNESS_LOCK(&lk->lock_object, LOP_EXCLUSIVE |
828		    LK_TRYWIT(flags), file, line);
829		TD_LOCKS_INC(curthread);
830		goto out;
831	}
832
833	x = LK_UNLOCKED;
834	lock_delay_arg_init(&lda, &lockmgr_delay);
835	if (!lk_adaptive)
836		flags &= ~LK_ADAPTIVE;
837	for (;;) {
838		if (x == LK_UNLOCKED) {
839			if (atomic_fcmpset_acq_ptr(&lk->lk_lock, &x, tid))
840				break;
841			continue;
842		}
843
844		lock_profile_obtain_lock_failed(&lk->lock_object, false,
845		    &contested, &waittime);
846
847		if ((flags & (LK_ADAPTIVE | LK_INTERLOCK)) == LK_ADAPTIVE) {
848			if (lockmgr_xlock_adaptive(&lda, lk, &x))
849				continue;
850		}
851#ifdef HWPMC_HOOKS
852		PMC_SOFT_CALL( , , lock, failed);
853#endif
854
		/*
		 * If the caller does not want to sleep, just give up
		 * and return.
		 */
859		if (LK_TRYOP(flags)) {
860			LOCK_LOG2(lk, "%s: %p fails the try operation",
861			    __func__, lk);
862			error = EBUSY;
863			break;
864		}
865
		/*
		 * Acquire the sleepqueue chain lock because we
		 * probably will need to manipulate waiters flags.
		 */
870		sleepq_lock(&lk->lock_object);
871		x = lockmgr_read_value(lk);
872retry_sleepq:
873
		/*
		 * If the lock has been released while we spun on
		 * the sleepqueue chain lock, just try again.
		 */
878		if (x == LK_UNLOCKED) {
879			sleepq_release(&lk->lock_object);
880			continue;
881		}
882
		/*
		 * The lock can be in the state where there is a
		 * pending queue of waiters, but still no owner.
		 * This happens when the lock is contested and an
		 * owner is going to claim the lock.
		 * If curthread is the one that successfully acquires
		 * it, claim lock ownership and return, preserving the
		 * waiters flags.
		 */
892		v = x & (LK_ALL_WAITERS | LK_EXCLUSIVE_SPINNERS);
893		if ((x & ~v) == LK_UNLOCKED) {
894			v &= ~LK_EXCLUSIVE_SPINNERS;
895			if (atomic_fcmpset_acq_ptr(&lk->lk_lock, &x,
896			    tid | v)) {
897				sleepq_release(&lk->lock_object);
898				LOCK_LOG2(lk,
899				    "%s: %p claimed by a new writer",
900				    __func__, lk);
901				break;
902			}
903			goto retry_sleepq;
904		}
905
906		/*
907		 * Try to set the LK_EXCLUSIVE_WAITERS flag.  If we
908		 * fail, loop back and retry.
909		 */
910		if ((x & LK_EXCLUSIVE_WAITERS) == 0) {
911			if (!atomic_fcmpset_ptr(&lk->lk_lock, &x,
912			    x | LK_EXCLUSIVE_WAITERS)) {
913				goto retry_sleepq;
914			}
915			LOCK_LOG2(lk, "%s: %p set excl waiters flag",
916			    __func__, lk);
917		}
918
919		if (lwa == NULL) {
920			iwmesg = lk->lock_object.lo_name;
921			ipri = lk->lk_pri;
922			itimo = lk->lk_timo;
923		} else {
924			iwmesg = lwa->iwmesg;
925			ipri = lwa->ipri;
926			itimo = lwa->itimo;
927		}
928
		/*
		 * Since we have been unable to acquire the exclusive
		 * lock and the exclusive waiters flag is set, we will
		 * sleep.
		 */
934#ifdef KDTRACE_HOOKS
935		sleep_time -= lockstat_nsecs(&lk->lock_object);
936#endif
937		error = sleeplk(lk, flags, ilk, iwmesg, ipri, itimo,
938		    SQ_EXCLUSIVE_QUEUE);
939#ifdef KDTRACE_HOOKS
940		sleep_time += lockstat_nsecs(&lk->lock_object);
941#endif
942		flags &= ~LK_INTERLOCK;
943		if (error) {
944			LOCK_LOG3(lk,
945			    "%s: interrupted sleep for %p with %d",
946			    __func__, lk, error);
947			break;
948		}
949		LOCK_LOG2(lk, "%s: %p resuming from the sleep queue",
950		    __func__, lk);
951		x = lockmgr_read_value(lk);
952	}
953	if (error == 0) {
954#ifdef KDTRACE_HOOKS
955		if (sleep_time != 0)
956			LOCKSTAT_RECORD4(lockmgr__block, lk, sleep_time,
957			    LOCKSTAT_WRITER, (x & LK_SHARE) == 0,
958			    (x & LK_SHARE) == 0 ? 0 : LK_SHARERS(x));
959#endif
960#ifdef LOCK_PROFILING
961		lockmgr_note_exclusive_acquire(lk, contested, waittime,
962		    file, line, flags);
963#else
964		lockmgr_note_exclusive_acquire(lk, 0, 0, file, line,
965		    flags);
966#endif
967	}
968
969out:
970	lockmgr_exit(flags, ilk, 0);
971	return (error);
972}
973
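/*
 * Upgrade a shared lock to an exclusive one.  If we are the last sharer the
 * switch is done atomically, preserving the waiters flags.  Otherwise
 * LK_TRYUPGRADE fails with EBUSY, while LK_UPGRADE drops the shared
 * reference and falls back to the exclusive slow path.
 */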
974static __noinline int
975lockmgr_upgrade(struct lock *lk, u_int flags, struct lock_object *ilk,
976    const char *file, int line, struct lockmgr_wait *lwa)
977{
978	uintptr_t tid, v, setv;
979	int error = 0;
980	int op;
981
982	if (SCHEDULER_STOPPED())
983		goto out;
984
985	tid = (uintptr_t)curthread;
986
987	_lockmgr_assert(lk, KA_SLOCKED, file, line);
988
989	op = flags & LK_TYPE_MASK;
990	v = lockmgr_read_value(lk);
991	for (;;) {
992		if (LK_SHARERS(v) > 1) {
993			if (op == LK_TRYUPGRADE) {
994				LOCK_LOG2(lk, "%s: %p failed the nowait upgrade",
995				    __func__, lk);
996				error = EBUSY;
997				goto out;
998			}
999			if (atomic_fcmpset_rel_ptr(&lk->lk_lock, &v,
1000			    v - LK_ONE_SHARER)) {
1001				lockmgr_note_shared_release(lk, file, line);
1002				goto out_xlock;
1003			}
1004			continue;
1005		}
1006		MPASS((v & ~LK_ALL_WAITERS) == LK_SHARERS_LOCK(1));
1007
1008		setv = tid;
1009		setv |= (v & LK_ALL_WAITERS);
1010
1011		/*
1012		 * Try to switch from one shared lock to an exclusive one.
1013		 * We need to preserve waiters flags during the operation.
1014		 */
1015		if (atomic_fcmpset_ptr(&lk->lk_lock, &v, setv)) {
1016			LOCK_LOG_LOCK("XUPGRADE", &lk->lock_object, 0, 0, file,
1017			    line);
1018			WITNESS_UPGRADE(&lk->lock_object, LOP_EXCLUSIVE |
1019			    LK_TRYWIT(flags), file, line);
1020			LOCKSTAT_RECORD0(lockmgr__upgrade, lk);
1021			TD_SLOCKS_DEC(curthread);
1022			goto out;
1023		}
1024	}
1025
1026out_xlock:
1027	error = lockmgr_xlock_hard(lk, flags, ilk, file, line, lwa);
1028	flags &= ~LK_INTERLOCK;
1029out:
1030	lockmgr_exit(flags, ilk, 0);
1031	return (error);
1032}
1033
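/*
 * Full-featured acquire entry point: try the uncontested shared and
 * exclusive fast paths first, then fall back to the hard paths, the
 * upgrade handler or __lockmgr_args() as appropriate.
 */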
1034int
1035lockmgr_lock_flags(struct lock *lk, u_int flags, struct lock_object *ilk,
1036    const char *file, int line)
1037{
1038	struct lock_class *class;
1039	uintptr_t x, tid;
1040	u_int op;
1041	bool locked;
1042
1043	if (SCHEDULER_STOPPED())
1044		return (0);
1045
1046	op = flags & LK_TYPE_MASK;
1047	locked = false;
1048	switch (op) {
1049	case LK_SHARED:
1050		if (LK_CAN_WITNESS(flags))
1051			WITNESS_CHECKORDER(&lk->lock_object, LOP_NEWORDER,
1052			    file, line, flags & LK_INTERLOCK ? ilk : NULL);
1053		if (__predict_false(lk->lock_object.lo_flags & LK_NOSHARE))
1054			break;
1055		x = lockmgr_read_value(lk);
1056		if (lockmgr_slock_try(lk, &x, flags, true)) {
1057			lockmgr_note_shared_acquire(lk, 0, 0,
1058			    file, line, flags);
1059			locked = true;
1060		} else {
1061			return (lockmgr_slock_hard(lk, flags, ilk, file, line,
1062			    NULL));
1063		}
1064		break;
1065	case LK_EXCLUSIVE:
1066		if (LK_CAN_WITNESS(flags))
1067			WITNESS_CHECKORDER(&lk->lock_object, LOP_NEWORDER |
1068			    LOP_EXCLUSIVE, file, line, flags & LK_INTERLOCK ?
1069			    ilk : NULL);
1070		tid = (uintptr_t)curthread;
1071		if (lockmgr_read_value(lk) == LK_UNLOCKED &&
1072		    atomic_cmpset_acq_ptr(&lk->lk_lock, LK_UNLOCKED, tid)) {
1073			lockmgr_note_exclusive_acquire(lk, 0, 0, file, line,
1074			    flags);
1075			locked = true;
1076		} else {
1077			return (lockmgr_xlock_hard(lk, flags, ilk, file, line,
1078			    NULL));
1079		}
1080		break;
1081	case LK_UPGRADE:
1082	case LK_TRYUPGRADE:
1083		return (lockmgr_upgrade(lk, flags, ilk, file, line, NULL));
1084	default:
1085		break;
1086	}
1087	if (__predict_true(locked)) {
1088		if (__predict_false(flags & LK_INTERLOCK)) {
1089			class = LOCK_CLASS(ilk);
1090			class->lc_unlock(ilk);
1091		}
1092		return (0);
1093	} else {
1094		return (__lockmgr_args(lk, flags, ilk, LK_WMESG_DEFAULT,
1095		    LK_PRIO_DEFAULT, LK_TIMO_DEFAULT, file, line));
1096	}
1097}
1098
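/*
 * Slow paths for lock release, used when the fast unlock failed or waiters
 * may have to be awakened.
 */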
1099static __noinline int
1100lockmgr_sunlock_hard(struct lock *lk, uintptr_t x, u_int flags, struct lock_object *ilk,
1101    const char *file, int line)
1103{
1104	int wakeup_swapper = 0;
1105
1106	if (SCHEDULER_STOPPED())
1107		goto out;
1108
1109	wakeup_swapper = wakeupshlk(lk, file, line);
1110
1111out:
1112	lockmgr_exit(flags, ilk, wakeup_swapper);
1113	return (0);
1114}
1115
1116static __noinline int
1117lockmgr_xunlock_hard(struct lock *lk, uintptr_t x, u_int flags, struct lock_object *ilk,
1118    const char *file, int line)
1119{
1120	uintptr_t tid, v;
1121	int wakeup_swapper = 0;
1122	u_int realexslp;
1123	int queue;
1124
1125	if (SCHEDULER_STOPPED())
1126		goto out;
1127
1128	tid = (uintptr_t)curthread;
1129
	/*
	 * As a first attempt, treat the lock as if it has no
	 * waiters.
	 * Fix up the tid variable if the lock has been disowned.
	 */
1135	if (lockmgr_disowned_v(x))
1136		tid = LK_KERNPROC;
1137
1138	/*
1139	 * The lock is held in exclusive mode.
1140	 * If the lock is recursed also, then unrecurse it.
1141	 */
1142	if (lockmgr_recursed_v(x)) {
1143		LOCK_LOG2(lk, "%s: %p unrecursing", __func__, lk);
1144		lk->lk_recurse--;
1145		if (lk->lk_recurse == 0)
1146			atomic_clear_ptr(&lk->lk_lock, LK_WRITER_RECURSED);
1147		goto out;
1148	}
1149	if (tid != LK_KERNPROC)
1150		LOCKSTAT_PROFILE_RELEASE_RWLOCK(lockmgr__release, lk,
1151		    LOCKSTAT_WRITER);
1152
1153	if (x == tid && atomic_cmpset_rel_ptr(&lk->lk_lock, tid, LK_UNLOCKED))
1154		goto out;
1155
1156	sleepq_lock(&lk->lock_object);
1157	x = lockmgr_read_value(lk);
1158	v = LK_UNLOCKED;
1159
	/*
	 * If the lock has exclusive waiters, give them
	 * preference in order to avoid a deadlock with the
	 * shared runners-up.
	 * If interruptible sleeps left the exclusive queue
	 * empty, avoid starving the threads sleeping on the
	 * shared queue by giving them precedence and clearing
	 * the exclusive waiters bit anyway.
	 * Note that the lk_exslpfail count may not reflect
	 * the real number of waiters with the LK_SLEEPFAIL
	 * flag on because such waiters may also use
	 * interruptible sleeps, so lk_exslpfail should be
	 * considered an upper bound, edge cases included.
	 */
1175	MPASS((x & LK_EXCLUSIVE_SPINNERS) == 0);
1176	realexslp = sleepq_sleepcnt(&lk->lock_object, SQ_EXCLUSIVE_QUEUE);
1177	if ((x & LK_EXCLUSIVE_WAITERS) != 0 && realexslp != 0) {
1178		if (lk->lk_exslpfail != USHRT_MAX && lk->lk_exslpfail < realexslp) {
1179			lk->lk_exslpfail = 0;
1180			queue = SQ_EXCLUSIVE_QUEUE;
1181			v |= (x & LK_SHARED_WAITERS);
1182		} else {
1183			lk->lk_exslpfail = 0;
1184			LOCK_LOG2(lk,
1185			    "%s: %p has only LK_SLEEPFAIL sleepers",
1186			    __func__, lk);
1187			LOCK_LOG2(lk,
1188			    "%s: %p waking up threads on the exclusive queue",
1189			    __func__, lk);
1190			wakeup_swapper = sleepq_broadcast(&lk->lock_object,
1191			    SLEEPQ_LK, 0, SQ_EXCLUSIVE_QUEUE);
1192			queue = SQ_SHARED_QUEUE;
1193		}
1194	} else {
		/*
		 * Exclusive waiters sleeping with LK_SLEEPFAIL
		 * on and using interruptible sleeps/timeouts
		 * may have left spurious lk_exslpfail counts
		 * on, so clean them up anyway.
		 */
1201		lk->lk_exslpfail = 0;
1202		queue = SQ_SHARED_QUEUE;
1203	}
1204
1205	LOCK_LOG3(lk, "%s: %p waking up threads on the %s queue",
1206	    __func__, lk, queue == SQ_SHARED_QUEUE ? "shared" :
1207	    "exclusive");
1208	atomic_store_rel_ptr(&lk->lk_lock, v);
1209	wakeup_swapper |= sleepq_broadcast(&lk->lock_object, SLEEPQ_LK, 0, queue);
1210	sleepq_release(&lk->lock_object);
1211
1212out:
1213	lockmgr_exit(flags, ilk, wakeup_swapper);
1214	return (0);
1215}
1216
1217/*
1218 * Lightweight entry points for common operations.
1219 *
1220 * Functionality is similar to sx locks, in that none of the additional lockmgr
1221 * features are supported. To be clear, these are NOT supported:
1222 * 1. shared locking disablement
1223 * 2. returning with an error after sleep
1224 * 3. unlocking the interlock
1225 *
1226 * If in doubt, use lockmgr_lock_flags.
1227 */
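/*
 * Example (illustrative only; real consumers normally reach these through
 * the lockmgr(9) interface rather than calling them directly):
 *
 *	if (lockmgr_xlock(&lk, LK_EXCLUSIVE, __FILE__, __LINE__) == 0) {
 *		... critical section ...
 *		lockmgr_unlock(&lk);
 *	}
 */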
1228int
1229lockmgr_slock(struct lock *lk, u_int flags, const char *file, int line)
1230{
1231	uintptr_t x;
1232
1233	MPASS((flags & LK_TYPE_MASK) == LK_SHARED);
1234	MPASS((flags & LK_INTERLOCK) == 0);
1235	MPASS((lk->lock_object.lo_flags & LK_NOSHARE) == 0);
1236
1237	if (LK_CAN_WITNESS(flags))
1238		WITNESS_CHECKORDER(&lk->lock_object, LOP_NEWORDER,
1239		    file, line, NULL);
1240	x = lockmgr_read_value(lk);
1241	if (__predict_true(lockmgr_slock_try(lk, &x, flags, true))) {
1242		lockmgr_note_shared_acquire(lk, 0, 0, file, line, flags);
1243		return (0);
1244	}
1245
1246	return (lockmgr_slock_hard(lk, flags | LK_ADAPTIVE, NULL, file, line, NULL));
1247}
1248
1249int
1250lockmgr_xlock(struct lock *lk, u_int flags, const char *file, int line)
1251{
1252	uintptr_t tid;
1253
1254	MPASS((flags & LK_TYPE_MASK) == LK_EXCLUSIVE);
1255	MPASS((flags & LK_INTERLOCK) == 0);
1256
1257	if (LK_CAN_WITNESS(flags))
1258		WITNESS_CHECKORDER(&lk->lock_object, LOP_NEWORDER |
1259		    LOP_EXCLUSIVE, file, line, NULL);
1260	tid = (uintptr_t)curthread;
1261	if (atomic_cmpset_acq_ptr(&lk->lk_lock, LK_UNLOCKED, tid)) {
1262		lockmgr_note_exclusive_acquire(lk, 0, 0, file, line,
1263		    flags);
1264		return (0);
1265	}
1266
1267	return (lockmgr_xlock_hard(lk, flags | LK_ADAPTIVE, NULL, file, line, NULL));
1268}
1269
1270int
1271lockmgr_unlock(struct lock *lk)
1272{
1273	uintptr_t x, tid;
1274	const char *file;
1275	int line;
1276
1277	file = __FILE__;
1278	line = __LINE__;
1279
1280	_lockmgr_assert(lk, KA_LOCKED, file, line);
1281	x = lockmgr_read_value(lk);
	if (__predict_true((x & LK_SHARE) != 0)) {
1283		lockmgr_note_shared_release(lk, file, line);
1284		if (lockmgr_sunlock_try(lk, &x)) {
1285			LOCKSTAT_PROFILE_RELEASE_RWLOCK(lockmgr__release, lk, LOCKSTAT_READER);
1286		} else {
1287			return (lockmgr_sunlock_hard(lk, x, LK_RELEASE, NULL, file, line));
1288		}
1289	} else {
1290		tid = (uintptr_t)curthread;
1291		lockmgr_note_exclusive_release(lk, file, line);
1292		if (x == tid && atomic_cmpset_rel_ptr(&lk->lk_lock, tid, LK_UNLOCKED)) {
1293			LOCKSTAT_PROFILE_RELEASE_RWLOCK(lockmgr__release, lk,LOCKSTAT_WRITER);
1294		} else {
1295			return (lockmgr_xunlock_hard(lk, x, LK_RELEASE, NULL, file, line));
1296		}
1297	}
1298	return (0);
1299}
1300
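/*
 * General-purpose lockmgr entry point: validate the request, honor
 * LK_NOSHARE, and dispatch to the per-operation handlers.  LK_DOWNGRADE
 * and LK_DRAIN are handled inline.
 */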
1301int
1302__lockmgr_args(struct lock *lk, u_int flags, struct lock_object *ilk,
1303    const char *wmesg, int pri, int timo, const char *file, int line)
1304{
1305	GIANT_DECLARE;
1306	struct lockmgr_wait lwa;
1307	struct lock_class *class;
1308	const char *iwmesg;
1309	uintptr_t tid, v, x;
1310	u_int op, realexslp;
1311	int error, ipri, itimo, queue, wakeup_swapper;
1312#ifdef LOCK_PROFILING
1313	uint64_t waittime = 0;
1314	int contested = 0;
1315#endif
1316
1317	if (SCHEDULER_STOPPED())
1318		return (0);
1319
1320	error = 0;
1321	tid = (uintptr_t)curthread;
1322	op = (flags & LK_TYPE_MASK);
1323	iwmesg = (wmesg == LK_WMESG_DEFAULT) ? lk->lock_object.lo_name : wmesg;
1324	ipri = (pri == LK_PRIO_DEFAULT) ? lk->lk_pri : pri;
1325	itimo = (timo == LK_TIMO_DEFAULT) ? lk->lk_timo : timo;
1326
1327	lwa.iwmesg = iwmesg;
1328	lwa.ipri = ipri;
1329	lwa.itimo = itimo;
1330
1331	MPASS((flags & ~LK_TOTAL_MASK) == 0);
1332	KASSERT((op & (op - 1)) == 0,
1333	    ("%s: Invalid requested operation @ %s:%d", __func__, file, line));
1334	KASSERT((flags & (LK_NOWAIT | LK_SLEEPFAIL)) == 0 ||
1335	    (op != LK_DOWNGRADE && op != LK_RELEASE),
1336	    ("%s: Invalid flags in regard of the operation desired @ %s:%d",
1337	    __func__, file, line));
1338	KASSERT((flags & LK_INTERLOCK) == 0 || ilk != NULL,
1339	    ("%s: LK_INTERLOCK passed without valid interlock @ %s:%d",
1340	    __func__, file, line));
1341	KASSERT(kdb_active != 0 || !TD_IS_IDLETHREAD(curthread),
1342	    ("%s: idle thread %p on lockmgr %s @ %s:%d", __func__, curthread,
1343	    lk->lock_object.lo_name, file, line));
1344
1345	class = (flags & LK_INTERLOCK) ? LOCK_CLASS(ilk) : NULL;
1346
1347	if (lk->lock_object.lo_flags & LK_NOSHARE) {
1348		switch (op) {
1349		case LK_SHARED:
1350			op = LK_EXCLUSIVE;
1351			break;
1352		case LK_UPGRADE:
1353		case LK_TRYUPGRADE:
1354		case LK_DOWNGRADE:
1355			_lockmgr_assert(lk, KA_XLOCKED | KA_NOTRECURSED,
1356			    file, line);
1357			if (flags & LK_INTERLOCK)
1358				class->lc_unlock(ilk);
1359			return (0);
1360		}
1361	}
1362
1363	wakeup_swapper = 0;
1364	switch (op) {
1365	case LK_SHARED:
1366		return (lockmgr_slock_hard(lk, flags, ilk, file, line, &lwa));
1367		break;
1368	case LK_UPGRADE:
1369	case LK_TRYUPGRADE:
1370		return (lockmgr_upgrade(lk, flags, ilk, file, line, &lwa));
1371		break;
1372	case LK_EXCLUSIVE:
1373		return (lockmgr_xlock_hard(lk, flags, ilk, file, line, &lwa));
1374		break;
1375	case LK_DOWNGRADE:
1376		_lockmgr_assert(lk, KA_XLOCKED, file, line);
1377		WITNESS_DOWNGRADE(&lk->lock_object, 0, file, line);
1378
1379		/*
1380		 * Panic if the lock is recursed.
1381		 */
1382		if (lockmgr_xlocked(lk) && lockmgr_recursed(lk)) {
1383			if (flags & LK_INTERLOCK)
1384				class->lc_unlock(ilk);
1385			panic("%s: downgrade a recursed lockmgr %s @ %s:%d\n",
1386			    __func__, iwmesg, file, line);
1387		}
1388		TD_SLOCKS_INC(curthread);
1389
1390		/*
1391		 * In order to preserve waiters flags, just spin.
1392		 */
1393		for (;;) {
1394			x = lockmgr_read_value(lk);
1395			MPASS((x & LK_EXCLUSIVE_SPINNERS) == 0);
1396			x &= LK_ALL_WAITERS;
1397			if (atomic_cmpset_rel_ptr(&lk->lk_lock, tid | x,
1398			    LK_SHARERS_LOCK(1) | x))
1399				break;
1400			cpu_spinwait();
1401		}
1402		LOCK_LOG_LOCK("XDOWNGRADE", &lk->lock_object, 0, 0, file, line);
1403		LOCKSTAT_RECORD0(lockmgr__downgrade, lk);
1404		break;
1405	case LK_RELEASE:
1406		_lockmgr_assert(lk, KA_LOCKED, file, line);
1407		x = lockmgr_read_value(lk);
1408
		if (__predict_true((x & LK_SHARE) != 0)) {
1410			lockmgr_note_shared_release(lk, file, line);
1411			return (lockmgr_sunlock_hard(lk, x, flags, ilk, file, line));
1412		} else {
1413			lockmgr_note_exclusive_release(lk, file, line);
1414			return (lockmgr_xunlock_hard(lk, x, flags, ilk, file, line));
1415		}
1416		break;
1417	case LK_DRAIN:
1418		if (LK_CAN_WITNESS(flags))
1419			WITNESS_CHECKORDER(&lk->lock_object, LOP_NEWORDER |
1420			    LOP_EXCLUSIVE, file, line, flags & LK_INTERLOCK ?
1421			    ilk : NULL);
1422
1423		/*
1424		 * Trying to drain a lock we already own will result in a
1425		 * deadlock.
1426		 */
1427		if (lockmgr_xlocked(lk)) {
1428			if (flags & LK_INTERLOCK)
1429				class->lc_unlock(ilk);
1430			panic("%s: draining %s with the lock held @ %s:%d\n",
1431			    __func__, iwmesg, file, line);
1432		}
1433
1434		for (;;) {
1435			if (lk->lk_lock == LK_UNLOCKED &&
1436			    atomic_cmpset_acq_ptr(&lk->lk_lock, LK_UNLOCKED, tid))
1437				break;
1438
1439#ifdef HWPMC_HOOKS
1440			PMC_SOFT_CALL( , , lock, failed);
1441#endif
1442			lock_profile_obtain_lock_failed(&lk->lock_object, false,
1443			    &contested, &waittime);
1444
			/*
			 * If the caller does not want to sleep, just give up
			 * and return.
			 */
1449			if (LK_TRYOP(flags)) {
1450				LOCK_LOG2(lk, "%s: %p fails the try operation",
1451				    __func__, lk);
1452				error = EBUSY;
1453				break;
1454			}
1455
			/*
			 * Acquire the sleepqueue chain lock because we
			 * probably will need to manipulate waiters flags.
			 */
1460			sleepq_lock(&lk->lock_object);
1461			x = lockmgr_read_value(lk);
1462
			/*
			 * If the lock has been released while we spun on
			 * the sleepqueue chain lock, just try again.
			 */
1467			if (x == LK_UNLOCKED) {
1468				sleepq_release(&lk->lock_object);
1469				continue;
1470			}
1471
1472			v = x & (LK_ALL_WAITERS | LK_EXCLUSIVE_SPINNERS);
1473			if ((x & ~v) == LK_UNLOCKED) {
1474				v = (x & ~LK_EXCLUSIVE_SPINNERS);
1475
				/*
				 * If interruptible sleeps left the exclusive
				 * queue empty, avoid starving the threads
				 * sleeping on the shared queue by giving them
				 * precedence and clearing the exclusive
				 * waiters bit anyway.
				 * Note that the lk_exslpfail count may not
				 * reflect the real number of waiters with the
				 * LK_SLEEPFAIL flag on because such waiters
				 * may also use interruptible sleeps, so
				 * lk_exslpfail should be considered an upper
				 * bound, edge cases included.
				 */
1490				if (v & LK_EXCLUSIVE_WAITERS) {
1491					queue = SQ_EXCLUSIVE_QUEUE;
1492					v &= ~LK_EXCLUSIVE_WAITERS;
1493				} else {
					/*
					 * Exclusive waiters sleeping with
					 * LK_SLEEPFAIL on and using
					 * interruptible sleeps/timeouts may
					 * have left spurious lk_exslpfail
					 * counts on, so clean them up anyway.
					 */
1501					MPASS(v & LK_SHARED_WAITERS);
1502					lk->lk_exslpfail = 0;
1503					queue = SQ_SHARED_QUEUE;
1504					v &= ~LK_SHARED_WAITERS;
1505				}
1506				if (queue == SQ_EXCLUSIVE_QUEUE) {
1507					realexslp =
1508					    sleepq_sleepcnt(&lk->lock_object,
1509					    SQ_EXCLUSIVE_QUEUE);
1510					if (lk->lk_exslpfail >= realexslp) {
1511						lk->lk_exslpfail = 0;
1512						queue = SQ_SHARED_QUEUE;
1513						v &= ~LK_SHARED_WAITERS;
1514						if (realexslp != 0) {
1515							LOCK_LOG2(lk,
1516					"%s: %p has only LK_SLEEPFAIL sleepers",
1517							    __func__, lk);
1518							LOCK_LOG2(lk,
1519			"%s: %p waking up threads on the exclusive queue",
1520							    __func__, lk);
1521							wakeup_swapper =
1522							    sleepq_broadcast(
1523							    &lk->lock_object,
1524							    SLEEPQ_LK, 0,
1525							    SQ_EXCLUSIVE_QUEUE);
1526						}
1527					} else
1528						lk->lk_exslpfail = 0;
1529				}
1530				if (!atomic_cmpset_ptr(&lk->lk_lock, x, v)) {
1531					sleepq_release(&lk->lock_object);
1532					continue;
1533				}
1534				LOCK_LOG3(lk,
1535				"%s: %p waking up all threads on the %s queue",
1536				    __func__, lk, queue == SQ_SHARED_QUEUE ?
1537				    "shared" : "exclusive");
1538				wakeup_swapper |= sleepq_broadcast(
1539				    &lk->lock_object, SLEEPQ_LK, 0, queue);
1540
				/*
				 * If shared waiters have been woken up, we
				 * need to wait for one of them to acquire the
				 * lock before setting the exclusive waiters
				 * flag, in order to avoid a deadlock.
				 */
1547				if (queue == SQ_SHARED_QUEUE) {
1548					for (v = lk->lk_lock;
1549					    (v & LK_SHARE) && !LK_SHARERS(v);
1550					    v = lk->lk_lock)
1551						cpu_spinwait();
1552				}
1553			}
1554
1555			/*
1556			 * Try to set the LK_EXCLUSIVE_WAITERS flag.  If we
1557			 * fail, loop back and retry.
1558			 */
1559			if ((x & LK_EXCLUSIVE_WAITERS) == 0) {
1560				if (!atomic_cmpset_ptr(&lk->lk_lock, x,
1561				    x | LK_EXCLUSIVE_WAITERS)) {
1562					sleepq_release(&lk->lock_object);
1563					continue;
1564				}
1565				LOCK_LOG2(lk, "%s: %p set drain waiters flag",
1566				    __func__, lk);
1567			}
1568
			/*
			 * Since we have been unable to acquire the
			 * exclusive lock and the exclusive waiters flag
			 * is set, we will sleep.
			 */
1574			if (flags & LK_INTERLOCK) {
1575				class->lc_unlock(ilk);
1576				flags &= ~LK_INTERLOCK;
1577			}
1578			GIANT_SAVE();
1579			sleepq_add(&lk->lock_object, NULL, iwmesg, SLEEPQ_LK,
1580			    SQ_EXCLUSIVE_QUEUE);
1581			sleepq_wait(&lk->lock_object, ipri & PRIMASK);
1582			GIANT_RESTORE();
1583			LOCK_LOG2(lk, "%s: %p resuming from the sleep queue",
1584			    __func__, lk);
1585		}
1586
1587		if (error == 0) {
1588			lock_profile_obtain_lock_success(&lk->lock_object,
1589			    false, contested, waittime, file, line);
1590			LOCK_LOG_LOCK("DRAIN", &lk->lock_object, 0,
1591			    lk->lk_recurse, file, line);
1592			WITNESS_LOCK(&lk->lock_object, LOP_EXCLUSIVE |
1593			    LK_TRYWIT(flags), file, line);
1594			TD_LOCKS_INC(curthread);
1595			STACK_SAVE(lk);
1596		}
1597		break;
1598	default:
1599		if (flags & LK_INTERLOCK)
1600			class->lc_unlock(ilk);
1601		panic("%s: unknown lockmgr request 0x%x\n", __func__, op);
1602	}
1603
1604	if (flags & LK_INTERLOCK)
1605		class->lc_unlock(ilk);
1606	if (wakeup_swapper)
1607		kick_proc0();
1608
1609	return (error);
1610}
1611
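/*
 * Disown the lock: hand exclusive ownership over to LK_KERNPROC so that
 * any thread may release it later, preserving the waiters flags.
 */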
1612void
1613_lockmgr_disown(struct lock *lk, const char *file, int line)
1614{
1615	uintptr_t tid, x;
1616
1617	if (SCHEDULER_STOPPED())
1618		return;
1619
1620	tid = (uintptr_t)curthread;
1621	_lockmgr_assert(lk, KA_XLOCKED, file, line);
1622
1623	/*
1624	 * Panic if the lock is recursed.
1625	 */
1626	if (lockmgr_xlocked(lk) && lockmgr_recursed(lk))
1627		panic("%s: disown a recursed lockmgr @ %s:%d\n",
1628		    __func__,  file, line);
1629
1630	/*
1631	 * If the owner is already LK_KERNPROC just skip the whole operation.
1632	 */
1633	if (LK_HOLDER(lk->lk_lock) != tid)
1634		return;
1635	lock_profile_release_lock(&lk->lock_object, false);
1636	LOCKSTAT_RECORD1(lockmgr__disown, lk, LOCKSTAT_WRITER);
1637	LOCK_LOG_LOCK("XDISOWN", &lk->lock_object, 0, 0, file, line);
1638	WITNESS_UNLOCK(&lk->lock_object, LOP_EXCLUSIVE, file, line);
1639	TD_LOCKS_DEC(curthread);
1640	STACK_SAVE(lk);
1641
1642	/*
1643	 * In order to preserve waiters flags, just spin.
1644	 */
1645	for (;;) {
1646		x = lockmgr_read_value(lk);
1647		MPASS((x & LK_EXCLUSIVE_SPINNERS) == 0);
1648		x &= LK_ALL_WAITERS;
1649		if (atomic_cmpset_rel_ptr(&lk->lk_lock, tid | x,
1650		    LK_KERNPROC | x))
1651			return;
1652		cpu_spinwait();
1653	}
1654}
1655
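/*
 * Print the lock state and, when DEBUG_LOCKS is enabled, the saved stack
 * trace of the last acquisition.
 */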
1656void
1657lockmgr_printinfo(const struct lock *lk)
1658{
1659	struct thread *td;
1660	uintptr_t x;
1661
1662	if (lk->lk_lock == LK_UNLOCKED)
1663		printf("lock type %s: UNLOCKED\n", lk->lock_object.lo_name);
1664	else if (lk->lk_lock & LK_SHARE)
1665		printf("lock type %s: SHARED (count %ju)\n",
1666		    lk->lock_object.lo_name,
1667		    (uintmax_t)LK_SHARERS(lk->lk_lock));
1668	else {
1669		td = lockmgr_xholder(lk);
1670		if (td == (struct thread *)LK_KERNPROC)
1671			printf("lock type %s: EXCL by KERNPROC\n",
1672			    lk->lock_object.lo_name);
1673		else
1674			printf("lock type %s: EXCL by thread %p "
1675			    "(pid %d, %s, tid %d)\n", lk->lock_object.lo_name,
1676			    td, td->td_proc->p_pid, td->td_proc->p_comm,
1677			    td->td_tid);
1678	}
1679
1680	x = lk->lk_lock;
1681	if (x & LK_EXCLUSIVE_WAITERS)
1682		printf(" with exclusive waiters pending\n");
1683	if (x & LK_SHARED_WAITERS)
1684		printf(" with shared waiters pending\n");
1685	if (x & LK_EXCLUSIVE_SPINNERS)
1686		printf(" with exclusive spinners pending\n");
1687
1688	STACK_PRINT(lk);
1689}
1690
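/*
 * Report how the lock is held: LK_EXCLUSIVE if owned by curthread or
 * disowned to LK_KERNPROC, LK_EXCLOTHER if owned by another thread,
 * LK_SHARED if share-locked and 0 if unlocked.
 */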
1691int
1692lockstatus(const struct lock *lk)
1693{
1694	uintptr_t v, x;
1695	int ret;
1696
1697	ret = LK_SHARED;
1698	x = lockmgr_read_value(lk);
1699	v = LK_HOLDER(x);
1700
1701	if ((x & LK_SHARE) == 0) {
1702		if (v == (uintptr_t)curthread || v == LK_KERNPROC)
1703			ret = LK_EXCLUSIVE;
1704		else
1705			ret = LK_EXCLOTHER;
1706	} else if (x == LK_UNLOCKED)
1707		ret = 0;
1708
1709	return (ret);
1710}
1711
1712#ifdef INVARIANT_SUPPORT
1713
1714FEATURE(invariant_support,
1715    "Support for modules compiled with INVARIANTS option");
1716
1717#ifndef INVARIANTS
1718#undef	_lockmgr_assert
1719#endif
1720
1721void
1722_lockmgr_assert(const struct lock *lk, int what, const char *file, int line)
1723{
1724	int slocked = 0;
1725
1726	if (SCHEDULER_STOPPED())
1727		return;
1728	switch (what) {
1729	case KA_SLOCKED:
1730	case KA_SLOCKED | KA_NOTRECURSED:
1731	case KA_SLOCKED | KA_RECURSED:
1732		slocked = 1;
1733	case KA_LOCKED:
1734	case KA_LOCKED | KA_NOTRECURSED:
1735	case KA_LOCKED | KA_RECURSED:
1736#ifdef WITNESS
1737
		/*
		 * We cannot trust WITNESS if the lock is held in exclusive
		 * mode and a call to lockmgr_disown() happened.
		 * Work around this by skipping the check if the lock is held
		 * in exclusive mode, even for the KA_LOCKED case.
		 */
1744		if (slocked || (lk->lk_lock & LK_SHARE)) {
1745			witness_assert(&lk->lock_object, what, file, line);
1746			break;
1747		}
1748#endif
1749		if (lk->lk_lock == LK_UNLOCKED ||
1750		    ((lk->lk_lock & LK_SHARE) == 0 && (slocked ||
1751		    (!lockmgr_xlocked(lk) && !lockmgr_disowned(lk)))))
1752			panic("Lock %s not %slocked @ %s:%d\n",
1753			    lk->lock_object.lo_name, slocked ? "share" : "",
1754			    file, line);
1755
1756		if ((lk->lk_lock & LK_SHARE) == 0) {
1757			if (lockmgr_recursed(lk)) {
1758				if (what & KA_NOTRECURSED)
1759					panic("Lock %s recursed @ %s:%d\n",
1760					    lk->lock_object.lo_name, file,
1761					    line);
1762			} else if (what & KA_RECURSED)
1763				panic("Lock %s not recursed @ %s:%d\n",
1764				    lk->lock_object.lo_name, file, line);
1765		}
1766		break;
1767	case KA_XLOCKED:
1768	case KA_XLOCKED | KA_NOTRECURSED:
1769	case KA_XLOCKED | KA_RECURSED:
1770		if (!lockmgr_xlocked(lk) && !lockmgr_disowned(lk))
1771			panic("Lock %s not exclusively locked @ %s:%d\n",
1772			    lk->lock_object.lo_name, file, line);
1773		if (lockmgr_recursed(lk)) {
1774			if (what & KA_NOTRECURSED)
1775				panic("Lock %s recursed @ %s:%d\n",
1776				    lk->lock_object.lo_name, file, line);
1777		} else if (what & KA_RECURSED)
1778			panic("Lock %s not recursed @ %s:%d\n",
1779			    lk->lock_object.lo_name, file, line);
1780		break;
1781	case KA_UNLOCKED:
1782		if (lockmgr_xlocked(lk) || lockmgr_disowned(lk))
1783			panic("Lock %s exclusively locked @ %s:%d\n",
1784			    lk->lock_object.lo_name, file, line);
1785		break;
1786	default:
1787		panic("Unknown lockmgr assertion: %d @ %s:%d\n", what, file,
1788		    line);
1789	}
1790}
1791#endif
1792
1793#ifdef DDB
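/*
 * Report the lockmgr lock, if any, that the given thread is blocked on;
 * used by the DDB lock chain machinery.
 */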
1794int
1795lockmgr_chain(struct thread *td, struct thread **ownerp)
1796{
1797	const struct lock *lk;
1798
1799	lk = td->td_wchan;
1800
1801	if (LOCK_CLASS(&lk->lock_object) != &lock_class_lockmgr)
1802		return (0);
1803	db_printf("blocked on lockmgr %s", lk->lock_object.lo_name);
1804	if (lk->lk_lock & LK_SHARE)
1805		db_printf("SHARED (count %ju)\n",
1806		    (uintmax_t)LK_SHARERS(lk->lk_lock));
1807	else
1808		db_printf("EXCL\n");
1809	*ownerp = lockmgr_xholder(lk);
1810
1811	return (1);
1812}
1813
1814static void
1815db_show_lockmgr(const struct lock_object *lock)
1816{
1817	struct thread *td;
1818	const struct lock *lk;
1819
1820	lk = (const struct lock *)lock;
1821
1822	db_printf(" state: ");
1823	if (lk->lk_lock == LK_UNLOCKED)
1824		db_printf("UNLOCKED\n");
1825	else if (lk->lk_lock & LK_SHARE)
1826		db_printf("SLOCK: %ju\n", (uintmax_t)LK_SHARERS(lk->lk_lock));
1827	else {
1828		td = lockmgr_xholder(lk);
1829		if (td == (struct thread *)LK_KERNPROC)
1830			db_printf("XLOCK: LK_KERNPROC\n");
1831		else
1832			db_printf("XLOCK: %p (tid %d, pid %d, \"%s\")\n", td,
1833			    td->td_tid, td->td_proc->p_pid,
1834			    td->td_proc->p_comm);
1835		if (lockmgr_recursed(lk))
1836			db_printf(" recursed: %d\n", lk->lk_recurse);
1837	}
1838	db_printf(" waiters: ");
1839	switch (lk->lk_lock & LK_ALL_WAITERS) {
1840	case LK_SHARED_WAITERS:
1841		db_printf("shared\n");
1842		break;
1843	case LK_EXCLUSIVE_WAITERS:
1844		db_printf("exclusive\n");
1845		break;
1846	case LK_ALL_WAITERS:
1847		db_printf("shared and exclusive\n");
1848		break;
1849	default:
1850		db_printf("none\n");
1851	}
1852	db_printf(" spinners: ");
1853	if (lk->lk_lock & LK_EXCLUSIVE_SPINNERS)
1854		db_printf("exclusive\n");
1855	else
1856		db_printf("none\n");
1857}
1858#endif
1859