kern_umtx.c revision 164876
/*-
 * Copyright (c) 2004, David Xu <davidxu@freebsd.org>
 * Copyright (c) 2002, Jeffrey Roberson <jeff@freebsd.org>
 * All rights reserved.
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 * 1. Redistributions of source code must retain the above copyright
 *    notice unmodified, this list of conditions, and the following
 *    disclaimer.
 * 2. Redistributions in binary form must reproduce the above copyright
 *    notice, this list of conditions and the following disclaimer in the
 *    documentation and/or other materials provided with the distribution.
 *
 * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR
 * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
 * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.
 * IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT,
 * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT
 * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
 * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
 * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
 * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF
 * THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
 */

#include <sys/cdefs.h>
__FBSDID("$FreeBSD: head/sys/kern/kern_umtx.c 164876 2006-12-04 14:15:12Z davidxu $");

#include "opt_compat.h"
#include <sys/param.h>
#include <sys/kernel.h>
#include <sys/limits.h>
#include <sys/lock.h>
#include <sys/malloc.h>
#include <sys/mutex.h>
#include <sys/priv.h>
#include <sys/proc.h>
#include <sys/sched.h>
#include <sys/sysctl.h>
#include <sys/sysent.h>
#include <sys/systm.h>
#include <sys/sysproto.h>
#include <sys/eventhandler.h>
#include <sys/umtx.h>

#include <vm/vm.h>
#include <vm/vm_param.h>
#include <vm/pmap.h>
#include <vm/vm_map.h>
#include <vm/vm_object.h>

#ifdef COMPAT_IA32
#include <compat/freebsd32/freebsd32_proto.h>
#endif

#define TYPE_SIMPLE_LOCK	0
#define TYPE_SIMPLE_WAIT	1
#define TYPE_NORMAL_UMUTEX	2
#define TYPE_PI_UMUTEX		3
#define TYPE_PP_UMUTEX		4
#define TYPE_CV			5

/* Key to represent a unique userland synchronization object */
struct umtx_key {
	int	hash;
	int	type;
	int	shared;
	union {
		struct {
			vm_object_t	object;
			uintptr_t	offset;
		} shared;
		struct {
			struct vmspace	*vs;
			uintptr_t	addr;
		} private;
		struct {
			void		*a;
			uintptr_t	b;
		} both;
	} info;
};

/* Priority inheritance mutex info. */
struct umtx_pi {
	/* Owner thread */
	struct thread		*pi_owner;

	/* Reference count */
	int			pi_refcount;

	/* List entry to link a umtx held by a thread */
	TAILQ_ENTRY(umtx_pi)	pi_link;

	/* List entry in hash */
	TAILQ_ENTRY(umtx_pi)	pi_hashlink;

	/* List for waiters */
	TAILQ_HEAD(,umtx_q)	pi_blocked;

	/* Identify a userland lock object */
	struct umtx_key		pi_key;
};

/* A userland synchronization object user. */
struct umtx_q {
	/* Linked list for the hash. */
	TAILQ_ENTRY(umtx_q)	uq_link;

	/* Umtx key. */
	struct umtx_key		uq_key;

	/* Umtx flags. */
	int			uq_flags;
#define UQF_UMTXQ	0x0001

	/* The waiting thread. */
	struct thread		*uq_thread;

	/*
	 * The PI mutex this thread is blocked on.  Reads may hold
	 * either the chain lock or sched_lock; writes must hold both
	 * the chain lock and sched_lock.
	 */
	struct umtx_pi		*uq_pi_blocked;

	/* On blocked list */
	TAILQ_ENTRY(umtx_q)	uq_lockq;

	/* PI mutexes owned by this thread that other threads contend for */
	TAILQ_HEAD(,umtx_pi)	uq_pi_contested;

	/* Inherited priority from PP mutex */
	u_char			uq_inherited_pri;
};

TAILQ_HEAD(umtxq_head, umtx_q);

/* Userland lock object's wait-queue chain */
struct umtxq_chain {
	/* Lock for this chain. */
	struct mtx		uc_lock;

	/* List of sleep queues. */
	struct umtxq_head	uc_queue;

	/* Busy flag */
	char			uc_busy;

	/* Chain lock waiters */
	int			uc_waiters;

	/* All PI mutexes hashed to this chain */
	TAILQ_HEAD(,umtx_pi)	uc_pi_list;
};

#define	UMTXQ_LOCKED_ASSERT(uc)		mtx_assert(&(uc)->uc_lock, MA_OWNED)

/*
 * Don't propagate time-sharing priority; there is a security reason:
 * a user can simply create a PI-mutex, let thread A lock the mutex,
 * and let another thread B block on the mutex.  Because B is
 * sleeping, its priority would be boosted; this would boost A's
 * priority via priority propagation too, and A's priority would
 * never be lowered even if it were using 100% CPU, which is unfair
 * to other processes.  The UPRI() macro below therefore clamps
 * time-sharing priorities to PRI_MAX_TIMESHARE before propagation.
 */

#ifdef KSE
#define UPRI(td)	(((td)->td_ksegrp->kg_user_pri >= PRI_MIN_TIMESHARE &&\
			  (td)->td_ksegrp->kg_user_pri <= PRI_MAX_TIMESHARE) ?\
			 PRI_MAX_TIMESHARE : (td)->td_ksegrp->kg_user_pri)
#else
#define UPRI(td)	(((td)->td_user_pri >= PRI_MIN_TIMESHARE &&\
			  (td)->td_user_pri <= PRI_MAX_TIMESHARE) ?\
			 PRI_MAX_TIMESHARE : (td)->td_user_pri)
#endif

#define	GOLDEN_RATIO_PRIME	2654404609U
#define	UMTX_CHAINS		128
#define	UMTX_SHIFTS		(__WORD_BIT - 7)

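/*
 * How a umtx key is scoped: THREAD_SHARE keys are private to a process
 * (keyed by vmspace and address), PROCESS_SHARE keys are backed by a VM
 * object so they work across processes, and AUTO_SHARE picks between
 * the two based on the VM map entry's inheritance flag.
 */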
#define THREAD_SHARE		0
#define PROCESS_SHARE		1
#define AUTO_SHARE		2

#define	GET_SHARE(flags)	\
    (((flags) & USYNC_PROCESS_SHARED) == 0 ? THREAD_SHARE : PROCESS_SHARE)

static uma_zone_t		umtx_pi_zone;
static struct umtxq_chain	umtxq_chains[UMTX_CHAINS];
static MALLOC_DEFINE(M_UMTX, "umtx", "UMTX queue memory");
static int			umtx_pi_allocated;

SYSCTL_NODE(_debug, OID_AUTO, umtx, CTLFLAG_RW, 0, "umtx debug");
SYSCTL_INT(_debug_umtx, OID_AUTO, umtx_pi_allocated, CTLFLAG_RD,
    &umtx_pi_allocated, 0, "Allocated umtx_pi");

static void umtxq_sysinit(void *);
static void umtxq_hash(struct umtx_key *key);
static struct umtxq_chain *umtxq_getchain(struct umtx_key *key);
static void umtxq_lock(struct umtx_key *key);
static void umtxq_unlock(struct umtx_key *key);
static void umtxq_busy(struct umtx_key *key);
static void umtxq_unbusy(struct umtx_key *key);
static void umtxq_insert(struct umtx_q *uq);
static void umtxq_remove(struct umtx_q *uq);
static int umtxq_sleep(struct umtx_q *uq, const char *wmesg, int timo);
static int umtxq_count(struct umtx_key *key);
static int umtxq_signal(struct umtx_key *key, int nr_wakeup);
static int umtx_key_match(const struct umtx_key *k1, const struct umtx_key *k2);
static int umtx_key_get(void *addr, int type, int share,
	struct umtx_key *key);
static void umtx_key_release(struct umtx_key *key);
static struct umtx_pi *umtx_pi_alloc(int);
static void umtx_pi_free(struct umtx_pi *pi);
static int do_unlock_pp(struct thread *td, struct umutex *m, uint32_t flags);
static void umtx_thread_cleanup(struct thread *td);
static void umtx_exec_hook(void *arg __unused, struct proc *p __unused,
	struct image_params *imgp __unused);
SYSINIT(umtx, SI_SUB_EVENTHANDLER+1, SI_ORDER_MIDDLE, umtxq_sysinit, NULL);

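/*
 * Initialize the umtx subsystem: create the UMA zone used for umtx_pi
 * records, set up the wait-queue chains, and register an exec hook so
 * umtx state is cleaned up on process exec.
 */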
static void
umtxq_sysinit(void *arg __unused)
{
	int i;

	umtx_pi_zone = uma_zcreate("umtx pi", sizeof(struct umtx_pi),
		NULL, NULL, NULL, NULL, UMA_ALIGN_PTR, 0);
	for (i = 0; i < UMTX_CHAINS; ++i) {
		mtx_init(&umtxq_chains[i].uc_lock, "umtxql", NULL,
			 MTX_DEF | MTX_DUPOK);
		TAILQ_INIT(&umtxq_chains[i].uc_queue);
		TAILQ_INIT(&umtxq_chains[i].uc_pi_list);
		umtxq_chains[i].uc_busy = 0;
		umtxq_chains[i].uc_waiters = 0;
	}
	EVENTHANDLER_REGISTER(process_exec, umtx_exec_hook, NULL,
	    EVENTHANDLER_PRI_ANY);
}

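/*
 * Allocate the per-thread umtx queue entry.
 */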
struct umtx_q *
umtxq_alloc(void)
{
	struct umtx_q *uq;

	uq = malloc(sizeof(struct umtx_q), M_UMTX, M_WAITOK | M_ZERO);
	TAILQ_INIT(&uq->uq_pi_contested);
	uq->uq_inherited_pri = PRI_MAX;
	return (uq);
}

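/*
 * Free a per-thread umtx queue entry.
 */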
void
umtxq_free(struct umtx_q *uq)
{
	free(uq, M_UMTX);
}

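/*
 * Hash a key into a wait-queue chain index using multiplicative
 * (golden-ratio) hashing of its address and offset pair.
 */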
static inline void
umtxq_hash(struct umtx_key *key)
{
	unsigned n = (uintptr_t)key->info.both.a + key->info.both.b;
	key->hash = ((n * GOLDEN_RATIO_PRIME) >> UMTX_SHIFTS) % UMTX_CHAINS;
}

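/*
 * Compare two umtx keys for equality.
 */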
static inline int
umtx_key_match(const struct umtx_key *k1, const struct umtx_key *k2)
{
	return (k1->type == k2->type &&
		k1->info.both.a == k2->info.both.a &&
		k1->info.both.b == k2->info.both.b);
}

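/*
 * Return the wait-queue chain a key hashes to.
 */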
static inline struct umtxq_chain *
umtxq_getchain(struct umtx_key *key)
{
	return (&umtxq_chains[key->hash]);
}

/*
 * Set the chain to the busy state when the following operations
 * may block (a kernel mutex cannot be used to cover them).
 */
static inline void
umtxq_busy(struct umtx_key *key)
{
	struct umtxq_chain *uc;

	uc = umtxq_getchain(key);
	mtx_assert(&uc->uc_lock, MA_OWNED);
	while (uc->uc_busy != 0) {
		uc->uc_waiters++;
		msleep(uc, &uc->uc_lock, 0, "umtxqb", 0);
		uc->uc_waiters--;
	}
	uc->uc_busy = 1;
}

/*
 * Unbusy a chain.
 */
static inline void
umtxq_unbusy(struct umtx_key *key)
{
	struct umtxq_chain *uc;

	uc = umtxq_getchain(key);
	mtx_assert(&uc->uc_lock, MA_OWNED);
	KASSERT(uc->uc_busy != 0, ("not busy"));
	uc->uc_busy = 0;
	if (uc->uc_waiters)
		wakeup_one(uc);
}

/*
 * Lock a chain.
 */
static inline void
umtxq_lock(struct umtx_key *key)
{
	struct umtxq_chain *uc;

	uc = umtxq_getchain(key);
	mtx_lock(&uc->uc_lock);
}

/*
 * Unlock a chain.
 */
static inline void
umtxq_unlock(struct umtx_key *key)
{
	struct umtxq_chain *uc;

	uc = umtxq_getchain(key);
	mtx_unlock(&uc->uc_lock);
}

/*
 * Insert a thread onto the umtx queue.
 */
static inline void
umtxq_insert(struct umtx_q *uq)
{
	struct umtxq_chain *uc;

	uc = umtxq_getchain(&uq->uq_key);
	UMTXQ_LOCKED_ASSERT(uc);
	TAILQ_INSERT_TAIL(&uc->uc_queue, uq, uq_link);
	uq->uq_flags |= UQF_UMTXQ;
}

/*
 * Remove thread from the umtx queue.
 */
static inline void
umtxq_remove(struct umtx_q *uq)
{
	struct umtxq_chain *uc;

	uc = umtxq_getchain(&uq->uq_key);
	UMTXQ_LOCKED_ASSERT(uc);
	if (uq->uq_flags & UQF_UMTXQ) {
		TAILQ_REMOVE(&uc->uc_queue, uq, uq_link);
		uq->uq_flags &= ~UQF_UMTXQ;
	}
}

/*
 * Check if there are multiple waiters
 */
static int
umtxq_count(struct umtx_key *key)
{
	struct umtxq_chain *uc;
	struct umtx_q *uq;
	int count = 0;

	uc = umtxq_getchain(key);
	UMTXQ_LOCKED_ASSERT(uc);
	TAILQ_FOREACH(uq, &uc->uc_queue, uq_link) {
		if (umtx_key_match(&uq->uq_key, key)) {
			if (++count > 1)
				break;
		}
	}
	return (count);
}

/*
 * Check if there are multiple PI waiters and return the first
 * waiter.
 */
static int
umtxq_count_pi(struct umtx_key *key, struct umtx_q **first)
{
	struct umtxq_chain *uc;
	struct umtx_q *uq;
	int count = 0;

	*first = NULL;
	uc = umtxq_getchain(key);
	UMTXQ_LOCKED_ASSERT(uc);
	TAILQ_FOREACH(uq, &uc->uc_queue, uq_link) {
		if (umtx_key_match(&uq->uq_key, key)) {
			if (++count > 1)
				break;
			*first = uq;
		}
	}
	return (count);
}

/*
 * Wake up threads waiting on a userland object.
 */
static int
umtxq_signal(struct umtx_key *key, int n_wake)
{
	struct umtxq_chain *uc;
	struct umtx_q *uq, *next;
	int ret;

	ret = 0;
	uc = umtxq_getchain(key);
	UMTXQ_LOCKED_ASSERT(uc);
	TAILQ_FOREACH_SAFE(uq, &uc->uc_queue, uq_link, next) {
		if (umtx_key_match(&uq->uq_key, key)) {
			umtxq_remove(uq);
			wakeup(uq);
			if (++ret >= n_wake)
				break;
		}
	}
	return (ret);
}

/*
 * Wake up specified thread.
 */
static inline void
umtxq_signal_thread(struct umtx_q *uq)
{
	struct umtxq_chain *uc;

	uc = umtxq_getchain(&uq->uq_key);
	UMTXQ_LOCKED_ASSERT(uc);
	umtxq_remove(uq);
	wakeup(uq);
}

/*
 * Put a thread to sleep; before sleeping, check whether the thread
 * was already removed from the umtx queue.
 */
static inline int
umtxq_sleep(struct umtx_q *uq, const char *wmesg, int timo)
{
	struct umtxq_chain *uc;
	int error;

	uc = umtxq_getchain(&uq->uq_key);
	UMTXQ_LOCKED_ASSERT(uc);
	if (!(uq->uq_flags & UQF_UMTXQ))
		return (0);
	error = msleep(uq, &uc->uc_lock, PCATCH, wmesg, timo);
	if (error == EWOULDBLOCK)
		error = ETIMEDOUT;
	return (error);
}

/*
 * Convert a userspace address into a unique logical address.
 */
static int
umtx_key_get(void *addr, int type, int share, struct umtx_key *key)
{
	struct thread *td = curthread;
	vm_map_t map;
	vm_map_entry_t entry;
	vm_pindex_t pindex;
	vm_prot_t prot;
	boolean_t wired;

	key->type = type;
	if (share == THREAD_SHARE) {
		key->shared = 0;
		key->info.private.vs = td->td_proc->p_vmspace;
		key->info.private.addr = (uintptr_t)addr;
	} else {
		MPASS(share == PROCESS_SHARE || share == AUTO_SHARE);
		map = &td->td_proc->p_vmspace->vm_map;
		if (vm_map_lookup(&map, (vm_offset_t)addr, VM_PROT_WRITE,
		    &entry, &key->info.shared.object, &pindex, &prot,
		    &wired) != KERN_SUCCESS) {
			return EFAULT;
		}

		if ((share == PROCESS_SHARE) ||
		    (share == AUTO_SHARE &&
		     VM_INHERIT_SHARE == entry->inheritance)) {
			key->shared = 1;
			key->info.shared.offset = entry->offset + entry->start -
				(vm_offset_t)addr;
			vm_object_reference(key->info.shared.object);
		} else {
			key->shared = 0;
			key->info.private.vs = td->td_proc->p_vmspace;
			key->info.private.addr = (uintptr_t)addr;
		}
		vm_map_lookup_done(map, entry);
	}

	umtxq_hash(key);
	return (0);
}

/*
 * Release key.
 */
static inline void
umtx_key_release(struct umtx_key *key)
{
	if (key->shared)
		vm_object_deallocate(key->info.shared.object);
}

/*
 * Lock a umtx object.
 */
static int
_do_lock_umtx(struct thread *td, struct umtx *umtx, u_long id, int timo)
{
	struct umtx_q *uq;
	u_long owner;
	u_long old;
	int error = 0;

	uq = td->td_umtxq;

	/*
	 * Care must be exercised when dealing with the umtx structure.
	 * It can fault on any access.
	 */
	for (;;) {
		/*
		 * Try the uncontested case.  This should be done in userland.
		 */
		owner = casuword(&umtx->u_owner, UMTX_UNOWNED, id);

		/* The acquire succeeded. */
		if (owner == UMTX_UNOWNED)
			return (0);

		/* The address was invalid. */
		if (owner == -1)
			return (EFAULT);

		/* If no one owns it but it is contested try to acquire it. */
		if (owner == UMTX_CONTESTED) {
			owner = casuword(&umtx->u_owner,
			    UMTX_CONTESTED, id | UMTX_CONTESTED);

			if (owner == UMTX_CONTESTED)
				return (0);

			/* The address was invalid. */
			if (owner == -1)
				return (EFAULT);

			/* If this failed the lock has changed, restart. */
			continue;
		}

		/*
		 * If we caught a signal, we have already retried, so
		 * exit immediately.
		 */
		if (error != 0)
			return (error);

		if ((error = umtx_key_get(umtx, TYPE_SIMPLE_LOCK,
			AUTO_SHARE, &uq->uq_key)) != 0)
			return (error);

		umtxq_lock(&uq->uq_key);
		umtxq_busy(&uq->uq_key);
		umtxq_insert(uq);
		umtxq_unbusy(&uq->uq_key);
		umtxq_unlock(&uq->uq_key);

		/*
		 * Set the contested bit so that a release in user space
		 * knows to use the system call for unlock.  If this fails
		 * either someone else has acquired the lock or it has been
		 * released.
		 */
		old = casuword(&umtx->u_owner, owner, owner | UMTX_CONTESTED);

		/* The address was invalid. */
		if (old == -1) {
			umtxq_lock(&uq->uq_key);
			umtxq_remove(uq);
			umtxq_unlock(&uq->uq_key);
			umtx_key_release(&uq->uq_key);
			return (EFAULT);
		}

		/*
		 * We set the contested bit, so sleep.  Otherwise the lock
		 * changed and we need to retry, or we lost a race with the
		 * thread unlocking the umtx.
		 */
		umtxq_lock(&uq->uq_key);
		if (old == owner)
			error = umtxq_sleep(uq, "umtx", timo);
		umtxq_remove(uq);
		umtxq_unlock(&uq->uq_key);
		umtx_key_release(&uq->uq_key);
	}

	return (0);
}

/*
 * Lock a umtx object.
 */
static int
do_lock_umtx(struct thread *td, struct umtx *umtx, u_long id,
	struct timespec *timeout)
{
	struct timespec ts, ts2, ts3;
	struct timeval tv;
	int error;

	if (timeout == NULL) {
		error = _do_lock_umtx(td, umtx, id, 0);
		/* Mutex locking is restarted if it is interrupted. */
		if (error == EINTR)
			error = ERESTART;
	} else {
		getnanouptime(&ts);
		timespecadd(&ts, timeout);
		TIMESPEC_TO_TIMEVAL(&tv, timeout);
		for (;;) {
			error = _do_lock_umtx(td, umtx, id, tvtohz(&tv));
			if (error != ETIMEDOUT)
				break;
			getnanouptime(&ts2);
			if (timespeccmp(&ts2, &ts, >=)) {
				error = ETIMEDOUT;
				break;
			}
			ts3 = ts;
			timespecsub(&ts3, &ts2);
			TIMESPEC_TO_TIMEVAL(&tv, &ts3);
		}
		/* Timed-locking is not restarted. */
		if (error == ERESTART)
			error = EINTR;
	}
	return (error);
}

/*
 * Unlock a umtx object.
 */
static int
do_unlock_umtx(struct thread *td, struct umtx *umtx, u_long id)
{
	struct umtx_key key;
	u_long owner;
	u_long old;
	int error;
	int count;

	/*
	 * Make sure we own this mtx.
	 */
	owner = fuword(__DEVOLATILE(u_long *, &umtx->u_owner));
	if (owner == -1)
		return (EFAULT);

	if ((owner & ~UMTX_CONTESTED) != id)
		return (EPERM);

	/* This should be done in userland */
	if ((owner & UMTX_CONTESTED) == 0) {
		old = casuword(&umtx->u_owner, owner, UMTX_UNOWNED);
		if (old == -1)
			return (EFAULT);
		if (old == owner)
			return (0);
		owner = old;
	}

	/* We should only ever be in here for contested locks */
	if ((error = umtx_key_get(umtx, TYPE_SIMPLE_LOCK, AUTO_SHARE,
		&key)) != 0)
		return (error);

	umtxq_lock(&key);
	umtxq_busy(&key);
	count = umtxq_count(&key);
	umtxq_unlock(&key);

	/*
	 * When unlocking the umtx, it must be marked as unowned if
	 * there is zero or one thread only waiting for it.
	 * Otherwise, it must be marked as contested.
	 */
	old = casuword(&umtx->u_owner, owner,
		count <= 1 ? UMTX_UNOWNED : UMTX_CONTESTED);
	umtxq_lock(&key);
	umtxq_signal(&key, 1);
	umtxq_unbusy(&key);
	umtxq_unlock(&key);
	umtx_key_release(&key);
	if (old == -1)
		return (EFAULT);
	if (old != owner)
		return (EINVAL);
	return (0);
}

#ifdef COMPAT_IA32

/*
 * Lock a umtx object.
 */
static int
_do_lock_umtx32(struct thread *td, uint32_t *m, uint32_t id, int timo)
{
	struct umtx_q *uq;
	uint32_t owner;
	uint32_t old;
	int error = 0;

	uq = td->td_umtxq;

	/*
	 * Care must be exercised when dealing with the umtx structure.
	 * It can fault on any access.
	 */
	for (;;) {
		/*
		 * Try the uncontested case.  This should be done in userland.
		 */
		owner = casuword32(m, UMUTEX_UNOWNED, id);

		/* The acquire succeeded. */
		if (owner == UMUTEX_UNOWNED)
			return (0);

		/* The address was invalid. */
		if (owner == -1)
			return (EFAULT);

		/* If no one owns it but it is contested try to acquire it. */
		if (owner == UMUTEX_CONTESTED) {
			owner = casuword32(m,
			    UMUTEX_CONTESTED, id | UMUTEX_CONTESTED);
			if (owner == UMUTEX_CONTESTED)
				return (0);

			/* The address was invalid. */
			if (owner == -1)
				return (EFAULT);

			/* If this failed the lock has changed, restart. */
			continue;
		}

		/*
		 * If we caught a signal, we have already retried, so
		 * exit immediately.
		 */
		if (error != 0)
			return (error);

		if ((error = umtx_key_get(m, TYPE_SIMPLE_LOCK,
			AUTO_SHARE, &uq->uq_key)) != 0)
			return (error);

		umtxq_lock(&uq->uq_key);
		umtxq_busy(&uq->uq_key);
		umtxq_insert(uq);
		umtxq_unbusy(&uq->uq_key);
		umtxq_unlock(&uq->uq_key);

		/*
		 * Set the contested bit so that a release in user space
		 * knows to use the system call for unlock.  If this fails
		 * either someone else has acquired the lock or it has been
		 * released.
		 */
		old = casuword32(m, owner, owner | UMUTEX_CONTESTED);

		/* The address was invalid. */
		if (old == -1) {
			umtxq_lock(&uq->uq_key);
			umtxq_remove(uq);
			umtxq_unlock(&uq->uq_key);
			umtx_key_release(&uq->uq_key);
			return (EFAULT);
		}

		/*
		 * We set the contested bit, so sleep.  Otherwise the lock
		 * changed and we need to retry, or we lost a race with the
		 * thread unlocking the umtx.
		 */
		umtxq_lock(&uq->uq_key);
		if (old == owner)
			error = umtxq_sleep(uq, "umtx", timo);
		umtxq_remove(uq);
		umtxq_unlock(&uq->uq_key);
		umtx_key_release(&uq->uq_key);
	}

	return (0);
}

/*
 * Lock a umtx object.
 */
static int
do_lock_umtx32(struct thread *td, void *m, uint32_t id,
	struct timespec *timeout)
{
	struct timespec ts, ts2, ts3;
	struct timeval tv;
	int error;

	if (timeout == NULL) {
		error = _do_lock_umtx32(td, m, id, 0);
		/* Mutex locking is restarted if it is interrupted. */
		if (error == EINTR)
			error = ERESTART;
	} else {
		getnanouptime(&ts);
		timespecadd(&ts, timeout);
		TIMESPEC_TO_TIMEVAL(&tv, timeout);
		for (;;) {
			error = _do_lock_umtx32(td, m, id, tvtohz(&tv));
			if (error != ETIMEDOUT)
				break;
			getnanouptime(&ts2);
			if (timespeccmp(&ts2, &ts, >=)) {
				error = ETIMEDOUT;
				break;
			}
			ts3 = ts;
			timespecsub(&ts3, &ts2);
			TIMESPEC_TO_TIMEVAL(&tv, &ts3);
		}
		/* Timed-locking is not restarted. */
		if (error == ERESTART)
			error = EINTR;
	}
	return (error);
}

/*
 * Unlock a umtx object.
 */
static int
do_unlock_umtx32(struct thread *td, uint32_t *m, uint32_t id)
{
	struct umtx_key key;
	uint32_t owner;
	uint32_t old;
	int error;
	int count;

	/*
	 * Make sure we own this mtx.
	 */
	owner = fuword32(m);
	if (owner == -1)
		return (EFAULT);

	if ((owner & ~UMUTEX_CONTESTED) != id)
		return (EPERM);

	/* This should be done in userland */
	if ((owner & UMUTEX_CONTESTED) == 0) {
		old = casuword32(m, owner, UMUTEX_UNOWNED);
		if (old == -1)
			return (EFAULT);
		if (old == owner)
			return (0);
		owner = old;
	}

	/* We should only ever be in here for contested locks */
	if ((error = umtx_key_get(m, TYPE_SIMPLE_LOCK, AUTO_SHARE,
		&key)) != 0)
		return (error);

	umtxq_lock(&key);
	umtxq_busy(&key);
	count = umtxq_count(&key);
	umtxq_unlock(&key);

	/*
	 * When unlocking the umtx, it must be marked as unowned if
	 * there is zero or one thread only waiting for it.
	 * Otherwise, it must be marked as contested.
	 */
	old = casuword32(m, owner,
		count <= 1 ? UMUTEX_UNOWNED : UMUTEX_CONTESTED);
	umtxq_lock(&key);
	umtxq_signal(&key, 1);
	umtxq_unbusy(&key);
	umtxq_unlock(&key);
	umtx_key_release(&key);
	if (old == -1)
		return (EFAULT);
	if (old != owner)
		return (EINVAL);
	return (0);
}
#endif

/*
 * Fetch and compare a value; sleep on the address if the value
 * has not changed.
 */
static int
do_wait(struct thread *td, void *addr, u_long id,
	struct timespec *timeout, int compat32)
{
	struct umtx_q *uq;
	struct timespec ts, ts2, ts3;
	struct timeval tv;
	u_long tmp;
	int error = 0;

	uq = td->td_umtxq;
	if ((error = umtx_key_get(addr, TYPE_SIMPLE_WAIT, AUTO_SHARE,
	    &uq->uq_key)) != 0)
		return (error);

	umtxq_lock(&uq->uq_key);
	umtxq_insert(uq);
	umtxq_unlock(&uq->uq_key);
	if (compat32 == 0)
		tmp = fuword(addr);
	else
		tmp = fuword32(addr);
	if (tmp != id) {
		umtxq_lock(&uq->uq_key);
		umtxq_remove(uq);
		umtxq_unlock(&uq->uq_key);
	} else if (timeout == NULL) {
		umtxq_lock(&uq->uq_key);
		error = umtxq_sleep(uq, "uwait", 0);
		umtxq_remove(uq);
		umtxq_unlock(&uq->uq_key);
	} else {
		getnanouptime(&ts);
		timespecadd(&ts, timeout);
		TIMESPEC_TO_TIMEVAL(&tv, timeout);
		umtxq_lock(&uq->uq_key);
		for (;;) {
			error = umtxq_sleep(uq, "uwait", tvtohz(&tv));
			if (!(uq->uq_flags & UQF_UMTXQ))
				break;
			if (error != ETIMEDOUT)
				break;
			umtxq_unlock(&uq->uq_key);
			getnanouptime(&ts2);
			if (timespeccmp(&ts2, &ts, >=)) {
				error = ETIMEDOUT;
				umtxq_lock(&uq->uq_key);
				break;
			}
			ts3 = ts;
			timespecsub(&ts3, &ts2);
			TIMESPEC_TO_TIMEVAL(&tv, &ts3);
			umtxq_lock(&uq->uq_key);
		}
		umtxq_remove(uq);
		umtxq_unlock(&uq->uq_key);
	}
	umtx_key_release(&uq->uq_key);
	if (error == ERESTART)
		error = EINTR;
	return (error);
}

/*
 * Wake up threads sleeping on the specified address.
 */
int
kern_umtx_wake(struct thread *td, void *uaddr, int n_wake)
{
	struct umtx_key key;
	int ret;

	if ((ret = umtx_key_get(uaddr, TYPE_SIMPLE_WAIT, AUTO_SHARE,
	   &key)) != 0)
		return (ret);
	umtxq_lock(&key);
	ret = umtxq_signal(&key, n_wake);
	umtxq_unlock(&key);
	umtx_key_release(&key);
	return (0);
}

/*
 * Lock PTHREAD_PRIO_NONE protocol POSIX mutex.
 */
static int
_do_lock_normal(struct thread *td, struct umutex *m, uint32_t flags, int timo,
	int try)
{
	struct umtx_q *uq;
	uint32_t owner, old, id;
	int error = 0;

	id = td->td_tid;
	uq = td->td_umtxq;

	/*
	 * Care must be exercised when dealing with the umtx structure.
	 * It can fault on any access.
	 */
	for (;;) {
		/*
		 * Try the uncontested case.  This should be done in userland.
		 */
		owner = casuword32(&m->m_owner, UMUTEX_UNOWNED, id);

		/* The acquire succeeded. */
		if (owner == UMUTEX_UNOWNED)
			return (0);

		/* The address was invalid. */
		if (owner == -1)
			return (EFAULT);

		/* If no one owns it but it is contested try to acquire it. */
		if (owner == UMUTEX_CONTESTED) {
			owner = casuword32(&m->m_owner,
			    UMUTEX_CONTESTED, id | UMUTEX_CONTESTED);

			if (owner == UMUTEX_CONTESTED)
				return (0);

			/* The address was invalid. */
			if (owner == -1)
				return (EFAULT);

			/* If this failed the lock has changed, restart. */
			continue;
		}

		if ((flags & UMUTEX_ERROR_CHECK) != 0 &&
		    (owner & ~UMUTEX_CONTESTED) == id)
			return (EDEADLK);

		if (try != 0)
			return (EBUSY);

		/*
		 * If we caught a signal, we have already retried, so
		 * exit immediately.
		 */
		if (error != 0)
			return (error);

		if ((error = umtx_key_get(m, TYPE_NORMAL_UMUTEX,
		    GET_SHARE(flags), &uq->uq_key)) != 0)
			return (error);

		umtxq_lock(&uq->uq_key);
		umtxq_busy(&uq->uq_key);
		umtxq_insert(uq);
		umtxq_unbusy(&uq->uq_key);
		umtxq_unlock(&uq->uq_key);

		/*
		 * Set the contested bit so that a release in user space
		 * knows to use the system call for unlock.  If this fails
		 * either someone else has acquired the lock or it has been
		 * released.
		 */
		old = casuword32(&m->m_owner, owner, owner | UMUTEX_CONTESTED);

		/* The address was invalid. */
		if (old == -1) {
			umtxq_lock(&uq->uq_key);
			umtxq_remove(uq);
			umtxq_unlock(&uq->uq_key);
			umtx_key_release(&uq->uq_key);
			return (EFAULT);
		}

		/*
		 * We set the contested bit, so sleep.  Otherwise the lock
		 * changed and we need to retry, or we lost a race with the
		 * thread unlocking the umtx.
		 */
		umtxq_lock(&uq->uq_key);
		if (old == owner)
			error = umtxq_sleep(uq, "umtxn", timo);
		umtxq_remove(uq);
		umtxq_unlock(&uq->uq_key);
		umtx_key_release(&uq->uq_key);
	}

	return (0);
}

/*
 * Unlock PTHREAD_PRIO_NONE protocol POSIX mutex.
 */
static int
do_unlock_normal(struct thread *td, struct umutex *m, uint32_t flags)
{
	struct umtx_key key;
	uint32_t owner, old, id;
	int error;
	int count;

	id = td->td_tid;
	/*
	 * Make sure we own this mtx.
	 */
	owner = fuword32(__DEVOLATILE(uint32_t *, &m->m_owner));
	if (owner == -1)
		return (EFAULT);

	if ((owner & ~UMUTEX_CONTESTED) != id)
		return (EPERM);

	/* This should be done in userland */
	if ((owner & UMUTEX_CONTESTED) == 0) {
		old = casuword32(&m->m_owner, owner, UMUTEX_UNOWNED);
		if (old == -1)
			return (EFAULT);
		if (old == owner)
			return (0);
		owner = old;
	}

	/* We should only ever be in here for contested locks */
	if ((error = umtx_key_get(m, TYPE_NORMAL_UMUTEX, GET_SHARE(flags),
	    &key)) != 0)
		return (error);

	umtxq_lock(&key);
	umtxq_busy(&key);
	count = umtxq_count(&key);
	umtxq_unlock(&key);

	/*
	 * When unlocking the umtx, it must be marked as unowned if
	 * there is zero or one thread only waiting for it.
	 * Otherwise, it must be marked as contested.
	 */
	old = casuword32(&m->m_owner, owner,
		count <= 1 ? UMUTEX_UNOWNED : UMUTEX_CONTESTED);
	umtxq_lock(&key);
	umtxq_signal(&key, 1);
	umtxq_unbusy(&key);
	umtxq_unlock(&key);
	umtx_key_release(&key);
	if (old == -1)
		return (EFAULT);
	if (old != owner)
		return (EINVAL);
	return (0);
}

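/*
 * Allocate a PI mutex record from the UMA zone and keep a count of
 * outstanding allocations for the debug sysctl.
 */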
static inline struct umtx_pi *
umtx_pi_alloc(int flags)
{
	struct umtx_pi *pi;

	pi = uma_zalloc(umtx_pi_zone, M_ZERO | flags);
	TAILQ_INIT(&pi->pi_blocked);
	atomic_add_int(&umtx_pi_allocated, 1);
	return (pi);
}

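/*
 * Return a PI mutex record to the UMA zone.
 */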
static inline void
umtx_pi_free(struct umtx_pi *pi)
{
	uma_zfree(umtx_pi_zone, pi);
	atomic_add_int(&umtx_pi_allocated, -1);
}

/*
 * Adjust the thread's position on a pi_state after its priority has been
 * changed.
 */
static int
umtx_pi_adjust_thread(struct umtx_pi *pi, struct thread *td)
{
	struct umtx_q *uq, *uq1, *uq2;
	struct thread *td1;

	mtx_assert(&sched_lock, MA_OWNED);
	if (pi == NULL)
		return (0);

	uq = td->td_umtxq;

	/*
	 * Check if the thread needs to be moved on the blocked chain.
	 * It needs to be moved if either its priority is lower than
	 * the previous thread or higher than the next thread.
	 */
	uq1 = TAILQ_PREV(uq, umtxq_head, uq_lockq);
	uq2 = TAILQ_NEXT(uq, uq_lockq);
	if ((uq1 != NULL && UPRI(td) < UPRI(uq1->uq_thread)) ||
	    (uq2 != NULL && UPRI(td) > UPRI(uq2->uq_thread))) {
		/*
		 * Remove thread from blocked chain and determine where
		 * it should be moved to.
		 */
		TAILQ_REMOVE(&pi->pi_blocked, uq, uq_lockq);
		TAILQ_FOREACH(uq1, &pi->pi_blocked, uq_lockq) {
			td1 = uq1->uq_thread;
			MPASS(td1->td_proc->p_magic == P_MAGIC);
			if (UPRI(td1) > UPRI(td))
				break;
		}

		if (uq1 == NULL)
			TAILQ_INSERT_TAIL(&pi->pi_blocked, uq, uq_lockq);
		else
			TAILQ_INSERT_BEFORE(uq1, uq, uq_lockq);
	}
	return (1);
}

/*
 * Propagate priority when a thread is blocked on POSIX
 * PI mutex.
 */
static void
umtx_propagate_priority(struct thread *td)
{
	struct umtx_q *uq;
	struct umtx_pi *pi;
	int pri;

	mtx_assert(&sched_lock, MA_OWNED);
	pri = UPRI(td);
	uq = td->td_umtxq;
	pi = uq->uq_pi_blocked;
	if (pi == NULL)
		return;

	for (;;) {
		td = pi->pi_owner;
		if (td == NULL)
			return;

		MPASS(td->td_proc != NULL);
		MPASS(td->td_proc->p_magic == P_MAGIC);

		if (UPRI(td) <= pri)
			return;

		sched_lend_user_prio(td, pri);

		/*
		 * Pick up the lock that td is blocked on.
		 */
		uq = td->td_umtxq;
		pi = uq->uq_pi_blocked;
		/* Resort td on the list if needed. */
		if (!umtx_pi_adjust_thread(pi, td))
			break;
	}
}

/*
 * Unpropagate priority for a PI mutex when a thread blocked on
 * it is interrupted by a signal or resumed by another thread.
 */
static void
umtx_unpropagate_priority(struct umtx_pi *pi)
{
	struct umtx_q *uq, *uq_owner;
	struct umtx_pi *pi2;
	int pri;

	mtx_assert(&sched_lock, MA_OWNED);

	while (pi != NULL && pi->pi_owner != NULL) {
		pri = PRI_MAX;
		uq_owner = pi->pi_owner->td_umtxq;

		TAILQ_FOREACH(pi2, &uq_owner->uq_pi_contested, pi_link) {
			uq = TAILQ_FIRST(&pi2->pi_blocked);
			if (uq != NULL) {
				if (pri > UPRI(uq->uq_thread))
					pri = UPRI(uq->uq_thread);
			}
		}

		if (pri > uq_owner->uq_inherited_pri)
			pri = uq_owner->uq_inherited_pri;
		sched_unlend_user_prio(pi->pi_owner, pri);
		pi = uq_owner->uq_pi_blocked;
	}
}

/*
 * Insert a PI mutex into owned list.
 */
static void
umtx_pi_setowner(struct umtx_pi *pi, struct thread *owner)
{
	struct umtx_q *uq_owner;

	uq_owner = owner->td_umtxq;
	mtx_assert(&sched_lock, MA_OWNED);
	if (pi->pi_owner != NULL)
		panic("pi_owner != NULL");
	pi->pi_owner = owner;
	TAILQ_INSERT_TAIL(&uq_owner->uq_pi_contested, pi, pi_link);
}

/*
 * Claim ownership of a PI mutex.
 */
static int
umtx_pi_claim(struct umtx_pi *pi, struct thread *owner)
{
	struct umtx_q *uq, *uq_owner;

	uq_owner = owner->td_umtxq;
	mtx_lock_spin(&sched_lock);
	if (pi->pi_owner == owner) {
		mtx_unlock_spin(&sched_lock);
		return (0);
	}

	if (pi->pi_owner != NULL) {
		/*
		 * userland may have already messed the mutex, sigh.
		 */
		mtx_unlock_spin(&sched_lock);
		return (EPERM);
	}
	umtx_pi_setowner(pi, owner);
	uq = TAILQ_FIRST(&pi->pi_blocked);
	if (uq != NULL) {
		int pri;

		pri = UPRI(uq->uq_thread);
		if (pri < UPRI(owner))
			sched_lend_user_prio(owner, pri);
	}
	mtx_unlock_spin(&sched_lock);
	return (0);
}

/*
 * Adjust a thread's position on the blocked list of its PI mutex;
 * this may trigger a new round of priority propagation.
 */
void
umtx_pi_adjust(struct thread *td, u_char oldpri)
{
	struct umtx_q *uq;
	struct umtx_pi *pi;

	uq = td->td_umtxq;

	mtx_assert(&sched_lock, MA_OWNED);
	MPASS(TD_ON_UPILOCK(td));

	/*
	 * Pick up the lock that td is blocked on.
	 */
	pi = uq->uq_pi_blocked;
	MPASS(pi != NULL);

	/* Resort the turnstile on the list. */
	if (!umtx_pi_adjust_thread(pi, td))
		return;

	/*
	 * If our priority was lowered and we are at the head of the
	 * turnstile, then propagate our new priority up the chain.
	 */
	if (uq == TAILQ_FIRST(&pi->pi_blocked) && UPRI(td) < oldpri)
		umtx_propagate_priority(td);
}

/*
 * Sleep on a PI mutex.
 */
static int
umtxq_sleep_pi(struct umtx_q *uq, struct umtx_pi *pi,
	uint32_t owner, const char *wmesg, int timo)
{
	struct umtxq_chain *uc;
	struct thread *td, *td1;
	struct umtx_q *uq1;
	int pri;
	int error = 0;

	td = uq->uq_thread;
	KASSERT(td == curthread, ("inconsistent uq_thread"));
	uc = umtxq_getchain(&uq->uq_key);
	UMTXQ_LOCKED_ASSERT(uc);
	umtxq_insert(uq);
	if (pi->pi_owner == NULL) {
		/* XXX
		 * Currently, we only support process-private PI mutexes;
		 * non-contended PI mutexes are locked in userland.
		 * Process-shared PI mutexes should always be initialized
		 * and registered in the kernel, and locking should always
		 * be done by the kernel to avoid security problems.
		 * For a process-private PI mutex, we can find the owner
		 * thread and boost its priority safely.
		 */
1430161678Sdavidxu		PROC_LOCK(curproc);
1431161678Sdavidxu		td1 = thread_find(curproc, owner);
1432161678Sdavidxu		mtx_lock_spin(&sched_lock);
1433161678Sdavidxu		if (td1 != NULL && pi->pi_owner == NULL) {
1434161678Sdavidxu			uq1 = td1->td_umtxq;
1435161678Sdavidxu			umtx_pi_setowner(pi, td1);
1436161678Sdavidxu		}
1437161678Sdavidxu		PROC_UNLOCK(curproc);
1438161678Sdavidxu	} else {
1439161678Sdavidxu		mtx_lock_spin(&sched_lock);
1440161678Sdavidxu	}
1441161678Sdavidxu
1442161678Sdavidxu	TAILQ_FOREACH(uq1, &pi->pi_blocked, uq_lockq) {
1443161678Sdavidxu		pri = UPRI(uq1->uq_thread);
1444161678Sdavidxu		if (pri > UPRI(td))
1445161678Sdavidxu			break;
1446161678Sdavidxu	}
1447161678Sdavidxu
1448161678Sdavidxu	if (uq1 != NULL)
1449161678Sdavidxu		TAILQ_INSERT_BEFORE(uq1, uq, uq_lockq);
1450161678Sdavidxu	else
1451161678Sdavidxu		TAILQ_INSERT_TAIL(&pi->pi_blocked, uq, uq_lockq);
1452161678Sdavidxu
1453161678Sdavidxu	uq->uq_pi_blocked = pi;
1454161678Sdavidxu	td->td_flags |= TDF_UPIBLOCKED;
1455161678Sdavidxu	mtx_unlock_spin(&sched_lock);
1456161678Sdavidxu	umtxq_unlock(&uq->uq_key);
1457161678Sdavidxu
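	/*
	 * We are now on the PI mutex's blocked queue; lend our priority
	 * to the lock owner (and, transitively, to whatever that owner
	 * is itself blocked on) if ours is higher.
	 */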
1458161678Sdavidxu	mtx_lock_spin(&sched_lock);
1459161678Sdavidxu	umtx_propagate_priority(td);
1460161678Sdavidxu	mtx_unlock_spin(&sched_lock);
1461161678Sdavidxu
1462161678Sdavidxu	umtxq_lock(&uq->uq_key);
1463161678Sdavidxu	if (uq->uq_flags & UQF_UMTXQ) {
1464161678Sdavidxu		error = msleep(uq, &uc->uc_lock, PCATCH, wmesg, timo);
1465161678Sdavidxu		if (error == EWOULDBLOCK)
1466161678Sdavidxu			error = ETIMEDOUT;
1467161678Sdavidxu		if (uq->uq_flags & UQF_UMTXQ) {
1468161678Sdavidxu			umtxq_busy(&uq->uq_key);
1469161678Sdavidxu			umtxq_remove(uq);
1470161678Sdavidxu			umtxq_unbusy(&uq->uq_key);
1471161678Sdavidxu		}
1472161678Sdavidxu	}
1473161678Sdavidxu	umtxq_unlock(&uq->uq_key);
1474161678Sdavidxu
1475161678Sdavidxu	mtx_lock_spin(&sched_lock);
1476161678Sdavidxu	uq->uq_pi_blocked = NULL;
1477161678Sdavidxu	td->td_flags &= ~TDF_UPIBLOCKED;
1478161678Sdavidxu	TAILQ_REMOVE(&pi->pi_blocked, uq, uq_lockq);
1479161678Sdavidxu	umtx_unpropagate_priority(pi);
1480161678Sdavidxu	mtx_unlock_spin(&sched_lock);
1481161678Sdavidxu
1482161678Sdavidxu	umtxq_lock(&uq->uq_key);
1483161678Sdavidxu
1484161678Sdavidxu	return (error);
1485161678Sdavidxu}
1486161678Sdavidxu
1487161678Sdavidxu/*
1488161678Sdavidxu * Add a reference to a PI mutex.
1489161678Sdavidxu */
1490161678Sdavidxustatic void
1491161678Sdavidxuumtx_pi_ref(struct umtx_pi *pi)
1492161678Sdavidxu{
1493161678Sdavidxu	struct umtxq_chain *uc;
1494161678Sdavidxu
1495161678Sdavidxu	uc = umtxq_getchain(&pi->pi_key);
1496161678Sdavidxu	UMTXQ_LOCKED_ASSERT(uc);
1497161678Sdavidxu	pi->pi_refcount++;
1498161678Sdavidxu}
1499161678Sdavidxu
1500161678Sdavidxu/*
1501161678Sdavidxu * Decrease the reference count of a PI mutex; when the count
1502161678Sdavidxu * drops to zero, its memory is freed.
1503161678Sdavidxu */
1504161678Sdavidxustatic void
1505161678Sdavidxuumtx_pi_unref(struct umtx_pi *pi)
1506161678Sdavidxu{
1507161678Sdavidxu	struct umtxq_chain *uc;
1508161678Sdavidxu	int free = 0;
1509161678Sdavidxu
1510161678Sdavidxu	uc = umtxq_getchain(&pi->pi_key);
1511161678Sdavidxu	UMTXQ_LOCKED_ASSERT(uc);
1512161678Sdavidxu	KASSERT(pi->pi_refcount > 0, ("invalid reference count"));
1513161678Sdavidxu	if (--pi->pi_refcount == 0) {
1514161678Sdavidxu		mtx_lock_spin(&sched_lock);
1515161678Sdavidxu		if (pi->pi_owner != NULL) {
1516161678Sdavidxu			TAILQ_REMOVE(&pi->pi_owner->td_umtxq->uq_pi_contested,
1517161678Sdavidxu				pi, pi_link);
1518161678Sdavidxu			pi->pi_owner = NULL;
1519161678Sdavidxu		}
1520161678Sdavidxu		KASSERT(TAILQ_EMPTY(&pi->pi_blocked),
1521161678Sdavidxu			("blocked queue not empty"));
1522161678Sdavidxu		mtx_unlock_spin(&sched_lock);
1523161678Sdavidxu		TAILQ_REMOVE(&uc->uc_pi_list, pi, pi_hashlink);
1524161678Sdavidxu		free = 1;
1525161678Sdavidxu	}
1526161678Sdavidxu	if (free)
1527161678Sdavidxu		umtx_pi_free(pi);
1528161678Sdavidxu}
1529161678Sdavidxu
1530161678Sdavidxu/*
1531161678Sdavidxu * Find a PI mutex in the hash table.
1532161678Sdavidxu */
1533161678Sdavidxustatic struct umtx_pi *
1534161678Sdavidxuumtx_pi_lookup(struct umtx_key *key)
1535161678Sdavidxu{
1536161678Sdavidxu	struct umtxq_chain *uc;
1537161678Sdavidxu	struct umtx_pi *pi;
1538161678Sdavidxu
1539161678Sdavidxu	uc = umtxq_getchain(key);
1540161678Sdavidxu	UMTXQ_LOCKED_ASSERT(uc);
1541161678Sdavidxu
1542161678Sdavidxu	TAILQ_FOREACH(pi, &uc->uc_pi_list, pi_hashlink) {
1543161678Sdavidxu		if (umtx_key_match(&pi->pi_key, key)) {
1544161678Sdavidxu			return (pi);
1545161678Sdavidxu		}
1546161678Sdavidxu	}
1547161678Sdavidxu	return (NULL);
1548161678Sdavidxu}
1549161678Sdavidxu
1550161678Sdavidxu/*
1551161678Sdavidxu * Insert a PI mutex into the hash table.
1552161678Sdavidxu */
1553161678Sdavidxustatic inline void
1554161678Sdavidxuumtx_pi_insert(struct umtx_pi *pi)
1555161678Sdavidxu{
1556161678Sdavidxu	struct umtxq_chain *uc;
1557161678Sdavidxu
1558161678Sdavidxu	uc = umtxq_getchain(&pi->pi_key);
1559161678Sdavidxu	UMTXQ_LOCKED_ASSERT(uc);
1560161678Sdavidxu	TAILQ_INSERT_TAIL(&uc->uc_pi_list, pi, pi_hashlink);
1561161678Sdavidxu}
1562161678Sdavidxu
1563161678Sdavidxu/*
1564161678Sdavidxu * Lock a PI mutex.
1565161678Sdavidxu */
1566161678Sdavidxustatic int
1567161678Sdavidxu_do_lock_pi(struct thread *td, struct umutex *m, uint32_t flags, int timo,
1568161678Sdavidxu	int try)
1569161678Sdavidxu{
1570161678Sdavidxu	struct umtx_q *uq;
1571161678Sdavidxu	struct umtx_pi *pi, *new_pi;
1572161678Sdavidxu	uint32_t id, owner, old;
1573161678Sdavidxu	int error;
1574161678Sdavidxu
1575161678Sdavidxu	id = td->td_tid;
1576161678Sdavidxu	uq = td->td_umtxq;
1577161678Sdavidxu
1578161678Sdavidxu	if ((error = umtx_key_get(m, TYPE_PI_UMUTEX, GET_SHARE(flags),
1579161678Sdavidxu	    &uq->uq_key)) != 0)
1580161678Sdavidxu		return (error);
1581163697Sdavidxu	umtxq_lock(&uq->uq_key);
1582163697Sdavidxu	pi = umtx_pi_lookup(&uq->uq_key);
1583163697Sdavidxu	if (pi == NULL) {
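		/*
		 * No PI record yet: try a non-sleeping allocation first;
		 * if that fails, drop the chain lock, allocate with
		 * M_WAITOK, and re-check that nobody installed a record
		 * while we slept.
		 */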
1584163697Sdavidxu		new_pi = umtx_pi_alloc(M_NOWAIT);
1585163697Sdavidxu		if (new_pi == NULL) {
1586161678Sdavidxu			umtxq_unlock(&uq->uq_key);
1587163697Sdavidxu			new_pi = umtx_pi_alloc(M_WAITOK);
1588161678Sdavidxu			new_pi->pi_key = uq->uq_key;
1589161678Sdavidxu			umtxq_lock(&uq->uq_key);
1590161678Sdavidxu			pi = umtx_pi_lookup(&uq->uq_key);
1591163697Sdavidxu			if (pi != NULL) {
1592161678Sdavidxu				umtx_pi_free(new_pi);
1593163697Sdavidxu				new_pi = NULL;
1594161678Sdavidxu			}
1595161678Sdavidxu		}
1596163697Sdavidxu		if (new_pi != NULL) {
1597163697Sdavidxu			new_pi->pi_key = uq->uq_key;
1598163697Sdavidxu			umtx_pi_insert(new_pi);
1599163697Sdavidxu			pi = new_pi;
1600163697Sdavidxu		}
1601163697Sdavidxu	}
1602163697Sdavidxu	umtx_pi_ref(pi);
1603163697Sdavidxu	umtxq_unlock(&uq->uq_key);
1604161678Sdavidxu
1605163697Sdavidxu	/*
1606163697Sdavidxu	 * Care must be exercised when dealing with the umtx structure:
1607163697Sdavidxu	 * it lives in userland and any access can fault.
1608163697Sdavidxu	 */
1609163697Sdavidxu	for (;;) {
1610161678Sdavidxu		/*
1611161678Sdavidxu		 * Try the uncontested case.  This should be done in userland.
1612161678Sdavidxu		 */
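		/*
		 * For reference, a userland fast path equivalent to the
		 * CAS below would look roughly like this sketch (the
		 * actual libthr code may differ, and "id" would come
		 * from thr_self()):
		 *
		 *	if (atomic_cmpset_acq_32(&m->m_owner,
		 *	    UMUTEX_UNOWNED, id))
		 *		return (0);	-- locked, no syscall
		 */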
1613161678Sdavidxu		owner = casuword32(&m->m_owner, UMUTEX_UNOWNED, id);
1614161678Sdavidxu
1615161678Sdavidxu		/* The acquire succeeded. */
1616161678Sdavidxu		if (owner == UMUTEX_UNOWNED) {
1617161678Sdavidxu			error = 0;
1618161678Sdavidxu			break;
1619161678Sdavidxu		}
1620161678Sdavidxu
1621161678Sdavidxu		/* The address was invalid. */
1622161678Sdavidxu		if (owner == -1) {
1623161678Sdavidxu			error = EFAULT;
1624161678Sdavidxu			break;
1625161678Sdavidxu		}
1626161678Sdavidxu
1627161678Sdavidxu		/* If no one owns it but it is contested try to acquire it. */
1628161678Sdavidxu		if (owner == UMUTEX_CONTESTED) {
1629161678Sdavidxu			owner = casuword32(&m->m_owner,
1630161678Sdavidxu			    UMUTEX_CONTESTED, id | UMUTEX_CONTESTED);
1631161678Sdavidxu
1632161678Sdavidxu			if (owner == UMUTEX_CONTESTED) {
1633161678Sdavidxu				umtxq_lock(&uq->uq_key);
1634161678Sdavidxu				error = umtx_pi_claim(pi, td);
1635161678Sdavidxu				umtxq_unlock(&uq->uq_key);
1636161678Sdavidxu				break;
1637161678Sdavidxu			}
1638161678Sdavidxu
1639161678Sdavidxu			/* The address was invalid. */
1640161678Sdavidxu			if (owner == -1) {
1641161678Sdavidxu				error = EFAULT;
1642161678Sdavidxu				break;
1643161678Sdavidxu			}
1644161678Sdavidxu
1645161678Sdavidxu			/* If this failed the lock has changed, restart. */
1646161678Sdavidxu			continue;
1647161678Sdavidxu		}
1648161678Sdavidxu
1649161678Sdavidxu		if ((flags & UMUTEX_ERROR_CHECK) != 0 &&
1650161678Sdavidxu		    (owner & ~UMUTEX_CONTESTED) == id) {
1651161678Sdavidxu			error = EDEADLK;
1652161678Sdavidxu			break;
1653161678Sdavidxu		}
1654161678Sdavidxu
1655161678Sdavidxu		if (try != 0) {
1656161678Sdavidxu			error = EBUSY;
1657161678Sdavidxu			break;
1658161678Sdavidxu		}
1659161678Sdavidxu
1660161678Sdavidxu		/*
1661161678Sdavidxu		 * If we caught a signal during the previous sleep, we
1662161678Sdavidxu		 * have already retried the lock once; exit immediately.
1663161678Sdavidxu		 */
1664161678Sdavidxu		if (error != 0)
1665161678Sdavidxu			break;
1666161678Sdavidxu
1667161678Sdavidxu		umtxq_lock(&uq->uq_key);
1668161678Sdavidxu		umtxq_busy(&uq->uq_key);
1669161678Sdavidxu		umtxq_unlock(&uq->uq_key);
1670161678Sdavidxu
1671161678Sdavidxu		/*
1672161678Sdavidxu		 * Set the contested bit so that a release in user space
1673161678Sdavidxu		 * knows to use the system call for unlock.  If this fails
1674161678Sdavidxu		 * either someone else has acquired the lock or it has been
1675161678Sdavidxu		 * released.
1676161678Sdavidxu		 */
1677161678Sdavidxu		old = casuword32(&m->m_owner, owner, owner | UMUTEX_CONTESTED);
1678161678Sdavidxu
1679161678Sdavidxu		/* The address was invalid. */
1680161678Sdavidxu		if (old == -1) {
1681161678Sdavidxu			umtxq_lock(&uq->uq_key);
1682161678Sdavidxu			umtxq_unbusy(&uq->uq_key);
1683161678Sdavidxu			umtxq_unlock(&uq->uq_key);
1684161678Sdavidxu			error = EFAULT;
1685161678Sdavidxu			break;
1686161678Sdavidxu		}
1687161678Sdavidxu
1688161678Sdavidxu		umtxq_lock(&uq->uq_key);
1689161678Sdavidxu		umtxq_unbusy(&uq->uq_key);
1690161678Sdavidxu		/*
1691161678Sdavidxu		 * If we successfully set the contested bit, sleep.  Otherwise
1692161678Sdavidxu		 * the lock changed and we need to retry, or we lost a race
1693161678Sdavidxu		 * to the thread unlocking the umtx.
1694161678Sdavidxu		 */
1695161678Sdavidxu		if (old == owner)
1696161678Sdavidxu			error = umtxq_sleep_pi(uq, pi, owner & ~UMUTEX_CONTESTED,
1697161678Sdavidxu				 "umtxpi", timo);
1698161678Sdavidxu		umtxq_unlock(&uq->uq_key);
1699161678Sdavidxu	}
1700161678Sdavidxu
1701163697Sdavidxu	umtxq_lock(&uq->uq_key);
1702163697Sdavidxu	umtx_pi_unref(pi);
1703163697Sdavidxu	umtxq_unlock(&uq->uq_key);
1704161678Sdavidxu
1705161678Sdavidxu	umtx_key_release(&uq->uq_key);
1706161678Sdavidxu	return (error);
1707161678Sdavidxu}
1708161678Sdavidxu
1709161678Sdavidxu/*
1710161678Sdavidxu * Unlock a PI mutex.
1711161678Sdavidxu */
1712161678Sdavidxustatic int
1713161678Sdavidxudo_unlock_pi(struct thread *td, struct umutex *m, uint32_t flags)
1714161678Sdavidxu{
1715161678Sdavidxu	struct umtx_key key;
1716161678Sdavidxu	struct umtx_q *uq_first, *uq_first2, *uq_me;
1717161678Sdavidxu	struct umtx_pi *pi, *pi2;
1718161678Sdavidxu	uint32_t owner, old, id;
1719161678Sdavidxu	int error;
1720161678Sdavidxu	int count;
1721161678Sdavidxu	int pri;
1722161678Sdavidxu
1723161678Sdavidxu	id = td->td_tid;
1724161678Sdavidxu	/*
1725161678Sdavidxu	 * Make sure we own this mtx.
1726161678Sdavidxu	 */
1727163449Sdavidxu	owner = fuword32(__DEVOLATILE(uint32_t *, &m->m_owner));
1728161678Sdavidxu	if (owner == -1)
1729161678Sdavidxu		return (EFAULT);
1730161678Sdavidxu
1731161678Sdavidxu	if ((owner & ~UMUTEX_CONTESTED) != id)
1732161678Sdavidxu		return (EPERM);
1733161678Sdavidxu
1734161678Sdavidxu	/* This should be done in userland */
1735161678Sdavidxu	if ((owner & UMUTEX_CONTESTED) == 0) {
1736161678Sdavidxu		old = casuword32(&m->m_owner, owner, UMUTEX_UNOWNED);
1737161678Sdavidxu		if (old == -1)
1738161678Sdavidxu			return (EFAULT);
1739161678Sdavidxu		if (old == owner)
1740161678Sdavidxu			return (0);
1741161855Sdavidxu		owner = old;
1742161678Sdavidxu	}
1743161678Sdavidxu
1744161678Sdavidxu	/* We should only ever be in here for contested locks */
1745161678Sdavidxu	if ((error = umtx_key_get(m, TYPE_PI_UMUTEX, GET_SHARE(flags),
1746161678Sdavidxu	    &key)) != 0)
1747161678Sdavidxu		return (error);
1748161678Sdavidxu
1749161678Sdavidxu	umtxq_lock(&key);
1750161678Sdavidxu	umtxq_busy(&key);
1751161678Sdavidxu	count = umtxq_count_pi(&key, &uq_first);
1752161678Sdavidxu	if (uq_first != NULL) {
1753161678Sdavidxu		pi = uq_first->uq_pi_blocked;
1754161678Sdavidxu		if (pi->pi_owner != curthread) {
1755161678Sdavidxu			umtxq_unbusy(&key);
1756161678Sdavidxu			umtxq_unlock(&key);
1757161678Sdavidxu			/* userland messed up the mutex */
1758161678Sdavidxu			return (EPERM);
1759161678Sdavidxu		}
1760161678Sdavidxu		uq_me = curthread->td_umtxq;
1761161678Sdavidxu		mtx_lock_spin(&sched_lock);
1762161678Sdavidxu		pi->pi_owner = NULL;
1763161678Sdavidxu		TAILQ_REMOVE(&uq_me->uq_pi_contested, pi, pi_link);
1764161678Sdavidxu		uq_first = TAILQ_FIRST(&pi->pi_blocked);
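		/*
		 * Recompute the priority we may still need to borrow:
		 * the highest priority among threads blocked on the
		 * other PI mutexes we continue to own.
		 */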
1765161678Sdavidxu		pri = PRI_MAX;
1766161678Sdavidxu		TAILQ_FOREACH(pi2, &uq_me->uq_pi_contested, pi_link) {
1767161678Sdavidxu			uq_first2 = TAILQ_FIRST(&pi2->pi_blocked);
1768161678Sdavidxu			if (uq_first2 != NULL) {
1769161678Sdavidxu				if (pri > UPRI(uq_first2->uq_thread))
1770161678Sdavidxu					pri = UPRI(uq_first2->uq_thread);
1771161678Sdavidxu			}
1772161678Sdavidxu		}
1773161678Sdavidxu		sched_unlend_user_prio(curthread, pri);
1774161678Sdavidxu		mtx_unlock_spin(&sched_lock);
1775161678Sdavidxu	}
1776161678Sdavidxu	umtxq_unlock(&key);
1777161678Sdavidxu
1778161678Sdavidxu	/*
1779161678Sdavidxu	 * When unlocking the umtx, it must be marked as unowned if
1780161678Sdavidxu	 * there is at most one thread waiting on it.
1781161678Sdavidxu	 * Otherwise, it must be marked as contested.
1782161678Sdavidxu	 */
1783161678Sdavidxu	old = casuword32(&m->m_owner, owner,
1784161678Sdavidxu		count <= 1 ? UMUTEX_UNOWNED : UMUTEX_CONTESTED);
1785161678Sdavidxu
1786161678Sdavidxu	umtxq_lock(&key);
1787161678Sdavidxu	if (uq_first != NULL)
1788161678Sdavidxu		umtxq_signal_thread(uq_first);
1789161678Sdavidxu	umtxq_unbusy(&key);
1790161678Sdavidxu	umtxq_unlock(&key);
1791161678Sdavidxu	umtx_key_release(&key);
1792161678Sdavidxu	if (old == -1)
1793161678Sdavidxu		return (EFAULT);
1794161678Sdavidxu	if (old != owner)
1795161678Sdavidxu		return (EINVAL);
1796161678Sdavidxu	return (0);
1797161678Sdavidxu}
1798161678Sdavidxu
1799161678Sdavidxu/*
1800161678Sdavidxu * Lock a PP mutex.
1801161678Sdavidxu */
1802161678Sdavidxustatic int
1803161678Sdavidxu_do_lock_pp(struct thread *td, struct umutex *m, uint32_t flags, int timo,
1804161678Sdavidxu	int try)
1805161678Sdavidxu{
1806161678Sdavidxu	struct umtx_q *uq, *uq2;
1807161678Sdavidxu	struct umtx_pi *pi;
1808161678Sdavidxu	uint32_t ceiling;
1809161678Sdavidxu	uint32_t owner, id;
1810161678Sdavidxu	int error, pri, old_inherited_pri, su;
1811161678Sdavidxu
1812161678Sdavidxu	id = td->td_tid;
1813161678Sdavidxu	uq = td->td_umtxq;
1814161678Sdavidxu	if ((error = umtx_key_get(m, TYPE_PP_UMUTEX, GET_SHARE(flags),
1815161678Sdavidxu	    &uq->uq_key)) != 0)
1816161678Sdavidxu		return (error);
1817164033Srwatson	su = (priv_check(td, PRIV_SCHED_RTPRIO) == 0);
1818161678Sdavidxu	for (;;) {
1819161678Sdavidxu		old_inherited_pri = uq->uq_inherited_pri;
1820161678Sdavidxu		umtxq_lock(&uq->uq_key);
1821161678Sdavidxu		umtxq_busy(&uq->uq_key);
1822161678Sdavidxu		umtxq_unlock(&uq->uq_key);
1823161678Sdavidxu
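		/*
		 * Translate the userland ceiling into the kernel's
		 * realtime priority range.  Since "ceiling" is unsigned,
		 * the range check below also rejects a faulted
		 * fuword32() (which returns -1).
		 */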
1824161678Sdavidxu		ceiling = RTP_PRIO_MAX - fuword32(&m->m_ceilings[0]);
1825161678Sdavidxu		if (ceiling > RTP_PRIO_MAX) {
1826161678Sdavidxu			error = EINVAL;
1827161678Sdavidxu			goto out;
1828161678Sdavidxu		}
1829161678Sdavidxu
1830161678Sdavidxu		mtx_lock_spin(&sched_lock);
1831161678Sdavidxu		if (UPRI(td) < PRI_MIN_REALTIME + ceiling) {
1832161678Sdavidxu			mtx_unlock_spin(&sched_lock);
1833161678Sdavidxu			error = EINVAL;
1834161678Sdavidxu			goto out;
1835161678Sdavidxu		}
1836161678Sdavidxu		if (su && PRI_MIN_REALTIME + ceiling < uq->uq_inherited_pri) {
1837161678Sdavidxu			uq->uq_inherited_pri = PRI_MIN_REALTIME + ceiling;
1838161678Sdavidxu			if (uq->uq_inherited_pri < UPRI(td))
1839161678Sdavidxu				sched_lend_user_prio(td, uq->uq_inherited_pri);
1840161678Sdavidxu		}
1841161678Sdavidxu		mtx_unlock_spin(&sched_lock);
1842161678Sdavidxu
1843161678Sdavidxu		owner = casuword32(&m->m_owner,
1844161678Sdavidxu		    UMUTEX_CONTESTED, id | UMUTEX_CONTESTED);
1845161678Sdavidxu
1846161678Sdavidxu		if (owner == UMUTEX_CONTESTED) {
1847161678Sdavidxu			error = 0;
1848161678Sdavidxu			break;
1849161678Sdavidxu		}
1850161678Sdavidxu
1851161678Sdavidxu		/* The address was invalid. */
1852161678Sdavidxu		if (owner == -1) {
1853161678Sdavidxu			error = EFAULT;
1854161678Sdavidxu			break;
1855161678Sdavidxu		}
1856161678Sdavidxu
1857161678Sdavidxu		if ((flags & UMUTEX_ERROR_CHECK) != 0 &&
1858161678Sdavidxu		    (owner & ~UMUTEX_CONTESTED) == id) {
1859161678Sdavidxu			error = EDEADLK;
1860161678Sdavidxu			break;
1861161678Sdavidxu		}
1862161678Sdavidxu
1863161678Sdavidxu		if (try != 0) {
1864161678Sdavidxu			error = EBUSY;
1865161678Sdavidxu			break;
1866161678Sdavidxu		}
1867161678Sdavidxu
1868161678Sdavidxu		/*
1869161678Sdavidxu		 * If we caught a signal during the previous sleep, we
1870161678Sdavidxu		 * have already retried the lock once; exit immediately.
1871161678Sdavidxu		 */
1872161678Sdavidxu		if (error != 0)
1873161678Sdavidxu			break;
1874161678Sdavidxu
1875161678Sdavidxu		umtxq_lock(&uq->uq_key);
1876161678Sdavidxu		umtxq_insert(uq);
1877161678Sdavidxu		umtxq_unbusy(&uq->uq_key);
1878161678Sdavidxu		error = umtxq_sleep(uq, "umtxpp", timo);
1879161678Sdavidxu		umtxq_remove(uq);
1880161678Sdavidxu		umtxq_unlock(&uq->uq_key);
1881161678Sdavidxu
1882161678Sdavidxu		mtx_lock_spin(&sched_lock);
1883161678Sdavidxu		uq->uq_inherited_pri = old_inherited_pri;
1884161678Sdavidxu		pri = PRI_MAX;
1885161678Sdavidxu		TAILQ_FOREACH(pi, &uq->uq_pi_contested, pi_link) {
1886161678Sdavidxu			uq2 = TAILQ_FIRST(&pi->pi_blocked);
1887161678Sdavidxu			if (uq2 != NULL) {
1888161678Sdavidxu				if (pri > UPRI(uq2->uq_thread))
1889161678Sdavidxu					pri = UPRI(uq2->uq_thread);
1890161678Sdavidxu			}
1891161678Sdavidxu		}
1892161678Sdavidxu		if (pri > uq->uq_inherited_pri)
1893161678Sdavidxu			pri = uq->uq_inherited_pri;
1894161678Sdavidxu		sched_unlend_user_prio(td, pri);
1895161678Sdavidxu		mtx_unlock_spin(&sched_lock);
1896161678Sdavidxu	}
1897161678Sdavidxu
1898161678Sdavidxu	if (error != 0) {
1899161678Sdavidxu		mtx_lock_spin(&sched_lock);
1900161678Sdavidxu		uq->uq_inherited_pri = old_inherited_pri;
1901161678Sdavidxu		pri = PRI_MAX;
1902161678Sdavidxu		TAILQ_FOREACH(pi, &uq->uq_pi_contested, pi_link) {
1903161678Sdavidxu			uq2 = TAILQ_FIRST(&pi->pi_blocked);
1904161678Sdavidxu			if (uq2 != NULL) {
1905161678Sdavidxu				if (pri > UPRI(uq2->uq_thread))
1906161678Sdavidxu					pri = UPRI(uq2->uq_thread);
1907161678Sdavidxu			}
1908161678Sdavidxu		}
1909161678Sdavidxu		if (pri > uq->uq_inherited_pri)
1910161678Sdavidxu			pri = uq->uq_inherited_pri;
1911161678Sdavidxu		sched_unlend_user_prio(td, pri);
1912161678Sdavidxu		mtx_unlock_spin(&sched_lock);
1913161678Sdavidxu	}
1914161678Sdavidxu
1915161678Sdavidxuout:
1916161678Sdavidxu	umtxq_lock(&uq->uq_key);
1917161678Sdavidxu	umtxq_unbusy(&uq->uq_key);
1918161678Sdavidxu	umtxq_unlock(&uq->uq_key);
1919161678Sdavidxu	umtx_key_release(&uq->uq_key);
1920161678Sdavidxu	return (error);
1921161678Sdavidxu}
1922161678Sdavidxu
1923161678Sdavidxu/*
1924161678Sdavidxu * Unlock a PP mutex.
1925161678Sdavidxu */
1926161678Sdavidxustatic int
1927161678Sdavidxudo_unlock_pp(struct thread *td, struct umutex *m, uint32_t flags)
1928161678Sdavidxu{
1929161678Sdavidxu	struct umtx_key key;
1930161678Sdavidxu	struct umtx_q *uq, *uq2;
1931161678Sdavidxu	struct umtx_pi *pi;
1932161678Sdavidxu	uint32_t owner, id;
1933161678Sdavidxu	uint32_t rceiling;
1934161926Sdavidxu	int error, pri, new_inherited_pri, su;
1935161678Sdavidxu
1936161678Sdavidxu	id = td->td_tid;
1937161678Sdavidxu	uq = td->td_umtxq;
1938164033Srwatson	su = (priv_check(td, PRIV_SCHED_RTPRIO) == 0);
1939161678Sdavidxu
1940161678Sdavidxu	/*
1941161678Sdavidxu	 * Make sure we own this mtx.
1942161678Sdavidxu	 */
1943163449Sdavidxu	owner = fuword32(__DEVOLATILE(uint32_t *, &m->m_owner));
1944161678Sdavidxu	if (owner == -1)
1945161678Sdavidxu		return (EFAULT);
1946161678Sdavidxu
1947161678Sdavidxu	if ((owner & ~UMUTEX_CONTESTED) != id)
1948161678Sdavidxu		return (EPERM);
1949161678Sdavidxu
1950161678Sdavidxu	error = copyin(&m->m_ceilings[1], &rceiling, sizeof(uint32_t));
1951161678Sdavidxu	if (error != 0)
1952161678Sdavidxu		return (error);
1953161678Sdavidxu
1954161678Sdavidxu	if (rceiling == -1)
1955161678Sdavidxu		new_inherited_pri = PRI_MAX;
1956161678Sdavidxu	else {
1957161678Sdavidxu		rceiling = RTP_PRIO_MAX - rceiling;
1958161678Sdavidxu		if (rceiling > RTP_PRIO_MAX)
1959161678Sdavidxu			return (EINVAL);
1960161678Sdavidxu		new_inherited_pri = PRI_MIN_REALTIME + rceiling;
1961161678Sdavidxu	}
1962161678Sdavidxu
1963161678Sdavidxu	if ((error = umtx_key_get(m, TYPE_PP_UMUTEX, GET_SHARE(flags),
1964161678Sdavidxu	    &key)) != 0)
1965161678Sdavidxu		return (error);
1966161678Sdavidxu	umtxq_lock(&key);
1967161678Sdavidxu	umtxq_busy(&key);
1968161678Sdavidxu	umtxq_unlock(&key);
1969161678Sdavidxu	/*
1970161678Sdavidxu	 * For a priority-protected mutex, always set the unlocked state
1971161678Sdavidxu	 * to UMUTEX_CONTESTED so that userland always enters the kernel
1972161678Sdavidxu	 * to lock the mutex.  This is necessary because thread priority
1973161678Sdavidxu	 * has to be adjusted for such a mutex.
1974161678Sdavidxu	 */
1975163449Sdavidxu	error = suword32(__DEVOLATILE(uint32_t *, &m->m_owner),
1976163449Sdavidxu		UMUTEX_CONTESTED);
1977161678Sdavidxu
1978161678Sdavidxu	umtxq_lock(&key);
1979161678Sdavidxu	if (error == 0)
1980161678Sdavidxu		umtxq_signal(&key, 1);
1981161678Sdavidxu	umtxq_unbusy(&key);
1982161678Sdavidxu	umtxq_unlock(&key);
1983161678Sdavidxu
1984161678Sdavidxu	if (error == -1)
1985161678Sdavidxu		error = EFAULT;
1986161678Sdavidxu	else {
1987161678Sdavidxu		mtx_lock_spin(&sched_lock);
1988161926Sdavidxu		if (su != 0)
1989161926Sdavidxu			uq->uq_inherited_pri = new_inherited_pri;
1990161678Sdavidxu		pri = PRI_MAX;
1991161678Sdavidxu		TAILQ_FOREACH(pi, &uq->uq_pi_contested, pi_link) {
1992161678Sdavidxu			uq2 = TAILQ_FIRST(&pi->pi_blocked);
1993161678Sdavidxu			if (uq2 != NULL) {
1994161678Sdavidxu				if (pri > UPRI(uq2->uq_thread))
1995161678Sdavidxu					pri = UPRI(uq2->uq_thread);
1996161678Sdavidxu			}
1997161678Sdavidxu		}
1998161678Sdavidxu		if (pri > uq->uq_inherited_pri)
1999161678Sdavidxu			pri = uq->uq_inherited_pri;
2000161678Sdavidxu		sched_unlend_user_prio(td, pri);
2001161678Sdavidxu		mtx_unlock_spin(&sched_lock);
2002161678Sdavidxu	}
2003161678Sdavidxu	umtx_key_release(&key);
2004161678Sdavidxu	return (error);
2005161678Sdavidxu}
2006161678Sdavidxu
2007161678Sdavidxustatic int
2008161678Sdavidxudo_set_ceiling(struct thread *td, struct umutex *m, uint32_t ceiling,
2009161678Sdavidxu	uint32_t *old_ceiling)
2010161678Sdavidxu{
2011161678Sdavidxu	struct umtx_q *uq;
2012161678Sdavidxu	uint32_t save_ceiling;
2013161678Sdavidxu	uint32_t owner, id;
2014161678Sdavidxu	uint32_t flags;
2015161678Sdavidxu	int error;
2016161678Sdavidxu
2017161678Sdavidxu	flags = fuword32(&m->m_flags);
2018161678Sdavidxu	if ((flags & UMUTEX_PRIO_PROTECT) == 0)
2019161678Sdavidxu		return (EINVAL);
2020161678Sdavidxu	if (ceiling > RTP_PRIO_MAX)
2021161678Sdavidxu		return (EINVAL);
2022161678Sdavidxu	id = td->td_tid;
2023161678Sdavidxu	uq = td->td_umtxq;
2024161678Sdavidxu	if ((error = umtx_key_get(m, TYPE_PP_UMUTEX, GET_SHARE(flags),
2025161678Sdavidxu	   &uq->uq_key)) != 0)
2026161678Sdavidxu		return (error);
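	/*
	 * To change the ceiling we must effectively hold the mutex:
	 * either win the kernel-side acquisition below or already be
	 * the owner; otherwise sleep and retry.
	 */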
2027161678Sdavidxu	for (;;) {
2028161678Sdavidxu		umtxq_lock(&uq->uq_key);
2029161678Sdavidxu		umtxq_busy(&uq->uq_key);
2030161678Sdavidxu		umtxq_unlock(&uq->uq_key);
2031161678Sdavidxu
2032161678Sdavidxu		save_ceiling = fuword32(&m->m_ceilings[0]);
2033161678Sdavidxu
2034161678Sdavidxu		owner = casuword32(&m->m_owner,
2035161678Sdavidxu		    UMUTEX_CONTESTED, id | UMUTEX_CONTESTED);
2036161678Sdavidxu
2037161678Sdavidxu		if (owner == UMUTEX_CONTESTED) {
2038161678Sdavidxu			suword32(&m->m_ceilings[0], ceiling);
2039163449Sdavidxu			suword32(__DEVOLATILE(uint32_t *, &m->m_owner),
2040163449Sdavidxu				UMUTEX_CONTESTED);
2041161678Sdavidxu			error = 0;
2042161678Sdavidxu			break;
2043161678Sdavidxu		}
2044161678Sdavidxu
2045161678Sdavidxu		/* The address was invalid. */
2046161678Sdavidxu		if (owner == -1) {
2047161678Sdavidxu			error = EFAULT;
2048161678Sdavidxu			break;
2049161678Sdavidxu		}
2050161678Sdavidxu
2051161678Sdavidxu		if ((owner & ~UMUTEX_CONTESTED) == id) {
2052161678Sdavidxu			suword32(&m->m_ceilings[0], ceiling);
2053161678Sdavidxu			error = 0;
2054161678Sdavidxu			break;
2055161678Sdavidxu		}
2056161678Sdavidxu
2057161678Sdavidxu		/*
2058161678Sdavidxu		 * If we caught a signal during the previous sleep, we
2059161678Sdavidxu		 * have already retried once; exit immediately.
2060161678Sdavidxu		 */
2061161678Sdavidxu		if (error != 0)
2062161678Sdavidxu			break;
2063161678Sdavidxu
2064161678Sdavidxu		/*
2065161678Sdavidxu		 * We could not acquire the mutex; sleep until we are
2066161678Sdavidxu		 * woken by an unlock or a ceiling change, then retry
2067161678Sdavidxu		 * the whole sequence.
2068161678Sdavidxu		 */
2069161678Sdavidxu		umtxq_lock(&uq->uq_key);
2070161678Sdavidxu		umtxq_insert(uq);
2071161678Sdavidxu		umtxq_unbusy(&uq->uq_key);
2072161678Sdavidxu		error = umtxq_sleep(uq, "umtxpp", 0);
2073161678Sdavidxu		umtxq_remove(uq);
2074161678Sdavidxu		umtxq_unlock(&uq->uq_key);
2075161678Sdavidxu	}
2076161678Sdavidxu	umtxq_lock(&uq->uq_key);
2077161678Sdavidxu	if (error == 0)
2078161678Sdavidxu		umtxq_signal(&uq->uq_key, INT_MAX);
2079161678Sdavidxu	umtxq_unbusy(&uq->uq_key);
2080161678Sdavidxu	umtxq_unlock(&uq->uq_key);
2081161678Sdavidxu	umtx_key_release(&uq->uq_key);
2082161678Sdavidxu	if (error == 0 && old_ceiling != NULL)
2083161678Sdavidxu		suword32(old_ceiling, save_ceiling);
2084161678Sdavidxu	return (error);
2085161678Sdavidxu}
2086161678Sdavidxu
2087162030Sdavidxustatic int
2088162030Sdavidxu_do_lock_umutex(struct thread *td, struct umutex *m, int flags, int timo,
2089162030Sdavidxu	int try)
2090162030Sdavidxu{
2091162030Sdavidxu	switch(flags & (UMUTEX_PRIO_INHERIT | UMUTEX_PRIO_PROTECT)) {
2092162030Sdavidxu	case 0:
2093162030Sdavidxu		return (_do_lock_normal(td, m, flags, timo, try));
2094162030Sdavidxu	case UMUTEX_PRIO_INHERIT:
2095162030Sdavidxu		return (_do_lock_pi(td, m, flags, timo, try));
2096162030Sdavidxu	case UMUTEX_PRIO_PROTECT:
2097162030Sdavidxu		return (_do_lock_pp(td, m, flags, timo, try));
2098162030Sdavidxu	}
2099162030Sdavidxu	return (EINVAL);
2100162030Sdavidxu}
2101162030Sdavidxu
2102161678Sdavidxu/*
2103161678Sdavidxu * Lock a userland POSIX mutex.
2104161678Sdavidxu */
2105161678Sdavidxustatic int
2106162030Sdavidxudo_lock_umutex(struct thread *td, struct umutex *m,
2107162030Sdavidxu	struct timespec *timeout, int try)
2108161678Sdavidxu{
2109162030Sdavidxu	struct timespec ts, ts2, ts3;
2110162030Sdavidxu	struct timeval tv;
2111161678Sdavidxu	uint32_t flags;
2112162030Sdavidxu	int error;
2113161678Sdavidxu
2114161678Sdavidxu	flags = fuword32(&m->m_flags);
2115161678Sdavidxu	if (flags == -1)
2116161678Sdavidxu		return (EFAULT);
2117161678Sdavidxu
2118162030Sdavidxu	if (timeout == NULL) {
2119162030Sdavidxu		error = _do_lock_umutex(td, m, flags, 0, try);
2120162030Sdavidxu		/* Mutex locking is restarted if it is interrupted. */
2121162030Sdavidxu		if (error == EINTR)
2122162030Sdavidxu			error = ERESTART;
2123162030Sdavidxu	} else {
2124162030Sdavidxu		getnanouptime(&ts);
2125162030Sdavidxu		timespecadd(&ts, timeout);
2126162030Sdavidxu		TIMESPEC_TO_TIMEVAL(&tv, timeout);
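		/*
		 * Sleep in slices against the absolute deadline computed
		 * above: if a sleep times out early, retry with the
		 * remaining time.
		 */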
2127162030Sdavidxu		for (;;) {
2128162030Sdavidxu			error = _do_lock_umutex(td, m, flags, tvtohz(&tv), try);
2129162030Sdavidxu			if (error != ETIMEDOUT)
2130162030Sdavidxu				break;
2131162030Sdavidxu			getnanouptime(&ts2);
2132162030Sdavidxu			if (timespeccmp(&ts2, &ts, >=)) {
2133162030Sdavidxu				error = ETIMEDOUT;
2134162030Sdavidxu				break;
2135162030Sdavidxu			}
2136162030Sdavidxu			ts3 = ts;
2137162030Sdavidxu			timespecsub(&ts3, &ts2);
2138162030Sdavidxu			TIMESPEC_TO_TIMEVAL(&tv, &ts3);
2139162030Sdavidxu		}
2140162030Sdavidxu		/* Timed-locking is not restarted. */
2141162030Sdavidxu		if (error == ERESTART)
2142162030Sdavidxu			error = EINTR;
2143161742Sdavidxu	}
2144162030Sdavidxu	return (error);
2145161678Sdavidxu}
2146161678Sdavidxu
2147161678Sdavidxu/*
2148161678Sdavidxu * Unlock a userland POSIX mutex.
2149161678Sdavidxu */
2150161678Sdavidxustatic int
2151161678Sdavidxudo_unlock_umutex(struct thread *td, struct umutex *m)
2152161678Sdavidxu{
2153161678Sdavidxu	uint32_t flags;
2154161678Sdavidxu
2155161678Sdavidxu	flags = fuword32(&m->m_flags);
2156161678Sdavidxu	if (flags == -1)
2157161678Sdavidxu		return (EFAULT);
2158161678Sdavidxu
2159161855Sdavidxu	switch(flags & (UMUTEX_PRIO_INHERIT | UMUTEX_PRIO_PROTECT)) {
2160161855Sdavidxu	case 0:
2161161855Sdavidxu		return (do_unlock_normal(td, m, flags));
2162161855Sdavidxu	case UMUTEX_PRIO_INHERIT:
2163161855Sdavidxu		return (do_unlock_pi(td, m, flags));
2164161855Sdavidxu	case UMUTEX_PRIO_PROTECT:
2165161855Sdavidxu		return (do_unlock_pp(td, m, flags));
2166161855Sdavidxu	}
2167161678Sdavidxu
2168161855Sdavidxu	return (EINVAL);
2169161678Sdavidxu}
2170161678Sdavidxu
2171164839Sdavidxustatic int
2172164839Sdavidxudo_cv_wait(struct thread *td, struct ucond *cv, struct umutex *m,
2173164876Sdavidxu	struct timespec *timeout, u_long wflags)
2174164839Sdavidxu{
2175164839Sdavidxu	struct umtx_q *uq;
2176164839Sdavidxu	struct timeval tv;
2177164839Sdavidxu	struct timespec cts, ets, tts;
2178164839Sdavidxu	uint32_t flags;
2179164839Sdavidxu	int error;
2180164839Sdavidxu
2181164839Sdavidxu	uq = td->td_umtxq;
2182164839Sdavidxu	flags = fuword32(&cv->c_flags);
2183164839Sdavidxu	error = umtx_key_get(cv, TYPE_CV, GET_SHARE(flags), &uq->uq_key);
2184164839Sdavidxu	if (error != 0)
2185164839Sdavidxu		return (error);
2186164839Sdavidxu	umtxq_lock(&uq->uq_key);
2187164839Sdavidxu	umtxq_busy(&uq->uq_key);
2188164839Sdavidxu	umtxq_insert(uq);
2189164839Sdavidxu	umtxq_unlock(&uq->uq_key);
2190164839Sdavidxu
2191164839Sdavidxu	/*
2192164839Sdavidxu	 * The critical point is that c_has_waiters must be set to 1
2193164839Sdavidxu	 * before the user mutex is released, or a wakeup could be lost.
2194164839Sdavidxu	 */
2195164839Sdavidxu	suword32(__DEVOLATILE(uint32_t *, &cv->c_has_waiters), 1);
2196164839Sdavidxu
2197164839Sdavidxu	umtxq_lock(&uq->uq_key);
2198164839Sdavidxu	umtxq_unbusy(&uq->uq_key);
2199164839Sdavidxu	umtxq_unlock(&uq->uq_key);
2200164839Sdavidxu
2201164839Sdavidxu	error = do_unlock_umutex(td, m);
2202164839Sdavidxu
2203164839Sdavidxu	umtxq_lock(&uq->uq_key);
2204164839Sdavidxu	if (error == 0) {
2205164876Sdavidxu		if ((wflags & UMTX_CHECK_UNPARKING) &&
2206164876Sdavidxu		    (td->td_pflags & TDP_WAKEUP)) {
2207164876Sdavidxu			td->td_pflags &= ~TDP_WAKEUP;
2208164876Sdavidxu			error = EINTR;
2209164876Sdavidxu		} else if (timeout == NULL) {
2210164839Sdavidxu			error = umtxq_sleep(uq, "ucond", 0);
2211164839Sdavidxu		} else {
2212164839Sdavidxu			getnanouptime(&ets);
2213164839Sdavidxu			timespecadd(&ets, timeout);
2214164839Sdavidxu			TIMESPEC_TO_TIMEVAL(&tv, timeout);
2215164839Sdavidxu			for (;;) {
2216164839Sdavidxu				error = umtxq_sleep(uq, "ucond", tvtohz(&tv));
2217164839Sdavidxu				if (error != ETIMEDOUT)
2218164839Sdavidxu					break;
2219164839Sdavidxu				getnanouptime(&cts);
2220164839Sdavidxu				if (timespeccmp(&cts, &ets, >=)) {
2221164839Sdavidxu					error = ETIMEDOUT;
2222164839Sdavidxu					break;
2223164839Sdavidxu				}
2224164839Sdavidxu				tts = ets;
2225164839Sdavidxu				timespecsub(&tts, &cts);
2226164839Sdavidxu				TIMESPEC_TO_TIMEVAL(&tv, &tts);
2227164839Sdavidxu			}
2228164839Sdavidxu		}
2229164839Sdavidxu	}
2230164839Sdavidxu
2231164839Sdavidxu	if (error != 0) {
2232164839Sdavidxu		if ((uq->uq_flags & UQF_UMTXQ) == 0) {
2233164839Sdavidxu			/*
2234164839Sdavidxu			 * If we were concurrently signalled by
2235164839Sdavidxu			 * do_cv_signal() but are returning due to an
2236164839Sdavidxu			 * error, a UNIX signal, or a timeout, perform
2237164839Sdavidxu			 * another umtxq_signal() to avoid consuming the
2238164839Sdavidxu			 * wakeup.  This may cause a spurious wakeup of
2239164839Sdavidxu			 * another thread which was just queued, but
2240164839Sdavidxu			 * SUSv3 explicitly allows spurious wakeups, and
2241164839Sdavidxu			 * a kernel-based implementation cannot avoid them.
2242164839Sdavidxu			 */
2243164876Sdavidxu			if (!umtxq_signal(&uq->uq_key, 1))
2244164876Sdavidxu				error = 0;
2245164839Sdavidxu		}
2246164839Sdavidxu		if (error == ERESTART)
2247164839Sdavidxu			error = EINTR;
2248164839Sdavidxu	}
2249164839Sdavidxu	umtxq_remove(uq);
2250164839Sdavidxu	umtxq_unlock(&uq->uq_key);
2251164839Sdavidxu	umtx_key_release(&uq->uq_key);
2252164839Sdavidxu	return (error);
2253164839Sdavidxu}
2254164839Sdavidxu
2255164839Sdavidxu/*
2256164839Sdavidxu * Signal a userland condition variable.
2257164839Sdavidxu */
2258164839Sdavidxustatic int
2259164839Sdavidxudo_cv_signal(struct thread *td, struct ucond *cv)
2260164839Sdavidxu{
2261164839Sdavidxu	struct umtx_key key;
2262164839Sdavidxu	int error, cnt, nwake;
2263164839Sdavidxu	uint32_t flags;
2264164839Sdavidxu
2265164839Sdavidxu	flags = fuword32(&cv->c_flags);
2266164839Sdavidxu	if ((error = umtx_key_get(cv, TYPE_CV, GET_SHARE(flags), &key)) != 0)
2267164839Sdavidxu		return (error);
2268164839Sdavidxu	umtxq_lock(&key);
2269164839Sdavidxu	umtxq_busy(&key);
2270164839Sdavidxu	cnt = umtxq_count(&key);
2271164839Sdavidxu	nwake = umtxq_signal(&key, 1);
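	/*
	 * If every queued waiter was woken, clear the userland
	 * c_has_waiters hint so that future signals need not enter
	 * the kernel.
	 */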
2272164839Sdavidxu	if (cnt <= nwake) {
2273164839Sdavidxu		umtxq_unlock(&key);
2274164839Sdavidxu		error = suword32(
2275164839Sdavidxu		    __DEVOLATILE(uint32_t *, &cv->c_has_waiters), 0);
2276164839Sdavidxu		umtxq_lock(&key);
2277164839Sdavidxu	}
2278164839Sdavidxu	umtxq_unbusy(&key);
2279164839Sdavidxu	umtxq_unlock(&key);
2280164839Sdavidxu	umtx_key_release(&key);
2281164839Sdavidxu	return (error);
2282164839Sdavidxu}
2283164839Sdavidxu
2284164839Sdavidxustatic int
2285164839Sdavidxudo_cv_broadcast(struct thread *td, struct ucond *cv)
2286164839Sdavidxu{
2287164839Sdavidxu	struct umtx_key key;
2288164839Sdavidxu	int error;
2289164839Sdavidxu	uint32_t flags;
2290164839Sdavidxu
2291164839Sdavidxu	flags = fuword32(&cv->c_flags);
2292164839Sdavidxu	if ((error = umtx_key_get(cv, TYPE_CV, GET_SHARE(flags), &key)) != 0)
2293164839Sdavidxu		return (error);
2294164839Sdavidxu
2295164839Sdavidxu	umtxq_lock(&key);
2296164839Sdavidxu	umtxq_busy(&key);
2297164839Sdavidxu	umtxq_signal(&key, INT_MAX);
2298164839Sdavidxu	umtxq_unlock(&key);
2299164839Sdavidxu
2300164839Sdavidxu	error = suword32(__DEVOLATILE(uint32_t *, &cv->c_has_waiters), 0);
2301164839Sdavidxu
2302164839Sdavidxu	umtxq_lock(&key);
2303164839Sdavidxu	umtxq_unbusy(&key);
2304164839Sdavidxu	umtxq_unlock(&key);
2305164839Sdavidxu
2306164839Sdavidxu	umtx_key_release(&key);
2307164839Sdavidxu	return (error);
2308164839Sdavidxu}
2309164839Sdavidxu
2310139013Sdavidxuint
2311139013Sdavidxu_umtx_lock(struct thread *td, struct _umtx_lock_args *uap)
2312139013Sdavidxu    /* struct umtx *umtx */
2313139013Sdavidxu{
2314162536Sdavidxu	return (_do_lock_umtx(td, uap->umtx, td->td_tid, 0));
2315139013Sdavidxu}
2316139013Sdavidxu
2317139013Sdavidxuint
2318139013Sdavidxu_umtx_unlock(struct thread *td, struct _umtx_unlock_args *uap)
2319139013Sdavidxu    /* struct umtx *umtx */
2320139013Sdavidxu{
2321162536Sdavidxu	return (do_unlock_umtx(td, uap->umtx, td->td_tid));
2322139013Sdavidxu}
2323139013Sdavidxu
2324162536Sdavidxustatic int
2325162536Sdavidxu__umtx_op_lock_umtx(struct thread *td, struct _umtx_op_args *uap)
2326139013Sdavidxu{
2327162536Sdavidxu	struct timespec *ts, timeout;
2328139013Sdavidxu	int error;
2329139013Sdavidxu
2330162536Sdavidxu	/* Allow a null timespec (wait forever). */
2331162536Sdavidxu	if (uap->uaddr2 == NULL)
2332162536Sdavidxu		ts = NULL;
2333162536Sdavidxu	else {
2334162536Sdavidxu		error = copyin(uap->uaddr2, &timeout, sizeof(timeout));
2335162536Sdavidxu		if (error != 0)
2336162536Sdavidxu			return (error);
2337162536Sdavidxu		if (timeout.tv_nsec >= 1000000000 ||
2338162536Sdavidxu		    timeout.tv_nsec < 0) {
2339162536Sdavidxu			return (EINVAL);
2340161678Sdavidxu		}
2341162536Sdavidxu		ts = &timeout;
2342162536Sdavidxu	}
2343162536Sdavidxu	return (do_lock_umtx(td, uap->obj, uap->val, ts));
2344162536Sdavidxu}
2345162536Sdavidxu
2346162536Sdavidxustatic int
2347162536Sdavidxu__umtx_op_unlock_umtx(struct thread *td, struct _umtx_op_args *uap)
2348162536Sdavidxu{
2349162536Sdavidxu	return (do_unlock_umtx(td, uap->obj, uap->val));
2350162536Sdavidxu}
2351162536Sdavidxu
2352162536Sdavidxustatic int
2353162536Sdavidxu__umtx_op_wait(struct thread *td, struct _umtx_op_args *uap)
2354162536Sdavidxu{
2355162536Sdavidxu	struct timespec *ts, timeout;
2356162536Sdavidxu	int error;
2357162536Sdavidxu
2358162536Sdavidxu	if (uap->uaddr2 == NULL)
2359162536Sdavidxu		ts = NULL;
2360162536Sdavidxu	else {
2361162536Sdavidxu		error = copyin(uap->uaddr2, &timeout, sizeof(timeout));
2362162536Sdavidxu		if (error != 0)
2363162536Sdavidxu			return (error);
2364162536Sdavidxu		if (timeout.tv_nsec >= 1000000000 ||
2365162536Sdavidxu		    timeout.tv_nsec < 0)
2366162536Sdavidxu			return (EINVAL);
2367162536Sdavidxu		ts = &timeout;
2368162536Sdavidxu	}
2369162536Sdavidxu	return (do_wait(td, uap->obj, uap->val, ts, 0));
2370162536Sdavidxu}
2371162536Sdavidxu
2372162536Sdavidxustatic int
2373162536Sdavidxu__umtx_op_wake(struct thread *td, struct _umtx_op_args *uap)
2374162536Sdavidxu{
2375162536Sdavidxu	return (kern_umtx_wake(td, uap->obj, uap->val));
2376162536Sdavidxu}
2377162536Sdavidxu
2378162536Sdavidxustatic int
2379162536Sdavidxu__umtx_op_lock_umutex(struct thread *td, struct _umtx_op_args *uap)
2380162536Sdavidxu{
2381162536Sdavidxu	struct timespec *ts, timeout;
2382162536Sdavidxu	int error;
2383162536Sdavidxu
2384162536Sdavidxu	/* Allow a null timespec (wait forever). */
2385162536Sdavidxu	if (uap->uaddr2 == NULL)
2386162536Sdavidxu		ts = NULL;
2387162536Sdavidxu	else {
2388162536Sdavidxu		error = copyin(uap->uaddr2, &timeout,
2389162536Sdavidxu		    sizeof(timeout));
2390162536Sdavidxu		if (error != 0)
2391162536Sdavidxu			return (error);
2392162536Sdavidxu		if (timeout.tv_nsec >= 1000000000 ||
2393162536Sdavidxu		    timeout.tv_nsec < 0) {
2394162536Sdavidxu			return (EINVAL);
2395139013Sdavidxu		}
2396162536Sdavidxu		ts = &timeout;
2397139013Sdavidxu	}
2398162536Sdavidxu	return (do_lock_umutex(td, uap->obj, ts, 0));
2399162536Sdavidxu}
2400162536Sdavidxu
2401162536Sdavidxustatic int
2402162536Sdavidxu__umtx_op_trylock_umutex(struct thread *td, struct _umtx_op_args *uap)
2403162536Sdavidxu{
2404162536Sdavidxu	return (do_lock_umutex(td, uap->obj, NULL, 1));
2405162536Sdavidxu}
2406162536Sdavidxu
2407162536Sdavidxustatic int
2408162536Sdavidxu__umtx_op_unlock_umutex(struct thread *td, struct _umtx_op_args *uap)
2409162536Sdavidxu{
2410162536Sdavidxu	return (do_unlock_umutex(td, uap->obj));
2411162536Sdavidxu}
2412162536Sdavidxu
2413162536Sdavidxustatic int
2414162536Sdavidxu__umtx_op_set_ceiling(struct thread *td, struct _umtx_op_args *uap)
2415162536Sdavidxu{
2416162536Sdavidxu	return (do_set_ceiling(td, uap->obj, uap->val, uap->uaddr1));
2417162536Sdavidxu}
2418162536Sdavidxu
2419164839Sdavidxustatic int
2420164839Sdavidxu__umtx_op_cv_wait(struct thread *td, struct _umtx_op_args *uap)
2421164839Sdavidxu{
2422164839Sdavidxu	struct timespec *ts, timeout;
2423164839Sdavidxu	int error;
2424164839Sdavidxu
2425164839Sdavidxu	/* Allow a null timespec (wait forever). */
2426164839Sdavidxu	if (uap->uaddr2 == NULL)
2427164839Sdavidxu		ts = NULL;
2428164839Sdavidxu	else {
2429164839Sdavidxu		error = copyin(uap->uaddr2, &timeout,
2430164839Sdavidxu		    sizeof(timeout));
2431164839Sdavidxu		if (error != 0)
2432164839Sdavidxu			return (error);
2433164839Sdavidxu		if (timeout.tv_nsec >= 1000000000 ||
2434164839Sdavidxu		    timeout.tv_nsec < 0) {
2435164839Sdavidxu			return (EINVAL);
2436164839Sdavidxu		}
2437164839Sdavidxu		ts = &timeout;
2438164839Sdavidxu	}
2439164876Sdavidxu	return (do_cv_wait(td, uap->obj, uap->uaddr1, ts, uap->val));
2440164839Sdavidxu}
2441164839Sdavidxu
2442164839Sdavidxustatic int
2443164839Sdavidxu__umtx_op_cv_signal(struct thread *td, struct _umtx_op_args *uap)
2444164839Sdavidxu{
2445164839Sdavidxu	return (do_cv_signal(td, uap->obj));
2446164839Sdavidxu}
2447164839Sdavidxu
2448164839Sdavidxustatic int
2449164839Sdavidxu__umtx_op_cv_broadcast(struct thread *td, struct _umtx_op_args *uap)
2450164839Sdavidxu{
2451164839Sdavidxu	return (do_cv_broadcast(td, uap->obj));
2452164839Sdavidxu}
2453164839Sdavidxu
2454162536Sdavidxutypedef int (*_umtx_op_func)(struct thread *td, struct _umtx_op_args *uap);
2455162536Sdavidxu
2456162536Sdavidxustatic _umtx_op_func op_table[] = {
2457162536Sdavidxu	__umtx_op_lock_umtx,		/* UMTX_OP_LOCK */
2458162536Sdavidxu	__umtx_op_unlock_umtx,		/* UMTX_OP_UNLOCK */
2459162536Sdavidxu	__umtx_op_wait,			/* UMTX_OP_WAIT */
2460162536Sdavidxu	__umtx_op_wake,			/* UMTX_OP_WAKE */
2461162536Sdavidxu	__umtx_op_trylock_umutex,	/* UMTX_OP_MUTEX_TRYLOCK */
2462162536Sdavidxu	__umtx_op_lock_umutex,		/* UMTX_OP_MUTEX_LOCK */
2463162536Sdavidxu	__umtx_op_unlock_umutex,	/* UMTX_OP_MUTEX_UNLOCK */
2464164839Sdavidxu	__umtx_op_set_ceiling,		/* UMTX_OP_SET_CEILING */
2465164839Sdavidxu	__umtx_op_cv_wait,		/* UMTX_OP_CV_WAIT */
2466164839Sdavidxu	__umtx_op_cv_signal,		/* UMTX_OP_CV_SIGNAL */
2467164839Sdavidxu	__umtx_op_cv_broadcast		/* UMTX_OP_CV_BROADCAST */
2468162536Sdavidxu};
2469162536Sdavidxu
2470162536Sdavidxuint
2471162536Sdavidxu_umtx_op(struct thread *td, struct _umtx_op_args *uap)
2472162536Sdavidxu{
2473163678Sdavidxu	if ((unsigned)uap->op < UMTX_OP_MAX)
2474162536Sdavidxu		return (*op_table[uap->op])(td, uap);
2475162536Sdavidxu	return (EINVAL);
2476162536Sdavidxu}
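
/*
 * For reference, userland reaches the table above through _umtx_op(2),
 * whose signature is:
 *
 *	int _umtx_op(void *obj, int op, u_long val,
 *	    void *uaddr1, void *uaddr2);
 *
 * An illustrative (hypothetical) fragment locking and unlocking a
 * umutex:
 *
 *	struct umutex m = { .m_owner = UMUTEX_UNOWNED };
 *	_umtx_op(&m, UMTX_OP_MUTEX_LOCK, 0, NULL, NULL);
 *	_umtx_op(&m, UMTX_OP_MUTEX_UNLOCK, 0, NULL, NULL);
 *
 * A timeout, when an operation supports one, is passed as a struct
 * timespec pointer in uaddr2.
 */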
2477162536Sdavidxu
2478162536Sdavidxu#ifdef COMPAT_IA32
2479162536Sdavidxu
2480163046Sdavidxuint
2481163046Sdavidxufreebsd32_umtx_lock(struct thread *td, struct freebsd32_umtx_lock_args *uap)
2482163046Sdavidxu    /* struct umtx *umtx */
2483163046Sdavidxu{
2484163046Sdavidxu	return (do_lock_umtx32(td, (uint32_t *)uap->umtx, td->td_tid, NULL));
2485163046Sdavidxu}
2486163046Sdavidxu
2487163046Sdavidxuint
2488163046Sdavidxufreebsd32_umtx_unlock(struct thread *td, struct freebsd32_umtx_unlock_args *uap)
2489163046Sdavidxu    /* struct umtx *umtx */
2490163046Sdavidxu{
2491163046Sdavidxu	return (do_unlock_umtx32(td, (uint32_t *)uap->umtx, td->td_tid));
2492163046Sdavidxu}
2493163046Sdavidxu
2494162536Sdavidxustruct timespec32 {
2495162536Sdavidxu	u_int32_t tv_sec;
2496162536Sdavidxu	u_int32_t tv_nsec;
2497162536Sdavidxu};
2498162536Sdavidxu
2499162536Sdavidxustatic inline int
2500162536Sdavidxucopyin_timeout32(void *addr, struct timespec *tsp)
2501162536Sdavidxu{
2502162536Sdavidxu	struct timespec32 ts32;
2503162536Sdavidxu	int error;
2504162536Sdavidxu
2505162536Sdavidxu	error = copyin(addr, &ts32, sizeof(struct timespec32));
2506162536Sdavidxu	if (error == 0) {
2507162536Sdavidxu		tsp->tv_sec = ts32.tv_sec;
2508162536Sdavidxu		tsp->tv_nsec = ts32.tv_nsec;
2509162536Sdavidxu	}
2510140421Sdavidxu	return (error);
2511139013Sdavidxu}
2512161678Sdavidxu
2513162536Sdavidxustatic int
2514162536Sdavidxu__umtx_op_lock_umtx_compat32(struct thread *td, struct _umtx_op_args *uap)
2515162536Sdavidxu{
2516162536Sdavidxu	struct timespec *ts, timeout;
2517162536Sdavidxu	int error;
2518162536Sdavidxu
2519162536Sdavidxu	/* Allow a null timespec (wait forever). */
2520162536Sdavidxu	if (uap->uaddr2 == NULL)
2521162536Sdavidxu		ts = NULL;
2522162536Sdavidxu	else {
2523162536Sdavidxu		error = copyin_timeout32(uap->uaddr2, &timeout);
2524162536Sdavidxu		if (error != 0)
2525162536Sdavidxu			return (error);
2526162536Sdavidxu		if (timeout.tv_nsec >= 1000000000 ||
2527162536Sdavidxu		    timeout.tv_nsec < 0) {
2528162536Sdavidxu			return (EINVAL);
2529162536Sdavidxu		}
2530162536Sdavidxu		ts = &timeout;
2531162536Sdavidxu	}
2532162536Sdavidxu	return (do_lock_umtx32(td, uap->obj, uap->val, ts));
2533162536Sdavidxu}
2534162536Sdavidxu
2535162536Sdavidxustatic int
2536162536Sdavidxu__umtx_op_unlock_umtx_compat32(struct thread *td, struct _umtx_op_args *uap)
2537162536Sdavidxu{
2538162536Sdavidxu	return (do_unlock_umtx32(td, uap->obj, (uint32_t)uap->val));
2539162536Sdavidxu}
2540162536Sdavidxu
2541162536Sdavidxustatic int
2542162536Sdavidxu__umtx_op_wait_compat32(struct thread *td, struct _umtx_op_args *uap)
2543162536Sdavidxu{
2544162536Sdavidxu	struct timespec *ts, timeout;
2545162536Sdavidxu	int error;
2546162536Sdavidxu
2547162536Sdavidxu	if (uap->uaddr2 == NULL)
2548162536Sdavidxu		ts = NULL;
2549162536Sdavidxu	else {
2550162536Sdavidxu		error = copyin_timeout32(uap->uaddr2, &timeout);
2551162536Sdavidxu		if (error != 0)
2552162536Sdavidxu			return (error);
2553162536Sdavidxu		if (timeout.tv_nsec >= 1000000000 ||
2554162536Sdavidxu		    timeout.tv_nsec < 0)
2555162536Sdavidxu			return (EINVAL);
2556162536Sdavidxu		ts = &timeout;
2557162536Sdavidxu	}
2558162536Sdavidxu	return (do_wait(td, uap->obj, uap->val, ts, 1));
2559162536Sdavidxu}
2560162536Sdavidxu
2561162536Sdavidxustatic int
2562162536Sdavidxu__umtx_op_lock_umutex_compat32(struct thread *td, struct _umtx_op_args *uap)
2563162536Sdavidxu{
2564162536Sdavidxu	struct timespec *ts, timeout;
2565162536Sdavidxu	int error;
2566162536Sdavidxu
2567162536Sdavidxu	/* Allow a null timespec (wait forever). */
2568162536Sdavidxu	if (uap->uaddr2 == NULL)
2569162536Sdavidxu		ts = NULL;
2570162536Sdavidxu	else {
2571162536Sdavidxu		error = copyin_timeout32(uap->uaddr2, &timeout);
2572162536Sdavidxu		if (error != 0)
2573162536Sdavidxu			return (error);
2574162536Sdavidxu		if (timeout.tv_nsec >= 1000000000 ||
2575162536Sdavidxu		    timeout.tv_nsec < 0)
2576162536Sdavidxu			return (EINVAL);
2577162536Sdavidxu		ts = &timeout;
2578162536Sdavidxu	}
2579162536Sdavidxu	return (do_lock_umutex(td, uap->obj, ts, 0));
2580162536Sdavidxu}
2581162536Sdavidxu
2582164839Sdavidxustatic int
2583164839Sdavidxu__umtx_op_cv_wait_compat32(struct thread *td, struct _umtx_op_args *uap)
2584164839Sdavidxu{
2585164839Sdavidxu	struct timespec *ts, timeout;
2586164839Sdavidxu	int error;
2587164839Sdavidxu
2588164839Sdavidxu	/* Allow a null timespec (wait forever). */
2589164839Sdavidxu	if (uap->uaddr2 == NULL)
2590164839Sdavidxu		ts = NULL;
2591164839Sdavidxu	else {
2592164839Sdavidxu		error = copyin_timeout32(uap->uaddr2, &timeout);
2593164839Sdavidxu		if (error != 0)
2594164839Sdavidxu			return (error);
2595164839Sdavidxu		if (timeout.tv_nsec >= 1000000000 ||
2596164839Sdavidxu		    timeout.tv_nsec < 0)
2597164839Sdavidxu			return (EINVAL);
2598164839Sdavidxu		ts = &timeout;
2599164839Sdavidxu	}
2600164876Sdavidxu	return (do_cv_wait(td, uap->obj, uap->uaddr1, ts, uap->val));
2601164839Sdavidxu}
2602164839Sdavidxu
2603162536Sdavidxustatic _umtx_op_func op_table_compat32[] = {
2604162536Sdavidxu	__umtx_op_lock_umtx_compat32,	/* UMTX_OP_LOCK */
2605162536Sdavidxu	__umtx_op_unlock_umtx_compat32,	/* UMTX_OP_UNLOCK */
2606162536Sdavidxu	__umtx_op_wait_compat32,	/* UMTX_OP_WAIT */
2607162536Sdavidxu	__umtx_op_wake,			/* UMTX_OP_WAKE */
2608162550Sdavidxu	__umtx_op_trylock_umutex,	/* UMTX_OP_MUTEX_TRYLOCK */
2609162536Sdavidxu	__umtx_op_lock_umutex_compat32,	/* UMTX_OP_MUTEX_LOCK */
2610162536Sdavidxu	__umtx_op_unlock_umutex,	/* UMTX_OP_MUTEX_UNLOCK */
2611164839Sdavidxu	__umtx_op_set_ceiling,		/* UMTX_OP_SET_CEILING */
2612164839Sdavidxu	__umtx_op_cv_wait_compat32,	/* UMTX_OP_CV_WAIT */
2613164839Sdavidxu	__umtx_op_cv_signal,		/* UMTX_OP_CV_SIGNAL */
2614164839Sdavidxu	__umtx_op_cv_broadcast		/* UMTX_OP_CV_BROADCAST */
2615162536Sdavidxu};
2616162536Sdavidxu
2617162536Sdavidxuint
2618162536Sdavidxufreebsd32_umtx_op(struct thread *td, struct freebsd32_umtx_op_args *uap)
2619162536Sdavidxu{
2620163678Sdavidxu	if ((unsigned)uap->op < UMTX_OP_MAX)
2621162536Sdavidxu		return (*op_table_compat32[uap->op])(td,
2622162536Sdavidxu			(struct _umtx_op_args *)uap);
2623162536Sdavidxu	return (EINVAL);
2624162536Sdavidxu}
2625162536Sdavidxu#endif
2626162536Sdavidxu
2627161678Sdavidxuvoid
2628161678Sdavidxuumtx_thread_init(struct thread *td)
2629161678Sdavidxu{
2630161678Sdavidxu	td->td_umtxq = umtxq_alloc();
2631161678Sdavidxu	td->td_umtxq->uq_thread = td;
2632161678Sdavidxu}
2633161678Sdavidxu
2634161678Sdavidxuvoid
2635161678Sdavidxuumtx_thread_fini(struct thread *td)
2636161678Sdavidxu{
2637161678Sdavidxu	umtxq_free(td->td_umtxq);
2638161678Sdavidxu}
2639161678Sdavidxu
2640161678Sdavidxu/*
2641161678Sdavidxu * Called when a new thread is created, e.g. by fork().
2642161678Sdavidxu */
2643161678Sdavidxuvoid
2644161678Sdavidxuumtx_thread_alloc(struct thread *td)
2645161678Sdavidxu{
2646161678Sdavidxu	struct umtx_q *uq;
2647161678Sdavidxu
2648161678Sdavidxu	uq = td->td_umtxq;
2649161678Sdavidxu	uq->uq_inherited_pri = PRI_MAX;
2650161678Sdavidxu
2651161678Sdavidxu	KASSERT(uq->uq_flags == 0, ("uq_flags != 0"));
2652161678Sdavidxu	KASSERT(uq->uq_thread == td, ("uq_thread != td"));
2653161678Sdavidxu	KASSERT(uq->uq_pi_blocked == NULL, ("uq_pi_blocked != NULL"));
2654161678Sdavidxu	KASSERT(TAILQ_EMPTY(&uq->uq_pi_contested), ("uq_pi_contested is not empty"));
2655161678Sdavidxu}
2656161678Sdavidxu
2657161678Sdavidxu/*
2658161678Sdavidxu * exec() hook.
2659161678Sdavidxu */
2660161678Sdavidxustatic void
2661161678Sdavidxuumtx_exec_hook(void *arg __unused, struct proc *p __unused,
2662161678Sdavidxu	struct image_params *imgp __unused)
2663161678Sdavidxu{
2664161678Sdavidxu	umtx_thread_cleanup(curthread);
2665161678Sdavidxu}
2666161678Sdavidxu
2667161678Sdavidxu/*
2668161678Sdavidxu * thread_exit() hook.
2669161678Sdavidxu */
2670161678Sdavidxuvoid
2671161678Sdavidxuumtx_thread_exit(struct thread *td)
2672161678Sdavidxu{
2673161678Sdavidxu	umtx_thread_cleanup(td);
2674161678Sdavidxu}
2675161678Sdavidxu
2676161678Sdavidxu/*
2677161678Sdavidxu * clean up umtx data.
2678161678Sdavidxu */
2679161678Sdavidxustatic void
2680161678Sdavidxuumtx_thread_cleanup(struct thread *td)
2681161678Sdavidxu{
2682161678Sdavidxu	struct umtx_q *uq;
2683161678Sdavidxu	struct umtx_pi *pi;
2684161678Sdavidxu
2685161678Sdavidxu	if ((uq = td->td_umtxq) == NULL)
2686161678Sdavidxu		return;
2687161678Sdavidxu
2688161678Sdavidxu	mtx_lock_spin(&sched_lock);
2689161678Sdavidxu	uq->uq_inherited_pri = PRI_MAX;
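	/* Disown any PI mutexes this thread still holds. */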
2690161678Sdavidxu	while ((pi = TAILQ_FIRST(&uq->uq_pi_contested)) != NULL) {
2691161678Sdavidxu		pi->pi_owner = NULL;
2692161678Sdavidxu		TAILQ_REMOVE(&uq->uq_pi_contested, pi, pi_link);
2693161678Sdavidxu	}
2694161678Sdavidxu	td->td_flags &= ~TDF_UBORROWING;
2695161678Sdavidxu	mtx_unlock_spin(&sched_lock);
2696161678Sdavidxu}
2697