/*-
 * Copyright (c) 2004, David Xu <davidxu@freebsd.org>
 * Copyright (c) 2002, Jeffrey Roberson <jeff@freebsd.org>
 * All rights reserved.
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 * 1. Redistributions of source code must retain the above copyright
 *    notice unmodified, this list of conditions, and the following
 *    disclaimer.
 * 2. Redistributions in binary form must reproduce the above copyright
 *    notice, this list of conditions and the following disclaimer in the
 *    documentation and/or other materials provided with the distribution.
 *
 * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR
 * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
 * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.
 * IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT,
 * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT
 * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
 * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
 * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
 * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF
 * THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
 */

#include <sys/cdefs.h>
__FBSDID("$FreeBSD: head/sys/kern/kern_umtx.c 213642 2010-10-09 02:50:23Z davidxu $");

#include "opt_compat.h"
#include <sys/param.h>
#include <sys/kernel.h>
#include <sys/limits.h>
#include <sys/lock.h>
#include <sys/malloc.h>
#include <sys/mutex.h>
#include <sys/priv.h>
#include <sys/proc.h>
#include <sys/sched.h>
#include <sys/smp.h>
#include <sys/sysctl.h>
#include <sys/sysent.h>
#include <sys/systm.h>
#include <sys/sysproto.h>
#include <sys/eventhandler.h>
#include <sys/umtx.h>

#include <vm/vm.h>
#include <vm/vm_param.h>
#include <vm/pmap.h>
#include <vm/vm_map.h>
#include <vm/vm_object.h>

#include <machine/cpu.h>

#ifdef COMPAT_FREEBSD32
#include <compat/freebsd32/freebsd32_proto.h>
#endif

enum {
	TYPE_SIMPLE_WAIT,
	TYPE_CV,
	TYPE_SEM,
	TYPE_SIMPLE_LOCK,
	TYPE_NORMAL_UMUTEX,
	TYPE_PI_UMUTEX,
	TYPE_PP_UMUTEX,
	TYPE_RWLOCK
};

#define _UMUTEX_TRY		1
#define _UMUTEX_WAIT		2
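
/*
 * Internal lock modes for _do_lock_normal() below: _UMUTEX_TRY fails
 * with EBUSY instead of sleeping, and _UMUTEX_WAIT only sleeps until
 * the mutex appears free (UMUTEX_UNOWNED or UMUTEX_CONTESTED) without
 * ever acquiring ownership itself.
 */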

/* Key to represent a unique userland synchronization object */
struct umtx_key {
	int	hash;
	int	type;
	int	shared;
	union {
		struct {
			vm_object_t	object;
			uintptr_t	offset;
		} shared;
		struct {
			struct vmspace	*vs;
			uintptr_t	addr;
		} private;
		struct {
			void		*a;
			uintptr_t	b;
		} both;
	} info;
};

/* Priority inheritance mutex info. */
struct umtx_pi {
	/* Owner thread */
	struct thread		*pi_owner;

	/* Reference count */
	int			pi_refcount;

	/* List entry, linked into the owning thread's uq_pi_contested list */
	TAILQ_ENTRY(umtx_pi)	pi_link;

	/* List entry in hash */
	TAILQ_ENTRY(umtx_pi)	pi_hashlink;

	/* List for waiters */
	TAILQ_HEAD(,umtx_q)	pi_blocked;

	/* Identify a userland lock object */
	struct umtx_key		pi_key;
};

/* A waiter on a userland synchronization object. */
struct umtx_q {
	/* Linked list for the hash. */
	TAILQ_ENTRY(umtx_q)	uq_link;

	/* Umtx key. */
	struct umtx_key		uq_key;

	/* Umtx flags. */
	int			uq_flags;
#define UQF_UMTXQ	0x0001

	/* The thread that is waiting. */
	struct thread		*uq_thread;

	/*
	 * Blocked on PI mutex.  Reads can use either the chain lock
	 * or umtx_lock; writes must hold both the chain lock and
	 * umtx_lock.
	 */
	struct umtx_pi		*uq_pi_blocked;

	/* On blocked list */
	TAILQ_ENTRY(umtx_q)	uq_lockq;

	/* List of contested PI mutexes owned by this thread */
	TAILQ_HEAD(,umtx_pi)	uq_pi_contested;

	/* Inherited priority from PP mutex */
	u_char			uq_inherited_pri;

	/* Spare queue ready to be reused */
	struct umtxq_queue	*uq_spare_queue;

	/* The queue we are on */
	struct umtxq_queue	*uq_cur_queue;
};

TAILQ_HEAD(umtxq_head, umtx_q);

/* Per-key wait-queue */
struct umtxq_queue {
	struct umtxq_head	head;
	struct umtx_key		key;
	LIST_ENTRY(umtxq_queue)	link;
	int			length;
};
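
/*
 * Each umtx_q carries one spare umtxq_queue so that no allocation is
 * needed while a chain lock is held: the first waiter on a key donates
 * its spare as the per-key queue, later waiters park theirs on the
 * chain's uc_spare_queue, and every waiter that leaves takes one back.
 */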

LIST_HEAD(umtxq_list, umtxq_queue);

/* Userland lock object's wait-queue chain */
struct umtxq_chain {
	/* Lock for this chain. */
	struct mtx		uc_lock;

	/* List of sleep queues. */
	struct umtxq_list	uc_queue[2];
#define UMTX_SHARED_QUEUE	0
#define UMTX_EXCLUSIVE_QUEUE	1

	LIST_HEAD(, umtxq_queue) uc_spare_queue;

	/* Busy flag */
	char			uc_busy;

	/* Chain lock waiters */
	int			uc_waiters;

	/* All PI structures on this chain */
	TAILQ_HEAD(,umtx_pi)	uc_pi_list;
};

#define	UMTXQ_LOCKED_ASSERT(uc)		mtx_assert(&(uc)->uc_lock, MA_OWNED)
#define	UMTXQ_BUSY_ASSERT(uc)	KASSERT((uc)->uc_busy != 0, ("umtx chain is not busy"))

/*
 * Don't propagate time-sharing priority; there is a security reason:
 * a user could create a PI-mutex, let thread A lock it, and let
 * another thread B block on it.  Because B is sleeping, its priority
 * would be boosted, which would boost A's priority via propagation in
 * turn, and A's priority would then never be lowered even if it were
 * using 100% CPU.  That would be unfair to other processes.
 */

#define UPRI(td)	(((td)->td_user_pri >= PRI_MIN_TIMESHARE &&\
			  (td)->td_user_pri <= PRI_MAX_TIMESHARE) ?\
			 PRI_MAX_TIMESHARE : (td)->td_user_pri)
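
/*
 * In effect, UPRI() clamps every time-sharing thread to
 * PRI_MAX_TIMESHARE (the numerically largest, i.e. weakest,
 * time-sharing priority), so a time-sharing priority is never
 * propagated; non-time-sharing (e.g. real-time) user priorities pass
 * through unchanged.
 */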

#define	GOLDEN_RATIO_PRIME	2654404609U
#define	UMTX_CHAINS		128
#define	UMTX_SHIFTS		(__WORD_BIT - 7)

#define THREAD_SHARE		0
#define PROCESS_SHARE		1
#define AUTO_SHARE		2

#define	GET_SHARE(flags)	\
    (((flags) & USYNC_PROCESS_SHARED) == 0 ? THREAD_SHARE : PROCESS_SHARE)

#define BUSY_SPINS		200

static uma_zone_t		umtx_pi_zone;
static struct umtxq_chain	umtxq_chains[2][UMTX_CHAINS];
static MALLOC_DEFINE(M_UMTX, "umtx", "UMTX queue memory");
static int			umtx_pi_allocated;

SYSCTL_NODE(_debug, OID_AUTO, umtx, CTLFLAG_RW, 0, "umtx debug");
SYSCTL_INT(_debug_umtx, OID_AUTO, umtx_pi_allocated, CTLFLAG_RD,
    &umtx_pi_allocated, 0, "Allocated umtx_pi");

static void umtxq_sysinit(void *);
static void umtxq_hash(struct umtx_key *key);
static struct umtxq_chain *umtxq_getchain(struct umtx_key *key);
static void umtxq_lock(struct umtx_key *key);
static void umtxq_unlock(struct umtx_key *key);
static void umtxq_busy(struct umtx_key *key);
static void umtxq_unbusy(struct umtx_key *key);
static void umtxq_insert_queue(struct umtx_q *uq, int q);
static void umtxq_remove_queue(struct umtx_q *uq, int q);
static int umtxq_sleep(struct umtx_q *uq, const char *wmesg, int timo);
static int umtxq_count(struct umtx_key *key);
static int umtx_key_match(const struct umtx_key *k1, const struct umtx_key *k2);
static int umtx_key_get(void *addr, int type, int share,
	struct umtx_key *key);
static void umtx_key_release(struct umtx_key *key);
static struct umtx_pi *umtx_pi_alloc(int);
static void umtx_pi_free(struct umtx_pi *pi);
static void umtx_pi_adjust_locked(struct thread *td, u_char oldpri);
static int do_unlock_pp(struct thread *td, struct umutex *m, uint32_t flags);
static void umtx_thread_cleanup(struct thread *td);
static void umtx_exec_hook(void *arg __unused, struct proc *p __unused,
	struct image_params *imgp __unused);
SYSINIT(umtx, SI_SUB_EVENTHANDLER+1, SI_ORDER_MIDDLE, umtxq_sysinit, NULL);

#define umtxq_signal(key, nwake)	umtxq_signal_queue((key), (nwake), UMTX_SHARED_QUEUE)
#define umtxq_insert(uq)	umtxq_insert_queue((uq), UMTX_SHARED_QUEUE)
#define umtxq_remove(uq)	umtxq_remove_queue((uq), UMTX_SHARED_QUEUE)

static struct mtx umtx_lock;

static void
umtxq_sysinit(void *arg __unused)
{
	int i, j;

	umtx_pi_zone = uma_zcreate("umtx pi", sizeof(struct umtx_pi),
		NULL, NULL, NULL, NULL, UMA_ALIGN_PTR, 0);
	for (i = 0; i < 2; ++i) {
		for (j = 0; j < UMTX_CHAINS; ++j) {
			mtx_init(&umtxq_chains[i][j].uc_lock, "umtxql", NULL,
				 MTX_DEF | MTX_DUPOK);
			LIST_INIT(&umtxq_chains[i][j].uc_queue[0]);
			LIST_INIT(&umtxq_chains[i][j].uc_queue[1]);
			LIST_INIT(&umtxq_chains[i][j].uc_spare_queue);
			TAILQ_INIT(&umtxq_chains[i][j].uc_pi_list);
			umtxq_chains[i][j].uc_busy = 0;
			umtxq_chains[i][j].uc_waiters = 0;
		}
	}
	mtx_init(&umtx_lock, "umtx lock", NULL, MTX_SPIN);
	EVENTHANDLER_REGISTER(process_exec, umtx_exec_hook, NULL,
	    EVENTHANDLER_PRI_ANY);
}

struct umtx_q *
umtxq_alloc(void)
{
	struct umtx_q *uq;

	uq = malloc(sizeof(struct umtx_q), M_UMTX, M_WAITOK | M_ZERO);
	uq->uq_spare_queue = malloc(sizeof(struct umtxq_queue), M_UMTX, M_WAITOK | M_ZERO);
	TAILQ_INIT(&uq->uq_spare_queue->head);
	TAILQ_INIT(&uq->uq_pi_contested);
	uq->uq_inherited_pri = PRI_MAX;
	return (uq);
}

void
umtxq_free(struct umtx_q *uq)
{
	MPASS(uq->uq_spare_queue != NULL);
	free(uq->uq_spare_queue, M_UMTX);
	free(uq, M_UMTX);
}

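/*
 * Hash a key to a chain index with multiplicative (Fibonacci-style)
 * hashing: multiply the key material by a large constant near 2^32
 * divided by the golden ratio and keep the top bits.  With
 * UMTX_SHIFTS == __WORD_BIT - 7, the shift leaves the 7 high-order
 * bits, selecting one of the UMTX_CHAINS == 128 chains.
 */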
static inline void
umtxq_hash(struct umtx_key *key)
{
	unsigned n = (uintptr_t)key->info.both.a + key->info.both.b;
	key->hash = ((n * GOLDEN_RATIO_PRIME) >> UMTX_SHIFTS) % UMTX_CHAINS;
}

static inline int
umtx_key_match(const struct umtx_key *k1, const struct umtx_key *k2)
{
	return (k1->type == k2->type &&
		k1->info.both.a == k2->info.both.a &&
		k1->info.both.b == k2->info.both.b);
}

static inline struct umtxq_chain *
umtxq_getchain(struct umtx_key *key)
{
	if (key->type <= TYPE_SEM)
		return (&umtxq_chains[1][key->hash]);
	return (&umtxq_chains[0][key->hash]);
}

/*
 * Lock a chain.
 */
static inline void
umtxq_lock(struct umtx_key *key)
{
	struct umtxq_chain *uc;

	uc = umtxq_getchain(key);
	mtx_lock(&uc->uc_lock);
}

/*
 * Unlock a chain.
 */
static inline void
umtxq_unlock(struct umtx_key *key)
{
	struct umtxq_chain *uc;

	uc = umtxq_getchain(key);
	mtx_unlock(&uc->uc_lock);
}

/*
 * Mark the chain as busy when the following operation may block
 * (a kernel mutex can not be held across it).
 */
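/*
 * On SMP, a busy chain is usually released quickly, so spin for up to
 * BUSY_SPINS iterations with the chain lock dropped before falling
 * back to msleep() on the chain.
 */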
static inline void
umtxq_busy(struct umtx_key *key)
{
	struct umtxq_chain *uc;

	uc = umtxq_getchain(key);
	mtx_assert(&uc->uc_lock, MA_OWNED);
	if (uc->uc_busy) {
#ifdef SMP
		if (smp_cpus > 1) {
			int count = BUSY_SPINS;
			if (count > 0) {
				umtxq_unlock(key);
				while (uc->uc_busy && --count > 0)
					cpu_spinwait();
				umtxq_lock(key);
			}
		}
#endif
		while (uc->uc_busy) {
			uc->uc_waiters++;
			msleep(uc, &uc->uc_lock, 0, "umtxqb", 0);
			uc->uc_waiters--;
		}
	}
	uc->uc_busy = 1;
}

/*
 * Unbusy a chain.
 */
static inline void
umtxq_unbusy(struct umtx_key *key)
{
	struct umtxq_chain *uc;

	uc = umtxq_getchain(key);
	mtx_assert(&uc->uc_lock, MA_OWNED);
	KASSERT(uc->uc_busy != 0, ("not busy"));
	uc->uc_busy = 0;
	if (uc->uc_waiters)
		wakeup_one(uc);
}

static struct umtxq_queue *
umtxq_queue_lookup(struct umtx_key *key, int q)
{
	struct umtxq_queue *uh;
	struct umtxq_chain *uc;

	uc = umtxq_getchain(key);
	UMTXQ_LOCKED_ASSERT(uc);
	LIST_FOREACH(uh, &uc->uc_queue[q], link) {
		if (umtx_key_match(&uh->key, key))
			return (uh);
	}

	return (NULL);
}

static inline void
umtxq_insert_queue(struct umtx_q *uq, int q)
{
	struct umtxq_queue *uh;
	struct umtxq_chain *uc;

	uc = umtxq_getchain(&uq->uq_key);
	UMTXQ_LOCKED_ASSERT(uc);
	KASSERT((uq->uq_flags & UQF_UMTXQ) == 0, ("umtx_q is already on queue"));
	uh = umtxq_queue_lookup(&uq->uq_key, q);
	if (uh != NULL) {
		LIST_INSERT_HEAD(&uc->uc_spare_queue, uq->uq_spare_queue, link);
	} else {
		uh = uq->uq_spare_queue;
		uh->key = uq->uq_key;
		LIST_INSERT_HEAD(&uc->uc_queue[q], uh, link);
	}
	uq->uq_spare_queue = NULL;

	TAILQ_INSERT_TAIL(&uh->head, uq, uq_link);
	uh->length++;
	uq->uq_flags |= UQF_UMTXQ;
	uq->uq_cur_queue = uh;
	return;
}

static inline void
umtxq_remove_queue(struct umtx_q *uq, int q)
{
	struct umtxq_chain *uc;
	struct umtxq_queue *uh;

	uc = umtxq_getchain(&uq->uq_key);
	UMTXQ_LOCKED_ASSERT(uc);
	if (uq->uq_flags & UQF_UMTXQ) {
		uh = uq->uq_cur_queue;
		TAILQ_REMOVE(&uh->head, uq, uq_link);
		uh->length--;
		uq->uq_flags &= ~UQF_UMTXQ;
		if (TAILQ_EMPTY(&uh->head)) {
			KASSERT(uh->length == 0,
			    ("inconsistent umtxq_queue length"));
			LIST_REMOVE(uh, link);
		} else {
			uh = LIST_FIRST(&uc->uc_spare_queue);
			KASSERT(uh != NULL, ("uc_spare_queue is empty"));
			LIST_REMOVE(uh, link);
		}
		uq->uq_spare_queue = uh;
		uq->uq_cur_queue = NULL;
	}
}

/*
 * Return the number of waiters on the shared queue for a key.
 */
static int
umtxq_count(struct umtx_key *key)
{
	struct umtxq_chain *uc;
	struct umtxq_queue *uh;

	uc = umtxq_getchain(key);
	UMTXQ_LOCKED_ASSERT(uc);
	uh = umtxq_queue_lookup(key, UMTX_SHARED_QUEUE);
	if (uh != NULL)
		return (uh->length);
	return (0);
}

/*
 * Return the number of PI waiters and, via *first, the first
 * waiter.
 */
static int
umtxq_count_pi(struct umtx_key *key, struct umtx_q **first)
{
	struct umtxq_chain *uc;
	struct umtxq_queue *uh;

	*first = NULL;
	uc = umtxq_getchain(key);
	UMTXQ_LOCKED_ASSERT(uc);
	uh = umtxq_queue_lookup(key, UMTX_SHARED_QUEUE);
	if (uh != NULL) {
		*first = TAILQ_FIRST(&uh->head);
		return (uh->length);
	}
	return (0);
}

/*
 * Wake up threads waiting on a userland object.
 */
static int
umtxq_signal_queue(struct umtx_key *key, int n_wake, int q)
{
	struct umtxq_chain *uc;
	struct umtxq_queue *uh;
	struct umtx_q *uq;
	int ret;

	ret = 0;
	uc = umtxq_getchain(key);
	UMTXQ_LOCKED_ASSERT(uc);
	uh = umtxq_queue_lookup(key, q);
	if (uh != NULL) {
		while ((uq = TAILQ_FIRST(&uh->head)) != NULL) {
			umtxq_remove_queue(uq, q);
			wakeup(uq);
			if (++ret >= n_wake)
				return (ret);
		}
	}
	return (ret);
}

/*
 * Wake up specified thread.
 */
static inline void
umtxq_signal_thread(struct umtx_q *uq)
{
	struct umtxq_chain *uc;

	uc = umtxq_getchain(&uq->uq_key);
	UMTXQ_LOCKED_ASSERT(uc);
	umtxq_remove(uq);
	wakeup(uq);
}

/*
 * Put the thread into a sleep state; before sleeping, check whether
 * the thread was already removed from the umtx queue.
 */
static inline int
umtxq_sleep(struct umtx_q *uq, const char *wmesg, int timo)
{
	struct umtxq_chain *uc;
	int error;

	uc = umtxq_getchain(&uq->uq_key);
	UMTXQ_LOCKED_ASSERT(uc);
	if (!(uq->uq_flags & UQF_UMTXQ))
		return (0);
	error = msleep(uq, &uc->uc_lock, PCATCH, wmesg, timo);
	if (error == EWOULDBLOCK)
		error = ETIMEDOUT;
	return (error);
}

/*
 * Convert a userspace address into a unique logical address (key).
 */
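/*
 * A process-private key is (vmspace, address); a process-shared key is
 * (vm_object, offset within the object), so two processes that map the
 * same object at different addresses still resolve to the same key.
 */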
static int
umtx_key_get(void *addr, int type, int share, struct umtx_key *key)
{
	struct thread *td = curthread;
	vm_map_t map;
	vm_map_entry_t entry;
	vm_pindex_t pindex;
	vm_prot_t prot;
	boolean_t wired;

	key->type = type;
	if (share == THREAD_SHARE) {
		key->shared = 0;
		key->info.private.vs = td->td_proc->p_vmspace;
		key->info.private.addr = (uintptr_t)addr;
	} else {
		MPASS(share == PROCESS_SHARE || share == AUTO_SHARE);
		map = &td->td_proc->p_vmspace->vm_map;
		if (vm_map_lookup(&map, (vm_offset_t)addr, VM_PROT_WRITE,
		    &entry, &key->info.shared.object, &pindex, &prot,
		    &wired) != KERN_SUCCESS) {
			return (EFAULT);
		}

		if ((share == PROCESS_SHARE) ||
		    (share == AUTO_SHARE &&
		     VM_INHERIT_SHARE == entry->inheritance)) {
			key->shared = 1;
			key->info.shared.offset = entry->offset + entry->start -
				(vm_offset_t)addr;
			vm_object_reference(key->info.shared.object);
		} else {
			key->shared = 0;
			key->info.private.vs = td->td_proc->p_vmspace;
			key->info.private.addr = (uintptr_t)addr;
		}
		vm_map_lookup_done(map, entry);
	}

	umtxq_hash(key);
	return (0);
}

/*
 * Release key.
 */
static inline void
umtx_key_release(struct umtx_key *key)
{
	if (key->shared)
		vm_object_deallocate(key->info.shared.object);
}

/*
 * Lock a umtx object.
 */
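/*
 * The u_owner word of a struct umtx (values from <sys/umtx.h>) goes
 * through these transitions in _do_lock_umtx() below:
 *
 *   UMTX_UNOWNED   -> id                    fast, uncontested acquire
 *   UMTX_CONTESTED -> id | UMTX_CONTESTED   acquire with waiters queued
 *   id2            -> id2 | UMTX_CONTESTED  a loser marks contention and
 *                                           sleeps; the owner must then
 *                                           unlock through the kernel
 */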
static int
_do_lock_umtx(struct thread *td, struct umtx *umtx, u_long id, int timo)
{
	struct umtx_q *uq;
	u_long owner;
	u_long old;
	int error = 0;

	uq = td->td_umtxq;

	/*
	 * Care must be exercised when dealing with the umtx structure:
	 * any access can fault.
	 */
	for (;;) {
		/*
		 * Try the uncontested case.  This should be done in userland.
		 */
		owner = casuword(&umtx->u_owner, UMTX_UNOWNED, id);

		/* The acquire succeeded. */
		if (owner == UMTX_UNOWNED)
			return (0);

		/* The address was invalid. */
		if (owner == -1)
			return (EFAULT);

		/* If no one owns it but it is contested try to acquire it. */
		if (owner == UMTX_CONTESTED) {
			owner = casuword(&umtx->u_owner,
			    UMTX_CONTESTED, id | UMTX_CONTESTED);

			if (owner == UMTX_CONTESTED)
				return (0);

			/* The address was invalid. */
			if (owner == -1)
				return (EFAULT);

			/* If this failed the lock has changed, restart. */
			continue;
		}

		/*
		 * If we caught a signal, we have retried and now
		 * exit immediately.
		 */
		if (error != 0)
			return (error);

		if ((error = umtx_key_get(umtx, TYPE_SIMPLE_LOCK,
			AUTO_SHARE, &uq->uq_key)) != 0)
			return (error);

		umtxq_lock(&uq->uq_key);
		umtxq_busy(&uq->uq_key);
		umtxq_insert(uq);
		umtxq_unbusy(&uq->uq_key);
		umtxq_unlock(&uq->uq_key);

		/*
		 * Set the contested bit so that a release in user space
		 * knows to use the system call for unlock.  If this fails
		 * either some one else has acquired the lock or it has been
		 * released.
		 */
		old = casuword(&umtx->u_owner, owner, owner | UMTX_CONTESTED);

		/* The address was invalid. */
		if (old == -1) {
			umtxq_lock(&uq->uq_key);
			umtxq_remove(uq);
			umtxq_unlock(&uq->uq_key);
			umtx_key_release(&uq->uq_key);
			return (EFAULT);
		}

		/*
		 * If we set the contested bit, sleep.  Otherwise the lock
		 * changed and we need to retry, or we lost a race to the
		 * thread unlocking the umtx.
		 */
		umtxq_lock(&uq->uq_key);
		if (old == owner)
			error = umtxq_sleep(uq, "umtx", timo);
		umtxq_remove(uq);
		umtxq_unlock(&uq->uq_key);
		umtx_key_release(&uq->uq_key);
	}

	return (0);
}

/*
 * Lock a umtx object.
 */
static int
do_lock_umtx(struct thread *td, struct umtx *umtx, u_long id,
	struct timespec *timeout)
{
	struct timespec ts, ts2, ts3;
	struct timeval tv;
	int error;

	if (timeout == NULL) {
		error = _do_lock_umtx(td, umtx, id, 0);
		/* Mutex locking is restarted if it is interrupted. */
		if (error == EINTR)
			error = ERESTART;
	} else {
		getnanouptime(&ts);
		timespecadd(&ts, timeout);
		TIMESPEC_TO_TIMEVAL(&tv, timeout);
		for (;;) {
			error = _do_lock_umtx(td, umtx, id, tvtohz(&tv));
			if (error != ETIMEDOUT)
				break;
			getnanouptime(&ts2);
			if (timespeccmp(&ts2, &ts, >=)) {
				error = ETIMEDOUT;
				break;
			}
			ts3 = ts;
			timespecsub(&ts3, &ts2);
			TIMESPEC_TO_TIMEVAL(&tv, &ts3);
		}
		/* Timed-locking is not restarted. */
		if (error == ERESTART)
			error = EINTR;
	}
	return (error);
}

/*
 * Unlock a umtx object.
 */
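/*
 * Note the count heuristic used below: if at most one waiter remains,
 * the owner word is reset to UMTX_UNOWNED so the next acquire can take
 * the fast path; with more waiters it stays UMTX_CONTESTED so that
 * subsequent unlocks keep entering the kernel to wake them.
 */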
static int
do_unlock_umtx(struct thread *td, struct umtx *umtx, u_long id)
{
	struct umtx_key key;
	u_long owner;
	u_long old;
	int error;
	int count;

	/*
	 * Make sure we own this mtx.
	 */
	owner = fuword(__DEVOLATILE(u_long *, &umtx->u_owner));
	if (owner == -1)
		return (EFAULT);

	if ((owner & ~UMTX_CONTESTED) != id)
		return (EPERM);

	/* This should be done in userland */
	if ((owner & UMTX_CONTESTED) == 0) {
		old = casuword(&umtx->u_owner, owner, UMTX_UNOWNED);
		if (old == -1)
			return (EFAULT);
		if (old == owner)
			return (0);
		owner = old;
	}

	/* We should only ever be in here for contested locks */
	if ((error = umtx_key_get(umtx, TYPE_SIMPLE_LOCK, AUTO_SHARE,
		&key)) != 0)
		return (error);

	umtxq_lock(&key);
	umtxq_busy(&key);
	count = umtxq_count(&key);
	umtxq_unlock(&key);

	/*
	 * When unlocking the umtx, it must be marked as unowned if
	 * zero or one thread is waiting for it.
	 * Otherwise, it must be marked as contested.
	 */
	old = casuword(&umtx->u_owner, owner,
		count <= 1 ? UMTX_UNOWNED : UMTX_CONTESTED);
	umtxq_lock(&key);
	umtxq_signal(&key, 1);
	umtxq_unbusy(&key);
	umtxq_unlock(&key);
	umtx_key_release(&key);
	if (old == -1)
		return (EFAULT);
	if (old != owner)
		return (EINVAL);
	return (0);
}

#ifdef COMPAT_FREEBSD32

/*
 * Lock a umtx object (32-bit compat).
 */
static int
_do_lock_umtx32(struct thread *td, uint32_t *m, uint32_t id, int timo)
{
	struct umtx_q *uq;
	uint32_t owner;
	uint32_t old;
	int error = 0;

	uq = td->td_umtxq;

	/*
	 * Care must be exercised when dealing with the umtx structure:
	 * any access can fault.
	 */
	for (;;) {
		/*
		 * Try the uncontested case.  This should be done in userland.
		 */
		owner = casuword32(m, UMUTEX_UNOWNED, id);

		/* The acquire succeeded. */
		if (owner == UMUTEX_UNOWNED)
			return (0);

		/* The address was invalid. */
		if (owner == -1)
			return (EFAULT);

		/* If no one owns it but it is contested try to acquire it. */
		if (owner == UMUTEX_CONTESTED) {
			owner = casuword32(m,
			    UMUTEX_CONTESTED, id | UMUTEX_CONTESTED);
			if (owner == UMUTEX_CONTESTED)
				return (0);

			/* The address was invalid. */
			if (owner == -1)
				return (EFAULT);

			/* If this failed the lock has changed, restart. */
			continue;
		}

		/*
		 * If we caught a signal, we have retried and now
		 * exit immediately.
		 */
		if (error != 0)
			return (error);

		if ((error = umtx_key_get(m, TYPE_SIMPLE_LOCK,
			AUTO_SHARE, &uq->uq_key)) != 0)
			return (error);

		umtxq_lock(&uq->uq_key);
		umtxq_busy(&uq->uq_key);
		umtxq_insert(uq);
		umtxq_unbusy(&uq->uq_key);
		umtxq_unlock(&uq->uq_key);

		/*
		 * Set the contested bit so that a release in user space
		 * knows to use the system call for unlock.  If this fails
		 * either some one else has acquired the lock or it has been
		 * released.
		 */
		old = casuword32(m, owner, owner | UMUTEX_CONTESTED);

		/* The address was invalid. */
		if (old == -1) {
			umtxq_lock(&uq->uq_key);
			umtxq_remove(uq);
			umtxq_unlock(&uq->uq_key);
			umtx_key_release(&uq->uq_key);
			return (EFAULT);
		}

		/*
		 * If we set the contested bit, sleep.  Otherwise the lock
		 * changed and we need to retry, or we lost a race to the
		 * thread unlocking the umtx.
		 */
		umtxq_lock(&uq->uq_key);
		if (old == owner)
			error = umtxq_sleep(uq, "umtx", timo);
		umtxq_remove(uq);
		umtxq_unlock(&uq->uq_key);
		umtx_key_release(&uq->uq_key);
	}

	return (0);
}

/*
 * Lock a umtx object (32-bit compat).
 */
static int
do_lock_umtx32(struct thread *td, void *m, uint32_t id,
	struct timespec *timeout)
{
	struct timespec ts, ts2, ts3;
	struct timeval tv;
	int error;

	if (timeout == NULL) {
		error = _do_lock_umtx32(td, m, id, 0);
		/* Mutex locking is restarted if it is interrupted. */
		if (error == EINTR)
			error = ERESTART;
	} else {
		getnanouptime(&ts);
		timespecadd(&ts, timeout);
		TIMESPEC_TO_TIMEVAL(&tv, timeout);
		for (;;) {
			error = _do_lock_umtx32(td, m, id, tvtohz(&tv));
			if (error != ETIMEDOUT)
				break;
			getnanouptime(&ts2);
			if (timespeccmp(&ts2, &ts, >=)) {
				error = ETIMEDOUT;
				break;
			}
			ts3 = ts;
			timespecsub(&ts3, &ts2);
			TIMESPEC_TO_TIMEVAL(&tv, &ts3);
		}
		/* Timed-locking is not restarted. */
		if (error == ERESTART)
			error = EINTR;
	}
	return (error);
}

/*
 * Unlock a umtx object (32-bit compat).
 */
static int
do_unlock_umtx32(struct thread *td, uint32_t *m, uint32_t id)
{
	struct umtx_key key;
	uint32_t owner;
	uint32_t old;
	int error;
	int count;

	/*
	 * Make sure we own this mtx.
	 */
	owner = fuword32(m);
	if (owner == -1)
		return (EFAULT);

	if ((owner & ~UMUTEX_CONTESTED) != id)
		return (EPERM);

	/* This should be done in userland */
	if ((owner & UMUTEX_CONTESTED) == 0) {
		old = casuword32(m, owner, UMUTEX_UNOWNED);
		if (old == -1)
			return (EFAULT);
		if (old == owner)
			return (0);
		owner = old;
	}

	/* We should only ever be in here for contested locks */
	if ((error = umtx_key_get(m, TYPE_SIMPLE_LOCK, AUTO_SHARE,
		&key)) != 0)
		return (error);

	umtxq_lock(&key);
	umtxq_busy(&key);
	count = umtxq_count(&key);
	umtxq_unlock(&key);

	/*
	 * When unlocking the umtx, it must be marked as unowned if
	 * zero or one thread is waiting for it.
	 * Otherwise, it must be marked as contested.
	 */
	old = casuword32(m, owner,
		count <= 1 ? UMUTEX_UNOWNED : UMUTEX_CONTESTED);
	umtxq_lock(&key);
	umtxq_signal(&key, 1);
	umtxq_unbusy(&key);
	umtxq_unlock(&key);
	umtx_key_release(&key);
	if (old == -1)
		return (EFAULT);
	if (old != owner)
		return (EINVAL);
	return (0);
}
#endif

/*
 * Fetch and compare a value; sleep on the address if the value has
 * not changed.
 */
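/*
 * This is the futex-style wait: the value at the address is (re)read
 * in the kernel only after the thread has been placed on the sleep
 * queue, so a wakeup occurring between the userland check and the
 * sleep cannot be lost.
 */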
static int
do_wait(struct thread *td, void *addr, u_long id,
	struct timespec *timeout, int compat32, int is_private)
{
	struct umtx_q *uq;
	struct timespec ts, ts2, ts3;
	struct timeval tv;
	u_long tmp;
	int error = 0;

	uq = td->td_umtxq;
	if ((error = umtx_key_get(addr, TYPE_SIMPLE_WAIT,
		is_private ? THREAD_SHARE : AUTO_SHARE, &uq->uq_key)) != 0)
		return (error);

	umtxq_lock(&uq->uq_key);
	umtxq_insert(uq);
	umtxq_unlock(&uq->uq_key);
	if (compat32 == 0)
		tmp = fuword(addr);
	else
		tmp = (unsigned int)fuword32(addr);
	if (tmp != id) {
		umtxq_lock(&uq->uq_key);
		umtxq_remove(uq);
		umtxq_unlock(&uq->uq_key);
	} else if (timeout == NULL) {
		umtxq_lock(&uq->uq_key);
		error = umtxq_sleep(uq, "uwait", 0);
		umtxq_remove(uq);
		umtxq_unlock(&uq->uq_key);
	} else {
		getnanouptime(&ts);
		timespecadd(&ts, timeout);
		TIMESPEC_TO_TIMEVAL(&tv, timeout);
		umtxq_lock(&uq->uq_key);
		for (;;) {
			error = umtxq_sleep(uq, "uwait", tvtohz(&tv));
			if (!(uq->uq_flags & UQF_UMTXQ)) {
				error = 0;
				break;
			}
			if (error != ETIMEDOUT)
				break;
			umtxq_unlock(&uq->uq_key);
			getnanouptime(&ts2);
			if (timespeccmp(&ts2, &ts, >=)) {
				error = ETIMEDOUT;
				umtxq_lock(&uq->uq_key);
				break;
			}
			ts3 = ts;
			timespecsub(&ts3, &ts2);
			TIMESPEC_TO_TIMEVAL(&tv, &ts3);
			umtxq_lock(&uq->uq_key);
		}
		umtxq_remove(uq);
		umtxq_unlock(&uq->uq_key);
	}
	umtx_key_release(&uq->uq_key);
	if (error == ERESTART)
		error = EINTR;
	return (error);
}

/*
 * Wake up threads sleeping on the specified address.
 */
int
kern_umtx_wake(struct thread *td, void *uaddr, int n_wake, int is_private)
{
	struct umtx_key key;
	int ret;

	if ((ret = umtx_key_get(uaddr, TYPE_SIMPLE_WAIT,
		is_private ? THREAD_SHARE : AUTO_SHARE, &key)) != 0)
		return (ret);
	umtxq_lock(&key);
	ret = umtxq_signal(&key, n_wake);
	umtxq_unlock(&key);
	umtx_key_release(&key);
	return (0);
}

/*
 * Lock a PTHREAD_PRIO_NONE protocol POSIX mutex.
 */
static int
_do_lock_normal(struct thread *td, struct umutex *m, uint32_t flags, int timo,
	int mode)
{
	struct umtx_q *uq;
	uint32_t owner, old, id;
	int error = 0;

	id = td->td_tid;
	uq = td->td_umtxq;

	/*
	 * Care must be exercised when dealing with the umtx structure:
	 * any access can fault.
	 */
	for (;;) {
		owner = fuword32(__DEVOLATILE(void *, &m->m_owner));
		if (mode == _UMUTEX_WAIT) {
			if (owner == UMUTEX_UNOWNED || owner == UMUTEX_CONTESTED)
				return (0);
		} else {
			/*
			 * Try the uncontested case.  This should be done in userland.
			 */
			owner = casuword32(&m->m_owner, UMUTEX_UNOWNED, id);

			/* The acquire succeeded. */
			if (owner == UMUTEX_UNOWNED)
				return (0);

			/* The address was invalid. */
			if (owner == -1)
				return (EFAULT);

			/* If no one owns it but it is contested try to acquire it. */
			if (owner == UMUTEX_CONTESTED) {
				owner = casuword32(&m->m_owner,
				    UMUTEX_CONTESTED, id | UMUTEX_CONTESTED);

				if (owner == UMUTEX_CONTESTED)
					return (0);

				/* The address was invalid. */
				if (owner == -1)
					return (EFAULT);

				/* If this failed the lock has changed, restart. */
				continue;
			}
		}

		if ((flags & UMUTEX_ERROR_CHECK) != 0 &&
		    (owner & ~UMUTEX_CONTESTED) == id)
			return (EDEADLK);

		if (mode == _UMUTEX_TRY)
			return (EBUSY);

		/*
		 * If we caught a signal, we have retried and now
		 * exit immediately.
		 */
		if (error != 0)
			return (error);

		if ((error = umtx_key_get(m, TYPE_NORMAL_UMUTEX,
		    GET_SHARE(flags), &uq->uq_key)) != 0)
			return (error);

		umtxq_lock(&uq->uq_key);
		umtxq_busy(&uq->uq_key);
		umtxq_insert(uq);
		umtxq_unlock(&uq->uq_key);

		/*
		 * Set the contested bit so that a release in user space
		 * knows to use the system call for unlock.  If this fails
		 * either some one else has acquired the lock or it has been
		 * released.
		 */
		old = casuword32(&m->m_owner, owner, owner | UMUTEX_CONTESTED);

		/* The address was invalid. */
		if (old == -1) {
			umtxq_lock(&uq->uq_key);
			umtxq_remove(uq);
			umtxq_unbusy(&uq->uq_key);
			umtxq_unlock(&uq->uq_key);
			umtx_key_release(&uq->uq_key);
			return (EFAULT);
		}

		/*
		 * If we set the contested bit, sleep.  Otherwise the lock
		 * changed and we need to retry, or we lost a race to the
		 * thread unlocking the umtx.
		 */
		umtxq_lock(&uq->uq_key);
		umtxq_unbusy(&uq->uq_key);
		if (old == owner)
			error = umtxq_sleep(uq, "umtxn", timo);
		umtxq_remove(uq);
		umtxq_unlock(&uq->uq_key);
		umtx_key_release(&uq->uq_key);
	}

	return (0);
}

/*
 * Unlock a PTHREAD_PRIO_NONE protocol POSIX mutex.
 */
static int
do_unlock_normal(struct thread *td, struct umutex *m, uint32_t flags)
{
	struct umtx_key key;
	uint32_t owner, old, id;
	int error;
	int count;

	id = td->td_tid;
	/*
	 * Make sure we own this mtx.
	 */
	owner = fuword32(__DEVOLATILE(uint32_t *, &m->m_owner));
	if (owner == -1)
		return (EFAULT);

	if ((owner & ~UMUTEX_CONTESTED) != id)
		return (EPERM);

	if ((owner & UMUTEX_CONTESTED) == 0) {
		old = casuword32(&m->m_owner, owner, UMUTEX_UNOWNED);
		if (old == -1)
			return (EFAULT);
		if (old == owner)
			return (0);
		owner = old;
	}

	/* We should only ever be in here for contested locks */
	if ((error = umtx_key_get(m, TYPE_NORMAL_UMUTEX, GET_SHARE(flags),
	    &key)) != 0)
		return (error);

	umtxq_lock(&key);
	umtxq_busy(&key);
	count = umtxq_count(&key);
	umtxq_unlock(&key);

	/*
	 * When unlocking the umtx, it must be marked as unowned if
	 * zero or one thread is waiting for it.
	 * Otherwise, it must be marked as contested.
	 */
	old = casuword32(&m->m_owner, owner,
		count <= 1 ? UMUTEX_UNOWNED : UMUTEX_CONTESTED);
	umtxq_lock(&key);
	umtxq_signal(&key, 1);
	umtxq_unbusy(&key);
	umtxq_unlock(&key);
	umtx_key_release(&key);
	if (old == -1)
		return (EFAULT);
	if (old != owner)
		return (EINVAL);
	return (0);
}

/*
 * Check if the mutex is available and wake up a waiter;
 * used only for a simple (PTHREAD_PRIO_NONE) mutex.
 */
static int
do_wake_umutex(struct thread *td, struct umutex *m)
{
	struct umtx_key key;
	uint32_t owner;
	uint32_t flags;
	int error;
	int count;

	owner = fuword32(__DEVOLATILE(uint32_t *, &m->m_owner));
	if (owner == -1)
		return (EFAULT);

	if ((owner & ~UMUTEX_CONTESTED) != 0)
		return (0);

	flags = fuword32(&m->m_flags);

	/* We should only ever be in here for contested locks */
	if ((error = umtx_key_get(m, TYPE_NORMAL_UMUTEX, GET_SHARE(flags),
	    &key)) != 0)
		return (error);

	umtxq_lock(&key);
	umtxq_busy(&key);
	count = umtxq_count(&key);
	umtxq_unlock(&key);

	if (count <= 1)
		owner = casuword32(&m->m_owner, UMUTEX_CONTESTED, UMUTEX_UNOWNED);

	umtxq_lock(&key);
	if (count != 0 && (owner & ~UMUTEX_CONTESTED) == 0)
		umtxq_signal(&key, 1);
	umtxq_unbusy(&key);
	umtxq_unlock(&key);
	umtx_key_release(&key);
	return (0);
}

static inline struct umtx_pi *
umtx_pi_alloc(int flags)
{
	struct umtx_pi *pi;

	pi = uma_zalloc(umtx_pi_zone, M_ZERO | flags);
	TAILQ_INIT(&pi->pi_blocked);
	atomic_add_int(&umtx_pi_allocated, 1);
	return (pi);
}

static inline void
umtx_pi_free(struct umtx_pi *pi)
{
	uma_zfree(umtx_pi_zone, pi);
	atomic_add_int(&umtx_pi_allocated, -1);
}

/*
 * Adjust the thread's position on the blocked list of a umtx_pi
 * after its priority has been changed.
 */
static int
umtx_pi_adjust_thread(struct umtx_pi *pi, struct thread *td)
{
	struct umtx_q *uq, *uq1, *uq2;
	struct thread *td1;

	mtx_assert(&umtx_lock, MA_OWNED);
	if (pi == NULL)
		return (0);

	uq = td->td_umtxq;

	/*
	 * Check if the thread needs to be moved on the blocked chain.
	 * It needs to be moved if either its priority is lower than
	 * the previous thread or higher than the next thread.
	 */
	uq1 = TAILQ_PREV(uq, umtxq_head, uq_lockq);
	uq2 = TAILQ_NEXT(uq, uq_lockq);
	if ((uq1 != NULL && UPRI(td) < UPRI(uq1->uq_thread)) ||
	    (uq2 != NULL && UPRI(td) > UPRI(uq2->uq_thread))) {
		/*
		 * Remove thread from blocked chain and determine where
		 * it should be moved to.
		 */
		TAILQ_REMOVE(&pi->pi_blocked, uq, uq_lockq);
		TAILQ_FOREACH(uq1, &pi->pi_blocked, uq_lockq) {
			td1 = uq1->uq_thread;
			MPASS(td1->td_proc->p_magic == P_MAGIC);
			if (UPRI(td1) > UPRI(td))
				break;
		}

		if (uq1 == NULL)
			TAILQ_INSERT_TAIL(&pi->pi_blocked, uq, uq_lockq);
		else
			TAILQ_INSERT_BEFORE(uq1, uq, uq_lockq);
	}
	return (1);
}

/*
 * Propagate priority when a thread is blocked on a POSIX
 * PI mutex.
 */
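/*
 * For example, if T1 blocks on a PI mutex owned by T2, which is in
 * turn blocked on a PI mutex owned by T3, the loop below lends T1's
 * priority to T2 and then follows T2's uq_pi_blocked to lend it to
 * T3 as well (a smaller priority value is a stronger priority).
 */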
1394161678Sdavidxustatic void
1395161678Sdavidxuumtx_propagate_priority(struct thread *td)
1396161678Sdavidxu{
1397161678Sdavidxu	struct umtx_q *uq;
1398161678Sdavidxu	struct umtx_pi *pi;
1399161678Sdavidxu	int pri;
1400161678Sdavidxu
1401170300Sjeff	mtx_assert(&umtx_lock, MA_OWNED);
1402161678Sdavidxu	pri = UPRI(td);
1403161678Sdavidxu	uq = td->td_umtxq;
1404161678Sdavidxu	pi = uq->uq_pi_blocked;
1405161678Sdavidxu	if (pi == NULL)
1406161678Sdavidxu		return;
1407161678Sdavidxu
1408161678Sdavidxu	for (;;) {
1409161678Sdavidxu		td = pi->pi_owner;
1410161678Sdavidxu		if (td == NULL)
1411161678Sdavidxu			return;
1412161678Sdavidxu
1413161678Sdavidxu		MPASS(td->td_proc != NULL);
1414161678Sdavidxu		MPASS(td->td_proc->p_magic == P_MAGIC);
1415161678Sdavidxu
1416161678Sdavidxu		if (UPRI(td) <= pri)
1417161678Sdavidxu			return;
1418161678Sdavidxu
1419170300Sjeff		thread_lock(td);
1420161678Sdavidxu		sched_lend_user_prio(td, pri);
1421170300Sjeff		thread_unlock(td);
1422161678Sdavidxu
1423161678Sdavidxu		/*
1424161678Sdavidxu		 * Pick up the lock that td is blocked on.
1425161678Sdavidxu		 */
1426161678Sdavidxu		uq = td->td_umtxq;
1427161678Sdavidxu		pi = uq->uq_pi_blocked;
1428161678Sdavidxu		/* Resort td on the list if needed. */
1429161678Sdavidxu		if (!umtx_pi_adjust_thread(pi, td))
1430161678Sdavidxu			break;
1431161678Sdavidxu	}
1432161678Sdavidxu}
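
/*
 * Worked example for umtx_propagate_priority() (thread names and
 * values are illustrative): thread A with priority value 100 blocks on
 * a PI mutex owned by B (140), and B is itself blocked on a second PI
 * mutex owned by C (160).  The loop lends 100 to B, follows B's
 * uq_pi_blocked to the second mutex, and lends 100 to C as well.  It
 * stops as soon as an owner's UPRI() is already <= the lent value, or
 * when the current owner is not blocked on any PI mutex
 * (uq_pi_blocked == NULL).
 */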
1433161678Sdavidxu
1434161678Sdavidxu/*
1435161678Sdavidxu * Unpropagate priority for a PI mutex when a thread blocked on
1436161678Sdavidxu * it is interrupted by a signal or resumed by another thread.
1437161678Sdavidxu */
1438161678Sdavidxustatic void
1439161678Sdavidxuumtx_unpropagate_priority(struct umtx_pi *pi)
1440161678Sdavidxu{
1441161678Sdavidxu	struct umtx_q *uq, *uq_owner;
1442161678Sdavidxu	struct umtx_pi *pi2;
1443174701Sdavidxu	int pri, oldpri;
1444161678Sdavidxu
1445170300Sjeff	mtx_assert(&umtx_lock, MA_OWNED);
1446161678Sdavidxu
1447161678Sdavidxu	while (pi != NULL && pi->pi_owner != NULL) {
1448161678Sdavidxu		pri = PRI_MAX;
1449161678Sdavidxu		uq_owner = pi->pi_owner->td_umtxq;
1450161678Sdavidxu
1451161678Sdavidxu		TAILQ_FOREACH(pi2, &uq_owner->uq_pi_contested, pi_link) {
1452161678Sdavidxu			uq = TAILQ_FIRST(&pi2->pi_blocked);
1453161678Sdavidxu			if (uq != NULL) {
1454161678Sdavidxu				if (pri > UPRI(uq->uq_thread))
1455161678Sdavidxu					pri = UPRI(uq->uq_thread);
1456161678Sdavidxu			}
1457161678Sdavidxu		}
1458161678Sdavidxu
1459161678Sdavidxu		if (pri > uq_owner->uq_inherited_pri)
1460161678Sdavidxu			pri = uq_owner->uq_inherited_pri;
1461170300Sjeff		thread_lock(pi->pi_owner);
1462174701Sdavidxu		oldpri = pi->pi_owner->td_user_pri;
1463161678Sdavidxu		sched_unlend_user_prio(pi->pi_owner, pri);
1464170300Sjeff		thread_unlock(pi->pi_owner);
1465189756Sdavidxu		if (uq_owner->uq_pi_blocked != NULL)
1466189756Sdavidxu			umtx_pi_adjust_locked(pi->pi_owner, oldpri);
1467161678Sdavidxu		pi = uq_owner->uq_pi_blocked;
1468161678Sdavidxu	}
1469161678Sdavidxu}
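
/*
 * Worked example for umtx_unpropagate_priority() (values are
 * illustrative): if the owner still holds two contested PI mutexes
 * whose head waiters have priority values 130 and 150, and its own
 * inherited priority is 160, the recomputed value is
 * min(130, 150, 160) = 130.  Only the head of each pi_blocked queue
 * needs to be examined because the queues are sorted; the while loop
 * then repeats the calculation up the chain when the owner is itself
 * blocked on another PI mutex.
 */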
1470161678Sdavidxu
1471161678Sdavidxu/*
1472161678Sdavidxu * Insert a PI mutex into owned list.
1473161678Sdavidxu */
1474161678Sdavidxustatic void
1475161678Sdavidxuumtx_pi_setowner(struct umtx_pi *pi, struct thread *owner)
1476161678Sdavidxu{
1477161678Sdavidxu	struct umtx_q *uq_owner;
1478161678Sdavidxu
1479161678Sdavidxu	uq_owner = owner->td_umtxq;
1480170300Sjeff	mtx_assert(&umtx_lock, MA_OWNED);
1481161678Sdavidxu	if (pi->pi_owner != NULL)
1482161678Sdavidxu		panic("pi_owner != NULL");
1483161678Sdavidxu	pi->pi_owner = owner;
1484161678Sdavidxu	TAILQ_INSERT_TAIL(&uq_owner->uq_pi_contested, pi, pi_link);
1485161678Sdavidxu}
1486161678Sdavidxu
1487161678Sdavidxu/*
1488161678Sdavidxu * Claim ownership of a PI mutex.
1489161678Sdavidxu */
1490161678Sdavidxustatic int
1491161678Sdavidxuumtx_pi_claim(struct umtx_pi *pi, struct thread *owner)
1492161678Sdavidxu{
1493161678Sdavidxu	struct umtx_q *uq, *uq_owner;
1494161678Sdavidxu
1495161678Sdavidxu	uq_owner = owner->td_umtxq;
1496170300Sjeff	mtx_lock_spin(&umtx_lock);
1497161678Sdavidxu	if (pi->pi_owner == owner) {
1498170300Sjeff		mtx_unlock_spin(&umtx_lock);
1499161678Sdavidxu		return (0);
1500161678Sdavidxu	}
1501161678Sdavidxu
1502161678Sdavidxu	if (pi->pi_owner != NULL) {
1503161678Sdavidxu		/*
1504161678Sdavidxu		 * Userland may have already messed up the mutex, sigh.
1505161678Sdavidxu		 */
1506170300Sjeff		mtx_unlock_spin(&umtx_lock);
1507161678Sdavidxu		return (EPERM);
1508161678Sdavidxu	}
1509161678Sdavidxu	umtx_pi_setowner(pi, owner);
1510161678Sdavidxu	uq = TAILQ_FIRST(&pi->pi_blocked);
1511161678Sdavidxu	if (uq != NULL) {
1512161678Sdavidxu		int pri;
1513161678Sdavidxu
1514161678Sdavidxu		pri = UPRI(uq->uq_thread);
1515170300Sjeff		thread_lock(owner);
1516161678Sdavidxu		if (pri < UPRI(owner))
1517161678Sdavidxu			sched_lend_user_prio(owner, pri);
1518170300Sjeff		thread_unlock(owner);
1519161678Sdavidxu	}
1520170300Sjeff	mtx_unlock_spin(&umtx_lock);
1521161678Sdavidxu	return (0);
1522161678Sdavidxu}
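
/*
 * Note on umtx_pi_claim(): if a waiter is already queued when
 * ownership is recorded, the new owner immediately inherits the head
 * waiter's priority, so the lending invariant holds from the instant
 * pi_owner is set.  The EPERM return covers the case where userland
 * handed the mutex to some other thread behind the kernel's back.
 */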
1523161678Sdavidxu
1524174701Sdavidxustatic void
1525174701Sdavidxuumtx_pi_adjust_locked(struct thread *td, u_char oldpri)
1526161599Sdavidxu{
1527161678Sdavidxu	struct umtx_q *uq;
1528161678Sdavidxu	struct umtx_pi *pi;
1529161678Sdavidxu
1530161678Sdavidxu	uq = td->td_umtxq;
1531161678Sdavidxu	/*
1532161678Sdavidxu	 * Pick up the lock that td is blocked on.
1533161678Sdavidxu	 */
1534161678Sdavidxu	pi = uq->uq_pi_blocked;
1535161678Sdavidxu	MPASS(pi != NULL);
1536161678Sdavidxu
1537161678Sdavidxu	/* Re-sort the thread on the mutex's blocked queue. */
1538161678Sdavidxu	if (!umtx_pi_adjust_thread(pi, td))
1539161678Sdavidxu		return;
1540161678Sdavidxu
1541161678Sdavidxu	/*
1542161678Sdavidxu	 * If our priority value was lowered (i.e. our scheduling priority
1543161678Sdavidxu	 * was raised) and we are at the head of the blocked queue, then
1543161678Sdavidxu	 * propagate the new priority up the chain.
1544161678Sdavidxu	 */
1545161678Sdavidxu	if (uq == TAILQ_FIRST(&pi->pi_blocked) && UPRI(td) < oldpri)
1546161678Sdavidxu		umtx_propagate_priority(td);
1547161599Sdavidxu}
1548161599Sdavidxu
1549161678Sdavidxu/*
1550174701Sdavidxu * Adjust a thread's position in the queue of the PI mutex it is
1551174701Sdavidxu * blocked on; this may trigger a new round of priority propagation.
1552174701Sdavidxu */
1553174701Sdavidxuvoid
1554174701Sdavidxuumtx_pi_adjust(struct thread *td, u_char oldpri)
1555174701Sdavidxu{
1556174707Sdavidxu	struct umtx_q *uq;
1557174707Sdavidxu	struct umtx_pi *pi;
1558174707Sdavidxu
1559174707Sdavidxu	uq = td->td_umtxq;
1560174701Sdavidxu	mtx_lock_spin(&umtx_lock);
1561174707Sdavidxu	/*
1562174707Sdavidxu	 * Pick up the lock that td is blocked on.
1563174707Sdavidxu	 */
1564174707Sdavidxu	pi = uq->uq_pi_blocked;
1565174707Sdavidxu	if (pi != NULL)
1566174707Sdavidxu		umtx_pi_adjust_locked(td, oldpri);
1567174701Sdavidxu	mtx_unlock_spin(&umtx_lock);
1568174701Sdavidxu}
1569174701Sdavidxu
1570174701Sdavidxu/*
1571161678Sdavidxu * Sleep on a PI mutex.
1572161678Sdavidxu */
1573161678Sdavidxustatic int
1574161678Sdavidxuumtxq_sleep_pi(struct umtx_q *uq, struct umtx_pi *pi,
1575161678Sdavidxu	uint32_t owner, const char *wmesg, int timo)
1576161678Sdavidxu{
1577161678Sdavidxu	struct umtxq_chain *uc;
1578161678Sdavidxu	struct thread *td, *td1;
1579161678Sdavidxu	struct umtx_q *uq1;
1580161678Sdavidxu	int pri;
1581161678Sdavidxu	int error = 0;
1582161678Sdavidxu
1583161678Sdavidxu	td = uq->uq_thread;
1584161678Sdavidxu	KASSERT(td == curthread, ("inconsistent uq_thread"));
1585161678Sdavidxu	uc = umtxq_getchain(&uq->uq_key);
1586161678Sdavidxu	UMTXQ_LOCKED_ASSERT(uc);
1587189756Sdavidxu	UMTXQ_BUSY_ASSERT(uc);
1588161678Sdavidxu	umtxq_insert(uq);
1589189756Sdavidxu	mtx_lock_spin(&umtx_lock);
1590161678Sdavidxu	if (pi->pi_owner == NULL) {
1591189756Sdavidxu		mtx_unlock_spin(&umtx_lock);
1592213642Sdavidxu		/* XXX Only look up thread in current process. */
1593213642Sdavidxu		td1 = tdfind(owner, curproc->p_pid);
1594170300Sjeff		mtx_lock_spin(&umtx_lock);
1595161678Sdavidxu		if (td1 != NULL) {
1596161678Sdavidxu			if (pi->pi_owner == NULL) {
1596161678Sdavidxu				uq1 = td1->td_umtxq;
1597161678Sdavidxu				umtx_pi_setowner(pi, td1);
1598161678Sdavidxu			}
1599213642Sdavidxu			/* tdfind() returns with the proc lock held. */
1599213642Sdavidxu			PROC_UNLOCK(td1->td_proc);
1599213642Sdavidxu		}
1600161678Sdavidxu	}
1601161678Sdavidxu
1602161678Sdavidxu	TAILQ_FOREACH(uq1, &pi->pi_blocked, uq_lockq) {
1603161678Sdavidxu		pri = UPRI(uq1->uq_thread);
1604161678Sdavidxu		if (pri > UPRI(td))
1605161678Sdavidxu			break;
1606161678Sdavidxu	}
1607161678Sdavidxu
1608161678Sdavidxu	if (uq1 != NULL)
1609161678Sdavidxu		TAILQ_INSERT_BEFORE(uq1, uq, uq_lockq);
1610161678Sdavidxu	else
1611161678Sdavidxu		TAILQ_INSERT_TAIL(&pi->pi_blocked, uq, uq_lockq);
1612161678Sdavidxu
1613161678Sdavidxu	uq->uq_pi_blocked = pi;
1614174701Sdavidxu	thread_lock(td);
1615161678Sdavidxu	td->td_flags |= TDF_UPIBLOCKED;
1616174701Sdavidxu	thread_unlock(td);
1617161678Sdavidxu	umtx_propagate_priority(td);
1618170300Sjeff	mtx_unlock_spin(&umtx_lock);
1619189756Sdavidxu	umtxq_unbusy(&uq->uq_key);
1620161678Sdavidxu
1621161678Sdavidxu	if (uq->uq_flags & UQF_UMTXQ) {
1622161678Sdavidxu		error = msleep(uq, &uc->uc_lock, PCATCH, wmesg, timo);
1623161678Sdavidxu		if (error == EWOULDBLOCK)
1624161678Sdavidxu			error = ETIMEDOUT;
1625161678Sdavidxu		if (uq->uq_flags & UQF_UMTXQ) {
1626161678Sdavidxu			umtxq_remove(uq);
1627161678Sdavidxu		}
1628161678Sdavidxu	}
1629170300Sjeff	mtx_lock_spin(&umtx_lock);
1630161678Sdavidxu	uq->uq_pi_blocked = NULL;
1631174701Sdavidxu	thread_lock(td);
1632161678Sdavidxu	td->td_flags &= ~TDF_UPIBLOCKED;
1633174701Sdavidxu	thread_unlock(td);
1634161678Sdavidxu	TAILQ_REMOVE(&pi->pi_blocked, uq, uq_lockq);
1635161678Sdavidxu	umtx_unpropagate_priority(pi);
1636170300Sjeff	mtx_unlock_spin(&umtx_lock);
1637189756Sdavidxu	umtxq_unlock(&uq->uq_key);
1638161678Sdavidxu
1639161678Sdavidxu	return (error);
1640161678Sdavidxu}
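
/*
 * Note on the ordering in umtxq_sleep_pi(): the thread is put on the
 * sleep queue, linked into pi_blocked in priority order, and
 * priorities are propagated, all before the chain is unbusied and
 * msleep() runs.  A concurrent unlocker therefore always observes a
 * fully linked waiter.  On wakeup the links are undone in reverse and
 * any lent priority is withdrawn through umtx_unpropagate_priority().
 */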
1641161678Sdavidxu
1642161678Sdavidxu/*
1643161678Sdavidxu * Add reference count for a PI mutex.
1644161678Sdavidxu */
1645161678Sdavidxustatic void
1646161678Sdavidxuumtx_pi_ref(struct umtx_pi *pi)
1647161678Sdavidxu{
1648161678Sdavidxu	struct umtxq_chain *uc;
1649161678Sdavidxu
1650161678Sdavidxu	uc = umtxq_getchain(&pi->pi_key);
1651161678Sdavidxu	UMTXQ_LOCKED_ASSERT(uc);
1652161678Sdavidxu	pi->pi_refcount++;
1653161678Sdavidxu}
1654161678Sdavidxu
1655161678Sdavidxu/*
1656161678Sdavidxu * Decrease reference count for a PI mutex, if the counter
1657161678Sdavidxu * is decreased to zero, its memory space is freed.
1658161678Sdavidxu */
1659161678Sdavidxustatic void
1660161678Sdavidxuumtx_pi_unref(struct umtx_pi *pi)
1661161678Sdavidxu{
1662161678Sdavidxu	struct umtxq_chain *uc;
1663161678Sdavidxu
1664161678Sdavidxu	uc = umtxq_getchain(&pi->pi_key);
1665161678Sdavidxu	UMTXQ_LOCKED_ASSERT(uc);
1666161678Sdavidxu	KASSERT(pi->pi_refcount > 0, ("invalid reference count"));
1667161678Sdavidxu	if (--pi->pi_refcount == 0) {
1668170300Sjeff		mtx_lock_spin(&umtx_lock);
1669161678Sdavidxu		if (pi->pi_owner != NULL) {
1670161678Sdavidxu			TAILQ_REMOVE(&pi->pi_owner->td_umtxq->uq_pi_contested,
1671161678Sdavidxu				pi, pi_link);
1672161678Sdavidxu			pi->pi_owner = NULL;
1673161678Sdavidxu		}
1674161678Sdavidxu		KASSERT(TAILQ_EMPTY(&pi->pi_blocked),
1675161678Sdavidxu			("blocked queue not empty"));
1676170300Sjeff		mtx_unlock_spin(&umtx_lock);
1677161678Sdavidxu		TAILQ_REMOVE(&uc->uc_pi_list, pi, pi_hashlink);
1678189756Sdavidxu		umtx_pi_free(pi);
1679161678Sdavidxu	}
1680161678Sdavidxu}
1681161678Sdavidxu
1682161678Sdavidxu/*
1683161678Sdavidxu * Find a PI mutex in hash table.
1684161678Sdavidxu */
1685161678Sdavidxustatic struct umtx_pi *
1686161678Sdavidxuumtx_pi_lookup(struct umtx_key *key)
1687161678Sdavidxu{
1688161678Sdavidxu	struct umtxq_chain *uc;
1689161678Sdavidxu	struct umtx_pi *pi;
1690161678Sdavidxu
1691161678Sdavidxu	uc = umtxq_getchain(key);
1692161678Sdavidxu	UMTXQ_LOCKED_ASSERT(uc);
1693161678Sdavidxu
1694161678Sdavidxu	TAILQ_FOREACH(pi, &uc->uc_pi_list, pi_hashlink) {
1695161678Sdavidxu		if (umtx_key_match(&pi->pi_key, key)) {
1696161678Sdavidxu			return (pi);
1697161678Sdavidxu		}
1698161678Sdavidxu	}
1699161678Sdavidxu	return (NULL);
1700161678Sdavidxu}
1701161678Sdavidxu
1702161678Sdavidxu/*
1703161678Sdavidxu * Insert a PI mutex into hash table.
1704161678Sdavidxu */
1705161678Sdavidxustatic inline void
1706161678Sdavidxuumtx_pi_insert(struct umtx_pi *pi)
1707161678Sdavidxu{
1708161678Sdavidxu	struct umtxq_chain *uc;
1709161678Sdavidxu
1710161678Sdavidxu	uc = umtxq_getchain(&pi->pi_key);
1711161678Sdavidxu	UMTXQ_LOCKED_ASSERT(uc);
1712161678Sdavidxu	TAILQ_INSERT_TAIL(&uc->uc_pi_list, pi, pi_hashlink);
1713161678Sdavidxu}
1714161678Sdavidxu
1715161678Sdavidxu/*
1716161678Sdavidxu * Lock a PI mutex.
1717161678Sdavidxu */
1718161678Sdavidxustatic int
1719161678Sdavidxu_do_lock_pi(struct thread *td, struct umutex *m, uint32_t flags, int timo,
1720161678Sdavidxu	int try)
1721161678Sdavidxu{
1722161678Sdavidxu	struct umtx_q *uq;
1723161678Sdavidxu	struct umtx_pi *pi, *new_pi;
1724161678Sdavidxu	uint32_t id, owner, old;
1725161678Sdavidxu	int error;
1726161678Sdavidxu
1727161678Sdavidxu	id = td->td_tid;
1728161678Sdavidxu	uq = td->td_umtxq;
1729161678Sdavidxu
1730161678Sdavidxu	if ((error = umtx_key_get(m, TYPE_PI_UMUTEX, GET_SHARE(flags),
1731161678Sdavidxu	    &uq->uq_key)) != 0)
1732161678Sdavidxu		return (error);
1733163697Sdavidxu	umtxq_lock(&uq->uq_key);
1734163697Sdavidxu	pi = umtx_pi_lookup(&uq->uq_key);
1735163697Sdavidxu	if (pi == NULL) {
1736163697Sdavidxu		new_pi = umtx_pi_alloc(M_NOWAIT);
1737163697Sdavidxu		if (new_pi == NULL) {
1738161678Sdavidxu			umtxq_unlock(&uq->uq_key);
1739163697Sdavidxu			new_pi = umtx_pi_alloc(M_WAITOK);
1740161678Sdavidxu			umtxq_lock(&uq->uq_key);
1741161678Sdavidxu			pi = umtx_pi_lookup(&uq->uq_key);
1742163697Sdavidxu			if (pi != NULL) {
1743161678Sdavidxu				umtx_pi_free(new_pi);
1744163697Sdavidxu				new_pi = NULL;
1745161678Sdavidxu			}
1746161678Sdavidxu		}
1747163697Sdavidxu		if (new_pi != NULL) {
1748163697Sdavidxu			new_pi->pi_key = uq->uq_key;
1749163697Sdavidxu			umtx_pi_insert(new_pi);
1750163697Sdavidxu			pi = new_pi;
1751163697Sdavidxu		}
1752163697Sdavidxu	}
1753163697Sdavidxu	umtx_pi_ref(pi);
1754163697Sdavidxu	umtxq_unlock(&uq->uq_key);
1755161678Sdavidxu
1756163697Sdavidxu	/*
1757163697Sdavidxu	 * Care must be exercised when dealing with umtx structure.  It
1758163697Sdavidxu	 * can fault on any access.
1759163697Sdavidxu	 */
1760163697Sdavidxu	for (;;) {
1761161678Sdavidxu		/*
1762161678Sdavidxu		 * Try the uncontested case.  This should be done in userland.
1763161678Sdavidxu		 */
1764161678Sdavidxu		owner = casuword32(&m->m_owner, UMUTEX_UNOWNED, id);
1765161678Sdavidxu
1766161678Sdavidxu		/* The acquire succeeded. */
1767161678Sdavidxu		if (owner == UMUTEX_UNOWNED) {
1768161678Sdavidxu			error = 0;
1769161678Sdavidxu			break;
1770161678Sdavidxu		}
1771161678Sdavidxu
1772161678Sdavidxu		/* The address was invalid. */
1773161678Sdavidxu		if (owner == -1) {
1774161678Sdavidxu			error = EFAULT;
1775161678Sdavidxu			break;
1776161678Sdavidxu		}
1777161678Sdavidxu
1778161678Sdavidxu		/* If no one owns it but it is contested, try to acquire it. */
1779161678Sdavidxu		if (owner == UMUTEX_CONTESTED) {
1780161678Sdavidxu			owner = casuword32(&m->m_owner,
1781161678Sdavidxu			    UMUTEX_CONTESTED, id | UMUTEX_CONTESTED);
1782161678Sdavidxu
1783161678Sdavidxu			if (owner == UMUTEX_CONTESTED) {
1784161678Sdavidxu				umtxq_lock(&uq->uq_key);
1785189756Sdavidxu				umtxq_busy(&uq->uq_key);
1786161678Sdavidxu				error = umtx_pi_claim(pi, td);
1787189756Sdavidxu				umtxq_unbusy(&uq->uq_key);
1788161678Sdavidxu				umtxq_unlock(&uq->uq_key);
1789161678Sdavidxu				break;
1790161678Sdavidxu			}
1791161678Sdavidxu
1792161678Sdavidxu			/* The address was invalid. */
1793161678Sdavidxu			if (owner == -1) {
1794161678Sdavidxu				error = EFAULT;
1795161678Sdavidxu				break;
1796161678Sdavidxu			}
1797161678Sdavidxu
1798161678Sdavidxu			/* If this failed the lock has changed, restart. */
1799161678Sdavidxu			continue;
1800161678Sdavidxu		}
1801161678Sdavidxu
1802161678Sdavidxu		if ((flags & UMUTEX_ERROR_CHECK) != 0 &&
1803161678Sdavidxu		    (owner & ~UMUTEX_CONTESTED) == id) {
1804161678Sdavidxu			error = EDEADLK;
1805161678Sdavidxu			break;
1806161678Sdavidxu		}
1807161678Sdavidxu
1808161678Sdavidxu		if (try != 0) {
1809161678Sdavidxu			error = EBUSY;
1810161678Sdavidxu			break;
1811161678Sdavidxu		}
1812161678Sdavidxu
1813161678Sdavidxu		/*
1814161678Sdavidxu		 * If we caught a signal, we have retried and now
1815161678Sdavidxu		 * exit immediately.
1816161678Sdavidxu		 */
1817161678Sdavidxu		if (error != 0)
1818161678Sdavidxu			break;
1819161678Sdavidxu
1820161678Sdavidxu		umtxq_lock(&uq->uq_key);
1821161678Sdavidxu		umtxq_busy(&uq->uq_key);
1822161678Sdavidxu		umtxq_unlock(&uq->uq_key);
1823161678Sdavidxu
1824161678Sdavidxu		/*
1825161678Sdavidxu		 * Set the contested bit so that a release in user space
1826161678Sdavidxu		 * knows to use the system call for unlock.  If this fails
1827161678Sdavidxu		 * either some one else has acquired the lock or it has been
1828161678Sdavidxu		 * released.
1829161678Sdavidxu		 */
1830161678Sdavidxu		old = casuword32(&m->m_owner, owner, owner | UMUTEX_CONTESTED);
1831161678Sdavidxu
1832161678Sdavidxu		/* The address was invalid. */
1833161678Sdavidxu		if (old == -1) {
1834161678Sdavidxu			umtxq_lock(&uq->uq_key);
1835161678Sdavidxu			umtxq_unbusy(&uq->uq_key);
1836161678Sdavidxu			umtxq_unlock(&uq->uq_key);
1837161678Sdavidxu			error = EFAULT;
1838161678Sdavidxu			break;
1839161678Sdavidxu		}
1840161678Sdavidxu
1841161678Sdavidxu		umtxq_lock(&uq->uq_key);
1842161678Sdavidxu		/*
1843161678Sdavidxu		 * If we successfully set the contested bit, sleep.  Otherwise
1844161678Sdavidxu		 * the lock changed and we need to retry, or we lost a race to
1845161678Sdavidxu		 * the thread unlocking the umtx.
1846161678Sdavidxu		 */
1847161678Sdavidxu		if (old == owner)
1848161678Sdavidxu			error = umtxq_sleep_pi(uq, pi, owner & ~UMUTEX_CONTESTED,
1849161678Sdavidxu				 "umtxpi", timo);
1850189756Sdavidxu		else {
1851189756Sdavidxu			umtxq_unbusy(&uq->uq_key);
1852189756Sdavidxu			umtxq_unlock(&uq->uq_key);
1853189756Sdavidxu		}
1854161678Sdavidxu	}
1855161678Sdavidxu
1856163697Sdavidxu	umtxq_lock(&uq->uq_key);
1857163697Sdavidxu	umtx_pi_unref(pi);
1858163697Sdavidxu	umtxq_unlock(&uq->uq_key);
1859161678Sdavidxu
1860161678Sdavidxu	umtx_key_release(&uq->uq_key);
1861161678Sdavidxu	return (error);
1862161678Sdavidxu}
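
/*
 * A minimal sketch of the userland fast path that the loop above
 * assumes (illustrative only: example_pi_lock is not part of this
 * file, and error handling is omitted).  Only a contested owner word
 * falls through to the _umtx_op(2) system call handled here.
 */
#if 0
#include <sys/types.h>
#include <sys/umtx.h>
#include <machine/atomic.h>

static int
example_pi_lock(struct umutex *m, uint32_t tid)
{
	/* Uncontested case: CAS UMUTEX_UNOWNED -> our thread id. */
	if (atomic_cmpset_acq_32((volatile uint32_t *)&m->m_owner,
	    UMUTEX_UNOWNED, tid))
		return (0);
	/* Contested: enter the kernel, which queues us and lends priority. */
	return (_umtx_op(m, UMTX_OP_MUTEX_LOCK, 0, NULL, NULL));
}
#endif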
1863161678Sdavidxu
1864161678Sdavidxu/*
1865161678Sdavidxu * Unlock a PI mutex.
1866161678Sdavidxu */
1867161678Sdavidxustatic int
1868161678Sdavidxudo_unlock_pi(struct thread *td, struct umutex *m, uint32_t flags)
1869161678Sdavidxu{
1870161678Sdavidxu	struct umtx_key key;
1871161678Sdavidxu	struct umtx_q *uq_first, *uq_first2, *uq_me;
1872161678Sdavidxu	struct umtx_pi *pi, *pi2;
1873161678Sdavidxu	uint32_t owner, old, id;
1874161678Sdavidxu	int error;
1875161678Sdavidxu	int count;
1876161678Sdavidxu	int pri;
1877161678Sdavidxu
1878161678Sdavidxu	id = td->td_tid;
1879161678Sdavidxu	/*
1880161678Sdavidxu	 * Make sure we own this mtx.
1881161678Sdavidxu	 */
1882163449Sdavidxu	owner = fuword32(__DEVOLATILE(uint32_t *, &m->m_owner));
1883161678Sdavidxu	if (owner == -1)
1884161678Sdavidxu		return (EFAULT);
1885161678Sdavidxu
1886161678Sdavidxu	if ((owner & ~UMUTEX_CONTESTED) != id)
1887161678Sdavidxu		return (EPERM);
1888161678Sdavidxu
1889161678Sdavidxu	/* This should be done in userland */
1890161678Sdavidxu	if ((owner & UMUTEX_CONTESTED) == 0) {
1891161678Sdavidxu		old = casuword32(&m->m_owner, owner, UMUTEX_UNOWNED);
1892161678Sdavidxu		if (old == -1)
1893161678Sdavidxu			return (EFAULT);
1894161678Sdavidxu		if (old == owner)
1895161678Sdavidxu			return (0);
1896161855Sdavidxu		owner = old;
1897161678Sdavidxu	}
1898161678Sdavidxu
1899161678Sdavidxu	/* We should only ever be in here for contested locks */
1900161678Sdavidxu	if ((error = umtx_key_get(m, TYPE_PI_UMUTEX, GET_SHARE(flags),
1901161678Sdavidxu	    &key)) != 0)
1902161678Sdavidxu		return (error);
1903161678Sdavidxu
1904161678Sdavidxu	umtxq_lock(&key);
1905161678Sdavidxu	umtxq_busy(&key);
1906161678Sdavidxu	count = umtxq_count_pi(&key, &uq_first);
1907161678Sdavidxu	if (uq_first != NULL) {
1908189756Sdavidxu		mtx_lock_spin(&umtx_lock);
1909161678Sdavidxu		pi = uq_first->uq_pi_blocked;
1910189756Sdavidxu		KASSERT(pi != NULL, ("pi == NULL?"));
1911161678Sdavidxu		if (pi->pi_owner != curthread) {
1912189756Sdavidxu			mtx_unlock_spin(&umtx_lock);
1913161678Sdavidxu			umtxq_unbusy(&key);
1914161678Sdavidxu			umtxq_unlock(&key);
1915189756Sdavidxu			umtx_key_release(&key);
1916161678Sdavidxu			/* userland messed the mutex */
1917161678Sdavidxu			return (EPERM);
1918161678Sdavidxu		}
1919161678Sdavidxu		uq_me = curthread->td_umtxq;
1920161678Sdavidxu		pi->pi_owner = NULL;
1921161678Sdavidxu		TAILQ_REMOVE(&uq_me->uq_pi_contested, pi, pi_link);
1922189756Sdavidxu		/* Find the highest-priority thread that is still sleeping. */
1923161678Sdavidxu		uq_first = TAILQ_FIRST(&pi->pi_blocked);
1924189756Sdavidxu		while (uq_first != NULL &&
1925189756Sdavidxu		       (uq_first->uq_flags & UQF_UMTXQ) == 0) {
1926189756Sdavidxu			uq_first = TAILQ_NEXT(uq_first, uq_lockq);
1927189756Sdavidxu		}
1928161678Sdavidxu		pri = PRI_MAX;
1929161678Sdavidxu		TAILQ_FOREACH(pi2, &uq_me->uq_pi_contested, pi_link) {
1930161678Sdavidxu			uq_first2 = TAILQ_FIRST(&pi2->pi_blocked);
1931161678Sdavidxu			if (uq_first2 != NULL) {
1932161678Sdavidxu				if (pri > UPRI(uq_first2->uq_thread))
1933161678Sdavidxu					pri = UPRI(uq_first2->uq_thread);
1934161678Sdavidxu			}
1935161678Sdavidxu		}
1936170300Sjeff		thread_lock(curthread);
1937161678Sdavidxu		sched_unlend_user_prio(curthread, pri);
1938170300Sjeff		thread_unlock(curthread);
1939170300Sjeff		mtx_unlock_spin(&umtx_lock);
1940189756Sdavidxu		if (uq_first)
1941189756Sdavidxu			umtxq_signal_thread(uq_first);
1942161678Sdavidxu	}
1943161678Sdavidxu	umtxq_unlock(&key);
1944161678Sdavidxu
1945161678Sdavidxu	/*
1946161678Sdavidxu	 * When unlocking the umtx, it must be marked as unowned if
1947161678Sdavidxu	 * there is zero or one thread only waiting for it.
1948161678Sdavidxu	 * Otherwise, it must be marked as contested.
1949161678Sdavidxu	 */
1950161678Sdavidxu	old = casuword32(&m->m_owner, owner,
1951161678Sdavidxu		count <= 1 ? UMUTEX_UNOWNED : UMUTEX_CONTESTED);
1952161678Sdavidxu
1953161678Sdavidxu	umtxq_lock(&key);
1954161678Sdavidxu	umtxq_unbusy(&key);
1955161678Sdavidxu	umtxq_unlock(&key);
1956161678Sdavidxu	umtx_key_release(&key);
1957161678Sdavidxu	if (old == -1)
1958161678Sdavidxu		return (EFAULT);
1959161678Sdavidxu	if (old != owner)
1960161678Sdavidxu		return (EINVAL);
1961161678Sdavidxu	return (0);
1962161678Sdavidxu}
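
/*
 * Example of the final store in do_unlock_pi() (illustrative): with
 * two sleeping waiters, count == 2, so m_owner becomes
 * UMUTEX_CONTESTED and the woken thread must take the system-call
 * path again; with exactly one waiter the word becomes
 * UMUTEX_UNOWNED, letting the woken thread win the lock with the
 * userland fast path.
 */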
1963161678Sdavidxu
1964161678Sdavidxu/*
1965161678Sdavidxu * Lock a PP mutex.
1966161678Sdavidxu */
1967161678Sdavidxustatic int
1968161678Sdavidxu_do_lock_pp(struct thread *td, struct umutex *m, uint32_t flags, int timo,
1969161678Sdavidxu	int try)
1970161678Sdavidxu{
1971161678Sdavidxu	struct umtx_q *uq, *uq2;
1972161678Sdavidxu	struct umtx_pi *pi;
1973161678Sdavidxu	uint32_t ceiling;
1974161678Sdavidxu	uint32_t owner, id;
1975161678Sdavidxu	int error, pri, old_inherited_pri, su;
1976161678Sdavidxu
1977161678Sdavidxu	id = td->td_tid;
1978161678Sdavidxu	uq = td->td_umtxq;
1979161678Sdavidxu	if ((error = umtx_key_get(m, TYPE_PP_UMUTEX, GET_SHARE(flags),
1980161678Sdavidxu	    &uq->uq_key)) != 0)
1981161678Sdavidxu		return (error);
1982164033Srwatson	su = (priv_check(td, PRIV_SCHED_RTPRIO) == 0);
1983161678Sdavidxu	for (;;) {
1984161678Sdavidxu		old_inherited_pri = uq->uq_inherited_pri;
1985161678Sdavidxu		umtxq_lock(&uq->uq_key);
1986161678Sdavidxu		umtxq_busy(&uq->uq_key);
1987161678Sdavidxu		umtxq_unlock(&uq->uq_key);
1988161678Sdavidxu
1989161678Sdavidxu		ceiling = RTP_PRIO_MAX - fuword32(&m->m_ceilings[0]);
1990161678Sdavidxu		if (ceiling > RTP_PRIO_MAX) {
1991161678Sdavidxu			error = EINVAL;
1992161678Sdavidxu			goto out;
1993161678Sdavidxu		}
1994161678Sdavidxu
1995170300Sjeff		mtx_lock_spin(&umtx_lock);
1996161678Sdavidxu		if (UPRI(td) < PRI_MIN_REALTIME + ceiling) {
1997170300Sjeff			mtx_unlock_spin(&umtx_lock);
1998161678Sdavidxu			error = EINVAL;
1999161678Sdavidxu			goto out;
2000161678Sdavidxu		}
2001161678Sdavidxu		if (su && PRI_MIN_REALTIME + ceiling < uq->uq_inherited_pri) {
2002161678Sdavidxu			uq->uq_inherited_pri = PRI_MIN_REALTIME + ceiling;
2003170300Sjeff			thread_lock(td);
2004161678Sdavidxu			if (uq->uq_inherited_pri < UPRI(td))
2005161678Sdavidxu				sched_lend_user_prio(td, uq->uq_inherited_pri);
2006170300Sjeff			thread_unlock(td);
2007161678Sdavidxu		}
2008170300Sjeff		mtx_unlock_spin(&umtx_lock);
2009161678Sdavidxu
2010161678Sdavidxu		owner = casuword32(&m->m_owner,
2011161678Sdavidxu		    UMUTEX_CONTESTED, id | UMUTEX_CONTESTED);
2012161678Sdavidxu
2013161678Sdavidxu		if (owner == UMUTEX_CONTESTED) {
2014161678Sdavidxu			error = 0;
2015161678Sdavidxu			break;
2016161678Sdavidxu		}
2017161678Sdavidxu
2018161678Sdavidxu		/* The address was invalid. */
2019161678Sdavidxu		if (owner == -1) {
2020161678Sdavidxu			error = EFAULT;
2021161678Sdavidxu			break;
2022161678Sdavidxu		}
2023161678Sdavidxu
2024161678Sdavidxu		if ((flags & UMUTEX_ERROR_CHECK) != 0 &&
2025161678Sdavidxu		    (owner & ~UMUTEX_CONTESTED) == id) {
2026161678Sdavidxu			error = EDEADLK;
2027161678Sdavidxu			break;
2028161678Sdavidxu		}
2029161678Sdavidxu
2030161678Sdavidxu		if (try != 0) {
2031161678Sdavidxu			error = EBUSY;
2032161678Sdavidxu			break;
2033161678Sdavidxu		}
2034161678Sdavidxu
2035161678Sdavidxu		/*
2036161678Sdavidxu		 * If we caught a signal, we have retried and now
2037161678Sdavidxu		 * exit immediately.
2038161678Sdavidxu		 */
2039161678Sdavidxu		if (error != 0)
2040161678Sdavidxu			break;
2041161678Sdavidxu
2042161678Sdavidxu		umtxq_lock(&uq->uq_key);
2043161678Sdavidxu		umtxq_insert(uq);
2044161678Sdavidxu		umtxq_unbusy(&uq->uq_key);
2045161678Sdavidxu		error = umtxq_sleep(uq, "umtxpp", timo);
2046161678Sdavidxu		umtxq_remove(uq);
2047161678Sdavidxu		umtxq_unlock(&uq->uq_key);
2048161678Sdavidxu
2049170300Sjeff		mtx_lock_spin(&umtx_lock);
2050161678Sdavidxu		uq->uq_inherited_pri = old_inherited_pri;
2051161678Sdavidxu		pri = PRI_MAX;
2052161678Sdavidxu		TAILQ_FOREACH(pi, &uq->uq_pi_contested, pi_link) {
2053161678Sdavidxu			uq2 = TAILQ_FIRST(&pi->pi_blocked);
2054161678Sdavidxu			if (uq2 != NULL) {
2055161678Sdavidxu				if (pri > UPRI(uq2->uq_thread))
2056161678Sdavidxu					pri = UPRI(uq2->uq_thread);
2057161678Sdavidxu			}
2058161678Sdavidxu		}
2059161678Sdavidxu		if (pri > uq->uq_inherited_pri)
2060161678Sdavidxu			pri = uq->uq_inherited_pri;
2061170300Sjeff		thread_lock(td);
2062161678Sdavidxu		sched_unlend_user_prio(td, pri);
2063170300Sjeff		thread_unlock(td);
2064170300Sjeff		mtx_unlock_spin(&umtx_lock);
2065161678Sdavidxu	}
2066161678Sdavidxu
2067161678Sdavidxu	if (error != 0) {
2068170300Sjeff		mtx_lock_spin(&umtx_lock);
2069161678Sdavidxu		uq->uq_inherited_pri = old_inherited_pri;
2070161678Sdavidxu		pri = PRI_MAX;
2071161678Sdavidxu		TAILQ_FOREACH(pi, &uq->uq_pi_contested, pi_link) {
2072161678Sdavidxu			uq2 = TAILQ_FIRST(&pi->pi_blocked);
2073161678Sdavidxu			if (uq2 != NULL) {
2074161678Sdavidxu				if (pri > UPRI(uq2->uq_thread))
2075161678Sdavidxu					pri = UPRI(uq2->uq_thread);
2076161678Sdavidxu			}
2077161678Sdavidxu		}
2078161678Sdavidxu		if (pri > uq->uq_inherited_pri)
2079161678Sdavidxu			pri = uq->uq_inherited_pri;
2080170300Sjeff		thread_lock(td);
2081161678Sdavidxu		sched_unlend_user_prio(td, pri);
2082170300Sjeff		thread_unlock(td);
2083170300Sjeff		mtx_unlock_spin(&umtx_lock);
2084161678Sdavidxu	}
2085161678Sdavidxu
2086161678Sdavidxuout:
2087161678Sdavidxu	umtxq_lock(&uq->uq_key);
2088161678Sdavidxu	umtxq_unbusy(&uq->uq_key);
2089161678Sdavidxu	umtxq_unlock(&uq->uq_key);
2090161678Sdavidxu	umtx_key_release(&uq->uq_key);
2091161678Sdavidxu	return (error);
2092161678Sdavidxu}
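
/*
 * Ceiling arithmetic used in _do_lock_pp(), worked symbolically: a
 * user-supplied ceiling c in [0, RTP_PRIO_MAX] is inverted to
 * RTP_PRIO_MAX - c, and the priority actually lent is
 * PRI_MIN_REALTIME + (RTP_PRIO_MAX - c).  Larger user ceilings thus
 * map to numerically smaller -- stronger -- kernel realtime
 * priorities.  The EINVAL check rejects a thread whose current
 * priority is already better than the ceiling allows.
 */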
2093161678Sdavidxu
2094161678Sdavidxu/*
2095161678Sdavidxu * Unlock a PP mutex.
2096161678Sdavidxu */
2097161678Sdavidxustatic int
2098161678Sdavidxudo_unlock_pp(struct thread *td, struct umutex *m, uint32_t flags)
2099161678Sdavidxu{
2100161678Sdavidxu	struct umtx_key key;
2101161678Sdavidxu	struct umtx_q *uq, *uq2;
2102161678Sdavidxu	struct umtx_pi *pi;
2103161678Sdavidxu	uint32_t owner, id;
2104161678Sdavidxu	uint32_t rceiling;
2105161926Sdavidxu	int error, pri, new_inherited_pri, su;
2106161678Sdavidxu
2107161678Sdavidxu	id = td->td_tid;
2108161678Sdavidxu	uq = td->td_umtxq;
2109164033Srwatson	su = (priv_check(td, PRIV_SCHED_RTPRIO) == 0);
2110161678Sdavidxu
2111161678Sdavidxu	/*
2112161678Sdavidxu	 * Make sure we own this mtx.
2113161678Sdavidxu	 */
2114163449Sdavidxu	owner = fuword32(__DEVOLATILE(uint32_t *, &m->m_owner));
2115161678Sdavidxu	if (owner == -1)
2116161678Sdavidxu		return (EFAULT);
2117161678Sdavidxu
2118161678Sdavidxu	if ((owner & ~UMUTEX_CONTESTED) != id)
2119161678Sdavidxu		return (EPERM);
2120161678Sdavidxu
2121161678Sdavidxu	error = copyin(&m->m_ceilings[1], &rceiling, sizeof(uint32_t));
2122161678Sdavidxu	if (error != 0)
2123161678Sdavidxu		return (error);
2124161678Sdavidxu
2125161678Sdavidxu	if (rceiling == -1)
2126161678Sdavidxu		new_inherited_pri = PRI_MAX;
2127161678Sdavidxu	else {
2128161678Sdavidxu		rceiling = RTP_PRIO_MAX - rceiling;
2129161678Sdavidxu		if (rceiling > RTP_PRIO_MAX)
2130161678Sdavidxu			return (EINVAL);
2131161678Sdavidxu		new_inherited_pri = PRI_MIN_REALTIME + rceiling;
2132161678Sdavidxu	}
2133161678Sdavidxu
2134161678Sdavidxu	if ((error = umtx_key_get(m, TYPE_PP_UMUTEX, GET_SHARE(flags),
2135161678Sdavidxu	    &key)) != 0)
2136161678Sdavidxu		return (error);
2137161678Sdavidxu	umtxq_lock(&key);
2138161678Sdavidxu	umtxq_busy(&key);
2139161678Sdavidxu	umtxq_unlock(&key);
2140161678Sdavidxu	/*
2141161678Sdavidxu	 * For a priority-protected mutex, always set the unlocked state
2142161678Sdavidxu	 * to UMUTEX_CONTESTED so that userland always enters the kernel
2143161678Sdavidxu	 * to lock the mutex.  This is necessary because the thread's
2144161678Sdavidxu	 * priority has to be adjusted when such a mutex is locked.
2145161678Sdavidxu	 */
2146163449Sdavidxu	error = suword32(__DEVOLATILE(uint32_t *, &m->m_owner),
2147163449Sdavidxu		UMUTEX_CONTESTED);
2148161678Sdavidxu
2149161678Sdavidxu	umtxq_lock(&key);
2150161678Sdavidxu	if (error == 0)
2151161678Sdavidxu		umtxq_signal(&key, 1);
2152161678Sdavidxu	umtxq_unbusy(&key);
2153161678Sdavidxu	umtxq_unlock(&key);
2154161678Sdavidxu
2155161678Sdavidxu	if (error == -1)
2156161678Sdavidxu		error = EFAULT;
2157161678Sdavidxu	else {
2158170300Sjeff		mtx_lock_spin(&umtx_lock);
2159161926Sdavidxu		if (su != 0)
2160161926Sdavidxu			uq->uq_inherited_pri = new_inherited_pri;
2161161678Sdavidxu		pri = PRI_MAX;
2162161678Sdavidxu		TAILQ_FOREACH(pi, &uq->uq_pi_contested, pi_link) {
2163161678Sdavidxu			uq2 = TAILQ_FIRST(&pi->pi_blocked);
2164161678Sdavidxu			if (uq2 != NULL) {
2165161678Sdavidxu				if (pri > UPRI(uq2->uq_thread))
2166161678Sdavidxu					pri = UPRI(uq2->uq_thread);
2167161678Sdavidxu			}
2168161678Sdavidxu		}
2169161678Sdavidxu		if (pri > uq->uq_inherited_pri)
2170161678Sdavidxu			pri = uq->uq_inherited_pri;
2171170300Sjeff		thread_lock(td);
2172161678Sdavidxu		sched_unlend_user_prio(td, pri);
2173170300Sjeff		thread_unlock(td);
2174170300Sjeff		mtx_unlock_spin(&umtx_lock);
2175161678Sdavidxu	}
2176161678Sdavidxu	umtx_key_release(&key);
2177161678Sdavidxu	return (error);
2178161678Sdavidxu}
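
/*
 * Note on do_unlock_pp(): the owner's priority is recomputed the same
 * way as in _do_lock_pp() -- the minimum over the head waiter of
 * every mutex still on uq_pi_contested, clamped by the new inherited
 * priority -- so dropping one mutex only lowers the priority as far
 * as the ceilings of the mutexes still held permit.
 */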
2179161678Sdavidxu
2180161678Sdavidxustatic int
2181161678Sdavidxudo_set_ceiling(struct thread *td, struct umutex *m, uint32_t ceiling,
2182161678Sdavidxu	uint32_t *old_ceiling)
2183161678Sdavidxu{
2184161678Sdavidxu	struct umtx_q *uq;
2185161678Sdavidxu	uint32_t save_ceiling;
2186161678Sdavidxu	uint32_t owner, id;
2187161678Sdavidxu	uint32_t flags;
2188161678Sdavidxu	int error;
2189161678Sdavidxu
2190161678Sdavidxu	flags = fuword32(&m->m_flags);
2191161678Sdavidxu	if ((flags & UMUTEX_PRIO_PROTECT) == 0)
2192161678Sdavidxu		return (EINVAL);
2193161678Sdavidxu	if (ceiling > RTP_PRIO_MAX)
2194161678Sdavidxu		return (EINVAL);
2195161678Sdavidxu	id = td->td_tid;
2196161678Sdavidxu	uq = td->td_umtxq;
2197161678Sdavidxu	if ((error = umtx_key_get(m, TYPE_PP_UMUTEX, GET_SHARE(flags),
2198161678Sdavidxu	   &uq->uq_key)) != 0)
2199161678Sdavidxu		return (error);
2200161678Sdavidxu	for (;;) {
2201161678Sdavidxu		umtxq_lock(&uq->uq_key);
2202161678Sdavidxu		umtxq_busy(&uq->uq_key);
2203161678Sdavidxu		umtxq_unlock(&uq->uq_key);
2204161678Sdavidxu
2205161678Sdavidxu		save_ceiling = fuword32(&m->m_ceilings[0]);
2206161678Sdavidxu
2207161678Sdavidxu		owner = casuword32(&m->m_owner,
2208161678Sdavidxu		    UMUTEX_CONTESTED, id | UMUTEX_CONTESTED);
2209161678Sdavidxu
2210161678Sdavidxu		if (owner == UMUTEX_CONTESTED) {
2211161678Sdavidxu			suword32(&m->m_ceilings[0], ceiling);
2212163449Sdavidxu			suword32(__DEVOLATILE(uint32_t *, &m->m_owner),
2213163449Sdavidxu				UMUTEX_CONTESTED);
2214161678Sdavidxu			error = 0;
2215161678Sdavidxu			break;
2216161678Sdavidxu		}
2217161678Sdavidxu
2218161678Sdavidxu		/* The address was invalid. */
2219161678Sdavidxu		if (owner == -1) {
2220161678Sdavidxu			error = EFAULT;
2221161678Sdavidxu			break;
2222161678Sdavidxu		}
2223161678Sdavidxu
2224161678Sdavidxu		if ((owner & ~UMUTEX_CONTESTED) == id) {
2225161678Sdavidxu			suword32(&m->m_ceilings[0], ceiling);
2226161678Sdavidxu			error = 0;
2227161678Sdavidxu			break;
2228161678Sdavidxu		}
2229161678Sdavidxu
2230161678Sdavidxu		/*
2231161678Sdavidxu		 * If we caught a signal, we have retried and now
2232161678Sdavidxu		 * exit immediately.
2233161678Sdavidxu		 */
2234161678Sdavidxu		if (error != 0)
2235161678Sdavidxu			break;
2236161678Sdavidxu
2237161678Sdavidxu		/*
2238161678Sdavidxu		 * If we successfully set the contested bit, sleep.  Otherwise
2239161678Sdavidxu		 * the lock changed and we need to retry, or we lost a race to
2240161678Sdavidxu		 * the thread unlocking the umtx.
2241161678Sdavidxu		 */
2242161678Sdavidxu		umtxq_lock(&uq->uq_key);
2243161678Sdavidxu		umtxq_insert(uq);
2244161678Sdavidxu		umtxq_unbusy(&uq->uq_key);
2245161678Sdavidxu		error = umtxq_sleep(uq, "umtxpp", 0);
2246161678Sdavidxu		umtxq_remove(uq);
2247161678Sdavidxu		umtxq_unlock(&uq->uq_key);
2248161678Sdavidxu	}
2249161678Sdavidxu	umtxq_lock(&uq->uq_key);
2250161678Sdavidxu	if (error == 0)
2251161678Sdavidxu		umtxq_signal(&uq->uq_key, INT_MAX);
2252161678Sdavidxu	umtxq_unbusy(&uq->uq_key);
2253161678Sdavidxu	umtxq_unlock(&uq->uq_key);
2254161678Sdavidxu	umtx_key_release(&uq->uq_key);
2255161678Sdavidxu	if (error == 0 && old_ceiling != NULL)
2256161678Sdavidxu		suword32(old_ceiling, save_ceiling);
2257161678Sdavidxu	return (error);
2258161678Sdavidxu}
2259161678Sdavidxu
2260162030Sdavidxustatic int
2261162030Sdavidxu_do_lock_umutex(struct thread *td, struct umutex *m, int flags, int timo,
2262179970Sdavidxu	int mode)
2263162030Sdavidxu{
2264162030Sdavidxu	switch(flags & (UMUTEX_PRIO_INHERIT | UMUTEX_PRIO_PROTECT)) {
2265162030Sdavidxu	case 0:
2266179970Sdavidxu		return (_do_lock_normal(td, m, flags, timo, mode));
2267162030Sdavidxu	case UMUTEX_PRIO_INHERIT:
2268179970Sdavidxu		return (_do_lock_pi(td, m, flags, timo, mode));
2269162030Sdavidxu	case UMUTEX_PRIO_PROTECT:
2270179970Sdavidxu		return (_do_lock_pp(td, m, flags, timo, mode));
2271162030Sdavidxu	}
2272162030Sdavidxu	return (EINVAL);
2273162030Sdavidxu}
2274162030Sdavidxu
2275161678Sdavidxu/*
2276161678Sdavidxu * Lock a userland POSIX mutex.
2277161678Sdavidxu */
2278161678Sdavidxustatic int
2279162030Sdavidxudo_lock_umutex(struct thread *td, struct umutex *m,
2280179970Sdavidxu	struct timespec *timeout, int mode)
2281161678Sdavidxu{
2282162030Sdavidxu	struct timespec ts, ts2, ts3;
2283162030Sdavidxu	struct timeval tv;
2284161678Sdavidxu	uint32_t flags;
2285162030Sdavidxu	int error;
2286161678Sdavidxu
2287161678Sdavidxu	flags = fuword32(&m->m_flags);
2288161678Sdavidxu	if (flags == -1)
2289161678Sdavidxu		return (EFAULT);
2290161678Sdavidxu
2291162030Sdavidxu	if (timeout == NULL) {
2292179970Sdavidxu		error = _do_lock_umutex(td, m, flags, 0, mode);
2293162030Sdavidxu		/* Mutex locking is restarted if it is interrupted. */
2294179970Sdavidxu		if (error == EINTR && mode != _UMUTEX_WAIT)
2295162030Sdavidxu			error = ERESTART;
2296162030Sdavidxu	} else {
2297162030Sdavidxu		getnanouptime(&ts);
2298162030Sdavidxu		timespecadd(&ts, timeout);
2299162030Sdavidxu		TIMESPEC_TO_TIMEVAL(&tv, timeout);
2300162030Sdavidxu		for (;;) {
2301179970Sdavidxu			error = _do_lock_umutex(td, m, flags, tvtohz(&tv), mode);
2302162030Sdavidxu			if (error != ETIMEDOUT)
2303162030Sdavidxu				break;
2304162030Sdavidxu			getnanouptime(&ts2);
2305162030Sdavidxu			if (timespeccmp(&ts2, &ts, >=)) {
2306162030Sdavidxu				error = ETIMEDOUT;
2307162030Sdavidxu				break;
2308162030Sdavidxu			}
2309162030Sdavidxu			ts3 = ts;
2310162030Sdavidxu			timespecsub(&ts3, &ts2);
2311162030Sdavidxu			TIMESPEC_TO_TIMEVAL(&tv, &ts3);
2312162030Sdavidxu		}
2313162030Sdavidxu		/* Timed-locking is not restarted. */
2314162030Sdavidxu		if (error == ERESTART)
2315162030Sdavidxu			error = EINTR;
2316161742Sdavidxu	}
2317162030Sdavidxu	return (error);
2318161678Sdavidxu}
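
/*
 * Note on the timeout loop in do_lock_umutex(): ts is an absolute
 * deadline (current uptime plus the timeout).  An inner attempt may
 * return ETIMEDOUT early, e.g. after losing a wakeup race, so the
 * remaining time ts - ts2 is converted back into tv and the attempt
 * is retried; ETIMEDOUT is only reported once the deadline itself has
 * passed.  A timed lock interrupted by a signal returns EINTR rather
 * than being transparently restarted.
 */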
2319161678Sdavidxu
2320161678Sdavidxu/*
2321161678Sdavidxu * Unlock a userland POSIX mutex.
2322161678Sdavidxu */
2323161678Sdavidxustatic int
2324161678Sdavidxudo_unlock_umutex(struct thread *td, struct umutex *m)
2325161678Sdavidxu{
2326161678Sdavidxu	uint32_t flags;
2327161678Sdavidxu
2328161678Sdavidxu	flags = fuword32(&m->m_flags);
2329161678Sdavidxu	if (flags == -1)
2330161678Sdavidxu		return (EFAULT);
2331161678Sdavidxu
2332161855Sdavidxu	switch(flags & (UMUTEX_PRIO_INHERIT | UMUTEX_PRIO_PROTECT)) {
2333161855Sdavidxu	case 0:
2334161855Sdavidxu		return (do_unlock_normal(td, m, flags));
2335161855Sdavidxu	case UMUTEX_PRIO_INHERIT:
2336161855Sdavidxu		return (do_unlock_pi(td, m, flags));
2337161855Sdavidxu	case UMUTEX_PRIO_PROTECT:
2338161855Sdavidxu		return (do_unlock_pp(td, m, flags));
2339161855Sdavidxu	}
2340161678Sdavidxu
2341161855Sdavidxu	return (EINVAL);
2342161678Sdavidxu}
2343161678Sdavidxu
2344164839Sdavidxustatic int
2345164839Sdavidxudo_cv_wait(struct thread *td, struct ucond *cv, struct umutex *m,
2346164876Sdavidxu	struct timespec *timeout, u_long wflags)
2347164839Sdavidxu{
2348164839Sdavidxu	struct umtx_q *uq;
2349164839Sdavidxu	struct timeval tv;
2350164839Sdavidxu	struct timespec cts, ets, tts;
2351164839Sdavidxu	uint32_t flags;
2352164839Sdavidxu	int error;
2353164839Sdavidxu
2354164839Sdavidxu	uq = td->td_umtxq;
2355164839Sdavidxu	flags = fuword32(&cv->c_flags);
2356164839Sdavidxu	error = umtx_key_get(cv, TYPE_CV, GET_SHARE(flags), &uq->uq_key);
2357164839Sdavidxu	if (error != 0)
2358164839Sdavidxu		return (error);
2359164839Sdavidxu	umtxq_lock(&uq->uq_key);
2360164839Sdavidxu	umtxq_busy(&uq->uq_key);
2361164839Sdavidxu	umtxq_insert(uq);
2362164839Sdavidxu	umtxq_unlock(&uq->uq_key);
2363164839Sdavidxu
2364164839Sdavidxu	/*
2365164839Sdavidxu	 * The key point is that c_has_waiters must be set to 1 before
2366164839Sdavidxu	 * the user mutex is released, otherwise a wakeup can be lost.
2367164839Sdavidxu	 */
2368164839Sdavidxu	suword32(__DEVOLATILE(uint32_t *, &cv->c_has_waiters), 1);
2369164839Sdavidxu
2370164839Sdavidxu	umtxq_lock(&uq->uq_key);
2371164839Sdavidxu	umtxq_unbusy(&uq->uq_key);
2372164839Sdavidxu	umtxq_unlock(&uq->uq_key);
2373164839Sdavidxu
2374164839Sdavidxu	error = do_unlock_umutex(td, m);
2375164839Sdavidxu
2376164839Sdavidxu	umtxq_lock(&uq->uq_key);
2377164839Sdavidxu	if (error == 0) {
2378164876Sdavidxu		if ((wflags & UMTX_CHECK_UNPARKING) &&
2379164876Sdavidxu		    (td->td_pflags & TDP_WAKEUP)) {
2380164876Sdavidxu			td->td_pflags &= ~TDP_WAKEUP;
2381164876Sdavidxu			error = EINTR;
2382164876Sdavidxu		} else if (timeout == NULL) {
2383164839Sdavidxu			error = umtxq_sleep(uq, "ucond", 0);
2384164839Sdavidxu		} else {
2385164839Sdavidxu			getnanouptime(&ets);
2386164839Sdavidxu			timespecadd(&ets, timeout);
2387164839Sdavidxu			TIMESPEC_TO_TIMEVAL(&tv, timeout);
2388164839Sdavidxu			for (;;) {
2389164839Sdavidxu				error = umtxq_sleep(uq, "ucond", tvtohz(&tv));
2390164839Sdavidxu				if (error != ETIMEDOUT)
2391164839Sdavidxu					break;
2392164839Sdavidxu				getnanouptime(&cts);
2393164839Sdavidxu				if (timespeccmp(&cts, &ets, >=)) {
2394164839Sdavidxu					error = ETIMEDOUT;
2395164839Sdavidxu					break;
2396164839Sdavidxu				}
2397164839Sdavidxu				tts = ets;
2398164839Sdavidxu				timespecsub(&tts, &cts);
2399164839Sdavidxu				TIMESPEC_TO_TIMEVAL(&tv, &tts);
2400164839Sdavidxu			}
2401164839Sdavidxu		}
2402164839Sdavidxu	}
2403164839Sdavidxu
2404211794Sdavidxu	if ((uq->uq_flags & UQF_UMTXQ) == 0)
2405211794Sdavidxu		error = 0;
2406211794Sdavidxu	else {
2407211794Sdavidxu		umtxq_remove(uq);
2408164839Sdavidxu		if (error == ERESTART)
2409164839Sdavidxu			error = EINTR;
2410164839Sdavidxu	}
2411211794Sdavidxu
2412164839Sdavidxu	umtxq_unlock(&uq->uq_key);
2413164839Sdavidxu	umtx_key_release(&uq->uq_key);
2414164839Sdavidxu	return (error);
2415164839Sdavidxu}
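
/*
 * Why do_cv_wait() sets c_has_waiters before unlocking (an
 * illustrative interleaving): if the store happened after
 * do_unlock_umutex(), a signalling thread -- whose userland side is
 * expected to check c_has_waiters to avoid a system call -- could
 * acquire the mutex, read c_has_waiters == 0, and skip the kernel
 * wakeup entirely, leaving this thread asleep forever.  Setting the
 * flag while the mutex is still held forces any later signaller
 * through do_cv_signal(), which is serialized on the same umtx queue.
 */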
2416164839Sdavidxu
2417164839Sdavidxu/*
2418164839Sdavidxu * Signal a userland condition variable.
2419164839Sdavidxu */
2420164839Sdavidxustatic int
2421164839Sdavidxudo_cv_signal(struct thread *td, struct ucond *cv)
2422164839Sdavidxu{
2423164839Sdavidxu	struct umtx_key key;
2424164839Sdavidxu	int error, cnt, nwake;
2425164839Sdavidxu	uint32_t flags;
2426164839Sdavidxu
2427164839Sdavidxu	flags = fuword32(&cv->c_flags);
2428164839Sdavidxu	if ((error = umtx_key_get(cv, TYPE_CV, GET_SHARE(flags), &key)) != 0)
2429164839Sdavidxu		return (error);
2430164839Sdavidxu	umtxq_lock(&key);
2431164839Sdavidxu	umtxq_busy(&key);
2432164839Sdavidxu	cnt = umtxq_count(&key);
2433164839Sdavidxu	nwake = umtxq_signal(&key, 1);
2434164839Sdavidxu	if (cnt <= nwake) {
2435164839Sdavidxu		umtxq_unlock(&key);
2436164839Sdavidxu		error = suword32(
2437164839Sdavidxu		    __DEVOLATILE(uint32_t *, &cv->c_has_waiters), 0);
2438164839Sdavidxu		umtxq_lock(&key);
2439164839Sdavidxu	}
2440164839Sdavidxu	umtxq_unbusy(&key);
2441164839Sdavidxu	umtxq_unlock(&key);
2442164839Sdavidxu	umtx_key_release(&key);
2443164839Sdavidxu	return (error);
2444164839Sdavidxu}
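
/*
 * Note on the cnt <= nwake test in do_cv_signal(): umtxq_count() was
 * taken before the wakeup, so when every counted waiter has been
 * woken the queue is known to be empty and c_has_waiters can be
 * cleared, allowing the userland side to skip future system calls;
 * if waiters remain, the flag is left set.
 */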
2445164839Sdavidxu
2446164839Sdavidxustatic int
2447164839Sdavidxudo_cv_broadcast(struct thread *td, struct ucond *cv)
2448164839Sdavidxu{
2449164839Sdavidxu	struct umtx_key key;
2450164839Sdavidxu	int error;
2451164839Sdavidxu	uint32_t flags;
2452164839Sdavidxu
2453164839Sdavidxu	flags = fuword32(&cv->c_flags);
2454164839Sdavidxu	if ((error = umtx_key_get(cv, TYPE_CV, GET_SHARE(flags), &key)) != 0)
2455164839Sdavidxu		return (error);
2456164839Sdavidxu
2457164839Sdavidxu	umtxq_lock(&key);
2458164839Sdavidxu	umtxq_busy(&key);
2459164839Sdavidxu	umtxq_signal(&key, INT_MAX);
2460164839Sdavidxu	umtxq_unlock(&key);
2461164839Sdavidxu
2462164839Sdavidxu	error = suword32(__DEVOLATILE(uint32_t *, &cv->c_has_waiters), 0);
2463164839Sdavidxu
2464164839Sdavidxu	umtxq_lock(&key);
2465164839Sdavidxu	umtxq_unbusy(&key);
2466164839Sdavidxu	umtxq_unlock(&key);
2467164839Sdavidxu
2468164839Sdavidxu	umtx_key_release(&key);
2469164839Sdavidxu	return (error);
2470164839Sdavidxu}
2471164839Sdavidxu
2472177848Sdavidxustatic int
2473177848Sdavidxudo_rw_rdlock(struct thread *td, struct urwlock *rwlock, long fflag, int timo)
2474177848Sdavidxu{
2475177848Sdavidxu	struct umtx_q *uq;
2476177848Sdavidxu	uint32_t flags, wrflags;
2477177848Sdavidxu	int32_t state, oldstate;
2478177848Sdavidxu	int32_t blocked_readers;
2479177848Sdavidxu	int error;
2480177848Sdavidxu
2481177848Sdavidxu	uq = td->td_umtxq;
2482177848Sdavidxu	flags = fuword32(&rwlock->rw_flags);
2483177848Sdavidxu	error = umtx_key_get(rwlock, TYPE_RWLOCK, GET_SHARE(flags), &uq->uq_key);
2484177848Sdavidxu	if (error != 0)
2485177848Sdavidxu		return (error);
2486177848Sdavidxu
2487177848Sdavidxu	wrflags = URWLOCK_WRITE_OWNER;
2488177848Sdavidxu	if (!(fflag & URWLOCK_PREFER_READER) && !(flags & URWLOCK_PREFER_READER))
2489177848Sdavidxu		wrflags |= URWLOCK_WRITE_WAITERS;
2490177848Sdavidxu
2491177848Sdavidxu	for (;;) {
2492177848Sdavidxu		state = fuword32(__DEVOLATILE(int32_t *, &rwlock->rw_state));
2493177848Sdavidxu		/* try to lock it */
2494177848Sdavidxu		while (!(state & wrflags)) {
2495177848Sdavidxu			if (__predict_false(URWLOCK_READER_COUNT(state) == URWLOCK_MAX_READERS)) {
2496177848Sdavidxu				umtx_key_release(&uq->uq_key);
2497177848Sdavidxu				return (EAGAIN);
2498177848Sdavidxu			}
2499177848Sdavidxu			oldstate = casuword32(&rwlock->rw_state, state, state + 1);
2500177848Sdavidxu			if (oldstate == state) {
2501177848Sdavidxu				umtx_key_release(&uq->uq_key);
2502177848Sdavidxu				return (0);
2503177848Sdavidxu			}
2504177848Sdavidxu			state = oldstate;
2505177848Sdavidxu		}
2506177848Sdavidxu
2507177848Sdavidxu		if (error)
2508177848Sdavidxu			break;
2509177848Sdavidxu
2510177848Sdavidxu		/* grab monitor lock */
2511177848Sdavidxu		umtxq_lock(&uq->uq_key);
2512177848Sdavidxu		umtxq_busy(&uq->uq_key);
2513177848Sdavidxu		umtxq_unlock(&uq->uq_key);
2514177848Sdavidxu
2515203414Sdavidxu		/*
2516203414Sdavidxu		 * re-read the state, in case it changed between the try-lock above
2517203414Sdavidxu		 * and the check below
2518203414Sdavidxu		 */
2519203414Sdavidxu		state = fuword32(__DEVOLATILE(int32_t *, &rwlock->rw_state));
2520203414Sdavidxu
2521177848Sdavidxu		/* set read contention bit */
2522177848Sdavidxu		while ((state & wrflags) && !(state & URWLOCK_READ_WAITERS)) {
2523177848Sdavidxu			oldstate = casuword32(&rwlock->rw_state, state, state | URWLOCK_READ_WAITERS);
2524177848Sdavidxu			if (oldstate == state)
2525177848Sdavidxu				goto sleep;
2526177848Sdavidxu			state = oldstate;
2527177848Sdavidxu		}
2528177848Sdavidxu
2529177848Sdavidxu		/* The state changed while we were setting the flag; restart. */
2530177848Sdavidxu		if (!(state & wrflags)) {
2531177848Sdavidxu			umtxq_lock(&uq->uq_key);
2532177848Sdavidxu			umtxq_unbusy(&uq->uq_key);
2533177848Sdavidxu			umtxq_unlock(&uq->uq_key);
2534177848Sdavidxu			continue;
2535177848Sdavidxu		}
2536177848Sdavidxu
2537177848Sdavidxusleep:
2538177848Sdavidxu		/* The contention bit is set; increase the read-waiter count before sleeping. */
2539177848Sdavidxu		blocked_readers = fuword32(&rwlock->rw_blocked_readers);
2540177848Sdavidxu		suword32(&rwlock->rw_blocked_readers, blocked_readers+1);
2541177848Sdavidxu
2542177848Sdavidxu		while (state & wrflags) {
2543177848Sdavidxu			umtxq_lock(&uq->uq_key);
2544177848Sdavidxu			umtxq_insert(uq);
2545177848Sdavidxu			umtxq_unbusy(&uq->uq_key);
2546177848Sdavidxu
2547177848Sdavidxu			error = umtxq_sleep(uq, "urdlck", timo);
2548177848Sdavidxu
2549177848Sdavidxu			umtxq_busy(&uq->uq_key);
2550177848Sdavidxu			umtxq_remove(uq);
2551177848Sdavidxu			umtxq_unlock(&uq->uq_key);
2552177848Sdavidxu			if (error)
2553177848Sdavidxu				break;
2554177848Sdavidxu			state = fuword32(__DEVOLATILE(int32_t *, &rwlock->rw_state));
2555177848Sdavidxu		}
2556177848Sdavidxu
2557177848Sdavidxu		/* Decrease the read-waiter count; the last waiter clears the read-contention bit. */
2558177848Sdavidxu		blocked_readers = fuword32(&rwlock->rw_blocked_readers);
2559177848Sdavidxu		suword32(&rwlock->rw_blocked_readers, blocked_readers-1);
2560177848Sdavidxu		if (blocked_readers == 1) {
2561177848Sdavidxu			state = fuword32(__DEVOLATILE(int32_t *, &rwlock->rw_state));
2562177848Sdavidxu			for (;;) {
2563177848Sdavidxu				oldstate = casuword32(&rwlock->rw_state, state,
2564177848Sdavidxu					 state & ~URWLOCK_READ_WAITERS);
2565177848Sdavidxu				if (oldstate == state)
2566177848Sdavidxu					break;
2567177848Sdavidxu				state = oldstate;
2568177848Sdavidxu			}
2569177848Sdavidxu		}
2570177848Sdavidxu
2571177848Sdavidxu		umtxq_lock(&uq->uq_key);
2572177848Sdavidxu		umtxq_unbusy(&uq->uq_key);
2573177848Sdavidxu		umtxq_unlock(&uq->uq_key);
2574177848Sdavidxu	}
2575177848Sdavidxu	umtx_key_release(&uq->uq_key);
2576177848Sdavidxu	return (error);
2577177848Sdavidxu}
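
/*
 * Layout of rw_state assumed by the loops above: the low bits hold
 * the reader count (extracted by URWLOCK_READER_COUNT()), while
 * URWLOCK_WRITE_OWNER, URWLOCK_WRITE_WAITERS and URWLOCK_READ_WAITERS
 * are flag bits.  Taking a read lock is therefore "state + 1" as long
 * as no blocking write bit is set, which is why each failed CAS
 * retries with the freshly observed oldstate.
 */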
2578177848Sdavidxu
2579177848Sdavidxustatic int
2580177848Sdavidxudo_rw_rdlock2(struct thread *td, void *obj, long val, struct timespec *timeout)
2581177848Sdavidxu{
2582177848Sdavidxu	struct timespec ts, ts2, ts3;
2583177848Sdavidxu	struct timeval tv;
2584177848Sdavidxu	int error;
2585177848Sdavidxu
2586177848Sdavidxu	getnanouptime(&ts);
2587177848Sdavidxu	timespecadd(&ts, timeout);
2588177848Sdavidxu	TIMESPEC_TO_TIMEVAL(&tv, timeout);
2589177848Sdavidxu	for (;;) {
2590177848Sdavidxu		error = do_rw_rdlock(td, obj, val, tvtohz(&tv));
2591177848Sdavidxu		if (error != ETIMEDOUT)
2592177848Sdavidxu			break;
2593177848Sdavidxu		getnanouptime(&ts2);
2594177848Sdavidxu		if (timespeccmp(&ts2, &ts, >=)) {
2595177848Sdavidxu			error = ETIMEDOUT;
2596177848Sdavidxu			break;
2597177848Sdavidxu		}
2598177848Sdavidxu		ts3 = ts;
2599177848Sdavidxu		timespecsub(&ts3, &ts2);
2600177848Sdavidxu		TIMESPEC_TO_TIMEVAL(&tv, &ts3);
2601177848Sdavidxu	}
2602177849Sdavidxu	if (error == ERESTART)
2603177849Sdavidxu		error = EINTR;
2604177848Sdavidxu	return (error);
2605177848Sdavidxu}
2606177848Sdavidxu
2607177848Sdavidxustatic int
2608177848Sdavidxudo_rw_wrlock(struct thread *td, struct urwlock *rwlock, int timo)
2609177848Sdavidxu{
2610177848Sdavidxu	struct umtx_q *uq;
2611177848Sdavidxu	uint32_t flags;
2612177848Sdavidxu	int32_t state, oldstate;
2613177848Sdavidxu	int32_t blocked_writers;
2614197476Sdavidxu	int32_t blocked_readers;
2615177848Sdavidxu	int error;
2616177848Sdavidxu
2617177848Sdavidxu	uq = td->td_umtxq;
2618177848Sdavidxu	flags = fuword32(&rwlock->rw_flags);
2619177848Sdavidxu	error = umtx_key_get(rwlock, TYPE_RWLOCK, GET_SHARE(flags), &uq->uq_key);
2620177848Sdavidxu	if (error != 0)
2621177848Sdavidxu		return (error);
2622177848Sdavidxu
2623197476Sdavidxu	blocked_readers = 0;
2624177848Sdavidxu	for (;;) {
2625177848Sdavidxu		state = fuword32(__DEVOLATILE(int32_t *, &rwlock->rw_state));
2626177848Sdavidxu		while (!(state & URWLOCK_WRITE_OWNER) && URWLOCK_READER_COUNT(state) == 0) {
2627177848Sdavidxu			oldstate = casuword32(&rwlock->rw_state, state, state | URWLOCK_WRITE_OWNER);
2628177848Sdavidxu			if (oldstate == state) {
2629177848Sdavidxu				umtx_key_release(&uq->uq_key);
2630177848Sdavidxu				return (0);
2631177848Sdavidxu			}
2632177848Sdavidxu			state = oldstate;
2633177848Sdavidxu		}
2634177848Sdavidxu
2635197476Sdavidxu		if (error) {
2636197476Sdavidxu			if (!(state & (URWLOCK_WRITE_OWNER|URWLOCK_WRITE_WAITERS)) &&
2637197476Sdavidxu			    blocked_readers != 0) {
2638197476Sdavidxu				umtxq_lock(&uq->uq_key);
2639197476Sdavidxu				umtxq_busy(&uq->uq_key);
2640197476Sdavidxu				umtxq_signal_queue(&uq->uq_key, INT_MAX, UMTX_SHARED_QUEUE);
2641197476Sdavidxu				umtxq_unbusy(&uq->uq_key);
2642197476Sdavidxu				umtxq_unlock(&uq->uq_key);
2643197476Sdavidxu			}
2644197476Sdavidxu
2645177848Sdavidxu			break;
2646197476Sdavidxu		}
2647177848Sdavidxu
2648177848Sdavidxu		/* grab monitor lock */
2649177848Sdavidxu		umtxq_lock(&uq->uq_key);
2650177848Sdavidxu		umtxq_busy(&uq->uq_key);
2651177848Sdavidxu		umtxq_unlock(&uq->uq_key);
2652177848Sdavidxu
2653203414Sdavidxu		/*
2654203414Sdavidxu		 * re-read the state, in case it changed between the try-lock above
2655203414Sdavidxu		 * and the check below
2656203414Sdavidxu		 */
2657203414Sdavidxu		state = fuword32(__DEVOLATILE(int32_t *, &rwlock->rw_state));
2658203414Sdavidxu
2659177848Sdavidxu		while (((state & URWLOCK_WRITE_OWNER) || URWLOCK_READER_COUNT(state) != 0) &&
2660177848Sdavidxu		       (state & URWLOCK_WRITE_WAITERS) == 0) {
2661177848Sdavidxu			oldstate = casuword32(&rwlock->rw_state, state, state | URWLOCK_WRITE_WAITERS);
2662177848Sdavidxu			if (oldstate == state)
2663177848Sdavidxu				goto sleep;
2664177848Sdavidxu			state = oldstate;
2665177848Sdavidxu		}
2666177848Sdavidxu
2667177848Sdavidxu		if (!(state & URWLOCK_WRITE_OWNER) && URWLOCK_READER_COUNT(state) == 0) {
2668177848Sdavidxu			umtxq_lock(&uq->uq_key);
2669177848Sdavidxu			umtxq_unbusy(&uq->uq_key);
2670177848Sdavidxu			umtxq_unlock(&uq->uq_key);
2671177848Sdavidxu			continue;
2672177848Sdavidxu		}
2673177848Sdavidxusleep:
2674177848Sdavidxu		blocked_writers = fuword32(&rwlock->rw_blocked_writers);
2675177848Sdavidxu		suword32(&rwlock->rw_blocked_writers, blocked_writers+1);
2676177848Sdavidxu
2677177848Sdavidxu		while ((state & URWLOCK_WRITE_OWNER) || URWLOCK_READER_COUNT(state) != 0) {
2678177848Sdavidxu			umtxq_lock(&uq->uq_key);
2679177848Sdavidxu			umtxq_insert_queue(uq, UMTX_EXCLUSIVE_QUEUE);
2680177848Sdavidxu			umtxq_unbusy(&uq->uq_key);
2681177848Sdavidxu
2682177848Sdavidxu			error = umtxq_sleep(uq, "uwrlck", timo);
2683177848Sdavidxu
2684177848Sdavidxu			umtxq_busy(&uq->uq_key);
2685177848Sdavidxu			umtxq_remove_queue(uq, UMTX_EXCLUSIVE_QUEUE);
2686177848Sdavidxu			umtxq_unlock(&uq->uq_key);
2687177848Sdavidxu			if (error)
2688177848Sdavidxu				break;
2689177848Sdavidxu			state = fuword32(__DEVOLATILE(int32_t *, &rwlock->rw_state));
2690177848Sdavidxu		}
2691177848Sdavidxu
2692177848Sdavidxu		blocked_writers = fuword32(&rwlock->rw_blocked_writers);
2693177848Sdavidxu		suword32(&rwlock->rw_blocked_writers, blocked_writers-1);
2694177848Sdavidxu		if (blocked_writers == 1) {
2695177848Sdavidxu			state = fuword32(__DEVOLATILE(int32_t *, &rwlock->rw_state));
2696177848Sdavidxu			for (;;) {
2697177848Sdavidxu				oldstate = casuword32(&rwlock->rw_state, state,
2698177848Sdavidxu					 state & ~URWLOCK_WRITE_WAITERS);
2699177848Sdavidxu				if (oldstate == state)
2700177848Sdavidxu					break;
2701177848Sdavidxu				state = oldstate;
2702177848Sdavidxu			}
2703197476Sdavidxu			blocked_readers = fuword32(&rwlock->rw_blocked_readers);
2704197476Sdavidxu		} else
2705197476Sdavidxu			blocked_readers = 0;
2706177848Sdavidxu
2707177848Sdavidxu		umtxq_lock(&uq->uq_key);
2708177848Sdavidxu		umtxq_unbusy(&uq->uq_key);
2709177848Sdavidxu		umtxq_unlock(&uq->uq_key);
2710177848Sdavidxu	}
2711177848Sdavidxu
2712177848Sdavidxu	umtx_key_release(&uq->uq_key);
2713177848Sdavidxu	return (error);
2714177848Sdavidxu}
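
/*
 * Note on the error path in do_rw_wrlock(): if this writer gives up
 * (signal or timeout) while no write owner or other write waiter
 * remains, the readers it had been blocking are all woken so they can
 * retry; otherwise readers parked behind a failed writer could sleep
 * indefinitely.
 */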
2715177848Sdavidxu
2716177848Sdavidxustatic int
2717177848Sdavidxudo_rw_wrlock2(struct thread *td, void *obj, struct timespec *timeout)
2718177848Sdavidxu{
2719177848Sdavidxu	struct timespec ts, ts2, ts3;
2720177848Sdavidxu	struct timeval tv;
2721177848Sdavidxu	int error;
2722177848Sdavidxu
2723177848Sdavidxu	getnanouptime(&ts);
2724177848Sdavidxu	timespecadd(&ts, timeout);
2725177848Sdavidxu	TIMESPEC_TO_TIMEVAL(&tv, timeout);
2726177848Sdavidxu	for (;;) {
2727177848Sdavidxu		error = do_rw_wrlock(td, obj, tvtohz(&tv));
2728177848Sdavidxu		if (error != ETIMEDOUT)
2729177848Sdavidxu			break;
2730177848Sdavidxu		getnanouptime(&ts2);
2731177848Sdavidxu		if (timespeccmp(&ts2, &ts, >=)) {
2732177848Sdavidxu			error = ETIMEDOUT;
2733177848Sdavidxu			break;
2734177848Sdavidxu		}
2735177848Sdavidxu		ts3 = ts;
2736177848Sdavidxu		timespecsub(&ts3, &ts2);
2737177848Sdavidxu		TIMESPEC_TO_TIMEVAL(&tv, &ts3);
2738177848Sdavidxu	}
2739177849Sdavidxu	if (error == ERESTART)
2740177849Sdavidxu		error = EINTR;
2741177848Sdavidxu	return (error);
2742177848Sdavidxu}
2743177848Sdavidxu
2744177848Sdavidxustatic int
2745177880Sdavidxudo_rw_unlock(struct thread *td, struct urwlock *rwlock)
2746177848Sdavidxu{
2747177848Sdavidxu	struct umtx_q *uq;
2748177848Sdavidxu	uint32_t flags;
2749177848Sdavidxu	int32_t state, oldstate;
2750177848Sdavidxu	int error, q, count;
2751177848Sdavidxu
2752177848Sdavidxu	uq = td->td_umtxq;
2753177848Sdavidxu	flags = fuword32(&rwlock->rw_flags);
2754177848Sdavidxu	error = umtx_key_get(rwlock, TYPE_RWLOCK, GET_SHARE(flags), &uq->uq_key);
2755177848Sdavidxu	if (error != 0)
2756177848Sdavidxu		return (error);
2757177848Sdavidxu
2758177848Sdavidxu	state = fuword32(__DEVOLATILE(int32_t *, &rwlock->rw_state));
2759177848Sdavidxu	if (state & URWLOCK_WRITE_OWNER) {
2760177848Sdavidxu		for (;;) {
2761177848Sdavidxu			oldstate = casuword32(&rwlock->rw_state, state,
2762177848Sdavidxu				state & ~URWLOCK_WRITE_OWNER);
2763177848Sdavidxu			if (oldstate != state) {
2764177848Sdavidxu				state = oldstate;
2765177848Sdavidxu				if (!(oldstate & URWLOCK_WRITE_OWNER)) {
2766177848Sdavidxu					error = EPERM;
2767177848Sdavidxu					goto out;
2768177848Sdavidxu				}
2769177848Sdavidxu			} else
2770177848Sdavidxu				break;
2771177848Sdavidxu		}
2772177848Sdavidxu	} else if (URWLOCK_READER_COUNT(state) != 0) {
2773177848Sdavidxu		for (;;) {
2774177848Sdavidxu			oldstate = casuword32(&rwlock->rw_state, state,
2775177848Sdavidxu				state - 1);
2776177848Sdavidxu			if (oldstate != state) {
2777177848Sdavidxu				state = oldstate;
2778177848Sdavidxu				if (URWLOCK_READER_COUNT(oldstate) == 0) {
2779177848Sdavidxu					error = EPERM;
2780177848Sdavidxu					goto out;
2781177848Sdavidxu				}
2782177848Sdavidxu			}
2783177848Sdavidxu			else
2784177848Sdavidxu				break;
2785177848Sdavidxu		}
2786177848Sdavidxu	} else {
2787177848Sdavidxu		error = EPERM;
2788177848Sdavidxu		goto out;
2789177848Sdavidxu	}
2790177848Sdavidxu
2791177848Sdavidxu	count = 0;
2792177848Sdavidxu
2793177848Sdavidxu	if (!(flags & URWLOCK_PREFER_READER)) {
2794177848Sdavidxu		if (state & URWLOCK_WRITE_WAITERS) {
2795177848Sdavidxu			count = 1;
2796177848Sdavidxu			q = UMTX_EXCLUSIVE_QUEUE;
2797177848Sdavidxu		} else if (state & URWLOCK_READ_WAITERS) {
2798177848Sdavidxu			count = INT_MAX;
2799177848Sdavidxu			q = UMTX_SHARED_QUEUE;
2800177848Sdavidxu		}
2801177848Sdavidxu	} else {
2802177848Sdavidxu		if (state & URWLOCK_READ_WAITERS) {
2803177848Sdavidxu			count = INT_MAX;
2804177848Sdavidxu			q = UMTX_SHARED_QUEUE;
2805177848Sdavidxu		} else if (state & URWLOCK_WRITE_WAITERS) {
2806177848Sdavidxu			count = 1;
2807177848Sdavidxu			q = UMTX_EXCLUSIVE_QUEUE;
2808177848Sdavidxu		}
2809177848Sdavidxu	}
2810177848Sdavidxu
2811177848Sdavidxu	if (count) {
2812177848Sdavidxu		umtxq_lock(&uq->uq_key);
2813177848Sdavidxu		umtxq_busy(&uq->uq_key);
2814177848Sdavidxu		umtxq_signal_queue(&uq->uq_key, count, q);
2815177848Sdavidxu		umtxq_unbusy(&uq->uq_key);
2816177848Sdavidxu		umtxq_unlock(&uq->uq_key);
2817177848Sdavidxu	}
2818177848Sdavidxuout:
2819177848Sdavidxu	umtx_key_release(&uq->uq_key);
2820177848Sdavidxu	return (error);
2821177848Sdavidxu}
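
/*
 * Wake policy of do_rw_unlock(), illustrated: by default one writer
 * (count = 1 on the exclusive queue) is preferred over readers; with
 * URWLOCK_PREFER_READER all blocked readers (count = INT_MAX on the
 * shared queue) are released first.  Waking every reader at once is
 * safe because read locks may be held concurrently.
 */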
2822177848Sdavidxu
2823201472Sdavidxustatic int
2824201472Sdavidxudo_sem_wait(struct thread *td, struct _usem *sem, struct timespec *timeout)
2825201472Sdavidxu{
2826201472Sdavidxu	struct umtx_q *uq;
2827201472Sdavidxu	struct timeval tv;
2828201472Sdavidxu	struct timespec cts, ets, tts;
2829201472Sdavidxu	uint32_t flags, count;
2830201472Sdavidxu	int error;
2831201472Sdavidxu
2832201472Sdavidxu	uq = td->td_umtxq;
2833201472Sdavidxu	flags = fuword32(&sem->_flags);
2834201885Sdavidxu	error = umtx_key_get(sem, TYPE_SEM, GET_SHARE(flags), &uq->uq_key);
2835201472Sdavidxu	if (error != 0)
2836201472Sdavidxu		return (error);
2837201472Sdavidxu	umtxq_lock(&uq->uq_key);
2838201472Sdavidxu	umtxq_busy(&uq->uq_key);
2839201472Sdavidxu	umtxq_insert(uq);
2840201472Sdavidxu	umtxq_unlock(&uq->uq_key);
2841201472Sdavidxu
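	/*
	 * Publish _has_waiters before re-reading _count, so a poster that
	 * increments _count after this point will see the flag and issue
	 * a UMTX_OP_SEM_WAKE.
	 */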
2842203657Sdavidxu	suword32(__DEVOLATILE(uint32_t *, &sem->_has_waiters), 1);
2843203657Sdavidxu
2844201472Sdavidxu	count = fuword32(__DEVOLATILE(uint32_t *, &sem->_count));
2845201472Sdavidxu	if (count != 0) {
2846201472Sdavidxu		umtxq_lock(&uq->uq_key);
2847201472Sdavidxu		umtxq_unbusy(&uq->uq_key);
2848201472Sdavidxu		umtxq_remove(uq);
2849201472Sdavidxu		umtxq_unlock(&uq->uq_key);
2850201472Sdavidxu		umtx_key_release(&uq->uq_key);
2851201472Sdavidxu		return (0);
2852201472Sdavidxu	}
2853201472Sdavidxu
2854201472Sdavidxu	umtxq_lock(&uq->uq_key);
2855201472Sdavidxu	umtxq_unbusy(&uq->uq_key);
2856201472Sdavidxu	umtxq_unlock(&uq->uq_key);
2857201472Sdavidxu
2858201472Sdavidxu	umtxq_lock(&uq->uq_key);
2859201472Sdavidxu	if (timeout == NULL) {
2860201472Sdavidxu		error = umtxq_sleep(uq, "usem", 0);
2861201472Sdavidxu	} else {
2862201472Sdavidxu		getnanouptime(&ets);
2863201472Sdavidxu		timespecadd(&ets, timeout);
2864201472Sdavidxu		TIMESPEC_TO_TIMEVAL(&tv, timeout);
2865201472Sdavidxu		for (;;) {
2866201472Sdavidxu			error = umtxq_sleep(uq, "usem", tvtohz(&tv));
2867201472Sdavidxu			if (error != ETIMEDOUT)
2868201472Sdavidxu				break;
2869201472Sdavidxu			getnanouptime(&cts);
2870201472Sdavidxu			if (timespeccmp(&cts, &ets, >=)) {
2871201472Sdavidxu				error = ETIMEDOUT;
2872201472Sdavidxu				break;
2873201472Sdavidxu			}
2874201472Sdavidxu			tts = ets;
2875201472Sdavidxu			timespecsub(&tts, &cts);
2876201472Sdavidxu			TIMESPEC_TO_TIMEVAL(&tv, &tts);
2877201472Sdavidxu		}
2878201472Sdavidxu	}
2879201472Sdavidxu
2880211794Sdavidxu	if ((uq->uq_flags & UQF_UMTXQ) == 0)
2881211794Sdavidxu		error = 0;
2882211794Sdavidxu	else {
2883211794Sdavidxu		umtxq_remove(uq);
2884201472Sdavidxu		if (error == ERESTART)
2885201472Sdavidxu			error = EINTR;
2886201472Sdavidxu	}
2887201472Sdavidxu	umtxq_unlock(&uq->uq_key);
2888201472Sdavidxu	umtx_key_release(&uq->uq_key);
2889201472Sdavidxu	return (error);
2890201472Sdavidxu}
2891201472Sdavidxu
2892201472Sdavidxu/*
2893201472Sdavidxu * Wake up a waiter on a userland semaphore.
2894201472Sdavidxu */
2895201472Sdavidxustatic int
2896201472Sdavidxudo_sem_wake(struct thread *td, struct _usem *sem)
2897201472Sdavidxu{
2898201472Sdavidxu	struct umtx_key key;
2899201472Sdavidxu	int error, cnt, nwake;
2900201472Sdavidxu	uint32_t flags;
2901201472Sdavidxu
2902201472Sdavidxu	flags = fuword32(&sem->_flags);
2903201885Sdavidxu	if ((error = umtx_key_get(sem, TYPE_SEM, GET_SHARE(flags), &key)) != 0)
2904201472Sdavidxu		return (error);
2905201472Sdavidxu	umtxq_lock(&key);
2906201472Sdavidxu	umtxq_busy(&key);
2907201472Sdavidxu	cnt = umtxq_count(&key);
2908201472Sdavidxu	nwake = umtxq_signal(&key, 1);
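	/* If no sleeper remains queued, clear the userland hint. */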
2909201472Sdavidxu	if (cnt <= nwake) {
2910201472Sdavidxu		umtxq_unlock(&key);
2911201472Sdavidxu		error = suword32(
2912201472Sdavidxu		    __DEVOLATILE(uint32_t *, &sem->_has_waiters), 0);
2913201472Sdavidxu		umtxq_lock(&key);
2914201472Sdavidxu	}
2915201472Sdavidxu	umtxq_unbusy(&key);
2916201472Sdavidxu	umtxq_unlock(&key);
2917201472Sdavidxu	umtx_key_release(&key);
2918201472Sdavidxu	return (error);
2919201472Sdavidxu}
2920201472Sdavidxu
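/*
 * Legacy _umtx_lock()/_umtx_unlock() syscall entry points; the calling
 * thread's tid serves as the lock owner value.
 */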
2921139013Sdavidxuint
2922139013Sdavidxu_umtx_lock(struct thread *td, struct _umtx_lock_args *uap)
2923139013Sdavidxu    /* struct umtx *umtx */
2924139013Sdavidxu{
2925162536Sdavidxu	return _do_lock_umtx(td, uap->umtx, td->td_tid, 0);
2926139013Sdavidxu}
2927139013Sdavidxu
2928139013Sdavidxuint
2929139013Sdavidxu_umtx_unlock(struct thread *td, struct _umtx_unlock_args *uap)
2930139013Sdavidxu    /* struct umtx *umtx */
2931139013Sdavidxu{
2932162536Sdavidxu	return do_unlock_umtx(td, uap->umtx, td->td_tid);
2933139013Sdavidxu}
2934139013Sdavidxu
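/*
 * The __umtx_op_*() wrappers below share one convention: a NULL uaddr2
 * means wait forever, otherwise a struct timespec is copied in and its
 * tv_nsec field is range-checked before the worker routine is called.
 */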
2935162536Sdavidxustatic int
2936162536Sdavidxu__umtx_op_lock_umtx(struct thread *td, struct _umtx_op_args *uap)
2937139013Sdavidxu{
2938162536Sdavidxu	struct timespec *ts, timeout;
2939139013Sdavidxu	int error;
2940139013Sdavidxu
2941162536Sdavidxu	/* Allow a null timespec (wait forever). */
2942162536Sdavidxu	if (uap->uaddr2 == NULL)
2943162536Sdavidxu		ts = NULL;
2944162536Sdavidxu	else {
2945162536Sdavidxu		error = copyin(uap->uaddr2, &timeout, sizeof(timeout));
2946162536Sdavidxu		if (error != 0)
2947162536Sdavidxu			return (error);
2948162536Sdavidxu		if (timeout.tv_nsec >= 1000000000 ||
2949162536Sdavidxu		    timeout.tv_nsec < 0) {
2950162536Sdavidxu			return (EINVAL);
2951161678Sdavidxu		}
2952162536Sdavidxu		ts = &timeout;
2953162536Sdavidxu	}
2954162536Sdavidxu	return (do_lock_umtx(td, uap->obj, uap->val, ts));
2955162536Sdavidxu}
2956162536Sdavidxu
2957162536Sdavidxustatic int
2958162536Sdavidxu__umtx_op_unlock_umtx(struct thread *td, struct _umtx_op_args *uap)
2959162536Sdavidxu{
2960162536Sdavidxu	return (do_unlock_umtx(td, uap->obj, uap->val));
2961162536Sdavidxu}
2962162536Sdavidxu
2963162536Sdavidxustatic int
2964162536Sdavidxu__umtx_op_wait(struct thread *td, struct _umtx_op_args *uap)
2965162536Sdavidxu{
2966162536Sdavidxu	struct timespec *ts, timeout;
2967162536Sdavidxu	int error;
2968162536Sdavidxu
2969162536Sdavidxu	if (uap->uaddr2 == NULL)
2970162536Sdavidxu		ts = NULL;
2971162536Sdavidxu	else {
2972162536Sdavidxu		error = copyin(uap->uaddr2, &timeout, sizeof(timeout));
2973162536Sdavidxu		if (error != 0)
2974162536Sdavidxu			return (error);
2975162536Sdavidxu		if (timeout.tv_nsec >= 1000000000 ||
2976162536Sdavidxu		    timeout.tv_nsec < 0)
2977162536Sdavidxu			return (EINVAL);
2978162536Sdavidxu		ts = &timeout;
2979162536Sdavidxu	}
2980178646Sdavidxu	return do_wait(td, uap->obj, uap->val, ts, 0, 0);
2981162536Sdavidxu}
2982162536Sdavidxu
2983162536Sdavidxustatic int
2984173800Sdavidxu__umtx_op_wait_uint(struct thread *td, struct _umtx_op_args *uap)
2985173800Sdavidxu{
2986173800Sdavidxu	struct timespec *ts, timeout;
2987173800Sdavidxu	int error;
2988173800Sdavidxu
2989173800Sdavidxu	if (uap->uaddr2 == NULL)
2990173800Sdavidxu		ts = NULL;
2991173800Sdavidxu	else {
2992173800Sdavidxu		error = copyin(uap->uaddr2, &timeout, sizeof(timeout));
2993173800Sdavidxu		if (error != 0)
2994173800Sdavidxu			return (error);
2995173800Sdavidxu		if (timeout.tv_nsec >= 1000000000 ||
2996173800Sdavidxu		    timeout.tv_nsec < 0)
2997173800Sdavidxu			return (EINVAL);
2998173800Sdavidxu		ts = &timeout;
2999173800Sdavidxu	}
3000178646Sdavidxu	return do_wait(td, uap->obj, uap->val, ts, 1, 0);
3001173800Sdavidxu}
3002173800Sdavidxu
3003173800Sdavidxustatic int
3004178646Sdavidxu__umtx_op_wait_uint_private(struct thread *td, struct _umtx_op_args *uap)
3005178646Sdavidxu{
3006178646Sdavidxu	struct timespec *ts, timeout;
3007178646Sdavidxu	int error;
3008178646Sdavidxu
3009178646Sdavidxu	if (uap->uaddr2 == NULL)
3010178646Sdavidxu		ts = NULL;
3011178646Sdavidxu	else {
3012178646Sdavidxu		error = copyin(uap->uaddr2, &timeout, sizeof(timeout));
3013178646Sdavidxu		if (error != 0)
3014178646Sdavidxu			return (error);
3015178646Sdavidxu		if (timeout.tv_nsec >= 1000000000 ||
3016178646Sdavidxu		    timeout.tv_nsec < 0)
3017178646Sdavidxu			return (EINVAL);
3018178646Sdavidxu		ts = &timeout;
3019178646Sdavidxu	}
3020178646Sdavidxu	return do_wait(td, uap->obj, uap->val, ts, 1, 1);
3021178646Sdavidxu}
3022178646Sdavidxu
3023178646Sdavidxustatic int
3024162536Sdavidxu__umtx_op_wake(struct thread *td, struct _umtx_op_args *uap)
3025162536Sdavidxu{
3026178646Sdavidxu	return (kern_umtx_wake(td, uap->obj, uap->val, 0));
3027162536Sdavidxu}
3028162536Sdavidxu
3029162536Sdavidxustatic int
3030178646Sdavidxu__umtx_op_wake_private(struct thread *td, struct _umtx_op_args *uap)
3031178646Sdavidxu{
3032178646Sdavidxu	return (kern_umtx_wake(td, uap->obj, uap->val, 1));
3033178646Sdavidxu}
3034178646Sdavidxu
3035178646Sdavidxustatic int
3036162536Sdavidxu__umtx_op_lock_umutex(struct thread *td, struct _umtx_op_args *uap)
3037162536Sdavidxu{
3038162536Sdavidxu	struct timespec *ts, timeout;
3039162536Sdavidxu	int error;
3040162536Sdavidxu
3041162536Sdavidxu	/* Allow a null timespec (wait forever). */
3042162536Sdavidxu	if (uap->uaddr2 == NULL)
3043162536Sdavidxu		ts = NULL;
3044162536Sdavidxu	else {
3045162536Sdavidxu		error = copyin(uap->uaddr2, &timeout,
3046162536Sdavidxu		    sizeof(timeout));
3047162536Sdavidxu		if (error != 0)
3048162536Sdavidxu			return (error);
3049162536Sdavidxu		if (timeout.tv_nsec >= 1000000000 ||
3050162536Sdavidxu		    timeout.tv_nsec < 0) {
3051162536Sdavidxu			return (EINVAL);
3052139013Sdavidxu		}
3053162536Sdavidxu		ts = &timeout;
3054139013Sdavidxu	}
3055162536Sdavidxu	return do_lock_umutex(td, uap->obj, ts, 0);
3056162536Sdavidxu}
3057162536Sdavidxu
3058162536Sdavidxustatic int
3059162536Sdavidxu__umtx_op_trylock_umutex(struct thread *td, struct _umtx_op_args *uap)
3060162536Sdavidxu{
3061179970Sdavidxu	return do_lock_umutex(td, uap->obj, NULL, _UMUTEX_TRY);
3062162536Sdavidxu}
3063162536Sdavidxu
3064162536Sdavidxustatic int
3065179970Sdavidxu__umtx_op_wait_umutex(struct thread *td, struct _umtx_op_args *uap)
3066179970Sdavidxu{
3067179970Sdavidxu	struct timespec *ts, timeout;
3068179970Sdavidxu	int error;
3069179970Sdavidxu
3070179970Sdavidxu	/* Allow a null timespec (wait forever). */
3071179970Sdavidxu	if (uap->uaddr2 == NULL)
3072179970Sdavidxu		ts = NULL;
3073179970Sdavidxu	else {
3074179970Sdavidxu		error = copyin(uap->uaddr2, &timeout,
3075179970Sdavidxu		    sizeof(timeout));
3076179970Sdavidxu		if (error != 0)
3077179970Sdavidxu			return (error);
3078179970Sdavidxu		if (timeout.tv_nsec >= 1000000000 ||
3079179970Sdavidxu		    timeout.tv_nsec < 0) {
3080179970Sdavidxu			return (EINVAL);
3081179970Sdavidxu		}
3082179970Sdavidxu		ts = &timeout;
3083179970Sdavidxu	}
3084179970Sdavidxu	return do_lock_umutex(td, uap->obj, ts, _UMUTEX_WAIT);
3085179970Sdavidxu}
3086179970Sdavidxu
3087179970Sdavidxustatic int
3088179970Sdavidxu__umtx_op_wake_umutex(struct thread *td, struct _umtx_op_args *uap)
3089179970Sdavidxu{
3090179970Sdavidxu	return do_wake_umutex(td, uap->obj);
3091179970Sdavidxu}
3092179970Sdavidxu
3093179970Sdavidxustatic int
3094162536Sdavidxu__umtx_op_unlock_umutex(struct thread *td, struct _umtx_op_args *uap)
3095162536Sdavidxu{
3096162536Sdavidxu	return do_unlock_umutex(td, uap->obj);
3097162536Sdavidxu}
3098162536Sdavidxu
3099162536Sdavidxustatic int
3100162536Sdavidxu__umtx_op_set_ceiling(struct thread *td, struct _umtx_op_args *uap)
3101162536Sdavidxu{
3102162536Sdavidxu	return do_set_ceiling(td, uap->obj, uap->val, uap->uaddr1);
3103162536Sdavidxu}
3104162536Sdavidxu
3105164839Sdavidxustatic int
3106164839Sdavidxu__umtx_op_cv_wait(struct thread *td, struct _umtx_op_args *uap)
3107164839Sdavidxu{
3108164839Sdavidxu	struct timespec *ts, timeout;
3109164839Sdavidxu	int error;
3110164839Sdavidxu
3111164839Sdavidxu	/* Allow a null timespec (wait forever). */
3112164839Sdavidxu	if (uap->uaddr2 == NULL)
3113164839Sdavidxu		ts = NULL;
3114164839Sdavidxu	else {
3115164839Sdavidxu		error = copyin(uap->uaddr2, &timeout,
3116164839Sdavidxu		    sizeof(timeout));
3117164839Sdavidxu		if (error != 0)
3118164839Sdavidxu			return (error);
3119164839Sdavidxu		if (timeout.tv_nsec >= 1000000000 ||
3120164839Sdavidxu		    timeout.tv_nsec < 0) {
3121164839Sdavidxu			return (EINVAL);
3122164839Sdavidxu		}
3123164839Sdavidxu		ts = &timeout;
3124164839Sdavidxu	}
3125164876Sdavidxu	return (do_cv_wait(td, uap->obj, uap->uaddr1, ts, uap->val));
3126164839Sdavidxu}
3127164839Sdavidxu
3128164839Sdavidxustatic int
3129164839Sdavidxu__umtx_op_cv_signal(struct thread *td, struct _umtx_op_args *uap)
3130164839Sdavidxu{
3131164839Sdavidxu	return do_cv_signal(td, uap->obj);
3132164839Sdavidxu}
3133164839Sdavidxu
3134164839Sdavidxustatic int
3135164839Sdavidxu__umtx_op_cv_broadcast(struct thread *td, struct _umtx_op_args *uap)
3136164839Sdavidxu{
3137164839Sdavidxu	return do_cv_broadcast(td, uap->obj);
3138164839Sdavidxu}
3139164839Sdavidxu
3140177848Sdavidxustatic int
3141177848Sdavidxu__umtx_op_rw_rdlock(struct thread *td, struct _umtx_op_args *uap)
3142177848Sdavidxu{
3143177848Sdavidxu	struct timespec timeout;
3144177848Sdavidxu	int error;
3145177848Sdavidxu
3146177848Sdavidxu	/* Allow a null timespec (wait forever). */
3147177848Sdavidxu	if (uap->uaddr2 == NULL) {
3148177848Sdavidxu		error = do_rw_rdlock(td, uap->obj, uap->val, 0);
3149177848Sdavidxu	} else {
3150177848Sdavidxu		error = copyin(uap->uaddr2, &timeout,
3151177848Sdavidxu		    sizeof(timeout));
3152177848Sdavidxu		if (error != 0)
3153177848Sdavidxu			return (error);
3154177848Sdavidxu		if (timeout.tv_nsec >= 1000000000 ||
3155177848Sdavidxu		    timeout.tv_nsec < 0) {
3156177848Sdavidxu			return (EINVAL);
3157177848Sdavidxu		}
3158177848Sdavidxu		error = do_rw_rdlock2(td, uap->obj, uap->val, &timeout);
3159177848Sdavidxu	}
3160177848Sdavidxu	return (error);
3161177848Sdavidxu}
3162177848Sdavidxu
3163177848Sdavidxustatic int
3164177848Sdavidxu__umtx_op_rw_wrlock(struct thread *td, struct _umtx_op_args *uap)
3165177848Sdavidxu{
3166177848Sdavidxu	struct timespec timeout;
3167177848Sdavidxu	int error;
3168177848Sdavidxu
3169177848Sdavidxu	/* Allow a null timespec (wait forever). */
3170177848Sdavidxu	if (uap->uaddr2 == NULL) {
3171177848Sdavidxu		error = do_rw_wrlock(td, uap->obj, 0);
3172177848Sdavidxu	} else {
3173177848Sdavidxu		error = copyin(uap->uaddr2, &timeout,
3174177848Sdavidxu		    sizeof(timeout));
3175177848Sdavidxu		if (error != 0)
3176177848Sdavidxu			return (error);
3177177848Sdavidxu		if (timeout.tv_nsec >= 1000000000 ||
3178177848Sdavidxu		    timeout.tv_nsec < 0) {
3179177848Sdavidxu			return (EINVAL);
3180177848Sdavidxu		}
3181177848Sdavidxu
3182177848Sdavidxu		error = do_rw_wrlock2(td, uap->obj, &timeout);
3183177848Sdavidxu	}
3184177848Sdavidxu	return (error);
3185177848Sdavidxu}
3186177848Sdavidxu
3187177848Sdavidxustatic int
3188177848Sdavidxu__umtx_op_rw_unlock(struct thread *td, struct _umtx_op_args *uap)
3189177848Sdavidxu{
3190177880Sdavidxu	return do_rw_unlock(td, uap->obj);
3191177848Sdavidxu}
3192177848Sdavidxu
3193201472Sdavidxustatic int
3194201472Sdavidxu__umtx_op_sem_wait(struct thread *td, struct _umtx_op_args *uap)
3195201472Sdavidxu{
3196201472Sdavidxu	struct timespec *ts, timeout;
3197201472Sdavidxu	int error;
3198201472Sdavidxu
3199201472Sdavidxu	/* Allow a null timespec (wait forever). */
3200201472Sdavidxu	if (uap->uaddr2 == NULL)
3201201472Sdavidxu		ts = NULL;
3202201472Sdavidxu	else {
3203201472Sdavidxu		error = copyin(uap->uaddr2, &timeout,
3204201472Sdavidxu		    sizeof(timeout));
3205201472Sdavidxu		if (error != 0)
3206201472Sdavidxu			return (error);
3207201472Sdavidxu		if (timeout.tv_nsec >= 1000000000 ||
3208201472Sdavidxu		    timeout.tv_nsec < 0) {
3209201472Sdavidxu			return (EINVAL);
3210201472Sdavidxu		}
3211201472Sdavidxu		ts = &timeout;
3212201472Sdavidxu	}
3213201472Sdavidxu	return (do_sem_wait(td, uap->obj, ts));
3214201472Sdavidxu}
3215201472Sdavidxu
3216201472Sdavidxustatic int
3217201472Sdavidxu__umtx_op_sem_wake(struct thread *td, struct _umtx_op_args *uap)
3218201472Sdavidxu{
3219201472Sdavidxu	return do_sem_wake(td, uap->obj);
3220201472Sdavidxu}
3221201472Sdavidxu
3222162536Sdavidxutypedef int (*_umtx_op_func)(struct thread *td, struct _umtx_op_args *uap);
3223162536Sdavidxu
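/*
 * Operation vector table, indexed directly by the UMTX_OP_* constants.
 */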
3224162536Sdavidxustatic _umtx_op_func op_table[] = {
3225162536Sdavidxu	__umtx_op_lock_umtx,		/* UMTX_OP_LOCK */
3226162536Sdavidxu	__umtx_op_unlock_umtx,		/* UMTX_OP_UNLOCK */
3227162536Sdavidxu	__umtx_op_wait,			/* UMTX_OP_WAIT */
3228162536Sdavidxu	__umtx_op_wake,			/* UMTX_OP_WAKE */
3229162536Sdavidxu	__umtx_op_trylock_umutex,	/* UMTX_OP_MUTEX_TRYLOCK */
3230162536Sdavidxu	__umtx_op_lock_umutex,		/* UMTX_OP_MUTEX_LOCK */
3231162536Sdavidxu	__umtx_op_unlock_umutex,	/* UMTX_OP_MUTEX_UNLOCK */
3232164839Sdavidxu	__umtx_op_set_ceiling,		/* UMTX_OP_SET_CEILING */
3233164839Sdavidxu	__umtx_op_cv_wait,		/* UMTX_OP_CV_WAIT */
3234164839Sdavidxu	__umtx_op_cv_signal,		/* UMTX_OP_CV_SIGNAL */
3235173800Sdavidxu	__umtx_op_cv_broadcast,		/* UMTX_OP_CV_BROADCAST */
3236177848Sdavidxu	__umtx_op_wait_uint,		/* UMTX_OP_WAIT_UINT */
3237177848Sdavidxu	__umtx_op_rw_rdlock,		/* UMTX_OP_RW_RDLOCK */
3238177848Sdavidxu	__umtx_op_rw_wrlock,		/* UMTX_OP_RW_WRLOCK */
3239178646Sdavidxu	__umtx_op_rw_unlock,		/* UMTX_OP_RW_UNLOCK */
3240178646Sdavidxu	__umtx_op_wait_uint_private,	/* UMTX_OP_WAIT_UINT_PRIVATE */
3241179970Sdavidxu	__umtx_op_wake_private,		/* UMTX_OP_WAKE_PRIVATE */
3242179970Sdavidxu	__umtx_op_wait_umutex,		/* UMTX_OP_UMUTEX_WAIT */
3243201472Sdavidxu	__umtx_op_wake_umutex,		/* UMTX_OP_UMUTEX_WAKE */
3244201472Sdavidxu	__umtx_op_sem_wait,		/* UMTX_OP_SEM_WAIT */
3245201472Sdavidxu	__umtx_op_sem_wake		/* UMTX_OP_SEM_WAKE */
3246162536Sdavidxu};
3247162536Sdavidxu
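/*
 * _umtx_op(2) syscall entry point.  As a hypothetical illustration (not
 * part of this file), a futex-style wait/wake pair built on the private
 * ops might look like this in userland, with a NULL uaddr2 meaning no
 * timeout:
 *
 *	while (atomic_load_acq_int(&word) == busy)
 *		_umtx_op(&word, UMTX_OP_WAIT_UINT_PRIVATE, busy,
 *		    NULL, NULL);
 *	...
 *	_umtx_op(&word, UMTX_OP_WAKE_PRIVATE, 1, NULL, NULL);
 */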
3248162536Sdavidxuint
3249162536Sdavidxu_umtx_op(struct thread *td, struct _umtx_op_args *uap)
3250162536Sdavidxu{
3251163678Sdavidxu	if ((unsigned)uap->op < UMTX_OP_MAX)
3252162536Sdavidxu		return (*op_table[uap->op])(td, uap);
3253162536Sdavidxu	return (EINVAL);
3254162536Sdavidxu}
3255162536Sdavidxu
3256205014Snwhitehorn#ifdef COMPAT_FREEBSD32
3257163046Sdavidxuint
3258163046Sdavidxufreebsd32_umtx_lock(struct thread *td, struct freebsd32_umtx_lock_args *uap)
3259163046Sdavidxu    /* struct umtx *umtx */
3260163046Sdavidxu{
3261163046Sdavidxu	return (do_lock_umtx32(td, (uint32_t *)uap->umtx, td->td_tid, NULL));
3262163046Sdavidxu}
3263163046Sdavidxu
3264163046Sdavidxuint
3265163046Sdavidxufreebsd32_umtx_unlock(struct thread *td, struct freebsd32_umtx_unlock_args *uap)
3266163046Sdavidxu    /* struct umtx *umtx */
3267163046Sdavidxu{
3268163046Sdavidxu	return (do_unlock_umtx32(td, (uint32_t *)uap->umtx, td->td_tid));
3269163046Sdavidxu}
3270163046Sdavidxu
3271162536Sdavidxustruct timespec32 {
3272209390Sed	uint32_t tv_sec;
3273209390Sed	uint32_t tv_nsec;
3274162536Sdavidxu};
3275162536Sdavidxu
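/*
 * Copy in a 32-bit process's timespec and widen it to the native
 * struct timespec.
 */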
3276162536Sdavidxustatic inline int
3277162536Sdavidxucopyin_timeout32(void *addr, struct timespec *tsp)
3278162536Sdavidxu{
3279162536Sdavidxu	struct timespec32 ts32;
3280162536Sdavidxu	int error;
3281162536Sdavidxu
3282162536Sdavidxu	error = copyin(addr, &ts32, sizeof(struct timespec32));
3283162536Sdavidxu	if (error == 0) {
3284162536Sdavidxu		tsp->tv_sec = ts32.tv_sec;
3285162536Sdavidxu		tsp->tv_nsec = ts32.tv_nsec;
3286162536Sdavidxu	}
3287140421Sdavidxu	return (error);
3288139013Sdavidxu}
3289161678Sdavidxu
3290162536Sdavidxustatic int
3291162536Sdavidxu__umtx_op_lock_umtx_compat32(struct thread *td, struct _umtx_op_args *uap)
3292162536Sdavidxu{
3293162536Sdavidxu	struct timespec *ts, timeout;
3294162536Sdavidxu	int error;
3295162536Sdavidxu
3296162536Sdavidxu	/* Allow a null timespec (wait forever). */
3297162536Sdavidxu	if (uap->uaddr2 == NULL)
3298162536Sdavidxu		ts = NULL;
3299162536Sdavidxu	else {
3300162536Sdavidxu		error = copyin_timeout32(uap->uaddr2, &timeout);
3301162536Sdavidxu		if (error != 0)
3302162536Sdavidxu			return (error);
3303162536Sdavidxu		if (timeout.tv_nsec >= 1000000000 ||
3304162536Sdavidxu		    timeout.tv_nsec < 0) {
3305162536Sdavidxu			return (EINVAL);
3306162536Sdavidxu		}
3307162536Sdavidxu		ts = &timeout;
3308162536Sdavidxu	}
3309162536Sdavidxu	return (do_lock_umtx32(td, uap->obj, uap->val, ts));
3310162536Sdavidxu}
3311162536Sdavidxu
3312162536Sdavidxustatic int
3313162536Sdavidxu__umtx_op_unlock_umtx_compat32(struct thread *td, struct _umtx_op_args *uap)
3314162536Sdavidxu{
3315162536Sdavidxu	return (do_unlock_umtx32(td, uap->obj, (uint32_t)uap->val));
3316162536Sdavidxu}
3317162536Sdavidxu
3318162536Sdavidxustatic int
3319162536Sdavidxu__umtx_op_wait_compat32(struct thread *td, struct _umtx_op_args *uap)
3320162536Sdavidxu{
3321162536Sdavidxu	struct timespec *ts, timeout;
3322162536Sdavidxu	int error;
3323162536Sdavidxu
3324162536Sdavidxu	if (uap->uaddr2 == NULL)
3325162536Sdavidxu		ts = NULL;
3326162536Sdavidxu	else {
3327162536Sdavidxu		error = copyin_timeout32(uap->uaddr2, &timeout);
3328162536Sdavidxu		if (error != 0)
3329162536Sdavidxu			return (error);
3330162536Sdavidxu		if (timeout.tv_nsec >= 1000000000 ||
3331162536Sdavidxu		    timeout.tv_nsec < 0)
3332162536Sdavidxu			return (EINVAL);
3333162536Sdavidxu		ts = &timeout;
3334162536Sdavidxu	}
3335178646Sdavidxu	return do_wait(td, uap->obj, uap->val, ts, 1, 0);
3336162536Sdavidxu}
3337162536Sdavidxu
3338162536Sdavidxustatic int
3339162536Sdavidxu__umtx_op_lock_umutex_compat32(struct thread *td, struct _umtx_op_args *uap)
3340162536Sdavidxu{
3341162536Sdavidxu	struct timespec *ts, timeout;
3342162536Sdavidxu	int error;
3343162536Sdavidxu
3344162536Sdavidxu	/* Allow a null timespec (wait forever). */
3345162536Sdavidxu	if (uap->uaddr2 == NULL)
3346162536Sdavidxu		ts = NULL;
3347162536Sdavidxu	else {
3348162536Sdavidxu		error = copyin_timeout32(uap->uaddr2, &timeout);
3349162536Sdavidxu		if (error != 0)
3350162536Sdavidxu			return (error);
3351162536Sdavidxu		if (timeout.tv_nsec >= 1000000000 ||
3352162536Sdavidxu		    timeout.tv_nsec < 0)
3353162536Sdavidxu			return (EINVAL);
3354162536Sdavidxu		ts = &timeout;
3355162536Sdavidxu	}
3356162536Sdavidxu	return do_lock_umutex(td, uap->obj, ts, 0);
3357162536Sdavidxu}
3358162536Sdavidxu
3359164839Sdavidxustatic int
3360179970Sdavidxu__umtx_op_wait_umutex_compat32(struct thread *td, struct _umtx_op_args *uap)
3361179970Sdavidxu{
3362179970Sdavidxu	struct timespec *ts, timeout;
3363179970Sdavidxu	int error;
3364179970Sdavidxu
3365179970Sdavidxu	/* Allow a null timespec (wait forever). */
3366179970Sdavidxu	if (uap->uaddr2 == NULL)
3367179970Sdavidxu		ts = NULL;
3368179970Sdavidxu	else {
3369179970Sdavidxu		error = copyin_timeout32(uap->uaddr2, &timeout);
3370179970Sdavidxu		if (error != 0)
3371179970Sdavidxu			return (error);
3372179970Sdavidxu		if (timeout.tv_nsec >= 1000000000 ||
3373179970Sdavidxu		    timeout.tv_nsec < 0)
3374179970Sdavidxu			return (EINVAL);
3375179970Sdavidxu		ts = &timeout;
3376179970Sdavidxu	}
3377179970Sdavidxu	return do_lock_umutex(td, uap->obj, ts, _UMUTEX_WAIT);
3378179970Sdavidxu}
3379179970Sdavidxu
3380179970Sdavidxustatic int
3381164839Sdavidxu__umtx_op_cv_wait_compat32(struct thread *td, struct _umtx_op_args *uap)
3382164839Sdavidxu{
3383164839Sdavidxu	struct timespec *ts, timeout;
3384164839Sdavidxu	int error;
3385164839Sdavidxu
3386164839Sdavidxu	/* Allow a null timespec (wait forever). */
3387164839Sdavidxu	if (uap->uaddr2 == NULL)
3388164839Sdavidxu		ts = NULL;
3389164839Sdavidxu	else {
3390164839Sdavidxu		error = copyin_timeout32(uap->uaddr2, &timeout);
3391164839Sdavidxu		if (error != 0)
3392164839Sdavidxu			return (error);
3393164839Sdavidxu		if (timeout.tv_nsec >= 1000000000 ||
3394164839Sdavidxu		    timeout.tv_nsec < 0)
3395164839Sdavidxu			return (EINVAL);
3396164839Sdavidxu		ts = &timeout;
3397164839Sdavidxu	}
3398164876Sdavidxu	return (do_cv_wait(td, uap->obj, uap->uaddr1, ts, uap->val));
3399164839Sdavidxu}
3400164839Sdavidxu
3401177848Sdavidxustatic int
3402177848Sdavidxu__umtx_op_rw_rdlock_compat32(struct thread *td, struct _umtx_op_args *uap)
3403177848Sdavidxu{
3404177848Sdavidxu	struct timespec timeout;
3405177848Sdavidxu	int error;
3406177848Sdavidxu
3407177848Sdavidxu	/* Allow a null timespec (wait forever). */
3408177848Sdavidxu	if (uap->uaddr2 == NULL) {
3409177848Sdavidxu		error = do_rw_rdlock(td, uap->obj, uap->val, 0);
3410177848Sdavidxu	} else {
3411177848Sdavidxu		error = copyin_timeout32(uap->uaddr2, &timeout);
3413177848Sdavidxu		if (error != 0)
3414177848Sdavidxu			return (error);
3415177848Sdavidxu		if (timeout.tv_nsec >= 1000000000 ||
3416177848Sdavidxu		    timeout.tv_nsec < 0) {
3417177848Sdavidxu			return (EINVAL);
3418177848Sdavidxu		}
3419177848Sdavidxu		error = do_rw_rdlock2(td, uap->obj, uap->val, &timeout);
3420177848Sdavidxu	}
3421177848Sdavidxu	return (error);
3422177848Sdavidxu}
3423177848Sdavidxu
3424177848Sdavidxustatic int
3425177848Sdavidxu__umtx_op_rw_wrlock_compat32(struct thread *td, struct _umtx_op_args *uap)
3426177848Sdavidxu{
3427177848Sdavidxu	struct timespec timeout;
3428177848Sdavidxu	int error;
3429177848Sdavidxu
3430177848Sdavidxu	/* Allow a null timespec (wait forever). */
3431177848Sdavidxu	if (uap->uaddr2 == NULL) {
3432177852Sdavidxu		error = do_rw_wrlock(td, uap->obj, 0);
3433177848Sdavidxu	} else {
3434177848Sdavidxu		error = copyin_timeout32(uap->uaddr2, &timeout);
3435177848Sdavidxu		if (error != 0)
3436177848Sdavidxu			return (error);
3437177848Sdavidxu		if (timeout.tv_nsec >= 1000000000 ||
3438177848Sdavidxu		    timeout.tv_nsec < 0) {
3439177848Sdavidxu			return (EINVAL);
3440177848Sdavidxu		}
3441177848Sdavidxu
3442177852Sdavidxu		error = do_rw_wrlock2(td, uap->obj, &timeout);
3443177848Sdavidxu	}
3444177848Sdavidxu	return (error);
3445177848Sdavidxu}
3446177848Sdavidxu
3447178646Sdavidxustatic int
3448178646Sdavidxu__umtx_op_wait_uint_private_compat32(struct thread *td, struct _umtx_op_args *uap)
3449178646Sdavidxu{
3450178646Sdavidxu	struct timespec *ts, timeout;
3451178646Sdavidxu	int error;
3452178646Sdavidxu
3453178646Sdavidxu	if (uap->uaddr2 == NULL)
3454178646Sdavidxu		ts = NULL;
3455178646Sdavidxu	else {
3456178646Sdavidxu		error = copyin_timeout32(uap->uaddr2, &timeout);
3457178646Sdavidxu		if (error != 0)
3458178646Sdavidxu			return (error);
3459178646Sdavidxu		if (timeout.tv_nsec >= 1000000000 ||
3460178646Sdavidxu		    timeout.tv_nsec < 0)
3461178646Sdavidxu			return (EINVAL);
3462178646Sdavidxu		ts = &timeout;
3463178646Sdavidxu	}
3464178646Sdavidxu	return do_wait(td, uap->obj, uap->val, ts, 1, 1);
3465178646Sdavidxu}
3466178646Sdavidxu
3467201472Sdavidxustatic int
3468201472Sdavidxu__umtx_op_sem_wait_compat32(struct thread *td, struct _umtx_op_args *uap)
3469201472Sdavidxu{
3470201472Sdavidxu	struct timespec *ts, timeout;
3471201472Sdavidxu	int error;
3472201472Sdavidxu
3473201472Sdavidxu	/* Allow a null timespec (wait forever). */
3474201472Sdavidxu	if (uap->uaddr2 == NULL)
3475201472Sdavidxu		ts = NULL;
3476201472Sdavidxu	else {
3477201472Sdavidxu		error = copyin_timeout32(uap->uaddr2, &timeout);
3478201472Sdavidxu		if (error != 0)
3479201472Sdavidxu			return (error);
3480201472Sdavidxu		if (timeout.tv_nsec >= 1000000000 ||
3481201472Sdavidxu		    timeout.tv_nsec < 0)
3482201472Sdavidxu			return (EINVAL);
3483201472Sdavidxu		ts = &timeout;
3484201472Sdavidxu	}
3485201472Sdavidxu	return (do_sem_wait(td, uap->obj, ts));
3486201472Sdavidxu}
3487201472Sdavidxu
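/*
 * 32-bit compat dispatch table; entries must stay index-for-index in
 * sync with op_table above.
 */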
3488162536Sdavidxustatic _umtx_op_func op_table_compat32[] = {
3489162536Sdavidxu	__umtx_op_lock_umtx_compat32,	/* UMTX_OP_LOCK */
3490162536Sdavidxu	__umtx_op_unlock_umtx_compat32,	/* UMTX_OP_UNLOCK */
3491162536Sdavidxu	__umtx_op_wait_compat32,	/* UMTX_OP_WAIT */
3492162536Sdavidxu	__umtx_op_wake,			/* UMTX_OP_WAKE */
3493162550Sdavidxu	__umtx_op_trylock_umutex,	/* UMTX_OP_MUTEX_TRYLOCK */
3494162536Sdavidxu	__umtx_op_lock_umutex_compat32,	/* UMTX_OP_MUTEX_LOCK */
3495162536Sdavidxu	__umtx_op_unlock_umutex,	/* UMTX_OP_MUTEX_UNLOCK */
3496164839Sdavidxu	__umtx_op_set_ceiling,		/* UMTX_OP_SET_CEILING */
3497164839Sdavidxu	__umtx_op_cv_wait_compat32,	/* UMTX_OP_CV_WAIT */
3498164839Sdavidxu	__umtx_op_cv_signal,		/* UMTX_OP_CV_SIGNAL */
3499173800Sdavidxu	__umtx_op_cv_broadcast,		/* UMTX_OP_CV_BROADCAST */
3500177848Sdavidxu	__umtx_op_wait_compat32,	/* UMTX_OP_WAIT_UINT */
3501177848Sdavidxu	__umtx_op_rw_rdlock_compat32,	/* UMTX_OP_RW_RDLOCK */
3502177848Sdavidxu	__umtx_op_rw_wrlock_compat32,	/* UMTX_OP_RW_WRLOCK */
3503178646Sdavidxu	__umtx_op_rw_unlock,		/* UMTX_OP_RW_UNLOCK */
3504178646Sdavidxu	__umtx_op_wait_uint_private_compat32,	/* UMTX_OP_WAIT_UINT_PRIVATE */
3505179970Sdavidxu	__umtx_op_wake_private,		/* UMTX_OP_WAKE_PRIVATE */
3506179970Sdavidxu	__umtx_op_wait_umutex_compat32, /* UMTX_OP_UMUTEX_WAIT */
3507201472Sdavidxu	__umtx_op_wake_umutex,		/* UMTX_OP_UMUTEX_WAKE */
3508201472Sdavidxu	__umtx_op_sem_wait_compat32,	/* UMTX_OP_SEM_WAIT */
3509201472Sdavidxu	__umtx_op_sem_wake		/* UMTX_OP_SEM_WAKE */
3510162536Sdavidxu};
3511162536Sdavidxu
3512162536Sdavidxuint
3513162536Sdavidxufreebsd32_umtx_op(struct thread *td, struct freebsd32_umtx_op_args *uap)
3514162536Sdavidxu{
3515163678Sdavidxu	if ((unsigned)uap->op < UMTX_OP_MAX)
3516162536Sdavidxu		return (*op_table_compat32[uap->op])(td,
3517162536Sdavidxu			(struct _umtx_op_args *)uap);
3518162536Sdavidxu	return (EINVAL);
3519162536Sdavidxu}
3520162536Sdavidxu#endif
3521162536Sdavidxu
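/*
 * The per-thread umtx queue structure is allocated when a thread is
 * created and freed when it is destroyed.
 */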
3522161678Sdavidxuvoid
3523161678Sdavidxuumtx_thread_init(struct thread *td)
3524161678Sdavidxu{
3525161678Sdavidxu	td->td_umtxq = umtxq_alloc();
3526161678Sdavidxu	td->td_umtxq->uq_thread = td;
3527161678Sdavidxu}
3528161678Sdavidxu
3529161678Sdavidxuvoid
3530161678Sdavidxuumtx_thread_fini(struct thread *td)
3531161678Sdavidxu{
3532161678Sdavidxu	umtxq_free(td->td_umtxq);
3533161678Sdavidxu}
3534161678Sdavidxu
3535161678Sdavidxu/*
3536161678Sdavidxu * Called when a new thread is created, e.g. during fork().
3537161678Sdavidxu */
3538161678Sdavidxuvoid
3539161678Sdavidxuumtx_thread_alloc(struct thread *td)
3540161678Sdavidxu{
3541161678Sdavidxu	struct umtx_q *uq;
3542161678Sdavidxu
3543161678Sdavidxu	uq = td->td_umtxq;
3544161678Sdavidxu	uq->uq_inherited_pri = PRI_MAX;
3545161678Sdavidxu
3546161678Sdavidxu	KASSERT(uq->uq_flags == 0, ("uq_flags != 0"));
3547161678Sdavidxu	KASSERT(uq->uq_thread == td, ("uq_thread != td"));
3548161678Sdavidxu	KASSERT(uq->uq_pi_blocked == NULL, ("uq_pi_blocked != NULL"));
3549161678Sdavidxu	KASSERT(TAILQ_EMPTY(&uq->uq_pi_contested), ("uq_pi_contested is not empty"));
3550161678Sdavidxu}
3551161678Sdavidxu
3552161678Sdavidxu/*
3553161678Sdavidxu * exec() hook.
3554161678Sdavidxu */
3555161678Sdavidxustatic void
3556161678Sdavidxuumtx_exec_hook(void *arg __unused, struct proc *p __unused,
3557161678Sdavidxu	struct image_params *imgp __unused)
3558161678Sdavidxu{
3559161678Sdavidxu	umtx_thread_cleanup(curthread);
3560161678Sdavidxu}
3561161678Sdavidxu
3562161678Sdavidxu/*
3563161678Sdavidxu * thread_exit() hook.
3564161678Sdavidxu */
3565161678Sdavidxuvoid
3566161678Sdavidxuumtx_thread_exit(struct thread *td)
3567161678Sdavidxu{
3568161678Sdavidxu	umtx_thread_cleanup(td);
3569161678Sdavidxu}
3570161678Sdavidxu
3571161678Sdavidxu/*
3572161678Sdavidxu * Clean up umtx state: reset inherited priority, disown contested PI mutexes.
3573161678Sdavidxu */
3574161678Sdavidxustatic void
3575161678Sdavidxuumtx_thread_cleanup(struct thread *td)
3576161678Sdavidxu{
3577161678Sdavidxu	struct umtx_q *uq;
3578161678Sdavidxu	struct umtx_pi *pi;
3579161678Sdavidxu
3580161678Sdavidxu	if ((uq = td->td_umtxq) == NULL)
3581161678Sdavidxu		return;
3582161678Sdavidxu
3583170300Sjeff	mtx_lock_spin(&umtx_lock);
3584161678Sdavidxu	uq->uq_inherited_pri = PRI_MAX;
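	/* Disown any PI mutexes still marked contested by this thread. */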
3585161678Sdavidxu	while ((pi = TAILQ_FIRST(&uq->uq_pi_contested)) != NULL) {
3586161678Sdavidxu		pi->pi_owner = NULL;
3587161678Sdavidxu		TAILQ_REMOVE(&uq->uq_pi_contested, pi, pi_link);
3588161678Sdavidxu	}
3589174701Sdavidxu	thread_lock(td);
3590161678Sdavidxu	td->td_flags &= ~TDF_UBORROWING;
3591174701Sdavidxu	thread_unlock(td);
3592170300Sjeff	mtx_unlock_spin(&umtx_lock);
3593161678Sdavidxu}
3594