kern_umtx.c revision 233700
/*-
 * Copyright (c) 2004, David Xu <davidxu@freebsd.org>
 * Copyright (c) 2002, Jeffrey Roberson <jeff@freebsd.org>
 * All rights reserved.
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 * 1. Redistributions of source code must retain the above copyright
 *    notice unmodified, this list of conditions, and the following
 *    disclaimer.
 * 2. Redistributions in binary form must reproduce the above copyright
 *    notice, this list of conditions and the following disclaimer in the
 *    documentation and/or other materials provided with the distribution.
 *
 * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR
 * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
 * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.
 * IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT,
 * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT
 * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
 * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
 * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
 * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF
 * THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
 */

#include <sys/cdefs.h>
__FBSDID("$FreeBSD: head/sys/kern/kern_umtx.c 233700 2012-03-30 12:57:14Z davidxu $");

#include "opt_compat.h"
#include "opt_umtx_profiling.h"

#include <sys/param.h>
#include <sys/kernel.h>
#include <sys/limits.h>
#include <sys/lock.h>
#include <sys/malloc.h>
#include <sys/mutex.h>
#include <sys/priv.h>
#include <sys/proc.h>
#include <sys/sched.h>
#include <sys/smp.h>
#include <sys/sysctl.h>
#include <sys/sysent.h>
#include <sys/systm.h>
#include <sys/sysproto.h>
#include <sys/syscallsubr.h>
#include <sys/eventhandler.h>
#include <sys/umtx.h>

#include <vm/vm.h>
#include <vm/vm_param.h>
#include <vm/pmap.h>
#include <vm/vm_map.h>
#include <vm/vm_object.h>

#include <machine/cpu.h>

#ifdef COMPAT_FREEBSD32
#include <compat/freebsd32/freebsd32_proto.h>
#endif

#define _UMUTEX_TRY		1
#define _UMUTEX_WAIT		2
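
/*
 * Locking modes for do_lock_normal() below: _UMUTEX_TRY fails with
 * EBUSY instead of sleeping, while _UMUTEX_WAIT only sleeps until the
 * lock appears free and never takes ownership itself.
 */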

/* Priority inheritance mutex info. */
struct umtx_pi {
	/* Owner thread */
	struct thread		*pi_owner;

	/* Reference count */
	int			pi_refcount;

	/* List entry to link umtx objects held by a thread */
	TAILQ_ENTRY(umtx_pi)	pi_link;

	/* List entry in hash */
	TAILQ_ENTRY(umtx_pi)	pi_hashlink;

	/* List for waiters */
	TAILQ_HEAD(,umtx_q)	pi_blocked;

	/* Identifies a userland lock object */
	struct umtx_key		pi_key;
};

/* A userland synchronization object user. */
struct umtx_q {
	/* Linked list for the hash. */
	TAILQ_ENTRY(umtx_q)	uq_link;

	/* Umtx key. */
	struct umtx_key		uq_key;

	/* Umtx flags. */
	int			uq_flags;
#define UQF_UMTXQ	0x0001

	/* The thread this entry belongs to. */
	struct thread		*uq_thread;

	/*
	 * The PI mutex this thread is blocked on.  Reads may hold
	 * either the chain lock or umtx_lock; writes must hold both.
	 */
	struct umtx_pi		*uq_pi_blocked;

	/* On blocked list */
	TAILQ_ENTRY(umtx_q)	uq_lockq;

	/* PI mutexes owned by this thread that others contend for */
	TAILQ_HEAD(,umtx_pi)	uq_pi_contested;

	/* Inherited priority from PP mutex */
	u_char			uq_inherited_pri;

	/* Spare queue ready to be reused */
	struct umtxq_queue	*uq_spare_queue;

	/* The queue we are on */
	struct umtxq_queue	*uq_cur_queue;
};

TAILQ_HEAD(umtxq_head, umtx_q);

/* Per-key wait-queue */
struct umtxq_queue {
	struct umtxq_head	head;
	struct umtx_key		key;
	LIST_ENTRY(umtxq_queue)	link;
	int			length;
};

LIST_HEAD(umtxq_list, umtxq_queue);

/* Userland lock object's wait-queue chain */
struct umtxq_chain {
	/* Lock for this chain. */
	struct mtx		uc_lock;

	/* List of sleep queues. */
	struct umtxq_list	uc_queue[2];
#define UMTX_SHARED_QUEUE	0
#define UMTX_EXCLUSIVE_QUEUE	1

	LIST_HEAD(, umtxq_queue) uc_spare_queue;

	/* Busy flag */
	char			uc_busy;

	/* Chain lock waiters */
	int			uc_waiters;

	/* All PI in the list */
	TAILQ_HEAD(,umtx_pi)	uc_pi_list;

#ifdef UMTX_PROFILING
	int			length;
	int			max_length;
#endif
};

#define	UMTXQ_LOCKED_ASSERT(uc)		mtx_assert(&(uc)->uc_lock, MA_OWNED)
#define	UMTXQ_BUSY_ASSERT(uc)	KASSERT((uc)->uc_busy, ("umtx chain is not busy"))

/*
 * Don't propagate time-sharing priority.  There is a security reason:
 * a user could simply create a PI mutex, let thread A lock it, and
 * have another thread B block on it.  Because B is sleeping, its
 * priority would be boosted, which would boost A's priority via
 * priority propagation as well, and A's priority would never be
 * lowered even while it used 100% CPU, which is unfair to other
 * processes.
 */

#define UPRI(td)	(((td)->td_user_pri >= PRI_MIN_TIMESHARE &&\
			  (td)->td_user_pri <= PRI_MAX_TIMESHARE) ?\
			 PRI_MAX_TIMESHARE : (td)->td_user_pri)
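
/*
 * Worked example (illustrative): a time-sharing thread whose
 * td_user_pri sits anywhere in [PRI_MIN_TIMESHARE, PRI_MAX_TIMESHARE]
 * contributes only PRI_MAX_TIMESHARE to the PI calculations below, so
 * blocked time-sharing waiters never boost a lock owner; a real-time
 * thread's td_user_pri passes through unchanged and can be lent.
 */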

#define	GOLDEN_RATIO_PRIME	2654404609U
#define	UMTX_CHAINS		512
#define	UMTX_SHIFTS		(__WORD_BIT - 9)

#define	GET_SHARE(flags)	\
    (((flags) & USYNC_PROCESS_SHARED) == 0 ? THREAD_SHARE : PROCESS_SHARE)

#define BUSY_SPINS		200

struct abs_timeout {
	int clockid;
	struct timespec cur;
	struct timespec end;
};

static uma_zone_t		umtx_pi_zone;
static struct umtxq_chain	umtxq_chains[2][UMTX_CHAINS];
static MALLOC_DEFINE(M_UMTX, "umtx", "UMTX queue memory");
static int			umtx_pi_allocated;

static SYSCTL_NODE(_debug, OID_AUTO, umtx, CTLFLAG_RW, 0, "umtx debug");
SYSCTL_INT(_debug_umtx, OID_AUTO, umtx_pi_allocated, CTLFLAG_RD,
    &umtx_pi_allocated, 0, "Allocated umtx_pi");

#ifdef UMTX_PROFILING
static long max_length;
SYSCTL_LONG(_debug_umtx, OID_AUTO, max_length, CTLFLAG_RD, &max_length, 0, "max_length");
static SYSCTL_NODE(_debug_umtx, OID_AUTO, chains, CTLFLAG_RD, 0, "umtx chain stats");
#endif

static void umtxq_sysinit(void *);
static void umtxq_hash(struct umtx_key *key);
static struct umtxq_chain *umtxq_getchain(struct umtx_key *key);
static void umtxq_lock(struct umtx_key *key);
static void umtxq_unlock(struct umtx_key *key);
static void umtxq_busy(struct umtx_key *key);
static void umtxq_unbusy(struct umtx_key *key);
static void umtxq_insert_queue(struct umtx_q *uq, int q);
static void umtxq_remove_queue(struct umtx_q *uq, int q);
static int umtxq_sleep(struct umtx_q *uq, const char *wmesg, struct abs_timeout *);
static int umtxq_count(struct umtx_key *key);
static struct umtx_pi *umtx_pi_alloc(int);
static void umtx_pi_free(struct umtx_pi *pi);
static int do_unlock_pp(struct thread *td, struct umutex *m, uint32_t flags);
static void umtx_thread_cleanup(struct thread *td);
static void umtx_exec_hook(void *arg __unused, struct proc *p __unused,
	struct image_params *imgp __unused);
SYSINIT(umtx, SI_SUB_EVENTHANDLER+1, SI_ORDER_MIDDLE, umtxq_sysinit, NULL);

#define umtxq_signal(key, nwake)	umtxq_signal_queue((key), (nwake), UMTX_SHARED_QUEUE)
#define umtxq_insert(uq)	umtxq_insert_queue((uq), UMTX_SHARED_QUEUE)
#define umtxq_remove(uq)	umtxq_remove_queue((uq), UMTX_SHARED_QUEUE)

static struct mtx umtx_lock;

#ifdef UMTX_PROFILING
static void
umtx_init_profiling(void)
{
	struct sysctl_oid *chain_oid;
	char chain_name[10];
	int i;

	for (i = 0; i < UMTX_CHAINS; ++i) {
		snprintf(chain_name, sizeof(chain_name), "%d", i);
		chain_oid = SYSCTL_ADD_NODE(NULL,
		    SYSCTL_STATIC_CHILDREN(_debug_umtx_chains), OID_AUTO,
		    chain_name, CTLFLAG_RD, NULL, "umtx hash stats");
		SYSCTL_ADD_INT(NULL, SYSCTL_CHILDREN(chain_oid), OID_AUTO,
		    "max_length0", CTLFLAG_RD, &umtxq_chains[0][i].max_length, 0, NULL);
		SYSCTL_ADD_INT(NULL, SYSCTL_CHILDREN(chain_oid), OID_AUTO,
		    "max_length1", CTLFLAG_RD, &umtxq_chains[1][i].max_length, 0, NULL);
	}
}
#endif

static void
umtxq_sysinit(void *arg __unused)
{
	int i, j;

	umtx_pi_zone = uma_zcreate("umtx pi", sizeof(struct umtx_pi),
		NULL, NULL, NULL, NULL, UMA_ALIGN_PTR, 0);
	for (i = 0; i < 2; ++i) {
		for (j = 0; j < UMTX_CHAINS; ++j) {
			mtx_init(&umtxq_chains[i][j].uc_lock, "umtxql", NULL,
				 MTX_DEF | MTX_DUPOK);
			LIST_INIT(&umtxq_chains[i][j].uc_queue[0]);
			LIST_INIT(&umtxq_chains[i][j].uc_queue[1]);
			LIST_INIT(&umtxq_chains[i][j].uc_spare_queue);
			TAILQ_INIT(&umtxq_chains[i][j].uc_pi_list);
			umtxq_chains[i][j].uc_busy = 0;
			umtxq_chains[i][j].uc_waiters = 0;
#ifdef UMTX_PROFILING
			umtxq_chains[i][j].length = 0;
			umtxq_chains[i][j].max_length = 0;
#endif
		}
	}
#ifdef UMTX_PROFILING
	umtx_init_profiling();
#endif
	mtx_init(&umtx_lock, "umtx lock", NULL, MTX_SPIN);
	EVENTHANDLER_REGISTER(process_exec, umtx_exec_hook, NULL,
	    EVENTHANDLER_PRI_ANY);
}

struct umtx_q *
umtxq_alloc(void)
{
	struct umtx_q *uq;

	uq = malloc(sizeof(struct umtx_q), M_UMTX, M_WAITOK | M_ZERO);
	uq->uq_spare_queue = malloc(sizeof(struct umtxq_queue), M_UMTX, M_WAITOK | M_ZERO);
	TAILQ_INIT(&uq->uq_spare_queue->head);
	TAILQ_INIT(&uq->uq_pi_contested);
	uq->uq_inherited_pri = PRI_MAX;
	return (uq);
}

void
umtxq_free(struct umtx_q *uq)
{
	MPASS(uq->uq_spare_queue != NULL);
	free(uq->uq_spare_queue, M_UMTX);
	free(uq, M_UMTX);
}

static inline void
umtxq_hash(struct umtx_key *key)
{
	unsigned n = (uintptr_t)key->info.both.a + key->info.both.b;
	key->hash = ((n * GOLDEN_RATIO_PRIME) >> UMTX_SHIFTS) % UMTX_CHAINS;
}
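
/*
 * Illustrative example: for a process-shared key, info.both.a is the
 * backing VM object and info.both.b its offset; for a private key they
 * are the vmspace pointer and the user address.  The Fibonacci-style
 * multiply by GOLDEN_RATIO_PRIME then spreads the sum across the 512
 * chains, so unrelated addresses rarely share a chain lock.
 */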

static inline struct umtxq_chain *
umtxq_getchain(struct umtx_key *key)
{
	if (key->type <= TYPE_SEM)
		return (&umtxq_chains[1][key->hash]);
	return (&umtxq_chains[0][key->hash]);
}

/*
 * Lock a chain.
 */
static inline void
umtxq_lock(struct umtx_key *key)
{
	struct umtxq_chain *uc;

	uc = umtxq_getchain(key);
	mtx_lock(&uc->uc_lock);
}

/*
 * Unlock a chain.
 */
static inline void
umtxq_unlock(struct umtx_key *key)
{
	struct umtxq_chain *uc;

	uc = umtxq_getchain(key);
	mtx_unlock(&uc->uc_lock);
}

/*
 * Set the chain to busy state when a following operation
 * may block (the kernel mutex can not be held across it).
 */
static inline void
umtxq_busy(struct umtx_key *key)
{
	struct umtxq_chain *uc;

	uc = umtxq_getchain(key);
	mtx_assert(&uc->uc_lock, MA_OWNED);
	if (uc->uc_busy) {
#ifdef SMP
		if (smp_cpus > 1) {
			int count = BUSY_SPINS;
			if (count > 0) {
				umtxq_unlock(key);
				while (uc->uc_busy && --count > 0)
					cpu_spinwait();
				umtxq_lock(key);
			}
		}
#endif
		while (uc->uc_busy) {
			uc->uc_waiters++;
			msleep(uc, &uc->uc_lock, 0, "umtxqb", 0);
			uc->uc_waiters--;
		}
	}
	uc->uc_busy = 1;
}

/*
 * Unbusy a chain.
 */
static inline void
umtxq_unbusy(struct umtx_key *key)
{
	struct umtxq_chain *uc;

	uc = umtxq_getchain(key);
	mtx_assert(&uc->uc_lock, MA_OWNED);
	KASSERT(uc->uc_busy != 0, ("not busy"));
	uc->uc_busy = 0;
	if (uc->uc_waiters)
		wakeup_one(uc);
}
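
/*
 * Usage sketch (illustrative): callers mark a chain busy around work
 * that touches pageable user memory, because the chain mutex itself
 * may not be held across a fault:
 *
 *	umtxq_lock(&key);
 *	umtxq_busy(&key);	(serialize operations on this key)
 *	umtxq_unlock(&key);
 *	old = casuword32(...);	(may fault and sleep)
 *	umtxq_lock(&key);
 *	umtxq_unbusy(&key);
 *	umtxq_unlock(&key);
 */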

static struct umtxq_queue *
umtxq_queue_lookup(struct umtx_key *key, int q)
{
	struct umtxq_queue *uh;
	struct umtxq_chain *uc;

	uc = umtxq_getchain(key);
	UMTXQ_LOCKED_ASSERT(uc);
	LIST_FOREACH(uh, &uc->uc_queue[q], link) {
		if (umtx_key_match(&uh->key, key))
			return (uh);
	}

	return (NULL);
}

static inline void
umtxq_insert_queue(struct umtx_q *uq, int q)
{
	struct umtxq_queue *uh;
	struct umtxq_chain *uc;

	uc = umtxq_getchain(&uq->uq_key);
	UMTXQ_LOCKED_ASSERT(uc);
	KASSERT((uq->uq_flags & UQF_UMTXQ) == 0, ("umtx_q is already on queue"));
	uh = umtxq_queue_lookup(&uq->uq_key, q);
	if (uh != NULL) {
		LIST_INSERT_HEAD(&uc->uc_spare_queue, uq->uq_spare_queue, link);
	} else {
		uh = uq->uq_spare_queue;
		uh->key = uq->uq_key;
		LIST_INSERT_HEAD(&uc->uc_queue[q], uh, link);
	}
	uq->uq_spare_queue = NULL;

	TAILQ_INSERT_TAIL(&uh->head, uq, uq_link);
	uh->length++;
#ifdef UMTX_PROFILING
	uc->length++;
	if (uc->length > uc->max_length) {
		uc->max_length = uc->length;
		if (uc->max_length > max_length)
			max_length = uc->max_length;
	}
#endif
	uq->uq_flags |= UQF_UMTXQ;
	uq->uq_cur_queue = uh;
}

static inline void
umtxq_remove_queue(struct umtx_q *uq, int q)
{
	struct umtxq_chain *uc;
	struct umtxq_queue *uh;

	uc = umtxq_getchain(&uq->uq_key);
	UMTXQ_LOCKED_ASSERT(uc);
	if (uq->uq_flags & UQF_UMTXQ) {
		uh = uq->uq_cur_queue;
		TAILQ_REMOVE(&uh->head, uq, uq_link);
		uh->length--;
#ifdef UMTX_PROFILING
		uc->length--;
#endif
		uq->uq_flags &= ~UQF_UMTXQ;
		if (TAILQ_EMPTY(&uh->head)) {
			KASSERT(uh->length == 0,
			    ("inconsistent umtxq_queue length"));
			LIST_REMOVE(uh, link);
		} else {
			uh = LIST_FIRST(&uc->uc_spare_queue);
			KASSERT(uh != NULL, ("uc_spare_queue is empty"));
			LIST_REMOVE(uh, link);
		}
		uq->uq_spare_queue = uh;
		uq->uq_cur_queue = NULL;
	}
}
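
/*
 * Recycling invariant (illustrative walk-through): every umtx_q owns
 * one pre-allocated umtxq_queue.  The first waiter on a key donates
 * its queue to the chain to become the per-key wait queue; later
 * waiters park theirs on uc_spare_queue.  On removal, the last waiter
 * leaving takes the now-empty per-key queue back, while earlier
 * leavers take a spare, so no allocation is ever needed while the
 * chain lock is held.
 */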

/*
 * Count the waiters on a key's shared wait queue.
 */
static int
umtxq_count(struct umtx_key *key)
{
	struct umtxq_chain *uc;
	struct umtxq_queue *uh;

	uc = umtxq_getchain(key);
	UMTXQ_LOCKED_ASSERT(uc);
	uh = umtxq_queue_lookup(key, UMTX_SHARED_QUEUE);
	if (uh != NULL)
		return (uh->length);
	return (0);
}

/*
 * Count the PI waiters on a key and return the first waiter.
 */
static int
umtxq_count_pi(struct umtx_key *key, struct umtx_q **first)
{
	struct umtxq_chain *uc;
	struct umtxq_queue *uh;

	*first = NULL;
	uc = umtxq_getchain(key);
	UMTXQ_LOCKED_ASSERT(uc);
	uh = umtxq_queue_lookup(key, UMTX_SHARED_QUEUE);
	if (uh != NULL) {
		*first = TAILQ_FIRST(&uh->head);
		return (uh->length);
	}
	return (0);
}

/*
 * Wake up threads waiting on a userland object.
 */
static int
umtxq_signal_queue(struct umtx_key *key, int n_wake, int q)
{
	struct umtxq_chain *uc;
	struct umtxq_queue *uh;
	struct umtx_q *uq;
	int ret;

	ret = 0;
	uc = umtxq_getchain(key);
	UMTXQ_LOCKED_ASSERT(uc);
	uh = umtxq_queue_lookup(key, q);
	if (uh != NULL) {
		while ((uq = TAILQ_FIRST(&uh->head)) != NULL) {
			umtxq_remove_queue(uq, q);
			wakeup(uq);
			if (++ret >= n_wake)
				return (ret);
		}
	}
	return (ret);
}

/*
 * Wake up the specified thread.
 */
static inline void
umtxq_signal_thread(struct umtx_q *uq)
{
	struct umtxq_chain *uc;

	uc = umtxq_getchain(&uq->uq_key);
	UMTXQ_LOCKED_ASSERT(uc);
	umtxq_remove(uq);
	wakeup(uq);
}

static inline int
tstohz(const struct timespec *tsp)
{
	struct timeval tv;

	TIMESPEC_TO_TIMEVAL(&tv, tsp);
	return (tvtohz(&tv));
}

static void
abs_timeout_init(struct abs_timeout *timo, int clockid, int absolute,
	const struct timespec *timeout)
{

	timo->clockid = clockid;
	if (!absolute) {
		kern_clock_gettime(curthread, clockid, &timo->end);
		timo->cur = timo->end;
		timespecadd(&timo->end, timeout);
	} else {
		timo->end = *timeout;
		kern_clock_gettime(curthread, clockid, &timo->cur);
	}
}

static void
abs_timeout_init2(struct abs_timeout *timo, const struct _umtx_time *umtxtime)
{

	abs_timeout_init(timo, umtxtime->_clockid,
		(umtxtime->_flags & UMTX_ABSTIME) != 0,
		&umtxtime->_timeout);
}

static int
abs_timeout_update(struct abs_timeout *timo)
{
	kern_clock_gettime(curthread, timo->clockid, &timo->cur);
	return (timespeccmp(&timo->cur, &timo->end, >=));
}

static int
abs_timeout_gethz(struct abs_timeout *timo)
{
	struct timespec tts;

	tts = timo->end;
	timespecsub(&tts, &timo->cur);
	return (tstohz(&tts));
}
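
/*
 * Usage sketch (illustrative): a relative 1.5 second wait on
 * CLOCK_MONOTONIC is converted to an absolute deadline once, then each
 * spurious wakeup refreshes the clock and recomputes remaining ticks:
 *
 *	struct abs_timeout timo;
 *	struct timespec ts = { 1, 500000000 };
 *
 *	abs_timeout_init(&timo, CLOCK_MONOTONIC, 0, &ts);
 *	while (!abs_timeout_update(&timo))
 *		(void)msleep(chan, mtx, PCATCH, "example",
 *		    abs_timeout_gethz(&timo));
 */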

/*
 * Put the thread into a sleep state; before sleeping, check whether
 * the thread was removed from the umtx queue.
 */
static inline int
umtxq_sleep(struct umtx_q *uq, const char *wmesg, struct abs_timeout *timo)
{
	struct umtxq_chain *uc;
	int error;

	uc = umtxq_getchain(&uq->uq_key);
	UMTXQ_LOCKED_ASSERT(uc);
	for (;;) {
		if (!(uq->uq_flags & UQF_UMTXQ))
			return (0);
		error = msleep(uq, &uc->uc_lock, PCATCH, wmesg,
		    timo == NULL ? 0 : abs_timeout_gethz(timo));
		if (error != EWOULDBLOCK)
			break;
		umtxq_unlock(&uq->uq_key);
		if (abs_timeout_update(timo)) {
			error = ETIMEDOUT;
			umtxq_lock(&uq->uq_key);
			break;
		}
		umtxq_lock(&uq->uq_key);
	}
	return (error);
}

/*
 * Convert a userspace address into a unique logical address.
 */
int
umtx_key_get(void *addr, int type, int share, struct umtx_key *key)
{
	struct thread *td = curthread;
	vm_map_t map;
	vm_map_entry_t entry;
	vm_pindex_t pindex;
	vm_prot_t prot;
	boolean_t wired;

	key->type = type;
	if (share == THREAD_SHARE) {
		key->shared = 0;
		key->info.private.vs = td->td_proc->p_vmspace;
		key->info.private.addr = (uintptr_t)addr;
	} else {
		MPASS(share == PROCESS_SHARE || share == AUTO_SHARE);
		map = &td->td_proc->p_vmspace->vm_map;
		if (vm_map_lookup(&map, (vm_offset_t)addr, VM_PROT_WRITE,
		    &entry, &key->info.shared.object, &pindex, &prot,
		    &wired) != KERN_SUCCESS) {
			return (EFAULT);
		}

		if ((share == PROCESS_SHARE) ||
		    (share == AUTO_SHARE &&
		     VM_INHERIT_SHARE == entry->inheritance)) {
			key->shared = 1;
			key->info.shared.offset = entry->offset + entry->start -
				(vm_offset_t)addr;
			vm_object_reference(key->info.shared.object);
		} else {
			key->shared = 0;
			key->info.private.vs = td->td_proc->p_vmspace;
			key->info.private.addr = (uintptr_t)addr;
		}
		vm_map_lookup_done(map, entry);
	}

	umtxq_hash(key);
	return (0);
}

/*
 * Release a key.
 */
void
umtx_key_release(struct umtx_key *key)
{
	if (key->shared)
		vm_object_deallocate(key->info.shared.object);
}
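
/*
 * Usage sketch (illustrative): every kernel-side operation maps the
 * user address to a key, works on the hashed chain, then releases the
 * key (dropping the VM object reference taken for shared keys):
 *
 *	struct umtx_key key;
 *	int error;
 *
 *	error = umtx_key_get(uaddr, TYPE_SIMPLE_WAIT, AUTO_SHARE, &key);
 *	if (error != 0)
 *		return (error);
 *	umtxq_lock(&key);
 *	...queue or wakeup work...
 *	umtxq_unlock(&key);
 *	umtx_key_release(&key);
 */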

/*
 * Lock a umtx object.
 */
static int
do_lock_umtx(struct thread *td, struct umtx *umtx, u_long id,
	const struct timespec *timeout)
{
	struct abs_timeout timo;
	struct umtx_q *uq;
	u_long owner;
	u_long old;
	int error = 0;

	uq = td->td_umtxq;
	if (timeout != NULL)
		abs_timeout_init(&timo, CLOCK_REALTIME, 0, timeout);

	/*
	 * Care must be exercised when dealing with the umtx structure.
	 * It can fault on any access.
	 */
	for (;;) {
		/*
		 * Try the uncontested case.  This should be done in userland.
		 */
		owner = casuword(&umtx->u_owner, UMTX_UNOWNED, id);

		/* The acquire succeeded. */
		if (owner == UMTX_UNOWNED)
			return (0);

		/* The address was invalid. */
		if (owner == -1)
			return (EFAULT);

		/* If no one owns it but it is contested try to acquire it. */
		if (owner == UMTX_CONTESTED) {
			owner = casuword(&umtx->u_owner,
			    UMTX_CONTESTED, id | UMTX_CONTESTED);

			if (owner == UMTX_CONTESTED)
				return (0);

			/* The address was invalid. */
			if (owner == -1)
				return (EFAULT);

			/* If this failed the lock has changed, restart. */
			continue;
		}

		/*
		 * If we caught a signal, we have retried and now
		 * exit immediately.
		 */
		if (error != 0)
			break;

		if ((error = umtx_key_get(umtx, TYPE_SIMPLE_LOCK,
			AUTO_SHARE, &uq->uq_key)) != 0)
			return (error);

		umtxq_lock(&uq->uq_key);
		umtxq_busy(&uq->uq_key);
		umtxq_insert(uq);
		umtxq_unbusy(&uq->uq_key);
		umtxq_unlock(&uq->uq_key);

		/*
		 * Set the contested bit so that a release in user space
		 * knows to use the system call for unlock.  If this fails
		 * either someone else has acquired the lock or it has been
		 * released.
		 */
		old = casuword(&umtx->u_owner, owner, owner | UMTX_CONTESTED);

		/* The address was invalid. */
		if (old == -1) {
			umtxq_lock(&uq->uq_key);
			umtxq_remove(uq);
			umtxq_unlock(&uq->uq_key);
			umtx_key_release(&uq->uq_key);
			return (EFAULT);
		}

		/*
		 * If we set the contested bit, sleep.  Otherwise the lock
		 * changed and we need to retry, or we lost a race to the
		 * thread unlocking the umtx.
		 */
		umtxq_lock(&uq->uq_key);
		if (old == owner)
			error = umtxq_sleep(uq, "umtx", timeout == NULL ? NULL :
			    &timo);
		umtxq_remove(uq);
		umtxq_unlock(&uq->uq_key);
		umtx_key_release(&uq->uq_key);
	}

	if (timeout == NULL) {
		/* Mutex locking is restarted if it is interrupted. */
		if (error == EINTR)
			error = ERESTART;
	} else {
		/* Timed locking is not restarted. */
		if (error == ERESTART)
			error = EINTR;
	}
	return (error);
}

/*
 * Unlock a umtx object.
 */
static int
do_unlock_umtx(struct thread *td, struct umtx *umtx, u_long id)
{
	struct umtx_key key;
	u_long owner;
	u_long old;
	int error;
	int count;

	/*
	 * Make sure we own this mtx.
	 */
	owner = fuword(__DEVOLATILE(u_long *, &umtx->u_owner));
	if (owner == -1)
		return (EFAULT);

	if ((owner & ~UMTX_CONTESTED) != id)
		return (EPERM);

	/* This should be done in userland */
	if ((owner & UMTX_CONTESTED) == 0) {
		old = casuword(&umtx->u_owner, owner, UMTX_UNOWNED);
		if (old == -1)
			return (EFAULT);
		if (old == owner)
			return (0);
		owner = old;
	}

	/* We should only ever be in here for contested locks */
	if ((error = umtx_key_get(umtx, TYPE_SIMPLE_LOCK, AUTO_SHARE,
		&key)) != 0)
		return (error);

	umtxq_lock(&key);
	umtxq_busy(&key);
	count = umtxq_count(&key);
	umtxq_unlock(&key);

	/*
	 * When unlocking the umtx, it must be marked as unowned if
	 * zero or one thread is waiting for it.
	 * Otherwise, it must be marked as contested.
	 */
	old = casuword(&umtx->u_owner, owner,
		count <= 1 ? UMTX_UNOWNED : UMTX_CONTESTED);
	umtxq_lock(&key);
	umtxq_signal(&key, 1);
	umtxq_unbusy(&key);
	umtxq_unlock(&key);
	umtx_key_release(&key);
	if (old == -1)
		return (EFAULT);
	if (old != owner)
		return (EINVAL);
	return (0);
}
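
/*
 * Illustrative userland counterpart (a sketch; _umtx_lock() and
 * _umtx_unlock() are the historical system calls backed by the two
 * functions above): the fast path is a single compare-and-swap, and
 * the kernel is entered only when the lock is contested:
 *
 *	if (atomic_cmpset_acq_long(&mtx->u_owner, UMTX_UNOWNED, id) == 0)
 *		_umtx_lock(mtx);	(contested: sleep in kernel)
 *	...critical section...
 *	if (atomic_cmpset_rel_long(&mtx->u_owner, id, UMTX_UNOWNED) == 0)
 *		_umtx_unlock(mtx);	(contested bit set: wake a waiter)
 */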

#ifdef COMPAT_FREEBSD32

/*
 * Lock a umtx object (32-bit compatibility).
 */
static int
do_lock_umtx32(struct thread *td, uint32_t *m, uint32_t id,
	const struct timespec *timeout)
{
	struct abs_timeout timo;
	struct umtx_q *uq;
	uint32_t owner;
	uint32_t old;
	int error = 0;

	uq = td->td_umtxq;

	if (timeout != NULL)
		abs_timeout_init(&timo, CLOCK_REALTIME, 0, timeout);

	/*
	 * Care must be exercised when dealing with the umtx structure.
	 * It can fault on any access.
	 */
	for (;;) {
		/*
		 * Try the uncontested case.  This should be done in userland.
		 */
		owner = casuword32(m, UMUTEX_UNOWNED, id);

		/* The acquire succeeded. */
		if (owner == UMUTEX_UNOWNED)
			return (0);

		/* The address was invalid. */
		if (owner == -1)
			return (EFAULT);

		/* If no one owns it but it is contested try to acquire it. */
		if (owner == UMUTEX_CONTESTED) {
			owner = casuword32(m,
			    UMUTEX_CONTESTED, id | UMUTEX_CONTESTED);
			if (owner == UMUTEX_CONTESTED)
				return (0);

			/* The address was invalid. */
			if (owner == -1)
				return (EFAULT);

			/* If this failed the lock has changed, restart. */
			continue;
		}

		/*
		 * If we caught a signal, we have retried and now
		 * exit immediately.
		 */
		if (error != 0)
			return (error);

		if ((error = umtx_key_get(m, TYPE_SIMPLE_LOCK,
			AUTO_SHARE, &uq->uq_key)) != 0)
			return (error);

		umtxq_lock(&uq->uq_key);
		umtxq_busy(&uq->uq_key);
		umtxq_insert(uq);
		umtxq_unbusy(&uq->uq_key);
		umtxq_unlock(&uq->uq_key);

		/*
		 * Set the contested bit so that a release in user space
		 * knows to use the system call for unlock.  If this fails
		 * either someone else has acquired the lock or it has been
		 * released.
		 */
		old = casuword32(m, owner, owner | UMUTEX_CONTESTED);

		/* The address was invalid. */
		if (old == -1) {
			umtxq_lock(&uq->uq_key);
			umtxq_remove(uq);
			umtxq_unlock(&uq->uq_key);
			umtx_key_release(&uq->uq_key);
			return (EFAULT);
		}

		/*
		 * If we set the contested bit, sleep.  Otherwise the lock
		 * changed and we need to retry, or we lost a race to the
		 * thread unlocking the umtx.
		 */
		umtxq_lock(&uq->uq_key);
		if (old == owner)
			error = umtxq_sleep(uq, "umtx", timeout == NULL ?
			    NULL : &timo);
		umtxq_remove(uq);
		umtxq_unlock(&uq->uq_key);
		umtx_key_release(&uq->uq_key);
	}

	if (timeout == NULL) {
		/* Mutex locking is restarted if it is interrupted. */
		if (error == EINTR)
			error = ERESTART;
	} else {
		/* Timed locking is not restarted. */
		if (error == ERESTART)
			error = EINTR;
	}
	return (error);
}

/*
 * Unlock a umtx object (32-bit compatibility).
 */
static int
do_unlock_umtx32(struct thread *td, uint32_t *m, uint32_t id)
{
	struct umtx_key key;
	uint32_t owner;
	uint32_t old;
	int error;
	int count;

	/*
	 * Make sure we own this mtx.
	 */
	owner = fuword32(m);
	if (owner == -1)
		return (EFAULT);

	if ((owner & ~UMUTEX_CONTESTED) != id)
		return (EPERM);

	/* This should be done in userland */
	if ((owner & UMUTEX_CONTESTED) == 0) {
		old = casuword32(m, owner, UMUTEX_UNOWNED);
		if (old == -1)
			return (EFAULT);
		if (old == owner)
			return (0);
		owner = old;
	}

	/* We should only ever be in here for contested locks */
	if ((error = umtx_key_get(m, TYPE_SIMPLE_LOCK, AUTO_SHARE,
		&key)) != 0)
		return (error);

	umtxq_lock(&key);
	umtxq_busy(&key);
	count = umtxq_count(&key);
	umtxq_unlock(&key);

	/*
	 * When unlocking the umtx, it must be marked as unowned if
	 * zero or one thread is waiting for it.
	 * Otherwise, it must be marked as contested.
	 */
	old = casuword32(m, owner,
		count <= 1 ? UMUTEX_UNOWNED : UMUTEX_CONTESTED);
	umtxq_lock(&key);
	umtxq_signal(&key, 1);
	umtxq_unbusy(&key);
	umtxq_unlock(&key);
	umtx_key_release(&key);
	if (old == -1)
		return (EFAULT);
	if (old != owner)
		return (EINVAL);
	return (0);
}
#endif

/*
 * Fetch and compare a value; sleep on the address if the value is
 * unchanged.
 */
static int
do_wait(struct thread *td, void *addr, u_long id,
	struct _umtx_time *timeout, int compat32, int is_private)
{
	struct abs_timeout timo;
	struct umtx_q *uq;
	u_long tmp;
	int error = 0;

	uq = td->td_umtxq;
	if ((error = umtx_key_get(addr, TYPE_SIMPLE_WAIT,
		is_private ? THREAD_SHARE : AUTO_SHARE, &uq->uq_key)) != 0)
		return (error);

	if (timeout != NULL)
		abs_timeout_init2(&timo, timeout);

	umtxq_lock(&uq->uq_key);
	umtxq_insert(uq);
	umtxq_unlock(&uq->uq_key);
	if (compat32 == 0)
		tmp = fuword(addr);
	else
		tmp = (unsigned int)fuword32(addr);
	umtxq_lock(&uq->uq_key);
	if (tmp == id)
		error = umtxq_sleep(uq, "uwait", timeout == NULL ?
		    NULL : &timo);
	if ((uq->uq_flags & UQF_UMTXQ) == 0)
		error = 0;
	else
		umtxq_remove(uq);
	umtxq_unlock(&uq->uq_key);
	umtx_key_release(&uq->uq_key);
	if (error == ERESTART)
		error = EINTR;
	return (error);
}

/*
 * Wake up threads sleeping on the specified address.
 */
int
kern_umtx_wake(struct thread *td, void *uaddr, int n_wake, int is_private)
{
	struct umtx_key key;
	int ret;

	if ((ret = umtx_key_get(uaddr, TYPE_SIMPLE_WAIT,
		is_private ? THREAD_SHARE : AUTO_SHARE, &key)) != 0)
		return (ret);
	umtxq_lock(&key);
	ret = umtxq_signal(&key, n_wake);
	umtxq_unlock(&key);
	umtx_key_release(&key);
	return (0);
}
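
/*
 * Illustrative userland pattern (a sketch, using the _umtx_op(2)
 * interface backed by do_wait() and kern_umtx_wake()): a waiter
 * re-checks the word and sleeps only while it still holds the expected
 * value; the waker changes the word first, then wakes:
 *
 *	while (atomic_load_acq_long(&state) == BUSY)
 *		_umtx_op(&state, UMTX_OP_WAIT, BUSY, NULL, NULL);
 *
 *	atomic_store_rel_long(&state, FREE);		(waker side)
 *	_umtx_op(&state, UMTX_OP_WAKE, 1, NULL, NULL);
 */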

/*
 * Lock a PTHREAD_PRIO_NONE protocol POSIX mutex.
 */
static int
do_lock_normal(struct thread *td, struct umutex *m, uint32_t flags,
	struct _umtx_time *timeout, int mode)
{
	struct abs_timeout timo;
	struct umtx_q *uq;
	uint32_t owner, old, id;
	int error = 0;

	id = td->td_tid;
	uq = td->td_umtxq;

	if (timeout != NULL)
		abs_timeout_init2(&timo, timeout);

	/*
	 * Care must be exercised when dealing with the umtx structure.
	 * It can fault on any access.
	 */
	for (;;) {
		owner = fuword32(__DEVOLATILE(void *, &m->m_owner));
		if (mode == _UMUTEX_WAIT) {
			if (owner == UMUTEX_UNOWNED || owner == UMUTEX_CONTESTED)
				return (0);
		} else {
			/*
			 * Try the uncontested case.  This should be done in userland.
			 */
			owner = casuword32(&m->m_owner, UMUTEX_UNOWNED, id);

			/* The acquire succeeded. */
			if (owner == UMUTEX_UNOWNED)
				return (0);

			/* The address was invalid. */
			if (owner == -1)
				return (EFAULT);

			/* If no one owns it but it is contested try to acquire it. */
			if (owner == UMUTEX_CONTESTED) {
				owner = casuword32(&m->m_owner,
				    UMUTEX_CONTESTED, id | UMUTEX_CONTESTED);

				if (owner == UMUTEX_CONTESTED)
					return (0);

				/* The address was invalid. */
				if (owner == -1)
					return (EFAULT);

				/* If this failed the lock has changed, restart. */
				continue;
			}
		}

		if ((flags & UMUTEX_ERROR_CHECK) != 0 &&
		    (owner & ~UMUTEX_CONTESTED) == id)
			return (EDEADLK);

		if (mode == _UMUTEX_TRY)
			return (EBUSY);

		/*
		 * If we caught a signal, we have retried and now
		 * exit immediately.
		 */
		if (error != 0)
			return (error);

		if ((error = umtx_key_get(m, TYPE_NORMAL_UMUTEX,
		    GET_SHARE(flags), &uq->uq_key)) != 0)
			return (error);

		umtxq_lock(&uq->uq_key);
		umtxq_busy(&uq->uq_key);
		umtxq_insert(uq);
		umtxq_unlock(&uq->uq_key);

		/*
		 * Set the contested bit so that a release in user space
		 * knows to use the system call for unlock.  If this fails
		 * either someone else has acquired the lock or it has been
		 * released.
		 */
		old = casuword32(&m->m_owner, owner, owner | UMUTEX_CONTESTED);

		/* The address was invalid. */
		if (old == -1) {
			umtxq_lock(&uq->uq_key);
			umtxq_remove(uq);
			umtxq_unbusy(&uq->uq_key);
			umtxq_unlock(&uq->uq_key);
			umtx_key_release(&uq->uq_key);
			return (EFAULT);
		}

		/*
		 * If we set the contested bit, sleep.  Otherwise the lock
		 * changed and we need to retry, or we lost a race to the
		 * thread unlocking the umtx.
		 */
		umtxq_lock(&uq->uq_key);
		umtxq_unbusy(&uq->uq_key);
		if (old == owner)
			error = umtxq_sleep(uq, "umtxn", timeout == NULL ?
			    NULL : &timo);
		umtxq_remove(uq);
		umtxq_unlock(&uq->uq_key);
		umtx_key_release(&uq->uq_key);
	}

	return (0);
}

/*
 * Unlock a PTHREAD_PRIO_NONE protocol POSIX mutex.
 */
static int
do_unlock_normal(struct thread *td, struct umutex *m, uint32_t flags)
{
	struct umtx_key key;
	uint32_t owner, old, id;
	int error;
	int count;

	id = td->td_tid;
	/*
	 * Make sure we own this mtx.
	 */
	owner = fuword32(__DEVOLATILE(uint32_t *, &m->m_owner));
	if (owner == -1)
		return (EFAULT);

	if ((owner & ~UMUTEX_CONTESTED) != id)
		return (EPERM);

	if ((owner & UMUTEX_CONTESTED) == 0) {
		old = casuword32(&m->m_owner, owner, UMUTEX_UNOWNED);
		if (old == -1)
			return (EFAULT);
		if (old == owner)
			return (0);
		owner = old;
	}

	/* We should only ever be in here for contested locks */
	if ((error = umtx_key_get(m, TYPE_NORMAL_UMUTEX, GET_SHARE(flags),
	    &key)) != 0)
		return (error);

	umtxq_lock(&key);
	umtxq_busy(&key);
	count = umtxq_count(&key);
	umtxq_unlock(&key);

	/*
	 * When unlocking the umtx, it must be marked as unowned if
	 * zero or one thread is waiting for it.
	 * Otherwise, it must be marked as contested.
	 */
	old = casuword32(&m->m_owner, owner,
		count <= 1 ? UMUTEX_UNOWNED : UMUTEX_CONTESTED);
	umtxq_lock(&key);
	umtxq_signal(&key, 1);
	umtxq_unbusy(&key);
	umtxq_unlock(&key);
	umtx_key_release(&key);
	if (old == -1)
		return (EFAULT);
	if (old != owner)
		return (EINVAL);
	return (0);
}

/*
 * Check whether the mutex is available and wake up a waiter;
 * this applies only to a simple (PTHREAD_PRIO_NONE) mutex.
 */
static int
do_wake_umutex(struct thread *td, struct umutex *m)
{
	struct umtx_key key;
	uint32_t owner;
	uint32_t flags;
	int error;
	int count;

	owner = fuword32(__DEVOLATILE(uint32_t *, &m->m_owner));
	if (owner == -1)
		return (EFAULT);

	if ((owner & ~UMUTEX_CONTESTED) != 0)
		return (0);

	flags = fuword32(&m->m_flags);

	/* We should only ever be in here for contested locks */
	if ((error = umtx_key_get(m, TYPE_NORMAL_UMUTEX, GET_SHARE(flags),
	    &key)) != 0)
		return (error);

	umtxq_lock(&key);
	umtxq_busy(&key);
	count = umtxq_count(&key);
	umtxq_unlock(&key);

	if (count <= 1)
		owner = casuword32(&m->m_owner, UMUTEX_CONTESTED, UMUTEX_UNOWNED);

	umtxq_lock(&key);
	if (count != 0 && (owner & ~UMUTEX_CONTESTED) == 0)
		umtxq_signal(&key, 1);
	umtxq_unbusy(&key);
	umtxq_unlock(&key);
	umtx_key_release(&key);
	return (0);
}

static inline struct umtx_pi *
umtx_pi_alloc(int flags)
{
	struct umtx_pi *pi;

	pi = uma_zalloc(umtx_pi_zone, M_ZERO | flags);
	TAILQ_INIT(&pi->pi_blocked);
	atomic_add_int(&umtx_pi_allocated, 1);
	return (pi);
}

static inline void
umtx_pi_free(struct umtx_pi *pi)
{
	uma_zfree(umtx_pi_zone, pi);
	atomic_add_int(&umtx_pi_allocated, -1);
}

/*
 * Adjust the thread's position on the PI mutex's blocked list after
 * its priority has been changed.
 */
static int
umtx_pi_adjust_thread(struct umtx_pi *pi, struct thread *td)
{
	struct umtx_q *uq, *uq1, *uq2;
	struct thread *td1;

	mtx_assert(&umtx_lock, MA_OWNED);
	if (pi == NULL)
		return (0);

	uq = td->td_umtxq;

	/*
	 * Check if the thread needs to be moved on the blocked chain.
	 * It needs to be moved if either its priority is lower than
	 * the previous thread or higher than the next thread.
	 */
	uq1 = TAILQ_PREV(uq, umtxq_head, uq_lockq);
	uq2 = TAILQ_NEXT(uq, uq_lockq);
	if ((uq1 != NULL && UPRI(td) < UPRI(uq1->uq_thread)) ||
	    (uq2 != NULL && UPRI(td) > UPRI(uq2->uq_thread))) {
		/*
		 * Remove thread from blocked chain and determine where
		 * it should be moved to.
		 */
		TAILQ_REMOVE(&pi->pi_blocked, uq, uq_lockq);
		TAILQ_FOREACH(uq1, &pi->pi_blocked, uq_lockq) {
			td1 = uq1->uq_thread;
			MPASS(td1->td_proc->p_magic == P_MAGIC);
			if (UPRI(td1) > UPRI(td))
				break;
		}

		if (uq1 == NULL)
			TAILQ_INSERT_TAIL(&pi->pi_blocked, uq, uq_lockq);
		else
			TAILQ_INSERT_BEFORE(uq1, uq, uq_lockq);
	}
	return (1);
}
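
/*
 * Worked example (illustrative): if pi_blocked holds waiters with
 * priorities 100, 120, 140 and the 120 thread is boosted to 90, the
 * check above sees UPRI(td) < UPRI(prev), unlinks it, and the loop
 * reinserts it before the 100 thread; the list stays sorted so that
 * TAILQ_FIRST() is always the best waiter (lowest priority value).
 */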
1387161678Sdavidxu
1388161678Sdavidxu/*
1389161678Sdavidxu * Propagate priority when a thread is blocked on POSIX
1390161678Sdavidxu * PI mutex.
1391161678Sdavidxu */
1392161678Sdavidxustatic void
1393161678Sdavidxuumtx_propagate_priority(struct thread *td)
1394161678Sdavidxu{
1395161678Sdavidxu	struct umtx_q *uq;
1396161678Sdavidxu	struct umtx_pi *pi;
1397161678Sdavidxu	int pri;
1398161678Sdavidxu
1399170300Sjeff	mtx_assert(&umtx_lock, MA_OWNED);
1400161678Sdavidxu	pri = UPRI(td);
1401161678Sdavidxu	uq = td->td_umtxq;
1402161678Sdavidxu	pi = uq->uq_pi_blocked;
1403161678Sdavidxu	if (pi == NULL)
1404161678Sdavidxu		return;
1405161678Sdavidxu
1406161678Sdavidxu	for (;;) {
1407161678Sdavidxu		td = pi->pi_owner;
1408216313Sdavidxu		if (td == NULL || td == curthread)
1409161678Sdavidxu			return;
1410161678Sdavidxu
1411161678Sdavidxu		MPASS(td->td_proc != NULL);
1412161678Sdavidxu		MPASS(td->td_proc->p_magic == P_MAGIC);
1413161678Sdavidxu
1414170300Sjeff		thread_lock(td);
1415216313Sdavidxu		if (td->td_lend_user_pri > pri)
1416216313Sdavidxu			sched_lend_user_prio(td, pri);
1417216313Sdavidxu		else {
1418216313Sdavidxu			thread_unlock(td);
1419216313Sdavidxu			break;
1420216313Sdavidxu		}
1421170300Sjeff		thread_unlock(td);
1422161678Sdavidxu
1423161678Sdavidxu		/*
1424161678Sdavidxu		 * Pick up the lock that td is blocked on.
1425161678Sdavidxu		 */
1426161678Sdavidxu		uq = td->td_umtxq;
1427161678Sdavidxu		pi = uq->uq_pi_blocked;
1428216791Sdavidxu		if (pi == NULL)
1429216791Sdavidxu			break;
1430161678Sdavidxu		/* Resort td on the list if needed. */
1431216791Sdavidxu		umtx_pi_adjust_thread(pi, td);
1432161678Sdavidxu	}
1433161678Sdavidxu}
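
/*
 * A worked example of the loop above (illustrative only; the thread
 * and mutex names are hypothetical): T1 (UPRI 100) blocks on PI mutex
 * M1 owned by T2 (UPRI 120), and T2 is itself blocked on M2 owned by
 * T3 (UPRI 140).  Propagating from T1 lends priority 100 to T2,
 * follows T2's uq_pi_blocked to M2, lends 100 to T3, and then stops
 * because T3 is not blocked on any PI mutex.
 */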
1434161678Sdavidxu
1435161678Sdavidxu/*
1436161678Sdavidxu * Re-propagate priority for a PI mutex when a thread blocked on
1437161678Sdavidxu * it is interrupted by a signal or resumed by another thread.
1438161678Sdavidxu */
1439161678Sdavidxustatic void
1440216791Sdavidxuumtx_repropagate_priority(struct umtx_pi *pi)
1441161678Sdavidxu{
1442161678Sdavidxu	struct umtx_q *uq, *uq_owner;
1443161678Sdavidxu	struct umtx_pi *pi2;
1444216791Sdavidxu	int pri;
1445161678Sdavidxu
1446170300Sjeff	mtx_assert(&umtx_lock, MA_OWNED);
1447161678Sdavidxu
1448161678Sdavidxu	while (pi != NULL && pi->pi_owner != NULL) {
1449161678Sdavidxu		pri = PRI_MAX;
1450161678Sdavidxu		uq_owner = pi->pi_owner->td_umtxq;
1451161678Sdavidxu
1452161678Sdavidxu		TAILQ_FOREACH(pi2, &uq_owner->uq_pi_contested, pi_link) {
1453161678Sdavidxu			uq = TAILQ_FIRST(&pi2->pi_blocked);
1454161678Sdavidxu			if (uq != NULL) {
1455161678Sdavidxu				if (pri > UPRI(uq->uq_thread))
1456161678Sdavidxu					pri = UPRI(uq->uq_thread);
1457161678Sdavidxu			}
1458161678Sdavidxu		}
1459161678Sdavidxu
1460161678Sdavidxu		if (pri > uq_owner->uq_inherited_pri)
1461161678Sdavidxu			pri = uq_owner->uq_inherited_pri;
1462170300Sjeff		thread_lock(pi->pi_owner);
1463216791Sdavidxu		sched_lend_user_prio(pi->pi_owner, pri);
1464170300Sjeff		thread_unlock(pi->pi_owner);
1465216791Sdavidxu		if ((pi = uq_owner->uq_pi_blocked) != NULL)
1466216791Sdavidxu			umtx_pi_adjust_thread(pi, uq_owner->uq_thread);
1467161678Sdavidxu	}
1468161678Sdavidxu}
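
/*
 * A worked example of re-propagation (illustrative only; names are
 * hypothetical): T2 (base UPRI 120) owns M1 with waiters T1 (UPRI 100)
 * and T4 (UPRI 110), so T2 runs with a lent priority of 100.  If T1 is
 * interrupted by a signal and leaves M1's blocked queue,
 * umtx_repropagate_priority() rescans T2's contested PI mutexes, finds
 * T4 at the head of M1's queue, and re-lends 110 to T2.
 */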
1469161678Sdavidxu
1470161678Sdavidxu/*
1471161678Sdavidxu * Insert a PI mutex into the owning thread's list.
1472161678Sdavidxu */
1473161678Sdavidxustatic void
1474161678Sdavidxuumtx_pi_setowner(struct umtx_pi *pi, struct thread *owner)
1475161678Sdavidxu{
1476161678Sdavidxu	struct umtx_q *uq_owner;
1477161678Sdavidxu
1478161678Sdavidxu	uq_owner = owner->td_umtxq;
1479170300Sjeff	mtx_assert(&umtx_lock, MA_OWNED);
1480161678Sdavidxu	if (pi->pi_owner != NULL)
1481161678Sdavidxu		panic("pi_owner != NULL");
1482161678Sdavidxu	pi->pi_owner = owner;
1483161678Sdavidxu	TAILQ_INSERT_TAIL(&uq_owner->uq_pi_contested, pi, pi_link);
1484161678Sdavidxu}
1485161678Sdavidxu
1486161678Sdavidxu/*
1487161678Sdavidxu * Claim ownership of a PI mutex.
1488161678Sdavidxu */
1489161678Sdavidxustatic int
1490161678Sdavidxuumtx_pi_claim(struct umtx_pi *pi, struct thread *owner)
1491161678Sdavidxu{
1492161678Sdavidxu	struct umtx_q *uq, *uq_owner;
1493161678Sdavidxu
1494161678Sdavidxu	uq_owner = owner->td_umtxq;
1495170300Sjeff	mtx_lock_spin(&umtx_lock);
1496161678Sdavidxu	if (pi->pi_owner == owner) {
1497170300Sjeff		mtx_unlock_spin(&umtx_lock);
1498161678Sdavidxu		return (0);
1499161678Sdavidxu	}
1500161678Sdavidxu
1501161678Sdavidxu	if (pi->pi_owner != NULL) {
1502161678Sdavidxu		/*
1503161678Sdavidxu		 * userland may have already messed up the mutex, sigh.
1504161678Sdavidxu		 */
1505170300Sjeff		mtx_unlock_spin(&umtx_lock);
1506161678Sdavidxu		return (EPERM);
1507161678Sdavidxu	}
1508161678Sdavidxu	umtx_pi_setowner(pi, owner);
1509161678Sdavidxu	uq = TAILQ_FIRST(&pi->pi_blocked);
1510161678Sdavidxu	if (uq != NULL) {
1511161678Sdavidxu		int pri;
1512161678Sdavidxu
1513161678Sdavidxu		pri = UPRI(uq->uq_thread);
1514170300Sjeff		thread_lock(owner);
1515161678Sdavidxu		if (pri < UPRI(owner))
1516161678Sdavidxu			sched_lend_user_prio(owner, pri);
1517170300Sjeff		thread_unlock(owner);
1518161678Sdavidxu	}
1519170300Sjeff	mtx_unlock_spin(&umtx_lock);
1520161678Sdavidxu	return (0);
1521161678Sdavidxu}
1522161678Sdavidxu
1523161678Sdavidxu/*
1524174701Sdavidxu * Adjust a thread's position in the queue of its blocked PI mutex;
1525174701Sdavidxu * this may trigger a new round of priority propagation.
1526174701Sdavidxu */
1527174701Sdavidxuvoid
1528174701Sdavidxuumtx_pi_adjust(struct thread *td, u_char oldpri)
1529174701Sdavidxu{
1530174707Sdavidxu	struct umtx_q *uq;
1531174707Sdavidxu	struct umtx_pi *pi;
1532174707Sdavidxu
1533174707Sdavidxu	uq = td->td_umtxq;
1534174701Sdavidxu	mtx_lock_spin(&umtx_lock);
1535174707Sdavidxu	/*
1536174707Sdavidxu	 * Pick up the lock that td is blocked on.
1537174707Sdavidxu	 */
1538174707Sdavidxu	pi = uq->uq_pi_blocked;
1539216791Sdavidxu	if (pi != NULL) {
1540216791Sdavidxu		umtx_pi_adjust_thread(pi, td);
1541216791Sdavidxu		umtx_repropagate_priority(pi);
1542216791Sdavidxu	}
1543174701Sdavidxu	mtx_unlock_spin(&umtx_lock);
1544174701Sdavidxu}
1545174701Sdavidxu
1546174701Sdavidxu/*
1547161678Sdavidxu * Sleep on a PI mutex.
1548161678Sdavidxu */
1549161678Sdavidxustatic int
1550161678Sdavidxuumtxq_sleep_pi(struct umtx_q *uq, struct umtx_pi *pi,
1551233690Sdavidxu	uint32_t owner, const char *wmesg, struct abs_timeout *timo)
1552161678Sdavidxu{
1553161678Sdavidxu	struct umtxq_chain *uc;
1554161678Sdavidxu	struct thread *td, *td1;
1555161678Sdavidxu	struct umtx_q *uq1;
1556161678Sdavidxu	int pri;
1557161678Sdavidxu	int error = 0;
1558161678Sdavidxu
1559161678Sdavidxu	td = uq->uq_thread;
1560161678Sdavidxu	KASSERT(td == curthread, ("inconsistent uq_thread"));
1561161678Sdavidxu	uc = umtxq_getchain(&uq->uq_key);
1562161678Sdavidxu	UMTXQ_LOCKED_ASSERT(uc);
1563189756Sdavidxu	UMTXQ_BUSY_ASSERT(uc);
1564161678Sdavidxu	umtxq_insert(uq);
1565189756Sdavidxu	mtx_lock_spin(&umtx_lock);
1566161678Sdavidxu	if (pi->pi_owner == NULL) {
1567189756Sdavidxu		mtx_unlock_spin(&umtx_lock);
1568213642Sdavidxu		/* XXX Only look up thread in current process. */
1569213642Sdavidxu		td1 = tdfind(owner, curproc->p_pid);
1570170300Sjeff		mtx_lock_spin(&umtx_lock);
1571215336Sdavidxu		if (td1 != NULL) {
1572215336Sdavidxu			if (pi->pi_owner == NULL)
1573215336Sdavidxu				umtx_pi_setowner(pi, td1);
1574215336Sdavidxu			PROC_UNLOCK(td1->td_proc);
1575161678Sdavidxu		}
1576161678Sdavidxu	}
1577161678Sdavidxu
1578161678Sdavidxu	TAILQ_FOREACH(uq1, &pi->pi_blocked, uq_lockq) {
1579161678Sdavidxu		pri = UPRI(uq1->uq_thread);
1580161678Sdavidxu		if (pri > UPRI(td))
1581161678Sdavidxu			break;
1582161678Sdavidxu	}
1583161678Sdavidxu
1584161678Sdavidxu	if (uq1 != NULL)
1585161678Sdavidxu		TAILQ_INSERT_BEFORE(uq1, uq, uq_lockq);
1586161678Sdavidxu	else
1587161678Sdavidxu		TAILQ_INSERT_TAIL(&pi->pi_blocked, uq, uq_lockq);
1588161678Sdavidxu
1589161678Sdavidxu	uq->uq_pi_blocked = pi;
1590174701Sdavidxu	thread_lock(td);
1591161678Sdavidxu	td->td_flags |= TDF_UPIBLOCKED;
1592174701Sdavidxu	thread_unlock(td);
1593161678Sdavidxu	umtx_propagate_priority(td);
1594170300Sjeff	mtx_unlock_spin(&umtx_lock);
1595189756Sdavidxu	umtxq_unbusy(&uq->uq_key);
1596161678Sdavidxu
1597233690Sdavidxu	error = umtxq_sleep(uq, wmesg, timo);
1598233690Sdavidxu	umtxq_remove(uq);
1599233690Sdavidxu
1600170300Sjeff	mtx_lock_spin(&umtx_lock);
1601161678Sdavidxu	uq->uq_pi_blocked = NULL;
1602174701Sdavidxu	thread_lock(td);
1603161678Sdavidxu	td->td_flags &= ~TDF_UPIBLOCKED;
1604174701Sdavidxu	thread_unlock(td);
1605161678Sdavidxu	TAILQ_REMOVE(&pi->pi_blocked, uq, uq_lockq);
1606216791Sdavidxu	umtx_repropagate_priority(pi);
1607170300Sjeff	mtx_unlock_spin(&umtx_lock);
1608189756Sdavidxu	umtxq_unlock(&uq->uq_key);
1609161678Sdavidxu
1610161678Sdavidxu	return (error);
1611161678Sdavidxu}
1612161678Sdavidxu
1613161678Sdavidxu/*
1614161678Sdavidxu * Bump the reference count of a PI mutex.
1615161678Sdavidxu */
1616161678Sdavidxustatic void
1617161678Sdavidxuumtx_pi_ref(struct umtx_pi *pi)
1618161678Sdavidxu{
1619161678Sdavidxu	struct umtxq_chain *uc;
1620161678Sdavidxu
1621161678Sdavidxu	uc = umtxq_getchain(&pi->pi_key);
1622161678Sdavidxu	UMTXQ_LOCKED_ASSERT(uc);
1623161678Sdavidxu	pi->pi_refcount++;
1624161678Sdavidxu}
1625161678Sdavidxu
1626161678Sdavidxu/*
1627161678Sdavidxu * Decrease the reference count of a PI mutex; when the count
1628161678Sdavidxu * drops to zero, its memory is freed.
1629161678Sdavidxu */
1630161678Sdavidxustatic void
1631161678Sdavidxuumtx_pi_unref(struct umtx_pi *pi)
1632161678Sdavidxu{
1633161678Sdavidxu	struct umtxq_chain *uc;
1634161678Sdavidxu
1635161678Sdavidxu	uc = umtxq_getchain(&pi->pi_key);
1636161678Sdavidxu	UMTXQ_LOCKED_ASSERT(uc);
1637161678Sdavidxu	KASSERT(pi->pi_refcount > 0, ("invalid reference count"));
1638161678Sdavidxu	if (--pi->pi_refcount == 0) {
1639170300Sjeff		mtx_lock_spin(&umtx_lock);
1640161678Sdavidxu		if (pi->pi_owner != NULL) {
1641161678Sdavidxu			TAILQ_REMOVE(&pi->pi_owner->td_umtxq->uq_pi_contested,
1642161678Sdavidxu				pi, pi_link);
1643161678Sdavidxu			pi->pi_owner = NULL;
1644161678Sdavidxu		}
1645161678Sdavidxu		KASSERT(TAILQ_EMPTY(&pi->pi_blocked),
1646161678Sdavidxu			("blocked queue not empty"));
1647170300Sjeff		mtx_unlock_spin(&umtx_lock);
1648161678Sdavidxu		TAILQ_REMOVE(&uc->uc_pi_list, pi, pi_hashlink);
1649189756Sdavidxu		umtx_pi_free(pi);
1650161678Sdavidxu	}
1651161678Sdavidxu}
1652161678Sdavidxu
1653161678Sdavidxu/*
1654161678Sdavidxu * Find a PI mutex in the hash table.
1655161678Sdavidxu */
1656161678Sdavidxustatic struct umtx_pi *
1657161678Sdavidxuumtx_pi_lookup(struct umtx_key *key)
1658161678Sdavidxu{
1659161678Sdavidxu	struct umtxq_chain *uc;
1660161678Sdavidxu	struct umtx_pi *pi;
1661161678Sdavidxu
1662161678Sdavidxu	uc = umtxq_getchain(key);
1663161678Sdavidxu	UMTXQ_LOCKED_ASSERT(uc);
1664161678Sdavidxu
1665161678Sdavidxu	TAILQ_FOREACH(pi, &uc->uc_pi_list, pi_hashlink) {
1666161678Sdavidxu		if (umtx_key_match(&pi->pi_key, key)) {
1667161678Sdavidxu			return (pi);
1668161678Sdavidxu		}
1669161678Sdavidxu	}
1670161678Sdavidxu	return (NULL);
1671161678Sdavidxu}
1672161678Sdavidxu
1673161678Sdavidxu/*
1674161678Sdavidxu * Insert a PI mutex into the hash table.
1675161678Sdavidxu */
1676161678Sdavidxustatic inline void
1677161678Sdavidxuumtx_pi_insert(struct umtx_pi *pi)
1678161678Sdavidxu{
1679161678Sdavidxu	struct umtxq_chain *uc;
1680161678Sdavidxu
1681161678Sdavidxu	uc = umtxq_getchain(&pi->pi_key);
1682161678Sdavidxu	UMTXQ_LOCKED_ASSERT(uc);
1683161678Sdavidxu	TAILQ_INSERT_TAIL(&uc->uc_pi_list, pi, pi_hashlink);
1684161678Sdavidxu}
1685161678Sdavidxu
1686161678Sdavidxu/*
1687161678Sdavidxu * Lock a PI mutex.
1688161678Sdavidxu */
1689161678Sdavidxustatic int
1690233690Sdavidxudo_lock_pi(struct thread *td, struct umutex *m, uint32_t flags,
1691233690Sdavidxu    struct _umtx_time *timeout, int try)
1692161678Sdavidxu{
1693233690Sdavidxu	struct abs_timeout timo;
1694161678Sdavidxu	struct umtx_q *uq;
1695161678Sdavidxu	struct umtx_pi *pi, *new_pi;
1696161678Sdavidxu	uint32_t id, owner, old;
1697161678Sdavidxu	int error;
1698161678Sdavidxu
1699161678Sdavidxu	id = td->td_tid;
1700161678Sdavidxu	uq = td->td_umtxq;
1701161678Sdavidxu
1702161678Sdavidxu	if ((error = umtx_key_get(m, TYPE_PI_UMUTEX, GET_SHARE(flags),
1703161678Sdavidxu	    &uq->uq_key)) != 0)
1704161678Sdavidxu		return (error);
1705233690Sdavidxu
1706233690Sdavidxu	if (timeout != NULL)
1707233690Sdavidxu		abs_timeout_init2(&timo, timeout);
1708233690Sdavidxu
1709163697Sdavidxu	umtxq_lock(&uq->uq_key);
1710163697Sdavidxu	pi = umtx_pi_lookup(&uq->uq_key);
1711163697Sdavidxu	if (pi == NULL) {
1712163697Sdavidxu		new_pi = umtx_pi_alloc(M_NOWAIT);
1713163697Sdavidxu		if (new_pi == NULL) {
1714161678Sdavidxu			umtxq_unlock(&uq->uq_key);
1715163697Sdavidxu			new_pi = umtx_pi_alloc(M_WAITOK);
1716161678Sdavidxu			umtxq_lock(&uq->uq_key);
1717161678Sdavidxu			pi = umtx_pi_lookup(&uq->uq_key);
1718163697Sdavidxu			if (pi != NULL) {
1719161678Sdavidxu				umtx_pi_free(new_pi);
1720163697Sdavidxu				new_pi = NULL;
1721161678Sdavidxu			}
1722161678Sdavidxu		}
1723163697Sdavidxu		if (new_pi != NULL) {
1724163697Sdavidxu			new_pi->pi_key = uq->uq_key;
1725163697Sdavidxu			umtx_pi_insert(new_pi);
1726163697Sdavidxu			pi = new_pi;
1727163697Sdavidxu		}
1728163697Sdavidxu	}
1729163697Sdavidxu	umtx_pi_ref(pi);
1730163697Sdavidxu	umtxq_unlock(&uq->uq_key);
1731161678Sdavidxu
1732163697Sdavidxu	/*
1733163697Sdavidxu	 * Care must be exercised when dealing with the umtx structure.  It
1734163697Sdavidxu	 * can fault on any access.
1735163697Sdavidxu	 */
1736163697Sdavidxu	for (;;) {
1737161678Sdavidxu		/*
1738161678Sdavidxu		 * Try the uncontested case.  This should be done in userland.
1739161678Sdavidxu		 */
1740161678Sdavidxu		owner = casuword32(&m->m_owner, UMUTEX_UNOWNED, id);
1741161678Sdavidxu
1742161678Sdavidxu		/* The acquire succeeded. */
1743161678Sdavidxu		if (owner == UMUTEX_UNOWNED) {
1744161678Sdavidxu			error = 0;
1745161678Sdavidxu			break;
1746161678Sdavidxu		}
1747161678Sdavidxu
1748161678Sdavidxu		/* The address was invalid. */
1749161678Sdavidxu		if (owner == -1) {
1750161678Sdavidxu			error = EFAULT;
1751161678Sdavidxu			break;
1752161678Sdavidxu		}
1753161678Sdavidxu
1754161678Sdavidxu		/* If no one owns it but it is contested, try to acquire it. */
1755161678Sdavidxu		if (owner == UMUTEX_CONTESTED) {
1756161678Sdavidxu			owner = casuword32(&m->m_owner,
1757161678Sdavidxu			    UMUTEX_CONTESTED, id | UMUTEX_CONTESTED);
1758161678Sdavidxu
1759161678Sdavidxu			if (owner == UMUTEX_CONTESTED) {
1760161678Sdavidxu				umtxq_lock(&uq->uq_key);
1761189756Sdavidxu				umtxq_busy(&uq->uq_key);
1762161678Sdavidxu				error = umtx_pi_claim(pi, td);
1763189756Sdavidxu				umtxq_unbusy(&uq->uq_key);
1764161678Sdavidxu				umtxq_unlock(&uq->uq_key);
1765161678Sdavidxu				break;
1766161678Sdavidxu			}
1767161678Sdavidxu
1768161678Sdavidxu			/* The address was invalid. */
1769161678Sdavidxu			if (owner == -1) {
1770161678Sdavidxu				error = EFAULT;
1771161678Sdavidxu				break;
1772161678Sdavidxu			}
1773161678Sdavidxu
1774161678Sdavidxu			/* If this failed the lock has changed, restart. */
1775161678Sdavidxu			continue;
1776161678Sdavidxu		}
1777161678Sdavidxu
1778161678Sdavidxu		if ((flags & UMUTEX_ERROR_CHECK) != 0 &&
1779161678Sdavidxu		    (owner & ~UMUTEX_CONTESTED) == id) {
1780161678Sdavidxu			error = EDEADLK;
1781161678Sdavidxu			break;
1782161678Sdavidxu		}
1783161678Sdavidxu
1784161678Sdavidxu		if (try != 0) {
1785161678Sdavidxu			error = EBUSY;
1786161678Sdavidxu			break;
1787161678Sdavidxu		}
1788161678Sdavidxu
1789161678Sdavidxu		/*
1790161678Sdavidxu		 * If we caught a signal, we have already retried
1791161678Sdavidxu		 * and now exit immediately.
1792161678Sdavidxu		 */
1793161678Sdavidxu		if (error != 0)
1794161678Sdavidxu			break;
1795161678Sdavidxu
1796161678Sdavidxu		umtxq_lock(&uq->uq_key);
1797161678Sdavidxu		umtxq_busy(&uq->uq_key);
1798161678Sdavidxu		umtxq_unlock(&uq->uq_key);
1799161678Sdavidxu
1800161678Sdavidxu		/*
1801161678Sdavidxu		 * Set the contested bit so that a release in user space
1802161678Sdavidxu		 * knows to use the system call for unlock.  If this fails,
1803161678Sdavidxu		 * either someone else has acquired the lock or it has been
1804161678Sdavidxu		 * released.
1805161678Sdavidxu		 */
1806161678Sdavidxu		old = casuword32(&m->m_owner, owner, owner | UMUTEX_CONTESTED);
1807161678Sdavidxu
1808161678Sdavidxu		/* The address was invalid. */
1809161678Sdavidxu		if (old == -1) {
1810161678Sdavidxu			umtxq_lock(&uq->uq_key);
1811161678Sdavidxu			umtxq_unbusy(&uq->uq_key);
1812161678Sdavidxu			umtxq_unlock(&uq->uq_key);
1813161678Sdavidxu			error = EFAULT;
1814161678Sdavidxu			break;
1815161678Sdavidxu		}
1816161678Sdavidxu
1817161678Sdavidxu		umtxq_lock(&uq->uq_key);
1818161678Sdavidxu		/*
1819161678Sdavidxu		 * If we set the contested bit, sleep.  Otherwise the lock
1820161678Sdavidxu		 * changed and we need to retry, or we lost a race to the
1821161678Sdavidxu		 * thread unlocking the umtx.
1822161678Sdavidxu		 */
1823161678Sdavidxu		if (old == owner)
1824161678Sdavidxu			error = umtxq_sleep_pi(uq, pi, owner & ~UMUTEX_CONTESTED,
1825233690Sdavidxu			    "umtxpi", timeout == NULL ? NULL : &timo);
1826189756Sdavidxu		else {
1827189756Sdavidxu			umtxq_unbusy(&uq->uq_key);
1828189756Sdavidxu			umtxq_unlock(&uq->uq_key);
1829189756Sdavidxu		}
1830161678Sdavidxu	}
1831161678Sdavidxu
1832163697Sdavidxu	umtxq_lock(&uq->uq_key);
1833163697Sdavidxu	umtx_pi_unref(pi);
1834163697Sdavidxu	umtxq_unlock(&uq->uq_key);
1835161678Sdavidxu
1836161678Sdavidxu	umtx_key_release(&uq->uq_key);
1837161678Sdavidxu	return (error);
1838161678Sdavidxu}
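
/*
 * The loop above notes that the uncontested case "should be done in
 * userland".  A minimal sketch of that fast path follows (illustrative
 * only, never compiled here; the helper name is hypothetical, and
 * libthr has its own wrappers around _umtx_op(2)):
 */
#if 0
static int
example_umutex_lock_pi(struct umutex *m, uint32_t tid)
{
	/* Uncontested: move m_owner from UMUTEX_UNOWNED to our tid. */
	if (atomic_cmpset_acq_32((volatile uint32_t *)&m->m_owner,
	    UMUTEX_UNOWNED, tid))
		return (0);			/* no syscall needed */
	/* Owned or contested: let the kernel queue us and lend priority. */
	return (_umtx_op(m, UMTX_OP_MUTEX_LOCK, 0, NULL, NULL));
}
#endif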
1839161678Sdavidxu
1840161678Sdavidxu/*
1841161678Sdavidxu * Unlock a PI mutex.
1842161678Sdavidxu */
1843161678Sdavidxustatic int
1844161678Sdavidxudo_unlock_pi(struct thread *td, struct umutex *m, uint32_t flags)
1845161678Sdavidxu{
1846161678Sdavidxu	struct umtx_key key;
1847161678Sdavidxu	struct umtx_q *uq_first, *uq_first2, *uq_me;
1848161678Sdavidxu	struct umtx_pi *pi, *pi2;
1849161678Sdavidxu	uint32_t owner, old, id;
1850161678Sdavidxu	int error;
1851161678Sdavidxu	int count;
1852161678Sdavidxu	int pri;
1853161678Sdavidxu
1854161678Sdavidxu	id = td->td_tid;
1855161678Sdavidxu	/*
1856161678Sdavidxu	 * Make sure we own this mtx.
1857161678Sdavidxu	 */
1858163449Sdavidxu	owner = fuword32(__DEVOLATILE(uint32_t *, &m->m_owner));
1859161678Sdavidxu	if (owner == -1)
1860161678Sdavidxu		return (EFAULT);
1861161678Sdavidxu
1862161678Sdavidxu	if ((owner & ~UMUTEX_CONTESTED) != id)
1863161678Sdavidxu		return (EPERM);
1864161678Sdavidxu
1865161678Sdavidxu	/* This should be done in userland */
1866161678Sdavidxu	if ((owner & UMUTEX_CONTESTED) == 0) {
1867161678Sdavidxu		old = casuword32(&m->m_owner, owner, UMUTEX_UNOWNED);
1868161678Sdavidxu		if (old == -1)
1869161678Sdavidxu			return (EFAULT);
1870161678Sdavidxu		if (old == owner)
1871161678Sdavidxu			return (0);
1872161855Sdavidxu		owner = old;
1873161678Sdavidxu	}
1874161678Sdavidxu
1875161678Sdavidxu	/* We should only ever be in here for contested locks */
1876161678Sdavidxu	if ((error = umtx_key_get(m, TYPE_PI_UMUTEX, GET_SHARE(flags),
1877161678Sdavidxu	    &key)) != 0)
1878161678Sdavidxu		return (error);
1879161678Sdavidxu
1880161678Sdavidxu	umtxq_lock(&key);
1881161678Sdavidxu	umtxq_busy(&key);
1882161678Sdavidxu	count = umtxq_count_pi(&key, &uq_first);
1883161678Sdavidxu	if (uq_first != NULL) {
1884189756Sdavidxu		mtx_lock_spin(&umtx_lock);
1885161678Sdavidxu		pi = uq_first->uq_pi_blocked;
1886189756Sdavidxu		KASSERT(pi != NULL, ("pi == NULL?"));
1887161678Sdavidxu		if (pi->pi_owner != curthread) {
1888189756Sdavidxu			mtx_unlock_spin(&umtx_lock);
1889161678Sdavidxu			umtxq_unbusy(&key);
1890161678Sdavidxu			umtxq_unlock(&key);
1891189756Sdavidxu			umtx_key_release(&key);
1892161678Sdavidxu			/* userland messed up the mutex */
1893161678Sdavidxu			return (EPERM);
1894161678Sdavidxu		}
1895161678Sdavidxu		uq_me = curthread->td_umtxq;
1896161678Sdavidxu		pi->pi_owner = NULL;
1897161678Sdavidxu		TAILQ_REMOVE(&uq_me->uq_pi_contested, pi, pi_link);
1898189756Sdavidxu		/* Get the highest-priority thread that is still sleeping. */
1899161678Sdavidxu		uq_first = TAILQ_FIRST(&pi->pi_blocked);
1900189756Sdavidxu		while (uq_first != NULL &&
1901189756Sdavidxu		       (uq_first->uq_flags & UQF_UMTXQ) == 0) {
1902189756Sdavidxu			uq_first = TAILQ_NEXT(uq_first, uq_lockq);
1903189756Sdavidxu		}
1904161678Sdavidxu		pri = PRI_MAX;
1905161678Sdavidxu		TAILQ_FOREACH(pi2, &uq_me->uq_pi_contested, pi_link) {
1906161678Sdavidxu			uq_first2 = TAILQ_FIRST(&pi2->pi_blocked);
1907161678Sdavidxu			if (uq_first2 != NULL) {
1908161678Sdavidxu				if (pri > UPRI(uq_first2->uq_thread))
1909161678Sdavidxu					pri = UPRI(uq_first2->uq_thread);
1910161678Sdavidxu			}
1911161678Sdavidxu		}
1912170300Sjeff		thread_lock(curthread);
1913216791Sdavidxu		sched_lend_user_prio(curthread, pri);
1914170300Sjeff		thread_unlock(curthread);
1915170300Sjeff		mtx_unlock_spin(&umtx_lock);
1916189756Sdavidxu		if (uq_first)
1917189756Sdavidxu			umtxq_signal_thread(uq_first);
1918161678Sdavidxu	}
1919161678Sdavidxu	umtxq_unlock(&key);
1920161678Sdavidxu
1921161678Sdavidxu	/*
1922161678Sdavidxu	 * When unlocking the umtx, it must be marked as unowned if
1923161678Sdavidxu	 * at most one thread is waiting for it; otherwise, it must
1924161678Sdavidxu	 * be marked as contested.
1925161678Sdavidxu	 */
1926161678Sdavidxu	old = casuword32(&m->m_owner, owner,
1927161678Sdavidxu		count <= 1 ? UMUTEX_UNOWNED : UMUTEX_CONTESTED);
1928161678Sdavidxu
1929161678Sdavidxu	umtxq_lock(&key);
1930161678Sdavidxu	umtxq_unbusy(&key);
1931161678Sdavidxu	umtxq_unlock(&key);
1932161678Sdavidxu	umtx_key_release(&key);
1933161678Sdavidxu	if (old == -1)
1934161678Sdavidxu		return (EFAULT);
1935161678Sdavidxu	if (old != owner)
1936161678Sdavidxu		return (EINVAL);
1937161678Sdavidxu	return (0);
1938161678Sdavidxu}
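
/*
 * The matching userland unlock fast path, a minimal sketch under the
 * same assumptions as the lock sketch above (illustrative only, never
 * compiled here): only a contested lock word forces the syscall that
 * wakes a waiter and re-sorts priorities in do_unlock_pi().
 */
#if 0
static int
example_umutex_unlock_pi(struct umutex *m, uint32_t tid)
{
	/* Uncontested: clear the owner word and skip the kernel. */
	if (atomic_cmpset_rel_32((volatile uint32_t *)&m->m_owner,
	    tid, UMUTEX_UNOWNED))
		return (0);
	/* UMUTEX_CONTESTED is set: the kernel must wake a waiter. */
	return (_umtx_op(m, UMTX_OP_MUTEX_UNLOCK, 0, NULL, NULL));
}
#endif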
1939161678Sdavidxu
1940161678Sdavidxu/*
1941161678Sdavidxu * Lock a PP mutex.
1942161678Sdavidxu */
1943161678Sdavidxustatic int
1944233690Sdavidxudo_lock_pp(struct thread *td, struct umutex *m, uint32_t flags,
1945233690Sdavidxu    struct _umtx_time *timeout, int try)
1946161678Sdavidxu{
1947233690Sdavidxu	struct abs_timeout timo;
1948161678Sdavidxu	struct umtx_q *uq, *uq2;
1949161678Sdavidxu	struct umtx_pi *pi;
1950161678Sdavidxu	uint32_t ceiling;
1951161678Sdavidxu	uint32_t owner, id;
1952161678Sdavidxu	int error, pri, old_inherited_pri, su;
1953161678Sdavidxu
1954161678Sdavidxu	id = td->td_tid;
1955161678Sdavidxu	uq = td->td_umtxq;
1956161678Sdavidxu	if ((error = umtx_key_get(m, TYPE_PP_UMUTEX, GET_SHARE(flags),
1957161678Sdavidxu	    &uq->uq_key)) != 0)
1958161678Sdavidxu		return (error);
1959233690Sdavidxu
1960233690Sdavidxu	if (timeout != NULL)
1961233690Sdavidxu		abs_timeout_init2(&timo, timeout);
1962233690Sdavidxu
1963164033Srwatson	su = (priv_check(td, PRIV_SCHED_RTPRIO) == 0);
1964161678Sdavidxu	for (;;) {
1965161678Sdavidxu		old_inherited_pri = uq->uq_inherited_pri;
1966161678Sdavidxu		umtxq_lock(&uq->uq_key);
1967161678Sdavidxu		umtxq_busy(&uq->uq_key);
1968161678Sdavidxu		umtxq_unlock(&uq->uq_key);
1969161678Sdavidxu
1970161678Sdavidxu		ceiling = RTP_PRIO_MAX - fuword32(&m->m_ceilings[0]);
1971161678Sdavidxu		if (ceiling > RTP_PRIO_MAX) {
1972161678Sdavidxu			error = EINVAL;
1973161678Sdavidxu			goto out;
1974161678Sdavidxu		}
1975161678Sdavidxu
1976170300Sjeff		mtx_lock_spin(&umtx_lock);
1977161678Sdavidxu		if (UPRI(td) < PRI_MIN_REALTIME + ceiling) {
1978170300Sjeff			mtx_unlock_spin(&umtx_lock);
1979161678Sdavidxu			error = EINVAL;
1980161678Sdavidxu			goto out;
1981161678Sdavidxu		}
1982161678Sdavidxu		if (su && PRI_MIN_REALTIME + ceiling < uq->uq_inherited_pri) {
1983161678Sdavidxu			uq->uq_inherited_pri = PRI_MIN_REALTIME + ceiling;
1984170300Sjeff			thread_lock(td);
1985161678Sdavidxu			if (uq->uq_inherited_pri < UPRI(td))
1986161678Sdavidxu				sched_lend_user_prio(td, uq->uq_inherited_pri);
1987170300Sjeff			thread_unlock(td);
1988161678Sdavidxu		}
1989170300Sjeff		mtx_unlock_spin(&umtx_lock);
1990161678Sdavidxu
1991161678Sdavidxu		owner = casuword32(&m->m_owner,
1992161678Sdavidxu		    UMUTEX_CONTESTED, id | UMUTEX_CONTESTED);
1993161678Sdavidxu
1994161678Sdavidxu		if (owner == UMUTEX_CONTESTED) {
1995161678Sdavidxu			error = 0;
1996161678Sdavidxu			break;
1997161678Sdavidxu		}
1998161678Sdavidxu
1999161678Sdavidxu		/* The address was invalid. */
2000161678Sdavidxu		if (owner == -1) {
2001161678Sdavidxu			error = EFAULT;
2002161678Sdavidxu			break;
2003161678Sdavidxu		}
2004161678Sdavidxu
2005161678Sdavidxu		if ((flags & UMUTEX_ERROR_CHECK) != 0 &&
2006161678Sdavidxu		    (owner & ~UMUTEX_CONTESTED) == id) {
2007161678Sdavidxu			error = EDEADLK;
2008161678Sdavidxu			break;
2009161678Sdavidxu		}
2010161678Sdavidxu
2011161678Sdavidxu		if (try != 0) {
2012161678Sdavidxu			error = EBUSY;
2013161678Sdavidxu			break;
2014161678Sdavidxu		}
2015161678Sdavidxu
2016161678Sdavidxu		/*
2017161678Sdavidxu		 * If we caught a signal, we have already retried
2018161678Sdavidxu		 * and now exit immediately.
2019161678Sdavidxu		 */
2020161678Sdavidxu		if (error != 0)
2021161678Sdavidxu			break;
2022161678Sdavidxu
2023161678Sdavidxu		umtxq_lock(&uq->uq_key);
2024161678Sdavidxu		umtxq_insert(uq);
2025161678Sdavidxu		umtxq_unbusy(&uq->uq_key);
2026233690Sdavidxu		error = umtxq_sleep(uq, "umtxpp", timeout == NULL ?
2027233690Sdavidxu		    NULL : &timo);
2028161678Sdavidxu		umtxq_remove(uq);
2029161678Sdavidxu		umtxq_unlock(&uq->uq_key);
2030161678Sdavidxu
2031170300Sjeff		mtx_lock_spin(&umtx_lock);
2032161678Sdavidxu		uq->uq_inherited_pri = old_inherited_pri;
2033161678Sdavidxu		pri = PRI_MAX;
2034161678Sdavidxu		TAILQ_FOREACH(pi, &uq->uq_pi_contested, pi_link) {
2035161678Sdavidxu			uq2 = TAILQ_FIRST(&pi->pi_blocked);
2036161678Sdavidxu			if (uq2 != NULL) {
2037161678Sdavidxu				if (pri > UPRI(uq2->uq_thread))
2038161678Sdavidxu					pri = UPRI(uq2->uq_thread);
2039161678Sdavidxu			}
2040161678Sdavidxu		}
2041161678Sdavidxu		if (pri > uq->uq_inherited_pri)
2042161678Sdavidxu			pri = uq->uq_inherited_pri;
2043170300Sjeff		thread_lock(td);
2044216791Sdavidxu		sched_lend_user_prio(td, pri);
2045170300Sjeff		thread_unlock(td);
2046170300Sjeff		mtx_unlock_spin(&umtx_lock);
2047161678Sdavidxu	}
2048161678Sdavidxu
2049161678Sdavidxu	if (error != 0) {
2050170300Sjeff		mtx_lock_spin(&umtx_lock);
2051161678Sdavidxu		uq->uq_inherited_pri = old_inherited_pri;
2052161678Sdavidxu		pri = PRI_MAX;
2053161678Sdavidxu		TAILQ_FOREACH(pi, &uq->uq_pi_contested, pi_link) {
2054161678Sdavidxu			uq2 = TAILQ_FIRST(&pi->pi_blocked);
2055161678Sdavidxu			if (uq2 != NULL) {
2056161678Sdavidxu				if (pri > UPRI(uq2->uq_thread))
2057161678Sdavidxu					pri = UPRI(uq2->uq_thread);
2058161678Sdavidxu			}
2059161678Sdavidxu		}
2060161678Sdavidxu		if (pri > uq->uq_inherited_pri)
2061161678Sdavidxu			pri = uq->uq_inherited_pri;
2062170300Sjeff		thread_lock(td);
2063216791Sdavidxu		sched_lend_user_prio(td, pri);
2064170300Sjeff		thread_unlock(td);
2065170300Sjeff		mtx_unlock_spin(&umtx_lock);
2066161678Sdavidxu	}
2067161678Sdavidxu
2068161678Sdavidxuout:
2069161678Sdavidxu	umtxq_lock(&uq->uq_key);
2070161678Sdavidxu	umtxq_unbusy(&uq->uq_key);
2071161678Sdavidxu	umtxq_unlock(&uq->uq_key);
2072161678Sdavidxu	umtx_key_release(&uq->uq_key);
2073161678Sdavidxu	return (error);
2074161678Sdavidxu}
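
/*
 * The ceiling arithmetic above, worked through (illustrative; assumes
 * the stock RTP_PRIO_MAX of 31): a userland ceiling of
 * m_ceilings[0] == 10 gives ceiling = 31 - 10 = 21 here, so a
 * privileged locker is lent PRI_MIN_REALTIME + 21 for as long as it
 * holds the mutex.
 */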
2075161678Sdavidxu
2076161678Sdavidxu/*
2077161678Sdavidxu * Unlock a PP mutex.
2078161678Sdavidxu */
2079161678Sdavidxustatic int
2080161678Sdavidxudo_unlock_pp(struct thread *td, struct umutex *m, uint32_t flags)
2081161678Sdavidxu{
2082161678Sdavidxu	struct umtx_key key;
2083161678Sdavidxu	struct umtx_q *uq, *uq2;
2084161678Sdavidxu	struct umtx_pi *pi;
2085161678Sdavidxu	uint32_t owner, id;
2086161678Sdavidxu	uint32_t rceiling;
2087161926Sdavidxu	int error, pri, new_inherited_pri, su;
2088161678Sdavidxu
2089161678Sdavidxu	id = td->td_tid;
2090161678Sdavidxu	uq = td->td_umtxq;
2091164033Srwatson	su = (priv_check(td, PRIV_SCHED_RTPRIO) == 0);
2092161678Sdavidxu
2093161678Sdavidxu	/*
2094161678Sdavidxu	 * Make sure we own this mtx.
2095161678Sdavidxu	 */
2096163449Sdavidxu	owner = fuword32(__DEVOLATILE(uint32_t *, &m->m_owner));
2097161678Sdavidxu	if (owner == -1)
2098161678Sdavidxu		return (EFAULT);
2099161678Sdavidxu
2100161678Sdavidxu	if ((owner & ~UMUTEX_CONTESTED) != id)
2101161678Sdavidxu		return (EPERM);
2102161678Sdavidxu
2103161678Sdavidxu	error = copyin(&m->m_ceilings[1], &rceiling, sizeof(uint32_t));
2104161678Sdavidxu	if (error != 0)
2105161678Sdavidxu		return (error);
2106161678Sdavidxu
2107161678Sdavidxu	if (rceiling == -1)
2108161678Sdavidxu		new_inherited_pri = PRI_MAX;
2109161678Sdavidxu	else {
2110161678Sdavidxu		rceiling = RTP_PRIO_MAX - rceiling;
2111161678Sdavidxu		if (rceiling > RTP_PRIO_MAX)
2112161678Sdavidxu			return (EINVAL);
2113161678Sdavidxu		new_inherited_pri = PRI_MIN_REALTIME + rceiling;
2114161678Sdavidxu	}
2115161678Sdavidxu
2116161678Sdavidxu	if ((error = umtx_key_get(m, TYPE_PP_UMUTEX, GET_SHARE(flags),
2117161678Sdavidxu	    &key)) != 0)
2118161678Sdavidxu		return (error);
2119161678Sdavidxu	umtxq_lock(&key);
2120161678Sdavidxu	umtxq_busy(&key);
2121161678Sdavidxu	umtxq_unlock(&key);
2122161678Sdavidxu	/*
2123161678Sdavidxu	 * For a priority-protected mutex, always set the unlocked state
2124161678Sdavidxu	 * to UMUTEX_CONTESTED, so that userland always enters the kernel
2125161678Sdavidxu	 * to lock the mutex.  This is necessary because thread priority
2126161678Sdavidxu	 * has to be adjusted for such a mutex.
2127161678Sdavidxu	 */
2128163449Sdavidxu	error = suword32(__DEVOLATILE(uint32_t *, &m->m_owner),
2129163449Sdavidxu		UMUTEX_CONTESTED);
2130161678Sdavidxu
2131161678Sdavidxu	umtxq_lock(&key);
2132161678Sdavidxu	if (error == 0)
2133161678Sdavidxu		umtxq_signal(&key, 1);
2134161678Sdavidxu	umtxq_unbusy(&key);
2135161678Sdavidxu	umtxq_unlock(&key);
2136161678Sdavidxu
2137161678Sdavidxu	if (error == -1)
2138161678Sdavidxu		error = EFAULT;
2139161678Sdavidxu	else {
2140170300Sjeff		mtx_lock_spin(&umtx_lock);
2141161926Sdavidxu		if (su != 0)
2142161926Sdavidxu			uq->uq_inherited_pri = new_inherited_pri;
2143161678Sdavidxu		pri = PRI_MAX;
2144161678Sdavidxu		TAILQ_FOREACH(pi, &uq->uq_pi_contested, pi_link) {
2145161678Sdavidxu			uq2 = TAILQ_FIRST(&pi->pi_blocked);
2146161678Sdavidxu			if (uq2 != NULL) {
2147161678Sdavidxu				if (pri > UPRI(uq2->uq_thread))
2148161678Sdavidxu					pri = UPRI(uq2->uq_thread);
2149161678Sdavidxu			}
2150161678Sdavidxu		}
2151161678Sdavidxu		if (pri > uq->uq_inherited_pri)
2152161678Sdavidxu			pri = uq->uq_inherited_pri;
2153170300Sjeff		thread_lock(td);
2154216791Sdavidxu		sched_lend_user_prio(td, pri);
2155170300Sjeff		thread_unlock(td);
2156170300Sjeff		mtx_unlock_spin(&umtx_lock);
2157161678Sdavidxu	}
2158161678Sdavidxu	umtx_key_release(&key);
2159161678Sdavidxu	return (error);
2160161678Sdavidxu}
2161161678Sdavidxu
2162161678Sdavidxustatic int
2163161678Sdavidxudo_set_ceiling(struct thread *td, struct umutex *m, uint32_t ceiling,
2164161678Sdavidxu	uint32_t *old_ceiling)
2165161678Sdavidxu{
2166161678Sdavidxu	struct umtx_q *uq;
2167161678Sdavidxu	uint32_t save_ceiling;
2168161678Sdavidxu	uint32_t owner, id;
2169161678Sdavidxu	uint32_t flags;
2170161678Sdavidxu	int error;
2171161678Sdavidxu
2172161678Sdavidxu	flags = fuword32(&m->m_flags);
2173161678Sdavidxu	if ((flags & UMUTEX_PRIO_PROTECT) == 0)
2174161678Sdavidxu		return (EINVAL);
2175161678Sdavidxu	if (ceiling > RTP_PRIO_MAX)
2176161678Sdavidxu		return (EINVAL);
2177161678Sdavidxu	id = td->td_tid;
2178161678Sdavidxu	uq = td->td_umtxq;
2179161678Sdavidxu	if ((error = umtx_key_get(m, TYPE_PP_UMUTEX, GET_SHARE(flags),
2180161678Sdavidxu	   &uq->uq_key)) != 0)
2181161678Sdavidxu		return (error);
2182161678Sdavidxu	for (;;) {
2183161678Sdavidxu		umtxq_lock(&uq->uq_key);
2184161678Sdavidxu		umtxq_busy(&uq->uq_key);
2185161678Sdavidxu		umtxq_unlock(&uq->uq_key);
2186161678Sdavidxu
2187161678Sdavidxu		save_ceiling = fuword32(&m->m_ceilings[0]);
2188161678Sdavidxu
2189161678Sdavidxu		owner = casuword32(&m->m_owner,
2190161678Sdavidxu		    UMUTEX_CONTESTED, id | UMUTEX_CONTESTED);
2191161678Sdavidxu
2192161678Sdavidxu		if (owner == UMUTEX_CONTESTED) {
2193161678Sdavidxu			suword32(&m->m_ceilings[0], ceiling);
2194163449Sdavidxu			suword32(__DEVOLATILE(uint32_t *, &m->m_owner),
2195163449Sdavidxu				UMUTEX_CONTESTED);
2196161678Sdavidxu			error = 0;
2197161678Sdavidxu			break;
2198161678Sdavidxu		}
2199161678Sdavidxu
2200161678Sdavidxu		/* The address was invalid. */
2201161678Sdavidxu		if (owner == -1) {
2202161678Sdavidxu			error = EFAULT;
2203161678Sdavidxu			break;
2204161678Sdavidxu		}
2205161678Sdavidxu
2206161678Sdavidxu		if ((owner & ~UMUTEX_CONTESTED) == id) {
2207161678Sdavidxu			suword32(&m->m_ceilings[0], ceiling);
2208161678Sdavidxu			error = 0;
2209161678Sdavidxu			break;
2210161678Sdavidxu		}
2211161678Sdavidxu
2212161678Sdavidxu		/*
2213161678Sdavidxu		 * If we caught a signal, we have already retried
2214161678Sdavidxu		 * and now exit immediately.
2215161678Sdavidxu		 */
2216161678Sdavidxu		if (error != 0)
2217161678Sdavidxu			break;
2218161678Sdavidxu
2219161678Sdavidxu		/*
2220161678Sdavidxu		 * If we set the contested bit, sleep.  Otherwise the lock
2221161678Sdavidxu		 * changed and we need to retry, or we lost a race to the
2222161678Sdavidxu		 * thread unlocking the umtx.
2223161678Sdavidxu		 */
2224161678Sdavidxu		umtxq_lock(&uq->uq_key);
2225161678Sdavidxu		umtxq_insert(uq);
2226161678Sdavidxu		umtxq_unbusy(&uq->uq_key);
2227233690Sdavidxu		error = umtxq_sleep(uq, "umtxpp", NULL);
2228161678Sdavidxu		umtxq_remove(uq);
2229161678Sdavidxu		umtxq_unlock(&uq->uq_key);
2230161678Sdavidxu	}
2231161678Sdavidxu	umtxq_lock(&uq->uq_key);
2232161678Sdavidxu	if (error == 0)
2233161678Sdavidxu		umtxq_signal(&uq->uq_key, INT_MAX);
2234161678Sdavidxu	umtxq_unbusy(&uq->uq_key);
2235161678Sdavidxu	umtxq_unlock(&uq->uq_key);
2236161678Sdavidxu	umtx_key_release(&uq->uq_key);
2237161678Sdavidxu	if (error == 0 && old_ceiling != NULL)
2238161678Sdavidxu		suword32(old_ceiling, save_ceiling);
2239161678Sdavidxu	return (error);
2240161678Sdavidxu}
2241161678Sdavidxu
2242161678Sdavidxu/*
2243161678Sdavidxu * Lock a userland POSIX mutex.
2244161678Sdavidxu */
2245161678Sdavidxustatic int
2246162030Sdavidxudo_lock_umutex(struct thread *td, struct umutex *m,
2247233690Sdavidxu    struct _umtx_time *timeout, int mode)
2248161678Sdavidxu{
2249161678Sdavidxu	uint32_t flags;
2250162030Sdavidxu	int error;
2251161678Sdavidxu
2252161678Sdavidxu	flags = fuword32(&m->m_flags);
2253161678Sdavidxu	if (flags == -1)
2254161678Sdavidxu		return (EFAULT);
2255161678Sdavidxu
2256233690Sdavidxu	switch(flags & (UMUTEX_PRIO_INHERIT | UMUTEX_PRIO_PROTECT)) {
2257233690Sdavidxu	case 0:
2258233690Sdavidxu		error = do_lock_normal(td, m, flags, timeout, mode);
2259233690Sdavidxu		break;
2260233690Sdavidxu	case UMUTEX_PRIO_INHERIT:
2261233690Sdavidxu		error = do_lock_pi(td, m, flags, timeout, mode);
2262233690Sdavidxu		break;
2263233690Sdavidxu	case UMUTEX_PRIO_PROTECT:
2264233690Sdavidxu		error = do_lock_pp(td, m, flags, timeout, mode);
2265233690Sdavidxu		break;
2266233690Sdavidxu	default:
2267233690Sdavidxu		return (EINVAL);
2268233690Sdavidxu	}
2269162030Sdavidxu	if (timeout == NULL) {
2270179970Sdavidxu		if (error == EINTR && mode != _UMUTEX_WAIT)
2271162030Sdavidxu			error = ERESTART;
2272162030Sdavidxu	} else {
2273162030Sdavidxu		/* Timed-locking is not restarted. */
2274162030Sdavidxu		if (error == ERESTART)
2275162030Sdavidxu			error = EINTR;
2276161742Sdavidxu	}
2277162030Sdavidxu	return (error);
2278161678Sdavidxu}
2279161678Sdavidxu
2280161678Sdavidxu/*
2281161678Sdavidxu * Unlock a userland POSIX mutex.
2282161678Sdavidxu */
2283161678Sdavidxustatic int
2284161678Sdavidxudo_unlock_umutex(struct thread *td, struct umutex *m)
2285161678Sdavidxu{
2286161678Sdavidxu	uint32_t flags;
2287161678Sdavidxu
2288161678Sdavidxu	flags = fuword32(&m->m_flags);
2289161678Sdavidxu	if (flags == -1)
2290161678Sdavidxu		return (EFAULT);
2291161678Sdavidxu
2292161855Sdavidxu	switch(flags & (UMUTEX_PRIO_INHERIT | UMUTEX_PRIO_PROTECT)) {
2293161855Sdavidxu	case 0:
2294161855Sdavidxu		return (do_unlock_normal(td, m, flags));
2295161855Sdavidxu	case UMUTEX_PRIO_INHERIT:
2296161855Sdavidxu		return (do_unlock_pi(td, m, flags));
2297161855Sdavidxu	case UMUTEX_PRIO_PROTECT:
2298161855Sdavidxu		return (do_unlock_pp(td, m, flags));
2299161855Sdavidxu	}
2300161678Sdavidxu
2301161855Sdavidxu	return (EINVAL);
2302161678Sdavidxu}
2303161678Sdavidxu
2304164839Sdavidxustatic int
2305164839Sdavidxudo_cv_wait(struct thread *td, struct ucond *cv, struct umutex *m,
2306164876Sdavidxu	struct timespec *timeout, u_long wflags)
2307164839Sdavidxu{
2308233690Sdavidxu	struct abs_timeout timo;
2309164839Sdavidxu	struct umtx_q *uq;
2310164839Sdavidxu	uint32_t flags;
2311216641Sdavidxu	uint32_t clockid;
2312164839Sdavidxu	int error;
2313164839Sdavidxu
2314164839Sdavidxu	uq = td->td_umtxq;
2315164839Sdavidxu	flags = fuword32(&cv->c_flags);
2316164839Sdavidxu	error = umtx_key_get(cv, TYPE_CV, GET_SHARE(flags), &uq->uq_key);
2317164839Sdavidxu	if (error != 0)
2318164839Sdavidxu		return (error);
2319216641Sdavidxu
2320216641Sdavidxu	if ((wflags & CVWAIT_CLOCKID) != 0) {
2321216641Sdavidxu		clockid = fuword32(&cv->c_clockid);
2322216641Sdavidxu		if (clockid < CLOCK_REALTIME ||
2323216641Sdavidxu		    clockid >= CLOCK_THREAD_CPUTIME_ID) {
2324216641Sdavidxu			/* hmm, only HW clock id will work. */
2325216641Sdavidxu			return (EINVAL);
2326216641Sdavidxu		}
2327216641Sdavidxu	} else {
2328216641Sdavidxu		clockid = CLOCK_REALTIME;
2329216641Sdavidxu	}
2330216641Sdavidxu
2331164839Sdavidxu	umtxq_lock(&uq->uq_key);
2332164839Sdavidxu	umtxq_busy(&uq->uq_key);
2333164839Sdavidxu	umtxq_insert(uq);
2334164839Sdavidxu	umtxq_unlock(&uq->uq_key);
2335164839Sdavidxu
2336164839Sdavidxu	/*
2337216641Sdavidxu	 * Set c_has_waiters to 1 before releasing the user mutex; also
2338216641Sdavidxu	 * avoid dirtying the cache line when that is unnecessary.
2339164839Sdavidxu	 */
2340216641Sdavidxu	if (fuword32(__DEVOLATILE(uint32_t *, &cv->c_has_waiters)) == 0)
2341216641Sdavidxu		suword32(__DEVOLATILE(uint32_t *, &cv->c_has_waiters), 1);
2342164839Sdavidxu
2343164839Sdavidxu	umtxq_lock(&uq->uq_key);
2344164839Sdavidxu	umtxq_unbusy(&uq->uq_key);
2345164839Sdavidxu	umtxq_unlock(&uq->uq_key);
2346164839Sdavidxu
2347164839Sdavidxu	error = do_unlock_umutex(td, m);
2348233690Sdavidxu
2349233700Sdavidxu	if (timeout != NULL)
2350233690Sdavidxu		abs_timeout_init(&timo, clockid, ((wflags & CVWAIT_ABSTIME) != 0),
2351233690Sdavidxu			timeout);
2352164839Sdavidxu
2353164839Sdavidxu	umtxq_lock(&uq->uq_key);
2354164839Sdavidxu	if (error == 0) {
2355233690Sdavidxu		error = umtxq_sleep(uq, "ucond", timeout == NULL ?
2356233690Sdavidxu		    NULL : &timo);
2357164839Sdavidxu	}
2358164839Sdavidxu
2359211794Sdavidxu	if ((uq->uq_flags & UQF_UMTXQ) == 0)
2360211794Sdavidxu		error = 0;
2361211794Sdavidxu	else {
2362216641Sdavidxu		/*
2363216641Sdavidxu		 * This must be a timeout, an interruption by a signal, or
2364216641Sdavidxu		 * a spurious wakeup; clear the c_has_waiters flag when
2365216641Sdavidxu		 * necessary.
2366216641Sdavidxu		 */
2367216641Sdavidxu		umtxq_busy(&uq->uq_key);
2368216641Sdavidxu		if ((uq->uq_flags & UQF_UMTXQ) != 0) {
2369216641Sdavidxu			int oldlen = uq->uq_cur_queue->length;
2370216641Sdavidxu			umtxq_remove(uq);
2371216641Sdavidxu			if (oldlen == 1) {
2372216641Sdavidxu				umtxq_unlock(&uq->uq_key);
2373216641Sdavidxu				suword32(
2374216641Sdavidxu				    __DEVOLATILE(uint32_t *,
2375216641Sdavidxu					 &cv->c_has_waiters), 0);
2376216641Sdavidxu				umtxq_lock(&uq->uq_key);
2377216641Sdavidxu			}
2378216641Sdavidxu		}
2379216641Sdavidxu		umtxq_unbusy(&uq->uq_key);
2380164839Sdavidxu		if (error == ERESTART)
2381164839Sdavidxu			error = EINTR;
2382164839Sdavidxu	}
2383211794Sdavidxu
2384164839Sdavidxu	umtxq_unlock(&uq->uq_key);
2385164839Sdavidxu	umtx_key_release(&uq->uq_key);
2386164839Sdavidxu	return (error);
2387164839Sdavidxu}
2388164839Sdavidxu
2389164839Sdavidxu/*
2390164839Sdavidxu * Signal a userland condition variable.
2391164839Sdavidxu */
2392164839Sdavidxustatic int
2393164839Sdavidxudo_cv_signal(struct thread *td, struct ucond *cv)
2394164839Sdavidxu{
2395164839Sdavidxu	struct umtx_key key;
2396164839Sdavidxu	int error, cnt, nwake;
2397164839Sdavidxu	uint32_t flags;
2398164839Sdavidxu
2399164839Sdavidxu	flags = fuword32(&cv->c_flags);
2400164839Sdavidxu	if ((error = umtx_key_get(cv, TYPE_CV, GET_SHARE(flags), &key)) != 0)
2401164839Sdavidxu		return (error);
2402164839Sdavidxu	umtxq_lock(&key);
2403164839Sdavidxu	umtxq_busy(&key);
2404164839Sdavidxu	cnt = umtxq_count(&key);
2405164839Sdavidxu	nwake = umtxq_signal(&key, 1);
2406164839Sdavidxu	if (cnt <= nwake) {
2407164839Sdavidxu		umtxq_unlock(&key);
2408164839Sdavidxu		error = suword32(
2409164839Sdavidxu		    __DEVOLATILE(uint32_t *, &cv->c_has_waiters), 0);
2410164839Sdavidxu		umtxq_lock(&key);
2411164839Sdavidxu	}
2412164839Sdavidxu	umtxq_unbusy(&key);
2413164839Sdavidxu	umtxq_unlock(&key);
2414164839Sdavidxu	umtx_key_release(&key);
2415164839Sdavidxu	return (error);
2416164839Sdavidxu}
2417164839Sdavidxu
2418164839Sdavidxustatic int
2419164839Sdavidxudo_cv_broadcast(struct thread *td, struct ucond *cv)
2420164839Sdavidxu{
2421164839Sdavidxu	struct umtx_key key;
2422164839Sdavidxu	int error;
2423164839Sdavidxu	uint32_t flags;
2424164839Sdavidxu
2425164839Sdavidxu	flags = fuword32(&cv->c_flags);
2426164839Sdavidxu	if ((error = umtx_key_get(cv, TYPE_CV, GET_SHARE(flags), &key)) != 0)
2427164839Sdavidxu		return (error);
2428164839Sdavidxu
2429164839Sdavidxu	umtxq_lock(&key);
2430164839Sdavidxu	umtxq_busy(&key);
2431164839Sdavidxu	umtxq_signal(&key, INT_MAX);
2432164839Sdavidxu	umtxq_unlock(&key);
2433164839Sdavidxu
2434164839Sdavidxu	error = suword32(__DEVOLATILE(uint32_t *, &cv->c_has_waiters), 0);
2435164839Sdavidxu
2436164839Sdavidxu	umtxq_lock(&key);
2437164839Sdavidxu	umtxq_unbusy(&key);
2438164839Sdavidxu	umtxq_unlock(&key);
2439164839Sdavidxu
2440164839Sdavidxu	umtx_key_release(&key);
2441164839Sdavidxu	return (error);
2442164839Sdavidxu}
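
/*
 * Userland pairs with the c_has_waiters handshake above: do_cv_wait()
 * sets the flag before sleeping, and the wake paths clear it once no
 * sleepers remain.  A minimal signalling sketch (illustrative only,
 * never compiled here; the helper name is hypothetical):
 */
#if 0
static int
example_ucond_signal(struct ucond *cv)
{
	/* Enter the kernel only if a waiter has announced itself. */
	if (cv->c_has_waiters)
		return (_umtx_op(cv, UMTX_OP_CV_SIGNAL, 0, NULL, NULL));
	return (0);
}
#endif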
2443164839Sdavidxu
2444177848Sdavidxustatic int
2445233690Sdavidxudo_rw_rdlock(struct thread *td, struct urwlock *rwlock, long fflag, struct _umtx_time *timeout)
2446177848Sdavidxu{
2447233690Sdavidxu	struct abs_timeout timo;
2448177848Sdavidxu	struct umtx_q *uq;
2449177848Sdavidxu	uint32_t flags, wrflags;
2450177848Sdavidxu	int32_t state, oldstate;
2451177848Sdavidxu	int32_t blocked_readers;
2452177848Sdavidxu	int error;
2453177848Sdavidxu
2454177848Sdavidxu	uq = td->td_umtxq;
2455177848Sdavidxu	flags = fuword32(&rwlock->rw_flags);
2456177848Sdavidxu	error = umtx_key_get(rwlock, TYPE_RWLOCK, GET_SHARE(flags), &uq->uq_key);
2457177848Sdavidxu	if (error != 0)
2458177848Sdavidxu		return (error);
2459177848Sdavidxu
2460233690Sdavidxu	if (timeout != NULL)
2461233690Sdavidxu		abs_timeout_init2(&timo, timeout);
2462233690Sdavidxu
2463177848Sdavidxu	wrflags = URWLOCK_WRITE_OWNER;
2464177848Sdavidxu	if (!(fflag & URWLOCK_PREFER_READER) && !(flags & URWLOCK_PREFER_READER))
2465177848Sdavidxu		wrflags |= URWLOCK_WRITE_WAITERS;
2466177848Sdavidxu
2467177848Sdavidxu	for (;;) {
2468177848Sdavidxu		state = fuword32(__DEVOLATILE(int32_t *, &rwlock->rw_state));
2469177848Sdavidxu		/* try to lock it */
2470177848Sdavidxu		while (!(state & wrflags)) {
2471177848Sdavidxu			if (__predict_false(URWLOCK_READER_COUNT(state) == URWLOCK_MAX_READERS)) {
2472177848Sdavidxu				umtx_key_release(&uq->uq_key);
2473177848Sdavidxu				return (EAGAIN);
2474177848Sdavidxu			}
2475177848Sdavidxu			oldstate = casuword32(&rwlock->rw_state, state, state + 1);
2476177848Sdavidxu			if (oldstate == state) {
2477177848Sdavidxu				umtx_key_release(&uq->uq_key);
2478177848Sdavidxu				return (0);
2479177848Sdavidxu			}
2480177848Sdavidxu			state = oldstate;
2481177848Sdavidxu		}
2482177848Sdavidxu
2483177848Sdavidxu		if (error)
2484177848Sdavidxu			break;
2485177848Sdavidxu
2486177848Sdavidxu		/* grab monitor lock */
2487177848Sdavidxu		umtxq_lock(&uq->uq_key);
2488177848Sdavidxu		umtxq_busy(&uq->uq_key);
2489177848Sdavidxu		umtxq_unlock(&uq->uq_key);
2490177848Sdavidxu
2491203414Sdavidxu		/*
2492203414Sdavidxu		 * re-read the state, in case it changed between the try-lock above
2493203414Sdavidxu		 * and the check below
2494203414Sdavidxu		 */
2495203414Sdavidxu		state = fuword32(__DEVOLATILE(int32_t *, &rwlock->rw_state));
2496203414Sdavidxu
2497177848Sdavidxu		/* set read contention bit */
2498177848Sdavidxu		while ((state & wrflags) && !(state & URWLOCK_READ_WAITERS)) {
2499177848Sdavidxu			oldstate = casuword32(&rwlock->rw_state, state, state | URWLOCK_READ_WAITERS);
2500177848Sdavidxu			if (oldstate == state)
2501177848Sdavidxu				goto sleep;
2502177848Sdavidxu			state = oldstate;
2503177848Sdavidxu		}
2504177848Sdavidxu
2505177848Sdavidxu		/* The state changed while setting the flags; restart. */
2506177848Sdavidxu		if (!(state & wrflags)) {
2507177848Sdavidxu			umtxq_lock(&uq->uq_key);
2508177848Sdavidxu			umtxq_unbusy(&uq->uq_key);
2509177848Sdavidxu			umtxq_unlock(&uq->uq_key);
2510177848Sdavidxu			continue;
2511177848Sdavidxu		}
2512177848Sdavidxu
2513177848Sdavidxusleep:
2514177848Sdavidxu		/* The contention bit is set; increase the read waiter count before sleeping. */
2515177848Sdavidxu		blocked_readers = fuword32(&rwlock->rw_blocked_readers);
2516177848Sdavidxu		suword32(&rwlock->rw_blocked_readers, blocked_readers+1);
2517177848Sdavidxu
2518177848Sdavidxu		while (state & wrflags) {
2519177848Sdavidxu			umtxq_lock(&uq->uq_key);
2520177848Sdavidxu			umtxq_insert(uq);
2521177848Sdavidxu			umtxq_unbusy(&uq->uq_key);
2522177848Sdavidxu
2523233690Sdavidxu			error = umtxq_sleep(uq, "urdlck", timeout == NULL ?
2524233690Sdavidxu			    NULL : &timo);
2525177848Sdavidxu
2526177848Sdavidxu			umtxq_busy(&uq->uq_key);
2527177848Sdavidxu			umtxq_remove(uq);
2528177848Sdavidxu			umtxq_unlock(&uq->uq_key);
2529177848Sdavidxu			if (error)
2530177848Sdavidxu				break;
2531177848Sdavidxu			state = fuword32(__DEVOLATILE(int32_t *, &rwlock->rw_state));
2532177848Sdavidxu		}
2533177848Sdavidxu
2534177848Sdavidxu		/* Decrease the read waiter count and possibly clear the read contention bit. */
2535177848Sdavidxu		blocked_readers = fuword32(&rwlock->rw_blocked_readers);
2536177848Sdavidxu		suword32(&rwlock->rw_blocked_readers, blocked_readers-1);
2537177848Sdavidxu		if (blocked_readers == 1) {
2538177848Sdavidxu			state = fuword32(__DEVOLATILE(int32_t *, &rwlock->rw_state));
2539177848Sdavidxu			for (;;) {
2540177848Sdavidxu				oldstate = casuword32(&rwlock->rw_state, state,
2541177848Sdavidxu					 state & ~URWLOCK_READ_WAITERS);
2542177848Sdavidxu				if (oldstate == state)
2543177848Sdavidxu					break;
2544177848Sdavidxu				state = oldstate;
2545177848Sdavidxu			}
2546177848Sdavidxu		}
2547177848Sdavidxu
2548177848Sdavidxu		umtxq_lock(&uq->uq_key);
2549177848Sdavidxu		umtxq_unbusy(&uq->uq_key);
2550177848Sdavidxu		umtxq_unlock(&uq->uq_key);
2551177848Sdavidxu	}
2552177848Sdavidxu	umtx_key_release(&uq->uq_key);
2553177849Sdavidxu	if (error == ERESTART)
2554177849Sdavidxu		error = EINTR;
2555177848Sdavidxu	return (error);
2556177848Sdavidxu}
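
/*
 * The rw_state word encodes the reader count in its low bits with the
 * URWLOCK_* flags above them, which is what makes a userland read-lock
 * fast path possible.  A minimal sketch (illustrative only, never
 * compiled here; the helper name is hypothetical):
 */
#if 0
static int
example_urwlock_tryrdlock(struct urwlock *rw)
{
	int32_t state;

	state = rw->rw_state;
	/* Readers may enter while no writer owns or waits for the lock. */
	while ((state & (URWLOCK_WRITE_OWNER | URWLOCK_WRITE_WAITERS)) == 0) {
		if (URWLOCK_READER_COUNT(state) == URWLOCK_MAX_READERS)
			return (EAGAIN);
		/* state + 1 bumps the reader count in the low bits. */
		if (atomic_cmpset_acq_32((volatile uint32_t *)&rw->rw_state,
		    state, state + 1))
			return (0);
		state = rw->rw_state;
	}
	return (EBUSY);		/* a writer owns or waits; go to the kernel */
}
#endif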
2557177848Sdavidxu
2558177848Sdavidxustatic int
2559233690Sdavidxudo_rw_wrlock(struct thread *td, struct urwlock *rwlock, struct _umtx_time *timeout)
2560177848Sdavidxu{
2561233690Sdavidxu	struct abs_timeout timo;
2562177848Sdavidxu	struct umtx_q *uq;
2563177848Sdavidxu	uint32_t flags;
2564177848Sdavidxu	int32_t state, oldstate;
2565177848Sdavidxu	int32_t blocked_writers;
2566197476Sdavidxu	int32_t blocked_readers;
2567177848Sdavidxu	int error;
2568177848Sdavidxu
2569177848Sdavidxu	uq = td->td_umtxq;
2570177848Sdavidxu	flags = fuword32(&rwlock->rw_flags);
2571177848Sdavidxu	error = umtx_key_get(rwlock, TYPE_RWLOCK, GET_SHARE(flags), &uq->uq_key);
2572177848Sdavidxu	if (error != 0)
2573177848Sdavidxu		return (error);
2574177848Sdavidxu
2575233690Sdavidxu	if (timeout != NULL)
2576233690Sdavidxu		abs_timeout_init2(&timo, timeout);
2577233690Sdavidxu
2578197476Sdavidxu	blocked_readers = 0;
2579177848Sdavidxu	for (;;) {
2580177848Sdavidxu		state = fuword32(__DEVOLATILE(int32_t *, &rwlock->rw_state));
2581177848Sdavidxu		while (!(state & URWLOCK_WRITE_OWNER) && URWLOCK_READER_COUNT(state) == 0) {
2582177848Sdavidxu			oldstate = casuword32(&rwlock->rw_state, state, state | URWLOCK_WRITE_OWNER);
2583177848Sdavidxu			if (oldstate == state) {
2584177848Sdavidxu				umtx_key_release(&uq->uq_key);
2585177848Sdavidxu				return (0);
2586177848Sdavidxu			}
2587177848Sdavidxu			state = oldstate;
2588177848Sdavidxu		}
2589177848Sdavidxu
2590197476Sdavidxu		if (error) {
2591197476Sdavidxu			if (!(state & (URWLOCK_WRITE_OWNER|URWLOCK_WRITE_WAITERS)) &&
2592197476Sdavidxu			    blocked_readers != 0) {
2593197476Sdavidxu				umtxq_lock(&uq->uq_key);
2594197476Sdavidxu				umtxq_busy(&uq->uq_key);
2595197476Sdavidxu				umtxq_signal_queue(&uq->uq_key, INT_MAX, UMTX_SHARED_QUEUE);
2596197476Sdavidxu				umtxq_unbusy(&uq->uq_key);
2597197476Sdavidxu				umtxq_unlock(&uq->uq_key);
2598197476Sdavidxu			}
2599197476Sdavidxu
2600177848Sdavidxu			break;
2601197476Sdavidxu		}
2602177848Sdavidxu
2603177848Sdavidxu		/* grab monitor lock */
2604177848Sdavidxu		umtxq_lock(&uq->uq_key);
2605177848Sdavidxu		umtxq_busy(&uq->uq_key);
2606177848Sdavidxu		umtxq_unlock(&uq->uq_key);
2607177848Sdavidxu
2608203414Sdavidxu		/*
2609203414Sdavidxu		 * re-read the state, in case it changed between the try-lock above
2610203414Sdavidxu		 * and the check below
2611203414Sdavidxu		 */
2612203414Sdavidxu		state = fuword32(__DEVOLATILE(int32_t *, &rwlock->rw_state));
2613203414Sdavidxu
2614177848Sdavidxu		while (((state & URWLOCK_WRITE_OWNER) || URWLOCK_READER_COUNT(state) != 0) &&
2615177848Sdavidxu		       (state & URWLOCK_WRITE_WAITERS) == 0) {
2616177848Sdavidxu			oldstate = casuword32(&rwlock->rw_state, state, state | URWLOCK_WRITE_WAITERS);
2617177848Sdavidxu			if (oldstate == state)
2618177848Sdavidxu				goto sleep;
2619177848Sdavidxu			state = oldstate;
2620177848Sdavidxu		}
2621177848Sdavidxu
2622177848Sdavidxu		if (!(state & URWLOCK_WRITE_OWNER) && URWLOCK_READER_COUNT(state) == 0) {
2623177848Sdavidxu			umtxq_lock(&uq->uq_key);
2624177848Sdavidxu			umtxq_unbusy(&uq->uq_key);
2625177848Sdavidxu			umtxq_unlock(&uq->uq_key);
2626177848Sdavidxu			continue;
2627177848Sdavidxu		}
2628177848Sdavidxusleep:
2629177848Sdavidxu		blocked_writers = fuword32(&rwlock->rw_blocked_writers);
2630177848Sdavidxu		suword32(&rwlock->rw_blocked_writers, blocked_writers+1);
2631177848Sdavidxu
2632177848Sdavidxu		while ((state & URWLOCK_WRITE_OWNER) || URWLOCK_READER_COUNT(state) != 0) {
2633177848Sdavidxu			umtxq_lock(&uq->uq_key);
2634177848Sdavidxu			umtxq_insert_queue(uq, UMTX_EXCLUSIVE_QUEUE);
2635177848Sdavidxu			umtxq_unbusy(&uq->uq_key);
2636177848Sdavidxu
2637233690Sdavidxu			error = umtxq_sleep(uq, "uwrlck", timeout == NULL ?
2638233690Sdavidxu			    NULL : &timo);
2639177848Sdavidxu
2640177848Sdavidxu			umtxq_busy(&uq->uq_key);
2641177848Sdavidxu			umtxq_remove_queue(uq, UMTX_EXCLUSIVE_QUEUE);
2642177848Sdavidxu			umtxq_unlock(&uq->uq_key);
2643177848Sdavidxu			if (error)
2644177848Sdavidxu				break;
2645177848Sdavidxu			state = fuword32(__DEVOLATILE(int32_t *, &rwlock->rw_state));
2646177848Sdavidxu		}
2647177848Sdavidxu
2648177848Sdavidxu		blocked_writers = fuword32(&rwlock->rw_blocked_writers);
2649177848Sdavidxu		suword32(&rwlock->rw_blocked_writers, blocked_writers-1);
2650177848Sdavidxu		if (blocked_writers == 1) {
2651177848Sdavidxu			state = fuword32(__DEVOLATILE(int32_t *, &rwlock->rw_state));
2652177848Sdavidxu			for (;;) {
2653177848Sdavidxu				oldstate = casuword32(&rwlock->rw_state, state,
2654177848Sdavidxu					 state & ~URWLOCK_WRITE_WAITERS);
2655177848Sdavidxu				if (oldstate == state)
2656177848Sdavidxu					break;
2657177848Sdavidxu				state = oldstate;
2658177848Sdavidxu			}
2659197476Sdavidxu			blocked_readers = fuword32(&rwlock->rw_blocked_readers);
2660197476Sdavidxu		} else
2661197476Sdavidxu			blocked_readers = 0;
2662177848Sdavidxu
2663177848Sdavidxu		umtxq_lock(&uq->uq_key);
2664177848Sdavidxu		umtxq_unbusy(&uq->uq_key);
2665177848Sdavidxu		umtxq_unlock(&uq->uq_key);
2666177848Sdavidxu	}
2667177848Sdavidxu
2668177848Sdavidxu	umtx_key_release(&uq->uq_key);
2669177849Sdavidxu	if (error == ERESTART)
2670177849Sdavidxu		error = EINTR;
2671177848Sdavidxu	return (error);
2672177848Sdavidxu}
2673177848Sdavidxu
2674177848Sdavidxustatic int
2675177880Sdavidxudo_rw_unlock(struct thread *td, struct urwlock *rwlock)
2676177848Sdavidxu{
2677177848Sdavidxu	struct umtx_q *uq;
2678177848Sdavidxu	uint32_t flags;
2679177848Sdavidxu	int32_t state, oldstate;
2680177848Sdavidxu	int error, q, count;
2681177848Sdavidxu
2682177848Sdavidxu	uq = td->td_umtxq;
2683177848Sdavidxu	flags = fuword32(&rwlock->rw_flags);
2684177848Sdavidxu	error = umtx_key_get(rwlock, TYPE_RWLOCK, GET_SHARE(flags), &uq->uq_key);
2685177848Sdavidxu	if (error != 0)
2686177848Sdavidxu		return (error);
2687177848Sdavidxu
2688177848Sdavidxu	state = fuword32(__DEVOLATILE(int32_t *, &rwlock->rw_state));
2689177848Sdavidxu	if (state & URWLOCK_WRITE_OWNER) {
2690177848Sdavidxu		for (;;) {
2691177848Sdavidxu			oldstate = casuword32(&rwlock->rw_state, state,
2692177848Sdavidxu				state & ~URWLOCK_WRITE_OWNER);
2693177848Sdavidxu			if (oldstate != state) {
2694177848Sdavidxu				state = oldstate;
2695177848Sdavidxu				if (!(oldstate & URWLOCK_WRITE_OWNER)) {
2696177848Sdavidxu					error = EPERM;
2697177848Sdavidxu					goto out;
2698177848Sdavidxu				}
2699177848Sdavidxu			} else
2700177848Sdavidxu				break;
2701177848Sdavidxu		}
2702177848Sdavidxu	} else if (URWLOCK_READER_COUNT(state) != 0) {
2703177848Sdavidxu		for (;;) {
2704177848Sdavidxu			oldstate = casuword32(&rwlock->rw_state, state,
2705177848Sdavidxu				state - 1);
2706177848Sdavidxu			if (oldstate != state) {
2707177848Sdavidxu				state = oldstate;
2708177848Sdavidxu				if (URWLOCK_READER_COUNT(oldstate) == 0) {
2709177848Sdavidxu					error = EPERM;
2710177848Sdavidxu					goto out;
2711177848Sdavidxu				}
2712177848Sdavidxu			}
2713177848Sdavidxu			else
2714177848Sdavidxu				break;
2715177848Sdavidxu		}
2716177848Sdavidxu	} else {
2717177848Sdavidxu		error = EPERM;
2718177848Sdavidxu		goto out;
2719177848Sdavidxu	}
2720177848Sdavidxu
2721177848Sdavidxu	count = 0;
2722177848Sdavidxu
2723177848Sdavidxu	if (!(flags & URWLOCK_PREFER_READER)) {
2724177848Sdavidxu		if (state & URWLOCK_WRITE_WAITERS) {
2725177848Sdavidxu			count = 1;
2726177848Sdavidxu			q = UMTX_EXCLUSIVE_QUEUE;
2727177848Sdavidxu		} else if (state & URWLOCK_READ_WAITERS) {
2728177848Sdavidxu			count = INT_MAX;
2729177848Sdavidxu			q = UMTX_SHARED_QUEUE;
2730177848Sdavidxu		}
2731177848Sdavidxu	} else {
2732177848Sdavidxu		if (state & URWLOCK_READ_WAITERS) {
2733177848Sdavidxu			count = INT_MAX;
2734177848Sdavidxu			q = UMTX_SHARED_QUEUE;
2735177848Sdavidxu		} else if (state & URWLOCK_WRITE_WAITERS) {
2736177848Sdavidxu			count = 1;
2737177848Sdavidxu			q = UMTX_EXCLUSIVE_QUEUE;
2738177848Sdavidxu		}
2739177848Sdavidxu	}
2740177848Sdavidxu
2741177848Sdavidxu	if (count) {
2742177848Sdavidxu		umtxq_lock(&uq->uq_key);
2743177848Sdavidxu		umtxq_busy(&uq->uq_key);
2744177848Sdavidxu		umtxq_signal_queue(&uq->uq_key, count, q);
2745177848Sdavidxu		umtxq_unbusy(&uq->uq_key);
2746177848Sdavidxu		umtxq_unlock(&uq->uq_key);
2747177848Sdavidxu	}
2748177848Sdavidxuout:
2749177848Sdavidxu	umtx_key_release(&uq->uq_key);
2750177848Sdavidxu	return (error);
2751177848Sdavidxu}
2752177848Sdavidxu
2753201472Sdavidxustatic int
2754232144Sdavidxudo_sem_wait(struct thread *td, struct _usem *sem, struct _umtx_time *timeout)
2755201472Sdavidxu{
2756233690Sdavidxu	struct abs_timeout timo;
2757201472Sdavidxu	struct umtx_q *uq;
2758201472Sdavidxu	uint32_t flags, count;
2759201472Sdavidxu	int error;
2760201472Sdavidxu
2761201472Sdavidxu	uq = td->td_umtxq;
2762201472Sdavidxu	flags = fuword32(&sem->_flags);
2763201885Sdavidxu	error = umtx_key_get(sem, TYPE_SEM, GET_SHARE(flags), &uq->uq_key);
2764201472Sdavidxu	if (error != 0)
2765201472Sdavidxu		return (error);
2766233690Sdavidxu
2767233690Sdavidxu	if (timeout != NULL)
2768233690Sdavidxu		abs_timeout_init2(&timo, timeout);
2769233690Sdavidxu
2770201472Sdavidxu	umtxq_lock(&uq->uq_key);
2771201472Sdavidxu	umtxq_busy(&uq->uq_key);
2772201472Sdavidxu	umtxq_insert(uq);
2773201472Sdavidxu	umtxq_unlock(&uq->uq_key);
2774201472Sdavidxu
2775230194Sdavidxu	casuword32(__DEVOLATILE(uint32_t *, &sem->_has_waiters), 0, 1);
2776230194Sdavidxu	rmb();
2777201472Sdavidxu	count = fuword32(__DEVOLATILE(uint32_t *, &sem->_count));
2778201472Sdavidxu	if (count != 0) {
2779201472Sdavidxu		umtxq_lock(&uq->uq_key);
2780201472Sdavidxu		umtxq_unbusy(&uq->uq_key);
2781201472Sdavidxu		umtxq_remove(uq);
2782201472Sdavidxu		umtxq_unlock(&uq->uq_key);
2783201472Sdavidxu		umtx_key_release(&uq->uq_key);
2784201472Sdavidxu		return (0);
2785201472Sdavidxu	}
2786201472Sdavidxu	umtxq_lock(&uq->uq_key);
2787201472Sdavidxu	umtxq_unbusy(&uq->uq_key);
2788201472Sdavidxu
2789233690Sdavidxu	error = umtxq_sleep(uq, "usem", timeout == NULL ? NULL : &timo);
2790201472Sdavidxu
2791211794Sdavidxu	if ((uq->uq_flags & UQF_UMTXQ) == 0)
2792211794Sdavidxu		error = 0;
2793211794Sdavidxu	else {
2794211794Sdavidxu		umtxq_remove(uq);
2795201472Sdavidxu		if (error == ERESTART)
2796201472Sdavidxu			error = EINTR;
2797201472Sdavidxu	}
2798201472Sdavidxu	umtxq_unlock(&uq->uq_key);
2799201472Sdavidxu	umtx_key_release(&uq->uq_key);
2800201472Sdavidxu	return (error);
2801201472Sdavidxu}
2802201472Sdavidxu
2803201472Sdavidxu/*
2804201472Sdavidxu * Wake up a waiter on a userland semaphore.
2805201472Sdavidxu */
2806201472Sdavidxustatic int
2807201472Sdavidxudo_sem_wake(struct thread *td, struct _usem *sem)
2808201472Sdavidxu{
2809201472Sdavidxu	struct umtx_key key;
2810201472Sdavidxu	int error, cnt, nwake;
2811201472Sdavidxu	uint32_t flags;
2812201472Sdavidxu
2813201472Sdavidxu	flags = fuword32(&sem->_flags);
2814201885Sdavidxu	if ((error = umtx_key_get(sem, TYPE_SEM, GET_SHARE(flags), &key)) != 0)
2815201472Sdavidxu		return (error);
2816201472Sdavidxu	umtxq_lock(&key);
2817201472Sdavidxu	umtxq_busy(&key);
2818201472Sdavidxu	cnt = umtxq_count(&key);
2819201472Sdavidxu	nwake = umtxq_signal(&key, 1);
2820201472Sdavidxu	if (cnt <= nwake) {
2821201472Sdavidxu		umtxq_unlock(&key);
2822201472Sdavidxu		error = suword32(
2823201472Sdavidxu		    __DEVOLATILE(uint32_t *, &sem->_has_waiters), 0);
2824201472Sdavidxu		umtxq_lock(&key);
2825201472Sdavidxu	}
2826201472Sdavidxu	umtxq_unbusy(&key);
2827201472Sdavidxu	umtxq_unlock(&key);
2828201472Sdavidxu	umtx_key_release(&key);
2829201472Sdavidxu	return (error);
2830201472Sdavidxu}
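/*
 * Note: _has_waiters is cleared only when the wakeup emptied the queue
 * (cnt <= nwake), and the queue is kept busy across the userland store
 * so that a thread arriving concurrently cannot observe a stale flag
 * and miss its wakeup.
 */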
2831201472Sdavidxu
2832139013Sdavidxuint
2833225617Skmacysys__umtx_lock(struct thread *td, struct _umtx_lock_args *uap)
2834139013Sdavidxu    /* struct umtx *umtx */
2835139013Sdavidxu{
2836233690Sdavidxu	return do_lock_umtx(td, uap->umtx, td->td_tid, NULL);
2837139013Sdavidxu}
2838139013Sdavidxu
2839139013Sdavidxuint
2840225617Skmacysys__umtx_unlock(struct thread *td, struct _umtx_unlock_args *uap)
2841139013Sdavidxu    /* struct umtx *umtx */
2842139013Sdavidxu{
2843162536Sdavidxu	return do_unlock_umtx(td, uap->umtx, td->td_tid);
2844139013Sdavidxu}
2845139013Sdavidxu
2846228219Sphoinline int
2847228219Sphoumtx_copyin_timeout(const void *addr, struct timespec *tsp)
2848228219Spho{
2849228219Spho	int error;
2850228219Spho
2851228219Spho	error = copyin(addr, tsp, sizeof(struct timespec));
2852228219Spho	if (error == 0) {
2853228219Spho		if (tsp->tv_sec < 0 ||
2854228219Spho		    tsp->tv_nsec >= 1000000000 ||
2855228219Spho		    tsp->tv_nsec < 0)
2856228219Spho			error = EINVAL;
2857228219Spho	}
2858228219Spho	return (error);
2859228219Spho}
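/*
 * Note: timeouts are validated once at this copyin boundary (a
 * negative tv_sec, or a tv_nsec outside [0, 1000000000), fails with
 * EINVAL), so the sleep paths can consume user-supplied timespecs
 * without re-checking them.
 */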
2860228219Spho
2861232144Sdavidxustatic inline int
2862232144Sdavidxuumtx_copyin_umtx_time(const void *addr, size_t size, struct _umtx_time *tp)
2863232144Sdavidxu{
2864232144Sdavidxu	int error;
2865232144Sdavidxu
2866232286Sdavidxu	if (size <= sizeof(struct timespec)) {
2867232286Sdavidxu		tp->_clockid = CLOCK_REALTIME;
2868232286Sdavidxu		tp->_flags = 0;
2869232144Sdavidxu		error = copyin(addr, &tp->_timeout, sizeof(struct timespec));
2870232286Sdavidxu	} else
2871232144Sdavidxu		error = copyin(addr, tp, sizeof(struct _umtx_time));
2872232144Sdavidxu	if (error != 0)
2873232144Sdavidxu		return (error);
2874232144Sdavidxu	if (tp->_timeout.tv_sec < 0 ||
2875232144Sdavidxu	    tp->_timeout.tv_nsec >= 1000000000 || tp->_timeout.tv_nsec < 0)
2876232144Sdavidxu		return (EINVAL);
2877232144Sdavidxu	return (0);
2878232144Sdavidxu}
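/*
 * Note on the size test above: it preserves backward compatibility.
 * Callers may pass either a bare struct timespec, taken as a relative
 * CLOCK_REALTIME timeout, or a full struct _umtx_time carrying an
 * explicit clock id and flags such as UMTX_ABSTIME.  The structure
 * size travels in the otherwise unused uaddr1 argument of _umtx_op(2);
 * an illustrative userland call (ts being a pre-filled absolute
 * struct timespec) would be:
 *
 *	struct _umtx_time t = {
 *		._timeout = ts,
 *		._flags = UMTX_ABSTIME,
 *		._clockid = CLOCK_MONOTONIC,
 *	};
 *	_umtx_op(obj, UMTX_OP_MUTEX_LOCK, 0,
 *	    (void *)sizeof(struct _umtx_time), &t);
 */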
2879232144Sdavidxu
2880162536Sdavidxustatic int
2881162536Sdavidxu__umtx_op_lock_umtx(struct thread *td, struct _umtx_op_args *uap)
2882139013Sdavidxu{
2883162536Sdavidxu	struct timespec *ts, timeout;
2884139013Sdavidxu	int error;
2885139013Sdavidxu
2886162536Sdavidxu	/* Allow a null timespec (wait forever). */
2887162536Sdavidxu	if (uap->uaddr2 == NULL)
2888162536Sdavidxu		ts = NULL;
2889162536Sdavidxu	else {
2890228219Spho		error = umtx_copyin_timeout(uap->uaddr2, &timeout);
2891162536Sdavidxu		if (error != 0)
2892162536Sdavidxu			return (error);
2893162536Sdavidxu		ts = &timeout;
2894162536Sdavidxu	}
2895162536Sdavidxu	return (do_lock_umtx(td, uap->obj, uap->val, ts));
2896162536Sdavidxu}
2897162536Sdavidxu
2898162536Sdavidxustatic int
2899162536Sdavidxu__umtx_op_unlock_umtx(struct thread *td, struct _umtx_op_args *uap)
2900162536Sdavidxu{
2901162536Sdavidxu	return (do_unlock_umtx(td, uap->obj, uap->val));
2902162536Sdavidxu}
2903162536Sdavidxu
2904162536Sdavidxustatic int
2905162536Sdavidxu__umtx_op_wait(struct thread *td, struct _umtx_op_args *uap)
2906162536Sdavidxu{
2907232144Sdavidxu	struct _umtx_time timeout, *tm_p;
2908162536Sdavidxu	int error;
2909162536Sdavidxu
2910162536Sdavidxu	if (uap->uaddr2 == NULL)
2911232144Sdavidxu		tm_p = NULL;
2912162536Sdavidxu	else {
2913232144Sdavidxu		error = umtx_copyin_umtx_time(
2914232144Sdavidxu		    uap->uaddr2, (size_t)uap->uaddr1, &timeout);
2915162536Sdavidxu		if (error != 0)
2916162536Sdavidxu			return (error);
2917232144Sdavidxu		tm_p = &timeout;
2918162536Sdavidxu	}
2919232144Sdavidxu	return do_wait(td, uap->obj, uap->val, tm_p, 0, 0);
2920162536Sdavidxu}
2921162536Sdavidxu
2922162536Sdavidxustatic int
2923173800Sdavidxu__umtx_op_wait_uint(struct thread *td, struct _umtx_op_args *uap)
2924173800Sdavidxu{
2925232144Sdavidxu	struct _umtx_time timeout, *tm_p;
2926173800Sdavidxu	int error;
2927173800Sdavidxu
2928173800Sdavidxu	if (uap->uaddr2 == NULL)
2929232144Sdavidxu		tm_p = NULL;
2930173800Sdavidxu	else {
2931232144Sdavidxu		error = umtx_copyin_umtx_time(
2932232144Sdavidxu		    uap->uaddr2, (size_t)uap->uaddr1, &timeout);
2933173800Sdavidxu		if (error != 0)
2934173800Sdavidxu			return (error);
2935232144Sdavidxu		tm_p = &timeout;
2936173800Sdavidxu	}
2937232144Sdavidxu	return do_wait(td, uap->obj, uap->val, tm_p, 1, 0);
2938173800Sdavidxu}
2939173800Sdavidxu
2940173800Sdavidxustatic int
2941178646Sdavidxu__umtx_op_wait_uint_private(struct thread *td, struct _umtx_op_args *uap)
2942178646Sdavidxu{
2943232144Sdavidxu	struct _umtx_time *tm_p, timeout;
2944178646Sdavidxu	int error;
2945178646Sdavidxu
2946178646Sdavidxu	if (uap->uaddr2 == NULL)
2947232144Sdavidxu		tm_p = NULL;
2948178646Sdavidxu	else {
2949232144Sdavidxu		error = umtx_copyin_umtx_time(
2950232144Sdavidxu		    uap->uaddr2, (size_t)uap->uaddr1, &timeout);
2951178646Sdavidxu		if (error != 0)
2952178646Sdavidxu			return (error);
2953232144Sdavidxu		tm_p = &timeout;
2954178646Sdavidxu	}
2955232144Sdavidxu	return do_wait(td, uap->obj, uap->val, tm_p, 1, 1);
2956178646Sdavidxu}
2957178646Sdavidxu
2958178646Sdavidxustatic int
2959162536Sdavidxu__umtx_op_wake(struct thread *td, struct _umtx_op_args *uap)
2960162536Sdavidxu{
2961178646Sdavidxu	return (kern_umtx_wake(td, uap->obj, uap->val, 0));
2962162536Sdavidxu}
2963162536Sdavidxu
2964216641Sdavidxu#define BATCH_SIZE	128
2965162536Sdavidxustatic int
2966216641Sdavidxu__umtx_op_nwake_private(struct thread *td, struct _umtx_op_args *uap)
2967216641Sdavidxu{
2968216641Sdavidxu	int count = uap->val;
2969216641Sdavidxu	void *uaddrs[BATCH_SIZE];
2970216641Sdavidxu	char **upp = (char **)uap->obj;
2971216641Sdavidxu	int tocopy;
2972216641Sdavidxu	int error = 0;
2973216641Sdavidxu	int i, pos = 0;
2974216641Sdavidxu
2975216641Sdavidxu	while (count > 0) {
2976216641Sdavidxu		tocopy = count;
2977216641Sdavidxu		if (tocopy > BATCH_SIZE)
2978216641Sdavidxu			tocopy = BATCH_SIZE;
2979216641Sdavidxu		error = copyin(upp + pos, uaddrs, tocopy * sizeof(char *));
2980216641Sdavidxu		if (error != 0)
2981216641Sdavidxu			break;
2982216641Sdavidxu		for (i = 0; i < tocopy; ++i)
2983216641Sdavidxu			kern_umtx_wake(td, uaddrs[i], INT_MAX, 1);
2984216641Sdavidxu		count -= tocopy;
2985216641Sdavidxu		pos += tocopy;
2986216641Sdavidxu	}
2987216641Sdavidxu	return (error);
2988216641Sdavidxu}
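/*
 * Note: the loop above copies the userland pointer array in
 * BATCH_SIZE-sized chunks so the on-stack buffer stays small while a
 * single system call can still wake an arbitrary number of private
 * words.
 */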
2989216641Sdavidxu
2990216641Sdavidxustatic int
2991178646Sdavidxu__umtx_op_wake_private(struct thread *td, struct _umtx_op_args *uap)
2992178646Sdavidxu{
2993178646Sdavidxu	return (kern_umtx_wake(td, uap->obj, uap->val, 1));
2994178646Sdavidxu}
2995178646Sdavidxu
2996178646Sdavidxustatic int
2997162536Sdavidxu__umtx_op_lock_umutex(struct thread *td, struct _umtx_op_args *uap)
2998162536Sdavidxu{
2999232144Sdavidxu	struct _umtx_time *tm_p, timeout;
3000162536Sdavidxu	int error;
3001162536Sdavidxu
3002162536Sdavidxu	/* Allow a null timespec (wait forever). */
3003162536Sdavidxu	if (uap->uaddr2 == NULL)
3004232144Sdavidxu		tm_p = NULL;
3005162536Sdavidxu	else {
3006232144Sdavidxu		error = umtx_copyin_umtx_time(
3007232144Sdavidxu		    uap->uaddr2, (size_t)uap->uaddr1, &timeout);
3008162536Sdavidxu		if (error != 0)
3009162536Sdavidxu			return (error);
3010232144Sdavidxu		tm_p = &timeout;
3011139013Sdavidxu	}
3012232144Sdavidxu	return do_lock_umutex(td, uap->obj, tm_p, 0);
3013162536Sdavidxu}
3014162536Sdavidxu
3015162536Sdavidxustatic int
3016162536Sdavidxu__umtx_op_trylock_umutex(struct thread *td, struct _umtx_op_args *uap)
3017162536Sdavidxu{
3018179970Sdavidxu	return do_lock_umutex(td, uap->obj, NULL, _UMUTEX_TRY);
3019162536Sdavidxu}
3020162536Sdavidxu
3021162536Sdavidxustatic int
3022179970Sdavidxu__umtx_op_wait_umutex(struct thread *td, struct _umtx_op_args *uap)
3023179970Sdavidxu{
3024232144Sdavidxu	struct _umtx_time *tm_p, timeout;
3025179970Sdavidxu	int error;
3026179970Sdavidxu
3027179970Sdavidxu	/* Allow a null timespec (wait forever). */
3028179970Sdavidxu	if (uap->uaddr2 == NULL)
3029232144Sdavidxu		tm_p = NULL;
3030179970Sdavidxu	else {
3031232144Sdavidxu		error = umtx_copyin_umtx_time(
3032232144Sdavidxu		    uap->uaddr2, (size_t)uap->uaddr1, &timeout);
3033179970Sdavidxu		if (error != 0)
3034179970Sdavidxu			return (error);
3035232144Sdavidxu		tm_p = &timeout;
3036179970Sdavidxu	}
3037232144Sdavidxu	return do_lock_umutex(td, uap->obj, tm_p, _UMUTEX_WAIT);
3038179970Sdavidxu}
3039179970Sdavidxu
3040179970Sdavidxustatic int
3041179970Sdavidxu__umtx_op_wake_umutex(struct thread *td, struct _umtx_op_args *uap)
3042179970Sdavidxu{
3043179970Sdavidxu	return do_wake_umutex(td, uap->obj);
3044179970Sdavidxu}
3045179970Sdavidxu
3046179970Sdavidxustatic int
3047162536Sdavidxu__umtx_op_unlock_umutex(struct thread *td, struct _umtx_op_args *uap)
3048162536Sdavidxu{
3049162536Sdavidxu	return do_unlock_umutex(td, uap->obj);
3050162536Sdavidxu}
3051162536Sdavidxu
3052162536Sdavidxustatic int
3053162536Sdavidxu__umtx_op_set_ceiling(struct thread *td, struct _umtx_op_args *uap)
3054162536Sdavidxu{
3055162536Sdavidxu	return do_set_ceiling(td, uap->obj, uap->val, uap->uaddr1);
3056162536Sdavidxu}
3057162536Sdavidxu
3058164839Sdavidxustatic int
3059164839Sdavidxu__umtx_op_cv_wait(struct thread *td, struct _umtx_op_args *uap)
3060164839Sdavidxu{
3061164839Sdavidxu	struct timespec *ts, timeout;
3062164839Sdavidxu	int error;
3063164839Sdavidxu
3064164839Sdavidxu	/* Allow a null timespec (wait forever). */
3065164839Sdavidxu	if (uap->uaddr2 == NULL)
3066164839Sdavidxu		ts = NULL;
3067164839Sdavidxu	else {
3068228219Spho		error = umtx_copyin_timeout(uap->uaddr2, &timeout);
3069164839Sdavidxu		if (error != 0)
3070164839Sdavidxu			return (error);
3071164839Sdavidxu		ts = &timeout;
3072164839Sdavidxu	}
3073164876Sdavidxu	return (do_cv_wait(td, uap->obj, uap->uaddr1, ts, uap->val));
3074164839Sdavidxu}
3075164839Sdavidxu
3076164839Sdavidxustatic int
3077164839Sdavidxu__umtx_op_cv_signal(struct thread *td, struct _umtx_op_args *uap)
3078164839Sdavidxu{
3079164839Sdavidxu	return do_cv_signal(td, uap->obj);
3080164839Sdavidxu}
3081164839Sdavidxu
3082164839Sdavidxustatic int
3083164839Sdavidxu__umtx_op_cv_broadcast(struct thread *td, struct _umtx_op_args *uap)
3084164839Sdavidxu{
3085164839Sdavidxu	return do_cv_broadcast(td, uap->obj);
3086164839Sdavidxu}
3087164839Sdavidxu
3088177848Sdavidxustatic int
3089177848Sdavidxu__umtx_op_rw_rdlock(struct thread *td, struct _umtx_op_args *uap)
3090177848Sdavidxu{
3091232209Sdavidxu	struct _umtx_time timeout;
3092177848Sdavidxu	int error;
3093177848Sdavidxu
3094177848Sdavidxu	/* Allow a null timespec (wait forever). */
3095177848Sdavidxu	if (uap->uaddr2 == NULL) {
3096177848Sdavidxu		error = do_rw_rdlock(td, uap->obj, uap->val, NULL);
3097177848Sdavidxu	} else {
3098232209Sdavidxu		error = umtx_copyin_umtx_time(uap->uaddr2,
3099232209Sdavidxu		   (size_t)uap->uaddr1, &timeout);
3100177848Sdavidxu		if (error != 0)
3101177848Sdavidxu			return (error);
3102233690Sdavidxu		error = do_rw_rdlock(td, uap->obj, uap->val, &timeout);
3103177848Sdavidxu	}
3104177848Sdavidxu	return (error);
3105177848Sdavidxu}
3106177848Sdavidxu
3107177848Sdavidxustatic int
3108177848Sdavidxu__umtx_op_rw_wrlock(struct thread *td, struct _umtx_op_args *uap)
3109177848Sdavidxu{
3110232209Sdavidxu	struct _umtx_time timeout;
3111177848Sdavidxu	int error;
3112177848Sdavidxu
3113177848Sdavidxu	/* Allow a null timespec (wait forever). */
3114177848Sdavidxu	if (uap->uaddr2 == NULL) {
3115177848Sdavidxu		error = do_rw_wrlock(td, uap->obj, NULL);
3116177848Sdavidxu	} else {
3117232209Sdavidxu		error = umtx_copyin_umtx_time(uap->uaddr2,
3118232209Sdavidxu		   (size_t)uap->uaddr1, &timeout);
3119177848Sdavidxu		if (error != 0)
3120177848Sdavidxu			return (error);
3121177848Sdavidxu
3122233690Sdavidxu		error = do_rw_wrlock(td, uap->obj, &timeout);
3123177848Sdavidxu	}
3124177848Sdavidxu	return (error);
3125177848Sdavidxu}
3126177848Sdavidxu
3127177848Sdavidxustatic int
3128177848Sdavidxu__umtx_op_rw_unlock(struct thread *td, struct _umtx_op_args *uap)
3129177848Sdavidxu{
3130177880Sdavidxu	return do_rw_unlock(td, uap->obj);
3131177848Sdavidxu}
3132177848Sdavidxu
3133201472Sdavidxustatic int
3134201472Sdavidxu__umtx_op_sem_wait(struct thread *td, struct _umtx_op_args *uap)
3135201472Sdavidxu{
3136232144Sdavidxu	struct _umtx_time *tm_p, timeout;
3137201472Sdavidxu	int error;
3138201472Sdavidxu
3139201472Sdavidxu	/* Allow a null timespec (wait forever). */
3140201472Sdavidxu	if (uap->uaddr2 == NULL)
3141232144Sdavidxu		tm_p = NULL;
3142201472Sdavidxu	else {
3143232144Sdavidxu		error = umtx_copyin_umtx_time(
3144232144Sdavidxu		    uap->uaddr2, (size_t)uap->uaddr1, &timeout);
3145201472Sdavidxu		if (error != 0)
3146201472Sdavidxu			return (error);
3147232144Sdavidxu		tm_p = &timeout;
3148201472Sdavidxu	}
3149232144Sdavidxu	return (do_sem_wait(td, uap->obj, tm_p));
3150201472Sdavidxu}
3151201472Sdavidxu
3152201472Sdavidxustatic int
3153201472Sdavidxu__umtx_op_sem_wake(struct thread *td, struct _umtx_op_args *uap)
3154201472Sdavidxu{
3155201472Sdavidxu	return do_sem_wake(td, uap->obj);
3156201472Sdavidxu}
3157201472Sdavidxu
3158162536Sdavidxutypedef int (*_umtx_op_func)(struct thread *td, struct _umtx_op_args *uap);
3159162536Sdavidxu
3160162536Sdavidxustatic _umtx_op_func op_table[] = {
3161162536Sdavidxu	__umtx_op_lock_umtx,		/* UMTX_OP_LOCK */
3162162536Sdavidxu	__umtx_op_unlock_umtx,		/* UMTX_OP_UNLOCK */
3163162536Sdavidxu	__umtx_op_wait,			/* UMTX_OP_WAIT */
3164162536Sdavidxu	__umtx_op_wake,			/* UMTX_OP_WAKE */
3165162536Sdavidxu	__umtx_op_trylock_umutex,	/* UMTX_OP_MUTEX_TRYLOCK */
3166162536Sdavidxu	__umtx_op_lock_umutex,		/* UMTX_OP_MUTEX_LOCK */
3167162536Sdavidxu	__umtx_op_unlock_umutex,	/* UMTX_OP_MUTEX_UNLOCK */
3168164839Sdavidxu	__umtx_op_set_ceiling,		/* UMTX_OP_SET_CEILING */
3169164839Sdavidxu	__umtx_op_cv_wait,		/* UMTX_OP_CV_WAIT */
3170164839Sdavidxu	__umtx_op_cv_signal,		/* UMTX_OP_CV_SIGNAL */
3171173800Sdavidxu	__umtx_op_cv_broadcast,		/* UMTX_OP_CV_BROADCAST */
3172177848Sdavidxu	__umtx_op_wait_uint,		/* UMTX_OP_WAIT_UINT */
3173177848Sdavidxu	__umtx_op_rw_rdlock,		/* UMTX_OP_RW_RDLOCK */
3174177848Sdavidxu	__umtx_op_rw_wrlock,		/* UMTX_OP_RW_WRLOCK */
3175178646Sdavidxu	__umtx_op_rw_unlock,		/* UMTX_OP_RW_UNLOCK */
3176178646Sdavidxu	__umtx_op_wait_uint_private,	/* UMTX_OP_WAIT_UINT_PRIVATE */
3177179970Sdavidxu	__umtx_op_wake_private,		/* UMTX_OP_WAKE_PRIVATE */
3178179970Sdavidxu	__umtx_op_wait_umutex,		/* UMTX_OP_UMUTEX_WAIT */
3179201472Sdavidxu	__umtx_op_wake_umutex,		/* UMTX_OP_UMUTEX_WAKE */
3180201472Sdavidxu	__umtx_op_sem_wait,		/* UMTX_OP_SEM_WAIT */
3181216641Sdavidxu	__umtx_op_sem_wake,		/* UMTX_OP_SEM_WAKE */
3182216641Sdavidxu	__umtx_op_nwake_private		/* UMTX_OP_NWAKE_PRIVATE */
3183162536Sdavidxu};
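/*
 * Note: the table above is indexed directly by the UMTX_OP_* constant,
 * so entries must stay in enum order and the table must remain exactly
 * UMTX_OP_MAX entries long; sys__umtx_op() below performs only a
 * single range check before dispatching.
 */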
3184162536Sdavidxu
3185162536Sdavidxuint
3186225617Skmacysys__umtx_op(struct thread *td, struct _umtx_op_args *uap)
3187162536Sdavidxu{
3188163678Sdavidxu	if ((unsigned)uap->op < UMTX_OP_MAX)
3189162536Sdavidxu		return (*op_table[uap->op])(td, uap);
3190162536Sdavidxu	return (EINVAL);
3191162536Sdavidxu}
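/*
 * Illustrative userland view of the dispatcher above (a sketch; the
 * helper name is made up).  A futex-style wait compares the word
 * against val inside the kernel and sleeps only while they still
 * match, so a wake racing ahead of the sleep is never lost:
 *
 *	static void
 *	wait_until_nonzero(uint32_t *word)
 *	{
 *		while (*word == 0)
 *			_umtx_op(word, UMTX_OP_WAIT_UINT_PRIVATE, 0,
 *			    NULL, NULL);
 *	}
 *
 * The wake side, after setting *word non-zero, would call
 * _umtx_op(word, UMTX_OP_WAKE_PRIVATE, INT_MAX, NULL, NULL).
 */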
3192162536Sdavidxu
3193205014Snwhitehorn#ifdef COMPAT_FREEBSD32
3194163046Sdavidxuint
3195163046Sdavidxufreebsd32_umtx_lock(struct thread *td, struct freebsd32_umtx_lock_args *uap)
3196163046Sdavidxu    /* struct umtx *umtx */
3197163046Sdavidxu{
3198163046Sdavidxu	return (do_lock_umtx32(td, (uint32_t *)uap->umtx, td->td_tid, NULL));
3199163046Sdavidxu}
3200163046Sdavidxu
3201163046Sdavidxuint
3202163046Sdavidxufreebsd32_umtx_unlock(struct thread *td, struct freebsd32_umtx_unlock_args *uap)
3203163046Sdavidxu    /* struct umtx *umtx */
3204163046Sdavidxu{
3205163046Sdavidxu	return (do_unlock_umtx32(td, (uint32_t *)uap->umtx, td->td_tid));
3206163046Sdavidxu}
3207163046Sdavidxu
3208162536Sdavidxustruct timespec32 {
3209209390Sed	int32_t tv_sec;
3210209390Sed	int32_t tv_nsec;
3211162536Sdavidxu};
3212162536Sdavidxu
3213232144Sdavidxustruct umtx_time32 {
3214232144Sdavidxu	struct	timespec32	timeout;
3215232144Sdavidxu	uint32_t		flags;
3216232144Sdavidxu	uint32_t		clockid;
3217232144Sdavidxu};
3218232144Sdavidxu
3219162536Sdavidxustatic inline int
3220228218Sphoumtx_copyin_timeout32(void *addr, struct timespec *tsp)
3221162536Sdavidxu{
3222162536Sdavidxu	struct timespec32 ts32;
3223162536Sdavidxu	int error;
3224162536Sdavidxu
3225162536Sdavidxu	error = copyin(addr, &ts32, sizeof(struct timespec32));
3226162536Sdavidxu	if (error == 0) {
3227228218Spho		if (ts32.tv_sec < 0 ||
3228228218Spho		    ts32.tv_nsec >= 1000000000 ||
3229228218Spho		    ts32.tv_nsec < 0)
3230228218Spho			error = EINVAL;
3231228218Spho		else {
3232228218Spho			tsp->tv_sec = ts32.tv_sec;
3233228218Spho			tsp->tv_nsec = ts32.tv_nsec;
3234228218Spho		}
3235162536Sdavidxu	}
3236140421Sdavidxu	return (error);
3237139013Sdavidxu}
3238161678Sdavidxu
3239232144Sdavidxustatic inline int
3240232144Sdavidxuumtx_copyin_umtx_time32(const void *addr, size_t size, struct _umtx_time *tp)
3241232144Sdavidxu{
3242232144Sdavidxu	struct umtx_time32 t32;
3243232144Sdavidxu	int error;
3244232144Sdavidxu
3245232144Sdavidxu	t32.clockid = CLOCK_REALTIME;
3246232144Sdavidxu	t32.flags   = 0;
3247232144Sdavidxu	if (size <= sizeof(struct timespec32))
3248232144Sdavidxu		error = copyin(addr, &t32.timeout, sizeof(struct timespec32));
3249232144Sdavidxu	else
3250232144Sdavidxu		error = copyin(addr, &t32, sizeof(struct umtx_time32));
3251232144Sdavidxu	if (error != 0)
3252232144Sdavidxu		return (error);
3253232144Sdavidxu	if (t32.timeout.tv_sec < 0 ||
3254232144Sdavidxu	    t32.timeout.tv_nsec >= 1000000000 || t32.timeout.tv_nsec < 0)
3255232144Sdavidxu		return (EINVAL);
3256232144Sdavidxu	tp->_timeout.tv_sec = t32.timeout.tv_sec;
3257232144Sdavidxu	tp->_timeout.tv_nsec = t32.timeout.tv_nsec;
3258232144Sdavidxu	tp->_flags = t32.flags;
3259232144Sdavidxu	tp->_clockid = t32.clockid;
3260232144Sdavidxu	return (0);
3261232144Sdavidxu}
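/*
 * Note: the 32-bit variant cannot simply bcopy the structure; the
 * timespec fields are 32 bits wide in the compat layout, so each field
 * is widened and validated individually before being stored into the
 * native struct _umtx_time.
 */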
3262232144Sdavidxu
3263162536Sdavidxustatic int
3264162536Sdavidxu__umtx_op_lock_umtx_compat32(struct thread *td, struct _umtx_op_args *uap)
3265162536Sdavidxu{
3266162536Sdavidxu	struct timespec *ts, timeout;
3267162536Sdavidxu	int error;
3268162536Sdavidxu
3269162536Sdavidxu	/* Allow a null timespec (wait forever). */
3270162536Sdavidxu	if (uap->uaddr2 == NULL)
3271162536Sdavidxu		ts = NULL;
3272162536Sdavidxu	else {
3273228218Spho		error = umtx_copyin_timeout32(uap->uaddr2, &timeout);
3274162536Sdavidxu		if (error != 0)
3275162536Sdavidxu			return (error);
3276162536Sdavidxu		ts = &timeout;
3277162536Sdavidxu	}
3278162536Sdavidxu	return (do_lock_umtx32(td, uap->obj, uap->val, ts));
3279162536Sdavidxu}
3280162536Sdavidxu
3281162536Sdavidxustatic int
3282162536Sdavidxu__umtx_op_unlock_umtx_compat32(struct thread *td, struct _umtx_op_args *uap)
3283162536Sdavidxu{
3284162536Sdavidxu	return (do_unlock_umtx32(td, uap->obj, (uint32_t)uap->val));
3285162536Sdavidxu}
3286162536Sdavidxu
3287162536Sdavidxustatic int
3288162536Sdavidxu__umtx_op_wait_compat32(struct thread *td, struct _umtx_op_args *uap)
3289162536Sdavidxu{
3290232144Sdavidxu	struct _umtx_time *tm_p, timeout;
3291162536Sdavidxu	int error;
3292162536Sdavidxu
3293162536Sdavidxu	if (uap->uaddr2 == NULL)
3294232144Sdavidxu		tm_p = NULL;
3295162536Sdavidxu	else {
3296232144Sdavidxu		error = umtx_copyin_umtx_time32(uap->uaddr2,
3297232144Sdavidxu			(size_t)uap->uaddr1, &timeout);
3298162536Sdavidxu		if (error != 0)
3299162536Sdavidxu			return (error);
3300232144Sdavidxu		tm_p = &timeout;
3301162536Sdavidxu	}
3302232144Sdavidxu	return do_wait(td, uap->obj, uap->val, tm_p, 1, 0);
3303162536Sdavidxu}
3304162536Sdavidxu
3305162536Sdavidxustatic int
3306162536Sdavidxu__umtx_op_lock_umutex_compat32(struct thread *td, struct _umtx_op_args *uap)
3307162536Sdavidxu{
3308232144Sdavidxu	struct _umtx_time *tm_p, timeout;
3309162536Sdavidxu	int error;
3310162536Sdavidxu
3311162536Sdavidxu	/* Allow a null timespec (wait forever). */
3312162536Sdavidxu	if (uap->uaddr2 == NULL)
3313232144Sdavidxu		tm_p = NULL;
3314162536Sdavidxu	else {
3315232144Sdavidxu		error = umtx_copyin_umtx_time32(uap->uaddr2,
3316232144Sdavidxu		    (size_t)uap->uaddr1, &timeout);
3317162536Sdavidxu		if (error != 0)
3318162536Sdavidxu			return (error);
3319232144Sdavidxu		tm_p = &timeout;
3320162536Sdavidxu	}
3321232144Sdavidxu	return do_lock_umutex(td, uap->obj, tm_p, 0);
3322162536Sdavidxu}
3323162536Sdavidxu
3324164839Sdavidxustatic int
3325179970Sdavidxu__umtx_op_wait_umutex_compat32(struct thread *td, struct _umtx_op_args *uap)
3326179970Sdavidxu{
3327232144Sdavidxu	struct _umtx_time *tm_p, timeout;
3328179970Sdavidxu	int error;
3329179970Sdavidxu
3330179970Sdavidxu	/* Allow a null timespec (wait forever). */
3331179970Sdavidxu	if (uap->uaddr2 == NULL)
3332232144Sdavidxu		tm_p = NULL;
3333179970Sdavidxu	else {
3334232144Sdavidxu		error = umtx_copyin_umtx_time32(uap->uaddr2,
3335232144Sdavidxu		    (size_t)uap->uaddr1, &timeout);
3336179970Sdavidxu		if (error != 0)
3337179970Sdavidxu			return (error);
3338232144Sdavidxu		tm_p = &timeout;
3339179970Sdavidxu	}
3340232144Sdavidxu	return do_lock_umutex(td, uap->obj, tm_p, _UMUTEX_WAIT);
3341179970Sdavidxu}
3342179970Sdavidxu
3343179970Sdavidxustatic int
3344164839Sdavidxu__umtx_op_cv_wait_compat32(struct thread *td, struct _umtx_op_args *uap)
3345164839Sdavidxu{
3346164839Sdavidxu	struct timespec *ts, timeout;
3347164839Sdavidxu	int error;
3348164839Sdavidxu
3349164839Sdavidxu	/* Allow a null timespec (wait forever). */
3350164839Sdavidxu	if (uap->uaddr2 == NULL)
3351164839Sdavidxu		ts = NULL;
3352164839Sdavidxu	else {
3353228218Spho		error = umtx_copyin_timeout32(uap->uaddr2, &timeout);
3354164839Sdavidxu		if (error != 0)
3355164839Sdavidxu			return (error);
3356164839Sdavidxu		ts = &timeout;
3357164839Sdavidxu	}
3358164876Sdavidxu	return (do_cv_wait(td, uap->obj, uap->uaddr1, ts, uap->val));
3359164839Sdavidxu}
3360164839Sdavidxu
3361177848Sdavidxustatic int
3362177848Sdavidxu__umtx_op_rw_rdlock_compat32(struct thread *td, struct _umtx_op_args *uap)
3363177848Sdavidxu{
3364232209Sdavidxu	struct _umtx_time timeout;
3365177848Sdavidxu	int error;
3366177848Sdavidxu
3367177848Sdavidxu	/* Allow a null timespec (wait forever). */
3368177848Sdavidxu	if (uap->uaddr2 == NULL) {
3369177848Sdavidxu		error = do_rw_rdlock(td, uap->obj, uap->val, NULL);
3370177848Sdavidxu	} else {
3371232209Sdavidxu		error = umtx_copyin_umtx_time32(uap->uaddr2,
3372232209Sdavidxu		    (size_t)uap->uaddr1, &timeout);
3373177848Sdavidxu		if (error != 0)
3374177848Sdavidxu			return (error);
3375233693Sdavidxu		error = do_rw_rdlock(td, uap->obj, uap->val, &timeout);
3376177848Sdavidxu	}
3377177848Sdavidxu	return (error);
3378177848Sdavidxu}
3379177848Sdavidxu
3380177848Sdavidxustatic int
3381177848Sdavidxu__umtx_op_rw_wrlock_compat32(struct thread *td, struct _umtx_op_args *uap)
3382177848Sdavidxu{
3383232209Sdavidxu	struct _umtx_time timeout;
3384177848Sdavidxu	int error;
3385177848Sdavidxu
3386177848Sdavidxu	/* Allow a null timespec (wait forever). */
3387177848Sdavidxu	if (uap->uaddr2 == NULL) {
3388177852Sdavidxu		error = do_rw_wrlock(td, uap->obj, NULL);
3389177848Sdavidxu	} else {
3390232209Sdavidxu		error = umtx_copyin_umtx_time32(uap->uaddr2,
3391232209Sdavidxu		    (size_t)uap->uaddr1, &timeout);
3392177848Sdavidxu		if (error != 0)
3393177848Sdavidxu			return (error);
3394233693Sdavidxu		error = do_rw_wrlock(td, uap->obj, &timeout);
3395177848Sdavidxu	}
3396177848Sdavidxu	return (error);
3397177848Sdavidxu}
3398177848Sdavidxu
3399178646Sdavidxustatic int
3400178646Sdavidxu__umtx_op_wait_uint_private_compat32(struct thread *td, struct _umtx_op_args *uap)
3401178646Sdavidxu{
3402232144Sdavidxu	struct _umtx_time *tm_p, timeout;
3403178646Sdavidxu	int error;
3404178646Sdavidxu
3405178646Sdavidxu	if (uap->uaddr2 == NULL)
3406232144Sdavidxu		tm_p = NULL;
3407178646Sdavidxu	else {
3408232144Sdavidxu		error = umtx_copyin_umtx_time32(
3409232144Sdavidxu		    uap->uaddr2, (size_t)uap->uaddr1, &timeout);
3410178646Sdavidxu		if (error != 0)
3411178646Sdavidxu			return (error);
3412232144Sdavidxu		tm_p = &timeout;
3413178646Sdavidxu	}
3414232144Sdavidxu	return do_wait(td, uap->obj, uap->val, tm_p, 1, 1);
3415178646Sdavidxu}
3416178646Sdavidxu
3417201472Sdavidxustatic int
3418201472Sdavidxu__umtx_op_sem_wait_compat32(struct thread *td, struct _umtx_op_args *uap)
3419201472Sdavidxu{
3420232144Sdavidxu	struct _umtx_time *tm_p, timeout;
3421201472Sdavidxu	int error;
3422201472Sdavidxu
3423201472Sdavidxu	/* Allow a null timespec (wait forever). */
3424201472Sdavidxu	if (uap->uaddr2 == NULL)
3425232144Sdavidxu		tm_p = NULL;
3426201472Sdavidxu	else {
3427232144Sdavidxu		error = umtx_copyin_umtx_time32(uap->uaddr2,
3428232144Sdavidxu		    (size_t)uap->uaddr1, &timeout);
3429201472Sdavidxu		if (error != 0)
3430201472Sdavidxu			return (error);
3431232144Sdavidxu		tm_p = &timeout;
3432201472Sdavidxu	}
3433232144Sdavidxu	return (do_sem_wait(td, uap->obj, tm_p));
3434201472Sdavidxu}
3435201472Sdavidxu
3436216641Sdavidxustatic int
3437216641Sdavidxu__umtx_op_nwake_private32(struct thread *td, struct _umtx_op_args *uap)
3438216641Sdavidxu{
3439216641Sdavidxu	int count = uap->val;
3440216641Sdavidxu	uint32_t uaddrs[BATCH_SIZE];
3441216641Sdavidxu	uint32_t *upp = (uint32_t *)uap->obj;
3442216641Sdavidxu	int tocopy;
3443216641Sdavidxu	int error = 0;
3444216641Sdavidxu	int i, pos = 0;
3445216641Sdavidxu
3446216641Sdavidxu	while (count > 0) {
3447216641Sdavidxu		tocopy = count;
3448216641Sdavidxu		if (tocopy > BATCH_SIZE)
3449216641Sdavidxu			tocopy = BATCH_SIZE;
3450216641Sdavidxu		error = copyin(upp + pos, uaddrs, tocopy * sizeof(uint32_t));
3451216641Sdavidxu		if (error != 0)
3452216641Sdavidxu			break;
3453216641Sdavidxu		for (i = 0; i < tocopy; ++i)
3454216641Sdavidxu			kern_umtx_wake(td, (void *)(intptr_t)uaddrs[i],
3455216641Sdavidxu				INT_MAX, 1);
3456216641Sdavidxu		count -= tocopy;
3457216641Sdavidxu		pos += tocopy;
3458216641Sdavidxu	}
3459216641Sdavidxu	return (error);
3460216641Sdavidxu}
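/*
 * Note: upp is deliberately a plain uint32_t pointer; in a 32-bit
 * process the array behind uap->obj holds 4-byte userland pointers, so
 * the copyin offset must advance in 4-byte steps rather than by the
 * kernel's native pointer size.
 */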
3461216641Sdavidxu
3462162536Sdavidxustatic _umtx_op_func op_table_compat32[] = {
3463162536Sdavidxu	__umtx_op_lock_umtx_compat32,	/* UMTX_OP_LOCK */
3464162536Sdavidxu	__umtx_op_unlock_umtx_compat32,	/* UMTX_OP_UNLOCK */
3465162536Sdavidxu	__umtx_op_wait_compat32,	/* UMTX_OP_WAIT */
3466162536Sdavidxu	__umtx_op_wake,			/* UMTX_OP_WAKE */
3467162550Sdavidxu	__umtx_op_trylock_umutex,	/* UMTX_OP_MUTEX_TRYLOCK */
3468162536Sdavidxu	__umtx_op_lock_umutex_compat32,	/* UMTX_OP_MUTEX_LOCK */
3469162536Sdavidxu	__umtx_op_unlock_umutex,	/* UMTX_OP_MUTEX_UNLOCK	*/
3470164839Sdavidxu	__umtx_op_set_ceiling,		/* UMTX_OP_SET_CEILING */
3471164839Sdavidxu	__umtx_op_cv_wait_compat32,	/* UMTX_OP_CV_WAIT */
3472164839Sdavidxu	__umtx_op_cv_signal,		/* UMTX_OP_CV_SIGNAL */
3473173800Sdavidxu	__umtx_op_cv_broadcast,		/* UMTX_OP_CV_BROADCAST */
3474177848Sdavidxu	__umtx_op_wait_compat32,	/* UMTX_OP_WAIT_UINT */
3475177848Sdavidxu	__umtx_op_rw_rdlock_compat32,	/* UMTX_OP_RW_RDLOCK */
3476177848Sdavidxu	__umtx_op_rw_wrlock_compat32,	/* UMTX_OP_RW_WRLOCK */
3477178646Sdavidxu	__umtx_op_rw_unlock,		/* UMTX_OP_RW_UNLOCK */
3478178646Sdavidxu	__umtx_op_wait_uint_private_compat32,	/* UMTX_OP_WAIT_UINT_PRIVATE */
3479179970Sdavidxu	__umtx_op_wake_private,		/* UMTX_OP_WAKE_PRIVATE */
3480179970Sdavidxu	__umtx_op_wait_umutex_compat32, /* UMTX_OP_UMUTEX_WAIT */
3481201472Sdavidxu	__umtx_op_wake_umutex,		/* UMTX_OP_UMUTEX_WAKE */
3482201472Sdavidxu	__umtx_op_sem_wait_compat32,	/* UMTX_OP_SEM_WAIT */
3483216641Sdavidxu	__umtx_op_sem_wake,		/* UMTX_OP_SEM_WAKE */
3484216641Sdavidxu	__umtx_op_nwake_private32	/* UMTX_OP_NWAKE_PRIVATE */
3485162536Sdavidxu};
3486162536Sdavidxu
3487162536Sdavidxuint
3488162536Sdavidxufreebsd32_umtx_op(struct thread *td, struct freebsd32_umtx_op_args *uap)
3489162536Sdavidxu{
3490163678Sdavidxu	if ((unsigned)uap->op < UMTX_OP_MAX)
3491162536Sdavidxu		return (*op_table_compat32[uap->op])(td,
3492162536Sdavidxu			(struct _umtx_op_args *)uap);
3493162536Sdavidxu	return (EINVAL);
3494162536Sdavidxu}
3495162536Sdavidxu#endif
3496162536Sdavidxu
3497161678Sdavidxuvoid
3498161678Sdavidxuumtx_thread_init(struct thread *td)
3499161678Sdavidxu{
3500161678Sdavidxu	td->td_umtxq = umtxq_alloc();
3501161678Sdavidxu	td->td_umtxq->uq_thread = td;
3502161678Sdavidxu}
3503161678Sdavidxu
3504161678Sdavidxuvoid
3505161678Sdavidxuumtx_thread_fini(struct thread *td)
3506161678Sdavidxu{
3507161678Sdavidxu	umtxq_free(td->td_umtxq);
3508161678Sdavidxu}
3509161678Sdavidxu
3510161678Sdavidxu/*
3511161678Sdavidxu * Called when a new thread is created, e.g. by fork().
3512161678Sdavidxu */
3513161678Sdavidxuvoid
3514161678Sdavidxuumtx_thread_alloc(struct thread *td)
3515161678Sdavidxu{
3516161678Sdavidxu	struct umtx_q *uq;
3517161678Sdavidxu
3518161678Sdavidxu	uq = td->td_umtxq;
3519161678Sdavidxu	uq->uq_inherited_pri = PRI_MAX;
3520161678Sdavidxu
3521161678Sdavidxu	KASSERT(uq->uq_flags == 0, ("uq_flags != 0"));
3522161678Sdavidxu	KASSERT(uq->uq_thread == td, ("uq_thread != td"));
3523161678Sdavidxu	KASSERT(uq->uq_pi_blocked == NULL, ("uq_pi_blocked != NULL"));
3524161678Sdavidxu	KASSERT(TAILQ_EMPTY(&uq->uq_pi_contested), ("uq_pi_contested is not empty"));
3525161678Sdavidxu}
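/*
 * Note: the assertions above document the recycling contract for
 * struct umtx_q: a thread handed out to fork() must carry a quiescent
 * queue entry, i.e. not on any umtx queue, not blocked on a
 * priority-inheritance mutex, and owning no contested PI mutexes.
 */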
3526161678Sdavidxu
3527161678Sdavidxu/*
3528161678Sdavidxu * exec() hook.
3529161678Sdavidxu */
3530161678Sdavidxustatic void
3531161678Sdavidxuumtx_exec_hook(void *arg __unused, struct proc *p __unused,
3532161678Sdavidxu	struct image_params *imgp __unused)
3533161678Sdavidxu{
3534161678Sdavidxu	umtx_thread_cleanup(curthread);
3535161678Sdavidxu}
3536161678Sdavidxu
3537161678Sdavidxu/*
3538161678Sdavidxu * thread_exit() hook.
3539161678Sdavidxu */
3540161678Sdavidxuvoid
3541161678Sdavidxuumtx_thread_exit(struct thread *td)
3542161678Sdavidxu{
3543161678Sdavidxu	umtx_thread_cleanup(td);
3544161678Sdavidxu}
3545161678Sdavidxu
3546161678Sdavidxu/*
3547161678Sdavidxu * Clean up umtx data.
3548161678Sdavidxu */
3549161678Sdavidxustatic void
3550161678Sdavidxuumtx_thread_cleanup(struct thread *td)
3551161678Sdavidxu{
3552161678Sdavidxu	struct umtx_q *uq;
3553161678Sdavidxu	struct umtx_pi *pi;
3554161678Sdavidxu
3555161678Sdavidxu	if ((uq = td->td_umtxq) == NULL)
3556161678Sdavidxu		return;
3557161678Sdavidxu
3558170300Sjeff	mtx_lock_spin(&umtx_lock);
3559161678Sdavidxu	uq->uq_inherited_pri = PRI_MAX;
3560161678Sdavidxu	while ((pi = TAILQ_FIRST(&uq->uq_pi_contested)) != NULL) {
3561161678Sdavidxu		pi->pi_owner = NULL;
3562161678Sdavidxu		TAILQ_REMOVE(&uq->uq_pi_contested, pi, pi_link);
3563161678Sdavidxu	}
3564216313Sdavidxu	mtx_unlock_spin(&umtx_lock);
3565174701Sdavidxu	thread_lock(td);
3566216791Sdavidxu	sched_lend_user_prio(td, PRI_MAX);
3567174701Sdavidxu	thread_unlock(td);
3568161678Sdavidxu}
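/*
 * Note on the cleanup above: priority-inheritance mutexes still
 * recorded as contested by the exiting (or exec'ing) thread are
 * disowned rather than unlocked; only the kernel-side bookkeeping can
 * be trusted at this point.  Finally the thread's lent priority is
 * reset to PRI_MAX via sched_lend_user_prio().
 */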