kern_umtx.c revision 233642
/*-
 * Copyright (c) 2004, David Xu <davidxu@freebsd.org>
 * Copyright (c) 2002, Jeffrey Roberson <jeff@freebsd.org>
 * All rights reserved.
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 * 1. Redistributions of source code must retain the above copyright
 *    notice unmodified, this list of conditions, and the following
 *    disclaimer.
 * 2. Redistributions in binary form must reproduce the above copyright
 *    notice, this list of conditions and the following disclaimer in the
 *    documentation and/or other materials provided with the distribution.
 *
 * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR
 * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
 * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.
 * IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT,
 * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT
 * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
 * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
 * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
 * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF
 * THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
 */

#include <sys/cdefs.h>
__FBSDID("$FreeBSD: head/sys/kern/kern_umtx.c 233642 2012-03-29 02:46:43Z davidxu $");

#include "opt_compat.h"
#include "opt_umtx_profiling.h"

#include <sys/param.h>
#include <sys/kernel.h>
#include <sys/limits.h>
#include <sys/lock.h>
#include <sys/malloc.h>
#include <sys/mutex.h>
#include <sys/priv.h>
#include <sys/proc.h>
#include <sys/sched.h>
#include <sys/smp.h>
#include <sys/sysctl.h>
#include <sys/sysent.h>
#include <sys/systm.h>
#include <sys/sysproto.h>
#include <sys/syscallsubr.h>
#include <sys/eventhandler.h>
#include <sys/umtx.h>

#include <vm/vm.h>
#include <vm/vm_param.h>
#include <vm/pmap.h>
#include <vm/vm_map.h>
#include <vm/vm_object.h>

#include <machine/cpu.h>

#ifdef COMPAT_FREEBSD32
#include <compat/freebsd32/freebsd32_proto.h>
#endif

#define _UMUTEX_TRY		1
#define _UMUTEX_WAIT		2

/* Priority inheritance mutex info. */
struct umtx_pi {
	/* Owner thread */
	struct thread		*pi_owner;

	/* Reference count */
	int			pi_refcount;

	/* List entry to link umtx objects held by a thread */
	TAILQ_ENTRY(umtx_pi)	pi_link;

	/* List entry in hash */
	TAILQ_ENTRY(umtx_pi)	pi_hashlink;

	/* List for waiters */
	TAILQ_HEAD(,umtx_q)	pi_blocked;

	/* Identify a userland lock object */
	struct umtx_key		pi_key;
};

/* A userland synchronization object user. */
struct umtx_q {
	/* Linked list for the hash. */
	TAILQ_ENTRY(umtx_q)	uq_link;

	/* Umtx key. */
	struct umtx_key		uq_key;

	/* Umtx flags. */
	int			uq_flags;
#define UQF_UMTXQ	0x0001

	/* The waiting thread. */
	struct thread		*uq_thread;

	/*
	 * Blocked on PI mutex. Reads can hold either the chain lock
	 * or umtx_lock; writes must hold both the chain lock and
	 * umtx_lock.
	 */
	struct umtx_pi		*uq_pi_blocked;

	/* On blocked list */
	TAILQ_ENTRY(umtx_q)	uq_lockq;

	/* PI mutexes this thread owns that other threads contend for */
	TAILQ_HEAD(,umtx_pi)	uq_pi_contested;

	/* Inherited priority from PP mutex */
	u_char			uq_inherited_pri;

	/* Spare queue ready to be reused */
	struct umtxq_queue	*uq_spare_queue;

	/* The queue we are on */
	struct umtxq_queue	*uq_cur_queue;
};

TAILQ_HEAD(umtxq_head, umtx_q);

/* Per-key wait-queue */
struct umtxq_queue {
	struct umtxq_head	head;
	struct umtx_key		key;
	LIST_ENTRY(umtxq_queue)	link;
	int			length;
};

LIST_HEAD(umtxq_list, umtxq_queue);

/* Userland lock object's wait-queue chain */
struct umtxq_chain {
	/* Lock for this chain. */
	struct mtx		uc_lock;

	/* List of sleep queues. */
	struct umtxq_list	uc_queue[2];
#define UMTX_SHARED_QUEUE	0
#define UMTX_EXCLUSIVE_QUEUE	1

	LIST_HEAD(, umtxq_queue) uc_spare_queue;

	/* Busy flag */
	char			uc_busy;

	/* Chain lock waiters */
	int			uc_waiters;

	/* All PI mutexes in the list */
	TAILQ_HEAD(,umtx_pi)	uc_pi_list;

#ifdef UMTX_PROFILING
	int			length;
	int			max_length;
#endif
};

#define	UMTXQ_LOCKED_ASSERT(uc)		mtx_assert(&(uc)->uc_lock, MA_OWNED)
#define	UMTXQ_BUSY_ASSERT(uc)	KASSERT((uc)->uc_busy != 0, ("umtx chain is not busy"))

/*
 * Don't propagate time-sharing priority; there is a security reason:
 * a user could simply create a PI mutex, have thread A lock it, and
 * have another thread B block on it.  Because B is sleeping, its
 * priority would be boosted, which in turn would boost A's priority
 * via priority propagation, and A's priority would never be lowered
 * even while it consumes 100% CPU.  This is unfair to other processes.
 */

#define UPRI(td)	(((td)->td_user_pri >= PRI_MIN_TIMESHARE &&\
			  (td)->td_user_pri <= PRI_MAX_TIMESHARE) ?\
			 PRI_MAX_TIMESHARE : (td)->td_user_pri)
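
/*
 * Worked example (illustrative): with UPRI() as defined above, any
 * thread whose td_user_pri lies in the time-sharing range is treated
 * as if it had the worst time-sharing priority, PRI_MAX_TIMESHARE,
 * so blocking on a PI mutex lends the owner nothing useful.  A
 * real-time thread's td_user_pri falls outside that range and passes
 * through unchanged, so real-time priority does propagate.
 */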

#define	GOLDEN_RATIO_PRIME	2654404609U
#define	UMTX_CHAINS		512
#define	UMTX_SHIFTS		(__WORD_BIT - 9)

#define	GET_SHARE(flags)	\
    (((flags) & USYNC_PROCESS_SHARED) == 0 ? THREAD_SHARE : PROCESS_SHARE)

#define BUSY_SPINS		200

static uma_zone_t		umtx_pi_zone;
static struct umtxq_chain	umtxq_chains[2][UMTX_CHAINS];
static MALLOC_DEFINE(M_UMTX, "umtx", "UMTX queue memory");
static int			umtx_pi_allocated;

static SYSCTL_NODE(_debug, OID_AUTO, umtx, CTLFLAG_RW, 0, "umtx debug");
SYSCTL_INT(_debug_umtx, OID_AUTO, umtx_pi_allocated, CTLFLAG_RD,
    &umtx_pi_allocated, 0, "Allocated umtx_pi");

#ifdef UMTX_PROFILING
static long max_length;
SYSCTL_LONG(_debug_umtx, OID_AUTO, max_length, CTLFLAG_RD, &max_length, 0, "max_length");
static SYSCTL_NODE(_debug_umtx, OID_AUTO, chains, CTLFLAG_RD, 0, "umtx chain stats");
#endif

static void umtxq_sysinit(void *);
static void umtxq_hash(struct umtx_key *key);
static struct umtxq_chain *umtxq_getchain(struct umtx_key *key);
static void umtxq_lock(struct umtx_key *key);
static void umtxq_unlock(struct umtx_key *key);
static void umtxq_busy(struct umtx_key *key);
static void umtxq_unbusy(struct umtx_key *key);
static void umtxq_insert_queue(struct umtx_q *uq, int q);
static void umtxq_remove_queue(struct umtx_q *uq, int q);
static int umtxq_sleep(struct umtx_q *uq, const char *wmesg, int timo);
static int umtxq_count(struct umtx_key *key);
static struct umtx_pi *umtx_pi_alloc(int);
static void umtx_pi_free(struct umtx_pi *pi);
static int do_unlock_pp(struct thread *td, struct umutex *m, uint32_t flags);
static void umtx_thread_cleanup(struct thread *td);
static void umtx_exec_hook(void *arg __unused, struct proc *p __unused,
	struct image_params *imgp __unused);
SYSINIT(umtx, SI_SUB_EVENTHANDLER+1, SI_ORDER_MIDDLE, umtxq_sysinit, NULL);

#define umtxq_signal(key, nwake)	umtxq_signal_queue((key), (nwake), UMTX_SHARED_QUEUE)
#define umtxq_insert(uq)	umtxq_insert_queue((uq), UMTX_SHARED_QUEUE)
#define umtxq_remove(uq)	umtxq_remove_queue((uq), UMTX_SHARED_QUEUE)

static struct mtx umtx_lock;

#ifdef UMTX_PROFILING
static void
umtx_init_profiling(void)
{
	struct sysctl_oid *chain_oid;
	char chain_name[10];
	int i;

	for (i = 0; i < UMTX_CHAINS; ++i) {
		snprintf(chain_name, sizeof(chain_name), "%d", i);
		chain_oid = SYSCTL_ADD_NODE(NULL,
		    SYSCTL_STATIC_CHILDREN(_debug_umtx_chains), OID_AUTO,
		    chain_name, CTLFLAG_RD, NULL, "umtx hash stats");
		SYSCTL_ADD_INT(NULL, SYSCTL_CHILDREN(chain_oid), OID_AUTO,
		    "max_length0", CTLFLAG_RD, &umtxq_chains[0][i].max_length, 0, NULL);
		SYSCTL_ADD_INT(NULL, SYSCTL_CHILDREN(chain_oid), OID_AUTO,
		    "max_length1", CTLFLAG_RD, &umtxq_chains[1][i].max_length, 0, NULL);
	}
}
#endif

static void
umtxq_sysinit(void *arg __unused)
{
	int i, j;

	umtx_pi_zone = uma_zcreate("umtx pi", sizeof(struct umtx_pi),
		NULL, NULL, NULL, NULL, UMA_ALIGN_PTR, 0);
	for (i = 0; i < 2; ++i) {
		for (j = 0; j < UMTX_CHAINS; ++j) {
			mtx_init(&umtxq_chains[i][j].uc_lock, "umtxql", NULL,
				 MTX_DEF | MTX_DUPOK);
			LIST_INIT(&umtxq_chains[i][j].uc_queue[0]);
			LIST_INIT(&umtxq_chains[i][j].uc_queue[1]);
			LIST_INIT(&umtxq_chains[i][j].uc_spare_queue);
			TAILQ_INIT(&umtxq_chains[i][j].uc_pi_list);
			umtxq_chains[i][j].uc_busy = 0;
			umtxq_chains[i][j].uc_waiters = 0;
			#ifdef UMTX_PROFILING
			umtxq_chains[i][j].length = 0;
			umtxq_chains[i][j].max_length = 0;
			#endif
		}
	}
	#ifdef UMTX_PROFILING
	umtx_init_profiling();
	#endif
	mtx_init(&umtx_lock, "umtx lock", NULL, MTX_SPIN);
	EVENTHANDLER_REGISTER(process_exec, umtx_exec_hook, NULL,
	    EVENTHANDLER_PRI_ANY);
}

struct umtx_q *
umtxq_alloc(void)
{
	struct umtx_q *uq;

	uq = malloc(sizeof(struct umtx_q), M_UMTX, M_WAITOK | M_ZERO);
	uq->uq_spare_queue = malloc(sizeof(struct umtxq_queue), M_UMTX, M_WAITOK | M_ZERO);
	TAILQ_INIT(&uq->uq_spare_queue->head);
	TAILQ_INIT(&uq->uq_pi_contested);
	uq->uq_inherited_pri = PRI_MAX;
	return (uq);
}

void
umtxq_free(struct umtx_q *uq)
{
	MPASS(uq->uq_spare_queue != NULL);
	free(uq->uq_spare_queue, M_UMTX);
	free(uq, M_UMTX);
}

static inline void
umtxq_hash(struct umtx_key *key)
{
	unsigned n = (uintptr_t)key->info.both.a + key->info.both.b;
	key->hash = ((n * GOLDEN_RATIO_PRIME) >> UMTX_SHIFTS) % UMTX_CHAINS;
}
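
/*
 * Illustrative sketch of the hash above (an example, not part of the
 * implementation; obj and off stand in for key->info.both.a and
 * key->info.both.b): the key's two words are summed, spread by a
 * Fibonacci-style multiplication with GOLDEN_RATIO_PRIME, shifted so
 * that the well-mixed high bits survive, then folded into one of the
 * UMTX_CHAINS buckets.
 *
 *	unsigned n = (uintptr_t)obj + off;
 *	unsigned bucket = ((n * GOLDEN_RATIO_PRIME) >> UMTX_SHIFTS)
 *	    % UMTX_CHAINS;
 */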

static inline struct umtxq_chain *
umtxq_getchain(struct umtx_key *key)
{
	if (key->type <= TYPE_SEM)
		return (&umtxq_chains[1][key->hash]);
	return (&umtxq_chains[0][key->hash]);
}

/*
 * Lock a chain.
 */
static inline void
umtxq_lock(struct umtx_key *key)
{
	struct umtxq_chain *uc;

	uc = umtxq_getchain(key);
	mtx_lock(&uc->uc_lock);
}

/*
 * Unlock a chain.
 */
static inline void
umtxq_unlock(struct umtx_key *key)
{
	struct umtxq_chain *uc;

	uc = umtxq_getchain(key);
	mtx_unlock(&uc->uc_lock);
}

/*
 * Set the chain to the busy state when a following operation
 * may block (a kernel mutex cannot be used).
 */
static inline void
umtxq_busy(struct umtx_key *key)
{
	struct umtxq_chain *uc;

	uc = umtxq_getchain(key);
	mtx_assert(&uc->uc_lock, MA_OWNED);
	if (uc->uc_busy) {
#ifdef SMP
		if (smp_cpus > 1) {
			int count = BUSY_SPINS;
			if (count > 0) {
				umtxq_unlock(key);
				while (uc->uc_busy && --count > 0)
					cpu_spinwait();
				umtxq_lock(key);
			}
		}
#endif
		while (uc->uc_busy) {
			uc->uc_waiters++;
			msleep(uc, &uc->uc_lock, 0, "umtxqb", 0);
			uc->uc_waiters--;
		}
	}
	uc->uc_busy = 1;
}

/*
 * Unbusy a chain.
 */
static inline void
umtxq_unbusy(struct umtx_key *key)
{
	struct umtxq_chain *uc;

	uc = umtxq_getchain(key);
	mtx_assert(&uc->uc_lock, MA_OWNED);
	KASSERT(uc->uc_busy != 0, ("not busy"));
	uc->uc_busy = 0;
	if (uc->uc_waiters)
		wakeup_one(uc);
}
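
/*
 * Typical use of the busy/unbusy protocol (a sketch of the pattern
 * the unlock paths below follow, not new code): the chain is marked
 * busy across a window in which the chain mutex must be dropped,
 * e.g. around a userland access that can fault or block.
 *
 *	umtxq_lock(&key);
 *	umtxq_busy(&key);
 *	count = umtxq_count(&key);
 *	umtxq_unlock(&key);
 *	... touch userland memory (casuword() etc.), may fault ...
 *	umtxq_lock(&key);
 *	umtxq_unbusy(&key);
 *	umtxq_unlock(&key);
 */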

static struct umtxq_queue *
umtxq_queue_lookup(struct umtx_key *key, int q)
{
	struct umtxq_queue *uh;
	struct umtxq_chain *uc;

	uc = umtxq_getchain(key);
	UMTXQ_LOCKED_ASSERT(uc);
	LIST_FOREACH(uh, &uc->uc_queue[q], link) {
		if (umtx_key_match(&uh->key, key))
			return (uh);
	}

	return (NULL);
}

static inline void
umtxq_insert_queue(struct umtx_q *uq, int q)
{
	struct umtxq_queue *uh;
	struct umtxq_chain *uc;

	uc = umtxq_getchain(&uq->uq_key);
	UMTXQ_LOCKED_ASSERT(uc);
	KASSERT((uq->uq_flags & UQF_UMTXQ) == 0, ("umtx_q is already on queue"));
	uh = umtxq_queue_lookup(&uq->uq_key, q);
	if (uh != NULL) {
		LIST_INSERT_HEAD(&uc->uc_spare_queue, uq->uq_spare_queue, link);
	} else {
		uh = uq->uq_spare_queue;
		uh->key = uq->uq_key;
		LIST_INSERT_HEAD(&uc->uc_queue[q], uh, link);
	}
	uq->uq_spare_queue = NULL;

	TAILQ_INSERT_TAIL(&uh->head, uq, uq_link);
	uh->length++;
	#ifdef UMTX_PROFILING
	uc->length++;
	if (uc->length > uc->max_length) {
		uc->max_length = uc->length;
		if (uc->max_length > max_length)
			max_length = uc->max_length;
	}
	#endif
	uq->uq_flags |= UQF_UMTXQ;
	uq->uq_cur_queue = uh;
}

static inline void
umtxq_remove_queue(struct umtx_q *uq, int q)
{
	struct umtxq_chain *uc;
	struct umtxq_queue *uh;

	uc = umtxq_getchain(&uq->uq_key);
	UMTXQ_LOCKED_ASSERT(uc);
	if (uq->uq_flags & UQF_UMTXQ) {
		uh = uq->uq_cur_queue;
		TAILQ_REMOVE(&uh->head, uq, uq_link);
		uh->length--;
		#ifdef UMTX_PROFILING
		uc->length--;
		#endif
		uq->uq_flags &= ~UQF_UMTXQ;
		if (TAILQ_EMPTY(&uh->head)) {
			KASSERT(uh->length == 0,
			    ("inconsistent umtxq_queue length"));
			LIST_REMOVE(uh, link);
		} else {
			uh = LIST_FIRST(&uc->uc_spare_queue);
			KASSERT(uh != NULL, ("uc_spare_queue is empty"));
			LIST_REMOVE(uh, link);
		}
		uq->uq_spare_queue = uh;
		uq->uq_cur_queue = NULL;
	}
}

/*
 * Return the number of threads waiting on the key's shared queue.
 */
static int
umtxq_count(struct umtx_key *key)
{
	struct umtxq_chain *uc;
	struct umtxq_queue *uh;

	uc = umtxq_getchain(key);
	UMTXQ_LOCKED_ASSERT(uc);
	uh = umtxq_queue_lookup(key, UMTX_SHARED_QUEUE);
	if (uh != NULL)
		return (uh->length);
	return (0);
}

/*
 * Return the number of PI waiters and, through *first, the first
 * waiter.
 */
static int
umtxq_count_pi(struct umtx_key *key, struct umtx_q **first)
{
	struct umtxq_chain *uc;
	struct umtxq_queue *uh;

	*first = NULL;
	uc = umtxq_getchain(key);
	UMTXQ_LOCKED_ASSERT(uc);
	uh = umtxq_queue_lookup(key, UMTX_SHARED_QUEUE);
	if (uh != NULL) {
		*first = TAILQ_FIRST(&uh->head);
		return (uh->length);
	}
	return (0);
}

/*
 * Wake up threads waiting on a userland object.
 */
static int
umtxq_signal_queue(struct umtx_key *key, int n_wake, int q)
{
	struct umtxq_chain *uc;
	struct umtxq_queue *uh;
	struct umtx_q *uq;
	int ret;

	ret = 0;
	uc = umtxq_getchain(key);
	UMTXQ_LOCKED_ASSERT(uc);
	uh = umtxq_queue_lookup(key, q);
	if (uh != NULL) {
		while ((uq = TAILQ_FIRST(&uh->head)) != NULL) {
			umtxq_remove_queue(uq, q);
			wakeup(uq);
			if (++ret >= n_wake)
				return (ret);
		}
	}
	return (ret);
}

/*
 * Wake up the specified thread.
 */
static inline void
umtxq_signal_thread(struct umtx_q *uq)
{
	struct umtxq_chain *uc;

	uc = umtxq_getchain(&uq->uq_key);
	UMTXQ_LOCKED_ASSERT(uc);
	umtxq_remove(uq);
	wakeup(uq);
}

/*
 * Put the thread into a sleep state; before sleeping, check whether
 * the thread was already removed from the umtx queue.
 */
static inline int
umtxq_sleep(struct umtx_q *uq, const char *wmesg, int timo)
{
	struct umtxq_chain *uc;
	int error;

	uc = umtxq_getchain(&uq->uq_key);
	UMTXQ_LOCKED_ASSERT(uc);
	if (!(uq->uq_flags & UQF_UMTXQ))
		return (0);
	error = msleep(uq, &uc->uc_lock, PCATCH, wmesg, timo);
	if (error == EWOULDBLOCK)
		error = ETIMEDOUT;
	return (error);
}

/*
 * Convert a userspace address into a unique logical address.
 */
int
umtx_key_get(void *addr, int type, int share, struct umtx_key *key)
{
	struct thread *td = curthread;
	vm_map_t map;
	vm_map_entry_t entry;
	vm_pindex_t pindex;
	vm_prot_t prot;
	boolean_t wired;

	key->type = type;
	if (share == THREAD_SHARE) {
		key->shared = 0;
		key->info.private.vs = td->td_proc->p_vmspace;
		key->info.private.addr = (uintptr_t)addr;
	} else {
		MPASS(share == PROCESS_SHARE || share == AUTO_SHARE);
		map = &td->td_proc->p_vmspace->vm_map;
		if (vm_map_lookup(&map, (vm_offset_t)addr, VM_PROT_WRITE,
		    &entry, &key->info.shared.object, &pindex, &prot,
		    &wired) != KERN_SUCCESS) {
			return (EFAULT);
		}

		if ((share == PROCESS_SHARE) ||
		    (share == AUTO_SHARE &&
		     VM_INHERIT_SHARE == entry->inheritance)) {
			key->shared = 1;
			key->info.shared.offset = entry->offset + entry->start -
				(vm_offset_t)addr;
			vm_object_reference(key->info.shared.object);
		} else {
			key->shared = 0;
			key->info.private.vs = td->td_proc->p_vmspace;
			key->info.private.addr = (uintptr_t)addr;
		}
		vm_map_lookup_done(map, entry);
	}

	umtxq_hash(key);
	return (0);
}

/*
 * Release a key.
 */
void
umtx_key_release(struct umtx_key *key)
{
	if (key->shared)
		vm_object_deallocate(key->info.shared.object);
}
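
/*
 * Key lifecycle sketch (illustrative): every consumer follows the
 * same pattern -- resolve the userland address into a key, operate
 * on the chain the key hashes to, then release the key so that any
 * shared VM object reference taken by umtx_key_get() is dropped.
 *
 *	struct umtx_key key;
 *	int error;
 *
 *	if ((error = umtx_key_get(uaddr, TYPE_SIMPLE_WAIT, AUTO_SHARE,
 *	    &key)) != 0)
 *		return (error);
 *	umtxq_lock(&key);
 *	... queue operations ...
 *	umtxq_unlock(&key);
 *	umtx_key_release(&key);
 */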

/*
 * Lock a umtx object.
 */
static int
_do_lock_umtx(struct thread *td, struct umtx *umtx, u_long id, int timo)
{
	struct umtx_q *uq;
	u_long owner;
	u_long old;
	int error = 0;

	uq = td->td_umtxq;

	/*
	 * Care must be exercised when dealing with the umtx structure.
	 * It can fault on any access.
	 */
	for (;;) {
		/*
		 * Try the uncontested case.  This should be done in userland.
		 */
		owner = casuword(&umtx->u_owner, UMTX_UNOWNED, id);

		/* The acquire succeeded. */
		if (owner == UMTX_UNOWNED)
			return (0);

		/* The address was invalid. */
		if (owner == -1)
			return (EFAULT);

		/* If no one owns it but it is contested try to acquire it. */
		if (owner == UMTX_CONTESTED) {
			owner = casuword(&umtx->u_owner,
			    UMTX_CONTESTED, id | UMTX_CONTESTED);

			if (owner == UMTX_CONTESTED)
				return (0);

			/* The address was invalid. */
			if (owner == -1)
				return (EFAULT);

			/* If this failed the lock has changed, restart. */
			continue;
		}

		/*
		 * If we caught a signal, we have retried and now
		 * exit immediately.
		 */
		if (error != 0)
			return (error);

		if ((error = umtx_key_get(umtx, TYPE_SIMPLE_LOCK,
			AUTO_SHARE, &uq->uq_key)) != 0)
			return (error);

		umtxq_lock(&uq->uq_key);
		umtxq_busy(&uq->uq_key);
		umtxq_insert(uq);
		umtxq_unbusy(&uq->uq_key);
		umtxq_unlock(&uq->uq_key);

		/*
		 * Set the contested bit so that a release in user space
		 * knows to use the system call for unlock.  If this fails
		 * either some one else has acquired the lock or it has been
		 * released.
		 */
		old = casuword(&umtx->u_owner, owner, owner | UMTX_CONTESTED);

		/* The address was invalid. */
		if (old == -1) {
			umtxq_lock(&uq->uq_key);
			umtxq_remove(uq);
			umtxq_unlock(&uq->uq_key);
			umtx_key_release(&uq->uq_key);
			return (EFAULT);
		}

		/*
		 * If we set the contested bit, sleep.  Otherwise the lock
		 * changed and we need to retry, or we lost a race to the
		 * thread unlocking the umtx.
		 */
		umtxq_lock(&uq->uq_key);
		if (old == owner)
			error = umtxq_sleep(uq, "umtx", timo);
		umtxq_remove(uq);
		umtxq_unlock(&uq->uq_key);
		umtx_key_release(&uq->uq_key);
	}

	return (0);
}
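
/*
 * Userland counterpart sketch (hedged; the real fast path lives in
 * libthr, not in this file): the uncontested acquire is a single
 * atomic compare-and-set of u_owner, and the kernel is entered only
 * when the word is already owned or contested.
 *
 *	if (atomic_cmpset_acq_long(&umtx->u_owner, UMTX_UNOWNED, id) == 0)
 *		_umtx_lock(umtx);	-- contested: enter the kernel
 */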

/*
 * Lock a umtx object, with an optional timeout.
 */
static int
do_lock_umtx(struct thread *td, struct umtx *umtx, u_long id,
	struct timespec *timeout)
{
	struct timespec ts, ts2, ts3;
	struct timeval tv;
	int error;

	if (timeout == NULL) {
		error = _do_lock_umtx(td, umtx, id, 0);
		/* Mutex locking is restarted if it is interrupted. */
		if (error == EINTR)
			error = ERESTART;
	} else {
		getnanouptime(&ts);
		timespecadd(&ts, timeout);
		TIMESPEC_TO_TIMEVAL(&tv, timeout);
		for (;;) {
			error = _do_lock_umtx(td, umtx, id, tvtohz(&tv));
			if (error != ETIMEDOUT)
				break;
			getnanouptime(&ts2);
			if (timespeccmp(&ts2, &ts, >=)) {
				error = ETIMEDOUT;
				break;
			}
			ts3 = ts;
			timespecsub(&ts3, &ts2);
			TIMESPEC_TO_TIMEVAL(&tv, &ts3);
		}
		/* Timed-locking is not restarted. */
		if (error == ERESTART)
			error = EINTR;
	}
	return (error);
}

/*
 * Unlock a umtx object.
 */
static int
do_unlock_umtx(struct thread *td, struct umtx *umtx, u_long id)
{
	struct umtx_key key;
	u_long owner;
	u_long old;
	int error;
	int count;

	/*
	 * Make sure we own this mtx.
	 */
	owner = fuword(__DEVOLATILE(u_long *, &umtx->u_owner));
	if (owner == -1)
		return (EFAULT);

	if ((owner & ~UMTX_CONTESTED) != id)
		return (EPERM);

	/* This should be done in userland */
	if ((owner & UMTX_CONTESTED) == 0) {
		old = casuword(&umtx->u_owner, owner, UMTX_UNOWNED);
		if (old == -1)
			return (EFAULT);
		if (old == owner)
			return (0);
		owner = old;
	}

	/* We should only ever be in here for contested locks */
	if ((error = umtx_key_get(umtx, TYPE_SIMPLE_LOCK, AUTO_SHARE,
		&key)) != 0)
		return (error);

	umtxq_lock(&key);
	umtxq_busy(&key);
	count = umtxq_count(&key);
	umtxq_unlock(&key);

	/*
	 * When unlocking the umtx, it must be marked as unowned if
	 * there is zero or one waiting thread.  Otherwise, it must
	 * be marked as contested.
	 */
	old = casuword(&umtx->u_owner, owner,
		count <= 1 ? UMTX_UNOWNED : UMTX_CONTESTED);
	umtxq_lock(&key);
	umtxq_signal(&key, 1);
	umtxq_unbusy(&key);
	umtxq_unlock(&key);
	umtx_key_release(&key);
	if (old == -1)
		return (EFAULT);
	if (old != owner)
		return (EINVAL);
	return (0);
}

#ifdef COMPAT_FREEBSD32

/*
 * Lock a 32-bit umtx object.
 */
static int
_do_lock_umtx32(struct thread *td, uint32_t *m, uint32_t id, int timo)
{
	struct umtx_q *uq;
	uint32_t owner;
	uint32_t old;
	int error = 0;

	uq = td->td_umtxq;

	/*
	 * Care must be exercised when dealing with the umtx structure.
	 * It can fault on any access.
	 */
	for (;;) {
		/*
		 * Try the uncontested case.  This should be done in userland.
		 */
		owner = casuword32(m, UMUTEX_UNOWNED, id);

		/* The acquire succeeded. */
		if (owner == UMUTEX_UNOWNED)
			return (0);

		/* The address was invalid. */
		if (owner == -1)
			return (EFAULT);

		/* If no one owns it but it is contested try to acquire it. */
		if (owner == UMUTEX_CONTESTED) {
			owner = casuword32(m,
			    UMUTEX_CONTESTED, id | UMUTEX_CONTESTED);
			if (owner == UMUTEX_CONTESTED)
				return (0);

			/* The address was invalid. */
			if (owner == -1)
				return (EFAULT);

			/* If this failed the lock has changed, restart. */
			continue;
		}

		/*
		 * If we caught a signal, we have retried and now
		 * exit immediately.
		 */
		if (error != 0)
			return (error);

		if ((error = umtx_key_get(m, TYPE_SIMPLE_LOCK,
			AUTO_SHARE, &uq->uq_key)) != 0)
			return (error);

		umtxq_lock(&uq->uq_key);
		umtxq_busy(&uq->uq_key);
		umtxq_insert(uq);
		umtxq_unbusy(&uq->uq_key);
		umtxq_unlock(&uq->uq_key);

		/*
		 * Set the contested bit so that a release in user space
		 * knows to use the system call for unlock.  If this fails
		 * either some one else has acquired the lock or it has been
		 * released.
		 */
		old = casuword32(m, owner, owner | UMUTEX_CONTESTED);

		/* The address was invalid. */
		if (old == -1) {
			umtxq_lock(&uq->uq_key);
			umtxq_remove(uq);
			umtxq_unlock(&uq->uq_key);
			umtx_key_release(&uq->uq_key);
			return (EFAULT);
		}

		/*
		 * If we set the contested bit, sleep.  Otherwise the lock
		 * changed and we need to retry, or we lost a race to the
		 * thread unlocking the umtx.
		 */
		umtxq_lock(&uq->uq_key);
		if (old == owner)
			error = umtxq_sleep(uq, "umtx", timo);
		umtxq_remove(uq);
		umtxq_unlock(&uq->uq_key);
		umtx_key_release(&uq->uq_key);
	}

	return (0);
}

/*
 * Lock a 32-bit umtx object, with an optional timeout.
 */
static int
do_lock_umtx32(struct thread *td, void *m, uint32_t id,
	struct timespec *timeout)
{
	struct timespec ts, ts2, ts3;
	struct timeval tv;
	int error;

	if (timeout == NULL) {
		error = _do_lock_umtx32(td, m, id, 0);
		/* Mutex locking is restarted if it is interrupted. */
		if (error == EINTR)
			error = ERESTART;
	} else {
		getnanouptime(&ts);
		timespecadd(&ts, timeout);
		TIMESPEC_TO_TIMEVAL(&tv, timeout);
		for (;;) {
			error = _do_lock_umtx32(td, m, id, tvtohz(&tv));
			if (error != ETIMEDOUT)
				break;
			getnanouptime(&ts2);
			if (timespeccmp(&ts2, &ts, >=)) {
				error = ETIMEDOUT;
				break;
			}
			ts3 = ts;
			timespecsub(&ts3, &ts2);
			TIMESPEC_TO_TIMEVAL(&tv, &ts3);
		}
		/* Timed-locking is not restarted. */
		if (error == ERESTART)
			error = EINTR;
	}
	return (error);
}

/*
 * Unlock a 32-bit umtx object.
 */
static int
do_unlock_umtx32(struct thread *td, uint32_t *m, uint32_t id)
{
	struct umtx_key key;
	uint32_t owner;
	uint32_t old;
	int error;
	int count;

	/*
	 * Make sure we own this mtx.
	 */
	owner = fuword32(m);
	if (owner == -1)
		return (EFAULT);

	if ((owner & ~UMUTEX_CONTESTED) != id)
		return (EPERM);

	/* This should be done in userland */
	if ((owner & UMUTEX_CONTESTED) == 0) {
		old = casuword32(m, owner, UMUTEX_UNOWNED);
		if (old == -1)
			return (EFAULT);
		if (old == owner)
			return (0);
		owner = old;
	}

	/* We should only ever be in here for contested locks */
	if ((error = umtx_key_get(m, TYPE_SIMPLE_LOCK, AUTO_SHARE,
		&key)) != 0)
		return (error);

	umtxq_lock(&key);
	umtxq_busy(&key);
	count = umtxq_count(&key);
	umtxq_unlock(&key);

	/*
	 * When unlocking the umtx, it must be marked as unowned if
	 * there is zero or one waiting thread.  Otherwise, it must
	 * be marked as contested.
	 */
	old = casuword32(m, owner,
		count <= 1 ? UMUTEX_UNOWNED : UMUTEX_CONTESTED);
	umtxq_lock(&key);
	umtxq_signal(&key, 1);
	umtxq_unbusy(&key);
	umtxq_unlock(&key);
	umtx_key_release(&key);
	if (old == -1)
		return (EFAULT);
	if (old != owner)
		return (EINVAL);
	return (0);
}
#endif

static inline int
tstohz(const struct timespec *tsp)
{
	struct timeval tv;

	TIMESPEC_TO_TIMEVAL(&tv, tsp);
	return (tvtohz(&tv));
}
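
/*
 * Worked example (assuming hz = 1000): a relative timeout of
 * { .tv_sec = 1, .tv_nsec = 500000000 } converts to a 1.5 second
 * timeval, and tvtohz() returns roughly 1501 ticks -- the extra
 * tick covers the partially elapsed current tick so the sleep
 * never ends early.
 */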

static int
umtxq_nanosleep(struct thread *td, int clockid, int absolute,
	struct timespec *timeout, const char *mesg)
{
	struct umtx_q *uq;
	struct timespec ets, cts, tts;
	int error;

	uq = td->td_umtxq;
	umtxq_unlock(&uq->uq_key);
	if (!absolute) {
		kern_clock_gettime(td, clockid, &ets);
		timespecadd(&ets, timeout);
		tts = *timeout;
	} else { /* absolute time */
		ets = *timeout;
		tts = *timeout;
		kern_clock_gettime(td, clockid, &cts);
		timespecsub(&tts, &cts);
	}
	umtxq_lock(&uq->uq_key);
	for (;;) {
		error = umtxq_sleep(uq, mesg, tstohz(&tts));
		if (error != ETIMEDOUT)
			break;
		kern_clock_gettime(td, clockid, &cts);
		if (timespeccmp(&cts, &ets, >=)) {
			error = ETIMEDOUT;
			break;
		}
		tts = ets;
		timespecsub(&tts, &cts);
	}
	return (error);
}

/*
 * Fetch the value and, if it has not changed, sleep on the address.
 */
static int
do_wait(struct thread *td, void *addr, u_long id,
	struct _umtx_time *timeout, int compat32, int is_private)
{
	struct umtx_q *uq;
	u_long tmp;
	int error = 0;

	uq = td->td_umtxq;
	if ((error = umtx_key_get(addr, TYPE_SIMPLE_WAIT,
		is_private ? THREAD_SHARE : AUTO_SHARE, &uq->uq_key)) != 0)
		return (error);

	umtxq_lock(&uq->uq_key);
	umtxq_insert(uq);
	umtxq_unlock(&uq->uq_key);
	if (compat32 == 0)
		tmp = fuword(addr);
	else
		tmp = (unsigned int)fuword32(addr);
	umtxq_lock(&uq->uq_key);
	if (tmp == id) {
		if (timeout == NULL)
			error = umtxq_sleep(uq, "uwait", 0);
		else
			error = umtxq_nanosleep(td, timeout->_clockid,
				((timeout->_flags & UMTX_ABSTIME) != 0),
				&timeout->_timeout, "uwait");
	}

	if ((uq->uq_flags & UQF_UMTXQ) == 0)
		error = 0;
	else
		umtxq_remove(uq);
	umtxq_unlock(&uq->uq_key);
	umtx_key_release(&uq->uq_key);
	if (error == ERESTART)
		error = EINTR;
	return (error);
}
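
/*
 * Userland pairing sketch for do_wait() (illustrative; the exact
 * calls are libthr's business): the caller enters the kernel only
 * while the word still holds the expected value.  do_wait() re-reads
 * the word after queueing itself, so a wakeup racing between the
 * userland check and the sleep is not lost.
 *
 *	while (atomic_load_acq_long(&word) == expected_id)
 *		_umtx_op(&word, UMTX_OP_WAIT, expected_id, NULL, NULL);
 */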

/*
 * Wake up threads sleeping on the specified address.
 */
int
kern_umtx_wake(struct thread *td, void *uaddr, int n_wake, int is_private)
{
	struct umtx_key key;
	int ret;

	if ((ret = umtx_key_get(uaddr, TYPE_SIMPLE_WAIT,
		is_private ? THREAD_SHARE : AUTO_SHARE, &key)) != 0)
		return (ret);
	umtxq_lock(&key);
	ret = umtxq_signal(&key, n_wake);
	umtxq_unlock(&key);
	umtx_key_release(&key);
	return (0);
}
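
/*
 * Pairing sketch for the wake side (illustrative): after the
 * releasing thread changes the word, it wakes any sleepers through
 * the same key.
 *
 *	atomic_store_rel_long(&word, UMTX_UNOWNED);
 *	_umtx_op(&word, UMTX_OP_WAKE, INT_MAX, NULL, NULL);
 */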
1131139013Sdavidxu
1132161678Sdavidxu/*
1133161678Sdavidxu * Lock PTHREAD_PRIO_NONE protocol POSIX mutex.
1134161678Sdavidxu */
1135161678Sdavidxustatic int
1136161678Sdavidxu_do_lock_normal(struct thread *td, struct umutex *m, uint32_t flags, int timo,
1137179970Sdavidxu	int mode)
1138161678Sdavidxu{
1139161678Sdavidxu	struct umtx_q *uq;
1140161678Sdavidxu	uint32_t owner, old, id;
1141161678Sdavidxu	int error = 0;
1142161678Sdavidxu
1143161678Sdavidxu	id = td->td_tid;
1144161678Sdavidxu	uq = td->td_umtxq;
1145161678Sdavidxu
1146161678Sdavidxu	/*
1147161678Sdavidxu	 * Care must be exercised when dealing with umtx structure. It
1148161678Sdavidxu	 * can fault on any access.
1149161678Sdavidxu	 */
1150161678Sdavidxu	for (;;) {
1151179970Sdavidxu		owner = fuword32(__DEVOLATILE(void *, &m->m_owner));
1152179970Sdavidxu		if (mode == _UMUTEX_WAIT) {
1153179970Sdavidxu			if (owner == UMUTEX_UNOWNED || owner == UMUTEX_CONTESTED)
1154179970Sdavidxu				return (0);
1155179970Sdavidxu		} else {
1156179970Sdavidxu			/*
1157179970Sdavidxu			 * Try the uncontested case.  This should be done in userland.
1158179970Sdavidxu			 */
1159179970Sdavidxu			owner = casuword32(&m->m_owner, UMUTEX_UNOWNED, id);
1160161678Sdavidxu
1161179970Sdavidxu			/* The acquire succeeded. */
1162179970Sdavidxu			if (owner == UMUTEX_UNOWNED)
1163161678Sdavidxu				return (0);
1164161678Sdavidxu
1165161678Sdavidxu			/* The address was invalid. */
1166161678Sdavidxu			if (owner == -1)
1167161678Sdavidxu				return (EFAULT);
1168161678Sdavidxu
1169179970Sdavidxu			/* If no one owns it but it is contested try to acquire it. */
1170179970Sdavidxu			if (owner == UMUTEX_CONTESTED) {
1171179970Sdavidxu				owner = casuword32(&m->m_owner,
1172179970Sdavidxu				    UMUTEX_CONTESTED, id | UMUTEX_CONTESTED);
1173179970Sdavidxu
1174179970Sdavidxu				if (owner == UMUTEX_CONTESTED)
1175179970Sdavidxu					return (0);
1176179970Sdavidxu
1177179970Sdavidxu				/* The address was invalid. */
1178179970Sdavidxu				if (owner == -1)
1179179970Sdavidxu					return (EFAULT);
1180179970Sdavidxu
1181179970Sdavidxu				/* If this failed the lock has changed, restart. */
1182179970Sdavidxu				continue;
1183179970Sdavidxu			}
1184161678Sdavidxu		}
1185161678Sdavidxu
1186161678Sdavidxu		if ((flags & UMUTEX_ERROR_CHECK) != 0 &&
1187161678Sdavidxu		    (owner & ~UMUTEX_CONTESTED) == id)
1188161678Sdavidxu			return (EDEADLK);
1189161678Sdavidxu
1190179970Sdavidxu		if (mode == _UMUTEX_TRY)
1191161678Sdavidxu			return (EBUSY);
1192161678Sdavidxu
1193161678Sdavidxu		/*
1194161678Sdavidxu		 * If we caught a signal, we have retried and now
1195161678Sdavidxu		 * exit immediately.
1196161678Sdavidxu		 */
1197161678Sdavidxu		if (error != 0)
1198161678Sdavidxu			return (error);
1199161678Sdavidxu
1200161678Sdavidxu		if ((error = umtx_key_get(m, TYPE_NORMAL_UMUTEX,
1201161678Sdavidxu		    GET_SHARE(flags), &uq->uq_key)) != 0)
1202161678Sdavidxu			return (error);
1203161678Sdavidxu
1204161678Sdavidxu		umtxq_lock(&uq->uq_key);
1205161678Sdavidxu		umtxq_busy(&uq->uq_key);
1206161678Sdavidxu		umtxq_insert(uq);
1207161678Sdavidxu		umtxq_unlock(&uq->uq_key);
1208161678Sdavidxu
1209161678Sdavidxu		/*
1210161678Sdavidxu		 * Set the contested bit so that a release in user space
1211161678Sdavidxu		 * knows to use the system call for unlock.  If this fails
1212161678Sdavidxu		 * either some one else has acquired the lock or it has been
1213161678Sdavidxu		 * released.
1214161678Sdavidxu		 */
1215161678Sdavidxu		old = casuword32(&m->m_owner, owner, owner | UMUTEX_CONTESTED);
1216161678Sdavidxu
1217161678Sdavidxu		/* The address was invalid. */
1218161678Sdavidxu		if (old == -1) {
1219161678Sdavidxu			umtxq_lock(&uq->uq_key);
1220161678Sdavidxu			umtxq_remove(uq);
1221179970Sdavidxu			umtxq_unbusy(&uq->uq_key);
1222161678Sdavidxu			umtxq_unlock(&uq->uq_key);
1223161678Sdavidxu			umtx_key_release(&uq->uq_key);
1224161678Sdavidxu			return (EFAULT);
1225161678Sdavidxu		}
1226161678Sdavidxu
1227161678Sdavidxu		/*
1228161678Sdavidxu		 * We set the contested bit, sleep. Otherwise the lock changed
1229161678Sdavidxu		 * and we need to retry or we lost a race to the thread
1230161678Sdavidxu		 * unlocking the umtx.
1231161678Sdavidxu		 */
1232161678Sdavidxu		umtxq_lock(&uq->uq_key);
1233179970Sdavidxu		umtxq_unbusy(&uq->uq_key);
1234161678Sdavidxu		if (old == owner)
1235161678Sdavidxu			error = umtxq_sleep(uq, "umtxn", timo);
1236161678Sdavidxu		umtxq_remove(uq);
1237161678Sdavidxu		umtxq_unlock(&uq->uq_key);
1238161678Sdavidxu		umtx_key_release(&uq->uq_key);
1239161678Sdavidxu	}
1240161678Sdavidxu
1241161678Sdavidxu	return (0);
1242161678Sdavidxu}
1243161678Sdavidxu
1244161678Sdavidxu/*
1245161678Sdavidxu * Lock PTHREAD_PRIO_NONE protocol POSIX mutex.
1246161678Sdavidxu */
1247161678Sdavidxu/*
1248161678Sdavidxu * Unlock PTHREAD_PRIO_NONE protocol POSIX mutex.
1249161678Sdavidxu */
1250161678Sdavidxustatic int
1251161678Sdavidxudo_unlock_normal(struct thread *td, struct umutex *m, uint32_t flags)
1252161678Sdavidxu{
1253161678Sdavidxu	struct umtx_key key;
1254161678Sdavidxu	uint32_t owner, old, id;
1255161678Sdavidxu	int error;
1256161678Sdavidxu	int count;
1257161678Sdavidxu
1258161678Sdavidxu	id = td->td_tid;
1259161678Sdavidxu	/*
1260161678Sdavidxu	 * Make sure we own this mtx.
1261161678Sdavidxu	 */
1262163449Sdavidxu	owner = fuword32(__DEVOLATILE(uint32_t *, &m->m_owner));
1263161678Sdavidxu	if (owner == -1)
1264161678Sdavidxu		return (EFAULT);
1265161678Sdavidxu
1266161678Sdavidxu	if ((owner & ~UMUTEX_CONTESTED) != id)
1267161678Sdavidxu		return (EPERM);
1268161678Sdavidxu
1269161678Sdavidxu	if ((owner & UMUTEX_CONTESTED) == 0) {
1270161678Sdavidxu		old = casuword32(&m->m_owner, owner, UMUTEX_UNOWNED);
1271161678Sdavidxu		if (old == -1)
1272161678Sdavidxu			return (EFAULT);
1273161678Sdavidxu		if (old == owner)
1274161678Sdavidxu			return (0);
1275161855Sdavidxu		owner = old;
1276161678Sdavidxu	}
1277161678Sdavidxu
1278161678Sdavidxu	/* We should only ever be in here for contested locks */
1279161678Sdavidxu	if ((error = umtx_key_get(m, TYPE_NORMAL_UMUTEX, GET_SHARE(flags),
1280161678Sdavidxu	    &key)) != 0)
1281161678Sdavidxu		return (error);
1282161678Sdavidxu
1283161678Sdavidxu	umtxq_lock(&key);
1284161678Sdavidxu	umtxq_busy(&key);
1285161678Sdavidxu	count = umtxq_count(&key);
1286161678Sdavidxu	umtxq_unlock(&key);
1287161678Sdavidxu
1288161678Sdavidxu	/*
1289161678Sdavidxu	 * When unlocking the umtx, it must be marked as unowned if
1290161678Sdavidxu	 * there is zero or one thread only waiting for it.
1291161678Sdavidxu	 * Otherwise, it must be marked as contested.
1292161678Sdavidxu	 */
1293161678Sdavidxu	old = casuword32(&m->m_owner, owner,
1294161678Sdavidxu		count <= 1 ? UMUTEX_UNOWNED : UMUTEX_CONTESTED);
1295161678Sdavidxu	umtxq_lock(&key);
1296161678Sdavidxu	umtxq_signal(&key,1);
1297161678Sdavidxu	umtxq_unbusy(&key);
1298161678Sdavidxu	umtxq_unlock(&key);
1299161678Sdavidxu	umtx_key_release(&key);
1300161678Sdavidxu	if (old == -1)
1301161678Sdavidxu		return (EFAULT);
1302161678Sdavidxu	if (old != owner)
1303161678Sdavidxu		return (EINVAL);
1304161678Sdavidxu	return (0);
1305161678Sdavidxu}
1306161678Sdavidxu
1307179970Sdavidxu/*
1308179970Sdavidxu * Check if the mutex is available and wake up a waiter,
1309179970Sdavidxu * only for simple mutex.
1310179970Sdavidxu */
1311179970Sdavidxustatic int
1312179970Sdavidxudo_wake_umutex(struct thread *td, struct umutex *m)
1313179970Sdavidxu{
1314179970Sdavidxu	struct umtx_key key;
1315179970Sdavidxu	uint32_t owner;
1316179970Sdavidxu	uint32_t flags;
1317179970Sdavidxu	int error;
1318179970Sdavidxu	int count;
1319179970Sdavidxu
1320179970Sdavidxu	owner = fuword32(__DEVOLATILE(uint32_t *, &m->m_owner));
1321179970Sdavidxu	if (owner == -1)
1322179970Sdavidxu		return (EFAULT);
1323179970Sdavidxu
1324179970Sdavidxu	if ((owner & ~UMUTEX_CONTESTED) != 0)
1325179970Sdavidxu		return (0);
1326179970Sdavidxu
1327179970Sdavidxu	flags = fuword32(&m->m_flags);
1328179970Sdavidxu
1329179970Sdavidxu	/* We should only ever be in here for contested locks */
1330179970Sdavidxu	if ((error = umtx_key_get(m, TYPE_NORMAL_UMUTEX, GET_SHARE(flags),
1331179970Sdavidxu	    &key)) != 0)
1332179970Sdavidxu		return (error);
1333179970Sdavidxu
1334179970Sdavidxu	umtxq_lock(&key);
1335179970Sdavidxu	umtxq_busy(&key);
1336179970Sdavidxu	count = umtxq_count(&key);
1337179970Sdavidxu	umtxq_unlock(&key);
1338179970Sdavidxu
1339179970Sdavidxu	if (count <= 1)
1340179970Sdavidxu		owner = casuword32(&m->m_owner, UMUTEX_CONTESTED, UMUTEX_UNOWNED);
1341179970Sdavidxu
1342179970Sdavidxu	umtxq_lock(&key);
1343179970Sdavidxu	if (count != 0 && (owner & ~UMUTEX_CONTESTED) == 0)
1344179970Sdavidxu		umtxq_signal(&key, 1);
1345179970Sdavidxu	umtxq_unbusy(&key);
1346179970Sdavidxu	umtxq_unlock(&key);
1347179970Sdavidxu	umtx_key_release(&key);
1348179970Sdavidxu	return (0);
1349179970Sdavidxu}
1350179970Sdavidxu
1351161678Sdavidxustatic inline struct umtx_pi *
1352163697Sdavidxuumtx_pi_alloc(int flags)
1353161678Sdavidxu{
1354161678Sdavidxu	struct umtx_pi *pi;
1355161678Sdavidxu
1356163697Sdavidxu	pi = uma_zalloc(umtx_pi_zone, M_ZERO | flags);
1357161678Sdavidxu	TAILQ_INIT(&pi->pi_blocked);
1358161678Sdavidxu	atomic_add_int(&umtx_pi_allocated, 1);
1359161678Sdavidxu	return (pi);
1360161678Sdavidxu}
1361161678Sdavidxu
1362161678Sdavidxustatic inline void
1363161678Sdavidxuumtx_pi_free(struct umtx_pi *pi)
1364161678Sdavidxu{
1365161678Sdavidxu	uma_zfree(umtx_pi_zone, pi);
1366161678Sdavidxu	atomic_add_int(&umtx_pi_allocated, -1);
1367161678Sdavidxu}
1368161678Sdavidxu
1369161678Sdavidxu/*
1370161678Sdavidxu * Adjust the thread's position on a pi_state after its priority has been
1371161678Sdavidxu * changed.
1372161678Sdavidxu */
1373161678Sdavidxustatic int
1374161678Sdavidxuumtx_pi_adjust_thread(struct umtx_pi *pi, struct thread *td)
1375161678Sdavidxu{
1376161678Sdavidxu	struct umtx_q *uq, *uq1, *uq2;
1377161678Sdavidxu	struct thread *td1;
1378161678Sdavidxu
1379170300Sjeff	mtx_assert(&umtx_lock, MA_OWNED);
1380161678Sdavidxu	if (pi == NULL)
1381161678Sdavidxu		return (0);
1382161678Sdavidxu
1383161678Sdavidxu	uq = td->td_umtxq;
1384161678Sdavidxu
1385161678Sdavidxu	/*
1386161678Sdavidxu	 * Check if the thread needs to be moved on the blocked chain.
1387161678Sdavidxu	 * It needs to be moved if either its priority is lower than
1388161678Sdavidxu	 * the previous thread or higher than the next thread.
1389161678Sdavidxu	 */
1390161678Sdavidxu	uq1 = TAILQ_PREV(uq, umtxq_head, uq_lockq);
1391161678Sdavidxu	uq2 = TAILQ_NEXT(uq, uq_lockq);
1392161678Sdavidxu	if ((uq1 != NULL && UPRI(td) < UPRI(uq1->uq_thread)) ||
1393161678Sdavidxu	    (uq2 != NULL && UPRI(td) > UPRI(uq2->uq_thread))) {
1394161678Sdavidxu		/*
1395161678Sdavidxu		 * Remove thread from blocked chain and determine where
1396161678Sdavidxu		 * it should be moved to.
1397161678Sdavidxu		 */
1398161678Sdavidxu		TAILQ_REMOVE(&pi->pi_blocked, uq, uq_lockq);
1399161678Sdavidxu		TAILQ_FOREACH(uq1, &pi->pi_blocked, uq_lockq) {
1400161678Sdavidxu			td1 = uq1->uq_thread;
1401161678Sdavidxu			MPASS(td1->td_proc->p_magic == P_MAGIC);
1402161678Sdavidxu			if (UPRI(td1) > UPRI(td))
1403161678Sdavidxu				break;
1404161678Sdavidxu		}
1405161678Sdavidxu
1406161678Sdavidxu		if (uq1 == NULL)
1407161678Sdavidxu			TAILQ_INSERT_TAIL(&pi->pi_blocked, uq, uq_lockq);
1408161678Sdavidxu		else
1409161678Sdavidxu			TAILQ_INSERT_BEFORE(uq1, uq, uq_lockq);
1410161678Sdavidxu	}
1411161678Sdavidxu	return (1);
1412161678Sdavidxu}
1413161678Sdavidxu
1414161678Sdavidxu/*
1415161678Sdavidxu * Propagate priority when a thread is blocked on POSIX
1416161678Sdavidxu * PI mutex.
1417161678Sdavidxu */
1418161678Sdavidxustatic void
1419161678Sdavidxuumtx_propagate_priority(struct thread *td)
1420161678Sdavidxu{
1421161678Sdavidxu	struct umtx_q *uq;
1422161678Sdavidxu	struct umtx_pi *pi;
1423161678Sdavidxu	int pri;
1424161678Sdavidxu
1425170300Sjeff	mtx_assert(&umtx_lock, MA_OWNED);
1426161678Sdavidxu	pri = UPRI(td);
1427161678Sdavidxu	uq = td->td_umtxq;
1428161678Sdavidxu	pi = uq->uq_pi_blocked;
1429161678Sdavidxu	if (pi == NULL)
1430161678Sdavidxu		return;
1431161678Sdavidxu
1432161678Sdavidxu	for (;;) {
1433161678Sdavidxu		td = pi->pi_owner;
1434216313Sdavidxu		if (td == NULL || td == curthread)
1435161678Sdavidxu			return;
1436161678Sdavidxu
1437161678Sdavidxu		MPASS(td->td_proc != NULL);
1438161678Sdavidxu		MPASS(td->td_proc->p_magic == P_MAGIC);
1439161678Sdavidxu
1440170300Sjeff		thread_lock(td);
1441216313Sdavidxu		if (td->td_lend_user_pri > pri)
1442216313Sdavidxu			sched_lend_user_prio(td, pri);
1443216313Sdavidxu		else {
1444216313Sdavidxu			thread_unlock(td);
1445216313Sdavidxu			break;
1446216313Sdavidxu		}
1447170300Sjeff		thread_unlock(td);
1448161678Sdavidxu
1449161678Sdavidxu		/*
1450161678Sdavidxu		 * Pick up the lock that td is blocked on.
1451161678Sdavidxu		 */
1452161678Sdavidxu		uq = td->td_umtxq;
1453161678Sdavidxu		pi = uq->uq_pi_blocked;
1454216791Sdavidxu		if (pi == NULL)
1455216791Sdavidxu			break;
1456161678Sdavidxu		/* Resort td on the list if needed. */
1457216791Sdavidxu		umtx_pi_adjust_thread(pi, td);
1458161678Sdavidxu	}
1459161678Sdavidxu}
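
/*
 * Example of the walk above (illustration only): if T1 blocks on PI
 * mutex M1 owned by T2, and T2 is in turn blocked on M2 owned by T3,
 * the loop lends T1's priority to T2 and then to T3, stopping when an
 * owner already runs at an equal or better priority, when the owner is
 * curthread, or when the current owner is not itself blocked on a PI
 * mutex.
 */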
1460161678Sdavidxu
1461161678Sdavidxu/*
1462161678Sdavidxu * Unpropagate priority for a PI mutex when a thread blocked on
1463161678Sdavidxu * it is interrupted by a signal or resumed by another thread.
1464161678Sdavidxu */
1465161678Sdavidxustatic void
1466216791Sdavidxuumtx_repropagate_priority(struct umtx_pi *pi)
1467161678Sdavidxu{
1468161678Sdavidxu	struct umtx_q *uq, *uq_owner;
1469161678Sdavidxu	struct umtx_pi *pi2;
1470216791Sdavidxu	int pri;
1471161678Sdavidxu
1472170300Sjeff	mtx_assert(&umtx_lock, MA_OWNED);
1473161678Sdavidxu
1474161678Sdavidxu	while (pi != NULL && pi->pi_owner != NULL) {
1475161678Sdavidxu		pri = PRI_MAX;
1476161678Sdavidxu		uq_owner = pi->pi_owner->td_umtxq;
1477161678Sdavidxu
1478161678Sdavidxu		TAILQ_FOREACH(pi2, &uq_owner->uq_pi_contested, pi_link) {
1479161678Sdavidxu			uq = TAILQ_FIRST(&pi2->pi_blocked);
1480161678Sdavidxu			if (uq != NULL) {
1481161678Sdavidxu				if (pri > UPRI(uq->uq_thread))
1482161678Sdavidxu					pri = UPRI(uq->uq_thread);
1483161678Sdavidxu			}
1484161678Sdavidxu		}
1485161678Sdavidxu
1486161678Sdavidxu		if (pri > uq_owner->uq_inherited_pri)
1487161678Sdavidxu			pri = uq_owner->uq_inherited_pri;
1488170300Sjeff		thread_lock(pi->pi_owner);
1489216791Sdavidxu		sched_lend_user_prio(pi->pi_owner, pri);
1490170300Sjeff		thread_unlock(pi->pi_owner);
1491216791Sdavidxu		if ((pi = uq_owner->uq_pi_blocked) != NULL)
1492216791Sdavidxu			umtx_pi_adjust_thread(pi, uq_owner->uq_thread);
1493161678Sdavidxu	}
1494161678Sdavidxu}
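
/*
 * In effect, for each owner on the chain the loop above recomputes
 *
 *	pri = min(uq_inherited_pri,
 *	    min over each contested pi of UPRI(TAILQ_FIRST(&pi->pi_blocked)))
 *
 * and lends that priority (smaller numeric values are better), so a
 * signal or wakeup lowers the owner's lent priority back to whatever
 * its remaining waiters still justify.
 */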
1495161678Sdavidxu
1496161678Sdavidxu/*
1497161678Sdavidxu * Insert a PI mutex into the owning thread's contested list.
1498161678Sdavidxu */
1499161678Sdavidxustatic void
1500161678Sdavidxuumtx_pi_setowner(struct umtx_pi *pi, struct thread *owner)
1501161678Sdavidxu{
1502161678Sdavidxu	struct umtx_q *uq_owner;
1503161678Sdavidxu
1504161678Sdavidxu	uq_owner = owner->td_umtxq;
1505170300Sjeff	mtx_assert(&umtx_lock, MA_OWNED);
1506161678Sdavidxu	if (pi->pi_owner != NULL)
1507161678Sdavidxu		panic("pi_owner != NULL");
1508161678Sdavidxu	pi->pi_owner = owner;
1509161678Sdavidxu	TAILQ_INSERT_TAIL(&uq_owner->uq_pi_contested, pi, pi_link);
1510161678Sdavidxu}
1511161678Sdavidxu
1512161678Sdavidxu/*
1513161678Sdavidxu * Claim ownership of a PI mutex.
1514161678Sdavidxu */
1515161678Sdavidxustatic int
1516161678Sdavidxuumtx_pi_claim(struct umtx_pi *pi, struct thread *owner)
1517161678Sdavidxu{
1518161678Sdavidxu	struct umtx_q *uq, *uq_owner;
1519161678Sdavidxu
1520161678Sdavidxu	uq_owner = owner->td_umtxq;
1521170300Sjeff	mtx_lock_spin(&umtx_lock);
1522161678Sdavidxu	if (pi->pi_owner == owner) {
1523170300Sjeff		mtx_unlock_spin(&umtx_lock);
1524161678Sdavidxu		return (0);
1525161678Sdavidxu	}
1526161678Sdavidxu
1527161678Sdavidxu	if (pi->pi_owner != NULL) {
1528161678Sdavidxu		/*
1529161678Sdavidxu		 * Userland may have already messed up the mutex, sigh.
1530161678Sdavidxu		 */
1531170300Sjeff		mtx_unlock_spin(&umtx_lock);
1532161678Sdavidxu		return (EPERM);
1533161678Sdavidxu	}
1534161678Sdavidxu	umtx_pi_setowner(pi, owner);
1535161678Sdavidxu	uq = TAILQ_FIRST(&pi->pi_blocked);
1536161678Sdavidxu	if (uq != NULL) {
1537161678Sdavidxu		int pri;
1538161678Sdavidxu
1539161678Sdavidxu		pri = UPRI(uq->uq_thread);
1540170300Sjeff		thread_lock(owner);
1541161678Sdavidxu		if (pri < UPRI(owner))
1542161678Sdavidxu			sched_lend_user_prio(owner, pri);
1543170300Sjeff		thread_unlock(owner);
1544161678Sdavidxu	}
1545170300Sjeff	mtx_unlock_spin(&umtx_lock);
1546161678Sdavidxu	return (0);
1547161678Sdavidxu}
1548161678Sdavidxu
1549161678Sdavidxu/*
1550174701Sdavidxu * Adjust a thread's position on the blocked list of the PI mutex it
1551174701Sdavidxu * is blocked on; this may trigger a new round of priority propagation.
1552174701Sdavidxu */
1553174701Sdavidxuvoid
1554174701Sdavidxuumtx_pi_adjust(struct thread *td, u_char oldpri)
1555174701Sdavidxu{
1556174707Sdavidxu	struct umtx_q *uq;
1557174707Sdavidxu	struct umtx_pi *pi;
1558174707Sdavidxu
1559174707Sdavidxu	uq = td->td_umtxq;
1560174701Sdavidxu	mtx_lock_spin(&umtx_lock);
1561174707Sdavidxu	/*
1562174707Sdavidxu	 * Pick up the lock that td is blocked on.
1563174707Sdavidxu	 */
1564174707Sdavidxu	pi = uq->uq_pi_blocked;
1565216791Sdavidxu	if (pi != NULL) {
1566216791Sdavidxu		umtx_pi_adjust_thread(pi, td);
1567216791Sdavidxu		umtx_repropagate_priority(pi);
1568216791Sdavidxu	}
1569174701Sdavidxu	mtx_unlock_spin(&umtx_lock);
1570174701Sdavidxu}
1571174701Sdavidxu
1572174701Sdavidxu/*
1573161678Sdavidxu * Sleep on a PI mutex.
1574161678Sdavidxu */
1575161678Sdavidxustatic int
1576161678Sdavidxuumtxq_sleep_pi(struct umtx_q *uq, struct umtx_pi *pi,
1577161678Sdavidxu	uint32_t owner, const char *wmesg, int timo)
1578161678Sdavidxu{
1579161678Sdavidxu	struct umtxq_chain *uc;
1580161678Sdavidxu	struct thread *td, *td1;
1581161678Sdavidxu	struct umtx_q *uq1;
1582161678Sdavidxu	int pri;
1583161678Sdavidxu	int error = 0;
1584161678Sdavidxu
1585161678Sdavidxu	td = uq->uq_thread;
1586161678Sdavidxu	KASSERT(td == curthread, ("inconsistent uq_thread"));
1587161678Sdavidxu	uc = umtxq_getchain(&uq->uq_key);
1588161678Sdavidxu	UMTXQ_LOCKED_ASSERT(uc);
1589189756Sdavidxu	UMTXQ_BUSY_ASSERT(uc);
1590161678Sdavidxu	umtxq_insert(uq);
1591189756Sdavidxu	mtx_lock_spin(&umtx_lock);
1592161678Sdavidxu	if (pi->pi_owner == NULL) {
1593189756Sdavidxu		mtx_unlock_spin(&umtx_lock);
1594213642Sdavidxu		/* XXX Only look up thread in current process. */
1595213642Sdavidxu		td1 = tdfind(owner, curproc->p_pid);
1596170300Sjeff		mtx_lock_spin(&umtx_lock);
1597215336Sdavidxu		if (td1 != NULL) {
1598215336Sdavidxu			if (pi->pi_owner == NULL)
1599215336Sdavidxu				umtx_pi_setowner(pi, td1);
1600215336Sdavidxu			PROC_UNLOCK(td1->td_proc);
1601161678Sdavidxu		}
1602161678Sdavidxu	}
1603161678Sdavidxu
1604161678Sdavidxu	TAILQ_FOREACH(uq1, &pi->pi_blocked, uq_lockq) {
1605161678Sdavidxu		pri = UPRI(uq1->uq_thread);
1606161678Sdavidxu		if (pri > UPRI(td))
1607161678Sdavidxu			break;
1608161678Sdavidxu	}
1609161678Sdavidxu
1610161678Sdavidxu	if (uq1 != NULL)
1611161678Sdavidxu		TAILQ_INSERT_BEFORE(uq1, uq, uq_lockq);
1612161678Sdavidxu	else
1613161678Sdavidxu		TAILQ_INSERT_TAIL(&pi->pi_blocked, uq, uq_lockq);
1614161678Sdavidxu
1615161678Sdavidxu	uq->uq_pi_blocked = pi;
1616174701Sdavidxu	thread_lock(td);
1617161678Sdavidxu	td->td_flags |= TDF_UPIBLOCKED;
1618174701Sdavidxu	thread_unlock(td);
1619161678Sdavidxu	umtx_propagate_priority(td);
1620170300Sjeff	mtx_unlock_spin(&umtx_lock);
1621189756Sdavidxu	umtxq_unbusy(&uq->uq_key);
1622161678Sdavidxu
1623161678Sdavidxu	if (uq->uq_flags & UQF_UMTXQ) {
1624161678Sdavidxu		error = msleep(uq, &uc->uc_lock, PCATCH, wmesg, timo);
1625161678Sdavidxu		if (error == EWOULDBLOCK)
1626161678Sdavidxu			error = ETIMEDOUT;
1627161678Sdavidxu		if (uq->uq_flags & UQF_UMTXQ) {
1628161678Sdavidxu			umtxq_remove(uq);
1629161678Sdavidxu		}
1630161678Sdavidxu	}
1631170300Sjeff	mtx_lock_spin(&umtx_lock);
1632161678Sdavidxu	uq->uq_pi_blocked = NULL;
1633174701Sdavidxu	thread_lock(td);
1634161678Sdavidxu	td->td_flags &= ~TDF_UPIBLOCKED;
1635174701Sdavidxu	thread_unlock(td);
1636161678Sdavidxu	TAILQ_REMOVE(&pi->pi_blocked, uq, uq_lockq);
1637216791Sdavidxu	umtx_repropagate_priority(pi);
1638170300Sjeff	mtx_unlock_spin(&umtx_lock);
1639189756Sdavidxu	umtxq_unlock(&uq->uq_key);
1640161678Sdavidxu
1641161678Sdavidxu	return (error);
1642161678Sdavidxu}
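
/*
 * Note on the ordering in umtxq_sleep_pi() above: the queue insert,
 * the owner resolution and the priority-sorted insertion into
 * pi_blocked all happen while the chain is busied, the priority is
 * propagated before umtx_lock is dropped, and only then is the chain
 * unbusied and the thread put to sleep.
 */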
1643161678Sdavidxu
1644161678Sdavidxu/*
1645161678Sdavidxu * Increase the reference count of a PI mutex.
1646161678Sdavidxu */
1647161678Sdavidxustatic void
1648161678Sdavidxuumtx_pi_ref(struct umtx_pi *pi)
1649161678Sdavidxu{
1650161678Sdavidxu	struct umtxq_chain *uc;
1651161678Sdavidxu
1652161678Sdavidxu	uc = umtxq_getchain(&pi->pi_key);
1653161678Sdavidxu	UMTXQ_LOCKED_ASSERT(uc);
1654161678Sdavidxu	pi->pi_refcount++;
1655161678Sdavidxu}
1656161678Sdavidxu
1657161678Sdavidxu/*
1658161678Sdavidxu * Decrease the reference count of a PI mutex; when the count
1659161678Sdavidxu * drops to zero, its memory is freed.
1660161678Sdavidxu */
1661161678Sdavidxustatic void
1662161678Sdavidxuumtx_pi_unref(struct umtx_pi *pi)
1663161678Sdavidxu{
1664161678Sdavidxu	struct umtxq_chain *uc;
1665161678Sdavidxu
1666161678Sdavidxu	uc = umtxq_getchain(&pi->pi_key);
1667161678Sdavidxu	UMTXQ_LOCKED_ASSERT(uc);
1668161678Sdavidxu	KASSERT(pi->pi_refcount > 0, ("invalid reference count"));
1669161678Sdavidxu	if (--pi->pi_refcount == 0) {
1670170300Sjeff		mtx_lock_spin(&umtx_lock);
1671161678Sdavidxu		if (pi->pi_owner != NULL) {
1672161678Sdavidxu			TAILQ_REMOVE(&pi->pi_owner->td_umtxq->uq_pi_contested,
1673161678Sdavidxu				pi, pi_link);
1674161678Sdavidxu			pi->pi_owner = NULL;
1675161678Sdavidxu		}
1676161678Sdavidxu		KASSERT(TAILQ_EMPTY(&pi->pi_blocked),
1677161678Sdavidxu			("blocked queue not empty"));
1678170300Sjeff		mtx_unlock_spin(&umtx_lock);
1679161678Sdavidxu		TAILQ_REMOVE(&uc->uc_pi_list, pi, pi_hashlink);
1680189756Sdavidxu		umtx_pi_free(pi);
1681161678Sdavidxu	}
1682161678Sdavidxu}
1683161678Sdavidxu
1684161678Sdavidxu/*
1685161678Sdavidxu * Find a PI mutex in hash table.
1686161678Sdavidxu */
1687161678Sdavidxustatic struct umtx_pi *
1688161678Sdavidxuumtx_pi_lookup(struct umtx_key *key)
1689161678Sdavidxu{
1690161678Sdavidxu	struct umtxq_chain *uc;
1691161678Sdavidxu	struct umtx_pi *pi;
1692161678Sdavidxu
1693161678Sdavidxu	uc = umtxq_getchain(key);
1694161678Sdavidxu	UMTXQ_LOCKED_ASSERT(uc);
1695161678Sdavidxu
1696161678Sdavidxu	TAILQ_FOREACH(pi, &uc->uc_pi_list, pi_hashlink) {
1697161678Sdavidxu		if (umtx_key_match(&pi->pi_key, key)) {
1698161678Sdavidxu			return (pi);
1699161678Sdavidxu		}
1700161678Sdavidxu	}
1701161678Sdavidxu	return (NULL);
1702161678Sdavidxu}
1703161678Sdavidxu
1704161678Sdavidxu/*
1705161678Sdavidxu * Insert a PI mutex into hash table.
1706161678Sdavidxu */
1707161678Sdavidxustatic inline void
1708161678Sdavidxuumtx_pi_insert(struct umtx_pi *pi)
1709161678Sdavidxu{
1710161678Sdavidxu	struct umtxq_chain *uc;
1711161678Sdavidxu
1712161678Sdavidxu	uc = umtxq_getchain(&pi->pi_key);
1713161678Sdavidxu	UMTXQ_LOCKED_ASSERT(uc);
1714161678Sdavidxu	TAILQ_INSERT_TAIL(&uc->uc_pi_list, pi, pi_hashlink);
1715161678Sdavidxu}
1716161678Sdavidxu
1717161678Sdavidxu/*
1718161678Sdavidxu * Lock a PI mutex.
1719161678Sdavidxu */
1720161678Sdavidxustatic int
1721161678Sdavidxu_do_lock_pi(struct thread *td, struct umutex *m, uint32_t flags, int timo,
1722161678Sdavidxu	int try)
1723161678Sdavidxu{
1724161678Sdavidxu	struct umtx_q *uq;
1725161678Sdavidxu	struct umtx_pi *pi, *new_pi;
1726161678Sdavidxu	uint32_t id, owner, old;
1727161678Sdavidxu	int error;
1728161678Sdavidxu
1729161678Sdavidxu	id = td->td_tid;
1730161678Sdavidxu	uq = td->td_umtxq;
1731161678Sdavidxu
1732161678Sdavidxu	if ((error = umtx_key_get(m, TYPE_PI_UMUTEX, GET_SHARE(flags),
1733161678Sdavidxu	    &uq->uq_key)) != 0)
1734161678Sdavidxu		return (error);
1735163697Sdavidxu	umtxq_lock(&uq->uq_key);
1736163697Sdavidxu	pi = umtx_pi_lookup(&uq->uq_key);
1737163697Sdavidxu	if (pi == NULL) {
1738163697Sdavidxu		new_pi = umtx_pi_alloc(M_NOWAIT);
1739163697Sdavidxu		if (new_pi == NULL) {
1740161678Sdavidxu			umtxq_unlock(&uq->uq_key);
1741163697Sdavidxu			new_pi = umtx_pi_alloc(M_WAITOK);
1742161678Sdavidxu			umtxq_lock(&uq->uq_key);
1743161678Sdavidxu			pi = umtx_pi_lookup(&uq->uq_key);
1744163697Sdavidxu			if (pi != NULL) {
1745161678Sdavidxu				umtx_pi_free(new_pi);
1746163697Sdavidxu				new_pi = NULL;
1747161678Sdavidxu			}
1748161678Sdavidxu		}
1749163697Sdavidxu		if (new_pi != NULL) {
1750163697Sdavidxu			new_pi->pi_key = uq->uq_key;
1751163697Sdavidxu			umtx_pi_insert(new_pi);
1752163697Sdavidxu			pi = new_pi;
1753163697Sdavidxu		}
1754163697Sdavidxu	}
1755163697Sdavidxu	umtx_pi_ref(pi);
1756163697Sdavidxu	umtxq_unlock(&uq->uq_key);
1757161678Sdavidxu
1758163697Sdavidxu	/*
1759163697Sdavidxu	 * Care must be exercised when dealing with the umutex structure:
1760163697Sdavidxu	 * any access to it can fault.
1761163697Sdavidxu	 */
1762163697Sdavidxu	for (;;) {
1763161678Sdavidxu		/*
1764161678Sdavidxu		 * Try the uncontested case.  This should be done in userland.
1765161678Sdavidxu		 */
1766161678Sdavidxu		owner = casuword32(&m->m_owner, UMUTEX_UNOWNED, id);
1767161678Sdavidxu
1768161678Sdavidxu		/* The acquire succeeded. */
1769161678Sdavidxu		if (owner == UMUTEX_UNOWNED) {
1770161678Sdavidxu			error = 0;
1771161678Sdavidxu			break;
1772161678Sdavidxu		}
1773161678Sdavidxu
1774161678Sdavidxu		/* The address was invalid. */
1775161678Sdavidxu		if (owner == -1) {
1776161678Sdavidxu			error = EFAULT;
1777161678Sdavidxu			break;
1778161678Sdavidxu		}
1779161678Sdavidxu
1780161678Sdavidxu		/* If no one owns it but it is contested, try to acquire it. */
1781161678Sdavidxu		if (owner == UMUTEX_CONTESTED) {
1782161678Sdavidxu			owner = casuword32(&m->m_owner,
1783161678Sdavidxu			    UMUTEX_CONTESTED, id | UMUTEX_CONTESTED);
1784161678Sdavidxu
1785161678Sdavidxu			if (owner == UMUTEX_CONTESTED) {
1786161678Sdavidxu				umtxq_lock(&uq->uq_key);
1787189756Sdavidxu				umtxq_busy(&uq->uq_key);
1788161678Sdavidxu				error = umtx_pi_claim(pi, td);
1789189756Sdavidxu				umtxq_unbusy(&uq->uq_key);
1790161678Sdavidxu				umtxq_unlock(&uq->uq_key);
1791161678Sdavidxu				break;
1792161678Sdavidxu			}
1793161678Sdavidxu
1794161678Sdavidxu			/* The address was invalid. */
1795161678Sdavidxu			if (owner == -1) {
1796161678Sdavidxu				error = EFAULT;
1797161678Sdavidxu				break;
1798161678Sdavidxu			}
1799161678Sdavidxu
1800161678Sdavidxu			/* If this failed, the lock has changed; restart. */
1801161678Sdavidxu			continue;
1802161678Sdavidxu		}
1803161678Sdavidxu
1804161678Sdavidxu		if ((flags & UMUTEX_ERROR_CHECK) != 0 &&
1805161678Sdavidxu		    (owner & ~UMUTEX_CONTESTED) == id) {
1806161678Sdavidxu			error = EDEADLK;
1807161678Sdavidxu			break;
1808161678Sdavidxu		}
1809161678Sdavidxu
1810161678Sdavidxu		if (try != 0) {
1811161678Sdavidxu			error = EBUSY;
1812161678Sdavidxu			break;
1813161678Sdavidxu		}
1814161678Sdavidxu
1815161678Sdavidxu		/*
1816161678Sdavidxu		 * If we caught a signal, we have retried and now
1817161678Sdavidxu		 * exit immediately.
1818161678Sdavidxu		 */
1819161678Sdavidxu		if (error != 0)
1820161678Sdavidxu			break;
1821161678Sdavidxu
1822161678Sdavidxu		umtxq_lock(&uq->uq_key);
1823161678Sdavidxu		umtxq_busy(&uq->uq_key);
1824161678Sdavidxu		umtxq_unlock(&uq->uq_key);
1825161678Sdavidxu
1826161678Sdavidxu		/*
1827161678Sdavidxu		 * Set the contested bit so that a release in user space
1828161678Sdavidxu		 * knows to use the system call for unlock.  If this fails,
1829161678Sdavidxu		 * either someone else has acquired the lock or it has been
1830161678Sdavidxu		 * released.
1831161678Sdavidxu		 */
1832161678Sdavidxu		old = casuword32(&m->m_owner, owner, owner | UMUTEX_CONTESTED);
1833161678Sdavidxu
1834161678Sdavidxu		/* The address was invalid. */
1835161678Sdavidxu		if (old == -1) {
1836161678Sdavidxu			umtxq_lock(&uq->uq_key);
1837161678Sdavidxu			umtxq_unbusy(&uq->uq_key);
1838161678Sdavidxu			umtxq_unlock(&uq->uq_key);
1839161678Sdavidxu			error = EFAULT;
1840161678Sdavidxu			break;
1841161678Sdavidxu		}
1842161678Sdavidxu
1843161678Sdavidxu		umtxq_lock(&uq->uq_key);
1844161678Sdavidxu		/*
1845161678Sdavidxu		 * If we set the contested bit, sleep.  Otherwise the lock
1846161678Sdavidxu		 * changed and we need to retry, or we lost a race to the thread
1847161678Sdavidxu		 * unlocking the umtx.
1848161678Sdavidxu		 */
1849161678Sdavidxu		if (old == owner)
1850161678Sdavidxu			error = umtxq_sleep_pi(uq, pi, owner & ~UMUTEX_CONTESTED,
1851161678Sdavidxu				 "umtxpi", timo);
1852189756Sdavidxu		else {
1853189756Sdavidxu			umtxq_unbusy(&uq->uq_key);
1854189756Sdavidxu			umtxq_unlock(&uq->uq_key);
1855189756Sdavidxu		}
1856161678Sdavidxu	}
1857161678Sdavidxu
1858163697Sdavidxu	umtxq_lock(&uq->uq_key);
1859163697Sdavidxu	umtx_pi_unref(pi);
1860163697Sdavidxu	umtxq_unlock(&uq->uq_key);
1861161678Sdavidxu
1862161678Sdavidxu	umtx_key_release(&uq->uq_key);
1863161678Sdavidxu	return (error);
1864161678Sdavidxu}
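
/*
 * The m_owner word of a PI mutex thus encodes both the owner and the
 * contention state: for example, once a waiter has gone through the
 * loop above, an owner with thread id 100042 (a made-up id) is stored
 * as (100042 | UMUTEX_CONTESTED), which forces its unlock path into
 * the system call.
 */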
1865161678Sdavidxu
1866161678Sdavidxu/*
1867161678Sdavidxu * Unlock a PI mutex.
1868161678Sdavidxu */
1869161678Sdavidxustatic int
1870161678Sdavidxudo_unlock_pi(struct thread *td, struct umutex *m, uint32_t flags)
1871161678Sdavidxu{
1872161678Sdavidxu	struct umtx_key key;
1873161678Sdavidxu	struct umtx_q *uq_first, *uq_first2, *uq_me;
1874161678Sdavidxu	struct umtx_pi *pi, *pi2;
1875161678Sdavidxu	uint32_t owner, old, id;
1876161678Sdavidxu	int error;
1877161678Sdavidxu	int count;
1878161678Sdavidxu	int pri;
1879161678Sdavidxu
1880161678Sdavidxu	id = td->td_tid;
1881161678Sdavidxu	/*
1882161678Sdavidxu	 * Make sure we own this mtx.
1883161678Sdavidxu	 */
1884163449Sdavidxu	owner = fuword32(__DEVOLATILE(uint32_t *, &m->m_owner));
1885161678Sdavidxu	if (owner == -1)
1886161678Sdavidxu		return (EFAULT);
1887161678Sdavidxu
1888161678Sdavidxu	if ((owner & ~UMUTEX_CONTESTED) != id)
1889161678Sdavidxu		return (EPERM);
1890161678Sdavidxu
1891161678Sdavidxu	/* This should be done in userland */
1892161678Sdavidxu	if ((owner & UMUTEX_CONTESTED) == 0) {
1893161678Sdavidxu		old = casuword32(&m->m_owner, owner, UMUTEX_UNOWNED);
1894161678Sdavidxu		if (old == -1)
1895161678Sdavidxu			return (EFAULT);
1896161678Sdavidxu		if (old == owner)
1897161678Sdavidxu			return (0);
1898161855Sdavidxu		owner = old;
1899161678Sdavidxu	}
1900161678Sdavidxu
1901161678Sdavidxu	/* We should only ever be in here for contested locks */
1902161678Sdavidxu	if ((error = umtx_key_get(m, TYPE_PI_UMUTEX, GET_SHARE(flags),
1903161678Sdavidxu	    &key)) != 0)
1904161678Sdavidxu		return (error);
1905161678Sdavidxu
1906161678Sdavidxu	umtxq_lock(&key);
1907161678Sdavidxu	umtxq_busy(&key);
1908161678Sdavidxu	count = umtxq_count_pi(&key, &uq_first);
1909161678Sdavidxu	if (uq_first != NULL) {
1910189756Sdavidxu		mtx_lock_spin(&umtx_lock);
1911161678Sdavidxu		pi = uq_first->uq_pi_blocked;
1912189756Sdavidxu		KASSERT(pi != NULL, ("pi == NULL?"));
1913161678Sdavidxu		if (pi->pi_owner != curthread) {
1914189756Sdavidxu			mtx_unlock_spin(&umtx_lock);
1915161678Sdavidxu			umtxq_unbusy(&key);
1916161678Sdavidxu			umtxq_unlock(&key);
1917189756Sdavidxu			umtx_key_release(&key);
1918161678Sdavidxu			/* Userland messed up the mutex. */
1919161678Sdavidxu			return (EPERM);
1920161678Sdavidxu		}
1921161678Sdavidxu		uq_me = curthread->td_umtxq;
1922161678Sdavidxu		pi->pi_owner = NULL;
1923161678Sdavidxu		TAILQ_REMOVE(&uq_me->uq_pi_contested, pi, pi_link);
1924189756Sdavidxu		/* Get the highest-priority thread that is still sleeping. */
1925161678Sdavidxu		uq_first = TAILQ_FIRST(&pi->pi_blocked);
1926189756Sdavidxu		while (uq_first != NULL &&
1927189756Sdavidxu		       (uq_first->uq_flags & UQF_UMTXQ) == 0) {
1928189756Sdavidxu			uq_first = TAILQ_NEXT(uq_first, uq_lockq);
1929189756Sdavidxu		}
1930161678Sdavidxu		pri = PRI_MAX;
1931161678Sdavidxu		TAILQ_FOREACH(pi2, &uq_me->uq_pi_contested, pi_link) {
1932161678Sdavidxu			uq_first2 = TAILQ_FIRST(&pi2->pi_blocked);
1933161678Sdavidxu			if (uq_first2 != NULL) {
1934161678Sdavidxu				if (pri > UPRI(uq_first2->uq_thread))
1935161678Sdavidxu					pri = UPRI(uq_first2->uq_thread);
1936161678Sdavidxu			}
1937161678Sdavidxu		}
1938170300Sjeff		thread_lock(curthread);
1939216791Sdavidxu		sched_lend_user_prio(curthread, pri);
1940170300Sjeff		thread_unlock(curthread);
1941170300Sjeff		mtx_unlock_spin(&umtx_lock);
1942189756Sdavidxu		if (uq_first)
1943189756Sdavidxu			umtxq_signal_thread(uq_first);
1944161678Sdavidxu	}
1945161678Sdavidxu	umtxq_unlock(&key);
1946161678Sdavidxu
1947161678Sdavidxu	/*
1948161678Sdavidxu	 * When unlocking the umtx, it must be marked as unowned if
1949161678Sdavidxu	 * there is at most one thread waiting on it.
1950161678Sdavidxu	 * Otherwise, it must be marked as contested.
1951161678Sdavidxu	 */
1952161678Sdavidxu	old = casuword32(&m->m_owner, owner,
1953161678Sdavidxu		count <= 1 ? UMUTEX_UNOWNED : UMUTEX_CONTESTED);
1954161678Sdavidxu
1955161678Sdavidxu	umtxq_lock(&key);
1956161678Sdavidxu	umtxq_unbusy(&key);
1957161678Sdavidxu	umtxq_unlock(&key);
1958161678Sdavidxu	umtx_key_release(&key);
1959161678Sdavidxu	if (old == -1)
1960161678Sdavidxu		return (EFAULT);
1961161678Sdavidxu	if (old != owner)
1962161678Sdavidxu		return (EINVAL);
1963161678Sdavidxu	return (0);
1964161678Sdavidxu}
1965161678Sdavidxu
1966161678Sdavidxu/*
1967161678Sdavidxu * Lock a PP mutex.
1968161678Sdavidxu */
1969161678Sdavidxustatic int
1970161678Sdavidxu_do_lock_pp(struct thread *td, struct umutex *m, uint32_t flags, int timo,
1971161678Sdavidxu	int try)
1972161678Sdavidxu{
1973161678Sdavidxu	struct umtx_q *uq, *uq2;
1974161678Sdavidxu	struct umtx_pi *pi;
1975161678Sdavidxu	uint32_t ceiling;
1976161678Sdavidxu	uint32_t owner, id;
1977161678Sdavidxu	int error, pri, old_inherited_pri, su;
1978161678Sdavidxu
1979161678Sdavidxu	id = td->td_tid;
1980161678Sdavidxu	uq = td->td_umtxq;
1981161678Sdavidxu	if ((error = umtx_key_get(m, TYPE_PP_UMUTEX, GET_SHARE(flags),
1982161678Sdavidxu	    &uq->uq_key)) != 0)
1983161678Sdavidxu		return (error);
1984164033Srwatson	su = (priv_check(td, PRIV_SCHED_RTPRIO) == 0);
1985161678Sdavidxu	for (;;) {
1986161678Sdavidxu		old_inherited_pri = uq->uq_inherited_pri;
1987161678Sdavidxu		umtxq_lock(&uq->uq_key);
1988161678Sdavidxu		umtxq_busy(&uq->uq_key);
1989161678Sdavidxu		umtxq_unlock(&uq->uq_key);
1990161678Sdavidxu
1991161678Sdavidxu		ceiling = RTP_PRIO_MAX - fuword32(&m->m_ceilings[0]);
1992161678Sdavidxu		if (ceiling > RTP_PRIO_MAX) {
1993161678Sdavidxu			error = EINVAL;
1994161678Sdavidxu			goto out;
1995161678Sdavidxu		}
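		/*
		 * Note: ceiling is unsigned, so a user-supplied value
		 * larger than RTP_PRIO_MAX wraps the subtraction above
		 * around and is rejected by the range check.
		 */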
1996161678Sdavidxu
1997170300Sjeff		mtx_lock_spin(&umtx_lock);
1998161678Sdavidxu		if (UPRI(td) < PRI_MIN_REALTIME + ceiling) {
1999170300Sjeff			mtx_unlock_spin(&umtx_lock);
2000161678Sdavidxu			error = EINVAL;
2001161678Sdavidxu			goto out;
2002161678Sdavidxu		}
2003161678Sdavidxu		if (su && PRI_MIN_REALTIME + ceiling < uq->uq_inherited_pri) {
2004161678Sdavidxu			uq->uq_inherited_pri = PRI_MIN_REALTIME + ceiling;
2005170300Sjeff			thread_lock(td);
2006161678Sdavidxu			if (uq->uq_inherited_pri < UPRI(td))
2007161678Sdavidxu				sched_lend_user_prio(td, uq->uq_inherited_pri);
2008170300Sjeff			thread_unlock(td);
2009161678Sdavidxu		}
2010170300Sjeff		mtx_unlock_spin(&umtx_lock);
2011161678Sdavidxu
2012161678Sdavidxu		owner = casuword32(&m->m_owner,
2013161678Sdavidxu		    UMUTEX_CONTESTED, id | UMUTEX_CONTESTED);
2014161678Sdavidxu
2015161678Sdavidxu		if (owner == UMUTEX_CONTESTED) {
2016161678Sdavidxu			error = 0;
2017161678Sdavidxu			break;
2018161678Sdavidxu		}
2019161678Sdavidxu
2020161678Sdavidxu		/* The address was invalid. */
2021161678Sdavidxu		if (owner == -1) {
2022161678Sdavidxu			error = EFAULT;
2023161678Sdavidxu			break;
2024161678Sdavidxu		}
2025161678Sdavidxu
2026161678Sdavidxu		if ((flags & UMUTEX_ERROR_CHECK) != 0 &&
2027161678Sdavidxu		    (owner & ~UMUTEX_CONTESTED) == id) {
2028161678Sdavidxu			error = EDEADLK;
2029161678Sdavidxu			break;
2030161678Sdavidxu		}
2031161678Sdavidxu
2032161678Sdavidxu		if (try != 0) {
2033161678Sdavidxu			error = EBUSY;
2034161678Sdavidxu			break;
2035161678Sdavidxu		}
2036161678Sdavidxu
2037161678Sdavidxu		/*
2038161678Sdavidxu		 * If we caught a signal, we have retried and now
2039161678Sdavidxu		 * exit immediately.
2040161678Sdavidxu		 */
2041161678Sdavidxu		if (error != 0)
2042161678Sdavidxu			break;
2043161678Sdavidxu
2044161678Sdavidxu		umtxq_lock(&uq->uq_key);
2045161678Sdavidxu		umtxq_insert(uq);
2046161678Sdavidxu		umtxq_unbusy(&uq->uq_key);
2047161678Sdavidxu		error = umtxq_sleep(uq, "umtxpp", timo);
2048161678Sdavidxu		umtxq_remove(uq);
2049161678Sdavidxu		umtxq_unlock(&uq->uq_key);
2050161678Sdavidxu
2051170300Sjeff		mtx_lock_spin(&umtx_lock);
2052161678Sdavidxu		uq->uq_inherited_pri = old_inherited_pri;
2053161678Sdavidxu		pri = PRI_MAX;
2054161678Sdavidxu		TAILQ_FOREACH(pi, &uq->uq_pi_contested, pi_link) {
2055161678Sdavidxu			uq2 = TAILQ_FIRST(&pi->pi_blocked);
2056161678Sdavidxu			if (uq2 != NULL) {
2057161678Sdavidxu				if (pri > UPRI(uq2->uq_thread))
2058161678Sdavidxu					pri = UPRI(uq2->uq_thread);
2059161678Sdavidxu			}
2060161678Sdavidxu		}
2061161678Sdavidxu		if (pri > uq->uq_inherited_pri)
2062161678Sdavidxu			pri = uq->uq_inherited_pri;
2063170300Sjeff		thread_lock(td);
2064216791Sdavidxu		sched_lend_user_prio(td, pri);
2065170300Sjeff		thread_unlock(td);
2066170300Sjeff		mtx_unlock_spin(&umtx_lock);
2067161678Sdavidxu	}
2068161678Sdavidxu
2069161678Sdavidxu	if (error != 0) {
2070170300Sjeff		mtx_lock_spin(&umtx_lock);
2071161678Sdavidxu		uq->uq_inherited_pri = old_inherited_pri;
2072161678Sdavidxu		pri = PRI_MAX;
2073161678Sdavidxu		TAILQ_FOREACH(pi, &uq->uq_pi_contested, pi_link) {
2074161678Sdavidxu			uq2 = TAILQ_FIRST(&pi->pi_blocked);
2075161678Sdavidxu			if (uq2 != NULL) {
2076161678Sdavidxu				if (pri > UPRI(uq2->uq_thread))
2077161678Sdavidxu					pri = UPRI(uq2->uq_thread);
2078161678Sdavidxu			}
2079161678Sdavidxu		}
2080161678Sdavidxu		if (pri > uq->uq_inherited_pri)
2081161678Sdavidxu			pri = uq->uq_inherited_pri;
2082170300Sjeff		thread_lock(td);
2083216791Sdavidxu		sched_lend_user_prio(td, pri);
2084170300Sjeff		thread_unlock(td);
2085170300Sjeff		mtx_unlock_spin(&umtx_lock);
2086161678Sdavidxu	}
2087161678Sdavidxu
2088161678Sdavidxuout:
2089161678Sdavidxu	umtxq_lock(&uq->uq_key);
2090161678Sdavidxu	umtxq_unbusy(&uq->uq_key);
2091161678Sdavidxu	umtxq_unlock(&uq->uq_key);
2092161678Sdavidxu	umtx_key_release(&uq->uq_key);
2093161678Sdavidxu	return (error);
2094161678Sdavidxu}
2095161678Sdavidxu
2096161678Sdavidxu/*
2097161678Sdavidxu * Unlock a PP mutex.
2098161678Sdavidxu */
2099161678Sdavidxustatic int
2100161678Sdavidxudo_unlock_pp(struct thread *td, struct umutex *m, uint32_t flags)
2101161678Sdavidxu{
2102161678Sdavidxu	struct umtx_key key;
2103161678Sdavidxu	struct umtx_q *uq, *uq2;
2104161678Sdavidxu	struct umtx_pi *pi;
2105161678Sdavidxu	uint32_t owner, id;
2106161678Sdavidxu	uint32_t rceiling;
2107161926Sdavidxu	int error, pri, new_inherited_pri, su;
2108161678Sdavidxu
2109161678Sdavidxu	id = td->td_tid;
2110161678Sdavidxu	uq = td->td_umtxq;
2111164033Srwatson	su = (priv_check(td, PRIV_SCHED_RTPRIO) == 0);
2112161678Sdavidxu
2113161678Sdavidxu	/*
2114161678Sdavidxu	 * Make sure we own this mtx.
2115161678Sdavidxu	 */
2116163449Sdavidxu	owner = fuword32(__DEVOLATILE(uint32_t *, &m->m_owner));
2117161678Sdavidxu	if (owner == -1)
2118161678Sdavidxu		return (EFAULT);
2119161678Sdavidxu
2120161678Sdavidxu	if ((owner & ~UMUTEX_CONTESTED) != id)
2121161678Sdavidxu		return (EPERM);
2122161678Sdavidxu
2123161678Sdavidxu	error = copyin(&m->m_ceilings[1], &rceiling, sizeof(uint32_t));
2124161678Sdavidxu	if (error != 0)
2125161678Sdavidxu		return (error);
2126161678Sdavidxu
2127161678Sdavidxu	if (rceiling == -1)
2128161678Sdavidxu		new_inherited_pri = PRI_MAX;
2129161678Sdavidxu	else {
2130161678Sdavidxu		rceiling = RTP_PRIO_MAX - rceiling;
2131161678Sdavidxu		if (rceiling > RTP_PRIO_MAX)
2132161678Sdavidxu			return (EINVAL);
2133161678Sdavidxu		new_inherited_pri = PRI_MIN_REALTIME + rceiling;
2134161678Sdavidxu	}
2135161678Sdavidxu
2136161678Sdavidxu	if ((error = umtx_key_get(m, TYPE_PP_UMUTEX, GET_SHARE(flags),
2137161678Sdavidxu	    &key)) != 0)
2138161678Sdavidxu		return (error);
2139161678Sdavidxu	umtxq_lock(&key);
2140161678Sdavidxu	umtxq_busy(&key);
2141161678Sdavidxu	umtxq_unlock(&key);
2142161678Sdavidxu	/*
2143161678Sdavidxu	 * For a priority-protected mutex, always set the unlocked state
2144161678Sdavidxu	 * to UMUTEX_CONTESTED so that userland always enters the kernel
2145161678Sdavidxu	 * to lock the mutex.  This is necessary because the thread's
2146161678Sdavidxu	 * priority has to be adjusted for such a mutex.
2147161678Sdavidxu	 */
2148163449Sdavidxu	error = suword32(__DEVOLATILE(uint32_t *, &m->m_owner),
2149163449Sdavidxu		UMUTEX_CONTESTED);
2150161678Sdavidxu
2151161678Sdavidxu	umtxq_lock(&key);
2152161678Sdavidxu	if (error == 0)
2153161678Sdavidxu		umtxq_signal(&key, 1);
2154161678Sdavidxu	umtxq_unbusy(&key);
2155161678Sdavidxu	umtxq_unlock(&key);
2156161678Sdavidxu
2157161678Sdavidxu	if (error == -1)
2158161678Sdavidxu		error = EFAULT;
2159161678Sdavidxu	else {
2160170300Sjeff		mtx_lock_spin(&umtx_lock);
2161161926Sdavidxu		if (su != 0)
2162161926Sdavidxu			uq->uq_inherited_pri = new_inherited_pri;
2163161678Sdavidxu		pri = PRI_MAX;
2164161678Sdavidxu		TAILQ_FOREACH(pi, &uq->uq_pi_contested, pi_link) {
2165161678Sdavidxu			uq2 = TAILQ_FIRST(&pi->pi_blocked);
2166161678Sdavidxu			if (uq2 != NULL) {
2167161678Sdavidxu				if (pri > UPRI(uq2->uq_thread))
2168161678Sdavidxu					pri = UPRI(uq2->uq_thread);
2169161678Sdavidxu			}
2170161678Sdavidxu		}
2171161678Sdavidxu		if (pri > uq->uq_inherited_pri)
2172161678Sdavidxu			pri = uq->uq_inherited_pri;
2173170300Sjeff		thread_lock(td);
2174216791Sdavidxu		sched_lend_user_prio(td, pri);
2175170300Sjeff		thread_unlock(td);
2176170300Sjeff		mtx_unlock_spin(&umtx_lock);
2177161678Sdavidxu	}
2178161678Sdavidxu	umtx_key_release(&key);
2179161678Sdavidxu	return (error);
2180161678Sdavidxu}
2181161678Sdavidxu
2182161678Sdavidxustatic int
2183161678Sdavidxudo_set_ceiling(struct thread *td, struct umutex *m, uint32_t ceiling,
2184161678Sdavidxu	uint32_t *old_ceiling)
2185161678Sdavidxu{
2186161678Sdavidxu	struct umtx_q *uq;
2187161678Sdavidxu	uint32_t save_ceiling;
2188161678Sdavidxu	uint32_t owner, id;
2189161678Sdavidxu	uint32_t flags;
2190161678Sdavidxu	int error;
2191161678Sdavidxu
2192161678Sdavidxu	flags = fuword32(&m->m_flags);
2193161678Sdavidxu	if ((flags & UMUTEX_PRIO_PROTECT) == 0)
2194161678Sdavidxu		return (EINVAL);
2195161678Sdavidxu	if (ceiling > RTP_PRIO_MAX)
2196161678Sdavidxu		return (EINVAL);
2197161678Sdavidxu	id = td->td_tid;
2198161678Sdavidxu	uq = td->td_umtxq;
2199161678Sdavidxu	if ((error = umtx_key_get(m, TYPE_PP_UMUTEX, GET_SHARE(flags),
2200161678Sdavidxu	   &uq->uq_key)) != 0)
2201161678Sdavidxu		return (error);
2202161678Sdavidxu	for (;;) {
2203161678Sdavidxu		umtxq_lock(&uq->uq_key);
2204161678Sdavidxu		umtxq_busy(&uq->uq_key);
2205161678Sdavidxu		umtxq_unlock(&uq->uq_key);
2206161678Sdavidxu
2207161678Sdavidxu		save_ceiling = fuword32(&m->m_ceilings[0]);
2208161678Sdavidxu
2209161678Sdavidxu		owner = casuword32(&m->m_owner,
2210161678Sdavidxu		    UMUTEX_CONTESTED, id | UMUTEX_CONTESTED);
2211161678Sdavidxu
2212161678Sdavidxu		if (owner == UMUTEX_CONTESTED) {
2213161678Sdavidxu			suword32(&m->m_ceilings[0], ceiling);
2214163449Sdavidxu			suword32(__DEVOLATILE(uint32_t *, &m->m_owner),
2215163449Sdavidxu				UMUTEX_CONTESTED);
2216161678Sdavidxu			error = 0;
2217161678Sdavidxu			break;
2218161678Sdavidxu		}
2219161678Sdavidxu
2220161678Sdavidxu		/* The address was invalid. */
2221161678Sdavidxu		if (owner == -1) {
2222161678Sdavidxu			error = EFAULT;
2223161678Sdavidxu			break;
2224161678Sdavidxu		}
2225161678Sdavidxu
2226161678Sdavidxu		if ((owner & ~UMUTEX_CONTESTED) == id) {
2227161678Sdavidxu			suword32(&m->m_ceilings[0], ceiling);
2228161678Sdavidxu			error = 0;
2229161678Sdavidxu			break;
2230161678Sdavidxu		}
2231161678Sdavidxu
2232161678Sdavidxu		/*
2233161678Sdavidxu		 * If we caught a signal, we have retried and now
2234161678Sdavidxu		 * exit immediately.
2235161678Sdavidxu		 */
2236161678Sdavidxu		if (error != 0)
2237161678Sdavidxu			break;
2238161678Sdavidxu
2239161678Sdavidxu		/*
2240161678Sdavidxu		 * If we set the contested bit, sleep.  Otherwise the lock
2241161678Sdavidxu		 * changed and we need to retry, or we lost a race to the thread
2242161678Sdavidxu		 * unlocking the umtx.
2243161678Sdavidxu		 */
2244161678Sdavidxu		umtxq_lock(&uq->uq_key);
2245161678Sdavidxu		umtxq_insert(uq);
2246161678Sdavidxu		umtxq_unbusy(&uq->uq_key);
2247161678Sdavidxu		error = umtxq_sleep(uq, "umtxpp", 0);
2248161678Sdavidxu		umtxq_remove(uq);
2249161678Sdavidxu		umtxq_unlock(&uq->uq_key);
2250161678Sdavidxu	}
2251161678Sdavidxu	umtxq_lock(&uq->uq_key);
2252161678Sdavidxu	if (error == 0)
2253161678Sdavidxu		umtxq_signal(&uq->uq_key, INT_MAX);
2254161678Sdavidxu	umtxq_unbusy(&uq->uq_key);
2255161678Sdavidxu	umtxq_unlock(&uq->uq_key);
2256161678Sdavidxu	umtx_key_release(&uq->uq_key);
2257161678Sdavidxu	if (error == 0 && old_ceiling != NULL)
2258161678Sdavidxu		suword32(old_ceiling, save_ceiling);
2259161678Sdavidxu	return (error);
2260161678Sdavidxu}
2261161678Sdavidxu
2262162030Sdavidxustatic int
2263162030Sdavidxu_do_lock_umutex(struct thread *td, struct umutex *m, int flags, int timo,
2264179970Sdavidxu	int mode)
2265162030Sdavidxu{
2266162030Sdavidxu	switch(flags & (UMUTEX_PRIO_INHERIT | UMUTEX_PRIO_PROTECT)) {
2267162030Sdavidxu	case 0:
2268179970Sdavidxu		return (_do_lock_normal(td, m, flags, timo, mode));
2269162030Sdavidxu	case UMUTEX_PRIO_INHERIT:
2270179970Sdavidxu		return (_do_lock_pi(td, m, flags, timo, mode));
2271162030Sdavidxu	case UMUTEX_PRIO_PROTECT:
2272179970Sdavidxu		return (_do_lock_pp(td, m, flags, timo, mode));
2273162030Sdavidxu	}
2274162030Sdavidxu	return (EINVAL);
2275162030Sdavidxu}
2276162030Sdavidxu
2277161678Sdavidxu/*
2278161678Sdavidxu * Lock a userland POSIX mutex.
2279161678Sdavidxu */
2280161678Sdavidxustatic int
2281162030Sdavidxudo_lock_umutex(struct thread *td, struct umutex *m,
2282232144Sdavidxu	struct _umtx_time *timeout, int mode)
2283161678Sdavidxu{
2284232144Sdavidxu	struct timespec cts, ets, tts;
2285161678Sdavidxu	uint32_t flags;
2286162030Sdavidxu	int error;
2287161678Sdavidxu
2288161678Sdavidxu	flags = fuword32(&m->m_flags);
2289161678Sdavidxu	if (flags == -1)
2290161678Sdavidxu		return (EFAULT);
2291161678Sdavidxu
2292162030Sdavidxu	if (timeout == NULL) {
2293179970Sdavidxu		error = _do_lock_umutex(td, m, flags, 0, mode);
2294162030Sdavidxu		/* Mutex locking is restarted if it is interrupted. */
2295179970Sdavidxu		if (error == EINTR && mode != _UMUTEX_WAIT)
2296162030Sdavidxu			error = ERESTART;
2297162030Sdavidxu	} else {
2298232144Sdavidxu		kern_clock_gettime(td, timeout->_clockid, &cts);
2299232144Sdavidxu		if ((timeout->_flags & UMTX_ABSTIME) == 0) {
2300232144Sdavidxu			ets = cts;
2301232144Sdavidxu			timespecadd(&ets, &timeout->_timeout);
2302232144Sdavidxu			tts = timeout->_timeout;
2303232144Sdavidxu		} else {
2304232144Sdavidxu			ets = timeout->_timeout;
2305232144Sdavidxu			tts = timeout->_timeout;
2306232144Sdavidxu			timespecsub(&tts, &cts);
2307232144Sdavidxu		}
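		/*
		 * Here cts is the current time, ets the absolute end
		 * time of the wait and tts the remaining time to sleep;
		 * the loop below re-derives tts from ets after every
		 * premature timeout.
		 */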
2308162030Sdavidxu		for (;;) {
2309232144Sdavidxu			error = _do_lock_umutex(td, m, flags, tstohz(&tts), mode);
2310162030Sdavidxu			if (error != ETIMEDOUT)
2311162030Sdavidxu				break;
2312232144Sdavidxu			kern_clock_gettime(td, timeout->_clockid, &cts);
2313232144Sdavidxu			if (timespeccmp(&cts, &ets, >=))
2314162030Sdavidxu				break;
2315232144Sdavidxu			tts = ets;
2316232144Sdavidxu			timespecsub(&tts, &cts);
2317162030Sdavidxu		}
2318162030Sdavidxu		/* Timed-locking is not restarted. */
2319162030Sdavidxu		if (error == ERESTART)
2320162030Sdavidxu			error = EINTR;
2321161742Sdavidxu	}
2322162030Sdavidxu	return (error);
2323161678Sdavidxu}
2324161678Sdavidxu
2325161678Sdavidxu/*
2326161678Sdavidxu * Unlock a userland POSIX mutex.
2327161678Sdavidxu */
2328161678Sdavidxustatic int
2329161678Sdavidxudo_unlock_umutex(struct thread *td, struct umutex *m)
2330161678Sdavidxu{
2331161678Sdavidxu	uint32_t flags;
2332161678Sdavidxu
2333161678Sdavidxu	flags = fuword32(&m->m_flags);
2334161678Sdavidxu	if (flags == -1)
2335161678Sdavidxu		return (EFAULT);
2336161678Sdavidxu
2337161855Sdavidxu	switch(flags & (UMUTEX_PRIO_INHERIT | UMUTEX_PRIO_PROTECT)) {
2338161855Sdavidxu	case 0:
2339161855Sdavidxu		return (do_unlock_normal(td, m, flags));
2340161855Sdavidxu	case UMUTEX_PRIO_INHERIT:
2341161855Sdavidxu		return (do_unlock_pi(td, m, flags));
2342161855Sdavidxu	case UMUTEX_PRIO_PROTECT:
2343161855Sdavidxu		return (do_unlock_pp(td, m, flags));
2344161855Sdavidxu	}
2345161678Sdavidxu
2346161855Sdavidxu	return (EINVAL);
2347161678Sdavidxu}
2348161678Sdavidxu
2349164839Sdavidxustatic int
2350164839Sdavidxudo_cv_wait(struct thread *td, struct ucond *cv, struct umutex *m,
2351164876Sdavidxu	struct timespec *timeout, u_long wflags)
2352164839Sdavidxu{
2353164839Sdavidxu	struct umtx_q *uq;
2354164839Sdavidxu	uint32_t flags;
2355216641Sdavidxu	uint32_t clockid;
2356164839Sdavidxu	int error;
2357164839Sdavidxu
2358164839Sdavidxu	uq = td->td_umtxq;
2359164839Sdavidxu	flags = fuword32(&cv->c_flags);
2360164839Sdavidxu	error = umtx_key_get(cv, TYPE_CV, GET_SHARE(flags), &uq->uq_key);
2361164839Sdavidxu	if (error != 0)
2362164839Sdavidxu		return (error);
2363216641Sdavidxu
2364216641Sdavidxu	if ((wflags & CVWAIT_CLOCKID) != 0) {
2365216641Sdavidxu		clockid = fuword32(&cv->c_clockid);
2366216641Sdavidxu		if (clockid < CLOCK_REALTIME ||
2367216641Sdavidxu		    clockid >= CLOCK_THREAD_CPUTIME_ID) {
2368216641Sdavidxu			/* hmm, only HW clock id will work. */
2369216641Sdavidxu			return (EINVAL);
2370216641Sdavidxu		}
2371216641Sdavidxu	} else {
2372216641Sdavidxu		clockid = CLOCK_REALTIME;
2373216641Sdavidxu	}
2374216641Sdavidxu
2375164839Sdavidxu	umtxq_lock(&uq->uq_key);
2376164839Sdavidxu	umtxq_busy(&uq->uq_key);
2377164839Sdavidxu	umtxq_insert(uq);
2378164839Sdavidxu	umtxq_unlock(&uq->uq_key);
2379164839Sdavidxu
2380164839Sdavidxu	/*
2381216641Sdavidxu	 * Set c_has_waiters to 1 before releasing the user mutex, but
2382216641Sdavidxu	 * avoid dirtying the cache line when the flag is already set.
2383164839Sdavidxu	 */
2384216641Sdavidxu	if (fuword32(__DEVOLATILE(uint32_t *, &cv->c_has_waiters)) == 0)
2385216641Sdavidxu		suword32(__DEVOLATILE(uint32_t *, &cv->c_has_waiters), 1);
2386164839Sdavidxu
2387164839Sdavidxu	umtxq_lock(&uq->uq_key);
2388164839Sdavidxu	umtxq_unbusy(&uq->uq_key);
2389164839Sdavidxu	umtxq_unlock(&uq->uq_key);
2390164839Sdavidxu
2391164839Sdavidxu	error = do_unlock_umutex(td, m);
2392164839Sdavidxu
2393164839Sdavidxu	umtxq_lock(&uq->uq_key);
2394164839Sdavidxu	if (error == 0) {
2395233642Sdavidxu		if (timeout == NULL)
2396164839Sdavidxu			error = umtxq_sleep(uq, "ucond", 0);
2397233642Sdavidxu		else
2398233642Sdavidxu			error = umtxq_nanosleep(td, clockid,
2399233642Sdavidxu			    ((wflags & CVWAIT_ABSTIME) != 0),
2400233642Sdavidxu			    timeout, "ucond");
2401164839Sdavidxu	}
2402164839Sdavidxu
2403211794Sdavidxu	if ((uq->uq_flags & UQF_UMTXQ) == 0)
2404211794Sdavidxu		error = 0;
2405211794Sdavidxu	else {
2406216641Sdavidxu		/*
2407216641Sdavidxu		 * This must be a timeout, an interruption by a signal, or
2408216641Sdavidxu		 * a spurious wakeup; clear the c_has_waiters flag when
2409216641Sdavidxu		 * necessary.
2410216641Sdavidxu		 */
2411216641Sdavidxu		umtxq_busy(&uq->uq_key);
2412216641Sdavidxu		if ((uq->uq_flags & UQF_UMTXQ) != 0) {
2413216641Sdavidxu			int oldlen = uq->uq_cur_queue->length;
2414216641Sdavidxu			umtxq_remove(uq);
2415216641Sdavidxu			if (oldlen == 1) {
2416216641Sdavidxu				umtxq_unlock(&uq->uq_key);
2417216641Sdavidxu				suword32(
2418216641Sdavidxu				    __DEVOLATILE(uint32_t *,
2419216641Sdavidxu					 &cv->c_has_waiters), 0);
2420216641Sdavidxu				umtxq_lock(&uq->uq_key);
2421216641Sdavidxu			}
2422216641Sdavidxu		}
2423216641Sdavidxu		umtxq_unbusy(&uq->uq_key);
2424164839Sdavidxu		if (error == ERESTART)
2425164839Sdavidxu			error = EINTR;
2426164839Sdavidxu	}
2427211794Sdavidxu
2428164839Sdavidxu	umtxq_unlock(&uq->uq_key);
2429164839Sdavidxu	umtx_key_release(&uq->uq_key);
2430164839Sdavidxu	return (error);
2431164839Sdavidxu}
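
/*
 * A hypothetical userland pairing for the wait above (sketch only):
 * the waiter enters the kernel while still holding m, and a signaller
 * tests c_has_waiters first so that the system call is skipped when
 * nobody is waiting.
 *
 *	if (cv->c_has_waiters)
 *		(void)_umtx_op(cv, UMTX_OP_CV_SIGNAL, 0, NULL, NULL);
 */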
2432164839Sdavidxu
2433164839Sdavidxu/*
2434164839Sdavidxu * Signal a userland condition variable.
2435164839Sdavidxu */
2436164839Sdavidxustatic int
2437164839Sdavidxudo_cv_signal(struct thread *td, struct ucond *cv)
2438164839Sdavidxu{
2439164839Sdavidxu	struct umtx_key key;
2440164839Sdavidxu	int error, cnt, nwake;
2441164839Sdavidxu	uint32_t flags;
2442164839Sdavidxu
2443164839Sdavidxu	flags = fuword32(&cv->c_flags);
2444164839Sdavidxu	if ((error = umtx_key_get(cv, TYPE_CV, GET_SHARE(flags), &key)) != 0)
2445164839Sdavidxu		return (error);
2446164839Sdavidxu	umtxq_lock(&key);
2447164839Sdavidxu	umtxq_busy(&key);
2448164839Sdavidxu	cnt = umtxq_count(&key);
2449164839Sdavidxu	nwake = umtxq_signal(&key, 1);
2450164839Sdavidxu	if (cnt <= nwake) {
2451164839Sdavidxu		umtxq_unlock(&key);
2452164839Sdavidxu		error = suword32(
2453164839Sdavidxu		    __DEVOLATILE(uint32_t *, &cv->c_has_waiters), 0);
2454164839Sdavidxu		umtxq_lock(&key);
2455164839Sdavidxu	}
2456164839Sdavidxu	umtxq_unbusy(&key);
2457164839Sdavidxu	umtxq_unlock(&key);
2458164839Sdavidxu	umtx_key_release(&key);
2459164839Sdavidxu	return (error);
2460164839Sdavidxu}
2461164839Sdavidxu
2462164839Sdavidxustatic int
2463164839Sdavidxudo_cv_broadcast(struct thread *td, struct ucond *cv)
2464164839Sdavidxu{
2465164839Sdavidxu	struct umtx_key key;
2466164839Sdavidxu	int error;
2467164839Sdavidxu	uint32_t flags;
2468164839Sdavidxu
2469164839Sdavidxu	flags = fuword32(&cv->c_flags);
2470164839Sdavidxu	if ((error = umtx_key_get(cv, TYPE_CV, GET_SHARE(flags), &key)) != 0)
2471164839Sdavidxu		return (error);
2472164839Sdavidxu
2473164839Sdavidxu	umtxq_lock(&key);
2474164839Sdavidxu	umtxq_busy(&key);
2475164839Sdavidxu	umtxq_signal(&key, INT_MAX);
2476164839Sdavidxu	umtxq_unlock(&key);
2477164839Sdavidxu
2478164839Sdavidxu	error = suword32(__DEVOLATILE(uint32_t *, &cv->c_has_waiters), 0);
2479164839Sdavidxu
2480164839Sdavidxu	umtxq_lock(&key);
2481164839Sdavidxu	umtxq_unbusy(&key);
2482164839Sdavidxu	umtxq_unlock(&key);
2483164839Sdavidxu
2484164839Sdavidxu	umtx_key_release(&key);
2485164839Sdavidxu	return (error);
2486164839Sdavidxu}
2487164839Sdavidxu
2488177848Sdavidxustatic int
2489177848Sdavidxudo_rw_rdlock(struct thread *td, struct urwlock *rwlock, long fflag, int timo)
2490177848Sdavidxu{
2491177848Sdavidxu	struct umtx_q *uq;
2492177848Sdavidxu	uint32_t flags, wrflags;
2493177848Sdavidxu	int32_t state, oldstate;
2494177848Sdavidxu	int32_t blocked_readers;
2495177848Sdavidxu	int error;
2496177848Sdavidxu
2497177848Sdavidxu	uq = td->td_umtxq;
2498177848Sdavidxu	flags = fuword32(&rwlock->rw_flags);
2499177848Sdavidxu	error = umtx_key_get(rwlock, TYPE_RWLOCK, GET_SHARE(flags), &uq->uq_key);
2500177848Sdavidxu	if (error != 0)
2501177848Sdavidxu		return (error);
2502177848Sdavidxu
2503177848Sdavidxu	wrflags = URWLOCK_WRITE_OWNER;
2504177848Sdavidxu	if (!(fflag & URWLOCK_PREFER_READER) && !(flags & URWLOCK_PREFER_READER))
2505177848Sdavidxu		wrflags |= URWLOCK_WRITE_WAITERS;
2506177848Sdavidxu
2507177848Sdavidxu	for (;;) {
2508177848Sdavidxu		state = fuword32(__DEVOLATILE(int32_t *, &rwlock->rw_state));
2509177848Sdavidxu		/* try to lock it */
2510177848Sdavidxu		while (!(state & wrflags)) {
2511177848Sdavidxu			if (__predict_false(URWLOCK_READER_COUNT(state) == URWLOCK_MAX_READERS)) {
2512177848Sdavidxu				umtx_key_release(&uq->uq_key);
2513177848Sdavidxu				return (EAGAIN);
2514177848Sdavidxu			}
2515177848Sdavidxu			oldstate = casuword32(&rwlock->rw_state, state, state + 1);
2516177848Sdavidxu			if (oldstate == state) {
2517177848Sdavidxu				umtx_key_release(&uq->uq_key);
2518177848Sdavidxu				return (0);
2519177848Sdavidxu			}
2520177848Sdavidxu			state = oldstate;
2521177848Sdavidxu		}
2522177848Sdavidxu
2523177848Sdavidxu		if (error)
2524177848Sdavidxu			break;
2525177848Sdavidxu
2526177848Sdavidxu		/* grab monitor lock */
2527177848Sdavidxu		umtxq_lock(&uq->uq_key);
2528177848Sdavidxu		umtxq_busy(&uq->uq_key);
2529177848Sdavidxu		umtxq_unlock(&uq->uq_key);
2530177848Sdavidxu
2531203414Sdavidxu		/*
2532203414Sdavidxu		 * re-read the state, in case it changed between the try-lock above
2533203414Sdavidxu		 * and the check below
2534203414Sdavidxu		 */
2535203414Sdavidxu		state = fuword32(__DEVOLATILE(int32_t *, &rwlock->rw_state));
2536203414Sdavidxu
2537177848Sdavidxu		/* set read contention bit */
2538177848Sdavidxu		while ((state & wrflags) && !(state & URWLOCK_READ_WAITERS)) {
2539177848Sdavidxu			oldstate = casuword32(&rwlock->rw_state, state, state | URWLOCK_READ_WAITERS);
2540177848Sdavidxu			if (oldstate == state)
2541177848Sdavidxu				goto sleep;
2542177848Sdavidxu			state = oldstate;
2543177848Sdavidxu		}
2544177848Sdavidxu
2545177848Sdavidxu		/* state is changed while setting flags, restart */
2546177848Sdavidxu		/* The state changed while we were setting the flag; restart. */
2547177848Sdavidxu			umtxq_lock(&uq->uq_key);
2548177848Sdavidxu			umtxq_unbusy(&uq->uq_key);
2549177848Sdavidxu			umtxq_unlock(&uq->uq_key);
2550177848Sdavidxu			continue;
2551177848Sdavidxu		}
2552177848Sdavidxu
2553177848Sdavidxusleep:
2554177848Sdavidxu		/* The contention bit is set; before sleeping, increase the read-waiter count. */
2555177848Sdavidxu		blocked_readers = fuword32(&rwlock->rw_blocked_readers);
2556177848Sdavidxu		suword32(&rwlock->rw_blocked_readers, blocked_readers+1);
2557177848Sdavidxu
2558177848Sdavidxu		while (state & wrflags) {
2559177848Sdavidxu			umtxq_lock(&uq->uq_key);
2560177848Sdavidxu			umtxq_insert(uq);
2561177848Sdavidxu			umtxq_unbusy(&uq->uq_key);
2562177848Sdavidxu
2563177848Sdavidxu			error = umtxq_sleep(uq, "urdlck", timo);
2564177848Sdavidxu
2565177848Sdavidxu			umtxq_busy(&uq->uq_key);
2566177848Sdavidxu			umtxq_remove(uq);
2567177848Sdavidxu			umtxq_unlock(&uq->uq_key);
2568177848Sdavidxu			if (error)
2569177848Sdavidxu				break;
2570177848Sdavidxu			state = fuword32(__DEVOLATILE(int32_t *, &rwlock->rw_state));
2571177848Sdavidxu		}
2572177848Sdavidxu
2573177848Sdavidxu		/* decrease read waiter count, and may clear read contention bit */
2574177848Sdavidxu		/* Decrease the read-waiter count, and possibly clear the read-contention bit. */
2575177848Sdavidxu		suword32(&rwlock->rw_blocked_readers, blocked_readers-1);
2576177848Sdavidxu		if (blocked_readers == 1) {
2577177848Sdavidxu			state = fuword32(__DEVOLATILE(int32_t *, &rwlock->rw_state));
2578177848Sdavidxu			for (;;) {
2579177848Sdavidxu				oldstate = casuword32(&rwlock->rw_state, state,
2580177848Sdavidxu					 state & ~URWLOCK_READ_WAITERS);
2581177848Sdavidxu				if (oldstate == state)
2582177848Sdavidxu					break;
2583177848Sdavidxu				state = oldstate;
2584177848Sdavidxu			}
2585177848Sdavidxu		}
2586177848Sdavidxu
2587177848Sdavidxu		umtxq_lock(&uq->uq_key);
2588177848Sdavidxu		umtxq_unbusy(&uq->uq_key);
2589177848Sdavidxu		umtxq_unlock(&uq->uq_key);
2590177848Sdavidxu	}
2591177848Sdavidxu	umtx_key_release(&uq->uq_key);
2592177848Sdavidxu	return (error);
2593177848Sdavidxu}
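
/*
 * Layout of the rw_state word used above: the low bits hold the reader
 * count (URWLOCK_READER_COUNT, bounded by URWLOCK_MAX_READERS) and the
 * high bits are the URWLOCK_WRITE_OWNER, URWLOCK_WRITE_WAITERS and
 * URWLOCK_READ_WAITERS flags; e.g. a state of
 * (3 | URWLOCK_WRITE_WAITERS) means three readers hold the lock and at
 * least one writer is sleeping.
 */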
2594177848Sdavidxu
2595177848Sdavidxustatic int
2596232209Sdavidxudo_rw_rdlock2(struct thread *td, void *obj, long val, struct _umtx_time *timeout)
2597177848Sdavidxu{
2598232209Sdavidxu	struct timespec cts, ets, tts;
2599177848Sdavidxu	int error;
2600177848Sdavidxu
2601232209Sdavidxu	kern_clock_gettime(td, timeout->_clockid, &cts);
2602232209Sdavidxu	if ((timeout->_flags & UMTX_ABSTIME) == 0) {
2603232209Sdavidxu		ets = cts;
2604232209Sdavidxu		timespecadd(&ets, &timeout->_timeout);
2605232209Sdavidxu		tts = timeout->_timeout;
2606232209Sdavidxu	} else {
2607232209Sdavidxu		ets = timeout->_timeout;
2608232209Sdavidxu		tts = timeout->_timeout;
2609232209Sdavidxu		timespecsub(&tts, &cts);
2610232209Sdavidxu	}
2611177848Sdavidxu	for (;;) {
2612232209Sdavidxu		error = do_rw_rdlock(td, obj, val, tstohz(&tts));
2613177848Sdavidxu		if (error != ETIMEDOUT)
2614177848Sdavidxu			break;
2615232209Sdavidxu		kern_clock_gettime(td, timeout->_clockid, &cts);
2616232209Sdavidxu		if (timespeccmp(&cts, &ets, >=))
2617177848Sdavidxu			break;
2618232209Sdavidxu		tts = ets;
2619232209Sdavidxu		timespecsub(&tts, &cts);
2620177848Sdavidxu	}
2621177849Sdavidxu	if (error == ERESTART)
2622177849Sdavidxu		error = EINTR;
2623177848Sdavidxu	return (error);
2624177848Sdavidxu}
2625177848Sdavidxu
2626177848Sdavidxustatic int
2627177848Sdavidxudo_rw_wrlock(struct thread *td, struct urwlock *rwlock, int timo)
2628177848Sdavidxu{
2629177848Sdavidxu	struct umtx_q *uq;
2630177848Sdavidxu	uint32_t flags;
2631177848Sdavidxu	int32_t state, oldstate;
2632177848Sdavidxu	int32_t blocked_writers;
2633197476Sdavidxu	int32_t blocked_readers;
2634177848Sdavidxu	int error;
2635177848Sdavidxu
2636177848Sdavidxu	uq = td->td_umtxq;
2637177848Sdavidxu	flags = fuword32(&rwlock->rw_flags);
2638177848Sdavidxu	error = umtx_key_get(rwlock, TYPE_RWLOCK, GET_SHARE(flags), &uq->uq_key);
2639177848Sdavidxu	if (error != 0)
2640177848Sdavidxu		return (error);
2641177848Sdavidxu
2642197476Sdavidxu	blocked_readers = 0;
2643177848Sdavidxu	for (;;) {
2644177848Sdavidxu		state = fuword32(__DEVOLATILE(int32_t *, &rwlock->rw_state));
2645177848Sdavidxu		while (!(state & URWLOCK_WRITE_OWNER) && URWLOCK_READER_COUNT(state) == 0) {
2646177848Sdavidxu			oldstate = casuword32(&rwlock->rw_state, state, state | URWLOCK_WRITE_OWNER);
2647177848Sdavidxu			if (oldstate == state) {
2648177848Sdavidxu				umtx_key_release(&uq->uq_key);
2649177848Sdavidxu				return (0);
2650177848Sdavidxu			}
2651177848Sdavidxu			state = oldstate;
2652177848Sdavidxu		}
2653177848Sdavidxu
2654197476Sdavidxu		if (error) {
2655197476Sdavidxu			if (!(state & (URWLOCK_WRITE_OWNER|URWLOCK_WRITE_WAITERS)) &&
2656197476Sdavidxu			    blocked_readers != 0) {
2657197476Sdavidxu				umtxq_lock(&uq->uq_key);
2658197476Sdavidxu				umtxq_busy(&uq->uq_key);
2659197476Sdavidxu				umtxq_signal_queue(&uq->uq_key, INT_MAX, UMTX_SHARED_QUEUE);
2660197476Sdavidxu				umtxq_unbusy(&uq->uq_key);
2661197476Sdavidxu				umtxq_unlock(&uq->uq_key);
2662197476Sdavidxu			}
2663197476Sdavidxu
2664177848Sdavidxu			break;
2665197476Sdavidxu		}
2666177848Sdavidxu
2667177848Sdavidxu		/* grab monitor lock */
2668177848Sdavidxu		umtxq_lock(&uq->uq_key);
2669177848Sdavidxu		umtxq_busy(&uq->uq_key);
2670177848Sdavidxu		umtxq_unlock(&uq->uq_key);
2671177848Sdavidxu
2672203414Sdavidxu		/*
2673203414Sdavidxu		 * re-read the state, in case it changed between the try-lock above
2674203414Sdavidxu		 * and the check below
2675203414Sdavidxu		 */
2676203414Sdavidxu		state = fuword32(__DEVOLATILE(int32_t *, &rwlock->rw_state));
2677203414Sdavidxu
2678177848Sdavidxu		while (((state & URWLOCK_WRITE_OWNER) || URWLOCK_READER_COUNT(state) != 0) &&
2679177848Sdavidxu		       (state & URWLOCK_WRITE_WAITERS) == 0) {
2680177848Sdavidxu			oldstate = casuword32(&rwlock->rw_state, state, state | URWLOCK_WRITE_WAITERS);
2681177848Sdavidxu			if (oldstate == state)
2682177848Sdavidxu				goto sleep;
2683177848Sdavidxu			state = oldstate;
2684177848Sdavidxu		}
2685177848Sdavidxu
2686177848Sdavidxu		if (!(state & URWLOCK_WRITE_OWNER) && URWLOCK_READER_COUNT(state) == 0) {
2687177848Sdavidxu			umtxq_lock(&uq->uq_key);
2688177848Sdavidxu			umtxq_unbusy(&uq->uq_key);
2689177848Sdavidxu			umtxq_unlock(&uq->uq_key);
2690177848Sdavidxu			continue;
2691177848Sdavidxu		}
2692177848Sdavidxusleep:
2693177848Sdavidxu		blocked_writers = fuword32(&rwlock->rw_blocked_writers);
2694177848Sdavidxu		suword32(&rwlock->rw_blocked_writers, blocked_writers+1);
2695177848Sdavidxu
2696177848Sdavidxu		while ((state & URWLOCK_WRITE_OWNER) || URWLOCK_READER_COUNT(state) != 0) {
2697177848Sdavidxu			umtxq_lock(&uq->uq_key);
2698177848Sdavidxu			umtxq_insert_queue(uq, UMTX_EXCLUSIVE_QUEUE);
2699177848Sdavidxu			umtxq_unbusy(&uq->uq_key);
2700177848Sdavidxu
2701177848Sdavidxu			error = umtxq_sleep(uq, "uwrlck", timo);
2702177848Sdavidxu
2703177848Sdavidxu			umtxq_busy(&uq->uq_key);
2704177848Sdavidxu			umtxq_remove_queue(uq, UMTX_EXCLUSIVE_QUEUE);
2705177848Sdavidxu			umtxq_unlock(&uq->uq_key);
2706177848Sdavidxu			if (error)
2707177848Sdavidxu				break;
2708177848Sdavidxu			state = fuword32(__DEVOLATILE(int32_t *, &rwlock->rw_state));
2709177848Sdavidxu		}
2710177848Sdavidxu
2711177848Sdavidxu		blocked_writers = fuword32(&rwlock->rw_blocked_writers);
2712177848Sdavidxu		suword32(&rwlock->rw_blocked_writers, blocked_writers-1);
2713177848Sdavidxu		if (blocked_writers == 1) {
2714177848Sdavidxu			state = fuword32(__DEVOLATILE(int32_t *, &rwlock->rw_state));
2715177848Sdavidxu			for (;;) {
2716177848Sdavidxu				oldstate = casuword32(&rwlock->rw_state, state,
2717177848Sdavidxu					 state & ~URWLOCK_WRITE_WAITERS);
2718177848Sdavidxu				if (oldstate == state)
2719177848Sdavidxu					break;
2720177848Sdavidxu				state = oldstate;
2721177848Sdavidxu			}
2722197476Sdavidxu			blocked_readers = fuword32(&rwlock->rw_blocked_readers);
2723197476Sdavidxu		} else
2724197476Sdavidxu			blocked_readers = 0;
2725177848Sdavidxu
2726177848Sdavidxu		umtxq_lock(&uq->uq_key);
2727177848Sdavidxu		umtxq_unbusy(&uq->uq_key);
2728177848Sdavidxu		umtxq_unlock(&uq->uq_key);
2729177848Sdavidxu	}
2730177848Sdavidxu
2731177848Sdavidxu	umtx_key_release(&uq->uq_key);
2732177848Sdavidxu	return (error);
2733177848Sdavidxu}
2734177848Sdavidxu
2735177848Sdavidxustatic int
2736232209Sdavidxudo_rw_wrlock2(struct thread *td, void *obj, struct _umtx_time *timeout)
2737177848Sdavidxu{
2738232209Sdavidxu	struct timespec cts, ets, tts;
2739177848Sdavidxu	int error;
2740177848Sdavidxu
2741232209Sdavidxu	kern_clock_gettime(td, timeout->_clockid, &cts);
2742232209Sdavidxu	if ((timeout->_flags & UMTX_ABSTIME) == 0) {
2743232209Sdavidxu		ets = cts;
2744232209Sdavidxu		timespecadd(&ets, &timeout->_timeout);
2745232209Sdavidxu		tts = timeout->_timeout;
2746232209Sdavidxu	} else {
2747232209Sdavidxu		ets = timeout->_timeout;
2748232209Sdavidxu		tts = timeout->_timeout;
2749232209Sdavidxu		timespecsub(&tts, &cts);
2750232209Sdavidxu	}
2751177848Sdavidxu	for (;;) {
2752232209Sdavidxu		error = do_rw_wrlock(td, obj, tstohz(&tts));
2753177848Sdavidxu		if (error != ETIMEDOUT)
2754177848Sdavidxu			break;
2755232209Sdavidxu		kern_clock_gettime(td, timeout->_clockid, &cts);
2756232209Sdavidxu		if (timespeccmp(&cts, &ets, >=))
2757177848Sdavidxu			break;
2758232209Sdavidxu		tts = ets;
2759232209Sdavidxu		timespecsub(&tts, &cts);
2760177848Sdavidxu	}
2761177849Sdavidxu	if (error == ERESTART)
2762177849Sdavidxu		error = EINTR;
2763177848Sdavidxu	return (error);
2764177848Sdavidxu}
2765177848Sdavidxu
2766177848Sdavidxustatic int
2767177880Sdavidxudo_rw_unlock(struct thread *td, struct urwlock *rwlock)
2768177848Sdavidxu{
2769177848Sdavidxu	struct umtx_q *uq;
2770177848Sdavidxu	uint32_t flags;
2771177848Sdavidxu	int32_t state, oldstate;
2772177848Sdavidxu	int error, q, count;
2773177848Sdavidxu
2774177848Sdavidxu	uq = td->td_umtxq;
2775177848Sdavidxu	flags = fuword32(&rwlock->rw_flags);
2776177848Sdavidxu	error = umtx_key_get(rwlock, TYPE_RWLOCK, GET_SHARE(flags), &uq->uq_key);
2777177848Sdavidxu	if (error != 0)
2778177848Sdavidxu		return (error);
2779177848Sdavidxu
2780177848Sdavidxu	state = fuword32(__DEVOLATILE(int32_t *, &rwlock->rw_state));
2781177848Sdavidxu	if (state & URWLOCK_WRITE_OWNER) {
2782177848Sdavidxu		for (;;) {
2783177848Sdavidxu			oldstate = casuword32(&rwlock->rw_state, state,
2784177848Sdavidxu				state & ~URWLOCK_WRITE_OWNER);
2785177848Sdavidxu			if (oldstate != state) {
2786177848Sdavidxu				state = oldstate;
2787177848Sdavidxu				if (!(oldstate & URWLOCK_WRITE_OWNER)) {
2788177848Sdavidxu					error = EPERM;
2789177848Sdavidxu					goto out;
2790177848Sdavidxu				}
2791177848Sdavidxu			} else
2792177848Sdavidxu				break;
2793177848Sdavidxu		}
2794177848Sdavidxu	} else if (URWLOCK_READER_COUNT(state) != 0) {
2795177848Sdavidxu		for (;;) {
2796177848Sdavidxu			oldstate = casuword32(&rwlock->rw_state, state,
2797177848Sdavidxu				state - 1);
2798177848Sdavidxu			if (oldstate != state) {
2799177848Sdavidxu				state = oldstate;
2800177848Sdavidxu				if (URWLOCK_READER_COUNT(oldstate) == 0) {
2801177848Sdavidxu					error = EPERM;
2802177848Sdavidxu					goto out;
2803177848Sdavidxu				}
2804177848Sdavidxu			}
2805177848Sdavidxu			else
2806177848Sdavidxu				break;
2807177848Sdavidxu		}
2808177848Sdavidxu	} else {
2809177848Sdavidxu		error = EPERM;
2810177848Sdavidxu		goto out;
2811177848Sdavidxu	}
2812177848Sdavidxu
2813177848Sdavidxu	count = 0;
2814177848Sdavidxu
2815177848Sdavidxu	if (!(flags & URWLOCK_PREFER_READER)) {
2816177848Sdavidxu		if (state & URWLOCK_WRITE_WAITERS) {
2817177848Sdavidxu			count = 1;
2818177848Sdavidxu			q = UMTX_EXCLUSIVE_QUEUE;
2819177848Sdavidxu		} else if (state & URWLOCK_READ_WAITERS) {
2820177848Sdavidxu			count = INT_MAX;
2821177848Sdavidxu			q = UMTX_SHARED_QUEUE;
2822177848Sdavidxu		}
2823177848Sdavidxu	} else {
2824177848Sdavidxu		if (state & URWLOCK_READ_WAITERS) {
2825177848Sdavidxu			count = INT_MAX;
2826177848Sdavidxu			q = UMTX_SHARED_QUEUE;
2827177848Sdavidxu		} else if (state & URWLOCK_WRITE_WAITERS) {
2828177848Sdavidxu			count = 1;
2829177848Sdavidxu			q = UMTX_EXCLUSIVE_QUEUE;
2830177848Sdavidxu		}
2831177848Sdavidxu	}
2832177848Sdavidxu
2833177848Sdavidxu	if (count) {
2834177848Sdavidxu		umtxq_lock(&uq->uq_key);
2835177848Sdavidxu		umtxq_busy(&uq->uq_key);
2836177848Sdavidxu		umtxq_signal_queue(&uq->uq_key, count, q);
2837177848Sdavidxu		umtxq_unbusy(&uq->uq_key);
2838177848Sdavidxu		umtxq_unlock(&uq->uq_key);
2839177848Sdavidxu	}
2840177848Sdavidxuout:
2841177848Sdavidxu	umtx_key_release(&uq->uq_key);
2842177848Sdavidxu	return (error);
2843177848Sdavidxu}
2844177848Sdavidxu
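/*
 * Kernel side of a wait on a userland semaphore (struct _usem).  The
 * handshake is: publish _has_waiters = 1, issue a read barrier, then
 * re-read _count; a non-zero count means a post raced in and the thread
 * must not sleep.  A hedged sketch of the userland fast path this pairs
 * with (illustrative only, not the actual libc implementation):
 *
 *	uint32_t c;
 *
 *	while ((c = sem->_count) > 0)
 *		if (atomic_cmpset_acq_32(&sem->_count, c, c - 1))
 *			return (0);
 *	_umtx_op(sem, UMTX_OP_SEM_WAIT, 0, NULL, NULL);
 */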
2845201472Sdavidxustatic int
2846232144Sdavidxudo_sem_wait(struct thread *td, struct _usem *sem, struct _umtx_time *timeout)
2847201472Sdavidxu{
2848201472Sdavidxu	struct umtx_q *uq;
2849201472Sdavidxu	uint32_t flags, count;
2850201472Sdavidxu	int error;
2851201472Sdavidxu
2852201472Sdavidxu	uq = td->td_umtxq;
2853201472Sdavidxu	flags = fuword32(&sem->_flags);
2854201885Sdavidxu	error = umtx_key_get(sem, TYPE_SEM, GET_SHARE(flags), &uq->uq_key);
2855201472Sdavidxu	if (error != 0)
2856201472Sdavidxu		return (error);
2857201472Sdavidxu	umtxq_lock(&uq->uq_key);
2858201472Sdavidxu	umtxq_busy(&uq->uq_key);
2859201472Sdavidxu	umtxq_insert(uq);
2860201472Sdavidxu	umtxq_unlock(&uq->uq_key);
2861201472Sdavidxu
2862230194Sdavidxu	casuword32(__DEVOLATILE(uint32_t *, &sem->_has_waiters), 0, 1);
2863230194Sdavidxu	rmb();
2864201472Sdavidxu	count = fuword32(__DEVOLATILE(uint32_t *, &sem->_count));
2865201472Sdavidxu	if (count != 0) {
2866201472Sdavidxu		umtxq_lock(&uq->uq_key);
2867201472Sdavidxu		umtxq_unbusy(&uq->uq_key);
2868201472Sdavidxu		umtxq_remove(uq);
2869201472Sdavidxu		umtxq_unlock(&uq->uq_key);
2870201472Sdavidxu		umtx_key_release(&uq->uq_key);
2871201472Sdavidxu		return (0);
2872201472Sdavidxu	}
2873201472Sdavidxu	umtxq_lock(&uq->uq_key);
2874201472Sdavidxu	umtxq_unbusy(&uq->uq_key);
2875201472Sdavidxu
2876233642Sdavidxu	if (timeout == NULL)
2877201472Sdavidxu		error = umtxq_sleep(uq, "usem", 0);
2878233642Sdavidxu	else
2879233642Sdavidxu		error = umtxq_nanosleep(td, timeout->_clockid,
2880233642Sdavidxu		    ((timeout->_flags & UMTX_ABSTIME) != 0),
2881233642Sdavidxu		    &timeout->_timeout, "usem");
2882201472Sdavidxu
2883211794Sdavidxu	if ((uq->uq_flags & UQF_UMTXQ) == 0)
2884211794Sdavidxu		error = 0;
2885211794Sdavidxu	else {
2886211794Sdavidxu		umtxq_remove(uq);
2887201472Sdavidxu		if (error == ERESTART)
2888201472Sdavidxu			error = EINTR;
2889201472Sdavidxu	}
2890201472Sdavidxu	umtxq_unlock(&uq->uq_key);
2891201472Sdavidxu	umtx_key_release(&uq->uq_key);
2892201472Sdavidxu	return (error);
2893201472Sdavidxu}
2894201472Sdavidxu
2895201472Sdavidxu/*
2896201472Sdavidxu * Wake up a waiter on a userland semaphore.
2897201472Sdavidxu */
2898201472Sdavidxustatic int
2899201472Sdavidxudo_sem_wake(struct thread *td, struct _usem *sem)
2900201472Sdavidxu{
2901201472Sdavidxu	struct umtx_key key;
2902201472Sdavidxu	int error, cnt, nwake;
2903201472Sdavidxu	uint32_t flags;
2904201472Sdavidxu
2905201472Sdavidxu	flags = fuword32(&sem->_flags);
2906201885Sdavidxu	if ((error = umtx_key_get(sem, TYPE_SEM, GET_SHARE(flags), &key)) != 0)
2907201472Sdavidxu		return (error);
2908201472Sdavidxu	umtxq_lock(&key);
2909201472Sdavidxu	umtxq_busy(&key);
2910201472Sdavidxu	cnt = umtxq_count(&key);
2911201472Sdavidxu	nwake = umtxq_signal(&key, 1);
2912201472Sdavidxu	if (cnt <= nwake) {
2913201472Sdavidxu		umtxq_unlock(&key);
2914201472Sdavidxu		error = suword32(
2915201472Sdavidxu		    __DEVOLATILE(uint32_t *, &sem->_has_waiters), 0);
2916201472Sdavidxu		umtxq_lock(&key);
2917201472Sdavidxu	}
2918201472Sdavidxu	umtxq_unbusy(&key);
2919201472Sdavidxu	umtxq_unlock(&key);
2920201472Sdavidxu	umtx_key_release(&key);
2921201472Sdavidxu	return (error);
2922201472Sdavidxu}
2923201472Sdavidxu
2924139013Sdavidxuint
2925225617Skmacysys__umtx_lock(struct thread *td, struct _umtx_lock_args *uap)
2926139013Sdavidxu    /* struct umtx *umtx */
2927139013Sdavidxu{
2928162536Sdavidxu	return _do_lock_umtx(td, uap->umtx, td->td_tid, 0);
2929139013Sdavidxu}
2930139013Sdavidxu
2931139013Sdavidxuint
2932225617Skmacysys__umtx_unlock(struct thread *td, struct _umtx_unlock_args *uap)
2933139013Sdavidxu    /* struct umtx *umtx */
2934139013Sdavidxu{
2935162536Sdavidxu	return do_unlock_umtx(td, uap->umtx, td->td_tid);
2936139013Sdavidxu}
2937139013Sdavidxu
2938228219Sphoinline int
2939228219Sphoumtx_copyin_timeout(const void *addr, struct timespec *tsp)
2940228219Spho{
2941228219Spho	int error;
2942228219Spho
2943228219Spho	error = copyin(addr, tsp, sizeof(struct timespec));
2944228219Spho	if (error == 0) {
2945228219Spho		if (tsp->tv_sec < 0 ||
2946228219Spho		    tsp->tv_nsec >= 1000000000 ||
2947228219Spho		    tsp->tv_nsec < 0)
2948228219Spho			error = EINVAL;
2949228219Spho	}
2950228219Spho	return (error);
2951228219Spho}
2952228219Spho
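/*
 * Copy in the timeout argument of the _umtx_time flavoured operations.
 * Two userland layouts are accepted, distinguished by the size passed
 * in uaddr1: a bare struct timespec (historical callers) implies a
 * relative CLOCK_REALTIME wait, while a full struct _umtx_time also
 * carries _flags and _clockid.  A caller requesting an absolute
 * CLOCK_MONOTONIC deadline would pass roughly (illustrative sketch):
 *
 *	struct _umtx_time t = { ._timeout = ts, ._flags = UMTX_ABSTIME,
 *	    ._clockid = CLOCK_MONOTONIC };
 *	_umtx_op(obj, op, val, (void *)sizeof(t), &t);
 */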
2953232144Sdavidxustatic inline int
2954232144Sdavidxuumtx_copyin_umtx_time(const void *addr, size_t size, struct _umtx_time *tp)
2955232144Sdavidxu{
2956232144Sdavidxu	int error;
2957232144Sdavidxu
2958232286Sdavidxu	if (size <= sizeof(struct timespec)) {
2959232286Sdavidxu		tp->_clockid = CLOCK_REALTIME;
2960232286Sdavidxu		tp->_flags = 0;
2961232144Sdavidxu		error = copyin(addr, &tp->_timeout, sizeof(struct timespec));
2962232286Sdavidxu	} else
2963232144Sdavidxu		error = copyin(addr, tp, sizeof(struct _umtx_time));
2964232144Sdavidxu	if (error != 0)
2965232144Sdavidxu		return (error);
2966232144Sdavidxu	if (tp->_timeout.tv_sec < 0 ||
2967232144Sdavidxu	    tp->_timeout.tv_nsec >= 1000000000 || tp->_timeout.tv_nsec < 0)
2968232144Sdavidxu		return (EINVAL);
2969232144Sdavidxu	return (0);
2970232144Sdavidxu}
2971232144Sdavidxu
2972162536Sdavidxustatic int
2973162536Sdavidxu__umtx_op_lock_umtx(struct thread *td, struct _umtx_op_args *uap)
2974139013Sdavidxu{
2975162536Sdavidxu	struct timespec *ts, timeout;
2976139013Sdavidxu	int error;
2977139013Sdavidxu
2978162536Sdavidxu	/* Allow a null timespec (wait forever). */
2979162536Sdavidxu	if (uap->uaddr2 == NULL)
2980162536Sdavidxu		ts = NULL;
2981162536Sdavidxu	else {
2982228219Spho		error = umtx_copyin_timeout(uap->uaddr2, &timeout);
2983162536Sdavidxu		if (error != 0)
2984162536Sdavidxu			return (error);
2985162536Sdavidxu		ts = &timeout;
2986162536Sdavidxu	}
2987162536Sdavidxu	return (do_lock_umtx(td, uap->obj, uap->val, ts));
2988162536Sdavidxu}
2989162536Sdavidxu
2990162536Sdavidxustatic int
2991162536Sdavidxu__umtx_op_unlock_umtx(struct thread *td, struct _umtx_op_args *uap)
2992162536Sdavidxu{
2993162536Sdavidxu	return (do_unlock_umtx(td, uap->obj, uap->val));
2994162536Sdavidxu}
2995162536Sdavidxu
2996162536Sdavidxustatic int
2997162536Sdavidxu__umtx_op_wait(struct thread *td, struct _umtx_op_args *uap)
2998162536Sdavidxu{
2999232144Sdavidxu	struct _umtx_time timeout, *tm_p;
3000162536Sdavidxu	int error;
3001162536Sdavidxu
3002162536Sdavidxu	if (uap->uaddr2 == NULL)
3003232144Sdavidxu		tm_p = NULL;
3004162536Sdavidxu	else {
3005232144Sdavidxu		error = umtx_copyin_umtx_time(
3006232144Sdavidxu		    uap->uaddr2, (size_t)uap->uaddr1, &timeout);
3007162536Sdavidxu		if (error != 0)
3008162536Sdavidxu			return (error);
3009232144Sdavidxu		tm_p = &timeout;
3010162536Sdavidxu	}
3011232144Sdavidxu	return do_wait(td, uap->obj, uap->val, tm_p, 0, 0);
3012162536Sdavidxu}
3013162536Sdavidxu
3014162536Sdavidxustatic int
3015173800Sdavidxu__umtx_op_wait_uint(struct thread *td, struct _umtx_op_args *uap)
3016173800Sdavidxu{
3017232144Sdavidxu	struct _umtx_time timeout, *tm_p;
3018173800Sdavidxu	int error;
3019173800Sdavidxu
3020173800Sdavidxu	if (uap->uaddr2 == NULL)
3021232144Sdavidxu		tm_p = NULL;
3022173800Sdavidxu	else {
3023232144Sdavidxu		error = umtx_copyin_umtx_time(
3024232144Sdavidxu		    uap->uaddr2, (size_t)uap->uaddr1, &timeout);
3025173800Sdavidxu		if (error != 0)
3026173800Sdavidxu			return (error);
3027232144Sdavidxu		tm_p = &timeout;
3028173800Sdavidxu	}
3029232144Sdavidxu	return do_wait(td, uap->obj, uap->val, tm_p, 1, 0);
3030173800Sdavidxu}
3031173800Sdavidxu
3032173800Sdavidxustatic int
3033178646Sdavidxu__umtx_op_wait_uint_private(struct thread *td, struct _umtx_op_args *uap)
3034178646Sdavidxu{
3035232144Sdavidxu	struct _umtx_time *tm_p, timeout;
3036178646Sdavidxu	int error;
3037178646Sdavidxu
3038178646Sdavidxu	if (uap->uaddr2 == NULL)
3039232144Sdavidxu		tm_p = NULL;
3040178646Sdavidxu	else {
3041232144Sdavidxu		error = umtx_copyin_umtx_time(
3042232144Sdavidxu		    uap->uaddr2, (size_t)uap->uaddr1, &timeout);
3043178646Sdavidxu		if (error != 0)
3044178646Sdavidxu			return (error);
3045232144Sdavidxu		tm_p = &timeout;
3046178646Sdavidxu	}
3047232144Sdavidxu	return do_wait(td, uap->obj, uap->val, tm_p, 1, 1);
3048178646Sdavidxu}
3049178646Sdavidxu
3050178646Sdavidxustatic int
3051162536Sdavidxu__umtx_op_wake(struct thread *td, struct _umtx_op_args *uap)
3052162536Sdavidxu{
3053178646Sdavidxu	return (kern_umtx_wake(td, uap->obj, uap->val, 0));
3054162536Sdavidxu}
3055162536Sdavidxu
3056216641Sdavidxu#define BATCH_SIZE	128
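/*
 * UMTX_OP_NWAKE_PRIVATE: uap->obj points at a userland array of
 * uap->val addresses, and every sleeper on each address receives a
 * private wakeup.  The array is copied in at most BATCH_SIZE pointers
 * at a time, so an arbitrarily large request is served from a fixed
 * on-stack buffer.
 */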
3057162536Sdavidxustatic int
3058216641Sdavidxu__umtx_op_nwake_private(struct thread *td, struct _umtx_op_args *uap)
3059216641Sdavidxu{
3060216641Sdavidxu	int count = uap->val;
3061216641Sdavidxu	void *uaddrs[BATCH_SIZE];
3062216641Sdavidxu	char **upp = (char **)uap->obj;
3063216641Sdavidxu	int tocopy;
3064216641Sdavidxu	int error = 0;
3065216641Sdavidxu	int i, pos = 0;
3066216641Sdavidxu
3067216641Sdavidxu	while (count > 0) {
3068216641Sdavidxu		tocopy = count;
3069216641Sdavidxu		if (tocopy > BATCH_SIZE)
3070216641Sdavidxu			tocopy = BATCH_SIZE;
3071216641Sdavidxu		error = copyin(upp+pos, uaddrs, tocopy * sizeof(char *));
3072216641Sdavidxu		if (error != 0)
3073216641Sdavidxu			break;
3074216641Sdavidxu		for (i = 0; i < tocopy; ++i)
3075216641Sdavidxu			kern_umtx_wake(td, uaddrs[i], INT_MAX, 1);
3076216641Sdavidxu		count -= tocopy;
3077216641Sdavidxu		pos += tocopy;
3078216641Sdavidxu	}
3079216641Sdavidxu	return (error);
3080216641Sdavidxu}
3081216641Sdavidxu
3082216641Sdavidxustatic int
3083178646Sdavidxu__umtx_op_wake_private(struct thread *td, struct _umtx_op_args *uap)
3084178646Sdavidxu{
3085178646Sdavidxu	return (kern_umtx_wake(td, uap->obj, uap->val, 1));
3086178646Sdavidxu}
3087178646Sdavidxu
3088178646Sdavidxustatic int
3089162536Sdavidxu__umtx_op_lock_umutex(struct thread *td, struct _umtx_op_args *uap)
3090162536Sdavidxu{
3091232144Sdavidxu	struct _umtx_time *tm_p, timeout;
3092162536Sdavidxu	int error;
3093162536Sdavidxu
3094162536Sdavidxu	/* Allow a null timespec (wait forever). */
3095162536Sdavidxu	if (uap->uaddr2 == NULL)
3096232144Sdavidxu		tm_p = NULL;
3097162536Sdavidxu	else {
3098232144Sdavidxu		error = umtx_copyin_umtx_time(
3099232144Sdavidxu		    uap->uaddr2, (size_t)uap->uaddr1, &timeout);
3100162536Sdavidxu		if (error != 0)
3101162536Sdavidxu			return (error);
3102232144Sdavidxu		tm_p = &timeout;
3103139013Sdavidxu	}
3104232144Sdavidxu	return do_lock_umutex(td, uap->obj, tm_p, 0);
3105162536Sdavidxu}
3106162536Sdavidxu
3107162536Sdavidxustatic int
3108162536Sdavidxu__umtx_op_trylock_umutex(struct thread *td, struct _umtx_op_args *uap)
3109162536Sdavidxu{
3110179970Sdavidxu	return do_lock_umutex(td, uap->obj, NULL, _UMUTEX_TRY);
3111162536Sdavidxu}
3112162536Sdavidxu
3113162536Sdavidxustatic int
3114179970Sdavidxu__umtx_op_wait_umutex(struct thread *td, struct _umtx_op_args *uap)
3115179970Sdavidxu{
3116232144Sdavidxu	struct _umtx_time *tm_p, timeout;
3117179970Sdavidxu	int error;
3118179970Sdavidxu
3119179970Sdavidxu	/* Allow a null timespec (wait forever). */
3120179970Sdavidxu	if (uap->uaddr2 == NULL)
3121232144Sdavidxu		tm_p = NULL;
3122179970Sdavidxu	else {
3123232144Sdavidxu		error = umtx_copyin_umtx_time(
3124232144Sdavidxu		    uap->uaddr2, (size_t)uap->uaddr1, &timeout);
3125179970Sdavidxu		if (error != 0)
3126179970Sdavidxu			return (error);
3127232144Sdavidxu		tm_p = &timeout;
3128179970Sdavidxu	}
3129232144Sdavidxu	return do_lock_umutex(td, uap->obj, tm_p, _UMUTEX_WAIT);
3130179970Sdavidxu}
3131179970Sdavidxu
3132179970Sdavidxustatic int
3133179970Sdavidxu__umtx_op_wake_umutex(struct thread *td, struct _umtx_op_args *uap)
3134179970Sdavidxu{
3135179970Sdavidxu	return do_wake_umutex(td, uap->obj);
3136179970Sdavidxu}
3137179970Sdavidxu
3138179970Sdavidxustatic int
3139162536Sdavidxu__umtx_op_unlock_umutex(struct thread *td, struct _umtx_op_args *uap)
3140162536Sdavidxu{
3141162536Sdavidxu	return do_unlock_umutex(td, uap->obj);
3142162536Sdavidxu}
3143162536Sdavidxu
3144162536Sdavidxustatic int
3145162536Sdavidxu__umtx_op_set_ceiling(struct thread *td, struct _umtx_op_args *uap)
3146162536Sdavidxu{
3147162536Sdavidxu	return do_set_ceiling(td, uap->obj, uap->val, uap->uaddr1);
3148162536Sdavidxu}
3149162536Sdavidxu
3150164839Sdavidxustatic int
3151164839Sdavidxu__umtx_op_cv_wait(struct thread *td, struct _umtx_op_args *uap)
3152164839Sdavidxu{
3153164839Sdavidxu	struct timespec *ts, timeout;
3154164839Sdavidxu	int error;
3155164839Sdavidxu
3156164839Sdavidxu	/* Allow a null timespec (wait forever). */
3157164839Sdavidxu	if (uap->uaddr2 == NULL)
3158164839Sdavidxu		ts = NULL;
3159164839Sdavidxu	else {
3160228219Spho		error = umtx_copyin_timeout(uap->uaddr2, &timeout);
3161164839Sdavidxu		if (error != 0)
3162164839Sdavidxu			return (error);
3163164839Sdavidxu		ts = &timeout;
3164164839Sdavidxu	}
3165164876Sdavidxu	return (do_cv_wait(td, uap->obj, uap->uaddr1, ts, uap->val));
3166164839Sdavidxu}
3167164839Sdavidxu
3168164839Sdavidxustatic int
3169164839Sdavidxu__umtx_op_cv_signal(struct thread *td, struct _umtx_op_args *uap)
3170164839Sdavidxu{
3171164839Sdavidxu	return do_cv_signal(td, uap->obj);
3172164839Sdavidxu}
3173164839Sdavidxu
3174164839Sdavidxustatic int
3175164839Sdavidxu__umtx_op_cv_broadcast(struct thread *td, struct _umtx_op_args *uap)
3176164839Sdavidxu{
3177164839Sdavidxu	return do_cv_broadcast(td, uap->obj);
3178164839Sdavidxu}
3179164839Sdavidxu
3180177848Sdavidxustatic int
3181177848Sdavidxu__umtx_op_rw_rdlock(struct thread *td, struct _umtx_op_args *uap)
3182177848Sdavidxu{
3183232209Sdavidxu	struct _umtx_time timeout;
3184177848Sdavidxu	int error;
3185177848Sdavidxu
3186177848Sdavidxu	/* Allow a null timespec (wait forever). */
3187177848Sdavidxu	if (uap->uaddr2 == NULL) {
3188177848Sdavidxu		error = do_rw_rdlock(td, uap->obj, uap->val, 0);
3189177848Sdavidxu	} else {
3190232209Sdavidxu		error = umtx_copyin_umtx_time(uap->uaddr2,
3191232209Sdavidxu		   (size_t)uap->uaddr1, &timeout);
3192177848Sdavidxu		if (error != 0)
3193177848Sdavidxu			return (error);
3194177848Sdavidxu		error = do_rw_rdlock2(td, uap->obj, uap->val, &timeout);
3195177848Sdavidxu	}
3196177848Sdavidxu	return (error);
3197177848Sdavidxu}
3198177848Sdavidxu
3199177848Sdavidxustatic int
3200177848Sdavidxu__umtx_op_rw_wrlock(struct thread *td, struct _umtx_op_args *uap)
3201177848Sdavidxu{
3202232209Sdavidxu	struct _umtx_time timeout;
3203177848Sdavidxu	int error;
3204177848Sdavidxu
3205177848Sdavidxu	/* Allow a null timespec (wait forever). */
3206177848Sdavidxu	if (uap->uaddr2 == NULL) {
3207177848Sdavidxu		error = do_rw_wrlock(td, uap->obj, 0);
3208177848Sdavidxu	} else {
3209232209Sdavidxu		error = umtx_copyin_umtx_time(uap->uaddr2,
3210232209Sdavidxu		   (size_t)uap->uaddr1, &timeout);
3211177848Sdavidxu		if (error != 0)
3212177848Sdavidxu			return (error);
3213177848Sdavidxu
3214177848Sdavidxu		error = do_rw_wrlock2(td, uap->obj, &timeout);
3215177848Sdavidxu	}
3216177848Sdavidxu	return (error);
3217177848Sdavidxu}
3218177848Sdavidxu
3219177848Sdavidxustatic int
3220177848Sdavidxu__umtx_op_rw_unlock(struct thread *td, struct _umtx_op_args *uap)
3221177848Sdavidxu{
3222177880Sdavidxu	return do_rw_unlock(td, uap->obj);
3223177848Sdavidxu}
3224177848Sdavidxu
3225201472Sdavidxustatic int
3226201472Sdavidxu__umtx_op_sem_wait(struct thread *td, struct _umtx_op_args *uap)
3227201472Sdavidxu{
3228232144Sdavidxu	struct _umtx_time *tm_p, timeout;
3229201472Sdavidxu	int error;
3230201472Sdavidxu
3231201472Sdavidxu	/* Allow a null timespec (wait forever). */
3232201472Sdavidxu	if (uap->uaddr2 == NULL)
3233232144Sdavidxu		tm_p = NULL;
3234201472Sdavidxu	else {
3235232144Sdavidxu		error = umtx_copyin_umtx_time(
3236232144Sdavidxu		    uap->uaddr2, (size_t)uap->uaddr1, &timeout);
3237201472Sdavidxu		if (error != 0)
3238201472Sdavidxu			return (error);
3239232144Sdavidxu		tm_p = &timeout;
3240201472Sdavidxu	}
3241232144Sdavidxu	return (do_sem_wait(td, uap->obj, tm_p));
3242201472Sdavidxu}
3243201472Sdavidxu
3244201472Sdavidxustatic int
3245201472Sdavidxu__umtx_op_sem_wake(struct thread *td, struct _umtx_op_args *uap)
3246201472Sdavidxu{
3247201472Sdavidxu	return do_sem_wake(td, uap->obj);
3248201472Sdavidxu}
3249201472Sdavidxu
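/*
 * _umtx_op(2) dispatch table, indexed directly by uap->op; entries must
 * stay in UMTX_OP_* order.  Waking up to "nwake" threads sleeping on a
 * shared word, for example, reduces to (sketch, error handling
 * omitted):
 *
 *	_umtx_op(&word, UMTX_OP_WAKE, nwake, NULL, NULL);
 */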
3250162536Sdavidxutypedef int (*_umtx_op_func)(struct thread *td, struct _umtx_op_args *uap);
3251162536Sdavidxu
3252162536Sdavidxustatic _umtx_op_func op_table[] = {
3253162536Sdavidxu	__umtx_op_lock_umtx,		/* UMTX_OP_LOCK */
3254162536Sdavidxu	__umtx_op_unlock_umtx,		/* UMTX_OP_UNLOCK */
3255162536Sdavidxu	__umtx_op_wait,			/* UMTX_OP_WAIT */
3256162536Sdavidxu	__umtx_op_wake,			/* UMTX_OP_WAKE */
3257162536Sdavidxu	__umtx_op_trylock_umutex,	/* UMTX_OP_MUTEX_TRYLOCK */
3258162536Sdavidxu	__umtx_op_lock_umutex,		/* UMTX_OP_MUTEX_LOCK */
3259162536Sdavidxu	__umtx_op_unlock_umutex,	/* UMTX_OP_MUTEX_UNLOCK */
3260164839Sdavidxu	__umtx_op_set_ceiling,		/* UMTX_OP_SET_CEILING */
3261164839Sdavidxu	__umtx_op_cv_wait,		/* UMTX_OP_CV_WAIT*/
3262164839Sdavidxu	__umtx_op_cv_signal,		/* UMTX_OP_CV_SIGNAL */
3263173800Sdavidxu	__umtx_op_cv_broadcast,		/* UMTX_OP_CV_BROADCAST */
3264177848Sdavidxu	__umtx_op_wait_uint,		/* UMTX_OP_WAIT_UINT */
3265177848Sdavidxu	__umtx_op_rw_rdlock,		/* UMTX_OP_RW_RDLOCK */
3266177848Sdavidxu	__umtx_op_rw_wrlock,		/* UMTX_OP_RW_WRLOCK */
3267178646Sdavidxu	__umtx_op_rw_unlock,		/* UMTX_OP_RW_UNLOCK */
3268178646Sdavidxu	__umtx_op_wait_uint_private,	/* UMTX_OP_WAIT_UINT_PRIVATE */
3269179970Sdavidxu	__umtx_op_wake_private,		/* UMTX_OP_WAKE_PRIVATE */
3270179970Sdavidxu	__umtx_op_wait_umutex,		/* UMTX_OP_UMUTEX_WAIT */
3271201472Sdavidxu	__umtx_op_wake_umutex,		/* UMTX_OP_UMUTEX_WAKE */
3272201472Sdavidxu	__umtx_op_sem_wait,		/* UMTX_OP_SEM_WAIT */
3273216641Sdavidxu	__umtx_op_sem_wake,		/* UMTX_OP_SEM_WAKE */
3274216641Sdavidxu	__umtx_op_nwake_private		/* UMTX_OP_NWAKE_PRIVATE */
3275162536Sdavidxu};
3276162536Sdavidxu
3277162536Sdavidxuint
3278225617Skmacysys__umtx_op(struct thread *td, struct _umtx_op_args *uap)
3279162536Sdavidxu{
3280163678Sdavidxu	if ((unsigned)uap->op < UMTX_OP_MAX)
3281162536Sdavidxu		return (*op_table[uap->op])(td, uap);
3282162536Sdavidxu	return (EINVAL);
3283162536Sdavidxu}
3284162536Sdavidxu
3285205014Snwhitehorn#ifdef COMPAT_FREEBSD32
3286163046Sdavidxuint
3287163046Sdavidxufreebsd32_umtx_lock(struct thread *td, struct freebsd32_umtx_lock_args *uap)
3288163046Sdavidxu    /* struct umtx *umtx */
3289163046Sdavidxu{
3290163046Sdavidxu	return (do_lock_umtx32(td, (uint32_t *)uap->umtx, td->td_tid, NULL));
3291163046Sdavidxu}
3292163046Sdavidxu
3293163046Sdavidxuint
3294163046Sdavidxufreebsd32_umtx_unlock(struct thread *td, struct freebsd32_umtx_unlock_args *uap)
3295163046Sdavidxu    /* struct umtx *umtx */
3296163046Sdavidxu{
3297163046Sdavidxu	return (do_unlock_umtx32(td, (uint32_t *)uap->umtx, td->td_tid));
3298163046Sdavidxu}
3299163046Sdavidxu
3300162536Sdavidxustruct timespec32 {
3301209390Sed	uint32_t tv_sec;
3302209390Sed	uint32_t tv_nsec;
3303162536Sdavidxu};
3304162536Sdavidxu
3305232144Sdavidxustruct umtx_time32 {
3306232144Sdavidxu	struct	timespec32	timeout;
3307232144Sdavidxu	uint32_t		flags;
3308232144Sdavidxu	uint32_t		clockid;
3309232144Sdavidxu};
3310232144Sdavidxu
3311162536Sdavidxustatic inline int
3312228218Sphoumtx_copyin_timeout32(const void *addr, struct timespec *tsp)
3313162536Sdavidxu{
3314162536Sdavidxu	struct timespec32 ts32;
3315162536Sdavidxu	int error;
3316162536Sdavidxu
3317162536Sdavidxu	error = copyin(addr, &ts32, sizeof(struct timespec32));
3318162536Sdavidxu	if (error == 0) {
3319228218Spho		if (ts32.tv_sec < 0 ||
3320228218Spho		    ts32.tv_nsec >= 1000000000 ||
3321228218Spho		    ts32.tv_nsec < 0)
3322228218Spho			error = EINVAL;
3323228218Spho		else {
3324228218Spho			tsp->tv_sec = ts32.tv_sec;
3325228218Spho			tsp->tv_nsec = ts32.tv_nsec;
3326228218Spho		}
3327162536Sdavidxu	}
3328140421Sdavidxu	return (error);
3329139013Sdavidxu}
3330161678Sdavidxu
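/*
 * 32-bit counterpart of umtx_copyin_umtx_time(): the same size-based
 * layout detection, with the timespec members widened from the 32-bit
 * ABI before the usual range checks.
 */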
3331232144Sdavidxustatic inline int
3332232144Sdavidxuumtx_copyin_umtx_time32(const void *addr, size_t size, struct _umtx_time *tp)
3333232144Sdavidxu{
3334232144Sdavidxu	struct umtx_time32 t32;
3335232144Sdavidxu	int error;
3336232144Sdavidxu
3337232144Sdavidxu	t32.clockid = CLOCK_REALTIME;
3338232144Sdavidxu	t32.flags   = 0;
3339232144Sdavidxu	if (size <= sizeof(struct timespec32))
3340232144Sdavidxu		error = copyin(addr, &t32.timeout, sizeof(struct timespec32));
3341232144Sdavidxu	else
3342232144Sdavidxu		error = copyin(addr, &t32, sizeof(struct umtx_time32));
3343232144Sdavidxu	if (error != 0)
3344232144Sdavidxu		return (error);
3345232144Sdavidxu	if (t32.timeout.tv_sec < 0 ||
3346232144Sdavidxu	    t32.timeout.tv_nsec >= 1000000000 || t32.timeout.tv_nsec < 0)
3347232144Sdavidxu		return (EINVAL);
3348232144Sdavidxu	tp->_timeout.tv_sec = t32.timeout.tv_sec;
3349232144Sdavidxu	tp->_timeout.tv_nsec = t32.timeout.tv_nsec;
3350232144Sdavidxu	tp->_flags = t32.flags;
3351232144Sdavidxu	tp->_clockid = t32.clockid;
3352232144Sdavidxu	return (0);
3353232144Sdavidxu}
3354232144Sdavidxu
3355162536Sdavidxustatic int
3356162536Sdavidxu__umtx_op_lock_umtx_compat32(struct thread *td, struct _umtx_op_args *uap)
3357162536Sdavidxu{
3358162536Sdavidxu	struct timespec *ts, timeout;
3359162536Sdavidxu	int error;
3360162536Sdavidxu
3361162536Sdavidxu	/* Allow a null timespec (wait forever). */
3362162536Sdavidxu	if (uap->uaddr2 == NULL)
3363162536Sdavidxu		ts = NULL;
3364162536Sdavidxu	else {
3365228218Spho		error = umtx_copyin_timeout32(uap->uaddr2, &timeout);
3366162536Sdavidxu		if (error != 0)
3367162536Sdavidxu			return (error);
3368162536Sdavidxu		ts = &timeout;
3369162536Sdavidxu	}
3370162536Sdavidxu	return (do_lock_umtx32(td, uap->obj, uap->val, ts));
3371162536Sdavidxu}
3372162536Sdavidxu
3373162536Sdavidxustatic int
3374162536Sdavidxu__umtx_op_unlock_umtx_compat32(struct thread *td, struct _umtx_op_args *uap)
3375162536Sdavidxu{
3376162536Sdavidxu	return (do_unlock_umtx32(td, uap->obj, (uint32_t)uap->val));
3377162536Sdavidxu}
3378162536Sdavidxu
3379162536Sdavidxustatic int
3380162536Sdavidxu__umtx_op_wait_compat32(struct thread *td, struct _umtx_op_args *uap)
3381162536Sdavidxu{
3382232144Sdavidxu	struct _umtx_time *tm_p, timeout;
3383162536Sdavidxu	int error;
3384162536Sdavidxu
3385162536Sdavidxu	if (uap->uaddr2 == NULL)
3386232144Sdavidxu		tm_p = NULL;
3387162536Sdavidxu	else {
3388232144Sdavidxu		error = umtx_copyin_umtx_time32(uap->uaddr2,
3389232144Sdavidxu			(size_t)uap->uaddr1, &timeout);
3390162536Sdavidxu		if (error != 0)
3391162536Sdavidxu			return (error);
3392232144Sdavidxu		tm_p = &timeout;
3393162536Sdavidxu	}
3394232144Sdavidxu	return do_wait(td, uap->obj, uap->val, tm_p, 1, 0);
3395162536Sdavidxu}
3396162536Sdavidxu
3397162536Sdavidxustatic int
3398162536Sdavidxu__umtx_op_lock_umutex_compat32(struct thread *td, struct _umtx_op_args *uap)
3399162536Sdavidxu{
3400232144Sdavidxu	struct _umtx_time *tm_p, timeout;
3401162536Sdavidxu	int error;
3402162536Sdavidxu
3403162536Sdavidxu	/* Allow a null timespec (wait forever). */
3404162536Sdavidxu	if (uap->uaddr2 == NULL)
3405232144Sdavidxu		tm_p = NULL;
3406162536Sdavidxu	else {
3407232144Sdavidxu		error = umtx_copyin_umtx_time32(uap->uaddr2,
3408232144Sdavidxu			    (size_t)uap->uaddr1, &timeout);
3409162536Sdavidxu		if (error != 0)
3410162536Sdavidxu			return (error);
3411232144Sdavidxu		tm_p = &timeout;
3412162536Sdavidxu	}
3413232144Sdavidxu	return do_lock_umutex(td, uap->obj, tm_p, 0);
3414162536Sdavidxu}
3415162536Sdavidxu
3416164839Sdavidxustatic int
3417179970Sdavidxu__umtx_op_wait_umutex_compat32(struct thread *td, struct _umtx_op_args *uap)
3418179970Sdavidxu{
3419232144Sdavidxu	struct _umtx_time *tm_p, timeout;
3420179970Sdavidxu	int error;
3421179970Sdavidxu
3422179970Sdavidxu	/* Allow a null timespec (wait forever). */
3423179970Sdavidxu	if (uap->uaddr2 == NULL)
3424232144Sdavidxu		tm_p = NULL;
3425179970Sdavidxu	else {
3426232144Sdavidxu		error = umtx_copyin_umtx_time32(uap->uaddr2,
3427232144Sdavidxu		    (size_t)uap->uaddr1, &timeout);
3428179970Sdavidxu		if (error != 0)
3429179970Sdavidxu			return (error);
3430232144Sdavidxu		tm_p = &timeout;
3431179970Sdavidxu	}
3432232144Sdavidxu	return do_lock_umutex(td, uap->obj, tm_p, _UMUTEX_WAIT);
3433179970Sdavidxu}
3434179970Sdavidxu
3435179970Sdavidxustatic int
3436164839Sdavidxu__umtx_op_cv_wait_compat32(struct thread *td, struct _umtx_op_args *uap)
3437164839Sdavidxu{
3438164839Sdavidxu	struct timespec *ts, timeout;
3439164839Sdavidxu	int error;
3440164839Sdavidxu
3441164839Sdavidxu	/* Allow a null timespec (wait forever). */
3442164839Sdavidxu	if (uap->uaddr2 == NULL)
3443164839Sdavidxu		ts = NULL;
3444164839Sdavidxu	else {
3445228218Spho		error = umtx_copyin_timeout32(uap->uaddr2, &timeout);
3446164839Sdavidxu		if (error != 0)
3447164839Sdavidxu			return (error);
3448164839Sdavidxu		ts = &timeout;
3449164839Sdavidxu	}
3450164876Sdavidxu	return (do_cv_wait(td, uap->obj, uap->uaddr1, ts, uap->val));
3451164839Sdavidxu}
3452164839Sdavidxu
3453177848Sdavidxustatic int
3454177848Sdavidxu__umtx_op_rw_rdlock_compat32(struct thread *td, struct _umtx_op_args *uap)
3455177848Sdavidxu{
3456232209Sdavidxu	struct _umtx_time timeout;
3457177848Sdavidxu	int error;
3458177848Sdavidxu
3459177848Sdavidxu	/* Allow a null timespec (wait forever). */
3460177848Sdavidxu	if (uap->uaddr2 == NULL) {
3461177848Sdavidxu		error = do_rw_rdlock(td, uap->obj, uap->val, 0);
3462177848Sdavidxu	} else {
3463232209Sdavidxu		error = umtx_copyin_umtx_time32(uap->uaddr2,
3464232209Sdavidxu		    (size_t)uap->uaddr1, &timeout);
3465177848Sdavidxu		if (error != 0)
3466177848Sdavidxu			return (error);
3467177848Sdavidxu		error = do_rw_rdlock2(td, uap->obj, uap->val, &timeout);
3468177848Sdavidxu	}
3469177848Sdavidxu	return (error);
3470177848Sdavidxu}
3471177848Sdavidxu
3472177848Sdavidxustatic int
3473177848Sdavidxu__umtx_op_rw_wrlock_compat32(struct thread *td, struct _umtx_op_args *uap)
3474177848Sdavidxu{
3475232209Sdavidxu	struct _umtx_time timeout;
3476177848Sdavidxu	int error;
3477177848Sdavidxu
3478177848Sdavidxu	/* Allow a null timespec (wait forever). */
3479177848Sdavidxu	if (uap->uaddr2 == NULL) {
3480177852Sdavidxu		error = do_rw_wrlock(td, uap->obj, 0);
3481177848Sdavidxu	} else {
3482232209Sdavidxu		error = umtx_copyin_umtx_time32(uap->uaddr2,
3483232209Sdavidxu		    (size_t)uap->uaddr1, &timeout);
3484177848Sdavidxu		if (error != 0)
3485177848Sdavidxu			return (error);
3486177852Sdavidxu		error = do_rw_wrlock2(td, uap->obj, &timeout);
3487177848Sdavidxu	}
3488177848Sdavidxu	return (error);
3489177848Sdavidxu}
3490177848Sdavidxu
3491178646Sdavidxustatic int
3492178646Sdavidxu__umtx_op_wait_uint_private_compat32(struct thread *td, struct _umtx_op_args *uap)
3493178646Sdavidxu{
3494232144Sdavidxu	struct _umtx_time *tm_p, timeout;
3495178646Sdavidxu	int error;
3496178646Sdavidxu
3497178646Sdavidxu	if (uap->uaddr2 == NULL)
3498232144Sdavidxu		tm_p = NULL;
3499178646Sdavidxu	else {
3500232144Sdavidxu		error = umtx_copyin_umtx_time32(
3501232144Sdavidxu		    uap->uaddr2, (size_t)uap->uaddr1, &timeout);
3502178646Sdavidxu		if (error != 0)
3503178646Sdavidxu			return (error);
3504232144Sdavidxu		tm_p = &timeout;
3505178646Sdavidxu	}
3506232144Sdavidxu	return do_wait(td, uap->obj, uap->val, tm_p, 1, 1);
3507178646Sdavidxu}
3508178646Sdavidxu
3509201472Sdavidxustatic int
3510201472Sdavidxu__umtx_op_sem_wait_compat32(struct thread *td, struct _umtx_op_args *uap)
3511201472Sdavidxu{
3512232144Sdavidxu	struct _umtx_time *tm_p, timeout;
3513201472Sdavidxu	int error;
3514201472Sdavidxu
3515201472Sdavidxu	/* Allow a null timespec (wait forever). */
3516201472Sdavidxu	if (uap->uaddr2 == NULL)
3517232144Sdavidxu		tm_p = NULL;
3518201472Sdavidxu	else {
3519232144Sdavidxu		error = umtx_copyin_umtx_time32(uap->uaddr2,
3520232144Sdavidxu		    (size_t)uap->uaddr1, &timeout);
3521201472Sdavidxu		if (error != 0)
3522201472Sdavidxu			return (error);
3523232144Sdavidxu		tm_p = &timeout;
3524201472Sdavidxu	}
3525232144Sdavidxu	return (do_sem_wait(td, uap->obj, tm_p));
3526201472Sdavidxu}
3527201472Sdavidxu
3528216641Sdavidxustatic int
3529216641Sdavidxu__umtx_op_nwake_private32(struct thread *td, struct _umtx_op_args *uap)
3530216641Sdavidxu{
3531216641Sdavidxu	int count = uap->val;
3532216641Sdavidxu	uint32_t uaddrs[BATCH_SIZE];
3533216641Sdavidxu	uint32_t *upp = (uint32_t *)uap->obj;
3534216641Sdavidxu	int tocopy;
3535216641Sdavidxu	int error = 0;
3536216641Sdavidxu	int i, pos = 0;
3537216641Sdavidxu
3538216641Sdavidxu	while (count > 0) {
3539216641Sdavidxu		tocopy = count;
3540216641Sdavidxu		if (tocopy > BATCH_SIZE)
3541216641Sdavidxu			tocopy = BATCH_SIZE;
3542216641Sdavidxu		error = copyin(upp+pos, uaddrs, tocopy * sizeof(uint32_t));
3543216641Sdavidxu		if (error != 0)
3544216641Sdavidxu			break;
3545216641Sdavidxu		for (i = 0; i < tocopy; ++i)
3546216641Sdavidxu			kern_umtx_wake(td, (void *)(intptr_t)uaddrs[i],
3547216641Sdavidxu				INT_MAX, 1);
3548216641Sdavidxu		count -= tocopy;
3549216641Sdavidxu		pos += tocopy;
3550216641Sdavidxu	}
3551216641Sdavidxu	return (error);
3552216641Sdavidxu}
3553216641Sdavidxu
3554162536Sdavidxustatic _umtx_op_func op_table_compat32[] = {
3555162536Sdavidxu	__umtx_op_lock_umtx_compat32,	/* UMTX_OP_LOCK */
3556162536Sdavidxu	__umtx_op_unlock_umtx_compat32,	/* UMTX_OP_UNLOCK */
3557162536Sdavidxu	__umtx_op_wait_compat32,	/* UMTX_OP_WAIT */
3558162536Sdavidxu	__umtx_op_wake,			/* UMTX_OP_WAKE */
3559162550Sdavidxu	__umtx_op_trylock_umutex,	/* UMTX_OP_MUTEX_TRYLOCK */
3560162536Sdavidxu	__umtx_op_lock_umutex_compat32,	/* UMTX_OP_MUTEX_LOCK */
3561162536Sdavidxu	__umtx_op_unlock_umutex,	/* UMTX_OP_MUTEX_UNLOCK	*/
3562164839Sdavidxu	__umtx_op_set_ceiling,		/* UMTX_OP_SET_CEILING */
3563164839Sdavidxu	__umtx_op_cv_wait_compat32,	/* UMTX_OP_CV_WAIT*/
3564164839Sdavidxu	__umtx_op_cv_signal,		/* UMTX_OP_CV_SIGNAL */
3565173800Sdavidxu	__umtx_op_cv_broadcast,		/* UMTX_OP_CV_BROADCAST */
3566177848Sdavidxu	__umtx_op_wait_compat32,	/* UMTX_OP_WAIT_UINT */
3567177848Sdavidxu	__umtx_op_rw_rdlock_compat32,	/* UMTX_OP_RW_RDLOCK */
3568177848Sdavidxu	__umtx_op_rw_wrlock_compat32,	/* UMTX_OP_RW_WRLOCK */
3569178646Sdavidxu	__umtx_op_rw_unlock,		/* UMTX_OP_RW_UNLOCK */
3570178646Sdavidxu	__umtx_op_wait_uint_private_compat32,	/* UMTX_OP_WAIT_UINT_PRIVATE */
3571179970Sdavidxu	__umtx_op_wake_private,		/* UMTX_OP_WAKE_PRIVATE */
3572179970Sdavidxu	__umtx_op_wait_umutex_compat32, /* UMTX_OP_UMUTEX_WAIT */
3573201472Sdavidxu	__umtx_op_wake_umutex,		/* UMTX_OP_UMUTEX_WAKE */
3574201472Sdavidxu	__umtx_op_sem_wait_compat32,	/* UMTX_OP_SEM_WAIT */
3575216641Sdavidxu	__umtx_op_sem_wake,		/* UMTX_OP_SEM_WAKE */
3576216641Sdavidxu	__umtx_op_nwake_private32	/* UMTX_OP_NWAKE_PRIVATE */
3577162536Sdavidxu};
3578162536Sdavidxu
3579162536Sdavidxuint
3580162536Sdavidxufreebsd32_umtx_op(struct thread *td, struct freebsd32_umtx_op_args *uap)
3581162536Sdavidxu{
3582163678Sdavidxu	if ((unsigned)uap->op < UMTX_OP_MAX)
3583162536Sdavidxu		return (*op_table_compat32[uap->op])(td,
3584162536Sdavidxu			(struct _umtx_op_args *)uap);
3585162536Sdavidxu	return (EINVAL);
3586162536Sdavidxu}
3587162536Sdavidxu#endif
3588162536Sdavidxu
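/*
 * Per-thread umtx state lives in td->td_umtxq and follows the thread's
 * lifecycle: allocated with the thread, reset when a new thread starts,
 * and cleaned up on exec and exit so that no priority-inheritance state
 * leaks across those boundaries.
 */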
3589161678Sdavidxuvoid
3590161678Sdavidxuumtx_thread_init(struct thread *td)
3591161678Sdavidxu{
3592161678Sdavidxu	td->td_umtxq = umtxq_alloc();
3593161678Sdavidxu	td->td_umtxq->uq_thread = td;
3594161678Sdavidxu}
3595161678Sdavidxu
3596161678Sdavidxuvoid
3597161678Sdavidxuumtx_thread_fini(struct thread *td)
3598161678Sdavidxu{
3599161678Sdavidxu	umtxq_free(td->td_umtxq);
3600161678Sdavidxu}
3601161678Sdavidxu
3602161678Sdavidxu/*
3603161678Sdavidxu * Called when a new thread is created, e.g. by fork().
3604161678Sdavidxu */
3605161678Sdavidxuvoid
3606161678Sdavidxuumtx_thread_alloc(struct thread *td)
3607161678Sdavidxu{
3608161678Sdavidxu	struct umtx_q *uq;
3609161678Sdavidxu
3610161678Sdavidxu	uq = td->td_umtxq;
3611161678Sdavidxu	uq->uq_inherited_pri = PRI_MAX;
3612161678Sdavidxu
3613161678Sdavidxu	KASSERT(uq->uq_flags == 0, ("uq_flags != 0"));
3614161678Sdavidxu	KASSERT(uq->uq_thread == td, ("uq_thread != td"));
3615161678Sdavidxu	KASSERT(uq->uq_pi_blocked == NULL, ("uq_pi_blocked != NULL"));
3616161678Sdavidxu	KASSERT(TAILQ_EMPTY(&uq->uq_pi_contested), ("uq_pi_contested is not empty"));
3617161678Sdavidxu}
3618161678Sdavidxu
3619161678Sdavidxu/*
3620161678Sdavidxu * exec() hook.
3621161678Sdavidxu */
3622161678Sdavidxustatic void
3623161678Sdavidxuumtx_exec_hook(void *arg __unused, struct proc *p __unused,
3624161678Sdavidxu	struct image_params *imgp __unused)
3625161678Sdavidxu{
3626161678Sdavidxu	umtx_thread_cleanup(curthread);
3627161678Sdavidxu}
3628161678Sdavidxu
3629161678Sdavidxu/*
3630161678Sdavidxu * thread_exit() hook.
3631161678Sdavidxu */
3632161678Sdavidxuvoid
3633161678Sdavidxuumtx_thread_exit(struct thread *td)
3634161678Sdavidxu{
3635161678Sdavidxu	umtx_thread_cleanup(td);
3636161678Sdavidxu}
3637161678Sdavidxu
3638161678Sdavidxu/*
3639161678Sdavidxu * Clean up umtx data: disown contested PI mutexes and drop lent priority.
3640161678Sdavidxu */
3641161678Sdavidxustatic void
3642161678Sdavidxuumtx_thread_cleanup(struct thread *td)
3643161678Sdavidxu{
3644161678Sdavidxu	struct umtx_q *uq;
3645161678Sdavidxu	struct umtx_pi *pi;
3646161678Sdavidxu
3647161678Sdavidxu	if ((uq = td->td_umtxq) == NULL)
3648161678Sdavidxu		return;
3649161678Sdavidxu
3650170300Sjeff	mtx_lock_spin(&umtx_lock);
3651161678Sdavidxu	uq->uq_inherited_pri = PRI_MAX;
3652161678Sdavidxu	while ((pi = TAILQ_FIRST(&uq->uq_pi_contested)) != NULL) {
3653161678Sdavidxu		pi->pi_owner = NULL;
3654161678Sdavidxu		TAILQ_REMOVE(&uq->uq_pi_contested, pi, pi_link);
3655161678Sdavidxu	}
3656216313Sdavidxu	mtx_unlock_spin(&umtx_lock);
3657174701Sdavidxu	thread_lock(td);
3658216791Sdavidxu	sched_lend_user_prio(td, PRI_MAX);
3659174701Sdavidxu	thread_unlock(td);
3660161678Sdavidxu}
3661