kern_umtx.c revision 228218
1139804Simp/*-
2139013Sdavidxu * Copyright (c) 2004, David Xu <davidxu@freebsd.org>
3112904Sjeff * Copyright (c) 2002, Jeffrey Roberson <jeff@freebsd.org>
4112904Sjeff * All rights reserved.
5112904Sjeff *
6112904Sjeff * Redistribution and use in source and binary forms, with or without
7112904Sjeff * modification, are permitted provided that the following conditions
8112904Sjeff * are met:
9112904Sjeff * 1. Redistributions of source code must retain the above copyright
10112904Sjeff *    notice unmodified, this list of conditions, and the following
11112904Sjeff *    disclaimer.
12112904Sjeff * 2. Redistributions in binary form must reproduce the above copyright
13112904Sjeff *    notice, this list of conditions and the following disclaimer in the
14112904Sjeff *    documentation and/or other materials provided with the distribution.
15112904Sjeff *
16112904Sjeff * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR
17112904Sjeff * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
18112904Sjeff * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.
19112904Sjeff * IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT,
20112904Sjeff * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT
21112904Sjeff * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
22112904Sjeff * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
23112904Sjeff * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
24112904Sjeff * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF
25112904Sjeff * THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
26112904Sjeff */
27112904Sjeff
28116182Sobrien#include <sys/cdefs.h>
29116182Sobrien__FBSDID("$FreeBSD: head/sys/kern/kern_umtx.c 228218 2011-12-03 12:28:33Z pho $");
30116182Sobrien
31162536Sdavidxu#include "opt_compat.h"
32112904Sjeff#include <sys/param.h>
33112904Sjeff#include <sys/kernel.h>
34131431Smarcel#include <sys/limits.h>
35112904Sjeff#include <sys/lock.h>
36115765Sjeff#include <sys/malloc.h>
37112904Sjeff#include <sys/mutex.h>
38164033Srwatson#include <sys/priv.h>
39112904Sjeff#include <sys/proc.h>
40161678Sdavidxu#include <sys/sched.h>
41165369Sdavidxu#include <sys/smp.h>
42161678Sdavidxu#include <sys/sysctl.h>
43112904Sjeff#include <sys/sysent.h>
44112904Sjeff#include <sys/systm.h>
45112904Sjeff#include <sys/sysproto.h>
46216641Sdavidxu#include <sys/syscallsubr.h>
47139013Sdavidxu#include <sys/eventhandler.h>
48112904Sjeff#include <sys/umtx.h>
49112904Sjeff
50139013Sdavidxu#include <vm/vm.h>
51139013Sdavidxu#include <vm/vm_param.h>
52139013Sdavidxu#include <vm/pmap.h>
53139013Sdavidxu#include <vm/vm_map.h>
54139013Sdavidxu#include <vm/vm_object.h>
55139013Sdavidxu
56165369Sdavidxu#include <machine/cpu.h>
57165369Sdavidxu
58205014Snwhitehorn#ifdef COMPAT_FREEBSD32
59162536Sdavidxu#include <compat/freebsd32/freebsd32_proto.h>
60162536Sdavidxu#endif
61162536Sdavidxu
62179970Sdavidxu#define _UMUTEX_TRY		1
63179970Sdavidxu#define _UMUTEX_WAIT		2
64179970Sdavidxu
65161678Sdavidxu/* Priority inheritance mutex info. */
66161678Sdavidxustruct umtx_pi {
67161678Sdavidxu	/* Owner thread */
68161678Sdavidxu	struct thread		*pi_owner;
69161678Sdavidxu
70161678Sdavidxu	/* Reference count */
71161678Sdavidxu	int			pi_refcount;
72161678Sdavidxu
73161678Sdavidxu 	/* List entry to link umtx holding by thread */
74161678Sdavidxu	TAILQ_ENTRY(umtx_pi)	pi_link;
75161678Sdavidxu
76161678Sdavidxu	/* List entry in hash */
77161678Sdavidxu	TAILQ_ENTRY(umtx_pi)	pi_hashlink;
78161678Sdavidxu
79161678Sdavidxu	/* List for waiters */
80161678Sdavidxu	TAILQ_HEAD(,umtx_q)	pi_blocked;
81161678Sdavidxu
82161678Sdavidxu	/* Identify a userland lock object */
83161678Sdavidxu	struct umtx_key		pi_key;
84161678Sdavidxu};
85161678Sdavidxu
86161678Sdavidxu/* A userland synchronous object user. */
87115765Sjeffstruct umtx_q {
88161678Sdavidxu	/* Linked list for the hash. */
89161678Sdavidxu	TAILQ_ENTRY(umtx_q)	uq_link;
90161678Sdavidxu
91161678Sdavidxu	/* Umtx key. */
92161678Sdavidxu	struct umtx_key		uq_key;
93161678Sdavidxu
94161678Sdavidxu	/* Umtx flags. */
95161678Sdavidxu	int			uq_flags;
96161678Sdavidxu#define UQF_UMTXQ	0x0001
97161678Sdavidxu
98161678Sdavidxu	/* The thread waits on. */
99161678Sdavidxu	struct thread		*uq_thread;
100161678Sdavidxu
101161678Sdavidxu	/*
102161678Sdavidxu	 * Blocked on PI mutex. read can use chain lock
103170300Sjeff	 * or umtx_lock, write must have both chain lock and
104170300Sjeff	 * umtx_lock being hold.
105161678Sdavidxu	 */
106161678Sdavidxu	struct umtx_pi		*uq_pi_blocked;
107161678Sdavidxu
108161678Sdavidxu	/* On blocked list */
109161678Sdavidxu	TAILQ_ENTRY(umtx_q)	uq_lockq;
110161678Sdavidxu
111161678Sdavidxu	/* Thread contending with us */
112161678Sdavidxu	TAILQ_HEAD(,umtx_pi)	uq_pi_contested;
113161678Sdavidxu
114161742Sdavidxu	/* Inherited priority from PP mutex */
115161678Sdavidxu	u_char			uq_inherited_pri;
116201991Sdavidxu
117201991Sdavidxu	/* Spare queue ready to be reused */
118201991Sdavidxu	struct umtxq_queue	*uq_spare_queue;
119201991Sdavidxu
120201991Sdavidxu	/* The queue we on */
121201991Sdavidxu	struct umtxq_queue	*uq_cur_queue;
122115765Sjeff};
123115765Sjeff
124161678SdavidxuTAILQ_HEAD(umtxq_head, umtx_q);
125161678Sdavidxu
126201991Sdavidxu/* Per-key wait-queue */
127201991Sdavidxustruct umtxq_queue {
128201991Sdavidxu	struct umtxq_head	head;
129201991Sdavidxu	struct umtx_key		key;
130201991Sdavidxu	LIST_ENTRY(umtxq_queue)	link;
131201991Sdavidxu	int			length;
132201991Sdavidxu};
133201991Sdavidxu
134201991SdavidxuLIST_HEAD(umtxq_list, umtxq_queue);
135201991Sdavidxu
136161678Sdavidxu/* Userland lock object's wait-queue chain */
137138224Sdavidxustruct umtxq_chain {
138161678Sdavidxu	/* Lock for this chain. */
139161678Sdavidxu	struct mtx		uc_lock;
140161678Sdavidxu
141161678Sdavidxu	/* List of sleep queues. */
142201991Sdavidxu	struct umtxq_list	uc_queue[2];
143177848Sdavidxu#define UMTX_SHARED_QUEUE	0
144177848Sdavidxu#define UMTX_EXCLUSIVE_QUEUE	1
145161678Sdavidxu
146201991Sdavidxu	LIST_HEAD(, umtxq_queue) uc_spare_queue;
147201991Sdavidxu
148161678Sdavidxu	/* Busy flag */
149161678Sdavidxu	char			uc_busy;
150161678Sdavidxu
151161678Sdavidxu	/* Chain lock waiters */
152158377Sdavidxu	int			uc_waiters;
153161678Sdavidxu
154161678Sdavidxu	/* All PI in the list */
155161678Sdavidxu	TAILQ_HEAD(,umtx_pi)	uc_pi_list;
156201991Sdavidxu
157138224Sdavidxu};
158115765Sjeff
159161678Sdavidxu#define	UMTXQ_LOCKED_ASSERT(uc)		mtx_assert(&(uc)->uc_lock, MA_OWNED)
160189756Sdavidxu#define	UMTXQ_BUSY_ASSERT(uc)	KASSERT(&(uc)->uc_busy, ("umtx chain is not busy"))
161161678Sdavidxu
162161678Sdavidxu/*
163161678Sdavidxu * Don't propagate time-sharing priority, there is a security reason,
164161678Sdavidxu * a user can simply introduce PI-mutex, let thread A lock the mutex,
165161678Sdavidxu * and let another thread B block on the mutex, because B is
166161678Sdavidxu * sleeping, its priority will be boosted, this causes A's priority to
167161678Sdavidxu * be boosted via priority propagating too and will never be lowered even
168161678Sdavidxu * if it is using 100%CPU, this is unfair to other processes.
169161678Sdavidxu */
170161678Sdavidxu
171163709Sjb#define UPRI(td)	(((td)->td_user_pri >= PRI_MIN_TIMESHARE &&\
172163709Sjb			  (td)->td_user_pri <= PRI_MAX_TIMESHARE) ?\
173163709Sjb			 PRI_MAX_TIMESHARE : (td)->td_user_pri)
174161678Sdavidxu
175138224Sdavidxu#define	GOLDEN_RATIO_PRIME	2654404609U
176216678Sdavidxu#define	UMTX_CHAINS		512
177216678Sdavidxu#define	UMTX_SHIFTS		(__WORD_BIT - 9)
178115765Sjeff
179161678Sdavidxu#define	GET_SHARE(flags)	\
180161678Sdavidxu    (((flags) & USYNC_PROCESS_SHARED) == 0 ? THREAD_SHARE : PROCESS_SHARE)
181161678Sdavidxu
182177848Sdavidxu#define BUSY_SPINS		200
183177848Sdavidxu
184161678Sdavidxustatic uma_zone_t		umtx_pi_zone;
185179421Sdavidxustatic struct umtxq_chain	umtxq_chains[2][UMTX_CHAINS];
186138224Sdavidxustatic MALLOC_DEFINE(M_UMTX, "umtx", "UMTX queue memory");
187161678Sdavidxustatic int			umtx_pi_allocated;
188115310Sjeff
189227309Sedstatic SYSCTL_NODE(_debug, OID_AUTO, umtx, CTLFLAG_RW, 0, "umtx debug");
190161678SdavidxuSYSCTL_INT(_debug_umtx, OID_AUTO, umtx_pi_allocated, CTLFLAG_RD,
191161678Sdavidxu    &umtx_pi_allocated, 0, "Allocated umtx_pi");
192161678Sdavidxu
193161678Sdavidxustatic void umtxq_sysinit(void *);
194161678Sdavidxustatic void umtxq_hash(struct umtx_key *key);
195161678Sdavidxustatic struct umtxq_chain *umtxq_getchain(struct umtx_key *key);
196139013Sdavidxustatic void umtxq_lock(struct umtx_key *key);
197139013Sdavidxustatic void umtxq_unlock(struct umtx_key *key);
198139257Sdavidxustatic void umtxq_busy(struct umtx_key *key);
199139257Sdavidxustatic void umtxq_unbusy(struct umtx_key *key);
200177848Sdavidxustatic void umtxq_insert_queue(struct umtx_q *uq, int q);
201177848Sdavidxustatic void umtxq_remove_queue(struct umtx_q *uq, int q);
202161678Sdavidxustatic int umtxq_sleep(struct umtx_q *uq, const char *wmesg, int timo);
203139257Sdavidxustatic int umtxq_count(struct umtx_key *key);
204163697Sdavidxustatic struct umtx_pi *umtx_pi_alloc(int);
205161678Sdavidxustatic void umtx_pi_free(struct umtx_pi *pi);
206161678Sdavidxustatic int do_unlock_pp(struct thread *td, struct umutex *m, uint32_t flags);
207161678Sdavidxustatic void umtx_thread_cleanup(struct thread *td);
208161678Sdavidxustatic void umtx_exec_hook(void *arg __unused, struct proc *p __unused,
209161678Sdavidxu	struct image_params *imgp __unused);
210161678SdavidxuSYSINIT(umtx, SI_SUB_EVENTHANDLER+1, SI_ORDER_MIDDLE, umtxq_sysinit, NULL);
211115310Sjeff
212177848Sdavidxu#define umtxq_signal(key, nwake)	umtxq_signal_queue((key), (nwake), UMTX_SHARED_QUEUE)
213177848Sdavidxu#define umtxq_insert(uq)	umtxq_insert_queue((uq), UMTX_SHARED_QUEUE)
214177848Sdavidxu#define umtxq_remove(uq)	umtxq_remove_queue((uq), UMTX_SHARED_QUEUE)
215177848Sdavidxu
216170300Sjeffstatic struct mtx umtx_lock;
217170300Sjeff
218161678Sdavidxustatic void
219161678Sdavidxuumtxq_sysinit(void *arg __unused)
220161678Sdavidxu{
221179421Sdavidxu	int i, j;
222138224Sdavidxu
223161678Sdavidxu	umtx_pi_zone = uma_zcreate("umtx pi", sizeof(struct umtx_pi),
224161678Sdavidxu		NULL, NULL, NULL, NULL, UMA_ALIGN_PTR, 0);
225179421Sdavidxu	for (i = 0; i < 2; ++i) {
226179421Sdavidxu		for (j = 0; j < UMTX_CHAINS; ++j) {
227179421Sdavidxu			mtx_init(&umtxq_chains[i][j].uc_lock, "umtxql", NULL,
228179421Sdavidxu				 MTX_DEF | MTX_DUPOK);
229201991Sdavidxu			LIST_INIT(&umtxq_chains[i][j].uc_queue[0]);
230201991Sdavidxu			LIST_INIT(&umtxq_chains[i][j].uc_queue[1]);
231201991Sdavidxu			LIST_INIT(&umtxq_chains[i][j].uc_spare_queue);
232179421Sdavidxu			TAILQ_INIT(&umtxq_chains[i][j].uc_pi_list);
233179421Sdavidxu			umtxq_chains[i][j].uc_busy = 0;
234179421Sdavidxu			umtxq_chains[i][j].uc_waiters = 0;
235179421Sdavidxu		}
236161678Sdavidxu	}
237170300Sjeff	mtx_init(&umtx_lock, "umtx lock", NULL, MTX_SPIN);
238161678Sdavidxu	EVENTHANDLER_REGISTER(process_exec, umtx_exec_hook, NULL,
239161678Sdavidxu	    EVENTHANDLER_PRI_ANY);
240161678Sdavidxu}
241161678Sdavidxu
242143149Sdavidxustruct umtx_q *
243143149Sdavidxuumtxq_alloc(void)
244143149Sdavidxu{
245161678Sdavidxu	struct umtx_q *uq;
246161678Sdavidxu
247161678Sdavidxu	uq = malloc(sizeof(struct umtx_q), M_UMTX, M_WAITOK | M_ZERO);
248201991Sdavidxu	uq->uq_spare_queue = malloc(sizeof(struct umtxq_queue), M_UMTX, M_WAITOK | M_ZERO);
249201991Sdavidxu	TAILQ_INIT(&uq->uq_spare_queue->head);
250161678Sdavidxu	TAILQ_INIT(&uq->uq_pi_contested);
251161678Sdavidxu	uq->uq_inherited_pri = PRI_MAX;
252161678Sdavidxu	return (uq);
253143149Sdavidxu}
254143149Sdavidxu
255143149Sdavidxuvoid
256143149Sdavidxuumtxq_free(struct umtx_q *uq)
257143149Sdavidxu{
258201991Sdavidxu	MPASS(uq->uq_spare_queue != NULL);
259201991Sdavidxu	free(uq->uq_spare_queue, M_UMTX);
260143149Sdavidxu	free(uq, M_UMTX);
261143149Sdavidxu}
262143149Sdavidxu
263161678Sdavidxustatic inline void
264139013Sdavidxuumtxq_hash(struct umtx_key *key)
265138224Sdavidxu{
266161678Sdavidxu	unsigned n = (uintptr_t)key->info.both.a + key->info.both.b;
267161678Sdavidxu	key->hash = ((n * GOLDEN_RATIO_PRIME) >> UMTX_SHIFTS) % UMTX_CHAINS;
268138224Sdavidxu}
269138224Sdavidxu
270161678Sdavidxustatic inline struct umtxq_chain *
271161678Sdavidxuumtxq_getchain(struct umtx_key *key)
272139013Sdavidxu{
273201886Sdavidxu	if (key->type <= TYPE_SEM)
274179421Sdavidxu		return (&umtxq_chains[1][key->hash]);
275179421Sdavidxu	return (&umtxq_chains[0][key->hash]);
276139013Sdavidxu}
277139013Sdavidxu
278161678Sdavidxu/*
279177848Sdavidxu * Lock a chain.
280161678Sdavidxu */
281138224Sdavidxustatic inline void
282177848Sdavidxuumtxq_lock(struct umtx_key *key)
283139257Sdavidxu{
284161678Sdavidxu	struct umtxq_chain *uc;
285139257Sdavidxu
286161678Sdavidxu	uc = umtxq_getchain(key);
287177848Sdavidxu	mtx_lock(&uc->uc_lock);
288139257Sdavidxu}
289139257Sdavidxu
290161678Sdavidxu/*
291177848Sdavidxu * Unlock a chain.
292161678Sdavidxu */
293139257Sdavidxustatic inline void
294177848Sdavidxuumtxq_unlock(struct umtx_key *key)
295139257Sdavidxu{
296161678Sdavidxu	struct umtxq_chain *uc;
297139257Sdavidxu
298161678Sdavidxu	uc = umtxq_getchain(key);
299177848Sdavidxu	mtx_unlock(&uc->uc_lock);
300139257Sdavidxu}
301139257Sdavidxu
302161678Sdavidxu/*
303177848Sdavidxu * Set chain to busy state when following operation
304177848Sdavidxu * may be blocked (kernel mutex can not be used).
305161678Sdavidxu */
306139257Sdavidxustatic inline void
307177848Sdavidxuumtxq_busy(struct umtx_key *key)
308138224Sdavidxu{
309161678Sdavidxu	struct umtxq_chain *uc;
310161678Sdavidxu
311161678Sdavidxu	uc = umtxq_getchain(key);
312177848Sdavidxu	mtx_assert(&uc->uc_lock, MA_OWNED);
313177848Sdavidxu	if (uc->uc_busy) {
314177880Sdavidxu#ifdef SMP
315177880Sdavidxu		if (smp_cpus > 1) {
316177880Sdavidxu			int count = BUSY_SPINS;
317177880Sdavidxu			if (count > 0) {
318177880Sdavidxu				umtxq_unlock(key);
319177880Sdavidxu				while (uc->uc_busy && --count > 0)
320177880Sdavidxu					cpu_spinwait();
321177880Sdavidxu				umtxq_lock(key);
322177880Sdavidxu			}
323177848Sdavidxu		}
324177880Sdavidxu#endif
325177880Sdavidxu		while (uc->uc_busy) {
326177848Sdavidxu			uc->uc_waiters++;
327177848Sdavidxu			msleep(uc, &uc->uc_lock, 0, "umtxqb", 0);
328177848Sdavidxu			uc->uc_waiters--;
329177848Sdavidxu		}
330177848Sdavidxu	}
331177848Sdavidxu	uc->uc_busy = 1;
332138224Sdavidxu}
333138224Sdavidxu
334161678Sdavidxu/*
335177848Sdavidxu * Unbusy a chain.
336161678Sdavidxu */
337138225Sdavidxustatic inline void
338177848Sdavidxuumtxq_unbusy(struct umtx_key *key)
339138224Sdavidxu{
340161678Sdavidxu	struct umtxq_chain *uc;
341161678Sdavidxu
342161678Sdavidxu	uc = umtxq_getchain(key);
343177848Sdavidxu	mtx_assert(&uc->uc_lock, MA_OWNED);
344177848Sdavidxu	KASSERT(uc->uc_busy != 0, ("not busy"));
345177848Sdavidxu	uc->uc_busy = 0;
346177848Sdavidxu	if (uc->uc_waiters)
347177848Sdavidxu		wakeup_one(uc);
348138224Sdavidxu}
349138224Sdavidxu
350201991Sdavidxustatic struct umtxq_queue *
351201991Sdavidxuumtxq_queue_lookup(struct umtx_key *key, int q)
352201991Sdavidxu{
353201991Sdavidxu	struct umtxq_queue *uh;
354201991Sdavidxu	struct umtxq_chain *uc;
355201991Sdavidxu
356201991Sdavidxu	uc = umtxq_getchain(key);
357201991Sdavidxu	UMTXQ_LOCKED_ASSERT(uc);
358201991Sdavidxu	LIST_FOREACH(uh, &uc->uc_queue[q], link) {
359201991Sdavidxu		if (umtx_key_match(&uh->key, key))
360201991Sdavidxu			return (uh);
361201991Sdavidxu	}
362201991Sdavidxu
363201991Sdavidxu	return (NULL);
364201991Sdavidxu}
365201991Sdavidxu
366139013Sdavidxustatic inline void
367177848Sdavidxuumtxq_insert_queue(struct umtx_q *uq, int q)
368115765Sjeff{
369201991Sdavidxu	struct umtxq_queue *uh;
370161678Sdavidxu	struct umtxq_chain *uc;
371139013Sdavidxu
372161678Sdavidxu	uc = umtxq_getchain(&uq->uq_key);
373161678Sdavidxu	UMTXQ_LOCKED_ASSERT(uc);
374201991Sdavidxu	KASSERT((uq->uq_flags & UQF_UMTXQ) == 0, ("umtx_q is already on queue"));
375203744Sdavidxu	uh = umtxq_queue_lookup(&uq->uq_key, q);
376201991Sdavidxu	if (uh != NULL) {
377201991Sdavidxu		LIST_INSERT_HEAD(&uc->uc_spare_queue, uq->uq_spare_queue, link);
378201991Sdavidxu	} else {
379201991Sdavidxu		uh = uq->uq_spare_queue;
380201991Sdavidxu		uh->key = uq->uq_key;
381201991Sdavidxu		LIST_INSERT_HEAD(&uc->uc_queue[q], uh, link);
382201991Sdavidxu	}
383201991Sdavidxu	uq->uq_spare_queue = NULL;
384201991Sdavidxu
385201991Sdavidxu	TAILQ_INSERT_TAIL(&uh->head, uq, uq_link);
386201991Sdavidxu	uh->length++;
387158718Sdavidxu	uq->uq_flags |= UQF_UMTXQ;
388201991Sdavidxu	uq->uq_cur_queue = uh;
389201991Sdavidxu	return;
390139013Sdavidxu}
391139013Sdavidxu
392139013Sdavidxustatic inline void
393177848Sdavidxuumtxq_remove_queue(struct umtx_q *uq, int q)
394139013Sdavidxu{
395161678Sdavidxu	struct umtxq_chain *uc;
396201991Sdavidxu	struct umtxq_queue *uh;
397161678Sdavidxu
398161678Sdavidxu	uc = umtxq_getchain(&uq->uq_key);
399161678Sdavidxu	UMTXQ_LOCKED_ASSERT(uc);
400158718Sdavidxu	if (uq->uq_flags & UQF_UMTXQ) {
401201991Sdavidxu		uh = uq->uq_cur_queue;
402201991Sdavidxu		TAILQ_REMOVE(&uh->head, uq, uq_link);
403201991Sdavidxu		uh->length--;
404158718Sdavidxu		uq->uq_flags &= ~UQF_UMTXQ;
405201991Sdavidxu		if (TAILQ_EMPTY(&uh->head)) {
406201991Sdavidxu			KASSERT(uh->length == 0,
407201991Sdavidxu			    ("inconsistent umtxq_queue length"));
408201991Sdavidxu			LIST_REMOVE(uh, link);
409201991Sdavidxu		} else {
410201991Sdavidxu			uh = LIST_FIRST(&uc->uc_spare_queue);
411201991Sdavidxu			KASSERT(uh != NULL, ("uc_spare_queue is empty"));
412201991Sdavidxu			LIST_REMOVE(uh, link);
413201991Sdavidxu		}
414201991Sdavidxu		uq->uq_spare_queue = uh;
415201991Sdavidxu		uq->uq_cur_queue = NULL;
416139013Sdavidxu	}
417139013Sdavidxu}
418139013Sdavidxu
419161678Sdavidxu/*
420161678Sdavidxu * Check if there are multiple waiters
421161678Sdavidxu */
422139013Sdavidxustatic int
423139013Sdavidxuumtxq_count(struct umtx_key *key)
424139013Sdavidxu{
425161678Sdavidxu	struct umtxq_chain *uc;
426201991Sdavidxu	struct umtxq_queue *uh;
427115765Sjeff
428161678Sdavidxu	uc = umtxq_getchain(key);
429161678Sdavidxu	UMTXQ_LOCKED_ASSERT(uc);
430201991Sdavidxu	uh = umtxq_queue_lookup(key, UMTX_SHARED_QUEUE);
431201991Sdavidxu	if (uh != NULL)
432201991Sdavidxu		return (uh->length);
433201991Sdavidxu	return (0);
434115765Sjeff}
435115765Sjeff
436161678Sdavidxu/*
437161678Sdavidxu * Check if there are multiple PI waiters and returns first
438161678Sdavidxu * waiter.
439161678Sdavidxu */
440139257Sdavidxustatic int
441161678Sdavidxuumtxq_count_pi(struct umtx_key *key, struct umtx_q **first)
442161678Sdavidxu{
443161678Sdavidxu	struct umtxq_chain *uc;
444201991Sdavidxu	struct umtxq_queue *uh;
445161678Sdavidxu
446161678Sdavidxu	*first = NULL;
447161678Sdavidxu	uc = umtxq_getchain(key);
448161678Sdavidxu	UMTXQ_LOCKED_ASSERT(uc);
449201991Sdavidxu	uh = umtxq_queue_lookup(key, UMTX_SHARED_QUEUE);
450201991Sdavidxu	if (uh != NULL) {
451201991Sdavidxu		*first = TAILQ_FIRST(&uh->head);
452201991Sdavidxu		return (uh->length);
453161678Sdavidxu	}
454201991Sdavidxu	return (0);
455161678Sdavidxu}
456161678Sdavidxu
457161678Sdavidxu/*
458161678Sdavidxu * Wake up threads waiting on an userland object.
459161678Sdavidxu */
460177848Sdavidxu
461161678Sdavidxustatic int
462177848Sdavidxuumtxq_signal_queue(struct umtx_key *key, int n_wake, int q)
463115765Sjeff{
464161678Sdavidxu	struct umtxq_chain *uc;
465201991Sdavidxu	struct umtxq_queue *uh;
466201991Sdavidxu	struct umtx_q *uq;
467161678Sdavidxu	int ret;
468115765Sjeff
469139257Sdavidxu	ret = 0;
470161678Sdavidxu	uc = umtxq_getchain(key);
471161678Sdavidxu	UMTXQ_LOCKED_ASSERT(uc);
472201991Sdavidxu	uh = umtxq_queue_lookup(key, q);
473201991Sdavidxu	if (uh != NULL) {
474201991Sdavidxu		while ((uq = TAILQ_FIRST(&uh->head)) != NULL) {
475177848Sdavidxu			umtxq_remove_queue(uq, q);
476161678Sdavidxu			wakeup(uq);
477139257Sdavidxu			if (++ret >= n_wake)
478201991Sdavidxu				return (ret);
479139013Sdavidxu		}
480139013Sdavidxu	}
481139257Sdavidxu	return (ret);
482138224Sdavidxu}
483138224Sdavidxu
484177848Sdavidxu
485161678Sdavidxu/*
486161678Sdavidxu * Wake up specified thread.
487161678Sdavidxu */
488161678Sdavidxustatic inline void
489161678Sdavidxuumtxq_signal_thread(struct umtx_q *uq)
490161678Sdavidxu{
491161678Sdavidxu	struct umtxq_chain *uc;
492161678Sdavidxu
493161678Sdavidxu	uc = umtxq_getchain(&uq->uq_key);
494161678Sdavidxu	UMTXQ_LOCKED_ASSERT(uc);
495161678Sdavidxu	umtxq_remove(uq);
496161678Sdavidxu	wakeup(uq);
497161678Sdavidxu}
498161678Sdavidxu
499161678Sdavidxu/*
500161678Sdavidxu * Put thread into sleep state, before sleeping, check if
501161678Sdavidxu * thread was removed from umtx queue.
502161678Sdavidxu */
503138224Sdavidxustatic inline int
504161678Sdavidxuumtxq_sleep(struct umtx_q *uq, const char *wmesg, int timo)
505138224Sdavidxu{
506161678Sdavidxu	struct umtxq_chain *uc;
507161678Sdavidxu	int error;
508161678Sdavidxu
509161678Sdavidxu	uc = umtxq_getchain(&uq->uq_key);
510161678Sdavidxu	UMTXQ_LOCKED_ASSERT(uc);
511161678Sdavidxu	if (!(uq->uq_flags & UQF_UMTXQ))
512161678Sdavidxu		return (0);
513161678Sdavidxu	error = msleep(uq, &uc->uc_lock, PCATCH, wmesg, timo);
514139751Sdavidxu	if (error == EWOULDBLOCK)
515139751Sdavidxu		error = ETIMEDOUT;
516139751Sdavidxu	return (error);
517138224Sdavidxu}
518138224Sdavidxu
519161678Sdavidxu/*
520161678Sdavidxu * Convert userspace address into unique logical address.
521161678Sdavidxu */
522218969Sjhbint
523161678Sdavidxuumtx_key_get(void *addr, int type, int share, struct umtx_key *key)
524139013Sdavidxu{
525161678Sdavidxu	struct thread *td = curthread;
526139013Sdavidxu	vm_map_t map;
527139013Sdavidxu	vm_map_entry_t entry;
528139013Sdavidxu	vm_pindex_t pindex;
529139013Sdavidxu	vm_prot_t prot;
530139013Sdavidxu	boolean_t wired;
531139013Sdavidxu
532161678Sdavidxu	key->type = type;
533161678Sdavidxu	if (share == THREAD_SHARE) {
534161678Sdavidxu		key->shared = 0;
535161678Sdavidxu		key->info.private.vs = td->td_proc->p_vmspace;
536161678Sdavidxu		key->info.private.addr = (uintptr_t)addr;
537163677Sdavidxu	} else {
538163677Sdavidxu		MPASS(share == PROCESS_SHARE || share == AUTO_SHARE);
539161678Sdavidxu		map = &td->td_proc->p_vmspace->vm_map;
540161678Sdavidxu		if (vm_map_lookup(&map, (vm_offset_t)addr, VM_PROT_WRITE,
541161678Sdavidxu		    &entry, &key->info.shared.object, &pindex, &prot,
542161678Sdavidxu		    &wired) != KERN_SUCCESS) {
543161678Sdavidxu			return EFAULT;
544161678Sdavidxu		}
545161678Sdavidxu
546161678Sdavidxu		if ((share == PROCESS_SHARE) ||
547161678Sdavidxu		    (share == AUTO_SHARE &&
548161678Sdavidxu		     VM_INHERIT_SHARE == entry->inheritance)) {
549161678Sdavidxu			key->shared = 1;
550161678Sdavidxu			key->info.shared.offset = entry->offset + entry->start -
551161678Sdavidxu				(vm_offset_t)addr;
552161678Sdavidxu			vm_object_reference(key->info.shared.object);
553161678Sdavidxu		} else {
554161678Sdavidxu			key->shared = 0;
555161678Sdavidxu			key->info.private.vs = td->td_proc->p_vmspace;
556161678Sdavidxu			key->info.private.addr = (uintptr_t)addr;
557161678Sdavidxu		}
558161678Sdavidxu		vm_map_lookup_done(map, entry);
559139013Sdavidxu	}
560139013Sdavidxu
561161678Sdavidxu	umtxq_hash(key);
562139013Sdavidxu	return (0);
563139013Sdavidxu}
564139013Sdavidxu
565161678Sdavidxu/*
566161678Sdavidxu * Release key.
567161678Sdavidxu */
568218969Sjhbvoid
569139013Sdavidxuumtx_key_release(struct umtx_key *key)
570139013Sdavidxu{
571161678Sdavidxu	if (key->shared)
572139013Sdavidxu		vm_object_deallocate(key->info.shared.object);
573139013Sdavidxu}
574139013Sdavidxu
575161678Sdavidxu/*
576161678Sdavidxu * Lock a umtx object.
577161678Sdavidxu */
578139013Sdavidxustatic int
579163449Sdavidxu_do_lock_umtx(struct thread *td, struct umtx *umtx, u_long id, int timo)
580112904Sjeff{
581143149Sdavidxu	struct umtx_q *uq;
582163449Sdavidxu	u_long owner;
583163449Sdavidxu	u_long old;
584138224Sdavidxu	int error = 0;
585112904Sjeff
586143149Sdavidxu	uq = td->td_umtxq;
587161678Sdavidxu
588112904Sjeff	/*
589161678Sdavidxu	 * Care must be exercised when dealing with umtx structure. It
590112904Sjeff	 * can fault on any access.
591112904Sjeff	 */
592112904Sjeff	for (;;) {
593112904Sjeff		/*
594112904Sjeff		 * Try the uncontested case.  This should be done in userland.
595112904Sjeff		 */
596163449Sdavidxu		owner = casuword(&umtx->u_owner, UMTX_UNOWNED, id);
597112904Sjeff
598138224Sdavidxu		/* The acquire succeeded. */
599138224Sdavidxu		if (owner == UMTX_UNOWNED)
600138224Sdavidxu			return (0);
601138224Sdavidxu
602115765Sjeff		/* The address was invalid. */
603115765Sjeff		if (owner == -1)
604115765Sjeff			return (EFAULT);
605115765Sjeff
606115765Sjeff		/* If no one owns it but it is contested try to acquire it. */
607115765Sjeff		if (owner == UMTX_CONTESTED) {
608163449Sdavidxu			owner = casuword(&umtx->u_owner,
609139013Sdavidxu			    UMTX_CONTESTED, id | UMTX_CONTESTED);
610115765Sjeff
611138224Sdavidxu			if (owner == UMTX_CONTESTED)
612138224Sdavidxu				return (0);
613138224Sdavidxu
614115765Sjeff			/* The address was invalid. */
615115765Sjeff			if (owner == -1)
616115765Sjeff				return (EFAULT);
617115765Sjeff
618115765Sjeff			/* If this failed the lock has changed, restart. */
619115765Sjeff			continue;
620112904Sjeff		}
621112904Sjeff
622138224Sdavidxu		/*
623138224Sdavidxu		 * If we caught a signal, we have retried and now
624138224Sdavidxu		 * exit immediately.
625138224Sdavidxu		 */
626161678Sdavidxu		if (error != 0)
627138224Sdavidxu			return (error);
628112904Sjeff
629161678Sdavidxu		if ((error = umtx_key_get(umtx, TYPE_SIMPLE_LOCK,
630161678Sdavidxu			AUTO_SHARE, &uq->uq_key)) != 0)
631161678Sdavidxu			return (error);
632161678Sdavidxu
633161678Sdavidxu		umtxq_lock(&uq->uq_key);
634161678Sdavidxu		umtxq_busy(&uq->uq_key);
635161678Sdavidxu		umtxq_insert(uq);
636161678Sdavidxu		umtxq_unbusy(&uq->uq_key);
637161678Sdavidxu		umtxq_unlock(&uq->uq_key);
638161678Sdavidxu
639112904Sjeff		/*
640112904Sjeff		 * Set the contested bit so that a release in user space
641112904Sjeff		 * knows to use the system call for unlock.  If this fails
642112904Sjeff		 * either some one else has acquired the lock or it has been
643112904Sjeff		 * released.
644112904Sjeff		 */
645163449Sdavidxu		old = casuword(&umtx->u_owner, owner, owner | UMTX_CONTESTED);
646112904Sjeff
647112904Sjeff		/* The address was invalid. */
648112967Sjake		if (old == -1) {
649143149Sdavidxu			umtxq_lock(&uq->uq_key);
650143149Sdavidxu			umtxq_remove(uq);
651143149Sdavidxu			umtxq_unlock(&uq->uq_key);
652143149Sdavidxu			umtx_key_release(&uq->uq_key);
653115765Sjeff			return (EFAULT);
654112904Sjeff		}
655112904Sjeff
656112904Sjeff		/*
657115765Sjeff		 * We set the contested bit, sleep. Otherwise the lock changed
658117685Smtm		 * and we need to retry or we lost a race to the thread
659117685Smtm		 * unlocking the umtx.
660112904Sjeff		 */
661143149Sdavidxu		umtxq_lock(&uq->uq_key);
662161678Sdavidxu		if (old == owner)
663161678Sdavidxu			error = umtxq_sleep(uq, "umtx", timo);
664143149Sdavidxu		umtxq_remove(uq);
665143149Sdavidxu		umtxq_unlock(&uq->uq_key);
666143149Sdavidxu		umtx_key_release(&uq->uq_key);
667112904Sjeff	}
668117743Smtm
669117743Smtm	return (0);
670112904Sjeff}
671112904Sjeff
672161678Sdavidxu/*
673161678Sdavidxu * Lock a umtx object.
674161678Sdavidxu */
675139013Sdavidxustatic int
676163449Sdavidxudo_lock_umtx(struct thread *td, struct umtx *umtx, u_long id,
677140245Sdavidxu	struct timespec *timeout)
678112904Sjeff{
679140245Sdavidxu	struct timespec ts, ts2, ts3;
680139013Sdavidxu	struct timeval tv;
681140245Sdavidxu	int error;
682139013Sdavidxu
683140245Sdavidxu	if (timeout == NULL) {
684162536Sdavidxu		error = _do_lock_umtx(td, umtx, id, 0);
685162030Sdavidxu		/* Mutex locking is restarted if it is interrupted. */
686162030Sdavidxu		if (error == EINTR)
687162030Sdavidxu			error = ERESTART;
688139013Sdavidxu	} else {
689140245Sdavidxu		getnanouptime(&ts);
690140245Sdavidxu		timespecadd(&ts, timeout);
691140245Sdavidxu		TIMESPEC_TO_TIMEVAL(&tv, timeout);
692139013Sdavidxu		for (;;) {
693162536Sdavidxu			error = _do_lock_umtx(td, umtx, id, tvtohz(&tv));
694140245Sdavidxu			if (error != ETIMEDOUT)
695140245Sdavidxu				break;
696140245Sdavidxu			getnanouptime(&ts2);
697140245Sdavidxu			if (timespeccmp(&ts2, &ts, >=)) {
698139751Sdavidxu				error = ETIMEDOUT;
699139013Sdavidxu				break;
700139013Sdavidxu			}
701140245Sdavidxu			ts3 = ts;
702140245Sdavidxu			timespecsub(&ts3, &ts2);
703140245Sdavidxu			TIMESPEC_TO_TIMEVAL(&tv, &ts3);
704139013Sdavidxu		}
705162030Sdavidxu		/* Timed-locking is not restarted. */
706162030Sdavidxu		if (error == ERESTART)
707162030Sdavidxu			error = EINTR;
708139013Sdavidxu	}
709139013Sdavidxu	return (error);
710139013Sdavidxu}
711139013Sdavidxu
712161678Sdavidxu/*
713161678Sdavidxu * Unlock a umtx object.
714161678Sdavidxu */
715139013Sdavidxustatic int
716163449Sdavidxudo_unlock_umtx(struct thread *td, struct umtx *umtx, u_long id)
717139013Sdavidxu{
718139013Sdavidxu	struct umtx_key key;
719163449Sdavidxu	u_long owner;
720163449Sdavidxu	u_long old;
721139257Sdavidxu	int error;
722139257Sdavidxu	int count;
723112904Sjeff
724112904Sjeff	/*
725112904Sjeff	 * Make sure we own this mtx.
726112904Sjeff	 */
727163449Sdavidxu	owner = fuword(__DEVOLATILE(u_long *, &umtx->u_owner));
728161678Sdavidxu	if (owner == -1)
729115765Sjeff		return (EFAULT);
730115765Sjeff
731139013Sdavidxu	if ((owner & ~UMTX_CONTESTED) != id)
732115765Sjeff		return (EPERM);
733112904Sjeff
734161678Sdavidxu	/* This should be done in userland */
735161678Sdavidxu	if ((owner & UMTX_CONTESTED) == 0) {
736163449Sdavidxu		old = casuword(&umtx->u_owner, owner, UMTX_UNOWNED);
737161678Sdavidxu		if (old == -1)
738161678Sdavidxu			return (EFAULT);
739161678Sdavidxu		if (old == owner)
740161678Sdavidxu			return (0);
741161855Sdavidxu		owner = old;
742161678Sdavidxu	}
743161678Sdavidxu
744117685Smtm	/* We should only ever be in here for contested locks */
745161678Sdavidxu	if ((error = umtx_key_get(umtx, TYPE_SIMPLE_LOCK, AUTO_SHARE,
746161678Sdavidxu		&key)) != 0)
747139257Sdavidxu		return (error);
748139257Sdavidxu
749139257Sdavidxu	umtxq_lock(&key);
750139257Sdavidxu	umtxq_busy(&key);
751139257Sdavidxu	count = umtxq_count(&key);
752139257Sdavidxu	umtxq_unlock(&key);
753139257Sdavidxu
754117743Smtm	/*
755117743Smtm	 * When unlocking the umtx, it must be marked as unowned if
756117743Smtm	 * there is zero or one thread only waiting for it.
757117743Smtm	 * Otherwise, it must be marked as contested.
758117743Smtm	 */
759163449Sdavidxu	old = casuword(&umtx->u_owner, owner,
760163449Sdavidxu		count <= 1 ? UMTX_UNOWNED : UMTX_CONTESTED);
761139257Sdavidxu	umtxq_lock(&key);
762161678Sdavidxu	umtxq_signal(&key,1);
763139257Sdavidxu	umtxq_unbusy(&key);
764139257Sdavidxu	umtxq_unlock(&key);
765139257Sdavidxu	umtx_key_release(&key);
766115765Sjeff	if (old == -1)
767115765Sjeff		return (EFAULT);
768138224Sdavidxu	if (old != owner)
769138224Sdavidxu		return (EINVAL);
770115765Sjeff	return (0);
771112904Sjeff}
772139013Sdavidxu
773205014Snwhitehorn#ifdef COMPAT_FREEBSD32
774162536Sdavidxu
775161678Sdavidxu/*
776162536Sdavidxu * Lock a umtx object.
777162536Sdavidxu */
778162536Sdavidxustatic int
779162536Sdavidxu_do_lock_umtx32(struct thread *td, uint32_t *m, uint32_t id, int timo)
780162536Sdavidxu{
781162536Sdavidxu	struct umtx_q *uq;
782162536Sdavidxu	uint32_t owner;
783162536Sdavidxu	uint32_t old;
784162536Sdavidxu	int error = 0;
785162536Sdavidxu
786162536Sdavidxu	uq = td->td_umtxq;
787162536Sdavidxu
788162536Sdavidxu	/*
789162536Sdavidxu	 * Care must be exercised when dealing with umtx structure. It
790162536Sdavidxu	 * can fault on any access.
791162536Sdavidxu	 */
792162536Sdavidxu	for (;;) {
793162536Sdavidxu		/*
794162536Sdavidxu		 * Try the uncontested case.  This should be done in userland.
795162536Sdavidxu		 */
796162536Sdavidxu		owner = casuword32(m, UMUTEX_UNOWNED, id);
797162536Sdavidxu
798162536Sdavidxu		/* The acquire succeeded. */
799162536Sdavidxu		if (owner == UMUTEX_UNOWNED)
800162536Sdavidxu			return (0);
801162536Sdavidxu
802162536Sdavidxu		/* The address was invalid. */
803162536Sdavidxu		if (owner == -1)
804162536Sdavidxu			return (EFAULT);
805162536Sdavidxu
806162536Sdavidxu		/* If no one owns it but it is contested try to acquire it. */
807162536Sdavidxu		if (owner == UMUTEX_CONTESTED) {
808162536Sdavidxu			owner = casuword32(m,
809162536Sdavidxu			    UMUTEX_CONTESTED, id | UMUTEX_CONTESTED);
810162536Sdavidxu			if (owner == UMUTEX_CONTESTED)
811162536Sdavidxu				return (0);
812162536Sdavidxu
813162536Sdavidxu			/* The address was invalid. */
814162536Sdavidxu			if (owner == -1)
815162536Sdavidxu				return (EFAULT);
816162536Sdavidxu
817162536Sdavidxu			/* If this failed the lock has changed, restart. */
818162536Sdavidxu			continue;
819162536Sdavidxu		}
820162536Sdavidxu
821162536Sdavidxu		/*
822162536Sdavidxu		 * If we caught a signal, we have retried and now
823162536Sdavidxu		 * exit immediately.
824162536Sdavidxu		 */
825162536Sdavidxu		if (error != 0)
826162536Sdavidxu			return (error);
827162536Sdavidxu
828162536Sdavidxu		if ((error = umtx_key_get(m, TYPE_SIMPLE_LOCK,
829162536Sdavidxu			AUTO_SHARE, &uq->uq_key)) != 0)
830162536Sdavidxu			return (error);
831162536Sdavidxu
832162536Sdavidxu		umtxq_lock(&uq->uq_key);
833162536Sdavidxu		umtxq_busy(&uq->uq_key);
834162536Sdavidxu		umtxq_insert(uq);
835162536Sdavidxu		umtxq_unbusy(&uq->uq_key);
836162536Sdavidxu		umtxq_unlock(&uq->uq_key);
837162536Sdavidxu
838162536Sdavidxu		/*
839162536Sdavidxu		 * Set the contested bit so that a release in user space
840162536Sdavidxu		 * knows to use the system call for unlock.  If this fails
841162536Sdavidxu		 * either some one else has acquired the lock or it has been
842162536Sdavidxu		 * released.
843162536Sdavidxu		 */
844162536Sdavidxu		old = casuword32(m, owner, owner | UMUTEX_CONTESTED);
845162536Sdavidxu
846162536Sdavidxu		/* The address was invalid. */
847162536Sdavidxu		if (old == -1) {
848162536Sdavidxu			umtxq_lock(&uq->uq_key);
849162536Sdavidxu			umtxq_remove(uq);
850162536Sdavidxu			umtxq_unlock(&uq->uq_key);
851162536Sdavidxu			umtx_key_release(&uq->uq_key);
852162536Sdavidxu			return (EFAULT);
853162536Sdavidxu		}
854162536Sdavidxu
855162536Sdavidxu		/*
856162536Sdavidxu		 * We set the contested bit, sleep. Otherwise the lock changed
857162536Sdavidxu		 * and we need to retry or we lost a race to the thread
858162536Sdavidxu		 * unlocking the umtx.
859162536Sdavidxu		 */
860162536Sdavidxu		umtxq_lock(&uq->uq_key);
861162536Sdavidxu		if (old == owner)
862162536Sdavidxu			error = umtxq_sleep(uq, "umtx", timo);
863162536Sdavidxu		umtxq_remove(uq);
864162536Sdavidxu		umtxq_unlock(&uq->uq_key);
865162536Sdavidxu		umtx_key_release(&uq->uq_key);
866162536Sdavidxu	}
867162536Sdavidxu
868162536Sdavidxu	return (0);
869162536Sdavidxu}
870162536Sdavidxu
871162536Sdavidxu/*
872162536Sdavidxu * Lock a umtx object.
873162536Sdavidxu */
874162536Sdavidxustatic int
875162536Sdavidxudo_lock_umtx32(struct thread *td, void *m, uint32_t id,
876162536Sdavidxu	struct timespec *timeout)
877162536Sdavidxu{
878162536Sdavidxu	struct timespec ts, ts2, ts3;
879162536Sdavidxu	struct timeval tv;
880162536Sdavidxu	int error;
881162536Sdavidxu
882162536Sdavidxu	if (timeout == NULL) {
883162536Sdavidxu		error = _do_lock_umtx32(td, m, id, 0);
884162536Sdavidxu		/* Mutex locking is restarted if it is interrupted. */
885162536Sdavidxu		if (error == EINTR)
886162536Sdavidxu			error = ERESTART;
887162536Sdavidxu	} else {
888162536Sdavidxu		getnanouptime(&ts);
889162536Sdavidxu		timespecadd(&ts, timeout);
890162536Sdavidxu		TIMESPEC_TO_TIMEVAL(&tv, timeout);
891162536Sdavidxu		for (;;) {
892162536Sdavidxu			error = _do_lock_umtx32(td, m, id, tvtohz(&tv));
893162536Sdavidxu			if (error != ETIMEDOUT)
894162536Sdavidxu				break;
895162536Sdavidxu			getnanouptime(&ts2);
896162536Sdavidxu			if (timespeccmp(&ts2, &ts, >=)) {
897162536Sdavidxu				error = ETIMEDOUT;
898162536Sdavidxu				break;
899162536Sdavidxu			}
900162536Sdavidxu			ts3 = ts;
901162536Sdavidxu			timespecsub(&ts3, &ts2);
902162536Sdavidxu			TIMESPEC_TO_TIMEVAL(&tv, &ts3);
903162536Sdavidxu		}
904162536Sdavidxu		/* Timed-locking is not restarted. */
905162536Sdavidxu		if (error == ERESTART)
906162536Sdavidxu			error = EINTR;
907162536Sdavidxu	}
908162536Sdavidxu	return (error);
909162536Sdavidxu}
910162536Sdavidxu
911162536Sdavidxu/*
912162536Sdavidxu * Unlock a umtx object.
913162536Sdavidxu */
914162536Sdavidxustatic int
915162536Sdavidxudo_unlock_umtx32(struct thread *td, uint32_t *m, uint32_t id)
916162536Sdavidxu{
917162536Sdavidxu	struct umtx_key key;
918162536Sdavidxu	uint32_t owner;
919162536Sdavidxu	uint32_t old;
920162536Sdavidxu	int error;
921162536Sdavidxu	int count;
922162536Sdavidxu
923162536Sdavidxu	/*
924162536Sdavidxu	 * Make sure we own this mtx.
925162536Sdavidxu	 */
926162536Sdavidxu	owner = fuword32(m);
927162536Sdavidxu	if (owner == -1)
928162536Sdavidxu		return (EFAULT);
929162536Sdavidxu
930162536Sdavidxu	if ((owner & ~UMUTEX_CONTESTED) != id)
931162536Sdavidxu		return (EPERM);
932162536Sdavidxu
933162536Sdavidxu	/* This should be done in userland */
934162536Sdavidxu	if ((owner & UMUTEX_CONTESTED) == 0) {
935162536Sdavidxu		old = casuword32(m, owner, UMUTEX_UNOWNED);
936162536Sdavidxu		if (old == -1)
937162536Sdavidxu			return (EFAULT);
938162536Sdavidxu		if (old == owner)
939162536Sdavidxu			return (0);
940162536Sdavidxu		owner = old;
941162536Sdavidxu	}
942162536Sdavidxu
943162536Sdavidxu	/* We should only ever be in here for contested locks */
944162536Sdavidxu	if ((error = umtx_key_get(m, TYPE_SIMPLE_LOCK, AUTO_SHARE,
945162536Sdavidxu		&key)) != 0)
946162536Sdavidxu		return (error);
947162536Sdavidxu
948162536Sdavidxu	umtxq_lock(&key);
949162536Sdavidxu	umtxq_busy(&key);
950162536Sdavidxu	count = umtxq_count(&key);
951162536Sdavidxu	umtxq_unlock(&key);
952162536Sdavidxu
953162536Sdavidxu	/*
954162536Sdavidxu	 * When unlocking the umtx, it must be marked as unowned if
955162536Sdavidxu	 * there is zero or one thread only waiting for it.
956162536Sdavidxu	 * Otherwise, it must be marked as contested.
957162536Sdavidxu	 */
958162536Sdavidxu	old = casuword32(m, owner,
959162536Sdavidxu		count <= 1 ? UMUTEX_UNOWNED : UMUTEX_CONTESTED);
960162536Sdavidxu	umtxq_lock(&key);
961162536Sdavidxu	umtxq_signal(&key,1);
962162536Sdavidxu	umtxq_unbusy(&key);
963162536Sdavidxu	umtxq_unlock(&key);
964162536Sdavidxu	umtx_key_release(&key);
965162536Sdavidxu	if (old == -1)
966162536Sdavidxu		return (EFAULT);
967162536Sdavidxu	if (old != owner)
968162536Sdavidxu		return (EINVAL);
969162536Sdavidxu	return (0);
970162536Sdavidxu}
971162536Sdavidxu#endif
972162536Sdavidxu
973162536Sdavidxu/*
974161678Sdavidxu * Fetch and compare value, sleep on the address if value is not changed.
975161678Sdavidxu */
976139013Sdavidxustatic int
977163449Sdavidxudo_wait(struct thread *td, void *addr, u_long id,
978178646Sdavidxu	struct timespec *timeout, int compat32, int is_private)
979139013Sdavidxu{
980143149Sdavidxu	struct umtx_q *uq;
981140245Sdavidxu	struct timespec ts, ts2, ts3;
982139013Sdavidxu	struct timeval tv;
983163449Sdavidxu	u_long tmp;
984140245Sdavidxu	int error = 0;
985139013Sdavidxu
986143149Sdavidxu	uq = td->td_umtxq;
987178646Sdavidxu	if ((error = umtx_key_get(addr, TYPE_SIMPLE_WAIT,
988178646Sdavidxu		is_private ? THREAD_SHARE : AUTO_SHARE, &uq->uq_key)) != 0)
989139013Sdavidxu		return (error);
990161678Sdavidxu
991161678Sdavidxu	umtxq_lock(&uq->uq_key);
992161678Sdavidxu	umtxq_insert(uq);
993161678Sdavidxu	umtxq_unlock(&uq->uq_key);
994162536Sdavidxu	if (compat32 == 0)
995162536Sdavidxu		tmp = fuword(addr);
996162536Sdavidxu        else
997190987Sdavidxu		tmp = (unsigned int)fuword32(addr);
998139427Sdavidxu	if (tmp != id) {
999143149Sdavidxu		umtxq_lock(&uq->uq_key);
1000143149Sdavidxu		umtxq_remove(uq);
1001143149Sdavidxu		umtxq_unlock(&uq->uq_key);
1002140245Sdavidxu	} else if (timeout == NULL) {
1003143149Sdavidxu		umtxq_lock(&uq->uq_key);
1004164839Sdavidxu		error = umtxq_sleep(uq, "uwait", 0);
1005161678Sdavidxu		umtxq_remove(uq);
1006143149Sdavidxu		umtxq_unlock(&uq->uq_key);
1007139013Sdavidxu	} else {
1008140245Sdavidxu		getnanouptime(&ts);
1009140245Sdavidxu		timespecadd(&ts, timeout);
1010140245Sdavidxu		TIMESPEC_TO_TIMEVAL(&tv, timeout);
1011161678Sdavidxu		umtxq_lock(&uq->uq_key);
1012139013Sdavidxu		for (;;) {
1013164839Sdavidxu			error = umtxq_sleep(uq, "uwait", tvtohz(&tv));
1014211794Sdavidxu			if (!(uq->uq_flags & UQF_UMTXQ)) {
1015211794Sdavidxu				error = 0;
1016161678Sdavidxu				break;
1017211794Sdavidxu			}
1018140245Sdavidxu			if (error != ETIMEDOUT)
1019140245Sdavidxu				break;
1020161678Sdavidxu			umtxq_unlock(&uq->uq_key);
1021140245Sdavidxu			getnanouptime(&ts2);
1022140245Sdavidxu			if (timespeccmp(&ts2, &ts, >=)) {
1023139751Sdavidxu				error = ETIMEDOUT;
1024161678Sdavidxu				umtxq_lock(&uq->uq_key);
1025139013Sdavidxu				break;
1026139013Sdavidxu			}
1027140245Sdavidxu			ts3 = ts;
1028140245Sdavidxu			timespecsub(&ts3, &ts2);
1029140245Sdavidxu			TIMESPEC_TO_TIMEVAL(&tv, &ts3);
1030161678Sdavidxu			umtxq_lock(&uq->uq_key);
1031139013Sdavidxu		}
1032143149Sdavidxu		umtxq_remove(uq);
1033143149Sdavidxu		umtxq_unlock(&uq->uq_key);
1034139013Sdavidxu	}
1035143149Sdavidxu	umtx_key_release(&uq->uq_key);
1036139257Sdavidxu	if (error == ERESTART)
1037139257Sdavidxu		error = EINTR;
1038139013Sdavidxu	return (error);
1039139013Sdavidxu}
1040139013Sdavidxu
1041161678Sdavidxu/*
1042161678Sdavidxu * Wake up threads sleeping on the specified address.
1043161678Sdavidxu */
1044151692Sdavidxuint
1045178646Sdavidxukern_umtx_wake(struct thread *td, void *uaddr, int n_wake, int is_private)
1046139013Sdavidxu{
1047139013Sdavidxu	struct umtx_key key;
1048139257Sdavidxu	int ret;
1049139013Sdavidxu
1050178646Sdavidxu	if ((ret = umtx_key_get(uaddr, TYPE_SIMPLE_WAIT,
1051178646Sdavidxu		is_private ? THREAD_SHARE : AUTO_SHARE, &key)) != 0)
1052139257Sdavidxu		return (ret);
1053139258Sdavidxu	umtxq_lock(&key);
1054139257Sdavidxu	ret = umtxq_signal(&key, n_wake);
1055139258Sdavidxu	umtxq_unlock(&key);
1056139257Sdavidxu	umtx_key_release(&key);
1057139013Sdavidxu	return (0);
1058139013Sdavidxu}
1059139013Sdavidxu
1060161678Sdavidxu/*
1061161678Sdavidxu * Lock PTHREAD_PRIO_NONE protocol POSIX mutex.
1062161678Sdavidxu */
1063161678Sdavidxustatic int
1064161678Sdavidxu_do_lock_normal(struct thread *td, struct umutex *m, uint32_t flags, int timo,
1065179970Sdavidxu	int mode)
1066161678Sdavidxu{
1067161678Sdavidxu	struct umtx_q *uq;
1068161678Sdavidxu	uint32_t owner, old, id;
1069161678Sdavidxu	int error = 0;
1070161678Sdavidxu
1071161678Sdavidxu	id = td->td_tid;
1072161678Sdavidxu	uq = td->td_umtxq;
1073161678Sdavidxu
1074161678Sdavidxu	/*
1075161678Sdavidxu	 * Care must be exercised when dealing with umtx structure. It
1076161678Sdavidxu	 * can fault on any access.
1077161678Sdavidxu	 */
1078161678Sdavidxu	for (;;) {
1079179970Sdavidxu		owner = fuword32(__DEVOLATILE(void *, &m->m_owner));
1080179970Sdavidxu		if (mode == _UMUTEX_WAIT) {
1081179970Sdavidxu			if (owner == UMUTEX_UNOWNED || owner == UMUTEX_CONTESTED)
1082179970Sdavidxu				return (0);
1083179970Sdavidxu		} else {
1084179970Sdavidxu			/*
1085179970Sdavidxu			 * Try the uncontested case.  This should be done in userland.
1086179970Sdavidxu			 */
1087179970Sdavidxu			owner = casuword32(&m->m_owner, UMUTEX_UNOWNED, id);
1088161678Sdavidxu
1089179970Sdavidxu			/* The acquire succeeded. */
1090179970Sdavidxu			if (owner == UMUTEX_UNOWNED)
1091161678Sdavidxu				return (0);
1092161678Sdavidxu
1093161678Sdavidxu			/* The address was invalid. */
1094161678Sdavidxu			if (owner == -1)
1095161678Sdavidxu				return (EFAULT);
1096161678Sdavidxu
1097179970Sdavidxu			/* If no one owns it but it is contested try to acquire it. */
1098179970Sdavidxu			if (owner == UMUTEX_CONTESTED) {
1099179970Sdavidxu				owner = casuword32(&m->m_owner,
1100179970Sdavidxu				    UMUTEX_CONTESTED, id | UMUTEX_CONTESTED);
1101179970Sdavidxu
1102179970Sdavidxu				if (owner == UMUTEX_CONTESTED)
1103179970Sdavidxu					return (0);
1104179970Sdavidxu
1105179970Sdavidxu				/* The address was invalid. */
1106179970Sdavidxu				if (owner == -1)
1107179970Sdavidxu					return (EFAULT);
1108179970Sdavidxu
1109179970Sdavidxu				/* If this failed the lock has changed, restart. */
1110179970Sdavidxu				continue;
1111179970Sdavidxu			}
1112161678Sdavidxu		}
1113161678Sdavidxu
1114161678Sdavidxu		if ((flags & UMUTEX_ERROR_CHECK) != 0 &&
1115161678Sdavidxu		    (owner & ~UMUTEX_CONTESTED) == id)
1116161678Sdavidxu			return (EDEADLK);
1117161678Sdavidxu
1118179970Sdavidxu		if (mode == _UMUTEX_TRY)
1119161678Sdavidxu			return (EBUSY);
1120161678Sdavidxu
1121161678Sdavidxu		/*
1122161678Sdavidxu		 * If we caught a signal, we have retried and now
1123161678Sdavidxu		 * exit immediately.
1124161678Sdavidxu		 */
1125161678Sdavidxu		if (error != 0)
1126161678Sdavidxu			return (error);
1127161678Sdavidxu
1128161678Sdavidxu		if ((error = umtx_key_get(m, TYPE_NORMAL_UMUTEX,
1129161678Sdavidxu		    GET_SHARE(flags), &uq->uq_key)) != 0)
1130161678Sdavidxu			return (error);
1131161678Sdavidxu
1132161678Sdavidxu		umtxq_lock(&uq->uq_key);
1133161678Sdavidxu		umtxq_busy(&uq->uq_key);
1134161678Sdavidxu		umtxq_insert(uq);
1135161678Sdavidxu		umtxq_unlock(&uq->uq_key);
1136161678Sdavidxu
1137161678Sdavidxu		/*
1138161678Sdavidxu		 * Set the contested bit so that a release in user space
1139161678Sdavidxu		 * knows to use the system call for unlock.  If this fails
1140161678Sdavidxu		 * either some one else has acquired the lock or it has been
1141161678Sdavidxu		 * released.
1142161678Sdavidxu		 */
1143161678Sdavidxu		old = casuword32(&m->m_owner, owner, owner | UMUTEX_CONTESTED);
1144161678Sdavidxu
1145161678Sdavidxu		/* The address was invalid. */
1146161678Sdavidxu		if (old == -1) {
1147161678Sdavidxu			umtxq_lock(&uq->uq_key);
1148161678Sdavidxu			umtxq_remove(uq);
1149179970Sdavidxu			umtxq_unbusy(&uq->uq_key);
1150161678Sdavidxu			umtxq_unlock(&uq->uq_key);
1151161678Sdavidxu			umtx_key_release(&uq->uq_key);
1152161678Sdavidxu			return (EFAULT);
1153161678Sdavidxu		}
1154161678Sdavidxu
1155161678Sdavidxu		/*
1156161678Sdavidxu		 * We set the contested bit, sleep. Otherwise the lock changed
1157161678Sdavidxu		 * and we need to retry or we lost a race to the thread
1158161678Sdavidxu		 * unlocking the umtx.
1159161678Sdavidxu		 */
1160161678Sdavidxu		umtxq_lock(&uq->uq_key);
1161179970Sdavidxu		umtxq_unbusy(&uq->uq_key);
1162161678Sdavidxu		if (old == owner)
1163161678Sdavidxu			error = umtxq_sleep(uq, "umtxn", timo);
1164161678Sdavidxu		umtxq_remove(uq);
1165161678Sdavidxu		umtxq_unlock(&uq->uq_key);
1166161678Sdavidxu		umtx_key_release(&uq->uq_key);
1167161678Sdavidxu	}
1168161678Sdavidxu
1169161678Sdavidxu	return (0);
1170161678Sdavidxu}
1171161678Sdavidxu
1172161678Sdavidxu/*
1173161678Sdavidxu * Lock PTHREAD_PRIO_NONE protocol POSIX mutex.
1174161678Sdavidxu */
1175161678Sdavidxu/*
1176161678Sdavidxu * Unlock PTHREAD_PRIO_NONE protocol POSIX mutex.
1177161678Sdavidxu */
1178161678Sdavidxustatic int
1179161678Sdavidxudo_unlock_normal(struct thread *td, struct umutex *m, uint32_t flags)
1180161678Sdavidxu{
1181161678Sdavidxu	struct umtx_key key;
1182161678Sdavidxu	uint32_t owner, old, id;
1183161678Sdavidxu	int error;
1184161678Sdavidxu	int count;
1185161678Sdavidxu
1186161678Sdavidxu	id = td->td_tid;
1187161678Sdavidxu	/*
1188161678Sdavidxu	 * Make sure we own this mtx.
1189161678Sdavidxu	 */
1190163449Sdavidxu	owner = fuword32(__DEVOLATILE(uint32_t *, &m->m_owner));
1191161678Sdavidxu	if (owner == -1)
1192161678Sdavidxu		return (EFAULT);
1193161678Sdavidxu
1194161678Sdavidxu	if ((owner & ~UMUTEX_CONTESTED) != id)
1195161678Sdavidxu		return (EPERM);
1196161678Sdavidxu
1197161678Sdavidxu	if ((owner & UMUTEX_CONTESTED) == 0) {
1198161678Sdavidxu		old = casuword32(&m->m_owner, owner, UMUTEX_UNOWNED);
1199161678Sdavidxu		if (old == -1)
1200161678Sdavidxu			return (EFAULT);
1201161678Sdavidxu		if (old == owner)
1202161678Sdavidxu			return (0);
1203161855Sdavidxu		owner = old;
1204161678Sdavidxu	}
1205161678Sdavidxu
1206161678Sdavidxu	/* We should only ever be in here for contested locks */
1207161678Sdavidxu	if ((error = umtx_key_get(m, TYPE_NORMAL_UMUTEX, GET_SHARE(flags),
1208161678Sdavidxu	    &key)) != 0)
1209161678Sdavidxu		return (error);
1210161678Sdavidxu
1211161678Sdavidxu	umtxq_lock(&key);
1212161678Sdavidxu	umtxq_busy(&key);
1213161678Sdavidxu	count = umtxq_count(&key);
1214161678Sdavidxu	umtxq_unlock(&key);
1215161678Sdavidxu
1216161678Sdavidxu	/*
1217161678Sdavidxu	 * When unlocking the umtx, it must be marked as unowned if
1218161678Sdavidxu	 * there is zero or one thread only waiting for it.
1219161678Sdavidxu	 * Otherwise, it must be marked as contested.
1220161678Sdavidxu	 */
1221161678Sdavidxu	old = casuword32(&m->m_owner, owner,
1222161678Sdavidxu		count <= 1 ? UMUTEX_UNOWNED : UMUTEX_CONTESTED);
1223161678Sdavidxu	umtxq_lock(&key);
1224161678Sdavidxu	umtxq_signal(&key,1);
1225161678Sdavidxu	umtxq_unbusy(&key);
1226161678Sdavidxu	umtxq_unlock(&key);
1227161678Sdavidxu	umtx_key_release(&key);
1228161678Sdavidxu	if (old == -1)
1229161678Sdavidxu		return (EFAULT);
1230161678Sdavidxu	if (old != owner)
1231161678Sdavidxu		return (EINVAL);
1232161678Sdavidxu	return (0);
1233161678Sdavidxu}
1234161678Sdavidxu
1235179970Sdavidxu/*
1236179970Sdavidxu * Check if the mutex is available and wake up a waiter,
1237179970Sdavidxu * only for simple mutex.
1238179970Sdavidxu */
1239179970Sdavidxustatic int
1240179970Sdavidxudo_wake_umutex(struct thread *td, struct umutex *m)
1241179970Sdavidxu{
1242179970Sdavidxu	struct umtx_key key;
1243179970Sdavidxu	uint32_t owner;
1244179970Sdavidxu	uint32_t flags;
1245179970Sdavidxu	int error;
1246179970Sdavidxu	int count;
1247179970Sdavidxu
1248179970Sdavidxu	owner = fuword32(__DEVOLATILE(uint32_t *, &m->m_owner));
1249179970Sdavidxu	if (owner == -1)
1250179970Sdavidxu		return (EFAULT);
1251179970Sdavidxu
1252179970Sdavidxu	if ((owner & ~UMUTEX_CONTESTED) != 0)
1253179970Sdavidxu		return (0);
1254179970Sdavidxu
1255179970Sdavidxu	flags = fuword32(&m->m_flags);
1256179970Sdavidxu
1257179970Sdavidxu	/* We should only ever be in here for contested locks */
1258179970Sdavidxu	if ((error = umtx_key_get(m, TYPE_NORMAL_UMUTEX, GET_SHARE(flags),
1259179970Sdavidxu	    &key)) != 0)
1260179970Sdavidxu		return (error);
1261179970Sdavidxu
1262179970Sdavidxu	umtxq_lock(&key);
1263179970Sdavidxu	umtxq_busy(&key);
1264179970Sdavidxu	count = umtxq_count(&key);
1265179970Sdavidxu	umtxq_unlock(&key);
1266179970Sdavidxu
1267179970Sdavidxu	if (count <= 1)
1268179970Sdavidxu		owner = casuword32(&m->m_owner, UMUTEX_CONTESTED, UMUTEX_UNOWNED);
1269179970Sdavidxu
1270179970Sdavidxu	umtxq_lock(&key);
1271179970Sdavidxu	if (count != 0 && (owner & ~UMUTEX_CONTESTED) == 0)
1272179970Sdavidxu		umtxq_signal(&key, 1);
1273179970Sdavidxu	umtxq_unbusy(&key);
1274179970Sdavidxu	umtxq_unlock(&key);
1275179970Sdavidxu	umtx_key_release(&key);
1276179970Sdavidxu	return (0);
1277179970Sdavidxu}
1278179970Sdavidxu
1279161678Sdavidxustatic inline struct umtx_pi *
1280163697Sdavidxuumtx_pi_alloc(int flags)
1281161678Sdavidxu{
1282161678Sdavidxu	struct umtx_pi *pi;
1283161678Sdavidxu
1284163697Sdavidxu	pi = uma_zalloc(umtx_pi_zone, M_ZERO | flags);
1285161678Sdavidxu	TAILQ_INIT(&pi->pi_blocked);
1286161678Sdavidxu	atomic_add_int(&umtx_pi_allocated, 1);
1287161678Sdavidxu	return (pi);
1288161678Sdavidxu}
1289161678Sdavidxu
1290161678Sdavidxustatic inline void
1291161678Sdavidxuumtx_pi_free(struct umtx_pi *pi)
1292161678Sdavidxu{
1293161678Sdavidxu	uma_zfree(umtx_pi_zone, pi);
1294161678Sdavidxu	atomic_add_int(&umtx_pi_allocated, -1);
1295161678Sdavidxu}
1296161678Sdavidxu
1297161678Sdavidxu/*
1298161678Sdavidxu * Adjust the thread's position on a pi_state after its priority has been
1299161678Sdavidxu * changed.
1300161678Sdavidxu */
1301161678Sdavidxustatic int
1302161678Sdavidxuumtx_pi_adjust_thread(struct umtx_pi *pi, struct thread *td)
1303161678Sdavidxu{
1304161678Sdavidxu	struct umtx_q *uq, *uq1, *uq2;
1305161678Sdavidxu	struct thread *td1;
1306161678Sdavidxu
1307170300Sjeff	mtx_assert(&umtx_lock, MA_OWNED);
1308161678Sdavidxu	if (pi == NULL)
1309161678Sdavidxu		return (0);
1310161678Sdavidxu
1311161678Sdavidxu	uq = td->td_umtxq;
1312161678Sdavidxu
1313161678Sdavidxu	/*
1314161678Sdavidxu	 * Check if the thread needs to be moved on the blocked chain.
1315161678Sdavidxu	 * It needs to be moved if either its priority is lower than
1316161678Sdavidxu	 * the previous thread or higher than the next thread.
1317161678Sdavidxu	 */
1318161678Sdavidxu	uq1 = TAILQ_PREV(uq, umtxq_head, uq_lockq);
1319161678Sdavidxu	uq2 = TAILQ_NEXT(uq, uq_lockq);
1320161678Sdavidxu	if ((uq1 != NULL && UPRI(td) < UPRI(uq1->uq_thread)) ||
1321161678Sdavidxu	    (uq2 != NULL && UPRI(td) > UPRI(uq2->uq_thread))) {
1322161678Sdavidxu		/*
1323161678Sdavidxu		 * Remove thread from blocked chain and determine where
1324161678Sdavidxu		 * it should be moved to.
1325161678Sdavidxu		 */
1326161678Sdavidxu		TAILQ_REMOVE(&pi->pi_blocked, uq, uq_lockq);
1327161678Sdavidxu		TAILQ_FOREACH(uq1, &pi->pi_blocked, uq_lockq) {
1328161678Sdavidxu			td1 = uq1->uq_thread;
1329161678Sdavidxu			MPASS(td1->td_proc->p_magic == P_MAGIC);
1330161678Sdavidxu			if (UPRI(td1) > UPRI(td))
1331161678Sdavidxu				break;
1332161678Sdavidxu		}
1333161678Sdavidxu
1334161678Sdavidxu		if (uq1 == NULL)
1335161678Sdavidxu			TAILQ_INSERT_TAIL(&pi->pi_blocked, uq, uq_lockq);
1336161678Sdavidxu		else
1337161678Sdavidxu			TAILQ_INSERT_BEFORE(uq1, uq, uq_lockq);
1338161678Sdavidxu	}
1339161678Sdavidxu	return (1);
1340161678Sdavidxu}
1341161678Sdavidxu
1342161678Sdavidxu/*
1343161678Sdavidxu * Propagate priority when a thread is blocked on POSIX
1344161678Sdavidxu * PI mutex.
1345161678Sdavidxu */
1346161678Sdavidxustatic void
1347161678Sdavidxuumtx_propagate_priority(struct thread *td)
1348161678Sdavidxu{
1349161678Sdavidxu	struct umtx_q *uq;
1350161678Sdavidxu	struct umtx_pi *pi;
1351161678Sdavidxu	int pri;
1352161678Sdavidxu
1353170300Sjeff	mtx_assert(&umtx_lock, MA_OWNED);
1354161678Sdavidxu	pri = UPRI(td);
1355161678Sdavidxu	uq = td->td_umtxq;
1356161678Sdavidxu	pi = uq->uq_pi_blocked;
1357161678Sdavidxu	if (pi == NULL)
1358161678Sdavidxu		return;
1359161678Sdavidxu
1360161678Sdavidxu	for (;;) {
1361161678Sdavidxu		td = pi->pi_owner;
1362216313Sdavidxu		if (td == NULL || td == curthread)
1363161678Sdavidxu			return;
1364161678Sdavidxu
1365161678Sdavidxu		MPASS(td->td_proc != NULL);
1366161678Sdavidxu		MPASS(td->td_proc->p_magic == P_MAGIC);
1367161678Sdavidxu
1368170300Sjeff		thread_lock(td);
1369216313Sdavidxu		if (td->td_lend_user_pri > pri)
1370216313Sdavidxu			sched_lend_user_prio(td, pri);
1371216313Sdavidxu		else {
1372216313Sdavidxu			thread_unlock(td);
1373216313Sdavidxu			break;
1374216313Sdavidxu		}
1375170300Sjeff		thread_unlock(td);
1376161678Sdavidxu
1377161678Sdavidxu		/*
1378161678Sdavidxu		 * Pick up the lock that td is blocked on.
1379161678Sdavidxu		 */
1380161678Sdavidxu		uq = td->td_umtxq;
1381161678Sdavidxu		pi = uq->uq_pi_blocked;
1382216791Sdavidxu		if (pi == NULL)
1383216791Sdavidxu			break;
1384161678Sdavidxu		/* Resort td on the list if needed. */
1385216791Sdavidxu		umtx_pi_adjust_thread(pi, td);
1386161678Sdavidxu	}
1387161678Sdavidxu}
1388161678Sdavidxu
1389161678Sdavidxu/*
1390161678Sdavidxu * Unpropagate priority for a PI mutex when a thread blocked on
1391161678Sdavidxu * it is interrupted by signal or resumed by others.
1392161678Sdavidxu */
1393161678Sdavidxustatic void
1394216791Sdavidxuumtx_repropagate_priority(struct umtx_pi *pi)
1395161678Sdavidxu{
1396161678Sdavidxu	struct umtx_q *uq, *uq_owner;
1397161678Sdavidxu	struct umtx_pi *pi2;
1398216791Sdavidxu	int pri;
1399161678Sdavidxu
1400170300Sjeff	mtx_assert(&umtx_lock, MA_OWNED);
1401161678Sdavidxu
1402161678Sdavidxu	while (pi != NULL && pi->pi_owner != NULL) {
1403161678Sdavidxu		pri = PRI_MAX;
1404161678Sdavidxu		uq_owner = pi->pi_owner->td_umtxq;
1405161678Sdavidxu
1406161678Sdavidxu		TAILQ_FOREACH(pi2, &uq_owner->uq_pi_contested, pi_link) {
1407161678Sdavidxu			uq = TAILQ_FIRST(&pi2->pi_blocked);
1408161678Sdavidxu			if (uq != NULL) {
1409161678Sdavidxu				if (pri > UPRI(uq->uq_thread))
1410161678Sdavidxu					pri = UPRI(uq->uq_thread);
1411161678Sdavidxu			}
1412161678Sdavidxu		}
1413161678Sdavidxu
1414161678Sdavidxu		if (pri > uq_owner->uq_inherited_pri)
1415161678Sdavidxu			pri = uq_owner->uq_inherited_pri;
1416170300Sjeff		thread_lock(pi->pi_owner);
1417216791Sdavidxu		sched_lend_user_prio(pi->pi_owner, pri);
1418170300Sjeff		thread_unlock(pi->pi_owner);
1419216791Sdavidxu		if ((pi = uq_owner->uq_pi_blocked) != NULL)
1420216791Sdavidxu			umtx_pi_adjust_thread(pi, uq_owner->uq_thread);
1421161678Sdavidxu	}
1422161678Sdavidxu}
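/*
 * Editorial note with an illustrative example (values are made up):
 * suppose the owner holds two contested PI mutexes whose best waiters
 * run at priorities 12 and 18, and its own inherited priority is 25.
 * The loop above computes pri = min(12, 18, 25) = 12 and lends that
 * to the owner; if the owner is itself blocked on another PI mutex,
 * the adjustment is repeated up the chain.
 */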
1423161678Sdavidxu
1424161678Sdavidxu/*
1425161678Sdavidxu * Insert a PI mutex into the owner's list of contested mutexes.
1426161678Sdavidxu */
1427161678Sdavidxustatic void
1428161678Sdavidxuumtx_pi_setowner(struct umtx_pi *pi, struct thread *owner)
1429161678Sdavidxu{
1430161678Sdavidxu	struct umtx_q *uq_owner;
1431161678Sdavidxu
1432161678Sdavidxu	uq_owner = owner->td_umtxq;
1433170300Sjeff	mtx_assert(&umtx_lock, MA_OWNED);
1434161678Sdavidxu	if (pi->pi_owner != NULL)
1435161678Sdavidxu		panic("pi_owner != NULL");
1436161678Sdavidxu	pi->pi_owner = owner;
1437161678Sdavidxu	TAILQ_INSERT_TAIL(&uq_owner->uq_pi_contested, pi, pi_link);
1438161678Sdavidxu}
1439161678Sdavidxu
1440161678Sdavidxu/*
1441161678Sdavidxu * Claim ownership of a PI mutex.
1442161678Sdavidxu */
1443161678Sdavidxustatic int
1444161678Sdavidxuumtx_pi_claim(struct umtx_pi *pi, struct thread *owner)
1445161678Sdavidxu{
1446161678Sdavidxu	struct umtx_q *uq, *uq_owner;
1447161678Sdavidxu
1448161678Sdavidxu	uq_owner = owner->td_umtxq;
1449170300Sjeff	mtx_lock_spin(&umtx_lock);
1450161678Sdavidxu	if (pi->pi_owner == owner) {
1451170300Sjeff		mtx_unlock_spin(&umtx_lock);
1452161678Sdavidxu		return (0);
1453161678Sdavidxu	}
1454161678Sdavidxu
1455161678Sdavidxu	if (pi->pi_owner != NULL) {
1456161678Sdavidxu		/*
1457161678Sdavidxu		 * Userland may have already messed up the mutex, sigh.
1458161678Sdavidxu		 */
1459170300Sjeff		mtx_unlock_spin(&umtx_lock);
1460161678Sdavidxu		return (EPERM);
1461161678Sdavidxu	}
1462161678Sdavidxu	umtx_pi_setowner(pi, owner);
1463161678Sdavidxu	uq = TAILQ_FIRST(&pi->pi_blocked);
1464161678Sdavidxu	if (uq != NULL) {
1465161678Sdavidxu		int pri;
1466161678Sdavidxu
1467161678Sdavidxu		pri = UPRI(uq->uq_thread);
1468170300Sjeff		thread_lock(owner);
1469161678Sdavidxu		if (pri < UPRI(owner))
1470161678Sdavidxu			sched_lend_user_prio(owner, pri);
1471170300Sjeff		thread_unlock(owner);
1472161678Sdavidxu	}
1473170300Sjeff	mtx_unlock_spin(&umtx_lock);
1474161678Sdavidxu	return (0);
1475161678Sdavidxu}
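/*
 * Editorial note: umtx_pi_claim() is the kernel-side complement of
 * the userland CAS handoff.  As soon as a contested mutex gains an
 * owner, the head of pi_blocked (the best waiter, since the queue is
 * kept sorted by priority) is checked, and its priority is lent to
 * the new owner right away if it is better (numerically lower).
 */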
1476161678Sdavidxu
1477161678Sdavidxu/*
1478174701Sdavidxu * Adjust a thread's position in the queue of the PI mutex it is
1479174701Sdavidxu * blocked on; this may trigger a new round of priority propagation.
1480174701Sdavidxu */
1481174701Sdavidxuvoid
1482174701Sdavidxuumtx_pi_adjust(struct thread *td, u_char oldpri)
1483174701Sdavidxu{
1484174707Sdavidxu	struct umtx_q *uq;
1485174707Sdavidxu	struct umtx_pi *pi;
1486174707Sdavidxu
1487174707Sdavidxu	uq = td->td_umtxq;
1488174701Sdavidxu	mtx_lock_spin(&umtx_lock);
1489174707Sdavidxu	/*
1490174707Sdavidxu	 * Pick up the lock that td is blocked on.
1491174707Sdavidxu	 */
1492174707Sdavidxu	pi = uq->uq_pi_blocked;
1493216791Sdavidxu	if (pi != NULL) {
1494216791Sdavidxu		umtx_pi_adjust_thread(pi, td);
1495216791Sdavidxu		umtx_repropagate_priority(pi);
1496216791Sdavidxu	}
1497174701Sdavidxu	mtx_unlock_spin(&umtx_lock);
1498174701Sdavidxu}
1499174701Sdavidxu
1500174701Sdavidxu/*
1501161678Sdavidxu * Sleep on a PI mutex.
1502161678Sdavidxu */
1503161678Sdavidxustatic int
1504161678Sdavidxuumtxq_sleep_pi(struct umtx_q *uq, struct umtx_pi *pi,
1505161678Sdavidxu	uint32_t owner, const char *wmesg, int timo)
1506161678Sdavidxu{
1507161678Sdavidxu	struct umtxq_chain *uc;
1508161678Sdavidxu	struct thread *td, *td1;
1509161678Sdavidxu	struct umtx_q *uq1;
1510161678Sdavidxu	int pri;
1511161678Sdavidxu	int error = 0;
1512161678Sdavidxu
1513161678Sdavidxu	td = uq->uq_thread;
1514161678Sdavidxu	KASSERT(td == curthread, ("inconsistent uq_thread"));
1515161678Sdavidxu	uc = umtxq_getchain(&uq->uq_key);
1516161678Sdavidxu	UMTXQ_LOCKED_ASSERT(uc);
1517189756Sdavidxu	UMTXQ_BUSY_ASSERT(uc);
1518161678Sdavidxu	umtxq_insert(uq);
1519189756Sdavidxu	mtx_lock_spin(&umtx_lock);
1520161678Sdavidxu	if (pi->pi_owner == NULL) {
1521189756Sdavidxu		mtx_unlock_spin(&umtx_lock);
1522213642Sdavidxu		/* XXX Only look up thread in current process. */
1523213642Sdavidxu		td1 = tdfind(owner, curproc->p_pid);
1524170300Sjeff		mtx_lock_spin(&umtx_lock);
1525215336Sdavidxu		if (td1 != NULL) {
1526215336Sdavidxu			if (pi->pi_owner == NULL)
1527215336Sdavidxu				umtx_pi_setowner(pi, td1);
1528215336Sdavidxu			PROC_UNLOCK(td1->td_proc);
1529161678Sdavidxu		}
1530161678Sdavidxu	}
1531161678Sdavidxu
1532161678Sdavidxu	TAILQ_FOREACH(uq1, &pi->pi_blocked, uq_lockq) {
1533161678Sdavidxu		pri = UPRI(uq1->uq_thread);
1534161678Sdavidxu		if (pri > UPRI(td))
1535161678Sdavidxu			break;
1536161678Sdavidxu	}
1537161678Sdavidxu
1538161678Sdavidxu	if (uq1 != NULL)
1539161678Sdavidxu		TAILQ_INSERT_BEFORE(uq1, uq, uq_lockq);
1540161678Sdavidxu	else
1541161678Sdavidxu		TAILQ_INSERT_TAIL(&pi->pi_blocked, uq, uq_lockq);
1542161678Sdavidxu
1543161678Sdavidxu	uq->uq_pi_blocked = pi;
1544174701Sdavidxu	thread_lock(td);
1545161678Sdavidxu	td->td_flags |= TDF_UPIBLOCKED;
1546174701Sdavidxu	thread_unlock(td);
1547161678Sdavidxu	umtx_propagate_priority(td);
1548170300Sjeff	mtx_unlock_spin(&umtx_lock);
1549189756Sdavidxu	umtxq_unbusy(&uq->uq_key);
1550161678Sdavidxu
1551161678Sdavidxu	if (uq->uq_flags & UQF_UMTXQ) {
1552161678Sdavidxu		error = msleep(uq, &uc->uc_lock, PCATCH, wmesg, timo);
1553161678Sdavidxu		if (error == EWOULDBLOCK)
1554161678Sdavidxu			error = ETIMEDOUT;
1555161678Sdavidxu		if (uq->uq_flags & UQF_UMTXQ) {
1556161678Sdavidxu			umtxq_remove(uq);
1557161678Sdavidxu		}
1558161678Sdavidxu	}
1559170300Sjeff	mtx_lock_spin(&umtx_lock);
1560161678Sdavidxu	uq->uq_pi_blocked = NULL;
1561174701Sdavidxu	thread_lock(td);
1562161678Sdavidxu	td->td_flags &= ~TDF_UPIBLOCKED;
1563174701Sdavidxu	thread_unlock(td);
1564161678Sdavidxu	TAILQ_REMOVE(&pi->pi_blocked, uq, uq_lockq);
1565216791Sdavidxu	umtx_repropagate_priority(pi);
1566170300Sjeff	mtx_unlock_spin(&umtx_lock);
1567189756Sdavidxu	umtxq_unlock(&uq->uq_key);
1568161678Sdavidxu
1569161678Sdavidxu	return (error);
1570161678Sdavidxu}
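/*
 * Editorial note: umtxq_sleep_pi() keeps pi_blocked sorted by UPRI()
 * with the best priority first, which is what allows umtx_pi_claim()
 * and umtx_repropagate_priority() to find the best waiter with a
 * plain TAILQ_FIRST().  TDF_UPIBLOCKED marks the window during which
 * a priority change on this thread should be forwarded to
 * umtx_pi_adjust(); the flag is cleared, and the inherited priorities
 * recomputed, before the function returns.
 */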
1571161678Sdavidxu
1572161678Sdavidxu/*
1573161678Sdavidxu * Add reference count for a PI mutex.
1574161678Sdavidxu */
1575161678Sdavidxustatic void
1576161678Sdavidxuumtx_pi_ref(struct umtx_pi *pi)
1577161678Sdavidxu{
1578161678Sdavidxu	struct umtxq_chain *uc;
1579161678Sdavidxu
1580161678Sdavidxu	uc = umtxq_getchain(&pi->pi_key);
1581161678Sdavidxu	UMTXQ_LOCKED_ASSERT(uc);
1582161678Sdavidxu	pi->pi_refcount++;
1583161678Sdavidxu}
1584161678Sdavidxu
1585161678Sdavidxu/*
1586161678Sdavidxu * Decrease the reference count for a PI mutex; if the count
1587161678Sdavidxu * drops to zero, its memory is freed.
1588161678Sdavidxu */
1589161678Sdavidxustatic void
1590161678Sdavidxuumtx_pi_unref(struct umtx_pi *pi)
1591161678Sdavidxu{
1592161678Sdavidxu	struct umtxq_chain *uc;
1593161678Sdavidxu
1594161678Sdavidxu	uc = umtxq_getchain(&pi->pi_key);
1595161678Sdavidxu	UMTXQ_LOCKED_ASSERT(uc);
1596161678Sdavidxu	KASSERT(pi->pi_refcount > 0, ("invalid reference count"));
1597161678Sdavidxu	if (--pi->pi_refcount == 0) {
1598170300Sjeff		mtx_lock_spin(&umtx_lock);
1599161678Sdavidxu		if (pi->pi_owner != NULL) {
1600161678Sdavidxu			TAILQ_REMOVE(&pi->pi_owner->td_umtxq->uq_pi_contested,
1601161678Sdavidxu				pi, pi_link);
1602161678Sdavidxu			pi->pi_owner = NULL;
1603161678Sdavidxu		}
1604161678Sdavidxu		KASSERT(TAILQ_EMPTY(&pi->pi_blocked),
1605161678Sdavidxu			("blocked queue not empty"));
1606170300Sjeff		mtx_unlock_spin(&umtx_lock);
1607161678Sdavidxu		TAILQ_REMOVE(&uc->uc_pi_list, pi, pi_hashlink);
1608189756Sdavidxu		umtx_pi_free(pi);
1609161678Sdavidxu	}
1610161678Sdavidxu}
1611161678Sdavidxu
1612161678Sdavidxu/*
1613161678Sdavidxu * Find a PI mutex in the hash table.
1614161678Sdavidxu */
1615161678Sdavidxustatic struct umtx_pi *
1616161678Sdavidxuumtx_pi_lookup(struct umtx_key *key)
1617161678Sdavidxu{
1618161678Sdavidxu	struct umtxq_chain *uc;
1619161678Sdavidxu	struct umtx_pi *pi;
1620161678Sdavidxu
1621161678Sdavidxu	uc = umtxq_getchain(key);
1622161678Sdavidxu	UMTXQ_LOCKED_ASSERT(uc);
1623161678Sdavidxu
1624161678Sdavidxu	TAILQ_FOREACH(pi, &uc->uc_pi_list, pi_hashlink) {
1625161678Sdavidxu		if (umtx_key_match(&pi->pi_key, key)) {
1626161678Sdavidxu			return (pi);
1627161678Sdavidxu		}
1628161678Sdavidxu	}
1629161678Sdavidxu	return (NULL);
1630161678Sdavidxu}
1631161678Sdavidxu
1632161678Sdavidxu/*
1633161678Sdavidxu * Insert a PI mutex into the hash table.
1634161678Sdavidxu */
1635161678Sdavidxustatic inline void
1636161678Sdavidxuumtx_pi_insert(struct umtx_pi *pi)
1637161678Sdavidxu{
1638161678Sdavidxu	struct umtxq_chain *uc;
1639161678Sdavidxu
1640161678Sdavidxu	uc = umtxq_getchain(&pi->pi_key);
1641161678Sdavidxu	UMTXQ_LOCKED_ASSERT(uc);
1642161678Sdavidxu	TAILQ_INSERT_TAIL(&uc->uc_pi_list, pi, pi_hashlink);
1643161678Sdavidxu}
1644161678Sdavidxu
1645161678Sdavidxu/*
1646161678Sdavidxu * Lock a PI mutex.
1647161678Sdavidxu */
1648161678Sdavidxustatic int
1649161678Sdavidxu_do_lock_pi(struct thread *td, struct umutex *m, uint32_t flags, int timo,
1650161678Sdavidxu	int try)
1651161678Sdavidxu{
1652161678Sdavidxu	struct umtx_q *uq;
1653161678Sdavidxu	struct umtx_pi *pi, *new_pi;
1654161678Sdavidxu	uint32_t id, owner, old;
1655161678Sdavidxu	int error;
1656161678Sdavidxu
1657161678Sdavidxu	id = td->td_tid;
1658161678Sdavidxu	uq = td->td_umtxq;
1659161678Sdavidxu
1660161678Sdavidxu	if ((error = umtx_key_get(m, TYPE_PI_UMUTEX, GET_SHARE(flags),
1661161678Sdavidxu	    &uq->uq_key)) != 0)
1662161678Sdavidxu		return (error);
1663163697Sdavidxu	umtxq_lock(&uq->uq_key);
1664163697Sdavidxu	pi = umtx_pi_lookup(&uq->uq_key);
1665163697Sdavidxu	if (pi == NULL) {
1666163697Sdavidxu		new_pi = umtx_pi_alloc(M_NOWAIT);
1667163697Sdavidxu		if (new_pi == NULL) {
1668161678Sdavidxu			umtxq_unlock(&uq->uq_key);
1669163697Sdavidxu			new_pi = umtx_pi_alloc(M_WAITOK);
1670161678Sdavidxu			umtxq_lock(&uq->uq_key);
1671161678Sdavidxu			pi = umtx_pi_lookup(&uq->uq_key);
1672163697Sdavidxu			if (pi != NULL) {
1673161678Sdavidxu				umtx_pi_free(new_pi);
1674163697Sdavidxu				new_pi = NULL;
1675161678Sdavidxu			}
1676161678Sdavidxu		}
1677163697Sdavidxu		if (new_pi != NULL) {
1678163697Sdavidxu			new_pi->pi_key = uq->uq_key;
1679163697Sdavidxu			umtx_pi_insert(new_pi);
1680163697Sdavidxu			pi = new_pi;
1681163697Sdavidxu		}
1682163697Sdavidxu	}
1683163697Sdavidxu	umtx_pi_ref(pi);
1684163697Sdavidxu	umtxq_unlock(&uq->uq_key);
1685161678Sdavidxu
1686163697Sdavidxu	/*
1687163697Sdavidxu	 * Care must be exercised when dealing with the umtx structure.
1688163697Sdavidxu	 * Any access to it can fault.
1689163697Sdavidxu	 */
1690163697Sdavidxu	for (;;) {
1691161678Sdavidxu		/*
1692161678Sdavidxu		 * Try the uncontested case.  This should be done in userland.
1693161678Sdavidxu		 */
1694161678Sdavidxu		owner = casuword32(&m->m_owner, UMUTEX_UNOWNED, id);
1695161678Sdavidxu
1696161678Sdavidxu		/* The acquire succeeded. */
1697161678Sdavidxu		if (owner == UMUTEX_UNOWNED) {
1698161678Sdavidxu			error = 0;
1699161678Sdavidxu			break;
1700161678Sdavidxu		}
1701161678Sdavidxu
1702161678Sdavidxu		/* The address was invalid. */
1703161678Sdavidxu		if (owner == -1) {
1704161678Sdavidxu			error = EFAULT;
1705161678Sdavidxu			break;
1706161678Sdavidxu		}
1707161678Sdavidxu
1708161678Sdavidxu		/* If no one owns it but it is contested, try to acquire it. */
1709161678Sdavidxu		if (owner == UMUTEX_CONTESTED) {
1710161678Sdavidxu			owner = casuword32(&m->m_owner,
1711161678Sdavidxu			    UMUTEX_CONTESTED, id | UMUTEX_CONTESTED);
1712161678Sdavidxu
1713161678Sdavidxu			if (owner == UMUTEX_CONTESTED) {
1714161678Sdavidxu				umtxq_lock(&uq->uq_key);
1715189756Sdavidxu				umtxq_busy(&uq->uq_key);
1716161678Sdavidxu				error = umtx_pi_claim(pi, td);
1717189756Sdavidxu				umtxq_unbusy(&uq->uq_key);
1718161678Sdavidxu				umtxq_unlock(&uq->uq_key);
1719161678Sdavidxu				break;
1720161678Sdavidxu			}
1721161678Sdavidxu
1722161678Sdavidxu			/* The address was invalid. */
1723161678Sdavidxu			if (owner == -1) {
1724161678Sdavidxu				error = EFAULT;
1725161678Sdavidxu				break;
1726161678Sdavidxu			}
1727161678Sdavidxu
1728161678Sdavidxu			/* If this failed, the lock has changed; restart. */
1729161678Sdavidxu			continue;
1730161678Sdavidxu		}
1731161678Sdavidxu
1732161678Sdavidxu		if ((flags & UMUTEX_ERROR_CHECK) != 0 &&
1733161678Sdavidxu		    (owner & ~UMUTEX_CONTESTED) == id) {
1734161678Sdavidxu			error = EDEADLK;
1735161678Sdavidxu			break;
1736161678Sdavidxu		}
1737161678Sdavidxu
1738161678Sdavidxu		if (try != 0) {
1739161678Sdavidxu			error = EBUSY;
1740161678Sdavidxu			break;
1741161678Sdavidxu		}
1742161678Sdavidxu
1743161678Sdavidxu		/*
1744161678Sdavidxu		 * If we caught a signal during the last sleep, we have
1745161678Sdavidxu		 * already retried the lock above; exit immediately.
1746161678Sdavidxu		 */
1747161678Sdavidxu		if (error != 0)
1748161678Sdavidxu			break;
1749161678Sdavidxu
1750161678Sdavidxu		umtxq_lock(&uq->uq_key);
1751161678Sdavidxu		umtxq_busy(&uq->uq_key);
1752161678Sdavidxu		umtxq_unlock(&uq->uq_key);
1753161678Sdavidxu
1754161678Sdavidxu		/*
1755161678Sdavidxu		 * Set the contested bit so that a release in user space
1756161678Sdavidxu		 * knows to use the system call for unlock.  If this fails,
1757161678Sdavidxu		 * either someone else has acquired the lock or it has been
1758161678Sdavidxu		 * released.
1759161678Sdavidxu		 */
1760161678Sdavidxu		old = casuword32(&m->m_owner, owner, owner | UMUTEX_CONTESTED);
1761161678Sdavidxu
1762161678Sdavidxu		/* The address was invalid. */
1763161678Sdavidxu		if (old == -1) {
1764161678Sdavidxu			umtxq_lock(&uq->uq_key);
1765161678Sdavidxu			umtxq_unbusy(&uq->uq_key);
1766161678Sdavidxu			umtxq_unlock(&uq->uq_key);
1767161678Sdavidxu			error = EFAULT;
1768161678Sdavidxu			break;
1769161678Sdavidxu		}
1770161678Sdavidxu
1771161678Sdavidxu		umtxq_lock(&uq->uq_key);
1772161678Sdavidxu		/*
1773161678Sdavidxu		 * If we set the contested bit, sleep.  Otherwise the lock
1774161678Sdavidxu		 * changed and we need to retry, or we lost a race with the
1775161678Sdavidxu		 * thread unlocking the umtx.
1776161678Sdavidxu		 */
1777161678Sdavidxu		if (old == owner)
1778161678Sdavidxu			error = umtxq_sleep_pi(uq, pi, owner & ~UMUTEX_CONTESTED,
1779161678Sdavidxu				 "umtxpi", timo);
1780189756Sdavidxu		else {
1781189756Sdavidxu			umtxq_unbusy(&uq->uq_key);
1782189756Sdavidxu			umtxq_unlock(&uq->uq_key);
1783189756Sdavidxu		}
1784161678Sdavidxu	}
1785161678Sdavidxu
1786163697Sdavidxu	umtxq_lock(&uq->uq_key);
1787163697Sdavidxu	umtx_pi_unref(pi);
1788163697Sdavidxu	umtxq_unlock(&uq->uq_key);
1789161678Sdavidxu
1790161678Sdavidxu	umtx_key_release(&uq->uq_key);
1791161678Sdavidxu	return (error);
1792161678Sdavidxu}
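/*
 * Illustrative sketch (editorial; the wrapper below is hypothetical,
 * though atomic_cmpset_acq_32() and _umtx_op() are real FreeBSD
 * primitives): the "try the uncontested case" CAS at the top of the
 * loop above mirrors the fast path userland is expected to attempt
 * before making the syscall at all, roughly:
 *
 *	if (atomic_cmpset_acq_32(&m->m_owner, UMUTEX_UNOWNED, id))
 *		return (0);		(got it, no syscall needed)
 *	return (_umtx_op(m, UMTX_OP_MUTEX_LOCK, 0, NULL, NULL));
 */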
1793161678Sdavidxu
1794161678Sdavidxu/*
1795161678Sdavidxu * Unlock a PI mutex.
1796161678Sdavidxu */
1797161678Sdavidxustatic int
1798161678Sdavidxudo_unlock_pi(struct thread *td, struct umutex *m, uint32_t flags)
1799161678Sdavidxu{
1800161678Sdavidxu	struct umtx_key key;
1801161678Sdavidxu	struct umtx_q *uq_first, *uq_first2, *uq_me;
1802161678Sdavidxu	struct umtx_pi *pi, *pi2;
1803161678Sdavidxu	uint32_t owner, old, id;
1804161678Sdavidxu	int error;
1805161678Sdavidxu	int count;
1806161678Sdavidxu	int pri;
1807161678Sdavidxu
1808161678Sdavidxu	id = td->td_tid;
1809161678Sdavidxu	/*
1810161678Sdavidxu	 * Make sure we own this mtx.
1811161678Sdavidxu	 */
1812163449Sdavidxu	owner = fuword32(__DEVOLATILE(uint32_t *, &m->m_owner));
1813161678Sdavidxu	if (owner == -1)
1814161678Sdavidxu		return (EFAULT);
1815161678Sdavidxu
1816161678Sdavidxu	if ((owner & ~UMUTEX_CONTESTED) != id)
1817161678Sdavidxu		return (EPERM);
1818161678Sdavidxu
1819161678Sdavidxu	/* This should be done in userland */
1820161678Sdavidxu	if ((owner & UMUTEX_CONTESTED) == 0) {
1821161678Sdavidxu		old = casuword32(&m->m_owner, owner, UMUTEX_UNOWNED);
1822161678Sdavidxu		if (old == -1)
1823161678Sdavidxu			return (EFAULT);
1824161678Sdavidxu		if (old == owner)
1825161678Sdavidxu			return (0);
1826161855Sdavidxu		owner = old;
1827161678Sdavidxu	}
1828161678Sdavidxu
1829161678Sdavidxu	/* We should only ever be in here for contested locks */
1830161678Sdavidxu	if ((error = umtx_key_get(m, TYPE_PI_UMUTEX, GET_SHARE(flags),
1831161678Sdavidxu	    &key)) != 0)
1832161678Sdavidxu		return (error);
1833161678Sdavidxu
1834161678Sdavidxu	umtxq_lock(&key);
1835161678Sdavidxu	umtxq_busy(&key);
1836161678Sdavidxu	count = umtxq_count_pi(&key, &uq_first);
1837161678Sdavidxu	if (uq_first != NULL) {
1838189756Sdavidxu		mtx_lock_spin(&umtx_lock);
1839161678Sdavidxu		pi = uq_first->uq_pi_blocked;
1840189756Sdavidxu		KASSERT(pi != NULL, ("pi == NULL?"));
1841161678Sdavidxu		if (pi->pi_owner != curthread) {
1842189756Sdavidxu			mtx_unlock_spin(&umtx_lock);
1843161678Sdavidxu			umtxq_unbusy(&key);
1844161678Sdavidxu			umtxq_unlock(&key);
1845189756Sdavidxu			umtx_key_release(&key);
1846161678Sdavidxu			/* Userland messed up the mutex. */
1847161678Sdavidxu			return (EPERM);
1848161678Sdavidxu		}
1849161678Sdavidxu		uq_me = curthread->td_umtxq;
1850161678Sdavidxu		pi->pi_owner = NULL;
1851161678Sdavidxu		TAILQ_REMOVE(&uq_me->uq_pi_contested, pi, pi_link);
1852189756Sdavidxu		/* Get the highest-priority thread that is still sleeping. */
1853161678Sdavidxu		uq_first = TAILQ_FIRST(&pi->pi_blocked);
1854189756Sdavidxu		while (uq_first != NULL &&
1855189756Sdavidxu		       (uq_first->uq_flags & UQF_UMTXQ) == 0) {
1856189756Sdavidxu			uq_first = TAILQ_NEXT(uq_first, uq_lockq);
1857189756Sdavidxu		}
1858161678Sdavidxu		pri = PRI_MAX;
1859161678Sdavidxu		TAILQ_FOREACH(pi2, &uq_me->uq_pi_contested, pi_link) {
1860161678Sdavidxu			uq_first2 = TAILQ_FIRST(&pi2->pi_blocked);
1861161678Sdavidxu			if (uq_first2 != NULL) {
1862161678Sdavidxu				if (pri > UPRI(uq_first2->uq_thread))
1863161678Sdavidxu					pri = UPRI(uq_first2->uq_thread);
1864161678Sdavidxu			}
1865161678Sdavidxu		}
1866170300Sjeff		thread_lock(curthread);
1867216791Sdavidxu		sched_lend_user_prio(curthread, pri);
1868170300Sjeff		thread_unlock(curthread);
1869170300Sjeff		mtx_unlock_spin(&umtx_lock);
1870189756Sdavidxu		if (uq_first)
1871189756Sdavidxu			umtxq_signal_thread(uq_first);
1872161678Sdavidxu	}
1873161678Sdavidxu	umtxq_unlock(&key);
1874161678Sdavidxu
1875161678Sdavidxu	/*
1876161678Sdavidxu	 * When unlocking the umtx, it must be marked as unowned if
1877161678Sdavidxu	 * zero or one thread is waiting for it.
1878161678Sdavidxu	 * Otherwise, it must be marked as contested.
1879161678Sdavidxu	 */
1880161678Sdavidxu	old = casuword32(&m->m_owner, owner,
1881161678Sdavidxu		count <= 1 ? UMUTEX_UNOWNED : UMUTEX_CONTESTED);
1882161678Sdavidxu
1883161678Sdavidxu	umtxq_lock(&key);
1884161678Sdavidxu	umtxq_unbusy(&key);
1885161678Sdavidxu	umtxq_unlock(&key);
1886161678Sdavidxu	umtx_key_release(&key);
1887161678Sdavidxu	if (old == -1)
1888161678Sdavidxu		return (EFAULT);
1889161678Sdavidxu	if (old != owner)
1890161678Sdavidxu		return (EINVAL);
1891161678Sdavidxu	return (0);
1892161678Sdavidxu}
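/*
 * Editorial note: the "count <= 1" store above implements the usual
 * last-waiter handoff.  For example, with two sleeping waiters the
 * word stays UMUTEX_CONTESTED so the woken thread re-enters the
 * kernel for the PI bookkeeping; with at most one waiter the word is
 * reset to UMUTEX_UNOWNED and can be reacquired without contention.
 */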
1893161678Sdavidxu
1894161678Sdavidxu/*
1895161678Sdavidxu * Lock a PP mutex.
1896161678Sdavidxu */
1897161678Sdavidxustatic int
1898161678Sdavidxu_do_lock_pp(struct thread *td, struct umutex *m, uint32_t flags, int timo,
1899161678Sdavidxu	int try)
1900161678Sdavidxu{
1901161678Sdavidxu	struct umtx_q *uq, *uq2;
1902161678Sdavidxu	struct umtx_pi *pi;
1903161678Sdavidxu	uint32_t ceiling;
1904161678Sdavidxu	uint32_t owner, id;
1905161678Sdavidxu	int error, pri, old_inherited_pri, su;
1906161678Sdavidxu
1907161678Sdavidxu	id = td->td_tid;
1908161678Sdavidxu	uq = td->td_umtxq;
1909161678Sdavidxu	if ((error = umtx_key_get(m, TYPE_PP_UMUTEX, GET_SHARE(flags),
1910161678Sdavidxu	    &uq->uq_key)) != 0)
1911161678Sdavidxu		return (error);
1912164033Srwatson	su = (priv_check(td, PRIV_SCHED_RTPRIO) == 0);
1913161678Sdavidxu	for (;;) {
1914161678Sdavidxu		old_inherited_pri = uq->uq_inherited_pri;
1915161678Sdavidxu		umtxq_lock(&uq->uq_key);
1916161678Sdavidxu		umtxq_busy(&uq->uq_key);
1917161678Sdavidxu		umtxq_unlock(&uq->uq_key);
1918161678Sdavidxu
1919161678Sdavidxu		ceiling = RTP_PRIO_MAX - fuword32(&m->m_ceilings[0]);
1920161678Sdavidxu		if (ceiling > RTP_PRIO_MAX) {
1921161678Sdavidxu			error = EINVAL;
1922161678Sdavidxu			goto out;
1923161678Sdavidxu		}
1924161678Sdavidxu
1925170300Sjeff		mtx_lock_spin(&umtx_lock);
1926161678Sdavidxu		if (UPRI(td) < PRI_MIN_REALTIME + ceiling) {
1927170300Sjeff			mtx_unlock_spin(&umtx_lock);
1928161678Sdavidxu			error = EINVAL;
1929161678Sdavidxu			goto out;
1930161678Sdavidxu		}
1931161678Sdavidxu		if (su && PRI_MIN_REALTIME + ceiling < uq->uq_inherited_pri) {
1932161678Sdavidxu			uq->uq_inherited_pri = PRI_MIN_REALTIME + ceiling;
1933170300Sjeff			thread_lock(td);
1934161678Sdavidxu			if (uq->uq_inherited_pri < UPRI(td))
1935161678Sdavidxu				sched_lend_user_prio(td, uq->uq_inherited_pri);
1936170300Sjeff			thread_unlock(td);
1937161678Sdavidxu		}
1938170300Sjeff		mtx_unlock_spin(&umtx_lock);
1939161678Sdavidxu
1940161678Sdavidxu		owner = casuword32(&m->m_owner,
1941161678Sdavidxu		    UMUTEX_CONTESTED, id | UMUTEX_CONTESTED);
1942161678Sdavidxu
1943161678Sdavidxu		if (owner == UMUTEX_CONTESTED) {
1944161678Sdavidxu			error = 0;
1945161678Sdavidxu			break;
1946161678Sdavidxu		}
1947161678Sdavidxu
1948161678Sdavidxu		/* The address was invalid. */
1949161678Sdavidxu		if (owner == -1) {
1950161678Sdavidxu			error = EFAULT;
1951161678Sdavidxu			break;
1952161678Sdavidxu		}
1953161678Sdavidxu
1954161678Sdavidxu		if ((flags & UMUTEX_ERROR_CHECK) != 0 &&
1955161678Sdavidxu		    (owner & ~UMUTEX_CONTESTED) == id) {
1956161678Sdavidxu			error = EDEADLK;
1957161678Sdavidxu			break;
1958161678Sdavidxu		}
1959161678Sdavidxu
1960161678Sdavidxu		if (try != 0) {
1961161678Sdavidxu			error = EBUSY;
1962161678Sdavidxu			break;
1963161678Sdavidxu		}
1964161678Sdavidxu
1965161678Sdavidxu		/*
1966161678Sdavidxu		 * If we caught a signal during the last sleep, we have
1967161678Sdavidxu		 * already retried the lock above; exit immediately.
1968161678Sdavidxu		 */
1969161678Sdavidxu		if (error != 0)
1970161678Sdavidxu			break;
1971161678Sdavidxu
1972161678Sdavidxu		umtxq_lock(&uq->uq_key);
1973161678Sdavidxu		umtxq_insert(uq);
1974161678Sdavidxu		umtxq_unbusy(&uq->uq_key);
1975161678Sdavidxu		error = umtxq_sleep(uq, "umtxpp", timo);
1976161678Sdavidxu		umtxq_remove(uq);
1977161678Sdavidxu		umtxq_unlock(&uq->uq_key);
1978161678Sdavidxu
1979170300Sjeff		mtx_lock_spin(&umtx_lock);
1980161678Sdavidxu		uq->uq_inherited_pri = old_inherited_pri;
1981161678Sdavidxu		pri = PRI_MAX;
1982161678Sdavidxu		TAILQ_FOREACH(pi, &uq->uq_pi_contested, pi_link) {
1983161678Sdavidxu			uq2 = TAILQ_FIRST(&pi->pi_blocked);
1984161678Sdavidxu			if (uq2 != NULL) {
1985161678Sdavidxu				if (pri > UPRI(uq2->uq_thread))
1986161678Sdavidxu					pri = UPRI(uq2->uq_thread);
1987161678Sdavidxu			}
1988161678Sdavidxu		}
1989161678Sdavidxu		if (pri > uq->uq_inherited_pri)
1990161678Sdavidxu			pri = uq->uq_inherited_pri;
1991170300Sjeff		thread_lock(td);
1992216791Sdavidxu		sched_lend_user_prio(td, pri);
1993170300Sjeff		thread_unlock(td);
1994170300Sjeff		mtx_unlock_spin(&umtx_lock);
1995161678Sdavidxu	}
1996161678Sdavidxu
1997161678Sdavidxu	if (error != 0) {
1998170300Sjeff		mtx_lock_spin(&umtx_lock);
1999161678Sdavidxu		uq->uq_inherited_pri = old_inherited_pri;
2000161678Sdavidxu		pri = PRI_MAX;
2001161678Sdavidxu		TAILQ_FOREACH(pi, &uq->uq_pi_contested, pi_link) {
2002161678Sdavidxu			uq2 = TAILQ_FIRST(&pi->pi_blocked);
2003161678Sdavidxu			if (uq2 != NULL) {
2004161678Sdavidxu				if (pri > UPRI(uq2->uq_thread))
2005161678Sdavidxu					pri = UPRI(uq2->uq_thread);
2006161678Sdavidxu			}
2007161678Sdavidxu		}
2008161678Sdavidxu		if (pri > uq->uq_inherited_pri)
2009161678Sdavidxu			pri = uq->uq_inherited_pri;
2010170300Sjeff		thread_lock(td);
2011216791Sdavidxu		sched_lend_user_prio(td, pri);
2012170300Sjeff		thread_unlock(td);
2013170300Sjeff		mtx_unlock_spin(&umtx_lock);
2014161678Sdavidxu	}
2015161678Sdavidxu
2016161678Sdavidxuout:
2017161678Sdavidxu	umtxq_lock(&uq->uq_key);
2018161678Sdavidxu	umtxq_unbusy(&uq->uq_key);
2019161678Sdavidxu	umtxq_unlock(&uq->uq_key);
2020161678Sdavidxu	umtx_key_release(&uq->uq_key);
2021161678Sdavidxu	return (error);
2022161678Sdavidxu}
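/*
 * Editorial note on the ceiling arithmetic above: in m_ceilings[0] a
 * larger value means a stronger ceiling (POSIX convention), while
 * kernel priorities are stronger when numerically smaller, hence the
 * inversion ceiling = RTP_PRIO_MAX - m_ceilings[0].  With RTP_PRIO_MAX
 * being 31, a user ceiling of 31 maps to PRI_MIN_REALTIME + 0 (the
 * strongest real-time priority) and 0 maps to PRI_MIN_REALTIME + 31.
 */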
2023161678Sdavidxu
2024161678Sdavidxu/*
2025161678Sdavidxu * Unlock a PP mutex.
2026161678Sdavidxu */
2027161678Sdavidxustatic int
2028161678Sdavidxudo_unlock_pp(struct thread *td, struct umutex *m, uint32_t flags)
2029161678Sdavidxu{
2030161678Sdavidxu	struct umtx_key key;
2031161678Sdavidxu	struct umtx_q *uq, *uq2;
2032161678Sdavidxu	struct umtx_pi *pi;
2033161678Sdavidxu	uint32_t owner, id;
2034161678Sdavidxu	uint32_t rceiling;
2035161926Sdavidxu	int error, pri, new_inherited_pri, su;
2036161678Sdavidxu
2037161678Sdavidxu	id = td->td_tid;
2038161678Sdavidxu	uq = td->td_umtxq;
2039164033Srwatson	su = (priv_check(td, PRIV_SCHED_RTPRIO) == 0);
2040161678Sdavidxu
2041161678Sdavidxu	/*
2042161678Sdavidxu	 * Make sure we own this mtx.
2043161678Sdavidxu	 */
2044163449Sdavidxu	owner = fuword32(__DEVOLATILE(uint32_t *, &m->m_owner));
2045161678Sdavidxu	if (owner == -1)
2046161678Sdavidxu		return (EFAULT);
2047161678Sdavidxu
2048161678Sdavidxu	if ((owner & ~UMUTEX_CONTESTED) != id)
2049161678Sdavidxu		return (EPERM);
2050161678Sdavidxu
2051161678Sdavidxu	error = copyin(&m->m_ceilings[1], &rceiling, sizeof(uint32_t));
2052161678Sdavidxu	if (error != 0)
2053161678Sdavidxu		return (error);
2054161678Sdavidxu
2055161678Sdavidxu	if (rceiling == -1)
2056161678Sdavidxu		new_inherited_pri = PRI_MAX;
2057161678Sdavidxu	else {
2058161678Sdavidxu		rceiling = RTP_PRIO_MAX - rceiling;
2059161678Sdavidxu		if (rceiling > RTP_PRIO_MAX)
2060161678Sdavidxu			return (EINVAL);
2061161678Sdavidxu		new_inherited_pri = PRI_MIN_REALTIME + rceiling;
2062161678Sdavidxu	}
2063161678Sdavidxu
2064161678Sdavidxu	if ((error = umtx_key_get(m, TYPE_PP_UMUTEX, GET_SHARE(flags),
2065161678Sdavidxu	    &key)) != 0)
2066161678Sdavidxu		return (error);
2067161678Sdavidxu	umtxq_lock(&key);
2068161678Sdavidxu	umtxq_busy(&key);
2069161678Sdavidxu	umtxq_unlock(&key);
2070161678Sdavidxu	/*
2071161678Sdavidxu	 * For a priority-protected mutex, always set the unlocked state
2072161678Sdavidxu	 * to UMUTEX_CONTESTED so that userland always enters the kernel
2073161678Sdavidxu	 * to lock the mutex.  This is necessary because thread priorities
2074161678Sdavidxu	 * have to be adjusted for such a mutex.
2075161678Sdavidxu	 */
2076163449Sdavidxu	error = suword32(__DEVOLATILE(uint32_t *, &m->m_owner),
2077163449Sdavidxu		UMUTEX_CONTESTED);
2078161678Sdavidxu
2079161678Sdavidxu	umtxq_lock(&key);
2080161678Sdavidxu	if (error == 0)
2081161678Sdavidxu		umtxq_signal(&key, 1);
2082161678Sdavidxu	umtxq_unbusy(&key);
2083161678Sdavidxu	umtxq_unlock(&key);
2084161678Sdavidxu
2085161678Sdavidxu	if (error == -1)
2086161678Sdavidxu		error = EFAULT;
2087161678Sdavidxu	else {
2088170300Sjeff		mtx_lock_spin(&umtx_lock);
2089161926Sdavidxu		if (su != 0)
2090161926Sdavidxu			uq->uq_inherited_pri = new_inherited_pri;
2091161678Sdavidxu		pri = PRI_MAX;
2092161678Sdavidxu		TAILQ_FOREACH(pi, &uq->uq_pi_contested, pi_link) {
2093161678Sdavidxu			uq2 = TAILQ_FIRST(&pi->pi_blocked);
2094161678Sdavidxu			if (uq2 != NULL) {
2095161678Sdavidxu				if (pri > UPRI(uq2->uq_thread))
2096161678Sdavidxu					pri = UPRI(uq2->uq_thread);
2097161678Sdavidxu			}
2098161678Sdavidxu		}
2099161678Sdavidxu		if (pri > uq->uq_inherited_pri)
2100161678Sdavidxu			pri = uq->uq_inherited_pri;
2101170300Sjeff		thread_lock(td);
2102216791Sdavidxu		sched_lend_user_prio(td, pri);
2103170300Sjeff		thread_unlock(td);
2104170300Sjeff		mtx_unlock_spin(&umtx_lock);
2105161678Sdavidxu	}
2106161678Sdavidxu	umtx_key_release(&key);
2107161678Sdavidxu	return (error);
2108161678Sdavidxu}
2109161678Sdavidxu
2110161678Sdavidxustatic int
2111161678Sdavidxudo_set_ceiling(struct thread *td, struct umutex *m, uint32_t ceiling,
2112161678Sdavidxu	uint32_t *old_ceiling)
2113161678Sdavidxu{
2114161678Sdavidxu	struct umtx_q *uq;
2115161678Sdavidxu	uint32_t save_ceiling;
2116161678Sdavidxu	uint32_t owner, id;
2117161678Sdavidxu	uint32_t flags;
2118161678Sdavidxu	int error;
2119161678Sdavidxu
2120161678Sdavidxu	flags = fuword32(&m->m_flags);
2121161678Sdavidxu	if ((flags & UMUTEX_PRIO_PROTECT) == 0)
2122161678Sdavidxu		return (EINVAL);
2123161678Sdavidxu	if (ceiling > RTP_PRIO_MAX)
2124161678Sdavidxu		return (EINVAL);
2125161678Sdavidxu	id = td->td_tid;
2126161678Sdavidxu	uq = td->td_umtxq;
2127161678Sdavidxu	if ((error = umtx_key_get(m, TYPE_PP_UMUTEX, GET_SHARE(flags),
2128161678Sdavidxu	   &uq->uq_key)) != 0)
2129161678Sdavidxu		return (error);
2130161678Sdavidxu	for (;;) {
2131161678Sdavidxu		umtxq_lock(&uq->uq_key);
2132161678Sdavidxu		umtxq_busy(&uq->uq_key);
2133161678Sdavidxu		umtxq_unlock(&uq->uq_key);
2134161678Sdavidxu
2135161678Sdavidxu		save_ceiling = fuword32(&m->m_ceilings[0]);
2136161678Sdavidxu
2137161678Sdavidxu		owner = casuword32(&m->m_owner,
2138161678Sdavidxu		    UMUTEX_CONTESTED, id | UMUTEX_CONTESTED);
2139161678Sdavidxu
2140161678Sdavidxu		if (owner == UMUTEX_CONTESTED) {
2141161678Sdavidxu			suword32(&m->m_ceilings[0], ceiling);
2142163449Sdavidxu			suword32(__DEVOLATILE(uint32_t *, &m->m_owner),
2143163449Sdavidxu				UMUTEX_CONTESTED);
2144161678Sdavidxu			error = 0;
2145161678Sdavidxu			break;
2146161678Sdavidxu		}
2147161678Sdavidxu
2148161678Sdavidxu		/* The address was invalid. */
2149161678Sdavidxu		if (owner == -1) {
2150161678Sdavidxu			error = EFAULT;
2151161678Sdavidxu			break;
2152161678Sdavidxu		}
2153161678Sdavidxu
2154161678Sdavidxu		if ((owner & ~UMUTEX_CONTESTED) == id) {
2155161678Sdavidxu			suword32(&m->m_ceilings[0], ceiling);
2156161678Sdavidxu			error = 0;
2157161678Sdavidxu			break;
2158161678Sdavidxu		}
2159161678Sdavidxu
2160161678Sdavidxu		/*
2161161678Sdavidxu		 * If we caught a signal during the last sleep, we have
2162161678Sdavidxu		 * already retried the lock above; exit immediately.
2163161678Sdavidxu		 */
2164161678Sdavidxu		if (error != 0)
2165161678Sdavidxu			break;
2166161678Sdavidxu
2167161678Sdavidxu		/*
2168161678Sdavidxu		 * If we set the contested bit, sleep.  Otherwise the lock
2169161678Sdavidxu		 * changed and we need to retry, or we lost a race with the
2170161678Sdavidxu		 * thread unlocking the umtx.
2171161678Sdavidxu		 */
2172161678Sdavidxu		umtxq_lock(&uq->uq_key);
2173161678Sdavidxu		umtxq_insert(uq);
2174161678Sdavidxu		umtxq_unbusy(&uq->uq_key);
2175161678Sdavidxu		error = umtxq_sleep(uq, "umtxpp", 0);
2176161678Sdavidxu		umtxq_remove(uq);
2177161678Sdavidxu		umtxq_unlock(&uq->uq_key);
2178161678Sdavidxu	}
2179161678Sdavidxu	umtxq_lock(&uq->uq_key);
2180161678Sdavidxu	if (error == 0)
2181161678Sdavidxu		umtxq_signal(&uq->uq_key, INT_MAX);
2182161678Sdavidxu	umtxq_unbusy(&uq->uq_key);
2183161678Sdavidxu	umtxq_unlock(&uq->uq_key);
2184161678Sdavidxu	umtx_key_release(&uq->uq_key);
2185161678Sdavidxu	if (error == 0 && old_ceiling != NULL)
2186161678Sdavidxu		suword32(old_ceiling, save_ceiling);
2187161678Sdavidxu	return (error);
2188161678Sdavidxu}
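/*
 * Editorial note: do_set_ceiling() only stores the new ceiling while
 * the caller effectively holds the mutex, either by winning the
 * UMUTEX_CONTESTED CAS or by already being the owner; it then wakes
 * every sleeper (INT_MAX) so each one re-reads the ceiling and
 * revalidates its own priority against it on the next loop iteration.
 */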
2189161678Sdavidxu
2190162030Sdavidxustatic int
2191162030Sdavidxu_do_lock_umutex(struct thread *td, struct umutex *m, int flags, int timo,
2192179970Sdavidxu	int mode)
2193162030Sdavidxu{
2194162030Sdavidxu	switch(flags & (UMUTEX_PRIO_INHERIT | UMUTEX_PRIO_PROTECT)) {
2195162030Sdavidxu	case 0:
2196179970Sdavidxu		return (_do_lock_normal(td, m, flags, timo, mode));
2197162030Sdavidxu	case UMUTEX_PRIO_INHERIT:
2198179970Sdavidxu		return (_do_lock_pi(td, m, flags, timo, mode));
2199162030Sdavidxu	case UMUTEX_PRIO_PROTECT:
2200179970Sdavidxu		return (_do_lock_pp(td, m, flags, timo, mode));
2201162030Sdavidxu	}
2202162030Sdavidxu	return (EINVAL);
2203162030Sdavidxu}
2204162030Sdavidxu
2205161678Sdavidxu/*
2206161678Sdavidxu * Lock a userland POSIX mutex.
2207161678Sdavidxu */
2208161678Sdavidxustatic int
2209162030Sdavidxudo_lock_umutex(struct thread *td, struct umutex *m,
2210179970Sdavidxu	struct timespec *timeout, int mode)
2211161678Sdavidxu{
2212162030Sdavidxu	struct timespec ts, ts2, ts3;
2213162030Sdavidxu	struct timeval tv;
2214161678Sdavidxu	uint32_t flags;
2215162030Sdavidxu	int error;
2216161678Sdavidxu
2217161678Sdavidxu	flags = fuword32(&m->m_flags);
2218161678Sdavidxu	if (flags == -1)
2219161678Sdavidxu		return (EFAULT);
2220161678Sdavidxu
2221162030Sdavidxu	if (timeout == NULL) {
2222179970Sdavidxu		error = _do_lock_umutex(td, m, flags, 0, mode);
2223162030Sdavidxu		/* Mutex locking is restarted if it is interrupted. */
2224179970Sdavidxu		if (error == EINTR && mode != _UMUTEX_WAIT)
2225162030Sdavidxu			error = ERESTART;
2226162030Sdavidxu	} else {
2227162030Sdavidxu		getnanouptime(&ts);
2228162030Sdavidxu		timespecadd(&ts, timeout);
2229162030Sdavidxu		TIMESPEC_TO_TIMEVAL(&tv, timeout);
2230162030Sdavidxu		for (;;) {
2231179970Sdavidxu			error = _do_lock_umutex(td, m, flags, tvtohz(&tv), mode);
2232162030Sdavidxu			if (error != ETIMEDOUT)
2233162030Sdavidxu				break;
2234162030Sdavidxu			getnanouptime(&ts2);
2235162030Sdavidxu			if (timespeccmp(&ts2, &ts, >=)) {
2236162030Sdavidxu				error = ETIMEDOUT;
2237162030Sdavidxu				break;
2238162030Sdavidxu			}
2239162030Sdavidxu			ts3 = ts;
2240162030Sdavidxu			timespecsub(&ts3, &ts2);
2241162030Sdavidxu			TIMESPEC_TO_TIMEVAL(&tv, &ts3);
2242162030Sdavidxu		}
2243162030Sdavidxu		/* Timed-locking is not restarted. */
2244162030Sdavidxu		if (error == ERESTART)
2245162030Sdavidxu			error = EINTR;
2246161742Sdavidxu	}
2247162030Sdavidxu	return (error);
2248161678Sdavidxu}
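/*
 * Editorial note on the retry arithmetic above (example values): for
 * a relative timeout of 1.5 s the absolute deadline is ts = now +
 * 1.5 s.  Each pass sleeps for the remaining interval converted to
 * ticks by tvtohz(); when the tick-granular sleep returns ETIMEDOUT
 * before the real deadline, the remainder is recomputed as
 * ts3 = ts - now and the lock attempt is retried, so the total wait
 * never exceeds the caller's timeout by more than the timer
 * resolution.
 */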
2249161678Sdavidxu
2250161678Sdavidxu/*
2251161678Sdavidxu * Unlock a userland POSIX mutex.
2252161678Sdavidxu */
2253161678Sdavidxustatic int
2254161678Sdavidxudo_unlock_umutex(struct thread *td, struct umutex *m)
2255161678Sdavidxu{
2256161678Sdavidxu	uint32_t flags;
2257161678Sdavidxu
2258161678Sdavidxu	flags = fuword32(&m->m_flags);
2259161678Sdavidxu	if (flags == -1)
2260161678Sdavidxu		return (EFAULT);
2261161678Sdavidxu
2262161855Sdavidxu	switch(flags & (UMUTEX_PRIO_INHERIT | UMUTEX_PRIO_PROTECT)) {
2263161855Sdavidxu	case 0:
2264161855Sdavidxu		return (do_unlock_normal(td, m, flags));
2265161855Sdavidxu	case UMUTEX_PRIO_INHERIT:
2266161855Sdavidxu		return (do_unlock_pi(td, m, flags));
2267161855Sdavidxu	case UMUTEX_PRIO_PROTECT:
2268161855Sdavidxu		return (do_unlock_pp(td, m, flags));
2269161855Sdavidxu	}
2270161678Sdavidxu
2271161855Sdavidxu	return (EINVAL);
2272161678Sdavidxu}
2273161678Sdavidxu
2274164839Sdavidxustatic int
2275164839Sdavidxudo_cv_wait(struct thread *td, struct ucond *cv, struct umutex *m,
2276164876Sdavidxu	struct timespec *timeout, u_long wflags)
2277164839Sdavidxu{
2278164839Sdavidxu	struct umtx_q *uq;
2279164839Sdavidxu	struct timeval tv;
2280164839Sdavidxu	struct timespec cts, ets, tts;
2281164839Sdavidxu	uint32_t flags;
2282216641Sdavidxu	uint32_t clockid;
2283164839Sdavidxu	int error;
2284164839Sdavidxu
2285164839Sdavidxu	uq = td->td_umtxq;
2286164839Sdavidxu	flags = fuword32(&cv->c_flags);
2287164839Sdavidxu	error = umtx_key_get(cv, TYPE_CV, GET_SHARE(flags), &uq->uq_key);
2288164839Sdavidxu	if (error != 0)
2289164839Sdavidxu		return (error);
2290216641Sdavidxu
2291216641Sdavidxu	if ((wflags & CVWAIT_CLOCKID) != 0) {
2292216641Sdavidxu		clockid = fuword32(&cv->c_clockid);
2293216641Sdavidxu		if (clockid < CLOCK_REALTIME ||
2294216641Sdavidxu		    clockid >= CLOCK_THREAD_CPUTIME_ID) {
2295216641Sdavidxu			/* Only predefined hardware clock ids work here. */
2296216641Sdavidxu			return (EINVAL);
2297216641Sdavidxu		}
2298216641Sdavidxu	} else {
2299216641Sdavidxu		clockid = CLOCK_REALTIME;
2300216641Sdavidxu	}
2301216641Sdavidxu
2302164839Sdavidxu	umtxq_lock(&uq->uq_key);
2303164839Sdavidxu	umtxq_busy(&uq->uq_key);
2304164839Sdavidxu	umtxq_insert(uq);
2305164839Sdavidxu	umtxq_unlock(&uq->uq_key);
2306164839Sdavidxu
2307164839Sdavidxu	/*
2308216641Sdavidxu	 * Set c_has_waiters to 1 before releasing the user mutex, and
2309216641Sdavidxu	 * avoid dirtying the cache line when the flag is already set.
2310164839Sdavidxu	 */
2311216641Sdavidxu	if (fuword32(__DEVOLATILE(uint32_t *, &cv->c_has_waiters)) == 0)
2312216641Sdavidxu		suword32(__DEVOLATILE(uint32_t *, &cv->c_has_waiters), 1);
2313164839Sdavidxu
2314164839Sdavidxu	umtxq_lock(&uq->uq_key);
2315164839Sdavidxu	umtxq_unbusy(&uq->uq_key);
2316164839Sdavidxu	umtxq_unlock(&uq->uq_key);
2317164839Sdavidxu
2318164839Sdavidxu	error = do_unlock_umutex(td, m);
2319164839Sdavidxu
2320164839Sdavidxu	umtxq_lock(&uq->uq_key);
2321164839Sdavidxu	if (error == 0) {
2322216641Sdavidxu		if (timeout == NULL) {
2323164839Sdavidxu			error = umtxq_sleep(uq, "ucond", 0);
2324164839Sdavidxu		} else {
2325216641Sdavidxu			if ((wflags & CVWAIT_ABSTIME) == 0) {
2326216641Sdavidxu				kern_clock_gettime(td, clockid, &ets);
2327216641Sdavidxu				timespecadd(&ets, timeout);
2328216641Sdavidxu				tts = *timeout;
2329216641Sdavidxu			} else { /* absolute time */
2330216641Sdavidxu				ets = *timeout;
2331216641Sdavidxu				tts = *timeout;
2332216641Sdavidxu				kern_clock_gettime(td, clockid, &cts);
2333216641Sdavidxu				timespecsub(&tts, &cts);
2334216641Sdavidxu			}
2335216641Sdavidxu			TIMESPEC_TO_TIMEVAL(&tv, &tts);
2336164839Sdavidxu			for (;;) {
2337164839Sdavidxu				error = umtxq_sleep(uq, "ucond", tvtohz(&tv));
2338164839Sdavidxu				if (error != ETIMEDOUT)
2339164839Sdavidxu					break;
2340216641Sdavidxu				kern_clock_gettime(td, clockid, &cts);
2341164839Sdavidxu				if (timespeccmp(&cts, &ets, >=)) {
2342164839Sdavidxu					error = ETIMEDOUT;
2343164839Sdavidxu					break;
2344164839Sdavidxu				}
2345164839Sdavidxu				tts = ets;
2346164839Sdavidxu				timespecsub(&tts, &cts);
2347164839Sdavidxu				TIMESPEC_TO_TIMEVAL(&tv, &tts);
2348164839Sdavidxu			}
2349164839Sdavidxu		}
2350164839Sdavidxu	}
2351164839Sdavidxu
2352211794Sdavidxu	if ((uq->uq_flags & UQF_UMTXQ) == 0)
2353211794Sdavidxu		error = 0;
2354211794Sdavidxu	else {
2355216641Sdavidxu		/*
2356216641Sdavidxu		 * This must be a timeout, an interruption by a signal, or
2357216641Sdavidxu		 * a spurious wakeup; clear the c_has_waiters flag when
2358216641Sdavidxu		 * necessary.
2359216641Sdavidxu		 */
2360216641Sdavidxu		umtxq_busy(&uq->uq_key);
2361216641Sdavidxu		if ((uq->uq_flags & UQF_UMTXQ) != 0) {
2362216641Sdavidxu			int oldlen = uq->uq_cur_queue->length;
2363216641Sdavidxu			umtxq_remove(uq);
2364216641Sdavidxu			if (oldlen == 1) {
2365216641Sdavidxu				umtxq_unlock(&uq->uq_key);
2366216641Sdavidxu				suword32(
2367216641Sdavidxu				    __DEVOLATILE(uint32_t *,
2368216641Sdavidxu					 &cv->c_has_waiters), 0);
2369216641Sdavidxu				umtxq_lock(&uq->uq_key);
2370216641Sdavidxu			}
2371216641Sdavidxu		}
2372216641Sdavidxu		umtxq_unbusy(&uq->uq_key);
2373164839Sdavidxu		if (error == ERESTART)
2374164839Sdavidxu			error = EINTR;
2375164839Sdavidxu	}
2376211794Sdavidxu
2377164839Sdavidxu	umtxq_unlock(&uq->uq_key);
2378164839Sdavidxu	umtx_key_release(&uq->uq_key);
2379164839Sdavidxu	return (error);
2380164839Sdavidxu}
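/*
 * Editorial note: with CVWAIT_ABSTIME the caller passes an absolute
 * deadline on the requested clock (ets = *timeout) and the first
 * sleep length is deadline - now; without it the timeout is relative
 * and the deadline is computed as ets = now + timeout.  In both cases
 * the loop re-reads the clock after every wakeup, so a wakeup that
 * was consumed by another thread merely shortens the next sleep
 * instead of restarting the full timeout.
 */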
2381164839Sdavidxu
2382164839Sdavidxu/*
2383164839Sdavidxu * Signal a userland condition variable.
2384164839Sdavidxu */
2385164839Sdavidxustatic int
2386164839Sdavidxudo_cv_signal(struct thread *td, struct ucond *cv)
2387164839Sdavidxu{
2388164839Sdavidxu	struct umtx_key key;
2389164839Sdavidxu	int error, cnt, nwake;
2390164839Sdavidxu	uint32_t flags;
2391164839Sdavidxu
2392164839Sdavidxu	flags = fuword32(&cv->c_flags);
2393164839Sdavidxu	if ((error = umtx_key_get(cv, TYPE_CV, GET_SHARE(flags), &key)) != 0)
2394164839Sdavidxu		return (error);
2395164839Sdavidxu	umtxq_lock(&key);
2396164839Sdavidxu	umtxq_busy(&key);
2397164839Sdavidxu	cnt = umtxq_count(&key);
2398164839Sdavidxu	nwake = umtxq_signal(&key, 1);
2399164839Sdavidxu	if (cnt <= nwake) {
2400164839Sdavidxu		umtxq_unlock(&key);
2401164839Sdavidxu		error = suword32(
2402164839Sdavidxu		    __DEVOLATILE(uint32_t *, &cv->c_has_waiters), 0);
2403164839Sdavidxu		umtxq_lock(&key);
2404164839Sdavidxu	}
2405164839Sdavidxu	umtxq_unbusy(&key);
2406164839Sdavidxu	umtxq_unlock(&key);
2407164839Sdavidxu	umtx_key_release(&key);
2408164839Sdavidxu	return (error);
2409164839Sdavidxu}
2410164839Sdavidxu
2411164839Sdavidxustatic int
2412164839Sdavidxudo_cv_broadcast(struct thread *td, struct ucond *cv)
2413164839Sdavidxu{
2414164839Sdavidxu	struct umtx_key key;
2415164839Sdavidxu	int error;
2416164839Sdavidxu	uint32_t flags;
2417164839Sdavidxu
2418164839Sdavidxu	flags = fuword32(&cv->c_flags);
2419164839Sdavidxu	if ((error = umtx_key_get(cv, TYPE_CV, GET_SHARE(flags), &key)) != 0)
2420164839Sdavidxu		return (error);
2421164839Sdavidxu
2422164839Sdavidxu	umtxq_lock(&key);
2423164839Sdavidxu	umtxq_busy(&key);
2424164839Sdavidxu	umtxq_signal(&key, INT_MAX);
2425164839Sdavidxu	umtxq_unlock(&key);
2426164839Sdavidxu
2427164839Sdavidxu	error = suword32(__DEVOLATILE(uint32_t *, &cv->c_has_waiters), 0);
2428164839Sdavidxu
2429164839Sdavidxu	umtxq_lock(&key);
2430164839Sdavidxu	umtxq_unbusy(&key);
2431164839Sdavidxu	umtxq_unlock(&key);
2432164839Sdavidxu
2433164839Sdavidxu	umtx_key_release(&key);
2434164839Sdavidxu	return (error);
2435164839Sdavidxu}
2436164839Sdavidxu
2437177848Sdavidxustatic int
2438177848Sdavidxudo_rw_rdlock(struct thread *td, struct urwlock *rwlock, long fflag, int timo)
2439177848Sdavidxu{
2440177848Sdavidxu	struct umtx_q *uq;
2441177848Sdavidxu	uint32_t flags, wrflags;
2442177848Sdavidxu	int32_t state, oldstate;
2443177848Sdavidxu	int32_t blocked_readers;
2444177848Sdavidxu	int error;
2445177848Sdavidxu
2446177848Sdavidxu	uq = td->td_umtxq;
2447177848Sdavidxu	flags = fuword32(&rwlock->rw_flags);
2448177848Sdavidxu	error = umtx_key_get(rwlock, TYPE_RWLOCK, GET_SHARE(flags), &uq->uq_key);
2449177848Sdavidxu	if (error != 0)
2450177848Sdavidxu		return (error);
2451177848Sdavidxu
2452177848Sdavidxu	wrflags = URWLOCK_WRITE_OWNER;
2453177848Sdavidxu	if (!(fflag & URWLOCK_PREFER_READER) && !(flags & URWLOCK_PREFER_READER))
2454177848Sdavidxu		wrflags |= URWLOCK_WRITE_WAITERS;
2455177848Sdavidxu
2456177848Sdavidxu	for (;;) {
2457177848Sdavidxu		state = fuword32(__DEVOLATILE(int32_t *, &rwlock->rw_state));
2458177848Sdavidxu		/* try to lock it */
2459177848Sdavidxu		while (!(state & wrflags)) {
2460177848Sdavidxu			if (__predict_false(URWLOCK_READER_COUNT(state) == URWLOCK_MAX_READERS)) {
2461177848Sdavidxu				umtx_key_release(&uq->uq_key);
2462177848Sdavidxu				return (EAGAIN);
2463177848Sdavidxu			}
2464177848Sdavidxu			oldstate = casuword32(&rwlock->rw_state, state, state + 1);
2465177848Sdavidxu			if (oldstate == state) {
2466177848Sdavidxu				umtx_key_release(&uq->uq_key);
2467177848Sdavidxu				return (0);
2468177848Sdavidxu			}
2469177848Sdavidxu			state = oldstate;
2470177848Sdavidxu		}
2471177848Sdavidxu
2472177848Sdavidxu		if (error)
2473177848Sdavidxu			break;
2474177848Sdavidxu
2475177848Sdavidxu		/* grab monitor lock */
2476177848Sdavidxu		umtxq_lock(&uq->uq_key);
2477177848Sdavidxu		umtxq_busy(&uq->uq_key);
2478177848Sdavidxu		umtxq_unlock(&uq->uq_key);
2479177848Sdavidxu
2480203414Sdavidxu		/*
2481203414Sdavidxu		 * Re-read the state in case it changed between the
2482203414Sdavidxu		 * try-lock above and the check below.
2483203414Sdavidxu		 */
2484203414Sdavidxu		state = fuword32(__DEVOLATILE(int32_t *, &rwlock->rw_state));
2485203414Sdavidxu
2486177848Sdavidxu		/* set read contention bit */
2487177848Sdavidxu		while ((state & wrflags) && !(state & URWLOCK_READ_WAITERS)) {
2488177848Sdavidxu			oldstate = casuword32(&rwlock->rw_state, state, state | URWLOCK_READ_WAITERS);
2489177848Sdavidxu			if (oldstate == state)
2490177848Sdavidxu				goto sleep;
2491177848Sdavidxu			state = oldstate;
2492177848Sdavidxu		}
2493177848Sdavidxu
2494177848Sdavidxu		/* The state changed while we were setting the flag; restart. */
2495177848Sdavidxu		if (!(state & wrflags)) {
2496177848Sdavidxu			umtxq_lock(&uq->uq_key);
2497177848Sdavidxu			umtxq_unbusy(&uq->uq_key);
2498177848Sdavidxu			umtxq_unlock(&uq->uq_key);
2499177848Sdavidxu			continue;
2500177848Sdavidxu		}
2501177848Sdavidxu
2502177848Sdavidxusleep:
2503177848Sdavidxu		/* Contention bit is set; bump the read-waiter count before sleeping. */
2504177848Sdavidxu		blocked_readers = fuword32(&rwlock->rw_blocked_readers);
2505177848Sdavidxu		suword32(&rwlock->rw_blocked_readers, blocked_readers+1);
2506177848Sdavidxu
2507177848Sdavidxu		while (state & wrflags) {
2508177848Sdavidxu			umtxq_lock(&uq->uq_key);
2509177848Sdavidxu			umtxq_insert(uq);
2510177848Sdavidxu			umtxq_unbusy(&uq->uq_key);
2511177848Sdavidxu
2512177848Sdavidxu			error = umtxq_sleep(uq, "urdlck", timo);
2513177848Sdavidxu
2514177848Sdavidxu			umtxq_busy(&uq->uq_key);
2515177848Sdavidxu			umtxq_remove(uq);
2516177848Sdavidxu			umtxq_unlock(&uq->uq_key);
2517177848Sdavidxu			if (error)
2518177848Sdavidxu				break;
2519177848Sdavidxu			state = fuword32(__DEVOLATILE(int32_t *, &rwlock->rw_state));
2520177848Sdavidxu		}
2521177848Sdavidxu
2522177848Sdavidxu		/* Decrease the read-waiter count; possibly clear the read-contention bit. */
2523177848Sdavidxu		blocked_readers = fuword32(&rwlock->rw_blocked_readers);
2524177848Sdavidxu		suword32(&rwlock->rw_blocked_readers, blocked_readers-1);
2525177848Sdavidxu		if (blocked_readers == 1) {
2526177848Sdavidxu			state = fuword32(__DEVOLATILE(int32_t *, &rwlock->rw_state));
2527177848Sdavidxu			for (;;) {
2528177848Sdavidxu				oldstate = casuword32(&rwlock->rw_state, state,
2529177848Sdavidxu					 state & ~URWLOCK_READ_WAITERS);
2530177848Sdavidxu				if (oldstate == state)
2531177848Sdavidxu					break;
2532177848Sdavidxu				state = oldstate;
2533177848Sdavidxu			}
2534177848Sdavidxu		}
2535177848Sdavidxu
2536177848Sdavidxu		umtxq_lock(&uq->uq_key);
2537177848Sdavidxu		umtxq_unbusy(&uq->uq_key);
2538177848Sdavidxu		umtxq_unlock(&uq->uq_key);
2539177848Sdavidxu	}
2540177848Sdavidxu	umtx_key_release(&uq->uq_key);
2541177848Sdavidxu	return (error);
2542177848Sdavidxu}
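/*
 * Editorial note on the rw_state encoding used above (as defined in
 * sys/umtx.h): the three high bits are URWLOCK_WRITE_OWNER,
 * URWLOCK_WRITE_WAITERS and URWLOCK_READ_WAITERS, and the low bits
 * hold the reader count extracted by URWLOCK_READER_COUNT().  That is
 * why a read lock is acquired by a CAS of state to state + 1 and, in
 * do_rw_unlock() below, released by a CAS back to state - 1.
 */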
2543177848Sdavidxu
2544177848Sdavidxustatic int
2545177848Sdavidxudo_rw_rdlock2(struct thread *td, void *obj, long val, struct timespec *timeout)
2546177848Sdavidxu{
2547177848Sdavidxu	struct timespec ts, ts2, ts3;
2548177848Sdavidxu	struct timeval tv;
2549177848Sdavidxu	int error;
2550177848Sdavidxu
2551177848Sdavidxu	getnanouptime(&ts);
2552177848Sdavidxu	timespecadd(&ts, timeout);
2553177848Sdavidxu	TIMESPEC_TO_TIMEVAL(&tv, timeout);
2554177848Sdavidxu	for (;;) {
2555177848Sdavidxu		error = do_rw_rdlock(td, obj, val, tvtohz(&tv));
2556177848Sdavidxu		if (error != ETIMEDOUT)
2557177848Sdavidxu			break;
2558177848Sdavidxu		getnanouptime(&ts2);
2559177848Sdavidxu		if (timespeccmp(&ts2, &ts, >=)) {
2560177848Sdavidxu			error = ETIMEDOUT;
2561177848Sdavidxu			break;
2562177848Sdavidxu		}
2563177848Sdavidxu		ts3 = ts;
2564177848Sdavidxu		timespecsub(&ts3, &ts2);
2565177848Sdavidxu		TIMESPEC_TO_TIMEVAL(&tv, &ts3);
2566177848Sdavidxu	}
2567177849Sdavidxu	if (error == ERESTART)
2568177849Sdavidxu		error = EINTR;
2569177848Sdavidxu	return (error);
2570177848Sdavidxu}
2571177848Sdavidxu
2572177848Sdavidxustatic int
2573177848Sdavidxudo_rw_wrlock(struct thread *td, struct urwlock *rwlock, int timo)
2574177848Sdavidxu{
2575177848Sdavidxu	struct umtx_q *uq;
2576177848Sdavidxu	uint32_t flags;
2577177848Sdavidxu	int32_t state, oldstate;
2578177848Sdavidxu	int32_t blocked_writers;
2579197476Sdavidxu	int32_t blocked_readers;
2580177848Sdavidxu	int error;
2581177848Sdavidxu
2582177848Sdavidxu	uq = td->td_umtxq;
2583177848Sdavidxu	flags = fuword32(&rwlock->rw_flags);
2584177848Sdavidxu	error = umtx_key_get(rwlock, TYPE_RWLOCK, GET_SHARE(flags), &uq->uq_key);
2585177848Sdavidxu	if (error != 0)
2586177848Sdavidxu		return (error);
2587177848Sdavidxu
2588197476Sdavidxu	blocked_readers = 0;
2589177848Sdavidxu	for (;;) {
2590177848Sdavidxu		state = fuword32(__DEVOLATILE(int32_t *, &rwlock->rw_state));
2591177848Sdavidxu		while (!(state & URWLOCK_WRITE_OWNER) && URWLOCK_READER_COUNT(state) == 0) {
2592177848Sdavidxu			oldstate = casuword32(&rwlock->rw_state, state, state | URWLOCK_WRITE_OWNER);
2593177848Sdavidxu			if (oldstate == state) {
2594177848Sdavidxu				umtx_key_release(&uq->uq_key);
2595177848Sdavidxu				return (0);
2596177848Sdavidxu			}
2597177848Sdavidxu			state = oldstate;
2598177848Sdavidxu		}
2599177848Sdavidxu
2600197476Sdavidxu		if (error) {
2601197476Sdavidxu			if (!(state & (URWLOCK_WRITE_OWNER|URWLOCK_WRITE_WAITERS)) &&
2602197476Sdavidxu			    blocked_readers != 0) {
2603197476Sdavidxu				umtxq_lock(&uq->uq_key);
2604197476Sdavidxu				umtxq_busy(&uq->uq_key);
2605197476Sdavidxu				umtxq_signal_queue(&uq->uq_key, INT_MAX, UMTX_SHARED_QUEUE);
2606197476Sdavidxu				umtxq_unbusy(&uq->uq_key);
2607197476Sdavidxu				umtxq_unlock(&uq->uq_key);
2608197476Sdavidxu			}
2609197476Sdavidxu
2610177848Sdavidxu			break;
2611197476Sdavidxu		}
2612177848Sdavidxu
2613177848Sdavidxu		/* grab monitor lock */
2614177848Sdavidxu		umtxq_lock(&uq->uq_key);
2615177848Sdavidxu		umtxq_busy(&uq->uq_key);
2616177848Sdavidxu		umtxq_unlock(&uq->uq_key);
2617177848Sdavidxu
2618203414Sdavidxu		/*
2619203414Sdavidxu		 * Re-read the state in case it changed between the
2620203414Sdavidxu		 * try-lock above and the check below.
2621203414Sdavidxu		 */
2622203414Sdavidxu		state = fuword32(__DEVOLATILE(int32_t *, &rwlock->rw_state));
2623203414Sdavidxu
2624177848Sdavidxu		while (((state & URWLOCK_WRITE_OWNER) || URWLOCK_READER_COUNT(state) != 0) &&
2625177848Sdavidxu		       (state & URWLOCK_WRITE_WAITERS) == 0) {
2626177848Sdavidxu			oldstate = casuword32(&rwlock->rw_state, state, state | URWLOCK_WRITE_WAITERS);
2627177848Sdavidxu			if (oldstate == state)
2628177848Sdavidxu				goto sleep;
2629177848Sdavidxu			state = oldstate;
2630177848Sdavidxu		}
2631177848Sdavidxu
2632177848Sdavidxu		if (!(state & URWLOCK_WRITE_OWNER) && URWLOCK_READER_COUNT(state) == 0) {
2633177848Sdavidxu			umtxq_lock(&uq->uq_key);
2634177848Sdavidxu			umtxq_unbusy(&uq->uq_key);
2635177848Sdavidxu			umtxq_unlock(&uq->uq_key);
2636177848Sdavidxu			continue;
2637177848Sdavidxu		}
2638177848Sdavidxusleep:
2639177848Sdavidxu		blocked_writers = fuword32(&rwlock->rw_blocked_writers);
2640177848Sdavidxu		suword32(&rwlock->rw_blocked_writers, blocked_writers+1);
2641177848Sdavidxu
2642177848Sdavidxu		while ((state & URWLOCK_WRITE_OWNER) || URWLOCK_READER_COUNT(state) != 0) {
2643177848Sdavidxu			umtxq_lock(&uq->uq_key);
2644177848Sdavidxu			umtxq_insert_queue(uq, UMTX_EXCLUSIVE_QUEUE);
2645177848Sdavidxu			umtxq_unbusy(&uq->uq_key);
2646177848Sdavidxu
2647177848Sdavidxu			error = umtxq_sleep(uq, "uwrlck", timo);
2648177848Sdavidxu
2649177848Sdavidxu			umtxq_busy(&uq->uq_key);
2650177848Sdavidxu			umtxq_remove_queue(uq, UMTX_EXCLUSIVE_QUEUE);
2651177848Sdavidxu			umtxq_unlock(&uq->uq_key);
2652177848Sdavidxu			if (error)
2653177848Sdavidxu				break;
2654177848Sdavidxu			state = fuword32(__DEVOLATILE(int32_t *, &rwlock->rw_state));
2655177848Sdavidxu		}
2656177848Sdavidxu
2657177848Sdavidxu		blocked_writers = fuword32(&rwlock->rw_blocked_writers);
2658177848Sdavidxu		suword32(&rwlock->rw_blocked_writers, blocked_writers-1);
2659177848Sdavidxu		if (blocked_writers == 1) {
2660177848Sdavidxu			state = fuword32(__DEVOLATILE(int32_t *, &rwlock->rw_state));
2661177848Sdavidxu			for (;;) {
2662177848Sdavidxu				oldstate = casuword32(&rwlock->rw_state, state,
2663177848Sdavidxu					 state & ~URWLOCK_WRITE_WAITERS);
2664177848Sdavidxu				if (oldstate == state)
2665177848Sdavidxu					break;
2666177848Sdavidxu				state = oldstate;
2667177848Sdavidxu			}
2668197476Sdavidxu			blocked_readers = fuword32(&rwlock->rw_blocked_readers);
2669197476Sdavidxu		} else
2670197476Sdavidxu			blocked_readers = 0;
2671177848Sdavidxu
2672177848Sdavidxu		umtxq_lock(&uq->uq_key);
2673177848Sdavidxu		umtxq_unbusy(&uq->uq_key);
2674177848Sdavidxu		umtxq_unlock(&uq->uq_key);
2675177848Sdavidxu	}
2676177848Sdavidxu
2677177848Sdavidxu	umtx_key_release(&uq->uq_key);
2678177848Sdavidxu	return (error);
2679177848Sdavidxu}
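/*
 * Editorial note: the error path above (see "if (error)") re-wakes
 * all blocked readers when a write waiter gives up, e.g. on timeout,
 * and the lock is left with neither a write owner nor remaining write
 * waiters; without that, readers that had deferred to
 * URWLOCK_WRITE_WAITERS could sleep forever after the writer departs.
 */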
2680177848Sdavidxu
2681177848Sdavidxustatic int
2682177848Sdavidxudo_rw_wrlock2(struct thread *td, void *obj, struct timespec *timeout)
2683177848Sdavidxu{
2684177848Sdavidxu	struct timespec ts, ts2, ts3;
2685177848Sdavidxu	struct timeval tv;
2686177848Sdavidxu	int error;
2687177848Sdavidxu
2688177848Sdavidxu	getnanouptime(&ts);
2689177848Sdavidxu	timespecadd(&ts, timeout);
2690177848Sdavidxu	TIMESPEC_TO_TIMEVAL(&tv, timeout);
2691177848Sdavidxu	for (;;) {
2692177848Sdavidxu		error = do_rw_wrlock(td, obj, tvtohz(&tv));
2693177848Sdavidxu		if (error != ETIMEDOUT)
2694177848Sdavidxu			break;
2695177848Sdavidxu		getnanouptime(&ts2);
2696177848Sdavidxu		if (timespeccmp(&ts2, &ts, >=)) {
2697177848Sdavidxu			error = ETIMEDOUT;
2698177848Sdavidxu			break;
2699177848Sdavidxu		}
2700177848Sdavidxu		ts3 = ts;
2701177848Sdavidxu		timespecsub(&ts3, &ts2);
2702177848Sdavidxu		TIMESPEC_TO_TIMEVAL(&tv, &ts3);
2703177848Sdavidxu	}
2704177849Sdavidxu	if (error == ERESTART)
2705177849Sdavidxu		error = EINTR;
2706177848Sdavidxu	return (error);
2707177848Sdavidxu}
2708177848Sdavidxu
2709177848Sdavidxustatic int
2710177880Sdavidxudo_rw_unlock(struct thread *td, struct urwlock *rwlock)
2711177848Sdavidxu{
2712177848Sdavidxu	struct umtx_q *uq;
2713177848Sdavidxu	uint32_t flags;
2714177848Sdavidxu	int32_t state, oldstate;
2715177848Sdavidxu	int error, q, count;
2716177848Sdavidxu
2717177848Sdavidxu	uq = td->td_umtxq;
2718177848Sdavidxu	flags = fuword32(&rwlock->rw_flags);
2719177848Sdavidxu	error = umtx_key_get(rwlock, TYPE_RWLOCK, GET_SHARE(flags), &uq->uq_key);
2720177848Sdavidxu	if (error != 0)
2721177848Sdavidxu		return (error);
2722177848Sdavidxu
2723177848Sdavidxu	state = fuword32(__DEVOLATILE(int32_t *, &rwlock->rw_state));
2724177848Sdavidxu	if (state & URWLOCK_WRITE_OWNER) {
2725177848Sdavidxu		for (;;) {
2726177848Sdavidxu			oldstate = casuword32(&rwlock->rw_state, state,
2727177848Sdavidxu				state & ~URWLOCK_WRITE_OWNER);
2728177848Sdavidxu			if (oldstate != state) {
2729177848Sdavidxu				state = oldstate;
2730177848Sdavidxu				if (!(oldstate & URWLOCK_WRITE_OWNER)) {
2731177848Sdavidxu					error = EPERM;
2732177848Sdavidxu					goto out;
2733177848Sdavidxu				}
2734177848Sdavidxu			} else
2735177848Sdavidxu				break;
2736177848Sdavidxu		}
2737177848Sdavidxu	} else if (URWLOCK_READER_COUNT(state) != 0) {
2738177848Sdavidxu		for (;;) {
2739177848Sdavidxu			oldstate = casuword32(&rwlock->rw_state, state,
2740177848Sdavidxu				state - 1);
2741177848Sdavidxu			if (oldstate != state) {
2742177848Sdavidxu				state = oldstate;
2743177848Sdavidxu				if (URWLOCK_READER_COUNT(oldstate) == 0) {
2744177848Sdavidxu					error = EPERM;
2745177848Sdavidxu					goto out;
2746177848Sdavidxu				}
2747177848Sdavidxu			}
2748177848Sdavidxu			else
2749177848Sdavidxu				break;
2750177848Sdavidxu		}
2751177848Sdavidxu	} else {
2752177848Sdavidxu		error = EPERM;
2753177848Sdavidxu		goto out;
2754177848Sdavidxu	}
2755177848Sdavidxu
2756177848Sdavidxu	count = 0;
2757177848Sdavidxu
2758177848Sdavidxu	if (!(flags & URWLOCK_PREFER_READER)) {
2759177848Sdavidxu		if (state & URWLOCK_WRITE_WAITERS) {
2760177848Sdavidxu			count = 1;
2761177848Sdavidxu			q = UMTX_EXCLUSIVE_QUEUE;
2762177848Sdavidxu		} else if (state & URWLOCK_READ_WAITERS) {
2763177848Sdavidxu			count = INT_MAX;
2764177848Sdavidxu			q = UMTX_SHARED_QUEUE;
2765177848Sdavidxu		}
2766177848Sdavidxu	} else {
2767177848Sdavidxu		if (state & URWLOCK_READ_WAITERS) {
2768177848Sdavidxu			count = INT_MAX;
2769177848Sdavidxu			q = UMTX_SHARED_QUEUE;
2770177848Sdavidxu		} else if (state & URWLOCK_WRITE_WAITERS) {
2771177848Sdavidxu			count = 1;
2772177848Sdavidxu			q = UMTX_EXCLUSIVE_QUEUE;
2773177848Sdavidxu		}
2774177848Sdavidxu	}
2775177848Sdavidxu
2776177848Sdavidxu	if (count) {
2777177848Sdavidxu		umtxq_lock(&uq->uq_key);
2778177848Sdavidxu		umtxq_busy(&uq->uq_key);
2779177848Sdavidxu		umtxq_signal_queue(&uq->uq_key, count, q);
2780177848Sdavidxu		umtxq_unbusy(&uq->uq_key);
2781177848Sdavidxu		umtxq_unlock(&uq->uq_key);
2782177848Sdavidxu	}
2783177848Sdavidxuout:
2784177848Sdavidxu	umtx_key_release(&uq->uq_key);
2785177848Sdavidxu	return (error);
2786177848Sdavidxu}
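/*
 * Editorial note: the wakeup policy above is writer-preferring by
 * default (wake one writer from the exclusive queue, otherwise all
 * readers from the shared queue) and reader-preferring when
 * URWLOCK_PREFER_READER is set.  Waking INT_MAX readers but only one
 * writer matches the lock semantics: any number of readers may hold
 * the lock together, but at most one writer can.
 */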
2787177848Sdavidxu
2788201472Sdavidxustatic int
2789201472Sdavidxudo_sem_wait(struct thread *td, struct _usem *sem, struct timespec *timeout)
2790201472Sdavidxu{
2791201472Sdavidxu	struct umtx_q *uq;
2792201472Sdavidxu	struct timeval tv;
2793201472Sdavidxu	struct timespec cts, ets, tts;
2794201472Sdavidxu	uint32_t flags, count;
2795201472Sdavidxu	int error;
2796201472Sdavidxu
2797201472Sdavidxu	uq = td->td_umtxq;
2798201472Sdavidxu	flags = fuword32(&sem->_flags);
2799201885Sdavidxu	error = umtx_key_get(sem, TYPE_SEM, GET_SHARE(flags), &uq->uq_key);
2800201472Sdavidxu	if (error != 0)
2801201472Sdavidxu		return (error);
2802201472Sdavidxu	umtxq_lock(&uq->uq_key);
2803201472Sdavidxu	umtxq_busy(&uq->uq_key);
2804201472Sdavidxu	umtxq_insert(uq);
2805201472Sdavidxu	umtxq_unlock(&uq->uq_key);
2806201472Sdavidxu
2807215652Sdavidxu	if (fuword32(__DEVOLATILE(uint32_t *, &sem->_has_waiters)) == 0)
2808215652Sdavidxu		casuword32(__DEVOLATILE(uint32_t *, &sem->_has_waiters), 0, 1);
2809203657Sdavidxu
2810201472Sdavidxu	count = fuword32(__DEVOLATILE(uint32_t *, &sem->_count));
2811201472Sdavidxu	if (count != 0) {
2812201472Sdavidxu		umtxq_lock(&uq->uq_key);
2813201472Sdavidxu		umtxq_unbusy(&uq->uq_key);
2814201472Sdavidxu		umtxq_remove(uq);
2815201472Sdavidxu		umtxq_unlock(&uq->uq_key);
2816201472Sdavidxu		umtx_key_release(&uq->uq_key);
2817201472Sdavidxu		return (0);
2818201472Sdavidxu	}
2819201472Sdavidxu
2820201472Sdavidxu	umtxq_lock(&uq->uq_key);
2821201472Sdavidxu	umtxq_unbusy(&uq->uq_key);
2822201472Sdavidxu	umtxq_unlock(&uq->uq_key);
2823201472Sdavidxu
2824201472Sdavidxu	umtxq_lock(&uq->uq_key);
2825201472Sdavidxu	if (timeout == NULL) {
2826201472Sdavidxu		error = umtxq_sleep(uq, "usem", 0);
2827201472Sdavidxu	} else {
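		/*
		 * Convert the relative timeout into an absolute deadline
		 * on the uptime clock, re-arming the sleep until the
		 * deadline passes.
		 */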
2828201472Sdavidxu		getnanouptime(&ets);
2829201472Sdavidxu		timespecadd(&ets, timeout);
2830201472Sdavidxu		TIMESPEC_TO_TIMEVAL(&tv, timeout);
2831201472Sdavidxu		for (;;) {
2832201472Sdavidxu			error = umtxq_sleep(uq, "usem", tvtohz(&tv));
2833201472Sdavidxu			if (error != ETIMEDOUT)
2834201472Sdavidxu				break;
2835201472Sdavidxu			getnanouptime(&cts);
2836201472Sdavidxu			if (timespeccmp(&cts, &ets, >=)) {
2837201472Sdavidxu				error = ETIMEDOUT;
2838201472Sdavidxu				break;
2839201472Sdavidxu			}
2840201472Sdavidxu			tts = ets;
2841201472Sdavidxu			timespecsub(&tts, &cts);
2842201472Sdavidxu			TIMESPEC_TO_TIMEVAL(&tv, &tts);
2843201472Sdavidxu		}
2844201472Sdavidxu	}
2845201472Sdavidxu
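	/*
	 * If we are no longer on the queue, do_sem_wake() selected us;
	 * report success even if the sleep also timed out or was
	 * interrupted.
	 */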
2846211794Sdavidxu	if ((uq->uq_flags & UQF_UMTXQ) == 0)
2847211794Sdavidxu		error = 0;
2848211794Sdavidxu	else {
2849211794Sdavidxu		umtxq_remove(uq);
2850201472Sdavidxu		if (error == ERESTART)
2851201472Sdavidxu			error = EINTR;
2852201472Sdavidxu	}
2853201472Sdavidxu	umtxq_unlock(&uq->uq_key);
2854201472Sdavidxu	umtx_key_release(&uq->uq_key);
2855201472Sdavidxu	return (error);
2856201472Sdavidxu}
2857201472Sdavidxu
2858201472Sdavidxu/*
2859201472Sdavidxu * Wake up threads sleeping on a userland semaphore.
2860201472Sdavidxu */
2861201472Sdavidxustatic int
2862201472Sdavidxudo_sem_wake(struct thread *td, struct _usem *sem)
2863201472Sdavidxu{
2864201472Sdavidxu	struct umtx_key key;
2865201472Sdavidxu	int error, cnt, nwake;
2866201472Sdavidxu	uint32_t flags;
2867201472Sdavidxu
2868201472Sdavidxu	flags = fuword32(&sem->_flags);
2869201885Sdavidxu	if ((error = umtx_key_get(sem, TYPE_SEM, GET_SHARE(flags), &key)) != 0)
2870201472Sdavidxu		return (error);
2871201472Sdavidxu	umtxq_lock(&key);
2872201472Sdavidxu	umtxq_busy(&key);
2873201472Sdavidxu	cnt = umtxq_count(&key);
2874201472Sdavidxu	nwake = umtxq_signal(&key, 1);
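	/*
	 * If that wakeup emptied the queue, clear the userland
	 * has-waiters hint so the post fast path can skip the syscall.
	 */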
2875201472Sdavidxu	if (cnt <= nwake) {
2876201472Sdavidxu		umtxq_unlock(&key);
2877201472Sdavidxu		error = suword32(
2878201472Sdavidxu		    __DEVOLATILE(uint32_t *, &sem->_has_waiters), 0);
2879201472Sdavidxu		umtxq_lock(&key);
2880201472Sdavidxu	}
2881201472Sdavidxu	umtxq_unbusy(&key);
2882201472Sdavidxu	umtxq_unlock(&key);
2883201472Sdavidxu	umtx_key_release(&key);
2884201472Sdavidxu	return (error);
2885201472Sdavidxu}
2886201472Sdavidxu
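/*
 * Illustrative sketch (not kernel code): userland is expected to pair
 * the semaphore ops above with an atomic fast path on _usem._count.
 * Only the field names and UMTX_OP_SEM_* constants below come from
 * this file; the rest is a hypothetical libc-side outline.
 *
 *	sem_wait:  if atomic_cmpset_acq_32(&sem->_count, c, c - 1)
 *		   succeeds for some observed c > 0, return; otherwise
 *		   _umtx_op(sem, UMTX_OP_SEM_WAIT, 0, NULL, timeout).
 *	sem_post:  atomic_add_rel_32(&sem->_count, 1); if
 *		   sem->_has_waiters is set,
 *		   _umtx_op(sem, UMTX_OP_SEM_WAKE, 0, NULL, NULL).
 */
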
2887139013Sdavidxuint
2888225617Skmacysys__umtx_lock(struct thread *td, struct _umtx_lock_args *uap)
2889139013Sdavidxu    /* struct umtx *umtx */
2890139013Sdavidxu{
2891162536Sdavidxu	return (_do_lock_umtx(td, uap->umtx, td->td_tid, 0));
2892139013Sdavidxu}
2893139013Sdavidxu
2894139013Sdavidxuint
2895225617Skmacysys__umtx_unlock(struct thread *td, struct _umtx_unlock_args *uap)
2896139013Sdavidxu    /* struct umtx *umtx */
2897139013Sdavidxu{
2898162536Sdavidxu	return (do_unlock_umtx(td, uap->umtx, td->td_tid));
2899139013Sdavidxu}
2900139013Sdavidxu
2901162536Sdavidxustatic int
2902162536Sdavidxu__umtx_op_lock_umtx(struct thread *td, struct _umtx_op_args *uap)
2903139013Sdavidxu{
2904162536Sdavidxu	struct timespec *ts, timeout;
2905139013Sdavidxu	int error;
2906139013Sdavidxu
2907162536Sdavidxu	/* Allow a null timespec (wait forever). */
2908162536Sdavidxu	if (uap->uaddr2 == NULL)
2909162536Sdavidxu		ts = NULL;
2910162536Sdavidxu	else {
2911162536Sdavidxu		error = copyin(uap->uaddr2, &timeout, sizeof(timeout));
2912162536Sdavidxu		if (error != 0)
2913162536Sdavidxu			return (error);
2914162536Sdavidxu		if (timeout.tv_nsec >= 1000000000 ||
2915162536Sdavidxu		    timeout.tv_nsec < 0) {
2916162536Sdavidxu			return (EINVAL);
2917161678Sdavidxu		}
2918162536Sdavidxu		ts = &timeout;
2919162536Sdavidxu	}
2920162536Sdavidxu	return (do_lock_umtx(td, uap->obj, uap->val, ts));
2921162536Sdavidxu}
2922162536Sdavidxu
2923162536Sdavidxustatic int
2924162536Sdavidxu__umtx_op_unlock_umtx(struct thread *td, struct _umtx_op_args *uap)
2925162536Sdavidxu{
2926162536Sdavidxu	return (do_unlock_umtx(td, uap->obj, uap->val));
2927162536Sdavidxu}
2928162536Sdavidxu
2929162536Sdavidxustatic int
2930162536Sdavidxu__umtx_op_wait(struct thread *td, struct _umtx_op_args *uap)
2931162536Sdavidxu{
2932162536Sdavidxu	struct timespec *ts, timeout;
2933162536Sdavidxu	int error;
2934162536Sdavidxu
2935162536Sdavidxu	if (uap->uaddr2 == NULL)
2936162536Sdavidxu		ts = NULL;
2937162536Sdavidxu	else {
2938162536Sdavidxu		error = copyin(uap->uaddr2, &timeout, sizeof(timeout));
2939162536Sdavidxu		if (error != 0)
2940162536Sdavidxu			return (error);
2941162536Sdavidxu		if (timeout.tv_nsec >= 1000000000 ||
2942162536Sdavidxu		    timeout.tv_nsec < 0)
2943162536Sdavidxu			return (EINVAL);
2944162536Sdavidxu		ts = &timeout;
2945162536Sdavidxu	}
2946178646Sdavidxu	return (do_wait(td, uap->obj, uap->val, ts, 0, 0));
2947162536Sdavidxu}
2948162536Sdavidxu
2949162536Sdavidxustatic int
2950173800Sdavidxu__umtx_op_wait_uint(struct thread *td, struct _umtx_op_args *uap)
2951173800Sdavidxu{
2952173800Sdavidxu	struct timespec *ts, timeout;
2953173800Sdavidxu	int error;
2954173800Sdavidxu
2955173800Sdavidxu	if (uap->uaddr2 == NULL)
2956173800Sdavidxu		ts = NULL;
2957173800Sdavidxu	else {
2958173800Sdavidxu		error = copyin(uap->uaddr2, &timeout, sizeof(timeout));
2959173800Sdavidxu		if (error != 0)
2960173800Sdavidxu			return (error);
2961173800Sdavidxu		if (timeout.tv_nsec >= 1000000000 ||
2962173800Sdavidxu		    timeout.tv_nsec < 0)
2963173800Sdavidxu			return (EINVAL);
2964173800Sdavidxu		ts = &timeout;
2965173800Sdavidxu	}
2966178646Sdavidxu	return (do_wait(td, uap->obj, uap->val, ts, 1, 0));
2967173800Sdavidxu}
2968173800Sdavidxu
2969173800Sdavidxustatic int
2970178646Sdavidxu__umtx_op_wait_uint_private(struct thread *td, struct _umtx_op_args *uap)
2971178646Sdavidxu{
2972178646Sdavidxu	struct timespec *ts, timeout;
2973178646Sdavidxu	int error;
2974178646Sdavidxu
2975178646Sdavidxu	if (uap->uaddr2 == NULL)
2976178646Sdavidxu		ts = NULL;
2977178646Sdavidxu	else {
2978178646Sdavidxu		error = copyin(uap->uaddr2, &timeout, sizeof(timeout));
2979178646Sdavidxu		if (error != 0)
2980178646Sdavidxu			return (error);
2981178646Sdavidxu		if (timeout.tv_nsec >= 1000000000 ||
2982178646Sdavidxu		    timeout.tv_nsec < 0)
2983178646Sdavidxu			return (EINVAL);
2984178646Sdavidxu		ts = &timeout;
2985178646Sdavidxu	}
2986178646Sdavidxu	return (do_wait(td, uap->obj, uap->val, ts, 1, 1));
2987178646Sdavidxu}
2988178646Sdavidxu
2989178646Sdavidxustatic int
2990162536Sdavidxu__umtx_op_wake(struct thread *td, struct _umtx_op_args *uap)
2991162536Sdavidxu{
2992178646Sdavidxu	return (kern_umtx_wake(td, uap->obj, uap->val, 0));
2993162536Sdavidxu}
2994162536Sdavidxu
2995216641Sdavidxu#define BATCH_SIZE	128
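/*
 * Wake the uap->val userland words named by the uap->obj pointer
 * array, copying the pointers in at most BATCH_SIZE-sized chunks to
 * bound kernel stack usage.
 */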
2996162536Sdavidxustatic int
2997216641Sdavidxu__umtx_op_nwake_private(struct thread *td, struct _umtx_op_args *uap)
2998216641Sdavidxu{
2999216641Sdavidxu	int count = uap->val;
3000216641Sdavidxu	void *uaddrs[BATCH_SIZE];
3001216641Sdavidxu	char **upp = (char **)uap->obj;
3002216641Sdavidxu	int tocopy;
3003216641Sdavidxu	int error = 0;
3004216641Sdavidxu	int i, pos = 0;
3005216641Sdavidxu
3006216641Sdavidxu	while (count > 0) {
3007216641Sdavidxu		tocopy = count;
3008216641Sdavidxu		if (tocopy > BATCH_SIZE)
3009216641Sdavidxu			tocopy = BATCH_SIZE;
3010216641Sdavidxu		error = copyin(upp+pos, uaddrs, tocopy * sizeof(char *));
3011216641Sdavidxu		if (error != 0)
3012216641Sdavidxu			break;
3013216641Sdavidxu		for (i = 0; i < tocopy; ++i)
3014216641Sdavidxu			kern_umtx_wake(td, uaddrs[i], INT_MAX, 1);
3015216641Sdavidxu		count -= tocopy;
3016216641Sdavidxu		pos += tocopy;
3017216641Sdavidxu	}
3018216641Sdavidxu	return (error);
3019216641Sdavidxu}
3020216641Sdavidxu
3021216641Sdavidxustatic int
3022178646Sdavidxu__umtx_op_wake_private(struct thread *td, struct _umtx_op_args *uap)
3023178646Sdavidxu{
3024178646Sdavidxu	return (kern_umtx_wake(td, uap->obj, uap->val, 1));
3025178646Sdavidxu}
3026178646Sdavidxu
3027178646Sdavidxustatic int
3028162536Sdavidxu__umtx_op_lock_umutex(struct thread *td, struct _umtx_op_args *uap)
3029162536Sdavidxu{
3030162536Sdavidxu	struct timespec *ts, timeout;
3031162536Sdavidxu	int error;
3032162536Sdavidxu
3033162536Sdavidxu	/* Allow a null timespec (wait forever). */
3034162536Sdavidxu	if (uap->uaddr2 == NULL)
3035162536Sdavidxu		ts = NULL;
3036162536Sdavidxu	else {
3037162536Sdavidxu		error = copyin(uap->uaddr2, &timeout,
3038162536Sdavidxu		    sizeof(timeout));
3039162536Sdavidxu		if (error != 0)
3040162536Sdavidxu			return (error);
3041162536Sdavidxu		if (timeout.tv_nsec >= 1000000000 ||
3042162536Sdavidxu		    timeout.tv_nsec < 0) {
3043162536Sdavidxu			return (EINVAL);
3044139013Sdavidxu		}
3045162536Sdavidxu		ts = &timeout;
3046139013Sdavidxu	}
3047162536Sdavidxu	return (do_lock_umutex(td, uap->obj, ts, 0));
3048162536Sdavidxu}
3049162536Sdavidxu
3050162536Sdavidxustatic int
3051162536Sdavidxu__umtx_op_trylock_umutex(struct thread *td, struct _umtx_op_args *uap)
3052162536Sdavidxu{
3053179970Sdavidxu	return (do_lock_umutex(td, uap->obj, NULL, _UMUTEX_TRY));
3054162536Sdavidxu}
3055162536Sdavidxu
3056162536Sdavidxustatic int
3057179970Sdavidxu__umtx_op_wait_umutex(struct thread *td, struct _umtx_op_args *uap)
3058179970Sdavidxu{
3059179970Sdavidxu	struct timespec *ts, timeout;
3060179970Sdavidxu	int error;
3061179970Sdavidxu
3062179970Sdavidxu	/* Allow a null timespec (wait forever). */
3063179970Sdavidxu	if (uap->uaddr2 == NULL)
3064179970Sdavidxu		ts = NULL;
3065179970Sdavidxu	else {
3066179970Sdavidxu		error = copyin(uap->uaddr2, &timeout,
3067179970Sdavidxu		    sizeof(timeout));
3068179970Sdavidxu		if (error != 0)
3069179970Sdavidxu			return (error);
3070179970Sdavidxu		if (timeout.tv_nsec >= 1000000000 ||
3071179970Sdavidxu		    timeout.tv_nsec < 0) {
3072179970Sdavidxu			return (EINVAL);
3073179970Sdavidxu		}
3074179970Sdavidxu		ts = &timeout;
3075179970Sdavidxu	}
3076179970Sdavidxu	return (do_lock_umutex(td, uap->obj, ts, _UMUTEX_WAIT));
3077179970Sdavidxu}
3078179970Sdavidxu
3079179970Sdavidxustatic int
3080179970Sdavidxu__umtx_op_wake_umutex(struct thread *td, struct _umtx_op_args *uap)
3081179970Sdavidxu{
3082179970Sdavidxu	return (do_wake_umutex(td, uap->obj));
3083179970Sdavidxu}
3084179970Sdavidxu
3085179970Sdavidxustatic int
3086162536Sdavidxu__umtx_op_unlock_umutex(struct thread *td, struct _umtx_op_args *uap)
3087162536Sdavidxu{
3088162536Sdavidxu	return (do_unlock_umutex(td, uap->obj));
3089162536Sdavidxu}
3090162536Sdavidxu
3091162536Sdavidxustatic int
3092162536Sdavidxu__umtx_op_set_ceiling(struct thread *td, struct _umtx_op_args *uap)
3093162536Sdavidxu{
3094162536Sdavidxu	return (do_set_ceiling(td, uap->obj, uap->val, uap->uaddr1));
3095162536Sdavidxu}
3096162536Sdavidxu
3097164839Sdavidxustatic int
3098164839Sdavidxu__umtx_op_cv_wait(struct thread *td, struct _umtx_op_args *uap)
3099164839Sdavidxu{
3100164839Sdavidxu	struct timespec *ts, timeout;
3101164839Sdavidxu	int error;
3102164839Sdavidxu
3103164839Sdavidxu	/* Allow a null timespec (wait forever). */
3104164839Sdavidxu	if (uap->uaddr2 == NULL)
3105164839Sdavidxu		ts = NULL;
3106164839Sdavidxu	else {
3107164839Sdavidxu		error = copyin(uap->uaddr2, &timeout,
3108164839Sdavidxu		    sizeof(timeout));
3109164839Sdavidxu		if (error != 0)
3110164839Sdavidxu			return (error);
3111164839Sdavidxu		if (timeout.tv_nsec >= 1000000000 ||
3112164839Sdavidxu		    timeout.tv_nsec < 0) {
3113164839Sdavidxu			return (EINVAL);
3114164839Sdavidxu		}
3115164839Sdavidxu		ts = &timeout;
3116164839Sdavidxu	}
3117164876Sdavidxu	return (do_cv_wait(td, uap->obj, uap->uaddr1, ts, uap->val));
3118164839Sdavidxu}
3119164839Sdavidxu
3120164839Sdavidxustatic int
3121164839Sdavidxu__umtx_op_cv_signal(struct thread *td, struct _umtx_op_args *uap)
3122164839Sdavidxu{
3123164839Sdavidxu	return (do_cv_signal(td, uap->obj));
3124164839Sdavidxu}
3125164839Sdavidxu
3126164839Sdavidxustatic int
3127164839Sdavidxu__umtx_op_cv_broadcast(struct thread *td, struct _umtx_op_args *uap)
3128164839Sdavidxu{
3129164839Sdavidxu	return (do_cv_broadcast(td, uap->obj));
3130164839Sdavidxu}
3131164839Sdavidxu
3132177848Sdavidxustatic int
3133177848Sdavidxu__umtx_op_rw_rdlock(struct thread *td, struct _umtx_op_args *uap)
3134177848Sdavidxu{
3135177848Sdavidxu	struct timespec timeout;
3136177848Sdavidxu	int error;
3137177848Sdavidxu
3138177848Sdavidxu	/* Allow a null timespec (wait forever). */
3139177848Sdavidxu	if (uap->uaddr2 == NULL) {
3140177848Sdavidxu		error = do_rw_rdlock(td, uap->obj, uap->val, 0);
3141177848Sdavidxu	} else {
3142177848Sdavidxu		error = copyin(uap->uaddr2, &timeout,
3143177848Sdavidxu		    sizeof(timeout));
3144177848Sdavidxu		if (error != 0)
3145177848Sdavidxu			return (error);
3146177848Sdavidxu		if (timeout.tv_nsec >= 1000000000 ||
3147177848Sdavidxu		    timeout.tv_nsec < 0) {
3148177848Sdavidxu			return (EINVAL);
3149177848Sdavidxu		}
3150177848Sdavidxu		error = do_rw_rdlock2(td, uap->obj, uap->val, &timeout);
3151177848Sdavidxu	}
3152177848Sdavidxu	return (error);
3153177848Sdavidxu}
3154177848Sdavidxu
3155177848Sdavidxustatic int
3156177848Sdavidxu__umtx_op_rw_wrlock(struct thread *td, struct _umtx_op_args *uap)
3157177848Sdavidxu{
3158177848Sdavidxu	struct timespec timeout;
3159177848Sdavidxu	int error;
3160177848Sdavidxu
3161177848Sdavidxu	/* Allow a null timespec (wait forever). */
3162177848Sdavidxu	if (uap->uaddr2 == NULL) {
3163177848Sdavidxu		error = do_rw_wrlock(td, uap->obj, 0);
3164177848Sdavidxu	} else {
3165177848Sdavidxu		error = copyin(uap->uaddr2, &timeout,
3166177848Sdavidxu		    sizeof(timeout));
3167177848Sdavidxu		if (error != 0)
3168177848Sdavidxu			return (error);
3169177848Sdavidxu		if (timeout.tv_nsec >= 1000000000 ||
3170177848Sdavidxu		    timeout.tv_nsec < 0) {
3171177848Sdavidxu			return (EINVAL);
3172177848Sdavidxu		}
3173177848Sdavidxu
3174177848Sdavidxu		error = do_rw_wrlock2(td, uap->obj, &timeout);
3175177848Sdavidxu	}
3176177848Sdavidxu	return (error);
3177177848Sdavidxu}
3178177848Sdavidxu
3179177848Sdavidxustatic int
3180177848Sdavidxu__umtx_op_rw_unlock(struct thread *td, struct _umtx_op_args *uap)
3181177848Sdavidxu{
3182177880Sdavidxu	return (do_rw_unlock(td, uap->obj));
3183177848Sdavidxu}
3184177848Sdavidxu
3185201472Sdavidxustatic int
3186201472Sdavidxu__umtx_op_sem_wait(struct thread *td, struct _umtx_op_args *uap)
3187201472Sdavidxu{
3188201472Sdavidxu	struct timespec *ts, timeout;
3189201472Sdavidxu	int error;
3190201472Sdavidxu
3191201472Sdavidxu	/* Allow a null timespec (wait forever). */
3192201472Sdavidxu	if (uap->uaddr2 == NULL)
3193201472Sdavidxu		ts = NULL;
3194201472Sdavidxu	else {
3195201472Sdavidxu		error = copyin(uap->uaddr2, &timeout,
3196201472Sdavidxu		    sizeof(timeout));
3197201472Sdavidxu		if (error != 0)
3198201472Sdavidxu			return (error);
3199201472Sdavidxu		if (timeout.tv_nsec >= 1000000000 ||
3200201472Sdavidxu		    timeout.tv_nsec < 0) {
3201201472Sdavidxu			return (EINVAL);
3202201472Sdavidxu		}
3203201472Sdavidxu		ts = &timeout;
3204201472Sdavidxu	}
3205201472Sdavidxu	return (do_sem_wait(td, uap->obj, ts));
3206201472Sdavidxu}
3207201472Sdavidxu
3208201472Sdavidxustatic int
3209201472Sdavidxu__umtx_op_sem_wake(struct thread *td, struct _umtx_op_args *uap)
3210201472Sdavidxu{
3211201472Sdavidxu	return (do_sem_wake(td, uap->obj));
3212201472Sdavidxu}
3213201472Sdavidxu
3214162536Sdavidxutypedef int (*_umtx_op_func)(struct thread *td, struct _umtx_op_args *uap);
3215162536Sdavidxu
3216162536Sdavidxustatic _umtx_op_func op_table[] = {
3217162536Sdavidxu	__umtx_op_lock_umtx,		/* UMTX_OP_LOCK */
3218162536Sdavidxu	__umtx_op_unlock_umtx,		/* UMTX_OP_UNLOCK */
3219162536Sdavidxu	__umtx_op_wait,			/* UMTX_OP_WAIT */
3220162536Sdavidxu	__umtx_op_wake,			/* UMTX_OP_WAKE */
3221162536Sdavidxu	__umtx_op_trylock_umutex,	/* UMTX_OP_MUTEX_TRYLOCK */
3222162536Sdavidxu	__umtx_op_lock_umutex,		/* UMTX_OP_MUTEX_LOCK */
3223162536Sdavidxu	__umtx_op_unlock_umutex,	/* UMTX_OP_MUTEX_UNLOCK */
3224164839Sdavidxu	__umtx_op_set_ceiling,		/* UMTX_OP_SET_CEILING */
3225164839Sdavidxu	__umtx_op_cv_wait,		/* UMTX_OP_CV_WAIT */
3226164839Sdavidxu	__umtx_op_cv_signal,		/* UMTX_OP_CV_SIGNAL */
3227173800Sdavidxu	__umtx_op_cv_broadcast,		/* UMTX_OP_CV_BROADCAST */
3228177848Sdavidxu	__umtx_op_wait_uint,		/* UMTX_OP_WAIT_UINT */
3229177848Sdavidxu	__umtx_op_rw_rdlock,		/* UMTX_OP_RW_RDLOCK */
3230177848Sdavidxu	__umtx_op_rw_wrlock,		/* UMTX_OP_RW_WRLOCK */
3231178646Sdavidxu	__umtx_op_rw_unlock,		/* UMTX_OP_RW_UNLOCK */
3232178646Sdavidxu	__umtx_op_wait_uint_private,	/* UMTX_OP_WAIT_UINT_PRIVATE */
3233179970Sdavidxu	__umtx_op_wake_private,		/* UMTX_OP_WAKE_PRIVATE */
3234179970Sdavidxu	__umtx_op_wait_umutex,		/* UMTX_OP_UMUTEX_WAIT */
3235201472Sdavidxu	__umtx_op_wake_umutex,		/* UMTX_OP_UMUTEX_WAKE */
3236201472Sdavidxu	__umtx_op_sem_wait,		/* UMTX_OP_SEM_WAIT */
3237216641Sdavidxu	__umtx_op_sem_wake,		/* UMTX_OP_SEM_WAKE */
3238216641Sdavidxu	__umtx_op_nwake_private		/* UMTX_OP_NWAKE_PRIVATE */
3239162536Sdavidxu};
3240162536Sdavidxu
3241162536Sdavidxuint
3242225617Skmacysys__umtx_op(struct thread *td, struct _umtx_op_args *uap)
3243162536Sdavidxu{
3244163678Sdavidxu	if ((unsigned)uap->op < UMTX_OP_MAX)
3245162536Sdavidxu		return (*op_table[uap->op])(td, uap);
3246162536Sdavidxu	return (EINVAL);
3247162536Sdavidxu}
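
/*
 * Example (illustrative only): a futex-style wait/wake pair built on
 * the table above via the _umtx_op(2) entry point.  The variables p
 * and expected are hypothetical; the op constants and argument layout
 * match the handlers dispatched here (uaddr2, when non-NULL, points
 * at a struct timespec holding a relative timeout):
 *
 *	error = _umtx_op(p, UMTX_OP_WAIT_UINT_PRIVATE, expected,
 *	    NULL, NULL);		sleeps while *p == expected
 *	error = _umtx_op(p, UMTX_OP_WAKE_PRIVATE, INT_MAX,
 *	    NULL, NULL);		wakes all waiters on p
 */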
3248162536Sdavidxu
3249205014Snwhitehorn#ifdef COMPAT_FREEBSD32
3250163046Sdavidxuint
3251163046Sdavidxufreebsd32_umtx_lock(struct thread *td, struct freebsd32_umtx_lock_args *uap)
3252163046Sdavidxu    /* struct umtx *umtx */
3253163046Sdavidxu{
3254163046Sdavidxu	return (do_lock_umtx32(td, (uint32_t *)uap->umtx, td->td_tid, NULL));
3255163046Sdavidxu}
3256163046Sdavidxu
3257163046Sdavidxuint
3258163046Sdavidxufreebsd32_umtx_unlock(struct thread *td, struct freebsd32_umtx_unlock_args *uap)
3259163046Sdavidxu    /* struct umtx *umtx */
3260163046Sdavidxu{
3261163046Sdavidxu	return (do_unlock_umtx32(td, (uint32_t *)uap->umtx, td->td_tid));
3262163046Sdavidxu}
3263163046Sdavidxu
3264162536Sdavidxustruct timespec32 {
3265209390Sed	int32_t tv_sec;
3266209390Sed	int32_t tv_nsec;
3267162536Sdavidxu};
3268162536Sdavidxu
3269162536Sdavidxustatic inline int
3270228218Sphoumtx_copyin_timeout32(void *addr, struct timespec *tsp)
3271162536Sdavidxu{
3272162536Sdavidxu	struct timespec32 ts32;
3273162536Sdavidxu	int error;
3274162536Sdavidxu
3275162536Sdavidxu	error = copyin(addr, &ts32, sizeof(struct timespec32));
3276162536Sdavidxu	if (error == 0) {
3277228218Spho		if (ts32.tv_sec < 0 ||
3278228218Spho		    ts32.tv_nsec >= 1000000000 ||
3279228218Spho		    ts32.tv_nsec < 0)
3280228218Spho			error = EINVAL;
3281228218Spho		else {
3282228218Spho			tsp->tv_sec = ts32.tv_sec;
3283228218Spho			tsp->tv_nsec = ts32.tv_nsec;
3284228218Spho		}
3285162536Sdavidxu	}
3286140421Sdavidxu	return (error);
3287139013Sdavidxu}
3288161678Sdavidxu
3289162536Sdavidxustatic int
3290162536Sdavidxu__umtx_op_lock_umtx_compat32(struct thread *td, struct _umtx_op_args *uap)
3291162536Sdavidxu{
3292162536Sdavidxu	struct timespec *ts, timeout;
3293162536Sdavidxu	int error;
3294162536Sdavidxu
3295162536Sdavidxu	/* Allow a null timespec (wait forever). */
3296162536Sdavidxu	if (uap->uaddr2 == NULL)
3297162536Sdavidxu		ts = NULL;
3298162536Sdavidxu	else {
3299228218Spho		error = umtx_copyin_timeout32(uap->uaddr2, &timeout);
3300162536Sdavidxu		if (error != 0)
3301162536Sdavidxu			return (error);
3302162536Sdavidxu		ts = &timeout;
3303162536Sdavidxu	}
3304162536Sdavidxu	return (do_lock_umtx32(td, uap->obj, uap->val, ts));
3305162536Sdavidxu}
3306162536Sdavidxu
3307162536Sdavidxustatic int
3308162536Sdavidxu__umtx_op_unlock_umtx_compat32(struct thread *td, struct _umtx_op_args *uap)
3309162536Sdavidxu{
3310162536Sdavidxu	return (do_unlock_umtx32(td, uap->obj, (uint32_t)uap->val));
3311162536Sdavidxu}
3312162536Sdavidxu
3313162536Sdavidxustatic int
3314162536Sdavidxu__umtx_op_wait_compat32(struct thread *td, struct _umtx_op_args *uap)
3315162536Sdavidxu{
3316162536Sdavidxu	struct timespec *ts, timeout;
3317162536Sdavidxu	int error;
3318162536Sdavidxu
3319162536Sdavidxu	if (uap->uaddr2 == NULL)
3320162536Sdavidxu		ts = NULL;
3321162536Sdavidxu	else {
3322228218Spho		error = umtx_copyin_timeout32(uap->uaddr2, &timeout);
3323162536Sdavidxu		if (error != 0)
3324162536Sdavidxu			return (error);
3325162536Sdavidxu		ts = &timeout;
3326162536Sdavidxu	}
3327178646Sdavidxu	return (do_wait(td, uap->obj, uap->val, ts, 1, 0));
3328162536Sdavidxu}
3329162536Sdavidxu
3330162536Sdavidxustatic int
3331162536Sdavidxu__umtx_op_lock_umutex_compat32(struct thread *td, struct _umtx_op_args *uap)
3332162536Sdavidxu{
3333162536Sdavidxu	struct timespec *ts, timeout;
3334162536Sdavidxu	int error;
3335162536Sdavidxu
3336162536Sdavidxu	/* Allow a null timespec (wait forever). */
3337162536Sdavidxu	if (uap->uaddr2 == NULL)
3338162536Sdavidxu		ts = NULL;
3339162536Sdavidxu	else {
3340228218Spho		error = umtx_copyin_timeout32(uap->uaddr2, &timeout);
3341162536Sdavidxu		if (error != 0)
3342162536Sdavidxu			return (error);
3343162536Sdavidxu		ts = &timeout;
3344162536Sdavidxu	}
3345162536Sdavidxu	return (do_lock_umutex(td, uap->obj, ts, 0));
3346162536Sdavidxu}
3347162536Sdavidxu
3348164839Sdavidxustatic int
3349179970Sdavidxu__umtx_op_wait_umutex_compat32(struct thread *td, struct _umtx_op_args *uap)
3350179970Sdavidxu{
3351179970Sdavidxu	struct timespec *ts, timeout;
3352179970Sdavidxu	int error;
3353179970Sdavidxu
3354179970Sdavidxu	/* Allow a null timespec (wait forever). */
3355179970Sdavidxu	if (uap->uaddr2 == NULL)
3356179970Sdavidxu		ts = NULL;
3357179970Sdavidxu	else {
3358228218Spho		error = umtx_copyin_timeout32(uap->uaddr2, &timeout);
3359179970Sdavidxu		if (error != 0)
3360179970Sdavidxu			return (error);
3361179970Sdavidxu		ts = &timeout;
3362179970Sdavidxu	}
3363179970Sdavidxu	return (do_lock_umutex(td, uap->obj, ts, _UMUTEX_WAIT));
3364179970Sdavidxu}
3365179970Sdavidxu
3366179970Sdavidxustatic int
3367164839Sdavidxu__umtx_op_cv_wait_compat32(struct thread *td, struct _umtx_op_args *uap)
3368164839Sdavidxu{
3369164839Sdavidxu	struct timespec *ts, timeout;
3370164839Sdavidxu	int error;
3371164839Sdavidxu
3372164839Sdavidxu	/* Allow a null timespec (wait forever). */
3373164839Sdavidxu	if (uap->uaddr2 == NULL)
3374164839Sdavidxu		ts = NULL;
3375164839Sdavidxu	else {
3376228218Spho		error = umtx_copyin_timeout32(uap->uaddr2, &timeout);
3377164839Sdavidxu		if (error != 0)
3378164839Sdavidxu			return (error);
3379164839Sdavidxu		ts = &timeout;
3380164839Sdavidxu	}
3381164876Sdavidxu	return (do_cv_wait(td, uap->obj, uap->uaddr1, ts, uap->val));
3382164839Sdavidxu}
3383164839Sdavidxu
3384177848Sdavidxustatic int
3385177848Sdavidxu__umtx_op_rw_rdlock_compat32(struct thread *td, struct _umtx_op_args *uap)
3386177848Sdavidxu{
3387177848Sdavidxu	struct timespec timeout;
3388177848Sdavidxu	int error;
3389177848Sdavidxu
3390177848Sdavidxu	/* Allow a null timespec (wait forever). */
3391177848Sdavidxu	if (uap->uaddr2 == NULL) {
3392177848Sdavidxu		error = do_rw_rdlock(td, uap->obj, uap->val, 0);
3393177848Sdavidxu	} else {
3394228218Spho		error = umtx_copyin_timeout32(uap->uaddr2, &timeout);
3395177848Sdavidxu		if (error != 0)
3396177848Sdavidxu			return (error);
3397177848Sdavidxu		error = do_rw_rdlock2(td, uap->obj, uap->val, &timeout);
3398177848Sdavidxu	}
3399177848Sdavidxu	return (error);
3400177848Sdavidxu}
3401177848Sdavidxu
3402177848Sdavidxustatic int
3403177848Sdavidxu__umtx_op_rw_wrlock_compat32(struct thread *td, struct _umtx_op_args *uap)
3404177848Sdavidxu{
3405177848Sdavidxu	struct timespec timeout;
3406177848Sdavidxu	int error;
3407177848Sdavidxu
3408177848Sdavidxu	/* Allow a null timespec (wait forever). */
3409177848Sdavidxu	if (uap->uaddr2 == NULL) {
3410177852Sdavidxu		error = do_rw_wrlock(td, uap->obj, 0);
3411177848Sdavidxu	} else {
3412228218Spho		error = umtx_copyin_timeout32(uap->uaddr2, &timeout);
3413177848Sdavidxu		if (error != 0)
3414177848Sdavidxu			return (error);
3415177848Sdavidxu
3416177852Sdavidxu		error = do_rw_wrlock2(td, uap->obj, &timeout);
3417177848Sdavidxu	}
3418177848Sdavidxu	return (error);
3419177848Sdavidxu}
3420177848Sdavidxu
3421178646Sdavidxustatic int
3422178646Sdavidxu__umtx_op_wait_uint_private_compat32(struct thread *td, struct _umtx_op_args *uap)
3423178646Sdavidxu{
3424178646Sdavidxu	struct timespec *ts, timeout;
3425178646Sdavidxu	int error;
3426178646Sdavidxu
3427178646Sdavidxu	if (uap->uaddr2 == NULL)
3428178646Sdavidxu		ts = NULL;
3429178646Sdavidxu	else {
3430228218Spho		error = umtx_copyin_timeout32(uap->uaddr2, &timeout);
3431178646Sdavidxu		if (error != 0)
3432178646Sdavidxu			return (error);
3433178646Sdavidxu		ts = &timeout;
3434178646Sdavidxu	}
3435178646Sdavidxu	return (do_wait(td, uap->obj, uap->val, ts, 1, 1));
3436178646Sdavidxu}
3437178646Sdavidxu
3438201472Sdavidxustatic int
3439201472Sdavidxu__umtx_op_sem_wait_compat32(struct thread *td, struct _umtx_op_args *uap)
3440201472Sdavidxu{
3441201472Sdavidxu	struct timespec *ts, timeout;
3442201472Sdavidxu	int error;
3443201472Sdavidxu
3444201472Sdavidxu	/* Allow a null timespec (wait forever). */
3445201472Sdavidxu	if (uap->uaddr2 == NULL)
3446201472Sdavidxu		ts = NULL;
3447201472Sdavidxu	else {
3448228218Spho		error = umtx_copyin_timeout32(uap->uaddr2, &timeout);
3449201472Sdavidxu		if (error != 0)
3450201472Sdavidxu			return (error);
3451201472Sdavidxu		ts = &timeout;
3452201472Sdavidxu	}
3453201472Sdavidxu	return (do_sem_wait(td, uap->obj, ts));
3454201472Sdavidxu}
3455201472Sdavidxu
3456216641Sdavidxustatic int
3457216641Sdavidxu__umtx_op_nwake_private32(struct thread *td, struct _umtx_op_args *uap)
3458216641Sdavidxu{
3459216641Sdavidxu	int count = uap->val;
3460216641Sdavidxu	uint32_t uaddrs[BATCH_SIZE];
3461216641Sdavidxu	uint32_t *upp = (uint32_t *)uap->obj;
3462216641Sdavidxu	int tocopy;
3463216641Sdavidxu	int error = 0;
3464216641Sdavidxu	int i, pos = 0;
3465216641Sdavidxu
3466216641Sdavidxu	while (count > 0) {
3467216641Sdavidxu		tocopy = count;
3468216641Sdavidxu		if (tocopy > BATCH_SIZE)
3469216641Sdavidxu			tocopy = BATCH_SIZE;
3470216641Sdavidxu		error = copyin(upp+pos, uaddrs, tocopy * sizeof(uint32_t));
3471216641Sdavidxu		if (error != 0)
3472216641Sdavidxu			break;
3473216641Sdavidxu		for (i = 0; i < tocopy; ++i)
3474216641Sdavidxu			kern_umtx_wake(td, (void *)(intptr_t)uaddrs[i],
3475216641Sdavidxu				INT_MAX, 1);
3476216641Sdavidxu		count -= tocopy;
3477216641Sdavidxu		pos += tocopy;
3478216641Sdavidxu	}
3479216641Sdavidxu	return (error);
3480216641Sdavidxu}
3481216641Sdavidxu
3482162536Sdavidxustatic _umtx_op_func op_table_compat32[] = {
3483162536Sdavidxu	__umtx_op_lock_umtx_compat32,	/* UMTX_OP_LOCK */
3484162536Sdavidxu	__umtx_op_unlock_umtx_compat32,	/* UMTX_OP_UNLOCK */
3485162536Sdavidxu	__umtx_op_wait_compat32,	/* UMTX_OP_WAIT */
3486162536Sdavidxu	__umtx_op_wake,			/* UMTX_OP_WAKE */
3487162550Sdavidxu	__umtx_op_trylock_umutex,	/* UMTX_OP_MUTEX_TRYLOCK */
3488162536Sdavidxu	__umtx_op_lock_umutex_compat32,	/* UMTX_OP_MUTEX_LOCK */
3489162536Sdavidxu	__umtx_op_unlock_umutex,	/* UMTX_OP_MUTEX_UNLOCK */
3490164839Sdavidxu	__umtx_op_set_ceiling,		/* UMTX_OP_SET_CEILING */
3491164839Sdavidxu	__umtx_op_cv_wait_compat32,	/* UMTX_OP_CV_WAIT */
3492164839Sdavidxu	__umtx_op_cv_signal,		/* UMTX_OP_CV_SIGNAL */
3493173800Sdavidxu	__umtx_op_cv_broadcast,		/* UMTX_OP_CV_BROADCAST */
3494177848Sdavidxu	__umtx_op_wait_compat32,	/* UMTX_OP_WAIT_UINT */
3495177848Sdavidxu	__umtx_op_rw_rdlock_compat32,	/* UMTX_OP_RW_RDLOCK */
3496177848Sdavidxu	__umtx_op_rw_wrlock_compat32,	/* UMTX_OP_RW_WRLOCK */
3497178646Sdavidxu	__umtx_op_rw_unlock,		/* UMTX_OP_RW_UNLOCK */
3498178646Sdavidxu	__umtx_op_wait_uint_private_compat32,	/* UMTX_OP_WAIT_UINT_PRIVATE */
3499179970Sdavidxu	__umtx_op_wake_private,		/* UMTX_OP_WAKE_PRIVATE */
3500179970Sdavidxu	__umtx_op_wait_umutex_compat32, /* UMTX_OP_UMUTEX_WAIT */
3501201472Sdavidxu	__umtx_op_wake_umutex,		/* UMTX_OP_UMUTEX_WAKE */
3502201472Sdavidxu	__umtx_op_sem_wait_compat32,	/* UMTX_OP_SEM_WAIT */
3503216641Sdavidxu	__umtx_op_sem_wake,		/* UMTX_OP_SEM_WAKE */
3504216641Sdavidxu	__umtx_op_nwake_private32	/* UMTX_OP_NWAKE_PRIVATE */
3505162536Sdavidxu};
3506162536Sdavidxu
3507162536Sdavidxuint
3508162536Sdavidxufreebsd32_umtx_op(struct thread *td, struct freebsd32_umtx_op_args *uap)
3509162536Sdavidxu{
3510163678Sdavidxu	if ((unsigned)uap->op < UMTX_OP_MAX)
3511162536Sdavidxu		return (*op_table_compat32[uap->op])(td,
3512162536Sdavidxu			(struct _umtx_op_args *)uap);
3513162536Sdavidxu	return (EINVAL);
3514162536Sdavidxu}
3515162536Sdavidxu#endif
3516162536Sdavidxu
3517161678Sdavidxuvoid
3518161678Sdavidxuumtx_thread_init(struct thread *td)
3519161678Sdavidxu{
3520161678Sdavidxu	td->td_umtxq = umtxq_alloc();
3521161678Sdavidxu	td->td_umtxq->uq_thread = td;
3522161678Sdavidxu}
3523161678Sdavidxu
3524161678Sdavidxuvoid
3525161678Sdavidxuumtx_thread_fini(struct thread *td)
3526161678Sdavidxu{
3527161678Sdavidxu	umtxq_free(td->td_umtxq);
3528161678Sdavidxu}
3529161678Sdavidxu
3530161678Sdavidxu/*
3531161678Sdavidxu * Called when a new thread is created, e.g. by fork().
3532161678Sdavidxu */
3533161678Sdavidxuvoid
3534161678Sdavidxuumtx_thread_alloc(struct thread *td)
3535161678Sdavidxu{
3536161678Sdavidxu	struct umtx_q *uq;
3537161678Sdavidxu
3538161678Sdavidxu	uq = td->td_umtxq;
3539161678Sdavidxu	uq->uq_inherited_pri = PRI_MAX;
3540161678Sdavidxu
3541161678Sdavidxu	KASSERT(uq->uq_flags == 0, ("uq_flags != 0"));
3542161678Sdavidxu	KASSERT(uq->uq_thread == td, ("uq_thread != td"));
3543161678Sdavidxu	KASSERT(uq->uq_pi_blocked == NULL, ("uq_pi_blocked != NULL"));
3544161678Sdavidxu	KASSERT(TAILQ_EMPTY(&uq->uq_pi_contested), ("uq_pi_contested is not empty"));
3545161678Sdavidxu}
3546161678Sdavidxu
3547161678Sdavidxu/*
3548161678Sdavidxu * exec() hook.
3549161678Sdavidxu */
3550161678Sdavidxustatic void
3551161678Sdavidxuumtx_exec_hook(void *arg __unused, struct proc *p __unused,
3552161678Sdavidxu	struct image_params *imgp __unused)
3553161678Sdavidxu{
3554161678Sdavidxu	umtx_thread_cleanup(curthread);
3555161678Sdavidxu}
3556161678Sdavidxu
3557161678Sdavidxu/*
3558161678Sdavidxu * thread_exit() hook.
3559161678Sdavidxu */
3560161678Sdavidxuvoid
3561161678Sdavidxuumtx_thread_exit(struct thread *td)
3562161678Sdavidxu{
3563161678Sdavidxu	umtx_thread_cleanup(td);
3564161678Sdavidxu}
3565161678Sdavidxu
3566161678Sdavidxu/*
3567161678Sdavidxu * clean up umtx data.
3568161678Sdavidxu */
3569161678Sdavidxustatic void
3570161678Sdavidxuumtx_thread_cleanup(struct thread *td)
3571161678Sdavidxu{
3572161678Sdavidxu	struct umtx_q *uq;
3573161678Sdavidxu	struct umtx_pi *pi;
3574161678Sdavidxu
3575161678Sdavidxu	if ((uq = td->td_umtxq) == NULL)
3576161678Sdavidxu		return;
3577161678Sdavidxu
3578170300Sjeff	mtx_lock_spin(&umtx_lock);
3579161678Sdavidxu	uq->uq_inherited_pri = PRI_MAX;
3580161678Sdavidxu	while ((pi = TAILQ_FIRST(&uq->uq_pi_contested)) != NULL) {
3581161678Sdavidxu		pi->pi_owner = NULL;
3582161678Sdavidxu		TAILQ_REMOVE(&uq->uq_pi_contested, pi, pi_link);
3583161678Sdavidxu	}
3584216313Sdavidxu	mtx_unlock_spin(&umtx_lock);
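	/* Drop any user priority previously lent through PI mutexes. */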
3585174701Sdavidxu	thread_lock(td);
3586216791Sdavidxu	sched_lend_user_prio(td, PRI_MAX);
3587174701Sdavidxu	thread_unlock(td);
3588161678Sdavidxu}
3589