kern_umtx.c revision 216791
1139804Simp/*-
2139013Sdavidxu * Copyright (c) 2004, David Xu <davidxu@freebsd.org>
3112904Sjeff * Copyright (c) 2002, Jeffrey Roberson <jeff@freebsd.org>
4112904Sjeff * All rights reserved.
5112904Sjeff *
6112904Sjeff * Redistribution and use in source and binary forms, with or without
7112904Sjeff * modification, are permitted provided that the following conditions
8112904Sjeff * are met:
9112904Sjeff * 1. Redistributions of source code must retain the above copyright
10112904Sjeff *    notice unmodified, this list of conditions, and the following
11112904Sjeff *    disclaimer.
12112904Sjeff * 2. Redistributions in binary form must reproduce the above copyright
13112904Sjeff *    notice, this list of conditions and the following disclaimer in the
14112904Sjeff *    documentation and/or other materials provided with the distribution.
15112904Sjeff *
16112904Sjeff * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR
17112904Sjeff * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
18112904Sjeff * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.
19112904Sjeff * IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT,
20112904Sjeff * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT
21112904Sjeff * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
22112904Sjeff * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
23112904Sjeff * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
24112904Sjeff * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF
25112904Sjeff * THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
26112904Sjeff */
27112904Sjeff
28116182Sobrien#include <sys/cdefs.h>
29116182Sobrien__FBSDID("$FreeBSD: head/sys/kern/kern_umtx.c 216791 2010-12-29 09:26:46Z davidxu $");
30116182Sobrien
31162536Sdavidxu#include "opt_compat.h"
32112904Sjeff#include <sys/param.h>
33112904Sjeff#include <sys/kernel.h>
34131431Smarcel#include <sys/limits.h>
35112904Sjeff#include <sys/lock.h>
36115765Sjeff#include <sys/malloc.h>
37112904Sjeff#include <sys/mutex.h>
38164033Srwatson#include <sys/priv.h>
39112904Sjeff#include <sys/proc.h>
40161678Sdavidxu#include <sys/sched.h>
41165369Sdavidxu#include <sys/smp.h>
42161678Sdavidxu#include <sys/sysctl.h>
43112904Sjeff#include <sys/sysent.h>
44112904Sjeff#include <sys/systm.h>
45112904Sjeff#include <sys/sysproto.h>
46216641Sdavidxu#include <sys/syscallsubr.h>
47139013Sdavidxu#include <sys/eventhandler.h>
48112904Sjeff#include <sys/umtx.h>
49112904Sjeff
50139013Sdavidxu#include <vm/vm.h>
51139013Sdavidxu#include <vm/vm_param.h>
52139013Sdavidxu#include <vm/pmap.h>
53139013Sdavidxu#include <vm/vm_map.h>
54139013Sdavidxu#include <vm/vm_object.h>
55139013Sdavidxu
56165369Sdavidxu#include <machine/cpu.h>
57165369Sdavidxu
58205014Snwhitehorn#ifdef COMPAT_FREEBSD32
59162536Sdavidxu#include <compat/freebsd32/freebsd32_proto.h>
60162536Sdavidxu#endif
61162536Sdavidxu
62201887Sdavidxuenum {
63201887Sdavidxu	TYPE_SIMPLE_WAIT,
64201887Sdavidxu	TYPE_CV,
65201887Sdavidxu	TYPE_SEM,
66201887Sdavidxu	TYPE_SIMPLE_LOCK,
67201887Sdavidxu	TYPE_NORMAL_UMUTEX,
68201887Sdavidxu	TYPE_PI_UMUTEX,
69201887Sdavidxu	TYPE_PP_UMUTEX,
70201887Sdavidxu	TYPE_RWLOCK
71201887Sdavidxu};
72139013Sdavidxu
73179970Sdavidxu#define _UMUTEX_TRY		1
74179970Sdavidxu#define _UMUTEX_WAIT		2
75179970Sdavidxu
76161678Sdavidxu/* Key to represent a unique userland synchronous object */
77139013Sdavidxustruct umtx_key {
78161678Sdavidxu	int	hash;
79139013Sdavidxu	int	type;
80161678Sdavidxu	int	shared;
81139013Sdavidxu	union {
82139013Sdavidxu		struct {
83139013Sdavidxu			vm_object_t	object;
84161678Sdavidxu			uintptr_t	offset;
85139013Sdavidxu		} shared;
86139013Sdavidxu		struct {
87161678Sdavidxu			struct vmspace	*vs;
88161678Sdavidxu			uintptr_t	addr;
89139013Sdavidxu		} private;
90139013Sdavidxu		struct {
91161678Sdavidxu			void		*a;
92161678Sdavidxu			uintptr_t	b;
93139013Sdavidxu		} both;
94139013Sdavidxu	} info;
95139013Sdavidxu};
96139013Sdavidxu
97161678Sdavidxu/* Priority inheritance mutex info. */
98161678Sdavidxustruct umtx_pi {
99161678Sdavidxu	/* Owner thread */
100161678Sdavidxu	struct thread		*pi_owner;
101161678Sdavidxu
102161678Sdavidxu	/* Reference count */
103161678Sdavidxu	int			pi_refcount;
104161678Sdavidxu
105161678Sdavidxu 	/* List entry to link umtx holding by thread */
106161678Sdavidxu	TAILQ_ENTRY(umtx_pi)	pi_link;
107161678Sdavidxu
108161678Sdavidxu	/* List entry in hash */
109161678Sdavidxu	TAILQ_ENTRY(umtx_pi)	pi_hashlink;
110161678Sdavidxu
111161678Sdavidxu	/* List for waiters */
112161678Sdavidxu	TAILQ_HEAD(,umtx_q)	pi_blocked;
113161678Sdavidxu
114161678Sdavidxu	/* Identify a userland lock object */
115161678Sdavidxu	struct umtx_key		pi_key;
116161678Sdavidxu};
117161678Sdavidxu
118161678Sdavidxu/* A userland synchronous object user. */
119115765Sjeffstruct umtx_q {
120161678Sdavidxu	/* Linked list for the hash. */
121161678Sdavidxu	TAILQ_ENTRY(umtx_q)	uq_link;
122161678Sdavidxu
123161678Sdavidxu	/* Umtx key. */
124161678Sdavidxu	struct umtx_key		uq_key;
125161678Sdavidxu
126161678Sdavidxu	/* Umtx flags. */
127161678Sdavidxu	int			uq_flags;
128161678Sdavidxu#define UQF_UMTXQ	0x0001
129161678Sdavidxu
130161678Sdavidxu	/* The thread waits on. */
131161678Sdavidxu	struct thread		*uq_thread;
132161678Sdavidxu
133161678Sdavidxu	/*
134161678Sdavidxu	 * Blocked on PI mutex. read can use chain lock
135170300Sjeff	 * or umtx_lock, write must have both chain lock and
136170300Sjeff	 * umtx_lock being hold.
137161678Sdavidxu	 */
138161678Sdavidxu	struct umtx_pi		*uq_pi_blocked;
139161678Sdavidxu
140161678Sdavidxu	/* On blocked list */
141161678Sdavidxu	TAILQ_ENTRY(umtx_q)	uq_lockq;
142161678Sdavidxu
143161678Sdavidxu	/* Thread contending with us */
144161678Sdavidxu	TAILQ_HEAD(,umtx_pi)	uq_pi_contested;
145161678Sdavidxu
146161742Sdavidxu	/* Inherited priority from PP mutex */
147161678Sdavidxu	u_char			uq_inherited_pri;
148201991Sdavidxu
149201991Sdavidxu	/* Spare queue ready to be reused */
150201991Sdavidxu	struct umtxq_queue	*uq_spare_queue;
151201991Sdavidxu
152201991Sdavidxu	/* The queue we on */
153201991Sdavidxu	struct umtxq_queue	*uq_cur_queue;
154115765Sjeff};
155115765Sjeff
156161678SdavidxuTAILQ_HEAD(umtxq_head, umtx_q);
157161678Sdavidxu
158201991Sdavidxu/* Per-key wait-queue */
159201991Sdavidxustruct umtxq_queue {
160201991Sdavidxu	struct umtxq_head	head;
161201991Sdavidxu	struct umtx_key		key;
162201991Sdavidxu	LIST_ENTRY(umtxq_queue)	link;
163201991Sdavidxu	int			length;
164201991Sdavidxu};
165201991Sdavidxu
166201991SdavidxuLIST_HEAD(umtxq_list, umtxq_queue);
167201991Sdavidxu
168161678Sdavidxu/* Userland lock object's wait-queue chain */
169138224Sdavidxustruct umtxq_chain {
170161678Sdavidxu	/* Lock for this chain. */
171161678Sdavidxu	struct mtx		uc_lock;
172161678Sdavidxu
173161678Sdavidxu	/* List of sleep queues. */
174201991Sdavidxu	struct umtxq_list	uc_queue[2];
175177848Sdavidxu#define UMTX_SHARED_QUEUE	0
176177848Sdavidxu#define UMTX_EXCLUSIVE_QUEUE	1
177161678Sdavidxu
178201991Sdavidxu	LIST_HEAD(, umtxq_queue) uc_spare_queue;
179201991Sdavidxu
180161678Sdavidxu	/* Busy flag */
181161678Sdavidxu	char			uc_busy;
182161678Sdavidxu
183161678Sdavidxu	/* Chain lock waiters */
184158377Sdavidxu	int			uc_waiters;
185161678Sdavidxu
186161678Sdavidxu	/* All PI in the list */
187161678Sdavidxu	TAILQ_HEAD(,umtx_pi)	uc_pi_list;
188201991Sdavidxu
189138224Sdavidxu};
190115765Sjeff
191161678Sdavidxu#define	UMTXQ_LOCKED_ASSERT(uc)		mtx_assert(&(uc)->uc_lock, MA_OWNED)
192189756Sdavidxu#define	UMTXQ_BUSY_ASSERT(uc)	KASSERT(&(uc)->uc_busy, ("umtx chain is not busy"))
193161678Sdavidxu
194161678Sdavidxu/*
195161678Sdavidxu * Don't propagate time-sharing priority, there is a security reason,
196161678Sdavidxu * a user can simply introduce PI-mutex, let thread A lock the mutex,
197161678Sdavidxu * and let another thread B block on the mutex, because B is
198161678Sdavidxu * sleeping, its priority will be boosted, this causes A's priority to
199161678Sdavidxu * be boosted via priority propagating too and will never be lowered even
200161678Sdavidxu * if it is using 100%CPU, this is unfair to other processes.
201161678Sdavidxu */
202161678Sdavidxu
203163709Sjb#define UPRI(td)	(((td)->td_user_pri >= PRI_MIN_TIMESHARE &&\
204163709Sjb			  (td)->td_user_pri <= PRI_MAX_TIMESHARE) ?\
205163709Sjb			 PRI_MAX_TIMESHARE : (td)->td_user_pri)
206161678Sdavidxu
207138224Sdavidxu#define	GOLDEN_RATIO_PRIME	2654404609U
208216678Sdavidxu#define	UMTX_CHAINS		512
209216678Sdavidxu#define	UMTX_SHIFTS		(__WORD_BIT - 9)
210115765Sjeff
211161678Sdavidxu#define THREAD_SHARE		0
212161678Sdavidxu#define PROCESS_SHARE		1
213161678Sdavidxu#define AUTO_SHARE		2
214161678Sdavidxu
215161678Sdavidxu#define	GET_SHARE(flags)	\
216161678Sdavidxu    (((flags) & USYNC_PROCESS_SHARED) == 0 ? THREAD_SHARE : PROCESS_SHARE)
217161678Sdavidxu
218177848Sdavidxu#define BUSY_SPINS		200
219177848Sdavidxu
220161678Sdavidxustatic uma_zone_t		umtx_pi_zone;
221179421Sdavidxustatic struct umtxq_chain	umtxq_chains[2][UMTX_CHAINS];
222138224Sdavidxustatic MALLOC_DEFINE(M_UMTX, "umtx", "UMTX queue memory");
223161678Sdavidxustatic int			umtx_pi_allocated;
224115310Sjeff
225161678SdavidxuSYSCTL_NODE(_debug, OID_AUTO, umtx, CTLFLAG_RW, 0, "umtx debug");
226161678SdavidxuSYSCTL_INT(_debug_umtx, OID_AUTO, umtx_pi_allocated, CTLFLAG_RD,
227161678Sdavidxu    &umtx_pi_allocated, 0, "Allocated umtx_pi");
228161678Sdavidxu
229161678Sdavidxustatic void umtxq_sysinit(void *);
230161678Sdavidxustatic void umtxq_hash(struct umtx_key *key);
231161678Sdavidxustatic struct umtxq_chain *umtxq_getchain(struct umtx_key *key);
232139013Sdavidxustatic void umtxq_lock(struct umtx_key *key);
233139013Sdavidxustatic void umtxq_unlock(struct umtx_key *key);
234139257Sdavidxustatic void umtxq_busy(struct umtx_key *key);
235139257Sdavidxustatic void umtxq_unbusy(struct umtx_key *key);
236177848Sdavidxustatic void umtxq_insert_queue(struct umtx_q *uq, int q);
237177848Sdavidxustatic void umtxq_remove_queue(struct umtx_q *uq, int q);
238161678Sdavidxustatic int umtxq_sleep(struct umtx_q *uq, const char *wmesg, int timo);
239139257Sdavidxustatic int umtxq_count(struct umtx_key *key);
240139013Sdavidxustatic int umtx_key_match(const struct umtx_key *k1, const struct umtx_key *k2);
241161678Sdavidxustatic int umtx_key_get(void *addr, int type, int share,
242139013Sdavidxu	struct umtx_key *key);
243139013Sdavidxustatic void umtx_key_release(struct umtx_key *key);
244163697Sdavidxustatic struct umtx_pi *umtx_pi_alloc(int);
245161678Sdavidxustatic void umtx_pi_free(struct umtx_pi *pi);
246161678Sdavidxustatic int do_unlock_pp(struct thread *td, struct umutex *m, uint32_t flags);
247161678Sdavidxustatic void umtx_thread_cleanup(struct thread *td);
248161678Sdavidxustatic void umtx_exec_hook(void *arg __unused, struct proc *p __unused,
249161678Sdavidxu	struct image_params *imgp __unused);
250161678SdavidxuSYSINIT(umtx, SI_SUB_EVENTHANDLER+1, SI_ORDER_MIDDLE, umtxq_sysinit, NULL);
251115310Sjeff
252177848Sdavidxu#define umtxq_signal(key, nwake)	umtxq_signal_queue((key), (nwake), UMTX_SHARED_QUEUE)
253177848Sdavidxu#define umtxq_insert(uq)	umtxq_insert_queue((uq), UMTX_SHARED_QUEUE)
254177848Sdavidxu#define umtxq_remove(uq)	umtxq_remove_queue((uq), UMTX_SHARED_QUEUE)
255177848Sdavidxu
256170300Sjeffstatic struct mtx umtx_lock;
257170300Sjeff
258161678Sdavidxustatic void
259161678Sdavidxuumtxq_sysinit(void *arg __unused)
260161678Sdavidxu{
261179421Sdavidxu	int i, j;
262138224Sdavidxu
263161678Sdavidxu	umtx_pi_zone = uma_zcreate("umtx pi", sizeof(struct umtx_pi),
264161678Sdavidxu		NULL, NULL, NULL, NULL, UMA_ALIGN_PTR, 0);
265179421Sdavidxu	for (i = 0; i < 2; ++i) {
266179421Sdavidxu		for (j = 0; j < UMTX_CHAINS; ++j) {
267179421Sdavidxu			mtx_init(&umtxq_chains[i][j].uc_lock, "umtxql", NULL,
268179421Sdavidxu				 MTX_DEF | MTX_DUPOK);
269201991Sdavidxu			LIST_INIT(&umtxq_chains[i][j].uc_queue[0]);
270201991Sdavidxu			LIST_INIT(&umtxq_chains[i][j].uc_queue[1]);
271201991Sdavidxu			LIST_INIT(&umtxq_chains[i][j].uc_spare_queue);
272179421Sdavidxu			TAILQ_INIT(&umtxq_chains[i][j].uc_pi_list);
273179421Sdavidxu			umtxq_chains[i][j].uc_busy = 0;
274179421Sdavidxu			umtxq_chains[i][j].uc_waiters = 0;
275179421Sdavidxu		}
276161678Sdavidxu	}
277170300Sjeff	mtx_init(&umtx_lock, "umtx lock", NULL, MTX_SPIN);
278161678Sdavidxu	EVENTHANDLER_REGISTER(process_exec, umtx_exec_hook, NULL,
279161678Sdavidxu	    EVENTHANDLER_PRI_ANY);
280161678Sdavidxu}
281161678Sdavidxu
282143149Sdavidxustruct umtx_q *
283143149Sdavidxuumtxq_alloc(void)
284143149Sdavidxu{
285161678Sdavidxu	struct umtx_q *uq;
286161678Sdavidxu
287161678Sdavidxu	uq = malloc(sizeof(struct umtx_q), M_UMTX, M_WAITOK | M_ZERO);
288201991Sdavidxu	uq->uq_spare_queue = malloc(sizeof(struct umtxq_queue), M_UMTX, M_WAITOK | M_ZERO);
289201991Sdavidxu	TAILQ_INIT(&uq->uq_spare_queue->head);
290161678Sdavidxu	TAILQ_INIT(&uq->uq_pi_contested);
291161678Sdavidxu	uq->uq_inherited_pri = PRI_MAX;
292161678Sdavidxu	return (uq);
293143149Sdavidxu}
294143149Sdavidxu
295143149Sdavidxuvoid
296143149Sdavidxuumtxq_free(struct umtx_q *uq)
297143149Sdavidxu{
298201991Sdavidxu	MPASS(uq->uq_spare_queue != NULL);
299201991Sdavidxu	free(uq->uq_spare_queue, M_UMTX);
300143149Sdavidxu	free(uq, M_UMTX);
301143149Sdavidxu}
302143149Sdavidxu
303161678Sdavidxustatic inline void
304139013Sdavidxuumtxq_hash(struct umtx_key *key)
305138224Sdavidxu{
306161678Sdavidxu	unsigned n = (uintptr_t)key->info.both.a + key->info.both.b;
307161678Sdavidxu	key->hash = ((n * GOLDEN_RATIO_PRIME) >> UMTX_SHIFTS) % UMTX_CHAINS;
308138224Sdavidxu}
309138224Sdavidxu
310139013Sdavidxustatic inline int
311139013Sdavidxuumtx_key_match(const struct umtx_key *k1, const struct umtx_key *k2)
312139013Sdavidxu{
313139013Sdavidxu	return (k1->type == k2->type &&
314161678Sdavidxu		k1->info.both.a == k2->info.both.a &&
315161678Sdavidxu	        k1->info.both.b == k2->info.both.b);
316139013Sdavidxu}
317139013Sdavidxu
318161678Sdavidxustatic inline struct umtxq_chain *
319161678Sdavidxuumtxq_getchain(struct umtx_key *key)
320139013Sdavidxu{
321201886Sdavidxu	if (key->type <= TYPE_SEM)
322179421Sdavidxu		return (&umtxq_chains[1][key->hash]);
323179421Sdavidxu	return (&umtxq_chains[0][key->hash]);
324139013Sdavidxu}
325139013Sdavidxu
326161678Sdavidxu/*
327177848Sdavidxu * Lock a chain.
328161678Sdavidxu */
329138224Sdavidxustatic inline void
330177848Sdavidxuumtxq_lock(struct umtx_key *key)
331139257Sdavidxu{
332161678Sdavidxu	struct umtxq_chain *uc;
333139257Sdavidxu
334161678Sdavidxu	uc = umtxq_getchain(key);
335177848Sdavidxu	mtx_lock(&uc->uc_lock);
336139257Sdavidxu}
337139257Sdavidxu
338161678Sdavidxu/*
339177848Sdavidxu * Unlock a chain.
340161678Sdavidxu */
341139257Sdavidxustatic inline void
342177848Sdavidxuumtxq_unlock(struct umtx_key *key)
343139257Sdavidxu{
344161678Sdavidxu	struct umtxq_chain *uc;
345139257Sdavidxu
346161678Sdavidxu	uc = umtxq_getchain(key);
347177848Sdavidxu	mtx_unlock(&uc->uc_lock);
348139257Sdavidxu}
349139257Sdavidxu
350161678Sdavidxu/*
351177848Sdavidxu * Set chain to busy state when following operation
352177848Sdavidxu * may be blocked (kernel mutex can not be used).
353161678Sdavidxu */
354139257Sdavidxustatic inline void
355177848Sdavidxuumtxq_busy(struct umtx_key *key)
356138224Sdavidxu{
357161678Sdavidxu	struct umtxq_chain *uc;
358161678Sdavidxu
359161678Sdavidxu	uc = umtxq_getchain(key);
360177848Sdavidxu	mtx_assert(&uc->uc_lock, MA_OWNED);
361177848Sdavidxu	if (uc->uc_busy) {
362177880Sdavidxu#ifdef SMP
363177880Sdavidxu		if (smp_cpus > 1) {
364177880Sdavidxu			int count = BUSY_SPINS;
365177880Sdavidxu			if (count > 0) {
366177880Sdavidxu				umtxq_unlock(key);
367177880Sdavidxu				while (uc->uc_busy && --count > 0)
368177880Sdavidxu					cpu_spinwait();
369177880Sdavidxu				umtxq_lock(key);
370177880Sdavidxu			}
371177848Sdavidxu		}
372177880Sdavidxu#endif
373177880Sdavidxu		while (uc->uc_busy) {
374177848Sdavidxu			uc->uc_waiters++;
375177848Sdavidxu			msleep(uc, &uc->uc_lock, 0, "umtxqb", 0);
376177848Sdavidxu			uc->uc_waiters--;
377177848Sdavidxu		}
378177848Sdavidxu	}
379177848Sdavidxu	uc->uc_busy = 1;
380138224Sdavidxu}
381138224Sdavidxu
382161678Sdavidxu/*
383177848Sdavidxu * Unbusy a chain.
384161678Sdavidxu */
385138225Sdavidxustatic inline void
386177848Sdavidxuumtxq_unbusy(struct umtx_key *key)
387138224Sdavidxu{
388161678Sdavidxu	struct umtxq_chain *uc;
389161678Sdavidxu
390161678Sdavidxu	uc = umtxq_getchain(key);
391177848Sdavidxu	mtx_assert(&uc->uc_lock, MA_OWNED);
392177848Sdavidxu	KASSERT(uc->uc_busy != 0, ("not busy"));
393177848Sdavidxu	uc->uc_busy = 0;
394177848Sdavidxu	if (uc->uc_waiters)
395177848Sdavidxu		wakeup_one(uc);
396138224Sdavidxu}
397138224Sdavidxu
398201991Sdavidxustatic struct umtxq_queue *
399201991Sdavidxuumtxq_queue_lookup(struct umtx_key *key, int q)
400201991Sdavidxu{
401201991Sdavidxu	struct umtxq_queue *uh;
402201991Sdavidxu	struct umtxq_chain *uc;
403201991Sdavidxu
404201991Sdavidxu	uc = umtxq_getchain(key);
405201991Sdavidxu	UMTXQ_LOCKED_ASSERT(uc);
406201991Sdavidxu	LIST_FOREACH(uh, &uc->uc_queue[q], link) {
407201991Sdavidxu		if (umtx_key_match(&uh->key, key))
408201991Sdavidxu			return (uh);
409201991Sdavidxu	}
410201991Sdavidxu
411201991Sdavidxu	return (NULL);
412201991Sdavidxu}
413201991Sdavidxu
414139013Sdavidxustatic inline void
415177848Sdavidxuumtxq_insert_queue(struct umtx_q *uq, int q)
416115765Sjeff{
417201991Sdavidxu	struct umtxq_queue *uh;
418161678Sdavidxu	struct umtxq_chain *uc;
419139013Sdavidxu
420161678Sdavidxu	uc = umtxq_getchain(&uq->uq_key);
421161678Sdavidxu	UMTXQ_LOCKED_ASSERT(uc);
422201991Sdavidxu	KASSERT((uq->uq_flags & UQF_UMTXQ) == 0, ("umtx_q is already on queue"));
423203744Sdavidxu	uh = umtxq_queue_lookup(&uq->uq_key, q);
424201991Sdavidxu	if (uh != NULL) {
425201991Sdavidxu		LIST_INSERT_HEAD(&uc->uc_spare_queue, uq->uq_spare_queue, link);
426201991Sdavidxu	} else {
427201991Sdavidxu		uh = uq->uq_spare_queue;
428201991Sdavidxu		uh->key = uq->uq_key;
429201991Sdavidxu		LIST_INSERT_HEAD(&uc->uc_queue[q], uh, link);
430201991Sdavidxu	}
431201991Sdavidxu	uq->uq_spare_queue = NULL;
432201991Sdavidxu
433201991Sdavidxu	TAILQ_INSERT_TAIL(&uh->head, uq, uq_link);
434201991Sdavidxu	uh->length++;
435158718Sdavidxu	uq->uq_flags |= UQF_UMTXQ;
436201991Sdavidxu	uq->uq_cur_queue = uh;
437201991Sdavidxu	return;
438139013Sdavidxu}
439139013Sdavidxu
440139013Sdavidxustatic inline void
441177848Sdavidxuumtxq_remove_queue(struct umtx_q *uq, int q)
442139013Sdavidxu{
443161678Sdavidxu	struct umtxq_chain *uc;
444201991Sdavidxu	struct umtxq_queue *uh;
445161678Sdavidxu
446161678Sdavidxu	uc = umtxq_getchain(&uq->uq_key);
447161678Sdavidxu	UMTXQ_LOCKED_ASSERT(uc);
448158718Sdavidxu	if (uq->uq_flags & UQF_UMTXQ) {
449201991Sdavidxu		uh = uq->uq_cur_queue;
450201991Sdavidxu		TAILQ_REMOVE(&uh->head, uq, uq_link);
451201991Sdavidxu		uh->length--;
452158718Sdavidxu		uq->uq_flags &= ~UQF_UMTXQ;
453201991Sdavidxu		if (TAILQ_EMPTY(&uh->head)) {
454201991Sdavidxu			KASSERT(uh->length == 0,
455201991Sdavidxu			    ("inconsistent umtxq_queue length"));
456201991Sdavidxu			LIST_REMOVE(uh, link);
457201991Sdavidxu		} else {
458201991Sdavidxu			uh = LIST_FIRST(&uc->uc_spare_queue);
459201991Sdavidxu			KASSERT(uh != NULL, ("uc_spare_queue is empty"));
460201991Sdavidxu			LIST_REMOVE(uh, link);
461201991Sdavidxu		}
462201991Sdavidxu		uq->uq_spare_queue = uh;
463201991Sdavidxu		uq->uq_cur_queue = NULL;
464139013Sdavidxu	}
465139013Sdavidxu}
466139013Sdavidxu
467161678Sdavidxu/*
468161678Sdavidxu * Check if there are multiple waiters
469161678Sdavidxu */
470139013Sdavidxustatic int
471139013Sdavidxuumtxq_count(struct umtx_key *key)
472139013Sdavidxu{
473161678Sdavidxu	struct umtxq_chain *uc;
474201991Sdavidxu	struct umtxq_queue *uh;
475115765Sjeff
476161678Sdavidxu	uc = umtxq_getchain(key);
477161678Sdavidxu	UMTXQ_LOCKED_ASSERT(uc);
478201991Sdavidxu	uh = umtxq_queue_lookup(key, UMTX_SHARED_QUEUE);
479201991Sdavidxu	if (uh != NULL)
480201991Sdavidxu		return (uh->length);
481201991Sdavidxu	return (0);
482115765Sjeff}
483115765Sjeff
484161678Sdavidxu/*
485161678Sdavidxu * Check if there are multiple PI waiters and returns first
486161678Sdavidxu * waiter.
487161678Sdavidxu */
488139257Sdavidxustatic int
489161678Sdavidxuumtxq_count_pi(struct umtx_key *key, struct umtx_q **first)
490161678Sdavidxu{
491161678Sdavidxu	struct umtxq_chain *uc;
492201991Sdavidxu	struct umtxq_queue *uh;
493161678Sdavidxu
494161678Sdavidxu	*first = NULL;
495161678Sdavidxu	uc = umtxq_getchain(key);
496161678Sdavidxu	UMTXQ_LOCKED_ASSERT(uc);
497201991Sdavidxu	uh = umtxq_queue_lookup(key, UMTX_SHARED_QUEUE);
498201991Sdavidxu	if (uh != NULL) {
499201991Sdavidxu		*first = TAILQ_FIRST(&uh->head);
500201991Sdavidxu		return (uh->length);
501161678Sdavidxu	}
502201991Sdavidxu	return (0);
503161678Sdavidxu}
504161678Sdavidxu
505161678Sdavidxu/*
506161678Sdavidxu * Wake up threads waiting on an userland object.
507161678Sdavidxu */
508177848Sdavidxu
509161678Sdavidxustatic int
510177848Sdavidxuumtxq_signal_queue(struct umtx_key *key, int n_wake, int q)
511115765Sjeff{
512161678Sdavidxu	struct umtxq_chain *uc;
513201991Sdavidxu	struct umtxq_queue *uh;
514201991Sdavidxu	struct umtx_q *uq;
515161678Sdavidxu	int ret;
516115765Sjeff
517139257Sdavidxu	ret = 0;
518161678Sdavidxu	uc = umtxq_getchain(key);
519161678Sdavidxu	UMTXQ_LOCKED_ASSERT(uc);
520201991Sdavidxu	uh = umtxq_queue_lookup(key, q);
521201991Sdavidxu	if (uh != NULL) {
522201991Sdavidxu		while ((uq = TAILQ_FIRST(&uh->head)) != NULL) {
523177848Sdavidxu			umtxq_remove_queue(uq, q);
524161678Sdavidxu			wakeup(uq);
525139257Sdavidxu			if (++ret >= n_wake)
526201991Sdavidxu				return (ret);
527139013Sdavidxu		}
528139013Sdavidxu	}
529139257Sdavidxu	return (ret);
530138224Sdavidxu}
531138224Sdavidxu
532177848Sdavidxu
533161678Sdavidxu/*
534161678Sdavidxu * Wake up specified thread.
535161678Sdavidxu */
536161678Sdavidxustatic inline void
537161678Sdavidxuumtxq_signal_thread(struct umtx_q *uq)
538161678Sdavidxu{
539161678Sdavidxu	struct umtxq_chain *uc;
540161678Sdavidxu
541161678Sdavidxu	uc = umtxq_getchain(&uq->uq_key);
542161678Sdavidxu	UMTXQ_LOCKED_ASSERT(uc);
543161678Sdavidxu	umtxq_remove(uq);
544161678Sdavidxu	wakeup(uq);
545161678Sdavidxu}
546161678Sdavidxu
547161678Sdavidxu/*
548161678Sdavidxu * Put thread into sleep state, before sleeping, check if
549161678Sdavidxu * thread was removed from umtx queue.
550161678Sdavidxu */
551138224Sdavidxustatic inline int
552161678Sdavidxuumtxq_sleep(struct umtx_q *uq, const char *wmesg, int timo)
553138224Sdavidxu{
554161678Sdavidxu	struct umtxq_chain *uc;
555161678Sdavidxu	int error;
556161678Sdavidxu
557161678Sdavidxu	uc = umtxq_getchain(&uq->uq_key);
558161678Sdavidxu	UMTXQ_LOCKED_ASSERT(uc);
559161678Sdavidxu	if (!(uq->uq_flags & UQF_UMTXQ))
560161678Sdavidxu		return (0);
561161678Sdavidxu	error = msleep(uq, &uc->uc_lock, PCATCH, wmesg, timo);
562139751Sdavidxu	if (error == EWOULDBLOCK)
563139751Sdavidxu		error = ETIMEDOUT;
564139751Sdavidxu	return (error);
565138224Sdavidxu}
566138224Sdavidxu
567161678Sdavidxu/*
568161678Sdavidxu * Convert userspace address into unique logical address.
569161678Sdavidxu */
570139013Sdavidxustatic int
571161678Sdavidxuumtx_key_get(void *addr, int type, int share, struct umtx_key *key)
572139013Sdavidxu{
573161678Sdavidxu	struct thread *td = curthread;
574139013Sdavidxu	vm_map_t map;
575139013Sdavidxu	vm_map_entry_t entry;
576139013Sdavidxu	vm_pindex_t pindex;
577139013Sdavidxu	vm_prot_t prot;
578139013Sdavidxu	boolean_t wired;
579139013Sdavidxu
580161678Sdavidxu	key->type = type;
581161678Sdavidxu	if (share == THREAD_SHARE) {
582161678Sdavidxu		key->shared = 0;
583161678Sdavidxu		key->info.private.vs = td->td_proc->p_vmspace;
584161678Sdavidxu		key->info.private.addr = (uintptr_t)addr;
585163677Sdavidxu	} else {
586163677Sdavidxu		MPASS(share == PROCESS_SHARE || share == AUTO_SHARE);
587161678Sdavidxu		map = &td->td_proc->p_vmspace->vm_map;
588161678Sdavidxu		if (vm_map_lookup(&map, (vm_offset_t)addr, VM_PROT_WRITE,
589161678Sdavidxu		    &entry, &key->info.shared.object, &pindex, &prot,
590161678Sdavidxu		    &wired) != KERN_SUCCESS) {
591161678Sdavidxu			return EFAULT;
592161678Sdavidxu		}
593161678Sdavidxu
594161678Sdavidxu		if ((share == PROCESS_SHARE) ||
595161678Sdavidxu		    (share == AUTO_SHARE &&
596161678Sdavidxu		     VM_INHERIT_SHARE == entry->inheritance)) {
597161678Sdavidxu			key->shared = 1;
598161678Sdavidxu			key->info.shared.offset = entry->offset + entry->start -
599161678Sdavidxu				(vm_offset_t)addr;
600161678Sdavidxu			vm_object_reference(key->info.shared.object);
601161678Sdavidxu		} else {
602161678Sdavidxu			key->shared = 0;
603161678Sdavidxu			key->info.private.vs = td->td_proc->p_vmspace;
604161678Sdavidxu			key->info.private.addr = (uintptr_t)addr;
605161678Sdavidxu		}
606161678Sdavidxu		vm_map_lookup_done(map, entry);
607139013Sdavidxu	}
608139013Sdavidxu
609161678Sdavidxu	umtxq_hash(key);
610139013Sdavidxu	return (0);
611139013Sdavidxu}
612139013Sdavidxu
613161678Sdavidxu/*
614161678Sdavidxu * Release key.
615161678Sdavidxu */
616139013Sdavidxustatic inline void
617139013Sdavidxuumtx_key_release(struct umtx_key *key)
618139013Sdavidxu{
619161678Sdavidxu	if (key->shared)
620139013Sdavidxu		vm_object_deallocate(key->info.shared.object);
621139013Sdavidxu}
622139013Sdavidxu
623161678Sdavidxu/*
624161678Sdavidxu * Lock a umtx object.
625161678Sdavidxu */
626139013Sdavidxustatic int
627163449Sdavidxu_do_lock_umtx(struct thread *td, struct umtx *umtx, u_long id, int timo)
628112904Sjeff{
629143149Sdavidxu	struct umtx_q *uq;
630163449Sdavidxu	u_long owner;
631163449Sdavidxu	u_long old;
632138224Sdavidxu	int error = 0;
633112904Sjeff
634143149Sdavidxu	uq = td->td_umtxq;
635161678Sdavidxu
636112904Sjeff	/*
637161678Sdavidxu	 * Care must be exercised when dealing with umtx structure. It
638112904Sjeff	 * can fault on any access.
639112904Sjeff	 */
640112904Sjeff	for (;;) {
641112904Sjeff		/*
642112904Sjeff		 * Try the uncontested case.  This should be done in userland.
643112904Sjeff		 */
644163449Sdavidxu		owner = casuword(&umtx->u_owner, UMTX_UNOWNED, id);
645112904Sjeff
646138224Sdavidxu		/* The acquire succeeded. */
647138224Sdavidxu		if (owner == UMTX_UNOWNED)
648138224Sdavidxu			return (0);
649138224Sdavidxu
650115765Sjeff		/* The address was invalid. */
651115765Sjeff		if (owner == -1)
652115765Sjeff			return (EFAULT);
653115765Sjeff
654115765Sjeff		/* If no one owns it but it is contested try to acquire it. */
655115765Sjeff		if (owner == UMTX_CONTESTED) {
656163449Sdavidxu			owner = casuword(&umtx->u_owner,
657139013Sdavidxu			    UMTX_CONTESTED, id | UMTX_CONTESTED);
658115765Sjeff
659138224Sdavidxu			if (owner == UMTX_CONTESTED)
660138224Sdavidxu				return (0);
661138224Sdavidxu
662115765Sjeff			/* The address was invalid. */
663115765Sjeff			if (owner == -1)
664115765Sjeff				return (EFAULT);
665115765Sjeff
666115765Sjeff			/* If this failed the lock has changed, restart. */
667115765Sjeff			continue;
668112904Sjeff		}
669112904Sjeff
670138224Sdavidxu		/*
671138224Sdavidxu		 * If we caught a signal, we have retried and now
672138224Sdavidxu		 * exit immediately.
673138224Sdavidxu		 */
674161678Sdavidxu		if (error != 0)
675138224Sdavidxu			return (error);
676112904Sjeff
677161678Sdavidxu		if ((error = umtx_key_get(umtx, TYPE_SIMPLE_LOCK,
678161678Sdavidxu			AUTO_SHARE, &uq->uq_key)) != 0)
679161678Sdavidxu			return (error);
680161678Sdavidxu
681161678Sdavidxu		umtxq_lock(&uq->uq_key);
682161678Sdavidxu		umtxq_busy(&uq->uq_key);
683161678Sdavidxu		umtxq_insert(uq);
684161678Sdavidxu		umtxq_unbusy(&uq->uq_key);
685161678Sdavidxu		umtxq_unlock(&uq->uq_key);
686161678Sdavidxu
687112904Sjeff		/*
688112904Sjeff		 * Set the contested bit so that a release in user space
689112904Sjeff		 * knows to use the system call for unlock.  If this fails
690112904Sjeff		 * either some one else has acquired the lock or it has been
691112904Sjeff		 * released.
692112904Sjeff		 */
693163449Sdavidxu		old = casuword(&umtx->u_owner, owner, owner | UMTX_CONTESTED);
694112904Sjeff
695112904Sjeff		/* The address was invalid. */
696112967Sjake		if (old == -1) {
697143149Sdavidxu			umtxq_lock(&uq->uq_key);
698143149Sdavidxu			umtxq_remove(uq);
699143149Sdavidxu			umtxq_unlock(&uq->uq_key);
700143149Sdavidxu			umtx_key_release(&uq->uq_key);
701115765Sjeff			return (EFAULT);
702112904Sjeff		}
703112904Sjeff
704112904Sjeff		/*
705115765Sjeff		 * We set the contested bit, sleep. Otherwise the lock changed
706117685Smtm		 * and we need to retry or we lost a race to the thread
707117685Smtm		 * unlocking the umtx.
708112904Sjeff		 */
709143149Sdavidxu		umtxq_lock(&uq->uq_key);
710161678Sdavidxu		if (old == owner)
711161678Sdavidxu			error = umtxq_sleep(uq, "umtx", timo);
712143149Sdavidxu		umtxq_remove(uq);
713143149Sdavidxu		umtxq_unlock(&uq->uq_key);
714143149Sdavidxu		umtx_key_release(&uq->uq_key);
715112904Sjeff	}
716117743Smtm
717117743Smtm	return (0);
718112904Sjeff}
719112904Sjeff
720161678Sdavidxu/*
721161678Sdavidxu * Lock a umtx object.
722161678Sdavidxu */
723139013Sdavidxustatic int
724163449Sdavidxudo_lock_umtx(struct thread *td, struct umtx *umtx, u_long id,
725140245Sdavidxu	struct timespec *timeout)
726112904Sjeff{
727140245Sdavidxu	struct timespec ts, ts2, ts3;
728139013Sdavidxu	struct timeval tv;
729140245Sdavidxu	int error;
730139013Sdavidxu
731140245Sdavidxu	if (timeout == NULL) {
732162536Sdavidxu		error = _do_lock_umtx(td, umtx, id, 0);
733162030Sdavidxu		/* Mutex locking is restarted if it is interrupted. */
734162030Sdavidxu		if (error == EINTR)
735162030Sdavidxu			error = ERESTART;
736139013Sdavidxu	} else {
737140245Sdavidxu		getnanouptime(&ts);
738140245Sdavidxu		timespecadd(&ts, timeout);
739140245Sdavidxu		TIMESPEC_TO_TIMEVAL(&tv, timeout);
740139013Sdavidxu		for (;;) {
741162536Sdavidxu			error = _do_lock_umtx(td, umtx, id, tvtohz(&tv));
742140245Sdavidxu			if (error != ETIMEDOUT)
743140245Sdavidxu				break;
744140245Sdavidxu			getnanouptime(&ts2);
745140245Sdavidxu			if (timespeccmp(&ts2, &ts, >=)) {
746139751Sdavidxu				error = ETIMEDOUT;
747139013Sdavidxu				break;
748139013Sdavidxu			}
749140245Sdavidxu			ts3 = ts;
750140245Sdavidxu			timespecsub(&ts3, &ts2);
751140245Sdavidxu			TIMESPEC_TO_TIMEVAL(&tv, &ts3);
752139013Sdavidxu		}
753162030Sdavidxu		/* Timed-locking is not restarted. */
754162030Sdavidxu		if (error == ERESTART)
755162030Sdavidxu			error = EINTR;
756139013Sdavidxu	}
757139013Sdavidxu	return (error);
758139013Sdavidxu}
759139013Sdavidxu
760161678Sdavidxu/*
761161678Sdavidxu * Unlock a umtx object.
762161678Sdavidxu */
763139013Sdavidxustatic int
764163449Sdavidxudo_unlock_umtx(struct thread *td, struct umtx *umtx, u_long id)
765139013Sdavidxu{
766139013Sdavidxu	struct umtx_key key;
767163449Sdavidxu	u_long owner;
768163449Sdavidxu	u_long old;
769139257Sdavidxu	int error;
770139257Sdavidxu	int count;
771112904Sjeff
772112904Sjeff	/*
773112904Sjeff	 * Make sure we own this mtx.
774112904Sjeff	 */
775163449Sdavidxu	owner = fuword(__DEVOLATILE(u_long *, &umtx->u_owner));
776161678Sdavidxu	if (owner == -1)
777115765Sjeff		return (EFAULT);
778115765Sjeff
779139013Sdavidxu	if ((owner & ~UMTX_CONTESTED) != id)
780115765Sjeff		return (EPERM);
781112904Sjeff
782161678Sdavidxu	/* This should be done in userland */
783161678Sdavidxu	if ((owner & UMTX_CONTESTED) == 0) {
784163449Sdavidxu		old = casuword(&umtx->u_owner, owner, UMTX_UNOWNED);
785161678Sdavidxu		if (old == -1)
786161678Sdavidxu			return (EFAULT);
787161678Sdavidxu		if (old == owner)
788161678Sdavidxu			return (0);
789161855Sdavidxu		owner = old;
790161678Sdavidxu	}
791161678Sdavidxu
792117685Smtm	/* We should only ever be in here for contested locks */
793161678Sdavidxu	if ((error = umtx_key_get(umtx, TYPE_SIMPLE_LOCK, AUTO_SHARE,
794161678Sdavidxu		&key)) != 0)
795139257Sdavidxu		return (error);
796139257Sdavidxu
797139257Sdavidxu	umtxq_lock(&key);
798139257Sdavidxu	umtxq_busy(&key);
799139257Sdavidxu	count = umtxq_count(&key);
800139257Sdavidxu	umtxq_unlock(&key);
801139257Sdavidxu
802117743Smtm	/*
803117743Smtm	 * When unlocking the umtx, it must be marked as unowned if
804117743Smtm	 * there is zero or one thread only waiting for it.
805117743Smtm	 * Otherwise, it must be marked as contested.
806117743Smtm	 */
807163449Sdavidxu	old = casuword(&umtx->u_owner, owner,
808163449Sdavidxu		count <= 1 ? UMTX_UNOWNED : UMTX_CONTESTED);
809139257Sdavidxu	umtxq_lock(&key);
810161678Sdavidxu	umtxq_signal(&key,1);
811139257Sdavidxu	umtxq_unbusy(&key);
812139257Sdavidxu	umtxq_unlock(&key);
813139257Sdavidxu	umtx_key_release(&key);
814115765Sjeff	if (old == -1)
815115765Sjeff		return (EFAULT);
816138224Sdavidxu	if (old != owner)
817138224Sdavidxu		return (EINVAL);
818115765Sjeff	return (0);
819112904Sjeff}
820139013Sdavidxu
821205014Snwhitehorn#ifdef COMPAT_FREEBSD32
822162536Sdavidxu
823161678Sdavidxu/*
824162536Sdavidxu * Lock a umtx object.
825162536Sdavidxu */
826162536Sdavidxustatic int
827162536Sdavidxu_do_lock_umtx32(struct thread *td, uint32_t *m, uint32_t id, int timo)
828162536Sdavidxu{
829162536Sdavidxu	struct umtx_q *uq;
830162536Sdavidxu	uint32_t owner;
831162536Sdavidxu	uint32_t old;
832162536Sdavidxu	int error = 0;
833162536Sdavidxu
834162536Sdavidxu	uq = td->td_umtxq;
835162536Sdavidxu
836162536Sdavidxu	/*
837162536Sdavidxu	 * Care must be exercised when dealing with umtx structure. It
838162536Sdavidxu	 * can fault on any access.
839162536Sdavidxu	 */
840162536Sdavidxu	for (;;) {
841162536Sdavidxu		/*
842162536Sdavidxu		 * Try the uncontested case.  This should be done in userland.
843162536Sdavidxu		 */
844162536Sdavidxu		owner = casuword32(m, UMUTEX_UNOWNED, id);
845162536Sdavidxu
846162536Sdavidxu		/* The acquire succeeded. */
847162536Sdavidxu		if (owner == UMUTEX_UNOWNED)
848162536Sdavidxu			return (0);
849162536Sdavidxu
850162536Sdavidxu		/* The address was invalid. */
851162536Sdavidxu		if (owner == -1)
852162536Sdavidxu			return (EFAULT);
853162536Sdavidxu
854162536Sdavidxu		/* If no one owns it but it is contested try to acquire it. */
855162536Sdavidxu		if (owner == UMUTEX_CONTESTED) {
856162536Sdavidxu			owner = casuword32(m,
857162536Sdavidxu			    UMUTEX_CONTESTED, id | UMUTEX_CONTESTED);
858162536Sdavidxu			if (owner == UMUTEX_CONTESTED)
859162536Sdavidxu				return (0);
860162536Sdavidxu
861162536Sdavidxu			/* The address was invalid. */
862162536Sdavidxu			if (owner == -1)
863162536Sdavidxu				return (EFAULT);
864162536Sdavidxu
865162536Sdavidxu			/* If this failed the lock has changed, restart. */
866162536Sdavidxu			continue;
867162536Sdavidxu		}
868162536Sdavidxu
869162536Sdavidxu		/*
870162536Sdavidxu		 * If we caught a signal, we have retried and now
871162536Sdavidxu		 * exit immediately.
872162536Sdavidxu		 */
873162536Sdavidxu		if (error != 0)
874162536Sdavidxu			return (error);
875162536Sdavidxu
876162536Sdavidxu		if ((error = umtx_key_get(m, TYPE_SIMPLE_LOCK,
877162536Sdavidxu			AUTO_SHARE, &uq->uq_key)) != 0)
878162536Sdavidxu			return (error);
879162536Sdavidxu
880162536Sdavidxu		umtxq_lock(&uq->uq_key);
881162536Sdavidxu		umtxq_busy(&uq->uq_key);
882162536Sdavidxu		umtxq_insert(uq);
883162536Sdavidxu		umtxq_unbusy(&uq->uq_key);
884162536Sdavidxu		umtxq_unlock(&uq->uq_key);
885162536Sdavidxu
886162536Sdavidxu		/*
887162536Sdavidxu		 * Set the contested bit so that a release in user space
888162536Sdavidxu		 * knows to use the system call for unlock.  If this fails
889162536Sdavidxu		 * either some one else has acquired the lock or it has been
890162536Sdavidxu		 * released.
891162536Sdavidxu		 */
892162536Sdavidxu		old = casuword32(m, owner, owner | UMUTEX_CONTESTED);
893162536Sdavidxu
894162536Sdavidxu		/* The address was invalid. */
895162536Sdavidxu		if (old == -1) {
896162536Sdavidxu			umtxq_lock(&uq->uq_key);
897162536Sdavidxu			umtxq_remove(uq);
898162536Sdavidxu			umtxq_unlock(&uq->uq_key);
899162536Sdavidxu			umtx_key_release(&uq->uq_key);
900162536Sdavidxu			return (EFAULT);
901162536Sdavidxu		}
902162536Sdavidxu
903162536Sdavidxu		/*
904162536Sdavidxu		 * We set the contested bit, sleep. Otherwise the lock changed
905162536Sdavidxu		 * and we need to retry or we lost a race to the thread
906162536Sdavidxu		 * unlocking the umtx.
907162536Sdavidxu		 */
908162536Sdavidxu		umtxq_lock(&uq->uq_key);
909162536Sdavidxu		if (old == owner)
910162536Sdavidxu			error = umtxq_sleep(uq, "umtx", timo);
911162536Sdavidxu		umtxq_remove(uq);
912162536Sdavidxu		umtxq_unlock(&uq->uq_key);
913162536Sdavidxu		umtx_key_release(&uq->uq_key);
914162536Sdavidxu	}
915162536Sdavidxu
916162536Sdavidxu	return (0);
917162536Sdavidxu}
918162536Sdavidxu
919162536Sdavidxu/*
920162536Sdavidxu * Lock a umtx object.
921162536Sdavidxu */
922162536Sdavidxustatic int
923162536Sdavidxudo_lock_umtx32(struct thread *td, void *m, uint32_t id,
924162536Sdavidxu	struct timespec *timeout)
925162536Sdavidxu{
926162536Sdavidxu	struct timespec ts, ts2, ts3;
927162536Sdavidxu	struct timeval tv;
928162536Sdavidxu	int error;
929162536Sdavidxu
930162536Sdavidxu	if (timeout == NULL) {
931162536Sdavidxu		error = _do_lock_umtx32(td, m, id, 0);
932162536Sdavidxu		/* Mutex locking is restarted if it is interrupted. */
933162536Sdavidxu		if (error == EINTR)
934162536Sdavidxu			error = ERESTART;
935162536Sdavidxu	} else {
936162536Sdavidxu		getnanouptime(&ts);
937162536Sdavidxu		timespecadd(&ts, timeout);
938162536Sdavidxu		TIMESPEC_TO_TIMEVAL(&tv, timeout);
939162536Sdavidxu		for (;;) {
940162536Sdavidxu			error = _do_lock_umtx32(td, m, id, tvtohz(&tv));
941162536Sdavidxu			if (error != ETIMEDOUT)
942162536Sdavidxu				break;
943162536Sdavidxu			getnanouptime(&ts2);
944162536Sdavidxu			if (timespeccmp(&ts2, &ts, >=)) {
945162536Sdavidxu				error = ETIMEDOUT;
946162536Sdavidxu				break;
947162536Sdavidxu			}
948162536Sdavidxu			ts3 = ts;
949162536Sdavidxu			timespecsub(&ts3, &ts2);
950162536Sdavidxu			TIMESPEC_TO_TIMEVAL(&tv, &ts3);
951162536Sdavidxu		}
952162536Sdavidxu		/* Timed-locking is not restarted. */
953162536Sdavidxu		if (error == ERESTART)
954162536Sdavidxu			error = EINTR;
955162536Sdavidxu	}
956162536Sdavidxu	return (error);
957162536Sdavidxu}
958162536Sdavidxu
959162536Sdavidxu/*
960162536Sdavidxu * Unlock a umtx object.
961162536Sdavidxu */
962162536Sdavidxustatic int
963162536Sdavidxudo_unlock_umtx32(struct thread *td, uint32_t *m, uint32_t id)
964162536Sdavidxu{
965162536Sdavidxu	struct umtx_key key;
966162536Sdavidxu	uint32_t owner;
967162536Sdavidxu	uint32_t old;
968162536Sdavidxu	int error;
969162536Sdavidxu	int count;
970162536Sdavidxu
971162536Sdavidxu	/*
972162536Sdavidxu	 * Make sure we own this mtx.
973162536Sdavidxu	 */
974162536Sdavidxu	owner = fuword32(m);
975162536Sdavidxu	if (owner == -1)
976162536Sdavidxu		return (EFAULT);
977162536Sdavidxu
978162536Sdavidxu	if ((owner & ~UMUTEX_CONTESTED) != id)
979162536Sdavidxu		return (EPERM);
980162536Sdavidxu
981162536Sdavidxu	/* This should be done in userland */
982162536Sdavidxu	if ((owner & UMUTEX_CONTESTED) == 0) {
983162536Sdavidxu		old = casuword32(m, owner, UMUTEX_UNOWNED);
984162536Sdavidxu		if (old == -1)
985162536Sdavidxu			return (EFAULT);
986162536Sdavidxu		if (old == owner)
987162536Sdavidxu			return (0);
988162536Sdavidxu		owner = old;
989162536Sdavidxu	}
990162536Sdavidxu
991162536Sdavidxu	/* We should only ever be in here for contested locks */
992162536Sdavidxu	if ((error = umtx_key_get(m, TYPE_SIMPLE_LOCK, AUTO_SHARE,
993162536Sdavidxu		&key)) != 0)
994162536Sdavidxu		return (error);
995162536Sdavidxu
996162536Sdavidxu	umtxq_lock(&key);
997162536Sdavidxu	umtxq_busy(&key);
998162536Sdavidxu	count = umtxq_count(&key);
999162536Sdavidxu	umtxq_unlock(&key);
1000162536Sdavidxu
1001162536Sdavidxu	/*
1002162536Sdavidxu	 * When unlocking the umtx, it must be marked as unowned if
1003162536Sdavidxu	 * there is zero or one thread only waiting for it.
1004162536Sdavidxu	 * Otherwise, it must be marked as contested.
1005162536Sdavidxu	 */
1006162536Sdavidxu	old = casuword32(m, owner,
1007162536Sdavidxu		count <= 1 ? UMUTEX_UNOWNED : UMUTEX_CONTESTED);
1008162536Sdavidxu	umtxq_lock(&key);
1009162536Sdavidxu	umtxq_signal(&key,1);
1010162536Sdavidxu	umtxq_unbusy(&key);
1011162536Sdavidxu	umtxq_unlock(&key);
1012162536Sdavidxu	umtx_key_release(&key);
1013162536Sdavidxu	if (old == -1)
1014162536Sdavidxu		return (EFAULT);
1015162536Sdavidxu	if (old != owner)
1016162536Sdavidxu		return (EINVAL);
1017162536Sdavidxu	return (0);
1018162536Sdavidxu}
1019162536Sdavidxu#endif
1020162536Sdavidxu
1021162536Sdavidxu/*
1022161678Sdavidxu * Fetch and compare value, sleep on the address if value is not changed.
1023161678Sdavidxu */
1024139013Sdavidxustatic int
1025163449Sdavidxudo_wait(struct thread *td, void *addr, u_long id,
1026178646Sdavidxu	struct timespec *timeout, int compat32, int is_private)
1027139013Sdavidxu{
1028143149Sdavidxu	struct umtx_q *uq;
1029140245Sdavidxu	struct timespec ts, ts2, ts3;
1030139013Sdavidxu	struct timeval tv;
1031163449Sdavidxu	u_long tmp;
1032140245Sdavidxu	int error = 0;
1033139013Sdavidxu
1034143149Sdavidxu	uq = td->td_umtxq;
1035178646Sdavidxu	if ((error = umtx_key_get(addr, TYPE_SIMPLE_WAIT,
1036178646Sdavidxu		is_private ? THREAD_SHARE : AUTO_SHARE, &uq->uq_key)) != 0)
1037139013Sdavidxu		return (error);
1038161678Sdavidxu
1039161678Sdavidxu	umtxq_lock(&uq->uq_key);
1040161678Sdavidxu	umtxq_insert(uq);
1041161678Sdavidxu	umtxq_unlock(&uq->uq_key);
1042162536Sdavidxu	if (compat32 == 0)
1043162536Sdavidxu		tmp = fuword(addr);
1044162536Sdavidxu        else
1045190987Sdavidxu		tmp = (unsigned int)fuword32(addr);
1046139427Sdavidxu	if (tmp != id) {
1047143149Sdavidxu		umtxq_lock(&uq->uq_key);
1048143149Sdavidxu		umtxq_remove(uq);
1049143149Sdavidxu		umtxq_unlock(&uq->uq_key);
1050140245Sdavidxu	} else if (timeout == NULL) {
1051143149Sdavidxu		umtxq_lock(&uq->uq_key);
1052164839Sdavidxu		error = umtxq_sleep(uq, "uwait", 0);
1053161678Sdavidxu		umtxq_remove(uq);
1054143149Sdavidxu		umtxq_unlock(&uq->uq_key);
1055139013Sdavidxu	} else {
1056140245Sdavidxu		getnanouptime(&ts);
1057140245Sdavidxu		timespecadd(&ts, timeout);
1058140245Sdavidxu		TIMESPEC_TO_TIMEVAL(&tv, timeout);
1059161678Sdavidxu		umtxq_lock(&uq->uq_key);
1060139013Sdavidxu		for (;;) {
1061164839Sdavidxu			error = umtxq_sleep(uq, "uwait", tvtohz(&tv));
1062211794Sdavidxu			if (!(uq->uq_flags & UQF_UMTXQ)) {
1063211794Sdavidxu				error = 0;
1064161678Sdavidxu				break;
1065211794Sdavidxu			}
1066140245Sdavidxu			if (error != ETIMEDOUT)
1067140245Sdavidxu				break;
1068161678Sdavidxu			umtxq_unlock(&uq->uq_key);
1069140245Sdavidxu			getnanouptime(&ts2);
1070140245Sdavidxu			if (timespeccmp(&ts2, &ts, >=)) {
1071139751Sdavidxu				error = ETIMEDOUT;
1072161678Sdavidxu				umtxq_lock(&uq->uq_key);
1073139013Sdavidxu				break;
1074139013Sdavidxu			}
1075140245Sdavidxu			ts3 = ts;
1076140245Sdavidxu			timespecsub(&ts3, &ts2);
1077140245Sdavidxu			TIMESPEC_TO_TIMEVAL(&tv, &ts3);
1078161678Sdavidxu			umtxq_lock(&uq->uq_key);
1079139013Sdavidxu		}
1080143149Sdavidxu		umtxq_remove(uq);
1081143149Sdavidxu		umtxq_unlock(&uq->uq_key);
1082139013Sdavidxu	}
1083143149Sdavidxu	umtx_key_release(&uq->uq_key);
1084139257Sdavidxu	if (error == ERESTART)
1085139257Sdavidxu		error = EINTR;
1086139013Sdavidxu	return (error);
1087139013Sdavidxu}
1088139013Sdavidxu
1089161678Sdavidxu/*
1090161678Sdavidxu * Wake up threads sleeping on the specified address.
1091161678Sdavidxu */
1092151692Sdavidxuint
1093178646Sdavidxukern_umtx_wake(struct thread *td, void *uaddr, int n_wake, int is_private)
1094139013Sdavidxu{
1095139013Sdavidxu	struct umtx_key key;
1096139257Sdavidxu	int ret;
1097139013Sdavidxu
1098178646Sdavidxu	if ((ret = umtx_key_get(uaddr, TYPE_SIMPLE_WAIT,
1099178646Sdavidxu		is_private ? THREAD_SHARE : AUTO_SHARE, &key)) != 0)
1100139257Sdavidxu		return (ret);
1101139258Sdavidxu	umtxq_lock(&key);
1102139257Sdavidxu	ret = umtxq_signal(&key, n_wake);
1103139258Sdavidxu	umtxq_unlock(&key);
1104139257Sdavidxu	umtx_key_release(&key);
1105139013Sdavidxu	return (0);
1106139013Sdavidxu}
1107139013Sdavidxu
1108161678Sdavidxu/*
1109161678Sdavidxu * Lock PTHREAD_PRIO_NONE protocol POSIX mutex.
1110161678Sdavidxu */
1111161678Sdavidxustatic int
1112161678Sdavidxu_do_lock_normal(struct thread *td, struct umutex *m, uint32_t flags, int timo,
1113179970Sdavidxu	int mode)
1114161678Sdavidxu{
1115161678Sdavidxu	struct umtx_q *uq;
1116161678Sdavidxu	uint32_t owner, old, id;
1117161678Sdavidxu	int error = 0;
1118161678Sdavidxu
1119161678Sdavidxu	id = td->td_tid;
1120161678Sdavidxu	uq = td->td_umtxq;
1121161678Sdavidxu
1122161678Sdavidxu	/*
1123161678Sdavidxu	 * Care must be exercised when dealing with umtx structure. It
1124161678Sdavidxu	 * can fault on any access.
1125161678Sdavidxu	 */
1126161678Sdavidxu	for (;;) {
1127179970Sdavidxu		owner = fuword32(__DEVOLATILE(void *, &m->m_owner));
1128179970Sdavidxu		if (mode == _UMUTEX_WAIT) {
1129179970Sdavidxu			if (owner == UMUTEX_UNOWNED || owner == UMUTEX_CONTESTED)
1130179970Sdavidxu				return (0);
1131179970Sdavidxu		} else {
1132179970Sdavidxu			/*
1133179970Sdavidxu			 * Try the uncontested case.  This should be done in userland.
1134179970Sdavidxu			 */
1135179970Sdavidxu			owner = casuword32(&m->m_owner, UMUTEX_UNOWNED, id);
1136161678Sdavidxu
1137179970Sdavidxu			/* The acquire succeeded. */
1138179970Sdavidxu			if (owner == UMUTEX_UNOWNED)
1139161678Sdavidxu				return (0);
1140161678Sdavidxu
1141161678Sdavidxu			/* The address was invalid. */
1142161678Sdavidxu			if (owner == -1)
1143161678Sdavidxu				return (EFAULT);
1144161678Sdavidxu
1145179970Sdavidxu			/* If no one owns it but it is contested try to acquire it. */
1146179970Sdavidxu			if (owner == UMUTEX_CONTESTED) {
1147179970Sdavidxu				owner = casuword32(&m->m_owner,
1148179970Sdavidxu				    UMUTEX_CONTESTED, id | UMUTEX_CONTESTED);
1149179970Sdavidxu
1150179970Sdavidxu				if (owner == UMUTEX_CONTESTED)
1151179970Sdavidxu					return (0);
1152179970Sdavidxu
1153179970Sdavidxu				/* The address was invalid. */
1154179970Sdavidxu				if (owner == -1)
1155179970Sdavidxu					return (EFAULT);
1156179970Sdavidxu
1157179970Sdavidxu				/* If this failed the lock has changed, restart. */
1158179970Sdavidxu				continue;
1159179970Sdavidxu			}
1160161678Sdavidxu		}
1161161678Sdavidxu
1162161678Sdavidxu		if ((flags & UMUTEX_ERROR_CHECK) != 0 &&
1163161678Sdavidxu		    (owner & ~UMUTEX_CONTESTED) == id)
1164161678Sdavidxu			return (EDEADLK);
1165161678Sdavidxu
1166179970Sdavidxu		if (mode == _UMUTEX_TRY)
1167161678Sdavidxu			return (EBUSY);
1168161678Sdavidxu
1169161678Sdavidxu		/*
1170161678Sdavidxu		 * If we caught a signal, we have retried and now
1171161678Sdavidxu		 * exit immediately.
1172161678Sdavidxu		 */
1173161678Sdavidxu		if (error != 0)
1174161678Sdavidxu			return (error);
1175161678Sdavidxu
1176161678Sdavidxu		if ((error = umtx_key_get(m, TYPE_NORMAL_UMUTEX,
1177161678Sdavidxu		    GET_SHARE(flags), &uq->uq_key)) != 0)
1178161678Sdavidxu			return (error);
1179161678Sdavidxu
1180161678Sdavidxu		umtxq_lock(&uq->uq_key);
1181161678Sdavidxu		umtxq_busy(&uq->uq_key);
1182161678Sdavidxu		umtxq_insert(uq);
1183161678Sdavidxu		umtxq_unlock(&uq->uq_key);
1184161678Sdavidxu
1185161678Sdavidxu		/*
1186161678Sdavidxu		 * Set the contested bit so that a release in user space
1187161678Sdavidxu		 * knows to use the system call for unlock.  If this fails
1188161678Sdavidxu		 * either some one else has acquired the lock or it has been
1189161678Sdavidxu		 * released.
1190161678Sdavidxu		 */
1191161678Sdavidxu		old = casuword32(&m->m_owner, owner, owner | UMUTEX_CONTESTED);
1192161678Sdavidxu
1193161678Sdavidxu		/* The address was invalid. */
1194161678Sdavidxu		if (old == -1) {
1195161678Sdavidxu			umtxq_lock(&uq->uq_key);
1196161678Sdavidxu			umtxq_remove(uq);
1197179970Sdavidxu			umtxq_unbusy(&uq->uq_key);
1198161678Sdavidxu			umtxq_unlock(&uq->uq_key);
1199161678Sdavidxu			umtx_key_release(&uq->uq_key);
1200161678Sdavidxu			return (EFAULT);
1201161678Sdavidxu		}
1202161678Sdavidxu
1203161678Sdavidxu		/*
1204161678Sdavidxu		 * We set the contested bit, sleep. Otherwise the lock changed
1205161678Sdavidxu		 * and we need to retry or we lost a race to the thread
1206161678Sdavidxu		 * unlocking the umtx.
1207161678Sdavidxu		 */
1208161678Sdavidxu		umtxq_lock(&uq->uq_key);
1209179970Sdavidxu		umtxq_unbusy(&uq->uq_key);
1210161678Sdavidxu		if (old == owner)
1211161678Sdavidxu			error = umtxq_sleep(uq, "umtxn", timo);
1212161678Sdavidxu		umtxq_remove(uq);
1213161678Sdavidxu		umtxq_unlock(&uq->uq_key);
1214161678Sdavidxu		umtx_key_release(&uq->uq_key);
1215161678Sdavidxu	}
1216161678Sdavidxu
1217161678Sdavidxu	return (0);
1218161678Sdavidxu}
1219161678Sdavidxu
1220161678Sdavidxu/*
1221161678Sdavidxu * Lock PTHREAD_PRIO_NONE protocol POSIX mutex.
1222161678Sdavidxu */
1223161678Sdavidxu/*
1224161678Sdavidxu * Unlock PTHREAD_PRIO_NONE protocol POSIX mutex.
1225161678Sdavidxu */
1226161678Sdavidxustatic int
1227161678Sdavidxudo_unlock_normal(struct thread *td, struct umutex *m, uint32_t flags)
1228161678Sdavidxu{
1229161678Sdavidxu	struct umtx_key key;
1230161678Sdavidxu	uint32_t owner, old, id;
1231161678Sdavidxu	int error;
1232161678Sdavidxu	int count;
1233161678Sdavidxu
1234161678Sdavidxu	id = td->td_tid;
1235161678Sdavidxu	/*
1236161678Sdavidxu	 * Make sure we own this mtx.
1237161678Sdavidxu	 */
1238163449Sdavidxu	owner = fuword32(__DEVOLATILE(uint32_t *, &m->m_owner));
1239161678Sdavidxu	if (owner == -1)
1240161678Sdavidxu		return (EFAULT);
1241161678Sdavidxu
1242161678Sdavidxu	if ((owner & ~UMUTEX_CONTESTED) != id)
1243161678Sdavidxu		return (EPERM);
1244161678Sdavidxu
1245161678Sdavidxu	if ((owner & UMUTEX_CONTESTED) == 0) {
1246161678Sdavidxu		old = casuword32(&m->m_owner, owner, UMUTEX_UNOWNED);
1247161678Sdavidxu		if (old == -1)
1248161678Sdavidxu			return (EFAULT);
1249161678Sdavidxu		if (old == owner)
1250161678Sdavidxu			return (0);
1251161855Sdavidxu		owner = old;
1252161678Sdavidxu	}
1253161678Sdavidxu
1254161678Sdavidxu	/* We should only ever be in here for contested locks */
1255161678Sdavidxu	if ((error = umtx_key_get(m, TYPE_NORMAL_UMUTEX, GET_SHARE(flags),
1256161678Sdavidxu	    &key)) != 0)
1257161678Sdavidxu		return (error);
1258161678Sdavidxu
1259161678Sdavidxu	umtxq_lock(&key);
1260161678Sdavidxu	umtxq_busy(&key);
1261161678Sdavidxu	count = umtxq_count(&key);
1262161678Sdavidxu	umtxq_unlock(&key);
1263161678Sdavidxu
1264161678Sdavidxu	/*
1265161678Sdavidxu	 * When unlocking the umtx, it must be marked as unowned if
1266161678Sdavidxu	 * there is zero or one thread only waiting for it.
1267161678Sdavidxu	 * Otherwise, it must be marked as contested.
1268161678Sdavidxu	 */
1269161678Sdavidxu	old = casuword32(&m->m_owner, owner,
1270161678Sdavidxu		count <= 1 ? UMUTEX_UNOWNED : UMUTEX_CONTESTED);
1271161678Sdavidxu	umtxq_lock(&key);
1272161678Sdavidxu	umtxq_signal(&key,1);
1273161678Sdavidxu	umtxq_unbusy(&key);
1274161678Sdavidxu	umtxq_unlock(&key);
1275161678Sdavidxu	umtx_key_release(&key);
1276161678Sdavidxu	if (old == -1)
1277161678Sdavidxu		return (EFAULT);
1278161678Sdavidxu	if (old != owner)
1279161678Sdavidxu		return (EINVAL);
1280161678Sdavidxu	return (0);
1281161678Sdavidxu}
1282161678Sdavidxu
1283179970Sdavidxu/*
1284179970Sdavidxu * Check if the mutex is available and wake up a waiter,
1285179970Sdavidxu * only for simple mutex.
1286179970Sdavidxu */
1287179970Sdavidxustatic int
1288179970Sdavidxudo_wake_umutex(struct thread *td, struct umutex *m)
1289179970Sdavidxu{
1290179970Sdavidxu	struct umtx_key key;
1291179970Sdavidxu	uint32_t owner;
1292179970Sdavidxu	uint32_t flags;
1293179970Sdavidxu	int error;
1294179970Sdavidxu	int count;
1295179970Sdavidxu
1296179970Sdavidxu	owner = fuword32(__DEVOLATILE(uint32_t *, &m->m_owner));
1297179970Sdavidxu	if (owner == -1)
1298179970Sdavidxu		return (EFAULT);
1299179970Sdavidxu
1300179970Sdavidxu	if ((owner & ~UMUTEX_CONTESTED) != 0)
1301179970Sdavidxu		return (0);
1302179970Sdavidxu
1303179970Sdavidxu	flags = fuword32(&m->m_flags);
1304179970Sdavidxu
1305179970Sdavidxu	/* We should only ever be in here for contested locks */
1306179970Sdavidxu	if ((error = umtx_key_get(m, TYPE_NORMAL_UMUTEX, GET_SHARE(flags),
1307179970Sdavidxu	    &key)) != 0)
1308179970Sdavidxu		return (error);
1309179970Sdavidxu
1310179970Sdavidxu	umtxq_lock(&key);
1311179970Sdavidxu	umtxq_busy(&key);
1312179970Sdavidxu	count = umtxq_count(&key);
1313179970Sdavidxu	umtxq_unlock(&key);
1314179970Sdavidxu
1315179970Sdavidxu	if (count <= 1)
1316179970Sdavidxu		owner = casuword32(&m->m_owner, UMUTEX_CONTESTED, UMUTEX_UNOWNED);
1317179970Sdavidxu
1318179970Sdavidxu	umtxq_lock(&key);
1319179970Sdavidxu	if (count != 0 && (owner & ~UMUTEX_CONTESTED) == 0)
1320179970Sdavidxu		umtxq_signal(&key, 1);
1321179970Sdavidxu	umtxq_unbusy(&key);
1322179970Sdavidxu	umtxq_unlock(&key);
1323179970Sdavidxu	umtx_key_release(&key);
1324179970Sdavidxu	return (0);
1325179970Sdavidxu}
1326179970Sdavidxu
1327161678Sdavidxustatic inline struct umtx_pi *
1328163697Sdavidxuumtx_pi_alloc(int flags)
1329161678Sdavidxu{
1330161678Sdavidxu	struct umtx_pi *pi;
1331161678Sdavidxu
1332163697Sdavidxu	pi = uma_zalloc(umtx_pi_zone, M_ZERO | flags);
1333161678Sdavidxu	TAILQ_INIT(&pi->pi_blocked);
1334161678Sdavidxu	atomic_add_int(&umtx_pi_allocated, 1);
1335161678Sdavidxu	return (pi);
1336161678Sdavidxu}
1337161678Sdavidxu
1338161678Sdavidxustatic inline void
1339161678Sdavidxuumtx_pi_free(struct umtx_pi *pi)
1340161678Sdavidxu{
1341161678Sdavidxu	uma_zfree(umtx_pi_zone, pi);
1342161678Sdavidxu	atomic_add_int(&umtx_pi_allocated, -1);
1343161678Sdavidxu}
1344161678Sdavidxu
1345161678Sdavidxu/*
1346161678Sdavidxu * Adjust the thread's position on a pi_state after its priority has been
1347161678Sdavidxu * changed.
1348161678Sdavidxu */
1349161678Sdavidxustatic int
1350161678Sdavidxuumtx_pi_adjust_thread(struct umtx_pi *pi, struct thread *td)
1351161678Sdavidxu{
1352161678Sdavidxu	struct umtx_q *uq, *uq1, *uq2;
1353161678Sdavidxu	struct thread *td1;
1354161678Sdavidxu
1355170300Sjeff	mtx_assert(&umtx_lock, MA_OWNED);
1356161678Sdavidxu	if (pi == NULL)
1357161678Sdavidxu		return (0);
1358161678Sdavidxu
1359161678Sdavidxu	uq = td->td_umtxq;
1360161678Sdavidxu
1361161678Sdavidxu	/*
1362161678Sdavidxu	 * Check if the thread needs to be moved on the blocked chain.
1363161678Sdavidxu	 * It needs to be moved if either its priority is lower than
1364161678Sdavidxu	 * the previous thread or higher than the next thread.
1365161678Sdavidxu	 */
1366161678Sdavidxu	uq1 = TAILQ_PREV(uq, umtxq_head, uq_lockq);
1367161678Sdavidxu	uq2 = TAILQ_NEXT(uq, uq_lockq);
1368161678Sdavidxu	if ((uq1 != NULL && UPRI(td) < UPRI(uq1->uq_thread)) ||
1369161678Sdavidxu	    (uq2 != NULL && UPRI(td) > UPRI(uq2->uq_thread))) {
1370161678Sdavidxu		/*
1371161678Sdavidxu		 * Remove thread from blocked chain and determine where
1372161678Sdavidxu		 * it should be moved to.
1373161678Sdavidxu		 */
1374161678Sdavidxu		TAILQ_REMOVE(&pi->pi_blocked, uq, uq_lockq);
1375161678Sdavidxu		TAILQ_FOREACH(uq1, &pi->pi_blocked, uq_lockq) {
1376161678Sdavidxu			td1 = uq1->uq_thread;
1377161678Sdavidxu			MPASS(td1->td_proc->p_magic == P_MAGIC);
1378161678Sdavidxu			if (UPRI(td1) > UPRI(td))
1379161678Sdavidxu				break;
1380161678Sdavidxu		}
1381161678Sdavidxu
1382161678Sdavidxu		if (uq1 == NULL)
1383161678Sdavidxu			TAILQ_INSERT_TAIL(&pi->pi_blocked, uq, uq_lockq);
1384161678Sdavidxu		else
1385161678Sdavidxu			TAILQ_INSERT_BEFORE(uq1, uq, uq_lockq);
1386161678Sdavidxu	}
1387161678Sdavidxu	return (1);
1388161678Sdavidxu}
1389161678Sdavidxu
1390161678Sdavidxu/*
1391161678Sdavidxu * Propagate priority when a thread is blocked on POSIX
1392161678Sdavidxu * PI mutex.
1393161678Sdavidxu */
1394161678Sdavidxustatic void
1395161678Sdavidxuumtx_propagate_priority(struct thread *td)
1396161678Sdavidxu{
1397161678Sdavidxu	struct umtx_q *uq;
1398161678Sdavidxu	struct umtx_pi *pi;
1399161678Sdavidxu	int pri;
1400161678Sdavidxu
1401170300Sjeff	mtx_assert(&umtx_lock, MA_OWNED);
1402161678Sdavidxu	pri = UPRI(td);
1403161678Sdavidxu	uq = td->td_umtxq;
1404161678Sdavidxu	pi = uq->uq_pi_blocked;
1405161678Sdavidxu	if (pi == NULL)
1406161678Sdavidxu		return;
1407161678Sdavidxu
1408161678Sdavidxu	for (;;) {
1409161678Sdavidxu		td = pi->pi_owner;
1410216313Sdavidxu		if (td == NULL || td == curthread)
1411161678Sdavidxu			return;
1412161678Sdavidxu
1413161678Sdavidxu		MPASS(td->td_proc != NULL);
1414161678Sdavidxu		MPASS(td->td_proc->p_magic == P_MAGIC);
1415161678Sdavidxu
1416170300Sjeff		thread_lock(td);
1417216313Sdavidxu		if (td->td_lend_user_pri > pri)
1418216313Sdavidxu			sched_lend_user_prio(td, pri);
1419216313Sdavidxu		else {
1420216313Sdavidxu			thread_unlock(td);
1421216313Sdavidxu			break;
1422216313Sdavidxu		}
1423170300Sjeff		thread_unlock(td);
1424161678Sdavidxu
1425161678Sdavidxu		/*
1426161678Sdavidxu		 * Pick up the lock that td is blocked on.
1427161678Sdavidxu		 */
1428161678Sdavidxu		uq = td->td_umtxq;
1429161678Sdavidxu		pi = uq->uq_pi_blocked;
1430216791Sdavidxu		if (pi == NULL)
1431216791Sdavidxu			break;
1432161678Sdavidxu		/* Resort td on the list if needed. */
1433216791Sdavidxu		umtx_pi_adjust_thread(pi, td);
1434161678Sdavidxu	}
1435161678Sdavidxu}
1436161678Sdavidxu
1437161678Sdavidxu/*
1438161678Sdavidxu * Unpropagate priority for a PI mutex when a thread blocked on
1439161678Sdavidxu * it is interrupted by signal or resumed by others.
1440161678Sdavidxu */
1441161678Sdavidxustatic void
1442216791Sdavidxuumtx_repropagate_priority(struct umtx_pi *pi)
1443161678Sdavidxu{
1444161678Sdavidxu	struct umtx_q *uq, *uq_owner;
1445161678Sdavidxu	struct umtx_pi *pi2;
1446216791Sdavidxu	int pri;
1447161678Sdavidxu
1448170300Sjeff	mtx_assert(&umtx_lock, MA_OWNED);
1449161678Sdavidxu
1450161678Sdavidxu	while (pi != NULL && pi->pi_owner != NULL) {
1451161678Sdavidxu		pri = PRI_MAX;
1452161678Sdavidxu		uq_owner = pi->pi_owner->td_umtxq;
1453161678Sdavidxu
1454161678Sdavidxu		TAILQ_FOREACH(pi2, &uq_owner->uq_pi_contested, pi_link) {
1455161678Sdavidxu			uq = TAILQ_FIRST(&pi2->pi_blocked);
1456161678Sdavidxu			if (uq != NULL) {
1457161678Sdavidxu				if (pri > UPRI(uq->uq_thread))
1458161678Sdavidxu					pri = UPRI(uq->uq_thread);
1459161678Sdavidxu			}
1460161678Sdavidxu		}
1461161678Sdavidxu
1462161678Sdavidxu		if (pri > uq_owner->uq_inherited_pri)
1463161678Sdavidxu			pri = uq_owner->uq_inherited_pri;
1464170300Sjeff		thread_lock(pi->pi_owner);
1465216791Sdavidxu		sched_lend_user_prio(pi->pi_owner, pri);
1466170300Sjeff		thread_unlock(pi->pi_owner);
1467216791Sdavidxu		if ((pi = uq_owner->uq_pi_blocked) != NULL)
1468216791Sdavidxu			umtx_pi_adjust_thread(pi, uq_owner->uq_thread);
1469161678Sdavidxu	}
1470161678Sdavidxu}
1471161678Sdavidxu
1472161678Sdavidxu/*
1473161678Sdavidxu * Insert a PI mutex into owned list.
1474161678Sdavidxu */
1475161678Sdavidxustatic void
1476161678Sdavidxuumtx_pi_setowner(struct umtx_pi *pi, struct thread *owner)
1477161678Sdavidxu{
1478161678Sdavidxu	struct umtx_q *uq_owner;
1479161678Sdavidxu
1480161678Sdavidxu	uq_owner = owner->td_umtxq;
1481170300Sjeff	mtx_assert(&umtx_lock, MA_OWNED);
1482161678Sdavidxu	if (pi->pi_owner != NULL)
1483161678Sdavidxu		panic("pi_ower != NULL");
1484161678Sdavidxu	pi->pi_owner = owner;
1485161678Sdavidxu	TAILQ_INSERT_TAIL(&uq_owner->uq_pi_contested, pi, pi_link);
1486161678Sdavidxu}
1487161678Sdavidxu
1488161678Sdavidxu/*
1489161678Sdavidxu * Claim ownership of a PI mutex.
1490161678Sdavidxu */
1491161678Sdavidxustatic int
1492161678Sdavidxuumtx_pi_claim(struct umtx_pi *pi, struct thread *owner)
1493161678Sdavidxu{
1494161678Sdavidxu	struct umtx_q *uq, *uq_owner;
1495161678Sdavidxu
1496161678Sdavidxu	uq_owner = owner->td_umtxq;
1497170300Sjeff	mtx_lock_spin(&umtx_lock);
1498161678Sdavidxu	if (pi->pi_owner == owner) {
1499170300Sjeff		mtx_unlock_spin(&umtx_lock);
1500161678Sdavidxu		return (0);
1501161678Sdavidxu	}
1502161678Sdavidxu
1503161678Sdavidxu	if (pi->pi_owner != NULL) {
1504161678Sdavidxu		/*
1505161678Sdavidxu		 * userland may have already messed the mutex, sigh.
1506161678Sdavidxu		 */
1507170300Sjeff		mtx_unlock_spin(&umtx_lock);
1508161678Sdavidxu		return (EPERM);
1509161678Sdavidxu	}
1510161678Sdavidxu	umtx_pi_setowner(pi, owner);
1511161678Sdavidxu	uq = TAILQ_FIRST(&pi->pi_blocked);
1512161678Sdavidxu	if (uq != NULL) {
1513161678Sdavidxu		int pri;
1514161678Sdavidxu
1515161678Sdavidxu		pri = UPRI(uq->uq_thread);
1516170300Sjeff		thread_lock(owner);
1517161678Sdavidxu		if (pri < UPRI(owner))
1518161678Sdavidxu			sched_lend_user_prio(owner, pri);
1519170300Sjeff		thread_unlock(owner);
1520161678Sdavidxu	}
1521170300Sjeff	mtx_unlock_spin(&umtx_lock);
1522161678Sdavidxu	return (0);
1523161678Sdavidxu}
1524161678Sdavidxu
1525161678Sdavidxu/*
1526174701Sdavidxu * Adjust a thread's order position in its blocked PI mutex,
1527174701Sdavidxu * this may result new priority propagating process.
1528174701Sdavidxu */
1529174701Sdavidxuvoid
1530174701Sdavidxuumtx_pi_adjust(struct thread *td, u_char oldpri)
1531174701Sdavidxu{
1532174707Sdavidxu	struct umtx_q *uq;
1533174707Sdavidxu	struct umtx_pi *pi;
1534174707Sdavidxu
1535174707Sdavidxu	uq = td->td_umtxq;
1536174701Sdavidxu	mtx_lock_spin(&umtx_lock);
1537174707Sdavidxu	/*
1538174707Sdavidxu	 * Pick up the lock that td is blocked on.
1539174707Sdavidxu	 */
1540174707Sdavidxu	pi = uq->uq_pi_blocked;
1541216791Sdavidxu	if (pi != NULL) {
1542216791Sdavidxu		umtx_pi_adjust_thread(pi, td);
1543216791Sdavidxu		umtx_repropagate_priority(pi);
1544216791Sdavidxu	}
1545174701Sdavidxu	mtx_unlock_spin(&umtx_lock);
1546174701Sdavidxu}
1547174701Sdavidxu
1548174701Sdavidxu/*
1549161678Sdavidxu * Sleep on a PI mutex.
1550161678Sdavidxu */
1551161678Sdavidxustatic int
1552161678Sdavidxuumtxq_sleep_pi(struct umtx_q *uq, struct umtx_pi *pi,
1553161678Sdavidxu	uint32_t owner, const char *wmesg, int timo)
1554161678Sdavidxu{
1555161678Sdavidxu	struct umtxq_chain *uc;
1556161678Sdavidxu	struct thread *td, *td1;
1557161678Sdavidxu	struct umtx_q *uq1;
1558161678Sdavidxu	int pri;
1559161678Sdavidxu	int error = 0;
1560161678Sdavidxu
1561161678Sdavidxu	td = uq->uq_thread;
1562161678Sdavidxu	KASSERT(td == curthread, ("inconsistent uq_thread"));
1563161678Sdavidxu	uc = umtxq_getchain(&uq->uq_key);
1564161678Sdavidxu	UMTXQ_LOCKED_ASSERT(uc);
1565189756Sdavidxu	UMTXQ_BUSY_ASSERT(uc);
1566161678Sdavidxu	umtxq_insert(uq);
1567189756Sdavidxu	mtx_lock_spin(&umtx_lock);
1568161678Sdavidxu	if (pi->pi_owner == NULL) {
1569189756Sdavidxu		mtx_unlock_spin(&umtx_lock);
1570213642Sdavidxu		/* XXX Only look up thread in current process. */
1571213642Sdavidxu		td1 = tdfind(owner, curproc->p_pid);
1572170300Sjeff		mtx_lock_spin(&umtx_lock);
1573215336Sdavidxu		if (td1 != NULL) {
1574215336Sdavidxu			if (pi->pi_owner == NULL)
1575215336Sdavidxu				umtx_pi_setowner(pi, td1);
1576215336Sdavidxu			PROC_UNLOCK(td1->td_proc);
1577161678Sdavidxu		}
1578161678Sdavidxu	}
1579161678Sdavidxu
1580161678Sdavidxu	TAILQ_FOREACH(uq1, &pi->pi_blocked, uq_lockq) {
1581161678Sdavidxu		pri = UPRI(uq1->uq_thread);
1582161678Sdavidxu		if (pri > UPRI(td))
1583161678Sdavidxu			break;
1584161678Sdavidxu	}
1585161678Sdavidxu
1586161678Sdavidxu	if (uq1 != NULL)
1587161678Sdavidxu		TAILQ_INSERT_BEFORE(uq1, uq, uq_lockq);
1588161678Sdavidxu	else
1589161678Sdavidxu		TAILQ_INSERT_TAIL(&pi->pi_blocked, uq, uq_lockq);
1590161678Sdavidxu
1591161678Sdavidxu	uq->uq_pi_blocked = pi;
1592174701Sdavidxu	thread_lock(td);
1593161678Sdavidxu	td->td_flags |= TDF_UPIBLOCKED;
1594174701Sdavidxu	thread_unlock(td);
1595161678Sdavidxu	umtx_propagate_priority(td);
1596170300Sjeff	mtx_unlock_spin(&umtx_lock);
1597189756Sdavidxu	umtxq_unbusy(&uq->uq_key);
1598161678Sdavidxu
1599161678Sdavidxu	if (uq->uq_flags & UQF_UMTXQ) {
1600161678Sdavidxu		error = msleep(uq, &uc->uc_lock, PCATCH, wmesg, timo);
1601161678Sdavidxu		if (error == EWOULDBLOCK)
1602161678Sdavidxu			error = ETIMEDOUT;
1603161678Sdavidxu		if (uq->uq_flags & UQF_UMTXQ) {
1604161678Sdavidxu			umtxq_remove(uq);
1605161678Sdavidxu		}
1606161678Sdavidxu	}
1607170300Sjeff	mtx_lock_spin(&umtx_lock);
1608161678Sdavidxu	uq->uq_pi_blocked = NULL;
1609174701Sdavidxu	thread_lock(td);
1610161678Sdavidxu	td->td_flags &= ~TDF_UPIBLOCKED;
1611174701Sdavidxu	thread_unlock(td);
1612161678Sdavidxu	TAILQ_REMOVE(&pi->pi_blocked, uq, uq_lockq);
1613216791Sdavidxu	umtx_repropagate_priority(pi);
1614170300Sjeff	mtx_unlock_spin(&umtx_lock);
1615189756Sdavidxu	umtxq_unlock(&uq->uq_key);
1616161678Sdavidxu
1617161678Sdavidxu	return (error);
1618161678Sdavidxu}
1619161678Sdavidxu
1620161678Sdavidxu/*
1621161678Sdavidxu * Add reference count for a PI mutex.
1622161678Sdavidxu */
1623161678Sdavidxustatic void
1624161678Sdavidxuumtx_pi_ref(struct umtx_pi *pi)
1625161678Sdavidxu{
1626161678Sdavidxu	struct umtxq_chain *uc;
1627161678Sdavidxu
1628161678Sdavidxu	uc = umtxq_getchain(&pi->pi_key);
1629161678Sdavidxu	UMTXQ_LOCKED_ASSERT(uc);
1630161678Sdavidxu	pi->pi_refcount++;
1631161678Sdavidxu}
1632161678Sdavidxu
1633161678Sdavidxu/*
1634161678Sdavidxu * Decrease reference count for a PI mutex, if the counter
1635161678Sdavidxu * is decreased to zero, its memory space is freed.
1636161678Sdavidxu */
1637161678Sdavidxustatic void
1638161678Sdavidxuumtx_pi_unref(struct umtx_pi *pi)
1639161678Sdavidxu{
1640161678Sdavidxu	struct umtxq_chain *uc;
1641161678Sdavidxu
1642161678Sdavidxu	uc = umtxq_getchain(&pi->pi_key);
1643161678Sdavidxu	UMTXQ_LOCKED_ASSERT(uc);
1644161678Sdavidxu	KASSERT(pi->pi_refcount > 0, ("invalid reference count"));
1645161678Sdavidxu	if (--pi->pi_refcount == 0) {
1646170300Sjeff		mtx_lock_spin(&umtx_lock);
1647161678Sdavidxu		if (pi->pi_owner != NULL) {
1648161678Sdavidxu			TAILQ_REMOVE(&pi->pi_owner->td_umtxq->uq_pi_contested,
1649161678Sdavidxu				pi, pi_link);
1650161678Sdavidxu			pi->pi_owner = NULL;
1651161678Sdavidxu		}
1652161678Sdavidxu		KASSERT(TAILQ_EMPTY(&pi->pi_blocked),
1653161678Sdavidxu			("blocked queue not empty"));
1654170300Sjeff		mtx_unlock_spin(&umtx_lock);
1655161678Sdavidxu		TAILQ_REMOVE(&uc->uc_pi_list, pi, pi_hashlink);
1656189756Sdavidxu		umtx_pi_free(pi);
1657161678Sdavidxu	}
1658161678Sdavidxu}
1659161678Sdavidxu
1660161678Sdavidxu/*
1661161678Sdavidxu * Find a PI mutex in hash table.
1662161678Sdavidxu */
1663161678Sdavidxustatic struct umtx_pi *
1664161678Sdavidxuumtx_pi_lookup(struct umtx_key *key)
1665161678Sdavidxu{
1666161678Sdavidxu	struct umtxq_chain *uc;
1667161678Sdavidxu	struct umtx_pi *pi;
1668161678Sdavidxu
1669161678Sdavidxu	uc = umtxq_getchain(key);
1670161678Sdavidxu	UMTXQ_LOCKED_ASSERT(uc);
1671161678Sdavidxu
1672161678Sdavidxu	TAILQ_FOREACH(pi, &uc->uc_pi_list, pi_hashlink) {
1673161678Sdavidxu		if (umtx_key_match(&pi->pi_key, key)) {
1674161678Sdavidxu			return (pi);
1675161678Sdavidxu		}
1676161678Sdavidxu	}
1677161678Sdavidxu	return (NULL);
1678161678Sdavidxu}
1679161678Sdavidxu
1680161678Sdavidxu/*
1681161678Sdavidxu * Insert a PI mutex into hash table.
1682161678Sdavidxu */
1683161678Sdavidxustatic inline void
1684161678Sdavidxuumtx_pi_insert(struct umtx_pi *pi)
1685161678Sdavidxu{
1686161678Sdavidxu	struct umtxq_chain *uc;
1687161678Sdavidxu
1688161678Sdavidxu	uc = umtxq_getchain(&pi->pi_key);
1689161678Sdavidxu	UMTXQ_LOCKED_ASSERT(uc);
1690161678Sdavidxu	TAILQ_INSERT_TAIL(&uc->uc_pi_list, pi, pi_hashlink);
1691161678Sdavidxu}
1692161678Sdavidxu
1693161678Sdavidxu/*
1694161678Sdavidxu * Lock a PI mutex.
1695161678Sdavidxu */
1696161678Sdavidxustatic int
1697161678Sdavidxu_do_lock_pi(struct thread *td, struct umutex *m, uint32_t flags, int timo,
1698161678Sdavidxu	int try)
1699161678Sdavidxu{
1700161678Sdavidxu	struct umtx_q *uq;
1701161678Sdavidxu	struct umtx_pi *pi, *new_pi;
1702161678Sdavidxu	uint32_t id, owner, old;
1703161678Sdavidxu	int error;
1704161678Sdavidxu
1705161678Sdavidxu	id = td->td_tid;
1706161678Sdavidxu	uq = td->td_umtxq;
1707161678Sdavidxu
1708161678Sdavidxu	if ((error = umtx_key_get(m, TYPE_PI_UMUTEX, GET_SHARE(flags),
1709161678Sdavidxu	    &uq->uq_key)) != 0)
1710161678Sdavidxu		return (error);
1711163697Sdavidxu	umtxq_lock(&uq->uq_key);
1712163697Sdavidxu	pi = umtx_pi_lookup(&uq->uq_key);
1713163697Sdavidxu	if (pi == NULL) {
1714163697Sdavidxu		new_pi = umtx_pi_alloc(M_NOWAIT);
1715163697Sdavidxu		if (new_pi == NULL) {
1716161678Sdavidxu			umtxq_unlock(&uq->uq_key);
1717163697Sdavidxu			new_pi = umtx_pi_alloc(M_WAITOK);
1718161678Sdavidxu			umtxq_lock(&uq->uq_key);
1719161678Sdavidxu			pi = umtx_pi_lookup(&uq->uq_key);
1720163697Sdavidxu			if (pi != NULL) {
1721161678Sdavidxu				umtx_pi_free(new_pi);
1722163697Sdavidxu				new_pi = NULL;
1723161678Sdavidxu			}
1724161678Sdavidxu		}
1725163697Sdavidxu		if (new_pi != NULL) {
1726163697Sdavidxu			new_pi->pi_key = uq->uq_key;
1727163697Sdavidxu			umtx_pi_insert(new_pi);
1728163697Sdavidxu			pi = new_pi;
1729163697Sdavidxu		}
1730163697Sdavidxu	}
1731163697Sdavidxu	umtx_pi_ref(pi);
1732163697Sdavidxu	umtxq_unlock(&uq->uq_key);
1733161678Sdavidxu
1734163697Sdavidxu	/*
1735163697Sdavidxu	 * Care must be exercised when dealing with umtx structure.  It
1736163697Sdavidxu	 * can fault on any access.
1737163697Sdavidxu	 */
1738163697Sdavidxu	for (;;) {
1739161678Sdavidxu		/*
1740161678Sdavidxu		 * Try the uncontested case.  This should be done in userland.
1741161678Sdavidxu		 */
1742161678Sdavidxu		owner = casuword32(&m->m_owner, UMUTEX_UNOWNED, id);
1743161678Sdavidxu
1744161678Sdavidxu		/* The acquire succeeded. */
1745161678Sdavidxu		if (owner == UMUTEX_UNOWNED) {
1746161678Sdavidxu			error = 0;
1747161678Sdavidxu			break;
1748161678Sdavidxu		}
1749161678Sdavidxu
1750161678Sdavidxu		/* The address was invalid. */
1751161678Sdavidxu		if (owner == -1) {
1752161678Sdavidxu			error = EFAULT;
1753161678Sdavidxu			break;
1754161678Sdavidxu		}
1755161678Sdavidxu
1756161678Sdavidxu		/* If no one owns it but it is contested try to acquire it. */
1757161678Sdavidxu		if (owner == UMUTEX_CONTESTED) {
1758161678Sdavidxu			owner = casuword32(&m->m_owner,
1759161678Sdavidxu			    UMUTEX_CONTESTED, id | UMUTEX_CONTESTED);
1760161678Sdavidxu
1761161678Sdavidxu			if (owner == UMUTEX_CONTESTED) {
1762161678Sdavidxu				umtxq_lock(&uq->uq_key);
1763189756Sdavidxu				umtxq_busy(&uq->uq_key);
1764161678Sdavidxu				error = umtx_pi_claim(pi, td);
1765189756Sdavidxu				umtxq_unbusy(&uq->uq_key);
1766161678Sdavidxu				umtxq_unlock(&uq->uq_key);
1767161678Sdavidxu				break;
1768161678Sdavidxu			}
1769161678Sdavidxu
1770161678Sdavidxu			/* The address was invalid. */
1771161678Sdavidxu			if (owner == -1) {
1772161678Sdavidxu				error = EFAULT;
1773161678Sdavidxu				break;
1774161678Sdavidxu			}
1775161678Sdavidxu
1776161678Sdavidxu			/* If this failed the lock has changed, restart. */
1777161678Sdavidxu			continue;
1778161678Sdavidxu		}
1779161678Sdavidxu
1780161678Sdavidxu		if ((flags & UMUTEX_ERROR_CHECK) != 0 &&
1781161678Sdavidxu		    (owner & ~UMUTEX_CONTESTED) == id) {
1782161678Sdavidxu			error = EDEADLK;
1783161678Sdavidxu			break;
1784161678Sdavidxu		}
1785161678Sdavidxu
1786161678Sdavidxu		if (try != 0) {
1787161678Sdavidxu			error = EBUSY;
1788161678Sdavidxu			break;
1789161678Sdavidxu		}
1790161678Sdavidxu
1791161678Sdavidxu		/*
1792161678Sdavidxu		 * If we caught a signal, we have retried and now
1793161678Sdavidxu		 * exit immediately.
1794161678Sdavidxu		 */
1795161678Sdavidxu		if (error != 0)
1796161678Sdavidxu			break;
1797161678Sdavidxu
1798161678Sdavidxu		umtxq_lock(&uq->uq_key);
1799161678Sdavidxu		umtxq_busy(&uq->uq_key);
1800161678Sdavidxu		umtxq_unlock(&uq->uq_key);
1801161678Sdavidxu
1802161678Sdavidxu		/*
1803161678Sdavidxu		 * Set the contested bit so that a release in user space
1804161678Sdavidxu		 * knows to use the system call for unlock.  If this fails
1805161678Sdavidxu		 * either some one else has acquired the lock or it has been
1806161678Sdavidxu		 * released.
1807161678Sdavidxu		 */
1808161678Sdavidxu		old = casuword32(&m->m_owner, owner, owner | UMUTEX_CONTESTED);
1809161678Sdavidxu
1810161678Sdavidxu		/* The address was invalid. */
1811161678Sdavidxu		if (old == -1) {
1812161678Sdavidxu			umtxq_lock(&uq->uq_key);
1813161678Sdavidxu			umtxq_unbusy(&uq->uq_key);
1814161678Sdavidxu			umtxq_unlock(&uq->uq_key);
1815161678Sdavidxu			error = EFAULT;
1816161678Sdavidxu			break;
1817161678Sdavidxu		}
1818161678Sdavidxu
1819161678Sdavidxu		umtxq_lock(&uq->uq_key);
1820161678Sdavidxu		/*
1821161678Sdavidxu		 * We set the contested bit, sleep. Otherwise the lock changed
1822161678Sdavidxu		 * and we need to retry or we lost a race to the thread
1823161678Sdavidxu		 * unlocking the umtx.
1824161678Sdavidxu		 */
1825161678Sdavidxu		if (old == owner)
1826161678Sdavidxu			error = umtxq_sleep_pi(uq, pi, owner & ~UMUTEX_CONTESTED,
1827161678Sdavidxu				 "umtxpi", timo);
1828189756Sdavidxu		else {
1829189756Sdavidxu			umtxq_unbusy(&uq->uq_key);
1830189756Sdavidxu			umtxq_unlock(&uq->uq_key);
1831189756Sdavidxu		}
1832161678Sdavidxu	}
1833161678Sdavidxu
1834163697Sdavidxu	umtxq_lock(&uq->uq_key);
1835163697Sdavidxu	umtx_pi_unref(pi);
1836163697Sdavidxu	umtxq_unlock(&uq->uq_key);
1837161678Sdavidxu
1838161678Sdavidxu	umtx_key_release(&uq->uq_key);
1839161678Sdavidxu	return (error);
1840161678Sdavidxu}
1841161678Sdavidxu
1842161678Sdavidxu/*
1843161678Sdavidxu * Unlock a PI mutex.
1844161678Sdavidxu */
1845161678Sdavidxustatic int
1846161678Sdavidxudo_unlock_pi(struct thread *td, struct umutex *m, uint32_t flags)
1847161678Sdavidxu{
1848161678Sdavidxu	struct umtx_key key;
1849161678Sdavidxu	struct umtx_q *uq_first, *uq_first2, *uq_me;
1850161678Sdavidxu	struct umtx_pi *pi, *pi2;
1851161678Sdavidxu	uint32_t owner, old, id;
1852161678Sdavidxu	int error;
1853161678Sdavidxu	int count;
1854161678Sdavidxu	int pri;
1855161678Sdavidxu
1856161678Sdavidxu	id = td->td_tid;
1857161678Sdavidxu	/*
1858161678Sdavidxu	 * Make sure we own this mtx.
1859161678Sdavidxu	 */
1860163449Sdavidxu	owner = fuword32(__DEVOLATILE(uint32_t *, &m->m_owner));
1861161678Sdavidxu	if (owner == -1)
1862161678Sdavidxu		return (EFAULT);
1863161678Sdavidxu
1864161678Sdavidxu	if ((owner & ~UMUTEX_CONTESTED) != id)
1865161678Sdavidxu		return (EPERM);
1866161678Sdavidxu
1867161678Sdavidxu	/* This should be done in userland */
1868161678Sdavidxu	if ((owner & UMUTEX_CONTESTED) == 0) {
1869161678Sdavidxu		old = casuword32(&m->m_owner, owner, UMUTEX_UNOWNED);
1870161678Sdavidxu		if (old == -1)
1871161678Sdavidxu			return (EFAULT);
1872161678Sdavidxu		if (old == owner)
1873161678Sdavidxu			return (0);
1874161855Sdavidxu		owner = old;
1875161678Sdavidxu	}
1876161678Sdavidxu
1877161678Sdavidxu	/* We should only ever be in here for contested locks */
1878161678Sdavidxu	if ((error = umtx_key_get(m, TYPE_PI_UMUTEX, GET_SHARE(flags),
1879161678Sdavidxu	    &key)) != 0)
1880161678Sdavidxu		return (error);
1881161678Sdavidxu
1882161678Sdavidxu	umtxq_lock(&key);
1883161678Sdavidxu	umtxq_busy(&key);
1884161678Sdavidxu	count = umtxq_count_pi(&key, &uq_first);
1885161678Sdavidxu	if (uq_first != NULL) {
1886189756Sdavidxu		mtx_lock_spin(&umtx_lock);
1887161678Sdavidxu		pi = uq_first->uq_pi_blocked;
1888189756Sdavidxu		KASSERT(pi != NULL, ("pi == NULL?"));
1889161678Sdavidxu		if (pi->pi_owner != curthread) {
1890189756Sdavidxu			mtx_unlock_spin(&umtx_lock);
1891161678Sdavidxu			umtxq_unbusy(&key);
1892161678Sdavidxu			umtxq_unlock(&key);
1893189756Sdavidxu			umtx_key_release(&key);
1894161678Sdavidxu			/* userland messed the mutex */
1895161678Sdavidxu			return (EPERM);
1896161678Sdavidxu		}
1897161678Sdavidxu		uq_me = curthread->td_umtxq;
1898161678Sdavidxu		pi->pi_owner = NULL;
1899161678Sdavidxu		TAILQ_REMOVE(&uq_me->uq_pi_contested, pi, pi_link);
1900189756Sdavidxu		/* get highest priority thread which is still sleeping. */
1901161678Sdavidxu		uq_first = TAILQ_FIRST(&pi->pi_blocked);
1902189756Sdavidxu		while (uq_first != NULL &&
1903189756Sdavidxu		       (uq_first->uq_flags & UQF_UMTXQ) == 0) {
1904189756Sdavidxu			uq_first = TAILQ_NEXT(uq_first, uq_lockq);
1905189756Sdavidxu		}
1906161678Sdavidxu		pri = PRI_MAX;
1907161678Sdavidxu		TAILQ_FOREACH(pi2, &uq_me->uq_pi_contested, pi_link) {
1908161678Sdavidxu			uq_first2 = TAILQ_FIRST(&pi2->pi_blocked);
1909161678Sdavidxu			if (uq_first2 != NULL) {
1910161678Sdavidxu				if (pri > UPRI(uq_first2->uq_thread))
1911161678Sdavidxu					pri = UPRI(uq_first2->uq_thread);
1912161678Sdavidxu			}
1913161678Sdavidxu		}
1914170300Sjeff		thread_lock(curthread);
1915216791Sdavidxu		sched_lend_user_prio(curthread, pri);
1916170300Sjeff		thread_unlock(curthread);
1917170300Sjeff		mtx_unlock_spin(&umtx_lock);
1918189756Sdavidxu		if (uq_first)
1919189756Sdavidxu			umtxq_signal_thread(uq_first);
1920161678Sdavidxu	}
1921161678Sdavidxu	umtxq_unlock(&key);
1922161678Sdavidxu
1923161678Sdavidxu	/*
1924161678Sdavidxu	 * When unlocking the umtx, it must be marked as unowned if
1925161678Sdavidxu	 * there is zero or one thread only waiting for it.
1926161678Sdavidxu	 * Otherwise, it must be marked as contested.
1927161678Sdavidxu	 */
1928161678Sdavidxu	old = casuword32(&m->m_owner, owner,
1929161678Sdavidxu		count <= 1 ? UMUTEX_UNOWNED : UMUTEX_CONTESTED);
1930161678Sdavidxu
1931161678Sdavidxu	umtxq_lock(&key);
1932161678Sdavidxu	umtxq_unbusy(&key);
1933161678Sdavidxu	umtxq_unlock(&key);
1934161678Sdavidxu	umtx_key_release(&key);
1935161678Sdavidxu	if (old == -1)
1936161678Sdavidxu		return (EFAULT);
1937161678Sdavidxu	if (old != owner)
1938161678Sdavidxu		return (EINVAL);
1939161678Sdavidxu	return (0);
1940161678Sdavidxu}
1941161678Sdavidxu
1942161678Sdavidxu/*
1943161678Sdavidxu * Lock a PP mutex.
1944161678Sdavidxu */
1945161678Sdavidxustatic int
1946161678Sdavidxu_do_lock_pp(struct thread *td, struct umutex *m, uint32_t flags, int timo,
1947161678Sdavidxu	int try)
1948161678Sdavidxu{
1949161678Sdavidxu	struct umtx_q *uq, *uq2;
1950161678Sdavidxu	struct umtx_pi *pi;
1951161678Sdavidxu	uint32_t ceiling;
1952161678Sdavidxu	uint32_t owner, id;
1953161678Sdavidxu	int error, pri, old_inherited_pri, su;
1954161678Sdavidxu
1955161678Sdavidxu	id = td->td_tid;
1956161678Sdavidxu	uq = td->td_umtxq;
1957161678Sdavidxu	if ((error = umtx_key_get(m, TYPE_PP_UMUTEX, GET_SHARE(flags),
1958161678Sdavidxu	    &uq->uq_key)) != 0)
1959161678Sdavidxu		return (error);
1960164033Srwatson	su = (priv_check(td, PRIV_SCHED_RTPRIO) == 0);
1961161678Sdavidxu	for (;;) {
1962161678Sdavidxu		old_inherited_pri = uq->uq_inherited_pri;
1963161678Sdavidxu		umtxq_lock(&uq->uq_key);
1964161678Sdavidxu		umtxq_busy(&uq->uq_key);
1965161678Sdavidxu		umtxq_unlock(&uq->uq_key);
1966161678Sdavidxu
1967161678Sdavidxu		ceiling = RTP_PRIO_MAX - fuword32(&m->m_ceilings[0]);
1968161678Sdavidxu		if (ceiling > RTP_PRIO_MAX) {
1969161678Sdavidxu			error = EINVAL;
1970161678Sdavidxu			goto out;
1971161678Sdavidxu		}
1972161678Sdavidxu
1973170300Sjeff		mtx_lock_spin(&umtx_lock);
1974161678Sdavidxu		if (UPRI(td) < PRI_MIN_REALTIME + ceiling) {
1975170300Sjeff			mtx_unlock_spin(&umtx_lock);
1976161678Sdavidxu			error = EINVAL;
1977161678Sdavidxu			goto out;
1978161678Sdavidxu		}
1979161678Sdavidxu		if (su && PRI_MIN_REALTIME + ceiling < uq->uq_inherited_pri) {
1980161678Sdavidxu			uq->uq_inherited_pri = PRI_MIN_REALTIME + ceiling;
1981170300Sjeff			thread_lock(td);
1982161678Sdavidxu			if (uq->uq_inherited_pri < UPRI(td))
1983161678Sdavidxu				sched_lend_user_prio(td, uq->uq_inherited_pri);
1984170300Sjeff			thread_unlock(td);
1985161678Sdavidxu		}
1986170300Sjeff		mtx_unlock_spin(&umtx_lock);
1987161678Sdavidxu
1988161678Sdavidxu		owner = casuword32(&m->m_owner,
1989161678Sdavidxu		    UMUTEX_CONTESTED, id | UMUTEX_CONTESTED);
1990161678Sdavidxu
1991161678Sdavidxu		if (owner == UMUTEX_CONTESTED) {
1992161678Sdavidxu			error = 0;
1993161678Sdavidxu			break;
1994161678Sdavidxu		}
1995161678Sdavidxu
1996161678Sdavidxu		/* The address was invalid. */
1997161678Sdavidxu		if (owner == -1) {
1998161678Sdavidxu			error = EFAULT;
1999161678Sdavidxu			break;
2000161678Sdavidxu		}
2001161678Sdavidxu
2002161678Sdavidxu		if ((flags & UMUTEX_ERROR_CHECK) != 0 &&
2003161678Sdavidxu		    (owner & ~UMUTEX_CONTESTED) == id) {
2004161678Sdavidxu			error = EDEADLK;
2005161678Sdavidxu			break;
2006161678Sdavidxu		}
2007161678Sdavidxu
2008161678Sdavidxu		if (try != 0) {
2009161678Sdavidxu			error = EBUSY;
2010161678Sdavidxu			break;
2011161678Sdavidxu		}
2012161678Sdavidxu
2013161678Sdavidxu		/*
2014161678Sdavidxu		 * If we caught a signal, we have retried and now
2015161678Sdavidxu		 * exit immediately.
2016161678Sdavidxu		 */
2017161678Sdavidxu		if (error != 0)
2018161678Sdavidxu			break;
2019161678Sdavidxu
2020161678Sdavidxu		umtxq_lock(&uq->uq_key);
2021161678Sdavidxu		umtxq_insert(uq);
2022161678Sdavidxu		umtxq_unbusy(&uq->uq_key);
2023161678Sdavidxu		error = umtxq_sleep(uq, "umtxpp", timo);
2024161678Sdavidxu		umtxq_remove(uq);
2025161678Sdavidxu		umtxq_unlock(&uq->uq_key);
2026161678Sdavidxu
2027170300Sjeff		mtx_lock_spin(&umtx_lock);
2028161678Sdavidxu		uq->uq_inherited_pri = old_inherited_pri;
2029161678Sdavidxu		pri = PRI_MAX;
2030161678Sdavidxu		TAILQ_FOREACH(pi, &uq->uq_pi_contested, pi_link) {
2031161678Sdavidxu			uq2 = TAILQ_FIRST(&pi->pi_blocked);
2032161678Sdavidxu			if (uq2 != NULL) {
2033161678Sdavidxu				if (pri > UPRI(uq2->uq_thread))
2034161678Sdavidxu					pri = UPRI(uq2->uq_thread);
2035161678Sdavidxu			}
2036161678Sdavidxu		}
2037161678Sdavidxu		if (pri > uq->uq_inherited_pri)
2038161678Sdavidxu			pri = uq->uq_inherited_pri;
2039170300Sjeff		thread_lock(td);
2040216791Sdavidxu		sched_lend_user_prio(td, pri);
2041170300Sjeff		thread_unlock(td);
2042170300Sjeff		mtx_unlock_spin(&umtx_lock);
2043161678Sdavidxu	}
2044161678Sdavidxu
2045161678Sdavidxu	if (error != 0) {
2046170300Sjeff		mtx_lock_spin(&umtx_lock);
2047161678Sdavidxu		uq->uq_inherited_pri = old_inherited_pri;
2048161678Sdavidxu		pri = PRI_MAX;
2049161678Sdavidxu		TAILQ_FOREACH(pi, &uq->uq_pi_contested, pi_link) {
2050161678Sdavidxu			uq2 = TAILQ_FIRST(&pi->pi_blocked);
2051161678Sdavidxu			if (uq2 != NULL) {
2052161678Sdavidxu				if (pri > UPRI(uq2->uq_thread))
2053161678Sdavidxu					pri = UPRI(uq2->uq_thread);
2054161678Sdavidxu			}
2055161678Sdavidxu		}
2056161678Sdavidxu		if (pri > uq->uq_inherited_pri)
2057161678Sdavidxu			pri = uq->uq_inherited_pri;
2058170300Sjeff		thread_lock(td);
2059216791Sdavidxu		sched_lend_user_prio(td, pri);
2060170300Sjeff		thread_unlock(td);
2061170300Sjeff		mtx_unlock_spin(&umtx_lock);
2062161678Sdavidxu	}
2063161678Sdavidxu
2064161678Sdavidxuout:
2065161678Sdavidxu	umtxq_lock(&uq->uq_key);
2066161678Sdavidxu	umtxq_unbusy(&uq->uq_key);
2067161678Sdavidxu	umtxq_unlock(&uq->uq_key);
2068161678Sdavidxu	umtx_key_release(&uq->uq_key);
2069161678Sdavidxu	return (error);
2070161678Sdavidxu}
2071161678Sdavidxu
2072161678Sdavidxu/*
2073161678Sdavidxu * Unlock a PP mutex.
2074161678Sdavidxu */
2075161678Sdavidxustatic int
2076161678Sdavidxudo_unlock_pp(struct thread *td, struct umutex *m, uint32_t flags)
2077161678Sdavidxu{
2078161678Sdavidxu	struct umtx_key key;
2079161678Sdavidxu	struct umtx_q *uq, *uq2;
2080161678Sdavidxu	struct umtx_pi *pi;
2081161678Sdavidxu	uint32_t owner, id;
2082161678Sdavidxu	uint32_t rceiling;
2083161926Sdavidxu	int error, pri, new_inherited_pri, su;
2084161678Sdavidxu
2085161678Sdavidxu	id = td->td_tid;
2086161678Sdavidxu	uq = td->td_umtxq;
2087164033Srwatson	su = (priv_check(td, PRIV_SCHED_RTPRIO) == 0);
2088161678Sdavidxu
2089161678Sdavidxu	/*
2090161678Sdavidxu	 * Make sure we own this mtx.
2091161678Sdavidxu	 */
2092163449Sdavidxu	owner = fuword32(__DEVOLATILE(uint32_t *, &m->m_owner));
2093161678Sdavidxu	if (owner == -1)
2094161678Sdavidxu		return (EFAULT);
2095161678Sdavidxu
2096161678Sdavidxu	if ((owner & ~UMUTEX_CONTESTED) != id)
2097161678Sdavidxu		return (EPERM);
2098161678Sdavidxu
2099161678Sdavidxu	error = copyin(&m->m_ceilings[1], &rceiling, sizeof(uint32_t));
2100161678Sdavidxu	if (error != 0)
2101161678Sdavidxu		return (error);
2102161678Sdavidxu
2103161678Sdavidxu	if (rceiling == -1)
2104161678Sdavidxu		new_inherited_pri = PRI_MAX;
2105161678Sdavidxu	else {
2106161678Sdavidxu		rceiling = RTP_PRIO_MAX - rceiling;
2107161678Sdavidxu		if (rceiling > RTP_PRIO_MAX)
2108161678Sdavidxu			return (EINVAL);
2109161678Sdavidxu		new_inherited_pri = PRI_MIN_REALTIME + rceiling;
2110161678Sdavidxu	}
2111161678Sdavidxu
2112161678Sdavidxu	if ((error = umtx_key_get(m, TYPE_PP_UMUTEX, GET_SHARE(flags),
2113161678Sdavidxu	    &key)) != 0)
2114161678Sdavidxu		return (error);
2115161678Sdavidxu	umtxq_lock(&key);
2116161678Sdavidxu	umtxq_busy(&key);
2117161678Sdavidxu	umtxq_unlock(&key);
2118161678Sdavidxu	/*
2119161678Sdavidxu	 * For priority protected mutex, always set unlocked state
2120161678Sdavidxu	 * to UMUTEX_CONTESTED, so that userland always enters kernel
2121161678Sdavidxu	 * to lock the mutex, it is necessary because thread priority
2122161678Sdavidxu	 * has to be adjusted for such mutex.
2123161678Sdavidxu	 */
2124163449Sdavidxu	error = suword32(__DEVOLATILE(uint32_t *, &m->m_owner),
2125163449Sdavidxu		UMUTEX_CONTESTED);
2126161678Sdavidxu
2127161678Sdavidxu	umtxq_lock(&key);
2128161678Sdavidxu	if (error == 0)
2129161678Sdavidxu		umtxq_signal(&key, 1);
2130161678Sdavidxu	umtxq_unbusy(&key);
2131161678Sdavidxu	umtxq_unlock(&key);
2132161678Sdavidxu
2133161678Sdavidxu	if (error == -1)
2134161678Sdavidxu		error = EFAULT;
2135161678Sdavidxu	else {
2136170300Sjeff		mtx_lock_spin(&umtx_lock);
2137161926Sdavidxu		if (su != 0)
2138161926Sdavidxu			uq->uq_inherited_pri = new_inherited_pri;
2139161678Sdavidxu		pri = PRI_MAX;
2140161678Sdavidxu		TAILQ_FOREACH(pi, &uq->uq_pi_contested, pi_link) {
2141161678Sdavidxu			uq2 = TAILQ_FIRST(&pi->pi_blocked);
2142161678Sdavidxu			if (uq2 != NULL) {
2143161678Sdavidxu				if (pri > UPRI(uq2->uq_thread))
2144161678Sdavidxu					pri = UPRI(uq2->uq_thread);
2145161678Sdavidxu			}
2146161678Sdavidxu		}
2147161678Sdavidxu		if (pri > uq->uq_inherited_pri)
2148161678Sdavidxu			pri = uq->uq_inherited_pri;
2149170300Sjeff		thread_lock(td);
2150216791Sdavidxu		sched_lend_user_prio(td, pri);
2151170300Sjeff		thread_unlock(td);
2152170300Sjeff		mtx_unlock_spin(&umtx_lock);
2153161678Sdavidxu	}
2154161678Sdavidxu	umtx_key_release(&key);
2155161678Sdavidxu	return (error);
2156161678Sdavidxu}
2157161678Sdavidxu
2158161678Sdavidxustatic int
2159161678Sdavidxudo_set_ceiling(struct thread *td, struct umutex *m, uint32_t ceiling,
2160161678Sdavidxu	uint32_t *old_ceiling)
2161161678Sdavidxu{
2162161678Sdavidxu	struct umtx_q *uq;
2163161678Sdavidxu	uint32_t save_ceiling;
2164161678Sdavidxu	uint32_t owner, id;
2165161678Sdavidxu	uint32_t flags;
2166161678Sdavidxu	int error;
2167161678Sdavidxu
2168161678Sdavidxu	flags = fuword32(&m->m_flags);
2169161678Sdavidxu	if ((flags & UMUTEX_PRIO_PROTECT) == 0)
2170161678Sdavidxu		return (EINVAL);
2171161678Sdavidxu	if (ceiling > RTP_PRIO_MAX)
2172161678Sdavidxu		return (EINVAL);
2173161678Sdavidxu	id = td->td_tid;
2174161678Sdavidxu	uq = td->td_umtxq;
2175161678Sdavidxu	if ((error = umtx_key_get(m, TYPE_PP_UMUTEX, GET_SHARE(flags),
2176161678Sdavidxu	   &uq->uq_key)) != 0)
2177161678Sdavidxu		return (error);
2178161678Sdavidxu	for (;;) {
2179161678Sdavidxu		umtxq_lock(&uq->uq_key);
2180161678Sdavidxu		umtxq_busy(&uq->uq_key);
2181161678Sdavidxu		umtxq_unlock(&uq->uq_key);
2182161678Sdavidxu
2183161678Sdavidxu		save_ceiling = fuword32(&m->m_ceilings[0]);
2184161678Sdavidxu
2185161678Sdavidxu		owner = casuword32(&m->m_owner,
2186161678Sdavidxu		    UMUTEX_CONTESTED, id | UMUTEX_CONTESTED);
2187161678Sdavidxu
2188161678Sdavidxu		if (owner == UMUTEX_CONTESTED) {
2189161678Sdavidxu			suword32(&m->m_ceilings[0], ceiling);
2190163449Sdavidxu			suword32(__DEVOLATILE(uint32_t *, &m->m_owner),
2191163449Sdavidxu				UMUTEX_CONTESTED);
2192161678Sdavidxu			error = 0;
2193161678Sdavidxu			break;
2194161678Sdavidxu		}
2195161678Sdavidxu
2196161678Sdavidxu		/* The address was invalid. */
2197161678Sdavidxu		if (owner == -1) {
2198161678Sdavidxu			error = EFAULT;
2199161678Sdavidxu			break;
2200161678Sdavidxu		}
2201161678Sdavidxu
2202161678Sdavidxu		if ((owner & ~UMUTEX_CONTESTED) == id) {
2203161678Sdavidxu			suword32(&m->m_ceilings[0], ceiling);
2204161678Sdavidxu			error = 0;
2205161678Sdavidxu			break;
2206161678Sdavidxu		}
2207161678Sdavidxu
2208161678Sdavidxu		/*
2209161678Sdavidxu		 * If we caught a signal, we have retried and now
2210161678Sdavidxu		 * exit immediately.
2211161678Sdavidxu		 */
2212161678Sdavidxu		if (error != 0)
2213161678Sdavidxu			break;
2214161678Sdavidxu
2215161678Sdavidxu		/*
2216161678Sdavidxu		 * We set the contested bit, sleep. Otherwise the lock changed
2217161678Sdavidxu		 * and we need to retry or we lost a race to the thread
2218161678Sdavidxu		 * unlocking the umtx.
2219161678Sdavidxu		 */
2220161678Sdavidxu		umtxq_lock(&uq->uq_key);
2221161678Sdavidxu		umtxq_insert(uq);
2222161678Sdavidxu		umtxq_unbusy(&uq->uq_key);
2223161678Sdavidxu		error = umtxq_sleep(uq, "umtxpp", 0);
2224161678Sdavidxu		umtxq_remove(uq);
2225161678Sdavidxu		umtxq_unlock(&uq->uq_key);
2226161678Sdavidxu	}
2227161678Sdavidxu	umtxq_lock(&uq->uq_key);
2228161678Sdavidxu	if (error == 0)
2229161678Sdavidxu		umtxq_signal(&uq->uq_key, INT_MAX);
2230161678Sdavidxu	umtxq_unbusy(&uq->uq_key);
2231161678Sdavidxu	umtxq_unlock(&uq->uq_key);
2232161678Sdavidxu	umtx_key_release(&uq->uq_key);
2233161678Sdavidxu	if (error == 0 && old_ceiling != NULL)
2234161678Sdavidxu		suword32(old_ceiling, save_ceiling);
2235161678Sdavidxu	return (error);
2236161678Sdavidxu}
2237161678Sdavidxu
2238162030Sdavidxustatic int
2239162030Sdavidxu_do_lock_umutex(struct thread *td, struct umutex *m, int flags, int timo,
2240179970Sdavidxu	int mode)
2241162030Sdavidxu{
2242162030Sdavidxu	switch(flags & (UMUTEX_PRIO_INHERIT | UMUTEX_PRIO_PROTECT)) {
2243162030Sdavidxu	case 0:
2244179970Sdavidxu		return (_do_lock_normal(td, m, flags, timo, mode));
2245162030Sdavidxu	case UMUTEX_PRIO_INHERIT:
2246179970Sdavidxu		return (_do_lock_pi(td, m, flags, timo, mode));
2247162030Sdavidxu	case UMUTEX_PRIO_PROTECT:
2248179970Sdavidxu		return (_do_lock_pp(td, m, flags, timo, mode));
2249162030Sdavidxu	}
2250162030Sdavidxu	return (EINVAL);
2251162030Sdavidxu}
2252162030Sdavidxu
2253161678Sdavidxu/*
2254161678Sdavidxu * Lock a userland POSIX mutex.
2255161678Sdavidxu */
2256161678Sdavidxustatic int
2257162030Sdavidxudo_lock_umutex(struct thread *td, struct umutex *m,
2258179970Sdavidxu	struct timespec *timeout, int mode)
2259161678Sdavidxu{
2260162030Sdavidxu	struct timespec ts, ts2, ts3;
2261162030Sdavidxu	struct timeval tv;
2262161678Sdavidxu	uint32_t flags;
2263162030Sdavidxu	int error;
2264161678Sdavidxu
2265161678Sdavidxu	flags = fuword32(&m->m_flags);
2266161678Sdavidxu	if (flags == -1)
2267161678Sdavidxu		return (EFAULT);
2268161678Sdavidxu
2269162030Sdavidxu	if (timeout == NULL) {
2270179970Sdavidxu		error = _do_lock_umutex(td, m, flags, 0, mode);
2271162030Sdavidxu		/* Mutex locking is restarted if it is interrupted. */
2272179970Sdavidxu		if (error == EINTR && mode != _UMUTEX_WAIT)
2273162030Sdavidxu			error = ERESTART;
2274162030Sdavidxu	} else {
2275162030Sdavidxu		getnanouptime(&ts);
2276162030Sdavidxu		timespecadd(&ts, timeout);
2277162030Sdavidxu		TIMESPEC_TO_TIMEVAL(&tv, timeout);
2278162030Sdavidxu		for (;;) {
2279179970Sdavidxu			error = _do_lock_umutex(td, m, flags, tvtohz(&tv), mode);
2280162030Sdavidxu			if (error != ETIMEDOUT)
2281162030Sdavidxu				break;
2282162030Sdavidxu			getnanouptime(&ts2);
2283162030Sdavidxu			if (timespeccmp(&ts2, &ts, >=)) {
2284162030Sdavidxu				error = ETIMEDOUT;
2285162030Sdavidxu				break;
2286162030Sdavidxu			}
2287162030Sdavidxu			ts3 = ts;
2288162030Sdavidxu			timespecsub(&ts3, &ts2);
2289162030Sdavidxu			TIMESPEC_TO_TIMEVAL(&tv, &ts3);
2290162030Sdavidxu		}
2291162030Sdavidxu		/* Timed-locking is not restarted. */
2292162030Sdavidxu		if (error == ERESTART)
2293162030Sdavidxu			error = EINTR;
2294161742Sdavidxu	}
2295162030Sdavidxu	return (error);
2296161678Sdavidxu}
2297161678Sdavidxu
2298161678Sdavidxu/*
2299161678Sdavidxu * Unlock a userland POSIX mutex.
2300161678Sdavidxu */
2301161678Sdavidxustatic int
2302161678Sdavidxudo_unlock_umutex(struct thread *td, struct umutex *m)
2303161678Sdavidxu{
2304161678Sdavidxu	uint32_t flags;
2305161678Sdavidxu
2306161678Sdavidxu	flags = fuword32(&m->m_flags);
2307161678Sdavidxu	if (flags == -1)
2308161678Sdavidxu		return (EFAULT);
2309161678Sdavidxu
2310161855Sdavidxu	switch(flags & (UMUTEX_PRIO_INHERIT | UMUTEX_PRIO_PROTECT)) {
2311161855Sdavidxu	case 0:
2312161855Sdavidxu		return (do_unlock_normal(td, m, flags));
2313161855Sdavidxu	case UMUTEX_PRIO_INHERIT:
2314161855Sdavidxu		return (do_unlock_pi(td, m, flags));
2315161855Sdavidxu	case UMUTEX_PRIO_PROTECT:
2316161855Sdavidxu		return (do_unlock_pp(td, m, flags));
2317161855Sdavidxu	}
2318161678Sdavidxu
2319161855Sdavidxu	return (EINVAL);
2320161678Sdavidxu}
2321161678Sdavidxu
2322164839Sdavidxustatic int
2323164839Sdavidxudo_cv_wait(struct thread *td, struct ucond *cv, struct umutex *m,
2324164876Sdavidxu	struct timespec *timeout, u_long wflags)
2325164839Sdavidxu{
2326164839Sdavidxu	struct umtx_q *uq;
2327164839Sdavidxu	struct timeval tv;
2328164839Sdavidxu	struct timespec cts, ets, tts;
2329164839Sdavidxu	uint32_t flags;
2330216641Sdavidxu	uint32_t clockid;
2331164839Sdavidxu	int error;
2332164839Sdavidxu
2333164839Sdavidxu	uq = td->td_umtxq;
2334164839Sdavidxu	flags = fuword32(&cv->c_flags);
2335164839Sdavidxu	error = umtx_key_get(cv, TYPE_CV, GET_SHARE(flags), &uq->uq_key);
2336164839Sdavidxu	if (error != 0)
2337164839Sdavidxu		return (error);
2338216641Sdavidxu
2339216641Sdavidxu	if ((wflags & CVWAIT_CLOCKID) != 0) {
2340216641Sdavidxu		clockid = fuword32(&cv->c_clockid);
2341216641Sdavidxu		if (clockid < CLOCK_REALTIME ||
2342216641Sdavidxu		    clockid >= CLOCK_THREAD_CPUTIME_ID) {
2343216641Sdavidxu			/* hmm, only HW clock id will work. */
2344216641Sdavidxu			return (EINVAL);
2345216641Sdavidxu		}
2346216641Sdavidxu	} else {
2347216641Sdavidxu		clockid = CLOCK_REALTIME;
2348216641Sdavidxu	}
2349216641Sdavidxu
2350164839Sdavidxu	umtxq_lock(&uq->uq_key);
2351164839Sdavidxu	umtxq_busy(&uq->uq_key);
2352164839Sdavidxu	umtxq_insert(uq);
2353164839Sdavidxu	umtxq_unlock(&uq->uq_key);
2354164839Sdavidxu
2355164839Sdavidxu	/*
2356216641Sdavidxu	 * Set c_has_waiters to 1 before releasing user mutex, also
2357216641Sdavidxu	 * don't modify cache line when unnecessary.
2358164839Sdavidxu	 */
2359216641Sdavidxu	if (fuword32(__DEVOLATILE(uint32_t *, &cv->c_has_waiters)) == 0)
2360216641Sdavidxu		suword32(__DEVOLATILE(uint32_t *, &cv->c_has_waiters), 1);
2361164839Sdavidxu
2362164839Sdavidxu	umtxq_lock(&uq->uq_key);
2363164839Sdavidxu	umtxq_unbusy(&uq->uq_key);
2364164839Sdavidxu	umtxq_unlock(&uq->uq_key);
2365164839Sdavidxu
2366164839Sdavidxu	error = do_unlock_umutex(td, m);
2367164839Sdavidxu
2368164839Sdavidxu	umtxq_lock(&uq->uq_key);
2369164839Sdavidxu	if (error == 0) {
2370216641Sdavidxu		if (timeout == NULL) {
2371164839Sdavidxu			error = umtxq_sleep(uq, "ucond", 0);
2372164839Sdavidxu		} else {
2373216641Sdavidxu			if ((wflags & CVWAIT_ABSTIME) == 0) {
2374216641Sdavidxu				kern_clock_gettime(td, clockid, &ets);
2375216641Sdavidxu				timespecadd(&ets, timeout);
2376216641Sdavidxu				tts = *timeout;
2377216641Sdavidxu			} else { /* absolute time */
2378216641Sdavidxu				ets = *timeout;
2379216641Sdavidxu				tts = *timeout;
2380216641Sdavidxu				kern_clock_gettime(td, clockid, &cts);
2381216641Sdavidxu				timespecsub(&tts, &cts);
2382216641Sdavidxu			}
2383216641Sdavidxu			TIMESPEC_TO_TIMEVAL(&tv, &tts);
2384164839Sdavidxu			for (;;) {
2385164839Sdavidxu				error = umtxq_sleep(uq, "ucond", tvtohz(&tv));
2386164839Sdavidxu				if (error != ETIMEDOUT)
2387164839Sdavidxu					break;
2388216641Sdavidxu				kern_clock_gettime(td, clockid, &cts);
2389164839Sdavidxu				if (timespeccmp(&cts, &ets, >=)) {
2390164839Sdavidxu					error = ETIMEDOUT;
2391164839Sdavidxu					break;
2392164839Sdavidxu				}
2393164839Sdavidxu				tts = ets;
2394164839Sdavidxu				timespecsub(&tts, &cts);
2395164839Sdavidxu				TIMESPEC_TO_TIMEVAL(&tv, &tts);
2396164839Sdavidxu			}
2397164839Sdavidxu		}
2398164839Sdavidxu	}
2399164839Sdavidxu
2400211794Sdavidxu	if ((uq->uq_flags & UQF_UMTXQ) == 0)
2401211794Sdavidxu		error = 0;
2402211794Sdavidxu	else {
2403216641Sdavidxu		/*
2404216641Sdavidxu		 * This must be timeout,interrupted by signal or
2405216641Sdavidxu		 * surprious wakeup, clear c_has_waiter flag when
2406216641Sdavidxu		 * necessary.
2407216641Sdavidxu		 */
2408216641Sdavidxu		umtxq_busy(&uq->uq_key);
2409216641Sdavidxu		if ((uq->uq_flags & UQF_UMTXQ) != 0) {
2410216641Sdavidxu			int oldlen = uq->uq_cur_queue->length;
2411216641Sdavidxu			umtxq_remove(uq);
2412216641Sdavidxu			if (oldlen == 1) {
2413216641Sdavidxu				umtxq_unlock(&uq->uq_key);
2414216641Sdavidxu				suword32(
2415216641Sdavidxu				    __DEVOLATILE(uint32_t *,
2416216641Sdavidxu					 &cv->c_has_waiters), 0);
2417216641Sdavidxu				umtxq_lock(&uq->uq_key);
2418216641Sdavidxu			}
2419216641Sdavidxu		}
2420216641Sdavidxu		umtxq_unbusy(&uq->uq_key);
2421164839Sdavidxu		if (error == ERESTART)
2422164839Sdavidxu			error = EINTR;
2423164839Sdavidxu	}
2424211794Sdavidxu
2425164839Sdavidxu	umtxq_unlock(&uq->uq_key);
2426164839Sdavidxu	umtx_key_release(&uq->uq_key);
2427164839Sdavidxu	return (error);
2428164839Sdavidxu}
2429164839Sdavidxu
2430164839Sdavidxu/*
2431164839Sdavidxu * Signal a userland condition variable.
2432164839Sdavidxu */
2433164839Sdavidxustatic int
2434164839Sdavidxudo_cv_signal(struct thread *td, struct ucond *cv)
2435164839Sdavidxu{
2436164839Sdavidxu	struct umtx_key key;
2437164839Sdavidxu	int error, cnt, nwake;
2438164839Sdavidxu	uint32_t flags;
2439164839Sdavidxu
2440164839Sdavidxu	flags = fuword32(&cv->c_flags);
2441164839Sdavidxu	if ((error = umtx_key_get(cv, TYPE_CV, GET_SHARE(flags), &key)) != 0)
2442164839Sdavidxu		return (error);
2443164839Sdavidxu	umtxq_lock(&key);
2444164839Sdavidxu	umtxq_busy(&key);
2445164839Sdavidxu	cnt = umtxq_count(&key);
2446164839Sdavidxu	nwake = umtxq_signal(&key, 1);
2447164839Sdavidxu	if (cnt <= nwake) {
2448164839Sdavidxu		umtxq_unlock(&key);
2449164839Sdavidxu		error = suword32(
2450164839Sdavidxu		    __DEVOLATILE(uint32_t *, &cv->c_has_waiters), 0);
2451164839Sdavidxu		umtxq_lock(&key);
2452164839Sdavidxu	}
2453164839Sdavidxu	umtxq_unbusy(&key);
2454164839Sdavidxu	umtxq_unlock(&key);
2455164839Sdavidxu	umtx_key_release(&key);
2456164839Sdavidxu	return (error);
2457164839Sdavidxu}
2458164839Sdavidxu
2459164839Sdavidxustatic int
2460164839Sdavidxudo_cv_broadcast(struct thread *td, struct ucond *cv)
2461164839Sdavidxu{
2462164839Sdavidxu	struct umtx_key key;
2463164839Sdavidxu	int error;
2464164839Sdavidxu	uint32_t flags;
2465164839Sdavidxu
2466164839Sdavidxu	flags = fuword32(&cv->c_flags);
2467164839Sdavidxu	if ((error = umtx_key_get(cv, TYPE_CV, GET_SHARE(flags), &key)) != 0)
2468164839Sdavidxu		return (error);
2469164839Sdavidxu
2470164839Sdavidxu	umtxq_lock(&key);
2471164839Sdavidxu	umtxq_busy(&key);
2472164839Sdavidxu	umtxq_signal(&key, INT_MAX);
2473164839Sdavidxu	umtxq_unlock(&key);
2474164839Sdavidxu
2475164839Sdavidxu	error = suword32(__DEVOLATILE(uint32_t *, &cv->c_has_waiters), 0);
2476164839Sdavidxu
2477164839Sdavidxu	umtxq_lock(&key);
2478164839Sdavidxu	umtxq_unbusy(&key);
2479164839Sdavidxu	umtxq_unlock(&key);
2480164839Sdavidxu
2481164839Sdavidxu	umtx_key_release(&key);
2482164839Sdavidxu	return (error);
2483164839Sdavidxu}
2484164839Sdavidxu
2485177848Sdavidxustatic int
2486177848Sdavidxudo_rw_rdlock(struct thread *td, struct urwlock *rwlock, long fflag, int timo)
2487177848Sdavidxu{
2488177848Sdavidxu	struct umtx_q *uq;
2489177848Sdavidxu	uint32_t flags, wrflags;
2490177848Sdavidxu	int32_t state, oldstate;
2491177848Sdavidxu	int32_t blocked_readers;
2492177848Sdavidxu	int error;
2493177848Sdavidxu
2494177848Sdavidxu	uq = td->td_umtxq;
2495177848Sdavidxu	flags = fuword32(&rwlock->rw_flags);
2496177848Sdavidxu	error = umtx_key_get(rwlock, TYPE_RWLOCK, GET_SHARE(flags), &uq->uq_key);
2497177848Sdavidxu	if (error != 0)
2498177848Sdavidxu		return (error);
2499177848Sdavidxu
2500177848Sdavidxu	wrflags = URWLOCK_WRITE_OWNER;
2501177848Sdavidxu	if (!(fflag & URWLOCK_PREFER_READER) && !(flags & URWLOCK_PREFER_READER))
2502177848Sdavidxu		wrflags |= URWLOCK_WRITE_WAITERS;
2503177848Sdavidxu
2504177848Sdavidxu	for (;;) {
2505177848Sdavidxu		state = fuword32(__DEVOLATILE(int32_t *, &rwlock->rw_state));
2506177848Sdavidxu		/* try to lock it */
2507177848Sdavidxu		while (!(state & wrflags)) {
2508177848Sdavidxu			if (__predict_false(URWLOCK_READER_COUNT(state) == URWLOCK_MAX_READERS)) {
2509177848Sdavidxu				umtx_key_release(&uq->uq_key);
2510177848Sdavidxu				return (EAGAIN);
2511177848Sdavidxu			}
2512177848Sdavidxu			oldstate = casuword32(&rwlock->rw_state, state, state + 1);
2513177848Sdavidxu			if (oldstate == state) {
2514177848Sdavidxu				umtx_key_release(&uq->uq_key);
2515177848Sdavidxu				return (0);
2516177848Sdavidxu			}
2517177848Sdavidxu			state = oldstate;
2518177848Sdavidxu		}
2519177848Sdavidxu
2520177848Sdavidxu		if (error)
2521177848Sdavidxu			break;
2522177848Sdavidxu
2523177848Sdavidxu		/* grab monitor lock */
2524177848Sdavidxu		umtxq_lock(&uq->uq_key);
2525177848Sdavidxu		umtxq_busy(&uq->uq_key);
2526177848Sdavidxu		umtxq_unlock(&uq->uq_key);
2527177848Sdavidxu
2528203414Sdavidxu		/*
2529203414Sdavidxu		 * re-read the state, in case it changed between the try-lock above
2530203414Sdavidxu		 * and the check below
2531203414Sdavidxu		 */
2532203414Sdavidxu		state = fuword32(__DEVOLATILE(int32_t *, &rwlock->rw_state));
2533203414Sdavidxu
2534177848Sdavidxu		/* set read contention bit */
2535177848Sdavidxu		while ((state & wrflags) && !(state & URWLOCK_READ_WAITERS)) {
2536177848Sdavidxu			oldstate = casuword32(&rwlock->rw_state, state, state | URWLOCK_READ_WAITERS);
2537177848Sdavidxu			if (oldstate == state)
2538177848Sdavidxu				goto sleep;
2539177848Sdavidxu			state = oldstate;
2540177848Sdavidxu		}
2541177848Sdavidxu
2542177848Sdavidxu		/* state is changed while setting flags, restart */
2543177848Sdavidxu		if (!(state & wrflags)) {
2544177848Sdavidxu			umtxq_lock(&uq->uq_key);
2545177848Sdavidxu			umtxq_unbusy(&uq->uq_key);
2546177848Sdavidxu			umtxq_unlock(&uq->uq_key);
2547177848Sdavidxu			continue;
2548177848Sdavidxu		}
2549177848Sdavidxu
2550177848Sdavidxusleep:
2551177848Sdavidxu		/* contention bit is set, before sleeping, increase read waiter count */
2552177848Sdavidxu		blocked_readers = fuword32(&rwlock->rw_blocked_readers);
2553177848Sdavidxu		suword32(&rwlock->rw_blocked_readers, blocked_readers+1);
2554177848Sdavidxu
2555177848Sdavidxu		while (state & wrflags) {
2556177848Sdavidxu			umtxq_lock(&uq->uq_key);
2557177848Sdavidxu			umtxq_insert(uq);
2558177848Sdavidxu			umtxq_unbusy(&uq->uq_key);
2559177848Sdavidxu
2560177848Sdavidxu			error = umtxq_sleep(uq, "urdlck", timo);
2561177848Sdavidxu
2562177848Sdavidxu			umtxq_busy(&uq->uq_key);
2563177848Sdavidxu			umtxq_remove(uq);
2564177848Sdavidxu			umtxq_unlock(&uq->uq_key);
2565177848Sdavidxu			if (error)
2566177848Sdavidxu				break;
2567177848Sdavidxu			state = fuword32(__DEVOLATILE(int32_t *, &rwlock->rw_state));
2568177848Sdavidxu		}
2569177848Sdavidxu
2570177848Sdavidxu		/* decrease read waiter count, and may clear read contention bit */
2571177848Sdavidxu		blocked_readers = fuword32(&rwlock->rw_blocked_readers);
2572177848Sdavidxu		suword32(&rwlock->rw_blocked_readers, blocked_readers-1);
2573177848Sdavidxu		if (blocked_readers == 1) {
2574177848Sdavidxu			state = fuword32(__DEVOLATILE(int32_t *, &rwlock->rw_state));
2575177848Sdavidxu			for (;;) {
2576177848Sdavidxu				oldstate = casuword32(&rwlock->rw_state, state,
2577177848Sdavidxu					 state & ~URWLOCK_READ_WAITERS);
2578177848Sdavidxu				if (oldstate == state)
2579177848Sdavidxu					break;
2580177848Sdavidxu				state = oldstate;
2581177848Sdavidxu			}
2582177848Sdavidxu		}
2583177848Sdavidxu
2584177848Sdavidxu		umtxq_lock(&uq->uq_key);
2585177848Sdavidxu		umtxq_unbusy(&uq->uq_key);
2586177848Sdavidxu		umtxq_unlock(&uq->uq_key);
2587177848Sdavidxu	}
2588177848Sdavidxu	umtx_key_release(&uq->uq_key);
2589177848Sdavidxu	return (error);
2590177848Sdavidxu}
2591177848Sdavidxu
2592177848Sdavidxustatic int
2593177848Sdavidxudo_rw_rdlock2(struct thread *td, void *obj, long val, struct timespec *timeout)
2594177848Sdavidxu{
2595177848Sdavidxu	struct timespec ts, ts2, ts3;
2596177848Sdavidxu	struct timeval tv;
2597177848Sdavidxu	int error;
2598177848Sdavidxu
2599177848Sdavidxu	getnanouptime(&ts);
2600177848Sdavidxu	timespecadd(&ts, timeout);
2601177848Sdavidxu	TIMESPEC_TO_TIMEVAL(&tv, timeout);
2602177848Sdavidxu	for (;;) {
2603177848Sdavidxu		error = do_rw_rdlock(td, obj, val, tvtohz(&tv));
2604177848Sdavidxu		if (error != ETIMEDOUT)
2605177848Sdavidxu			break;
2606177848Sdavidxu		getnanouptime(&ts2);
2607177848Sdavidxu		if (timespeccmp(&ts2, &ts, >=)) {
2608177848Sdavidxu			error = ETIMEDOUT;
2609177848Sdavidxu			break;
2610177848Sdavidxu		}
2611177848Sdavidxu		ts3 = ts;
2612177848Sdavidxu		timespecsub(&ts3, &ts2);
2613177848Sdavidxu		TIMESPEC_TO_TIMEVAL(&tv, &ts3);
2614177848Sdavidxu	}
2615177849Sdavidxu	if (error == ERESTART)
2616177849Sdavidxu		error = EINTR;
2617177848Sdavidxu	return (error);
2618177848Sdavidxu}
2619177848Sdavidxu
2620177848Sdavidxustatic int
2621177848Sdavidxudo_rw_wrlock(struct thread *td, struct urwlock *rwlock, int timo)
2622177848Sdavidxu{
2623177848Sdavidxu	struct umtx_q *uq;
2624177848Sdavidxu	uint32_t flags;
2625177848Sdavidxu	int32_t state, oldstate;
2626177848Sdavidxu	int32_t blocked_writers;
2627197476Sdavidxu	int32_t blocked_readers;
2628177848Sdavidxu	int error;
2629177848Sdavidxu
2630177848Sdavidxu	uq = td->td_umtxq;
2631177848Sdavidxu	flags = fuword32(&rwlock->rw_flags);
2632177848Sdavidxu	error = umtx_key_get(rwlock, TYPE_RWLOCK, GET_SHARE(flags), &uq->uq_key);
2633177848Sdavidxu	if (error != 0)
2634177848Sdavidxu		return (error);
2635177848Sdavidxu
2636197476Sdavidxu	blocked_readers = 0;
2637177848Sdavidxu	for (;;) {
2638177848Sdavidxu		state = fuword32(__DEVOLATILE(int32_t *, &rwlock->rw_state));
2639177848Sdavidxu		while (!(state & URWLOCK_WRITE_OWNER) && URWLOCK_READER_COUNT(state) == 0) {
2640177848Sdavidxu			oldstate = casuword32(&rwlock->rw_state, state, state | URWLOCK_WRITE_OWNER);
2641177848Sdavidxu			if (oldstate == state) {
2642177848Sdavidxu				umtx_key_release(&uq->uq_key);
2643177848Sdavidxu				return (0);
2644177848Sdavidxu			}
2645177848Sdavidxu			state = oldstate;
2646177848Sdavidxu		}
2647177848Sdavidxu
2648197476Sdavidxu		if (error) {
2649197476Sdavidxu			if (!(state & (URWLOCK_WRITE_OWNER|URWLOCK_WRITE_WAITERS)) &&
2650197476Sdavidxu			    blocked_readers != 0) {
2651197476Sdavidxu				umtxq_lock(&uq->uq_key);
2652197476Sdavidxu				umtxq_busy(&uq->uq_key);
2653197476Sdavidxu				umtxq_signal_queue(&uq->uq_key, INT_MAX, UMTX_SHARED_QUEUE);
2654197476Sdavidxu				umtxq_unbusy(&uq->uq_key);
2655197476Sdavidxu				umtxq_unlock(&uq->uq_key);
2656197476Sdavidxu			}
2657197476Sdavidxu
2658177848Sdavidxu			break;
2659197476Sdavidxu		}
2660177848Sdavidxu
2661177848Sdavidxu		/* grab monitor lock */
2662177848Sdavidxu		umtxq_lock(&uq->uq_key);
2663177848Sdavidxu		umtxq_busy(&uq->uq_key);
2664177848Sdavidxu		umtxq_unlock(&uq->uq_key);
2665177848Sdavidxu
2666203414Sdavidxu		/*
2667203414Sdavidxu		 * re-read the state, in case it changed between the try-lock above
2668203414Sdavidxu		 * and the check below
2669203414Sdavidxu		 */
2670203414Sdavidxu		state = fuword32(__DEVOLATILE(int32_t *, &rwlock->rw_state));
2671203414Sdavidxu
2672177848Sdavidxu		while (((state & URWLOCK_WRITE_OWNER) || URWLOCK_READER_COUNT(state) != 0) &&
2673177848Sdavidxu		       (state & URWLOCK_WRITE_WAITERS) == 0) {
2674177848Sdavidxu			oldstate = casuword32(&rwlock->rw_state, state, state | URWLOCK_WRITE_WAITERS);
2675177848Sdavidxu			if (oldstate == state)
2676177848Sdavidxu				goto sleep;
2677177848Sdavidxu			state = oldstate;
2678177848Sdavidxu		}
2679177848Sdavidxu
2680177848Sdavidxu		if (!(state & URWLOCK_WRITE_OWNER) && URWLOCK_READER_COUNT(state) == 0) {
2681177848Sdavidxu			umtxq_lock(&uq->uq_key);
2682177848Sdavidxu			umtxq_unbusy(&uq->uq_key);
2683177848Sdavidxu			umtxq_unlock(&uq->uq_key);
2684177848Sdavidxu			continue;
2685177848Sdavidxu		}
2686177848Sdavidxusleep:
2687177848Sdavidxu		blocked_writers = fuword32(&rwlock->rw_blocked_writers);
2688177848Sdavidxu		suword32(&rwlock->rw_blocked_writers, blocked_writers+1);
2689177848Sdavidxu
2690177848Sdavidxu		while ((state & URWLOCK_WRITE_OWNER) || URWLOCK_READER_COUNT(state) != 0) {
2691177848Sdavidxu			umtxq_lock(&uq->uq_key);
2692177848Sdavidxu			umtxq_insert_queue(uq, UMTX_EXCLUSIVE_QUEUE);
2693177848Sdavidxu			umtxq_unbusy(&uq->uq_key);
2694177848Sdavidxu
2695177848Sdavidxu			error = umtxq_sleep(uq, "uwrlck", timo);
2696177848Sdavidxu
2697177848Sdavidxu			umtxq_busy(&uq->uq_key);
2698177848Sdavidxu			umtxq_remove_queue(uq, UMTX_EXCLUSIVE_QUEUE);
2699177848Sdavidxu			umtxq_unlock(&uq->uq_key);
2700177848Sdavidxu			if (error)
2701177848Sdavidxu				break;
2702177848Sdavidxu			state = fuword32(__DEVOLATILE(int32_t *, &rwlock->rw_state));
2703177848Sdavidxu		}
2704177848Sdavidxu
2705177848Sdavidxu		blocked_writers = fuword32(&rwlock->rw_blocked_writers);
2706177848Sdavidxu		suword32(&rwlock->rw_blocked_writers, blocked_writers-1);
2707177848Sdavidxu		if (blocked_writers == 1) {
2708177848Sdavidxu			state = fuword32(__DEVOLATILE(int32_t *, &rwlock->rw_state));
2709177848Sdavidxu			for (;;) {
2710177848Sdavidxu				oldstate = casuword32(&rwlock->rw_state, state,
2711177848Sdavidxu					 state & ~URWLOCK_WRITE_WAITERS);
2712177848Sdavidxu				if (oldstate == state)
2713177848Sdavidxu					break;
2714177848Sdavidxu				state = oldstate;
2715177848Sdavidxu			}
2716197476Sdavidxu			blocked_readers = fuword32(&rwlock->rw_blocked_readers);
2717197476Sdavidxu		} else
2718197476Sdavidxu			blocked_readers = 0;
2719177848Sdavidxu
2720177848Sdavidxu		umtxq_lock(&uq->uq_key);
2721177848Sdavidxu		umtxq_unbusy(&uq->uq_key);
2722177848Sdavidxu		umtxq_unlock(&uq->uq_key);
2723177848Sdavidxu	}
2724177848Sdavidxu
2725177848Sdavidxu	umtx_key_release(&uq->uq_key);
2726177848Sdavidxu	return (error);
2727177848Sdavidxu}
2728177848Sdavidxu
2729177848Sdavidxustatic int
2730177848Sdavidxudo_rw_wrlock2(struct thread *td, void *obj, struct timespec *timeout)
2731177848Sdavidxu{
2732177848Sdavidxu	struct timespec ts, ts2, ts3;
2733177848Sdavidxu	struct timeval tv;
2734177848Sdavidxu	int error;
2735177848Sdavidxu
2736177848Sdavidxu	getnanouptime(&ts);
2737177848Sdavidxu	timespecadd(&ts, timeout);
2738177848Sdavidxu	TIMESPEC_TO_TIMEVAL(&tv, timeout);
2739177848Sdavidxu	for (;;) {
2740177848Sdavidxu		error = do_rw_wrlock(td, obj, tvtohz(&tv));
2741177848Sdavidxu		if (error != ETIMEDOUT)
2742177848Sdavidxu			break;
2743177848Sdavidxu		getnanouptime(&ts2);
2744177848Sdavidxu		if (timespeccmp(&ts2, &ts, >=)) {
2745177848Sdavidxu			error = ETIMEDOUT;
2746177848Sdavidxu			break;
2747177848Sdavidxu		}
2748177848Sdavidxu		ts3 = ts;
2749177848Sdavidxu		timespecsub(&ts3, &ts2);
2750177848Sdavidxu		TIMESPEC_TO_TIMEVAL(&tv, &ts3);
2751177848Sdavidxu	}
2752177849Sdavidxu	if (error == ERESTART)
2753177849Sdavidxu		error = EINTR;
2754177848Sdavidxu	return (error);
2755177848Sdavidxu}
2756177848Sdavidxu
2757177848Sdavidxustatic int
2758177880Sdavidxudo_rw_unlock(struct thread *td, struct urwlock *rwlock)
2759177848Sdavidxu{
2760177848Sdavidxu	struct umtx_q *uq;
2761177848Sdavidxu	uint32_t flags;
2762177848Sdavidxu	int32_t state, oldstate;
2763177848Sdavidxu	int error, q, count;
2764177848Sdavidxu
2765177848Sdavidxu	uq = td->td_umtxq;
2766177848Sdavidxu	flags = fuword32(&rwlock->rw_flags);
2767177848Sdavidxu	error = umtx_key_get(rwlock, TYPE_RWLOCK, GET_SHARE(flags), &uq->uq_key);
2768177848Sdavidxu	if (error != 0)
2769177848Sdavidxu		return (error);
2770177848Sdavidxu
2771177848Sdavidxu	state = fuword32(__DEVOLATILE(int32_t *, &rwlock->rw_state));
2772177848Sdavidxu	if (state & URWLOCK_WRITE_OWNER) {
2773177848Sdavidxu		for (;;) {
2774177848Sdavidxu			oldstate = casuword32(&rwlock->rw_state, state,
2775177848Sdavidxu				state & ~URWLOCK_WRITE_OWNER);
2776177848Sdavidxu			if (oldstate != state) {
2777177848Sdavidxu				state = oldstate;
2778177848Sdavidxu				if (!(oldstate & URWLOCK_WRITE_OWNER)) {
2779177848Sdavidxu					error = EPERM;
2780177848Sdavidxu					goto out;
2781177848Sdavidxu				}
2782177848Sdavidxu			} else
2783177848Sdavidxu				break;
2784177848Sdavidxu		}
2785177848Sdavidxu	} else if (URWLOCK_READER_COUNT(state) != 0) {
2786177848Sdavidxu		for (;;) {
2787177848Sdavidxu			oldstate = casuword32(&rwlock->rw_state, state,
2788177848Sdavidxu				state - 1);
2789177848Sdavidxu			if (oldstate != state) {
2790177848Sdavidxu				state = oldstate;
2791177848Sdavidxu				if (URWLOCK_READER_COUNT(oldstate) == 0) {
2792177848Sdavidxu					error = EPERM;
2793177848Sdavidxu					goto out;
2794177848Sdavidxu				}
2795177848Sdavidxu			}
2796177848Sdavidxu			else
2797177848Sdavidxu				break;
2798177848Sdavidxu		}
2799177848Sdavidxu	} else {
2800177848Sdavidxu		error = EPERM;
2801177848Sdavidxu		goto out;
2802177848Sdavidxu	}
2803177848Sdavidxu
2804177848Sdavidxu	count = 0;
2805177848Sdavidxu
2806177848Sdavidxu	if (!(flags & URWLOCK_PREFER_READER)) {
2807177848Sdavidxu		if (state & URWLOCK_WRITE_WAITERS) {
2808177848Sdavidxu			count = 1;
2809177848Sdavidxu			q = UMTX_EXCLUSIVE_QUEUE;
2810177848Sdavidxu		} else if (state & URWLOCK_READ_WAITERS) {
2811177848Sdavidxu			count = INT_MAX;
2812177848Sdavidxu			q = UMTX_SHARED_QUEUE;
2813177848Sdavidxu		}
2814177848Sdavidxu	} else {
2815177848Sdavidxu		if (state & URWLOCK_READ_WAITERS) {
2816177848Sdavidxu			count = INT_MAX;
2817177848Sdavidxu			q = UMTX_SHARED_QUEUE;
2818177848Sdavidxu		} else if (state & URWLOCK_WRITE_WAITERS) {
2819177848Sdavidxu			count = 1;
2820177848Sdavidxu			q = UMTX_EXCLUSIVE_QUEUE;
2821177848Sdavidxu		}
2822177848Sdavidxu	}
2823177848Sdavidxu
2824177848Sdavidxu	if (count) {
2825177848Sdavidxu		umtxq_lock(&uq->uq_key);
2826177848Sdavidxu		umtxq_busy(&uq->uq_key);
2827177848Sdavidxu		umtxq_signal_queue(&uq->uq_key, count, q);
2828177848Sdavidxu		umtxq_unbusy(&uq->uq_key);
2829177848Sdavidxu		umtxq_unlock(&uq->uq_key);
2830177848Sdavidxu	}
2831177848Sdavidxuout:
2832177848Sdavidxu	umtx_key_release(&uq->uq_key);
2833177848Sdavidxu	return (error);
2834177848Sdavidxu}
2835177848Sdavidxu
2836201472Sdavidxustatic int
2837201472Sdavidxudo_sem_wait(struct thread *td, struct _usem *sem, struct timespec *timeout)
2838201472Sdavidxu{
2839201472Sdavidxu	struct umtx_q *uq;
2840201472Sdavidxu	struct timeval tv;
2841201472Sdavidxu	struct timespec cts, ets, tts;
2842201472Sdavidxu	uint32_t flags, count;
2843201472Sdavidxu	int error;
2844201472Sdavidxu
2845201472Sdavidxu	uq = td->td_umtxq;
2846201472Sdavidxu	flags = fuword32(&sem->_flags);
2847201885Sdavidxu	error = umtx_key_get(sem, TYPE_SEM, GET_SHARE(flags), &uq->uq_key);
2848201472Sdavidxu	if (error != 0)
2849201472Sdavidxu		return (error);
2850201472Sdavidxu	umtxq_lock(&uq->uq_key);
2851201472Sdavidxu	umtxq_busy(&uq->uq_key);
2852201472Sdavidxu	umtxq_insert(uq);
2853201472Sdavidxu	umtxq_unlock(&uq->uq_key);
2854201472Sdavidxu
2855215652Sdavidxu	if (fuword32(__DEVOLATILE(uint32_t *, &sem->_has_waiters)) == 0)
2856215652Sdavidxu		casuword32(__DEVOLATILE(uint32_t *, &sem->_has_waiters), 0, 1);
2857203657Sdavidxu
2858201472Sdavidxu	count = fuword32(__DEVOLATILE(uint32_t *, &sem->_count));
2859201472Sdavidxu	if (count != 0) {
2860201472Sdavidxu		umtxq_lock(&uq->uq_key);
2861201472Sdavidxu		umtxq_unbusy(&uq->uq_key);
2862201472Sdavidxu		umtxq_remove(uq);
2863201472Sdavidxu		umtxq_unlock(&uq->uq_key);
2864201472Sdavidxu		umtx_key_release(&uq->uq_key);
2865201472Sdavidxu		return (0);
2866201472Sdavidxu	}
2867201472Sdavidxu
2868201472Sdavidxu	umtxq_lock(&uq->uq_key);
2869201472Sdavidxu	umtxq_unbusy(&uq->uq_key);
2870201472Sdavidxu	umtxq_unlock(&uq->uq_key);
2871201472Sdavidxu
2872201472Sdavidxu	umtxq_lock(&uq->uq_key);
2873201472Sdavidxu	if (timeout == NULL) {
2874201472Sdavidxu		error = umtxq_sleep(uq, "usem", 0);
2875201472Sdavidxu	} else {
2876201472Sdavidxu		getnanouptime(&ets);
2877201472Sdavidxu		timespecadd(&ets, timeout);
2878201472Sdavidxu		TIMESPEC_TO_TIMEVAL(&tv, timeout);
2879201472Sdavidxu		for (;;) {
2880201472Sdavidxu			error = umtxq_sleep(uq, "usem", tvtohz(&tv));
2881201472Sdavidxu			if (error != ETIMEDOUT)
2882201472Sdavidxu				break;
2883201472Sdavidxu			getnanouptime(&cts);
2884201472Sdavidxu			if (timespeccmp(&cts, &ets, >=)) {
2885201472Sdavidxu				error = ETIMEDOUT;
2886201472Sdavidxu				break;
2887201472Sdavidxu			}
2888201472Sdavidxu			tts = ets;
2889201472Sdavidxu			timespecsub(&tts, &cts);
2890201472Sdavidxu			TIMESPEC_TO_TIMEVAL(&tv, &tts);
2891201472Sdavidxu		}
2892201472Sdavidxu	}
2893201472Sdavidxu
2894211794Sdavidxu	if ((uq->uq_flags & UQF_UMTXQ) == 0)
2895211794Sdavidxu		error = 0;
2896211794Sdavidxu	else {
2897211794Sdavidxu		umtxq_remove(uq);
2898201472Sdavidxu		if (error == ERESTART)
2899201472Sdavidxu			error = EINTR;
2900201472Sdavidxu	}
2901201472Sdavidxu	umtxq_unlock(&uq->uq_key);
2902201472Sdavidxu	umtx_key_release(&uq->uq_key);
2903201472Sdavidxu	return (error);
2904201472Sdavidxu}
2905201472Sdavidxu
2906201472Sdavidxu/*
2907201472Sdavidxu * Signal a userland condition variable.
2908201472Sdavidxu */
2909201472Sdavidxustatic int
2910201472Sdavidxudo_sem_wake(struct thread *td, struct _usem *sem)
2911201472Sdavidxu{
2912201472Sdavidxu	struct umtx_key key;
2913201472Sdavidxu	int error, cnt, nwake;
2914201472Sdavidxu	uint32_t flags;
2915201472Sdavidxu
2916201472Sdavidxu	flags = fuword32(&sem->_flags);
2917201885Sdavidxu	if ((error = umtx_key_get(sem, TYPE_SEM, GET_SHARE(flags), &key)) != 0)
2918201472Sdavidxu		return (error);
2919201472Sdavidxu	umtxq_lock(&key);
2920201472Sdavidxu	umtxq_busy(&key);
2921201472Sdavidxu	cnt = umtxq_count(&key);
2922201472Sdavidxu	nwake = umtxq_signal(&key, 1);
2923201472Sdavidxu	if (cnt <= nwake) {
2924201472Sdavidxu		umtxq_unlock(&key);
2925201472Sdavidxu		error = suword32(
2926201472Sdavidxu		    __DEVOLATILE(uint32_t *, &sem->_has_waiters), 0);
2927201472Sdavidxu		umtxq_lock(&key);
2928201472Sdavidxu	}
2929201472Sdavidxu	umtxq_unbusy(&key);
2930201472Sdavidxu	umtxq_unlock(&key);
2931201472Sdavidxu	umtx_key_release(&key);
2932201472Sdavidxu	return (error);
2933201472Sdavidxu}
2934201472Sdavidxu
2935139013Sdavidxuint
2936139013Sdavidxu_umtx_lock(struct thread *td, struct _umtx_lock_args *uap)
2937139013Sdavidxu    /* struct umtx *umtx */
2938139013Sdavidxu{
2939162536Sdavidxu	return _do_lock_umtx(td, uap->umtx, td->td_tid, 0);
2940139013Sdavidxu}
2941139013Sdavidxu
2942139013Sdavidxuint
2943139013Sdavidxu_umtx_unlock(struct thread *td, struct _umtx_unlock_args *uap)
2944139013Sdavidxu    /* struct umtx *umtx */
2945139013Sdavidxu{
2946162536Sdavidxu	return do_unlock_umtx(td, uap->umtx, td->td_tid);
2947139013Sdavidxu}
2948139013Sdavidxu
2949162536Sdavidxustatic int
2950162536Sdavidxu__umtx_op_lock_umtx(struct thread *td, struct _umtx_op_args *uap)
2951139013Sdavidxu{
2952162536Sdavidxu	struct timespec *ts, timeout;
2953139013Sdavidxu	int error;
2954139013Sdavidxu
2955162536Sdavidxu	/* Allow a null timespec (wait forever). */
2956162536Sdavidxu	if (uap->uaddr2 == NULL)
2957162536Sdavidxu		ts = NULL;
2958162536Sdavidxu	else {
2959162536Sdavidxu		error = copyin(uap->uaddr2, &timeout, sizeof(timeout));
2960162536Sdavidxu		if (error != 0)
2961162536Sdavidxu			return (error);
2962162536Sdavidxu		if (timeout.tv_nsec >= 1000000000 ||
2963162536Sdavidxu		    timeout.tv_nsec < 0) {
2964162536Sdavidxu			return (EINVAL);
2965161678Sdavidxu		}
2966162536Sdavidxu		ts = &timeout;
2967162536Sdavidxu	}
2968162536Sdavidxu	return (do_lock_umtx(td, uap->obj, uap->val, ts));
2969162536Sdavidxu}
2970162536Sdavidxu
2971162536Sdavidxustatic int
2972162536Sdavidxu__umtx_op_unlock_umtx(struct thread *td, struct _umtx_op_args *uap)
2973162536Sdavidxu{
2974162536Sdavidxu	return (do_unlock_umtx(td, uap->obj, uap->val));
2975162536Sdavidxu}
2976162536Sdavidxu
2977162536Sdavidxustatic int
2978162536Sdavidxu__umtx_op_wait(struct thread *td, struct _umtx_op_args *uap)
2979162536Sdavidxu{
2980162536Sdavidxu	struct timespec *ts, timeout;
2981162536Sdavidxu	int error;
2982162536Sdavidxu
2983162536Sdavidxu	if (uap->uaddr2 == NULL)
2984162536Sdavidxu		ts = NULL;
2985162536Sdavidxu	else {
2986162536Sdavidxu		error = copyin(uap->uaddr2, &timeout, sizeof(timeout));
2987162536Sdavidxu		if (error != 0)
2988162536Sdavidxu			return (error);
2989162536Sdavidxu		if (timeout.tv_nsec >= 1000000000 ||
2990162536Sdavidxu		    timeout.tv_nsec < 0)
2991162536Sdavidxu			return (EINVAL);
2992162536Sdavidxu		ts = &timeout;
2993162536Sdavidxu	}
2994178646Sdavidxu	return do_wait(td, uap->obj, uap->val, ts, 0, 0);
2995162536Sdavidxu}
2996162536Sdavidxu
2997162536Sdavidxustatic int
2998173800Sdavidxu__umtx_op_wait_uint(struct thread *td, struct _umtx_op_args *uap)
2999173800Sdavidxu{
3000173800Sdavidxu	struct timespec *ts, timeout;
3001173800Sdavidxu	int error;
3002173800Sdavidxu
3003173800Sdavidxu	if (uap->uaddr2 == NULL)
3004173800Sdavidxu		ts = NULL;
3005173800Sdavidxu	else {
3006173800Sdavidxu		error = copyin(uap->uaddr2, &timeout, sizeof(timeout));
3007173800Sdavidxu		if (error != 0)
3008173800Sdavidxu			return (error);
3009173800Sdavidxu		if (timeout.tv_nsec >= 1000000000 ||
3010173800Sdavidxu		    timeout.tv_nsec < 0)
3011173800Sdavidxu			return (EINVAL);
3012173800Sdavidxu		ts = &timeout;
3013173800Sdavidxu	}
3014178646Sdavidxu	return do_wait(td, uap->obj, uap->val, ts, 1, 0);
3015173800Sdavidxu}
3016173800Sdavidxu
3017173800Sdavidxustatic int
3018178646Sdavidxu__umtx_op_wait_uint_private(struct thread *td, struct _umtx_op_args *uap)
3019178646Sdavidxu{
3020178646Sdavidxu	struct timespec *ts, timeout;
3021178646Sdavidxu	int error;
3022178646Sdavidxu
3023178646Sdavidxu	if (uap->uaddr2 == NULL)
3024178646Sdavidxu		ts = NULL;
3025178646Sdavidxu	else {
3026178646Sdavidxu		error = copyin(uap->uaddr2, &timeout, sizeof(timeout));
3027178646Sdavidxu		if (error != 0)
3028178646Sdavidxu			return (error);
3029178646Sdavidxu		if (timeout.tv_nsec >= 1000000000 ||
3030178646Sdavidxu		    timeout.tv_nsec < 0)
3031178646Sdavidxu			return (EINVAL);
3032178646Sdavidxu		ts = &timeout;
3033178646Sdavidxu	}
3034178646Sdavidxu	return do_wait(td, uap->obj, uap->val, ts, 1, 1);
3035178646Sdavidxu}
3036178646Sdavidxu
3037178646Sdavidxustatic int
3038162536Sdavidxu__umtx_op_wake(struct thread *td, struct _umtx_op_args *uap)
3039162536Sdavidxu{
3040178646Sdavidxu	return (kern_umtx_wake(td, uap->obj, uap->val, 0));
3041162536Sdavidxu}
3042162536Sdavidxu
3043216641Sdavidxu#define BATCH_SIZE	128
3044162536Sdavidxustatic int
3045216641Sdavidxu__umtx_op_nwake_private(struct thread *td, struct _umtx_op_args *uap)
3046216641Sdavidxu{
3047216641Sdavidxu	int count = uap->val;
3048216641Sdavidxu	void *uaddrs[BATCH_SIZE];
3049216641Sdavidxu	char **upp = (char **)uap->obj;
3050216641Sdavidxu	int tocopy;
3051216641Sdavidxu	int error = 0;
3052216641Sdavidxu	int i, pos = 0;
3053216641Sdavidxu
3054216641Sdavidxu	while (count > 0) {
3055216641Sdavidxu		tocopy = count;
3056216641Sdavidxu		if (tocopy > BATCH_SIZE)
3057216641Sdavidxu			tocopy = BATCH_SIZE;
3058216641Sdavidxu		error = copyin(upp+pos, uaddrs, tocopy * sizeof(char *));
3059216641Sdavidxu		if (error != 0)
3060216641Sdavidxu			break;
3061216641Sdavidxu		for (i = 0; i < tocopy; ++i)
3062216641Sdavidxu			kern_umtx_wake(td, uaddrs[i], INT_MAX, 1);
3063216641Sdavidxu		count -= tocopy;
3064216641Sdavidxu		pos += tocopy;
3065216641Sdavidxu	}
3066216641Sdavidxu	return (error);
3067216641Sdavidxu}
3068216641Sdavidxu
3069216641Sdavidxustatic int
3070178646Sdavidxu__umtx_op_wake_private(struct thread *td, struct _umtx_op_args *uap)
3071178646Sdavidxu{
3072178646Sdavidxu	return (kern_umtx_wake(td, uap->obj, uap->val, 1));
3073178646Sdavidxu}
3074178646Sdavidxu
3075178646Sdavidxustatic int
3076162536Sdavidxu__umtx_op_lock_umutex(struct thread *td, struct _umtx_op_args *uap)
3077162536Sdavidxu{
3078162536Sdavidxu	struct timespec *ts, timeout;
3079162536Sdavidxu	int error;
3080162536Sdavidxu
3081162536Sdavidxu	/* Allow a null timespec (wait forever). */
3082162536Sdavidxu	if (uap->uaddr2 == NULL)
3083162536Sdavidxu		ts = NULL;
3084162536Sdavidxu	else {
3085162536Sdavidxu		error = copyin(uap->uaddr2, &timeout,
3086162536Sdavidxu		    sizeof(timeout));
3087162536Sdavidxu		if (error != 0)
3088162536Sdavidxu			return (error);
3089162536Sdavidxu		if (timeout.tv_nsec >= 1000000000 ||
3090162536Sdavidxu		    timeout.tv_nsec < 0) {
3091162536Sdavidxu			return (EINVAL);
3092139013Sdavidxu		}
3093162536Sdavidxu		ts = &timeout;
3094139013Sdavidxu	}
3095162536Sdavidxu	return do_lock_umutex(td, uap->obj, ts, 0);
3096162536Sdavidxu}
3097162536Sdavidxu
3098162536Sdavidxustatic int
3099162536Sdavidxu__umtx_op_trylock_umutex(struct thread *td, struct _umtx_op_args *uap)
3100162536Sdavidxu{
3101179970Sdavidxu	return do_lock_umutex(td, uap->obj, NULL, _UMUTEX_TRY);
3102162536Sdavidxu}
3103162536Sdavidxu
3104162536Sdavidxustatic int
3105179970Sdavidxu__umtx_op_wait_umutex(struct thread *td, struct _umtx_op_args *uap)
3106179970Sdavidxu{
3107179970Sdavidxu	struct timespec *ts, timeout;
3108179970Sdavidxu	int error;
3109179970Sdavidxu
3110179970Sdavidxu	/* Allow a null timespec (wait forever). */
3111179970Sdavidxu	if (uap->uaddr2 == NULL)
3112179970Sdavidxu		ts = NULL;
3113179970Sdavidxu	else {
3114179970Sdavidxu		error = copyin(uap->uaddr2, &timeout,
3115179970Sdavidxu		    sizeof(timeout));
3116179970Sdavidxu		if (error != 0)
3117179970Sdavidxu			return (error);
3118179970Sdavidxu		if (timeout.tv_nsec >= 1000000000 ||
3119179970Sdavidxu		    timeout.tv_nsec < 0) {
3120179970Sdavidxu			return (EINVAL);
3121179970Sdavidxu		}
3122179970Sdavidxu		ts = &timeout;
3123179970Sdavidxu	}
3124179970Sdavidxu	return do_lock_umutex(td, uap->obj, ts, _UMUTEX_WAIT);
3125179970Sdavidxu}
3126179970Sdavidxu
3127179970Sdavidxustatic int
3128179970Sdavidxu__umtx_op_wake_umutex(struct thread *td, struct _umtx_op_args *uap)
3129179970Sdavidxu{
3130179970Sdavidxu	return do_wake_umutex(td, uap->obj);
3131179970Sdavidxu}
3132179970Sdavidxu
3133179970Sdavidxustatic int
3134162536Sdavidxu__umtx_op_unlock_umutex(struct thread *td, struct _umtx_op_args *uap)
3135162536Sdavidxu{
3136162536Sdavidxu	return do_unlock_umutex(td, uap->obj);
3137162536Sdavidxu}
3138162536Sdavidxu
3139162536Sdavidxustatic int
3140162536Sdavidxu__umtx_op_set_ceiling(struct thread *td, struct _umtx_op_args *uap)
3141162536Sdavidxu{
3142162536Sdavidxu	return do_set_ceiling(td, uap->obj, uap->val, uap->uaddr1);
3143162536Sdavidxu}
3144162536Sdavidxu
3145164839Sdavidxustatic int
3146164839Sdavidxu__umtx_op_cv_wait(struct thread *td, struct _umtx_op_args *uap)
3147164839Sdavidxu{
3148164839Sdavidxu	struct timespec *ts, timeout;
3149164839Sdavidxu	int error;
3150164839Sdavidxu
3151164839Sdavidxu	/* Allow a null timespec (wait forever). */
3152164839Sdavidxu	if (uap->uaddr2 == NULL)
3153164839Sdavidxu		ts = NULL;
3154164839Sdavidxu	else {
3155164839Sdavidxu		error = copyin(uap->uaddr2, &timeout,
3156164839Sdavidxu		    sizeof(timeout));
3157164839Sdavidxu		if (error != 0)
3158164839Sdavidxu			return (error);
3159164839Sdavidxu		if (timeout.tv_nsec >= 1000000000 ||
3160164839Sdavidxu		    timeout.tv_nsec < 0) {
3161164839Sdavidxu			return (EINVAL);
3162164839Sdavidxu		}
3163164839Sdavidxu		ts = &timeout;
3164164839Sdavidxu	}
3165164876Sdavidxu	return (do_cv_wait(td, uap->obj, uap->uaddr1, ts, uap->val));
3166164839Sdavidxu}
3167164839Sdavidxu
3168164839Sdavidxustatic int
3169164839Sdavidxu__umtx_op_cv_signal(struct thread *td, struct _umtx_op_args *uap)
3170164839Sdavidxu{
3171164839Sdavidxu	return do_cv_signal(td, uap->obj);
3172164839Sdavidxu}
3173164839Sdavidxu
3174164839Sdavidxustatic int
3175164839Sdavidxu__umtx_op_cv_broadcast(struct thread *td, struct _umtx_op_args *uap)
3176164839Sdavidxu{
3177164839Sdavidxu	return do_cv_broadcast(td, uap->obj);
3178164839Sdavidxu}
3179164839Sdavidxu
3180177848Sdavidxustatic int
3181177848Sdavidxu__umtx_op_rw_rdlock(struct thread *td, struct _umtx_op_args *uap)
3182177848Sdavidxu{
3183177848Sdavidxu	struct timespec timeout;
3184177848Sdavidxu	int error;
3185177848Sdavidxu
3186177848Sdavidxu	/* Allow a null timespec (wait forever). */
3187177848Sdavidxu	if (uap->uaddr2 == NULL) {
3188177848Sdavidxu		error = do_rw_rdlock(td, uap->obj, uap->val, 0);
3189177848Sdavidxu	} else {
3190177848Sdavidxu		error = copyin(uap->uaddr2, &timeout,
3191177848Sdavidxu		    sizeof(timeout));
3192177848Sdavidxu		if (error != 0)
3193177848Sdavidxu			return (error);
3194177848Sdavidxu		if (timeout.tv_nsec >= 1000000000 ||
3195177848Sdavidxu		    timeout.tv_nsec < 0) {
3196177848Sdavidxu			return (EINVAL);
3197177848Sdavidxu		}
3198177848Sdavidxu		error = do_rw_rdlock2(td, uap->obj, uap->val, &timeout);
3199177848Sdavidxu	}
3200177848Sdavidxu	return (error);
3201177848Sdavidxu}
3202177848Sdavidxu
3203177848Sdavidxustatic int
3204177848Sdavidxu__umtx_op_rw_wrlock(struct thread *td, struct _umtx_op_args *uap)
3205177848Sdavidxu{
3206177848Sdavidxu	struct timespec timeout;
3207177848Sdavidxu	int error;
3208177848Sdavidxu
3209177848Sdavidxu	/* Allow a null timespec (wait forever). */
3210177848Sdavidxu	if (uap->uaddr2 == NULL) {
3211177848Sdavidxu		error = do_rw_wrlock(td, uap->obj, 0);
3212177848Sdavidxu	} else {
3213177848Sdavidxu		error = copyin(uap->uaddr2, &timeout,
3214177848Sdavidxu		    sizeof(timeout));
3215177848Sdavidxu		if (error != 0)
3216177848Sdavidxu			return (error);
3217177848Sdavidxu		if (timeout.tv_nsec >= 1000000000 ||
3218177848Sdavidxu		    timeout.tv_nsec < 0) {
3219177848Sdavidxu			return (EINVAL);
3220177848Sdavidxu		}
3221177848Sdavidxu
3222177848Sdavidxu		error = do_rw_wrlock2(td, uap->obj, &timeout);
3223177848Sdavidxu	}
3224177848Sdavidxu	return (error);
3225177848Sdavidxu}
3226177848Sdavidxu
3227177848Sdavidxustatic int
3228177848Sdavidxu__umtx_op_rw_unlock(struct thread *td, struct _umtx_op_args *uap)
3229177848Sdavidxu{
3230177880Sdavidxu	return do_rw_unlock(td, uap->obj);
3231177848Sdavidxu}
3232177848Sdavidxu
3233201472Sdavidxustatic int
3234201472Sdavidxu__umtx_op_sem_wait(struct thread *td, struct _umtx_op_args *uap)
3235201472Sdavidxu{
3236201472Sdavidxu	struct timespec *ts, timeout;
3237201472Sdavidxu	int error;
3238201472Sdavidxu
3239201472Sdavidxu	/* Allow a null timespec (wait forever). */
3240201472Sdavidxu	if (uap->uaddr2 == NULL)
3241201472Sdavidxu		ts = NULL;
3242201472Sdavidxu	else {
3243201472Sdavidxu		error = copyin(uap->uaddr2, &timeout,
3244201472Sdavidxu		    sizeof(timeout));
3245201472Sdavidxu		if (error != 0)
3246201472Sdavidxu			return (error);
3247201472Sdavidxu		if (timeout.tv_nsec >= 1000000000 ||
3248201472Sdavidxu		    timeout.tv_nsec < 0) {
3249201472Sdavidxu			return (EINVAL);
3250201472Sdavidxu		}
3251201472Sdavidxu		ts = &timeout;
3252201472Sdavidxu	}
3253201472Sdavidxu	return (do_sem_wait(td, uap->obj, ts));
3254201472Sdavidxu}
3255201472Sdavidxu
3256201472Sdavidxustatic int
3257201472Sdavidxu__umtx_op_sem_wake(struct thread *td, struct _umtx_op_args *uap)
3258201472Sdavidxu{
3259201472Sdavidxu	return do_sem_wake(td, uap->obj);
3260201472Sdavidxu}
3261201472Sdavidxu
3262162536Sdavidxutypedef int (*_umtx_op_func)(struct thread *td, struct _umtx_op_args *uap);
3263162536Sdavidxu
3264162536Sdavidxustatic _umtx_op_func op_table[] = {
3265162536Sdavidxu	__umtx_op_lock_umtx,		/* UMTX_OP_LOCK */
3266162536Sdavidxu	__umtx_op_unlock_umtx,		/* UMTX_OP_UNLOCK */
3267162536Sdavidxu	__umtx_op_wait,			/* UMTX_OP_WAIT */
3268162536Sdavidxu	__umtx_op_wake,			/* UMTX_OP_WAKE */
3269162536Sdavidxu	__umtx_op_trylock_umutex,	/* UMTX_OP_MUTEX_TRYLOCK */
3270162536Sdavidxu	__umtx_op_lock_umutex,		/* UMTX_OP_MUTEX_LOCK */
3271162536Sdavidxu	__umtx_op_unlock_umutex,	/* UMTX_OP_MUTEX_UNLOCK */
3272164839Sdavidxu	__umtx_op_set_ceiling,		/* UMTX_OP_SET_CEILING */
3273164839Sdavidxu	__umtx_op_cv_wait,		/* UMTX_OP_CV_WAIT*/
3274164839Sdavidxu	__umtx_op_cv_signal,		/* UMTX_OP_CV_SIGNAL */
3275173800Sdavidxu	__umtx_op_cv_broadcast,		/* UMTX_OP_CV_BROADCAST */
3276177848Sdavidxu	__umtx_op_wait_uint,		/* UMTX_OP_WAIT_UINT */
3277177848Sdavidxu	__umtx_op_rw_rdlock,		/* UMTX_OP_RW_RDLOCK */
3278177848Sdavidxu	__umtx_op_rw_wrlock,		/* UMTX_OP_RW_WRLOCK */
3279178646Sdavidxu	__umtx_op_rw_unlock,		/* UMTX_OP_RW_UNLOCK */
3280178646Sdavidxu	__umtx_op_wait_uint_private,	/* UMTX_OP_WAIT_UINT_PRIVATE */
3281179970Sdavidxu	__umtx_op_wake_private,		/* UMTX_OP_WAKE_PRIVATE */
3282179970Sdavidxu	__umtx_op_wait_umutex,		/* UMTX_OP_UMUTEX_WAIT */
3283201472Sdavidxu	__umtx_op_wake_umutex,		/* UMTX_OP_UMUTEX_WAKE */
3284201472Sdavidxu	__umtx_op_sem_wait,		/* UMTX_OP_SEM_WAIT */
3285216641Sdavidxu	__umtx_op_sem_wake,		/* UMTX_OP_SEM_WAKE */
3286216641Sdavidxu	__umtx_op_nwake_private		/* UMTX_OP_NWAKE_PRIVATE */
3287162536Sdavidxu};
3288162536Sdavidxu
3289162536Sdavidxuint
3290162536Sdavidxu_umtx_op(struct thread *td, struct _umtx_op_args *uap)
3291162536Sdavidxu{
3292163678Sdavidxu	if ((unsigned)uap->op < UMTX_OP_MAX)
3293162536Sdavidxu		return (*op_table[uap->op])(td, uap);
3294162536Sdavidxu	return (EINVAL);
3295162536Sdavidxu}
3296162536Sdavidxu
3297205014Snwhitehorn#ifdef COMPAT_FREEBSD32
3298163046Sdavidxuint
3299163046Sdavidxufreebsd32_umtx_lock(struct thread *td, struct freebsd32_umtx_lock_args *uap)
3300163046Sdavidxu    /* struct umtx *umtx */
3301163046Sdavidxu{
3302163046Sdavidxu	return (do_lock_umtx32(td, (uint32_t *)uap->umtx, td->td_tid, NULL));
3303163046Sdavidxu}
3304163046Sdavidxu
3305163046Sdavidxuint
3306163046Sdavidxufreebsd32_umtx_unlock(struct thread *td, struct freebsd32_umtx_unlock_args *uap)
3307163046Sdavidxu    /* struct umtx *umtx */
3308163046Sdavidxu{
3309163046Sdavidxu	return (do_unlock_umtx32(td, (uint32_t *)uap->umtx, td->td_tid));
3310163046Sdavidxu}
3311163046Sdavidxu
3312162536Sdavidxustruct timespec32 {
3313209390Sed	uint32_t tv_sec;
3314209390Sed	uint32_t tv_nsec;
3315162536Sdavidxu};
3316162536Sdavidxu
3317162536Sdavidxustatic inline int
3318162536Sdavidxucopyin_timeout32(void *addr, struct timespec *tsp)
3319162536Sdavidxu{
3320162536Sdavidxu	struct timespec32 ts32;
3321162536Sdavidxu	int error;
3322162536Sdavidxu
3323162536Sdavidxu	error = copyin(addr, &ts32, sizeof(struct timespec32));
3324162536Sdavidxu	if (error == 0) {
3325162536Sdavidxu		tsp->tv_sec = ts32.tv_sec;
3326162536Sdavidxu		tsp->tv_nsec = ts32.tv_nsec;
3327162536Sdavidxu	}
3328140421Sdavidxu	return (error);
3329139013Sdavidxu}
3330161678Sdavidxu
3331162536Sdavidxustatic int
3332162536Sdavidxu__umtx_op_lock_umtx_compat32(struct thread *td, struct _umtx_op_args *uap)
3333162536Sdavidxu{
3334162536Sdavidxu	struct timespec *ts, timeout;
3335162536Sdavidxu	int error;
3336162536Sdavidxu
3337162536Sdavidxu	/* Allow a null timespec (wait forever). */
3338162536Sdavidxu	if (uap->uaddr2 == NULL)
3339162536Sdavidxu		ts = NULL;
3340162536Sdavidxu	else {
3341162536Sdavidxu		error = copyin_timeout32(uap->uaddr2, &timeout);
3342162536Sdavidxu		if (error != 0)
3343162536Sdavidxu			return (error);
3344162536Sdavidxu		if (timeout.tv_nsec >= 1000000000 ||
3345162536Sdavidxu		    timeout.tv_nsec < 0) {
3346162536Sdavidxu			return (EINVAL);
3347162536Sdavidxu		}
3348162536Sdavidxu		ts = &timeout;
3349162536Sdavidxu	}
3350162536Sdavidxu	return (do_lock_umtx32(td, uap->obj, uap->val, ts));
3351162536Sdavidxu}
3352162536Sdavidxu
3353162536Sdavidxustatic int
3354162536Sdavidxu__umtx_op_unlock_umtx_compat32(struct thread *td, struct _umtx_op_args *uap)
3355162536Sdavidxu{
3356162536Sdavidxu	return (do_unlock_umtx32(td, uap->obj, (uint32_t)uap->val));
3357162536Sdavidxu}
3358162536Sdavidxu
3359162536Sdavidxustatic int
3360162536Sdavidxu__umtx_op_wait_compat32(struct thread *td, struct _umtx_op_args *uap)
3361162536Sdavidxu{
3362162536Sdavidxu	struct timespec *ts, timeout;
3363162536Sdavidxu	int error;
3364162536Sdavidxu
3365162536Sdavidxu	if (uap->uaddr2 == NULL)
3366162536Sdavidxu		ts = NULL;
3367162536Sdavidxu	else {
3368162536Sdavidxu		error = copyin_timeout32(uap->uaddr2, &timeout);
3369162536Sdavidxu		if (error != 0)
3370162536Sdavidxu			return (error);
3371162536Sdavidxu		if (timeout.tv_nsec >= 1000000000 ||
3372162536Sdavidxu		    timeout.tv_nsec < 0)
3373162536Sdavidxu			return (EINVAL);
3374162536Sdavidxu		ts = &timeout;
3375162536Sdavidxu	}
3376178646Sdavidxu	return do_wait(td, uap->obj, uap->val, ts, 1, 0);
3377162536Sdavidxu}
3378162536Sdavidxu
3379162536Sdavidxustatic int
3380162536Sdavidxu__umtx_op_lock_umutex_compat32(struct thread *td, struct _umtx_op_args *uap)
3381162536Sdavidxu{
3382162536Sdavidxu	struct timespec *ts, timeout;
3383162536Sdavidxu	int error;
3384162536Sdavidxu
3385162536Sdavidxu	/* Allow a null timespec (wait forever). */
3386162536Sdavidxu	if (uap->uaddr2 == NULL)
3387162536Sdavidxu		ts = NULL;
3388162536Sdavidxu	else {
3389162536Sdavidxu		error = copyin_timeout32(uap->uaddr2, &timeout);
3390162536Sdavidxu		if (error != 0)
3391162536Sdavidxu			return (error);
3392162536Sdavidxu		if (timeout.tv_nsec >= 1000000000 ||
3393162536Sdavidxu		    timeout.tv_nsec < 0)
3394162536Sdavidxu			return (EINVAL);
3395162536Sdavidxu		ts = &timeout;
3396162536Sdavidxu	}
3397162536Sdavidxu	return do_lock_umutex(td, uap->obj, ts, 0);
3398162536Sdavidxu}
3399162536Sdavidxu
3400164839Sdavidxustatic int
3401179970Sdavidxu__umtx_op_wait_umutex_compat32(struct thread *td, struct _umtx_op_args *uap)
3402179970Sdavidxu{
3403179970Sdavidxu	struct timespec *ts, timeout;
3404179970Sdavidxu	int error;
3405179970Sdavidxu
3406179970Sdavidxu	/* Allow a null timespec (wait forever). */
3407179970Sdavidxu	if (uap->uaddr2 == NULL)
3408179970Sdavidxu		ts = NULL;
3409179970Sdavidxu	else {
3410179970Sdavidxu		error = copyin_timeout32(uap->uaddr2, &timeout);
3411179970Sdavidxu		if (error != 0)
3412179970Sdavidxu			return (error);
3413179970Sdavidxu		if (timeout.tv_nsec >= 1000000000 ||
3414179970Sdavidxu		    timeout.tv_nsec < 0)
3415179970Sdavidxu			return (EINVAL);
3416179970Sdavidxu		ts = &timeout;
3417179970Sdavidxu	}
3418179970Sdavidxu	return do_lock_umutex(td, uap->obj, ts, _UMUTEX_WAIT);
3419179970Sdavidxu}
3420179970Sdavidxu
3421179970Sdavidxustatic int
3422164839Sdavidxu__umtx_op_cv_wait_compat32(struct thread *td, struct _umtx_op_args *uap)
3423164839Sdavidxu{
3424164839Sdavidxu	struct timespec *ts, timeout;
3425164839Sdavidxu	int error;
3426164839Sdavidxu
3427164839Sdavidxu	/* Allow a null timespec (wait forever). */
3428164839Sdavidxu	if (uap->uaddr2 == NULL)
3429164839Sdavidxu		ts = NULL;
3430164839Sdavidxu	else {
3431164839Sdavidxu		error = copyin_timeout32(uap->uaddr2, &timeout);
3432164839Sdavidxu		if (error != 0)
3433164839Sdavidxu			return (error);
3434164839Sdavidxu		if (timeout.tv_nsec >= 1000000000 ||
3435164839Sdavidxu		    timeout.tv_nsec < 0)
3436164839Sdavidxu			return (EINVAL);
3437164839Sdavidxu		ts = &timeout;
3438164839Sdavidxu	}
3439164876Sdavidxu	return (do_cv_wait(td, uap->obj, uap->uaddr1, ts, uap->val));
3440164839Sdavidxu}
3441164839Sdavidxu
3442177848Sdavidxustatic int
3443177848Sdavidxu__umtx_op_rw_rdlock_compat32(struct thread *td, struct _umtx_op_args *uap)
3444177848Sdavidxu{
3445177848Sdavidxu	struct timespec timeout;
3446177848Sdavidxu	int error;
3447177848Sdavidxu
3448177848Sdavidxu	/* Allow a null timespec (wait forever). */
3449177848Sdavidxu	if (uap->uaddr2 == NULL) {
3450177848Sdavidxu		error = do_rw_rdlock(td, uap->obj, uap->val, 0);
3451177848Sdavidxu	} else {
3452216463Smdf		error = copyin_timeout32(uap->uaddr2, &timeout);
3453177848Sdavidxu		if (error != 0)
3454177848Sdavidxu			return (error);
3455177848Sdavidxu		if (timeout.tv_nsec >= 1000000000 ||
3456177848Sdavidxu		    timeout.tv_nsec < 0) {
3457177848Sdavidxu			return (EINVAL);
3458177848Sdavidxu		}
3459177848Sdavidxu		error = do_rw_rdlock2(td, uap->obj, uap->val, &timeout);
3460177848Sdavidxu	}
3461177848Sdavidxu	return (error);
3462177848Sdavidxu}
3463177848Sdavidxu
3464177848Sdavidxustatic int
3465177848Sdavidxu__umtx_op_rw_wrlock_compat32(struct thread *td, struct _umtx_op_args *uap)
3466177848Sdavidxu{
3467177848Sdavidxu	struct timespec timeout;
3468177848Sdavidxu	int error;
3469177848Sdavidxu
3470177848Sdavidxu	/* Allow a null timespec (wait forever). */
3471177848Sdavidxu	if (uap->uaddr2 == NULL) {
3472177852Sdavidxu		error = do_rw_wrlock(td, uap->obj, 0);
3473177848Sdavidxu	} else {
3474177848Sdavidxu		error = copyin_timeout32(uap->uaddr2, &timeout);
3475177848Sdavidxu		if (error != 0)
3476177848Sdavidxu			return (error);
3477177848Sdavidxu		if (timeout.tv_nsec >= 1000000000 ||
3478177848Sdavidxu		    timeout.tv_nsec < 0) {
3479177848Sdavidxu			return (EINVAL);
3480177848Sdavidxu		}
3481177848Sdavidxu
3482177852Sdavidxu		error = do_rw_wrlock2(td, uap->obj, &timeout);
3483177848Sdavidxu	}
3484177848Sdavidxu	return (error);
3485177848Sdavidxu}
3486177848Sdavidxu
3487178646Sdavidxustatic int
3488178646Sdavidxu__umtx_op_wait_uint_private_compat32(struct thread *td, struct _umtx_op_args *uap)
3489178646Sdavidxu{
3490178646Sdavidxu	struct timespec *ts, timeout;
3491178646Sdavidxu	int error;
3492178646Sdavidxu
3493178646Sdavidxu	if (uap->uaddr2 == NULL)
3494178646Sdavidxu		ts = NULL;
3495178646Sdavidxu	else {
3496178646Sdavidxu		error = copyin_timeout32(uap->uaddr2, &timeout);
3497178646Sdavidxu		if (error != 0)
3498178646Sdavidxu			return (error);
3499178646Sdavidxu		if (timeout.tv_nsec >= 1000000000 ||
3500178646Sdavidxu		    timeout.tv_nsec < 0)
3501178646Sdavidxu			return (EINVAL);
3502178646Sdavidxu		ts = &timeout;
3503178646Sdavidxu	}
3504178646Sdavidxu	return do_wait(td, uap->obj, uap->val, ts, 1, 1);
3505178646Sdavidxu}
3506178646Sdavidxu
3507201472Sdavidxustatic int
3508201472Sdavidxu__umtx_op_sem_wait_compat32(struct thread *td, struct _umtx_op_args *uap)
3509201472Sdavidxu{
3510201472Sdavidxu	struct timespec *ts, timeout;
3511201472Sdavidxu	int error;
3512201472Sdavidxu
3513201472Sdavidxu	/* Allow a null timespec (wait forever). */
3514201472Sdavidxu	if (uap->uaddr2 == NULL)
3515201472Sdavidxu		ts = NULL;
3516201472Sdavidxu	else {
3517201472Sdavidxu		error = copyin_timeout32(uap->uaddr2, &timeout);
3518201472Sdavidxu		if (error != 0)
3519201472Sdavidxu			return (error);
3520201472Sdavidxu		if (timeout.tv_nsec >= 1000000000 ||
3521201472Sdavidxu		    timeout.tv_nsec < 0)
3522201472Sdavidxu			return (EINVAL);
3523201472Sdavidxu		ts = &timeout;
3524201472Sdavidxu	}
3525201472Sdavidxu	return (do_sem_wait(td, uap->obj, ts));
3526201472Sdavidxu}
3527201472Sdavidxu
3528216641Sdavidxustatic int
3529216641Sdavidxu__umtx_op_nwake_private32(struct thread *td, struct _umtx_op_args *uap)
3530216641Sdavidxu{
3531216641Sdavidxu	int count = uap->val;
3532216641Sdavidxu	uint32_t uaddrs[BATCH_SIZE];
3533216641Sdavidxu	uint32_t **upp = (uint32_t **)uap->obj;
3534216641Sdavidxu	int tocopy;
3535216641Sdavidxu	int error = 0;
3536216641Sdavidxu	int i, pos = 0;
3537216641Sdavidxu
3538216641Sdavidxu	while (count > 0) {
3539216641Sdavidxu		tocopy = count;
3540216641Sdavidxu		if (tocopy > BATCH_SIZE)
3541216641Sdavidxu			tocopy = BATCH_SIZE;
3542216641Sdavidxu		error = copyin(upp+pos, uaddrs, tocopy * sizeof(uint32_t));
3543216641Sdavidxu		if (error != 0)
3544216641Sdavidxu			break;
3545216641Sdavidxu		for (i = 0; i < tocopy; ++i)
3546216641Sdavidxu			kern_umtx_wake(td, (void *)(intptr_t)uaddrs[i],
3547216641Sdavidxu				INT_MAX, 1);
3548216641Sdavidxu		count -= tocopy;
3549216641Sdavidxu		pos += tocopy;
3550216641Sdavidxu	}
3551216641Sdavidxu	return (error);
3552216641Sdavidxu}
3553216641Sdavidxu
3554162536Sdavidxustatic _umtx_op_func op_table_compat32[] = {
3555162536Sdavidxu	__umtx_op_lock_umtx_compat32,	/* UMTX_OP_LOCK */
3556162536Sdavidxu	__umtx_op_unlock_umtx_compat32,	/* UMTX_OP_UNLOCK */
3557162536Sdavidxu	__umtx_op_wait_compat32,	/* UMTX_OP_WAIT */
3558162536Sdavidxu	__umtx_op_wake,			/* UMTX_OP_WAKE */
3559162550Sdavidxu	__umtx_op_trylock_umutex,	/* UMTX_OP_MUTEX_LOCK */
3560162536Sdavidxu	__umtx_op_lock_umutex_compat32,	/* UMTX_OP_MUTEX_TRYLOCK */
3561162536Sdavidxu	__umtx_op_unlock_umutex,	/* UMTX_OP_MUTEX_UNLOCK	*/
3562164839Sdavidxu	__umtx_op_set_ceiling,		/* UMTX_OP_SET_CEILING */
3563164839Sdavidxu	__umtx_op_cv_wait_compat32,	/* UMTX_OP_CV_WAIT*/
3564164839Sdavidxu	__umtx_op_cv_signal,		/* UMTX_OP_CV_SIGNAL */
3565173800Sdavidxu	__umtx_op_cv_broadcast,		/* UMTX_OP_CV_BROADCAST */
3566177848Sdavidxu	__umtx_op_wait_compat32,	/* UMTX_OP_WAIT_UINT */
3567177848Sdavidxu	__umtx_op_rw_rdlock_compat32,	/* UMTX_OP_RW_RDLOCK */
3568177848Sdavidxu	__umtx_op_rw_wrlock_compat32,	/* UMTX_OP_RW_WRLOCK */
3569178646Sdavidxu	__umtx_op_rw_unlock,		/* UMTX_OP_RW_UNLOCK */
3570178646Sdavidxu	__umtx_op_wait_uint_private_compat32,	/* UMTX_OP_WAIT_UINT_PRIVATE */
3571179970Sdavidxu	__umtx_op_wake_private,		/* UMTX_OP_WAKE_PRIVATE */
3572179970Sdavidxu	__umtx_op_wait_umutex_compat32, /* UMTX_OP_UMUTEX_WAIT */
3573201472Sdavidxu	__umtx_op_wake_umutex,		/* UMTX_OP_UMUTEX_WAKE */
3574201472Sdavidxu	__umtx_op_sem_wait_compat32,	/* UMTX_OP_SEM_WAIT */
3575216641Sdavidxu	__umtx_op_sem_wake,		/* UMTX_OP_SEM_WAKE */
3576216641Sdavidxu	__umtx_op_nwake_private32	/* UMTX_OP_NWAKE_PRIVATE */
3577162536Sdavidxu};
3578162536Sdavidxu
3579162536Sdavidxuint
3580162536Sdavidxufreebsd32_umtx_op(struct thread *td, struct freebsd32_umtx_op_args *uap)
3581162536Sdavidxu{
3582163678Sdavidxu	if ((unsigned)uap->op < UMTX_OP_MAX)
3583162536Sdavidxu		return (*op_table_compat32[uap->op])(td,
3584162536Sdavidxu			(struct _umtx_op_args *)uap);
3585162536Sdavidxu	return (EINVAL);
3586162536Sdavidxu}
3587162536Sdavidxu#endif
3588162536Sdavidxu
3589161678Sdavidxuvoid
3590161678Sdavidxuumtx_thread_init(struct thread *td)
3591161678Sdavidxu{
3592161678Sdavidxu	td->td_umtxq = umtxq_alloc();
3593161678Sdavidxu	td->td_umtxq->uq_thread = td;
3594161678Sdavidxu}
3595161678Sdavidxu
3596161678Sdavidxuvoid
3597161678Sdavidxuumtx_thread_fini(struct thread *td)
3598161678Sdavidxu{
3599161678Sdavidxu	umtxq_free(td->td_umtxq);
3600161678Sdavidxu}
3601161678Sdavidxu
3602161678Sdavidxu/*
3603161678Sdavidxu * It will be called when new thread is created, e.g fork().
3604161678Sdavidxu */
3605161678Sdavidxuvoid
3606161678Sdavidxuumtx_thread_alloc(struct thread *td)
3607161678Sdavidxu{
3608161678Sdavidxu	struct umtx_q *uq;
3609161678Sdavidxu
3610161678Sdavidxu	uq = td->td_umtxq;
3611161678Sdavidxu	uq->uq_inherited_pri = PRI_MAX;
3612161678Sdavidxu
3613161678Sdavidxu	KASSERT(uq->uq_flags == 0, ("uq_flags != 0"));
3614161678Sdavidxu	KASSERT(uq->uq_thread == td, ("uq_thread != td"));
3615161678Sdavidxu	KASSERT(uq->uq_pi_blocked == NULL, ("uq_pi_blocked != NULL"));
3616161678Sdavidxu	KASSERT(TAILQ_EMPTY(&uq->uq_pi_contested), ("uq_pi_contested is not empty"));
3617161678Sdavidxu}
3618161678Sdavidxu
3619161678Sdavidxu/*
3620161678Sdavidxu * exec() hook.
3621161678Sdavidxu */
3622161678Sdavidxustatic void
3623161678Sdavidxuumtx_exec_hook(void *arg __unused, struct proc *p __unused,
3624161678Sdavidxu	struct image_params *imgp __unused)
3625161678Sdavidxu{
3626161678Sdavidxu	umtx_thread_cleanup(curthread);
3627161678Sdavidxu}
3628161678Sdavidxu
3629161678Sdavidxu/*
3630161678Sdavidxu * thread_exit() hook.
3631161678Sdavidxu */
3632161678Sdavidxuvoid
3633161678Sdavidxuumtx_thread_exit(struct thread *td)
3634161678Sdavidxu{
3635161678Sdavidxu	umtx_thread_cleanup(td);
3636161678Sdavidxu}
3637161678Sdavidxu
3638161678Sdavidxu/*
3639161678Sdavidxu * clean up umtx data.
3640161678Sdavidxu */
3641161678Sdavidxustatic void
3642161678Sdavidxuumtx_thread_cleanup(struct thread *td)
3643161678Sdavidxu{
3644161678Sdavidxu	struct umtx_q *uq;
3645161678Sdavidxu	struct umtx_pi *pi;
3646161678Sdavidxu
3647161678Sdavidxu	if ((uq = td->td_umtxq) == NULL)
3648161678Sdavidxu		return;
3649161678Sdavidxu
3650170300Sjeff	mtx_lock_spin(&umtx_lock);
3651161678Sdavidxu	uq->uq_inherited_pri = PRI_MAX;
3652161678Sdavidxu	while ((pi = TAILQ_FIRST(&uq->uq_pi_contested)) != NULL) {
3653161678Sdavidxu		pi->pi_owner = NULL;
3654161678Sdavidxu		TAILQ_REMOVE(&uq->uq_pi_contested, pi, pi_link);
3655161678Sdavidxu	}
3656216313Sdavidxu	mtx_unlock_spin(&umtx_lock);
3657174701Sdavidxu	thread_lock(td);
3658216791Sdavidxu	sched_lend_user_prio(td, PRI_MAX);
3659174701Sdavidxu	thread_unlock(td);
3660161678Sdavidxu}
3661