kern_umtx.c revision 177880
1139804Simp/*-
2139013Sdavidxu * Copyright (c) 2004, David Xu <davidxu@freebsd.org>
3112904Sjeff * Copyright (c) 2002, Jeffrey Roberson <jeff@freebsd.org>
4112904Sjeff * All rights reserved.
5112904Sjeff *
6112904Sjeff * Redistribution and use in source and binary forms, with or without
7112904Sjeff * modification, are permitted provided that the following conditions
8112904Sjeff * are met:
9112904Sjeff * 1. Redistributions of source code must retain the above copyright
10112904Sjeff *    notice unmodified, this list of conditions, and the following
11112904Sjeff *    disclaimer.
12112904Sjeff * 2. Redistributions in binary form must reproduce the above copyright
13112904Sjeff *    notice, this list of conditions and the following disclaimer in the
14112904Sjeff *    documentation and/or other materials provided with the distribution.
15112904Sjeff *
16112904Sjeff * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR
17112904Sjeff * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
18112904Sjeff * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.
19112904Sjeff * IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT,
20112904Sjeff * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT
21112904Sjeff * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
22112904Sjeff * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
23112904Sjeff * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
24112904Sjeff * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF
25112904Sjeff * THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
26112904Sjeff */
27112904Sjeff
28116182Sobrien#include <sys/cdefs.h>
29116182Sobrien__FBSDID("$FreeBSD: head/sys/kern/kern_umtx.c 177880 2008-04-03 11:49:20Z davidxu $");
30116182Sobrien
31162536Sdavidxu#include "opt_compat.h"
32112904Sjeff#include <sys/param.h>
33112904Sjeff#include <sys/kernel.h>
34131431Smarcel#include <sys/limits.h>
35112904Sjeff#include <sys/lock.h>
36115765Sjeff#include <sys/malloc.h>
37112904Sjeff#include <sys/mutex.h>
38164033Srwatson#include <sys/priv.h>
39112904Sjeff#include <sys/proc.h>
40161678Sdavidxu#include <sys/sched.h>
41165369Sdavidxu#include <sys/smp.h>
42161678Sdavidxu#include <sys/sysctl.h>
43112904Sjeff#include <sys/sysent.h>
44112904Sjeff#include <sys/systm.h>
45112904Sjeff#include <sys/sysproto.h>
46139013Sdavidxu#include <sys/eventhandler.h>
47112904Sjeff#include <sys/umtx.h>
48112904Sjeff
49139013Sdavidxu#include <vm/vm.h>
50139013Sdavidxu#include <vm/vm_param.h>
51139013Sdavidxu#include <vm/pmap.h>
52139013Sdavidxu#include <vm/vm_map.h>
53139013Sdavidxu#include <vm/vm_object.h>
54139013Sdavidxu
55165369Sdavidxu#include <machine/cpu.h>
56165369Sdavidxu
57162536Sdavidxu#ifdef COMPAT_IA32
58162536Sdavidxu#include <compat/freebsd32/freebsd32_proto.h>
59162536Sdavidxu#endif
60162536Sdavidxu
61161678Sdavidxu#define TYPE_SIMPLE_LOCK	0
62161678Sdavidxu#define TYPE_SIMPLE_WAIT	1
63161678Sdavidxu#define TYPE_NORMAL_UMUTEX	2
64161678Sdavidxu#define TYPE_PI_UMUTEX		3
65161678Sdavidxu#define TYPE_PP_UMUTEX		4
66161678Sdavidxu#define TYPE_CV			5
67177848Sdavidxu#define TYPE_RWLOCK		6
68139013Sdavidxu
69161678Sdavidxu/* Key to represent a unique userland synchronous object */
70139013Sdavidxustruct umtx_key {
	/* Wait-queue chain index, computed by umtxq_hash() from info.both. */
71161678Sdavidxu	int	hash;
	/* One of the TYPE_* object kinds defined above. */
72139013Sdavidxu	int	type;
	/* Non-zero when the 'shared' arm of the union below is valid. */
73161678Sdavidxu	int	shared;
74139013Sdavidxu	union {
		/* Process-shared object: backing VM object + offset in it. */
75139013Sdavidxu		struct {
76139013Sdavidxu			vm_object_t	object;
77161678Sdavidxu			uintptr_t	offset;
78139013Sdavidxu		} shared;
		/* Process-private object: owning vmspace + user address. */
79139013Sdavidxu		struct {
80161678Sdavidxu			struct vmspace	*vs;
81161678Sdavidxu			uintptr_t	addr;
82139013Sdavidxu		} private;
		/*
		 * Type-erased overlay of either arm above; lets
		 * umtxq_hash() and umtx_key_match() treat both
		 * variants uniformly as a (pointer, integer) pair.
		 */
83139013Sdavidxu		struct {
84161678Sdavidxu			void		*a;
85161678Sdavidxu			uintptr_t	b;
86139013Sdavidxu		} both;
87139013Sdavidxu	} info;
88139013Sdavidxu};
89139013Sdavidxu
90161678Sdavidxu/* Priority inheritance mutex info. */
91161678Sdavidxustruct umtx_pi {
92161678Sdavidxu	/* Owner thread */
93161678Sdavidxu	struct thread		*pi_owner;
94161678Sdavidxu
95161678Sdavidxu	/* Reference count */
96161678Sdavidxu	int			pi_refcount;
97161678Sdavidxu
98161678Sdavidxu 	/* List entry linking PI mutexes held by a thread
	 * (NOTE(review): presumably onto uq_pi_contested; the
	 * linking code is outside this chunk -- confirm). */
99161678Sdavidxu	TAILQ_ENTRY(umtx_pi)	pi_link;
100161678Sdavidxu
101161678Sdavidxu	/* List entry on the chain's uc_pi_list hash bucket. */
102161678Sdavidxu	TAILQ_ENTRY(umtx_pi)	pi_hashlink;
103161678Sdavidxu
104161678Sdavidxu	/* List of waiters blocked on this PI mutex. */
105161678Sdavidxu	TAILQ_HEAD(,umtx_q)	pi_blocked;
106161678Sdavidxu
107161678Sdavidxu	/* Identifies the userland lock object this PI state shadows. */
108161678Sdavidxu	struct umtx_key		pi_key;
109161678Sdavidxu};
110161678Sdavidxu
111161678Sdavidxu/* A userland synchronous object user. */
112115765Sjeffstruct umtx_q {
113161678Sdavidxu	/* Entry on a chain's sleep queue (uc_queue[]). */
114161678Sdavidxu	TAILQ_ENTRY(umtx_q)	uq_link;
115161678Sdavidxu
116161678Sdavidxu	/* Key of the object currently waited on. */
117161678Sdavidxu	struct umtx_key		uq_key;
118161678Sdavidxu
119161678Sdavidxu	/* Umtx flags. */
120161678Sdavidxu	int			uq_flags;
	/* Set while this entry is linked on a chain's sleep queue. */
121161678Sdavidxu#define UQF_UMTXQ	0x0001
122161678Sdavidxu
123161678Sdavidxu	/* Back pointer to the owning thread. */
124161678Sdavidxu	struct thread		*uq_thread;
125161678Sdavidxu
126161678Sdavidxu	/*
127161678Sdavidxu	 * PI mutex this thread is blocked on, if any.  Readers may
128170300Sjeff	 * hold either the chain lock or umtx_lock; writers must
129170300Sjeff	 * hold both the chain lock and umtx_lock.
130161678Sdavidxu	 */
131161678Sdavidxu	struct umtx_pi		*uq_pi_blocked;
132161678Sdavidxu
133161678Sdavidxu	/* Entry on a PI mutex's blocked list. */
134161678Sdavidxu	TAILQ_ENTRY(umtx_q)	uq_lockq;
135161678Sdavidxu
136161678Sdavidxu	/* PI mutexes held by this thread that have contenders. */
137161678Sdavidxu	TAILQ_HEAD(,umtx_pi)	uq_pi_contested;
138161678Sdavidxu
139161742Sdavidxu	/* Inherited priority from PP mutex (PRI_MAX = none). */
140161678Sdavidxu	u_char			uq_inherited_pri;
141115765Sjeff};
142115765Sjeff
/* Queue head type for lists of waiting umtx_q entries. */
143161678SdavidxuTAILQ_HEAD(umtxq_head, umtx_q);
144161678Sdavidxu
145161678Sdavidxu/* Userland lock object's wait-queue chain */
146138224Sdavidxustruct umtxq_chain {
147161678Sdavidxu	/* Lock for this chain. */
148161678Sdavidxu	struct mtx		uc_lock;
149161678Sdavidxu
150161678Sdavidxu	/* Sleep queues: readers/generic waiters vs. exclusive waiters
	 * (the exclusive queue appears intended for TYPE_RWLOCK writers;
	 * the rwlock code is outside this chunk -- confirm). */
151177848Sdavidxu	struct umtxq_head	uc_queue[2];
152177848Sdavidxu#define UMTX_SHARED_QUEUE	0
153177848Sdavidxu#define UMTX_EXCLUSIVE_QUEUE	1
154161678Sdavidxu
155161678Sdavidxu	/* Busy flag: set/cleared with uc_lock held (umtxq_busy/
	 * umtxq_unbusy); umtxq_busy spins on it unlocked on SMP. */
156161678Sdavidxu	char			uc_busy;
157161678Sdavidxu
158161678Sdavidxu	/* Number of threads sleeping in umtxq_busy() for uc_busy. */
159158377Sdavidxu	int			uc_waiters;
160161678Sdavidxu
161161678Sdavidxu	/* All PI mutexes hashed to this chain. */
162161678Sdavidxu	TAILQ_HEAD(,umtx_pi)	uc_pi_list;
163138224Sdavidxu};
164115765Sjeff
165161678Sdavidxu#define	UMTXQ_LOCKED_ASSERT(uc)		mtx_assert(&(uc)->uc_lock, MA_OWNED)
166161678Sdavidxu
167161678Sdavidxu/*
168161678Sdavidxu * Don't propagate time-sharing priority, there is a security reason,
169161678Sdavidxu * a user can simply introduce PI-mutex, let thread A lock the mutex,
170161678Sdavidxu * and let another thread B block on the mutex, because B is
171161678Sdavidxu * sleeping, its priority will be boosted, this causes A's priority to
172161678Sdavidxu * be boosted via priority propagating too and will never be lowered even
173161678Sdavidxu * if it is using 100%CPU, this is unfair to other processes.
174161678Sdavidxu */
175161678Sdavidxu
176163709Sjb#define UPRI(td)	(((td)->td_user_pri >= PRI_MIN_TIMESHARE &&\
177163709Sjb			  (td)->td_user_pri <= PRI_MAX_TIMESHARE) ?\
178163709Sjb			 PRI_MAX_TIMESHARE : (td)->td_user_pri)
179161678Sdavidxu
180138224Sdavidxu#define	GOLDEN_RATIO_PRIME	2654404609U
181138224Sdavidxu#define	UMTX_CHAINS		128
182138224Sdavidxu#define	UMTX_SHIFTS		(__WORD_BIT - 7)
183115765Sjeff
184161678Sdavidxu#define THREAD_SHARE		0
185161678Sdavidxu#define PROCESS_SHARE		1
186161678Sdavidxu#define AUTO_SHARE		2
187161678Sdavidxu
188161678Sdavidxu#define	GET_SHARE(flags)	\
189161678Sdavidxu    (((flags) & USYNC_PROCESS_SHARED) == 0 ? THREAD_SHARE : PROCESS_SHARE)
190161678Sdavidxu
191177848Sdavidxu#define BUSY_SPINS		200
192177848Sdavidxu
193161678Sdavidxustatic uma_zone_t		umtx_pi_zone;
194161678Sdavidxustatic struct umtxq_chain	umtxq_chains[UMTX_CHAINS];
195138224Sdavidxustatic MALLOC_DEFINE(M_UMTX, "umtx", "UMTX queue memory");
196161678Sdavidxustatic int			umtx_pi_allocated;
197115310Sjeff
198161678SdavidxuSYSCTL_NODE(_debug, OID_AUTO, umtx, CTLFLAG_RW, 0, "umtx debug");
199161678SdavidxuSYSCTL_INT(_debug_umtx, OID_AUTO, umtx_pi_allocated, CTLFLAG_RD,
200161678Sdavidxu    &umtx_pi_allocated, 0, "Allocated umtx_pi");
201161678Sdavidxu
202161678Sdavidxustatic void umtxq_sysinit(void *);
203161678Sdavidxustatic void umtxq_hash(struct umtx_key *key);
204161678Sdavidxustatic struct umtxq_chain *umtxq_getchain(struct umtx_key *key);
205139013Sdavidxustatic void umtxq_lock(struct umtx_key *key);
206139013Sdavidxustatic void umtxq_unlock(struct umtx_key *key);
207139257Sdavidxustatic void umtxq_busy(struct umtx_key *key);
208139257Sdavidxustatic void umtxq_unbusy(struct umtx_key *key);
209177848Sdavidxustatic void umtxq_insert_queue(struct umtx_q *uq, int q);
210177848Sdavidxustatic void umtxq_remove_queue(struct umtx_q *uq, int q);
211161678Sdavidxustatic int umtxq_sleep(struct umtx_q *uq, const char *wmesg, int timo);
212139257Sdavidxustatic int umtxq_count(struct umtx_key *key);
213139013Sdavidxustatic int umtx_key_match(const struct umtx_key *k1, const struct umtx_key *k2);
214161678Sdavidxustatic int umtx_key_get(void *addr, int type, int share,
215139013Sdavidxu	struct umtx_key *key);
216139013Sdavidxustatic void umtx_key_release(struct umtx_key *key);
217163697Sdavidxustatic struct umtx_pi *umtx_pi_alloc(int);
218161678Sdavidxustatic void umtx_pi_free(struct umtx_pi *pi);
219174701Sdavidxustatic void umtx_pi_adjust_locked(struct thread *td, u_char oldpri);
220161678Sdavidxustatic int do_unlock_pp(struct thread *td, struct umutex *m, uint32_t flags);
221161678Sdavidxustatic void umtx_thread_cleanup(struct thread *td);
222161678Sdavidxustatic void umtx_exec_hook(void *arg __unused, struct proc *p __unused,
223161678Sdavidxu	struct image_params *imgp __unused);
224161678SdavidxuSYSINIT(umtx, SI_SUB_EVENTHANDLER+1, SI_ORDER_MIDDLE, umtxq_sysinit, NULL);
225115310Sjeff
226177848Sdavidxu#define umtxq_signal(key, nwake)	umtxq_signal_queue((key), (nwake), UMTX_SHARED_QUEUE)
227177848Sdavidxu#define umtxq_insert(uq)	umtxq_insert_queue((uq), UMTX_SHARED_QUEUE)
228177848Sdavidxu#define umtxq_remove(uq)	umtxq_remove_queue((uq), UMTX_SHARED_QUEUE)
229177848Sdavidxu
230170300Sjeffstatic struct mtx umtx_lock;
231170300Sjeff
/*
 * Subsystem initialization (run via SYSINIT after event handlers are
 * up): create the umtx_pi UMA zone, initialize every wait-queue chain
 * and the global umtx spin lock, and register umtx_exec_hook so umtx
 * state is cleaned up on process exec.
 */
232161678Sdavidxustatic void
233161678Sdavidxuumtxq_sysinit(void *arg __unused)
234161678Sdavidxu{
235161678Sdavidxu	int i;
236138224Sdavidxu
237161678Sdavidxu	umtx_pi_zone = uma_zcreate("umtx pi", sizeof(struct umtx_pi),
238161678Sdavidxu		NULL, NULL, NULL, NULL, UMA_ALIGN_PTR, 0);
239161678Sdavidxu	for (i = 0; i < UMTX_CHAINS; ++i) {
		/* MTX_DUPOK: all chains share one lock class; presumably
		 * two chain locks can be held simultaneously -- confirm. */
240161678Sdavidxu		mtx_init(&umtxq_chains[i].uc_lock, "umtxql", NULL,
241161678Sdavidxu			 MTX_DEF | MTX_DUPOK);
242177848Sdavidxu		TAILQ_INIT(&umtxq_chains[i].uc_queue[0]);
243177848Sdavidxu		TAILQ_INIT(&umtxq_chains[i].uc_queue[1]);
244161678Sdavidxu		TAILQ_INIT(&umtxq_chains[i].uc_pi_list);
245161678Sdavidxu		umtxq_chains[i].uc_busy = 0;
246161678Sdavidxu		umtxq_chains[i].uc_waiters = 0;
247161678Sdavidxu	}
248170300Sjeff	mtx_init(&umtx_lock, "umtx lock", NULL, MTX_SPIN);
249161678Sdavidxu	EVENTHANDLER_REGISTER(process_exec, umtx_exec_hook, NULL,
250161678Sdavidxu	    EVENTHANDLER_PRI_ANY);
251161678Sdavidxu}
252161678Sdavidxu
253143149Sdavidxustruct umtx_q *
254143149Sdavidxuumtxq_alloc(void)
255143149Sdavidxu{
256161678Sdavidxu	struct umtx_q *uq;
257161678Sdavidxu
258161678Sdavidxu	uq = malloc(sizeof(struct umtx_q), M_UMTX, M_WAITOK | M_ZERO);
259161678Sdavidxu	TAILQ_INIT(&uq->uq_pi_contested);
260161678Sdavidxu	uq->uq_inherited_pri = PRI_MAX;
261161678Sdavidxu	return (uq);
262143149Sdavidxu}
263143149Sdavidxu
/*
 * Release a per-thread umtx queue entry obtained from umtxq_alloc().
 */
264143149Sdavidxuvoid
265143149Sdavidxuumtxq_free(struct umtx_q *uq)
266143149Sdavidxu{
267143149Sdavidxu	free(uq, M_UMTX);
268143149Sdavidxu}
269143149Sdavidxu
270161678Sdavidxustatic inline void
271139013Sdavidxuumtxq_hash(struct umtx_key *key)
272138224Sdavidxu{
273161678Sdavidxu	unsigned n = (uintptr_t)key->info.both.a + key->info.both.b;
274161678Sdavidxu	key->hash = ((n * GOLDEN_RATIO_PRIME) >> UMTX_SHIFTS) % UMTX_CHAINS;
275138224Sdavidxu}
276138224Sdavidxu
277139013Sdavidxustatic inline int
278139013Sdavidxuumtx_key_match(const struct umtx_key *k1, const struct umtx_key *k2)
279139013Sdavidxu{
280139013Sdavidxu	return (k1->type == k2->type &&
281161678Sdavidxu		k1->info.both.a == k2->info.both.a &&
282161678Sdavidxu	        k1->info.both.b == k2->info.both.b);
283139013Sdavidxu}
284139013Sdavidxu
285161678Sdavidxustatic inline struct umtxq_chain *
286161678Sdavidxuumtxq_getchain(struct umtx_key *key)
287139013Sdavidxu{
288161678Sdavidxu	return (&umtxq_chains[key->hash]);
289139013Sdavidxu}
290139013Sdavidxu
291161678Sdavidxu/*
292177848Sdavidxu * Lock a chain.
293161678Sdavidxu */
294138224Sdavidxustatic inline void
295177848Sdavidxuumtxq_lock(struct umtx_key *key)
296139257Sdavidxu{
297161678Sdavidxu	struct umtxq_chain *uc;
298139257Sdavidxu
299161678Sdavidxu	uc = umtxq_getchain(key);
300177848Sdavidxu	mtx_lock(&uc->uc_lock);
301139257Sdavidxu}
302139257Sdavidxu
303161678Sdavidxu/*
304177848Sdavidxu * Unlock a chain.
305161678Sdavidxu */
306139257Sdavidxustatic inline void
307177848Sdavidxuumtxq_unlock(struct umtx_key *key)
308139257Sdavidxu{
309161678Sdavidxu	struct umtxq_chain *uc;
310139257Sdavidxu
311161678Sdavidxu	uc = umtxq_getchain(key);
312177848Sdavidxu	mtx_unlock(&uc->uc_lock);
313139257Sdavidxu}
314139257Sdavidxu
315161678Sdavidxu/*
316177848Sdavidxu * Mark a chain busy before an operation that may block (where the
317177848Sdavidxu * chain's kernel mutex cannot be kept held).  Caller must hold the
 * chain lock; on return the chain lock is still held and uc_busy is
 * set.  The lock may be dropped and reacquired internally.
318161678Sdavidxu */
319139257Sdavidxustatic inline void
320177848Sdavidxuumtxq_busy(struct umtx_key *key)
321138224Sdavidxu{
322161678Sdavidxu	struct umtxq_chain *uc;
323161678Sdavidxu
324161678Sdavidxu	uc = umtxq_getchain(key);
325177848Sdavidxu	mtx_assert(&uc->uc_lock, MA_OWNED);
326177848Sdavidxu	if (uc->uc_busy) {
327177880Sdavidxu#ifdef SMP
		/*
		 * On SMP, optimistically spin a bounded number of times
		 * with the chain lock dropped before sleeping: the busy
		 * holder may be running on another CPU and clear the
		 * flag soon.  The unlocked read of uc_busy here is an
		 * intentional racy peek; the flag is re-checked under
		 * the lock below.
		 */
328177880Sdavidxu		if (smp_cpus > 1) {
329177880Sdavidxu			int count = BUSY_SPINS;
330177880Sdavidxu			if (count > 0) {
331177880Sdavidxu				umtxq_unlock(key);
332177880Sdavidxu				while (uc->uc_busy && --count > 0)
333177880Sdavidxu					cpu_spinwait();
334177880Sdavidxu				umtxq_lock(key);
335177880Sdavidxu			}
336177848Sdavidxu		}
337177880Sdavidxu#endif
		/* Still busy: sleep until the holder calls umtxq_unbusy(). */
338177880Sdavidxu		while (uc->uc_busy) {
339177848Sdavidxu			uc->uc_waiters++;
340177848Sdavidxu			msleep(uc, &uc->uc_lock, 0, "umtxqb", 0);
341177848Sdavidxu			uc->uc_waiters--;
342177848Sdavidxu		}
343177848Sdavidxu	}
344177848Sdavidxu	uc->uc_busy = 1;
345138224Sdavidxu}
346138224Sdavidxu
347161678Sdavidxu/*
348177848Sdavidxu * Unbusy a chain.
349161678Sdavidxu */
350138225Sdavidxustatic inline void
351177848Sdavidxuumtxq_unbusy(struct umtx_key *key)
352138224Sdavidxu{
353161678Sdavidxu	struct umtxq_chain *uc;
354161678Sdavidxu
355161678Sdavidxu	uc = umtxq_getchain(key);
356177848Sdavidxu	mtx_assert(&uc->uc_lock, MA_OWNED);
357177848Sdavidxu	KASSERT(uc->uc_busy != 0, ("not busy"));
358177848Sdavidxu	uc->uc_busy = 0;
359177848Sdavidxu	if (uc->uc_waiters)
360177848Sdavidxu		wakeup_one(uc);
361138224Sdavidxu}
362138224Sdavidxu
363139013Sdavidxustatic inline void
364177848Sdavidxuumtxq_insert_queue(struct umtx_q *uq, int q)
365115765Sjeff{
366161678Sdavidxu	struct umtxq_chain *uc;
367139013Sdavidxu
368161678Sdavidxu	uc = umtxq_getchain(&uq->uq_key);
369161678Sdavidxu	UMTXQ_LOCKED_ASSERT(uc);
370177848Sdavidxu	TAILQ_INSERT_TAIL(&uc->uc_queue[q], uq, uq_link);
371158718Sdavidxu	uq->uq_flags |= UQF_UMTXQ;
372139013Sdavidxu}
373139013Sdavidxu
374139013Sdavidxustatic inline void
375177848Sdavidxuumtxq_remove_queue(struct umtx_q *uq, int q)
376139013Sdavidxu{
377161678Sdavidxu	struct umtxq_chain *uc;
378161678Sdavidxu
379161678Sdavidxu	uc = umtxq_getchain(&uq->uq_key);
380161678Sdavidxu	UMTXQ_LOCKED_ASSERT(uc);
381158718Sdavidxu	if (uq->uq_flags & UQF_UMTXQ) {
382177848Sdavidxu		TAILQ_REMOVE(&uc->uc_queue[q], uq, uq_link);
383158718Sdavidxu		uq->uq_flags &= ~UQF_UMTXQ;
384139013Sdavidxu	}
385139013Sdavidxu}
386139013Sdavidxu
387161678Sdavidxu/*
388161678Sdavidxu * Check if there are multiple waiters
389161678Sdavidxu */
390139013Sdavidxustatic int
391139013Sdavidxuumtxq_count(struct umtx_key *key)
392139013Sdavidxu{
393161678Sdavidxu	struct umtxq_chain *uc;
394115765Sjeff	struct umtx_q *uq;
395161678Sdavidxu	int count = 0;
396115765Sjeff
397161678Sdavidxu	uc = umtxq_getchain(key);
398161678Sdavidxu	UMTXQ_LOCKED_ASSERT(uc);
399177848Sdavidxu	TAILQ_FOREACH(uq, &uc->uc_queue[UMTX_SHARED_QUEUE], uq_link) {
400139013Sdavidxu		if (umtx_key_match(&uq->uq_key, key)) {
401139013Sdavidxu			if (++count > 1)
402139013Sdavidxu				break;
403139013Sdavidxu		}
404115765Sjeff	}
405139013Sdavidxu	return (count);
406115765Sjeff}
407115765Sjeff
408161678Sdavidxu/*
409161678Sdavidxu * Check if there are multiple PI waiters and returns first
410161678Sdavidxu * waiter.
411161678Sdavidxu */
412139257Sdavidxustatic int
413161678Sdavidxuumtxq_count_pi(struct umtx_key *key, struct umtx_q **first)
414161678Sdavidxu{
415161678Sdavidxu	struct umtxq_chain *uc;
416161678Sdavidxu	struct umtx_q *uq;
417161678Sdavidxu	int count = 0;
418161678Sdavidxu
419161678Sdavidxu	*first = NULL;
420161678Sdavidxu	uc = umtxq_getchain(key);
421161678Sdavidxu	UMTXQ_LOCKED_ASSERT(uc);
422177848Sdavidxu	TAILQ_FOREACH(uq, &uc->uc_queue[UMTX_SHARED_QUEUE], uq_link) {
423161678Sdavidxu		if (umtx_key_match(&uq->uq_key, key)) {
424161678Sdavidxu			if (++count > 1)
425161678Sdavidxu				break;
426161678Sdavidxu			*first = uq;
427161678Sdavidxu		}
428161678Sdavidxu	}
429161678Sdavidxu	return (count);
430161678Sdavidxu}
431161678Sdavidxu
432161678Sdavidxu/*
433161678Sdavidxu * Wake up threads waiting on an userland object.
434161678Sdavidxu */
435177848Sdavidxu
436161678Sdavidxustatic int
437177848Sdavidxuumtxq_signal_queue(struct umtx_key *key, int n_wake, int q)
438115765Sjeff{
439161678Sdavidxu	struct umtxq_chain *uc;
440139257Sdavidxu	struct umtx_q *uq, *next;
441161678Sdavidxu	int ret;
442115765Sjeff
443139257Sdavidxu	ret = 0;
444161678Sdavidxu	uc = umtxq_getchain(key);
445161678Sdavidxu	UMTXQ_LOCKED_ASSERT(uc);
446177848Sdavidxu	TAILQ_FOREACH_SAFE(uq, &uc->uc_queue[q], uq_link, next) {
447139013Sdavidxu		if (umtx_key_match(&uq->uq_key, key)) {
448177848Sdavidxu			umtxq_remove_queue(uq, q);
449161678Sdavidxu			wakeup(uq);
450139257Sdavidxu			if (++ret >= n_wake)
451139257Sdavidxu				break;
452139013Sdavidxu		}
453139013Sdavidxu	}
454139257Sdavidxu	return (ret);
455138224Sdavidxu}
456138224Sdavidxu
457177848Sdavidxu
458161678Sdavidxu/*
459161678Sdavidxu * Wake up specified thread.
460161678Sdavidxu */
461161678Sdavidxustatic inline void
462161678Sdavidxuumtxq_signal_thread(struct umtx_q *uq)
463161678Sdavidxu{
464161678Sdavidxu	struct umtxq_chain *uc;
465161678Sdavidxu
466161678Sdavidxu	uc = umtxq_getchain(&uq->uq_key);
467161678Sdavidxu	UMTXQ_LOCKED_ASSERT(uc);
468161678Sdavidxu	umtxq_remove(uq);
469161678Sdavidxu	wakeup(uq);
470161678Sdavidxu}
471161678Sdavidxu
472161678Sdavidxu/*
473161678Sdavidxu * Put thread into sleep state, before sleeping, check if
474161678Sdavidxu * thread was removed from umtx queue.
475161678Sdavidxu */
476138224Sdavidxustatic inline int
477161678Sdavidxuumtxq_sleep(struct umtx_q *uq, const char *wmesg, int timo)
478138224Sdavidxu{
479161678Sdavidxu	struct umtxq_chain *uc;
480161678Sdavidxu	int error;
481161678Sdavidxu
482161678Sdavidxu	uc = umtxq_getchain(&uq->uq_key);
483161678Sdavidxu	UMTXQ_LOCKED_ASSERT(uc);
484161678Sdavidxu	if (!(uq->uq_flags & UQF_UMTXQ))
485161678Sdavidxu		return (0);
486161678Sdavidxu	error = msleep(uq, &uc->uc_lock, PCATCH, wmesg, timo);
487139751Sdavidxu	if (error == EWOULDBLOCK)
488139751Sdavidxu		error = ETIMEDOUT;
489139751Sdavidxu	return (error);
490138224Sdavidxu}
491138224Sdavidxu
492161678Sdavidxu/*
493161678Sdavidxu * Convert userspace address into unique logical address.
 *
 * THREAD_SHARE keys are (vmspace, address) pairs.  For PROCESS_SHARE
 * and AUTO_SHARE the backing VM map entry is looked up: if the object
 * is process-shared (or AUTO_SHARE and the mapping is inherit-shared),
 * the key becomes (vm_object, offset) and takes a reference on the
 * object that umtx_key_release() must drop; otherwise it falls back
 * to the private (vmspace, address) form.  Returns 0 or EFAULT when
 * the address is not mapped writable.
494161678Sdavidxu */
495139013Sdavidxustatic int
496161678Sdavidxuumtx_key_get(void *addr, int type, int share, struct umtx_key *key)
497139013Sdavidxu{
498161678Sdavidxu	struct thread *td = curthread;
499139013Sdavidxu	vm_map_t map;
500139013Sdavidxu	vm_map_entry_t entry;
501139013Sdavidxu	vm_pindex_t pindex;
502139013Sdavidxu	vm_prot_t prot;
503139013Sdavidxu	boolean_t wired;
504139013Sdavidxu
505161678Sdavidxu	key->type = type;
506161678Sdavidxu	if (share == THREAD_SHARE) {
507161678Sdavidxu		key->shared = 0;
508161678Sdavidxu		key->info.private.vs = td->td_proc->p_vmspace;
509161678Sdavidxu		key->info.private.addr = (uintptr_t)addr;
510163677Sdavidxu	} else {
511163677Sdavidxu		MPASS(share == PROCESS_SHARE || share == AUTO_SHARE);
512161678Sdavidxu		map = &td->td_proc->p_vmspace->vm_map;
513161678Sdavidxu		if (vm_map_lookup(&map, (vm_offset_t)addr, VM_PROT_WRITE,
514161678Sdavidxu		    &entry, &key->info.shared.object, &pindex, &prot,
515161678Sdavidxu		    &wired) != KERN_SUCCESS) {
516161678Sdavidxu			return EFAULT;
517161678Sdavidxu		}
518161678Sdavidxu
519161678Sdavidxu		if ((share == PROCESS_SHARE) ||
520161678Sdavidxu		    (share == AUTO_SHARE &&
521161678Sdavidxu		     VM_INHERIT_SHARE == entry->inheritance)) {
522161678Sdavidxu			key->shared = 1;
523161678Sdavidxu			key->info.shared.offset = entry->offset + entry->start -
524161678Sdavidxu				(vm_offset_t)addr;
			/* Reference dropped in umtx_key_release(). */
525161678Sdavidxu			vm_object_reference(key->info.shared.object);
526161678Sdavidxu		} else {
527161678Sdavidxu			key->shared = 0;
528161678Sdavidxu			key->info.private.vs = td->td_proc->p_vmspace;
529161678Sdavidxu			key->info.private.addr = (uintptr_t)addr;
530161678Sdavidxu		}
531161678Sdavidxu		vm_map_lookup_done(map, entry);
532139013Sdavidxu	}
533139013Sdavidxu
534161678Sdavidxu	umtxq_hash(key);
535139013Sdavidxu	return (0);
536139013Sdavidxu}
537139013Sdavidxu
538161678Sdavidxu/*
539161678Sdavidxu * Release key.
540161678Sdavidxu */
541139013Sdavidxustatic inline void
542139013Sdavidxuumtx_key_release(struct umtx_key *key)
543139013Sdavidxu{
544161678Sdavidxu	if (key->shared)
545139013Sdavidxu		vm_object_deallocate(key->info.shared.object);
546139013Sdavidxu}
547139013Sdavidxu
548161678Sdavidxu/*
549161678Sdavidxu * Lock a umtx object, sleeping up to 'timo' ticks (0 = sleep forever).
 * Retries the userland CAS protocol until the lock is acquired or an
 * error (EFAULT, ETIMEDOUT, or a caught signal's error) occurs.
550161678Sdavidxu */
551139013Sdavidxustatic int
552163449Sdavidxu_do_lock_umtx(struct thread *td, struct umtx *umtx, u_long id, int timo)
553112904Sjeff{
554143149Sdavidxu	struct umtx_q *uq;
555163449Sdavidxu	u_long owner;
556163449Sdavidxu	u_long old;
557138224Sdavidxu	int error = 0;
558112904Sjeff
559143149Sdavidxu	uq = td->td_umtxq;
560161678Sdavidxu
561112904Sjeff	/*
562161678Sdavidxu	 * Care must be exercised when dealing with umtx structure. It
563112904Sjeff	 * can fault on any access.
564112904Sjeff	 */
565112904Sjeff	for (;;) {
566112904Sjeff		/*
567112904Sjeff		 * Try the uncontested case.  This should be done in userland.
568112904Sjeff		 */
569163449Sdavidxu		owner = casuword(&umtx->u_owner, UMTX_UNOWNED, id);
570112904Sjeff
571138224Sdavidxu		/* The acquire succeeded. */
572138224Sdavidxu		if (owner == UMTX_UNOWNED)
573138224Sdavidxu			return (0);
574138224Sdavidxu
575115765Sjeff		/* The address was invalid. */
576115765Sjeff		if (owner == -1)
577115765Sjeff			return (EFAULT);
578115765Sjeff
579115765Sjeff		/* If no one owns it but it is contested try to acquire it. */
580115765Sjeff		if (owner == UMTX_CONTESTED) {
			/* Keep the contested bit so waiters stay visible. */
581163449Sdavidxu			owner = casuword(&umtx->u_owner,
582139013Sdavidxu			    UMTX_CONTESTED, id | UMTX_CONTESTED);
583115765Sjeff
584138224Sdavidxu			if (owner == UMTX_CONTESTED)
585138224Sdavidxu				return (0);
586138224Sdavidxu
587115765Sjeff			/* The address was invalid. */
588115765Sjeff			if (owner == -1)
589115765Sjeff				return (EFAULT);
590115765Sjeff
591115765Sjeff			/* If this failed the lock has changed, restart. */
592115765Sjeff			continue;
593112904Sjeff		}
594112904Sjeff
595138224Sdavidxu		/*
596138224Sdavidxu		 * If we caught a signal, we have retried and now
597138224Sdavidxu		 * exit immediately.
598138224Sdavidxu		 */
599161678Sdavidxu		if (error != 0)
600138224Sdavidxu			return (error);
601112904Sjeff
602161678Sdavidxu		if ((error = umtx_key_get(umtx, TYPE_SIMPLE_LOCK,
603161678Sdavidxu			AUTO_SHARE, &uq->uq_key)) != 0)
604161678Sdavidxu			return (error);
605161678Sdavidxu
		/*
		 * Enqueue ourselves before publishing the contested bit,
		 * so that do_unlock_umtx()'s waiter count sees us.
		 */
606161678Sdavidxu		umtxq_lock(&uq->uq_key);
607161678Sdavidxu		umtxq_busy(&uq->uq_key);
608161678Sdavidxu		umtxq_insert(uq);
609161678Sdavidxu		umtxq_unbusy(&uq->uq_key);
610161678Sdavidxu		umtxq_unlock(&uq->uq_key);
611161678Sdavidxu
612112904Sjeff		/*
613112904Sjeff		 * Set the contested bit so that a release in user space
614112904Sjeff		 * knows to use the system call for unlock.  If this fails
615112904Sjeff		 * either some one else has acquired the lock or it has been
616112904Sjeff		 * released.
617112904Sjeff		 */
618163449Sdavidxu		old = casuword(&umtx->u_owner, owner, owner | UMTX_CONTESTED);
619112904Sjeff
620112904Sjeff		/* The address was invalid. */
621112967Sjake		if (old == -1) {
622143149Sdavidxu			umtxq_lock(&uq->uq_key);
623143149Sdavidxu			umtxq_remove(uq);
624143149Sdavidxu			umtxq_unlock(&uq->uq_key);
625143149Sdavidxu			umtx_key_release(&uq->uq_key);
626115765Sjeff			return (EFAULT);
627112904Sjeff		}
628112904Sjeff
629112904Sjeff		/*
630115765Sjeff		 * We set the contested bit, sleep. Otherwise the lock changed
631117685Smtm		 * and we need to retry or we lost a race to the thread
632117685Smtm		 * unlocking the umtx.
633112904Sjeff		 */
634143149Sdavidxu		umtxq_lock(&uq->uq_key);
		/* CAS failed (old != owner): skip the sleep and re-read. */
635161678Sdavidxu		if (old == owner)
636161678Sdavidxu			error = umtxq_sleep(uq, "umtx", timo);
637143149Sdavidxu		umtxq_remove(uq);
638143149Sdavidxu		umtxq_unlock(&uq->uq_key);
639143149Sdavidxu		umtx_key_release(&uq->uq_key);
640112904Sjeff	}
641117743Smtm
642117743Smtm	return (0);
643112904Sjeff}
644112904Sjeff
645161678Sdavidxu/*
646161678Sdavidxu * Lock a umtx object with an optional relative timeout.  A NULL
 * timeout blocks indefinitely and converts EINTR to ERESTART so the
 * syscall restarts transparently; a timed lock recomputes the
 * remaining time after each ETIMEDOUT tick-bounded attempt and is
 * never restarted (ERESTART is mapped back to EINTR).
647161678Sdavidxu */
648139013Sdavidxustatic int
649163449Sdavidxudo_lock_umtx(struct thread *td, struct umtx *umtx, u_long id,
650140245Sdavidxu	struct timespec *timeout)
651112904Sjeff{
652140245Sdavidxu	struct timespec ts, ts2, ts3;
653139013Sdavidxu	struct timeval tv;
654140245Sdavidxu	int error;
655139013Sdavidxu
656140245Sdavidxu	if (timeout == NULL) {
657162536Sdavidxu		error = _do_lock_umtx(td, umtx, id, 0);
658162030Sdavidxu		/* Mutex locking is restarted if it is interrupted. */
659162030Sdavidxu		if (error == EINTR)
660162030Sdavidxu			error = ERESTART;
661139013Sdavidxu	} else {
		/* ts = absolute (uptime-based) deadline. */
662140245Sdavidxu		getnanouptime(&ts);
663140245Sdavidxu		timespecadd(&ts, timeout);
664140245Sdavidxu		TIMESPEC_TO_TIMEVAL(&tv, timeout);
665139013Sdavidxu		for (;;) {
666162536Sdavidxu			error = _do_lock_umtx(td, umtx, id, tvtohz(&tv));
667140245Sdavidxu			if (error != ETIMEDOUT)
668140245Sdavidxu				break;
669140245Sdavidxu			getnanouptime(&ts2);
670140245Sdavidxu			if (timespeccmp(&ts2, &ts, >=)) {
671139751Sdavidxu				error = ETIMEDOUT;
672139013Sdavidxu				break;
673139013Sdavidxu			}
			/* Not yet past the deadline: retry with what's left. */
674140245Sdavidxu			ts3 = ts;
675140245Sdavidxu			timespecsub(&ts3, &ts2);
676140245Sdavidxu			TIMESPEC_TO_TIMEVAL(&tv, &ts3);
677139013Sdavidxu		}
678162030Sdavidxu		/* Timed-locking is not restarted. */
679162030Sdavidxu		if (error == ERESTART)
680162030Sdavidxu			error = EINTR;
681139013Sdavidxu	}
682139013Sdavidxu	return (error);
683139013Sdavidxu}
684139013Sdavidxu
685161678Sdavidxu/*
686161678Sdavidxu * Unlock a umtx object.  Verifies 'id' owns the lock (EPERM
 * otherwise), fast-paths the uncontested case with a single CAS, and
 * for contested locks rewrites the owner word based on the number of
 * queued waiters before waking one of them.
687161678Sdavidxu */
688139013Sdavidxustatic int
689163449Sdavidxudo_unlock_umtx(struct thread *td, struct umtx *umtx, u_long id)
690139013Sdavidxu{
691139013Sdavidxu	struct umtx_key key;
692163449Sdavidxu	u_long owner;
693163449Sdavidxu	u_long old;
694139257Sdavidxu	int error;
695139257Sdavidxu	int count;
696112904Sjeff
697112904Sjeff	/*
698112904Sjeff	 * Make sure we own this mtx.
699112904Sjeff	 */
700163449Sdavidxu	owner = fuword(__DEVOLATILE(u_long *, &umtx->u_owner));
701161678Sdavidxu	if (owner == -1)
702115765Sjeff		return (EFAULT);
703115765Sjeff
704139013Sdavidxu	if ((owner & ~UMTX_CONTESTED) != id)
705115765Sjeff		return (EPERM);
706112904Sjeff
707161678Sdavidxu	/* This should be done in userland */
708161678Sdavidxu	if ((owner & UMTX_CONTESTED) == 0) {
709163449Sdavidxu		old = casuword(&umtx->u_owner, owner, UMTX_UNOWNED);
710161678Sdavidxu		if (old == -1)
711161678Sdavidxu			return (EFAULT);
712161678Sdavidxu		if (old == owner)
713161678Sdavidxu			return (0);
		/* A waiter set the contested bit meanwhile: fall through. */
714161855Sdavidxu		owner = old;
715161678Sdavidxu	}
716161678Sdavidxu
717117685Smtm	/* We should only ever be in here for contested locks */
718161678Sdavidxu	if ((error = umtx_key_get(umtx, TYPE_SIMPLE_LOCK, AUTO_SHARE,
719161678Sdavidxu		&key)) != 0)
720139257Sdavidxu		return (error);
721139257Sdavidxu
	/*
	 * Mark the chain busy across the userland CAS below, so other
	 * operations on this key are held off while the queue and the
	 * owner word are transiently inconsistent.
	 */
722139257Sdavidxu	umtxq_lock(&key);
723139257Sdavidxu	umtxq_busy(&key);
724139257Sdavidxu	count = umtxq_count(&key);
725139257Sdavidxu	umtxq_unlock(&key);
726139257Sdavidxu
727117743Smtm	/*
728117743Smtm	 * When unlocking the umtx, it must be marked as unowned if
729117743Smtm	 * there is zero or one thread only waiting for it.
730117743Smtm	 * Otherwise, it must be marked as contested.
731117743Smtm	 */
732163449Sdavidxu	old = casuword(&umtx->u_owner, owner,
733163449Sdavidxu		count <= 1 ? UMTX_UNOWNED : UMTX_CONTESTED);
734139257Sdavidxu	umtxq_lock(&key);
735161678Sdavidxu	umtxq_signal(&key,1);
736139257Sdavidxu	umtxq_unbusy(&key);
737139257Sdavidxu	umtxq_unlock(&key);
738139257Sdavidxu	umtx_key_release(&key);
739115765Sjeff	if (old == -1)
740115765Sjeff		return (EFAULT);
	/* Owner word changed underfoot: caller raced another unlocker. */
741138224Sdavidxu	if (old != owner)
742138224Sdavidxu		return (EINVAL);
743115765Sjeff	return (0);
744112904Sjeff}
745139013Sdavidxu
746162536Sdavidxu#ifdef COMPAT_IA32
747162536Sdavidxu
748161678Sdavidxu/*
749162536Sdavidxu * Lock a 32-bit umtx lock word (COMPAT_IA32 path).  Same CAS/queue
 * protocol as _do_lock_umtx(), but operating on a uint32_t with
 * casuword32() and the UMUTEX_* constants.
750162536Sdavidxu */
751162536Sdavidxustatic int
752162536Sdavidxu_do_lock_umtx32(struct thread *td, uint32_t *m, uint32_t id, int timo)
753162536Sdavidxu{
754162536Sdavidxu	struct umtx_q *uq;
755162536Sdavidxu	uint32_t owner;
756162536Sdavidxu	uint32_t old;
757162536Sdavidxu	int error = 0;
758162536Sdavidxu
759162536Sdavidxu	uq = td->td_umtxq;
760162536Sdavidxu
761162536Sdavidxu	/*
762162536Sdavidxu	 * Care must be exercised when dealing with umtx structure. It
763162536Sdavidxu	 * can fault on any access.
764162536Sdavidxu	 */
765162536Sdavidxu	for (;;) {
766162536Sdavidxu		/*
767162536Sdavidxu		 * Try the uncontested case.  This should be done in userland.
768162536Sdavidxu		 */
769162536Sdavidxu		owner = casuword32(m, UMUTEX_UNOWNED, id);
770162536Sdavidxu
771162536Sdavidxu		/* The acquire succeeded. */
772162536Sdavidxu		if (owner == UMUTEX_UNOWNED)
773162536Sdavidxu			return (0);
774162536Sdavidxu
775162536Sdavidxu		/* The address was invalid. */
776162536Sdavidxu		if (owner == -1)
777162536Sdavidxu			return (EFAULT);
778162536Sdavidxu
779162536Sdavidxu		/* If no one owns it but it is contested try to acquire it. */
780162536Sdavidxu		if (owner == UMUTEX_CONTESTED) {
			/* Keep the contested bit so waiters stay visible. */
781162536Sdavidxu			owner = casuword32(m,
782162536Sdavidxu			    UMUTEX_CONTESTED, id | UMUTEX_CONTESTED);
783162536Sdavidxu			if (owner == UMUTEX_CONTESTED)
784162536Sdavidxu				return (0);
785162536Sdavidxu
786162536Sdavidxu			/* The address was invalid. */
787162536Sdavidxu			if (owner == -1)
788162536Sdavidxu				return (EFAULT);
789162536Sdavidxu
790162536Sdavidxu			/* If this failed the lock has changed, restart. */
791162536Sdavidxu			continue;
792162536Sdavidxu		}
793162536Sdavidxu
794162536Sdavidxu		/*
795162536Sdavidxu		 * If we caught a signal, we have retried and now
796162536Sdavidxu		 * exit immediately.
797162536Sdavidxu		 */
798162536Sdavidxu		if (error != 0)
799162536Sdavidxu			return (error);
800162536Sdavidxu
801162536Sdavidxu		if ((error = umtx_key_get(m, TYPE_SIMPLE_LOCK,
802162536Sdavidxu			AUTO_SHARE, &uq->uq_key)) != 0)
803162536Sdavidxu			return (error);
804162536Sdavidxu
		/*
		 * Enqueue ourselves before publishing the contested bit,
		 * so the unlock path's waiter count sees us.
		 */
805162536Sdavidxu		umtxq_lock(&uq->uq_key);
806162536Sdavidxu		umtxq_busy(&uq->uq_key);
807162536Sdavidxu		umtxq_insert(uq);
808162536Sdavidxu		umtxq_unbusy(&uq->uq_key);
809162536Sdavidxu		umtxq_unlock(&uq->uq_key);
810162536Sdavidxu
811162536Sdavidxu		/*
812162536Sdavidxu		 * Set the contested bit so that a release in user space
813162536Sdavidxu		 * knows to use the system call for unlock.  If this fails
814162536Sdavidxu		 * either some one else has acquired the lock or it has been
815162536Sdavidxu		 * released.
816162536Sdavidxu		 */
817162536Sdavidxu		old = casuword32(m, owner, owner | UMUTEX_CONTESTED);
818162536Sdavidxu
819162536Sdavidxu		/* The address was invalid. */
820162536Sdavidxu		if (old == -1) {
821162536Sdavidxu			umtxq_lock(&uq->uq_key);
822162536Sdavidxu			umtxq_remove(uq);
823162536Sdavidxu			umtxq_unlock(&uq->uq_key);
824162536Sdavidxu			umtx_key_release(&uq->uq_key);
825162536Sdavidxu			return (EFAULT);
826162536Sdavidxu		}
827162536Sdavidxu
828162536Sdavidxu		/*
829162536Sdavidxu		 * We set the contested bit, sleep. Otherwise the lock changed
830162536Sdavidxu		 * and we need to retry or we lost a race to the thread
831162536Sdavidxu		 * unlocking the umtx.
832162536Sdavidxu		 */
833162536Sdavidxu		umtxq_lock(&uq->uq_key);
		/* CAS failed (old != owner): skip the sleep and re-read. */
834162536Sdavidxu		if (old == owner)
835162536Sdavidxu			error = umtxq_sleep(uq, "umtx", timo);
836162536Sdavidxu		umtxq_remove(uq);
837162536Sdavidxu		umtxq_unlock(&uq->uq_key);
838162536Sdavidxu		umtx_key_release(&uq->uq_key);
839162536Sdavidxu	}
840162536Sdavidxu
841162536Sdavidxu	return (0);
842162536Sdavidxu}
843162536Sdavidxu
844162536Sdavidxu/*
845162536Sdavidxu * Lock a umtx object.
846162536Sdavidxu */
847162536Sdavidxustatic int
848162536Sdavidxudo_lock_umtx32(struct thread *td, void *m, uint32_t id,
849162536Sdavidxu	struct timespec *timeout)
850162536Sdavidxu{
851162536Sdavidxu	struct timespec ts, ts2, ts3;
852162536Sdavidxu	struct timeval tv;
853162536Sdavidxu	int error;
854162536Sdavidxu
855162536Sdavidxu	if (timeout == NULL) {
856162536Sdavidxu		error = _do_lock_umtx32(td, m, id, 0);
857162536Sdavidxu		/* Mutex locking is restarted if it is interrupted. */
858162536Sdavidxu		if (error == EINTR)
859162536Sdavidxu			error = ERESTART;
860162536Sdavidxu	} else {
861162536Sdavidxu		getnanouptime(&ts);
862162536Sdavidxu		timespecadd(&ts, timeout);
863162536Sdavidxu		TIMESPEC_TO_TIMEVAL(&tv, timeout);
864162536Sdavidxu		for (;;) {
865162536Sdavidxu			error = _do_lock_umtx32(td, m, id, tvtohz(&tv));
866162536Sdavidxu			if (error != ETIMEDOUT)
867162536Sdavidxu				break;
868162536Sdavidxu			getnanouptime(&ts2);
869162536Sdavidxu			if (timespeccmp(&ts2, &ts, >=)) {
870162536Sdavidxu				error = ETIMEDOUT;
871162536Sdavidxu				break;
872162536Sdavidxu			}
873162536Sdavidxu			ts3 = ts;
874162536Sdavidxu			timespecsub(&ts3, &ts2);
875162536Sdavidxu			TIMESPEC_TO_TIMEVAL(&tv, &ts3);
876162536Sdavidxu		}
877162536Sdavidxu		/* Timed-locking is not restarted. */
878162536Sdavidxu		if (error == ERESTART)
879162536Sdavidxu			error = EINTR;
880162536Sdavidxu	}
881162536Sdavidxu	return (error);
882162536Sdavidxu}
883162536Sdavidxu
884162536Sdavidxu/*
885162536Sdavidxu * Unlock a umtx object.
886162536Sdavidxu */
887162536Sdavidxustatic int
888162536Sdavidxudo_unlock_umtx32(struct thread *td, uint32_t *m, uint32_t id)
889162536Sdavidxu{
890162536Sdavidxu	struct umtx_key key;
891162536Sdavidxu	uint32_t owner;
892162536Sdavidxu	uint32_t old;
893162536Sdavidxu	int error;
894162536Sdavidxu	int count;
895162536Sdavidxu
896162536Sdavidxu	/*
897162536Sdavidxu	 * Make sure we own this mtx.
898162536Sdavidxu	 */
899162536Sdavidxu	owner = fuword32(m);
900162536Sdavidxu	if (owner == -1)
901162536Sdavidxu		return (EFAULT);
902162536Sdavidxu
903162536Sdavidxu	if ((owner & ~UMUTEX_CONTESTED) != id)
904162536Sdavidxu		return (EPERM);
905162536Sdavidxu
906162536Sdavidxu	/* This should be done in userland */
907162536Sdavidxu	if ((owner & UMUTEX_CONTESTED) == 0) {
908162536Sdavidxu		old = casuword32(m, owner, UMUTEX_UNOWNED);
909162536Sdavidxu		if (old == -1)
910162536Sdavidxu			return (EFAULT);
911162536Sdavidxu		if (old == owner)
912162536Sdavidxu			return (0);
913162536Sdavidxu		owner = old;
914162536Sdavidxu	}
915162536Sdavidxu
916162536Sdavidxu	/* We should only ever be in here for contested locks */
917162536Sdavidxu	if ((error = umtx_key_get(m, TYPE_SIMPLE_LOCK, AUTO_SHARE,
918162536Sdavidxu		&key)) != 0)
919162536Sdavidxu		return (error);
920162536Sdavidxu
921162536Sdavidxu	umtxq_lock(&key);
922162536Sdavidxu	umtxq_busy(&key);
923162536Sdavidxu	count = umtxq_count(&key);
924162536Sdavidxu	umtxq_unlock(&key);
925162536Sdavidxu
926162536Sdavidxu	/*
927162536Sdavidxu	 * When unlocking the umtx, it must be marked as unowned if
928162536Sdavidxu	 * there is zero or one thread only waiting for it.
929162536Sdavidxu	 * Otherwise, it must be marked as contested.
930162536Sdavidxu	 */
931162536Sdavidxu	old = casuword32(m, owner,
932162536Sdavidxu		count <= 1 ? UMUTEX_UNOWNED : UMUTEX_CONTESTED);
933162536Sdavidxu	umtxq_lock(&key);
934162536Sdavidxu	umtxq_signal(&key,1);
935162536Sdavidxu	umtxq_unbusy(&key);
936162536Sdavidxu	umtxq_unlock(&key);
937162536Sdavidxu	umtx_key_release(&key);
938162536Sdavidxu	if (old == -1)
939162536Sdavidxu		return (EFAULT);
940162536Sdavidxu	if (old != owner)
941162536Sdavidxu		return (EINVAL);
942162536Sdavidxu	return (0);
943162536Sdavidxu}
944162536Sdavidxu#endif
945162536Sdavidxu
946162536Sdavidxu/*
947161678Sdavidxu * Fetch and compare value, sleep on the address if value is not changed.
948161678Sdavidxu */
949139013Sdavidxustatic int
950163449Sdavidxudo_wait(struct thread *td, void *addr, u_long id,
951162536Sdavidxu	struct timespec *timeout, int compat32)
952139013Sdavidxu{
953143149Sdavidxu	struct umtx_q *uq;
954140245Sdavidxu	struct timespec ts, ts2, ts3;
955139013Sdavidxu	struct timeval tv;
956163449Sdavidxu	u_long tmp;
957140245Sdavidxu	int error = 0;
958139013Sdavidxu
959143149Sdavidxu	uq = td->td_umtxq;
960162536Sdavidxu	if ((error = umtx_key_get(addr, TYPE_SIMPLE_WAIT, AUTO_SHARE,
961161678Sdavidxu	    &uq->uq_key)) != 0)
962139013Sdavidxu		return (error);
963161678Sdavidxu
964161678Sdavidxu	umtxq_lock(&uq->uq_key);
965161678Sdavidxu	umtxq_insert(uq);
966161678Sdavidxu	umtxq_unlock(&uq->uq_key);
967162536Sdavidxu	if (compat32 == 0)
968162536Sdavidxu		tmp = fuword(addr);
969162536Sdavidxu        else
970162536Sdavidxu		tmp = fuword32(addr);
971139427Sdavidxu	if (tmp != id) {
972143149Sdavidxu		umtxq_lock(&uq->uq_key);
973143149Sdavidxu		umtxq_remove(uq);
974143149Sdavidxu		umtxq_unlock(&uq->uq_key);
975140245Sdavidxu	} else if (timeout == NULL) {
976143149Sdavidxu		umtxq_lock(&uq->uq_key);
977164839Sdavidxu		error = umtxq_sleep(uq, "uwait", 0);
978161678Sdavidxu		umtxq_remove(uq);
979143149Sdavidxu		umtxq_unlock(&uq->uq_key);
980139013Sdavidxu	} else {
981140245Sdavidxu		getnanouptime(&ts);
982140245Sdavidxu		timespecadd(&ts, timeout);
983140245Sdavidxu		TIMESPEC_TO_TIMEVAL(&tv, timeout);
984161678Sdavidxu		umtxq_lock(&uq->uq_key);
985139013Sdavidxu		for (;;) {
986164839Sdavidxu			error = umtxq_sleep(uq, "uwait", tvtohz(&tv));
987161678Sdavidxu			if (!(uq->uq_flags & UQF_UMTXQ))
988161678Sdavidxu				break;
989140245Sdavidxu			if (error != ETIMEDOUT)
990140245Sdavidxu				break;
991161678Sdavidxu			umtxq_unlock(&uq->uq_key);
992140245Sdavidxu			getnanouptime(&ts2);
993140245Sdavidxu			if (timespeccmp(&ts2, &ts, >=)) {
994139751Sdavidxu				error = ETIMEDOUT;
995161678Sdavidxu				umtxq_lock(&uq->uq_key);
996139013Sdavidxu				break;
997139013Sdavidxu			}
998140245Sdavidxu			ts3 = ts;
999140245Sdavidxu			timespecsub(&ts3, &ts2);
1000140245Sdavidxu			TIMESPEC_TO_TIMEVAL(&tv, &ts3);
1001161678Sdavidxu			umtxq_lock(&uq->uq_key);
1002139013Sdavidxu		}
1003143149Sdavidxu		umtxq_remove(uq);
1004143149Sdavidxu		umtxq_unlock(&uq->uq_key);
1005139013Sdavidxu	}
1006143149Sdavidxu	umtx_key_release(&uq->uq_key);
1007139257Sdavidxu	if (error == ERESTART)
1008139257Sdavidxu		error = EINTR;
1009139013Sdavidxu	return (error);
1010139013Sdavidxu}
1011139013Sdavidxu
1012161678Sdavidxu/*
1013161678Sdavidxu * Wake up threads sleeping on the specified address.
1014161678Sdavidxu */
1015151692Sdavidxuint
1016151692Sdavidxukern_umtx_wake(struct thread *td, void *uaddr, int n_wake)
1017139013Sdavidxu{
1018139013Sdavidxu	struct umtx_key key;
1019139257Sdavidxu	int ret;
1020139013Sdavidxu
1021161678Sdavidxu	if ((ret = umtx_key_get(uaddr, TYPE_SIMPLE_WAIT, AUTO_SHARE,
1022161678Sdavidxu	   &key)) != 0)
1023139257Sdavidxu		return (ret);
1024139258Sdavidxu	umtxq_lock(&key);
1025139257Sdavidxu	ret = umtxq_signal(&key, n_wake);
1026139258Sdavidxu	umtxq_unlock(&key);
1027139257Sdavidxu	umtx_key_release(&key);
1028139013Sdavidxu	return (0);
1029139013Sdavidxu}
1030139013Sdavidxu
1031161678Sdavidxu/*
1032161678Sdavidxu * Lock PTHREAD_PRIO_NONE protocol POSIX mutex.
1033161678Sdavidxu */
1034161678Sdavidxustatic int
1035161678Sdavidxu_do_lock_normal(struct thread *td, struct umutex *m, uint32_t flags, int timo,
1036161678Sdavidxu	int try)
1037161678Sdavidxu{
1038161678Sdavidxu	struct umtx_q *uq;
1039161678Sdavidxu	uint32_t owner, old, id;
1040161678Sdavidxu	int error = 0;
1041161678Sdavidxu
1042161678Sdavidxu	id = td->td_tid;
1043161678Sdavidxu	uq = td->td_umtxq;
1044161678Sdavidxu
1045161678Sdavidxu	/*
1046161678Sdavidxu	 * Care must be exercised when dealing with umtx structure. It
1047161678Sdavidxu	 * can fault on any access.
1048161678Sdavidxu	 */
1049161678Sdavidxu	for (;;) {
1050161678Sdavidxu		/*
1051161678Sdavidxu		 * Try the uncontested case.  This should be done in userland.
1052161678Sdavidxu		 */
1053161678Sdavidxu		owner = casuword32(&m->m_owner, UMUTEX_UNOWNED, id);
1054161678Sdavidxu
1055161678Sdavidxu		/* The acquire succeeded. */
1056161678Sdavidxu		if (owner == UMUTEX_UNOWNED)
1057161678Sdavidxu			return (0);
1058161678Sdavidxu
1059161678Sdavidxu		/* The address was invalid. */
1060161678Sdavidxu		if (owner == -1)
1061161678Sdavidxu			return (EFAULT);
1062161678Sdavidxu
1063161678Sdavidxu		/* If no one owns it but it is contested try to acquire it. */
1064161678Sdavidxu		if (owner == UMUTEX_CONTESTED) {
1065161678Sdavidxu			owner = casuword32(&m->m_owner,
1066161678Sdavidxu			    UMUTEX_CONTESTED, id | UMUTEX_CONTESTED);
1067161678Sdavidxu
1068161678Sdavidxu			if (owner == UMUTEX_CONTESTED)
1069161678Sdavidxu				return (0);
1070161678Sdavidxu
1071161678Sdavidxu			/* The address was invalid. */
1072161678Sdavidxu			if (owner == -1)
1073161678Sdavidxu				return (EFAULT);
1074161678Sdavidxu
1075161678Sdavidxu			/* If this failed the lock has changed, restart. */
1076161678Sdavidxu			continue;
1077161678Sdavidxu		}
1078161678Sdavidxu
1079161678Sdavidxu		if ((flags & UMUTEX_ERROR_CHECK) != 0 &&
1080161678Sdavidxu		    (owner & ~UMUTEX_CONTESTED) == id)
1081161678Sdavidxu			return (EDEADLK);
1082161678Sdavidxu
1083161678Sdavidxu		if (try != 0)
1084161678Sdavidxu			return (EBUSY);
1085161678Sdavidxu
1086161678Sdavidxu		/*
1087161678Sdavidxu		 * If we caught a signal, we have retried and now
1088161678Sdavidxu		 * exit immediately.
1089161678Sdavidxu		 */
1090161678Sdavidxu		if (error != 0)
1091161678Sdavidxu			return (error);
1092161678Sdavidxu
1093161678Sdavidxu		if ((error = umtx_key_get(m, TYPE_NORMAL_UMUTEX,
1094161678Sdavidxu		    GET_SHARE(flags), &uq->uq_key)) != 0)
1095161678Sdavidxu			return (error);
1096161678Sdavidxu
1097161678Sdavidxu		umtxq_lock(&uq->uq_key);
1098161678Sdavidxu		umtxq_busy(&uq->uq_key);
1099161678Sdavidxu		umtxq_insert(uq);
1100161678Sdavidxu		umtxq_unbusy(&uq->uq_key);
1101161678Sdavidxu		umtxq_unlock(&uq->uq_key);
1102161678Sdavidxu
1103161678Sdavidxu		/*
1104161678Sdavidxu		 * Set the contested bit so that a release in user space
1105161678Sdavidxu		 * knows to use the system call for unlock.  If this fails
1106161678Sdavidxu		 * either some one else has acquired the lock or it has been
1107161678Sdavidxu		 * released.
1108161678Sdavidxu		 */
1109161678Sdavidxu		old = casuword32(&m->m_owner, owner, owner | UMUTEX_CONTESTED);
1110161678Sdavidxu
1111161678Sdavidxu		/* The address was invalid. */
1112161678Sdavidxu		if (old == -1) {
1113161678Sdavidxu			umtxq_lock(&uq->uq_key);
1114161678Sdavidxu			umtxq_remove(uq);
1115161678Sdavidxu			umtxq_unlock(&uq->uq_key);
1116161678Sdavidxu			umtx_key_release(&uq->uq_key);
1117161678Sdavidxu			return (EFAULT);
1118161678Sdavidxu		}
1119161678Sdavidxu
1120161678Sdavidxu		/*
1121161678Sdavidxu		 * We set the contested bit, sleep. Otherwise the lock changed
1122161678Sdavidxu		 * and we need to retry or we lost a race to the thread
1123161678Sdavidxu		 * unlocking the umtx.
1124161678Sdavidxu		 */
1125161678Sdavidxu		umtxq_lock(&uq->uq_key);
1126161678Sdavidxu		if (old == owner)
1127161678Sdavidxu			error = umtxq_sleep(uq, "umtxn", timo);
1128161678Sdavidxu		umtxq_remove(uq);
1129161678Sdavidxu		umtxq_unlock(&uq->uq_key);
1130161678Sdavidxu		umtx_key_release(&uq->uq_key);
1131161678Sdavidxu	}
1132161678Sdavidxu
1133161678Sdavidxu	return (0);
1134161678Sdavidxu}
1135161678Sdavidxu
1136161678Sdavidxu/*
1137161678Sdavidxu * Lock PTHREAD_PRIO_NONE protocol POSIX mutex.
1138161678Sdavidxu */
1139161678Sdavidxu/*
1140161678Sdavidxu * Unlock PTHREAD_PRIO_NONE protocol POSIX mutex.
1141161678Sdavidxu */
1142161678Sdavidxustatic int
1143161678Sdavidxudo_unlock_normal(struct thread *td, struct umutex *m, uint32_t flags)
1144161678Sdavidxu{
1145161678Sdavidxu	struct umtx_key key;
1146161678Sdavidxu	uint32_t owner, old, id;
1147161678Sdavidxu	int error;
1148161678Sdavidxu	int count;
1149161678Sdavidxu
1150161678Sdavidxu	id = td->td_tid;
1151161678Sdavidxu	/*
1152161678Sdavidxu	 * Make sure we own this mtx.
1153161678Sdavidxu	 */
1154163449Sdavidxu	owner = fuword32(__DEVOLATILE(uint32_t *, &m->m_owner));
1155161678Sdavidxu	if (owner == -1)
1156161678Sdavidxu		return (EFAULT);
1157161678Sdavidxu
1158161678Sdavidxu	if ((owner & ~UMUTEX_CONTESTED) != id)
1159161678Sdavidxu		return (EPERM);
1160161678Sdavidxu
1161161678Sdavidxu	/* This should be done in userland */
1162161678Sdavidxu	if ((owner & UMUTEX_CONTESTED) == 0) {
1163161678Sdavidxu		old = casuword32(&m->m_owner, owner, UMUTEX_UNOWNED);
1164161678Sdavidxu		if (old == -1)
1165161678Sdavidxu			return (EFAULT);
1166161678Sdavidxu		if (old == owner)
1167161678Sdavidxu			return (0);
1168161855Sdavidxu		owner = old;
1169161678Sdavidxu	}
1170161678Sdavidxu
1171161678Sdavidxu	/* We should only ever be in here for contested locks */
1172161678Sdavidxu	if ((error = umtx_key_get(m, TYPE_NORMAL_UMUTEX, GET_SHARE(flags),
1173161678Sdavidxu	    &key)) != 0)
1174161678Sdavidxu		return (error);
1175161678Sdavidxu
1176161678Sdavidxu	umtxq_lock(&key);
1177161678Sdavidxu	umtxq_busy(&key);
1178161678Sdavidxu	count = umtxq_count(&key);
1179161678Sdavidxu	umtxq_unlock(&key);
1180161678Sdavidxu
1181161678Sdavidxu	/*
1182161678Sdavidxu	 * When unlocking the umtx, it must be marked as unowned if
1183161678Sdavidxu	 * there is zero or one thread only waiting for it.
1184161678Sdavidxu	 * Otherwise, it must be marked as contested.
1185161678Sdavidxu	 */
1186161678Sdavidxu	old = casuword32(&m->m_owner, owner,
1187161678Sdavidxu		count <= 1 ? UMUTEX_UNOWNED : UMUTEX_CONTESTED);
1188161678Sdavidxu	umtxq_lock(&key);
1189161678Sdavidxu	umtxq_signal(&key,1);
1190161678Sdavidxu	umtxq_unbusy(&key);
1191161678Sdavidxu	umtxq_unlock(&key);
1192161678Sdavidxu	umtx_key_release(&key);
1193161678Sdavidxu	if (old == -1)
1194161678Sdavidxu		return (EFAULT);
1195161678Sdavidxu	if (old != owner)
1196161678Sdavidxu		return (EINVAL);
1197161678Sdavidxu	return (0);
1198161678Sdavidxu}
1199161678Sdavidxu
1200161678Sdavidxustatic inline struct umtx_pi *
1201163697Sdavidxuumtx_pi_alloc(int flags)
1202161678Sdavidxu{
1203161678Sdavidxu	struct umtx_pi *pi;
1204161678Sdavidxu
1205163697Sdavidxu	pi = uma_zalloc(umtx_pi_zone, M_ZERO | flags);
1206161678Sdavidxu	TAILQ_INIT(&pi->pi_blocked);
1207161678Sdavidxu	atomic_add_int(&umtx_pi_allocated, 1);
1208161678Sdavidxu	return (pi);
1209161678Sdavidxu}
1210161678Sdavidxu
1211161678Sdavidxustatic inline void
1212161678Sdavidxuumtx_pi_free(struct umtx_pi *pi)
1213161678Sdavidxu{
1214161678Sdavidxu	uma_zfree(umtx_pi_zone, pi);
1215161678Sdavidxu	atomic_add_int(&umtx_pi_allocated, -1);
1216161678Sdavidxu}
1217161678Sdavidxu
1218161678Sdavidxu/*
1219161678Sdavidxu * Adjust the thread's position on a pi_state after its priority has been
1220161678Sdavidxu * changed.
1221161678Sdavidxu */
1222161678Sdavidxustatic int
1223161678Sdavidxuumtx_pi_adjust_thread(struct umtx_pi *pi, struct thread *td)
1224161678Sdavidxu{
1225161678Sdavidxu	struct umtx_q *uq, *uq1, *uq2;
1226161678Sdavidxu	struct thread *td1;
1227161678Sdavidxu
1228170300Sjeff	mtx_assert(&umtx_lock, MA_OWNED);
1229161678Sdavidxu	if (pi == NULL)
1230161678Sdavidxu		return (0);
1231161678Sdavidxu
1232161678Sdavidxu	uq = td->td_umtxq;
1233161678Sdavidxu
1234161678Sdavidxu	/*
1235161678Sdavidxu	 * Check if the thread needs to be moved on the blocked chain.
1236161678Sdavidxu	 * It needs to be moved if either its priority is lower than
1237161678Sdavidxu	 * the previous thread or higher than the next thread.
1238161678Sdavidxu	 */
1239161678Sdavidxu	uq1 = TAILQ_PREV(uq, umtxq_head, uq_lockq);
1240161678Sdavidxu	uq2 = TAILQ_NEXT(uq, uq_lockq);
1241161678Sdavidxu	if ((uq1 != NULL && UPRI(td) < UPRI(uq1->uq_thread)) ||
1242161678Sdavidxu	    (uq2 != NULL && UPRI(td) > UPRI(uq2->uq_thread))) {
1243161678Sdavidxu		/*
1244161678Sdavidxu		 * Remove thread from blocked chain and determine where
1245161678Sdavidxu		 * it should be moved to.
1246161678Sdavidxu		 */
1247161678Sdavidxu		TAILQ_REMOVE(&pi->pi_blocked, uq, uq_lockq);
1248161678Sdavidxu		TAILQ_FOREACH(uq1, &pi->pi_blocked, uq_lockq) {
1249161678Sdavidxu			td1 = uq1->uq_thread;
1250161678Sdavidxu			MPASS(td1->td_proc->p_magic == P_MAGIC);
1251161678Sdavidxu			if (UPRI(td1) > UPRI(td))
1252161678Sdavidxu				break;
1253161678Sdavidxu		}
1254161678Sdavidxu
1255161678Sdavidxu		if (uq1 == NULL)
1256161678Sdavidxu			TAILQ_INSERT_TAIL(&pi->pi_blocked, uq, uq_lockq);
1257161678Sdavidxu		else
1258161678Sdavidxu			TAILQ_INSERT_BEFORE(uq1, uq, uq_lockq);
1259161678Sdavidxu	}
1260161678Sdavidxu	return (1);
1261161678Sdavidxu}
1262161678Sdavidxu
1263161678Sdavidxu/*
1264161678Sdavidxu * Propagate priority when a thread is blocked on POSIX
1265161678Sdavidxu * PI mutex.
1266161678Sdavidxu */
1267161678Sdavidxustatic void
1268161678Sdavidxuumtx_propagate_priority(struct thread *td)
1269161678Sdavidxu{
1270161678Sdavidxu	struct umtx_q *uq;
1271161678Sdavidxu	struct umtx_pi *pi;
1272161678Sdavidxu	int pri;
1273161678Sdavidxu
1274170300Sjeff	mtx_assert(&umtx_lock, MA_OWNED);
1275161678Sdavidxu	pri = UPRI(td);
1276161678Sdavidxu	uq = td->td_umtxq;
1277161678Sdavidxu	pi = uq->uq_pi_blocked;
1278161678Sdavidxu	if (pi == NULL)
1279161678Sdavidxu		return;
1280161678Sdavidxu
1281161678Sdavidxu	for (;;) {
1282161678Sdavidxu		td = pi->pi_owner;
1283161678Sdavidxu		if (td == NULL)
1284161678Sdavidxu			return;
1285161678Sdavidxu
1286161678Sdavidxu		MPASS(td->td_proc != NULL);
1287161678Sdavidxu		MPASS(td->td_proc->p_magic == P_MAGIC);
1288161678Sdavidxu
1289161678Sdavidxu		if (UPRI(td) <= pri)
1290161678Sdavidxu			return;
1291161678Sdavidxu
1292170300Sjeff		thread_lock(td);
1293161678Sdavidxu		sched_lend_user_prio(td, pri);
1294170300Sjeff		thread_unlock(td);
1295161678Sdavidxu
1296161678Sdavidxu		/*
1297161678Sdavidxu		 * Pick up the lock that td is blocked on.
1298161678Sdavidxu		 */
1299161678Sdavidxu		uq = td->td_umtxq;
1300161678Sdavidxu		pi = uq->uq_pi_blocked;
1301161678Sdavidxu		/* Resort td on the list if needed. */
1302161678Sdavidxu		if (!umtx_pi_adjust_thread(pi, td))
1303161678Sdavidxu			break;
1304161678Sdavidxu	}
1305161678Sdavidxu}
1306161678Sdavidxu
1307161678Sdavidxu/*
1308161678Sdavidxu * Unpropagate priority for a PI mutex when a thread blocked on
1309161678Sdavidxu * it is interrupted by signal or resumed by others.
1310161678Sdavidxu */
1311161678Sdavidxustatic void
1312161678Sdavidxuumtx_unpropagate_priority(struct umtx_pi *pi)
1313161678Sdavidxu{
1314161678Sdavidxu	struct umtx_q *uq, *uq_owner;
1315161678Sdavidxu	struct umtx_pi *pi2;
1316174701Sdavidxu	int pri, oldpri;
1317161678Sdavidxu
1318170300Sjeff	mtx_assert(&umtx_lock, MA_OWNED);
1319161678Sdavidxu
1320161678Sdavidxu	while (pi != NULL && pi->pi_owner != NULL) {
1321161678Sdavidxu		pri = PRI_MAX;
1322161678Sdavidxu		uq_owner = pi->pi_owner->td_umtxq;
1323161678Sdavidxu
1324161678Sdavidxu		TAILQ_FOREACH(pi2, &uq_owner->uq_pi_contested, pi_link) {
1325161678Sdavidxu			uq = TAILQ_FIRST(&pi2->pi_blocked);
1326161678Sdavidxu			if (uq != NULL) {
1327161678Sdavidxu				if (pri > UPRI(uq->uq_thread))
1328161678Sdavidxu					pri = UPRI(uq->uq_thread);
1329161678Sdavidxu			}
1330161678Sdavidxu		}
1331161678Sdavidxu
1332161678Sdavidxu		if (pri > uq_owner->uq_inherited_pri)
1333161678Sdavidxu			pri = uq_owner->uq_inherited_pri;
1334170300Sjeff		thread_lock(pi->pi_owner);
1335174701Sdavidxu		oldpri = pi->pi_owner->td_user_pri;
1336161678Sdavidxu		sched_unlend_user_prio(pi->pi_owner, pri);
1337170300Sjeff		thread_unlock(pi->pi_owner);
1338174701Sdavidxu		umtx_pi_adjust_locked(pi->pi_owner, oldpri);
1339161678Sdavidxu		pi = uq_owner->uq_pi_blocked;
1340161678Sdavidxu	}
1341161678Sdavidxu}
1342161678Sdavidxu
1343161678Sdavidxu/*
1344161678Sdavidxu * Insert a PI mutex into owned list.
1345161678Sdavidxu */
1346161678Sdavidxustatic void
1347161678Sdavidxuumtx_pi_setowner(struct umtx_pi *pi, struct thread *owner)
1348161678Sdavidxu{
1349161678Sdavidxu	struct umtx_q *uq_owner;
1350161678Sdavidxu
1351161678Sdavidxu	uq_owner = owner->td_umtxq;
1352170300Sjeff	mtx_assert(&umtx_lock, MA_OWNED);
1353161678Sdavidxu	if (pi->pi_owner != NULL)
1354161678Sdavidxu		panic("pi_ower != NULL");
1355161678Sdavidxu	pi->pi_owner = owner;
1356161678Sdavidxu	TAILQ_INSERT_TAIL(&uq_owner->uq_pi_contested, pi, pi_link);
1357161678Sdavidxu}
1358161678Sdavidxu
1359161678Sdavidxu/*
1360161678Sdavidxu * Claim ownership of a PI mutex.
1361161678Sdavidxu */
1362161678Sdavidxustatic int
1363161678Sdavidxuumtx_pi_claim(struct umtx_pi *pi, struct thread *owner)
1364161678Sdavidxu{
1365161678Sdavidxu	struct umtx_q *uq, *uq_owner;
1366161678Sdavidxu
1367161678Sdavidxu	uq_owner = owner->td_umtxq;
1368170300Sjeff	mtx_lock_spin(&umtx_lock);
1369161678Sdavidxu	if (pi->pi_owner == owner) {
1370170300Sjeff		mtx_unlock_spin(&umtx_lock);
1371161678Sdavidxu		return (0);
1372161678Sdavidxu	}
1373161678Sdavidxu
1374161678Sdavidxu	if (pi->pi_owner != NULL) {
1375161678Sdavidxu		/*
1376161678Sdavidxu		 * userland may have already messed the mutex, sigh.
1377161678Sdavidxu		 */
1378170300Sjeff		mtx_unlock_spin(&umtx_lock);
1379161678Sdavidxu		return (EPERM);
1380161678Sdavidxu	}
1381161678Sdavidxu	umtx_pi_setowner(pi, owner);
1382161678Sdavidxu	uq = TAILQ_FIRST(&pi->pi_blocked);
1383161678Sdavidxu	if (uq != NULL) {
1384161678Sdavidxu		int pri;
1385161678Sdavidxu
1386161678Sdavidxu		pri = UPRI(uq->uq_thread);
1387170300Sjeff		thread_lock(owner);
1388161678Sdavidxu		if (pri < UPRI(owner))
1389161678Sdavidxu			sched_lend_user_prio(owner, pri);
1390170300Sjeff		thread_unlock(owner);
1391161678Sdavidxu	}
1392170300Sjeff	mtx_unlock_spin(&umtx_lock);
1393161678Sdavidxu	return (0);
1394161678Sdavidxu}
1395161678Sdavidxu
1396174701Sdavidxustatic void
1397174701Sdavidxuumtx_pi_adjust_locked(struct thread *td, u_char oldpri)
1398161599Sdavidxu{
1399161678Sdavidxu	struct umtx_q *uq;
1400161678Sdavidxu	struct umtx_pi *pi;
1401161678Sdavidxu
1402161678Sdavidxu	uq = td->td_umtxq;
1403161678Sdavidxu	/*
1404161678Sdavidxu	 * Pick up the lock that td is blocked on.
1405161678Sdavidxu	 */
1406161678Sdavidxu	pi = uq->uq_pi_blocked;
1407161678Sdavidxu	MPASS(pi != NULL);
1408161678Sdavidxu
1409161678Sdavidxu	/* Resort the turnstile on the list. */
1410161678Sdavidxu	if (!umtx_pi_adjust_thread(pi, td))
1411161678Sdavidxu		return;
1412161678Sdavidxu
1413161678Sdavidxu	/*
1414161678Sdavidxu	 * If our priority was lowered and we are at the head of the
1415161678Sdavidxu	 * turnstile, then propagate our new priority up the chain.
1416161678Sdavidxu	 */
1417161678Sdavidxu	if (uq == TAILQ_FIRST(&pi->pi_blocked) && UPRI(td) < oldpri)
1418161678Sdavidxu		umtx_propagate_priority(td);
1419161599Sdavidxu}
1420161599Sdavidxu
1421161678Sdavidxu/*
1422174701Sdavidxu * Adjust a thread's order position in its blocked PI mutex,
1423174701Sdavidxu * this may result new priority propagating process.
1424174701Sdavidxu */
1425174701Sdavidxuvoid
1426174701Sdavidxuumtx_pi_adjust(struct thread *td, u_char oldpri)
1427174701Sdavidxu{
1428174707Sdavidxu	struct umtx_q *uq;
1429174707Sdavidxu	struct umtx_pi *pi;
1430174707Sdavidxu
1431174707Sdavidxu	uq = td->td_umtxq;
1432174701Sdavidxu	mtx_lock_spin(&umtx_lock);
1433174707Sdavidxu	/*
1434174707Sdavidxu	 * Pick up the lock that td is blocked on.
1435174707Sdavidxu	 */
1436174707Sdavidxu	pi = uq->uq_pi_blocked;
1437174707Sdavidxu	if (pi != NULL)
1438174707Sdavidxu		umtx_pi_adjust_locked(td, oldpri);
1439174701Sdavidxu	mtx_unlock_spin(&umtx_lock);
1440174701Sdavidxu}
1441174701Sdavidxu
1442174701Sdavidxu/*
1443161678Sdavidxu * Sleep on a PI mutex.
1444161678Sdavidxu */
1445161678Sdavidxustatic int
1446161678Sdavidxuumtxq_sleep_pi(struct umtx_q *uq, struct umtx_pi *pi,
1447161678Sdavidxu	uint32_t owner, const char *wmesg, int timo)
1448161678Sdavidxu{
1449161678Sdavidxu	struct umtxq_chain *uc;
1450161678Sdavidxu	struct thread *td, *td1;
1451161678Sdavidxu	struct umtx_q *uq1;
1452161678Sdavidxu	int pri;
1453161678Sdavidxu	int error = 0;
1454161678Sdavidxu
1455161678Sdavidxu	td = uq->uq_thread;
1456161678Sdavidxu	KASSERT(td == curthread, ("inconsistent uq_thread"));
1457161678Sdavidxu	uc = umtxq_getchain(&uq->uq_key);
1458161678Sdavidxu	UMTXQ_LOCKED_ASSERT(uc);
1459161678Sdavidxu	umtxq_insert(uq);
1460161678Sdavidxu	if (pi->pi_owner == NULL) {
1461161678Sdavidxu		/* XXX
1462161678Sdavidxu		 * Current, We only support process private PI-mutex,
1463161678Sdavidxu		 * non-contended PI-mutexes are locked in userland.
1464161678Sdavidxu		 * Process shared PI-mutex should always be initialized
1465161678Sdavidxu		 * by kernel and be registered in kernel, locking should
1466161678Sdavidxu		 * always be done by kernel to avoid security problems.
1467161678Sdavidxu		 * For process private PI-mutex, we can find owner
1468161678Sdavidxu		 * thread and boost its priority safely.
1469161678Sdavidxu		 */
1470161678Sdavidxu		PROC_LOCK(curproc);
1471161678Sdavidxu		td1 = thread_find(curproc, owner);
1472170300Sjeff		mtx_lock_spin(&umtx_lock);
1473161678Sdavidxu		if (td1 != NULL && pi->pi_owner == NULL) {
1474161678Sdavidxu			uq1 = td1->td_umtxq;
1475161678Sdavidxu			umtx_pi_setowner(pi, td1);
1476161678Sdavidxu		}
1477161678Sdavidxu		PROC_UNLOCK(curproc);
1478161678Sdavidxu	} else {
1479170300Sjeff		mtx_lock_spin(&umtx_lock);
1480161678Sdavidxu	}
1481161678Sdavidxu
1482161678Sdavidxu	TAILQ_FOREACH(uq1, &pi->pi_blocked, uq_lockq) {
1483161678Sdavidxu		pri = UPRI(uq1->uq_thread);
1484161678Sdavidxu		if (pri > UPRI(td))
1485161678Sdavidxu			break;
1486161678Sdavidxu	}
1487161678Sdavidxu
1488161678Sdavidxu	if (uq1 != NULL)
1489161678Sdavidxu		TAILQ_INSERT_BEFORE(uq1, uq, uq_lockq);
1490161678Sdavidxu	else
1491161678Sdavidxu		TAILQ_INSERT_TAIL(&pi->pi_blocked, uq, uq_lockq);
1492161678Sdavidxu
1493161678Sdavidxu	uq->uq_pi_blocked = pi;
1494174701Sdavidxu	thread_lock(td);
1495161678Sdavidxu	td->td_flags |= TDF_UPIBLOCKED;
1496174701Sdavidxu	thread_unlock(td);
1497170300Sjeff	mtx_unlock_spin(&umtx_lock);
1498161678Sdavidxu	umtxq_unlock(&uq->uq_key);
1499161678Sdavidxu
1500170300Sjeff	mtx_lock_spin(&umtx_lock);
1501161678Sdavidxu	umtx_propagate_priority(td);
1502170300Sjeff	mtx_unlock_spin(&umtx_lock);
1503161678Sdavidxu
1504161678Sdavidxu	umtxq_lock(&uq->uq_key);
1505161678Sdavidxu	if (uq->uq_flags & UQF_UMTXQ) {
1506161678Sdavidxu		error = msleep(uq, &uc->uc_lock, PCATCH, wmesg, timo);
1507161678Sdavidxu		if (error == EWOULDBLOCK)
1508161678Sdavidxu			error = ETIMEDOUT;
1509161678Sdavidxu		if (uq->uq_flags & UQF_UMTXQ) {
1510161678Sdavidxu			umtxq_busy(&uq->uq_key);
1511161678Sdavidxu			umtxq_remove(uq);
1512161678Sdavidxu			umtxq_unbusy(&uq->uq_key);
1513161678Sdavidxu		}
1514161678Sdavidxu	}
1515161678Sdavidxu	umtxq_unlock(&uq->uq_key);
1516161678Sdavidxu
1517170300Sjeff	mtx_lock_spin(&umtx_lock);
1518161678Sdavidxu	uq->uq_pi_blocked = NULL;
1519174701Sdavidxu	thread_lock(td);
1520161678Sdavidxu	td->td_flags &= ~TDF_UPIBLOCKED;
1521174701Sdavidxu	thread_unlock(td);
1522161678Sdavidxu	TAILQ_REMOVE(&pi->pi_blocked, uq, uq_lockq);
1523161678Sdavidxu	umtx_unpropagate_priority(pi);
1524170300Sjeff	mtx_unlock_spin(&umtx_lock);
1525161678Sdavidxu
1526161678Sdavidxu	umtxq_lock(&uq->uq_key);
1527161678Sdavidxu
1528161678Sdavidxu	return (error);
1529161678Sdavidxu}
1530161678Sdavidxu
1531161678Sdavidxu/*
1532161678Sdavidxu * Add reference count for a PI mutex.
1533161678Sdavidxu */
1534161678Sdavidxustatic void
1535161678Sdavidxuumtx_pi_ref(struct umtx_pi *pi)
1536161678Sdavidxu{
1537161678Sdavidxu	struct umtxq_chain *uc;
1538161678Sdavidxu
1539161678Sdavidxu	uc = umtxq_getchain(&pi->pi_key);
1540161678Sdavidxu	UMTXQ_LOCKED_ASSERT(uc);
1541161678Sdavidxu	pi->pi_refcount++;
1542161678Sdavidxu}
1543161678Sdavidxu
1544161678Sdavidxu/*
1545161678Sdavidxu * Decrease reference count for a PI mutex, if the counter
1546161678Sdavidxu * is decreased to zero, its memory space is freed.
1547161678Sdavidxu */
1548161678Sdavidxustatic void
1549161678Sdavidxuumtx_pi_unref(struct umtx_pi *pi)
1550161678Sdavidxu{
1551161678Sdavidxu	struct umtxq_chain *uc;
1552161678Sdavidxu	int free = 0;
1553161678Sdavidxu
1554161678Sdavidxu	uc = umtxq_getchain(&pi->pi_key);
1555161678Sdavidxu	UMTXQ_LOCKED_ASSERT(uc);
1556161678Sdavidxu	KASSERT(pi->pi_refcount > 0, ("invalid reference count"));
1557161678Sdavidxu	if (--pi->pi_refcount == 0) {
1558170300Sjeff		mtx_lock_spin(&umtx_lock);
1559161678Sdavidxu		if (pi->pi_owner != NULL) {
1560161678Sdavidxu			TAILQ_REMOVE(&pi->pi_owner->td_umtxq->uq_pi_contested,
1561161678Sdavidxu				pi, pi_link);
1562161678Sdavidxu			pi->pi_owner = NULL;
1563161678Sdavidxu		}
1564161678Sdavidxu		KASSERT(TAILQ_EMPTY(&pi->pi_blocked),
1565161678Sdavidxu			("blocked queue not empty"));
1566170300Sjeff		mtx_unlock_spin(&umtx_lock);
1567161678Sdavidxu		TAILQ_REMOVE(&uc->uc_pi_list, pi, pi_hashlink);
1568161678Sdavidxu		free = 1;
1569161678Sdavidxu	}
1570161678Sdavidxu	if (free)
1571161678Sdavidxu		umtx_pi_free(pi);
1572161678Sdavidxu}
1573161678Sdavidxu
1574161678Sdavidxu/*
1575161678Sdavidxu * Find a PI mutex in hash table.
1576161678Sdavidxu */
1577161678Sdavidxustatic struct umtx_pi *
1578161678Sdavidxuumtx_pi_lookup(struct umtx_key *key)
1579161678Sdavidxu{
1580161678Sdavidxu	struct umtxq_chain *uc;
1581161678Sdavidxu	struct umtx_pi *pi;
1582161678Sdavidxu
1583161678Sdavidxu	uc = umtxq_getchain(key);
1584161678Sdavidxu	UMTXQ_LOCKED_ASSERT(uc);
1585161678Sdavidxu
1586161678Sdavidxu	TAILQ_FOREACH(pi, &uc->uc_pi_list, pi_hashlink) {
1587161678Sdavidxu		if (umtx_key_match(&pi->pi_key, key)) {
1588161678Sdavidxu			return (pi);
1589161678Sdavidxu		}
1590161678Sdavidxu	}
1591161678Sdavidxu	return (NULL);
1592161678Sdavidxu}
1593161678Sdavidxu
1594161678Sdavidxu/*
1595161678Sdavidxu * Insert a PI mutex into hash table.
1596161678Sdavidxu */
1597161678Sdavidxustatic inline void
1598161678Sdavidxuumtx_pi_insert(struct umtx_pi *pi)
1599161678Sdavidxu{
1600161678Sdavidxu	struct umtxq_chain *uc;
1601161678Sdavidxu
1602161678Sdavidxu	uc = umtxq_getchain(&pi->pi_key);
1603161678Sdavidxu	UMTXQ_LOCKED_ASSERT(uc);
1604161678Sdavidxu	TAILQ_INSERT_TAIL(&uc->uc_pi_list, pi, pi_hashlink);
1605161678Sdavidxu}
1606161678Sdavidxu
/*
 * Lock a PI (priority-inheritance) mutex.
 *
 * td    - the locking thread
 * m     - user-space mutex object
 * flags - mutex flags (sharing mode, UMUTEX_ERROR_CHECK)
 * timo  - sleep timeout in ticks, 0 means sleep forever
 * try   - non-zero gives trylock semantics: return EBUSY rather
 *         than sleeping when the mutex is held
 */
static int
_do_lock_pi(struct thread *td, struct umutex *m, uint32_t flags, int timo,
	int try)
{
	struct umtx_q *uq;
	struct umtx_pi *pi, *new_pi;
	uint32_t id, owner, old;
	int error;

	id = td->td_tid;
	uq = td->td_umtxq;

	if ((error = umtx_key_get(m, TYPE_PI_UMUTEX, GET_SHARE(flags),
	    &uq->uq_key)) != 0)
		return (error);
	/*
	 * Find or create the kernel-side PI record for this mutex.
	 * The chain lock cannot be held across an M_WAITOK allocation,
	 * so first try M_NOWAIT; on failure, drop the lock, allocate
	 * sleeping, relock, and re-check for a racing insert.
	 */
	umtxq_lock(&uq->uq_key);
	pi = umtx_pi_lookup(&uq->uq_key);
	if (pi == NULL) {
		new_pi = umtx_pi_alloc(M_NOWAIT);
		if (new_pi == NULL) {
			umtxq_unlock(&uq->uq_key);
			new_pi = umtx_pi_alloc(M_WAITOK);
			new_pi->pi_key = uq->uq_key;
			umtxq_lock(&uq->uq_key);
			pi = umtx_pi_lookup(&uq->uq_key);
			if (pi != NULL) {
				/* Lost the race; discard our copy. */
				umtx_pi_free(new_pi);
				new_pi = NULL;
			}
		}
		if (new_pi != NULL) {
			new_pi->pi_key = uq->uq_key;
			umtx_pi_insert(new_pi);
			pi = new_pi;
		}
	}
	/* Hold a reference so the PI record cannot go away under us. */
	umtx_pi_ref(pi);
	umtxq_unlock(&uq->uq_key);

	/*
	 * Care must be exercised when dealing with umtx structure.  It
	 * can fault on any access.
	 */
	for (;;) {
		/*
		 * Try the uncontested case.  This should be done in userland.
		 */
		owner = casuword32(&m->m_owner, UMUTEX_UNOWNED, id);

		/* The acquire succeeded. */
		if (owner == UMUTEX_UNOWNED) {
			error = 0;
			break;
		}

		/* The address was invalid. */
		if (owner == -1) {
			error = EFAULT;
			break;
		}

		/* If no one owns it but it is contested try to acquire it. */
		if (owner == UMUTEX_CONTESTED) {
			owner = casuword32(&m->m_owner,
			    UMUTEX_CONTESTED, id | UMUTEX_CONTESTED);

			if (owner == UMUTEX_CONTESTED) {
				/* We got it; record PI ownership. */
				umtxq_lock(&uq->uq_key);
				error = umtx_pi_claim(pi, td);
				umtxq_unlock(&uq->uq_key);
				break;
			}

			/* The address was invalid. */
			if (owner == -1) {
				error = EFAULT;
				break;
			}

			/* If this failed the lock has changed, restart. */
			continue;
		}

		/* Relock by the owner is a deadlock for error-check mutexes. */
		if ((flags & UMUTEX_ERROR_CHECK) != 0 &&
		    (owner & ~UMUTEX_CONTESTED) == id) {
			error = EDEADLK;
			break;
		}

		if (try != 0) {
			error = EBUSY;
			break;
		}

		/*
		 * If we caught a signal, we have retried and now
		 * exit immediately.
		 */
		if (error != 0)
			break;

		/* Busy the chain to serialize with a concurrent unlock. */
		umtxq_lock(&uq->uq_key);
		umtxq_busy(&uq->uq_key);
		umtxq_unlock(&uq->uq_key);

		/*
		 * Set the contested bit so that a release in user space
		 * knows to use the system call for unlock.  If this fails
		 * either some one else has acquired the lock or it has been
		 * released.
		 */
		old = casuword32(&m->m_owner, owner, owner | UMUTEX_CONTESTED);

		/* The address was invalid. */
		if (old == -1) {
			umtxq_lock(&uq->uq_key);
			umtxq_unbusy(&uq->uq_key);
			umtxq_unlock(&uq->uq_key);
			error = EFAULT;
			break;
		}

		umtxq_lock(&uq->uq_key);
		umtxq_unbusy(&uq->uq_key);
		/*
		 * We set the contested bit, sleep. Otherwise the lock changed
		 * and we need to retry or we lost a race to the thread
		 * unlocking the umtx.
		 */
		if (old == owner)
			error = umtxq_sleep_pi(uq, pi, owner & ~UMUTEX_CONTESTED,
				 "umtxpi", timo);
		umtxq_unlock(&uq->uq_key);
	}

	/* Drop the reference taken above. */
	umtxq_lock(&uq->uq_key);
	umtx_pi_unref(pi);
	umtxq_unlock(&uq->uq_key);

	umtx_key_release(&uq->uq_key);
	return (error);
}
1752161678Sdavidxu
/*
 * Unlock a PI (priority-inheritance) mutex.
 *
 * The calling thread must own the mutex.  If waiters are queued,
 * the kernel PI record is disowned, the caller's lent user priority
 * is recomputed from the PI mutexes it still holds contested, and
 * the first blocked waiter is woken.
 */
static int
do_unlock_pi(struct thread *td, struct umutex *m, uint32_t flags)
{
	struct umtx_key key;
	struct umtx_q *uq_first, *uq_first2, *uq_me;
	struct umtx_pi *pi, *pi2;
	uint32_t owner, old, id;
	int error;
	int count;
	int pri;

	id = td->td_tid;
	/*
	 * Make sure we own this mtx.
	 */
	owner = fuword32(__DEVOLATILE(uint32_t *, &m->m_owner));
	if (owner == -1)
		return (EFAULT);

	if ((owner & ~UMUTEX_CONTESTED) != id)
		return (EPERM);

	/* This should be done in userland */
	if ((owner & UMUTEX_CONTESTED) == 0) {
		old = casuword32(&m->m_owner, owner, UMUTEX_UNOWNED);
		if (old == -1)
			return (EFAULT);
		if (old == owner)
			return (0);
		/* CAS failed: the owner word changed under us; fall
		 * through to the contested path with the fresh value. */
		owner = old;
	}

	/* We should only ever be in here for contested locks */
	if ((error = umtx_key_get(m, TYPE_PI_UMUTEX, GET_SHARE(flags),
	    &key)) != 0)
		return (error);

	umtxq_lock(&key);
	umtxq_busy(&key);
	count = umtxq_count_pi(&key, &uq_first);
	if (uq_first != NULL) {
		pi = uq_first->uq_pi_blocked;
		if (pi->pi_owner != curthread) {
			umtxq_unbusy(&key);
			umtxq_unlock(&key);
			/* userland messed the mutex */
			return (EPERM);
		}
		uq_me = curthread->td_umtxq;
		/* Disown the PI record and recompute our lent priority
		 * as the minimum over the PI mutexes still contested. */
		mtx_lock_spin(&umtx_lock);
		pi->pi_owner = NULL;
		TAILQ_REMOVE(&uq_me->uq_pi_contested, pi, pi_link);
		uq_first = TAILQ_FIRST(&pi->pi_blocked);
		pri = PRI_MAX;
		TAILQ_FOREACH(pi2, &uq_me->uq_pi_contested, pi_link) {
			uq_first2 = TAILQ_FIRST(&pi2->pi_blocked);
			if (uq_first2 != NULL) {
				if (pri > UPRI(uq_first2->uq_thread))
					pri = UPRI(uq_first2->uq_thread);
			}
		}
		thread_lock(curthread);
		sched_unlend_user_prio(curthread, pri);
		thread_unlock(curthread);
		mtx_unlock_spin(&umtx_lock);
	}
	umtxq_unlock(&key);

	/*
	 * When unlocking the umtx, it must be marked as unowned if
	 * there is zero or one thread only waiting for it.
	 * Otherwise, it must be marked as contested.
	 */
	old = casuword32(&m->m_owner, owner,
		count <= 1 ? UMUTEX_UNOWNED : UMUTEX_CONTESTED);

	umtxq_lock(&key);
	if (uq_first != NULL)
		umtxq_signal_thread(uq_first);
	umtxq_unbusy(&key);
	umtxq_unlock(&key);
	umtx_key_release(&key);
	if (old == -1)
		return (EFAULT);
	if (old != owner)
		return (EINVAL);
	return (0);
}
1844161678Sdavidxu
/*
 * Lock a PP (priority-protect / priority-ceiling) mutex.
 *
 * timo is the sleep timeout in ticks (0 = forever); try non-zero
 * gives trylock semantics (EBUSY instead of sleeping).  A thread
 * with PRIV_SCHED_RTPRIO privilege (su) has its user priority
 * boosted to the mutex's ceiling while acquiring.
 */
static int
_do_lock_pp(struct thread *td, struct umutex *m, uint32_t flags, int timo,
	int try)
{
	struct umtx_q *uq, *uq2;
	struct umtx_pi *pi;
	uint32_t ceiling;
	uint32_t owner, id;
	int error, pri, old_inherited_pri, su;

	id = td->td_tid;
	uq = td->td_umtxq;
	if ((error = umtx_key_get(m, TYPE_PP_UMUTEX, GET_SHARE(flags),
	    &uq->uq_key)) != 0)
		return (error);
	/* Only privileged threads may take the realtime priority boost. */
	su = (priv_check(td, PRIV_SCHED_RTPRIO) == 0);
	for (;;) {
		old_inherited_pri = uq->uq_inherited_pri;
		umtxq_lock(&uq->uq_key);
		umtxq_busy(&uq->uq_key);
		umtxq_unlock(&uq->uq_key);

		/*
		 * Map the user-supplied ceiling to a kernel realtime
		 * priority offset.  NOTE(review): a faulting fuword32()
		 * returns -1 here, which makes ceiling wrap above
		 * RTP_PRIO_MAX and yields EINVAL rather than EFAULT —
		 * confirm this is intended.
		 */
		ceiling = RTP_PRIO_MAX - fuword32(&m->m_ceilings[0]);
		if (ceiling > RTP_PRIO_MAX) {
			error = EINVAL;
			goto out;
		}

		mtx_lock_spin(&umtx_lock);
		/* The locker's priority must not exceed the ceiling. */
		if (UPRI(td) < PRI_MIN_REALTIME + ceiling) {
			mtx_unlock_spin(&umtx_lock);
			error = EINVAL;
			goto out;
		}
		if (su && PRI_MIN_REALTIME + ceiling < uq->uq_inherited_pri) {
			uq->uq_inherited_pri = PRI_MIN_REALTIME + ceiling;
			thread_lock(td);
			if (uq->uq_inherited_pri < UPRI(td))
				sched_lend_user_prio(td, uq->uq_inherited_pri);
			thread_unlock(td);
		}
		mtx_unlock_spin(&umtx_lock);

		/*
		 * PP mutexes are always locked through the kernel, so a
		 * free mutex reads UMUTEX_CONTESTED (see do_unlock_pp()).
		 */
		owner = casuword32(&m->m_owner,
		    UMUTEX_CONTESTED, id | UMUTEX_CONTESTED);

		if (owner == UMUTEX_CONTESTED) {
			error = 0;
			break;
		}

		/* The address was invalid. */
		if (owner == -1) {
			error = EFAULT;
			break;
		}

		/* Relock by the owner is a deadlock for error-check mutexes. */
		if ((flags & UMUTEX_ERROR_CHECK) != 0 &&
		    (owner & ~UMUTEX_CONTESTED) == id) {
			error = EDEADLK;
			break;
		}

		if (try != 0) {
			error = EBUSY;
			break;
		}

		/*
		 * If we caught a signal, we have retried and now
		 * exit immediately.
		 */
		if (error != 0)
			break;

		umtxq_lock(&uq->uq_key);
		umtxq_insert(uq);
		umtxq_unbusy(&uq->uq_key);
		error = umtxq_sleep(uq, "umtxpp", timo);
		umtxq_remove(uq);
		umtxq_unlock(&uq->uq_key);

		/*
		 * The acquire failed this round: restore the previous
		 * inherited priority and recompute the lent priority
		 * from the PI mutexes we still hold contested.
		 */
		mtx_lock_spin(&umtx_lock);
		uq->uq_inherited_pri = old_inherited_pri;
		pri = PRI_MAX;
		TAILQ_FOREACH(pi, &uq->uq_pi_contested, pi_link) {
			uq2 = TAILQ_FIRST(&pi->pi_blocked);
			if (uq2 != NULL) {
				if (pri > UPRI(uq2->uq_thread))
					pri = UPRI(uq2->uq_thread);
			}
		}
		if (pri > uq->uq_inherited_pri)
			pri = uq->uq_inherited_pri;
		thread_lock(td);
		sched_unlend_user_prio(td, pri);
		thread_unlock(td);
		mtx_unlock_spin(&umtx_lock);
	}

	if (error != 0) {
		/* Error exit: undo any priority boost taken above. */
		mtx_lock_spin(&umtx_lock);
		uq->uq_inherited_pri = old_inherited_pri;
		pri = PRI_MAX;
		TAILQ_FOREACH(pi, &uq->uq_pi_contested, pi_link) {
			uq2 = TAILQ_FIRST(&pi->pi_blocked);
			if (uq2 != NULL) {
				if (pri > UPRI(uq2->uq_thread))
					pri = UPRI(uq2->uq_thread);
			}
		}
		if (pri > uq->uq_inherited_pri)
			pri = uq->uq_inherited_pri;
		thread_lock(td);
		sched_unlend_user_prio(td, pri);
		thread_unlock(td);
		mtx_unlock_spin(&umtx_lock);
	}

out:
	umtxq_lock(&uq->uq_key);
	umtxq_unbusy(&uq->uq_key);
	umtxq_unlock(&uq->uq_key);
	umtx_key_release(&uq->uq_key);
	return (error);
}
1974161678Sdavidxu
/*
 * Unlock a PP (priority-protect) mutex.
 *
 * The caller must own the mutex.  m_ceilings[1] carries the saved
 * ceiling to revert the caller's inherited priority to (-1 means
 * drop the boost entirely).  After the store, one waiter is woken
 * and the caller's lent user priority is recomputed.
 */
static int
do_unlock_pp(struct thread *td, struct umutex *m, uint32_t flags)
{
	struct umtx_key key;
	struct umtx_q *uq, *uq2;
	struct umtx_pi *pi;
	uint32_t owner, id;
	uint32_t rceiling;
	int error, pri, new_inherited_pri, su;

	id = td->td_tid;
	uq = td->td_umtxq;
	su = (priv_check(td, PRIV_SCHED_RTPRIO) == 0);

	/*
	 * Make sure we own this mtx.
	 */
	owner = fuword32(__DEVOLATILE(uint32_t *, &m->m_owner));
	if (owner == -1)
		return (EFAULT);

	if ((owner & ~UMUTEX_CONTESTED) != id)
		return (EPERM);

	/* Convert the saved ceiling to the inherited priority to
	 * revert to; -1 means no boost remains. */
	error = copyin(&m->m_ceilings[1], &rceiling, sizeof(uint32_t));
	if (error != 0)
		return (error);

	if (rceiling == -1)
		new_inherited_pri = PRI_MAX;
	else {
		rceiling = RTP_PRIO_MAX - rceiling;
		if (rceiling > RTP_PRIO_MAX)
			return (EINVAL);
		new_inherited_pri = PRI_MIN_REALTIME + rceiling;
	}

	if ((error = umtx_key_get(m, TYPE_PP_UMUTEX, GET_SHARE(flags),
	    &key)) != 0)
		return (error);
	umtxq_lock(&key);
	umtxq_busy(&key);
	umtxq_unlock(&key);
	/*
	 * For priority protected mutex, always set unlocked state
	 * to UMUTEX_CONTESTED, so that userland always enters kernel
	 * to lock the mutex, it is necessary because thread priority
	 * has to be adjusted for such mutex.
	 */
	error = suword32(__DEVOLATILE(uint32_t *, &m->m_owner),
		UMUTEX_CONTESTED);

	umtxq_lock(&key);
	if (error == 0)
		umtxq_signal(&key, 1);
	umtxq_unbusy(&key);
	umtxq_unlock(&key);

	if (error == -1)
		error = EFAULT;
	else {
		/* Recompute our lent priority now the mutex is released. */
		mtx_lock_spin(&umtx_lock);
		if (su != 0)
			uq->uq_inherited_pri = new_inherited_pri;
		pri = PRI_MAX;
		TAILQ_FOREACH(pi, &uq->uq_pi_contested, pi_link) {
			uq2 = TAILQ_FIRST(&pi->pi_blocked);
			if (uq2 != NULL) {
				if (pri > UPRI(uq2->uq_thread))
					pri = UPRI(uq2->uq_thread);
			}
		}
		if (pri > uq->uq_inherited_pri)
			pri = uq->uq_inherited_pri;
		thread_lock(td);
		sched_unlend_user_prio(td, pri);
		thread_unlock(td);
		mtx_unlock_spin(&umtx_lock);
	}
	umtx_key_release(&key);
	return (error);
}
2060161678Sdavidxu
/*
 * Change the priority ceiling (m_ceilings[0]) of a PP mutex.
 *
 * The mutex must be held while the ceiling is written, so this
 * either briefly acquires a free mutex, updates in place when the
 * caller already owns it, or sleeps and retries.  On success the
 * previous ceiling is copied out through old_ceiling when non-NULL.
 */
static int
do_set_ceiling(struct thread *td, struct umutex *m, uint32_t ceiling,
	uint32_t *old_ceiling)
{
	struct umtx_q *uq;
	uint32_t save_ceiling;
	uint32_t owner, id;
	uint32_t flags;
	int error;

	flags = fuword32(&m->m_flags);
	if ((flags & UMUTEX_PRIO_PROTECT) == 0)
		return (EINVAL);
	if (ceiling > RTP_PRIO_MAX)
		return (EINVAL);
	id = td->td_tid;
	uq = td->td_umtxq;
	if ((error = umtx_key_get(m, TYPE_PP_UMUTEX, GET_SHARE(flags),
	   &uq->uq_key)) != 0)
		return (error);
	for (;;) {
		umtxq_lock(&uq->uq_key);
		umtxq_busy(&uq->uq_key);
		umtxq_unlock(&uq->uq_key);

		save_ceiling = fuword32(&m->m_ceilings[0]);

		/* Try to grab the mutex; a free PP mutex reads
		 * UMUTEX_CONTESTED (see do_unlock_pp()). */
		owner = casuword32(&m->m_owner,
		    UMUTEX_CONTESTED, id | UMUTEX_CONTESTED);

		if (owner == UMUTEX_CONTESTED) {
			/* Got it: store the new ceiling and release. */
			suword32(&m->m_ceilings[0], ceiling);
			suword32(__DEVOLATILE(uint32_t *, &m->m_owner),
				UMUTEX_CONTESTED);
			error = 0;
			break;
		}

		/* The address was invalid. */
		if (owner == -1) {
			error = EFAULT;
			break;
		}

		/* We already own it; update the ceiling in place. */
		if ((owner & ~UMUTEX_CONTESTED) == id) {
			suword32(&m->m_ceilings[0], ceiling);
			error = 0;
			break;
		}

		/*
		 * If we caught a signal, we have retried and now
		 * exit immediately.
		 */
		if (error != 0)
			break;

		/*
		 * We set the contested bit, sleep. Otherwise the lock changed
		 * and we need to retry or we lost a race to the thread
		 * unlocking the umtx.
		 */
		umtxq_lock(&uq->uq_key);
		umtxq_insert(uq);
		umtxq_unbusy(&uq->uq_key);
		error = umtxq_sleep(uq, "umtxpp", 0);
		umtxq_remove(uq);
		umtxq_unlock(&uq->uq_key);
	}
	umtxq_lock(&uq->uq_key);
	if (error == 0)
		umtxq_signal(&uq->uq_key, INT_MAX);
	umtxq_unbusy(&uq->uq_key);
	umtxq_unlock(&uq->uq_key);
	umtx_key_release(&uq->uq_key);
	if (error == 0 && old_ceiling != NULL)
		suword32(old_ceiling, save_ceiling);
	return (error);
}
2140161678Sdavidxu
2141162030Sdavidxustatic int
2142162030Sdavidxu_do_lock_umutex(struct thread *td, struct umutex *m, int flags, int timo,
2143162030Sdavidxu	int try)
2144162030Sdavidxu{
2145162030Sdavidxu	switch(flags & (UMUTEX_PRIO_INHERIT | UMUTEX_PRIO_PROTECT)) {
2146162030Sdavidxu	case 0:
2147162030Sdavidxu		return (_do_lock_normal(td, m, flags, timo, try));
2148162030Sdavidxu	case UMUTEX_PRIO_INHERIT:
2149162030Sdavidxu		return (_do_lock_pi(td, m, flags, timo, try));
2150162030Sdavidxu	case UMUTEX_PRIO_PROTECT:
2151162030Sdavidxu		return (_do_lock_pp(td, m, flags, timo, try));
2152162030Sdavidxu	}
2153162030Sdavidxu	return (EINVAL);
2154162030Sdavidxu}
2155162030Sdavidxu
/*
 * Lock a userland POSIX mutex.
 *
 * timeout, when non-NULL, is a relative interval: an absolute
 * deadline is computed on the uptime clock and the lock is retried
 * after each tick-granularity sleep until it succeeds, fails, or
 * the deadline passes.  An untimed lock is transparently restarted
 * after a signal (EINTR -> ERESTART); a timed lock is not
 * (ERESTART -> EINTR).
 */
static int
do_lock_umutex(struct thread *td, struct umutex *m,
	struct timespec *timeout, int try)
{
	struct timespec ts, ts2, ts3;
	struct timeval tv;
	uint32_t flags;
	int error;

	flags = fuword32(&m->m_flags);
	if (flags == -1)
		return (EFAULT);

	if (timeout == NULL) {
		error = _do_lock_umutex(td, m, flags, 0, try);
		/* Mutex locking is restarted if it is interrupted. */
		if (error == EINTR)
			error = ERESTART;
	} else {
		/* ts = absolute deadline, tv = time remaining. */
		getnanouptime(&ts);
		timespecadd(&ts, timeout);
		TIMESPEC_TO_TIMEVAL(&tv, timeout);
		for (;;) {
			error = _do_lock_umutex(td, m, flags, tvtohz(&tv), try);
			if (error != ETIMEDOUT)
				break;
			getnanouptime(&ts2);
			if (timespeccmp(&ts2, &ts, >=)) {
				error = ETIMEDOUT;
				break;
			}
			ts3 = ts;
			timespecsub(&ts3, &ts2);
			TIMESPEC_TO_TIMEVAL(&tv, &ts3);
		}
		/* Timed-locking is not restarted. */
		if (error == ERESTART)
			error = EINTR;
	}
	return (error);
}
2200161678Sdavidxu
2201161678Sdavidxu/*
2202161678Sdavidxu * Unlock a userland POSIX mutex.
2203161678Sdavidxu */
2204161678Sdavidxustatic int
2205161678Sdavidxudo_unlock_umutex(struct thread *td, struct umutex *m)
2206161678Sdavidxu{
2207161678Sdavidxu	uint32_t flags;
2208161678Sdavidxu
2209161678Sdavidxu	flags = fuword32(&m->m_flags);
2210161678Sdavidxu	if (flags == -1)
2211161678Sdavidxu		return (EFAULT);
2212161678Sdavidxu
2213161855Sdavidxu	switch(flags & (UMUTEX_PRIO_INHERIT | UMUTEX_PRIO_PROTECT)) {
2214161855Sdavidxu	case 0:
2215161855Sdavidxu		return (do_unlock_normal(td, m, flags));
2216161855Sdavidxu	case UMUTEX_PRIO_INHERIT:
2217161855Sdavidxu		return (do_unlock_pi(td, m, flags));
2218161855Sdavidxu	case UMUTEX_PRIO_PROTECT:
2219161855Sdavidxu		return (do_unlock_pp(td, m, flags));
2220161855Sdavidxu	}
2221161678Sdavidxu
2222161855Sdavidxu	return (EINVAL);
2223161678Sdavidxu}
2224161678Sdavidxu
/*
 * Wait on a userland condition variable cv, releasing the userland
 * mutex m before sleeping.  timeout is relative and may be NULL
 * (wait forever).  If wflags has UMTX_CHECK_UNPARKING, a pending
 * unpark (TDP_WAKEUP) is reported as EINTR instead of sleeping.
 */
static int
do_cv_wait(struct thread *td, struct ucond *cv, struct umutex *m,
	struct timespec *timeout, u_long wflags)
{
	struct umtx_q *uq;
	struct timeval tv;
	struct timespec cts, ets, tts;
	uint32_t flags;
	int error;

	uq = td->td_umtxq;
	flags = fuword32(&cv->c_flags);
	error = umtx_key_get(cv, TYPE_CV, GET_SHARE(flags), &uq->uq_key);
	if (error != 0)
		return (error);
	/* Queue ourselves before touching userland state so a racing
	 * signal after the mutex release can find us. */
	umtxq_lock(&uq->uq_key);
	umtxq_busy(&uq->uq_key);
	umtxq_insert(uq);
	umtxq_unlock(&uq->uq_key);

	/*
	 * The magic thing is we should set c_has_waiters to 1 before
	 * releasing user mutex.
	 */
	suword32(__DEVOLATILE(uint32_t *, &cv->c_has_waiters), 1);

	umtxq_lock(&uq->uq_key);
	umtxq_unbusy(&uq->uq_key);
	umtxq_unlock(&uq->uq_key);

	error = do_unlock_umutex(td, m);

	umtxq_lock(&uq->uq_key);
	if (error == 0) {
		if ((wflags & UMTX_CHECK_UNPARKING) &&
		    (td->td_pflags & TDP_WAKEUP)) {
			/* A pending unpark consumes this wait. */
			td->td_pflags &= ~TDP_WAKEUP;
			error = EINTR;
		} else if (timeout == NULL) {
			error = umtxq_sleep(uq, "ucond", 0);
		} else {
			/* ets = absolute deadline on the uptime clock. */
			getnanouptime(&ets);
			timespecadd(&ets, timeout);
			TIMESPEC_TO_TIMEVAL(&tv, timeout);
			for (;;) {
				error = umtxq_sleep(uq, "ucond", tvtohz(&tv));
				if (error != ETIMEDOUT)
					break;
				getnanouptime(&cts);
				if (timespeccmp(&cts, &ets, >=)) {
					error = ETIMEDOUT;
					break;
				}
				tts = ets;
				timespecsub(&tts, &cts);
				TIMESPEC_TO_TIMEVAL(&tv, &tts);
			}
		}
	}

	if (error != 0) {
		if ((uq->uq_flags & UQF_UMTXQ) == 0) {
			/*
			 * If we concurrently got do_cv_signal()d
			 * and we got an error or UNIX signals or a timeout,
			 * then, perform another umtxq_signal to avoid
			 * consuming the wakeup. This may cause spurious
			 * wakeup for another thread which was just queued,
			 * but SUSV3 explicitly allows spurious wakeup to
			 * occur, and indeed a kernel based implementation
			 * can not avoid it.
			 */
			if (!umtxq_signal(&uq->uq_key, 1))
				error = 0;
		}
		if (error == ERESTART)
			error = EINTR;
	}
	umtxq_remove(uq);
	umtxq_unlock(&uq->uq_key);
	umtx_key_release(&uq->uq_key);
	return (error);
}
2308164839Sdavidxu
2309164839Sdavidxu/*
2310164839Sdavidxu * Signal a userland condition variable.
2311164839Sdavidxu */
2312164839Sdavidxustatic int
2313164839Sdavidxudo_cv_signal(struct thread *td, struct ucond *cv)
2314164839Sdavidxu{
2315164839Sdavidxu	struct umtx_key key;
2316164839Sdavidxu	int error, cnt, nwake;
2317164839Sdavidxu	uint32_t flags;
2318164839Sdavidxu
2319164839Sdavidxu	flags = fuword32(&cv->c_flags);
2320164839Sdavidxu	if ((error = umtx_key_get(cv, TYPE_CV, GET_SHARE(flags), &key)) != 0)
2321164839Sdavidxu		return (error);
2322164839Sdavidxu	umtxq_lock(&key);
2323164839Sdavidxu	umtxq_busy(&key);
2324164839Sdavidxu	cnt = umtxq_count(&key);
2325164839Sdavidxu	nwake = umtxq_signal(&key, 1);
2326164839Sdavidxu	if (cnt <= nwake) {
2327164839Sdavidxu		umtxq_unlock(&key);
2328164839Sdavidxu		error = suword32(
2329164839Sdavidxu		    __DEVOLATILE(uint32_t *, &cv->c_has_waiters), 0);
2330164839Sdavidxu		umtxq_lock(&key);
2331164839Sdavidxu	}
2332164839Sdavidxu	umtxq_unbusy(&key);
2333164839Sdavidxu	umtxq_unlock(&key);
2334164839Sdavidxu	umtx_key_release(&key);
2335164839Sdavidxu	return (error);
2336164839Sdavidxu}
2337164839Sdavidxu
2338164839Sdavidxustatic int
2339164839Sdavidxudo_cv_broadcast(struct thread *td, struct ucond *cv)
2340164839Sdavidxu{
2341164839Sdavidxu	struct umtx_key key;
2342164839Sdavidxu	int error;
2343164839Sdavidxu	uint32_t flags;
2344164839Sdavidxu
2345164839Sdavidxu	flags = fuword32(&cv->c_flags);
2346164839Sdavidxu	if ((error = umtx_key_get(cv, TYPE_CV, GET_SHARE(flags), &key)) != 0)
2347164839Sdavidxu		return (error);
2348164839Sdavidxu
2349164839Sdavidxu	umtxq_lock(&key);
2350164839Sdavidxu	umtxq_busy(&key);
2351164839Sdavidxu	umtxq_signal(&key, INT_MAX);
2352164839Sdavidxu	umtxq_unlock(&key);
2353164839Sdavidxu
2354164839Sdavidxu	error = suword32(__DEVOLATILE(uint32_t *, &cv->c_has_waiters), 0);
2355164839Sdavidxu
2356164839Sdavidxu	umtxq_lock(&key);
2357164839Sdavidxu	umtxq_unbusy(&key);
2358164839Sdavidxu	umtxq_unlock(&key);
2359164839Sdavidxu
2360164839Sdavidxu	umtx_key_release(&key);
2361164839Sdavidxu	return (error);
2362164839Sdavidxu}
2363164839Sdavidxu
/*
 * Acquire a read lock on a userland urwlock.
 *
 * td     - acquiring thread
 * rwlock - user-space address of the lock
 * fflag  - URWLOCK_PREFER_READER here (or in the lock's rw_flags) lets
 *          readers ignore blocked writers when deciding to spin in
 * timo   - sleep timeout in ticks; 0 sleeps forever
 *
 * The lock word is updated with casuword32() retry loops directly on user
 * memory; the kernel-side umtxq "busy" state on the key acts as a monitor
 * serializing waiter bookkeeping against wakers.  Returns 0 on success,
 * EAGAIN when the reader count is saturated, or an umtxq_sleep() error.
 */
2364177848Sdavidxustatic int
2365177848Sdavidxudo_rw_rdlock(struct thread *td, struct urwlock *rwlock, long fflag, int timo)
2366177848Sdavidxu{
2367177848Sdavidxu	struct umtx_q *uq;
2368177848Sdavidxu	uint32_t flags, wrflags;
2369177848Sdavidxu	int32_t state, oldstate;
2370177848Sdavidxu	int32_t blocked_readers;
2371177848Sdavidxu	int error;
2372177848Sdavidxu
2373177848Sdavidxu	uq = td->td_umtxq;
2374177848Sdavidxu	flags = fuword32(&rwlock->rw_flags);
2375177848Sdavidxu	error = umtx_key_get(rwlock, TYPE_RWLOCK, GET_SHARE(flags), &uq->uq_key);
2376177848Sdavidxu	if (error != 0)
2377177848Sdavidxu		return (error);
2378177848Sdavidxu
	/* Which state bits block a reader: always the write owner, and
	 * also pending writers unless reader preference was requested. */
2379177848Sdavidxu	wrflags = URWLOCK_WRITE_OWNER;
2380177848Sdavidxu	if (!(fflag & URWLOCK_PREFER_READER) && !(flags & URWLOCK_PREFER_READER))
2381177848Sdavidxu		wrflags |= URWLOCK_WRITE_WAITERS;
2382177848Sdavidxu
2383177848Sdavidxu	for (;;) {
2384177848Sdavidxu		state = fuword32(__DEVOLATILE(int32_t *, &rwlock->rw_state));
2385177848Sdavidxu		/* try to lock it */
2386177848Sdavidxu		while (!(state & wrflags)) {
2387177848Sdavidxu			if (__predict_false(URWLOCK_READER_COUNT(state) == URWLOCK_MAX_READERS)) {
2388177848Sdavidxu				umtx_key_release(&uq->uq_key);
2389177848Sdavidxu				return (EAGAIN);
2390177848Sdavidxu			}
2391177848Sdavidxu			oldstate = casuword32(&rwlock->rw_state, state, state + 1);
2392177848Sdavidxu			if (oldstate == state) {
2393177848Sdavidxu				umtx_key_release(&uq->uq_key);
2394177848Sdavidxu				return (0);
2395177848Sdavidxu			}
2396177848Sdavidxu			state = oldstate;
2397177848Sdavidxu		}
2398177848Sdavidxu
		/* 'error' was set by umtxq_sleep() on a previous iteration
		 * (timeout or signal); stop retrying and report it. */
2399177848Sdavidxu		if (error)
2400177848Sdavidxu			break;
2401177848Sdavidxu
2402177848Sdavidxu		/* grab monitor lock */
2403177848Sdavidxu		umtxq_lock(&uq->uq_key);
2404177848Sdavidxu		umtxq_busy(&uq->uq_key);
2405177848Sdavidxu		umtxq_unlock(&uq->uq_key);
2406177848Sdavidxu
2407177848Sdavidxu		/* set read contention bit */
2408177848Sdavidxu		while ((state & wrflags) && !(state & URWLOCK_READ_WAITERS)) {
2409177848Sdavidxu			oldstate = casuword32(&rwlock->rw_state, state, state | URWLOCK_READ_WAITERS);
2410177848Sdavidxu			if (oldstate == state)
2411177848Sdavidxu				goto sleep;
2412177848Sdavidxu			state = oldstate;
2413177848Sdavidxu		}
2414177848Sdavidxu
2415177848Sdavidxu		/* state is changed while setting flags, restart */
2416177848Sdavidxu		if (!(state & wrflags)) {
2417177848Sdavidxu			umtxq_lock(&uq->uq_key);
2418177848Sdavidxu			umtxq_unbusy(&uq->uq_key);
2419177848Sdavidxu			umtxq_unlock(&uq->uq_key);
2420177848Sdavidxu			continue;
2421177848Sdavidxu		}
2422177848Sdavidxu
2423177848Sdavidxusleep:
2424177848Sdavidxu		/* contention bit is set, before sleeping, increase read waiter count */
2425177848Sdavidxu		blocked_readers = fuword32(&rwlock->rw_blocked_readers);
2426177848Sdavidxu		suword32(&rwlock->rw_blocked_readers, blocked_readers+1);
2427177848Sdavidxu
2428177848Sdavidxu		while (state & wrflags) {
2429177848Sdavidxu			umtxq_lock(&uq->uq_key);
2430177848Sdavidxu			umtxq_insert(uq);
2431177848Sdavidxu			umtxq_unbusy(&uq->uq_key);
2432177848Sdavidxu
2433177848Sdavidxu			error = umtxq_sleep(uq, "urdlck", timo);
2434177848Sdavidxu
2435177848Sdavidxu			umtxq_busy(&uq->uq_key);
2436177848Sdavidxu			umtxq_remove(uq);
2437177848Sdavidxu			umtxq_unlock(&uq->uq_key);
2438177848Sdavidxu			if (error)
2439177848Sdavidxu				break;
2440177848Sdavidxu			state = fuword32(__DEVOLATILE(int32_t *, &rwlock->rw_state));
2441177848Sdavidxu		}
2442177848Sdavidxu
2443177848Sdavidxu		/* decrease read waiter count, and may clear read contention bit */
2444177848Sdavidxu		blocked_readers = fuword32(&rwlock->rw_blocked_readers);
2445177848Sdavidxu		suword32(&rwlock->rw_blocked_readers, blocked_readers-1);
		/* We were the last blocked reader; clear the contention bit. */
2446177848Sdavidxu		if (blocked_readers == 1) {
2447177848Sdavidxu			state = fuword32(__DEVOLATILE(int32_t *, &rwlock->rw_state));
2448177848Sdavidxu			for (;;) {
2449177848Sdavidxu				oldstate = casuword32(&rwlock->rw_state, state,
2450177848Sdavidxu					 state & ~URWLOCK_READ_WAITERS);
2451177848Sdavidxu				if (oldstate == state)
2452177848Sdavidxu					break;
2453177848Sdavidxu				state = oldstate;
2454177848Sdavidxu			}
2455177848Sdavidxu		}
2456177848Sdavidxu
2457177848Sdavidxu		umtxq_lock(&uq->uq_key);
2458177848Sdavidxu		umtxq_unbusy(&uq->uq_key);
2459177848Sdavidxu		umtxq_unlock(&uq->uq_key);
2460177848Sdavidxu	}
2461177848Sdavidxu	umtx_key_release(&uq->uq_key);
2462177848Sdavidxu	return (error);
2463177848Sdavidxu}
2464177848Sdavidxu
/*
 * Read-lock with a relative timeout.  The timeout is converted to an
 * absolute deadline on the uptime clock; do_rw_rdlock() is re-issued with
 * the remaining time (in ticks) after each ETIMEDOUT until the deadline
 * passes.  ERESTART is mapped to EINTR so the syscall is not restarted
 * with the original (already partially consumed) timeout.
 */
2465177848Sdavidxustatic int
2466177848Sdavidxudo_rw_rdlock2(struct thread *td, void *obj, long val, struct timespec *timeout)
2467177848Sdavidxu{
2468177848Sdavidxu	struct timespec ts, ts2, ts3;
2469177848Sdavidxu	struct timeval tv;
2470177848Sdavidxu	int error;
2471177848Sdavidxu
	/* ts = absolute deadline; tv = remaining time for this attempt. */
2472177848Sdavidxu	getnanouptime(&ts);
2473177848Sdavidxu	timespecadd(&ts, timeout);
2474177848Sdavidxu	TIMESPEC_TO_TIMEVAL(&tv, timeout);
2475177848Sdavidxu	for (;;) {
2476177848Sdavidxu		error = do_rw_rdlock(td, obj, val, tvtohz(&tv));
2477177848Sdavidxu		if (error != ETIMEDOUT)
2478177848Sdavidxu			break;
2479177848Sdavidxu		getnanouptime(&ts2);
2480177848Sdavidxu		if (timespeccmp(&ts2, &ts, >=)) {
2481177848Sdavidxu			error = ETIMEDOUT;
2482177848Sdavidxu			break;
2483177848Sdavidxu		}
2484177848Sdavidxu		ts3 = ts;
2485177848Sdavidxu		timespecsub(&ts3, &ts2);
2486177848Sdavidxu		TIMESPEC_TO_TIMEVAL(&tv, &ts3);
2487177848Sdavidxu	}
2488177849Sdavidxu	if (error == ERESTART)
2489177849Sdavidxu		error = EINTR;
2490177848Sdavidxu	return (error);
2491177848Sdavidxu}
2492177848Sdavidxu
/*
 * Acquire the write lock on a userland urwlock.  Mirrors do_rw_rdlock():
 * CAS loops on the user-space state word, with the umtxq "busy" state as
 * the monitor lock.  A blocked writer sets URWLOCK_WRITE_WAITERS and
 * sleeps on the exclusive queue; 'timo' is the sleep timeout in ticks
 * (0 = forever).
 */
2493177848Sdavidxustatic int
2494177848Sdavidxudo_rw_wrlock(struct thread *td, struct urwlock *rwlock, int timo)
2495177848Sdavidxu{
2496177848Sdavidxu	struct umtx_q *uq;
2497177848Sdavidxu	uint32_t flags;
2498177848Sdavidxu	int32_t state, oldstate;
2499177848Sdavidxu	int32_t blocked_writers;
2500177848Sdavidxu	int error;
2501177848Sdavidxu
2502177848Sdavidxu	uq = td->td_umtxq;
2503177848Sdavidxu	flags = fuword32(&rwlock->rw_flags);
2504177848Sdavidxu	error = umtx_key_get(rwlock, TYPE_RWLOCK, GET_SHARE(flags), &uq->uq_key);
2505177848Sdavidxu	if (error != 0)
2506177848Sdavidxu		return (error);
2507177848Sdavidxu
2508177848Sdavidxu	for (;;) {
2509177848Sdavidxu		state = fuword32(__DEVOLATILE(int32_t *, &rwlock->rw_state));
		/* Try to take ownership while no writer and no readers hold it. */
2510177848Sdavidxu		while (!(state & URWLOCK_WRITE_OWNER) && URWLOCK_READER_COUNT(state) == 0) {
2511177848Sdavidxu			oldstate = casuword32(&rwlock->rw_state, state, state | URWLOCK_WRITE_OWNER);
2512177848Sdavidxu			if (oldstate == state) {
2513177848Sdavidxu				umtx_key_release(&uq->uq_key);
2514177848Sdavidxu				return (0);
2515177848Sdavidxu			}
2516177848Sdavidxu			state = oldstate;
2517177848Sdavidxu		}
2518177848Sdavidxu
		/* A previous umtxq_sleep() failed; give up with that error. */
2519177848Sdavidxu		if (error)
2520177848Sdavidxu			break;
2521177848Sdavidxu
2522177848Sdavidxu		/* grab monitor lock */
2523177848Sdavidxu		umtxq_lock(&uq->uq_key);
2524177848Sdavidxu		umtxq_busy(&uq->uq_key);
2525177848Sdavidxu		umtxq_unlock(&uq->uq_key);
2526177848Sdavidxu
		/* Advertise write contention before sleeping. */
2527177848Sdavidxu		while (((state & URWLOCK_WRITE_OWNER) || URWLOCK_READER_COUNT(state) != 0) &&
2528177848Sdavidxu		       (state & URWLOCK_WRITE_WAITERS) == 0) {
2529177848Sdavidxu			oldstate = casuword32(&rwlock->rw_state, state, state | URWLOCK_WRITE_WAITERS);
2530177848Sdavidxu			if (oldstate == state)
2531177848Sdavidxu				goto sleep;
2532177848Sdavidxu			state = oldstate;
2533177848Sdavidxu		}
2534177848Sdavidxu
		/* Lock became free while we were setting the flag; retry. */
2535177848Sdavidxu		if (!(state & URWLOCK_WRITE_OWNER) && URWLOCK_READER_COUNT(state) == 0) {
2536177848Sdavidxu			umtxq_lock(&uq->uq_key);
2537177848Sdavidxu			umtxq_unbusy(&uq->uq_key);
2538177848Sdavidxu			umtxq_unlock(&uq->uq_key);
2539177848Sdavidxu			continue;
2540177848Sdavidxu		}
2541177848Sdavidxusleep:
	/* Contention bit is set; bump the blocked-writer count, sleep. */
2542177848Sdavidxu		blocked_writers = fuword32(&rwlock->rw_blocked_writers);
2543177848Sdavidxu		suword32(&rwlock->rw_blocked_writers, blocked_writers+1);
2544177848Sdavidxu
2545177848Sdavidxu		while ((state & URWLOCK_WRITE_OWNER) || URWLOCK_READER_COUNT(state) != 0) {
2546177848Sdavidxu			umtxq_lock(&uq->uq_key);
2547177848Sdavidxu			umtxq_insert_queue(uq, UMTX_EXCLUSIVE_QUEUE);
2548177848Sdavidxu			umtxq_unbusy(&uq->uq_key);
2549177848Sdavidxu
2550177848Sdavidxu			error = umtxq_sleep(uq, "uwrlck", timo);
2551177848Sdavidxu
2552177848Sdavidxu			umtxq_busy(&uq->uq_key);
2553177848Sdavidxu			umtxq_remove_queue(uq, UMTX_EXCLUSIVE_QUEUE);
2554177848Sdavidxu			umtxq_unlock(&uq->uq_key);
2555177848Sdavidxu			if (error)
2556177848Sdavidxu				break;
2557177848Sdavidxu			state = fuword32(__DEVOLATILE(int32_t *, &rwlock->rw_state));
2558177848Sdavidxu		}
2559177848Sdavidxu
	/* Drop our blocked-writer count; last one clears WRITE_WAITERS. */
2560177848Sdavidxu		blocked_writers = fuword32(&rwlock->rw_blocked_writers);
2561177848Sdavidxu		suword32(&rwlock->rw_blocked_writers, blocked_writers-1);
2562177848Sdavidxu		if (blocked_writers == 1) {
2563177848Sdavidxu			state = fuword32(__DEVOLATILE(int32_t *, &rwlock->rw_state));
2564177848Sdavidxu			for (;;) {
2565177848Sdavidxu				oldstate = casuword32(&rwlock->rw_state, state,
2566177848Sdavidxu					 state & ~URWLOCK_WRITE_WAITERS);
2567177848Sdavidxu				if (oldstate == state)
2568177848Sdavidxu					break;
2569177848Sdavidxu				state = oldstate;
2570177848Sdavidxu			}
2571177848Sdavidxu		}
2572177848Sdavidxu
2573177848Sdavidxu		umtxq_lock(&uq->uq_key);
2574177848Sdavidxu		umtxq_unbusy(&uq->uq_key);
2575177848Sdavidxu		umtxq_unlock(&uq->uq_key);
2576177848Sdavidxu	}
2577177848Sdavidxu
2578177848Sdavidxu	umtx_key_release(&uq->uq_key);
2579177848Sdavidxu	return (error);
2580177848Sdavidxu}
2581177848Sdavidxu
/*
 * Write-lock with a relative timeout; same absolute-deadline retry scheme
 * as do_rw_rdlock2(), including the ERESTART -> EINTR mapping.
 */
2582177848Sdavidxustatic int
2583177848Sdavidxudo_rw_wrlock2(struct thread *td, void *obj, struct timespec *timeout)
2584177848Sdavidxu{
2585177848Sdavidxu	struct timespec ts, ts2, ts3;
2586177848Sdavidxu	struct timeval tv;
2587177848Sdavidxu	int error;
2588177848Sdavidxu
	/* ts = absolute deadline; tv = remaining time for this attempt. */
2589177848Sdavidxu	getnanouptime(&ts);
2590177848Sdavidxu	timespecadd(&ts, timeout);
2591177848Sdavidxu	TIMESPEC_TO_TIMEVAL(&tv, timeout);
2592177848Sdavidxu	for (;;) {
2593177848Sdavidxu		error = do_rw_wrlock(td, obj, tvtohz(&tv));
2594177848Sdavidxu		if (error != ETIMEDOUT)
2595177848Sdavidxu			break;
2596177848Sdavidxu		getnanouptime(&ts2);
2597177848Sdavidxu		if (timespeccmp(&ts2, &ts, >=)) {
2598177848Sdavidxu			error = ETIMEDOUT;
2599177848Sdavidxu			break;
2600177848Sdavidxu		}
2601177848Sdavidxu		ts3 = ts;
2602177848Sdavidxu		timespecsub(&ts3, &ts2);
2603177848Sdavidxu		TIMESPEC_TO_TIMEVAL(&tv, &ts3);
2604177848Sdavidxu	}
2605177849Sdavidxu	if (error == ERESTART)
2606177849Sdavidxu		error = EINTR;
2607177848Sdavidxu	return (error);
2608177848Sdavidxu}
2609177848Sdavidxu
/*
 * Release a urwlock.  The kind of ownership being released is inferred
 * from the state word: clear URWLOCK_WRITE_OWNER for a writer, or
 * decrement the reader count for a reader; EPERM if the lock is not held.
 * Afterwards, wake one blocked writer or all blocked readers, in the
 * order dictated by the URWLOCK_PREFER_READER policy.
 */
2610177848Sdavidxustatic int
2611177880Sdavidxudo_rw_unlock(struct thread *td, struct urwlock *rwlock)
2612177848Sdavidxu{
2613177848Sdavidxu	struct umtx_q *uq;
2614177848Sdavidxu	uint32_t flags;
2615177848Sdavidxu	int32_t state, oldstate;
2616177848Sdavidxu	int error, q, count;
2617177848Sdavidxu
2618177848Sdavidxu	uq = td->td_umtxq;
2619177848Sdavidxu	flags = fuword32(&rwlock->rw_flags);
2620177848Sdavidxu	error = umtx_key_get(rwlock, TYPE_RWLOCK, GET_SHARE(flags), &uq->uq_key);
2621177848Sdavidxu	if (error != 0)
2622177848Sdavidxu		return (error);
2623177848Sdavidxu
2624177848Sdavidxu	state = fuword32(__DEVOLATILE(int32_t *, &rwlock->rw_state));
2625177848Sdavidxu	if (state & URWLOCK_WRITE_OWNER) {
		/* Write unlock: CAS away the owner bit, retrying on races. */
2626177848Sdavidxu		for (;;) {
2627177848Sdavidxu			oldstate = casuword32(&rwlock->rw_state, state,
2628177848Sdavidxu				state & ~URWLOCK_WRITE_OWNER);
2629177848Sdavidxu			if (oldstate != state) {
2630177848Sdavidxu				state = oldstate;
2631177848Sdavidxu				if (!(oldstate & URWLOCK_WRITE_OWNER)) {
2632177848Sdavidxu					error = EPERM;
2633177848Sdavidxu					goto out;
2634177848Sdavidxu				}
2635177848Sdavidxu			} else
2636177848Sdavidxu				break;
2637177848Sdavidxu		}
2638177848Sdavidxu	} else if (URWLOCK_READER_COUNT(state) != 0) {
		/* Read unlock: drop one from the reader count. */
2639177848Sdavidxu		for (;;) {
2640177848Sdavidxu			oldstate = casuword32(&rwlock->rw_state, state,
2641177848Sdavidxu				state - 1);
2642177848Sdavidxu			if (oldstate != state) {
2643177848Sdavidxu				state = oldstate;
2644177848Sdavidxu				if (URWLOCK_READER_COUNT(oldstate) == 0) {
2645177848Sdavidxu					error = EPERM;
2646177848Sdavidxu					goto out;
2647177848Sdavidxu				}
2648177848Sdavidxu			}
2649177848Sdavidxu			else
2650177848Sdavidxu				break;
2651177848Sdavidxu		}
2652177848Sdavidxu	} else {
2653177848Sdavidxu		error = EPERM;
2654177848Sdavidxu		goto out;
2655177848Sdavidxu	}
2656177848Sdavidxu
2657177848Sdavidxu	count = 0;
2658177848Sdavidxu
	/* Choose which queue to wake: one writer, or all readers. */
2659177848Sdavidxu	if (!(flags & URWLOCK_PREFER_READER)) {
2660177848Sdavidxu		if (state & URWLOCK_WRITE_WAITERS) {
2661177848Sdavidxu			count = 1;
2662177848Sdavidxu			q = UMTX_EXCLUSIVE_QUEUE;
2663177848Sdavidxu		} else if (state & URWLOCK_READ_WAITERS) {
2664177848Sdavidxu			count = INT_MAX;
2665177848Sdavidxu			q = UMTX_SHARED_QUEUE;
2666177848Sdavidxu		}
2667177848Sdavidxu	} else {
2668177848Sdavidxu		if (state & URWLOCK_READ_WAITERS) {
2669177848Sdavidxu			count = INT_MAX;
2670177848Sdavidxu			q = UMTX_SHARED_QUEUE;
2671177848Sdavidxu		} else if (state & URWLOCK_WRITE_WAITERS) {
2672177848Sdavidxu			count = 1;
2673177848Sdavidxu			q = UMTX_EXCLUSIVE_QUEUE;
2674177848Sdavidxu		}
2675177848Sdavidxu	}
2676177848Sdavidxu
2677177848Sdavidxu	if (count) {
2678177848Sdavidxu		umtxq_lock(&uq->uq_key);
2679177848Sdavidxu		umtxq_busy(&uq->uq_key);
2680177848Sdavidxu		umtxq_signal_queue(&uq->uq_key, count, q);
2681177848Sdavidxu		umtxq_unbusy(&uq->uq_key);
2682177848Sdavidxu		umtxq_unlock(&uq->uq_key);
2683177848Sdavidxu	}
2684177848Sdavidxuout:
2685177848Sdavidxu	umtx_key_release(&uq->uq_key);
2686177848Sdavidxu	return (error);
2687177848Sdavidxu}
2688177848Sdavidxu
/* _umtx_lock(2): lock a struct umtx for the calling thread (no timeout). */
2689139013Sdavidxuint
2690139013Sdavidxu_umtx_lock(struct thread *td, struct _umtx_lock_args *uap)
2691139013Sdavidxu    /* struct umtx *umtx */
2692139013Sdavidxu{
2693162536Sdavidxu	return _do_lock_umtx(td, uap->umtx, td->td_tid, 0);
2694139013Sdavidxu}
2695139013Sdavidxu
/* _umtx_unlock(2): unlock a struct umtx owned by the calling thread. */
2696139013Sdavidxuint
2697139013Sdavidxu_umtx_unlock(struct thread *td, struct _umtx_unlock_args *uap)
2698139013Sdavidxu    /* struct umtx *umtx */
2699139013Sdavidxu{
2700162536Sdavidxu	return do_unlock_umtx(td, uap->umtx, td->td_tid);
2701139013Sdavidxu}
2702139013Sdavidxu
/* UMTX_OP_LOCK: lock a umtx with an optional timeout copied from user space. */
2703162536Sdavidxustatic int
2704162536Sdavidxu__umtx_op_lock_umtx(struct thread *td, struct _umtx_op_args *uap)
2705139013Sdavidxu{
2706162536Sdavidxu	struct timespec *ts, timeout;
2707139013Sdavidxu	int error;
2708139013Sdavidxu
2709162536Sdavidxu	/* Allow a null timespec (wait forever). */
2710162536Sdavidxu	if (uap->uaddr2 == NULL)
2711162536Sdavidxu		ts = NULL;
2712162536Sdavidxu	else {
2713162536Sdavidxu		error = copyin(uap->uaddr2, &timeout, sizeof(timeout));
2714162536Sdavidxu		if (error != 0)
2715162536Sdavidxu			return (error);
		/* Reject a malformed timeout before sleeping on it. */
2716162536Sdavidxu		if (timeout.tv_nsec >= 1000000000 ||
2717162536Sdavidxu		    timeout.tv_nsec < 0) {
2718162536Sdavidxu			return (EINVAL);
2719161678Sdavidxu		}
2720162536Sdavidxu		ts = &timeout;
2721162536Sdavidxu	}
2722162536Sdavidxu	return (do_lock_umtx(td, uap->obj, uap->val, ts));
2723162536Sdavidxu}
2724162536Sdavidxu
/* UMTX_OP_UNLOCK: release a umtx owned with id 'val'. */
2725162536Sdavidxustatic int
2726162536Sdavidxu__umtx_op_unlock_umtx(struct thread *td, struct _umtx_op_args *uap)
2727162536Sdavidxu{
2728162536Sdavidxu	return (do_unlock_umtx(td, uap->obj, uap->val));
2729162536Sdavidxu}
2730162536Sdavidxu
/* UMTX_OP_WAIT: sleep while *obj == val (full-word compare, final arg 0). */
2731162536Sdavidxustatic int
2732162536Sdavidxu__umtx_op_wait(struct thread *td, struct _umtx_op_args *uap)
2733162536Sdavidxu{
2734162536Sdavidxu	struct timespec *ts, timeout;
2735162536Sdavidxu	int error;
2736162536Sdavidxu
	/* NULL timeout means wait forever. */
2737162536Sdavidxu	if (uap->uaddr2 == NULL)
2738162536Sdavidxu		ts = NULL;
2739162536Sdavidxu	else {
2740162536Sdavidxu		error = copyin(uap->uaddr2, &timeout, sizeof(timeout));
2741162536Sdavidxu		if (error != 0)
2742162536Sdavidxu			return (error);
2743162536Sdavidxu		if (timeout.tv_nsec >= 1000000000 ||
2744162536Sdavidxu		    timeout.tv_nsec < 0)
2745162536Sdavidxu			return (EINVAL);
2746162536Sdavidxu		ts = &timeout;
2747162536Sdavidxu	}
2748162536Sdavidxu	return do_wait(td, uap->obj, uap->val, ts, 0);
2749162536Sdavidxu}
2750162536Sdavidxu
/* UMTX_OP_WAIT_UINT: like __umtx_op_wait but compares as 32-bit uint
 * (final do_wait() argument 1). */
2751162536Sdavidxustatic int
2752173800Sdavidxu__umtx_op_wait_uint(struct thread *td, struct _umtx_op_args *uap)
2753173800Sdavidxu{
2754173800Sdavidxu	struct timespec *ts, timeout;
2755173800Sdavidxu	int error;
2756173800Sdavidxu
	/* NULL timeout means wait forever. */
2757173800Sdavidxu	if (uap->uaddr2 == NULL)
2758173800Sdavidxu		ts = NULL;
2759173800Sdavidxu	else {
2760173800Sdavidxu		error = copyin(uap->uaddr2, &timeout, sizeof(timeout));
2761173800Sdavidxu		if (error != 0)
2762173800Sdavidxu			return (error);
2763173800Sdavidxu		if (timeout.tv_nsec >= 1000000000 ||
2764173800Sdavidxu		    timeout.tv_nsec < 0)
2765173800Sdavidxu			return (EINVAL);
2766173800Sdavidxu		ts = &timeout;
2767173800Sdavidxu	}
2768173800Sdavidxu	return do_wait(td, uap->obj, uap->val, ts, 1);
2769173800Sdavidxu}
2770173800Sdavidxu
/* UMTX_OP_WAKE: wake up to 'val' threads sleeping on 'obj'. */
2771173800Sdavidxustatic int
2772162536Sdavidxu__umtx_op_wake(struct thread *td, struct _umtx_op_args *uap)
2773162536Sdavidxu{
2774162536Sdavidxu	return (kern_umtx_wake(td, uap->obj, uap->val));
2775162536Sdavidxu}
2776162536Sdavidxu
/* UMTX_OP_MUTEX_LOCK: lock a umutex, with an optional copied-in timeout. */
2777162536Sdavidxustatic int
2778162536Sdavidxu__umtx_op_lock_umutex(struct thread *td, struct _umtx_op_args *uap)
2779162536Sdavidxu{
2780162536Sdavidxu	struct timespec *ts, timeout;
2781162536Sdavidxu	int error;
2782162536Sdavidxu
2783162536Sdavidxu	/* Allow a null timespec (wait forever). */
2784162536Sdavidxu	if (uap->uaddr2 == NULL)
2785162536Sdavidxu		ts = NULL;
2786162536Sdavidxu	else {
2787162536Sdavidxu		error = copyin(uap->uaddr2, &timeout,
2788162536Sdavidxu		    sizeof(timeout));
2789162536Sdavidxu		if (error != 0)
2790162536Sdavidxu			return (error);
2791162536Sdavidxu		if (timeout.tv_nsec >= 1000000000 ||
2792162536Sdavidxu		    timeout.tv_nsec < 0) {
2793162536Sdavidxu			return (EINVAL);
2794139013Sdavidxu		}
2795162536Sdavidxu		ts = &timeout;
2796139013Sdavidxu	}
2797162536Sdavidxu	return do_lock_umutex(td, uap->obj, ts, 0);
2798162536Sdavidxu}
2799162536Sdavidxu
/* UMTX_OP_MUTEX_TRYLOCK: non-blocking umutex lock (try flag set). */
2800162536Sdavidxustatic int
2801162536Sdavidxu__umtx_op_trylock_umutex(struct thread *td, struct _umtx_op_args *uap)
2802162536Sdavidxu{
2803162536Sdavidxu	return do_lock_umutex(td, uap->obj, NULL, 1);
2804162536Sdavidxu}
2805162536Sdavidxu
/* UMTX_OP_MUTEX_UNLOCK: release a umutex. */
2806162536Sdavidxustatic int
2807162536Sdavidxu__umtx_op_unlock_umutex(struct thread *td, struct _umtx_op_args *uap)
2808162536Sdavidxu{
2809162536Sdavidxu	return do_unlock_umutex(td, uap->obj);
2810162536Sdavidxu}
2811162536Sdavidxu
/* UMTX_OP_SET_CEILING: set a PP mutex priority ceiling; old value is
 * stored through uaddr1. */
2812162536Sdavidxustatic int
2813162536Sdavidxu__umtx_op_set_ceiling(struct thread *td, struct _umtx_op_args *uap)
2814162536Sdavidxu{
2815162536Sdavidxu	return do_set_ceiling(td, uap->obj, uap->val, uap->uaddr1);
2816162536Sdavidxu}
2817162536Sdavidxu
/* UMTX_OP_CV_WAIT: wait on a ucond (obj) with its umutex (uaddr1);
 * 'val' carries the wait flags. */
2818164839Sdavidxustatic int
2819164839Sdavidxu__umtx_op_cv_wait(struct thread *td, struct _umtx_op_args *uap)
2820164839Sdavidxu{
2821164839Sdavidxu	struct timespec *ts, timeout;
2822164839Sdavidxu	int error;
2823164839Sdavidxu
2824164839Sdavidxu	/* Allow a null timespec (wait forever). */
2825164839Sdavidxu	if (uap->uaddr2 == NULL)
2826164839Sdavidxu		ts = NULL;
2827164839Sdavidxu	else {
2828164839Sdavidxu		error = copyin(uap->uaddr2, &timeout,
2829164839Sdavidxu		    sizeof(timeout));
2830164839Sdavidxu		if (error != 0)
2831164839Sdavidxu			return (error);
2832164839Sdavidxu		if (timeout.tv_nsec >= 1000000000 ||
2833164839Sdavidxu		    timeout.tv_nsec < 0) {
2834164839Sdavidxu			return (EINVAL);
2835164839Sdavidxu		}
2836164839Sdavidxu		ts = &timeout;
2837164839Sdavidxu	}
2838164876Sdavidxu	return (do_cv_wait(td, uap->obj, uap->uaddr1, ts, uap->val));
2839164839Sdavidxu}
2840164839Sdavidxu
/* UMTX_OP_CV_SIGNAL: wake one waiter on the ucond. */
2841164839Sdavidxustatic int
2842164839Sdavidxu__umtx_op_cv_signal(struct thread *td, struct _umtx_op_args *uap)
2843164839Sdavidxu{
2844164839Sdavidxu	return do_cv_signal(td, uap->obj);
2845164839Sdavidxu}
2846164839Sdavidxu
/* UMTX_OP_CV_BROADCAST: wake all waiters on the ucond. */
2847164839Sdavidxustatic int
2848164839Sdavidxu__umtx_op_cv_broadcast(struct thread *td, struct _umtx_op_args *uap)
2849164839Sdavidxu{
2850164839Sdavidxu	return do_cv_broadcast(td, uap->obj);
2851164839Sdavidxu}
2852164839Sdavidxu
/* UMTX_OP_RW_RDLOCK: read-lock a urwlock; 'val' carries caller flags
 * (URWLOCK_PREFER_READER), uaddr2 an optional relative timeout. */
2853177848Sdavidxustatic int
2854177848Sdavidxu__umtx_op_rw_rdlock(struct thread *td, struct _umtx_op_args *uap)
2855177848Sdavidxu{
2856177848Sdavidxu	struct timespec timeout;
2857177848Sdavidxu	int error;
2858177848Sdavidxu
2859177848Sdavidxu	/* Allow a null timespec (wait forever). */
2860177848Sdavidxu	if (uap->uaddr2 == NULL) {
2861177848Sdavidxu		error = do_rw_rdlock(td, uap->obj, uap->val, 0);
2862177848Sdavidxu	} else {
2863177848Sdavidxu		error = copyin(uap->uaddr2, &timeout,
2864177848Sdavidxu		    sizeof(timeout));
2865177848Sdavidxu		if (error != 0)
2866177848Sdavidxu			return (error);
2867177848Sdavidxu		if (timeout.tv_nsec >= 1000000000 ||
2868177848Sdavidxu		    timeout.tv_nsec < 0) {
2869177848Sdavidxu			return (EINVAL);
2870177848Sdavidxu		}
2871177848Sdavidxu		error = do_rw_rdlock2(td, uap->obj, uap->val, &timeout);
2872177848Sdavidxu	}
2873177848Sdavidxu	return (error);
2874177848Sdavidxu}
2875177848Sdavidxu
/* UMTX_OP_RW_WRLOCK: write-lock a urwlock, with optional relative timeout. */
2876177848Sdavidxustatic int
2877177848Sdavidxu__umtx_op_rw_wrlock(struct thread *td, struct _umtx_op_args *uap)
2878177848Sdavidxu{
2879177848Sdavidxu	struct timespec timeout;
2880177848Sdavidxu	int error;
2881177848Sdavidxu
2882177848Sdavidxu	/* Allow a null timespec (wait forever). */
2883177848Sdavidxu	if (uap->uaddr2 == NULL) {
2884177848Sdavidxu		error = do_rw_wrlock(td, uap->obj, 0);
2885177848Sdavidxu	} else {
2886177848Sdavidxu		error = copyin(uap->uaddr2, &timeout,
2887177848Sdavidxu		    sizeof(timeout));
2888177848Sdavidxu		if (error != 0)
2889177848Sdavidxu			return (error);
2890177848Sdavidxu		if (timeout.tv_nsec >= 1000000000 ||
2891177848Sdavidxu		    timeout.tv_nsec < 0) {
2892177848Sdavidxu			return (EINVAL);
2893177848Sdavidxu		}
2894177848Sdavidxu
2895177848Sdavidxu		error = do_rw_wrlock2(td, uap->obj, &timeout);
2896177848Sdavidxu	}
2897177848Sdavidxu	return (error);
2898177848Sdavidxu}
2899177848Sdavidxu
/* UMTX_OP_RW_UNLOCK: release a urwlock (read or write side). */
2900177848Sdavidxustatic int
2901177848Sdavidxu__umtx_op_rw_unlock(struct thread *td, struct _umtx_op_args *uap)
2902177848Sdavidxu{
2903177880Sdavidxu	return do_rw_unlock(td, uap->obj);
2904177848Sdavidxu}
2905177848Sdavidxu
/*
 * Native _umtx_op() dispatch table, indexed by the UMTX_OP_* value;
 * entry order must match the UMTX_OP_* numbering in <sys/umtx.h>.
 */
2906162536Sdavidxutypedef int (*_umtx_op_func)(struct thread *td, struct _umtx_op_args *uap);
2907162536Sdavidxu
2908162536Sdavidxustatic _umtx_op_func op_table[] = {
2909162536Sdavidxu	__umtx_op_lock_umtx,		/* UMTX_OP_LOCK */
2910162536Sdavidxu	__umtx_op_unlock_umtx,		/* UMTX_OP_UNLOCK */
2911162536Sdavidxu	__umtx_op_wait,			/* UMTX_OP_WAIT */
2912162536Sdavidxu	__umtx_op_wake,			/* UMTX_OP_WAKE */
2913162536Sdavidxu	__umtx_op_trylock_umutex,	/* UMTX_OP_MUTEX_TRYLOCK */
2914162536Sdavidxu	__umtx_op_lock_umutex,		/* UMTX_OP_MUTEX_LOCK */
2915162536Sdavidxu	__umtx_op_unlock_umutex,	/* UMTX_OP_MUTEX_UNLOCK */
2916164839Sdavidxu	__umtx_op_set_ceiling,		/* UMTX_OP_SET_CEILING */
2917164839Sdavidxu	__umtx_op_cv_wait,		/* UMTX_OP_CV_WAIT*/
2918164839Sdavidxu	__umtx_op_cv_signal,		/* UMTX_OP_CV_SIGNAL */
2919173800Sdavidxu	__umtx_op_cv_broadcast,		/* UMTX_OP_CV_BROADCAST */
2920177848Sdavidxu	__umtx_op_wait_uint,		/* UMTX_OP_WAIT_UINT */
2921177848Sdavidxu	__umtx_op_rw_rdlock,		/* UMTX_OP_RW_RDLOCK */
2922177848Sdavidxu	__umtx_op_rw_wrlock,		/* UMTX_OP_RW_WRLOCK */
2923177848Sdavidxu	__umtx_op_rw_unlock		/* UMTX_OP_RW_UNLOCK */
2924162536Sdavidxu};
2925162536Sdavidxu
/* _umtx_op(2) entry point: bounds-check the op and dispatch through
 * op_table; the unsigned cast rejects negative op values too. */
2926162536Sdavidxuint
2927162536Sdavidxu_umtx_op(struct thread *td, struct _umtx_op_args *uap)
2928162536Sdavidxu{
2929163678Sdavidxu	if ((unsigned)uap->op < UMTX_OP_MAX)
2930162536Sdavidxu		return (*op_table[uap->op])(td, uap);
2931162536Sdavidxu	return (EINVAL);
2932162536Sdavidxu}
2933162536Sdavidxu
2934162536Sdavidxu#ifdef COMPAT_IA32
/* 32-bit compat _umtx_lock: the umtx word is 32 bits wide for ia32. */
2935163046Sdavidxuint
2936163046Sdavidxufreebsd32_umtx_lock(struct thread *td, struct freebsd32_umtx_lock_args *uap)
2937163046Sdavidxu    /* struct umtx *umtx */
2938163046Sdavidxu{
2939163046Sdavidxu	return (do_lock_umtx32(td, (uint32_t *)uap->umtx, td->td_tid, NULL));
2940163046Sdavidxu}
2941163046Sdavidxu
/* 32-bit compat _umtx_unlock. */
2942163046Sdavidxuint
2943163046Sdavidxufreebsd32_umtx_unlock(struct thread *td, struct freebsd32_umtx_unlock_args *uap)
2944163046Sdavidxu    /* struct umtx *umtx */
2945163046Sdavidxu{
2946163046Sdavidxu	return (do_unlock_umtx32(td, (uint32_t *)uap->umtx, td->td_tid));
2947163046Sdavidxu}
2948163046Sdavidxu
/* Layout of a struct timespec as seen by a 32-bit process. */
2949162536Sdavidxustruct timespec32 {
2950162536Sdavidxu	u_int32_t tv_sec;
2951162536Sdavidxu	u_int32_t tv_nsec;
2952162536Sdavidxu};
2953162536Sdavidxu
/* Copy a 32-bit process's timespec32 from user address 'addr' and widen
 * it into the native *tsp; returns the copyin() error, if any. */
2954162536Sdavidxustatic inline int
2955162536Sdavidxucopyin_timeout32(void *addr, struct timespec *tsp)
2956162536Sdavidxu{
2957162536Sdavidxu	struct timespec32 ts32;
2958162536Sdavidxu	int error;
2959162536Sdavidxu
2960162536Sdavidxu	error = copyin(addr, &ts32, sizeof(struct timespec32));
2961162536Sdavidxu	if (error == 0) {
2962162536Sdavidxu		tsp->tv_sec = ts32.tv_sec;
2963162536Sdavidxu		tsp->tv_nsec = ts32.tv_nsec;
2964162536Sdavidxu	}
2965140421Sdavidxu	return (error);
2966139013Sdavidxu}
2967161678Sdavidxu
/* Compat32 UMTX_OP_LOCK: note the timeout comes in as a timespec32. */
2968162536Sdavidxustatic int
2969162536Sdavidxu__umtx_op_lock_umtx_compat32(struct thread *td, struct _umtx_op_args *uap)
2970162536Sdavidxu{
2971162536Sdavidxu	struct timespec *ts, timeout;
2972162536Sdavidxu	int error;
2973162536Sdavidxu
2974162536Sdavidxu	/* Allow a null timespec (wait forever). */
2975162536Sdavidxu	if (uap->uaddr2 == NULL)
2976162536Sdavidxu		ts = NULL;
2977162536Sdavidxu	else {
2978162536Sdavidxu		error = copyin_timeout32(uap->uaddr2, &timeout);
2979162536Sdavidxu		if (error != 0)
2980162536Sdavidxu			return (error);
2981162536Sdavidxu		if (timeout.tv_nsec >= 1000000000 ||
2982162536Sdavidxu		    timeout.tv_nsec < 0) {
2983162536Sdavidxu			return (EINVAL);
2984162536Sdavidxu		}
2985162536Sdavidxu		ts = &timeout;
2986162536Sdavidxu	}
2987162536Sdavidxu	return (do_lock_umtx32(td, uap->obj, uap->val, ts));
2988162536Sdavidxu}
2989162536Sdavidxu
/* Compat32 UMTX_OP_UNLOCK. */
2990162536Sdavidxustatic int
2991162536Sdavidxu__umtx_op_unlock_umtx_compat32(struct thread *td, struct _umtx_op_args *uap)
2992162536Sdavidxu{
2993162536Sdavidxu	return (do_unlock_umtx32(td, uap->obj, (uint32_t)uap->val));
2994162536Sdavidxu}
2995162536Sdavidxu
/* Compat32 UMTX_OP_WAIT / UMTX_OP_WAIT_UINT: 32-bit word compare
 * (do_wait() final arg 1), timespec32 timeout. */
2996162536Sdavidxustatic int
2997162536Sdavidxu__umtx_op_wait_compat32(struct thread *td, struct _umtx_op_args *uap)
2998162536Sdavidxu{
2999162536Sdavidxu	struct timespec *ts, timeout;
3000162536Sdavidxu	int error;
3001162536Sdavidxu
	/* NULL timeout means wait forever. */
3002162536Sdavidxu	if (uap->uaddr2 == NULL)
3003162536Sdavidxu		ts = NULL;
3004162536Sdavidxu	else {
3005162536Sdavidxu		error = copyin_timeout32(uap->uaddr2, &timeout);
3006162536Sdavidxu		if (error != 0)
3007162536Sdavidxu			return (error);
3008162536Sdavidxu		if (timeout.tv_nsec >= 1000000000 ||
3009162536Sdavidxu		    timeout.tv_nsec < 0)
3010162536Sdavidxu			return (EINVAL);
3011162536Sdavidxu		ts = &timeout;
3012162536Sdavidxu	}
3013162536Sdavidxu	return do_wait(td, uap->obj, uap->val, ts, 1);
3014162536Sdavidxu}
3015162536Sdavidxu
/* Compat32 UMTX_OP_MUTEX_LOCK: timespec32 timeout. */
3016162536Sdavidxustatic int
3017162536Sdavidxu__umtx_op_lock_umutex_compat32(struct thread *td, struct _umtx_op_args *uap)
3018162536Sdavidxu{
3019162536Sdavidxu	struct timespec *ts, timeout;
3020162536Sdavidxu	int error;
3021162536Sdavidxu
3022162536Sdavidxu	/* Allow a null timespec (wait forever). */
3023162536Sdavidxu	if (uap->uaddr2 == NULL)
3024162536Sdavidxu		ts = NULL;
3025162536Sdavidxu	else {
3026162536Sdavidxu		error = copyin_timeout32(uap->uaddr2, &timeout);
3027162536Sdavidxu		if (error != 0)
3028162536Sdavidxu			return (error);
3029162536Sdavidxu		if (timeout.tv_nsec >= 1000000000 ||
3030162536Sdavidxu		    timeout.tv_nsec < 0)
3031162536Sdavidxu			return (EINVAL);
3032162536Sdavidxu		ts = &timeout;
3033162536Sdavidxu	}
3034162536Sdavidxu	return do_lock_umutex(td, uap->obj, ts, 0);
3035162536Sdavidxu}
3036162536Sdavidxu
/* Compat32 UMTX_OP_CV_WAIT: timespec32 timeout. */
3037164839Sdavidxustatic int
3038164839Sdavidxu__umtx_op_cv_wait_compat32(struct thread *td, struct _umtx_op_args *uap)
3039164839Sdavidxu{
3040164839Sdavidxu	struct timespec *ts, timeout;
3041164839Sdavidxu	int error;
3042164839Sdavidxu
3043164839Sdavidxu	/* Allow a null timespec (wait forever). */
3044164839Sdavidxu	if (uap->uaddr2 == NULL)
3045164839Sdavidxu		ts = NULL;
3046164839Sdavidxu	else {
3047164839Sdavidxu		error = copyin_timeout32(uap->uaddr2, &timeout);
3048164839Sdavidxu		if (error != 0)
3049164839Sdavidxu			return (error);
3050164839Sdavidxu		if (timeout.tv_nsec >= 1000000000 ||
3051164839Sdavidxu		    timeout.tv_nsec < 0)
3052164839Sdavidxu			return (EINVAL);
3053164839Sdavidxu		ts = &timeout;
3054164839Sdavidxu	}
3055164876Sdavidxu	return (do_cv_wait(td, uap->obj, uap->uaddr1, ts, uap->val));
3056164839Sdavidxu}
3057164839Sdavidxu
3058177848Sdavidxustatic int
3059177848Sdavidxu__umtx_op_rw_rdlock_compat32(struct thread *td, struct _umtx_op_args *uap)
3060177848Sdavidxu{
3061177848Sdavidxu	struct timespec timeout;
3062177848Sdavidxu	int error;
3063177848Sdavidxu
3064177848Sdavidxu	/* Allow a null timespec (wait forever). */
3065177848Sdavidxu	if (uap->uaddr2 == NULL) {
3066177848Sdavidxu		error = do_rw_rdlock(td, uap->obj, uap->val, 0);
3067177848Sdavidxu	} else {
3068177848Sdavidxu		error = copyin(uap->uaddr2, &timeout,
3069177848Sdavidxu		    sizeof(timeout));
3070177848Sdavidxu		if (error != 0)
3071177848Sdavidxu			return (error);
3072177848Sdavidxu		if (timeout.tv_nsec >= 1000000000 ||
3073177848Sdavidxu		    timeout.tv_nsec < 0) {
3074177848Sdavidxu			return (EINVAL);
3075177848Sdavidxu		}
3076177848Sdavidxu		error = do_rw_rdlock2(td, uap->obj, uap->val, &timeout);
3077177848Sdavidxu	}
3078177848Sdavidxu	return (error);
3079177848Sdavidxu}
3080177848Sdavidxu
/* Compat32 UMTX_OP_RW_WRLOCK: timespec32 timeout via copyin_timeout32(). */
3081177848Sdavidxustatic int
3082177848Sdavidxu__umtx_op_rw_wrlock_compat32(struct thread *td, struct _umtx_op_args *uap)
3083177848Sdavidxu{
3084177848Sdavidxu	struct timespec timeout;
3085177848Sdavidxu	int error;
3086177848Sdavidxu
3087177848Sdavidxu	/* Allow a null timespec (wait forever). */
3088177848Sdavidxu	if (uap->uaddr2 == NULL) {
3089177852Sdavidxu		error = do_rw_wrlock(td, uap->obj, 0);
3090177848Sdavidxu	} else {
3091177848Sdavidxu		error = copyin_timeout32(uap->uaddr2, &timeout);
3092177848Sdavidxu		if (error != 0)
3093177848Sdavidxu			return (error);
3094177848Sdavidxu		if (timeout.tv_nsec >= 1000000000 ||
3095177848Sdavidxu		    timeout.tv_nsec < 0) {
3096177848Sdavidxu			return (EINVAL);
3097177848Sdavidxu		}
3098177848Sdavidxu
3099177852Sdavidxu		error = do_rw_wrlock2(td, uap->obj, &timeout);
3100177848Sdavidxu	}
3101177848Sdavidxu	return (error);
3102177848Sdavidxu}
3103177848Sdavidxu
/*
 * Compat32 dispatch table; entry order must match UMTX_OP_* and the
 * native op_table above.  (The MUTEX_TRYLOCK/MUTEX_LOCK comments were
 * previously swapped; the function order was always correct.)
 */
3104162536Sdavidxustatic _umtx_op_func op_table_compat32[] = {
3105162536Sdavidxu	__umtx_op_lock_umtx_compat32,	/* UMTX_OP_LOCK */
3106162536Sdavidxu	__umtx_op_unlock_umtx_compat32,	/* UMTX_OP_UNLOCK */
3107162536Sdavidxu	__umtx_op_wait_compat32,	/* UMTX_OP_WAIT */
3108162536Sdavidxu	__umtx_op_wake,			/* UMTX_OP_WAKE */
3109162550Sdavidxu	__umtx_op_trylock_umutex,	/* UMTX_OP_MUTEX_TRYLOCK */
3110162536Sdavidxu	__umtx_op_lock_umutex_compat32,	/* UMTX_OP_MUTEX_LOCK */
3111162536Sdavidxu	__umtx_op_unlock_umutex,	/* UMTX_OP_MUTEX_UNLOCK	*/
3112164839Sdavidxu	__umtx_op_set_ceiling,		/* UMTX_OP_SET_CEILING */
3113164839Sdavidxu	__umtx_op_cv_wait_compat32,	/* UMTX_OP_CV_WAIT*/
3114164839Sdavidxu	__umtx_op_cv_signal,		/* UMTX_OP_CV_SIGNAL */
3115173800Sdavidxu	__umtx_op_cv_broadcast,		/* UMTX_OP_CV_BROADCAST */
3116177848Sdavidxu	__umtx_op_wait_compat32,	/* UMTX_OP_WAIT_UINT */
3117177848Sdavidxu	__umtx_op_rw_rdlock_compat32,	/* UMTX_OP_RW_RDLOCK */
3118177848Sdavidxu	__umtx_op_rw_wrlock_compat32,	/* UMTX_OP_RW_WRLOCK */
3119177848Sdavidxu	__umtx_op_rw_unlock		/* UMTX_OP_RW_UNLOCK */
3120162536Sdavidxu};
3121162536Sdavidxu
/* Compat32 _umtx_op entry: bounds-check and dispatch; relies on the
 * freebsd32 args struct being layout-compatible with _umtx_op_args. */
3122162536Sdavidxuint
3123162536Sdavidxufreebsd32_umtx_op(struct thread *td, struct freebsd32_umtx_op_args *uap)
3124162536Sdavidxu{
3125163678Sdavidxu	if ((unsigned)uap->op < UMTX_OP_MAX)
3126162536Sdavidxu		return (*op_table_compat32[uap->op])(td,
3127162536Sdavidxu			(struct _umtx_op_args *)uap);
3128162536Sdavidxu	return (EINVAL);
3129162536Sdavidxu}
3131162536Sdavidxu
/* Allocate and attach a umtx queue entry for a newly-initialized thread. */
3132161678Sdavidxuvoid
3133161678Sdavidxuumtx_thread_init(struct thread *td)
3134161678Sdavidxu{
3135161678Sdavidxu	td->td_umtxq = umtxq_alloc();
3136161678Sdavidxu	td->td_umtxq->uq_thread = td;
3137161678Sdavidxu}
3138161678Sdavidxu
/* Free the thread's umtx queue entry; counterpart of umtx_thread_init(). */
3139161678Sdavidxuvoid
3140161678Sdavidxuumtx_thread_fini(struct thread *td)
3141161678Sdavidxu{
3142161678Sdavidxu	umtxq_free(td->td_umtxq);
3143161678Sdavidxu}
3144161678Sdavidxu
3145161678Sdavidxu/*
 * Called when a new thread is created, e.g. by fork().  Resets the
 * inherited priority and asserts the umtx queue entry is pristine.
3147161678Sdavidxu */
3148161678Sdavidxuvoid
3149161678Sdavidxuumtx_thread_alloc(struct thread *td)
3150161678Sdavidxu{
3151161678Sdavidxu	struct umtx_q *uq;
3152161678Sdavidxu
3153161678Sdavidxu	uq = td->td_umtxq;
3154161678Sdavidxu	uq->uq_inherited_pri = PRI_MAX;
3155161678Sdavidxu
3156161678Sdavidxu	KASSERT(uq->uq_flags == 0, ("uq_flags != 0"));
3157161678Sdavidxu	KASSERT(uq->uq_thread == td, ("uq_thread != td"));
3158161678Sdavidxu	KASSERT(uq->uq_pi_blocked == NULL, ("uq_pi_blocked != NULL"));
3159161678Sdavidxu	KASSERT(TAILQ_EMPTY(&uq->uq_pi_contested), ("uq_pi_contested is not empty"));
3160161678Sdavidxu}
3161161678Sdavidxu
3162161678Sdavidxu/*
 * exec() hook: drop the curthread's umtx state (inherited priorities,
 * contested PI mutexes) before the new image runs.
3164161678Sdavidxu */
3165161678Sdavidxustatic void
3166161678Sdavidxuumtx_exec_hook(void *arg __unused, struct proc *p __unused,
3167161678Sdavidxu	struct image_params *imgp __unused)
3168161678Sdavidxu{
3169161678Sdavidxu	umtx_thread_cleanup(curthread);
3170161678Sdavidxu}
3171161678Sdavidxu
3172161678Sdavidxu/*
 * thread_exit() hook: release the exiting thread's umtx state.
3174161678Sdavidxu */
3175161678Sdavidxuvoid
3176161678Sdavidxuumtx_thread_exit(struct thread *td)
3177161678Sdavidxu{
3178161678Sdavidxu	umtx_thread_cleanup(td);
3179161678Sdavidxu}
3180161678Sdavidxu
3181161678Sdavidxu/*
 * Clean up a thread's umtx data: disown any priority-inheritance mutexes
 * it still holds contested, reset its inherited priority, and clear the
 * priority-borrowing flag.
3183161678Sdavidxu */
3184161678Sdavidxustatic void
3185161678Sdavidxuumtx_thread_cleanup(struct thread *td)
3186161678Sdavidxu{
3187161678Sdavidxu	struct umtx_q *uq;
3188161678Sdavidxu	struct umtx_pi *pi;
3189161678Sdavidxu
3190161678Sdavidxu	if ((uq = td->td_umtxq) == NULL)
3191161678Sdavidxu		return;
3192161678Sdavidxu
3193170300Sjeff	mtx_lock_spin(&umtx_lock);
3194161678Sdavidxu	uq->uq_inherited_pri = PRI_MAX;
	/* Orphan every PI mutex still contested under this thread. */
3195161678Sdavidxu	while ((pi = TAILQ_FIRST(&uq->uq_pi_contested)) != NULL) {
3196161678Sdavidxu		pi->pi_owner = NULL;
3197161678Sdavidxu		TAILQ_REMOVE(&uq->uq_pi_contested, pi, pi_link);
3198161678Sdavidxu	}
3199174701Sdavidxu	thread_lock(td);
3200161678Sdavidxu	td->td_flags &= ~TDF_UBORROWING;
3201174701Sdavidxu	thread_unlock(td);
3202170300Sjeff	mtx_unlock_spin(&umtx_lock);
3203161678Sdavidxu}
3204