kern_umtx.c revision 197476
/*-
 * Copyright (c) 2004, David Xu <davidxu@freebsd.org>
 * Copyright (c) 2002, Jeffrey Roberson <jeff@freebsd.org>
 * All rights reserved.
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 * 1. Redistributions of source code must retain the above copyright
 *    notice unmodified, this list of conditions, and the following
 *    disclaimer.
 * 2. Redistributions in binary form must reproduce the above copyright
 *    notice, this list of conditions and the following disclaimer in the
 *    documentation and/or other materials provided with the distribution.
 *
 * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR
 * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
 * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.
 * IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT,
 * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT
 * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
 * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
 * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
 * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF
 * THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
 */

#include <sys/cdefs.h>
__FBSDID("$FreeBSD: head/sys/kern/kern_umtx.c 197476 2009-09-25 00:03:13Z davidxu $");

#include "opt_compat.h"
#include <sys/param.h>
#include <sys/kernel.h>
#include <sys/limits.h>
#include <sys/lock.h>
#include <sys/malloc.h>
#include <sys/mutex.h>
#include <sys/priv.h>
#include <sys/proc.h>
#include <sys/sched.h>
#include <sys/smp.h>
#include <sys/sysctl.h>
#include <sys/sysent.h>
#include <sys/systm.h>
#include <sys/sysproto.h>
#include <sys/eventhandler.h>
#include <sys/umtx.h>

#include <vm/vm.h>
#include <vm/vm_param.h>
#include <vm/pmap.h>
#include <vm/vm_map.h>
#include <vm/vm_object.h>

#include <machine/cpu.h>

#ifdef COMPAT_IA32
#include <compat/freebsd32/freebsd32_proto.h>
#endif

#define TYPE_SIMPLE_WAIT	0
#define TYPE_CV			1
#define TYPE_SIMPLE_LOCK	2
#define TYPE_NORMAL_UMUTEX	3
#define TYPE_PI_UMUTEX		4
#define TYPE_PP_UMUTEX		5
#define TYPE_RWLOCK		6

#define _UMUTEX_TRY		1
#define _UMUTEX_WAIT		2

/* Key to represent a unique userland synchronization object */
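/*
 * Note: process-shared objects are keyed on the backing (vm_object,
 * offset) pair so that every mapping of the same object resolves to
 * the same key, while private objects are keyed on (vmspace, address);
 * see umtx_key_get() below.
 */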
struct umtx_key {
	int	hash;
	int	type;
	int	shared;
	union {
		struct {
			vm_object_t	object;
			uintptr_t	offset;
		} shared;
		struct {
			struct vmspace	*vs;
			uintptr_t	addr;
		} private;
		struct {
			void		*a;
			uintptr_t	b;
		} both;
	} info;
};

/* Priority inheritance mutex info. */
struct umtx_pi {
	/* Owner thread */
	struct thread		*pi_owner;

	/* Reference count */
	int			pi_refcount;

	/* List entry to link PI mutexes held by a thread */
	TAILQ_ENTRY(umtx_pi)	pi_link;

	/* List entry in hash */
	TAILQ_ENTRY(umtx_pi)	pi_hashlink;

	/* List for waiters */
	TAILQ_HEAD(,umtx_q)	pi_blocked;

	/* Identify a userland lock object */
	struct umtx_key		pi_key;
};

/* A userland synchronization object user. */
struct umtx_q {
	/* Linked list for the hash. */
	TAILQ_ENTRY(umtx_q)	uq_link;

	/* Umtx key. */
	struct umtx_key		uq_key;

	/* Umtx flags. */
	int			uq_flags;
#define UQF_UMTXQ	0x0001

	/* The thread waiting on this queue entry. */
	struct thread		*uq_thread;

	/*
	 * The PI mutex this thread is blocked on.  Reads may be done
	 * while holding either the chain lock or umtx_lock; writes
	 * must hold both.
	 */
	struct umtx_pi		*uq_pi_blocked;

	/* On blocked list */
	TAILQ_ENTRY(umtx_q)	uq_lockq;

	/* Contested PI mutexes owned by this thread */
	TAILQ_HEAD(,umtx_pi)	uq_pi_contested;

	/* Inherited priority from PP mutex */
	u_char			uq_inherited_pri;
};

TAILQ_HEAD(umtxq_head, umtx_q);

/* Userland lock object's wait-queue chain */
struct umtxq_chain {
	/* Lock for this chain. */
	struct mtx		uc_lock;

	/* List of sleep queues. */
	struct umtxq_head	uc_queue[2];
#define UMTX_SHARED_QUEUE	0
#define UMTX_EXCLUSIVE_QUEUE	1

	/* Busy flag */
	char			uc_busy;

	/* Chain lock waiters */
	int			uc_waiters;

	/* All PI in the list */
	TAILQ_HEAD(,umtx_pi)	uc_pi_list;
};

#define	UMTXQ_LOCKED_ASSERT(uc)		mtx_assert(&(uc)->uc_lock, MA_OWNED)
#define	UMTXQ_BUSY_ASSERT(uc)	KASSERT((uc)->uc_busy != 0, ("umtx chain is not busy"))

/*
 * Don't propagate time-sharing priority; there is a security reason.
 * A user could simply create a PI mutex, let thread A lock it, and let
 * another thread B block on it.  Because the sleeping B has its
 * priority boosted, A's priority would be boosted too via priority
 * propagation, and it would never be lowered even while consuming
 * 100% CPU, which is unfair to other processes.
 */

#define UPRI(td)	(((td)->td_user_pri >= PRI_MIN_TIMESHARE &&\
			  (td)->td_user_pri <= PRI_MAX_TIMESHARE) ?\
			 PRI_MAX_TIMESHARE : (td)->td_user_pri)

#define	GOLDEN_RATIO_PRIME	2654404609U
#define	UMTX_CHAINS		128
#define	UMTX_SHIFTS		(__WORD_BIT - 7)

#define THREAD_SHARE		0
#define PROCESS_SHARE		1
#define AUTO_SHARE		2

#define	GET_SHARE(flags)	\
    (((flags) & USYNC_PROCESS_SHARED) == 0 ? THREAD_SHARE : PROCESS_SHARE)

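/*
 * Number of times to spin, with the chain lock dropped, waiting for a
 * busy chain to become idle before sleeping; see umtxq_busy().
 */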
#define BUSY_SPINS		200

static uma_zone_t		umtx_pi_zone;
static struct umtxq_chain	umtxq_chains[2][UMTX_CHAINS];
static MALLOC_DEFINE(M_UMTX, "umtx", "UMTX queue memory");
static int			umtx_pi_allocated;

SYSCTL_NODE(_debug, OID_AUTO, umtx, CTLFLAG_RW, 0, "umtx debug");
SYSCTL_INT(_debug_umtx, OID_AUTO, umtx_pi_allocated, CTLFLAG_RD,
    &umtx_pi_allocated, 0, "Allocated umtx_pi");

static void umtxq_sysinit(void *);
static void umtxq_hash(struct umtx_key *key);
static struct umtxq_chain *umtxq_getchain(struct umtx_key *key);
static void umtxq_lock(struct umtx_key *key);
static void umtxq_unlock(struct umtx_key *key);
static void umtxq_busy(struct umtx_key *key);
static void umtxq_unbusy(struct umtx_key *key);
static void umtxq_insert_queue(struct umtx_q *uq, int q);
static void umtxq_remove_queue(struct umtx_q *uq, int q);
static int umtxq_sleep(struct umtx_q *uq, const char *wmesg, int timo);
static int umtxq_count(struct umtx_key *key);
static int umtx_key_match(const struct umtx_key *k1, const struct umtx_key *k2);
static int umtx_key_get(void *addr, int type, int share,
	struct umtx_key *key);
static void umtx_key_release(struct umtx_key *key);
static struct umtx_pi *umtx_pi_alloc(int);
static void umtx_pi_free(struct umtx_pi *pi);
static void umtx_pi_adjust_locked(struct thread *td, u_char oldpri);
static int do_unlock_pp(struct thread *td, struct umutex *m, uint32_t flags);
static void umtx_thread_cleanup(struct thread *td);
static void umtx_exec_hook(void *arg __unused, struct proc *p __unused,
	struct image_params *imgp __unused);
SYSINIT(umtx, SI_SUB_EVENTHANDLER+1, SI_ORDER_MIDDLE, umtxq_sysinit, NULL);

#define umtxq_signal(key, nwake)	umtxq_signal_queue((key), (nwake), UMTX_SHARED_QUEUE)
#define umtxq_insert(uq)	umtxq_insert_queue((uq), UMTX_SHARED_QUEUE)
#define umtxq_remove(uq)	umtxq_remove_queue((uq), UMTX_SHARED_QUEUE)

static struct mtx umtx_lock;

static void
umtxq_sysinit(void *arg __unused)
{
	int i, j;

	umtx_pi_zone = uma_zcreate("umtx pi", sizeof(struct umtx_pi),
		NULL, NULL, NULL, NULL, UMA_ALIGN_PTR, 0);
	for (i = 0; i < 2; ++i) {
		for (j = 0; j < UMTX_CHAINS; ++j) {
			mtx_init(&umtxq_chains[i][j].uc_lock, "umtxql", NULL,
				 MTX_DEF | MTX_DUPOK);
			TAILQ_INIT(&umtxq_chains[i][j].uc_queue[0]);
			TAILQ_INIT(&umtxq_chains[i][j].uc_queue[1]);
			TAILQ_INIT(&umtxq_chains[i][j].uc_pi_list);
			umtxq_chains[i][j].uc_busy = 0;
			umtxq_chains[i][j].uc_waiters = 0;
		}
	}
	mtx_init(&umtx_lock, "umtx lock", NULL, MTX_SPIN);
	EVENTHANDLER_REGISTER(process_exec, umtx_exec_hook, NULL,
	    EVENTHANDLER_PRI_ANY);
}

struct umtx_q *
umtxq_alloc(void)
{
	struct umtx_q *uq;

	uq = malloc(sizeof(struct umtx_q), M_UMTX, M_WAITOK | M_ZERO);
	TAILQ_INIT(&uq->uq_pi_contested);
	uq->uq_inherited_pri = PRI_MAX;
	return (uq);
}

void
umtxq_free(struct umtx_q *uq)
{
	free(uq, M_UMTX);
}

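/*
 * Map a key to a bucket index using multiplicative (Fibonacci-style)
 * hashing over the sum of its two address components.
 */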
static inline void
umtxq_hash(struct umtx_key *key)
{
	unsigned n = (uintptr_t)key->info.both.a + key->info.both.b;
	key->hash = ((n * GOLDEN_RATIO_PRIME) >> UMTX_SHIFTS) % UMTX_CHAINS;
}

static inline int
umtx_key_match(const struct umtx_key *k1, const struct umtx_key *k2)
{
	return (k1->type == k2->type &&
		k1->info.both.a == k2->info.both.a &&
		k1->info.both.b == k2->info.both.b);
}

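/*
 * Wait and condition-variable keys (type <= TYPE_CV) hash into the
 * second chain table, all lock types into the first; presumably this
 * keeps a mutex and a condition variable built on top of it from
 * contending for the same chain lock.
 */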
static inline struct umtxq_chain *
umtxq_getchain(struct umtx_key *key)
{
	if (key->type <= TYPE_CV)
		return (&umtxq_chains[1][key->hash]);
	return (&umtxq_chains[0][key->hash]);
}

/*
 * Lock a chain.
 */
static inline void
umtxq_lock(struct umtx_key *key)
{
	struct umtxq_chain *uc;

	uc = umtxq_getchain(key);
	mtx_lock(&uc->uc_lock);
}

/*
 * Unlock a chain.
 */
static inline void
umtxq_unlock(struct umtx_key *key)
{
	struct umtxq_chain *uc;

	uc = umtxq_getchain(key);
	mtx_unlock(&uc->uc_lock);
}

/*
 * Mark a chain busy when the following operation may block
 * (a kernel mutex cannot be held across it).
 */
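/*
 * On SMP, a busy chain is expected to become idle quickly, so spin a
 * bounded number of times with the chain lock dropped before falling
 * back to sleeping on the chain.
 */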
static inline void
umtxq_busy(struct umtx_key *key)
{
	struct umtxq_chain *uc;

	uc = umtxq_getchain(key);
	mtx_assert(&uc->uc_lock, MA_OWNED);
	if (uc->uc_busy) {
#ifdef SMP
		if (smp_cpus > 1) {
			int count = BUSY_SPINS;
			if (count > 0) {
				umtxq_unlock(key);
				while (uc->uc_busy && --count > 0)
					cpu_spinwait();
				umtxq_lock(key);
			}
		}
#endif
		while (uc->uc_busy) {
			uc->uc_waiters++;
			msleep(uc, &uc->uc_lock, 0, "umtxqb", 0);
			uc->uc_waiters--;
		}
	}
	uc->uc_busy = 1;
}

/*
 * Unbusy a chain.
 */
static inline void
umtxq_unbusy(struct umtx_key *key)
{
	struct umtxq_chain *uc;

	uc = umtxq_getchain(key);
	mtx_assert(&uc->uc_lock, MA_OWNED);
	KASSERT(uc->uc_busy != 0, ("not busy"));
	uc->uc_busy = 0;
	if (uc->uc_waiters)
		wakeup_one(uc);
}

static inline void
umtxq_insert_queue(struct umtx_q *uq, int q)
{
	struct umtxq_chain *uc;

	uc = umtxq_getchain(&uq->uq_key);
	UMTXQ_LOCKED_ASSERT(uc);
	TAILQ_INSERT_TAIL(&uc->uc_queue[q], uq, uq_link);
	uq->uq_flags |= UQF_UMTXQ;
}

static inline void
umtxq_remove_queue(struct umtx_q *uq, int q)
{
	struct umtxq_chain *uc;

	uc = umtxq_getchain(&uq->uq_key);
	UMTXQ_LOCKED_ASSERT(uc);
	if (uq->uq_flags & UQF_UMTXQ) {
		TAILQ_REMOVE(&uc->uc_queue[q], uq, uq_link);
		uq->uq_flags &= ~UQF_UMTXQ;
	}
}

/*
 * Check if there are multiple waiters.
 */
static int
umtxq_count(struct umtx_key *key)
{
	struct umtxq_chain *uc;
	struct umtx_q *uq;
	int count = 0;

	uc = umtxq_getchain(key);
	UMTXQ_LOCKED_ASSERT(uc);
	TAILQ_FOREACH(uq, &uc->uc_queue[UMTX_SHARED_QUEUE], uq_link) {
		if (umtx_key_match(&uq->uq_key, key)) {
			if (++count > 1)
				break;
		}
	}
	return (count);
}

/*
 * Check if there are multiple PI waiters and return the first
 * waiter.
 */
static int
umtxq_count_pi(struct umtx_key *key, struct umtx_q **first)
{
	struct umtxq_chain *uc;
	struct umtx_q *uq;
	int count = 0;

	*first = NULL;
	uc = umtxq_getchain(key);
	UMTXQ_LOCKED_ASSERT(uc);
	TAILQ_FOREACH(uq, &uc->uc_queue[UMTX_SHARED_QUEUE], uq_link) {
		if (umtx_key_match(&uq->uq_key, key)) {
			if (++count > 1)
				break;
			*first = uq;
		}
	}
	return (count);
}

/*
 * Wake up threads waiting on a userland object.
 */
static int
umtxq_signal_queue(struct umtx_key *key, int n_wake, int q)
{
	struct umtxq_chain *uc;
	struct umtx_q *uq, *next;
	int ret;

	ret = 0;
	uc = umtxq_getchain(key);
	UMTXQ_LOCKED_ASSERT(uc);
	TAILQ_FOREACH_SAFE(uq, &uc->uc_queue[q], uq_link, next) {
		if (umtx_key_match(&uq->uq_key, key)) {
			umtxq_remove_queue(uq, q);
			wakeup(uq);
			if (++ret >= n_wake)
				break;
		}
	}
	return (ret);
}

/*
 * Wake up the specified thread.
 */
static inline void
umtxq_signal_thread(struct umtx_q *uq)
{
	struct umtxq_chain *uc;

	uc = umtxq_getchain(&uq->uq_key);
	UMTXQ_LOCKED_ASSERT(uc);
	umtxq_remove(uq);
	wakeup(uq);
}

/*
 * Put the thread into a sleep state.  Before sleeping, check whether
 * the thread was removed from the umtx queue.
 */
static inline int
umtxq_sleep(struct umtx_q *uq, const char *wmesg, int timo)
{
	struct umtxq_chain *uc;
	int error;

	uc = umtxq_getchain(&uq->uq_key);
	UMTXQ_LOCKED_ASSERT(uc);
	if (!(uq->uq_flags & UQF_UMTXQ))
		return (0);
	error = msleep(uq, &uc->uc_lock, PCATCH, wmesg, timo);
	if (error == EWOULDBLOCK)
		error = ETIMEDOUT;
	return (error);
}

/*
 * Convert a userspace address into a unique key.
 */
static int
umtx_key_get(void *addr, int type, int share, struct umtx_key *key)
{
	struct thread *td = curthread;
	vm_map_t map;
	vm_map_entry_t entry;
	vm_pindex_t pindex;
	vm_prot_t prot;
	boolean_t wired;

	key->type = type;
	if (share == THREAD_SHARE) {
		key->shared = 0;
		key->info.private.vs = td->td_proc->p_vmspace;
		key->info.private.addr = (uintptr_t)addr;
	} else {
		MPASS(share == PROCESS_SHARE || share == AUTO_SHARE);
		map = &td->td_proc->p_vmspace->vm_map;
		if (vm_map_lookup(&map, (vm_offset_t)addr, VM_PROT_WRITE,
		    &entry, &key->info.shared.object, &pindex, &prot,
		    &wired) != KERN_SUCCESS) {
			return (EFAULT);
		}

		if ((share == PROCESS_SHARE) ||
		    (share == AUTO_SHARE &&
		     VM_INHERIT_SHARE == entry->inheritance)) {
			key->shared = 1;
			key->info.shared.offset = entry->offset + entry->start -
				(vm_offset_t)addr;
			vm_object_reference(key->info.shared.object);
		} else {
			key->shared = 0;
			key->info.private.vs = td->td_proc->p_vmspace;
			key->info.private.addr = (uintptr_t)addr;
		}
		vm_map_lookup_done(map, entry);
	}

	umtxq_hash(key);
	return (0);
}

/*
 * Release key.
 */
static inline void
umtx_key_release(struct umtx_key *key)
{
	if (key->shared)
		vm_object_deallocate(key->info.shared.object);
}

/*
 * Lock a umtx object.
 */
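/*
 * Owner-word protocol (constants from sys/umtx.h): UMTX_UNOWNED means
 * free, a thread id means owned, and the UMTX_CONTESTED bit means
 * waiters may exist, so the unlocking thread must enter the kernel.
 */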
static int
_do_lock_umtx(struct thread *td, struct umtx *umtx, u_long id, int timo)
{
	struct umtx_q *uq;
	u_long owner;
	u_long old;
	int error = 0;

	uq = td->td_umtxq;

	/*
	 * Care must be exercised when dealing with the umtx structure.
	 * It can fault on any access.
	 */
	for (;;) {
		/*
		 * Try the uncontested case.  This should be done in userland.
		 */
		owner = casuword(&umtx->u_owner, UMTX_UNOWNED, id);

		/* The acquire succeeded. */
		if (owner == UMTX_UNOWNED)
			return (0);

		/* The address was invalid. */
		if (owner == -1)
			return (EFAULT);

		/* If no one owns it but it is contested try to acquire it. */
		if (owner == UMTX_CONTESTED) {
			owner = casuword(&umtx->u_owner,
			    UMTX_CONTESTED, id | UMTX_CONTESTED);

			if (owner == UMTX_CONTESTED)
				return (0);

			/* The address was invalid. */
			if (owner == -1)
				return (EFAULT);

			/* If this failed the lock has changed, restart. */
			continue;
		}

		/*
		 * If we caught a signal, we have retried and now
		 * exit immediately.
		 */
		if (error != 0)
			return (error);

		if ((error = umtx_key_get(umtx, TYPE_SIMPLE_LOCK,
			AUTO_SHARE, &uq->uq_key)) != 0)
			return (error);

		umtxq_lock(&uq->uq_key);
		umtxq_busy(&uq->uq_key);
		umtxq_insert(uq);
		umtxq_unbusy(&uq->uq_key);
		umtxq_unlock(&uq->uq_key);

		/*
		 * Set the contested bit so that a release in user space
		 * knows to use the system call for unlock.  If this fails
		 * either someone else has acquired the lock or it has been
		 * released.
		 */
		old = casuword(&umtx->u_owner, owner, owner | UMTX_CONTESTED);

		/* The address was invalid. */
		if (old == -1) {
			umtxq_lock(&uq->uq_key);
			umtxq_remove(uq);
			umtxq_unlock(&uq->uq_key);
			umtx_key_release(&uq->uq_key);
			return (EFAULT);
		}

		/*
		 * If we set the contested bit, sleep.  Otherwise the lock
		 * changed and we need to retry, or we lost a race with the
		 * thread unlocking the umtx.
		 */
		umtxq_lock(&uq->uq_key);
		if (old == owner)
			error = umtxq_sleep(uq, "umtx", timo);
		umtxq_remove(uq);
		umtxq_unlock(&uq->uq_key);
		umtx_key_release(&uq->uq_key);
	}

	return (0);
}

/*
 * Lock a umtx object, optionally with a timeout.
 */
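/*
 * A relative timeout is converted to an absolute uptime deadline so
 * that the remaining time can be recomputed after each early wakeup.
 */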
static int
do_lock_umtx(struct thread *td, struct umtx *umtx, u_long id,
	struct timespec *timeout)
{
	struct timespec ts, ts2, ts3;
	struct timeval tv;
	int error;

	if (timeout == NULL) {
		error = _do_lock_umtx(td, umtx, id, 0);
		/* Mutex locking is restarted if it is interrupted. */
		if (error == EINTR)
			error = ERESTART;
	} else {
		getnanouptime(&ts);
		timespecadd(&ts, timeout);
		TIMESPEC_TO_TIMEVAL(&tv, timeout);
		for (;;) {
			error = _do_lock_umtx(td, umtx, id, tvtohz(&tv));
			if (error != ETIMEDOUT)
				break;
			getnanouptime(&ts2);
			if (timespeccmp(&ts2, &ts, >=)) {
				error = ETIMEDOUT;
				break;
			}
			ts3 = ts;
			timespecsub(&ts3, &ts2);
			TIMESPEC_TO_TIMEVAL(&tv, &ts3);
		}
		/* Timed-locking is not restarted. */
		if (error == ERESTART)
			error = EINTR;
	}
	return (error);
}

/*
 * Unlock a umtx object.
 */
static int
do_unlock_umtx(struct thread *td, struct umtx *umtx, u_long id)
{
	struct umtx_key key;
	u_long owner;
	u_long old;
	int error;
	int count;

	/*
	 * Make sure we own this mtx.
	 */
	owner = fuword(__DEVOLATILE(u_long *, &umtx->u_owner));
	if (owner == -1)
		return (EFAULT);

	if ((owner & ~UMTX_CONTESTED) != id)
		return (EPERM);

	/* This should be done in userland */
	if ((owner & UMTX_CONTESTED) == 0) {
		old = casuword(&umtx->u_owner, owner, UMTX_UNOWNED);
		if (old == -1)
			return (EFAULT);
		if (old == owner)
			return (0);
		owner = old;
	}

	/* We should only ever be in here for contested locks */
	if ((error = umtx_key_get(umtx, TYPE_SIMPLE_LOCK, AUTO_SHARE,
		&key)) != 0)
		return (error);

	umtxq_lock(&key);
	umtxq_busy(&key);
	count = umtxq_count(&key);
	umtxq_unlock(&key);

	/*
	 * When unlocking the umtx, it must be marked as unowned if
	 * no more than one thread is waiting for it.
	 * Otherwise, it must be marked as contested.
	 */
	old = casuword(&umtx->u_owner, owner,
		count <= 1 ? UMTX_UNOWNED : UMTX_CONTESTED);
	umtxq_lock(&key);
	umtxq_signal(&key, 1);
	umtxq_unbusy(&key);
	umtxq_unlock(&key);
	umtx_key_release(&key);
	if (old == -1)
		return (EFAULT);
	if (old != owner)
		return (EINVAL);
	return (0);
}

#ifdef COMPAT_IA32

/*
 * Lock a umtx object (32-bit variant).
 */
static int
_do_lock_umtx32(struct thread *td, uint32_t *m, uint32_t id, int timo)
{
	struct umtx_q *uq;
	uint32_t owner;
	uint32_t old;
	int error = 0;

	uq = td->td_umtxq;

	/*
	 * Care must be exercised when dealing with the umtx structure.
	 * It can fault on any access.
	 */
	for (;;) {
		/*
		 * Try the uncontested case.  This should be done in userland.
		 */
		owner = casuword32(m, UMUTEX_UNOWNED, id);

		/* The acquire succeeded. */
		if (owner == UMUTEX_UNOWNED)
			return (0);

		/* The address was invalid. */
		if (owner == -1)
			return (EFAULT);

		/* If no one owns it but it is contested try to acquire it. */
		if (owner == UMUTEX_CONTESTED) {
			owner = casuword32(m,
			    UMUTEX_CONTESTED, id | UMUTEX_CONTESTED);
			if (owner == UMUTEX_CONTESTED)
				return (0);

			/* The address was invalid. */
			if (owner == -1)
				return (EFAULT);

			/* If this failed the lock has changed, restart. */
			continue;
		}

		/*
		 * If we caught a signal, we have retried and now
		 * exit immediately.
		 */
		if (error != 0)
			return (error);

		if ((error = umtx_key_get(m, TYPE_SIMPLE_LOCK,
			AUTO_SHARE, &uq->uq_key)) != 0)
			return (error);

		umtxq_lock(&uq->uq_key);
		umtxq_busy(&uq->uq_key);
		umtxq_insert(uq);
		umtxq_unbusy(&uq->uq_key);
		umtxq_unlock(&uq->uq_key);

		/*
		 * Set the contested bit so that a release in user space
		 * knows to use the system call for unlock.  If this fails
		 * either someone else has acquired the lock or it has been
		 * released.
		 */
		old = casuword32(m, owner, owner | UMUTEX_CONTESTED);

		/* The address was invalid. */
		if (old == -1) {
			umtxq_lock(&uq->uq_key);
			umtxq_remove(uq);
			umtxq_unlock(&uq->uq_key);
			umtx_key_release(&uq->uq_key);
			return (EFAULT);
		}

		/*
		 * If we set the contested bit, sleep.  Otherwise the lock
		 * changed and we need to retry, or we lost a race with the
		 * thread unlocking the umtx.
		 */
		umtxq_lock(&uq->uq_key);
		if (old == owner)
			error = umtxq_sleep(uq, "umtx", timo);
		umtxq_remove(uq);
		umtxq_unlock(&uq->uq_key);
		umtx_key_release(&uq->uq_key);
	}

	return (0);
}

/*
 * Lock a umtx object (32-bit variant), optionally with a timeout.
 */
static int
do_lock_umtx32(struct thread *td, void *m, uint32_t id,
	struct timespec *timeout)
{
	struct timespec ts, ts2, ts3;
	struct timeval tv;
	int error;

	if (timeout == NULL) {
		error = _do_lock_umtx32(td, m, id, 0);
		/* Mutex locking is restarted if it is interrupted. */
		if (error == EINTR)
			error = ERESTART;
	} else {
		getnanouptime(&ts);
		timespecadd(&ts, timeout);
		TIMESPEC_TO_TIMEVAL(&tv, timeout);
		for (;;) {
			error = _do_lock_umtx32(td, m, id, tvtohz(&tv));
			if (error != ETIMEDOUT)
				break;
			getnanouptime(&ts2);
			if (timespeccmp(&ts2, &ts, >=)) {
				error = ETIMEDOUT;
				break;
			}
			ts3 = ts;
			timespecsub(&ts3, &ts2);
			TIMESPEC_TO_TIMEVAL(&tv, &ts3);
		}
		/* Timed-locking is not restarted. */
		if (error == ERESTART)
			error = EINTR;
	}
	return (error);
}

/*
 * Unlock a umtx object (32-bit variant).
 */
static int
do_unlock_umtx32(struct thread *td, uint32_t *m, uint32_t id)
{
	struct umtx_key key;
	uint32_t owner;
	uint32_t old;
	int error;
	int count;

	/*
	 * Make sure we own this mtx.
	 */
	owner = fuword32(m);
	if (owner == -1)
		return (EFAULT);

	if ((owner & ~UMUTEX_CONTESTED) != id)
		return (EPERM);

	/* This should be done in userland */
	if ((owner & UMUTEX_CONTESTED) == 0) {
		old = casuword32(m, owner, UMUTEX_UNOWNED);
		if (old == -1)
			return (EFAULT);
		if (old == owner)
			return (0);
		owner = old;
	}

	/* We should only ever be in here for contested locks */
	if ((error = umtx_key_get(m, TYPE_SIMPLE_LOCK, AUTO_SHARE,
		&key)) != 0)
		return (error);

	umtxq_lock(&key);
	umtxq_busy(&key);
	count = umtxq_count(&key);
	umtxq_unlock(&key);

	/*
	 * When unlocking the umtx, it must be marked as unowned if
	 * no more than one thread is waiting for it.
	 * Otherwise, it must be marked as contested.
	 */
	old = casuword32(m, owner,
		count <= 1 ? UMUTEX_UNOWNED : UMUTEX_CONTESTED);
	umtxq_lock(&key);
	umtxq_signal(&key, 1);
	umtxq_unbusy(&key);
	umtxq_unlock(&key);
	umtx_key_release(&key);
	if (old == -1)
		return (EFAULT);
	if (old != owner)
		return (EINVAL);
	return (0);
}
#endif

/*
 * Fetch the value and compare; sleep on the address if the value
 * has not changed.
 */
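/*
 * The thread is queued before the userland word is read, so a wakeup
 * that races with the value check cannot be lost.
 */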
static int
do_wait(struct thread *td, void *addr, u_long id,
	struct timespec *timeout, int compat32, int is_private)
{
	struct umtx_q *uq;
	struct timespec ts, ts2, ts3;
	struct timeval tv;
	u_long tmp;
	int error = 0;

	uq = td->td_umtxq;
	if ((error = umtx_key_get(addr, TYPE_SIMPLE_WAIT,
		is_private ? THREAD_SHARE : AUTO_SHARE, &uq->uq_key)) != 0)
		return (error);

	umtxq_lock(&uq->uq_key);
	umtxq_insert(uq);
	umtxq_unlock(&uq->uq_key);
	if (compat32 == 0)
		tmp = fuword(addr);
	else
		tmp = (unsigned int)fuword32(addr);
	if (tmp != id) {
		umtxq_lock(&uq->uq_key);
		umtxq_remove(uq);
		umtxq_unlock(&uq->uq_key);
	} else if (timeout == NULL) {
		umtxq_lock(&uq->uq_key);
		error = umtxq_sleep(uq, "uwait", 0);
		umtxq_remove(uq);
		umtxq_unlock(&uq->uq_key);
	} else {
		getnanouptime(&ts);
		timespecadd(&ts, timeout);
		TIMESPEC_TO_TIMEVAL(&tv, timeout);
		umtxq_lock(&uq->uq_key);
		for (;;) {
			error = umtxq_sleep(uq, "uwait", tvtohz(&tv));
			if (!(uq->uq_flags & UQF_UMTXQ))
				break;
			if (error != ETIMEDOUT)
				break;
			umtxq_unlock(&uq->uq_key);
			getnanouptime(&ts2);
			if (timespeccmp(&ts2, &ts, >=)) {
				error = ETIMEDOUT;
				umtxq_lock(&uq->uq_key);
				break;
			}
			ts3 = ts;
			timespecsub(&ts3, &ts2);
			TIMESPEC_TO_TIMEVAL(&tv, &ts3);
			umtxq_lock(&uq->uq_key);
		}
		umtxq_remove(uq);
		umtxq_unlock(&uq->uq_key);
	}
	umtx_key_release(&uq->uq_key);
	if (error == ERESTART)
		error = EINTR;
	return (error);
}

/*
 * Wake up threads sleeping on the specified address.
 */
int
kern_umtx_wake(struct thread *td, void *uaddr, int n_wake, int is_private)
{
	struct umtx_key key;
	int ret;

	if ((ret = umtx_key_get(uaddr, TYPE_SIMPLE_WAIT,
		is_private ? THREAD_SHARE : AUTO_SHARE, &key)) != 0)
		return (ret);
	umtxq_lock(&key);
	ret = umtxq_signal(&key, n_wake);
	umtxq_unlock(&key);
	umtx_key_release(&key);
	return (0);
}

/*
 * Lock PTHREAD_PRIO_NONE protocol POSIX mutex.
 */
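/*
 * 'mode' selects the behaviour: _UMUTEX_TRY fails with EBUSY instead
 * of sleeping, and _UMUTEX_WAIT only waits until the mutex looks
 * free, without acquiring it.
 */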
static int
_do_lock_normal(struct thread *td, struct umutex *m, uint32_t flags, int timo,
	int mode)
{
	struct umtx_q *uq;
	uint32_t owner, old, id;
	int error = 0;

	id = td->td_tid;
	uq = td->td_umtxq;

	/*
	 * Care must be exercised when dealing with the umtx structure.
	 * It can fault on any access.
	 */
	for (;;) {
		owner = fuword32(__DEVOLATILE(void *, &m->m_owner));
		if (mode == _UMUTEX_WAIT) {
			if (owner == UMUTEX_UNOWNED || owner == UMUTEX_CONTESTED)
				return (0);
		} else {
			/*
			 * Try the uncontested case.  This should be done in userland.
			 */
			owner = casuword32(&m->m_owner, UMUTEX_UNOWNED, id);

			/* The acquire succeeded. */
			if (owner == UMUTEX_UNOWNED)
				return (0);

			/* The address was invalid. */
			if (owner == -1)
				return (EFAULT);

			/* If no one owns it but it is contested try to acquire it. */
			if (owner == UMUTEX_CONTESTED) {
				owner = casuword32(&m->m_owner,
				    UMUTEX_CONTESTED, id | UMUTEX_CONTESTED);

				if (owner == UMUTEX_CONTESTED)
					return (0);

				/* The address was invalid. */
				if (owner == -1)
					return (EFAULT);

				/* If this failed the lock has changed, restart. */
				continue;
			}
		}

		if ((flags & UMUTEX_ERROR_CHECK) != 0 &&
		    (owner & ~UMUTEX_CONTESTED) == id)
			return (EDEADLK);

		if (mode == _UMUTEX_TRY)
			return (EBUSY);

		/*
		 * If we caught a signal, we have retried and now
		 * exit immediately.
		 */
		if (error != 0)
			return (error);

		if ((error = umtx_key_get(m, TYPE_NORMAL_UMUTEX,
		    GET_SHARE(flags), &uq->uq_key)) != 0)
			return (error);

		umtxq_lock(&uq->uq_key);
		umtxq_busy(&uq->uq_key);
		umtxq_insert(uq);
		umtxq_unlock(&uq->uq_key);

		/*
		 * Set the contested bit so that a release in user space
		 * knows to use the system call for unlock.  If this fails
		 * either someone else has acquired the lock or it has been
		 * released.
		 */
		old = casuword32(&m->m_owner, owner, owner | UMUTEX_CONTESTED);

		/* The address was invalid. */
		if (old == -1) {
			umtxq_lock(&uq->uq_key);
			umtxq_remove(uq);
			umtxq_unbusy(&uq->uq_key);
			umtxq_unlock(&uq->uq_key);
			umtx_key_release(&uq->uq_key);
			return (EFAULT);
		}

		/*
		 * If we set the contested bit, sleep.  Otherwise the lock
		 * changed and we need to retry, or we lost a race with the
		 * thread unlocking the umtx.
		 */
		umtxq_lock(&uq->uq_key);
		umtxq_unbusy(&uq->uq_key);
		if (old == owner)
			error = umtxq_sleep(uq, "umtxn", timo);
		umtxq_remove(uq);
		umtxq_unlock(&uq->uq_key);
		umtx_key_release(&uq->uq_key);
	}

	return (0);
}

/*
 * Unlock PTHREAD_PRIO_NONE protocol POSIX mutex.
 */
static int
do_unlock_normal(struct thread *td, struct umutex *m, uint32_t flags)
{
	struct umtx_key key;
	uint32_t owner, old, id;
	int error;
	int count;

	id = td->td_tid;
	/*
	 * Make sure we own this mtx.
	 */
	owner = fuword32(__DEVOLATILE(uint32_t *, &m->m_owner));
	if (owner == -1)
		return (EFAULT);

	if ((owner & ~UMUTEX_CONTESTED) != id)
		return (EPERM);

	if ((owner & UMUTEX_CONTESTED) == 0) {
		old = casuword32(&m->m_owner, owner, UMUTEX_UNOWNED);
		if (old == -1)
			return (EFAULT);
		if (old == owner)
			return (0);
		owner = old;
	}

	/* We should only ever be in here for contested locks */
	if ((error = umtx_key_get(m, TYPE_NORMAL_UMUTEX, GET_SHARE(flags),
	    &key)) != 0)
		return (error);

	umtxq_lock(&key);
	umtxq_busy(&key);
	count = umtxq_count(&key);
	umtxq_unlock(&key);

	/*
	 * When unlocking the umtx, it must be marked as unowned if
	 * no more than one thread is waiting for it.
	 * Otherwise, it must be marked as contested.
	 */
	old = casuword32(&m->m_owner, owner,
		count <= 1 ? UMUTEX_UNOWNED : UMUTEX_CONTESTED);
	umtxq_lock(&key);
	umtxq_signal(&key, 1);
	umtxq_unbusy(&key);
	umtxq_unlock(&key);
	umtx_key_release(&key);
	if (old == -1)
		return (EFAULT);
	if (old != owner)
		return (EINVAL);
	return (0);
}

/*
 * Check if the mutex is available and wake up a waiter;
 * this applies only to simple (PTHREAD_PRIO_NONE) mutexes.
 */
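/*
 * If at most one waiter remains, also try to clear the contested bit
 * so that the next lock attempt can succeed entirely in userland.
 */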
static int
do_wake_umutex(struct thread *td, struct umutex *m)
{
	struct umtx_key key;
	uint32_t owner;
	uint32_t flags;
	int error;
	int count;

	owner = fuword32(__DEVOLATILE(uint32_t *, &m->m_owner));
	if (owner == -1)
		return (EFAULT);

	if ((owner & ~UMUTEX_CONTESTED) != 0)
		return (0);

	flags = fuword32(&m->m_flags);

	/* We should only ever be in here for contested locks */
	if ((error = umtx_key_get(m, TYPE_NORMAL_UMUTEX, GET_SHARE(flags),
	    &key)) != 0)
		return (error);

	umtxq_lock(&key);
	umtxq_busy(&key);
	count = umtxq_count(&key);
	umtxq_unlock(&key);

	if (count <= 1)
		owner = casuword32(&m->m_owner, UMUTEX_CONTESTED, UMUTEX_UNOWNED);

	umtxq_lock(&key);
	if (count != 0 && (owner & ~UMUTEX_CONTESTED) == 0)
		umtxq_signal(&key, 1);
	umtxq_unbusy(&key);
	umtxq_unlock(&key);
	umtx_key_release(&key);
	return (0);
}

static inline struct umtx_pi *
umtx_pi_alloc(int flags)
{
	struct umtx_pi *pi;

	pi = uma_zalloc(umtx_pi_zone, M_ZERO | flags);
	TAILQ_INIT(&pi->pi_blocked);
	atomic_add_int(&umtx_pi_allocated, 1);
	return (pi);
}

static inline void
umtx_pi_free(struct umtx_pi *pi)
{
	uma_zfree(umtx_pi_zone, pi);
	atomic_add_int(&umtx_pi_allocated, -1);
}

/*
 * Adjust the thread's position on the PI mutex's blocked list after
 * its priority has been changed.
 */
static int
umtx_pi_adjust_thread(struct umtx_pi *pi, struct thread *td)
{
	struct umtx_q *uq, *uq1, *uq2;
	struct thread *td1;

	mtx_assert(&umtx_lock, MA_OWNED);
	if (pi == NULL)
		return (0);

	uq = td->td_umtxq;

	/*
	 * Check if the thread needs to be moved on the blocked chain.
	 * It needs to be moved if either its priority is lower than
	 * the previous thread or higher than the next thread.
	 */
	uq1 = TAILQ_PREV(uq, umtxq_head, uq_lockq);
	uq2 = TAILQ_NEXT(uq, uq_lockq);
	if ((uq1 != NULL && UPRI(td) < UPRI(uq1->uq_thread)) ||
	    (uq2 != NULL && UPRI(td) > UPRI(uq2->uq_thread))) {
		/*
		 * Remove thread from blocked chain and determine where
		 * it should be moved to.
		 */
		TAILQ_REMOVE(&pi->pi_blocked, uq, uq_lockq);
		TAILQ_FOREACH(uq1, &pi->pi_blocked, uq_lockq) {
			td1 = uq1->uq_thread;
			MPASS(td1->td_proc->p_magic == P_MAGIC);
			if (UPRI(td1) > UPRI(td))
				break;
		}

		if (uq1 == NULL)
			TAILQ_INSERT_TAIL(&pi->pi_blocked, uq, uq_lockq);
		else
			TAILQ_INSERT_BEFORE(uq1, uq, uq_lockq);
	}
	return (1);
}

/*
 * Propagate priority when a thread is blocked on a POSIX
 * PI mutex.
 */
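/*
 * Walks the chain of mutex owners, lending the waiter's priority to
 * each owner until an owner already running at that priority (or
 * better) is found or the chain ends.
 */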
static void
umtx_propagate_priority(struct thread *td)
{
	struct umtx_q *uq;
	struct umtx_pi *pi;
	int pri;

	mtx_assert(&umtx_lock, MA_OWNED);
	pri = UPRI(td);
	uq = td->td_umtxq;
	pi = uq->uq_pi_blocked;
	if (pi == NULL)
		return;

	for (;;) {
		td = pi->pi_owner;
		if (td == NULL)
			return;

		MPASS(td->td_proc != NULL);
		MPASS(td->td_proc->p_magic == P_MAGIC);

		if (UPRI(td) <= pri)
			return;

		thread_lock(td);
		sched_lend_user_prio(td, pri);
		thread_unlock(td);

		/*
		 * Pick up the lock that td is blocked on.
		 */
		uq = td->td_umtxq;
		pi = uq->uq_pi_blocked;
		/* Resort td on the list if needed. */
		if (!umtx_pi_adjust_thread(pi, td))
			break;
	}
}

/*
 * Unpropagate priority for a PI mutex when a thread blocked on
 * it is interrupted by a signal or resumed by others.
 */
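/*
 * Each owner's lent priority is recomputed as the best priority among
 * the first waiters of all contested PI mutexes it still holds.
 */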
static void
umtx_unpropagate_priority(struct umtx_pi *pi)
{
	struct umtx_q *uq, *uq_owner;
	struct umtx_pi *pi2;
	int pri, oldpri;

	mtx_assert(&umtx_lock, MA_OWNED);

	while (pi != NULL && pi->pi_owner != NULL) {
		pri = PRI_MAX;
		uq_owner = pi->pi_owner->td_umtxq;

		TAILQ_FOREACH(pi2, &uq_owner->uq_pi_contested, pi_link) {
			uq = TAILQ_FIRST(&pi2->pi_blocked);
			if (uq != NULL) {
				if (pri > UPRI(uq->uq_thread))
					pri = UPRI(uq->uq_thread);
			}
		}

		if (pri > uq_owner->uq_inherited_pri)
			pri = uq_owner->uq_inherited_pri;
		thread_lock(pi->pi_owner);
		oldpri = pi->pi_owner->td_user_pri;
		sched_unlend_user_prio(pi->pi_owner, pri);
		thread_unlock(pi->pi_owner);
		if (uq_owner->uq_pi_blocked != NULL)
			umtx_pi_adjust_locked(pi->pi_owner, oldpri);
		pi = uq_owner->uq_pi_blocked;
	}
}

/*
 * Insert a PI mutex into the owning thread's list.
 */
static void
umtx_pi_setowner(struct umtx_pi *pi, struct thread *owner)
{
	struct umtx_q *uq_owner;

	uq_owner = owner->td_umtxq;
	mtx_assert(&umtx_lock, MA_OWNED);
	if (pi->pi_owner != NULL)
		panic("pi_owner != NULL");
	pi->pi_owner = owner;
	TAILQ_INSERT_TAIL(&uq_owner->uq_pi_contested, pi, pi_link);
}

1418161678Sdavidxu/*
1419161678Sdavidxu * Claim ownership of a PI mutex.
1420161678Sdavidxu */
1421161678Sdavidxustatic int
1422161678Sdavidxuumtx_pi_claim(struct umtx_pi *pi, struct thread *owner)
1423161678Sdavidxu{
1424161678Sdavidxu	struct umtx_q *uq, *uq_owner;
1425161678Sdavidxu
1426161678Sdavidxu	uq_owner = owner->td_umtxq;
1427170300Sjeff	mtx_lock_spin(&umtx_lock);
1428161678Sdavidxu	if (pi->pi_owner == owner) {
1429170300Sjeff		mtx_unlock_spin(&umtx_lock);
1430161678Sdavidxu		return (0);
1431161678Sdavidxu	}
1432161678Sdavidxu
1433161678Sdavidxu	if (pi->pi_owner != NULL) {
1434161678Sdavidxu		/*
1435161678Sdavidxu		 * userland may have already messed the mutex, sigh.
1436161678Sdavidxu		 */
1437170300Sjeff		mtx_unlock_spin(&umtx_lock);
1438161678Sdavidxu		return (EPERM);
1439161678Sdavidxu	}
1440161678Sdavidxu	umtx_pi_setowner(pi, owner);
1441161678Sdavidxu	uq = TAILQ_FIRST(&pi->pi_blocked);
1442161678Sdavidxu	if (uq != NULL) {
1443161678Sdavidxu		int pri;
1444161678Sdavidxu
1445161678Sdavidxu		pri = UPRI(uq->uq_thread);
1446170300Sjeff		thread_lock(owner);
1447161678Sdavidxu		if (pri < UPRI(owner))
1448161678Sdavidxu			sched_lend_user_prio(owner, pri);
1449170300Sjeff		thread_unlock(owner);
1450161678Sdavidxu	}
1451170300Sjeff	mtx_unlock_spin(&umtx_lock);
1452161678Sdavidxu	return (0);
1453161678Sdavidxu}
1454161678Sdavidxu
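/*
 * A thread blocked on a PI mutex has had its priority changed;
 * resort it on the mutex's blocked list and repropagate the new
 * priority if necessary.  The umtx_lock must be held.
 */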
static void
umtx_pi_adjust_locked(struct thread *td, u_char oldpri)
{
	struct umtx_q *uq;
	struct umtx_pi *pi;

	uq = td->td_umtxq;
	/*
	 * Pick up the lock that td is blocked on.
	 */
	pi = uq->uq_pi_blocked;
	MPASS(pi != NULL);

	/* Resort td on the blocked list if needed. */
	if (!umtx_pi_adjust_thread(pi, td))
		return;

	/*
	 * If our priority was lowered and we are at the head of the
	 * turnstile, then propagate our new priority up the chain.
	 */
	if (uq == TAILQ_FIRST(&pi->pi_blocked) && UPRI(td) < oldpri)
		umtx_propagate_priority(td);
}

/*
 * Adjust a thread's position on the blocked list of the PI mutex
 * it is blocked on; this may trigger another round of priority
 * propagation.
 */
void
umtx_pi_adjust(struct thread *td, u_char oldpri)
{
	struct umtx_q *uq;
	struct umtx_pi *pi;

	uq = td->td_umtxq;
	mtx_lock_spin(&umtx_lock);
	/*
	 * Pick up the lock that td is blocked on.
	 */
	pi = uq->uq_pi_blocked;
	if (pi != NULL)
		umtx_pi_adjust_locked(td, oldpri);
	mtx_unlock_spin(&umtx_lock);
}

/*
 * Sleep on a PI mutex.
 */
static int
umtxq_sleep_pi(struct umtx_q *uq, struct umtx_pi *pi,
	uint32_t owner, const char *wmesg, int timo)
{
	struct umtxq_chain *uc;
	struct thread *td, *td1;
	struct umtx_q *uq1;
	int pri;
	int error = 0;

	td = uq->uq_thread;
	KASSERT(td == curthread, ("inconsistent uq_thread"));
	uc = umtxq_getchain(&uq->uq_key);
	UMTXQ_LOCKED_ASSERT(uc);
	UMTXQ_BUSY_ASSERT(uc);
	umtxq_insert(uq);
	mtx_lock_spin(&umtx_lock);
	if (pi->pi_owner == NULL) {
		/* XXX
		 * Currently, we only support process-private PI mutexes;
		 * non-contended PI mutexes are locked in userland.
		 * A process-shared PI mutex should always be initialized
		 * and registered in the kernel, and locking should always
		 * be done by the kernel to avoid security problems.
		 * For a process-private PI mutex, we can find the owner
		 * thread and boost its priority safely.
		 */
		mtx_unlock_spin(&umtx_lock);
		PROC_LOCK(curproc);
		td1 = thread_find(curproc, owner);
		mtx_lock_spin(&umtx_lock);
		if (td1 != NULL && pi->pi_owner == NULL) {
			uq1 = td1->td_umtxq;
			umtx_pi_setowner(pi, td1);
		}
		PROC_UNLOCK(curproc);
	}

	TAILQ_FOREACH(uq1, &pi->pi_blocked, uq_lockq) {
		pri = UPRI(uq1->uq_thread);
		if (pri > UPRI(td))
			break;
	}

	if (uq1 != NULL)
		TAILQ_INSERT_BEFORE(uq1, uq, uq_lockq);
	else
		TAILQ_INSERT_TAIL(&pi->pi_blocked, uq, uq_lockq);

	uq->uq_pi_blocked = pi;
	thread_lock(td);
	td->td_flags |= TDF_UPIBLOCKED;
	thread_unlock(td);
	umtx_propagate_priority(td);
	mtx_unlock_spin(&umtx_lock);
	umtxq_unbusy(&uq->uq_key);

	if (uq->uq_flags & UQF_UMTXQ) {
		error = msleep(uq, &uc->uc_lock, PCATCH, wmesg, timo);
		if (error == EWOULDBLOCK)
			error = ETIMEDOUT;
		if (uq->uq_flags & UQF_UMTXQ) {
			umtxq_remove(uq);
		}
	}
	mtx_lock_spin(&umtx_lock);
	uq->uq_pi_blocked = NULL;
	thread_lock(td);
	td->td_flags &= ~TDF_UPIBLOCKED;
	thread_unlock(td);
	TAILQ_REMOVE(&pi->pi_blocked, uq, uq_lockq);
	umtx_unpropagate_priority(pi);
	mtx_unlock_spin(&umtx_lock);
	umtxq_unlock(&uq->uq_key);

	return (error);
}

/*
 * Increase the reference count of a PI mutex.
 */
static void
umtx_pi_ref(struct umtx_pi *pi)
{
	struct umtxq_chain *uc;

	uc = umtxq_getchain(&pi->pi_key);
	UMTXQ_LOCKED_ASSERT(uc);
	pi->pi_refcount++;
}

/*
 * Decrease the reference count of a PI mutex; when the count
 * drops to zero, its memory is freed.
 */
static void
umtx_pi_unref(struct umtx_pi *pi)
{
	struct umtxq_chain *uc;

	uc = umtxq_getchain(&pi->pi_key);
	UMTXQ_LOCKED_ASSERT(uc);
	KASSERT(pi->pi_refcount > 0, ("invalid reference count"));
	if (--pi->pi_refcount == 0) {
		mtx_lock_spin(&umtx_lock);
		if (pi->pi_owner != NULL) {
			TAILQ_REMOVE(&pi->pi_owner->td_umtxq->uq_pi_contested,
				pi, pi_link);
			pi->pi_owner = NULL;
		}
		KASSERT(TAILQ_EMPTY(&pi->pi_blocked),
			("blocked queue not empty"));
		mtx_unlock_spin(&umtx_lock);
		TAILQ_REMOVE(&uc->uc_pi_list, pi, pi_hashlink);
		umtx_pi_free(pi);
	}
}

/*
 * Find a PI mutex in the hash table.
 */
static struct umtx_pi *
umtx_pi_lookup(struct umtx_key *key)
{
	struct umtxq_chain *uc;
	struct umtx_pi *pi;

	uc = umtxq_getchain(key);
	UMTXQ_LOCKED_ASSERT(uc);

	TAILQ_FOREACH(pi, &uc->uc_pi_list, pi_hashlink) {
		if (umtx_key_match(&pi->pi_key, key)) {
			return (pi);
		}
	}
	return (NULL);
}

/*
 * Insert a PI mutex into the hash table.
 */
static inline void
umtx_pi_insert(struct umtx_pi *pi)
{
	struct umtxq_chain *uc;

	uc = umtxq_getchain(&pi->pi_key);
	UMTXQ_LOCKED_ASSERT(uc);
	TAILQ_INSERT_TAIL(&uc->uc_pi_list, pi, pi_hashlink);
}

/*
 * Lock a PI mutex.
 */
static int
_do_lock_pi(struct thread *td, struct umutex *m, uint32_t flags, int timo,
	int try)
{
	struct umtx_q *uq;
	struct umtx_pi *pi, *new_pi;
	uint32_t id, owner, old;
	int error;

	id = td->td_tid;
	uq = td->td_umtxq;

	if ((error = umtx_key_get(m, TYPE_PI_UMUTEX, GET_SHARE(flags),
	    &uq->uq_key)) != 0)
		return (error);
	umtxq_lock(&uq->uq_key);
	pi = umtx_pi_lookup(&uq->uq_key);
	if (pi == NULL) {
		new_pi = umtx_pi_alloc(M_NOWAIT);
		if (new_pi == NULL) {
			umtxq_unlock(&uq->uq_key);
			new_pi = umtx_pi_alloc(M_WAITOK);
			umtxq_lock(&uq->uq_key);
			pi = umtx_pi_lookup(&uq->uq_key);
			if (pi != NULL) {
				umtx_pi_free(new_pi);
				new_pi = NULL;
			}
		}
		if (new_pi != NULL) {
			new_pi->pi_key = uq->uq_key;
			umtx_pi_insert(new_pi);
			pi = new_pi;
		}
	}
	umtx_pi_ref(pi);
	umtxq_unlock(&uq->uq_key);

	/*
	 * Care must be exercised when dealing with the umtx structure:
	 * any access can fault.
	 */
	for (;;) {
		/*
		 * Try the uncontested case.  This should be done in userland.
		 */
		owner = casuword32(&m->m_owner, UMUTEX_UNOWNED, id);

		/* The acquire succeeded. */
		if (owner == UMUTEX_UNOWNED) {
			error = 0;
			break;
		}

		/* The address was invalid. */
		if (owner == -1) {
			error = EFAULT;
			break;
		}

		/* If no one owns it but it is contested, try to acquire it. */
		if (owner == UMUTEX_CONTESTED) {
			owner = casuword32(&m->m_owner,
			    UMUTEX_CONTESTED, id | UMUTEX_CONTESTED);

			if (owner == UMUTEX_CONTESTED) {
				umtxq_lock(&uq->uq_key);
				umtxq_busy(&uq->uq_key);
				error = umtx_pi_claim(pi, td);
				umtxq_unbusy(&uq->uq_key);
				umtxq_unlock(&uq->uq_key);
				break;
			}

			/* The address was invalid. */
			if (owner == -1) {
				error = EFAULT;
				break;
			}

			/* If this failed the lock has changed, restart. */
			continue;
		}

		if ((flags & UMUTEX_ERROR_CHECK) != 0 &&
		    (owner & ~UMUTEX_CONTESTED) == id) {
			error = EDEADLK;
			break;
		}

		if (try != 0) {
			error = EBUSY;
			break;
		}

		/*
		 * If we caught a signal, we have retried and now
		 * exit immediately.
		 */
		if (error != 0)
			break;

		umtxq_lock(&uq->uq_key);
		umtxq_busy(&uq->uq_key);
		umtxq_unlock(&uq->uq_key);

		/*
		 * Set the contested bit so that a release in user space
		 * knows to use the system call for unlock.  If this fails
		 * either someone else has acquired the lock or it has been
		 * released.
		 */
		old = casuword32(&m->m_owner, owner, owner | UMUTEX_CONTESTED);

		/* The address was invalid. */
		if (old == -1) {
			umtxq_lock(&uq->uq_key);
			umtxq_unbusy(&uq->uq_key);
			umtxq_unlock(&uq->uq_key);
			error = EFAULT;
			break;
		}

		umtxq_lock(&uq->uq_key);
		/*
		 * If we successfully set the contested bit, sleep.  Otherwise
		 * the lock changed and we need to retry, or we lost a race
		 * to the thread unlocking the umtx.
		 */
		if (old == owner)
			error = umtxq_sleep_pi(uq, pi, owner & ~UMUTEX_CONTESTED,
				 "umtxpi", timo);
		else {
			umtxq_unbusy(&uq->uq_key);
			umtxq_unlock(&uq->uq_key);
		}
	}

	umtxq_lock(&uq->uq_key);
	umtx_pi_unref(pi);
	umtxq_unlock(&uq->uq_key);

	umtx_key_release(&uq->uq_key);
	return (error);
}

/*
 * Unlock a PI mutex.
 */
static int
do_unlock_pi(struct thread *td, struct umutex *m, uint32_t flags)
{
	struct umtx_key key;
	struct umtx_q *uq_first, *uq_first2, *uq_me;
	struct umtx_pi *pi, *pi2;
	uint32_t owner, old, id;
	int error;
	int count;
	int pri;

	id = td->td_tid;
	/*
	 * Make sure we own this mtx.
	 */
	owner = fuword32(__DEVOLATILE(uint32_t *, &m->m_owner));
	if (owner == -1)
		return (EFAULT);

	if ((owner & ~UMUTEX_CONTESTED) != id)
		return (EPERM);

	/* This should be done in userland */
	if ((owner & UMUTEX_CONTESTED) == 0) {
		old = casuword32(&m->m_owner, owner, UMUTEX_UNOWNED);
		if (old == -1)
			return (EFAULT);
		if (old == owner)
			return (0);
		owner = old;
	}

	/* We should only ever be in here for contested locks */
	if ((error = umtx_key_get(m, TYPE_PI_UMUTEX, GET_SHARE(flags),
	    &key)) != 0)
		return (error);

	umtxq_lock(&key);
	umtxq_busy(&key);
	count = umtxq_count_pi(&key, &uq_first);
	if (uq_first != NULL) {
		mtx_lock_spin(&umtx_lock);
		pi = uq_first->uq_pi_blocked;
		KASSERT(pi != NULL, ("pi == NULL?"));
		if (pi->pi_owner != curthread) {
			mtx_unlock_spin(&umtx_lock);
			umtxq_unbusy(&key);
			umtxq_unlock(&key);
			umtx_key_release(&key);
			/* userland messed up the mutex */
			return (EPERM);
		}
		uq_me = curthread->td_umtxq;
		pi->pi_owner = NULL;
		TAILQ_REMOVE(&uq_me->uq_pi_contested, pi, pi_link);
		/* Find the highest-priority thread that is still sleeping. */
		uq_first = TAILQ_FIRST(&pi->pi_blocked);
		while (uq_first != NULL &&
		       (uq_first->uq_flags & UQF_UMTXQ) == 0) {
			uq_first = TAILQ_NEXT(uq_first, uq_lockq);
		}
		pri = PRI_MAX;
		TAILQ_FOREACH(pi2, &uq_me->uq_pi_contested, pi_link) {
			uq_first2 = TAILQ_FIRST(&pi2->pi_blocked);
			if (uq_first2 != NULL) {
				if (pri > UPRI(uq_first2->uq_thread))
					pri = UPRI(uq_first2->uq_thread);
			}
		}
		thread_lock(curthread);
		sched_unlend_user_prio(curthread, pri);
		thread_unlock(curthread);
		mtx_unlock_spin(&umtx_lock);
		if (uq_first)
			umtxq_signal_thread(uq_first);
	}
	umtxq_unlock(&key);

	/*
	 * When unlocking the umtx, it must be marked as unowned if
	 * zero or one thread is waiting for it.  Otherwise, it must
	 * be marked as contested.
	 */
	old = casuword32(&m->m_owner, owner,
		count <= 1 ? UMUTEX_UNOWNED : UMUTEX_CONTESTED);

	umtxq_lock(&key);
	umtxq_unbusy(&key);
	umtxq_unlock(&key);
	umtx_key_release(&key);
	if (old == -1)
		return (EFAULT);
	if (old != owner)
		return (EINVAL);
	return (0);
}

/*
 * Lock a PP mutex.
 */
static int
_do_lock_pp(struct thread *td, struct umutex *m, uint32_t flags, int timo,
	int try)
{
	struct umtx_q *uq, *uq2;
	struct umtx_pi *pi;
	uint32_t ceiling;
	uint32_t owner, id;
	int error, pri, old_inherited_pri, su;

	id = td->td_tid;
	uq = td->td_umtxq;
	if ((error = umtx_key_get(m, TYPE_PP_UMUTEX, GET_SHARE(flags),
	    &uq->uq_key)) != 0)
		return (error);
	su = (priv_check(td, PRIV_SCHED_RTPRIO) == 0);
	for (;;) {
		old_inherited_pri = uq->uq_inherited_pri;
		umtxq_lock(&uq->uq_key);
		umtxq_busy(&uq->uq_key);
		umtxq_unlock(&uq->uq_key);

		ceiling = RTP_PRIO_MAX - fuword32(&m->m_ceilings[0]);
		if (ceiling > RTP_PRIO_MAX) {
			error = EINVAL;
			goto out;
		}

		mtx_lock_spin(&umtx_lock);
		if (UPRI(td) < PRI_MIN_REALTIME + ceiling) {
			mtx_unlock_spin(&umtx_lock);
			error = EINVAL;
			goto out;
		}
		if (su && PRI_MIN_REALTIME + ceiling < uq->uq_inherited_pri) {
			uq->uq_inherited_pri = PRI_MIN_REALTIME + ceiling;
			thread_lock(td);
			if (uq->uq_inherited_pri < UPRI(td))
				sched_lend_user_prio(td, uq->uq_inherited_pri);
			thread_unlock(td);
		}
		mtx_unlock_spin(&umtx_lock);

		owner = casuword32(&m->m_owner,
		    UMUTEX_CONTESTED, id | UMUTEX_CONTESTED);

		if (owner == UMUTEX_CONTESTED) {
			error = 0;
			break;
		}

		/* The address was invalid. */
		if (owner == -1) {
			error = EFAULT;
			break;
		}

		if ((flags & UMUTEX_ERROR_CHECK) != 0 &&
		    (owner & ~UMUTEX_CONTESTED) == id) {
			error = EDEADLK;
			break;
		}

		if (try != 0) {
			error = EBUSY;
			break;
		}

		/*
		 * If we caught a signal, we have retried and now
		 * exit immediately.
		 */
		if (error != 0)
			break;

		umtxq_lock(&uq->uq_key);
		umtxq_insert(uq);
		umtxq_unbusy(&uq->uq_key);
		error = umtxq_sleep(uq, "umtxpp", timo);
		umtxq_remove(uq);
		umtxq_unlock(&uq->uq_key);

		mtx_lock_spin(&umtx_lock);
		uq->uq_inherited_pri = old_inherited_pri;
		pri = PRI_MAX;
		TAILQ_FOREACH(pi, &uq->uq_pi_contested, pi_link) {
			uq2 = TAILQ_FIRST(&pi->pi_blocked);
			if (uq2 != NULL) {
				if (pri > UPRI(uq2->uq_thread))
					pri = UPRI(uq2->uq_thread);
			}
		}
		if (pri > uq->uq_inherited_pri)
			pri = uq->uq_inherited_pri;
		thread_lock(td);
		sched_unlend_user_prio(td, pri);
		thread_unlock(td);
		mtx_unlock_spin(&umtx_lock);
	}

	if (error != 0) {
		mtx_lock_spin(&umtx_lock);
		uq->uq_inherited_pri = old_inherited_pri;
		pri = PRI_MAX;
		TAILQ_FOREACH(pi, &uq->uq_pi_contested, pi_link) {
			uq2 = TAILQ_FIRST(&pi->pi_blocked);
			if (uq2 != NULL) {
				if (pri > UPRI(uq2->uq_thread))
					pri = UPRI(uq2->uq_thread);
			}
		}
		if (pri > uq->uq_inherited_pri)
			pri = uq->uq_inherited_pri;
		thread_lock(td);
		sched_unlend_user_prio(td, pri);
		thread_unlock(td);
		mtx_unlock_spin(&umtx_lock);
	}

out:
	umtxq_lock(&uq->uq_key);
	umtxq_unbusy(&uq->uq_key);
	umtxq_unlock(&uq->uq_key);
	umtx_key_release(&uq->uq_key);
	return (error);
}

/*
 * Unlock a PP mutex.
 */
static int
do_unlock_pp(struct thread *td, struct umutex *m, uint32_t flags)
{
	struct umtx_key key;
	struct umtx_q *uq, *uq2;
	struct umtx_pi *pi;
	uint32_t owner, id;
	uint32_t rceiling;
	int error, pri, new_inherited_pri, su;

	id = td->td_tid;
	uq = td->td_umtxq;
	su = (priv_check(td, PRIV_SCHED_RTPRIO) == 0);

	/*
	 * Make sure we own this mtx.
	 */
	owner = fuword32(__DEVOLATILE(uint32_t *, &m->m_owner));
	if (owner == -1)
		return (EFAULT);

	if ((owner & ~UMUTEX_CONTESTED) != id)
		return (EPERM);

	error = copyin(&m->m_ceilings[1], &rceiling, sizeof(uint32_t));
	if (error != 0)
		return (error);

	if (rceiling == -1)
		new_inherited_pri = PRI_MAX;
	else {
		rceiling = RTP_PRIO_MAX - rceiling;
		if (rceiling > RTP_PRIO_MAX)
			return (EINVAL);
		new_inherited_pri = PRI_MIN_REALTIME + rceiling;
	}

	if ((error = umtx_key_get(m, TYPE_PP_UMUTEX, GET_SHARE(flags),
	    &key)) != 0)
		return (error);
	umtxq_lock(&key);
	umtxq_busy(&key);
	umtxq_unlock(&key);
	/*
	 * For a priority-protected mutex, always set the unlocked state
	 * to UMUTEX_CONTESTED so that userland always enters the kernel
	 * to lock the mutex; this is necessary because thread priority
	 * has to be adjusted for such a mutex.
	 */
	error = suword32(__DEVOLATILE(uint32_t *, &m->m_owner),
		UMUTEX_CONTESTED);

	umtxq_lock(&key);
	if (error == 0)
		umtxq_signal(&key, 1);
	umtxq_unbusy(&key);
	umtxq_unlock(&key);

	if (error == -1)
		error = EFAULT;
	else {
		mtx_lock_spin(&umtx_lock);
		if (su != 0)
			uq->uq_inherited_pri = new_inherited_pri;
		pri = PRI_MAX;
		TAILQ_FOREACH(pi, &uq->uq_pi_contested, pi_link) {
			uq2 = TAILQ_FIRST(&pi->pi_blocked);
			if (uq2 != NULL) {
				if (pri > UPRI(uq2->uq_thread))
					pri = UPRI(uq2->uq_thread);
			}
		}
		if (pri > uq->uq_inherited_pri)
			pri = uq->uq_inherited_pri;
		thread_lock(td);
		sched_unlend_user_prio(td, pri);
		thread_unlock(td);
		mtx_unlock_spin(&umtx_lock);
	}
	umtx_key_release(&key);
	return (error);
}

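/*
 * Set the priority ceiling of a PP mutex, returning the previous
 * ceiling through old_ceiling when requested.
 */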
static int
do_set_ceiling(struct thread *td, struct umutex *m, uint32_t ceiling,
	uint32_t *old_ceiling)
{
	struct umtx_q *uq;
	uint32_t save_ceiling;
	uint32_t owner, id;
	uint32_t flags;
	int error;

	flags = fuword32(&m->m_flags);
	if ((flags & UMUTEX_PRIO_PROTECT) == 0)
		return (EINVAL);
	if (ceiling > RTP_PRIO_MAX)
		return (EINVAL);
	id = td->td_tid;
	uq = td->td_umtxq;
	if ((error = umtx_key_get(m, TYPE_PP_UMUTEX, GET_SHARE(flags),
	    &uq->uq_key)) != 0)
		return (error);
	for (;;) {
		umtxq_lock(&uq->uq_key);
		umtxq_busy(&uq->uq_key);
		umtxq_unlock(&uq->uq_key);

		save_ceiling = fuword32(&m->m_ceilings[0]);

		owner = casuword32(&m->m_owner,
		    UMUTEX_CONTESTED, id | UMUTEX_CONTESTED);

		if (owner == UMUTEX_CONTESTED) {
			suword32(&m->m_ceilings[0], ceiling);
			suword32(__DEVOLATILE(uint32_t *, &m->m_owner),
				UMUTEX_CONTESTED);
			error = 0;
			break;
		}

		/* The address was invalid. */
		if (owner == -1) {
			error = EFAULT;
			break;
		}

		if ((owner & ~UMUTEX_CONTESTED) == id) {
			suword32(&m->m_ceilings[0], ceiling);
			error = 0;
			break;
		}

		/*
		 * If we caught a signal, we have retried and now
		 * exit immediately.
		 */
		if (error != 0)
			break;

		/*
		 * The mutex is owned by someone else; queue ourselves,
		 * sleep until it is released, and then retry.
		 */
		umtxq_lock(&uq->uq_key);
		umtxq_insert(uq);
		umtxq_unbusy(&uq->uq_key);
		error = umtxq_sleep(uq, "umtxpp", 0);
		umtxq_remove(uq);
		umtxq_unlock(&uq->uq_key);
	}
	umtxq_lock(&uq->uq_key);
	if (error == 0)
		umtxq_signal(&uq->uq_key, INT_MAX);
	umtxq_unbusy(&uq->uq_key);
	umtxq_unlock(&uq->uq_key);
	umtx_key_release(&uq->uq_key);
	if (error == 0 && old_ceiling != NULL)
		suword32(old_ceiling, save_ceiling);
	return (error);
}

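/*
 * Dispatch a mutex lock request to the routine implementing the
 * mutex's protocol (normal, priority-inherit, or priority-protect).
 */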
static int
_do_lock_umutex(struct thread *td, struct umutex *m, int flags, int timo,
	int mode)
{
	switch (flags & (UMUTEX_PRIO_INHERIT | UMUTEX_PRIO_PROTECT)) {
	case 0:
		return (_do_lock_normal(td, m, flags, timo, mode));
	case UMUTEX_PRIO_INHERIT:
		return (_do_lock_pi(td, m, flags, timo, mode));
	case UMUTEX_PRIO_PROTECT:
		return (_do_lock_pp(td, m, flags, timo, mode));
	}
	return (EINVAL);
}

/*
 * Lock a userland POSIX mutex.
 */
static int
do_lock_umutex(struct thread *td, struct umutex *m,
	struct timespec *timeout, int mode)
{
	struct timespec ts, ts2, ts3;
	struct timeval tv;
	uint32_t flags;
	int error;

	flags = fuword32(&m->m_flags);
	if (flags == -1)
		return (EFAULT);

	if (timeout == NULL) {
		error = _do_lock_umutex(td, m, flags, 0, mode);
		/* Mutex locking is restarted if it is interrupted. */
		if (error == EINTR && mode != _UMUTEX_WAIT)
			error = ERESTART;
	} else {
		getnanouptime(&ts);
		timespecadd(&ts, timeout);
		TIMESPEC_TO_TIMEVAL(&tv, timeout);
		for (;;) {
			error = _do_lock_umutex(td, m, flags, tvtohz(&tv), mode);
			if (error != ETIMEDOUT)
				break;
			getnanouptime(&ts2);
			if (timespeccmp(&ts2, &ts, >=)) {
				error = ETIMEDOUT;
				break;
			}
			ts3 = ts;
			timespecsub(&ts3, &ts2);
			TIMESPEC_TO_TIMEVAL(&tv, &ts3);
		}
		/* Timed-locking is not restarted. */
		if (error == ERESTART)
			error = EINTR;
	}
	return (error);
}

/*
 * Unlock a userland POSIX mutex.
 */
static int
do_unlock_umutex(struct thread *td, struct umutex *m)
{
	uint32_t flags;

	flags = fuword32(&m->m_flags);
	if (flags == -1)
		return (EFAULT);

	switch (flags & (UMUTEX_PRIO_INHERIT | UMUTEX_PRIO_PROTECT)) {
	case 0:
		return (do_unlock_normal(td, m, flags));
	case UMUTEX_PRIO_INHERIT:
		return (do_unlock_pi(td, m, flags));
	case UMUTEX_PRIO_PROTECT:
		return (do_unlock_pp(td, m, flags));
	}

	return (EINVAL);
}

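/*
 * Wait on a userland condition variable; the associated userland
 * mutex is released on the waiter's behalf before sleeping.
 */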
static int
do_cv_wait(struct thread *td, struct ucond *cv, struct umutex *m,
	struct timespec *timeout, u_long wflags)
{
	struct umtx_q *uq;
	struct timeval tv;
	struct timespec cts, ets, tts;
	uint32_t flags;
	int error;

	uq = td->td_umtxq;
	flags = fuword32(&cv->c_flags);
	error = umtx_key_get(cv, TYPE_CV, GET_SHARE(flags), &uq->uq_key);
	if (error != 0)
		return (error);
	umtxq_lock(&uq->uq_key);
	umtxq_busy(&uq->uq_key);
	umtxq_insert(uq);
	umtxq_unlock(&uq->uq_key);

	/*
	 * We must set c_has_waiters to 1 before releasing the user
	 * mutex, so that a signalling thread cannot miss this waiter.
	 */
	suword32(__DEVOLATILE(uint32_t *, &cv->c_has_waiters), 1);

	umtxq_lock(&uq->uq_key);
	umtxq_unbusy(&uq->uq_key);
	umtxq_unlock(&uq->uq_key);

	error = do_unlock_umutex(td, m);

	umtxq_lock(&uq->uq_key);
	if (error == 0) {
		if ((wflags & UMTX_CHECK_UNPARKING) &&
		    (td->td_pflags & TDP_WAKEUP)) {
			td->td_pflags &= ~TDP_WAKEUP;
			error = EINTR;
		} else if (timeout == NULL) {
			error = umtxq_sleep(uq, "ucond", 0);
		} else {
			getnanouptime(&ets);
			timespecadd(&ets, timeout);
			TIMESPEC_TO_TIMEVAL(&tv, timeout);
			for (;;) {
				error = umtxq_sleep(uq, "ucond", tvtohz(&tv));
				if (error != ETIMEDOUT)
					break;
				getnanouptime(&cts);
				if (timespeccmp(&cts, &ets, >=)) {
					error = ETIMEDOUT;
					break;
				}
				tts = ets;
				timespecsub(&tts, &cts);
				TIMESPEC_TO_TIMEVAL(&tv, &tts);
			}
		}
	}

	if (error != 0) {
		if ((uq->uq_flags & UQF_UMTXQ) == 0) {
			/*
			 * If do_cv_signal() ran concurrently but we are
			 * nevertheless returning with an error, a UNIX
			 * signal, or a timeout, perform another
			 * umtxq_signal() so the wakeup is not consumed.
			 * This may cause a spurious wakeup of another
			 * thread that was just queued, but SUSv3
			 * explicitly allows spurious wakeups to occur,
			 * and indeed a kernel-based implementation
			 * cannot avoid them.
			 */
			if (!umtxq_signal(&uq->uq_key, 1))
				error = 0;
		}
		if (error == ERESTART)
			error = EINTR;
	}
	umtxq_remove(uq);
	umtxq_unlock(&uq->uq_key);
	umtx_key_release(&uq->uq_key);
	return (error);
}

/*
 * Signal a userland condition variable.
 */
static int
do_cv_signal(struct thread *td, struct ucond *cv)
{
	struct umtx_key key;
	int error, cnt, nwake;
	uint32_t flags;

	flags = fuword32(&cv->c_flags);
	if ((error = umtx_key_get(cv, TYPE_CV, GET_SHARE(flags), &key)) != 0)
		return (error);
	umtxq_lock(&key);
	umtxq_busy(&key);
	cnt = umtxq_count(&key);
	nwake = umtxq_signal(&key, 1);
	if (cnt <= nwake) {
		umtxq_unlock(&key);
		error = suword32(
		    __DEVOLATILE(uint32_t *, &cv->c_has_waiters), 0);
		umtxq_lock(&key);
	}
	umtxq_unbusy(&key);
	umtxq_unlock(&key);
	umtx_key_release(&key);
	return (error);
}

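/*
 * Broadcast a userland condition variable, waking all waiters.
 */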
static int
do_cv_broadcast(struct thread *td, struct ucond *cv)
{
	struct umtx_key key;
	int error;
	uint32_t flags;

	flags = fuword32(&cv->c_flags);
	if ((error = umtx_key_get(cv, TYPE_CV, GET_SHARE(flags), &key)) != 0)
		return (error);

	umtxq_lock(&key);
	umtxq_busy(&key);
	umtxq_signal(&key, INT_MAX);
	umtxq_unlock(&key);

	error = suword32(__DEVOLATILE(uint32_t *, &cv->c_has_waiters), 0);

	umtxq_lock(&key);
	umtxq_unbusy(&key);
	umtxq_unlock(&key);

	umtx_key_release(&key);
	return (error);
}

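/*
 * Acquire a userland read/write lock for reading.  Userland is
 * expected to attempt the uncontested case itself first; an
 * illustrative sketch of that fast path (hypothetical userland
 * code, not part of this file):
 *
 *	int32_t s = rw->rw_state;
 *	if (!(s & URWLOCK_WRITE_OWNER) &&
 *	    atomic_cmpset_acq_32((volatile uint32_t *)&rw->rw_state,
 *	    s, s + 1))
 *		return (0);	(took a read slot, no system call)
 */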
static int
do_rw_rdlock(struct thread *td, struct urwlock *rwlock, long fflag, int timo)
{
	struct umtx_q *uq;
	uint32_t flags, wrflags;
	int32_t state, oldstate;
	int32_t blocked_readers;
	int error;

	uq = td->td_umtxq;
	flags = fuword32(&rwlock->rw_flags);
	error = umtx_key_get(rwlock, TYPE_RWLOCK, GET_SHARE(flags), &uq->uq_key);
	if (error != 0)
		return (error);

	wrflags = URWLOCK_WRITE_OWNER;
	if (!(fflag & URWLOCK_PREFER_READER) && !(flags & URWLOCK_PREFER_READER))
		wrflags |= URWLOCK_WRITE_WAITERS;

	for (;;) {
		state = fuword32(__DEVOLATILE(int32_t *, &rwlock->rw_state));
		/* try to lock it */
		while (!(state & wrflags)) {
			if (__predict_false(URWLOCK_READER_COUNT(state) == URWLOCK_MAX_READERS)) {
				umtx_key_release(&uq->uq_key);
				return (EAGAIN);
			}
			oldstate = casuword32(&rwlock->rw_state, state, state + 1);
			if (oldstate == state) {
				umtx_key_release(&uq->uq_key);
				return (0);
			}
			state = oldstate;
		}

		if (error)
			break;

		/* grab monitor lock */
		umtxq_lock(&uq->uq_key);
		umtxq_busy(&uq->uq_key);
		umtxq_unlock(&uq->uq_key);

		/* set read contention bit */
		while ((state & wrflags) && !(state & URWLOCK_READ_WAITERS)) {
			oldstate = casuword32(&rwlock->rw_state, state, state | URWLOCK_READ_WAITERS);
			if (oldstate == state)
				goto sleep;
			state = oldstate;
		}

		/* the state changed while we were setting the flags; restart */
		if (!(state & wrflags)) {
			umtxq_lock(&uq->uq_key);
			umtxq_unbusy(&uq->uq_key);
			umtxq_unlock(&uq->uq_key);
			continue;
		}

sleep:
		/*
		 * The contention bit is set; increase the read-waiter
		 * count before sleeping.
		 */
		blocked_readers = fuword32(&rwlock->rw_blocked_readers);
		suword32(&rwlock->rw_blocked_readers, blocked_readers+1);

		while (state & wrflags) {
			umtxq_lock(&uq->uq_key);
			umtxq_insert(uq);
			umtxq_unbusy(&uq->uq_key);

			error = umtxq_sleep(uq, "urdlck", timo);

			umtxq_busy(&uq->uq_key);
			umtxq_remove(uq);
			umtxq_unlock(&uq->uq_key);
			if (error)
				break;
			state = fuword32(__DEVOLATILE(int32_t *, &rwlock->rw_state));
		}

		/*
		 * Decrease the read-waiter count; if we were the last
		 * blocked reader, clear the read contention bit.
		 */
		blocked_readers = fuword32(&rwlock->rw_blocked_readers);
		suword32(&rwlock->rw_blocked_readers, blocked_readers-1);
		if (blocked_readers == 1) {
			state = fuword32(__DEVOLATILE(int32_t *, &rwlock->rw_state));
			for (;;) {
				oldstate = casuword32(&rwlock->rw_state, state,
					 state & ~URWLOCK_READ_WAITERS);
				if (oldstate == state)
					break;
				state = oldstate;
			}
		}

		umtxq_lock(&uq->uq_key);
		umtxq_unbusy(&uq->uq_key);
		umtxq_unlock(&uq->uq_key);
	}
	umtx_key_release(&uq->uq_key);
	return (error);
}

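/*
 * Timed version of do_rw_rdlock(): convert the relative timeout
 * into an absolute deadline and retry with the remaining time
 * until the deadline passes.
 */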
static int
do_rw_rdlock2(struct thread *td, void *obj, long val, struct timespec *timeout)
{
	struct timespec ts, ts2, ts3;
	struct timeval tv;
	int error;

	getnanouptime(&ts);
	timespecadd(&ts, timeout);
	TIMESPEC_TO_TIMEVAL(&tv, timeout);
	for (;;) {
		error = do_rw_rdlock(td, obj, val, tvtohz(&tv));
		if (error != ETIMEDOUT)
			break;
		getnanouptime(&ts2);
		if (timespeccmp(&ts2, &ts, >=)) {
			error = ETIMEDOUT;
			break;
		}
		ts3 = ts;
		timespecsub(&ts3, &ts2);
		TIMESPEC_TO_TIMEVAL(&tv, &ts3);
	}
	if (error == ERESTART)
		error = EINTR;
	return (error);
}

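/*
 * Acquire a userland read/write lock for writing.
 */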
static int
do_rw_wrlock(struct thread *td, struct urwlock *rwlock, int timo)
{
	struct umtx_q *uq;
	uint32_t flags;
	int32_t state, oldstate;
	int32_t blocked_writers;
	int32_t blocked_readers;
	int error;

	uq = td->td_umtxq;
	flags = fuword32(&rwlock->rw_flags);
	error = umtx_key_get(rwlock, TYPE_RWLOCK, GET_SHARE(flags), &uq->uq_key);
	if (error != 0)
		return (error);

	blocked_readers = 0;
	for (;;) {
		state = fuword32(__DEVOLATILE(int32_t *, &rwlock->rw_state));
		while (!(state & URWLOCK_WRITE_OWNER) && URWLOCK_READER_COUNT(state) == 0) {
			oldstate = casuword32(&rwlock->rw_state, state, state | URWLOCK_WRITE_OWNER);
			if (oldstate == state) {
				umtx_key_release(&uq->uq_key);
				return (0);
			}
			state = oldstate;
		}

		if (error) {
			if (!(state & (URWLOCK_WRITE_OWNER|URWLOCK_WRITE_WAITERS)) &&
			    blocked_readers != 0) {
				umtxq_lock(&uq->uq_key);
				umtxq_busy(&uq->uq_key);
				umtxq_signal_queue(&uq->uq_key, INT_MAX, UMTX_SHARED_QUEUE);
				umtxq_unbusy(&uq->uq_key);
				umtxq_unlock(&uq->uq_key);
			}

			break;
		}

		/* grab monitor lock */
		umtxq_lock(&uq->uq_key);
		umtxq_busy(&uq->uq_key);
		umtxq_unlock(&uq->uq_key);

		while (((state & URWLOCK_WRITE_OWNER) || URWLOCK_READER_COUNT(state) != 0) &&
		       (state & URWLOCK_WRITE_WAITERS) == 0) {
			oldstate = casuword32(&rwlock->rw_state, state, state | URWLOCK_WRITE_WAITERS);
			if (oldstate == state)
				goto sleep;
			state = oldstate;
		}

		if (!(state & URWLOCK_WRITE_OWNER) && URWLOCK_READER_COUNT(state) == 0) {
			umtxq_lock(&uq->uq_key);
			umtxq_unbusy(&uq->uq_key);
			umtxq_unlock(&uq->uq_key);
			continue;
		}
sleep:
		blocked_writers = fuword32(&rwlock->rw_blocked_writers);
		suword32(&rwlock->rw_blocked_writers, blocked_writers+1);

		while ((state & URWLOCK_WRITE_OWNER) || URWLOCK_READER_COUNT(state) != 0) {
			umtxq_lock(&uq->uq_key);
			umtxq_insert_queue(uq, UMTX_EXCLUSIVE_QUEUE);
			umtxq_unbusy(&uq->uq_key);

			error = umtxq_sleep(uq, "uwrlck", timo);

			umtxq_busy(&uq->uq_key);
			umtxq_remove_queue(uq, UMTX_EXCLUSIVE_QUEUE);
			umtxq_unlock(&uq->uq_key);
			if (error)
				break;
			state = fuword32(__DEVOLATILE(int32_t *, &rwlock->rw_state));
		}

		blocked_writers = fuword32(&rwlock->rw_blocked_writers);
		suword32(&rwlock->rw_blocked_writers, blocked_writers-1);
		if (blocked_writers == 1) {
			state = fuword32(__DEVOLATILE(int32_t *, &rwlock->rw_state));
			for (;;) {
				oldstate = casuword32(&rwlock->rw_state, state,
					 state & ~URWLOCK_WRITE_WAITERS);
				if (oldstate == state)
					break;
				state = oldstate;
			}
			blocked_readers = fuword32(&rwlock->rw_blocked_readers);
		} else
			blocked_readers = 0;

		umtxq_lock(&uq->uq_key);
		umtxq_unbusy(&uq->uq_key);
		umtxq_unlock(&uq->uq_key);
	}

	umtx_key_release(&uq->uq_key);
	return (error);
}

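/*
 * Timed version of do_rw_wrlock().
 */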
static int
do_rw_wrlock2(struct thread *td, void *obj, struct timespec *timeout)
{
	struct timespec ts, ts2, ts3;
	struct timeval tv;
	int error;

	getnanouptime(&ts);
	timespecadd(&ts, timeout);
	TIMESPEC_TO_TIMEVAL(&tv, timeout);
	for (;;) {
		error = do_rw_wrlock(td, obj, tvtohz(&tv));
		if (error != ETIMEDOUT)
			break;
		getnanouptime(&ts2);
		if (timespeccmp(&ts2, &ts, >=)) {
			error = ETIMEDOUT;
			break;
		}
		ts3 = ts;
		timespecsub(&ts3, &ts2);
		TIMESPEC_TO_TIMEVAL(&tv, &ts3);
	}
	if (error == ERESTART)
		error = EINTR;
	return (error);
}

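/*
 * Release a userland read/write lock, preferring to wake blocked
 * writers unless the lock was created with URWLOCK_PREFER_READER.
 */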
static int
do_rw_unlock(struct thread *td, struct urwlock *rwlock)
{
	struct umtx_q *uq;
	uint32_t flags;
	int32_t state, oldstate;
	int error, q, count;

	uq = td->td_umtxq;
	flags = fuword32(&rwlock->rw_flags);
	error = umtx_key_get(rwlock, TYPE_RWLOCK, GET_SHARE(flags), &uq->uq_key);
	if (error != 0)
		return (error);

	state = fuword32(__DEVOLATILE(int32_t *, &rwlock->rw_state));
	if (state & URWLOCK_WRITE_OWNER) {
		for (;;) {
			oldstate = casuword32(&rwlock->rw_state, state,
				state & ~URWLOCK_WRITE_OWNER);
			if (oldstate != state) {
				state = oldstate;
				if (!(oldstate & URWLOCK_WRITE_OWNER)) {
					error = EPERM;
					goto out;
				}
			} else
				break;
		}
	} else if (URWLOCK_READER_COUNT(state) != 0) {
		for (;;) {
			oldstate = casuword32(&rwlock->rw_state, state,
				state - 1);
			if (oldstate != state) {
				state = oldstate;
				if (URWLOCK_READER_COUNT(oldstate) == 0) {
					error = EPERM;
					goto out;
				}
			} else
				break;
		}
	} else {
		error = EPERM;
		goto out;
	}

	count = 0;

	if (!(flags & URWLOCK_PREFER_READER)) {
		if (state & URWLOCK_WRITE_WAITERS) {
			count = 1;
			q = UMTX_EXCLUSIVE_QUEUE;
		} else if (state & URWLOCK_READ_WAITERS) {
			count = INT_MAX;
			q = UMTX_SHARED_QUEUE;
		}
	} else {
		if (state & URWLOCK_READ_WAITERS) {
			count = INT_MAX;
			q = UMTX_SHARED_QUEUE;
		} else if (state & URWLOCK_WRITE_WAITERS) {
			count = 1;
			q = UMTX_EXCLUSIVE_QUEUE;
		}
	}

	if (count) {
		umtxq_lock(&uq->uq_key);
		umtxq_busy(&uq->uq_key);
		umtxq_signal_queue(&uq->uq_key, count, q);
		umtxq_unbusy(&uq->uq_key);
		umtxq_unlock(&uq->uq_key);
	}
out:
	umtx_key_release(&uq->uq_key);
	return (error);
}

int
_umtx_lock(struct thread *td, struct _umtx_lock_args *uap)
    /* struct umtx *umtx */
{
	return _do_lock_umtx(td, uap->umtx, td->td_tid, 0);
}

int
_umtx_unlock(struct thread *td, struct _umtx_unlock_args *uap)
    /* struct umtx *umtx */
{
	return do_unlock_umtx(td, uap->umtx, td->td_tid);
}

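/*
 * Handlers for the individual _umtx_op() operations follow.  As an
 * illustration only (a hypothetical userland sketch, not part of
 * this file), a plain userland mutex would reach the handlers below
 * via something like:
 *
 *	struct umutex m = { .m_owner = UMUTEX_UNOWNED };
 *	_umtx_op(&m, UMTX_OP_MUTEX_LOCK, 0, NULL, NULL);
 *	_umtx_op(&m, UMTX_OP_MUTEX_UNLOCK, 0, NULL, NULL);
 */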
2776162536Sdavidxustatic int
2777162536Sdavidxu__umtx_op_lock_umtx(struct thread *td, struct _umtx_op_args *uap)
2778139013Sdavidxu{
2779162536Sdavidxu	struct timespec *ts, timeout;
2780139013Sdavidxu	int error;
2781139013Sdavidxu
2782162536Sdavidxu	/* Allow a null timespec (wait forever). */
2783162536Sdavidxu	if (uap->uaddr2 == NULL)
2784162536Sdavidxu		ts = NULL;
2785162536Sdavidxu	else {
2786162536Sdavidxu		error = copyin(uap->uaddr2, &timeout, sizeof(timeout));
2787162536Sdavidxu		if (error != 0)
2788162536Sdavidxu			return (error);
2789162536Sdavidxu		if (timeout.tv_nsec >= 1000000000 ||
2790162536Sdavidxu		    timeout.tv_nsec < 0) {
2791162536Sdavidxu			return (EINVAL);
2792161678Sdavidxu		}
2793162536Sdavidxu		ts = &timeout;
2794162536Sdavidxu	}
2795162536Sdavidxu	return (do_lock_umtx(td, uap->obj, uap->val, ts));
2796162536Sdavidxu}
2797162536Sdavidxu
2798162536Sdavidxustatic int
2799162536Sdavidxu__umtx_op_unlock_umtx(struct thread *td, struct _umtx_op_args *uap)
2800162536Sdavidxu{
2801162536Sdavidxu	return (do_unlock_umtx(td, uap->obj, uap->val));
2802162536Sdavidxu}
2803162536Sdavidxu
2804162536Sdavidxustatic int
2805162536Sdavidxu__umtx_op_wait(struct thread *td, struct _umtx_op_args *uap)
2806162536Sdavidxu{
2807162536Sdavidxu	struct timespec *ts, timeout;
2808162536Sdavidxu	int error;
2809162536Sdavidxu
2810162536Sdavidxu	if (uap->uaddr2 == NULL)
2811162536Sdavidxu		ts = NULL;
2812162536Sdavidxu	else {
2813162536Sdavidxu		error = copyin(uap->uaddr2, &timeout, sizeof(timeout));
2814162536Sdavidxu		if (error != 0)
2815162536Sdavidxu			return (error);
2816162536Sdavidxu		if (timeout.tv_nsec >= 1000000000 ||
2817162536Sdavidxu		    timeout.tv_nsec < 0)
2818162536Sdavidxu			return (EINVAL);
2819162536Sdavidxu		ts = &timeout;
2820162536Sdavidxu	}
2821178646Sdavidxu	return (do_wait(td, uap->obj, uap->val, ts, 0, 0));
2822162536Sdavidxu}
2823162536Sdavidxu
2824162536Sdavidxustatic int
2825173800Sdavidxu__umtx_op_wait_uint(struct thread *td, struct _umtx_op_args *uap)
2826173800Sdavidxu{
2827173800Sdavidxu	struct timespec *ts, timeout;
2828173800Sdavidxu	int error;
2829173800Sdavidxu
2830173800Sdavidxu	if (uap->uaddr2 == NULL)
2831173800Sdavidxu		ts = NULL;
2832173800Sdavidxu	else {
2833173800Sdavidxu		error = copyin(uap->uaddr2, &timeout, sizeof(timeout));
2834173800Sdavidxu		if (error != 0)
2835173800Sdavidxu			return (error);
2836173800Sdavidxu		if (timeout.tv_nsec >= 1000000000 ||
2837173800Sdavidxu		    timeout.tv_nsec < 0)
2838173800Sdavidxu			return (EINVAL);
2839173800Sdavidxu		ts = &timeout;
2840173800Sdavidxu	}
2841178646Sdavidxu	return (do_wait(td, uap->obj, uap->val, ts, 1, 0));
2842173800Sdavidxu}
2843173800Sdavidxu
2844173800Sdavidxustatic int
2845178646Sdavidxu__umtx_op_wait_uint_private(struct thread *td, struct _umtx_op_args *uap)
2846178646Sdavidxu{
2847178646Sdavidxu	struct timespec *ts, timeout;
2848178646Sdavidxu	int error;
2849178646Sdavidxu
2850178646Sdavidxu	if (uap->uaddr2 == NULL)
2851178646Sdavidxu		ts = NULL;
2852178646Sdavidxu	else {
2853178646Sdavidxu		error = copyin(uap->uaddr2, &timeout, sizeof(timeout));
2854178646Sdavidxu		if (error != 0)
2855178646Sdavidxu			return (error);
2856178646Sdavidxu		if (timeout.tv_nsec >= 1000000000 ||
2857178646Sdavidxu		    timeout.tv_nsec < 0)
2858178646Sdavidxu			return (EINVAL);
2859178646Sdavidxu		ts = &timeout;
2860178646Sdavidxu	}
2861178646Sdavidxu	return (do_wait(td, uap->obj, uap->val, ts, 1, 1));
2862178646Sdavidxu}
2863178646Sdavidxu
2864178646Sdavidxustatic int
2865162536Sdavidxu__umtx_op_wake(struct thread *td, struct _umtx_op_args *uap)
2866162536Sdavidxu{
2867178646Sdavidxu	return (kern_umtx_wake(td, uap->obj, uap->val, 0));
2868162536Sdavidxu}
2869162536Sdavidxu
2870162536Sdavidxustatic int
2871178646Sdavidxu__umtx_op_wake_private(struct thread *td, struct _umtx_op_args *uap)
2872178646Sdavidxu{
2873178646Sdavidxu	return (kern_umtx_wake(td, uap->obj, uap->val, 1));
2874178646Sdavidxu}
2875178646Sdavidxu
2876178646Sdavidxustatic int
2877162536Sdavidxu__umtx_op_lock_umutex(struct thread *td, struct _umtx_op_args *uap)
2878162536Sdavidxu{
2879162536Sdavidxu	struct timespec *ts, timeout;
2880162536Sdavidxu	int error;
2881162536Sdavidxu
2882162536Sdavidxu	/* Allow a null timespec (wait forever). */
2883162536Sdavidxu	if (uap->uaddr2 == NULL)
2884162536Sdavidxu		ts = NULL;
2885162536Sdavidxu	else {
2886162536Sdavidxu		error = copyin(uap->uaddr2, &timeout,
2887162536Sdavidxu		    sizeof(timeout));
2888162536Sdavidxu		if (error != 0)
2889162536Sdavidxu			return (error);
2890162536Sdavidxu		if (timeout.tv_nsec >= 1000000000 ||
2891162536Sdavidxu		    timeout.tv_nsec < 0) {
2892162536Sdavidxu			return (EINVAL);
2893139013Sdavidxu		}
2894162536Sdavidxu		ts = &timeout;
2895139013Sdavidxu	}
2896162536Sdavidxu	return (do_lock_umutex(td, uap->obj, ts, 0));
2897162536Sdavidxu}
2898162536Sdavidxu
2899162536Sdavidxustatic int
2900162536Sdavidxu__umtx_op_trylock_umutex(struct thread *td, struct _umtx_op_args *uap)
2901162536Sdavidxu{
2902179970Sdavidxu	return (do_lock_umutex(td, uap->obj, NULL, _UMUTEX_TRY));
2903162536Sdavidxu}
2904162536Sdavidxu
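/*
 * UMTX_OP_MUTEX_WAIT sleeps until the mutex may have become available
 * but does not acquire it; the userland caller retries the acquisition
 * itself and uses UMTX_OP_MUTEX_WAKE to unblock such waiters.
 */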
2905162536Sdavidxustatic int
2906179970Sdavidxu__umtx_op_wait_umutex(struct thread *td, struct _umtx_op_args *uap)
2907179970Sdavidxu{
2908179970Sdavidxu	struct timespec *ts, timeout;
2909179970Sdavidxu	int error;
2910179970Sdavidxu
2911179970Sdavidxu	/* Allow a null timespec (wait forever). */
2912179970Sdavidxu	if (uap->uaddr2 == NULL)
2913179970Sdavidxu		ts = NULL;
2914179970Sdavidxu	else {
2915179970Sdavidxu		error = copyin(uap->uaddr2, &timeout,
2916179970Sdavidxu		    sizeof(timeout));
2917179970Sdavidxu		if (error != 0)
2918179970Sdavidxu			return (error);
2919179970Sdavidxu		if (timeout.tv_nsec >= 1000000000 ||
2920179970Sdavidxu		    timeout.tv_nsec < 0) {
2921179970Sdavidxu			return (EINVAL);
2922179970Sdavidxu		}
2923179970Sdavidxu		ts = &timeout;
2924179970Sdavidxu	}
2925179970Sdavidxu	return (do_lock_umutex(td, uap->obj, ts, _UMUTEX_WAIT));
2926179970Sdavidxu}
2927179970Sdavidxu
2928179970Sdavidxustatic int
2929179970Sdavidxu__umtx_op_wake_umutex(struct thread *td, struct _umtx_op_args *uap)
2930179970Sdavidxu{
2931179970Sdavidxu	return (do_wake_umutex(td, uap->obj));
2932179970Sdavidxu}
2933179970Sdavidxu
2934179970Sdavidxustatic int
2935162536Sdavidxu__umtx_op_unlock_umutex(struct thread *td, struct _umtx_op_args *uap)
2936162536Sdavidxu{
2937162536Sdavidxu	return (do_unlock_umutex(td, uap->obj));
2938162536Sdavidxu}
2939162536Sdavidxu
2940162536Sdavidxustatic int
2941162536Sdavidxu__umtx_op_set_ceiling(struct thread *td, struct _umtx_op_args *uap)
2942162536Sdavidxu{
2943162536Sdavidxu	return (do_set_ceiling(td, uap->obj, uap->val, uap->uaddr1));
2944162536Sdavidxu}
2945162536Sdavidxu
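/*
 * Condition variable operations: uap->obj points at the struct ucond;
 * for CV_WAIT, uap->uaddr1 names the associated struct umutex to drop
 * while sleeping and uap->val carries the wait flags.
 */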
2946164839Sdavidxustatic int
2947164839Sdavidxu__umtx_op_cv_wait(struct thread *td, struct _umtx_op_args *uap)
2948164839Sdavidxu{
2949164839Sdavidxu	struct timespec *ts, timeout;
2950164839Sdavidxu	int error;
2951164839Sdavidxu
2952164839Sdavidxu	/* Allow a null timespec (wait forever). */
2953164839Sdavidxu	if (uap->uaddr2 == NULL)
2954164839Sdavidxu		ts = NULL;
2955164839Sdavidxu	else {
2956164839Sdavidxu		error = copyin(uap->uaddr2, &timeout,
2957164839Sdavidxu		    sizeof(timeout));
2958164839Sdavidxu		if (error != 0)
2959164839Sdavidxu			return (error);
2960164839Sdavidxu		if (timeout.tv_nsec >= 1000000000 ||
2961164839Sdavidxu		    timeout.tv_nsec < 0) {
2962164839Sdavidxu			return (EINVAL);
2963164839Sdavidxu		}
2964164839Sdavidxu		ts = &timeout;
2965164839Sdavidxu	}
2966164876Sdavidxu	return (do_cv_wait(td, uap->obj, uap->uaddr1, ts, uap->val));
2967164839Sdavidxu}
2968164839Sdavidxu
2969164839Sdavidxustatic int
2970164839Sdavidxu__umtx_op_cv_signal(struct thread *td, struct _umtx_op_args *uap)
2971164839Sdavidxu{
2972164839Sdavidxu	return (do_cv_signal(td, uap->obj));
2973164839Sdavidxu}
2974164839Sdavidxu
2975164839Sdavidxustatic int
2976164839Sdavidxu__umtx_op_cv_broadcast(struct thread *td, struct _umtx_op_args *uap)
2977164839Sdavidxu{
2978164839Sdavidxu	return (do_cv_broadcast(td, uap->obj));
2979164839Sdavidxu}
2980164839Sdavidxu
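/*
 * Reader/writer lock operations.  The untimed and timed cases take
 * separate internal paths (e.g. do_rw_rdlock() vs. do_rw_rdlock2()),
 * so the timeout is validated here before the variant is chosen.
 */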
2981177848Sdavidxustatic int
2982177848Sdavidxu__umtx_op_rw_rdlock(struct thread *td, struct _umtx_op_args *uap)
2983177848Sdavidxu{
2984177848Sdavidxu	struct timespec timeout;
2985177848Sdavidxu	int error;
2986177848Sdavidxu
2987177848Sdavidxu	/* Allow a null timespec (wait forever). */
2988177848Sdavidxu	if (uap->uaddr2 == NULL) {
2989177848Sdavidxu		error = do_rw_rdlock(td, uap->obj, uap->val, 0);
2990177848Sdavidxu	} else {
2991177848Sdavidxu		error = copyin(uap->uaddr2, &timeout,
2992177848Sdavidxu		    sizeof(timeout));
2993177848Sdavidxu		if (error != 0)
2994177848Sdavidxu			return (error);
2995177848Sdavidxu		if (timeout.tv_nsec >= 1000000000 ||
2996177848Sdavidxu		    timeout.tv_nsec < 0) {
2997177848Sdavidxu			return (EINVAL);
2998177848Sdavidxu		}
2999177848Sdavidxu		error = do_rw_rdlock2(td, uap->obj, uap->val, &timeout);
3000177848Sdavidxu	}
3001177848Sdavidxu	return (error);
3002177848Sdavidxu}
3003177848Sdavidxu
3004177848Sdavidxustatic int
3005177848Sdavidxu__umtx_op_rw_wrlock(struct thread *td, struct _umtx_op_args *uap)
3006177848Sdavidxu{
3007177848Sdavidxu	struct timespec timeout;
3008177848Sdavidxu	int error;
3009177848Sdavidxu
3010177848Sdavidxu	/* Allow a null timespec (wait forever). */
3011177848Sdavidxu	if (uap->uaddr2 == NULL) {
3012177848Sdavidxu		error = do_rw_wrlock(td, uap->obj, 0);
3013177848Sdavidxu	} else {
3014177848Sdavidxu		error = copyin(uap->uaddr2, &timeout,
3015177848Sdavidxu		    sizeof(timeout));
3016177848Sdavidxu		if (error != 0)
3017177848Sdavidxu			return (error);
3018177848Sdavidxu		if (timeout.tv_nsec >= 1000000000 ||
3019177848Sdavidxu		    timeout.tv_nsec < 0) {
3020177848Sdavidxu			return (EINVAL);
3021177848Sdavidxu		}
3022177848Sdavidxu
3023177848Sdavidxu		error = do_rw_wrlock2(td, uap->obj, &timeout);
3024177848Sdavidxu	}
3025177848Sdavidxu	return (error);
3026177848Sdavidxu}
3027177848Sdavidxu
3028177848Sdavidxustatic int
3029177848Sdavidxu__umtx_op_rw_unlock(struct thread *td, struct _umtx_op_args *uap)
3030177848Sdavidxu{
3031177880Sdavidxu	return (do_rw_unlock(td, uap->obj));
3032177848Sdavidxu}
3033177848Sdavidxu
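/*
 * _umtx_op(2) dispatch table, indexed by the UMTX_OP_* constant in
 * uap->op; _umtx_op() bounds-checks the index against UMTX_OP_MAX
 * before dispatching.
 */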
3034162536Sdavidxutypedef int (*_umtx_op_func)(struct thread *td, struct _umtx_op_args *uap);
3035162536Sdavidxu
3036162536Sdavidxustatic _umtx_op_func op_table[] = {
3037162536Sdavidxu	__umtx_op_lock_umtx,		/* UMTX_OP_LOCK */
3038162536Sdavidxu	__umtx_op_unlock_umtx,		/* UMTX_OP_UNLOCK */
3039162536Sdavidxu	__umtx_op_wait,			/* UMTX_OP_WAIT */
3040162536Sdavidxu	__umtx_op_wake,			/* UMTX_OP_WAKE */
3041162536Sdavidxu	__umtx_op_trylock_umutex,	/* UMTX_OP_MUTEX_TRYLOCK */
3042162536Sdavidxu	__umtx_op_lock_umutex,		/* UMTX_OP_MUTEX_LOCK */
3043162536Sdavidxu	__umtx_op_unlock_umutex,	/* UMTX_OP_MUTEX_UNLOCK */
3044164839Sdavidxu	__umtx_op_set_ceiling,		/* UMTX_OP_SET_CEILING */
3045164839Sdavidxu	__umtx_op_cv_wait,		/* UMTX_OP_CV_WAIT */
3046164839Sdavidxu	__umtx_op_cv_signal,		/* UMTX_OP_CV_SIGNAL */
3047173800Sdavidxu	__umtx_op_cv_broadcast,		/* UMTX_OP_CV_BROADCAST */
3048177848Sdavidxu	__umtx_op_wait_uint,		/* UMTX_OP_WAIT_UINT */
3049177848Sdavidxu	__umtx_op_rw_rdlock,		/* UMTX_OP_RW_RDLOCK */
3050177848Sdavidxu	__umtx_op_rw_wrlock,		/* UMTX_OP_RW_WRLOCK */
3051178646Sdavidxu	__umtx_op_rw_unlock,		/* UMTX_OP_RW_UNLOCK */
3052178646Sdavidxu	__umtx_op_wait_uint_private,	/* UMTX_OP_WAIT_UINT_PRIVATE */
3053179970Sdavidxu	__umtx_op_wake_private,		/* UMTX_OP_WAKE_PRIVATE */
3054179970Sdavidxu	__umtx_op_wait_umutex,		/* UMTX_OP_UMUTEX_WAIT */
3055179970Sdavidxu	__umtx_op_wake_umutex		/* UMTX_OP_UMUTEX_WAKE */
3056162536Sdavidxu};
3057162536Sdavidxu
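/*
 * Illustrative userland use of the wait/wake pair (a sketch assuming
 * the _umtx_op(2) wrapper; not code from this file): a thread parks
 * itself while a word still holds an observed value, and a peer wakes
 * it after changing the word:
 *
 *	u_long x = 1;
 *	_umtx_op(&x, UMTX_OP_WAIT, 1, NULL, NULL);	(sleep while x == 1)
 *
 *	x = 0;						(from another thread)
 *	_umtx_op(&x, UMTX_OP_WAKE, 1, NULL, NULL);	(wake one waiter)
 */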
3058162536Sdavidxuint
3059162536Sdavidxu_umtx_op(struct thread *td, struct _umtx_op_args *uap)
3060162536Sdavidxu{
3061163678Sdavidxu	if ((unsigned)uap->op < UMTX_OP_MAX)
3062162536Sdavidxu		return (*op_table[uap->op])(td, uap);
3063162536Sdavidxu	return (EINVAL);
3064162536Sdavidxu}
3065162536Sdavidxu
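/*
 * COMPAT_IA32: 32-bit processes on a 64-bit kernel use 32-bit umtx
 * words and pass struct timespec32, so the timed operations get compat
 * wrappers that widen the timeout before calling the common code.
 */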
3066162536Sdavidxu#ifdef COMPAT_IA32
3067163046Sdavidxuint
3068163046Sdavidxufreebsd32_umtx_lock(struct thread *td, struct freebsd32_umtx_lock_args *uap)
3069163046Sdavidxu    /* struct umtx *umtx */
3070163046Sdavidxu{
3071163046Sdavidxu	return (do_lock_umtx32(td, (uint32_t *)uap->umtx, td->td_tid, NULL));
3072163046Sdavidxu}
3073163046Sdavidxu
3074163046Sdavidxuint
3075163046Sdavidxufreebsd32_umtx_unlock(struct thread *td, struct freebsd32_umtx_unlock_args *uap)
3076163046Sdavidxu    /* struct umtx *umtx */
3077163046Sdavidxu{
3078163046Sdavidxu	return (do_unlock_umtx32(td, (uint32_t *)uap->umtx, td->td_tid));
3079163046Sdavidxu}
3080163046Sdavidxu
3081162536Sdavidxustruct timespec32 {
3082162536Sdavidxu	u_int32_t tv_sec;
3083162536Sdavidxu	u_int32_t tv_nsec;
3084162536Sdavidxu};
3085162536Sdavidxu
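/*
 * Copy a 32-bit timespec in from userland and widen it to the native
 * struct timespec; range checking of tv_nsec is left to the callers,
 * as in the native handlers.
 */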
3086162536Sdavidxustatic inline int
3087162536Sdavidxucopyin_timeout32(void *addr, struct timespec *tsp)
3088162536Sdavidxu{
3089162536Sdavidxu	struct timespec32 ts32;
3090162536Sdavidxu	int error;
3091162536Sdavidxu
3092162536Sdavidxu	error = copyin(addr, &ts32, sizeof(struct timespec32));
3093162536Sdavidxu	if (error == 0) {
3094162536Sdavidxu		tsp->tv_sec = ts32.tv_sec;
3095162536Sdavidxu		tsp->tv_nsec = ts32.tv_nsec;
3096162536Sdavidxu	}
3097140421Sdavidxu	return (error);
3098139013Sdavidxu}
3099161678Sdavidxu
3100162536Sdavidxustatic int
3101162536Sdavidxu__umtx_op_lock_umtx_compat32(struct thread *td, struct _umtx_op_args *uap)
3102162536Sdavidxu{
3103162536Sdavidxu	struct timespec *ts, timeout;
3104162536Sdavidxu	int error;
3105162536Sdavidxu
3106162536Sdavidxu	/* Allow a null timespec (wait forever). */
3107162536Sdavidxu	if (uap->uaddr2 == NULL)
3108162536Sdavidxu		ts = NULL;
3109162536Sdavidxu	else {
3110162536Sdavidxu		error = copyin_timeout32(uap->uaddr2, &timeout);
3111162536Sdavidxu		if (error != 0)
3112162536Sdavidxu			return (error);
3113162536Sdavidxu		if (timeout.tv_nsec >= 1000000000 ||
3114162536Sdavidxu		    timeout.tv_nsec < 0) {
3115162536Sdavidxu			return (EINVAL);
3116162536Sdavidxu		}
3117162536Sdavidxu		ts = &timeout;
3118162536Sdavidxu	}
3119162536Sdavidxu	return (do_lock_umtx32(td, uap->obj, uap->val, ts));
3120162536Sdavidxu}
3121162536Sdavidxu
3122162536Sdavidxustatic int
3123162536Sdavidxu__umtx_op_unlock_umtx_compat32(struct thread *td, struct _umtx_op_args *uap)
3124162536Sdavidxu{
3125162536Sdavidxu	return (do_unlock_umtx32(td, uap->obj, (uint32_t)uap->val));
3126162536Sdavidxu}
3127162536Sdavidxu
3128162536Sdavidxustatic int
3129162536Sdavidxu__umtx_op_wait_compat32(struct thread *td, struct _umtx_op_args *uap)
3130162536Sdavidxu{
3131162536Sdavidxu	struct timespec *ts, timeout;
3132162536Sdavidxu	int error;
3133162536Sdavidxu
3134162536Sdavidxu	if (uap->uaddr2 == NULL)
3135162536Sdavidxu		ts = NULL;
3136162536Sdavidxu	else {
3137162536Sdavidxu		error = copyin_timeout32(uap->uaddr2, &timeout);
3138162536Sdavidxu		if (error != 0)
3139162536Sdavidxu			return (error);
3140162536Sdavidxu		if (timeout.tv_nsec >= 1000000000 ||
3141162536Sdavidxu		    timeout.tv_nsec < 0)
3142162536Sdavidxu			return (EINVAL);
3143162536Sdavidxu		ts = &timeout;
3144162536Sdavidxu	}
3145178646Sdavidxu	return (do_wait(td, uap->obj, uap->val, ts, 1, 0));
3146162536Sdavidxu}
3147162536Sdavidxu
3148162536Sdavidxustatic int
3149162536Sdavidxu__umtx_op_lock_umutex_compat32(struct thread *td, struct _umtx_op_args *uap)
3150162536Sdavidxu{
3151162536Sdavidxu	struct timespec *ts, timeout;
3152162536Sdavidxu	int error;
3153162536Sdavidxu
3154162536Sdavidxu	/* Allow a null timespec (wait forever). */
3155162536Sdavidxu	if (uap->uaddr2 == NULL)
3156162536Sdavidxu		ts = NULL;
3157162536Sdavidxu	else {
3158162536Sdavidxu		error = copyin_timeout32(uap->uaddr2, &timeout);
3159162536Sdavidxu		if (error != 0)
3160162536Sdavidxu			return (error);
3161162536Sdavidxu		if (timeout.tv_nsec >= 1000000000 ||
3162162536Sdavidxu		    timeout.tv_nsec < 0)
3163162536Sdavidxu			return (EINVAL);
3164162536Sdavidxu		ts = &timeout;
3165162536Sdavidxu	}
3166162536Sdavidxu	return (do_lock_umutex(td, uap->obj, ts, 0));
3167162536Sdavidxu}
3168162536Sdavidxu
3169164839Sdavidxustatic int
3170179970Sdavidxu__umtx_op_wait_umutex_compat32(struct thread *td, struct _umtx_op_args *uap)
3171179970Sdavidxu{
3172179970Sdavidxu	struct timespec *ts, timeout;
3173179970Sdavidxu	int error;
3174179970Sdavidxu
3175179970Sdavidxu	/* Allow a null timespec (wait forever). */
3176179970Sdavidxu	if (uap->uaddr2 == NULL)
3177179970Sdavidxu		ts = NULL;
3178179970Sdavidxu	else {
3179179970Sdavidxu		error = copyin_timeout32(uap->uaddr2, &timeout);
3180179970Sdavidxu		if (error != 0)
3181179970Sdavidxu			return (error);
3182179970Sdavidxu		if (timeout.tv_nsec >= 1000000000 ||
3183179970Sdavidxu		    timeout.tv_nsec < 0)
3184179970Sdavidxu			return (EINVAL);
3185179970Sdavidxu		ts = &timeout;
3186179970Sdavidxu	}
3187179970Sdavidxu	return (do_lock_umutex(td, uap->obj, ts, _UMUTEX_WAIT));
3188179970Sdavidxu}
3189179970Sdavidxu
3190179970Sdavidxustatic int
3191164839Sdavidxu__umtx_op_cv_wait_compat32(struct thread *td, struct _umtx_op_args *uap)
3192164839Sdavidxu{
3193164839Sdavidxu	struct timespec *ts, timeout;
3194164839Sdavidxu	int error;
3195164839Sdavidxu
3196164839Sdavidxu	/* Allow a null timespec (wait forever). */
3197164839Sdavidxu	if (uap->uaddr2 == NULL)
3198164839Sdavidxu		ts = NULL;
3199164839Sdavidxu	else {
3200164839Sdavidxu		error = copyin_timeout32(uap->uaddr2, &timeout);
3201164839Sdavidxu		if (error != 0)
3202164839Sdavidxu			return (error);
3203164839Sdavidxu		if (timeout.tv_nsec >= 1000000000 ||
3204164839Sdavidxu		    timeout.tv_nsec < 0)
3205164839Sdavidxu			return (EINVAL);
3206164839Sdavidxu		ts = &timeout;
3207164839Sdavidxu	}
3208164876Sdavidxu	return (do_cv_wait(td, uap->obj, uap->uaddr1, ts, uap->val));
3209164839Sdavidxu}
3210164839Sdavidxu
3211177848Sdavidxustatic int
3212177848Sdavidxu__umtx_op_rw_rdlock_compat32(struct thread *td, struct _umtx_op_args *uap)
3213177848Sdavidxu{
3214177848Sdavidxu	struct timespec timeout;
3215177848Sdavidxu	int error;
3216177848Sdavidxu
3217177848Sdavidxu	/* Allow a null timespec (wait forever). */
3218177848Sdavidxu	if (uap->uaddr2 == NULL) {
3219177848Sdavidxu		error = do_rw_rdlock(td, uap->obj, uap->val, 0);
3220177848Sdavidxu	} else {
3221177848Sdavidxu		error = copyin_timeout32(uap->uaddr2, &timeout);
3223177848Sdavidxu		if (error != 0)
3224177848Sdavidxu			return (error);
3225177848Sdavidxu		if (timeout.tv_nsec >= 1000000000 ||
3226177848Sdavidxu		    timeout.tv_nsec < 0) {
3227177848Sdavidxu			return (EINVAL);
3228177848Sdavidxu		}
3229177848Sdavidxu		error = do_rw_rdlock2(td, uap->obj, uap->val, &timeout);
3230177848Sdavidxu	}
3231177848Sdavidxu	return (error);
3232177848Sdavidxu}
3233177848Sdavidxu
3234177848Sdavidxustatic int
3235177848Sdavidxu__umtx_op_rw_wrlock_compat32(struct thread *td, struct _umtx_op_args *uap)
3236177848Sdavidxu{
3237177848Sdavidxu	struct timespec timeout;
3238177848Sdavidxu	int error;
3239177848Sdavidxu
3240177848Sdavidxu	/* Allow a null timespec (wait forever). */
3241177848Sdavidxu	if (uap->uaddr2 == NULL) {
3242177852Sdavidxu		error = do_rw_wrlock(td, uap->obj, 0);
3243177848Sdavidxu	} else {
3244177848Sdavidxu		error = copyin_timeout32(uap->uaddr2, &timeout);
3245177848Sdavidxu		if (error != 0)
3246177848Sdavidxu			return (error);
3247177848Sdavidxu		if (timeout.tv_nsec >= 1000000000 ||
3248177848Sdavidxu		    timeout.tv_nsec < 0) {
3249177848Sdavidxu			return (EINVAL);
3250177848Sdavidxu		}
3251177848Sdavidxu
3252177852Sdavidxu		error = do_rw_wrlock2(td, uap->obj, &timeout);
3253177848Sdavidxu	}
3254177848Sdavidxu	return (error);
3255177848Sdavidxu}
3256177848Sdavidxu
3257178646Sdavidxustatic int
3258178646Sdavidxu__umtx_op_wait_uint_private_compat32(struct thread *td, struct _umtx_op_args *uap)
3259178646Sdavidxu{
3260178646Sdavidxu	struct timespec *ts, timeout;
3261178646Sdavidxu	int error;
3262178646Sdavidxu
3263178646Sdavidxu	if (uap->uaddr2 == NULL)
3264178646Sdavidxu		ts = NULL;
3265178646Sdavidxu	else {
3266178646Sdavidxu		error = copyin_timeout32(uap->uaddr2, &timeout);
3267178646Sdavidxu		if (error != 0)
3268178646Sdavidxu			return (error);
3269178646Sdavidxu		if (timeout.tv_nsec >= 1000000000 ||
3270178646Sdavidxu		    timeout.tv_nsec < 0)
3271178646Sdavidxu			return (EINVAL);
3272178646Sdavidxu		ts = &timeout;
3273178646Sdavidxu	}
3274178646Sdavidxu	return (do_wait(td, uap->obj, uap->val, ts, 1, 1));
3275178646Sdavidxu}
3276178646Sdavidxu
3277162536Sdavidxustatic _umtx_op_func op_table_compat32[] = {
3278162536Sdavidxu	__umtx_op_lock_umtx_compat32,	/* UMTX_OP_LOCK */
3279162536Sdavidxu	__umtx_op_unlock_umtx_compat32,	/* UMTX_OP_UNLOCK */
3280162536Sdavidxu	__umtx_op_wait_compat32,	/* UMTX_OP_WAIT */
3281162536Sdavidxu	__umtx_op_wake,			/* UMTX_OP_WAKE */
3282162550Sdavidxu	__umtx_op_trylock_umutex,	/* UMTX_OP_MUTEX_TRYLOCK */
3283162536Sdavidxu	__umtx_op_lock_umutex_compat32,	/* UMTX_OP_MUTEX_LOCK */
3284162536Sdavidxu	__umtx_op_unlock_umutex,	/* UMTX_OP_MUTEX_UNLOCK	*/
3285164839Sdavidxu	__umtx_op_set_ceiling,		/* UMTX_OP_SET_CEILING */
3286164839Sdavidxu	__umtx_op_cv_wait_compat32,	/* UMTX_OP_CV_WAIT */
3287164839Sdavidxu	__umtx_op_cv_signal,		/* UMTX_OP_CV_SIGNAL */
3288173800Sdavidxu	__umtx_op_cv_broadcast,		/* UMTX_OP_CV_BROADCAST */
3289177848Sdavidxu	__umtx_op_wait_compat32,	/* UMTX_OP_WAIT_UINT */
3290177848Sdavidxu	__umtx_op_rw_rdlock_compat32,	/* UMTX_OP_RW_RDLOCK */
3291177848Sdavidxu	__umtx_op_rw_wrlock_compat32,	/* UMTX_OP_RW_WRLOCK */
3292178646Sdavidxu	__umtx_op_rw_unlock,		/* UMTX_OP_RW_UNLOCK */
3293178646Sdavidxu	__umtx_op_wait_uint_private_compat32,	/* UMTX_OP_WAIT_UINT_PRIVATE */
3294179970Sdavidxu	__umtx_op_wake_private,		/* UMTX_OP_WAKE_PRIVATE */
3295179970Sdavidxu	__umtx_op_wait_umutex_compat32, /* UMTX_OP_UMUTEX_WAIT */
3296179970Sdavidxu	__umtx_op_wake_umutex		/* UMTX_OP_UMUTEX_WAKE */
3297162536Sdavidxu};
3298162536Sdavidxu
3299162536Sdavidxuint
3300162536Sdavidxufreebsd32_umtx_op(struct thread *td, struct freebsd32_umtx_op_args *uap)
3301162536Sdavidxu{
3302163678Sdavidxu	if ((unsigned)uap->op < UMTX_OP_MAX)
3303162536Sdavidxu		return (*op_table_compat32[uap->op])(td,
3304162536Sdavidxu			(struct _umtx_op_args *)uap);
3305162536Sdavidxu	return (EINVAL);
3306162536Sdavidxu}
3307162536Sdavidxu#endif
3308162536Sdavidxu
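/*
 * Per-thread umtx state lifecycle: a struct umtx_q is allocated when
 * the thread object is initialized and freed when it is destroyed;
 * umtx_thread_alloc() below resets the state when a thread structure
 * is reused.
 */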
3309161678Sdavidxuvoid
3310161678Sdavidxuumtx_thread_init(struct thread *td)
3311161678Sdavidxu{
3312161678Sdavidxu	td->td_umtxq = umtxq_alloc();
3313161678Sdavidxu	td->td_umtxq->uq_thread = td;
3314161678Sdavidxu}
3315161678Sdavidxu
3316161678Sdavidxuvoid
3317161678Sdavidxuumtx_thread_fini(struct thread *td)
3318161678Sdavidxu{
3319161678Sdavidxu	umtxq_free(td->td_umtxq);
3320161678Sdavidxu}
3321161678Sdavidxu
3322161678Sdavidxu/*
3323161678Sdavidxu * Called when a new thread is created, e.g. by fork().
3324161678Sdavidxu */
3325161678Sdavidxuvoid
3326161678Sdavidxuumtx_thread_alloc(struct thread *td)
3327161678Sdavidxu{
3328161678Sdavidxu	struct umtx_q *uq;
3329161678Sdavidxu
3330161678Sdavidxu	uq = td->td_umtxq;
3331161678Sdavidxu	uq->uq_inherited_pri = PRI_MAX;
3332161678Sdavidxu
3333161678Sdavidxu	KASSERT(uq->uq_flags == 0, ("uq_flags != 0"));
3334161678Sdavidxu	KASSERT(uq->uq_thread == td, ("uq_thread != td"));
3335161678Sdavidxu	KASSERT(uq->uq_pi_blocked == NULL, ("uq_pi_blocked != NULL"));
3336161678Sdavidxu	KASSERT(TAILQ_EMPTY(&uq->uq_pi_contested), ("uq_pi_contested is not empty"));
3337161678Sdavidxu}
3338161678Sdavidxu
3339161678Sdavidxu/*
3340161678Sdavidxu * exec() hook.
3341161678Sdavidxu */
3342161678Sdavidxustatic void
3343161678Sdavidxuumtx_exec_hook(void *arg __unused, struct proc *p __unused,
3344161678Sdavidxu	struct image_params *imgp __unused)
3345161678Sdavidxu{
3346161678Sdavidxu	umtx_thread_cleanup(curthread);
3347161678Sdavidxu}
3348161678Sdavidxu
3349161678Sdavidxu/*
3350161678Sdavidxu * thread_exit() hook.
3351161678Sdavidxu */
3352161678Sdavidxuvoid
3353161678Sdavidxuumtx_thread_exit(struct thread *td)
3354161678Sdavidxu{
3355161678Sdavidxu	umtx_thread_cleanup(td);
3356161678Sdavidxu}
3357161678Sdavidxu
3358161678Sdavidxu/*
3359161678Sdavidxu * Clean up umtx data: disown contested PI mutexes and drop borrowed priority.
3360161678Sdavidxu */
3361161678Sdavidxustatic void
3362161678Sdavidxuumtx_thread_cleanup(struct thread *td)
3363161678Sdavidxu{
3364161678Sdavidxu	struct umtx_q *uq;
3365161678Sdavidxu	struct umtx_pi *pi;
3366161678Sdavidxu
3367161678Sdavidxu	if ((uq = td->td_umtxq) == NULL)
3368161678Sdavidxu		return;
3369161678Sdavidxu
3370170300Sjeff	mtx_lock_spin(&umtx_lock);
3371161678Sdavidxu	uq->uq_inherited_pri = PRI_MAX;
3372161678Sdavidxu	while ((pi = TAILQ_FIRST(&uq->uq_pi_contested)) != NULL) {
3373161678Sdavidxu		pi->pi_owner = NULL;
3374161678Sdavidxu		TAILQ_REMOVE(&uq->uq_pi_contested, pi, pi_link);
3375161678Sdavidxu	}
3376174701Sdavidxu	thread_lock(td);
3377161678Sdavidxu	td->td_flags &= ~TDF_UBORROWING;
3378174701Sdavidxu	thread_unlock(td);
3379170300Sjeff	mtx_unlock_spin(&umtx_lock);
3380161678Sdavidxu}
3381