kern_umtx.c revision 248105
1139804Simp/*-
2139013Sdavidxu * Copyright (c) 2004, David Xu <davidxu@freebsd.org>
3112904Sjeff * Copyright (c) 2002, Jeffrey Roberson <jeff@freebsd.org>
4112904Sjeff * All rights reserved.
5112904Sjeff *
6112904Sjeff * Redistribution and use in source and binary forms, with or without
7112904Sjeff * modification, are permitted provided that the following conditions
8112904Sjeff * are met:
9112904Sjeff * 1. Redistributions of source code must retain the above copyright
10112904Sjeff *    notice unmodified, this list of conditions, and the following
11112904Sjeff *    disclaimer.
12112904Sjeff * 2. Redistributions in binary form must reproduce the above copyright
13112904Sjeff *    notice, this list of conditions and the following disclaimer in the
14112904Sjeff *    documentation and/or other materials provided with the distribution.
15112904Sjeff *
16112904Sjeff * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR
17112904Sjeff * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
18112904Sjeff * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.
19112904Sjeff * IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT,
20112904Sjeff * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT
21112904Sjeff * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
22112904Sjeff * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
23112904Sjeff * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
24112904Sjeff * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF
25112904Sjeff * THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
26112904Sjeff */
27112904Sjeff
28116182Sobrien#include <sys/cdefs.h>
29116182Sobrien__FBSDID("$FreeBSD: head/sys/kern/kern_umtx.c 248105 2013-03-09 15:31:19Z attilio $");
30116182Sobrien
31162536Sdavidxu#include "opt_compat.h"
32233045Sdavide#include "opt_umtx_profiling.h"
33233045Sdavide
34112904Sjeff#include <sys/param.h>
35112904Sjeff#include <sys/kernel.h>
36131431Smarcel#include <sys/limits.h>
37112904Sjeff#include <sys/lock.h>
38115765Sjeff#include <sys/malloc.h>
39112904Sjeff#include <sys/mutex.h>
40164033Srwatson#include <sys/priv.h>
41112904Sjeff#include <sys/proc.h>
42248105Sattilio#include <sys/sbuf.h>
43161678Sdavidxu#include <sys/sched.h>
44165369Sdavidxu#include <sys/smp.h>
45161678Sdavidxu#include <sys/sysctl.h>
46112904Sjeff#include <sys/sysent.h>
47112904Sjeff#include <sys/systm.h>
48112904Sjeff#include <sys/sysproto.h>
49216641Sdavidxu#include <sys/syscallsubr.h>
50139013Sdavidxu#include <sys/eventhandler.h>
51112904Sjeff#include <sys/umtx.h>
52112904Sjeff
53139013Sdavidxu#include <vm/vm.h>
54139013Sdavidxu#include <vm/vm_param.h>
55139013Sdavidxu#include <vm/pmap.h>
56139013Sdavidxu#include <vm/vm_map.h>
57139013Sdavidxu#include <vm/vm_object.h>
58139013Sdavidxu
59165369Sdavidxu#include <machine/cpu.h>
60165369Sdavidxu
61205014Snwhitehorn#ifdef COMPAT_FREEBSD32
62162536Sdavidxu#include <compat/freebsd32/freebsd32_proto.h>
63162536Sdavidxu#endif
64162536Sdavidxu
#define _UMUTEX_TRY		1
#define _UMUTEX_WAIT		2

#ifdef UMTX_PROFILING
/*
 * Compare two percentages expressed as (whole, fraction) pairs:
 * true when w.f is strictly greater than sw.sf.
 */
#define	UPROF_PERC_BIGGER(w, f, sw, sf)					\
	(((w) > (sw)) || ((w) == (sw) && (f) > (sf)))
#endif
72248105Sattilio
/* Priority inheritance mutex info. */
struct umtx_pi {
	/* Owner thread */
	struct thread		*pi_owner;

	/* Reference count */
	int			pi_refcount;

	/* List entry to link umtx held by thread */
	TAILQ_ENTRY(umtx_pi)	pi_link;

	/* List entry in hash */
	TAILQ_ENTRY(umtx_pi)	pi_hashlink;

	/* List for waiters */
	TAILQ_HEAD(,umtx_q)	pi_blocked;

	/* Identify a userland lock object */
	struct umtx_key		pi_key;
};
93161678Sdavidxu
/* A userland synchronous object user. */
struct umtx_q {
	/* Linked list for the hash. */
	TAILQ_ENTRY(umtx_q)	uq_link;

	/* Umtx key. */
	struct umtx_key		uq_key;

	/* Umtx flags. */
	int			uq_flags;
#define UQF_UMTXQ	0x0001	/* Thread is on a per-key wait queue. */

	/* The thread waits on. */
	struct thread		*uq_thread;

	/*
	 * Blocked on PI mutex. read can use chain lock
	 * or umtx_lock, write must have both chain lock and
	 * umtx_lock being hold.
	 */
	struct umtx_pi		*uq_pi_blocked;

	/* On blocked list */
	TAILQ_ENTRY(umtx_q)	uq_lockq;

	/* Thread contending with us */
	TAILQ_HEAD(,umtx_pi)	uq_pi_contested;

	/* Inherited priority from PP mutex */
	u_char			uq_inherited_pri;

	/* Spare queue ready to be reused */
	struct umtxq_queue	*uq_spare_queue;

	/* The queue we are currently on */
	struct umtxq_queue	*uq_cur_queue;
};
131115765Sjeff
TAILQ_HEAD(umtxq_head, umtx_q);

/* Per-key wait-queue */
struct umtxq_queue {
	struct umtxq_head	head;	/* Threads waiting on this key. */
	struct umtx_key		key;	/* Key shared by all waiters. */
	LIST_ENTRY(umtxq_queue)	link;	/* Entry on chain or spare list. */
	int			length;	/* Number of threads on head. */
};

LIST_HEAD(umtxq_list, umtxq_queue);
143201991Sdavidxu
/* Userland lock object's wait-queue chain */
struct umtxq_chain {
	/* Lock for this chain. */
	struct mtx		uc_lock;

	/* List of sleep queues. */
	struct umtxq_list	uc_queue[2];
#define UMTX_SHARED_QUEUE	0
#define UMTX_EXCLUSIVE_QUEUE	1

	/* Spare queue structures donated by waiters, ready for reuse. */
	LIST_HEAD(, umtxq_queue) uc_spare_queue;

	/* Busy flag */
	char			uc_busy;

	/* Chain lock waiters */
	int			uc_waiters;

	/* All PI in the list */
	TAILQ_HEAD(,umtx_pi)	uc_pi_list;

#ifdef UMTX_PROFILING
	/* Current and peak number of waiters on this chain. */
	u_int			length;
	u_int			max_length;
#endif
};
170115765Sjeff
#define	UMTXQ_LOCKED_ASSERT(uc)		mtx_assert(&(uc)->uc_lock, MA_OWNED)
/*
 * Bug fix: the original asserted on "&(uc)->uc_busy" — the *address* of the
 * busy flag, which is never NULL — so the assertion could never fire.
 * Assert on the flag's value instead.
 */
#define	UMTXQ_BUSY_ASSERT(uc)	KASSERT((uc)->uc_busy != 0, ("umtx chain is not busy"))
173161678Sdavidxu
/*
 * Don't propagate time-sharing priority, there is a security reason,
 * a user can simply introduce PI-mutex, let thread A lock the mutex,
 * and let another thread B block on the mutex, because B is
 * sleeping, its priority will be boosted, this causes A's priority to
 * be boosted via priority propagating too and will never be lowered even
 * if it is using 100%CPU, this is unfair to other processes.
 */

/* Clamp time-sharing user priorities to PRI_MAX_TIMESHARE for propagation. */
#define UPRI(td)	(((td)->td_user_pri >= PRI_MIN_TIMESHARE &&\
			  (td)->td_user_pri <= PRI_MAX_TIMESHARE) ?\
			 PRI_MAX_TIMESHARE : (td)->td_user_pri)

/* Multiplicative hash constant and table geometry (UMTX_CHAINS == 2^9). */
#define	GOLDEN_RATIO_PRIME	2654404609U
#define	UMTX_CHAINS		512
#define	UMTX_SHIFTS		(__WORD_BIT - 9)

#define	GET_SHARE(flags)	\
    (((flags) & USYNC_PROCESS_SHARED) == 0 ? THREAD_SHARE : PROCESS_SHARE)

/* Iterations to spin on a busy chain before sleeping (SMP only). */
#define BUSY_SPINS		200

/* Sleep deadline: a clock id, the last-sampled time on it, and the end time. */
struct abs_timeout {
	int clockid;
	struct timespec cur;
	struct timespec end;
};
201233690Sdavidxu
static uma_zone_t		umtx_pi_zone;
/* Two tables of wait-queue chains; see umtxq_getchain() for the split. */
static struct umtxq_chain	umtxq_chains[2][UMTX_CHAINS];
static MALLOC_DEFINE(M_UMTX, "umtx", "UMTX queue memory");
/* Number of umtx_pi structures currently allocated (exported below). */
static int			umtx_pi_allocated;

static SYSCTL_NODE(_debug, OID_AUTO, umtx, CTLFLAG_RW, 0, "umtx debug");
SYSCTL_INT(_debug_umtx, OID_AUTO, umtx_pi_allocated, CTLFLAG_RD,
    &umtx_pi_allocated, 0, "Allocated umtx_pi");

#ifdef UMTX_PROFILING
/* Global high-water mark of queue length across all chains. */
static long max_length;
SYSCTL_LONG(_debug_umtx, OID_AUTO, max_length, CTLFLAG_RD, &max_length, 0, "max_length");
static SYSCTL_NODE(_debug_umtx, OID_AUTO, chains, CTLFLAG_RD, 0, "umtx chain stats");
#endif
216233045Sdavide
/* Forward declarations for the static helpers below. */
static void umtxq_sysinit(void *);
static void umtxq_hash(struct umtx_key *key);
static struct umtxq_chain *umtxq_getchain(struct umtx_key *key);
static void umtxq_lock(struct umtx_key *key);
static void umtxq_unlock(struct umtx_key *key);
static void umtxq_busy(struct umtx_key *key);
static void umtxq_unbusy(struct umtx_key *key);
static void umtxq_insert_queue(struct umtx_q *uq, int q);
static void umtxq_remove_queue(struct umtx_q *uq, int q);
static int umtxq_sleep(struct umtx_q *uq, const char *wmesg, struct abs_timeout *);
static int umtxq_count(struct umtx_key *key);
static struct umtx_pi *umtx_pi_alloc(int);
static void umtx_pi_free(struct umtx_pi *pi);
static int do_unlock_pp(struct thread *td, struct umutex *m, uint32_t flags);
static void umtx_thread_cleanup(struct thread *td);
static void umtx_exec_hook(void *arg __unused, struct proc *p __unused,
	struct image_params *imgp __unused);
SYSINIT(umtx, SI_SUB_EVENTHANDLER+1, SI_ORDER_MIDDLE, umtxq_sysinit, NULL);

/* Convenience wrappers: most consumers operate on the shared queue. */
#define umtxq_signal(key, nwake)	umtxq_signal_queue((key), (nwake), UMTX_SHARED_QUEUE)
#define umtxq_insert(uq)	umtxq_insert_queue((uq), UMTX_SHARED_QUEUE)
#define umtxq_remove(uq)	umtxq_remove_queue((uq), UMTX_SHARED_QUEUE)

/* Spin lock for PI bookkeeping (see uq_pi_blocked locking note above). */
static struct mtx umtx_lock;
241170300Sjeff
242233045Sdavide#ifdef UMTX_PROFILING
243161678Sdavidxustatic void
244233045Sdavideumtx_init_profiling(void)
245233045Sdavide{
246233045Sdavide	struct sysctl_oid *chain_oid;
247233045Sdavide	char chain_name[10];
248233045Sdavide	int i;
249233045Sdavide
250233045Sdavide	for (i = 0; i < UMTX_CHAINS; ++i) {
251233045Sdavide		snprintf(chain_name, sizeof(chain_name), "%d", i);
252233045Sdavide		chain_oid = SYSCTL_ADD_NODE(NULL,
253233045Sdavide		    SYSCTL_STATIC_CHILDREN(_debug_umtx_chains), OID_AUTO,
254233045Sdavide		    chain_name, CTLFLAG_RD, NULL, "umtx hash stats");
255233045Sdavide		SYSCTL_ADD_INT(NULL, SYSCTL_CHILDREN(chain_oid), OID_AUTO,
256233045Sdavide		    "max_length0", CTLFLAG_RD, &umtxq_chains[0][i].max_length, 0, NULL);
257233045Sdavide		SYSCTL_ADD_INT(NULL, SYSCTL_CHILDREN(chain_oid), OID_AUTO,
258233045Sdavide		    "max_length1", CTLFLAG_RD, &umtxq_chains[1][i].max_length, 0, NULL);
259233045Sdavide	}
260233045Sdavide}
261248105Sattilio
/*
 * Sysctl handler: for each of the two chain tables, report the five chains
 * with the largest recorded peak queue length, each as a percentage of the
 * table-wide total of peaks.
 */
static int
sysctl_debug_umtx_chains_peaks(SYSCTL_HANDLER_ARGS)
{
	char buf[512];
	struct sbuf sb;
	struct umtxq_chain *uc;
	u_int fract, i, j, tot, whole;
	u_int sf0, sf1, sf2, sf3, sf4;
	u_int si0, si1, si2, si3, si4;
	u_int sw0, sw1, sw2, sw3, sw4;

	sbuf_new(&sb, buf, sizeof(buf), SBUF_FIXEDLEN);
	for (i = 0; i < 2; i++) {
		/* First pass: total of all per-chain peaks in this table. */
		tot = 0;
		for (j = 0; j < UMTX_CHAINS; ++j) {
			uc = &umtxq_chains[i][j];
			mtx_lock(&uc->uc_lock);
			tot += uc->max_length;
			mtx_unlock(&uc->uc_lock);
		}
		if (tot == 0)
			sbuf_printf(&sb, "%u) Empty ", i);
		else {
			/*
			 * Second pass: rank the top five chains.  swN/sfN
			 * hold the whole and fractional parts (each scaled
			 * by 100, divided by tot only when printed) of the
			 * N-th largest percentage; siN holds its index.
			 */
			sf0 = sf1 = sf2 = sf3 = sf4 = 0;
			si0 = si1 = si2 = si3 = si4 = 0;
			sw0 = sw1 = sw2 = sw3 = sw4 = 0;
			for (j = 0; j < UMTX_CHAINS; j++) {
				uc = &umtxq_chains[i][j];
				mtx_lock(&uc->uc_lock);
				whole = uc->max_length * 100;
				mtx_unlock(&uc->uc_lock);
				fract = (whole % tot) * 100;
				if (UPROF_PERC_BIGGER(whole, fract, sw0, sf0)) {
					sf0 = fract;
					si0 = j;
					sw0 = whole;
				} else if (UPROF_PERC_BIGGER(whole, fract, sw1,
				    sf1)) {
					sf1 = fract;
					si1 = j;
					sw1 = whole;
				} else if (UPROF_PERC_BIGGER(whole, fract, sw2,
				    sf2)) {
					sf2 = fract;
					si2 = j;
					sw2 = whole;
				} else if (UPROF_PERC_BIGGER(whole, fract, sw3,
				    sf3)) {
					sf3 = fract;
					si3 = j;
					sw3 = whole;
				} else if (UPROF_PERC_BIGGER(whole, fract, sw4,
				    sf4)) {
					sf4 = fract;
					si4 = j;
					sw4 = whole;
				}
			}
			sbuf_printf(&sb, "queue %u:\n", i);
			sbuf_printf(&sb, "1st: %u.%u%% idx: %u\n", sw0 / tot,
			    sf0 / tot, si0);
			sbuf_printf(&sb, "2nd: %u.%u%% idx: %u\n", sw1 / tot,
			    sf1 / tot, si1);
			sbuf_printf(&sb, "3rd: %u.%u%% idx: %u\n", sw2 / tot,
			    sf2 / tot, si2);
			sbuf_printf(&sb, "4th: %u.%u%% idx: %u\n", sw3 / tot,
			    sf3 / tot, si3);
			sbuf_printf(&sb, "5th: %u.%u%% idx: %u\n", sw4 / tot,
			    sf4 / tot, si4);
		}
	}
	sbuf_trim(&sb);
	sbuf_finish(&sb);
	sysctl_handle_string(oidp, sbuf_data(&sb), sbuf_len(&sb), req);
	sbuf_delete(&sb);
	return (0);
}
339248105Sattilio
340248105Sattiliostatic int
341248105Sattiliosysctl_debug_umtx_chains_clear(SYSCTL_HANDLER_ARGS)
342248105Sattilio{
343248105Sattilio	struct umtxq_chain *uc;
344248105Sattilio	u_int i, j;
345248105Sattilio	int clear, error;
346248105Sattilio
347248105Sattilio	clear = 0;
348248105Sattilio	error = sysctl_handle_int(oidp, &clear, 0, req);
349248105Sattilio	if (error != 0 || req->newptr == NULL)
350248105Sattilio		return (error);
351248105Sattilio
352248105Sattilio	if (clear != 0) {
353248105Sattilio		for (i = 0; i < 2; ++i) {
354248105Sattilio			for (j = 0; j < UMTX_CHAINS; ++j) {
355248105Sattilio				uc = &umtxq_chains[i][j];
356248105Sattilio				mtx_lock(&uc->uc_lock);
357248105Sattilio				uc->length = 0;
358248105Sattilio				uc->max_length = 0;
359248105Sattilio				mtx_unlock(&uc->uc_lock);
360248105Sattilio			}
361248105Sattilio		}
362248105Sattilio	}
363248105Sattilio	return (0);
364248105Sattilio}
365248105Sattilio
/* Knobs: "clear" resets the profiling counters, "peaks" reports them. */
SYSCTL_PROC(_debug_umtx_chains, OID_AUTO, clear,
    CTLTYPE_INT | CTLFLAG_RW | CTLFLAG_MPSAFE, 0, 0,
    sysctl_debug_umtx_chains_clear, "I", "Clear umtx chains statistics");
SYSCTL_PROC(_debug_umtx_chains, OID_AUTO, peaks,
    CTLTYPE_STRING | CTLFLAG_RD | CTLFLAG_MPSAFE, 0, 0,
    sysctl_debug_umtx_chains_peaks, "A", "Highest peaks in chains max length");
372233045Sdavide#endif
373233045Sdavide
374233045Sdavidestatic void
375161678Sdavidxuumtxq_sysinit(void *arg __unused)
376161678Sdavidxu{
377179421Sdavidxu	int i, j;
378138224Sdavidxu
379161678Sdavidxu	umtx_pi_zone = uma_zcreate("umtx pi", sizeof(struct umtx_pi),
380161678Sdavidxu		NULL, NULL, NULL, NULL, UMA_ALIGN_PTR, 0);
381179421Sdavidxu	for (i = 0; i < 2; ++i) {
382179421Sdavidxu		for (j = 0; j < UMTX_CHAINS; ++j) {
383179421Sdavidxu			mtx_init(&umtxq_chains[i][j].uc_lock, "umtxql", NULL,
384179421Sdavidxu				 MTX_DEF | MTX_DUPOK);
385201991Sdavidxu			LIST_INIT(&umtxq_chains[i][j].uc_queue[0]);
386201991Sdavidxu			LIST_INIT(&umtxq_chains[i][j].uc_queue[1]);
387201991Sdavidxu			LIST_INIT(&umtxq_chains[i][j].uc_spare_queue);
388179421Sdavidxu			TAILQ_INIT(&umtxq_chains[i][j].uc_pi_list);
389179421Sdavidxu			umtxq_chains[i][j].uc_busy = 0;
390179421Sdavidxu			umtxq_chains[i][j].uc_waiters = 0;
391234302Sdavide#ifdef UMTX_PROFILING
392233045Sdavide			umtxq_chains[i][j].length = 0;
393233045Sdavide			umtxq_chains[i][j].max_length = 0;
394234302Sdavide#endif
395179421Sdavidxu		}
396161678Sdavidxu	}
397234302Sdavide#ifdef UMTX_PROFILING
398233045Sdavide	umtx_init_profiling();
399234302Sdavide#endif
400170300Sjeff	mtx_init(&umtx_lock, "umtx lock", NULL, MTX_SPIN);
401161678Sdavidxu	EVENTHANDLER_REGISTER(process_exec, umtx_exec_hook, NULL,
402161678Sdavidxu	    EVENTHANDLER_PRI_ANY);
403161678Sdavidxu}
404161678Sdavidxu
405143149Sdavidxustruct umtx_q *
406143149Sdavidxuumtxq_alloc(void)
407143149Sdavidxu{
408161678Sdavidxu	struct umtx_q *uq;
409161678Sdavidxu
410161678Sdavidxu	uq = malloc(sizeof(struct umtx_q), M_UMTX, M_WAITOK | M_ZERO);
411201991Sdavidxu	uq->uq_spare_queue = malloc(sizeof(struct umtxq_queue), M_UMTX, M_WAITOK | M_ZERO);
412201991Sdavidxu	TAILQ_INIT(&uq->uq_spare_queue->head);
413161678Sdavidxu	TAILQ_INIT(&uq->uq_pi_contested);
414161678Sdavidxu	uq->uq_inherited_pri = PRI_MAX;
415161678Sdavidxu	return (uq);
416143149Sdavidxu}
417143149Sdavidxu
418143149Sdavidxuvoid
419143149Sdavidxuumtxq_free(struct umtx_q *uq)
420143149Sdavidxu{
421201991Sdavidxu	MPASS(uq->uq_spare_queue != NULL);
422201991Sdavidxu	free(uq->uq_spare_queue, M_UMTX);
423143149Sdavidxu	free(uq, M_UMTX);
424143149Sdavidxu}
425143149Sdavidxu
426161678Sdavidxustatic inline void
427139013Sdavidxuumtxq_hash(struct umtx_key *key)
428138224Sdavidxu{
429161678Sdavidxu	unsigned n = (uintptr_t)key->info.both.a + key->info.both.b;
430161678Sdavidxu	key->hash = ((n * GOLDEN_RATIO_PRIME) >> UMTX_SHIFTS) % UMTX_CHAINS;
431138224Sdavidxu}
432138224Sdavidxu
433161678Sdavidxustatic inline struct umtxq_chain *
434161678Sdavidxuumtxq_getchain(struct umtx_key *key)
435139013Sdavidxu{
436201886Sdavidxu	if (key->type <= TYPE_SEM)
437179421Sdavidxu		return (&umtxq_chains[1][key->hash]);
438179421Sdavidxu	return (&umtxq_chains[0][key->hash]);
439139013Sdavidxu}
440139013Sdavidxu
441161678Sdavidxu/*
442177848Sdavidxu * Lock a chain.
443161678Sdavidxu */
444138224Sdavidxustatic inline void
445177848Sdavidxuumtxq_lock(struct umtx_key *key)
446139257Sdavidxu{
447161678Sdavidxu	struct umtxq_chain *uc;
448139257Sdavidxu
449161678Sdavidxu	uc = umtxq_getchain(key);
450177848Sdavidxu	mtx_lock(&uc->uc_lock);
451139257Sdavidxu}
452139257Sdavidxu
453161678Sdavidxu/*
454177848Sdavidxu * Unlock a chain.
455161678Sdavidxu */
456139257Sdavidxustatic inline void
457177848Sdavidxuumtxq_unlock(struct umtx_key *key)
458139257Sdavidxu{
459161678Sdavidxu	struct umtxq_chain *uc;
460139257Sdavidxu
461161678Sdavidxu	uc = umtxq_getchain(key);
462177848Sdavidxu	mtx_unlock(&uc->uc_lock);
463139257Sdavidxu}
464139257Sdavidxu
/*
 * Set chain to busy state when following operation
 * may be blocked (kernel mutex can not be used).
 */
static inline void
umtxq_busy(struct umtx_key *key)
{
	struct umtxq_chain *uc;

	uc = umtxq_getchain(key);
	mtx_assert(&uc->uc_lock, MA_OWNED);
	if (uc->uc_busy) {
#ifdef SMP
		/*
		 * On MP machines, briefly spin for the busy flag before
		 * sleeping — the current holder may be running on another
		 * CPU and clear it soon.  The chain lock is dropped while
		 * spinning so the holder can make progress.
		 */
		if (smp_cpus > 1) {
			int count = BUSY_SPINS;
			if (count > 0) {
				umtxq_unlock(key);
				while (uc->uc_busy && --count > 0)
					cpu_spinwait();
				umtxq_lock(key);
			}
		}
#endif
		/* Sleep until the current holder clears the busy flag. */
		while (uc->uc_busy) {
			uc->uc_waiters++;
			msleep(uc, &uc->uc_lock, 0, "umtxqb", 0);
			uc->uc_waiters--;
		}
	}
	uc->uc_busy = 1;
}
496138224Sdavidxu
497161678Sdavidxu/*
498177848Sdavidxu * Unbusy a chain.
499161678Sdavidxu */
500138225Sdavidxustatic inline void
501177848Sdavidxuumtxq_unbusy(struct umtx_key *key)
502138224Sdavidxu{
503161678Sdavidxu	struct umtxq_chain *uc;
504161678Sdavidxu
505161678Sdavidxu	uc = umtxq_getchain(key);
506177848Sdavidxu	mtx_assert(&uc->uc_lock, MA_OWNED);
507177848Sdavidxu	KASSERT(uc->uc_busy != 0, ("not busy"));
508177848Sdavidxu	uc->uc_busy = 0;
509177848Sdavidxu	if (uc->uc_waiters)
510177848Sdavidxu		wakeup_one(uc);
511138224Sdavidxu}
512138224Sdavidxu
513201991Sdavidxustatic struct umtxq_queue *
514201991Sdavidxuumtxq_queue_lookup(struct umtx_key *key, int q)
515201991Sdavidxu{
516201991Sdavidxu	struct umtxq_queue *uh;
517201991Sdavidxu	struct umtxq_chain *uc;
518201991Sdavidxu
519201991Sdavidxu	uc = umtxq_getchain(key);
520201991Sdavidxu	UMTXQ_LOCKED_ASSERT(uc);
521201991Sdavidxu	LIST_FOREACH(uh, &uc->uc_queue[q], link) {
522201991Sdavidxu		if (umtx_key_match(&uh->key, key))
523201991Sdavidxu			return (uh);
524201991Sdavidxu	}
525201991Sdavidxu
526201991Sdavidxu	return (NULL);
527201991Sdavidxu}
528201991Sdavidxu
/*
 * Append uq to the per-key wait queue for its key on sleep-queue list q.
 * Chain lock must be held.
 */
static inline void
umtxq_insert_queue(struct umtx_q *uq, int q)
{
	struct umtxq_queue *uh;
	struct umtxq_chain *uc;

	uc = umtxq_getchain(&uq->uq_key);
	UMTXQ_LOCKED_ASSERT(uc);
	KASSERT((uq->uq_flags & UQF_UMTXQ) == 0, ("umtx_q is already on queue"));
	uh = umtxq_queue_lookup(&uq->uq_key, q);
	if (uh != NULL) {
		/*
		 * A queue for this key already exists: donate our spare
		 * queue structure to the chain's spare list for reuse.
		 */
		LIST_INSERT_HEAD(&uc->uc_spare_queue, uq->uq_spare_queue, link);
	} else {
		/* First waiter on this key: our spare becomes the queue. */
		uh = uq->uq_spare_queue;
		uh->key = uq->uq_key;
		LIST_INSERT_HEAD(&uc->uc_queue[q], uh, link);
	}
	uq->uq_spare_queue = NULL;

	TAILQ_INSERT_TAIL(&uh->head, uq, uq_link);
	uh->length++;
#ifdef UMTX_PROFILING
	/* Track per-chain and global high-water marks of queue length. */
	uc->length++;
	if (uc->length > uc->max_length) {
		uc->max_length = uc->length;
		if (uc->max_length > max_length)
			max_length = uc->max_length;
	}
#endif
	uq->uq_flags |= UQF_UMTXQ;
	uq->uq_cur_queue = uh;
	return;
}
562139013Sdavidxu
/*
 * Remove uq from the per-key wait queue it sleeps on (no-op if it is not
 * queued), and give it back a spare queue structure.  Chain lock must be
 * held.
 */
static inline void
umtxq_remove_queue(struct umtx_q *uq, int q)
{
	struct umtxq_chain *uc;
	struct umtxq_queue *uh;

	uc = umtxq_getchain(&uq->uq_key);
	UMTXQ_LOCKED_ASSERT(uc);
	if (uq->uq_flags & UQF_UMTXQ) {
		uh = uq->uq_cur_queue;
		TAILQ_REMOVE(&uh->head, uq, uq_link);
		uh->length--;
#ifdef UMTX_PROFILING
		uc->length--;
#endif
		uq->uq_flags &= ~UQF_UMTXQ;
		if (TAILQ_EMPTY(&uh->head)) {
			KASSERT(uh->length == 0,
			    ("inconsistent umtxq_queue length"));
			/* Last waiter: reclaim the now-empty queue as spare. */
			LIST_REMOVE(uh, link);
		} else {
			/*
			 * Queue still has waiters: leave it in place and
			 * take a spare from the chain's spare list instead
			 * (every extra waiter donated one on insert, so the
			 * list cannot be empty here).
			 */
			uh = LIST_FIRST(&uc->uc_spare_queue);
			KASSERT(uh != NULL, ("uc_spare_queue is empty"));
			LIST_REMOVE(uh, link);
		}
		uq->uq_spare_queue = uh;
		uq->uq_cur_queue = NULL;
	}
}
592139013Sdavidxu
593161678Sdavidxu/*
594161678Sdavidxu * Check if there are multiple waiters
595161678Sdavidxu */
596139013Sdavidxustatic int
597139013Sdavidxuumtxq_count(struct umtx_key *key)
598139013Sdavidxu{
599161678Sdavidxu	struct umtxq_chain *uc;
600201991Sdavidxu	struct umtxq_queue *uh;
601115765Sjeff
602161678Sdavidxu	uc = umtxq_getchain(key);
603161678Sdavidxu	UMTXQ_LOCKED_ASSERT(uc);
604201991Sdavidxu	uh = umtxq_queue_lookup(key, UMTX_SHARED_QUEUE);
605201991Sdavidxu	if (uh != NULL)
606201991Sdavidxu		return (uh->length);
607201991Sdavidxu	return (0);
608115765Sjeff}
609115765Sjeff
610161678Sdavidxu/*
611161678Sdavidxu * Check if there are multiple PI waiters and returns first
612161678Sdavidxu * waiter.
613161678Sdavidxu */
614139257Sdavidxustatic int
615161678Sdavidxuumtxq_count_pi(struct umtx_key *key, struct umtx_q **first)
616161678Sdavidxu{
617161678Sdavidxu	struct umtxq_chain *uc;
618201991Sdavidxu	struct umtxq_queue *uh;
619161678Sdavidxu
620161678Sdavidxu	*first = NULL;
621161678Sdavidxu	uc = umtxq_getchain(key);
622161678Sdavidxu	UMTXQ_LOCKED_ASSERT(uc);
623201991Sdavidxu	uh = umtxq_queue_lookup(key, UMTX_SHARED_QUEUE);
624201991Sdavidxu	if (uh != NULL) {
625201991Sdavidxu		*first = TAILQ_FIRST(&uh->head);
626201991Sdavidxu		return (uh->length);
627161678Sdavidxu	}
628201991Sdavidxu	return (0);
629161678Sdavidxu}
630161678Sdavidxu
631161678Sdavidxu/*
632161678Sdavidxu * Wake up threads waiting on an userland object.
633161678Sdavidxu */
634177848Sdavidxu
635161678Sdavidxustatic int
636177848Sdavidxuumtxq_signal_queue(struct umtx_key *key, int n_wake, int q)
637115765Sjeff{
638161678Sdavidxu	struct umtxq_chain *uc;
639201991Sdavidxu	struct umtxq_queue *uh;
640201991Sdavidxu	struct umtx_q *uq;
641161678Sdavidxu	int ret;
642115765Sjeff
643139257Sdavidxu	ret = 0;
644161678Sdavidxu	uc = umtxq_getchain(key);
645161678Sdavidxu	UMTXQ_LOCKED_ASSERT(uc);
646201991Sdavidxu	uh = umtxq_queue_lookup(key, q);
647201991Sdavidxu	if (uh != NULL) {
648201991Sdavidxu		while ((uq = TAILQ_FIRST(&uh->head)) != NULL) {
649177848Sdavidxu			umtxq_remove_queue(uq, q);
650161678Sdavidxu			wakeup(uq);
651139257Sdavidxu			if (++ret >= n_wake)
652201991Sdavidxu				return (ret);
653139013Sdavidxu		}
654139013Sdavidxu	}
655139257Sdavidxu	return (ret);
656138224Sdavidxu}
657138224Sdavidxu
658177848Sdavidxu
659161678Sdavidxu/*
660161678Sdavidxu * Wake up specified thread.
661161678Sdavidxu */
662161678Sdavidxustatic inline void
663161678Sdavidxuumtxq_signal_thread(struct umtx_q *uq)
664161678Sdavidxu{
665161678Sdavidxu	struct umtxq_chain *uc;
666161678Sdavidxu
667161678Sdavidxu	uc = umtxq_getchain(&uq->uq_key);
668161678Sdavidxu	UMTXQ_LOCKED_ASSERT(uc);
669161678Sdavidxu	umtxq_remove(uq);
670161678Sdavidxu	wakeup(uq);
671161678Sdavidxu}
672161678Sdavidxu
673233690Sdavidxustatic inline int
674233690Sdavidxutstohz(const struct timespec *tsp)
675233690Sdavidxu{
676233690Sdavidxu	struct timeval tv;
677233690Sdavidxu
678233690Sdavidxu	TIMESPEC_TO_TIMEVAL(&tv, tsp);
679233690Sdavidxu	return tvtohz(&tv);
680233690Sdavidxu}
681233690Sdavidxu
682233690Sdavidxustatic void
683233690Sdavidxuabs_timeout_init(struct abs_timeout *timo, int clockid, int absolute,
684233690Sdavidxu	const struct timespec *timeout)
685233690Sdavidxu{
686233690Sdavidxu
687233690Sdavidxu	timo->clockid = clockid;
688233690Sdavidxu	if (!absolute) {
689233690Sdavidxu		kern_clock_gettime(curthread, clockid, &timo->end);
690233690Sdavidxu		timo->cur = timo->end;
691233690Sdavidxu		timespecadd(&timo->end, timeout);
692233690Sdavidxu	} else {
693233690Sdavidxu		timo->end = *timeout;
694233690Sdavidxu		kern_clock_gettime(curthread, clockid, &timo->cur);
695233690Sdavidxu	}
696233690Sdavidxu}
697233690Sdavidxu
698233690Sdavidxustatic void
699233690Sdavidxuabs_timeout_init2(struct abs_timeout *timo, const struct _umtx_time *umtxtime)
700233690Sdavidxu{
701233690Sdavidxu
702233690Sdavidxu	abs_timeout_init(timo, umtxtime->_clockid,
703233690Sdavidxu		(umtxtime->_flags & UMTX_ABSTIME) != 0,
704233690Sdavidxu		&umtxtime->_timeout);
705233690Sdavidxu}
706233690Sdavidxu
/* Re-sample the current time on the timeout's clock into timo->cur. */
static inline void
abs_timeout_update(struct abs_timeout *timo)
{
	kern_clock_gettime(curthread, timo->clockid, &timo->cur);
}
712233690Sdavidxu
713233690Sdavidxustatic int
714233690Sdavidxuabs_timeout_gethz(struct abs_timeout *timo)
715233690Sdavidxu{
716233690Sdavidxu	struct timespec tts;
717233690Sdavidxu
718239202Sdavidxu	if (timespeccmp(&timo->end, &timo->cur, <=))
719239202Sdavidxu		return (-1);
720233690Sdavidxu	tts = timo->end;
721233690Sdavidxu	timespecsub(&tts, &timo->cur);
722233690Sdavidxu	return (tstohz(&tts));
723233690Sdavidxu}
724233690Sdavidxu
/*
 * Put thread into sleep state, before sleeping, check if
 * thread was removed from umtx queue.
 */
static inline int
umtxq_sleep(struct umtx_q *uq, const char *wmesg, struct abs_timeout *abstime)
{
	struct umtxq_chain *uc;
	int error, timo;

	uc = umtxq_getchain(&uq->uq_key);
	UMTXQ_LOCKED_ASSERT(uc);
	for (;;) {
		/* Already removed from the queue (woken): nothing to wait for. */
		if (!(uq->uq_flags & UQF_UMTXQ))
			return (0);
		if (abstime != NULL) {
			timo = abs_timeout_gethz(abstime);
			/* Negative means the deadline has already passed. */
			if (timo < 0)
				return (ETIMEDOUT);
		} else
			timo = 0;
		/*
		 * PDROP: msleep() returns with the chain lock released;
		 * it is re-taken below before queue state is re-examined.
		 */
		error = msleep(uq, &uc->uc_lock, PCATCH | PDROP, wmesg, timo);
		if (error != EWOULDBLOCK) {
			umtxq_lock(&uq->uq_key);
			break;
		}
		/* msleep() tick timeout: refresh "now" and re-check above. */
		if (abstime != NULL)
			abs_timeout_update(abstime);
		umtxq_lock(&uq->uq_key);
	}
	return (error);
}
757138224Sdavidxu
758161678Sdavidxu/*
759161678Sdavidxu * Convert userspace address into unique logical address.
760161678Sdavidxu */
761218969Sjhbint
762161678Sdavidxuumtx_key_get(void *addr, int type, int share, struct umtx_key *key)
763139013Sdavidxu{
764161678Sdavidxu	struct thread *td = curthread;
765139013Sdavidxu	vm_map_t map;
766139013Sdavidxu	vm_map_entry_t entry;
767139013Sdavidxu	vm_pindex_t pindex;
768139013Sdavidxu	vm_prot_t prot;
769139013Sdavidxu	boolean_t wired;
770139013Sdavidxu
771161678Sdavidxu	key->type = type;
772161678Sdavidxu	if (share == THREAD_SHARE) {
773161678Sdavidxu		key->shared = 0;
774161678Sdavidxu		key->info.private.vs = td->td_proc->p_vmspace;
775161678Sdavidxu		key->info.private.addr = (uintptr_t)addr;
776163677Sdavidxu	} else {
777163677Sdavidxu		MPASS(share == PROCESS_SHARE || share == AUTO_SHARE);
778161678Sdavidxu		map = &td->td_proc->p_vmspace->vm_map;
779161678Sdavidxu		if (vm_map_lookup(&map, (vm_offset_t)addr, VM_PROT_WRITE,
780161678Sdavidxu		    &entry, &key->info.shared.object, &pindex, &prot,
781161678Sdavidxu		    &wired) != KERN_SUCCESS) {
782161678Sdavidxu			return EFAULT;
783161678Sdavidxu		}
784161678Sdavidxu
785161678Sdavidxu		if ((share == PROCESS_SHARE) ||
786161678Sdavidxu		    (share == AUTO_SHARE &&
787161678Sdavidxu		     VM_INHERIT_SHARE == entry->inheritance)) {
788161678Sdavidxu			key->shared = 1;
789161678Sdavidxu			key->info.shared.offset = entry->offset + entry->start -
790161678Sdavidxu				(vm_offset_t)addr;
791161678Sdavidxu			vm_object_reference(key->info.shared.object);
792161678Sdavidxu		} else {
793161678Sdavidxu			key->shared = 0;
794161678Sdavidxu			key->info.private.vs = td->td_proc->p_vmspace;
795161678Sdavidxu			key->info.private.addr = (uintptr_t)addr;
796161678Sdavidxu		}
797161678Sdavidxu		vm_map_lookup_done(map, entry);
798139013Sdavidxu	}
799139013Sdavidxu
800161678Sdavidxu	umtxq_hash(key);
801139013Sdavidxu	return (0);
802139013Sdavidxu}
803139013Sdavidxu
804161678Sdavidxu/*
805161678Sdavidxu * Release key.
806161678Sdavidxu */
807218969Sjhbvoid
808139013Sdavidxuumtx_key_release(struct umtx_key *key)
809139013Sdavidxu{
810161678Sdavidxu	if (key->shared)
811139013Sdavidxu		vm_object_deallocate(key->info.shared.object);
812139013Sdavidxu}
813139013Sdavidxu
814161678Sdavidxu/*
815161678Sdavidxu * Lock a umtx object.
816161678Sdavidxu */
static int
do_lock_umtx(struct thread *td, struct umtx *umtx, u_long id,
	const struct timespec *timeout)
{
	struct abs_timeout timo;
	struct umtx_q *uq;
	u_long owner;
	u_long old;
	int error = 0;

	uq = td->td_umtxq;
	if (timeout != NULL)
		abs_timeout_init(&timo, CLOCK_REALTIME, 0, timeout);

	/*
	 * Care must be exercised when dealing with umtx structure. It
	 * can fault on any access.
	 */
	for (;;) {
		/*
		 * Try the uncontested case.  This should be done in userland.
		 */
		owner = casuword(&umtx->u_owner, UMTX_UNOWNED, id);

		/* The acquire succeeded. */
		if (owner == UMTX_UNOWNED)
			return (0);

		/* The address was invalid. */
		if (owner == -1)
			return (EFAULT);

		/* If no one owns it but it is contested try to acquire it. */
		if (owner == UMTX_CONTESTED) {
			owner = casuword(&umtx->u_owner,
			    UMTX_CONTESTED, id | UMTX_CONTESTED);

			if (owner == UMTX_CONTESTED)
				return (0);

			/* The address was invalid. */
			if (owner == -1)
				return (EFAULT);

			/* If this failed the lock has changed, restart. */
			continue;
		}

		/*
		 * If we caught a signal, we have retried and now
		 * exit immediately.  Break (not return) so that the
		 * EINTR/ERESTART conversion below is applied.
		 */
		if (error != 0)
			break;

		if ((error = umtx_key_get(umtx, TYPE_SIMPLE_LOCK,
			AUTO_SHARE, &uq->uq_key)) != 0)
			return (error);

		/*
		 * Queue ourselves before setting the contested bit so
		 * that a release happening after the CAS below finds us
		 * on the queue.
		 */
		umtxq_lock(&uq->uq_key);
		umtxq_busy(&uq->uq_key);
		umtxq_insert(uq);
		umtxq_unbusy(&uq->uq_key);
		umtxq_unlock(&uq->uq_key);

		/*
		 * Set the contested bit so that a release in user space
		 * knows to use the system call for unlock.  If this fails
		 * either some one else has acquired the lock or it has been
		 * released.
		 */
		old = casuword(&umtx->u_owner, owner, owner | UMTX_CONTESTED);

		/* The address was invalid. */
		if (old == -1) {
			umtxq_lock(&uq->uq_key);
			umtxq_remove(uq);
			umtxq_unlock(&uq->uq_key);
			umtx_key_release(&uq->uq_key);
			return (EFAULT);
		}

		/*
		 * We set the contested bit, sleep. Otherwise the lock changed
		 * and we need to retry or we lost a race to the thread
		 * unlocking the umtx.
		 */
		umtxq_lock(&uq->uq_key);
		if (old == owner)
			error = umtxq_sleep(uq, "umtx", timeout == NULL ? NULL :
			    &timo);
		umtxq_remove(uq);
		umtxq_unlock(&uq->uq_key);
		umtx_key_release(&uq->uq_key);
	}

	if (timeout == NULL) {
		/* Mutex locking is restarted if it is interrupted. */
		if (error == EINTR)
			error = ERESTART;
	} else {
		/* Timed-locking is not restarted. */
		if (error == ERESTART)
			error = EINTR;
	}
	return (error);
}
924139013Sdavidxu
925161678Sdavidxu/*
926161678Sdavidxu * Unlock a umtx object.
927161678Sdavidxu */
static int
do_unlock_umtx(struct thread *td, struct umtx *umtx, u_long id)
{
	struct umtx_key key;
	u_long owner;
	u_long old;
	int error;
	int count;

	/*
	 * Make sure we own this mtx.
	 */
	owner = fuword(__DEVOLATILE(u_long *, &umtx->u_owner));
	if (owner == -1)
		return (EFAULT);

	if ((owner & ~UMTX_CONTESTED) != id)
		return (EPERM);

	/* This should be done in userland */
	if ((owner & UMTX_CONTESTED) == 0) {
		/* Uncontested: a single CAS back to UMTX_UNOWNED suffices. */
		old = casuword(&umtx->u_owner, owner, UMTX_UNOWNED);
		if (old == -1)
			return (EFAULT);
		if (old == owner)
			return (0);
		/* Raced with a locker; re-read and fall through. */
		owner = old;
	}

	/* We should only ever be in here for contested locks */
	if ((error = umtx_key_get(umtx, TYPE_SIMPLE_LOCK, AUTO_SHARE,
		&key)) != 0)
		return (error);

	/* Mark the chain busy while sampling the waiter count. */
	umtxq_lock(&key);
	umtxq_busy(&key);
	count = umtxq_count(&key);
	umtxq_unlock(&key);

	/*
	 * When unlocking the umtx, it must be marked as unowned if
	 * there is zero or one thread only waiting for it.
	 * Otherwise, it must be marked as contested.
	 */
	old = casuword(&umtx->u_owner, owner,
		count <= 1 ? UMTX_UNOWNED : UMTX_CONTESTED);
	umtxq_lock(&key);
	umtxq_signal(&key,1);
	umtxq_unbusy(&key);
	umtxq_unlock(&key);
	umtx_key_release(&key);
	if (old == -1)
		return (EFAULT);
	/* The CAS failed: someone else changed the word under us. */
	if (old != owner)
		return (EINVAL);
	return (0);
}
985139013Sdavidxu
986205014Snwhitehorn#ifdef COMPAT_FREEBSD32
987162536Sdavidxu
988161678Sdavidxu/*
989162536Sdavidxu * Lock a umtx object.
990162536Sdavidxu */
991162536Sdavidxustatic int
992233690Sdavidxudo_lock_umtx32(struct thread *td, uint32_t *m, uint32_t id,
993233690Sdavidxu	const struct timespec *timeout)
994162536Sdavidxu{
995233690Sdavidxu	struct abs_timeout timo;
996162536Sdavidxu	struct umtx_q *uq;
997162536Sdavidxu	uint32_t owner;
998162536Sdavidxu	uint32_t old;
999162536Sdavidxu	int error = 0;
1000162536Sdavidxu
1001162536Sdavidxu	uq = td->td_umtxq;
1002162536Sdavidxu
1003233690Sdavidxu	if (timeout != NULL)
1004233690Sdavidxu		abs_timeout_init(&timo, CLOCK_REALTIME, 0, timeout);
1005233690Sdavidxu
1006162536Sdavidxu	/*
1007162536Sdavidxu	 * Care must be exercised when dealing with umtx structure. It
1008162536Sdavidxu	 * can fault on any access.
1009162536Sdavidxu	 */
1010162536Sdavidxu	for (;;) {
1011162536Sdavidxu		/*
1012162536Sdavidxu		 * Try the uncontested case.  This should be done in userland.
1013162536Sdavidxu		 */
1014162536Sdavidxu		owner = casuword32(m, UMUTEX_UNOWNED, id);
1015162536Sdavidxu
1016162536Sdavidxu		/* The acquire succeeded. */
1017162536Sdavidxu		if (owner == UMUTEX_UNOWNED)
1018162536Sdavidxu			return (0);
1019162536Sdavidxu
1020162536Sdavidxu		/* The address was invalid. */
1021162536Sdavidxu		if (owner == -1)
1022162536Sdavidxu			return (EFAULT);
1023162536Sdavidxu
1024162536Sdavidxu		/* If no one owns it but it is contested try to acquire it. */
1025162536Sdavidxu		if (owner == UMUTEX_CONTESTED) {
1026162536Sdavidxu			owner = casuword32(m,
1027162536Sdavidxu			    UMUTEX_CONTESTED, id | UMUTEX_CONTESTED);
1028162536Sdavidxu			if (owner == UMUTEX_CONTESTED)
1029162536Sdavidxu				return (0);
1030162536Sdavidxu
1031162536Sdavidxu			/* The address was invalid. */
1032162536Sdavidxu			if (owner == -1)
1033162536Sdavidxu				return (EFAULT);
1034162536Sdavidxu
1035162536Sdavidxu			/* If this failed the lock has changed, restart. */
1036162536Sdavidxu			continue;
1037162536Sdavidxu		}
1038162536Sdavidxu
1039162536Sdavidxu		/*
1040162536Sdavidxu		 * If we caught a signal, we have retried and now
1041162536Sdavidxu		 * exit immediately.
1042162536Sdavidxu		 */
1043162536Sdavidxu		if (error != 0)
1044162536Sdavidxu			return (error);
1045162536Sdavidxu
1046162536Sdavidxu		if ((error = umtx_key_get(m, TYPE_SIMPLE_LOCK,
1047162536Sdavidxu			AUTO_SHARE, &uq->uq_key)) != 0)
1048162536Sdavidxu			return (error);
1049162536Sdavidxu
1050162536Sdavidxu		umtxq_lock(&uq->uq_key);
1051162536Sdavidxu		umtxq_busy(&uq->uq_key);
1052162536Sdavidxu		umtxq_insert(uq);
1053162536Sdavidxu		umtxq_unbusy(&uq->uq_key);
1054162536Sdavidxu		umtxq_unlock(&uq->uq_key);
1055162536Sdavidxu
1056162536Sdavidxu		/*
1057162536Sdavidxu		 * Set the contested bit so that a release in user space
1058162536Sdavidxu		 * knows to use the system call for unlock.  If this fails
1059162536Sdavidxu		 * either some one else has acquired the lock or it has been
1060162536Sdavidxu		 * released.
1061162536Sdavidxu		 */
1062162536Sdavidxu		old = casuword32(m, owner, owner | UMUTEX_CONTESTED);
1063162536Sdavidxu
1064162536Sdavidxu		/* The address was invalid. */
1065162536Sdavidxu		if (old == -1) {
1066162536Sdavidxu			umtxq_lock(&uq->uq_key);
1067162536Sdavidxu			umtxq_remove(uq);
1068162536Sdavidxu			umtxq_unlock(&uq->uq_key);
1069162536Sdavidxu			umtx_key_release(&uq->uq_key);
1070162536Sdavidxu			return (EFAULT);
1071162536Sdavidxu		}
1072162536Sdavidxu
1073162536Sdavidxu		/*
1074162536Sdavidxu		 * We set the contested bit, sleep. Otherwise the lock changed
1075162536Sdavidxu		 * and we need to retry or we lost a race to the thread
1076162536Sdavidxu		 * unlocking the umtx.
1077162536Sdavidxu		 */
1078162536Sdavidxu		umtxq_lock(&uq->uq_key);
1079162536Sdavidxu		if (old == owner)
1080233690Sdavidxu			error = umtxq_sleep(uq, "umtx", timeout == NULL ?
1081233693Sdavidxu			    NULL : &timo);
1082162536Sdavidxu		umtxq_remove(uq);
1083162536Sdavidxu		umtxq_unlock(&uq->uq_key);
1084162536Sdavidxu		umtx_key_release(&uq->uq_key);
1085162536Sdavidxu	}
1086162536Sdavidxu
1087162536Sdavidxu	if (timeout == NULL) {
1088162536Sdavidxu		/* Mutex locking is restarted if it is interrupted. */
1089162536Sdavidxu		if (error == EINTR)
1090162536Sdavidxu			error = ERESTART;
1091162536Sdavidxu	} else {
1092162536Sdavidxu		/* Timed-locking is not restarted. */
1093162536Sdavidxu		if (error == ERESTART)
1094162536Sdavidxu			error = EINTR;
1095162536Sdavidxu	}
1096162536Sdavidxu	return (error);
1097162536Sdavidxu}
1098162536Sdavidxu
1099162536Sdavidxu/*
1100162536Sdavidxu * Unlock a umtx object.
1101162536Sdavidxu */
static int
do_unlock_umtx32(struct thread *td, uint32_t *m, uint32_t id)
{
	struct umtx_key key;
	uint32_t owner;
	uint32_t old;
	int error;
	int count;

	/*
	 * Make sure we own this mtx.
	 */
	owner = fuword32(m);
	if (owner == -1)
		return (EFAULT);

	if ((owner & ~UMUTEX_CONTESTED) != id)
		return (EPERM);

	/* This should be done in userland */
	if ((owner & UMUTEX_CONTESTED) == 0) {
		/* Uncontested: CAS straight back to unowned. */
		old = casuword32(m, owner, UMUTEX_UNOWNED);
		if (old == -1)
			return (EFAULT);
		if (old == owner)
			return (0);
		/* Raced with a locker; re-read and fall through. */
		owner = old;
	}

	/* We should only ever be in here for contested locks */
	if ((error = umtx_key_get(m, TYPE_SIMPLE_LOCK, AUTO_SHARE,
		&key)) != 0)
		return (error);

	/* Mark the chain busy while sampling the waiter count. */
	umtxq_lock(&key);
	umtxq_busy(&key);
	count = umtxq_count(&key);
	umtxq_unlock(&key);

	/*
	 * When unlocking the umtx, it must be marked as unowned if
	 * there is zero or one thread only waiting for it.
	 * Otherwise, it must be marked as contested.
	 */
	old = casuword32(m, owner,
		count <= 1 ? UMUTEX_UNOWNED : UMUTEX_CONTESTED);
	umtxq_lock(&key);
	umtxq_signal(&key,1);
	umtxq_unbusy(&key);
	umtxq_unlock(&key);
	umtx_key_release(&key);
	if (old == -1)
		return (EFAULT);
	/* The CAS failed: someone else changed the word under us. */
	if (old != owner)
		return (EINVAL);
	return (0);
}
1159162536Sdavidxu#endif
1160162536Sdavidxu
1161162536Sdavidxu/*
1162161678Sdavidxu * Fetch and compare value, sleep on the address if value is not changed.
1163161678Sdavidxu */
static int
do_wait(struct thread *td, void *addr, u_long id,
	struct _umtx_time *timeout, int compat32, int is_private)
{
	struct abs_timeout timo;
	struct umtx_q *uq;
	u_long tmp;
	int error = 0;

	uq = td->td_umtxq;
	if ((error = umtx_key_get(addr, TYPE_SIMPLE_WAIT,
		is_private ? THREAD_SHARE : AUTO_SHARE, &uq->uq_key)) != 0)
		return (error);

	if (timeout != NULL)
		abs_timeout_init2(&timo, timeout);

	/*
	 * Queue first, then re-read the user word, so a wakeup issued
	 * between the read and the sleep finds us on the queue.
	 */
	umtxq_lock(&uq->uq_key);
	umtxq_insert(uq);
	umtxq_unlock(&uq->uq_key);
	if (compat32 == 0)
		tmp = fuword(addr);
	else
		tmp = (unsigned int)fuword32(addr);
	umtxq_lock(&uq->uq_key);
	/* Sleep only if the value still matches the expected one. */
	if (tmp == id)
		error = umtxq_sleep(uq, "uwait", timeout == NULL ?
		    NULL : &timo);
	if ((uq->uq_flags & UQF_UMTXQ) == 0)
		/* A waker removed us from the queue: report success. */
		error = 0;
	else
		umtxq_remove(uq);
	umtxq_unlock(&uq->uq_key);
	umtx_key_release(&uq->uq_key);
	/* Wait is never transparently restarted. */
	if (error == ERESTART)
		error = EINTR;
	return (error);
}
1202139013Sdavidxu
1203161678Sdavidxu/*
1204161678Sdavidxu * Wake up threads sleeping on the specified address.
1205161678Sdavidxu */
1206151692Sdavidxuint
1207178646Sdavidxukern_umtx_wake(struct thread *td, void *uaddr, int n_wake, int is_private)
1208139013Sdavidxu{
1209139013Sdavidxu	struct umtx_key key;
1210139257Sdavidxu	int ret;
1211139013Sdavidxu
1212178646Sdavidxu	if ((ret = umtx_key_get(uaddr, TYPE_SIMPLE_WAIT,
1213178646Sdavidxu		is_private ? THREAD_SHARE : AUTO_SHARE, &key)) != 0)
1214139257Sdavidxu		return (ret);
1215139258Sdavidxu	umtxq_lock(&key);
1216139257Sdavidxu	ret = umtxq_signal(&key, n_wake);
1217139258Sdavidxu	umtxq_unlock(&key);
1218139257Sdavidxu	umtx_key_release(&key);
1219139013Sdavidxu	return (0);
1220139013Sdavidxu}
1221139013Sdavidxu
1222161678Sdavidxu/*
1223161678Sdavidxu * Lock PTHREAD_PRIO_NONE protocol POSIX mutex.
1224161678Sdavidxu */
static int
do_lock_normal(struct thread *td, struct umutex *m, uint32_t flags,
	struct _umtx_time *timeout, int mode)
{
	struct abs_timeout timo;
	struct umtx_q *uq;
	uint32_t owner, old, id;
	int error = 0;

	id = td->td_tid;
	uq = td->td_umtxq;

	if (timeout != NULL)
		abs_timeout_init2(&timo, timeout);

	/*
	 * Care must be exercised when dealing with umtx structure. It
	 * can fault on any access.
	 */
	for (;;) {
		owner = fuword32(__DEVOLATILE(void *, &m->m_owner));
		if (mode == _UMUTEX_WAIT) {
			/* Wait mode: do not acquire, just wait until free. */
			if (owner == UMUTEX_UNOWNED || owner == UMUTEX_CONTESTED)
				return (0);
		} else {
			/*
			 * Try the uncontested case.  This should be done in userland.
			 */
			owner = casuword32(&m->m_owner, UMUTEX_UNOWNED, id);

			/* The acquire succeeded. */
			if (owner == UMUTEX_UNOWNED)
				return (0);

			/* The address was invalid. */
			if (owner == -1)
				return (EFAULT);

			/* If no one owns it but it is contested try to acquire it. */
			if (owner == UMUTEX_CONTESTED) {
				owner = casuword32(&m->m_owner,
				    UMUTEX_CONTESTED, id | UMUTEX_CONTESTED);

				if (owner == UMUTEX_CONTESTED)
					return (0);

				/* The address was invalid. */
				if (owner == -1)
					return (EFAULT);

				/* If this failed the lock has changed, restart. */
				continue;
			}
		}

		/* Error-checking mutex: relocking by the owner deadlocks. */
		if ((flags & UMUTEX_ERROR_CHECK) != 0 &&
		    (owner & ~UMUTEX_CONTESTED) == id)
			return (EDEADLK);

		if (mode == _UMUTEX_TRY)
			return (EBUSY);

		/*
		 * If we caught a signal, we have retried and now
		 * exit immediately.  EINTR/ERESTART conversion is
		 * presumably done by the caller -- TODO confirm.
		 */
		if (error != 0)
			return (error);

		if ((error = umtx_key_get(m, TYPE_NORMAL_UMUTEX,
		    GET_SHARE(flags), &uq->uq_key)) != 0)
			return (error);

		/*
		 * Queue ourselves and keep the chain busy until after the
		 * contested bit is set, so no wakeup can be lost.
		 */
		umtxq_lock(&uq->uq_key);
		umtxq_busy(&uq->uq_key);
		umtxq_insert(uq);
		umtxq_unlock(&uq->uq_key);

		/*
		 * Set the contested bit so that a release in user space
		 * knows to use the system call for unlock.  If this fails
		 * either some one else has acquired the lock or it has been
		 * released.
		 */
		old = casuword32(&m->m_owner, owner, owner | UMUTEX_CONTESTED);

		/* The address was invalid. */
		if (old == -1) {
			umtxq_lock(&uq->uq_key);
			umtxq_remove(uq);
			umtxq_unbusy(&uq->uq_key);
			umtxq_unlock(&uq->uq_key);
			umtx_key_release(&uq->uq_key);
			return (EFAULT);
		}

		/*
		 * We set the contested bit, sleep. Otherwise the lock changed
		 * and we need to retry or we lost a race to the thread
		 * unlocking the umtx.
		 */
		umtxq_lock(&uq->uq_key);
		umtxq_unbusy(&uq->uq_key);
		if (old == owner)
			error = umtxq_sleep(uq, "umtxn", timeout == NULL ?
			    NULL : &timo);
		umtxq_remove(uq);
		umtxq_unlock(&uq->uq_key);
		umtx_key_release(&uq->uq_key);
	}

	return (0);
}
1338161678Sdavidxu
1339161678Sdavidxu/*
1340161678Sdavidxu * Unlock PTHREAD_PRIO_NONE protocol POSIX mutex.
1341161678Sdavidxu */
static int
do_unlock_normal(struct thread *td, struct umutex *m, uint32_t flags)
{
	struct umtx_key key;
	uint32_t owner, old, id;
	int error;
	int count;

	id = td->td_tid;
	/*
	 * Make sure we own this mtx.
	 */
	owner = fuword32(__DEVOLATILE(uint32_t *, &m->m_owner));
	if (owner == -1)
		return (EFAULT);

	if ((owner & ~UMUTEX_CONTESTED) != id)
		return (EPERM);

	if ((owner & UMUTEX_CONTESTED) == 0) {
		/* Uncontested: CAS straight back to unowned. */
		old = casuword32(&m->m_owner, owner, UMUTEX_UNOWNED);
		if (old == -1)
			return (EFAULT);
		if (old == owner)
			return (0);
		/* Raced with a locker; re-read and fall through. */
		owner = old;
	}

	/* We should only ever be in here for contested locks */
	if ((error = umtx_key_get(m, TYPE_NORMAL_UMUTEX, GET_SHARE(flags),
	    &key)) != 0)
		return (error);

	/* Mark the chain busy while sampling the waiter count. */
	umtxq_lock(&key);
	umtxq_busy(&key);
	count = umtxq_count(&key);
	umtxq_unlock(&key);

	/*
	 * When unlocking the umtx, it must be marked as unowned if
	 * there is zero or one thread only waiting for it.
	 * Otherwise, it must be marked as contested.
	 */
	old = casuword32(&m->m_owner, owner,
		count <= 1 ? UMUTEX_UNOWNED : UMUTEX_CONTESTED);
	umtxq_lock(&key);
	umtxq_signal(&key,1);
	umtxq_unbusy(&key);
	umtxq_unlock(&key);
	umtx_key_release(&key);
	if (old == -1)
		return (EFAULT);
	/* The CAS failed: someone else changed the word under us. */
	if (old != owner)
		return (EINVAL);
	return (0);
}
1398161678Sdavidxu
1399179970Sdavidxu/*
1400179970Sdavidxu * Check if the mutex is available and wake up a waiter,
1401179970Sdavidxu * only for simple mutex.
1402179970Sdavidxu */
static int
do_wake_umutex(struct thread *td, struct umutex *m)
{
	struct umtx_key key;
	uint32_t owner;
	uint32_t flags;
	int error;
	int count;

	owner = fuword32(__DEVOLATILE(uint32_t *, &m->m_owner));
	if (owner == -1)
		return (EFAULT);

	/* Nothing to do unless the mutex is free (possibly contested). */
	if ((owner & ~UMUTEX_CONTESTED) != 0)
		return (0);

	/* NOTE(review): this fuword32() is not checked for -1 (fault). */
	flags = fuword32(&m->m_flags);

	/* We should only ever be in here for contested locks */
	if ((error = umtx_key_get(m, TYPE_NORMAL_UMUTEX, GET_SHARE(flags),
	    &key)) != 0)
		return (error);

	/* Mark the chain busy while sampling the waiter count. */
	umtxq_lock(&key);
	umtxq_busy(&key);
	count = umtxq_count(&key);
	umtxq_unlock(&key);

	/* Last waiter: try to clear the contested bit as well. */
	if (count <= 1)
		owner = casuword32(&m->m_owner, UMUTEX_CONTESTED, UMUTEX_UNOWNED);

	umtxq_lock(&key);
	if (count != 0 && (owner & ~UMUTEX_CONTESTED) == 0)
		umtxq_signal(&key, 1);
	umtxq_unbusy(&key);
	umtxq_unlock(&key);
	umtx_key_release(&key);
	return (0);
}
1442179970Sdavidxu
1443233912Sdavidxu/*
1444233912Sdavidxu * Check if the mutex has waiters and tries to fix contention bit.
1445233912Sdavidxu */
1446233912Sdavidxustatic int
1447233912Sdavidxudo_wake2_umutex(struct thread *td, struct umutex *m, uint32_t flags)
1448233912Sdavidxu{
1449233912Sdavidxu	struct umtx_key key;
1450233912Sdavidxu	uint32_t owner, old;
1451233912Sdavidxu	int type;
1452233912Sdavidxu	int error;
1453233912Sdavidxu	int count;
1454233912Sdavidxu
1455233912Sdavidxu	switch(flags & (UMUTEX_PRIO_INHERIT | UMUTEX_PRIO_PROTECT)) {
1456233912Sdavidxu	case 0:
1457233912Sdavidxu		type = TYPE_NORMAL_UMUTEX;
1458233912Sdavidxu		break;
1459233912Sdavidxu	case UMUTEX_PRIO_INHERIT:
1460233912Sdavidxu		type = TYPE_PI_UMUTEX;
1461233912Sdavidxu		break;
1462233912Sdavidxu	case UMUTEX_PRIO_PROTECT:
1463233912Sdavidxu		type = TYPE_PP_UMUTEX;
1464233912Sdavidxu		break;
1465233912Sdavidxu	default:
1466233912Sdavidxu		return (EINVAL);
1467233912Sdavidxu	}
1468233912Sdavidxu	if ((error = umtx_key_get(m, type, GET_SHARE(flags),
1469233912Sdavidxu	    &key)) != 0)
1470233912Sdavidxu		return (error);
1471233912Sdavidxu
1472233912Sdavidxu	owner = 0;
1473233912Sdavidxu	umtxq_lock(&key);
1474233912Sdavidxu	umtxq_busy(&key);
1475233912Sdavidxu	count = umtxq_count(&key);
1476233912Sdavidxu	umtxq_unlock(&key);
1477233912Sdavidxu	/*
1478233912Sdavidxu	 * Only repair contention bit if there is a waiter, this means the mutex
1479233912Sdavidxu	 * is still being referenced by userland code, otherwise don't update
1480233912Sdavidxu	 * any memory.
1481233912Sdavidxu	 */
1482233912Sdavidxu	if (count > 1) {
1483233912Sdavidxu		owner = fuword32(__DEVOLATILE(uint32_t *, &m->m_owner));
1484233912Sdavidxu		while ((owner & UMUTEX_CONTESTED) ==0) {
1485233912Sdavidxu			old = casuword32(&m->m_owner, owner,
1486233912Sdavidxu			    owner|UMUTEX_CONTESTED);
1487233912Sdavidxu			if (old == owner)
1488233912Sdavidxu				break;
1489233912Sdavidxu			owner = old;
1490233912Sdavidxu		}
1491233912Sdavidxu	} else if (count == 1) {
1492233912Sdavidxu		owner = fuword32(__DEVOLATILE(uint32_t *, &m->m_owner));
1493233912Sdavidxu		while ((owner & ~UMUTEX_CONTESTED) != 0 &&
1494233912Sdavidxu		       (owner & UMUTEX_CONTESTED) == 0) {
1495233912Sdavidxu			old = casuword32(&m->m_owner, owner,
1496233912Sdavidxu			    owner|UMUTEX_CONTESTED);
1497233912Sdavidxu			if (old == owner)
1498233912Sdavidxu				break;
1499233912Sdavidxu			owner = old;
1500233912Sdavidxu		}
1501233912Sdavidxu	}
1502233912Sdavidxu	umtxq_lock(&key);
1503233912Sdavidxu	if (owner == -1) {
1504233912Sdavidxu		error = EFAULT;
1505233912Sdavidxu		umtxq_signal(&key, INT_MAX);
1506233912Sdavidxu	}
1507233912Sdavidxu	else if (count != 0 && (owner & ~UMUTEX_CONTESTED) == 0)
1508233912Sdavidxu		umtxq_signal(&key, 1);
1509233912Sdavidxu	umtxq_unbusy(&key);
1510233912Sdavidxu	umtxq_unlock(&key);
1511233912Sdavidxu	umtx_key_release(&key);
1512233912Sdavidxu	return (error);
1513233912Sdavidxu}
1514233912Sdavidxu
1515161678Sdavidxustatic inline struct umtx_pi *
1516163697Sdavidxuumtx_pi_alloc(int flags)
1517161678Sdavidxu{
1518161678Sdavidxu	struct umtx_pi *pi;
1519161678Sdavidxu
1520163697Sdavidxu	pi = uma_zalloc(umtx_pi_zone, M_ZERO | flags);
1521161678Sdavidxu	TAILQ_INIT(&pi->pi_blocked);
1522161678Sdavidxu	atomic_add_int(&umtx_pi_allocated, 1);
1523161678Sdavidxu	return (pi);
1524161678Sdavidxu}
1525161678Sdavidxu
1526161678Sdavidxustatic inline void
1527161678Sdavidxuumtx_pi_free(struct umtx_pi *pi)
1528161678Sdavidxu{
1529161678Sdavidxu	uma_zfree(umtx_pi_zone, pi);
1530161678Sdavidxu	atomic_add_int(&umtx_pi_allocated, -1);
1531161678Sdavidxu}
1532161678Sdavidxu
1533161678Sdavidxu/*
1534161678Sdavidxu * Adjust the thread's position on a pi_state after its priority has been
1535161678Sdavidxu * changed.
1536161678Sdavidxu */
static int
umtx_pi_adjust_thread(struct umtx_pi *pi, struct thread *td)
{
	struct umtx_q *uq, *uq1, *uq2;
	struct thread *td1;

	mtx_assert(&umtx_lock, MA_OWNED);
	if (pi == NULL)
		return (0);

	uq = td->td_umtxq;

	/*
	 * Check if the thread needs to be moved on the blocked chain.
	 * It needs to be moved if either its priority is lower than
	 * the previous thread or higher than the next thread.
	 */
	uq1 = TAILQ_PREV(uq, umtxq_head, uq_lockq);
	uq2 = TAILQ_NEXT(uq, uq_lockq);
	if ((uq1 != NULL && UPRI(td) < UPRI(uq1->uq_thread)) ||
	    (uq2 != NULL && UPRI(td) > UPRI(uq2->uq_thread))) {
		/*
		 * Remove thread from blocked chain and determine where
		 * it should be moved to.  The chain is kept sorted by
		 * UPRI() in ascending order.
		 */
		TAILQ_REMOVE(&pi->pi_blocked, uq, uq_lockq);
		TAILQ_FOREACH(uq1, &pi->pi_blocked, uq_lockq) {
			td1 = uq1->uq_thread;
			MPASS(td1->td_proc->p_magic == P_MAGIC);
			if (UPRI(td1) > UPRI(td))
				break;
		}

		if (uq1 == NULL)
			TAILQ_INSERT_TAIL(&pi->pi_blocked, uq, uq_lockq);
		else
			TAILQ_INSERT_BEFORE(uq1, uq, uq_lockq);
	}
	/* Returns 1 whenever a PI record was supplied (moved or not). */
	return (1);
}
1577161678Sdavidxu
1578161678Sdavidxu/*
1579161678Sdavidxu * Propagate priority when a thread is blocked on POSIX
1580161678Sdavidxu * PI mutex.
1581161678Sdavidxu */
/*
 * NOTE(review): walks the blocking chain (owner of the PI mutex td
 * sleeps on, then the mutex that owner sleeps on, ...), lending pri
 * (td's priority at entry) to each owner whose currently lent user
 * priority is worse.  The walk stops when an owner already has an
 * equal or better lent priority, when an owner is not blocked on a
 * PI mutex, or when the owner is curthread (a userland-created
 * cycle).  The umtx_lock spin mutex must be held throughout.
 */
1582161678Sdavidxustatic void
1583161678Sdavidxuumtx_propagate_priority(struct thread *td)
1584161678Sdavidxu{
1585161678Sdavidxu	struct umtx_q *uq;
1586161678Sdavidxu	struct umtx_pi *pi;
1587161678Sdavidxu	int pri;
1588161678Sdavidxu
1589170300Sjeff	mtx_assert(&umtx_lock, MA_OWNED);
1590161678Sdavidxu	pri = UPRI(td);
1591161678Sdavidxu	uq = td->td_umtxq;
1592161678Sdavidxu	pi = uq->uq_pi_blocked;
1593161678Sdavidxu	if (pi == NULL)
1594161678Sdavidxu		return;
1595161678Sdavidxu
1596161678Sdavidxu	for (;;) {
1597161678Sdavidxu		td = pi->pi_owner;
1598216313Sdavidxu		if (td == NULL || td == curthread)
1599161678Sdavidxu			return;
1600161678Sdavidxu
1601161678Sdavidxu		MPASS(td->td_proc != NULL);
1602161678Sdavidxu		MPASS(td->td_proc->p_magic == P_MAGIC);
1603161678Sdavidxu
1604170300Sjeff		thread_lock(td);
1605216313Sdavidxu		if (td->td_lend_user_pri > pri)
1606216313Sdavidxu			sched_lend_user_prio(td, pri);
1607216313Sdavidxu		else {
			/* Owner already runs at least this well; stop. */
1608216313Sdavidxu			thread_unlock(td);
1609216313Sdavidxu			break;
1610216313Sdavidxu		}
1611170300Sjeff		thread_unlock(td);
1612161678Sdavidxu
1613161678Sdavidxu		/*
1614161678Sdavidxu		 * Pick up the lock that td is blocked on.
1615161678Sdavidxu		 */
1616161678Sdavidxu		uq = td->td_umtxq;
1617161678Sdavidxu		pi = uq->uq_pi_blocked;
1618216791Sdavidxu		if (pi == NULL)
1619216791Sdavidxu			break;
1620161678Sdavidxu		/* Resort td on the list if needed. */
1621216791Sdavidxu		umtx_pi_adjust_thread(pi, td);
1622161678Sdavidxu	}
1623161678Sdavidxu}
1624161678Sdavidxu
1625161678Sdavidxu/*
1626161678Sdavidxu * Unpropagate priority for a PI mutex when a thread blocked on
1627161678Sdavidxu * it is interrupted by signal or resumed by others.
1628161678Sdavidxu */
/*
 * NOTE(review): recomputes each chain owner's lent priority from
 * scratch: the best priority among the head waiters of all PI
 * mutexes the owner still holds, further clamped by the owner's own
 * PP-inherited priority (uq_inherited_pri).  Walks up the chain as
 * long as each owner is itself blocked on a PI mutex.  The umtx_lock
 * spin mutex must be held.
 */
1629161678Sdavidxustatic void
1630216791Sdavidxuumtx_repropagate_priority(struct umtx_pi *pi)
1631161678Sdavidxu{
1632161678Sdavidxu	struct umtx_q *uq, *uq_owner;
1633161678Sdavidxu	struct umtx_pi *pi2;
1634216791Sdavidxu	int pri;
1635161678Sdavidxu
1636170300Sjeff	mtx_assert(&umtx_lock, MA_OWNED);
1637161678Sdavidxu
1638161678Sdavidxu	while (pi != NULL && pi->pi_owner != NULL) {
1639161678Sdavidxu		pri = PRI_MAX;
1640161678Sdavidxu		uq_owner = pi->pi_owner->td_umtxq;
1641161678Sdavidxu
		/* Head of each pi_blocked list is its best-priority waiter. */
1642161678Sdavidxu		TAILQ_FOREACH(pi2, &uq_owner->uq_pi_contested, pi_link) {
1643161678Sdavidxu			uq = TAILQ_FIRST(&pi2->pi_blocked);
1644161678Sdavidxu			if (uq != NULL) {
1645161678Sdavidxu				if (pri > UPRI(uq->uq_thread))
1646161678Sdavidxu					pri = UPRI(uq->uq_thread);
1647161678Sdavidxu			}
1648161678Sdavidxu		}
1649161678Sdavidxu
1650161678Sdavidxu		if (pri > uq_owner->uq_inherited_pri)
1651161678Sdavidxu			pri = uq_owner->uq_inherited_pri;
1652170300Sjeff		thread_lock(pi->pi_owner);
1653216791Sdavidxu		sched_lend_user_prio(pi->pi_owner, pri);
1654170300Sjeff		thread_unlock(pi->pi_owner);
		/* Continue up the chain if this owner is itself blocked. */
1655216791Sdavidxu		if ((pi = uq_owner->uq_pi_blocked) != NULL)
1656216791Sdavidxu			umtx_pi_adjust_thread(pi, uq_owner->uq_thread);
1657161678Sdavidxu	}
1658161678Sdavidxu}
1659161678Sdavidxu
1660161678Sdavidxu/*
1661161678Sdavidxu * Insert a PI mutex into owned list.
1662161678Sdavidxu */
1663161678Sdavidxustatic void
1664161678Sdavidxuumtx_pi_setowner(struct umtx_pi *pi, struct thread *owner)
1665161678Sdavidxu{
1666161678Sdavidxu	struct umtx_q *uq_owner;
1667161678Sdavidxu
1668161678Sdavidxu	uq_owner = owner->td_umtxq;
1669170300Sjeff	mtx_assert(&umtx_lock, MA_OWNED);
1670161678Sdavidxu	if (pi->pi_owner != NULL)
1671161678Sdavidxu		panic("pi_ower != NULL");
1672161678Sdavidxu	pi->pi_owner = owner;
1673161678Sdavidxu	TAILQ_INSERT_TAIL(&uq_owner->uq_pi_contested, pi, pi_link);
1674161678Sdavidxu}
1675161678Sdavidxu
1676161678Sdavidxu/*
1677161678Sdavidxu * Claim ownership of a PI mutex.
1678161678Sdavidxu */
/*
 * NOTE(review): take ownership of pi for "owner".  Returns 0 on
 * success (or if owner already owns it); EPERM if some other thread
 * already owns it (userland corrupted the mutex).  On success, if a
 * waiter is queued, the best waiter's priority is lent to the new
 * owner immediately.  Acquires and releases umtx_lock internally.
 */
1679161678Sdavidxustatic int
1680161678Sdavidxuumtx_pi_claim(struct umtx_pi *pi, struct thread *owner)
1681161678Sdavidxu{
1682161678Sdavidxu	struct umtx_q *uq, *uq_owner;
1683161678Sdavidxu
1684161678Sdavidxu	uq_owner = owner->td_umtxq;
1685170300Sjeff	mtx_lock_spin(&umtx_lock);
1686161678Sdavidxu	if (pi->pi_owner == owner) {
1687170300Sjeff		mtx_unlock_spin(&umtx_lock);
1688161678Sdavidxu		return (0);
1689161678Sdavidxu	}
1690161678Sdavidxu
1691161678Sdavidxu	if (pi->pi_owner != NULL) {
1692161678Sdavidxu		/*
1693161678Sdavidxu		 * userland may have already messed the mutex, sigh.
1694161678Sdavidxu		 */
1695170300Sjeff		mtx_unlock_spin(&umtx_lock);
1696161678Sdavidxu		return (EPERM);
1697161678Sdavidxu	}
1698161678Sdavidxu	umtx_pi_setowner(pi, owner);
	/* List head is the best-priority waiter; lend its priority. */
1699161678Sdavidxu	uq = TAILQ_FIRST(&pi->pi_blocked);
1700161678Sdavidxu	if (uq != NULL) {
1701161678Sdavidxu		int pri;
1702161678Sdavidxu
1703161678Sdavidxu		pri = UPRI(uq->uq_thread);
1704170300Sjeff		thread_lock(owner);
1705161678Sdavidxu		if (pri < UPRI(owner))
1706161678Sdavidxu			sched_lend_user_prio(owner, pri);
1707170300Sjeff		thread_unlock(owner);
1708161678Sdavidxu	}
1709170300Sjeff	mtx_unlock_spin(&umtx_lock);
1710161678Sdavidxu	return (0);
1711161678Sdavidxu}
1712161678Sdavidxu
1713161678Sdavidxu/*
1714174701Sdavidxu * Adjust a thread's order position in its blocked PI mutex,
1715174701Sdavidxu * this may result new priority propagating process.
1716174701Sdavidxu */
/*
 * NOTE(review): external entry point invoked after td's priority has
 * changed (oldpri is unused here).  If td is blocked on a PI mutex,
 * re-sort td in that mutex's waiter list and re-propagate priorities
 * up the ownership chain.  Takes umtx_lock internally.
 */
1717174701Sdavidxuvoid
1718174701Sdavidxuumtx_pi_adjust(struct thread *td, u_char oldpri)
1719174701Sdavidxu{
1720174707Sdavidxu	struct umtx_q *uq;
1721174707Sdavidxu	struct umtx_pi *pi;
1722174707Sdavidxu
1723174707Sdavidxu	uq = td->td_umtxq;
1724174701Sdavidxu	mtx_lock_spin(&umtx_lock);
1725174707Sdavidxu	/*
1726174707Sdavidxu	 * Pick up the lock that td is blocked on.
1727174707Sdavidxu	 */
1728174707Sdavidxu	pi = uq->uq_pi_blocked;
1729216791Sdavidxu	if (pi != NULL) {
1730216791Sdavidxu		umtx_pi_adjust_thread(pi, td);
1731216791Sdavidxu		umtx_repropagate_priority(pi);
1732216791Sdavidxu	}
1733174701Sdavidxu	mtx_unlock_spin(&umtx_lock);
1734174701Sdavidxu}
1735174701Sdavidxu
1736174701Sdavidxu/*
1737161678Sdavidxu * Sleep on a PI mutex.
1738161678Sdavidxu */
/*
 * NOTE(review): block uq's thread on PI mutex "pi" until woken or
 * the absolute timeout "timo" (NULL = sleep forever) fires.  Entered
 * with the chain locked and busied; the chain lock is dropped on
 * return.  "owner" is the tid read from userspace, used to bind an
 * ownerless pi to its owning thread.  Queues uq in pi_blocked in
 * priority order and propagates priority before sleeping; undoes
 * both after waking.  Returns the umtxq_sleep() error (0, EINTR,
 * ETIMEDOUT, ...).
 */
1739161678Sdavidxustatic int
1740161678Sdavidxuumtxq_sleep_pi(struct umtx_q *uq, struct umtx_pi *pi,
1741233690Sdavidxu	uint32_t owner, const char *wmesg, struct abs_timeout *timo)
1742161678Sdavidxu{
1743161678Sdavidxu	struct umtxq_chain *uc;
1744161678Sdavidxu	struct thread *td, *td1;
1745161678Sdavidxu	struct umtx_q *uq1;
1746161678Sdavidxu	int pri;
1747161678Sdavidxu	int error = 0;
1748161678Sdavidxu
1749161678Sdavidxu	td = uq->uq_thread;
1750161678Sdavidxu	KASSERT(td == curthread, ("inconsistent uq_thread"));
1751161678Sdavidxu	uc = umtxq_getchain(&uq->uq_key);
1752161678Sdavidxu	UMTXQ_LOCKED_ASSERT(uc);
1753189756Sdavidxu	UMTXQ_BUSY_ASSERT(uc);
1754161678Sdavidxu	umtxq_insert(uq);
1755189756Sdavidxu	mtx_lock_spin(&umtx_lock);
1756161678Sdavidxu	if (pi->pi_owner == NULL) {
1757189756Sdavidxu		mtx_unlock_spin(&umtx_lock);
1758213642Sdavidxu		/* XXX Only look up thread in current process. */
1759213642Sdavidxu		td1 = tdfind(owner, curproc->p_pid);
1760170300Sjeff		mtx_lock_spin(&umtx_lock);
1761215336Sdavidxu		if (td1 != NULL) {
			/*
			 * Recheck pi_owner: it may have been set while the
			 * spin lock was dropped for tdfind().  The PROC_UNLOCK
			 * below releases the proc lock tdfind() returned held
			 * — presumably; confirm against tdfind()'s contract.
			 */
1762215336Sdavidxu			if (pi->pi_owner == NULL)
1763215336Sdavidxu				umtx_pi_setowner(pi, td1);
1764215336Sdavidxu			PROC_UNLOCK(td1->td_proc);
1765161678Sdavidxu		}
1766161678Sdavidxu	}
1767161678Sdavidxu
	/* Insert in priority order (list sorted by increasing UPRI). */
1768161678Sdavidxu	TAILQ_FOREACH(uq1, &pi->pi_blocked, uq_lockq) {
1769161678Sdavidxu		pri = UPRI(uq1->uq_thread);
1770161678Sdavidxu		if (pri > UPRI(td))
1771161678Sdavidxu			break;
1772161678Sdavidxu	}
1773161678Sdavidxu
1774161678Sdavidxu	if (uq1 != NULL)
1775161678Sdavidxu		TAILQ_INSERT_BEFORE(uq1, uq, uq_lockq);
1776161678Sdavidxu	else
1777161678Sdavidxu		TAILQ_INSERT_TAIL(&pi->pi_blocked, uq, uq_lockq);
1778161678Sdavidxu
1779161678Sdavidxu	uq->uq_pi_blocked = pi;
1780174701Sdavidxu	thread_lock(td);
1781161678Sdavidxu	td->td_flags |= TDF_UPIBLOCKED;
1782174701Sdavidxu	thread_unlock(td);
1783161678Sdavidxu	umtx_propagate_priority(td);
1784170300Sjeff	mtx_unlock_spin(&umtx_lock);
1785189756Sdavidxu	umtxq_unbusy(&uq->uq_key);
1786161678Sdavidxu
1787233690Sdavidxu	error = umtxq_sleep(uq, wmesg, timo);
1788233690Sdavidxu	umtxq_remove(uq);
1789233690Sdavidxu
	/* Undo the blocked state and give back any lent priority. */
1790170300Sjeff	mtx_lock_spin(&umtx_lock);
1791161678Sdavidxu	uq->uq_pi_blocked = NULL;
1792174701Sdavidxu	thread_lock(td);
1793161678Sdavidxu	td->td_flags &= ~TDF_UPIBLOCKED;
1794174701Sdavidxu	thread_unlock(td);
1795161678Sdavidxu	TAILQ_REMOVE(&pi->pi_blocked, uq, uq_lockq);
1796216791Sdavidxu	umtx_repropagate_priority(pi);
1797170300Sjeff	mtx_unlock_spin(&umtx_lock);
1798189756Sdavidxu	umtxq_unlock(&uq->uq_key);
1799161678Sdavidxu
1800161678Sdavidxu	return (error);
1801161678Sdavidxu}
1802161678Sdavidxu
1803161678Sdavidxu/*
1804161678Sdavidxu * Add reference count for a PI mutex.
1805161678Sdavidxu */
1806161678Sdavidxustatic void
1807161678Sdavidxuumtx_pi_ref(struct umtx_pi *pi)
1808161678Sdavidxu{
1809161678Sdavidxu	struct umtxq_chain *uc;
1810161678Sdavidxu
1811161678Sdavidxu	uc = umtxq_getchain(&pi->pi_key);
1812161678Sdavidxu	UMTXQ_LOCKED_ASSERT(uc);
1813161678Sdavidxu	pi->pi_refcount++;
1814161678Sdavidxu}
1815161678Sdavidxu
1816161678Sdavidxu/*
1817161678Sdavidxu * Decrease reference count for a PI mutex, if the counter
1818161678Sdavidxu * is decreased to zero, its memory space is freed.
1819161678Sdavidxu */
/*
 * NOTE(review): drop one reference; on the last reference, detach
 * the pi from its owner's contested list (under umtx_lock), unlink
 * it from the hash chain, and free it.  The chain lock must be held;
 * the blocked queue must already be empty at this point.
 */
1820161678Sdavidxustatic void
1821161678Sdavidxuumtx_pi_unref(struct umtx_pi *pi)
1822161678Sdavidxu{
1823161678Sdavidxu	struct umtxq_chain *uc;
1824161678Sdavidxu
1825161678Sdavidxu	uc = umtxq_getchain(&pi->pi_key);
1826161678Sdavidxu	UMTXQ_LOCKED_ASSERT(uc);
1827161678Sdavidxu	KASSERT(pi->pi_refcount > 0, ("invalid reference count"));
1828161678Sdavidxu	if (--pi->pi_refcount == 0) {
1829170300Sjeff		mtx_lock_spin(&umtx_lock);
1830161678Sdavidxu		if (pi->pi_owner != NULL) {
1831161678Sdavidxu			TAILQ_REMOVE(&pi->pi_owner->td_umtxq->uq_pi_contested,
1832161678Sdavidxu				pi, pi_link);
1833161678Sdavidxu			pi->pi_owner = NULL;
1834161678Sdavidxu		}
1835161678Sdavidxu		KASSERT(TAILQ_EMPTY(&pi->pi_blocked),
1836161678Sdavidxu			("blocked queue not empty"));
1837170300Sjeff		mtx_unlock_spin(&umtx_lock);
1838161678Sdavidxu		TAILQ_REMOVE(&uc->uc_pi_list, pi, pi_hashlink);
1839189756Sdavidxu		umtx_pi_free(pi);
1840161678Sdavidxu	}
1841161678Sdavidxu}
1842161678Sdavidxu
1843161678Sdavidxu/*
1844161678Sdavidxu * Find a PI mutex in hash table.
1845161678Sdavidxu */
1846161678Sdavidxustatic struct umtx_pi *
1847161678Sdavidxuumtx_pi_lookup(struct umtx_key *key)
1848161678Sdavidxu{
1849161678Sdavidxu	struct umtxq_chain *uc;
1850161678Sdavidxu	struct umtx_pi *pi;
1851161678Sdavidxu
1852161678Sdavidxu	uc = umtxq_getchain(key);
1853161678Sdavidxu	UMTXQ_LOCKED_ASSERT(uc);
1854161678Sdavidxu
1855161678Sdavidxu	TAILQ_FOREACH(pi, &uc->uc_pi_list, pi_hashlink) {
1856161678Sdavidxu		if (umtx_key_match(&pi->pi_key, key)) {
1857161678Sdavidxu			return (pi);
1858161678Sdavidxu		}
1859161678Sdavidxu	}
1860161678Sdavidxu	return (NULL);
1861161678Sdavidxu}
1862161678Sdavidxu
1863161678Sdavidxu/*
1864161678Sdavidxu * Insert a PI mutex into hash table.
1865161678Sdavidxu */
1866161678Sdavidxustatic inline void
1867161678Sdavidxuumtx_pi_insert(struct umtx_pi *pi)
1868161678Sdavidxu{
1869161678Sdavidxu	struct umtxq_chain *uc;
1870161678Sdavidxu
1871161678Sdavidxu	uc = umtxq_getchain(&pi->pi_key);
1872161678Sdavidxu	UMTXQ_LOCKED_ASSERT(uc);
1873161678Sdavidxu	TAILQ_INSERT_TAIL(&uc->uc_pi_list, pi, pi_hashlink);
1874161678Sdavidxu}
1875161678Sdavidxu
1876161678Sdavidxu/*
1877161678Sdavidxu * Lock a PI mutex.
1878161678Sdavidxu */
/*
 * NOTE(review): acquire a priority-inheritance umutex for td.
 * "timeout" NULL means block indefinitely; "try" non-zero converts
 * contention into EBUSY.  The body is a userspace-CAS retry loop:
 * casuword32() returning (uint32_t)-1 indicates a fault on the user
 * address and maps to EFAULT.  Returns 0, EFAULT, EDEADLK, EBUSY,
 * or the error from umtxq_sleep_pi() (EINTR/ETIMEDOUT/...).
 */
1879161678Sdavidxustatic int
1880233690Sdavidxudo_lock_pi(struct thread *td, struct umutex *m, uint32_t flags,
1881233690Sdavidxu    struct _umtx_time *timeout, int try)
1882161678Sdavidxu{
1883233690Sdavidxu	struct abs_timeout timo;
1884161678Sdavidxu	struct umtx_q *uq;
1885161678Sdavidxu	struct umtx_pi *pi, *new_pi;
1886161678Sdavidxu	uint32_t id, owner, old;
1887161678Sdavidxu	int error;
1888161678Sdavidxu
1889161678Sdavidxu	id = td->td_tid;
1890161678Sdavidxu	uq = td->td_umtxq;
1891161678Sdavidxu
1892161678Sdavidxu	if ((error = umtx_key_get(m, TYPE_PI_UMUTEX, GET_SHARE(flags),
1893161678Sdavidxu	    &uq->uq_key)) != 0)
1894161678Sdavidxu		return (error);
1895233690Sdavidxu
1896233690Sdavidxu	if (timeout != NULL)
1897233690Sdavidxu		abs_timeout_init2(&timo, timeout);
1898233690Sdavidxu
	/*
	 * Find or create the kernel-side pi state for this key.  The
	 * allocation is two-phase: try M_NOWAIT under the chain lock;
	 * if that fails, drop the lock for a sleeping M_WAITOK alloc
	 * and re-lookup, since another thread may have inserted one
	 * while the lock was dropped.
	 */
1899163697Sdavidxu	umtxq_lock(&uq->uq_key);
1900163697Sdavidxu	pi = umtx_pi_lookup(&uq->uq_key);
1901163697Sdavidxu	if (pi == NULL) {
1902163697Sdavidxu		new_pi = umtx_pi_alloc(M_NOWAIT);
1903163697Sdavidxu		if (new_pi == NULL) {
1904161678Sdavidxu			umtxq_unlock(&uq->uq_key);
1905163697Sdavidxu			new_pi = umtx_pi_alloc(M_WAITOK);
1906161678Sdavidxu			umtxq_lock(&uq->uq_key);
1907161678Sdavidxu			pi = umtx_pi_lookup(&uq->uq_key);
1908163697Sdavidxu			if (pi != NULL) {
1909161678Sdavidxu				umtx_pi_free(new_pi);
1910163697Sdavidxu				new_pi = NULL;
1911161678Sdavidxu			}
1912161678Sdavidxu		}
1913163697Sdavidxu		if (new_pi != NULL) {
1914163697Sdavidxu			new_pi->pi_key = uq->uq_key;
1915163697Sdavidxu			umtx_pi_insert(new_pi);
1916163697Sdavidxu			pi = new_pi;
1917163697Sdavidxu		}
1918163697Sdavidxu	}
1919163697Sdavidxu	umtx_pi_ref(pi);
1920163697Sdavidxu	umtxq_unlock(&uq->uq_key);
1921161678Sdavidxu
1922163697Sdavidxu	/*
1923163697Sdavidxu	 * Care must be exercised when dealing with umtx structure.  It
1924163697Sdavidxu	 * can fault on any access.
1925163697Sdavidxu	 */
1926163697Sdavidxu	for (;;) {
1927161678Sdavidxu		/*
1928161678Sdavidxu		 * Try the uncontested case.  This should be done in userland.
1929161678Sdavidxu		 */
1930161678Sdavidxu		owner = casuword32(&m->m_owner, UMUTEX_UNOWNED, id);
1931161678Sdavidxu
1932161678Sdavidxu		/* The acquire succeeded. */
1933161678Sdavidxu		if (owner == UMUTEX_UNOWNED) {
1934161678Sdavidxu			error = 0;
1935161678Sdavidxu			break;
1936161678Sdavidxu		}
1937161678Sdavidxu
1938161678Sdavidxu		/* The address was invalid. */
1939161678Sdavidxu		if (owner == -1) {
1940161678Sdavidxu			error = EFAULT;
1941161678Sdavidxu			break;
1942161678Sdavidxu		}
1943161678Sdavidxu
1944161678Sdavidxu		/* If no one owns it but it is contested try to acquire it. */
1945161678Sdavidxu		if (owner == UMUTEX_CONTESTED) {
1946161678Sdavidxu			owner = casuword32(&m->m_owner,
1947161678Sdavidxu			    UMUTEX_CONTESTED, id | UMUTEX_CONTESTED);
1948161678Sdavidxu
1949161678Sdavidxu			if (owner == UMUTEX_CONTESTED) {
1950161678Sdavidxu				umtxq_lock(&uq->uq_key);
1951189756Sdavidxu				umtxq_busy(&uq->uq_key);
1952161678Sdavidxu				error = umtx_pi_claim(pi, td);
1953189756Sdavidxu				umtxq_unbusy(&uq->uq_key);
1954161678Sdavidxu				umtxq_unlock(&uq->uq_key);
1955161678Sdavidxu				break;
1956161678Sdavidxu			}
1957161678Sdavidxu
1958161678Sdavidxu			/* The address was invalid. */
1959161678Sdavidxu			if (owner == -1) {
1960161678Sdavidxu				error = EFAULT;
1961161678Sdavidxu				break;
1962161678Sdavidxu			}
1963161678Sdavidxu
1964161678Sdavidxu			/* If this failed the lock has changed, restart. */
1965161678Sdavidxu			continue;
1966161678Sdavidxu		}
1967161678Sdavidxu
1968161678Sdavidxu		if ((flags & UMUTEX_ERROR_CHECK) != 0 &&
1969161678Sdavidxu		    (owner & ~UMUTEX_CONTESTED) == id) {
1970161678Sdavidxu			error = EDEADLK;
1971161678Sdavidxu			break;
1972161678Sdavidxu		}
1973161678Sdavidxu
1974161678Sdavidxu		if (try != 0) {
1975161678Sdavidxu			error = EBUSY;
1976161678Sdavidxu			break;
1977161678Sdavidxu		}
1978161678Sdavidxu
1979161678Sdavidxu		/*
1980161678Sdavidxu		 * If we caught a signal, we have retried and now
1981161678Sdavidxu		 * exit immediately.
1982161678Sdavidxu		 */
1983161678Sdavidxu		if (error != 0)
1984161678Sdavidxu			break;
1985161678Sdavidxu
1986161678Sdavidxu		umtxq_lock(&uq->uq_key);
1987161678Sdavidxu		umtxq_busy(&uq->uq_key);
1988161678Sdavidxu		umtxq_unlock(&uq->uq_key);
1989161678Sdavidxu
1990161678Sdavidxu		/*
1991161678Sdavidxu		 * Set the contested bit so that a release in user space
1992161678Sdavidxu		 * knows to use the system call for unlock.  If this fails
1993161678Sdavidxu		 * either some one else has acquired the lock or it has been
1994161678Sdavidxu		 * released.
1995161678Sdavidxu		 */
1996161678Sdavidxu		old = casuword32(&m->m_owner, owner, owner | UMUTEX_CONTESTED);
1997161678Sdavidxu
1998161678Sdavidxu		/* The address was invalid. */
1999161678Sdavidxu		if (old == -1) {
2000161678Sdavidxu			umtxq_lock(&uq->uq_key);
2001161678Sdavidxu			umtxq_unbusy(&uq->uq_key);
2002161678Sdavidxu			umtxq_unlock(&uq->uq_key);
2003161678Sdavidxu			error = EFAULT;
2004161678Sdavidxu			break;
2005161678Sdavidxu		}
2006161678Sdavidxu
2007161678Sdavidxu		umtxq_lock(&uq->uq_key);
2008161678Sdavidxu		/*
2009161678Sdavidxu		 * We set the contested bit, sleep. Otherwise the lock changed
2010161678Sdavidxu		 * and we need to retry or we lost a race to the thread
2011161678Sdavidxu		 * unlocking the umtx.
2012161678Sdavidxu		 */
2013161678Sdavidxu		if (old == owner)
2014161678Sdavidxu			error = umtxq_sleep_pi(uq, pi, owner & ~UMUTEX_CONTESTED,
2015233690Sdavidxu			    "umtxpi", timeout == NULL ? NULL : &timo);
2016189756Sdavidxu		else {
2017189756Sdavidxu			umtxq_unbusy(&uq->uq_key);
2018189756Sdavidxu			umtxq_unlock(&uq->uq_key);
2019189756Sdavidxu		}
2020161678Sdavidxu	}
2021161678Sdavidxu
	/* Drop the reference taken before the retry loop. */
2022163697Sdavidxu	umtxq_lock(&uq->uq_key);
2023163697Sdavidxu	umtx_pi_unref(pi);
2024163697Sdavidxu	umtxq_unlock(&uq->uq_key);
2025161678Sdavidxu
2026161678Sdavidxu	umtx_key_release(&uq->uq_key);
2027161678Sdavidxu	return (error);
2028161678Sdavidxu}
2029161678Sdavidxu
2030161678Sdavidxu/*
2031161678Sdavidxu * Unlock a PI mutex.
2032161678Sdavidxu */
/*
 * NOTE(review): release a priority-inheritance umutex owned by td.
 * Verifies ownership from the userspace word (EFAULT on fault, EPERM
 * if not the owner), fast-paths the uncontested case with a single
 * CAS, and otherwise hands ownership off: detaches the pi from this
 * thread's contested list, recomputes this thread's lent priority,
 * and wakes the best still-queued waiter.  Returns 0, EFAULT, EPERM,
 * or EINVAL (lock word changed unexpectedly).
 */
2033161678Sdavidxustatic int
2034161678Sdavidxudo_unlock_pi(struct thread *td, struct umutex *m, uint32_t flags)
2035161678Sdavidxu{
2036161678Sdavidxu	struct umtx_key key;
2037161678Sdavidxu	struct umtx_q *uq_first, *uq_first2, *uq_me;
2038161678Sdavidxu	struct umtx_pi *pi, *pi2;
2039161678Sdavidxu	uint32_t owner, old, id;
2040161678Sdavidxu	int error;
2041161678Sdavidxu	int count;
2042161678Sdavidxu	int pri;
2043161678Sdavidxu
2044161678Sdavidxu	id = td->td_tid;
2045161678Sdavidxu	/*
2046161678Sdavidxu	 * Make sure we own this mtx.
2047161678Sdavidxu	 */
2048163449Sdavidxu	owner = fuword32(__DEVOLATILE(uint32_t *, &m->m_owner));
2049161678Sdavidxu	if (owner == -1)
2050161678Sdavidxu		return (EFAULT);
2051161678Sdavidxu
2052161678Sdavidxu	if ((owner & ~UMUTEX_CONTESTED) != id)
2053161678Sdavidxu		return (EPERM);
2054161678Sdavidxu
2055161678Sdavidxu	/* This should be done in userland */
2056161678Sdavidxu	if ((owner & UMUTEX_CONTESTED) == 0) {
2057161678Sdavidxu		old = casuword32(&m->m_owner, owner, UMUTEX_UNOWNED);
2058161678Sdavidxu		if (old == -1)
2059161678Sdavidxu			return (EFAULT);
2060161678Sdavidxu		if (old == owner)
2061161678Sdavidxu			return (0);
		/* CAS lost a race; fall through with the fresh value. */
2062161855Sdavidxu		owner = old;
2063161678Sdavidxu	}
2064161678Sdavidxu
2065161678Sdavidxu	/* We should only ever be in here for contested locks */
2066161678Sdavidxu	if ((error = umtx_key_get(m, TYPE_PI_UMUTEX, GET_SHARE(flags),
2067161678Sdavidxu	    &key)) != 0)
2068161678Sdavidxu		return (error);
2069161678Sdavidxu
2070161678Sdavidxu	umtxq_lock(&key);
2071161678Sdavidxu	umtxq_busy(&key);
2072161678Sdavidxu	count = umtxq_count_pi(&key, &uq_first);
2073161678Sdavidxu	if (uq_first != NULL) {
2074189756Sdavidxu		mtx_lock_spin(&umtx_lock);
2075161678Sdavidxu		pi = uq_first->uq_pi_blocked;
2076189756Sdavidxu		KASSERT(pi != NULL, ("pi == NULL?"));
2077161678Sdavidxu		if (pi->pi_owner != curthread) {
2078189756Sdavidxu			mtx_unlock_spin(&umtx_lock);
2079161678Sdavidxu			umtxq_unbusy(&key);
2080161678Sdavidxu			umtxq_unlock(&key);
2081189756Sdavidxu			umtx_key_release(&key);
2082161678Sdavidxu			/* userland messed the mutex */
2083161678Sdavidxu			return (EPERM);
2084161678Sdavidxu		}
2085161678Sdavidxu		uq_me = curthread->td_umtxq;
2086161678Sdavidxu		pi->pi_owner = NULL;
2087161678Sdavidxu		TAILQ_REMOVE(&uq_me->uq_pi_contested, pi, pi_link);
2088189756Sdavidxu		/* get highest priority thread which is still sleeping. */
2089161678Sdavidxu		uq_first = TAILQ_FIRST(&pi->pi_blocked);
2090189756Sdavidxu		while (uq_first != NULL &&
2091189756Sdavidxu		       (uq_first->uq_flags & UQF_UMTXQ) == 0) {
2092189756Sdavidxu			uq_first = TAILQ_NEXT(uq_first, uq_lockq);
2093189756Sdavidxu		}
		/*
		 * Recompute our own lent priority from the remaining
		 * PI mutexes we still hold.
		 */
2094161678Sdavidxu		pri = PRI_MAX;
2095161678Sdavidxu		TAILQ_FOREACH(pi2, &uq_me->uq_pi_contested, pi_link) {
2096161678Sdavidxu			uq_first2 = TAILQ_FIRST(&pi2->pi_blocked);
2097161678Sdavidxu			if (uq_first2 != NULL) {
2098161678Sdavidxu				if (pri > UPRI(uq_first2->uq_thread))
2099161678Sdavidxu					pri = UPRI(uq_first2->uq_thread);
2100161678Sdavidxu			}
2101161678Sdavidxu		}
2102170300Sjeff		thread_lock(curthread);
2103216791Sdavidxu		sched_lend_user_prio(curthread, pri);
2104170300Sjeff		thread_unlock(curthread);
2105170300Sjeff		mtx_unlock_spin(&umtx_lock);
2106189756Sdavidxu		if (uq_first)
2107189756Sdavidxu			umtxq_signal_thread(uq_first);
2108161678Sdavidxu	}
2109161678Sdavidxu	umtxq_unlock(&key);
2110161678Sdavidxu
2111161678Sdavidxu	/*
2112161678Sdavidxu	 * When unlocking the umtx, it must be marked as unowned if
2113161678Sdavidxu	 * there is zero or one thread only waiting for it.
2114161678Sdavidxu	 * Otherwise, it must be marked as contested.
2115161678Sdavidxu	 */
2116161678Sdavidxu	old = casuword32(&m->m_owner, owner,
2117161678Sdavidxu		count <= 1 ? UMUTEX_UNOWNED : UMUTEX_CONTESTED);
2118161678Sdavidxu
2119161678Sdavidxu	umtxq_lock(&key);
2120161678Sdavidxu	umtxq_unbusy(&key);
2121161678Sdavidxu	umtxq_unlock(&key);
2122161678Sdavidxu	umtx_key_release(&key);
2123161678Sdavidxu	if (old == -1)
2124161678Sdavidxu		return (EFAULT);
2125161678Sdavidxu	if (old != owner)
2126161678Sdavidxu		return (EINVAL);
2127161678Sdavidxu	return (0);
2128161678Sdavidxu}
2129161678Sdavidxu
2130161678Sdavidxu/*
2131161678Sdavidxu * Lock a PP mutex.
2132161678Sdavidxu */
/*
 * NOTE(review): acquire a priority-protect (priority-ceiling)
 * umutex.  Each retry re-reads the ceiling from userspace, validates
 * it, and — for privileged (PRIV_SCHED_RTPRIO) threads — raises
 * uq_inherited_pri to the ceiling before attempting the CAS.  The
 * unlocked state of a PP mutex is UMUTEX_CONTESTED, so acquisition
 * is always CONTESTED -> id|CONTESTED.  On sleep wakeup and on
 * error, the inherited priority is restored and the thread's lent
 * priority recomputed from the PI mutexes it still holds.  Returns
 * 0, EINVAL, EFAULT, EDEADLK, EBUSY, or the sleep error.
 */
2133161678Sdavidxustatic int
2134233690Sdavidxudo_lock_pp(struct thread *td, struct umutex *m, uint32_t flags,
2135233690Sdavidxu    struct _umtx_time *timeout, int try)
2136161678Sdavidxu{
2137233690Sdavidxu	struct abs_timeout timo;
2138161678Sdavidxu	struct umtx_q *uq, *uq2;
2139161678Sdavidxu	struct umtx_pi *pi;
2140161678Sdavidxu	uint32_t ceiling;
2141161678Sdavidxu	uint32_t owner, id;
2142161678Sdavidxu	int error, pri, old_inherited_pri, su;
2143161678Sdavidxu
2144161678Sdavidxu	id = td->td_tid;
2145161678Sdavidxu	uq = td->td_umtxq;
2146161678Sdavidxu	if ((error = umtx_key_get(m, TYPE_PP_UMUTEX, GET_SHARE(flags),
2147161678Sdavidxu	    &uq->uq_key)) != 0)
2148161678Sdavidxu		return (error);
2149233690Sdavidxu
2150233690Sdavidxu	if (timeout != NULL)
2151233690Sdavidxu		abs_timeout_init2(&timo, timeout);
2152233690Sdavidxu
	/* su: thread may use real-time priorities (ceiling boosting). */
2153164033Srwatson	su = (priv_check(td, PRIV_SCHED_RTPRIO) == 0);
2154161678Sdavidxu	for (;;) {
2155161678Sdavidxu		old_inherited_pri = uq->uq_inherited_pri;
2156161678Sdavidxu		umtxq_lock(&uq->uq_key);
2157161678Sdavidxu		umtxq_busy(&uq->uq_key);
2158161678Sdavidxu		umtxq_unlock(&uq->uq_key);
2159161678Sdavidxu
2160161678Sdavidxu		ceiling = RTP_PRIO_MAX - fuword32(&m->m_ceilings[0]);
2161161678Sdavidxu		if (ceiling > RTP_PRIO_MAX) {
2162161678Sdavidxu			error = EINVAL;
2163161678Sdavidxu			goto out;
2164161678Sdavidxu		}
2165161678Sdavidxu
2166170300Sjeff		mtx_lock_spin(&umtx_lock);
		/* Caller's priority must not be better than the ceiling. */
2167161678Sdavidxu		if (UPRI(td) < PRI_MIN_REALTIME + ceiling) {
2168170300Sjeff			mtx_unlock_spin(&umtx_lock);
2169161678Sdavidxu			error = EINVAL;
2170161678Sdavidxu			goto out;
2171161678Sdavidxu		}
2172161678Sdavidxu		if (su && PRI_MIN_REALTIME + ceiling < uq->uq_inherited_pri) {
2173161678Sdavidxu			uq->uq_inherited_pri = PRI_MIN_REALTIME + ceiling;
2174170300Sjeff			thread_lock(td);
2175161678Sdavidxu			if (uq->uq_inherited_pri < UPRI(td))
2176161678Sdavidxu				sched_lend_user_prio(td, uq->uq_inherited_pri);
2177170300Sjeff			thread_unlock(td);
2178161678Sdavidxu		}
2179170300Sjeff		mtx_unlock_spin(&umtx_lock);
2180161678Sdavidxu
2181161678Sdavidxu		owner = casuword32(&m->m_owner,
2182161678Sdavidxu		    UMUTEX_CONTESTED, id | UMUTEX_CONTESTED);
2183161678Sdavidxu
2184161678Sdavidxu		if (owner == UMUTEX_CONTESTED) {
2185161678Sdavidxu			error = 0;
2186161678Sdavidxu			break;
2187161678Sdavidxu		}
2188161678Sdavidxu
2189161678Sdavidxu		/* The address was invalid. */
2190161678Sdavidxu		if (owner == -1) {
2191161678Sdavidxu			error = EFAULT;
2192161678Sdavidxu			break;
2193161678Sdavidxu		}
2194161678Sdavidxu
2195161678Sdavidxu		if ((flags & UMUTEX_ERROR_CHECK) != 0 &&
2196161678Sdavidxu		    (owner & ~UMUTEX_CONTESTED) == id) {
2197161678Sdavidxu			error = EDEADLK;
2198161678Sdavidxu			break;
2199161678Sdavidxu		}
2200161678Sdavidxu
2201161678Sdavidxu		if (try != 0) {
2202161678Sdavidxu			error = EBUSY;
2203161678Sdavidxu			break;
2204161678Sdavidxu		}
2205161678Sdavidxu
2206161678Sdavidxu		/*
2207161678Sdavidxu		 * If we caught a signal, we have retried and now
2208161678Sdavidxu		 * exit immediately.
2209161678Sdavidxu		 */
2210161678Sdavidxu		if (error != 0)
2211161678Sdavidxu			break;
2212161678Sdavidxu
2213161678Sdavidxu		umtxq_lock(&uq->uq_key);
2214161678Sdavidxu		umtxq_insert(uq);
2215161678Sdavidxu		umtxq_unbusy(&uq->uq_key);
2216233690Sdavidxu		error = umtxq_sleep(uq, "umtxpp", timeout == NULL ?
2217233690Sdavidxu		    NULL : &timo);
2218161678Sdavidxu		umtxq_remove(uq);
2219161678Sdavidxu		umtxq_unlock(&uq->uq_key);
2220161678Sdavidxu
		/* Restore priority before retrying the acquisition. */
2221170300Sjeff		mtx_lock_spin(&umtx_lock);
2222161678Sdavidxu		uq->uq_inherited_pri = old_inherited_pri;
2223161678Sdavidxu		pri = PRI_MAX;
2224161678Sdavidxu		TAILQ_FOREACH(pi, &uq->uq_pi_contested, pi_link) {
2225161678Sdavidxu			uq2 = TAILQ_FIRST(&pi->pi_blocked);
2226161678Sdavidxu			if (uq2 != NULL) {
2227161678Sdavidxu				if (pri > UPRI(uq2->uq_thread))
2228161678Sdavidxu					pri = UPRI(uq2->uq_thread);
2229161678Sdavidxu			}
2230161678Sdavidxu		}
2231161678Sdavidxu		if (pri > uq->uq_inherited_pri)
2232161678Sdavidxu			pri = uq->uq_inherited_pri;
2233170300Sjeff		thread_lock(td);
2234216791Sdavidxu		sched_lend_user_prio(td, pri);
2235170300Sjeff		thread_unlock(td);
2236170300Sjeff		mtx_unlock_spin(&umtx_lock);
2237161678Sdavidxu	}
2238161678Sdavidxu
	/* Same restore on the error exit paths out of the loop. */
2239161678Sdavidxu	if (error != 0) {
2240170300Sjeff		mtx_lock_spin(&umtx_lock);
2241161678Sdavidxu		uq->uq_inherited_pri = old_inherited_pri;
2242161678Sdavidxu		pri = PRI_MAX;
2243161678Sdavidxu		TAILQ_FOREACH(pi, &uq->uq_pi_contested, pi_link) {
2244161678Sdavidxu			uq2 = TAILQ_FIRST(&pi->pi_blocked);
2245161678Sdavidxu			if (uq2 != NULL) {
2246161678Sdavidxu				if (pri > UPRI(uq2->uq_thread))
2247161678Sdavidxu					pri = UPRI(uq2->uq_thread);
2248161678Sdavidxu			}
2249161678Sdavidxu		}
2250161678Sdavidxu		if (pri > uq->uq_inherited_pri)
2251161678Sdavidxu			pri = uq->uq_inherited_pri;
2252170300Sjeff		thread_lock(td);
2253216791Sdavidxu		sched_lend_user_prio(td, pri);
2254170300Sjeff		thread_unlock(td);
2255170300Sjeff		mtx_unlock_spin(&umtx_lock);
2256161678Sdavidxu	}
2257161678Sdavidxu
2258161678Sdavidxuout:
2259161678Sdavidxu	umtxq_lock(&uq->uq_key);
2260161678Sdavidxu	umtxq_unbusy(&uq->uq_key);
2261161678Sdavidxu	umtxq_unlock(&uq->uq_key);
2262161678Sdavidxu	umtx_key_release(&uq->uq_key);
2263161678Sdavidxu	return (error);
2264161678Sdavidxu}
2265161678Sdavidxu
2266161678Sdavidxu/*
2267161678Sdavidxu * Unlock a PP mutex.
2268161678Sdavidxu */
/*
 * NOTE(review): release a priority-protect umutex.  Verifies
 * ownership (EFAULT/EPERM), converts the ceiling stored in
 * m_ceilings[1] into a new inherited priority, and always stores
 * UMUTEX_CONTESTED as the unlocked value so the lock path must
 * re-enter the kernel (required because locking adjusts priority).
 * Then wakes one waiter and recomputes this thread's lent priority.
 */
2269161678Sdavidxustatic int
2270161678Sdavidxudo_unlock_pp(struct thread *td, struct umutex *m, uint32_t flags)
2271161678Sdavidxu{
2272161678Sdavidxu	struct umtx_key key;
2273161678Sdavidxu	struct umtx_q *uq, *uq2;
2274161678Sdavidxu	struct umtx_pi *pi;
2275161678Sdavidxu	uint32_t owner, id;
2276161678Sdavidxu	uint32_t rceiling;
2277161926Sdavidxu	int error, pri, new_inherited_pri, su;
2278161678Sdavidxu
2279161678Sdavidxu	id = td->td_tid;
2280161678Sdavidxu	uq = td->td_umtxq;
2281164033Srwatson	su = (priv_check(td, PRIV_SCHED_RTPRIO) == 0);
2282161678Sdavidxu
2283161678Sdavidxu	/*
2284161678Sdavidxu	 * Make sure we own this mtx.
2285161678Sdavidxu	 */
2286163449Sdavidxu	owner = fuword32(__DEVOLATILE(uint32_t *, &m->m_owner));
2287161678Sdavidxu	if (owner == -1)
2288161678Sdavidxu		return (EFAULT);
2289161678Sdavidxu
2290161678Sdavidxu	if ((owner & ~UMUTEX_CONTESTED) != id)
2291161678Sdavidxu		return (EPERM);
2292161678Sdavidxu
	/* rceiling == -1 means "no remaining ceiling": drop to PRI_MAX. */
2293161678Sdavidxu	error = copyin(&m->m_ceilings[1], &rceiling, sizeof(uint32_t));
2294161678Sdavidxu	if (error != 0)
2295161678Sdavidxu		return (error);
2296161678Sdavidxu
2297161678Sdavidxu	if (rceiling == -1)
2298161678Sdavidxu		new_inherited_pri = PRI_MAX;
2299161678Sdavidxu	else {
2300161678Sdavidxu		rceiling = RTP_PRIO_MAX - rceiling;
2301161678Sdavidxu		if (rceiling > RTP_PRIO_MAX)
2302161678Sdavidxu			return (EINVAL);
2303161678Sdavidxu		new_inherited_pri = PRI_MIN_REALTIME + rceiling;
2304161678Sdavidxu	}
2305161678Sdavidxu
2306161678Sdavidxu	if ((error = umtx_key_get(m, TYPE_PP_UMUTEX, GET_SHARE(flags),
2307161678Sdavidxu	    &key)) != 0)
2308161678Sdavidxu		return (error);
2309161678Sdavidxu	umtxq_lock(&key);
2310161678Sdavidxu	umtxq_busy(&key);
2311161678Sdavidxu	umtxq_unlock(&key);
2312161678Sdavidxu	/*
2313161678Sdavidxu	 * For priority protected mutex, always set unlocked state
2314161678Sdavidxu	 * to UMUTEX_CONTESTED, so that userland always enters kernel
2315161678Sdavidxu	 * to lock the mutex, it is necessary because thread priority
2316161678Sdavidxu	 * has to be adjusted for such mutex.
2317161678Sdavidxu	 */
2318163449Sdavidxu	error = suword32(__DEVOLATILE(uint32_t *, &m->m_owner),
2319163449Sdavidxu		UMUTEX_CONTESTED);
2320161678Sdavidxu
2321161678Sdavidxu	umtxq_lock(&key);
2322161678Sdavidxu	if (error == 0)
2323161678Sdavidxu		umtxq_signal(&key, 1);
2324161678Sdavidxu	umtxq_unbusy(&key);
2325161678Sdavidxu	umtxq_unlock(&key);
2326161678Sdavidxu
2327161678Sdavidxu	if (error == -1)
2328161678Sdavidxu		error = EFAULT;
2329161678Sdavidxu	else {
		/* Recompute lent priority from PI mutexes still held. */
2330170300Sjeff		mtx_lock_spin(&umtx_lock);
2331161926Sdavidxu		if (su != 0)
2332161926Sdavidxu			uq->uq_inherited_pri = new_inherited_pri;
2333161678Sdavidxu		pri = PRI_MAX;
2334161678Sdavidxu		TAILQ_FOREACH(pi, &uq->uq_pi_contested, pi_link) {
2335161678Sdavidxu			uq2 = TAILQ_FIRST(&pi->pi_blocked);
2336161678Sdavidxu			if (uq2 != NULL) {
2337161678Sdavidxu				if (pri > UPRI(uq2->uq_thread))
2338161678Sdavidxu					pri = UPRI(uq2->uq_thread);
2339161678Sdavidxu			}
2340161678Sdavidxu		}
2341161678Sdavidxu		if (pri > uq->uq_inherited_pri)
2342161678Sdavidxu			pri = uq->uq_inherited_pri;
2343170300Sjeff		thread_lock(td);
2344216791Sdavidxu		sched_lend_user_prio(td, pri);
2345170300Sjeff		thread_unlock(td);
2346170300Sjeff		mtx_unlock_spin(&umtx_lock);
2347161678Sdavidxu	}
2348161678Sdavidxu	umtx_key_release(&key);
2349161678Sdavidxu	return (error);
2350161678Sdavidxu}
2351161678Sdavidxu
/*
 * Change the priority ceiling of a PRIO_PROTECT userland mutex and,
 * on success, report the previous ceiling through old_ceiling.  The
 * update is performed while holding the mutex: it is acquired here if
 * it was merely contested (unowned), or the caller may already own it.
 */
static int
do_set_ceiling(struct thread *td, struct umutex *m, uint32_t ceiling,
	uint32_t *old_ceiling)
{
	struct umtx_q *uq;
	uint32_t save_ceiling;
	uint32_t owner, id;
	uint32_t flags;
	int error;

	flags = fuword32(&m->m_flags);
	/* Only PRIO_PROTECT mutexes carry a ceiling. */
	if ((flags & UMUTEX_PRIO_PROTECT) == 0)
		return (EINVAL);
	if (ceiling > RTP_PRIO_MAX)
		return (EINVAL);
	id = td->td_tid;
	uq = td->td_umtxq;
	if ((error = umtx_key_get(m, TYPE_PP_UMUTEX, GET_SHARE(flags),
	   &uq->uq_key)) != 0)
		return (error);
	for (;;) {
		/* Serialize kernel-side operations on this key. */
		umtxq_lock(&uq->uq_key);
		umtxq_busy(&uq->uq_key);
		umtxq_unlock(&uq->uq_key);

		/*
		 * NOTE(review): the fuword32() result is not checked for
		 * -1 (fault); a faulting read would be stored through
		 * old_ceiling below — confirm whether this is acceptable.
		 */
		save_ceiling = fuword32(&m->m_ceilings[0]);

		/* Try to acquire the mutex if it is contested but unowned. */
		owner = casuword32(&m->m_owner,
		    UMUTEX_CONTESTED, id | UMUTEX_CONTESTED);

		if (owner == UMUTEX_CONTESTED) {
			/* Acquired: store the new ceiling, then release. */
			suword32(&m->m_ceilings[0], ceiling);
			suword32(__DEVOLATILE(uint32_t *, &m->m_owner),
				UMUTEX_CONTESTED);
			error = 0;
			break;
		}

		/* The address was invalid. */
		if (owner == -1) {
			error = EFAULT;
			break;
		}

		/* Caller already owns the mutex: just update the ceiling. */
		if ((owner & ~UMUTEX_CONTESTED) == id) {
			suword32(&m->m_ceilings[0], ceiling);
			error = 0;
			break;
		}

		/*
		 * If we caught a signal, we have retried and now
		 * exit immediately.
		 */
		if (error != 0)
			break;

		/*
		 * We set the contested bit, sleep. Otherwise the lock changed
		 * and we need to retry or we lost a race to the thread
		 * unlocking the umtx.
		 */
		umtxq_lock(&uq->uq_key);
		umtxq_insert(uq);
		umtxq_unbusy(&uq->uq_key);
		error = umtxq_sleep(uq, "umtxpp", NULL);
		umtxq_remove(uq);
		umtxq_unlock(&uq->uq_key);
	}
	umtxq_lock(&uq->uq_key);
	/* Wake all waiters so they can re-evaluate under the new ceiling. */
	if (error == 0)
		umtxq_signal(&uq->uq_key, INT_MAX);
	umtxq_unbusy(&uq->uq_key);
	umtxq_unlock(&uq->uq_key);
	umtx_key_release(&uq->uq_key);
	if (error == 0 && old_ceiling != NULL)
		suword32(old_ceiling, save_ceiling);
	return (error);
}
2431161678Sdavidxu
2432161678Sdavidxu/*
2433161678Sdavidxu * Lock a userland POSIX mutex.
2434161678Sdavidxu */
2435161678Sdavidxustatic int
2436162030Sdavidxudo_lock_umutex(struct thread *td, struct umutex *m,
2437233690Sdavidxu    struct _umtx_time *timeout, int mode)
2438161678Sdavidxu{
2439161678Sdavidxu	uint32_t flags;
2440162030Sdavidxu	int error;
2441161678Sdavidxu
2442161678Sdavidxu	flags = fuword32(&m->m_flags);
2443161678Sdavidxu	if (flags == -1)
2444161678Sdavidxu		return (EFAULT);
2445161678Sdavidxu
2446233690Sdavidxu	switch(flags & (UMUTEX_PRIO_INHERIT | UMUTEX_PRIO_PROTECT)) {
2447233690Sdavidxu	case 0:
2448233690Sdavidxu		error = do_lock_normal(td, m, flags, timeout, mode);
2449233690Sdavidxu		break;
2450233690Sdavidxu	case UMUTEX_PRIO_INHERIT:
2451233690Sdavidxu		error = do_lock_pi(td, m, flags, timeout, mode);
2452233690Sdavidxu		break;
2453233690Sdavidxu	case UMUTEX_PRIO_PROTECT:
2454233690Sdavidxu		error = do_lock_pp(td, m, flags, timeout, mode);
2455233690Sdavidxu		break;
2456233690Sdavidxu	default:
2457233690Sdavidxu		return (EINVAL);
2458233690Sdavidxu	}
2459162030Sdavidxu	if (timeout == NULL) {
2460179970Sdavidxu		if (error == EINTR && mode != _UMUTEX_WAIT)
2461162030Sdavidxu			error = ERESTART;
2462162030Sdavidxu	} else {
2463162030Sdavidxu		/* Timed-locking is not restarted. */
2464162030Sdavidxu		if (error == ERESTART)
2465162030Sdavidxu			error = EINTR;
2466161742Sdavidxu	}
2467162030Sdavidxu	return (error);
2468161678Sdavidxu}
2469161678Sdavidxu
2470161678Sdavidxu/*
2471161678Sdavidxu * Unlock a userland POSIX mutex.
2472161678Sdavidxu */
2473161678Sdavidxustatic int
2474161678Sdavidxudo_unlock_umutex(struct thread *td, struct umutex *m)
2475161678Sdavidxu{
2476161678Sdavidxu	uint32_t flags;
2477161678Sdavidxu
2478161678Sdavidxu	flags = fuword32(&m->m_flags);
2479161678Sdavidxu	if (flags == -1)
2480161678Sdavidxu		return (EFAULT);
2481161678Sdavidxu
2482161855Sdavidxu	switch(flags & (UMUTEX_PRIO_INHERIT | UMUTEX_PRIO_PROTECT)) {
2483161855Sdavidxu	case 0:
2484161855Sdavidxu		return (do_unlock_normal(td, m, flags));
2485161855Sdavidxu	case UMUTEX_PRIO_INHERIT:
2486161855Sdavidxu		return (do_unlock_pi(td, m, flags));
2487161855Sdavidxu	case UMUTEX_PRIO_PROTECT:
2488161855Sdavidxu		return (do_unlock_pp(td, m, flags));
2489161855Sdavidxu	}
2490161678Sdavidxu
2491161855Sdavidxu	return (EINVAL);
2492161678Sdavidxu}
2493161678Sdavidxu
2494164839Sdavidxustatic int
2495164839Sdavidxudo_cv_wait(struct thread *td, struct ucond *cv, struct umutex *m,
2496164876Sdavidxu	struct timespec *timeout, u_long wflags)
2497164839Sdavidxu{
2498233690Sdavidxu	struct abs_timeout timo;
2499164839Sdavidxu	struct umtx_q *uq;
2500164839Sdavidxu	uint32_t flags;
2501216641Sdavidxu	uint32_t clockid;
2502164839Sdavidxu	int error;
2503164839Sdavidxu
2504164839Sdavidxu	uq = td->td_umtxq;
2505164839Sdavidxu	flags = fuword32(&cv->c_flags);
2506164839Sdavidxu	error = umtx_key_get(cv, TYPE_CV, GET_SHARE(flags), &uq->uq_key);
2507164839Sdavidxu	if (error != 0)
2508164839Sdavidxu		return (error);
2509216641Sdavidxu
2510216641Sdavidxu	if ((wflags & CVWAIT_CLOCKID) != 0) {
2511216641Sdavidxu		clockid = fuword32(&cv->c_clockid);
2512216641Sdavidxu		if (clockid < CLOCK_REALTIME ||
2513216641Sdavidxu		    clockid >= CLOCK_THREAD_CPUTIME_ID) {
2514216641Sdavidxu			/* hmm, only HW clock id will work. */
2515216641Sdavidxu			return (EINVAL);
2516216641Sdavidxu		}
2517216641Sdavidxu	} else {
2518216641Sdavidxu		clockid = CLOCK_REALTIME;
2519216641Sdavidxu	}
2520216641Sdavidxu
2521164839Sdavidxu	umtxq_lock(&uq->uq_key);
2522164839Sdavidxu	umtxq_busy(&uq->uq_key);
2523164839Sdavidxu	umtxq_insert(uq);
2524164839Sdavidxu	umtxq_unlock(&uq->uq_key);
2525164839Sdavidxu
2526164839Sdavidxu	/*
2527216641Sdavidxu	 * Set c_has_waiters to 1 before releasing user mutex, also
2528216641Sdavidxu	 * don't modify cache line when unnecessary.
2529164839Sdavidxu	 */
2530216641Sdavidxu	if (fuword32(__DEVOLATILE(uint32_t *, &cv->c_has_waiters)) == 0)
2531216641Sdavidxu		suword32(__DEVOLATILE(uint32_t *, &cv->c_has_waiters), 1);
2532164839Sdavidxu
2533164839Sdavidxu	umtxq_lock(&uq->uq_key);
2534164839Sdavidxu	umtxq_unbusy(&uq->uq_key);
2535164839Sdavidxu	umtxq_unlock(&uq->uq_key);
2536164839Sdavidxu
2537164839Sdavidxu	error = do_unlock_umutex(td, m);
2538233690Sdavidxu
2539233700Sdavidxu	if (timeout != NULL)
2540233690Sdavidxu		abs_timeout_init(&timo, clockid, ((wflags & CVWAIT_ABSTIME) != 0),
2541233690Sdavidxu			timeout);
2542164839Sdavidxu
2543164839Sdavidxu	umtxq_lock(&uq->uq_key);
2544164839Sdavidxu	if (error == 0) {
2545233690Sdavidxu		error = umtxq_sleep(uq, "ucond", timeout == NULL ?
2546233690Sdavidxu		    NULL : &timo);
2547164839Sdavidxu	}
2548164839Sdavidxu
2549211794Sdavidxu	if ((uq->uq_flags & UQF_UMTXQ) == 0)
2550211794Sdavidxu		error = 0;
2551211794Sdavidxu	else {
2552216641Sdavidxu		/*
2553216641Sdavidxu		 * This must be timeout,interrupted by signal or
2554216641Sdavidxu		 * surprious wakeup, clear c_has_waiter flag when
2555216641Sdavidxu		 * necessary.
2556216641Sdavidxu		 */
2557216641Sdavidxu		umtxq_busy(&uq->uq_key);
2558216641Sdavidxu		if ((uq->uq_flags & UQF_UMTXQ) != 0) {
2559216641Sdavidxu			int oldlen = uq->uq_cur_queue->length;
2560216641Sdavidxu			umtxq_remove(uq);
2561216641Sdavidxu			if (oldlen == 1) {
2562216641Sdavidxu				umtxq_unlock(&uq->uq_key);
2563216641Sdavidxu				suword32(
2564216641Sdavidxu				    __DEVOLATILE(uint32_t *,
2565216641Sdavidxu					 &cv->c_has_waiters), 0);
2566216641Sdavidxu				umtxq_lock(&uq->uq_key);
2567216641Sdavidxu			}
2568216641Sdavidxu		}
2569216641Sdavidxu		umtxq_unbusy(&uq->uq_key);
2570164839Sdavidxu		if (error == ERESTART)
2571164839Sdavidxu			error = EINTR;
2572164839Sdavidxu	}
2573211794Sdavidxu
2574164839Sdavidxu	umtxq_unlock(&uq->uq_key);
2575164839Sdavidxu	umtx_key_release(&uq->uq_key);
2576164839Sdavidxu	return (error);
2577164839Sdavidxu}
2578164839Sdavidxu
/*
 * Signal a userland condition variable: wake at most one waiter, and
 * clear c_has_waiters when no queued waiters remain.
 */
static int
do_cv_signal(struct thread *td, struct ucond *cv)
{
	struct umtx_q *uq;
	struct umtx_key key;
	int error, cnt, nwake;
	uint32_t flags;

	flags = fuword32(&cv->c_flags);
	if ((error = umtx_key_get(cv, TYPE_CV, GET_SHARE(flags), &key)) != 0)
		return (error);
	umtxq_lock(&key);
	umtxq_busy(&key);
	cnt = umtxq_count(&key);
	nwake = umtxq_signal(&key, 1);
	if (cnt <= nwake) {
		/*
		 * The queue is now empty; the lock is dropped around the
		 * userland store and re-taken afterwards (the busy flag
		 * still excludes concurrent operations on this key).
		 * NOTE(review): suword32() yields -1 on fault and that
		 * value is returned as-is rather than EFAULT — confirm.
		 */
		umtxq_unlock(&key);
		error = suword32(
		    __DEVOLATILE(uint32_t *, &cv->c_has_waiters), 0);
		umtxq_lock(&key);
	}
	umtxq_unbusy(&key);
	umtxq_unlock(&key);
	umtx_key_release(&key);
	return (error);
}
2607164839Sdavidxu
/*
 * Broadcast a userland condition variable: wake every queued waiter
 * and clear the c_has_waiters flag.
 */
static int
do_cv_broadcast(struct thread *td, struct ucond *cv)
{
	struct umtx_key key;
	int error;
	uint32_t flags;

	flags = fuword32(&cv->c_flags);
	if ((error = umtx_key_get(cv, TYPE_CV, GET_SHARE(flags), &key)) != 0)
		return (error);

	umtxq_lock(&key);
	umtxq_busy(&key);
	umtxq_signal(&key, INT_MAX);
	umtxq_unlock(&key);

	/* Userland store done without the queue lock; busy is still held. */
	error = suword32(__DEVOLATILE(uint32_t *, &cv->c_has_waiters), 0);

	umtxq_lock(&key);
	umtxq_unbusy(&key);
	umtxq_unlock(&key);

	umtx_key_release(&key);
	return (error);
}
2633164839Sdavidxu
/*
 * Read-lock a userland rwlock.  Fast path: CAS-increment the reader
 * count in rw_state while no blocking writer bits are set.  Slow path:
 * set URWLOCK_READ_WAITERS, bump rw_blocked_readers and sleep on the
 * shared queue.  Returns 0 on success, EAGAIN if the reader count
 * would exceed URWLOCK_MAX_READERS, or a sleep error (ERESTART is
 * mapped to EINTR).
 */
static int
do_rw_rdlock(struct thread *td, struct urwlock *rwlock, long fflag, struct _umtx_time *timeout)
{
	struct abs_timeout timo;
	struct umtx_q *uq;
	uint32_t flags, wrflags;
	int32_t state, oldstate;
	int32_t blocked_readers;
	int error;

	uq = td->td_umtxq;
	flags = fuword32(&rwlock->rw_flags);
	error = umtx_key_get(rwlock, TYPE_RWLOCK, GET_SHARE(flags), &uq->uq_key);
	if (error != 0)
		return (error);

	if (timeout != NULL)
		abs_timeout_init2(&timo, timeout);

	/*
	 * Unless reader-preference is requested (by the caller or by the
	 * lock itself), pending writers also block new readers.
	 */
	wrflags = URWLOCK_WRITE_OWNER;
	if (!(fflag & URWLOCK_PREFER_READER) && !(flags & URWLOCK_PREFER_READER))
		wrflags |= URWLOCK_WRITE_WAITERS;

	for (;;) {
		state = fuword32(__DEVOLATILE(int32_t *, &rwlock->rw_state));
		/* try to lock it */
		while (!(state & wrflags)) {
			if (__predict_false(URWLOCK_READER_COUNT(state) == URWLOCK_MAX_READERS)) {
				umtx_key_release(&uq->uq_key);
				return (EAGAIN);
			}
			oldstate = casuword32(&rwlock->rw_state, state, state + 1);
			if (oldstate == state) {
				umtx_key_release(&uq->uq_key);
				return (0);
			}
			state = oldstate;
		}

		/* Sleep below failed (timeout/signal): give up. */
		if (error)
			break;

		/* grab monitor lock */
		umtxq_lock(&uq->uq_key);
		umtxq_busy(&uq->uq_key);
		umtxq_unlock(&uq->uq_key);

		/*
		 * re-read the state, in case it changed between the try-lock above
		 * and the check below
		 */
		state = fuword32(__DEVOLATILE(int32_t *, &rwlock->rw_state));

		/* set read contention bit */
		while ((state & wrflags) && !(state & URWLOCK_READ_WAITERS)) {
			oldstate = casuword32(&rwlock->rw_state, state, state | URWLOCK_READ_WAITERS);
			if (oldstate == state)
				goto sleep;
			state = oldstate;
		}

		/* state is changed while setting flags, restart */
		if (!(state & wrflags)) {
			umtxq_lock(&uq->uq_key);
			umtxq_unbusy(&uq->uq_key);
			umtxq_unlock(&uq->uq_key);
			continue;
		}

sleep:
		/* contention bit is set, before sleeping, increase read waiter count */
		blocked_readers = fuword32(&rwlock->rw_blocked_readers);
		suword32(&rwlock->rw_blocked_readers, blocked_readers+1);

		while (state & wrflags) {
			umtxq_lock(&uq->uq_key);
			umtxq_insert(uq);
			umtxq_unbusy(&uq->uq_key);

			error = umtxq_sleep(uq, "urdlck", timeout == NULL ?
			    NULL : &timo);

			umtxq_busy(&uq->uq_key);
			umtxq_remove(uq);
			umtxq_unlock(&uq->uq_key);
			if (error)
				break;
			state = fuword32(__DEVOLATILE(int32_t *, &rwlock->rw_state));
		}

		/* decrease read waiter count, and may clear read contention bit */
		blocked_readers = fuword32(&rwlock->rw_blocked_readers);
		suword32(&rwlock->rw_blocked_readers, blocked_readers-1);
		if (blocked_readers == 1) {
			/* Last blocked reader: clear the contention bit. */
			state = fuword32(__DEVOLATILE(int32_t *, &rwlock->rw_state));
			for (;;) {
				oldstate = casuword32(&rwlock->rw_state, state,
					 state & ~URWLOCK_READ_WAITERS);
				if (oldstate == state)
					break;
				state = oldstate;
			}
		}

		umtxq_lock(&uq->uq_key);
		umtxq_unbusy(&uq->uq_key);
		umtxq_unlock(&uq->uq_key);
	}
	umtx_key_release(&uq->uq_key);
	if (error == ERESTART)
		error = EINTR;
	return (error);
}
2747177848Sdavidxu
/*
 * Write-lock a userland rwlock.  Fast path: CAS URWLOCK_WRITE_OWNER
 * into rw_state while there is no owner and no readers.  Slow path:
 * set URWLOCK_WRITE_WAITERS, bump rw_blocked_writers and sleep on the
 * exclusive queue.  On a failed (timed-out/interrupted) acquisition,
 * blocked readers are re-woken if this was the last blocked writer.
 */
static int
do_rw_wrlock(struct thread *td, struct urwlock *rwlock, struct _umtx_time *timeout)
{
	struct abs_timeout timo;
	struct umtx_q *uq;
	uint32_t flags;
	int32_t state, oldstate;
	int32_t blocked_writers;
	int32_t blocked_readers;
	int error;

	uq = td->td_umtxq;
	flags = fuword32(&rwlock->rw_flags);
	error = umtx_key_get(rwlock, TYPE_RWLOCK, GET_SHARE(flags), &uq->uq_key);
	if (error != 0)
		return (error);

	if (timeout != NULL)
		abs_timeout_init2(&timo, timeout);

	blocked_readers = 0;
	for (;;) {
		state = fuword32(__DEVOLATILE(int32_t *, &rwlock->rw_state));
		while (!(state & URWLOCK_WRITE_OWNER) && URWLOCK_READER_COUNT(state) == 0) {
			oldstate = casuword32(&rwlock->rw_state, state, state | URWLOCK_WRITE_OWNER);
			if (oldstate == state) {
				umtx_key_release(&uq->uq_key);
				return (0);
			}
			state = oldstate;
		}

		if (error) {
			/*
			 * We failed to take the lock after sleeping; if we
			 * were the last blocked writer (cleared the waiters
			 * bit below) let any blocked readers run.
			 */
			if (!(state & (URWLOCK_WRITE_OWNER|URWLOCK_WRITE_WAITERS)) &&
			    blocked_readers != 0) {
				umtxq_lock(&uq->uq_key);
				umtxq_busy(&uq->uq_key);
				umtxq_signal_queue(&uq->uq_key, INT_MAX, UMTX_SHARED_QUEUE);
				umtxq_unbusy(&uq->uq_key);
				umtxq_unlock(&uq->uq_key);
			}

			break;
		}

		/* grab monitor lock */
		umtxq_lock(&uq->uq_key);
		umtxq_busy(&uq->uq_key);
		umtxq_unlock(&uq->uq_key);

		/*
		 * re-read the state, in case it changed between the try-lock above
		 * and the check below
		 */
		state = fuword32(__DEVOLATILE(int32_t *, &rwlock->rw_state));

		/* Set the write-contention bit while the lock is busy. */
		while (((state & URWLOCK_WRITE_OWNER) || URWLOCK_READER_COUNT(state) != 0) &&
		       (state & URWLOCK_WRITE_WAITERS) == 0) {
			oldstate = casuword32(&rwlock->rw_state, state, state | URWLOCK_WRITE_WAITERS);
			if (oldstate == state)
				goto sleep;
			state = oldstate;
		}

		/* The lock became free while setting the bit: retry. */
		if (!(state & URWLOCK_WRITE_OWNER) && URWLOCK_READER_COUNT(state) == 0) {
			umtxq_lock(&uq->uq_key);
			umtxq_unbusy(&uq->uq_key);
			umtxq_unlock(&uq->uq_key);
			continue;
		}
sleep:
		blocked_writers = fuword32(&rwlock->rw_blocked_writers);
		suword32(&rwlock->rw_blocked_writers, blocked_writers+1);

		while ((state & URWLOCK_WRITE_OWNER) || URWLOCK_READER_COUNT(state) != 0) {
			umtxq_lock(&uq->uq_key);
			umtxq_insert_queue(uq, UMTX_EXCLUSIVE_QUEUE);
			umtxq_unbusy(&uq->uq_key);

			error = umtxq_sleep(uq, "uwrlck", timeout == NULL ?
			    NULL : &timo);

			umtxq_busy(&uq->uq_key);
			umtxq_remove_queue(uq, UMTX_EXCLUSIVE_QUEUE);
			umtxq_unlock(&uq->uq_key);
			if (error)
				break;
			state = fuword32(__DEVOLATILE(int32_t *, &rwlock->rw_state));
		}

		blocked_writers = fuword32(&rwlock->rw_blocked_writers);
		suword32(&rwlock->rw_blocked_writers, blocked_writers-1);
		if (blocked_writers == 1) {
			/* Last blocked writer: clear the contention bit. */
			state = fuword32(__DEVOLATILE(int32_t *, &rwlock->rw_state));
			for (;;) {
				oldstate = casuword32(&rwlock->rw_state, state,
					 state & ~URWLOCK_WRITE_WAITERS);
				if (oldstate == state)
					break;
				state = oldstate;
			}
			blocked_readers = fuword32(&rwlock->rw_blocked_readers);
		} else
			blocked_readers = 0;

		umtxq_lock(&uq->uq_key);
		umtxq_unbusy(&uq->uq_key);
		umtxq_unlock(&uq->uq_key);
	}

	umtx_key_release(&uq->uq_key);
	if (error == ERESTART)
		error = EINTR;
	return (error);
}
2863177848Sdavidxu
/*
 * Unlock a userland rwlock: CAS away URWLOCK_WRITE_OWNER or decrement
 * the reader count, returning EPERM if rw_state shows the lock is not
 * held in the corresponding mode.  Then wake one writer or all readers
 * depending on the waiter bits and the URWLOCK_PREFER_READER policy.
 */
static int
do_rw_unlock(struct thread *td, struct urwlock *rwlock)
{
	struct umtx_q *uq;
	uint32_t flags;
	int32_t state, oldstate;
	int error, q, count;

	uq = td->td_umtxq;
	flags = fuword32(&rwlock->rw_flags);
	error = umtx_key_get(rwlock, TYPE_RWLOCK, GET_SHARE(flags), &uq->uq_key);
	if (error != 0)
		return (error);

	state = fuword32(__DEVOLATILE(int32_t *, &rwlock->rw_state));
	if (state & URWLOCK_WRITE_OWNER) {
		/* Write-locked: retry the CAS until the owner bit clears. */
		for (;;) {
			oldstate = casuword32(&rwlock->rw_state, state,
				state & ~URWLOCK_WRITE_OWNER);
			if (oldstate != state) {
				state = oldstate;
				if (!(oldstate & URWLOCK_WRITE_OWNER)) {
					/* Someone else cleared it: not ours. */
					error = EPERM;
					goto out;
				}
			} else
				break;
		}
	} else if (URWLOCK_READER_COUNT(state) != 0) {
		/* Read-locked: retry the CAS to drop one reader. */
		for (;;) {
			oldstate = casuword32(&rwlock->rw_state, state,
				state - 1);
			if (oldstate != state) {
				state = oldstate;
				if (URWLOCK_READER_COUNT(oldstate) == 0) {
					error = EPERM;
					goto out;
				}
			}
			else
				break;
		}
	} else {
		/* Neither write- nor read-locked. */
		error = EPERM;
		goto out;
	}

	count = 0;

	/* Pick which queue to wake; writers first unless reader-preferring. */
	if (!(flags & URWLOCK_PREFER_READER)) {
		if (state & URWLOCK_WRITE_WAITERS) {
			count = 1;
			q = UMTX_EXCLUSIVE_QUEUE;
		} else if (state & URWLOCK_READ_WAITERS) {
			count = INT_MAX;
			q = UMTX_SHARED_QUEUE;
		}
	} else {
		if (state & URWLOCK_READ_WAITERS) {
			count = INT_MAX;
			q = UMTX_SHARED_QUEUE;
		} else if (state & URWLOCK_WRITE_WAITERS) {
			count = 1;
			q = UMTX_EXCLUSIVE_QUEUE;
		}
	}

	if (count) {
		umtxq_lock(&uq->uq_key);
		umtxq_busy(&uq->uq_key);
		umtxq_signal_queue(&uq->uq_key, count, q);
		umtxq_unbusy(&uq->uq_key);
		umtxq_unlock(&uq->uq_key);
	}
out:
	umtx_key_release(&uq->uq_key);
	return (error);
}
2942177848Sdavidxu
/*
 * Wait on a userland semaphore (_usem): enqueue, advertise via
 * _has_waiters, then re-check _count — if it is already non-zero the
 * wait returns immediately, otherwise sleep until woken or timed out.
 */
static int
do_sem_wait(struct thread *td, struct _usem *sem, struct _umtx_time *timeout)
{
	struct abs_timeout timo;
	struct umtx_q *uq;
	uint32_t flags, count;
	int error;

	uq = td->td_umtxq;
	flags = fuword32(&sem->_flags);
	error = umtx_key_get(sem, TYPE_SEM, GET_SHARE(flags), &uq->uq_key);
	if (error != 0)
		return (error);

	if (timeout != NULL)
		abs_timeout_init2(&timo, timeout);

	umtxq_lock(&uq->uq_key);
	umtxq_busy(&uq->uq_key);
	umtxq_insert(uq);
	umtxq_unlock(&uq->uq_key);
	/*
	 * NOTE(review): the casuword32() result is ignored; a fault here
	 * would leave _has_waiters unset — confirm this is acceptable.
	 */
	casuword32(__DEVOLATILE(uint32_t *, &sem->_has_waiters), 0, 1);
	count = fuword32(__DEVOLATILE(uint32_t *, &sem->_count));
	if (count != 0) {
		/* The semaphore was posted concurrently: no need to sleep. */
		umtxq_lock(&uq->uq_key);
		umtxq_unbusy(&uq->uq_key);
		umtxq_remove(uq);
		umtxq_unlock(&uq->uq_key);
		umtx_key_release(&uq->uq_key);
		return (0);
	}
	umtxq_lock(&uq->uq_key);
	umtxq_unbusy(&uq->uq_key);

	error = umtxq_sleep(uq, "usem", timeout == NULL ? NULL : &timo);

	if ((uq->uq_flags & UQF_UMTXQ) == 0)
		error = 0;	/* Woken by do_sem_wake: already dequeued. */
	else {
		umtxq_remove(uq);
		if (error == ERESTART)
			error = EINTR;
	}
	umtxq_unlock(&uq->uq_key);
	umtx_key_release(&uq->uq_key);
	return (error);
}
2990201472Sdavidxu
/*
 * Wake one waiter on a userland semaphore (_usem).
 * (The previous comment said "condition variable" — copy/paste slip;
 * this is the semaphore wake path.)
 */
static int
do_sem_wake(struct thread *td, struct _usem *sem)
{
	struct umtx_key key;
	int error, cnt;
	uint32_t flags;

	flags = fuword32(&sem->_flags);
	if ((error = umtx_key_get(sem, TYPE_SEM, GET_SHARE(flags), &key)) != 0)
		return (error);
	umtxq_lock(&key);
	umtxq_busy(&key);
	cnt = umtxq_count(&key);
	if (cnt > 0) {
		umtxq_signal(&key, 1);
		/*
		 * Check if count is greater than 0, this means the memory is
		 * still being referenced by user code, so we can safely
		 * update _has_waiters flag.
		 */
		if (cnt == 1) {
			/* Last waiter just woken: drop the queue lock around
			 * the userland store (busy flag is still held). */
			umtxq_unlock(&key);
			error = suword32(
			    __DEVOLATILE(uint32_t *, &sem->_has_waiters), 0);
			umtxq_lock(&key);
		}
	}
	umtxq_unbusy(&key);
	umtxq_unlock(&key);
	umtx_key_release(&key);
	return (error);
}
3026201472Sdavidxu
3027139013Sdavidxuint
3028225617Skmacysys__umtx_lock(struct thread *td, struct _umtx_lock_args *uap)
3029139013Sdavidxu    /* struct umtx *umtx */
3030139013Sdavidxu{
3031233690Sdavidxu	return do_lock_umtx(td, uap->umtx, td->td_tid, 0);
3032139013Sdavidxu}
3033139013Sdavidxu
3034139013Sdavidxuint
3035225617Skmacysys__umtx_unlock(struct thread *td, struct _umtx_unlock_args *uap)
3036139013Sdavidxu    /* struct umtx *umtx */
3037139013Sdavidxu{
3038162536Sdavidxu	return do_unlock_umtx(td, uap->umtx, td->td_tid);
3039139013Sdavidxu}
3040139013Sdavidxu
3041228219Sphoinline int
3042228219Sphoumtx_copyin_timeout(const void *addr, struct timespec *tsp)
3043228219Spho{
3044228219Spho	int error;
3045228219Spho
3046228219Spho	error = copyin(addr, tsp, sizeof(struct timespec));
3047228219Spho	if (error == 0) {
3048228219Spho		if (tsp->tv_sec < 0 ||
3049228219Spho		    tsp->tv_nsec >= 1000000000 ||
3050228219Spho		    tsp->tv_nsec < 0)
3051228219Spho			error = EINVAL;
3052228219Spho	}
3053228219Spho	return (error);
3054228219Spho}
3055228219Spho
3056232144Sdavidxustatic inline int
3057232144Sdavidxuumtx_copyin_umtx_time(const void *addr, size_t size, struct _umtx_time *tp)
3058232144Sdavidxu{
3059232144Sdavidxu	int error;
3060232144Sdavidxu
3061232286Sdavidxu	if (size <= sizeof(struct timespec)) {
3062232286Sdavidxu		tp->_clockid = CLOCK_REALTIME;
3063232286Sdavidxu		tp->_flags = 0;
3064232144Sdavidxu		error = copyin(addr, &tp->_timeout, sizeof(struct timespec));
3065232286Sdavidxu	} else
3066232144Sdavidxu		error = copyin(addr, tp, sizeof(struct _umtx_time));
3067232144Sdavidxu	if (error != 0)
3068232144Sdavidxu		return (error);
3069232144Sdavidxu	if (tp->_timeout.tv_sec < 0 ||
3070232144Sdavidxu	    tp->_timeout.tv_nsec >= 1000000000 || tp->_timeout.tv_nsec < 0)
3071232144Sdavidxu		return (EINVAL);
3072232144Sdavidxu	return (0);
3073232144Sdavidxu}
3074232144Sdavidxu
3075162536Sdavidxustatic int
3076162536Sdavidxu__umtx_op_lock_umtx(struct thread *td, struct _umtx_op_args *uap)
3077139013Sdavidxu{
3078162536Sdavidxu	struct timespec *ts, timeout;
3079139013Sdavidxu	int error;
3080139013Sdavidxu
3081162536Sdavidxu	/* Allow a null timespec (wait forever). */
3082162536Sdavidxu	if (uap->uaddr2 == NULL)
3083162536Sdavidxu		ts = NULL;
3084162536Sdavidxu	else {
3085228219Spho		error = umtx_copyin_timeout(uap->uaddr2, &timeout);
3086162536Sdavidxu		if (error != 0)
3087162536Sdavidxu			return (error);
3088162536Sdavidxu		ts = &timeout;
3089162536Sdavidxu	}
3090162536Sdavidxu	return (do_lock_umtx(td, uap->obj, uap->val, ts));
3091162536Sdavidxu}
3092162536Sdavidxu
static int
__umtx_op_unlock_umtx(struct thread *td, struct _umtx_op_args *uap)
{
	/* UMTX_OP_UNLOCK handler: obj is the umtx, val the owner id. */
	return (do_unlock_umtx(td, uap->obj, uap->val));
}
3098162536Sdavidxu
3099162536Sdavidxustatic int
3100162536Sdavidxu__umtx_op_wait(struct thread *td, struct _umtx_op_args *uap)
3101162536Sdavidxu{
3102232144Sdavidxu	struct _umtx_time timeout, *tm_p;
3103162536Sdavidxu	int error;
3104162536Sdavidxu
3105162536Sdavidxu	if (uap->uaddr2 == NULL)
3106232144Sdavidxu		tm_p = NULL;
3107162536Sdavidxu	else {
3108232144Sdavidxu		error = umtx_copyin_umtx_time(
3109232144Sdavidxu		    uap->uaddr2, (size_t)uap->uaddr1, &timeout);
3110162536Sdavidxu		if (error != 0)
3111162536Sdavidxu			return (error);
3112232144Sdavidxu		tm_p = &timeout;
3113162536Sdavidxu	}
3114232144Sdavidxu	return do_wait(td, uap->obj, uap->val, tm_p, 0, 0);
3115162536Sdavidxu}
3116162536Sdavidxu
3117162536Sdavidxustatic int
3118173800Sdavidxu__umtx_op_wait_uint(struct thread *td, struct _umtx_op_args *uap)
3119173800Sdavidxu{
3120232144Sdavidxu	struct _umtx_time timeout, *tm_p;
3121173800Sdavidxu	int error;
3122173800Sdavidxu
3123173800Sdavidxu	if (uap->uaddr2 == NULL)
3124232144Sdavidxu		tm_p = NULL;
3125173800Sdavidxu	else {
3126232144Sdavidxu		error = umtx_copyin_umtx_time(
3127232144Sdavidxu		    uap->uaddr2, (size_t)uap->uaddr1, &timeout);
3128173800Sdavidxu		if (error != 0)
3129173800Sdavidxu			return (error);
3130232144Sdavidxu		tm_p = &timeout;
3131173800Sdavidxu	}
3132232144Sdavidxu	return do_wait(td, uap->obj, uap->val, tm_p, 1, 0);
3133173800Sdavidxu}
3134173800Sdavidxu
3135173800Sdavidxustatic int
3136178646Sdavidxu__umtx_op_wait_uint_private(struct thread *td, struct _umtx_op_args *uap)
3137178646Sdavidxu{
3138232144Sdavidxu	struct _umtx_time *tm_p, timeout;
3139178646Sdavidxu	int error;
3140178646Sdavidxu
3141178646Sdavidxu	if (uap->uaddr2 == NULL)
3142232144Sdavidxu		tm_p = NULL;
3143178646Sdavidxu	else {
3144232144Sdavidxu		error = umtx_copyin_umtx_time(
3145232144Sdavidxu		    uap->uaddr2, (size_t)uap->uaddr1, &timeout);
3146178646Sdavidxu		if (error != 0)
3147178646Sdavidxu			return (error);
3148232144Sdavidxu		tm_p = &timeout;
3149178646Sdavidxu	}
3150232144Sdavidxu	return do_wait(td, uap->obj, uap->val, tm_p, 1, 1);
3151178646Sdavidxu}
3152178646Sdavidxu
static int
__umtx_op_wake(struct thread *td, struct _umtx_op_args *uap)
{
	/* Wake up to uap->val threads waiting on the shared word uap->obj. */
	return (kern_umtx_wake(td, uap->obj, uap->val, 0));
}
3158162536Sdavidxu
3159216641Sdavidxu#define BATCH_SIZE	128
3160162536Sdavidxustatic int
3161216641Sdavidxu__umtx_op_nwake_private(struct thread *td, struct _umtx_op_args *uap)
3162216641Sdavidxu{
3163216641Sdavidxu	int count = uap->val;
3164216641Sdavidxu	void *uaddrs[BATCH_SIZE];
3165216641Sdavidxu	char **upp = (char **)uap->obj;
3166216641Sdavidxu	int tocopy;
3167216641Sdavidxu	int error = 0;
3168216641Sdavidxu	int i, pos = 0;
3169216641Sdavidxu
3170216641Sdavidxu	while (count > 0) {
3171216641Sdavidxu		tocopy = count;
3172216641Sdavidxu		if (tocopy > BATCH_SIZE)
3173216641Sdavidxu			tocopy = BATCH_SIZE;
3174216641Sdavidxu		error = copyin(upp+pos, uaddrs, tocopy * sizeof(char *));
3175216641Sdavidxu		if (error != 0)
3176216641Sdavidxu			break;
3177216641Sdavidxu		for (i = 0; i < tocopy; ++i)
3178216641Sdavidxu			kern_umtx_wake(td, uaddrs[i], INT_MAX, 1);
3179216641Sdavidxu		count -= tocopy;
3180216641Sdavidxu		pos += tocopy;
3181216641Sdavidxu	}
3182216641Sdavidxu	return (error);
3183216641Sdavidxu}
3184216641Sdavidxu
3185216641Sdavidxustatic int
3186178646Sdavidxu__umtx_op_wake_private(struct thread *td, struct _umtx_op_args *uap)
3187178646Sdavidxu{
3188178646Sdavidxu	return (kern_umtx_wake(td, uap->obj, uap->val, 1));
3189178646Sdavidxu}
3190178646Sdavidxu
3191178646Sdavidxustatic int
3192162536Sdavidxu__umtx_op_lock_umutex(struct thread *td, struct _umtx_op_args *uap)
3193162536Sdavidxu{
3194232144Sdavidxu	struct _umtx_time *tm_p, timeout;
3195162536Sdavidxu	int error;
3196162536Sdavidxu
3197162536Sdavidxu	/* Allow a null timespec (wait forever). */
3198162536Sdavidxu	if (uap->uaddr2 == NULL)
3199232144Sdavidxu		tm_p = NULL;
3200162536Sdavidxu	else {
3201232144Sdavidxu		error = umtx_copyin_umtx_time(
3202232144Sdavidxu		    uap->uaddr2, (size_t)uap->uaddr1, &timeout);
3203162536Sdavidxu		if (error != 0)
3204162536Sdavidxu			return (error);
3205232144Sdavidxu		tm_p = &timeout;
3206139013Sdavidxu	}
3207232144Sdavidxu	return do_lock_umutex(td, uap->obj, tm_p, 0);
3208162536Sdavidxu}
3209162536Sdavidxu
3210162536Sdavidxustatic int
3211162536Sdavidxu__umtx_op_trylock_umutex(struct thread *td, struct _umtx_op_args *uap)
3212162536Sdavidxu{
3213179970Sdavidxu	return do_lock_umutex(td, uap->obj, NULL, _UMUTEX_TRY);
3214162536Sdavidxu}
3215162536Sdavidxu
3216162536Sdavidxustatic int
3217179970Sdavidxu__umtx_op_wait_umutex(struct thread *td, struct _umtx_op_args *uap)
3218179970Sdavidxu{
3219232144Sdavidxu	struct _umtx_time *tm_p, timeout;
3220179970Sdavidxu	int error;
3221179970Sdavidxu
3222179970Sdavidxu	/* Allow a null timespec (wait forever). */
3223179970Sdavidxu	if (uap->uaddr2 == NULL)
3224232144Sdavidxu		tm_p = NULL;
3225179970Sdavidxu	else {
3226232144Sdavidxu		error = umtx_copyin_umtx_time(
3227232144Sdavidxu		    uap->uaddr2, (size_t)uap->uaddr1, &timeout);
3228179970Sdavidxu		if (error != 0)
3229179970Sdavidxu			return (error);
3230232144Sdavidxu		tm_p = &timeout;
3231179970Sdavidxu	}
3232232144Sdavidxu	return do_lock_umutex(td, uap->obj, tm_p, _UMUTEX_WAIT);
3233179970Sdavidxu}
3234179970Sdavidxu
3235179970Sdavidxustatic int
3236179970Sdavidxu__umtx_op_wake_umutex(struct thread *td, struct _umtx_op_args *uap)
3237179970Sdavidxu{
3238179970Sdavidxu	return do_wake_umutex(td, uap->obj);
3239179970Sdavidxu}
3240179970Sdavidxu
3241179970Sdavidxustatic int
3242162536Sdavidxu__umtx_op_unlock_umutex(struct thread *td, struct _umtx_op_args *uap)
3243162536Sdavidxu{
3244162536Sdavidxu	return do_unlock_umutex(td, uap->obj);
3245162536Sdavidxu}
3246162536Sdavidxu
3247162536Sdavidxustatic int
3248162536Sdavidxu__umtx_op_set_ceiling(struct thread *td, struct _umtx_op_args *uap)
3249162536Sdavidxu{
3250162536Sdavidxu	return do_set_ceiling(td, uap->obj, uap->val, uap->uaddr1);
3251162536Sdavidxu}
3252162536Sdavidxu
3253164839Sdavidxustatic int
3254164839Sdavidxu__umtx_op_cv_wait(struct thread *td, struct _umtx_op_args *uap)
3255164839Sdavidxu{
3256164839Sdavidxu	struct timespec *ts, timeout;
3257164839Sdavidxu	int error;
3258164839Sdavidxu
3259164839Sdavidxu	/* Allow a null timespec (wait forever). */
3260164839Sdavidxu	if (uap->uaddr2 == NULL)
3261164839Sdavidxu		ts = NULL;
3262164839Sdavidxu	else {
3263228219Spho		error = umtx_copyin_timeout(uap->uaddr2, &timeout);
3264164839Sdavidxu		if (error != 0)
3265164839Sdavidxu			return (error);
3266164839Sdavidxu		ts = &timeout;
3267164839Sdavidxu	}
3268164876Sdavidxu	return (do_cv_wait(td, uap->obj, uap->uaddr1, ts, uap->val));
3269164839Sdavidxu}
3270164839Sdavidxu
3271164839Sdavidxustatic int
3272164839Sdavidxu__umtx_op_cv_signal(struct thread *td, struct _umtx_op_args *uap)
3273164839Sdavidxu{
3274164839Sdavidxu	return do_cv_signal(td, uap->obj);
3275164839Sdavidxu}
3276164839Sdavidxu
3277164839Sdavidxustatic int
3278164839Sdavidxu__umtx_op_cv_broadcast(struct thread *td, struct _umtx_op_args *uap)
3279164839Sdavidxu{
3280164839Sdavidxu	return do_cv_broadcast(td, uap->obj);
3281164839Sdavidxu}
3282164839Sdavidxu
3283177848Sdavidxustatic int
3284177848Sdavidxu__umtx_op_rw_rdlock(struct thread *td, struct _umtx_op_args *uap)
3285177848Sdavidxu{
3286232209Sdavidxu	struct _umtx_time timeout;
3287177848Sdavidxu	int error;
3288177848Sdavidxu
3289177848Sdavidxu	/* Allow a null timespec (wait forever). */
3290177848Sdavidxu	if (uap->uaddr2 == NULL) {
3291177848Sdavidxu		error = do_rw_rdlock(td, uap->obj, uap->val, 0);
3292177848Sdavidxu	} else {
3293232209Sdavidxu		error = umtx_copyin_umtx_time(uap->uaddr2,
3294232209Sdavidxu		   (size_t)uap->uaddr1, &timeout);
3295177848Sdavidxu		if (error != 0)
3296177848Sdavidxu			return (error);
3297233690Sdavidxu		error = do_rw_rdlock(td, uap->obj, uap->val, &timeout);
3298177848Sdavidxu	}
3299177848Sdavidxu	return (error);
3300177848Sdavidxu}
3301177848Sdavidxu
3302177848Sdavidxustatic int
3303177848Sdavidxu__umtx_op_rw_wrlock(struct thread *td, struct _umtx_op_args *uap)
3304177848Sdavidxu{
3305232209Sdavidxu	struct _umtx_time timeout;
3306177848Sdavidxu	int error;
3307177848Sdavidxu
3308177848Sdavidxu	/* Allow a null timespec (wait forever). */
3309177848Sdavidxu	if (uap->uaddr2 == NULL) {
3310177848Sdavidxu		error = do_rw_wrlock(td, uap->obj, 0);
3311177848Sdavidxu	} else {
3312232209Sdavidxu		error = umtx_copyin_umtx_time(uap->uaddr2,
3313232209Sdavidxu		   (size_t)uap->uaddr1, &timeout);
3314177848Sdavidxu		if (error != 0)
3315177848Sdavidxu			return (error);
3316177848Sdavidxu
3317233690Sdavidxu		error = do_rw_wrlock(td, uap->obj, &timeout);
3318177848Sdavidxu	}
3319177848Sdavidxu	return (error);
3320177848Sdavidxu}
3321177848Sdavidxu
3322177848Sdavidxustatic int
3323177848Sdavidxu__umtx_op_rw_unlock(struct thread *td, struct _umtx_op_args *uap)
3324177848Sdavidxu{
3325177880Sdavidxu	return do_rw_unlock(td, uap->obj);
3326177848Sdavidxu}
3327177848Sdavidxu
3328201472Sdavidxustatic int
3329201472Sdavidxu__umtx_op_sem_wait(struct thread *td, struct _umtx_op_args *uap)
3330201472Sdavidxu{
3331232144Sdavidxu	struct _umtx_time *tm_p, timeout;
3332201472Sdavidxu	int error;
3333201472Sdavidxu
3334201472Sdavidxu	/* Allow a null timespec (wait forever). */
3335201472Sdavidxu	if (uap->uaddr2 == NULL)
3336232144Sdavidxu		tm_p = NULL;
3337201472Sdavidxu	else {
3338232144Sdavidxu		error = umtx_copyin_umtx_time(
3339232144Sdavidxu		    uap->uaddr2, (size_t)uap->uaddr1, &timeout);
3340201472Sdavidxu		if (error != 0)
3341201472Sdavidxu			return (error);
3342232144Sdavidxu		tm_p = &timeout;
3343201472Sdavidxu	}
3344232144Sdavidxu	return (do_sem_wait(td, uap->obj, tm_p));
3345201472Sdavidxu}
3346201472Sdavidxu
3347201472Sdavidxustatic int
3348201472Sdavidxu__umtx_op_sem_wake(struct thread *td, struct _umtx_op_args *uap)
3349201472Sdavidxu{
3350201472Sdavidxu	return do_sem_wake(td, uap->obj);
3351201472Sdavidxu}
3352201472Sdavidxu
3353233912Sdavidxustatic int
3354233912Sdavidxu__umtx_op_wake2_umutex(struct thread *td, struct _umtx_op_args *uap)
3355233912Sdavidxu{
3356233912Sdavidxu	return do_wake2_umutex(td, uap->obj, uap->val);
3357233912Sdavidxu}
3358233912Sdavidxu
3359162536Sdavidxutypedef int (*_umtx_op_func)(struct thread *td, struct _umtx_op_args *uap);
3360162536Sdavidxu
/*
 * Dispatch table for _umtx_op(2).  Entry order must match the numeric
 * values of the UMTX_OP_* constants used to index it, and the table
 * must contain exactly UMTX_OP_MAX entries (see sys__umtx_op()).
 */
static _umtx_op_func op_table[] = {
	__umtx_op_lock_umtx,		/* UMTX_OP_LOCK */
	__umtx_op_unlock_umtx,		/* UMTX_OP_UNLOCK */
	__umtx_op_wait,			/* UMTX_OP_WAIT */
	__umtx_op_wake,			/* UMTX_OP_WAKE */
	__umtx_op_trylock_umutex,	/* UMTX_OP_MUTEX_TRYLOCK */
	__umtx_op_lock_umutex,		/* UMTX_OP_MUTEX_LOCK */
	__umtx_op_unlock_umutex,	/* UMTX_OP_MUTEX_UNLOCK */
	__umtx_op_set_ceiling,		/* UMTX_OP_SET_CEILING */
	__umtx_op_cv_wait,		/* UMTX_OP_CV_WAIT */
	__umtx_op_cv_signal,		/* UMTX_OP_CV_SIGNAL */
	__umtx_op_cv_broadcast,		/* UMTX_OP_CV_BROADCAST */
	__umtx_op_wait_uint,		/* UMTX_OP_WAIT_UINT */
	__umtx_op_rw_rdlock,		/* UMTX_OP_RW_RDLOCK */
	__umtx_op_rw_wrlock,		/* UMTX_OP_RW_WRLOCK */
	__umtx_op_rw_unlock,		/* UMTX_OP_RW_UNLOCK */
	__umtx_op_wait_uint_private,	/* UMTX_OP_WAIT_UINT_PRIVATE */
	__umtx_op_wake_private,		/* UMTX_OP_WAKE_PRIVATE */
	__umtx_op_wait_umutex,		/* UMTX_OP_UMUTEX_WAIT */
	__umtx_op_wake_umutex,		/* UMTX_OP_UMUTEX_WAKE */
	__umtx_op_sem_wait,		/* UMTX_OP_SEM_WAIT */
	__umtx_op_sem_wake,		/* UMTX_OP_SEM_WAKE */
	__umtx_op_nwake_private,	/* UMTX_OP_NWAKE_PRIVATE */
	__umtx_op_wake2_umutex		/* UMTX_OP_UMUTEX_WAKE2 */
};
3386162536Sdavidxu
3387162536Sdavidxuint
3388225617Skmacysys__umtx_op(struct thread *td, struct _umtx_op_args *uap)
3389162536Sdavidxu{
3390163678Sdavidxu	if ((unsigned)uap->op < UMTX_OP_MAX)
3391162536Sdavidxu		return (*op_table[uap->op])(td, uap);
3392162536Sdavidxu	return (EINVAL);
3393162536Sdavidxu}
3394162536Sdavidxu
3395205014Snwhitehorn#ifdef COMPAT_FREEBSD32
3396163046Sdavidxuint
3397163046Sdavidxufreebsd32_umtx_lock(struct thread *td, struct freebsd32_umtx_lock_args *uap)
3398163046Sdavidxu    /* struct umtx *umtx */
3399163046Sdavidxu{
3400163046Sdavidxu	return (do_lock_umtx32(td, (uint32_t *)uap->umtx, td->td_tid, NULL));
3401163046Sdavidxu}
3402163046Sdavidxu
3403163046Sdavidxuint
3404163046Sdavidxufreebsd32_umtx_unlock(struct thread *td, struct freebsd32_umtx_unlock_args *uap)
3405163046Sdavidxu    /* struct umtx *umtx */
3406163046Sdavidxu{
3407163046Sdavidxu	return (do_unlock_umtx32(td, (uint32_t *)uap->umtx, td->td_tid));
3408163046Sdavidxu}
3409163046Sdavidxu
/* ILP32 layout of struct timespec: 32-bit time_t and 32-bit long. */
struct timespec32 {
	int32_t tv_sec;
	int32_t tv_nsec;
};
3414162536Sdavidxu
/* ILP32 layout of struct _umtx_time; mirrors the native structure. */
struct umtx_time32 {
	struct	timespec32	timeout;
	uint32_t		flags;
	uint32_t		clockid;
};
3420232144Sdavidxu
3421162536Sdavidxustatic inline int
3422228218Sphoumtx_copyin_timeout32(void *addr, struct timespec *tsp)
3423162536Sdavidxu{
3424162536Sdavidxu	struct timespec32 ts32;
3425162536Sdavidxu	int error;
3426162536Sdavidxu
3427162536Sdavidxu	error = copyin(addr, &ts32, sizeof(struct timespec32));
3428162536Sdavidxu	if (error == 0) {
3429228218Spho		if (ts32.tv_sec < 0 ||
3430228218Spho		    ts32.tv_nsec >= 1000000000 ||
3431228218Spho		    ts32.tv_nsec < 0)
3432228218Spho			error = EINVAL;
3433228218Spho		else {
3434228218Spho			tsp->tv_sec = ts32.tv_sec;
3435228218Spho			tsp->tv_nsec = ts32.tv_nsec;
3436228218Spho		}
3437162536Sdavidxu	}
3438140421Sdavidxu	return (error);
3439139013Sdavidxu}
3440161678Sdavidxu
3441232144Sdavidxustatic inline int
3442232144Sdavidxuumtx_copyin_umtx_time32(const void *addr, size_t size, struct _umtx_time *tp)
3443232144Sdavidxu{
3444232144Sdavidxu	struct umtx_time32 t32;
3445232144Sdavidxu	int error;
3446232144Sdavidxu
3447232144Sdavidxu	t32.clockid = CLOCK_REALTIME;
3448232144Sdavidxu	t32.flags   = 0;
3449232144Sdavidxu	if (size <= sizeof(struct timespec32))
3450232144Sdavidxu		error = copyin(addr, &t32.timeout, sizeof(struct timespec32));
3451232144Sdavidxu	else
3452232144Sdavidxu		error = copyin(addr, &t32, sizeof(struct umtx_time32));
3453232144Sdavidxu	if (error != 0)
3454232144Sdavidxu		return (error);
3455232144Sdavidxu	if (t32.timeout.tv_sec < 0 ||
3456232144Sdavidxu	    t32.timeout.tv_nsec >= 1000000000 || t32.timeout.tv_nsec < 0)
3457232144Sdavidxu		return (EINVAL);
3458232144Sdavidxu	tp->_timeout.tv_sec = t32.timeout.tv_sec;
3459232144Sdavidxu	tp->_timeout.tv_nsec = t32.timeout.tv_nsec;
3460232144Sdavidxu	tp->_flags = t32.flags;
3461232144Sdavidxu	tp->_clockid = t32.clockid;
3462232144Sdavidxu	return (0);
3463232144Sdavidxu}
3464232144Sdavidxu
3465162536Sdavidxustatic int
3466162536Sdavidxu__umtx_op_lock_umtx_compat32(struct thread *td, struct _umtx_op_args *uap)
3467162536Sdavidxu{
3468162536Sdavidxu	struct timespec *ts, timeout;
3469162536Sdavidxu	int error;
3470162536Sdavidxu
3471162536Sdavidxu	/* Allow a null timespec (wait forever). */
3472162536Sdavidxu	if (uap->uaddr2 == NULL)
3473162536Sdavidxu		ts = NULL;
3474162536Sdavidxu	else {
3475228218Spho		error = umtx_copyin_timeout32(uap->uaddr2, &timeout);
3476162536Sdavidxu		if (error != 0)
3477162536Sdavidxu			return (error);
3478162536Sdavidxu		ts = &timeout;
3479162536Sdavidxu	}
3480162536Sdavidxu	return (do_lock_umtx32(td, uap->obj, uap->val, ts));
3481162536Sdavidxu}
3482162536Sdavidxu
3483162536Sdavidxustatic int
3484162536Sdavidxu__umtx_op_unlock_umtx_compat32(struct thread *td, struct _umtx_op_args *uap)
3485162536Sdavidxu{
3486162536Sdavidxu	return (do_unlock_umtx32(td, uap->obj, (uint32_t)uap->val));
3487162536Sdavidxu}
3488162536Sdavidxu
3489162536Sdavidxustatic int
3490162536Sdavidxu__umtx_op_wait_compat32(struct thread *td, struct _umtx_op_args *uap)
3491162536Sdavidxu{
3492232144Sdavidxu	struct _umtx_time *tm_p, timeout;
3493162536Sdavidxu	int error;
3494162536Sdavidxu
3495162536Sdavidxu	if (uap->uaddr2 == NULL)
3496232144Sdavidxu		tm_p = NULL;
3497162536Sdavidxu	else {
3498232144Sdavidxu		error = umtx_copyin_umtx_time32(uap->uaddr2,
3499232144Sdavidxu			(size_t)uap->uaddr1, &timeout);
3500162536Sdavidxu		if (error != 0)
3501162536Sdavidxu			return (error);
3502232144Sdavidxu		tm_p = &timeout;
3503162536Sdavidxu	}
3504232144Sdavidxu	return do_wait(td, uap->obj, uap->val, tm_p, 1, 0);
3505162536Sdavidxu}
3506162536Sdavidxu
3507162536Sdavidxustatic int
3508162536Sdavidxu__umtx_op_lock_umutex_compat32(struct thread *td, struct _umtx_op_args *uap)
3509162536Sdavidxu{
3510232144Sdavidxu	struct _umtx_time *tm_p, timeout;
3511162536Sdavidxu	int error;
3512162536Sdavidxu
3513162536Sdavidxu	/* Allow a null timespec (wait forever). */
3514162536Sdavidxu	if (uap->uaddr2 == NULL)
3515232144Sdavidxu		tm_p = NULL;
3516162536Sdavidxu	else {
3517232144Sdavidxu		error = umtx_copyin_umtx_time(uap->uaddr2,
3518232144Sdavidxu			    (size_t)uap->uaddr1, &timeout);
3519162536Sdavidxu		if (error != 0)
3520162536Sdavidxu			return (error);
3521232144Sdavidxu		tm_p = &timeout;
3522162536Sdavidxu	}
3523232144Sdavidxu	return do_lock_umutex(td, uap->obj, tm_p, 0);
3524162536Sdavidxu}
3525162536Sdavidxu
3526164839Sdavidxustatic int
3527179970Sdavidxu__umtx_op_wait_umutex_compat32(struct thread *td, struct _umtx_op_args *uap)
3528179970Sdavidxu{
3529232144Sdavidxu	struct _umtx_time *tm_p, timeout;
3530179970Sdavidxu	int error;
3531179970Sdavidxu
3532179970Sdavidxu	/* Allow a null timespec (wait forever). */
3533179970Sdavidxu	if (uap->uaddr2 == NULL)
3534232144Sdavidxu		tm_p = NULL;
3535179970Sdavidxu	else {
3536232144Sdavidxu		error = umtx_copyin_umtx_time32(uap->uaddr2,
3537232144Sdavidxu		    (size_t)uap->uaddr1, &timeout);
3538179970Sdavidxu		if (error != 0)
3539179970Sdavidxu			return (error);
3540232144Sdavidxu		tm_p = &timeout;
3541179970Sdavidxu	}
3542232144Sdavidxu	return do_lock_umutex(td, uap->obj, tm_p, _UMUTEX_WAIT);
3543179970Sdavidxu}
3544179970Sdavidxu
3545179970Sdavidxustatic int
3546164839Sdavidxu__umtx_op_cv_wait_compat32(struct thread *td, struct _umtx_op_args *uap)
3547164839Sdavidxu{
3548164839Sdavidxu	struct timespec *ts, timeout;
3549164839Sdavidxu	int error;
3550164839Sdavidxu
3551164839Sdavidxu	/* Allow a null timespec (wait forever). */
3552164839Sdavidxu	if (uap->uaddr2 == NULL)
3553164839Sdavidxu		ts = NULL;
3554164839Sdavidxu	else {
3555228218Spho		error = umtx_copyin_timeout32(uap->uaddr2, &timeout);
3556164839Sdavidxu		if (error != 0)
3557164839Sdavidxu			return (error);
3558164839Sdavidxu		ts = &timeout;
3559164839Sdavidxu	}
3560164876Sdavidxu	return (do_cv_wait(td, uap->obj, uap->uaddr1, ts, uap->val));
3561164839Sdavidxu}
3562164839Sdavidxu
3563177848Sdavidxustatic int
3564177848Sdavidxu__umtx_op_rw_rdlock_compat32(struct thread *td, struct _umtx_op_args *uap)
3565177848Sdavidxu{
3566232209Sdavidxu	struct _umtx_time timeout;
3567177848Sdavidxu	int error;
3568177848Sdavidxu
3569177848Sdavidxu	/* Allow a null timespec (wait forever). */
3570177848Sdavidxu	if (uap->uaddr2 == NULL) {
3571177848Sdavidxu		error = do_rw_rdlock(td, uap->obj, uap->val, 0);
3572177848Sdavidxu	} else {
3573232209Sdavidxu		error = umtx_copyin_umtx_time32(uap->uaddr2,
3574232209Sdavidxu		    (size_t)uap->uaddr1, &timeout);
3575177848Sdavidxu		if (error != 0)
3576177848Sdavidxu			return (error);
3577233693Sdavidxu		error = do_rw_rdlock(td, uap->obj, uap->val, &timeout);
3578177848Sdavidxu	}
3579177848Sdavidxu	return (error);
3580177848Sdavidxu}
3581177848Sdavidxu
3582177848Sdavidxustatic int
3583177848Sdavidxu__umtx_op_rw_wrlock_compat32(struct thread *td, struct _umtx_op_args *uap)
3584177848Sdavidxu{
3585232209Sdavidxu	struct _umtx_time timeout;
3586177848Sdavidxu	int error;
3587177848Sdavidxu
3588177848Sdavidxu	/* Allow a null timespec (wait forever). */
3589177848Sdavidxu	if (uap->uaddr2 == NULL) {
3590177852Sdavidxu		error = do_rw_wrlock(td, uap->obj, 0);
3591177848Sdavidxu	} else {
3592232209Sdavidxu		error = umtx_copyin_umtx_time32(uap->uaddr2,
3593232209Sdavidxu		    (size_t)uap->uaddr1, &timeout);
3594177848Sdavidxu		if (error != 0)
3595177848Sdavidxu			return (error);
3596233693Sdavidxu		error = do_rw_wrlock(td, uap->obj, &timeout);
3597177848Sdavidxu	}
3598177848Sdavidxu	return (error);
3599177848Sdavidxu}
3600177848Sdavidxu
3601178646Sdavidxustatic int
3602178646Sdavidxu__umtx_op_wait_uint_private_compat32(struct thread *td, struct _umtx_op_args *uap)
3603178646Sdavidxu{
3604232144Sdavidxu	struct _umtx_time *tm_p, timeout;
3605178646Sdavidxu	int error;
3606178646Sdavidxu
3607178646Sdavidxu	if (uap->uaddr2 == NULL)
3608232144Sdavidxu		tm_p = NULL;
3609178646Sdavidxu	else {
3610232144Sdavidxu		error = umtx_copyin_umtx_time32(
3611232144Sdavidxu		    uap->uaddr2, (size_t)uap->uaddr1,&timeout);
3612178646Sdavidxu		if (error != 0)
3613178646Sdavidxu			return (error);
3614232144Sdavidxu		tm_p = &timeout;
3615178646Sdavidxu	}
3616232144Sdavidxu	return do_wait(td, uap->obj, uap->val, tm_p, 1, 1);
3617178646Sdavidxu}
3618178646Sdavidxu
3619201472Sdavidxustatic int
3620201472Sdavidxu__umtx_op_sem_wait_compat32(struct thread *td, struct _umtx_op_args *uap)
3621201472Sdavidxu{
3622232144Sdavidxu	struct _umtx_time *tm_p, timeout;
3623201472Sdavidxu	int error;
3624201472Sdavidxu
3625201472Sdavidxu	/* Allow a null timespec (wait forever). */
3626201472Sdavidxu	if (uap->uaddr2 == NULL)
3627232144Sdavidxu		tm_p = NULL;
3628201472Sdavidxu	else {
3629232144Sdavidxu		error = umtx_copyin_umtx_time32(uap->uaddr2,
3630232144Sdavidxu		    (size_t)uap->uaddr1, &timeout);
3631201472Sdavidxu		if (error != 0)
3632201472Sdavidxu			return (error);
3633232144Sdavidxu		tm_p = &timeout;
3634201472Sdavidxu	}
3635232144Sdavidxu	return (do_sem_wait(td, uap->obj, tm_p));
3636201472Sdavidxu}
3637201472Sdavidxu
3638216641Sdavidxustatic int
3639216641Sdavidxu__umtx_op_nwake_private32(struct thread *td, struct _umtx_op_args *uap)
3640216641Sdavidxu{
3641216641Sdavidxu	int count = uap->val;
3642216641Sdavidxu	uint32_t uaddrs[BATCH_SIZE];
3643216641Sdavidxu	uint32_t **upp = (uint32_t **)uap->obj;
3644216641Sdavidxu	int tocopy;
3645216641Sdavidxu	int error = 0;
3646216641Sdavidxu	int i, pos = 0;
3647216641Sdavidxu
3648216641Sdavidxu	while (count > 0) {
3649216641Sdavidxu		tocopy = count;
3650216641Sdavidxu		if (tocopy > BATCH_SIZE)
3651216641Sdavidxu			tocopy = BATCH_SIZE;
3652216641Sdavidxu		error = copyin(upp+pos, uaddrs, tocopy * sizeof(uint32_t));
3653216641Sdavidxu		if (error != 0)
3654216641Sdavidxu			break;
3655216641Sdavidxu		for (i = 0; i < tocopy; ++i)
3656216641Sdavidxu			kern_umtx_wake(td, (void *)(intptr_t)uaddrs[i],
3657216641Sdavidxu				INT_MAX, 1);
3658216641Sdavidxu		count -= tocopy;
3659216641Sdavidxu		pos += tocopy;
3660216641Sdavidxu	}
3661216641Sdavidxu	return (error);
3662216641Sdavidxu}
3663216641Sdavidxu
/*
 * COMPAT_FREEBSD32 dispatch table for _umtx_op(2); same slot order as
 * the native op_table (indexed by UMTX_OP_* values).
 */
static _umtx_op_func op_table_compat32[] = {
	__umtx_op_lock_umtx_compat32,	/* UMTX_OP_LOCK */
	__umtx_op_unlock_umtx_compat32,	/* UMTX_OP_UNLOCK */
	__umtx_op_wait_compat32,	/* UMTX_OP_WAIT */
	__umtx_op_wake,			/* UMTX_OP_WAKE */
	__umtx_op_trylock_umutex,	/* UMTX_OP_MUTEX_TRYLOCK */
	__umtx_op_lock_umutex_compat32,	/* UMTX_OP_MUTEX_LOCK */
	__umtx_op_unlock_umutex,	/* UMTX_OP_MUTEX_UNLOCK	*/
	__umtx_op_set_ceiling,		/* UMTX_OP_SET_CEILING */
	__umtx_op_cv_wait_compat32,	/* UMTX_OP_CV_WAIT*/
	__umtx_op_cv_signal,		/* UMTX_OP_CV_SIGNAL */
	__umtx_op_cv_broadcast,		/* UMTX_OP_CV_BROADCAST */
	__umtx_op_wait_compat32,	/* UMTX_OP_WAIT_UINT */
	__umtx_op_rw_rdlock_compat32,	/* UMTX_OP_RW_RDLOCK */
	__umtx_op_rw_wrlock_compat32,	/* UMTX_OP_RW_WRLOCK */
	__umtx_op_rw_unlock,		/* UMTX_OP_RW_UNLOCK */
	__umtx_op_wait_uint_private_compat32,	/* UMTX_OP_WAIT_UINT_PRIVATE */
	__umtx_op_wake_private,		/* UMTX_OP_WAKE_PRIVATE */
	__umtx_op_wait_umutex_compat32, /* UMTX_OP_UMUTEX_WAIT */
	__umtx_op_wake_umutex,		/* UMTX_OP_UMUTEX_WAKE */
	__umtx_op_sem_wait_compat32,	/* UMTX_OP_SEM_WAIT */
	__umtx_op_sem_wake,		/* UMTX_OP_SEM_WAKE */
	__umtx_op_nwake_private32,	/* UMTX_OP_NWAKE_PRIVATE */
	__umtx_op_wake2_umutex		/* UMTX_OP_UMUTEX_WAKE2 */
};
3689162536Sdavidxu
3690162536Sdavidxuint
3691162536Sdavidxufreebsd32_umtx_op(struct thread *td, struct freebsd32_umtx_op_args *uap)
3692162536Sdavidxu{
3693163678Sdavidxu	if ((unsigned)uap->op < UMTX_OP_MAX)
3694162536Sdavidxu		return (*op_table_compat32[uap->op])(td,
3695162536Sdavidxu			(struct _umtx_op_args *)uap);
3696162536Sdavidxu	return (EINVAL);
3697162536Sdavidxu}
3698162536Sdavidxu#endif
3699162536Sdavidxu
3700161678Sdavidxuvoid
3701161678Sdavidxuumtx_thread_init(struct thread *td)
3702161678Sdavidxu{
3703161678Sdavidxu	td->td_umtxq = umtxq_alloc();
3704161678Sdavidxu	td->td_umtxq->uq_thread = td;
3705161678Sdavidxu}
3706161678Sdavidxu
3707161678Sdavidxuvoid
3708161678Sdavidxuumtx_thread_fini(struct thread *td)
3709161678Sdavidxu{
3710161678Sdavidxu	umtxq_free(td->td_umtxq);
3711161678Sdavidxu}
3712161678Sdavidxu
3713161678Sdavidxu/*
3714161678Sdavidxu * It will be called when new thread is created, e.g fork().
3715161678Sdavidxu */
3716161678Sdavidxuvoid
3717161678Sdavidxuumtx_thread_alloc(struct thread *td)
3718161678Sdavidxu{
3719161678Sdavidxu	struct umtx_q *uq;
3720161678Sdavidxu
3721161678Sdavidxu	uq = td->td_umtxq;
3722161678Sdavidxu	uq->uq_inherited_pri = PRI_MAX;
3723161678Sdavidxu
3724161678Sdavidxu	KASSERT(uq->uq_flags == 0, ("uq_flags != 0"));
3725161678Sdavidxu	KASSERT(uq->uq_thread == td, ("uq_thread != td"));
3726161678Sdavidxu	KASSERT(uq->uq_pi_blocked == NULL, ("uq_pi_blocked != NULL"));
3727161678Sdavidxu	KASSERT(TAILQ_EMPTY(&uq->uq_pi_contested), ("uq_pi_contested is not empty"));
3728161678Sdavidxu}
3729161678Sdavidxu
3730161678Sdavidxu/*
3731161678Sdavidxu * exec() hook.
3732161678Sdavidxu */
3733161678Sdavidxustatic void
3734161678Sdavidxuumtx_exec_hook(void *arg __unused, struct proc *p __unused,
3735161678Sdavidxu	struct image_params *imgp __unused)
3736161678Sdavidxu{
3737161678Sdavidxu	umtx_thread_cleanup(curthread);
3738161678Sdavidxu}
3739161678Sdavidxu
/*
 * thread_exit() hook: drop all umtx state of the exiting thread.
 */
void
umtx_thread_exit(struct thread *td)
{

	umtx_thread_cleanup(td);
}
3748161678Sdavidxu
/*
 * clean up umtx data: disown any priority-inheritance mutexes still
 * held by the thread and drop any priority lent to it through them.
 * Called from the exit and exec hooks above.
 */
static void
umtx_thread_cleanup(struct thread *td)
{
	struct umtx_q *uq;
	struct umtx_pi *pi;

	if ((uq = td->td_umtxq) == NULL)
		return;

	mtx_lock_spin(&umtx_lock);
	uq->uq_inherited_pri = PRI_MAX;
	/* Orphan every PI mutex this thread still owns. */
	while ((pi = TAILQ_FIRST(&uq->uq_pi_contested)) != NULL) {
		pi->pi_owner = NULL;
		TAILQ_REMOVE(&uq->uq_pi_contested, pi, pi_link);
	}
	mtx_unlock_spin(&umtx_lock);
	/* Return any lent user priority; requires the thread lock. */
	thread_lock(td);
	sched_lend_user_prio(td, PRI_MAX);
	thread_unlock(td);
}
3772