/* kern_umtx.c, revision 139257 */
/*
 * Copyright (c) 2004, David Xu <davidxu@freebsd.org>
 * Copyright (c) 2002, Jeffrey Roberson <jeff@freebsd.org>
 * All rights reserved.
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 * 1. Redistributions of source code must retain the above copyright
 *    notice unmodified, this list of conditions, and the following
 *    disclaimer.
 * 2. Redistributions in binary form must reproduce the above copyright
 *    notice, this list of conditions and the following disclaimer in the
 *    documentation and/or other materials provided with the distribution.
 *
 * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR
 * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
 * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.
 * IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT,
 * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT
 * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
 * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
 * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
 * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF
 * THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
 */

#include <sys/cdefs.h>
__FBSDID("$FreeBSD: head/sys/kern/kern_umtx.c 139257 2004-12-24 11:30:55Z davidxu $");

#include <sys/param.h>
#include <sys/kernel.h>
#include <sys/limits.h>
#include <sys/lock.h>
#include <sys/malloc.h>
#include <sys/mutex.h>
#include <sys/proc.h>
#include <sys/sysent.h>
#include <sys/systm.h>
#include <sys/sysproto.h>
#include <sys/eventhandler.h>
#include <sys/thr.h>
#include <sys/umtx.h>

#include <vm/vm.h>
#include <vm/vm_param.h>
#include <vm/pmap.h>
#include <vm/vm_map.h>
#include <vm/vm_object.h>

#define UMTX_PRIVATE	0
#define UMTX_SHARED	1

#define UMTX_STATIC_SHARED

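/*
 * A umtx is identified either by the backing VM object and offset within
 * it (when the mapping is shared between processes) or by its user address
 * and the owning pid (when it is private).  The "both" member overlays the
 * two layouts so the hash and comparison routines can treat keys uniformly.
 */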
struct umtx_key {
	int	type;
	union {
		struct {
			vm_object_t	object;
			long		offset;
		} shared;
		struct {
			struct umtx	*umtx;
			long		pid;
		} private;
		struct {
			void		*ptr;
			long		word;
		} both;
	} info;
};

struct umtx_q {
	LIST_ENTRY(umtx_q)	uq_next;	/* Linked list for the hash. */
	struct umtx_key		uq_key;		/* Umtx key. */
	struct thread		*uq_thread;	/* The thread waiting on the umtx. */
	LIST_ENTRY(umtx_q)	uq_rqnext;	/* Linked list for requeuing. */
	vm_offset_t		uq_addr;	/* Umtx's virtual address. */
};

LIST_HEAD(umtx_head, umtx_q);
struct umtxq_chain {
	struct mtx		uc_lock;	/* Lock for this chain. */
	struct umtx_head	uc_queue;	/* List of sleep queues. */
#define	UCF_BUSY		0x01
#define	UCF_WANT		0x02
	int			uc_flags;
};

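/*
 * Sleeping threads are distributed over UMTX_CHAINS buckets, each with its
 * own mutex and queue.  Keys are hashed by golden-ratio (Fibonacci)
 * multiplication, keeping the upper bits of the product.
 */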
#define	GOLDEN_RATIO_PRIME	2654404609U
#define	UMTX_CHAINS		128
#define	UMTX_SHIFTS		(__WORD_BIT - 7)

static struct umtxq_chain umtxq_chains[UMTX_CHAINS];
static MALLOC_DEFINE(M_UMTX, "umtx", "UMTX queue memory");

#define	UMTX_CONTESTED	LONG_MIN

static void umtxq_init_chains(void *);
static int umtxq_hash(struct umtx_key *key);
static struct mtx *umtxq_mtx(int chain);
static void umtxq_lock(struct umtx_key *key);
static void umtxq_unlock(struct umtx_key *key);
static void umtxq_busy(struct umtx_key *key);
static void umtxq_unbusy(struct umtx_key *key);
static void umtxq_insert(struct umtx_q *uq);
static void umtxq_remove(struct umtx_q *uq);
static int umtxq_sleep(struct thread *td, struct umtx_key *key,
	int prio, const char *wmesg, int timo);
static int umtxq_count(struct umtx_key *key);
static int umtxq_signal(struct umtx_key *key, int nr_wakeup);
#ifdef UMTX_DYNAMIC_SHARED
static void fork_handler(void *arg, struct proc *p1, struct proc *p2,
	int flags);
#endif
static int umtx_key_match(const struct umtx_key *k1, const struct umtx_key *k2);
static int umtx_key_get(struct thread *td, struct umtx *umtx,
	struct umtx_key *key);
static void umtx_key_release(struct umtx_key *key);

SYSINIT(umtx, SI_SUB_EVENTHANDLER+1, SI_ORDER_MIDDLE, umtxq_init_chains, NULL);

static void
umtxq_init_chains(void *arg __unused)
{
	int i;

	for (i = 0; i < UMTX_CHAINS; ++i) {
		mtx_init(&umtxq_chains[i].uc_lock, "umtxq_lock", NULL,
			 MTX_DEF | MTX_DUPOK);
		LIST_INIT(&umtxq_chains[i].uc_queue);
		umtxq_chains[i].uc_flags = 0;
	}
#ifdef UMTX_DYNAMIC_SHARED
	EVENTHANDLER_REGISTER(process_fork, fork_handler, 0, 10000);
#endif
}

static inline int
umtxq_hash(struct umtx_key *key)
{
	unsigned n = (uintptr_t)key->info.both.ptr + key->info.both.word;
	return (((n * GOLDEN_RATIO_PRIME) >> UMTX_SHIFTS) % UMTX_CHAINS);
}

static inline int
umtx_key_match(const struct umtx_key *k1, const struct umtx_key *k2)
{
	return (k1->type == k2->type &&
		k1->info.both.ptr == k2->info.both.ptr &&
		k1->info.both.word == k2->info.both.word);
}

static inline struct mtx *
umtxq_mtx(int chain)
{
	return (&umtxq_chains[chain].uc_lock);
}

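/*
 * Mark a chain busy.  The busy flag keeps wakeup and requeue operations on
 * the chain serialized even while the chain mutex is temporarily dropped;
 * umtxq_unbusy() clears it and wakes any thread waiting to mark the chain
 * busy itself.
 */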
static inline void
umtxq_busy(struct umtx_key *key)
{
	int chain = umtxq_hash(key);

	mtx_assert(umtxq_mtx(chain), MA_OWNED);
	while (umtxq_chains[chain].uc_flags & UCF_BUSY) {
		umtxq_chains[chain].uc_flags |= UCF_WANT;
		msleep(&umtxq_chains[chain], umtxq_mtx(chain),
		       curthread->td_priority, "umtxq_busy", 0);
	}
	umtxq_chains[chain].uc_flags |= UCF_BUSY;
}

static inline void
umtxq_unbusy(struct umtx_key *key)
{
	int chain = umtxq_hash(key);

	mtx_assert(umtxq_mtx(chain), MA_OWNED);
	KASSERT(umtxq_chains[chain].uc_flags & UCF_BUSY, ("not busy"));
	umtxq_chains[chain].uc_flags &= ~UCF_BUSY;
	if (umtxq_chains[chain].uc_flags & UCF_WANT) {
		umtxq_chains[chain].uc_flags &= ~UCF_WANT;
		wakeup(&umtxq_chains[chain]);
	}
}

static inline void
umtxq_lock(struct umtx_key *key)
{
	int chain = umtxq_hash(key);
	mtx_lock(umtxq_mtx(chain));
}

static inline void
umtxq_unlock(struct umtx_key *key)
{
	int chain = umtxq_hash(key);
	mtx_unlock(umtxq_mtx(chain));
}

/*
 * Insert a thread onto the umtx queue.
 */
static inline void
umtxq_insert(struct umtx_q *uq)
{
	struct umtx_head *head;
	int chain = umtxq_hash(&uq->uq_key);

	mtx_assert(umtxq_mtx(chain), MA_OWNED);
	head = &umtxq_chains[chain].uc_queue;
	LIST_INSERT_HEAD(head, uq, uq_next);
	uq->uq_thread->td_umtxq = uq;
	mtx_lock_spin(&sched_lock);
	uq->uq_thread->td_flags |= TDF_UMTXQ;
	mtx_unlock_spin(&sched_lock);
}

/*
 * Remove thread from the umtx queue.
 */
static inline void
umtxq_remove(struct umtx_q *uq)
{
	mtx_assert(umtxq_mtx(umtxq_hash(&uq->uq_key)), MA_OWNED);
	if (uq->uq_thread->td_flags & TDF_UMTXQ) {
		LIST_REMOVE(uq, uq_next);
		uq->uq_thread->td_umtxq = NULL;
		/* Clearing TDF_UMTXQ must be the last thing done. */
		mtx_lock_spin(&sched_lock);
		uq->uq_thread->td_flags &= ~TDF_UMTXQ;
		mtx_unlock_spin(&sched_lock);
	}
}

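/*
 * Count the waiters matching a key on its chain.  Callers only need to
 * distinguish zero, one or more waiters, so counting stops at two.
 */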
static int
umtxq_count(struct umtx_key *key)
{
	struct umtx_q *uq;
	struct umtx_head *head;
	int chain, count = 0;

	chain = umtxq_hash(key);
	mtx_assert(umtxq_mtx(chain), MA_OWNED);
	head = &umtxq_chains[chain].uc_queue;
	LIST_FOREACH(uq, head, uq_next) {
		if (umtx_key_match(&uq->uq_key, key)) {
			if (++count > 1)
				break;
		}
	}
	return (count);
}

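/*
 * Remove threads sleeping on a matching key from the queue and wake them,
 * stopping once at least n_wake have been woken; the number woken is
 * returned.
 */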
static int
umtxq_signal(struct umtx_key *key, int n_wake)
{
	struct umtx_q *uq, *next;
	struct umtx_head *head;
	struct thread *blocked = NULL;
	int chain, ret;

	ret = 0;
	chain = umtxq_hash(key);
	mtx_assert(umtxq_mtx(chain), MA_OWNED);
	head = &umtxq_chains[chain].uc_queue;
	for (uq = LIST_FIRST(head); uq; uq = next) {
		next = LIST_NEXT(uq, uq_next);
		if (umtx_key_match(&uq->uq_key, key)) {
			blocked = uq->uq_thread;
			umtxq_remove(uq);
			wakeup(blocked);
			if (++ret >= n_wake)
				break;
		}
	}
	return (ret);
}

static inline int
umtxq_sleep(struct thread *td, struct umtx_key *key, int priority,
	    const char *wmesg, int timo)
{
	int chain = umtxq_hash(key);

	return (msleep(td, umtxq_mtx(chain), priority, wmesg, timo));
}

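/*
 * Resolve a user address into a umtx_key.  For a shared mapping the key is
 * the backing VM object and the offset within it; otherwise it is the user
 * address paired with the owning pid.
 */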
static int
umtx_key_get(struct thread *td, struct umtx *umtx, struct umtx_key *key)
{
#if defined(UMTX_DYNAMIC_SHARED) || defined(UMTX_STATIC_SHARED)
	vm_map_t map;
	vm_map_entry_t entry;
	vm_pindex_t pindex;
	vm_prot_t prot;
	boolean_t wired;

	map = &td->td_proc->p_vmspace->vm_map;
	if (vm_map_lookup(&map, (vm_offset_t)umtx, VM_PROT_WRITE,
	    &entry, &key->info.shared.object, &pindex, &prot,
	    &wired) != KERN_SUCCESS) {
		return (EFAULT);
	}
#endif

#if defined(UMTX_DYNAMIC_SHARED)
	key->type = UMTX_SHARED;
	key->info.shared.offset = entry->offset + entry->start -
		(vm_offset_t)umtx;
	/*
	 * Add an object reference; without it, a buggy application could
	 * deallocate the object, the object could be reused by another
	 * process, and unlock would then wake the wrong thread.
	 */
	vm_object_reference(key->info.shared.object);
	vm_map_lookup_done(map, entry);
#elif defined(UMTX_STATIC_SHARED)
	if (VM_INHERIT_SHARE == entry->inheritance) {
		key->type = UMTX_SHARED;
		key->info.shared.offset = entry->offset + entry->start -
			(vm_offset_t)umtx;
		vm_object_reference(key->info.shared.object);
	} else {
		key->type = UMTX_PRIVATE;
		key->info.private.umtx = umtx;
		key->info.private.pid  = td->td_proc->p_pid;
	}
	vm_map_lookup_done(map, entry);
#else
	key->type = UMTX_PRIVATE;
	key->info.private.umtx = umtx;
	key->info.private.pid  = td->td_proc->p_pid;
#endif
	return (0);
}

static inline void
umtx_key_release(struct umtx_key *key)
{
	if (key->type == UMTX_SHARED)
		vm_object_deallocate(key->info.shared.object);
}

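/*
 * Resolve the key for a umtx and put the calling thread on its sleep queue.
 * Queuing happens before the caller re-examines the userland word, so a
 * wakeup racing with that check cannot be lost.
 */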
static inline int
umtxq_queue_me(struct thread *td, struct umtx *umtx, struct umtx_q *uq)
{
	int error;

	if ((error = umtx_key_get(td, umtx, &uq->uq_key)) != 0)
		return (error);

	uq->uq_addr = (vm_offset_t)umtx;
	uq->uq_thread = td;
	umtxq_lock(&uq->uq_key);
	/* Note: for a condition variable, the busy flag is not strictly needed. */
	umtxq_busy(&uq->uq_key);
	umtxq_insert(uq);
	umtxq_unbusy(&uq->uq_key);
	umtxq_unlock(&uq->uq_key);
	return (0);
}

#if defined(UMTX_DYNAMIC_SHARED)
static void
fork_handler(void *arg, struct proc *p1, struct proc *p2, int flags)
{
	vm_map_t map;
	vm_map_entry_t entry;
	vm_object_t object;
	vm_pindex_t pindex;
	vm_prot_t prot;
	boolean_t wired;
	struct umtx_key key;
	LIST_HEAD(, umtx_q) workq;
	struct umtx_q *uq;
	struct thread *td;
	int onq;

	LIST_INIT(&workq);

	/* Collect threads waiting on umtxq */
	PROC_LOCK(p1);
	FOREACH_THREAD_IN_PROC(p1, td) {
		if (td->td_flags & TDF_UMTXQ) {
			uq = td->td_umtxq;
			if (uq)
				LIST_INSERT_HEAD(&workq, uq, uq_rqnext);
		}
	}
	PROC_UNLOCK(p1);

	LIST_FOREACH(uq, &workq, uq_rqnext) {
		map = &p1->p_vmspace->vm_map;
		if (vm_map_lookup(&map, uq->uq_addr, VM_PROT_WRITE,
		    &entry, &object, &pindex, &prot, &wired) != KERN_SUCCESS) {
			continue;
		}
		key.type = UMTX_SHARED;
		key.info.shared.object = object;
		key.info.shared.offset = entry->offset + entry->start -
			uq->uq_addr;
		if (umtx_key_match(&key, &uq->uq_key)) {
			vm_map_lookup_done(map, entry);
			continue;
		}

		umtxq_lock(&uq->uq_key);
		umtxq_busy(&uq->uq_key);
		if (uq->uq_thread->td_flags & TDF_UMTXQ) {
			umtxq_remove(uq);
			onq = 1;
		} else
			onq = 0;
		umtxq_unbusy(&uq->uq_key);
		umtxq_unlock(&uq->uq_key);
		if (onq) {
			vm_object_deallocate(uq->uq_key.info.shared.object);
			uq->uq_key = key;
			umtxq_lock(&uq->uq_key);
			umtxq_busy(&uq->uq_key);
			umtxq_insert(uq);
			umtxq_unbusy(&uq->uq_key);
			umtxq_unlock(&uq->uq_key);
			vm_object_reference(uq->uq_key.info.shared.object);
		}
		vm_map_lookup_done(map, entry);
	}
}
#endif

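/*
 * Lock a umtx object, sleeping for at most timo ticks per attempt.
 *
 * The intended userland fast path (a sketch only; the actual library
 * wrapper may differ) is a single compare-and-swap on the owner word,
 * entering the kernel only on contention:
 *
 *	if (atomic_cmpset_acq_long(&umtx->u_owner, UMTX_UNOWNED, id) == 0)
 *		_umtx_lock(umtx);	-- contended, let the kernel sleep
 *
 * On contention the kernel sets the UMTX_CONTESTED bit so the owner knows
 * to enter the kernel on unlock, and the waiter sleeps on the key's chain.
 */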
static int
_do_lock(struct thread *td, struct umtx *umtx, long id, int timo)
{
	struct umtx_q uq;
	intptr_t owner;
	intptr_t old;
	int error = 0;

	/*
	 * Care must be exercised when dealing with the umtx structure.
	 * It can fault on any access.
	 */

	for (;;) {
		/*
		 * Try the uncontested case.  This should be done in userland.
		 */
		owner = casuptr((intptr_t *)&umtx->u_owner,
		    UMTX_UNOWNED, id);

		/* The acquire succeeded. */
		if (owner == UMTX_UNOWNED)
			return (0);

		/* The address was invalid. */
		if (owner == -1)
			return (EFAULT);

		/* If no one owns it but it is contested try to acquire it. */
		if (owner == UMTX_CONTESTED) {
			owner = casuptr((intptr_t *)&umtx->u_owner,
			    UMTX_CONTESTED, id | UMTX_CONTESTED);

			if (owner == UMTX_CONTESTED)
				return (0);

			/* The address was invalid. */
			if (owner == -1)
				return (EFAULT);

			/* If this failed the lock has changed, restart. */
			continue;
		}

		/*
		 * If we caught a signal, we have retried and now
		 * exit immediately.
		 */
		if (error || (error = umtxq_queue_me(td, umtx, &uq)) != 0)
			return (error);

		/*
		 * Set the contested bit so that a release in user space
		 * knows to use the system call for unlock.  If this fails,
		 * either someone else has acquired the lock or it has been
		 * released.
		 */
		old = casuptr((intptr_t *)&umtx->u_owner, owner,
		    owner | UMTX_CONTESTED);

		/* The address was invalid. */
		if (old == -1) {
			umtxq_lock(&uq.uq_key);
			umtxq_busy(&uq.uq_key);
			umtxq_remove(&uq);
			umtxq_unbusy(&uq.uq_key);
			umtxq_unlock(&uq.uq_key);
			umtx_key_release(&uq.uq_key);
			return (EFAULT);
		}

		/*
		 * If we set the contested bit, sleep.  Otherwise the lock
		 * changed and we need to retry, or we lost a race to the
		 * thread unlocking the umtx.
		 */
		umtxq_lock(&uq.uq_key);
		if (old == owner && (td->td_flags & TDF_UMTXQ)) {
			error = umtxq_sleep(td, &uq.uq_key,
				       td->td_priority | PCATCH,
				       "umtx", timo);
		}
		umtxq_busy(&uq.uq_key);
		umtxq_remove(&uq);
		umtxq_unbusy(&uq.uq_key);
		umtxq_unlock(&uq.uq_key);
		umtx_key_release(&uq.uq_key);
	}

	return (0);
}

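/*
 * Lock a umtx object with an optional absolute timeout, converting the
 * remaining time into ticks and retrying _do_lock() until it succeeds,
 * fails, or the deadline passes.
 */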
static int
do_lock(struct thread *td, struct umtx *umtx, long id,
	struct timespec *abstime)
{
	struct timespec ts1, ts2;
	struct timeval tv;
	int timo, error;

	if (abstime == NULL) {
		error = _do_lock(td, umtx, id, 0);
	} else {
		for (;;) {
			ts1 = *abstime;
			getnanotime(&ts2);
			timespecsub(&ts1, &ts2);
			TIMESPEC_TO_TIMEVAL(&tv, &ts1);
			if (tv.tv_sec < 0) {
				error = EWOULDBLOCK;
				break;
			}
			timo = tvtohz(&tv);
			error = _do_lock(td, umtx, id, timo);
			if (error != EWOULDBLOCK) {
				if (error == ERESTART)
					error = EINTR;
				break;
			}
		}
	}
	return (error);
}

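/*
 * Unlock a contested umtx: store UMTX_UNOWNED back into the word (or keep
 * UMTX_CONTESTED when more than one waiter remains) and wake a waiter.
 * The chain stays marked busy across the userland store so that the wakeup
 * stays consistent with the waiter count sampled above the store.
 */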
static int
do_unlock(struct thread *td, struct umtx *umtx, long id)
{
	struct umtx_key key;
	intptr_t owner;
	intptr_t old;
	int error;
	int count;

	/*
	 * Make sure we own this mtx.
	 *
	 * XXX Need a {fu,su}ptr this is not correct on arch where
	 * sizeof(intptr_t) != sizeof(long).
	 */
	if ((owner = fuword(&umtx->u_owner)) == -1)
		return (EFAULT);

	if ((owner & ~UMTX_CONTESTED) != id)
		return (EPERM);

	/* We should only ever be in here for contested locks */
	if ((owner & UMTX_CONTESTED) == 0)
		return (EINVAL);

	if ((error = umtx_key_get(td, umtx, &key)) != 0)
		return (error);

	umtxq_lock(&key);
	umtxq_busy(&key);
	count = umtxq_count(&key);
	umtxq_unlock(&key);

	/*
	 * When unlocking the umtx, it must be marked as unowned if
	 * there are zero or one threads waiting for it.
	 * Otherwise, it must be marked as contested.
	 */
	old = casuptr((intptr_t *)&umtx->u_owner, owner,
			count <= 1 ? UMTX_UNOWNED : UMTX_CONTESTED);
	umtxq_lock(&key);
	umtxq_signal(&key, 0);
	umtxq_unbusy(&key);
	umtxq_unlock(&key);
	umtx_key_release(&key);
	if (old == -1)
		return (EFAULT);
	if (old != owner)
		return (EINVAL);
	return (0);
}

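/*
 * Condition-variable style operation: atomically release the umtx the
 * caller owns and go to sleep on the separate address uaddr, with an
 * optional absolute timeout.
 */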
static int
do_unlock_and_wait(struct thread *td, struct umtx *umtx, long id, void *uaddr,
	struct timespec *abstime)
{
	struct umtx_q uq;
	intptr_t owner;
	intptr_t old;
	struct timespec ts1, ts2;
	struct timeval tv;
	int timo, error = 0;

	if (umtx == uaddr)
		return (EINVAL);

	/*
	 * Make sure we own this mtx.
	 *
	 * XXX Need a {fu,su}ptr this is not correct on arch where
	 * sizeof(intptr_t) != sizeof(long).
	 */
	if ((owner = fuword(&umtx->u_owner)) == -1)
		return (EFAULT);

	if ((owner & ~UMTX_CONTESTED) != id)
		return (EPERM);

	if ((error = umtxq_queue_me(td, uaddr, &uq)) != 0)
		return (error);

	old = casuptr((intptr_t *)&umtx->u_owner, id, UMTX_UNOWNED);
	if (old == -1) {
		umtxq_lock(&uq.uq_key);
		umtxq_remove(&uq);
		umtxq_unlock(&uq.uq_key);
		umtx_key_release(&uq.uq_key);
		return (EFAULT);
	}
	if (old != id) {
		error = do_unlock(td, umtx, id);
		if (error) {
			umtxq_lock(&uq.uq_key);
			umtxq_remove(&uq);
			umtxq_unlock(&uq.uq_key);
			umtx_key_release(&uq.uq_key);
			return (error);
		}
	}
	if (abstime == NULL) {
		umtxq_lock(&uq.uq_key);
		if (td->td_flags & TDF_UMTXQ)
			error = umtxq_sleep(td, &uq.uq_key,
			       td->td_priority | PCATCH, "ucond", 0);
		if (!(td->td_flags & TDF_UMTXQ))
			error = 0;
		else
			umtxq_remove(&uq);
		umtxq_unlock(&uq.uq_key);
	} else {
		for (;;) {
			ts1 = *abstime;
			getnanotime(&ts2);
			timespecsub(&ts1, &ts2);
			TIMESPEC_TO_TIMEVAL(&tv, &ts1);
			umtxq_lock(&uq.uq_key);
			if (tv.tv_sec < 0) {
				error = EWOULDBLOCK;
				break;
			}
			timo = tvtohz(&tv);
			if (td->td_flags & TDF_UMTXQ)
				error = umtxq_sleep(td, &uq.uq_key,
					    td->td_priority | PCATCH,
					    "ucond", timo);
			if (!(td->td_flags & TDF_UMTXQ))
				break;
			umtxq_unlock(&uq.uq_key);
		}
		if (!(td->td_flags & TDF_UMTXQ))
			error = 0;
		else
			umtxq_remove(&uq);
		umtxq_unlock(&uq.uq_key);
	}
	umtx_key_release(&uq.uq_key);
	if (error == ERESTART)
		error = EINTR;
	return (error);
}

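/* Wake up to n_wake threads waiting on the address uaddr. */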
static int
do_wake(struct thread *td, void *uaddr, int n_wake)
{
	struct umtx_key key;
	int ret;

	if ((ret = umtx_key_get(td, uaddr, &key)) != 0)
		return (ret);
	/* umtxq_signal() requires the chain lock; hold it across the wakeup. */
	umtxq_lock(&key);
	ret = umtxq_signal(&key, n_wake);
	umtxq_unlock(&key);
	umtx_key_release(&key);
	td->td_retval[0] = ret;
	return (0);
}

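/*
 * System call entry points.  _umtx_lock() and _umtx_unlock() operate with
 * the calling thread's id as the lock value; _umtx_op() multiplexes the
 * lock, unlock, unlock-and-wait and wake operations, with an optional
 * absolute timeout for the sleeping variants.
 */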
int
_umtx_lock(struct thread *td, struct _umtx_lock_args *uap)
    /* struct umtx *umtx */
{
	return _do_lock(td, uap->umtx, td->td_tid, 0);
}

int
_umtx_unlock(struct thread *td, struct _umtx_unlock_args *uap)
    /* struct umtx *umtx */
{
	return do_unlock(td, uap->umtx, td->td_tid);
}

int
_umtx_op(struct thread *td, struct _umtx_op_args *uap)
{
	struct timespec abstime;
	struct timespec *ts;
	int error;

	switch(uap->op) {
	case UMTX_OP_LOCK:
		/* Allow a null timespec (wait forever). */
		if (uap->abstime == NULL)
			ts = NULL;
		else {
			error = copyin(uap->abstime, &abstime, sizeof(abstime));
			if (error != 0)
				return (error);
			if (abstime.tv_nsec >= 1000000000 ||
			    abstime.tv_nsec < 0)
				return (EINVAL);
			ts = &abstime;
		}
		return do_lock(td, uap->umtx, uap->id, ts);
	case UMTX_OP_UNLOCK:
		return do_unlock(td, uap->umtx, uap->id);
	case UMTX_OP_UNLOCK_AND_WAIT:
		/* Allow a null timespec (wait forever). */
		if (uap->abstime == NULL)
			ts = NULL;
		else {
			error = copyin(uap->abstime, &abstime, sizeof(abstime));
			if (error != 0)
				return (error);
			if (abstime.tv_nsec >= 1000000000 ||
			    abstime.tv_nsec < 0)
				return (EINVAL);
			ts = &abstime;
		}
		return do_unlock_and_wait(td, uap->umtx, uap->id,
					  uap->uaddr, ts);
	case UMTX_OP_WAKE:
		return do_wake(td, uap->uaddr, uap->id);
	default:
		return (EINVAL);
	}
}