kern_umtx.c revision 158718
1/*-
2 * Copyright (c) 2004, David Xu <davidxu@freebsd.org>
3 * Copyright (c) 2002, Jeffrey Roberson <jeff@freebsd.org>
4 * All rights reserved.
5 *
6 * Redistribution and use in source and binary forms, with or without
7 * modification, are permitted provided that the following conditions
8 * are met:
9 * 1. Redistributions of source code must retain the above copyright
10 *    notice unmodified, this list of conditions, and the following
11 *    disclaimer.
12 * 2. Redistributions in binary form must reproduce the above copyright
13 *    notice, this list of conditions and the following disclaimer in the
14 *    documentation and/or other materials provided with the distribution.
15 *
16 * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR
17 * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
18 * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.
19 * IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT,
20 * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT
21 * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
22 * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
23 * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
24 * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF
25 * THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
26 */
27
28#include <sys/cdefs.h>
29__FBSDID("$FreeBSD: head/sys/kern/kern_umtx.c 158718 2006-05-18 08:43:46Z davidxu $");
30
31#include <sys/param.h>
32#include <sys/kernel.h>
33#include <sys/limits.h>
34#include <sys/lock.h>
35#include <sys/malloc.h>
36#include <sys/mutex.h>
37#include <sys/proc.h>
38#include <sys/sysent.h>
39#include <sys/systm.h>
40#include <sys/sysproto.h>
41#include <sys/eventhandler.h>
42#include <sys/thr.h>
43#include <sys/umtx.h>
44
45#include <vm/vm.h>
46#include <vm/vm_param.h>
47#include <vm/pmap.h>
48#include <vm/vm_map.h>
49#include <vm/vm_object.h>
50
51#define UMTX_PRIVATE	0
52#define UMTX_SHARED	1
53
54struct umtx_key {
55	int	type;
56	union {
57		struct {
58			vm_object_t	object;
59			long		offset;
60		} shared;
61		struct {
62			struct umtx	*umtx;
63			long		pid;
64		} private;
65		struct {
66			void		*ptr;
67			long		word;
68		} both;
69	} info;
70};
71
72struct umtx_q {
73	LIST_ENTRY(umtx_q)	uq_next;	/* Linked list for the hash. */
74	struct umtx_key		uq_key;		/* Umtx key. */
75	int			uq_flags;	/* Umtx flags. */
76#define UQF_UMTXQ		0x0001
77	struct thread		*uq_thread;	/* The thread waits on. */
78	vm_offset_t		uq_addr;	/* Umtx's virtual address. */
79};
80
81LIST_HEAD(umtx_head, umtx_q);
82struct umtxq_chain {
83	struct mtx		uc_lock;	/* Lock for this chain. */
84	struct umtx_head	uc_queue;	/* List of sleep queues. */
85#define	UCF_BUSY		0x01
86	int			uc_flags;
87	int			uc_waiters;
88};
89
90#define	GOLDEN_RATIO_PRIME	2654404609U
91#define	UMTX_CHAINS		128
92#define	UMTX_SHIFTS		(__WORD_BIT - 7)
93
94static struct umtxq_chain umtxq_chains[UMTX_CHAINS];
95static MALLOC_DEFINE(M_UMTX, "umtx", "UMTX queue memory");
96
97static void umtxq_init_chains(void *);
98static int umtxq_hash(struct umtx_key *key);
99static struct mtx *umtxq_mtx(int chain);
100static void umtxq_lock(struct umtx_key *key);
101static void umtxq_unlock(struct umtx_key *key);
102static void umtxq_busy(struct umtx_key *key);
103static void umtxq_unbusy(struct umtx_key *key);
104static void umtxq_insert(struct umtx_q *uq);
105static void umtxq_remove(struct umtx_q *uq);
106static int umtxq_sleep(struct thread *td, struct umtx_key *key,
107	int prio, const char *wmesg, int timo);
108static int umtxq_count(struct umtx_key *key);
109static int umtxq_signal(struct umtx_key *key, int nr_wakeup);
110static int umtx_key_match(const struct umtx_key *k1, const struct umtx_key *k2);
111static int umtx_key_get(struct thread *td, struct umtx *umtx,
112	struct umtx_key *key);
113static void umtx_key_release(struct umtx_key *key);
114
115SYSINIT(umtx, SI_SUB_EVENTHANDLER+1, SI_ORDER_MIDDLE, umtxq_init_chains, NULL);
116
117struct umtx_q *
118umtxq_alloc(void)
119{
120	return (malloc(sizeof(struct umtx_q), M_UMTX, M_WAITOK|M_ZERO));
121}
122
123void
124umtxq_free(struct umtx_q *uq)
125{
126	free(uq, M_UMTX);
127}
128
129static void
130umtxq_init_chains(void *arg __unused)
131{
132	int i;
133
134	for (i = 0; i < UMTX_CHAINS; ++i) {
135		mtx_init(&umtxq_chains[i].uc_lock, "umtxq_lock", NULL,
136			 MTX_DEF | MTX_DUPOK);
137		LIST_INIT(&umtxq_chains[i].uc_queue);
138		umtxq_chains[i].uc_flags = 0;
139		umtxq_chains[i].uc_waiters = 0;
140	}
141}
142
143static inline int
144umtxq_hash(struct umtx_key *key)
145{
146	unsigned n = (uintptr_t)key->info.both.ptr + key->info.both.word;
147	return (((n * GOLDEN_RATIO_PRIME) >> UMTX_SHIFTS) % UMTX_CHAINS);
148}
149
150static inline int
151umtx_key_match(const struct umtx_key *k1, const struct umtx_key *k2)
152{
153	return (k1->type == k2->type &&
154		k1->info.both.ptr == k2->info.both.ptr &&
155	        k1->info.both.word == k2->info.both.word);
156}
157
158static inline struct mtx *
159umtxq_mtx(int chain)
160{
161	return (&umtxq_chains[chain].uc_lock);
162}
163
164static inline void
165umtxq_busy(struct umtx_key *key)
166{
167	int chain = umtxq_hash(key);
168
169	mtx_assert(umtxq_mtx(chain), MA_OWNED);
170	while (umtxq_chains[chain].uc_flags & UCF_BUSY) {
171		umtxq_chains[chain].uc_waiters++;
172		msleep(&umtxq_chains[chain], umtxq_mtx(chain),
173		    0, "umtxq_busy", 0);
174		umtxq_chains[chain].uc_waiters--;
175	}
176	umtxq_chains[chain].uc_flags |= UCF_BUSY;
177}
178
179static inline void
180umtxq_unbusy(struct umtx_key *key)
181{
182	int chain = umtxq_hash(key);
183
184	mtx_assert(umtxq_mtx(chain), MA_OWNED);
185	KASSERT(umtxq_chains[chain].uc_flags & UCF_BUSY, ("not busy"));
186	umtxq_chains[chain].uc_flags &= ~UCF_BUSY;
187	if (umtxq_chains[chain].uc_waiters)
188		wakeup_one(&umtxq_chains[chain]);
189}
190
/*
 * Acquire the chain lock for the given key.
 */
static inline void
umtxq_lock(struct umtx_key *key)
{

	mtx_lock(umtxq_mtx(umtxq_hash(key)));
}
197
/*
 * Release the chain lock for the given key.
 */
static inline void
umtxq_unlock(struct umtx_key *key)
{

	mtx_unlock(umtxq_mtx(umtxq_hash(key)));
}
204
205/*
206 * Insert a thread onto the umtx queue.
207 */
208static inline void
209umtxq_insert(struct umtx_q *uq)
210{
211	struct umtx_head *head;
212	int chain = umtxq_hash(&uq->uq_key);
213
214	mtx_assert(umtxq_mtx(chain), MA_OWNED);
215	head = &umtxq_chains[chain].uc_queue;
216	LIST_INSERT_HEAD(head, uq, uq_next);
217	uq->uq_flags |= UQF_UMTXQ;
218}
219
/*
 * Remove thread from the umtx queue.  The chain lock for uq's key must
 * be held; a no-op when the entry is not currently queued, so it is
 * safe to call after a wakeup that already dequeued us.
 */
static inline void
umtxq_remove(struct umtx_q *uq)
{
	mtx_assert(umtxq_mtx(umtxq_hash(&uq->uq_key)), MA_OWNED);
	if (uq->uq_flags & UQF_UMTXQ) {
		LIST_REMOVE(uq, uq_next);
		/* turning off UQF_UMTXQ should be the last thing. */
		uq->uq_flags &= ~UQF_UMTXQ;
	}
}
233
234static int
235umtxq_count(struct umtx_key *key)
236{
237	struct umtx_q *uq;
238	struct umtx_head *head;
239	int chain, count = 0;
240
241	chain = umtxq_hash(key);
242	mtx_assert(umtxq_mtx(chain), MA_OWNED);
243	head = &umtxq_chains[chain].uc_queue;
244	LIST_FOREACH(uq, head, uq_next) {
245		if (umtx_key_match(&uq->uq_key, key)) {
246			if (++count > 1)
247				break;
248		}
249	}
250	return (count);
251}
252
253static int
254umtxq_signal(struct umtx_key *key, int n_wake)
255{
256	struct umtx_q *uq, *next;
257	struct umtx_head *head;
258	struct thread *blocked = NULL;
259	int chain, ret;
260
261	ret = 0;
262	chain = umtxq_hash(key);
263	mtx_assert(umtxq_mtx(chain), MA_OWNED);
264	head = &umtxq_chains[chain].uc_queue;
265	for (uq = LIST_FIRST(head); uq; uq = next) {
266		next = LIST_NEXT(uq, uq_next);
267		if (umtx_key_match(&uq->uq_key, key)) {
268			blocked = uq->uq_thread;
269			umtxq_remove(uq);
270			wakeup(blocked);
271			if (++ret >= n_wake)
272				break;
273		}
274	}
275	return (ret);
276}
277
278static inline int
279umtxq_sleep(struct thread *td, struct umtx_key *key, int priority,
280	    const char *wmesg, int timo)
281{
282	int chain = umtxq_hash(key);
283	int error = msleep(td, umtxq_mtx(chain), priority, wmesg, timo);
284	if (error == EWOULDBLOCK)
285		error = ETIMEDOUT;
286	return (error);
287}
288
/*
 * Build a umtx_key for the user address "umtx" in td's address space.
 *
 * The backing map entry decides the flavor: for VM_INHERIT_SHARE
 * mappings the key is (object, offset) so every process mapping the
 * same object hashes to the same queue, and the object gains a
 * reference that umtx_key_release() must drop; otherwise the key is
 * the private (address, pid) pair.  Returns 0 on success or EFAULT
 * when the address is not mapped writable.
 */
static int
umtx_key_get(struct thread *td, struct umtx *umtx, struct umtx_key *key)
{
	vm_map_t map;
	vm_map_entry_t entry;
	vm_pindex_t pindex;
	vm_prot_t prot;
	boolean_t wired;

	map = &td->td_proc->p_vmspace->vm_map;
	if (vm_map_lookup(&map, (vm_offset_t)umtx, VM_PROT_WRITE,
	    &entry, &key->info.shared.object, &pindex, &prot,
	    &wired) != KERN_SUCCESS) {
		return EFAULT;
	}

	if (VM_INHERIT_SHARE == entry->inheritance) {
		key->type = UMTX_SHARED;
		/*
		 * Value is used only as a hash/match key, so only the
		 * consistency of this expression matters, not its sign.
		 */
		key->info.shared.offset = entry->offset + entry->start -
			(vm_offset_t)umtx;
		vm_object_reference(key->info.shared.object);
	} else {
		key->type = UMTX_PRIVATE;
		key->info.private.umtx = umtx;
		key->info.private.pid  = td->td_proc->p_pid;
	}
	/* Drops the map lock taken by vm_map_lookup(). */
	vm_map_lookup_done(map, entry);
	return (0);
}
318
319static inline void
320umtx_key_release(struct umtx_key *key)
321{
322	if (key->type == UMTX_SHARED)
323		vm_object_deallocate(key->info.shared.object);
324}
325
326static inline int
327umtxq_queue_me(struct thread *td, struct umtx *umtx, struct umtx_q *uq)
328{
329	int error;
330
331	if ((error = umtx_key_get(td, umtx, &uq->uq_key)) != 0)
332		return (error);
333
334	uq->uq_addr = (vm_offset_t)umtx;
335	uq->uq_thread = td;
336	umtxq_lock(&uq->uq_key);
337	/* hmm, for condition variable, we don't need busy flag. */
338	umtxq_busy(&uq->uq_key);
339	umtxq_insert(uq);
340	umtxq_unbusy(&uq->uq_key);
341	umtxq_unlock(&uq->uq_key);
342	return (0);
343}
344
/*
 * Acquire the umtx at userland address "umtx" on behalf of td, using
 * "id" as the owner value.  The fast paths are userland compare-and-
 * swaps via casuptr(); on contention the thread queues itself, sets
 * the UMTX_CONTESTED bit so the owner's unlock goes through the
 * kernel, and sleeps for up to "timo" ticks (0 = forever).
 *
 * Returns 0 on acquisition, EFAULT if the address faults, or the
 * sleep error (ETIMEDOUT, or EINTR/ERESTART with PCATCH) after one
 * final retry of the fast paths.
 */
static int
_do_lock(struct thread *td, struct umtx *umtx, long id, int timo)
{
	struct umtx_q *uq;
	intptr_t owner;
	intptr_t old;
	int error = 0;

	uq = td->td_umtxq;
	/*
	 * Care must be exercised when dealing with umtx structure.  It
	 * can fault on any access.
	 */

	for (;;) {
		/*
		 * Try the uncontested case.  This should be done in userland.
		 */
		owner = casuptr((intptr_t *)&umtx->u_owner,
		    UMTX_UNOWNED, id);

		/* The acquire succeeded. */
		if (owner == UMTX_UNOWNED)
			return (0);

		/* The address was invalid. */
		if (owner == -1)
			return (EFAULT);

		/* If no one owns it but it is contested try to acquire it. */
		if (owner == UMTX_CONTESTED) {
			/* Preserve the contested bit so waiters still get
			 * a kernel unlock when we release. */
			owner = casuptr((intptr_t *)&umtx->u_owner,
			    UMTX_CONTESTED, id | UMTX_CONTESTED);

			if (owner == UMTX_CONTESTED)
				return (0);

			/* The address was invalid. */
			if (owner == -1)
				return (EFAULT);

			/* If this failed the lock has changed, restart. */
			continue;
		}

		/*
		 * If we caught a signal, we have retried and now
		 * exit immediately.
		 */
		if (error || (error = umtxq_queue_me(td, umtx, uq)) != 0)
			return (error);

		/*
		 * Set the contested bit so that a release in user space
		 * knows to use the system call for unlock.  If this fails
		 * either some one else has acquired the lock or it has been
		 * released.
		 */
		old = casuptr((intptr_t *)&umtx->u_owner, owner,
		    owner | UMTX_CONTESTED);

		/* The address was invalid. */
		if (old == -1) {
			/* Undo the queueing done above before bailing out. */
			umtxq_lock(&uq->uq_key);
			umtxq_busy(&uq->uq_key);
			umtxq_remove(uq);
			umtxq_unbusy(&uq->uq_key);
			umtxq_unlock(&uq->uq_key);
			umtx_key_release(&uq->uq_key);
			return (EFAULT);
		}

		/*
		 * We set the contested bit, sleep. Otherwise the lock changed
		 * and we need to retry or we lost a race to the thread
		 * unlocking the umtx.
		 */
		umtxq_lock(&uq->uq_key);
		if (old == owner && (uq->uq_flags & UQF_UMTXQ)) {
			error = umtxq_sleep(td, &uq->uq_key, PCATCH,
				       "umtx", timo);
		}
		/* Dequeue ourselves whether woken, interrupted or racing. */
		umtxq_busy(&uq->uq_key);
		umtxq_remove(uq);
		umtxq_unbusy(&uq->uq_key);
		umtxq_unlock(&uq->uq_key);
		umtx_key_release(&uq->uq_key);
	}

	return (0);
}
436
/*
 * Lock a umtx with an optional relative timeout.
 *
 * With no timeout this is a plain _do_lock() that may sleep forever.
 * Otherwise the absolute deadline is computed up front on the uptime
 * clock and _do_lock() is retried with the remaining time converted
 * to ticks, until it succeeds, fails with a non-timeout error, or the
 * deadline passes (ETIMEDOUT).
 */
static int
do_lock(struct thread *td, struct umtx *umtx, long id,
	struct timespec *timeout)
{
	struct timespec ts, ts2, ts3;
	struct timeval tv;
	int error;

	if (timeout == NULL) {
		error = _do_lock(td, umtx, id, 0);
	} else {
		/* ts becomes the absolute deadline. */
		getnanouptime(&ts);
		timespecadd(&ts, timeout);
		TIMESPEC_TO_TIMEVAL(&tv, timeout);
		for (;;) {
			error = _do_lock(td, umtx, id, tvtohz(&tv));
			if (error != ETIMEDOUT)
				break;
			getnanouptime(&ts2);
			if (timespeccmp(&ts2, &ts, >=)) {
				error = ETIMEDOUT;
				break;
			}
			/* Retry with the time still remaining. */
			ts3 = ts;
			timespecsub(&ts3, &ts2);
			TIMESPEC_TO_TIMEVAL(&tv, &ts3);
		}
	}
	/*
	 * This lets userland back off critical region if needed.
	 */
	if (error == ERESTART)
		error = EINTR;
	return (error);
}
472
/*
 * Unlock a contested umtx owned by "id".
 *
 * Verifies ownership and the contested bit via fuword(), then CASes
 * the owner word to UMTX_UNOWNED (zero or one waiter) or
 * UMTX_CONTESTED (more waiters) and wakes one waiter.  The chain is
 * marked busy across the count/CAS/signal sequence so concurrent
 * lockers cannot race the hand-off.  Returns 0, EFAULT on a bad
 * address, EPERM if not the owner, or EINVAL if the word changed
 * underneath us or was not contested.
 */
static int
do_unlock(struct thread *td, struct umtx *umtx, long id)
{
	struct umtx_key key;
	intptr_t owner;
	intptr_t old;
	int error;
	int count;

	/*
	 * Make sure we own this mtx.
	 *
	 * XXX Need a {fu,su}ptr this is not correct on arch where
	 * sizeof(intptr_t) != sizeof(long).
	 */
	if ((owner = fuword(&umtx->u_owner)) == -1)
		return (EFAULT);

	if ((owner & ~UMTX_CONTESTED) != id)
		return (EPERM);

	/* We should only ever be in here for contested locks */
	if ((owner & UMTX_CONTESTED) == 0)
		return (EINVAL);

	if ((error = umtx_key_get(td, umtx, &key)) != 0)
		return (error);

	umtxq_lock(&key);
	/* Busy flag stays set across the unlocked casuptr() below. */
	umtxq_busy(&key);
	count = umtxq_count(&key);
	umtxq_unlock(&key);

	/*
	 * When unlocking the umtx, it must be marked as unowned if
	 * there is zero or one thread only waiting for it.
	 * Otherwise, it must be marked as contested.
	 */
	old = casuptr((intptr_t *)&umtx->u_owner, owner,
			count <= 1 ? UMTX_UNOWNED : UMTX_CONTESTED);
	umtxq_lock(&key);
	umtxq_signal(&key, 0);
	umtxq_unbusy(&key);
	umtxq_unlock(&key);
	umtx_key_release(&key);
	if (old == -1)
		return (EFAULT);
	if (old != owner)
		return (EINVAL);
	return (0);
}
524
/*
 * Wait until the umtx word no longer equals "id" (the UMTX_OP_WAIT
 * primitive, futex-style).
 *
 * The thread queues itself first and only then re-reads the word, so
 * a concurrent wake between the check and the sleep cannot be lost.
 * If the word already differs, return immediately with 0.  Otherwise
 * sleep, forever or with the same deadline-retry scheme as do_lock().
 * Being dequeued (UQF_UMTXQ cleared) means we were woken, which
 * overrides any sleep error.  ERESTART is mapped to EINTR for
 * userland.
 */
static int
do_wait(struct thread *td, struct umtx *umtx, long id, struct timespec *timeout)
{
	struct umtx_q *uq;
	struct timespec ts, ts2, ts3;
	struct timeval tv;
	long tmp;
	int error = 0;

	uq = td->td_umtxq;
	if ((error = umtxq_queue_me(td, umtx, uq)) != 0)
		return (error);
	/* Re-read after queueing; a racing wake is now visible to us. */
	tmp = fuword(&umtx->u_owner);
	if (tmp != id) {
		umtxq_lock(&uq->uq_key);
		umtxq_remove(uq);
		umtxq_unlock(&uq->uq_key);
	} else if (timeout == NULL) {
		umtxq_lock(&uq->uq_key);
		if (uq->uq_flags & UQF_UMTXQ)
			error = umtxq_sleep(td, &uq->uq_key,
			    PCATCH, "ucond", 0);
		/* A completed wakeup trumps any sleep error. */
		if (!(uq->uq_flags & UQF_UMTXQ))
			error = 0;
		else
			umtxq_remove(uq);
		umtxq_unlock(&uq->uq_key);
	} else {
		/* ts becomes the absolute deadline on the uptime clock. */
		getnanouptime(&ts);
		timespecadd(&ts, timeout);
		TIMESPEC_TO_TIMEVAL(&tv, timeout);
		for (;;) {
			umtxq_lock(&uq->uq_key);
			if (uq->uq_flags & UQF_UMTXQ) {
				error = umtxq_sleep(td, &uq->uq_key, PCATCH,
					    "ucond", tvtohz(&tv));
			}
			if (!(uq->uq_flags & UQF_UMTXQ)) {
				/* Woken: key released at "out", error kept. */
				umtxq_unlock(&uq->uq_key);
				goto out;
			}
			umtxq_unlock(&uq->uq_key);
			if (error != ETIMEDOUT)
				break;
			getnanouptime(&ts2);
			if (timespeccmp(&ts2, &ts, >=)) {
				error = ETIMEDOUT;
				break;
			}
			/* Retry the sleep with the remaining time. */
			ts3 = ts;
			timespecsub(&ts3, &ts2);
			TIMESPEC_TO_TIMEVAL(&tv, &ts3);
		}
		umtxq_lock(&uq->uq_key);
		umtxq_remove(uq);
		umtxq_unlock(&uq->uq_key);
	}
out:
	umtx_key_release(&uq->uq_key);
	if (error == ERESTART)
		error = EINTR;
	return (error);
}
588
589int
590kern_umtx_wake(struct thread *td, void *uaddr, int n_wake)
591{
592	struct umtx_key key;
593	int ret;
594
595	if ((ret = umtx_key_get(td, uaddr, &key)) != 0)
596		return (ret);
597	umtxq_lock(&key);
598	ret = umtxq_signal(&key, n_wake);
599	umtxq_unlock(&key);
600	umtx_key_release(&key);
601	return (0);
602}
603
604int
605_umtx_lock(struct thread *td, struct _umtx_lock_args *uap)
606    /* struct umtx *umtx */
607{
608	return _do_lock(td, uap->umtx, td->td_tid, 0);
609}
610
611int
612_umtx_unlock(struct thread *td, struct _umtx_unlock_args *uap)
613    /* struct umtx *umtx */
614{
615	return do_unlock(td, uap->umtx, td->td_tid);
616}
617
618int
619_umtx_op(struct thread *td, struct _umtx_op_args *uap)
620{
621	struct timespec timeout;
622	struct timespec *ts;
623	int error;
624
625	switch(uap->op) {
626	case UMTX_OP_LOCK:
627		/* Allow a null timespec (wait forever). */
628		if (uap->uaddr2 == NULL)
629			ts = NULL;
630		else {
631			error = copyin(uap->uaddr2, &timeout, sizeof(timeout));
632			if (error != 0)
633				break;
634			if (timeout.tv_nsec >= 1000000000 ||
635			    timeout.tv_nsec < 0) {
636				error = EINVAL;
637				break;
638			}
639			ts = &timeout;
640		}
641		error = do_lock(td, uap->umtx, uap->id, ts);
642		break;
643	case UMTX_OP_UNLOCK:
644		error = do_unlock(td, uap->umtx, uap->id);
645		break;
646	case UMTX_OP_WAIT:
647		/* Allow a null timespec (wait forever). */
648		if (uap->uaddr2 == NULL)
649			ts = NULL;
650		else {
651			error = copyin(uap->uaddr2, &timeout, sizeof(timeout));
652			if (error != 0)
653				break;
654			if (timeout.tv_nsec >= 1000000000 ||
655			    timeout.tv_nsec < 0) {
656				error = EINVAL;
657				break;
658			}
659			ts = &timeout;
660		}
661		error = do_wait(td, uap->umtx, uap->id, ts);
662		break;
663	case UMTX_OP_WAKE:
664		error = kern_umtx_wake(td, uap->umtx, uap->id);
665		break;
666	default:
667		error = EINVAL;
668		break;
669	}
670	return (error);
671}
672