kern_umtx.c revision 138225
/*
 * Copyright (c) 2002, Jeffrey Roberson <jeff@freebsd.org>
 * All rights reserved.
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 * 1. Redistributions of source code must retain the above copyright
 *    notice unmodified, this list of conditions, and the following
 *    disclaimer.
 * 2. Redistributions in binary form must reproduce the above copyright
 *    notice, this list of conditions and the following disclaimer in the
 *    documentation and/or other materials provided with the distribution.
 *
 * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR
 * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
 * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.
 * IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT,
 * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT
 * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
 * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
 * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
 * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF
 * THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
 */

#include <sys/cdefs.h>
__FBSDID("$FreeBSD: head/sys/kern/kern_umtx.c 138225 2004-11-30 12:18:53Z davidxu $");

#include <sys/param.h>
#include <sys/kernel.h>
#include <sys/limits.h>
#include <sys/lock.h>
#include <sys/malloc.h>
#include <sys/mutex.h>
#include <sys/proc.h>
#include <sys/sysent.h>
#include <sys/systm.h>
#include <sys/sysproto.h>
#include <sys/thr.h>
#include <sys/umtx.h>

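/*
 * Threads blocked on a umtx are parked on a umtx_q keyed by the
 * (umtx address, pid) pair.  The queues hang off a fixed-size hash
 * table of mutex-protected chains, so unrelated umtx operations do
 * not contend on a single global lock.
 */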
struct umtx_q {
	LIST_ENTRY(umtx_q)	uq_next;	/* Linked list for the hash. */
	TAILQ_HEAD(, thread)	uq_tdq;		/* List of threads blocked here. */
	struct umtx		*uq_umtx;	/* Pointer key component. */
	pid_t			uq_pid;		/* Pid key component. */
	int			uq_count;	/* How many threads blocked. */
};

LIST_HEAD(umtx_head, umtx_q);
struct umtxq_chain {
	struct mtx		uc_lock;	/* Lock for this chain. */
	struct umtx_head	uc_queues;	/* List of sleep queues. */
};

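/*
 * The queues are spread over UMTX_CHAINS chains by a multiplicative
 * hash: GOLDEN_RATIO_PRIME is a prime close to 2^32 divided by the
 * golden ratio, and shifting the 32-bit product right by UMTX_SHIFTS
 * keeps its top 7 bits, exactly enough to index 128 chains (the
 * "% UMTX_CHAINS" in umtxq_hash() is then just a safety net).
 */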
#define	GOLDEN_RATIO_PRIME	2654404609U
#define	UMTX_CHAINS		128
#define	UMTX_SHIFTS		(__WORD_BIT - 7)

static struct umtxq_chain umtxq_chains[UMTX_CHAINS];
static MALLOC_DEFINE(M_UMTX, "umtx", "UMTX queue memory");

#define	UMTX_CONTESTED	LONG_MIN

static void umtx_init_chains(void *);
static int umtxq_hash(struct thread *, struct umtx *);
static void umtxq_lock(struct thread *td, struct umtx *key);
static void umtxq_unlock(struct thread *td, struct umtx *key);
static struct umtx_q *umtxq_lookup(struct thread *, struct umtx *);
static struct umtx_q *umtxq_insert(struct thread *, struct umtx *);
static int umtxq_count(struct thread *td, struct umtx *umtx);
static int umtx_sleep(struct thread *td, struct umtx *umtx, int priority,
	   const char *wmesg, int timo);
static void umtx_signal(struct thread *td, struct umtx *umtx);

SYSINIT(umtx, SI_SUB_LOCK, SI_ORDER_MIDDLE, umtx_init_chains, NULL);

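/*
 * Initialize the chain locks and queue heads at boot, via SYSINIT at
 * SI_SUB_LOCK.  Every chain shares the lock name "umtxq_lock", so
 * MTX_DUPOK is passed, presumably to keep WITNESS from flagging two
 * held chain locks as a duplicate-lock error.
 */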
static void
umtx_init_chains(void *arg __unused)
{
	int i;

	for (i = 0; i < UMTX_CHAINS; ++i) {
		mtx_init(&umtxq_chains[i].uc_lock, "umtxq_lock", NULL,
			 MTX_DEF | MTX_DUPOK);
		LIST_INIT(&umtxq_chains[i].uc_queues);
	}
}

static inline int
umtxq_hash(struct thread *td, struct umtx *umtx)
{
	unsigned n = (uintptr_t)umtx + td->td_proc->p_pid;
	return (((n * GOLDEN_RATIO_PRIME) >> UMTX_SHIFTS) % UMTX_CHAINS);
}

static inline void
umtxq_lock(struct thread *td, struct umtx *key)
{
	int chain = umtxq_hash(td, key);
	mtx_lock(&umtxq_chains[chain].uc_lock);
}

static inline void
umtxq_unlock(struct thread *td, struct umtx *key)
{
	int chain = umtxq_hash(td, key);
	mtx_unlock(&umtxq_chains[chain].uc_lock);
}

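/*
 * Look up the sleep queue for the (umtx, pid) key.  The caller must
 * hold the chain lock; NULL is returned when no thread is currently
 * queued on this umtx.
 */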
static struct umtx_q *
umtxq_lookup(struct thread *td, struct umtx *umtx)
{
	struct umtx_head *head;
	struct umtx_q *uq;
	pid_t pid;
	int chain;

	chain = umtxq_hash(td, umtx);
	mtx_assert(&umtxq_chains[chain].uc_lock, MA_OWNED);
	pid = td->td_proc->p_pid;
	head = &umtxq_chains[chain].uc_queues;
	LIST_FOREACH(uq, head, uq_next) {
		if (uq->uq_pid == pid && uq->uq_umtx == umtx)
			return (uq);
	}
	return (NULL);
}

/*
 * Insert a thread onto the umtx queue.  The chain lock may be dropped
 * and reacquired if a new queue head has to be allocated.
 */
static struct umtx_q *
umtxq_insert(struct thread *td, struct umtx *umtx)
{
	struct umtx_head *head;
	struct umtx_q *uq, *ins = NULL;
	pid_t pid;
	int chain;

	chain = umtxq_hash(td, umtx);
	pid = td->td_proc->p_pid;
	if ((uq = umtxq_lookup(td, umtx)) == NULL) {
		umtxq_unlock(td, umtx);
		ins = malloc(sizeof(*uq), M_UMTX, M_ZERO | M_WAITOK);
		umtxq_lock(td, umtx);

		/*
		 * Someone else could have succeeded while we were blocked
		 * waiting on memory.
		 */
		if ((uq = umtxq_lookup(td, umtx)) == NULL) {
			head = &umtxq_chains[chain].uc_queues;
			uq = ins;
			uq->uq_pid = pid;
			uq->uq_umtx = umtx;
			uq->uq_count = 0;
			LIST_INSERT_HEAD(head, uq, uq_next);
			TAILQ_INIT(&uq->uq_tdq);
			ins = NULL;
		}
	}
	TAILQ_INSERT_TAIL(&uq->uq_tdq, td, td_umtx);
	uq->uq_count++;
	if (ins) {
		umtxq_unlock(td, umtx);
		free(ins, M_UMTX);
		umtxq_lock(td, umtx);
	}
	return (uq);
}

/*
 * Remove a thread from the umtx queue; the umtx chain lock is
 * released before returning.
 */
static void
umtx_remove(struct umtx_q *uq, struct thread *td, struct umtx *umtx)
{
	int chain;

	chain = umtxq_hash(td, umtx);
	mtx_assert(&umtxq_chains[chain].uc_lock, MA_OWNED);
	TAILQ_REMOVE(&uq->uq_tdq, td, td_umtx);
	uq->uq_count--;
	if (TAILQ_EMPTY(&uq->uq_tdq)) {
		LIST_REMOVE(uq, uq_next);
		umtxq_unlock(td, umtx);
		free(uq, M_UMTX);
	} else
		umtxq_unlock(td, umtx);
}

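/*
 * Return the number of threads blocked on the umtx.  The count is a
 * snapshot taken under the chain lock and may be stale by the time
 * the caller uses it; _umtx_unlock() is written to tolerate that.
 */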
static inline int
umtxq_count(struct thread *td, struct umtx *umtx)
{
	struct umtx_q *uq;
	int count = 0;

	umtxq_lock(td, umtx);
	if ((uq = umtxq_lookup(td, umtx)) != NULL)
		count = uq->uq_count;
	umtxq_unlock(td, umtx);
	return (count);
}

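/*
 * Sleep on the umtx, using the chain lock as the msleep() interlock:
 * msleep() drops the chain lock once the thread is safely on the
 * sleep queue and reacquires it before returning.
 */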
static inline int
umtx_sleep(struct thread *td, struct umtx *umtx, int priority,
	   const char *wmesg, int timo)
{
	int chain;

	chain = umtxq_hash(td, umtx);
	mtx_assert(&umtxq_chains[chain].uc_lock, MA_OWNED);
	return (msleep(td, &umtxq_chains[chain].uc_lock, priority,
		       wmesg, timo));
}

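/*
 * Wake the first thread blocked on the umtx.  TDF_UMTXWAKEUP is set
 * under sched_lock before the wakeup() so the woken thread can
 * distinguish this wakeup from a signal and knows it must retry the
 * lock.
 */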
static void
umtx_signal(struct thread *td, struct umtx *umtx)
{
	struct umtx_q *uq;
	struct thread *blocked = NULL;

	umtxq_lock(td, umtx);
	if ((uq = umtxq_lookup(td, umtx)) != NULL) {
		if ((blocked = TAILQ_FIRST(&uq->uq_tdq)) != NULL) {
			mtx_lock_spin(&sched_lock);
			blocked->td_flags |= TDF_UMTXWAKEUP;
			mtx_unlock_spin(&sched_lock);
		}
	}
	umtxq_unlock(td, umtx);
	if (blocked != NULL)
		wakeup(blocked);
}

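/*
 * _umtx_lock() is the kernel slow path for a userland mutex whose
 * u_owner word holds the owner's thread id, with UMTX_CONTESTED (the
 * sign bit) marking the presence of waiters.  A minimal sketch of the
 * intended userland fast path, assuming the atomic_cmpset_{acq,rel}_long()
 * primitives from <machine/atomic.h>; the real library code differs
 * in detail:
 *
 *	lock:	if (!atomic_cmpset_acq_long(&m->u_owner, UMTX_UNOWNED, tid))
 *			_umtx_lock(m);		(kernel queues us and sleeps)
 *	unlock:	if (!atomic_cmpset_rel_long(&m->u_owner, tid, UMTX_UNOWNED))
 *			_umtx_unlock(m);	(kernel wakes a waiter)
 */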
int
_umtx_lock(struct thread *td, struct _umtx_lock_args *uap)
    /* struct umtx *umtx */
{
	struct umtx_q *uq;
	struct umtx *umtx;
	intptr_t owner;
	intptr_t old;
	int error = 0;

	uq = NULL;

	/*
	 * Care must be exercised when dealing with this structure.  It
	 * can fault on any access.
	 */
	umtx = uap->umtx;

	for (;;) {
		/*
		 * Try the uncontested case.  This should be done in userland.
		 */
		owner = casuptr((intptr_t *)&umtx->u_owner,
		    UMTX_UNOWNED, td->td_tid);

		/* The acquire succeeded. */
		if (owner == UMTX_UNOWNED)
			return (0);

		/* The address was invalid. */
		if (owner == -1)
			return (EFAULT);

		/* If no one owns it but it is contested, try to acquire it. */
		if (owner == UMTX_CONTESTED) {
			owner = casuptr((intptr_t *)&umtx->u_owner,
			    UMTX_CONTESTED, td->td_tid | UMTX_CONTESTED);

			if (owner == UMTX_CONTESTED)
				return (0);

			/* The address was invalid. */
			if (owner == -1)
				return (EFAULT);

			/* If this failed the lock has changed, restart. */
			continue;
		}

		/*
		 * If we caught a signal, we have already retried and now
		 * exit immediately.
		 */
		if (error)
			return (error);

		umtxq_lock(td, umtx);
		uq = umtxq_insert(td, umtx);
		umtxq_unlock(td, umtx);

		/*
		 * Set the contested bit so that a release in user space
		 * knows to use the system call for unlock.  If this fails,
		 * either someone else has acquired the lock or it has been
		 * released.
		 */
		old = casuptr((intptr_t *)&umtx->u_owner, owner,
		    owner | UMTX_CONTESTED);

		/* The address was invalid. */
		if (old == -1) {
			umtxq_lock(td, umtx);
			umtx_remove(uq, td, umtx);
			/* unlocked by umtx_remove */
			return (EFAULT);
		}

		/*
		 * If we set the contested bit, sleep.  Otherwise the lock
		 * changed and we need to retry, or we lost a race with the
		 * thread unlocking the umtx.
		 */
		umtxq_lock(td, umtx);
		if (old == owner && (td->td_flags & TDF_UMTXWAKEUP) == 0)
			error = umtx_sleep(td, umtx, td->td_priority | PCATCH,
				    "umtx", 0);
		else
			error = 0;
		umtx_remove(uq, td, umtx);
		/* unlocked by umtx_remove */

		if (td->td_flags & TDF_UMTXWAKEUP) {
			/*
			 * If we were woken by umtx_signal(), retry to
			 * avoid a race.
			 */
			mtx_lock_spin(&sched_lock);
			td->td_flags &= ~TDF_UMTXWAKEUP;
			mtx_unlock_spin(&sched_lock);
			continue;
		}

		/*
		 * If we caught a signal, exit immediately.
		 */
		if (error)
			return (error);
	}

	return (0);
}

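/*
 * _umtx_unlock() releases a contested umtx on behalf of userland: it
 * clears the owner word, re-sets the contested bit if more than one
 * waiter remains (so the next unlocker enters the kernel too), and
 * wakes a single waiter via umtx_signal().
 */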
int
_umtx_unlock(struct thread *td, struct _umtx_unlock_args *uap)
    /* struct umtx *umtx */
{
	struct umtx *umtx;
	intptr_t owner;
	intptr_t old;
	int count;

	umtx = uap->umtx;

	/*
	 * Make sure we own this mtx.
	 *
	 * XXX Need {fu,su}ptr; this is not correct on archs where
	 * sizeof(intptr_t) != sizeof(long).
	 */
	if ((owner = fuword(&umtx->u_owner)) == -1)
		return (EFAULT);

	if ((owner & ~UMTX_CONTESTED) != td->td_tid)
		return (EPERM);

	/* We should only ever be in here for contested locks. */
	if ((owner & UMTX_CONTESTED) == 0)
		return (EINVAL);

	/*
	 * When unlocking the umtx, it must be marked as unowned if
	 * there are zero or one threads waiting for it.  Otherwise,
	 * it must be marked as contested.
	 */
	old = casuptr((intptr_t *)&umtx->u_owner, owner, UMTX_UNOWNED);
	if (old == -1)
		return (EFAULT);
	if (old != owner)
		return (EINVAL);

	/*
	 * From this point on, a new thread can lock the umtx before we
	 * wake a waiter, in which case the contested bit will not be
	 * set; if there are two or more threads on the wait queue, we
	 * must set the contested bit for them.
	 */
	count = umtxq_count(td, umtx);
	if (count <= 0)
		return (0);

	/*
	 * If there is a second thread waiting on the umtx, set the
	 * contested bit; if the waiters are resumed before we reach
	 * here, it is harmless, just a bit inefficient.
	 */
	if (count > 1) {
		owner = UMTX_UNOWNED;
		for (;;) {
			old = casuptr((intptr_t *)&umtx->u_owner, owner,
				    owner | UMTX_CONTESTED);
			if (old == owner)
				break;
			if (old == -1)
				return (EFAULT);
			owner = old;
		}
		/*
		 * Another thread locked the umtx before us, so don't
		 * bother waking more threads; that thread will do it
		 * when it unlocks the umtx.
		 */
		if ((owner & ~UMTX_CONTESTED) != 0)
			return (0);
	}

	/* Wake blocked thread. */
	umtx_signal(td, umtx);

	return (0);
}
429