linux_futex.c revision 293532
1/*	$NetBSD: linux_futex.c,v 1.7 2006/07/24 19:01:49 manu Exp $ */
2
3/*-
4 * Copyright (c) 2005 Emmanuel Dreyfus, all rights reserved.
5 *
6 * Redistribution and use in source and binary forms, with or without
7 * modification, are permitted provided that the following conditions
8 * are met:
9 * 1. Redistributions of source code must retain the above copyright
10 *    notice, this list of conditions and the following disclaimer.
11 * 2. Redistributions in binary form must reproduce the above copyright
12 *    notice, this list of conditions and the following disclaimer in the
13 *    documentation and/or other materials provided with the distribution.
14 * 3. All advertising materials mentioning features or use of this software
15 *    must display the following acknowledgement:
16 *	This product includes software developed by Emmanuel Dreyfus
17 * 4. The name of the author may not be used to endorse or promote
18 *    products derived from this software without specific prior written
19 *    permission.
20 *
21 * THIS SOFTWARE IS PROVIDED BY THE THE AUTHOR AND CONTRIBUTORS ``AS IS''
22 * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO,
23 * THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
24 * PURPOSE ARE DISCLAIMED.  IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS
25 * BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
26 * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
27 * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
28 * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
29 * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
30 * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
31 * POSSIBILITY OF SUCH DAMAGE.
32 */
33
34#include <sys/cdefs.h>
35__FBSDID("$FreeBSD: stable/10/sys/compat/linux/linux_futex.c 293532 2016-01-09 16:20:29Z dchagin $");
36#if 0
37__KERNEL_RCSID(1, "$NetBSD: linux_futex.c,v 1.7 2006/07/24 19:01:49 manu Exp $");
38#endif
39
40#include "opt_compat.h"
41#include "opt_kdtrace.h"
42
43#include <sys/param.h>
44#include <sys/systm.h>
45#include <sys/imgact.h>
46#include <sys/kernel.h>
47#include <sys/ktr.h>
48#include <sys/lock.h>
49#include <sys/malloc.h>
50#include <sys/mutex.h>
51#include <sys/priv.h>
52#include <sys/proc.h>
53#include <sys/queue.h>
54#include <sys/sched.h>
55#include <sys/sdt.h>
56#include <sys/sx.h>
57#include <sys/umtx.h>
58
59#ifdef COMPAT_LINUX32
60#include <machine/../linux32/linux.h>
61#include <machine/../linux32/linux32_proto.h>
62#else
63#include <machine/../linux/linux.h>
64#include <machine/../linux/linux_proto.h>
65#endif
66#include <compat/linux/linux_dtrace.h>
67#include <compat/linux/linux_emul.h>
68#include <compat/linux/linux_futex.h>
69#include <compat/linux/linux_util.h>
70
/* DTrace init: declare the LINUX_DTRACE provider used by the probes below. */
LIN_SDT_PROVIDER_DECLARE(LINUX_DTRACE);

/**
 * Futex part for the special DTrace module "locks".
 *
 * These two probes are fired by the FUTEXES_LOCK and FUTEXES_UNLOCK macros
 * and receive the address of futex_mtx.
 */
LIN_SDT_PROBE_DEFINE1(locks, futex_mtx, locked, "struct mtx *");
LIN_SDT_PROBE_DEFINE1(locks, futex_mtx, unlock, "struct mtx *");

/**
 * Per futex probes.
 *
 * These two probes are fired by the FUTEX_INIT and FUTEX_DESTROY macros and
 * receive the address of the futex's sx lock (f_lck).
 */
LIN_SDT_PROBE_DEFINE1(futex, futex, create, "struct sx *");
LIN_SDT_PROBE_DEFINE1(futex, futex, destroy, "struct sx *");
85
/**
 * DTrace probes in this module.
 *
 * Each definition names the module ("futex"), the function the probe belongs
 * to, the probe name, and the C types of the probe arguments.  The groups
 * below are ordered by function.
 */

/* futex_put() */
LIN_SDT_PROBE_DEFINE2(futex, futex_put, entry, "struct futex *",
    "struct waiting_proc *");
LIN_SDT_PROBE_DEFINE3(futex, futex_put, destroy, "uint32_t *", "uint32_t",
    "int");
LIN_SDT_PROBE_DEFINE3(futex, futex_put, unlock, "uint32_t *", "uint32_t",
    "int");
LIN_SDT_PROBE_DEFINE0(futex, futex_put, return);

/* futex_get0() */
LIN_SDT_PROBE_DEFINE3(futex, futex_get0, entry, "uint32_t *", "struct futex **",
    "uint32_t");
LIN_SDT_PROBE_DEFINE1(futex, futex_get0, umtx_key_get_error, "int");
LIN_SDT_PROBE_DEFINE3(futex, futex_get0, shared, "uint32_t *", "uint32_t",
    "int");
LIN_SDT_PROBE_DEFINE1(futex, futex_get0, null, "uint32_t *");
LIN_SDT_PROBE_DEFINE3(futex, futex_get0, new, "uint32_t *", "uint32_t", "int");
LIN_SDT_PROBE_DEFINE1(futex, futex_get0, return, "int");

/* futex_get() */
LIN_SDT_PROBE_DEFINE3(futex, futex_get, entry, "uint32_t *",
    "struct waiting_proc **", "struct futex **");
LIN_SDT_PROBE_DEFINE0(futex, futex_get, error);
LIN_SDT_PROBE_DEFINE1(futex, futex_get, return, "int");

/* futex_sleep() */
LIN_SDT_PROBE_DEFINE3(futex, futex_sleep, entry, "struct futex *",
    "struct waiting_proc **", "int");
LIN_SDT_PROBE_DEFINE5(futex, futex_sleep, requeue_error, "int", "uint32_t *",
    "struct waiting_proc *", "uint32_t *", "uint32_t");
LIN_SDT_PROBE_DEFINE3(futex, futex_sleep, sleep_error, "int", "uint32_t *",
    "struct waiting_proc *");
LIN_SDT_PROBE_DEFINE1(futex, futex_sleep, return, "int");

/* futex_wake() */
LIN_SDT_PROBE_DEFINE3(futex, futex_wake, entry, "struct futex *", "int",
    "uint32_t");
LIN_SDT_PROBE_DEFINE3(futex, futex_wake, iterate, "uint32_t",
    "struct waiting_proc *", "uint32_t");
LIN_SDT_PROBE_DEFINE1(futex, futex_wake, wakeup, "struct waiting_proc *");
LIN_SDT_PROBE_DEFINE1(futex, futex_wake, return, "int");

/* futex_requeue() */
LIN_SDT_PROBE_DEFINE4(futex, futex_requeue, entry, "struct futex *", "int",
    "struct futex *", "int");
LIN_SDT_PROBE_DEFINE1(futex, futex_requeue, wakeup, "struct waiting_proc *");
LIN_SDT_PROBE_DEFINE3(futex, futex_requeue, requeue, "uint32_t *",
    "struct waiting_proc *", "uint32_t");
LIN_SDT_PROBE_DEFINE1(futex, futex_requeue, return, "int");

/* futex_wait() */
LIN_SDT_PROBE_DEFINE4(futex, futex_wait, entry, "struct futex *",
    "struct waiting_proc **", "int", "uint32_t");
LIN_SDT_PROBE_DEFINE1(futex, futex_wait, sleep_error, "int");
LIN_SDT_PROBE_DEFINE1(futex, futex_wait, return, "int");

/* futex_atomic_op() */
LIN_SDT_PROBE_DEFINE3(futex, futex_atomic_op, entry, "struct thread *",
    "int", "uint32_t");
LIN_SDT_PROBE_DEFINE4(futex, futex_atomic_op, decoded_op, "int", "int", "int",
    "int");
LIN_SDT_PROBE_DEFINE0(futex, futex_atomic_op, missing_access_check);
LIN_SDT_PROBE_DEFINE1(futex, futex_atomic_op, unimplemented_op, "int");
LIN_SDT_PROBE_DEFINE1(futex, futex_atomic_op, unimplemented_cmp, "int");
LIN_SDT_PROBE_DEFINE1(futex, futex_atomic_op, return, "int");

/* linux_sys_futex() */
LIN_SDT_PROBE_DEFINE2(futex, linux_sys_futex, entry, "struct thread *",
    "struct linux_sys_futex_args *");
LIN_SDT_PROBE_DEFINE0(futex, linux_sys_futex, unimplemented_clockswitch);
LIN_SDT_PROBE_DEFINE1(futex, linux_sys_futex, itimerfix_error, "int");
LIN_SDT_PROBE_DEFINE1(futex, linux_sys_futex, copyin_error, "int");
LIN_SDT_PROBE_DEFINE0(futex, linux_sys_futex, invalid_cmp_requeue_use);
LIN_SDT_PROBE_DEFINE3(futex, linux_sys_futex, debug_wait, "uint32_t *",
    "uint32_t", "uint32_t");
LIN_SDT_PROBE_DEFINE4(futex, linux_sys_futex, debug_wait_value_neq,
    "uint32_t *", "uint32_t", "int", "uint32_t");
LIN_SDT_PROBE_DEFINE3(futex, linux_sys_futex, debug_wake, "uint32_t *",
    "uint32_t", "uint32_t");
LIN_SDT_PROBE_DEFINE5(futex, linux_sys_futex, debug_cmp_requeue, "uint32_t *",
    "uint32_t", "uint32_t", "uint32_t *", "struct l_timespec *");
LIN_SDT_PROBE_DEFINE2(futex, linux_sys_futex, debug_cmp_requeue_value_neq,
    "uint32_t", "int");
LIN_SDT_PROBE_DEFINE5(futex, linux_sys_futex, debug_wake_op, "uint32_t *",
    "int", "uint32_t", "uint32_t *", "uint32_t");
LIN_SDT_PROBE_DEFINE0(futex, linux_sys_futex, unhandled_efault);
LIN_SDT_PROBE_DEFINE0(futex, linux_sys_futex, unimplemented_lock_pi);
LIN_SDT_PROBE_DEFINE0(futex, linux_sys_futex, unimplemented_unlock_pi);
LIN_SDT_PROBE_DEFINE0(futex, linux_sys_futex, unimplemented_trylock_pi);
LIN_SDT_PROBE_DEFINE0(futex, linux_sys_futex, deprecated_requeue);
LIN_SDT_PROBE_DEFINE0(futex, linux_sys_futex, unimplemented_wait_requeue_pi);
LIN_SDT_PROBE_DEFINE0(futex, linux_sys_futex, unimplemented_cmp_requeue_pi);
LIN_SDT_PROBE_DEFINE1(futex, linux_sys_futex, unknown_operation, "int");
LIN_SDT_PROBE_DEFINE1(futex, linux_sys_futex, return, "int");

/* linux_set_robust_list() */
LIN_SDT_PROBE_DEFINE2(futex, linux_set_robust_list, entry, "struct thread *",
    "struct linux_set_robust_list_args *");
LIN_SDT_PROBE_DEFINE0(futex, linux_set_robust_list, size_error);
LIN_SDT_PROBE_DEFINE1(futex, linux_set_robust_list, return, "int");

/* linux_get_robust_list() */
LIN_SDT_PROBE_DEFINE2(futex, linux_get_robust_list, entry, "struct thread *",
    "struct linux_get_robust_list_args *");
LIN_SDT_PROBE_DEFINE1(futex, linux_get_robust_list, copyout_error, "int");
LIN_SDT_PROBE_DEFINE1(futex, linux_get_robust_list, return, "int");

/* handle_futex_death() */
LIN_SDT_PROBE_DEFINE3(futex, handle_futex_death, entry,
    "struct linux_emuldata *", "uint32_t *", "unsigned int");
LIN_SDT_PROBE_DEFINE1(futex, handle_futex_death, copyin_error, "int");
LIN_SDT_PROBE_DEFINE1(futex, handle_futex_death, return, "int");

/* fetch_robust_entry() */
LIN_SDT_PROBE_DEFINE3(futex, fetch_robust_entry, entry,
    "struct linux_robust_list **", "struct linux_robust_list **",
    "unsigned int *");
LIN_SDT_PROBE_DEFINE1(futex, fetch_robust_entry, copyin_error, "int");
LIN_SDT_PROBE_DEFINE1(futex, fetch_robust_entry, return, "int");

/* release_futexes() */
LIN_SDT_PROBE_DEFINE2(futex, release_futexes, entry, "struct thread *",
    "struct linux_emuldata *");
LIN_SDT_PROBE_DEFINE1(futex, release_futexes, copyin_error, "int");
LIN_SDT_PROBE_DEFINE0(futex, release_futexes, return);
187
struct futex;

/*
 * Per-waiter record.  A waiting thread sleeps on the address of its own
 * record (see futex_sleep()) and wakers call wakeup_one() on that address.
 */
struct waiting_proc {
	uint32_t	wp_flags;	/* FUTEX_WP_* bits */
	struct futex	*wp_futex;	/* futex this waiter is queued on */
	TAILQ_ENTRY(waiting_proc) wp_list;	/* link in f_waiting_proc */
};

/*
 * Kernel-side state for one futex word.  Instances are kept on the global
 * futex_list and are looked up by f_key in futex_get0().
 */
struct futex {
	struct sx	f_lck;		/* taken via FUTEX_LOCK()/FUTEX_UNLOCK() */
	uint32_t	*f_uaddr;	/* user-supplied value, for debug */
	struct umtx_key	f_key;		/* identity, compared by umtx_key_match() */
	uint32_t	f_refcount;	/* modified with FUTEXES_LOCK held */
	uint32_t	f_bitset;	/* bitset stored by futex_wait() */
	LIST_ENTRY(futex) f_list;	/* link in futex_list */
	TAILQ_HEAD(lf_waiting_proc, waiting_proc) f_waiting_proc; /* waiters */
};
205
/* Global list of futexes; walked and modified with FUTEXES_LOCK held. */
struct futex_list futex_list;

/*
 * Per-futex locking.  Each futex carries an sx lock that is always taken
 * exclusively.  FUTEX_INIT()/FUTEX_DESTROY() also fire the per-futex
 * create/destroy DTrace probes with the address of that lock.
 */
#define FUTEX_LOCK(f)		sx_xlock(&(f)->f_lck)
#define FUTEX_UNLOCK(f)		sx_xunlock(&(f)->f_lck)
#define FUTEX_INIT(f)		do { \
				    sx_init_flags(&(f)->f_lck, "ftlk", \
					SX_DUPOK); \
				    LIN_SDT_PROBE1(futex, futex, create, \
					&(f)->f_lck); \
				} while (0)
#define FUTEX_DESTROY(f)	do { \
				    LIN_SDT_PROBE1(futex, futex, destroy, \
					&(f)->f_lck); \
				    sx_destroy(&(f)->f_lck); \
				} while (0)
#define FUTEX_ASSERT_LOCKED(f)	sx_assert(&(f)->f_lck, SA_XLOCKED)

/*
 * Global list lock.  The probe is fired after the mutex is acquired and
 * before it is released, i.e. always while the mutex is held.
 */
struct mtx futex_mtx;			/* protects the futex list */
#define FUTEXES_LOCK		do { \
				    mtx_lock(&futex_mtx); \
				    LIN_SDT_PROBE1(locks, futex_mtx, \
					locked, &futex_mtx); \
				} while (0)
#define FUTEXES_UNLOCK		do { \
				    LIN_SDT_PROBE1(locks, futex_mtx, \
					unlock, &futex_mtx); \
				    mtx_unlock(&futex_mtx); \
				} while (0)
234
/* flags for futex_get() */
#define FUTEX_CREATE_WP		0x1	/* create waiting_proc */
#define FUTEX_DONTCREATE	0x2	/* don't create the futex if it does
					 * not exist
					 */
#define FUTEX_DONTEXISTS	0x4	/* return EINVAL if futex exists */
#define	FUTEX_SHARED		0x8	/* shared futex */

/* wp_flags */
#define FUTEX_WP_REQUEUED	0x1	/* wp was requeued: it was moved from
					 * the wp_list of the futex the thread
					 * went to sleep on to the wp_list of
					 * another futex.
					 */
#define FUTEX_WP_REMOVED	0x2	/* wp was woken up and already removed
					 * from the futex wp_list, to prevent
					 * a double wakeup.
					 */
249
/*
 * support.s
 *
 * Helpers used by futex_atomic_op() to apply an operation with operand
 * `oparg' to the user word at `uaddr'.  futex_atomic_op() treats a non-zero
 * return as failure and afterwards compares `*oldval' against the decoded
 * comparison argument (presumably the word's previous value -- confirm
 * against support.s).
 */
int futex_xchgl(int oparg, uint32_t *uaddr, int *oldval);
int futex_addl(int oparg, uint32_t *uaddr, int *oldval);
int futex_orl(int oparg, uint32_t *uaddr, int *oldval);
int futex_andl(int oparg, uint32_t *uaddr, int *oldval);
int futex_xorl(int oparg, uint32_t *uaddr, int *oldval);
256
257static void
258futex_put(struct futex *f, struct waiting_proc *wp)
259{
260	LIN_SDT_PROBE2(futex, futex_put, entry, f, wp);
261
262	FUTEX_ASSERT_LOCKED(f);
263	if (wp != NULL) {
264		if ((wp->wp_flags & FUTEX_WP_REMOVED) == 0)
265			TAILQ_REMOVE(&f->f_waiting_proc, wp, wp_list);
266		free(wp, M_FUTEX_WP);
267	}
268
269	FUTEXES_LOCK;
270	if (--f->f_refcount == 0) {
271		LIST_REMOVE(f, f_list);
272		FUTEXES_UNLOCK;
273		FUTEX_UNLOCK(f);
274
275		LIN_SDT_PROBE3(futex, futex_put, destroy, f->f_uaddr,
276		    f->f_refcount, f->f_key.shared);
277		LINUX_CTR3(sys_futex, "futex_put destroy uaddr %p ref %d "
278		    "shared %d", f->f_uaddr, f->f_refcount, f->f_key.shared);
279		umtx_key_release(&f->f_key);
280		FUTEX_DESTROY(f);
281		free(f, M_FUTEX);
282
283		LIN_SDT_PROBE0(futex, futex_put, return);
284		return;
285	}
286
287	LIN_SDT_PROBE3(futex, futex_put, unlock, f->f_uaddr, f->f_refcount,
288	    f->f_key.shared);
289	LINUX_CTR3(sys_futex, "futex_put uaddr %p ref %d shared %d",
290	    f->f_uaddr, f->f_refcount, f->f_key.shared);
291	FUTEXES_UNLOCK;
292	FUTEX_UNLOCK(f);
293
294	LIN_SDT_PROBE0(futex, futex_put, return);
295}
296
297static int
298futex_get0(uint32_t *uaddr, struct futex **newf, uint32_t flags)
299{
300	struct futex *f, *tmpf;
301	struct umtx_key key;
302	int error;
303
304	LIN_SDT_PROBE3(futex, futex_get0, entry, uaddr, newf, flags);
305
306	*newf = tmpf = NULL;
307
308	error = umtx_key_get(uaddr, TYPE_FUTEX, (flags & FUTEX_SHARED) ?
309	    AUTO_SHARE : THREAD_SHARE, &key);
310	if (error) {
311		LIN_SDT_PROBE1(futex, futex_get0, umtx_key_get_error, error);
312		LIN_SDT_PROBE1(futex, futex_get0, return, error);
313		return (error);
314	}
315retry:
316	FUTEXES_LOCK;
317	LIST_FOREACH(f, &futex_list, f_list) {
318		if (umtx_key_match(&f->f_key, &key)) {
319			if (tmpf != NULL) {
320				FUTEX_UNLOCK(tmpf);
321				FUTEX_DESTROY(tmpf);
322				free(tmpf, M_FUTEX);
323			}
324			if (flags & FUTEX_DONTEXISTS) {
325				FUTEXES_UNLOCK;
326				umtx_key_release(&key);
327
328				LIN_SDT_PROBE1(futex, futex_get0, return,
329				    EINVAL);
330				return (EINVAL);
331			}
332
333			/*
334			 * Increment refcount of the found futex to
335			 * prevent it from deallocation before FUTEX_LOCK()
336			 */
337			++f->f_refcount;
338			FUTEXES_UNLOCK;
339			umtx_key_release(&key);
340
341			FUTEX_LOCK(f);
342			*newf = f;
343			LIN_SDT_PROBE3(futex, futex_get0, shared, uaddr,
344			    f->f_refcount, f->f_key.shared);
345			LINUX_CTR3(sys_futex, "futex_get uaddr %p ref %d shared %d",
346			    uaddr, f->f_refcount, f->f_key.shared);
347
348			LIN_SDT_PROBE1(futex, futex_get0, return, 0);
349			return (0);
350		}
351	}
352
353	if (flags & FUTEX_DONTCREATE) {
354		FUTEXES_UNLOCK;
355		umtx_key_release(&key);
356		LIN_SDT_PROBE1(futex, futex_get0, null, uaddr);
357		LINUX_CTR1(sys_futex, "futex_get uaddr %p null", uaddr);
358
359		LIN_SDT_PROBE1(futex, futex_get0, return, 0);
360		return (0);
361	}
362
363	if (tmpf == NULL) {
364		FUTEXES_UNLOCK;
365		tmpf = malloc(sizeof(*tmpf), M_FUTEX, M_WAITOK | M_ZERO);
366		tmpf->f_uaddr = uaddr;
367		tmpf->f_key = key;
368		tmpf->f_refcount = 1;
369		tmpf->f_bitset = FUTEX_BITSET_MATCH_ANY;
370		FUTEX_INIT(tmpf);
371		TAILQ_INIT(&tmpf->f_waiting_proc);
372
373		/*
374		 * Lock the new futex before an insert into the futex_list
375		 * to prevent futex usage by other.
376		 */
377		FUTEX_LOCK(tmpf);
378		goto retry;
379	}
380
381	LIST_INSERT_HEAD(&futex_list, tmpf, f_list);
382	FUTEXES_UNLOCK;
383
384	LIN_SDT_PROBE3(futex, futex_get0, new, uaddr, tmpf->f_refcount,
385	    tmpf->f_key.shared);
386	LINUX_CTR3(sys_futex, "futex_get uaddr %p ref %d shared %d new",
387	    uaddr, tmpf->f_refcount, tmpf->f_key.shared);
388	*newf = tmpf;
389
390	LIN_SDT_PROBE1(futex, futex_get0, return, 0);
391	return (0);
392}
393
394static int
395futex_get(uint32_t *uaddr, struct waiting_proc **wp, struct futex **f,
396    uint32_t flags)
397{
398	int error;
399
400	LIN_SDT_PROBE3(futex, futex_get, entry, uaddr, wp, f);
401
402	if (flags & FUTEX_CREATE_WP) {
403		*wp = malloc(sizeof(struct waiting_proc), M_FUTEX_WP, M_WAITOK);
404		(*wp)->wp_flags = 0;
405	}
406	error = futex_get0(uaddr, f, flags);
407	if (error) {
408		LIN_SDT_PROBE0(futex, futex_get, error);
409
410		if (flags & FUTEX_CREATE_WP)
411			free(*wp, M_FUTEX_WP);
412
413		LIN_SDT_PROBE1(futex, futex_get, return, error);
414		return (error);
415	}
416	if (flags & FUTEX_CREATE_WP) {
417		TAILQ_INSERT_HEAD(&(*f)->f_waiting_proc, *wp, wp_list);
418		(*wp)->wp_futex = *f;
419	}
420
421	LIN_SDT_PROBE1(futex, futex_get, return, error);
422	return (error);
423}
424
425static int
426futex_sleep(struct futex *f, struct waiting_proc *wp, int timeout)
427{
428	int error;
429
430	FUTEX_ASSERT_LOCKED(f);
431	LIN_SDT_PROBE3(futex, futex_sleep, entry, f, wp, timeout);
432	LINUX_CTR4(sys_futex, "futex_sleep enter uaddr %p wp %p timo %d ref %d",
433	    f->f_uaddr, wp, timeout, f->f_refcount);
434	error = sx_sleep(wp, &f->f_lck, PCATCH, "futex", timeout);
435	if (wp->wp_flags & FUTEX_WP_REQUEUED) {
436		KASSERT(f != wp->wp_futex, ("futex != wp_futex"));
437
438		if (error) {
439			LIN_SDT_PROBE5(futex, futex_sleep, requeue_error, error,
440			    f->f_uaddr, wp, wp->wp_futex->f_uaddr,
441			    wp->wp_futex->f_refcount);
442		}
443
444		LINUX_CTR5(sys_futex, "futex_sleep out error %d uaddr %p wp"
445		    " %p requeued uaddr %p ref %d",
446		    error, f->f_uaddr, wp, wp->wp_futex->f_uaddr,
447		    wp->wp_futex->f_refcount);
448		futex_put(f, NULL);
449		f = wp->wp_futex;
450		FUTEX_LOCK(f);
451	} else {
452		if (error) {
453			LIN_SDT_PROBE3(futex, futex_sleep, sleep_error, error,
454			    f->f_uaddr, wp);
455		}
456		LINUX_CTR3(sys_futex, "futex_sleep out error %d uaddr %p wp %p",
457		    error, f->f_uaddr, wp);
458	}
459
460	futex_put(f, wp);
461
462	LIN_SDT_PROBE1(futex, futex_sleep, return, error);
463	return (error);
464}
465
466static int
467futex_wake(struct futex *f, int n, uint32_t bitset)
468{
469	struct waiting_proc *wp, *wpt;
470	int count = 0;
471
472	LIN_SDT_PROBE3(futex, futex_wake, entry, f, n, bitset);
473
474	if (bitset == 0) {
475		LIN_SDT_PROBE1(futex, futex_wake, return, EINVAL);
476		return (EINVAL);
477	}
478
479	FUTEX_ASSERT_LOCKED(f);
480	TAILQ_FOREACH_SAFE(wp, &f->f_waiting_proc, wp_list, wpt) {
481		LIN_SDT_PROBE3(futex, futex_wake, iterate, f->f_uaddr, wp,
482		    f->f_refcount);
483		LINUX_CTR3(sys_futex, "futex_wake uaddr %p wp %p ref %d",
484		    f->f_uaddr, wp, f->f_refcount);
485		/*
486		 * Unless we find a matching bit in
487		 * the bitset, continue searching.
488		 */
489		if (!(wp->wp_futex->f_bitset & bitset))
490			continue;
491
492		wp->wp_flags |= FUTEX_WP_REMOVED;
493		TAILQ_REMOVE(&f->f_waiting_proc, wp, wp_list);
494		LIN_SDT_PROBE1(futex, futex_wake, wakeup, wp);
495		wakeup_one(wp);
496		if (++count == n)
497			break;
498	}
499
500	LIN_SDT_PROBE1(futex, futex_wake, return, count);
501	return (count);
502}
503
504static int
505futex_requeue(struct futex *f, int n, struct futex *f2, int n2)
506{
507	struct waiting_proc *wp, *wpt;
508	int count = 0;
509
510	LIN_SDT_PROBE4(futex, futex_requeue, entry, f, n, f2, n2);
511
512	FUTEX_ASSERT_LOCKED(f);
513	FUTEX_ASSERT_LOCKED(f2);
514
515	TAILQ_FOREACH_SAFE(wp, &f->f_waiting_proc, wp_list, wpt) {
516		if (++count <= n) {
517			LINUX_CTR2(sys_futex, "futex_req_wake uaddr %p wp %p",
518			    f->f_uaddr, wp);
519			wp->wp_flags |= FUTEX_WP_REMOVED;
520			TAILQ_REMOVE(&f->f_waiting_proc, wp, wp_list);
521			LIN_SDT_PROBE1(futex, futex_requeue, wakeup, wp);
522			wakeup_one(wp);
523		} else {
524			LIN_SDT_PROBE3(futex, futex_requeue, requeue,
525			    f->f_uaddr, wp, f2->f_uaddr);
526			LINUX_CTR3(sys_futex, "futex_requeue uaddr %p wp %p to %p",
527			    f->f_uaddr, wp, f2->f_uaddr);
528			wp->wp_flags |= FUTEX_WP_REQUEUED;
529			/* Move wp to wp_list of f2 futex */
530			TAILQ_REMOVE(&f->f_waiting_proc, wp, wp_list);
531			TAILQ_INSERT_HEAD(&f2->f_waiting_proc, wp, wp_list);
532
533			/*
534			 * Thread which sleeps on wp after waking should
535			 * acquire f2 lock, so increment refcount of f2 to
536			 * prevent it from premature deallocation.
537			 */
538			wp->wp_futex = f2;
539			FUTEXES_LOCK;
540			++f2->f_refcount;
541			FUTEXES_UNLOCK;
542			if (count - n >= n2)
543				break;
544		}
545	}
546
547	LIN_SDT_PROBE1(futex, futex_requeue, return, count);
548	return (count);
549}
550
551static int
552futex_wait(struct futex *f, struct waiting_proc *wp, int timeout_hz,
553    uint32_t bitset)
554{
555	int error;
556
557	LIN_SDT_PROBE4(futex, futex_wait, entry, f, wp, timeout_hz, bitset);
558
559	if (bitset == 0) {
560		LIN_SDT_PROBE1(futex, futex_wait, return, EINVAL);
561		return (EINVAL);
562	}
563
564	f->f_bitset = bitset;
565	error = futex_sleep(f, wp, timeout_hz);
566	if (error)
567		LIN_SDT_PROBE1(futex, futex_wait, sleep_error, error);
568	if (error == EWOULDBLOCK)
569		error = ETIMEDOUT;
570
571	LIN_SDT_PROBE1(futex, futex_wait, return, error);
572	return (error);
573}
574
575static int
576futex_atomic_op(struct thread *td, int encoded_op, uint32_t *uaddr)
577{
578	int op = (encoded_op >> 28) & 7;
579	int cmp = (encoded_op >> 24) & 15;
580	int oparg = (encoded_op << 8) >> 20;
581	int cmparg = (encoded_op << 20) >> 20;
582	int oldval = 0, ret;
583
584	LIN_SDT_PROBE3(futex, futex_atomic_op, entry, td, encoded_op, uaddr);
585
586	if (encoded_op & (FUTEX_OP_OPARG_SHIFT << 28))
587		oparg = 1 << oparg;
588
589	LIN_SDT_PROBE4(futex, futex_atomic_op, decoded_op, op, cmp, oparg,
590	    cmparg);
591
592	/* XXX: Linux verifies access here and returns EFAULT */
593	LIN_SDT_PROBE0(futex, futex_atomic_op, missing_access_check);
594
595	switch (op) {
596	case FUTEX_OP_SET:
597		ret = futex_xchgl(oparg, uaddr, &oldval);
598		break;
599	case FUTEX_OP_ADD:
600		ret = futex_addl(oparg, uaddr, &oldval);
601		break;
602	case FUTEX_OP_OR:
603		ret = futex_orl(oparg, uaddr, &oldval);
604		break;
605	case FUTEX_OP_ANDN:
606		ret = futex_andl(~oparg, uaddr, &oldval);
607		break;
608	case FUTEX_OP_XOR:
609		ret = futex_xorl(oparg, uaddr, &oldval);
610		break;
611	default:
612		LIN_SDT_PROBE1(futex, futex_atomic_op, unimplemented_op, op);
613		ret = -ENOSYS;
614		break;
615	}
616
617	if (ret) {
618		LIN_SDT_PROBE1(futex, futex_atomic_op, return, ret);
619		return (ret);
620	}
621
622	switch (cmp) {
623	case FUTEX_OP_CMP_EQ:
624		ret = (oldval == cmparg);
625		break;
626	case FUTEX_OP_CMP_NE:
627		ret = (oldval != cmparg);
628		break;
629	case FUTEX_OP_CMP_LT:
630		ret = (oldval < cmparg);
631		break;
632	case FUTEX_OP_CMP_GE:
633		ret = (oldval >= cmparg);
634		break;
635	case FUTEX_OP_CMP_LE:
636		ret = (oldval <= cmparg);
637		break;
638	case FUTEX_OP_CMP_GT:
639		ret = (oldval > cmparg);
640		break;
641	default:
642		LIN_SDT_PROBE1(futex, futex_atomic_op, unimplemented_cmp, cmp);
643		ret = -ENOSYS;
644	}
645
646	LIN_SDT_PROBE1(futex, futex_atomic_op, return, ret);
647	return (ret);
648}
649
650int
651linux_sys_futex(struct thread *td, struct linux_sys_futex_args *args)
652{
653	int clockrt, nrwake, op_ret, ret;
654	struct linux_pemuldata *pem;
655	struct waiting_proc *wp;
656	struct futex *f, *f2;
657	struct l_timespec timeout;
658	struct timeval utv, ctv;
659	int timeout_hz;
660	int error;
661	uint32_t flags, val;
662
663	LIN_SDT_PROBE2(futex, linux_sys_futex, entry, td, args);
664
665	if (args->op & LINUX_FUTEX_PRIVATE_FLAG) {
666		flags = 0;
667		args->op &= ~LINUX_FUTEX_PRIVATE_FLAG;
668	} else
669		flags = FUTEX_SHARED;
670
671	/*
672	 * Currently support for switching between CLOCK_MONOTONIC and
673	 * CLOCK_REALTIME is not present. However Linux forbids the use of
674	 * FUTEX_CLOCK_REALTIME with any op except FUTEX_WAIT_BITSET and
675	 * FUTEX_WAIT_REQUEUE_PI.
676	 */
677	clockrt = args->op & LINUX_FUTEX_CLOCK_REALTIME;
678	args->op = args->op & ~LINUX_FUTEX_CLOCK_REALTIME;
679	if (clockrt && args->op != LINUX_FUTEX_WAIT_BITSET &&
680		args->op != LINUX_FUTEX_WAIT_REQUEUE_PI) {
681		LIN_SDT_PROBE0(futex, linux_sys_futex,
682		    unimplemented_clockswitch);
683		LIN_SDT_PROBE1(futex, linux_sys_futex, return, ENOSYS);
684		return (ENOSYS);
685	}
686
687	error = 0;
688	f = f2 = NULL;
689
690	switch (args->op) {
691	case LINUX_FUTEX_WAIT:
692		args->val3 = FUTEX_BITSET_MATCH_ANY;
693		/* FALLTHROUGH */
694
695	case LINUX_FUTEX_WAIT_BITSET:
696		LIN_SDT_PROBE3(futex, linux_sys_futex, debug_wait, args->uaddr,
697		    args->val, args->val3);
698		LINUX_CTR3(sys_futex, "WAIT uaddr %p val 0x%x bitset 0x%x",
699		    args->uaddr, args->val, args->val3);
700
701		error = futex_get(args->uaddr, &wp, &f,
702		    flags | FUTEX_CREATE_WP);
703		if (error) {
704			LIN_SDT_PROBE1(futex, linux_sys_futex, return, error);
705			return (error);
706		}
707
708		error = copyin(args->uaddr, &val, sizeof(val));
709		if (error) {
710			LIN_SDT_PROBE1(futex, linux_sys_futex, copyin_error,
711			    error);
712			LINUX_CTR1(sys_futex, "WAIT copyin failed %d",
713			    error);
714			futex_put(f, wp);
715
716			LIN_SDT_PROBE1(futex, linux_sys_futex, return, error);
717			return (error);
718		}
719		if (val != args->val) {
720			LIN_SDT_PROBE4(futex, linux_sys_futex,
721			    debug_wait_value_neq, args->uaddr, args->val, val,
722			    args->val3);
723			LINUX_CTR3(sys_futex,
724			    "WAIT uaddr %p val 0x%x != uval 0x%x",
725			    args->uaddr, args->val, val);
726			futex_put(f, wp);
727
728			LIN_SDT_PROBE1(futex, linux_sys_futex, return,
729			    EWOULDBLOCK);
730			return (EWOULDBLOCK);
731		}
732
733		if (args->timeout != NULL) {
734			error = copyin(args->timeout, &timeout, sizeof(timeout));
735			if (error) {
736				LIN_SDT_PROBE1(futex, linux_sys_futex, copyin_error,
737				    error);
738				LIN_SDT_PROBE1(futex, linux_sys_futex, return, error);
739				futex_put(f, wp);
740				return (error);
741			}
742			TIMESPEC_TO_TIMEVAL(&utv, &timeout);
743			error = itimerfix(&utv);
744			if (error) {
745				LIN_SDT_PROBE1(futex, linux_sys_futex, itimerfix_error,
746				    error);
747				LIN_SDT_PROBE1(futex, linux_sys_futex, return, error);
748				futex_put(f, wp);
749				return (error);
750			}
751			if (clockrt) {
752				microtime(&ctv);
753				timevalsub(&utv, &ctv);
754			} else if (args->op == LINUX_FUTEX_WAIT_BITSET) {
755				microuptime(&ctv);
756				timevalsub(&utv, &ctv);
757			}
758			if (utv.tv_sec < 0)
759				timevalclear(&utv);
760			timeout_hz = tvtohz(&utv);
761		} else
762			timeout_hz = 0;
763
764		error = futex_wait(f, wp, timeout_hz, args->val3);
765		break;
766
767	case LINUX_FUTEX_WAKE:
768		args->val3 = FUTEX_BITSET_MATCH_ANY;
769		/* FALLTHROUGH */
770
771	case LINUX_FUTEX_WAKE_BITSET:
772		LIN_SDT_PROBE3(futex, linux_sys_futex, debug_wake, args->uaddr,
773		    args->val, args->val3);
774		LINUX_CTR3(sys_futex, "WAKE uaddr %p nrwake 0x%x bitset 0x%x",
775		    args->uaddr, args->val, args->val3);
776
777		error = futex_get(args->uaddr, NULL, &f,
778		    flags | FUTEX_DONTCREATE);
779		if (error) {
780			LIN_SDT_PROBE1(futex, linux_sys_futex, return, error);
781			return (error);
782		}
783
784		if (f == NULL) {
785			td->td_retval[0] = 0;
786
787			LIN_SDT_PROBE1(futex, linux_sys_futex, return, error);
788			return (error);
789		}
790		td->td_retval[0] = futex_wake(f, args->val, args->val3);
791		futex_put(f, NULL);
792		break;
793
794	case LINUX_FUTEX_CMP_REQUEUE:
795		LIN_SDT_PROBE5(futex, linux_sys_futex, debug_cmp_requeue,
796		    args->uaddr, args->val, args->val3, args->uaddr2,
797		    args->timeout);
798		LINUX_CTR5(sys_futex, "CMP_REQUEUE uaddr %p "
799		    "nrwake 0x%x uval 0x%x uaddr2 %p nrequeue 0x%x",
800		    args->uaddr, args->val, args->val3, args->uaddr2,
801		    args->timeout);
802
803		/*
804		 * Linux allows this, we would not, it is an incorrect
805		 * usage of declared ABI, so return EINVAL.
806		 */
807		if (args->uaddr == args->uaddr2) {
808			LIN_SDT_PROBE0(futex, linux_sys_futex,
809			    invalid_cmp_requeue_use);
810			LIN_SDT_PROBE1(futex, linux_sys_futex, return, EINVAL);
811			return (EINVAL);
812		}
813
814		error = futex_get(args->uaddr, NULL, &f, flags);
815		if (error) {
816			LIN_SDT_PROBE1(futex, linux_sys_futex, return, error);
817			return (error);
818		}
819
820		/*
821		 * To avoid deadlocks return EINVAL if second futex
822		 * exists at this time.
823		 *
824		 * Glibc fall back to FUTEX_WAKE in case of any error
825		 * returned by FUTEX_CMP_REQUEUE.
826		 */
827		error = futex_get(args->uaddr2, NULL, &f2,
828		    flags | FUTEX_DONTEXISTS);
829		if (error) {
830			futex_put(f, NULL);
831
832			LIN_SDT_PROBE1(futex, linux_sys_futex, return, error);
833			return (error);
834		}
835		error = copyin(args->uaddr, &val, sizeof(val));
836		if (error) {
837			LIN_SDT_PROBE1(futex, linux_sys_futex, copyin_error,
838			    error);
839			LINUX_CTR1(sys_futex, "CMP_REQUEUE copyin failed %d",
840			    error);
841			futex_put(f2, NULL);
842			futex_put(f, NULL);
843
844			LIN_SDT_PROBE1(futex, linux_sys_futex, return, error);
845			return (error);
846		}
847		if (val != args->val3) {
848			LIN_SDT_PROBE2(futex, linux_sys_futex,
849			    debug_cmp_requeue_value_neq, args->val, val);
850			LINUX_CTR2(sys_futex, "CMP_REQUEUE val 0x%x != uval 0x%x",
851			    args->val, val);
852			futex_put(f2, NULL);
853			futex_put(f, NULL);
854
855			LIN_SDT_PROBE1(futex, linux_sys_futex, return, EAGAIN);
856			return (EAGAIN);
857		}
858
859		nrwake = (int)(unsigned long)args->timeout;
860		td->td_retval[0] = futex_requeue(f, args->val, f2, nrwake);
861		futex_put(f2, NULL);
862		futex_put(f, NULL);
863		break;
864
865	case LINUX_FUTEX_WAKE_OP:
866		LIN_SDT_PROBE5(futex, linux_sys_futex, debug_wake_op,
867		    args->uaddr, args->op, args->val, args->uaddr2, args->val3);
868		LINUX_CTR5(sys_futex, "WAKE_OP "
869		    "uaddr %p nrwake 0x%x uaddr2 %p op 0x%x nrwake2 0x%x",
870		    args->uaddr, args->val, args->uaddr2, args->val3,
871		    args->timeout);
872
873		error = futex_get(args->uaddr, NULL, &f, flags);
874		if (error) {
875			LIN_SDT_PROBE1(futex, linux_sys_futex, return, error);
876			return (error);
877		}
878
879		if (args->uaddr != args->uaddr2)
880			error = futex_get(args->uaddr2, NULL, &f2, flags);
881		if (error) {
882			futex_put(f, NULL);
883
884			LIN_SDT_PROBE1(futex, linux_sys_futex, return, error);
885			return (error);
886		}
887
888		/*
889		 * This function returns positive number as results and
890		 * negative as errors
891		 */
892		op_ret = futex_atomic_op(td, args->val3, args->uaddr2);
893
894		LINUX_CTR2(sys_futex, "WAKE_OP atomic_op uaddr %p ret 0x%x",
895		    args->uaddr, op_ret);
896
897		if (op_ret < 0) {
898			/* XXX: We don't handle the EFAULT yet. */
899			if (op_ret != -EFAULT) {
900				if (f2 != NULL)
901					futex_put(f2, NULL);
902				futex_put(f, NULL);
903
904				LIN_SDT_PROBE1(futex, linux_sys_futex, return,
905				    -op_ret);
906				return (-op_ret);
907			} else {
908				LIN_SDT_PROBE0(futex, linux_sys_futex,
909				    unhandled_efault);
910			}
911			if (f2 != NULL)
912				futex_put(f2, NULL);
913			futex_put(f, NULL);
914
915			LIN_SDT_PROBE1(futex, linux_sys_futex, return, EFAULT);
916			return (EFAULT);
917		}
918
919		ret = futex_wake(f, args->val, args->val3);
920
921		if (op_ret > 0) {
922			op_ret = 0;
923			nrwake = (int)(unsigned long)args->timeout;
924
925			if (f2 != NULL)
926				op_ret += futex_wake(f2, nrwake, args->val3);
927			else
928				op_ret += futex_wake(f, nrwake, args->val3);
929			ret += op_ret;
930
931		}
932		if (f2 != NULL)
933			futex_put(f2, NULL);
934		futex_put(f, NULL);
935		td->td_retval[0] = ret;
936		break;
937
938	case LINUX_FUTEX_LOCK_PI:
939		/* not yet implemented */
940		linux_msg(td,
941			  "linux_sys_futex: "
942			  "op LINUX_FUTEX_LOCK_PI not implemented\n");
943		LIN_SDT_PROBE0(futex, linux_sys_futex, unimplemented_lock_pi);
944		LIN_SDT_PROBE1(futex, linux_sys_futex, return, ENOSYS);
945		return (ENOSYS);
946
947	case LINUX_FUTEX_UNLOCK_PI:
948		/* not yet implemented */
949		linux_msg(td,
950			  "linux_sys_futex: "
951			  "op LINUX_FUTEX_UNLOCK_PI not implemented\n");
952		LIN_SDT_PROBE0(futex, linux_sys_futex, unimplemented_unlock_pi);
953		LIN_SDT_PROBE1(futex, linux_sys_futex, return, ENOSYS);
954		return (ENOSYS);
955
956	case LINUX_FUTEX_TRYLOCK_PI:
957		/* not yet implemented */
958		linux_msg(td,
959			  "linux_sys_futex: "
960			  "op LINUX_FUTEX_TRYLOCK_PI not implemented\n");
961		LIN_SDT_PROBE0(futex, linux_sys_futex,
962		    unimplemented_trylock_pi);
963		LIN_SDT_PROBE1(futex, linux_sys_futex, return, ENOSYS);
964		return (ENOSYS);
965
966	case LINUX_FUTEX_REQUEUE:
967
968		/*
969		 * Glibc does not use this operation since version 2.3.3,
970		 * as it is racy and replaced by FUTEX_CMP_REQUEUE operation.
971		 * Glibc versions prior to 2.3.3 fall back to FUTEX_WAKE when
972		 * FUTEX_REQUEUE returned EINVAL.
973		 */
974		pem = pem_find(td->td_proc);
975		if ((pem->flags & LINUX_XDEPR_REQUEUEOP) == 0) {
976			linux_msg(td,
977				  "linux_sys_futex: "
978				  "unsupported futex_requeue op\n");
979			pem->flags |= LINUX_XDEPR_REQUEUEOP;
980			LIN_SDT_PROBE0(futex, linux_sys_futex,
981			    deprecated_requeue);
982		}
983
984		LIN_SDT_PROBE1(futex, linux_sys_futex, return, EINVAL);
985		return (EINVAL);
986
987	case LINUX_FUTEX_WAIT_REQUEUE_PI:
988		/* not yet implemented */
989		linux_msg(td,
990			  "linux_sys_futex: "
991			  "op FUTEX_WAIT_REQUEUE_PI not implemented\n");
992		LIN_SDT_PROBE0(futex, linux_sys_futex,
993		    unimplemented_wait_requeue_pi);
994		LIN_SDT_PROBE1(futex, linux_sys_futex, return, ENOSYS);
995		return (ENOSYS);
996
997	case LINUX_FUTEX_CMP_REQUEUE_PI:
998		/* not yet implemented */
999		linux_msg(td,
1000			    "linux_sys_futex: "
1001			    "op LINUX_FUTEX_CMP_REQUEUE_PI not implemented\n");
1002		LIN_SDT_PROBE0(futex, linux_sys_futex,
1003		    unimplemented_cmp_requeue_pi);
1004		LIN_SDT_PROBE1(futex, linux_sys_futex, return, ENOSYS);
1005		return (ENOSYS);
1006
1007	default:
1008		linux_msg(td,
1009			  "linux_sys_futex: unknown op %d\n", args->op);
1010		LIN_SDT_PROBE1(futex, linux_sys_futex, unknown_operation,
1011		    args->op);
1012		LIN_SDT_PROBE1(futex, linux_sys_futex, return, ENOSYS);
1013		return (ENOSYS);
1014	}
1015
1016	LIN_SDT_PROBE1(futex, linux_sys_futex, return, error);
1017	return (error);
1018}
1019
1020int
1021linux_set_robust_list(struct thread *td, struct linux_set_robust_list_args *args)
1022{
1023	struct linux_emuldata *em;
1024
1025	LIN_SDT_PROBE2(futex, linux_set_robust_list, entry, td, args);
1026
1027	if (args->len != sizeof(struct linux_robust_list_head)) {
1028		LIN_SDT_PROBE0(futex, linux_set_robust_list, size_error);
1029		LIN_SDT_PROBE1(futex, linux_set_robust_list, return, EINVAL);
1030		return (EINVAL);
1031	}
1032
1033	em = em_find(td);
1034	em->robust_futexes = args->head;
1035
1036	LIN_SDT_PROBE1(futex, linux_set_robust_list, return, 0);
1037	return (0);
1038}
1039
1040int
1041linux_get_robust_list(struct thread *td, struct linux_get_robust_list_args *args)
1042{
1043	struct linux_emuldata *em;
1044	struct linux_robust_list_head *head;
1045	l_size_t len = sizeof(struct linux_robust_list_head);
1046	struct thread *td2;
1047	int error = 0;
1048
1049	LIN_SDT_PROBE2(futex, linux_get_robust_list, entry, td, args);
1050
1051	if (!args->pid) {
1052		em = em_find(td);
1053		KASSERT(em != NULL, ("get_robust_list: emuldata notfound.\n"));
1054		head = em->robust_futexes;
1055	} else {
1056		td2 = tdfind(args->pid, -1);
1057		if (td2 == NULL) {
1058			LIN_SDT_PROBE1(futex, linux_get_robust_list, return,
1059			    ESRCH);
1060			return (ESRCH);
1061		}
1062
1063		em = em_find(td2);
1064		KASSERT(em != NULL, ("get_robust_list: emuldata notfound.\n"));
1065		/* XXX: ptrace? */
1066		if (priv_check(td, PRIV_CRED_SETUID) ||
1067		    priv_check(td, PRIV_CRED_SETEUID) ||
1068		    p_candebug(td, td2->td_proc)) {
1069			PROC_UNLOCK(td2->td_proc);
1070
1071			LIN_SDT_PROBE1(futex, linux_get_robust_list, return,
1072			    EPERM);
1073			return (EPERM);
1074		}
1075		head = em->robust_futexes;
1076
1077		PROC_UNLOCK(td2->td_proc);
1078	}
1079
1080	error = copyout(&len, args->len, sizeof(l_size_t));
1081	if (error) {
1082		LIN_SDT_PROBE1(futex, linux_get_robust_list, copyout_error,
1083		    error);
1084		LIN_SDT_PROBE1(futex, linux_get_robust_list, return, EFAULT);
1085		return (EFAULT);
1086	}
1087
1088	error = copyout(head, args->head, sizeof(struct linux_robust_list_head));
1089	if (error) {
1090		LIN_SDT_PROBE1(futex, linux_get_robust_list, copyout_error,
1091		    error);
1092	}
1093
1094	LIN_SDT_PROBE1(futex, linux_get_robust_list, return, error);
1095	return (error);
1096}
1097
1098static int
1099handle_futex_death(struct linux_emuldata *em, uint32_t *uaddr,
1100    unsigned int pi)
1101{
1102	uint32_t uval, nval, mval;
1103	struct futex *f;
1104	int error;
1105
1106	LIN_SDT_PROBE3(futex, handle_futex_death, entry, em, uaddr, pi);
1107
1108retry:
1109	error = copyin(uaddr, &uval, 4);
1110	if (error) {
1111		LIN_SDT_PROBE1(futex, handle_futex_death, copyin_error, error);
1112		LIN_SDT_PROBE1(futex, handle_futex_death, return, EFAULT);
1113		return (EFAULT);
1114	}
1115	if ((uval & FUTEX_TID_MASK) == em->em_tid) {
1116		mval = (uval & FUTEX_WAITERS) | FUTEX_OWNER_DIED;
1117		nval = casuword32(uaddr, uval, mval);
1118
1119		if (nval == -1) {
1120			LIN_SDT_PROBE1(futex, handle_futex_death, return,
1121			    EFAULT);
1122			return (EFAULT);
1123		}
1124
1125		if (nval != uval)
1126			goto retry;
1127
1128		if (!pi && (uval & FUTEX_WAITERS)) {
1129			error = futex_get(uaddr, NULL, &f,
1130			    FUTEX_DONTCREATE | FUTEX_SHARED);
1131			if (error) {
1132				LIN_SDT_PROBE1(futex, handle_futex_death,
1133				    return, error);
1134				return (error);
1135			}
1136			if (f != NULL) {
1137				futex_wake(f, 1, FUTEX_BITSET_MATCH_ANY);
1138				futex_put(f, NULL);
1139			}
1140		}
1141	}
1142
1143	LIN_SDT_PROBE1(futex, handle_futex_death, return, 0);
1144	return (0);
1145}
1146
1147static int
1148fetch_robust_entry(struct linux_robust_list **entry,
1149    struct linux_robust_list **head, unsigned int *pi)
1150{
1151	l_ulong uentry;
1152	int error;
1153
1154	LIN_SDT_PROBE3(futex, fetch_robust_entry, entry, entry, head, pi);
1155
1156	error = copyin((const void *)head, &uentry, sizeof(l_ulong));
1157	if (error) {
1158		LIN_SDT_PROBE1(futex, fetch_robust_entry, copyin_error, error);
1159		LIN_SDT_PROBE1(futex, fetch_robust_entry, return, EFAULT);
1160		return (EFAULT);
1161	}
1162
1163	*entry = (void *)(uentry & ~1UL);
1164	*pi = uentry & 1;
1165
1166	LIN_SDT_PROBE1(futex, fetch_robust_entry, return, 0);
1167	return (0);
1168}
1169
1170/* This walks the list of robust futexes releasing them. */
1171void
1172release_futexes(struct thread *td, struct linux_emuldata *em)
1173{
1174	struct linux_robust_list_head *head = NULL;
1175	struct linux_robust_list *entry, *next_entry, *pending;
1176	unsigned int limit = 2048, pi, next_pi, pip;
1177	l_long futex_offset;
1178	int rc, error;
1179
1180	LIN_SDT_PROBE2(futex, release_futexes, entry, td, em);
1181
1182	head = em->robust_futexes;
1183
1184	if (head == NULL) {
1185		LIN_SDT_PROBE0(futex, release_futexes, return);
1186		return;
1187	}
1188
1189	if (fetch_robust_entry(&entry, PTRIN(&head->list.next), &pi)) {
1190		LIN_SDT_PROBE0(futex, release_futexes, return);
1191		return;
1192	}
1193
1194	error = copyin(&head->futex_offset, &futex_offset,
1195	    sizeof(futex_offset));
1196	if (error) {
1197		LIN_SDT_PROBE1(futex, release_futexes, copyin_error, error);
1198		LIN_SDT_PROBE0(futex, release_futexes, return);
1199		return;
1200	}
1201
1202	if (fetch_robust_entry(&pending, PTRIN(&head->pending_list), &pip)) {
1203		LIN_SDT_PROBE0(futex, release_futexes, return);
1204		return;
1205	}
1206
1207	while (entry != &head->list) {
1208		rc = fetch_robust_entry(&next_entry, PTRIN(&entry->next), &next_pi);
1209
1210		if (entry != pending)
1211			if (handle_futex_death(em,
1212			    (uint32_t *)((caddr_t)entry + futex_offset), pi)) {
1213				LIN_SDT_PROBE0(futex, release_futexes, return);
1214				return;
1215			}
1216		if (rc) {
1217			LIN_SDT_PROBE0(futex, release_futexes, return);
1218			return;
1219		}
1220
1221		entry = next_entry;
1222		pi = next_pi;
1223
1224		if (!--limit)
1225			break;
1226
1227		sched_relinquish(curthread);
1228	}
1229
1230	if (pending)
1231		handle_futex_death(em, (uint32_t *)((caddr_t)pending + futex_offset), pip);
1232
1233	LIN_SDT_PROBE0(futex, release_futexes, return);
1234}
1235