linux_futex.c revision 293493
1/*	$NetBSD: linux_futex.c,v 1.7 2006/07/24 19:01:49 manu Exp $ */
2
3/*-
4 * Copyright (c) 2005 Emmanuel Dreyfus, all rights reserved.
5 *
6 * Redistribution and use in source and binary forms, with or without
7 * modification, are permitted provided that the following conditions
8 * are met:
9 * 1. Redistributions of source code must retain the above copyright
10 *    notice, this list of conditions and the following disclaimer.
11 * 2. Redistributions in binary form must reproduce the above copyright
12 *    notice, this list of conditions and the following disclaimer in the
13 *    documentation and/or other materials provided with the distribution.
14 * 3. All advertising materials mentioning features or use of this software
15 *    must display the following acknowledgement:
16 *	This product includes software developed by Emmanuel Dreyfus
17 * 4. The name of the author may not be used to endorse or promote
18 *    products derived from this software without specific prior written
19 *    permission.
20 *
21 * THIS SOFTWARE IS PROVIDED BY THE THE AUTHOR AND CONTRIBUTORS ``AS IS''
22 * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO,
23 * THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
24 * PURPOSE ARE DISCLAIMED.  IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS
25 * BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
26 * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
27 * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
28 * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
29 * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
30 * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
31 * POSSIBILITY OF SUCH DAMAGE.
32 */
33
34#include <sys/cdefs.h>
35__FBSDID("$FreeBSD: stable/10/sys/compat/linux/linux_futex.c 293493 2016-01-09 15:16:13Z dchagin $");
36#if 0
37__KERNEL_RCSID(1, "$NetBSD: linux_futex.c,v 1.7 2006/07/24 19:01:49 manu Exp $");
38#endif
39
40#include "opt_compat.h"
41#include "opt_kdtrace.h"
42
43#include <sys/param.h>
44#include <sys/systm.h>
45#include <sys/imgact.h>
46#include <sys/kernel.h>
47#include <sys/ktr.h>
48#include <sys/lock.h>
49#include <sys/malloc.h>
50#include <sys/mutex.h>
51#include <sys/priv.h>
52#include <sys/proc.h>
53#include <sys/queue.h>
54#include <sys/sched.h>
55#include <sys/sdt.h>
56#include <sys/sx.h>
57#include <sys/umtx.h>
58
59#ifdef COMPAT_LINUX32
60#include <machine/../linux32/linux.h>
61#include <machine/../linux32/linux32_proto.h>
62#else
63#include <machine/../linux/linux.h>
64#include <machine/../linux/linux_proto.h>
65#endif
66#include <compat/linux/linux_dtrace.h>
67#include <compat/linux/linux_emul.h>
68#include <compat/linux/linux_futex.h>
69#include <compat/linux/linux_util.h>
70
71/* DTrace init */
72LIN_SDT_PROVIDER_DECLARE(LINUX_DTRACE);
73
74/**
75 * Futex part for the special DTrace module "locks".
76 */
77LIN_SDT_PROBE_DEFINE1(locks, futex_mtx, locked, "struct mtx *");
78LIN_SDT_PROBE_DEFINE1(locks, futex_mtx, unlock, "struct mtx *");
79
80/**
81 * Per futex probes.
82 */
83LIN_SDT_PROBE_DEFINE1(futex, futex, create, "struct sx *");
84LIN_SDT_PROBE_DEFINE1(futex, futex, destroy, "struct sx *");
85
86/**
87 * DTrace probes in this module.
88 */
89LIN_SDT_PROBE_DEFINE2(futex, futex_put, entry, "struct futex *",
90    "struct waiting_proc *");
91LIN_SDT_PROBE_DEFINE3(futex, futex_put, destroy, "uint32_t *", "uint32_t",
92    "int");
93LIN_SDT_PROBE_DEFINE3(futex, futex_put, unlock, "uint32_t *", "uint32_t",
94    "int");
95LIN_SDT_PROBE_DEFINE0(futex, futex_put, return);
96LIN_SDT_PROBE_DEFINE3(futex, futex_get0, entry, "uint32_t *", "struct futex **",
97    "uint32_t");
98LIN_SDT_PROBE_DEFINE1(futex, futex_get0, umtx_key_get_error, "int");
99LIN_SDT_PROBE_DEFINE3(futex, futex_get0, shared, "uint32_t *", "uint32_t",
100    "int");
101LIN_SDT_PROBE_DEFINE1(futex, futex_get0, null, "uint32_t *");
102LIN_SDT_PROBE_DEFINE3(futex, futex_get0, new, "uint32_t *", "uint32_t", "int");
103LIN_SDT_PROBE_DEFINE1(futex, futex_get0, return, "int");
104LIN_SDT_PROBE_DEFINE3(futex, futex_get, entry, "uint32_t *",
105    "struct waiting_proc **", "struct futex **");
106LIN_SDT_PROBE_DEFINE0(futex, futex_get, error);
107LIN_SDT_PROBE_DEFINE1(futex, futex_get, return, "int");
108LIN_SDT_PROBE_DEFINE3(futex, futex_sleep, entry, "struct futex *",
109    "struct waiting_proc **", "int");
110LIN_SDT_PROBE_DEFINE5(futex, futex_sleep, requeue_error, "int", "uint32_t *",
111    "struct waiting_proc *", "uint32_t *", "uint32_t");
112LIN_SDT_PROBE_DEFINE3(futex, futex_sleep, sleep_error, "int", "uint32_t *",
113    "struct waiting_proc *");
114LIN_SDT_PROBE_DEFINE1(futex, futex_sleep, return, "int");
115LIN_SDT_PROBE_DEFINE3(futex, futex_wake, entry, "struct futex *", "int",
116    "uint32_t");
117LIN_SDT_PROBE_DEFINE3(futex, futex_wake, iterate, "uint32_t",
118    "struct waiting_proc *", "uint32_t");
119LIN_SDT_PROBE_DEFINE1(futex, futex_wake, wakeup, "struct waiting_proc *");
120LIN_SDT_PROBE_DEFINE1(futex, futex_wake, return, "int");
121LIN_SDT_PROBE_DEFINE4(futex, futex_requeue, entry, "struct futex *", "int",
122    "struct futex *", "int");
123LIN_SDT_PROBE_DEFINE1(futex, futex_requeue, wakeup, "struct waiting_proc *");
124LIN_SDT_PROBE_DEFINE3(futex, futex_requeue, requeue, "uint32_t *",
125    "struct waiting_proc *", "uint32_t");
126LIN_SDT_PROBE_DEFINE1(futex, futex_requeue, return, "int");
127LIN_SDT_PROBE_DEFINE4(futex, futex_wait, entry, "struct futex *",
128    "struct waiting_proc **", "int", "uint32_t");
129LIN_SDT_PROBE_DEFINE1(futex, futex_wait, sleep_error, "int");
130LIN_SDT_PROBE_DEFINE1(futex, futex_wait, return, "int");
131LIN_SDT_PROBE_DEFINE3(futex, futex_atomic_op, entry, "struct thread *",
132    "int", "uint32_t");
133LIN_SDT_PROBE_DEFINE4(futex, futex_atomic_op, decoded_op, "int", "int", "int",
134    "int");
135LIN_SDT_PROBE_DEFINE0(futex, futex_atomic_op, missing_access_check);
136LIN_SDT_PROBE_DEFINE1(futex, futex_atomic_op, unimplemented_op, "int");
137LIN_SDT_PROBE_DEFINE1(futex, futex_atomic_op, unimplemented_cmp, "int");
138LIN_SDT_PROBE_DEFINE1(futex, futex_atomic_op, return, "int");
139LIN_SDT_PROBE_DEFINE2(futex, linux_sys_futex, entry, "struct thread *",
140    "struct linux_sys_futex_args *");
141LIN_SDT_PROBE_DEFINE0(futex, linux_sys_futex, unimplemented_clockswitch);
142LIN_SDT_PROBE_DEFINE1(futex, linux_sys_futex, itimerfix_error, "int");
143LIN_SDT_PROBE_DEFINE1(futex, linux_sys_futex, copyin_error, "int");
144LIN_SDT_PROBE_DEFINE0(futex, linux_sys_futex, invalid_cmp_requeue_use);
145LIN_SDT_PROBE_DEFINE3(futex, linux_sys_futex, debug_wait, "uint32_t *",
146    "uint32_t", "uint32_t");
147LIN_SDT_PROBE_DEFINE4(futex, linux_sys_futex, debug_wait_value_neq,
148    "uint32_t *", "uint32_t", "int", "uint32_t");
149LIN_SDT_PROBE_DEFINE3(futex, linux_sys_futex, debug_wake, "uint32_t *",
150    "uint32_t", "uint32_t");
151LIN_SDT_PROBE_DEFINE5(futex, linux_sys_futex, debug_cmp_requeue, "uint32_t *",
152    "uint32_t", "uint32_t", "uint32_t *", "struct l_timespec *");
153LIN_SDT_PROBE_DEFINE2(futex, linux_sys_futex, debug_cmp_requeue_value_neq,
154    "uint32_t", "int");
155LIN_SDT_PROBE_DEFINE5(futex, linux_sys_futex, debug_wake_op, "uint32_t *",
156    "int", "uint32_t", "uint32_t *", "uint32_t");
157LIN_SDT_PROBE_DEFINE0(futex, linux_sys_futex, unhandled_efault);
158LIN_SDT_PROBE_DEFINE0(futex, linux_sys_futex, unimplemented_lock_pi);
159LIN_SDT_PROBE_DEFINE0(futex, linux_sys_futex, unimplemented_unlock_pi);
160LIN_SDT_PROBE_DEFINE0(futex, linux_sys_futex, unimplemented_trylock_pi);
161LIN_SDT_PROBE_DEFINE0(futex, linux_sys_futex, deprecated_requeue);
162LIN_SDT_PROBE_DEFINE0(futex, linux_sys_futex, unimplemented_wait_requeue_pi);
163LIN_SDT_PROBE_DEFINE0(futex, linux_sys_futex, unimplemented_cmp_requeue_pi);
164LIN_SDT_PROBE_DEFINE1(futex, linux_sys_futex, unknown_operation, "int");
165LIN_SDT_PROBE_DEFINE1(futex, linux_sys_futex, return, "int");
166LIN_SDT_PROBE_DEFINE2(futex, linux_set_robust_list, entry, "struct thread *",
167    "struct linux_set_robust_list_args *");
168LIN_SDT_PROBE_DEFINE0(futex, linux_set_robust_list, size_error);
169LIN_SDT_PROBE_DEFINE1(futex, linux_set_robust_list, return, "int");
170LIN_SDT_PROBE_DEFINE2(futex, linux_get_robust_list, entry, "struct thread *",
171    "struct linux_get_robust_list_args *");
172LIN_SDT_PROBE_DEFINE1(futex, linux_get_robust_list, copyout_error, "int");
173LIN_SDT_PROBE_DEFINE1(futex, linux_get_robust_list, return, "int");
174LIN_SDT_PROBE_DEFINE3(futex, handle_futex_death, entry,
175    "struct linux_emuldata *", "uint32_t *", "unsigned int");
176LIN_SDT_PROBE_DEFINE1(futex, handle_futex_death, copyin_error, "int");
177LIN_SDT_PROBE_DEFINE1(futex, handle_futex_death, return, "int");
178LIN_SDT_PROBE_DEFINE3(futex, fetch_robust_entry, entry,
179    "struct linux_robust_list **", "struct linux_robust_list **",
180    "unsigned int *");
181LIN_SDT_PROBE_DEFINE1(futex, fetch_robust_entry, copyin_error, "int");
182LIN_SDT_PROBE_DEFINE1(futex, fetch_robust_entry, return, "int");
183LIN_SDT_PROBE_DEFINE2(futex, release_futexes, entry, "struct thread *",
184    "struct linux_emuldata *");
185LIN_SDT_PROBE_DEFINE1(futex, release_futexes, copyin_error, "int");
186LIN_SDT_PROBE_DEFINE0(futex, release_futexes, return);
187
188static MALLOC_DEFINE(M_FUTEX, "futex", "Linux futexes");
189static MALLOC_DEFINE(M_FUTEX_WP, "futex wp", "Linux futexes wp");
190
191struct futex;
192
193struct waiting_proc {
194	uint32_t	wp_flags;
195	struct futex	*wp_futex;
196	TAILQ_ENTRY(waiting_proc) wp_list;
197};
198
199struct futex {
200	struct sx	f_lck;
201	uint32_t	*f_uaddr;	/* user-supplied value, for debug */
202	struct umtx_key	f_key;
203	uint32_t	f_refcount;
204	uint32_t	f_bitset;
205	LIST_ENTRY(futex) f_list;
206	TAILQ_HEAD(lf_waiting_proc, waiting_proc) f_waiting_proc;
207};
208
209struct futex_list futex_list;
210
211#define FUTEX_LOCK(f)		sx_xlock(&(f)->f_lck)
212#define FUTEX_UNLOCK(f)		sx_xunlock(&(f)->f_lck)
213#define FUTEX_INIT(f)		do { \
214				    sx_init_flags(&(f)->f_lck, "ftlk", \
215					SX_DUPOK); \
216				    LIN_SDT_PROBE1(futex, futex, create, \
217					&(f)->f_lck); \
218				} while (0)
219#define FUTEX_DESTROY(f)	do { \
220				    LIN_SDT_PROBE1(futex, futex, destroy, \
221					&(f)->f_lck); \
222				    sx_destroy(&(f)->f_lck); \
223				} while (0)
224#define FUTEX_ASSERT_LOCKED(f)	sx_assert(&(f)->f_lck, SA_XLOCKED)
225
226struct mtx futex_mtx;			/* protects the futex list */
227#define FUTEXES_LOCK		do { \
228				    mtx_lock(&futex_mtx); \
229				    LIN_SDT_PROBE1(locks, futex_mtx, \
230					locked, &futex_mtx); \
231				} while (0)
232#define FUTEXES_UNLOCK		do { \
233				    LIN_SDT_PROBE1(locks, futex_mtx, \
234					unlock, &futex_mtx); \
235				    mtx_unlock(&futex_mtx); \
236				} while (0)
237
/*
 * Flags accepted by futex_get() / futex_get0().
 */
#define	FUTEX_CREATE_WP		0x1	/* also allocate and queue a waiting_proc */
#define	FUTEX_DONTCREATE	0x2	/* lookup only: do not create a missing futex */
#define	FUTEX_DONTEXISTS	0x4	/* fail with EINVAL if the futex already exists */
#define	FUTEX_SHARED		0x8	/* shared (not thread-private) futex */

/*
 * Bits kept in waiting_proc.wp_flags.
 */
#define	FUTEX_WP_REQUEUED	0x1	/*
					 * The record was moved from the wp_list
					 * of the futex its thread went to sleep
					 * on to the wp_list of another futex.
					 */
#define	FUTEX_WP_REMOVED	0x2	/*
					 * The record was already taken off the
					 * futex wp_list by a waker, which
					 * prevents a double wakeup.
					 */

/*
 * Atomic read-modify-write primitives on a user-space word, implemented in
 * support.s.  Each takes the operand, the user address and a location for
 * the previous value; futex_atomic_op() treats a non-zero return as failure.
 */
int futex_xchgl(int oparg, uint32_t *uaddr, int *oldval);
int futex_addl(int oparg, uint32_t *uaddr, int *oldval);
int futex_orl(int oparg, uint32_t *uaddr, int *oldval);
int futex_andl(int oparg, uint32_t *uaddr, int *oldval);
int futex_xorl(int oparg, uint32_t *uaddr, int *oldval);
259
260static void
261futex_put(struct futex *f, struct waiting_proc *wp)
262{
263	LIN_SDT_PROBE2(futex, futex_put, entry, f, wp);
264
265	FUTEX_ASSERT_LOCKED(f);
266	if (wp != NULL) {
267		if ((wp->wp_flags & FUTEX_WP_REMOVED) == 0)
268			TAILQ_REMOVE(&f->f_waiting_proc, wp, wp_list);
269		free(wp, M_FUTEX_WP);
270	}
271
272	FUTEXES_LOCK;
273	if (--f->f_refcount == 0) {
274		LIST_REMOVE(f, f_list);
275		FUTEXES_UNLOCK;
276		FUTEX_UNLOCK(f);
277
278		LIN_SDT_PROBE3(futex, futex_put, destroy, f->f_uaddr,
279		    f->f_refcount, f->f_key.shared);
280		LINUX_CTR3(sys_futex, "futex_put destroy uaddr %p ref %d "
281		    "shared %d", f->f_uaddr, f->f_refcount, f->f_key.shared);
282		umtx_key_release(&f->f_key);
283		FUTEX_DESTROY(f);
284		free(f, M_FUTEX);
285
286		LIN_SDT_PROBE0(futex, futex_put, return);
287		return;
288	}
289
290	LIN_SDT_PROBE3(futex, futex_put, unlock, f->f_uaddr, f->f_refcount,
291	    f->f_key.shared);
292	LINUX_CTR3(sys_futex, "futex_put uaddr %p ref %d shared %d",
293	    f->f_uaddr, f->f_refcount, f->f_key.shared);
294	FUTEXES_UNLOCK;
295	FUTEX_UNLOCK(f);
296
297	LIN_SDT_PROBE0(futex, futex_put, return);
298}
299
300static int
301futex_get0(uint32_t *uaddr, struct futex **newf, uint32_t flags)
302{
303	struct futex *f, *tmpf;
304	struct umtx_key key;
305	int error;
306
307	LIN_SDT_PROBE3(futex, futex_get0, entry, uaddr, newf, flags);
308
309	*newf = tmpf = NULL;
310
311	error = umtx_key_get(uaddr, TYPE_FUTEX, (flags & FUTEX_SHARED) ?
312	    AUTO_SHARE : THREAD_SHARE, &key);
313	if (error) {
314		LIN_SDT_PROBE1(futex, futex_get0, umtx_key_get_error, error);
315		LIN_SDT_PROBE1(futex, futex_get0, return, error);
316		return (error);
317	}
318retry:
319	FUTEXES_LOCK;
320	LIST_FOREACH(f, &futex_list, f_list) {
321		if (umtx_key_match(&f->f_key, &key)) {
322			if (tmpf != NULL) {
323				FUTEX_UNLOCK(tmpf);
324				FUTEX_DESTROY(tmpf);
325				free(tmpf, M_FUTEX);
326			}
327			if (flags & FUTEX_DONTEXISTS) {
328				FUTEXES_UNLOCK;
329				umtx_key_release(&key);
330
331				LIN_SDT_PROBE1(futex, futex_get0, return,
332				    EINVAL);
333				return (EINVAL);
334			}
335
336			/*
337			 * Increment refcount of the found futex to
338			 * prevent it from deallocation before FUTEX_LOCK()
339			 */
340			++f->f_refcount;
341			FUTEXES_UNLOCK;
342			umtx_key_release(&key);
343
344			FUTEX_LOCK(f);
345			*newf = f;
346			LIN_SDT_PROBE3(futex, futex_get0, shared, uaddr,
347			    f->f_refcount, f->f_key.shared);
348			LINUX_CTR3(sys_futex, "futex_get uaddr %p ref %d shared %d",
349			    uaddr, f->f_refcount, f->f_key.shared);
350
351			LIN_SDT_PROBE1(futex, futex_get0, return, 0);
352			return (0);
353		}
354	}
355
356	if (flags & FUTEX_DONTCREATE) {
357		FUTEXES_UNLOCK;
358		umtx_key_release(&key);
359		LIN_SDT_PROBE1(futex, futex_get0, null, uaddr);
360		LINUX_CTR1(sys_futex, "futex_get uaddr %p null", uaddr);
361
362		LIN_SDT_PROBE1(futex, futex_get0, return, 0);
363		return (0);
364	}
365
366	if (tmpf == NULL) {
367		FUTEXES_UNLOCK;
368		tmpf = malloc(sizeof(*tmpf), M_FUTEX, M_WAITOK | M_ZERO);
369		tmpf->f_uaddr = uaddr;
370		tmpf->f_key = key;
371		tmpf->f_refcount = 1;
372		tmpf->f_bitset = FUTEX_BITSET_MATCH_ANY;
373		FUTEX_INIT(tmpf);
374		TAILQ_INIT(&tmpf->f_waiting_proc);
375
376		/*
377		 * Lock the new futex before an insert into the futex_list
378		 * to prevent futex usage by other.
379		 */
380		FUTEX_LOCK(tmpf);
381		goto retry;
382	}
383
384	LIST_INSERT_HEAD(&futex_list, tmpf, f_list);
385	FUTEXES_UNLOCK;
386
387	LIN_SDT_PROBE3(futex, futex_get0, new, uaddr, tmpf->f_refcount,
388	    tmpf->f_key.shared);
389	LINUX_CTR3(sys_futex, "futex_get uaddr %p ref %d shared %d new",
390	    uaddr, tmpf->f_refcount, tmpf->f_key.shared);
391	*newf = tmpf;
392
393	LIN_SDT_PROBE1(futex, futex_get0, return, 0);
394	return (0);
395}
396
397static int
398futex_get(uint32_t *uaddr, struct waiting_proc **wp, struct futex **f,
399    uint32_t flags)
400{
401	int error;
402
403	LIN_SDT_PROBE3(futex, futex_get, entry, uaddr, wp, f);
404
405	if (flags & FUTEX_CREATE_WP) {
406		*wp = malloc(sizeof(struct waiting_proc), M_FUTEX_WP, M_WAITOK);
407		(*wp)->wp_flags = 0;
408	}
409	error = futex_get0(uaddr, f, flags);
410	if (error) {
411		LIN_SDT_PROBE0(futex, futex_get, error);
412
413		if (flags & FUTEX_CREATE_WP)
414			free(*wp, M_FUTEX_WP);
415
416		LIN_SDT_PROBE1(futex, futex_get, return, error);
417		return (error);
418	}
419	if (flags & FUTEX_CREATE_WP) {
420		TAILQ_INSERT_HEAD(&(*f)->f_waiting_proc, *wp, wp_list);
421		(*wp)->wp_futex = *f;
422	}
423
424	LIN_SDT_PROBE1(futex, futex_get, return, error);
425	return (error);
426}
427
428static int
429futex_sleep(struct futex *f, struct waiting_proc *wp, int timeout)
430{
431	int error;
432
433	FUTEX_ASSERT_LOCKED(f);
434	LIN_SDT_PROBE3(futex, futex_sleep, entry, f, wp, timeout);
435	LINUX_CTR4(sys_futex, "futex_sleep enter uaddr %p wp %p timo %d ref %d",
436	    f->f_uaddr, wp, timeout, f->f_refcount);
437	error = sx_sleep(wp, &f->f_lck, PCATCH, "futex", timeout);
438	if (wp->wp_flags & FUTEX_WP_REQUEUED) {
439		KASSERT(f != wp->wp_futex, ("futex != wp_futex"));
440
441		if (error) {
442			LIN_SDT_PROBE5(futex, futex_sleep, requeue_error, error,
443			    f->f_uaddr, wp, wp->wp_futex->f_uaddr,
444			    wp->wp_futex->f_refcount);
445		}
446
447		LINUX_CTR5(sys_futex, "futex_sleep out error %d uaddr %p wp"
448		    " %p requeued uaddr %p ref %d",
449		    error, f->f_uaddr, wp, wp->wp_futex->f_uaddr,
450		    wp->wp_futex->f_refcount);
451		futex_put(f, NULL);
452		f = wp->wp_futex;
453		FUTEX_LOCK(f);
454	} else {
455		if (error) {
456			LIN_SDT_PROBE3(futex, futex_sleep, sleep_error, error,
457			    f->f_uaddr, wp);
458		}
459		LINUX_CTR3(sys_futex, "futex_sleep out error %d uaddr %p wp %p",
460		    error, f->f_uaddr, wp);
461	}
462
463	futex_put(f, wp);
464
465	LIN_SDT_PROBE1(futex, futex_sleep, return, error);
466	return (error);
467}
468
469static int
470futex_wake(struct futex *f, int n, uint32_t bitset)
471{
472	struct waiting_proc *wp, *wpt;
473	int count = 0;
474
475	LIN_SDT_PROBE3(futex, futex_wake, entry, f, n, bitset);
476
477	if (bitset == 0) {
478		LIN_SDT_PROBE1(futex, futex_wake, return, EINVAL);
479		return (EINVAL);
480	}
481
482	FUTEX_ASSERT_LOCKED(f);
483	TAILQ_FOREACH_SAFE(wp, &f->f_waiting_proc, wp_list, wpt) {
484		LIN_SDT_PROBE3(futex, futex_wake, iterate, f->f_uaddr, wp,
485		    f->f_refcount);
486		LINUX_CTR3(sys_futex, "futex_wake uaddr %p wp %p ref %d",
487		    f->f_uaddr, wp, f->f_refcount);
488		/*
489		 * Unless we find a matching bit in
490		 * the bitset, continue searching.
491		 */
492		if (!(wp->wp_futex->f_bitset & bitset))
493			continue;
494
495		wp->wp_flags |= FUTEX_WP_REMOVED;
496		TAILQ_REMOVE(&f->f_waiting_proc, wp, wp_list);
497		LIN_SDT_PROBE1(futex, futex_wake, wakeup, wp);
498		wakeup_one(wp);
499		if (++count == n)
500			break;
501	}
502
503	LIN_SDT_PROBE1(futex, futex_wake, return, count);
504	return (count);
505}
506
507static int
508futex_requeue(struct futex *f, int n, struct futex *f2, int n2)
509{
510	struct waiting_proc *wp, *wpt;
511	int count = 0;
512
513	LIN_SDT_PROBE4(futex, futex_requeue, entry, f, n, f2, n2);
514
515	FUTEX_ASSERT_LOCKED(f);
516	FUTEX_ASSERT_LOCKED(f2);
517
518	TAILQ_FOREACH_SAFE(wp, &f->f_waiting_proc, wp_list, wpt) {
519		if (++count <= n) {
520			LINUX_CTR2(sys_futex, "futex_req_wake uaddr %p wp %p",
521			    f->f_uaddr, wp);
522			wp->wp_flags |= FUTEX_WP_REMOVED;
523			TAILQ_REMOVE(&f->f_waiting_proc, wp, wp_list);
524			LIN_SDT_PROBE1(futex, futex_requeue, wakeup, wp);
525			wakeup_one(wp);
526		} else {
527			LIN_SDT_PROBE3(futex, futex_requeue, requeue,
528			    f->f_uaddr, wp, f2->f_uaddr);
529			LINUX_CTR3(sys_futex, "futex_requeue uaddr %p wp %p to %p",
530			    f->f_uaddr, wp, f2->f_uaddr);
531			wp->wp_flags |= FUTEX_WP_REQUEUED;
532			/* Move wp to wp_list of f2 futex */
533			TAILQ_REMOVE(&f->f_waiting_proc, wp, wp_list);
534			TAILQ_INSERT_HEAD(&f2->f_waiting_proc, wp, wp_list);
535
536			/*
537			 * Thread which sleeps on wp after waking should
538			 * acquire f2 lock, so increment refcount of f2 to
539			 * prevent it from premature deallocation.
540			 */
541			wp->wp_futex = f2;
542			FUTEXES_LOCK;
543			++f2->f_refcount;
544			FUTEXES_UNLOCK;
545			if (count - n >= n2)
546				break;
547		}
548	}
549
550	LIN_SDT_PROBE1(futex, futex_requeue, return, count);
551	return (count);
552}
553
554static int
555futex_wait(struct futex *f, struct waiting_proc *wp, int timeout_hz,
556    uint32_t bitset)
557{
558	int error;
559
560	LIN_SDT_PROBE4(futex, futex_wait, entry, f, wp, timeout_hz, bitset);
561
562	if (bitset == 0) {
563		LIN_SDT_PROBE1(futex, futex_wait, return, EINVAL);
564		return (EINVAL);
565	}
566
567	f->f_bitset = bitset;
568	error = futex_sleep(f, wp, timeout_hz);
569	if (error)
570		LIN_SDT_PROBE1(futex, futex_wait, sleep_error, error);
571	if (error == EWOULDBLOCK)
572		error = ETIMEDOUT;
573
574	LIN_SDT_PROBE1(futex, futex_wait, return, error);
575	return (error);
576}
577
578static int
579futex_atomic_op(struct thread *td, int encoded_op, uint32_t *uaddr)
580{
581	int op = (encoded_op >> 28) & 7;
582	int cmp = (encoded_op >> 24) & 15;
583	int oparg = (encoded_op << 8) >> 20;
584	int cmparg = (encoded_op << 20) >> 20;
585	int oldval = 0, ret;
586
587	LIN_SDT_PROBE3(futex, futex_atomic_op, entry, td, encoded_op, uaddr);
588
589	if (encoded_op & (FUTEX_OP_OPARG_SHIFT << 28))
590		oparg = 1 << oparg;
591
592	LIN_SDT_PROBE4(futex, futex_atomic_op, decoded_op, op, cmp, oparg,
593	    cmparg);
594
595	/* XXX: Linux verifies access here and returns EFAULT */
596	LIN_SDT_PROBE0(futex, futex_atomic_op, missing_access_check);
597
598	switch (op) {
599	case FUTEX_OP_SET:
600		ret = futex_xchgl(oparg, uaddr, &oldval);
601		break;
602	case FUTEX_OP_ADD:
603		ret = futex_addl(oparg, uaddr, &oldval);
604		break;
605	case FUTEX_OP_OR:
606		ret = futex_orl(oparg, uaddr, &oldval);
607		break;
608	case FUTEX_OP_ANDN:
609		ret = futex_andl(~oparg, uaddr, &oldval);
610		break;
611	case FUTEX_OP_XOR:
612		ret = futex_xorl(oparg, uaddr, &oldval);
613		break;
614	default:
615		LIN_SDT_PROBE1(futex, futex_atomic_op, unimplemented_op, op);
616		ret = -ENOSYS;
617		break;
618	}
619
620	if (ret) {
621		LIN_SDT_PROBE1(futex, futex_atomic_op, return, ret);
622		return (ret);
623	}
624
625	switch (cmp) {
626	case FUTEX_OP_CMP_EQ:
627		ret = (oldval == cmparg);
628		break;
629	case FUTEX_OP_CMP_NE:
630		ret = (oldval != cmparg);
631		break;
632	case FUTEX_OP_CMP_LT:
633		ret = (oldval < cmparg);
634		break;
635	case FUTEX_OP_CMP_GE:
636		ret = (oldval >= cmparg);
637		break;
638	case FUTEX_OP_CMP_LE:
639		ret = (oldval <= cmparg);
640		break;
641	case FUTEX_OP_CMP_GT:
642		ret = (oldval > cmparg);
643		break;
644	default:
645		LIN_SDT_PROBE1(futex, futex_atomic_op, unimplemented_cmp, cmp);
646		ret = -ENOSYS;
647	}
648
649	LIN_SDT_PROBE1(futex, futex_atomic_op, return, ret);
650	return (ret);
651}
652
653int
654linux_sys_futex(struct thread *td, struct linux_sys_futex_args *args)
655{
656	int clockrt, nrwake, op_ret, ret;
657	struct linux_emuldata *em;
658	struct waiting_proc *wp;
659	struct futex *f, *f2;
660	struct l_timespec timeout;
661	struct timeval utv, ctv;
662	int timeout_hz;
663	int error;
664	uint32_t flags, val;
665
666	LIN_SDT_PROBE2(futex, linux_sys_futex, entry, td, args);
667
668	if (args->op & LINUX_FUTEX_PRIVATE_FLAG) {
669		flags = 0;
670		args->op &= ~LINUX_FUTEX_PRIVATE_FLAG;
671	} else
672		flags = FUTEX_SHARED;
673
674	/*
675	 * Currently support for switching between CLOCK_MONOTONIC and
676	 * CLOCK_REALTIME is not present. However Linux forbids the use of
677	 * FUTEX_CLOCK_REALTIME with any op except FUTEX_WAIT_BITSET and
678	 * FUTEX_WAIT_REQUEUE_PI.
679	 */
680	clockrt = args->op & LINUX_FUTEX_CLOCK_REALTIME;
681	args->op = args->op & ~LINUX_FUTEX_CLOCK_REALTIME;
682	if (clockrt && args->op != LINUX_FUTEX_WAIT_BITSET &&
683		args->op != LINUX_FUTEX_WAIT_REQUEUE_PI) {
684		LIN_SDT_PROBE0(futex, linux_sys_futex,
685		    unimplemented_clockswitch);
686		LIN_SDT_PROBE1(futex, linux_sys_futex, return, ENOSYS);
687		return (ENOSYS);
688	}
689
690	error = 0;
691	f = f2 = NULL;
692
693	switch (args->op) {
694	case LINUX_FUTEX_WAIT:
695		args->val3 = FUTEX_BITSET_MATCH_ANY;
696		/* FALLTHROUGH */
697
698	case LINUX_FUTEX_WAIT_BITSET:
699		LIN_SDT_PROBE3(futex, linux_sys_futex, debug_wait, args->uaddr,
700		    args->val, args->val3);
701		LINUX_CTR3(sys_futex, "WAIT uaddr %p val 0x%x bitset 0x%x",
702		    args->uaddr, args->val, args->val3);
703
704		error = futex_get(args->uaddr, &wp, &f,
705		    flags | FUTEX_CREATE_WP);
706		if (error) {
707			LIN_SDT_PROBE1(futex, linux_sys_futex, return, error);
708			return (error);
709		}
710
711		error = copyin(args->uaddr, &val, sizeof(val));
712		if (error) {
713			LIN_SDT_PROBE1(futex, linux_sys_futex, copyin_error,
714			    error);
715			LINUX_CTR1(sys_futex, "WAIT copyin failed %d",
716			    error);
717			futex_put(f, wp);
718
719			LIN_SDT_PROBE1(futex, linux_sys_futex, return, error);
720			return (error);
721		}
722		if (val != args->val) {
723			LIN_SDT_PROBE4(futex, linux_sys_futex,
724			    debug_wait_value_neq, args->uaddr, args->val, val,
725			    args->val3);
726			LINUX_CTR3(sys_futex,
727			    "WAIT uaddr %p val 0x%x != uval 0x%x",
728			    args->uaddr, args->val, val);
729			futex_put(f, wp);
730
731			LIN_SDT_PROBE1(futex, linux_sys_futex, return,
732			    EWOULDBLOCK);
733			return (EWOULDBLOCK);
734		}
735
736		if (args->timeout != NULL) {
737			error = copyin(args->timeout, &timeout, sizeof(timeout));
738			if (error) {
739				LIN_SDT_PROBE1(futex, linux_sys_futex, copyin_error,
740				    error);
741				LIN_SDT_PROBE1(futex, linux_sys_futex, return, error);
742				futex_put(f, wp);
743				return (error);
744			}
745			TIMESPEC_TO_TIMEVAL(&utv, &timeout);
746			error = itimerfix(&utv);
747			if (error) {
748				LIN_SDT_PROBE1(futex, linux_sys_futex, itimerfix_error,
749				    error);
750				LIN_SDT_PROBE1(futex, linux_sys_futex, return, error);
751				futex_put(f, wp);
752				return (error);
753			}
754			if (clockrt) {
755				microtime(&ctv);
756				timevalsub(&utv, &ctv);
757			} else if (args->op == LINUX_FUTEX_WAIT_BITSET) {
758				microuptime(&ctv);
759				timevalsub(&utv, &ctv);
760			}
761			if (utv.tv_sec < 0)
762				timevalclear(&utv);
763			timeout_hz = tvtohz(&utv);
764		} else
765			timeout_hz = 0;
766
767		error = futex_wait(f, wp, timeout_hz, args->val3);
768		break;
769
770	case LINUX_FUTEX_WAKE:
771		args->val3 = FUTEX_BITSET_MATCH_ANY;
772		/* FALLTHROUGH */
773
774	case LINUX_FUTEX_WAKE_BITSET:
775		LIN_SDT_PROBE3(futex, linux_sys_futex, debug_wake, args->uaddr,
776		    args->val, args->val3);
777		LINUX_CTR3(sys_futex, "WAKE uaddr %p nrwake 0x%x bitset 0x%x",
778		    args->uaddr, args->val, args->val3);
779
780		error = futex_get(args->uaddr, NULL, &f,
781		    flags | FUTEX_DONTCREATE);
782		if (error) {
783			LIN_SDT_PROBE1(futex, linux_sys_futex, return, error);
784			return (error);
785		}
786
787		if (f == NULL) {
788			td->td_retval[0] = 0;
789
790			LIN_SDT_PROBE1(futex, linux_sys_futex, return, error);
791			return (error);
792		}
793		td->td_retval[0] = futex_wake(f, args->val, args->val3);
794		futex_put(f, NULL);
795		break;
796
797	case LINUX_FUTEX_CMP_REQUEUE:
798		LIN_SDT_PROBE5(futex, linux_sys_futex, debug_cmp_requeue,
799		    args->uaddr, args->val, args->val3, args->uaddr2,
800		    args->timeout);
801		LINUX_CTR5(sys_futex, "CMP_REQUEUE uaddr %p "
802		    "nrwake 0x%x uval 0x%x uaddr2 %p nrequeue 0x%x",
803		    args->uaddr, args->val, args->val3, args->uaddr2,
804		    args->timeout);
805
806		/*
807		 * Linux allows this, we would not, it is an incorrect
808		 * usage of declared ABI, so return EINVAL.
809		 */
810		if (args->uaddr == args->uaddr2) {
811			LIN_SDT_PROBE0(futex, linux_sys_futex,
812			    invalid_cmp_requeue_use);
813			LIN_SDT_PROBE1(futex, linux_sys_futex, return, EINVAL);
814			return (EINVAL);
815		}
816
817		error = futex_get(args->uaddr, NULL, &f, flags);
818		if (error) {
819			LIN_SDT_PROBE1(futex, linux_sys_futex, return, error);
820			return (error);
821		}
822
823		/*
824		 * To avoid deadlocks return EINVAL if second futex
825		 * exists at this time.
826		 *
827		 * Glibc fall back to FUTEX_WAKE in case of any error
828		 * returned by FUTEX_CMP_REQUEUE.
829		 */
830		error = futex_get(args->uaddr2, NULL, &f2,
831		    flags | FUTEX_DONTEXISTS);
832		if (error) {
833			futex_put(f, NULL);
834
835			LIN_SDT_PROBE1(futex, linux_sys_futex, return, error);
836			return (error);
837		}
838		error = copyin(args->uaddr, &val, sizeof(val));
839		if (error) {
840			LIN_SDT_PROBE1(futex, linux_sys_futex, copyin_error,
841			    error);
842			LINUX_CTR1(sys_futex, "CMP_REQUEUE copyin failed %d",
843			    error);
844			futex_put(f2, NULL);
845			futex_put(f, NULL);
846
847			LIN_SDT_PROBE1(futex, linux_sys_futex, return, error);
848			return (error);
849		}
850		if (val != args->val3) {
851			LIN_SDT_PROBE2(futex, linux_sys_futex,
852			    debug_cmp_requeue_value_neq, args->val, val);
853			LINUX_CTR2(sys_futex, "CMP_REQUEUE val 0x%x != uval 0x%x",
854			    args->val, val);
855			futex_put(f2, NULL);
856			futex_put(f, NULL);
857
858			LIN_SDT_PROBE1(futex, linux_sys_futex, return, EAGAIN);
859			return (EAGAIN);
860		}
861
862		nrwake = (int)(unsigned long)args->timeout;
863		td->td_retval[0] = futex_requeue(f, args->val, f2, nrwake);
864		futex_put(f2, NULL);
865		futex_put(f, NULL);
866		break;
867
868	case LINUX_FUTEX_WAKE_OP:
869		LIN_SDT_PROBE5(futex, linux_sys_futex, debug_wake_op,
870		    args->uaddr, args->op, args->val, args->uaddr2, args->val3);
871		LINUX_CTR5(sys_futex, "WAKE_OP "
872		    "uaddr %p nrwake 0x%x uaddr2 %p op 0x%x nrwake2 0x%x",
873		    args->uaddr, args->val, args->uaddr2, args->val3,
874		    args->timeout);
875
876		error = futex_get(args->uaddr, NULL, &f, flags);
877		if (error) {
878			LIN_SDT_PROBE1(futex, linux_sys_futex, return, error);
879			return (error);
880		}
881
882		if (args->uaddr != args->uaddr2)
883			error = futex_get(args->uaddr2, NULL, &f2, flags);
884		if (error) {
885			futex_put(f, NULL);
886
887			LIN_SDT_PROBE1(futex, linux_sys_futex, return, error);
888			return (error);
889		}
890
891		/*
892		 * This function returns positive number as results and
893		 * negative as errors
894		 */
895		op_ret = futex_atomic_op(td, args->val3, args->uaddr2);
896
897		LINUX_CTR2(sys_futex, "WAKE_OP atomic_op uaddr %p ret 0x%x",
898		    args->uaddr, op_ret);
899
900		if (op_ret < 0) {
901			/* XXX: We don't handle the EFAULT yet. */
902			if (op_ret != -EFAULT) {
903				if (f2 != NULL)
904					futex_put(f2, NULL);
905				futex_put(f, NULL);
906
907				LIN_SDT_PROBE1(futex, linux_sys_futex, return,
908				    -op_ret);
909				return (-op_ret);
910			} else {
911				LIN_SDT_PROBE0(futex, linux_sys_futex,
912				    unhandled_efault);
913			}
914			if (f2 != NULL)
915				futex_put(f2, NULL);
916			futex_put(f, NULL);
917
918			LIN_SDT_PROBE1(futex, linux_sys_futex, return, EFAULT);
919			return (EFAULT);
920		}
921
922		ret = futex_wake(f, args->val, args->val3);
923
924		if (op_ret > 0) {
925			op_ret = 0;
926			nrwake = (int)(unsigned long)args->timeout;
927
928			if (f2 != NULL)
929				op_ret += futex_wake(f2, nrwake, args->val3);
930			else
931				op_ret += futex_wake(f, nrwake, args->val3);
932			ret += op_ret;
933
934		}
935		if (f2 != NULL)
936			futex_put(f2, NULL);
937		futex_put(f, NULL);
938		td->td_retval[0] = ret;
939		break;
940
941	case LINUX_FUTEX_LOCK_PI:
942		/* not yet implemented */
943		linux_msg(td,
944			  "linux_sys_futex: "
945			  "op LINUX_FUTEX_LOCK_PI not implemented\n");
946		LIN_SDT_PROBE0(futex, linux_sys_futex, unimplemented_lock_pi);
947		LIN_SDT_PROBE1(futex, linux_sys_futex, return, ENOSYS);
948		return (ENOSYS);
949
950	case LINUX_FUTEX_UNLOCK_PI:
951		/* not yet implemented */
952		linux_msg(td,
953			  "linux_sys_futex: "
954			  "op LINUX_FUTEX_UNLOCK_PI not implemented\n");
955		LIN_SDT_PROBE0(futex, linux_sys_futex, unimplemented_unlock_pi);
956		LIN_SDT_PROBE1(futex, linux_sys_futex, return, ENOSYS);
957		return (ENOSYS);
958
959	case LINUX_FUTEX_TRYLOCK_PI:
960		/* not yet implemented */
961		linux_msg(td,
962			  "linux_sys_futex: "
963			  "op LINUX_FUTEX_TRYLOCK_PI not implemented\n");
964		LIN_SDT_PROBE0(futex, linux_sys_futex,
965		    unimplemented_trylock_pi);
966		LIN_SDT_PROBE1(futex, linux_sys_futex, return, ENOSYS);
967		return (ENOSYS);
968
969	case LINUX_FUTEX_REQUEUE:
970
971		/*
972		 * Glibc does not use this operation since version 2.3.3,
973		 * as it is racy and replaced by FUTEX_CMP_REQUEUE operation.
974		 * Glibc versions prior to 2.3.3 fall back to FUTEX_WAKE when
975		 * FUTEX_REQUEUE returned EINVAL.
976		 */
977		em = em_find(td);
978		if ((em->flags & LINUX_XDEPR_REQUEUEOP) == 0) {
979			linux_msg(td,
980				  "linux_sys_futex: "
981				  "unsupported futex_requeue op\n");
982			em->flags |= LINUX_XDEPR_REQUEUEOP;
983			LIN_SDT_PROBE0(futex, linux_sys_futex,
984			    deprecated_requeue);
985		}
986
987		LIN_SDT_PROBE1(futex, linux_sys_futex, return, EINVAL);
988		return (EINVAL);
989
990	case LINUX_FUTEX_WAIT_REQUEUE_PI:
991		/* not yet implemented */
992		linux_msg(td,
993			  "linux_sys_futex: "
994			  "op FUTEX_WAIT_REQUEUE_PI not implemented\n");
995		LIN_SDT_PROBE0(futex, linux_sys_futex,
996		    unimplemented_wait_requeue_pi);
997		LIN_SDT_PROBE1(futex, linux_sys_futex, return, ENOSYS);
998		return (ENOSYS);
999
1000	case LINUX_FUTEX_CMP_REQUEUE_PI:
1001		/* not yet implemented */
1002		linux_msg(td,
1003			    "linux_sys_futex: "
1004			    "op LINUX_FUTEX_CMP_REQUEUE_PI not implemented\n");
1005		LIN_SDT_PROBE0(futex, linux_sys_futex,
1006		    unimplemented_cmp_requeue_pi);
1007		LIN_SDT_PROBE1(futex, linux_sys_futex, return, ENOSYS);
1008		return (ENOSYS);
1009
1010	default:
1011		linux_msg(td,
1012			  "linux_sys_futex: unknown op %d\n", args->op);
1013		LIN_SDT_PROBE1(futex, linux_sys_futex, unknown_operation,
1014		    args->op);
1015		LIN_SDT_PROBE1(futex, linux_sys_futex, return, ENOSYS);
1016		return (ENOSYS);
1017	}
1018
1019	LIN_SDT_PROBE1(futex, linux_sys_futex, return, error);
1020	return (error);
1021}
1022
1023int
1024linux_set_robust_list(struct thread *td, struct linux_set_robust_list_args *args)
1025{
1026	struct linux_emuldata *em;
1027
1028	LIN_SDT_PROBE2(futex, linux_set_robust_list, entry, td, args);
1029
1030	if (args->len != sizeof(struct linux_robust_list_head)) {
1031		LIN_SDT_PROBE0(futex, linux_set_robust_list, size_error);
1032		LIN_SDT_PROBE1(futex, linux_set_robust_list, return, EINVAL);
1033		return (EINVAL);
1034	}
1035
1036	em = em_find(td);
1037	em->robust_futexes = args->head;
1038
1039	LIN_SDT_PROBE1(futex, linux_set_robust_list, return, 0);
1040	return (0);
1041}
1042
1043int
1044linux_get_robust_list(struct thread *td, struct linux_get_robust_list_args *args)
1045{
1046	struct linux_emuldata *em;
1047	struct linux_robust_list_head *head;
1048	l_size_t len = sizeof(struct linux_robust_list_head);
1049	struct thread *td2;
1050	int error = 0;
1051
1052	LIN_SDT_PROBE2(futex, linux_get_robust_list, entry, td, args);
1053
1054	if (!args->pid) {
1055		em = em_find(td);
1056		KASSERT(em != NULL, ("get_robust_list: emuldata notfound.\n"));
1057		head = em->robust_futexes;
1058	} else {
1059		td2 = tdfind(args->pid, -1);
1060		if (td2 == NULL) {
1061			LIN_SDT_PROBE1(futex, linux_get_robust_list, return,
1062			    ESRCH);
1063			return (ESRCH);
1064		}
1065
1066		em = em_find(td2);
1067		KASSERT(em != NULL, ("get_robust_list: emuldata notfound.\n"));
1068		/* XXX: ptrace? */
1069		if (priv_check(td, PRIV_CRED_SETUID) ||
1070		    priv_check(td, PRIV_CRED_SETEUID) ||
1071		    p_candebug(td, td2->td_proc)) {
1072			PROC_UNLOCK(td2->td_proc);
1073
1074			LIN_SDT_PROBE1(futex, linux_get_robust_list, return,
1075			    EPERM);
1076			return (EPERM);
1077		}
1078		head = em->robust_futexes;
1079
1080		PROC_UNLOCK(td2->td_proc);
1081	}
1082
1083	error = copyout(&len, args->len, sizeof(l_size_t));
1084	if (error) {
1085		LIN_SDT_PROBE1(futex, linux_get_robust_list, copyout_error,
1086		    error);
1087		LIN_SDT_PROBE1(futex, linux_get_robust_list, return, EFAULT);
1088		return (EFAULT);
1089	}
1090
1091	error = copyout(head, args->head, sizeof(struct linux_robust_list_head));
1092	if (error) {
1093		LIN_SDT_PROBE1(futex, linux_get_robust_list, copyout_error,
1094		    error);
1095	}
1096
1097	LIN_SDT_PROBE1(futex, linux_get_robust_list, return, error);
1098	return (error);
1099}
1100
1101static int
1102handle_futex_death(struct linux_emuldata *em, uint32_t *uaddr,
1103    unsigned int pi)
1104{
1105	uint32_t uval, nval, mval;
1106	struct futex *f;
1107	int error;
1108
1109	LIN_SDT_PROBE3(futex, handle_futex_death, entry, em, uaddr, pi);
1110
1111retry:
1112	error = copyin(uaddr, &uval, 4);
1113	if (error) {
1114		LIN_SDT_PROBE1(futex, handle_futex_death, copyin_error, error);
1115		LIN_SDT_PROBE1(futex, handle_futex_death, return, EFAULT);
1116		return (EFAULT);
1117	}
1118	if ((uval & FUTEX_TID_MASK) == em->em_tid) {
1119		mval = (uval & FUTEX_WAITERS) | FUTEX_OWNER_DIED;
1120		nval = casuword32(uaddr, uval, mval);
1121
1122		if (nval == -1) {
1123			LIN_SDT_PROBE1(futex, handle_futex_death, return,
1124			    EFAULT);
1125			return (EFAULT);
1126		}
1127
1128		if (nval != uval)
1129			goto retry;
1130
1131		if (!pi && (uval & FUTEX_WAITERS)) {
1132			error = futex_get(uaddr, NULL, &f,
1133			    FUTEX_DONTCREATE | FUTEX_SHARED);
1134			if (error) {
1135				LIN_SDT_PROBE1(futex, handle_futex_death,
1136				    return, error);
1137				return (error);
1138			}
1139			if (f != NULL) {
1140				futex_wake(f, 1, FUTEX_BITSET_MATCH_ANY);
1141				futex_put(f, NULL);
1142			}
1143		}
1144	}
1145
1146	LIN_SDT_PROBE1(futex, handle_futex_death, return, 0);
1147	return (0);
1148}
1149
1150static int
1151fetch_robust_entry(struct linux_robust_list **entry,
1152    struct linux_robust_list **head, unsigned int *pi)
1153{
1154	l_ulong uentry;
1155	int error;
1156
1157	LIN_SDT_PROBE3(futex, fetch_robust_entry, entry, entry, head, pi);
1158
1159	error = copyin((const void *)head, &uentry, sizeof(l_ulong));
1160	if (error) {
1161		LIN_SDT_PROBE1(futex, fetch_robust_entry, copyin_error, error);
1162		LIN_SDT_PROBE1(futex, fetch_robust_entry, return, EFAULT);
1163		return (EFAULT);
1164	}
1165
1166	*entry = (void *)(uentry & ~1UL);
1167	*pi = uentry & 1;
1168
1169	LIN_SDT_PROBE1(futex, fetch_robust_entry, return, 0);
1170	return (0);
1171}
1172
1173/* This walks the list of robust futexes releasing them. */
1174void
1175release_futexes(struct thread *td, struct linux_emuldata *em)
1176{
1177	struct linux_robust_list_head *head = NULL;
1178	struct linux_robust_list *entry, *next_entry, *pending;
1179	unsigned int limit = 2048, pi, next_pi, pip;
1180	l_long futex_offset;
1181	int rc, error;
1182
1183	LIN_SDT_PROBE2(futex, release_futexes, entry, td, em);
1184
1185	head = em->robust_futexes;
1186
1187	if (head == NULL) {
1188		LIN_SDT_PROBE0(futex, release_futexes, return);
1189		return;
1190	}
1191
1192	if (fetch_robust_entry(&entry, PTRIN(&head->list.next), &pi)) {
1193		LIN_SDT_PROBE0(futex, release_futexes, return);
1194		return;
1195	}
1196
1197	error = copyin(&head->futex_offset, &futex_offset,
1198	    sizeof(futex_offset));
1199	if (error) {
1200		LIN_SDT_PROBE1(futex, release_futexes, copyin_error, error);
1201		LIN_SDT_PROBE0(futex, release_futexes, return);
1202		return;
1203	}
1204
1205	if (fetch_robust_entry(&pending, PTRIN(&head->pending_list), &pip)) {
1206		LIN_SDT_PROBE0(futex, release_futexes, return);
1207		return;
1208	}
1209
1210	while (entry != &head->list) {
1211		rc = fetch_robust_entry(&next_entry, PTRIN(&entry->next), &next_pi);
1212
1213		if (entry != pending)
1214			if (handle_futex_death(em,
1215			    (uint32_t *)((caddr_t)entry + futex_offset), pi)) {
1216				LIN_SDT_PROBE0(futex, release_futexes, return);
1217				return;
1218			}
1219		if (rc) {
1220			LIN_SDT_PROBE0(futex, release_futexes, return);
1221			return;
1222		}
1223
1224		entry = next_entry;
1225		pi = next_pi;
1226
1227		if (!--limit)
1228			break;
1229
1230		sched_relinquish(curthread);
1231	}
1232
1233	if (pending)
1234		handle_futex_death(em, (uint32_t *)((caddr_t)pending + futex_offset), pip);
1235
1236	LIN_SDT_PROBE0(futex, release_futexes, return);
1237}
1238