linux_futex.c revision 293562
1/*	$NetBSD: linux_futex.c,v 1.7 2006/07/24 19:01:49 manu Exp $ */
2
3/*-
4 * Copyright (c) 2005 Emmanuel Dreyfus, all rights reserved.
5 *
6 * Redistribution and use in source and binary forms, with or without
7 * modification, are permitted provided that the following conditions
8 * are met:
9 * 1. Redistributions of source code must retain the above copyright
10 *    notice, this list of conditions and the following disclaimer.
11 * 2. Redistributions in binary form must reproduce the above copyright
12 *    notice, this list of conditions and the following disclaimer in the
13 *    documentation and/or other materials provided with the distribution.
14 * 3. All advertising materials mentioning features or use of this software
15 *    must display the following acknowledgement:
16 *	This product includes software developed by Emmanuel Dreyfus
17 * 4. The name of the author may not be used to endorse or promote
18 *    products derived from this software without specific prior written
19 *    permission.
20 *
21 * THIS SOFTWARE IS PROVIDED BY THE THE AUTHOR AND CONTRIBUTORS ``AS IS''
22 * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO,
23 * THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
24 * PURPOSE ARE DISCLAIMED.  IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS
25 * BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
26 * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
27 * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
28 * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
29 * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
30 * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
31 * POSSIBILITY OF SUCH DAMAGE.
32 */
33
34#include <sys/cdefs.h>
35__FBSDID("$FreeBSD: stable/10/sys/compat/linux/linux_futex.c 293562 2016-01-09 17:07:17Z dchagin $");
36#if 0
37__KERNEL_RCSID(1, "$NetBSD: linux_futex.c,v 1.7 2006/07/24 19:01:49 manu Exp $");
38#endif
39
40#include "opt_compat.h"
41#include "opt_kdtrace.h"
42
43#include <sys/param.h>
44#include <sys/systm.h>
45#include <sys/imgact.h>
46#include <sys/kernel.h>
47#include <sys/ktr.h>
48#include <sys/lock.h>
49#include <sys/malloc.h>
50#include <sys/mutex.h>
51#include <sys/priv.h>
52#include <sys/proc.h>
53#include <sys/queue.h>
54#include <sys/sched.h>
55#include <sys/sdt.h>
56#include <sys/sx.h>
57#include <sys/umtx.h>
58
59#ifdef COMPAT_LINUX32
60#include <machine/../linux32/linux.h>
61#include <machine/../linux32/linux32_proto.h>
62#else
63#include <machine/../linux/linux.h>
64#include <machine/../linux/linux_proto.h>
65#endif
66#include <compat/linux/linux_dtrace.h>
67#include <compat/linux/linux_emul.h>
68#include <compat/linux/linux_futex.h>
69#include <compat/linux/linux_util.h>
70
/*
 * DTrace glue for the futex code.  Everything below is a declaration made
 * through the LIN_SDT_* macros.  In each LIN_SDT_PROBE_DEFINEn() the
 * trailing string arguments give the C types of the n probe arguments.
 */
71/* DTrace init */
72LIN_SDT_PROVIDER_DECLARE(LINUX_DTRACE);
73
74/**
75 * Futex part for the special DTrace module "locks".
 * These two probes are fired by FUTEXES_LOCK and FUTEXES_UNLOCK.
76 */
77LIN_SDT_PROBE_DEFINE1(locks, futex_mtx, locked, "struct mtx *");
78LIN_SDT_PROBE_DEFINE1(locks, futex_mtx, unlock, "struct mtx *");
79
80/**
81 * Per futex probes.
 * Fired by FUTEX_INIT() and FUTEX_DESTROY() with the futex's sx lock.
82 */
83LIN_SDT_PROBE_DEFINE1(futex, futex, create, "struct sx *");
84LIN_SDT_PROBE_DEFINE1(futex, futex, destroy, "struct sx *");
85
86/**
87 * DTrace probes in this module.
 * They are grouped by the function name given as the second macro argument.
88 */
/* futex_put */
89LIN_SDT_PROBE_DEFINE2(futex, futex_put, entry, "struct futex *",
90    "struct waiting_proc *");
91LIN_SDT_PROBE_DEFINE3(futex, futex_put, destroy, "uint32_t *", "uint32_t",
92    "int");
93LIN_SDT_PROBE_DEFINE3(futex, futex_put, unlock, "uint32_t *", "uint32_t",
94    "int");
95LIN_SDT_PROBE_DEFINE0(futex, futex_put, return);
/* futex_get0 */
96LIN_SDT_PROBE_DEFINE3(futex, futex_get0, entry, "uint32_t *", "struct futex **",
97    "uint32_t");
98LIN_SDT_PROBE_DEFINE1(futex, futex_get0, umtx_key_get_error, "int");
99LIN_SDT_PROBE_DEFINE3(futex, futex_get0, shared, "uint32_t *", "uint32_t",
100    "int");
101LIN_SDT_PROBE_DEFINE1(futex, futex_get0, null, "uint32_t *");
102LIN_SDT_PROBE_DEFINE3(futex, futex_get0, new, "uint32_t *", "uint32_t", "int");
103LIN_SDT_PROBE_DEFINE1(futex, futex_get0, return, "int");
/* futex_get */
104LIN_SDT_PROBE_DEFINE3(futex, futex_get, entry, "uint32_t *",
105    "struct waiting_proc **", "struct futex **");
106LIN_SDT_PROBE_DEFINE0(futex, futex_get, error);
107LIN_SDT_PROBE_DEFINE1(futex, futex_get, return, "int");
/* futex_sleep */
108LIN_SDT_PROBE_DEFINE3(futex, futex_sleep, entry, "struct futex *",
109    "struct waiting_proc **", "int");
110LIN_SDT_PROBE_DEFINE5(futex, futex_sleep, requeue_error, "int", "uint32_t *",
111    "struct waiting_proc *", "uint32_t *", "uint32_t");
112LIN_SDT_PROBE_DEFINE3(futex, futex_sleep, sleep_error, "int", "uint32_t *",
113    "struct waiting_proc *");
114LIN_SDT_PROBE_DEFINE1(futex, futex_sleep, return, "int");
/* futex_wake */
115LIN_SDT_PROBE_DEFINE3(futex, futex_wake, entry, "struct futex *", "int",
116    "uint32_t");
117LIN_SDT_PROBE_DEFINE3(futex, futex_wake, iterate, "uint32_t",
118    "struct waiting_proc *", "uint32_t");
119LIN_SDT_PROBE_DEFINE1(futex, futex_wake, wakeup, "struct waiting_proc *");
120LIN_SDT_PROBE_DEFINE1(futex, futex_wake, return, "int");
/* futex_requeue */
121LIN_SDT_PROBE_DEFINE4(futex, futex_requeue, entry, "struct futex *", "int",
122    "struct futex *", "int");
123LIN_SDT_PROBE_DEFINE1(futex, futex_requeue, wakeup, "struct waiting_proc *");
124LIN_SDT_PROBE_DEFINE3(futex, futex_requeue, requeue, "uint32_t *",
125    "struct waiting_proc *", "uint32_t");
126LIN_SDT_PROBE_DEFINE1(futex, futex_requeue, return, "int");
/* futex_wait */
127LIN_SDT_PROBE_DEFINE4(futex, futex_wait, entry, "struct futex *",
128    "struct waiting_proc **", "int", "uint32_t");
129LIN_SDT_PROBE_DEFINE1(futex, futex_wait, sleep_error, "int");
130LIN_SDT_PROBE_DEFINE1(futex, futex_wait, return, "int");
/* futex_atomic_op */
131LIN_SDT_PROBE_DEFINE3(futex, futex_atomic_op, entry, "struct thread *",
132    "int", "uint32_t");
133LIN_SDT_PROBE_DEFINE4(futex, futex_atomic_op, decoded_op, "int", "int", "int",
134    "int");
135LIN_SDT_PROBE_DEFINE0(futex, futex_atomic_op, missing_access_check);
136LIN_SDT_PROBE_DEFINE1(futex, futex_atomic_op, unimplemented_op, "int");
137LIN_SDT_PROBE_DEFINE1(futex, futex_atomic_op, unimplemented_cmp, "int");
138LIN_SDT_PROBE_DEFINE1(futex, futex_atomic_op, return, "int");
/* linux_sys_futex */
139LIN_SDT_PROBE_DEFINE2(futex, linux_sys_futex, entry, "struct thread *",
140    "struct linux_sys_futex_args *");
141LIN_SDT_PROBE_DEFINE0(futex, linux_sys_futex, unimplemented_clockswitch);
142LIN_SDT_PROBE_DEFINE1(futex, linux_sys_futex, itimerfix_error, "int");
143LIN_SDT_PROBE_DEFINE1(futex, linux_sys_futex, copyin_error, "int");
144LIN_SDT_PROBE_DEFINE0(futex, linux_sys_futex, invalid_cmp_requeue_use);
145LIN_SDT_PROBE_DEFINE3(futex, linux_sys_futex, debug_wait, "uint32_t *",
146    "uint32_t", "uint32_t");
147LIN_SDT_PROBE_DEFINE4(futex, linux_sys_futex, debug_wait_value_neq,
148    "uint32_t *", "uint32_t", "int", "uint32_t");
149LIN_SDT_PROBE_DEFINE3(futex, linux_sys_futex, debug_wake, "uint32_t *",
150    "uint32_t", "uint32_t");
151LIN_SDT_PROBE_DEFINE5(futex, linux_sys_futex, debug_cmp_requeue, "uint32_t *",
152    "uint32_t", "uint32_t", "uint32_t *", "struct l_timespec *");
153LIN_SDT_PROBE_DEFINE2(futex, linux_sys_futex, debug_cmp_requeue_value_neq,
154    "uint32_t", "int");
155LIN_SDT_PROBE_DEFINE5(futex, linux_sys_futex, debug_wake_op, "uint32_t *",
156    "int", "uint32_t", "uint32_t *", "uint32_t");
157LIN_SDT_PROBE_DEFINE0(futex, linux_sys_futex, unhandled_efault);
158LIN_SDT_PROBE_DEFINE0(futex, linux_sys_futex, unimplemented_lock_pi);
159LIN_SDT_PROBE_DEFINE0(futex, linux_sys_futex, unimplemented_unlock_pi);
160LIN_SDT_PROBE_DEFINE0(futex, linux_sys_futex, unimplemented_trylock_pi);
161LIN_SDT_PROBE_DEFINE0(futex, linux_sys_futex, deprecated_requeue);
162LIN_SDT_PROBE_DEFINE0(futex, linux_sys_futex, unimplemented_wait_requeue_pi);
163LIN_SDT_PROBE_DEFINE0(futex, linux_sys_futex, unimplemented_cmp_requeue_pi);
164LIN_SDT_PROBE_DEFINE1(futex, linux_sys_futex, unknown_operation, "int");
165LIN_SDT_PROBE_DEFINE1(futex, linux_sys_futex, return, "int");
/* linux_set_robust_list */
166LIN_SDT_PROBE_DEFINE2(futex, linux_set_robust_list, entry, "struct thread *",
167    "struct linux_set_robust_list_args *");
168LIN_SDT_PROBE_DEFINE0(futex, linux_set_robust_list, size_error);
169LIN_SDT_PROBE_DEFINE1(futex, linux_set_robust_list, return, "int");
/* linux_get_robust_list */
170LIN_SDT_PROBE_DEFINE2(futex, linux_get_robust_list, entry, "struct thread *",
171    "struct linux_get_robust_list_args *");
172LIN_SDT_PROBE_DEFINE1(futex, linux_get_robust_list, copyout_error, "int");
173LIN_SDT_PROBE_DEFINE1(futex, linux_get_robust_list, return, "int");
/* handle_futex_death */
174LIN_SDT_PROBE_DEFINE3(futex, handle_futex_death, entry,
175    "struct linux_emuldata *", "uint32_t *", "unsigned int");
176LIN_SDT_PROBE_DEFINE1(futex, handle_futex_death, copyin_error, "int");
177LIN_SDT_PROBE_DEFINE1(futex, handle_futex_death, return, "int");
/* fetch_robust_entry */
178LIN_SDT_PROBE_DEFINE3(futex, fetch_robust_entry, entry,
179    "struct linux_robust_list **", "struct linux_robust_list **",
180    "unsigned int *");
181LIN_SDT_PROBE_DEFINE1(futex, fetch_robust_entry, copyin_error, "int");
182LIN_SDT_PROBE_DEFINE1(futex, fetch_robust_entry, return, "int");
/* release_futexes */
183LIN_SDT_PROBE_DEFINE2(futex, release_futexes, entry, "struct thread *",
184    "struct linux_emuldata *");
185LIN_SDT_PROBE_DEFINE1(futex, release_futexes, copyin_error, "int");
186LIN_SDT_PROBE_DEFINE0(futex, release_futexes, return);
187
188struct futex;
189
/*
 * One waiter queued on a futex.  futex_get() allocates it when called with
 * FUTEX_CREATE_WP and futex_put() frees it.  Its address is also the
 * channel passed to sx_sleep() and wakeup_one().
 */
190struct waiting_proc {
191	uint32_t	wp_flags;	/* FUTEX_WP_* bits */
192	struct futex	*wp_futex;	/* futex whose queue holds this entry;
					 * retargeted by futex_requeue() */
193	TAILQ_ENTRY(waiting_proc) wp_list;	/* link in f_waiting_proc */
194};
195
/*
 * Kernel-side state for one futex word.  Instances are created by
 * futex_get0(), linked on futex_list and freed by futex_put() when the
 * reference count drops to zero.
 */
196struct futex {
197	struct sx	f_lck;		/* per-futex lock, see FUTEX_LOCK() */
198	uint32_t	*f_uaddr;	/* user-supplied value, for debug */
199	struct umtx_key	f_key;		/* identity used for lookup in futex_get0() */
200	uint32_t	f_refcount;	/* changed with futex_mtx held */
201	uint32_t	f_bitset;	/* set by futex_wait(), tested by futex_wake() */
202	LIST_ENTRY(futex) f_list;	/* link on futex_list */
203	TAILQ_HEAD(lf_waiting_proc, waiting_proc) f_waiting_proc; /* queued waiters */
204};
205
/* List of all live futexes; searched and modified with futex_mtx held. */
206struct futex_list futex_list;
207
/*
 * Per-futex locking.  The lock is an sx lock that is only ever taken
 * exclusively here.
 *
 * FUTEX_INIT() and FUTEX_DESTROY() set up and tear down that lock and fire
 * the futex:futex:create / futex:futex:destroy probes.
 * NOTE(review): SX_DUPOK is presumably needed because two futex locks of
 * the same name are held at once (e.g. in futex_requeue()) -- confirm
 * against sx(9).
 *
 * FUTEX_ASSERT_LOCKED() asserts that the current thread holds the lock
 * exclusively.
 */
208#define FUTEX_LOCK(f)		sx_xlock(&(f)->f_lck)
209#define FUTEX_UNLOCK(f)		sx_xunlock(&(f)->f_lck)
210#define FUTEX_INIT(f)		do { \
211				    sx_init_flags(&(f)->f_lck, "ftlk", \
212					SX_DUPOK); \
213				    LIN_SDT_PROBE1(futex, futex, create, \
214					&(f)->f_lck); \
215				} while (0)
216#define FUTEX_DESTROY(f)	do { \
217				    LIN_SDT_PROBE1(futex, futex, destroy, \
218					&(f)->f_lck); \
219				    sx_destroy(&(f)->f_lck); \
220				} while (0)
221#define FUTEX_ASSERT_LOCKED(f)	sx_assert(&(f)->f_lck, SA_XLOCKED)
222
/*
 * Global mutex for futex_list.  In this file it is also held around every
 * change of a futex's f_refcount.
 *
 * FUTEXES_LOCK fires the locks:futex_mtx:locked probe right after taking
 * the mutex; FUTEXES_UNLOCK fires locks:futex_mtx:unlock right before
 * dropping it.  Both are statement macros used without parentheses.
 */
223struct mtx futex_mtx;			/* protects the futex list */
224#define FUTEXES_LOCK		do { \
225				    mtx_lock(&futex_mtx); \
226				    LIN_SDT_PROBE1(locks, futex_mtx, \
227					locked, &futex_mtx); \
228				} while (0)
229#define FUTEXES_UNLOCK		do { \
230				    LIN_SDT_PROBE1(locks, futex_mtx, \
231					unlock, &futex_mtx); \
232				    mtx_unlock(&futex_mtx); \
233				} while (0)
234
235/* flags for futex_get() and futex_get0() */
236#define FUTEX_CREATE_WP		0x1	/* also allocate and queue a waiting_proc */
237#define FUTEX_DONTCREATE	0x2	/* don't create the futex if it does not exist */
238#define FUTEX_DONTEXISTS	0x4	/* return EINVAL if the futex already exists */
239#define	FUTEX_SHARED		0x8	/* shared futex: key is AUTO_SHARE, not THREAD_SHARE */
240
241/* wp_flags */
242#define FUTEX_WP_REQUEUED	0x1	/* wp was requeued: it has been moved from
243					 * the wp_list of the futex its thread went
244					 * to sleep on to the wp_list of another futex.
245					 */
246#define FUTEX_WP_REMOVED	0x2	/* wp has been woken up and already taken off
247					 * the futex wp_list, to prevent a double wakeup.
248					 */
249
/*
 * Read-modify-write helpers on the user word at uaddr, used only by
 * futex_atomic_op().  Each takes the operand, the user address and a place
 * to store the previous value; futex_atomic_op() treats any non-zero return
 * as failure and passes it on unchanged.
 * NOTE(review): going by the names these do exchange / add / or / and / xor;
 * the implementation is not in this file -- confirm in support.s.
 */
250/* support.s */
251int futex_xchgl(int oparg, uint32_t *uaddr, int *oldval);
252int futex_addl(int oparg, uint32_t *uaddr, int *oldval);
253int futex_orl(int oparg, uint32_t *uaddr, int *oldval);
254int futex_andl(int oparg, uint32_t *uaddr, int *oldval);
255int futex_xorl(int oparg, uint32_t *uaddr, int *oldval);
256
257static void
258futex_put(struct futex *f, struct waiting_proc *wp)
259{
260	LIN_SDT_PROBE2(futex, futex_put, entry, f, wp);
261
262	FUTEX_ASSERT_LOCKED(f);
263	if (wp != NULL) {
264		if ((wp->wp_flags & FUTEX_WP_REMOVED) == 0)
265			TAILQ_REMOVE(&f->f_waiting_proc, wp, wp_list);
266		free(wp, M_FUTEX_WP);
267	}
268
269	FUTEXES_LOCK;
270	if (--f->f_refcount == 0) {
271		LIST_REMOVE(f, f_list);
272		FUTEXES_UNLOCK;
273		FUTEX_UNLOCK(f);
274
275		LIN_SDT_PROBE3(futex, futex_put, destroy, f->f_uaddr,
276		    f->f_refcount, f->f_key.shared);
277		LINUX_CTR3(sys_futex, "futex_put destroy uaddr %p ref %d "
278		    "shared %d", f->f_uaddr, f->f_refcount, f->f_key.shared);
279		umtx_key_release(&f->f_key);
280		FUTEX_DESTROY(f);
281		free(f, M_FUTEX);
282
283		LIN_SDT_PROBE0(futex, futex_put, return);
284		return;
285	}
286
287	LIN_SDT_PROBE3(futex, futex_put, unlock, f->f_uaddr, f->f_refcount,
288	    f->f_key.shared);
289	LINUX_CTR3(sys_futex, "futex_put uaddr %p ref %d shared %d",
290	    f->f_uaddr, f->f_refcount, f->f_key.shared);
291	FUTEXES_UNLOCK;
292	FUTEX_UNLOCK(f);
293
294	LIN_SDT_PROBE0(futex, futex_put, return);
295}
296
297static int
298futex_get0(uint32_t *uaddr, struct futex **newf, uint32_t flags)
299{
300	struct futex *f, *tmpf;
301	struct umtx_key key;
302	int error;
303
304	LIN_SDT_PROBE3(futex, futex_get0, entry, uaddr, newf, flags);
305
306	*newf = tmpf = NULL;
307
308	error = umtx_key_get(uaddr, TYPE_FUTEX, (flags & FUTEX_SHARED) ?
309	    AUTO_SHARE : THREAD_SHARE, &key);
310	if (error) {
311		LIN_SDT_PROBE1(futex, futex_get0, umtx_key_get_error, error);
312		LIN_SDT_PROBE1(futex, futex_get0, return, error);
313		return (error);
314	}
315retry:
316	FUTEXES_LOCK;
317	LIST_FOREACH(f, &futex_list, f_list) {
318		if (umtx_key_match(&f->f_key, &key)) {
319			if (tmpf != NULL) {
320				FUTEX_UNLOCK(tmpf);
321				FUTEX_DESTROY(tmpf);
322				free(tmpf, M_FUTEX);
323			}
324			if (flags & FUTEX_DONTEXISTS) {
325				FUTEXES_UNLOCK;
326				umtx_key_release(&key);
327
328				LIN_SDT_PROBE1(futex, futex_get0, return,
329				    EINVAL);
330				return (EINVAL);
331			}
332
333			/*
334			 * Increment refcount of the found futex to
335			 * prevent it from deallocation before FUTEX_LOCK()
336			 */
337			++f->f_refcount;
338			FUTEXES_UNLOCK;
339			umtx_key_release(&key);
340
341			FUTEX_LOCK(f);
342			*newf = f;
343			LIN_SDT_PROBE3(futex, futex_get0, shared, uaddr,
344			    f->f_refcount, f->f_key.shared);
345			LINUX_CTR3(sys_futex, "futex_get uaddr %p ref %d shared %d",
346			    uaddr, f->f_refcount, f->f_key.shared);
347
348			LIN_SDT_PROBE1(futex, futex_get0, return, 0);
349			return (0);
350		}
351	}
352
353	if (flags & FUTEX_DONTCREATE) {
354		FUTEXES_UNLOCK;
355		umtx_key_release(&key);
356		LIN_SDT_PROBE1(futex, futex_get0, null, uaddr);
357		LINUX_CTR1(sys_futex, "futex_get uaddr %p null", uaddr);
358
359		LIN_SDT_PROBE1(futex, futex_get0, return, 0);
360		return (0);
361	}
362
363	if (tmpf == NULL) {
364		FUTEXES_UNLOCK;
365		tmpf = malloc(sizeof(*tmpf), M_FUTEX, M_WAITOK | M_ZERO);
366		tmpf->f_uaddr = uaddr;
367		tmpf->f_key = key;
368		tmpf->f_refcount = 1;
369		tmpf->f_bitset = FUTEX_BITSET_MATCH_ANY;
370		FUTEX_INIT(tmpf);
371		TAILQ_INIT(&tmpf->f_waiting_proc);
372
373		/*
374		 * Lock the new futex before an insert into the futex_list
375		 * to prevent futex usage by other.
376		 */
377		FUTEX_LOCK(tmpf);
378		goto retry;
379	}
380
381	LIST_INSERT_HEAD(&futex_list, tmpf, f_list);
382	FUTEXES_UNLOCK;
383
384	LIN_SDT_PROBE3(futex, futex_get0, new, uaddr, tmpf->f_refcount,
385	    tmpf->f_key.shared);
386	LINUX_CTR3(sys_futex, "futex_get uaddr %p ref %d shared %d new",
387	    uaddr, tmpf->f_refcount, tmpf->f_key.shared);
388	*newf = tmpf;
389
390	LIN_SDT_PROBE1(futex, futex_get0, return, 0);
391	return (0);
392}
393
394static int
395futex_get(uint32_t *uaddr, struct waiting_proc **wp, struct futex **f,
396    uint32_t flags)
397{
398	int error;
399
400	LIN_SDT_PROBE3(futex, futex_get, entry, uaddr, wp, f);
401
402	if (flags & FUTEX_CREATE_WP) {
403		*wp = malloc(sizeof(struct waiting_proc), M_FUTEX_WP, M_WAITOK);
404		(*wp)->wp_flags = 0;
405	}
406	error = futex_get0(uaddr, f, flags);
407	if (error) {
408		LIN_SDT_PROBE0(futex, futex_get, error);
409
410		if (flags & FUTEX_CREATE_WP)
411			free(*wp, M_FUTEX_WP);
412
413		LIN_SDT_PROBE1(futex, futex_get, return, error);
414		return (error);
415	}
416	if (flags & FUTEX_CREATE_WP) {
417		TAILQ_INSERT_HEAD(&(*f)->f_waiting_proc, *wp, wp_list);
418		(*wp)->wp_futex = *f;
419	}
420
421	LIN_SDT_PROBE1(futex, futex_get, return, error);
422	return (error);
423}
424
425static int
426futex_sleep(struct futex *f, struct waiting_proc *wp, int timeout)
427{
428	int error;
429
430	FUTEX_ASSERT_LOCKED(f);
431	LIN_SDT_PROBE3(futex, futex_sleep, entry, f, wp, timeout);
432	LINUX_CTR4(sys_futex, "futex_sleep enter uaddr %p wp %p timo %d ref %d",
433	    f->f_uaddr, wp, timeout, f->f_refcount);
434	error = sx_sleep(wp, &f->f_lck, PCATCH, "futex", timeout);
435	if (wp->wp_flags & FUTEX_WP_REQUEUED) {
436		KASSERT(f != wp->wp_futex, ("futex != wp_futex"));
437
438		if (error) {
439			LIN_SDT_PROBE5(futex, futex_sleep, requeue_error, error,
440			    f->f_uaddr, wp, wp->wp_futex->f_uaddr,
441			    wp->wp_futex->f_refcount);
442		}
443
444		LINUX_CTR5(sys_futex, "futex_sleep out error %d uaddr %p wp"
445		    " %p requeued uaddr %p ref %d",
446		    error, f->f_uaddr, wp, wp->wp_futex->f_uaddr,
447		    wp->wp_futex->f_refcount);
448		futex_put(f, NULL);
449		f = wp->wp_futex;
450		FUTEX_LOCK(f);
451	} else {
452		if (error) {
453			LIN_SDT_PROBE3(futex, futex_sleep, sleep_error, error,
454			    f->f_uaddr, wp);
455		}
456		LINUX_CTR3(sys_futex, "futex_sleep out error %d uaddr %p wp %p",
457		    error, f->f_uaddr, wp);
458	}
459
460	futex_put(f, wp);
461
462	LIN_SDT_PROBE1(futex, futex_sleep, return, error);
463	return (error);
464}
465
466static int
467futex_wake(struct futex *f, int n, uint32_t bitset)
468{
469	struct waiting_proc *wp, *wpt;
470	int count = 0;
471
472	LIN_SDT_PROBE3(futex, futex_wake, entry, f, n, bitset);
473
474	if (bitset == 0) {
475		LIN_SDT_PROBE1(futex, futex_wake, return, EINVAL);
476		return (EINVAL);
477	}
478
479	FUTEX_ASSERT_LOCKED(f);
480	TAILQ_FOREACH_SAFE(wp, &f->f_waiting_proc, wp_list, wpt) {
481		LIN_SDT_PROBE3(futex, futex_wake, iterate, f->f_uaddr, wp,
482		    f->f_refcount);
483		LINUX_CTR3(sys_futex, "futex_wake uaddr %p wp %p ref %d",
484		    f->f_uaddr, wp, f->f_refcount);
485		/*
486		 * Unless we find a matching bit in
487		 * the bitset, continue searching.
488		 */
489		if (!(wp->wp_futex->f_bitset & bitset))
490			continue;
491
492		wp->wp_flags |= FUTEX_WP_REMOVED;
493		TAILQ_REMOVE(&f->f_waiting_proc, wp, wp_list);
494		LIN_SDT_PROBE1(futex, futex_wake, wakeup, wp);
495		wakeup_one(wp);
496		if (++count == n)
497			break;
498	}
499
500	LIN_SDT_PROBE1(futex, futex_wake, return, count);
501	return (count);
502}
503
504static int
505futex_requeue(struct futex *f, int n, struct futex *f2, int n2)
506{
507	struct waiting_proc *wp, *wpt;
508	int count = 0;
509
510	LIN_SDT_PROBE4(futex, futex_requeue, entry, f, n, f2, n2);
511
512	FUTEX_ASSERT_LOCKED(f);
513	FUTEX_ASSERT_LOCKED(f2);
514
515	TAILQ_FOREACH_SAFE(wp, &f->f_waiting_proc, wp_list, wpt) {
516		if (++count <= n) {
517			LINUX_CTR2(sys_futex, "futex_req_wake uaddr %p wp %p",
518			    f->f_uaddr, wp);
519			wp->wp_flags |= FUTEX_WP_REMOVED;
520			TAILQ_REMOVE(&f->f_waiting_proc, wp, wp_list);
521			LIN_SDT_PROBE1(futex, futex_requeue, wakeup, wp);
522			wakeup_one(wp);
523		} else {
524			LIN_SDT_PROBE3(futex, futex_requeue, requeue,
525			    f->f_uaddr, wp, f2->f_uaddr);
526			LINUX_CTR3(sys_futex, "futex_requeue uaddr %p wp %p to %p",
527			    f->f_uaddr, wp, f2->f_uaddr);
528			wp->wp_flags |= FUTEX_WP_REQUEUED;
529			/* Move wp to wp_list of f2 futex */
530			TAILQ_REMOVE(&f->f_waiting_proc, wp, wp_list);
531			TAILQ_INSERT_HEAD(&f2->f_waiting_proc, wp, wp_list);
532
533			/*
534			 * Thread which sleeps on wp after waking should
535			 * acquire f2 lock, so increment refcount of f2 to
536			 * prevent it from premature deallocation.
537			 */
538			wp->wp_futex = f2;
539			FUTEXES_LOCK;
540			++f2->f_refcount;
541			FUTEXES_UNLOCK;
542			if (count - n >= n2)
543				break;
544		}
545	}
546
547	LIN_SDT_PROBE1(futex, futex_requeue, return, count);
548	return (count);
549}
550
551static int
552futex_wait(struct futex *f, struct waiting_proc *wp, int timeout_hz,
553    uint32_t bitset)
554{
555	int error;
556
557	LIN_SDT_PROBE4(futex, futex_wait, entry, f, wp, timeout_hz, bitset);
558
559	if (bitset == 0) {
560		LIN_SDT_PROBE1(futex, futex_wait, return, EINVAL);
561		return (EINVAL);
562	}
563
564	f->f_bitset = bitset;
565	error = futex_sleep(f, wp, timeout_hz);
566	if (error)
567		LIN_SDT_PROBE1(futex, futex_wait, sleep_error, error);
568	if (error == EWOULDBLOCK)
569		error = ETIMEDOUT;
570
571	LIN_SDT_PROBE1(futex, futex_wait, return, error);
572	return (error);
573}
574
575static int
576futex_atomic_op(struct thread *td, int encoded_op, uint32_t *uaddr)
577{
578	int op = (encoded_op >> 28) & 7;
579	int cmp = (encoded_op >> 24) & 15;
580	int oparg = (encoded_op << 8) >> 20;
581	int cmparg = (encoded_op << 20) >> 20;
582	int oldval = 0, ret;
583
584	LIN_SDT_PROBE3(futex, futex_atomic_op, entry, td, encoded_op, uaddr);
585
586	if (encoded_op & (FUTEX_OP_OPARG_SHIFT << 28))
587		oparg = 1 << oparg;
588
589	LIN_SDT_PROBE4(futex, futex_atomic_op, decoded_op, op, cmp, oparg,
590	    cmparg);
591
592	/* XXX: Linux verifies access here and returns EFAULT */
593	LIN_SDT_PROBE0(futex, futex_atomic_op, missing_access_check);
594
595	switch (op) {
596	case FUTEX_OP_SET:
597		ret = futex_xchgl(oparg, uaddr, &oldval);
598		break;
599	case FUTEX_OP_ADD:
600		ret = futex_addl(oparg, uaddr, &oldval);
601		break;
602	case FUTEX_OP_OR:
603		ret = futex_orl(oparg, uaddr, &oldval);
604		break;
605	case FUTEX_OP_ANDN:
606		ret = futex_andl(~oparg, uaddr, &oldval);
607		break;
608	case FUTEX_OP_XOR:
609		ret = futex_xorl(oparg, uaddr, &oldval);
610		break;
611	default:
612		LIN_SDT_PROBE1(futex, futex_atomic_op, unimplemented_op, op);
613		ret = -ENOSYS;
614		break;
615	}
616
617	if (ret) {
618		LIN_SDT_PROBE1(futex, futex_atomic_op, return, ret);
619		return (ret);
620	}
621
622	switch (cmp) {
623	case FUTEX_OP_CMP_EQ:
624		ret = (oldval == cmparg);
625		break;
626	case FUTEX_OP_CMP_NE:
627		ret = (oldval != cmparg);
628		break;
629	case FUTEX_OP_CMP_LT:
630		ret = (oldval < cmparg);
631		break;
632	case FUTEX_OP_CMP_GE:
633		ret = (oldval >= cmparg);
634		break;
635	case FUTEX_OP_CMP_LE:
636		ret = (oldval <= cmparg);
637		break;
638	case FUTEX_OP_CMP_GT:
639		ret = (oldval > cmparg);
640		break;
641	default:
642		LIN_SDT_PROBE1(futex, futex_atomic_op, unimplemented_cmp, cmp);
643		ret = -ENOSYS;
644	}
645
646	LIN_SDT_PROBE1(futex, futex_atomic_op, return, ret);
647	return (ret);
648}
649
650int
651linux_sys_futex(struct thread *td, struct linux_sys_futex_args *args)
652{
653	int clockrt, nrwake, op_ret, ret;
654	struct linux_pemuldata *pem;
655	struct waiting_proc *wp;
656	struct futex *f, *f2;
657	struct l_timespec timeout;
658	struct timeval utv, ctv;
659	int timeout_hz;
660	int error;
661	uint32_t flags, val;
662
663	LIN_SDT_PROBE2(futex, linux_sys_futex, entry, td, args);
664
665	if (args->op & LINUX_FUTEX_PRIVATE_FLAG) {
666		flags = 0;
667		args->op &= ~LINUX_FUTEX_PRIVATE_FLAG;
668	} else
669		flags = FUTEX_SHARED;
670
671	/*
672	 * Currently support for switching between CLOCK_MONOTONIC and
673	 * CLOCK_REALTIME is not present. However Linux forbids the use of
674	 * FUTEX_CLOCK_REALTIME with any op except FUTEX_WAIT_BITSET and
675	 * FUTEX_WAIT_REQUEUE_PI.
676	 */
677	clockrt = args->op & LINUX_FUTEX_CLOCK_REALTIME;
678	args->op = args->op & ~LINUX_FUTEX_CLOCK_REALTIME;
679	if (clockrt && args->op != LINUX_FUTEX_WAIT_BITSET &&
680		args->op != LINUX_FUTEX_WAIT_REQUEUE_PI) {
681		LIN_SDT_PROBE0(futex, linux_sys_futex,
682		    unimplemented_clockswitch);
683		LIN_SDT_PROBE1(futex, linux_sys_futex, return, ENOSYS);
684		return (ENOSYS);
685	}
686
687	error = 0;
688	f = f2 = NULL;
689
690	switch (args->op) {
691	case LINUX_FUTEX_WAIT:
692		args->val3 = FUTEX_BITSET_MATCH_ANY;
693		/* FALLTHROUGH */
694
695	case LINUX_FUTEX_WAIT_BITSET:
696		LIN_SDT_PROBE3(futex, linux_sys_futex, debug_wait, args->uaddr,
697		    args->val, args->val3);
698		LINUX_CTR3(sys_futex, "WAIT uaddr %p val 0x%x bitset 0x%x",
699		    args->uaddr, args->val, args->val3);
700
701		error = futex_get(args->uaddr, &wp, &f,
702		    flags | FUTEX_CREATE_WP);
703		if (error) {
704			LIN_SDT_PROBE1(futex, linux_sys_futex, return, error);
705			return (error);
706		}
707
708		error = copyin(args->uaddr, &val, sizeof(val));
709		if (error) {
710			LIN_SDT_PROBE1(futex, linux_sys_futex, copyin_error,
711			    error);
712			LINUX_CTR1(sys_futex, "WAIT copyin failed %d",
713			    error);
714			futex_put(f, wp);
715
716			LIN_SDT_PROBE1(futex, linux_sys_futex, return, error);
717			return (error);
718		}
719		if (val != args->val) {
720			LIN_SDT_PROBE4(futex, linux_sys_futex,
721			    debug_wait_value_neq, args->uaddr, args->val, val,
722			    args->val3);
723			LINUX_CTR3(sys_futex,
724			    "WAIT uaddr %p val 0x%x != uval 0x%x",
725			    args->uaddr, args->val, val);
726			futex_put(f, wp);
727
728			LIN_SDT_PROBE1(futex, linux_sys_futex, return,
729			    EWOULDBLOCK);
730			return (EWOULDBLOCK);
731		}
732
733		if (args->timeout != NULL) {
734			error = copyin(args->timeout, &timeout, sizeof(timeout));
735			if (error) {
736				LIN_SDT_PROBE1(futex, linux_sys_futex, copyin_error,
737				    error);
738				LIN_SDT_PROBE1(futex, linux_sys_futex, return, error);
739				futex_put(f, wp);
740				return (error);
741			}
742			TIMESPEC_TO_TIMEVAL(&utv, &timeout);
743			error = itimerfix(&utv);
744			if (error) {
745				LIN_SDT_PROBE1(futex, linux_sys_futex, itimerfix_error,
746				    error);
747				LIN_SDT_PROBE1(futex, linux_sys_futex, return, error);
748				futex_put(f, wp);
749				return (error);
750			}
751			if (clockrt) {
752				microtime(&ctv);
753				timevalsub(&utv, &ctv);
754			} else if (args->op == LINUX_FUTEX_WAIT_BITSET) {
755				microuptime(&ctv);
756				timevalsub(&utv, &ctv);
757			}
758			if (utv.tv_sec < 0)
759				timevalclear(&utv);
760			timeout_hz = tvtohz(&utv);
761		} else
762			timeout_hz = 0;
763
764		error = futex_wait(f, wp, timeout_hz, args->val3);
765		break;
766
767	case LINUX_FUTEX_WAKE:
768		args->val3 = FUTEX_BITSET_MATCH_ANY;
769		/* FALLTHROUGH */
770
771	case LINUX_FUTEX_WAKE_BITSET:
772		LIN_SDT_PROBE3(futex, linux_sys_futex, debug_wake, args->uaddr,
773		    args->val, args->val3);
774		LINUX_CTR3(sys_futex, "WAKE uaddr %p nrwake 0x%x bitset 0x%x",
775		    args->uaddr, args->val, args->val3);
776
777		error = futex_get(args->uaddr, NULL, &f,
778		    flags | FUTEX_DONTCREATE);
779		if (error) {
780			LIN_SDT_PROBE1(futex, linux_sys_futex, return, error);
781			return (error);
782		}
783
784		if (f == NULL) {
785			td->td_retval[0] = 0;
786
787			LIN_SDT_PROBE1(futex, linux_sys_futex, return, error);
788			return (error);
789		}
790		td->td_retval[0] = futex_wake(f, args->val, args->val3);
791		futex_put(f, NULL);
792		break;
793
794	case LINUX_FUTEX_CMP_REQUEUE:
795		LIN_SDT_PROBE5(futex, linux_sys_futex, debug_cmp_requeue,
796		    args->uaddr, args->val, args->val3, args->uaddr2,
797		    args->timeout);
798		LINUX_CTR5(sys_futex, "CMP_REQUEUE uaddr %p "
799		    "nrwake 0x%x uval 0x%x uaddr2 %p nrequeue 0x%x",
800		    args->uaddr, args->val, args->val3, args->uaddr2,
801		    args->timeout);
802
803		/*
804		 * Linux allows this, we would not, it is an incorrect
805		 * usage of declared ABI, so return EINVAL.
806		 */
807		if (args->uaddr == args->uaddr2) {
808			LIN_SDT_PROBE0(futex, linux_sys_futex,
809			    invalid_cmp_requeue_use);
810			LIN_SDT_PROBE1(futex, linux_sys_futex, return, EINVAL);
811			return (EINVAL);
812		}
813
814		error = futex_get(args->uaddr, NULL, &f, flags);
815		if (error) {
816			LIN_SDT_PROBE1(futex, linux_sys_futex, return, error);
817			return (error);
818		}
819
820		/*
821		 * To avoid deadlocks return EINVAL if second futex
822		 * exists at this time.
823		 *
824		 * Glibc fall back to FUTEX_WAKE in case of any error
825		 * returned by FUTEX_CMP_REQUEUE.
826		 */
827		error = futex_get(args->uaddr2, NULL, &f2,
828		    flags | FUTEX_DONTEXISTS);
829		if (error) {
830			futex_put(f, NULL);
831
832			LIN_SDT_PROBE1(futex, linux_sys_futex, return, error);
833			return (error);
834		}
835		error = copyin(args->uaddr, &val, sizeof(val));
836		if (error) {
837			LIN_SDT_PROBE1(futex, linux_sys_futex, copyin_error,
838			    error);
839			LINUX_CTR1(sys_futex, "CMP_REQUEUE copyin failed %d",
840			    error);
841			futex_put(f2, NULL);
842			futex_put(f, NULL);
843
844			LIN_SDT_PROBE1(futex, linux_sys_futex, return, error);
845			return (error);
846		}
847		if (val != args->val3) {
848			LIN_SDT_PROBE2(futex, linux_sys_futex,
849			    debug_cmp_requeue_value_neq, args->val, val);
850			LINUX_CTR2(sys_futex, "CMP_REQUEUE val 0x%x != uval 0x%x",
851			    args->val, val);
852			futex_put(f2, NULL);
853			futex_put(f, NULL);
854
855			LIN_SDT_PROBE1(futex, linux_sys_futex, return, EAGAIN);
856			return (EAGAIN);
857		}
858
859		nrwake = (int)(unsigned long)args->timeout;
860		td->td_retval[0] = futex_requeue(f, args->val, f2, nrwake);
861		futex_put(f2, NULL);
862		futex_put(f, NULL);
863		break;
864
865	case LINUX_FUTEX_WAKE_OP:
866		LIN_SDT_PROBE5(futex, linux_sys_futex, debug_wake_op,
867		    args->uaddr, args->op, args->val, args->uaddr2, args->val3);
868		LINUX_CTR5(sys_futex, "WAKE_OP "
869		    "uaddr %p nrwake 0x%x uaddr2 %p op 0x%x nrwake2 0x%x",
870		    args->uaddr, args->val, args->uaddr2, args->val3,
871		    args->timeout);
872
873		error = futex_get(args->uaddr, NULL, &f, flags);
874		if (error) {
875			LIN_SDT_PROBE1(futex, linux_sys_futex, return, error);
876			return (error);
877		}
878
879		if (args->uaddr != args->uaddr2)
880			error = futex_get(args->uaddr2, NULL, &f2, flags);
881		if (error) {
882			futex_put(f, NULL);
883
884			LIN_SDT_PROBE1(futex, linux_sys_futex, return, error);
885			return (error);
886		}
887
888		/*
889		 * This function returns positive number as results and
890		 * negative as errors
891		 */
892		op_ret = futex_atomic_op(td, args->val3, args->uaddr2);
893
894		LINUX_CTR2(sys_futex, "WAKE_OP atomic_op uaddr %p ret 0x%x",
895		    args->uaddr, op_ret);
896
897		if (op_ret < 0) {
898			/* XXX: We don't handle the EFAULT yet. */
899			if (op_ret != -EFAULT) {
900				if (f2 != NULL)
901					futex_put(f2, NULL);
902				futex_put(f, NULL);
903
904				LIN_SDT_PROBE1(futex, linux_sys_futex, return,
905				    -op_ret);
906				return (-op_ret);
907			} else {
908				LIN_SDT_PROBE0(futex, linux_sys_futex,
909				    unhandled_efault);
910			}
911			if (f2 != NULL)
912				futex_put(f2, NULL);
913			futex_put(f, NULL);
914
915			LIN_SDT_PROBE1(futex, linux_sys_futex, return, EFAULT);
916			return (EFAULT);
917		}
918
919		ret = futex_wake(f, args->val, args->val3);
920
921		if (op_ret > 0) {
922			op_ret = 0;
923			nrwake = (int)(unsigned long)args->timeout;
924
925			if (f2 != NULL)
926				op_ret += futex_wake(f2, nrwake, args->val3);
927			else
928				op_ret += futex_wake(f, nrwake, args->val3);
929			ret += op_ret;
930
931		}
932		if (f2 != NULL)
933			futex_put(f2, NULL);
934		futex_put(f, NULL);
935		td->td_retval[0] = ret;
936		break;
937
938	case LINUX_FUTEX_LOCK_PI:
939		/* not yet implemented */
940		pem = pem_find(td->td_proc);
941		if ((pem->flags & LINUX_XUNSUP_FUTEXPIOP) == 0) {
942			linux_msg(td,
943				  "linux_sys_futex: "
944				  "unsupported futex_pi op\n");
945			pem->flags |= LINUX_XUNSUP_FUTEXPIOP;
946			LIN_SDT_PROBE0(futex, linux_sys_futex,
947			    unimplemented_lock_pi);
948		}
949		LIN_SDT_PROBE1(futex, linux_sys_futex, return, ENOSYS);
950		return (ENOSYS);
951
952	case LINUX_FUTEX_UNLOCK_PI:
953		/* not yet implemented */
954		pem = pem_find(td->td_proc);
955		if ((pem->flags & LINUX_XUNSUP_FUTEXPIOP) == 0) {
956			linux_msg(td,
957				  "linux_sys_futex: "
958				  "unsupported futex_pi op\n");
959			pem->flags |= LINUX_XUNSUP_FUTEXPIOP;
960			LIN_SDT_PROBE0(futex, linux_sys_futex,
961			    unimplemented_unlock_pi);
962		}
963		LIN_SDT_PROBE1(futex, linux_sys_futex, return, ENOSYS);
964		return (ENOSYS);
965
966	case LINUX_FUTEX_TRYLOCK_PI:
967		/* not yet implemented */
968		pem = pem_find(td->td_proc);
969		if ((pem->flags & LINUX_XUNSUP_FUTEXPIOP) == 0) {
970			linux_msg(td,
971				  "linux_sys_futex: "
972				  "unsupported futex_pi op\n");
973			pem->flags |= LINUX_XUNSUP_FUTEXPIOP;
974			LIN_SDT_PROBE0(futex, linux_sys_futex,
975			    unimplemented_trylock_pi);
976		}
977		LIN_SDT_PROBE1(futex, linux_sys_futex, return, ENOSYS);
978		return (ENOSYS);
979
980	case LINUX_FUTEX_REQUEUE:
981
982		/*
983		 * Glibc does not use this operation since version 2.3.3,
984		 * as it is racy and replaced by FUTEX_CMP_REQUEUE operation.
985		 * Glibc versions prior to 2.3.3 fall back to FUTEX_WAKE when
986		 * FUTEX_REQUEUE returned EINVAL.
987		 */
988		pem = pem_find(td->td_proc);
989		if ((pem->flags & LINUX_XDEPR_REQUEUEOP) == 0) {
990			linux_msg(td,
991				  "linux_sys_futex: "
992				  "unsupported futex_requeue op\n");
993			pem->flags |= LINUX_XDEPR_REQUEUEOP;
994			LIN_SDT_PROBE0(futex, linux_sys_futex,
995			    deprecated_requeue);
996		}
997
998		LIN_SDT_PROBE1(futex, linux_sys_futex, return, EINVAL);
999		return (EINVAL);
1000
1001	case LINUX_FUTEX_WAIT_REQUEUE_PI:
1002		/* not yet implemented */
1003		pem = pem_find(td->td_proc);
1004		if ((pem->flags & LINUX_XUNSUP_FUTEXPIOP) == 0) {
1005			linux_msg(td,
1006				  "linux_sys_futex: "
1007				  "unsupported futex_pi op\n");
1008			pem->flags |= LINUX_XUNSUP_FUTEXPIOP;
1009			LIN_SDT_PROBE0(futex, linux_sys_futex,
1010			    unimplemented_wait_requeue_pi);
1011		}
1012		LIN_SDT_PROBE1(futex, linux_sys_futex, return, ENOSYS);
1013		return (ENOSYS);
1014
1015	case LINUX_FUTEX_CMP_REQUEUE_PI:
1016		/* not yet implemented */
1017		pem = pem_find(td->td_proc);
1018		if ((pem->flags & LINUX_XUNSUP_FUTEXPIOP) == 0) {
1019			linux_msg(td,
1020				  "linux_sys_futex: "
1021				  "unsupported futex_pi op\n");
1022			pem->flags |= LINUX_XUNSUP_FUTEXPIOP;
1023			LIN_SDT_PROBE0(futex, linux_sys_futex,
1024			    unimplemented_cmp_requeue_pi);
1025		}
1026		LIN_SDT_PROBE1(futex, linux_sys_futex, return, ENOSYS);
1027		return (ENOSYS);
1028
1029	default:
1030		linux_msg(td,
1031			  "linux_sys_futex: unknown op %d\n", args->op);
1032		LIN_SDT_PROBE1(futex, linux_sys_futex, unknown_operation,
1033		    args->op);
1034		LIN_SDT_PROBE1(futex, linux_sys_futex, return, ENOSYS);
1035		return (ENOSYS);
1036	}
1037
1038	LIN_SDT_PROBE1(futex, linux_sys_futex, return, error);
1039	return (error);
1040}
1041
1042int
1043linux_set_robust_list(struct thread *td, struct linux_set_robust_list_args *args)
1044{
1045	struct linux_emuldata *em;
1046
1047	LIN_SDT_PROBE2(futex, linux_set_robust_list, entry, td, args);
1048
1049	if (args->len != sizeof(struct linux_robust_list_head)) {
1050		LIN_SDT_PROBE0(futex, linux_set_robust_list, size_error);
1051		LIN_SDT_PROBE1(futex, linux_set_robust_list, return, EINVAL);
1052		return (EINVAL);
1053	}
1054
1055	em = em_find(td);
1056	em->robust_futexes = args->head;
1057
1058	LIN_SDT_PROBE1(futex, linux_set_robust_list, return, 0);
1059	return (0);
1060}
1061
1062int
1063linux_get_robust_list(struct thread *td, struct linux_get_robust_list_args *args)
1064{
1065	struct linux_emuldata *em;
1066	struct linux_robust_list_head *head;
1067	l_size_t len = sizeof(struct linux_robust_list_head);
1068	struct thread *td2;
1069	int error = 0;
1070
1071	LIN_SDT_PROBE2(futex, linux_get_robust_list, entry, td, args);
1072
1073	if (!args->pid) {
1074		em = em_find(td);
1075		KASSERT(em != NULL, ("get_robust_list: emuldata notfound.\n"));
1076		head = em->robust_futexes;
1077	} else {
1078		td2 = tdfind(args->pid, -1);
1079		if (td2 == NULL) {
1080			LIN_SDT_PROBE1(futex, linux_get_robust_list, return,
1081			    ESRCH);
1082			return (ESRCH);
1083		}
1084
1085		em = em_find(td2);
1086		KASSERT(em != NULL, ("get_robust_list: emuldata notfound.\n"));
1087		/* XXX: ptrace? */
1088		if (priv_check(td, PRIV_CRED_SETUID) ||
1089		    priv_check(td, PRIV_CRED_SETEUID) ||
1090		    p_candebug(td, td2->td_proc)) {
1091			PROC_UNLOCK(td2->td_proc);
1092
1093			LIN_SDT_PROBE1(futex, linux_get_robust_list, return,
1094			    EPERM);
1095			return (EPERM);
1096		}
1097		head = em->robust_futexes;
1098
1099		PROC_UNLOCK(td2->td_proc);
1100	}
1101
1102	error = copyout(&len, args->len, sizeof(l_size_t));
1103	if (error) {
1104		LIN_SDT_PROBE1(futex, linux_get_robust_list, copyout_error,
1105		    error);
1106		LIN_SDT_PROBE1(futex, linux_get_robust_list, return, EFAULT);
1107		return (EFAULT);
1108	}
1109
1110	error = copyout(head, args->head, sizeof(struct linux_robust_list_head));
1111	if (error) {
1112		LIN_SDT_PROBE1(futex, linux_get_robust_list, copyout_error,
1113		    error);
1114	}
1115
1116	LIN_SDT_PROBE1(futex, linux_get_robust_list, return, error);
1117	return (error);
1118}
1119
1120static int
1121handle_futex_death(struct linux_emuldata *em, uint32_t *uaddr,
1122    unsigned int pi)
1123{
1124	uint32_t uval, nval, mval;
1125	struct futex *f;
1126	int error;
1127
1128	LIN_SDT_PROBE3(futex, handle_futex_death, entry, em, uaddr, pi);
1129
1130retry:
1131	error = copyin(uaddr, &uval, 4);
1132	if (error) {
1133		LIN_SDT_PROBE1(futex, handle_futex_death, copyin_error, error);
1134		LIN_SDT_PROBE1(futex, handle_futex_death, return, EFAULT);
1135		return (EFAULT);
1136	}
1137	if ((uval & FUTEX_TID_MASK) == em->em_tid) {
1138		mval = (uval & FUTEX_WAITERS) | FUTEX_OWNER_DIED;
1139		nval = casuword32(uaddr, uval, mval);
1140
1141		if (nval == -1) {
1142			LIN_SDT_PROBE1(futex, handle_futex_death, return,
1143			    EFAULT);
1144			return (EFAULT);
1145		}
1146
1147		if (nval != uval)
1148			goto retry;
1149
1150		if (!pi && (uval & FUTEX_WAITERS)) {
1151			error = futex_get(uaddr, NULL, &f,
1152			    FUTEX_DONTCREATE | FUTEX_SHARED);
1153			if (error) {
1154				LIN_SDT_PROBE1(futex, handle_futex_death,
1155				    return, error);
1156				return (error);
1157			}
1158			if (f != NULL) {
1159				futex_wake(f, 1, FUTEX_BITSET_MATCH_ANY);
1160				futex_put(f, NULL);
1161			}
1162		}
1163	}
1164
1165	LIN_SDT_PROBE1(futex, handle_futex_death, return, 0);
1166	return (0);
1167}
1168
1169static int
1170fetch_robust_entry(struct linux_robust_list **entry,
1171    struct linux_robust_list **head, unsigned int *pi)
1172{
1173	l_ulong uentry;
1174	int error;
1175
1176	LIN_SDT_PROBE3(futex, fetch_robust_entry, entry, entry, head, pi);
1177
1178	error = copyin((const void *)head, &uentry, sizeof(l_ulong));
1179	if (error) {
1180		LIN_SDT_PROBE1(futex, fetch_robust_entry, copyin_error, error);
1181		LIN_SDT_PROBE1(futex, fetch_robust_entry, return, EFAULT);
1182		return (EFAULT);
1183	}
1184
1185	*entry = (void *)(uentry & ~1UL);
1186	*pi = uentry & 1;
1187
1188	LIN_SDT_PROBE1(futex, fetch_robust_entry, return, 0);
1189	return (0);
1190}
1191
1192/* This walks the list of robust futexes releasing them. */
1193void
1194release_futexes(struct thread *td, struct linux_emuldata *em)
1195{
1196	struct linux_robust_list_head *head = NULL;
1197	struct linux_robust_list *entry, *next_entry, *pending;
1198	unsigned int limit = 2048, pi, next_pi, pip;
1199	l_long futex_offset;
1200	int rc, error;
1201
1202	LIN_SDT_PROBE2(futex, release_futexes, entry, td, em);
1203
1204	head = em->robust_futexes;
1205
1206	if (head == NULL) {
1207		LIN_SDT_PROBE0(futex, release_futexes, return);
1208		return;
1209	}
1210
1211	if (fetch_robust_entry(&entry, PTRIN(&head->list.next), &pi)) {
1212		LIN_SDT_PROBE0(futex, release_futexes, return);
1213		return;
1214	}
1215
1216	error = copyin(&head->futex_offset, &futex_offset,
1217	    sizeof(futex_offset));
1218	if (error) {
1219		LIN_SDT_PROBE1(futex, release_futexes, copyin_error, error);
1220		LIN_SDT_PROBE0(futex, release_futexes, return);
1221		return;
1222	}
1223
1224	if (fetch_robust_entry(&pending, PTRIN(&head->pending_list), &pip)) {
1225		LIN_SDT_PROBE0(futex, release_futexes, return);
1226		return;
1227	}
1228
1229	while (entry != &head->list) {
1230		rc = fetch_robust_entry(&next_entry, PTRIN(&entry->next), &next_pi);
1231
1232		if (entry != pending)
1233			if (handle_futex_death(em,
1234			    (uint32_t *)((caddr_t)entry + futex_offset), pi)) {
1235				LIN_SDT_PROBE0(futex, release_futexes, return);
1236				return;
1237			}
1238		if (rc) {
1239			LIN_SDT_PROBE0(futex, release_futexes, return);
1240			return;
1241		}
1242
1243		entry = next_entry;
1244		pi = next_pi;
1245
1246		if (!--limit)
1247			break;
1248
1249		sched_relinquish(curthread);
1250	}
1251
1252	if (pending)
1253		handle_futex_death(em, (uint32_t *)((caddr_t)pending + futex_offset), pip);
1254
1255	LIN_SDT_PROBE0(futex, release_futexes, return);
1256}
1257