linux_futex.c revision 293564
1/*	$NetBSD: linux_futex.c,v 1.7 2006/07/24 19:01:49 manu Exp $ */
2
3/*-
4 * Copyright (c) 2005 Emmanuel Dreyfus, all rights reserved.
5 *
6 * Redistribution and use in source and binary forms, with or without
7 * modification, are permitted provided that the following conditions
8 * are met:
9 * 1. Redistributions of source code must retain the above copyright
10 *    notice, this list of conditions and the following disclaimer.
11 * 2. Redistributions in binary form must reproduce the above copyright
12 *    notice, this list of conditions and the following disclaimer in the
13 *    documentation and/or other materials provided with the distribution.
14 * 3. All advertising materials mentioning features or use of this software
15 *    must display the following acknowledgement:
16 *	This product includes software developed by Emmanuel Dreyfus
17 * 4. The name of the author may not be used to endorse or promote
18 *    products derived from this software without specific prior written
19 *    permission.
20 *
21 * THIS SOFTWARE IS PROVIDED BY THE THE AUTHOR AND CONTRIBUTORS ``AS IS''
22 * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO,
23 * THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
24 * PURPOSE ARE DISCLAIMED.  IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS
25 * BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
26 * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
27 * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
28 * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
29 * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
30 * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
31 * POSSIBILITY OF SUCH DAMAGE.
32 */
33
34#include <sys/cdefs.h>
35__FBSDID("$FreeBSD: stable/10/sys/compat/linux/linux_futex.c 293564 2016-01-09 17:10:22Z dchagin $");
36#if 0
37__KERNEL_RCSID(1, "$NetBSD: linux_futex.c,v 1.7 2006/07/24 19:01:49 manu Exp $");
38#endif
39
40#include "opt_compat.h"
41#include "opt_kdtrace.h"
42
43#include <sys/param.h>
44#include <sys/systm.h>
45#include <sys/imgact.h>
46#include <sys/kernel.h>
47#include <sys/ktr.h>
48#include <sys/lock.h>
49#include <sys/malloc.h>
50#include <sys/mutex.h>
51#include <sys/priv.h>
52#include <sys/proc.h>
53#include <sys/queue.h>
54#include <sys/sched.h>
55#include <sys/sdt.h>
56#include <sys/sx.h>
57#include <sys/umtx.h>
58
59#ifdef COMPAT_LINUX32
60#include <machine/../linux32/linux.h>
61#include <machine/../linux32/linux32_proto.h>
62#else
63#include <machine/../linux/linux.h>
64#include <machine/../linux/linux_proto.h>
65#endif
66#include <compat/linux/linux_dtrace.h>
67#include <compat/linux/linux_emul.h>
68#include <compat/linux/linux_futex.h>
69#include <compat/linux/linux_util.h>
70
71/* DTrace init */
72LIN_SDT_PROVIDER_DECLARE(LINUX_DTRACE);
73
74/**
75 * Futex part for the special DTrace module "locks".
76 */
77LIN_SDT_PROBE_DEFINE1(locks, futex_mtx, locked, "struct mtx *");
78LIN_SDT_PROBE_DEFINE1(locks, futex_mtx, unlock, "struct mtx *");
79
80/**
81 * Per futex probes.
82 */
83LIN_SDT_PROBE_DEFINE1(futex, futex, create, "struct sx *");
84LIN_SDT_PROBE_DEFINE1(futex, futex, destroy, "struct sx *");
85
86/**
87 * DTrace probes in this module.
88 */
89LIN_SDT_PROBE_DEFINE2(futex, futex_put, entry, "struct futex *",
90    "struct waiting_proc *");
91LIN_SDT_PROBE_DEFINE3(futex, futex_put, destroy, "uint32_t *", "uint32_t",
92    "int");
93LIN_SDT_PROBE_DEFINE3(futex, futex_put, unlock, "uint32_t *", "uint32_t",
94    "int");
95LIN_SDT_PROBE_DEFINE0(futex, futex_put, return);
96LIN_SDT_PROBE_DEFINE3(futex, futex_get0, entry, "uint32_t *", "struct futex **",
97    "uint32_t");
98LIN_SDT_PROBE_DEFINE1(futex, futex_get0, umtx_key_get_error, "int");
99LIN_SDT_PROBE_DEFINE3(futex, futex_get0, shared, "uint32_t *", "uint32_t",
100    "int");
101LIN_SDT_PROBE_DEFINE1(futex, futex_get0, null, "uint32_t *");
102LIN_SDT_PROBE_DEFINE3(futex, futex_get0, new, "uint32_t *", "uint32_t", "int");
103LIN_SDT_PROBE_DEFINE1(futex, futex_get0, return, "int");
104LIN_SDT_PROBE_DEFINE3(futex, futex_get, entry, "uint32_t *",
105    "struct waiting_proc **", "struct futex **");
106LIN_SDT_PROBE_DEFINE0(futex, futex_get, error);
107LIN_SDT_PROBE_DEFINE1(futex, futex_get, return, "int");
108LIN_SDT_PROBE_DEFINE3(futex, futex_sleep, entry, "struct futex *",
109    "struct waiting_proc **", "int");
110LIN_SDT_PROBE_DEFINE5(futex, futex_sleep, requeue_error, "int", "uint32_t *",
111    "struct waiting_proc *", "uint32_t *", "uint32_t");
112LIN_SDT_PROBE_DEFINE3(futex, futex_sleep, sleep_error, "int", "uint32_t *",
113    "struct waiting_proc *");
114LIN_SDT_PROBE_DEFINE1(futex, futex_sleep, return, "int");
115LIN_SDT_PROBE_DEFINE3(futex, futex_wake, entry, "struct futex *", "int",
116    "uint32_t");
117LIN_SDT_PROBE_DEFINE3(futex, futex_wake, iterate, "uint32_t",
118    "struct waiting_proc *", "uint32_t");
119LIN_SDT_PROBE_DEFINE1(futex, futex_wake, wakeup, "struct waiting_proc *");
120LIN_SDT_PROBE_DEFINE1(futex, futex_wake, return, "int");
121LIN_SDT_PROBE_DEFINE4(futex, futex_requeue, entry, "struct futex *", "int",
122    "struct futex *", "int");
123LIN_SDT_PROBE_DEFINE1(futex, futex_requeue, wakeup, "struct waiting_proc *");
124LIN_SDT_PROBE_DEFINE3(futex, futex_requeue, requeue, "uint32_t *",
125    "struct waiting_proc *", "uint32_t");
126LIN_SDT_PROBE_DEFINE1(futex, futex_requeue, return, "int");
127LIN_SDT_PROBE_DEFINE4(futex, futex_wait, entry, "struct futex *",
128    "struct waiting_proc **", "int", "uint32_t");
129LIN_SDT_PROBE_DEFINE1(futex, futex_wait, sleep_error, "int");
130LIN_SDT_PROBE_DEFINE1(futex, futex_wait, return, "int");
131LIN_SDT_PROBE_DEFINE3(futex, futex_atomic_op, entry, "struct thread *",
132    "int", "uint32_t");
133LIN_SDT_PROBE_DEFINE4(futex, futex_atomic_op, decoded_op, "int", "int", "int",
134    "int");
135LIN_SDT_PROBE_DEFINE0(futex, futex_atomic_op, missing_access_check);
136LIN_SDT_PROBE_DEFINE1(futex, futex_atomic_op, unimplemented_op, "int");
137LIN_SDT_PROBE_DEFINE1(futex, futex_atomic_op, unimplemented_cmp, "int");
138LIN_SDT_PROBE_DEFINE1(futex, futex_atomic_op, return, "int");
139LIN_SDT_PROBE_DEFINE2(futex, linux_sys_futex, entry, "struct thread *",
140    "struct linux_sys_futex_args *");
141LIN_SDT_PROBE_DEFINE0(futex, linux_sys_futex, unimplemented_clockswitch);
142LIN_SDT_PROBE_DEFINE1(futex, linux_sys_futex, itimerfix_error, "int");
143LIN_SDT_PROBE_DEFINE1(futex, linux_sys_futex, copyin_error, "int");
144LIN_SDT_PROBE_DEFINE0(futex, linux_sys_futex, invalid_cmp_requeue_use);
145LIN_SDT_PROBE_DEFINE3(futex, linux_sys_futex, debug_wait, "uint32_t *",
146    "uint32_t", "uint32_t");
147LIN_SDT_PROBE_DEFINE4(futex, linux_sys_futex, debug_wait_value_neq,
148    "uint32_t *", "uint32_t", "int", "uint32_t");
149LIN_SDT_PROBE_DEFINE3(futex, linux_sys_futex, debug_wake, "uint32_t *",
150    "uint32_t", "uint32_t");
151LIN_SDT_PROBE_DEFINE5(futex, linux_sys_futex, debug_cmp_requeue, "uint32_t *",
152    "uint32_t", "uint32_t", "uint32_t *", "struct l_timespec *");
153LIN_SDT_PROBE_DEFINE2(futex, linux_sys_futex, debug_cmp_requeue_value_neq,
154    "uint32_t", "int");
155LIN_SDT_PROBE_DEFINE5(futex, linux_sys_futex, debug_wake_op, "uint32_t *",
156    "int", "uint32_t", "uint32_t *", "uint32_t");
157LIN_SDT_PROBE_DEFINE0(futex, linux_sys_futex, unhandled_efault);
158LIN_SDT_PROBE_DEFINE0(futex, linux_sys_futex, unimplemented_lock_pi);
159LIN_SDT_PROBE_DEFINE0(futex, linux_sys_futex, unimplemented_unlock_pi);
160LIN_SDT_PROBE_DEFINE0(futex, linux_sys_futex, unimplemented_trylock_pi);
161LIN_SDT_PROBE_DEFINE0(futex, linux_sys_futex, deprecated_requeue);
162LIN_SDT_PROBE_DEFINE0(futex, linux_sys_futex, unimplemented_wait_requeue_pi);
163LIN_SDT_PROBE_DEFINE0(futex, linux_sys_futex, unimplemented_cmp_requeue_pi);
164LIN_SDT_PROBE_DEFINE1(futex, linux_sys_futex, unknown_operation, "int");
165LIN_SDT_PROBE_DEFINE1(futex, linux_sys_futex, return, "int");
166LIN_SDT_PROBE_DEFINE2(futex, linux_set_robust_list, entry, "struct thread *",
167    "struct linux_set_robust_list_args *");
168LIN_SDT_PROBE_DEFINE0(futex, linux_set_robust_list, size_error);
169LIN_SDT_PROBE_DEFINE1(futex, linux_set_robust_list, return, "int");
170LIN_SDT_PROBE_DEFINE2(futex, linux_get_robust_list, entry, "struct thread *",
171    "struct linux_get_robust_list_args *");
172LIN_SDT_PROBE_DEFINE1(futex, linux_get_robust_list, copyout_error, "int");
173LIN_SDT_PROBE_DEFINE1(futex, linux_get_robust_list, return, "int");
174LIN_SDT_PROBE_DEFINE3(futex, handle_futex_death, entry,
175    "struct linux_emuldata *", "uint32_t *", "unsigned int");
176LIN_SDT_PROBE_DEFINE1(futex, handle_futex_death, copyin_error, "int");
177LIN_SDT_PROBE_DEFINE1(futex, handle_futex_death, return, "int");
178LIN_SDT_PROBE_DEFINE3(futex, fetch_robust_entry, entry,
179    "struct linux_robust_list **", "struct linux_robust_list **",
180    "unsigned int *");
181LIN_SDT_PROBE_DEFINE1(futex, fetch_robust_entry, copyin_error, "int");
182LIN_SDT_PROBE_DEFINE1(futex, fetch_robust_entry, return, "int");
183LIN_SDT_PROBE_DEFINE2(futex, release_futexes, entry, "struct thread *",
184    "struct linux_emuldata *");
185LIN_SDT_PROBE_DEFINE1(futex, release_futexes, copyin_error, "int");
186LIN_SDT_PROBE_DEFINE0(futex, release_futexes, return);
187
188struct futex;
189
190struct waiting_proc {
191	uint32_t	wp_flags;
192	struct futex	*wp_futex;
193	TAILQ_ENTRY(waiting_proc) wp_list;
194};
195
196struct futex {
197	struct sx	f_lck;
198	uint32_t	*f_uaddr;	/* user-supplied value, for debug */
199	struct umtx_key	f_key;
200	uint32_t	f_refcount;
201	uint32_t	f_bitset;
202	LIST_ENTRY(futex) f_list;
203	TAILQ_HEAD(lf_waiting_proc, waiting_proc) f_waiting_proc;
204};
205
206struct futex_list futex_list;
207
208#define FUTEX_LOCK(f)		sx_xlock(&(f)->f_lck)
209#define FUTEX_UNLOCK(f)		sx_xunlock(&(f)->f_lck)
210#define FUTEX_INIT(f)		do { \
211				    sx_init_flags(&(f)->f_lck, "ftlk", \
212					SX_DUPOK); \
213				    LIN_SDT_PROBE1(futex, futex, create, \
214					&(f)->f_lck); \
215				} while (0)
216#define FUTEX_DESTROY(f)	do { \
217				    LIN_SDT_PROBE1(futex, futex, destroy, \
218					&(f)->f_lck); \
219				    sx_destroy(&(f)->f_lck); \
220				} while (0)
221#define FUTEX_ASSERT_LOCKED(f)	sx_assert(&(f)->f_lck, SA_XLOCKED)
222
223struct mtx futex_mtx;			/* protects the futex list */
224#define FUTEXES_LOCK		do { \
225				    mtx_lock(&futex_mtx); \
226				    LIN_SDT_PROBE1(locks, futex_mtx, \
227					locked, &futex_mtx); \
228				} while (0)
229#define FUTEXES_UNLOCK		do { \
230				    LIN_SDT_PROBE1(locks, futex_mtx, \
231					unlock, &futex_mtx); \
232				    mtx_unlock(&futex_mtx); \
233				} while (0)
234
/*
 * Lookup flags understood by futex_get() and futex_get0().
 */
#define	FUTEX_CREATE_WP		0x1	/* create waiting_proc */
#define	FUTEX_DONTCREATE	0x2	/* don't create futex if not exists */
#define	FUTEX_DONTEXISTS	0x4	/* return EINVAL if futex exists */
#define	FUTEX_SHARED		0x8	/* shared futex */

/*
 * Bits kept in waiting_proc.wp_flags.
 *
 * FUTEX_WP_REQUEUED: the wp was moved from the wp_list of the futex the
 * thread went to sleep on to the wp_list of another futex.
 *
 * FUTEX_WP_REMOVED: the wp was woken up and already taken off the futex
 * wp_list, which prevents a double wakeup.
 */
#define	FUTEX_WP_REQUEUED	0x1
#define	FUTEX_WP_REMOVED	0x2
249
250static void futex_put(struct futex *, struct waiting_proc *);
251static int futex_get0(uint32_t *, struct futex **f, uint32_t);
252static int futex_get(uint32_t *, struct waiting_proc **, struct futex **,
253    uint32_t);
254static int futex_sleep(struct futex *, struct waiting_proc *, int);
255static int futex_wake(struct futex *, int, uint32_t);
256static int futex_requeue(struct futex *, int, struct futex *, int);
257static int futex_wait(struct futex *, struct waiting_proc *, int,
258    uint32_t);
259static int futex_atomic_op(struct thread *, int, uint32_t *);
260static int handle_futex_death(struct linux_emuldata *, uint32_t *,
261    unsigned int);
262static int fetch_robust_entry(struct linux_robust_list **,
263    struct linux_robust_list **, unsigned int *);
264
265/* support.s */
266int futex_xchgl(int oparg, uint32_t *uaddr, int *oldval);
267int futex_addl(int oparg, uint32_t *uaddr, int *oldval);
268int futex_orl(int oparg, uint32_t *uaddr, int *oldval);
269int futex_andl(int oparg, uint32_t *uaddr, int *oldval);
270int futex_xorl(int oparg, uint32_t *uaddr, int *oldval);
271
272
273static void
274futex_put(struct futex *f, struct waiting_proc *wp)
275{
276	LIN_SDT_PROBE2(futex, futex_put, entry, f, wp);
277
278	FUTEX_ASSERT_LOCKED(f);
279	if (wp != NULL) {
280		if ((wp->wp_flags & FUTEX_WP_REMOVED) == 0)
281			TAILQ_REMOVE(&f->f_waiting_proc, wp, wp_list);
282		free(wp, M_FUTEX_WP);
283	}
284
285	FUTEXES_LOCK;
286	if (--f->f_refcount == 0) {
287		LIST_REMOVE(f, f_list);
288		FUTEXES_UNLOCK;
289		FUTEX_UNLOCK(f);
290
291		LIN_SDT_PROBE3(futex, futex_put, destroy, f->f_uaddr,
292		    f->f_refcount, f->f_key.shared);
293		LINUX_CTR3(sys_futex, "futex_put destroy uaddr %p ref %d "
294		    "shared %d", f->f_uaddr, f->f_refcount, f->f_key.shared);
295		umtx_key_release(&f->f_key);
296		FUTEX_DESTROY(f);
297		free(f, M_FUTEX);
298
299		LIN_SDT_PROBE0(futex, futex_put, return);
300		return;
301	}
302
303	LIN_SDT_PROBE3(futex, futex_put, unlock, f->f_uaddr, f->f_refcount,
304	    f->f_key.shared);
305	LINUX_CTR3(sys_futex, "futex_put uaddr %p ref %d shared %d",
306	    f->f_uaddr, f->f_refcount, f->f_key.shared);
307	FUTEXES_UNLOCK;
308	FUTEX_UNLOCK(f);
309
310	LIN_SDT_PROBE0(futex, futex_put, return);
311}
312
313static int
314futex_get0(uint32_t *uaddr, struct futex **newf, uint32_t flags)
315{
316	struct futex *f, *tmpf;
317	struct umtx_key key;
318	int error;
319
320	LIN_SDT_PROBE3(futex, futex_get0, entry, uaddr, newf, flags);
321
322	*newf = tmpf = NULL;
323
324	error = umtx_key_get(uaddr, TYPE_FUTEX, (flags & FUTEX_SHARED) ?
325	    AUTO_SHARE : THREAD_SHARE, &key);
326	if (error) {
327		LIN_SDT_PROBE1(futex, futex_get0, umtx_key_get_error, error);
328		LIN_SDT_PROBE1(futex, futex_get0, return, error);
329		return (error);
330	}
331retry:
332	FUTEXES_LOCK;
333	LIST_FOREACH(f, &futex_list, f_list) {
334		if (umtx_key_match(&f->f_key, &key)) {
335			if (tmpf != NULL) {
336				FUTEX_UNLOCK(tmpf);
337				FUTEX_DESTROY(tmpf);
338				free(tmpf, M_FUTEX);
339			}
340			if (flags & FUTEX_DONTEXISTS) {
341				FUTEXES_UNLOCK;
342				umtx_key_release(&key);
343
344				LIN_SDT_PROBE1(futex, futex_get0, return,
345				    EINVAL);
346				return (EINVAL);
347			}
348
349			/*
350			 * Increment refcount of the found futex to
351			 * prevent it from deallocation before FUTEX_LOCK()
352			 */
353			++f->f_refcount;
354			FUTEXES_UNLOCK;
355			umtx_key_release(&key);
356
357			FUTEX_LOCK(f);
358			*newf = f;
359			LIN_SDT_PROBE3(futex, futex_get0, shared, uaddr,
360			    f->f_refcount, f->f_key.shared);
361			LINUX_CTR3(sys_futex, "futex_get uaddr %p ref %d shared %d",
362			    uaddr, f->f_refcount, f->f_key.shared);
363
364			LIN_SDT_PROBE1(futex, futex_get0, return, 0);
365			return (0);
366		}
367	}
368
369	if (flags & FUTEX_DONTCREATE) {
370		FUTEXES_UNLOCK;
371		umtx_key_release(&key);
372		LIN_SDT_PROBE1(futex, futex_get0, null, uaddr);
373		LINUX_CTR1(sys_futex, "futex_get uaddr %p null", uaddr);
374
375		LIN_SDT_PROBE1(futex, futex_get0, return, 0);
376		return (0);
377	}
378
379	if (tmpf == NULL) {
380		FUTEXES_UNLOCK;
381		tmpf = malloc(sizeof(*tmpf), M_FUTEX, M_WAITOK | M_ZERO);
382		tmpf->f_uaddr = uaddr;
383		tmpf->f_key = key;
384		tmpf->f_refcount = 1;
385		tmpf->f_bitset = FUTEX_BITSET_MATCH_ANY;
386		FUTEX_INIT(tmpf);
387		TAILQ_INIT(&tmpf->f_waiting_proc);
388
389		/*
390		 * Lock the new futex before an insert into the futex_list
391		 * to prevent futex usage by other.
392		 */
393		FUTEX_LOCK(tmpf);
394		goto retry;
395	}
396
397	LIST_INSERT_HEAD(&futex_list, tmpf, f_list);
398	FUTEXES_UNLOCK;
399
400	LIN_SDT_PROBE3(futex, futex_get0, new, uaddr, tmpf->f_refcount,
401	    tmpf->f_key.shared);
402	LINUX_CTR3(sys_futex, "futex_get uaddr %p ref %d shared %d new",
403	    uaddr, tmpf->f_refcount, tmpf->f_key.shared);
404	*newf = tmpf;
405
406	LIN_SDT_PROBE1(futex, futex_get0, return, 0);
407	return (0);
408}
409
410static int
411futex_get(uint32_t *uaddr, struct waiting_proc **wp, struct futex **f,
412    uint32_t flags)
413{
414	int error;
415
416	LIN_SDT_PROBE3(futex, futex_get, entry, uaddr, wp, f);
417
418	if (flags & FUTEX_CREATE_WP) {
419		*wp = malloc(sizeof(struct waiting_proc), M_FUTEX_WP, M_WAITOK);
420		(*wp)->wp_flags = 0;
421	}
422	error = futex_get0(uaddr, f, flags);
423	if (error) {
424		LIN_SDT_PROBE0(futex, futex_get, error);
425
426		if (flags & FUTEX_CREATE_WP)
427			free(*wp, M_FUTEX_WP);
428
429		LIN_SDT_PROBE1(futex, futex_get, return, error);
430		return (error);
431	}
432	if (flags & FUTEX_CREATE_WP) {
433		TAILQ_INSERT_HEAD(&(*f)->f_waiting_proc, *wp, wp_list);
434		(*wp)->wp_futex = *f;
435	}
436
437	LIN_SDT_PROBE1(futex, futex_get, return, error);
438	return (error);
439}
440
441static int
442futex_sleep(struct futex *f, struct waiting_proc *wp, int timeout)
443{
444	int error;
445
446	FUTEX_ASSERT_LOCKED(f);
447	LIN_SDT_PROBE3(futex, futex_sleep, entry, f, wp, timeout);
448	LINUX_CTR4(sys_futex, "futex_sleep enter uaddr %p wp %p timo %d ref %d",
449	    f->f_uaddr, wp, timeout, f->f_refcount);
450	error = sx_sleep(wp, &f->f_lck, PCATCH, "futex", timeout);
451	if (wp->wp_flags & FUTEX_WP_REQUEUED) {
452		KASSERT(f != wp->wp_futex, ("futex != wp_futex"));
453
454		if (error) {
455			LIN_SDT_PROBE5(futex, futex_sleep, requeue_error, error,
456			    f->f_uaddr, wp, wp->wp_futex->f_uaddr,
457			    wp->wp_futex->f_refcount);
458		}
459
460		LINUX_CTR5(sys_futex, "futex_sleep out error %d uaddr %p wp"
461		    " %p requeued uaddr %p ref %d",
462		    error, f->f_uaddr, wp, wp->wp_futex->f_uaddr,
463		    wp->wp_futex->f_refcount);
464		futex_put(f, NULL);
465		f = wp->wp_futex;
466		FUTEX_LOCK(f);
467	} else {
468		if (error) {
469			LIN_SDT_PROBE3(futex, futex_sleep, sleep_error, error,
470			    f->f_uaddr, wp);
471		}
472		LINUX_CTR3(sys_futex, "futex_sleep out error %d uaddr %p wp %p",
473		    error, f->f_uaddr, wp);
474	}
475
476	futex_put(f, wp);
477
478	LIN_SDT_PROBE1(futex, futex_sleep, return, error);
479	return (error);
480}
481
482static int
483futex_wake(struct futex *f, int n, uint32_t bitset)
484{
485	struct waiting_proc *wp, *wpt;
486	int count = 0;
487
488	LIN_SDT_PROBE3(futex, futex_wake, entry, f, n, bitset);
489
490	if (bitset == 0) {
491		LIN_SDT_PROBE1(futex, futex_wake, return, EINVAL);
492		return (EINVAL);
493	}
494
495	FUTEX_ASSERT_LOCKED(f);
496	TAILQ_FOREACH_SAFE(wp, &f->f_waiting_proc, wp_list, wpt) {
497		LIN_SDT_PROBE3(futex, futex_wake, iterate, f->f_uaddr, wp,
498		    f->f_refcount);
499		LINUX_CTR3(sys_futex, "futex_wake uaddr %p wp %p ref %d",
500		    f->f_uaddr, wp, f->f_refcount);
501		/*
502		 * Unless we find a matching bit in
503		 * the bitset, continue searching.
504		 */
505		if (!(wp->wp_futex->f_bitset & bitset))
506			continue;
507
508		wp->wp_flags |= FUTEX_WP_REMOVED;
509		TAILQ_REMOVE(&f->f_waiting_proc, wp, wp_list);
510		LIN_SDT_PROBE1(futex, futex_wake, wakeup, wp);
511		wakeup_one(wp);
512		if (++count == n)
513			break;
514	}
515
516	LIN_SDT_PROBE1(futex, futex_wake, return, count);
517	return (count);
518}
519
520static int
521futex_requeue(struct futex *f, int n, struct futex *f2, int n2)
522{
523	struct waiting_proc *wp, *wpt;
524	int count = 0;
525
526	LIN_SDT_PROBE4(futex, futex_requeue, entry, f, n, f2, n2);
527
528	FUTEX_ASSERT_LOCKED(f);
529	FUTEX_ASSERT_LOCKED(f2);
530
531	TAILQ_FOREACH_SAFE(wp, &f->f_waiting_proc, wp_list, wpt) {
532		if (++count <= n) {
533			LINUX_CTR2(sys_futex, "futex_req_wake uaddr %p wp %p",
534			    f->f_uaddr, wp);
535			wp->wp_flags |= FUTEX_WP_REMOVED;
536			TAILQ_REMOVE(&f->f_waiting_proc, wp, wp_list);
537			LIN_SDT_PROBE1(futex, futex_requeue, wakeup, wp);
538			wakeup_one(wp);
539		} else {
540			LIN_SDT_PROBE3(futex, futex_requeue, requeue,
541			    f->f_uaddr, wp, f2->f_uaddr);
542			LINUX_CTR3(sys_futex, "futex_requeue uaddr %p wp %p to %p",
543			    f->f_uaddr, wp, f2->f_uaddr);
544			wp->wp_flags |= FUTEX_WP_REQUEUED;
545			/* Move wp to wp_list of f2 futex */
546			TAILQ_REMOVE(&f->f_waiting_proc, wp, wp_list);
547			TAILQ_INSERT_HEAD(&f2->f_waiting_proc, wp, wp_list);
548
549			/*
550			 * Thread which sleeps on wp after waking should
551			 * acquire f2 lock, so increment refcount of f2 to
552			 * prevent it from premature deallocation.
553			 */
554			wp->wp_futex = f2;
555			FUTEXES_LOCK;
556			++f2->f_refcount;
557			FUTEXES_UNLOCK;
558			if (count - n >= n2)
559				break;
560		}
561	}
562
563	LIN_SDT_PROBE1(futex, futex_requeue, return, count);
564	return (count);
565}
566
567static int
568futex_wait(struct futex *f, struct waiting_proc *wp, int timeout_hz,
569    uint32_t bitset)
570{
571	int error;
572
573	LIN_SDT_PROBE4(futex, futex_wait, entry, f, wp, timeout_hz, bitset);
574
575	if (bitset == 0) {
576		LIN_SDT_PROBE1(futex, futex_wait, return, EINVAL);
577		return (EINVAL);
578	}
579
580	f->f_bitset = bitset;
581	error = futex_sleep(f, wp, timeout_hz);
582	if (error)
583		LIN_SDT_PROBE1(futex, futex_wait, sleep_error, error);
584	if (error == EWOULDBLOCK)
585		error = ETIMEDOUT;
586
587	LIN_SDT_PROBE1(futex, futex_wait, return, error);
588	return (error);
589}
590
591static int
592futex_atomic_op(struct thread *td, int encoded_op, uint32_t *uaddr)
593{
594	int op = (encoded_op >> 28) & 7;
595	int cmp = (encoded_op >> 24) & 15;
596	int oparg = (encoded_op << 8) >> 20;
597	int cmparg = (encoded_op << 20) >> 20;
598	int oldval = 0, ret;
599
600	LIN_SDT_PROBE3(futex, futex_atomic_op, entry, td, encoded_op, uaddr);
601
602	if (encoded_op & (FUTEX_OP_OPARG_SHIFT << 28))
603		oparg = 1 << oparg;
604
605	LIN_SDT_PROBE4(futex, futex_atomic_op, decoded_op, op, cmp, oparg,
606	    cmparg);
607
608	/* XXX: Linux verifies access here and returns EFAULT */
609	LIN_SDT_PROBE0(futex, futex_atomic_op, missing_access_check);
610
611	switch (op) {
612	case FUTEX_OP_SET:
613		ret = futex_xchgl(oparg, uaddr, &oldval);
614		break;
615	case FUTEX_OP_ADD:
616		ret = futex_addl(oparg, uaddr, &oldval);
617		break;
618	case FUTEX_OP_OR:
619		ret = futex_orl(oparg, uaddr, &oldval);
620		break;
621	case FUTEX_OP_ANDN:
622		ret = futex_andl(~oparg, uaddr, &oldval);
623		break;
624	case FUTEX_OP_XOR:
625		ret = futex_xorl(oparg, uaddr, &oldval);
626		break;
627	default:
628		LIN_SDT_PROBE1(futex, futex_atomic_op, unimplemented_op, op);
629		ret = -ENOSYS;
630		break;
631	}
632
633	if (ret) {
634		LIN_SDT_PROBE1(futex, futex_atomic_op, return, ret);
635		return (ret);
636	}
637
638	switch (cmp) {
639	case FUTEX_OP_CMP_EQ:
640		ret = (oldval == cmparg);
641		break;
642	case FUTEX_OP_CMP_NE:
643		ret = (oldval != cmparg);
644		break;
645	case FUTEX_OP_CMP_LT:
646		ret = (oldval < cmparg);
647		break;
648	case FUTEX_OP_CMP_GE:
649		ret = (oldval >= cmparg);
650		break;
651	case FUTEX_OP_CMP_LE:
652		ret = (oldval <= cmparg);
653		break;
654	case FUTEX_OP_CMP_GT:
655		ret = (oldval > cmparg);
656		break;
657	default:
658		LIN_SDT_PROBE1(futex, futex_atomic_op, unimplemented_cmp, cmp);
659		ret = -ENOSYS;
660	}
661
662	LIN_SDT_PROBE1(futex, futex_atomic_op, return, ret);
663	return (ret);
664}
665
666int
667linux_sys_futex(struct thread *td, struct linux_sys_futex_args *args)
668{
669	int clockrt, nrwake, op_ret, ret;
670	struct linux_pemuldata *pem;
671	struct waiting_proc *wp;
672	struct futex *f, *f2;
673	struct l_timespec timeout;
674	struct timeval utv, ctv;
675	int timeout_hz;
676	int error;
677	uint32_t flags, val;
678
679	LIN_SDT_PROBE2(futex, linux_sys_futex, entry, td, args);
680
681	if (args->op & LINUX_FUTEX_PRIVATE_FLAG) {
682		flags = 0;
683		args->op &= ~LINUX_FUTEX_PRIVATE_FLAG;
684	} else
685		flags = FUTEX_SHARED;
686
687	/*
688	 * Currently support for switching between CLOCK_MONOTONIC and
689	 * CLOCK_REALTIME is not present. However Linux forbids the use of
690	 * FUTEX_CLOCK_REALTIME with any op except FUTEX_WAIT_BITSET and
691	 * FUTEX_WAIT_REQUEUE_PI.
692	 */
693	clockrt = args->op & LINUX_FUTEX_CLOCK_REALTIME;
694	args->op = args->op & ~LINUX_FUTEX_CLOCK_REALTIME;
695	if (clockrt && args->op != LINUX_FUTEX_WAIT_BITSET &&
696		args->op != LINUX_FUTEX_WAIT_REQUEUE_PI) {
697		LIN_SDT_PROBE0(futex, linux_sys_futex,
698		    unimplemented_clockswitch);
699		LIN_SDT_PROBE1(futex, linux_sys_futex, return, ENOSYS);
700		return (ENOSYS);
701	}
702
703	error = 0;
704	f = f2 = NULL;
705
706	switch (args->op) {
707	case LINUX_FUTEX_WAIT:
708		args->val3 = FUTEX_BITSET_MATCH_ANY;
709		/* FALLTHROUGH */
710
711	case LINUX_FUTEX_WAIT_BITSET:
712		LIN_SDT_PROBE3(futex, linux_sys_futex, debug_wait, args->uaddr,
713		    args->val, args->val3);
714		LINUX_CTR3(sys_futex, "WAIT uaddr %p val 0x%x bitset 0x%x",
715		    args->uaddr, args->val, args->val3);
716
717		error = futex_get(args->uaddr, &wp, &f,
718		    flags | FUTEX_CREATE_WP);
719		if (error) {
720			LIN_SDT_PROBE1(futex, linux_sys_futex, return, error);
721			return (error);
722		}
723
724		error = copyin(args->uaddr, &val, sizeof(val));
725		if (error) {
726			LIN_SDT_PROBE1(futex, linux_sys_futex, copyin_error,
727			    error);
728			LINUX_CTR1(sys_futex, "WAIT copyin failed %d",
729			    error);
730			futex_put(f, wp);
731
732			LIN_SDT_PROBE1(futex, linux_sys_futex, return, error);
733			return (error);
734		}
735		if (val != args->val) {
736			LIN_SDT_PROBE4(futex, linux_sys_futex,
737			    debug_wait_value_neq, args->uaddr, args->val, val,
738			    args->val3);
739			LINUX_CTR3(sys_futex,
740			    "WAIT uaddr %p val 0x%x != uval 0x%x",
741			    args->uaddr, args->val, val);
742			futex_put(f, wp);
743
744			LIN_SDT_PROBE1(futex, linux_sys_futex, return,
745			    EWOULDBLOCK);
746			return (EWOULDBLOCK);
747		}
748
749		if (args->timeout != NULL) {
750			error = copyin(args->timeout, &timeout, sizeof(timeout));
751			if (error) {
752				LIN_SDT_PROBE1(futex, linux_sys_futex, copyin_error,
753				    error);
754				LIN_SDT_PROBE1(futex, linux_sys_futex, return, error);
755				futex_put(f, wp);
756				return (error);
757			}
758			TIMESPEC_TO_TIMEVAL(&utv, &timeout);
759			error = itimerfix(&utv);
760			if (error) {
761				LIN_SDT_PROBE1(futex, linux_sys_futex, itimerfix_error,
762				    error);
763				LIN_SDT_PROBE1(futex, linux_sys_futex, return, error);
764				futex_put(f, wp);
765				return (error);
766			}
767			if (clockrt) {
768				microtime(&ctv);
769				timevalsub(&utv, &ctv);
770			} else if (args->op == LINUX_FUTEX_WAIT_BITSET) {
771				microuptime(&ctv);
772				timevalsub(&utv, &ctv);
773			}
774			if (utv.tv_sec < 0)
775				timevalclear(&utv);
776			timeout_hz = tvtohz(&utv);
777		} else
778			timeout_hz = 0;
779
780		error = futex_wait(f, wp, timeout_hz, args->val3);
781		break;
782
783	case LINUX_FUTEX_WAKE:
784		args->val3 = FUTEX_BITSET_MATCH_ANY;
785		/* FALLTHROUGH */
786
787	case LINUX_FUTEX_WAKE_BITSET:
788		LIN_SDT_PROBE3(futex, linux_sys_futex, debug_wake, args->uaddr,
789		    args->val, args->val3);
790		LINUX_CTR3(sys_futex, "WAKE uaddr %p nrwake 0x%x bitset 0x%x",
791		    args->uaddr, args->val, args->val3);
792
793		error = futex_get(args->uaddr, NULL, &f,
794		    flags | FUTEX_DONTCREATE);
795		if (error) {
796			LIN_SDT_PROBE1(futex, linux_sys_futex, return, error);
797			return (error);
798		}
799
800		if (f == NULL) {
801			td->td_retval[0] = 0;
802
803			LIN_SDT_PROBE1(futex, linux_sys_futex, return, error);
804			return (error);
805		}
806		td->td_retval[0] = futex_wake(f, args->val, args->val3);
807		futex_put(f, NULL);
808		break;
809
810	case LINUX_FUTEX_CMP_REQUEUE:
811		LIN_SDT_PROBE5(futex, linux_sys_futex, debug_cmp_requeue,
812		    args->uaddr, args->val, args->val3, args->uaddr2,
813		    args->timeout);
814		LINUX_CTR5(sys_futex, "CMP_REQUEUE uaddr %p "
815		    "nrwake 0x%x uval 0x%x uaddr2 %p nrequeue 0x%x",
816		    args->uaddr, args->val, args->val3, args->uaddr2,
817		    args->timeout);
818
819		/*
820		 * Linux allows this, we would not, it is an incorrect
821		 * usage of declared ABI, so return EINVAL.
822		 */
823		if (args->uaddr == args->uaddr2) {
824			LIN_SDT_PROBE0(futex, linux_sys_futex,
825			    invalid_cmp_requeue_use);
826			LIN_SDT_PROBE1(futex, linux_sys_futex, return, EINVAL);
827			return (EINVAL);
828		}
829
830		error = futex_get(args->uaddr, NULL, &f, flags);
831		if (error) {
832			LIN_SDT_PROBE1(futex, linux_sys_futex, return, error);
833			return (error);
834		}
835
836		/*
837		 * To avoid deadlocks return EINVAL if second futex
838		 * exists at this time.
839		 *
840		 * Glibc fall back to FUTEX_WAKE in case of any error
841		 * returned by FUTEX_CMP_REQUEUE.
842		 */
843		error = futex_get(args->uaddr2, NULL, &f2,
844		    flags | FUTEX_DONTEXISTS);
845		if (error) {
846			futex_put(f, NULL);
847
848			LIN_SDT_PROBE1(futex, linux_sys_futex, return, error);
849			return (error);
850		}
851		error = copyin(args->uaddr, &val, sizeof(val));
852		if (error) {
853			LIN_SDT_PROBE1(futex, linux_sys_futex, copyin_error,
854			    error);
855			LINUX_CTR1(sys_futex, "CMP_REQUEUE copyin failed %d",
856			    error);
857			futex_put(f2, NULL);
858			futex_put(f, NULL);
859
860			LIN_SDT_PROBE1(futex, linux_sys_futex, return, error);
861			return (error);
862		}
863		if (val != args->val3) {
864			LIN_SDT_PROBE2(futex, linux_sys_futex,
865			    debug_cmp_requeue_value_neq, args->val, val);
866			LINUX_CTR2(sys_futex, "CMP_REQUEUE val 0x%x != uval 0x%x",
867			    args->val, val);
868			futex_put(f2, NULL);
869			futex_put(f, NULL);
870
871			LIN_SDT_PROBE1(futex, linux_sys_futex, return, EAGAIN);
872			return (EAGAIN);
873		}
874
875		nrwake = (int)(unsigned long)args->timeout;
876		td->td_retval[0] = futex_requeue(f, args->val, f2, nrwake);
877		futex_put(f2, NULL);
878		futex_put(f, NULL);
879		break;
880
881	case LINUX_FUTEX_WAKE_OP:
882		LIN_SDT_PROBE5(futex, linux_sys_futex, debug_wake_op,
883		    args->uaddr, args->op, args->val, args->uaddr2, args->val3);
884		LINUX_CTR5(sys_futex, "WAKE_OP "
885		    "uaddr %p nrwake 0x%x uaddr2 %p op 0x%x nrwake2 0x%x",
886		    args->uaddr, args->val, args->uaddr2, args->val3,
887		    args->timeout);
888
889		error = futex_get(args->uaddr, NULL, &f, flags);
890		if (error) {
891			LIN_SDT_PROBE1(futex, linux_sys_futex, return, error);
892			return (error);
893		}
894
895		if (args->uaddr != args->uaddr2)
896			error = futex_get(args->uaddr2, NULL, &f2, flags);
897		if (error) {
898			futex_put(f, NULL);
899
900			LIN_SDT_PROBE1(futex, linux_sys_futex, return, error);
901			return (error);
902		}
903
904		/*
905		 * This function returns positive number as results and
906		 * negative as errors
907		 */
908		op_ret = futex_atomic_op(td, args->val3, args->uaddr2);
909
910		LINUX_CTR2(sys_futex, "WAKE_OP atomic_op uaddr %p ret 0x%x",
911		    args->uaddr, op_ret);
912
913		if (op_ret < 0) {
914			/* XXX: We don't handle the EFAULT yet. */
915			if (op_ret != -EFAULT) {
916				if (f2 != NULL)
917					futex_put(f2, NULL);
918				futex_put(f, NULL);
919
920				LIN_SDT_PROBE1(futex, linux_sys_futex, return,
921				    -op_ret);
922				return (-op_ret);
923			} else {
924				LIN_SDT_PROBE0(futex, linux_sys_futex,
925				    unhandled_efault);
926			}
927			if (f2 != NULL)
928				futex_put(f2, NULL);
929			futex_put(f, NULL);
930
931			LIN_SDT_PROBE1(futex, linux_sys_futex, return, EFAULT);
932			return (EFAULT);
933		}
934
935		ret = futex_wake(f, args->val, args->val3);
936
937		if (op_ret > 0) {
938			op_ret = 0;
939			nrwake = (int)(unsigned long)args->timeout;
940
941			if (f2 != NULL)
942				op_ret += futex_wake(f2, nrwake, args->val3);
943			else
944				op_ret += futex_wake(f, nrwake, args->val3);
945			ret += op_ret;
946
947		}
948		if (f2 != NULL)
949			futex_put(f2, NULL);
950		futex_put(f, NULL);
951		td->td_retval[0] = ret;
952		break;
953
954	case LINUX_FUTEX_LOCK_PI:
955		/* not yet implemented */
956		pem = pem_find(td->td_proc);
957		if ((pem->flags & LINUX_XUNSUP_FUTEXPIOP) == 0) {
958			linux_msg(td,
959				  "linux_sys_futex: "
960				  "unsupported futex_pi op\n");
961			pem->flags |= LINUX_XUNSUP_FUTEXPIOP;
962			LIN_SDT_PROBE0(futex, linux_sys_futex,
963			    unimplemented_lock_pi);
964		}
965		LIN_SDT_PROBE1(futex, linux_sys_futex, return, ENOSYS);
966		return (ENOSYS);
967
968	case LINUX_FUTEX_UNLOCK_PI:
969		/* not yet implemented */
970		pem = pem_find(td->td_proc);
971		if ((pem->flags & LINUX_XUNSUP_FUTEXPIOP) == 0) {
972			linux_msg(td,
973				  "linux_sys_futex: "
974				  "unsupported futex_pi op\n");
975			pem->flags |= LINUX_XUNSUP_FUTEXPIOP;
976			LIN_SDT_PROBE0(futex, linux_sys_futex,
977			    unimplemented_unlock_pi);
978		}
979		LIN_SDT_PROBE1(futex, linux_sys_futex, return, ENOSYS);
980		return (ENOSYS);
981
982	case LINUX_FUTEX_TRYLOCK_PI:
983		/* not yet implemented */
984		pem = pem_find(td->td_proc);
985		if ((pem->flags & LINUX_XUNSUP_FUTEXPIOP) == 0) {
986			linux_msg(td,
987				  "linux_sys_futex: "
988				  "unsupported futex_pi op\n");
989			pem->flags |= LINUX_XUNSUP_FUTEXPIOP;
990			LIN_SDT_PROBE0(futex, linux_sys_futex,
991			    unimplemented_trylock_pi);
992		}
993		LIN_SDT_PROBE1(futex, linux_sys_futex, return, ENOSYS);
994		return (ENOSYS);
995
996	case LINUX_FUTEX_REQUEUE:
997
998		/*
999		 * Glibc does not use this operation since version 2.3.3,
1000		 * as it is racy and replaced by FUTEX_CMP_REQUEUE operation.
1001		 * Glibc versions prior to 2.3.3 fall back to FUTEX_WAKE when
1002		 * FUTEX_REQUEUE returned EINVAL.
1003		 */
1004		pem = pem_find(td->td_proc);
1005		if ((pem->flags & LINUX_XDEPR_REQUEUEOP) == 0) {
1006			linux_msg(td,
1007				  "linux_sys_futex: "
1008				  "unsupported futex_requeue op\n");
1009			pem->flags |= LINUX_XDEPR_REQUEUEOP;
1010			LIN_SDT_PROBE0(futex, linux_sys_futex,
1011			    deprecated_requeue);
1012		}
1013
1014		LIN_SDT_PROBE1(futex, linux_sys_futex, return, EINVAL);
1015		return (EINVAL);
1016
1017	case LINUX_FUTEX_WAIT_REQUEUE_PI:
1018		/* not yet implemented */
1019		pem = pem_find(td->td_proc);
1020		if ((pem->flags & LINUX_XUNSUP_FUTEXPIOP) == 0) {
1021			linux_msg(td,
1022				  "linux_sys_futex: "
1023				  "unsupported futex_pi op\n");
1024			pem->flags |= LINUX_XUNSUP_FUTEXPIOP;
1025			LIN_SDT_PROBE0(futex, linux_sys_futex,
1026			    unimplemented_wait_requeue_pi);
1027		}
1028		LIN_SDT_PROBE1(futex, linux_sys_futex, return, ENOSYS);
1029		return (ENOSYS);
1030
1031	case LINUX_FUTEX_CMP_REQUEUE_PI:
1032		/* not yet implemented */
1033		pem = pem_find(td->td_proc);
1034		if ((pem->flags & LINUX_XUNSUP_FUTEXPIOP) == 0) {
1035			linux_msg(td,
1036				  "linux_sys_futex: "
1037				  "unsupported futex_pi op\n");
1038			pem->flags |= LINUX_XUNSUP_FUTEXPIOP;
1039			LIN_SDT_PROBE0(futex, linux_sys_futex,
1040			    unimplemented_cmp_requeue_pi);
1041		}
1042		LIN_SDT_PROBE1(futex, linux_sys_futex, return, ENOSYS);
1043		return (ENOSYS);
1044
1045	default:
1046		linux_msg(td,
1047			  "linux_sys_futex: unknown op %d\n", args->op);
1048		LIN_SDT_PROBE1(futex, linux_sys_futex, unknown_operation,
1049		    args->op);
1050		LIN_SDT_PROBE1(futex, linux_sys_futex, return, ENOSYS);
1051		return (ENOSYS);
1052	}
1053
1054	LIN_SDT_PROBE1(futex, linux_sys_futex, return, error);
1055	return (error);
1056}
1057
1058int
1059linux_set_robust_list(struct thread *td, struct linux_set_robust_list_args *args)
1060{
1061	struct linux_emuldata *em;
1062
1063	LIN_SDT_PROBE2(futex, linux_set_robust_list, entry, td, args);
1064
1065	if (args->len != sizeof(struct linux_robust_list_head)) {
1066		LIN_SDT_PROBE0(futex, linux_set_robust_list, size_error);
1067		LIN_SDT_PROBE1(futex, linux_set_robust_list, return, EINVAL);
1068		return (EINVAL);
1069	}
1070
1071	em = em_find(td);
1072	em->robust_futexes = args->head;
1073
1074	LIN_SDT_PROBE1(futex, linux_set_robust_list, return, 0);
1075	return (0);
1076}
1077
1078int
1079linux_get_robust_list(struct thread *td, struct linux_get_robust_list_args *args)
1080{
1081	struct linux_emuldata *em;
1082	struct linux_robust_list_head *head;
1083	l_size_t len = sizeof(struct linux_robust_list_head);
1084	struct thread *td2;
1085	int error = 0;
1086
1087	LIN_SDT_PROBE2(futex, linux_get_robust_list, entry, td, args);
1088
1089	if (!args->pid) {
1090		em = em_find(td);
1091		KASSERT(em != NULL, ("get_robust_list: emuldata notfound.\n"));
1092		head = em->robust_futexes;
1093	} else {
1094		td2 = tdfind(args->pid, -1);
1095		if (td2 == NULL) {
1096			LIN_SDT_PROBE1(futex, linux_get_robust_list, return,
1097			    ESRCH);
1098			return (ESRCH);
1099		}
1100
1101		em = em_find(td2);
1102		KASSERT(em != NULL, ("get_robust_list: emuldata notfound.\n"));
1103		/* XXX: ptrace? */
1104		if (priv_check(td, PRIV_CRED_SETUID) ||
1105		    priv_check(td, PRIV_CRED_SETEUID) ||
1106		    p_candebug(td, td2->td_proc)) {
1107			PROC_UNLOCK(td2->td_proc);
1108
1109			LIN_SDT_PROBE1(futex, linux_get_robust_list, return,
1110			    EPERM);
1111			return (EPERM);
1112		}
1113		head = em->robust_futexes;
1114
1115		PROC_UNLOCK(td2->td_proc);
1116	}
1117
1118	error = copyout(&len, args->len, sizeof(l_size_t));
1119	if (error) {
1120		LIN_SDT_PROBE1(futex, linux_get_robust_list, copyout_error,
1121		    error);
1122		LIN_SDT_PROBE1(futex, linux_get_robust_list, return, EFAULT);
1123		return (EFAULT);
1124	}
1125
1126	error = copyout(head, args->head, sizeof(struct linux_robust_list_head));
1127	if (error) {
1128		LIN_SDT_PROBE1(futex, linux_get_robust_list, copyout_error,
1129		    error);
1130	}
1131
1132	LIN_SDT_PROBE1(futex, linux_get_robust_list, return, error);
1133	return (error);
1134}
1135
1136static int
1137handle_futex_death(struct linux_emuldata *em, uint32_t *uaddr,
1138    unsigned int pi)
1139{
1140	uint32_t uval, nval, mval;
1141	struct futex *f;
1142	int error;
1143
1144	LIN_SDT_PROBE3(futex, handle_futex_death, entry, em, uaddr, pi);
1145
1146retry:
1147	error = copyin(uaddr, &uval, 4);
1148	if (error) {
1149		LIN_SDT_PROBE1(futex, handle_futex_death, copyin_error, error);
1150		LIN_SDT_PROBE1(futex, handle_futex_death, return, EFAULT);
1151		return (EFAULT);
1152	}
1153	if ((uval & FUTEX_TID_MASK) == em->em_tid) {
1154		mval = (uval & FUTEX_WAITERS) | FUTEX_OWNER_DIED;
1155		nval = casuword32(uaddr, uval, mval);
1156
1157		if (nval == -1) {
1158			LIN_SDT_PROBE1(futex, handle_futex_death, return,
1159			    EFAULT);
1160			return (EFAULT);
1161		}
1162
1163		if (nval != uval)
1164			goto retry;
1165
1166		if (!pi && (uval & FUTEX_WAITERS)) {
1167			error = futex_get(uaddr, NULL, &f,
1168			    FUTEX_DONTCREATE | FUTEX_SHARED);
1169			if (error) {
1170				LIN_SDT_PROBE1(futex, handle_futex_death,
1171				    return, error);
1172				return (error);
1173			}
1174			if (f != NULL) {
1175				futex_wake(f, 1, FUTEX_BITSET_MATCH_ANY);
1176				futex_put(f, NULL);
1177			}
1178		}
1179	}
1180
1181	LIN_SDT_PROBE1(futex, handle_futex_death, return, 0);
1182	return (0);
1183}
1184
1185static int
1186fetch_robust_entry(struct linux_robust_list **entry,
1187    struct linux_robust_list **head, unsigned int *pi)
1188{
1189	l_ulong uentry;
1190	int error;
1191
1192	LIN_SDT_PROBE3(futex, fetch_robust_entry, entry, entry, head, pi);
1193
1194	error = copyin((const void *)head, &uentry, sizeof(l_ulong));
1195	if (error) {
1196		LIN_SDT_PROBE1(futex, fetch_robust_entry, copyin_error, error);
1197		LIN_SDT_PROBE1(futex, fetch_robust_entry, return, EFAULT);
1198		return (EFAULT);
1199	}
1200
1201	*entry = (void *)(uentry & ~1UL);
1202	*pi = uentry & 1;
1203
1204	LIN_SDT_PROBE1(futex, fetch_robust_entry, return, 0);
1205	return (0);
1206}
1207
1208/* This walks the list of robust futexes releasing them. */
1209void
1210release_futexes(struct thread *td, struct linux_emuldata *em)
1211{
1212	struct linux_robust_list_head *head = NULL;
1213	struct linux_robust_list *entry, *next_entry, *pending;
1214	unsigned int limit = 2048, pi, next_pi, pip;
1215	l_long futex_offset;
1216	int rc, error;
1217
1218	LIN_SDT_PROBE2(futex, release_futexes, entry, td, em);
1219
1220	head = em->robust_futexes;
1221
1222	if (head == NULL) {
1223		LIN_SDT_PROBE0(futex, release_futexes, return);
1224		return;
1225	}
1226
1227	if (fetch_robust_entry(&entry, PTRIN(&head->list.next), &pi)) {
1228		LIN_SDT_PROBE0(futex, release_futexes, return);
1229		return;
1230	}
1231
1232	error = copyin(&head->futex_offset, &futex_offset,
1233	    sizeof(futex_offset));
1234	if (error) {
1235		LIN_SDT_PROBE1(futex, release_futexes, copyin_error, error);
1236		LIN_SDT_PROBE0(futex, release_futexes, return);
1237		return;
1238	}
1239
1240	if (fetch_robust_entry(&pending, PTRIN(&head->pending_list), &pip)) {
1241		LIN_SDT_PROBE0(futex, release_futexes, return);
1242		return;
1243	}
1244
1245	while (entry != &head->list) {
1246		rc = fetch_robust_entry(&next_entry, PTRIN(&entry->next), &next_pi);
1247
1248		if (entry != pending)
1249			if (handle_futex_death(em,
1250			    (uint32_t *)((caddr_t)entry + futex_offset), pi)) {
1251				LIN_SDT_PROBE0(futex, release_futexes, return);
1252				return;
1253			}
1254		if (rc) {
1255			LIN_SDT_PROBE0(futex, release_futexes, return);
1256			return;
1257		}
1258
1259		entry = next_entry;
1260		pi = next_pi;
1261
1262		if (!--limit)
1263			break;
1264
1265		sched_relinquish(curthread);
1266	}
1267
1268	if (pending)
1269		handle_futex_death(em, (uint32_t *)((caddr_t)pending + futex_offset), pip);
1270
1271	LIN_SDT_PROBE0(futex, release_futexes, return);
1272}
1273