/*-
 * SPDX-License-Identifier: BSD-4-Clause
 *
 * Copyright (c) 1995 John Birrell <jb@cimlogic.com.au>.
 * Copyright (c) 2006 David Xu <davidxu@freebsd.org>.
 * Copyright (c) 2015, 2016 The FreeBSD Foundation
 *
 * All rights reserved.
 *
 * Portions of this software were developed by Konstantin Belousov
 * under sponsorship from the FreeBSD Foundation.
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 * 1. Redistributions of source code must retain the above copyright
 *    notice, this list of conditions and the following disclaimer.
 * 2. Redistributions in binary form must reproduce the above copyright
 *    notice, this list of conditions and the following disclaimer in the
 *    documentation and/or other materials provided with the distribution.
 * 3. All advertising materials mentioning features or use of this software
 *    must display the following acknowledgement:
 *	This product includes software developed by John Birrell.
 * 4. Neither the name of the author nor the names of any co-contributors
 *    may be used to endorse or promote products derived from this software
 *    without specific prior written permission.
 *
 * THIS SOFTWARE IS PROVIDED BY JOHN BIRRELL AND CONTRIBUTORS ``AS IS'' AND
 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
 * ARE DISCLAIMED.  IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
 * SUCH DAMAGE.
 */

#include "namespace.h"
#include <stdlib.h>
#include <errno.h>
#include <string.h>
#include <sys/param.h>
#include <sys/queue.h>
#include <pthread.h>
#include <pthread_np.h>
#include "un-namespace.h"

#include "thr_private.h"

_Static_assert(sizeof(struct pthread_mutex) <= THR_PAGE_SIZE_MIN,
    "pthread_mutex is too large for off-page");

/*
 * For adaptive mutexes, the number of times to spin doing trylock2
 * before entering the kernel to block.
 */
#define MUTEX_ADAPTIVE_SPINS	2000
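
/*
 * Illustrative sketch (not part of the library): an application can ask
 * for the adaptive behaviour implemented below by selecting the
 * non-portable PTHREAD_MUTEX_ADAPTIVE_NP type.  Error checking is
 * omitted for brevity.
 *
 *	pthread_mutexattr_t attr;
 *	pthread_mutex_t mtx;
 *
 *	pthread_mutexattr_init(&attr);
 *	pthread_mutexattr_settype(&attr, PTHREAD_MUTEX_ADAPTIVE_NP);
 *	pthread_mutex_init(&mtx, &attr);
 *	pthread_mutexattr_destroy(&attr);
 */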

/*
 * Prototypes
 */
int	__pthread_mutex_timedlock(pthread_mutex_t * __restrict mutex,
		const struct timespec * __restrict abstime);
int	_pthread_mutex_getspinloops_np(pthread_mutex_t *mutex, int *count);
int	_pthread_mutex_setspinloops_np(pthread_mutex_t *mutex, int count);
int	__pthread_mutex_setspinloops_np(pthread_mutex_t *mutex, int count);
int	_pthread_mutex_setyieldloops_np(pthread_mutex_t *mutex, int count);
int	_pthread_mutex_getyieldloops_np(pthread_mutex_t *mutex, int *count);
int	__pthread_mutex_setyieldloops_np(pthread_mutex_t *mutex, int count);

static int	mutex_self_trylock(pthread_mutex_t);
static int	mutex_self_lock(pthread_mutex_t,
				const struct timespec *abstime);
static int	mutex_unlock_common(struct pthread_mutex *, bool, int *);
static int	mutex_lock_sleep(struct pthread *, pthread_mutex_t,
				const struct timespec *);
static void	mutex_init_robust(struct pthread *curthread);
static int	mutex_qidx(struct pthread_mutex *m);
static bool	is_robust_mutex(struct pthread_mutex *m);
static bool	is_pshared_mutex(struct pthread_mutex *m);

__weak_reference(__Tthr_mutex_init, pthread_mutex_init);
__weak_reference(__Tthr_mutex_init, __pthread_mutex_init);
__strong_reference(__Tthr_mutex_init, _pthread_mutex_init);
__weak_reference(__Tthr_mutex_lock, pthread_mutex_lock);
__weak_reference(__Tthr_mutex_lock, __pthread_mutex_lock);
__strong_reference(__Tthr_mutex_lock, _pthread_mutex_lock);
__weak_reference(__pthread_mutex_timedlock, pthread_mutex_timedlock);
__strong_reference(__pthread_mutex_timedlock, _pthread_mutex_timedlock);
__weak_reference(__Tthr_mutex_trylock, pthread_mutex_trylock);
__weak_reference(__Tthr_mutex_trylock, __pthread_mutex_trylock);
__strong_reference(__Tthr_mutex_trylock, _pthread_mutex_trylock);
__weak_reference(_Tthr_mutex_consistent, pthread_mutex_consistent);
__weak_reference(_Tthr_mutex_consistent, _pthread_mutex_consistent);
__strong_reference(_Tthr_mutex_consistent, __pthread_mutex_consistent);

/* Single underscore versions provided for libc internal usage: */
/* No difference between libc and application usage of these: */
__weak_reference(_thr_mutex_destroy, pthread_mutex_destroy);
__weak_reference(_thr_mutex_destroy, _pthread_mutex_destroy);
__weak_reference(_thr_mutex_unlock, pthread_mutex_unlock);
__weak_reference(_thr_mutex_unlock, _pthread_mutex_unlock);

__weak_reference(_pthread_mutex_getprioceiling, pthread_mutex_getprioceiling);
__weak_reference(_pthread_mutex_setprioceiling, pthread_mutex_setprioceiling);

__weak_reference(__pthread_mutex_setspinloops_np, pthread_mutex_setspinloops_np);
__strong_reference(__pthread_mutex_setspinloops_np, _pthread_mutex_setspinloops_np);
__weak_reference(_pthread_mutex_getspinloops_np, pthread_mutex_getspinloops_np);

__weak_reference(__pthread_mutex_setyieldloops_np, pthread_mutex_setyieldloops_np);
__strong_reference(__pthread_mutex_setyieldloops_np, _pthread_mutex_setyieldloops_np);
__weak_reference(_pthread_mutex_getyieldloops_np, pthread_mutex_getyieldloops_np);
__weak_reference(_pthread_mutex_isowned_np, pthread_mutex_isowned_np);

static void
mutex_init_link(struct pthread_mutex *m __unused)
{

#if defined(_PTHREADS_INVARIANTS)
	m->m_qe.tqe_prev = NULL;
	m->m_qe.tqe_next = NULL;
	m->m_pqe.tqe_prev = NULL;
	m->m_pqe.tqe_next = NULL;
#endif
}

static void
mutex_assert_is_owned(struct pthread_mutex *m __unused)
{

#if defined(_PTHREADS_INVARIANTS)
	if (__predict_false(m->m_qe.tqe_prev == NULL))
		PANIC("mutex %p own %#x is not on list %p %p",
		    m, m->m_lock.m_owner, m->m_qe.tqe_prev, m->m_qe.tqe_next);
#endif
}

static void
mutex_assert_not_owned(struct pthread *curthread __unused,
    struct pthread_mutex *m __unused)
{

#if defined(_PTHREADS_INVARIANTS)
	if (__predict_false(m->m_qe.tqe_prev != NULL ||
	    m->m_qe.tqe_next != NULL))
		PANIC("mutex %p own %#x is on list %p %p",
		    m, m->m_lock.m_owner, m->m_qe.tqe_prev, m->m_qe.tqe_next);
	if (__predict_false(is_robust_mutex(m) &&
	    (m->m_lock.m_rb_lnk != 0 || m->m_rb_prev != NULL ||
	    (is_pshared_mutex(m) && curthread->robust_list ==
	    (uintptr_t)&m->m_lock) ||
	    (!is_pshared_mutex(m) && curthread->priv_robust_list ==
	    (uintptr_t)&m->m_lock))))
		PANIC(
    "mutex %p own %#x is on robust linkage %p %p head %p phead %p",
		    m, m->m_lock.m_owner, (void *)m->m_lock.m_rb_lnk,
		    m->m_rb_prev, (void *)curthread->robust_list,
		    (void *)curthread->priv_robust_list);
#endif
}

static bool
is_pshared_mutex(struct pthread_mutex *m)
{

	return ((m->m_lock.m_flags & USYNC_PROCESS_SHARED) != 0);
}

static bool
is_robust_mutex(struct pthread_mutex *m)
{

	return ((m->m_lock.m_flags & UMUTEX_ROBUST) != 0);
}

int
_mutex_enter_robust(struct pthread *curthread, struct pthread_mutex *m)
{

#if defined(_PTHREADS_INVARIANTS)
	if (__predict_false(curthread->inact_mtx != 0))
		PANIC("inact_mtx enter");
#endif
	if (!is_robust_mutex(m))
		return (0);

	mutex_init_robust(curthread);
	curthread->inact_mtx = (uintptr_t)&m->m_lock;
	return (1);
}

void
_mutex_leave_robust(struct pthread *curthread, struct pthread_mutex *m __unused)
{

#if defined(_PTHREADS_INVARIANTS)
	if (__predict_false(curthread->inact_mtx != (uintptr_t)&m->m_lock))
		PANIC("inact_mtx leave");
#endif
	curthread->inact_mtx = 0;
}

static int
mutex_check_attr(const struct pthread_mutex_attr *attr)
{

	if (attr->m_type < PTHREAD_MUTEX_ERRORCHECK ||
	    attr->m_type >= PTHREAD_MUTEX_TYPE_MAX)
		return (EINVAL);
	if (attr->m_protocol < PTHREAD_PRIO_NONE ||
	    attr->m_protocol > PTHREAD_PRIO_PROTECT)
		return (EINVAL);
	return (0);
}

static void
mutex_init_robust(struct pthread *curthread)
{
	struct umtx_robust_lists_params rb;

	if (curthread == NULL)
		curthread = _get_curthread();
	if (curthread->robust_inited)
		return;
	rb.robust_list_offset = (uintptr_t)&curthread->robust_list;
	rb.robust_priv_list_offset = (uintptr_t)&curthread->priv_robust_list;
	rb.robust_inact_offset = (uintptr_t)&curthread->inact_mtx;
	_umtx_op(NULL, UMTX_OP_ROBUST_LISTS, sizeof(rb), &rb, NULL);
	curthread->robust_inited = 1;
}

static void
mutex_init_body(struct pthread_mutex *pmutex,
    const struct pthread_mutex_attr *attr)
{

	pmutex->m_flags = attr->m_type;
	pmutex->m_count = 0;
	pmutex->m_spinloops = 0;
	pmutex->m_yieldloops = 0;
	mutex_init_link(pmutex);
	switch (attr->m_protocol) {
	case PTHREAD_PRIO_NONE:
		pmutex->m_lock.m_owner = UMUTEX_UNOWNED;
		pmutex->m_lock.m_flags = 0;
		break;
	case PTHREAD_PRIO_INHERIT:
		pmutex->m_lock.m_owner = UMUTEX_UNOWNED;
		pmutex->m_lock.m_flags = UMUTEX_PRIO_INHERIT;
		break;
	case PTHREAD_PRIO_PROTECT:
		pmutex->m_lock.m_owner = UMUTEX_CONTESTED;
		pmutex->m_lock.m_flags = UMUTEX_PRIO_PROTECT;
		pmutex->m_lock.m_ceilings[0] = attr->m_ceiling;
		break;
	}
	if (attr->m_pshared == PTHREAD_PROCESS_SHARED)
		pmutex->m_lock.m_flags |= USYNC_PROCESS_SHARED;
	if (attr->m_robust == PTHREAD_MUTEX_ROBUST) {
		mutex_init_robust(NULL);
		pmutex->m_lock.m_flags |= UMUTEX_ROBUST;
	}
	if (PMUTEX_TYPE(pmutex->m_flags) == PTHREAD_MUTEX_ADAPTIVE_NP) {
		pmutex->m_spinloops =
		    _thr_spinloops ? _thr_spinloops : MUTEX_ADAPTIVE_SPINS;
		pmutex->m_yieldloops = _thr_yieldloops;
	}
}

static int
mutex_init(pthread_mutex_t *mutex,
    const struct pthread_mutex_attr *mutex_attr,
    void *(calloc_cb)(size_t, size_t))
{
	const struct pthread_mutex_attr *attr;
	struct pthread_mutex *pmutex;
	int error;

	if (mutex_attr == NULL) {
		attr = &_pthread_mutexattr_default;
	} else {
		attr = mutex_attr;
		error = mutex_check_attr(attr);
		if (error != 0)
			return (error);
	}
	if ((pmutex = (pthread_mutex_t)calloc_cb(1,
	    sizeof(struct pthread_mutex))) == NULL)
		return (ENOMEM);
	mutex_init_body(pmutex, attr);
	*mutex = pmutex;
	return (0);
}

static int
init_static(struct pthread *thread, pthread_mutex_t *mutex)
{
	int ret;

	THR_LOCK_ACQUIRE(thread, &_mutex_static_lock);

	if (*mutex == THR_MUTEX_INITIALIZER)
		ret = mutex_init(mutex, &_pthread_mutexattr_default,
		    __thr_calloc);
	else if (*mutex == THR_ADAPTIVE_MUTEX_INITIALIZER)
		ret = mutex_init(mutex, &_pthread_mutexattr_adaptive_default,
		    __thr_calloc);
	else
		ret = 0;
	THR_LOCK_RELEASE(thread, &_mutex_static_lock);

	return (ret);
}

static void
set_inherited_priority(struct pthread *curthread, struct pthread_mutex *m)
{
	struct pthread_mutex *m2;

	m2 = TAILQ_LAST(&curthread->mq[mutex_qidx(m)], mutex_queue);
	if (m2 != NULL)
		m->m_lock.m_ceilings[1] = m2->m_lock.m_ceilings[0];
	else
		m->m_lock.m_ceilings[1] = -1;
}

static void
shared_mutex_init(struct pthread_mutex *pmtx, const struct
    pthread_mutex_attr *mutex_attr)
{
	static const struct pthread_mutex_attr foobar_mutex_attr = {
		.m_type = PTHREAD_MUTEX_DEFAULT,
		.m_protocol = PTHREAD_PRIO_NONE,
		.m_ceiling = 0,
		.m_pshared = PTHREAD_PROCESS_SHARED,
		.m_robust = PTHREAD_MUTEX_STALLED,
	};
	bool done;

	/*
	 * Hack to allow multiple pthread_mutex_init() calls on the
	 * same process-shared mutex.  We rely on the kernel
	 * allocating a zeroed offpage for the mutex, i.e. the
	 * PMUTEX_INITSTAGE_ALLOC value must be zero.
	 */
	for (done = false; !done;) {
		switch (pmtx->m_ps) {
		case PMUTEX_INITSTAGE_DONE:
			atomic_thread_fence_acq();
			done = true;
			break;
		case PMUTEX_INITSTAGE_ALLOC:
			if (atomic_cmpset_int(&pmtx->m_ps,
			    PMUTEX_INITSTAGE_ALLOC, PMUTEX_INITSTAGE_BUSY)) {
				if (mutex_attr == NULL)
					mutex_attr = &foobar_mutex_attr;
				mutex_init_body(pmtx, mutex_attr);
				atomic_store_rel_int(&pmtx->m_ps,
				    PMUTEX_INITSTAGE_DONE);
				done = true;
			}
			break;
		case PMUTEX_INITSTAGE_BUSY:
			_pthread_yield();
			break;
		default:
			PANIC("corrupted offpage");
			break;
		}
	}
}

int
__Tthr_mutex_init(pthread_mutex_t * __restrict mutex,
    const pthread_mutexattr_t * __restrict mutex_attr)
{
	struct pthread_mutex *pmtx;
	int ret;

	_thr_check_init();

	if (mutex_attr != NULL) {
		ret = mutex_check_attr(*mutex_attr);
		if (ret != 0)
			return (ret);
	}
	if (mutex_attr == NULL ||
	    (*mutex_attr)->m_pshared == PTHREAD_PROCESS_PRIVATE) {
		__thr_malloc_init();
		return (mutex_init(mutex, mutex_attr ? *mutex_attr : NULL,
		    __thr_calloc));
	}
	pmtx = __thr_pshared_offpage(__DECONST(void *, mutex), 1);
	if (pmtx == NULL)
		return (EFAULT);
	*mutex = THR_PSHARED_PTR;
	shared_mutex_init(pmtx, *mutex_attr);
	return (0);
}
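
/*
 * Illustrative sketch (not part of the library): initializing a
 * process-shared mutex in memory visible to several processes.
 * "shm_base" is a hypothetical pointer into such a shared mapping;
 * error checking is omitted for brevity.
 *
 *	pthread_mutexattr_t attr;
 *	pthread_mutex_t *mtx = (pthread_mutex_t *)shm_base;
 *
 *	pthread_mutexattr_init(&attr);
 *	pthread_mutexattr_setpshared(&attr, PTHREAD_PROCESS_SHARED);
 *	pthread_mutex_init(mtx, &attr);
 *	pthread_mutexattr_destroy(&attr);
 */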

/* This function is used internally by malloc. */
int
_pthread_mutex_init_calloc_cb(pthread_mutex_t *mutex,
    void *(calloc_cb)(size_t, size_t))
{
	static const struct pthread_mutex_attr attr = {
		.m_type = PTHREAD_MUTEX_NORMAL,
		.m_protocol = PTHREAD_PRIO_NONE,
		.m_ceiling = 0,
		.m_pshared = PTHREAD_PROCESS_PRIVATE,
		.m_robust = PTHREAD_MUTEX_STALLED,
	};
	int ret;

	ret = mutex_init(mutex, &attr, calloc_cb);
	if (ret == 0)
		(*mutex)->m_flags |= PMUTEX_FLAG_PRIVATE;
	return (ret);
}

/*
 * Fix mutex ownership for the child process.
 *
 * Process-private mutex ownership is transmitted from the forking
 * thread to the child process.
 *
 * Process-shared mutexes should not be inherited because their owner
 * is the forking thread, which lives in the parent process; they are
 * removed from the owned mutex list.
 */
static void
queue_fork(struct pthread *curthread, struct mutex_queue *q,
    struct mutex_queue *qp, uint bit)
{
	struct pthread_mutex *m;

	TAILQ_INIT(q);
	TAILQ_FOREACH(m, qp, m_pqe) {
		TAILQ_INSERT_TAIL(q, m, m_qe);
		m->m_lock.m_owner = TID(curthread) | bit;
	}
}

void
_mutex_fork(struct pthread *curthread)
{

	queue_fork(curthread, &curthread->mq[TMQ_NORM],
	    &curthread->mq[TMQ_NORM_PRIV], 0);
	queue_fork(curthread, &curthread->mq[TMQ_NORM_PP],
	    &curthread->mq[TMQ_NORM_PP_PRIV], UMUTEX_CONTESTED);
	queue_fork(curthread, &curthread->mq[TMQ_ROBUST_PP],
	    &curthread->mq[TMQ_ROBUST_PP_PRIV], UMUTEX_CONTESTED);
	curthread->robust_list = 0;
}

int
_thr_mutex_destroy(pthread_mutex_t *mutex)
{
	pthread_mutex_t m, m1;
	int ret;

	m = *mutex;
	if (m < THR_MUTEX_DESTROYED) {
		ret = 0;
	} else if (m == THR_MUTEX_DESTROYED) {
		ret = EINVAL;
	} else {
		if (m == THR_PSHARED_PTR) {
			m1 = __thr_pshared_offpage(mutex, 0);
			if (m1 != NULL) {
				if ((uint32_t)m1->m_lock.m_owner !=
				    UMUTEX_RB_OWNERDEAD) {
					mutex_assert_not_owned(
					    _get_curthread(), m1);
				}
				__thr_pshared_destroy(mutex);
			}
			*mutex = THR_MUTEX_DESTROYED;
			return (0);
		}
		if (PMUTEX_OWNER_ID(m) != 0 &&
		    (uint32_t)m->m_lock.m_owner != UMUTEX_RB_NOTRECOV) {
			ret = EBUSY;
		} else {
			*mutex = THR_MUTEX_DESTROYED;
			mutex_assert_not_owned(_get_curthread(), m);
			__thr_free(m);
			ret = 0;
		}
	}

	return (ret);
}

static int
mutex_qidx(struct pthread_mutex *m)
{

	if ((m->m_lock.m_flags & UMUTEX_PRIO_PROTECT) == 0)
		return (TMQ_NORM);
	return (is_robust_mutex(m) ? TMQ_ROBUST_PP : TMQ_NORM_PP);
}

/*
 * Both enqueue_mutex() and dequeue_mutex() operate on the
 * thread-private linkage of the locked mutexes and on the robust
 * linkage.
 *
 * The robust list, as seen by the kernel, must be consistent even if
 * the thread terminates at an arbitrary moment.  Since either enqueue
 * or dequeue on the list walked by the kernel consists of rewriting a
 * single forward pointer, this is safe.  On the other hand, rewriting
 * the back pointer is not atomic WRT the forward one, but the kernel
 * does not care.
 */
static void
enqueue_mutex(struct pthread *curthread, struct pthread_mutex *m,
    int error)
{
	struct pthread_mutex *m1;
	uintptr_t *rl;
	int qidx;

	/* Add to the list of owned mutexes: */
	if (error != EOWNERDEAD)
		mutex_assert_not_owned(curthread, m);
	qidx = mutex_qidx(m);
	TAILQ_INSERT_TAIL(&curthread->mq[qidx], m, m_qe);
	if (!is_pshared_mutex(m))
		TAILQ_INSERT_TAIL(&curthread->mq[qidx + 1], m, m_pqe);
	if (is_robust_mutex(m)) {
		rl = is_pshared_mutex(m) ? &curthread->robust_list :
		    &curthread->priv_robust_list;
		m->m_rb_prev = NULL;
		if (*rl != 0) {
			m1 = __containerof((void *)*rl,
			    struct pthread_mutex, m_lock);
			m->m_lock.m_rb_lnk = (uintptr_t)&m1->m_lock;
			m1->m_rb_prev = m;
		} else {
			m1 = NULL;
			m->m_lock.m_rb_lnk = 0;
		}
		*rl = (uintptr_t)&m->m_lock;
	}
}

static void
dequeue_mutex(struct pthread *curthread, struct pthread_mutex *m)
{
	struct pthread_mutex *mp, *mn;
	int qidx;

	mutex_assert_is_owned(m);
	qidx = mutex_qidx(m);
	if (is_robust_mutex(m)) {
		mp = m->m_rb_prev;
		if (mp == NULL) {
			if (is_pshared_mutex(m)) {
				curthread->robust_list = m->m_lock.m_rb_lnk;
			} else {
				curthread->priv_robust_list =
				    m->m_lock.m_rb_lnk;
			}
		} else {
			mp->m_lock.m_rb_lnk = m->m_lock.m_rb_lnk;
		}
		if (m->m_lock.m_rb_lnk != 0) {
			mn = __containerof((void *)m->m_lock.m_rb_lnk,
			    struct pthread_mutex, m_lock);
			mn->m_rb_prev = m->m_rb_prev;
		}
		m->m_lock.m_rb_lnk = 0;
		m->m_rb_prev = NULL;
	}
	TAILQ_REMOVE(&curthread->mq[qidx], m, m_qe);
	if (!is_pshared_mutex(m))
		TAILQ_REMOVE(&curthread->mq[qidx + 1], m, m_pqe);
	if ((m->m_lock.m_flags & UMUTEX_PRIO_PROTECT) != 0)
		set_inherited_priority(curthread, m);
	mutex_init_link(m);
}

static int
check_and_init_mutex(pthread_mutex_t *mutex, struct pthread_mutex **m)
{
	int ret;

	*m = *mutex;
	ret = 0;
	if (__predict_false(*m == THR_PSHARED_PTR)) {
		*m = __thr_pshared_offpage(mutex, 0);
		if (*m == NULL)
			ret = EINVAL;
		else
			shared_mutex_init(*m, NULL);
	} else if (__predict_false(*m <= THR_MUTEX_DESTROYED)) {
		if (*m == THR_MUTEX_DESTROYED) {
			ret = EINVAL;
		} else {
			ret = init_static(_get_curthread(), mutex);
			if (ret == 0)
				*m = *mutex;
		}
	}
	return (ret);
}

int
__Tthr_mutex_trylock(pthread_mutex_t *mutex)
{
	struct pthread *curthread;
	struct pthread_mutex *m;
	uint32_t id;
	int ret, robust;

	ret = check_and_init_mutex(mutex, &m);
	if (ret != 0)
		return (ret);
	curthread = _get_curthread();
	id = TID(curthread);
	if (m->m_flags & PMUTEX_FLAG_PRIVATE)
		THR_CRITICAL_ENTER(curthread);
	robust = _mutex_enter_robust(curthread, m);
	ret = _thr_umutex_trylock(&m->m_lock, id);
	if (__predict_true(ret == 0) || ret == EOWNERDEAD) {
		enqueue_mutex(curthread, m, ret);
		if (ret == EOWNERDEAD)
			m->m_lock.m_flags |= UMUTEX_NONCONSISTENT;
	} else if (PMUTEX_OWNER_ID(m) == id) {
		ret = mutex_self_trylock(m);
	} /* else {} */
	if (robust)
		_mutex_leave_robust(curthread, m);
	if (ret != 0 && ret != EOWNERDEAD &&
	    (m->m_flags & PMUTEX_FLAG_PRIVATE) != 0)
		THR_CRITICAL_LEAVE(curthread);
	return (ret);
}

static int
mutex_lock_sleep(struct pthread *curthread, struct pthread_mutex *m,
    const struct timespec *abstime)
{
	uint32_t id, owner;
	int count, ret;

	id = TID(curthread);
	if (PMUTEX_OWNER_ID(m) == id)
		return (mutex_self_lock(m, abstime));

	/*
	 * For adaptive mutexes, spin for a bit in the expectation
	 * that if the application requested this mutex type then
	 * the lock is likely to be released quickly, making spinning
	 * faster than entering the kernel.
	 */
	if (__predict_false((m->m_lock.m_flags & (UMUTEX_PRIO_PROTECT |
	    UMUTEX_PRIO_INHERIT | UMUTEX_ROBUST | UMUTEX_NONCONSISTENT)) != 0))
		goto sleep_in_kernel;

	if (!_thr_is_smp)
		goto yield_loop;

	count = m->m_spinloops;
	while (count--) {
		owner = m->m_lock.m_owner;
		if ((owner & ~UMUTEX_CONTESTED) == 0) {
			if (atomic_cmpset_acq_32(&m->m_lock.m_owner, owner,
			    id | owner)) {
				ret = 0;
				goto done;
			}
		}
		CPU_SPINWAIT;
	}

yield_loop:
	count = m->m_yieldloops;
	while (count--) {
		_sched_yield();
		owner = m->m_lock.m_owner;
		if ((owner & ~UMUTEX_CONTESTED) == 0) {
			if (atomic_cmpset_acq_32(&m->m_lock.m_owner, owner,
			    id | owner)) {
				ret = 0;
				goto done;
			}
		}
	}

sleep_in_kernel:
	if (abstime == NULL)
		ret = __thr_umutex_lock(&m->m_lock, id);
	else if (__predict_false(abstime->tv_nsec < 0 ||
	    abstime->tv_nsec >= 1000000000))
		ret = EINVAL;
	else
		ret = __thr_umutex_timedlock(&m->m_lock, id, abstime);
done:
	if (ret == 0 || ret == EOWNERDEAD) {
		enqueue_mutex(curthread, m, ret);
		if (ret == EOWNERDEAD)
			m->m_lock.m_flags |= UMUTEX_NONCONSISTENT;
	}
	return (ret);
}

static __always_inline int
mutex_lock_common(struct pthread_mutex *m, const struct timespec *abstime,
    bool cvattach, bool rb_onlist)
{
	struct pthread *curthread;
	int ret, robust;

	robust = 0;  /* pacify gcc */
	curthread = _get_curthread();
	if (!cvattach && m->m_flags & PMUTEX_FLAG_PRIVATE)
		THR_CRITICAL_ENTER(curthread);
	if (!rb_onlist)
		robust = _mutex_enter_robust(curthread, m);
	ret = _thr_umutex_trylock2(&m->m_lock, TID(curthread));
	if (__predict_true(ret == 0) || ret == EOWNERDEAD) {
		enqueue_mutex(curthread, m, ret);
		if (ret == EOWNERDEAD)
			m->m_lock.m_flags |= UMUTEX_NONCONSISTENT;
	} else {
		ret = mutex_lock_sleep(curthread, m, abstime);
	}
	if (!rb_onlist && robust)
		_mutex_leave_robust(curthread, m);
	if (ret != 0 && ret != EOWNERDEAD &&
	    (m->m_flags & PMUTEX_FLAG_PRIVATE) != 0 && !cvattach)
		THR_CRITICAL_LEAVE(curthread);
	return (ret);
}

int
__Tthr_mutex_lock(pthread_mutex_t *mutex)
{
	struct pthread_mutex *m;
	int ret;

	_thr_check_init();
	ret = check_and_init_mutex(mutex, &m);
	if (ret == 0)
		ret = mutex_lock_common(m, NULL, false, false);
	return (ret);
}

int
__pthread_mutex_timedlock(pthread_mutex_t * __restrict mutex,
    const struct timespec * __restrict abstime)
{
	struct pthread_mutex *m;
	int ret;

	_thr_check_init();
	ret = check_and_init_mutex(mutex, &m);
	if (ret == 0)
		ret = mutex_lock_common(m, abstime, false, false);
	return (ret);
}

int
_thr_mutex_unlock(pthread_mutex_t *mutex)
{
	struct pthread_mutex *mp;

	if (*mutex == THR_PSHARED_PTR) {
		mp = __thr_pshared_offpage(mutex, 0);
		if (mp == NULL)
			return (EINVAL);
		shared_mutex_init(mp, NULL);
	} else {
		mp = *mutex;
	}
	return (mutex_unlock_common(mp, false, NULL));
}

int
_mutex_cv_lock(struct pthread_mutex *m, int count, bool rb_onlist)
{
	int error;

	error = mutex_lock_common(m, NULL, true, rb_onlist);
	if (error == 0 || error == EOWNERDEAD)
		m->m_count = count;
	return (error);
}

int
_mutex_cv_unlock(struct pthread_mutex *m, int *count, int *defer)
{

	/*
	 * Clear the count in case this is a recursive mutex.
	 */
	*count = m->m_count;
	m->m_count = 0;
	(void)mutex_unlock_common(m, true, defer);
	return (0);
}

int
_mutex_cv_attach(struct pthread_mutex *m, int count)
{
	struct pthread *curthread;

	curthread = _get_curthread();
	enqueue_mutex(curthread, m, 0);
	m->m_count = count;
	return (0);
}

int
_mutex_cv_detach(struct pthread_mutex *mp, int *recurse)
{
	struct pthread *curthread;
	int deferred, error;

	curthread = _get_curthread();
	if ((error = _mutex_owned(curthread, mp)) != 0)
		return (error);

	/*
	 * Clear the count in case this is a recursive mutex.
	 */
	*recurse = mp->m_count;
	mp->m_count = 0;
	dequeue_mutex(curthread, mp);

	/* Will this happen in the real world? */
	if ((mp->m_flags & PMUTEX_FLAG_DEFERRED) != 0) {
		deferred = 1;
		mp->m_flags &= ~PMUTEX_FLAG_DEFERRED;
	} else
		deferred = 0;

	if (deferred) {
		_thr_wake_all(curthread->defer_waiters,
		    curthread->nwaiter_defer);
		curthread->nwaiter_defer = 0;
	}
	return (0);
}

static int
mutex_self_trylock(struct pthread_mutex *m)
{
	int ret;

	switch (PMUTEX_TYPE(m->m_flags)) {
	case PTHREAD_MUTEX_ERRORCHECK:
	case PTHREAD_MUTEX_NORMAL:
	case PTHREAD_MUTEX_ADAPTIVE_NP:
		ret = EBUSY;
		break;

	case PTHREAD_MUTEX_RECURSIVE:
		/* Increment the lock count: */
		if (m->m_count + 1 > 0) {
			m->m_count++;
			ret = 0;
		} else
			ret = EAGAIN;
		break;

	default:
		/* Trap invalid mutex types. */
		ret = EINVAL;
	}

	return (ret);
}

static int
mutex_self_lock(struct pthread_mutex *m, const struct timespec *abstime)
{
	struct timespec	ts1, ts2;
	int ret;

	switch (PMUTEX_TYPE(m->m_flags)) {
	case PTHREAD_MUTEX_ERRORCHECK:
	case PTHREAD_MUTEX_ADAPTIVE_NP:
		if (abstime) {
			if (abstime->tv_sec < 0 || abstime->tv_nsec < 0 ||
			    abstime->tv_nsec >= 1000000000) {
				ret = EINVAL;
			} else {
				clock_gettime(CLOCK_REALTIME, &ts1);
				TIMESPEC_SUB(&ts2, abstime, &ts1);
				__sys_nanosleep(&ts2, NULL);
				ret = ETIMEDOUT;
			}
		} else {
			/*
			 * POSIX specifies that mutexes should return
			 * EDEADLK if a recursive lock is detected.
			 */
			ret = EDEADLK;
		}
		break;

	case PTHREAD_MUTEX_NORMAL:
		/*
		 * What SS2 defines as a 'normal' mutex.  Intentionally
		 * deadlock on attempts to get a lock you already own.
		 */
		ret = 0;
		if (abstime) {
			if (abstime->tv_sec < 0 || abstime->tv_nsec < 0 ||
			    abstime->tv_nsec >= 1000000000) {
				ret = EINVAL;
			} else {
				clock_gettime(CLOCK_REALTIME, &ts1);
				TIMESPEC_SUB(&ts2, abstime, &ts1);
				__sys_nanosleep(&ts2, NULL);
				ret = ETIMEDOUT;
			}
		} else {
			ts1.tv_sec = 30;
			ts1.tv_nsec = 0;
			for (;;)
				__sys_nanosleep(&ts1, NULL);
		}
		break;

	case PTHREAD_MUTEX_RECURSIVE:
		/* Increment the lock count: */
		if (m->m_count + 1 > 0) {
			m->m_count++;
			ret = 0;
		} else
			ret = EAGAIN;
		break;

	default:
		/* Trap invalid mutex types. */
		ret = EINVAL;
	}

	return (ret);
}

static __always_inline int
mutex_unlock_common(struct pthread_mutex *m, bool cv, int *mtx_defer)
{
	struct pthread *curthread;
	uint32_t id;
	int deferred, error, private, robust;

	if (__predict_false(m <= THR_MUTEX_DESTROYED)) {
		if (m == THR_MUTEX_DESTROYED)
			return (EINVAL);
		return (EPERM);
	}

	curthread = _get_curthread();
	id = TID(curthread);

	/*
	 * Check if the running thread is not the owner of the mutex.
	 */
	if (__predict_false(PMUTEX_OWNER_ID(m) != id))
		return (EPERM);

	error = 0;
	private = (m->m_flags & PMUTEX_FLAG_PRIVATE) != 0;
	if (__predict_false(PMUTEX_TYPE(m->m_flags) ==
	    PTHREAD_MUTEX_RECURSIVE && m->m_count > 0)) {
		m->m_count--;
	} else {
		if ((m->m_flags & PMUTEX_FLAG_DEFERRED) != 0) {
			deferred = 1;
			m->m_flags &= ~PMUTEX_FLAG_DEFERRED;
		} else
			deferred = 0;

		robust = _mutex_enter_robust(curthread, m);
		dequeue_mutex(curthread, m);
		error = _thr_umutex_unlock2(&m->m_lock, id, mtx_defer);
		if (deferred) {
			if (mtx_defer == NULL) {
				_thr_wake_all(curthread->defer_waiters,
				    curthread->nwaiter_defer);
				curthread->nwaiter_defer = 0;
			} else
				*mtx_defer = 1;
		}
		if (robust)
			_mutex_leave_robust(curthread, m);
	}
	if (!cv && private)
		THR_CRITICAL_LEAVE(curthread);
	return (error);
}

int
_pthread_mutex_getprioceiling(const pthread_mutex_t * __restrict mutex,
    int * __restrict prioceiling)
{
	struct pthread_mutex *m;

	if (*mutex == THR_PSHARED_PTR) {
		m = __thr_pshared_offpage(__DECONST(void *, mutex), 0);
		if (m == NULL)
			return (EINVAL);
		shared_mutex_init(m, NULL);
	} else {
		m = *mutex;
		if (m <= THR_MUTEX_DESTROYED)
			return (EINVAL);
	}
	if ((m->m_lock.m_flags & UMUTEX_PRIO_PROTECT) == 0)
		return (EINVAL);
	*prioceiling = m->m_lock.m_ceilings[0];
	return (0);
}

int
_pthread_mutex_setprioceiling(pthread_mutex_t * __restrict mutex,
    int ceiling, int * __restrict old_ceiling)
{
	struct pthread *curthread;
	struct pthread_mutex *m, *m1, *m2;
	struct mutex_queue *q, *qp;
	int qidx, ret;

	if (*mutex == THR_PSHARED_PTR) {
		m = __thr_pshared_offpage(mutex, 0);
		if (m == NULL)
			return (EINVAL);
		shared_mutex_init(m, NULL);
	} else {
		m = *mutex;
		if (m <= THR_MUTEX_DESTROYED)
			return (EINVAL);
	}
	if ((m->m_lock.m_flags & UMUTEX_PRIO_PROTECT) == 0)
		return (EINVAL);

	ret = __thr_umutex_set_ceiling(&m->m_lock, ceiling, old_ceiling);
	if (ret != 0)
		return (ret);

	curthread = _get_curthread();
	if (PMUTEX_OWNER_ID(m) == TID(curthread)) {
		mutex_assert_is_owned(m);
		m1 = TAILQ_PREV(m, mutex_queue, m_qe);
		m2 = TAILQ_NEXT(m, m_qe);
		if ((m1 != NULL && m1->m_lock.m_ceilings[0] > (u_int)ceiling) ||
		    (m2 != NULL && m2->m_lock.m_ceilings[0] < (u_int)ceiling)) {
			qidx = mutex_qidx(m);
			q = &curthread->mq[qidx];
			qp = &curthread->mq[qidx + 1];
			TAILQ_REMOVE(q, m, m_qe);
			if (!is_pshared_mutex(m))
				TAILQ_REMOVE(qp, m, m_pqe);
			TAILQ_FOREACH(m2, q, m_qe) {
				if (m2->m_lock.m_ceilings[0] > (u_int)ceiling) {
					TAILQ_INSERT_BEFORE(m2, m, m_qe);
					if (!is_pshared_mutex(m)) {
						while (m2 != NULL &&
						    is_pshared_mutex(m2)) {
							m2 = TAILQ_PREV(m2,
							    mutex_queue, m_qe);
						}
						if (m2 == NULL) {
							TAILQ_INSERT_HEAD(qp,
							    m, m_pqe);
						} else {
							TAILQ_INSERT_BEFORE(m2,
							    m, m_pqe);
						}
					}
					return (0);
				}
			}
			TAILQ_INSERT_TAIL(q, m, m_qe);
			if (!is_pshared_mutex(m))
				TAILQ_INSERT_TAIL(qp, m, m_pqe);
		}
	}
	return (0);
}
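
/*
 * Illustrative sketch (not part of the library): creating a
 * priority-protected mutex whose ceiling can later be adjusted with
 * pthread_mutex_setprioceiling().  The ceiling values are placeholders
 * and must be valid scheduling priorities; error checking is omitted.
 *
 *	pthread_mutexattr_t attr;
 *	pthread_mutex_t mtx;
 *	int old;
 *
 *	pthread_mutexattr_init(&attr);
 *	pthread_mutexattr_setprotocol(&attr, PTHREAD_PRIO_PROTECT);
 *	pthread_mutexattr_setprioceiling(&attr, 20);
 *	pthread_mutex_init(&mtx, &attr);
 *	pthread_mutex_setprioceiling(&mtx, 25, &old);
 */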

int
_pthread_mutex_getspinloops_np(pthread_mutex_t *mutex, int *count)
{
	struct pthread_mutex *m;
	int ret;

	ret = check_and_init_mutex(mutex, &m);
	if (ret == 0)
		*count = m->m_spinloops;
	return (ret);
}

int
__pthread_mutex_setspinloops_np(pthread_mutex_t *mutex, int count)
{
	struct pthread_mutex *m;
	int ret;

	ret = check_and_init_mutex(mutex, &m);
	if (ret == 0)
		m->m_spinloops = count;
	return (ret);
}

int
_pthread_mutex_getyieldloops_np(pthread_mutex_t *mutex, int *count)
{
	struct pthread_mutex *m;
	int ret;

	ret = check_and_init_mutex(mutex, &m);
	if (ret == 0)
		*count = m->m_yieldloops;
	return (ret);
}

int
__pthread_mutex_setyieldloops_np(pthread_mutex_t *mutex, int count)
{
	struct pthread_mutex *m;
	int ret;

	ret = check_and_init_mutex(mutex, &m);
	if (ret == 0)
		m->m_yieldloops = count;
	return (ret);
}
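
/*
 * Illustrative sketch (not part of the library): tuning the adaptive
 * spin and yield counts of an existing mutex "mtx" with the
 * non-portable interfaces defined above.  The counts are placeholders.
 *
 *	pthread_mutex_setspinloops_np(&mtx, 1000);
 *	pthread_mutex_setyieldloops_np(&mtx, 10);
 */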

int
_pthread_mutex_isowned_np(pthread_mutex_t *mutex)
{
	struct pthread_mutex *m;

	if (*mutex == THR_PSHARED_PTR) {
		m = __thr_pshared_offpage(mutex, 0);
		if (m == NULL)
			return (0);
		shared_mutex_init(m, NULL);
	} else {
		m = *mutex;
		if (m <= THR_MUTEX_DESTROYED)
			return (0);
	}
	return (PMUTEX_OWNER_ID(m) == TID(_get_curthread()));
}

int
_mutex_owned(struct pthread *curthread, const struct pthread_mutex *mp)
{

	if (__predict_false(mp <= THR_MUTEX_DESTROYED)) {
		if (mp == THR_MUTEX_DESTROYED)
			return (EINVAL);
		return (EPERM);
	}
	if (PMUTEX_OWNER_ID(mp) != TID(curthread))
		return (EPERM);
	return (0);
}

int
_Tthr_mutex_consistent(pthread_mutex_t *mutex)
{
	struct pthread_mutex *m;
	struct pthread *curthread;

	if (*mutex == THR_PSHARED_PTR) {
		m = __thr_pshared_offpage(mutex, 0);
		if (m == NULL)
			return (EINVAL);
		shared_mutex_init(m, NULL);
	} else {
		m = *mutex;
		if (m <= THR_MUTEX_DESTROYED)
			return (EINVAL);
	}
	curthread = _get_curthread();
	if ((m->m_lock.m_flags & (UMUTEX_ROBUST | UMUTEX_NONCONSISTENT)) !=
	    (UMUTEX_ROBUST | UMUTEX_NONCONSISTENT))
		return (EINVAL);
	if (PMUTEX_OWNER_ID(m) != TID(curthread))
		return (EPERM);
	m->m_lock.m_flags &= ~UMUTEX_NONCONSISTENT;
	return (0);
}
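
/*
 * Illustrative sketch (not part of the library): the usual recovery
 * sequence for a robust mutex.  If pthread_mutex_lock() returns
 * EOWNERDEAD, the previous owner died while holding the lock; the new
 * owner repairs the protected state, marks the mutex consistent, and
 * unlocks it.  Error checking is otherwise omitted.
 *
 *	pthread_mutexattr_t attr;
 *	pthread_mutex_t mtx;
 *	int error;
 *
 *	pthread_mutexattr_init(&attr);
 *	pthread_mutexattr_setrobust(&attr, PTHREAD_MUTEX_ROBUST);
 *	pthread_mutex_init(&mtx, &attr);
 *
 *	error = pthread_mutex_lock(&mtx);
 *	if (error == EOWNERDEAD) {
 *		(repair application state here)
 *		pthread_mutex_consistent(&mtx);
 *	}
 *	pthread_mutex_unlock(&mtx);
 */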