/*
 *  Copyright (C) 2007-2010 Lawrence Livermore National Security, LLC.
 *  Copyright (C) 2007 The Regents of the University of California.
 *  Produced at Lawrence Livermore National Laboratory (cf, DISCLAIMER).
 *  Written by Brian Behlendorf <behlendorf1@llnl.gov>.
 *  UCRL-CODE-235197
 *
 *  This file is part of the SPL, Solaris Porting Layer.
 *
 *  The SPL is free software; you can redistribute it and/or modify it
 *  under the terms of the GNU General Public License as published by the
 *  Free Software Foundation; either version 2 of the License, or (at your
 *  option) any later version.
 *
 *  The SPL is distributed in the hope that it will be useful, but WITHOUT
 *  ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
 *  FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
 *  for more details.
 *
 *  You should have received a copy of the GNU General Public License along
 *  with the SPL.  If not, see <http://www.gnu.org/licenses/>.
 *
 *  Solaris Porting Layer (SPL) Condition Variables Implementation.
 */

#include <sys/condvar.h>
#include <sys/time.h>
#include <sys/sysmacros.h>
#include <linux/hrtimer.h>
#include <linux/compiler_compat.h>
#include <linux/mod_compat.h>

#include <linux/sched.h>

#ifdef HAVE_SCHED_SIGNAL_HEADER
#include <linux/sched/signal.h>
#endif

#define	MAX_HRTIMEOUT_SLACK_US	1000
static unsigned int spl_schedule_hrtimeout_slack_us = 0;

static int
param_set_hrtimeout_slack(const char *buf, zfs_kernel_param_t *kp)
{
	unsigned long val;
	int error;

	error = kstrtoul(buf, 0, &val);
	if (error)
		return (error);

	if (val > MAX_HRTIMEOUT_SLACK_US)
		return (-EINVAL);

	error = param_set_uint(buf, kp);
	if (error < 0)
		return (error);

	return (0);
}

module_param_call(spl_schedule_hrtimeout_slack_us, param_set_hrtimeout_slack,
	param_get_uint, &spl_schedule_hrtimeout_slack_us, 0644);
MODULE_PARM_DESC(spl_schedule_hrtimeout_slack_us,
	"schedule_hrtimeout_range() delta/slack value in us, default(0)");
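
/*
 * Illustrative example (assuming the SPL is packaged as its own 'spl'
 * module): the 0644 permissions above expose this knob at runtime via
 * the standard module parameter interface, e.g.:
 *
 *	echo 100 > /sys/module/spl/parameters/spl_schedule_hrtimeout_slack_us
 *
 * Values above MAX_HRTIMEOUT_SLACK_US (1000) are rejected with EINVAL
 * by param_set_hrtimeout_slack() above.
 */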

void
__cv_init(kcondvar_t *cvp, char *name, kcv_type_t type, void *arg)
{
	ASSERT(cvp);
	ASSERT(name == NULL);
	ASSERT(type == CV_DEFAULT);
	ASSERT(arg == NULL);

	cvp->cv_magic = CV_MAGIC;
	init_waitqueue_head(&cvp->cv_event);
	init_waitqueue_head(&cvp->cv_destroy);
	atomic_set(&cvp->cv_waiters, 0);
	atomic_set(&cvp->cv_refs, 1);
	cvp->cv_mutex = NULL;
}
EXPORT_SYMBOL(__cv_init);
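
/*
 * Illustrative usage sketch (hypothetical 'foo' structure, not part of
 * this file): a condvar is paired with a single mutex for its lifetime
 * and may only be destroyed once no waiters remain.
 *
 *	cv_init(&foo->f_cv, NULL, CV_DEFAULT, NULL);
 *	...
 *	cv_destroy(&foo->f_cv);
 */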

static int
cv_destroy_wakeup(kcondvar_t *cvp)
{
	if (!atomic_read(&cvp->cv_waiters) && !atomic_read(&cvp->cv_refs)) {
		ASSERT(cvp->cv_mutex == NULL);
		ASSERT(!waitqueue_active(&cvp->cv_event));
		return (1);
	}

	return (0);
}

void
__cv_destroy(kcondvar_t *cvp)
{
	ASSERT(cvp);
	ASSERT(cvp->cv_magic == CV_MAGIC);

	cvp->cv_magic = CV_DESTROY;
	atomic_dec(&cvp->cv_refs);

	/* Block until all waiters are woken and references dropped. */
	while (cv_destroy_wakeup(cvp) == 0)
		wait_event_timeout(cvp->cv_destroy, cv_destroy_wakeup(cvp), 1);

	ASSERT3P(cvp->cv_mutex, ==, NULL);
	ASSERT3S(atomic_read(&cvp->cv_refs), ==, 0);
	ASSERT3S(atomic_read(&cvp->cv_waiters), ==, 0);
	ASSERT3S(waitqueue_active(&cvp->cv_event), ==, 0);
}
EXPORT_SYMBOL(__cv_destroy);

static void
cv_wait_common(kcondvar_t *cvp, kmutex_t *mp, int state, int io)
{
	DEFINE_WAIT(wait);
	kmutex_t *m;

	ASSERT(cvp);
	ASSERT(mp);
	ASSERT(cvp->cv_magic == CV_MAGIC);
	ASSERT(mutex_owned(mp));
	atomic_inc(&cvp->cv_refs);

	m = READ_ONCE(cvp->cv_mutex);
	if (!m)
		m = xchg(&cvp->cv_mutex, mp);
	/* Ensure the same mutex is used by all callers */
	ASSERT(m == NULL || m == mp);

	prepare_to_wait_exclusive(&cvp->cv_event, &wait, state);
	atomic_inc(&cvp->cv_waiters);

	/*
	 * The mutex must be dropped after prepare_to_wait(); this ensures
	 * we are linked onto the waiters list and avoids the race where
	 * 'cvp->cv_waiters > 0' but the list is empty.
	 */
	mutex_exit(mp);
	if (io)
		io_schedule();
	else
		schedule();

	/* No more waiters, so a different mutex could now be used */
	if (atomic_dec_and_test(&cvp->cv_waiters)) {
		/*
		 * This is set without any lock, so it's racy.  But this is
		 * just for debug anyway, so make it best-effort.
		 */
		cvp->cv_mutex = NULL;
		wake_up(&cvp->cv_destroy);
	}

	finish_wait(&cvp->cv_event, &wait);
	atomic_dec(&cvp->cv_refs);

	/*
	 * Reacquire the mutex only after releasing the cvp; otherwise we
	 * could deadlock with a thread that holds the mutex and calls
	 * cv_destroy().
	 */
	mutex_enter(mp);
}

void
__cv_wait(kcondvar_t *cvp, kmutex_t *mp)
{
	cv_wait_common(cvp, mp, TASK_UNINTERRUPTIBLE, 0);
}
EXPORT_SYMBOL(__cv_wait);
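
/*
 * Illustrative usage sketch (hypothetical 'foo' structure): cv_wait()
 * is always called in a loop that re-checks the predicate with the
 * mutex held, since a waiter can be woken without the condition being
 * true.
 *
 *	mutex_enter(&foo->f_lock);
 *	while (!foo->f_ready)
 *		cv_wait(&foo->f_cv, &foo->f_lock);
 *	mutex_exit(&foo->f_lock);
 */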

void
__cv_wait_io(kcondvar_t *cvp, kmutex_t *mp)
{
	cv_wait_common(cvp, mp, TASK_UNINTERRUPTIBLE, 1);
}
EXPORT_SYMBOL(__cv_wait_io);

int
__cv_wait_io_sig(kcondvar_t *cvp, kmutex_t *mp)
{
	cv_wait_common(cvp, mp, TASK_INTERRUPTIBLE, 1);

	return (signal_pending(current) ? 0 : 1);
}
EXPORT_SYMBOL(__cv_wait_io_sig);

int
__cv_wait_sig(kcondvar_t *cvp, kmutex_t *mp)
{
	cv_wait_common(cvp, mp, TASK_INTERRUPTIBLE, 0);

	return (signal_pending(current) ? 0 : 1);
}
EXPORT_SYMBOL(__cv_wait_sig);
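
/*
 * Illustrative usage sketch (hypothetical 'foo' structure): the _sig
 * variants return 0 when a signal is pending, so callers typically
 * abort the wait and report EINTR.
 *
 *	mutex_enter(&foo->f_lock);
 *	while (!foo->f_ready) {
 *		if (cv_wait_sig(&foo->f_cv, &foo->f_lock) == 0) {
 *			mutex_exit(&foo->f_lock);
 *			return (EINTR);
 *		}
 *	}
 *	mutex_exit(&foo->f_lock);
 */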

/*
 * An 'idle' wait: all signals are blocked so the TASK_INTERRUPTIBLE
 * sleep cannot actually be interrupted, yet it does not count toward
 * load average the way a TASK_UNINTERRUPTIBLE sleep would.
 */
void
__cv_wait_idle(kcondvar_t *cvp, kmutex_t *mp)
{
	sigset_t blocked, saved;

	sigfillset(&blocked);
	(void) sigprocmask(SIG_BLOCK, &blocked, &saved);
	cv_wait_common(cvp, mp, TASK_INTERRUPTIBLE, 0);
	(void) sigprocmask(SIG_SETMASK, &saved, NULL);
}
EXPORT_SYMBOL(__cv_wait_idle);

#if defined(HAVE_IO_SCHEDULE_TIMEOUT)
#define	spl_io_schedule_timeout(t)	io_schedule_timeout(t)
#else

struct spl_task_timer {
	struct timer_list timer;
	struct task_struct *task;
};

static void
__cv_wakeup(spl_timer_list_t t)
{
	struct timer_list *tmr = (struct timer_list *)t;
	struct spl_task_timer *task_timer = from_timer(task_timer, tmr, timer);

	wake_up_process(task_timer->task);
}

/*
 * Fallback for kernels without io_schedule_timeout(): arm a timer to
 * wake this task, sleep in io_schedule(), then report how much of the
 * timeout remains.
 */
static long
spl_io_schedule_timeout(long time_left)
{
	long expire_time = jiffies + time_left;
	struct spl_task_timer task_timer;
	struct timer_list *timer = &task_timer.timer;

	task_timer.task = current;

	timer_setup(timer, __cv_wakeup, 0);

	timer->expires = expire_time;
	add_timer(timer);

	io_schedule();

	del_timer_sync(timer);

	time_left = expire_time - jiffies;

	return (time_left < 0 ? 0 : time_left);
}
#endif

/*
 * 'expire_time' argument is an absolute time in jiffies.
 * Returns 1 if the thread was woken with time remaining, or -1 if the
 * timeout expired.
 */
static clock_t
__cv_timedwait_common(kcondvar_t *cvp, kmutex_t *mp, clock_t expire_time,
    int state, int io)
{
	DEFINE_WAIT(wait);
	kmutex_t *m;
	clock_t time_left;

	ASSERT(cvp);
	ASSERT(mp);
	ASSERT(cvp->cv_magic == CV_MAGIC);
	ASSERT(mutex_owned(mp));

	/* XXX - Does not handle jiffie wrap properly */
	time_left = expire_time - jiffies;
	if (time_left <= 0)
		return (-1);

	atomic_inc(&cvp->cv_refs);
	m = READ_ONCE(cvp->cv_mutex);
	if (!m)
		m = xchg(&cvp->cv_mutex, mp);
	/* Ensure the same mutex is used by all callers */
	ASSERT(m == NULL || m == mp);

	prepare_to_wait_exclusive(&cvp->cv_event, &wait, state);
	atomic_inc(&cvp->cv_waiters);

	/*
	 * The mutex must be dropped after prepare_to_wait(); this ensures
	 * we are linked onto the waiters list and avoids the race where
	 * 'cvp->cv_waiters > 0' but the list is empty.
	 */
	mutex_exit(mp);
	if (io)
		time_left = spl_io_schedule_timeout(time_left);
	else
		time_left = schedule_timeout(time_left);

	/* No more waiters, so a different mutex could now be used */
	if (atomic_dec_and_test(&cvp->cv_waiters)) {
		/*
		 * This is set without any lock, so it's racy.  But this is
		 * just for debug anyway, so make it best-effort.
		 */
		cvp->cv_mutex = NULL;
		wake_up(&cvp->cv_destroy);
	}

	finish_wait(&cvp->cv_event, &wait);
	atomic_dec(&cvp->cv_refs);

	/*
	 * Reacquire the mutex only after releasing the cvp; otherwise we
	 * could deadlock with a thread that holds the mutex and calls
	 * cv_destroy().
	 */
	mutex_enter(mp);
	return (time_left > 0 ? 1 : -1);
}

int
__cv_timedwait(kcondvar_t *cvp, kmutex_t *mp, clock_t exp_time)
{
	return (__cv_timedwait_common(cvp, mp, exp_time,
	    TASK_UNINTERRUPTIBLE, 0));
}
EXPORT_SYMBOL(__cv_timedwait);
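
/*
 * Illustrative usage sketch (hypothetical 'foo' structure): 'exp_time'
 * is an absolute time, so callers normally add a relative delay to the
 * current lbolt value.  A return of -1 indicates the timeout expired.
 *
 *	mutex_enter(&foo->f_lock);
 *	while (!foo->f_ready) {
 *		if (cv_timedwait(&foo->f_cv, &foo->f_lock,
 *		    ddi_get_lbolt() + MSEC_TO_TICK(100)) == -1)
 *			break;
 *	}
 *	mutex_exit(&foo->f_lock);
 */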

int
__cv_timedwait_io(kcondvar_t *cvp, kmutex_t *mp, clock_t exp_time)
{
	return (__cv_timedwait_common(cvp, mp, exp_time,
	    TASK_UNINTERRUPTIBLE, 1));
}
EXPORT_SYMBOL(__cv_timedwait_io);

int
__cv_timedwait_sig(kcondvar_t *cvp, kmutex_t *mp, clock_t exp_time)
{
	int rc;

	rc = __cv_timedwait_common(cvp, mp, exp_time, TASK_INTERRUPTIBLE, 0);
	return (signal_pending(current) ? 0 : rc);
}
EXPORT_SYMBOL(__cv_timedwait_sig);

int
__cv_timedwait_idle(kcondvar_t *cvp, kmutex_t *mp, clock_t exp_time)
{
	sigset_t blocked, saved;
	int rc;

	sigfillset(&blocked);
	(void) sigprocmask(SIG_BLOCK, &blocked, &saved);
	rc = __cv_timedwait_common(cvp, mp, exp_time,
	    TASK_INTERRUPTIBLE, 0);
	(void) sigprocmask(SIG_SETMASK, &saved, NULL);

	return (rc);
}
EXPORT_SYMBOL(__cv_timedwait_idle);

/*
 * 'expire_time' argument is an absolute time in nanoseconds, on the
 * gethrtime() clock.  Returns 1 if the thread was woken before the
 * timeout expired, or -1 if the timeout occurred.
 */
static clock_t
__cv_timedwait_hires(kcondvar_t *cvp, kmutex_t *mp, hrtime_t expire_time,
    hrtime_t res, int state)
{
	DEFINE_WAIT(wait);
	kmutex_t *m;
	hrtime_t time_left;
	ktime_t ktime_left;
	u64 slack = 0;
	int rc;

	ASSERT(cvp);
	ASSERT(mp);
	ASSERT(cvp->cv_magic == CV_MAGIC);
	ASSERT(mutex_owned(mp));

	time_left = expire_time - gethrtime();
	if (time_left <= 0)
		return (-1);

	atomic_inc(&cvp->cv_refs);
	m = READ_ONCE(cvp->cv_mutex);
	if (!m)
		m = xchg(&cvp->cv_mutex, mp);
	/* Ensure the same mutex is used by all callers */
	ASSERT(m == NULL || m == mp);

	prepare_to_wait_exclusive(&cvp->cv_event, &wait, state);
	atomic_inc(&cvp->cv_waiters);

	/*
	 * The mutex must be dropped after prepare_to_wait(); this ensures
	 * we are linked onto the waiters list and avoids the race where
	 * 'cvp->cv_waiters > 0' but the list is empty.
	 */
	mutex_exit(mp);

	ktime_left = ktime_set(0, time_left);
	slack = MIN(MAX(res, spl_schedule_hrtimeout_slack_us * NSEC_PER_USEC),
	    MAX_HRTIMEOUT_SLACK_US * NSEC_PER_USEC);
	rc = schedule_hrtimeout_range(&ktime_left, slack, HRTIMER_MODE_REL);

	/* No more waiters, so a different mutex could now be used */
	if (atomic_dec_and_test(&cvp->cv_waiters)) {
		/*
		 * This is set without any lock, so it's racy.  But this is
		 * just for debug anyway, so make it best-effort.
		 */
		cvp->cv_mutex = NULL;
		wake_up(&cvp->cv_destroy);
	}

	finish_wait(&cvp->cv_event, &wait);
	atomic_dec(&cvp->cv_refs);

	mutex_enter(mp);
	/* schedule_hrtimeout_range() returns -EINTR when woken early */
	return (rc == -EINTR ? 1 : -1);
}

/*
 * Compatibility wrapper for the cv_timedwait_hires() Illumos interface.
 */
static int
cv_timedwait_hires_common(kcondvar_t *cvp, kmutex_t *mp, hrtime_t tim,
    hrtime_t res, int flag, int state)
{
	if (!(flag & CALLOUT_FLAG_ABSOLUTE))
		tim += gethrtime();

	return (__cv_timedwait_hires(cvp, mp, tim, res, state));
}

int
cv_timedwait_hires(kcondvar_t *cvp, kmutex_t *mp, hrtime_t tim, hrtime_t res,
    int flag)
{
	return (cv_timedwait_hires_common(cvp, mp, tim, res, flag,
	    TASK_UNINTERRUPTIBLE));
}
EXPORT_SYMBOL(cv_timedwait_hires);
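
/*
 * Illustrative usage sketch (hypothetical 'foo' structure): without
 * CALLOUT_FLAG_ABSOLUTE, 'tim' is a delay relative to now, so a 50ms
 * wait with 1us resolution might look like:
 *
 *	mutex_enter(&foo->f_lock);
 *	(void) cv_timedwait_hires(&foo->f_cv, &foo->f_lock,
 *	    MSEC2NSEC(50), USEC2NSEC(1), 0);
 *	mutex_exit(&foo->f_lock);
 */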

int
cv_timedwait_sig_hires(kcondvar_t *cvp, kmutex_t *mp, hrtime_t tim,
    hrtime_t res, int flag)
{
	int rc;

	rc = cv_timedwait_hires_common(cvp, mp, tim, res, flag,
	    TASK_INTERRUPTIBLE);
	return (signal_pending(current) ? 0 : rc);
}
EXPORT_SYMBOL(cv_timedwait_sig_hires);

int
cv_timedwait_idle_hires(kcondvar_t *cvp, kmutex_t *mp, hrtime_t tim,
    hrtime_t res, int flag)
{
	sigset_t blocked, saved;
	int rc;

	sigfillset(&blocked);
	(void) sigprocmask(SIG_BLOCK, &blocked, &saved);
	rc = cv_timedwait_hires_common(cvp, mp, tim, res, flag,
	    TASK_INTERRUPTIBLE);
	(void) sigprocmask(SIG_SETMASK, &saved, NULL);

	return (rc);
}
EXPORT_SYMBOL(cv_timedwait_idle_hires);

void
__cv_signal(kcondvar_t *cvp)
{
	ASSERT(cvp);
	ASSERT(cvp->cv_magic == CV_MAGIC);
	atomic_inc(&cvp->cv_refs);

	/*
	 * All waiters are added with WQ_FLAG_EXCLUSIVE, so only one
	 * waiter will be set runnable with each call to wake_up().
	 * Additionally, wake_up() holds a spin_lock associated with
	 * the wait queue to ensure we don't race waking up processes.
	 */
	if (atomic_read(&cvp->cv_waiters) > 0)
		wake_up(&cvp->cv_event);

	atomic_dec(&cvp->cv_refs);
}
EXPORT_SYMBOL(__cv_signal);

void
__cv_broadcast(kcondvar_t *cvp)
{
	ASSERT(cvp);
	ASSERT(cvp->cv_magic == CV_MAGIC);
	atomic_inc(&cvp->cv_refs);

	/*
	 * wake_up_all() will wake up all waiters, even those which
	 * have the WQ_FLAG_EXCLUSIVE flag set.
	 */
	if (atomic_read(&cvp->cv_waiters) > 0)
		wake_up_all(&cvp->cv_event);

	atomic_dec(&cvp->cv_refs);
}
EXPORT_SYMBOL(__cv_broadcast);
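
/*
 * Illustrative usage sketch (hypothetical 'foo' structure): a producer
 * updates the shared state under the mutex, then wakes one waiter with
 * cv_signal() or all of them with cv_broadcast().
 *
 *	mutex_enter(&foo->f_lock);
 *	foo->f_ready = B_TRUE;
 *	cv_broadcast(&foo->f_cv);
 *	mutex_exit(&foo->f_lock);
 */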