1124208Sdes/* 2124208Sdes * Copyright (C) 2007-2010 Lawrence Livermore National Security, LLC. 3124208Sdes * Copyright (C) 2007 The Regents of the University of California. 4124208Sdes * Produced at Lawrence Livermore National Laboratory (cf, DISCLAIMER). 5124208Sdes * Written by Brian Behlendorf <behlendorf1@llnl.gov>. 6124208Sdes * UCRL-CODE-235197 7124208Sdes * 8124208Sdes * This file is part of the SPL, Solaris Porting Layer. 9124208Sdes * 10124208Sdes * The SPL is free software; you can redistribute it and/or modify it 11124208Sdes * under the terms of the GNU General Public License as published by the 12124208Sdes * Free Software Foundation; either version 2 of the License, or (at your 13124208Sdes * option) any later version. 14124208Sdes * 15124208Sdes * The SPL is distributed in the hope that it will be useful, but WITHOUT 16124208Sdes * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or 17124208Sdes * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License 18124208Sdes * for more details. 19124208Sdes * 20124208Sdes * You should have received a copy of the GNU General Public License along 21124208Sdes * with the SPL. If not, see <http://www.gnu.org/licenses/>. 22124208Sdes * 23124208Sdes * Solaris Porting Layer (SPL) Condition Variables Implementation. 
 */

#include <sys/condvar.h>
#include <sys/time.h>
#include <sys/sysmacros.h>
#include <linux/hrtimer.h>
#include <linux/compiler_compat.h>
#include <linux/mod_compat.h>

#include <linux/sched.h>

#ifdef HAVE_SCHED_SIGNAL_HEADER
#include <linux/sched/signal.h>
#endif

/* Upper bound, in microseconds, for the hrtimeout slack tunable below. */
#define	MAX_HRTIMEOUT_SLACK_US	1000
static unsigned int spl_schedule_hrtimeout_slack_us = 0;

/*
 * Setter for the spl_schedule_hrtimeout_slack_us module parameter.
 * Validates that the value does not exceed MAX_HRTIMEOUT_SLACK_US
 * before delegating to the stock uint setter.
 */
static int
param_set_hrtimeout_slack(const char *buf, zfs_kernel_param_t *kp)
{
	unsigned long val;
	int error;

	error = kstrtoul(buf, 0, &val);
	if (error)
		return (error);

	if (val > MAX_HRTIMEOUT_SLACK_US)
		return (-EINVAL);

	error = param_set_uint(buf, kp);
	if (error < 0)
		return (error);

	return (0);
}

module_param_call(spl_schedule_hrtimeout_slack_us, param_set_hrtimeout_slack,
	param_get_uint, &spl_schedule_hrtimeout_slack_us, 0644);
MODULE_PARM_DESC(spl_schedule_hrtimeout_slack_us,
	"schedule_hrtimeout_range() delta/slack value in us, default(0)");

/*
 * Initialize a condition variable.  This Linux implementation supports
 * only CV_DEFAULT; 'name' and 'arg' are unused and must be NULL.  The
 * cv starts with one reference held, dropped by __cv_destroy().
 */
void
__cv_init(kcondvar_t *cvp, char *name, kcv_type_t type, void *arg)
{
	ASSERT(cvp);
	ASSERT(name == NULL);
	ASSERT(type == CV_DEFAULT);
	ASSERT(arg == NULL);

	cvp->cv_magic = CV_MAGIC;
	init_waitqueue_head(&cvp->cv_event);
	init_waitqueue_head(&cvp->cv_destroy);
	atomic_set(&cvp->cv_waiters, 0);
	atomic_set(&cvp->cv_refs, 1);
	cvp->cv_mutex = NULL;
}
EXPORT_SYMBOL(__cv_init);

/*
 * Return 1 when the cv may be safely destroyed: no waiters remain and
 * all references have been dropped.  At that point no mutex may still
 * be associated and the wait queue must be empty.  Returns 0 otherwise.
 */
static int
cv_destroy_wakeup(kcondvar_t *cvp)
{
	if (!atomic_read(&cvp->cv_waiters) && !atomic_read(&cvp->cv_refs)) {
		ASSERT(cvp->cv_mutex == NULL);
		ASSERT(!waitqueue_active(&cvp->cv_event));
		return (1);
	}

	return (0);
}

/*
 * Destroy a condition variable.  Marks the cv CV_DESTROY, drops the
 * reference taken in __cv_init(), then blocks until every remaining
 * waiter has woken and released its reference.
 */
void
__cv_destroy(kcondvar_t *cvp)
{
	ASSERT(cvp);
	ASSERT(cvp->cv_magic == CV_MAGIC);

	cvp->cv_magic = CV_DESTROY;
	atomic_dec(&cvp->cv_refs);

	/* Block until all waiters are woken and references dropped. */
	while (cv_destroy_wakeup(cvp) == 0)
		wait_event_timeout(cvp->cv_destroy, cv_destroy_wakeup(cvp), 1);

	ASSERT3P(cvp->cv_mutex, ==, NULL);
	ASSERT3S(atomic_read(&cvp->cv_refs), ==, 0);
	ASSERT3S(atomic_read(&cvp->cv_waiters), ==, 0);
	ASSERT3S(waitqueue_active(&cvp->cv_event), ==, 0);
}
EXPORT_SYMBOL(__cv_destroy);

/*
 * Common cv_wait() implementation: drop 'mp', sleep in task state
 * 'state' until woken, then reacquire 'mp'.  When 'io' is non-zero the
 * sleep uses io_schedule() so it is accounted as I/O wait.
 */
static void
cv_wait_common(kcondvar_t *cvp, kmutex_t *mp, int state, int io)
{
	DEFINE_WAIT(wait);
	kmutex_t *m;

	ASSERT(cvp);
	ASSERT(mp);
	ASSERT(cvp->cv_magic == CV_MAGIC);
	ASSERT(mutex_owned(mp));
	atomic_inc(&cvp->cv_refs);

	/* Record the associated mutex on first use (racy, debug aid). */
	m = READ_ONCE(cvp->cv_mutex);
	if (!m)
		m = xchg(&cvp->cv_mutex, mp);
	/* Ensure the same mutex is used by all callers */
	ASSERT(m == NULL || m == mp);

	prepare_to_wait_exclusive(&cvp->cv_event, &wait, state);
	atomic_inc(&cvp->cv_waiters);

	/*
	 * Mutex should be dropped after prepare_to_wait(); this
	 * ensures we're linked in to the waiters list and avoids the
	 * race where 'cvp->cv_waiters > 0' but the list is empty.
	 */
	mutex_exit(mp);
	if (io)
		io_schedule();
	else
		schedule();

	/* No more waiters, so a different mutex could be used. */
	if (atomic_dec_and_test(&cvp->cv_waiters)) {
		/*
		 * This is set without any lock, so it's racy. But this is
		 * just for debug anyway, so make it best-effort
		 */
		cvp->cv_mutex = NULL;
		wake_up(&cvp->cv_destroy);
	}

	finish_wait(&cvp->cv_event, &wait);
	atomic_dec(&cvp->cv_refs);

	/*
	 * Reacquire the mutex only after dropping our reference to the cv,
	 * otherwise we could deadlock with a thread holding the mutex and
	 * calling cv_destroy().
	 */
	mutex_enter(mp);
}

/* Wait uninterruptibly until signalled. */
void
__cv_wait(kcondvar_t *cvp, kmutex_t *mp)
{
	cv_wait_common(cvp, mp, TASK_UNINTERRUPTIBLE, 0);
}
EXPORT_SYMBOL(__cv_wait);

/* Wait uninterruptibly; sleep time is accounted as I/O wait. */
void
__cv_wait_io(kcondvar_t *cvp, kmutex_t *mp)
{
	cv_wait_common(cvp, mp, TASK_UNINTERRUPTIBLE, 1);
}
EXPORT_SYMBOL(__cv_wait_io);

/*
 * Interruptible I/O wait.  Returns 0 if a signal is pending on return,
 * otherwise 1.
 */
int
__cv_wait_io_sig(kcondvar_t *cvp, kmutex_t *mp)
{
	cv_wait_common(cvp, mp, TASK_INTERRUPTIBLE, 1);

	return (signal_pending(current) ? 0 : 1);
}
EXPORT_SYMBOL(__cv_wait_io_sig);

/*
 * Interruptible wait.  Returns 0 if a signal is pending on return,
 * otherwise 1.
 */
int
__cv_wait_sig(kcondvar_t *cvp, kmutex_t *mp)
{
	cv_wait_common(cvp, mp, TASK_INTERRUPTIBLE, 0);

	return (signal_pending(current) ? 0 : 1);
}
EXPORT_SYMBOL(__cv_wait_sig);

/*
 * Wait with every signal blocked so the interruptible sleep cannot be
 * cut short by a signal; the caller's signal mask is restored before
 * returning.
 */
void
__cv_wait_idle(kcondvar_t *cvp, kmutex_t *mp)
{
	sigset_t blocked, saved;

	sigfillset(&blocked);
	(void) sigprocmask(SIG_BLOCK, &blocked, &saved);
	cv_wait_common(cvp, mp, TASK_INTERRUPTIBLE, 0);
	(void) sigprocmask(SIG_SETMASK, &saved, NULL);
}
EXPORT_SYMBOL(__cv_wait_idle);

#if defined(HAVE_IO_SCHEDULE_TIMEOUT)
#define	spl_io_schedule_timeout(t)	io_schedule_timeout(t)
#else

/*
 * Emulation of io_schedule_timeout() for kernels where it is
 * unavailable (per HAVE_IO_SCHEDULE_TIMEOUT): arm a timer to wake this
 * task, sleep in io_schedule(), then report the remaining jiffies
 * (0 if the deadline has passed).
 */
struct spl_task_timer {
	struct timer_list timer;
	struct task_struct *task;
};

/* Timer callback: wake the task that armed the timer. */
static void
__cv_wakeup(spl_timer_list_t t)
{
	struct timer_list *tmr = (struct timer_list *)t;
	struct spl_task_timer *task_timer = from_timer(task_timer, tmr, timer);

	wake_up_process(task_timer->task);
}

static long
spl_io_schedule_timeout(long time_left)
{
	long expire_time = jiffies + time_left;
	struct spl_task_timer task_timer;
	struct timer_list *timer = &task_timer.timer;

	task_timer.task = current;

	timer_setup(timer, __cv_wakeup, 0);

	timer->expires = expire_time;
	add_timer(timer);

	io_schedule();

	/* The timer lives on this stack frame; it must be gone before return. */
	del_timer_sync(timer);

	time_left = expire_time - jiffies;

	return (time_left < 0 ? 0 : time_left);
}
#endif

/*
 * 'expire_time' argument is an absolute wall clock time in jiffies.
 * Return value is 1 if the thread was woken with time remaining, or
 * -1 if the timeout expired.
 */
static clock_t
__cv_timedwait_common(kcondvar_t *cvp, kmutex_t *mp, clock_t expire_time,
    int state, int io)
{
	DEFINE_WAIT(wait);
	kmutex_t *m;
	clock_t time_left;

	ASSERT(cvp);
	ASSERT(mp);
	ASSERT(cvp->cv_magic == CV_MAGIC);
	ASSERT(mutex_owned(mp));

	/* XXX - Does not handle jiffie wrap properly */
	time_left = expire_time - jiffies;
	if (time_left <= 0)
		return (-1);

	atomic_inc(&cvp->cv_refs);
	/* Record the associated mutex on first use (racy, debug aid). */
	m = READ_ONCE(cvp->cv_mutex);
	if (!m)
		m = xchg(&cvp->cv_mutex, mp);
	/* Ensure the same mutex is used by all callers */
	ASSERT(m == NULL || m == mp);

	prepare_to_wait_exclusive(&cvp->cv_event, &wait, state);
	atomic_inc(&cvp->cv_waiters);

	/*
	 * Mutex should be dropped after prepare_to_wait(); this
	 * ensures we're linked in to the waiters list and avoids the
	 * race where 'cvp->cv_waiters > 0' but the list is empty.
	 */
	mutex_exit(mp);
	if (io)
		time_left = spl_io_schedule_timeout(time_left);
	else
		time_left = schedule_timeout(time_left);

	/* No more waiters, so a different mutex could be used. */
	if (atomic_dec_and_test(&cvp->cv_waiters)) {
		/*
		 * This is set without any lock, so it's racy. But this is
		 * just for debug anyway, so make it best-effort
		 */
		cvp->cv_mutex = NULL;
		wake_up(&cvp->cv_destroy);
	}

	finish_wait(&cvp->cv_event, &wait);
	atomic_dec(&cvp->cv_refs);

	/*
	 * Reacquire the mutex only after dropping our reference to the cv,
	 * otherwise we could deadlock with a thread holding the mutex and
	 * calling cv_destroy().
	 */
	mutex_enter(mp);
	return (time_left > 0 ? 1 : -1);
}

/* Timed wait, uninterruptible by signals. */
int
__cv_timedwait(kcondvar_t *cvp, kmutex_t *mp, clock_t exp_time)
{
	return (__cv_timedwait_common(cvp, mp, exp_time,
	    TASK_UNINTERRUPTIBLE, 0));
}
EXPORT_SYMBOL(__cv_timedwait);

/* Timed wait, uninterruptible; sleep time is accounted as I/O wait. */
int
__cv_timedwait_io(kcondvar_t *cvp, kmutex_t *mp, clock_t exp_time)
{
	return (__cv_timedwait_common(cvp, mp, exp_time,
	    TASK_UNINTERRUPTIBLE, 1));
}
EXPORT_SYMBOL(__cv_timedwait_io);

/*
 * Interruptible timed wait.  Returns 0 when a signal is pending on
 * return, otherwise the wait status from __cv_timedwait_common().
 */
int
__cv_timedwait_sig(kcondvar_t *cvp, kmutex_t *mp, clock_t exp_time)
{
	int rc;

	rc = __cv_timedwait_common(cvp, mp, exp_time, TASK_INTERRUPTIBLE, 0);
	return (signal_pending(current) ? 0 : rc);
}
EXPORT_SYMBOL(__cv_timedwait_sig);

/*
 * Timed wait with every signal blocked, so the interruptible sleep
 * cannot be cut short by a signal; the caller's signal mask is
 * restored before returning.
 */
int
__cv_timedwait_idle(kcondvar_t *cvp, kmutex_t *mp, clock_t exp_time)
{
	sigset_t blocked, saved;
	int rc;

	sigfillset(&blocked);
	(void) sigprocmask(SIG_BLOCK, &blocked, &saved);
	rc = __cv_timedwait_common(cvp, mp, exp_time,
	    TASK_INTERRUPTIBLE, 0);
	(void) sigprocmask(SIG_SETMASK, &saved, NULL);

	return (rc);
}
EXPORT_SYMBOL(__cv_timedwait_idle);

/*
 * 'expire_time' argument is an absolute clock time in nanoseconds.
 * Return value is 1 if woken before 'expire_time' was reached, or -1
 * if the timeout expired.
 */
static clock_t
__cv_timedwait_hires(kcondvar_t *cvp, kmutex_t *mp, hrtime_t expire_time,
    hrtime_t res, int state)
{
	DEFINE_WAIT(wait);
	kmutex_t *m;
	hrtime_t time_left;
	ktime_t ktime_left;
	u64 slack = 0;
	int rc;

	ASSERT(cvp);
	ASSERT(mp);
	ASSERT(cvp->cv_magic == CV_MAGIC);
	ASSERT(mutex_owned(mp));

	/* Already past the deadline; report a timeout without sleeping. */
	time_left = expire_time - gethrtime();
	if (time_left <= 0)
		return (-1);

	atomic_inc(&cvp->cv_refs);
	/* Record the associated mutex on first use (racy, debug aid). */
	m = READ_ONCE(cvp->cv_mutex);
	if (!m)
		m = xchg(&cvp->cv_mutex, mp);
	/* Ensure the same mutex is used by all callers */
	ASSERT(m == NULL || m == mp);

	prepare_to_wait_exclusive(&cvp->cv_event, &wait, state);
	atomic_inc(&cvp->cv_waiters);

	/*
	 * Mutex should be dropped after prepare_to_wait(); this
	 * ensures we're linked in to the waiters list and avoids the
	 * race where 'cvp->cv_waiters > 0' but the list is empty.
	 */
	mutex_exit(mp);

	/*
	 * The hrtimer slack is the larger of the caller-supplied
	 * resolution 'res' and the spl_schedule_hrtimeout_slack_us
	 * tunable, clamped to MAX_HRTIMEOUT_SLACK_US.
	 */
	ktime_left = ktime_set(0, time_left);
	slack = MIN(MAX(res, spl_schedule_hrtimeout_slack_us * NSEC_PER_USEC),
	    MAX_HRTIMEOUT_SLACK_US * NSEC_PER_USEC);
	rc = schedule_hrtimeout_range(&ktime_left, slack, HRTIMER_MODE_REL);

	/* No more waiters, so a different mutex could be used. */
	if (atomic_dec_and_test(&cvp->cv_waiters)) {
		/*
		 * This is set without any lock, so it's racy. But this is
		 * just for debug anyway, so make it best-effort
		 */
		cvp->cv_mutex = NULL;
		wake_up(&cvp->cv_destroy);
	}

	finish_wait(&cvp->cv_event, &wait);
	atomic_dec(&cvp->cv_refs);

	mutex_enter(mp);
	/* -EINTR means we were woken (or signalled) before the timeout. */
	return (rc == -EINTR ? 1 : -1);
}

/*
 * Compatibility wrapper for the cv_timedwait_hires() Illumos interface.
424 */ 425static int 426cv_timedwait_hires_common(kcondvar_t *cvp, kmutex_t *mp, hrtime_t tim, 427 hrtime_t res, int flag, int state) 428{ 429 if (!(flag & CALLOUT_FLAG_ABSOLUTE)) 430 tim += gethrtime(); 431 432 return (__cv_timedwait_hires(cvp, mp, tim, res, state)); 433} 434 435int 436cv_timedwait_hires(kcondvar_t *cvp, kmutex_t *mp, hrtime_t tim, hrtime_t res, 437 int flag) 438{ 439 return (cv_timedwait_hires_common(cvp, mp, tim, res, flag, 440 TASK_UNINTERRUPTIBLE)); 441} 442EXPORT_SYMBOL(cv_timedwait_hires); 443 444int 445cv_timedwait_sig_hires(kcondvar_t *cvp, kmutex_t *mp, hrtime_t tim, 446 hrtime_t res, int flag) 447{ 448 int rc; 449 450 rc = cv_timedwait_hires_common(cvp, mp, tim, res, flag, 451 TASK_INTERRUPTIBLE); 452 return (signal_pending(current) ? 0 : rc); 453} 454EXPORT_SYMBOL(cv_timedwait_sig_hires); 455 456int 457cv_timedwait_idle_hires(kcondvar_t *cvp, kmutex_t *mp, hrtime_t tim, 458 hrtime_t res, int flag) 459{ 460 sigset_t blocked, saved; 461 int rc; 462 463 sigfillset(&blocked); 464 (void) sigprocmask(SIG_BLOCK, &blocked, &saved); 465 rc = cv_timedwait_hires_common(cvp, mp, tim, res, flag, 466 TASK_INTERRUPTIBLE); 467 (void) sigprocmask(SIG_SETMASK, &saved, NULL); 468 469 return (rc); 470} 471EXPORT_SYMBOL(cv_timedwait_idle_hires); 472 473void 474__cv_signal(kcondvar_t *cvp) 475{ 476 ASSERT(cvp); 477 ASSERT(cvp->cv_magic == CV_MAGIC); 478 atomic_inc(&cvp->cv_refs); 479 480 /* 481 * All waiters are added with WQ_FLAG_EXCLUSIVE so only one 482 * waiter will be set runnable with each call to wake_up(). 483 * Additionally wake_up() holds a spin_lock associated with 484 * the wait queue to ensure we don't race waking up processes. 
485 */ 486 if (atomic_read(&cvp->cv_waiters) > 0) 487 wake_up(&cvp->cv_event); 488 489 atomic_dec(&cvp->cv_refs); 490} 491EXPORT_SYMBOL(__cv_signal); 492 493void 494__cv_broadcast(kcondvar_t *cvp) 495{ 496 ASSERT(cvp); 497 ASSERT(cvp->cv_magic == CV_MAGIC); 498 atomic_inc(&cvp->cv_refs); 499 500 /* 501 * Wake_up_all() will wake up all waiters even those which 502 * have the WQ_FLAG_EXCLUSIVE flag set. 503 */ 504 if (atomic_read(&cvp->cv_waiters) > 0) 505 wake_up_all(&cvp->cv_event); 506 507 atomic_dec(&cvp->cv_refs); 508} 509EXPORT_SYMBOL(__cv_broadcast); 510