/*-
 * Copyright (c) 1982, 1986, 1990, 1991, 1993
 *	The Regents of the University of California.  All rights reserved.
 * (c) UNIX System Laboratories, Inc.
 * All or some portions of this file are derived from material licensed
 * to the University of California by American Telephone and Telegraph
 * Co. or Unix System Laboratories, Inc. and are reproduced herein with
 * the permission of UNIX System Laboratories, Inc.
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 * 1. Redistributions of source code must retain the above copyright
 *    notice, this list of conditions and the following disclaimer.
 * 2. Redistributions in binary form must reproduce the above copyright
 *    notice, this list of conditions and the following disclaimer in the
 *    documentation and/or other materials provided with the distribution.
 * 4. Neither the name of the University nor the names of its contributors
 *    may be used to endorse or promote products derived from this software
 *    without specific prior written permission.
 *
 * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
 * ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
 * SUCH DAMAGE.
 *
 *	@(#)kern_synch.c	8.9 (Berkeley) 5/19/95
 */

#include <sys/cdefs.h>
__FBSDID("$FreeBSD: stable/10/sys/kern/kern_synch.c 337035 2018-08-01 10:35:49Z hselasky $");

#include "opt_kdtrace.h"
#include "opt_ktrace.h"
#include "opt_sched.h"

#include <sys/param.h>
#include <sys/systm.h>
#include <sys/condvar.h>
#include <sys/kdb.h>
#include <sys/kernel.h>
#include <sys/ktr.h>
#include <sys/lock.h>
#include <sys/mutex.h>
#include <sys/proc.h>
#include <sys/resourcevar.h>
#include <sys/sched.h>
#include <sys/sdt.h>
#include <sys/signalvar.h>
#include <sys/sleepqueue.h>
#include <sys/smp.h>
#include <sys/sx.h>
#include <sys/sysctl.h>
#include <sys/sysproto.h>
#include <sys/vmmeter.h>
#ifdef KTRACE
#include <sys/uio.h>
#include <sys/ktrace.h>
#endif

#include <machine/cpu.h>

#ifdef XEN
#include <vm/vm.h>
#include <vm/vm_param.h>
#include <vm/pmap.h>
#endif

static void synch_setup(void *dummy);
SYSINIT(synch_setup, SI_SUB_KICK_SCHEDULER, SI_ORDER_FIRST, synch_setup,
    NULL);

/* Number of ticks a thread may run before should_yield() reports true. */
int	hogticks;
/*
 * Per-CPU dummy objects used only as wait-channel addresses by
 * pause_sbt(); _sleep() recognizes addresses in this range and marks
 * the sleep with SLEEPQ_PAUSE.
 */
static uint8_t pause_wchan[MAXCPU];

/* Drives the periodic load-average recomputation in loadav(). */
static struct callout loadav_callout;

struct loadavg averunnable =
	{ {0, 0, 0}, FSCALE };	/* load average, of runnable procs */
/*
 * Constants for averages over 1, 5, and 15 minutes
 * when sampling at 5 second intervals.
 */
static fixpt_t cexp[3] = {
	0.9200444146293232 * FSCALE,	/* exp(-1/12) */
	0.9834714538216174 * FSCALE,	/* exp(-1/60) */
	0.9944598480048967 * FSCALE,	/* exp(-1/180) */
};

/* kernel uses `FSCALE', userland (SHOULD) use kern.fscale */
SYSCTL_INT(_kern, OID_AUTO, fscale, CTLFLAG_RD, SYSCTL_NULL_INT_PTR, FSCALE, "");

static void	loadav(void *arg);

SDT_PROVIDER_DECLARE(sched);
SDT_PROBE_DEFINE(sched, , , preempt);

/*
 * These probes reference Solaris features that are not implemented in FreeBSD.
 * Create the probes anyway for compatibility with existing D scripts; they'll
 * just never fire.
 */
SDT_PROBE_DEFINE(sched, , , cpucaps__sleep);
SDT_PROBE_DEFINE(sched, , , cpucaps__wakeup);
SDT_PROBE_DEFINE(sched, , , schedctl__nopreempt);
SDT_PROBE_DEFINE(sched, , , schedctl__preempt);
SDT_PROBE_DEFINE(sched, , , schedctl__yield);

/*
 * Initialize the sleep queue subsystem and the default hogticks value.
 * Runs early (SI_SUB_KMEM, see SYSINIT below) so that the sleepq locks
 * exist before anything can sleep.
 */
static void
sleepinit(void *unused)
{

	hogticks = (hz / 10) * 2;	/* Default only. */
	init_sleepqueues();
}

/*
 * vmem tries to lock the sleepq mutexes when free'ing kva, so make sure
 * it is available.
 */
SYSINIT(sleepinit, SI_SUB_KMEM, SI_ORDER_ANY, sleepinit, 0);

/*
 * General sleep call.  Suspends the current thread until a wakeup is
 * performed on the specified identifier.  The thread will then be made
 * runnable with the specified priority.  Sleeps at most sbt units of time
 * (0 means no timeout).  If pri includes the PCATCH flag, let signals
 * interrupt the sleep, otherwise ignore them while sleeping.  Returns 0 if
 * awakened, EWOULDBLOCK if the timeout expires.  If PCATCH is set and a
 * signal becomes pending, ERESTART is returned if the current system
 * call should be restarted if possible, and EINTR is returned if the system
 * call should be interrupted by the signal (return EINTR).
 *
 * The lock argument is unlocked before the caller is suspended, and
 * re-locked before _sleep() returns.  If priority includes the PDROP
 * flag the lock is not re-locked before returning.
 */
int
_sleep(void *ident, struct lock_object *lock, int priority,
    const char *wmesg, sbintime_t sbt, sbintime_t pr, int flags)
{
	struct thread *td;
	struct proc *p;
	struct lock_class *class;
	uintptr_t lock_state;
	int catch, pri, rval, sleepq_flags;
	WITNESS_SAVE_DECL(lock_witness);

	td = curthread;
	p = td->td_proc;
#ifdef KTRACE
	if (KTRPOINT(td, KTR_CSW))
		ktrcsw(1, 0, wmesg);
#endif
	WITNESS_WARN(WARN_GIANTOK | WARN_SLEEPOK, lock,
	    "Sleeping on \"%s\"", wmesg);
	KASSERT(sbt != 0 || mtx_owned(&Giant) || lock != NULL,
	    ("sleeping without a lock"));
	KASSERT(p != NULL, ("msleep1"));
	KASSERT(ident != NULL && TD_IS_RUNNING(td), ("msleep"));
	if (priority & PDROP)
		KASSERT(lock != NULL && lock != &Giant.lock_object,
		    ("PDROP requires a non-Giant lock"));
	if (lock != NULL)
		class = LOCK_CLASS(lock);
	else
		class = NULL;

	if (cold || SCHEDULER_STOPPED()) {
		/*
		 * During autoconfiguration, just return;
		 * don't run any other threads or panic below,
		 * in case this is the idle thread and already asleep.
		 * XXX: this used to do "s = splhigh(); splx(safepri);
		 * splx(s);" to give interrupts a chance, but there is
		 * no way to give interrupts a chance now.
		 */
		if (lock != NULL && priority & PDROP)
			class->lc_unlock(lock);
		return (0);
	}
	catch = priority & PCATCH;
	pri = priority & PRIMASK;

	KASSERT(!TD_ON_SLEEPQ(td), ("recursive sleep"));

	/*
	 * Sleeps on a pause_wchan[] address come from pause_sbt(); tag
	 * them so the sleepqueue code can treat them specially.
	 */
	if ((uint8_t *)ident >= &pause_wchan[0] &&
	    (uint8_t *)ident <= &pause_wchan[MAXCPU - 1])
		sleepq_flags = SLEEPQ_PAUSE;
	else
		sleepq_flags = SLEEPQ_SLEEP;
	if (catch)
		sleepq_flags |= SLEEPQ_INTERRUPTIBLE;

	sleepq_lock(ident);
	CTR5(KTR_PROC, "sleep: thread %ld (pid %ld, %s) on %s (%p)",
	    td->td_tid, p->p_pid, td->td_name, wmesg, ident);

	if (lock == &Giant.lock_object)
		mtx_assert(&Giant, MA_OWNED);
	DROP_GIANT();
	/*
	 * Non-sleepable locks are released here, while the sleepq spin
	 * lock is held; sleepable locks must be dropped after
	 * sleepq_add() below (they may sleep themselves).
	 */
	if (lock != NULL && lock != &Giant.lock_object &&
	    !(class->lc_flags & LC_SLEEPABLE)) {
		WITNESS_SAVE(lock, lock_witness);
		lock_state = class->lc_unlock(lock);
	} else
		/* GCC needs to follow the Yellow Brick Road */
		lock_state = -1;

	/*
	 * We put ourselves on the sleep queue and start our timeout
	 * before calling thread_suspend_check, as we could stop there,
	 * and a wakeup or a SIGCONT (or both) could occur while we were
	 * stopped without resuming us.  Thus, we must be ready for sleep
	 * when cursig() is called.  If the wakeup happens while we're
	 * stopped, then td will no longer be on a sleep queue upon
	 * return from cursig().
	 */
	sleepq_add(ident, lock, wmesg, sleepq_flags, 0);
	if (sbt != 0)
		sleepq_set_timeout_sbt(ident, sbt, pr, flags);
	if (lock != NULL && class->lc_flags & LC_SLEEPABLE) {
		sleepq_release(ident);
		WITNESS_SAVE(lock, lock_witness);
		lock_state = class->lc_unlock(lock);
		sleepq_lock(ident);
	}
	/* Pick the wait primitive matching the timeout/catch combination. */
	if (sbt != 0 && catch)
		rval = sleepq_timedwait_sig(ident, pri);
	else if (sbt != 0)
		rval = sleepq_timedwait(ident, pri);
	else if (catch)
		rval = sleepq_wait_sig(ident, pri);
	else {
		sleepq_wait(ident, pri);
		rval = 0;
	}
#ifdef KTRACE
	if (KTRPOINT(td, KTR_CSW))
		ktrcsw(0, 0, wmesg);
#endif
	PICKUP_GIANT();
	if (lock != NULL && lock != &Giant.lock_object && !(priority & PDROP)) {
		class->lc_lock(lock, lock_state);
		WITNESS_RESTORE(lock, lock_witness);
	}
	return (rval);
}

/*
 * Sleep variant for callers holding a spin mutex.  The mutex is dropped
 * across the sleep and re-acquired before returning; no PCATCH/PDROP
 * semantics are supported here.
 */
int
msleep_spin_sbt(void *ident, struct mtx *mtx, const char *wmesg,
    sbintime_t sbt, sbintime_t pr, int flags)
{
	struct thread *td;
	struct proc *p;
	int rval;
	WITNESS_SAVE_DECL(mtx);

	td = curthread;
	p = td->td_proc;
	KASSERT(mtx != NULL, ("sleeping without a mutex"));
	KASSERT(p != NULL, ("msleep1"));
	KASSERT(ident != NULL && TD_IS_RUNNING(td), ("msleep"));

	if (cold || SCHEDULER_STOPPED()) {
		/*
		 * During autoconfiguration, just return;
		 * don't run any other threads or panic below,
		 * in case this is the idle thread and already asleep.
		 * XXX: this used to do "s = splhigh(); splx(safepri);
		 * splx(s);" to give interrupts a chance, but there is
		 * no way to give interrupts a chance now.
		 */
		return (0);
	}

	sleepq_lock(ident);
	CTR5(KTR_PROC, "msleep_spin: thread %ld (pid %ld, %s) on %s (%p)",
	    td->td_tid, p->p_pid, td->td_name, wmesg, ident);

	DROP_GIANT();
	mtx_assert(mtx, MA_OWNED | MA_NOTRECURSED);
	WITNESS_SAVE(&mtx->lock_object, mtx);
	mtx_unlock_spin(mtx);

	/*
	 * We put ourselves on the sleep queue and start our timeout.
	 */
	sleepq_add(ident, &mtx->lock_object, wmesg, SLEEPQ_SLEEP, 0);
	if (sbt != 0)
		sleepq_set_timeout_sbt(ident, sbt, pr, flags);

	/*
	 * Can't call ktrace with any spin locks held so it can lock the
	 * ktrace_mtx lock, and WITNESS_WARN considers it an error to hold
	 * any spin lock.  Thus, we have to drop the sleepq spin lock while
	 * we handle those requests.  This is safe since we have placed our
	 * thread on the sleep queue already.
	 */
#ifdef KTRACE
	if (KTRPOINT(td, KTR_CSW)) {
		sleepq_release(ident);
		ktrcsw(1, 0, wmesg);
		sleepq_lock(ident);
	}
#endif
#ifdef WITNESS
	sleepq_release(ident);
	WITNESS_WARN(WARN_GIANTOK | WARN_SLEEPOK, NULL, "Sleeping on \"%s\"",
	    wmesg);
	sleepq_lock(ident);
#endif
	if (sbt != 0)
		rval = sleepq_timedwait(ident, 0);
	else {
		sleepq_wait(ident, 0);
		rval = 0;
	}
#ifdef KTRACE
	if (KTRPOINT(td, KTR_CSW))
		ktrcsw(0, 0, wmesg);
#endif
	PICKUP_GIANT();
	mtx_lock_spin(mtx);
	WITNESS_RESTORE(&mtx->lock_object, mtx);
	return (rval);
}

/*
 * pause_sbt() delays the calling thread by the given signed binary
 * time. During cold bootup, pause_sbt() uses the DELAY() function
 * instead of the _sleep() function to do the waiting. The "sbt"
 * argument must be greater than or equal to zero. A "sbt" value of
 * zero is equivalent to a "sbt" value of one tick.
 */
int
pause_sbt(const char *wmesg, sbintime_t sbt, sbintime_t pr, int flags)
{
	KASSERT(sbt >= 0, ("pause_sbt: timeout must be >= 0"));

	/* silently convert invalid timeouts */
	if (sbt == 0)
		sbt = tick_sbt;

	if (cold || kdb_active || SCHEDULER_STOPPED()) {
		/*
		 * Cannot sleep yet (or at all, if the scheduler is
		 * stopped or the debugger is active) -- busy wait.
		 * We delay one second at a time to avoid overflowing the
		 * system specific DELAY() function(s):
		 */
		while (sbt >= SBT_1S) {
			DELAY(1000000);
			sbt -= SBT_1S;
		}
		/* Do the delay remainder, if any */
		sbt = (sbt + SBT_1US - 1) / SBT_1US;	/* round up to microseconds */
		if (sbt > 0)
			DELAY(sbt);
		return (EWOULDBLOCK);
	}
	/* Each CPU gets its own wait channel to avoid spurious wakeups. */
	return (_sleep(&pause_wchan[curcpu], NULL,
	    (flags & C_CATCH) ? PCATCH : 0, wmesg, sbt, pr, flags));
}

/*
 * Make all threads sleeping on the specified identifier runnable.
 */
void
wakeup(void *ident)
{
	int wakeup_swapper;

	sleepq_lock(ident);
	wakeup_swapper = sleepq_broadcast(ident, SLEEPQ_SLEEP, 0, 0);
	sleepq_release(ident);
	/* A swapped-out thread was awakened; poke the swapper (proc0). */
	if (wakeup_swapper) {
		KASSERT(ident != &proc0,
		    ("wakeup and wakeup_swapper and proc0"));
		kick_proc0();
	}
}

/*
 * Make a thread sleeping on the specified identifier runnable.
 * May wake more than one thread if a target thread is currently
 * swapped out.
 */
void
wakeup_one(void *ident)
{
	int wakeup_swapper;

	sleepq_lock(ident);
	wakeup_swapper = sleepq_signal(ident, SLEEPQ_SLEEP, 0, 0);
	sleepq_release(ident);
	if (wakeup_swapper)
		kick_proc0();
}

/*
 * Hand control to the kernel debugger instead of context switching.
 * Does not return: kdb_reenter() is expected to longjmp back into the
 * debugger, and we panic if it ever falls through.
 */
static void
kdb_switch(void)
{
	thread_unlock(curthread);
	kdb_backtrace();
	kdb_reenter();
	panic("%s: did not reenter debugger", __func__);
}

/*
 * The machine independent parts of context switching.
 * Called with the current thread's lock held (and not recursed);
 * accounts the outgoing thread's run time and hands off to the
 * scheduler's sched_switch().
 */
void
mi_switch(int flags, struct thread *newtd)
{
	uint64_t runtime, new_switchtime;
	struct thread *td;

	td = curthread;			/* XXX */
	THREAD_LOCK_ASSERT(td, MA_OWNED | MA_NOTRECURSED);
	KASSERT(!TD_ON_RUNQ(td), ("mi_switch: called by old code"));
#ifdef INVARIANTS
	if (!TD_ON_LOCK(td) && !TD_IS_RUNNING(td))
		mtx_assert(&Giant, MA_NOTOWNED);
#endif
	KASSERT(td->td_critnest == 1 || panicstr,
	    ("mi_switch: switch in a critical section"));
	KASSERT((flags & (SW_INVOL | SW_VOL)) != 0,
	    ("mi_switch: switch must be voluntary or involuntary"));
	KASSERT(newtd != curthread, ("mi_switch: preempting back to ourself"));

	/*
	 * Don't perform context switches from the debugger.
	 */
	if (kdb_active)
		kdb_switch();
	if (SCHEDULER_STOPPED())
		return;
	/* Voluntary/involuntary context-switch accounting for getrusage(). */
	if (flags & SW_VOL) {
		td->td_ru.ru_nvcsw++;
		td->td_swvoltick = ticks;	/* consulted by should_yield() */
	} else
		td->td_ru.ru_nivcsw++;
#ifdef SCHED_STATS
	SCHED_STAT_INC(sched_switch_stats[flags & SW_TYPE_MASK]);
#endif
	/*
	 * Compute the amount of time during which the current
	 * thread was running, and add that to its total so far.
	 */
	new_switchtime = cpu_ticks();
	runtime = new_switchtime - PCPU_GET(switchtime);
	td->td_runtime += runtime;
	td->td_incruntime += runtime;
	PCPU_SET(switchtime, new_switchtime);
	td->td_generation++;	/* bump preempt-detect counter */
	PCPU_INC(cnt.v_swtch);
	PCPU_SET(switchticks, ticks);
	CTR4(KTR_PROC, "mi_switch: old thread %ld (td_sched %p, pid %ld, %s)",
	    td->td_tid, td->td_sched, td->td_proc->p_pid, td->td_name);
#ifdef KDTRACE_HOOKS
	if ((flags & SW_PREEMPT) != 0 || ((flags & SW_INVOL) != 0 &&
	    (flags & SW_TYPE_MASK) == SWT_NEEDRESCHED))
		SDT_PROBE0(sched, , , preempt);
#endif
#ifdef XEN
	PT_UPDATES_FLUSH();
#endif
	sched_switch(td, newtd, flags);
	CTR4(KTR_PROC, "mi_switch: new thread %ld (td_sched %p, pid %ld, %s)",
	    td->td_tid, td->td_sched, td->td_proc->p_pid, td->td_name);

	/*
	 * If the last thread was exiting, finish cleaning it up.
	 */
	if ((td = PCPU_GET(deadthread))) {
		PCPU_SET(deadthread, NULL);
		thread_stash(td);
	}
}

/*
 * Change thread state to be runnable, placing it on the run queue if
 * it is in memory.  If it is swapped out, return true so our caller
 * will know to awaken the swapper.
 */
int
setrunnable(struct thread *td)
{

	THREAD_LOCK_ASSERT(td, MA_OWNED);
	KASSERT(td->td_proc->p_state != PRS_ZOMBIE,
	    ("setrunnable: pid %d is a zombie", td->td_proc->p_pid));
	switch (td->td_state) {
	case TDS_RUNNING:
	case TDS_RUNQ:
		return (0);
	case TDS_INHIBITED:
		/*
		 * If we are only inhibited because we are swapped out
		 * then arrange to swap in this process. Otherwise just return.
		 */
		if (td->td_inhibitors != TDI_SWAPPED)
			return (0);
		/* FALLTHROUGH */
	case TDS_CAN_RUN:
		break;
	default:
		printf("state is 0x%x", td->td_state);
		panic("setrunnable(2)");
	}
	if ((td->td_flags & TDF_INMEM) == 0) {
		/* Swapped out: request a swap-in instead of queueing. */
		if ((td->td_flags & TDF_SWAPINREQ) == 0) {
			td->td_flags |= TDF_SWAPINREQ;
			return (1);
		}
	} else
		sched_wakeup(td);
	return (0);
}

/*
 * Compute a tenex style load average of a quantity on
 * 1, 5 and 15 minute intervals.
 */
static void
loadav(void *arg)
{
	int i, nrun;
	struct loadavg *avg;

	nrun = sched_load();
	avg = &averunnable;

	/* Exponential decay using fixed-point cexp[] coefficients. */
	for (i = 0; i < 3; i++)
		avg->ldavg[i] = (cexp[i] * avg->ldavg[i] +
		    nrun * FSCALE * (FSCALE - cexp[i])) >> FSHIFT;

	/*
	 * Schedule the next update to occur after 5 seconds, but add a
	 * random variation to avoid synchronisation with processes that
	 * run at regular intervals.
	 */
	callout_reset_sbt(&loadav_callout,
	    SBT_1US * (4000000 + (int)(random() % 2000001)), SBT_1US,
	    loadav, NULL, C_DIRECT_EXEC | C_PREL(32));
}

/*
 * SYSINIT hook (see top of file): arm the load-average callout once the
 * scheduler is ready to be kicked.
 */
/* ARGSUSED */
static void
synch_setup(void *dummy)
{
	callout_init(&loadav_callout, 1);

	/* Kick off timeout driven events by calling first time. */
	loadav(NULL);
}

/*
 * Report whether the current thread has run long enough (hogticks)
 * since its last voluntary switch that it ought to yield the CPU.
 */
int
should_yield(void)
{

	/* Unsigned arithmetic so the comparison survives ticks wraparound. */
	return ((u_int)ticks - (u_int)curthread->td_swvoltick >= hogticks);
}

/* Yield the CPU at user priority, but only if should_yield() says so. */
void
maybe_yield(void)
{

	if (should_yield())
		kern_yield(PRI_USER);
}

/*
 * Voluntarily surrender the CPU.  prio selects the priority to switch
 * at: PRI_USER means the thread's base user priority, a negative value
 * leaves the priority untouched.  Giant is dropped around the switch.
 */
void
kern_yield(int prio)
{
	struct thread *td;

	td = curthread;
	DROP_GIANT();
	thread_lock(td);
	if (prio == PRI_USER)
		prio = td->td_user_pri;
	if (prio >= 0)
		sched_prio(td, prio);
	mi_switch(SW_VOL | SWT_RELINQUISH, NULL);
	thread_unlock(td);
	PICKUP_GIANT();
}

/*
 * General purpose yield system call.
 */
int
sys_yield(struct thread *td, struct yield_args *uap)
{

	thread_lock(td);
	/* Timeshare threads drop to the lowest timeshare priority first. */
	if (PRI_BASE(td->td_pri_class) == PRI_TIMESHARE)
		sched_prio(td, PRI_MAX_TIMESHARE);
	mi_switch(SW_VOL | SWT_RELINQUISH, NULL);
	thread_unlock(td);
	td->td_retval[0] = 0;
	return (0);
}