/*-
 * Copyright (c) 1982, 1986, 1991, 1993
 *	The Regents of the University of California.  All rights reserved.
 * (c) UNIX System Laboratories, Inc.
 * All or some portions of this file are derived from material licensed
 * to the University of California by American Telephone and Telegraph
 * Co. or Unix System Laboratories, Inc. and are reproduced herein with
 * the permission of UNIX System Laboratories, Inc.
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 * 1. Redistributions of source code must retain the above copyright
 *    notice, this list of conditions and the following disclaimer.
 * 2. Redistributions in binary form must reproduce the above copyright
 *    notice, this list of conditions and the following disclaimer in the
 *    documentation and/or other materials provided with the distribution.
 * 4. Neither the name of the University nor the names of its contributors
 *    may be used to endorse or promote products derived from this software
 *    without specific prior written permission.
 *
 * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
 * ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
 * SUCH DAMAGE.
 *
 *	@(#)kern_clock.c	8.5 (Berkeley) 1/21/94
 */

#include <sys/cdefs.h>
__FBSDID("$FreeBSD$");

#include "opt_kdb.h"
#include "opt_device_polling.h"
#include "opt_hwpmc_hooks.h"
#include "opt_kdtrace.h"
#include "opt_ntp.h"
#include "opt_watchdog.h"

#include <sys/param.h>
#include <sys/systm.h>
#include <sys/callout.h>
#include <sys/kdb.h>
#include <sys/kernel.h>
#include <sys/kthread.h>
#include <sys/ktr.h>
#include <sys/lock.h>
#include <sys/mutex.h>
#include <sys/proc.h>
#include <sys/resource.h>
#include <sys/resourcevar.h>
#include <sys/sched.h>
#include <sys/sdt.h>
#include <sys/signalvar.h>
#include <sys/sleepqueue.h>
#include <sys/smp.h>
#include <vm/vm.h>
#include <vm/pmap.h>
#include <vm/vm_map.h>
#include <sys/sysctl.h>
#include <sys/bus.h>
#include <sys/interrupt.h>
#include <sys/limits.h>
#include <sys/timetc.h>
#ifdef GPROF
#include <sys/gmon.h>
#endif

#ifdef HWPMC_HOOKS
#include <sys/pmckern.h>
/* Software PMC probes fired from the hard/stat/prof clock paths below. */
PMC_SOFT_DEFINE( , , clock, hard);
PMC_SOFT_DEFINE( , , clock, stat);
PMC_SOFT_DEFINE_EX( , , clock, prof, \
    cpu_startprofclock, cpu_stopprofclock);
#endif

#ifdef DEVICE_POLLING
extern void hardclock_device_poll(void);
#endif /* DEVICE_POLLING */

static void initclocks(void *dummy);
SYSINIT(clocks, SI_SUB_CLOCKS, SI_ORDER_FIRST, initclocks, NULL);

/*
 * Mutex protecting the profiling state (profprocs and the start/stop of
 * the profile clock).  Initialized as a default (sleep) mutex in
 * initclocks(); the callers in startprofclock()/stopprofclock() run in
 * process context.
 */
static struct mtx time_lock;

SDT_PROVIDER_DECLARE(sched);
SDT_PROBE_DEFINE2(sched, , , tick, "struct thread *", "struct proc *");

/*
 * Handler for the kern.cp_time sysctl: export the system-wide CPU time
 * accounting array (one long per CPUSTATES category), summed over all
 * CPUs by read_cpu_time().  Under SCTL_MASK32 (32-bit compat requests)
 * the values are narrowed to unsigned int before export.
 */
static int
sysctl_kern_cp_time(SYSCTL_HANDLER_ARGS)
{
    int error;
    long cp_time[CPUSTATES];
#ifdef SCTL_MASK32
    int i;
    unsigned int cp_time32[CPUSTATES];
#endif

    read_cpu_time(cp_time);
#ifdef SCTL_MASK32
    if (req->flags & SCTL_MASK32) {
        /* A NULL oldptr is a size probe; report the reply length only. */
        if (!req->oldptr)
            return SYSCTL_OUT(req, 0, sizeof(cp_time32));
        for (i = 0; i < CPUSTATES; i++)
            cp_time32[i] = (unsigned int)cp_time[i];
        error = SYSCTL_OUT(req, cp_time32, sizeof(cp_time32));
    } else
#endif
    {
        if (!req->oldptr)
            return SYSCTL_OUT(req, 0, sizeof(cp_time));
        error = SYSCTL_OUT(req, cp_time, sizeof(cp_time));
    }
    return error;
}
SYSCTL_PROC(_kern, OID_AUTO, cp_time, CTLTYPE_LONG|CTLFLAG_RD|CTLFLAG_MPSAFE,
    0,0, sysctl_kern_cp_time, "LU", "CPU time statistics");

/* Zero-filled stand-in for the cp_time of an absent CPU slot. */
static long empty[CPUSTATES];

/*
 * Handler for the kern.cp_times sysctl: export the per-CPU time
 * accounting arrays, one CPUSTATES-sized block per CPU id from 0 to
 * mp_maxid.  Absent CPU ids are reported as all zeroes so consumers can
 * index by CPU number.  SCTL_MASK32 narrows each entry for 32-bit
 * compat requests.
 */
static int
sysctl_kern_cp_times(SYSCTL_HANDLER_ARGS)
{
    struct pcpu *pcpu;
    int error;
    int c;
    long *cp_time;
#ifdef SCTL_MASK32
    unsigned int cp_time32[CPUSTATES];
    int i;
#endif

    if (!req->oldptr) {
        /* Size probe: report the total reply length only. */
#ifdef SCTL_MASK32
        if (req->flags & SCTL_MASK32)
            return SYSCTL_OUT(req, 0, sizeof(cp_time32) * (mp_maxid + 1));
        else
#endif
            return SYSCTL_OUT(req, 0, sizeof(long) * CPUSTATES * (mp_maxid + 1));
    }
    for (error = 0, c = 0; error == 0 && c <= mp_maxid; c++) {
        if (!CPU_ABSENT(c)) {
            pcpu = pcpu_find(c);
            cp_time = pcpu->pc_cp_time;
        } else {
            cp_time = empty;
        }
#ifdef SCTL_MASK32
        if (req->flags & SCTL_MASK32) {
            for (i = 0; i < CPUSTATES; i++)
                cp_time32[i] = (unsigned int)cp_time[i];
            error = SYSCTL_OUT(req, cp_time32, sizeof(cp_time32));
        } else
#endif
            error = SYSCTL_OUT(req, cp_time, sizeof(long) * CPUSTATES);
    }
    return error;
}

SYSCTL_PROC(_kern, OID_AUTO, cp_times, CTLTYPE_LONG|CTLFLAG_RD|CTLFLAG_MPSAFE,
    0,0, sysctl_kern_cp_times, "LU", "per-CPU time statistics");

#ifdef DEADLKRES
/*
 * Wait-channel names exempt from deadlock detection: threads may
 * legitimately sleep on these for very long periods.
 */
static const char *blessed[] = {
    "getblk",
    "so_snd_sx",
    "so_rcv_sx",
    NULL
};
static int slptime_threshold = 1800;    /* seconds on a sleepqueue before panic */
static int blktime_threshold = 900;     /* seconds on a turnstile before panic */
static int sleepfreq = 3;               /* seconds between scans */

/*
 * Deadlock-resolver kernel thread.  Periodically scans every thread in
 * the system and panics (to force a dump for post-mortem analysis) when
 * one has been blocked on a turnstile longer than blktime_threshold, or
 * sleeping on an sx/lockmgr sleepqueue longer than slptime_threshold
 * and is not on the blessed list.  Never returns.
 */
static void
deadlkres(void)
{
    struct proc *p;
    struct thread *td;
    void *wchan;
    int blkticks, i, slpticks, slptype, tryl, tticks;

    tryl = 0;
    for (;;) {
        blkticks = blktime_threshold * hz;
        slpticks = slptime_threshold * hz;

        /*
         * Avoid to sleep on the sx_lock in order to avoid a possible
         * priority inversion problem leading to starvation.
         * If the lock can't be held after 100 tries, panic.
         */
        if (!sx_try_slock(&allproc_lock)) {
            if (tryl > 100)
                panic("%s: possible deadlock detected on allproc_lock\n",
                    __func__);
            tryl++;
            pause("allproc", sleepfreq * hz);
            continue;
        }
        tryl = 0;
        FOREACH_PROC_IN_SYSTEM(p) {
            PROC_LOCK(p);
            if (p->p_state == PRS_NEW) {
                /* Skip processes still being created. */
                PROC_UNLOCK(p);
                continue;
            }
            FOREACH_THREAD_IN_PROC(p, td) {

                thread_lock(td);
                if (TD_ON_LOCK(td)) {

                    /*
                     * The thread should be blocked on a
                     * turnstile, simply check if the
                     * turnstile channel is in good state.
                     */
                    MPASS(td->td_blocked != NULL);

                    tticks = ticks - td->td_blktick;
                    thread_unlock(td);
                    if (tticks > blkticks) {

                        /*
                         * Accordingly with provided
                         * thresholds, this thread is
                         * stuck for too long on a
                         * turnstile.
                         */
                        PROC_UNLOCK(p);
                        sx_sunlock(&allproc_lock);
                        panic("%s: possible deadlock detected for %p, blocked for %d ticks\n",
                            __func__, td, tticks);
                    }
                } else if (TD_IS_SLEEPING(td) &&
                    TD_ON_SLEEPQ(td)) {

                    /*
                     * Check if the thread is sleeping on a
                     * lock, otherwise skip the check.
                     * Drop the thread lock in order to
                     * avoid a LOR with the sleepqueue
                     * spinlock.
                     */
                    wchan = td->td_wchan;
                    tticks = ticks - td->td_slptick;
                    thread_unlock(td);
                    slptype = sleepq_type(wchan);
                    if ((slptype == SLEEPQ_SX ||
                        slptype == SLEEPQ_LK) &&
                        tticks > slpticks) {

                        /*
                         * Accordingly with provided
                         * thresholds, this thread is
                         * stuck for too long on a
                         * sleepqueue.
                         * However, being on a
                         * sleepqueue, we might still
                         * check for the blessed
                         * list.
                         */
                        tryl = 0;
                        for (i = 0; blessed[i] != NULL;
                            i++) {
                            if (!strcmp(blessed[i],
                                td->td_wmesg)) {
                                tryl = 1;
                                break;
                            }
                        }
                        if (tryl != 0) {
                            tryl = 0;
                            continue;
                        }
                        PROC_UNLOCK(p);
                        sx_sunlock(&allproc_lock);
                        panic("%s: possible deadlock detected for %p, blocked for %d ticks\n",
                            __func__, td, tticks);
                    }
                } else
                    thread_unlock(td);
            }
            PROC_UNLOCK(p);
        }
        sx_sunlock(&allproc_lock);

        /* Sleep for sleepfreq seconds. */
        pause("-", sleepfreq * hz);
    }
}

static struct kthread_desc deadlkres_kd = {
    "deadlkres",
    deadlkres,
    (struct thread **)NULL
};

SYSINIT(deadlkres, SI_SUB_CLOCKS, SI_ORDER_ANY, kthread_start, &deadlkres_kd);

static SYSCTL_NODE(_debug, OID_AUTO, deadlkres, CTLFLAG_RW, 0,
    "Deadlock resolver");
SYSCTL_INT(_debug_deadlkres, OID_AUTO, slptime_threshold, CTLFLAG_RW,
    &slptime_threshold, 0,
    "Number of seconds within is valid to sleep on a sleepqueue");
SYSCTL_INT(_debug_deadlkres, OID_AUTO, blktime_threshold, CTLFLAG_RW,
    &blktime_threshold, 0,
    "Number of seconds within is valid to block on a turnstile");
SYSCTL_INT(_debug_deadlkres, OID_AUTO, sleepfreq, CTLFLAG_RW, &sleepfreq, 0,
    "Number of seconds between any deadlock resolver thread run");
#endif	/* DEADLKRES */

/*
 * Sum the per-CPU time counters of every present CPU into the
 * caller-supplied CPUSTATES-sized array.
 */
void
read_cpu_time(long *cp_time)
{
    struct pcpu *pc;
    int i, j;

    /* Sum up global cp_time[]. */
    bzero(cp_time, sizeof(long) * CPUSTATES);
    CPU_FOREACH(i) {
        pc = pcpu_find(i);
        for (j = 0; j < CPUSTATES; j++)
            cp_time[j] += pc->pc_cp_time[j];
    }
}

#ifdef SW_WATCHDOG
#include <sys/watchdog.h>

static int watchdog_ticks;      /* hardclock ticks remaining before firing */
static int watchdog_enabled;
static void watchdog_fire(void);
static void watchdog_config(void *, u_int, int *);
#endif /* SW_WATCHDOG */

/*
 * Clock handling routines.
 *
 * This code is written to operate with two timers that run independently of
 * each other.
 *
 * The main timer, running hz times per second, is used to trigger interval
 * timers, timeouts and rescheduling as needed.
 *
 * The second timer handles kernel and user profiling,
 * and does resource use estimation.  If the second timer is programmable,
 * it is randomized to avoid aliasing between the two clocks.  For example,
 * the randomization prevents an adversary from always giving up the cpu
 * just before its quantum expires.  Otherwise, it would never accumulate
 * cpu ticks.  The mean frequency of the second timer is stathz.
 *
 * If no second timer exists, stathz will be zero; in this case we drive
 * profiling and statistics off the main clock.  This WILL NOT be accurate;
 * do not do it unless absolutely necessary.
 *
 * The statistics clock may (or may not) be run at a higher rate while
 * profiling.  This profile clock runs at profhz.  We require that profhz
 * be an integral multiple of stathz.
 *
 * If the statistics clock is running fast, it must be divided by the ratio
 * profhz/stathz for statistics.  (For profiling, every tick counts.)
 *
 * Time-of-day is maintained using a "timecounter", which may or may
 * not be related to the hardware generating the above mentioned
 * interrupts.
 */

int	stathz;
int	profhz;
int	profprocs;
volatile int	ticks;
int	psratio;

static DPCPU_DEFINE(int, pcputicks);	/* Per-CPU version of ticks. */
/* Nonzero while one CPU is running the global (once-per-tick) duties. */
static int global_hardclock_run = 0;

/*
 * Initialize clock frequencies and start both clocks running.
 */
/* ARGSUSED*/
static void
initclocks(dummy)
    void *dummy;
{
    register int i;

    /*
     * Set divisors to 1 (normal case) and let the machine-specific
     * code do its bit.
     */
    mtx_init(&time_lock, "time lock", NULL, MTX_DEF);
    cpu_initclocks();

    /*
     * Compute profhz/stathz, and fix profhz if needed.
     */
    i = stathz ? stathz : hz;
    if (profhz == 0)
        profhz = i;
    psratio = profhz / i;
#ifdef SW_WATCHDOG
    EVENTHANDLER_REGISTER(watchdog_list, watchdog_config, NULL, 0);
#endif
}

/*
 * Each time the real-time timer fires, this function is called on all CPUs.
 * Note that hardclock() calls hardclock_cpu() for the boot CPU, so only
 * the other CPUs in the system need to call this function.
 */
void
hardclock_cpu(int usermode)
{
    struct pstats *pstats;
    struct thread *td = curthread;
    struct proc *p = td->td_proc;
    int flags;

    /*
     * Run current process's virtual and profile time, as needed.
     */
    pstats = p->p_stats;
    flags = 0;
    if (usermode &&
        timevalisset(&pstats->p_timer[ITIMER_VIRTUAL].it_value)) {
        PROC_SLOCK(p);
        if (itimerdecr(&pstats->p_timer[ITIMER_VIRTUAL], tick) == 0)
            flags |= TDF_ALRMPEND | TDF_ASTPENDING;
        PROC_SUNLOCK(p);
    }
    if (timevalisset(&pstats->p_timer[ITIMER_PROF].it_value)) {
        PROC_SLOCK(p);
        if (itimerdecr(&pstats->p_timer[ITIMER_PROF], tick) == 0)
            flags |= TDF_PROFPEND | TDF_ASTPENDING;
        PROC_SUNLOCK(p);
    }
    thread_lock(td);
    sched_tick(1);
    td->td_flags |= flags;
    thread_unlock(td);

#ifdef HWPMC_HOOKS
    if (PMC_CPU_HAS_SAMPLES(PCPU_GET(cpuid)))
        PMC_CALL_HOOK_UNLOCKED(curthread, PMC_FN_DO_SAMPLES, NULL);
    if (td->td_intr_frame != NULL)
        PMC_SOFT_CALL_TF( , , clock, hard, td->td_intr_frame);
#endif
    callout_process(sbinuptime());
}

/*
 * The real-time timer, interrupting hz times per second.
 */
void
hardclock(int usermode, uintfptr_t pc)
{

    atomic_add_int(&ticks, 1);
    hardclock_cpu(usermode);
    tc_ticktock(1);
    cpu_tick_calibration();
    /*
     * If no separate statistics clock is available, run it from here.
     *
     * XXX: this only works for UP
     */
    if (stathz == 0) {
        profclock(usermode, pc);
        statclock(usermode);
    }
#ifdef DEVICE_POLLING
    hardclock_device_poll();	/* this is very short and quick */
#endif /* DEVICE_POLLING */
#ifdef SW_WATCHDOG
    if (watchdog_enabled > 0 && --watchdog_ticks <= 0)
        watchdog_fire();
#endif /* SW_WATCHDOG */
}

/*
 * Batched variant of hardclock(): account for 'cnt' ticks at once (used
 * when the CPU slept through several ticks).  Each CPU keeps its own
 * tick count in pcputicks; the CPU that advances the global 'ticks'
 * (newticks > 0) also takes on the once-per-tick global duties, guarded
 * by global_hardclock_run so they never run concurrently.
 */
void
hardclock_cnt(int cnt, int usermode)
{
    struct pstats *pstats;
    struct thread *td = curthread;
    struct proc *p = td->td_proc;
    int *t = DPCPU_PTR(pcputicks);
    int flags, global, newticks;
#ifdef SW_WATCHDOG
    int i;
#endif /* SW_WATCHDOG */

    /*
     * Update per-CPU and possibly global ticks values.
     */
    *t += cnt;
    do {
        global = ticks;
        newticks = *t - global;
        if (newticks <= 0) {
            /* Global ticks is already ahead of us; resync. */
            if (newticks < -1)
                *t = global - 1;
            newticks = 0;
            break;
        }
    } while (!atomic_cmpset_int(&ticks, global, *t));

    /*
     * Run current process's virtual and profile time, as needed.
     */
    pstats = p->p_stats;
    flags = 0;
    if (usermode &&
        timevalisset(&pstats->p_timer[ITIMER_VIRTUAL].it_value)) {
        PROC_SLOCK(p);
        if (itimerdecr(&pstats->p_timer[ITIMER_VIRTUAL],
            tick * cnt) == 0)
            flags |= TDF_ALRMPEND | TDF_ASTPENDING;
        PROC_SUNLOCK(p);
    }
    if (timevalisset(&pstats->p_timer[ITIMER_PROF].it_value)) {
        PROC_SLOCK(p);
        if (itimerdecr(&pstats->p_timer[ITIMER_PROF],
            tick * cnt) == 0)
            flags |= TDF_PROFPEND | TDF_ASTPENDING;
        PROC_SUNLOCK(p);
    }
    thread_lock(td);
    sched_tick(cnt);
    td->td_flags |= flags;
    thread_unlock(td);

#ifdef HWPMC_HOOKS
    if (PMC_CPU_HAS_SAMPLES(PCPU_GET(cpuid)))
        PMC_CALL_HOOK_UNLOCKED(curthread, PMC_FN_DO_SAMPLES, NULL);
    if (td->td_intr_frame != NULL)
        PMC_SOFT_CALL_TF( , , clock, hard, td->td_intr_frame);
#endif
    /* We are in charge to handle this tick duty. */
    if (newticks > 0) {
        /* Dangerous and no need to call these things concurrently. */
        if (atomic_cmpset_acq_int(&global_hardclock_run, 0, 1)) {
            tc_ticktock(newticks);
#ifdef DEVICE_POLLING
            /* This is very short and quick. */
            hardclock_device_poll();
#endif /* DEVICE_POLLING */
            atomic_store_rel_int(&global_hardclock_run, 0);
        }
#ifdef SW_WATCHDOG
        if (watchdog_enabled > 0) {
            i = atomic_fetchadd_int(&watchdog_ticks, -newticks);
            if (i > 0 && i <= newticks)
                watchdog_fire();
        }
#endif /* SW_WATCHDOG */
    }
    if (curcpu == CPU_FIRST())
        cpu_tick_calibration();
}

/*
 * Resynchronize the given CPU's private tick counter with the global
 * 'ticks' value (e.g. after the CPU was idle and skipped ticks).
 */
void
hardclock_sync(int cpu)
{
    int	*t = DPCPU_ID_PTR(cpu, pcputicks);

    *t = ticks;
}

/*
 * Compute number of ticks in the specified amount of time.
 */
int
tvtohz(tv)
    struct timeval *tv;
{
    register unsigned long ticks;
    register long sec, usec;

    /*
     * If the number of usecs in the whole seconds part of the time
     * difference fits in a long, then the total number of usecs will
     * fit in an unsigned long.  Compute the total and convert it to
     * ticks, rounding up and adding 1 to allow for the current tick
     * to expire.  Rounding also depends on unsigned long arithmetic
     * to avoid overflow.
     *
     * Otherwise, if the number of ticks in the whole seconds part of
     * the time difference fits in a long, then convert the parts to
     * ticks separately and add, using similar rounding methods and
     * overflow avoidance.  This method would work in the previous
     * case but it is slightly slower and assumes that hz is integral.
     *
     * Otherwise, round the time difference down to the maximum
     * representable value.
     *
     * If ints have 32 bits, then the maximum value for any timeout in
     * 10ms ticks is 248 days.
     */
    sec = tv->tv_sec;
    usec = tv->tv_usec;
    if (usec < 0) {
        sec--;
        usec += 1000000;
    }
    if (sec < 0) {
#ifdef DIAGNOSTIC
        if (usec > 0) {
            sec++;
            usec -= 1000000;
        }
        printf("tvotohz: negative time difference %ld sec %ld usec\n",
            sec, usec);
#endif
        ticks = 1;
    } else if (sec <= LONG_MAX / 1000000)
        ticks = (sec * 1000000 + (unsigned long)usec + (tick - 1))
            / tick + 1;
    else if (sec <= LONG_MAX / hz)
        ticks = sec * hz
            + ((unsigned long)usec + (tick - 1)) / tick + 1;
    else
        ticks = LONG_MAX;
    if (ticks > INT_MAX)
        ticks = INT_MAX;
    return ((int)ticks);
}

/*
 * Start profiling on a process.
 *
 * Kernel profiling passes proc0 which never exits and hence
 * keeps the profile clock running constantly.
 */
void
startprofclock(p)
    register struct proc *p;
{

    PROC_LOCK_ASSERT(p, MA_OWNED);
    if (p->p_flag & P_STOPPROF)
        return;
    if ((p->p_flag & P_PROFIL) == 0) {
        p->p_flag |= P_PROFIL;
        mtx_lock(&time_lock);
        /* First profiled process system-wide: start the profile clock. */
        if (++profprocs == 1)
            cpu_startprofclock();
        mtx_unlock(&time_lock);
    }
}

/*
 * Stop profiling on a process.
 */
void
stopprofclock(p)
    register struct proc *p;
{

    PROC_LOCK_ASSERT(p, MA_OWNED);
    if (p->p_flag & P_PROFIL) {
        if (p->p_profthreads != 0) {
            /* Wait for in-flight addupc_task() callers to drain. */
            p->p_flag |= P_STOPPROF;
            while (p->p_profthreads != 0)
                msleep(&p->p_profthreads, &p->p_mtx, PPAUSE,
                    "stopprof", 0);
            p->p_flag &= ~P_STOPPROF;
        }
        if ((p->p_flag & P_PROFIL) == 0)
            return;
        p->p_flag &= ~P_PROFIL;
        mtx_lock(&time_lock);
        /* Last profiled process gone: stop the profile clock. */
        if (--profprocs == 0)
            cpu_stopprofclock();
        mtx_unlock(&time_lock);
    }
}

/*
 * Statistics clock.  Updates rusage information and calls the scheduler
 * to adjust priorities of the active thread.
 *
 * This should be called by all active processors.
 */
void
statclock(int usermode)
{

    statclock_cnt(1, usermode);
}

/*
 * Batched statistics clock: account for 'cnt' stat ticks at once on the
 * current thread - CPU-state time charging, rusage integrals, and one
 * sched_clock() call per tick.
 */
void
statclock_cnt(int cnt, int usermode)
{
    struct rusage *ru;
    struct vmspace *vm;
    struct thread *td;
    struct proc *p;
    long rss;
    long *cp_time;

    td = curthread;
    p = td->td_proc;

    cp_time = (long *)PCPU_PTR(cp_time);
    if (usermode) {
        /*
         * Charge the time as appropriate.
         */
        td->td_uticks += cnt;
        if (p->p_nice > NZERO)
            cp_time[CP_NICE] += cnt;
        else
            cp_time[CP_USER] += cnt;
    } else {
        /*
         * Came from kernel mode, so we were:
         * - handling an interrupt,
         * - doing syscall or trap work on behalf of the current
         *   user process, or
         * - spinning in the idle loop.
         * Whichever it is, charge the time as appropriate.
         * Note that we charge interrupts to the current process,
         * regardless of whether they are ``for'' that process,
         * so that we know how much of its real time was spent
         * in ``non-process'' (i.e., interrupt) work.
         */
        if ((td->td_pflags & TDP_ITHREAD) ||
            td->td_intr_nesting_level >= 2) {
            td->td_iticks += cnt;
            cp_time[CP_INTR] += cnt;
        } else {
            td->td_pticks += cnt;
            td->td_sticks += cnt;
            if (!TD_IS_IDLETHREAD(td))
                cp_time[CP_SYS] += cnt;
            else
                cp_time[CP_IDLE] += cnt;
        }
    }

    /* Update resource usage integrals and maximums. */
    MPASS(p->p_vmspace != NULL);
    vm = p->p_vmspace;
    ru = &td->td_ru;
    ru->ru_ixrss += pgtok(vm->vm_tsize) * cnt;
    ru->ru_idrss += pgtok(vm->vm_dsize) * cnt;
    ru->ru_isrss += pgtok(vm->vm_ssize) * cnt;
    rss = pgtok(vmspace_resident_count(vm));
    if (ru->ru_maxrss < rss)
        ru->ru_maxrss = rss;
    KTR_POINT2(KTR_SCHED, "thread", sched_tdname(td), "statclock",
        "prio:%d", td->td_priority, "stathz:%d", (stathz)?stathz:hz);
    SDT_PROBE2(sched, , , tick, td, td->td_proc);
    thread_lock_flags(td, MTX_QUIET);
    /* The scheduler is notified once per tick, not once per batch. */
    for ( ; cnt > 0; cnt--)
        sched_clock(td);
    thread_unlock(td);
#ifdef HWPMC_HOOKS
    if (td->td_intr_frame != NULL)
        PMC_SOFT_CALL_TF( , , clock, stat, td->td_intr_frame);
#endif
}

void
profclock(int usermode, uintfptr_t pc)
{

    profclock_cnt(1, usermode, pc);
}

/*
 * Batched profile clock: record 'cnt' profiling ticks against the
 * program counter 'pc' for the current thread (user profiling via
 * addupc_intr(); kernel profiling via the GPROF counters).
 */
void
profclock_cnt(int cnt, int usermode, uintfptr_t pc)
{
    struct thread *td;
#ifdef GPROF
    struct gmonparam *g;
    uintfptr_t i;
#endif

    td = curthread;
    if (usermode) {
        /*
         * Came from user mode; CPU was in user state.
         * If this process is being profiled, record the tick.
         * if there is no related user location yet, don't
         * bother trying to count it.
         */
        if (td->td_proc->p_flag & P_PROFIL)
            addupc_intr(td, pc, cnt);
    }
#ifdef GPROF
    else {
        /*
         * Kernel statistics are just like addupc_intr, only easier.
         */
        g = &_gmonparam;
        if (g->state == GMON_PROF_ON && pc >= g->lowpc) {
            i = PC_TO_I(g, pc);
            if (i < g->textsize) {
                KCOUNT(g, i) += cnt;
            }
        }
    }
#endif
#ifdef HWPMC_HOOKS
    if (td->td_intr_frame != NULL)
        PMC_SOFT_CALL_TF( , , clock, prof, td->td_intr_frame);
#endif
}

/*
 * Return information about system clocks.
 */
static int
sysctl_kern_clockrate(SYSCTL_HANDLER_ARGS)
{
    struct clockinfo clkinfo;
    /*
     * Construct clockinfo structure.
     */
    bzero(&clkinfo, sizeof(clkinfo));
    clkinfo.hz = hz;
    clkinfo.tick = tick;
    clkinfo.profhz = profhz;
    clkinfo.stathz = stathz ? stathz : hz;
    return (sysctl_handle_opaque(oidp, &clkinfo, sizeof clkinfo, req));
}

SYSCTL_PROC(_kern, KERN_CLOCKRATE, clockrate,
    CTLTYPE_STRUCT|CTLFLAG_RD|CTLFLAG_MPSAFE,
    0, 0, sysctl_kern_clockrate, "S,clockinfo",
    "Rate and period of various kernel clocks");

#ifdef SW_WATCHDOG

/*
 * watchdog_list event handler: (re)arm or disarm the software watchdog.
 * The WD_INTERVAL bits of 'cmd' encode the timeout as a power-of-two
 * number of seconds relative to WD_TO_1SEC.  Note that *error is only
 * cleared on the arm path; the disarm path leaves it untouched.
 */
static void
watchdog_config(void *unused __unused, u_int cmd, int *error)
{
    u_int u;

    u = cmd & WD_INTERVAL;
    if (u >= WD_TO_1SEC) {
        watchdog_ticks = (1 << (u - WD_TO_1SEC)) * hz;
        watchdog_enabled = 1;
        *error = 0;
    } else {
        watchdog_enabled = 0;
    }
}

/*
 * Handle a watchdog timeout by dumping interrupt information and
 * then either dropping to DDB or panicking.
 */
static void
watchdog_fire(void)
{
    int nintr;
    uint64_t inttotal;
    u_long *curintr;
    char *curname;

    curintr = intrcnt;
    curname = intrnames;
    inttotal = 0;
    nintr = sintrcnt / sizeof(u_long);

    printf("interrupt                   total\n");
    while (--nintr >= 0) {
        if (*curintr)
            printf("%-12s %20lu\n", curname, *curintr);
        /* intrnames is a packed list of NUL-terminated strings. */
        curname += strlen(curname) + 1;
        inttotal += *curintr++;
    }
    printf("Total %20ju\n", (uintmax_t)inttotal);

#if defined(KDB) && !defined(KDB_UNATTENDED)
    kdb_backtrace();
    kdb_enter(KDB_WHY_WATCHDOG, "watchdog timeout");
#else
    panic("watchdog timeout");
#endif
}

#endif /* SW_WATCHDOG */