kern_clock.c revision 31639
11541Srgrimes/*- 21541Srgrimes * Copyright (c) 1982, 1986, 1991, 1993 31541Srgrimes * The Regents of the University of California. All rights reserved. 41541Srgrimes * (c) UNIX System Laboratories, Inc. 51541Srgrimes * All or some portions of this file are derived from material licensed 61541Srgrimes * to the University of California by American Telephone and Telegraph 71541Srgrimes * Co. or Unix System Laboratories, Inc. and are reproduced herein with 81541Srgrimes * the permission of UNIX System Laboratories, Inc. 91541Srgrimes * 101541Srgrimes * Redistribution and use in source and binary forms, with or without 111541Srgrimes * modification, are permitted provided that the following conditions 121541Srgrimes * are met: 131541Srgrimes * 1. Redistributions of source code must retain the above copyright 141541Srgrimes * notice, this list of conditions and the following disclaimer. 151541Srgrimes * 2. Redistributions in binary form must reproduce the above copyright 161541Srgrimes * notice, this list of conditions and the following disclaimer in the 171541Srgrimes * documentation and/or other materials provided with the distribution. 181541Srgrimes * 3. All advertising materials mentioning features or use of this software 191541Srgrimes * must display the following acknowledgement: 201541Srgrimes * This product includes software developed by the University of 211541Srgrimes * California, Berkeley and its contributors. 221541Srgrimes * 4. Neither the name of the University nor the names of its contributors 231541Srgrimes * may be used to endorse or promote products derived from this software 241541Srgrimes * without specific prior written permission. 251541Srgrimes * 261541Srgrimes * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND 271541Srgrimes * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE 281541Srgrimes * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE 291541Srgrimes * ARE DISCLAIMED. 
IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE 301541Srgrimes * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL 311541Srgrimes * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS 321541Srgrimes * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) 331541Srgrimes * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT 341541Srgrimes * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY 351541Srgrimes * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF 361541Srgrimes * SUCH DAMAGE. 371541Srgrimes * 381541Srgrimes * @(#)kern_clock.c 8.5 (Berkeley) 1/21/94 3931639Sfsmp * $Id: kern_clock.c,v 1.45 1997/11/24 15:15:27 bde Exp $ 401541Srgrimes */ 411541Srgrimes 422858Swollman/* Portions of this software are covered by the following: */ 432858Swollman/****************************************************************************** 442858Swollman * * 452858Swollman * Copyright (c) David L. Mills 1993, 1994 * 462858Swollman * * 472858Swollman * Permission to use, copy, modify, and distribute this software and its * 482858Swollman * documentation for any purpose and without fee is hereby granted, provided * 492858Swollman * that the above copyright notice appears in all copies and that both the * 502858Swollman * copyright notice and this permission notice appear in supporting * 512858Swollman * documentation, and that the name University of Delaware not be used in * 522858Swollman * advertising or publicity pertaining to distribution of the software * 532858Swollman * without specific, written prior permission. The University of Delaware * 542858Swollman * makes no representations about the suitability this software for any * 552858Swollman * purpose. It is provided "as is" without express or implied warranty. 
 *                                                                            *
 ******************************************************************************/

#include <sys/param.h>
#include <sys/systm.h>
#include <sys/dkstat.h>
#include <sys/callout.h>
#include <sys/kernel.h>
#include <sys/proc.h>
#include <sys/resourcevar.h>
#include <sys/signalvar.h>
#include <sys/timex.h>
#include <vm/vm.h>
#include <sys/lock.h>
#include <vm/pmap.h>
#include <vm/vm_map.h>
#include <sys/sysctl.h>

#include <machine/cpu.h>
/* CLOCK_HAIR asks <machine/clock.h> for its hairy internals. */
#define CLOCK_HAIR		/* XXX */
#include <machine/clock.h>
#include <machine/limits.h>

#ifdef GPROF
#include <sys/gmon.h>
#endif

#if defined(SMP) && defined(BETTER_CLOCK)
#include <machine/smp.h>
#endif

/* Run initclocks() once at boot, ordered by the SYSINIT framework. */
static void initclocks __P((void *dummy));
SYSINIT(clocks, SI_SUB_CLOCKS, SI_ORDER_FIRST, initclocks, NULL)

/*
 * Timing-wheel callout state.  The storage is allocated and sized by
 * machine-dependent startup code, hence the external linkage.
 * Exported to machdep.c.
 */
struct callout *callout;		/* array of callout slots */
struct callout_list callfree;		/* free list of unused callouts */
int callwheelsize, callwheelbits, callwheelmask;
struct callout_tailq *callwheel;	/* hashed wheel of pending callouts */


/* Some of these don't belong here, but it's easiest to concentrate them.
 */
/*
 * Per-CPU-state tick accounting.  cp_time[] must have external linkage when
 * the SMP BETTER_CLOCK code updates it from forwarded clock interrupts.
 */
#if defined(SMP) && defined(BETTER_CLOCK)
long cp_time[CPUSTATES];
#else
static long cp_time[CPUSTATES];
#endif
/* Per-drive disk statistics (DK_NDRIVE slots each). */
long dk_seek[DK_NDRIVE];
static long dk_time[DK_NDRIVE];	/* time busy (in statclock ticks) */
long dk_wds[DK_NDRIVE];
long dk_wpms[DK_NDRIVE];
long dk_xfer[DK_NDRIVE];

int dk_busy;
int dk_ndrive = 0;
char dk_names[DK_NDRIVE][DK_NAMELEN];

/* Terminal (tty) character I/O statistics. */
long tk_cancc;
long tk_nin;
long tk_nout;
long tk_rawcc;

/*
 * Clock handling routines.
 *
 * This code is written to operate with two timers that run independently of
 * each other.  The main clock, running hz times per second, is used to keep
 * track of real time.  The second timer handles kernel and user profiling,
 * and does resource use estimation.  If the second timer is programmable,
 * it is randomized to avoid aliasing between the two clocks.  For example,
 * the randomization prevents an adversary from always giving up the cpu
 * just before its quantum expires.  Otherwise, it would never accumulate
 * cpu ticks.  The mean frequency of the second timer is stathz.
 *
 * If no second timer exists, stathz will be zero; in this case we drive
 * profiling and statistics off the main clock.  This WILL NOT be accurate;
 * do not do it unless absolutely necessary.
 *
 * The statistics clock may (or may not) be run at a higher rate while
 * profiling.  This profile clock runs at profhz.  We require that profhz
 * be an integral multiple of stathz.
 *
 * If the statistics clock is running fast, it must be divided by the ratio
 * profhz/stathz for statistics.  (For profiling, every tick counts.)
 */

/*
 * TODO:
 *	allocate more timeout table slots when table overflows.
 */

/*
 * Bump a timeval by a small number of usec's.
 * Assumes the carry never exceeds one second (usec < 1000000).
 */
#define BUMPTIME(t, usec) { \
	register volatile struct timeval *tp = (t); \
	register long us; \
 \
	tp->tv_usec = us = tp->tv_usec + (usec); \
	if (us >= 1000000) { \
		tp->tv_usec = us - 1000000; \
		tp->tv_sec++; \
	} \
}

int	stathz;				/* statistics clock rate (0 = none) */
int	profhz;				/* profiling clock rate */
static int profprocs;			/* processes being profiled */
int	ticks;				/* hardclock ticks since boot */
static int softticks;			/* Like ticks, but for softclock(). */
static struct callout *nextsoftcheck;	/* Next callout to be checked. */
static int psdiv, pscnt;		/* prof => stat divider */
int	psratio;			/* ratio: prof / stat */

volatile struct	timeval time;		/* wall-clock time-of-day */
volatile struct	timeval mono_time;	/* monotonically increasing time */

/*
 * Phase/frequency-lock loop (PLL/FLL) definitions
 *
 * The following variables are read and set by the ntp_adjtime() system
 * call.
 *
 * time_state shows the state of the system clock, with values defined
 * in the timex.h header file.
 *
 * time_status shows the status of the system clock, with bits defined
 * in the timex.h header file.
 *
 * time_offset is used by the PLL/FLL to adjust the system time in small
 * increments.
 *
 * time_constant determines the bandwidth or "stiffness" of the PLL.
 *
 * time_tolerance determines maximum frequency error or tolerance of the
 * CPU clock oscillator and is a property of the architecture; however,
 * in principle it could change as result of the presence of external
 * discipline signals, for instance.
 *
 * time_precision is usually equal to the kernel tick variable; however,
 * in cases where a precision clock counter or external clock is
 * available, the resolution can be much less than this and depend on
 * whether the external clock is working or not.
 *
 * time_maxerror is initialized by a ntp_adjtime() call and increased by
 * the kernel once each second to reflect the maximum error
 * bound growth.
 *
 * time_esterror is set and read by the ntp_adjtime() call, but
 * otherwise not used by the kernel.
 */
int time_status = STA_UNSYNC;	/* clock status bits */
int time_state = TIME_OK;	/* clock state */
long time_offset = 0;		/* time offset (us) */
long time_constant = 0;		/* pll time constant */
long time_tolerance = MAXFREQ;	/* frequency tolerance (scaled ppm) */
long time_precision = 1;	/* clock precision (us) */
long time_maxerror = MAXPHASE;	/* maximum error (us) */
long time_esterror = MAXPHASE;	/* estimated error (us) */

/*
 * The following variables establish the state of the PLL/FLL and the
 * residual time and frequency offset of the local clock. The scale
 * factors are defined in the timex.h header file.
 *
 * time_phase and time_freq are the phase increment and the frequency
 * increment, respectively, of the kernel time variable at each tick of
 * the clock.
 *
 * time_freq is set via ntp_adjtime() from a value stored in a file when
 * the synchronization daemon is first started. Its value is retrieved
 * via ntp_adjtime() and written to the file about once per hour by the
 * daemon.
 *
 * time_adj is the adjustment added to the value of tick at each timer
 * interrupt and is recomputed from time_phase and time_freq at each
 * seconds rollover.
 *
 * time_reftime is the second's portion of the system time on the last
 * call to ntp_adjtime(). It is used to adjust the time_freq variable
 * and to increase the time_maxerror as the time since last update
 * increases.
 */
static long time_phase = 0;		/* phase offset (scaled us) */
long time_freq = 0;			/* frequency offset (scaled ppm) */
static long time_adj = 0;		/* tick adjust (scaled 1 / hz) */
static long time_reftime = 0;		/* time at last adjustment (s) */

#ifdef PPS_SYNC
/*
 * The following variables are used only if the kernel PPS discipline
 * code is configured (PPS_SYNC). The scale factors are defined in the
 * timex.h header file.
 *
 * pps_time contains the time at each calibration interval, as read by
 * microtime(). pps_count counts the seconds of the calibration
 * interval, the duration of which is nominally pps_shift in powers of
 * two.
 *
 * pps_offset is the time offset produced by the time median filter
 * pps_tf[], while pps_jitter is the dispersion (jitter) measured by
 * this filter.
 *
 * pps_freq is the frequency offset produced by the frequency median
 * filter pps_ff[], while pps_stabil is the dispersion (wander) measured
 * by this filter.
 *
 * pps_usec is latched from a high resolution counter or external clock
 * at pps_time. Here we want the hardware counter contents only, not the
 * contents plus the time_tv.usec as usual.
 *
 * pps_valid counts the number of seconds since the last PPS update. It
 * is used as a watchdog timer to disable the PPS discipline should the
 * PPS signal be lost.
 *
 * pps_glitch counts the number of seconds since the beginning of an
 * offset burst more than tick/2 from current nominal offset. It is used
 * mainly to suppress error bursts due to priority conflicts between the
 * PPS interrupt and timer interrupt.
 *
 * pps_intcnt counts the calibration intervals for use in the interval-
 * adaptation algorithm. It's just too complicated for words.
 */
struct timeval pps_time;	/* kernel time at last interval */
long pps_offset = 0;		/* pps time offset (us) */
long pps_jitter = MAXTIME;	/* pps time dispersion (jitter) (us) */
long pps_tf[] = {0, 0, 0};	/* pps time offset median filter (us) */
long pps_freq = 0;		/* frequency offset (scaled ppm) */
long pps_stabil = MAXFREQ;	/* frequency dispersion (scaled ppm) */
long pps_ff[] = {0, 0, 0};	/* frequency offset median filter */
long pps_usec = 0;		/* microsec counter at last interval */
long pps_valid = PPS_VALID;	/* pps signal watchdog counter */
int pps_glitch = 0;		/* pps signal glitch counter */
int pps_count = 0;		/* calibration interval counter (s) */
int pps_shift = PPS_SHIFT;	/* interval duration (s) (shift) */
int pps_intcnt = 0;		/* intervals at current duration */

/*
 * PPS signal quality monitors
 *
 * pps_jitcnt counts the seconds that have been discarded because the
 * jitter measured by the time median filter exceeds the limit MAXTIME
 * (100 us).
 *
 * pps_calcnt counts the frequency calibration intervals, which are
 * variable from 4 s to 256 s.
 *
 * pps_errcnt counts the calibration intervals which have been discarded
 * because the wander exceeds the limit MAXFREQ (100 ppm) or where the
 * calibration interval jitter exceeds two ticks.
 *
 * pps_stbcnt counts the calibration intervals that have been discarded
 * because the frequency wander exceeds the limit MAXFREQ / 4 (25 us).
 */
long pps_jitcnt = 0;		/* jitter limit exceeded */
long pps_calcnt = 0;		/* calibration intervals */
long pps_errcnt = 0;		/* calibration errors */
long pps_stbcnt = 0;		/* stability limit exceeded */
#endif /* PPS_SYNC */

/* XXX none of this stuff works under FreeBSD */
#ifdef EXT_CLOCK
/*
 * External clock definitions
 *
 * The following definitions and declarations are used only if an
 * external clock (HIGHBALL or TPRO) is configured on the system.
 */
#define CLOCK_INTERVAL 30	/* CPU clock update interval (s) */

/*
 * The clock_count variable is set to CLOCK_INTERVAL at each PPS
 * interrupt and decremented once each second.
 */
int clock_count = 0;		/* CPU clock counter */

#ifdef HIGHBALL
/*
 * The clock_offset and clock_cpu variables are used by the HIGHBALL
 * interface. The clock_offset variable defines the offset between
 * system time and the HIGHBALL counters. The clock_cpu variable contains
 * the offset between the system clock and the HIGHBALL clock for use in
 * disciplining the kernel time variable.
 */
extern struct timeval clock_offset; /* Highball clock offset */
long clock_cpu = 0;		/* CPU clock adjust */
#endif /* HIGHBALL */
#endif /* EXT_CLOCK */

/*
 * hardupdate() - local clock update
 *
 * This routine is called by ntp_adjtime() to update the local clock
 * phase and frequency. The implementation is of an adaptive-parameter,
 * hybrid phase/frequency-lock loop (PLL/FLL). The routine computes new
 * time and frequency offset estimates for each call. If the kernel PPS
 * discipline code is configured (PPS_SYNC), the PPS signal itself
 * determines the new time offset, instead of the calling argument.
 * Presumably, calls to ntp_adjtime() occur only when the caller
 * believes the local clock is valid within some bound (+-128 ms with
 * NTP). If the caller's time is far different than the PPS time, an
 * argument will ensue, and it's not clear who will lose.
 *
 * For uncompensated quartz crystal oscillators and nominal update
 * intervals less than 1024 s, operation should be in phase-lock mode
 * (STA_FLL = 0), where the loop is disciplined to phase. For update
 * intervals greater than this, operation should be in frequency-lock
 * mode (STA_FLL = 1), where the loop is disciplined to frequency.
 *
 * Note: splclock() is in effect.
3662858Swollman */ 3672858Swollmanvoid 3682858Swollmanhardupdate(offset) 3692858Swollman long offset; 3702858Swollman{ 3712858Swollman long ltemp, mtemp; 3722858Swollman 3732858Swollman if (!(time_status & STA_PLL) && !(time_status & STA_PPSTIME)) 3742858Swollman return; 3752858Swollman ltemp = offset; 3762858Swollman#ifdef PPS_SYNC 3772858Swollman if (time_status & STA_PPSTIME && time_status & STA_PPSSIGNAL) 3782858Swollman ltemp = pps_offset; 3792858Swollman#endif /* PPS_SYNC */ 38021101Sjhay 38121101Sjhay /* 38221101Sjhay * Scale the phase adjustment and clamp to the operating range. 38321101Sjhay */ 3842858Swollman if (ltemp > MAXPHASE) 3852858Swollman time_offset = MAXPHASE << SHIFT_UPDATE; 3862858Swollman else if (ltemp < -MAXPHASE) 3872858Swollman time_offset = -(MAXPHASE << SHIFT_UPDATE); 3882858Swollman else 3892858Swollman time_offset = ltemp << SHIFT_UPDATE; 39021101Sjhay 39121101Sjhay /* 39221101Sjhay * Select whether the frequency is to be controlled and in which 39321101Sjhay * mode (PLL or FLL). Clamp to the operating range. Ugly 39421101Sjhay * multiply/divide should be replaced someday. 
39521101Sjhay */ 39621101Sjhay if (time_status & STA_FREQHOLD || time_reftime == 0) 39721101Sjhay time_reftime = time.tv_sec; 3982858Swollman mtemp = time.tv_sec - time_reftime; 3992858Swollman time_reftime = time.tv_sec; 40021101Sjhay if (time_status & STA_FLL) { 40121101Sjhay if (mtemp >= MINSEC) { 40221101Sjhay ltemp = ((time_offset / mtemp) << (SHIFT_USEC - 40321101Sjhay SHIFT_UPDATE)); 40421101Sjhay if (ltemp < 0) 40521101Sjhay time_freq -= -ltemp >> SHIFT_KH; 40621101Sjhay else 40721101Sjhay time_freq += ltemp >> SHIFT_KH; 40821101Sjhay } 40921101Sjhay } else { 41021101Sjhay if (mtemp < MAXSEC) { 41121101Sjhay ltemp *= mtemp; 41221101Sjhay if (ltemp < 0) 41321101Sjhay time_freq -= -ltemp >> (time_constant + 41421101Sjhay time_constant + SHIFT_KF - 41521101Sjhay SHIFT_USEC); 41621101Sjhay else 41721101Sjhay time_freq += ltemp >> (time_constant + 41821101Sjhay time_constant + SHIFT_KF - 41921101Sjhay SHIFT_USEC); 42021101Sjhay } 42121101Sjhay } 4222858Swollman if (time_freq > time_tolerance) 4232858Swollman time_freq = time_tolerance; 4242858Swollman else if (time_freq < -time_tolerance) 4252858Swollman time_freq = -time_tolerance; 4262858Swollman} 4272858Swollman 4282858Swollman 4292858Swollman 4302858Swollman/* 4311541Srgrimes * Initialize clock frequencies and start both clocks running. 4321541Srgrimes */ 43310358Sjulian/* ARGSUSED*/ 43410358Sjulianstatic void 43512569Sbdeinitclocks(dummy) 43612569Sbde void *dummy; 4371541Srgrimes{ 4381541Srgrimes register int i; 4391541Srgrimes 4401541Srgrimes /* 4411541Srgrimes * Set divisors to 1 (normal case) and let the machine-specific 4421541Srgrimes * code do its bit. 4431541Srgrimes */ 4441541Srgrimes psdiv = pscnt = 1; 4451541Srgrimes cpu_initclocks(); 4461541Srgrimes 4471541Srgrimes /* 4481541Srgrimes * Compute profhz/stathz, and fix profhz if needed. 4491541Srgrimes */ 4501541Srgrimes i = stathz ? 
stathz : hz; 4511541Srgrimes if (profhz == 0) 4521541Srgrimes profhz = i; 4531541Srgrimes psratio = profhz / i; 4541541Srgrimes} 4551541Srgrimes 4561541Srgrimes/* 4571541Srgrimes * The real-time timer, interrupting hz times per second. 4581541Srgrimes */ 4591541Srgrimesvoid 4601541Srgrimeshardclock(frame) 4611541Srgrimes register struct clockframe *frame; 4621541Srgrimes{ 4631541Srgrimes register struct proc *p; 4641541Srgrimes 4651541Srgrimes p = curproc; 4661541Srgrimes if (p) { 4671541Srgrimes register struct pstats *pstats; 4681541Srgrimes 4691541Srgrimes /* 4701541Srgrimes * Run current process's virtual and profile time, as needed. 4711541Srgrimes */ 4721541Srgrimes pstats = p->p_stats; 4731541Srgrimes if (CLKF_USERMODE(frame) && 4741541Srgrimes timerisset(&pstats->p_timer[ITIMER_VIRTUAL].it_value) && 4751541Srgrimes itimerdecr(&pstats->p_timer[ITIMER_VIRTUAL], tick) == 0) 4761541Srgrimes psignal(p, SIGVTALRM); 4771541Srgrimes if (timerisset(&pstats->p_timer[ITIMER_PROF].it_value) && 4781541Srgrimes itimerdecr(&pstats->p_timer[ITIMER_PROF], tick) == 0) 4791541Srgrimes psignal(p, SIGPROF); 4801541Srgrimes } 4811541Srgrimes 48231639Sfsmp#if defined(SMP) && defined(BETTER_CLOCK) 48331639Sfsmp forward_hardclock(pscnt); 48431639Sfsmp#endif 4851541Srgrimes /* 4861541Srgrimes * If no separate statistics clock is available, run it from here. 4871541Srgrimes */ 4881541Srgrimes if (stathz == 0) 4891541Srgrimes statclock(frame); 4901541Srgrimes 4911541Srgrimes /* 4928876Srgrimes * Increment the time-of-day. 
4931541Srgrimes */ 4941541Srgrimes ticks++; 4952858Swollman { 4962858Swollman int time_update; 4972858Swollman struct timeval newtime = time; 4982858Swollman long ltemp; 4992858Swollman 5002858Swollman if (timedelta == 0) { 50111451Swollman time_update = CPU_THISTICKLEN(tick); 5022858Swollman } else { 50311451Swollman time_update = CPU_THISTICKLEN(tick) + tickdelta; 5043183Swollman timedelta -= tickdelta; 5052858Swollman } 5062858Swollman BUMPTIME(&mono_time, time_update); 5072858Swollman 5082858Swollman /* 5092858Swollman * Compute the phase adjustment. If the low-order bits 5102858Swollman * (time_phase) of the update overflow, bump the high-order bits 5112858Swollman * (time_update). 5122858Swollman */ 5132858Swollman time_phase += time_adj; 5142858Swollman if (time_phase <= -FINEUSEC) { 5152858Swollman ltemp = -time_phase >> SHIFT_SCALE; 5162858Swollman time_phase += ltemp << SHIFT_SCALE; 5172858Swollman time_update -= ltemp; 5182858Swollman } 5192858Swollman else if (time_phase >= FINEUSEC) { 5202858Swollman ltemp = time_phase >> SHIFT_SCALE; 5212858Swollman time_phase -= ltemp << SHIFT_SCALE; 5222858Swollman time_update += ltemp; 5232858Swollman } 5242858Swollman 5252858Swollman newtime.tv_usec += time_update; 5262858Swollman /* 5272858Swollman * On rollover of the second the phase adjustment to be used for 5282858Swollman * the next second is calculated. Also, the maximum error is 5292858Swollman * increased by the tolerance. If the PPS frequency discipline 5302858Swollman * code is present, the phase is increased to compensate for the 5312858Swollman * CPU clock oscillator frequency error. 5322858Swollman * 53321101Sjhay * On a 32-bit machine and given parameters in the timex.h 53421101Sjhay * header file, the maximum phase adjustment is +-512 ms and 53521101Sjhay * maximum frequency offset is a tad less than) +-512 ppm. On a 53621101Sjhay * 64-bit machine, you shouldn't need to ask. 
5372858Swollman */ 5382858Swollman if (newtime.tv_usec >= 1000000) { 5392858Swollman newtime.tv_usec -= 1000000; 5402858Swollman newtime.tv_sec++; 5412858Swollman time_maxerror += time_tolerance >> SHIFT_USEC; 54221101Sjhay 54321101Sjhay /* 54421101Sjhay * Compute the phase adjustment for the next second. In 54521101Sjhay * PLL mode, the offset is reduced by a fixed factor 54621101Sjhay * times the time constant. In FLL mode the offset is 54721101Sjhay * used directly. In either mode, the maximum phase 54821101Sjhay * adjustment for each second is clamped so as to spread 54921101Sjhay * the adjustment over not more than the number of 55021101Sjhay * seconds between updates. 55121101Sjhay */ 5522858Swollman if (time_offset < 0) { 55321101Sjhay ltemp = -time_offset; 55421101Sjhay if (!(time_status & STA_FLL)) 55521101Sjhay ltemp >>= SHIFT_KG + time_constant; 55621101Sjhay if (ltemp > (MAXPHASE / MINSEC) << SHIFT_UPDATE) 55721101Sjhay ltemp = (MAXPHASE / MINSEC) << 55821101Sjhay SHIFT_UPDATE; 5592858Swollman time_offset += ltemp; 56021101Sjhay time_adj = -ltemp << (SHIFT_SCALE - SHIFT_HZ - 56121101Sjhay SHIFT_UPDATE); 56221101Sjhay } else { 56321101Sjhay ltemp = time_offset; 56421101Sjhay if (!(time_status & STA_FLL)) 56521101Sjhay ltemp >>= SHIFT_KG + time_constant; 56621101Sjhay if (ltemp > (MAXPHASE / MINSEC) << SHIFT_UPDATE) 56721101Sjhay ltemp = (MAXPHASE / MINSEC) << 56821101Sjhay SHIFT_UPDATE; 56921101Sjhay time_offset -= ltemp; 57021101Sjhay time_adj = ltemp << (SHIFT_SCALE - SHIFT_HZ - 57121101Sjhay SHIFT_UPDATE); 57221101Sjhay } 57321101Sjhay 5742858Swollman /* 57521101Sjhay * Compute the frequency estimate and additional phase 57621101Sjhay * adjustment due to frequency error for the next 57721101Sjhay * second. When the PPS signal is engaged, gnaw on the 57821101Sjhay * watchdog counter and update the frequency computed by 57921101Sjhay * the pll and the PPS signal. 
5802858Swollman */ 58121101Sjhay#ifdef PPS_SYNC 5822858Swollman pps_valid++; 5832858Swollman if (pps_valid == PPS_VALID) { 5842858Swollman pps_jitter = MAXTIME; 5852858Swollman pps_stabil = MAXFREQ; 5862858Swollman time_status &= ~(STA_PPSSIGNAL | STA_PPSJITTER | 5872858Swollman STA_PPSWANDER | STA_PPSERROR); 5882858Swollman } 5892858Swollman ltemp = time_freq + pps_freq; 5902858Swollman#else 5912858Swollman ltemp = time_freq; 5922858Swollman#endif /* PPS_SYNC */ 5932858Swollman if (ltemp < 0) 5942858Swollman time_adj -= -ltemp >> 5952858Swollman (SHIFT_USEC + SHIFT_HZ - SHIFT_SCALE); 5962858Swollman else 5972858Swollman time_adj += ltemp >> 5982858Swollman (SHIFT_USEC + SHIFT_HZ - SHIFT_SCALE); 5992858Swollman 60021101Sjhay#if SHIFT_HZ == 7 6012858Swollman /* 6022858Swollman * When the CPU clock oscillator frequency is not a 6032858Swollman * power of two in Hz, the SHIFT_HZ is only an 6042858Swollman * approximate scale factor. In the SunOS kernel, this 6052858Swollman * results in a PLL gain factor of 1/1.28 = 0.78 what it 6062858Swollman * should be. In the following code the overall gain is 6072858Swollman * increased by a factor of 1.25, which results in a 6082858Swollman * residual error less than 3 percent. 6092858Swollman */ 6102858Swollman /* Same thing applies for FreeBSD --GAW */ 6112858Swollman if (hz == 100) { 6122858Swollman if (time_adj < 0) 6132858Swollman time_adj -= -time_adj >> 2; 6142858Swollman else 6152858Swollman time_adj += time_adj >> 2; 6162858Swollman } 61721101Sjhay#endif /* SHIFT_HZ */ 6182858Swollman 6192858Swollman /* XXX - this is really bogus, but can't be fixed until 6202858Swollman xntpd's idea of the system clock is fixed to know how 6212858Swollman the user wants leap seconds handled; in the mean time, 6222858Swollman we assume that users of NTP are running without proper 6232858Swollman leap second support (this is now the default anyway) */ 6242858Swollman /* 6252858Swollman * Leap second processing. 
If in leap-insert state at 6262858Swollman * the end of the day, the system clock is set back one 6272858Swollman * second; if in leap-delete state, the system clock is 6282858Swollman * set ahead one second. The microtime() routine or 6292858Swollman * external clock driver will insure that reported time 6302858Swollman * is always monotonic. The ugly divides should be 6312858Swollman * replaced. 6322858Swollman */ 6332858Swollman switch (time_state) { 6348876Srgrimes 6352858Swollman case TIME_OK: 6362858Swollman if (time_status & STA_INS) 6372858Swollman time_state = TIME_INS; 6382858Swollman else if (time_status & STA_DEL) 6392858Swollman time_state = TIME_DEL; 6402858Swollman break; 6418876Srgrimes 6422858Swollman case TIME_INS: 6432858Swollman if (newtime.tv_sec % 86400 == 0) { 6442858Swollman newtime.tv_sec--; 6452858Swollman time_state = TIME_OOP; 6462858Swollman } 6472858Swollman break; 6482858Swollman 6492858Swollman case TIME_DEL: 6502858Swollman if ((newtime.tv_sec + 1) % 86400 == 0) { 6512858Swollman newtime.tv_sec++; 6522858Swollman time_state = TIME_WAIT; 6532858Swollman } 6542858Swollman break; 6558876Srgrimes 6562858Swollman case TIME_OOP: 6572858Swollman time_state = TIME_WAIT; 6582858Swollman break; 6598876Srgrimes 6602858Swollman case TIME_WAIT: 6612858Swollman if (!(time_status & (STA_INS | STA_DEL))) 6622858Swollman time_state = TIME_OK; 6632858Swollman } 6642858Swollman } 6652858Swollman CPU_CLOCKUPDATE(&time, &newtime); 6661541Srgrimes } 6671541Srgrimes 6681541Srgrimes /* 6691541Srgrimes * Process callouts at a very low cpu priority, so we don't keep the 6701541Srgrimes * relatively high clock interrupt priority any longer than necessary. 6711541Srgrimes */ 67229680Sgibbs if (TAILQ_FIRST(&callwheel[ticks & callwheelmask]) != NULL) { 6731541Srgrimes if (CLKF_BASEPRI(frame)) { 6741541Srgrimes /* 6751541Srgrimes * Save the overhead of a software interrupt; 6761541Srgrimes * it will happen as soon as we return, so do it now. 
			 */
			(void)splsoftclock();
			softclock();
		} else
			setsoftclock();
	} else if (softticks + 1 == ticks) {
		/* Wheel is empty this tick; keep softticks in step cheaply. */
		++softticks;
	}
}

/*
 * The callout mechanism is based on the work of Adam M. Costello and
 * George Varghese, published in a technical report entitled "Redesigning
 * the BSD Callout and Timer Facilities" and modified slightly for inclusion
 * in FreeBSD by Justin T. Gibbs.  The original work on the data structures
 * used in this implementation was published by G. Varghese and A. Lauck in
 * the paper "Hashed and Hierarchical Timing Wheels: Data Structures for
 * the Efficient Implementation of a Timer Facility" in the Proceedings of
 * the 11th ACM Annual Symposium on Operating Systems Principles,
 * Austin, Texas Nov 1987.
 */
/*
 * Software (low priority) clock interrupt.
 * Run periodic events from timeout queue.
 *
 * Walks every timing-wheel bucket between softticks and the current
 * ticks value, running each callout whose c_time matches the tick being
 * processed.  The wheel is protected by splhigh(); interrupts are
 * briefly re-enabled every MAX_SOFTCLOCK_STEPS list steps and around
 * each handler invocation, with nextsoftcheck recording where to resume
 * (untimeout() also updates nextsoftcheck if it unlinks that entry).
 */
/*ARGSUSED*/
void
softclock()
{
	register struct callout *c;
	register struct callout_tailq *bucket;
	register int s;
	register int curticks;
	register int steps;	/*
				 * Number of steps taken since
				 * we last allowed interrupts.
				 */

#ifndef MAX_SOFTCLOCK_STEPS
#define MAX_SOFTCLOCK_STEPS 100 /* Maximum allowed value of steps. */
#endif /* MAX_SOFTCLOCK_STEPS */

	steps = 0;
	s = splhigh();
	while (softticks != ticks) {
		softticks++;
		/*
		 * softticks may be modified by hard clock, so cache
		 * it while we work on a given bucket.
		 */
		curticks = softticks;
		bucket = &callwheel[curticks & callwheelmask];
		c = TAILQ_FIRST(bucket);
		while (c) {
			if (c->c_time != curticks) {
				/* Not due this revolution of the wheel. */
				c = TAILQ_NEXT(c, c_links.tqe);
				++steps;
				if (steps >= MAX_SOFTCLOCK_STEPS) {
					nextsoftcheck = c;
					/* Give interrupts a chance. */
					splx(s);
					s = splhigh();
					/*
					 * Re-read the resume point; an
					 * untimeout() run while interrupts
					 * were enabled may have moved it.
					 */
					c = nextsoftcheck;
					steps = 0;
				}
			} else {
				void (*c_func)(void *);
				void *c_arg;

				/*
				 * Unlink the expired callout and recycle it
				 * onto the free list before dropping spl, so
				 * the handler runs with the wheel consistent.
				 */
				nextsoftcheck = TAILQ_NEXT(c, c_links.tqe);
				TAILQ_REMOVE(bucket, c, c_links.tqe);
				c_func = c->c_func;
				c_arg = c->c_arg;
				c->c_func = NULL;
				SLIST_INSERT_HEAD(&callfree, c, c_links.sle);
				splx(s);
				/* Handler runs at lowered priority. */
				c_func(c_arg);
				s = splhigh();
				steps = 0;
				c = nextsoftcheck;
			}
		}
	}
	nextsoftcheck = NULL;
	splx(s);
}

/*
 * timeout --
 *	Execute a function after a specified length of time.
 *
 * untimeout --
 *	Cancel previous timeout function call.
 *
 * callout_handle_init --
 *	Initialize a handle so that using it with untimeout is benign.
 *
 * See AT&T BCI Driver Reference Manual for specification.  This
 * implementation differs from that one in that although an
 * identification value is returned from timeout, the original
 * arguments to timeout as well as the identifier are used to
 * identify entries for untimeout.
 */
struct callout_handle
timeout(ftn, arg, to_ticks)
	timeout_t ftn;
	void *arg;
	register int to_ticks;
{
	int s;
	struct callout *new;
	struct callout_handle handle;

	/* A timeout in the past or right now fires on the next tick. */
	if (to_ticks <= 0)
		to_ticks = 1;

	/* Lock out the clock. */
	s = splhigh();

	/* Fill in the next free callout structure. */
	new = SLIST_FIRST(&callfree);
	if (new == NULL)
		/* XXX Attempt to malloc first */
		panic("timeout table full");

	SLIST_REMOVE_HEAD(&callfree, c_links.sle);
	new->c_arg = arg;
	new->c_func = ftn;
	new->c_time = ticks + to_ticks;
	/* Hash into the wheel bucket for the expiry tick. */
	TAILQ_INSERT_TAIL(&callwheel[new->c_time & callwheelmask],
			  new, c_links.tqe);

	splx(s);
	/* Returned handle identifies this entry for untimeout(). */
	handle.callout = new;
	return (handle);
}

/*
 * Cancel a pending timeout.  The (ftn, arg) pair must match the entry
 * the handle refers to; otherwise the callout has already fired (or been
 * reused) and the call is a no-op.
 */
void
untimeout(ftn, arg, handle)
	timeout_t ftn;
	void *arg;
	struct callout_handle handle;
{
	register int s;

	/*
	 * Check for a handle that was initialized
	 * by callout_handle_init, but never used
	 * for a real timeout.
	 */
	if (handle.callout == NULL)
		return;

	s = splhigh();
	if ((handle.callout->c_func == ftn)
	    && (handle.callout->c_arg == arg)) {
		/*
		 * If softclock() is parked on this entry, advance its
		 * resume pointer before unlinking.
		 */
		if (nextsoftcheck == handle.callout) {
			nextsoftcheck = TAILQ_NEXT(handle.callout, c_links.tqe);
		}
		TAILQ_REMOVE(&callwheel[handle.callout->c_time & callwheelmask],
		    handle.callout, c_links.tqe);
		handle.callout->c_func = NULL;
		SLIST_INSERT_HEAD(&callfree, handle.callout, c_links.sle);
	}
	splx(s);
}

/* Make a handle safe to pass to untimeout() before any timeout() call. */
void
callout_handle_init(struct callout_handle *handle)
{
	handle->callout = NULL;
}

/* Return a consistent snapshot of the kernel time variable. */
void
gettime(struct timeval *tvp)
{
	int s;

	s = splclock();
	/* XXX should use microtime() iff tv_usec is used. */
	*tvp = time;
	splx(s);
}

/*
 * Compute number of hz until specified time.  Used to
 * compute third argument to timeout() from an absolute time.
 */
int
hzto(tv)
	struct timeval *tv;
{
	register unsigned long ticks;
	register long sec, usec;
	int s;

	/*
	 * If the number of usecs in the whole seconds part of the time
	 * difference fits in a long, then the total number of usecs will
	 * fit in an unsigned long.  Compute the total and convert it to
	 * ticks, rounding up and adding 1 to allow for the current tick
	 * to expire.  Rounding also depends on unsigned long arithmetic
	 * to avoid overflow.
	 *
	 * Otherwise, if the number of ticks in the whole seconds part of
	 * the time difference fits in a long, then convert the parts to
	 * ticks separately and add, using similar rounding methods and
	 * overflow avoidance.  This method would work in the previous
	 * case but it is slightly slower and assumes that hz is integral.
	 *
	 * Otherwise, round the time difference down to the maximum
	 * representable value.
	 *
	 * If ints have 32 bits, then the maximum value for any timeout in
	 * 10ms ticks is 248 days.
	 */
	s = splclock();
	sec = tv->tv_sec - time.tv_sec;
	usec = tv->tv_usec - time.tv_usec;
	splx(s);
	if (usec < 0) {
		sec--;
		usec += 1000000;
	}
	if (sec < 0) {
#ifdef DIAGNOSTIC
		printf("hzto: negative time difference %ld sec %ld usec\n",
		       sec, usec);
#endif
		/* Time already passed: fire on the next tick. */
		ticks = 1;
	} else if (sec <= LONG_MAX / 1000000)
		ticks = (sec * 1000000 + (unsigned long)usec + (tick - 1))
			/ tick + 1;
	else if (sec <= LONG_MAX / hz)
		ticks = sec * hz
			+ ((unsigned long)usec + (tick - 1)) / tick + 1;
	else
		ticks = LONG_MAX;
	/* Clamp to the int range of timeout()'s to_ticks argument. */
	if (ticks > INT_MAX)
		ticks = INT_MAX;
	return (ticks);
}

/*
 * Start profiling on a process.
 *
 * Kernel profiling passes proc0 which never exits and hence
 * keeps the profile clock running constantly.
 */
void
startprofclock(p)
	register struct proc *p;
{
	int s;

	if ((p->p_flag & P_PROFIL) == 0) {
		p->p_flag |= P_PROFIL;
		/*
		 * First profiled process: switch the stat clock to the
		 * (faster) profiling rate and set the divider so
		 * statistics still accumulate at stathz.
		 */
		if (++profprocs == 1 && stathz != 0) {
			s = splstatclock();
			psdiv = pscnt = psratio;
			setstatclockrate(profhz);
			splx(s);
		}
	}
}

/*
 * Stop profiling on a process.
 */
void
stopprofclock(p)
	register struct proc *p;
{
	int s;

	if (p->p_flag & P_PROFIL) {
		p->p_flag &= ~P_PROFIL;
		/* Last profiled process: restore the plain stathz rate. */
		if (--profprocs == 0 && stathz != 0) {
			s = splstatclock();
			psdiv = pscnt = 1;
			setstatclockrate(stathz);
			splx(s);
		}
	}
}

/*
 * Statistics clock.  Grab profile sample, and if divider reaches 0,
 * do process and kernel statistics.
 */
void
statclock(frame)
	register struct clockframe *frame;
{
#ifdef GPROF
	register struct gmonparam *g;
#endif
	register struct proc *p;
	register int i;
	struct pstats *pstats;
	long rss;
	struct rusage *ru;
	struct vmspace *vm;

	if (CLKF_USERMODE(frame)) {
		p = curproc;
		/* Profile sample is taken every interrupt, pre-divider. */
		if (p->p_flag & P_PROFIL)
			addupc_intr(p, CLKF_PC(frame), 1);
#if defined(SMP) && defined(BETTER_CLOCK)
		if (stathz != 0)
			forward_statclock(pscnt);
#endif
		/* pscnt divides the (possibly profhz) rate down to stathz. */
		if (--pscnt > 0)
			return;
		/*
		 * Came from user mode; CPU was in user state.
		 * Charge the tick to the process and the user/nice bin.
		 */
		p->p_uticks++;
		if (p->p_nice > NZERO)
			cp_time[CP_NICE]++;
		else
			cp_time[CP_USER]++;
	} else {
#ifdef GPROF
		/*
		 * Kernel statistics are just like addupc_intr, only easier.
		 */
		g = &_gmonparam;
		if (g->state == GMON_PROF_ON) {
			i = CLKF_PC(frame) - g->lowpc;
			if (i < g->textsize) {
				i /= HISTFRACTION * sizeof(*g->kcount);
				g->kcount[i]++;
			}
		}
#endif
#if defined(SMP) && defined(BETTER_CLOCK)
		if (stathz != 0)
			forward_statclock(pscnt);
#endif
		if (--pscnt > 0)
			return;
		/*
		 * Came from kernel mode, so we were:
		 * - handling an interrupt,
		 * - doing syscall or trap work on behalf of the current
		 *   user process, or
		 * - spinning in the idle loop.
		 * Whichever it is, charge the time as appropriate.
		 * Note that we charge interrupts to the current process,
		 * regardless of whether they are ``for'' that process,
		 * so that we know how much of its real time was spent
		 * in ``non-process'' (i.e., interrupt) work.
		 */
		p = curproc;
		if (CLKF_INTR(frame)) {
			if (p != NULL)
				p->p_iticks++;
			cp_time[CP_INTR]++;
		} else if (p != NULL) {
			p->p_sticks++;
			cp_time[CP_SYS]++;
		} else
			cp_time[CP_IDLE]++;
	}
	pscnt = psdiv;

	/*
	 * We maintain statistics shown by user-level statistics
	 * programs:  the amount of time in each cpu state, and
	 * the amount of time each of DK_NDRIVE ``drives'' is busy.
	 *
	 * XXX should either run linked list of drives, or (better)
	 * grab timestamps in the start & done code.
	 */
	for (i = 0; i < DK_NDRIVE; i++)
		if (dk_busy & (1 << i))
			dk_time[i]++;

	/*
	 * We adjust the priority of the current process.  The priority of
	 * a process gets worse as it accumulates CPU time.  The cpu usage
	 * estimator (p_estcpu) is increased here.  The formula for computing
	 * priorities (in kern_synch.c) will compute a different value each
	 * time p_estcpu increases by 4.  The cpu usage estimator ramps up
	 * quite quickly when the process is running (linearly), and decays
	 * away exponentially, at a rate which is proportionally slower when
	 * the system is busy.  The basic principal is that the system will
	 * 90% forget that the process used a lot of CPU time in 5 * loadav
	 * seconds.  This causes the system to favor processes which haven't
	 * run much recently, and to round-robin among other processes.
	 */
	if (p != NULL) {
		p->p_cpticks++;
		/* Saturate p_estcpu rather than letting it wrap to zero. */
		if (++p->p_estcpu == 0)
			p->p_estcpu--;
		if ((p->p_estcpu & 3) == 0) {
			resetpriority(p);
			if (p->p_priority >= PUSER)
				p->p_priority = p->p_usrpri;
		}

		/* Update resource usage integrals and maximums. */
		if ((pstats = p->p_stats) != NULL &&
		    (ru = &pstats->p_ru) != NULL &&
		    (vm = p->p_vmspace) != NULL) {
			/* Integrals accumulate in kilobytes per stat tick. */
			ru->ru_ixrss += vm->vm_tsize * PAGE_SIZE / 1024;
			ru->ru_idrss += vm->vm_dsize * PAGE_SIZE / 1024;
			ru->ru_isrss += vm->vm_ssize * PAGE_SIZE / 1024;
			rss = vm->vm_pmap.pm_stats.resident_count *
			      PAGE_SIZE / 1024;
			if (ru->ru_maxrss < rss)
				ru->ru_maxrss = rss;
		}
	}
}

/*
 * Return information about system clocks.
 */
static int
sysctl_kern_clockrate SYSCTL_HANDLER_ARGS
{
	struct clockinfo clkinfo;
	/*
	 * Construct clockinfo structure.
	 */
	clkinfo.hz = hz;
	clkinfo.tick = tick;
	clkinfo.tickadj = tickadj;
	clkinfo.profhz = profhz;
	clkinfo.stathz = stathz ? stathz : hz;
	return (sysctl_handle_opaque(oidp, &clkinfo, sizeof clkinfo, req));
}

SYSCTL_PROC(_kern, KERN_CLOCKRATE, clockrate, CTLTYPE_STRUCT|CTLFLAG_RD,
	0, 0, sysctl_kern_clockrate, "S,clockinfo","");

#ifdef PPS_SYNC
/*
 * hardpps() - discipline CPU clock oscillator to external PPS signal
 *
 * This routine is called at each PPS interrupt in order to discipline
 * the CPU clock oscillator to the PPS signal. It measures the PPS phase
 * and leaves it in a handy spot for the hardclock() routine. It
 * integrates successive PPS phase differences and calculates the
 * frequency offset.
 * This is used in hardclock() to discipline the CPU
 * clock oscillator so that intrinsic frequency error is cancelled out.
 * The code requires the caller to capture the time and hardware counter
 * value at the on-time PPS signal transition.
 *
 * Note that, on some Unix systems, this routine runs at an interrupt
 * priority level higher than the timer interrupt routine hardclock().
 * Therefore, the variables used are distinct from the hardclock()
 * variables, except for certain exceptions: The PPS frequency pps_freq
 * and phase pps_offset variables are determined by this routine and
 * updated atomically. The time_tolerance variable can be considered a
 * constant, since it is infrequently changed, and then only when the
 * PPS signal is disabled. The watchdog counter pps_valid is updated
 * once per second by hardclock() and is atomically cleared in this
 * routine.
 */
void
hardpps(tvp, usec)
	struct timeval *tvp;		/* time at PPS */
	long usec;			/* hardware counter at PPS */
{
	long u_usec, v_usec, bigtick;
	long cal_sec, cal_usec;

	/*
	 * An occasional glitch can be produced when the PPS interrupt
	 * occurs in the hardclock() routine before the time variable is
	 * updated. Here the offset is discarded when the difference
	 * between it and the last one is greater than tick/2, but not
	 * if the interval since the first discard exceeds 30 s.
	 */
	time_status |= STA_PPSSIGNAL;
	time_status &= ~(STA_PPSJITTER | STA_PPSWANDER | STA_PPSERROR);
	pps_valid = 0;			/* reset the hardclock() watchdog */
	u_usec = -tvp->tv_usec;
	if (u_usec < -500000)
		u_usec += 1000000;
	v_usec = pps_offset - u_usec;
	if (v_usec < 0)
		v_usec = -v_usec;
	if (v_usec > (tick >> 1)) {
		if (pps_glitch > MAXGLITCH) {
			/* Glitch run too long; accept the new phase. */
			pps_glitch = 0;
			pps_tf[2] = u_usec;
			pps_tf[1] = u_usec;
		} else {
			pps_glitch++;
			u_usec = pps_offset;
		}
	} else
		pps_glitch = 0;

	/*
	 * A three-stage median filter is used to help deglitch the pps
	 * time. The median sample becomes the time offset estimate; the
	 * difference between the other two samples becomes the time
	 * dispersion (jitter) estimate.
	 */
	pps_tf[2] = pps_tf[1];
	pps_tf[1] = pps_tf[0];
	pps_tf[0] = u_usec;
	if (pps_tf[0] > pps_tf[1]) {
		if (pps_tf[1] > pps_tf[2]) {
			pps_offset = pps_tf[1];		/* 0 1 2 */
			v_usec = pps_tf[0] - pps_tf[2];
		} else if (pps_tf[2] > pps_tf[0]) {
			pps_offset = pps_tf[0];		/* 2 0 1 */
			v_usec = pps_tf[2] - pps_tf[1];
		} else {
			pps_offset = pps_tf[2];		/* 0 2 1 */
			v_usec = pps_tf[0] - pps_tf[1];
		}
	} else {
		if (pps_tf[1] < pps_tf[2]) {
			pps_offset = pps_tf[1];		/* 2 1 0 */
			v_usec = pps_tf[2] - pps_tf[0];
		} else if (pps_tf[2] < pps_tf[0]) {
			pps_offset = pps_tf[0];		/* 1 0 2 */
			v_usec = pps_tf[1] - pps_tf[2];
		} else {
			pps_offset = pps_tf[2];		/* 1 2 0 */
			v_usec = pps_tf[1] - pps_tf[0];
		}
	}
	if (v_usec > MAXTIME)
		pps_jitcnt++;
	/* Exponential average of the jitter with time constant PPS_AVG. */
	v_usec = (v_usec << PPS_AVG) - pps_jitter;
	if (v_usec < 0)
		pps_jitter -= -v_usec >> PPS_AVG;
	else
		pps_jitter += v_usec >> PPS_AVG;
	if (pps_jitter > (MAXTIME >> 1))
		time_status |= STA_PPSJITTER;

	/*
	 * During the calibration interval adjust the starting time when
	 * the tick overflows. At the end of the interval compute the
	 * duration of the interval and the difference of the hardware
	 * counters at the beginning and end of the interval. This code
	 * is deliciously complicated by the fact valid differences may
	 * exceed the value of tick when using long calibration
	 * intervals and small ticks. Note that the counter can be
	 * greater than tick if caught at just the wrong instant, but
	 * the values returned and used here are correct.
	 */
	bigtick = (long)tick << SHIFT_USEC;	/* tick in scaled usec */
	pps_usec -= pps_freq;
	if (pps_usec >= bigtick)
		pps_usec -= bigtick;
	if (pps_usec < 0)
		pps_usec += bigtick;
	pps_time.tv_sec++;
	pps_count++;
	/* Only act at the end of the 2^pps_shift second interval. */
	if (pps_count < (1 << pps_shift))
		return;
	pps_count = 0;
	pps_calcnt++;
	u_usec = usec << SHIFT_USEC;
	v_usec = pps_usec - u_usec;
	if (v_usec >= bigtick >> 1)
		v_usec -= bigtick;
	if (v_usec < -(bigtick >> 1))
		v_usec += bigtick;
	/* Arithmetic shift toward zero for negative values. */
	if (v_usec < 0)
		v_usec = -(-v_usec >> pps_shift);
	else
		v_usec = v_usec >> pps_shift;
	pps_usec = u_usec;
	cal_sec = tvp->tv_sec;
	cal_usec = tvp->tv_usec;
	cal_sec -= pps_time.tv_sec;
	cal_usec -= pps_time.tv_usec;
	if (cal_usec < 0) {
		cal_usec += 1000000;
		cal_sec--;
	}
	pps_time = *tvp;

	/*
	 * Check for lost interrupts, noise, excessive jitter and
	 * excessive frequency error. The number of timer ticks during
	 * the interval may vary +-1 tick. Add to this a margin of one
	 * tick for the PPS signal jitter and maximum frequency
	 * deviation. If the limits are exceeded, the calibration
	 * interval is reset to the minimum and we start over.
	 */
	u_usec = (long)tick << 1;
	if (!((cal_sec == -1 && cal_usec > (1000000 - u_usec))
	    || (cal_sec == 0 && cal_usec < u_usec))
	    || v_usec > time_tolerance || v_usec < -time_tolerance) {
		pps_errcnt++;
		pps_shift = PPS_SHIFT;
		pps_intcnt = 0;
		time_status |= STA_PPSERROR;
		return;
	}

	/*
	 * A three-stage median filter is used to help deglitch the pps
	 * frequency. The median sample becomes the frequency offset
	 * estimate; the difference between the other two samples
	 * becomes the frequency dispersion (stability) estimate.
	 */
	pps_ff[2] = pps_ff[1];
	pps_ff[1] = pps_ff[0];
	pps_ff[0] = v_usec;
	if (pps_ff[0] > pps_ff[1]) {
		if (pps_ff[1] > pps_ff[2]) {
			u_usec = pps_ff[1];		/* 0 1 2 */
			v_usec = pps_ff[0] - pps_ff[2];
		} else if (pps_ff[2] > pps_ff[0]) {
			u_usec = pps_ff[0];		/* 2 0 1 */
			v_usec = pps_ff[2] - pps_ff[1];
		} else {
			u_usec = pps_ff[2];		/* 0 2 1 */
			v_usec = pps_ff[0] - pps_ff[1];
		}
	} else {
		if (pps_ff[1] < pps_ff[2]) {
			u_usec = pps_ff[1];		/* 2 1 0 */
			v_usec = pps_ff[2] - pps_ff[0];
		} else if (pps_ff[2] < pps_ff[0]) {
			u_usec = pps_ff[0];		/* 1 0 2 */
			v_usec = pps_ff[1] - pps_ff[2];
		} else {
			u_usec = pps_ff[2];		/* 1 2 0 */
			v_usec = pps_ff[1] - pps_ff[0];
		}
	}

	/*
	 * Here the frequency dispersion (stability) is updated. If it
	 * is less than one-fourth the maximum (MAXFREQ), the frequency
	 * offset is updated as well, but clamped to the tolerance. It
	 * will be processed later by the hardclock() routine.
	 */
	v_usec = (v_usec >> 1) - pps_stabil;
	if (v_usec < 0)
		pps_stabil -= -v_usec >> PPS_AVG;
	else
		pps_stabil += v_usec >> PPS_AVG;
	if (pps_stabil > MAXFREQ >> 2) {
		/* Too much wander; skip the frequency update. */
		pps_stbcnt++;
		time_status |= STA_PPSWANDER;
		return;
	}
	if (time_status & STA_PPSFREQ) {
		if (u_usec < 0) {
			pps_freq -= -u_usec >> PPS_AVG;
			if (pps_freq < -time_tolerance)
				pps_freq = -time_tolerance;
			u_usec = -u_usec;
		} else {
			pps_freq += u_usec >> PPS_AVG;
			if (pps_freq > time_tolerance)
				pps_freq = time_tolerance;
		}
	}

	/*
	 * Here the calibration interval is adjusted. If the maximum
	 * time difference is greater than tick / 4, reduce the interval
	 * by half. If this is not the case for four consecutive
	 * intervals, double the interval.
	 */
	if (u_usec << pps_shift > bigtick >> 2) {
		pps_intcnt = 0;
		if (pps_shift > PPS_SHIFT)
			pps_shift--;
	} else if (pps_intcnt >= 4) {
		pps_intcnt = 0;
		if (pps_shift < PPS_SHIFTMAX)
			pps_shift++;
	} else
		pps_intcnt++;
}
#endif /* PPS_SYNC */