kern_clock.c revision 31639
11541Srgrimes/*-
21541Srgrimes * Copyright (c) 1982, 1986, 1991, 1993
31541Srgrimes *	The Regents of the University of California.  All rights reserved.
41541Srgrimes * (c) UNIX System Laboratories, Inc.
51541Srgrimes * All or some portions of this file are derived from material licensed
61541Srgrimes * to the University of California by American Telephone and Telegraph
71541Srgrimes * Co. or Unix System Laboratories, Inc. and are reproduced herein with
81541Srgrimes * the permission of UNIX System Laboratories, Inc.
91541Srgrimes *
101541Srgrimes * Redistribution and use in source and binary forms, with or without
111541Srgrimes * modification, are permitted provided that the following conditions
121541Srgrimes * are met:
131541Srgrimes * 1. Redistributions of source code must retain the above copyright
141541Srgrimes *    notice, this list of conditions and the following disclaimer.
151541Srgrimes * 2. Redistributions in binary form must reproduce the above copyright
161541Srgrimes *    notice, this list of conditions and the following disclaimer in the
171541Srgrimes *    documentation and/or other materials provided with the distribution.
181541Srgrimes * 3. All advertising materials mentioning features or use of this software
191541Srgrimes *    must display the following acknowledgement:
201541Srgrimes *	This product includes software developed by the University of
211541Srgrimes *	California, Berkeley and its contributors.
221541Srgrimes * 4. Neither the name of the University nor the names of its contributors
231541Srgrimes *    may be used to endorse or promote products derived from this software
241541Srgrimes *    without specific prior written permission.
251541Srgrimes *
261541Srgrimes * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
271541Srgrimes * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
281541Srgrimes * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
291541Srgrimes * ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
301541Srgrimes * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
311541Srgrimes * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
321541Srgrimes * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
331541Srgrimes * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
341541Srgrimes * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
351541Srgrimes * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
361541Srgrimes * SUCH DAMAGE.
371541Srgrimes *
381541Srgrimes *	@(#)kern_clock.c	8.5 (Berkeley) 1/21/94
3931639Sfsmp * $Id: kern_clock.c,v 1.45 1997/11/24 15:15:27 bde Exp $
401541Srgrimes */
411541Srgrimes
422858Swollman/* Portions of this software are covered by the following: */
432858Swollman/******************************************************************************
442858Swollman *                                                                            *
452858Swollman * Copyright (c) David L. Mills 1993, 1994                                    *
462858Swollman *                                                                            *
472858Swollman * Permission to use, copy, modify, and distribute this software and its      *
482858Swollman * documentation for any purpose and without fee is hereby granted, provided  *
492858Swollman * that the above copyright notice appears in all copies and that both the    *
502858Swollman * copyright notice and this permission notice appear in supporting           *
512858Swollman * documentation, and that the name University of Delaware not be used in     *
522858Swollman * advertising or publicity pertaining to distribution of the software        *
532858Swollman * without specific, written prior permission.  The University of Delaware    *
542858Swollman * makes no representations about the suitability this software for any       *
552858Swollman * purpose.  It is provided "as is" without express or implied warranty.      *
562858Swollman *                                                                            *
572858Swollman *****************************************************************************/
582858Swollman
591541Srgrimes#include <sys/param.h>
601541Srgrimes#include <sys/systm.h>
611541Srgrimes#include <sys/dkstat.h>
621541Srgrimes#include <sys/callout.h>
631541Srgrimes#include <sys/kernel.h>
641541Srgrimes#include <sys/proc.h>
651541Srgrimes#include <sys/resourcevar.h>
663308Sphk#include <sys/signalvar.h>
672858Swollman#include <sys/timex.h>
682320Sdg#include <vm/vm.h>
6922521Sdyson#include <sys/lock.h>
7012662Sdg#include <vm/pmap.h>
7112662Sdg#include <vm/vm_map.h>
723308Sphk#include <sys/sysctl.h>
731541Srgrimes
741541Srgrimes#include <machine/cpu.h>
7519172Sbde#define CLOCK_HAIR		/* XXX */
762858Swollman#include <machine/clock.h>
7728551Sbde#include <machine/limits.h>
781541Srgrimes
791541Srgrimes#ifdef GPROF
801541Srgrimes#include <sys/gmon.h>
811541Srgrimes#endif
821541Srgrimes
8331639Sfsmp#if defined(SMP) && defined(BETTER_CLOCK)
8431639Sfsmp#include <machine/smp.h>
8531639Sfsmp#endif
8631639Sfsmp
8712569Sbdestatic void initclocks __P((void *dummy));
8810358SjulianSYSINIT(clocks, SI_SUB_CLOCKS, SI_ORDER_FIRST, initclocks, NULL)
8910358Sjulian
9016635Sbde/* Exported to machdep.c. */
9129680Sgibbsstruct callout *callout;
9229680Sgibbsstruct callout_list callfree;
9329680Sgibbsint callwheelsize, callwheelbits, callwheelmask;
9429680Sgibbsstruct callout_tailq *callwheel;
952112Swollman
9616635Sbde
972112Swollman/* Some of these don't belong here, but it's easiest to concentrate them. */
9831639Sfsmp#if defined(SMP) && defined(BETTER_CLOCK)
9931639Sfsmplong cp_time[CPUSTATES];
10031639Sfsmp#else
10112913Sphkstatic long cp_time[CPUSTATES];
10231639Sfsmp#endif
1032112Swollmanlong dk_seek[DK_NDRIVE];
10429179Sbdestatic long dk_time[DK_NDRIVE];	/* time busy (in statclock ticks) */
1052112Swollmanlong dk_wds[DK_NDRIVE];
1062112Swollmanlong dk_wpms[DK_NDRIVE];
1072112Swollmanlong dk_xfer[DK_NDRIVE];
1082112Swollman
1092112Swollmanint dk_busy;
1103640Swollmanint dk_ndrive = 0;
1113640Swollmanchar dk_names[DK_NDRIVE][DK_NAMELEN];
1122112Swollman
1132112Swollmanlong tk_cancc;
1142112Swollmanlong tk_nin;
1152112Swollmanlong tk_nout;
1162112Swollmanlong tk_rawcc;
1172112Swollman
1181541Srgrimes/*
1191541Srgrimes * Clock handling routines.
1201541Srgrimes *
1211541Srgrimes * This code is written to operate with two timers that run independently of
1221541Srgrimes * each other.  The main clock, running hz times per second, is used to keep
1231541Srgrimes * track of real time.  The second timer handles kernel and user profiling,
1241541Srgrimes * and does resource use estimation.  If the second timer is programmable,
1251541Srgrimes * it is randomized to avoid aliasing between the two clocks.  For example,
1261541Srgrimes * the randomization prevents an adversary from always giving up the cpu
1271541Srgrimes * just before its quantum expires.  Otherwise, it would never accumulate
1281541Srgrimes * cpu ticks.  The mean frequency of the second timer is stathz.
1291541Srgrimes *
1301541Srgrimes * If no second timer exists, stathz will be zero; in this case we drive
1311541Srgrimes * profiling and statistics off the main clock.  This WILL NOT be accurate;
1321541Srgrimes * do not do it unless absolutely necessary.
1331541Srgrimes *
1341541Srgrimes * The statistics clock may (or may not) be run at a higher rate while
1351541Srgrimes * profiling.  This profile clock runs at profhz.  We require that profhz
1361541Srgrimes * be an integral multiple of stathz.
1371541Srgrimes *
1381541Srgrimes * If the statistics clock is running fast, it must be divided by the ratio
1391541Srgrimes * profhz/stathz for statistics.  (For profiling, every tick counts.)
1401541Srgrimes */
1411541Srgrimes
1421541Srgrimes/*
1431541Srgrimes * TODO:
1441541Srgrimes *	allocate more timeout table slots when table overflows.
1451541Srgrimes */
1461541Srgrimes
1471541Srgrimes/*
1481541Srgrimes * Bump a timeval by a small number of usec's.
1491541Srgrimes */
1501541Srgrimes#define BUMPTIME(t, usec) { \
1511541Srgrimes	register volatile struct timeval *tp = (t); \
1521541Srgrimes	register long us; \
1531541Srgrimes \
1541541Srgrimes	tp->tv_usec = us = tp->tv_usec + (usec); \
1551541Srgrimes	if (us >= 1000000) { \
1561541Srgrimes		tp->tv_usec = us - 1000000; \
1571541Srgrimes		tp->tv_sec++; \
1581541Srgrimes	} \
1591541Srgrimes}
1601541Srgrimes
1611541Srgrimesint	stathz;
1621541Srgrimesint	profhz;
16316635Sbdestatic int profprocs;
1641541Srgrimesint	ticks;
16529680Sgibbsstatic int softticks;			/* Like ticks, but for softclock(). */
16629680Sgibbsstatic struct callout *nextsoftcheck;	/* Next callout to be checked. */
16729680Sgibbsstatic int psdiv, pscnt;		/* prof => stat divider */
16829680Sgibbsint psratio;				/* ratio: prof / stat */
1691541Srgrimes
1701541Srgrimesvolatile struct	timeval time;
1711541Srgrimesvolatile struct	timeval mono_time;
1721541Srgrimes
1731541Srgrimes/*
17421101Sjhay * Phase/frequency-lock loop (PLL/FLL) definitions
1752858Swollman *
1762858Swollman * The following variables are read and set by the ntp_adjtime() system
1772858Swollman * call.
1782858Swollman *
1792858Swollman * time_state shows the state of the system clock, with values defined
1802858Swollman * in the timex.h header file.
1812858Swollman *
1822858Swollman * time_status shows the status of the system clock, with bits defined
1832858Swollman * in the timex.h header file.
1842858Swollman *
18521101Sjhay * time_offset is used by the PLL/FLL to adjust the system time in small
1862858Swollman * increments.
1872858Swollman *
1882858Swollman * time_constant determines the bandwidth or "stiffness" of the PLL.
1892858Swollman *
1902858Swollman * time_tolerance determines maximum frequency error or tolerance of the
1912858Swollman * CPU clock oscillator and is a property of the architecture; however,
1922858Swollman * in principle it could change as result of the presence of external
1932858Swollman * discipline signals, for instance.
1942858Swollman *
1952858Swollman * time_precision is usually equal to the kernel tick variable; however,
1962858Swollman * in cases where a precision clock counter or external clock is
1972858Swollman * available, the resolution can be much less than this and depend on
1982858Swollman * whether the external clock is working or not.
1992858Swollman *
2002858Swollman * time_maxerror is initialized by a ntp_adjtime() call and increased by
2012858Swollman * the kernel once each second to reflect the maximum error
2022858Swollman * bound growth.
2032858Swollman *
2042858Swollman * time_esterror is set and read by the ntp_adjtime() call, but
2052858Swollman * otherwise not used by the kernel.
2062858Swollman */
2072858Swollmanint time_status = STA_UNSYNC;	/* clock status bits */
2082858Swollmanint time_state = TIME_OK;	/* clock state */
2092858Swollmanlong time_offset = 0;		/* time offset (us) */
2102858Swollmanlong time_constant = 0;		/* pll time constant */
2112858Swollmanlong time_tolerance = MAXFREQ;	/* frequency tolerance (scaled ppm) */
2122858Swollmanlong time_precision = 1;	/* clock precision (us) */
2132858Swollmanlong time_maxerror = MAXPHASE;	/* maximum error (us) */
2142858Swollmanlong time_esterror = MAXPHASE;	/* estimated error (us) */
2152858Swollman
2162858Swollman/*
21721101Sjhay * The following variables establish the state of the PLL/FLL and the
2182858Swollman * residual time and frequency offset of the local clock. The scale
2192858Swollman * factors are defined in the timex.h header file.
2202858Swollman *
2212858Swollman * time_phase and time_freq are the phase increment and the frequency
2222858Swollman * increment, respectively, of the kernel time variable at each tick of
2232858Swollman * the clock.
2242858Swollman *
2252858Swollman * time_freq is set via ntp_adjtime() from a value stored in a file when
2262858Swollman * the synchronization daemon is first started. Its value is retrieved
2272858Swollman * via ntp_adjtime() and written to the file about once per hour by the
2282858Swollman * daemon.
2292858Swollman *
2302858Swollman * time_adj is the adjustment added to the value of tick at each timer
23121101Sjhay * interrupt and is recomputed from time_phase and time_freq at each
23221101Sjhay * seconds rollover.
2332858Swollman *
2342858Swollman * time_reftime is the second's portion of the system time on the last
2352858Swollman * call to ntp_adjtime(). It is used to adjust the time_freq variable
2362858Swollman * and to increase the time_maxerror as the time since last update
2372858Swollman * increases.
2382858Swollman */
23912913Sphkstatic long time_phase = 0;		/* phase offset (scaled us) */
24021101Sjhaylong time_freq = 0;			/* frequency offset (scaled ppm) */
24112913Sphkstatic long time_adj = 0;		/* tick adjust (scaled 1 / hz) */
24212913Sphkstatic long time_reftime = 0;		/* time at last adjustment (s) */
2432858Swollman
2442858Swollman#ifdef PPS_SYNC
2452858Swollman/*
24621101Sjhay * The following variables are used only if the kernel PPS discipline
24721101Sjhay * code is configured (PPS_SYNC). The scale factors are defined in the
24821101Sjhay * timex.h header file.
2492858Swollman *
2502858Swollman * pps_time contains the time at each calibration interval, as read by
25121101Sjhay * microtime(). pps_count counts the seconds of the calibration
25221101Sjhay * interval, the duration of which is nominally pps_shift in powers of
25321101Sjhay * two.
2542858Swollman *
2552858Swollman * pps_offset is the time offset produced by the time median filter
25621101Sjhay * pps_tf[], while pps_jitter is the dispersion (jitter) measured by
25721101Sjhay * this filter.
2582858Swollman *
2592858Swollman * pps_freq is the frequency offset produced by the frequency median
26021101Sjhay * filter pps_ff[], while pps_stabil is the dispersion (wander) measured
26121101Sjhay * by this filter.
2622858Swollman *
2632858Swollman * pps_usec is latched from a high resolution counter or external clock
2642858Swollman * at pps_time. Here we want the hardware counter contents only, not the
2652858Swollman * contents plus the time_tv.usec as usual.
2662858Swollman *
2672858Swollman * pps_valid counts the number of seconds since the last PPS update. It
2682858Swollman * is used as a watchdog timer to disable the PPS discipline should the
2692858Swollman * PPS signal be lost.
2702858Swollman *
2712858Swollman * pps_glitch counts the number of seconds since the beginning of an
2722858Swollman * offset burst more than tick/2 from current nominal offset. It is used
2732858Swollman * mainly to suppress error bursts due to priority conflicts between the
2742858Swollman * PPS interrupt and timer interrupt.
2752858Swollman *
2762858Swollman * pps_intcnt counts the calibration intervals for use in the interval-
2772858Swollman * adaptation algorithm. It's just too complicated for words.
2782858Swollman */
2792858Swollmanstruct timeval pps_time;	/* kernel time at last interval */
2802858Swollmanlong pps_offset = 0;		/* pps time offset (us) */
2812858Swollmanlong pps_jitter = MAXTIME;	/* pps time dispersion (jitter) (us) */
2822858Swollmanlong pps_tf[] = {0, 0, 0};	/* pps time offset median filter (us) */
2832858Swollmanlong pps_freq = 0;		/* frequency offset (scaled ppm) */
2842858Swollmanlong pps_stabil = MAXFREQ;	/* frequency dispersion (scaled ppm) */
2852858Swollmanlong pps_ff[] = {0, 0, 0};	/* frequency offset median filter */
2862858Swollmanlong pps_usec = 0;		/* microsec counter at last interval */
2872858Swollmanlong pps_valid = PPS_VALID;	/* pps signal watchdog counter */
2882858Swollmanint pps_glitch = 0;		/* pps signal glitch counter */
2892858Swollmanint pps_count = 0;		/* calibration interval counter (s) */
2902858Swollmanint pps_shift = PPS_SHIFT;	/* interval duration (s) (shift) */
2912858Swollmanint pps_intcnt = 0;		/* intervals at current duration */
2922858Swollman
2932858Swollman/*
2942858Swollman * PPS signal quality monitors
2952858Swollman *
2962858Swollman * pps_jitcnt counts the seconds that have been discarded because the
2972858Swollman * jitter measured by the time median filter exceeds the limit MAXTIME
2982858Swollman * (100 us).
2992858Swollman *
3002858Swollman * pps_calcnt counts the frequency calibration intervals, which are
3012858Swollman * variable from 4 s to 256 s.
3022858Swollman *
3032858Swollman * pps_errcnt counts the calibration intervals which have been discarded
3042858Swollman * because the wander exceeds the limit MAXFREQ (100 ppm) or where the
3052858Swollman * calibration interval jitter exceeds two ticks.
3062858Swollman *
3072858Swollman * pps_stbcnt counts the calibration intervals that have been discarded
3082858Swollman * because the frequency wander exceeds the limit MAXFREQ / 4 (25 us).
3092858Swollman */
3102858Swollmanlong pps_jitcnt = 0;		/* jitter limit exceeded */
3112858Swollmanlong pps_calcnt = 0;		/* calibration intervals */
3122858Swollmanlong pps_errcnt = 0;		/* calibration errors */
3132858Swollmanlong pps_stbcnt = 0;		/* stability limit exceeded */
3142858Swollman#endif /* PPS_SYNC */
3152858Swollman
3162858Swollman/* XXX none of this stuff works under FreeBSD */
3172858Swollman#ifdef EXT_CLOCK
3182858Swollman/*
3192858Swollman * External clock definitions
3202858Swollman *
3212858Swollman * The following definitions and declarations are used only if an
3222858Swollman * external clock (HIGHBALL or TPRO) is configured on the system.
3232858Swollman */
3242858Swollman#define CLOCK_INTERVAL 30	/* CPU clock update interval (s) */
3252858Swollman
3262858Swollman/*
3272858Swollman * The clock_count variable is set to CLOCK_INTERVAL at each PPS
3282858Swollman * interrupt and decremented once each second.
3292858Swollman */
3302858Swollmanint clock_count = 0;		/* CPU clock counter */
3312858Swollman
3322858Swollman#ifdef HIGHBALL
3332858Swollman/*
3342858Swollman * The clock_offset and clock_cpu variables are used by the HIGHBALL
3352858Swollman * interface. The clock_offset variable defines the offset between
 3362858Swollman * system time and the HIGHBALL counters. The clock_cpu variable contains
3372858Swollman * the offset between the system clock and the HIGHBALL clock for use in
3382858Swollman * disciplining the kernel time variable.
3392858Swollman */
3402858Swollmanextern struct timeval clock_offset; /* Highball clock offset */
3412858Swollmanlong clock_cpu = 0;		/* CPU clock adjust */
3422858Swollman#endif /* HIGHBALL */
3432858Swollman#endif /* EXT_CLOCK */
3442858Swollman
3452858Swollman/*
3462858Swollman * hardupdate() - local clock update
3472858Swollman *
3482858Swollman * This routine is called by ntp_adjtime() to update the local clock
34921101Sjhay * phase and frequency. The implementation is of an adaptive-parameter,
35021101Sjhay * hybrid phase/frequency-lock loop (PLL/FLL). The routine computes new
35121101Sjhay * time and frequency offset estimates for each call. If the kernel PPS
3522858Swollman * discipline code is configured (PPS_SYNC), the PPS signal itself
3532858Swollman * determines the new time offset, instead of the calling argument.
3542858Swollman * Presumably, calls to ntp_adjtime() occur only when the caller
3552858Swollman * believes the local clock is valid within some bound (+-128 ms with
3562858Swollman * NTP). If the caller's time is far different than the PPS time, an
3572858Swollman * argument will ensue, and it's not clear who will lose.
3582858Swollman *
 35921101Sjhay * For uncompensated quartz crystal oscillators and nominal update
 36021101Sjhay * intervals less than 1024 s, operation should be in phase-lock mode
 36121101Sjhay * (STA_FLL = 0), where the loop is disciplined to phase. For update
 36221101Sjhay * intervals greater than this, operation should be in frequency-lock
 36321101Sjhay * mode (STA_FLL = 1), where the loop is disciplined to frequency.
3642858Swollman *
3652858Swollman * Note: splclock() is in effect.
3662858Swollman */
3672858Swollmanvoid
3682858Swollmanhardupdate(offset)
3692858Swollman	long offset;
3702858Swollman{
3712858Swollman	long ltemp, mtemp;
3722858Swollman
	/* Do nothing unless the PLL or the PPS time discipline is enabled. */
3732858Swollman	if (!(time_status & STA_PLL) && !(time_status & STA_PPSTIME))
3742858Swollman		return;
3752858Swollman	ltemp = offset;
3762858Swollman#ifdef PPS_SYNC
	/* A valid PPS signal overrides the caller-supplied offset. */
3772858Swollman	if (time_status & STA_PPSTIME && time_status & STA_PPSSIGNAL)
3782858Swollman		ltemp = pps_offset;
3792858Swollman#endif /* PPS_SYNC */
38021101Sjhay
38121101Sjhay	/*
38221101Sjhay	 * Scale the phase adjustment and clamp to the operating range.
38321101Sjhay	 */
3842858Swollman	if (ltemp > MAXPHASE)
3852858Swollman		time_offset = MAXPHASE << SHIFT_UPDATE;
3862858Swollman	else if (ltemp < -MAXPHASE)
3872858Swollman		time_offset = -(MAXPHASE << SHIFT_UPDATE);
3882858Swollman	else
3892858Swollman		time_offset = ltemp << SHIFT_UPDATE;
39021101Sjhay
39121101Sjhay	/*
39221101Sjhay	 * Select whether the frequency is to be controlled and in which
39321101Sjhay	 * mode (PLL or FLL). Clamp to the operating range. Ugly
39421101Sjhay	 * multiply/divide should be replaced someday.
39521101Sjhay	 */
	/* mtemp below becomes the interval (s) since the previous update. */
39621101Sjhay	if (time_status & STA_FREQHOLD || time_reftime == 0)
39721101Sjhay		time_reftime = time.tv_sec;
3982858Swollman	mtemp = time.tv_sec - time_reftime;
3992858Swollman	time_reftime = time.tv_sec;
40021101Sjhay	if (time_status & STA_FLL) {
		/* FLL mode: average the offset over the update interval. */
40121101Sjhay		if (mtemp >= MINSEC) {
40221101Sjhay			ltemp = ((time_offset / mtemp) << (SHIFT_USEC -
40321101Sjhay			    SHIFT_UPDATE));
40421101Sjhay			if (ltemp < 0)
40521101Sjhay				time_freq -= -ltemp >> SHIFT_KH;
40621101Sjhay			else
40721101Sjhay				time_freq += ltemp >> SHIFT_KH;
40821101Sjhay		}
40921101Sjhay	} else {
		/* PLL mode: gain is scaled by time_constant (twice). */
41021101Sjhay		if (mtemp < MAXSEC) {
41121101Sjhay			ltemp *= mtemp;
41221101Sjhay			if (ltemp < 0)
41321101Sjhay				time_freq -= -ltemp >> (time_constant +
41421101Sjhay				    time_constant + SHIFT_KF -
41521101Sjhay				    SHIFT_USEC);
41621101Sjhay			else
41721101Sjhay				time_freq += ltemp >> (time_constant +
41821101Sjhay				    time_constant + SHIFT_KF -
41921101Sjhay				    SHIFT_USEC);
42021101Sjhay		}
42121101Sjhay	}
	/* Clamp the frequency estimate to the oscillator tolerance. */
4222858Swollman	if (time_freq > time_tolerance)
4232858Swollman		time_freq = time_tolerance;
4242858Swollman	else if (time_freq < -time_tolerance)
4252858Swollman		time_freq = -time_tolerance;
4262858Swollman}
4272858Swollman
4282858Swollman
4292858Swollman
4302858Swollman/*
4311541Srgrimes * Initialize clock frequencies and start both clocks running.
4321541Srgrimes */
43310358Sjulian/* ARGSUSED*/
43410358Sjulianstatic void
43512569Sbdeinitclocks(dummy)
43612569Sbde	void *dummy;
4371541Srgrimes{
4381541Srgrimes	register int i;
4391541Srgrimes
4401541Srgrimes	/*
4411541Srgrimes	 * Set divisors to 1 (normal case) and let the machine-specific
4421541Srgrimes	 * code do its bit.
4431541Srgrimes	 */
4441541Srgrimes	psdiv = pscnt = 1;
4451541Srgrimes	cpu_initclocks();
4461541Srgrimes
4471541Srgrimes	/*
4481541Srgrimes	 * Compute profhz/stathz, and fix profhz if needed.
4491541Srgrimes	 */
4501541Srgrimes	i = stathz ? stathz : hz;
4511541Srgrimes	if (profhz == 0)
4521541Srgrimes		profhz = i;
4531541Srgrimes	psratio = profhz / i;
4541541Srgrimes}
4551541Srgrimes
4561541Srgrimes/*
4571541Srgrimes * The real-time timer, interrupting hz times per second.
4581541Srgrimes */
4591541Srgrimesvoid
4601541Srgrimeshardclock(frame)
4611541Srgrimes	register struct clockframe *frame;
4621541Srgrimes{
4631541Srgrimes	register struct proc *p;
4641541Srgrimes
4651541Srgrimes	p = curproc;
4661541Srgrimes	if (p) {
4671541Srgrimes		register struct pstats *pstats;
4681541Srgrimes
4691541Srgrimes		/*
4701541Srgrimes		 * Run current process's virtual and profile time, as needed.
		 * ITIMER_VIRTUAL only runs down while in user mode; a signal
		 * is posted when either timer expires (itimerdecr == 0).
4711541Srgrimes		 */
4721541Srgrimes		pstats = p->p_stats;
4731541Srgrimes		if (CLKF_USERMODE(frame) &&
4741541Srgrimes		    timerisset(&pstats->p_timer[ITIMER_VIRTUAL].it_value) &&
4751541Srgrimes		    itimerdecr(&pstats->p_timer[ITIMER_VIRTUAL], tick) == 0)
4761541Srgrimes			psignal(p, SIGVTALRM);
4771541Srgrimes		if (timerisset(&pstats->p_timer[ITIMER_PROF].it_value) &&
4781541Srgrimes		    itimerdecr(&pstats->p_timer[ITIMER_PROF], tick) == 0)
4791541Srgrimes			psignal(p, SIGPROF);
4801541Srgrimes	}
4811541Srgrimes
	/* NOTE(review): forwards the tick to other CPUs on SMP builds. */
48231639Sfsmp#if defined(SMP) && defined(BETTER_CLOCK)
48331639Sfsmp	forward_hardclock(pscnt);
48431639Sfsmp#endif
4851541Srgrimes	/*
4861541Srgrimes	 * If no separate statistics clock is available, run it from here.
4871541Srgrimes	 */
4881541Srgrimes	if (stathz == 0)
4891541Srgrimes		statclock(frame);
4901541Srgrimes
4911541Srgrimes	/*
4928876Srgrimes	 * Increment the time-of-day.
4931541Srgrimes	 */
4941541Srgrimes	ticks++;
	/*
	 * NTP clock discipline: advance the system clock by this tick's
	 * length plus any pending correction (timedelta) and the scaled
	 * PLL/FLL phase adjustment (time_adj).
	 */
4952858Swollman	{
4962858Swollman		int time_update;
4972858Swollman		struct timeval newtime = time;
4982858Swollman		long ltemp;
4992858Swollman
5002858Swollman		if (timedelta == 0) {
50111451Swollman			time_update = CPU_THISTICKLEN(tick);
5022858Swollman		} else {
			/* Consume one tick's worth of a pending adjustment. */
50311451Swollman			time_update = CPU_THISTICKLEN(tick) + tickdelta;
5043183Swollman			timedelta -= tickdelta;
5052858Swollman		}
5062858Swollman		BUMPTIME(&mono_time, time_update);
5072858Swollman
5082858Swollman		/*
5092858Swollman		 * Compute the phase adjustment. If the low-order bits
5102858Swollman		 * (time_phase) of the update overflow, bump the high-order bits
5112858Swollman		 * (time_update).
5122858Swollman		 */
5132858Swollman		time_phase += time_adj;
5142858Swollman		if (time_phase <= -FINEUSEC) {
5152858Swollman		  ltemp = -time_phase >> SHIFT_SCALE;
5162858Swollman		  time_phase += ltemp << SHIFT_SCALE;
5172858Swollman		  time_update -= ltemp;
5182858Swollman		}
5192858Swollman		else if (time_phase >= FINEUSEC) {
5202858Swollman		  ltemp = time_phase >> SHIFT_SCALE;
5212858Swollman		  time_phase -= ltemp << SHIFT_SCALE;
5222858Swollman		  time_update += ltemp;
5232858Swollman		}
5242858Swollman
5252858Swollman		newtime.tv_usec += time_update;
5262858Swollman		/*
5272858Swollman		 * On rollover of the second the phase adjustment to be used for
5282858Swollman		 * the next second is calculated. Also, the maximum error is
5292858Swollman		 * increased by the tolerance. If the PPS frequency discipline
5302858Swollman		 * code is present, the phase is increased to compensate for the
5312858Swollman		 * CPU clock oscillator frequency error.
5322858Swollman		 *
53321101Sjhay		 * On a 32-bit machine and given parameters in the timex.h
53421101Sjhay		 * header file, the maximum phase adjustment is +-512 ms and
53521101Sjhay		 * maximum frequency offset is a tad less than +-512 ppm. On a
53621101Sjhay		 * 64-bit machine, you shouldn't need to ask.
5372858Swollman		 */
5382858Swollman		if (newtime.tv_usec >= 1000000) {
5392858Swollman		  newtime.tv_usec -= 1000000;
5402858Swollman		  newtime.tv_sec++;
5412858Swollman		  time_maxerror += time_tolerance >> SHIFT_USEC;
54221101Sjhay
54321101Sjhay		  /*
54421101Sjhay		   * Compute the phase adjustment for the next second. In
54521101Sjhay		   * PLL mode, the offset is reduced by a fixed factor
54621101Sjhay		   * times the time constant. In FLL mode the offset is
54721101Sjhay		   * used directly. In either mode, the maximum phase
54821101Sjhay		   * adjustment for each second is clamped so as to spread
54921101Sjhay		   * the adjustment over not more than the number of
55021101Sjhay		   * seconds between updates.
55121101Sjhay		   */
5522858Swollman		  if (time_offset < 0) {
55321101Sjhay		    ltemp = -time_offset;
55421101Sjhay		    if (!(time_status & STA_FLL))
55521101Sjhay			ltemp >>= SHIFT_KG + time_constant;
55621101Sjhay		    if (ltemp > (MAXPHASE / MINSEC) << SHIFT_UPDATE)
55721101Sjhay			ltemp = (MAXPHASE / MINSEC) <<
55821101Sjhay			    SHIFT_UPDATE;
5592858Swollman		    time_offset += ltemp;
56021101Sjhay		    time_adj = -ltemp << (SHIFT_SCALE - SHIFT_HZ -
56121101Sjhay			SHIFT_UPDATE);
56221101Sjhay		    } else {
56321101Sjhay		        ltemp = time_offset;
56421101Sjhay			if (!(time_status & STA_FLL))
56521101Sjhay				ltemp >>= SHIFT_KG + time_constant;
56621101Sjhay			if (ltemp > (MAXPHASE / MINSEC) << SHIFT_UPDATE)
56721101Sjhay				ltemp = (MAXPHASE / MINSEC) <<
56821101Sjhay				    SHIFT_UPDATE;
56921101Sjhay			time_offset -= ltemp;
57021101Sjhay			time_adj = ltemp << (SHIFT_SCALE - SHIFT_HZ -
57121101Sjhay			    SHIFT_UPDATE);
57221101Sjhay		    }
57321101Sjhay
5742858Swollman		  /*
57521101Sjhay		   * Compute the frequency estimate and additional phase
57621101Sjhay		   * adjustment due to frequency error for the next
57721101Sjhay		   * second. When the PPS signal is engaged, gnaw on the
57821101Sjhay		   * watchdog counter and update the frequency computed by
57921101Sjhay		   * the pll and the PPS signal.
5802858Swollman		   */
58121101Sjhay#ifdef PPS_SYNC
		  /* PPS watchdog: revert to PLL frequency if signal is lost. */
5822858Swollman		  pps_valid++;
5832858Swollman		  if (pps_valid == PPS_VALID) {
5842858Swollman		    pps_jitter = MAXTIME;
5852858Swollman		    pps_stabil = MAXFREQ;
5862858Swollman		    time_status &= ~(STA_PPSSIGNAL | STA_PPSJITTER |
5872858Swollman				     STA_PPSWANDER | STA_PPSERROR);
5882858Swollman		  }
5892858Swollman		  ltemp = time_freq + pps_freq;
5902858Swollman#else
5912858Swollman		  ltemp = time_freq;
5922858Swollman#endif /* PPS_SYNC */
5932858Swollman		  if (ltemp < 0)
5942858Swollman		    time_adj -= -ltemp >>
5952858Swollman		      (SHIFT_USEC + SHIFT_HZ - SHIFT_SCALE);
5962858Swollman		  else
5972858Swollman		    time_adj += ltemp >>
5982858Swollman		      (SHIFT_USEC + SHIFT_HZ - SHIFT_SCALE);
5992858Swollman
60021101Sjhay#if SHIFT_HZ == 7
6012858Swollman		  /*
6022858Swollman		   * When the CPU clock oscillator frequency is not a
6032858Swollman		   * power of two in Hz, the SHIFT_HZ is only an
6042858Swollman		   * approximate scale factor. In the SunOS kernel, this
6052858Swollman		   * results in a PLL gain factor of 1/1.28 = 0.78 times what it
6062858Swollman		   * should be. In the following code the overall gain is
6072858Swollman		   * increased by a factor of 1.25, which results in a
6082858Swollman		   * residual error less than 3 percent.
6092858Swollman		   */
6102858Swollman		  /* Same thing applies for FreeBSD --GAW */
6112858Swollman		  if (hz == 100) {
6122858Swollman		    if (time_adj < 0)
6132858Swollman		      time_adj -= -time_adj >> 2;
6142858Swollman		    else
6152858Swollman		      time_adj += time_adj >> 2;
6162858Swollman		  }
61721101Sjhay#endif /* SHIFT_HZ */
6182858Swollman
6192858Swollman		  /* XXX - this is really bogus, but can't be fixed until
6202858Swollman		     xntpd's idea of the system clock is fixed to know how
6212858Swollman		     the user wants leap seconds handled; in the mean time,
6222858Swollman		     we assume that users of NTP are running without proper
6232858Swollman		     leap second support (this is now the default anyway) */
6242858Swollman		  /*
6252858Swollman		   * Leap second processing. If in leap-insert state at
6262858Swollman		   * the end of the day, the system clock is set back one
6272858Swollman		   * second; if in leap-delete state, the system clock is
6282858Swollman		   * set ahead one second. The microtime() routine or
6292858Swollman		   * external clock driver will insure that reported time
6302858Swollman		   * is always monotonic. The ugly divides should be
6312858Swollman		   * replaced.
6322858Swollman		   */
6332858Swollman		  switch (time_state) {
6348876Srgrimes
6352858Swollman		  case TIME_OK:
6362858Swollman		    if (time_status & STA_INS)
6372858Swollman		      time_state = TIME_INS;
6382858Swollman		    else if (time_status & STA_DEL)
6392858Swollman		      time_state = TIME_DEL;
6402858Swollman		    break;
6418876Srgrimes
6422858Swollman		  case TIME_INS:
6432858Swollman		    if (newtime.tv_sec % 86400 == 0) {
6442858Swollman		      newtime.tv_sec--;
6452858Swollman		      time_state = TIME_OOP;
6462858Swollman		    }
6472858Swollman		    break;
6482858Swollman
6492858Swollman		  case TIME_DEL:
6502858Swollman		    if ((newtime.tv_sec + 1) % 86400 == 0) {
6512858Swollman		      newtime.tv_sec++;
6522858Swollman		      time_state = TIME_WAIT;
6532858Swollman		    }
6542858Swollman		    break;
6558876Srgrimes
6562858Swollman		  case TIME_OOP:
6572858Swollman		    time_state = TIME_WAIT;
6582858Swollman		    break;
6598876Srgrimes
6602858Swollman		  case TIME_WAIT:
6612858Swollman		    if (!(time_status & (STA_INS | STA_DEL)))
6622858Swollman		      time_state = TIME_OK;
6632858Swollman		  }
6642858Swollman		}
6652858Swollman		CPU_CLOCKUPDATE(&time, &newtime);
6662858Swollman	}
6671541Srgrimes
6681541Srgrimes	/*
6691541Srgrimes	 * Process callouts at a very low cpu priority, so we don't keep the
6701541Srgrimes	 * relatively high clock interrupt priority any longer than necessary.
6711541Srgrimes	 */
67229680Sgibbs	if (TAILQ_FIRST(&callwheel[ticks & callwheelmask]) != NULL) {
6731541Srgrimes		if (CLKF_BASEPRI(frame)) {
6741541Srgrimes			/*
6751541Srgrimes			 * Save the overhead of a software interrupt;
6761541Srgrimes			 * it will happen as soon as we return, so do it now.
6771541Srgrimes			 */
6781541Srgrimes			(void)splsoftclock();
6791541Srgrimes			softclock();
6801541Srgrimes		} else
6811541Srgrimes			setsoftclock();
	/*
	 * No callouts are due this tick; keep softclock()'s tick count
	 * in step so it need not visit the empty wheel bucket later.
	 */
68229680Sgibbs	} else if (softticks + 1 == ticks) {
68329680Sgibbs		++softticks;
6841541Srgrimes	}
6851541Srgrimes}
6861541Srgrimes
6871541Srgrimes/*
68829680Sgibbs * The callout mechanism is based on the work of Adam M. Costello and
68929680Sgibbs * George Varghese, published in a technical report entitled "Redesigning
69029680Sgibbs * the BSD Callout and Timer Facilities" and modified slightly for inclusion
69129680Sgibbs * in FreeBSD by Justin T. Gibbs.  The original work on the data structures
69229680Sgibbs * used in this implementation was published by G.Varghese and A. Lauck in
69329680Sgibbs * the paper "Hashed and Hierarchical Timing Wheels: Data Structures for
69429680Sgibbs * the Efficient Implementation of a Timer Facility" in the Proceedings of
69529680Sgibbs * the 11th ACM Annual Symposium on Operating Systems Principles,
69629680Sgibbs * Austin, Texas Nov 1987.
69729680Sgibbs */
69829680Sgibbs/*
6991541Srgrimes * Software (low priority) clock interrupt.
7001541Srgrimes * Run periodic events from timeout queue.
7011541Srgrimes */
7021541Srgrimes/*ARGSUSED*/
void
softclock()
{
	register struct callout *c;
	register struct callout_tailq *bucket;
	register int s;
	register int curticks;
	register int steps;	/*
				 * Number of steps taken since
				 * we last allowed interrupts.
				 */

	#ifndef MAX_SOFTCLOCK_STEPS
	#define MAX_SOFTCLOCK_STEPS 100 /* Maximum allowed value of steps. */
	#endif /* MAX_SOFTCLOCK_STEPS */

	steps = 0;
	s = splhigh();
	/* Catch softticks up to ticks, draining one wheel bucket per tick. */
	while (softticks != ticks) {
		softticks++;
		/*
		 * softticks may be modified by hard clock, so cache
		 * it while we work on a given bucket.
		 */
		curticks = softticks;
		bucket = &callwheel[curticks & callwheelmask];
		c = TAILQ_FIRST(bucket);
		while (c) {
			if (c->c_time != curticks) {
				/* Hashed into this bucket but not yet due. */
				c = TAILQ_NEXT(c, c_links.tqe);
				++steps;
				if (steps >= MAX_SOFTCLOCK_STEPS) {
					nextsoftcheck = c;
					/* Give interrupts a chance. */
					splx(s);
					s = splhigh();
					/*
					 * Reload: untimeout() may have
					 * unlinked the entry we saved and
					 * advanced nextsoftcheck for us.
					 */
					c = nextsoftcheck;
					steps = 0;
				}
			} else {
				void (*c_func)(void *);
				void *c_arg;

				/* Unlink and recycle before dropping spl. */
				nextsoftcheck = TAILQ_NEXT(c, c_links.tqe);
				TAILQ_REMOVE(bucket, c, c_links.tqe);
				c_func = c->c_func;
				c_arg = c->c_arg;
				c->c_func = NULL;
				SLIST_INSERT_HEAD(&callfree, c, c_links.sle);
				/* Run the handler with interrupts enabled. */
				splx(s);
				c_func(c_arg);
				s = splhigh();
				steps = 0;
				c = nextsoftcheck;
			}
		}
	}
	nextsoftcheck = NULL;
	splx(s);
}
7631541Srgrimes
7641541Srgrimes/*
7651541Srgrimes * timeout --
7661541Srgrimes *	Execute a function after a specified length of time.
7671541Srgrimes *
7681541Srgrimes * untimeout --
7691541Srgrimes *	Cancel previous timeout function call.
7701541Srgrimes *
77129680Sgibbs * callout_handle_init --
77229680Sgibbs *	Initialize a handle so that using it with untimeout is benign.
77329680Sgibbs *
7741541Srgrimes *	See AT&T BCI Driver Reference Manual for specification.  This
77529680Sgibbs *	implementation differs from that one in that although an
77629680Sgibbs *	identification value is returned from timeout, the original
77729680Sgibbs *	arguments to timeout as well as the identifier are used to
77829680Sgibbs *	identify entries for untimeout.
7791541Srgrimes */
78029680Sgibbsstruct callout_handle
78129680Sgibbstimeout(ftn, arg, to_ticks)
7822112Swollman	timeout_t ftn;
7831541Srgrimes	void *arg;
78429680Sgibbs	register int to_ticks;
7851541Srgrimes{
78629680Sgibbs	int s;
78729680Sgibbs	struct callout *new;
78829680Sgibbs	struct callout_handle handle;
7891541Srgrimes
79029680Sgibbs	if (to_ticks <= 0)
79129680Sgibbs		to_ticks = 1;
7921541Srgrimes
7931541Srgrimes	/* Lock out the clock. */
7941541Srgrimes	s = splhigh();
7951541Srgrimes
7961541Srgrimes	/* Fill in the next free callout structure. */
79729680Sgibbs	new = SLIST_FIRST(&callfree);
79829680Sgibbs	if (new == NULL)
79929680Sgibbs		/* XXX Attempt to malloc first */
8001541Srgrimes		panic("timeout table full");
80129680Sgibbs
80229680Sgibbs	SLIST_REMOVE_HEAD(&callfree, c_links.sle);
8031541Srgrimes	new->c_arg = arg;
8041541Srgrimes	new->c_func = ftn;
80529805Sgibbs	new->c_time = ticks + to_ticks;
80629805Sgibbs	TAILQ_INSERT_TAIL(&callwheel[new->c_time & callwheelmask],
80729805Sgibbs			  new, c_links.tqe);
8081541Srgrimes
8091541Srgrimes	splx(s);
81029680Sgibbs	handle.callout = new;
81129680Sgibbs	return (handle);
8121541Srgrimes}
8131541Srgrimes
8141541Srgrimesvoid
81529680Sgibbsuntimeout(ftn, arg, handle)
8162112Swollman	timeout_t ftn;
8171541Srgrimes	void *arg;
81829680Sgibbs	struct callout_handle handle;
8191541Srgrimes{
8201541Srgrimes	register int s;
8211541Srgrimes
82229680Sgibbs	/*
82329680Sgibbs	 * Check for a handle that was initialized
82429680Sgibbs	 * by callout_handle_init, but never used
82529680Sgibbs	 * for a real timeout.
82629680Sgibbs	 */
82729680Sgibbs	if (handle.callout == NULL)
82829680Sgibbs		return;
82929680Sgibbs
8301541Srgrimes	s = splhigh();
83129680Sgibbs	if ((handle.callout->c_func == ftn)
83229680Sgibbs	 && (handle.callout->c_arg == arg)) {
83329680Sgibbs		if (nextsoftcheck == handle.callout) {
83429680Sgibbs			nextsoftcheck = TAILQ_NEXT(handle.callout, c_links.tqe);
8351541Srgrimes		}
83629805Sgibbs		TAILQ_REMOVE(&callwheel[handle.callout->c_time & callwheelmask],
83729680Sgibbs			     handle.callout, c_links.tqe);
83829680Sgibbs		handle.callout->c_func = NULL;
83929680Sgibbs		SLIST_INSERT_HEAD(&callfree, handle.callout, c_links.sle);
84029680Sgibbs	}
8411541Srgrimes	splx(s);
8421541Srgrimes}
8431541Srgrimes
/*
 * Mark a callout handle as unused so that a subsequent untimeout()
 * on it is benign.
 */
void
callout_handle_init(struct callout_handle *handle)
{
	handle->callout = NULL;
}
84929680Sgibbs
/*
 * Copy the current system time into *tvp, sampled at clock priority
 * so the seconds and microseconds words are read coherently.
 */
void
gettime(struct timeval *tvp)
{
	int s;

	s = splclock();
	/* XXX should use microtime() iff tv_usec is used. */
	*tvp = time;
	splx(s);
}
86024101Sbde
8611541Srgrimes/*
8621541Srgrimes * Compute number of hz until specified time.  Used to
8631541Srgrimes * compute third argument to timeout() from an absolute time.
8641541Srgrimes */
8651541Srgrimesint
8661541Srgrimeshzto(tv)
8671541Srgrimes	struct timeval *tv;
8681541Srgrimes{
8695081Sbde	register unsigned long ticks;
8705081Sbde	register long sec, usec;
8711541Srgrimes	int s;
8721541Srgrimes
8731541Srgrimes	/*
8745081Sbde	 * If the number of usecs in the whole seconds part of the time
8755081Sbde	 * difference fits in a long, then the total number of usecs will
8765081Sbde	 * fit in an unsigned long.  Compute the total and convert it to
8775081Sbde	 * ticks, rounding up and adding 1 to allow for the current tick
8785081Sbde	 * to expire.  Rounding also depends on unsigned long arithmetic
8795081Sbde	 * to avoid overflow.
8801541Srgrimes	 *
8815081Sbde	 * Otherwise, if the number of ticks in the whole seconds part of
8825081Sbde	 * the time difference fits in a long, then convert the parts to
8835081Sbde	 * ticks separately and add, using similar rounding methods and
8845081Sbde	 * overflow avoidance.  This method would work in the previous
8855081Sbde	 * case but it is slightly slower and assumes that hz is integral.
8865081Sbde	 *
8875081Sbde	 * Otherwise, round the time difference down to the maximum
8885081Sbde	 * representable value.
8895081Sbde	 *
8905081Sbde	 * If ints have 32 bits, then the maximum value for any timeout in
8915081Sbde	 * 10ms ticks is 248 days.
8921541Srgrimes	 */
8935081Sbde	s = splclock();
8941541Srgrimes	sec = tv->tv_sec - time.tv_sec;
8955081Sbde	usec = tv->tv_usec - time.tv_usec;
8965081Sbde	splx(s);
8975081Sbde	if (usec < 0) {
8985081Sbde		sec--;
8995081Sbde		usec += 1000000;
9005081Sbde	}
9015081Sbde	if (sec < 0) {
9025081Sbde#ifdef DIAGNOSTIC
9035081Sbde		printf("hzto: negative time difference %ld sec %ld usec\n",
9045081Sbde		       sec, usec);
9055081Sbde#endif
9065081Sbde		ticks = 1;
9075081Sbde	} else if (sec <= LONG_MAX / 1000000)
9085081Sbde		ticks = (sec * 1000000 + (unsigned long)usec + (tick - 1))
9095081Sbde			/ tick + 1;
9105081Sbde	else if (sec <= LONG_MAX / hz)
9115081Sbde		ticks = sec * hz
9125081Sbde			+ ((unsigned long)usec + (tick - 1)) / tick + 1;
9131541Srgrimes	else
9145081Sbde		ticks = LONG_MAX;
9155081Sbde	if (ticks > INT_MAX)
9165081Sbde		ticks = INT_MAX;
9171541Srgrimes	return (ticks);
9181541Srgrimes}
9191541Srgrimes
9201541Srgrimes/*
9211541Srgrimes * Start profiling on a process.
9221541Srgrimes *
9231541Srgrimes * Kernel profiling passes proc0 which never exits and hence
9241541Srgrimes * keeps the profile clock running constantly.
9251541Srgrimes */
9261541Srgrimesvoid
9271541Srgrimesstartprofclock(p)
9281541Srgrimes	register struct proc *p;
9291541Srgrimes{
9301541Srgrimes	int s;
9311541Srgrimes
9321541Srgrimes	if ((p->p_flag & P_PROFIL) == 0) {
9331541Srgrimes		p->p_flag |= P_PROFIL;
9341541Srgrimes		if (++profprocs == 1 && stathz != 0) {
9351541Srgrimes			s = splstatclock();
9361541Srgrimes			psdiv = pscnt = psratio;
9371541Srgrimes			setstatclockrate(profhz);
9381541Srgrimes			splx(s);
9391541Srgrimes		}
9401541Srgrimes	}
9411541Srgrimes}
9421541Srgrimes
9431541Srgrimes/*
9441541Srgrimes * Stop profiling on a process.
9451541Srgrimes */
9461541Srgrimesvoid
9471541Srgrimesstopprofclock(p)
9481541Srgrimes	register struct proc *p;
9491541Srgrimes{
9501541Srgrimes	int s;
9511541Srgrimes
9521541Srgrimes	if (p->p_flag & P_PROFIL) {
9531541Srgrimes		p->p_flag &= ~P_PROFIL;
9541541Srgrimes		if (--profprocs == 0 && stathz != 0) {
9551541Srgrimes			s = splstatclock();
9561541Srgrimes			psdiv = pscnt = 1;
9571541Srgrimes			setstatclockrate(stathz);
9581541Srgrimes			splx(s);
9591541Srgrimes		}
9601541Srgrimes	}
9611541Srgrimes}
9621541Srgrimes
9631541Srgrimes/*
9641541Srgrimes * Statistics clock.  Grab profile sample, and if divider reaches 0,
9651541Srgrimes * do process and kernel statistics.
9661541Srgrimes */
void
statclock(frame)
	register struct clockframe *frame;
{
#ifdef GPROF
	register struct gmonparam *g;
#endif
	register struct proc *p;
	register int i;
	struct pstats *pstats;
	long rss;
	struct rusage *ru;
	struct vmspace *vm;

	if (CLKF_USERMODE(frame)) {
		p = curproc;
		if (p->p_flag & P_PROFIL)
			addupc_intr(p, CLKF_PC(frame), 1);
#if defined(SMP) && defined(BETTER_CLOCK)
		if (stathz != 0)
			forward_statclock(pscnt);
#endif
		/* Full statistics are gathered only every psdiv-th tick. */
		if (--pscnt > 0)
			return;
		/*
		 * Came from user mode; CPU was in user state.
		 * If this process is being profiled record the tick.
		 */
		p->p_uticks++;
		if (p->p_nice > NZERO)
			cp_time[CP_NICE]++;
		else
			cp_time[CP_USER]++;
	} else {
#ifdef GPROF
		/*
		 * Kernel statistics are just like addupc_intr, only easier.
		 */
		g = &_gmonparam;
		if (g->state == GMON_PROF_ON) {
			i = CLKF_PC(frame) - g->lowpc;
			if (i < g->textsize) {
				i /= HISTFRACTION * sizeof(*g->kcount);
				g->kcount[i]++;
			}
		}
#endif
#if defined(SMP) && defined(BETTER_CLOCK)
		if (stathz != 0)
			forward_statclock(pscnt);
#endif
		if (--pscnt > 0)
			return;
		/*
		 * Came from kernel mode, so we were:
		 * - handling an interrupt,
		 * - doing syscall or trap work on behalf of the current
		 *   user process, or
		 * - spinning in the idle loop.
		 * Whichever it is, charge the time as appropriate.
		 * Note that we charge interrupts to the current process,
		 * regardless of whether they are ``for'' that process,
		 * so that we know how much of its real time was spent
		 * in ``non-process'' (i.e., interrupt) work.
		 */
		p = curproc;
		if (CLKF_INTR(frame)) {
			if (p != NULL)
				p->p_iticks++;
			cp_time[CP_INTR]++;
		} else if (p != NULL) {
			p->p_sticks++;
			cp_time[CP_SYS]++;
		} else
			cp_time[CP_IDLE]++;
	}
	pscnt = psdiv;	/* restart the statistics divider */

	/*
	 * We maintain statistics shown by user-level statistics
	 * programs:  the amount of time in each cpu state, and
	 * the amount of time each of DK_NDRIVE ``drives'' is busy.
	 *
	 * XXX	should either run linked list of drives, or (better)
	 *	grab timestamps in the start & done code.
	 */
	for (i = 0; i < DK_NDRIVE; i++)
		if (dk_busy & (1 << i))
			dk_time[i]++;

	/*
	 * We adjust the priority of the current process.  The priority of
	 * a process gets worse as it accumulates CPU time.  The cpu usage
	 * estimator (p_estcpu) is increased here.  The formula for computing
	 * priorities (in kern_synch.c) will compute a different value each
	 * time p_estcpu increases by 4.  The cpu usage estimator ramps up
	 * quite quickly when the process is running (linearly), and decays
	 * away exponentially, at a rate which is proportionally slower when
	 * the system is busy.  The basic principle is that the system will
	 * 90% forget that the process used a lot of CPU time in 5 * loadav
	 * seconds.  This causes the system to favor processes which haven't
	 * run much recently, and to round-robin among other processes.
	 */
	if (p != NULL) {
		p->p_cpticks++;
		/* Saturate p_estcpu rather than letting it wrap to zero. */
		if (++p->p_estcpu == 0)
			p->p_estcpu--;
		if ((p->p_estcpu & 3) == 0) {
			resetpriority(p);
			if (p->p_priority >= PUSER)
				p->p_priority = p->p_usrpri;
		}

		/* Update resource usage integrals and maximums. */
		if ((pstats = p->p_stats) != NULL &&
		    (ru = &pstats->p_ru) != NULL &&
		    (vm = p->p_vmspace) != NULL) {
			ru->ru_ixrss += vm->vm_tsize * PAGE_SIZE / 1024;
			ru->ru_idrss += vm->vm_dsize * PAGE_SIZE / 1024;
			ru->ru_isrss += vm->vm_ssize * PAGE_SIZE / 1024;
			rss = vm->vm_pmap.pm_stats.resident_count *
			      PAGE_SIZE / 1024;
			if (ru->ru_maxrss < rss)
				ru->ru_maxrss = rss;
        	}
	}
}
10941541Srgrimes
10951541Srgrimes/*
10961541Srgrimes * Return information about system clocks.
10971541Srgrimes */
109812152Sphkstatic int
109912152Sphksysctl_kern_clockrate SYSCTL_HANDLER_ARGS
11001541Srgrimes{
11011541Srgrimes	struct clockinfo clkinfo;
11021541Srgrimes	/*
11031541Srgrimes	 * Construct clockinfo structure.
11041541Srgrimes	 */
11051541Srgrimes	clkinfo.hz = hz;
11061541Srgrimes	clkinfo.tick = tick;
110726897Sjhay	clkinfo.tickadj = tickadj;
11081541Srgrimes	clkinfo.profhz = profhz;
11091541Srgrimes	clkinfo.stathz = stathz ? stathz : hz;
111012243Sphk	return (sysctl_handle_opaque(oidp, &clkinfo, sizeof clkinfo, req));
11111541Srgrimes}
11122858Swollman
/* Export kern.clockrate as a read-only "S,clockinfo" structure. */
SYSCTL_PROC(_kern, KERN_CLOCKRATE, clockrate, CTLTYPE_STRUCT|CTLFLAG_RD,
	0, 0, sysctl_kern_clockrate, "S,clockinfo","");
111512152Sphk
111621101Sjhay#ifdef PPS_SYNC
11172858Swollman/*
111821101Sjhay * hardpps() - discipline CPU clock oscillator to external PPS signal
11192858Swollman *
11202858Swollman * This routine is called at each PPS interrupt in order to discipline
112121101Sjhay * the CPU clock oscillator to the PPS signal. It measures the PPS phase
112221101Sjhay * and leaves it in a handy spot for the hardclock() routine. It
112321101Sjhay * integrates successive PPS phase differences and calculates the
11242858Swollman * frequency offset. This is used in hardclock() to discipline the CPU
11252858Swollman * clock oscillator so that intrinsic frequency error is cancelled out.
112621101Sjhay * The code requires the caller to capture the time and hardware counter
112721101Sjhay * value at the on-time PPS signal transition.
112821101Sjhay *
112921101Sjhay * Note that, on some Unix systems, this routine runs at an interrupt
113021101Sjhay * priority level higher than the timer interrupt routine hardclock().
113121101Sjhay * Therefore, the variables used are distinct from the hardclock()
113221101Sjhay * variables, except for certain exceptions: The PPS frequency pps_freq
113321101Sjhay * and phase pps_offset variables are determined by this routine and
113421101Sjhay * updated atomically. The time_tolerance variable can be considered a
113521101Sjhay * constant, since it is infrequently changed, and then only when the
113621101Sjhay * PPS signal is disabled. The watchdog counter pps_valid is updated
113721101Sjhay * once per second by hardclock() and is atomically cleared in this
113821101Sjhay * routine.
11392858Swollman */
void
hardpps(tvp, usec)
	struct timeval *tvp;		/* time at PPS */
	long usec;			/* hardware counter at PPS */
{
	long u_usec, v_usec, bigtick;
	long cal_sec, cal_usec;

	/*
	 * An occasional glitch can be produced when the PPS interrupt
	 * occurs in the hardclock() routine before the time variable is
	 * updated. Here the offset is discarded when the difference
	 * between it and the last one is greater than tick/2, but not
	 * if the interval since the first discard exceeds 30 s.
	 */
	time_status |= STA_PPSSIGNAL;
	time_status &= ~(STA_PPSJITTER | STA_PPSWANDER | STA_PPSERROR);
	pps_valid = 0;		/* reset the hardclock() watchdog */
	u_usec = -tvp->tv_usec;
	if (u_usec < -500000)
		u_usec += 1000000;
	v_usec = pps_offset - u_usec;
	if (v_usec < 0)
		v_usec = -v_usec;
	if (v_usec > (tick >> 1)) {
		if (pps_glitch > MAXGLITCH) {
			pps_glitch = 0;
			pps_tf[2] = u_usec;
			pps_tf[1] = u_usec;
		} else {
			pps_glitch++;
			u_usec = pps_offset;
		}
	} else
		pps_glitch = 0;

	/*
	 * A three-stage median filter is used to help deglitch the pps
	 * time. The median sample becomes the time offset estimate; the
	 * difference between the other two samples becomes the time
	 * dispersion (jitter) estimate.
	 */
	pps_tf[2] = pps_tf[1];
	pps_tf[1] = pps_tf[0];
	pps_tf[0] = u_usec;
	if (pps_tf[0] > pps_tf[1]) {
		if (pps_tf[1] > pps_tf[2]) {
			pps_offset = pps_tf[1];		/* 0 1 2 */
			v_usec = pps_tf[0] - pps_tf[2];
		} else if (pps_tf[2] > pps_tf[0]) {
			pps_offset = pps_tf[0];		/* 2 0 1 */
			v_usec = pps_tf[2] - pps_tf[1];
		} else {
			pps_offset = pps_tf[2];		/* 0 2 1 */
			v_usec = pps_tf[0] - pps_tf[1];
		}
	} else {
		if (pps_tf[1] < pps_tf[2]) {
			pps_offset = pps_tf[1];		/* 2 1 0 */
			v_usec = pps_tf[2] - pps_tf[0];
		} else  if (pps_tf[2] < pps_tf[0]) {
			pps_offset = pps_tf[0];		/* 1 0 2 */
			v_usec = pps_tf[1] - pps_tf[2];
		} else {
			pps_offset = pps_tf[2];		/* 1 2 0 */
			v_usec = pps_tf[1] - pps_tf[0];
		}
	}
	/* Exponentially average the jitter estimate (gain 1/2^PPS_AVG). */
	if (v_usec > MAXTIME)
		pps_jitcnt++;
	v_usec = (v_usec << PPS_AVG) - pps_jitter;
	if (v_usec < 0)
		pps_jitter -= -v_usec >> PPS_AVG;
	else
		pps_jitter += v_usec >> PPS_AVG;
	if (pps_jitter > (MAXTIME >> 1))
		time_status |= STA_PPSJITTER;

	/*
	 * During the calibration interval adjust the starting time when
	 * the tick overflows. At the end of the interval compute the
	 * duration of the interval and the difference of the hardware
	 * counters at the beginning and end of the interval. This code
	 * is deliciously complicated by the fact valid differences may
	 * exceed the value of tick when using long calibration
	 * intervals and small ticks. Note that the counter can be
	 * greater than tick if caught at just the wrong instant, but
	 * the values returned and used here are correct.
	 */
	bigtick = (long)tick << SHIFT_USEC;
	pps_usec -= pps_freq;
	if (pps_usec >= bigtick)
		pps_usec -= bigtick;
	if (pps_usec < 0)
		pps_usec += bigtick;
	pps_time.tv_sec++;
	pps_count++;
	if (pps_count < (1 << pps_shift))
		return;		/* calibration interval not yet complete */
	pps_count = 0;
	pps_calcnt++;
	u_usec = usec << SHIFT_USEC;
	v_usec = pps_usec - u_usec;
	if (v_usec >= bigtick >> 1)
		v_usec -= bigtick;
	if (v_usec < -(bigtick >> 1))
		v_usec += bigtick;
	if (v_usec < 0)
		v_usec = -(-v_usec >> pps_shift);
	else
		v_usec = v_usec >> pps_shift;
	pps_usec = u_usec;
	cal_sec = tvp->tv_sec;
	cal_usec = tvp->tv_usec;
	cal_sec -= pps_time.tv_sec;
	cal_usec -= pps_time.tv_usec;
	if (cal_usec < 0) {
		cal_usec += 1000000;
		cal_sec--;
	}
	pps_time = *tvp;

	/*
	 * Check for lost interrupts, noise, excessive jitter and
	 * excessive frequency error. The number of timer ticks during
	 * the interval may vary +-1 tick. Add to this a margin of one
	 * tick for the PPS signal jitter and maximum frequency
	 * deviation. If the limits are exceeded, the calibration
	 * interval is reset to the minimum and we start over.
	 */
	u_usec = (long)tick << 1;
	if (!((cal_sec == -1 && cal_usec > (1000000 - u_usec))
	    || (cal_sec == 0 && cal_usec < u_usec))
	    || v_usec > time_tolerance || v_usec < -time_tolerance) {
		pps_errcnt++;
		pps_shift = PPS_SHIFT;
		pps_intcnt = 0;
		time_status |= STA_PPSERROR;
		return;
	}

	/*
	 * A three-stage median filter is used to help deglitch the pps
	 * frequency. The median sample becomes the frequency offset
	 * estimate; the difference between the other two samples
	 * becomes the frequency dispersion (stability) estimate.
	 */
	pps_ff[2] = pps_ff[1];
	pps_ff[1] = pps_ff[0];
	pps_ff[0] = v_usec;
	if (pps_ff[0] > pps_ff[1]) {
		if (pps_ff[1] > pps_ff[2]) {
			u_usec = pps_ff[1];		/* 0 1 2 */
			v_usec = pps_ff[0] - pps_ff[2];
		} else if (pps_ff[2] > pps_ff[0]) {
			u_usec = pps_ff[0];		/* 2 0 1 */
			v_usec = pps_ff[2] - pps_ff[1];
		} else {
			u_usec = pps_ff[2];		/* 0 2 1 */
			v_usec = pps_ff[0] - pps_ff[1];
		}
	} else {
		if (pps_ff[1] < pps_ff[2]) {
			u_usec = pps_ff[1];		/* 2 1 0 */
			v_usec = pps_ff[2] - pps_ff[0];
		} else  if (pps_ff[2] < pps_ff[0]) {
			u_usec = pps_ff[0];		/* 1 0 2 */
			v_usec = pps_ff[1] - pps_ff[2];
		} else {
			u_usec = pps_ff[2];		/* 1 2 0 */
			v_usec = pps_ff[1] - pps_ff[0];
		}
	}

	/*
	 * Here the frequency dispersion (stability) is updated. If it
	 * is less than one-fourth the maximum (MAXFREQ), the frequency
	 * offset is updated as well, but clamped to the tolerance. It
	 * will be processed later by the hardclock() routine.
	 */
	v_usec = (v_usec >> 1) - pps_stabil;
	if (v_usec < 0)
		pps_stabil -= -v_usec >> PPS_AVG;
	else
		pps_stabil += v_usec >> PPS_AVG;
	if (pps_stabil > MAXFREQ >> 2) {
		pps_stbcnt++;
		time_status |= STA_PPSWANDER;
		return;
	}
	if (time_status & STA_PPSFREQ) {
		if (u_usec < 0) {
			pps_freq -= -u_usec >> PPS_AVG;
			if (pps_freq < -time_tolerance)
				pps_freq = -time_tolerance;
			u_usec = -u_usec;
		} else {
			pps_freq += u_usec >> PPS_AVG;
			if (pps_freq > time_tolerance)
				pps_freq = time_tolerance;
		}
	}

	/*
	 * Here the calibration interval is adjusted. If the maximum
	 * time difference is greater than tick / 4, reduce the interval
	 * by half. If this is not the case for four consecutive
	 * intervals, double the interval.
	 */
	if (u_usec << pps_shift > bigtick >> 2) {
		pps_intcnt = 0;
		if (pps_shift > PPS_SHIFT)
			pps_shift--;
	} else if (pps_intcnt >= 4) {
		pps_intcnt = 0;
		if (pps_shift < PPS_SHIFTMAX)
			pps_shift++;
	} else
		pps_intcnt++;
}
13602858Swollman#endif /* PPS_SYNC */
1361