kern_clock.c revision 35029
static volatile int print_tci = 1;

/*-
 * Copyright (c) 1997, 1998 Poul-Henning Kamp <phk@FreeBSD.org>
 * Copyright (c) 1982, 1986, 1991, 1993
 *	The Regents of the University of California.  All rights reserved.
 * (c) UNIX System Laboratories, Inc.
 * All or some portions of this file are derived from material licensed
 * to the University of California by American Telephone and Telegraph
 * Co. or Unix System Laboratories, Inc. and are reproduced herein with
 * the permission of UNIX System Laboratories, Inc.
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 * 1. Redistributions of source code must retain the above copyright
 *    notice, this list of conditions and the following disclaimer.
 * 2. Redistributions in binary form must reproduce the above copyright
 *    notice, this list of conditions and the following disclaimer in the
 *    documentation and/or other materials provided with the distribution.
 * 3. All advertising materials mentioning features or use of this software
 *    must display the following acknowledgement:
 *	This product includes software developed by the University of
 *	California, Berkeley and its contributors.
 * 4. Neither the name of the University nor the names of its contributors
 *    may be used to endorse or promote products derived from this software
 *    without specific prior written permission.
 *
 * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
 * ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
 * SUCH DAMAGE.
 *
 *	@(#)kern_clock.c	8.5 (Berkeley) 1/21/94
 * $Id: kern_clock.c,v 1.62 1998/03/31 10:47:01 phk Exp $
 */

#include <sys/param.h>
#include <sys/systm.h>
#include <sys/dkstat.h>
#include <sys/callout.h>
#include <sys/kernel.h>
#include <sys/proc.h>
#include <sys/resourcevar.h>
#include <sys/signalvar.h>
#include <sys/timex.h>
#include <vm/vm.h>
#include <sys/lock.h>
#include <vm/pmap.h>
#include <vm/vm_map.h>
#include <sys/sysctl.h>

#include <machine/cpu.h>
#include <machine/limits.h>

#ifdef GPROF
#include <sys/gmon.h>
#endif

#if defined(SMP) && defined(BETTER_CLOCK)
#include <machine/smp.h>
#endif

static void initclocks __P((void *dummy));
SYSINIT(clocks, SI_SUB_CLOCKS, SI_ORDER_FIRST, initclocks, NULL)

static void tco_forward __P((void));
static void tco_setscales __P((struct timecounter *tc));

/* Some of these don't belong here, but it's easiest to concentrate them. */
#if defined(SMP) && defined(BETTER_CLOCK)
long cp_time[CPUSTATES];
#else
static long cp_time[CPUSTATES];
#endif
long dk_seek[DK_NDRIVE];
static long dk_time[DK_NDRIVE];	/* time busy (in statclock ticks) */
long dk_wds[DK_NDRIVE];
long dk_wpms[DK_NDRIVE];
long dk_xfer[DK_NDRIVE];

int dk_busy;
int dk_ndrive = 0;
char dk_names[DK_NDRIVE][DK_NAMELEN];

long tk_cancc;
long tk_nin;
long tk_nout;
long tk_rawcc;

struct timecounter *timecounter;

time_t time_second;

/*
 * Clock handling routines.
 *
 * This code is written to operate with two timers that run independently of
 * each other.
 *
 * The main timer, running hz times per second, is used to trigger interval
 * timers, timeouts and rescheduling as needed.
 *
 * The second timer handles kernel and user profiling,
 * and does resource use estimation.  If the second timer is programmable,
 * it is randomized to avoid aliasing between the two clocks.  For example,
 * the randomization prevents an adversary from always giving up the cpu
 * just before its quantum expires.  Otherwise, it would never accumulate
 * cpu ticks.  The mean frequency of the second timer is stathz.
 *
 * If no second timer exists, stathz will be zero; in this case we drive
 * profiling and statistics off the main clock.  This WILL NOT be accurate;
 * do not do it unless absolutely necessary.
 *
 * The statistics clock may (or may not) be run at a higher rate while
 * profiling.  This profile clock runs at profhz.  We require that profhz
 * be an integral multiple of stathz.
 *
 * If the statistics clock is running fast, it must be divided by the ratio
 * profhz/stathz for statistics.  (For profiling, every tick counts.)
 *
 * Time-of-day is maintained using a "timecounter", which may or may
 * not be related to the hardware generating the above mentioned
 * interrupts.
 */
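
/*
 * Worked example of the rates above (illustrative numbers only; the
 * i386 port, for instance, runs the statistics clock off the RTC):
 * with stathz = 128 and profhz = 1024, psratio = profhz / stathz = 8,
 * so while any process is being profiled the statistics code only runs
 * on every 8th statclock() tick, via the psdiv/pscnt divider below.
 */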

int	stathz;
int	profhz;
static int profprocs;
int	ticks;
static int psdiv, pscnt;		/* prof => stat divider */
int	psratio;			/* ratio: prof / stat */

/*
 * Initialize clock frequencies and start both clocks running.
 */
/* ARGSUSED */
static void
initclocks(dummy)
	void *dummy;
{
	register int i;

	/*
	 * Set divisors to 1 (normal case) and let the machine-specific
	 * code do its bit.
	 */
	psdiv = pscnt = 1;
	cpu_initclocks();

	/*
	 * Compute profhz/stathz, and fix profhz if needed.
	 */
	i = stathz ? stathz : hz;
	if (profhz == 0)
		profhz = i;
	psratio = profhz / i;
}

/*
 * The real-time timer, interrupting hz times per second.
 */
void
hardclock(frame)
	register struct clockframe *frame;
{
	register struct proc *p;

	p = curproc;
	if (p) {
		register struct pstats *pstats;

		/*
		 * Run current process's virtual and profile time, as needed.
		 */
		pstats = p->p_stats;
		if (CLKF_USERMODE(frame) &&
		    timerisset(&pstats->p_timer[ITIMER_VIRTUAL].it_value) &&
		    itimerdecr(&pstats->p_timer[ITIMER_VIRTUAL], tick) == 0)
			psignal(p, SIGVTALRM);
		if (timerisset(&pstats->p_timer[ITIMER_PROF].it_value) &&
		    itimerdecr(&pstats->p_timer[ITIMER_PROF], tick) == 0)
			psignal(p, SIGPROF);
	}

#if defined(SMP) && defined(BETTER_CLOCK)
	forward_hardclock(pscnt);
#endif

	/*
	 * If no separate statistics clock is available, run it from here.
	 */
	if (stathz == 0)
		statclock(frame);

	tco_forward();
	ticks++;

	/*
	 * Process callouts at a very low cpu priority, so we don't keep the
	 * relatively high clock interrupt priority any longer than necessary.
	 */
	if (TAILQ_FIRST(&callwheel[ticks & callwheelmask]) != NULL) {
		if (CLKF_BASEPRI(frame)) {
			/*
			 * Save the overhead of a software interrupt;
			 * it will happen as soon as we return, so do it now.
			 */
			(void)splsoftclock();
			softclock();
		} else
			setsoftclock();
	} else if (softticks + 1 == ticks)
		++softticks;
}
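
/*
 * The callwheel peek in hardclock() above is constant time: callouts
 * are hashed into buckets by expiry tick (see kern_timeout.c), so each
 * tick only inspects the one bucket, callwheel[ticks & callwheelmask],
 * that can hold work due now.
 */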

/*
 * Compute number of ticks in the specified amount of time.
 */
int
tvtohz(tv)
	struct timeval *tv;
{
	register unsigned long ticks;
	register long sec, usec;

	/*
	 * If the number of usecs in the whole seconds part of the time
	 * difference fits in a long, then the total number of usecs will
	 * fit in an unsigned long.  Compute the total and convert it to
	 * ticks, rounding up and adding 1 to allow for the current tick
	 * to expire.  Rounding also depends on unsigned long arithmetic
	 * to avoid overflow.
	 *
	 * Otherwise, if the number of ticks in the whole seconds part of
	 * the time difference fits in a long, then convert the parts to
	 * ticks separately and add, using similar rounding methods and
	 * overflow avoidance.  This method would work in the previous
	 * case but it is slightly slower and assumes that hz is integral.
	 *
	 * Otherwise, round the time difference down to the maximum
	 * representable value.
	 *
	 * If ints have 32 bits, then the maximum value for any timeout in
	 * 10ms ticks is 248 days.
	 */
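	/*
	 * Worked example (assuming hz = 100, so tick = 10000 usec): for
	 * tv = 1.5 seconds the first branch below computes
	 * (1500000 + 9999) / 10000 + 1 = 150 + 1 = 151 ticks; the extra
	 * tick allows for the partial tick currently in progress.
	 */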
	sec = tv->tv_sec;
	usec = tv->tv_usec;
	if (usec < 0) {
		sec--;
		usec += 1000000;
	}
	if (sec < 0) {
#ifdef DIAGNOSTIC
		if (usec > 0) {
			sec++;
			usec -= 1000000;
		}
		printf("tvtohz: negative time difference %ld sec %ld usec\n",
		       sec, usec);
#endif
		ticks = 1;
	} else if (sec <= LONG_MAX / 1000000)
		ticks = (sec * 1000000 + (unsigned long)usec + (tick - 1))
			/ tick + 1;
	else if (sec <= LONG_MAX / hz)
		ticks = sec * hz
			+ ((unsigned long)usec + (tick - 1)) / tick + 1;
	else
		ticks = LONG_MAX;
	if (ticks > INT_MAX)
		ticks = INT_MAX;
	return (ticks);
}


/*
 * Compute number of hz until specified time.  Used to
 * compute the third argument to timeout() from an absolute time.
 */
int
hzto(tv)
	struct timeval *tv;
{
	struct timeval t2;

	getmicrotime(&t2);
	t2.tv_sec = tv->tv_sec - t2.tv_sec;
	t2.tv_usec = tv->tv_usec - t2.tv_usec;
	return (tvtohz(&t2));
}

/*
 * Start profiling on a process.
 *
 * Kernel profiling passes proc0 which never exits and hence
 * keeps the profile clock running constantly.
 */
void
startprofclock(p)
	register struct proc *p;
{
	int s;

	if ((p->p_flag & P_PROFIL) == 0) {
		p->p_flag |= P_PROFIL;
		if (++profprocs == 1 && stathz != 0) {
			s = splstatclock();
			psdiv = pscnt = psratio;
			setstatclockrate(profhz);
			splx(s);
		}
	}
}

/*
 * Stop profiling on a process.
 */
void
stopprofclock(p)
	register struct proc *p;
{
	int s;

	if (p->p_flag & P_PROFIL) {
		p->p_flag &= ~P_PROFIL;
		if (--profprocs == 0 && stathz != 0) {
			s = splstatclock();
			psdiv = pscnt = 1;
			setstatclockrate(stathz);
			splx(s);
		}
	}
}

/*
 * Statistics clock.  Grab profile sample, and if divider reaches 0,
 * do process and kernel statistics.
 */
void
statclock(frame)
	register struct clockframe *frame;
{
#ifdef GPROF
	register struct gmonparam *g;
#endif
	register struct proc *p;
	register int i;
	struct pstats *pstats;
	long rss;
	struct rusage *ru;
	struct vmspace *vm;

	if (CLKF_USERMODE(frame)) {
		p = curproc;
		if (p->p_flag & P_PROFIL)
			addupc_intr(p, CLKF_PC(frame), 1);
#if defined(SMP) && defined(BETTER_CLOCK)
		if (stathz != 0)
			forward_statclock(pscnt);
#endif
		if (--pscnt > 0)
			return;
		/*
		 * Came from user mode; CPU was in user state.
		 * If this process is being profiled, record the tick.
		 */
		p->p_uticks++;
		if (p->p_nice > NZERO)
			cp_time[CP_NICE]++;
		else
			cp_time[CP_USER]++;
	} else {
#ifdef GPROF
		/*
		 * Kernel statistics are just like addupc_intr, only easier.
		 */
		g = &_gmonparam;
		if (g->state == GMON_PROF_ON) {
			i = CLKF_PC(frame) - g->lowpc;
			if (i < g->textsize) {
				i /= HISTFRACTION * sizeof(*g->kcount);
				g->kcount[i]++;
			}
		}
#endif
#if defined(SMP) && defined(BETTER_CLOCK)
		if (stathz != 0)
			forward_statclock(pscnt);
#endif
		if (--pscnt > 0)
			return;
		/*
		 * Came from kernel mode, so we were:
		 * - handling an interrupt,
		 * - doing syscall or trap work on behalf of the current
		 *   user process, or
		 * - spinning in the idle loop.
		 * Whichever it is, charge the time as appropriate.
		 * Note that we charge interrupts to the current process,
		 * regardless of whether they are ``for'' that process,
		 * so that we know how much of its real time was spent
		 * in ``non-process'' (i.e., interrupt) work.
		 */
		p = curproc;
		if (CLKF_INTR(frame)) {
			if (p != NULL)
				p->p_iticks++;
			cp_time[CP_INTR]++;
		} else if (p != NULL) {
			p->p_sticks++;
			cp_time[CP_SYS]++;
		} else
			cp_time[CP_IDLE]++;
	}
	pscnt = psdiv;

	/*
	 * We maintain statistics shown by user-level statistics
	 * programs:  the amount of time in each cpu state, and
	 * the amount of time each of DK_NDRIVE ``drives'' is busy.
	 *
	 * XXX	should either run linked list of drives, or (better)
	 *	grab timestamps in the start & done code.
	 */
	for (i = 0; i < DK_NDRIVE; i++)
		if (dk_busy & (1 << i))
			dk_time[i]++;

	/*
	 * We adjust the priority of the current process.  The priority of
	 * a process gets worse as it accumulates CPU time.  The cpu usage
	 * estimator (p_estcpu) is increased here.  The formula for computing
	 * priorities (in kern_synch.c) will compute a different value each
	 * time p_estcpu increases by 4.  The cpu usage estimator ramps up
	 * quite quickly when the process is running (linearly), and decays
	 * away exponentially, at a rate which is proportionally slower when
	 * the system is busy.  The basic principle is that the system will
	 * forget 90% of the CPU time a process used within 5 * loadav
	 * seconds.  This causes the system to favor processes which haven't
	 * run much recently, and to round-robin among other processes.
	 */
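	/*
	 * In concrete terms: a process that stays runnable gains one
	 * p_estcpu per un-divided statclock() tick below, and
	 * resetpriority() is only re-run on every 4th increment
	 * ((p_estcpu & 3) == 0), matching the "increases by 4"
	 * granularity of the priority formula in kern_synch.c.
	 */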
	if (p != NULL) {
		p->p_cpticks++;
		if (++p->p_estcpu == 0)
			p->p_estcpu--;
		if ((p->p_estcpu & 3) == 0) {
			resetpriority(p);
			if (p->p_priority >= PUSER)
				p->p_priority = p->p_usrpri;
		}

		/* Update resource usage integrals and maximums. */
		if ((pstats = p->p_stats) != NULL &&
		    (ru = &pstats->p_ru) != NULL &&
		    (vm = p->p_vmspace) != NULL) {
			ru->ru_ixrss += vm->vm_tsize * PAGE_SIZE / 1024;
			ru->ru_idrss += vm->vm_dsize * PAGE_SIZE / 1024;
			ru->ru_isrss += vm->vm_ssize * PAGE_SIZE / 1024;
			rss = vm->vm_pmap.pm_stats.resident_count *
			      PAGE_SIZE / 1024;
			if (ru->ru_maxrss < rss)
				ru->ru_maxrss = rss;
		}
	}
}

/*
 * Return information about system clocks.
 */
static int
sysctl_kern_clockrate SYSCTL_HANDLER_ARGS
{
	struct clockinfo clkinfo;
	/*
	 * Construct clockinfo structure.
	 */
	clkinfo.hz = hz;
	clkinfo.tick = tick;
	clkinfo.tickadj = tickadj;
	clkinfo.profhz = profhz;
	clkinfo.stathz = stathz ? stathz : hz;
	return (sysctl_handle_opaque(oidp, &clkinfo, sizeof clkinfo, req));
}

SYSCTL_PROC(_kern, KERN_CLOCKRATE, clockrate, CTLTYPE_STRUCT|CTLFLAG_RD,
	0, 0, sysctl_kern_clockrate, "S,clockinfo", "");


/*
 * We have four functions for looking at the clock, two for microseconds
 * and two for nanoseconds.  For each there is a fast but less precise
 * version, "get{nano|micro}time", which returns a time up to 1/HZ
 * previous to the call, whereas the raw version "{nano|micro}time"
 * will return a timestamp which is as precise as possible.
 */
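
/*
 * For example, at hz = 100 getmicrotime() can lag real time by up to
 * 10 msec: it merely copies the snapshot that tco_forward() computed on
 * the most recent hardclock() tick, while microtime() additionally
 * reads the hardware counter and interpolates up to the instant of the
 * call.
 */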

void
getmicrotime(struct timeval *tvp)
{
	struct timecounter *tc;

	tc = timecounter;
	*tvp = tc->microtime;
}

void
getnanotime(struct timespec *tsp)
{
	struct timecounter *tc;

	tc = timecounter;
	*tsp = tc->nanotime;
}

void
microtime(struct timeval *tv)
{
	struct timecounter *tc;

	tc = (struct timecounter *)timecounter;
	tv->tv_sec = tc->offset_sec;
	tv->tv_usec = tc->offset_micro;
	tv->tv_usec +=
	    ((u_int64_t)tc->get_timedelta(tc) * tc->scale_micro) >> 32;
	tv->tv_usec += boottime.tv_usec;
	tv->tv_sec += boottime.tv_sec;
	if (tv->tv_usec >= 1000000) {
		tv->tv_usec -= 1000000;
		tv->tv_sec++;
	}
}

void
nanotime(struct timespec *tv)
{
	u_int count;
	u_int64_t delta;
	struct timecounter *tc;

	tc = (struct timecounter *)timecounter;
	tv->tv_sec = tc->offset_sec;
	count = tc->get_timedelta(tc);
	delta = tc->offset_nano;
	delta += ((u_int64_t)count * tc->scale_nano_f);
	delta >>= 32;
	delta += ((u_int64_t)count * tc->scale_nano_i);
	delta += boottime.tv_usec * 1000;
	tv->tv_sec += boottime.tv_sec;
	if (delta >= 1000000000) {
		delta -= 1000000000;
		tv->tv_sec++;
	}
	tv->tv_nsec = delta;
}
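
/*
 * The pair of multiplies in nanotime() above is a 32.32 fixed point
 * multiplication done in two halves to keep the intermediates within
 * 64 bits: count * scale_nano_f yields the fractional nanoseconds
 * (shifted down by 32 together with offset_nano's fraction), and
 * count * scale_nano_i the integral nanoseconds.
 */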

void
getmicroruntime(struct timeval *tvp)
{
	struct timecounter *tc;

	tc = timecounter;
	tvp->tv_sec = tc->offset_sec;
	tvp->tv_usec = tc->offset_micro;
}

void
getnanoruntime(struct timespec *tsp)
{
	struct timecounter *tc;

	tc = timecounter;
	tsp->tv_sec = tc->offset_sec;
	tsp->tv_nsec = tc->offset_nano >> 32;
}

void
microruntime(struct timeval *tv)
{
	struct timecounter *tc;

	tc = (struct timecounter *)timecounter;
	tv->tv_sec = tc->offset_sec;
	tv->tv_usec = tc->offset_micro;
	tv->tv_usec +=
	    ((u_int64_t)tc->get_timedelta(tc) * tc->scale_micro) >> 32;
	if (tv->tv_usec >= 1000000) {
		tv->tv_usec -= 1000000;
		tv->tv_sec++;
	}
}

void
nanoruntime(struct timespec *tv)
{
	u_int count;
	u_int64_t delta;
	struct timecounter *tc;

	tc = (struct timecounter *)timecounter;
	tv->tv_sec = tc->offset_sec;
	count = tc->get_timedelta(tc);
	delta = tc->offset_nano;
	delta += ((u_int64_t)count * tc->scale_nano_f);
	delta >>= 32;
	delta += ((u_int64_t)count * tc->scale_nano_i);
	if (delta >= 1000000000) {
		delta -= 1000000000;
		tv->tv_sec++;
	}
	tv->tv_nsec = delta;
}

static void
tco_setscales(struct timecounter *tc)
{
	u_int64_t scale;

	scale = 1000000000LL << 32;
	if (tc->adjustment > 0)
		scale += (tc->adjustment * 1000LL) << 10;
	else
		scale -= (-tc->adjustment * 1000LL) << 10;
	scale /= tc->frequency;
	tc->scale_micro = scale / 1000;
	tc->scale_nano_f = scale & 0xffffffff;
	tc->scale_nano_i = scale >> 32;
}
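
/*
 * Worked example of the scaling: for an i8254 timecounter running at
 * 1193182 Hz, scale is the 32.32 fixed point number of nanoseconds per
 * counter step, about 838.096 ns, so scale_nano_i = 838 with the
 * remainder in scale_nano_f; scale_micro holds the same quantity in
 * 32.32 microseconds (about 0.838).
 */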

static u_int
delta_timecounter(struct timecounter *tc)
{

	return ((tc->get_timecount() - tc->offset_count) & tc->counter_mask);
}
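
/*
 * The mask makes the subtraction safe across hardware wraparound, as
 * long as tco_forward() refreshes offset_count at least once per
 * counter period: with a 16 bit counter (counter_mask = 0xffff),
 * reading 0x0010 after an offset_count of 0xfff0 still yields the
 * correct delta of 0x20.
 */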

void
init_timecounter(struct timecounter *tc)
{
	struct timespec ts0, ts1;
	int i;

	if (!tc->get_timedelta)
		tc->get_timedelta = delta_timecounter;
	tc->adjustment = 0;
	tco_setscales(tc);
	tc->offset_count = tc->get_timecount();
	tc[0].tweak = &tc[0];
	tc[2] = tc[1] = tc[0];
	tc[1].other = &tc[2];
	tc[2].other = &tc[1];
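	/*
	 * The array juggling above relies on the caller handing us room
	 * for three timecounters: tc[0] stays the "tweak" copy which the
	 * sysctl handlers below modify, while tc[1] and tc[2] take turns
	 * as the generation published through the timecounter pointer,
	 * so readers always see one internally consistent snapshot.
	 */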
	if (!timecounter || !strcmp(timecounter->name, "dummy"))
		timecounter = &tc[2];
	tc = &tc[1];

	/*
	 * Figure out the cost of calling this timecounter.
	 * XXX: The 1:15 ratio is a guess at reality.
	 */
	nanotime(&ts0);
	for (i = 0; i < 16; i++)
		tc->get_timecount();
	for (i = 0; i < 240; i++)
		tc->get_timedelta(tc);
	nanotime(&ts1);
	ts1.tv_sec -= ts0.tv_sec;
	tc->cost = ts1.tv_sec * 1000000000 + ts1.tv_nsec - ts0.tv_nsec;
	tc->cost >>= 8;
	if (print_tci && strcmp(tc->name, "dummy"))
		printf("Timecounter \"%s\"  frequency %lu Hz  cost %u ns\n",
		    tc->name, tc->frequency, tc->cost);

	/* XXX: For now always start using the counter. */
	tc->offset_count = tc->get_timecount();
	nanotime(&ts1);
	tc->offset_nano = (u_int64_t)ts1.tv_nsec << 32;
	tc->offset_micro = ts1.tv_nsec / 1000;
	tc->offset_sec = ts1.tv_sec;
	timecounter = tc;
}

void
set_timecounter(struct timespec *ts)
{
	struct timespec ts2;

	nanoruntime(&ts2);
	boottime.tv_sec = ts->tv_sec - ts2.tv_sec;
	boottime.tv_usec = (ts->tv_nsec - ts2.tv_nsec) / 1000;
	if (boottime.tv_usec < 0) {
		boottime.tv_usec += 1000000;
		boottime.tv_sec--;
	}
	/* fiddle all the little crinkly bits around the fiords... */
	tco_forward();
}


#if 0 /* Currently unused */
void
switch_timecounter(struct timecounter *newtc)
{
	int s;
	struct timecounter *tc;
	struct timespec ts;

	s = splclock();
	tc = timecounter;
	if (newtc == tc || newtc == tc->other) {
		splx(s);
		return;
	}
	nanotime(&ts);
	newtc->offset_sec = ts.tv_sec;
	newtc->offset_nano = (u_int64_t)ts.tv_nsec << 32;
	newtc->offset_micro = ts.tv_nsec / 1000;
	newtc->offset_count = newtc->get_timecount();
	timecounter = newtc;
	splx(s);
}
#endif

static struct timecounter *
sync_other_counter(void)
{
	struct timecounter *tc, *tco;
	u_int delta;

	tc = timecounter->other;
	tco = tc->other;
	*tc = *timecounter;
	tc->other = tco;
	delta = tc->get_timedelta(tc);
	tc->offset_count += delta;
	tc->offset_count &= tc->counter_mask;
	tc->offset_nano += (u_int64_t)delta * tc->scale_nano_f;
	tc->offset_nano += (u_int64_t)delta * tc->scale_nano_i << 32;
	return (tc);
}
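
/*
 * sync_other_counter() and tco_forward() together form a lock-free
 * update scheme: all writes go to the generation that is not currently
 * published, and tco_forward() exposes it with a single store to the
 * timecounter pointer, so a reader that loads the pointer once gets an
 * internally consistent snapshot.
 */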

static void
tco_forward(void)
{
	struct timecounter *tc;

	tc = sync_other_counter();
	if (timedelta != 0) {
		tc->offset_nano += (u_int64_t)(tickdelta * 1000) << 32;
		timedelta -= tickdelta;
	}

	if (tc->offset_nano >= 1000000000ULL << 32) {
		tc->offset_nano -= 1000000000ULL << 32;
		tc->offset_sec++;
		tc->frequency = tc->tweak->frequency;
		tc->adjustment = tc->tweak->adjustment;
		ntp_update_second(tc);	/* XXX only needed if xntpd runs */
		tco_setscales(tc);
	}

	tc->offset_micro = (tc->offset_nano / 1000) >> 32;

	/* Figure out the wall-clock time. */
	tc->nanotime.tv_sec = tc->offset_sec + boottime.tv_sec;
	tc->nanotime.tv_nsec = (tc->offset_nano >> 32) + boottime.tv_usec * 1000;
	tc->microtime.tv_usec = tc->offset_micro + boottime.tv_usec;
	if (tc->nanotime.tv_nsec >= 1000000000) {
		tc->nanotime.tv_nsec -= 1000000000;
		tc->microtime.tv_usec -= 1000000;
		tc->nanotime.tv_sec++;
	}
	time_second = tc->microtime.tv_sec = tc->nanotime.tv_sec;

	timecounter = tc;
}

static int
sysctl_kern_timecounter_frequency SYSCTL_HANDLER_ARGS
{

	return (sysctl_handle_opaque(oidp, &timecounter->tweak->frequency,
	    sizeof(timecounter->tweak->frequency), req));
}

static int
sysctl_kern_timecounter_adjustment SYSCTL_HANDLER_ARGS
{

	return (sysctl_handle_opaque(oidp, &timecounter->tweak->adjustment,
	    sizeof(timecounter->tweak->adjustment), req));
}

SYSCTL_NODE(_kern, OID_AUTO, timecounter, CTLFLAG_RW, 0, "");

SYSCTL_PROC(_kern_timecounter, OID_AUTO, frequency, CTLTYPE_INT | CTLFLAG_RW,
    0, sizeof(u_int), sysctl_kern_timecounter_frequency, "I", "");

SYSCTL_PROC(_kern_timecounter, OID_AUTO, adjustment, CTLTYPE_INT | CTLFLAG_RW,
    0, sizeof(int), sysctl_kern_timecounter_adjustment, "I", "");

/*
 * Implement a dummy timecounter which we can use until we get a real one
 * in the air.  This allows the console and other early stuff to use
 * time services.
 */

static u_int64_t
dummy_get_timecount(void)
{
	static u_int64_t now;

	return (++now);
}

static struct timecounter dummy_timecounter[3] = {
	{
		0,
		dummy_get_timecount,
		~0,
		100000,
		"dummy"
	}
};

static void
initdummytimecounter(void *dummy)
{
	init_timecounter(dummy_timecounter);
}

SYSINIT(dummytc, SI_SUB_CONSOLE, SI_ORDER_FIRST, initdummytimecounter, NULL)
838