/*-
 * Copyright (c) 1982, 1986, 1991, 1993
 *	The Regents of the University of California.  All rights reserved.
 * (c) UNIX System Laboratories, Inc.
 * All or some portions of this file are derived from material licensed
 * to the University of California by American Telephone and Telegraph
 * Co. or Unix System Laboratories, Inc. and are reproduced herein with
 * the permission of UNIX System Laboratories, Inc.
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 * 1. Redistributions of source code must retain the above copyright
 *    notice, this list of conditions and the following disclaimer.
 * 2. Redistributions in binary form must reproduce the above copyright
 *    notice, this list of conditions and the following disclaimer in the
 *    documentation and/or other materials provided with the distribution.
 * 4. Neither the name of the University nor the names of its contributors
 *    may be used to endorse or promote products derived from this software
 *    without specific prior written permission.
 *
 * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
 * ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
 * SUCH DAMAGE.
 *
 *	@(#)kern_clock.c	8.5 (Berkeley) 1/21/94
 */
361541Srgrimes
37116182Sobrien#include <sys/cdefs.h>
38116182Sobrien__FBSDID("$FreeBSD$");
39116182Sobrien
40170075Semaste#include "opt_kdb.h"
41150968Sglebius#include "opt_device_polling.h"
42147565Speter#include "opt_hwpmc_hooks.h"
43235459Srstone#include "opt_kdtrace.h"
4444666Sphk#include "opt_ntp.h"
45116874Ssmkelly#include "opt_watchdog.h"
4644666Sphk
471541Srgrimes#include <sys/param.h>
481541Srgrimes#include <sys/systm.h>
491541Srgrimes#include <sys/callout.h>
50131927Smarcel#include <sys/kdb.h>
511541Srgrimes#include <sys/kernel.h>
52201879Sattilio#include <sys/kthread.h>
53201879Sattilio#include <sys/ktr.h>
5474914Sjhb#include <sys/lock.h>
5567365Sjhb#include <sys/mutex.h>
561541Srgrimes#include <sys/proc.h>
57111024Sjeff#include <sys/resource.h>
581541Srgrimes#include <sys/resourcevar.h>
59104964Sjeff#include <sys/sched.h>
60235459Srstone#include <sys/sdt.h>
613308Sphk#include <sys/signalvar.h>
62201879Sattilio#include <sys/sleepqueue.h>
6376078Sjhb#include <sys/smp.h>
642320Sdg#include <vm/vm.h>
6512662Sdg#include <vm/pmap.h>
6612662Sdg#include <vm/vm_map.h>
673308Sphk#include <sys/sysctl.h>
6867551Sjhb#include <sys/bus.h>
6967551Sjhb#include <sys/interrupt.h>
70114216Skan#include <sys/limits.h>
71102926Sphk#include <sys/timetc.h>
721541Srgrimes
731541Srgrimes#ifdef GPROF
741541Srgrimes#include <sys/gmon.h>
751541Srgrimes#endif
761541Srgrimes
77146799Sjkoshy#ifdef HWPMC_HOOKS
78146799Sjkoshy#include <sys/pmckern.h>
79233628SfabientPMC_SOFT_DEFINE( , , clock, hard);
80233628SfabientPMC_SOFT_DEFINE( , , clock, stat);
81247836SfabientPMC_SOFT_DEFINE_EX( , , clock, prof, \
82247836Sfabient    cpu_startprofclock, cpu_stopprofclock);
83146799Sjkoshy#endif
84146799Sjkoshy
8587902Sluigi#ifdef DEVICE_POLLING
8687902Sluigiextern void hardclock_device_poll(void);
8787902Sluigi#endif /* DEVICE_POLLING */
8831639Sfsmp
8992723Salfredstatic void initclocks(void *dummy);
90177253SrwatsonSYSINIT(clocks, SI_SUB_CLOCKS, SI_ORDER_FIRST, initclocks, NULL);
9110358Sjulian
92169803Sjeff/* Spin-lock protecting profiling statistics. */
93170468Sattiliostatic struct mtx time_lock;
94169803Sjeff
95235459SrstoneSDT_PROVIDER_DECLARE(sched);
96260817SavgSDT_PROBE_DEFINE2(sched, , , tick, "struct thread *", "struct proc *");
97235459Srstone
98147692Speterstatic int
99147692Spetersysctl_kern_cp_time(SYSCTL_HANDLER_ARGS)
100147692Speter{
101147692Speter	int error;
102174070Speter	long cp_time[CPUSTATES];
103147703Sps#ifdef SCTL_MASK32
104147692Speter	int i;
105147692Speter	unsigned int cp_time32[CPUSTATES];
106174070Speter#endif
107157822Sjhb
108174070Speter	read_cpu_time(cp_time);
109174070Speter#ifdef SCTL_MASK32
110147703Sps	if (req->flags & SCTL_MASK32) {
111147692Speter		if (!req->oldptr)
112147692Speter			return SYSCTL_OUT(req, 0, sizeof(cp_time32));
113147692Speter		for (i = 0; i < CPUSTATES; i++)
114147692Speter			cp_time32[i] = (unsigned int)cp_time[i];
115147692Speter		error = SYSCTL_OUT(req, cp_time32, sizeof(cp_time32));
116147692Speter	} else
117147692Speter#endif
118147692Speter	{
119147692Speter		if (!req->oldptr)
120147692Speter			return SYSCTL_OUT(req, 0, sizeof(cp_time));
121147692Speter		error = SYSCTL_OUT(req, cp_time, sizeof(cp_time));
122147692Speter	}
123147692Speter	return error;
124147692Speter}
125147692Speter
126192304SedSYSCTL_PROC(_kern, OID_AUTO, cp_time, CTLTYPE_LONG|CTLFLAG_RD|CTLFLAG_MPSAFE,
127147692Speter    0,0, sysctl_kern_cp_time, "LU", "CPU time statistics");
128147692Speter
129174070Speterstatic long empty[CPUSTATES];
130174070Speter
131174070Speterstatic int
132174070Spetersysctl_kern_cp_times(SYSCTL_HANDLER_ARGS)
133174070Speter{
134174070Speter	struct pcpu *pcpu;
135174070Speter	int error;
136174072Srwatson	int c;
137174070Speter	long *cp_time;
138174070Speter#ifdef SCTL_MASK32
139174070Speter	unsigned int cp_time32[CPUSTATES];
140174072Srwatson	int i;
141174070Speter#endif
142174070Speter
143174070Speter	if (!req->oldptr) {
144174070Speter#ifdef SCTL_MASK32
145174070Speter		if (req->flags & SCTL_MASK32)
146174070Speter			return SYSCTL_OUT(req, 0, sizeof(cp_time32) * (mp_maxid + 1));
147174070Speter		else
148174070Speter#endif
149174070Speter			return SYSCTL_OUT(req, 0, sizeof(long) * CPUSTATES * (mp_maxid + 1));
150174070Speter	}
151174070Speter	for (error = 0, c = 0; error == 0 && c <= mp_maxid; c++) {
152174070Speter		if (!CPU_ABSENT(c)) {
153174070Speter			pcpu = pcpu_find(c);
154174070Speter			cp_time = pcpu->pc_cp_time;
155174070Speter		} else {
156174070Speter			cp_time = empty;
157174070Speter		}
158174070Speter#ifdef SCTL_MASK32
159174070Speter		if (req->flags & SCTL_MASK32) {
160174070Speter			for (i = 0; i < CPUSTATES; i++)
161174070Speter				cp_time32[i] = (unsigned int)cp_time[i];
162174070Speter			error = SYSCTL_OUT(req, cp_time32, sizeof(cp_time32));
163174070Speter		} else
164174070Speter#endif
165174070Speter			error = SYSCTL_OUT(req, cp_time, sizeof(long) * CPUSTATES);
166174070Speter	}
167174070Speter	return error;
168174070Speter}
169174070Speter
170192304SedSYSCTL_PROC(_kern, OID_AUTO, cp_times, CTLTYPE_LONG|CTLFLAG_RD|CTLFLAG_MPSAFE,
171174070Speter    0,0, sysctl_kern_cp_times, "LU", "per-CPU time statistics");
172174070Speter
173201879Sattilio#ifdef DEADLKRES
/*
 * Wait-channel names on which very long sleeps are expected; threads
 * sleeping on these are exempt from the deadlock check below.
 */
static const char *blessed[] = {
	"getblk",
	"so_snd_sx",
	"so_rcv_sx",
	NULL
};
/* Max seconds a thread may sleep on an SX/lockmgr sleepqueue (sysctl). */
static int slptime_threshold = 1800;
/* Max seconds a thread may stay blocked on a turnstile (sysctl). */
static int blktime_threshold = 900;
/* Seconds between successive scans of the process list (sysctl). */
static int sleepfreq = 3;

/*
 * Deadlock-resolver kthread body: periodically walk every thread in
 * the system and panic if one has been blocked on a turnstile, or
 * sleeping on an SX/lockmgr sleepqueue, for longer than the configured
 * thresholds.  Never returns.
 */
static void
deadlkres(void)
{
	struct proc *p;
	struct thread *td;
	void *wchan;
	int blkticks, i, slpticks, slptype, tryl, tticks;

	tryl = 0;
	for (;;) {
		/* Recompute thresholds in ticks; the sysctls may change. */
		blkticks = blktime_threshold * hz;
		slpticks = slptime_threshold * hz;

		/*
		 * Avoid to sleep on the sx_lock in order to avoid a possible
		 * priority inversion problem leading to starvation.
		 * If the lock can't be held after 100 tries, panic.
		 */
		if (!sx_try_slock(&allproc_lock)) {
			if (tryl > 100)
		panic("%s: possible deadlock detected on allproc_lock\n",
				    __func__);
			tryl++;
			pause("allproc", sleepfreq * hz);
			continue;
		}
		tryl = 0;
		FOREACH_PROC_IN_SYSTEM(p) {
			PROC_LOCK(p);
			if (p->p_state == PRS_NEW) {
				/* Not fully constructed yet; skip it. */
				PROC_UNLOCK(p);
				continue;
			}
			FOREACH_THREAD_IN_PROC(p, td) {

				thread_lock(td);
				if (TD_ON_LOCK(td)) {

					/*
					 * The thread should be blocked on a
					 * turnstile, simply check if the
					 * turnstile channel is in good state.
					 */
					MPASS(td->td_blocked != NULL);

					tticks = ticks - td->td_blktick;
					thread_unlock(td);
					if (tticks > blkticks) {

						/*
						 * Accordingly with provided
						 * thresholds, this thread is
						 * stuck for too long on a
						 * turnstile.
						 */
						PROC_UNLOCK(p);
						sx_sunlock(&allproc_lock);
	panic("%s: possible deadlock detected for %p, blocked for %d ticks\n",
						    __func__, td, tticks);
					}
				} else if (TD_IS_SLEEPING(td) &&
				    TD_ON_SLEEPQ(td)) {

					/*
					 * Check if the thread is sleeping on a
					 * lock, otherwise skip the check.
					 * Drop the thread lock in order to
					 * avoid a LOR with the sleepqueue
					 * spinlock.
					 */
					wchan = td->td_wchan;
					tticks = ticks - td->td_slptick;
					thread_unlock(td);
					slptype = sleepq_type(wchan);
					if ((slptype == SLEEPQ_SX ||
					    slptype == SLEEPQ_LK) &&
					    tticks > slpticks) {

						/*
						 * Accordingly with provided
						 * thresholds, this thread is
						 * stuck for too long on a
						 * sleepqueue.
						 * However, being on a
						 * sleepqueue, we might still
						 * check for the blessed
						 * list.
						 */
						/*
						 * NOTE: "tryl" is reused here
						 * as a blessed-match flag; it
						 * is reset before the retry
						 * logic above depends on it.
						 */
						tryl = 0;
						for (i = 0; blessed[i] != NULL;
						    i++) {
							if (!strcmp(blessed[i],
							    td->td_wmesg)) {
								tryl = 1;
								break;
							}
						}
						if (tryl != 0) {
							tryl = 0;
							continue;
						}
						PROC_UNLOCK(p);
						sx_sunlock(&allproc_lock);
	panic("%s: possible deadlock detected for %p, blocked for %d ticks\n",
						    __func__, td, tticks);
					}
				} else
					thread_unlock(td);
			}
			PROC_UNLOCK(p);
		}
		sx_sunlock(&allproc_lock);

		/* Sleep for sleepfreq seconds. */
		pause("-", sleepfreq * hz);
	}
}
301201879Sattilio
302201879Sattiliostatic struct kthread_desc deadlkres_kd = {
303201879Sattilio	"deadlkres",
304201879Sattilio	deadlkres,
305201879Sattilio	(struct thread **)NULL
306201879Sattilio};
307201879Sattilio
308201879SattilioSYSINIT(deadlkres, SI_SUB_CLOCKS, SI_ORDER_ANY, kthread_start, &deadlkres_kd);
309201879Sattilio
310227309Sedstatic SYSCTL_NODE(_debug, OID_AUTO, deadlkres, CTLFLAG_RW, 0,
311227309Sed    "Deadlock resolver");
312201879SattilioSYSCTL_INT(_debug_deadlkres, OID_AUTO, slptime_threshold, CTLFLAG_RW,
313201879Sattilio    &slptime_threshold, 0,
314201879Sattilio    "Number of seconds within is valid to sleep on a sleepqueue");
315201879SattilioSYSCTL_INT(_debug_deadlkres, OID_AUTO, blktime_threshold, CTLFLAG_RW,
316201879Sattilio    &blktime_threshold, 0,
317201879Sattilio    "Number of seconds within is valid to block on a turnstile");
318201879SattilioSYSCTL_INT(_debug_deadlkres, OID_AUTO, sleepfreq, CTLFLAG_RW, &sleepfreq, 0,
319201879Sattilio    "Number of seconds between any deadlock resolver thread run");
320201879Sattilio#endif	/* DEADLKRES */
321201879Sattilio
322174070Spetervoid
323174070Speterread_cpu_time(long *cp_time)
324174070Speter{
325174070Speter	struct pcpu *pc;
326174070Speter	int i, j;
327174070Speter
328174070Speter	/* Sum up global cp_time[]. */
329174070Speter	bzero(cp_time, sizeof(long) * CPUSTATES);
330209059Sjhb	CPU_FOREACH(i) {
331174070Speter		pc = pcpu_find(i);
332174070Speter		for (j = 0; j < CPUSTATES; j++)
333174070Speter			cp_time[j] += pc->pc_cp_time[j];
334174070Speter	}
335174070Speter}
336174070Speter
337126383Sphk#ifdef SW_WATCHDOG
338126383Sphk#include <sys/watchdog.h>
339116874Ssmkelly
340126383Sphkstatic int watchdog_ticks;
341116874Ssmkellystatic int watchdog_enabled;
342126383Sphkstatic void watchdog_fire(void);
343126383Sphkstatic void watchdog_config(void *, u_int, int *);
344126383Sphk#endif /* SW_WATCHDOG */
345116874Ssmkelly
3461541Srgrimes/*
3471541Srgrimes * Clock handling routines.
3481541Srgrimes *
34934618Sphk * This code is written to operate with two timers that run independently of
35034618Sphk * each other.
35133690Sphk *
35234618Sphk * The main timer, running hz times per second, is used to trigger interval
35334618Sphk * timers, timeouts and rescheduling as needed.
35433690Sphk *
35534618Sphk * The second timer handles kernel and user profiling,
35634618Sphk * and does resource use estimation.  If the second timer is programmable,
35734618Sphk * it is randomized to avoid aliasing between the two clocks.  For example,
35834618Sphk * the randomization prevents an adversary from always giving up the cpu
3591541Srgrimes * just before its quantum expires.  Otherwise, it would never accumulate
3601541Srgrimes * cpu ticks.  The mean frequency of the second timer is stathz.
36134618Sphk *
36234618Sphk * If no second timer exists, stathz will be zero; in this case we drive
36334618Sphk * profiling and statistics off the main clock.  This WILL NOT be accurate;
36434618Sphk * do not do it unless absolutely necessary.
36534618Sphk *
3661541Srgrimes * The statistics clock may (or may not) be run at a higher rate while
36734618Sphk * profiling.  This profile clock runs at profhz.  We require that profhz
36834618Sphk * be an integral multiple of stathz.
3691541Srgrimes *
37034618Sphk * If the statistics clock is running fast, it must be divided by the ratio
37134618Sphk * profhz/stathz for statistics.  (For profiling, every tick counts.)
37234618Sphk *
37333690Sphk * Time-of-day is maintained using a "timecounter", which may or may
37433690Sphk * not be related to the hardware generating the above mentioned
37533690Sphk * interrupts.
3761541Srgrimes */
3771541Srgrimes
int	stathz;			/* statistics clock rate (Hz); 0 if absent */
int	profhz;			/* profiling clock rate (Hz) */
int	profprocs;		/* number of processes being profiled */
volatile int	ticks;		/* global hardclock tick counter */
int	psratio;		/* ratio profhz / stathz (see initclocks()) */

/* Per-CPU shadow of "ticks"; folded into the global in hardclock_cnt(). */
static DPCPU_DEFINE(int, pcputicks);	/* Per-CPU version of ticks. */
/* Nonzero while one CPU performs the global per-tick duties. */
static int global_hardclock_run = 0;
386208494Smav
3871541Srgrimes/*
3881541Srgrimes * Initialize clock frequencies and start both clocks running.
3891541Srgrimes */
39010358Sjulian/* ARGSUSED*/
39110358Sjulianstatic void
39212569Sbdeinitclocks(dummy)
39312569Sbde	void *dummy;
3941541Srgrimes{
3951541Srgrimes	register int i;
3961541Srgrimes
3971541Srgrimes	/*
3981541Srgrimes	 * Set divisors to 1 (normal case) and let the machine-specific
3991541Srgrimes	 * code do its bit.
4001541Srgrimes	 */
401209371Smav	mtx_init(&time_lock, "time lock", NULL, MTX_DEF);
4021541Srgrimes	cpu_initclocks();
4031541Srgrimes
4041541Srgrimes	/*
4051541Srgrimes	 * Compute profhz/stathz, and fix profhz if needed.
4061541Srgrimes	 */
4071541Srgrimes	i = stathz ? stathz : hz;
4081541Srgrimes	if (profhz == 0)
4091541Srgrimes		profhz = i;
4101541Srgrimes	psratio = profhz / i;
411126383Sphk#ifdef SW_WATCHDOG
412126383Sphk	EVENTHANDLER_REGISTER(watchdog_list, watchdog_config, NULL, 0);
413126383Sphk#endif
4141541Srgrimes}
4151541Srgrimes
/*
 * Each time the real-time timer fires, this function is called on all CPUs.
 * Note that hardclock() calls hardclock_cpu() for the boot CPU, so only
 * the other CPUs in the system need to call this function.
 *
 * Decrements the current process's virtual/profiling interval timers,
 * charges a scheduler tick, delivers pending PMC samples, and finally
 * runs expired callouts for this CPU.
 */
void
hardclock_cpu(int usermode)
{
	struct pstats *pstats;
	struct thread *td = curthread;
	struct proc *p = td->td_proc;
	int flags;

	/*
	 * Run current process's virtual and profile time, as needed.
	 * ITIMER_VIRTUAL only counts down while in user mode.
	 */
	pstats = p->p_stats;
	flags = 0;
	if (usermode &&
	    timevalisset(&pstats->p_timer[ITIMER_VIRTUAL].it_value)) {
		PROC_SLOCK(p);
		if (itimerdecr(&pstats->p_timer[ITIMER_VIRTUAL], tick) == 0)
			flags |= TDF_ALRMPEND | TDF_ASTPENDING;
		PROC_SUNLOCK(p);
	}
	if (timevalisset(&pstats->p_timer[ITIMER_PROF].it_value)) {
		PROC_SLOCK(p);
		if (itimerdecr(&pstats->p_timer[ITIMER_PROF], tick) == 0)
			flags |= TDF_PROFPEND | TDF_ASTPENDING;
		PROC_SUNLOCK(p);
	}
	/* Apply the collected AST flags under the thread lock. */
	thread_lock(td);
	sched_tick(1);
	td->td_flags |= flags;
	thread_unlock(td);

#ifdef HWPMC_HOOKS
	if (PMC_CPU_HAS_SAMPLES(PCPU_GET(cpuid)))
		PMC_CALL_HOOK_UNLOCKED(curthread, PMC_FN_DO_SAMPLES, NULL);
	if (td->td_intr_frame != NULL)
		PMC_SOFT_CALL_TF( , , clock, hard, td->td_intr_frame);
#endif
	/* Service callouts that have expired as of the current uptime. */
	callout_process(sbinuptime());
}
46076078Sjhb
/*
 * The real-time timer, interrupting hz times per second.
 *
 * Advances the global "ticks" counter, performs the per-CPU duties via
 * hardclock_cpu(), drives the timecounter, and — when no separate
 * statistics clock exists — runs the stat/profile clocks from here.
 */
void
hardclock(int usermode, uintfptr_t pc)
{

	atomic_add_int(&ticks, 1);
	hardclock_cpu(usermode);
	tc_ticktock(1);
	cpu_tick_calibration();
	/*
	 * If no separate statistics clock is available, run it from here.
	 *
	 * XXX: this only works for UP
	 */
	if (stathz == 0) {
		profclock(usermode, pc);
		statclock(usermode);
	}
#ifdef DEVICE_POLLING
	hardclock_device_poll();	/* this is very short and quick */
#endif /* DEVICE_POLLING */
#ifdef SW_WATCHDOG
	/* Count the software watchdog down; fire on expiry. */
	if (watchdog_enabled > 0 && --watchdog_ticks <= 0)
		watchdog_fire();
#endif /* SW_WATCHDOG */
}
4891541Srgrimes
/*
 * Event-timer variant of hardclock(): account for "cnt" hardclock ticks
 * at once on the current CPU.  The per-CPU tick count is merged into the
 * global "ticks" with a CAS loop so that only one CPU actually advances
 * the global value; that CPU ("newticks > 0") also takes over the global
 * per-tick duties (timecounter, device polling, software watchdog).
 */
void
hardclock_cnt(int cnt, int usermode)
{
	struct pstats *pstats;
	struct thread *td = curthread;
	struct proc *p = td->td_proc;
	int *t = DPCPU_PTR(pcputicks);
	int flags, global, newticks;
#ifdef SW_WATCHDOG
	int i;
#endif /* SW_WATCHDOG */

	/*
	 * Update per-CPU and possibly global ticks values.
	 */
	*t += cnt;
	do {
		global = ticks;
		newticks = *t - global;
		if (newticks <= 0) {
			/*
			 * Another CPU already advanced the global counter
			 * past us; clamp our shadow so it cannot lag by
			 * more than one tick.
			 */
			if (newticks < -1)
				*t = global - 1;
			newticks = 0;
			break;
		}
	} while (!atomic_cmpset_int(&ticks, global, *t));

	/*
	 * Run current process's virtual and profile time, as needed.
	 */
	pstats = p->p_stats;
	flags = 0;
	if (usermode &&
	    timevalisset(&pstats->p_timer[ITIMER_VIRTUAL].it_value)) {
		PROC_SLOCK(p);
		if (itimerdecr(&pstats->p_timer[ITIMER_VIRTUAL],
		    tick * cnt) == 0)
			flags |= TDF_ALRMPEND | TDF_ASTPENDING;
		PROC_SUNLOCK(p);
	}
	if (timevalisset(&pstats->p_timer[ITIMER_PROF].it_value)) {
		PROC_SLOCK(p);
		if (itimerdecr(&pstats->p_timer[ITIMER_PROF],
		    tick * cnt) == 0)
			flags |= TDF_PROFPEND | TDF_ASTPENDING;
		PROC_SUNLOCK(p);
	}
	/* Apply the collected AST flags under the thread lock. */
	thread_lock(td);
	sched_tick(cnt);
	td->td_flags |= flags;
	thread_unlock(td);

#ifdef	HWPMC_HOOKS
	if (PMC_CPU_HAS_SAMPLES(PCPU_GET(cpuid)))
		PMC_CALL_HOOK_UNLOCKED(curthread, PMC_FN_DO_SAMPLES, NULL);
	if (td->td_intr_frame != NULL)
		PMC_SOFT_CALL_TF( , , clock, hard, td->td_intr_frame);
#endif
	/* We are in charge to handle this tick duty. */
	if (newticks > 0) {
		/* Dangerous and no need to call these things concurrently. */
		if (atomic_cmpset_acq_int(&global_hardclock_run, 0, 1)) {
			tc_ticktock(newticks);
#ifdef DEVICE_POLLING
			/* This is very short and quick. */
			hardclock_device_poll();
#endif /* DEVICE_POLLING */
			atomic_store_rel_int(&global_hardclock_run, 0);
		}
#ifdef SW_WATCHDOG
		if (watchdog_enabled > 0) {
			/* Fire only on the transition through zero. */
			i = atomic_fetchadd_int(&watchdog_ticks, -newticks);
			if (i > 0 && i <= newticks)
				watchdog_fire();
		}
#endif /* SW_WATCHDOG */
	}
	if (curcpu == CPU_FIRST())
		cpu_tick_calibration();
}
570212541Smav
571212541Smavvoid
572212541Smavhardclock_sync(int cpu)
573212541Smav{
574212541Smav	int	*t = DPCPU_ID_PTR(cpu, pcputicks);
575212541Smav
576212541Smav	*t = ticks;
577212541Smav}
578212541Smav
5791541Srgrimes/*
58034961Sphk * Compute number of ticks in the specified amount of time.
5811541Srgrimes */
5821541Srgrimesint
58334961Sphktvtohz(tv)
5841541Srgrimes	struct timeval *tv;
5851541Srgrimes{
5865081Sbde	register unsigned long ticks;
5875081Sbde	register long sec, usec;
5881541Srgrimes
5891541Srgrimes	/*
5905081Sbde	 * If the number of usecs in the whole seconds part of the time
5915081Sbde	 * difference fits in a long, then the total number of usecs will
5925081Sbde	 * fit in an unsigned long.  Compute the total and convert it to
5935081Sbde	 * ticks, rounding up and adding 1 to allow for the current tick
5945081Sbde	 * to expire.  Rounding also depends on unsigned long arithmetic
5955081Sbde	 * to avoid overflow.
5961541Srgrimes	 *
5975081Sbde	 * Otherwise, if the number of ticks in the whole seconds part of
5985081Sbde	 * the time difference fits in a long, then convert the parts to
5995081Sbde	 * ticks separately and add, using similar rounding methods and
6005081Sbde	 * overflow avoidance.  This method would work in the previous
6015081Sbde	 * case but it is slightly slower and assumes that hz is integral.
6025081Sbde	 *
6035081Sbde	 * Otherwise, round the time difference down to the maximum
6045081Sbde	 * representable value.
6055081Sbde	 *
6065081Sbde	 * If ints have 32 bits, then the maximum value for any timeout in
6075081Sbde	 * 10ms ticks is 248 days.
6081541Srgrimes	 */
60934961Sphk	sec = tv->tv_sec;
61034961Sphk	usec = tv->tv_usec;
6115081Sbde	if (usec < 0) {
6125081Sbde		sec--;
6135081Sbde		usec += 1000000;
6145081Sbde	}
6155081Sbde	if (sec < 0) {
6165081Sbde#ifdef DIAGNOSTIC
61734618Sphk		if (usec > 0) {
61833690Sphk			sec++;
61933690Sphk			usec -= 1000000;
62033690Sphk		}
62134961Sphk		printf("tvotohz: negative time difference %ld sec %ld usec\n",
6225081Sbde		       sec, usec);
6235081Sbde#endif
6245081Sbde		ticks = 1;
6255081Sbde	} else if (sec <= LONG_MAX / 1000000)
6265081Sbde		ticks = (sec * 1000000 + (unsigned long)usec + (tick - 1))
6275081Sbde			/ tick + 1;
6285081Sbde	else if (sec <= LONG_MAX / hz)
6295081Sbde		ticks = sec * hz
6305081Sbde			+ ((unsigned long)usec + (tick - 1)) / tick + 1;
6311541Srgrimes	else
6325081Sbde		ticks = LONG_MAX;
6335081Sbde	if (ticks > INT_MAX)
6345081Sbde		ticks = INT_MAX;
63540012Salex	return ((int)ticks);
6361541Srgrimes}
6371541Srgrimes
6381541Srgrimes/*
6391541Srgrimes * Start profiling on a process.
6401541Srgrimes *
6411541Srgrimes * Kernel profiling passes proc0 which never exits and hence
6421541Srgrimes * keeps the profile clock running constantly.
6431541Srgrimes */
6441541Srgrimesvoid
6451541Srgrimesstartprofclock(p)
6461541Srgrimes	register struct proc *p;
6471541Srgrimes{
6481541Srgrimes
649113874Sjhb	PROC_LOCK_ASSERT(p, MA_OWNED);
650113874Sjhb	if (p->p_flag & P_STOPPROF)
651110530Sjulian		return;
652113874Sjhb	if ((p->p_flag & P_PROFIL) == 0) {
653113874Sjhb		p->p_flag |= P_PROFIL;
654209371Smav		mtx_lock(&time_lock);
655110296Sjake		if (++profprocs == 1)
656110296Sjake			cpu_startprofclock();
657209371Smav		mtx_unlock(&time_lock);
6581541Srgrimes	}
6591541Srgrimes}
6601541Srgrimes
6611541Srgrimes/*
6621541Srgrimes * Stop profiling on a process.
6631541Srgrimes */
6641541Srgrimesvoid
6651541Srgrimesstopprofclock(p)
6661541Srgrimes	register struct proc *p;
6671541Srgrimes{
6681541Srgrimes
669110530Sjulian	PROC_LOCK_ASSERT(p, MA_OWNED);
670113874Sjhb	if (p->p_flag & P_PROFIL) {
671113874Sjhb		if (p->p_profthreads != 0) {
672113874Sjhb			p->p_flag |= P_STOPPROF;
673113874Sjhb			while (p->p_profthreads != 0)
674113874Sjhb				msleep(&p->p_profthreads, &p->p_mtx, PPAUSE,
675123740Speter				    "stopprof", 0);
676113874Sjhb			p->p_flag &= ~P_STOPPROF;
677110530Sjulian		}
678128852Scperciva		if ((p->p_flag & P_PROFIL) == 0)
679128852Scperciva			return;
680113874Sjhb		p->p_flag &= ~P_PROFIL;
681209371Smav		mtx_lock(&time_lock);
682110296Sjake		if (--profprocs == 0)
683110296Sjake			cpu_stopprofclock();
684209371Smav		mtx_unlock(&time_lock);
6851541Srgrimes	}
6861541Srgrimes}
6871541Srgrimes
/*
 * Statistics clock.  Updates rusage information and calls the scheduler
 * to adjust priorities of the active thread.
 *
 * This should be called by all active processors.
 */
/* Single-tick convenience wrapper around statclock_cnt(). */
void
statclock(int usermode)
{

	statclock_cnt(1, usermode);
}
700232783Smav
/*
 * Account "cnt" statistics-clock ticks against the currently running
 * thread: charge the appropriate per-CPU CP_* time bucket, update the
 * thread's rusage integrals, and give the scheduler "cnt" stat ticks.
 */
void
statclock_cnt(int cnt, int usermode)
{
	struct rusage *ru;
	struct vmspace *vm;
	struct thread *td;
	struct proc *p;
	long rss;
	long *cp_time;

	td = curthread;
	p = td->td_proc;

	cp_time = (long *)PCPU_PTR(cp_time);
	if (usermode) {
		/*
		 * Charge the time as appropriate.
		 */
		td->td_uticks += cnt;
		if (p->p_nice > NZERO)
			cp_time[CP_NICE] += cnt;
		else
			cp_time[CP_USER] += cnt;
	} else {
		/*
		 * Came from kernel mode, so we were:
		 * - handling an interrupt,
		 * - doing syscall or trap work on behalf of the current
		 *   user process, or
		 * - spinning in the idle loop.
		 * Whichever it is, charge the time as appropriate.
		 * Note that we charge interrupts to the current process,
		 * regardless of whether they are ``for'' that process,
		 * so that we know how much of its real time was spent
		 * in ``non-process'' (i.e., interrupt) work.
		 */
		if ((td->td_pflags & TDP_ITHREAD) ||
		    td->td_intr_nesting_level >= 2) {
			td->td_iticks += cnt;
			cp_time[CP_INTR] += cnt;
		} else {
			td->td_pticks += cnt;
			td->td_sticks += cnt;
			if (!TD_IS_IDLETHREAD(td))
				cp_time[CP_SYS] += cnt;
			else
				cp_time[CP_IDLE] += cnt;
		}
	}

	/* Update resource usage integrals and maximums. */
	MPASS(p->p_vmspace != NULL);
	vm = p->p_vmspace;
	ru = &td->td_ru;
	ru->ru_ixrss += pgtok(vm->vm_tsize) * cnt;
	ru->ru_idrss += pgtok(vm->vm_dsize) * cnt;
	ru->ru_isrss += pgtok(vm->vm_ssize) * cnt;
	rss = pgtok(vmspace_resident_count(vm));
	if (ru->ru_maxrss < rss)
		ru->ru_maxrss = rss;
	KTR_POINT2(KTR_SCHED, "thread", sched_tdname(td), "statclock",
	    "prio:%d", td->td_priority, "stathz:%d", (stathz)?stathz:hz);
	SDT_PROBE2(sched, , , tick, td, td->td_proc);
	/* sched_clock() takes one tick at a time; loop for cnt ticks. */
	thread_lock_flags(td, MTX_QUIET);
	for ( ; cnt > 0; cnt--)
		sched_clock(td);
	thread_unlock(td);
#ifdef HWPMC_HOOKS
	if (td->td_intr_frame != NULL)
		PMC_SOFT_CALL_TF( , , clock, stat, td->td_intr_frame);
#endif
}
77366716Sjhb
/* Single-tick convenience wrapper around profclock_cnt(). */
void
profclock(int usermode, uintfptr_t pc)
{

	profclock_cnt(1, usermode, pc);
}
780232783Smav
/*
 * Account "cnt" profiling-clock ticks at program counter "pc":
 * user-mode ticks go to the process's profile buffer when P_PROFIL is
 * set; kernel-mode ticks feed the GPROF kernel histogram when enabled.
 */
void
profclock_cnt(int cnt, int usermode, uintfptr_t pc)
{
	struct thread *td;
#ifdef GPROF
	struct gmonparam *g;
	uintfptr_t i;
#endif

	td = curthread;
	if (usermode) {
		/*
		 * Came from user mode; CPU was in user state.
		 * If this process is being profiled, record the tick.
		 * If there is no related user location yet, don't
		 * bother trying to count it.
		 */
		if (td->td_proc->p_flag & P_PROFIL)
			addupc_intr(td, pc, cnt);
	}
#ifdef GPROF
	else {
		/*
		 * Kernel statistics are just like addupc_intr, only easier.
		 */
		g = &_gmonparam;
		if (g->state == GMON_PROF_ON && pc >= g->lowpc) {
			i = PC_TO_I(g, pc);
			if (i < g->textsize) {
				KCOUNT(g, i) += cnt;
			}
		}
	}
#endif
#ifdef HWPMC_HOOKS
	if (td->td_intr_frame != NULL)
		PMC_SOFT_CALL_TF( , , clock, prof, td->td_intr_frame);
#endif
}
8201541Srgrimes
8211541Srgrimes/*
8221541Srgrimes * Return information about system clocks.
8231541Srgrimes */
82412152Sphkstatic int
82562573Sphksysctl_kern_clockrate(SYSCTL_HANDLER_ARGS)
8261541Srgrimes{
8271541Srgrimes	struct clockinfo clkinfo;
8281541Srgrimes	/*
8291541Srgrimes	 * Construct clockinfo structure.
8301541Srgrimes	 */
83196052Sbde	bzero(&clkinfo, sizeof(clkinfo));
8321541Srgrimes	clkinfo.hz = hz;
8331541Srgrimes	clkinfo.tick = tick;
8341541Srgrimes	clkinfo.profhz = profhz;
8351541Srgrimes	clkinfo.stathz = stathz ? stathz : hz;
83612243Sphk	return (sysctl_handle_opaque(oidp, &clkinfo, sizeof clkinfo, req));
8371541Srgrimes}
8382858Swollman
839192304SedSYSCTL_PROC(_kern, KERN_CLOCKRATE, clockrate,
840192304Sed	CTLTYPE_STRUCT|CTLFLAG_RD|CTLFLAG_MPSAFE,
84188019Sluigi	0, 0, sysctl_kern_clockrate, "S,clockinfo",
84288019Sluigi	"Rate and period of various kernel clocks");
843116874Ssmkelly
844126383Sphk#ifdef SW_WATCHDOG
845126383Sphk
/*
 * watchdog_list eventhandler callback: (re)configure the software
 * watchdog.  The WD_INTERVAL bits of "cmd" encode the timeout as a
 * power-of-two exponent relative to one second (WD_TO_1SEC).
 */
static void
watchdog_config(void *unused __unused, u_int cmd, int *error)
{
	u_int u;

	u = cmd & WD_INTERVAL;
	if (u >= WD_TO_1SEC) {
		/* Convert the exponent into hardclock ticks and arm. */
		watchdog_ticks = (1 << (u - WD_TO_1SEC)) * hz;
		watchdog_enabled = 1;
		*error = 0;
	} else {
		/*
		 * Sub-second (or zero) interval: disarm.
		 * NOTE(review): *error is left untouched on this path —
		 * presumably so the request is not reported as claimed by
		 * the software watchdog; confirm against watchdog(9).
		 */
		watchdog_enabled = 0;
	}
}
860116874Ssmkelly
861116874Ssmkelly/*
862116874Ssmkelly * Handle a watchdog timeout by dumping interrupt information and
863170075Semaste * then either dropping to DDB or panicking.
864116874Ssmkelly */
865116874Ssmkellystatic void
866116874Ssmkellywatchdog_fire(void)
867116874Ssmkelly{
868116874Ssmkelly	int nintr;
869209390Sed	uint64_t inttotal;
870116874Ssmkelly	u_long *curintr;
871116874Ssmkelly	char *curname;
872116874Ssmkelly
873116874Ssmkelly	curintr = intrcnt;
874116874Ssmkelly	curname = intrnames;
875116874Ssmkelly	inttotal = 0;
876225788Smav	nintr = sintrcnt / sizeof(u_long);
877157822Sjhb
878116874Ssmkelly	printf("interrupt                   total\n");
879116874Ssmkelly	while (--nintr >= 0) {
880116874Ssmkelly		if (*curintr)
881116874Ssmkelly			printf("%-12s %20lu\n", curname, *curintr);
882116874Ssmkelly		curname += strlen(curname) + 1;
883116874Ssmkelly		inttotal += *curintr++;
884116874Ssmkelly	}
885116908Ssmkelly	printf("Total        %20ju\n", (uintmax_t)inttotal);
886170075Semaste
887170075Semaste#if defined(KDB) && !defined(KDB_UNATTENDED)
888170075Semaste	kdb_backtrace();
889174898Srwatson	kdb_enter(KDB_WHY_WATCHDOG, "watchdog timeout");
890170075Semaste#else
891116874Ssmkelly	panic("watchdog timeout");
892170075Semaste#endif
893116874Ssmkelly}
894116874Ssmkelly
895126383Sphk#endif /* SW_WATCHDOG */
896