1209371Smav/*-
2247777Sdavide * Copyright (c) 2010-2013 Alexander Motin <mav@FreeBSD.org>
3209371Smav * All rights reserved.
4209371Smav *
5209371Smav * Redistribution and use in source and binary forms, with or without
6209371Smav * modification, are permitted provided that the following conditions
7209371Smav * are met:
8209371Smav * 1. Redistributions of source code must retain the above copyright
9209371Smav *    notice, this list of conditions and the following disclaimer,
10209371Smav *    without modification, immediately at the beginning of the file.
11209371Smav * 2. Redistributions in binary form must reproduce the above copyright
12209371Smav *    notice, this list of conditions and the following disclaimer in the
13209371Smav *    documentation and/or other materials provided with the distribution.
14209371Smav *
15209371Smav * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR
16209371Smav * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
17209371Smav * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.
18209371Smav * IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT,
19209371Smav * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT
20209371Smav * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
21209371Smav * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
22209371Smav * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
23209371Smav * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF
24209371Smav * THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
25209371Smav */
26209371Smav
27209371Smav#include <sys/cdefs.h>
28209371Smav__FBSDID("$FreeBSD: stable/10/sys/kern/kern_clocksource.c 315255 2017-03-14 15:37:29Z hselasky $");
29209371Smav
30209371Smav/*
31209371Smav * Common routines to manage event timers hardware.
32209371Smav */
33209371Smav
34212992Smav#include "opt_device_polling.h"
35209371Smav#include "opt_kdtrace.h"
36209371Smav
37209371Smav#include <sys/param.h>
38209371Smav#include <sys/systm.h>
39209371Smav#include <sys/bus.h>
40247777Sdavide#include <sys/limits.h>
41209371Smav#include <sys/lock.h>
42209371Smav#include <sys/kdb.h>
43212541Smav#include <sys/ktr.h>
44209371Smav#include <sys/mutex.h>
45209371Smav#include <sys/proc.h>
46209371Smav#include <sys/kernel.h>
47209371Smav#include <sys/sched.h>
48209371Smav#include <sys/smp.h>
49209371Smav#include <sys/sysctl.h>
50209371Smav#include <sys/timeet.h>
51212603Smav#include <sys/timetc.h>
52209371Smav
53209371Smav#include <machine/atomic.h>
54209371Smav#include <machine/clock.h>
55209371Smav#include <machine/cpu.h>
56209371Smav#include <machine/smp.h>
57209371Smav
/* C-state constraints exported to the machine-dependent idle code. */
int			cpu_deepest_sleep = 0;	/* Deepest Cx state available. */
int			cpu_disable_c2_sleep = 0; /* Timer dies in C2. */
int			cpu_disable_c3_sleep = 0; /* Timer dies in C3. */

static void		setuptimer(void);
static void		loadtimer(sbintime_t now, int first);
static int		doconfigtimer(void);
static void		configtimer(int start);
static int		round_freq(struct eventtimer *et, int freq);

static sbintime_t	getnextcpuevent(int idle);
static sbintime_t	getnextevent(void);
static int		handleevents(sbintime_t now, int fake);

/* Protects hardware state of a global (non-per-CPU) event timer. */
static struct mtx	et_hw_mtx;

/*
 * Lock/unlock the proper timer hardware mutex: the per-CPU one from
 * 'state' for per-CPU timers, the global et_hw_mtx otherwise.
 */
#define	ET_HW_LOCK(state)						\
	{								\
		if (timer->et_flags & ET_FLAGS_PERCPU)			\
			mtx_lock_spin(&(state)->et_hw_mtx);		\
		else							\
			mtx_lock_spin(&et_hw_mtx);			\
	}

#define	ET_HW_UNLOCK(state)						\
	{								\
		if (timer->et_flags & ET_FLAGS_PERCPU)			\
			mtx_unlock_spin(&(state)->et_hw_mtx);		\
		else							\
			mtx_unlock_spin(&et_hw_mtx);			\
	}

static struct eventtimer *timer = NULL;	/* Currently active event timer. */
static sbintime_t	timerperiod;	/* Timer period for periodic mode. */
static sbintime_t	statperiod;	/* statclock() events period. */
static sbintime_t	profperiod;	/* profclock() events period. */
static sbintime_t	nexttick;	/* Next global timer tick time. */
static u_int		busy = 1;	/* Reconfiguration is in progress. */
static int		profiling = 0;	/* Profiling events enabled. */

static char		timername[32];	/* Wanted timer. */
TUNABLE_STR("kern.eventtimer.timer", timername, sizeof(timername));

static int		singlemul = 0;	/* Multiplier for periodic mode. */
TUNABLE_INT("kern.eventtimer.singlemul", &singlemul);
SYSCTL_INT(_kern_eventtimer, OID_AUTO, singlemul, CTLFLAG_RW, &singlemul,
    0, "Multiplier for periodic mode");

static u_int		idletick = 0;	/* Run periodic events when idle. */
TUNABLE_INT("kern.eventtimer.idletick", &idletick);
SYSCTL_UINT(_kern_eventtimer, OID_AUTO, idletick, CTLFLAG_RW, &idletick,
    0, "Run periodic events when idle");

static int		periodic = 0;	/* Periodic or one-shot mode. */
static int		want_periodic = 0; /* What mode to prefer. */
TUNABLE_INT("kern.eventtimer.periodic", &want_periodic);

/* Per-CPU timer scheduling state. */
struct pcpu_state {
	struct mtx	et_hw_mtx;	/* Per-CPU timer mutex. */
	u_int		action;		/* Reconfiguration requests. */
	u_int		handle;		/* Immediate handle requests. */
	sbintime_t	now;		/* Last tick time. */
	sbintime_t	nextevent;	/* Next scheduled event on this CPU. */
	sbintime_t	nexttick;	/* Next timer tick time. */
	sbintime_t	nexthard;	/* Next hardclock() event. */
	sbintime_t	nextstat;	/* Next statclock() event. */
	sbintime_t	nextprof;	/* Next profclock() event. */
	sbintime_t	nextcall;	/* Next callout event. */
	sbintime_t	nextcallopt;	/* Next optional callout event. */
	int		ipi;		/* This CPU needs IPI. */
	int		idle;		/* This CPU is in idle mode. */
};

static DPCPU_DEFINE(struct pcpu_state, timerstate);
DPCPU_DEFINE(sbintime_t, hardclocktime);
133212541Smav
/*
 * Timer broadcast IPI handler.
 *
 * Runs on CPUs that received IPI_HARDCLOCK from the CPU owning a global
 * timer.  Uses the broadcast timestamp stored in state->now by timercb()
 * rather than reading the clock again.
 */
int
hardclockintr(void)
{
	sbintime_t now;
	struct pcpu_state *state;
	int done;

	/* An IPI may also be a reconfiguration request; handle and bail. */
	if (doconfigtimer() || busy)
		return (FILTER_HANDLED);
	state = DPCPU_PTR(timerstate);
	now = state->now;
	CTR3(KTR_SPARE2, "ipi  at %d:    now  %d.%08x",
	    curcpu, (int)(now >> 32), (u_int)(now & 0xffffffff));
	done = handleevents(now, 0);
	return (done ? FILTER_HANDLED : FILTER_STRAY);
}
153212541Smav
/*
 * Handle all events for specified time on this CPU.
 *
 * 'fake' selects the calling context:
 *   0 - real timer interrupt (trapframe valid, all clocks run);
 *   1 - called from cpu_activeclock() on idle exit (no frame, so
 *       profclock is skipped, but hardclock/statclock catch up);
 *   2 - AP startup from cpu_initclocks_ap() (only per-CPU state is
 *       advanced, no clock handlers are invoked).
 * Returns non-zero if at least one clock handler was executed.
 */
static int
handleevents(sbintime_t now, int fake)
{
	sbintime_t t, *hct;
	struct trapframe *frame;
	struct pcpu_state *state;
	int usermode;
	int done, runs;

	CTR3(KTR_SPARE2, "handle at %d:  now  %d.%08x",
	    curcpu, (int)(now >> 32), (u_int)(now & 0xffffffff));
	done = 0;
	if (fake) {
		frame = NULL;
		usermode = 0;
	} else {
		frame = curthread->td_intr_frame;
		usermode = TRAPF_USERMODE(frame);
	}

	state = DPCPU_PTR(timerstate);

	/* Count overdue hardclock() periods and advance the schedule. */
	runs = 0;
	while (now >= state->nexthard) {
		state->nexthard += tick_sbt;
		runs++;
	}
	if (runs) {
		/* Publish the time of the last covered hardclock() tick. */
		hct = DPCPU_PTR(hardclocktime);
		*hct = state->nexthard - tick_sbt;
		if (fake < 2) {
			hardclock_cnt(runs, usermode);
			done = 1;
		}
	}
	/* Same catch-up scheme for statclock(). */
	runs = 0;
	while (now >= state->nextstat) {
		state->nextstat += statperiod;
		runs++;
	}
	if (runs && fake < 2) {
		statclock_cnt(runs, usermode);
		done = 1;
	}
	if (profiling) {
		runs = 0;
		while (now >= state->nextprof) {
			state->nextprof += profperiod;
			runs++;
		}
		/* profclock needs a trapframe PC, so only for fake == 0. */
		if (runs && !fake) {
			profclock_cnt(runs, usermode, TRAPF_PC(frame));
			done = 1;
		}
	} else
		state->nextprof = state->nextstat;
	/* Run due callouts; callout_process() will set new nextcall(opt). */
	if (now >= state->nextcallopt || now >= state->nextcall) {
		state->nextcall = state->nextcallopt = SBT_MAX;
		callout_process(now);
	}

	/* Schedule the next event and reprogram hardware if needed. */
	t = getnextcpuevent(0);
	ET_HW_LOCK(state);
	if (!busy) {
		state->idle = 0;
		state->nextevent = t;
		/* On AP startup a per-CPU timer must be started from here. */
		loadtimer(now, (fake == 2) &&
		    (timer->et_flags & ET_FLAGS_PERCPU));
	}
	ET_HW_UNLOCK(state);
	return (done);
}
229209371Smav
230212541Smav/*
231212541Smav * Schedule binuptime of the next event on current CPU.
232212541Smav */
233247777Sdavidestatic sbintime_t
234247777Sdavidegetnextcpuevent(int idle)
235209371Smav{
236247777Sdavide	sbintime_t event;
237212541Smav	struct pcpu_state *state;
238247777Sdavide	u_int hardfreq;
239209371Smav
240212541Smav	state = DPCPU_PTR(timerstate);
241247777Sdavide	/* Handle hardclock() events, skipping some if CPU is idle. */
242247777Sdavide	event = state->nexthard;
243247777Sdavide	if (idle) {
244247777Sdavide		hardfreq = (u_int)hz / 2;
245247777Sdavide		if (tc_min_ticktock_freq > 2
246247777Sdavide#ifdef SMP
247247777Sdavide		    && curcpu == CPU_FIRST()
248247777Sdavide#endif
249247777Sdavide		    )
250247777Sdavide			hardfreq = hz / tc_min_ticktock_freq;
251247777Sdavide		if (hardfreq > 1)
252247777Sdavide			event += tick_sbt * (hardfreq - 1);
253232919Smav	}
254247777Sdavide	/* Handle callout events. */
255247777Sdavide	if (event > state->nextcall)
256247777Sdavide		event = state->nextcall;
257232919Smav	if (!idle) { /* If CPU is active - handle other types of events. */
258247777Sdavide		if (event > state->nextstat)
259247777Sdavide			event = state->nextstat;
260247777Sdavide		if (profiling && event > state->nextprof)
261247777Sdavide			event = state->nextprof;
262212541Smav	}
263247777Sdavide	return (event);
264209371Smav}
265209371Smav
/*
 * Compute the time of the next scheduled event on all CPUs.
 *
 * For a per-CPU timer only the local CPU matters; for a global timer
 * the earliest nextevent across all CPUs is taken, since one piece of
 * hardware must serve everybody.
 */
static sbintime_t
getnextevent(void)
{
	struct pcpu_state *state;
	sbintime_t event;
#ifdef SMP
	int	cpu;
#endif
	int	c;	/* CPU owning the winning event, -1 for local/none. */

	state = DPCPU_PTR(timerstate);
	event = state->nextevent;
	c = -1;
#ifdef SMP
	if ((timer->et_flags & ET_FLAGS_PERCPU) == 0) {
		CPU_FOREACH(cpu) {
			state = DPCPU_ID_PTR(cpu, timerstate);
			if (event > state->nextevent) {
				event = state->nextevent;
				c = cpu;
			}
		}
	}
#endif
	CTR4(KTR_SPARE2, "next at %d:    next %d.%08x by %d",
	    curcpu, (int)(event >> 32), (u_int)(event & 0xffffffff), c);
	return (event);
}
297209371Smav
/*
 * Hardware timer callback function.
 *
 * Invoked from the event timer interrupt.  Updates tick bookkeeping,
 * handles local events and, for a global timer, broadcasts
 * IPI_HARDCLOCK to every other CPU whose next event has come due.
 */
static void
timercb(struct eventtimer *et, void *arg)
{
	sbintime_t now;
	sbintime_t *next;
	struct pcpu_state *state;
#ifdef SMP
	int cpu, bcast;
#endif

	/* Do not touch anything if somebody reconfiguring timers. */
	if (busy)
		return;
	/* Update present and next tick times. */
	state = DPCPU_PTR(timerstate);
	if (et->et_flags & ET_FLAGS_PERCPU) {
		next = &state->nexttick;
	} else
		next = &nexttick;
	now = sbinuptime();
	if (periodic)
		*next = now + timerperiod;
	else
		*next = -1;	/* Next tick is not scheduled yet. */
	state->now = now;
	CTR3(KTR_SPARE2, "intr at %d:    now  %d.%08x",
	    curcpu, (int)(now >> 32), (u_int)(now & 0xffffffff));

#ifdef SMP
	/* Prepare broadcasting to other CPUs for non-per-CPU timers. */
	bcast = 0;
	if ((et->et_flags & ET_FLAGS_PERCPU) == 0 && smp_started) {
		CPU_FOREACH(cpu) {
			state = DPCPU_ID_PTR(cpu, timerstate);
			ET_HW_LOCK(state);
			state->now = now;
			if (now >= state->nextevent) {
				/*
				 * Push nextevent forward so repeated timer
				 * interrupts do not re-IPI a CPU that has
				 * not yet processed the previous one.
				 */
				state->nextevent += SBT_1S;
				if (curcpu != cpu) {
					state->ipi = 1;
					bcast = 1;
				}
			}
			ET_HW_UNLOCK(state);
		}
	}
#endif

	/* Handle events for this time on this CPU. */
	handleevents(now, 0);

#ifdef SMP
	/* Broadcast interrupt to other CPUs for non-per-CPU timers. */
	if (bcast) {
		CPU_FOREACH(cpu) {
			if (curcpu == cpu)
				continue;
			state = DPCPU_ID_PTR(cpu, timerstate);
			if (state->ipi) {
				state->ipi = 0;
				ipi_cpu(cpu, IPI_HARDCLOCK);
			}
		}
	}
#endif
}
365209371Smav
/*
 * Load new value into hardware timer.
 *
 * In periodic mode (with 'start' set) the timer is started phase-aligned
 * to the period so that all CPUs tick synchronously.  In one-shot mode
 * the timer is reprogrammed for the earliest pending event, but only if
 * that differs from what is already loaded.
 */
static void
loadtimer(sbintime_t now, int start)
{
	struct pcpu_state *state;
	sbintime_t new;
	sbintime_t *next;
	uint64_t tmp;
	int eq;

	if (timer->et_flags & ET_FLAGS_PERCPU) {
		state = DPCPU_PTR(timerstate);
		next = &state->nexttick;
	} else
		next = &nexttick;
	if (periodic) {
		if (start) {
			/*
			 * Try to start all periodic timers aligned
			 * to period to make events synchronous.
			 */
			tmp = now % timerperiod;
			new = timerperiod - tmp;
			if (new < tmp)		/* Left less then passed. */
				new += timerperiod;
			CTR5(KTR_SPARE2, "load p at %d:   now %d.%08x first in %d.%08x",
			    curcpu, (int)(now >> 32), (u_int)(now & 0xffffffff),
			    (int)(new >> 32), (u_int)(new & 0xffffffff));
			*next = new + now;
			et_start(timer, new, timerperiod);
		}
	} else {
		new = getnextevent();
		eq = (new == *next);
		CTR4(KTR_SPARE2, "load at %d:    next %d.%08x eq %d",
		    curcpu, (int)(new >> 32), (u_int)(new & 0xffffffff), eq);
		if (!eq) {
			*next = new;
			et_start(timer, new - now, 0);
		}
	}
}
410209371Smav
411209371Smav/*
412212541Smav * Prepare event timer parameters after configuration changes.
413212541Smav */
414212541Smavstatic void
415212541Smavsetuptimer(void)
416212541Smav{
417212541Smav	int freq;
418212541Smav
419212541Smav	if (periodic && (timer->et_flags & ET_FLAGS_PERIODIC) == 0)
420212541Smav		periodic = 0;
421212541Smav	else if (!periodic && (timer->et_flags & ET_FLAGS_ONESHOT) == 0)
422212541Smav		periodic = 1;
423212600Smav	singlemul = MIN(MAX(singlemul, 1), 20);
424212541Smav	freq = hz * singlemul;
425212541Smav	while (freq < (profiling ? profhz : stathz))
426212541Smav		freq += hz;
427212541Smav	freq = round_freq(timer, freq);
428247777Sdavide	timerperiod = SBT_1S / freq;
429212541Smav}
430212541Smav
/*
 * Reconfigure specified per-CPU timer on other CPU. Called from IPI handler.
 *
 * action == 1 requests a (re)start of this CPU's timer, action == 2 a
 * stop; a pending 'handle' flag asks for immediate event processing.
 * Returns non-zero if the IPI was consumed here.
 */
static int
doconfigtimer(void)
{
	sbintime_t now;
	struct pcpu_state *state;

	state = DPCPU_PTR(timerstate);
	switch (atomic_load_acq_int(&state->action)) {
	case 1:
		/* Start request from configtimer(). */
		now = sbinuptime();
		ET_HW_LOCK(state);
		loadtimer(now, 1);
		ET_HW_UNLOCK(state);
		state->handle = 0;
		atomic_store_rel_int(&state->action, 0);
		return (1);
	case 2:
		/* Stop request from configtimer(). */
		ET_HW_LOCK(state);
		et_stop(timer);
		ET_HW_UNLOCK(state);
		state->handle = 0;
		atomic_store_rel_int(&state->action, 0);
		return (1);
	}
	/* No reconfiguration pending; maybe an immediate-handle request. */
	if (atomic_readandclear_int(&state->handle) && !busy) {
		now = sbinuptime();
		handleevents(now, 0);
		return (1);
	}
	return (0);
}
465209371Smav
/*
 * Reconfigure specified timer.
 * For per-CPU timers use IPI to make other CPUs to reconfigure.
 *
 * 'start' non-zero (re)computes parameters and starts the timer(s);
 * zero stops them and sets 'busy' so event handling stays quiesced
 * until the next start.
 */
static void
configtimer(int start)
{
	sbintime_t now, next;
	struct pcpu_state *state;
	int cpu;

	if (start) {
		setuptimer();
		now = sbinuptime();
	} else
		now = 0;
	critical_enter();
	ET_HW_LOCK(DPCPU_PTR(timerstate));
	if (start) {
		/* Initialize time machine parameters. */
		next = now + timerperiod;
		if (periodic)
			nexttick = next;
		else
			nexttick = -1;
		CPU_FOREACH(cpu) {
			state = DPCPU_ID_PTR(cpu, timerstate);
			state->now = now;
			/* APs not yet running get no events scheduled. */
			if (!smp_started && cpu != CPU_FIRST())
				state->nextevent = SBT_MAX;
			else
				state->nextevent = next;
			if (periodic)
				state->nexttick = next;
			else
				state->nexttick = -1;
			state->nexthard = next;
			state->nextstat = next;
			state->nextprof = next;
			state->nextcall = next;
			state->nextcallopt = next;
			hardclock_sync(cpu);
		}
		busy = 0;
		/* Start global timer or per-CPU timer of this CPU. */
		loadtimer(now, 1);
	} else {
		busy = 1;
		/* Stop global timer or per-CPU timer of this CPU. */
		et_stop(timer);
	}
	ET_HW_UNLOCK(DPCPU_PTR(timerstate));
#ifdef SMP
	/* If timer is global or there is no other CPUs yet - we are done. */
	if ((timer->et_flags & ET_FLAGS_PERCPU) == 0 || !smp_started) {
		critical_exit();
		return;
	}
	/* Set reconfigure flags for other CPUs. */
	CPU_FOREACH(cpu) {
		state = DPCPU_ID_PTR(cpu, timerstate);
		atomic_store_rel_int(&state->action,
		    (cpu == curcpu) ? 0 : ( start ? 1 : 2));
	}
	/* Broadcast reconfigure IPI. */
	ipi_all_but_self(IPI_HARDCLOCK);
	/* Wait for reconfiguration completed. */
restart:
	cpu_spinwait();
	CPU_FOREACH(cpu) {
		if (cpu == curcpu)
			continue;
		state = DPCPU_ID_PTR(cpu, timerstate);
		if (atomic_load_acq_int(&state->action))
			goto restart;
	}
#endif
	critical_exit();
}
545209371Smav
546212541Smav/*
547212541Smav * Calculate nearest frequency supported by hardware timer.
548212541Smav */
549210290Smavstatic int
550210290Smavround_freq(struct eventtimer *et, int freq)
551210290Smav{
552210290Smav	uint64_t div;
553210290Smav
554210290Smav	if (et->et_frequency != 0) {
555210298Smav		div = lmax((et->et_frequency + freq / 2) / freq, 1);
556210290Smav		if (et->et_flags & ET_FLAGS_POW2DIV)
557210290Smav			div = 1 << (flsl(div + div / 2) - 1);
558210290Smav		freq = (et->et_frequency + div / 2) / div;
559210290Smav	}
560247463Smav	if (et->et_min_period > SBT_1S)
561241413Smav		panic("Event timer \"%s\" doesn't support sub-second periods!",
562241413Smav		    et->et_name);
563247463Smav	else if (et->et_min_period != 0)
564247463Smav		freq = min(freq, SBT2FREQ(et->et_min_period));
565247463Smav	if (et->et_max_period < SBT_1S && et->et_max_period != 0)
566247463Smav		freq = max(freq, SBT2FREQ(et->et_max_period));
567210290Smav	return (freq);
568210290Smav}
569210290Smav
/*
 * Configure and start event timers (BSP part).
 *
 * Selects the event timer (honoring the kern.eventtimer.timer tunable),
 * adapts periodic/one-shot mode to its capabilities, derives hz, stathz
 * and profhz, and starts the timer machinery.
 */
void
cpu_initclocks_bsp(void)
{
	struct pcpu_state *state;
	int base, div, cpu;

	mtx_init(&et_hw_mtx, "et_hw_mtx", NULL, MTX_SPIN);
	CPU_FOREACH(cpu) {
		state = DPCPU_ID_PTR(cpu, timerstate);
		mtx_init(&state->et_hw_mtx, "et_hw_mtx", NULL, MTX_SPIN);
		state->nextcall = SBT_MAX;
		state->nextcallopt = SBT_MAX;
	}
	periodic = want_periodic;
	/* Grab requested timer or the best of present. */
	if (timername[0])
		timer = et_find(timername, 0, 0);
	if (timer == NULL && periodic) {
		timer = et_find(NULL,
		    ET_FLAGS_PERIODIC, ET_FLAGS_PERIODIC);
	}
	if (timer == NULL) {
		timer = et_find(NULL,
		    ET_FLAGS_ONESHOT, ET_FLAGS_ONESHOT);
	}
	if (timer == NULL && !periodic) {
		timer = et_find(NULL,
		    ET_FLAGS_PERIODIC, ET_FLAGS_PERIODIC);
	}
	if (timer == NULL)
		panic("No usable event timer found!");
	et_init(timer, timercb, NULL, NULL);

	/* Adapt to timer capabilities. */
	if (periodic && (timer->et_flags & ET_FLAGS_PERIODIC) == 0)
		periodic = 0;
	else if (!periodic && (timer->et_flags & ET_FLAGS_ONESHOT) == 0)
		periodic = 1;
	if (timer->et_flags & ET_FLAGS_C3STOP)
		cpu_disable_c3_sleep++;

	/*
	 * We honor the requested 'hz' value.
	 * We want to run stathz in the neighborhood of 128hz.
	 * We would like profhz to run as often as possible.
	 */
	if (singlemul <= 0 || singlemul > 20) {
		if (hz >= 1500 || (hz % 128) == 0)
			singlemul = 1;
		else if (hz >= 750)
			singlemul = 2;
		else
			singlemul = 4;
	}
	if (periodic) {
		/* Round hz to what the hardware can actually deliver. */
		base = round_freq(timer, hz * singlemul);
		singlemul = max((base + hz / 2) / hz, 1);
		hz = (base + singlemul / 2) / singlemul;
		if (base <= 128)
			stathz = base;
		else {
			/* Keep stathz near 128 but out of phase with hz. */
			div = base / 128;
			if (div >= singlemul && (div % singlemul) == 0)
				div++;
			stathz = base / div;
		}
		profhz = stathz;
		while ((profhz + stathz) <= 128 * 64)
			profhz += stathz;
		profhz = round_freq(timer, profhz);
	} else {
		hz = round_freq(timer, hz);
		stathz = round_freq(timer, 127);
		profhz = round_freq(timer, stathz * 64);
	}
	tick = 1000000 / hz;
	tick_sbt = SBT_1S / hz;
	tick_bt = sbttobt(tick_sbt);
	statperiod = SBT_1S / stathz;
	profperiod = SBT_1S / profhz;
	ET_LOCK();
	configtimer(1);
	ET_UNLOCK();
}
657209371Smav
/*
 * Start per-CPU event timers on APs.
 *
 * Synchronizes this CPU's tick state with the BSP and runs
 * handleevents(..., 2) in a faked-interrupt context to schedule and,
 * for per-CPU hardware, start the local timer.
 */
void
cpu_initclocks_ap(void)
{
	sbintime_t now;
	struct pcpu_state *state;
	struct thread *td;

	state = DPCPU_PTR(timerstate);
	now = sbinuptime();
	ET_HW_LOCK(state);
	state->now = now;
	hardclock_sync(curcpu);
	/* Stay pinned with interrupts blocked while faking the interrupt. */
	spinlock_enter();
	ET_HW_UNLOCK(state);
	td = curthread;
	td->td_intr_nesting_level++;
	handleevents(state->now, 2);
	td->td_intr_nesting_level--;
	spinlock_exit();
}
681209371Smav
682212541Smav/*
683212541Smav * Switch to profiling clock rates.
684212541Smav */
685212541Smavvoid
686212541Smavcpu_startprofclock(void)
687209371Smav{
688209371Smav
689212541Smav	ET_LOCK();
690247329Smav	if (profiling == 0) {
691247329Smav		if (periodic) {
692247329Smav			configtimer(0);
693247329Smav			profiling = 1;
694247329Smav			configtimer(1);
695247329Smav		} else
696247329Smav			profiling = 1;
697212541Smav	} else
698247329Smav		profiling++;
699212541Smav	ET_UNLOCK();
700209371Smav}
701209371Smav
702212541Smav/*
703212541Smav * Switch to regular clock rates.
704212541Smav */
705209371Smavvoid
706212541Smavcpu_stopprofclock(void)
707209371Smav{
708209371Smav
709209371Smav	ET_LOCK();
710247329Smav	if (profiling == 1) {
711247329Smav		if (periodic) {
712247329Smav			configtimer(0);
713247329Smav			profiling = 0;
714247329Smav			configtimer(1);
715247329Smav		} else
716212541Smav		profiling = 0;
717212541Smav	} else
718247329Smav		profiling--;
719209371Smav	ET_UNLOCK();
720209371Smav}
721209371Smav
/*
 * Switch to idle mode (all ticks handled).
 *
 * Returns the sbintime until the next scheduled event so the caller can
 * pick an appropriate sleep state, or -1 when tick skipping is not
 * allowed (idletick tunable, reconfiguration in progress, per-CPU
 * periodic timers, or the polling CPU under DEVICE_POLLING).
 */
sbintime_t
cpu_idleclock(void)
{
	sbintime_t now, t;
	struct pcpu_state *state;

	if (idletick || busy ||
	    (periodic && (timer->et_flags & ET_FLAGS_PERCPU))
#ifdef DEVICE_POLLING
	    || curcpu == CPU_FIRST()
#endif
	    )
		return (-1);
	state = DPCPU_PTR(timerstate);
	/* In periodic mode reuse the last tick time instead of reading. */
	if (periodic)
		now = state->now;
	else
		now = sbinuptime();
	CTR3(KTR_SPARE2, "idle at %d:    now  %d.%08x",
	    curcpu, (int)(now >> 32), (u_int)(now & 0xffffffff));
	t = getnextcpuevent(1);
	ET_HW_LOCK(state);
	state->idle = 1;
	state->nextevent = t;
	/* One-shot hardware can be pushed out to the coalesced deadline. */
	if (!periodic)
		loadtimer(now, 0);
	ET_HW_UNLOCK(state);
	return (MAX(t - now, 0));
}
754209371Smav
/*
 * Switch to active mode (skip empty ticks).
 *
 * Called when a CPU leaves idle: catches up on events skipped while
 * sleeping by running handleevents(..., 1) in a faked-interrupt
 * context (no trapframe, so profclock is skipped).
 */
void
cpu_activeclock(void)
{
	sbintime_t now;
	struct pcpu_state *state;
	struct thread *td;

	state = DPCPU_PTR(timerstate);
	if (state->idle == 0 || busy)
		return;
	if (periodic)
		now = state->now;
	else
		now = sbinuptime();
	CTR3(KTR_SPARE2, "active at %d:  now  %d.%08x",
	    curcpu, (int)(now >> 32), (u_int)(now & 0xffffffff));
	spinlock_enter();
	td = curthread;
	td->td_intr_nesting_level++;
	handleevents(now, 1);
	td->td_intr_nesting_level--;
	spinlock_exit();
}
781212541Smav
782266347Sian/*
783266347Sian * Change the frequency of the given timer.  This changes et->et_frequency and
784266347Sian * if et is the active timer it reconfigures the timer on all CPUs.  This is
785266347Sian * intended to be a private interface for the use of et_change_frequency() only.
786266347Sian */
787266347Sianvoid
788266347Siancpu_et_frequency(struct eventtimer *et, uint64_t newfreq)
789266347Sian{
790266347Sian
791266347Sian	ET_LOCK();
792266347Sian	if (et == timer) {
793266347Sian		configtimer(0);
794266347Sian		et->et_frequency = newfreq;
795266347Sian		configtimer(1);
796266347Sian	} else
797266347Sian		et->et_frequency = newfreq;
798266347Sian	ET_UNLOCK();
799266347Sian}
800266347Sian
/*
 * Note a newly-scheduled callout on CPU 'cpu' and, if it becomes that
 * CPU's earliest event, reprogram the relevant timer (directly, or via
 * IPI_HARDCLOCK for another CPU's per-CPU timer).  'bt' is the hard
 * deadline, 'bt_opt' the optional (coalescible) one.
 */
void
cpu_new_callout(int cpu, sbintime_t bt, sbintime_t bt_opt)
{
	struct pcpu_state *state;

	/* Do not touch anything if somebody reconfiguring timers. */
	if (busy)
		return;
	CTR6(KTR_SPARE2, "new co at %d:    on %d at %d.%08x - %d.%08x",
	    curcpu, cpu, (int)(bt_opt >> 32), (u_int)(bt_opt & 0xffffffff),
	    (int)(bt >> 32), (u_int)(bt & 0xffffffff));
	state = DPCPU_ID_PTR(cpu, timerstate);
	ET_HW_LOCK(state);

	/*
	 * If there is callout time already set earlier -- do nothing.
	 * This check may appear redundant because we check already in
	 * callout_process() but this double check guarantees we're safe
	 * with respect to race conditions between interrupts execution
	 * and scheduling.
	 */
	state->nextcallopt = bt_opt;
	if (bt >= state->nextcall)
		goto done;
	state->nextcall = bt;
	/* If there is some other event set earlier -- do nothing. */
	if (bt >= state->nextevent)
		goto done;
	state->nextevent = bt;
	/* If timer is periodic -- there is nothing to reprogram. */
	if (periodic)
		goto done;
	/* If timer is global or of the current CPU -- reprogram it. */
	if ((timer->et_flags & ET_FLAGS_PERCPU) == 0 || cpu == curcpu) {
		loadtimer(sbinuptime(), 0);
done:
		ET_HW_UNLOCK(state);
		return;
	}
	/* Otherwise make other CPU to reprogram it. */
	state->handle = 1;
	ET_HW_UNLOCK(state);
#ifdef SMP
	ipi_cpu(cpu, IPI_HARDCLOCK);
#endif
}
847212541Smav
848212541Smav/*
849212541Smav * Report or change the active event timers hardware.
850212541Smav */
/*
 * Sysctl handler: report the active event timer's name, or switch to a
 * different registered timer named by the new string value.  Returns 0
 * on success, ENOENT when no timer matches the requested name, or the
 * sysctl string-handling error.
 */
static int
sysctl_kern_eventtimer_timer(SYSCTL_HANDLER_ARGS)
{
	char buf[32];
	struct eventtimer *et;
	int error;

	/* Snapshot the current timer's name under the lock for the read side. */
	ET_LOCK();
	et = timer;
	snprintf(buf, sizeof(buf), "%s", et->et_name);
	ET_UNLOCK();
	error = sysctl_handle_string(oidp, buf, sizeof(buf), req);
	/* Re-read under the lock: timer may have changed while unlocked. */
	ET_LOCK();
	et = timer;
	/* Nothing to do on read-only access, error, or same timer requested. */
	if (error != 0 || req->newptr == NULL ||
	    strcasecmp(buf, et->et_name) == 0) {
		ET_UNLOCK();
		return (error);
	}
	et = et_find(buf, 0, 0);
	if (et == NULL) {
		ET_UNLOCK();
		return (ENOENT);
	}
	/* Stop the old timer and release our hold on it. */
	configtimer(0);
	et_free(timer);
	/*
	 * Maintain the count of active timers that stop in the C3 idle
	 * state: account the new timer in before the old one out.
	 */
	if (et->et_flags & ET_FLAGS_C3STOP)
		cpu_disable_c3_sleep++;
	if (timer->et_flags & ET_FLAGS_C3STOP)
		cpu_disable_c3_sleep--;
	/* Restore the user-requested periodicity for the new timer. */
	periodic = want_periodic;
	timer = et;
	et_init(timer, timercb, NULL, NULL);
	configtimer(1);
	ET_UNLOCK();
	return (error);
}
SYSCTL_PROC(_kern_eventtimer, OID_AUTO, timer,
    CTLTYPE_STRING | CTLFLAG_RW | CTLFLAG_MPSAFE,
    0, 0, sysctl_kern_eventtimer_timer, "A", "Chosen event timer");
891209371Smav
892212541Smav/*
893212541Smav * Report or change the active event timer periodicity.
894212541Smav */
895209371Smavstatic int
896212541Smavsysctl_kern_eventtimer_periodic(SYSCTL_HANDLER_ARGS)
897209371Smav{
898212541Smav	int error, val;
899209371Smav
900212541Smav	val = periodic;
901212541Smav	error = sysctl_handle_int(oidp, &val, 0, req);
902212541Smav	if (error != 0 || req->newptr == NULL)
903212541Smav		return (error);
904209371Smav	ET_LOCK();
905212541Smav	configtimer(0);
906212967Smav	periodic = want_periodic = val;
907212541Smav	configtimer(1);
908209371Smav	ET_UNLOCK();
909209371Smav	return (error);
910209371Smav}
911212541SmavSYSCTL_PROC(_kern_eventtimer, OID_AUTO, periodic,
912212541Smav    CTLTYPE_INT | CTLFLAG_RW | CTLFLAG_MPSAFE,
913212600Smav    0, 0, sysctl_kern_eventtimer_periodic, "I", "Enable event timer periodic mode");
914278573Skib
915278573Skib#include "opt_ddb.h"
916278573Skib
917278573Skib#ifdef DDB
918278573Skib#include <ddb/ddb.h>
919278573Skib
920278573SkibDB_SHOW_COMMAND(clocksource, db_show_clocksource)
921278573Skib{
922278573Skib	struct pcpu_state *st;
923278573Skib	int c;
924278573Skib
925278573Skib	CPU_FOREACH(c) {
926278573Skib		st = DPCPU_ID_PTR(c, timerstate);
927278573Skib		db_printf(
928278573Skib		    "CPU %2d: action %d handle %d  ipi %d idle %d\n"
929278573Skib		    "        now %#jx nevent %#jx (%jd)\n"
930278573Skib		    "        ntick %#jx (%jd) nhard %#jx (%jd)\n"
931278573Skib		    "        nstat %#jx (%jd) nprof %#jx (%jd)\n"
932278573Skib		    "        ncall %#jx (%jd) ncallopt %#jx (%jd)\n",
933278573Skib		    c, st->action, st->handle, st->ipi, st->idle,
934278573Skib		    (uintmax_t)st->now,
935278573Skib		    (uintmax_t)st->nextevent,
936278573Skib		    (uintmax_t)(st->nextevent - st->now) / tick_sbt,
937278573Skib		    (uintmax_t)st->nexttick,
938278573Skib		    (uintmax_t)(st->nexttick - st->now) / tick_sbt,
939278573Skib		    (uintmax_t)st->nexthard,
940278573Skib		    (uintmax_t)(st->nexthard - st->now) / tick_sbt,
941278573Skib		    (uintmax_t)st->nextstat,
942278573Skib		    (uintmax_t)(st->nextstat - st->now) / tick_sbt,
943278573Skib		    (uintmax_t)st->nextprof,
944278573Skib		    (uintmax_t)(st->nextprof - st->now) / tick_sbt,
945278573Skib		    (uintmax_t)st->nextcall,
946278573Skib		    (uintmax_t)(st->nextcall - st->now) / tick_sbt,
947278573Skib		    (uintmax_t)st->nextcallopt,
948278573Skib		    (uintmax_t)(st->nextcallopt - st->now) / tick_sbt);
949278573Skib	}
950278573Skib}
951278573Skib
952278573Skib#endif
953