/*-
 * SPDX-License-Identifier: BSD-2-Clause
 *
 * Copyright (c) 2010-2013 Alexander Motin <mav@FreeBSD.org>
 * All rights reserved.
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 * 1. Redistributions of source code must retain the above copyright
 *    notice, this list of conditions and the following disclaimer,
 *    without modification, immediately at the beginning of the file.
 * 2. Redistributions in binary form must reproduce the above copyright
 *    notice, this list of conditions and the following disclaimer in the
 *    documentation and/or other materials provided with the distribution.
 *
 * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR
 * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
 * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.
 * IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT,
 * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT
 * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
 * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
 * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
 * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF
 * THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
 */

#include <sys/cdefs.h>
/*
 * Common routines to manage event timer hardware.
 */

#include "opt_device_polling.h"

#include <sys/param.h>
#include <sys/systm.h>
#include <sys/bus.h>
#include <sys/limits.h>
#include <sys/lock.h>
#include <sys/kdb.h>
#include <sys/ktr.h>
#include <sys/mutex.h>
#include <sys/proc.h>
#include <sys/kernel.h>
#include <sys/sched.h>
#include <sys/smp.h>
#include <sys/sysctl.h>
#include <sys/timeet.h>
#include <sys/timetc.h>

#include <machine/atomic.h>
#include <machine/clock.h>
#include <machine/cpu.h>
#include <machine/smp.h>

int			cpu_disable_c2_sleep = 0; /* Timer dies in C2. */
int			cpu_disable_c3_sleep = 0; /* Timer dies in C3. */

static void		setuptimer(void);
static void		loadtimer(sbintime_t now, int first);
static int		doconfigtimer(void);
static void		configtimer(int start);
static int		round_freq(struct eventtimer *et, int freq);

struct pcpu_state;
static sbintime_t	getnextcpuevent(struct pcpu_state *state, int idle);
static sbintime_t	getnextevent(struct pcpu_state *state);
static int		handleevents(sbintime_t now, int fake);

static struct mtx	et_hw_mtx;

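/*
 * With a per-CPU event timer each CPU locks only its own pcpu_state mutex;
 * a single global timer is instead protected by et_hw_mtx.  The macros
 * below hide that distinction from the rest of the code.
 */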
#define	ET_HW_LOCK(state)						\
	{								\
		if (timer->et_flags & ET_FLAGS_PERCPU)			\
			mtx_lock_spin(&(state)->et_hw_mtx);		\
		else							\
			mtx_lock_spin(&et_hw_mtx);			\
	}

#define	ET_HW_UNLOCK(state)						\
	{								\
		if (timer->et_flags & ET_FLAGS_PERCPU)			\
			mtx_unlock_spin(&(state)->et_hw_mtx);		\
		else							\
			mtx_unlock_spin(&et_hw_mtx);			\
	}

static struct eventtimer *timer = NULL;
static sbintime_t	timerperiod;	/* Timer period for periodic mode. */
static sbintime_t	statperiod;	/* statclock() events period. */
static sbintime_t	profperiod;	/* profclock() events period. */
static sbintime_t	nexttick;	/* Next global timer tick time. */
static u_int		busy = 1;	/* Reconfiguration is in progress. */
static int		profiling;	/* Profiling events enabled. */

static char		timername[32];	/* Wanted timer. */
TUNABLE_STR("kern.eventtimer.timer", timername, sizeof(timername));

static int		singlemul;	/* Multiplier for periodic mode. */
SYSCTL_INT(_kern_eventtimer, OID_AUTO, singlemul, CTLFLAG_RWTUN, &singlemul,
    0, "Multiplier for periodic mode");

static u_int		idletick;	/* Run periodic events when idle. */
SYSCTL_UINT(_kern_eventtimer, OID_AUTO, idletick, CTLFLAG_RWTUN, &idletick,
    0, "Run periodic events when idle");

static int		periodic;	/* Periodic or one-shot mode. */
static int		want_periodic;	/* What mode to prefer. */
TUNABLE_INT("kern.eventtimer.periodic", &want_periodic);

struct pcpu_state {
	struct mtx	et_hw_mtx;	/* Per-CPU timer mutex. */
	u_int		action;		/* Reconfiguration requests. */
	u_int		handle;		/* Immediate handle requests. */
	sbintime_t	now;		/* Last tick time. */
	sbintime_t	nextevent;	/* Next scheduled event on this CPU. */
	sbintime_t	nexttick;	/* Next timer tick time. */
	sbintime_t	nexthard;	/* Next hardclock() event. */
	sbintime_t	nextstat;	/* Next statclock() event. */
	sbintime_t	nextprof;	/* Next profclock() event. */
	sbintime_t	nextcall;	/* Next callout event. */
	sbintime_t	nextcallopt;	/* Next optional callout event. */
	int		ipi;		/* This CPU needs IPI. */
	int		idle;		/* This CPU is in idle mode. */
};

DPCPU_DEFINE_STATIC(struct pcpu_state, timerstate);
DPCPU_DEFINE(sbintime_t, hardclocktime);

/*
 * Timer broadcast IPI handler.
 */
int
hardclockintr(void)
{
	sbintime_t now;
	struct pcpu_state *state;
	int done;

	if (doconfigtimer() || busy)
		return (FILTER_HANDLED);
	state = DPCPU_PTR(timerstate);
	now = state->now;
	CTR2(KTR_SPARE2, "ipi:    now  %d.%08x",
	    (int)(now >> 32), (u_int)(now & 0xffffffff));
	done = handleevents(now, 0);
	return (done ? FILTER_HANDLED : FILTER_STRAY);
}

/*
 * Handle all events for the specified time on this CPU.
 */
static int
handleevents(sbintime_t now, int fake)
{
	sbintime_t t, *hct;
	struct trapframe *frame;
	struct pcpu_state *state;
	int usermode;
	int done, runs;

	CTR2(KTR_SPARE2, "handle:  now  %d.%08x",
	    (int)(now >> 32), (u_int)(now & 0xffffffff));
	done = 0;
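	/*
	 * 'fake' describes the calling context: 0 means a real timer
	 * interrupt, non-zero means a synchronous call (e.g. waking from
	 * idle or starting an AP) where no trap frame is available;
	 * fake == 2 additionally skips the hardclock()/statclock() calls
	 * themselves and only advances the schedule.
	 */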
	if (fake) {
		frame = NULL;
		usermode = 0;
	} else {
		frame = curthread->td_intr_frame;
		usermode = TRAPF_USERMODE(frame);
	}

	state = DPCPU_PTR(timerstate);

	runs = 0;
	while (now >= state->nexthard) {
		state->nexthard += tick_sbt;
		runs++;
	}
	if (runs) {
		hct = DPCPU_PTR(hardclocktime);
		*hct = state->nexthard - tick_sbt;
		if (fake < 2) {
			hardclock(runs, usermode);
			done = 1;
		}
	}
	runs = 0;
	while (now >= state->nextstat) {
		state->nextstat += statperiod;
		runs++;
	}
	if (runs && fake < 2) {
		statclock(runs, usermode);
		done = 1;
	}
	if (profiling) {
		runs = 0;
		while (now >= state->nextprof) {
			state->nextprof += profperiod;
			runs++;
		}
		if (runs && !fake) {
			profclock(runs, usermode, TRAPF_PC(frame));
			done = 1;
		}
	} else
		state->nextprof = state->nextstat;
	if (now >= state->nextcallopt || now >= state->nextcall) {
		state->nextcall = state->nextcallopt = SBT_MAX;
		callout_process(now);
	}

	ET_HW_LOCK(state);
	t = getnextcpuevent(state, 0);
	if (!busy) {
		state->idle = 0;
		state->nextevent = t;
		loadtimer(now, (fake == 2) &&
		    (timer->et_flags & ET_FLAGS_PERCPU));
	}
	ET_HW_UNLOCK(state);
	return (done);
}

/*
 * Return the time of the next event scheduled on the current CPU.
 */
static sbintime_t
getnextcpuevent(struct pcpu_state *state, int idle)
{
	sbintime_t event;
	u_int hardfreq;

	/* Handle hardclock() events, skipping some if CPU is idle. */
	event = state->nexthard;
	if (idle) {
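		/*
		 * Timecounters need hardclock() to run often enough to
		 * catch hardware counter wraps; tc_min_ticktock_freq is
		 * that minimum rate, and only the first CPU must keep it.
		 */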
		if (tc_min_ticktock_freq > 1
#ifdef SMP
		    && curcpu == CPU_FIRST()
#endif
		    )
			hardfreq = hz / tc_min_ticktock_freq;
		else
			hardfreq = hz;
		if (hardfreq > 1)
			event += tick_sbt * (hardfreq - 1);
	}
	/* Handle callout events. */
	if (event > state->nextcall)
		event = state->nextcall;
	if (!idle) { /* If CPU is active - handle other types of events. */
		if (event > state->nextstat)
			event = state->nextstat;
		if (profiling && event > state->nextprof)
			event = state->nextprof;
	}
	return (event);
}

/*
 * Return the earliest event time among all CPUs.
 */
static sbintime_t
getnextevent(struct pcpu_state *state)
{
	sbintime_t event;
#ifdef SMP
	int	cpu;
#endif
#ifdef KTR
	int	c;

	c = -1;
#endif
	event = state->nextevent;
#ifdef SMP
	if ((timer->et_flags & ET_FLAGS_PERCPU) == 0) {
		CPU_FOREACH(cpu) {
			state = DPCPU_ID_PTR(cpu, timerstate);
			if (event > state->nextevent) {
				event = state->nextevent;
#ifdef KTR
				c = cpu;
#endif
			}
		}
	}
#endif
	CTR3(KTR_SPARE2, "next:    next %d.%08x by %d",
	    (int)(event >> 32), (u_int)(event & 0xffffffff), c);
	return (event);
}

/* Hardware timer callback function. */
static void
timercb(struct eventtimer *et, void *arg)
{
	sbintime_t now;
	sbintime_t *next;
	struct pcpu_state *state;
#ifdef SMP
	int cpu, bcast;
#endif

	/* Do not touch anything if somebody is reconfiguring timers. */
	if (busy)
		return;
	/* Update present and next tick times. */
	state = DPCPU_PTR(timerstate);
	if (et->et_flags & ET_FLAGS_PERCPU) {
		next = &state->nexttick;
	} else
		next = &nexttick;
	now = sbinuptime();
	if (periodic)
		*next = now + timerperiod;
	else
		*next = -1;	/* Next tick is not scheduled yet. */
	state->now = now;
	CTR2(KTR_SPARE2, "intr:    now  %d.%08x",
	    (int)(now >> 32), (u_int)(now & 0xffffffff));

#ifdef SMP
#ifdef EARLY_AP_STARTUP
	MPASS(mp_ncpus == 1 || smp_started);
#endif
	/* Prepare broadcasting to other CPUs for non-per-CPU timers. */
	bcast = 0;
#ifdef EARLY_AP_STARTUP
	if ((et->et_flags & ET_FLAGS_PERCPU) == 0) {
#else
	if ((et->et_flags & ET_FLAGS_PERCPU) == 0 && smp_started) {
#endif
		CPU_FOREACH(cpu) {
			state = DPCPU_ID_PTR(cpu, timerstate);
			ET_HW_LOCK(state);
			state->now = now;
			if (now >= state->nextevent) {
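				/*
				 * Push nextevent forward so this CPU is not
				 * IPIed again for the same event before its
				 * own handleevents() has run.
				 */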
				state->nextevent += SBT_1S;
				if (curcpu != cpu) {
					state->ipi = 1;
					bcast = 1;
				}
			}
			ET_HW_UNLOCK(state);
		}
	}
#endif

	/* Handle events for this time on this CPU. */
	handleevents(now, 0);

#ifdef SMP
	/* Broadcast interrupt to other CPUs for non-per-CPU timers. */
	if (bcast) {
		CPU_FOREACH(cpu) {
			if (curcpu == cpu)
				continue;
			state = DPCPU_ID_PTR(cpu, timerstate);
			if (state->ipi) {
				state->ipi = 0;
				ipi_cpu(cpu, IPI_HARDCLOCK);
			}
		}
	}
#endif
}

/*
 * Load new value into hardware timer.
 */
static void
loadtimer(sbintime_t now, int start)
{
	struct pcpu_state *state;
	sbintime_t new;
	sbintime_t *next;
	uint64_t tmp;
	int eq;

	state = DPCPU_PTR(timerstate);
	if (timer->et_flags & ET_FLAGS_PERCPU)
		next = &state->nexttick;
	else
		next = &nexttick;
	if (periodic) {
		if (start) {
			/*
			 * Try to start all periodic timers aligned
			 * to period to make events synchronous.
			 */
			tmp = now % timerperiod;
			new = timerperiod - tmp;
			if (new < tmp)		/* Less left than passed. */
				new += timerperiod;
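			/*
			 * Example: with a 1 ms period and 'now' 0.7 ms past
			 * the last boundary, only 0.3 ms remains, so the
			 * first tick fires 1.3 ms from now, still aligned.
			 */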
			CTR4(KTR_SPARE2, "load p:   now %d.%08x first in %d.%08x",
			    (int)(now >> 32), (u_int)(now & 0xffffffff),
			    (int)(new >> 32), (u_int)(new & 0xffffffff));
			*next = new + now;
			et_start(timer, new, timerperiod);
		}
	} else {
		new = getnextevent(state);
		eq = (new == *next);
		CTR3(KTR_SPARE2, "load:    next %d.%08x eq %d",
		    (int)(new >> 32), (u_int)(new & 0xffffffff), eq);
		if (!eq) {
			*next = new;
			et_start(timer, new - now, 0);
		}
	}
}

/*
 * Prepare event timer parameters after configuration changes.
 */
static void
setuptimer(void)
{
	int freq;

	if (periodic && (timer->et_flags & ET_FLAGS_PERIODIC) == 0)
		periodic = 0;
	else if (!periodic && (timer->et_flags & ET_FLAGS_ONESHOT) == 0)
		periodic = 1;
	singlemul = MIN(MAX(singlemul, 1), 20);
	freq = hz * singlemul;
	while (freq < (profiling ? profhz : stathz))
		freq += hz;
	freq = round_freq(timer, freq);
	timerperiod = SBT_1S / freq;
}

/*
 * Reconfigure the specified per-CPU timer on another CPU.  Called from the
 * IPI handler.
 */
static int
doconfigtimer(void)
{
	sbintime_t now;
	struct pcpu_state *state;

	state = DPCPU_PTR(timerstate);
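	/* 'action' requests: 1 -- (re)start this CPU's timer, 2 -- stop it. */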
	switch (atomic_load_acq_int(&state->action)) {
	case 1:
		now = sbinuptime();
		ET_HW_LOCK(state);
		loadtimer(now, 1);
		ET_HW_UNLOCK(state);
		state->handle = 0;
		atomic_store_rel_int(&state->action, 0);
		return (1);
	case 2:
		ET_HW_LOCK(state);
		et_stop(timer);
		ET_HW_UNLOCK(state);
		state->handle = 0;
		atomic_store_rel_int(&state->action, 0);
		return (1);
	}
	if (atomic_readandclear_int(&state->handle) && !busy) {
		now = sbinuptime();
		handleevents(now, 0);
		return (1);
	}
	return (0);
}

/*
 * Reconfigure the specified timer.
 * For per-CPU timers, use an IPI to make the other CPUs reconfigure theirs.
 */
static void
configtimer(int start)
{
	sbintime_t now, next;
	struct pcpu_state *state;
	int cpu;

	if (start) {
		setuptimer();
		now = sbinuptime();
	} else
		now = 0;
	critical_enter();
	ET_HW_LOCK(DPCPU_PTR(timerstate));
	if (start) {
		/* Initialize time machine parameters. */
		next = now + timerperiod;
		if (periodic)
			nexttick = next;
		else
			nexttick = -1;
#ifdef EARLY_AP_STARTUP
		MPASS(mp_ncpus == 1 || smp_started);
#endif
		CPU_FOREACH(cpu) {
			state = DPCPU_ID_PTR(cpu, timerstate);
			state->now = now;
#ifndef EARLY_AP_STARTUP
			if (!smp_started && cpu != CPU_FIRST())
				state->nextevent = SBT_MAX;
			else
#endif
				state->nextevent = next;
			if (periodic)
				state->nexttick = next;
			else
				state->nexttick = -1;
			state->nexthard = next;
			state->nextstat = next;
			state->nextprof = next;
			state->nextcall = next;
			state->nextcallopt = next;
			hardclock_sync(cpu);
		}
		busy = 0;
		/* Start global timer or per-CPU timer of this CPU. */
		loadtimer(now, 1);
	} else {
		busy = 1;
		/* Stop global timer or per-CPU timer of this CPU. */
		et_stop(timer);
	}
	ET_HW_UNLOCK(DPCPU_PTR(timerstate));
#ifdef SMP
#ifdef EARLY_AP_STARTUP
	/* If timer is global we are done. */
	if ((timer->et_flags & ET_FLAGS_PERCPU) == 0) {
#else
	/* If timer is global or there are no other CPUs yet - we are done. */
	if ((timer->et_flags & ET_FLAGS_PERCPU) == 0 || !smp_started) {
#endif
		critical_exit();
		return;
	}
	/* Set reconfigure flags for other CPUs. */
	CPU_FOREACH(cpu) {
		state = DPCPU_ID_PTR(cpu, timerstate);
		atomic_store_rel_int(&state->action,
		    (cpu == curcpu) ? 0 : (start ? 1 : 2));
	}
	/* Broadcast reconfigure IPI. */
	ipi_all_but_self(IPI_HARDCLOCK);
	/* Wait for reconfiguration to complete. */
restart:
	cpu_spinwait();
	CPU_FOREACH(cpu) {
		if (cpu == curcpu)
			continue;
		state = DPCPU_ID_PTR(cpu, timerstate);
		if (atomic_load_acq_int(&state->action))
			goto restart;
	}
#endif
	critical_exit();
}

/*
 * Calculate nearest frequency supported by hardware timer.
 */
static int
round_freq(struct eventtimer *et, int freq)
{
	uint64_t div;

	if (et->et_frequency != 0) {
		div = lmax((et->et_frequency + freq / 2) / freq, 1);
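		/*
		 * Some hardware can only divide by powers of two; pick
		 * the power of two nearest to the ideal divisor.
		 */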
		if (et->et_flags & ET_FLAGS_POW2DIV)
			div = 1 << (flsl(div + div / 2) - 1);
		freq = (et->et_frequency + div / 2) / div;
	}
	if (et->et_min_period > SBT_1S)
		panic("Event timer \"%s\" doesn't support sub-second periods!",
		    et->et_name);
	else if (et->et_min_period != 0)
		freq = min(freq, SBT2FREQ(et->et_min_period));
	if (et->et_max_period < SBT_1S && et->et_max_period != 0)
		freq = max(freq, SBT2FREQ(et->et_max_period));
	return (freq);
}

/*
 * Configure and start event timers (BSP part).
 */
void
cpu_initclocks_bsp(void)
{
	struct pcpu_state *state;
	int base, div, cpu;

	mtx_init(&et_hw_mtx, "et_hw_mtx", NULL, MTX_SPIN);
	CPU_FOREACH(cpu) {
		state = DPCPU_ID_PTR(cpu, timerstate);
		mtx_init(&state->et_hw_mtx, "et_hw_mtx", NULL, MTX_SPIN);
		state->nextcall = SBT_MAX;
		state->nextcallopt = SBT_MAX;
	}
	periodic = want_periodic;
	/* Grab the requested timer or the best one present. */
	if (timername[0])
		timer = et_find(timername, 0, 0);
	if (timer == NULL && periodic) {
		timer = et_find(NULL,
		    ET_FLAGS_PERIODIC, ET_FLAGS_PERIODIC);
	}
	if (timer == NULL) {
		timer = et_find(NULL,
		    ET_FLAGS_ONESHOT, ET_FLAGS_ONESHOT);
	}
	if (timer == NULL && !periodic) {
		timer = et_find(NULL,
		    ET_FLAGS_PERIODIC, ET_FLAGS_PERIODIC);
	}
	if (timer == NULL)
		panic("No usable event timer found!");
	et_init(timer, timercb, NULL, NULL);

	/* Adapt to timer capabilities. */
	if (periodic && (timer->et_flags & ET_FLAGS_PERIODIC) == 0)
		periodic = 0;
	else if (!periodic && (timer->et_flags & ET_FLAGS_ONESHOT) == 0)
		periodic = 1;
	if (timer->et_flags & ET_FLAGS_C3STOP)
		cpu_disable_c3_sleep++;

	/*
	 * We honor the requested 'hz' value.
	 * We want to run stathz in the neighborhood of 128 Hz.
	 * We would like profhz to run as often as possible.
	 */
	if (singlemul <= 0 || singlemul > 20) {
		if (hz >= 1500 || (hz % 128) == 0)
			singlemul = 1;
		else if (hz >= 750)
			singlemul = 2;
		else
			singlemul = 4;
	}
	if (periodic) {
		base = round_freq(timer, hz * singlemul);
		singlemul = max((base + hz / 2) / hz, 1);
		hz = (base + singlemul / 2) / singlemul;
		if (base <= 128)
			stathz = base;
		else {
			div = base / 128;
			if (div >= singlemul && (div % singlemul) == 0)
				div++;
			stathz = base / div;
		}
		profhz = stathz;
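		/* Grow profhz to the largest multiple of stathz <= 8192. */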
		while ((profhz + stathz) <= 128 * 64)
			profhz += stathz;
		profhz = round_freq(timer, profhz);
	} else {
		hz = round_freq(timer, hz);
		stathz = round_freq(timer, 127);
		profhz = round_freq(timer, stathz * 64);
	}
	tick = 1000000 / hz;
	tick_sbt = SBT_1S / hz;
	tick_bt = sbttobt(tick_sbt);
	statperiod = SBT_1S / stathz;
	profperiod = SBT_1S / profhz;
	ET_LOCK();
	configtimer(1);
	ET_UNLOCK();
}

/*
 * Start per-CPU event timers on APs.
 */
void
cpu_initclocks_ap(void)
{
	struct pcpu_state *state;
	struct thread *td;

	state = DPCPU_PTR(timerstate);
	ET_HW_LOCK(state);
	state->now = sbinuptime();
	hardclock_sync(curcpu);
	spinlock_enter();
	ET_HW_UNLOCK(state);
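	/*
	 * Raise the interrupt nesting level so the clock handlers invoked
	 * by handleevents() see what looks like interrupt context.
	 */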
	td = curthread;
	td->td_intr_nesting_level++;
	handleevents(state->now, 2);
	td->td_intr_nesting_level--;
	spinlock_exit();
}

void
suspendclock(void)
{
	ET_LOCK();
	configtimer(0);
	ET_UNLOCK();
}

void
resumeclock(void)
{
	ET_LOCK();
	configtimer(1);
	ET_UNLOCK();
}

/*
 * Switch to profiling clock rates.
 */
void
cpu_startprofclock(void)
{

	ET_LOCK();
	if (profiling == 0) {
		if (periodic) {
			configtimer(0);
			profiling = 1;
			configtimer(1);
		} else
			profiling = 1;
	} else
		profiling++;
	ET_UNLOCK();
}

/*
 * Switch to regular clock rates.
 */
void
cpu_stopprofclock(void)
{

	ET_LOCK();
	if (profiling == 1) {
		if (periodic) {
			configtimer(0);
			profiling = 0;
			configtimer(1);
		} else
			profiling = 0;
	} else
		profiling--;
	ET_UNLOCK();
}

/*
 * Switch to idle mode (all ticks handled).
 */
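/*
 * Returns the time remaining until the next scheduled event, or -1 if the
 * CPU must keep taking periodic ticks while idle.
 */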
sbintime_t
cpu_idleclock(void)
{
	sbintime_t now, t;
	struct pcpu_state *state;

	if (idletick || busy ||
	    (periodic && (timer->et_flags & ET_FLAGS_PERCPU))
#ifdef DEVICE_POLLING
	    || curcpu == CPU_FIRST()
#endif
	    )
		return (-1);
	state = DPCPU_PTR(timerstate);
	ET_HW_LOCK(state);
	if (periodic)
		now = state->now;
	else
		now = sbinuptime();
	CTR2(KTR_SPARE2, "idle:    now  %d.%08x",
	    (int)(now >> 32), (u_int)(now & 0xffffffff));
	t = getnextcpuevent(state, 1);
	state->idle = 1;
	state->nextevent = t;
	if (!periodic)
		loadtimer(now, 0);
	ET_HW_UNLOCK(state);
	return (MAX(t - now, 0));
}

/*
 * Switch to active mode (skip empty ticks).
 */
void
cpu_activeclock(void)
{
	sbintime_t now;
	struct pcpu_state *state;
	struct thread *td;

	state = DPCPU_PTR(timerstate);
	if (atomic_load_int(&state->idle) == 0 || busy)
		return;
	spinlock_enter();
	if (periodic)
		now = state->now;
	else
		now = sbinuptime();
	CTR2(KTR_SPARE2, "active:  now  %d.%08x",
	    (int)(now >> 32), (u_int)(now & 0xffffffff));
	td = curthread;
	td->td_intr_nesting_level++;
	handleevents(now, 1);
	td->td_intr_nesting_level--;
	spinlock_exit();
}

/*
 * Change the frequency of the given timer.  This changes et->et_frequency and
 * if et is the active timer it reconfigures the timer on all CPUs.  This is
 * intended to be a private interface for the use of et_change_frequency() only.
 */
void
cpu_et_frequency(struct eventtimer *et, uint64_t newfreq)
{

	ET_LOCK();
	if (et == timer) {
		configtimer(0);
		et->et_frequency = newfreq;
		configtimer(1);
	} else
		et->et_frequency = newfreq;
	ET_UNLOCK();
}

void
cpu_new_callout(int cpu, sbintime_t bt, sbintime_t bt_opt)
{
	struct pcpu_state *state;

	/* Do not touch anything if somebody is reconfiguring timers. */
	if (busy)
		return;

	CTR5(KTR_SPARE2, "new co:  on %d at %d.%08x - %d.%08x",
	    cpu, (int)(bt_opt >> 32), (u_int)(bt_opt & 0xffffffff),
	    (int)(bt >> 32), (u_int)(bt & 0xffffffff));

	KASSERT(!CPU_ABSENT(cpu), ("Absent CPU %d", cpu));
	state = DPCPU_ID_PTR(cpu, timerstate);
	ET_HW_LOCK(state);

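	/*
	 * 'bt' is the hard deadline and 'bt_opt' the earliest acceptable
	 * time, which lets callout_process() batch nearby callouts into a
	 * single wakeup.
	 */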
	/*
	 * If there is a callout time already set earlier -- do nothing.
	 * This check may appear redundant because we check already in
	 * callout_process(), but this double check guarantees we're safe
	 * with respect to race conditions between interrupt execution
	 * and scheduling.
	 */
	state->nextcallopt = bt_opt;
	if (bt >= state->nextcall)
		goto done;
	state->nextcall = bt;
	/* If there is some other event set earlier -- do nothing. */
	if (bt >= state->nextevent)
		goto done;
	state->nextevent = bt;
	/* If timer is periodic -- there is nothing to reprogram. */
	if (periodic)
		goto done;
	/* If timer is global or of the current CPU -- reprogram it. */
	if ((timer->et_flags & ET_FLAGS_PERCPU) == 0 || cpu == curcpu) {
		loadtimer(sbinuptime(), 0);
done:
		ET_HW_UNLOCK(state);
		return;
	}
	/* Otherwise have the other CPU reprogram it. */
	state->handle = 1;
	ET_HW_UNLOCK(state);
#ifdef SMP
	ipi_cpu(cpu, IPI_HARDCLOCK);
#endif
}

/*
 * Report or change the active event timer hardware.
 */
static int
sysctl_kern_eventtimer_timer(SYSCTL_HANDLER_ARGS)
{
	char buf[32];
	struct eventtimer *et;
	int error;

	ET_LOCK();
	et = timer;
	snprintf(buf, sizeof(buf), "%s", et->et_name);
	ET_UNLOCK();
	error = sysctl_handle_string(oidp, buf, sizeof(buf), req);
	ET_LOCK();
	et = timer;
	if (error != 0 || req->newptr == NULL ||
	    strcasecmp(buf, et->et_name) == 0) {
		ET_UNLOCK();
		return (error);
	}
	et = et_find(buf, 0, 0);
	if (et == NULL) {
		ET_UNLOCK();
		return (ENOENT);
	}
	configtimer(0);
	et_free(timer);
	if (et->et_flags & ET_FLAGS_C3STOP)
		cpu_disable_c3_sleep++;
	if (timer->et_flags & ET_FLAGS_C3STOP)
		cpu_disable_c3_sleep--;
	periodic = want_periodic;
	timer = et;
	et_init(timer, timercb, NULL, NULL);
	configtimer(1);
	ET_UNLOCK();
	return (error);
}
SYSCTL_PROC(_kern_eventtimer, OID_AUTO, timer,
    CTLTYPE_STRING | CTLFLAG_RW | CTLFLAG_MPSAFE,
    0, 0, sysctl_kern_eventtimer_timer, "A", "Chosen event timer");
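
/*
 * The active timer may be switched at runtime, e.g.
 * 'sysctl kern.eventtimer.timer=HPET' (assuming a timer named HPET exists).
 */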

/*
 * Report or change the active event timer periodicity.
 */
static int
sysctl_kern_eventtimer_periodic(SYSCTL_HANDLER_ARGS)
{
	int error, val;

	val = periodic;
	error = sysctl_handle_int(oidp, &val, 0, req);
	if (error != 0 || req->newptr == NULL)
		return (error);
	ET_LOCK();
	configtimer(0);
	periodic = want_periodic = val;
	configtimer(1);
	ET_UNLOCK();
	return (error);
}
SYSCTL_PROC(_kern_eventtimer, OID_AUTO, periodic,
    CTLTYPE_INT | CTLFLAG_RW | CTLFLAG_MPSAFE,
    0, 0, sysctl_kern_eventtimer_periodic, "I", "Enable event timer periodic mode");

#include "opt_ddb.h"

#ifdef DDB
#include <ddb/ddb.h>

DB_SHOW_COMMAND(clocksource, db_show_clocksource)
{
	struct pcpu_state *st;
	int c;

	CPU_FOREACH(c) {
		st = DPCPU_ID_PTR(c, timerstate);
		db_printf(
		    "CPU %2d: action %d handle %d  ipi %d idle %d\n"
		    "        now %#jx nevent %#jx (%jd)\n"
		    "        ntick %#jx (%jd) nhard %#jx (%jd)\n"
		    "        nstat %#jx (%jd) nprof %#jx (%jd)\n"
		    "        ncall %#jx (%jd) ncallopt %#jx (%jd)\n",
		    c, st->action, st->handle, st->ipi, st->idle,
		    (uintmax_t)st->now,
		    (uintmax_t)st->nextevent,
		    (uintmax_t)(st->nextevent - st->now) / tick_sbt,
		    (uintmax_t)st->nexttick,
		    (uintmax_t)(st->nexttick - st->now) / tick_sbt,
		    (uintmax_t)st->nexthard,
		    (uintmax_t)(st->nexthard - st->now) / tick_sbt,
		    (uintmax_t)st->nextstat,
		    (uintmax_t)(st->nextstat - st->now) / tick_sbt,
		    (uintmax_t)st->nextprof,
		    (uintmax_t)(st->nextprof - st->now) / tick_sbt,
		    (uintmax_t)st->nextcall,
		    (uintmax_t)(st->nextcall - st->now) / tick_sbt,
		    (uintmax_t)st->nextcallopt,
		    (uintmax_t)(st->nextcallopt - st->now) / tick_sbt);
	}
}

#endif
