tsc.c revision 328386
/*-
 * Copyright (c) 1998-2003 Poul-Henning Kamp
 * All rights reserved.
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 * 1. Redistributions of source code must retain the above copyright
 *    notice, this list of conditions and the following disclaimer.
 * 2. Redistributions in binary form must reproduce the above copyright
 *    notice, this list of conditions and the following disclaimer in the
 *    documentation and/or other materials provided with the distribution.
 *
 * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
 * ARE DISCLAIMED.  IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
 * SUCH DAMAGE.
 */

#include <sys/cdefs.h>
__FBSDID("$FreeBSD: stable/11/sys/x86/x86/tsc.c 328386 2018-01-25 02:45:21Z pkelsey $");

#include "opt_compat.h"
#include "opt_clock.h"

#include <sys/param.h>
#include <sys/bus.h>
#include <sys/cpu.h>
#include <sys/limits.h>
#include <sys/malloc.h>
#include <sys/systm.h>
#include <sys/sysctl.h>
#include <sys/time.h>
#include <sys/timetc.h>
#include <sys/kernel.h>
#include <sys/power.h>
#include <sys/smp.h>
#include <sys/vdso.h>
#include <machine/clock.h>
#include <machine/cputypes.h>
#include <machine/md_var.h>
#include <machine/specialreg.h>
#include <x86/vmware.h>
#include <dev/acpica/acpi_hpet.h>

#include "cpufreq_if.h"

uint64_t	tsc_freq;
int		tsc_is_invariant;
int		tsc_perf_stat;

static eventhandler_tag tsc_levels_tag, tsc_pre_tag, tsc_post_tag;

SYSCTL_INT(_kern_timecounter, OID_AUTO, invariant_tsc, CTLFLAG_RDTUN,
    &tsc_is_invariant, 0, "Indicates whether the TSC is P-state invariant");

#ifdef SMP
int	smp_tsc;
SYSCTL_INT(_kern_timecounter, OID_AUTO, smp_tsc, CTLFLAG_RDTUN, &smp_tsc, 0,
    "Indicates whether the TSC is safe to use in SMP mode");

int	smp_tsc_adjust = 0;
SYSCTL_INT(_kern_timecounter, OID_AUTO, smp_tsc_adjust, CTLFLAG_RDTUN,
    &smp_tsc_adjust, 0, "Try to adjust TSC on APs to match BSP");
#endif
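
/*
 * Usage note (illustrative, not part of the original source): these are
 * CTLFLAG_RDTUN knobs, i.e. read-only sysctls whose values come from
 * loader tunables, e.g. in /boot/loader.conf:
 *
 *	kern.timecounter.smp_tsc=1
 *	kern.timecounter.smp_tsc_adjust=1
 *
 * The first asserts that the TSCs are synchronized across CPUs; the
 * second bounds how many adjustment attempts the boot-time test below
 * may make before giving up.
 */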

static int	tsc_shift = 1;
SYSCTL_INT(_kern_timecounter, OID_AUTO, tsc_shift, CTLFLAG_RDTUN,
    &tsc_shift, 0, "Shift to pre-apply for the maximum TSC frequency");

static int	tsc_disabled;
SYSCTL_INT(_machdep, OID_AUTO, disable_tsc, CTLFLAG_RDTUN, &tsc_disabled, 0,
    "Disable x86 Time Stamp Counter");

static int	tsc_skip_calibration;
SYSCTL_INT(_machdep, OID_AUTO, disable_tsc_calibration, CTLFLAG_RDTUN,
    &tsc_skip_calibration, 0, "Disable TSC frequency calibration");

static void tsc_freq_changed(void *arg, const struct cf_level *level,
    int status);
static void tsc_freq_changing(void *arg, const struct cf_level *level,
    int *status);
static unsigned tsc_get_timecount(struct timecounter *tc);
static inline unsigned tsc_get_timecount_low(struct timecounter *tc);
static unsigned tsc_get_timecount_lfence(struct timecounter *tc);
static unsigned tsc_get_timecount_low_lfence(struct timecounter *tc);
static unsigned tsc_get_timecount_mfence(struct timecounter *tc);
static unsigned tsc_get_timecount_low_mfence(struct timecounter *tc);
static void tsc_levels_changed(void *arg, int unit);
static uint32_t x86_tsc_vdso_timehands(struct vdso_timehands *vdso_th,
    struct timecounter *tc);
#ifdef COMPAT_FREEBSD32
static uint32_t x86_tsc_vdso_timehands32(struct vdso_timehands32 *vdso_th32,
    struct timecounter *tc);
#endif

static struct timecounter tsc_timecounter = {
	.tc_get_timecount =		tsc_get_timecount,
	.tc_counter_mask =		~0u,
	.tc_name =			"TSC",
	.tc_quality =			800,	/* adjusted in code */
	.tc_fill_vdso_timehands =	x86_tsc_vdso_timehands,
#ifdef COMPAT_FREEBSD32
	.tc_fill_vdso_timehands32 =	x86_tsc_vdso_timehands32,
#endif
};

static void
tsc_freq_vmware(void)
{
	u_int regs[4];

	if (hv_high >= 0x40000010) {
		do_cpuid(0x40000010, regs);
		tsc_freq = regs[0] * 1000;
	} else {
		vmware_hvcall(VMW_HVCMD_GETHZ, regs);
		if (regs[1] != UINT_MAX)
			tsc_freq = regs[0] | ((uint64_t)regs[1] << 32);
	}
	tsc_is_invariant = 1;
}
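
/*
 * Note (illustrative, not from the original source): CPUID leaf
 * 0x40000010 is the de-facto hypervisor "timing information" leaf; %eax
 * reports the TSC frequency in kHz, hence the multiplication by 1000
 * above.  The older VMW_HVCMD_GETHZ backdoor call returns the frequency
 * in Hz as a 64-bit value split across regs[0] (low half) and regs[1]
 * (high half), with regs[1] == UINT_MAX apparently signalling an
 * unsupported call.
 */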

static void
tsc_freq_intel(void)
{
	char brand[48];
	u_int regs[4];
	uint64_t freq;
	char *p;
	u_int i;

	/*
	 * Intel Processor Identification and the CPUID Instruction
	 * Application Note 485.
	 * http://www.intel.com/assets/pdf/appnote/241618.pdf
	 */
	if (cpu_exthigh >= 0x80000004) {
		p = brand;
		for (i = 0x80000002; i < 0x80000005; i++) {
			do_cpuid(i, regs);
			memcpy(p, regs, sizeof(regs));
			p += sizeof(regs);
		}
		p = NULL;
		for (i = 0; i < sizeof(brand) - 1; i++)
			if (brand[i] == 'H' && brand[i + 1] == 'z')
				p = brand + i;
		if (p != NULL) {
			p -= 5;
			switch (p[4]) {
			case 'M':
				i = 1;
				break;
			case 'G':
				i = 1000;
				break;
			case 'T':
				i = 1000000;
				break;
			default:
				return;
			}
#define	C2D(c)	((c) - '0')
			if (p[1] == '.') {
				freq = C2D(p[0]) * 1000;
				freq += C2D(p[2]) * 100;
				freq += C2D(p[3]) * 10;
				freq *= i * 1000;
			} else {
				freq = C2D(p[0]) * 1000;
				freq += C2D(p[1]) * 100;
				freq += C2D(p[2]) * 10;
				freq += C2D(p[3]);
				freq *= i * 1000000;
			}
#undef C2D
			tsc_freq = freq;
		}
	}
}
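
/*
 * Worked example for the brand-string parser above (illustrative only):
 * given a brand string ending in "... 3.20GHz", p lands on "3.20G", five
 * characters before the 'H'.  p[4] == 'G' selects i = 1000, and since
 * p[1] == '.':
 *
 *	freq = 3*1000 + 2*100 + 0*10;		= 3200
 *	freq *= 1000 * 1000;			= 3,200,000,000 Hz
 *
 * A string such as "... 2800MHz" takes the other branch: i = 1,
 * freq = 2800, then freq *= 1,000,000 for 2.8 GHz.
 */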

static void
probe_tsc_freq(void)
{
	u_int regs[4];
	uint64_t tsc1, tsc2;

	if (cpu_high >= 6) {
		do_cpuid(6, regs);
		if ((regs[2] & CPUID_PERF_STAT) != 0) {
			/*
			 * XXX Some emulators expose host CPUID without actual
			 * support for these MSRs.  We must test whether they
			 * really work.
			 */
			wrmsr(MSR_MPERF, 0);
			wrmsr(MSR_APERF, 0);
			DELAY(10);
			if (rdmsr(MSR_MPERF) > 0 && rdmsr(MSR_APERF) > 0)
				tsc_perf_stat = 1;
		}
	}

	if (vm_guest == VM_GUEST_VMWARE) {
		tsc_freq_vmware();
		return;
	}

	switch (cpu_vendor_id) {
	case CPU_VENDOR_AMD:
		if ((amd_pminfo & AMDPM_TSC_INVARIANT) != 0 ||
		    (vm_guest == VM_GUEST_NO &&
		    CPUID_TO_FAMILY(cpu_id) >= 0x10))
			tsc_is_invariant = 1;
		if (cpu_feature & CPUID_SSE2) {
			tsc_timecounter.tc_get_timecount =
			    tsc_get_timecount_mfence;
		}
		break;
	case CPU_VENDOR_INTEL:
		if ((amd_pminfo & AMDPM_TSC_INVARIANT) != 0 ||
		    (vm_guest == VM_GUEST_NO &&
		    ((CPUID_TO_FAMILY(cpu_id) == 0x6 &&
		    CPUID_TO_MODEL(cpu_id) >= 0xe) ||
		    (CPUID_TO_FAMILY(cpu_id) == 0xf &&
		    CPUID_TO_MODEL(cpu_id) >= 0x3))))
			tsc_is_invariant = 1;
		if (cpu_feature & CPUID_SSE2) {
			tsc_timecounter.tc_get_timecount =
			    tsc_get_timecount_lfence;
		}
		break;
	case CPU_VENDOR_CENTAUR:
		if (vm_guest == VM_GUEST_NO &&
		    CPUID_TO_FAMILY(cpu_id) == 0x6 &&
		    CPUID_TO_MODEL(cpu_id) >= 0xf &&
		    (rdmsr(0x1203) & 0x100000000ULL) == 0)
			tsc_is_invariant = 1;
		if (cpu_feature & CPUID_SSE2) {
			tsc_timecounter.tc_get_timecount =
			    tsc_get_timecount_lfence;
		}
		break;
	}

	if (tsc_skip_calibration) {
		if (cpu_vendor_id == CPU_VENDOR_INTEL)
			tsc_freq_intel();
		return;
	}

	if (bootverbose)
		printf("Calibrating TSC clock ... ");
	tsc1 = rdtsc();
	DELAY(1000000);
	tsc2 = rdtsc();
	tsc_freq = tsc2 - tsc1;
	if (bootverbose)
		printf("TSC clock: %ju Hz\n", (intmax_t)tsc_freq);
}
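
/*
 * The calibration arithmetic above, spelled out (illustrative): DELAY()
 * takes microseconds, so DELAY(1000000) busy-waits approximately one
 * second.  The difference of the two TSC reads over that interval is
 * therefore ticks-per-second, i.e. the TSC frequency in Hz; on a 3.2 GHz
 * part, tsc2 - tsc1 comes out near 3200000000.
 */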

void
init_TSC(void)
{

	if ((cpu_feature & CPUID_TSC) == 0 || tsc_disabled)
		return;

#ifdef __i386__
	/* The TSC is known to be broken on certain CPUs. */
	switch (cpu_vendor_id) {
	case CPU_VENDOR_AMD:
		switch (cpu_id & 0xFF0) {
		case 0x500:
			/* K5 Model 0 */
			return;
		}
		break;
	case CPU_VENDOR_CENTAUR:
		switch (cpu_id & 0xff0) {
		case 0x540:
			/*
			 * http://www.centtech.com/c6_data_sheet.pdf
			 *
			 * I-12 RDTSC may return incoherent values in EDX:EAX
			 * I-13 RDTSC hangs when certain event counters are used
			 */
			return;
		}
		break;
	case CPU_VENDOR_NSC:
		switch (cpu_id & 0xff0) {
		case 0x540:
			if ((cpu_id & CPUID_STEPPING) == 0)
				return;
			break;
		}
		break;
	}
#endif

	probe_tsc_freq();

	/*
	 * Inform CPU accounting about our boot-time clock rate.  This will
	 * be updated if someone loads a cpufreq driver after boot that
	 * discovers a new max frequency.
	 */
	if (tsc_freq != 0)
		set_cputicker(rdtsc, tsc_freq, !tsc_is_invariant);

	if (tsc_is_invariant)
		return;

	/* Register to find out about changes in CPU frequency. */
	tsc_pre_tag = EVENTHANDLER_REGISTER(cpufreq_pre_change,
	    tsc_freq_changing, NULL, EVENTHANDLER_PRI_FIRST);
	tsc_post_tag = EVENTHANDLER_REGISTER(cpufreq_post_change,
	    tsc_freq_changed, NULL, EVENTHANDLER_PRI_FIRST);
	tsc_levels_tag = EVENTHANDLER_REGISTER(cpufreq_levels_changed,
	    tsc_levels_changed, NULL, EVENTHANDLER_PRI_ANY);
}

#ifdef SMP

/*
 * RDTSC is not a serializing instruction and does not drain the
 * instruction stream, so the stream must be drained before executing
 * it.  This could be fixed by using RDTSCP, except that instruction is
 * not available everywhere.
 *
 * Use CPUID for draining in the boot-time SMP consistency test.  The
 * timecounters use MFENCE for AMD CPUs, and LFENCE for others (Intel
 * and VIA) when SSE2 is present, and nothing on older machines, which
 * also do not issue RDTSC prematurely.  There, testing for SSE2 and
 * the vendor is too cumbersome, and we learn about TSC presence from
 * CPUID.
 *
 * Do not use do_cpuid(), since we do not need the CPUID results, which
 * would have to be written into memory by do_cpuid().
 */
#define	TSC_READ(x)							\
static void								\
tsc_read_##x(void *arg)							\
{									\
	uint64_t *tsc = arg;						\
	u_int cpu = PCPU_GET(cpuid);					\
									\
	__asm __volatile("cpuid" : : : "eax", "ebx", "ecx", "edx");	\
	tsc[cpu * 3 + x] = rdtsc();					\
}
TSC_READ(0)
TSC_READ(1)
TSC_READ(2)
#undef TSC_READ
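
/*
 * Layout note (illustrative): the rendezvous buffer holds three TSC
 * samples per CPU per round, at tsc[cpu * 3 + 0..2].  Each
 * smp_rendezvous(tsc_read_0, tsc_read_1, tsc_read_2, tsc) call below has
 * every CPU record sample 0 in the setup phase, sample 1 in the action
 * phase, and sample 2 in the teardown phase; the barriers between phases
 * order phase k on every CPU before phase k+1 on any CPU, which is what
 * lets the comparison and adjustment code reason about the samples.
 */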

#define	N	1000

static void
comp_smp_tsc(void *arg)
{
	uint64_t *tsc;
	int64_t d1, d2;
	u_int cpu = PCPU_GET(cpuid);
	u_int i, j, size;

	size = (mp_maxid + 1) * 3;
	for (i = 0, tsc = arg; i < N; i++, tsc += size)
		CPU_FOREACH(j) {
			if (j == cpu)
				continue;
			d1 = tsc[cpu * 3 + 1] - tsc[j * 3];
			d2 = tsc[cpu * 3 + 2] - tsc[j * 3 + 1];
			if (d1 <= 0 || d2 <= 0) {
				smp_tsc = 0;
				return;
			}
		}
}
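
/*
 * What the test above checks (illustrative): since phase k on CPU j
 * happens before phase k+1 on this CPU, synchronized TSCs must satisfy,
 * for every round and every pair (cpu, j):
 *
 *	tsc[j * 3 + 0] < tsc[cpu * 3 + 1]	(d1 > 0)
 *	tsc[j * 3 + 1] < tsc[cpu * 3 + 2]	(d2 > 0)
 *
 * A CPU whose counter lags another's by more than the rendezvous latency
 * violates one of the inequalities, and smp_tsc is cleared.
 */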

static void
adj_smp_tsc(void *arg)
{
	uint64_t *tsc;
	int64_t d, min, max;
	u_int cpu = PCPU_GET(cpuid);
	u_int first, i, size;

	first = CPU_FIRST();
	if (cpu == first)
		return;
	min = INT64_MIN;
	max = INT64_MAX;
	size = (mp_maxid + 1) * 3;
	for (i = 0, tsc = arg; i < N; i++, tsc += size) {
		d = tsc[first * 3] - tsc[cpu * 3 + 1];
		if (d > min)
			min = d;
		d = tsc[first * 3 + 1] - tsc[cpu * 3 + 2];
		if (d > min)
			min = d;
		d = tsc[first * 3 + 1] - tsc[cpu * 3];
		if (d < max)
			max = d;
		d = tsc[first * 3 + 2] - tsc[cpu * 3 + 1];
		if (d < max)
			max = d;
	}
	if (min > max)
		return;
	d = min / 2 + max / 2;
	__asm __volatile (
		"movl $0x10, %%ecx\n\t"
		"rdmsr\n\t"
		"addl %%edi, %%eax\n\t"
		"adcl %%esi, %%edx\n\t"
		"wrmsr\n"
		: /* No output */
		: "D" ((uint32_t)d), "S" ((uint32_t)(d >> 32))
		: "ax", "cx", "dx", "cc"
	);
}
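
/*
 * Sketch of the adjustment above (illustrative): each pair of rendezvous
 * phases bounds the BSP-minus-AP counter offset from below (collected in
 * min) and from above (collected in max), so after N rounds the interval
 * [min, max] is known to contain the true offset.  The midpoint
 * min/2 + max/2 is then added to this AP's TSC by read-modify-writing
 * MSR 0x10 (IA32_TIME_STAMP_COUNTER), with the 64-bit add carried across
 * %edx:%eax.  An empty interval (min > max) means the samples were
 * inconsistent, and no adjustment is attempted.
 */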

static int
test_tsc(void)
{
	uint64_t *data, *tsc;
	u_int i, size, adj;

	if ((!smp_tsc && !tsc_is_invariant) || vm_guest)
		return (-100);
	size = (mp_maxid + 1) * 3;
	data = malloc(sizeof(*data) * size * N, M_TEMP, M_WAITOK);
	adj = 0;
retry:
	for (i = 0, tsc = data; i < N; i++, tsc += size)
		smp_rendezvous(tsc_read_0, tsc_read_1, tsc_read_2, tsc);
	smp_tsc = 1;	/* XXX */
	smp_rendezvous(smp_no_rendezvous_barrier, comp_smp_tsc,
	    smp_no_rendezvous_barrier, data);
	if (!smp_tsc && adj < smp_tsc_adjust) {
		adj++;
		smp_rendezvous(smp_no_rendezvous_barrier, adj_smp_tsc,
		    smp_no_rendezvous_barrier, data);
		goto retry;
	}
	free(data, M_TEMP);
	if (bootverbose)
		printf("SMP: %sed TSC synchronization test%s\n",
		    smp_tsc ? "pass" : "fail",
		    adj > 0 ? " after adjustment" : "");
	if (smp_tsc && tsc_is_invariant) {
		switch (cpu_vendor_id) {
		case CPU_VENDOR_AMD:
			/*
			 * Starting with Family 15h processors, TSC clock
			 * source is in the north bridge.  Check whether
			 * we have a single-socket/multi-core platform.
			 * XXX Need more work for complex cases.
			 */
			if (CPUID_TO_FAMILY(cpu_id) < 0x15 ||
			    (amd_feature2 & AMDID2_CMP) == 0 ||
			    smp_cpus > (cpu_procinfo2 & AMDID_CMP_CORES) + 1)
				break;
			return (1000);
		case CPU_VENDOR_INTEL:
			/*
			 * XXX Assume Intel platforms have synchronized TSCs.
			 */
			return (1000);
		}
		return (800);
	}
	return (-100);
}

#undef N

#else

/*
 * This function is never called; it is provided to avoid a linking
 * failure on uniprocessor kernels.
 */
static int
test_tsc(void)
{

	return (0);
}

#endif /* SMP */

static void
init_TSC_tc(void)
{
	uint64_t max_freq;
	int shift;

	if ((cpu_feature & CPUID_TSC) == 0 || tsc_disabled)
		return;

	/*
	 * Limit timecounter frequency to fit in an int and prevent it from
	 * overflowing too fast.
	 */
	max_freq = UINT_MAX;

	/*
	 * We cannot use the TSC if we support APM.  Precise timekeeping
	 * on an APM'ed machine is at best a fool's pursuit, since
	 * any and all of the time spent in various SMM code can't
	 * be reliably accounted for.  Reading the RTC is your only
	 * source of reliable time info.  The i8254 loses too, of course,
	 * but we need to have some kind of time...
	 * We don't know at this point whether APM is going to be used
	 * or not, nor when it might be activated.  Play it safe.
	 */
	if (power_pm_get_type() == POWER_PM_TYPE_APM) {
		tsc_timecounter.tc_quality = -1000;
		if (bootverbose)
			printf("TSC timecounter disabled: APM enabled.\n");
		goto init;
	}

	/*
	 * Intel CPUs without a C-state invariant TSC can stop the TSC
	 * in either C2 or C3.  Disable use of C2 and C3 while using
	 * the TSC as the timecounter.  The timecounter can be changed
	 * to enable C2 and C3.
	 *
	 * Note that the TSC is used as the cputicker for computing
	 * thread runtime regardless of the timecounter setting, so
	 * using an alternate timecounter and enabling C2 or C3 can
	 * result in incorrect runtimes for kernel idle threads (but not
	 * for any non-idle threads).
	 */
	if (cpu_vendor_id == CPU_VENDOR_INTEL &&
	    (amd_pminfo & AMDPM_TSC_INVARIANT) == 0) {
		tsc_timecounter.tc_flags |= TC_FLAGS_C2STOP;
		if (bootverbose)
			printf("TSC timecounter disables C2 and C3.\n");
	}

	/*
	 * We cannot use the TSC in SMP mode unless the TSCs on all CPUs
	 * are synchronized.  If the user is sure that the system has
	 * synchronized TSCs, set the kern.timecounter.smp_tsc tunable to a
	 * non-zero value.  The TSC seems unreliable in virtualized SMP
	 * environments, so it is set to a negative quality in those cases.
	 */
	if (mp_ncpus > 1)
		tsc_timecounter.tc_quality = test_tsc();
	else if (tsc_is_invariant)
		tsc_timecounter.tc_quality = 1000;
	max_freq >>= tsc_shift;

init:
	for (shift = 0; shift <= 31 && (tsc_freq >> shift) > max_freq; shift++)
		;
	if ((cpu_feature & CPUID_SSE2) != 0 && mp_ncpus > 1) {
		if (cpu_vendor_id == CPU_VENDOR_AMD) {
			tsc_timecounter.tc_get_timecount = shift > 0 ?
			    tsc_get_timecount_low_mfence :
			    tsc_get_timecount_mfence;
		} else {
			tsc_timecounter.tc_get_timecount = shift > 0 ?
			    tsc_get_timecount_low_lfence :
			    tsc_get_timecount_lfence;
		}
	} else {
		tsc_timecounter.tc_get_timecount = shift > 0 ?
		    tsc_get_timecount_low : tsc_get_timecount;
	}
	if (shift > 0) {
		tsc_timecounter.tc_name = "TSC-low";
		if (bootverbose)
			printf("TSC timecounter discards lower %d bit(s)\n",
			    shift);
	}
	if (tsc_freq != 0) {
		tsc_timecounter.tc_frequency = tsc_freq >> shift;
		tsc_timecounter.tc_priv = (void *)(intptr_t)shift;
		tc_init(&tsc_timecounter);
	}
}
SYSINIT(tsc_tc, SI_SUB_SMP, SI_ORDER_ANY, init_TSC_tc, NULL);
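
/*
 * Worked example of the shift selection above (illustrative): with the
 * default tsc_shift of 1, max_freq becomes UINT_MAX >> 1, about 2.15 GHz.
 * On a 3.2 GHz CPU the loop settles on shift == 1, since
 * 3.2e9 >> 1 = 1.6e9 <= max_freq, so the timecounter is registered as
 * "TSC-low", running at 1.6 GHz with bit 0 of the TSC discarded.
 */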

/*
 * When cpufreq levels change, find out about the (new) max frequency.  We
 * use this to update CPU accounting in case it got a lower estimate at boot.
 */
static void
tsc_levels_changed(void *arg, int unit)
{
	device_t cf_dev;
	struct cf_level *levels;
	int count, error;
	uint64_t max_freq;

	/* Only use values from the first CPU, assuming all are equal. */
	if (unit != 0)
		return;

	/* Find the appropriate cpufreq device instance. */
	cf_dev = devclass_get_device(devclass_find("cpufreq"), unit);
	if (cf_dev == NULL) {
		printf("tsc_levels_changed() called but no cpufreq device?\n");
		return;
	}

	/* Get settings from the device and find the max frequency. */
	count = 64;
	levels = malloc(count * sizeof(*levels), M_TEMP, M_NOWAIT);
	if (levels == NULL)
		return;
	error = CPUFREQ_LEVELS(cf_dev, levels, &count);
	if (error == 0 && count != 0) {
		max_freq = (uint64_t)levels[0].total_set.freq * 1000000;
		set_cputicker(rdtsc, max_freq, 1);
	} else
		printf("tsc_levels_changed: no max freq found\n");
	free(levels, M_TEMP);
}

/*
 * If the TSC timecounter is in use, veto the pending change.  It may be
 * possible in the future to handle a dynamically-changing timecounter rate.
 */
static void
tsc_freq_changing(void *arg, const struct cf_level *level, int *status)
{

	if (*status != 0 || timecounter != &tsc_timecounter)
		return;

	printf("timecounter TSC must not be in use when "
	    "changing frequencies; change denied\n");
	*status = EBUSY;
}

/* Update TSC freq with the value indicated by the caller. */
static void
tsc_freq_changed(void *arg, const struct cf_level *level, int status)
{
	uint64_t freq;

	/* If there was an error during the transition, don't do anything. */
	if (tsc_disabled || status != 0)
		return;

	/* Total setting for this level gives the new frequency in MHz. */
	freq = (uint64_t)level->total_set.freq * 1000000;
	atomic_store_rel_64(&tsc_freq, freq);
	tsc_timecounter.tc_frequency =
	    freq >> (int)(intptr_t)tsc_timecounter.tc_priv;
}

static int
sysctl_machdep_tsc_freq(SYSCTL_HANDLER_ARGS)
{
	int error;
	uint64_t freq;

	freq = atomic_load_acq_64(&tsc_freq);
	if (freq == 0)
		return (EOPNOTSUPP);
	error = sysctl_handle_64(oidp, &freq, 0, req);
	if (error == 0 && req->newptr != NULL) {
		atomic_store_rel_64(&tsc_freq, freq);
		atomic_store_rel_64(&tsc_timecounter.tc_frequency,
		    freq >> (int)(intptr_t)tsc_timecounter.tc_priv);
	}
	return (error);
}

SYSCTL_PROC(_machdep, OID_AUTO, tsc_freq, CTLTYPE_U64 | CTLFLAG_RW,
    0, 0, sysctl_machdep_tsc_freq, "QU", "Time Stamp Counter frequency");
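
/*
 * Usage note (illustrative; the values shown are hypothetical): the
 * handler above backs a read-write sysctl, so the measured frequency can
 * be inspected or overridden from userland, e.g.:
 *
 *	# sysctl machdep.tsc_freq
 *	machdep.tsc_freq: 3200012345
 *	# sysctl machdep.tsc_freq=3200000000
 *
 * A write updates both tsc_freq and the registered timecounter frequency,
 * the latter scaled down by the active shift.
 */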

static u_int
tsc_get_timecount(struct timecounter *tc __unused)
{

	return (rdtsc32());
}

static inline u_int
tsc_get_timecount_low(struct timecounter *tc)
{
	uint32_t rv;

	__asm __volatile("rdtsc; shrd %%cl, %%edx, %0"
	    : "=a" (rv) : "c" ((int)(intptr_t)tc->tc_priv) : "edx");
	return (rv);
}
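
/*
 * How the asm above works (illustrative): RDTSC leaves the 64-bit counter
 * in %edx:%eax.  SHRD shifts %eax right by %cl bits, filling the vacated
 * high bits from %edx, so rv ends up holding bits [shift, shift + 31] of
 * the full counter.  In C it would read roughly:
 *
 *	rv = (uint32_t)(rdtsc() >> shift);
 *
 * which is exactly what the "TSC-low" timecounter needs.
 */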

static u_int
tsc_get_timecount_lfence(struct timecounter *tc __unused)
{

	lfence();
	return (rdtsc32());
}

static u_int
tsc_get_timecount_low_lfence(struct timecounter *tc)
{

	lfence();
	return (tsc_get_timecount_low(tc));
}

static u_int
tsc_get_timecount_mfence(struct timecounter *tc __unused)
{

	mfence();
	return (rdtsc32());
}

static u_int
tsc_get_timecount_low_mfence(struct timecounter *tc)
{

	mfence();
	return (tsc_get_timecount_low(tc));
}

static uint32_t
x86_tsc_vdso_timehands(struct vdso_timehands *vdso_th, struct timecounter *tc)
{

	vdso_th->th_algo = VDSO_TH_ALGO_X86_TSC;
	vdso_th->th_x86_shift = (int)(intptr_t)tc->tc_priv;
	vdso_th->th_x86_hpet_idx = 0xffffffff;
	bzero(vdso_th->th_res, sizeof(vdso_th->th_res));
	return (1);
}
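
/*
 * Note (illustrative): exporting the shift in th_x86_shift lets the
 * userspace vDSO time functions reproduce this timecounter without a
 * syscall; the expectation is that libc reads the TSC directly and
 * applies the same scaling, conceptually:
 *
 *	tc_value = (uint32_t)(rdtsc() >> th_x86_shift);
 *
 * th_x86_hpet_idx is set to an invalid index (0xffffffff) because it is
 * only meaningful for the HPET algorithm.
 */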

#ifdef COMPAT_FREEBSD32
static uint32_t
x86_tsc_vdso_timehands32(struct vdso_timehands32 *vdso_th32,
    struct timecounter *tc)
{

	vdso_th32->th_algo = VDSO_TH_ALGO_X86_TSC;
	vdso_th32->th_x86_shift = (int)(intptr_t)tc->tc_priv;
	vdso_th32->th_x86_hpet_idx = 0xffffffff;
	bzero(vdso_th32->th_res, sizeof(vdso_th32->th_res));
	return (1);
}
#endif
