/*-
 * Copyright (c) 2003-2008 Joseph Koshy
 * Copyright (c) 2007 The FreeBSD Foundation
 * All rights reserved.
 *
 * Portions of this software were developed by A. Joseph Koshy under
 * sponsorship from the FreeBSD Foundation and Google, Inc.
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 * 1. Redistributions of source code must retain the above copyright
 *    notice, this list of conditions and the following disclaimer.
 * 2. Redistributions in binary form must reproduce the above copyright
 *    notice, this list of conditions and the following disclaimer in the
 *    documentation and/or other materials provided with the distribution.
 *
 * THIS SOFTWARE IS PROVIDED BY THE AUTHORS AND CONTRIBUTORS ``AS IS'' AND
 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
 * ARE DISCLAIMED.  IN NO EVENT SHALL THE AUTHORS OR CONTRIBUTORS BE LIABLE
 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
 * SUCH DAMAGE.
 */

#include <sys/cdefs.h>
__FBSDID("$FreeBSD$");

#include "opt_hwpmc_hooks.h"

#include <sys/types.h>
#include <sys/ctype.h>
#include <sys/param.h>
#include <sys/malloc.h>
#include <sys/kernel.h>
#include <sys/lock.h>
#include <sys/mutex.h>
#include <sys/pmc.h>
#include <sys/pmckern.h>
#include <sys/smp.h>
#include <sys/sysctl.h>
#include <sys/systm.h>

#ifdef	HWPMC_HOOKS
FEATURE(hwpmc_hooks, "Kernel support for HW PMC");
#define	PMC_KERNEL_VERSION	PMC_VERSION
#else
#define	PMC_KERNEL_VERSION	0
#endif

MALLOC_DECLARE(M_PMCHOOKS);
MALLOC_DEFINE(M_PMCHOOKS, "pmchooks", "Memory space for PMC hooks");

const int pmc_kernel_version = PMC_KERNEL_VERSION;
/* Hook variable. */
int (*pmc_hook)(struct thread *td, int function, void *arg) = NULL;

/* Interrupt handler. */
int (*pmc_intr)(int cpu, struct trapframe *tf) = NULL;

/* Bitmask of CPUs requiring servicing at hardclock time. */
volatile cpuset_t pmc_cpumask;

/*
 * A global count of SS mode PMCs.  When non-zero, this means that
 * we have processes that are sampling the system as a whole.
 */
volatile int pmc_ss_count;

/*
 * Since hwpmc(4) may not be loaded in the currently running kernel,
 * the convention followed is that a non-NULL value of 'pmc_hook'
 * implies the presence of this kernel module.
 *
 * This requires us to protect 'pmc_hook' with a shared (sx) lock,
 * making the process of calling into hwpmc(4) somewhat more expensive
 * than a simple 'if' check and indirect call.
 */
struct sx pmc_sx;
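
/*
 * Illustrative sketch only (not compiled into the kernel): the calling
 * convention described above amounts to taking 'pmc_sx' shared, testing
 * 'pmc_hook' for NULL, and making the indirect call.  The hypothetical
 * function below shows the pattern; actual callers go through the
 * wrapper macros provided by <sys/pmckern.h>.
 */
#if 0
static int
pmc_example_call_hook(struct thread *td, int function, void *arg)
{
	int error;

	error = 0;
	sx_slock(&pmc_sx);
	if (pmc_hook != NULL)		/* hwpmc(4) is loaded. */
		error = (*pmc_hook)(td, function, arg);
	sx_sunlock(&pmc_sx);
	return (error);
}
#endif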

/*
 * Per-CPU trapframes used by PMC soft events.
 */
struct trapframe pmc_tf[MAXCPU];

/*
 * PMC soft events use a global table to store registered events.
 */

SYSCTL_NODE(_kern, OID_AUTO, hwpmc, CTLFLAG_RW, 0, "HWPMC parameters");

static int pmc_softevents = 16;
TUNABLE_INT(PMC_SYSCTL_NAME_PREFIX "softevents", &pmc_softevents);
SYSCTL_INT(_kern_hwpmc, OID_AUTO, softevents, CTLFLAG_TUN|CTLFLAG_RD,
    &pmc_softevents, 0, "maximum number of soft events");
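
/*
 * For example (illustrative value only), a system expecting many soft
 * event providers could raise the table size at boot time by adding
 *
 *	kern.hwpmc.softevents="64"
 *
 * to /boot/loader.conf; out-of-range values are reset to
 * PMC_EV_DYN_COUNT by init_hwpmc() below.
 */
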
102
103struct mtx pmc_softs_mtx;
104int pmc_softs_count;
105struct pmc_soft **pmc_softs;
106
107MTX_SYSINIT(pmc_soft_mtx, &pmc_softs_mtx, "pmc-softs", MTX_SPIN);
108
109static void
110pmc_init_sx(void)
111{
112	sx_init_flags(&pmc_sx, "pmc-sx", SX_NOWITNESS);
113}
114
115SYSINIT(pmcsx, SI_SUB_LOCK, SI_ORDER_MIDDLE, pmc_init_sx, NULL);
116
117/*
118 * Helper functions.
119 */
120
121/*
122 * A note on the CPU numbering scheme used by the hwpmc(4) driver.
123 *
124 * CPUs are denoted using numbers in the range 0..[pmc_cpu_max()-1].
125 * CPUs could be numbered "sparsely" in this range; the predicate
126 * `pmc_cpu_is_present()' is used to test whether a given CPU is
127 * physically present.
128 *
129 * Further, a CPU that is physically present may be administratively
130 * disabled or otherwise unavailable for use by hwpmc(4).  The
131 * `pmc_cpu_is_active()' predicate tests for CPU usability.  An
132 * "active" CPU participates in thread scheduling and can field
133 * interrupts raised by PMC hardware.
134 *
135 * On systems with hyperthreaded CPUs, multiple logical CPUs may share
136 * PMC hardware resources.  For such processors one logical CPU is
137 * denoted as the primary owner of the in-CPU PMC resources. The
138 * pmc_cpu_is_primary() predicate is used to distinguish this primary
139 * CPU from the others.
140 */
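
/*
 * Illustrative sketch only (not compiled): a per-CPU walk following the
 * numbering scheme described above skips absent and inactive CPUs
 * explicitly, since CPU numbers may be sparse.  The hypothetical
 * visit_cpu() callback below is just a placeholder.
 */
#if 0
static void
pmc_example_foreach_active_cpu(void (*visit_cpu)(int cpu))
{
	unsigned int cpu;

	for (cpu = 0; cpu < pmc_cpu_max(); cpu++) {
		if (!pmc_cpu_is_present(cpu))	/* hole in the numbering */
			continue;
		if (!pmc_cpu_is_active(cpu))	/* halted/disabled CPU */
			continue;
		(*visit_cpu)(cpu);
	}
}
#endif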

int
pmc_cpu_is_active(int cpu)
{
#ifdef	SMP
	return (pmc_cpu_is_present(cpu) &&
	    !CPU_ISSET(cpu, &hlt_cpus_mask));
#else
	return (1);
#endif
}

/* Deprecated. */
int
pmc_cpu_is_disabled(int cpu)
{
	return (!pmc_cpu_is_active(cpu));
}

int
pmc_cpu_is_present(int cpu)
{
#ifdef	SMP
	return (!CPU_ABSENT(cpu));
#else
	return (1);
#endif
}

int
pmc_cpu_is_primary(int cpu)
{
#ifdef	SMP
	return (!CPU_ISSET(cpu, &logical_cpus_mask));
#else
	return (1);
#endif
}

/*
 * Return the maximum CPU number supported by the system.  The return
 * value is used for scaling internal data structures and for runtime
 * checks.
 */
unsigned int
pmc_cpu_max(void)
{
#ifdef	SMP
	return (mp_maxid + 1);
#else
	return (1);
#endif
}

#ifdef	INVARIANTS

/*
 * Return the count of CPUs in the `active' state in the system.
 */
int
pmc_cpu_max_active(void)
{
#ifdef	SMP
	/*
	 * When support for CPU hot-plugging is added to the kernel,
	 * this function would change to return the current number
	 * of "active" CPUs.
	 */
	return (mp_ncpus);
#else
	return (1);
#endif
}

#endif

/*
 * Clean up an event name:
 * - remove leading, trailing, and duplicate '_' characters
 * - convert to upper case
 */
static void
pmc_soft_namecleanup(char *name)
{
	char *p, *q;

	p = q = name;

	for ( ; *p == '_' ; p++)
		;
	for ( ; *p ; p++) {
		if (*p == '_' && (*(p + 1) == '_' || *(p + 1) == '\0'))
			continue;
		else
			*q++ = toupper(*p);
	}
	*q = '\0';
}
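
/*
 * For example (illustrative name only), a provider-supplied name of
 * "__dtrace__probe_" would be cleaned up to "DTRACE_PROBE" before the
 * event is entered into the table below.
 */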

void
pmc_soft_ev_register(struct pmc_soft *ps)
{
	static int warned = 0;
	int n;

	ps->ps_running = 0;
	ps->ps_ev.pm_ev_code = 0; /* invalid */
	pmc_soft_namecleanup(ps->ps_ev.pm_ev_name);

	mtx_lock_spin(&pmc_softs_mtx);

	if (pmc_softs_count >= pmc_softevents) {
		/*
		 * XXX Reusing table slots opens a window in which a
		 * newly allocated event code may still be interpreted
		 * as the event that previously occupied the slot.
		 */
		for (n = 0; n < pmc_softevents; n++)
			if (pmc_softs[n] == NULL)
				break;
		if (n == pmc_softevents) {
			mtx_unlock_spin(&pmc_softs_mtx);
			if (!warned) {
				printf("hwpmc: too many soft events, "
				    "increase kern.hwpmc.softevents tunable\n");
				warned = 1;
			}
			return;
		}

		ps->ps_ev.pm_ev_code = PMC_EV_SOFT_FIRST + n;
		pmc_softs[n] = ps;
	} else {
		ps->ps_ev.pm_ev_code = PMC_EV_SOFT_FIRST + pmc_softs_count;
		pmc_softs[pmc_softs_count++] = ps;
	}

	mtx_unlock_spin(&pmc_softs_mtx);
}

void
pmc_soft_ev_deregister(struct pmc_soft *ps)
{

	KASSERT(ps != NULL, ("pmc_soft_deregister: called with NULL"));

	mtx_lock_spin(&pmc_softs_mtx);

	if (ps->ps_ev.pm_ev_code != 0 &&
	    (ps->ps_ev.pm_ev_code - PMC_EV_SOFT_FIRST) < pmc_softevents) {
		KASSERT(ps->ps_ev.pm_ev_code >= PMC_EV_SOFT_FIRST &&
		    ps->ps_ev.pm_ev_code <= PMC_EV_SOFT_LAST,
		    ("pmc_soft_deregister: invalid event value"));
		pmc_softs[ps->ps_ev.pm_ev_code - PMC_EV_SOFT_FIRST] = NULL;
	}

	mtx_unlock_spin(&pmc_softs_mtx);
}

/*
 * Look up a registered soft event.  On success the event is returned
 * with pmc_softs_mtx held and the caller must pair the call with
 * pmc_soft_ev_release(); on failure NULL is returned with the mutex
 * dropped.
 */
struct pmc_soft *
pmc_soft_ev_acquire(enum pmc_event ev)
{
	struct pmc_soft *ps;

	if (ev == 0 || (ev - PMC_EV_SOFT_FIRST) >= pmc_softevents)
		return (NULL);

	KASSERT(ev >= PMC_EV_SOFT_FIRST &&
	    ev <= PMC_EV_SOFT_LAST,
	    ("event out of range"));

	mtx_lock_spin(&pmc_softs_mtx);

	ps = pmc_softs[ev - PMC_EV_SOFT_FIRST];
	if (ps == NULL)
		mtx_unlock_spin(&pmc_softs_mtx);

	return (ps);
}

void
pmc_soft_ev_release(struct pmc_soft *ps)
{

	mtx_unlock_spin(&pmc_softs_mtx);
}
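
/*
 * Illustrative sketch only (not compiled): how an in-kernel provider
 * might register and deregister a soft event around module load and
 * unload.  The "example" provider name and the hypothetical
 * example_soft_ev variable are placeholders, and the initializer
 * assumes the struct pmc_soft layout declared in <sys/pmckern.h>.
 * Real providers normally use the PMC_SOFT_DEFINE()/PMC_SOFT_CALL()
 * convenience macros from that header, which wrap this pattern and
 * fire the event through 'pmc_hook'.
 */
#if 0
static struct pmc_soft example_soft_ev = {
	.ps_running = 0,
	.ps_ev.pm_ev_name = "EXAMPLE_PROVIDER_EVENT",
};

static void
example_provider_load(void)
{
	/* Assigns a code in [PMC_EV_SOFT_FIRST, PMC_EV_SOFT_LAST]. */
	pmc_soft_ev_register(&example_soft_ev);
}

static void
example_provider_unload(void)
{
	/* Frees the table slot; the code may be reused later. */
	pmc_soft_ev_deregister(&example_soft_ev);
}
#endif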

/*
 * Initialise hwpmc.
 */
static void
init_hwpmc(void *dummy __unused)
{
	if (pmc_softevents <= 0 ||
	    pmc_softevents > PMC_EV_DYN_COUNT) {
		(void) printf("hwpmc: tunable \"softevents\"=%d out of "
		    "range.\n", pmc_softevents);
		pmc_softevents = PMC_EV_DYN_COUNT;
	}
	pmc_softs = malloc(pmc_softevents * sizeof(struct pmc_soft *),
	    M_PMCHOOKS, M_NOWAIT|M_ZERO);
	KASSERT(pmc_softs != NULL, ("cannot allocate soft events table"));
}

SYSINIT(hwpmc, SI_SUB_KDTRACE, SI_ORDER_FIRST, init_hwpmc, NULL);