subr_pcpu.c revision 262739
1/*-
2 * Copyright (c) 2001 Wind River Systems, Inc.
3 * All rights reserved.
4 * Written by: John Baldwin <jhb@FreeBSD.org>
5 *
6 * Copyright (c) 2009 Jeffrey Roberson <jeff@freebsd.org>
7 * All rights reserved.
8 *
9 * Redistribution and use in source and binary forms, with or without
10 * modification, are permitted provided that the following conditions
11 * are met:
12 * 1. Redistributions of source code must retain the above copyright
13 *    notice, this list of conditions and the following disclaimer.
14 * 2. Redistributions in binary form must reproduce the above copyright
15 *    notice, this list of conditions and the following disclaimer in the
16 *    documentation and/or other materials provided with the distribution.
17 * 4. Neither the name of the author nor the names of any co-contributors
18 *    may be used to endorse or promote products derived from this software
19 *    without specific prior written permission.
20 *
21 * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
22 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
23 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
24 * ARE DISCLAIMED.  IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
25 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
26 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
27 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
28 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
29 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
30 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
31 * SUCH DAMAGE.
32 */
33
34/*
35 * This module provides MI support for per-cpu data.
36 *
37 * Each architecture determines the mapping of logical CPU IDs to physical
38 * CPUs.  The requirements of this mapping are as follows:
39 *  - Logical CPU IDs must reside in the range 0 ... MAXCPU - 1.
40 *  - The mapping is not required to be dense.  That is, there may be
41 *    gaps in the mappings.
42 *  - The platform sets the value of MAXCPU in <machine/param.h>.
43 *  - It is suggested, but not required, that in the non-SMP case, the
44 *    platform define MAXCPU to be 1 and define the logical ID of the
45 *    sole CPU as 0.
46 */
47
48#include <sys/cdefs.h>
49__FBSDID("$FreeBSD: stable/10/sys/kern/subr_pcpu.c 262739 2014-03-04 14:46:30Z glebius $");
50
51#include "opt_ddb.h"
52
53#include <sys/param.h>
54#include <sys/systm.h>
55#include <sys/sysctl.h>
56#include <sys/lock.h>
57#include <sys/malloc.h>
58#include <sys/pcpu.h>
59#include <sys/proc.h>
60#include <sys/smp.h>
61#include <sys/sx.h>
62#include <vm/uma.h>
63#include <ddb/ddb.h>
64
65static MALLOC_DEFINE(M_PCPU, "Per-cpu", "Per-cpu resource accouting.");
66
67struct dpcpu_free {
68	uintptr_t	df_start;
69	int		df_len;
70	TAILQ_ENTRY(dpcpu_free) df_link;
71};
72
73static DPCPU_DEFINE(char, modspace[DPCPU_MODMIN]);
74static TAILQ_HEAD(, dpcpu_free) dpcpu_head = TAILQ_HEAD_INITIALIZER(dpcpu_head);
75static struct sx dpcpu_lock;
76uintptr_t dpcpu_off[MAXCPU];
77struct pcpu *cpuid_to_pcpu[MAXCPU];
78struct cpuhead cpuhead = STAILQ_HEAD_INITIALIZER(cpuhead);
79
80/*
81 * Initialize the MI portions of a struct pcpu.
82 */
83void
84pcpu_init(struct pcpu *pcpu, int cpuid, size_t size)
85{
86
87	bzero(pcpu, size);
88	KASSERT(cpuid >= 0 && cpuid < MAXCPU,
89	    ("pcpu_init: invalid cpuid %d", cpuid));
90	pcpu->pc_cpuid = cpuid;
91	cpuid_to_pcpu[cpuid] = pcpu;
92	STAILQ_INSERT_TAIL(&cpuhead, pcpu, pc_allcpu);
93	cpu_pcpu_init(pcpu, cpuid, size);
94	pcpu->pc_rm_queue.rmq_next = &pcpu->pc_rm_queue;
95	pcpu->pc_rm_queue.rmq_prev = &pcpu->pc_rm_queue;
96}
97
98void
99dpcpu_init(void *dpcpu, int cpuid)
100{
101	struct pcpu *pcpu;
102
103	pcpu = pcpu_find(cpuid);
104	pcpu->pc_dynamic = (uintptr_t)dpcpu - DPCPU_START;
105
106	/*
107	 * Initialize defaults from our linker section.
108	 */
109	memcpy(dpcpu, (void *)DPCPU_START, DPCPU_BYTES);
110
111	/*
112	 * Place it in the global pcpu offset array.
113	 */
114	dpcpu_off[cpuid] = pcpu->pc_dynamic;
115}
116
117static void
118dpcpu_startup(void *dummy __unused)
119{
120	struct dpcpu_free *df;
121
122	df = malloc(sizeof(*df), M_PCPU, M_WAITOK | M_ZERO);
123	df->df_start = (uintptr_t)&DPCPU_NAME(modspace);
124	df->df_len = DPCPU_MODMIN;
125	TAILQ_INSERT_HEAD(&dpcpu_head, df, df_link);
126	sx_init(&dpcpu_lock, "dpcpu alloc lock");
127}
128SYSINIT(dpcpu, SI_SUB_KLD, SI_ORDER_FIRST, dpcpu_startup, 0);
129
130/*
131 * UMA_PCPU_ZONE zones, that are available for all kernel
132 * consumers. Right now 64 bit zone is used for counter(9)
133 * and pointer zone is used by flowtable.
134 */
135
136uma_zone_t pcpu_zone_64;
137uma_zone_t pcpu_zone_ptr;
138
139static void
140pcpu_zones_startup(void)
141{
142
143	pcpu_zone_64 = uma_zcreate("64 pcpu", sizeof(uint64_t),
144	    NULL, NULL, NULL, NULL, UMA_ALIGN_PTR, UMA_ZONE_PCPU);
145
146	if (sizeof(uint64_t) == sizeof(void *))
147		pcpu_zone_ptr = pcpu_zone_64;
148	else
149		pcpu_zone_ptr = uma_zcreate("ptr pcpu", sizeof(void *),
150		    NULL, NULL, NULL, NULL, UMA_ALIGN_PTR, UMA_ZONE_PCPU);
151}
152SYSINIT(pcpu_zones, SI_SUB_KMEM, SI_ORDER_ANY, pcpu_zones_startup, NULL);
153
154/*
155 * First-fit extent based allocator for allocating space in the per-cpu
156 * region reserved for modules.  This is only intended for use by the
157 * kernel linkers to place module linker sets.
158 */
159void *
160dpcpu_alloc(int size)
161{
162	struct dpcpu_free *df;
163	void *s;
164
165	s = NULL;
166	size = roundup2(size, sizeof(void *));
167	sx_xlock(&dpcpu_lock);
168	TAILQ_FOREACH(df, &dpcpu_head, df_link) {
169		if (df->df_len < size)
170			continue;
171		if (df->df_len == size) {
172			s = (void *)df->df_start;
173			TAILQ_REMOVE(&dpcpu_head, df, df_link);
174			free(df, M_PCPU);
175			break;
176		}
177		s = (void *)df->df_start;
178		df->df_len -= size;
179		df->df_start = df->df_start + size;
180		break;
181	}
182	sx_xunlock(&dpcpu_lock);
183
184	return (s);
185}
186
187/*
188 * Free dynamic per-cpu space at module unload time.
189 */
190void
191dpcpu_free(void *s, int size)
192{
193	struct dpcpu_free *df;
194	struct dpcpu_free *dn;
195	uintptr_t start;
196	uintptr_t end;
197
198	size = roundup2(size, sizeof(void *));
199	start = (uintptr_t)s;
200	end = start + size;
201	/*
202	 * Free a region of space and merge it with as many neighbors as
203	 * possible.  Keeping the list sorted simplifies this operation.
204	 */
205	sx_xlock(&dpcpu_lock);
206	TAILQ_FOREACH(df, &dpcpu_head, df_link) {
207		if (df->df_start > end)
208			break;
209		/*
210		 * If we expand at the end of an entry we may have to
211		 * merge it with the one following it as well.
212		 */
213		if (df->df_start + df->df_len == start) {
214			df->df_len += size;
215			dn = TAILQ_NEXT(df, df_link);
216			if (df->df_start + df->df_len == dn->df_start) {
217				df->df_len += dn->df_len;
218				TAILQ_REMOVE(&dpcpu_head, dn, df_link);
219				free(dn, M_PCPU);
220			}
221			sx_xunlock(&dpcpu_lock);
222			return;
223		}
224		if (df->df_start == end) {
225			df->df_start = start;
226			df->df_len += size;
227			sx_xunlock(&dpcpu_lock);
228			return;
229		}
230	}
231	dn = malloc(sizeof(*df), M_PCPU, M_WAITOK | M_ZERO);
232	dn->df_start = start;
233	dn->df_len = size;
234	if (df)
235		TAILQ_INSERT_BEFORE(df, dn, df_link);
236	else
237		TAILQ_INSERT_TAIL(&dpcpu_head, dn, df_link);
238	sx_xunlock(&dpcpu_lock);
239}
240
241/*
242 * Initialize the per-cpu storage from an updated linker-set region.
243 */
244void
245dpcpu_copy(void *s, int size)
246{
247#ifdef SMP
248	uintptr_t dpcpu;
249	int i;
250
251	for (i = 0; i < mp_ncpus; ++i) {
252		dpcpu = dpcpu_off[i];
253		if (dpcpu == 0)
254			continue;
255		memcpy((void *)(dpcpu + (uintptr_t)s), s, size);
256	}
257#else
258	memcpy((void *)(dpcpu_off[0] + (uintptr_t)s), s, size);
259#endif
260}
261
262/*
263 * Destroy a struct pcpu.
264 */
265void
266pcpu_destroy(struct pcpu *pcpu)
267{
268
269	STAILQ_REMOVE(&cpuhead, pcpu, pcpu, pc_allcpu);
270	cpuid_to_pcpu[pcpu->pc_cpuid] = NULL;
271	dpcpu_off[pcpu->pc_cpuid] = 0;
272}
273
274/*
275 * Locate a struct pcpu by cpu id.
276 */
277struct pcpu *
278pcpu_find(u_int cpuid)
279{
280
281	return (cpuid_to_pcpu[cpuid]);
282}
283
284int
285sysctl_dpcpu_quad(SYSCTL_HANDLER_ARGS)
286{
287	uintptr_t dpcpu;
288	int64_t count;
289	int i;
290
291	count = 0;
292	for (i = 0; i < mp_ncpus; ++i) {
293		dpcpu = dpcpu_off[i];
294		if (dpcpu == 0)
295			continue;
296		count += *(int64_t *)(dpcpu + (uintptr_t)arg1);
297	}
298	return (SYSCTL_OUT(req, &count, sizeof(count)));
299}
300
301int
302sysctl_dpcpu_long(SYSCTL_HANDLER_ARGS)
303{
304	uintptr_t dpcpu;
305	long count;
306	int i;
307
308	count = 0;
309	for (i = 0; i < mp_ncpus; ++i) {
310		dpcpu = dpcpu_off[i];
311		if (dpcpu == 0)
312			continue;
313		count += *(long *)(dpcpu + (uintptr_t)arg1);
314	}
315	return (SYSCTL_OUT(req, &count, sizeof(count)));
316}
317
318int
319sysctl_dpcpu_int(SYSCTL_HANDLER_ARGS)
320{
321	uintptr_t dpcpu;
322	int count;
323	int i;
324
325	count = 0;
326	for (i = 0; i < mp_ncpus; ++i) {
327		dpcpu = dpcpu_off[i];
328		if (dpcpu == 0)
329			continue;
330		count += *(int *)(dpcpu + (uintptr_t)arg1);
331	}
332	return (SYSCTL_OUT(req, &count, sizeof(count)));
333}
334
335#ifdef DDB
336DB_SHOW_COMMAND(dpcpu_off, db_show_dpcpu_off)
337{
338	int id;
339
340	CPU_FOREACH(id) {
341		db_printf("dpcpu_off[%2d] = 0x%jx (+ DPCPU_START = %p)\n",
342		    id, (uintmax_t)dpcpu_off[id],
343		    (void *)(uintptr_t)(dpcpu_off[id] + DPCPU_START));
344	}
345}
346
347static void
348show_pcpu(struct pcpu *pc)
349{
350	struct thread *td;
351
352	db_printf("cpuid        = %d\n", pc->pc_cpuid);
353	db_printf("dynamic pcpu = %p\n", (void *)pc->pc_dynamic);
354	db_printf("curthread    = ");
355	td = pc->pc_curthread;
356	if (td != NULL)
357		db_printf("%p: pid %d \"%s\"\n", td, td->td_proc->p_pid,
358		    td->td_name);
359	else
360		db_printf("none\n");
361	db_printf("curpcb       = %p\n", pc->pc_curpcb);
362	db_printf("fpcurthread  = ");
363	td = pc->pc_fpcurthread;
364	if (td != NULL)
365		db_printf("%p: pid %d \"%s\"\n", td, td->td_proc->p_pid,
366		    td->td_name);
367	else
368		db_printf("none\n");
369	db_printf("idlethread   = ");
370	td = pc->pc_idlethread;
371	if (td != NULL)
372		db_printf("%p: tid %d \"%s\"\n", td, td->td_tid, td->td_name);
373	else
374		db_printf("none\n");
375	db_show_mdpcpu(pc);
376
377#ifdef VIMAGE
378	db_printf("curvnet      = %p\n", pc->pc_curthread->td_vnet);
379#endif
380
381#ifdef WITNESS
382	db_printf("spin locks held:\n");
383	witness_list_locks(&pc->pc_spinlocks, db_printf);
384#endif
385}
386
387DB_SHOW_COMMAND(pcpu, db_show_pcpu)
388{
389	struct pcpu *pc;
390	int id;
391
392	if (have_addr)
393		id = ((addr >> 4) % 16) * 10 + (addr % 16);
394	else
395		id = PCPU_GET(cpuid);
396	pc = pcpu_find(id);
397	if (pc == NULL) {
398		db_printf("CPU %d not found\n", id);
399		return;
400	}
401	show_pcpu(pc);
402}
403
404DB_SHOW_ALL_COMMAND(pcpu, db_show_cpu_all)
405{
406	struct pcpu *pc;
407	int id;
408
409	db_printf("Current CPU: %d\n\n", PCPU_GET(cpuid));
410	for (id = 0; id <= mp_maxid; id++) {
411		pc = pcpu_find(id);
412		if (pc != NULL) {
413			show_pcpu(pc);
414			db_printf("\n");
415		}
416	}
417}
418DB_SHOW_ALIAS(allpcpu, db_show_cpu_all);
419#endif
420