/*-
 * Copyright (c) 2005-2006 Robert N. M. Watson
 * All rights reserved.
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 * 1. Redistributions of source code must retain the above copyright
 *    notice, this list of conditions and the following disclaimer.
 * 2. Redistributions in binary form must reproduce the above copyright
 *    notice, this list of conditions and the following disclaimer in the
 *    documentation and/or other materials provided with the distribution.
 *
 * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
 * ARE DISCLAIMED.  IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
 * SUCH DAMAGE.
 *
 * $FreeBSD: head/lib/libmemstat/memstat_uma.c 224569 2011-08-01 09:43:35Z pluknet $
 */

#include <sys/param.h>
#include <sys/cpuset.h>
#include <sys/sysctl.h>

#define	LIBMEMSTAT	/* Cause vm_page.h not to include opt_vmpage.h */
#include <vm/vm.h>
#include <vm/vm_page.h>

#include <vm/uma.h>
#include <vm/uma_int.h>

#include <err.h>
#include <errno.h>
#include <kvm.h>
#include <nlist.h>
#include <stddef.h>
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include <unistd.h>

#include "memstat.h"
#include "memstat_internal.h"

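/*
 * Kernel symbols resolved via kvm_nlist(3) for the kvm-based path,
 * memstat_kvm_uma(), below.
 */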
static struct nlist namelist[] = {
#define	X_UMA_KEGS	0
	{ .n_name = "_uma_kegs" },
#define	X_MP_MAXID	1
	{ .n_name = "_mp_maxid" },
#define	X_ALL_CPUS	2
	{ .n_name = "_all_cpus" },
	{ .n_name = "" },
};

/*
 * Extract uma(9) statistics from the running kernel, and store all memory
 * type information in the passed list.  For each type, check the list for an
 * existing entry with the right name/allocator -- if present, update that
 * entry.  Otherwise, add a new entry.  On error, the entire list will be
 * cleared, as entries will be in an inconsistent state.
 *
 * To reduce the level of work for a list that starts empty, we keep around a
 * hint as to whether it was empty when we began, so we can avoid searching
 * the list for entries to update.  Updates are O(n^2) due to searching for
 * each entry before adding it.
 */
int
memstat_sysctl_uma(struct memory_type_list *list, int flags)
{
	struct uma_stream_header *ushp;
	struct uma_type_header *uthp;
	struct uma_percpu_stat *upsp;
	struct memory_type *mtp;
	int count, hint_dontsearch, i, j, maxcpus, maxid;
	char *buffer, *p;
	size_t size;

	hint_dontsearch = LIST_EMPTY(&list->mtl_list);

	/*
	 * Query the maximum CPU ID and the number of UMA zones so that we
	 * can guess an initial buffer size.  We loop until we succeed or
	 * really fail.  Note that the maxid value we query using sysctl is
	 * not the CPU count used when processing the real data -- maxcpus
	 * is read from the stream header instead.
	 */
retry:
	size = sizeof(maxid);
	if (sysctlbyname("kern.smp.maxid", &maxid, &size, NULL, 0) < 0) {
		if (errno == EACCES || errno == EPERM)
			list->mtl_error = MEMSTAT_ERROR_PERMISSION;
		else
			list->mtl_error = MEMSTAT_ERROR_DATAERROR;
		return (-1);
	}
	if (size != sizeof(maxid)) {
		list->mtl_error = MEMSTAT_ERROR_DATAERROR;
		return (-1);
	}

	size = sizeof(count);
	if (sysctlbyname("vm.zone_count", &count, &size, NULL, 0) < 0) {
		if (errno == EACCES || errno == EPERM)
			list->mtl_error = MEMSTAT_ERROR_PERMISSION;
		else
			list->mtl_error = MEMSTAT_ERROR_VERSION;
		return (-1);
	}
	if (size != sizeof(count)) {
		list->mtl_error = MEMSTAT_ERROR_DATAERROR;
		return (-1);
	}

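	/*
	 * Size estimate: one stream header, plus, for each of 'count'
	 * zones, a type header followed by one per-CPU record for each
	 * possible CPU.
	 */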
	size = sizeof(*ushp) + count * (sizeof(*uthp) + sizeof(*upsp) *
	    (maxid + 1));

	buffer = malloc(size);
	if (buffer == NULL) {
		list->mtl_error = MEMSTAT_ERROR_NOMEMORY;
		return (-1);
	}

	if (sysctlbyname("vm.zone_stats", buffer, &size, NULL, 0) < 0) {
		/*
		 * XXXRW: ENOMEM is an ambiguous return, we should bound the
		 * number of loops, perhaps.
		 */
		if (errno == ENOMEM) {
			free(buffer);
			goto retry;
		}
		if (errno == EACCES || errno == EPERM)
			list->mtl_error = MEMSTAT_ERROR_PERMISSION;
		else
			list->mtl_error = MEMSTAT_ERROR_VERSION;
		free(buffer);
		return (-1);
	}

	if (size == 0) {
		free(buffer);
		return (0);
	}

	if (size < sizeof(*ushp)) {
		list->mtl_error = MEMSTAT_ERROR_VERSION;
		free(buffer);
		return (-1);
	}
	p = buffer;
	ushp = (struct uma_stream_header *)p;
	p += sizeof(*ushp);

	if (ushp->ush_version != UMA_STREAM_VERSION) {
		list->mtl_error = MEMSTAT_ERROR_VERSION;
		free(buffer);
		return (-1);
	}

	/*
	 * For the remainder of this function, we are quite trusting about
	 * the layout of structures and sizes, since we've determined we have
	 * a matching version and acceptable CPU count.
	 */
	maxcpus = ushp->ush_maxcpus;
	count = ushp->ush_count;
	for (i = 0; i < count; i++) {
		uthp = (struct uma_type_header *)p;
		p += sizeof(*uthp);

		if (hint_dontsearch == 0) {
			mtp = memstat_mtl_find(list, ALLOCATOR_UMA,
			    uthp->uth_name);
		} else
			mtp = NULL;
		if (mtp == NULL)
			mtp = _memstat_mt_allocate(list, ALLOCATOR_UMA,
			    uthp->uth_name, maxid + 1);
		if (mtp == NULL) {
			_memstat_mtl_empty(list);
			free(buffer);
			list->mtl_error = MEMSTAT_ERROR_NOMEMORY;
			return (-1);
		}

		/*
		 * Reset the statistics on the current node.
		 */
		_memstat_mt_reset_stats(mtp, maxid + 1);

		mtp->mt_numallocs = uthp->uth_allocs;
		mtp->mt_numfrees = uthp->uth_frees;
		mtp->mt_failures = uthp->uth_fails;
		mtp->mt_sleeps = uthp->uth_sleeps;

		for (j = 0; j < maxcpus; j++) {
			upsp = (struct uma_percpu_stat *)p;
			p += sizeof(*upsp);

			mtp->mt_percpu_cache[j].mtp_free =
			    upsp->ups_cache_free;
			mtp->mt_free += upsp->ups_cache_free;
			mtp->mt_numallocs += upsp->ups_allocs;
			mtp->mt_numfrees += upsp->ups_frees;
		}

		mtp->mt_size = uthp->uth_size;
		mtp->mt_memalloced = mtp->mt_numallocs * uthp->uth_size;
		mtp->mt_memfreed = mtp->mt_numfrees * uthp->uth_size;
		mtp->mt_bytes = mtp->mt_memalloced - mtp->mt_memfreed;
		mtp->mt_countlimit = uthp->uth_limit;
		mtp->mt_byteslimit = uthp->uth_limit * uthp->uth_size;

		mtp->mt_count = mtp->mt_numallocs - mtp->mt_numfrees;
		mtp->mt_zonefree = uthp->uth_zone_free;

		/*
		 * UMA secondary zones share a keg with the primary zone.  To
		 * avoid double-reporting of free items, report keg free
		 * items only in the primary zone.
		 */
		if (!(uthp->uth_zone_flags & UTH_ZONE_SECONDARY)) {
			mtp->mt_kegfree = uthp->uth_keg_free;
			mtp->mt_free += mtp->mt_kegfree;
		}
		mtp->mt_free += mtp->mt_zonefree;
	}

	free(buffer);

	return (0);
}
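
/*
 * Illustrative, compiled-out example of a memstat_sysctl_uma() consumer.
 * This is a minimal sketch using only the public libmemstat(3) interface;
 * the function name print_uma_zones is ours, not the library's.
 */
#if 0
#include <memstat.h>
#include <stdint.h>
#include <stdio.h>

static void
print_uma_zones(void)
{
	struct memory_type_list *mtlp;
	struct memory_type *mtp;

	/* Allocate an empty list and fill it from the running kernel. */
	mtlp = memstat_mtl_alloc();
	if (mtlp == NULL)
		return;
	if (memstat_sysctl_uma(mtlp, 0) < 0) {
		fprintf(stderr, "memstat_sysctl_uma: %s\n",
		    memstat_strerror(memstat_mtl_geterror(mtlp)));
		memstat_mtl_free(mtlp);
		return;
	}
	/* Walk the list with the memstat_mtl_*() iterators. */
	for (mtp = memstat_mtl_first(mtlp); mtp != NULL;
	    mtp = memstat_mtl_next(mtp))
		printf("%s: %ju in use, %ju free\n", memstat_get_name(mtp),
		    (uintmax_t)memstat_get_count(mtp),
		    (uintmax_t)memstat_get_free(mtp));
	memstat_mtl_free(mtlp);
}
#endif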

/*
 * Read 'size' bytes from the kernel address 'kvm_pointer' + 'offset' into
 * the local buffer at 'address', mapping kvm_read(3) failures and short
 * reads to MEMSTAT_ERROR_* values.
 */
static int
kread(kvm_t *kvm, void *kvm_pointer, void *address, size_t size,
    size_t offset)
{
	ssize_t ret;

	ret = kvm_read(kvm, (unsigned long)kvm_pointer + offset, address,
	    size);
	if (ret < 0)
		return (MEMSTAT_ERROR_KVM);
	if ((size_t)ret != size)
		return (MEMSTAT_ERROR_KVM_SHORTREAD);
	return (0);
}

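/*
 * Copy a string of at most 'buflen' bytes from the kernel address
 * 'kvm_pointer', one byte at a time, stopping at the terminating NUL; a
 * string that does not fit is NUL-truncated in place.
 */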
static int
kread_string(kvm_t *kvm, void *kvm_pointer, char *buffer, int buflen)
{
	ssize_t ret;
	int i;

	for (i = 0; i < buflen; i++) {
		ret = kvm_read(kvm, (unsigned long)kvm_pointer + i,
		    &(buffer[i]), sizeof(char));
		if (ret < 0)
			return (MEMSTAT_ERROR_KVM);
		if ((size_t)ret != sizeof(char))
			return (MEMSTAT_ERROR_KVM_SHORTREAD);
		if (buffer[i] == '\0')
			return (0);
	}
	/* Truncate. */
	buffer[i-1] = '\0';
	return (0);
}

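/*
 * Like kread(), but resolve the source address from a previously looked-up
 * namelist symbol plus 'offset'.
 */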
static int
kread_symbol(kvm_t *kvm, int index, void *address, size_t size,
    size_t offset)
{
	ssize_t ret;

	ret = kvm_read(kvm, namelist[index].n_value + offset, address, size);
	if (ret < 0)
		return (MEMSTAT_ERROR_KVM);
	if ((size_t)ret != size)
		return (MEMSTAT_ERROR_KVM_SHORTREAD);
	return (0);
}

/*
 * memstat_kvm_uma() is similar to memstat_sysctl_uma(), only it extracts
 * UMA(9) statistics from a kernel core/memory file.
 */
int
memstat_kvm_uma(struct memory_type_list *list, void *kvm_handle)
{
	LIST_HEAD(, uma_keg) uma_kegs;
	struct memory_type *mtp;
	struct uma_bucket *ubp, ub;
	struct uma_cache *ucp, *ucp_array;
	struct uma_zone *uzp, uz;
	struct uma_keg *kzp, kz;
	int hint_dontsearch, i, mp_maxid, ret;
	char name[MEMTYPE_MAXNAME];
	cpuset_t all_cpus;
	long cpusetsize;
	kvm_t *kvm;

	kvm = (kvm_t *)kvm_handle;
	hint_dontsearch = LIST_EMPTY(&list->mtl_list);
	if (kvm_nlist(kvm, namelist) != 0) {
		list->mtl_error = MEMSTAT_ERROR_KVM;
		return (-1);
	}
	if (namelist[X_UMA_KEGS].n_type == 0 ||
	    namelist[X_UMA_KEGS].n_value == 0) {
		list->mtl_error = MEMSTAT_ERROR_KVM_NOSYMBOL;
		return (-1);
	}
	ret = kread_symbol(kvm, X_MP_MAXID, &mp_maxid, sizeof(mp_maxid), 0);
	if (ret != 0) {
		list->mtl_error = ret;
		return (-1);
	}
	ret = kread_symbol(kvm, X_UMA_KEGS, &uma_kegs, sizeof(uma_kegs), 0);
	if (ret != 0) {
		list->mtl_error = ret;
		return (-1);
	}
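	/*
	 * Reject kernels whose cpuset is wider than the cpuset_t this
	 * library was built with, since reading the kernel's all_cpus set
	 * below would overflow the local copy.
	 */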
	cpusetsize = sysconf(_SC_CPUSET_SIZE);
	if (cpusetsize == -1 || (u_long)cpusetsize > sizeof(cpuset_t)) {
		list->mtl_error = MEMSTAT_ERROR_KVM_NOSYMBOL;
		return (-1);
	}
	CPU_ZERO(&all_cpus);
	ret = kread_symbol(kvm, X_ALL_CPUS, &all_cpus, cpusetsize, 0);
	if (ret != 0) {
		list->mtl_error = ret;
		return (-1);
	}
	ucp_array = malloc(sizeof(struct uma_cache) * (mp_maxid + 1));
	if (ucp_array == NULL) {
		list->mtl_error = MEMSTAT_ERROR_NOMEMORY;
		return (-1);
	}
	/*
	 * Walk the kernel's keg list.  Iteration advances via LIST_NEXT()
	 * on the local copies (kz, uz), whose link pointers are kernel
	 * addresses freshly kread() into them.
	 */
	for (kzp = LIST_FIRST(&uma_kegs); kzp != NULL; kzp =
	    LIST_NEXT(&kz, uk_link)) {
		ret = kread(kvm, kzp, &kz, sizeof(kz), 0);
		if (ret != 0) {
			free(ucp_array);
			_memstat_mtl_empty(list);
			list->mtl_error = ret;
			return (-1);
		}
		for (uzp = LIST_FIRST(&kz.uk_zones); uzp != NULL; uzp =
		    LIST_NEXT(&uz, uz_link)) {
			ret = kread(kvm, uzp, &uz, sizeof(uz), 0);
			if (ret != 0) {
				free(ucp_array);
				_memstat_mtl_empty(list);
				list->mtl_error = ret;
				return (-1);
			}
			ret = kread(kvm, uzp, ucp_array,
			    sizeof(struct uma_cache) * (mp_maxid + 1),
			    offsetof(struct uma_zone, uz_cpu[0]));
			if (ret != 0) {
				free(ucp_array);
				_memstat_mtl_empty(list);
				list->mtl_error = ret;
				return (-1);
			}
			ret = kread_string(kvm, uz.uz_name, name,
			    MEMTYPE_MAXNAME);
			if (ret != 0) {
				free(ucp_array);
				_memstat_mtl_empty(list);
				list->mtl_error = ret;
				return (-1);
			}
			if (hint_dontsearch == 0) {
				mtp = memstat_mtl_find(list, ALLOCATOR_UMA,
				    name);
			} else
				mtp = NULL;
			if (mtp == NULL)
				mtp = _memstat_mt_allocate(list, ALLOCATOR_UMA,
				    name, mp_maxid + 1);
			if (mtp == NULL) {
				free(ucp_array);
				_memstat_mtl_empty(list);
				list->mtl_error = MEMSTAT_ERROR_NOMEMORY;
				return (-1);
			}
			/*
			 * Reset the statistics on the current node.
			 */
			_memstat_mt_reset_stats(mtp, mp_maxid + 1);
			mtp->mt_numallocs = uz.uz_allocs;
			mtp->mt_numfrees = uz.uz_frees;
			mtp->mt_failures = uz.uz_fails;
			mtp->mt_sleeps = uz.uz_sleeps;
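			/*
			 * Internal zones bypass the per-CPU caches, so skip
			 * the per-CPU scan for them.
			 */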
			if (kz.uk_flags & UMA_ZFLAG_INTERNAL)
				goto skip_percpu;
			for (i = 0; i < mp_maxid + 1; i++) {
				if (!CPU_ISSET(i, &all_cpus))
					continue;
				ucp = &ucp_array[i];
				mtp->mt_numallocs += ucp->uc_allocs;
				mtp->mt_numfrees += ucp->uc_frees;

				if (ucp->uc_allocbucket != NULL) {
					ret = kread(kvm, ucp->uc_allocbucket,
					    &ub, sizeof(ub), 0);
					if (ret != 0) {
						free(ucp_array);
						_memstat_mtl_empty(list);
						list->mtl_error = ret;
						return (-1);
					}
					mtp->mt_free += ub.ub_cnt;
				}
				if (ucp->uc_freebucket != NULL) {
					ret = kread(kvm, ucp->uc_freebucket,
					    &ub, sizeof(ub), 0);
					if (ret != 0) {
						free(ucp_array);
						_memstat_mtl_empty(list);
						list->mtl_error = ret;
						return (-1);
					}
					mtp->mt_free += ub.ub_cnt;
				}
			}
skip_percpu:
			mtp->mt_size = kz.uk_size;
			mtp->mt_memalloced = mtp->mt_numallocs * mtp->mt_size;
			mtp->mt_memfreed = mtp->mt_numfrees * mtp->mt_size;
			mtp->mt_bytes = mtp->mt_memalloced - mtp->mt_memfreed;
			if (kz.uk_ppera > 1)
				mtp->mt_countlimit = kz.uk_maxpages /
				    kz.uk_ipers;
			else
				mtp->mt_countlimit = kz.uk_maxpages *
				    kz.uk_ipers;
			mtp->mt_byteslimit = mtp->mt_countlimit * mtp->mt_size;
			mtp->mt_count = mtp->mt_numallocs - mtp->mt_numfrees;
			for (ubp = LIST_FIRST(&uz.uz_full_bucket); ubp !=
			    NULL; ubp = LIST_NEXT(&ub, ub_link)) {
				ret = kread(kvm, ubp, &ub, sizeof(ub), 0);
				if (ret != 0) {
					free(ucp_array);
					_memstat_mtl_empty(list);
					list->mtl_error = ret;
					return (-1);
				}
				mtp->mt_zonefree += ub.ub_cnt;
			}
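			/*
			 * As in the sysctl path, report keg free items only
			 * in the primary zone, since secondary zones share
			 * the keg.
			 */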
			if (!((kz.uk_flags & UMA_ZONE_SECONDARY) &&
			    LIST_FIRST(&kz.uk_zones) != uzp)) {
				mtp->mt_kegfree = kz.uk_free;
				mtp->mt_free += mtp->mt_kegfree;
			}
			mtp->mt_free += mtp->mt_zonefree;
		}
	}
	free(ucp_array);
	return (0);
}
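
/*
 * Illustrative, compiled-out sketch of the kvm path: open a kernel/core
 * pair with kvm_openfiles(3) and pass the handle to memstat_kvm_uma().
 * The function name dump_core_uma and its parameters are placeholders.
 */
#if 0
#include <sys/param.h>

#include <fcntl.h>
#include <kvm.h>
#include <limits.h>
#include <memstat.h>
#include <stdint.h>
#include <stdio.h>

static int
dump_core_uma(const char *kernel, const char *core)
{
	char errbuf[_POSIX2_LINE_MAX];
	struct memory_type_list *mtlp;
	struct memory_type *mtp;
	kvm_t *kvm;

	kvm = kvm_openfiles(kernel, core, NULL, O_RDONLY, errbuf);
	if (kvm == NULL) {
		fprintf(stderr, "kvm_openfiles: %s\n", errbuf);
		return (-1);
	}
	mtlp = memstat_mtl_alloc();
	if (mtlp == NULL) {
		kvm_close(kvm);
		return (-1);
	}
	if (memstat_kvm_uma(mtlp, kvm) < 0) {
		fprintf(stderr, "memstat_kvm_uma: %s\n",
		    memstat_strerror(memstat_mtl_geterror(mtlp)));
		memstat_mtl_free(mtlp);
		kvm_close(kvm);
		return (-1);
	}
	for (mtp = memstat_mtl_first(mtlp); mtp != NULL;
	    mtp = memstat_mtl_next(mtp))
		printf("%s: %ju free\n", memstat_get_name(mtp),
		    (uintmax_t)memstat_get_free(mtp));
	memstat_mtl_free(mtlp);
	kvm_close(kvm);
	return (0);
}
#endif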