/*-
 * Copyright (c) 2005 Robert N. M. Watson
 * All rights reserved.
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 * 1. Redistributions of source code must retain the above copyright
 *    notice, this list of conditions and the following disclaimer.
 * 2. Redistributions in binary form must reproduce the above copyright
 *    notice, this list of conditions and the following disclaimer in the
 *    documentation and/or other materials provided with the distribution.
 *
 * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
 * ARE DISCLAIMED.  IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
 * SUCH DAMAGE.
 *
 * $FreeBSD$
 */

#include <sys/cdefs.h>
#include <sys/param.h>
#include <sys/malloc.h>
#include <sys/sysctl.h>

#include <err.h>
#include <errno.h>
#include <kvm.h>
#include <nlist.h>
#include <stdio.h>
#include <stdlib.h>
#include <string.h>

#include "memstat.h"
#include "memstat_internal.h"

static struct nlist namelist[] = {
#define	X_KMEMSTATISTICS	0
	{ .n_name = "_kmemstatistics" },
#define	X_MP_MAXCPUS		1
	{ .n_name = "_mp_maxcpus" },
	{ .n_name = "" },
};

/*
 * Extract malloc(9) statistics from the running kernel, and store all memory
 * type information in the passed list.  For each type, check the list for an
 * existing entry with the right name/allocator -- if present, update that
 * entry.  Otherwise, add a new entry.  On error, the entire list will be
 * cleared, as entries will be in an inconsistent state.
 *
 * To reduce the level of work for a list that starts empty, we keep around a
 * hint as to whether it was empty when we began, so we can avoid searching
 * the list for entries to update.  Updates are O(n^2) due to searching for
 * each entry before adding it.
 */
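/*
 * Example usage (a sketch, not part of this file; it assumes
 * memstat_mtl_alloc(), memstat_mtl_geterror(), memstat_strerror(), and
 * memstat_mtl_free() are the public list routines declared in memstat.h):
 *
 *	struct memory_type_list *mtlp;
 *
 *	mtlp = memstat_mtl_alloc();
 *	if (mtlp == NULL)
 *		err(1, "memstat_mtl_alloc");
 *	if (memstat_sysctl_malloc(mtlp, 0) < 0)
 *		errx(1, "memstat_sysctl_malloc: %s",
 *		    memstat_strerror(memstat_mtl_geterror(mtlp)));
 *	...
 *	memstat_mtl_free(mtlp);
 */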
int
memstat_sysctl_malloc(struct memory_type_list *list, int flags)
{
	struct malloc_type_stream_header *mtshp;
	struct malloc_type_header *mthp;
	struct malloc_type_stats *mtsp;
	struct memory_type *mtp;
	int count, hint_dontsearch, i, j, maxcpus;
	char *buffer, *p;
	size_t size;

	hint_dontsearch = LIST_EMPTY(&list->mtl_list);

	/*
	 * Query the number of CPUs and the number of malloc types so that we
	 * can guess an initial buffer size.  We loop until we succeed or
	 * really fail.  Note that the value of maxcpus we query using sysctl
	 * is not the version we use when processing the real data -- that is
	 * read from the header.
	 */
retry:
	size = sizeof(maxcpus);
	if (sysctlbyname("kern.smp.maxcpus", &maxcpus, &size, NULL, 0) < 0) {
		if (errno == EACCES || errno == EPERM)
			list->mtl_error = MEMSTAT_ERROR_PERMISSION;
		else
			list->mtl_error = MEMSTAT_ERROR_DATAERROR;
		return (-1);
	}
	if (size != sizeof(maxcpus)) {
		list->mtl_error = MEMSTAT_ERROR_DATAERROR;
		return (-1);
	}

	size = sizeof(count);
	if (sysctlbyname("kern.malloc_count", &count, &size, NULL, 0) < 0) {
		if (errno == EACCES || errno == EPERM)
			list->mtl_error = MEMSTAT_ERROR_PERMISSION;
		else
			list->mtl_error = MEMSTAT_ERROR_VERSION;
		return (-1);
	}
	if (size != sizeof(count)) {
		list->mtl_error = MEMSTAT_ERROR_DATAERROR;
		return (-1);
	}

	size = sizeof(*mtshp) + count * (sizeof(*mthp) + sizeof(*mtsp) *
	    maxcpus);

	buffer = malloc(size);
	if (buffer == NULL) {
		list->mtl_error = MEMSTAT_ERROR_NOMEMORY;
		return (-1);
	}

	if (sysctlbyname("kern.malloc_stats", buffer, &size, NULL, 0) < 0) {
		/*
		 * XXXRW: ENOMEM is an ambiguous return, we should bound the
		 * number of loops, perhaps.
		 */
		if (errno == ENOMEM) {
			free(buffer);
			goto retry;
		}
		if (errno == EACCES || errno == EPERM)
			list->mtl_error = MEMSTAT_ERROR_PERMISSION;
		else
			list->mtl_error = MEMSTAT_ERROR_VERSION;
		free(buffer);
		return (-1);
	}

	if (size == 0) {
		free(buffer);
		return (0);
	}

	if (size < sizeof(*mtshp)) {
		list->mtl_error = MEMSTAT_ERROR_VERSION;
		free(buffer);
		return (-1);
	}
	p = buffer;
	mtshp = (struct malloc_type_stream_header *)p;
	p += sizeof(*mtshp);

	if (mtshp->mtsh_version != MALLOC_TYPE_STREAM_VERSION) {
		list->mtl_error = MEMSTAT_ERROR_VERSION;
		free(buffer);
		return (-1);
	}

	/*
	 * For the remainder of this function, we are quite trusting about
	 * the layout of structures and sizes, since we've determined we have
	 * a matching version and acceptable CPU count.
	 */
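	/*
	 * Stream layout, as implied by the parsing below:
	 *
	 *	struct malloc_type_stream_header
	 *	count * {
	 *		struct malloc_type_header
	 *		maxcpus * struct malloc_type_stats
	 *	}
	 */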
	maxcpus = mtshp->mtsh_maxcpus;
	count = mtshp->mtsh_count;
	for (i = 0; i < count; i++) {
		mthp = (struct malloc_type_header *)p;
		p += sizeof(*mthp);

		if (hint_dontsearch == 0) {
			mtp = memstat_mtl_find(list, ALLOCATOR_MALLOC,
			    mthp->mth_name);
		} else
			mtp = NULL;
		if (mtp == NULL)
			mtp = _memstat_mt_allocate(list, ALLOCATOR_MALLOC,
			    mthp->mth_name, maxcpus);
		if (mtp == NULL) {
			_memstat_mtl_empty(list);
			free(buffer);
			list->mtl_error = MEMSTAT_ERROR_NOMEMORY;
			return (-1);
		}

		/*
		 * Reset the statistics on the current node.
		 */
		_memstat_mt_reset_stats(mtp, maxcpus);

		for (j = 0; j < maxcpus; j++) {
			mtsp = (struct malloc_type_stats *)p;
			p += sizeof(*mtsp);

			/*
			 * Summarize raw statistics across CPUs into coalesced
			 * statistics.
			 */
			mtp->mt_memalloced += mtsp->mts_memalloced;
			mtp->mt_memfreed += mtsp->mts_memfreed;
			mtp->mt_numallocs += mtsp->mts_numallocs;
			mtp->mt_numfrees += mtsp->mts_numfrees;
			mtp->mt_sizemask |= mtsp->mts_size;

			/*
			 * Copies of per-CPU statistics.
			 */
			mtp->mt_percpu_alloc[j].mtp_memalloced =
			    mtsp->mts_memalloced;
			mtp->mt_percpu_alloc[j].mtp_memfreed =
			    mtsp->mts_memfreed;
			mtp->mt_percpu_alloc[j].mtp_numallocs =
			    mtsp->mts_numallocs;
			mtp->mt_percpu_alloc[j].mtp_numfrees =
			    mtsp->mts_numfrees;
			mtp->mt_percpu_alloc[j].mtp_sizemask =
			    mtsp->mts_size;
		}

		/*
		 * Derived cross-CPU statistics.
		 */
		mtp->mt_bytes = mtp->mt_memalloced - mtp->mt_memfreed;
		mtp->mt_count = mtp->mt_numallocs - mtp->mt_numfrees;
	}

	free(buffer);

	return (0);
}

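/*
 * Wrapper for kvm_read(3): read 'size' bytes at kernel address
 * 'kvm_pointer' plus 'offset' into the local buffer 'address', mapping
 * failures onto MEMSTAT_ERROR_* values.
 */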
static int
kread(kvm_t *kvm, void *kvm_pointer, void *address, size_t size,
    size_t offset)
{
	ssize_t ret;

	ret = kvm_read(kvm, (unsigned long)kvm_pointer + offset, address,
	    size);
	if (ret < 0)
		return (MEMSTAT_ERROR_KVM);
	if ((size_t)ret != size)
		return (MEMSTAT_ERROR_KVM_SHORTREAD);
	return (0);
}

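/*
 * Read a NUL-terminated string at kernel address 'kvm_pointer' into
 * 'buffer', one byte at a time; stop at the terminator, or truncate after
 * 'buflen' bytes.
 */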
static int
kread_string(kvm_t *kvm, const void *kvm_pointer, char *buffer, int buflen)
{
	ssize_t ret;
	int i;

	for (i = 0; i < buflen; i++) {
		ret = kvm_read(kvm, __DECONST(unsigned long, kvm_pointer) +
		    i, &(buffer[i]), sizeof(char));
		if (ret < 0)
			return (MEMSTAT_ERROR_KVM);
		if ((size_t)ret != sizeof(char))
			return (MEMSTAT_ERROR_KVM_SHORTREAD);
		if (buffer[i] == '\0')
			return (0);
	}
	/* Truncate. */
	buffer[i-1] = '\0';
	return (0);
}

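/*
 * Read 'size' bytes at 'offset' past the address of the namelist symbol
 * with the given 'index' into the local buffer 'address'.
 */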
static int
kread_symbol(kvm_t *kvm, int index, void *address, size_t size,
    size_t offset)
{
	ssize_t ret;

	ret = kvm_read(kvm, namelist[index].n_value + offset, address, size);
	if (ret < 0)
		return (MEMSTAT_ERROR_KVM);
	if ((size_t)ret != size)
		return (MEMSTAT_ERROR_KVM_SHORTREAD);
	return (0);
}

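/*
 * kvm(3) counterpart of memstat_sysctl_malloc(): walk the kernel's
 * kmemstatistics list in a live kernel or crash dump and record the same
 * malloc(9) statistics in the passed list.  'kvm_handle' must be a
 * descriptor returned by kvm_open(3) or kvm_openfiles(3).
 */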
int
memstat_kvm_malloc(struct memory_type_list *list, void *kvm_handle)
{
	struct memory_type *mtp;
	void *kmemstatistics;
	int hint_dontsearch, j, mp_maxcpus, ret;
	char name[MEMTYPE_MAXNAME];
	struct malloc_type_stats *mts, *mtsp;
	struct malloc_type_internal *mtip;
	struct malloc_type type, *typep;
	kvm_t *kvm;

	kvm = (kvm_t *)kvm_handle;

	hint_dontsearch = LIST_EMPTY(&list->mtl_list);

	if (kvm_nlist(kvm, namelist) != 0) {
		list->mtl_error = MEMSTAT_ERROR_KVM;
		return (-1);
	}

	if (namelist[X_KMEMSTATISTICS].n_type == 0 ||
	    namelist[X_KMEMSTATISTICS].n_value == 0) {
		list->mtl_error = MEMSTAT_ERROR_KVM_NOSYMBOL;
		return (-1);
	}

	ret = kread_symbol(kvm, X_MP_MAXCPUS, &mp_maxcpus,
	    sizeof(mp_maxcpus), 0);
	if (ret != 0) {
		list->mtl_error = ret;
		return (-1);
	}

	ret = kread_symbol(kvm, X_KMEMSTATISTICS, &kmemstatistics,
	    sizeof(kmemstatistics), 0);
	if (ret != 0) {
		list->mtl_error = ret;
		return (-1);
	}

	mts = malloc(sizeof(struct malloc_type_stats) * mp_maxcpus);
	if (mts == NULL) {
		list->mtl_error = MEMSTAT_ERROR_NOMEMORY;
		return (-1);
	}

	for (typep = kmemstatistics; typep != NULL; typep = type.ks_next) {
		ret = kread(kvm, typep, &type, sizeof(type), 0);
		if (ret != 0) {
			_memstat_mtl_empty(list);
			free(mts);
			list->mtl_error = ret;
			return (-1);
		}
		ret = kread_string(kvm, (void *)type.ks_shortdesc, name,
		    MEMTYPE_MAXNAME);
		if (ret != 0) {
			_memstat_mtl_empty(list);
			free(mts);
			list->mtl_error = ret;
			return (-1);
		}

		/*
		 * Since our compile-time value for MAXCPU may differ from the
		 * kernel's, we populate our own array.
		 */
		mtip = type.ks_handle;
		ret = kread(kvm, mtip->mti_stats, mts, mp_maxcpus *
		    sizeof(struct malloc_type_stats), 0);
		if (ret != 0) {
			_memstat_mtl_empty(list);
			free(mts);
			list->mtl_error = ret;
			return (-1);
		}

		if (hint_dontsearch == 0) {
			mtp = memstat_mtl_find(list, ALLOCATOR_MALLOC, name);
		} else
			mtp = NULL;
		if (mtp == NULL)
			mtp = _memstat_mt_allocate(list, ALLOCATOR_MALLOC,
			    name, mp_maxcpus);
		if (mtp == NULL) {
			_memstat_mtl_empty(list);
			free(mts);
			list->mtl_error = MEMSTAT_ERROR_NOMEMORY;
			return (-1);
		}

		/*
		 * This logic is replicated from kern_malloc.c, and should
		 * be kept in sync.
		 */
		_memstat_mt_reset_stats(mtp, mp_maxcpus);
		for (j = 0; j < mp_maxcpus; j++) {
			mtsp = &mts[j];
			mtp->mt_memalloced += mtsp->mts_memalloced;
			mtp->mt_memfreed += mtsp->mts_memfreed;
			mtp->mt_numallocs += mtsp->mts_numallocs;
			mtp->mt_numfrees += mtsp->mts_numfrees;
			mtp->mt_sizemask |= mtsp->mts_size;

			mtp->mt_percpu_alloc[j].mtp_memalloced =
			    mtsp->mts_memalloced;
			mtp->mt_percpu_alloc[j].mtp_memfreed =
			    mtsp->mts_memfreed;
			mtp->mt_percpu_alloc[j].mtp_numallocs =
			    mtsp->mts_numallocs;
			mtp->mt_percpu_alloc[j].mtp_numfrees =
			    mtsp->mts_numfrees;
			mtp->mt_percpu_alloc[j].mtp_sizemask =
			    mtsp->mts_size;
		}

		mtp->mt_bytes = mtp->mt_memalloced - mtp->mt_memfreed;
		mtp->mt_count = mtp->mt_numallocs - mtp->mt_numfrees;
	}

	free(mts);
	return (0);
}