1/*-
2 * SPDX-License-Identifier: BSD-2-Clause
3 *
4 * Copyright (c) 2005-2007 Joseph Koshy
5 * Copyright (c) 2007 The FreeBSD Foundation
6 * All rights reserved.
7 *
8 * Portions of this software were developed by A. Joseph Koshy under
9 * sponsorship from the FreeBSD Foundation and Google, Inc.
10 *
11 * Redistribution and use in source and binary forms, with or without
12 * modification, are permitted provided that the following conditions
13 * are met:
14 * 1. Redistributions of source code must retain the above copyright
15 *    notice, this list of conditions and the following disclaimer.
16 * 2. Redistributions in binary form must reproduce the above copyright
17 *    notice, this list of conditions and the following disclaimer in the
18 *    documentation and/or other materials provided with the distribution.
19 *
20 * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
21 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
22 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
23 * ARE DISCLAIMED.  IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
24 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
25 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
26 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
27 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
28 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
29 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
30 * SUCH DAMAGE.
31 */
32
33#include <sys/param.h>
34#include <sys/pmc.h>
35#include <sys/pmclog.h>
36
37#include <assert.h>
38#include <errno.h>
39#include <pmc.h>
40#include <pmclog.h>
41#include <stddef.h>
42#include <stdlib.h>
43#include <string.h>
44#include <strings.h>
45#include <unistd.h>
46#include <stdio.h>
47
48#include <machine/pmc_mdep.h>
49
50#include "libpmcinternal.h"
51
/*
 * Size in bytes of the scratch buffer used when reading from a file
 * descriptor.  Parenthesized so that the macro expands to a single
 * operand wherever it is used in an expression.
 */
#define	PMCLOG_BUFFER_SIZE			(512*1024)
53
54/*
55 * API NOTES
56 *
 * The pmclog(3) API is oriented towards parsing an event stream in
 * "realtime", i.e., from a data source that may or may not preserve
 * record boundaries -- for example when the data source is elsewhere
 * on a network.  The API allows data to be fed into the parser zero
 * or more bytes at a time.
62 *
63 * The state for a log file parser is maintained in a 'struct
64 * pmclog_parse_state'.  Parser invocations are done by calling
65 * 'pmclog_read()'; this function will inform the caller when a
66 * complete event is parsed.
67 *
68 * The parser first assembles a complete log file event in an internal
69 * work area (see "ps_saved" below).  Once a complete log file event
70 * is read, the parser then parses it and converts it to an event
71 * descriptor usable by the client.  We could possibly avoid this two
72 * step process by directly parsing the input log to set fields in the
73 * event record.  However the parser's state machine would get
74 * insanely complicated, and this code is unlikely to be used in
75 * performance critical paths.
76 */
77
/*
 * Helpers for walking a saved log record as a sequence of 32-bit
 * words.  'LE' must be a modifiable 'uint32_t *' lvalue; the READ32,
 * READ64 and SKIP32 helpers advance it past whatever they consume.
 */

/* The first 32-bit word of the record held in (PS)->ps_saved. */
#define	PMCLOG_HEADER_FROM_SAVED_STATE(PS)				\
	(*(uint32_t *)&(PS)->ps_saved)

/* Point the reader LE at the start of object A. */
#define	PMCLOG_INITIALIZE_READER(LE,A)	LE = (uint32_t *)&(A)

/* Step over one 32-bit word without looking at it. */
#define	PMCLOG_SKIP32(LE)		(LE)++

/* Fetch one 32-bit word into V and advance by one word. */
#define	PMCLOG_READ32(LE,V) 		do {				\
		(V) = (LE)[0];						\
		(LE) += 1;						\
	} while (0)

/*
 * Fetch a 64-bit value stored as two consecutive 32-bit words, low
 * word first, into V and advance by two words.
 */
#define	PMCLOG_READ64(LE,V)		do {				\
		const uint64_t _lo = (uint64_t)(LE)[0];			\
		const uint64_t _hi = (uint64_t)(LE)[1];			\
		(LE) += 2;						\
		(V) = _lo | (_hi << 32);				\
	} while (0)

/*
 * Copy a string from the current read position into DST, bounded by
 * LEN as for strlcpy(3).  The read position LE is not advanced.
 */
#define	PMCLOG_READSTRING(LE,DST,LEN)	strlcpy((DST), (char *)(LE), (LEN))
94
95/*
96 * Assemble a log record from '*len' octets starting from address '*data'.
97 * Update 'data' and 'len' to reflect the number of bytes consumed.
98 *
99 * '*data' is potentially an unaligned address and '*len' octets may
100 * not be enough to complete a event record.
101 */
102
103static enum pmclog_parser_state
104pmclog_get_record(struct pmclog_parse_state *ps, char **data, ssize_t *len)
105{
106	int avail, copylen, recordsize, used;
107	uint32_t h;
108	const int HEADERSIZE = sizeof(uint32_t);
109	char *src, *dst;
110
111	if ((avail = *len) <= 0)
112		return (ps->ps_state = PL_STATE_ERROR);
113
114	src = *data;
115	used = 0;
116
117	if (ps->ps_state == PL_STATE_NEW_RECORD)
118		ps->ps_svcount = 0;
119
120	dst = (char *) &ps->ps_saved + ps->ps_svcount;
121
122	switch (ps->ps_state) {
123	case PL_STATE_NEW_RECORD:
124
125		/*
126		 * Transitions:
127		 *
128		 * Case A: avail < headersize
129		 *	-> 'expecting header'
130		 *
131		 * Case B: avail >= headersize
132		 *    B.1: avail < recordsize
133		 *	   -> 'partial record'
134		 *    B.2: avail >= recordsize
135		 *         -> 'new record'
136		 */
137
138		copylen = avail < HEADERSIZE ? avail : HEADERSIZE;
139		bcopy(src, dst, copylen);
140		ps->ps_svcount = used = copylen;
141
142		if (copylen < HEADERSIZE) {
143			ps->ps_state = PL_STATE_EXPECTING_HEADER;
144			goto done;
145		}
146
147		src += copylen;
148		dst += copylen;
149
150		h = PMCLOG_HEADER_FROM_SAVED_STATE(ps);
151		recordsize = PMCLOG_HEADER_TO_LENGTH(h);
152
153		if (recordsize <= 0)
154			goto error;
155
156		if (recordsize <= avail) { /* full record available */
157			bcopy(src, dst, recordsize - copylen);
158			ps->ps_svcount = used = recordsize;
159			goto done;
160		}
161
162		/* header + a partial record is available */
163		bcopy(src, dst, avail - copylen);
164		ps->ps_svcount = used = avail;
165		ps->ps_state = PL_STATE_PARTIAL_RECORD;
166
167		break;
168
169	case PL_STATE_EXPECTING_HEADER:
170
171		/*
172		 * Transitions:
173		 *
174		 * Case C: avail+saved < headersize
175		 * 	-> 'expecting header'
176		 *
177		 * Case D: avail+saved >= headersize
178		 *    D.1: avail+saved < recordsize
179		 *    	-> 'partial record'
180		 *    D.2: avail+saved >= recordsize
181		 *    	-> 'new record'
182		 *    (see PARTIAL_RECORD handling below)
183		 */
184
185		if (avail + ps->ps_svcount < HEADERSIZE) {
186			bcopy(src, dst, avail);
187			ps->ps_svcount += avail;
188			used = avail;
189			break;
190		}
191
192		used = copylen = HEADERSIZE - ps->ps_svcount;
193		bcopy(src, dst, copylen);
194		src += copylen;
195		dst += copylen;
196		avail -= copylen;
197		ps->ps_svcount += copylen;
198
199		/*FALLTHROUGH*/
200
201	case PL_STATE_PARTIAL_RECORD:
202
203		/*
204		 * Transitions:
205		 *
206		 * Case E: avail+saved < recordsize
207		 * 	-> 'partial record'
208		 *
209		 * Case F: avail+saved >= recordsize
210		 * 	-> 'new record'
211		 */
212
213		h = PMCLOG_HEADER_FROM_SAVED_STATE(ps);
214		recordsize = PMCLOG_HEADER_TO_LENGTH(h);
215
216		if (recordsize <= 0)
217			goto error;
218
219		if (avail + ps->ps_svcount < recordsize) {
220			copylen = avail;
221			ps->ps_state = PL_STATE_PARTIAL_RECORD;
222		} else {
223			copylen = recordsize - ps->ps_svcount;
224			ps->ps_state = PL_STATE_NEW_RECORD;
225		}
226
227		bcopy(src, dst, copylen);
228		ps->ps_svcount += copylen;
229		used += copylen;
230		break;
231
232	default:
233		goto error;
234	}
235
236 done:
237	*data += used;
238	*len  -= used;
239	return ps->ps_state;
240
241 error:
242	ps->ps_state = PL_STATE_ERROR;
243	return ps->ps_state;
244}
245
246/*
247 * Get an event from the stream pointed to by '*data'.  '*len'
248 * indicates the number of bytes available to parse.  Arguments
249 * '*data' and '*len' are updated to indicate the number of bytes
250 * consumed.
251 */
252
253static int
254pmclog_get_event(void *cookie, char **data, ssize_t *len,
255    struct pmclog_ev *ev)
256{
257	int evlen, pathlen;
258	uint32_t h, *le, npc;
259	enum pmclog_parser_state e;
260	struct pmclog_parse_state *ps;
261	struct pmclog_header *ph;
262
263	ps = (struct pmclog_parse_state *) cookie;
264
265	assert(ps->ps_state != PL_STATE_ERROR);
266
267	if ((e = pmclog_get_record(ps,data,len)) == PL_STATE_ERROR) {
268		ev->pl_state = PMCLOG_ERROR;
269		printf("state error\n");
270		return -1;
271	}
272
273	if (e != PL_STATE_NEW_RECORD) {
274		ev->pl_state = PMCLOG_REQUIRE_DATA;
275		return -1;
276	}
277
278	PMCLOG_INITIALIZE_READER(le, ps->ps_saved);
279	ev->pl_data = le;
280	ph = (struct pmclog_header *)(uintptr_t)le;
281
282	h = ph->pl_header;
283	if (!PMCLOG_HEADER_CHECK_MAGIC(h)) {
284		printf("bad magic\n");
285		ps->ps_state = PL_STATE_ERROR;
286		ev->pl_state = PMCLOG_ERROR;
287		return -1;
288	}
289
290	/* copy out the time stamp */
291	ev->pl_ts.tv_sec = ph->pl_tsc;
292	le += sizeof(*ph)/4;
293
294	evlen = PMCLOG_HEADER_TO_LENGTH(h);
295
296#define	PMCLOG_GET_PATHLEN(P,E,TYPE) do {				\
297		(P) = (E) - offsetof(struct TYPE, pl_pathname);		\
298		if ((P) > PATH_MAX || (P) < 0)				\
299			goto error;					\
300	} while (0)
301
302#define	PMCLOG_GET_CALLCHAIN_SIZE(SZ,E) do {				\
303		(SZ) = ((E) - offsetof(struct pmclog_callchain, pl_pc))	\
304			/ sizeof(uintfptr_t);				\
305	} while (0);
306
307	switch (ev->pl_type = PMCLOG_HEADER_TO_TYPE(h)) {
308	case PMCLOG_TYPE_CALLCHAIN:
309		PMCLOG_READ32(le,ev->pl_u.pl_cc.pl_pid);
310		PMCLOG_READ32(le,ev->pl_u.pl_cc.pl_tid);
311		PMCLOG_READ32(le,ev->pl_u.pl_cc.pl_pmcid);
312		PMCLOG_READ32(le,ev->pl_u.pl_cc.pl_cpuflags);
313		PMCLOG_GET_CALLCHAIN_SIZE(ev->pl_u.pl_cc.pl_npc,evlen);
314		for (npc = 0; npc < ev->pl_u.pl_cc.pl_npc; npc++)
315			PMCLOG_READADDR(le,ev->pl_u.pl_cc.pl_pc[npc]);
316		for (;npc < PMC_CALLCHAIN_DEPTH_MAX; npc++)
317			ev->pl_u.pl_cc.pl_pc[npc] = (uintfptr_t) 0;
318		break;
319	case PMCLOG_TYPE_CLOSELOG:
320		ev->pl_state = PMCLOG_EOF;
321		return (-1);
322	case PMCLOG_TYPE_DROPNOTIFY:
323		/* nothing to do */
324		break;
325	case PMCLOG_TYPE_INITIALIZE:
326		PMCLOG_READ32(le,ev->pl_u.pl_i.pl_version);
327		PMCLOG_READ32(le,ev->pl_u.pl_i.pl_arch);
328		PMCLOG_READ64(le,ev->pl_u.pl_i.pl_tsc_freq);
329		memcpy(&ev->pl_u.pl_i.pl_ts, le, sizeof(struct timespec));
330		le += sizeof(struct timespec)/4;
331		PMCLOG_READSTRING(le, ev->pl_u.pl_i.pl_cpuid, PMC_CPUID_LEN);
332		memcpy(ev->pl_u.pl_i.pl_cpuid, le, PMC_CPUID_LEN);
333		ps->ps_cpuid = strdup(ev->pl_u.pl_i.pl_cpuid);
334		ps->ps_version = ev->pl_u.pl_i.pl_version;
335		ps->ps_arch = ev->pl_u.pl_i.pl_arch;
336		ps->ps_initialized = 1;
337		break;
338	case PMCLOG_TYPE_MAP_IN:
339		PMCLOG_GET_PATHLEN(pathlen,evlen,pmclog_map_in);
340		PMCLOG_READ32(le,ev->pl_u.pl_mi.pl_pid);
341		PMCLOG_SKIP32(le);
342		PMCLOG_READADDR(le,ev->pl_u.pl_mi.pl_start);
343		PMCLOG_READSTRING(le, ev->pl_u.pl_mi.pl_pathname, pathlen);
344		break;
345	case PMCLOG_TYPE_MAP_OUT:
346		PMCLOG_READ32(le,ev->pl_u.pl_mo.pl_pid);
347		PMCLOG_SKIP32(le);
348		PMCLOG_READADDR(le,ev->pl_u.pl_mo.pl_start);
349		PMCLOG_READADDR(le,ev->pl_u.pl_mo.pl_end);
350		break;
351	case PMCLOG_TYPE_PMCALLOCATE:
352		PMCLOG_READ32(le,ev->pl_u.pl_a.pl_pmcid);
353		PMCLOG_READ32(le,ev->pl_u.pl_a.pl_event);
354		PMCLOG_READ32(le,ev->pl_u.pl_a.pl_flags);
355		PMCLOG_SKIP32(le);
356		PMCLOG_READ64(le,ev->pl_u.pl_a.pl_rate);
357
358		/*
359		 * pl_event could contain either a PMC event code or a PMU
360		 * event index.
361		 */
362		if ((ev->pl_u.pl_a.pl_flags & PMC_F_EV_PMU) != 0)
363			ev->pl_u.pl_a.pl_evname =
364			    pmc_pmu_event_get_by_idx(ps->ps_cpuid,
365				ev->pl_u.pl_a.pl_event);
366		else if (ev->pl_u.pl_a.pl_event <= PMC_EVENT_LAST)
367			ev->pl_u.pl_a.pl_evname =
368			    _pmc_name_of_event(ev->pl_u.pl_a.pl_event,
369				ps->ps_arch);
370		else
371			ev->pl_u.pl_a.pl_evname = NULL;
372		if (ev->pl_u.pl_a.pl_evname == NULL) {
373			printf("unknown event\n");
374			goto error;
375		}
376		break;
377	case PMCLOG_TYPE_PMCALLOCATEDYN:
378		PMCLOG_READ32(le,ev->pl_u.pl_ad.pl_pmcid);
379		PMCLOG_READ32(le,ev->pl_u.pl_ad.pl_event);
380		PMCLOG_READ32(le,ev->pl_u.pl_ad.pl_flags);
381		PMCLOG_SKIP32(le);
382		PMCLOG_READSTRING(le,ev->pl_u.pl_ad.pl_evname,PMC_NAME_MAX);
383		break;
384	case PMCLOG_TYPE_PMCATTACH:
385		PMCLOG_GET_PATHLEN(pathlen,evlen,pmclog_pmcattach);
386		PMCLOG_READ32(le,ev->pl_u.pl_t.pl_pmcid);
387		PMCLOG_READ32(le,ev->pl_u.pl_t.pl_pid);
388		PMCLOG_READSTRING(le,ev->pl_u.pl_t.pl_pathname,pathlen);
389		break;
390	case PMCLOG_TYPE_PMCDETACH:
391		PMCLOG_READ32(le,ev->pl_u.pl_d.pl_pmcid);
392		PMCLOG_READ32(le,ev->pl_u.pl_d.pl_pid);
393		break;
394	case PMCLOG_TYPE_PROCCSW:
395		PMCLOG_READ64(le,ev->pl_u.pl_c.pl_value);
396		PMCLOG_READ32(le,ev->pl_u.pl_c.pl_pmcid);
397		PMCLOG_READ32(le,ev->pl_u.pl_c.pl_pid);
398		PMCLOG_READ32(le,ev->pl_u.pl_c.pl_tid);
399		break;
400	case PMCLOG_TYPE_PROCEXEC:
401		PMCLOG_GET_PATHLEN(pathlen,evlen,pmclog_procexec);
402		PMCLOG_READ32(le,ev->pl_u.pl_x.pl_pid);
403		PMCLOG_READ32(le,ev->pl_u.pl_x.pl_pmcid);
404		PMCLOG_READADDR(le,ev->pl_u.pl_x.pl_baseaddr);
405		PMCLOG_READADDR(le,ev->pl_u.pl_x.pl_dynaddr);
406		PMCLOG_READSTRING(le,ev->pl_u.pl_x.pl_pathname,pathlen);
407		break;
408	case PMCLOG_TYPE_PROCEXIT:
409		PMCLOG_READ32(le,ev->pl_u.pl_e.pl_pmcid);
410		PMCLOG_READ32(le,ev->pl_u.pl_e.pl_pid);
411		PMCLOG_READ64(le,ev->pl_u.pl_e.pl_value);
412		break;
413	case PMCLOG_TYPE_PROCFORK:
414		PMCLOG_READ32(le,ev->pl_u.pl_f.pl_oldpid);
415		PMCLOG_READ32(le,ev->pl_u.pl_f.pl_newpid);
416		break;
417	case PMCLOG_TYPE_SYSEXIT:
418		PMCLOG_READ32(le,ev->pl_u.pl_se.pl_pid);
419		break;
420	case PMCLOG_TYPE_USERDATA:
421		PMCLOG_READ32(le,ev->pl_u.pl_u.pl_userdata);
422		break;
423	case PMCLOG_TYPE_THR_CREATE:
424		PMCLOG_READ32(le,ev->pl_u.pl_tc.pl_tid);
425		PMCLOG_READ32(le,ev->pl_u.pl_tc.pl_pid);
426		PMCLOG_READ32(le,ev->pl_u.pl_tc.pl_flags);
427		PMCLOG_SKIP32(le);
428		memcpy(ev->pl_u.pl_tc.pl_tdname, le, MAXCOMLEN+1);
429		break;
430	case PMCLOG_TYPE_THR_EXIT:
431		PMCLOG_READ32(le,ev->pl_u.pl_te.pl_tid);
432		break;
433	case PMCLOG_TYPE_PROC_CREATE:
434		PMCLOG_READ32(le,ev->pl_u.pl_pc.pl_pid);
435		PMCLOG_READ32(le,ev->pl_u.pl_pc.pl_flags);
436		memcpy(ev->pl_u.pl_pc.pl_pcomm, le, MAXCOMLEN+1);
437		break;
438	default:	/* unknown record type */
439		ps->ps_state = PL_STATE_ERROR;
440		ev->pl_state = PMCLOG_ERROR;
441		return (-1);
442	}
443
444	ev->pl_offset = (ps->ps_offset += evlen);
445	ev->pl_count  = (ps->ps_count += 1);
446	ev->pl_len = evlen;
447	ev->pl_state = PMCLOG_OK;
448	return 0;
449
450 error:
451	ev->pl_state = PMCLOG_ERROR;
452	ps->ps_state = PL_STATE_ERROR;
453	return -1;
454}
455
456/*
457 * Extract and return the next event from the byte stream.
458 *
459 * Returns 0 and sets the event's state to PMCLOG_OK in case an event
460 * was successfully parsed.  Otherwise this function returns -1 and
461 * sets the event's state to one of PMCLOG_REQUIRE_DATA (if more data
462 * is needed) or PMCLOG_EOF (if an EOF was seen) or PMCLOG_ERROR if
463 * a parse error was encountered.
464 */
465
466int
467pmclog_read(void *cookie, struct pmclog_ev *ev)
468{
469	int retval;
470	ssize_t nread;
471	struct pmclog_parse_state *ps;
472
473	ps = (struct pmclog_parse_state *) cookie;
474
475	if (ps->ps_state == PL_STATE_ERROR) {
476		ev->pl_state = PMCLOG_ERROR;
477		return -1;
478	}
479
480	/*
481	 * If there isn't enough data left for a new event try and get
482	 * more data.
483	 */
484	if (ps->ps_len == 0) {
485		ev->pl_state = PMCLOG_REQUIRE_DATA;
486
487		/*
488		 * If we have a valid file descriptor to read from, attempt
489		 * to read from that.  This read may return with an error,
490		 * (which may be EAGAIN or other recoverable error), or
491		 * can return EOF.
492		 */
493		if (ps->ps_fd != PMCLOG_FD_NONE) {
494		refill:
495			nread = read(ps->ps_fd, ps->ps_buffer,
496			    PMCLOG_BUFFER_SIZE);
497
498			if (nread <= 0) {
499				if (nread == 0)
500					ev->pl_state = PMCLOG_EOF;
501				else if (errno != EAGAIN) /* not restartable */
502					ev->pl_state = PMCLOG_ERROR;
503				return -1;
504			}
505
506			ps->ps_len = nread;
507			ps->ps_data = ps->ps_buffer;
508		} else {
509			return -1;
510		}
511	}
512
513	assert(ps->ps_len > 0);
514
515
516	 /* Retrieve one event from the byte stream. */
517	retval = pmclog_get_event(ps, &ps->ps_data, &ps->ps_len, ev);
518	/*
519	 * If we need more data and we have a configured fd, try read
520	 * from it.
521	 */
522	if (retval < 0 && ev->pl_state == PMCLOG_REQUIRE_DATA &&
523	    ps->ps_fd != -1) {
524		assert(ps->ps_len == 0);
525		goto refill;
526	}
527
528	return retval;
529}
530
531/*
532 * Feed data to a memory based parser.
533 *
534 * The memory area pointed to by 'data' needs to be valid till the
535 * next error return from pmclog_next_event().
536 */
537
538int
539pmclog_feed(void *cookie, char *data, int len)
540{
541	struct pmclog_parse_state *ps;
542
543	ps = (struct pmclog_parse_state *) cookie;
544
545	if (len < 0 ||		/* invalid length */
546	    ps->ps_buffer ||	/* called for a file parser */
547	    ps->ps_len != 0)	/* unnecessary call */
548		return -1;
549
550	ps->ps_data = data;
551	ps->ps_len  = len;
552
553	return 0;
554}
555
556/*
557 * Allocate and initialize parser state.
558 */
559
560void *
561pmclog_open(int fd)
562{
563	struct pmclog_parse_state *ps;
564
565	if ((ps = (struct pmclog_parse_state *) malloc(sizeof(*ps))) == NULL)
566		return NULL;
567
568	ps->ps_state = PL_STATE_NEW_RECORD;
569	ps->ps_arch = -1;
570	ps->ps_initialized = 0;
571	ps->ps_count = 0;
572	ps->ps_offset = (off_t) 0;
573	bzero(&ps->ps_saved, sizeof(ps->ps_saved));
574	ps->ps_cpuid = NULL;
575	ps->ps_svcount = 0;
576	ps->ps_fd    = fd;
577	ps->ps_data  = NULL;
578	ps->ps_buffer = NULL;
579	ps->ps_len   = 0;
580
581	/* allocate space for a work area */
582	if (ps->ps_fd != PMCLOG_FD_NONE) {
583		if ((ps->ps_buffer = malloc(PMCLOG_BUFFER_SIZE)) == NULL) {
584			free(ps);
585			return NULL;
586		}
587	}
588
589	return ps;
590}
591
592
593/*
594 * Free up parser state.
595 */
596
597void
598pmclog_close(void *cookie)
599{
600	struct pmclog_parse_state *ps;
601
602	ps = (struct pmclog_parse_state *) cookie;
603
604	if (ps->ps_buffer)
605		free(ps->ps_buffer);
606
607	free(ps);
608}
609