1/*-
2 * Copyright (c) 2005-2007 Joseph Koshy
3 * Copyright (c) 2007 The FreeBSD Foundation
4 * All rights reserved.
5 *
6 * Portions of this software were developed by A. Joseph Koshy under
7 * sponsorship from the FreeBSD Foundation and Google, Inc.
8 *
9 * Redistribution and use in source and binary forms, with or without
10 * modification, are permitted provided that the following conditions
11 * are met:
12 * 1. Redistributions of source code must retain the above copyright
13 *    notice, this list of conditions and the following disclaimer.
14 * 2. Redistributions in binary form must reproduce the above copyright
15 *    notice, this list of conditions and the following disclaimer in the
16 *    documentation and/or other materials provided with the distribution.
17 *
18 * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
19 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
20 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
21 * ARE DISCLAIMED.  IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
22 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
23 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
24 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
25 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
26 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
27 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
28 * SUCH DAMAGE.
29 */
30
31#include <sys/cdefs.h>
32__FBSDID("$FreeBSD$");
33
34#include <sys/param.h>
35#include <sys/pmc.h>
36#include <sys/pmclog.h>
37
38#include <assert.h>
39#include <errno.h>
40#include <pmc.h>
41#include <pmclog.h>
42#include <stddef.h>
43#include <stdlib.h>
44#include <string.h>
45#include <strings.h>
46#include <unistd.h>
47
48#include <machine/pmc_mdep.h>
49
50#include "libpmcinternal.h"
51
52#define	PMCLOG_BUFFER_SIZE			4096
53
54/*
55 * API NOTES
56 *
 * The pmclog(3) API is oriented towards parsing an event stream in
 * "realtime", i.e., from a data source that may or may not preserve
59 * record boundaries -- for example when the data source is elsewhere
60 * on a network.  The API allows data to be fed into the parser zero
61 * or more bytes at a time.
62 *
63 * The state for a log file parser is maintained in a 'struct
64 * pmclog_parse_state'.  Parser invocations are done by calling
65 * 'pmclog_read()'; this function will inform the caller when a
66 * complete event is parsed.
67 *
68 * The parser first assembles a complete log file event in an internal
69 * work area (see "ps_saved" below).  Once a complete log file event
70 * is read, the parser then parses it and converts it to an event
71 * descriptor usable by the client.  We could possibly avoid this two
72 * step process by directly parsing the input log to set fields in the
73 * event record.  However the parser's state machine would get
74 * insanely complicated, and this code is unlikely to be used in
75 * performance critical paths.
76 */
77
/*
 * States of the record assembly state machine driven by
 * pmclog_get_record().
 */
enum pmclog_parser_state {
	/* Between records: nothing of the next record has been saved. */
	PL_STATE_NEW_RECORD		= 0,
	/* Some, but not all, bytes of the header word have been saved. */
	PL_STATE_EXPECTING_HEADER	= 1,
	/* The header word is complete; the rest of the record is not. */
	PL_STATE_PARTIAL_RECORD		= 2,
	/* A parse error was encountered. */
	PL_STATE_ERROR			= 3
};
84
85struct pmclog_parse_state {
86	enum pmclog_parser_state ps_state;
87	enum pmc_cputype	ps_arch;	/* log file architecture */
88	uint32_t		ps_version;	/* hwpmc version */
89	int			ps_initialized;	/* whether initialized */
90	int			ps_count;	/* count of records processed */
91	off_t			ps_offset;	/* stream byte offset */
92	union pmclog_entry	ps_saved;	/* saved partial log entry */
93	int			ps_svcount;	/* #bytes saved */
94	int			ps_fd;		/* active fd or -1 */
95	char			*ps_buffer;	/* scratch buffer if fd != -1 */
96	char			*ps_data;	/* current parse pointer */
97	size_t			ps_len;		/* length of buffered data */
98};
99
/* The 32-bit header word at the start of the saved (partial) record. */
#define	PMCLOG_HEADER_FROM_SAVED_STATE(PS)				\
	(((uint32_t *) &(PS)->ps_saved)[0])

/*
 * Record field readers.  'LE' is a 'uint32_t *' cursor that walks the
 * record one 32-bit word at a time; each reader stores a value in 'V'
 * and moves the cursor past the words it used.
 */

/* Aim the cursor at the first word of object 'A'. */
#define	PMCLOG_INITIALIZE_READER(LE,A)	LE = (uint32_t *) &(A)

/* Read one 32-bit word. */
#define	PMCLOG_READ32(LE,V)		do {				\
		(V) = (LE)[0];						\
		(LE) += 1;						\
	} while (0)

/* Read a 64-bit value stored as two words, low word first. */
#define	PMCLOG_READ64(LE,V)		do {				\
		uint64_t _lo, _hi;					\
		_lo = (uint64_t) (LE)[0];				\
		_hi = (uint64_t) (LE)[1];				\
		(LE) += 2;						\
		(V) = (_hi << 32) | _lo;				\
	} while (0)

/*
 * Copy the string at the cursor into 'DST', a buffer of 'LEN' bytes.
 * The cursor is not advanced.
 */
#define	PMCLOG_READSTRING(LE,DST,LEN)	strlcpy((DST), (char *) (LE), (LEN))
115
116/*
117 * Assemble a log record from '*len' octets starting from address '*data'.
118 * Update 'data' and 'len' to reflect the number of bytes consumed.
119 *
120 * '*data' is potentially an unaligned address and '*len' octets may
121 * not be enough to complete a event record.
122 */
123
124static enum pmclog_parser_state
125pmclog_get_record(struct pmclog_parse_state *ps, char **data, ssize_t *len)
126{
127	int avail, copylen, recordsize, used;
128	uint32_t h;
129	const int HEADERSIZE = sizeof(uint32_t);
130	char *src, *dst;
131
132	if ((avail = *len) <= 0)
133		return (ps->ps_state = PL_STATE_ERROR);
134
135	src = *data;
136	h = used = 0;
137
138	if (ps->ps_state == PL_STATE_NEW_RECORD)
139		ps->ps_svcount = 0;
140
141	dst = (char *) &ps->ps_saved + ps->ps_svcount;
142
143	switch (ps->ps_state) {
144	case PL_STATE_NEW_RECORD:
145
146		/*
147		 * Transitions:
148		 *
149		 * Case A: avail < headersize
150		 *	-> 'expecting header'
151		 *
152		 * Case B: avail >= headersize
153		 *    B.1: avail < recordsize
154		 *	   -> 'partial record'
155		 *    B.2: avail >= recordsize
156		 *         -> 'new record'
157		 */
158
159		copylen = avail < HEADERSIZE ? avail : HEADERSIZE;
160		bcopy(src, dst, copylen);
161		ps->ps_svcount = used = copylen;
162
163		if (copylen < HEADERSIZE) {
164			ps->ps_state = PL_STATE_EXPECTING_HEADER;
165			goto done;
166		}
167
168		src += copylen;
169		dst += copylen;
170
171		h = PMCLOG_HEADER_FROM_SAVED_STATE(ps);
172		recordsize = PMCLOG_HEADER_TO_LENGTH(h);
173
174		if (recordsize <= 0)
175			goto error;
176
177		if (recordsize <= avail) { /* full record available */
178			bcopy(src, dst, recordsize - copylen);
179			ps->ps_svcount = used = recordsize;
180			goto done;
181		}
182
183		/* header + a partial record is available */
184		bcopy(src, dst, avail - copylen);
185		ps->ps_svcount = used = avail;
186		ps->ps_state = PL_STATE_PARTIAL_RECORD;
187
188		break;
189
190	case PL_STATE_EXPECTING_HEADER:
191
192		/*
193		 * Transitions:
194		 *
195		 * Case C: avail+saved < headersize
196		 * 	-> 'expecting header'
197		 *
198		 * Case D: avail+saved >= headersize
199		 *    D.1: avail+saved < recordsize
200		 *    	-> 'partial record'
201		 *    D.2: avail+saved >= recordsize
202		 *    	-> 'new record'
203		 *    (see PARTIAL_RECORD handling below)
204		 */
205
206		if (avail + ps->ps_svcount < HEADERSIZE) {
207			bcopy(src, dst, avail);
208			ps->ps_svcount += avail;
209			used = avail;
210			break;
211		}
212
213		used = copylen = HEADERSIZE - ps->ps_svcount;
214		bcopy(src, dst, copylen);
215		src += copylen;
216		dst += copylen;
217		avail -= copylen;
218		ps->ps_svcount += copylen;
219
220		/*FALLTHROUGH*/
221
222	case PL_STATE_PARTIAL_RECORD:
223
224		/*
225		 * Transitions:
226		 *
227		 * Case E: avail+saved < recordsize
228		 * 	-> 'partial record'
229		 *
230		 * Case F: avail+saved >= recordsize
231		 * 	-> 'new record'
232		 */
233
234		h = PMCLOG_HEADER_FROM_SAVED_STATE(ps);
235		recordsize = PMCLOG_HEADER_TO_LENGTH(h);
236
237		if (recordsize <= 0)
238			goto error;
239
240		if (avail + ps->ps_svcount < recordsize) {
241			copylen = avail;
242			ps->ps_state = PL_STATE_PARTIAL_RECORD;
243		} else {
244			copylen = recordsize - ps->ps_svcount;
245			ps->ps_state = PL_STATE_NEW_RECORD;
246		}
247
248		bcopy(src, dst, copylen);
249		ps->ps_svcount += copylen;
250		used += copylen;
251		break;
252
253	default:
254		goto error;
255	}
256
257 done:
258	*data += used;
259	*len  -= used;
260	return ps->ps_state;
261
262 error:
263	ps->ps_state = PL_STATE_ERROR;
264	return ps->ps_state;
265}
266
267/*
268 * Get an event from the stream pointed to by '*data'.  '*len'
269 * indicates the number of bytes available to parse.  Arguments
270 * '*data' and '*len' are updated to indicate the number of bytes
271 * consumed.
272 */
273
274static int
275pmclog_get_event(void *cookie, char **data, ssize_t *len,
276    struct pmclog_ev *ev)
277{
278	int evlen, pathlen;
279	uint32_t h, *le, npc;
280	enum pmclog_parser_state e;
281	struct pmclog_parse_state *ps;
282
283	ps = (struct pmclog_parse_state *) cookie;
284
285	assert(ps->ps_state != PL_STATE_ERROR);
286
287	if ((e = pmclog_get_record(ps,data,len)) == PL_STATE_ERROR) {
288		ev->pl_state = PMCLOG_ERROR;
289		return -1;
290	}
291
292	if (e != PL_STATE_NEW_RECORD) {
293		ev->pl_state = PMCLOG_REQUIRE_DATA;
294		return -1;
295	}
296
297	PMCLOG_INITIALIZE_READER(le, ps->ps_saved);
298
299	PMCLOG_READ32(le,h);
300
301	if (!PMCLOG_HEADER_CHECK_MAGIC(h)) {
302		ps->ps_state = PL_STATE_ERROR;
303		ev->pl_state = PMCLOG_ERROR;
304		return -1;
305	}
306
307	/* copy out the time stamp */
308	PMCLOG_READ32(le,ev->pl_ts.tv_sec);
309	PMCLOG_READ32(le,ev->pl_ts.tv_nsec);
310
311	evlen = PMCLOG_HEADER_TO_LENGTH(h);
312
313#define	PMCLOG_GET_PATHLEN(P,E,TYPE) do {				\
314		(P) = (E) - offsetof(struct TYPE, pl_pathname);		\
315		if ((P) > PATH_MAX || (P) < 0)				\
316			goto error;					\
317	} while (0)
318
319#define	PMCLOG_GET_CALLCHAIN_SIZE(SZ,E) do {				\
320		(SZ) = ((E) - offsetof(struct pmclog_callchain, pl_pc))	\
321			/ sizeof(uintfptr_t);				\
322	} while (0);
323
324	switch (ev->pl_type = PMCLOG_HEADER_TO_TYPE(h)) {
325	case PMCLOG_TYPE_CALLCHAIN:
326		PMCLOG_READ32(le,ev->pl_u.pl_cc.pl_pid);
327		PMCLOG_READ32(le,ev->pl_u.pl_cc.pl_pmcid);
328		PMCLOG_READ32(le,ev->pl_u.pl_cc.pl_cpuflags);
329		PMCLOG_GET_CALLCHAIN_SIZE(ev->pl_u.pl_cc.pl_npc,evlen);
330		for (npc = 0; npc < ev->pl_u.pl_cc.pl_npc; npc++)
331			PMCLOG_READADDR(le,ev->pl_u.pl_cc.pl_pc[npc]);
332		for (;npc < PMC_CALLCHAIN_DEPTH_MAX; npc++)
333			ev->pl_u.pl_cc.pl_pc[npc] = (uintfptr_t) 0;
334		break;
335	case PMCLOG_TYPE_CLOSELOG:
336	case PMCLOG_TYPE_DROPNOTIFY:
337		/* nothing to do */
338		break;
339	case PMCLOG_TYPE_INITIALIZE:
340		PMCLOG_READ32(le,ev->pl_u.pl_i.pl_version);
341		PMCLOG_READ32(le,ev->pl_u.pl_i.pl_arch);
342		ps->ps_version = ev->pl_u.pl_i.pl_version;
343		ps->ps_arch = ev->pl_u.pl_i.pl_arch;
344		ps->ps_initialized = 1;
345		break;
346	case PMCLOG_TYPE_MAP_IN:
347		PMCLOG_GET_PATHLEN(pathlen,evlen,pmclog_map_in);
348		PMCLOG_READ32(le,ev->pl_u.pl_mi.pl_pid);
349		PMCLOG_READADDR(le,ev->pl_u.pl_mi.pl_start);
350		PMCLOG_READSTRING(le, ev->pl_u.pl_mi.pl_pathname, pathlen);
351		break;
352	case PMCLOG_TYPE_MAP_OUT:
353		PMCLOG_READ32(le,ev->pl_u.pl_mo.pl_pid);
354		PMCLOG_READADDR(le,ev->pl_u.pl_mo.pl_start);
355		PMCLOG_READADDR(le,ev->pl_u.pl_mo.pl_end);
356		break;
357	case PMCLOG_TYPE_PCSAMPLE:
358		PMCLOG_READ32(le,ev->pl_u.pl_s.pl_pid);
359		PMCLOG_READADDR(le,ev->pl_u.pl_s.pl_pc);
360		PMCLOG_READ32(le,ev->pl_u.pl_s.pl_pmcid);
361		PMCLOG_READ32(le,ev->pl_u.pl_s.pl_usermode);
362		break;
363	case PMCLOG_TYPE_PMCALLOCATE:
364		PMCLOG_READ32(le,ev->pl_u.pl_a.pl_pmcid);
365		PMCLOG_READ32(le,ev->pl_u.pl_a.pl_event);
366		PMCLOG_READ32(le,ev->pl_u.pl_a.pl_flags);
367		if ((ev->pl_u.pl_a.pl_evname =
368		    _pmc_name_of_event(ev->pl_u.pl_a.pl_event, ps->ps_arch))
369		    == NULL)
370			goto error;
371		break;
372	case PMCLOG_TYPE_PMCALLOCATEDYN:
373		PMCLOG_READ32(le,ev->pl_u.pl_ad.pl_pmcid);
374		PMCLOG_READ32(le,ev->pl_u.pl_ad.pl_event);
375		PMCLOG_READ32(le,ev->pl_u.pl_ad.pl_flags);
376		PMCLOG_READSTRING(le,ev->pl_u.pl_ad.pl_evname,PMC_NAME_MAX);
377		break;
378	case PMCLOG_TYPE_PMCATTACH:
379		PMCLOG_GET_PATHLEN(pathlen,evlen,pmclog_pmcattach);
380		PMCLOG_READ32(le,ev->pl_u.pl_t.pl_pmcid);
381		PMCLOG_READ32(le,ev->pl_u.pl_t.pl_pid);
382		PMCLOG_READSTRING(le,ev->pl_u.pl_t.pl_pathname,pathlen);
383		break;
384	case PMCLOG_TYPE_PMCDETACH:
385		PMCLOG_READ32(le,ev->pl_u.pl_d.pl_pmcid);
386		PMCLOG_READ32(le,ev->pl_u.pl_d.pl_pid);
387		break;
388	case PMCLOG_TYPE_PROCCSW:
389		PMCLOG_READ32(le,ev->pl_u.pl_c.pl_pmcid);
390		PMCLOG_READ64(le,ev->pl_u.pl_c.pl_value);
391		PMCLOG_READ32(le,ev->pl_u.pl_c.pl_pid);
392		break;
393	case PMCLOG_TYPE_PROCEXEC:
394		PMCLOG_GET_PATHLEN(pathlen,evlen,pmclog_procexec);
395		PMCLOG_READ32(le,ev->pl_u.pl_x.pl_pid);
396		PMCLOG_READADDR(le,ev->pl_u.pl_x.pl_entryaddr);
397		PMCLOG_READ32(le,ev->pl_u.pl_x.pl_pmcid);
398		PMCLOG_READSTRING(le,ev->pl_u.pl_x.pl_pathname,pathlen);
399		break;
400	case PMCLOG_TYPE_PROCEXIT:
401		PMCLOG_READ32(le,ev->pl_u.pl_e.pl_pmcid);
402		PMCLOG_READ64(le,ev->pl_u.pl_e.pl_value);
403		PMCLOG_READ32(le,ev->pl_u.pl_e.pl_pid);
404		break;
405	case PMCLOG_TYPE_PROCFORK:
406		PMCLOG_READ32(le,ev->pl_u.pl_f.pl_oldpid);
407		PMCLOG_READ32(le,ev->pl_u.pl_f.pl_newpid);
408		break;
409	case PMCLOG_TYPE_SYSEXIT:
410		PMCLOG_READ32(le,ev->pl_u.pl_se.pl_pid);
411		break;
412	case PMCLOG_TYPE_USERDATA:
413		PMCLOG_READ32(le,ev->pl_u.pl_u.pl_userdata);
414		break;
415	default:	/* unknown record type */
416		ps->ps_state = PL_STATE_ERROR;
417		ev->pl_state = PMCLOG_ERROR;
418		return (-1);
419	}
420
421	ev->pl_offset = (ps->ps_offset += evlen);
422	ev->pl_count  = (ps->ps_count += 1);
423	ev->pl_state = PMCLOG_OK;
424	return 0;
425
426 error:
427	ev->pl_state = PMCLOG_ERROR;
428	ps->ps_state = PL_STATE_ERROR;
429	return -1;
430}
431
432/*
433 * Extract and return the next event from the byte stream.
434 *
435 * Returns 0 and sets the event's state to PMCLOG_OK in case an event
436 * was successfully parsed.  Otherwise this function returns -1 and
437 * sets the event's state to one of PMCLOG_REQUIRE_DATA (if more data
438 * is needed) or PMCLOG_EOF (if an EOF was seen) or PMCLOG_ERROR if
439 * a parse error was encountered.
440 */
441
442int
443pmclog_read(void *cookie, struct pmclog_ev *ev)
444{
445	int retval;
446	ssize_t nread;
447	struct pmclog_parse_state *ps;
448
449	ps = (struct pmclog_parse_state *) cookie;
450
451	if (ps->ps_state == PL_STATE_ERROR) {
452		ev->pl_state = PMCLOG_ERROR;
453		return -1;
454	}
455
456	/*
457	 * If there isn't enough data left for a new event try and get
458	 * more data.
459	 */
460	if (ps->ps_len == 0) {
461		ev->pl_state = PMCLOG_REQUIRE_DATA;
462
463		/*
464		 * If we have a valid file descriptor to read from, attempt
465		 * to read from that.  This read may return with an error,
466		 * (which may be EAGAIN or other recoverable error), or
467		 * can return EOF.
468		 */
469		if (ps->ps_fd != PMCLOG_FD_NONE) {
470		refill:
471			nread = read(ps->ps_fd, ps->ps_buffer,
472			    PMCLOG_BUFFER_SIZE);
473
474			if (nread <= 0) {
475				if (nread == 0)
476					ev->pl_state = PMCLOG_EOF;
477				else if (errno != EAGAIN) /* not restartable */
478					ev->pl_state = PMCLOG_ERROR;
479				return -1;
480			}
481
482			ps->ps_len = nread;
483			ps->ps_data = ps->ps_buffer;
484		} else
485			return -1;
486	}
487
488	assert(ps->ps_len > 0);
489
490
491	 /* Retrieve one event from the byte stream. */
492	retval = pmclog_get_event(ps, &ps->ps_data, &ps->ps_len, ev);
493
494	/*
495	 * If we need more data and we have a configured fd, try read
496	 * from it.
497	 */
498	if (retval < 0 && ev->pl_state == PMCLOG_REQUIRE_DATA &&
499	    ps->ps_fd != -1) {
500		assert(ps->ps_len == 0);
501		goto refill;
502	}
503
504	return retval;
505}
506
507/*
508 * Feed data to a memory based parser.
509 *
510 * The memory area pointed to by 'data' needs to be valid till the
511 * next error return from pmclog_next_event().
512 */
513
514int
515pmclog_feed(void *cookie, char *data, int len)
516{
517	struct pmclog_parse_state *ps;
518
519	ps = (struct pmclog_parse_state *) cookie;
520
521	if (len < 0 ||		/* invalid length */
522	    ps->ps_buffer ||	/* called for a file parser */
523	    ps->ps_len != 0)	/* unnecessary call */
524		return -1;
525
526	ps->ps_data = data;
527	ps->ps_len  = len;
528
529	return 0;
530}
531
532/*
533 * Allocate and initialize parser state.
534 */
535
536void *
537pmclog_open(int fd)
538{
539	struct pmclog_parse_state *ps;
540
541	if ((ps = (struct pmclog_parse_state *) malloc(sizeof(*ps))) == NULL)
542		return NULL;
543
544	ps->ps_state = PL_STATE_NEW_RECORD;
545	ps->ps_arch = -1;
546	ps->ps_initialized = 0;
547	ps->ps_count = 0;
548	ps->ps_offset = (off_t) 0;
549	bzero(&ps->ps_saved, sizeof(ps->ps_saved));
550	ps->ps_svcount = 0;
551	ps->ps_fd    = fd;
552	ps->ps_data  = NULL;
553	ps->ps_buffer = NULL;
554	ps->ps_len   = 0;
555
556	/* allocate space for a work area */
557	if (ps->ps_fd != PMCLOG_FD_NONE) {
558		if ((ps->ps_buffer = malloc(PMCLOG_BUFFER_SIZE)) == NULL) {
559			free(ps);
560			return NULL;
561		}
562	}
563
564	return ps;
565}
566
567
568/*
569 * Free up parser state.
570 */
571
572void
573pmclog_close(void *cookie)
574{
575	struct pmclog_parse_state *ps;
576
577	ps = (struct pmclog_parse_state *) cookie;
578
579	if (ps->ps_buffer)
580		free(ps->ps_buffer);
581
582	free(ps);
583}
584