// SPDX-License-Identifier: GPL-2.0
/*
 * builtin-inject.c
 *
 * Builtin inject command: Examine the live mode (stdin) event stream
 * and repipe it to stdout while optionally injecting additional
 * events into it.
 */
#include "builtin.h"

#include "util/color.h"
#include "util/dso.h"
#include "util/vdso.h"
#include "util/evlist.h"
#include "util/evsel.h"
#include "util/map.h"
#include "util/session.h"
#include "util/tool.h"
#include "util/debug.h"
#include "util/build-id.h"
#include "util/data.h"
#include "util/auxtrace.h"
#include "util/jit.h"
#include "util/string2.h"
#include "util/symbol.h"
#include "util/synthetic-events.h"
#include "util/thread.h"
#include "util/namespaces.h"
#include "util/util.h"
#include "util/tsc.h"

#include <internal/lib.h>

#include <linux/err.h>
#include <subcmd/parse-options.h>
#include <uapi/linux/mman.h> /* To get things like MAP_HUGETLB even on older libc headers */

#include <linux/list.h>
#include <linux/string.h>
#include <linux/zalloc.h>
#include <linux/hash.h>
#include <ctype.h>
#include <errno.h>
#include <signal.h>
#include <inttypes.h>

struct guest_event {
	struct perf_sample		sample;
	union perf_event		*event;
	char				*event_buf;
};

struct guest_id {
	/* hlist_node must be first, see free_hlist() */
	struct hlist_node		node;
	u64				id;
	u64				host_id;
	u32				vcpu;
};

struct guest_tid {
	/* hlist_node must be first, see free_hlist() */
	struct hlist_node		node;
	/* Thread ID of QEMU thread */
	u32				tid;
	u32				vcpu;
};

struct guest_vcpu {
	/* Current host CPU */
	u32				cpu;
	/* Thread ID of QEMU thread */
	u32				tid;
};

struct guest_session {
	char				*perf_data_file;
	u32				machine_pid;
	u64				time_offset;
	double				time_scale;
	struct perf_tool		tool;
	struct perf_data		data;
	struct perf_session		*session;
	char				*tmp_file_name;
	int				tmp_fd;
	struct perf_tsc_conversion	host_tc;
	struct perf_tsc_conversion	guest_tc;
	bool				copy_kcore_dir;
	bool				have_tc;
	bool				fetched;
	bool				ready;
	u16				dflt_id_hdr_size;
	u64				dflt_id;
	u64				highest_id;
	/* Array of guest_vcpu */
	struct guest_vcpu		*vcpu;
	size_t				vcpu_cnt;
	/* Hash table for guest_id */
	struct hlist_head		heads[PERF_EVLIST__HLIST_SIZE];
	/* Hash table for guest_tid */
	struct hlist_head		tids[PERF_EVLIST__HLIST_SIZE];
	/* Place to stash next guest event */
	struct guest_event		ev;
};

struct perf_inject {
	struct perf_tool	tool;
	struct perf_session	*session;
	bool			build_ids;
	bool			build_id_all;
	bool			sched_stat;
	bool			have_auxtrace;
	bool			strip;
	bool			jit_mode;
	bool			in_place_update;
	bool			in_place_update_dry_run;
	bool			is_pipe;
	bool			copy_kcore_dir;
	const char		*input_name;
	struct perf_data	output;
	u64			bytes_written;
	u64			aux_id;
	struct list_head	samples;
	struct itrace_synth_opts itrace_synth_opts;
	char			*event_copy;
	struct perf_file_section secs[HEADER_FEAT_BITS];
	struct guest_session	guest_session;
	struct strlist		*known_build_ids;
};

struct event_entry {
	struct list_head node;
	u32		 tid;
	union perf_event event[];
};

static int dso__inject_build_id(struct dso *dso, struct perf_tool *tool,
				struct machine *machine, u8 cpumode, u32 flags);

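/* Write raw bytes to the output file, keeping count of the bytes written */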
static int output_bytes(struct perf_inject *inject, void *buf, size_t sz)
{
	ssize_t size;

	size = perf_data__write(&inject->output, buf, sz);
	if (size < 0)
		return -errno;

	inject->bytes_written += size;
	return 0;
}

static int perf_event__repipe_synth(struct perf_tool *tool,
				    union perf_event *event)
{
	struct perf_inject *inject = container_of(tool, struct perf_inject,
						  tool);

	return output_bytes(inject, event, event->header.size);
}

static int perf_event__repipe_oe_synth(struct perf_tool *tool,
				       union perf_event *event,
				       struct ordered_events *oe __maybe_unused)
{
	return perf_event__repipe_synth(tool, event);
}

#ifdef HAVE_JITDUMP
static int perf_event__drop_oe(struct perf_tool *tool __maybe_unused,
			       union perf_event *event __maybe_unused,
			       struct ordered_events *oe __maybe_unused)
{
	return 0;
}
#endif

static int perf_event__repipe_op2_synth(struct perf_session *session,
					union perf_event *event)
{
	return perf_event__repipe_synth(session->tool, event);
}

static int perf_event__repipe_op4_synth(struct perf_session *session,
					union perf_event *event,
					u64 data __maybe_unused,
					const char *str __maybe_unused)
{
	return perf_event__repipe_synth(session->tool, event);
}

static int perf_event__repipe_attr(struct perf_tool *tool,
				   union perf_event *event,
				   struct evlist **pevlist)
{
	struct perf_inject *inject = container_of(tool, struct perf_inject,
						  tool);
	int ret;

	ret = perf_event__process_attr(tool, event, pevlist);
	if (ret)
		return ret;

	if (!inject->is_pipe)
		return 0;

	return perf_event__repipe_synth(tool, event);
}

static int perf_event__repipe_event_update(struct perf_tool *tool,
					   union perf_event *event,
					   struct evlist **pevlist __maybe_unused)
{
	return perf_event__repipe_synth(tool, event);
}

#ifdef HAVE_AUXTRACE_SUPPORT

static int copy_bytes(struct perf_inject *inject, struct perf_data *data, off_t size)
{
	char buf[4096];
	ssize_t ssz;
	int ret;

	while (size > 0) {
		ssz = perf_data__read(data, buf, min(size, (off_t)sizeof(buf)));
		if (ssz < 0)
			return -errno;
		ret = output_bytes(inject, buf, ssz);
		if (ret)
			return ret;
		size -= ssz;
	}

	return 0;
}

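/*
 * Repipe an AUXTRACE event together with its trace data. When the input is a
 * pipe or is not mapped as a single region, the trace data follows the event
 * record and must be read and copied separately. When writing to a file, also
 * record the event's file offset in the auxtrace index.
 */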
static s64 perf_event__repipe_auxtrace(struct perf_session *session,
				       union perf_event *event)
{
	struct perf_tool *tool = session->tool;
	struct perf_inject *inject = container_of(tool, struct perf_inject,
						  tool);
	int ret;

	inject->have_auxtrace = true;

	if (!inject->output.is_pipe) {
		off_t offset;

		offset = lseek(inject->output.file.fd, 0, SEEK_CUR);
		if (offset == -1)
			return -errno;
		ret = auxtrace_index__auxtrace_event(&session->auxtrace_index,
						     event, offset);
		if (ret < 0)
			return ret;
	}

	if (perf_data__is_pipe(session->data) || !session->one_mmap) {
		ret = output_bytes(inject, event, event->header.size);
		if (ret < 0)
			return ret;
		ret = copy_bytes(inject, session->data,
				 event->auxtrace.size);
	} else {
		ret = output_bytes(inject, event,
				   event->header.size + event->auxtrace.size);
	}
	if (ret < 0)
		return ret;

	return event->auxtrace.size;
}

#else

static s64
perf_event__repipe_auxtrace(struct perf_session *session __maybe_unused,
			    union perf_event *event __maybe_unused)
{
	pr_err("AUX area tracing not supported\n");
	return -EINVAL;
}

#endif

static int perf_event__repipe(struct perf_tool *tool,
			      union perf_event *event,
			      struct perf_sample *sample __maybe_unused,
			      struct machine *machine __maybe_unused)
{
	return perf_event__repipe_synth(tool, event);
}

static int perf_event__drop(struct perf_tool *tool __maybe_unused,
			    union perf_event *event __maybe_unused,
			    struct perf_sample *sample __maybe_unused,
			    struct machine *machine __maybe_unused)
{
	return 0;
}

static int perf_event__drop_aux(struct perf_tool *tool,
				union perf_event *event __maybe_unused,
				struct perf_sample *sample,
				struct machine *machine __maybe_unused)
{
	struct perf_inject *inject = container_of(tool, struct perf_inject, tool);

	if (!inject->aux_id)
		inject->aux_id = sample->id;

	return 0;
}

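/*
 * Return a copy of the event with the AUX area sample data cut out: keep the
 * part before the data and the part after it, zero the aux_sample size field
 * that precedes the removed data, and shrink the event size accordingly.
 */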
static union perf_event *
perf_inject__cut_auxtrace_sample(struct perf_inject *inject,
				 union perf_event *event,
				 struct perf_sample *sample)
{
	size_t sz1 = sample->aux_sample.data - (void *)event;
	size_t sz2 = event->header.size - sample->aux_sample.size - sz1;
	union perf_event *ev;

	if (inject->event_copy == NULL) {
		inject->event_copy = malloc(PERF_SAMPLE_MAX_SIZE);
		if (!inject->event_copy)
			return ERR_PTR(-ENOMEM);
	}
	ev = (union perf_event *)inject->event_copy;
	if (sz1 > event->header.size || sz2 > event->header.size ||
	    sz1 + sz2 > event->header.size ||
	    sz1 < sizeof(struct perf_event_header) + sizeof(u64))
		return event;

	memcpy(ev, event, sz1);
	memcpy((void *)ev + sz1, (void *)event + event->header.size - sz2, sz2);
	ev->header.size = sz1 + sz2;
	((u64 *)((void *)ev + sz1))[-1] = 0;

	return ev;
}

typedef int (*inject_handler)(struct perf_tool *tool,
			      union perf_event *event,
			      struct perf_sample *sample,
			      struct evsel *evsel,
			      struct machine *machine);

static int perf_event__repipe_sample(struct perf_tool *tool,
				     union perf_event *event,
				     struct perf_sample *sample,
				     struct evsel *evsel,
				     struct machine *machine)
{
	struct perf_inject *inject = container_of(tool, struct perf_inject,
						  tool);

	if (evsel && evsel->handler) {
		inject_handler f = evsel->handler;
		return f(tool, event, sample, evsel, machine);
	}

	build_id__mark_dso_hit(tool, event, sample, evsel, machine);

	if (inject->itrace_synth_opts.set && sample->aux_sample.size) {
		event = perf_inject__cut_auxtrace_sample(inject, event, sample);
		if (IS_ERR(event))
			return PTR_ERR(event);
	}

	return perf_event__repipe_synth(tool, event);
}

static int perf_event__repipe_mmap(struct perf_tool *tool,
				   union perf_event *event,
				   struct perf_sample *sample,
				   struct machine *machine)
{
	int err;

	err = perf_event__process_mmap(tool, event, sample, machine);
	perf_event__repipe(tool, event, sample, machine);

	return err;
}

#ifdef HAVE_JITDUMP
static int perf_event__jit_repipe_mmap(struct perf_tool *tool,
				       union perf_event *event,
				       struct perf_sample *sample,
				       struct machine *machine)
{
	struct perf_inject *inject = container_of(tool, struct perf_inject, tool);
	u64 n = 0;
	int ret;

	/*
	 * if jit marker, then inject jit mmaps and generate ELF images
	 */
	ret = jit_process(inject->session, &inject->output, machine,
			  event->mmap.filename, event->mmap.pid, event->mmap.tid, &n);
	if (ret < 0)
		return ret;
	if (ret) {
		inject->bytes_written += n;
		return 0;
	}
	return perf_event__repipe_mmap(tool, event, sample, machine);
}
#endif

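/*
 * Find or create the DSO for a map event, attaching the thread's namespace
 * info so that build IDs can be read from the right mount namespace. vdso
 * maps always refer to the host vdso, so setns must not be used to look
 * them up.
 */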
static struct dso *findnew_dso(int pid, int tid, const char *filename,
			       struct dso_id *id, struct machine *machine)
{
	struct thread *thread;
	struct nsinfo *nsi = NULL;
	struct nsinfo *nnsi;
	struct dso *dso;
	bool vdso;

	thread = machine__findnew_thread(machine, pid, tid);
	if (thread == NULL) {
		pr_err("cannot find or create a task %d/%d.\n", tid, pid);
		return NULL;
	}

	vdso = is_vdso_map(filename);
	nsi = nsinfo__get(thread__nsinfo(thread));

	if (vdso) {
		/*
		 * The vdso maps are always on the host and not the
		 * container. Ensure that we don't use setns to look
		 * them up.
		 */
		nnsi = nsinfo__copy(nsi);
		if (nnsi) {
			nsinfo__put(nsi);
			nsinfo__clear_need_setns(nnsi);
			nsi = nnsi;
		}
		dso = machine__findnew_vdso(machine, thread);
	} else {
		dso = machine__findnew_dso_id(machine, filename, id);
	}

	if (dso) {
		mutex_lock(dso__lock(dso));
		dso__set_nsinfo(dso, nsi);
		mutex_unlock(dso__lock(dso));
	} else
		nsinfo__put(nsi);

	thread__put(thread);
	return dso;
}

static int perf_event__repipe_buildid_mmap(struct perf_tool *tool,
					   union perf_event *event,
					   struct perf_sample *sample,
					   struct machine *machine)
{
	struct dso *dso;

	dso = findnew_dso(event->mmap.pid, event->mmap.tid,
			  event->mmap.filename, NULL, machine);

	if (dso && !dso__hit(dso)) {
		dso__set_hit(dso);
		dso__inject_build_id(dso, tool, machine, sample->cpumode, 0);
	}
	dso__put(dso);

	return perf_event__repipe(tool, event, sample, machine);
}

static int perf_event__repipe_mmap2(struct perf_tool *tool,
				   union perf_event *event,
				   struct perf_sample *sample,
				   struct machine *machine)
{
	int err;

	err = perf_event__process_mmap2(tool, event, sample, machine);
	perf_event__repipe(tool, event, sample, machine);

	if (event->header.misc & PERF_RECORD_MISC_MMAP_BUILD_ID) {
		struct dso *dso;

		dso = findnew_dso(event->mmap2.pid, event->mmap2.tid,
				  event->mmap2.filename, NULL, machine);
		if (dso) {
			/* mark it not to inject build-id */
			dso__set_hit(dso);
		}
		dso__put(dso);
	}

	return err;
}

#ifdef HAVE_JITDUMP
static int perf_event__jit_repipe_mmap2(struct perf_tool *tool,
					union perf_event *event,
					struct perf_sample *sample,
					struct machine *machine)
{
	struct perf_inject *inject = container_of(tool, struct perf_inject, tool);
	u64 n = 0;
	int ret;

	/*
	 * if jit marker, then inject jit mmaps and generate ELF images
	 */
	ret = jit_process(inject->session, &inject->output, machine,
			  event->mmap2.filename, event->mmap2.pid, event->mmap2.tid, &n);
	if (ret < 0)
		return ret;
	if (ret) {
		inject->bytes_written += n;
		return 0;
	}
	return perf_event__repipe_mmap2(tool, event, sample, machine);
}
#endif

static int perf_event__repipe_buildid_mmap2(struct perf_tool *tool,
					    union perf_event *event,
					    struct perf_sample *sample,
					    struct machine *machine)
{
	struct dso_id dso_id = {
		.maj = event->mmap2.maj,
		.min = event->mmap2.min,
		.ino = event->mmap2.ino,
		.ino_generation = event->mmap2.ino_generation,
	};
	struct dso *dso;

	if (event->header.misc & PERF_RECORD_MISC_MMAP_BUILD_ID) {
		/* cannot use dso_id since it'd have invalid info */
		dso = findnew_dso(event->mmap2.pid, event->mmap2.tid,
				  event->mmap2.filename, NULL, machine);
		if (dso) {
			/* mark it not to inject build-id */
			dso__set_hit(dso);
		}
		dso__put(dso);
		perf_event__repipe(tool, event, sample, machine);
		return 0;
	}

	dso = findnew_dso(event->mmap2.pid, event->mmap2.tid,
			  event->mmap2.filename, &dso_id, machine);

	if (dso && !dso__hit(dso)) {
		dso__set_hit(dso);
		dso__inject_build_id(dso, tool, machine, sample->cpumode,
				     event->mmap2.flags);
	}
	dso__put(dso);

	perf_event__repipe(tool, event, sample, machine);

	return 0;
}

static int perf_event__repipe_fork(struct perf_tool *tool,
				   union perf_event *event,
				   struct perf_sample *sample,
				   struct machine *machine)
{
	int err;

	err = perf_event__process_fork(tool, event, sample, machine);
	perf_event__repipe(tool, event, sample, machine);

	return err;
}

static int perf_event__repipe_comm(struct perf_tool *tool,
				   union perf_event *event,
				   struct perf_sample *sample,
				   struct machine *machine)
{
	int err;

	err = perf_event__process_comm(tool, event, sample, machine);
	perf_event__repipe(tool, event, sample, machine);

	return err;
}

static int perf_event__repipe_namespaces(struct perf_tool *tool,
					 union perf_event *event,
					 struct perf_sample *sample,
					 struct machine *machine)
{
	int err = perf_event__process_namespaces(tool, event, sample, machine);

	perf_event__repipe(tool, event, sample, machine);

	return err;
}

static int perf_event__repipe_exit(struct perf_tool *tool,
				   union perf_event *event,
				   struct perf_sample *sample,
				   struct machine *machine)
{
	int err;

	err = perf_event__process_exit(tool, event, sample, machine);
	perf_event__repipe(tool, event, sample, machine);

	return err;
}

#ifdef HAVE_LIBTRACEEVENT
static int perf_event__repipe_tracing_data(struct perf_session *session,
					   union perf_event *event)
{
	perf_event__repipe_synth(session->tool, event);

	return perf_event__process_tracing_data(session, event);
}
#endif

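/*
 * Read the DSO's build ID from the file system, retrying with a path prefixed
 * by the DSO's mount namespace chroot if the plain path fails.
 */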
static int dso__read_build_id(struct dso *dso)
{
	struct nscookie nsc;

	if (dso__has_build_id(dso))
		return 0;

	mutex_lock(dso__lock(dso));
	nsinfo__mountns_enter(dso__nsinfo(dso), &nsc);
	if (filename__read_build_id(dso__long_name(dso), dso__bid(dso)) > 0)
		dso__set_has_build_id(dso);
	else if (dso__nsinfo(dso)) {
		char *new_name = dso__filename_with_chroot(dso, dso__long_name(dso));

		if (new_name && filename__read_build_id(new_name, dso__bid(dso)) > 0)
			dso__set_has_build_id(dso);
		free(new_name);
	}
	nsinfo__mountns_exit(&nsc);
	mutex_unlock(dso__lock(dso));

	return dso__has_build_id(dso) ? 0 : -1;
}

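/*
 * Parse the --known-build-ids argument into a strlist of
 * "<build id> <dso name>" entries, dropping entries that lack a DSO name or
 * whose build ID is not an even-length string of hex digits.
 */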
static struct strlist *perf_inject__parse_known_build_ids(
	const char *known_build_ids_string)
{
	struct str_node *pos, *tmp;
	struct strlist *known_build_ids;
	int bid_len;

	known_build_ids = strlist__new(known_build_ids_string, NULL);
	if (known_build_ids == NULL)
		return NULL;
	strlist__for_each_entry_safe(pos, tmp, known_build_ids) {
		const char *build_id, *dso_name;

		build_id = skip_spaces(pos->s);
		dso_name = strchr(build_id, ' ');
		if (dso_name == NULL) {
			strlist__remove(known_build_ids, pos);
			continue;
		}
		bid_len = dso_name - pos->s;
		dso_name = skip_spaces(dso_name);
		if (bid_len % 2 != 0 || bid_len >= SBUILD_ID_SIZE) {
			strlist__remove(known_build_ids, pos);
			continue;
		}
		for (int ix = 0; 2 * ix + 1 < bid_len; ++ix) {
			if (!isxdigit(build_id[2 * ix]) ||
			    !isxdigit(build_id[2 * ix + 1])) {
				strlist__remove(known_build_ids, pos);
				break;
			}
		}
	}
	return known_build_ids;
}

static bool perf_inject__lookup_known_build_id(struct perf_inject *inject,
					       struct dso *dso)
{
	struct str_node *pos;
	int bid_len;

	strlist__for_each_entry(pos, inject->known_build_ids) {
		const char *build_id, *dso_name;

		build_id = skip_spaces(pos->s);
		dso_name = strchr(build_id, ' ');
		bid_len = dso_name - pos->s;
		dso_name = skip_spaces(dso_name);
		if (strcmp(dso__long_name(dso), dso_name))
			continue;
		for (int ix = 0; 2 * ix + 1 < bid_len; ++ix) {
			dso__bid(dso)->data[ix] = (hex(build_id[2 * ix]) << 4 |
						  hex(build_id[2 * ix + 1]));
		}
		dso__bid(dso)->size = bid_len / 2;
		dso__set_has_build_id(dso);
		return true;
	}
	return false;
}

static int dso__inject_build_id(struct dso *dso, struct perf_tool *tool,
				struct machine *machine, u8 cpumode, u32 flags)
{
	struct perf_inject *inject = container_of(tool, struct perf_inject,
						  tool);
	int err;

	if (is_anon_memory(dso__long_name(dso)) || flags & MAP_HUGETLB)
		return 0;
	if (is_no_dso_memory(dso__long_name(dso)))
		return 0;

	if (inject->known_build_ids != NULL &&
	    perf_inject__lookup_known_build_id(inject, dso))
		return 1;

	if (dso__read_build_id(dso) < 0) {
		pr_debug("no build_id found for %s\n", dso__long_name(dso));
		return -1;
	}

	err = perf_event__synthesize_build_id(tool, dso, cpumode,
					      perf_event__repipe, machine);
	if (err) {
		pr_err("Can't synthesize build_id event for %s\n", dso__long_name(dso));
		return -1;
	}

	return 0;
}

int perf_event__inject_buildid(struct perf_tool *tool, union perf_event *event,
			       struct perf_sample *sample,
			       struct evsel *evsel __maybe_unused,
			       struct machine *machine)
{
	struct addr_location al;
	struct thread *thread;

	addr_location__init(&al);
	thread = machine__findnew_thread(machine, sample->pid, sample->tid);
	if (thread == NULL) {
		pr_err("problem processing %d event, skipping it.\n",
		       event->header.type);
		goto repipe;
	}

	if (thread__find_map(thread, sample->cpumode, sample->ip, &al)) {
		struct dso *dso = map__dso(al.map);

		if (!dso__hit(dso)) {
			dso__set_hit(dso);
			dso__inject_build_id(dso, tool, machine,
					     sample->cpumode, map__flags(al.map));
		}
	}

	thread__put(thread);
repipe:
	perf_event__repipe(tool, event, sample, machine);
	addr_location__exit(&al);
	return 0;
}

static int perf_inject__sched_process_exit(struct perf_tool *tool,
					   union perf_event *event __maybe_unused,
					   struct perf_sample *sample,
					   struct evsel *evsel __maybe_unused,
					   struct machine *machine __maybe_unused)
{
	struct perf_inject *inject = container_of(tool, struct perf_inject, tool);
	struct event_entry *ent;

	list_for_each_entry(ent, &inject->samples, node) {
		if (sample->tid == ent->tid) {
			list_del_init(&ent->node);
			free(ent);
			break;
		}
	}

	return 0;
}

static int perf_inject__sched_switch(struct perf_tool *tool,
				     union perf_event *event,
				     struct perf_sample *sample,
				     struct evsel *evsel,
				     struct machine *machine)
{
	struct perf_inject *inject = container_of(tool, struct perf_inject, tool);
	struct event_entry *ent;

	perf_inject__sched_process_exit(tool, event, sample, evsel, machine);

	ent = malloc(event->header.size + sizeof(struct event_entry));
	if (ent == NULL) {
		color_fprintf(stderr, PERF_COLOR_RED,
			     "Not enough memory to process sched switch event!\n");
		return -1;
	}

	ent->tid = sample->tid;
	memcpy(&ent->event, event, event->header.size);
	list_add(&ent->node, &inject->samples);
	return 0;
}

#ifdef HAVE_LIBTRACEEVENT
static int perf_inject__sched_stat(struct perf_tool *tool,
				   union perf_event *event __maybe_unused,
				   struct perf_sample *sample,
				   struct evsel *evsel,
				   struct machine *machine)
{
	struct event_entry *ent;
	union perf_event *event_sw;
	struct perf_sample sample_sw;
	struct perf_inject *inject = container_of(tool, struct perf_inject, tool);
	u32 pid = evsel__intval(evsel, sample, "pid");

	list_for_each_entry(ent, &inject->samples, node) {
		if (pid == ent->tid)
			goto found;
	}

	return 0;
found:
	event_sw = &ent->event[0];
	evsel__parse_sample(evsel, event_sw, &sample_sw);

	sample_sw.period = sample->period;
	sample_sw.time	 = sample->time;
	perf_event__synthesize_sample(event_sw, evsel->core.attr.sample_type,
				      evsel->core.attr.read_format, &sample_sw);
	build_id__mark_dso_hit(tool, event_sw, &sample_sw, evsel, machine);
	return perf_event__repipe(tool, event_sw, &sample_sw, machine);
}
#endif

static struct guest_vcpu *guest_session__vcpu(struct guest_session *gs, u32 vcpu)
{
	if (realloc_array_as_needed(gs->vcpu, gs->vcpu_cnt, vcpu, NULL))
		return NULL;
	return &gs->vcpu[vcpu];
}

static int guest_session__output_bytes(struct guest_session *gs, void *buf, size_t sz)
{
	ssize_t ret = writen(gs->tmp_fd, buf, sz);

	return ret < 0 ? ret : 0;
}

static int guest_session__repipe(struct perf_tool *tool,
				 union perf_event *event,
				 struct perf_sample *sample __maybe_unused,
				 struct machine *machine __maybe_unused)
{
	struct guest_session *gs = container_of(tool, struct guest_session, tool);

	return guest_session__output_bytes(gs, event, event->header.size);
}

static int guest_session__map_tid(struct guest_session *gs, u32 tid, u32 vcpu)
{
	struct guest_tid *guest_tid = zalloc(sizeof(*guest_tid));
	int hash;

	if (!guest_tid)
		return -ENOMEM;

	guest_tid->tid = tid;
	guest_tid->vcpu = vcpu;
	hash = hash_32(guest_tid->tid, PERF_EVLIST__HLIST_BITS);
	hlist_add_head(&guest_tid->node, &gs->tids[hash]);

	return 0;
}

static int host_peek_vm_comms_cb(struct perf_session *session __maybe_unused,
				 union perf_event *event,
				 u64 offset __maybe_unused, void *data)
{
	struct guest_session *gs = data;
	unsigned int vcpu;
	struct guest_vcpu *guest_vcpu;
	int ret;

	if (event->header.type != PERF_RECORD_COMM ||
	    event->comm.pid != gs->machine_pid)
		return 0;

	/*
	 * The QEMU option -name debug-threads=on causes thread names to be
	 * formatted as below, although that is not an ABI. libvirt also seems
	 * to use this by default. Here we rely on it to tell us which thread
	 * is which VCPU.
	 */
	ret = sscanf(event->comm.comm, "CPU %u/KVM", &vcpu);
	if (ret <= 0)
		return ret;
	pr_debug("Found VCPU: tid %u comm %s vcpu %u\n",
		 event->comm.tid, event->comm.comm, vcpu);
	if (vcpu > INT_MAX) {
		pr_err("Invalid VCPU %u\n", vcpu);
		return -EINVAL;
	}
	guest_vcpu = guest_session__vcpu(gs, vcpu);
	if (!guest_vcpu)
		return -ENOMEM;
	if (guest_vcpu->tid && guest_vcpu->tid != event->comm.tid) {
		pr_err("Fatal error: Two threads found with the same VCPU\n");
		return -EINVAL;
	}
	guest_vcpu->tid = event->comm.tid;

	return guest_session__map_tid(gs, event->comm.tid, vcpu);
}

static int host_peek_vm_comms(struct perf_session *session, struct guest_session *gs)
{
	return perf_session__peek_events(session, session->header.data_offset,
					 session->header.data_size,
					 host_peek_vm_comms_cb, gs);
}

static bool evlist__is_id_used(struct evlist *evlist, u64 id)
{
	return evlist__id2sid(evlist, id);
}

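/*
 * Guest sample IDs may clash with host sample IDs, so allocate a new, unused
 * ID for each guest ID, continuing upwards from the highest ID seen so far.
 */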
static u64 guest_session__allocate_new_id(struct guest_session *gs, struct evlist *host_evlist)
{
	do {
		gs->highest_id += 1;
	} while (!gs->highest_id || evlist__is_id_used(host_evlist, gs->highest_id));

	return gs->highest_id;
}

static int guest_session__map_id(struct guest_session *gs, u64 id, u64 host_id, u32 vcpu)
{
	struct guest_id *guest_id = zalloc(sizeof(*guest_id));
	int hash;

	if (!guest_id)
		return -ENOMEM;

	guest_id->id = id;
	guest_id->host_id = host_id;
	guest_id->vcpu = vcpu;
	hash = hash_64(guest_id->id, PERF_EVLIST__HLIST_BITS);
	hlist_add_head(&guest_id->node, &gs->heads[hash]);

	return 0;
}

static u64 evlist__find_highest_id(struct evlist *evlist)
{
	struct evsel *evsel;
	u64 highest_id = 1;

	evlist__for_each_entry(evlist, evsel) {
		u32 j;

		for (j = 0; j < evsel->core.ids; j++) {
			u64 id = evsel->core.id[j];

			if (id > highest_id)
				highest_id = id;
		}
	}

	return highest_id;
}

static int guest_session__map_ids(struct guest_session *gs, struct evlist *host_evlist)
{
	struct evlist *evlist = gs->session->evlist;
	struct evsel *evsel;
	int ret;

	evlist__for_each_entry(evlist, evsel) {
		u32 j;

		for (j = 0; j < evsel->core.ids; j++) {
			struct perf_sample_id *sid;
			u64 host_id;
			u64 id;

			id = evsel->core.id[j];
			sid = evlist__id2sid(evlist, id);
			if (!sid || sid->cpu.cpu == -1)
				continue;
			host_id = guest_session__allocate_new_id(gs, host_evlist);
			ret = guest_session__map_id(gs, id, host_id, sid->cpu.cpu);
			if (ret)
				return ret;
		}
	}

	return 0;
}

static struct guest_id *guest_session__lookup_id(struct guest_session *gs, u64 id)
{
	struct hlist_head *head;
	struct guest_id *guest_id;
	int hash;

	hash = hash_64(id, PERF_EVLIST__HLIST_BITS);
	head = &gs->heads[hash];

	hlist_for_each_entry(guest_id, head, node)
		if (guest_id->id == id)
			return guest_id;

	return NULL;
}

static int process_attr(struct perf_tool *tool, union perf_event *event,
			struct perf_sample *sample __maybe_unused,
			struct machine *machine __maybe_unused)
{
	struct perf_inject *inject = container_of(tool, struct perf_inject, tool);

	return perf_event__process_attr(tool, event, &inject->session->evlist);
}

static int guest_session__add_attr(struct guest_session *gs, struct evsel *evsel)
{
	struct perf_inject *inject = container_of(gs, struct perf_inject, guest_session);
	struct perf_event_attr attr = evsel->core.attr;
	u64 *id_array;
	u32 *vcpu_array;
	int ret = -ENOMEM;
	u32 i;

	id_array = calloc(evsel->core.ids, sizeof(*id_array));
	if (!id_array)
		return -ENOMEM;

	vcpu_array = calloc(evsel->core.ids, sizeof(*vcpu_array));
	if (!vcpu_array)
		goto out;

	for (i = 0; i < evsel->core.ids; i++) {
		u64 id = evsel->core.id[i];
		struct guest_id *guest_id = guest_session__lookup_id(gs, id);

		if (!guest_id) {
			pr_err("Failed to find guest id %"PRIu64"\n", id);
			ret = -EINVAL;
			goto out;
		}
		id_array[i] = guest_id->host_id;
		vcpu_array[i] = guest_id->vcpu;
	}

	attr.sample_type |= PERF_SAMPLE_IDENTIFIER;
	attr.exclude_host = 1;
	attr.exclude_guest = 0;

	ret = perf_event__synthesize_attr(&inject->tool, &attr, evsel->core.ids,
					  id_array, process_attr);
	if (ret)
		pr_err("Failed to add guest attr.\n");

	for (i = 0; i < evsel->core.ids; i++) {
		struct perf_sample_id *sid;
		u32 vcpu = vcpu_array[i];

		sid = evlist__id2sid(inject->session->evlist, id_array[i]);
		/* Guest event is per-thread from the host point of view */
		sid->cpu.cpu = -1;
		sid->tid = gs->vcpu[vcpu].tid;
		sid->machine_pid = gs->machine_pid;
		sid->vcpu.cpu = vcpu;
	}
out:
	free(vcpu_array);
	free(id_array);
	return ret;
}

static int guest_session__add_attrs(struct guest_session *gs)
{
	struct evlist *evlist = gs->session->evlist;
	struct evsel *evsel;
	int ret;

	evlist__for_each_entry(evlist, evsel) {
		ret = guest_session__add_attr(gs, evsel);
		if (ret)
			return ret;
	}

	return 0;
}

static int synthesize_id_index(struct perf_inject *inject, size_t new_cnt)
{
	struct perf_session *session = inject->session;
	struct evlist *evlist = session->evlist;
	struct machine *machine = &session->machines.host;
	size_t from = evlist->core.nr_entries - new_cnt;

	return __perf_event__synthesize_id_index(&inject->tool, perf_event__repipe,
						 evlist, machine, from);
}

static struct guest_tid *guest_session__lookup_tid(struct guest_session *gs, u32 tid)
{
	struct hlist_head *head;
	struct guest_tid *guest_tid;
	int hash;

	hash = hash_32(tid, PERF_EVLIST__HLIST_BITS);
	head = &gs->tids[hash];

	hlist_for_each_entry(guest_tid, head, node)
		if (guest_tid->tid == tid)
			return guest_tid;

	return NULL;
}

static bool dso__is_in_kernel_space(struct dso *dso)
{
	if (dso__is_vdso(dso))
		return false;

	return dso__is_kcore(dso) ||
	       dso__kernel(dso) ||
	       is_kernel_module(dso__long_name(dso), PERF_RECORD_MISC_CPUMODE_UNKNOWN);
}

static u64 evlist__first_id(struct evlist *evlist)
{
	struct evsel *evsel;

	evlist__for_each_entry(evlist, evsel) {
		if (evsel->core.ids)
			return evsel->core.id[0];
	}
	return 0;
}

static int process_build_id(struct perf_tool *tool,
			    union perf_event *event,
			    struct perf_sample *sample __maybe_unused,
			    struct machine *machine __maybe_unused)
{
	struct perf_inject *inject = container_of(tool, struct perf_inject, tool);

	return perf_event__process_build_id(inject->session, event);
}

static int synthesize_build_id(struct perf_inject *inject, struct dso *dso, pid_t machine_pid)
{
	struct machine *machine = perf_session__findnew_machine(inject->session, machine_pid);
	u8 cpumode = dso__is_in_kernel_space(dso) ?
			PERF_RECORD_MISC_GUEST_KERNEL :
			PERF_RECORD_MISC_GUEST_USER;

	if (!machine)
		return -ENOMEM;

	dso__set_hit(dso);

	return perf_event__synthesize_build_id(&inject->tool, dso, cpumode,
					       process_build_id, machine);
}

static int guest_session__add_build_ids_cb(struct dso *dso, void *data)
{
	struct guest_session *gs = data;
	struct perf_inject *inject = container_of(gs, struct perf_inject, guest_session);

	if (!dso__has_build_id(dso))
		return 0;

	return synthesize_build_id(inject, dso, gs->machine_pid);
}

static int guest_session__add_build_ids(struct guest_session *gs)
{
	struct perf_inject *inject = container_of(gs, struct perf_inject, guest_session);

	/* Build IDs will be put in the Build ID feature section */
	perf_header__set_feat(&inject->session->header, HEADER_BUILD_ID);

	return dsos__for_each_dso(&gs->session->machines.host.dsos,
				  guest_session__add_build_ids_cb,
				  gs);
}

static int guest_session__ksymbol_event(struct perf_tool *tool,
					union perf_event *event,
					struct perf_sample *sample __maybe_unused,
					struct machine *machine __maybe_unused)
{
	struct guest_session *gs = container_of(tool, struct guest_session, tool);

	/* Only support out-of-line i.e. no BPF support */
	if (event->ksymbol.ksym_type != PERF_RECORD_KSYMBOL_TYPE_OOL)
		return 0;

	return guest_session__output_bytes(gs, event, event->header.size);
}

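/*
 * Open the guest perf.data file as a separate session and process its events
 * into a temporary file, since perf cannot process two sessions at the same
 * time. Only the event types hooked up below are kept for injection.
 */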
static int guest_session__start(struct guest_session *gs, const char *name, bool force)
{
	char tmp_file_name[] = "/tmp/perf-inject-guest_session-XXXXXX";
	struct perf_session *session;
	int ret;

	/* Only these events will be injected */
	gs->tool.mmap		= guest_session__repipe;
	gs->tool.mmap2		= guest_session__repipe;
	gs->tool.comm		= guest_session__repipe;
	gs->tool.fork		= guest_session__repipe;
	gs->tool.exit		= guest_session__repipe;
	gs->tool.lost		= guest_session__repipe;
	gs->tool.context_switch	= guest_session__repipe;
	gs->tool.ksymbol	= guest_session__ksymbol_event;
	gs->tool.text_poke	= guest_session__repipe;
	/*
	 * Processing a build ID creates a struct dso with that build ID. Later,
	 * all guest dsos are iterated and the build IDs processed into the host
	 * session where they will be output to the Build ID feature section
	 * when the perf.data file header is written.
	 */
	gs->tool.build_id	= perf_event__process_build_id;
	/* Process the id index to know what VCPU an ID belongs to */
	gs->tool.id_index	= perf_event__process_id_index;

	gs->tool.ordered_events	= true;
	gs->tool.ordering_requires_timestamps = true;

	gs->data.path	= name;
	gs->data.force	= force;
	gs->data.mode	= PERF_DATA_MODE_READ;

	session = perf_session__new(&gs->data, &gs->tool);
	if (IS_ERR(session))
		return PTR_ERR(session);
	gs->session = session;

	/*
	 * Initial events have zero'd ID samples. Get default ID sample size
	 * used for removing them.
	 */
	gs->dflt_id_hdr_size = session->machines.host.id_hdr_size;
	/* And default ID for adding back a host-compatible ID sample */
	gs->dflt_id = evlist__first_id(session->evlist);
	if (!gs->dflt_id) {
		pr_err("Guest data has no sample IDs\n");
		return -EINVAL;
	}

	/* Temporary file for guest events */
	gs->tmp_file_name = strdup(tmp_file_name);
	if (!gs->tmp_file_name)
		return -ENOMEM;
	gs->tmp_fd = mkstemp(gs->tmp_file_name);
	if (gs->tmp_fd < 0)
		return -errno;

	if (zstd_init(&gs->session->zstd_data, 0) < 0)
		pr_warning("Guest session decompression initialization failed.\n");

	/*
	 * perf does not support processing 2 sessions simultaneously, so output
	 * guest events to a temporary file.
	 */
	ret = perf_session__process_events(gs->session);
	if (ret)
		return ret;

	if (lseek(gs->tmp_fd, 0, SEEK_SET))
		return -errno;

	return 0;
}

/* Free hlist nodes assuming hlist_node is the first member of hlist entries */
static void free_hlist(struct hlist_head *heads, size_t hlist_sz)
{
	struct hlist_node *pos, *n;
	size_t i;

	for (i = 0; i < hlist_sz; ++i) {
		hlist_for_each_safe(pos, n, &heads[i]) {
			hlist_del(pos);
			free(pos);
		}
	}
}

static void guest_session__exit(struct guest_session *gs)
{
	if (gs->session) {
		perf_session__delete(gs->session);
		free_hlist(gs->heads, PERF_EVLIST__HLIST_SIZE);
		free_hlist(gs->tids, PERF_EVLIST__HLIST_SIZE);
	}
	if (gs->tmp_file_name) {
		if (gs->tmp_fd >= 0)
			close(gs->tmp_fd);
		unlink(gs->tmp_file_name);
		zfree(&gs->tmp_file_name);
	}
	zfree(&gs->vcpu);
	zfree(&gs->perf_data_file);
}

static void get_tsc_conv(struct perf_tsc_conversion *tc, struct perf_record_time_conv *time_conv)
{
	tc->time_shift		= time_conv->time_shift;
	tc->time_mult		= time_conv->time_mult;
	tc->time_zero		= time_conv->time_zero;
	tc->time_cycles		= time_conv->time_cycles;
	tc->time_mask		= time_conv->time_mask;
	tc->cap_user_time_zero	= time_conv->cap_user_time_zero;
	tc->cap_user_time_short	= time_conv->cap_user_time_short;
}

static void guest_session__get_tc(struct guest_session *gs)
{
	struct perf_inject *inject = container_of(gs, struct perf_inject, guest_session);

	get_tsc_conv(&gs->host_tc, &inject->session->time_conv);
	get_tsc_conv(&gs->guest_tc, &gs->session->time_conv);
}

static void guest_session__convert_time(struct guest_session *gs, u64 guest_time, u64 *host_time)
{
	u64 tsc;

	if (!guest_time) {
		*host_time = 0;
		return;
	}

	if (gs->guest_tc.cap_user_time_zero)
		tsc = perf_time_to_tsc(guest_time, &gs->guest_tc);
	else
		tsc = guest_time;

	/*
	 * This is the correct order of operations for x86 if the TSC Offset and
	 * Multiplier values are used.
	 */
	tsc -= gs->time_offset;
	tsc /= gs->time_scale;

	if (gs->host_tc.cap_user_time_zero)
		*host_time = tsc_to_perf_time(tsc, &gs->host_tc);
	else
		*host_time = tsc;
}

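/*
 * Read the next event from the temporary file into gs->ev, parse its sample,
 * and convert its guest timestamp to host time. A zero-sized header
 * indicates EOF.
 */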
static int guest_session__fetch(struct guest_session *gs)
{
	void *buf;
	struct perf_event_header *hdr;
	size_t hdr_sz = sizeof(*hdr);
	ssize_t ret;

	buf = gs->ev.event_buf;
	if (!buf) {
		buf = malloc(PERF_SAMPLE_MAX_SIZE);
		if (!buf)
			return -ENOMEM;
		gs->ev.event_buf = buf;
	}
	hdr = buf;
	ret = readn(gs->tmp_fd, buf, hdr_sz);
	if (ret < 0)
		return ret;

	if (!ret) {
		/* Zero size means EOF */
		hdr->size = 0;
		return 0;
	}

	buf += hdr_sz;

	ret = readn(gs->tmp_fd, buf, hdr->size - hdr_sz);
	if (ret < 0)
		return ret;

	gs->ev.event = (union perf_event *)gs->ev.event_buf;
	gs->ev.sample.time = 0;

	if (hdr->type >= PERF_RECORD_USER_TYPE_START) {
		pr_err("Unexpected type fetching guest event\n");
		return 0;
	}

	ret = evlist__parse_sample(gs->session->evlist, gs->ev.event, &gs->ev.sample);
	if (ret) {
		pr_err("Parse failed fetching guest event\n");
		return ret;
	}

	if (!gs->have_tc) {
		guest_session__get_tc(gs);
		gs->have_tc = true;
	}

	guest_session__convert_time(gs, gs->ev.sample.time, &gs->ev.sample.time);

	return 0;
}

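/*
 * Append an ID sample, synthesized from the given sample, to the end of the
 * event and grow the event size to match.
 */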
static int evlist__append_id_sample(struct evlist *evlist, union perf_event *ev,
				    const struct perf_sample *sample)
{
	struct evsel *evsel;
	void *array;
	int ret;

	evsel = evlist__id2evsel(evlist, sample->id);
	array = ev;

	if (!evsel) {
		pr_err("No evsel for id %"PRIu64"\n", sample->id);
		return -EINVAL;
	}

	array += ev->header.size;
	ret = perf_event__synthesize_id_sample(array, evsel->core.attr.sample_type, sample);
	if (ret < 0)
		return ret;

	if (ret & 7) {
		pr_err("Bad id sample size %d\n", ret);
		return -EINVAL;
	}

	ev->header.size += ret;

	return 0;
}

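/*
 * Inject guest events with timestamps up to and including 'timestamp'. Each
 * event has its cpumode changed to the guest equivalent, its guest ID sample
 * removed and replaced by one carrying the mapped host ID, and its VCPU
 * number replaced by the host CPU that the VCPU was running on.
 */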
static int guest_session__inject_events(struct guest_session *gs, u64 timestamp)
{
	struct perf_inject *inject = container_of(gs, struct perf_inject, guest_session);
	int ret;

	if (!gs->ready)
		return 0;

	while (1) {
		struct perf_sample *sample;
		struct guest_id *guest_id;
		union perf_event *ev;
		u16 id_hdr_size;
		u8 cpumode;
		u64 id;

		if (!gs->fetched) {
			ret = guest_session__fetch(gs);
			if (ret)
				return ret;
			gs->fetched = true;
		}

		ev = gs->ev.event;
		sample = &gs->ev.sample;

		if (!ev->header.size)
			return 0; /* EOF */

		if (sample->time > timestamp)
			return 0;

		/* Change cpumode to guest */
		cpumode = ev->header.misc & PERF_RECORD_MISC_CPUMODE_MASK;
		if (cpumode & PERF_RECORD_MISC_USER)
			cpumode = PERF_RECORD_MISC_GUEST_USER;
		else
			cpumode = PERF_RECORD_MISC_GUEST_KERNEL;
		ev->header.misc &= ~PERF_RECORD_MISC_CPUMODE_MASK;
		ev->header.misc |= cpumode;

		id = sample->id;
		if (!id) {
			id = gs->dflt_id;
			id_hdr_size = gs->dflt_id_hdr_size;
		} else {
			struct evsel *evsel = evlist__id2evsel(gs->session->evlist, id);

			id_hdr_size = evsel__id_hdr_size(evsel);
		}

		if (id_hdr_size & 7) {
			pr_err("Bad id_hdr_size %u\n", id_hdr_size);
			return -EINVAL;
		}

		if (ev->header.size & 7) {
			pr_err("Bad event size %u\n", ev->header.size);
			return -EINVAL;
		}

		/* Remove guest id sample */
		ev->header.size -= id_hdr_size;

		if (ev->header.size & 7) {
			pr_err("Bad raw event size %u\n", ev->header.size);
			return -EINVAL;
		}

		guest_id = guest_session__lookup_id(gs, id);
		if (!guest_id) {
			pr_err("Guest event with unknown id %llu\n",
			       (unsigned long long)id);
			return -EINVAL;
		}

		/* Change to host ID to avoid conflicting ID values */
		sample->id = guest_id->host_id;
		sample->stream_id = guest_id->host_id;

		if (sample->cpu != (u32)-1) {
			if (sample->cpu >= gs->vcpu_cnt) {
				pr_err("Guest event with unknown VCPU %u\n",
				       sample->cpu);
				return -EINVAL;
			}
			/* Change to host CPU instead of guest VCPU */
			sample->cpu = gs->vcpu[sample->cpu].cpu;
		}

		/* New id sample with new ID and CPU */
		ret = evlist__append_id_sample(inject->session->evlist, ev, sample);
		if (ret)
			return ret;

		if (ev->header.size & 7) {
			pr_err("Bad new event size %u\n", ev->header.size);
			return -EINVAL;
		}

		gs->fetched = false;

		ret = output_bytes(inject, ev, ev->header.size);
		if (ret)
			return ret;
	}
}

static int guest_session__flush_events(struct guest_session *gs)
{
	return guest_session__inject_events(gs, -1);
}

static int host__repipe(struct perf_tool *tool,
			union perf_event *event,
			struct perf_sample *sample,
			struct machine *machine)
{
	struct perf_inject *inject = container_of(tool, struct perf_inject, tool);
	int ret;

	ret = guest_session__inject_events(&inject->guest_session, sample->time);
	if (ret)
		return ret;

	return perf_event__repipe(tool, event, sample, machine);
}

static int host__finished_init(struct perf_session *session, union perf_event *event)
{
	struct perf_inject *inject = container_of(session->tool, struct perf_inject, tool);
	struct guest_session *gs = &inject->guest_session;
	int ret;

	/*
	 * Peek through host COMM events to find QEMU threads and the VCPU they
	 * are running.
	 */
	ret = host_peek_vm_comms(session, gs);
	if (ret)
		return ret;

	if (!gs->vcpu_cnt) {
		pr_err("No VCPU threads found for pid %u\n", gs->machine_pid);
		return -EINVAL;
	}

	/*
	 * Allocate new (unused) host sample IDs and map them to the guest IDs.
	 */
	gs->highest_id = evlist__find_highest_id(session->evlist);
	ret = guest_session__map_ids(gs, session->evlist);
	if (ret)
		return ret;

	ret = guest_session__add_attrs(gs);
	if (ret)
		return ret;

	ret = synthesize_id_index(inject, gs->session->evlist->core.nr_entries);
	if (ret) {
		pr_err("Failed to synthesize id_index\n");
		return ret;
	}

	ret = guest_session__add_build_ids(gs);
	if (ret) {
		pr_err("Failed to add guest build IDs\n");
		return ret;
	}

	gs->ready = true;

	ret = guest_session__inject_events(gs, 0);
	if (ret)
		return ret;

	return perf_event__repipe_op2_synth(session, event);
}

/*
 * Obey finished-round ordering. The FINISHED_ROUND event is first processed
 * which flushes host events to file up until the last flush time. Then inject
 * guest events up to the same time. Finally write out the FINISHED_ROUND event
 * itself.
 */
static int host__finished_round(struct perf_tool *tool,
				union perf_event *event,
				struct ordered_events *oe)
{
	struct perf_inject *inject = container_of(tool, struct perf_inject, tool);
	int ret = perf_event__process_finished_round(tool, event, oe);
	u64 timestamp = ordered_events__last_flush_time(oe);

	if (ret)
		return ret;

	ret = guest_session__inject_events(&inject->guest_session, timestamp);
	if (ret)
		return ret;

	return perf_event__repipe_oe_synth(tool, event, oe);
}

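/*
 * A host context switch to a known QEMU VCPU thread reveals which host CPU
 * that VCPU is now running on, so record it for attributing guest samples to
 * the correct host CPU.
 */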
static int host__context_switch(struct perf_tool *tool,
				union perf_event *event,
				struct perf_sample *sample,
				struct machine *machine)
{
	struct perf_inject *inject = container_of(tool, struct perf_inject, tool);
	bool out = event->header.misc & PERF_RECORD_MISC_SWITCH_OUT;
	struct guest_session *gs = &inject->guest_session;
	u32 pid = event->context_switch.next_prev_pid;
	u32 tid = event->context_switch.next_prev_tid;
	struct guest_tid *guest_tid;
	u32 vcpu;

	if (out || pid != gs->machine_pid)
		goto out;

	guest_tid = guest_session__lookup_tid(gs, tid);
	if (!guest_tid)
		goto out;

	if (sample->cpu == (u32)-1) {
		pr_err("Switch event does not have CPU\n");
		return -EINVAL;
	}

	vcpu = guest_tid->vcpu;
	if (vcpu >= gs->vcpu_cnt)
		return -EINVAL;

	/* Guest is switching in, record which CPU the VCPU is now running on */
	gs->vcpu[vcpu].cpu = sample->cpu;
out:
	return host__repipe(tool, event, sample, machine);
}

static void sig_handler(int sig __maybe_unused)
{
	session_done = 1;
}

static int evsel__check_stype(struct evsel *evsel, u64 sample_type, const char *sample_msg)
{
	struct perf_event_attr *attr = &evsel->core.attr;
	const char *name = evsel__name(evsel);

	if (!(attr->sample_type & sample_type)) {
		pr_err("Samples for %s event do not have %s attribute set.\n",
			name, sample_msg);
		return -EINVAL;
	}

	return 0;
}

static int drop_sample(struct perf_tool *tool __maybe_unused,
		       union perf_event *event __maybe_unused,
		       struct perf_sample *sample __maybe_unused,
		       struct evsel *evsel __maybe_unused,
		       struct machine *machine __maybe_unused)
{
	return 0;
}

static void strip_init(struct perf_inject *inject)
{
	struct evlist *evlist = inject->session->evlist;
	struct evsel *evsel;

	inject->tool.context_switch = perf_event__drop;

	evlist__for_each_entry(evlist, evsel)
		evsel->handler = drop_sample;
}

static int parse_vm_time_correlation(const struct option *opt, const char *str, int unset)
{
	struct perf_inject *inject = opt->value;
	const char *args;
	char *dry_run;

	if (unset)
		return 0;

	inject->itrace_synth_opts.set = true;
	inject->itrace_synth_opts.vm_time_correlation = true;
	inject->in_place_update = true;

	if (!str)
		return 0;

	dry_run = skip_spaces(str);
	if (!strncmp(dry_run, "dry-run", strlen("dry-run"))) {
		inject->itrace_synth_opts.vm_tm_corr_dry_run = true;
		inject->in_place_update_dry_run = true;
		args = dry_run + strlen("dry-run");
	} else {
		args = str;
	}

	inject->itrace_synth_opts.vm_tm_corr_args = strdup(args);

	return inject->itrace_synth_opts.vm_tm_corr_args ? 0 : -ENOMEM;
}

static int parse_guest_data(const struct option *opt, const char *str, int unset)
{
	struct perf_inject *inject = opt->value;
	struct guest_session *gs = &inject->guest_session;
	char *tok;
	char *s;

	if (unset)
		return 0;

	if (!str)
		goto bad_args;

	s = strdup(str);
	if (!s)
		return -ENOMEM;

	gs->perf_data_file = strsep(&s, ",");
	if (!gs->perf_data_file)
		goto bad_args;

	gs->copy_kcore_dir = has_kcore_dir(gs->perf_data_file);
	if (gs->copy_kcore_dir)
		inject->output.is_dir = true;

	tok = strsep(&s, ",");
	if (!tok)
		goto bad_args;
	gs->machine_pid = strtoul(tok, NULL, 0);
	if (!inject->guest_session.machine_pid)
		goto bad_args;

	gs->time_scale = 1;

	tok = strsep(&s, ",");
	if (!tok)
		goto out;
	gs->time_offset = strtoull(tok, NULL, 0);

	tok = strsep(&s, ",");
	if (!tok)
		goto out;
	gs->time_scale = strtod(tok, NULL);
	if (!gs->time_scale)
		goto bad_args;
out:
	return 0;

bad_args:
	pr_err("--guest-data option requires guest perf.data file name, "
	       "guest machine PID, and optionally guest timestamp offset, "
	       "and guest timestamp scale factor, separated by commas.\n");
	return -1;
}

static int save_section_info_cb(struct perf_file_section *section,
				struct perf_header *ph __maybe_unused,
				int feat, int fd __maybe_unused, void *data)
{
	struct perf_inject *inject = data;

	inject->secs[feat] = *section;
	return 0;
}

static int save_section_info(struct perf_inject *inject)
{
	struct perf_header *header = &inject->session->header;
	int fd = perf_data__fd(inject->session->data);

	return perf_header__process_sections(header, fd, inject, save_section_info_cb);
}

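/*
 * Decide which header feature sections can be copied verbatim from the input
 * file: information that describes the machine or software is kept, whereas
 * sections that perf inject may change must be regenerated.
 */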
static bool keep_feat(int feat)
{
	switch (feat) {
	/* Keep original information that describes the machine or software */
	case HEADER_TRACING_DATA:
	case HEADER_HOSTNAME:
	case HEADER_OSRELEASE:
	case HEADER_VERSION:
	case HEADER_ARCH:
	case HEADER_NRCPUS:
	case HEADER_CPUDESC:
	case HEADER_CPUID:
	case HEADER_TOTAL_MEM:
	case HEADER_CPU_TOPOLOGY:
	case HEADER_NUMA_TOPOLOGY:
	case HEADER_PMU_MAPPINGS:
	case HEADER_CACHE:
	case HEADER_MEM_TOPOLOGY:
	case HEADER_CLOCKID:
	case HEADER_BPF_PROG_INFO:
	case HEADER_BPF_BTF:
	case HEADER_CPU_PMU_CAPS:
	case HEADER_CLOCK_DATA:
	case HEADER_HYBRID_TOPOLOGY:
	case HEADER_PMU_CAPS:
		return true;
	/* Information that can be updated */
	case HEADER_BUILD_ID:
	case HEADER_CMDLINE:
	case HEADER_EVENT_DESC:
	case HEADER_BRANCH_STACK:
	case HEADER_GROUP_DESC:
	case HEADER_AUXTRACE:
	case HEADER_STAT:
	case HEADER_SAMPLE_TIME:
	case HEADER_DIR_FORMAT:
	case HEADER_COMPRESSED:
	default:
		return false;
	}
}

static int read_file(int fd, u64 offs, void *buf, size_t sz)
{
	ssize_t ret = preadn(fd, buf, sz, offs);

	if (ret < 0)
		return -errno;
	if ((size_t)ret != sz)
		return -EINVAL;
	return 0;
}

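/* Copy one feature section verbatim from the input file to the output */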
static int feat_copy(struct perf_inject *inject, int feat, struct feat_writer *fw)
{
	int fd = perf_data__fd(inject->session->data);
	u64 offs = inject->secs[feat].offset;
	size_t sz = inject->secs[feat].size;
	void *buf = malloc(sz);
	int ret;

	if (!buf)
		return -ENOMEM;

	ret = read_file(fd, offs, buf, sz);
	if (ret)
		goto out_free;

	ret = fw->write(fw, buf, sz);
out_free:
	free(buf);
	return ret;
}

struct inject_fc {
	struct feat_copier fc;
	struct perf_inject *inject;
};

static int feat_copy_cb(struct feat_copier *fc, int feat, struct feat_writer *fw)
{
	struct inject_fc *inj_fc = container_of(fc, struct inject_fc, fc);
	struct perf_inject *inject = inj_fc->inject;
	int ret;

	if (!inject->secs[feat].offset ||
	    !keep_feat(feat))
		return 0;

	ret = feat_copy(inject, feat, fw);
	if (ret < 0)
		return ret;

	return 1; /* Feature section copied */
}

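/*
 * Copy kcore_dir subdirectories from the input directory to the output
 * directory; "cp -n" avoids clobbering files that already exist there.
 */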
1941static int copy_kcore_dir(struct perf_inject *inject)
1942{
1943	char *cmd;
1944	int ret;
1945
1946	ret = asprintf(&cmd, "cp -r -n %s/kcore_dir* %s >/dev/null 2>&1",
1947		       inject->input_name, inject->output.path);
1948	if (ret < 0)
1949		return ret;
1950	pr_debug("%s\n", cmd);
1951	ret = system(cmd);
1952	free(cmd);
1953	return ret;
1954}
1955
1956static int guest_session__copy_kcore_dir(struct guest_session *gs)
1957{
1958	struct perf_inject *inject = container_of(gs, struct perf_inject, guest_session);
1959	char *cmd;
1960	int ret;
1961
1962	ret = asprintf(&cmd, "cp -r -n %s/kcore_dir %s/kcore_dir__%u >/dev/null 2>&1",
1963		       gs->perf_data_file, inject->output.path, gs->machine_pid);
1964	if (ret < 0)
1965		return ret;
1966	pr_debug("%s\n", cmd);
1967	ret = system(cmd);
1968	free(cmd);
1969	return ret;
1970}
1971
1972static int output_fd(struct perf_inject *inject)
1973{
1974	return inject->in_place_update ? -1 : perf_data__fd(&inject->output);
1975}
1976
1977static int __cmd_inject(struct perf_inject *inject)
1978{
1979	int ret = -EINVAL;
1980	struct guest_session *gs = &inject->guest_session;
1981	struct perf_session *session = inject->session;
1982	int fd = output_fd(inject);
1983	u64 output_data_offset;
1984
1985	signal(SIGINT, sig_handler);
1986
1987	if (inject->build_ids || inject->sched_stat ||
1988	    inject->itrace_synth_opts.set || inject->build_id_all) {
1989		inject->tool.mmap	  = perf_event__repipe_mmap;
1990		inject->tool.mmap2	  = perf_event__repipe_mmap2;
1991		inject->tool.fork	  = perf_event__repipe_fork;
1992#ifdef HAVE_LIBTRACEEVENT
1993		inject->tool.tracing_data = perf_event__repipe_tracing_data;
1994#endif
1995	}
1996
1997	output_data_offset = perf_session__data_offset(session->evlist);
1998
1999	if (inject->build_id_all) {
2000		inject->tool.mmap	  = perf_event__repipe_buildid_mmap;
2001		inject->tool.mmap2	  = perf_event__repipe_buildid_mmap2;
2002	} else if (inject->build_ids) {
2003		inject->tool.sample = perf_event__inject_buildid;
2004	} else if (inject->sched_stat) {
2005		struct evsel *evsel;
2006
2007		evlist__for_each_entry(session->evlist, evsel) {
2008			const char *name = evsel__name(evsel);
2009
			if (!strcmp(name, "sched:sched_switch")) {
				if (evsel__check_stype(evsel, PERF_SAMPLE_TID, "TID"))
					return -EINVAL;

				evsel->handler = perf_inject__sched_switch;
			} else if (!strcmp(name, "sched:sched_process_exit")) {
				evsel->handler = perf_inject__sched_process_exit;
#ifdef HAVE_LIBTRACEEVENT
			} else if (!strncmp(name, "sched:sched_stat_", 17)) {
				evsel->handler = perf_inject__sched_stat;
#endif
			}
		}
	} else if (inject->itrace_synth_opts.vm_time_correlation) {
		session->itrace_synth_opts = &inject->itrace_synth_opts;
		memset(&inject->tool, 0, sizeof(inject->tool));
		inject->tool.id_index	    = perf_event__process_id_index;
		inject->tool.auxtrace_info  = perf_event__process_auxtrace_info;
		inject->tool.auxtrace	    = perf_event__process_auxtrace;
		inject->tool.auxtrace_error = perf_event__process_auxtrace_error;
		inject->tool.ordered_events = true;
		inject->tool.ordering_requires_timestamps = true;
	} else if (inject->itrace_synth_opts.set) {
		session->itrace_synth_opts = &inject->itrace_synth_opts;
		inject->itrace_synth_opts.inject = true;
		inject->tool.comm	    = perf_event__repipe_comm;
		inject->tool.namespaces	    = perf_event__repipe_namespaces;
		inject->tool.exit	    = perf_event__repipe_exit;
		inject->tool.id_index	    = perf_event__process_id_index;
		inject->tool.auxtrace_info  = perf_event__process_auxtrace_info;
		inject->tool.auxtrace	    = perf_event__process_auxtrace;
		inject->tool.aux	    = perf_event__drop_aux;
		inject->tool.itrace_start   = perf_event__drop_aux;
		inject->tool.aux_output_hw_id = perf_event__drop_aux;
		inject->tool.ordered_events = true;
		inject->tool.ordering_requires_timestamps = true;
		/* Allow space in the header for new attributes */
		output_data_offset = roundup(8192 + session->header.data_offset, 4096);
		if (inject->strip)
			strip_init(inject);
	} else if (gs->perf_data_file) {
		char *name = gs->perf_data_file;

		/*
		 * Not strictly necessary, but keep these events in order wrt
		 * guest events.
		 */
		inject->tool.mmap		= host__repipe;
		inject->tool.mmap2		= host__repipe;
		inject->tool.comm		= host__repipe;
		inject->tool.fork		= host__repipe;
		inject->tool.exit		= host__repipe;
		inject->tool.lost		= host__repipe;
		inject->tool.context_switch	= host__repipe;
		inject->tool.ksymbol		= host__repipe;
		inject->tool.text_poke		= host__repipe;
		/*
		 * Once the host session has initialized, set up sample ID
		 * mapping and feed in guest attrs, build IDs and initial
		 * events.
		 */
		inject->tool.finished_init	= host__finished_init;
		/* Obey finished round ordering */
		inject->tool.finished_round	= host__finished_round;
		/* Keep track of which CPU a VCPU is running on */
		inject->tool.context_switch	= host__context_switch;
		/*
		 * Must order events to be able to obey finished round
		 * ordering.
		 */
		inject->tool.ordered_events	= true;
		inject->tool.ordering_requires_timestamps = true;
		/* Set up a separate session to process guest perf.data file */
		ret = guest_session__start(gs, name, session->data->force);
		if (ret) {
			pr_err("Failed to process %s, error %d\n", name, ret);
			return ret;
		}
		/* Allow space in the header for guest attributes */
		output_data_offset += gs->session->header.data_offset;
		output_data_offset = roundup(output_data_offset, 4096);
	}

	if (!inject->itrace_synth_opts.set)
		auxtrace_index__free(&session->auxtrace_index);

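	/*
	 * For file output, events are written starting at output_data_offset;
	 * the header is rewritten at the end, below.
	 */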
	if (!inject->is_pipe && !inject->in_place_update)
		lseek(fd, output_data_offset, SEEK_SET);

	ret = perf_session__process_events(session);
	if (ret)
		return ret;

	if (gs->session) {
		/*
		 * Remaining guest events have later timestamps. Flush them
		 * out to file.
		 */
		ret = guest_session__flush_events(gs);
		if (ret) {
			pr_err("Failed to flush guest events\n");
			return ret;
		}
	}

	if (!inject->is_pipe && !inject->in_place_update) {
		struct inject_fc inj_fc = {
			.fc.copy = feat_copy_cb,
			.inject = inject,
		};

		if (inject->build_ids)
			perf_header__set_feat(&session->header,
					      HEADER_BUILD_ID);
		/*
		 * Keep all buildids when there is unprocessed AUX data because
		 * it is not known which ones the AUX trace hits.
		 */
		if (perf_header__has_feat(&session->header, HEADER_BUILD_ID) &&
		    inject->have_auxtrace && !inject->itrace_synth_opts.set)
			perf_session__dsos_hit_all(session);
		/*
		 * The AUX areas have been removed and replaced with
		 * synthesized hardware events, so clear the feature flag.
		 */
		if (inject->itrace_synth_opts.set) {
			perf_header__clear_feat(&session->header,
						HEADER_AUXTRACE);
			if (inject->itrace_synth_opts.last_branch ||
			    inject->itrace_synth_opts.add_last_branch)
				perf_header__set_feat(&session->header,
						      HEADER_BRANCH_STACK);
		}
		session->header.data_offset = output_data_offset;
		session->header.data_size = inject->bytes_written;
		perf_session__inject_header(session, session->evlist, fd, &inj_fc.fc);

		if (inject->copy_kcore_dir) {
			ret = copy_kcore_dir(inject);
			if (ret) {
				pr_err("Failed to copy kcore\n");
				return ret;
			}
		}
		if (gs->copy_kcore_dir) {
			ret = guest_session__copy_kcore_dir(gs);
			if (ret) {
				pr_err("Failed to copy guest kcore\n");
				return ret;
			}
		}
	}

	return ret;
}

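/*
 * Example invocations (illustrative; see the perf-inject documentation for
 * the authoritative list):
 *
 *   perf inject -b -i perf.data -o perf.data.new      # inject build-ids
 *   perf inject -j -i perf.data -o perf.data.jit      # merge jitdump files
 *   perf inject --itrace -i perf.data -o inject.data  # synthesize from AUX data
 */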
int cmd_inject(int argc, const char **argv)
{
	struct perf_inject inject = {
		.tool = {
			.sample		= perf_event__repipe_sample,
			.read		= perf_event__repipe_sample,
			.mmap		= perf_event__repipe,
			.mmap2		= perf_event__repipe,
			.comm		= perf_event__repipe,
			.namespaces	= perf_event__repipe,
			.cgroup		= perf_event__repipe,
			.fork		= perf_event__repipe,
			.exit		= perf_event__repipe,
			.lost		= perf_event__repipe,
			.lost_samples	= perf_event__repipe,
			.aux		= perf_event__repipe,
			.itrace_start	= perf_event__repipe,
			.aux_output_hw_id = perf_event__repipe,
			.context_switch	= perf_event__repipe,
			.throttle	= perf_event__repipe,
			.unthrottle	= perf_event__repipe,
			.ksymbol	= perf_event__repipe,
			.bpf		= perf_event__repipe,
			.text_poke	= perf_event__repipe,
			.attr		= perf_event__repipe_attr,
			.event_update	= perf_event__repipe_event_update,
			.tracing_data	= perf_event__repipe_op2_synth,
			.finished_round	= perf_event__repipe_oe_synth,
			.build_id	= perf_event__repipe_op2_synth,
			.id_index	= perf_event__repipe_op2_synth,
			.auxtrace_info	= perf_event__repipe_op2_synth,
			.auxtrace_error	= perf_event__repipe_op2_synth,
			.time_conv	= perf_event__repipe_op2_synth,
			.thread_map	= perf_event__repipe_op2_synth,
			.cpu_map	= perf_event__repipe_op2_synth,
			.stat_config	= perf_event__repipe_op2_synth,
			.stat		= perf_event__repipe_op2_synth,
			.stat_round	= perf_event__repipe_op2_synth,
			.feature	= perf_event__repipe_op2_synth,
			.finished_init	= perf_event__repipe_op2_synth,
			.compressed	= perf_event__repipe_op4_synth,
			.auxtrace	= perf_event__repipe_auxtrace,
		},
		.input_name  = "-",
		.samples = LIST_HEAD_INIT(inject.samples),
		.output = {
			.path = "-",
			.mode = PERF_DATA_MODE_WRITE,
			.use_stdio = true,
		},
	};
	struct perf_data data = {
		.mode = PERF_DATA_MODE_READ,
		.use_stdio = true,
	};
	int ret;
	bool repipe = true;
	const char *known_build_ids = NULL;

	struct option options[] = {
		OPT_BOOLEAN('b', "build-ids", &inject.build_ids,
			    "Inject build-ids into the output stream"),
		OPT_BOOLEAN(0, "buildid-all", &inject.build_id_all,
			    "Inject build-ids of all DSOs into the output stream"),
		OPT_STRING(0, "known-build-ids", &known_build_ids,
			   "buildid path [,buildid path...]",
			   "build-ids to use for given paths"),
		OPT_STRING('i', "input", &inject.input_name, "file",
			   "input file name"),
		OPT_STRING('o', "output", &inject.output.path, "file",
			   "output file name"),
		OPT_BOOLEAN('s', "sched-stat", &inject.sched_stat,
			    "Merge sched-stat and sched-switch for getting events "
			    "where and how long tasks slept"),
#ifdef HAVE_JITDUMP
		OPT_BOOLEAN('j', "jit", &inject.jit_mode, "merge jitdump files into perf.data file"),
#endif
		OPT_INCR('v', "verbose", &verbose,
			 "be more verbose (show build ids, etc)"),
		OPT_STRING('k', "vmlinux", &symbol_conf.vmlinux_name,
			   "file", "vmlinux pathname"),
		OPT_BOOLEAN(0, "ignore-vmlinux", &symbol_conf.ignore_vmlinux,
			    "don't load vmlinux even if found"),
		OPT_STRING(0, "kallsyms", &symbol_conf.kallsyms_name, "file",
			   "kallsyms pathname"),
		OPT_BOOLEAN('f', "force", &data.force, "don't complain, do it"),
		OPT_CALLBACK_OPTARG(0, "itrace", &inject.itrace_synth_opts,
				    NULL, "opts", "Instruction Tracing options\n"
				    ITRACE_HELP,
				    itrace_parse_synth_opts),
		OPT_BOOLEAN(0, "strip", &inject.strip,
			    "strip non-synthesized events (use with --itrace)"),
		OPT_CALLBACK_OPTARG(0, "vm-time-correlation", &inject, NULL, "opts",
				    "correlate time between VM guests and the host",
				    parse_vm_time_correlation),
		OPT_CALLBACK_OPTARG(0, "guest-data", &inject, NULL, "opts",
				    "inject events from a guest perf.data file",
				    parse_guest_data),
		OPT_STRING(0, "guestmount", &symbol_conf.guestmount, "directory",
			   "guest mount directory under which every guest os"
			   " instance has a subdir"),
		OPT_END()
	};
	const char * const inject_usage[] = {
		"perf inject [<options>]",
		NULL
	};

	if (!inject.itrace_synth_opts.set) {
		/* Disable eager loading of kernel symbols that adds overhead to perf inject. */
		symbol_conf.lazy_load_kernel_maps = true;
	}

#ifndef HAVE_JITDUMP
	set_option_nobuild(options, 'j', "jit", "NO_LIBELF=1", true);
#endif
	argc = parse_options(argc, argv, options, inject_usage, 0);

	/*
	 * Any (unrecognized) arguments left?
	 */
	if (argc)
		usage_with_options(inject_usage, options);

	if (inject.strip && !inject.itrace_synth_opts.set) {
		pr_err("--strip option requires --itrace option\n");
		return -1;
	}

	if (symbol__validate_sym_arguments())
		return -1;

	if (inject.in_place_update) {
		if (!strcmp(inject.input_name, "-")) {
			pr_err("Input file name required for in-place updating\n");
			return -1;
		}
		if (strcmp(inject.output.path, "-")) {
			pr_err("Output file name must not be specified for in-place updating\n");
			return -1;
		}
		if (!data.force && !inject.in_place_update_dry_run) {
			pr_err("The input file would be updated in place, "
				"the --force option is required.\n");
			return -1;
		}
		if (!inject.in_place_update_dry_run)
			data.in_place_update = true;
	} else {
		if (strcmp(inject.output.path, "-") && !inject.strip &&
		    has_kcore_dir(inject.input_name)) {
			inject.output.is_dir = true;
			inject.copy_kcore_dir = true;
		}
		if (perf_data__open(&inject.output)) {
			perror("failed to create output file");
			return -1;
		}
	}

	data.path = inject.input_name;
	if (!strcmp(inject.input_name, "-") || inject.output.is_pipe) {
		inject.is_pipe = true;
		/*
		 * Do not repipe the header when the input is a regular file,
		 * since we can either rewrite the header at the end or write
		 * a new pipe header.
		 */
		if (strcmp(inject.input_name, "-"))
			repipe = false;
	}

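	/*
	 * Create the session that reads the input; when repiping, the file
	 * header is passed straight through to the output fd.
	 */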
	inject.session = __perf_session__new(&data, repipe,
					     output_fd(&inject),
					     &inject.tool);
	if (IS_ERR(inject.session)) {
		ret = PTR_ERR(inject.session);
		goto out_close_output;
	}

	if (zstd_init(&(inject.session->zstd_data), 0) < 0)
		pr_warning("Decompression initialization failed.\n");

	/* Save original section info before feature bits change */
	ret = save_section_info(&inject);
	if (ret)
		goto out_delete;

	if (!data.is_pipe && inject.output.is_pipe) {
		ret = perf_header__write_pipe(perf_data__fd(&inject.output));
		if (ret < 0) {
			pr_err("Couldn't write a new pipe header.\n");
			goto out_delete;
		}

		ret = perf_event__synthesize_for_pipe(&inject.tool,
						      inject.session,
						      &inject.output,
						      perf_event__repipe);
		if (ret < 0)
			goto out_delete;
	}

	if (inject.build_ids && !inject.build_id_all) {
		/*
		 * Order the mmap records so that the build-id hit list is
		 * correct, especially in the presence of jitted code mmaps.
		 * We cannot generate the build-id hit list and inject the
		 * jit mmaps at the same time for now.
		 */
		inject.tool.ordered_events = true;
		inject.tool.ordering_requires_timestamps = true;
		if (known_build_ids != NULL) {
			inject.known_build_ids =
				perf_inject__parse_known_build_ids(known_build_ids);

			if (inject.known_build_ids == NULL) {
				pr_err("Couldn't parse known build ids.\n");
				ret = -EINVAL;
				goto out_delete;
			}
		}
	}

	if (inject.sched_stat)
		inject.tool.ordered_events = true;

#ifdef HAVE_JITDUMP
	if (inject.jit_mode) {
		inject.tool.mmap2	   = perf_event__jit_repipe_mmap2;
		inject.tool.mmap	   = perf_event__jit_repipe_mmap;
		inject.tool.ordered_events = true;
		inject.tool.ordering_requires_timestamps = true;
		/*
		 * JIT MMAP injection injects all MMAP events in one go, so it
		 * does not obey finished_round semantics.
		 */
		inject.tool.finished_round = perf_event__drop_oe;
	}
#endif
	ret = symbol__init(&inject.session->header.env);
	if (ret < 0)
		goto out_delete;

	ret = __cmd_inject(&inject);

	guest_session__exit(&inject.guest_session);

out_delete:
	strlist__delete(inject.known_build_ids);
	zstd_fini(&(inject.session->zstd_data));
	perf_session__delete(inject.session);
out_close_output:
	if (!inject.in_place_update)
		perf_data__close(&inject.output);
	free(inject.itrace_synth_opts.vm_tm_corr_args);
	free(inject.event_copy);
	free(inject.guest_session.ev.event_buf);
	return ret;
}