1// SPDX-License-Identifier: GPL-2.0
2/*
3 * Copyright (C) 2023 Red Hat Inc, Daniel Bristot de Oliveira <bristot@kernel.org>
4 */
5
6#include <stdlib.h>
7#include <errno.h>
8#include "utils.h"
9#include "osnoise.h"
10#include "timerlat.h"
11#include <unistd.h>
12
/*
 * Per-cpu analysis state machine: each timerlat activation is an IRQ
 * event followed by a thread event.
 */
enum timelat_state {
	TIMERLAT_INIT = 0,		/* no timerlat event seen yet on this CPU */
	TIMERLAT_WAITING_IRQ,		/* thread sample seen; next event is the timer IRQ */
	TIMERLAT_WAITING_THREAD,	/* IRQ sample seen; waiting for the thread to run */
};
18
/* Used to fill spaces in the output (printed via the "%.*s" width trick) */
static const char *spaces  = "                                                         ";

/* Max length (including NUL) of the task comm names stored per CPU */
#define MAX_COMM		24
23
/*
 * Per-cpu data statistics and data.
 *
 * One instance per CPU; filled by the event handlers and consumed by
 * timerlat_thread_analysis() when the stop-tracing threshold is hit.
 */
struct timerlat_aa_data {
	/* Current CPU state (enum timelat_state) */
	int			curr_state;

	/* timerlat IRQ latency */
	unsigned long long	tlat_irq_seqnum;
	unsigned long long	tlat_irq_latency;
	unsigned long long	tlat_irq_timstamp;

	/* timerlat Thread latency */
	unsigned long long	tlat_thread_seqnum;
	unsigned long long	tlat_thread_latency;
	unsigned long long	tlat_thread_timstamp;

	/*
	 * Information about the thread running when the IRQ
	 * arrived.
	 *
	 * This can be blocking or interference, depending on the
	 * priority of the thread. Assuming timerlat is the highest
	 * prio, it is blocking. If timerlat has a lower prio, it is
	 * interference.
	 * note: "unsigned long long" because they are fetched using tep_get_field_val();
	 */
	unsigned long long	run_thread_pid;
	char			run_thread_comm[MAX_COMM];
	unsigned long long	thread_blocking_duration;
	unsigned long long	max_exit_idle_latency;

	/* Information about the timerlat timer irq */
	unsigned long long	timer_irq_start_time;
	unsigned long long	timer_irq_start_delay;
	unsigned long long	timer_irq_duration;
	unsigned long long	timer_exit_from_idle;

	/*
	 * Information about the last IRQ before the timerlat irq
	 * arrived.
	 *
	 * If now - timestamp is <= latency, it might have influenced
	 * in the timerlat irq latency. Otherwise, ignore it.
	 */
	unsigned long long	prev_irq_duration;
	unsigned long long	prev_irq_timstamp;

	/*
	 * Interference sum (ns) accumulated between the timerlat IRQ
	 * and the timerlat thread execution.
	 */
	unsigned long long	thread_nmi_sum;
	unsigned long long	thread_irq_sum;
	unsigned long long	thread_softirq_sum;
	unsigned long long	thread_thread_sum;

	/*
	 * Interference task information: pre-formatted report lines,
	 * one seq per interference source.
	 */
	struct trace_seq	*prev_irqs_seq;
	struct trace_seq	*nmi_seq;
	struct trace_seq	*irqs_seq;
	struct trace_seq	*softirqs_seq;
	struct trace_seq	*threads_seq;
	struct trace_seq	*stack_seq;

	/*
	 * Current thread (tracked via sched:sched_switch).
	 */
	char			current_comm[MAX_COMM];
	unsigned long long	current_pid;

	/*
	 * Is the system running a kworker?
	 * note: raw pointer values from the workqueue events, stored as
	 * integers because they are fetched using tep_get_field_val().
	 */
	unsigned long long	kworker;
	unsigned long long	kworker_func;
};
102
/*
 * The analysis context and system wide view: owns the per-cpu data
 * array and the tool handle used for event registration and symbol
 * resolution.
 */
struct timerlat_aa_context {
	int nr_cpus;		/* number of entries in taa_data */
	int dump_tasks;		/* if set, also dump per-CPU task info */

	/* per CPU data (array of nr_cpus elements) */
	struct timerlat_aa_data *taa_data;

	/*
	 * required to translate function names and register
	 * events.
	 */
	struct osnoise_tool *tool;
};
119
/*
 * The data is stored as a local variable, but accessed via a helper function.
 *
 * It could be stored inside the trace context. But every access would
 * require container_of() + a series of pointers. Do we need it? Not sure.
 *
 * For now keep it simple. If needed, store it in the tool, add the *context
 * as a parameter in timerlat_aa_get_ctx() and do the magic there.
 */
static struct timerlat_aa_context *__timerlat_aa_ctx;

/*
 * timerlat_aa_get_ctx - Return the global auto-analysis context
 *
 * NULL until timerlat_aa_init() has run.
 */
static struct timerlat_aa_context *timerlat_aa_get_ctx(void)
{
	return __timerlat_aa_ctx;
}
135
/*
 * timerlat_aa_get_data - Get the per-cpu data from the timerlat context
 *
 * Note: no bounds checking — the caller must pass a cpu in
 * [0, taa_ctx->nr_cpus).
 */
static struct timerlat_aa_data
*timerlat_aa_get_data(struct timerlat_aa_context *taa_ctx, int cpu)
{
	return &taa_ctx->taa_data[cpu];
}
144
145/*
146 * timerlat_aa_irq_latency - Handles timerlat IRQ event
147 */
148static int timerlat_aa_irq_latency(struct timerlat_aa_data *taa_data,
149				   struct trace_seq *s, struct tep_record *record,
150				   struct tep_event *event)
151{
152	/*
153	 * For interference, we start now looking for things that can delay
154	 * the thread.
155	 */
156	taa_data->curr_state = TIMERLAT_WAITING_THREAD;
157	taa_data->tlat_irq_timstamp = record->ts;
158
159	/*
160	 * Zero values.
161	 */
162	taa_data->thread_nmi_sum = 0;
163	taa_data->thread_irq_sum = 0;
164	taa_data->thread_softirq_sum = 0;
165	taa_data->thread_thread_sum = 0;
166	taa_data->thread_blocking_duration = 0;
167	taa_data->timer_irq_start_time = 0;
168	taa_data->timer_irq_duration = 0;
169	taa_data->timer_exit_from_idle = 0;
170
171	/*
172	 * Zero interference tasks.
173	 */
174	trace_seq_reset(taa_data->nmi_seq);
175	trace_seq_reset(taa_data->irqs_seq);
176	trace_seq_reset(taa_data->softirqs_seq);
177	trace_seq_reset(taa_data->threads_seq);
178
179	/* IRQ latency values */
180	tep_get_field_val(s, event, "timer_latency", record, &taa_data->tlat_irq_latency, 1);
181	tep_get_field_val(s, event, "seqnum", record, &taa_data->tlat_irq_seqnum, 1);
182
183	/* The thread that can cause blocking */
184	tep_get_common_field_val(s, event, "common_pid", record, &taa_data->run_thread_pid, 1);
185
186	/*
187	 * Get exit from idle case.
188	 *
189	 * If it is not idle thread:
190	 */
191	if (taa_data->run_thread_pid)
192		return 0;
193
194	/*
195	 * if the latency is shorter than the known exit from idle:
196	 */
197	if (taa_data->tlat_irq_latency < taa_data->max_exit_idle_latency)
198		return 0;
199
200	/*
201	 * To be safe, ignore the cases in which an IRQ/NMI could have
202	 * interfered with the timerlat IRQ.
203	 */
204	if (taa_data->tlat_irq_timstamp - taa_data->tlat_irq_latency
205	    < taa_data->prev_irq_timstamp + taa_data->prev_irq_duration)
206		return 0;
207
208	taa_data->max_exit_idle_latency = taa_data->tlat_irq_latency;
209
210	return 0;
211}
212
213/*
214 * timerlat_aa_thread_latency - Handles timerlat thread event
215 */
216static int timerlat_aa_thread_latency(struct timerlat_aa_data *taa_data,
217				      struct trace_seq *s, struct tep_record *record,
218				      struct tep_event *event)
219{
220	/*
221	 * For interference, we start now looking for things that can delay
222	 * the IRQ of the next cycle.
223	 */
224	taa_data->curr_state = TIMERLAT_WAITING_IRQ;
225	taa_data->tlat_thread_timstamp = record->ts;
226
227	/* Thread latency values */
228	tep_get_field_val(s, event, "timer_latency", record, &taa_data->tlat_thread_latency, 1);
229	tep_get_field_val(s, event, "seqnum", record, &taa_data->tlat_thread_seqnum, 1);
230
231	return 0;
232}
233
234/*
235 * timerlat_aa_handler - Handle timerlat events
236 *
237 * This function is called to handle timerlat events recording statistics.
238 *
239 * Returns 0 on success, -1 otherwise.
240 */
241static int timerlat_aa_handler(struct trace_seq *s, struct tep_record *record,
242			struct tep_event *event, void *context)
243{
244	struct timerlat_aa_context *taa_ctx = timerlat_aa_get_ctx();
245	struct timerlat_aa_data *taa_data = timerlat_aa_get_data(taa_ctx, record->cpu);
246	unsigned long long thread;
247
248	if (!taa_data)
249		return -1;
250
251	tep_get_field_val(s, event, "context", record, &thread, 1);
252	if (!thread)
253		return timerlat_aa_irq_latency(taa_data, s, record, event);
254	else
255		return timerlat_aa_thread_latency(taa_data, s, record, event);
256}
257
258/*
259 * timerlat_aa_nmi_handler - Handles NMI noise
260 *
261 * It is used to collect information about interferences from NMI. It is
262 * hooked to the osnoise:nmi_noise event.
263 */
264static int timerlat_aa_nmi_handler(struct trace_seq *s, struct tep_record *record,
265				   struct tep_event *event, void *context)
266{
267	struct timerlat_aa_context *taa_ctx = timerlat_aa_get_ctx();
268	struct timerlat_aa_data *taa_data = timerlat_aa_get_data(taa_ctx, record->cpu);
269	unsigned long long duration;
270	unsigned long long start;
271
272	tep_get_field_val(s, event, "duration", record, &duration, 1);
273	tep_get_field_val(s, event, "start", record, &start, 1);
274
275	if (taa_data->curr_state == TIMERLAT_WAITING_IRQ) {
276		taa_data->prev_irq_duration = duration;
277		taa_data->prev_irq_timstamp = start;
278
279		trace_seq_reset(taa_data->prev_irqs_seq);
280		trace_seq_printf(taa_data->prev_irqs_seq, "  %24s %.*s %9.2f us\n",
281				 "nmi",
282				 24, spaces,
283				 ns_to_usf(duration));
284		return 0;
285	}
286
287	taa_data->thread_nmi_sum += duration;
288	trace_seq_printf(taa_data->nmi_seq, "  %24s %.*s %9.2f us\n",
289			 "nmi",
290			 24, spaces, ns_to_usf(duration));
291
292	return 0;
293}
294
/*
 * timerlat_aa_irq_handler - Handles IRQ noise
 *
 * It is used to collect information about interferences from IRQ. It is
 * hooked to the osnoise:irq_noise event.
 *
 * It is a little bit more complex than the other because it measures:
 *	- The IRQs that can delay the timer IRQ before it happened.
 *	- The Timerlat IRQ handler
 *	- The IRQs that happened between the timerlat IRQ and the timerlat thread
 *	  (IRQ interference).
 */
static int timerlat_aa_irq_handler(struct trace_seq *s, struct tep_record *record,
				   struct tep_event *event, void *context)
{
	struct timerlat_aa_context *taa_ctx = timerlat_aa_get_ctx();
	struct timerlat_aa_data *taa_data = timerlat_aa_get_data(taa_ctx, record->cpu);
	unsigned long long expected_start;
	unsigned long long duration;
	unsigned long long vector;
	unsigned long long start;
	char *desc;
	int val;

	tep_get_field_val(s, event, "duration", record, &duration, 1);
	tep_get_field_val(s, event, "start", record, &start, 1);
	tep_get_field_val(s, event, "vector", record, &vector, 1);
	/* NOTE(review): tep_get_field_raw() can return NULL — confirm desc is always present */
	desc = tep_get_field_raw(s, event, "desc", record, &val, 1);

	/*
	 * Before the timerlat IRQ: remember it as the candidate IRQ that
	 * may have delayed the timer IRQ of the next activation.
	 */
	if (taa_data->curr_state == TIMERLAT_WAITING_IRQ) {
		taa_data->prev_irq_duration = duration;
		taa_data->prev_irq_timstamp = start;

		trace_seq_reset(taa_data->prev_irqs_seq);
		trace_seq_printf(taa_data->prev_irqs_seq, "  %24s:%-3llu %.*s %9.2f us\n",
				 desc, vector,
				 15, spaces,
				 ns_to_usf(duration));
		return 0;
	}

	/*
	 * The timerlat IRQ: taa_data->timer_irq_start_time is zeroed at
	 * the timerlat irq handler, so the first IRQ noise seen in the
	 * window is the timerlat timer IRQ itself.
	 */
	if (!taa_data->timer_irq_start_time) {
		expected_start = taa_data->tlat_irq_timstamp - taa_data->tlat_irq_latency;

		taa_data->timer_irq_start_time = start;
		taa_data->timer_irq_duration = duration;

		/*
		 * We are dealing with two different clock sources: the
		 * external clock source that timerlat uses as a reference
		 * and the clock used by the tracer. There are also two
		 * moments: the time reading the clock and the timer in
		 * which the event is placed in the buffer (the trace
		 * event timestamp). If the processor is slow or there
		 * is some hardware noise, the difference between the
		 * timestamp and the external clock read can be longer
		 * than the IRQ handler delay, resulting in a negative
		 * time. If so, set IRQ start delay as 0. In the end,
		 * it is less relevant than the noise.
		 */
		if (expected_start < taa_data->timer_irq_start_time)
			taa_data->timer_irq_start_delay = taa_data->timer_irq_start_time - expected_start;
		else
			taa_data->timer_irq_start_delay = 0;

		/*
		 * not exit from idle: a regular thread was interrupted,
		 * so this delay is not an exit-from-idle latency.
		 */
		if (taa_data->run_thread_pid)
			return 0;

		/* No previous IRQ overlapped the expected start: credit the delay to idle exit */
		if (expected_start > taa_data->prev_irq_timstamp + taa_data->prev_irq_duration)
			taa_data->timer_exit_from_idle = taa_data->timer_irq_start_delay;

		return 0;
	}

	/*
	 * IRQ interference: any further IRQ in the window delays the
	 * timerlat thread.
	 */
	taa_data->thread_irq_sum += duration;
	trace_seq_printf(taa_data->irqs_seq, "  %24s:%-3llu %.*s %9.2f us\n",
			 desc, vector,
			 24, spaces,
			 ns_to_usf(duration));

	return 0;
}
390
/* Softirq names, indexed by the "vector" field of osnoise:softirq_noise */
static char *softirq_name[] = { "HI", "TIMER",	"NET_TX", "NET_RX", "BLOCK",
				"IRQ_POLL", "TASKLET", "SCHED", "HRTIMER", "RCU" };
393
394
395/*
396 * timerlat_aa_softirq_handler - Handles Softirq noise
397 *
398 * It is used to collect information about interferences from Softirq. It is
399 * hooked to the osnoise:softirq_noise event.
400 *
401 * It is only printed in the non-rt kernel, as softirqs become thread on RT.
402 */
403static int timerlat_aa_softirq_handler(struct trace_seq *s, struct tep_record *record,
404				       struct tep_event *event, void *context)
405{
406	struct timerlat_aa_context *taa_ctx = timerlat_aa_get_ctx();
407	struct timerlat_aa_data *taa_data = timerlat_aa_get_data(taa_ctx, record->cpu);
408	unsigned long long duration;
409	unsigned long long vector;
410	unsigned long long start;
411
412	if (taa_data->curr_state == TIMERLAT_WAITING_IRQ)
413		return 0;
414
415	tep_get_field_val(s, event, "duration", record, &duration, 1);
416	tep_get_field_val(s, event, "start", record, &start, 1);
417	tep_get_field_val(s, event, "vector", record, &vector, 1);
418
419	taa_data->thread_softirq_sum += duration;
420
421	trace_seq_printf(taa_data->softirqs_seq, "  %24s:%-3llu %.*s %9.2f us\n",
422			 softirq_name[vector], vector,
423			 24, spaces,
424			 ns_to_usf(duration));
425	return 0;
426}
427
428/*
429 * timerlat_aa_softirq_handler - Handles thread noise
430 *
431 * It is used to collect information about interferences from threads. It is
432 * hooked to the osnoise:thread_noise event.
433 *
434 * Note: if you see thread noise, your timerlat thread was not the highest prio one.
435 */
436static int timerlat_aa_thread_handler(struct trace_seq *s, struct tep_record *record,
437				      struct tep_event *event, void *context)
438{
439	struct timerlat_aa_context *taa_ctx = timerlat_aa_get_ctx();
440	struct timerlat_aa_data *taa_data = timerlat_aa_get_data(taa_ctx, record->cpu);
441	unsigned long long duration;
442	unsigned long long start;
443	unsigned long long pid;
444	const char *comm;
445	int val;
446
447	if (taa_data->curr_state == TIMERLAT_WAITING_IRQ)
448		return 0;
449
450	tep_get_field_val(s, event, "duration", record, &duration, 1);
451	tep_get_field_val(s, event, "start", record, &start, 1);
452
453	tep_get_common_field_val(s, event, "common_pid", record, &pid, 1);
454	comm = tep_get_field_raw(s, event, "comm", record, &val, 1);
455
456	if (pid == taa_data->run_thread_pid && !taa_data->thread_blocking_duration) {
457		taa_data->thread_blocking_duration = duration;
458
459		if (comm)
460			strncpy(taa_data->run_thread_comm, comm, MAX_COMM);
461		else
462			sprintf(taa_data->run_thread_comm, "<...>");
463
464	} else {
465		taa_data->thread_thread_sum += duration;
466
467		trace_seq_printf(taa_data->threads_seq, "  %24s:%-12llu %.*s %9.2f us\n",
468				 comm, pid,
469				 15, spaces,
470				 ns_to_usf(duration));
471	}
472
473	return 0;
474}
475
476/*
477 * timerlat_aa_stack_handler - Handles timerlat IRQ stack trace
478 *
479 * Saves and parse the stack trace generated by the timerlat IRQ.
480 */
481static int timerlat_aa_stack_handler(struct trace_seq *s, struct tep_record *record,
482			      struct tep_event *event, void *context)
483{
484	struct timerlat_aa_context *taa_ctx = timerlat_aa_get_ctx();
485	struct timerlat_aa_data *taa_data = timerlat_aa_get_data(taa_ctx, record->cpu);
486	unsigned long *caller;
487	const char *function;
488	int val, i;
489
490	trace_seq_reset(taa_data->stack_seq);
491
492	trace_seq_printf(taa_data->stack_seq, "    Blocking thread stack trace\n");
493	caller = tep_get_field_raw(s, event, "caller", record, &val, 1);
494	if (caller) {
495		for (i = 0; ; i++) {
496			function = tep_find_function(taa_ctx->tool->trace.tep, caller[i]);
497			if (!function)
498				break;
499			trace_seq_printf(taa_data->stack_seq, " %.*s -> %s\n",
500					 14, spaces, function);
501		}
502	}
503	return 0;
504}
505
506/*
507 * timerlat_aa_sched_switch_handler - Tracks the current thread running on the CPU
508 *
509 * Handles the sched:sched_switch event to trace the current thread running on the
510 * CPU. It is used to display the threads running on the other CPUs when the trace
511 * stops.
512 */
513static int timerlat_aa_sched_switch_handler(struct trace_seq *s, struct tep_record *record,
514					    struct tep_event *event, void *context)
515{
516	struct timerlat_aa_context *taa_ctx = timerlat_aa_get_ctx();
517	struct timerlat_aa_data *taa_data = timerlat_aa_get_data(taa_ctx, record->cpu);
518	const char *comm;
519	int val;
520
521	tep_get_field_val(s, event, "next_pid", record, &taa_data->current_pid, 1);
522	comm = tep_get_field_raw(s, event, "next_comm", record, &val, 1);
523
524	strncpy(taa_data->current_comm, comm, MAX_COMM);
525
526	/*
527	 * If this was a kworker, clean the last kworkers that ran.
528	 */
529	taa_data->kworker = 0;
530	taa_data->kworker_func = 0;
531
532	return 0;
533}
534
535/*
536 * timerlat_aa_kworker_start_handler - Tracks a kworker running on the CPU
537 *
538 * Handles workqueue:workqueue_execute_start event, keeping track of
539 * the job that a kworker could be doing in the CPU.
540 *
541 * We already catch problems of hardware related latencies caused by work queues
542 * running driver code that causes hardware stall. For example, with DRM drivers.
543 */
544static int timerlat_aa_kworker_start_handler(struct trace_seq *s, struct tep_record *record,
545					     struct tep_event *event, void *context)
546{
547	struct timerlat_aa_context *taa_ctx = timerlat_aa_get_ctx();
548	struct timerlat_aa_data *taa_data = timerlat_aa_get_data(taa_ctx, record->cpu);
549
550	tep_get_field_val(s, event, "work", record, &taa_data->kworker, 1);
551	tep_get_field_val(s, event, "function", record, &taa_data->kworker_func, 1);
552	return 0;
553}
554
/*
 * timerlat_thread_analysis - Prints the analysis of a CPU that hit a stop tracing
 *
 * This is the core of the analysis: it prints the breakdown of the
 * latency that crossed the stop-tracing threshold on one CPU — IRQ
 * handler delay, timerlat IRQ duration, blocking thread, and the
 * NMI/IRQ/softirq/thread interference accumulated by the handlers.
 *
 * Note: cpu, irq_thresh and thread_thresh are currently unused here;
 * the caller prints the CPU number and converts the thresholds.
 */
static void timerlat_thread_analysis(struct timerlat_aa_data *taa_data, int cpu,
				     int irq_thresh, int thread_thresh)
{
	long long exp_irq_ts;
	int total;
	int irq;

	/*
	 * IRQ latency or Thread latency? The higher seqnum identifies the
	 * sample that triggered the stop.
	 */
	if (taa_data->tlat_irq_seqnum > taa_data->tlat_thread_seqnum) {
		irq = 1;
		total = taa_data->tlat_irq_latency;
	} else {
		irq = 0;
		total = taa_data->tlat_thread_latency;
	}

	/*
	 * Expected IRQ arrival time using the trace clock as the base.
	 *
	 * TODO: Add a list of previous IRQ, and then run the list backwards.
	 */
	exp_irq_ts = taa_data->timer_irq_start_time - taa_data->timer_irq_start_delay;
	if (exp_irq_ts < taa_data->prev_irq_timstamp + taa_data->prev_irq_duration) {
		if (taa_data->prev_irq_timstamp < taa_data->timer_irq_start_time)
			printf("  Previous IRQ interference: %.*s up to  %9.2f us\n",
			       16, spaces,
			       ns_to_usf(taa_data->prev_irq_duration));
	}

	/*
	 * The delay that the IRQ suffered before starting.
	 */
	printf("  IRQ handler delay: %.*s %16s  %9.2f us (%.2f %%)\n", 16, spaces,
	       (ns_to_usf(taa_data->timer_exit_from_idle) > 10) ? "(exit from idle)" : "",
	       ns_to_usf(taa_data->timer_irq_start_delay),
	       ns_to_per(total, taa_data->timer_irq_start_delay));

	/*
	 * Timerlat IRQ.
	 */
	printf("  IRQ latency: %.*s %9.2f us\n", 40, spaces,
	       ns_to_usf(taa_data->tlat_irq_latency));

	if (irq) {
		/*
		 * If the trace stopped due to IRQ, the other events will not happen
		 * because... the trace stopped :-).
		 *
		 * That is all folks, the stack trace was printed before the stop,
		 * so it will be displayed, it is the key.
		 */
		printf("  Blocking thread:\n");
		printf(" %.*s %24s:%-9llu\n", 6, spaces, taa_data->run_thread_comm,
		       taa_data->run_thread_pid);
	} else  {
		/*
		 * The duration of the IRQ handler that handled the timerlat IRQ.
		 */
		printf("  Timerlat IRQ duration: %.*s %9.2f us (%.2f %%)\n",
		       30, spaces,
		       ns_to_usf(taa_data->timer_irq_duration),
		       ns_to_per(total, taa_data->timer_irq_duration));

		/*
		 * The amount of time that the current thread postponed the scheduler.
		 *
		 * Recalling that it is net from NMI/IRQ/Softirq interference, so there
		 * is no need to compute values here.
		 */
		printf("  Blocking thread: %.*s %9.2f us (%.2f %%)\n", 36, spaces,
		       ns_to_usf(taa_data->thread_blocking_duration),
		       ns_to_per(total, taa_data->thread_blocking_duration));

		printf(" %.*s %24s:%-9llu %.*s %9.2f us\n", 6, spaces,
		       taa_data->run_thread_comm, taa_data->run_thread_pid,
		       12, spaces, ns_to_usf(taa_data->thread_blocking_duration));
	}

	/*
	 * Print the stack trace!
	 */
	trace_seq_do_printf(taa_data->stack_seq);

	/*
	 * NMIs can happen during the IRQ, so they are always possible.
	 */
	if (taa_data->thread_nmi_sum)
		printf("  NMI interference %.*s %9.2f us (%.2f %%)\n", 36, spaces,
		       ns_to_usf(taa_data->thread_nmi_sum),
		       ns_to_per(total, taa_data->thread_nmi_sum));

	/*
	 * If it is an IRQ latency, the other factors can be skipped.
	 */
	if (irq)
		goto print_total;

	/*
	 * Prints the interference caused by IRQs to the thread latency.
	 */
	if (taa_data->thread_irq_sum) {
		printf("  IRQ interference %.*s %9.2f us (%.2f %%)\n", 36, spaces,
		       ns_to_usf(taa_data->thread_irq_sum),
		       ns_to_per(total, taa_data->thread_irq_sum));

		trace_seq_do_printf(taa_data->irqs_seq);
	}

	/*
	 * Prints the interference caused by Softirqs to the thread latency.
	 */
	if (taa_data->thread_softirq_sum) {
		printf("  Softirq interference %.*s %9.2f us (%.2f %%)\n", 32, spaces,
		       ns_to_usf(taa_data->thread_softirq_sum),
		       ns_to_per(total, taa_data->thread_softirq_sum));

		trace_seq_do_printf(taa_data->softirqs_seq);
	}

	/*
	 * Prints the interference caused by other threads to the thread latency.
	 *
	 * If this happens, your timerlat is not the highest prio. OK, migration
	 * thread can happen. But otherwise, you are not measuring the "scheduling
	 * latency" only, and here is the difference from scheduling latency and
	 * timer handling latency.
	 */
	if (taa_data->thread_thread_sum) {
		printf("  Thread interference %.*s %9.2f us (%.2f %%)\n", 33, spaces,
		       ns_to_usf(taa_data->thread_thread_sum),
		       ns_to_per(total, taa_data->thread_thread_sum));

		trace_seq_do_printf(taa_data->threads_seq);
	}

	/*
	 * Done.
	 */
print_total:
	printf("------------------------------------------------------------------------\n");
	printf("  %s latency: %.*s %9.2f us (100%%)\n", irq ? "   IRQ" : "Thread",
	       37, spaces, ns_to_usf(total));
}
705
706static int timerlat_auto_analysis_collect_trace(struct timerlat_aa_context *taa_ctx)
707{
708	struct trace_instance *trace = &taa_ctx->tool->trace;
709	int retval;
710
711	retval = tracefs_iterate_raw_events(trace->tep,
712					    trace->inst,
713					    NULL,
714					    0,
715					    collect_registered_events,
716					    trace);
717		if (retval < 0) {
718			err_msg("Error iterating on events\n");
719			return 0;
720		}
721
722	return 1;
723}
724
725/**
726 * timerlat_auto_analysis - Analyze the collected data
727 */
728void timerlat_auto_analysis(int irq_thresh, int thread_thresh)
729{
730	struct timerlat_aa_context *taa_ctx = timerlat_aa_get_ctx();
731	unsigned long long max_exit_from_idle = 0;
732	struct timerlat_aa_data *taa_data;
733	int max_exit_from_idle_cpu;
734	struct tep_handle *tep;
735	int cpu;
736
737	timerlat_auto_analysis_collect_trace(taa_ctx);
738
739	/* bring stop tracing to the ns scale */
740	irq_thresh = irq_thresh * 1000;
741	thread_thresh = thread_thresh * 1000;
742
743	for (cpu = 0; cpu < taa_ctx->nr_cpus; cpu++) {
744		taa_data = timerlat_aa_get_data(taa_ctx, cpu);
745
746		if (irq_thresh && taa_data->tlat_irq_latency >= irq_thresh) {
747			printf("## CPU %d hit stop tracing, analyzing it ##\n", cpu);
748			timerlat_thread_analysis(taa_data, cpu, irq_thresh, thread_thresh);
749		} else if (thread_thresh && (taa_data->tlat_thread_latency) >= thread_thresh) {
750			printf("## CPU %d hit stop tracing, analyzing it ##\n", cpu);
751			timerlat_thread_analysis(taa_data, cpu, irq_thresh, thread_thresh);
752		}
753
754		if (taa_data->max_exit_idle_latency > max_exit_from_idle) {
755			max_exit_from_idle = taa_data->max_exit_idle_latency;
756			max_exit_from_idle_cpu = cpu;
757		}
758
759	}
760
761	if (max_exit_from_idle) {
762		printf("\n");
763		printf("Max timerlat IRQ latency from idle: %.2f us in cpu %d\n",
764			ns_to_usf(max_exit_from_idle), max_exit_from_idle_cpu);
765	}
766	if (!taa_ctx->dump_tasks)
767		return;
768
769	printf("\n");
770	printf("Printing CPU tasks:\n");
771	for (cpu = 0; cpu < taa_ctx->nr_cpus; cpu++) {
772		taa_data = timerlat_aa_get_data(taa_ctx, cpu);
773		tep = taa_ctx->tool->trace.tep;
774
775		printf("    [%.3d] %24s:%llu", cpu, taa_data->current_comm, taa_data->current_pid);
776
777		if (taa_data->kworker_func)
778			printf(" kworker:%s:%s",
779				tep_find_function(tep, taa_data->kworker) ? : "<...>",
780				tep_find_function(tep, taa_data->kworker_func));
781		printf("\n");
782	}
783
784}
785
786/*
787 * timerlat_aa_destroy_seqs - Destroy seq files used to store parsed data
788 */
789static void timerlat_aa_destroy_seqs(struct timerlat_aa_context *taa_ctx)
790{
791	struct timerlat_aa_data *taa_data;
792	int i;
793
794	if (!taa_ctx->taa_data)
795		return;
796
797	for (i = 0; i < taa_ctx->nr_cpus; i++) {
798		taa_data = timerlat_aa_get_data(taa_ctx, i);
799
800		if (taa_data->prev_irqs_seq) {
801			trace_seq_destroy(taa_data->prev_irqs_seq);
802			free(taa_data->prev_irqs_seq);
803		}
804
805		if (taa_data->nmi_seq) {
806			trace_seq_destroy(taa_data->nmi_seq);
807			free(taa_data->nmi_seq);
808		}
809
810		if (taa_data->irqs_seq) {
811			trace_seq_destroy(taa_data->irqs_seq);
812			free(taa_data->irqs_seq);
813		}
814
815		if (taa_data->softirqs_seq) {
816			trace_seq_destroy(taa_data->softirqs_seq);
817			free(taa_data->softirqs_seq);
818		}
819
820		if (taa_data->threads_seq) {
821			trace_seq_destroy(taa_data->threads_seq);
822			free(taa_data->threads_seq);
823		}
824
825		if (taa_data->stack_seq) {
826			trace_seq_destroy(taa_data->stack_seq);
827			free(taa_data->stack_seq);
828		}
829	}
830}
831
832/*
833 * timerlat_aa_init_seqs - Init seq files used to store parsed information
834 *
835 * Instead of keeping data structures to store raw data, use seq files to
836 * store parsed data.
837 *
838 * Allocates and initialize seq files.
839 *
840 * Returns 0 on success, -1 otherwise.
841 */
842static int timerlat_aa_init_seqs(struct timerlat_aa_context *taa_ctx)
843{
844	struct timerlat_aa_data *taa_data;
845	int i;
846
847	for (i = 0; i < taa_ctx->nr_cpus; i++) {
848
849		taa_data = timerlat_aa_get_data(taa_ctx, i);
850
851		taa_data->prev_irqs_seq = calloc(1, sizeof(*taa_data->prev_irqs_seq));
852		if (!taa_data->prev_irqs_seq)
853			goto out_err;
854
855		trace_seq_init(taa_data->prev_irqs_seq);
856
857		taa_data->nmi_seq = calloc(1, sizeof(*taa_data->nmi_seq));
858		if (!taa_data->nmi_seq)
859			goto out_err;
860
861		trace_seq_init(taa_data->nmi_seq);
862
863		taa_data->irqs_seq = calloc(1, sizeof(*taa_data->irqs_seq));
864		if (!taa_data->irqs_seq)
865			goto out_err;
866
867		trace_seq_init(taa_data->irqs_seq);
868
869		taa_data->softirqs_seq = calloc(1, sizeof(*taa_data->softirqs_seq));
870		if (!taa_data->softirqs_seq)
871			goto out_err;
872
873		trace_seq_init(taa_data->softirqs_seq);
874
875		taa_data->threads_seq = calloc(1, sizeof(*taa_data->threads_seq));
876		if (!taa_data->threads_seq)
877			goto out_err;
878
879		trace_seq_init(taa_data->threads_seq);
880
881		taa_data->stack_seq = calloc(1, sizeof(*taa_data->stack_seq));
882		if (!taa_data->stack_seq)
883			goto out_err;
884
885		trace_seq_init(taa_data->stack_seq);
886	}
887
888	return 0;
889
890out_err:
891	timerlat_aa_destroy_seqs(taa_ctx);
892	return -1;
893}
894
/*
 * timerlat_aa_unregister_events - Unregister events used in the auto-analysis
 *
 * Mirror of timerlat_aa_register_events(): disables the tracefs events
 * and removes each tep handler. The sched/workqueue pair is only torn
 * down when it was registered (dump_tasks).
 */
static void timerlat_aa_unregister_events(struct osnoise_tool *tool, int dump_tasks)
{

	tep_unregister_event_handler(tool->trace.tep, -1, "ftrace", "timerlat",
				     timerlat_aa_handler, tool);

	/* NULL event name disables all osnoise events at once */
	tracefs_event_disable(tool->trace.inst, "osnoise", NULL);

	tep_unregister_event_handler(tool->trace.tep, -1, "osnoise", "nmi_noise",
				     timerlat_aa_nmi_handler, tool);

	tep_unregister_event_handler(tool->trace.tep, -1, "osnoise", "irq_noise",
				     timerlat_aa_irq_handler, tool);

	tep_unregister_event_handler(tool->trace.tep, -1, "osnoise", "softirq_noise",
				     timerlat_aa_softirq_handler, tool);

	tep_unregister_event_handler(tool->trace.tep, -1, "osnoise", "thread_noise",
				     timerlat_aa_thread_handler, tool);

	tep_unregister_event_handler(tool->trace.tep, -1, "ftrace", "kernel_stack",
				     timerlat_aa_stack_handler, tool);
	if (!dump_tasks)
		return;

	tracefs_event_disable(tool->trace.inst, "sched", "sched_switch");
	tep_unregister_event_handler(tool->trace.tep, -1, "sched", "sched_switch",
				     timerlat_aa_sched_switch_handler, tool);

	tracefs_event_disable(tool->trace.inst, "workqueue", "workqueue_execute_start");
	tep_unregister_event_handler(tool->trace.tep, -1, "workqueue", "workqueue_execute_start",
				     timerlat_aa_kworker_start_handler, tool);
}
931
/*
 * timerlat_aa_register_events - Register events used in the auto-analysis
 *
 * Enables the tracefs events and hooks a tep handler for each event the
 * analysis consumes. The sched/workqueue events are only enabled when
 * dump_tasks is set.
 *
 * Returns 0 on success, -1 otherwise.
 */
static int timerlat_aa_register_events(struct osnoise_tool *tool, int dump_tasks)
{
	int retval;

	tep_register_event_handler(tool->trace.tep, -1, "ftrace", "timerlat",
				timerlat_aa_handler, tool);


	/*
	 * register auto-analysis handlers.
	 */
	retval = tracefs_event_enable(tool->trace.inst, "osnoise", NULL);
	/*
	 * NOTE(review): error with errno unset is treated as "event does
	 * not exist" — confirm against the tracefs_event_enable() contract.
	 */
	if (retval < 0 && !errno) {
		err_msg("Could not find osnoise events\n");
		goto out_err;
	}

	tep_register_event_handler(tool->trace.tep, -1, "osnoise", "nmi_noise",
				   timerlat_aa_nmi_handler, tool);

	tep_register_event_handler(tool->trace.tep, -1, "osnoise", "irq_noise",
				   timerlat_aa_irq_handler, tool);

	tep_register_event_handler(tool->trace.tep, -1, "osnoise", "softirq_noise",
				   timerlat_aa_softirq_handler, tool);

	tep_register_event_handler(tool->trace.tep, -1, "osnoise", "thread_noise",
				   timerlat_aa_thread_handler, tool);

	tep_register_event_handler(tool->trace.tep, -1, "ftrace", "kernel_stack",
				   timerlat_aa_stack_handler, tool);

	if (!dump_tasks)
		return 0;

	/*
	 * Dump task events.
	 */
	retval = tracefs_event_enable(tool->trace.inst, "sched", "sched_switch");
	if (retval < 0 && !errno) {
		err_msg("Could not find sched_switch\n");
		goto out_err;
	}

	tep_register_event_handler(tool->trace.tep, -1, "sched", "sched_switch",
				   timerlat_aa_sched_switch_handler, tool);

	retval = tracefs_event_enable(tool->trace.inst, "workqueue", "workqueue_execute_start");
	if (retval < 0 && !errno) {
		err_msg("Could not find workqueue_execute_start\n");
		goto out_err;
	}

	tep_register_event_handler(tool->trace.tep, -1, "workqueue", "workqueue_execute_start",
				   timerlat_aa_kworker_start_handler, tool);

	return 0;

out_err:
	/* undo partial registration */
	timerlat_aa_unregister_events(tool, dump_tasks);
	return -1;
}
999
1000/**
1001 * timerlat_aa_destroy - Destroy timerlat auto-analysis
1002 */
1003void timerlat_aa_destroy(void)
1004{
1005	struct timerlat_aa_context *taa_ctx = timerlat_aa_get_ctx();
1006
1007	if (!taa_ctx)
1008		return;
1009
1010	if (!taa_ctx->taa_data)
1011		goto out_ctx;
1012
1013	timerlat_aa_unregister_events(taa_ctx->tool, taa_ctx->dump_tasks);
1014	timerlat_aa_destroy_seqs(taa_ctx);
1015	free(taa_ctx->taa_data);
1016out_ctx:
1017	free(taa_ctx);
1018}
1019
/**
 * timerlat_aa_init - Initialize timerlat auto-analysis
 *
 * Allocates the context and per-cpu data, initializes the seq files and
 * registers the event handlers.
 *
 * Returns 0 on success, -1 otherwise.
 */
int timerlat_aa_init(struct osnoise_tool *tool, int dump_tasks)
{
	/* size the per-cpu array by configured (not just online) CPUs */
	int nr_cpus = sysconf(_SC_NPROCESSORS_CONF);
	struct timerlat_aa_context *taa_ctx;
	int retval;

	taa_ctx = calloc(1, sizeof(*taa_ctx));
	if (!taa_ctx)
		return -1;

	/*
	 * Publish the context before the remaining setup so the error
	 * path can rely on timerlat_aa_destroy() finding it.
	 */
	__timerlat_aa_ctx = taa_ctx;

	taa_ctx->nr_cpus = nr_cpus;
	taa_ctx->tool = tool;
	taa_ctx->dump_tasks = dump_tasks;

	taa_ctx->taa_data = calloc(nr_cpus, sizeof(*taa_ctx->taa_data));
	if (!taa_ctx->taa_data)
		goto out_err;

	retval = timerlat_aa_init_seqs(taa_ctx);
	if (retval)
		goto out_err;

	retval = timerlat_aa_register_events(tool, dump_tasks);
	if (retval)
		goto out_err;

	return 0;

out_err:
	/* frees whatever was allocated so far via the published context */
	timerlat_aa_destroy();
	return -1;
}
1059