157416Smarkm// SPDX-License-Identifier: GPL-2.0
257416Smarkm#include <linux/delay.h>
357416Smarkm#include <linux/module.h>
457416Smarkm#include <linux/kthread.h>
557416Smarkm#include <linux/trace_clock.h>
657416Smarkm
757416Smarkm#define CREATE_TRACE_POINTS
857416Smarkm#include "trace_benchmark.h"
957416Smarkm
/* Kthread that repeatedly fires the benchmark tracepoint; NULL when stopped */
static struct task_struct *bm_event_thread;

/* String recorded by each event: the stats of the *previous* event write */
static char bm_str[BENCHMARK_EVENT_STRLEN] = "START";

static u64 bm_total;	/* sum of all measured deltas (for the average) */
static u64 bm_totalsq;	/* sum of squared deltas (for the variance) */
static u64 bm_last;	/* duration of the most recent event write */
static u64 bm_max;	/* largest delta seen so far */
static u64 bm_min;	/* smallest delta seen; 0 means "not yet recorded" */
static u64 bm_first;	/* the cold-cache first write, kept out of the stats */
static u64 bm_cnt;	/* number of event writes so far */
static u64 bm_stddev;	/* last computed variance (reported as std^2) */
static unsigned int bm_avg;	/* last computed average delta */
static unsigned int bm_std;	/* last computed standard deviation */

/* Set by the early initcall below; the benchmark may not start before then */
static bool ok_to_run;
2657416Smarkm
/*
 * This gets called in a loop recording the time it took to write
 * the tracepoint. What it writes is the time statistics of the last
 * tracepoint write. As there is nothing to write the first time
 * it simply writes "START". As the first write is cold cache and
 * the rest is hot, we save off that time in bm_first and it is
 * reported as "first", which is shown in the second write to the
 * tracepoint. The "first" field is written within the statics from
 * then on but never changes.
 */
static void trace_do_benchmark(void)
{
	u64 start;
	u64 stop;
	u64 delta;
	u64 stddev;	/* holds the variance (std^2), not the deviation */
	u64 seed;
	u64 last_seed;
	unsigned int avg;
	unsigned int std = 0;

	/* Only run if the tracepoint is actually active */
	if (!trace_benchmark_event_enabled() || !tracing_is_on())
		return;

	/* Keep interrupts out of the timed window: measure just the event write */
	local_irq_disable();
	start = trace_clock_local();
	trace_benchmark_event(bm_str, bm_last);
	stop = trace_clock_local();
	local_irq_enable();

	bm_cnt++;

	delta = stop - start;

	/*
	 * The first read is cold cached, keep it separate from the
	 * other calculations.
	 */
	if (bm_cnt == 1) {
		bm_first = delta;
		scnprintf(bm_str, BENCHMARK_EVENT_STRLEN,
			  "first=%llu [COLD CACHED]", bm_first);
		return;
	}

	bm_last = delta;

	/* Track extremes; bm_min == 0 doubles as "not yet set" */
	if (delta > bm_max)
		bm_max = delta;
	if (!bm_min || delta < bm_min)
		bm_min = delta;

	/*
	 * When bm_cnt is greater than UINT_MAX, it breaks the statistics
	 * accounting. Freeze the statistics when that happens.
	 * We should have enough data for the avg and stddev anyway.
	 */
	if (bm_cnt > UINT_MAX) {
		scnprintf(bm_str, BENCHMARK_EVENT_STRLEN,
		    "last=%llu first=%llu max=%llu min=%llu ** avg=%u std=%d std^2=%lld",
			  bm_last, bm_first, bm_max, bm_min, bm_avg, bm_std, bm_stddev);
		return;
	}

	bm_total += delta;
	bm_totalsq += delta * delta;

	if (bm_cnt > 1) {
		/*
		 * Apply Welford's method to calculate standard deviation:
		 * s^2 = 1 / (n * (n-1)) * (n * \Sum (x_i)^2 - (\Sum x_i)^2)
		 */
		stddev = (u64)bm_cnt * bm_totalsq - bm_total * bm_total;
		/* do_div() divides in place and takes a 32-bit divisor */
		do_div(stddev, (u32)bm_cnt);
		do_div(stddev, (u32)bm_cnt - 1);
	} else
		stddev = 0;

	/* Average = running total / count (delta reused as scratch) */
	delta = bm_total;
	do_div(delta, (u32)bm_cnt);
	avg = delta;

	if (stddev > 0) {
		int i = 0;
		/*
		 * stddev is the square of standard deviation but
		 * we want the actually number. Use the average
		 * as our seed to find the std.
		 *
		 * The next try is:
		 *  x = (x + N/x) / 2
		 *
		 * Where N is the squared number to find the square
		 * root of.
		 */
		seed = avg;
		do {
			last_seed = seed;
			seed = stddev;
			/* Guard the division below against a zero seed */
			if (!last_seed)
				break;
			/* One Newton-Raphson step: seed = (N/last + last) / 2 */
			seed = div64_u64(seed, last_seed);
			seed += last_seed;
			do_div(seed, 2);
		} while (i++ < 10 && last_seed != seed);	/* cap at 10 iterations */

		std = seed;
	}

	scnprintf(bm_str, BENCHMARK_EVENT_STRLEN,
		  "last=%llu first=%llu max=%llu min=%llu avg=%u std=%d std^2=%lld",
		  bm_last, bm_first, bm_max, bm_min, avg, std, stddev);

	/* Save the frozen-stat values reported once bm_cnt overflows UINT_MAX */
	bm_std = std;
	bm_avg = avg;
	bm_stddev = stddev;
}
14557416Smarkm
/*
 * Benchmark thread body: fire the benchmark tracepoint as fast as
 * possible until the thread is asked to stop.
 */
static int benchmark_event_kthread(void *arg)
{
	/* Give the tracepoint a moment to become fully active first */
	msleep(100);

	for (;;) {
		if (kthread_should_stop())
			break;

		trace_do_benchmark();

		/*
		 * Never sleep, but do offer the CPU to anything runnable.
		 * This is essentially a "yield()": other tasks may schedule
		 * in, but on an otherwise idle CPU we keep burning cycles.
		 *
		 * Note the tasks_rcu_qs() version of cond_resched() also
		 * reports a quiescent state for rcu_tasks. Without it this
		 * thread would never voluntarily schedule, and
		 * synchronize_rcu_tasks() could block indefinitely.
		 */
		cond_resched_tasks_rcu_qs();
	}

	return 0;
}
17257416Smarkm
17357416Smarkm/*
17457416Smarkm * When the benchmark tracepoint is enabled, it calls this
17557416Smarkm * function and the thread that calls the tracepoint is created.
17657416Smarkm */
17757416Smarkmint trace_benchmark_reg(void)
17857416Smarkm{
17957416Smarkm	if (!ok_to_run) {
18057416Smarkm		pr_warn("trace benchmark cannot be started via kernel command line\n");
18157416Smarkm		return -EBUSY;
18257416Smarkm	}
18357416Smarkm
18457416Smarkm	bm_event_thread = kthread_run(benchmark_event_kthread,
18557416Smarkm				      NULL, "event_benchmark");
18657416Smarkm	if (IS_ERR(bm_event_thread)) {
18757416Smarkm		pr_warn("trace benchmark failed to create kernel thread\n");
18857416Smarkm		return PTR_ERR(bm_event_thread);
18957416Smarkm	}
19057416Smarkm
19157416Smarkm	return 0;
19257416Smarkm}
19357416Smarkm
19457416Smarkm/*
19557416Smarkm * When the benchmark tracepoint is disabled, it calls this
19657416Smarkm * function and the thread that calls the tracepoint is deleted
19757416Smarkm * and all the numbers are reset.
19857416Smarkm */
19957416Smarkmvoid trace_benchmark_unreg(void)
20057416Smarkm{
20157416Smarkm	if (!bm_event_thread)
20257416Smarkm		return;
20357416Smarkm
20457416Smarkm	kthread_stop(bm_event_thread);
20557416Smarkm	bm_event_thread = NULL;
20657416Smarkm
20757416Smarkm	strcpy(bm_str, "START");
20857416Smarkm	bm_total = 0;
20957416Smarkm	bm_totalsq = 0;
21057416Smarkm	bm_last = 0;
21157416Smarkm	bm_max = 0;
21257416Smarkm	bm_min = 0;
21357416Smarkm	bm_cnt = 0;
21457416Smarkm	/* These don't need to be reset but reset them anyway */
21557416Smarkm	bm_first = 0;
21657416Smarkm	bm_std = 0;
21757416Smarkm	bm_avg = 0;
21857416Smarkm	bm_stddev = 0;
21957416Smarkm}
22057416Smarkm
22157416Smarkmstatic __init int ok_to_run_trace_benchmark(void)
22257416Smarkm{
22357416Smarkm	ok_to_run = true;
22457416Smarkm
22557416Smarkm	return 0;
22657416Smarkm}
22757416Smarkm
22857416Smarkmearly_initcall(ok_to_run_trace_benchmark);
22957416Smarkm