157416Smarkm// SPDX-License-Identifier: GPL-2.0 257416Smarkm#include <linux/delay.h> 357416Smarkm#include <linux/module.h> 457416Smarkm#include <linux/kthread.h> 557416Smarkm#include <linux/trace_clock.h> 657416Smarkm 757416Smarkm#define CREATE_TRACE_POINTS 857416Smarkm#include "trace_benchmark.h" 957416Smarkm 1057416Smarkmstatic struct task_struct *bm_event_thread; 1157416Smarkm 1257416Smarkmstatic char bm_str[BENCHMARK_EVENT_STRLEN] = "START"; 1357416Smarkm 1457416Smarkmstatic u64 bm_total; 1557416Smarkmstatic u64 bm_totalsq; 1657416Smarkmstatic u64 bm_last; 1757416Smarkmstatic u64 bm_max; 1857416Smarkmstatic u64 bm_min; 1957416Smarkmstatic u64 bm_first; 2057416Smarkmstatic u64 bm_cnt; 2157416Smarkmstatic u64 bm_stddev; 2257416Smarkmstatic unsigned int bm_avg; 2357416Smarkmstatic unsigned int bm_std; 2457416Smarkm 2557416Smarkmstatic bool ok_to_run; 2657416Smarkm 2757416Smarkm/* 2857416Smarkm * This gets called in a loop recording the time it took to write 2957416Smarkm * the tracepoint. What it writes is the time statistics of the last 3057416Smarkm * tracepoint write. As there is nothing to write the first time 3157416Smarkm * it simply writes "START". As the first write is cold cache and 3257416Smarkm * the rest is hot, we save off that time in bm_first and it is 3357416Smarkm * reported as "first", which is shown in the second write to the 3457416Smarkm * tracepoint. The "first" field is written within the statics from 3557416Smarkm * then on but never changes. 36233294Sstas */ 3757416Smarkmstatic void trace_do_benchmark(void) 3857416Smarkm{ 3957416Smarkm u64 start; 4057416Smarkm u64 stop; 4157416Smarkm u64 delta; 4257416Smarkm u64 stddev; 4357416Smarkm u64 seed; 4457416Smarkm u64 last_seed; 4557416Smarkm unsigned int avg; 4657416Smarkm unsigned int std = 0; 4757416Smarkm 4857416Smarkm /* Only run if the tracepoint is actually active */ 4957416Smarkm if (!trace_benchmark_event_enabled() || !tracing_is_on()) 5057416Smarkm return; 5157416Smarkm 5257416Smarkm local_irq_disable(); 5357416Smarkm start = trace_clock_local(); 5457416Smarkm trace_benchmark_event(bm_str, bm_last); 5557416Smarkm stop = trace_clock_local(); 5657416Smarkm local_irq_enable(); 5757416Smarkm 5857416Smarkm bm_cnt++; 5957416Smarkm 6057416Smarkm delta = stop - start; 6157416Smarkm 6257416Smarkm /* 6357416Smarkm * The first read is cold cached, keep it separate from the 6457416Smarkm * other calculations. 6557416Smarkm */ 6657416Smarkm if (bm_cnt == 1) { 6757416Smarkm bm_first = delta; 6857416Smarkm scnprintf(bm_str, BENCHMARK_EVENT_STRLEN, 6957416Smarkm "first=%llu [COLD CACHED]", bm_first); 7057416Smarkm return; 7157416Smarkm } 7257416Smarkm 7357416Smarkm bm_last = delta; 7457416Smarkm 7557416Smarkm if (delta > bm_max) 7657416Smarkm bm_max = delta; 77178825Sdfr if (!bm_min || delta < bm_min) 7857416Smarkm bm_min = delta; 7957416Smarkm 8057416Smarkm /* 8157416Smarkm * When bm_cnt is greater than UINT_MAX, it breaks the statistics 8257416Smarkm * accounting. Freeze the statistics when that happens. 8357416Smarkm * We should have enough data for the avg and stddev anyway. 8457416Smarkm */ 8557416Smarkm if (bm_cnt > UINT_MAX) { 8657416Smarkm scnprintf(bm_str, BENCHMARK_EVENT_STRLEN, 8757416Smarkm "last=%llu first=%llu max=%llu min=%llu ** avg=%u std=%d std^2=%lld", 8857416Smarkm bm_last, bm_first, bm_max, bm_min, bm_avg, bm_std, bm_stddev); 8957416Smarkm return; 9057416Smarkm } 9157416Smarkm 9257416Smarkm bm_total += delta; 9357416Smarkm bm_totalsq += delta * delta; 9457416Smarkm 9557416Smarkm if (bm_cnt > 1) { 9657416Smarkm /* 9757416Smarkm * Apply Welford's method to calculate standard deviation: 9857416Smarkm * s^2 = 1 / (n * (n-1)) * (n * \Sum (x_i)^2 - (\Sum x_i)^2) 99178825Sdfr */ 10057416Smarkm stddev = (u64)bm_cnt * bm_totalsq - bm_total * bm_total; 10157416Smarkm do_div(stddev, (u32)bm_cnt); 10257416Smarkm do_div(stddev, (u32)bm_cnt - 1); 10357416Smarkm } else 10457416Smarkm stddev = 0; 10557416Smarkm 10657416Smarkm delta = bm_total; 10757416Smarkm do_div(delta, (u32)bm_cnt); 10857416Smarkm avg = delta; 10957416Smarkm 11057416Smarkm if (stddev > 0) { 11157416Smarkm int i = 0; 11257416Smarkm /* 11357416Smarkm * stddev is the square of standard deviation but 11457416Smarkm * we want the actually number. Use the average 11557416Smarkm * as our seed to find the std. 11657416Smarkm * 11757416Smarkm * The next try is: 11857416Smarkm * x = (x + N/x) / 2 11957416Smarkm * 12057416Smarkm * Where N is the squared number to find the square 12157416Smarkm * root of. 12257416Smarkm */ 12357416Smarkm seed = avg; 12457416Smarkm do { 12557416Smarkm last_seed = seed; 12657416Smarkm seed = stddev; 12757416Smarkm if (!last_seed) 12857416Smarkm break; 12957416Smarkm seed = div64_u64(seed, last_seed); 13057416Smarkm seed += last_seed; 13157416Smarkm do_div(seed, 2); 13257416Smarkm } while (i++ < 10 && last_seed != seed); 13357416Smarkm 13457416Smarkm std = seed; 13557416Smarkm } 13657416Smarkm 13757416Smarkm scnprintf(bm_str, BENCHMARK_EVENT_STRLEN, 13857416Smarkm "last=%llu first=%llu max=%llu min=%llu avg=%u std=%d std^2=%lld", 13957416Smarkm bm_last, bm_first, bm_max, bm_min, avg, std, stddev); 14057416Smarkm 14157416Smarkm bm_std = std; 14257416Smarkm bm_avg = avg; 14357416Smarkm bm_stddev = stddev; 14457416Smarkm} 14557416Smarkm 14657416Smarkmstatic int benchmark_event_kthread(void *arg) 14757416Smarkm{ 14857416Smarkm /* sleep a bit to make sure the tracepoint gets activated */ 14957416Smarkm msleep(100); 15057416Smarkm 15157416Smarkm while (!kthread_should_stop()) { 15257416Smarkm 15357416Smarkm trace_do_benchmark(); 15457416Smarkm 15557416Smarkm /* 15657416Smarkm * We don't go to sleep, but let others run as well. 15757416Smarkm * This is basically a "yield()" to let any task that 15857416Smarkm * wants to run, schedule in, but if the CPU is idle, 15957416Smarkm * we'll keep burning cycles. 16057416Smarkm * 16157416Smarkm * Note the tasks_rcu_qs() version of cond_resched() will 16257416Smarkm * notify synchronize_rcu_tasks() that this thread has 16357416Smarkm * passed a quiescent state for rcu_tasks. Otherwise 16457416Smarkm * this thread will never voluntarily schedule which would 16557416Smarkm * block synchronize_rcu_tasks() indefinitely. 16657416Smarkm */ 16757416Smarkm cond_resched_tasks_rcu_qs(); 16857416Smarkm } 16957416Smarkm 17057416Smarkm return 0; 17157416Smarkm} 17257416Smarkm 17357416Smarkm/* 17457416Smarkm * When the benchmark tracepoint is enabled, it calls this 17557416Smarkm * function and the thread that calls the tracepoint is created. 17657416Smarkm */ 17757416Smarkmint trace_benchmark_reg(void) 17857416Smarkm{ 17957416Smarkm if (!ok_to_run) { 18057416Smarkm pr_warn("trace benchmark cannot be started via kernel command line\n"); 18157416Smarkm return -EBUSY; 18257416Smarkm } 18357416Smarkm 18457416Smarkm bm_event_thread = kthread_run(benchmark_event_kthread, 18557416Smarkm NULL, "event_benchmark"); 18657416Smarkm if (IS_ERR(bm_event_thread)) { 18757416Smarkm pr_warn("trace benchmark failed to create kernel thread\n"); 18857416Smarkm return PTR_ERR(bm_event_thread); 18957416Smarkm } 19057416Smarkm 19157416Smarkm return 0; 19257416Smarkm} 19357416Smarkm 19457416Smarkm/* 19557416Smarkm * When the benchmark tracepoint is disabled, it calls this 19657416Smarkm * function and the thread that calls the tracepoint is deleted 19757416Smarkm * and all the numbers are reset. 19857416Smarkm */ 19957416Smarkmvoid trace_benchmark_unreg(void) 20057416Smarkm{ 20157416Smarkm if (!bm_event_thread) 20257416Smarkm return; 20357416Smarkm 20457416Smarkm kthread_stop(bm_event_thread); 20557416Smarkm bm_event_thread = NULL; 20657416Smarkm 20757416Smarkm strcpy(bm_str, "START"); 20857416Smarkm bm_total = 0; 20957416Smarkm bm_totalsq = 0; 21057416Smarkm bm_last = 0; 21157416Smarkm bm_max = 0; 21257416Smarkm bm_min = 0; 21357416Smarkm bm_cnt = 0; 21457416Smarkm /* These don't need to be reset but reset them anyway */ 21557416Smarkm bm_first = 0; 21657416Smarkm bm_std = 0; 21757416Smarkm bm_avg = 0; 21857416Smarkm bm_stddev = 0; 21957416Smarkm} 22057416Smarkm 22157416Smarkmstatic __init int ok_to_run_trace_benchmark(void) 22257416Smarkm{ 22357416Smarkm ok_to_run = true; 22457416Smarkm 22557416Smarkm return 0; 22657416Smarkm} 22757416Smarkm 22857416Smarkmearly_initcall(ok_to_run_trace_benchmark); 22957416Smarkm