/*
 * linux/drivers/char/kernprof.c
 *
 * Implementation of profiling devices.  We reserve one minor number
 * (PROF_CNTRL_MINOR) for a control interface; ioctl()s on this device
 * control various profiling settings.
 *
 * Copyright (C) SGI 1999, 2000, 2001
 *
 * Written by Dimitris Michailidis (dimitris@engr.sgi.com)
 * Modified by John Hawkes (hawkes@engr.sgi.com)
 * Contributions from Niels Christiansen (nchr@us.ibm.com)
 */

#include <linux/config.h>
#include <linux/module.h>
#include <linux/kernprof.h>
#include <linux/init.h>
#include <linux/fs.h>
#include <linux/major.h>
#include <linux/proc_fs.h>
#include <linux/slab.h>
#include <linux/vmalloc.h>
#include <linux/smp.h>
#include <linux/devfs_fs_kernel.h>
#include <linux/compiler.h>

#include <asm/uaccess.h>
#include <asm/kernprof.h>

#define PROF_CNTRL_MINOR 0

int prof_enabled = 0;		/* any profiling active */
int prof_domain = PROF_DOMAIN_TIME, prof_mode = PROF_MODE_PC_SAMPLING;
int prof_pid = 0;
int perfctr_event = 0;
unsigned int prof_shift, PC_resolution = DFL_PC_RES;
unsigned int perfctr_freq = 1000;
unsigned long unload_timeout = 0;

prof_hook_p *prof_intr_hook = &prof_timer_hook;
prof_hook_p prof_perfctr_aux_hook = NULL;

/* This buffer holds PC samples */
PC_sample_count_t *PC_sample_buf = NULL;
size_t PC_buf_sz;

/* Switch for /proc files created */
int proc_created = 0;

devfs_handle_t proc_handle;

/*
 * These variables deal with the call graph.  The call graph records arcs
 * linking the location of each function call to the address of the called
 * function.  It is maintained as a hash table indexed by a call site's
 * location.  The bucket associated with each hash table entry records the
 * targets of the calls.
 */
unsigned short *cg_from_base = NULL;
struct cg_arc_dest *cg_to_base = NULL;
size_t cg_from_sz, cg_to_sz;
int cg_arc_overflow;	/* set when no new arcs can be added to the call graph */
int n_buckets = 0;

size_t mem_needed;	/* space needed for the call graph and the PC samples */

/* And these hold backtrace samples */
struct trace_ring_buf {
	unsigned long *data;
	int start;
	int end;
	int active;
};

struct trace_ring_buf trace_bufs[NR_CPUS];

prof_mem_map_t memory_map;

unsigned char cpu_prof_enabled[NR_CPUS];
unsigned long cpu_prof_enable_map = ~0UL;

#define DEBUG_RECUR_COUNT_MAX 4
static union {
	struct percpu_data {
		unsigned long lost_ones;
		unsigned long total_mcount;
		unsigned long debug_recurse_count[DEBUG_RECUR_COUNT_MAX];
		unsigned int amhere;
	} d;
	char __pad[SMP_CACHE_BYTES];
} kernprof_cpu_data[NR_CPUS] __cacheline_aligned;

MODULE_AUTHOR("Dimitris Michailidis");
MODULE_DESCRIPTION("Kernel profile driver");

MODULE_PARM(PC_resolution, "i");
MODULE_PARM_DESC(PC_resolution, "resolution of PC samples "
		 "(rounded down to a power of 2)");
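
/*
 * Typical user-space usage, as a rough sketch only; the exact device node
 * depends on how the KERNPROF_MAJOR character device is exposed on a given
 * system (e.g. /dev/profile under devfs):
 *
 *	fd = open("/dev/profile", O_RDONLY);
 *	ioctl(fd, PROF_SET_DOMAIN, PROF_DOMAIN_TIME);
 *	ioctl(fd, PROF_SET_MODE, PROF_MODE_PC_SAMPLING);
 *	ioctl(fd, PROF_START, 0);
 *	... run the workload ...
 *	ioctl(fd, PROF_STOP, 0);
 *	... then read /proc/profile/PC_samples (and /proc/profile/call_graph
 *	    in the call-graph modes) to collect the results.
 */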

/* Round x up to a multiple of n.  n must be a power of 2. */
static inline size_t roundup(size_t x, int n)
{
	return (x + n - 1) & ~(n - 1);
}

/* The next few definitions deal with procfs */
static ssize_t read_prof_buf(char *prof_buf, size_t prof_buf_sz,
			     char *user_buf, size_t count, loff_t *ppos)
{
	if (!prof_buf)
		return -EIO;
	if (*ppos >= prof_buf_sz)
		return 0;
	if (count > prof_buf_sz - *ppos)
		count = prof_buf_sz - *ppos;
	if (copy_to_user(user_buf, prof_buf + *ppos, count))
		return -EFAULT;
	*ppos += count;
	return count;
}

static ssize_t read_PC_samples(struct file *file, char *user_buf,
			       size_t count, loff_t *ppos)
{
	return read_prof_buf((char *)PC_sample_buf, PC_buf_sz, user_buf,
			     count, ppos);
}

static struct file_operations proc_PC_sample_operations = {
	read:	read_PC_samples,
};

static ssize_t read_call_graph(struct file *file, char *user_buf,
			       size_t count, loff_t *ppos)
{
	return read_prof_buf((char *)cg_from_base,
			     (cg_from_sz + cg_to_sz) * smp_num_cpus,
			     user_buf, count, ppos);
}

static struct file_operations proc_call_graph_operations = {
	read:	read_call_graph,
};

static void expand_enable_map(void)
{
	int i;

	for (i = 0; i < NR_CPUS; ++i)
		cpu_prof_enabled[i] = (cpu_prof_enable_map & (1L << i)) != 0;
}

static void prof_reset(void)
{
	int i;

	if (PC_sample_buf)
		memset(PC_sample_buf, 0, mem_needed);
	cg_arc_overflow = 0;
	prof_pid = 0;
	for (i = 0; i < smp_num_cpus; i++) {
#ifdef CONFIG_LIMIT_RECURS
		int c;
		for (c = 0; c < DEBUG_RECUR_COUNT_MAX; c++) {
			kernprof_cpu_data[i].d.debug_recurse_count[c] = 0L;
		}
#endif
		kernprof_cpu_data[i].d.total_mcount = 0L;
		kernprof_cpu_data[i].d.lost_ones = 0L;
		trace_bufs[i].start = 0;
		trace_bufs[i].end = PROF_BACKTRACE_BUFSIZE - 1;
	}
}

/* Deallocate profiling buffers */
static void prof_free_mem(void)
{
	int i;

	/* vfree() and kfree() handle NULL pointers */
	vfree(PC_sample_buf);
	PC_sample_buf = NULL;
	for (i = 0; i < smp_num_cpus; ++i) {
		kfree(trace_bufs[cpu_logical_map(i)].data);
		trace_bufs[cpu_logical_map(i)].data = NULL;
	}
}
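
/*
 * Layout of the single vmalloc()ed profiling buffer set up by
 * prof_alloc_mem() below (the same offsets are exported to user space
 * through memory_map / PROF_GET_MAPPING):
 *
 *	PC_sample_buf:	n_buckets PC-sample counters	(PC_buf_sz bytes)
 *	cg_from_base:	per-CPU hash tables, one unsigned short per PC
 *			bucket				(cg_from_sz * smp_num_cpus)
 *	cg_to_base:	per-CPU pools of CG_MAX_ARCS struct cg_arc_dest
 *			entries				(cg_to_sz * smp_num_cpus)
 *
 * The call-graph parts exist only when supports_call_graph is true.
 */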

/*
 * Allocate memory for the various profiling buffers.  We are lazy and only do
 * this if we really try to use the profiling facilities.
 */
static int prof_alloc_mem(void)
{
	char *p;
	int i;

	if ((p = vmalloc(mem_needed)) == NULL)
		return -ENOMEM;
	PC_sample_buf = (PC_sample_count_t *) p;
	memory_map.nr_cpus = smp_num_cpus;
	if (supports_call_graph) {
		cg_from_base = (unsigned short *) (p + PC_buf_sz);
		cg_to_base = (struct cg_arc_dest *)
			(p + PC_buf_sz + cg_from_sz * smp_num_cpus);
		memory_map.cg_from_size = cg_from_sz;
		memory_map.cg_to_size = cg_to_sz;
		memory_map.cg_to_offset = cg_from_sz * smp_num_cpus;
	} else {
		memory_map.cg_from_size = 0L;
		memory_map.cg_to_size = 0L;
		memory_map.cg_to_offset = 0L;
	}
	if (prof_have_frameptr)	/* allocate ring buffers for present CPUs */
		for (i = 0; i < smp_num_cpus; ++i) {
			int cpu = cpu_logical_map(i);

			trace_bufs[cpu].data = (unsigned long *)kmalloc(
				PROF_BACKTRACE_BUFSIZE * sizeof(unsigned long),
				GFP_KERNEL);
		}
	prof_reset();
	return 0;
}
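
/*
 * PC samples are bucketed by offset into the kernel text:
 *
 *	bucket = (pc - _stext) >> prof_shift
 *
 * where prof_shift is log2 of PC_resolution.  For example (assuming
 * PC_resolution is 16), samples at _stext+0x1003 and _stext+0x100f both
 * land in bucket 0x100, while _stext+0x1010 starts bucket 0x101.  Samples
 * that fall outside the static kernel text are folded into the synthetic
 * FUNCTIONPC(USER/FIRMWARE/MODULE/UNKNOWN_KERNEL) buckets.
 */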

/* Record a PC sample.  Called from interrupt handlers.  SMP safe. */
static void PC_sample(struct pt_regs *regs)
{
	unsigned long pc;

	if (!cpu_prof_enabled[smp_processor_id()])
		return;
	if (prof_pid && (!current || current->pid != prof_pid))
		return;

	pc = instruction_pointer(regs);
	if (user_mode(regs))
		pc = FUNCTIONPC(USER);
	else if (in_firmware(regs))
		pc = FUNCTIONPC(FIRMWARE);
	else if (pc >= memory_map.module_start && pc < memory_map.module_end)
		pc = FUNCTIONPC(MODULE);
	else if (pc_out_of_range(pc))
		pc = FUNCTIONPC(UNKNOWN_KERNEL);

	pc -= (unsigned long) &_stext;
	atomic_inc((atomic_t *) &PC_sample_buf[pc >> prof_shift]);
}

/*
 * Record PC samples when woken up, called from schedule()
 *	blocked --> time spent sleeping on a wait queue
 *	stalled --> time spent runnable yet not running
 */
static void PC_wakeup_sample(unsigned long frompc, unsigned long blocked,
			     unsigned long stalled)
{
	if (!cpu_prof_enabled[smp_processor_id()])
		return;
	if (prof_pid && (!current || current->pid != prof_pid))
		return;

	if (blocked == 0)
		goto stalled;

	frompc = FUNCTIONPC(SLEEPING) - (unsigned long) &_stext;
	atomic_add(blocked * (get_prof_freq() / HZ),
		   (atomic_t *) &PC_sample_buf[frompc >> prof_shift]);

stalled:
	if (!stalled)
		return;

	frompc = FUNCTIONPC(STALLED) - (unsigned long) &_stext;
	atomic_add(stalled * (get_prof_freq() / HZ),
		   (atomic_t *) &PC_sample_buf[frompc >> prof_shift]);
}

/* Maintain function call counts.  Called by mcount().  SMP safe. */
void record_fn_call(unsigned long not_used, unsigned long pc)
{
	if (prof_pid && (!current || current->pid != prof_pid))
		return;
	if (pc_out_of_range(pc)) {
		if (pc >= memory_map.module_start && pc < memory_map.module_end)
			pc = FUNCTIONPC(MODULE);
		else
			pc = FUNCTIONPC(UNKNOWN_KERNEL);
	}
	pc -= (unsigned long) &_stext;
	atomic_inc((atomic_t *) &PC_sample_buf[pc >> prof_shift]);
}
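
/*
 * Call-graph storage.  Each CPU owns one slice of cg_from_base and one slice
 * of cg_to_base, so arcs are recorded without cross-CPU contention.
 * cg_from[] is indexed by the caller's PC bucket (the same bucketing used
 * for PC samples) and holds the head of a singly linked chain of
 * cg_arc_dest entries in cg_to[]; each entry records one callee address and
 * a traversal count.  Entry 0 of cg_to[] is never used as an arc: its count
 * field serves as the allocation cursor for new arcs.
 */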

/* Record an arc traversal in the call graph.  Called by mcount().  SMP safe. */
void cg_record_arc(unsigned long frompc, unsigned long selfpc)
{
#ifndef __HAVE_ARCH_CMPXCHG16
	static spinlock_t cg_record_lock = SPIN_LOCK_UNLOCKED;
	unsigned long flags;
#endif
	int toindex;
	int fromindex;
	int cpu;
	unsigned short *q;
	struct cg_arc_dest *p;
	unsigned short *cg_from;
	struct cg_arc_dest *cg_to;
#ifdef CONFIG_LIMIT_RECURS
	uint *ishere;
#endif /* CONFIG_LIMIT_RECURS */

	cpu = smp_processor_id();
	if (!cpu_prof_enabled[cpu])
		return;
	kernprof_cpu_data[cpu].d.total_mcount++;
#ifdef CONFIG_LIMIT_RECURS
	ishere = &kernprof_cpu_data[cpu].d.amhere;
	toindex = atomic_add_return(1, (atomic_t *)ishere) - 2;
	if (unlikely(toindex >= 0)) {
		/* Ongoing decrements (see below) should keep index in range */
		if (toindex >= DEBUG_RECUR_COUNT_MAX)
			BUG();
		kernprof_cpu_data[cpu].d.debug_recurse_count[toindex]++;
		/* If we're at the highest recursion count, then bail out! */
		if (toindex == DEBUG_RECUR_COUNT_MAX - 1) {
			atomic_dec((atomic_t *)ishere);
			return;
		}
	}
#endif /* CONFIG_LIMIT_RECURS */
	cg_from = (u_short *)(((char *)cg_from_base) + cg_from_sz * cpu);
	cg_to = &cg_to_base[CG_MAX_ARCS * cpu];
	if (pc_out_of_range(frompc)) {
		if (frompc >= memory_map.module_start &&
		    frompc < memory_map.module_end)
			fromindex = (FUNCTIONPC(MODULE) -
				     (unsigned long)&_stext) >> prof_shift;
		else
			fromindex = (FUNCTIONPC(UNKNOWN_KERNEL) -
				     (unsigned long)&_stext) >> prof_shift;
	} else
		fromindex = (frompc - (unsigned long) &_stext) >> prof_shift;
	q = &cg_from[fromindex];

	/* Easy case: the arc is already in the call graph */
	for (toindex = *q; toindex != 0; ) {
		p = &cg_to[toindex];
		if (p->address == selfpc) {
			atomic_inc(&p->count);
#ifdef CONFIG_LIMIT_RECURS
			atomic_dec((atomic_t *)ishere);
#endif /* CONFIG_LIMIT_RECURS */
			return;
		}
		toindex = p->link;
	}

	/*
	 * No luck.  We need to add a new arc.  Since cg_to[0] is unused,
	 * we use cg_to[0].count to keep track of the next available arc.
	 */
	if (cg_arc_overflow) {
		kernprof_cpu_data[cpu].d.lost_ones++;
#ifdef CONFIG_LIMIT_RECURS
		atomic_dec((atomic_t *)ishere);
#endif /* CONFIG_LIMIT_RECURS */
		return;
	}
	toindex = atomic_add_return(1, &cg_to->count);
	if (toindex >= CG_MAX_ARCS) {
		/*
		 * We have run out of space for arcs.  We'll keep incrementing
		 * the existing ones but we won't try to add any more.
		 */
		kernprof_cpu_data[cpu].d.lost_ones++;
		cg_arc_overflow = 1;
		atomic_set(&cg_to->count, CG_MAX_ARCS - 1);
#ifdef CONFIG_LIMIT_RECURS
		atomic_dec((atomic_t *)ishere);
#endif /* CONFIG_LIMIT_RECURS */
		return;
	}

	/*
	 * We have a secured slot for a new arc and all we need to do is
	 * initialize it and add it to a hash bucket.  We use compare&swap, if
	 * possible, to avoid any spinlocks whatsoever.
	 */
	p = &cg_to[toindex];
	p->address = selfpc;
	atomic_set(&p->count, 1);
#ifdef __HAVE_ARCH_CMPXCHG16
	do {
		p->link = *q;
	} while (cmpxchg(q, p->link, toindex) != p->link);
#else
	spin_lock_irqsave(&cg_record_lock, flags);
	p->link = *q;
	*q = toindex;
	spin_unlock_irqrestore(&cg_record_lock, flags);
#endif
#ifdef CONFIG_LIMIT_RECURS
	atomic_dec((atomic_t *)ishere);
#endif /* CONFIG_LIMIT_RECURS */
	return;
}

/*
 * Record an arc traversal in the call graph, and walk up the stack to
 * find and record all the call graph arcs.  Called by schedule() (and
 * potentially others).  SMP safe.
 */
void backtrace_cg_record_arc(unsigned long frompc, unsigned long selfpc)
{
	int backtrace_count = PROF_BACKTRACE_MAX_LEN;	/* for safety */
	frame_info_t frame;
	unsigned long caller_pc, callee_pc;

	if (prof_pid && (!current || current->pid != prof_pid))
		return;

	/* If can't build fake frame, then record what info we have and leave */
	if (!build_fake_frame(&frame)) {
#ifndef CONFIG_IA64
		caller_pc = frompc;
		callee_pc = (selfpc) ? selfpc
			: (unsigned long)__builtin_return_address(0);
		cg_record_arc(caller_pc, callee_pc);
#endif
		return;
	}

	/* Walk back to who called us */
	if (!get_next_frame(&frame)) {
		return;
	}
	callee_pc = frame_get_pc(&frame);
	if (pc_out_of_range(callee_pc)) {
		return;
	}

	/* Now walk back to who called our caller, giving us the 1st cg arc */
	if (!get_next_frame(&frame)) {
		printk(" computed callee_pc:0x%lx\n", callee_pc & 0xffffffffL);
		printk(" caller-supplied caller:0x%lx callee:0x%lx\n",
		       frompc & 0xffffffffL, selfpc & 0xffffffffL);
		BUG();	/* debug */
		return;
	}
	caller_pc = frame_get_pc(&frame);
	if (pc_out_of_range(caller_pc)) {
		return;
	}
	/* Now record this cg arc and keep walking back the stack for more */
	while (backtrace_count--) {
		cg_record_arc(caller_pc, callee_pc);
		callee_pc = caller_pc;
		if (!get_next_frame(&frame))
			break;	/* quit! */
		caller_pc = frame_get_pc(&frame);
		if (pc_out_of_range(caller_pc))
			break;	/* quit! */
		backtrace_count--;
	}
}
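
/*
 * Backtrace samples live in per-CPU ring buffers of unsigned longs.  Each
 * sample occupies one header slot followed by up to PROF_BACKTRACE_MAX_LEN
 * PC values.  The header packs the sample's weight (a repeat count, kept in
 * the upper half of the word; 0 is interpreted as 1) together with the
 * number of PC entries that follow (lower half).  p->start is where the
 * next header will be written; p->end is the last slot consumed by the
 * reader in trace_read().
 */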

#define PROF_TRACE_MASK (PROF_BACKTRACE_BUFSIZE - 1)

/* circularly increment i to point to the next entry in a trace ring buffer */
#define CIRC_INC(i) (((i) + 1) & PROF_TRACE_MASK)

/*
 * In backtrace mode, add a sample to the per-processor trace bufs.
 *
 * If frame is NULL, there is no backtrace.  Just record a length 1
 * backtrace at alt_pc.
 *
 * If frame is non-NULL, use it to perform a backtrace, generating a
 * list of PCs to add onto the trace bufs.
 *
 * If frame is non-NULL, and alt_pc is non-NULL, same as before, except
 * force alt_pc to be at the head of the backtrace, and pretend that the
 * first function on the frame called alt_pc.
 */
static void do_backtrace_sample(frame_info_t *frame, unsigned long alt_pc,
				unsigned long count)
{
	int free_slots, j, n_entries;
	struct trace_ring_buf *p;

	p = &trace_bufs[smp_processor_id()];
	if (!p->active ||
	    ((free_slots = ((p->end - p->start) & PROF_TRACE_MASK)) < 3))
		goto out;
	j = CIRC_INC(p->start);
	n_entries = 1;

	if (!frame) {
		p->data[j] = alt_pc;
		goto end_trace;
	}

	/* We set aside one slot for the trace length */
	if (--free_slots > PROF_BACKTRACE_MAX_LEN)
		free_slots = PROF_BACKTRACE_MAX_LEN;

	n_entries = 0;
	if (alt_pc) {
		p->data[j] = alt_pc;
		if (++n_entries == free_slots)
			goto end_trace;
		j = CIRC_INC(j);
	}
	while (1) {
		p->data[j] = frame_get_pc(frame);
		if (pc_out_of_range(p->data[j])) {
			if (p->data[j] >= memory_map.module_start &&
			    p->data[j] < memory_map.module_end)
				p->data[j] = FUNCTIONPC(MODULE);
			else
				p->data[j] = FUNCTIONPC(UNKNOWN_KERNEL);
		}
		if (++n_entries == free_slots || !get_next_frame(frame))
			break;
		j = CIRC_INC(j);
	}
end_trace:
	/* count goes in upper half of data value.  0 is interpreted as a 1 */
	p->data[p->start] = (count << ((sizeof count) * 4)) | n_entries;
	p->start = CIRC_INC(j);
out:
	return;
}

/* Record a stack backtrace.  Called from interrupt handlers.  No MP issues. */
static void backtrace_sample(struct pt_regs *regs)
{
	frame_info_t frame;
	u_long pc;

	if (!cpu_prof_enabled[smp_processor_id()])
		return;
	if (prof_pid && (!current || current->pid != prof_pid))
		return;

	/* Check for corner cases, otherwise generate frame from regs */

	if (user_mode(regs)) {
		pc = FUNCTIONPC(USER);
		do_backtrace_sample(NULL, pc, 0);
	} else if (in_firmware(regs)) {
		pc = FUNCTIONPC(FIRMWARE);
		do_backtrace_sample(NULL, pc, 0);
	} else if (pc_out_of_range(instruction_pointer(regs))) {
		if (instruction_pointer(regs) >= memory_map.module_start &&
		    instruction_pointer(regs) < memory_map.module_end) {
			pc = FUNCTIONPC(MODULE);
			do_backtrace_sample(NULL, pc, 0);
		} else {
			pc = FUNCTIONPC(UNKNOWN_KERNEL);
			do_backtrace_sample(NULL, pc, 0);
		}
	} else {
		/* We have a pc value within the static kernel text area */
		get_top_frame(regs, &frame);
		pc = instruction_pointer(regs);
		do_backtrace_sample(&frame, 0, 0);
	}

	pc -= (u_long) &_stext;
	atomic_inc((atomic_t *) &PC_sample_buf[pc >> prof_shift]);
}

static void backtrace_wakeup_sample(unsigned long frompc, unsigned long blocked,
				    unsigned long stalled)
{
	frame_info_t frame;
	u_long pc;

	if (!cpu_prof_enabled[smp_processor_id()])
		return;

	if (prof_pid == 0)
		printk("kernprof error: backtrace_wakeup_sample but prof_pid == 0\n");

	if (!current || current->pid != prof_pid)
		return;

	if (!build_fake_frame(&frame))
		return;

	if (!get_next_frame(&frame))
		return;

	if (blocked) {
		pc = FUNCTIONPC(SLEEPING);
		do_backtrace_sample(&frame, pc,
				    blocked * (get_prof_freq() / HZ));

		pc -= (u_long) &_stext;
		atomic_add(blocked * (get_prof_freq() / HZ),
			   (atomic_t *) &PC_sample_buf[pc >> prof_shift]);
	}

	if (stalled) {
		pc = FUNCTIONPC(STALLED);
		do_backtrace_sample(NULL, pc,
				    stalled * (get_prof_freq() / HZ));
		pc -= (u_long) &_stext;
		atomic_add(stalled * (get_prof_freq() / HZ),
			   (atomic_t *) &PC_sample_buf[pc >> prof_shift]);
	}
}
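
/*
 * Reading a backtrace buffer from user space: the control device uses minor
 * PROF_CNTRL_MINOR, and minor N+1 exposes the trace ring buffer of CPU N
 * (opening it through prof_open() marks the buffer active).  trace_read()
 * below drains whole ring-buffer slots, so reads should be a multiple of
 * sizeof(unsigned long).
 */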

static ssize_t trace_read(struct file *file, char *buf,
			  size_t count, loff_t *ppos)
{
	struct trace_ring_buf *p;
	size_t avail, entries_to_write;

	p = &trace_bufs[minor(file->f_dentry->d_inode->i_rdev) - 1];
	avail = (PROF_BACKTRACE_BUFSIZE - 1) + p->start - p->end;
	avail &= PROF_TRACE_MASK;

	entries_to_write = count / sizeof(*p->data);
	if (entries_to_write > avail)
		entries_to_write = avail;
	if (entries_to_write == 0)
		return 0;
	count = entries_to_write * sizeof(*p->data);
	if (p->end + entries_to_write < PROF_BACKTRACE_BUFSIZE) {
		copy_to_user(buf, (void *)&p->data[p->end + 1], count);
		p->end += entries_to_write;
	} else {
		size_t first_part;

		avail = (PROF_BACKTRACE_BUFSIZE - 1) - p->end;
		first_part = avail * sizeof(*p->data);

		if (avail)
			copy_to_user(buf, (void *)&p->data[p->end + 1],
				     first_part);
		copy_to_user(buf + first_part, (void *)&p->data[0],
			     count - first_part);
		p->end = entries_to_write - avail - 1;
	}
	return count;
}

static int trace_release(struct inode *inode, struct file *filp)
{
	trace_bufs[minor(inode->i_rdev) - 1].active = 0;
	return 0;
}

static struct file_operations prof_trace_fops = {
	owner:		THIS_MODULE,
	read:		trace_read,
	release:	trace_release,
};

/*
 * The perf counter interrupt handler calls this function which then calls the
 * appropriate sampling function.  We do this because we may need to reload the
 * perf counter after it overflows.
 */
void perfctr_aux_intr_handler(struct pt_regs *regs)
{
	prof_perfctr_aux_hook(regs);
	perfctr_reload(perfctr_freq);
}

/* Start the performance monitoring counters */
static void perfctr_commence(void *dummy)
{
	__perfctr_commence(perfctr_freq, perfctr_event);
}

/* Stop the performance monitoring counters */
static void perfctr_stop(void *dummy)
{
	__perfctr_stop();
}

/* Open a profiling device */
static int prof_open(struct inode *inode, struct file *filp)
{
	int minor = minor(inode->i_rdev);

	if (minor != PROF_CNTRL_MINOR) {
		--minor;
		if (minor >= NR_CPUS || trace_bufs[minor].data == NULL)
			return -ENODEV;

		filp->f_op = &prof_trace_fops;
		trace_bufs[minor].start = 0;
		trace_bufs[minor].end = PROF_BACKTRACE_BUFSIZE - 1;
		trace_bufs[minor].active = 1;
	}

	return 0;
}

static void prof_stop(void)
{
	if (prof_mode & PROF_MODE_CALL_GRAPH) {
		/* Aggregate per-cpu counts into all-cpu counts to display */
		unsigned long total_mcount = 0L;
		unsigned long lost_ones = 0L;
		int i;
#ifdef CONFIG_LIMIT_RECURS
		int ii;
		unsigned long recur_counts[DEBUG_RECUR_COUNT_MAX];
		for (i = 0; i < DEBUG_RECUR_COUNT_MAX; i++)
			recur_counts[i] = 0L;
#endif
		for (i = 0; i < smp_num_cpus; i++) {
			total_mcount += kernprof_cpu_data[i].d.total_mcount;
			lost_ones += kernprof_cpu_data[i].d.lost_ones;
#ifdef CONFIG_LIMIT_RECURS
			for (ii = 0; ii < DEBUG_RECUR_COUNT_MAX; ii++)
				recur_counts[ii] +=
					kernprof_cpu_data[i].d.debug_recurse_count[ii];
#endif
		}
#ifdef CONFIG_LIMIT_RECURS
		if (lost_ones || recur_counts[DEBUG_RECUR_COUNT_MAX - 1]) {
#else
		if (lost_ones) {
#endif
			printk("Total mcount invocations: %12lu\n",
			       total_mcount);
			printk("Lost to table overflow: %12lu\n",
			       lost_ones);
#ifdef CONFIG_LIMIT_RECURS
			printk("Lost to recursive invoc: %12lu\n",
			       recur_counts[DEBUG_RECUR_COUNT_MAX - 1]);
			printk("Recursion depth:counts: ");
			for (ii = 0; ii < DEBUG_RECUR_COUNT_MAX - 1; ii++)
				printk(" %d:%lu ", ii + 1, recur_counts[ii]);
			printk("\n");
#endif /* CONFIG_LIMIT_RECURS */
		}
	}
	if (prof_perfctr_hook) {
		smp_call_function(perfctr_stop, NULL, 1, 0);
		perfctr_stop(NULL);
	}
	prof_timer_hook = prof_perfctr_hook = NULL;
	mcount_hook = NULL;
	prof_scheduler_hook = NULL;
	prof_wakeup_hook = NULL;
	if (prof_enabled) {
		unload_timeout = jiffies + HZ;
		prof_enabled = 0;
		MOD_DEC_USE_COUNT;
	}
}

extern struct module *module_list;
extern struct module *static_module_list;

int prof_get_module_map(prof_mem_map_t *map)
{
	struct module *mod;
	struct module_symbol *s;
	char *t;
	u_long low = (u_long)-1L;
	u_long high = 0L;
	u_long end;
	int i;

	for (mod = module_list; mod != static_module_list; mod = mod->next) {
		if (mod->flags & MOD_RUNNING) {
			for (i = 0, s = mod->syms; i < mod->nsyms; i++, s++) {
				if ((t = strstr(s->name, "_S.text_L"))) {
					if (s->value < low)
						low = s->value;
					end = mod->size + s->value;
					if (end > high)
						high = end;
				}
			}
		}
	}
	if (high) {
		map->module_start = low;
		map->module_end = high;
		map->module_buckets = 0;
		return 0;
	}

	return -1;
}
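
/*
 * Profiling results are exported through procfs:
 *
 *	/proc/profile/PC_samples  - the flat array of PC-sample counters
 *	/proc/profile/call_graph  - the per-CPU cg_from hash tables followed
 *				    by the per-CPU cg_to arc pools (only when
 *				    supports_call_graph is true)
 *
 * create_proc_files() also recomputes the buffer sizes whenever the profiled
 * module text changes, discarding the old buffers if the bucket count no
 * longer matches.
 */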

int create_proc_files(void)
{
	struct proc_dir_entry *ent;
	prof_mem_map_t m_map;

	if (prof_get_module_map(&m_map)) {
		m_map.module_start = m_map.module_end = 0L;
		m_map.module_buckets = 0;
	}
	if (n_buckets != memory_map.kernel_buckets + m_map.module_buckets) {
		if (proc_created) {
			remove_proc_entry("profile/PC_samples", 0);
			if (supports_call_graph)
				remove_proc_entry("profile/call_graph", 0);
			remove_proc_entry("profile", 0);
			prof_free_mem();
			proc_created = 0;
		}
		memory_map.module_buckets = 0;
		memory_map.module_start = m_map.module_start;
		memory_map.module_end = m_map.module_end;
		n_buckets = memory_map.kernel_buckets;
	}

	if (proc_created)
		return 0;

	PC_buf_sz = n_buckets * sizeof(PC_sample_count_t);

	if (!proc_mkdir("profile", 0)) {
		printk(KERN_ERR "kernprof: unable to create /proc entries\n");
		return -ENODEV;
	}
	if ((ent = create_proc_entry("profile/PC_samples", 0, 0)) != NULL) {
		ent->size = PC_buf_sz;
		ent->proc_fops = &proc_PC_sample_operations;
	} else
		printk(KERN_ERR "kernprof: unable to create /proc/profile/PC_samples\n");

	if (supports_call_graph) {
		/*
		 * Calculate size of call graph structures.  The round-ups
		 * ensure that pointers to these structures are properly
		 * aligned.
		 */
		cg_from_sz = n_buckets * sizeof(short);
		cg_to_sz = CG_MAX_ARCS * sizeof(struct cg_arc_dest);

		PC_buf_sz = roundup(PC_buf_sz, sizeof(unsigned long));
		cg_from_sz = roundup(cg_from_sz, sizeof(unsigned long));
		mem_needed = PC_buf_sz + cg_from_sz * smp_num_cpus +
			     cg_to_sz * smp_num_cpus;

		if ((ent = create_proc_entry("profile/call_graph", 0, 0))) {
			ent->size = cg_to_sz * smp_num_cpus +
				    cg_from_sz * smp_num_cpus;
			ent->proc_fops = &proc_call_graph_operations;
		} else
			printk(KERN_ERR "kernprof: unable to create /proc/profile/call_graph\n");
	} else
		mem_needed = PC_buf_sz;

	proc_created = 1;
	return 0;
}
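
/*
 * Control-device ioctl()s.  Switching to a different domain or mode (and
 * PROF_RESET) stops any profiling in progress; PROF_SET_PID is refused
 * while profiling is enabled.  PROF_START lazily allocates the sample
 * buffers on first use and then installs the hooks appropriate for the
 * current domain and mode.
 */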

/*
 * ioctl handler for the kernprof control device.
 */
int prof_ctl_ioctl(struct inode *inode, struct file *filp,
		   unsigned int command, unsigned long arg)
{
	int err = 0;

	switch (command) {
	case PROF_START:
		if (prof_enabled)
			return 0;
		if (create_proc_files()) {
			err = -EINVAL;
			return err;
		}
		if (PC_sample_buf == NULL && (err = prof_alloc_mem()))
			return err;
		MOD_INC_USE_COUNT;
		prof_enabled = 1;
		if (prof_mode & PROF_MODE_CALL_GRAPH) {
			mcount_hook = cg_record_arc;
		} else if (prof_mode & PROF_MODE_CALL_COUNT) {
			mcount_hook = record_fn_call;
		} else if (prof_mode & PROF_MODE_SCHEDULER_CALL_GRAPH)
			prof_scheduler_hook = backtrace_cg_record_arc;
		if (prof_mode & PROF_MODE_PC_SAMPLING) {
			*prof_intr_hook = PC_sample;
			if (prof_pid)
				prof_wakeup_hook = PC_wakeup_sample;
		} else if (prof_mode & PROF_MODE_BACKTRACE) {
			*prof_intr_hook = backtrace_sample;
			if (prof_pid)
				prof_wakeup_hook = backtrace_wakeup_sample;
		}
		if (prof_domain == PROF_DOMAIN_PERFCTR) {
			if (!(prof_mode & PROF_MODE_PC_SAMPLING) &&
			    !(prof_mode & PROF_MODE_BACKTRACE)) {
				err = -EINVAL;
				return err;
			}
			prof_perfctr_hook = perfctr_aux_intr_handler;
			smp_call_function(perfctr_commence, NULL, 1, 0);
			perfctr_commence(NULL);
		}
		break;
	case PROF_STOP:
		prof_stop();
		break;
	case PROF_RESET:
		prof_stop();	/* resetting also stops profiling */
		prof_reset();
		break;
	case PROF_SET_SAMPLE_FREQ:
		if (prof_domain == PROF_DOMAIN_TIME)
			err = setup_profiling_timer(arg);
		else if (prof_domain == PROF_DOMAIN_PERFCTR) {
			if (valid_perfctr_freq(arg))
				perfctr_freq = arg;
			else
				err = -EINVAL;
		} else
			err = -EINVAL;
		break;
	case PROF_GET_SAMPLE_FREQ:
		if (prof_domain == PROF_DOMAIN_TIME) {
			unsigned int freq = get_prof_freq();
			err = copy_to_user((void *)arg, &freq, sizeof freq) ?
				-EFAULT : 0;
		} else
			err = copy_to_user((void *)arg, &perfctr_freq,
					   sizeof perfctr_freq) ? -EFAULT : 0;
		break;
	case PROF_GET_PC_RES:
		err = copy_to_user((void *)arg, &PC_resolution,
				   sizeof PC_resolution) ? -EFAULT : 0;
		break;
	case PROF_GET_ON_OFF_STATE:
		err = copy_to_user((void *)arg, &prof_enabled,
				   sizeof prof_enabled) ? -EFAULT : 0;
		break;
	case PROF_SET_DOMAIN:
		if (arg != prof_domain)	/* changing domains stops profiling */
			prof_stop();
		if (arg == PROF_DOMAIN_TIME) {
			prof_domain = arg;
			prof_intr_hook = &prof_timer_hook;
		} else if (arg == PROF_DOMAIN_PERFCTR && have_perfctr()) {
			prof_domain = arg;
			prof_intr_hook = &prof_perfctr_aux_hook;
		} else
			err = -EINVAL;
		break;
	case PROF_GET_DOMAIN:
		err = copy_to_user((void *)arg, &prof_domain,
				   sizeof prof_domain) ? -EFAULT : 0;
		break;
	case PROF_SET_MODE:
		if (arg != prof_mode)	/* changing modes also stops profiling */
			prof_stop();
		if (arg == PROF_MODE_PC_SAMPLING)
			prof_mode = arg;
		else if (arg == PROF_MODE_BACKTRACE && prof_have_frameptr)
			prof_mode = arg;
		else if (arg == PROF_MODE_CALL_COUNT && prof_have_mcount)
			prof_mode = arg;
		else if (supports_call_graph &&
			 (arg == PROF_MODE_SCHEDULER_CALL_GRAPH ||
			  arg == PROF_MODE_CALL_GRAPH ||
			  arg == (PROF_MODE_CALL_GRAPH | PROF_MODE_PC_SAMPLING)))
			prof_mode = arg;
		else
			err = -EINVAL;
		break;
	case PROF_GET_MODE:
		err = copy_to_user((void *)arg, &prof_mode, sizeof prof_mode) ?
			-EFAULT : 0;
		break;
	case PROF_SET_PID:
		if (prof_enabled)	/* don't change PID while profiling */
			err = -EINVAL;
		else {
			prof_reset();
			prof_pid = arg;
		}
		break;
	case PROF_GET_PID:
		err = copy_to_user((void *)arg, &prof_pid, sizeof prof_pid) ?
			-EFAULT : 0;
		break;
	case PROF_SET_PERFCTR_EVENT:
		if (have_perfctr() && valid_perfctr_event(arg))
			perfctr_event = arg;
		else
			err = -EINVAL;
		break;
	case PROF_GET_PERFCTR_EVENT:
		if (have_perfctr())
			err = copy_to_user((void *)arg, &perfctr_event,
					   sizeof perfctr_event) ? -EFAULT : 0;
		else
			err = -EINVAL;
		break;
	case PROF_SET_ENABLE_MAP:
		if (get_user(cpu_prof_enable_map, (u_long *)arg))
			err = -EFAULT;
		else {
			cpu_prof_enable_map &= cpu_online_map;
			expand_enable_map();
		}
		break;
	case PROF_GET_ENABLE_MAP:
		err = copy_to_user((void *)arg, &cpu_prof_enable_map,
				   sizeof cpu_prof_enable_map) ? -EFAULT : 0;
		break;
	case PROF_GET_MAPPING:
		err = copy_to_user((void *)arg, &memory_map,
				   sizeof memory_map) ? -EFAULT : 0;
		break;
	default:
		err = -EINVAL;
	}

	return err;
}

static struct file_operations prof_ctl_fops = {
	owner:	THIS_MODULE,
	ioctl:	prof_ctl_ioctl,
	open:	prof_open,
};
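
/*
 * PC_resolution can be set at boot with "kernprof=<bytes>" when the driver
 * is built in (for example, kernprof=16 gives one sample counter per 16
 * bytes of kernel text), or through the PC_resolution module parameter when
 * built as a module.  Either way it is rounded down to a power of two in
 * kernprof_init().
 */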

#ifndef MODULE
static int __init kernprof_setup(char *str)
{
	int res;

	if (get_option(&str, &res))
		PC_resolution = res;
	return 1;
}

__setup("kernprof=", kernprof_setup);
#else
static int can_unload(void)
{
	int ret = atomic_read(&__this_module.uc.usecount);

	/*
	 * It is conceivable that we may try to delete this module just as
	 * an interrupt handler is trying to write into a profile buffer.
	 * Since unloading the module frees the buffers, that would be
	 * unfortunate.  To avoid such races this module may not be unloaded
	 * within one second after profiling is turned off.
	 */
	if (time_before(jiffies, unload_timeout))
		ret = 1;

	return ret;
}
#endif

int __init kernprof_init(void)
{
	size_t text_size = (unsigned long) &_etext - (unsigned long) &_stext;
	int ret;

	/* Round PC_resolution down to a power of 2 and compute its log */
	if (PC_resolution == 0)
		PC_resolution = DFL_PC_RES;
	while ((PC_resolution & (PC_resolution - 1)) != 0)
		PC_resolution &= PC_resolution - 1;
	for (prof_shift = 0; (1 << prof_shift) < PC_resolution; prof_shift++);

	/* Calculate size of PC-sample buffer. */
	memory_map.kernel_buckets = n_buckets = text_size >> prof_shift;
	memory_map.kernel_start = (u_long)&_stext;
	memory_map.kernel_end = (u_long)&_etext;

#ifdef MODULE
	__this_module.can_unload = can_unload;
#endif
	memset(trace_bufs, 0, sizeof trace_bufs);

	cpu_prof_enable_map = cpu_online_map;
	expand_enable_map();

	ret = devfs_register_chrdev(KERNPROF_MAJOR, "profile", &prof_ctl_fops);
	if (ret < 0)
		return ret;
	proc_handle = devfs_register(NULL, "profile",
				     DEVFS_FL_NONE, KERNPROF_MAJOR, 0,
				     S_IFCHR | S_IRUSR | S_IWUSR | S_IRGRP,
				     &prof_ctl_fops, NULL);
	return 0;
}

/* This must be static for some reason */
static void __exit kernprof_exit(void)
{
	devfs_unregister(proc_handle);
	devfs_unregister_chrdev(KERNPROF_MAJOR, "profile");
	remove_proc_entry("profile/PC_samples", 0);
	if (supports_call_graph)
		remove_proc_entry("profile/call_graph", 0);
	remove_proc_entry("profile", 0);
	prof_free_mem();
}

module_init(kernprof_init);
module_exit(kernprof_exit);