/* Perf doesn't export the raw PEBS output, which contains a lot of useful information. PEBS is a sampling format generated by Intel CPUs for some events. Grab PEBS data from perf. This assumes the perf pebs handler is running, we just also do trace points with the raw data. May need some minor tweaks as kernel interface change, and will also not likely work on very old kernels. This will create two new trace points trace_pebs_v1 and trace_pebs_v2 that log the complete PEBS record. When the CPU supports PEBSv2 (Haswell) the additional fields will be logged in pebs_v2. make [KDIR=/my/kernel/build/dir] insmod pebs-grabber.ko # needs to record as root perf record -e cycles:p,pebs_v1,pebs_v2 [command, -a for all etc.] perf report perf script to display pebs data # alternatively trace-cmd and kernelshark can be also used to dump # the pebs data See http://download.intel.com/products/processor/manual/253669.pdf 18.10.2 for a description of the PEBS fields. Note this doesn't work with standard FC18 kernels, as they broke trace points in modules. Author: Andi Kleen */ #define pr_fmt(fmt) "pebs_grabber: " fmt #include #include #include #include #include #include #include #include #include #define CREATE_TRACE_POINTS #include "pebs.h" struct pebs_v1 { u64 flags; u64 ip; u64 regs[16]; u64 status; u64 dla; u64 dse; u64 lat; }; struct pebs_v2 { struct pebs_v1 v1; u64 eventingip; u64 tsx_tuning; }; struct debug_store { u64 buffer_base; u64 index; u64 absolute_maximum; u64 interrupt_threshold; u64 pebs_buffer_base; u64 pebs_index; u64 pebs_absolute_maximum; u64 pebs_interrupt_threshold; u64 pebs_event_reset[0]; }; static DEFINE_PER_CPU(struct debug_store *, ds_base); static unsigned pebs_version; static char *handler_names[] = { [1] = "intel_pmu_drain_pebs_nhm", #if LINUX_VERSION_CODE < KERNEL_VERSION(3,13,0) [2] = "intel_pmu_drain_pebs_hsw", #else [2] = "intel_pmu_drain_pebs_nhm", #endif }; static unsigned pebs_record_size[] = { [1] = sizeof(struct pebs_v1), [2] = sizeof(struct pebs_v2), }; static int pebs_grabber(struct kprobe *kp, struct pt_regs *regs) { struct debug_store *ds; void *pebs; ds = this_cpu_read(ds_base); if (!ds) { u64 dsval; rdmsrl(MSR_IA32_DS_AREA, dsval); ds = (struct debug_store *)dsval; this_cpu_write(ds_base, ds); } for (pebs = (void *)ds->pebs_buffer_base; pebs < (void *)ds->pebs_index; pebs = pebs + pebs_record_size[pebs_version]) { struct pebs_v1 *v1 = pebs; trace_pebs_v1(v1->ip, v1->status, v1->dla, v1->dse, v1->lat); if (pebs_version == 2) { struct pebs_v2 *v2 = pebs; trace_pebs_v2(v2->eventingip, v2->tsx_tuning, v1->regs[0]); } trace_pebs_regs(v1->flags, v1->regs); } return 0; } static struct kprobe pebs_kp = { .symbol_name = "pebs-grabber", .pre_handler = pebs_grabber }; static int init_pebs_grabber(void) { int err; u64 eax, cap; if (!boot_cpu_has(X86_FEATURE_ARCH_PERFMON)) { pr_err("Arch perfmon not supported\n"); return -EIO; } eax = cpuid_eax(10); if ((eax & 0xff) < 2) { pr_err("Need at least version 2 of arch_perfmon, not %llu\n", eax & 0xff); return -EIO; } rdmsrl(MSR_IA32_PERF_CAPABILITIES, cap); pebs_version = (cap >> 8) & 0xf; pr_info("PEBS version %u\n", pebs_version); if (pebs_version < 1 || pebs_version > 2) { pr_err("Unsupported PEBS version %u\n", pebs_version); return -EIO; } pebs_kp.symbol_name = handler_names[pebs_version]; if ((err = register_kprobe(&pebs_kp)) < 0) { pr_err("Cannot register kprobe: %d\n", err); return err; } return 0; } static void exit_pebs_grabber(void) { unregister_kprobe(&pebs_kp); } module_init(init_pebs_grabber); module_exit(exit_pebs_grabber); MODULE_LICENSE("GPL"); MODULE_AUTHOR("Andi Kleen"); MODULE_DESCRIPTION("Get raw PEBS sampling data as trace points");