Human readable description support for performance events v2. With perf support included.
Changes from v1:
- applied on top of latest perf_event/ARM (5899/1 - 5903/1)
- moved to debugfs, now based on seq_file
- reads one line at a time, memory overallocation fixed [perf]
The following patches provide a debugfs entry with a human-readable description of the hardware events, one event per line, in the form "0x%llx\t%s\t%lld-%lld\t%s" % (event_value, name, minval, maxval, description), and the means to populate the file.
The version posted contains ARMv6, ARMv7 (Cortex-A[89]) support in this matter.
The intended use is twofold: for users to read the list directly and for tools (like perf).
This series includes:
[PATCH/RFC v2 1/3] perfevents: Added performance event structure definition, export event description in the debugfs "perf_events_platform" file
[PATCH/RFC v2 2/3] [ARM] perfevents: Event description for ARMv6, Cortex-A8 and Cortex-A9 exported
[PATCH/RFC v2 3/3] perf: Extended events (platform-specific) support in perf
Thanks,
--
Tomasz Fujak
Signed-off-by: Tomasz Fujak <[email protected]>
Reviewed-by: Marek Szyprowski <[email protected]>
Reviewed-by: Pawel Osciak <[email protected]>
Reviewed-by: Kyungmin Park <[email protected]>
---
include/linux/perf_event.h | 19 +++++++++
kernel/perf_event.c | 92 ++++++++++++++++++++++++++++++++++++++++++++
2 files changed, 111 insertions(+), 0 deletions(-)
diff --git a/include/linux/perf_event.h b/include/linux/perf_event.h
index c66b34f..b50e2b8 100644
--- a/include/linux/perf_event.h
+++ b/include/linux/perf_event.h
@@ -458,6 +458,12 @@ enum perf_callchain_context {
#define PERF_MAX_STACK_DEPTH 255
+#define PERF_EVENT_RAW_BIT (1ULL << 63)
+#define PERF_EVENT_RAW_TO_CONFIG(_val) ((_val) | PERF_EVENT_RAW_BIT)
+#define PERF_EVENT_CONFIG_TO_RAW(_val) ((_val) & ~PERF_EVENT_RAW_BIT)
+#define PERF_EVENT_IS_RAW(_val) ((_val) & PERF_EVENT_RAW_BIT)
+
+
struct perf_callchain_entry {
__u64 nr;
__u64 ip[PERF_MAX_STACK_DEPTH];
@@ -554,6 +560,19 @@ struct perf_mmap_data {
void *data_pages[0];
};
+struct perf_event_description {
+ struct list_head list;
+
+ /* type : 1, subsystem [0..7], id [56..63]*/
+ __u64 config;
+ __u64 min_value; /* min. wakeup period */
+ __u64 max_value; /* max. wakeup period */
+ __u32 flags; /* ??? */
+ __u32 reserved[3];
+ char *name;
+ char *description;
+};
+
struct perf_pending_entry {
struct perf_pending_entry *next;
void (*func)(struct perf_pending_entry *);
diff --git a/kernel/perf_event.c b/kernel/perf_event.c
index 603c0d8..dc68f0b 100644
--- a/kernel/perf_event.c
+++ b/kernel/perf_event.c
@@ -31,6 +31,9 @@
#include <linux/ftrace_event.h>
#include <linux/hw_breakpoint.h>
+#include <linux/debugfs.h>
+#include <linux/seq_file.h>
+
#include <asm/irq_regs.h>
/*
@@ -99,6 +102,10 @@ void __weak hw_perf_enable(void) { barrier(); }
void __weak hw_perf_event_setup(int cpu) { barrier(); }
void __weak hw_perf_event_setup_online(int cpu) { barrier(); }
+static LIST_HEAD(perf_event_empty);
+
+const struct list_head __weak *perf_events_platform;
+
int __weak
hw_perf_group_sched_in(struct perf_event *group_leader,
struct perf_cpu_context *cpuctx,
@@ -5333,6 +5340,83 @@ perf_set_overcommit(struct sysdev_class *class, const char *buf, size_t count)
return count;
}
+static void *platevent_seq_start(struct seq_file *s, loff_t *pos)
+{
+ struct list_head *spos = NULL;
+
+ if (perf_events_platform) {
+ loff_t count = *pos;
+ struct list_head *curr;
+
+ list_for_each(curr, perf_events_platform)
+ if (!count--)
+ break;
+
+ if (curr != perf_events_platform) {
+ s->private = perf_events_platform;
+ spos = curr;
+ }
+ }
+
+ return spos;
+}
+
+static void *platevent_seq_next(struct seq_file *s, void *v, loff_t *pos)
+{
+	struct list_head *curr = v;
+	struct list_head *head = s->private; /* list head saved by ->start */
+
+	/* seq_file expects *pos to advance even when the end is reached */
+	++(*pos);
+	if (list_is_last(curr, head))
+		return NULL;
+	return curr->next;
+}
+
+static void platevent_seq_stop(struct seq_file *s, void *v)
+{
+	/* v is a node of the platform's event list, not ours to kfree() */
+}
+
+static int platevent_seq_show(struct seq_file *s, void *v)
+{
+ struct list_head *curr = (struct list_head *)v;
+
+ if (curr) {
+ struct perf_event_description *entry = list_entry(curr,
+ struct perf_event_description, list);
+
+ if (PERF_EVENT_IS_RAW(entry->config))
+ seq_printf(s, "0x%llx\t%s\t%lld-%lld\t%s\n",
+ PERF_EVENT_CONFIG_TO_RAW(entry->config),
+ entry->name, entry->min_value,
+ entry->max_value, entry->description);
+ }
+
+ return 0;
+}
+
+static const struct seq_operations platevent_seq_ops = {
+ .start = platevent_seq_start,
+ .next = platevent_seq_next,
+ .stop = platevent_seq_stop,
+ .show = platevent_seq_show
+};
+
+static int platevent_open(struct inode *inode, struct file *file)
+{
+ return seq_open(file, &platevent_seq_ops);
+};
+
+static const struct file_operations platevent_file_ops = {
+ .owner = THIS_MODULE,
+ .open = platevent_open,
+ .read = seq_read,
+ .llseek = seq_lseek,
+ .release = seq_release
+};
+
+
static SYSDEV_CLASS_ATTR(
reserve_percpu,
0644,
@@ -5358,8 +5442,16 @@ static struct attribute_group perfclass_attr_group = {
.name = "perf_events",
};
+
static int __init perf_event_sysfs_init(void)
{
+ struct dentry *dentry;
+
+ dentry = debugfs_create_file("perf_events_platform", 0444, NULL,
+ NULL, &platevent_file_ops);
+ if (!dentry)
+ printk(KERN_WARNING "Cannot create debugfs entry 'perf_events_platform'\n");
+
return sysfs_create_group(&cpu_sysdev_class.kset.kobj,
&perfclass_attr_group);
}
--
1.5.4.3
Signed-off-by: Tomasz Fujak <[email protected]>
Reviewed-by: Marek Szyprowski <[email protected]>
Reviewed-by: Pawel Osciak <[email protected]>
Reviewed-by: Kyungmin Park <[email protected]>
---
arch/arm/kernel/perf_event.c | 333 ++++++++++++++++++++++++++++++++++++++++++
1 files changed, 333 insertions(+), 0 deletions(-)
diff --git a/arch/arm/kernel/perf_event.c b/arch/arm/kernel/perf_event.c
index 65edef6..eec086e 100644
--- a/arch/arm/kernel/perf_event.c
+++ b/arch/arm/kernel/perf_event.c
@@ -28,6 +28,17 @@
static const struct pmu_irqs *pmu_irqs;
+#define PERF_EVENT_DESC_ENTRY(_val, _min, _max, _name, _desc) { \
+ .config = PERF_EVENT_RAW_TO_CONFIG(_val),\
+ .min_value = (_min),\
+ .max_value = (_max),\
+ .name = (_name),\
+ .description = (_desc)\
+}
+
+#define minv 0
+#define maxv 0
+
/*
* Hardware lock to serialize accesses to PMU registers. Needed for the
* read/modify/write sequences.
@@ -86,6 +97,8 @@ struct arm_pmu {
/* Set at runtime when we know what CPU type we are. */
static const struct arm_pmu *armpmu;
+static LIST_HEAD(perf_events_arm);
+struct list_head *perf_events_platform = &perf_events_arm;
#define HW_OP_UNSUPPORTED 0xFFFF
@@ -98,6 +111,17 @@ static unsigned armpmu_perf_cache_map[PERF_COUNT_HW_CACHE_MAX]
[PERF_COUNT_HW_CACHE_OP_MAX]
[PERF_COUNT_HW_CACHE_RESULT_MAX];
+static void
+perf_event_add_events(struct list_head *head,
+ struct perf_event_description *array,
+ unsigned int count)
+{
+ unsigned int i = 0;
+
+ for (i = 0; i < count; ++i)
+ list_add_tail(&array[i].list, head);
+}
+
static int
armpmu_map_cache_event(u64 config)
{
@@ -820,6 +844,56 @@ static const unsigned armv6mpcore_perf_cache_map[PERF_COUNT_HW_CACHE_MAX]
},
};
+static struct perf_event_description armv6_event_description[] = {
+ /* armv6 events */
+ PERF_EVENT_DESC_ENTRY(ARMV6_PERFCTR_ICACHE_MISS, minv, maxv,
+ "ICACHE_MISS", "Instruction cache miss"),
+ PERF_EVENT_DESC_ENTRY(ARMV6_PERFCTR_IBUF_STALL, minv, maxv,
+ "IBUF_STALL", "Instruction fetch stall cycle"
+ " (either uTLB or I-cache miss)"),
+ PERF_EVENT_DESC_ENTRY(ARMV6_PERFCTR_DDEP_STALL, minv, maxv,
+ "DDEP_STALL", "Data dependency stall cycle"),
+ PERF_EVENT_DESC_ENTRY(ARMV6_PERFCTR_ITLB_MISS, minv, maxv,
+ "ITLB_MISS", "Instruction uTLB miss"),
+ PERF_EVENT_DESC_ENTRY(ARMV6_PERFCTR_DTLB_MISS, minv, maxv,
+ "DTLB_MISS", "Data uTLB miss"),
+ PERF_EVENT_DESC_ENTRY(ARMV6_PERFCTR_BR_EXEC, minv, maxv,
+ "BR_EXEC", "Branch instruction executed "
+ "(even if the PC hasn't been affected)"),
+ PERF_EVENT_DESC_ENTRY(ARMV6_PERFCTR_BR_MISPREDICT, minv, maxv,
+ "BR_MISPREDICT", "Branch mispredicted"),
+ PERF_EVENT_DESC_ENTRY(ARMV6_PERFCTR_INSTR_EXEC, minv, maxv,
+ "INSTR_EXEC", "Instruction executed (may be incremented"
+ " by 2 on some occasion)"),
+ PERF_EVENT_DESC_ENTRY(ARMV6_PERFCTR_DCACHE_HIT, minv, maxv,
+ "DCACHE_HIT", "Data cache hit for cacheable locations "
+ "(cache ops don't count)"),
+ PERF_EVENT_DESC_ENTRY(ARMV6_PERFCTR_DCACHE_ACCESS, minv, maxv,
+ "DCACHE_ACCESS", "Data cache access, all locations (?)"),
+ PERF_EVENT_DESC_ENTRY(ARMV6_PERFCTR_DCACHE_MISS, minv, maxv,
+ "DCACHE_MISS", "Data cache miss (cache ops don't count)"),
+ PERF_EVENT_DESC_ENTRY(ARMV6_PERFCTR_DCACHE_WBACK, minv, maxv,
+ "DCACHE_WBACK", "Data cache writeback (once for "
+ "half a cache line)"),
+ PERF_EVENT_DESC_ENTRY(ARMV6_PERFCTR_SW_PC_CHANGE, minv, maxv,
+ "SW_PC_CHANGE", "Software PC change (does not count if the "
+ "mode is changed, i.e. at SVC)"),
+ PERF_EVENT_DESC_ENTRY(ARMV6_PERFCTR_MAIN_TLB_MISS, minv, maxv,
+ "MAIN_TLB_MISS", "Main TLB (not uTLB) miss"),
+ PERF_EVENT_DESC_ENTRY(ARMV6_PERFCTR_EXPL_D_ACCESS, minv, maxv,
+ "EXPL_D_ACCESS", "Explicit external data access, DCache "
+ "linefill, Uncached, write-through"),
+ PERF_EVENT_DESC_ENTRY(ARMV6_PERFCTR_LSU_FULL_STALL, minv, maxv,
+ "LSU_FULL_STALL", "Stall cycle due to full Load/Store"
+ " Unit queue"),
+ PERF_EVENT_DESC_ENTRY(ARMV6_PERFCTR_WBUF_DRAINED, minv, maxv,
+ "WBUF_DRAINED", "Write buffer drained because of DSB or "
+ "Strongly Ordered memory operation"),
+ PERF_EVENT_DESC_ENTRY(ARMV6_PERFCTR_CPU_CYCLES, minv, maxv,
+ "CPU_CYCLES", "CPU cycles"),
+ PERF_EVENT_DESC_ENTRY(ARMV6_PERFCTR_NOP, minv, maxv, "NOP", "???")
+};
+
static inline unsigned long
armv6_pmcr_read(void)
{
@@ -1439,6 +1513,248 @@ static const unsigned armv7_a8_perf_cache_map[PERF_COUNT_HW_CACHE_MAX]
},
};
+static struct perf_event_description armv7_event_description[] = {
+ /* armv7 generic events */
+ PERF_EVENT_DESC_ENTRY(ARMV7_PERFCTR_PMNC_SW_INCR, minv, maxv,
+ "PMNC_SW_INCR", "Software increment (write to a "
+ "dedicated register)"),
+
+ PERF_EVENT_DESC_ENTRY(ARMV7_PERFCTR_IFETCH_MISS, minv, maxv,
+ "IFETCH_MISS", "Instruction fetch miss that causes "
+ "refill. Speculative misses count unless they don't "
+ "make to the execution, maintenance operations don't"),
+ PERF_EVENT_DESC_ENTRY(ARMV7_PERFCTR_ITLB_MISS, minv, maxv,
+ "ITLB_MISS", "Instruction TLB miss that causes a refill."
+ " Both speculative and explicit accesses count"),
+ PERF_EVENT_DESC_ENTRY(ARMV7_PERFCTR_DCACHE_REFILL, minv, maxv,
+ "DCACHE_REFILL", "Data cache refill. Same rules as ITLB_MISS"),
+ PERF_EVENT_DESC_ENTRY(ARMV7_PERFCTR_DCACHE_ACCESS, minv, maxv,
+ "DCACHE_ACCESS", "Data cache access. Same rules as ITLB_MISS"),
+ PERF_EVENT_DESC_ENTRY(ARMV7_PERFCTR_DTLB_REFILL, minv, maxv,
+ "DTLB_REFILL", "Data TLB refill. Same rules as ITLB_MISS"),
+ PERF_EVENT_DESC_ENTRY(ARMV7_PERFCTR_DREAD, minv, maxv, "DREAD",
+ "Data read executed (including SWP)"),
+ PERF_EVENT_DESC_ENTRY(ARMV7_PERFCTR_DWRITE, minv, maxv, "DWRITE",
+ "Data write executed (including SWP)"),
+
+ PERF_EVENT_DESC_ENTRY(ARMV7_PERFCTR_EXC_TAKEN, minv, maxv,
+ "EXC_TAKEN", "Exception taken"),
+ PERF_EVENT_DESC_ENTRY(ARMV7_PERFCTR_EXC_EXECUTED, minv, maxv,
+ "EXC_EXECUTED", "Exception return executed"),
+ PERF_EVENT_DESC_ENTRY(ARMV7_PERFCTR_CID_WRITE, minv, maxv,
+ "CID_WRITE", "Context ID register written"),
+ PERF_EVENT_DESC_ENTRY(ARMV7_PERFCTR_PC_WRITE, minv, maxv, "PC_WRITE",
+ "Software change of the PC (R15)"),
+ PERF_EVENT_DESC_ENTRY(ARMV7_PERFCTR_PC_IMM_BRANCH, minv, maxv,
+ "PC_IMM_BRANCH", "Immediate branch (B[L], BLX, CB[N]Z, HB[L],"
+ " HBLP), including conditional that fail"),
+ PERF_EVENT_DESC_ENTRY(ARMV7_PERFCTR_UNALIGNED_ACCESS, minv, maxv,
+ "UNALIGNED_ACCESS", "Data access unaligned to the transfer"
+ " size"),
+ PERF_EVENT_DESC_ENTRY(ARMV7_PERFCTR_PC_BRANCH_MIS_PRED, minv, maxv,
+ "BRANCH_MISS_PRED", "Branch misprediction or not predicted"),
+ PERF_EVENT_DESC_ENTRY(ARMV7_PERFCTR_CLOCK_CYCLES, minv, maxv,
+ "CLOCK_CYCLES", "Cycle count"),
+
+ PERF_EVENT_DESC_ENTRY(ARMV7_PERFCTR_PC_BRANCH_MIS_USED, minv, maxv,
+ "BRANCH_MIS_USED", "Branch or other program flow change that "
+ "could have been predicted"),
+ PERF_EVENT_DESC_ENTRY(ARMV7_PERFCTR_CPU_CYCLES, minv, maxv,
+ "CPU_CYCLES", "measures cpu cycles, the only allowed event"
+ " for the first counter")
+};
+
+static struct perf_event_description cortexa8_event_description[] = {
+ /* Cortex A8 specific events */
+ PERF_EVENT_DESC_ENTRY(ARMV7_PERFCTR_INSTR_EXECUTED, minv, maxv,
+ "INSTR_EXECUTED", "Instruction executed (including conditional"
+ " that don't pass)"),
+
+ PERF_EVENT_DESC_ENTRY(ARMV7_PERFCTR_PC_PROC_RETURN, minv, maxv,
+ "PC_PROC_RETURN", "Procedure return (BX LR; MOV PC, LR; POP "
+ "{.., PC} and such)"),
+
+ PERF_EVENT_DESC_ENTRY(ARMV7_PERFCTR_WRITE_BUFFER_FULL, minv, maxv,
+ "WRITE_BUFFER_FULL", "Write buffer full cycle"),
+ PERF_EVENT_DESC_ENTRY(ARMV7_PERFCTR_L2_STORE_MERGED, minv, maxv,
+ "L2_STORE_MERGED", "Store that is merged in the L2 memory"),
+ PERF_EVENT_DESC_ENTRY(ARMV7_PERFCTR_L2_STORE_BUFF, minv, maxv,
+ "L2_STORE_BUFF", "A bufferable store from load/store to L2"
+ " cache, evictions and cast out data don't count (?)"),
+ PERF_EVENT_DESC_ENTRY(ARMV7_PERFCTR_L2_ACCESS, minv, maxv, "L2_ACCESS",
+ "L2 cache access"),
+ PERF_EVENT_DESC_ENTRY(ARMV7_PERFCTR_L2_CACH_MISS, minv, maxv,
+ "L2_CACH_MISS", "L2 cache miss"),
+ PERF_EVENT_DESC_ENTRY(ARMV7_PERFCTR_AXI_READ_CYCLES, minv, maxv,
+ "AXI_READ_CYCLES", "AXI read data transfers"),
+ PERF_EVENT_DESC_ENTRY(ARMV7_PERFCTR_AXI_WRITE_CYCLES, minv, maxv,
+ "AXI_WRITE_CYCLES", "AXI write data transfers"),
+ PERF_EVENT_DESC_ENTRY(ARMV7_PERFCTR_MEMORY_REPLAY, minv, maxv,
+ "MEMORY_REPLAY", "Replay event in the memory subsystem (?)"),
+ PERF_EVENT_DESC_ENTRY(ARMV7_PERFCTR_UNALIGNED_ACCESS_REPLAY, minv, maxv,
+ "UNALIGNED_ACCESS_REPLAY", "An unaligned memory access that"
+ " results in a replay (?)"),
+ PERF_EVENT_DESC_ENTRY(ARMV7_PERFCTR_L1_DATA_MISS, minv, maxv,
+ "L1_DATA_MISS", "L1 data cache miss"),
+ PERF_EVENT_DESC_ENTRY(ARMV7_PERFCTR_L1_INST_MISS, minv, maxv,
+ "L1_INST_MISS", "L1 instruction cache miss"),
+ PERF_EVENT_DESC_ENTRY(ARMV7_PERFCTR_L1_DATA_COLORING, minv, maxv,
+ "L1_DATA_COLORING", "L1 access that triggers eviction or cast"
+ " out (page coloring alias)"),
+ PERF_EVENT_DESC_ENTRY(ARMV7_PERFCTR_L1_NEON_DATA, minv, maxv,
+ "L1_NEON_DATA", "A NEON access that hits the L1 DCache"),
+ PERF_EVENT_DESC_ENTRY(ARMV7_PERFCTR_L1_NEON_CACH_DATA, minv, maxv,
+ "L1_NEON_CACH_DATA", "A cacheable NEON access that hits the"
+ " L1 DCache"),
+ PERF_EVENT_DESC_ENTRY(ARMV7_PERFCTR_L2_NEON, minv, maxv, "L2_NEON",
+ "A NEON access memory access that results in L2 being"
+ " accessed"),
+ PERF_EVENT_DESC_ENTRY(ARMV7_PERFCTR_L2_NEON_HIT, minv, maxv,
+ "L2_NEON_HIT", "A NEON hit in the L2"),
+ PERF_EVENT_DESC_ENTRY(ARMV7_PERFCTR_L1_INST, minv, maxv, "L1_INST",
+ "A L1 instruction access (CP15 cache ops don't count)"),
+ PERF_EVENT_DESC_ENTRY(ARMV7_PERFCTR_PC_RETURN_MIS_PRED, minv, maxv,
+ "PC_RETURN_MIS_PRED", "A return stack misprediction because"
+ " of incorrect stack address"),
+ PERF_EVENT_DESC_ENTRY(ARMV7_PERFCTR_PC_BRANCH_FAILED, minv, maxv,
+ "PC_BRANCH_FAILED", "Branch misprediction (both ways)"),
+ PERF_EVENT_DESC_ENTRY(ARMV7_PERFCTR_PC_BRANCH_TAKEN, minv, maxv,
+ "PC_BRANCH_TAKEN", "Predictable branch predicted taken"),
+ PERF_EVENT_DESC_ENTRY(ARMV7_PERFCTR_PC_BRANCH_EXECUTED, minv, maxv,
+ "PC_BRANCH_EXECUTED", "Predictable branch executed taken"),
+ PERF_EVENT_DESC_ENTRY(ARMV7_PERFCTR_OP_EXECUTED, minv, maxv,
+ "OP_EXECUTED", "uOP executed (an instruction or a "
+ "multi-instruction step)"),
+ PERF_EVENT_DESC_ENTRY(ARMV7_PERFCTR_CYCLES_INST_STALL, minv, maxv,
+ "CYCLES_INST_STALL", "Instruction issue unit idle cycle"),
+ PERF_EVENT_DESC_ENTRY(ARMV7_PERFCTR_CYCLES_INST, minv, maxv,
+ "CYCLES_INST", "Instruction issued (multicycle instruction "
+ "counts for one)"),
+ PERF_EVENT_DESC_ENTRY(ARMV7_PERFCTR_CYCLES_NEON_DATA_STALL, minv, maxv,
+ "CYCLES_NEON_DATA_STALL", "Cycles the CPU waits on MRC "
+ "from NEON"),
+ PERF_EVENT_DESC_ENTRY(ARMV7_PERFCTR_CYCLES_NEON_INST_STALL, minv, maxv,
+ "CYCLES_NEON_INST_STALL", "Stall cycles caused by full NEON"
+ " queue (either ins. queue or load queue)"),
+ PERF_EVENT_DESC_ENTRY(ARMV7_PERFCTR_NEON_CYCLES, minv, maxv,
+ "NEON_CYCLES", "Cycles that both processors (ARM & NEON)"
+ " are not idle"),
+
+ PERF_EVENT_DESC_ENTRY(ARMV7_PERFCTR_PMU0_EVENTS, minv, maxv,
+ "PMU0_EVENTS", "Event on external input source (PMUEXTIN[0])"),
+ PERF_EVENT_DESC_ENTRY(ARMV7_PERFCTR_PMU1_EVENTS, minv, maxv,
+ "PMU1_EVENTS", "Event on external input source (PMUEXTIN[1])"),
+ PERF_EVENT_DESC_ENTRY(ARMV7_PERFCTR_PMU_EVENTS, minv, maxv,
+ "PMU_EVENTS", "Event on either of the external input sources"
+ " (PMUEXTIN[0,1])")
+};
+
+static struct perf_event_description cortexa9_event_description[] = {
+ /* ARMv7 Cortex-A9 specific event types */
+ PERF_EVENT_DESC_ENTRY(ARMV7_PERFCTR_JAVA_HW_BYTECODE_EXEC, minv, maxv,
+ "JAVA_HW_BYTECODE_EXEC", "Java bytecode executed in HW"),
+ PERF_EVENT_DESC_ENTRY(ARMV7_PERFCTR_JAVA_SW_BYTECODE_EXEC, minv, maxv,
+ "JAVA_SW_BYTECODE_EXEC", "Java bytecode executed in SW"),
+ PERF_EVENT_DESC_ENTRY(ARMV7_PERFCTR_JAZELLE_BRANCH_EXEC, minv, maxv,
+ "JAZELLE_BRANCH_EXEC", "Jazelle backward branch"),
+
+ PERF_EVENT_DESC_ENTRY(ARMV7_PERFCTR_COHERENT_LINE_MISS, minv, maxv,
+ "COHERENT_LINE_MISS", "???"),
+ PERF_EVENT_DESC_ENTRY(ARMV7_PERFCTR_COHERENT_LINE_HIT, minv, maxv,
+ "COHERENT_LINE_HIT", "???"),
+
+ PERF_EVENT_DESC_ENTRY(ARMV7_PERFCTR_ICACHE_DEP_STALL_CYCLES, minv,
+ maxv, "ICACHE_DEP_STALL_CYCLES", "Instruction cache "
+ "dependent stall"),
+ PERF_EVENT_DESC_ENTRY(ARMV7_PERFCTR_DCACHE_DEP_STALL_CYCLES, minv,
+ maxv, "DCACHE_DEP_STALL_CYCLES", "Data cache dependent stall"),
+ PERF_EVENT_DESC_ENTRY(ARMV7_PERFCTR_TLB_MISS_DEP_STALL_CYCLES, minv,
+ maxv, "TLB_MISS_DEP_STALL_CYCLES", "Main TLB miss stall"),
+ PERF_EVENT_DESC_ENTRY(ARMV7_PERFCTR_STREX_EXECUTED_PASSED, minv, maxv,
+ "STREX_EXECUTED_PASSED", "STREX passed"),
+ PERF_EVENT_DESC_ENTRY(ARMV7_PERFCTR_STREX_EXECUTED_FAILED, minv, maxv,
+ "STREX_EXECUTED_FAILED", "STREX failed"),
+ PERF_EVENT_DESC_ENTRY(ARMV7_PERFCTR_DATA_EVICTION, minv, maxv,
+ "DATA_EVICTION", "Cache data eviction (?)"),
+ PERF_EVENT_DESC_ENTRY(ARMV7_PERFCTR_ISSUE_STAGE_NO_INST, minv, maxv,
+ "ISSUE_STAGE_NO_INST", "No instruction issued cycle"),
+ PERF_EVENT_DESC_ENTRY(ARMV7_PERFCTR_ISSUE_STAGE_EMPTY, minv, maxv,
+ "ISSUE_STAGE_EMPTY", "Empty issue unit cycles"),
+ PERF_EVENT_DESC_ENTRY(ARMV7_PERFCTR_INST_OUT_OF_RENAME_STAGE, minv,
+ maxv, "INST_OUT_OF_RENAME_STAGE", "???"),
+
+ PERF_EVENT_DESC_ENTRY(ARMV7_PERFCTR_PREDICTABLE_FUNCT_RETURNS, minv,
+ maxv, "PREDICTABLE_FUNCT_RETURNS", "Predictable return "
+ "occured (?)"),
+
+ PERF_EVENT_DESC_ENTRY(ARMV7_PERFCTR_MAIN_UNIT_EXECUTED_INST, minv,
+ maxv, "MAIN_UNIT_EXECUTED_INST", "Pipe 0 instruction "
+ "executed (?)"),
+ PERF_EVENT_DESC_ENTRY(ARMV7_PERFCTR_SECOND_UNIT_EXECUTED_INST, minv,
+ maxv, "SECOND_UNIT_EXECUTED_INST", "Pipe 1 instruction "
+ "executed (?)"),
+ PERF_EVENT_DESC_ENTRY(ARMV7_PERFCTR_LD_ST_UNIT_EXECUTED_INST, minv,
+ maxv, "LD_ST_UNIT_EXECUTED_INST", "Load/Store Unit instruction"
+ " executed (?)"),
+ PERF_EVENT_DESC_ENTRY(ARMV7_PERFCTR_FP_EXECUTED_INST, minv, maxv,
+ "FP_EXECUTED_INST", "VFP instruction executed (?)"),
+ PERF_EVENT_DESC_ENTRY(ARMV7_PERFCTR_NEON_EXECUTED_INST, minv, maxv,
+ "NEON_EXECUTED_INST", "NEON instruction executed (?)"),
+
+ PERF_EVENT_DESC_ENTRY(ARMV7_PERFCTR_PLD_FULL_DEP_STALL_CYCLES,
+ minv, maxv, "PLD_FULL_DEP_STALL_CYCLES", "PLD stall cycle"),
+ PERF_EVENT_DESC_ENTRY(ARMV7_PERFCTR_DATA_WR_DEP_STALL_CYCLES, minv,
+ maxv, "DATA_WR_DEP_STALL_CYCLES", "Write stall cycle"),
+ PERF_EVENT_DESC_ENTRY(ARMV7_PERFCTR_ITLB_MISS_DEP_STALL_CYCLES, minv,
+ maxv, "ITLB_MISS_DEP_STALL_CYCLES", "Instruction stall due to"
+ " main TLB miss (?)"),
+ PERF_EVENT_DESC_ENTRY(ARMV7_PERFCTR_DTLB_MISS_DEP_STALL_CYCLES, minv,
+ maxv, "DTLB_MISS_DEP_STALL_CYCLES", "Data stall due to main TLB"
+ " miss (?)"),
+ PERF_EVENT_DESC_ENTRY(ARMV7_PERFCTR_MICRO_ITLB_MISS_DEP_STALL_CYCLES,
+ minv, maxv, "MICRO_ITLB_MISS_DEP_STALL_CYCLES", "Instruction "
+ "stall due to uTLB miss (?)"),
+ PERF_EVENT_DESC_ENTRY(ARMV7_PERFCTR_MICRO_DTLB_MISS_DEP_STALL_CYCLES,
+ minv, maxv, "MICRO_DTLB_MISS_DEP_STALL_CYCLES", "Data stall "
+ "due to micro uTLB miss (?)"),
+ PERF_EVENT_DESC_ENTRY(ARMV7_PERFCTR_DMB_DEP_STALL_CYCLES, minv, maxv,
+ "DMB_DEP_STALL_CYCLES", "DMB stall (?)"),
+
+ PERF_EVENT_DESC_ENTRY(ARMV7_PERFCTR_INTGR_CLK_ENABLED_CYCLES, minv,
+ maxv, "INTGR_CLK_ENABLED_CYCLES", "Integer core clock "
+ "disabled (?)"),
+ PERF_EVENT_DESC_ENTRY(ARMV7_PERFCTR_DATA_ENGINE_CLK_EN_CYCLES, minv,
+ maxv, "DATA_ENGINE_CLK_EN_CYCLES", "Data engine clock disabled"
+ " (?)"),
+
+ PERF_EVENT_DESC_ENTRY(ARMV7_PERFCTR_ISB_INST, minv, maxv, "ISB_INST",
+ "ISB executed"),
+ PERF_EVENT_DESC_ENTRY(ARMV7_PERFCTR_DSB_INST, minv, maxv, "DSB_INST",
+ "DSB executed"),
+ PERF_EVENT_DESC_ENTRY(ARMV7_PERFCTR_DMB_INST, minv, maxv, "DMB_INST",
+ "DMB executed"),
+ PERF_EVENT_DESC_ENTRY(ARMV7_PERFCTR_EXT_INTERRUPTS, minv, maxv,
+ "EXT_INTERRUPTS", "External interrupt"),
+
+ PERF_EVENT_DESC_ENTRY(ARMV7_PERFCTR_PLE_CACHE_LINE_RQST_COMPLETED,
+ minv, maxv, "PLE_CACHE_LINE_RQST_COMPLETED", "PLE (Preload "
+ "engine) cache line request completed"),
+ PERF_EVENT_DESC_ENTRY(ARMV7_PERFCTR_PLE_CACHE_LINE_RQST_SKIPPED, minv,
+ maxv, "PLE_CACHE_LINE_RQST_SKIPPED", "PLE cache line "
+ "request skipped"),
+ PERF_EVENT_DESC_ENTRY(ARMV7_PERFCTR_PLE_FIFO_FLUSH, minv, maxv,
+ "PLE_FIFO_FLUSH", "PLE FIFO flush"),
+ PERF_EVENT_DESC_ENTRY(ARMV7_PERFCTR_PLE_RQST_COMPLETED, minv, maxv,
+ "PLE_RQST_COMPLETED", "PLE request completed"),
+ PERF_EVENT_DESC_ENTRY(ARMV7_PERFCTR_PLE_FIFO_OVERFLOW, minv, maxv,
+ "PLE_FIFO_OVERFLOW", "PLE FIFO overflow"),
+ PERF_EVENT_DESC_ENTRY(ARMV7_PERFCTR_PLE_RQST_PROG, minv, maxv,
+ "PLE_RQST_PROG", "PLE request programmed")
+};
+
+
+/* ********************************************************** */
+
/*
* Cortex-A9 HW events mapping
*/
@@ -2094,6 +2410,10 @@ init_hw_perf_events(void)
memcpy(armpmu_perf_cache_map, armv6_perf_cache_map,
sizeof(armv6_perf_cache_map));
perf_max_events = armv6pmu.num_events;
+
+ perf_event_add_events(&perf_events_arm,
+ armv6_event_description,
+ ARRAY_SIZE(armv6_event_description));
break;
case 0xB020: /* ARM11mpcore */
armpmu = &armv6mpcore_pmu;
@@ -2113,6 +2433,13 @@ init_hw_perf_events(void)
supported */
armv7pmu.num_events = armv7_reset_read_pmnc();
perf_max_events = armv7pmu.num_events;
+
+ perf_event_add_events(&perf_events_arm,
+ armv7_event_description,
+ ARRAY_SIZE(armv7_event_description));
+ perf_event_add_events(&perf_events_arm,
+ cortexa8_event_description,
+ ARRAY_SIZE(cortexa8_event_description));
break;
case 0xC090: /* Cortex-A9 */
armv7pmu.name = ARMV7_PMU_CORTEX_A9_NAME;
@@ -2121,6 +2448,12 @@ init_hw_perf_events(void)
armv7pmu.event_map = armv7_a9_pmu_event_map;
armpmu = &armv7pmu;
+ perf_event_add_events(&perf_events_arm,
+ armv7_event_description,
+ ARRAY_SIZE(armv7_event_description));
+ perf_event_add_events(&perf_events_arm,
+ cortexa9_event_description,
+ ARRAY_SIZE(cortexa9_event_description));
/* Reset PMNC and read the nb of CNTx counters
supported */
armv7pmu.num_events = armv7_reset_read_pmnc();
--
1.5.4.3
Signed-off-by: Tomasz Fujak <[email protected]>
Reviewed-by: Marek Szyprowski <[email protected]>
Reviewed-by: Kyungmin Park <[email protected]>
---
tools/perf/util/parse-events.c | 217 ++++++++++++++++++++++++++++++++++++++--
1 files changed, 206 insertions(+), 11 deletions(-)
diff --git a/tools/perf/util/parse-events.c b/tools/perf/util/parse-events.c
index e5bc0fb..37a9b25 100644
--- a/tools/perf/util/parse-events.c
+++ b/tools/perf/util/parse-events.c
@@ -9,6 +9,9 @@
#include "header.h"
#include "debugfs.h"
+#define PLATFORM_EVENT_FILE_NAME\
+ "perf_events_platform"
+
int nr_counters;
struct perf_event_attr attrs[MAX_COUNTERS];
@@ -60,6 +63,10 @@ static struct event_symbol event_symbols[] = {
#define PERF_EVENT_TYPE(config) __PERF_EVENT_FIELD(config, TYPE)
#define PERF_EVENT_ID(config) __PERF_EVENT_FIELD(config, EVENT)
+static struct event_symbol *platform_event_symbols;
+static unsigned int platform_event_count;
+static int platform_events_initialized;
+
static const char *hw_event_names[] = {
"cycles",
"instructions",
@@ -241,6 +248,150 @@ static const char *tracepoint_id_to_name(u64 config)
return buf;
}
+/*
+ * Parse one line of the platform event file; the expected line format is
+ * "0x%llx\t%s\t%lld-%lld\t%s\n": config, name, min-max period, description.
+ * On success symbol->config, ->type, ->symbol (heap-allocated) and ->alias
+ * are filled in; the description is currently not stored.
+ * symbol->config may be overwritten even when the line is rejected.
+ * Returns 0 for a valid line, 1 for a malformed one, -1 when out of memory.
+ */
+static int parse_platevent_line(char *line, struct event_symbol *symbol)
+{
+	char *name, *ptr, *end;
+	long long min_value, max_value;
+
+	/* %llx consumes the optional "0x" itself; no blind skip over 2 chars */
+	if (1 != sscanf(line, "%llx", &symbol->config))
+		return 1;
+
+	/* skip 0x%llx\t; check strchr() before advancing past the tab */
+	ptr = strchr(line, '\t');
+	if (!ptr)
+		return 1;
+	++ptr;
+
+	end = strchr(ptr, '\t');
+	if (!end)
+		return 1;
+
+	/* the period range is only validated, perf does not use it yet */
+	if (2 != sscanf(end + 1, "%lld-%lld", &min_value, &max_value))
+		return 1;
+
+	/* allocate last, so that no error path above has anything to free */
+	name = strndup(ptr, end - ptr);
+	if (!name)
+		return -1;
+
+	/* set every field: slots obtained from realloc() are not zeroed */
+	symbol->type = PERF_TYPE_HARDWARE;
+	symbol->symbol = name;
+	symbol->alias = "";
+
+	return 0;
+}
+
+#define LINE_SIZE_MAX 256
+/* 0 - event ok, < 0 - unrecoverable error, > 0 - eof */
+static int extract_platevent_item(FILE *file, struct event_symbol *symbol)
+{
+ int result = -1;
+
+ do {
+ char line[LINE_SIZE_MAX];
+
+ if (!fgets(line, LINE_SIZE_MAX, file)) {
+ if (feof(file))
+ return 1;
+ if (ferror(file))
+ return -1;
+ continue;
+ }
+ result = parse_platevent_line(line, symbol);
+ } while (0 < result);
+
+ return result;
+}
+
+#define PATH_LEN_MAX 256
+/*
+ * Read the platform event list from the debugfs file platevent_entry into
+ * platform_event_symbols / platform_event_count.
+ * Returns the number of events loaded (0 if none), or -1 on error.
+ */
+static int load_platform_events(const char *platevent_entry)
+{
+	FILE *file;
+	int count = 0;
+	int capacity = 16;
+	int result;
+	char platevent_file_path[PATH_LEN_MAX];
+	struct event_symbol *symbols, *tmp;
+
+	/* if the path is of excessive length, skip it */
+	if (debugfs_make_path(platevent_entry, platevent_file_path,
+			ARRAY_SIZE(platevent_file_path)))
+		return -1;
+
+	file = fopen(platevent_file_path, "r");
+	if (!file) {
+		fprintf(stderr, "can't open platform event file '%s'\n",
+			platevent_file_path);
+		return -1;
+	}
+
+	symbols = calloc(capacity, sizeof(*symbols));
+	if (!symbols) {
+		fclose(file);
+		return -1;
+	}
+
+	/* result: 0 - entry stored, > 0 - end of file, < 0 - error */
+	do {
+		result = extract_platevent_item(file, &symbols[count]);
+		if (!result)
+			++count;
+
+		if (capacity == count) {
+			capacity <<= 1;
+			tmp = realloc(symbols, sizeof(*symbols) * capacity);
+			if (!tmp) {
+				result = -1;
+				break;
+			}
+			/* realloc() does not zero the slots it adds */
+			memset(tmp + count, 0,
+				sizeof(*tmp) * (capacity - count));
+			symbols = tmp;
+		}
+	} while (!result);
+	fclose(file);
+
+	if ((result < 0) || (0 == count)) {
+		/* ditch the collection, including the allocated names */
+		while (count > 0)
+			free((char *)symbols[--count].symbol);
+		free(symbols);
+		/* report errors as such, so that callers do not retry */
+		return (result < 0) ? -1 : 0;
+	}
+	/* trim the storage; a failed shrink leaves the old block usable */
+	tmp = realloc(symbols, sizeof(*symbols) * count);
+	platform_event_symbols = tmp ? tmp : symbols;
+	platform_event_count = count;
+	return count;
+}
+
+static struct event_symbol *platevent_find_config(u64 config)
+{
+ unsigned int i;
+ for (i = 0; i < platform_event_count; ++i)
+ if (platform_event_symbols[i].config == config)
+ return &platform_event_symbols[i];
+
+ return NULL;
+}
+
static int is_cache_op_valid(u8 cache_type, u8 cache_op)
{
if (hw_cache_stat[cache_type] & COP(cache_op))
@@ -283,10 +434,16 @@ const char *__event_name(int type, u64 config)
}
switch (type) {
- case PERF_TYPE_HARDWARE:
+ case PERF_TYPE_HARDWARE: {
+ const struct event_symbol *event;
+
if (config < PERF_COUNT_HW_MAX)
return hw_event_names[config];
+ event = platevent_find_config(config);
+ if (event)
+ return event->symbol;
return "unknown-hardware";
+ }
case PERF_TYPE_HW_CACHE: {
u8 cache_type, cache_op, cache_result;
@@ -606,33 +763,34 @@ parse_breakpoint_event(const char **strp, struct perf_event_attr *attr)
return EVT_HANDLED;
}
-static int check_events(const char *str, unsigned int i)
+static int check_event(const char *str, const struct event_symbol *event)
{
int n;
- n = strlen(event_symbols[i].symbol);
- if (!strncmp(str, event_symbols[i].symbol, n))
+ n = strlen(event->symbol);
+ if (!strncmp(str, event->symbol, n))
return n;
- n = strlen(event_symbols[i].alias);
+ n = strlen(event->alias);
if (n)
- if (!strncmp(str, event_symbols[i].alias, n))
+ if (!strncmp(str, event->alias, n))
return n;
return 0;
}
static enum event_result
-parse_symbolic_event(const char **strp, struct perf_event_attr *attr)
+do_parse_symbolic_event(const char **strp, struct perf_event_attr *attr,
+ const struct event_symbol *symbols, unsigned int count)
{
const char *str = *strp;
unsigned int i;
int n;
- for (i = 0; i < ARRAY_SIZE(event_symbols); i++) {
- n = check_events(str, i);
+ for (i = 0; i < count; ++i) {
+ n = check_event(str, &symbols[i]);
if (n > 0) {
- attr->type = event_symbols[i].type;
- attr->config = event_symbols[i].config;
+ attr->type = symbols[i].type;
+ attr->config = symbols[i].config;
*strp = str + n;
return EVT_HANDLED;
}
@@ -641,6 +799,27 @@ parse_symbolic_event(const char **strp, struct perf_event_attr *attr)
}
static enum event_result
+parse_symbolic_event(const char **strp, struct perf_event_attr *attr)
+{
+ return do_parse_symbolic_event(strp, attr,
+ event_symbols, ARRAY_SIZE(event_symbols));
+}
+
+static enum event_result
+parse_platform_event(const char **strp, struct perf_event_attr *attr)
+{
+ if (!platform_events_initialized)
+ platform_events_initialized =
+ load_platform_events(PLATFORM_EVENT_FILE_NAME);
+
+ if (platform_events_initialized < 0)
+ return EVT_FAILED;
+
+ return do_parse_symbolic_event(strp, attr, platform_event_symbols,
+ platform_event_count);
+}
+
+static enum event_result
parse_raw_event(const char **strp, struct perf_event_attr *attr)
{
const char *str = *strp;
@@ -739,6 +918,10 @@ parse_event_symbols(const char **str, struct perf_event_attr *attr)
if (ret != EVT_FAILED)
goto modifier;
+ ret = parse_platform_event(str, attr);
+ if (ret != EVT_FAILED)
+ goto modifier;
+
ret = parse_breakpoint_event(str, attr);
if (ret != EVT_FAILED)
goto modifier;
@@ -924,6 +1107,18 @@ void print_events(void)
}
}
+ if (!platform_events_initialized)
+ platform_events_initialized =
+ load_platform_events(PLATFORM_EVENT_FILE_NAME);
+
+ if (0 < platform_events_initialized) {
+ for (i = 0; i < platform_event_count; ++i)
+ printf(" %-42s [%s]\n",
+ platform_event_symbols[i].symbol,
+ "Hardware platform-specific event");
+ }
+
+
printf("\n");
printf(" %-42s [raw hardware event descriptor]\n",
"rNNN");
--
1.5.4.3
On Thu, 2010-01-28 at 10:34 +0100, Tomasz Fujak wrote:
> Human readable description support for performance events v2. With perf support included.
> Changes from v1:
> - applied on top of latest perf_event/ARM (5899/1 - 5903/1)
> - moved to debugfs, now based on seq_file
> - reads one line at a time, memory overallocation fixed [perf]
You can keep sending these patches, but I'll keep ignoring them
eventually adding you to the /dev/null redirect.
> -----Original Message-----
> From: Peter Zijlstra [mailto:[email protected]]
> Sent: Thursday, January 28, 2010 11:52 AM
> To: Tomasz Fujak
> Cc: [email protected]; [email protected];
> [email protected]; [email protected]; [email protected];
> [email protected]; [email protected]; [email protected];
> [email protected]; [email protected];
> [email protected]
> Subject: Re: [PATCH/RFC v2 0/3] Human readable platform-specific
> performance event support
>
> On Thu, 2010-01-28 at 10:34 +0100, Tomasz Fujak wrote:
> > Human readable description support for performance events v2. With
> perf support included.
> > Changes from v1:
> > - applied on top of latest perf_event/ARM (5899/1 - 5903/1)
> > - moved to debugfs, now based on seq_file
> > - reads one line at a time, memory overallocation fixed [perf]
>
> You can keep sending these patches, but I'll keep ignoring them
> eventually adding you to the /dev/null redirect.
Apparently I did not comprehend your attitude towards the events' description being exported from the kernel.
There's been a lengthy discussion which ended in a conclusion that the platform detection is a complicated task.
The solution finally accepted covers just a subset of platforms. I guess the detection scheme may be updated and possibly changed as new cores/implementers come into sight.
That I think provides additional reasoning to keep the event list where it's defined (in the kernel).
I've incorporated the rest of the suggestions (perf implementation, debugfs instead of sysfs) into the posted patches.
Therefore I cannot really understand why you're threatening to ignore my further efforts, especially since I find the arguments that my teammate Michal and I brought reasonable.
So finally, if the proposed patches are too intrusive, can you imagine another mechanism in the kernel that would let userspace unambiguously retrieve a list of supported events?
Maybe an entry in sysfs that indicates the supported event list version (relevant to the implementer, cpuid and other black magic that is involved in the platform detection)?
Right now what can be done is to try to follow the kernel implementation of the detection scheme in the applications, which I can't say I'm a fan of.
On Thu, 2010-01-28 at 12:57 +0100, Tomasz Fujak wrote:
please educate your MUA to wrap lines at ~80.
> Apparently I did not comprehend your attitude towards the events'
> description being exported from the kernel.
> There's been a lengthy discussion which ended in a conclusion that the
> platform detection is a complicated task.
If it's really that complicated, export a PMU identifier someplace.
The fact is, the kernel simply doesn't use this list, we have perf in
kernel so that resource scheduling and isolation can be done, for that
we need to know how to program the hardware and we need to know about
scheduling constraints, we do not need exhaustive lists of possible
events, let alone descriptive text for them, in the kernel.
Oops, I missed your statement from yesterday, that's why I posted the same
idea :s
Otherwise, if perf is meant to be closely tied to the kernel, maybe we
could use a (semi-)automated process to transform the event definitions
(enums in a .c file) to something perf could use. Basically a replacement
for sysfs/debugfs entry in a separately distributed file.
But that does not look so straightforward to implement to me, maybe I'll
come up with something next week.
> -----Original Message-----
> From: [email protected] [mailto:linux-arm-
> [email protected]] On Behalf Of Peter Zijlstra
> Sent: Thursday, January 28, 2010 1:14 PM
> To: Tomasz Fujak
> Cc: [email protected]; Michal Nazarewicz; Pawel Osciak;
> [email protected]; [email protected]; linux-
> [email protected]; [email protected]; [email protected];
> [email protected]; [email protected]; Marek Szyprowski
> Subject: RE: [PATCH/RFC v2 0/3] Human readable platform-specific
> performance event support
>
> On Thu, 2010-01-28 at 12:57 +0100, Tomasz Fujak wrote:
>
> please educate your MUA to wrap lines at ~80.
>
> > Apparently I did not comprehend your attitude towards the events'
> > description being exported from the kernel.
> > There's been a lengthy discussion which ended in a conclusion that
> the
> > platform detection is a complicated task.
>
> If it's really that complicated, export a PMU identifier someplace.
>
> The fact is, the kernel simply doesn't use this list, we have perf in
> kernel so that resource scheduling and isolation can be done, for that
> we need to know how to program the hardware and we need to know about
> scheduling constraints, we do not need exhaustive lists of possible
> events, let alone descriptive text for them, in the kernel.
>
>
>
>
> _______________________________________________
> linux-arm-kernel mailing list
> [email protected]
> http://lists.infradead.org/mailman/listinfo/linux-arm-kernel