2009-06-26 21:42:37

by Jaswinder Singh Rajput

[permalink] [raw]
Subject: [GIT PULL -tip][PATCH 0/3 -tip] perf stat patches

Ingo,

This patch set:

fixes the perf stat output to handle long event names like 'L1-d$-prefetch-misses'

adds support for selecting all HARDWARE, SOFTWARE and cache events in one shot:
perf stat -e all-sw-events
perf stat -e sw-events
perf stat -e all-hw-events
perf stat -e hw-events
perf stat -e all-cache-events
perf stat -e cache-events

The following changes since commit 18fee47ce1ce511b088ddf65f4e5eb700a9f297a:
Ingo Molnar (1):
Merge branch 'perfcounters/urgent'

are available in the git repository at:

git://git.kernel.org/pub/scm/linux/kernel/git/jaswinder/linux-2.6-tip.git master

Jaswinder Singh Rajput (3):
perf stat: fix stat output
perf_counter tools: Add support to set of multiple events in one shot
perf_counter tools: Add support for all CACHE events

tools/perf/builtin-stat.c | 11 ++--
tools/perf/util/parse-events.c | 129 +++++++++++++++++++++++++++++++++++++++-
2 files changed, 132 insertions(+), 8 deletions(-)

Complete diff :

diff --git a/tools/perf/builtin-stat.c b/tools/perf/builtin-stat.c
index 8420ec5..9ff9dd5 100644
--- a/tools/perf/builtin-stat.c
+++ b/tools/perf/builtin-stat.c
@@ -32,6 +32,7 @@
* Wu Fengguang <[email protected]>
* Mike Galbraith <[email protected]>
* Paul Mackerras <[email protected]>
+ * Jaswinder Singh Rajput <[email protected]>
*
* Released under the GPL v2. (and only v2, not any later version)
*/
@@ -250,7 +251,7 @@ static void nsec_printout(int counter, u64 *count, u64 *noise)
{
double msecs = (double)count[0] / 1000000;

- fprintf(stderr, " %14.6f %-20s", msecs, event_name(counter));
+ fprintf(stderr, " %14.6f %-24s", msecs, event_name(counter));

if (attrs[counter].type == PERF_TYPE_SOFTWARE &&
attrs[counter].config == PERF_COUNT_SW_TASK_CLOCK) {
@@ -264,7 +265,7 @@ static void nsec_printout(int counter, u64 *count, u64 *noise)

static void abs_printout(int counter, u64 *count, u64 *noise)
{
- fprintf(stderr, " %14Ld %-20s", count[0], event_name(counter));
+ fprintf(stderr, " %14Ld %-24s", count[0], event_name(counter));

if (runtime_cycles_avg &&
attrs[counter].type == PERF_TYPE_HARDWARE &&
@@ -294,7 +295,7 @@ static void print_counter(int counter)
scaled = event_scaled_avg[counter];

if (scaled == -1) {
- fprintf(stderr, " %14s %-20s\n",
+ fprintf(stderr, " %14s %-24s\n",
"<not counted>", event_name(counter));
return;
}
@@ -305,8 +306,7 @@ static void print_counter(int counter)
abs_printout(counter, count, noise);

if (scaled)
- fprintf(stderr, " (scaled from %.2f%%)",
- (double) count[2] / count[1] * 100);
+ fprintf(stderr, " (%7.2fx scaled)", (double)count[1]/count[2]);

fprintf(stderr, "\n");
}
@@ -417,7 +417,6 @@ static void print_stat(int argc, const char **argv)
for (counter = 0; counter < nr_counters; counter++)
print_counter(counter);

-
fprintf(stderr, "\n");
fprintf(stderr, " %14.9f seconds time elapsed.\n",
(double)walltime_nsecs_avg/1e9);
diff --git a/tools/perf/util/parse-events.c b/tools/perf/util/parse-events.c
index 4d042f1..331b296 100644
--- a/tools/perf/util/parse-events.c
+++ b/tools/perf/util/parse-events.c
@@ -40,6 +40,68 @@ static struct event_symbol event_symbols[] = {
{ CSW(CPU_MIGRATIONS), "cpu-migrations", "migrations" },
};

+struct event_type_symbol {
+ char *symbol;
+ char *alias;
+};
+
+static struct event_type_symbol event_type_symbols[] = {
+ [PERF_TYPE_HARDWARE] = { "hw-events", "all-hw-events", },
+ [PERF_TYPE_SOFTWARE] = { "sw-events", "all-sw-events", },
+ [PERF_TYPE_TRACEPOINT] = { "", "", },
+ [PERF_TYPE_HW_CACHE] = { "cache-events", "all-cache-events", },
+ [PERF_TYPE_RAW] = { "", "", },
+};
+
+struct event_cache_symbol {
+ u8 type;
+ u64 config;
+};
+
+#define CHCACHE(x, y, z) \
+.type = PERF_TYPE_HW_CACHE, \
+.config = (PERF_COUNT_HW_CACHE_##x | (PERF_COUNT_HW_CACHE_OP_##y << 8) |\
+ (PERF_COUNT_HW_CACHE_RESULT_##z << 16))
+
+/*
+ * Generalized Hardware cache counters events
+ * L1I is READ and PREFETCH only
+ * ITLB and BPU is READ only
+ */
+static struct event_cache_symbol event_cache_symbols[] = {
+ { CHCACHE(L1D, READ, ACCESS) },
+ { CHCACHE(L1D, READ, MISS) },
+ { CHCACHE(L1D, WRITE, ACCESS) },
+ { CHCACHE(L1D, WRITE, MISS) },
+ { CHCACHE(L1D, PREFETCH, ACCESS) },
+ { CHCACHE(L1D, PREFETCH, MISS) },
+
+ { CHCACHE(L1I, READ, ACCESS) },
+ { CHCACHE(L1I, READ, MISS) },
+ { CHCACHE(L1D, PREFETCH, ACCESS) },
+ { CHCACHE(L1D, PREFETCH, MISS) },
+
+ { CHCACHE(LL, READ, ACCESS) },
+ { CHCACHE(LL, READ, MISS) },
+ { CHCACHE(LL, WRITE, ACCESS) },
+ { CHCACHE(LL, WRITE, MISS) },
+ { CHCACHE(LL, PREFETCH, ACCESS) },
+ { CHCACHE(LL, PREFETCH, MISS) },
+
+ { CHCACHE(DTLB, READ, ACCESS) },
+ { CHCACHE(DTLB, READ, MISS) },
+ { CHCACHE(DTLB, WRITE, ACCESS) },
+ { CHCACHE(DTLB, WRITE, MISS) },
+ { CHCACHE(DTLB, PREFETCH, ACCESS) },
+ { CHCACHE(DTLB, PREFETCH, MISS) },
+
+ { CHCACHE(ITLB, READ, ACCESS) },
+ { CHCACHE(ITLB, READ, MISS) },
+
+ { CHCACHE(BPU, READ, ACCESS) },
+ { CHCACHE(BPU, READ, MISS) },
+};
+
#define __PERF_COUNTER_FIELD(config, name) \
((config & PERF_COUNTER_##name##_MASK) >> PERF_COUNTER_##name##_SHIFT)

@@ -237,6 +299,60 @@ parse_generic_hw_symbols(const char *str, struct perf_counter_attr *attr)
return 0;
}

+static int set_multiple_events(unsigned int type)
+{
+ struct perf_counter_attr attr;
+ int i;
+
+ switch (type) {
+ case PERF_TYPE_HARDWARE:
+ case PERF_TYPE_SOFTWARE:
+ for (i = 0; i < ARRAY_SIZE(event_symbols); i++) {
+ if (event_symbols[i].type == type) {
+ memset(&attr, 0, sizeof(attr));
+ attr.type = event_symbols[i].type;
+ attr.config = event_symbols[i].config;
+ attrs[nr_counters] = attr;
+ nr_counters++;
+ }
+ }
+ break;
+
+ case PERF_TYPE_HW_CACHE:
+ for (i = 0; i < ARRAY_SIZE(event_cache_symbols); i++) {
+ memset(&attr, 0, sizeof(attr));
+ attr.type = event_cache_symbols[i].type;
+ attr.config = event_cache_symbols[i].config;
+ attrs[nr_counters] = attr;
+ nr_counters++;
+ }
+ break;
+
+ default:
+ return -1;
+ }
+
+ /*
+ * parse_events() is assuming that only single event will be set,
+ * but we are setting multiple events so we need to return magical 1
+ */
+ return 1;
+}
+
+static int check_type_events(const char *str, unsigned int i)
+{
+ if (strlen(event_type_symbols[i].symbol))
+ if (!strncmp(str, event_type_symbols[i].symbol,
+ strlen(event_type_symbols[i].symbol)))
+ return 1;
+
+ if (strlen(event_type_symbols[i].alias))
+ if (!strncmp(str, event_type_symbols[i].alias,
+ strlen(event_type_symbols[i].alias)))
+ return 1;
+ return 0;
+}
+
static int check_events(const char *str, unsigned int i)
{
if (!strncmp(str, event_symbols[i].symbol,
@@ -288,6 +404,12 @@ static int parse_event_symbols(const char *str, struct perf_counter_attr *attr)
return 0;
}

+ for (i = 0; i < ARRAY_SIZE(event_type_symbols); i++) {
+ if (check_type_events(str, i)) {
+ return set_multiple_events(i);
+ }
+ }
+
for (i = 0; i < ARRAY_SIZE(event_symbols); i++) {
if (check_events(str, i)) {
attr->type = event_symbols[i].type;
@@ -314,8 +436,11 @@ again:
if (ret < 0)
return ret;

- attrs[nr_counters] = attr;
- nr_counters++;
+ /* No need to set attrs and increment counter when already set */
+ if (ret == 0) {
+ attrs[nr_counters] = attr;
+ nr_counters++;
+ }

str = strstr(str, ",");
if (str) {


2009-06-26 21:42:48

by Jaswinder Singh Rajput

[permalink] [raw]
Subject: [PATCH 1/3 -tip] perf stat: fix stat output


Increase size for event name to handle big names like 'L1-d$-prefetch-misses'

Changed the scaled output from a percentage to a multiplicative factor,
because the latter is more expressive.

Also aligned the scaling factor, otherwise it sometimes looks like:

384 iTLB-load-misses (4.74x scaled)
452029 branch-loads (8.00x scaled)
5892 branch-load-misses (20.39x scaled)
972315 iTLB-loads (3.24x scaled)

Before :
150708 L1-d$-stores (scaled from 23.57%)
428804 L1-d$-prefetches (scaled from 23.47%)
314446 L1-d$-prefetch-misses (scaled from 23.42%)
252626137 L1-i$-loads (scaled from 23.24%)
5297550 dTLB-load-misses (scaled from 23.96%)
106992392 branch-loads (scaled from 23.67%)
5239561 branch-load-misses (scaled from 23.43%)

After :

1731713 L1-d$-loads ( 14.25x scaled)
44241 L1-d$-prefetches ( 3.88x scaled)
21076 L1-d$-prefetch-misses ( 3.40x scaled)
5789421 L1-i$-loads ( 3.78x scaled)
29645 dTLB-load-misses ( 2.95x scaled)
461474 branch-loads ( 6.52x scaled)
7493 branch-load-misses ( 26.57x scaled)

Reported-by: Ingo Molnar <[email protected]>
Signed-off-by: Jaswinder Singh Rajput <[email protected]>
---
tools/perf/builtin-stat.c | 11 +++++------
1 files changed, 5 insertions(+), 6 deletions(-)

diff --git a/tools/perf/builtin-stat.c b/tools/perf/builtin-stat.c
index 8420ec5..9ff9dd5 100644
--- a/tools/perf/builtin-stat.c
+++ b/tools/perf/builtin-stat.c
@@ -32,6 +32,7 @@
* Wu Fengguang <[email protected]>
* Mike Galbraith <[email protected]>
* Paul Mackerras <[email protected]>
+ * Jaswinder Singh Rajput <[email protected]>
*
* Released under the GPL v2. (and only v2, not any later version)
*/
@@ -250,7 +251,7 @@ static void nsec_printout(int counter, u64 *count, u64 *noise)
{
double msecs = (double)count[0] / 1000000;

- fprintf(stderr, " %14.6f %-20s", msecs, event_name(counter));
+ fprintf(stderr, " %14.6f %-24s", msecs, event_name(counter));

if (attrs[counter].type == PERF_TYPE_SOFTWARE &&
attrs[counter].config == PERF_COUNT_SW_TASK_CLOCK) {
@@ -264,7 +265,7 @@ static void nsec_printout(int counter, u64 *count, u64 *noise)

static void abs_printout(int counter, u64 *count, u64 *noise)
{
- fprintf(stderr, " %14Ld %-20s", count[0], event_name(counter));
+ fprintf(stderr, " %14Ld %-24s", count[0], event_name(counter));

if (runtime_cycles_avg &&
attrs[counter].type == PERF_TYPE_HARDWARE &&
@@ -294,7 +295,7 @@ static void print_counter(int counter)
scaled = event_scaled_avg[counter];

if (scaled == -1) {
- fprintf(stderr, " %14s %-20s\n",
+ fprintf(stderr, " %14s %-24s\n",
"<not counted>", event_name(counter));
return;
}
@@ -305,8 +306,7 @@ static void print_counter(int counter)
abs_printout(counter, count, noise);

if (scaled)
- fprintf(stderr, " (scaled from %.2f%%)",
- (double) count[2] / count[1] * 100);
+ fprintf(stderr, " (%7.2fx scaled)", (double)count[1]/count[2]);

fprintf(stderr, "\n");
}
@@ -417,7 +417,6 @@ static void print_stat(int argc, const char **argv)
for (counter = 0; counter < nr_counters; counter++)
print_counter(counter);

-
fprintf(stderr, "\n");
fprintf(stderr, " %14.9f seconds time elapsed.\n",
(double)walltime_nsecs_avg/1e9);
--
1.6.0.6


2009-06-26 21:43:01

by Jaswinder Singh Rajput

[permalink] [raw]
Subject: [PATCH 2/3 -tip] perf_counter tools: Add support to set of multiple events in one shot


Add support for HARDWARE and SOFTWARE events :
perf stat -e all-sw-events
perf stat -e sw-events
perf stat -e all-hw-events
perf stat -e hw-events

On AMD box :

$ ./perf stat -e hw-events -e all-sw-events -- ls -lR /usr/include/ > /dev/null

Performance counter stats for 'ls -lR /usr/include/':

744418792 cycles # 2027.230 M/sec ( 3.28x scaled)
515314667 instructions # 0.692 IPC ( 3.29x scaled)
247900772 cache-references # 675.093 M/sec ( 1.18x scaled)
3587971 cache-misses # 9.771 M/sec ( 1.18x scaled)
65830547 branches # 179.272 M/sec ( 1.18x scaled)
3743637 branch-misses # 10.195 M/sec ( 1.18x scaled)
<not counted> bus-cycles
367.880756 cpu-clock-msecs
367.209910 task-clock-msecs # 0.990 CPUs
441 page-faults # 0.001 M/sec
441 minor-faults # 0.001 M/sec
0 major-faults # 0.000 M/sec
41 context-switches # 0.000 M/sec
1 CPU-migrations # 0.000 M/sec

0.371065298 seconds time elapsed.

Reported-by: Ingo Molnar <[email protected]>
Signed-off-by: Jaswinder Singh Rajput <[email protected]>
---
tools/perf/util/parse-events.c | 66 ++++++++++++++++++++++++++++++++++++++-
1 files changed, 64 insertions(+), 2 deletions(-)

diff --git a/tools/perf/util/parse-events.c b/tools/perf/util/parse-events.c
index 4d042f1..a368728 100644
--- a/tools/perf/util/parse-events.c
+++ b/tools/perf/util/parse-events.c
@@ -40,6 +40,16 @@ static struct event_symbol event_symbols[] = {
{ CSW(CPU_MIGRATIONS), "cpu-migrations", "migrations" },
};

+struct event_type_symbol {
+ char *symbol;
+ char *alias;
+};
+
+static struct event_type_symbol event_type_symbols[] = {
+ [PERF_TYPE_HARDWARE] = { "hw-events", "all-hw-events", },
+ [PERF_TYPE_SOFTWARE] = { "sw-events", "all-sw-events", },
+};
+
#define __PERF_COUNTER_FIELD(config, name) \
((config & PERF_COUNTER_##name##_MASK) >> PERF_COUNTER_##name##_SHIFT)

@@ -237,6 +247,49 @@ parse_generic_hw_symbols(const char *str, struct perf_counter_attr *attr)
return 0;
}

+static int set_multiple_events(unsigned int type)
+{
+ struct perf_counter_attr attr;
+ int i;
+
+ switch (type) {
+ case PERF_TYPE_HARDWARE:
+ case PERF_TYPE_SOFTWARE:
+ for (i = 0; i < ARRAY_SIZE(event_symbols); i++) {
+ if (event_symbols[i].type == type) {
+ memset(&attr, 0, sizeof(attr));
+ attr.type = event_symbols[i].type;
+ attr.config = event_symbols[i].config;
+ attrs[nr_counters] = attr;
+ nr_counters++;
+ }
+ }
+ break;
+
+ default:
+ return -1;
+ }
+
+ /*
+ * parse_events() is assuming that only single event will be set,
+ * but we are setting multiple events so we need to return magical 1
+ */
+ return 1;
+}
+
+static int check_type_events(const char *str, unsigned int i)
+{
+ if (!strncmp(str, event_type_symbols[i].symbol,
+ strlen(event_type_symbols[i].symbol)))
+ return 1;
+
+ if (strlen(event_type_symbols[i].alias))
+ if (!strncmp(str, event_type_symbols[i].alias,
+ strlen(event_type_symbols[i].alias)))
+ return 1;
+ return 0;
+}
+
static int check_events(const char *str, unsigned int i)
{
if (!strncmp(str, event_symbols[i].symbol,
@@ -288,6 +341,12 @@ static int parse_event_symbols(const char *str, struct perf_counter_attr *attr)
return 0;
}

+ for (i = 0; i < ARRAY_SIZE(event_type_symbols); i++) {
+ if (check_type_events(str, i)) {
+ return set_multiple_events(i);
+ }
+ }
+
for (i = 0; i < ARRAY_SIZE(event_symbols); i++) {
if (check_events(str, i)) {
attr->type = event_symbols[i].type;
@@ -314,8 +373,11 @@ again:
if (ret < 0)
return ret;

- attrs[nr_counters] = attr;
- nr_counters++;
+ /* No need to set attrs and increment counter when already set */
+ if (ret == 0) {
+ attrs[nr_counters] = attr;
+ nr_counters++;
+ }

str = strstr(str, ",");
if (str) {
--
1.6.0.6


2009-06-26 21:43:23

by Jaswinder Singh Rajput

[permalink] [raw]
Subject: [PATCH 3/3 -tip] perf_counter tools: Add support for all CACHE events


Add support for all CACHE events :
perf stat -e all-cache-events
perf stat -e cache-events

On AMD box (<not counted> events are not available on AMD):

$./perf stat -e all-cache-events -- ls -lR /usr/include/ > /dev/null

Performance counter stats for 'ls -lR /usr/include/':

246786934 L1-d$-loads ( 4.64x scaled)
936899 L1-d$-load-misses ( 4.53x scaled)
138961 L1-d$-stores ( 4.46x scaled)
<not counted> L1-d$-store-misses
348659 L1-d$-prefetches ( 4.41x scaled)
236550 L1-d$-prefetch-misses ( 4.41x scaled)
248192242 L1-i$-loads ( 4.46x scaled)
3805771 L1-i$-load-misses ( 4.46x scaled)
334292 L1-d$-prefetches ( 4.46x scaled)
239715 L1-d$-prefetch-misses ( 4.47x scaled)
4966124 LLC-loads ( 4.47x scaled)
531900 LLC-load-misses ( 4.47x scaled)
5605759 LLC-stores ( 4.47x scaled)
<not counted> LLC-store-misses
<not counted> LLC-prefetches
<not counted> LLC-prefetch-misses
253681838 dTLB-loads ( 4.48x scaled)
4634809 dTLB-load-misses ( 4.49x scaled)
<not counted> dTLB-stores
<not counted> dTLB-store-misses
<not counted> dTLB-prefetches
<not counted> dTLB-prefetch-misses
253610942 iTLB-loads ( 4.51x scaled)
3271 iTLB-load-misses ( 4.56x scaled)
105697493 branch-loads ( 4.61x scaled)
5136856 branch-load-misses ( 4.66x scaled)

0.375218449 seconds time elapsed.

Reported-by: Ingo Molnar <[email protected]>
Signed-off-by: Jaswinder Singh Rajput <[email protected]>
---
tools/perf/util/parse-events.c | 67 ++++++++++++++++++++++++++++++++++++++-
1 files changed, 65 insertions(+), 2 deletions(-)

diff --git a/tools/perf/util/parse-events.c b/tools/perf/util/parse-events.c
index a368728..331b296 100644
--- a/tools/perf/util/parse-events.c
+++ b/tools/perf/util/parse-events.c
@@ -48,6 +48,58 @@ struct event_type_symbol {
static struct event_type_symbol event_type_symbols[] = {
[PERF_TYPE_HARDWARE] = { "hw-events", "all-hw-events", },
[PERF_TYPE_SOFTWARE] = { "sw-events", "all-sw-events", },
+ [PERF_TYPE_TRACEPOINT] = { "", "", },
+ [PERF_TYPE_HW_CACHE] = { "cache-events", "all-cache-events", },
+ [PERF_TYPE_RAW] = { "", "", },
+};
+
+struct event_cache_symbol {
+ u8 type;
+ u64 config;
+};
+
+#define CHCACHE(x, y, z) \
+.type = PERF_TYPE_HW_CACHE, \
+.config = (PERF_COUNT_HW_CACHE_##x | (PERF_COUNT_HW_CACHE_OP_##y << 8) |\
+ (PERF_COUNT_HW_CACHE_RESULT_##z << 16))
+
+/*
+ * Generalized Hardware cache counters events
+ * L1I is READ and PREFETCH only
+ * ITLB and BPU is READ only
+ */
+static struct event_cache_symbol event_cache_symbols[] = {
+ { CHCACHE(L1D, READ, ACCESS) },
+ { CHCACHE(L1D, READ, MISS) },
+ { CHCACHE(L1D, WRITE, ACCESS) },
+ { CHCACHE(L1D, WRITE, MISS) },
+ { CHCACHE(L1D, PREFETCH, ACCESS) },
+ { CHCACHE(L1D, PREFETCH, MISS) },
+
+ { CHCACHE(L1I, READ, ACCESS) },
+ { CHCACHE(L1I, READ, MISS) },
+ { CHCACHE(L1D, PREFETCH, ACCESS) },
+ { CHCACHE(L1D, PREFETCH, MISS) },
+
+ { CHCACHE(LL, READ, ACCESS) },
+ { CHCACHE(LL, READ, MISS) },
+ { CHCACHE(LL, WRITE, ACCESS) },
+ { CHCACHE(LL, WRITE, MISS) },
+ { CHCACHE(LL, PREFETCH, ACCESS) },
+ { CHCACHE(LL, PREFETCH, MISS) },
+
+ { CHCACHE(DTLB, READ, ACCESS) },
+ { CHCACHE(DTLB, READ, MISS) },
+ { CHCACHE(DTLB, WRITE, ACCESS) },
+ { CHCACHE(DTLB, WRITE, MISS) },
+ { CHCACHE(DTLB, PREFETCH, ACCESS) },
+ { CHCACHE(DTLB, PREFETCH, MISS) },
+
+ { CHCACHE(ITLB, READ, ACCESS) },
+ { CHCACHE(ITLB, READ, MISS) },
+
+ { CHCACHE(BPU, READ, ACCESS) },
+ { CHCACHE(BPU, READ, MISS) },
};

#define __PERF_COUNTER_FIELD(config, name) \
@@ -266,6 +318,16 @@ static int set_multiple_events(unsigned int type)
}
break;

+ case PERF_TYPE_HW_CACHE:
+ for (i = 0; i < ARRAY_SIZE(event_cache_symbols); i++) {
+ memset(&attr, 0, sizeof(attr));
+ attr.type = event_cache_symbols[i].type;
+ attr.config = event_cache_symbols[i].config;
+ attrs[nr_counters] = attr;
+ nr_counters++;
+ }
+ break;
+
default:
return -1;
}
@@ -279,9 +341,10 @@ static int set_multiple_events(unsigned int type)

static int check_type_events(const char *str, unsigned int i)
{
- if (!strncmp(str, event_type_symbols[i].symbol,
+ if (strlen(event_type_symbols[i].symbol))
+ if (!strncmp(str, event_type_symbols[i].symbol,
strlen(event_type_symbols[i].symbol)))
- return 1;
+ return 1;

if (strlen(event_type_symbols[i].alias))
if (!strncmp(str, event_type_symbols[i].alias,
--
1.6.0.6


2009-06-27 16:38:51

by Ingo Molnar

[permalink] [raw]
Subject: Re: [PATCH 2/3 -tip] perf_counter tools: Add support to set of multiple events in one shot


* Jaswinder Singh Rajput <[email protected]> wrote:

> Add support for HARDWARE and SOFTWARE events :
> perf stat -e all-sw-events
> perf stat -e sw-events
> perf stat -e all-hw-events
> perf stat -e hw-events

> +static struct event_type_symbol event_type_symbols[] = {
> + [PERF_TYPE_HARDWARE] = { "hw-events", "all-hw-events", },
> + [PERF_TYPE_SOFTWARE] = { "sw-events", "all-sw-events", },

Hm, this really just special-cases and open-codes these. The better
solution is what i suggested in my review of your prior patches:
regex pattern matching.

Ingo

2009-06-27 17:32:18

by Jaswinder Singh Rajput

[permalink] [raw]
Subject: [tip:perfcounters/urgent] perf stat: Improve output

Commit-ID: 6e750a8fc009fd0ae98704525d1d8e80d60e8cc9
Gitweb: http://git.kernel.org/tip/6e750a8fc009fd0ae98704525d1d8e80d60e8cc9
Author: Jaswinder Singh Rajput <[email protected]>
AuthorDate: Sat, 27 Jun 2009 03:02:07 +0530
Committer: Ingo Molnar <[email protected]>
CommitDate: Sat, 27 Jun 2009 18:39:41 +0200

perf stat: Improve output

Increase size for event name to handle bigger names like
'L1-d$-prefetch-misses'

Changed scaled counters from percentage to a multiplicative
factor because the latter is more expressive.

Also aligned the scaling factor, otherwise sometimes it looks
like:

384 iTLB-load-misses (4.74x scaled)
452029 branch-loads (8.00x scaled)
5892 branch-load-misses (20.39x scaled)
972315 iTLB-loads (3.24x scaled)

Before:
150708 L1-d$-stores (scaled from 23.57%)
428804 L1-d$-prefetches (scaled from 23.47%)
314446 L1-d$-prefetch-misses (scaled from 23.42%)
252626137 L1-i$-loads (scaled from 23.24%)
5297550 dTLB-load-misses (scaled from 23.96%)
106992392 branch-loads (scaled from 23.67%)
5239561 branch-load-misses (scaled from 23.43%)

After:
1731713 L1-d$-loads ( 14.25x scaled)
44241 L1-d$-prefetches ( 3.88x scaled)
21076 L1-d$-prefetch-misses ( 3.40x scaled)
5789421 L1-i$-loads ( 3.78x scaled)
29645 dTLB-load-misses ( 2.95x scaled)
461474 branch-loads ( 6.52x scaled)
7493 branch-load-misses ( 26.57x scaled)

Reported-by: Ingo Molnar <[email protected]>
Signed-off-by: Jaswinder Singh Rajput <[email protected]>
Cc: Peter Zijlstra <[email protected]>
LKML-Reference: <[email protected]>
Signed-off-by: Ingo Molnar <[email protected]>


---
tools/perf/builtin-stat.c | 11 +++++------
1 files changed, 5 insertions(+), 6 deletions(-)

diff --git a/tools/perf/builtin-stat.c b/tools/perf/builtin-stat.c
index 52c176c..3840a70 100644
--- a/tools/perf/builtin-stat.c
+++ b/tools/perf/builtin-stat.c
@@ -32,6 +32,7 @@
* Wu Fengguang <[email protected]>
* Mike Galbraith <[email protected]>
* Paul Mackerras <[email protected]>
+ * Jaswinder Singh Rajput <[email protected]>
*
* Released under the GPL v2. (and only v2, not any later version)
*/
@@ -251,7 +252,7 @@ static void nsec_printout(int counter, u64 *count, u64 *noise)
{
double msecs = (double)count[0] / 1000000;

- fprintf(stderr, " %14.6f %-20s", msecs, event_name(counter));
+ fprintf(stderr, " %14.6f %-24s", msecs, event_name(counter));

if (attrs[counter].type == PERF_TYPE_SOFTWARE &&
attrs[counter].config == PERF_COUNT_SW_TASK_CLOCK) {
@@ -265,7 +266,7 @@ static void nsec_printout(int counter, u64 *count, u64 *noise)

static void abs_printout(int counter, u64 *count, u64 *noise)
{
- fprintf(stderr, " %14Ld %-20s", count[0], event_name(counter));
+ fprintf(stderr, " %14Ld %-24s", count[0], event_name(counter));

if (runtime_cycles_avg &&
attrs[counter].type == PERF_TYPE_HARDWARE &&
@@ -295,7 +296,7 @@ static void print_counter(int counter)
scaled = event_scaled_avg[counter];

if (scaled == -1) {
- fprintf(stderr, " %14s %-20s\n",
+ fprintf(stderr, " %14s %-24s\n",
"<not counted>", event_name(counter));
return;
}
@@ -306,8 +307,7 @@ static void print_counter(int counter)
abs_printout(counter, count, noise);

if (scaled)
- fprintf(stderr, " (scaled from %.2f%%)",
- (double) count[2] / count[1] * 100);
+ fprintf(stderr, " (%7.2fx scaled)", (double)count[1]/count[2]);

fprintf(stderr, "\n");
}
@@ -421,7 +421,6 @@ static void print_stat(int argc, const char **argv)
for (counter = 0; counter < nr_counters; counter++)
print_counter(counter);

-
fprintf(stderr, "\n");
fprintf(stderr, " %14.9f seconds time elapsed",
(double)walltime_nsecs_avg/1e9);

2009-06-27 19:05:50

by Jaswinder Singh Rajput

[permalink] [raw]
Subject: Re: [PATCH 2/3 -tip] perf_counter tools: Add support to set of multiple events in one shot

On Sat, 2009-06-27 at 18:38 +0200, Ingo Molnar wrote:
> * Jaswinder Singh Rajput <[email protected]> wrote:
>
> > Add support for HARDWARE and SOFTWARE events :
> > perf stat -e all-sw-events
> > perf stat -e sw-events
> > perf stat -e all-hw-events
> > perf stat -e hw-events
>
> > +static struct event_type_symbol event_type_symbols[] = {
> > + [PERF_TYPE_HARDWARE] = { "hw-events", "all-hw-events", },
> > + [PERF_TYPE_SOFTWARE] = { "sw-events", "all-sw-events", },
>
> Hm, this really just special-cases and open-codes these. The better
> solution is what i suggested in my review of your prior patches:
> regex pattern matching.
>

parse_events() is responsible to parse events for 'perf stat -e' and it
is parsing by parse_event_symbol()

If you want to use regex pattern matching then either we should make
some another option or if we need to rewrite parse_event_symbol to use
regex pattern matching which will be applicable to all the events.

Let me know which option you are looking for.

Thanks,
--
JSR

2009-06-28 13:30:13

by Ingo Molnar

[permalink] [raw]
Subject: Re: [PATCH 2/3 -tip] perf_counter tools: Add support to set of multiple events in one shot


* Jaswinder Singh Rajput <[email protected]> wrote:

> On Sat, 2009-06-27 at 18:38 +0200, Ingo Molnar wrote:
> > * Jaswinder Singh Rajput <[email protected]> wrote:
> >
> > > Add support for HARDWARE and SOFTWARE events :
> > > perf stat -e all-sw-events
> > > perf stat -e sw-events
> > > perf stat -e all-hw-events
> > > perf stat -e hw-events
> >
> > > +static struct event_type_symbol event_type_symbols[] = {
> > > + [PERF_TYPE_HARDWARE] = { "hw-events", "all-hw-events", },
> > > + [PERF_TYPE_SOFTWARE] = { "sw-events", "all-sw-events", },
> >
> > Hm, this really just special-cases and open-codes these. The better
> > solution is what i suggested in my review of your prior patches:
> > regex pattern matching.
> >
>
> parse_events() is responsible to parse events for 'perf stat -e'
> and it is parsing by parse_event_symbol()
>
> If you want to use regex pattern matching then either we should
> make some another option or if we need to rewrite
> parse_event_symbol to use regex pattern matching which will be
> applicable to all the events.

As i mentioned it before, i think the most intuitive solution is to
extend the --event syntax with regex patterns. No new option - just
richer -e syntax.

We could have this syntax:

hw-cpu-cycles
hw-instructions
hw-cache-references
hw-cache-misses
hw-branch-instructions
hw-branch-misses
hw-bus-cycles

sw-cpu-clock
sw-task-clock
sw-page-faults
sw-minor-faults
sw-major-faults
sw-context-switches
sw-cpu-migrations

regex patterns like:

hw-*
sw-*
*

the first one would select all hardware events - the second all
software events - the third all events in general. But other regex
patterns make sense too, like:

*branch*
*cache*
*fault*

And as the number of generic events increases, so will regex
patterns become more and more useful.

Ingo

2009-06-28 15:16:50

by Jaswinder Singh Rajput

[permalink] [raw]
Subject: Re: [PATCH 2/3 -tip] perf_counter tools: Add support to set of multiple events in one shot

On Sun, 2009-06-28 at 15:29 +0200, Ingo Molnar wrote:
> * Jaswinder Singh Rajput <[email protected]> wrote:
>
> > On Sat, 2009-06-27 at 18:38 +0200, Ingo Molnar wrote:
> > > * Jaswinder Singh Rajput <[email protected]> wrote:
> > >
> > > > Add support for HARDWARE and SOFTWARE events :
> > > > perf stat -e all-sw-events
> > > > perf stat -e sw-events
> > > > perf stat -e all-hw-events
> > > > perf stat -e hw-events
> > >
> > > > +static struct event_type_symbol event_type_symbols[] = {
> > > > + [PERF_TYPE_HARDWARE] = { "hw-events", "all-hw-events", },
> > > > + [PERF_TYPE_SOFTWARE] = { "sw-events", "all-sw-events", },
> > >
> > > Hm, this really just special-cases and open-codes these. The better
> > > solution is what i suggested in my review of your prior patches:
> > > regex pattern matching.
> > >
> >
> > parse_events() is responsible to parse events for 'perf stat -e'
> > and it is parsing by parse_event_symbol()
> >
> > If you want to use regex pattern matching then either we should
> > make some another option or if we need to rewrite
> > parse_event_symbol to use regex pattern matching which will be
> > applicable to all the events.
>
> As i mentioned it before, i think the most intuitive solution is to
> extend the --event syntax with regex patterns. No new option - just
> richer -e syntax.
>
> We could have this syntax:
>
> hw-cpu-cycles
> hw-instructions
> hw-cache-references
> hw-cache-misses
> hw-branch-instructions
> hw-branch-misses
> hw-bus-cycles
>
> sw-cpu-clock
> sw-task-clock
> sw-page-faults
> sw-minor-faults
> sw-major-faults
> sw-context-switches
> sw-cpu-migrations
>
> regex patterns like:
>
> hw-*
> sw-*
> *
>
> the first one would select all hardware events - the second all
> software events - the third all events in general. But other regex
> patterns make sense too, like:
>
> *branch*
> *cache*
> *fault*
>
> And as the number of generic events increases, so will regex
> patterns become more and more useful.
>

This is a good approach, but this should be done separately because
these will effect all the events.

Currently I send patches to support multiple events in one shot which is
adding new feature.

And you telling to use regex wildcard for all the events in
parse_event_symbol() which is enhancement and should be done in series
of patches.

Don't you think you are mixing two things? I request you to accept the
"multiple events in one shot" patches first; then we can do the enhancement.

Thanks,
--
JSR


2009-06-29 03:57:45

by Ingo Molnar

[permalink] [raw]
Subject: Re: [PATCH 2/3 -tip] perf_counter tools: Add support to set of multiple events in one shot


* Jaswinder Singh Rajput <[email protected]> wrote:

> On Sun, 2009-06-28 at 15:29 +0200, Ingo Molnar wrote:
> > * Jaswinder Singh Rajput <[email protected]> wrote:
> >
> > > On Sat, 2009-06-27 at 18:38 +0200, Ingo Molnar wrote:
> > > > * Jaswinder Singh Rajput <[email protected]> wrote:
> > > >
> > > > > Add support for HARDWARE and SOFTWARE events :
> > > > > perf stat -e all-sw-events
> > > > > perf stat -e sw-events
> > > > > perf stat -e all-hw-events
> > > > > perf stat -e hw-events
> > > >
> > > > > +static struct event_type_symbol event_type_symbols[] = {
> > > > > + [PERF_TYPE_HARDWARE] = { "hw-events", "all-hw-events", },
> > > > > + [PERF_TYPE_SOFTWARE] = { "sw-events", "all-sw-events", },
> > > >
> > > > Hm, this really just special-cases and open-codes these. The better
> > > > solution is what i suggested in my review of your prior patches:
> > > > regex pattern matching.
> > > >
> > >
> > > parse_events() is responsible to parse events for 'perf stat -e'
> > > and it is parsing by parse_event_symbol()
> > >
> > > If you want to use regex pattern matching then either we should
> > > make some another option or if we need to rewrite
> > > parse_event_symbol to use regex pattern matching which will be
> > > applicable to all the events.
> >
> > As i mentioned it before, i think the most intuitive solution is to
> > extend the --event syntax with regex patterns. No new option - just
> > richer -e syntax.
> >
> > We could have this syntax:
> >
> > hw-cpu-cycles
> > hw-instructions
> > hw-cache-references
> > hw-cache-misses
> > hw-branch-instructions
> > hw-branch-misses
> > hw-bus-cycles
> >
> > sw-cpu-clock
> > sw-task-clock
> > sw-page-faults
> > sw-minor-faults
> > sw-major-faults
> > sw-context-switches
> > sw-cpu-migrations
> >
> > regex patterns like:
> >
> > hw-*
> > sw-*
> > *
> >
> > the first one would select all hardware events - the second all
> > software events - the third all events in general. But other regex
> > patterns make sense too, like:
> >
> > *branch*
> > *cache*
> > *fault*
> >
> > And as the number of generic events increases, so will regex
> > patterns become more and more useful.
> >
>
> This is a good approach, but this should be done separately
> because these will effect all the events.
>
> Currently I send patches to support multiple events in one shot
> which is adding new feature.
>
> And you telling to use regex wildcard for all the events in
> parse_event_symbol() which is enhancement and should be done in
> series of patches.

The above patterns i suggested _already cover_ 'multiple events'.

We might define further aliases like:

all := "*"
all-sw := "sw-*"

but it should all be in terms of patterns and regular expressions,
not via some hardcoded special-case thing as your posted patches
did.

Ingo

2009-06-29 17:12:18

by Peter Zijlstra

[permalink] [raw]
Subject: Re: [tip:perfcounters/urgent] perf stat: Improve output

On Sat, 2009-06-27 at 17:31 +0000, tip-bot for Jaswinder Singh Rajput
wrote:
> Before:
> 150708 L1-d$-stores (scaled from 23.57%)
> 428804 L1-d$-prefetches (scaled from 23.47%)
> 314446 L1-d$-prefetch-misses (scaled from 23.42%)
> 252626137 L1-i$-loads (scaled from 23.24%)
> 5297550 dTLB-load-misses (scaled from 23.96%)
> 106992392 branch-loads (scaled from 23.67%)
> 5239561 branch-load-misses (scaled from 23.43%)
>
> After:
> 1731713 L1-d$-loads ( 14.25x scaled)
> 44241 L1-d$-prefetches ( 3.88x scaled)
> 21076 L1-d$-prefetch-misses ( 3.40x scaled)
> 5789421 L1-i$-loads ( 3.78x scaled)
> 29645 dTLB-load-misses ( 2.95x scaled)
> 461474 branch-loads ( 6.52x scaled)
> 7493 branch-load-misses ( 26.57x scaled)

I really rather like the before better. It's far easier to get a feel for
the overload factor from a fraction than from this multiplier.


2009-06-29 19:52:57

by Ingo Molnar

[permalink] [raw]
Subject: Re: [tip:perfcounters/urgent] perf stat: Improve output


* Peter Zijlstra <[email protected]> wrote:

> On Sat, 2009-06-27 at 17:31 +0000, tip-bot for Jaswinder Singh Rajput
> wrote:
> > Before:
> > 150708 L1-d$-stores (scaled from 23.57%)
> > 428804 L1-d$-prefetches (scaled from 23.47%)
> > 314446 L1-d$-prefetch-misses (scaled from 23.42%)
> > 252626137 L1-i$-loads (scaled from 23.24%)
> > 5297550 dTLB-load-misses (scaled from 23.96%)
> > 106992392 branch-loads (scaled from 23.67%)
> > 5239561 branch-load-misses (scaled from 23.43%)
> >
> > After:
> > 1731713 L1-d$-loads ( 14.25x scaled)
> > 44241 L1-d$-prefetches ( 3.88x scaled)
> > 21076 L1-d$-prefetch-misses ( 3.40x scaled)
> > 5789421 L1-i$-loads ( 3.78x scaled)
> > 29645 dTLB-load-misses ( 2.95x scaled)
> > 461474 branch-loads ( 6.52x scaled)
> > 7493 branch-load-misses ( 26.57x scaled)
>
> I really rather like the before better. Its far easier to get a
> feel for the overload factor from a fraction that from this
> multiplier.

ok - i reverted this portion. I liked the multiplier a bit more -
but you seem to have a strong preference for the percentage unit.

Ingo

2009-06-30 08:39:30

by Jaswinder Singh Rajput

[permalink] [raw]
Subject: Re: [PATCH 2/3 -tip] perf_counter tools: Add support to set of multiple events in one shot

On Mon, 2009-06-29 at 05:57 +0200, Ingo Molnar wrote:

> The above patterns i suggested _already cover_ 'multiple events'.
>
> We might define further aliases like:
>
> all := "*"
> all-sw := "sw-*"
>
> but it should all be in terms of patterns and regular expressions,
> not via some hardcoded special-case thing as your posted patches
> did.
>

This seems very confusing to me: it needs a lot of book-keeping and a
rewrite of the whole of tools/perf/util/parse-events.c, because :

* means all perf_event_types :
PERF_TYPE_HARDWARE,
PERF_TYPE_SOFTWARE,
PERF_TYPE_TRACEPOINT,
PERF_TYPE_HW_CACHE,
PERF_TYPE_RAW

hw-* means all hardware events :
PERF_TYPE_HARDWARE,
PERF_TYPE_HW_CACHE,
PERF_TYPE_RAW

sw-* means all software events :
PERF_TYPE_SOFTWARE,
PERF_TYPE_TRACEPOINT

*cache* means all cache based events :
PERF_COUNT_CACHE_REFERENCES, /* Generalized H/W */
PERF_COUNT_CACHE_MISSES, /* Generalized H/W */
PERF_TYPE_HW_CACHE, /* Generalized Cache */

*write* means all write based events :
(L1D, WRITE, ACCESS),
(L1D, WRITE, MISS),
(LL, WRITE, ACCESS),
(LL, WRITE, MISS),
(DTLB, WRITE, ACCESS),
(DTLB, WRITE, MISS)

Please let me know why this looks complex to me: is it really complex, or am I going in the wrong direction?

Thanks,

--
JSR

2009-06-30 09:58:07

by Ingo Molnar

[permalink] [raw]
Subject: Re: [PATCH 2/3 -tip] perf_counter tools: Add support to set of multiple events in one shot


* Jaswinder Singh Rajput <[email protected]> wrote:

> On Mon, 2009-06-29 at 05:57 +0200, Ingo Molnar wrote:
>
> > The above patterns i suggested _already cover_ 'multiple events'.
> >
> > We might define further aliases like:
> >
> > all := "*"
> > all-sw := "sw-*"
> >
> > but it should all be in terms of patterns and regular
> > expressions, not via some hardcoded special-case thing as your
> > posted patches did.
> >
>
> It seems to me very confusing and needs lot of book-keeping and
> need to rewrite whole tools/perf/util/parse-events.c because :
>
> * means all perf_event_types :
> PERF_TYPE_HARDWARE,
> PERF_TYPE_SOFTWARE,
> PERF_TYPE_TRACEPOINT,
> PERF_TYPE_HW_CACHE,
> PERF_TYPE_RAW
>
> hw-* means all hardware events :
> PERF_TYPE_HARDWARE,
> PERF_TYPE_HW_CACHE,
> PERF_TYPE_RAW
>
> sw-* means all software events :
> PERF_TYPE_SOFTWARE,
> PERF_TYPE_TRACEPOINT
>
> *cache* means all cache based events :
> PERF_COUNT_CACHE_REFERENCES, /* Generalized H/W */
> PERF_COUNT_CACHE_MISSES, /* Generalized H/W */
> PERF_TYPE_HW_CACHE, /* Generalized Cache */
>
> *write* means all write based events :
> (L1D, WRITE, ACCESS),
> (L1D, WRITE, MISS),
> (LL, WRITE, ACCESS),
> (LL, WRITE, MISS),
> (DTLB, WRITE, ACCESS),
> (DTLB, WRITE, MISS)
>
> Please let me know why it looks complex to me, is it really
> complex or I am going in wrong direction.

It would certainly need some reorganization of the code but the end
result would be more flexible and other places could use it too, for
example:

perf test -e hw-*

would test all (known) hardware counters.

Ingo

2009-06-30 13:23:44

by Jaswinder Singh Rajput

[permalink] [raw]
Subject: Re: [PATCH 2/3 -tip] perf_counter tools: Add support to set of multiple events in one shot

On Tue, 2009-06-30 at 11:57 +0200, Ingo Molnar wrote:
> * Jaswinder Singh Rajput <[email protected]> wrote:
>
> > On Mon, 2009-06-29 at 05:57 +0200, Ingo Molnar wrote:
> >
> > > The above patterns i suggested _already cover_ 'multiple events'.
> > >
> > > We might define further aliases like:
> > >
> > > all := "*"
> > > all-sw := "sw-*"
> > >
> > > but it should all be in terms of patterns and regular
> > > expressions, not via some hardcoded special-case thing as your
> > > posted patches did.
> > >
> >
> > It seems to me very confusing and needs lot of book-keeping and
> > need to rewrite whole tools/perf/util/parse-events.c because :
> >
> > * means all perf_event_types :
> > PERF_TYPE_HARDWARE,
> > PERF_TYPE_SOFTWARE,
> > PERF_TYPE_TRACEPOINT,
> > PERF_TYPE_HW_CACHE,
> > PERF_TYPE_RAW
> >
> > hw-* means all hardware events :
> > PERF_TYPE_HARDWARE,
> > PERF_TYPE_HW_CACHE,
> > PERF_TYPE_RAW
> >
> > sw-* means all software events :
> > PERF_TYPE_SOFTWARE,
> > PERF_TYPE_TRACEPOINT
> >
> > *cache* means all cache based events :
> > PERF_COUNT_CACHE_REFERENCES, /* Generalized H/W */
> > PERF_COUNT_CACHE_MISSES, /* Generalized H/W */
> > PERF_TYPE_HW_CACHE, /* Generalized Cache */
> >
> > *write* means all write based events :
> > (L1D, WRITE, ACCESS),
> > (L1D, WRITE, MISS),
> > (LL, WRITE, ACCESS),
> > (LL, WRITE, MISS),
> > (DTLB, WRITE, ACCESS),
> > (DTLB, WRITE, MISS)
> >
> > Please let me know why it looks complex to me, is it really
> > complex or I am going in wrong direction.
>
> It would certainly need some reorganization of the code but the end
> result would be more flexible and other places could use it too, for
> example:
>
> perf test -e hw-*
>
> would test all (known) hardware counters.
>

It's true.

Can you please verify that the assumptions I made above are correct?

Thanks,
--
JSR

2009-06-30 22:47:34

by Ingo Molnar

[permalink] [raw]
Subject: Re: [PATCH 2/3 -tip] perf_counter tools: Add support to set of multiple events in one shot


* Jaswinder Singh Rajput <[email protected]> wrote:

> On Tue, 2009-06-30 at 11:57 +0200, Ingo Molnar wrote:
> > * Jaswinder Singh Rajput <[email protected]> wrote:
> >
> > > On Mon, 2009-06-29 at 05:57 +0200, Ingo Molnar wrote:
> > >
> > > > The above patterns i suggested _already cover_ 'multiple events'.
> > > >
> > > > We might define further aliases like:
> > > >
> > > > all := "*"
> > > > all-sw := "sw-*"
> > > >
> > > > but it should all be in terms of patterns and regular
> > > > expressions, not via some hardcoded special-case thing as your
> > > > posted patches did.
> > > >
> > >
> > > It seems to me very confusing and needs lot of book-keeping and
> > > need to rewrite whole tools/perf/util/parse-events.c because :
> > >
> > > * means all perf_event_types :
> > > PERF_TYPE_HARDWARE,
> > > PERF_TYPE_SOFTWARE,
> > > PERF_TYPE_TRACEPOINT,
> > > PERF_TYPE_HW_CACHE,
> > > PERF_TYPE_RAW
> > >
> > > hw-* means all hardware events :
> > > PERF_TYPE_HARDWARE,
> > > PERF_TYPE_HW_CACHE,
> > > PERF_TYPE_RAW
> > >
> > > sw-* means all software events :
> > > PERF_TYPE_SOFTWARE,
> > > PERF_TYPE_TRACEPOINT
> > >
> > > *cache* means all cache based events :
> > > PERF_COUNT_CACHE_REFERENCES, /* Generalized H/W */
> > > PERF_COUNT_CACHE_MISSES, /* Generalized H/W */
> > > PERF_TYPE_HW_CACHE, /* Generalized Cache */
> > >
> > > *write* means all write based events :
> > > (L1D, WRITE, ACCESS),
> > > (L1D, WRITE, MISS),
> > > (LL, WRITE, ACCESS),
> > > (LL, WRITE, MISS),
> > > (DTLB, WRITE, ACCESS),
> > > (DTLB, WRITE, MISS)
> > >
> > > Please let me know why it looks complex to me, is it really
> > > complex or I am going in wrong direction.
> >
> > It would certainly need some reorganization of the code but the end
> > result would be more flexible and other places could use it too, for
> > example:
> >
> > perf test -e hw-*
> >
> > would test all (known) hardware counters.
> >
>
> Its true.
>
> Can you please verify that the assumptions I made above are
> correct.

Well, the right way to approach this is to assign each event a "full
name" and a list of aliases/shortcuts (like we have now), and then
do pattern matching on the full name.

So we'd have full/long event names like:

hw-cycles
hw-instructions
hw-l1-cache-load-misses
sw-minor-page-faults
...

To implement regex patterns over these, no event-specific knowledge
should be put into the pattern matching engine itself - it just
blindly goes over the full names as strings.

As long as the full names are unique and structured well, this will
work fine. The only non-trivial piece of restructuring is to make it
easy for the pattern matching engine to iterate over all events.
Right now they are in separate tables - perhaps they should be
collected into a single table or so.

Ingo