2009-06-25 21:03:25

by Jaswinder Singh Rajput

[permalink] [raw]
Subject: [PATCH -tip] perf_counter tools: add support to set of multiple events in one short


Add support for HARDWARE and SOFTWARE events :
perf stat -e all-sw-events
perf stat -e sw-events
perf stat -e all-hw-events
perf stat -e hw-events

On AMD box :

./perf stat -e hw-events -e all-sw-events -- ls -lR > /dev/null

Performance counter stats for 'ls -lR':

9977353 cycles # 557.193 M/sec (scaled from 21.81%)
4244800 instructions # 0.425 IPC (scaled from 27.51%)
2953188 cache-references # 164.923 M/sec (scaled from 89.10%)
72469 cache-misses # 4.047 M/sec (scaled from 89.13%)
775760 branches # 43.323 M/sec (scaled from 89.10%)
57814 branch-misses # 3.229 M/sec (scaled from 83.34%)
<not counted> bus-cycles
17.970985 cpu-clock-msecs
17.906460 task-clock-msecs # 0.955 CPUs
386 page-faults # 0.022 M/sec
386 minor-faults # 0.022 M/sec
0 major-faults # 0.000 M/sec
4 context-switches # 0.000 M/sec
1 CPU-migrations # 0.000 M/sec

0.018750671 seconds time elapsed.

Reported-by: Ingo Molnar <[email protected]>
Signed-off-by: Jaswinder Singh Rajput <[email protected]>
---
tools/perf/util/parse-events.c | 66 ++++++++++++++++++++++++++++++++++++++-
1 files changed, 64 insertions(+), 2 deletions(-)

diff --git a/tools/perf/util/parse-events.c b/tools/perf/util/parse-events.c
index 430f060..85d8021 100644
--- a/tools/perf/util/parse-events.c
+++ b/tools/perf/util/parse-events.c
@@ -40,6 +40,16 @@ static struct event_symbol event_symbols[] = {
{ CSW(CPU_MIGRATIONS), "cpu-migrations", "migrations" },
};

+struct event_type_symbol {
+ char *symbol;
+ char *alias;
+};
+
+static struct event_type_symbol event_type_symbols[] = {
+ [PERF_TYPE_HARDWARE] = { "hw-events", "all-hw-events", },
+ [PERF_TYPE_SOFTWARE] = { "sw-events", "all-sw-events", },
+};
+
#define __PERF_COUNTER_FIELD(config, name) \
((config & PERF_COUNTER_##name##_MASK) >> PERF_COUNTER_##name##_SHIFT)

@@ -237,6 +247,49 @@ parse_generic_hw_symbols(const char *str, struct perf_counter_attr *attr)
return 0;
}

+static int set_multiple_events(unsigned int type)
+{
+ struct perf_counter_attr attr;
+ int i;
+
+ switch (type) {
+ case PERF_TYPE_HARDWARE:
+ case PERF_TYPE_SOFTWARE:
+ for (i = 0; i < ARRAY_SIZE(event_symbols); i++) {
+ if (event_symbols[i].type == type) {
+ memset(&attr, 0, sizeof(attr));
+ attr.type = event_symbols[i].type;
+ attr.config = event_symbols[i].config;
+ attrs[nr_counters] = attr;
+ nr_counters++;
+ }
+ }
+
+ break;
+ default:
+ return -1;
+ }
+
+ /*
+ * parse_events() is assuming that only single event will be set,
+ * but we are setting multiple events so we need to return magical 1
+ */
+ return 1;
+}
+
+static int check_type_events(const char *str, unsigned int i)
+{
+ if (!strncmp(str, event_type_symbols[i].symbol,
+ strlen(event_type_symbols[i].symbol)))
+ return 1;
+
+ if (strlen(event_type_symbols[i].alias))
+ if (!strncmp(str, event_type_symbols[i].alias,
+ strlen(event_type_symbols[i].alias)))
+ return 1;
+ return 0;
+}
+
static int check_events(const char *str, unsigned int i)
{
if (!strncmp(str, event_symbols[i].symbol,
@@ -288,6 +341,12 @@ static int parse_event_symbols(const char *str, struct perf_counter_attr *attr)
return 0;
}

+ for (i = 0; i < ARRAY_SIZE(event_type_symbols); i++) {
+ if (check_type_events(str, i)) {
+ return set_multiple_events(i);
+ }
+ }
+
for (i = 0; i < ARRAY_SIZE(event_symbols); i++) {
if (check_events(str, i)) {
attr->type = event_symbols[i].type;
@@ -314,8 +373,11 @@ again:
if (ret < 0)
return ret;

- attrs[nr_counters] = attr;
- nr_counters++;
+ /* No need to set attrs and increment counter when already set */
+ if (ret == 0) {
+ attrs[nr_counters] = attr;
+ nr_counters++;
+ }

str = strstr(str, ",");
if (str) {
--
1.6.0.6



2009-06-25 22:29:18

by Jaswinder Singh Rajput

[permalink] [raw]
Subject: Re: [PATCH -tip] perf_counter tools: add support to set of multiple events in one short

On Fri, 2009-06-26 at 02:32 +0530, Jaswinder Singh Rajput wrote:
> Add support for HARDWARE and SOFTWARE events :
> perf stat -e all-sw-events
> perf stat -e sw-events
> perf stat -e all-hw-events
> perf stat -e hw-events
>
> On AMD box :
>
> ./perf stat -e hw-events -e all-sw-events -- ls -lR > /dev/null
>
> Performance counter stats for 'ls -lR':
>
> 9977353 cycles # 557.193 M/sec (scaled from 21.81%)
> 4244800 instructions # 0.425 IPC (scaled from 27.51%)
> 2953188 cache-references # 164.923 M/sec (scaled from 89.10%)
> 72469 cache-misses # 4.047 M/sec (scaled from 89.13%)
> 775760 branches # 43.323 M/sec (scaled from 89.10%)
> 57814 branch-misses # 3.229 M/sec (scaled from 83.34%)
> <not counted> bus-cycles
> 17.970985 cpu-clock-msecs
> 17.906460 task-clock-msecs # 0.955 CPUs
> 386 page-faults # 0.022 M/sec
> 386 minor-faults # 0.022 M/sec
> 0 major-faults # 0.000 M/sec
> 4 context-switches # 0.000 M/sec
> 1 CPU-migrations # 0.000 M/sec
>
> 0.018750671 seconds time elapsed.
>
> Reported-by : Ingo Molnar <[email protected]>
> Signed-off-by: Jaswinder Singh Rajput <[email protected]>
> ---
> tools/perf/util/parse-events.c | 66 ++++++++++++++++++++++++++++++++++++++-
> 1 files changed, 64 insertions(+), 2 deletions(-)

Please treat :
[PATCH -tip] perf_counter tools: add support to set of multiple events in one short
as
[PATCH 1/2-tip] perf_counter tools: add support to set of multiple events in one short

And here is 2/2 :

[PATCH 2/2 -tip] perf_counter tools: Add support for all CACHE events

Add support for all CACHE events :
perf stat -e all-cache-events
perf stat -e cache-events

On an AMD box (events shown as <not counted> are not available on AMD):

./perf stat -e all-cache-events -- ls -lR /usr/include/ > /dev/null

Performance counter stats for 'ls -lR /usr/include/':

246370884 L1-d$-loads (scaled from 23.55%)
1074018 L1-d$-load-misses (scaled from 23.38%)
150708 L1-d$-stores (scaled from 23.57%)
<not counted> L1-d$-store-misses
428804 L1-d$-prefetches (scaled from 23.47%)
314446 L1-d$-prefetch-misses (scaled from 23.42%)
252626137 L1-i$-loads (scaled from 23.24%)
3985110 L1-i$-load-misses (scaled from 23.24%)
93754 L1-i$-prefetches (scaled from 23.34%)
<not counted> L1-i$-prefetch-misses
5202314 LLC-loads (scaled from 23.34%)
525467 LLC-load-misses (scaled from 23.25%)
5220558 LLC-stores (scaled from 23.21%)
<not counted> LLC-store-misses
<not counted> LLC-prefetches
<not counted> LLC-prefetch-misses
251954203 dTLB-loads (scaled from 23.70%)
5297550 dTLB-load-misses (scaled from 23.96%)
<not counted> dTLB-stores
<not counted> dTLB-store-misses
<not counted> dTLB-prefetches
<not counted> dTLB-prefetch-misses
248561524 iTLB-loads (scaled from 24.15%)
4693 iTLB-load-misses (scaled from 24.18%)
106992392 branch-loads (scaled from 23.67%)
5239561 branch-load-misses (scaled from 23.43%)

0.395946903 seconds time elapsed.

Reported-by: Ingo Molnar <[email protected]>
Signed-off-by: Jaswinder Singh Rajput <[email protected]>
---
tools/perf/util/parse-events.c | 70 +++++++++++++++++++++++++++++++++++++---
1 files changed, 65 insertions(+), 5 deletions(-)

diff --git a/tools/perf/util/parse-events.c b/tools/perf/util/parse-events.c
index cfc622b..c1cd93e 100644
--- a/tools/perf/util/parse-events.c
+++ b/tools/perf/util/parse-events.c
@@ -40,14 +40,63 @@ static struct event_symbol event_symbols[] = {
{ CSW(CPU_MIGRATIONS), "cpu-migrations", "migrations" },
};

+struct event_cache_symbol {
+ u8 type;
+ u64 config;
+};
+
struct event_type_symbol {
char *symbol;
char *alias;
};

+#define CHCACHE(x, y, z) \
+.type = PERF_TYPE_HW_CACHE, \
+.config = (PERF_COUNT_HW_CACHE_##x | (PERF_COUNT_HW_CACHE_OP_##y << 8) |\
+ (PERF_COUNT_HW_CACHE_RESULT_##z << 16))
+
+/* Generalized Hardware cache counters events */
+static struct event_cache_symbol event_cache_symbols[] = {
+ { CHCACHE(L1D, READ, ACCESS) },
+ { CHCACHE(L1D, READ, MISS) },
+ { CHCACHE(L1D, WRITE, ACCESS) },
+ { CHCACHE(L1D, WRITE, MISS) },
+ { CHCACHE(L1D, PREFETCH, ACCESS) },
+ { CHCACHE(L1D, PREFETCH, MISS) },
+
+ { CHCACHE(L1I, READ, ACCESS) },
+ { CHCACHE(L1I, READ, MISS) },
+ { CHCACHE(L1I, PREFETCH, ACCESS) },
+ { CHCACHE(L1I, PREFETCH, MISS) },
+
+ { CHCACHE(LL, READ, ACCESS) },
+ { CHCACHE(LL, READ, MISS) },
+ { CHCACHE(LL, WRITE, ACCESS) },
+ { CHCACHE(LL, WRITE, MISS) },
+ { CHCACHE(LL, PREFETCH, ACCESS) },
+ { CHCACHE(LL, PREFETCH, MISS) },
+
+ { CHCACHE(DTLB, READ, ACCESS) },
+ { CHCACHE(DTLB, READ, MISS) },
+ { CHCACHE(DTLB, WRITE, ACCESS) },
+ { CHCACHE(DTLB, WRITE, MISS) },
+ { CHCACHE(DTLB, PREFETCH, ACCESS) },
+ { CHCACHE(DTLB, PREFETCH, MISS) },
+
+ { CHCACHE(ITLB, READ, ACCESS) },
+ { CHCACHE(ITLB, READ, MISS) },
+
+ { CHCACHE(BPU, READ, ACCESS) },
+ { CHCACHE(BPU, READ, MISS) },
+
+};
+
static struct event_type_symbol event_type_symbols[] = {
- [PERF_TYPE_HARDWARE] = { "hw-events", "all-hw-events", },
- [PERF_TYPE_SOFTWARE] = { "sw-events", "all-sw-events", },
+ [PERF_TYPE_HARDWARE] = { "hw-events", "all-hw-events", },
+ [PERF_TYPE_SOFTWARE] = { "sw-events", "all-sw-events", },
+ [PERF_TYPE_TRACEPOINT] = { "", "", },
+ [PERF_TYPE_HW_CACHE] = { "cache-events", "all-cache-events", },
+ [PERF_TYPE_RAW] = { "", "", },
};

#define __PERF_COUNTER_FIELD(config, name) \
@@ -264,8 +313,18 @@ static int set_multiple_events(unsigned int type)
nr_counters++;
}
}
+ break;

+ case PERF_TYPE_HW_CACHE:
+ for (i = 0; i < ARRAY_SIZE(event_cache_symbols); i++) {
+ memset(&attr, 0, sizeof(attr));
+ attr.type = event_cache_symbols[i].type;
+ attr.config = event_cache_symbols[i].config;
+ attrs[nr_counters] = attr;
+ nr_counters++;
+ }
break;
+
default:
return -1;
}
@@ -279,9 +338,10 @@ static int set_multiple_events(unsigned int type)

static int check_type_events(const char *str, unsigned int i)
{
- if (!strncmp(str, event_type_symbols[i].symbol,
- strlen(event_type_symbols[i].symbol)))
- return 1;
+ if (strlen(event_type_symbols[i].symbol))
+ if (!strncmp(str, event_type_symbols[i].symbol,
+ strlen(event_type_symbols[i].symbol)))
+ return 1;

if (strlen(event_type_symbols[i].alias))
if (!strncmp(str, event_type_symbols[i].alias,
--
1.6.0.6



2009-06-26 12:23:31

by Jaswinder Singh Rajput

[permalink] [raw]
Subject: Re: [PATCH -tip] perf_counter tools: add support to set of multiple events in one short

On Fri, 2009-06-26 at 03:58 +0530, Jaswinder Singh Rajput wrote:
> On Fri, 2009-06-26 at 02:32 +0530, Jaswinder Singh Rajput wrote:
> > Add support for HARDWARE and SOFTWARE events :
> > perf stat -e all-sw-events
> > perf stat -e sw-events
> > perf stat -e all-hw-events
> > perf stat -e hw-events
> >
> > On AMD box :
> >
> > ./perf stat -e hw-events -e all-sw-events -- ls -lR > /dev/null
> >
> > Performance counter stats for 'ls -lR':
> >
> > 9977353 cycles # 557.193 M/sec (scaled from 21.81%)
> > 4244800 instructions # 0.425 IPC (scaled from 27.51%)
> > 2953188 cache-references # 164.923 M/sec (scaled from 89.10%)
> > 72469 cache-misses # 4.047 M/sec (scaled from 89.13%)
> > 775760 branches # 43.323 M/sec (scaled from 89.10%)
> > 57814 branch-misses # 3.229 M/sec (scaled from 83.34%)
> > <not counted> bus-cycles
> > 17.970985 cpu-clock-msecs
> > 17.906460 task-clock-msecs # 0.955 CPUs
> > 386 page-faults # 0.022 M/sec
> > 386 minor-faults # 0.022 M/sec
> > 0 major-faults # 0.000 M/sec
> > 4 context-switches # 0.000 M/sec
> > 1 CPU-migrations # 0.000 M/sec
> >
> > 0.018750671 seconds time elapsed.
> >
> > Reported-by : Ingo Molnar <[email protected]>
> > Signed-off-by: Jaswinder Singh Rajput <[email protected]>
> > ---
> > tools/perf/util/parse-events.c | 66 ++++++++++++++++++++++++++++++++++++++-
> > 1 files changed, 64 insertions(+), 2 deletions(-)
>
> Please treat :
> [PATCH -tip] perf_counter tools: add support to set of multiple events in one short
> as
> [PATCH 1/2-tip] perf_counter tools: add support to set of multiple events in one short
>
> And here is 2/2 :
>
> [PATCH 2/2 -tip] perf_counter tools: Add support for all CACHE events
>
> Add support for all CACHE events :
> perf stat -e all-cache-events
> perf stat -e cache-events
>
> On AMD box (<not-counted> events are not available for AMD):
>
> ./perf stat -e all-cache-events -- ls -lR /usr/include/ > /dev/null
>
> Performance counter stats for 'ls -lR /usr/include/':
>
> 246370884 L1-d$-loads (scaled from 23.55%)
> 1074018 L1-d$-load-misses (scaled from 23.38%)
> 150708 L1-d$-stores (scaled from 23.57%)
> <not counted> L1-d$-store-misses
> 428804 L1-d$-prefetches (scaled from 23.47%)
> 314446 L1-d$-prefetch-misses (scaled from 23.42%)
> 252626137 L1-i$-loads (scaled from 23.24%)
> 3985110 L1-i$-load-misses (scaled from 23.24%)
> 93754 L1-i$-prefetches (scaled from 23.34%)
> <not counted> L1-i$-prefetch-misses
> 5202314 LLC-loads (scaled from 23.34%)
> 525467 LLC-load-misses (scaled from 23.25%)
> 5220558 LLC-stores (scaled from 23.21%)
> <not counted> LLC-store-misses
> <not counted> LLC-prefetches
> <not counted> LLC-prefetch-misses
> 251954203 dTLB-loads (scaled from 23.70%)
> 5297550 dTLB-load-misses (scaled from 23.96%)
> <not counted> dTLB-stores
> <not counted> dTLB-store-misses
> <not counted> dTLB-prefetches
> <not counted> dTLB-prefetch-misses
> 248561524 iTLB-loads (scaled from 24.15%)
> 4693 iTLB-load-misses (scaled from 24.18%)
> 106992392 branch-loads (scaled from 23.67%)
> 5239561 branch-load-misses (scaled from 23.43%)
>
> 0.395946903 seconds time elapsed.
>
> Reported-by: Ingo Molnar <[email protected]>
> Signed-off-by: Jaswinder Singh Rajput <[email protected]>
> ---
> tools/perf/util/parse-events.c | 70 +++++++++++++++++++++++++++++++++++++---
> 1 files changed, 65 insertions(+), 5 deletions(-)
>


If this looks OK, can I then send the following patches?

Thanks,
--
JSR

2009-06-26 12:26:22

by Ingo Molnar

[permalink] [raw]
Subject: Re: [PATCH -tip] perf_counter tools: add support to set of multiple events in one short


* Jaswinder Singh Rajput <[email protected]> wrote:

> On Fri, 2009-06-26 at 03:58 +0530, Jaswinder Singh Rajput wrote:
> > On Fri, 2009-06-26 at 02:32 +0530, Jaswinder Singh Rajput wrote:
> > > Add support for HARDWARE and SOFTWARE events :
> > > perf stat -e all-sw-events
> > > perf stat -e sw-events
> > > perf stat -e all-hw-events
> > > perf stat -e hw-events
> > >
> > > On AMD box :
> > >
> > > ./perf stat -e hw-events -e all-sw-events -- ls -lR > /dev/null
> > >
> > > Performance counter stats for 'ls -lR':
> > >
> > > 9977353 cycles # 557.193 M/sec (scaled from 21.81%)
> > > 4244800 instructions # 0.425 IPC (scaled from 27.51%)
> > > 2953188 cache-references # 164.923 M/sec (scaled from 89.10%)
> > > 72469 cache-misses # 4.047 M/sec (scaled from 89.13%)
> > > 775760 branches # 43.323 M/sec (scaled from 89.10%)
> > > 57814 branch-misses # 3.229 M/sec (scaled from 83.34%)
> > > <not counted> bus-cycles
> > > 17.970985 cpu-clock-msecs
> > > 17.906460 task-clock-msecs # 0.955 CPUs
> > > 386 page-faults # 0.022 M/sec
> > > 386 minor-faults # 0.022 M/sec
> > > 0 major-faults # 0.000 M/sec
> > > 4 context-switches # 0.000 M/sec
> > > 1 CPU-migrations # 0.000 M/sec
> > >
> > > 0.018750671 seconds time elapsed.
> > >
> > > Reported-by : Ingo Molnar <[email protected]>
> > > Signed-off-by: Jaswinder Singh Rajput <[email protected]>
> > > ---
> > > tools/perf/util/parse-events.c | 66 ++++++++++++++++++++++++++++++++++++++-
> > > 1 files changed, 64 insertions(+), 2 deletions(-)
> >
> > Please treat :
> > [PATCH -tip] perf_counter tools: add support to set of multiple events in one short
> > as
> > [PATCH 1/2-tip] perf_counter tools: add support to set of multiple events in one short
> >
> > And here is 2/2 :
> >
> > [PATCH 2/2 -tip] perf_counter tools: Add support for all CACHE events
> >
> > Add support for all CACHE events :
> > perf stat -e all-cache-events
> > perf stat -e cache-events
> >
> > On AMD box (<not-counted> events are not available for AMD):
> >
> > ./perf stat -e all-cache-events -- ls -lR /usr/include/ > /dev/null
> >
> > Performance counter stats for 'ls -lR /usr/include/':
> >
> > 246370884 L1-d$-loads (scaled from 23.55%)
> > 1074018 L1-d$-load-misses (scaled from 23.38%)
> > 150708 L1-d$-stores (scaled from 23.57%)
> > <not counted> L1-d$-store-misses
> > 428804 L1-d$-prefetches (scaled from 23.47%)
> > 314446 L1-d$-prefetch-misses (scaled from 23.42%)
> > 252626137 L1-i$-loads (scaled from 23.24%)
> > 3985110 L1-i$-load-misses (scaled from 23.24%)
> > 93754 L1-i$-prefetches (scaled from 23.34%)
> > <not counted> L1-i$-prefetch-misses
> > 5202314 LLC-loads (scaled from 23.34%)
> > 525467 LLC-load-misses (scaled from 23.25%)
> > 5220558 LLC-stores (scaled from 23.21%)
> > <not counted> LLC-store-misses
> > <not counted> LLC-prefetches
> > <not counted> LLC-prefetch-misses
> > 251954203 dTLB-loads (scaled from 23.70%)
> > 5297550 dTLB-load-misses (scaled from 23.96%)
> > <not counted> dTLB-stores
> > <not counted> dTLB-store-misses
> > <not counted> dTLB-prefetches
> > <not counted> dTLB-prefetch-misses
> > 248561524 iTLB-loads (scaled from 24.15%)
> > 4693 iTLB-load-misses (scaled from 24.18%)
> > 106992392 branch-loads (scaled from 23.67%)
> > 5239561 branch-load-misses (scaled from 23.43%)
> >
> > 0.395946903 seconds time elapsed.
> >
> > Reported-by: Ingo Molnar <[email protected]>
> > Signed-off-by: Jaswinder Singh Rajput <[email protected]>
> > ---
> > tools/perf/util/parse-events.c | 70 +++++++++++++++++++++++++++++++++++++---
> > 1 files changed, 65 insertions(+), 5 deletions(-)
> >
>
>
> If this looks OK then can I send following patches.

It would be nice to also do the 'scaled' cleanup that I suggested in the
other thread, and to size things so that there are no lines like these:

428804 L1-d$-prefetches (scaled from 23.47%)
314446 L1-d$-prefetch-misses (scaled from 23.42%)

If that's done, it would be nice to have a series submitted to
lkml with numbered patches and a 0/3 (or so) mail summarizing the
changes, with each patch having code and commit log quality that
you can stand behind and which needs no modification from the
maintainers.

Ingo

2009-06-26 12:39:27

by Jaswinder Singh Rajput

[permalink] [raw]
Subject: Re: [PATCH -tip] perf_counter tools: add support to set of multiple events in one short

On Fri, 2009-06-26 at 14:25 +0200, Ingo Molnar wrote:
> * Jaswinder Singh Rajput <[email protected]> wrote:
>
> > On Fri, 2009-06-26 at 03:58 +0530, Jaswinder Singh Rajput wrote:
> > > On Fri, 2009-06-26 at 02:32 +0530, Jaswinder Singh Rajput wrote:
> > > > Add support for HARDWARE and SOFTWARE events :
> > > > perf stat -e all-sw-events
> > > > perf stat -e sw-events
> > > > perf stat -e all-hw-events
> > > > perf stat -e hw-events
> > > >
> > > > On AMD box :
> > > >
> > > > ./perf stat -e hw-events -e all-sw-events -- ls -lR > /dev/null
> > > >
> > > > Performance counter stats for 'ls -lR':
> > > >
> > > > 9977353 cycles # 557.193 M/sec (scaled from 21.81%)
> > > > 4244800 instructions # 0.425 IPC (scaled from 27.51%)
> > > > 2953188 cache-references # 164.923 M/sec (scaled from 89.10%)
> > > > 72469 cache-misses # 4.047 M/sec (scaled from 89.13%)
> > > > 775760 branches # 43.323 M/sec (scaled from 89.10%)
> > > > 57814 branch-misses # 3.229 M/sec (scaled from 83.34%)
> > > > <not counted> bus-cycles
> > > > 17.970985 cpu-clock-msecs
> > > > 17.906460 task-clock-msecs # 0.955 CPUs
> > > > 386 page-faults # 0.022 M/sec
> > > > 386 minor-faults # 0.022 M/sec
> > > > 0 major-faults # 0.000 M/sec
> > > > 4 context-switches # 0.000 M/sec
> > > > 1 CPU-migrations # 0.000 M/sec
> > > >
> > > > 0.018750671 seconds time elapsed.
> > > >
> > > > Reported-by : Ingo Molnar <[email protected]>
> > > > Signed-off-by: Jaswinder Singh Rajput <[email protected]>
> > > > ---
> > > > tools/perf/util/parse-events.c | 66 ++++++++++++++++++++++++++++++++++++++-
> > > > 1 files changed, 64 insertions(+), 2 deletions(-)
> > >
> > > Please treat :
> > > [PATCH -tip] perf_counter tools: add support to set of multiple events in one short
> > > as
> > > [PATCH 1/2-tip] perf_counter tools: add support to set of multiple events in one short
> > >
> > > And here is 2/2 :
> > >
> > > [PATCH 2/2 -tip] perf_counter tools: Add support for all CACHE events
> > >
> > > Add support for all CACHE events :
> > > perf stat -e all-cache-events
> > > perf stat -e cache-events
> > >
> > > On AMD box (<not-counted> events are not available for AMD):
> > >
> > > ./perf stat -e all-cache-events -- ls -lR /usr/include/ > /dev/null
> > >
> > > Performance counter stats for 'ls -lR /usr/include/':
> > >
> > > 246370884 L1-d$-loads (scaled from 23.55%)
> > > 1074018 L1-d$-load-misses (scaled from 23.38%)
> > > 150708 L1-d$-stores (scaled from 23.57%)
> > > <not counted> L1-d$-store-misses
> > > 428804 L1-d$-prefetches (scaled from 23.47%)
> > > 314446 L1-d$-prefetch-misses (scaled from 23.42%)
> > > 252626137 L1-i$-loads (scaled from 23.24%)
> > > 3985110 L1-i$-load-misses (scaled from 23.24%)
> > > 93754 L1-i$-prefetches (scaled from 23.34%)
> > > <not counted> L1-i$-prefetch-misses
> > > 5202314 LLC-loads (scaled from 23.34%)
> > > 525467 LLC-load-misses (scaled from 23.25%)
> > > 5220558 LLC-stores (scaled from 23.21%)
> > > <not counted> LLC-store-misses
> > > <not counted> LLC-prefetches
> > > <not counted> LLC-prefetch-misses
> > > 251954203 dTLB-loads (scaled from 23.70%)
> > > 5297550 dTLB-load-misses (scaled from 23.96%)
> > > <not counted> dTLB-stores
> > > <not counted> dTLB-store-misses
> > > <not counted> dTLB-prefetches
> > > <not counted> dTLB-prefetch-misses
> > > 248561524 iTLB-loads (scaled from 24.15%)
> > > 4693 iTLB-load-misses (scaled from 24.18%)
> > > 106992392 branch-loads (scaled from 23.67%)
> > > 5239561 branch-load-misses (scaled from 23.43%)
> > >
> > > 0.395946903 seconds time elapsed.
> > >
> > > Reported-by: Ingo Molnar <[email protected]>
> > > Signed-off-by: Jaswinder Singh Rajput <[email protected]>
> > > ---
> > > tools/perf/util/parse-events.c | 70 +++++++++++++++++++++++++++++++++++++---
> > > 1 files changed, 65 insertions(+), 5 deletions(-)
> > >
> >
> >
> > If this looks OK then can I send following patches.
>
> Would be nice to do the 'scaled' cleanup too that i suggested in the
> other thread, plus size things so that there's no such lines:
>
> 428804 L1-d$-prefetches (scaled from 23.47%)
> 314446 L1-d$-prefetch-misses (scaled from 23.42%)
>
> if that's done then it would be nice to have a series submitted to
> lkml with numbered patches and a 0/3 (or so) mail summarizing the
> changes, and with each patch having code and commit log quality that
> you can stand behind and which needs no modification from the
> maintainers.
>

In the meantime, I also wrote another patch.

Please let me know which option is better, and then I will make it 4/4:

Subject: [PATCH] perf stat: use set_multiple_events() to select default
events

Select SOFTWARE and HARDWARE events if no event is selected.
This avoids replicating the same arrays and reduces book-keeping.

OR

[PATCH] perf stat: fix default attrs and nr_counters

memcpy(attrs, default_attrs, sizeof(attrs)) is only required
if no event is selected, and it only needs to copy sizeof(default_attrs).

Also set nr_counters to ARRAY_SIZE(default_attrs) in place of the hardcoded value.

Also make the default_attrs table small and simple.

Complete patches :

Subject: [PATCH] perf stat: use set_multiple_events() to select default events

Select SOFTWARE and HARDWARE events if no event is selected.
This avoids replicating the same arrays and reduces book-keeping.

Signed-off-by: Jaswinder Singh Rajput <[email protected]>
---
tools/perf/builtin-stat.c | 58 ++++++++++++++++++---------------------
tools/perf/util/parse-events.c | 2 +-
tools/perf/util/parse-events.h | 2 +
3 files changed, 30 insertions(+), 32 deletions(-)

diff --git a/tools/perf/builtin-stat.c b/tools/perf/builtin-stat.c
index 8420ec5..ca68bb5 100644
--- a/tools/perf/builtin-stat.c
+++ b/tools/perf/builtin-stat.c
@@ -4,23 +4,28 @@
* Builtin stat command: Give a precise performance counters summary
* overview about any workload, CPU or specific PID.
*
- * Sample output:
+ * Sample output on AMD box (bus-cycles event is not available for AMD)

- $ perf stat ~/hackbench 10
- Time: 0.104
+ #./perf stat -- ls -lR /usr/include/ > /dev/null

- Performance counter stats for '/home/mingo/hackbench':
+ Performance counter stats for 'ls -lR /usr/include/':

- 1255.538611 task clock ticks # 10.143 CPU utilization factor
- 54011 context switches # 0.043 M/sec
- 385 CPU migrations # 0.000 M/sec
- 17755 pagefaults # 0.014 M/sec
- 3808323185 CPU cycles # 3033.219 M/sec
- 1575111190 instructions # 1254.530 M/sec
- 17367895 cache references # 13.833 M/sec
- 7674421 cache misses # 6.112 M/sec
+ 1912.810168 cpu-clock-msecs
+ 1903.386989 task-clock-msecs # 0.362 CPUs
+ 440 page-faults # 0.000 M/sec
+ 440 minor-faults # 0.000 M/sec
+ 0 major-faults # 0.000 M/sec
+ 1876 context-switches # 0.001 M/sec
+ 1 CPU-migrations # 0.000 M/sec
+ 972932473 cycles # 511.159 M/sec (scaled from 31.42%)
+ 588142134 instructions # 0.605 IPC (scaled from 30.98%)
+ 287837533 cache-references # 151.224 M/sec (scaled from 83.54%)
+ 7667661 cache-misses # 4.028 M/sec (scaled from 84.13%)
+ 75792456 branches # 39.820 M/sec (scaled from 85.04%)
+ 4457813 branch-misses # 2.342 M/sec (scaled from 84.89%)
+ <not counted> bus-cycles

- Wall-clock time elapsed: 123.786620 msecs
+ 5.257401849 seconds time elapsed.

*
* Copyright (C) 2008, Red Hat Inc, Ingo Molnar <[email protected]>
@@ -32,6 +37,7 @@
* Wu Fengguang <[email protected]>
* Mike Galbraith <[email protected]>
* Paul Mackerras <[email protected]>
+ * Jaswinder Singh Rajput <[email protected]>
*
* Released under the GPL v2. (and only v2, not any later version)
*/
@@ -45,20 +51,6 @@
#include <sys/prctl.h>
#include <math.h>

-static struct perf_counter_attr default_attrs[MAX_COUNTERS] = {
-
- { .type = PERF_TYPE_SOFTWARE, .config = PERF_COUNT_SW_TASK_CLOCK },
- { .type = PERF_TYPE_SOFTWARE, .config = PERF_COUNT_SW_CONTEXT_SWITCHES},
- { .type = PERF_TYPE_SOFTWARE, .config = PERF_COUNT_SW_CPU_MIGRATIONS },
- { .type = PERF_TYPE_SOFTWARE, .config = PERF_COUNT_SW_PAGE_FAULTS },
-
- { .type = PERF_TYPE_HARDWARE, .config = PERF_COUNT_HW_CPU_CYCLES },
- { .type = PERF_TYPE_HARDWARE, .config = PERF_COUNT_HW_INSTRUCTIONS },
- { .type = PERF_TYPE_HARDWARE, .config = PERF_COUNT_HW_CACHE_REFERENCES},
- { .type = PERF_TYPE_HARDWARE, .config = PERF_COUNT_HW_CACHE_MISSES },
-
-};
-
#define MAX_RUN 100

static int system_wide = 0;
@@ -468,16 +460,20 @@ int cmd_stat(int argc, const char **argv, const char *prefix)
{
int status;

- memcpy(attrs, default_attrs, sizeof(attrs));
-
argc = parse_options(argc, argv, options, stat_usage, 0);
if (!argc)
usage_with_options(stat_usage, options);
if (run_count <= 0 || run_count > MAX_RUN)
usage_with_options(stat_usage, options);

- if (!nr_counters)
- nr_counters = 8;
+ /*
+ * By default select SOFTWARE and HARDWARE events,
+ * if no event is selected
+ */
+ if (!nr_counters) {
+ set_multiple_events(PERF_TYPE_SOFTWARE);
+ set_multiple_events(PERF_TYPE_HARDWARE);
+ }

nr_cpus = sysconf(_SC_NPROCESSORS_ONLN);
assert(nr_cpus <= MAX_NR_CPUS);
diff --git a/tools/perf/util/parse-events.c b/tools/perf/util/parse-events.c
index c1cd93e..eea71c5 100644
--- a/tools/perf/util/parse-events.c
+++ b/tools/perf/util/parse-events.c
@@ -296,7 +296,7 @@ parse_generic_hw_symbols(const char *str, struct perf_counter_attr *attr)
return 0;
}

-static int set_multiple_events(unsigned int type)
+int set_multiple_events(unsigned int type)
{
struct perf_counter_attr attr;
int i;
diff --git a/tools/perf/util/parse-events.h b/tools/perf/util/parse-events.h
index e3d5529..ca44465 100644
--- a/tools/perf/util/parse-events.h
+++ b/tools/perf/util/parse-events.h
@@ -9,6 +9,8 @@ extern struct perf_counter_attr attrs[MAX_COUNTERS];

extern char *event_name(int ctr);

+extern int set_multiple_events(unsigned int type);
+
extern int parse_events(const struct option *opt, const char *str, int unset);

#define EVENTS_HELP_MAX (128*1024)
--
1.6.0.6

OR

Subject: [PATCH] perf stat: fix default attrs and nr_counters

memcpy(attrs, default_attrs, sizeof(attrs)) is only required
if no event is selected, and it only needs to copy sizeof(default_attrs).

Also set nr_counters to ARRAY_SIZE(default_attrs) in place of the hardcoded value.

Also make the default_attrs table small and simple.

Signed-off-by: Jaswinder Singh Rajput <[email protected]>
---
tools/perf/builtin-stat.c | 31 ++++++++++++++++++-------------
1 files changed, 18 insertions(+), 13 deletions(-)

diff --git a/tools/perf/builtin-stat.c b/tools/perf/builtin-stat.c
index 8420ec5..e2b24f4 100644
--- a/tools/perf/builtin-stat.c
+++ b/tools/perf/builtin-stat.c
@@ -32,6 +32,7 @@
* Wu Fengguang <[email protected]>
* Mike Galbraith <[email protected]>
* Paul Mackerras <[email protected]>
+ * Jaswinder Singh Rajput <[email protected]>
*
* Released under the GPL v2. (and only v2, not any later version)
*/
@@ -45,17 +46,20 @@
#include <sys/prctl.h>
#include <math.h>

-static struct perf_counter_attr default_attrs[MAX_COUNTERS] = {
+#define CHW(x) .type = PERF_TYPE_HARDWARE, .config = PERF_COUNT_HW_##x
+#define CSW(x) .type = PERF_TYPE_SOFTWARE, .config = PERF_COUNT_SW_##x

- { .type = PERF_TYPE_SOFTWARE, .config = PERF_COUNT_SW_TASK_CLOCK },
- { .type = PERF_TYPE_SOFTWARE, .config = PERF_COUNT_SW_CONTEXT_SWITCHES},
- { .type = PERF_TYPE_SOFTWARE, .config = PERF_COUNT_SW_CPU_MIGRATIONS },
- { .type = PERF_TYPE_SOFTWARE, .config = PERF_COUNT_SW_PAGE_FAULTS },
+static struct perf_counter_attr default_attrs[] = {

- { .type = PERF_TYPE_HARDWARE, .config = PERF_COUNT_HW_CPU_CYCLES },
- { .type = PERF_TYPE_HARDWARE, .config = PERF_COUNT_HW_INSTRUCTIONS },
- { .type = PERF_TYPE_HARDWARE, .config = PERF_COUNT_HW_CACHE_REFERENCES},
- { .type = PERF_TYPE_HARDWARE, .config = PERF_COUNT_HW_CACHE_MISSES },
+ { CSW(TASK_CLOCK), },
+ { CSW(CONTEXT_SWITCHES), },
+ { CSW(CPU_MIGRATIONS), },
+ { CSW(PAGE_FAULTS), },
+
+ { CHW(CPU_CYCLES), },
+ { CHW(INSTRUCTIONS), },
+ { CHW(CACHE_REFERENCES), },
+ { CHW(CACHE_MISSES), },

};

@@ -468,16 +472,17 @@ int cmd_stat(int argc, const char **argv, const char *prefix)
{
int status;

- memcpy(attrs, default_attrs, sizeof(attrs));
-
argc = parse_options(argc, argv, options, stat_usage, 0);
if (!argc)
usage_with_options(stat_usage, options);
if (run_count <= 0 || run_count > MAX_RUN)
usage_with_options(stat_usage, options);

- if (!nr_counters)
- nr_counters = 8;
+ /* Set default attrs if no event is selected */
+ if (!nr_counters) {
+ memcpy(attrs, default_attrs, sizeof(default_attrs));
+ nr_counters = ARRAY_SIZE(default_attrs);
+ }

nr_cpus = sysconf(_SC_NPROCESSORS_ONLN);
assert(nr_cpus <= MAX_NR_CPUS);
--
1.6.0.6