Add event names that select all HARDWARE or all SOFTWARE events at once:
perf stat -e all-sw-events
perf stat -e sw-events
perf stat -e all-hw-events
perf stat -e hw-events
On AMD box :
./perf stat -e hw-events -e all-sw-events -- ls -lR > /dev/null
Performance counter stats for 'ls -lR':
9977353 cycles # 557.193 M/sec (scaled from 21.81%)
4244800 instructions # 0.425 IPC (scaled from 27.51%)
2953188 cache-references # 164.923 M/sec (scaled from 89.10%)
72469 cache-misses # 4.047 M/sec (scaled from 89.13%)
775760 branches # 43.323 M/sec (scaled from 89.10%)
57814 branch-misses # 3.229 M/sec (scaled from 83.34%)
<not counted> bus-cycles
17.970985 cpu-clock-msecs
17.906460 task-clock-msecs # 0.955 CPUs
386 page-faults # 0.022 M/sec
386 minor-faults # 0.022 M/sec
0 major-faults # 0.000 M/sec
4 context-switches # 0.000 M/sec
1 CPU-migrations # 0.000 M/sec
0.018750671 seconds time elapsed.
Reported-by: Ingo Molnar <[email protected]>
Signed-off-by: Jaswinder Singh Rajput <[email protected]>
---
tools/perf/util/parse-events.c | 66 ++++++++++++++++++++++++++++++++++++++-
1 files changed, 64 insertions(+), 2 deletions(-)
diff --git a/tools/perf/util/parse-events.c b/tools/perf/util/parse-events.c
index 430f060..85d8021 100644
--- a/tools/perf/util/parse-events.c
+++ b/tools/perf/util/parse-events.c
@@ -40,6 +40,16 @@ static struct event_symbol event_symbols[] = {
{ CSW(CPU_MIGRATIONS), "cpu-migrations", "migrations" },
};
+struct event_type_symbol {
+ char *symbol;
+ char *alias;
+};
+
+static struct event_type_symbol event_type_symbols[] = {
+ [PERF_TYPE_HARDWARE] = { "hw-events", "all-hw-events", },
+ [PERF_TYPE_SOFTWARE] = { "sw-events", "all-sw-events", },
+};
+
#define __PERF_COUNTER_FIELD(config, name) \
((config & PERF_COUNTER_##name##_MASK) >> PERF_COUNTER_##name##_SHIFT)
@@ -237,6 +247,49 @@ parse_generic_hw_symbols(const char *str, struct perf_counter_attr *attr)
return 0;
}
+static int set_multiple_events(unsigned int type)
+{
+ struct perf_counter_attr attr;
+ int i;
+
+ switch (type) {
+ case PERF_TYPE_HARDWARE:
+ case PERF_TYPE_SOFTWARE:
+ for (i = 0; i < ARRAY_SIZE(event_symbols); i++) {
+ if (event_symbols[i].type == type) {
+ memset(&attr, 0, sizeof(attr));
+ attr.type = event_symbols[i].type;
+ attr.config = event_symbols[i].config;
+ attrs[nr_counters] = attr;
+ nr_counters++;
+ }
+ }
+
+ break;
+ default:
+ return -1;
+ }
+
+ /*
+ * parse_events() assumes that each symbol sets only a single event and
+ * stores it itself; we have already stored several, so return magic 1.
+ */
+ return 1;
+}
+
+static int check_type_events(const char *str, unsigned int i)
+{
+ if (!strncmp(str, event_type_symbols[i].symbol,
+ strlen(event_type_symbols[i].symbol)))
+ return 1;
+
+ if (strlen(event_type_symbols[i].alias))
+ if (!strncmp(str, event_type_symbols[i].alias,
+ strlen(event_type_symbols[i].alias)))
+ return 1;
+ return 0;
+}
+
static int check_events(const char *str, unsigned int i)
{
if (!strncmp(str, event_symbols[i].symbol,
@@ -288,6 +341,12 @@ static int parse_event_symbols(const char *str, struct perf_counter_attr *attr)
return 0;
}
+ for (i = 0; i < ARRAY_SIZE(event_type_symbols); i++) {
+ if (check_type_events(str, i)) {
+ return set_multiple_events(i);
+ }
+ }
+
for (i = 0; i < ARRAY_SIZE(event_symbols); i++) {
if (check_events(str, i)) {
attr->type = event_symbols[i].type;
@@ -314,8 +373,11 @@ again:
if (ret < 0)
return ret;
- attrs[nr_counters] = attr;
- nr_counters++;
+ /* No need to set attrs and increment counter when already set */
+ if (ret == 0) {
+ attrs[nr_counters] = attr;
+ nr_counters++;
+ }
str = strstr(str, ",");
if (str) {
--
1.6.0.6
On Fri, 2009-06-26 at 02:32 +0530, Jaswinder Singh Rajput wrote:
> Add support for HARDWARE and SOFTWARE events :
> perf stat -e all-sw-events
> perf stat -e sw-events
> perf stat -e all-hw-events
> perf stat -e hw-events
>
> On AMD box :
>
> ./perf stat -e hw-events -e all-sw-events -- ls -lR > /dev/null
>
> Performance counter stats for 'ls -lR':
>
> 9977353 cycles # 557.193 M/sec (scaled from 21.81%)
> 4244800 instructions # 0.425 IPC (scaled from 27.51%)
> 2953188 cache-references # 164.923 M/sec (scaled from 89.10%)
> 72469 cache-misses # 4.047 M/sec (scaled from 89.13%)
> 775760 branches # 43.323 M/sec (scaled from 89.10%)
> 57814 branch-misses # 3.229 M/sec (scaled from 83.34%)
> <not counted> bus-cycles
> 17.970985 cpu-clock-msecs
> 17.906460 task-clock-msecs # 0.955 CPUs
> 386 page-faults # 0.022 M/sec
> 386 minor-faults # 0.022 M/sec
> 0 major-faults # 0.000 M/sec
> 4 context-switches # 0.000 M/sec
> 1 CPU-migrations # 0.000 M/sec
>
> 0.018750671 seconds time elapsed.
>
> Reported-by : Ingo Molnar <[email protected]>
> Signed-off-by: Jaswinder Singh Rajput <[email protected]>
> ---
> tools/perf/util/parse-events.c | 66 ++++++++++++++++++++++++++++++++++++++-
> 1 files changed, 64 insertions(+), 2 deletions(-)
Please treat:
[PATCH -tip] perf_counter tools: add support to set of multiple events in one shot
as
[PATCH 1/2 -tip] perf_counter tools: add support to set of multiple events in one shot
And here is 2/2:
[PATCH 2/2 -tip] perf_counter tools: Add support for all CACHE events
Add support for all CACHE events :
perf stat -e all-cache-events
perf stat -e cache-events
On an AMD box (events shown as <not counted> are not available on AMD):
./perf stat -e all-cache-events -- ls -lR /usr/include/ > /dev/null
Performance counter stats for 'ls -lR /usr/include/':
246370884 L1-d$-loads (scaled from 23.55%)
1074018 L1-d$-load-misses (scaled from 23.38%)
150708 L1-d$-stores (scaled from 23.57%)
<not counted> L1-d$-store-misses
428804 L1-d$-prefetches (scaled from 23.47%)
314446 L1-d$-prefetch-misses (scaled from 23.42%)
252626137 L1-i$-loads (scaled from 23.24%)
3985110 L1-i$-load-misses (scaled from 23.24%)
93754 L1-i$-prefetches (scaled from 23.34%)
<not counted> L1-i$-prefetch-misses
5202314 LLC-loads (scaled from 23.34%)
525467 LLC-load-misses (scaled from 23.25%)
5220558 LLC-stores (scaled from 23.21%)
<not counted> LLC-store-misses
<not counted> LLC-prefetches
<not counted> LLC-prefetch-misses
251954203 dTLB-loads (scaled from 23.70%)
5297550 dTLB-load-misses (scaled from 23.96%)
<not counted> dTLB-stores
<not counted> dTLB-store-misses
<not counted> dTLB-prefetches
<not counted> dTLB-prefetch-misses
248561524 iTLB-loads (scaled from 24.15%)
4693 iTLB-load-misses (scaled from 24.18%)
106992392 branch-loads (scaled from 23.67%)
5239561 branch-load-misses (scaled from 23.43%)
0.395946903 seconds time elapsed.
Reported-by: Ingo Molnar <[email protected]>
Signed-off-by: Jaswinder Singh Rajput <[email protected]>
---
tools/perf/util/parse-events.c | 70 +++++++++++++++++++++++++++++++++++++---
1 files changed, 65 insertions(+), 5 deletions(-)
diff --git a/tools/perf/util/parse-events.c b/tools/perf/util/parse-events.c
index cfc622b..c1cd93e 100644
--- a/tools/perf/util/parse-events.c
+++ b/tools/perf/util/parse-events.c
@@ -40,14 +40,63 @@ static struct event_symbol event_symbols[] = {
{ CSW(CPU_MIGRATIONS), "cpu-migrations", "migrations" },
};
+struct event_cache_symbol {
+ u8 type;
+ u64 config;
+};
+
struct event_type_symbol {
char *symbol;
char *alias;
};
+#define CHCACHE(x, y, z) \
+.type = PERF_TYPE_HW_CACHE, \
+.config = (PERF_COUNT_HW_CACHE_##x | (PERF_COUNT_HW_CACHE_OP_##y << 8) |\
+ (PERF_COUNT_HW_CACHE_RESULT_##z << 16))
+
+/* Generalized Hardware cache counters events */
+static struct event_cache_symbol event_cache_symbols[] = {
+ { CHCACHE(L1D, READ, ACCESS) },
+ { CHCACHE(L1D, READ, MISS) },
+ { CHCACHE(L1D, WRITE, ACCESS) },
+ { CHCACHE(L1D, WRITE, MISS) },
+ { CHCACHE(L1D, PREFETCH, ACCESS) },
+ { CHCACHE(L1D, PREFETCH, MISS) },
+
+ { CHCACHE(L1I, READ, ACCESS) },
+ { CHCACHE(L1I, READ, MISS) },
+ { CHCACHE(L1I, PREFETCH, ACCESS) },
+ { CHCACHE(L1I, PREFETCH, MISS) },
+
+ { CHCACHE(LL, READ, ACCESS) },
+ { CHCACHE(LL, READ, MISS) },
+ { CHCACHE(LL, WRITE, ACCESS) },
+ { CHCACHE(LL, WRITE, MISS) },
+ { CHCACHE(LL, PREFETCH, ACCESS) },
+ { CHCACHE(LL, PREFETCH, MISS) },
+
+ { CHCACHE(DTLB, READ, ACCESS) },
+ { CHCACHE(DTLB, READ, MISS) },
+ { CHCACHE(DTLB, WRITE, ACCESS) },
+ { CHCACHE(DTLB, WRITE, MISS) },
+ { CHCACHE(DTLB, PREFETCH, ACCESS) },
+ { CHCACHE(DTLB, PREFETCH, MISS) },
+
+ { CHCACHE(ITLB, READ, ACCESS) },
+ { CHCACHE(ITLB, READ, MISS) },
+
+ { CHCACHE(BPU, READ, ACCESS) },
+ { CHCACHE(BPU, READ, MISS) },
+
+};
+
static struct event_type_symbol event_type_symbols[] = {
- [PERF_TYPE_HARDWARE] = { "hw-events", "all-hw-events", },
- [PERF_TYPE_SOFTWARE] = { "sw-events", "all-sw-events", },
+ [PERF_TYPE_HARDWARE] = { "hw-events", "all-hw-events", },
+ [PERF_TYPE_SOFTWARE] = { "sw-events", "all-sw-events", },
+ [PERF_TYPE_TRACEPOINT] = { "", "", },
+ [PERF_TYPE_HW_CACHE] = { "cache-events", "all-cache-events", },
+ [PERF_TYPE_RAW] = { "", "", },
};
#define __PERF_COUNTER_FIELD(config, name) \
@@ -264,8 +313,18 @@ static int set_multiple_events(unsigned int type)
nr_counters++;
}
}
+ break;
+ case PERF_TYPE_HW_CACHE:
+ for (i = 0; i < ARRAY_SIZE(event_cache_symbols); i++) {
+ memset(&attr, 0, sizeof(attr));
+ attr.type = event_cache_symbols[i].type;
+ attr.config = event_cache_symbols[i].config;
+ attrs[nr_counters] = attr;
+ nr_counters++;
+ }
break;
+
default:
return -1;
}
@@ -279,9 +338,10 @@ static int set_multiple_events(unsigned int type)
static int check_type_events(const char *str, unsigned int i)
{
- if (!strncmp(str, event_type_symbols[i].symbol,
- strlen(event_type_symbols[i].symbol)))
- return 1;
+ if (strlen(event_type_symbols[i].symbol))
+ if (!strncmp(str, event_type_symbols[i].symbol,
+ strlen(event_type_symbols[i].symbol)))
+ return 1;
if (strlen(event_type_symbols[i].alias))
if (!strncmp(str, event_type_symbols[i].alias,
--
1.6.0.6
On Fri, 2009-06-26 at 03:58 +0530, Jaswinder Singh Rajput wrote:
> On Fri, 2009-06-26 at 02:32 +0530, Jaswinder Singh Rajput wrote:
> > Add support for HARDWARE and SOFTWARE events :
> > perf stat -e all-sw-events
> > perf stat -e sw-events
> > perf stat -e all-hw-events
> > perf stat -e hw-events
> >
> > On AMD box :
> >
> > ./perf stat -e hw-events -e all-sw-events -- ls -lR > /dev/null
> >
> > Performance counter stats for 'ls -lR':
> >
> > 9977353 cycles # 557.193 M/sec (scaled from 21.81%)
> > 4244800 instructions # 0.425 IPC (scaled from 27.51%)
> > 2953188 cache-references # 164.923 M/sec (scaled from 89.10%)
> > 72469 cache-misses # 4.047 M/sec (scaled from 89.13%)
> > 775760 branches # 43.323 M/sec (scaled from 89.10%)
> > 57814 branch-misses # 3.229 M/sec (scaled from 83.34%)
> > <not counted> bus-cycles
> > 17.970985 cpu-clock-msecs
> > 17.906460 task-clock-msecs # 0.955 CPUs
> > 386 page-faults # 0.022 M/sec
> > 386 minor-faults # 0.022 M/sec
> > 0 major-faults # 0.000 M/sec
> > 4 context-switches # 0.000 M/sec
> > 1 CPU-migrations # 0.000 M/sec
> >
> > 0.018750671 seconds time elapsed.
> >
> > Reported-by : Ingo Molnar <[email protected]>
> > Signed-off-by: Jaswinder Singh Rajput <[email protected]>
> > ---
> > tools/perf/util/parse-events.c | 66 ++++++++++++++++++++++++++++++++++++++-
> > 1 files changed, 64 insertions(+), 2 deletions(-)
>
> Please treat :
> [PATCH -tip] perf_counter tools: add support to set of multiple events in one short
> as
> [PATCH 1/2-tip] perf_counter tools: add support to set of multiple events in one short
>
> And here is 2/2 :
>
> [PATCH 2/2 -tip] perf_counter tools: Add support for all CACHE events
>
> Add support for all CACHE events :
> perf stat -e all-cache-events
> perf stat -e cache-events
>
> On AMD box (<not-counted> events are not available for AMD):
>
> ./perf stat -e all-cache-events -- ls -lR /usr/include/ > /dev/null
>
> Performance counter stats for 'ls -lR /usr/include/':
>
> 246370884 L1-d$-loads (scaled from 23.55%)
> 1074018 L1-d$-load-misses (scaled from 23.38%)
> 150708 L1-d$-stores (scaled from 23.57%)
> <not counted> L1-d$-store-misses
> 428804 L1-d$-prefetches (scaled from 23.47%)
> 314446 L1-d$-prefetch-misses (scaled from 23.42%)
> 252626137 L1-i$-loads (scaled from 23.24%)
> 3985110 L1-i$-load-misses (scaled from 23.24%)
> 93754 L1-i$-prefetches (scaled from 23.34%)
> <not counted> L1-i$-prefetch-misses
> 5202314 LLC-loads (scaled from 23.34%)
> 525467 LLC-load-misses (scaled from 23.25%)
> 5220558 LLC-stores (scaled from 23.21%)
> <not counted> LLC-store-misses
> <not counted> LLC-prefetches
> <not counted> LLC-prefetch-misses
> 251954203 dTLB-loads (scaled from 23.70%)
> 5297550 dTLB-load-misses (scaled from 23.96%)
> <not counted> dTLB-stores
> <not counted> dTLB-store-misses
> <not counted> dTLB-prefetches
> <not counted> dTLB-prefetch-misses
> 248561524 iTLB-loads (scaled from 24.15%)
> 4693 iTLB-load-misses (scaled from 24.18%)
> 106992392 branch-loads (scaled from 23.67%)
> 5239561 branch-load-misses (scaled from 23.43%)
>
> 0.395946903 seconds time elapsed.
>
> Reported-by: Ingo Molnar <[email protected]>
> Signed-off-by: Jaswinder Singh Rajput <[email protected]>
> ---
> tools/perf/util/parse-events.c | 70 +++++++++++++++++++++++++++++++++++++---
> 1 files changed, 65 insertions(+), 5 deletions(-)
>
If this looks OK, may I send the following patches?
Thanks,
--
JSR
* Jaswinder Singh Rajput <[email protected]> wrote:
> On Fri, 2009-06-26 at 03:58 +0530, Jaswinder Singh Rajput wrote:
> > On Fri, 2009-06-26 at 02:32 +0530, Jaswinder Singh Rajput wrote:
> > > Add support for HARDWARE and SOFTWARE events :
> > > perf stat -e all-sw-events
> > > perf stat -e sw-events
> > > perf stat -e all-hw-events
> > > perf stat -e hw-events
> > >
> > > On AMD box :
> > >
> > > ./perf stat -e hw-events -e all-sw-events -- ls -lR > /dev/null
> > >
> > > Performance counter stats for 'ls -lR':
> > >
> > > 9977353 cycles # 557.193 M/sec (scaled from 21.81%)
> > > 4244800 instructions # 0.425 IPC (scaled from 27.51%)
> > > 2953188 cache-references # 164.923 M/sec (scaled from 89.10%)
> > > 72469 cache-misses # 4.047 M/sec (scaled from 89.13%)
> > > 775760 branches # 43.323 M/sec (scaled from 89.10%)
> > > 57814 branch-misses # 3.229 M/sec (scaled from 83.34%)
> > > <not counted> bus-cycles
> > > 17.970985 cpu-clock-msecs
> > > 17.906460 task-clock-msecs # 0.955 CPUs
> > > 386 page-faults # 0.022 M/sec
> > > 386 minor-faults # 0.022 M/sec
> > > 0 major-faults # 0.000 M/sec
> > > 4 context-switches # 0.000 M/sec
> > > 1 CPU-migrations # 0.000 M/sec
> > >
> > > 0.018750671 seconds time elapsed.
> > >
> > > Reported-by : Ingo Molnar <[email protected]>
> > > Signed-off-by: Jaswinder Singh Rajput <[email protected]>
> > > ---
> > > tools/perf/util/parse-events.c | 66 ++++++++++++++++++++++++++++++++++++++-
> > > 1 files changed, 64 insertions(+), 2 deletions(-)
> >
> > Please treat :
> > [PATCH -tip] perf_counter tools: add support to set of multiple events in one short
> > as
> > [PATCH 1/2-tip] perf_counter tools: add support to set of multiple events in one short
> >
> > And here is 2/2 :
> >
> > [PATCH 2/2 -tip] perf_counter tools: Add support for all CACHE events
> >
> > Add support for all CACHE events :
> > perf stat -e all-cache-events
> > perf stat -e cache-events
> >
> > On AMD box (<not-counted> events are not available for AMD):
> >
> > ./perf stat -e all-cache-events -- ls -lR /usr/include/ > /dev/null
> >
> > Performance counter stats for 'ls -lR /usr/include/':
> >
> > 246370884 L1-d$-loads (scaled from 23.55%)
> > 1074018 L1-d$-load-misses (scaled from 23.38%)
> > 150708 L1-d$-stores (scaled from 23.57%)
> > <not counted> L1-d$-store-misses
> > 428804 L1-d$-prefetches (scaled from 23.47%)
> > 314446 L1-d$-prefetch-misses (scaled from 23.42%)
> > 252626137 L1-i$-loads (scaled from 23.24%)
> > 3985110 L1-i$-load-misses (scaled from 23.24%)
> > 93754 L1-i$-prefetches (scaled from 23.34%)
> > <not counted> L1-i$-prefetch-misses
> > 5202314 LLC-loads (scaled from 23.34%)
> > 525467 LLC-load-misses (scaled from 23.25%)
> > 5220558 LLC-stores (scaled from 23.21%)
> > <not counted> LLC-store-misses
> > <not counted> LLC-prefetches
> > <not counted> LLC-prefetch-misses
> > 251954203 dTLB-loads (scaled from 23.70%)
> > 5297550 dTLB-load-misses (scaled from 23.96%)
> > <not counted> dTLB-stores
> > <not counted> dTLB-store-misses
> > <not counted> dTLB-prefetches
> > <not counted> dTLB-prefetch-misses
> > 248561524 iTLB-loads (scaled from 24.15%)
> > 4693 iTLB-load-misses (scaled from 24.18%)
> > 106992392 branch-loads (scaled from 23.67%)
> > 5239561 branch-load-misses (scaled from 23.43%)
> >
> > 0.395946903 seconds time elapsed.
> >
> > Reported-by: Ingo Molnar <[email protected]>
> > Signed-off-by: Jaswinder Singh Rajput <[email protected]>
> > ---
> > tools/perf/util/parse-events.c | 70 +++++++++++++++++++++++++++++++++++++---
> > 1 files changed, 65 insertions(+), 5 deletions(-)
> >
>
>
> If this looks OK then can I send following patches.
It would be nice to also do the 'scaled' cleanup that I suggested in the
other thread, plus size things so that there are no lines like these:
428804 L1-d$-prefetches (scaled from 23.47%)
314446 L1-d$-prefetch-misses (scaled from 23.42%)
If that's done, then it would be nice to have a series submitted to
lkml with numbered patches and a 0/3 (or so) mail summarizing the
changes, and with each patch having code and commit log quality that
you can stand behind and which needs no modification from the
maintainers.
Ingo
On Fri, 2009-06-26 at 14:25 +0200, Ingo Molnar wrote:
> * Jaswinder Singh Rajput <[email protected]> wrote:
>
> > On Fri, 2009-06-26 at 03:58 +0530, Jaswinder Singh Rajput wrote:
> > > On Fri, 2009-06-26 at 02:32 +0530, Jaswinder Singh Rajput wrote:
> > > > Add support for HARDWARE and SOFTWARE events :
> > > > perf stat -e all-sw-events
> > > > perf stat -e sw-events
> > > > perf stat -e all-hw-events
> > > > perf stat -e hw-events
> > > >
> > > > On AMD box :
> > > >
> > > > ./perf stat -e hw-events -e all-sw-events -- ls -lR > /dev/null
> > > >
> > > > Performance counter stats for 'ls -lR':
> > > >
> > > > 9977353 cycles # 557.193 M/sec (scaled from 21.81%)
> > > > 4244800 instructions # 0.425 IPC (scaled from 27.51%)
> > > > 2953188 cache-references # 164.923 M/sec (scaled from 89.10%)
> > > > 72469 cache-misses # 4.047 M/sec (scaled from 89.13%)
> > > > 775760 branches # 43.323 M/sec (scaled from 89.10%)
> > > > 57814 branch-misses # 3.229 M/sec (scaled from 83.34%)
> > > > <not counted> bus-cycles
> > > > 17.970985 cpu-clock-msecs
> > > > 17.906460 task-clock-msecs # 0.955 CPUs
> > > > 386 page-faults # 0.022 M/sec
> > > > 386 minor-faults # 0.022 M/sec
> > > > 0 major-faults # 0.000 M/sec
> > > > 4 context-switches # 0.000 M/sec
> > > > 1 CPU-migrations # 0.000 M/sec
> > > >
> > > > 0.018750671 seconds time elapsed.
> > > >
> > > > Reported-by : Ingo Molnar <[email protected]>
> > > > Signed-off-by: Jaswinder Singh Rajput <[email protected]>
> > > > ---
> > > > tools/perf/util/parse-events.c | 66 ++++++++++++++++++++++++++++++++++++++-
> > > > 1 files changed, 64 insertions(+), 2 deletions(-)
> > >
> > > Please treat :
> > > [PATCH -tip] perf_counter tools: add support to set of multiple events in one short
> > > as
> > > [PATCH 1/2-tip] perf_counter tools: add support to set of multiple events in one short
> > >
> > > And here is 2/2 :
> > >
> > > [PATCH 2/2 -tip] perf_counter tools: Add support for all CACHE events
> > >
> > > Add support for all CACHE events :
> > > perf stat -e all-cache-events
> > > perf stat -e cache-events
> > >
> > > On AMD box (<not-counted> events are not available for AMD):
> > >
> > > ./perf stat -e all-cache-events -- ls -lR /usr/include/ > /dev/null
> > >
> > > Performance counter stats for 'ls -lR /usr/include/':
> > >
> > > 246370884 L1-d$-loads (scaled from 23.55%)
> > > 1074018 L1-d$-load-misses (scaled from 23.38%)
> > > 150708 L1-d$-stores (scaled from 23.57%)
> > > <not counted> L1-d$-store-misses
> > > 428804 L1-d$-prefetches (scaled from 23.47%)
> > > 314446 L1-d$-prefetch-misses (scaled from 23.42%)
> > > 252626137 L1-i$-loads (scaled from 23.24%)
> > > 3985110 L1-i$-load-misses (scaled from 23.24%)
> > > 93754 L1-i$-prefetches (scaled from 23.34%)
> > > <not counted> L1-i$-prefetch-misses
> > > 5202314 LLC-loads (scaled from 23.34%)
> > > 525467 LLC-load-misses (scaled from 23.25%)
> > > 5220558 LLC-stores (scaled from 23.21%)
> > > <not counted> LLC-store-misses
> > > <not counted> LLC-prefetches
> > > <not counted> LLC-prefetch-misses
> > > 251954203 dTLB-loads (scaled from 23.70%)
> > > 5297550 dTLB-load-misses (scaled from 23.96%)
> > > <not counted> dTLB-stores
> > > <not counted> dTLB-store-misses
> > > <not counted> dTLB-prefetches
> > > <not counted> dTLB-prefetch-misses
> > > 248561524 iTLB-loads (scaled from 24.15%)
> > > 4693 iTLB-load-misses (scaled from 24.18%)
> > > 106992392 branch-loads (scaled from 23.67%)
> > > 5239561 branch-load-misses (scaled from 23.43%)
> > >
> > > 0.395946903 seconds time elapsed.
> > >
> > > Reported-by: Ingo Molnar <[email protected]>
> > > Signed-off-by: Jaswinder Singh Rajput <[email protected]>
> > > ---
> > > tools/perf/util/parse-events.c | 70 +++++++++++++++++++++++++++++++++++++---
> > > 1 files changed, 65 insertions(+), 5 deletions(-)
> > >
> >
> >
> > If this looks OK then can I send following patches.
>
> Would be nice to do the 'scaled' cleanup too that i suggested in the
> other thread, plus size things so that there's no such lines:
>
> 428804 L1-d$-prefetches (scaled from 23.47%)
> 314446 L1-d$-prefetch-misses (scaled from 23.42%)
>
> if that's done then it would be nice to have a series submitted to
> lkml with numbered patches and a 0/3 (or so) mail summarizing the
> changes, and with each patch having code and commit log quality that
> you can stand behind and which needs no modification from the
> maintainers.
>
In the meantime, I also wrote another patch.
Please let me know which option is better; then I will make it 4/4:
Subject: [PATCH] perf stat: use set_multiple_events() to select default
events
Select SOFTWARE and HARDWARE events if no event is selected.
This avoids replicating the same arrays and reduces bookkeeping.
OR
[PATCH] perf stat: fix default attrs and nr_counters
memcpy(attrs, default_attrs, sizeof(attrs)) is only required
if no event is selected, and it only needs to copy sizeof(default_attrs).
Set nr_counters to ARRAY_SIZE(default_attrs) instead of a hardcoded value.
Also make the default_attrs table small and simple.
Complete patches :
Subject: [PATCH] perf stat: use set_multiple_events() to select default events
Select SOFTWARE and HARDWARE events if no event is selected.
This avoids replicating the same arrays and reduces bookkeeping.
Signed-off-by: Jaswinder Singh Rajput <[email protected]>
---
tools/perf/builtin-stat.c | 58 ++++++++++++++++++---------------------
tools/perf/util/parse-events.c | 2 +-
tools/perf/util/parse-events.h | 2 +
3 files changed, 30 insertions(+), 32 deletions(-)
diff --git a/tools/perf/builtin-stat.c b/tools/perf/builtin-stat.c
index 8420ec5..ca68bb5 100644
--- a/tools/perf/builtin-stat.c
+++ b/tools/perf/builtin-stat.c
@@ -4,23 +4,28 @@
* Builtin stat command: Give a precise performance counters summary
* overview about any workload, CPU or specific PID.
*
- * Sample output:
+ * Sample output on AMD box (bus-cycles event is not available for AMD)
- $ perf stat ~/hackbench 10
- Time: 0.104
+ #./perf stat -- ls -lR /usr/include/ > /dev/null
- Performance counter stats for '/home/mingo/hackbench':
+ Performance counter stats for 'ls -lR /usr/include/':
- 1255.538611 task clock ticks # 10.143 CPU utilization factor
- 54011 context switches # 0.043 M/sec
- 385 CPU migrations # 0.000 M/sec
- 17755 pagefaults # 0.014 M/sec
- 3808323185 CPU cycles # 3033.219 M/sec
- 1575111190 instructions # 1254.530 M/sec
- 17367895 cache references # 13.833 M/sec
- 7674421 cache misses # 6.112 M/sec
+ 1912.810168 cpu-clock-msecs
+ 1903.386989 task-clock-msecs # 0.362 CPUs
+ 440 page-faults # 0.000 M/sec
+ 440 minor-faults # 0.000 M/sec
+ 0 major-faults # 0.000 M/sec
+ 1876 context-switches # 0.001 M/sec
+ 1 CPU-migrations # 0.000 M/sec
+ 972932473 cycles # 511.159 M/sec (scaled from 31.42%)
+ 588142134 instructions # 0.605 IPC (scaled from 30.98%)
+ 287837533 cache-references # 151.224 M/sec (scaled from 83.54%)
+ 7667661 cache-misses # 4.028 M/sec (scaled from 84.13%)
+ 75792456 branches # 39.820 M/sec (scaled from 85.04%)
+ 4457813 branch-misses # 2.342 M/sec (scaled from 84.89%)
+ <not counted> bus-cycles
- Wall-clock time elapsed: 123.786620 msecs
+ 5.257401849 seconds time elapsed.
*
* Copyright (C) 2008, Red Hat Inc, Ingo Molnar <[email protected]>
@@ -32,6 +37,7 @@
* Wu Fengguang <[email protected]>
* Mike Galbraith <[email protected]>
* Paul Mackerras <[email protected]>
+ * Jaswinder Singh Rajput <[email protected]>
*
* Released under the GPL v2. (and only v2, not any later version)
*/
@@ -45,20 +51,6 @@
#include <sys/prctl.h>
#include <math.h>
-static struct perf_counter_attr default_attrs[MAX_COUNTERS] = {
-
- { .type = PERF_TYPE_SOFTWARE, .config = PERF_COUNT_SW_TASK_CLOCK },
- { .type = PERF_TYPE_SOFTWARE, .config = PERF_COUNT_SW_CONTEXT_SWITCHES},
- { .type = PERF_TYPE_SOFTWARE, .config = PERF_COUNT_SW_CPU_MIGRATIONS },
- { .type = PERF_TYPE_SOFTWARE, .config = PERF_COUNT_SW_PAGE_FAULTS },
-
- { .type = PERF_TYPE_HARDWARE, .config = PERF_COUNT_HW_CPU_CYCLES },
- { .type = PERF_TYPE_HARDWARE, .config = PERF_COUNT_HW_INSTRUCTIONS },
- { .type = PERF_TYPE_HARDWARE, .config = PERF_COUNT_HW_CACHE_REFERENCES},
- { .type = PERF_TYPE_HARDWARE, .config = PERF_COUNT_HW_CACHE_MISSES },
-
-};
-
#define MAX_RUN 100
static int system_wide = 0;
@@ -468,16 +460,20 @@ int cmd_stat(int argc, const char **argv, const char *prefix)
{
int status;
- memcpy(attrs, default_attrs, sizeof(attrs));
-
argc = parse_options(argc, argv, options, stat_usage, 0);
if (!argc)
usage_with_options(stat_usage, options);
if (run_count <= 0 || run_count > MAX_RUN)
usage_with_options(stat_usage, options);
- if (!nr_counters)
- nr_counters = 8;
+ /*
+ * By default select SOFTWARE and HARDWARE events,
+ * if no event is selected
+ */
+ if (!nr_counters) {
+ set_multiple_events(PERF_TYPE_SOFTWARE);
+ set_multiple_events(PERF_TYPE_HARDWARE);
+ }
nr_cpus = sysconf(_SC_NPROCESSORS_ONLN);
assert(nr_cpus <= MAX_NR_CPUS);
diff --git a/tools/perf/util/parse-events.c b/tools/perf/util/parse-events.c
index c1cd93e..eea71c5 100644
--- a/tools/perf/util/parse-events.c
+++ b/tools/perf/util/parse-events.c
@@ -296,7 +296,7 @@ parse_generic_hw_symbols(const char *str, struct perf_counter_attr *attr)
return 0;
}
-static int set_multiple_events(unsigned int type)
+int set_multiple_events(unsigned int type)
{
struct perf_counter_attr attr;
int i;
diff --git a/tools/perf/util/parse-events.h b/tools/perf/util/parse-events.h
index e3d5529..ca44465 100644
--- a/tools/perf/util/parse-events.h
+++ b/tools/perf/util/parse-events.h
@@ -9,6 +9,8 @@ extern struct perf_counter_attr attrs[MAX_COUNTERS];
extern char *event_name(int ctr);
+extern int set_multiple_events(unsigned int type);
+
extern int parse_events(const struct option *opt, const char *str, int unset);
#define EVENTS_HELP_MAX (128*1024)
--
1.6.0.6
OR
Subject: [PATCH] perf stat: fix default attrs and nr_counters
memcpy(attrs, default_attrs, sizeof(attrs)) is only required
if no event is selected, and it only needs to copy sizeof(default_attrs).
Set nr_counters to ARRAY_SIZE(default_attrs) instead of a hardcoded value.
Also make the default_attrs table small and simple.
Signed-off-by: Jaswinder Singh Rajput <[email protected]>
---
tools/perf/builtin-stat.c | 31 ++++++++++++++++++-------------
1 files changed, 18 insertions(+), 13 deletions(-)
diff --git a/tools/perf/builtin-stat.c b/tools/perf/builtin-stat.c
index 8420ec5..e2b24f4 100644
--- a/tools/perf/builtin-stat.c
+++ b/tools/perf/builtin-stat.c
@@ -32,6 +32,7 @@
* Wu Fengguang <[email protected]>
* Mike Galbraith <[email protected]>
* Paul Mackerras <[email protected]>
+ * Jaswinder Singh Rajput <[email protected]>
*
* Released under the GPL v2. (and only v2, not any later version)
*/
@@ -45,17 +46,20 @@
#include <sys/prctl.h>
#include <math.h>
-static struct perf_counter_attr default_attrs[MAX_COUNTERS] = {
+#define CHW(x) .type = PERF_TYPE_HARDWARE, .config = PERF_COUNT_HW_##x
+#define CSW(x) .type = PERF_TYPE_SOFTWARE, .config = PERF_COUNT_SW_##x
- { .type = PERF_TYPE_SOFTWARE, .config = PERF_COUNT_SW_TASK_CLOCK },
- { .type = PERF_TYPE_SOFTWARE, .config = PERF_COUNT_SW_CONTEXT_SWITCHES},
- { .type = PERF_TYPE_SOFTWARE, .config = PERF_COUNT_SW_CPU_MIGRATIONS },
- { .type = PERF_TYPE_SOFTWARE, .config = PERF_COUNT_SW_PAGE_FAULTS },
+static struct perf_counter_attr default_attrs[] = {
- { .type = PERF_TYPE_HARDWARE, .config = PERF_COUNT_HW_CPU_CYCLES },
- { .type = PERF_TYPE_HARDWARE, .config = PERF_COUNT_HW_INSTRUCTIONS },
- { .type = PERF_TYPE_HARDWARE, .config = PERF_COUNT_HW_CACHE_REFERENCES},
- { .type = PERF_TYPE_HARDWARE, .config = PERF_COUNT_HW_CACHE_MISSES },
+ { CSW(TASK_CLOCK), },
+ { CSW(CONTEXT_SWITCHES), },
+ { CSW(CPU_MIGRATIONS), },
+ { CSW(PAGE_FAULTS), },
+
+ { CHW(CPU_CYCLES), },
+ { CHW(INSTRUCTIONS), },
+ { CHW(CACHE_REFERENCES), },
+ { CHW(CACHE_MISSES), },
};
@@ -468,16 +472,17 @@ int cmd_stat(int argc, const char **argv, const char *prefix)
{
int status;
- memcpy(attrs, default_attrs, sizeof(attrs));
-
argc = parse_options(argc, argv, options, stat_usage, 0);
if (!argc)
usage_with_options(stat_usage, options);
if (run_count <= 0 || run_count > MAX_RUN)
usage_with_options(stat_usage, options);
- if (!nr_counters)
- nr_counters = 8;
+ /* Set default attrs if no event is selected */
+ if (!nr_counters) {
+ memcpy(attrs, default_attrs, sizeof(default_attrs));
+ nr_counters = ARRAY_SIZE(default_attrs);
+ }
nr_cpus = sysconf(_SC_NPROCESSORS_ONLN);
assert(nr_cpus <= MAX_NR_CPUS);
--
1.6.0.6