hi,
adding --per-numa option to aggregate stats per NUMA nodes,
you can now use the stat command like:
# perf stat -a -I 1000 -e cycles --per-numa
# time numa cpus counts unit events
1.000542550 N0 20 6,202,097 cycles
1.000542550 N1 20 639,559 cycles
2.002040063 N0 20 7,412,495 cycles
2.002040063 N1 20 2,185,577 cycles
3.003451699 N0 20 6,508,917 cycles
3.003451699 N1 20 765,607 cycles
...
thanks,
jirka
---
Jiri Olsa (3):
libperf: Add perf_cpu_map__max function
perf tools: Add perf_env__numa_node function
perf stat: Add --per-numa aggregation support
tools/perf/Documentation/perf-stat.txt | 5 +++++
tools/perf/builtin-stat.c | 60 ++++++++++++++++++++++++++++++++++++++++++++++++++----------
tools/perf/lib/cpumap.c | 12 ++++++++++++
tools/perf/lib/include/perf/cpumap.h | 1 +
tools/perf/lib/libperf.map | 1 +
tools/perf/util/cpumap.c | 18 ++++++++++++++++++
tools/perf/util/cpumap.h | 3 +++
tools/perf/util/env.c | 35 +++++++++++++++++++++++++++++++++++
tools/perf/util/env.h | 6 ++++++
tools/perf/util/stat-display.c | 15 +++++++++++++++
tools/perf/util/stat.c | 1 +
tools/perf/util/stat.h | 1 +
12 files changed, 148 insertions(+), 10 deletions(-)
So it can be used from multiple places.
Link: http://lkml.kernel.org/n/[email protected]
Signed-off-by: Jiri Olsa <[email protected]>
---
tools/perf/builtin-stat.c | 14 +-------------
tools/perf/lib/cpumap.c | 12 ++++++++++++
tools/perf/lib/include/perf/cpumap.h | 1 +
tools/perf/lib/libperf.map | 1 +
4 files changed, 15 insertions(+), 13 deletions(-)
diff --git a/tools/perf/builtin-stat.c b/tools/perf/builtin-stat.c
index 7e17bf9f700a..5bc0c570b7b6 100644
--- a/tools/perf/builtin-stat.c
+++ b/tools/perf/builtin-stat.c
@@ -822,18 +822,6 @@ static int perf_stat__get_core(struct perf_stat_config *config __maybe_unused,
return cpu_map__get_core(map, cpu, NULL);
}
-static int cpu_map__get_max(struct perf_cpu_map *map)
-{
- int i, max = -1;
-
- for (i = 0; i < map->nr; i++) {
- if (map->map[i] > max)
- max = map->map[i];
- }
-
- return max;
-}
-
static int perf_stat__get_aggr(struct perf_stat_config *config,
aggr_get_id_t get_id, struct perf_cpu_map *map, int idx)
{
@@ -928,7 +916,7 @@ static int perf_stat_init_aggr_mode(void)
* taking the highest cpu number to be the size of
* the aggregation translate cpumap.
*/
- nr = cpu_map__get_max(evsel_list->core.cpus);
+ nr = perf_cpu_map__max(evsel_list->core.cpus);
stat_config.cpus_aggr_map = perf_cpu_map__empty_new(nr + 1);
return stat_config.cpus_aggr_map ? 0 : -ENOMEM;
}
diff --git a/tools/perf/lib/cpumap.c b/tools/perf/lib/cpumap.c
index 1f0e6f334237..2ca1fafa620d 100644
--- a/tools/perf/lib/cpumap.c
+++ b/tools/perf/lib/cpumap.c
@@ -260,3 +260,15 @@ int perf_cpu_map__idx(struct perf_cpu_map *cpus, int cpu)
return -1;
}
+
+int perf_cpu_map__max(struct perf_cpu_map *map)
+{
+ int i, max = -1;
+
+ for (i = 0; i < map->nr; i++) {
+ if (map->map[i] > max)
+ max = map->map[i];
+ }
+
+ return max;
+}
diff --git a/tools/perf/lib/include/perf/cpumap.h b/tools/perf/lib/include/perf/cpumap.h
index 8aa995c59498..ac9aa497f84a 100644
--- a/tools/perf/lib/include/perf/cpumap.h
+++ b/tools/perf/lib/include/perf/cpumap.h
@@ -16,6 +16,7 @@ LIBPERF_API void perf_cpu_map__put(struct perf_cpu_map *map);
LIBPERF_API int perf_cpu_map__cpu(const struct perf_cpu_map *cpus, int idx);
LIBPERF_API int perf_cpu_map__nr(const struct perf_cpu_map *cpus);
LIBPERF_API bool perf_cpu_map__empty(const struct perf_cpu_map *map);
+LIBPERF_API int perf_cpu_map__max(struct perf_cpu_map *map);
#define perf_cpu_map__for_each_cpu(cpu, idx, cpus) \
for ((idx) = 0, (cpu) = perf_cpu_map__cpu(cpus, idx); \
diff --git a/tools/perf/lib/libperf.map b/tools/perf/lib/libperf.map
index dc4d66363bc4..cd0d17b996c8 100644
--- a/tools/perf/lib/libperf.map
+++ b/tools/perf/lib/libperf.map
@@ -9,6 +9,7 @@ LIBPERF_0.0.1 {
perf_cpu_map__nr;
perf_cpu_map__cpu;
perf_cpu_map__empty;
+ perf_cpu_map__max;
perf_thread_map__new_dummy;
perf_thread_map__set_pid;
perf_thread_map__comm;
--
2.21.0
Em Mon, Sep 02, 2019 at 02:12:53PM +0200, Jiri Olsa escreveu:
> So it can be used from multiple places.
Applied.
- Arnaldo
> Link: http://lkml.kernel.org/n/[email protected]
> Signed-off-by: Jiri Olsa <[email protected]>
> ---
> tools/perf/builtin-stat.c | 14 +-------------
> tools/perf/lib/cpumap.c | 12 ++++++++++++
> tools/perf/lib/include/perf/cpumap.h | 1 +
> tools/perf/lib/libperf.map | 1 +
> 4 files changed, 15 insertions(+), 13 deletions(-)
>
> diff --git a/tools/perf/builtin-stat.c b/tools/perf/builtin-stat.c
> index 7e17bf9f700a..5bc0c570b7b6 100644
> --- a/tools/perf/builtin-stat.c
> +++ b/tools/perf/builtin-stat.c
> @@ -822,18 +822,6 @@ static int perf_stat__get_core(struct perf_stat_config *config __maybe_unused,
> return cpu_map__get_core(map, cpu, NULL);
> }
>
> -static int cpu_map__get_max(struct perf_cpu_map *map)
> -{
> - int i, max = -1;
> -
> - for (i = 0; i < map->nr; i++) {
> - if (map->map[i] > max)
> - max = map->map[i];
> - }
> -
> - return max;
> -}
> -
> static int perf_stat__get_aggr(struct perf_stat_config *config,
> aggr_get_id_t get_id, struct perf_cpu_map *map, int idx)
> {
> @@ -928,7 +916,7 @@ static int perf_stat_init_aggr_mode(void)
> * taking the highest cpu number to be the size of
> * the aggregation translate cpumap.
> */
> - nr = cpu_map__get_max(evsel_list->core.cpus);
> + nr = perf_cpu_map__max(evsel_list->core.cpus);
> stat_config.cpus_aggr_map = perf_cpu_map__empty_new(nr + 1);
> return stat_config.cpus_aggr_map ? 0 : -ENOMEM;
> }
> diff --git a/tools/perf/lib/cpumap.c b/tools/perf/lib/cpumap.c
> index 1f0e6f334237..2ca1fafa620d 100644
> --- a/tools/perf/lib/cpumap.c
> +++ b/tools/perf/lib/cpumap.c
> @@ -260,3 +260,15 @@ int perf_cpu_map__idx(struct perf_cpu_map *cpus, int cpu)
>
> return -1;
> }
> +
> +int perf_cpu_map__max(struct perf_cpu_map *map)
> +{
> + int i, max = -1;
> +
> + for (i = 0; i < map->nr; i++) {
> + if (map->map[i] > max)
> + max = map->map[i];
> + }
> +
> + return max;
> +}
> diff --git a/tools/perf/lib/include/perf/cpumap.h b/tools/perf/lib/include/perf/cpumap.h
> index 8aa995c59498..ac9aa497f84a 100644
> --- a/tools/perf/lib/include/perf/cpumap.h
> +++ b/tools/perf/lib/include/perf/cpumap.h
> @@ -16,6 +16,7 @@ LIBPERF_API void perf_cpu_map__put(struct perf_cpu_map *map);
> LIBPERF_API int perf_cpu_map__cpu(const struct perf_cpu_map *cpus, int idx);
> LIBPERF_API int perf_cpu_map__nr(const struct perf_cpu_map *cpus);
> LIBPERF_API bool perf_cpu_map__empty(const struct perf_cpu_map *map);
> +LIBPERF_API int perf_cpu_map__max(struct perf_cpu_map *map);
>
> #define perf_cpu_map__for_each_cpu(cpu, idx, cpus) \
> for ((idx) = 0, (cpu) = perf_cpu_map__cpu(cpus, idx); \
> diff --git a/tools/perf/lib/libperf.map b/tools/perf/lib/libperf.map
> index dc4d66363bc4..cd0d17b996c8 100644
> --- a/tools/perf/lib/libperf.map
> +++ b/tools/perf/lib/libperf.map
> @@ -9,6 +9,7 @@ LIBPERF_0.0.1 {
> perf_cpu_map__nr;
> perf_cpu_map__cpu;
> perf_cpu_map__empty;
> + perf_cpu_map__max;
> perf_thread_map__new_dummy;
> perf_thread_map__set_pid;
> perf_thread_map__comm;
> --
> 2.21.0
--
- Arnaldo
The following commit has been merged into the perf/urgent branch of tip:
Commit-ID: 4256d434935e9c85a731823be562785494ca364b
Gitweb: https://git.kernel.org/tip/4256d434935e9c85a731823be562785494ca364b
Author: Jiri Olsa <[email protected]>
AuthorDate: Mon, 02 Sep 2019 14:12:53 +02:00
Committer: Arnaldo Carvalho de Melo <[email protected]>
CommitterDate: Tue, 10 Sep 2019 14:33:32 +01:00
libperf: Adopt perf_cpu_map__max() function
From 'perf stat', so that it can be used from multiple places.
Signed-off-by: Jiri Olsa <[email protected]>
Cc: Alexander Shishkin <[email protected]>
Cc: Andi Kleen <[email protected]>
Cc: Joe Mario <[email protected]>
Cc: Kan Liang <[email protected]>
Cc: Michael Petlan <[email protected]>
Cc: Namhyung Kim <[email protected]>
Cc: Peter Zijlstra <[email protected]>
Link: http://lore.kernel.org/lkml/[email protected]
Signed-off-by: Arnaldo Carvalho de Melo <[email protected]>
---
tools/perf/builtin-stat.c | 14 +-------------
tools/perf/lib/cpumap.c | 12 ++++++++++++
tools/perf/lib/include/perf/cpumap.h | 1 +
tools/perf/lib/libperf.map | 1 +
4 files changed, 15 insertions(+), 13 deletions(-)
diff --git a/tools/perf/builtin-stat.c b/tools/perf/builtin-stat.c
index 7e17bf9..5bc0c57 100644
--- a/tools/perf/builtin-stat.c
+++ b/tools/perf/builtin-stat.c
@@ -822,18 +822,6 @@ static int perf_stat__get_core(struct perf_stat_config *config __maybe_unused,
return cpu_map__get_core(map, cpu, NULL);
}
-static int cpu_map__get_max(struct perf_cpu_map *map)
-{
- int i, max = -1;
-
- for (i = 0; i < map->nr; i++) {
- if (map->map[i] > max)
- max = map->map[i];
- }
-
- return max;
-}
-
static int perf_stat__get_aggr(struct perf_stat_config *config,
aggr_get_id_t get_id, struct perf_cpu_map *map, int idx)
{
@@ -928,7 +916,7 @@ static int perf_stat_init_aggr_mode(void)
* taking the highest cpu number to be the size of
* the aggregation translate cpumap.
*/
- nr = cpu_map__get_max(evsel_list->core.cpus);
+ nr = perf_cpu_map__max(evsel_list->core.cpus);
stat_config.cpus_aggr_map = perf_cpu_map__empty_new(nr + 1);
return stat_config.cpus_aggr_map ? 0 : -ENOMEM;
}
diff --git a/tools/perf/lib/cpumap.c b/tools/perf/lib/cpumap.c
index 1f0e6f3..2ca1faf 100644
--- a/tools/perf/lib/cpumap.c
+++ b/tools/perf/lib/cpumap.c
@@ -260,3 +260,15 @@ int perf_cpu_map__idx(struct perf_cpu_map *cpus, int cpu)
return -1;
}
+
+int perf_cpu_map__max(struct perf_cpu_map *map)
+{
+ int i, max = -1;
+
+ for (i = 0; i < map->nr; i++) {
+ if (map->map[i] > max)
+ max = map->map[i];
+ }
+
+ return max;
+}
diff --git a/tools/perf/lib/include/perf/cpumap.h b/tools/perf/lib/include/perf/cpumap.h
index 8aa995c..ac9aa49 100644
--- a/tools/perf/lib/include/perf/cpumap.h
+++ b/tools/perf/lib/include/perf/cpumap.h
@@ -16,6 +16,7 @@ LIBPERF_API void perf_cpu_map__put(struct perf_cpu_map *map);
LIBPERF_API int perf_cpu_map__cpu(const struct perf_cpu_map *cpus, int idx);
LIBPERF_API int perf_cpu_map__nr(const struct perf_cpu_map *cpus);
LIBPERF_API bool perf_cpu_map__empty(const struct perf_cpu_map *map);
+LIBPERF_API int perf_cpu_map__max(struct perf_cpu_map *map);
#define perf_cpu_map__for_each_cpu(cpu, idx, cpus) \
for ((idx) = 0, (cpu) = perf_cpu_map__cpu(cpus, idx); \
diff --git a/tools/perf/lib/libperf.map b/tools/perf/lib/libperf.map
index dc4d663..cd0d17b 100644
--- a/tools/perf/lib/libperf.map
+++ b/tools/perf/lib/libperf.map
@@ -9,6 +9,7 @@ LIBPERF_0.0.1 {
perf_cpu_map__nr;
perf_cpu_map__cpu;
perf_cpu_map__empty;
+ perf_cpu_map__max;
perf_thread_map__new_dummy;
perf_thread_map__set_pid;
perf_thread_map__comm;