2023-11-29 06:02:42

by Ian Rogers

Subject: [PATCH v1 00/14] Clean up libperf cpumap's empty function

Rename and clean up the use of libperf CPU map functions, focusing in
particular on perf_cpu_map__empty, which may return true for maps that
contain CPUs so long as they also contain the "any CPU"/dummy value.

perf_cpu_map__nr is also troubling in that iterating an empty CPU map
will yield the "any CPU"/dummy value. Reduce the number of calls to it
by using the perf_cpu_map__for_each_cpu macro where possible.
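
For example (a minimal sketch, not part of the series; use_cpu() is
just a placeholder), iterating a NULL or empty map still visits one
entry whose value is -1, so callers end up open coding a skip:

  struct perf_cpu cpu;
  int idx;

  perf_cpu_map__for_each_cpu(cpu, idx, cpus) {
          if (cpu.cpu == -1)
                  continue; /* the "any CPU"/dummy entry */
          use_cpu(cpu);
  }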

Ian Rogers (14):
libperf cpumap: Rename perf_cpu_map__dummy_new
libperf cpumap: Rename and prefer sysfs for perf_cpu_map__default_new
libperf cpumap: Rename perf_cpu_map__empty
libperf cpumap: Replace usage of perf_cpu_map__new(NULL)
libperf cpumap: Add for_each_cpu that skips the "any CPU" case
libperf cpumap: Add any, empty and min helpers
perf arm-spe/cs-etm: Directly iterate CPU maps
perf intel-pt/intel-bts: Switch perf_cpu_map__has_any_cpu_or_is_empty
use
perf cpumap: Clean up use of perf_cpu_map__has_any_cpu_or_is_empty
perf top: Avoid repeated function calls
perf arm64 header: Remove unnecessary CPU map get and put
perf stat: Remove duplicate cpus_map_matched function
perf cpumap: Use perf_cpu_map__for_each_cpu when possible
libperf cpumap: Document perf_cpu_map__nr's behavior

.../perf/Documentation/examples/sampling.c | 2 +-
.../perf/Documentation/libperf-sampling.txt | 2 +-
tools/lib/perf/Documentation/libperf.txt | 4 +-
tools/lib/perf/cpumap.c | 92 +++++++++++++------
tools/lib/perf/evlist.c | 6 +-
tools/lib/perf/evsel.c | 2 +-
tools/lib/perf/include/perf/cpumap.h | 56 ++++++++++-
tools/lib/perf/libperf.map | 10 +-
tools/lib/perf/tests/test-cpumap.c | 4 +-
tools/lib/perf/tests/test-evlist.c | 6 +-
tools/lib/perf/tests/test-evsel.c | 2 +-
tools/perf/arch/arm/util/cs-etm.c | 83 +++++++----------
tools/perf/arch/arm64/util/arm-spe.c | 4 +-
tools/perf/arch/arm64/util/header.c | 15 +--
tools/perf/arch/x86/util/intel-bts.c | 4 +-
tools/perf/arch/x86/util/intel-pt.c | 10 +-
tools/perf/bench/epoll-ctl.c | 2 +-
tools/perf/bench/epoll-wait.c | 2 +-
tools/perf/bench/futex-hash.c | 2 +-
tools/perf/bench/futex-lock-pi.c | 2 +-
tools/perf/bench/futex-requeue.c | 2 +-
tools/perf/bench/futex-wake-parallel.c | 2 +-
tools/perf/bench/futex-wake.c | 2 +-
tools/perf/builtin-c2c.c | 6 +-
tools/perf/builtin-ftrace.c | 2 +-
tools/perf/builtin-record.c | 4 +-
tools/perf/builtin-stat.c | 31 +------
tools/perf/tests/bitmap.c | 13 +--
tools/perf/tests/code-reading.c | 2 +-
tools/perf/tests/cpumap.c | 2 +-
tools/perf/tests/keep-tracking.c | 2 +-
tools/perf/tests/mmap-basic.c | 2 +-
tools/perf/tests/openat-syscall-all-cpus.c | 2 +-
tools/perf/tests/perf-time-to-tsc.c | 2 +-
tools/perf/tests/sw-clock.c | 2 +-
tools/perf/tests/switch-tracking.c | 2 +-
tools/perf/tests/task-exit.c | 2 +-
tools/perf/tests/topology.c | 48 +++++-----
tools/perf/util/auxtrace.c | 4 +-
tools/perf/util/bpf_counter.c | 2 +-
tools/perf/util/bpf_kwork.c | 16 ++--
tools/perf/util/bpf_kwork_top.c | 12 +--
tools/perf/util/cpumap.c | 14 ++-
tools/perf/util/cputopo.c | 2 +-
tools/perf/util/evlist.c | 4 +-
tools/perf/util/evsel.c | 2 +-
tools/perf/util/perf_api_probe.c | 4 +-
tools/perf/util/record.c | 4 +-
.../scripting-engines/trace-event-python.c | 12 ++-
tools/perf/util/session.c | 5 +-
tools/perf/util/stat.c | 2 +-
tools/perf/util/svghelper.c | 20 ++--
tools/perf/util/top.c | 9 +-
53 files changed, 296 insertions(+), 254 deletions(-)

--
2.43.0.rc1.413.gea7ed67945-goog


2023-11-29 06:02:43

by Ian Rogers

Subject: [PATCH v1 01/14] libperf cpumap: Rename perf_cpu_map__dummy_new

Rename perf_cpu_map__dummy_new to perf_cpu_map__new_any_cpu to better
indicate this is creating a CPU map for the perf_event_open "any" CPU
case.

Signed-off-by: Ian Rogers <[email protected]>
---
tools/lib/perf/Documentation/libperf.txt | 2 +-
tools/lib/perf/cpumap.c | 4 ++--
tools/lib/perf/evsel.c | 2 +-
tools/lib/perf/include/perf/cpumap.h | 4 ++--
tools/lib/perf/libperf.map | 2 +-
tools/lib/perf/tests/test-cpumap.c | 2 +-
tools/lib/perf/tests/test-evlist.c | 2 +-
tools/perf/tests/cpumap.c | 2 +-
tools/perf/tests/sw-clock.c | 2 +-
tools/perf/tests/task-exit.c | 2 +-
tools/perf/util/evlist.c | 2 +-
tools/perf/util/evsel.c | 2 +-
12 files changed, 14 insertions(+), 14 deletions(-)

diff --git a/tools/lib/perf/Documentation/libperf.txt b/tools/lib/perf/Documentation/libperf.txt
index a8f1a237931b..a256a26598b0 100644
--- a/tools/lib/perf/Documentation/libperf.txt
+++ b/tools/lib/perf/Documentation/libperf.txt
@@ -37,7 +37,7 @@ SYNOPSIS

struct perf_cpu_map;

- struct perf_cpu_map *perf_cpu_map__dummy_new(void);
+ struct perf_cpu_map *perf_cpu_map__new_any_cpu(void);
struct perf_cpu_map *perf_cpu_map__new(const char *cpu_list);
struct perf_cpu_map *perf_cpu_map__read(FILE *file);
struct perf_cpu_map *perf_cpu_map__get(struct perf_cpu_map *map);
diff --git a/tools/lib/perf/cpumap.c b/tools/lib/perf/cpumap.c
index 2a5a29217374..2bd6aba3d8c9 100644
--- a/tools/lib/perf/cpumap.c
+++ b/tools/lib/perf/cpumap.c
@@ -27,7 +27,7 @@ struct perf_cpu_map *perf_cpu_map__alloc(int nr_cpus)
return result;
}

-struct perf_cpu_map *perf_cpu_map__dummy_new(void)
+struct perf_cpu_map *perf_cpu_map__new_any_cpu(void)
{
struct perf_cpu_map *cpus = perf_cpu_map__alloc(1);

@@ -271,7 +271,7 @@ struct perf_cpu_map *perf_cpu_map__new(const char *cpu_list)
else if (*cpu_list != '\0')
cpus = cpu_map__default_new();
else
- cpus = perf_cpu_map__dummy_new();
+ cpus = perf_cpu_map__new_any_cpu();
invalid:
free(tmp_cpus);
out:
diff --git a/tools/lib/perf/evsel.c b/tools/lib/perf/evsel.c
index 8b51b008a81f..c07160953224 100644
--- a/tools/lib/perf/evsel.c
+++ b/tools/lib/perf/evsel.c
@@ -120,7 +120,7 @@ int perf_evsel__open(struct perf_evsel *evsel, struct perf_cpu_map *cpus,
static struct perf_cpu_map *empty_cpu_map;

if (empty_cpu_map == NULL) {
- empty_cpu_map = perf_cpu_map__dummy_new();
+ empty_cpu_map = perf_cpu_map__new_any_cpu();
if (empty_cpu_map == NULL)
return -ENOMEM;
}
diff --git a/tools/lib/perf/include/perf/cpumap.h b/tools/lib/perf/include/perf/cpumap.h
index e38d859a384d..d0bf218ada11 100644
--- a/tools/lib/perf/include/perf/cpumap.h
+++ b/tools/lib/perf/include/perf/cpumap.h
@@ -19,9 +19,9 @@ struct perf_cache {
struct perf_cpu_map;

/**
- * perf_cpu_map__dummy_new - a map with a singular "any CPU"/dummy -1 value.
+ * perf_cpu_map__new_any_cpu - a map with a singular "any CPU"/dummy -1 value.
*/
-LIBPERF_API struct perf_cpu_map *perf_cpu_map__dummy_new(void);
+LIBPERF_API struct perf_cpu_map *perf_cpu_map__new_any_cpu(void);
LIBPERF_API struct perf_cpu_map *perf_cpu_map__default_new(void);
LIBPERF_API struct perf_cpu_map *perf_cpu_map__new(const char *cpu_list);
LIBPERF_API struct perf_cpu_map *perf_cpu_map__read(FILE *file);
diff --git a/tools/lib/perf/libperf.map b/tools/lib/perf/libperf.map
index 190b56ae923a..a8ff64baea3e 100644
--- a/tools/lib/perf/libperf.map
+++ b/tools/lib/perf/libperf.map
@@ -1,7 +1,7 @@
LIBPERF_0.0.1 {
global:
libperf_init;
- perf_cpu_map__dummy_new;
+ perf_cpu_map__new_any_cpu;
perf_cpu_map__default_new;
perf_cpu_map__get;
perf_cpu_map__put;
diff --git a/tools/lib/perf/tests/test-cpumap.c b/tools/lib/perf/tests/test-cpumap.c
index 87b0510a556f..2c359bdb951e 100644
--- a/tools/lib/perf/tests/test-cpumap.c
+++ b/tools/lib/perf/tests/test-cpumap.c
@@ -21,7 +21,7 @@ int test_cpumap(int argc, char **argv)

libperf_init(libperf_print);

- cpus = perf_cpu_map__dummy_new();
+ cpus = perf_cpu_map__new_any_cpu();
if (!cpus)
return -1;

diff --git a/tools/lib/perf/tests/test-evlist.c b/tools/lib/perf/tests/test-evlist.c
index ed616fc19b4f..ab63878bacb9 100644
--- a/tools/lib/perf/tests/test-evlist.c
+++ b/tools/lib/perf/tests/test-evlist.c
@@ -261,7 +261,7 @@ static int test_mmap_thread(void)
threads = perf_thread_map__new_dummy();
__T("failed to create threads", threads);

- cpus = perf_cpu_map__dummy_new();
+ cpus = perf_cpu_map__new_any_cpu();
__T("failed to create cpus", cpus);

perf_thread_map__set_pid(threads, 0, pid);
diff --git a/tools/perf/tests/cpumap.c b/tools/perf/tests/cpumap.c
index 7730fc2ab40b..bd8e396f3e57 100644
--- a/tools/perf/tests/cpumap.c
+++ b/tools/perf/tests/cpumap.c
@@ -213,7 +213,7 @@ static int test__cpu_map_intersect(struct test_suite *test __maybe_unused,

static int test__cpu_map_equal(struct test_suite *test __maybe_unused, int subtest __maybe_unused)
{
- struct perf_cpu_map *any = perf_cpu_map__dummy_new();
+ struct perf_cpu_map *any = perf_cpu_map__new_any_cpu();
struct perf_cpu_map *one = perf_cpu_map__new("1");
struct perf_cpu_map *two = perf_cpu_map__new("2");
struct perf_cpu_map *empty = perf_cpu_map__intersect(one, two);
diff --git a/tools/perf/tests/sw-clock.c b/tools/perf/tests/sw-clock.c
index 4d7493fa0105..290716783ac6 100644
--- a/tools/perf/tests/sw-clock.c
+++ b/tools/perf/tests/sw-clock.c
@@ -62,7 +62,7 @@ static int __test__sw_clock_freq(enum perf_sw_ids clock_id)
}
evlist__add(evlist, evsel);

- cpus = perf_cpu_map__dummy_new();
+ cpus = perf_cpu_map__new_any_cpu();
threads = thread_map__new_by_tid(getpid());
if (!cpus || !threads) {
err = -ENOMEM;
diff --git a/tools/perf/tests/task-exit.c b/tools/perf/tests/task-exit.c
index 968dddde6dda..d33d0952025c 100644
--- a/tools/perf/tests/task-exit.c
+++ b/tools/perf/tests/task-exit.c
@@ -70,7 +70,7 @@ static int test__task_exit(struct test_suite *test __maybe_unused, int subtest _
* evlist__prepare_workload we'll fill in the only thread
* we're monitoring, the one forked there.
*/
- cpus = perf_cpu_map__dummy_new();
+ cpus = perf_cpu_map__new_any_cpu();
threads = thread_map__new_by_tid(-1);
if (!cpus || !threads) {
err = -ENOMEM;
diff --git a/tools/perf/util/evlist.c b/tools/perf/util/evlist.c
index e36da58522ef..ff7f85ded89d 100644
--- a/tools/perf/util/evlist.c
+++ b/tools/perf/util/evlist.c
@@ -1056,7 +1056,7 @@ int evlist__create_maps(struct evlist *evlist, struct target *target)
return -1;

if (target__uses_dummy_map(target))
- cpus = perf_cpu_map__dummy_new();
+ cpus = perf_cpu_map__new_any_cpu();
else
cpus = perf_cpu_map__new(target->cpu_list);

diff --git a/tools/perf/util/evsel.c b/tools/perf/util/evsel.c
index a5da74e3a517..76ef3ab488a2 100644
--- a/tools/perf/util/evsel.c
+++ b/tools/perf/util/evsel.c
@@ -1801,7 +1801,7 @@ static int __evsel__prepare_open(struct evsel *evsel, struct perf_cpu_map *cpus,

if (cpus == NULL) {
if (empty_cpu_map == NULL) {
- empty_cpu_map = perf_cpu_map__dummy_new();
+ empty_cpu_map = perf_cpu_map__new_any_cpu();
if (empty_cpu_map == NULL)
return -ENOMEM;
}
--
2.43.0.rc1.413.gea7ed67945-goog

2023-11-29 06:03:04

by Ian Rogers

Subject: [PATCH v1 06/14] libperf cpumap: Add any, empty and min helpers

Add 'any', 'empty' and 'min' helpers so that uses of
perf_cpu_map__has_any_cpu_or_is_empty can be replaced with more
precise alternatives.
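
Roughly, the intended semantics are (illustration only, based on the
definitions added below; the example maps are assumptions):

  struct perf_cpu_map *any = perf_cpu_map__new_any_cpu(); /* just -1 */
  struct perf_cpu_map *one = perf_cpu_map__new("1");      /* CPU 1 */

  perf_cpu_map__is_empty(NULL);               /* true */
  perf_cpu_map__is_empty(any);                /* false, dummy value present */
  perf_cpu_map__is_any_cpu_or_is_empty(any);  /* true */
  perf_cpu_map__is_any_cpu_or_is_empty(one);  /* false */
  perf_cpu_map__min(any).cpu;                 /* -1 */
  perf_cpu_map__min(one).cpu;                 /* 1 */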

Signed-off-by: Ian Rogers <[email protected]>
---
tools/lib/perf/cpumap.c | 27 +++++++++++++++++++++++++++
tools/lib/perf/include/perf/cpumap.h | 16 ++++++++++++++++
tools/lib/perf/libperf.map | 4 ++++
3 files changed, 47 insertions(+)

diff --git a/tools/lib/perf/cpumap.c b/tools/lib/perf/cpumap.c
index 49fc98e16514..7403819da8fd 100644
--- a/tools/lib/perf/cpumap.c
+++ b/tools/lib/perf/cpumap.c
@@ -316,6 +316,19 @@ bool perf_cpu_map__has_any_cpu_or_is_empty(const struct perf_cpu_map *map)
return map ? __perf_cpu_map__cpu(map, 0).cpu == -1 : true;
}

+bool perf_cpu_map__is_any_cpu_or_is_empty(const struct perf_cpu_map *map)
+{
+ if (!map)
+ return true;
+
+ return __perf_cpu_map__nr(map) == 1 && __perf_cpu_map__cpu(map, 0).cpu == -1;
+}
+
+bool perf_cpu_map__is_empty(const struct perf_cpu_map *map)
+{
+ return map == NULL;
+}
+
int perf_cpu_map__idx(const struct perf_cpu_map *cpus, struct perf_cpu cpu)
{
int low, high;
@@ -372,6 +385,20 @@ bool perf_cpu_map__has_any_cpu(const struct perf_cpu_map *map)
return map && __perf_cpu_map__cpu(map, 0).cpu == -1;
}

+struct perf_cpu perf_cpu_map__min(const struct perf_cpu_map *map)
+{
+ struct perf_cpu cpu, result = {
+ .cpu = -1
+ };
+ int idx;
+
+ perf_cpu_map__for_each_cpu_skip_any(cpu, idx, map) {
+ result = cpu;
+ break;
+ }
+ return result;
+}
+
struct perf_cpu perf_cpu_map__max(const struct perf_cpu_map *map)
{
struct perf_cpu result = {
diff --git a/tools/lib/perf/include/perf/cpumap.h b/tools/lib/perf/include/perf/cpumap.h
index dbe0a7352b64..523e4348fc96 100644
--- a/tools/lib/perf/include/perf/cpumap.h
+++ b/tools/lib/perf/include/perf/cpumap.h
@@ -50,6 +50,22 @@ LIBPERF_API int perf_cpu_map__nr(const struct perf_cpu_map *cpus);
* perf_cpu_map__has_any_cpu_or_is_empty - is map either empty or has the "any CPU"/dummy value.
*/
LIBPERF_API bool perf_cpu_map__has_any_cpu_or_is_empty(const struct perf_cpu_map *map);
+/**
+ * perf_cpu_map__is_any_cpu_or_is_empty - is map either empty or the "any CPU"/dummy value.
+ */
+LIBPERF_API bool perf_cpu_map__is_any_cpu_or_is_empty(const struct perf_cpu_map *map);
+/**
+ * perf_cpu_map__is_empty - does the map contain no values and it doesn't
+ * contain the special "any CPU"/dummy value.
+ */
+LIBPERF_API bool perf_cpu_map__is_empty(const struct perf_cpu_map *map);
+/**
+ * perf_cpu_map__min - the minimum CPU value or -1 if empty or just the "any CPU"/dummy value.
+ */
+LIBPERF_API struct perf_cpu perf_cpu_map__min(const struct perf_cpu_map *map);
+/**
+ * perf_cpu_map__max - the maximum CPU value or -1 if empty or just the "any CPU"/dummy value.
+ */
LIBPERF_API struct perf_cpu perf_cpu_map__max(const struct perf_cpu_map *map);
LIBPERF_API bool perf_cpu_map__has(const struct perf_cpu_map *map, struct perf_cpu cpu);
LIBPERF_API bool perf_cpu_map__equal(const struct perf_cpu_map *lhs,
diff --git a/tools/lib/perf/libperf.map b/tools/lib/perf/libperf.map
index 10b3f3722642..2aa79b696032 100644
--- a/tools/lib/perf/libperf.map
+++ b/tools/lib/perf/libperf.map
@@ -10,6 +10,10 @@ LIBPERF_0.0.1 {
perf_cpu_map__nr;
perf_cpu_map__cpu;
perf_cpu_map__has_any_cpu_or_is_empty;
+ perf_cpu_map__is_any_cpu_or_is_empty;
+ perf_cpu_map__is_empty;
+ perf_cpu_map__has_any_cpu;
+ perf_cpu_map__min;
perf_cpu_map__max;
perf_cpu_map__has;
perf_thread_map__new_array;
--
2.43.0.rc1.413.gea7ed67945-goog

2023-11-29 06:03:07

by Ian Rogers

Subject: [PATCH v1 04/14] libperf cpumap: Replace usage of perf_cpu_map__new(NULL)

Passing NULL to perf_cpu_map__new is equivalent to calling
perf_cpu_map__new_online_cpus; call perf_cpu_map__new_online_cpus
directly to be more intention revealing.

Signed-off-by: Ian Rogers <[email protected]>
---
tools/lib/perf/Documentation/examples/sampling.c | 2 +-
tools/lib/perf/Documentation/libperf-sampling.txt | 2 +-
tools/lib/perf/evlist.c | 2 +-
tools/lib/perf/tests/test-evlist.c | 4 ++--
tools/lib/perf/tests/test-evsel.c | 2 +-
tools/perf/arch/arm/util/cs-etm.c | 6 +++---
tools/perf/arch/arm64/util/header.c | 2 +-
tools/perf/bench/epoll-ctl.c | 2 +-
tools/perf/bench/epoll-wait.c | 2 +-
tools/perf/bench/futex-hash.c | 2 +-
tools/perf/bench/futex-lock-pi.c | 2 +-
tools/perf/bench/futex-requeue.c | 2 +-
tools/perf/bench/futex-wake-parallel.c | 2 +-
tools/perf/bench/futex-wake.c | 2 +-
tools/perf/builtin-ftrace.c | 2 +-
tools/perf/tests/code-reading.c | 2 +-
tools/perf/tests/keep-tracking.c | 2 +-
tools/perf/tests/mmap-basic.c | 2 +-
tools/perf/tests/openat-syscall-all-cpus.c | 2 +-
tools/perf/tests/perf-time-to-tsc.c | 2 +-
tools/perf/tests/switch-tracking.c | 2 +-
tools/perf/tests/topology.c | 2 +-
tools/perf/util/bpf_counter.c | 2 +-
tools/perf/util/cpumap.c | 2 +-
tools/perf/util/cputopo.c | 2 +-
tools/perf/util/evlist.c | 2 +-
tools/perf/util/perf_api_probe.c | 4 ++--
tools/perf/util/record.c | 2 +-
28 files changed, 32 insertions(+), 32 deletions(-)

diff --git a/tools/lib/perf/Documentation/examples/sampling.c b/tools/lib/perf/Documentation/examples/sampling.c
index 8e1a926a9cfe..bc142f0664b5 100644
--- a/tools/lib/perf/Documentation/examples/sampling.c
+++ b/tools/lib/perf/Documentation/examples/sampling.c
@@ -39,7 +39,7 @@ int main(int argc, char **argv)

libperf_init(libperf_print);

- cpus = perf_cpu_map__new(NULL);
+ cpus = perf_cpu_map__new_online_cpus();
if (!cpus) {
fprintf(stderr, "failed to create cpus\n");
return -1;
diff --git a/tools/lib/perf/Documentation/libperf-sampling.txt b/tools/lib/perf/Documentation/libperf-sampling.txt
index d6ca24f6ef78..2378980fab8a 100644
--- a/tools/lib/perf/Documentation/libperf-sampling.txt
+++ b/tools/lib/perf/Documentation/libperf-sampling.txt
@@ -97,7 +97,7 @@ In this case we will monitor all the available CPUs:

[source,c]
--
- 42 cpus = perf_cpu_map__new(NULL);
+ 42 cpus = perf_cpu_map__new_online_cpus();
43 if (!cpus) {
44 fprintf(stderr, "failed to create cpus\n");
45 return -1;
diff --git a/tools/lib/perf/evlist.c b/tools/lib/perf/evlist.c
index 75f36218fdd9..058e3ff10f9b 100644
--- a/tools/lib/perf/evlist.c
+++ b/tools/lib/perf/evlist.c
@@ -39,7 +39,7 @@ static void __perf_evlist__propagate_maps(struct perf_evlist *evlist,
if (evsel->system_wide) {
/* System wide: set the cpu map of the evsel to all online CPUs. */
perf_cpu_map__put(evsel->cpus);
- evsel->cpus = perf_cpu_map__new(NULL);
+ evsel->cpus = perf_cpu_map__new_online_cpus();
} else if (evlist->has_user_cpus && evsel->is_pmu_core) {
/*
* User requested CPUs on a core PMU, ensure the requested CPUs
diff --git a/tools/lib/perf/tests/test-evlist.c b/tools/lib/perf/tests/test-evlist.c
index ab63878bacb9..10f70cb41ff1 100644
--- a/tools/lib/perf/tests/test-evlist.c
+++ b/tools/lib/perf/tests/test-evlist.c
@@ -46,7 +46,7 @@ static int test_stat_cpu(void)
};
int err, idx;

- cpus = perf_cpu_map__new(NULL);
+ cpus = perf_cpu_map__new_online_cpus();
__T("failed to create cpus", cpus);

evlist = perf_evlist__new();
@@ -350,7 +350,7 @@ static int test_mmap_cpus(void)

attr.config = id;

- cpus = perf_cpu_map__new(NULL);
+ cpus = perf_cpu_map__new_online_cpus();
__T("failed to create cpus", cpus);

evlist = perf_evlist__new();
diff --git a/tools/lib/perf/tests/test-evsel.c b/tools/lib/perf/tests/test-evsel.c
index a11fc51bfb68..545ec3150546 100644
--- a/tools/lib/perf/tests/test-evsel.c
+++ b/tools/lib/perf/tests/test-evsel.c
@@ -27,7 +27,7 @@ static int test_stat_cpu(void)
};
int err, idx;

- cpus = perf_cpu_map__new(NULL);
+ cpus = perf_cpu_map__new_online_cpus();
__T("failed to create cpus", cpus);

evsel = perf_evsel__new(&attr);
diff --git a/tools/perf/arch/arm/util/cs-etm.c b/tools/perf/arch/arm/util/cs-etm.c
index c6b7b3066324..77e6663c1703 100644
--- a/tools/perf/arch/arm/util/cs-etm.c
+++ b/tools/perf/arch/arm/util/cs-etm.c
@@ -199,7 +199,7 @@ static int cs_etm_validate_config(struct auxtrace_record *itr,
{
int i, err = -EINVAL;
struct perf_cpu_map *event_cpus = evsel->evlist->core.user_requested_cpus;
- struct perf_cpu_map *online_cpus = perf_cpu_map__new(NULL);
+ struct perf_cpu_map *online_cpus = perf_cpu_map__new_online_cpus();

/* Set option of each CPU we have */
for (i = 0; i < cpu__max_cpu().cpu; i++) {
@@ -536,7 +536,7 @@ cs_etm_info_priv_size(struct auxtrace_record *itr __maybe_unused,
int i;
int etmv3 = 0, etmv4 = 0, ete = 0;
struct perf_cpu_map *event_cpus = evlist->core.user_requested_cpus;
- struct perf_cpu_map *online_cpus = perf_cpu_map__new(NULL);
+ struct perf_cpu_map *online_cpus = perf_cpu_map__new_online_cpus();

/* cpu map is not empty, we have specific CPUs to work with */
if (!perf_cpu_map__has_any_cpu_or_is_empty(event_cpus)) {
@@ -802,7 +802,7 @@ static int cs_etm_info_fill(struct auxtrace_record *itr,
u64 nr_cpu, type;
struct perf_cpu_map *cpu_map;
struct perf_cpu_map *event_cpus = session->evlist->core.user_requested_cpus;
- struct perf_cpu_map *online_cpus = perf_cpu_map__new(NULL);
+ struct perf_cpu_map *online_cpus = perf_cpu_map__new_online_cpus();
struct cs_etm_recording *ptr =
container_of(itr, struct cs_etm_recording, itr);
struct perf_pmu *cs_etm_pmu = ptr->cs_etm_pmu;
diff --git a/tools/perf/arch/arm64/util/header.c b/tools/perf/arch/arm64/util/header.c
index a2eef9ec5491..97037499152e 100644
--- a/tools/perf/arch/arm64/util/header.c
+++ b/tools/perf/arch/arm64/util/header.c
@@ -57,7 +57,7 @@ static int _get_cpuid(char *buf, size_t sz, struct perf_cpu_map *cpus)

int get_cpuid(char *buf, size_t sz)
{
- struct perf_cpu_map *cpus = perf_cpu_map__new(NULL);
+ struct perf_cpu_map *cpus = perf_cpu_map__new_online_cpus();
int ret;

if (!cpus)
diff --git a/tools/perf/bench/epoll-ctl.c b/tools/perf/bench/epoll-ctl.c
index 6bfffe83dde9..d3db73dac66a 100644
--- a/tools/perf/bench/epoll-ctl.c
+++ b/tools/perf/bench/epoll-ctl.c
@@ -330,7 +330,7 @@ int bench_epoll_ctl(int argc, const char **argv)
act.sa_sigaction = toggle_done;
sigaction(SIGINT, &act, NULL);

- cpu = perf_cpu_map__new(NULL);
+ cpu = perf_cpu_map__new_online_cpus();
if (!cpu)
goto errmem;

diff --git a/tools/perf/bench/epoll-wait.c b/tools/perf/bench/epoll-wait.c
index cb5174b53940..06bb3187660a 100644
--- a/tools/perf/bench/epoll-wait.c
+++ b/tools/perf/bench/epoll-wait.c
@@ -444,7 +444,7 @@ int bench_epoll_wait(int argc, const char **argv)
act.sa_sigaction = toggle_done;
sigaction(SIGINT, &act, NULL);

- cpu = perf_cpu_map__new(NULL);
+ cpu = perf_cpu_map__new_online_cpus();
if (!cpu)
goto errmem;

diff --git a/tools/perf/bench/futex-hash.c b/tools/perf/bench/futex-hash.c
index 2005a3fa3026..0c69d20efa32 100644
--- a/tools/perf/bench/futex-hash.c
+++ b/tools/perf/bench/futex-hash.c
@@ -138,7 +138,7 @@ int bench_futex_hash(int argc, const char **argv)
exit(EXIT_FAILURE);
}

- cpu = perf_cpu_map__new(NULL);
+ cpu = perf_cpu_map__new_online_cpus();
if (!cpu)
goto errmem;

diff --git a/tools/perf/bench/futex-lock-pi.c b/tools/perf/bench/futex-lock-pi.c
index 092cbd52db82..7a4973346180 100644
--- a/tools/perf/bench/futex-lock-pi.c
+++ b/tools/perf/bench/futex-lock-pi.c
@@ -172,7 +172,7 @@ int bench_futex_lock_pi(int argc, const char **argv)
if (argc)
goto err;

- cpu = perf_cpu_map__new(NULL);
+ cpu = perf_cpu_map__new_online_cpus();
if (!cpu)
err(EXIT_FAILURE, "calloc");

diff --git a/tools/perf/bench/futex-requeue.c b/tools/perf/bench/futex-requeue.c
index c0035990a33c..d9ad736c1a3e 100644
--- a/tools/perf/bench/futex-requeue.c
+++ b/tools/perf/bench/futex-requeue.c
@@ -174,7 +174,7 @@ int bench_futex_requeue(int argc, const char **argv)
if (argc)
goto err;

- cpu = perf_cpu_map__new(NULL);
+ cpu = perf_cpu_map__new_online_cpus();
if (!cpu)
err(EXIT_FAILURE, "cpu_map__new");

diff --git a/tools/perf/bench/futex-wake-parallel.c b/tools/perf/bench/futex-wake-parallel.c
index 5ab0234d74e6..b66df553e561 100644
--- a/tools/perf/bench/futex-wake-parallel.c
+++ b/tools/perf/bench/futex-wake-parallel.c
@@ -264,7 +264,7 @@ int bench_futex_wake_parallel(int argc, const char **argv)
err(EXIT_FAILURE, "mlockall");
}

- cpu = perf_cpu_map__new(NULL);
+ cpu = perf_cpu_map__new_online_cpus();
if (!cpu)
err(EXIT_FAILURE, "calloc");

diff --git a/tools/perf/bench/futex-wake.c b/tools/perf/bench/futex-wake.c
index 18a5894af8bb..690fd6d3da13 100644
--- a/tools/perf/bench/futex-wake.c
+++ b/tools/perf/bench/futex-wake.c
@@ -149,7 +149,7 @@ int bench_futex_wake(int argc, const char **argv)
exit(EXIT_FAILURE);
}

- cpu = perf_cpu_map__new(NULL);
+ cpu = perf_cpu_map__new_online_cpus();
if (!cpu)
err(EXIT_FAILURE, "calloc");

diff --git a/tools/perf/builtin-ftrace.c b/tools/perf/builtin-ftrace.c
index ac2e6c75f912..eb30c8eca488 100644
--- a/tools/perf/builtin-ftrace.c
+++ b/tools/perf/builtin-ftrace.c
@@ -333,7 +333,7 @@ static int set_tracing_func_irqinfo(struct perf_ftrace *ftrace)

static int reset_tracing_cpu(void)
{
- struct perf_cpu_map *cpumap = perf_cpu_map__new(NULL);
+ struct perf_cpu_map *cpumap = perf_cpu_map__new_online_cpus();
int ret;

ret = set_tracing_cpumask(cpumap);
diff --git a/tools/perf/tests/code-reading.c b/tools/perf/tests/code-reading.c
index 8620146d0378..7a3a7bbbec71 100644
--- a/tools/perf/tests/code-reading.c
+++ b/tools/perf/tests/code-reading.c
@@ -610,7 +610,7 @@ static int do_test_code_reading(bool try_kcore)
goto out_put;
}

- cpus = perf_cpu_map__new(NULL);
+ cpus = perf_cpu_map__new_online_cpus();
if (!cpus) {
pr_debug("perf_cpu_map__new failed\n");
goto out_put;
diff --git a/tools/perf/tests/keep-tracking.c b/tools/perf/tests/keep-tracking.c
index 8f4f9b632e1e..5a3b2bed07f3 100644
--- a/tools/perf/tests/keep-tracking.c
+++ b/tools/perf/tests/keep-tracking.c
@@ -81,7 +81,7 @@ static int test__keep_tracking(struct test_suite *test __maybe_unused, int subte
threads = thread_map__new(-1, getpid(), UINT_MAX);
CHECK_NOT_NULL__(threads);

- cpus = perf_cpu_map__new(NULL);
+ cpus = perf_cpu_map__new_online_cpus();
CHECK_NOT_NULL__(cpus);

evlist = evlist__new();
diff --git a/tools/perf/tests/mmap-basic.c b/tools/perf/tests/mmap-basic.c
index 886a13a77a16..012c8ae439fd 100644
--- a/tools/perf/tests/mmap-basic.c
+++ b/tools/perf/tests/mmap-basic.c
@@ -52,7 +52,7 @@ static int test__basic_mmap(struct test_suite *test __maybe_unused, int subtest
return -1;
}

- cpus = perf_cpu_map__new(NULL);
+ cpus = perf_cpu_map__new_online_cpus();
if (cpus == NULL) {
pr_debug("perf_cpu_map__new\n");
goto out_free_threads;
diff --git a/tools/perf/tests/openat-syscall-all-cpus.c b/tools/perf/tests/openat-syscall-all-cpus.c
index f3275be83a33..fb114118c876 100644
--- a/tools/perf/tests/openat-syscall-all-cpus.c
+++ b/tools/perf/tests/openat-syscall-all-cpus.c
@@ -37,7 +37,7 @@ static int test__openat_syscall_event_on_all_cpus(struct test_suite *test __mayb
return -1;
}

- cpus = perf_cpu_map__new(NULL);
+ cpus = perf_cpu_map__new_online_cpus();
if (cpus == NULL) {
pr_debug("perf_cpu_map__new\n");
goto out_thread_map_delete;
diff --git a/tools/perf/tests/perf-time-to-tsc.c b/tools/perf/tests/perf-time-to-tsc.c
index efcd71c2738a..bbe2ddeb9b74 100644
--- a/tools/perf/tests/perf-time-to-tsc.c
+++ b/tools/perf/tests/perf-time-to-tsc.c
@@ -93,7 +93,7 @@ static int test__perf_time_to_tsc(struct test_suite *test __maybe_unused, int su
threads = thread_map__new(-1, getpid(), UINT_MAX);
CHECK_NOT_NULL__(threads);

- cpus = perf_cpu_map__new(NULL);
+ cpus = perf_cpu_map__new_online_cpus();
CHECK_NOT_NULL__(cpus);

evlist = evlist__new();
diff --git a/tools/perf/tests/switch-tracking.c b/tools/perf/tests/switch-tracking.c
index e52b031bedc5..5cab17a1942e 100644
--- a/tools/perf/tests/switch-tracking.c
+++ b/tools/perf/tests/switch-tracking.c
@@ -351,7 +351,7 @@ static int test__switch_tracking(struct test_suite *test __maybe_unused, int sub
goto out_err;
}

- cpus = perf_cpu_map__new(NULL);
+ cpus = perf_cpu_map__new_online_cpus();
if (!cpus) {
pr_debug("perf_cpu_map__new failed!\n");
goto out_err;
diff --git a/tools/perf/tests/topology.c b/tools/perf/tests/topology.c
index 9dee63734e66..2a842f53fbb5 100644
--- a/tools/perf/tests/topology.c
+++ b/tools/perf/tests/topology.c
@@ -215,7 +215,7 @@ static int test__session_topology(struct test_suite *test __maybe_unused, int su
if (session_write_header(path))
goto free_path;

- map = perf_cpu_map__new(NULL);
+ map = perf_cpu_map__new_online_cpus();
if (map == NULL) {
pr_debug("failed to get system cpumap\n");
goto free_path;
diff --git a/tools/perf/util/bpf_counter.c b/tools/perf/util/bpf_counter.c
index 7f9b0e46e008..7a8af60e0f51 100644
--- a/tools/perf/util/bpf_counter.c
+++ b/tools/perf/util/bpf_counter.c
@@ -455,7 +455,7 @@ static int bperf__load(struct evsel *evsel, struct target *target)
return -1;

if (!all_cpu_map) {
- all_cpu_map = perf_cpu_map__new(NULL);
+ all_cpu_map = perf_cpu_map__new_online_cpus();
if (!all_cpu_map)
return -1;
}
diff --git a/tools/perf/util/cpumap.c b/tools/perf/util/cpumap.c
index 0e090e8bc334..0581ee0fa5f2 100644
--- a/tools/perf/util/cpumap.c
+++ b/tools/perf/util/cpumap.c
@@ -672,7 +672,7 @@ struct perf_cpu_map *cpu_map__online(void) /* thread unsafe */
static struct perf_cpu_map *online;

if (!online)
- online = perf_cpu_map__new(NULL); /* from /sys/devices/system/cpu/online */
+ online = perf_cpu_map__new_online_cpus(); /* from /sys/devices/system/cpu/online */

return online;
}
diff --git a/tools/perf/util/cputopo.c b/tools/perf/util/cputopo.c
index 81cfc85f4668..8bbeb2dc76fd 100644
--- a/tools/perf/util/cputopo.c
+++ b/tools/perf/util/cputopo.c
@@ -267,7 +267,7 @@ struct cpu_topology *cpu_topology__new(void)
ncpus = cpu__max_present_cpu().cpu;

/* build online CPU map */
- map = perf_cpu_map__new(NULL);
+ map = perf_cpu_map__new_online_cpus();
if (map == NULL) {
pr_debug("failed to get system cpumap\n");
return NULL;
diff --git a/tools/perf/util/evlist.c b/tools/perf/util/evlist.c
index ff7f85ded89d..0ed3ce2aa8eb 100644
--- a/tools/perf/util/evlist.c
+++ b/tools/perf/util/evlist.c
@@ -1352,7 +1352,7 @@ static int evlist__create_syswide_maps(struct evlist *evlist)
* error, and we may not want to do that fallback to a
* default cpu identity map :-\
*/
- cpus = perf_cpu_map__new(NULL);
+ cpus = perf_cpu_map__new_online_cpus();
if (!cpus)
goto out;

diff --git a/tools/perf/util/perf_api_probe.c b/tools/perf/util/perf_api_probe.c
index e1e2d701599c..1de3b69cdf4a 100644
--- a/tools/perf/util/perf_api_probe.c
+++ b/tools/perf/util/perf_api_probe.c
@@ -64,7 +64,7 @@ static bool perf_probe_api(setup_probe_fn_t fn)
struct perf_cpu cpu;
int ret, i = 0;

- cpus = perf_cpu_map__new(NULL);
+ cpus = perf_cpu_map__new_online_cpus();
if (!cpus)
return false;
cpu = perf_cpu_map__cpu(cpus, 0);
@@ -140,7 +140,7 @@ bool perf_can_record_cpu_wide(void)
struct perf_cpu cpu;
int fd;

- cpus = perf_cpu_map__new(NULL);
+ cpus = perf_cpu_map__new_online_cpus();
if (!cpus)
return false;

diff --git a/tools/perf/util/record.c b/tools/perf/util/record.c
index 40290382b2d7..87e817b3cf7e 100644
--- a/tools/perf/util/record.c
+++ b/tools/perf/util/record.c
@@ -238,7 +238,7 @@ bool evlist__can_select_event(struct evlist *evlist, const char *str)
evsel = evlist__last(temp_evlist);

if (!evlist || perf_cpu_map__has_any_cpu_or_is_empty(evlist->core.user_requested_cpus)) {
- struct perf_cpu_map *cpus = perf_cpu_map__new(NULL);
+ struct perf_cpu_map *cpus = perf_cpu_map__new_online_cpus();

if (cpus)
cpu = perf_cpu_map__cpu(cpus, 0);
--
2.43.0.rc1.413.gea7ed67945-goog

2023-11-29 06:03:15

by Ian Rogers

Subject: [PATCH v1 05/14] libperf cpumap: Add for_each_cpu that skips the "any CPU" case

When iterating CPUs in a CPU map it is often desirable to skip the
"any CPU" (aka dummy) case. Add a helper macro for this and use it in
builtin-record.
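
A minimal usage sketch (illustration only, not part of the patch):
the macro visits every real CPU in the map but never the "any
CPU"/dummy -1 entry. As the macro ends in an if statement, the loop
body binds to that check.

  struct perf_cpu cpu;
  int idx;

  perf_cpu_map__for_each_cpu_skip_any(cpu, idx, cpus) {
          /* cpu.cpu is always >= 0 here */
          printf("CPU %d\n", cpu.cpu);
  }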

Signed-off-by: Ian Rogers <[email protected]>
---
tools/lib/perf/include/perf/cpumap.h | 6 ++++++
tools/perf/builtin-record.c | 4 +---
2 files changed, 7 insertions(+), 3 deletions(-)

diff --git a/tools/lib/perf/include/perf/cpumap.h b/tools/lib/perf/include/perf/cpumap.h
index 9cf361fc5edc..dbe0a7352b64 100644
--- a/tools/lib/perf/include/perf/cpumap.h
+++ b/tools/lib/perf/include/perf/cpumap.h
@@ -64,6 +64,12 @@ LIBPERF_API bool perf_cpu_map__has_any_cpu(const struct perf_cpu_map *map);
(idx) < perf_cpu_map__nr(cpus); \
(idx)++, (cpu) = perf_cpu_map__cpu(cpus, idx))

+#define perf_cpu_map__for_each_cpu_skip_any(_cpu, idx, cpus) \
+ for ((idx) = 0, (_cpu) = perf_cpu_map__cpu(cpus, idx); \
+ (idx) < perf_cpu_map__nr(cpus); \
+ (idx)++, (_cpu) = perf_cpu_map__cpu(cpus, idx)) \
+ if ((_cpu).cpu != -1)
+
#define perf_cpu_map__for_each_idx(idx, cpus) \
for ((idx) = 0; (idx) < perf_cpu_map__nr(cpus); (idx)++)

diff --git a/tools/perf/builtin-record.c b/tools/perf/builtin-record.c
index 8ec818568662..066f9232e947 100644
--- a/tools/perf/builtin-record.c
+++ b/tools/perf/builtin-record.c
@@ -3580,9 +3580,7 @@ static int record__mmap_cpu_mask_init(struct mmap_cpu_mask *mask, struct perf_cp
if (cpu_map__is_dummy(cpus))
return 0;

- perf_cpu_map__for_each_cpu(cpu, idx, cpus) {
- if (cpu.cpu == -1)
- continue;
+ perf_cpu_map__for_each_cpu_skip_any(cpu, idx, cpus) {
/* Return ENODEV is input cpu is greater than max cpu */
if ((unsigned long)cpu.cpu > mask->nbits)
return -ENODEV;
--
2.43.0.rc1.413.gea7ed67945-goog

2023-11-29 06:03:20

by Ian Rogers

Subject: [PATCH v1 09/14] perf cpumap: Clean up use of perf_cpu_map__has_any_cpu_or_is_empty

Most uses of what was perf_cpu_map__empty, and is now
perf_cpu_map__has_any_cpu_or_is_empty, actually want to do something
with the CPU map if it contains CPUs. Replace those uses with more
appropriate helpers so that the CPUs within the map can be handled.

Signed-off-by: Ian Rogers <[email protected]>
---
tools/perf/builtin-c2c.c | 6 +-----
tools/perf/builtin-stat.c | 9 ++++-----
tools/perf/util/auxtrace.c | 4 ++--
tools/perf/util/record.c | 2 +-
tools/perf/util/stat.c | 2 +-
5 files changed, 9 insertions(+), 14 deletions(-)

diff --git a/tools/perf/builtin-c2c.c b/tools/perf/builtin-c2c.c
index f78eea9e2153..ef7ed53a4b4e 100644
--- a/tools/perf/builtin-c2c.c
+++ b/tools/perf/builtin-c2c.c
@@ -2319,11 +2319,7 @@ static int setup_nodes(struct perf_session *session)

nodes[node] = set;

- /* empty node, skip */
- if (perf_cpu_map__has_any_cpu_or_is_empty(map))
- continue;
-
- perf_cpu_map__for_each_cpu(cpu, idx, map) {
+ perf_cpu_map__for_each_cpu_skip_any(cpu, idx, map) {
__set_bit(cpu.cpu, set);

if (WARN_ONCE(cpu2node[cpu.cpu] != -1, "node/cpu topology bug"))
diff --git a/tools/perf/builtin-stat.c b/tools/perf/builtin-stat.c
index 3303aa20f326..f583027a0639 100644
--- a/tools/perf/builtin-stat.c
+++ b/tools/perf/builtin-stat.c
@@ -1316,10 +1316,9 @@ static int cpu__get_cache_id_from_map(struct perf_cpu cpu, char *map)
* be the first online CPU in the cache domain else use the
* first online CPU of the cache domain as the ID.
*/
- if (perf_cpu_map__has_any_cpu_or_is_empty(cpu_map))
+ id = perf_cpu_map__min(cpu_map).cpu;
+ if (id == -1)
id = cpu.cpu;
- else
- id = perf_cpu_map__cpu(cpu_map, 0).cpu;

/* Free the perf_cpu_map used to find the cache ID */
perf_cpu_map__put(cpu_map);
@@ -1622,7 +1621,7 @@ static int perf_stat_init_aggr_mode(void)
* taking the highest cpu number to be the size of
* the aggregation translate cpumap.
*/
- if (!perf_cpu_map__has_any_cpu_or_is_empty(evsel_list->core.user_requested_cpus))
+ if (!perf_cpu_map__is_any_cpu_or_is_empty(evsel_list->core.user_requested_cpus))
nr = perf_cpu_map__max(evsel_list->core.user_requested_cpus).cpu;
else
nr = 0;
@@ -2289,7 +2288,7 @@ int process_stat_config_event(struct perf_session *session,

perf_event__read_stat_config(&stat_config, &event->stat_config);

- if (perf_cpu_map__has_any_cpu_or_is_empty(st->cpus)) {
+ if (perf_cpu_map__is_empty(st->cpus)) {
if (st->aggr_mode != AGGR_UNSET)
pr_warning("warning: processing task data, aggregation mode not set\n");
} else if (st->aggr_mode != AGGR_UNSET) {
diff --git a/tools/perf/util/auxtrace.c b/tools/perf/util/auxtrace.c
index 3684e6009b63..6b1d4bafad59 100644
--- a/tools/perf/util/auxtrace.c
+++ b/tools/perf/util/auxtrace.c
@@ -174,7 +174,7 @@ void auxtrace_mmap_params__set_idx(struct auxtrace_mmap_params *mp,
struct evlist *evlist,
struct evsel *evsel, int idx)
{
- bool per_cpu = !perf_cpu_map__has_any_cpu_or_is_empty(evlist->core.user_requested_cpus);
+ bool per_cpu = !perf_cpu_map__has_any_cpu(evlist->core.user_requested_cpus);

mp->mmap_needed = evsel->needs_auxtrace_mmap;

@@ -648,7 +648,7 @@ int auxtrace_parse_snapshot_options(struct auxtrace_record *itr,

static int evlist__enable_event_idx(struct evlist *evlist, struct evsel *evsel, int idx)
{
- bool per_cpu_mmaps = !perf_cpu_map__has_any_cpu_or_is_empty(evlist->core.user_requested_cpus);
+ bool per_cpu_mmaps = !perf_cpu_map__has_any_cpu(evlist->core.user_requested_cpus);

if (per_cpu_mmaps) {
struct perf_cpu evlist_cpu = perf_cpu_map__cpu(evlist->core.all_cpus, idx);
diff --git a/tools/perf/util/record.c b/tools/perf/util/record.c
index 87e817b3cf7e..e867de8ddaaa 100644
--- a/tools/perf/util/record.c
+++ b/tools/perf/util/record.c
@@ -237,7 +237,7 @@ bool evlist__can_select_event(struct evlist *evlist, const char *str)

evsel = evlist__last(temp_evlist);

- if (!evlist || perf_cpu_map__has_any_cpu_or_is_empty(evlist->core.user_requested_cpus)) {
+ if (!evlist || perf_cpu_map__is_any_cpu_or_is_empty(evlist->core.user_requested_cpus)) {
struct perf_cpu_map *cpus = perf_cpu_map__new_online_cpus();

if (cpus)
diff --git a/tools/perf/util/stat.c b/tools/perf/util/stat.c
index 012c4946b9c4..915808a6211a 100644
--- a/tools/perf/util/stat.c
+++ b/tools/perf/util/stat.c
@@ -315,7 +315,7 @@ static int check_per_pkg(struct evsel *counter, struct perf_counts_values *vals,
if (!counter->per_pkg)
return 0;

- if (perf_cpu_map__has_any_cpu_or_is_empty(cpus))
+ if (perf_cpu_map__is_any_cpu_or_is_empty(cpus))
return 0;

if (!mask) {
--
2.43.0.rc1.413.gea7ed67945-goog

2023-11-29 06:03:20

by Ian Rogers

Subject: [PATCH v1 07/14] perf arm-spe/cs-etm: Directly iterate CPU maps

Rather than iterate all CPUs and see if they are in CPU maps, directly
iterate the CPU map. Similarly make use of the intersect
function. Switch perf_cpu_map__has_any_cpu_or_is_empty to more
appropriate alternatives.
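
The resulting iteration has roughly the following shape (sketch only;
event_cpus and do_per_cpu_work() are placeholders):

  struct perf_cpu_map *online = perf_cpu_map__new_online_cpus();
  struct perf_cpu_map *to_validate = perf_cpu_map__intersect(event_cpus, online);
  struct perf_cpu cpu;
  int idx;

  perf_cpu_map__for_each_cpu_skip_any(cpu, idx, to_validate)
          do_per_cpu_work(cpu.cpu);

  perf_cpu_map__put(to_validate);
  perf_cpu_map__put(online);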

Signed-off-by: Ian Rogers <[email protected]>
---
tools/perf/arch/arm/util/cs-etm.c | 77 ++++++++++++----------------
tools/perf/arch/arm64/util/arm-spe.c | 4 +-
2 files changed, 34 insertions(+), 47 deletions(-)

diff --git a/tools/perf/arch/arm/util/cs-etm.c b/tools/perf/arch/arm/util/cs-etm.c
index 77e6663c1703..a68a72f2f668 100644
--- a/tools/perf/arch/arm/util/cs-etm.c
+++ b/tools/perf/arch/arm/util/cs-etm.c
@@ -197,38 +197,32 @@ static int cs_etm_validate_timestamp(struct auxtrace_record *itr,
static int cs_etm_validate_config(struct auxtrace_record *itr,
struct evsel *evsel)
{
- int i, err = -EINVAL;
+ int idx, err = -EINVAL;
struct perf_cpu_map *event_cpus = evsel->evlist->core.user_requested_cpus;
struct perf_cpu_map *online_cpus = perf_cpu_map__new_online_cpus();
+ struct perf_cpu_map *intersect_cpus = perf_cpu_map__intersect(event_cpus, online_cpus);
+ struct perf_cpu cpu;

- /* Set option of each CPU we have */
- for (i = 0; i < cpu__max_cpu().cpu; i++) {
- struct perf_cpu cpu = { .cpu = i, };
-
- /*
- * In per-cpu case, do the validation for CPUs to work with.
- * In per-thread case, the CPU map is empty. Since the traced
- * program can run on any CPUs in this case, thus don't skip
- * validation.
- */
- if (!perf_cpu_map__has_any_cpu_or_is_empty(event_cpus) &&
- !perf_cpu_map__has(event_cpus, cpu))
- continue;
-
- if (!perf_cpu_map__has(online_cpus, cpu))
- continue;
+ perf_cpu_map__put(online_cpus);

- err = cs_etm_validate_context_id(itr, evsel, i);
+ /*
+ * Set option of each CPU we have. In per-cpu case, do the validation
+ * for CPUs to work with. In per-thread case, the CPU map is empty.
+ * Since the traced program can run on any CPUs in this case, thus don't
+ * skip validation.
+ */
+ perf_cpu_map__for_each_cpu_skip_any(cpu, idx, intersect_cpus) {
+ err = cs_etm_validate_context_id(itr, evsel, cpu.cpu);
if (err)
goto out;
- err = cs_etm_validate_timestamp(itr, evsel, i);
+ err = cs_etm_validate_timestamp(itr, evsel, cpu.cpu);
if (err)
goto out;
}

err = 0;
out:
- perf_cpu_map__put(online_cpus);
+ perf_cpu_map__put(intersect_cpus);
return err;
}

@@ -435,7 +429,7 @@ static int cs_etm_recording_options(struct auxtrace_record *itr,
* Also the case of per-cpu mmaps, need the contextID in order to be notified
* when a context switch happened.
*/
- if (!perf_cpu_map__has_any_cpu_or_is_empty(cpus)) {
+ if (!perf_cpu_map__is_any_cpu_or_is_empty(cpus)) {
evsel__set_config_if_unset(cs_etm_pmu, cs_etm_evsel,
"timestamp", 1);
evsel__set_config_if_unset(cs_etm_pmu, cs_etm_evsel,
@@ -461,7 +455,7 @@ static int cs_etm_recording_options(struct auxtrace_record *itr,
evsel->core.attr.sample_period = 1;

/* In per-cpu case, always need the time of mmap events etc */
- if (!perf_cpu_map__has_any_cpu_or_is_empty(cpus))
+ if (!perf_cpu_map__is_any_cpu_or_is_empty(cpus))
evsel__set_sample_bit(evsel, TIME);

err = cs_etm_validate_config(itr, cs_etm_evsel);
@@ -533,38 +527,32 @@ static size_t
cs_etm_info_priv_size(struct auxtrace_record *itr __maybe_unused,
struct evlist *evlist __maybe_unused)
{
- int i;
+ int idx;
int etmv3 = 0, etmv4 = 0, ete = 0;
struct perf_cpu_map *event_cpus = evlist->core.user_requested_cpus;
struct perf_cpu_map *online_cpus = perf_cpu_map__new_online_cpus();
+ struct perf_cpu cpu;

/* cpu map is not empty, we have specific CPUs to work with */
- if (!perf_cpu_map__has_any_cpu_or_is_empty(event_cpus)) {
- for (i = 0; i < cpu__max_cpu().cpu; i++) {
- struct perf_cpu cpu = { .cpu = i, };
-
- if (!perf_cpu_map__has(event_cpus, cpu) ||
- !perf_cpu_map__has(online_cpus, cpu))
- continue;
+ if (!perf_cpu_map__is_empty(event_cpus)) {
+ struct perf_cpu_map *intersect_cpus =
+ perf_cpu_map__intersect(event_cpus, online_cpus);

- if (cs_etm_is_ete(itr, i))
+ perf_cpu_map__for_each_cpu_skip_any(cpu, idx, intersect_cpus) {
+ if (cs_etm_is_ete(itr, cpu.cpu))
ete++;
- else if (cs_etm_is_etmv4(itr, i))
+ else if (cs_etm_is_etmv4(itr, cpu.cpu))
etmv4++;
else
etmv3++;
}
+ perf_cpu_map__put(intersect_cpus);
} else {
/* get configuration for all CPUs in the system */
- for (i = 0; i < cpu__max_cpu().cpu; i++) {
- struct perf_cpu cpu = { .cpu = i, };
-
- if (!perf_cpu_map__has(online_cpus, cpu))
- continue;
-
- if (cs_etm_is_ete(itr, i))
+ perf_cpu_map__for_each_cpu(cpu, idx, online_cpus) {
+ if (cs_etm_is_ete(itr, cpu.cpu))
ete++;
- else if (cs_etm_is_etmv4(itr, i))
+ else if (cs_etm_is_etmv4(itr, cpu.cpu))
etmv4++;
else
etmv3++;
@@ -814,15 +802,14 @@ static int cs_etm_info_fill(struct auxtrace_record *itr,
return -EINVAL;

/* If the cpu_map is empty all online CPUs are involved */
- if (perf_cpu_map__has_any_cpu_or_is_empty(event_cpus)) {
+ if (perf_cpu_map__is_empty(event_cpus)) {
cpu_map = online_cpus;
} else {
/* Make sure all specified CPUs are online */
- for (i = 0; i < perf_cpu_map__nr(event_cpus); i++) {
- struct perf_cpu cpu = { .cpu = i, };
+ struct perf_cpu cpu;

- if (perf_cpu_map__has(event_cpus, cpu) &&
- !perf_cpu_map__has(online_cpus, cpu))
+ perf_cpu_map__for_each_cpu(cpu, i, event_cpus) {
+ if (!perf_cpu_map__has(online_cpus, cpu))
return -EINVAL;
}

diff --git a/tools/perf/arch/arm64/util/arm-spe.c b/tools/perf/arch/arm64/util/arm-spe.c
index 51ccbfd3d246..0b52e67edb3b 100644
--- a/tools/perf/arch/arm64/util/arm-spe.c
+++ b/tools/perf/arch/arm64/util/arm-spe.c
@@ -232,7 +232,7 @@ static int arm_spe_recording_options(struct auxtrace_record *itr,
* In the case of per-cpu mmaps, sample CPU for AUX event;
* also enable the timestamp tracing for samples correlation.
*/
- if (!perf_cpu_map__has_any_cpu_or_is_empty(cpus)) {
+ if (!perf_cpu_map__is_any_cpu_or_is_empty(cpus)) {
evsel__set_sample_bit(arm_spe_evsel, CPU);
evsel__set_config_if_unset(arm_spe_pmu, arm_spe_evsel,
"ts_enable", 1);
@@ -265,7 +265,7 @@ static int arm_spe_recording_options(struct auxtrace_record *itr,
tracking_evsel->core.attr.sample_period = 1;

/* In per-cpu case, always need the time of mmap events etc */
- if (!perf_cpu_map__has_any_cpu_or_is_empty(cpus)) {
+ if (!perf_cpu_map__is_any_cpu_or_is_empty(cpus)) {
evsel__set_sample_bit(tracking_evsel, TIME);
evsel__set_sample_bit(tracking_evsel, CPU);

--
2.43.0.rc1.413.gea7ed67945-goog

2023-11-29 06:03:27

by Ian Rogers

Subject: [PATCH v1 12/14] perf stat: Remove duplicate cpus_map_matched function

Use libperf's perf_cpu_map__equal, which performs the same function.

Signed-off-by: Ian Rogers <[email protected]>
---
tools/perf/builtin-stat.c | 22 +---------------------
1 file changed, 1 insertion(+), 21 deletions(-)

diff --git a/tools/perf/builtin-stat.c b/tools/perf/builtin-stat.c
index f583027a0639..8e2f90b5c276 100644
--- a/tools/perf/builtin-stat.c
+++ b/tools/perf/builtin-stat.c
@@ -164,26 +164,6 @@ static struct perf_stat_config stat_config = {
.iostat_run = false,
};

-static bool cpus_map_matched(struct evsel *a, struct evsel *b)
-{
- if (!a->core.cpus && !b->core.cpus)
- return true;
-
- if (!a->core.cpus || !b->core.cpus)
- return false;
-
- if (perf_cpu_map__nr(a->core.cpus) != perf_cpu_map__nr(b->core.cpus))
- return false;
-
- for (int i = 0; i < perf_cpu_map__nr(a->core.cpus); i++) {
- if (perf_cpu_map__cpu(a->core.cpus, i).cpu !=
- perf_cpu_map__cpu(b->core.cpus, i).cpu)
- return false;
- }
-
- return true;
-}
-
static void evlist__check_cpu_maps(struct evlist *evlist)
{
struct evsel *evsel, *warned_leader = NULL;
@@ -194,7 +174,7 @@ static void evlist__check_cpu_maps(struct evlist *evlist)
/* Check that leader matches cpus with each member. */
if (leader == evsel)
continue;
- if (cpus_map_matched(leader, evsel))
+ if (perf_cpu_map__equal(leader->core.cpus, evsel->core.cpus))
continue;

/* If there's mismatch disable the group and warn user. */
--
2.43.0.rc1.413.gea7ed67945-goog

2023-11-29 06:03:26

by Ian Rogers

Subject: [PATCH v1 08/14] perf intel-pt/intel-bts: Switch perf_cpu_map__has_any_cpu_or_is_empty use

Switch perf_cpu_map__has_any_cpu_or_is_empty to
perf_cpu_map__is_any_cpu_or_is_empty: a CPU map may contain CPUs as
well as the "any CPU"/dummy value, and
perf_cpu_map__is_any_cpu_or_is_empty is the more correct alternative
for these call sites.

Signed-off-by: Ian Rogers <[email protected]>
---
tools/perf/arch/x86/util/intel-bts.c | 4 ++--
tools/perf/arch/x86/util/intel-pt.c | 10 +++++-----
2 files changed, 7 insertions(+), 7 deletions(-)

diff --git a/tools/perf/arch/x86/util/intel-bts.c b/tools/perf/arch/x86/util/intel-bts.c
index af8ae4647585..34696f3d3d5d 100644
--- a/tools/perf/arch/x86/util/intel-bts.c
+++ b/tools/perf/arch/x86/util/intel-bts.c
@@ -143,7 +143,7 @@ static int intel_bts_recording_options(struct auxtrace_record *itr,
if (!opts->full_auxtrace)
return 0;

- if (opts->full_auxtrace && !perf_cpu_map__has_any_cpu_or_is_empty(cpus)) {
+ if (opts->full_auxtrace && !perf_cpu_map__is_any_cpu_or_is_empty(cpus)) {
pr_err(INTEL_BTS_PMU_NAME " does not support per-cpu recording\n");
return -EINVAL;
}
@@ -224,7 +224,7 @@ static int intel_bts_recording_options(struct auxtrace_record *itr,
* In the case of per-cpu mmaps, we need the CPU on the
* AUX event.
*/
- if (!perf_cpu_map__has_any_cpu_or_is_empty(cpus))
+ if (!perf_cpu_map__is_any_cpu_or_is_empty(cpus))
evsel__set_sample_bit(intel_bts_evsel, CPU);
}

diff --git a/tools/perf/arch/x86/util/intel-pt.c b/tools/perf/arch/x86/util/intel-pt.c
index d199619df3ab..6de7e2d21075 100644
--- a/tools/perf/arch/x86/util/intel-pt.c
+++ b/tools/perf/arch/x86/util/intel-pt.c
@@ -369,7 +369,7 @@ static int intel_pt_info_fill(struct auxtrace_record *itr,
ui__warning("Intel Processor Trace: TSC not available\n");
}

- per_cpu_mmaps = !perf_cpu_map__has_any_cpu_or_is_empty(session->evlist->core.user_requested_cpus);
+ per_cpu_mmaps = !perf_cpu_map__is_any_cpu_or_is_empty(session->evlist->core.user_requested_cpus);

auxtrace_info->type = PERF_AUXTRACE_INTEL_PT;
auxtrace_info->priv[INTEL_PT_PMU_TYPE] = intel_pt_pmu->type;
@@ -774,7 +774,7 @@ static int intel_pt_recording_options(struct auxtrace_record *itr,
* Per-cpu recording needs sched_switch events to distinguish different
* threads.
*/
- if (have_timing_info && !perf_cpu_map__has_any_cpu_or_is_empty(cpus) &&
+ if (have_timing_info && !perf_cpu_map__is_any_cpu_or_is_empty(cpus) &&
!record_opts__no_switch_events(opts)) {
if (perf_can_record_switch_events()) {
bool cpu_wide = !target__none(&opts->target) &&
@@ -832,7 +832,7 @@ static int intel_pt_recording_options(struct auxtrace_record *itr,
* In the case of per-cpu mmaps, we need the CPU on the
* AUX event.
*/
- if (!perf_cpu_map__has_any_cpu_or_is_empty(cpus))
+ if (!perf_cpu_map__is_any_cpu_or_is_empty(cpus))
evsel__set_sample_bit(intel_pt_evsel, CPU);
}

@@ -858,7 +858,7 @@ static int intel_pt_recording_options(struct auxtrace_record *itr,
tracking_evsel->immediate = true;

/* In per-cpu case, always need the time of mmap events etc */
- if (!perf_cpu_map__has_any_cpu_or_is_empty(cpus)) {
+ if (!perf_cpu_map__is_any_cpu_or_is_empty(cpus)) {
evsel__set_sample_bit(tracking_evsel, TIME);
/* And the CPU for switch events */
evsel__set_sample_bit(tracking_evsel, CPU);
@@ -870,7 +870,7 @@ static int intel_pt_recording_options(struct auxtrace_record *itr,
* Warn the user when we do not have enough information to decode i.e.
* per-cpu with no sched_switch (except workload-only).
*/
- if (!ptr->have_sched_switch && !perf_cpu_map__has_any_cpu_or_is_empty(cpus) &&
+ if (!ptr->have_sched_switch && !perf_cpu_map__is_any_cpu_or_is_empty(cpus) &&
!target__none(&opts->target) &&
!intel_pt_evsel->core.attr.exclude_user)
ui__warning("Intel Processor Trace decoding will not be possible except for kernel tracing!\n");
--
2.43.0.rc1.413.gea7ed67945-goog

2023-11-29 06:03:32

by Ian Rogers

Subject: [PATCH v1 10/14] perf top: Avoid repeated function calls

Add a local variable to avoid repeated calls to perf_cpu_map__nr.

Signed-off-by: Ian Rogers <[email protected]>
---
tools/perf/util/top.c | 9 ++++-----
1 file changed, 4 insertions(+), 5 deletions(-)

diff --git a/tools/perf/util/top.c b/tools/perf/util/top.c
index be7157de0451..4db3d1bd686c 100644
--- a/tools/perf/util/top.c
+++ b/tools/perf/util/top.c
@@ -28,6 +28,7 @@ size_t perf_top__header_snprintf(struct perf_top *top, char *bf, size_t size)
struct record_opts *opts = &top->record_opts;
struct target *target = &opts->target;
size_t ret = 0;
+ int nr_cpus;

if (top->samples) {
samples_per_sec = top->samples / top->delay_secs;
@@ -93,19 +94,17 @@ size_t perf_top__header_snprintf(struct perf_top *top, char *bf, size_t size)
else
ret += SNPRINTF(bf + ret, size - ret, " (all");

+ nr_cpus = perf_cpu_map__nr(top->evlist->core.user_requested_cpus);
if (target->cpu_list)
ret += SNPRINTF(bf + ret, size - ret, ", CPU%s: %s)",
- perf_cpu_map__nr(top->evlist->core.user_requested_cpus) > 1
- ? "s" : "",
+ nr_cpus > 1 ? "s" : "",
target->cpu_list);
else {
if (target->tid)
ret += SNPRINTF(bf + ret, size - ret, ")");
else
ret += SNPRINTF(bf + ret, size - ret, ", %d CPU%s)",
- perf_cpu_map__nr(top->evlist->core.user_requested_cpus),
- perf_cpu_map__nr(top->evlist->core.user_requested_cpus) > 1
- ? "s" : "");
+ nr_cpus, nr_cpus > 1 ? "s" : "");
}

perf_top__reset_sample_counters(top);
--
2.43.0.rc1.413.gea7ed67945-goog

2023-11-29 06:03:34

by Ian Rogers

Subject: [PATCH v1 14/14] libperf cpumap: Document perf_cpu_map__nr's behavior

perf_cpu_map__nr's behavior around an empty CPU map is strange, as it
reports that there is 1 CPU. Changing code that may rely on this
behavior is hard, so at least document the behavior.

Signed-off-by: Ian Rogers <[email protected]>
---
tools/lib/perf/include/perf/cpumap.h | 11 +++++++++++
1 file changed, 11 insertions(+)

diff --git a/tools/lib/perf/include/perf/cpumap.h b/tools/lib/perf/include/perf/cpumap.h
index 523e4348fc96..90457d17fb2f 100644
--- a/tools/lib/perf/include/perf/cpumap.h
+++ b/tools/lib/perf/include/perf/cpumap.h
@@ -44,7 +44,18 @@ LIBPERF_API struct perf_cpu_map *perf_cpu_map__merge(struct perf_cpu_map *orig,
LIBPERF_API struct perf_cpu_map *perf_cpu_map__intersect(struct perf_cpu_map *orig,
struct perf_cpu_map *other);
LIBPERF_API void perf_cpu_map__put(struct perf_cpu_map *map);
+/**
+ * perf_cpu_map__cpu - get the CPU value at the given index. Returns -1 if index
+ * is invalid.
+ */
LIBPERF_API struct perf_cpu perf_cpu_map__cpu(const struct perf_cpu_map *cpus, int idx);
+/**
+ * perf_cpu_map__nr - for an empty map returns 1, as perf_cpu_map__cpu returns a
+ * cpu of -1 for an invalid index, this makes an empty map
+ * look like it contains the "any CPU"/dummy value. Otherwise
+ * the result is the number of CPUs in the map plus one if the
+ * "any CPU"/dummy value is present.
+ */
LIBPERF_API int perf_cpu_map__nr(const struct perf_cpu_map *cpus);
/**
* perf_cpu_map__has_any_cpu_or_is_empty - is map either empty or has the "any CPU"/dummy value.
--
2.43.0.rc1.413.gea7ed67945-goog

2023-11-29 06:03:49

by Ian Rogers

Subject: [PATCH v1 13/14] perf cpumap: Use perf_cpu_map__for_each_cpu when possible

Rather than manually iterating the CPU map, use
perf_cpu_map__for_each_cpu. When possible, tidy local variables.

Signed-off-by: Ian Rogers <[email protected]>
---
tools/perf/arch/arm64/util/header.c | 10 ++--
tools/perf/tests/bitmap.c | 13 +++---
tools/perf/tests/topology.c | 46 +++++++++----------
tools/perf/util/bpf_kwork.c | 16 ++++---
tools/perf/util/bpf_kwork_top.c | 12 ++---
tools/perf/util/cpumap.c | 12 ++---
.../scripting-engines/trace-event-python.c | 12 +++--
tools/perf/util/session.c | 5 +-
tools/perf/util/svghelper.c | 20 ++++----
9 files changed, 72 insertions(+), 74 deletions(-)

diff --git a/tools/perf/arch/arm64/util/header.c b/tools/perf/arch/arm64/util/header.c
index a9de0b5187dd..741df3614a09 100644
--- a/tools/perf/arch/arm64/util/header.c
+++ b/tools/perf/arch/arm64/util/header.c
@@ -4,8 +4,6 @@
#include <stdio.h>
#include <stdlib.h>
#include <perf/cpumap.h>
-#include <util/cpumap.h>
-#include <internal/cpumap.h>
#include <api/fs/fs.h>
#include <errno.h>
#include "debug.h"
@@ -19,18 +17,18 @@
static int _get_cpuid(char *buf, size_t sz, struct perf_cpu_map *cpus)
{
const char *sysfs = sysfs__mountpoint();
- int cpu;
- int ret = EINVAL;
+ struct perf_cpu cpu;
+ int idx, ret = EINVAL;

if (!sysfs || sz < MIDR_SIZE)
return EINVAL;

- for (cpu = 0; cpu < perf_cpu_map__nr(cpus); cpu++) {
+ perf_cpu_map__for_each_cpu(cpu, idx, cpus) {
char path[PATH_MAX];
FILE *file;

scnprintf(path, PATH_MAX, "%s/devices/system/cpu/cpu%d" MIDR,
- sysfs, RC_CHK_ACCESS(cpus)->map[cpu].cpu);
+ sysfs, cpu.cpu);

file = fopen(path, "r");
if (!file) {
diff --git a/tools/perf/tests/bitmap.c b/tools/perf/tests/bitmap.c
index 0173f5402a35..98956e0e0765 100644
--- a/tools/perf/tests/bitmap.c
+++ b/tools/perf/tests/bitmap.c
@@ -11,18 +11,19 @@
static unsigned long *get_bitmap(const char *str, int nbits)
{
struct perf_cpu_map *map = perf_cpu_map__new(str);
- unsigned long *bm = NULL;
- int i;
+ unsigned long *bm;

bm = bitmap_zalloc(nbits);

if (map && bm) {
- for (i = 0; i < perf_cpu_map__nr(map); i++)
- __set_bit(perf_cpu_map__cpu(map, i).cpu, bm);
+ int i;
+ struct perf_cpu cpu;
+
+ perf_cpu_map__for_each_cpu(cpu, i, map)
+ __set_bit(cpu.cpu, bm);
}

- if (map)
- perf_cpu_map__put(map);
+ perf_cpu_map__put(map);
return bm;
}

diff --git a/tools/perf/tests/topology.c b/tools/perf/tests/topology.c
index 2a842f53fbb5..a8cb5ba898ab 100644
--- a/tools/perf/tests/topology.c
+++ b/tools/perf/tests/topology.c
@@ -68,6 +68,7 @@ static int check_cpu_topology(char *path, struct perf_cpu_map *map)
};
int i;
struct aggr_cpu_id id;
+ struct perf_cpu cpu;

session = perf_session__new(&data, NULL);
TEST_ASSERT_VAL("can't get session", !IS_ERR(session));
@@ -113,8 +114,7 @@ static int check_cpu_topology(char *path, struct perf_cpu_map *map)
TEST_ASSERT_VAL("Session header CPU map not set", session->header.env.cpu);

for (i = 0; i < session->header.env.nr_cpus_avail; i++) {
- struct perf_cpu cpu = { .cpu = i };
-
+ cpu.cpu = i;
if (!perf_cpu_map__has(map, cpu))
continue;
pr_debug("CPU %d, core %d, socket %d\n", i,
@@ -123,48 +123,48 @@ static int check_cpu_topology(char *path, struct perf_cpu_map *map)
}

// Test that CPU ID contains socket, die, core and CPU
- for (i = 0; i < perf_cpu_map__nr(map); i++) {
- id = aggr_cpu_id__cpu(perf_cpu_map__cpu(map, i), NULL);
+ perf_cpu_map__for_each_cpu(cpu, i, map) {
+ id = aggr_cpu_id__cpu(cpu, NULL);
TEST_ASSERT_VAL("Cpu map - CPU ID doesn't match",
- perf_cpu_map__cpu(map, i).cpu == id.cpu.cpu);
+ cpu.cpu == id.cpu.cpu);

TEST_ASSERT_VAL("Cpu map - Core ID doesn't match",
- session->header.env.cpu[perf_cpu_map__cpu(map, i).cpu].core_id == id.core);
+ session->header.env.cpu[cpu.cpu].core_id == id.core);
TEST_ASSERT_VAL("Cpu map - Socket ID doesn't match",
- session->header.env.cpu[perf_cpu_map__cpu(map, i).cpu].socket_id ==
+ session->header.env.cpu[cpu.cpu].socket_id ==
id.socket);

TEST_ASSERT_VAL("Cpu map - Die ID doesn't match",
- session->header.env.cpu[perf_cpu_map__cpu(map, i).cpu].die_id == id.die);
+ session->header.env.cpu[cpu.cpu].die_id == id.die);
TEST_ASSERT_VAL("Cpu map - Node ID is set", id.node == -1);
TEST_ASSERT_VAL("Cpu map - Thread IDX is set", id.thread_idx == -1);
}

// Test that core ID contains socket, die and core
- for (i = 0; i < perf_cpu_map__nr(map); i++) {
- id = aggr_cpu_id__core(perf_cpu_map__cpu(map, i), NULL);
+ perf_cpu_map__for_each_cpu(cpu, i, map) {
+ id = aggr_cpu_id__core(cpu, NULL);
TEST_ASSERT_VAL("Core map - Core ID doesn't match",
- session->header.env.cpu[perf_cpu_map__cpu(map, i).cpu].core_id == id.core);
+ session->header.env.cpu[cpu.cpu].core_id == id.core);

TEST_ASSERT_VAL("Core map - Socket ID doesn't match",
- session->header.env.cpu[perf_cpu_map__cpu(map, i).cpu].socket_id ==
+ session->header.env.cpu[cpu.cpu].socket_id ==
id.socket);

TEST_ASSERT_VAL("Core map - Die ID doesn't match",
- session->header.env.cpu[perf_cpu_map__cpu(map, i).cpu].die_id == id.die);
+ session->header.env.cpu[cpu.cpu].die_id == id.die);
TEST_ASSERT_VAL("Core map - Node ID is set", id.node == -1);
TEST_ASSERT_VAL("Core map - Thread IDX is set", id.thread_idx == -1);
}

// Test that die ID contains socket and die
- for (i = 0; i < perf_cpu_map__nr(map); i++) {
- id = aggr_cpu_id__die(perf_cpu_map__cpu(map, i), NULL);
+ perf_cpu_map__for_each_cpu(cpu, i, map) {
+ id = aggr_cpu_id__die(cpu, NULL);
TEST_ASSERT_VAL("Die map - Socket ID doesn't match",
- session->header.env.cpu[perf_cpu_map__cpu(map, i).cpu].socket_id ==
+ session->header.env.cpu[cpu.cpu].socket_id ==
id.socket);

TEST_ASSERT_VAL("Die map - Die ID doesn't match",
- session->header.env.cpu[perf_cpu_map__cpu(map, i).cpu].die_id == id.die);
+ session->header.env.cpu[cpu.cpu].die_id == id.die);

TEST_ASSERT_VAL("Die map - Node ID is set", id.node == -1);
TEST_ASSERT_VAL("Die map - Core is set", id.core == -1);
@@ -173,10 +173,10 @@ static int check_cpu_topology(char *path, struct perf_cpu_map *map)
}

// Test that socket ID contains only socket
- for (i = 0; i < perf_cpu_map__nr(map); i++) {
- id = aggr_cpu_id__socket(perf_cpu_map__cpu(map, i), NULL);
+ perf_cpu_map__for_each_cpu(cpu, i, map) {
+ id = aggr_cpu_id__socket(cpu, NULL);
TEST_ASSERT_VAL("Socket map - Socket ID doesn't match",
- session->header.env.cpu[perf_cpu_map__cpu(map, i).cpu].socket_id ==
+ session->header.env.cpu[cpu.cpu].socket_id ==
id.socket);

TEST_ASSERT_VAL("Socket map - Node ID is set", id.node == -1);
@@ -187,10 +187,10 @@ static int check_cpu_topology(char *path, struct perf_cpu_map *map)
}

// Test that node ID contains only node
- for (i = 0; i < perf_cpu_map__nr(map); i++) {
- id = aggr_cpu_id__node(perf_cpu_map__cpu(map, i), NULL);
+ perf_cpu_map__for_each_cpu(cpu, i, map) {
+ id = aggr_cpu_id__node(cpu, NULL);
TEST_ASSERT_VAL("Node map - Node ID doesn't match",
- cpu__get_node(perf_cpu_map__cpu(map, i)) == id.node);
+ cpu__get_node(cpu) == id.node);
TEST_ASSERT_VAL("Node map - Socket is set", id.socket == -1);
TEST_ASSERT_VAL("Node map - Die ID is set", id.die == -1);
TEST_ASSERT_VAL("Node map - Core is set", id.core == -1);
diff --git a/tools/perf/util/bpf_kwork.c b/tools/perf/util/bpf_kwork.c
index 6eb2c78fd7f4..44f0f708a15d 100644
--- a/tools/perf/util/bpf_kwork.c
+++ b/tools/perf/util/bpf_kwork.c
@@ -147,12 +147,12 @@ static bool valid_kwork_class_type(enum kwork_class_type type)

static int setup_filters(struct perf_kwork *kwork)
{
- u8 val = 1;
- int i, nr_cpus, key, fd;
- struct perf_cpu_map *map;
-
if (kwork->cpu_list != NULL) {
- fd = bpf_map__fd(skel->maps.perf_kwork_cpu_filter);
+ int idx, nr_cpus;
+ struct perf_cpu_map *map;
+ struct perf_cpu cpu;
+ int fd = bpf_map__fd(skel->maps.perf_kwork_cpu_filter);
+
if (fd < 0) {
pr_debug("Invalid cpu filter fd\n");
return -1;
@@ -165,8 +165,8 @@ static int setup_filters(struct perf_kwork *kwork)
}

nr_cpus = libbpf_num_possible_cpus();
- for (i = 0; i < perf_cpu_map__nr(map); i++) {
- struct perf_cpu cpu = perf_cpu_map__cpu(map, i);
+ perf_cpu_map__for_each_cpu(cpu, idx, map) {
+ u8 val = 1;

if (cpu.cpu >= nr_cpus) {
perf_cpu_map__put(map);
@@ -181,6 +181,8 @@ static int setup_filters(struct perf_kwork *kwork)
}

if (kwork->profile_name != NULL) {
+ int key, fd;
+
if (strlen(kwork->profile_name) >= MAX_KWORKNAME) {
pr_err("Requested name filter %s too large, limit to %d\n",
kwork->profile_name, MAX_KWORKNAME - 1);
diff --git a/tools/perf/util/bpf_kwork_top.c b/tools/perf/util/bpf_kwork_top.c
index 035e02272790..22a3b00a1e23 100644
--- a/tools/perf/util/bpf_kwork_top.c
+++ b/tools/perf/util/bpf_kwork_top.c
@@ -122,11 +122,11 @@ static bool valid_kwork_class_type(enum kwork_class_type type)

static int setup_filters(struct perf_kwork *kwork)
{
- u8 val = 1;
- int i, nr_cpus, fd;
- struct perf_cpu_map *map;
-
if (kwork->cpu_list) {
+ int idx, nr_cpus, fd;
+ struct perf_cpu_map *map;
+ struct perf_cpu cpu;
+
fd = bpf_map__fd(skel->maps.kwork_top_cpu_filter);
if (fd < 0) {
pr_debug("Invalid cpu filter fd\n");
@@ -140,8 +140,8 @@ static int setup_filters(struct perf_kwork *kwork)
}

nr_cpus = libbpf_num_possible_cpus();
- for (i = 0; i < perf_cpu_map__nr(map); i++) {
- struct perf_cpu cpu = perf_cpu_map__cpu(map, i);
+ perf_cpu_map__for_each_cpu(cpu, idx, map) {
+ u8 val = 1;

if (cpu.cpu >= nr_cpus) {
perf_cpu_map__put(map);
diff --git a/tools/perf/util/cpumap.c b/tools/perf/util/cpumap.c
index 0581ee0fa5f2..e2287187babd 100644
--- a/tools/perf/util/cpumap.c
+++ b/tools/perf/util/cpumap.c
@@ -629,10 +629,10 @@ static char hex_char(unsigned char val)

size_t cpu_map__snprint_mask(struct perf_cpu_map *map, char *buf, size_t size)
{
- int i, cpu;
+ int idx;
char *ptr = buf;
unsigned char *bitmap;
- struct perf_cpu last_cpu = perf_cpu_map__cpu(map, perf_cpu_map__nr(map) - 1);
+ struct perf_cpu c, last_cpu = perf_cpu_map__max(map);

if (buf == NULL)
return 0;
@@ -643,12 +643,10 @@ size_t cpu_map__snprint_mask(struct perf_cpu_map *map, char *buf, size_t size)
return 0;
}

- for (i = 0; i < perf_cpu_map__nr(map); i++) {
- cpu = perf_cpu_map__cpu(map, i).cpu;
- bitmap[cpu / 8] |= 1 << (cpu % 8);
- }
+ perf_cpu_map__for_each_cpu(c, idx, map)
+ bitmap[c.cpu / 8] |= 1 << (c.cpu % 8);

- for (cpu = last_cpu.cpu / 4 * 4; cpu >= 0; cpu -= 4) {
+ for (int cpu = last_cpu.cpu / 4 * 4; cpu >= 0; cpu -= 4) {
unsigned char bits = bitmap[cpu / 8];

if (cpu % 8)
diff --git a/tools/perf/util/scripting-engines/trace-event-python.c b/tools/perf/util/scripting-engines/trace-event-python.c
index 860e1837ba96..8ef0e5ac03c2 100644
--- a/tools/perf/util/scripting-engines/trace-event-python.c
+++ b/tools/perf/util/scripting-engines/trace-event-python.c
@@ -1693,13 +1693,15 @@ static void python_process_stat(struct perf_stat_config *config,
{
struct perf_thread_map *threads = counter->core.threads;
struct perf_cpu_map *cpus = counter->core.cpus;
- int cpu, thread;

- for (thread = 0; thread < perf_thread_map__nr(threads); thread++) {
- for (cpu = 0; cpu < perf_cpu_map__nr(cpus); cpu++) {
- process_stat(counter, perf_cpu_map__cpu(cpus, cpu),
+ for (int thread = 0; thread < perf_thread_map__nr(threads); thread++) {
+ int idx;
+ struct perf_cpu cpu;
+
+ perf_cpu_map__for_each_cpu(cpu, idx, cpus) {
+ process_stat(counter, cpu,
perf_thread_map__pid(threads, thread), tstamp,
- perf_counts(counter->counts, cpu, thread));
+ perf_counts(counter->counts, idx, thread));
}
}
}
diff --git a/tools/perf/util/session.c b/tools/perf/util/session.c
index 199d3e8df315..d52b58344dbc 100644
--- a/tools/perf/util/session.c
+++ b/tools/perf/util/session.c
@@ -2738,6 +2738,7 @@ int perf_session__cpu_bitmap(struct perf_session *session,
int i, err = -1;
struct perf_cpu_map *map;
int nr_cpus = min(session->header.env.nr_cpus_avail, MAX_NR_CPUS);
+ struct perf_cpu cpu;

for (i = 0; i < PERF_TYPE_MAX; ++i) {
struct evsel *evsel;
@@ -2759,9 +2760,7 @@ int perf_session__cpu_bitmap(struct perf_session *session,
return -1;
}

- for (i = 0; i < perf_cpu_map__nr(map); i++) {
- struct perf_cpu cpu = perf_cpu_map__cpu(map, i);
-
+ perf_cpu_map__for_each_cpu(cpu, i, map) {
if (cpu.cpu >= nr_cpus) {
pr_err("Requested CPU %d too large. "
"Consider raising MAX_NR_CPUS\n", cpu.cpu);
diff --git a/tools/perf/util/svghelper.c b/tools/perf/util/svghelper.c
index 1892e9b6aa7f..2b04f47f4db0 100644
--- a/tools/perf/util/svghelper.c
+++ b/tools/perf/util/svghelper.c
@@ -725,26 +725,24 @@ static void scan_core_topology(int *map, struct topology *t, int nr_cpus)

static int str_to_bitmap(char *s, cpumask_t *b, int nr_cpus)
{
- int i;
- int ret = 0;
- struct perf_cpu_map *m;
- struct perf_cpu c;
+ int idx, ret = 0;
+ struct perf_cpu_map *map;
+ struct perf_cpu cpu;

- m = perf_cpu_map__new(s);
- if (!m)
+ map = perf_cpu_map__new(s);
+ if (!map)
return -1;

- for (i = 0; i < perf_cpu_map__nr(m); i++) {
- c = perf_cpu_map__cpu(m, i);
- if (c.cpu >= nr_cpus) {
+ perf_cpu_map__for_each_cpu(cpu, idx, map) {
+ if (cpu.cpu >= nr_cpus) {
ret = -1;
break;
}

- __set_bit(c.cpu, cpumask_bits(b));
+ __set_bit(cpu.cpu, cpumask_bits(b));
}

- perf_cpu_map__put(m);
+ perf_cpu_map__put(map);

return ret;
}
--
2.43.0.rc1.413.gea7ed67945-goog

2023-11-29 06:03:55

by Ian Rogers

[permalink] [raw]
Subject: [PATCH v1 11/14] perf arm64 header: Remove unnecessary CPU map get and put

In both cases the CPU map is known to be owned by either the caller or a
PMU.
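
As a sketch of the ownership pattern (not the actual call sites), the
caller already holds a reference for the duration of the call, so an
extra get/put inside _get_cpuid() is redundant:

	struct perf_cpu_map *cpus = perf_cpu_map__new_online_cpus(); /* caller's reference */

	_get_cpuid(buf, sz, cpus);	/* borrows cpus, no extra get/put needed inside */
	perf_cpu_map__put(cpus);	/* caller drops its reference when done */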

Signed-off-by: Ian Rogers <[email protected]>
---
tools/perf/arch/arm64/util/header.c | 3 ---
1 file changed, 3 deletions(-)

diff --git a/tools/perf/arch/arm64/util/header.c b/tools/perf/arch/arm64/util/header.c
index 97037499152e..a9de0b5187dd 100644
--- a/tools/perf/arch/arm64/util/header.c
+++ b/tools/perf/arch/arm64/util/header.c
@@ -25,8 +25,6 @@ static int _get_cpuid(char *buf, size_t sz, struct perf_cpu_map *cpus)
if (!sysfs || sz < MIDR_SIZE)
return EINVAL;

- cpus = perf_cpu_map__get(cpus);
-
for (cpu = 0; cpu < perf_cpu_map__nr(cpus); cpu++) {
char path[PATH_MAX];
FILE *file;
@@ -51,7 +49,6 @@ static int _get_cpuid(char *buf, size_t sz, struct perf_cpu_map *cpus)
break;
}

- perf_cpu_map__put(cpus);
return ret;
}

--
2.43.0.rc1.413.gea7ed67945-goog

2023-12-11 19:31:33

by Ian Rogers

[permalink] [raw]
Subject: Re: [PATCH v1 00/14] Clean up libperf cpumap's empty function

On Tue, Nov 28, 2023 at 10:02 PM Ian Rogers <[email protected]> wrote:
>
> Rename and clean up the use of libperf CPU map functions particularly
> focussing on perf_cpu_map__empty that may return true for maps
> containing CPUs but also with an "any CPU"/dummy value.
>
> perf_cpu_map__nr is also troubling in that iterating an empty CPU map
> will yield the "any CPU"/dummy value. Reduce the appearance of some
> calls to this by using the perf_cpu_map__for_each_cpu macro.
>
> Ian Rogers (14):
> libperf cpumap: Rename perf_cpu_map__dummy_new
> libperf cpumap: Rename and prefer sysfs for perf_cpu_map__default_new
> libperf cpumap: Rename perf_cpu_map__empty
> libperf cpumap: Replace usage of perf_cpu_map__new(NULL)
> libperf cpumap: Add for_each_cpu that skips the "any CPU" case
> libperf cpumap: Add any, empty and min helpers
> perf arm-spe/cs-etm: Directly iterate CPU maps
> perf intel-pt/intel-bts: Switch perf_cpu_map__has_any_cpu_or_is_empty
> use
> perf cpumap: Clean up use of perf_cpu_map__has_any_cpu_or_is_empty
> perf top: Avoid repeated function calls
> perf arm64 header: Remove unnecessary CPU map get and put
> perf stat: Remove duplicate cpus_map_matched function
> perf cpumap: Use perf_cpu_map__for_each_cpu when possible
> libperf cpumap: Document perf_cpu_map__nr's behavior

Ping. Thanks,
Ian

> .../perf/Documentation/examples/sampling.c | 2 +-
> .../perf/Documentation/libperf-sampling.txt | 2 +-
> tools/lib/perf/Documentation/libperf.txt | 4 +-
> tools/lib/perf/cpumap.c | 92 +++++++++++++------
> tools/lib/perf/evlist.c | 6 +-
> tools/lib/perf/evsel.c | 2 +-
> tools/lib/perf/include/perf/cpumap.h | 56 ++++++++++-
> tools/lib/perf/libperf.map | 10 +-
> tools/lib/perf/tests/test-cpumap.c | 4 +-
> tools/lib/perf/tests/test-evlist.c | 6 +-
> tools/lib/perf/tests/test-evsel.c | 2 +-
> tools/perf/arch/arm/util/cs-etm.c | 83 +++++++----------
> tools/perf/arch/arm64/util/arm-spe.c | 4 +-
> tools/perf/arch/arm64/util/header.c | 15 +--
> tools/perf/arch/x86/util/intel-bts.c | 4 +-
> tools/perf/arch/x86/util/intel-pt.c | 10 +-
> tools/perf/bench/epoll-ctl.c | 2 +-
> tools/perf/bench/epoll-wait.c | 2 +-
> tools/perf/bench/futex-hash.c | 2 +-
> tools/perf/bench/futex-lock-pi.c | 2 +-
> tools/perf/bench/futex-requeue.c | 2 +-
> tools/perf/bench/futex-wake-parallel.c | 2 +-
> tools/perf/bench/futex-wake.c | 2 +-
> tools/perf/builtin-c2c.c | 6 +-
> tools/perf/builtin-ftrace.c | 2 +-
> tools/perf/builtin-record.c | 4 +-
> tools/perf/builtin-stat.c | 31 +------
> tools/perf/tests/bitmap.c | 13 +--
> tools/perf/tests/code-reading.c | 2 +-
> tools/perf/tests/cpumap.c | 2 +-
> tools/perf/tests/keep-tracking.c | 2 +-
> tools/perf/tests/mmap-basic.c | 2 +-
> tools/perf/tests/openat-syscall-all-cpus.c | 2 +-
> tools/perf/tests/perf-time-to-tsc.c | 2 +-
> tools/perf/tests/sw-clock.c | 2 +-
> tools/perf/tests/switch-tracking.c | 2 +-
> tools/perf/tests/task-exit.c | 2 +-
> tools/perf/tests/topology.c | 48 +++++-----
> tools/perf/util/auxtrace.c | 4 +-
> tools/perf/util/bpf_counter.c | 2 +-
> tools/perf/util/bpf_kwork.c | 16 ++--
> tools/perf/util/bpf_kwork_top.c | 12 +--
> tools/perf/util/cpumap.c | 14 ++-
> tools/perf/util/cputopo.c | 2 +-
> tools/perf/util/evlist.c | 4 +-
> tools/perf/util/evsel.c | 2 +-
> tools/perf/util/perf_api_probe.c | 4 +-
> tools/perf/util/record.c | 4 +-
> .../scripting-engines/trace-event-python.c | 12 ++-
> tools/perf/util/session.c | 5 +-
> tools/perf/util/stat.c | 2 +-
> tools/perf/util/svghelper.c | 20 ++--
> tools/perf/util/top.c | 9 +-
> 53 files changed, 296 insertions(+), 254 deletions(-)
>
> --
> 2.43.0.rc1.413.gea7ed67945-goog
>

2023-12-12 11:21:39

by James Clark

[permalink] [raw]
Subject: Re: [PATCH v1 01/14] libperf cpumap: Rename perf_cpu_map__dummy_new



On 29/11/2023 06:01, Ian Rogers wrote:
> Rename perf_cpu_map__dummy_new to perf_cpu_map__new_any_cpu to better
> indicate this is creating a CPU map for the perf_event_open "any" CPU
> case.
>
> Signed-off-by: Ian Rogers <[email protected]>

Reviewed-by: James Clark <[email protected]>

> ---
> tools/lib/perf/Documentation/libperf.txt | 2 +-
> tools/lib/perf/cpumap.c | 4 ++--
> tools/lib/perf/evsel.c | 2 +-
> tools/lib/perf/include/perf/cpumap.h | 4 ++--
> tools/lib/perf/libperf.map | 2 +-
> tools/lib/perf/tests/test-cpumap.c | 2 +-
> tools/lib/perf/tests/test-evlist.c | 2 +-
> tools/perf/tests/cpumap.c | 2 +-
> tools/perf/tests/sw-clock.c | 2 +-
> tools/perf/tests/task-exit.c | 2 +-
> tools/perf/util/evlist.c | 2 +-
> tools/perf/util/evsel.c | 2 +-
> 12 files changed, 14 insertions(+), 14 deletions(-)
>
> diff --git a/tools/lib/perf/Documentation/libperf.txt b/tools/lib/perf/Documentation/libperf.txt
> index a8f1a237931b..a256a26598b0 100644
> --- a/tools/lib/perf/Documentation/libperf.txt
> +++ b/tools/lib/perf/Documentation/libperf.txt
> @@ -37,7 +37,7 @@ SYNOPSIS
>
> struct perf_cpu_map;
>
> - struct perf_cpu_map *perf_cpu_map__dummy_new(void);
> + struct perf_cpu_map *perf_cpu_map__new_any_cpu(void);
> struct perf_cpu_map *perf_cpu_map__new(const char *cpu_list);
> struct perf_cpu_map *perf_cpu_map__read(FILE *file);
> struct perf_cpu_map *perf_cpu_map__get(struct perf_cpu_map *map);
> diff --git a/tools/lib/perf/cpumap.c b/tools/lib/perf/cpumap.c
> index 2a5a29217374..2bd6aba3d8c9 100644
> --- a/tools/lib/perf/cpumap.c
> +++ b/tools/lib/perf/cpumap.c
> @@ -27,7 +27,7 @@ struct perf_cpu_map *perf_cpu_map__alloc(int nr_cpus)
> return result;
> }
>
> -struct perf_cpu_map *perf_cpu_map__dummy_new(void)
> +struct perf_cpu_map *perf_cpu_map__new_any_cpu(void)
> {
> struct perf_cpu_map *cpus = perf_cpu_map__alloc(1);
>
> @@ -271,7 +271,7 @@ struct perf_cpu_map *perf_cpu_map__new(const char *cpu_list)
> else if (*cpu_list != '\0')
> cpus = cpu_map__default_new();
> else
> - cpus = perf_cpu_map__dummy_new();
> + cpus = perf_cpu_map__new_any_cpu();
> invalid:
> free(tmp_cpus);
> out:
> diff --git a/tools/lib/perf/evsel.c b/tools/lib/perf/evsel.c
> index 8b51b008a81f..c07160953224 100644
> --- a/tools/lib/perf/evsel.c
> +++ b/tools/lib/perf/evsel.c
> @@ -120,7 +120,7 @@ int perf_evsel__open(struct perf_evsel *evsel, struct perf_cpu_map *cpus,
> static struct perf_cpu_map *empty_cpu_map;
>
> if (empty_cpu_map == NULL) {
> - empty_cpu_map = perf_cpu_map__dummy_new();
> + empty_cpu_map = perf_cpu_map__new_any_cpu();
> if (empty_cpu_map == NULL)
> return -ENOMEM;
> }
> diff --git a/tools/lib/perf/include/perf/cpumap.h b/tools/lib/perf/include/perf/cpumap.h
> index e38d859a384d..d0bf218ada11 100644
> --- a/tools/lib/perf/include/perf/cpumap.h
> +++ b/tools/lib/perf/include/perf/cpumap.h
> @@ -19,9 +19,9 @@ struct perf_cache {
> struct perf_cpu_map;
>
> /**
> - * perf_cpu_map__dummy_new - a map with a singular "any CPU"/dummy -1 value.
> + * perf_cpu_map__new_any_cpu - a map with a singular "any CPU"/dummy -1 value.
> */
> -LIBPERF_API struct perf_cpu_map *perf_cpu_map__dummy_new(void);
> +LIBPERF_API struct perf_cpu_map *perf_cpu_map__new_any_cpu(void);
> LIBPERF_API struct perf_cpu_map *perf_cpu_map__default_new(void);
> LIBPERF_API struct perf_cpu_map *perf_cpu_map__new(const char *cpu_list);
> LIBPERF_API struct perf_cpu_map *perf_cpu_map__read(FILE *file);
> diff --git a/tools/lib/perf/libperf.map b/tools/lib/perf/libperf.map
> index 190b56ae923a..a8ff64baea3e 100644
> --- a/tools/lib/perf/libperf.map
> +++ b/tools/lib/perf/libperf.map
> @@ -1,7 +1,7 @@
> LIBPERF_0.0.1 {
> global:
> libperf_init;
> - perf_cpu_map__dummy_new;
> + perf_cpu_map__new_any_cpu;
> perf_cpu_map__default_new;
> perf_cpu_map__get;
> perf_cpu_map__put;
> diff --git a/tools/lib/perf/tests/test-cpumap.c b/tools/lib/perf/tests/test-cpumap.c
> index 87b0510a556f..2c359bdb951e 100644
> --- a/tools/lib/perf/tests/test-cpumap.c
> +++ b/tools/lib/perf/tests/test-cpumap.c
> @@ -21,7 +21,7 @@ int test_cpumap(int argc, char **argv)
>
> libperf_init(libperf_print);
>
> - cpus = perf_cpu_map__dummy_new();
> + cpus = perf_cpu_map__new_any_cpu();
> if (!cpus)
> return -1;
>
> diff --git a/tools/lib/perf/tests/test-evlist.c b/tools/lib/perf/tests/test-evlist.c
> index ed616fc19b4f..ab63878bacb9 100644
> --- a/tools/lib/perf/tests/test-evlist.c
> +++ b/tools/lib/perf/tests/test-evlist.c
> @@ -261,7 +261,7 @@ static int test_mmap_thread(void)
> threads = perf_thread_map__new_dummy();
> __T("failed to create threads", threads);
>
> - cpus = perf_cpu_map__dummy_new();
> + cpus = perf_cpu_map__new_any_cpu();
> __T("failed to create cpus", cpus);
>
> perf_thread_map__set_pid(threads, 0, pid);
> diff --git a/tools/perf/tests/cpumap.c b/tools/perf/tests/cpumap.c
> index 7730fc2ab40b..bd8e396f3e57 100644
> --- a/tools/perf/tests/cpumap.c
> +++ b/tools/perf/tests/cpumap.c
> @@ -213,7 +213,7 @@ static int test__cpu_map_intersect(struct test_suite *test __maybe_unused,
>
> static int test__cpu_map_equal(struct test_suite *test __maybe_unused, int subtest __maybe_unused)
> {
> - struct perf_cpu_map *any = perf_cpu_map__dummy_new();
> + struct perf_cpu_map *any = perf_cpu_map__new_any_cpu();
> struct perf_cpu_map *one = perf_cpu_map__new("1");
> struct perf_cpu_map *two = perf_cpu_map__new("2");
> struct perf_cpu_map *empty = perf_cpu_map__intersect(one, two);
> diff --git a/tools/perf/tests/sw-clock.c b/tools/perf/tests/sw-clock.c
> index 4d7493fa0105..290716783ac6 100644
> --- a/tools/perf/tests/sw-clock.c
> +++ b/tools/perf/tests/sw-clock.c
> @@ -62,7 +62,7 @@ static int __test__sw_clock_freq(enum perf_sw_ids clock_id)
> }
> evlist__add(evlist, evsel);
>
> - cpus = perf_cpu_map__dummy_new();
> + cpus = perf_cpu_map__new_any_cpu();
> threads = thread_map__new_by_tid(getpid());
> if (!cpus || !threads) {
> err = -ENOMEM;
> diff --git a/tools/perf/tests/task-exit.c b/tools/perf/tests/task-exit.c
> index 968dddde6dda..d33d0952025c 100644
> --- a/tools/perf/tests/task-exit.c
> +++ b/tools/perf/tests/task-exit.c
> @@ -70,7 +70,7 @@ static int test__task_exit(struct test_suite *test __maybe_unused, int subtest _
> * evlist__prepare_workload we'll fill in the only thread
> * we're monitoring, the one forked there.
> */
> - cpus = perf_cpu_map__dummy_new();
> + cpus = perf_cpu_map__new_any_cpu();
> threads = thread_map__new_by_tid(-1);
> if (!cpus || !threads) {
> err = -ENOMEM;
> diff --git a/tools/perf/util/evlist.c b/tools/perf/util/evlist.c
> index e36da58522ef..ff7f85ded89d 100644
> --- a/tools/perf/util/evlist.c
> +++ b/tools/perf/util/evlist.c
> @@ -1056,7 +1056,7 @@ int evlist__create_maps(struct evlist *evlist, struct target *target)
> return -1;
>
> if (target__uses_dummy_map(target))
> - cpus = perf_cpu_map__dummy_new();
> + cpus = perf_cpu_map__new_any_cpu();
> else
> cpus = perf_cpu_map__new(target->cpu_list);
>
> diff --git a/tools/perf/util/evsel.c b/tools/perf/util/evsel.c
> index a5da74e3a517..76ef3ab488a2 100644
> --- a/tools/perf/util/evsel.c
> +++ b/tools/perf/util/evsel.c
> @@ -1801,7 +1801,7 @@ static int __evsel__prepare_open(struct evsel *evsel, struct perf_cpu_map *cpus,
>
> if (cpus == NULL) {
> if (empty_cpu_map == NULL) {
> - empty_cpu_map = perf_cpu_map__dummy_new();
> + empty_cpu_map = perf_cpu_map__new_any_cpu();
> if (empty_cpu_map == NULL)
> return -ENOMEM;
> }

2023-12-12 11:25:40

by James Clark

[permalink] [raw]
Subject: Re: [PATCH v1 13/14] perf cpumap: Use perf_cpu_map__for_each_cpu when possible



On 29/11/2023 06:02, Ian Rogers wrote:
> Rather than manually iterating the CPU map, use
> perf_cpu_map__for_each_cpu. When possible, tidy local variables.
>
> Signed-off-by: Ian Rogers <[email protected]>
> ---
> tools/perf/arch/arm64/util/header.c | 10 ++--
> tools/perf/tests/bitmap.c | 13 +++---
> tools/perf/tests/topology.c | 46 +++++++++----------
> tools/perf/util/bpf_kwork.c | 16 ++++---
> tools/perf/util/bpf_kwork_top.c | 12 ++---
> tools/perf/util/cpumap.c | 12 ++---
> .../scripting-engines/trace-event-python.c | 12 +++--
> tools/perf/util/session.c | 5 +-
> tools/perf/util/svghelper.c | 20 ++++----
> 9 files changed, 72 insertions(+), 74 deletions(-)
>
[...]
> diff --git a/tools/perf/util/scripting-engines/trace-event-python.c b/tools/perf/util/scripting-engines/trace-event-python.c
> index 860e1837ba96..8ef0e5ac03c2 100644
> --- a/tools/perf/util/scripting-engines/trace-event-python.c
> +++ b/tools/perf/util/scripting-engines/trace-event-python.c
> @@ -1693,13 +1693,15 @@ static void python_process_stat(struct perf_stat_config *config,
> {
> struct perf_thread_map *threads = counter->core.threads;
> struct perf_cpu_map *cpus = counter->core.cpus;
> - int cpu, thread;
>
> - for (thread = 0; thread < perf_thread_map__nr(threads); thread++) {
> - for (cpu = 0; cpu < perf_cpu_map__nr(cpus); cpu++) {
> - process_stat(counter, perf_cpu_map__cpu(cpus, cpu),
> + for (int thread = 0; thread < perf_thread_map__nr(threads); thread++) {
> + int idx;
> + struct perf_cpu cpu;
> +
> + perf_cpu_map__for_each_cpu(cpu, idx, cpus) {
> + process_stat(counter, cpu,
> perf_thread_map__pid(threads, thread), tstamp,
> - perf_counts(counter->counts, cpu, thread));
> + perf_counts(counter->counts, idx, thread));

I thought changing cpu to idx was fixing a bug, but it turns out the old
code was just hard to read: the variable named cpu was really the map
index rather than a CPU number. So this cleanup is pretty good.
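
To illustrate with a made-up map (not one from the patches): with cpus
built from "2,4,6", the iterator yields (idx, cpu.cpu) pairs of (0,2),
(1,4) and (2,6). perf_counts() is indexed by idx while process_stat()
takes the real CPU number. The old loop did get that right via
perf_cpu_map__cpu(); it just named the index "cpu".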

Reviewed-by: James Clark <[email protected]>

2023-12-12 11:28:54

by James Clark

[permalink] [raw]
Subject: Re: [PATCH v1 12/14] perf stat: Remove duplicate cpus_map_matched function



On 29/11/2023 06:02, Ian Rogers wrote:
> Use libperf's perf_cpu_map__equal that performs the same function.
>
> Signed-off-by: Ian Rogers <[email protected]>
> ---

Reviewed-by: James Clark <[email protected]>

> tools/perf/builtin-stat.c | 22 +---------------------
> 1 file changed, 1 insertion(+), 21 deletions(-)
>
> diff --git a/tools/perf/builtin-stat.c b/tools/perf/builtin-stat.c
> index f583027a0639..8e2f90b5c276 100644
> --- a/tools/perf/builtin-stat.c
> +++ b/tools/perf/builtin-stat.c
> @@ -164,26 +164,6 @@ static struct perf_stat_config stat_config = {
> .iostat_run = false,
> };
>
> -static bool cpus_map_matched(struct evsel *a, struct evsel *b)
> -{
> - if (!a->core.cpus && !b->core.cpus)
> - return true;
> -
> - if (!a->core.cpus || !b->core.cpus)
> - return false;
> -
> - if (perf_cpu_map__nr(a->core.cpus) != perf_cpu_map__nr(b->core.cpus))
> - return false;
> -
> - for (int i = 0; i < perf_cpu_map__nr(a->core.cpus); i++) {
> - if (perf_cpu_map__cpu(a->core.cpus, i).cpu !=
> - perf_cpu_map__cpu(b->core.cpus, i).cpu)
> - return false;
> - }
> -
> - return true;
> -}
> -
> static void evlist__check_cpu_maps(struct evlist *evlist)
> {
> struct evsel *evsel, *warned_leader = NULL;
> @@ -194,7 +174,7 @@ static void evlist__check_cpu_maps(struct evlist *evlist)
> /* Check that leader matches cpus with each member. */
> if (leader == evsel)
> continue;
> - if (cpus_map_matched(leader, evsel))
> + if (perf_cpu_map__equal(leader->core.cpus, evsel->core.cpus))
> continue;
>
> /* If there's mismatch disable the group and warn user. */

2023-12-12 11:44:34

by James Clark

[permalink] [raw]
Subject: Re: [PATCH v1 04/14] libperf cpumap: Replace usage of perf_cpu_map__new(NULL)



On 29/11/2023 06:02, Ian Rogers wrote:
> Passing NULL to perf_cpu_map__new behaves like
> perf_cpu_map__new_online_cpus; directly call
> perf_cpu_map__new_online_cpus to be more intention-revealing.

If it's not too much effort, I would make perf_cpu_map__new() assert if
it's called with NULL now, to avoid any future divergence or hidden
behavior again.
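
A minimal sketch of that guard (hypothetical, not part of this series),
assuming <assert.h> and that the check sits at the top of
perf_cpu_map__new():

	struct perf_cpu_map *perf_cpu_map__new(const char *cpu_list)
	{
		/* NULL used to mean "online CPUs"; require callers to say so explicitly. */
		assert(cpu_list && "use perf_cpu_map__new_online_cpus() instead of perf_cpu_map__new(NULL)");

		/* ... existing cpu_list parsing unchanged ... */
	}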

Either way:

Reviewed-by: James Clark <[email protected]>

>
> Signed-off-by: Ian Rogers <[email protected]>
> ---
> tools/lib/perf/Documentation/examples/sampling.c | 2 +-
> tools/lib/perf/Documentation/libperf-sampling.txt | 2 +-
> tools/lib/perf/evlist.c | 2 +-
> tools/lib/perf/tests/test-evlist.c | 4 ++--
> tools/lib/perf/tests/test-evsel.c | 2 +-
> tools/perf/arch/arm/util/cs-etm.c | 6 +++---
> tools/perf/arch/arm64/util/header.c | 2 +-
> tools/perf/bench/epoll-ctl.c | 2 +-
> tools/perf/bench/epoll-wait.c | 2 +-
> tools/perf/bench/futex-hash.c | 2 +-
> tools/perf/bench/futex-lock-pi.c | 2 +-
> tools/perf/bench/futex-requeue.c | 2 +-
> tools/perf/bench/futex-wake-parallel.c | 2 +-
> tools/perf/bench/futex-wake.c | 2 +-
> tools/perf/builtin-ftrace.c | 2 +-
> tools/perf/tests/code-reading.c | 2 +-
> tools/perf/tests/keep-tracking.c | 2 +-
> tools/perf/tests/mmap-basic.c | 2 +-
> tools/perf/tests/openat-syscall-all-cpus.c | 2 +-
> tools/perf/tests/perf-time-to-tsc.c | 2 +-
> tools/perf/tests/switch-tracking.c | 2 +-
> tools/perf/tests/topology.c | 2 +-
> tools/perf/util/bpf_counter.c | 2 +-
> tools/perf/util/cpumap.c | 2 +-
> tools/perf/util/cputopo.c | 2 +-
> tools/perf/util/evlist.c | 2 +-
> tools/perf/util/perf_api_probe.c | 4 ++--
> tools/perf/util/record.c | 2 +-
> 28 files changed, 32 insertions(+), 32 deletions(-)
>
> diff --git a/tools/lib/perf/Documentation/examples/sampling.c b/tools/lib/perf/Documentation/examples/sampling.c
> index 8e1a926a9cfe..bc142f0664b5 100644
> --- a/tools/lib/perf/Documentation/examples/sampling.c
> +++ b/tools/lib/perf/Documentation/examples/sampling.c
> @@ -39,7 +39,7 @@ int main(int argc, char **argv)
>
> libperf_init(libperf_print);
>
> - cpus = perf_cpu_map__new(NULL);
> + cpus = perf_cpu_map__new_online_cpus();
> if (!cpus) {
> fprintf(stderr, "failed to create cpus\n");
> return -1;
> diff --git a/tools/lib/perf/Documentation/libperf-sampling.txt b/tools/lib/perf/Documentation/libperf-sampling.txt
> index d6ca24f6ef78..2378980fab8a 100644
> --- a/tools/lib/perf/Documentation/libperf-sampling.txt
> +++ b/tools/lib/perf/Documentation/libperf-sampling.txt
> @@ -97,7 +97,7 @@ In this case we will monitor all the available CPUs:
>
> [source,c]
> --
> - 42 cpus = perf_cpu_map__new(NULL);
> + 42 cpus = perf_cpu_map__new_online_cpus();
> 43 if (!cpus) {
> 44 fprintf(stderr, "failed to create cpus\n");
> 45 return -1;
> diff --git a/tools/lib/perf/evlist.c b/tools/lib/perf/evlist.c
> index 75f36218fdd9..058e3ff10f9b 100644
> --- a/tools/lib/perf/evlist.c
> +++ b/tools/lib/perf/evlist.c
> @@ -39,7 +39,7 @@ static void __perf_evlist__propagate_maps(struct perf_evlist *evlist,
> if (evsel->system_wide) {
> /* System wide: set the cpu map of the evsel to all online CPUs. */
> perf_cpu_map__put(evsel->cpus);
> - evsel->cpus = perf_cpu_map__new(NULL);
> + evsel->cpus = perf_cpu_map__new_online_cpus();
> } else if (evlist->has_user_cpus && evsel->is_pmu_core) {
> /*
> * User requested CPUs on a core PMU, ensure the requested CPUs
> diff --git a/tools/lib/perf/tests/test-evlist.c b/tools/lib/perf/tests/test-evlist.c
> index ab63878bacb9..10f70cb41ff1 100644
> --- a/tools/lib/perf/tests/test-evlist.c
> +++ b/tools/lib/perf/tests/test-evlist.c
> @@ -46,7 +46,7 @@ static int test_stat_cpu(void)
> };
> int err, idx;
>
> - cpus = perf_cpu_map__new(NULL);
> + cpus = perf_cpu_map__new_online_cpus();
> __T("failed to create cpus", cpus);
>
> evlist = perf_evlist__new();
> @@ -350,7 +350,7 @@ static int test_mmap_cpus(void)
>
> attr.config = id;
>
> - cpus = perf_cpu_map__new(NULL);
> + cpus = perf_cpu_map__new_online_cpus();
> __T("failed to create cpus", cpus);
>
> evlist = perf_evlist__new();
> diff --git a/tools/lib/perf/tests/test-evsel.c b/tools/lib/perf/tests/test-evsel.c
> index a11fc51bfb68..545ec3150546 100644
> --- a/tools/lib/perf/tests/test-evsel.c
> +++ b/tools/lib/perf/tests/test-evsel.c
> @@ -27,7 +27,7 @@ static int test_stat_cpu(void)
> };
> int err, idx;
>
> - cpus = perf_cpu_map__new(NULL);
> + cpus = perf_cpu_map__new_online_cpus();
> __T("failed to create cpus", cpus);
>
> evsel = perf_evsel__new(&attr);
> diff --git a/tools/perf/arch/arm/util/cs-etm.c b/tools/perf/arch/arm/util/cs-etm.c
> index c6b7b3066324..77e6663c1703 100644
> --- a/tools/perf/arch/arm/util/cs-etm.c
> +++ b/tools/perf/arch/arm/util/cs-etm.c
> @@ -199,7 +199,7 @@ static int cs_etm_validate_config(struct auxtrace_record *itr,
> {
> int i, err = -EINVAL;
> struct perf_cpu_map *event_cpus = evsel->evlist->core.user_requested_cpus;
> - struct perf_cpu_map *online_cpus = perf_cpu_map__new(NULL);
> + struct perf_cpu_map *online_cpus = perf_cpu_map__new_online_cpus();
>
> /* Set option of each CPU we have */
> for (i = 0; i < cpu__max_cpu().cpu; i++) {
> @@ -536,7 +536,7 @@ cs_etm_info_priv_size(struct auxtrace_record *itr __maybe_unused,
> int i;
> int etmv3 = 0, etmv4 = 0, ete = 0;
> struct perf_cpu_map *event_cpus = evlist->core.user_requested_cpus;
> - struct perf_cpu_map *online_cpus = perf_cpu_map__new(NULL);
> + struct perf_cpu_map *online_cpus = perf_cpu_map__new_online_cpus();
>
> /* cpu map is not empty, we have specific CPUs to work with */
> if (!perf_cpu_map__has_any_cpu_or_is_empty(event_cpus)) {
> @@ -802,7 +802,7 @@ static int cs_etm_info_fill(struct auxtrace_record *itr,
> u64 nr_cpu, type;
> struct perf_cpu_map *cpu_map;
> struct perf_cpu_map *event_cpus = session->evlist->core.user_requested_cpus;
> - struct perf_cpu_map *online_cpus = perf_cpu_map__new(NULL);
> + struct perf_cpu_map *online_cpus = perf_cpu_map__new_online_cpus();
> struct cs_etm_recording *ptr =
> container_of(itr, struct cs_etm_recording, itr);
> struct perf_pmu *cs_etm_pmu = ptr->cs_etm_pmu;
> diff --git a/tools/perf/arch/arm64/util/header.c b/tools/perf/arch/arm64/util/header.c
> index a2eef9ec5491..97037499152e 100644
> --- a/tools/perf/arch/arm64/util/header.c
> +++ b/tools/perf/arch/arm64/util/header.c
> @@ -57,7 +57,7 @@ static int _get_cpuid(char *buf, size_t sz, struct perf_cpu_map *cpus)
>
> int get_cpuid(char *buf, size_t sz)
> {
> - struct perf_cpu_map *cpus = perf_cpu_map__new(NULL);
> + struct perf_cpu_map *cpus = perf_cpu_map__new_online_cpus();
> int ret;
>
> if (!cpus)
> diff --git a/tools/perf/bench/epoll-ctl.c b/tools/perf/bench/epoll-ctl.c
> index 6bfffe83dde9..d3db73dac66a 100644
> --- a/tools/perf/bench/epoll-ctl.c
> +++ b/tools/perf/bench/epoll-ctl.c
> @@ -330,7 +330,7 @@ int bench_epoll_ctl(int argc, const char **argv)
> act.sa_sigaction = toggle_done;
> sigaction(SIGINT, &act, NULL);
>
> - cpu = perf_cpu_map__new(NULL);
> + cpu = perf_cpu_map__new_online_cpus();
> if (!cpu)
> goto errmem;
>
> diff --git a/tools/perf/bench/epoll-wait.c b/tools/perf/bench/epoll-wait.c
> index cb5174b53940..06bb3187660a 100644
> --- a/tools/perf/bench/epoll-wait.c
> +++ b/tools/perf/bench/epoll-wait.c
> @@ -444,7 +444,7 @@ int bench_epoll_wait(int argc, const char **argv)
> act.sa_sigaction = toggle_done;
> sigaction(SIGINT, &act, NULL);
>
> - cpu = perf_cpu_map__new(NULL);
> + cpu = perf_cpu_map__new_online_cpus();
> if (!cpu)
> goto errmem;
>
> diff --git a/tools/perf/bench/futex-hash.c b/tools/perf/bench/futex-hash.c
> index 2005a3fa3026..0c69d20efa32 100644
> --- a/tools/perf/bench/futex-hash.c
> +++ b/tools/perf/bench/futex-hash.c
> @@ -138,7 +138,7 @@ int bench_futex_hash(int argc, const char **argv)
> exit(EXIT_FAILURE);
> }
>
> - cpu = perf_cpu_map__new(NULL);
> + cpu = perf_cpu_map__new_online_cpus();
> if (!cpu)
> goto errmem;
>
> diff --git a/tools/perf/bench/futex-lock-pi.c b/tools/perf/bench/futex-lock-pi.c
> index 092cbd52db82..7a4973346180 100644
> --- a/tools/perf/bench/futex-lock-pi.c
> +++ b/tools/perf/bench/futex-lock-pi.c
> @@ -172,7 +172,7 @@ int bench_futex_lock_pi(int argc, const char **argv)
> if (argc)
> goto err;
>
> - cpu = perf_cpu_map__new(NULL);
> + cpu = perf_cpu_map__new_online_cpus();
> if (!cpu)
> err(EXIT_FAILURE, "calloc");
>
> diff --git a/tools/perf/bench/futex-requeue.c b/tools/perf/bench/futex-requeue.c
> index c0035990a33c..d9ad736c1a3e 100644
> --- a/tools/perf/bench/futex-requeue.c
> +++ b/tools/perf/bench/futex-requeue.c
> @@ -174,7 +174,7 @@ int bench_futex_requeue(int argc, const char **argv)
> if (argc)
> goto err;
>
> - cpu = perf_cpu_map__new(NULL);
> + cpu = perf_cpu_map__new_online_cpus();
> if (!cpu)
> err(EXIT_FAILURE, "cpu_map__new");
>
> diff --git a/tools/perf/bench/futex-wake-parallel.c b/tools/perf/bench/futex-wake-parallel.c
> index 5ab0234d74e6..b66df553e561 100644
> --- a/tools/perf/bench/futex-wake-parallel.c
> +++ b/tools/perf/bench/futex-wake-parallel.c
> @@ -264,7 +264,7 @@ int bench_futex_wake_parallel(int argc, const char **argv)
> err(EXIT_FAILURE, "mlockall");
> }
>
> - cpu = perf_cpu_map__new(NULL);
> + cpu = perf_cpu_map__new_online_cpus();
> if (!cpu)
> err(EXIT_FAILURE, "calloc");
>
> diff --git a/tools/perf/bench/futex-wake.c b/tools/perf/bench/futex-wake.c
> index 18a5894af8bb..690fd6d3da13 100644
> --- a/tools/perf/bench/futex-wake.c
> +++ b/tools/perf/bench/futex-wake.c
> @@ -149,7 +149,7 @@ int bench_futex_wake(int argc, const char **argv)
> exit(EXIT_FAILURE);
> }
>
> - cpu = perf_cpu_map__new(NULL);
> + cpu = perf_cpu_map__new_online_cpus();
> if (!cpu)
> err(EXIT_FAILURE, "calloc");
>
> diff --git a/tools/perf/builtin-ftrace.c b/tools/perf/builtin-ftrace.c
> index ac2e6c75f912..eb30c8eca488 100644
> --- a/tools/perf/builtin-ftrace.c
> +++ b/tools/perf/builtin-ftrace.c
> @@ -333,7 +333,7 @@ static int set_tracing_func_irqinfo(struct perf_ftrace *ftrace)
>
> static int reset_tracing_cpu(void)
> {
> - struct perf_cpu_map *cpumap = perf_cpu_map__new(NULL);
> + struct perf_cpu_map *cpumap = perf_cpu_map__new_online_cpus();
> int ret;
>
> ret = set_tracing_cpumask(cpumap);
> diff --git a/tools/perf/tests/code-reading.c b/tools/perf/tests/code-reading.c
> index 8620146d0378..7a3a7bbbec71 100644
> --- a/tools/perf/tests/code-reading.c
> +++ b/tools/perf/tests/code-reading.c
> @@ -610,7 +610,7 @@ static int do_test_code_reading(bool try_kcore)
> goto out_put;
> }
>
> - cpus = perf_cpu_map__new(NULL);
> + cpus = perf_cpu_map__new_online_cpus();
> if (!cpus) {
> pr_debug("perf_cpu_map__new failed\n");
> goto out_put;
> diff --git a/tools/perf/tests/keep-tracking.c b/tools/perf/tests/keep-tracking.c
> index 8f4f9b632e1e..5a3b2bed07f3 100644
> --- a/tools/perf/tests/keep-tracking.c
> +++ b/tools/perf/tests/keep-tracking.c
> @@ -81,7 +81,7 @@ static int test__keep_tracking(struct test_suite *test __maybe_unused, int subte
> threads = thread_map__new(-1, getpid(), UINT_MAX);
> CHECK_NOT_NULL__(threads);
>
> - cpus = perf_cpu_map__new(NULL);
> + cpus = perf_cpu_map__new_online_cpus();
> CHECK_NOT_NULL__(cpus);
>
> evlist = evlist__new();
> diff --git a/tools/perf/tests/mmap-basic.c b/tools/perf/tests/mmap-basic.c
> index 886a13a77a16..012c8ae439fd 100644
> --- a/tools/perf/tests/mmap-basic.c
> +++ b/tools/perf/tests/mmap-basic.c
> @@ -52,7 +52,7 @@ static int test__basic_mmap(struct test_suite *test __maybe_unused, int subtest
> return -1;
> }
>
> - cpus = perf_cpu_map__new(NULL);
> + cpus = perf_cpu_map__new_online_cpus();
> if (cpus == NULL) {
> pr_debug("perf_cpu_map__new\n");
> goto out_free_threads;
> diff --git a/tools/perf/tests/openat-syscall-all-cpus.c b/tools/perf/tests/openat-syscall-all-cpus.c
> index f3275be83a33..fb114118c876 100644
> --- a/tools/perf/tests/openat-syscall-all-cpus.c
> +++ b/tools/perf/tests/openat-syscall-all-cpus.c
> @@ -37,7 +37,7 @@ static int test__openat_syscall_event_on_all_cpus(struct test_suite *test __mayb
> return -1;
> }
>
> - cpus = perf_cpu_map__new(NULL);
> + cpus = perf_cpu_map__new_online_cpus();
> if (cpus == NULL) {
> pr_debug("perf_cpu_map__new\n");
> goto out_thread_map_delete;
> diff --git a/tools/perf/tests/perf-time-to-tsc.c b/tools/perf/tests/perf-time-to-tsc.c
> index efcd71c2738a..bbe2ddeb9b74 100644
> --- a/tools/perf/tests/perf-time-to-tsc.c
> +++ b/tools/perf/tests/perf-time-to-tsc.c
> @@ -93,7 +93,7 @@ static int test__perf_time_to_tsc(struct test_suite *test __maybe_unused, int su
> threads = thread_map__new(-1, getpid(), UINT_MAX);
> CHECK_NOT_NULL__(threads);
>
> - cpus = perf_cpu_map__new(NULL);
> + cpus = perf_cpu_map__new_online_cpus();
> CHECK_NOT_NULL__(cpus);
>
> evlist = evlist__new();
> diff --git a/tools/perf/tests/switch-tracking.c b/tools/perf/tests/switch-tracking.c
> index e52b031bedc5..5cab17a1942e 100644
> --- a/tools/perf/tests/switch-tracking.c
> +++ b/tools/perf/tests/switch-tracking.c
> @@ -351,7 +351,7 @@ static int test__switch_tracking(struct test_suite *test __maybe_unused, int sub
> goto out_err;
> }
>
> - cpus = perf_cpu_map__new(NULL);
> + cpus = perf_cpu_map__new_online_cpus();
> if (!cpus) {
> pr_debug("perf_cpu_map__new failed!\n");
> goto out_err;
> diff --git a/tools/perf/tests/topology.c b/tools/perf/tests/topology.c
> index 9dee63734e66..2a842f53fbb5 100644
> --- a/tools/perf/tests/topology.c
> +++ b/tools/perf/tests/topology.c
> @@ -215,7 +215,7 @@ static int test__session_topology(struct test_suite *test __maybe_unused, int su
> if (session_write_header(path))
> goto free_path;
>
> - map = perf_cpu_map__new(NULL);
> + map = perf_cpu_map__new_online_cpus();
> if (map == NULL) {
> pr_debug("failed to get system cpumap\n");
> goto free_path;
> diff --git a/tools/perf/util/bpf_counter.c b/tools/perf/util/bpf_counter.c
> index 7f9b0e46e008..7a8af60e0f51 100644
> --- a/tools/perf/util/bpf_counter.c
> +++ b/tools/perf/util/bpf_counter.c
> @@ -455,7 +455,7 @@ static int bperf__load(struct evsel *evsel, struct target *target)
> return -1;
>
> if (!all_cpu_map) {
> - all_cpu_map = perf_cpu_map__new(NULL);
> + all_cpu_map = perf_cpu_map__new_online_cpus();
> if (!all_cpu_map)
> return -1;
> }
> diff --git a/tools/perf/util/cpumap.c b/tools/perf/util/cpumap.c
> index 0e090e8bc334..0581ee0fa5f2 100644
> --- a/tools/perf/util/cpumap.c
> +++ b/tools/perf/util/cpumap.c
> @@ -672,7 +672,7 @@ struct perf_cpu_map *cpu_map__online(void) /* thread unsafe */
> static struct perf_cpu_map *online;
>
> if (!online)
> - online = perf_cpu_map__new(NULL); /* from /sys/devices/system/cpu/online */
> + online = perf_cpu_map__new_online_cpus(); /* from /sys/devices/system/cpu/online */
>
> return online;
> }
> diff --git a/tools/perf/util/cputopo.c b/tools/perf/util/cputopo.c
> index 81cfc85f4668..8bbeb2dc76fd 100644
> --- a/tools/perf/util/cputopo.c
> +++ b/tools/perf/util/cputopo.c
> @@ -267,7 +267,7 @@ struct cpu_topology *cpu_topology__new(void)
> ncpus = cpu__max_present_cpu().cpu;
>
> /* build online CPU map */
> - map = perf_cpu_map__new(NULL);
> + map = perf_cpu_map__new_online_cpus();
> if (map == NULL) {
> pr_debug("failed to get system cpumap\n");
> return NULL;
> diff --git a/tools/perf/util/evlist.c b/tools/perf/util/evlist.c
> index ff7f85ded89d..0ed3ce2aa8eb 100644
> --- a/tools/perf/util/evlist.c
> +++ b/tools/perf/util/evlist.c
> @@ -1352,7 +1352,7 @@ static int evlist__create_syswide_maps(struct evlist *evlist)
> * error, and we may not want to do that fallback to a
> * default cpu identity map :-\
> */
> - cpus = perf_cpu_map__new(NULL);
> + cpus = perf_cpu_map__new_online_cpus();
> if (!cpus)
> goto out;
>
> diff --git a/tools/perf/util/perf_api_probe.c b/tools/perf/util/perf_api_probe.c
> index e1e2d701599c..1de3b69cdf4a 100644
> --- a/tools/perf/util/perf_api_probe.c
> +++ b/tools/perf/util/perf_api_probe.c
> @@ -64,7 +64,7 @@ static bool perf_probe_api(setup_probe_fn_t fn)
> struct perf_cpu cpu;
> int ret, i = 0;
>
> - cpus = perf_cpu_map__new(NULL);
> + cpus = perf_cpu_map__new_online_cpus();
> if (!cpus)
> return false;
> cpu = perf_cpu_map__cpu(cpus, 0);
> @@ -140,7 +140,7 @@ bool perf_can_record_cpu_wide(void)
> struct perf_cpu cpu;
> int fd;
>
> - cpus = perf_cpu_map__new(NULL);
> + cpus = perf_cpu_map__new_online_cpus();
> if (!cpus)
> return false;
>
> diff --git a/tools/perf/util/record.c b/tools/perf/util/record.c
> index 40290382b2d7..87e817b3cf7e 100644
> --- a/tools/perf/util/record.c
> +++ b/tools/perf/util/record.c
> @@ -238,7 +238,7 @@ bool evlist__can_select_event(struct evlist *evlist, const char *str)
> evsel = evlist__last(temp_evlist);
>
> if (!evlist || perf_cpu_map__has_any_cpu_or_is_empty(evlist->core.user_requested_cpus)) {
> - struct perf_cpu_map *cpus = perf_cpu_map__new(NULL);
> + struct perf_cpu_map *cpus = perf_cpu_map__new_online_cpus();
>
> if (cpus)
> cpu = perf_cpu_map__cpu(cpus, 0);

2023-12-12 13:54:36

by James Clark

[permalink] [raw]
Subject: Re: [PATCH v1 05/14] libperf cpumap: Add for_each_cpu that skips the "any CPU" case



On 29/11/2023 06:02, Ian Rogers wrote:
> When iterating CPUs in a CPU map it is often desirable to skip the
> "any CPU" (aka dummy) case. Add a helper for this and use in
> builtin-record.
>
> Signed-off-by: Ian Rogers <[email protected]>

Reviewed-by: James Clark <[email protected]>

> ---
> tools/lib/perf/include/perf/cpumap.h | 6 ++++++
> tools/perf/builtin-record.c | 4 +---
> 2 files changed, 7 insertions(+), 3 deletions(-)
>
> diff --git a/tools/lib/perf/include/perf/cpumap.h b/tools/lib/perf/include/perf/cpumap.h
> index 9cf361fc5edc..dbe0a7352b64 100644
> --- a/tools/lib/perf/include/perf/cpumap.h
> +++ b/tools/lib/perf/include/perf/cpumap.h
> @@ -64,6 +64,12 @@ LIBPERF_API bool perf_cpu_map__has_any_cpu(const struct perf_cpu_map *map);
> (idx) < perf_cpu_map__nr(cpus); \
> (idx)++, (cpu) = perf_cpu_map__cpu(cpus, idx))
>
> +#define perf_cpu_map__for_each_cpu_skip_any(_cpu, idx, cpus) \
> + for ((idx) = 0, (_cpu) = perf_cpu_map__cpu(cpus, idx); \
> + (idx) < perf_cpu_map__nr(cpus); \
> + (idx)++, (_cpu) = perf_cpu_map__cpu(cpus, idx)) \
> + if ((_cpu).cpu != -1)
> +
> #define perf_cpu_map__for_each_idx(idx, cpus) \
> for ((idx) = 0; (idx) < perf_cpu_map__nr(cpus); (idx)++)
>
> diff --git a/tools/perf/builtin-record.c b/tools/perf/builtin-record.c
> index 8ec818568662..066f9232e947 100644
> --- a/tools/perf/builtin-record.c
> +++ b/tools/perf/builtin-record.c
> @@ -3580,9 +3580,7 @@ static int record__mmap_cpu_mask_init(struct mmap_cpu_mask *mask, struct perf_cp
> if (cpu_map__is_dummy(cpus))
> return 0;
>
> - perf_cpu_map__for_each_cpu(cpu, idx, cpus) {
> - if (cpu.cpu == -1)
> - continue;
> + perf_cpu_map__for_each_cpu_skip_any(cpu, idx, cpus) {
> /* Return ENODEV is input cpu is greater than max cpu */
> if ((unsigned long)cpu.cpu > mask->nbits)
> return -ENODEV;

2023-12-12 14:01:18

by James Clark

[permalink] [raw]
Subject: Re: [PATCH v1 06/14] libperf cpumap: Add any, empty and min helpers



On 29/11/2023 06:02, Ian Rogers wrote:
> Additional helpers to better replace
> perf_cpu_map__has_any_cpu_or_is_empty.
>
> Signed-off-by: Ian Rogers <[email protected]>
> ---
> tools/lib/perf/cpumap.c | 27 +++++++++++++++++++++++++++
> tools/lib/perf/include/perf/cpumap.h | 16 ++++++++++++++++
> tools/lib/perf/libperf.map | 4 ++++
> 3 files changed, 47 insertions(+)
>
> diff --git a/tools/lib/perf/cpumap.c b/tools/lib/perf/cpumap.c
> index 49fc98e16514..7403819da8fd 100644
> --- a/tools/lib/perf/cpumap.c
> +++ b/tools/lib/perf/cpumap.c
> @@ -316,6 +316,19 @@ bool perf_cpu_map__has_any_cpu_or_is_empty(const struct perf_cpu_map *map)
> return map ? __perf_cpu_map__cpu(map, 0).cpu == -1 : true;
> }
>
> +bool perf_cpu_map__is_any_cpu_or_is_empty(const struct perf_cpu_map *map)
> +{
> + if (!map)
> + return true;
> +
> + return __perf_cpu_map__nr(map) == 1 && __perf_cpu_map__cpu(map, 0).cpu == -1;
> +}
> +
> +bool perf_cpu_map__is_empty(const struct perf_cpu_map *map)
> +{
> + return map == NULL;
> +}
> +

Maybe it doesn't currently happen, but it seems a bit weird that the
'new' function can create a map of length 0, which would return empty ==
false here.

Could we either make this check also return true for maps with length 0,
or prevent the new function from returning a map of 0 length?
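
A rough sketch of the first option, reusing the internal
__perf_cpu_map__nr() helper (just to show the intent, not a tested
change):

	bool perf_cpu_map__is_empty(const struct perf_cpu_map *map)
	{
		/* Treat a NULL map and a zero-length map as equally empty. */
		return map == NULL || __perf_cpu_map__nr(map) == 0;
	}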

> int perf_cpu_map__idx(const struct perf_cpu_map *cpus, struct perf_cpu cpu)
> {
> int low, high;
> @@ -372,6 +385,20 @@ bool perf_cpu_map__has_any_cpu(const struct perf_cpu_map *map)
> return map && __perf_cpu_map__cpu(map, 0).cpu == -1;
> }
>
> +struct perf_cpu perf_cpu_map__min(const struct perf_cpu_map *map)
> +{
> + struct perf_cpu cpu, result = {
> + .cpu = -1
> + };
> + int idx;
> +
> + perf_cpu_map__for_each_cpu_skip_any(cpu, idx, map) {
> + result = cpu;
> + break;
> + }
> + return result;
> +}
> +
> struct perf_cpu perf_cpu_map__max(const struct perf_cpu_map *map)
> {
> struct perf_cpu result = {
> diff --git a/tools/lib/perf/include/perf/cpumap.h b/tools/lib/perf/include/perf/cpumap.h
> index dbe0a7352b64..523e4348fc96 100644
> --- a/tools/lib/perf/include/perf/cpumap.h
> +++ b/tools/lib/perf/include/perf/cpumap.h
> @@ -50,6 +50,22 @@ LIBPERF_API int perf_cpu_map__nr(const struct perf_cpu_map *cpus);
> * perf_cpu_map__has_any_cpu_or_is_empty - is map either empty or has the "any CPU"/dummy value.
> */
> LIBPERF_API bool perf_cpu_map__has_any_cpu_or_is_empty(const struct perf_cpu_map *map);
> +/**
> + * perf_cpu_map__is_any_cpu_or_is_empty - is map either empty or the "any CPU"/dummy value.
> + */
> +LIBPERF_API bool perf_cpu_map__is_any_cpu_or_is_empty(const struct perf_cpu_map *map);
> +/**
> + * perf_cpu_map__is_empty - does the map contain no values and it doesn't
> + * contain the special "any CPU"/dummy value.
> + */
> +LIBPERF_API bool perf_cpu_map__is_empty(const struct perf_cpu_map *map);
> +/**
> + * perf_cpu_map__min - the minimum CPU value or -1 if empty or just the "any CPU"/dummy value.
> + */
> +LIBPERF_API struct perf_cpu perf_cpu_map__min(const struct perf_cpu_map *map);
> +/**
> + * perf_cpu_map__max - the maximum CPU value or -1 if empty or just the "any CPU"/dummy value.
> + */
> LIBPERF_API struct perf_cpu perf_cpu_map__max(const struct perf_cpu_map *map);
> LIBPERF_API bool perf_cpu_map__has(const struct perf_cpu_map *map, struct perf_cpu cpu);
> LIBPERF_API bool perf_cpu_map__equal(const struct perf_cpu_map *lhs,
> diff --git a/tools/lib/perf/libperf.map b/tools/lib/perf/libperf.map
> index 10b3f3722642..2aa79b696032 100644
> --- a/tools/lib/perf/libperf.map
> +++ b/tools/lib/perf/libperf.map
> @@ -10,6 +10,10 @@ LIBPERF_0.0.1 {
> perf_cpu_map__nr;
> perf_cpu_map__cpu;
> perf_cpu_map__has_any_cpu_or_is_empty;
> + perf_cpu_map__is_any_cpu_or_is_empty;
> + perf_cpu_map__is_empty;
> + perf_cpu_map__has_any_cpu;
> + perf_cpu_map__min;
> perf_cpu_map__max;
> perf_cpu_map__has;
> perf_thread_map__new_array;

2023-12-12 14:17:48

by James Clark

[permalink] [raw]
Subject: Re: [PATCH v1 07/14] perf arm-spe/cs-etm: Directly iterate CPU maps



On 29/11/2023 06:02, Ian Rogers wrote:
> Rather than iterate all CPUs and see if they are in CPU maps, directly
> iterate the CPU map. Similarly make use of the intersect
> function. Switch perf_cpu_map__has_any_cpu_or_is_empty to more
> appropriate alternatives.
>
> Signed-off-by: Ian Rogers <[email protected]>
> ---
> tools/perf/arch/arm/util/cs-etm.c | 77 ++++++++++++----------------
> tools/perf/arch/arm64/util/arm-spe.c | 4 +-
> 2 files changed, 34 insertions(+), 47 deletions(-)
>
> diff --git a/tools/perf/arch/arm/util/cs-etm.c b/tools/perf/arch/arm/util/cs-etm.c
> index 77e6663c1703..a68a72f2f668 100644
> --- a/tools/perf/arch/arm/util/cs-etm.c
> +++ b/tools/perf/arch/arm/util/cs-etm.c
> @@ -197,38 +197,32 @@ static int cs_etm_validate_timestamp(struct auxtrace_record *itr,
> static int cs_etm_validate_config(struct auxtrace_record *itr,
> struct evsel *evsel)
> {
> - int i, err = -EINVAL;
> + int idx, err = -EINVAL;
> struct perf_cpu_map *event_cpus = evsel->evlist->core.user_requested_cpus;
> struct perf_cpu_map *online_cpus = perf_cpu_map__new_online_cpus();
> + struct perf_cpu_map *intersect_cpus = perf_cpu_map__intersect(event_cpus, online_cpus);
> + struct perf_cpu cpu;
>
> - /* Set option of each CPU we have */
> - for (i = 0; i < cpu__max_cpu().cpu; i++) {
> - struct perf_cpu cpu = { .cpu = i, };
> -
> - /*
> - * In per-cpu case, do the validation for CPUs to work with.
> - * In per-thread case, the CPU map is empty. Since the traced
> - * program can run on any CPUs in this case, thus don't skip
> - * validation.
> - */
> - if (!perf_cpu_map__has_any_cpu_or_is_empty(event_cpus) &&
> - !perf_cpu_map__has(event_cpus, cpu))
> - continue;

This has broken validation for per-thread sessions.
perf_cpu_map__intersect() doesn't seem to handle the 'any' map case:
intersecting an 'any' map with the online map should arguably return the
online map. At least it would need to for this to work, and it seems to
make sense for it to work that way.

At least that was my initial impression; I only debugged it as far as
seeing that the loop is now skipped entirely.
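
A rough sketch of that behaviour, layered on the existing helpers
(hypothetical and with a made-up name; not what perf_cpu_map__intersect()
does today):

	/* Treat the "any CPU" map as matching everything. */
	static struct perf_cpu_map *intersect_handling_any(struct perf_cpu_map *a,
							   struct perf_cpu_map *b)
	{
		if (perf_cpu_map__has_any_cpu(a))
			return perf_cpu_map__get(b);
		if (perf_cpu_map__has_any_cpu(b))
			return perf_cpu_map__get(a);
		return perf_cpu_map__intersect(a, b);
	}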

> -
> - if (!perf_cpu_map__has(online_cpus, cpu))
> - continue;
> + perf_cpu_map__put(online_cpus);
>
> - err = cs_etm_validate_context_id(itr, evsel, i);
> + /*
> + * Set option of each CPU we have. In per-cpu case, do the validation
> + * for CPUs to work with. In per-thread case, the CPU map is empty.
> + * Since the traced program can run on any CPUs in this case, thus don't
> + * skip validation.
> + */
> + perf_cpu_map__for_each_cpu_skip_any(cpu, idx, intersect_cpus) {
> + err = cs_etm_validate_context_id(itr, evsel, cpu.cpu);
> if (err)
> goto out;
> - err = cs_etm_validate_timestamp(itr, evsel, i);
> + err = cs_etm_validate_timestamp(itr, evsel, idx);
> if (err)
> goto out;
> }
>
> err = 0;
> out:
> - perf_cpu_map__put(online_cpus);
> + perf_cpu_map__put(intersect_cpus);
> return err;
> }
>
> @@ -435,7 +429,7 @@ static int cs_etm_recording_options(struct auxtrace_record *itr,
> * Also the case of per-cpu mmaps, need the contextID in order to be notified
> * when a context switch happened.
> */
> - if (!perf_cpu_map__has_any_cpu_or_is_empty(cpus)) {
> + if (!perf_cpu_map__is_any_cpu_or_is_empty(cpus)) {
> evsel__set_config_if_unset(cs_etm_pmu, cs_etm_evsel,
> "timestamp", 1);
> evsel__set_config_if_unset(cs_etm_pmu, cs_etm_evsel,
> @@ -461,7 +455,7 @@ static int cs_etm_recording_options(struct auxtrace_record *itr,
> evsel->core.attr.sample_period = 1;
>
> /* In per-cpu case, always need the time of mmap events etc */
> - if (!perf_cpu_map__has_any_cpu_or_is_empty(cpus))
> + if (!perf_cpu_map__is_any_cpu_or_is_empty(cpus))
> evsel__set_sample_bit(evsel, TIME);
>
> err = cs_etm_validate_config(itr, cs_etm_evsel);
> @@ -533,38 +527,32 @@ static size_t
> cs_etm_info_priv_size(struct auxtrace_record *itr __maybe_unused,
> struct evlist *evlist __maybe_unused)
> {
> - int i;
> + int idx;
> int etmv3 = 0, etmv4 = 0, ete = 0;
> struct perf_cpu_map *event_cpus = evlist->core.user_requested_cpus;
> struct perf_cpu_map *online_cpus = perf_cpu_map__new_online_cpus();
> + struct perf_cpu cpu;
>
> /* cpu map is not empty, we have specific CPUs to work with */
> - if (!perf_cpu_map__has_any_cpu_or_is_empty(event_cpus)) {
> - for (i = 0; i < cpu__max_cpu().cpu; i++) {
> - struct perf_cpu cpu = { .cpu = i, };
> -
> - if (!perf_cpu_map__has(event_cpus, cpu) ||
> - !perf_cpu_map__has(online_cpus, cpu))
> - continue;
> + if (!perf_cpu_map__is_empty(event_cpus)) {
> + struct perf_cpu_map *intersect_cpus =
> + perf_cpu_map__intersect(event_cpus, online_cpus);
>
> - if (cs_etm_is_ete(itr, i))
> + perf_cpu_map__for_each_cpu_skip_any(cpu, idx, intersect_cpus) {
> + if (cs_etm_is_ete(itr, cpu.cpu))
> ete++;
> - else if (cs_etm_is_etmv4(itr, i))
> + else if (cs_etm_is_etmv4(itr, cpu.cpu))
> etmv4++;
> else
> etmv3++;
> }
> + perf_cpu_map__put(intersect_cpus);
> } else {
> /* get configuration for all CPUs in the system */
> - for (i = 0; i < cpu__max_cpu().cpu; i++) {
> - struct perf_cpu cpu = { .cpu = i, };
> -
> - if (!perf_cpu_map__has(online_cpus, cpu))
> - continue;
> -
> - if (cs_etm_is_ete(itr, i))
> + perf_cpu_map__for_each_cpu(cpu, idx, online_cpus) {
> + if (cs_etm_is_ete(itr, cpu.cpu))
> ete++;
> - else if (cs_etm_is_etmv4(itr, i))
> + else if (cs_etm_is_etmv4(itr, cpu.cpu))
> etmv4++;
> else
> etmv3++;
> @@ -814,15 +802,14 @@ static int cs_etm_info_fill(struct auxtrace_record *itr,
> return -EINVAL;
>
> /* If the cpu_map is empty all online CPUs are involved */
> - if (perf_cpu_map__has_any_cpu_or_is_empty(event_cpus)) {
> + if (perf_cpu_map__is_empty(event_cpus)) {
> cpu_map = online_cpus;
> } else {
> /* Make sure all specified CPUs are online */
> - for (i = 0; i < perf_cpu_map__nr(event_cpus); i++) {
> - struct perf_cpu cpu = { .cpu = i, };
> + struct perf_cpu cpu;
>
> - if (perf_cpu_map__has(event_cpus, cpu) &&
> - !perf_cpu_map__has(online_cpus, cpu))
> + perf_cpu_map__for_each_cpu(cpu, i, event_cpus) {
> + if (!perf_cpu_map__has(online_cpus, cpu))
> return -EINVAL;
> }
>
> diff --git a/tools/perf/arch/arm64/util/arm-spe.c b/tools/perf/arch/arm64/util/arm-spe.c
> index 51ccbfd3d246..0b52e67edb3b 100644
> --- a/tools/perf/arch/arm64/util/arm-spe.c
> +++ b/tools/perf/arch/arm64/util/arm-spe.c
> @@ -232,7 +232,7 @@ static int arm_spe_recording_options(struct auxtrace_record *itr,
> * In the case of per-cpu mmaps, sample CPU for AUX event;
> * also enable the timestamp tracing for samples correlation.
> */
> - if (!perf_cpu_map__has_any_cpu_or_is_empty(cpus)) {
> + if (!perf_cpu_map__is_any_cpu_or_is_empty(cpus)) {
> evsel__set_sample_bit(arm_spe_evsel, CPU);
> evsel__set_config_if_unset(arm_spe_pmu, arm_spe_evsel,
> "ts_enable", 1);
> @@ -265,7 +265,7 @@ static int arm_spe_recording_options(struct auxtrace_record *itr,
> tracking_evsel->core.attr.sample_period = 1;
>
> /* In per-cpu case, always need the time of mmap events etc */
> - if (!perf_cpu_map__has_any_cpu_or_is_empty(cpus)) {
> + if (!perf_cpu_map__is_any_cpu_or_is_empty(cpus)) {
> evsel__set_sample_bit(tracking_evsel, TIME);
> evsel__set_sample_bit(tracking_evsel, CPU);
>

2023-12-12 14:36:32

by James Clark

[permalink] [raw]
Subject: Re: [PATCH v1 07/14] perf arm-spe/cs-etm: Directly iterate CPU maps



On 12/12/2023 14:17, James Clark wrote:
>
>
> On 29/11/2023 06:02, Ian Rogers wrote:
>> Rather than iterate all CPUs and see if they are in CPU maps, directly
>> iterate the CPU map. Similarly make use of the intersect
>> function. Switch perf_cpu_map__has_any_cpu_or_is_empty to more
>> appropriate alternatives.
>>
>> Signed-off-by: Ian Rogers <[email protected]>
>> ---
>> tools/perf/arch/arm/util/cs-etm.c | 77 ++++++++++++----------------
>> tools/perf/arch/arm64/util/arm-spe.c | 4 +-
>> 2 files changed, 34 insertions(+), 47 deletions(-)
>>
>> diff --git a/tools/perf/arch/arm/util/cs-etm.c b/tools/perf/arch/arm/util/cs-etm.c
>> index 77e6663c1703..a68a72f2f668 100644
>> --- a/tools/perf/arch/arm/util/cs-etm.c
>> +++ b/tools/perf/arch/arm/util/cs-etm.c
>> @@ -197,38 +197,32 @@ static int cs_etm_validate_timestamp(struct auxtrace_record *itr,
>> static int cs_etm_validate_config(struct auxtrace_record *itr,
>> struct evsel *evsel)
>> {
>> - int i, err = -EINVAL;
>> + int idx, err = -EINVAL;
>> struct perf_cpu_map *event_cpus = evsel->evlist->core.user_requested_cpus;
>> struct perf_cpu_map *online_cpus = perf_cpu_map__new_online_cpus();
>> + struct perf_cpu_map *intersect_cpus = perf_cpu_map__intersect(event_cpus, online_cpus);
>> + struct perf_cpu cpu;
>>
>> - /* Set option of each CPU we have */
>> - for (i = 0; i < cpu__max_cpu().cpu; i++) {
>> - struct perf_cpu cpu = { .cpu = i, };
>> -
>> - /*
>> - * In per-cpu case, do the validation for CPUs to work with.
>> - * In per-thread case, the CPU map is empty. Since the traced
>> - * program can run on any CPUs in this case, thus don't skip
>> - * validation.
>> - */
>> - if (!perf_cpu_map__has_any_cpu_or_is_empty(event_cpus) &&
>> - !perf_cpu_map__has(event_cpus, cpu))
>> - continue;
>
> This has broken validation for per-thread sessions.
> perf_cpu_map__intersect() doesn't seem to handle the 'any' map case:
> intersecting an 'any' map with the online map should arguably return the
> online map. At least it would need to for this to work, and it seems to
> make sense for it to work that way.
> 
> At least that was my initial impression; I only debugged it as far as
> seeing that the loop is now skipped entirely.
>
>> -
>> - if (!perf_cpu_map__has(online_cpus, cpu))
>> - continue;
>> + perf_cpu_map__put(online_cpus);
>>
>> - err = cs_etm_validate_context_id(itr, evsel, i);
>> + /*
>> + * Set option of each CPU we have. In per-cpu case, do the validation
>> + * for CPUs to work with. In per-thread case, the CPU map is empty.
>> + * Since the traced program can run on any CPUs in this case, thus don't
>> + * skip validation.
>> + */
>> + perf_cpu_map__for_each_cpu_skip_any(cpu, idx, intersect_cpus) {
>> + err = cs_etm_validate_context_id(itr, evsel, cpu.cpu);
>> if (err)
>> goto out;
>> - err = cs_etm_validate_timestamp(itr, evsel, i);
>> + err = cs_etm_validate_timestamp(itr, evsel, idx);
>> if (err)
>> goto out;
>> }
>>
>> err = 0;
>> out:
>> - perf_cpu_map__put(online_cpus);
>> + perf_cpu_map__put(intersect_cpus);
>> return err;
>> }
>>
>> @@ -435,7 +429,7 @@ static int cs_etm_recording_options(struct auxtrace_record *itr,
>> * Also the case of per-cpu mmaps, need the contextID in order to be notified
>> * when a context switch happened.
>> */
>> - if (!perf_cpu_map__has_any_cpu_or_is_empty(cpus)) {
>> + if (!perf_cpu_map__is_any_cpu_or_is_empty(cpus)) {
>> evsel__set_config_if_unset(cs_etm_pmu, cs_etm_evsel,
>> "timestamp", 1);
>> evsel__set_config_if_unset(cs_etm_pmu, cs_etm_evsel,
>> @@ -461,7 +455,7 @@ static int cs_etm_recording_options(struct auxtrace_record *itr,
>> evsel->core.attr.sample_period = 1;
>>
>> /* In per-cpu case, always need the time of mmap events etc */
>> - if (!perf_cpu_map__has_any_cpu_or_is_empty(cpus))
>> + if (!perf_cpu_map__is_any_cpu_or_is_empty(cpus))
>> evsel__set_sample_bit(evsel, TIME);
>>
>> err = cs_etm_validate_config(itr, cs_etm_evsel);
>> @@ -533,38 +527,32 @@ static size_t
>> cs_etm_info_priv_size(struct auxtrace_record *itr __maybe_unused,
>> struct evlist *evlist __maybe_unused)
>> {
>> - int i;
>> + int idx;
>> int etmv3 = 0, etmv4 = 0, ete = 0;
>> struct perf_cpu_map *event_cpus = evlist->core.user_requested_cpus;
>> struct perf_cpu_map *online_cpus = perf_cpu_map__new_online_cpus();
>> + struct perf_cpu cpu;
>>
>> /* cpu map is not empty, we have specific CPUs to work with */
>> - if (!perf_cpu_map__has_any_cpu_or_is_empty(event_cpus)) {
>> - for (i = 0; i < cpu__max_cpu().cpu; i++) {
>> - struct perf_cpu cpu = { .cpu = i, };
>> -
>> - if (!perf_cpu_map__has(event_cpus, cpu) ||
>> - !perf_cpu_map__has(online_cpus, cpu))
>> - continue;
>> + if (!perf_cpu_map__is_empty(event_cpus)) {
>> + struct perf_cpu_map *intersect_cpus =
>> + perf_cpu_map__intersect(event_cpus, online_cpus);
>>
>> - if (cs_etm_is_ete(itr, i))
>> + perf_cpu_map__for_each_cpu_skip_any(cpu, idx, intersect_cpus) {
>> + if (cs_etm_is_ete(itr, cpu.cpu))

Similar problem here. For a per-thread session, the CPU map is not empty
(it's an 'any' map, presumably length 1), so it comes into this first
if, rather than the else below which is for the 'any' scenario.

Then the intersect with online CPUs results in an empty map, so no CPU
metadata is recorded and the session fails.

If you made the intersect work in the way I mentioned above we could
also delete the else below, because that's just another way to convert
from 'any' to 'all online'.
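
For illustration, one way to get the old behaviour back at the call
sites would be a fallback like this rough sketch (the helper name is
invented; only the libperf calls are real), as an alternative to
teaching perf_cpu_map__intersect() about 'any' maps:

	/*
	 * Per-thread sessions have an "any CPU" map, so intersecting it with
	 * the online map yields nothing.  Fall back to the online map in that
	 * case so every online CPU is still covered.
	 */
	static struct perf_cpu_map *cpus_to_check(struct perf_cpu_map *event_cpus,
						  struct perf_cpu_map *online_cpus)
	{
		if (perf_cpu_map__has_any_cpu_or_is_empty(event_cpus))
			return perf_cpu_map__get(online_cpus);

		return perf_cpu_map__intersect(event_cpus, online_cpus);
	}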

>> ete++;
>> - else if (cs_etm_is_etmv4(itr, i))
>> + else if (cs_etm_is_etmv4(itr, cpu.cpu))
>> etmv4++;
>> else
>> etmv3++;
>> }
>> + perf_cpu_map__put(intersect_cpus);
>> } else {
>> /* get configuration for all CPUs in the system */
>> - for (i = 0; i < cpu__max_cpu().cpu; i++) {
>> - struct perf_cpu cpu = { .cpu = i, };
>> -
>> - if (!perf_cpu_map__has(online_cpus, cpu))
>> - continue;
>> -
>> - if (cs_etm_is_ete(itr, i))
>> + perf_cpu_map__for_each_cpu(cpu, idx, online_cpus) {
>> + if (cs_etm_is_ete(itr, cpu.cpu))
>> ete++;
>> - else if (cs_etm_is_etmv4(itr, i))
>> + else if (cs_etm_is_etmv4(itr, cpu.cpu))
>> etmv4++;
>> else
>> etmv3++;
>> @@ -814,15 +802,14 @@ static int cs_etm_info_fill(struct auxtrace_record *itr,
>> return -EINVAL;
>>
>> /* If the cpu_map is empty all online CPUs are involved */
>> - if (perf_cpu_map__has_any_cpu_or_is_empty(event_cpus)) {
>> + if (perf_cpu_map__is_empty(event_cpus)) {
>> cpu_map = online_cpus;
>> } else {
>> /* Make sure all specified CPUs are online */
>> - for (i = 0; i < perf_cpu_map__nr(event_cpus); i++) {
>> - struct perf_cpu cpu = { .cpu = i, };
>> + struct perf_cpu cpu;
>>
>> - if (perf_cpu_map__has(event_cpus, cpu) &&
>> - !perf_cpu_map__has(online_cpus, cpu))
>> + perf_cpu_map__for_each_cpu(cpu, i, event_cpus) {
>> + if (!perf_cpu_map__has(online_cpus, cpu))
>> return -EINVAL;
>> }
>>
>> diff --git a/tools/perf/arch/arm64/util/arm-spe.c b/tools/perf/arch/arm64/util/arm-spe.c
>> index 51ccbfd3d246..0b52e67edb3b 100644
>> --- a/tools/perf/arch/arm64/util/arm-spe.c
>> +++ b/tools/perf/arch/arm64/util/arm-spe.c
>> @@ -232,7 +232,7 @@ static int arm_spe_recording_options(struct auxtrace_record *itr,
>> * In the case of per-cpu mmaps, sample CPU for AUX event;
>> * also enable the timestamp tracing for samples correlation.
>> */
>> - if (!perf_cpu_map__has_any_cpu_or_is_empty(cpus)) {
>> + if (!perf_cpu_map__is_any_cpu_or_is_empty(cpus)) {
>> evsel__set_sample_bit(arm_spe_evsel, CPU);
>> evsel__set_config_if_unset(arm_spe_pmu, arm_spe_evsel,
>> "ts_enable", 1);
>> @@ -265,7 +265,7 @@ static int arm_spe_recording_options(struct auxtrace_record *itr,
>> tracking_evsel->core.attr.sample_period = 1;
>>
>> /* In per-cpu case, always need the time of mmap events etc */
>> - if (!perf_cpu_map__has_any_cpu_or_is_empty(cpus)) {
>> + if (!perf_cpu_map__is_any_cpu_or_is_empty(cpus)) {
>> evsel__set_sample_bit(tracking_evsel, TIME);
>> evsel__set_sample_bit(tracking_evsel, CPU);
>>

2023-12-12 14:51:49

by James Clark

[permalink] [raw]
Subject: Re: [PATCH v1 06/14] libperf cpumap: Add any, empty and min helpers



On 12/12/2023 14:00, James Clark wrote:
>
>
> On 29/11/2023 06:02, Ian Rogers wrote:
>> Additional helpers to better replace
>> perf_cpu_map__has_any_cpu_or_is_empty.
>>
>> Signed-off-by: Ian Rogers <[email protected]>
>> ---
>> tools/lib/perf/cpumap.c | 27 +++++++++++++++++++++++++++
>> tools/lib/perf/include/perf/cpumap.h | 16 ++++++++++++++++
>> tools/lib/perf/libperf.map | 4 ++++
>> 3 files changed, 47 insertions(+)
>>
>> diff --git a/tools/lib/perf/cpumap.c b/tools/lib/perf/cpumap.c
>> index 49fc98e16514..7403819da8fd 100644
>> --- a/tools/lib/perf/cpumap.c
>> +++ b/tools/lib/perf/cpumap.c
>> @@ -316,6 +316,19 @@ bool perf_cpu_map__has_any_cpu_or_is_empty(const struct perf_cpu_map *map)
>> return map ? __perf_cpu_map__cpu(map, 0).cpu == -1 : true;
>> }
>>
>> +bool perf_cpu_map__is_any_cpu_or_is_empty(const struct perf_cpu_map *map)
>> +{
>> + if (!map)
>> + return true;
>> +
>> + return __perf_cpu_map__nr(map) == 1 && __perf_cpu_map__cpu(map, 0).cpu == -1;
>> +}
>> +
>> +bool perf_cpu_map__is_empty(const struct perf_cpu_map *map)
>> +{
>> + return map == NULL;
>> +}
>> +
>
> Maybe it doesn't currently happen, but it seems a bit weird that the
> 'new' function can create a map of length 0 which would return empty ==
> false here.
>
> Could we either make this check also return true for maps with length 0,
> or prevent the new function from returning a map of 0 length?
>

By 'new' I actually meant perf_cpu_map__alloc(). I see
perf_cpu_map__new() actually takes a string, and returns all online CPUs
if the string is null, but similarly I don't see a use case where that
string is NULL.

Having something return either the map parsed from the string, or all
online CPUs if the string was NULL, seems like unnecessary hidden
behaviour, so it would be good to remove that one too.
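
For reference, the two behaviours being conflated look roughly like this
(illustrative snippet; perf_cpu_map__new_online_cpus() is the explicit
replacement this series introduces):

	struct perf_cpu_map *parsed = perf_cpu_map__new("0-3"); /* CPUs 0,1,2,3 */
	struct perf_cpu_map *hidden = perf_cpu_map__new(NULL);  /* silently: all online CPUs */

	/* After the series, the second case is spelled out explicitly: */
	struct perf_cpu_map *online = perf_cpu_map__new_online_cpus();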

>> int perf_cpu_map__idx(const struct perf_cpu_map *cpus, struct perf_cpu cpu)
>> {
>> int low, high;
>> @@ -372,6 +385,20 @@ bool perf_cpu_map__has_any_cpu(const struct perf_cpu_map *map)
>> return map && __perf_cpu_map__cpu(map, 0).cpu == -1;
>> }
>>
>> +struct perf_cpu perf_cpu_map__min(const struct perf_cpu_map *map)
>> +{
>> + struct perf_cpu cpu, result = {
>> + .cpu = -1
>> + };
>> + int idx;
>> +
>> + perf_cpu_map__for_each_cpu_skip_any(cpu, idx, map) {
>> + result = cpu;
>> + break;
>> + }
>> + return result;
>> +}
>> +
>> struct perf_cpu perf_cpu_map__max(const struct perf_cpu_map *map)
>> {
>> struct perf_cpu result = {
>> diff --git a/tools/lib/perf/include/perf/cpumap.h b/tools/lib/perf/include/perf/cpumap.h
>> index dbe0a7352b64..523e4348fc96 100644
>> --- a/tools/lib/perf/include/perf/cpumap.h
>> +++ b/tools/lib/perf/include/perf/cpumap.h
>> @@ -50,6 +50,22 @@ LIBPERF_API int perf_cpu_map__nr(const struct perf_cpu_map *cpus);
>> * perf_cpu_map__has_any_cpu_or_is_empty - is map either empty or has the "any CPU"/dummy value.
>> */
>> LIBPERF_API bool perf_cpu_map__has_any_cpu_or_is_empty(const struct perf_cpu_map *map);
>> +/**
>> + * perf_cpu_map__is_any_cpu_or_is_empty - is map either empty or the "any CPU"/dummy value.
>> + */
>> +LIBPERF_API bool perf_cpu_map__is_any_cpu_or_is_empty(const struct perf_cpu_map *map);
>> +/**
>> + * perf_cpu_map__is_empty - does the map contain no values and it doesn't
>> + * contain the special "any CPU"/dummy value.
>> + */
>> +LIBPERF_API bool perf_cpu_map__is_empty(const struct perf_cpu_map *map);
>> +/**
>> + * perf_cpu_map__min - the minimum CPU value or -1 if empty or just the "any CPU"/dummy value.
>> + */
>> +LIBPERF_API struct perf_cpu perf_cpu_map__min(const struct perf_cpu_map *map);
>> +/**
>> + * perf_cpu_map__max - the maximum CPU value or -1 if empty or just the "any CPU"/dummy value.
>> + */
>> LIBPERF_API struct perf_cpu perf_cpu_map__max(const struct perf_cpu_map *map);
>> LIBPERF_API bool perf_cpu_map__has(const struct perf_cpu_map *map, struct perf_cpu cpu);
>> LIBPERF_API bool perf_cpu_map__equal(const struct perf_cpu_map *lhs,
>> diff --git a/tools/lib/perf/libperf.map b/tools/lib/perf/libperf.map
>> index 10b3f3722642..2aa79b696032 100644
>> --- a/tools/lib/perf/libperf.map
>> +++ b/tools/lib/perf/libperf.map
>> @@ -10,6 +10,10 @@ LIBPERF_0.0.1 {
>> perf_cpu_map__nr;
>> perf_cpu_map__cpu;
>> perf_cpu_map__has_any_cpu_or_is_empty;
>> + perf_cpu_map__is_any_cpu_or_is_empty;
>> + perf_cpu_map__is_empty;
>> + perf_cpu_map__has_any_cpu;
>> + perf_cpu_map__min;
>> perf_cpu_map__max;
>> perf_cpu_map__has;
>> perf_thread_map__new_array;
>

2023-12-12 15:07:14

by James Clark

[permalink] [raw]
Subject: Re: [PATCH v1 06/14] libperf cpumap: Add any, empty and min helpers



On 29/11/2023 06:02, Ian Rogers wrote:
> Additional helpers to better replace
> perf_cpu_map__has_any_cpu_or_is_empty.
>
> Signed-off-by: Ian Rogers <[email protected]>
> ---
> tools/lib/perf/cpumap.c | 27 +++++++++++++++++++++++++++
> tools/lib/perf/include/perf/cpumap.h | 16 ++++++++++++++++
> tools/lib/perf/libperf.map | 4 ++++
> 3 files changed, 47 insertions(+)
>
> diff --git a/tools/lib/perf/cpumap.c b/tools/lib/perf/cpumap.c
> index 49fc98e16514..7403819da8fd 100644
> --- a/tools/lib/perf/cpumap.c
> +++ b/tools/lib/perf/cpumap.c
> @@ -316,6 +316,19 @@ bool perf_cpu_map__has_any_cpu_or_is_empty(const struct perf_cpu_map *map)
> return map ? __perf_cpu_map__cpu(map, 0).cpu == -1 : true;
> }
>
> +bool perf_cpu_map__is_any_cpu_or_is_empty(const struct perf_cpu_map *map)
> +{
> + if (!map)
> + return true;
> +
> + return __perf_cpu_map__nr(map) == 1 && __perf_cpu_map__cpu(map, 0).cpu == -1;
> +}

I'm struggling to understand the relevance of the difference between
has_any and is_any. I see that there is a slight difference, but could it
not be refactored out so we only need one?

Do you ever get an 'any' map that has more than 1 entry? It's quite a
subtle difference that is_any returns false if the first one is 'any'
but then there are subsequent entries. Whereas has_any would return
true. I'm not sure if future readers would be able to appreciate that.

I see has_any is only used twice, both on evlist->all_cpus. Is there
something about that member that means it could have a map that has an
'any' mixed with CPUs? Wouldn't that have the same result as a normal
'any' anyway?

> +
> +bool perf_cpu_map__is_empty(const struct perf_cpu_map *map)
> +{
> + return map == NULL;
> +}
> +
> int perf_cpu_map__idx(const struct perf_cpu_map *cpus, struct perf_cpu cpu)
> {
> int low, high;
> @@ -372,6 +385,20 @@ bool perf_cpu_map__has_any_cpu(const struct perf_cpu_map *map)
> return map && __perf_cpu_map__cpu(map, 0).cpu == -1;
> }
>
> +struct perf_cpu perf_cpu_map__min(const struct perf_cpu_map *map)
> +{
> + struct perf_cpu cpu, result = {
> + .cpu = -1
> + };
> + int idx;
> +
> + perf_cpu_map__for_each_cpu_skip_any(cpu, idx, map) {
> + result = cpu;
> + break;
> + }
> + return result;
> +}
> +
> struct perf_cpu perf_cpu_map__max(const struct perf_cpu_map *map)
> {
> struct perf_cpu result = {
> diff --git a/tools/lib/perf/include/perf/cpumap.h b/tools/lib/perf/include/perf/cpumap.h
> index dbe0a7352b64..523e4348fc96 100644
> --- a/tools/lib/perf/include/perf/cpumap.h
> +++ b/tools/lib/perf/include/perf/cpumap.h
> @@ -50,6 +50,22 @@ LIBPERF_API int perf_cpu_map__nr(const struct perf_cpu_map *cpus);
> * perf_cpu_map__has_any_cpu_or_is_empty - is map either empty or has the "any CPU"/dummy value.
> */
> LIBPERF_API bool perf_cpu_map__has_any_cpu_or_is_empty(const struct perf_cpu_map *map);
> +/**
> + * perf_cpu_map__is_any_cpu_or_is_empty - is map either empty or the "any CPU"/dummy value.
> + */
> +LIBPERF_API bool perf_cpu_map__is_any_cpu_or_is_empty(const struct perf_cpu_map *map);
> +/**
> + * perf_cpu_map__is_empty - does the map contain no values and it doesn't
> + * contain the special "any CPU"/dummy value.
> + */
> +LIBPERF_API bool perf_cpu_map__is_empty(const struct perf_cpu_map *map);
> +/**
> + * perf_cpu_map__min - the minimum CPU value or -1 if empty or just the "any CPU"/dummy value.
> + */
> +LIBPERF_API struct perf_cpu perf_cpu_map__min(const struct perf_cpu_map *map);
> +/**
> + * perf_cpu_map__max - the maximum CPU value or -1 if empty or just the "any CPU"/dummy value.
> + */
> LIBPERF_API struct perf_cpu perf_cpu_map__max(const struct perf_cpu_map *map);
> LIBPERF_API bool perf_cpu_map__has(const struct perf_cpu_map *map, struct perf_cpu cpu);
> LIBPERF_API bool perf_cpu_map__equal(const struct perf_cpu_map *lhs,
> diff --git a/tools/lib/perf/libperf.map b/tools/lib/perf/libperf.map
> index 10b3f3722642..2aa79b696032 100644
> --- a/tools/lib/perf/libperf.map
> +++ b/tools/lib/perf/libperf.map
> @@ -10,6 +10,10 @@ LIBPERF_0.0.1 {
> perf_cpu_map__nr;
> perf_cpu_map__cpu;
> perf_cpu_map__has_any_cpu_or_is_empty;
> + perf_cpu_map__is_any_cpu_or_is_empty;
> + perf_cpu_map__is_empty;
> + perf_cpu_map__has_any_cpu;
> + perf_cpu_map__min;
> perf_cpu_map__max;
> perf_cpu_map__has;
> perf_thread_map__new_array;

2023-12-12 15:11:15

by James Clark

[permalink] [raw]
Subject: Re: [PATCH v1 09/14] perf cpumap: Clean up use of perf_cpu_map__has_any_cpu_or_is_empty



On 29/11/2023 06:02, Ian Rogers wrote:
> Most uses of what was perf_cpu_map__empty but is now
> perf_cpu_map__has_any_cpu_or_is_empty want to do something with the
> CPU map if it contains CPUs. Replace uses of
> perf_cpu_map__has_any_cpu_or_is_empty with other helpers so that CPUs
> within the map can be handled.
>
> Signed-off-by: Ian Rogers <[email protected]>

Reviewed-by: James Clark <[email protected]>

> ---
> tools/perf/builtin-c2c.c | 6 +-----
> tools/perf/builtin-stat.c | 9 ++++-----
> tools/perf/util/auxtrace.c | 4 ++--
> tools/perf/util/record.c | 2 +-
> tools/perf/util/stat.c | 2 +-
> 5 files changed, 9 insertions(+), 14 deletions(-)
>
> diff --git a/tools/perf/builtin-c2c.c b/tools/perf/builtin-c2c.c
> index f78eea9e2153..ef7ed53a4b4e 100644
> --- a/tools/perf/builtin-c2c.c
> +++ b/tools/perf/builtin-c2c.c
> @@ -2319,11 +2319,7 @@ static int setup_nodes(struct perf_session *session)
>
> nodes[node] = set;
>
> - /* empty node, skip */
> - if (perf_cpu_map__has_any_cpu_or_is_empty(map))
> - continue;
> -
> - perf_cpu_map__for_each_cpu(cpu, idx, map) {
> + perf_cpu_map__for_each_cpu_skip_any(cpu, idx, map) {
> __set_bit(cpu.cpu, set);
>
> if (WARN_ONCE(cpu2node[cpu.cpu] != -1, "node/cpu topology bug"))
> diff --git a/tools/perf/builtin-stat.c b/tools/perf/builtin-stat.c
> index 3303aa20f326..f583027a0639 100644
> --- a/tools/perf/builtin-stat.c
> +++ b/tools/perf/builtin-stat.c
> @@ -1316,10 +1316,9 @@ static int cpu__get_cache_id_from_map(struct perf_cpu cpu, char *map)
> * be the first online CPU in the cache domain else use the
> * first online CPU of the cache domain as the ID.
> */
> - if (perf_cpu_map__has_any_cpu_or_is_empty(cpu_map))
> + id = perf_cpu_map__min(cpu_map).cpu;
> + if (id == -1)
> id = cpu.cpu;
> - else
> - id = perf_cpu_map__cpu(cpu_map, 0).cpu;
>
> /* Free the perf_cpu_map used to find the cache ID */
> perf_cpu_map__put(cpu_map);
> @@ -1622,7 +1621,7 @@ static int perf_stat_init_aggr_mode(void)
> * taking the highest cpu number to be the size of
> * the aggregation translate cpumap.
> */
> - if (!perf_cpu_map__has_any_cpu_or_is_empty(evsel_list->core.user_requested_cpus))
> + if (!perf_cpu_map__is_any_cpu_or_is_empty(evsel_list->core.user_requested_cpus))
> nr = perf_cpu_map__max(evsel_list->core.user_requested_cpus).cpu;
> else
> nr = 0;
> @@ -2289,7 +2288,7 @@ int process_stat_config_event(struct perf_session *session,
>
> perf_event__read_stat_config(&stat_config, &event->stat_config);
>
> - if (perf_cpu_map__has_any_cpu_or_is_empty(st->cpus)) {
> + if (perf_cpu_map__is_empty(st->cpus)) {
> if (st->aggr_mode != AGGR_UNSET)
> pr_warning("warning: processing task data, aggregation mode not set\n");
> } else if (st->aggr_mode != AGGR_UNSET) {
> diff --git a/tools/perf/util/auxtrace.c b/tools/perf/util/auxtrace.c
> index 3684e6009b63..6b1d4bafad59 100644
> --- a/tools/perf/util/auxtrace.c
> +++ b/tools/perf/util/auxtrace.c
> @@ -174,7 +174,7 @@ void auxtrace_mmap_params__set_idx(struct auxtrace_mmap_params *mp,
> struct evlist *evlist,
> struct evsel *evsel, int idx)
> {
> - bool per_cpu = !perf_cpu_map__has_any_cpu_or_is_empty(evlist->core.user_requested_cpus);
> + bool per_cpu = !perf_cpu_map__has_any_cpu(evlist->core.user_requested_cpus);
>
> mp->mmap_needed = evsel->needs_auxtrace_mmap;
>
> @@ -648,7 +648,7 @@ int auxtrace_parse_snapshot_options(struct auxtrace_record *itr,
>
> static int evlist__enable_event_idx(struct evlist *evlist, struct evsel *evsel, int idx)
> {
> - bool per_cpu_mmaps = !perf_cpu_map__has_any_cpu_or_is_empty(evlist->core.user_requested_cpus);
> + bool per_cpu_mmaps = !perf_cpu_map__has_any_cpu(evlist->core.user_requested_cpus);
>
> if (per_cpu_mmaps) {
> struct perf_cpu evlist_cpu = perf_cpu_map__cpu(evlist->core.all_cpus, idx);
> diff --git a/tools/perf/util/record.c b/tools/perf/util/record.c
> index 87e817b3cf7e..e867de8ddaaa 100644
> --- a/tools/perf/util/record.c
> +++ b/tools/perf/util/record.c
> @@ -237,7 +237,7 @@ bool evlist__can_select_event(struct evlist *evlist, const char *str)
>
> evsel = evlist__last(temp_evlist);
>
> - if (!evlist || perf_cpu_map__has_any_cpu_or_is_empty(evlist->core.user_requested_cpus)) {
> + if (!evlist || perf_cpu_map__is_any_cpu_or_is_empty(evlist->core.user_requested_cpus)) {
> struct perf_cpu_map *cpus = perf_cpu_map__new_online_cpus();
>
> if (cpus)
> diff --git a/tools/perf/util/stat.c b/tools/perf/util/stat.c
> index 012c4946b9c4..915808a6211a 100644
> --- a/tools/perf/util/stat.c
> +++ b/tools/perf/util/stat.c
> @@ -315,7 +315,7 @@ static int check_per_pkg(struct evsel *counter, struct perf_counts_values *vals,
> if (!counter->per_pkg)
> return 0;
>
> - if (perf_cpu_map__has_any_cpu_or_is_empty(cpus))
> + if (perf_cpu_map__is_any_cpu_or_is_empty(cpus))
> return 0;
>
> if (!mask) {

2023-12-12 15:12:10

by James Clark

[permalink] [raw]
Subject: Re: [PATCH v1 10/14] perf top: Avoid repeated function calls



On 29/11/2023 06:02, Ian Rogers wrote:
> Add a local variable to avoid repeated calls to perf_cpu_map__nr.
>
> Signed-off-by: Ian Rogers <[email protected]>

Reviewed-by: James Clark <[email protected]>

> ---
> tools/perf/util/top.c | 9 ++++-----
> 1 file changed, 4 insertions(+), 5 deletions(-)
>
> diff --git a/tools/perf/util/top.c b/tools/perf/util/top.c
> index be7157de0451..4db3d1bd686c 100644
> --- a/tools/perf/util/top.c
> +++ b/tools/perf/util/top.c
> @@ -28,6 +28,7 @@ size_t perf_top__header_snprintf(struct perf_top *top, char *bf, size_t size)
> struct record_opts *opts = &top->record_opts;
> struct target *target = &opts->target;
> size_t ret = 0;
> + int nr_cpus;
>
> if (top->samples) {
> samples_per_sec = top->samples / top->delay_secs;
> @@ -93,19 +94,17 @@ size_t perf_top__header_snprintf(struct perf_top *top, char *bf, size_t size)
> else
> ret += SNPRINTF(bf + ret, size - ret, " (all");
>
> + nr_cpus = perf_cpu_map__nr(top->evlist->core.user_requested_cpus);
> if (target->cpu_list)
> ret += SNPRINTF(bf + ret, size - ret, ", CPU%s: %s)",
> - perf_cpu_map__nr(top->evlist->core.user_requested_cpus) > 1
> - ? "s" : "",
> + nr_cpus > 1 ? "s" : "",
> target->cpu_list);
> else {
> if (target->tid)
> ret += SNPRINTF(bf + ret, size - ret, ")");
> else
> ret += SNPRINTF(bf + ret, size - ret, ", %d CPU%s)",
> - perf_cpu_map__nr(top->evlist->core.user_requested_cpus),
> - perf_cpu_map__nr(top->evlist->core.user_requested_cpus) > 1
> - ? "s" : "");
> + nr_cpus, nr_cpus > 1 ? "s" : "");
> }
>
> perf_top__reset_sample_counters(top);

2023-12-12 15:13:32

by James Clark

[permalink] [raw]
Subject: Re: [PATCH v1 11/14] perf arm64 header: Remove unnecessary CPU map get and put



On 29/11/2023 06:02, Ian Rogers wrote:
> In both cases the CPU map is known to be owned by either the caller or
> a PMU.
>
> Signed-off-by: Ian Rogers <[email protected]>

Reviewed-by: James Clark <[email protected]>

> ---
> tools/perf/arch/arm64/util/header.c | 3 ---
> 1 file changed, 3 deletions(-)
>
> diff --git a/tools/perf/arch/arm64/util/header.c b/tools/perf/arch/arm64/util/header.c
> index 97037499152e..a9de0b5187dd 100644
> --- a/tools/perf/arch/arm64/util/header.c
> +++ b/tools/perf/arch/arm64/util/header.c
> @@ -25,8 +25,6 @@ static int _get_cpuid(char *buf, size_t sz, struct perf_cpu_map *cpus)
> if (!sysfs || sz < MIDR_SIZE)
> return EINVAL;
>
> - cpus = perf_cpu_map__get(cpus);
> -
> for (cpu = 0; cpu < perf_cpu_map__nr(cpus); cpu++) {
> char path[PATH_MAX];
> FILE *file;
> @@ -51,7 +49,6 @@ static int _get_cpuid(char *buf, size_t sz, struct perf_cpu_map *cpus)
> break;
> }
>
> - perf_cpu_map__put(cpus);
> return ret;
> }
>

2023-12-12 15:21:10

by James Clark

[permalink] [raw]
Subject: Re: [PATCH v1 14/14] libperf cpumap: Document perf_cpu_map__nr's behavior



On 29/11/2023 06:02, Ian Rogers wrote:
> perf_cpu_map__nr's behavior around an empty CPU map is strange as it
> returns that there is 1 CPU. Changing code that may rely on this
> behavior is hard, so we can at least document the behavior.
>
> Signed-off-by: Ian Rogers <[email protected]>
> ---
> tools/lib/perf/include/perf/cpumap.h | 11 +++++++++++
> 1 file changed, 11 insertions(+)
>
> diff --git a/tools/lib/perf/include/perf/cpumap.h b/tools/lib/perf/include/perf/cpumap.h
> index 523e4348fc96..90457d17fb2f 100644
> --- a/tools/lib/perf/include/perf/cpumap.h
> +++ b/tools/lib/perf/include/perf/cpumap.h
> @@ -44,7 +44,18 @@ LIBPERF_API struct perf_cpu_map *perf_cpu_map__merge(struct perf_cpu_map *orig,
> LIBPERF_API struct perf_cpu_map *perf_cpu_map__intersect(struct perf_cpu_map *orig,
> struct perf_cpu_map *other);
> LIBPERF_API void perf_cpu_map__put(struct perf_cpu_map *map);
> +/**
> + * perf_cpu_map__cpu - get the CPU value at the given index. Returns -1 if index
> + * is invalid.
> + */
> LIBPERF_API struct perf_cpu perf_cpu_map__cpu(const struct perf_cpu_map *cpus, int idx);
> +/**
> + * perf_cpu_map__nr - for an empty map returns 1, as perf_cpu_map__cpu returns a
> + * cpu of -1 for an invalid index, this makes an empty map
> + * look like it contains the "any CPU"/dummy value. Otherwise
> + * the result is the number CPUs in the map plus one if the
> + * "any CPU"/dummy value is present.

Hmmm... I'm not sure whether to laugh or cry at that API.
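
To spell it out (the values follow directly from the doc comment above;
an "empty" map here is a NULL one):

	struct perf_cpu_map *empty = NULL;
	struct perf_cpu_map *two   = perf_cpu_map__new("2,3");

	perf_cpu_map__nr(empty);          /* 1, not 0 */
	perf_cpu_map__cpu(empty, 0).cpu;  /* -1, the "any CPU"/dummy value */
	perf_cpu_map__nr(two);            /* 2 */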

Reviewed-by: James Clark <[email protected]>

> + */
> LIBPERF_API int perf_cpu_map__nr(const struct perf_cpu_map *cpus);
> /**
> * perf_cpu_map__has_any_cpu_or_is_empty - is map either empty or has the "any CPU"/dummy value.

2023-12-12 17:59:49

by Arnaldo Carvalho de Melo

[permalink] [raw]
Subject: Re: [PATCH v1 00/14] Clean up libperf cpumap's empty function

On Tue, Nov 28, 2023 at 10:01:57PM -0800, Ian Rogers wrote:
> Rename and clean up the use of libperf CPU map functions particularly
> focussing on perf_cpu_map__empty that may return true for maps
> containing CPUs but also with an "any CPU"/dummy value.
>
> perf_cpu_map__nr is also troubling in that iterating an empty CPU map
> will yield the "any CPU"/dummy value. Reduce the appearance of some
> calls to this by using the perf_cpu_map__for_each_cpu macro.
>
> Ian Rogers (14):
> libperf cpumap: Rename perf_cpu_map__dummy_new
> libperf cpumap: Rename and prefer sysfs for perf_cpu_map__default_new
> libperf cpumap: Rename perf_cpu_map__empty
> libperf cpumap: Replace usage of perf_cpu_map__new(NULL)
> libperf cpumap: Add for_each_cpu that skips the "any CPU" case

Applied 1-6 with James' Reviewed-by tags. It would be good to have Adrian
check the PT and BTS parts, testing the end result, if he thinks it's all
ok.

- Arnaldo

2023-12-12 20:03:13

by Ian Rogers

[permalink] [raw]
Subject: Re: [PATCH v1 06/14] libperf cpumap: Add any, empty and min helpers

On Tue, Dec 12, 2023 at 6:01 AM James Clark <[email protected]> wrote:
>
>
>
> On 29/11/2023 06:02, Ian Rogers wrote:
> > Additional helpers to better replace
> > perf_cpu_map__has_any_cpu_or_is_empty.
> >
> > Signed-off-by: Ian Rogers <[email protected]>
> > ---
> > tools/lib/perf/cpumap.c | 27 +++++++++++++++++++++++++++
> > tools/lib/perf/include/perf/cpumap.h | 16 ++++++++++++++++
> > tools/lib/perf/libperf.map | 4 ++++
> > 3 files changed, 47 insertions(+)
> >
> > diff --git a/tools/lib/perf/cpumap.c b/tools/lib/perf/cpumap.c
> > index 49fc98e16514..7403819da8fd 100644
> > --- a/tools/lib/perf/cpumap.c
> > +++ b/tools/lib/perf/cpumap.c
> > @@ -316,6 +316,19 @@ bool perf_cpu_map__has_any_cpu_or_is_empty(const struct perf_cpu_map *map)
> > return map ? __perf_cpu_map__cpu(map, 0).cpu == -1 : true;
> > }
> >
> > +bool perf_cpu_map__is_any_cpu_or_is_empty(const struct perf_cpu_map *map)
> > +{
> > + if (!map)
> > + return true;
> > +
> > + return __perf_cpu_map__nr(map) == 1 && __perf_cpu_map__cpu(map, 0).cpu == -1;
> > +}
> > +
> > +bool perf_cpu_map__is_empty(const struct perf_cpu_map *map)
> > +{
> > + return map == NULL;
> > +}
> > +
>
> Maybe it doesn't currently happen, but it seems a bit weird that the
> 'new' function can create a map of length 0 which would return empty ==
> false here.
>
> Could we either make this check also return true for maps with length 0,
> or prevent the new function from returning a map of 0 length?

We can add an assert or return NULL for size 0 in alloc. I think I
prefer the return NULL option. It should never happen, but it feels like
the right defensive thing to do.
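
Roughly, only the guard matters; something like this sketch (the real
allocator's refcounting and struct layout are elided, and the field name
below is illustrative):

	struct perf_cpu_map *perf_cpu_map__alloc(int nr_cpus)
	{
		struct perf_cpu_map *cpus;

		if (nr_cpus <= 0)
			return NULL;	/* never hand out a zero-length map */

		cpus = malloc(sizeof(*cpus) + sizeof(struct perf_cpu) * nr_cpus);
		if (cpus)
			cpus->nr = nr_cpus;	/* refcount init elided */
		return cpus;
	}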

Thanks,
Ian

> > int perf_cpu_map__idx(const struct perf_cpu_map *cpus, struct perf_cpu cpu)
> > {
> > int low, high;
> > @@ -372,6 +385,20 @@ bool perf_cpu_map__has_any_cpu(const struct perf_cpu_map *map)
> > return map && __perf_cpu_map__cpu(map, 0).cpu == -1;
> > }
> >
> > +struct perf_cpu perf_cpu_map__min(const struct perf_cpu_map *map)
> > +{
> > + struct perf_cpu cpu, result = {
> > + .cpu = -1
> > + };
> > + int idx;
> > +
> > + perf_cpu_map__for_each_cpu_skip_any(cpu, idx, map) {
> > + result = cpu;
> > + break;
> > + }
> > + return result;
> > +}
> > +
> > struct perf_cpu perf_cpu_map__max(const struct perf_cpu_map *map)
> > {
> > struct perf_cpu result = {
> > diff --git a/tools/lib/perf/include/perf/cpumap.h b/tools/lib/perf/include/perf/cpumap.h
> > index dbe0a7352b64..523e4348fc96 100644
> > --- a/tools/lib/perf/include/perf/cpumap.h
> > +++ b/tools/lib/perf/include/perf/cpumap.h
> > @@ -50,6 +50,22 @@ LIBPERF_API int perf_cpu_map__nr(const struct perf_cpu_map *cpus);
> > * perf_cpu_map__has_any_cpu_or_is_empty - is map either empty or has the "any CPU"/dummy value.
> > */
> > LIBPERF_API bool perf_cpu_map__has_any_cpu_or_is_empty(const struct perf_cpu_map *map);
> > +/**
> > + * perf_cpu_map__is_any_cpu_or_is_empty - is map either empty or the "any CPU"/dummy value.
> > + */
> > +LIBPERF_API bool perf_cpu_map__is_any_cpu_or_is_empty(const struct perf_cpu_map *map);
> > +/**
> > + * perf_cpu_map__is_empty - does the map contain no values and it doesn't
> > + * contain the special "any CPU"/dummy value.
> > + */
> > +LIBPERF_API bool perf_cpu_map__is_empty(const struct perf_cpu_map *map);
> > +/**
> > + * perf_cpu_map__min - the minimum CPU value or -1 if empty or just the "any CPU"/dummy value.
> > + */
> > +LIBPERF_API struct perf_cpu perf_cpu_map__min(const struct perf_cpu_map *map);
> > +/**
> > + * perf_cpu_map__max - the maximum CPU value or -1 if empty or just the "any CPU"/dummy value.
> > + */
> > LIBPERF_API struct perf_cpu perf_cpu_map__max(const struct perf_cpu_map *map);
> > LIBPERF_API bool perf_cpu_map__has(const struct perf_cpu_map *map, struct perf_cpu cpu);
> > LIBPERF_API bool perf_cpu_map__equal(const struct perf_cpu_map *lhs,
> > diff --git a/tools/lib/perf/libperf.map b/tools/lib/perf/libperf.map
> > index 10b3f3722642..2aa79b696032 100644
> > --- a/tools/lib/perf/libperf.map
> > +++ b/tools/lib/perf/libperf.map
> > @@ -10,6 +10,10 @@ LIBPERF_0.0.1 {
> > perf_cpu_map__nr;
> > perf_cpu_map__cpu;
> > perf_cpu_map__has_any_cpu_or_is_empty;
> > + perf_cpu_map__is_any_cpu_or_is_empty;
> > + perf_cpu_map__is_empty;
> > + perf_cpu_map__has_any_cpu;
> > + perf_cpu_map__min;
> > perf_cpu_map__max;
> > perf_cpu_map__has;
> > perf_thread_map__new_array;

2023-12-12 20:27:41

by Ian Rogers

[permalink] [raw]
Subject: Re: [PATCH v1 06/14] libperf cpumap: Add any, empty and min helpers

On Tue, Dec 12, 2023 at 7:06 AM James Clark <[email protected]> wrote:
>
>
>
> On 29/11/2023 06:02, Ian Rogers wrote:
> > Additional helpers to better replace
> > perf_cpu_map__has_any_cpu_or_is_empty.
> >
> > Signed-off-by: Ian Rogers <[email protected]>
> > ---
> > tools/lib/perf/cpumap.c | 27 +++++++++++++++++++++++++++
> > tools/lib/perf/include/perf/cpumap.h | 16 ++++++++++++++++
> > tools/lib/perf/libperf.map | 4 ++++
> > 3 files changed, 47 insertions(+)
> >
> > diff --git a/tools/lib/perf/cpumap.c b/tools/lib/perf/cpumap.c
> > index 49fc98e16514..7403819da8fd 100644
> > --- a/tools/lib/perf/cpumap.c
> > +++ b/tools/lib/perf/cpumap.c
> > @@ -316,6 +316,19 @@ bool perf_cpu_map__has_any_cpu_or_is_empty(const struct perf_cpu_map *map)
> > return map ? __perf_cpu_map__cpu(map, 0).cpu == -1 : true;
> > }
> >
> > +bool perf_cpu_map__is_any_cpu_or_is_empty(const struct perf_cpu_map *map)
> > +{
> > + if (!map)
> > + return true;
> > +
> > + return __perf_cpu_map__nr(map) == 1 && __perf_cpu_map__cpu(map, 0).cpu == -1;
> > +}
>
> I'm struggling to understand the relevance of the difference between
> has_any and is_any. I see that there is a slight difference, but could it
> not be refactored out so we only need one?

Yep, that's what these changes are working toward. For has_any the set
{-1, 0, 1} would return true while is_any would return false.
Previously the has_any behavior was called "empty", which I think is
actively misleading.
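
Concretely, from the two implementations quoted above (maps written as
their contents):

	map          has_any_cpu_or_is_empty  is_any_cpu_or_is_empty
	NULL         true                     true
	{-1}         true                     true
	{-1, 0, 1}   true                     false
	{0, 1}       false                    false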

> Do you ever get an 'any' map that has more than 1 entry? It's quite a
> subtle difference that is_any returns false if the first one is 'any'
> but then there are subsequent entries. Whereas has_any would return
> true. I'm not sure if future readers would be able to appreciate that.
>
> I see has_any is only used twice, both on evlist->all_cpus. Is there
> something about that member that means it could have a map that has an
> 'any' mixed with CPUs? Wouldn't that have the same result as a normal
> 'any' anyway?

The dummy event may be opened on any CPU but then a particular event
may be opened on certain CPUs. We merge CPU maps in places like evlist
so that we can iterate the appropriate CPUs for events and
open/enable/disable/close all events on a certain CPU at the same time
(we also set the affinity to that CPU to avoid IPIs). What I'm hoping
to do in these changes is reduce the ambiguity, the corner cases are
by their nature unusual.

An example of a corner case is, uncore events often get opened just on
CPU 0 but on a multi-socket system you may have a CPU 32 that also
needs to open the event. Previous code treated the CPU map index and
value it contained pretty interchangeably. This is often fine for the
core PMU but is clearly wrong in this uncore case, {0, 32} has indexes
0 and 1 but those indexes don't match the CPU numbers. The case of -1
has often previously been called dummy but I'm trying to call it the
"any CPU" case to match the perf_event_open man page (I'm hoping it
also makes it less ambiguous with any CPU being used with a particular
event like cycles, calling it dummy makes the event sound like it may
have sideband data). The difference between "all CPUs" and "any CPU"
is that an evsel for all CPUs would need the event opening
individually on each CPU, while any CPU events are a single open call.
Any CPU is only valid to perf_event_open if a PID is specified.
Depending on the set up there could be overlaps in what they count but
hopefully it is clearer what the distinction is. I believe the case of
"any CPU" and specific CPU numbers is more common with aux buffers and
Adrian has mentioned needing it for intel-pt.
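
A small illustration of the index/value distinction from the uncore
example (assuming a build against libperf's headers and a machine where
CPUs 0 and 32 exist; error handling omitted):

	#include <stdio.h>
	#include <perf/cpumap.h>

	static void show_uncore_map(void)
	{
		struct perf_cpu_map *map = perf_cpu_map__new("0,32");
		struct perf_cpu cpu;
		int idx;

		/* idx runs 0,1 while cpu.cpu is 0,32: the index is not the CPU number */
		perf_cpu_map__for_each_cpu(cpu, idx, map)
			printf("index %d -> CPU %d\n", idx, cpu.cpu);

		perf_cpu_map__put(map);
	}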

Thanks,
Ian

> > +
> > +bool perf_cpu_map__is_empty(const struct perf_cpu_map *map)
> > +{
> > + return map == NULL;
> > +}
> > +
> > int perf_cpu_map__idx(const struct perf_cpu_map *cpus, struct perf_cpu cpu)
> > {
> > int low, high;
> > @@ -372,6 +385,20 @@ bool perf_cpu_map__has_any_cpu(const struct perf_cpu_map *map)
> > return map && __perf_cpu_map__cpu(map, 0).cpu == -1;
> > }
> >
> > +struct perf_cpu perf_cpu_map__min(const struct perf_cpu_map *map)
> > +{
> > + struct perf_cpu cpu, result = {
> > + .cpu = -1
> > + };
> > + int idx;
> > +
> > + perf_cpu_map__for_each_cpu_skip_any(cpu, idx, map) {
> > + result = cpu;
> > + break;
> > + }
> > + return result;
> > +}
> > +
> > struct perf_cpu perf_cpu_map__max(const struct perf_cpu_map *map)
> > {
> > struct perf_cpu result = {
> > diff --git a/tools/lib/perf/include/perf/cpumap.h b/tools/lib/perf/include/perf/cpumap.h
> > index dbe0a7352b64..523e4348fc96 100644
> > --- a/tools/lib/perf/include/perf/cpumap.h
> > +++ b/tools/lib/perf/include/perf/cpumap.h
> > @@ -50,6 +50,22 @@ LIBPERF_API int perf_cpu_map__nr(const struct perf_cpu_map *cpus);
> > * perf_cpu_map__has_any_cpu_or_is_empty - is map either empty or has the "any CPU"/dummy value.
> > */
> > LIBPERF_API bool perf_cpu_map__has_any_cpu_or_is_empty(const struct perf_cpu_map *map);
> > +/**
> > + * perf_cpu_map__is_any_cpu_or_is_empty - is map either empty or the "any CPU"/dummy value.
> > + */
> > +LIBPERF_API bool perf_cpu_map__is_any_cpu_or_is_empty(const struct perf_cpu_map *map);
> > +/**
> > + * perf_cpu_map__is_empty - does the map contain no values and it doesn't
> > + * contain the special "any CPU"/dummy value.
> > + */
> > +LIBPERF_API bool perf_cpu_map__is_empty(const struct perf_cpu_map *map);
> > +/**
> > + * perf_cpu_map__min - the minimum CPU value or -1 if empty or just the "any CPU"/dummy value.
> > + */
> > +LIBPERF_API struct perf_cpu perf_cpu_map__min(const struct perf_cpu_map *map);
> > +/**
> > + * perf_cpu_map__max - the maximum CPU value or -1 if empty or just the "any CPU"/dummy value.
> > + */
> > LIBPERF_API struct perf_cpu perf_cpu_map__max(const struct perf_cpu_map *map);
> > LIBPERF_API bool perf_cpu_map__has(const struct perf_cpu_map *map, struct perf_cpu cpu);
> > LIBPERF_API bool perf_cpu_map__equal(const struct perf_cpu_map *lhs,
> > diff --git a/tools/lib/perf/libperf.map b/tools/lib/perf/libperf.map
> > index 10b3f3722642..2aa79b696032 100644
> > --- a/tools/lib/perf/libperf.map
> > +++ b/tools/lib/perf/libperf.map
> > @@ -10,6 +10,10 @@ LIBPERF_0.0.1 {
> > perf_cpu_map__nr;
> > perf_cpu_map__cpu;
> > perf_cpu_map__has_any_cpu_or_is_empty;
> > + perf_cpu_map__is_any_cpu_or_is_empty;
> > + perf_cpu_map__is_empty;
> > + perf_cpu_map__has_any_cpu;
> > + perf_cpu_map__min;
> > perf_cpu_map__max;
> > perf_cpu_map__has;
> > perf_thread_map__new_array;

2023-12-13 12:48:54

by Adrian Hunter

[permalink] [raw]
Subject: Re: [PATCH v1 00/14] Clean up libperf cpumap's empty function

On 12/12/23 19:59, Arnaldo Carvalho de Melo wrote:
> On Tue, Nov 28, 2023 at 10:01:57PM -0800, Ian Rogers wrote:
>> Rename and clean up the use of libperf CPU map functions particularly
>> focussing on perf_cpu_map__empty that may return true for maps
>> containing CPUs but also with an "any CPU"/dummy value.
>>
>> perf_cpu_map__nr is also troubling in that iterating an empty CPU map
>> will yield the "any CPU"/dummy value. Reduce the appearance of some
>> calls to this by using the perf_cpu_map__for_each_cpu macro.
>>
>> Ian Rogers (14):
>> libperf cpumap: Rename perf_cpu_map__dummy_new
>> libperf cpumap: Rename and prefer sysfs for perf_cpu_map__default_new
>> libperf cpumap: Rename perf_cpu_map__empty
>> libperf cpumap: Replace usage of perf_cpu_map__new(NULL)
>> libperf cpumap: Add for_each_cpu that skips the "any CPU" case
>
> Applied 1-6 with James' Reviewed-by tags. It would be good to have Adrian
> check the PT and BTS parts, testing the end result, if he thinks it's all
> ok.
>

Changing the same lines of code twice in the same patch set is not
really kernel style.

Some of the churn could be reduced by applying and rebasing on the
patch below.

Ideally the patches should be reordered so that the lines only
change once i.e.

perf_cpu_map__empty -> <replacement>

instead of

perf_cpu_map__empty -> <rename> -> <replacement>

If that is too much trouble, please accept my ack instead:

Acked-by: Adrian Hunter <[email protected]>


From: Adrian Hunter <[email protected]>

Factor out perf_cpu_map__empty() use to reduce the occurrences and make
the code more readable.

Signed-off-by: Adrian Hunter <[email protected]>
---
tools/perf/arch/x86/util/intel-bts.c | 11 ++++++++---
tools/perf/arch/x86/util/intel-pt.c | 21 ++++++++++++---------
2 files changed, 20 insertions(+), 12 deletions(-)

diff --git a/tools/perf/arch/x86/util/intel-bts.c b/tools/perf/arch/x86/util/intel-bts.c
index d2c8cac11470..cebe994eb9db 100644
--- a/tools/perf/arch/x86/util/intel-bts.c
+++ b/tools/perf/arch/x86/util/intel-bts.c
@@ -59,6 +59,11 @@ intel_bts_info_priv_size(struct auxtrace_record *itr __maybe_unused,
return INTEL_BTS_AUXTRACE_PRIV_SIZE;
}

+static bool intel_bts_per_cpu(struct evlist *evlist)
+{
+ return !perf_cpu_map__empty(evlist->core.user_requested_cpus);
+}
+
static int intel_bts_info_fill(struct auxtrace_record *itr,
struct perf_session *session,
struct perf_record_auxtrace_info *auxtrace_info,
@@ -109,8 +114,8 @@ static int intel_bts_recording_options(struct auxtrace_record *itr,
struct intel_bts_recording *btsr =
container_of(itr, struct intel_bts_recording, itr);
struct perf_pmu *intel_bts_pmu = btsr->intel_bts_pmu;
+ bool per_cpu_mmaps = intel_bts_per_cpu(evlist);
struct evsel *evsel, *intel_bts_evsel = NULL;
- const struct perf_cpu_map *cpus = evlist->core.user_requested_cpus;
bool privileged = perf_event_paranoid_check(-1);

if (opts->auxtrace_sample_mode) {
@@ -143,7 +148,7 @@ static int intel_bts_recording_options(struct auxtrace_record *itr,
if (!opts->full_auxtrace)
return 0;

- if (opts->full_auxtrace && !perf_cpu_map__empty(cpus)) {
+ if (opts->full_auxtrace && per_cpu_mmaps) {
pr_err(INTEL_BTS_PMU_NAME " does not support per-cpu recording\n");
return -EINVAL;
}
@@ -224,7 +229,7 @@ static int intel_bts_recording_options(struct auxtrace_record *itr,
* In the case of per-cpu mmaps, we need the CPU on the
* AUX event.
*/
- if (!perf_cpu_map__empty(cpus))
+ if (per_cpu_mmaps)
evsel__set_sample_bit(intel_bts_evsel, CPU);
}

diff --git a/tools/perf/arch/x86/util/intel-pt.c b/tools/perf/arch/x86/util/intel-pt.c
index fa0c718b9e72..0ff9147c75da 100644
--- a/tools/perf/arch/x86/util/intel-pt.c
+++ b/tools/perf/arch/x86/util/intel-pt.c
@@ -312,6 +312,11 @@ static void intel_pt_tsc_ctc_ratio(u32 *n, u32 *d)
*d = eax;
}

+static bool intel_pt_per_cpu(struct evlist *evlist)
+{
+ return !perf_cpu_map__empty(evlist->core.user_requested_cpus);
+}
+
static int intel_pt_info_fill(struct auxtrace_record *itr,
struct perf_session *session,
struct perf_record_auxtrace_info *auxtrace_info,
@@ -322,7 +327,8 @@ static int intel_pt_info_fill(struct auxtrace_record *itr,
struct perf_pmu *intel_pt_pmu = ptr->intel_pt_pmu;
struct perf_event_mmap_page *pc;
struct perf_tsc_conversion tc = { .time_mult = 0, };
- bool cap_user_time_zero = false, per_cpu_mmaps;
+ bool per_cpu_mmaps = intel_pt_per_cpu(session->evlist);
+ bool cap_user_time_zero = false;
u64 tsc_bit, mtc_bit, mtc_freq_bits, cyc_bit, noretcomp_bit;
u32 tsc_ctc_ratio_n, tsc_ctc_ratio_d;
unsigned long max_non_turbo_ratio;
@@ -369,8 +375,6 @@ static int intel_pt_info_fill(struct auxtrace_record *itr,
ui__warning("Intel Processor Trace: TSC not available\n");
}

- per_cpu_mmaps = !perf_cpu_map__empty(session->evlist->core.user_requested_cpus);
-
auxtrace_info->type = PERF_AUXTRACE_INTEL_PT;
auxtrace_info->priv[INTEL_PT_PMU_TYPE] = intel_pt_pmu->type;
auxtrace_info->priv[INTEL_PT_TIME_SHIFT] = tc.time_shift;
@@ -604,8 +608,8 @@ static int intel_pt_recording_options(struct auxtrace_record *itr,
struct perf_pmu *intel_pt_pmu = ptr->intel_pt_pmu;
bool have_timing_info, need_immediate = false;
struct evsel *evsel, *intel_pt_evsel = NULL;
- const struct perf_cpu_map *cpus = evlist->core.user_requested_cpus;
bool privileged = perf_event_paranoid_check(-1);
+ bool per_cpu_mmaps = intel_pt_per_cpu(evlist);
u64 tsc_bit;
int err;

@@ -774,8 +778,7 @@ static int intel_pt_recording_options(struct auxtrace_record *itr,
* Per-cpu recording needs sched_switch events to distinguish different
* threads.
*/
- if (have_timing_info && !perf_cpu_map__empty(cpus) &&
- !record_opts__no_switch_events(opts)) {
+ if (have_timing_info && per_cpu_mmaps && !record_opts__no_switch_events(opts)) {
if (perf_can_record_switch_events()) {
bool cpu_wide = !target__none(&opts->target) &&
!target__has_task(&opts->target);
@@ -832,7 +835,7 @@ static int intel_pt_recording_options(struct auxtrace_record *itr,
* In the case of per-cpu mmaps, we need the CPU on the
* AUX event.
*/
- if (!perf_cpu_map__empty(cpus))
+ if (per_cpu_mmaps)
evsel__set_sample_bit(intel_pt_evsel, CPU);
}

@@ -858,7 +861,7 @@ static int intel_pt_recording_options(struct auxtrace_record *itr,
tracking_evsel->immediate = true;

/* In per-cpu case, always need the time of mmap events etc */
- if (!perf_cpu_map__empty(cpus)) {
+ if (per_cpu_mmaps) {
evsel__set_sample_bit(tracking_evsel, TIME);
/* And the CPU for switch events */
evsel__set_sample_bit(tracking_evsel, CPU);
@@ -870,7 +873,7 @@ static int intel_pt_recording_options(struct auxtrace_record *itr,
* Warn the user when we do not have enough information to decode i.e.
* per-cpu with no sched_switch (except workload-only).
*/
- if (!ptr->have_sched_switch && !perf_cpu_map__empty(cpus) &&
+ if (!ptr->have_sched_switch && per_cpu_mmaps &&
!target__none(&opts->target) &&
!intel_pt_evsel->core.attr.exclude_user)
ui__warning("Intel Processor Trace decoding will not be possible except for kernel tracing!\n");
--
2.34.1



2023-12-13 13:48:48

by James Clark

[permalink] [raw]
Subject: Re: [PATCH v1 06/14] libperf cpumap: Add any, empty and min helpers



On 12/12/2023 20:27, Ian Rogers wrote:
> On Tue, Dec 12, 2023 at 7:06 AM James Clark <[email protected]> wrote:
>>
>>
>>
>> On 29/11/2023 06:02, Ian Rogers wrote:
>>> Additional helpers to better replace
>>> perf_cpu_map__has_any_cpu_or_is_empty.
>>>
>>> Signed-off-by: Ian Rogers <[email protected]>
>>> ---
>>> tools/lib/perf/cpumap.c | 27 +++++++++++++++++++++++++++
>>> tools/lib/perf/include/perf/cpumap.h | 16 ++++++++++++++++
>>> tools/lib/perf/libperf.map | 4 ++++
>>> 3 files changed, 47 insertions(+)
>>>
>>> diff --git a/tools/lib/perf/cpumap.c b/tools/lib/perf/cpumap.c
>>> index 49fc98e16514..7403819da8fd 100644
>>> --- a/tools/lib/perf/cpumap.c
>>> +++ b/tools/lib/perf/cpumap.c
>>> @@ -316,6 +316,19 @@ bool perf_cpu_map__has_any_cpu_or_is_empty(const struct perf_cpu_map *map)
>>> return map ? __perf_cpu_map__cpu(map, 0).cpu == -1 : true;
>>> }
>>>
>>> +bool perf_cpu_map__is_any_cpu_or_is_empty(const struct perf_cpu_map *map)
>>> +{
>>> + if (!map)
>>> + return true;
>>> +
>>> + return __perf_cpu_map__nr(map) == 1 && __perf_cpu_map__cpu(map, 0).cpu == -1;
>>> +}
>>
>> I'm struggling to understand the relevance of the difference between
>> has_any and is_any. I see that there is a slight difference, but could it
>> not be refactored out so we only need one?
>
> Yep, that's what these changes are working toward. For has_any the set
> {-1, 0, 1} would return true while is_any would return false.
> Previously the has_any behavior was called "empty", which I think is
> actively misleading.
>
>> Do you ever get an 'any' map that has more than 1 entry? It's quite a
>> subtle difference that is_any returns false if the first one is 'any'
>> but then there are subsequent entries. Whereas has_any would return
>> true. I'm not sure if future readers would be able to appreciate that.
>>
>> I see has_any is only used twice, both on evlist->all_cpus. Is there
>> something about that member that means it could have a map that has an
>> 'any' mixed with CPUs? Wouldn't that have the same result as a normal
>> 'any' anyway?
>
> The dummy event may be opened on any CPU but then a particular event
> may be opened on certain CPUs. We merge CPU maps in places like evlist
> so that we can iterate the appropriate CPUs for events and
> open/enable/disable/close all events on a certain CPU at the same time
> (we also set the affinity to that CPU to avoid IPIs). What I'm hoping
> to do in these changes is reduce the ambiguity, the corner cases are
> by their nature unusual.
>
> An example of a corner case is, uncore events often get opened just on
> CPU 0 but on a multi-socket system you may have a CPU 32 that also
> needs to open the event. Previous code treated the CPU map index and
> value it contained pretty interchangeably. This is often fine for the
> core PMU but is clearly wrong in this uncore case, {0, 32} has indexes
> 0 and 1 but those indexes don't match the CPU numbers. The case of -1
> has often previously been called dummy but I'm trying to call it the
> "any CPU" case to match the perf_event_open man page (I'm hoping it
> also makes it less ambiguous with any CPU being used with a particular
> event like cycles, calling it dummy makes the event sound like it may
> have sideband data). The difference between "all CPUs" and "any CPU"
> is that an evsel for all CPUs would need the event opening
> individually on each CPU, while any CPU events are a single open call.
> Any CPU is only valid to perf_event_open if a PID is specified.
> Depending on the set up there could be overlaps in what they count but
> hopefully it is clearer what the distinction is. I believe the case of
> "any CPU" and specific CPU numbers is more common with aux buffers and
> Adrian has mentioned needing it for intel-pt.
>
> Thanks,
> Ian
>

Thanks for explaining. I suppose I didn't realise that 'any' could be
merged with per-cpu maps, but it makes sense.

>>> +
>>> +bool perf_cpu_map__is_empty(const struct perf_cpu_map *map)
>>> +{
>>> + return map == NULL;
>>> +}
>>> +
>>> int perf_cpu_map__idx(const struct perf_cpu_map *cpus, struct perf_cpu cpu)
>>> {
>>> int low, high;
>>> @@ -372,6 +385,20 @@ bool perf_cpu_map__has_any_cpu(const struct perf_cpu_map *map)
>>> return map && __perf_cpu_map__cpu(map, 0).cpu == -1;
>>> }
>>>
>>> +struct perf_cpu perf_cpu_map__min(const struct perf_cpu_map *map)
>>> +{
>>> + struct perf_cpu cpu, result = {
>>> + .cpu = -1
>>> + };
>>> + int idx;
>>> +
>>> + perf_cpu_map__for_each_cpu_skip_any(cpu, idx, map) {
>>> + result = cpu;
>>> + break;
>>> + }
>>> + return result;
>>> +}
>>> +
>>> struct perf_cpu perf_cpu_map__max(const struct perf_cpu_map *map)
>>> {
>>> struct perf_cpu result = {
>>> diff --git a/tools/lib/perf/include/perf/cpumap.h b/tools/lib/perf/include/perf/cpumap.h
>>> index dbe0a7352b64..523e4348fc96 100644
>>> --- a/tools/lib/perf/include/perf/cpumap.h
>>> +++ b/tools/lib/perf/include/perf/cpumap.h
>>> @@ -50,6 +50,22 @@ LIBPERF_API int perf_cpu_map__nr(const struct perf_cpu_map *cpus);
>>> * perf_cpu_map__has_any_cpu_or_is_empty - is map either empty or has the "any CPU"/dummy value.
>>> */
>>> LIBPERF_API bool perf_cpu_map__has_any_cpu_or_is_empty(const struct perf_cpu_map *map);
>>> +/**
>>> + * perf_cpu_map__is_any_cpu_or_is_empty - is map either empty or the "any CPU"/dummy value.
>>> + */
>>> +LIBPERF_API bool perf_cpu_map__is_any_cpu_or_is_empty(const struct perf_cpu_map *map);
>>> +/**
>>> + * perf_cpu_map__is_empty - does the map contain no values and it doesn't
>>> + * contain the special "any CPU"/dummy value.
>>> + */
>>> +LIBPERF_API bool perf_cpu_map__is_empty(const struct perf_cpu_map *map);
>>> +/**
>>> + * perf_cpu_map__min - the minimum CPU value or -1 if empty or just the "any CPU"/dummy value.
>>> + */
>>> +LIBPERF_API struct perf_cpu perf_cpu_map__min(const struct perf_cpu_map *map);
>>> +/**
>>> + * perf_cpu_map__max - the maximum CPU value or -1 if empty or just the "any CPU"/dummy value.
>>> + */
>>> LIBPERF_API struct perf_cpu perf_cpu_map__max(const struct perf_cpu_map *map);
>>> LIBPERF_API bool perf_cpu_map__has(const struct perf_cpu_map *map, struct perf_cpu cpu);
>>> LIBPERF_API bool perf_cpu_map__equal(const struct perf_cpu_map *lhs,
>>> diff --git a/tools/lib/perf/libperf.map b/tools/lib/perf/libperf.map
>>> index 10b3f3722642..2aa79b696032 100644
>>> --- a/tools/lib/perf/libperf.map
>>> +++ b/tools/lib/perf/libperf.map
>>> @@ -10,6 +10,10 @@ LIBPERF_0.0.1 {
>>> perf_cpu_map__nr;
>>> perf_cpu_map__cpu;
>>> perf_cpu_map__has_any_cpu_or_is_empty;
>>> + perf_cpu_map__is_any_cpu_or_is_empty;
>>> + perf_cpu_map__is_empty;
>>> + perf_cpu_map__has_any_cpu;
>>> + perf_cpu_map__min;
>>> perf_cpu_map__max;
>>> perf_cpu_map__has;
>>> perf_thread_map__new_array;

2023-12-13 23:29:45

by Namhyung Kim

[permalink] [raw]
Subject: Re: [PATCH v1 00/14] Clean up libperf cpumap's empty function

On Tue, Nov 28, 2023 at 10:02 PM Ian Rogers <[email protected]> wrote:
>
> Rename and clean up the use of libperf CPU map functions particularly
> focussing on perf_cpu_map__empty that may return true for maps
> containing CPUs but also with an "any CPU"/dummy value.
>
> perf_cpu_map__nr is also troubling in that iterating an empty CPU map
> will yield the "any CPU"/dummy value. Reduce the appearance of some
> calls to this by using the perf_cpu_map__for_each_cpu macro.
>
> Ian Rogers (14):
> libperf cpumap: Rename perf_cpu_map__dummy_new
> libperf cpumap: Rename and prefer sysfs for perf_cpu_map__default_new
> libperf cpumap: Rename perf_cpu_map__empty
> libperf cpumap: Replace usage of perf_cpu_map__new(NULL)
> libperf cpumap: Add for_each_cpu that skips the "any CPU" case
> libperf cpumap: Add any, empty and min helpers
> perf arm-spe/cs-etm: Directly iterate CPU maps
> perf intel-pt/intel-bts: Switch perf_cpu_map__has_any_cpu_or_is_empty
> use
> perf cpumap: Clean up use of perf_cpu_map__has_any_cpu_or_is_empty
> perf top: Avoid repeated function calls
> perf arm64 header: Remove unnecessary CPU map get and put
> perf stat: Remove duplicate cpus_map_matched function
> perf cpumap: Use perf_cpu_map__for_each_cpu when possible
> libperf cpumap: Document perf_cpu_map__nr's behavior

Acked-by: Namhyung Kim <[email protected]>

Thanks,
Namhyung

2023-12-14 13:49:38

by Arnaldo Carvalho de Melo

[permalink] [raw]
Subject: Re: [PATCH v1 00/14] Clean up libperf cpumap's empty function

On Wed, Dec 13, 2023 at 02:48:15PM +0200, Adrian Hunter wrote:
> On 12/12/23 19:59, Arnaldo Carvalho de Melo wrote:
> > On Tue, Nov 28, 2023 at 10:01:57PM -0800, Ian Rogers wrote:
> >> Rename and clean up the use of libperf CPU map functions particularly
> >> focussing on perf_cpu_map__empty that may return true for maps
> >> containing CPUs but also with an "any CPU"/dummy value.
> >>
> >> perf_cpu_map__nr is also troubling in that iterating an empty CPU map
> >> will yield the "any CPU"/dummy value. Reduce the appearance of some
> >> calls to this by using the perf_cpu_map__for_each_cpu macro.
> >>
> >> Ian Rogers (14):
> >> libperf cpumap: Rename perf_cpu_map__dummy_new
> >> libperf cpumap: Rename and prefer sysfs for perf_cpu_map__default_new
> >> libperf cpumap: Rename perf_cpu_map__empty
> >> libperf cpumap: Replace usage of perf_cpu_map__new(NULL)
> >> libperf cpumap: Add for_each_cpu that skips the "any CPU" case
> >
> > Applied 1-6 with James' Reviewed-by tags. It would be good to have Adrian
> > check the PT and BTS parts, testing the end result, if he thinks it's all
> > ok.

Ian,

Patches 1-6 are in perf-tools-next now. Can you please consider Adrian's
suggestion to reduce the patch churn and rebase the remaining patches?

- Arnaldo

> Changing the same lines of code twice in the same patch set is not
> really kernel style.
>
> Some of the churn could be reduced by applying and rebasing on the
> patch below.
>
> Ideally the patches should be reordered so that the lines only
> change once i.e.
>
> perf_cpu_map__empty -> <replacement>
>
> instead of
>
> perf_cpu_map__empty -> <rename> -> <replacement>
>
> If that is too much trouble, please accept my ack instead:
>
> Acked-by: Adrian Hunter <[email protected]>



>
> From: Adrian Hunter <[email protected]>
>
> Factor out perf_cpu_map__empty() use to reduce the occurrences and make
> the code more readable.
>
> Signed-off-by: Adrian Hunter <[email protected]>
> ---
> tools/perf/arch/x86/util/intel-bts.c | 11 ++++++++---
> tools/perf/arch/x86/util/intel-pt.c | 21 ++++++++++++---------
> 2 files changed, 20 insertions(+), 12 deletions(-)
>
> diff --git a/tools/perf/arch/x86/util/intel-bts.c b/tools/perf/arch/x86/util/intel-bts.c
> index d2c8cac11470..cebe994eb9db 100644
> --- a/tools/perf/arch/x86/util/intel-bts.c
> +++ b/tools/perf/arch/x86/util/intel-bts.c
> @@ -59,6 +59,11 @@ intel_bts_info_priv_size(struct auxtrace_record *itr __maybe_unused,
> return INTEL_BTS_AUXTRACE_PRIV_SIZE;
> }
>
> +static bool intel_bts_per_cpu(struct evlist *evlist)
> +{
> + return !perf_cpu_map__empty(evlist->core.user_requested_cpus);
> +}
> +
> static int intel_bts_info_fill(struct auxtrace_record *itr,
> struct perf_session *session,
> struct perf_record_auxtrace_info *auxtrace_info,
> @@ -109,8 +114,8 @@ static int intel_bts_recording_options(struct auxtrace_record *itr,
> struct intel_bts_recording *btsr =
> container_of(itr, struct intel_bts_recording, itr);
> struct perf_pmu *intel_bts_pmu = btsr->intel_bts_pmu;
> + bool per_cpu_mmaps = intel_bts_per_cpu(evlist);
> struct evsel *evsel, *intel_bts_evsel = NULL;
> - const struct perf_cpu_map *cpus = evlist->core.user_requested_cpus;
> bool privileged = perf_event_paranoid_check(-1);
>
> if (opts->auxtrace_sample_mode) {
> @@ -143,7 +148,7 @@ static int intel_bts_recording_options(struct auxtrace_record *itr,
> if (!opts->full_auxtrace)
> return 0;
>
> - if (opts->full_auxtrace && !perf_cpu_map__empty(cpus)) {
> + if (opts->full_auxtrace && per_cpu_mmaps) {
> pr_err(INTEL_BTS_PMU_NAME " does not support per-cpu recording\n");
> return -EINVAL;
> }
> @@ -224,7 +229,7 @@ static int intel_bts_recording_options(struct auxtrace_record *itr,
> * In the case of per-cpu mmaps, we need the CPU on the
> * AUX event.
> */
> - if (!perf_cpu_map__empty(cpus))
> + if (per_cpu_mmaps)
> evsel__set_sample_bit(intel_bts_evsel, CPU);
> }
>
> diff --git a/tools/perf/arch/x86/util/intel-pt.c b/tools/perf/arch/x86/util/intel-pt.c
> index fa0c718b9e72..0ff9147c75da 100644
> --- a/tools/perf/arch/x86/util/intel-pt.c
> +++ b/tools/perf/arch/x86/util/intel-pt.c
> @@ -312,6 +312,11 @@ static void intel_pt_tsc_ctc_ratio(u32 *n, u32 *d)
> *d = eax;
> }
>
> +static bool intel_pt_per_cpu(struct evlist *evlist)
> +{
> + return !perf_cpu_map__empty(evlist->core.user_requested_cpus);
> +}
> +
> static int intel_pt_info_fill(struct auxtrace_record *itr,
> struct perf_session *session,
> struct perf_record_auxtrace_info *auxtrace_info,
> @@ -322,7 +327,8 @@ static int intel_pt_info_fill(struct auxtrace_record *itr,
> struct perf_pmu *intel_pt_pmu = ptr->intel_pt_pmu;
> struct perf_event_mmap_page *pc;
> struct perf_tsc_conversion tc = { .time_mult = 0, };
> - bool cap_user_time_zero = false, per_cpu_mmaps;
> + bool per_cpu_mmaps = intel_pt_per_cpu(session->evlist);
> + bool cap_user_time_zero = false;
> u64 tsc_bit, mtc_bit, mtc_freq_bits, cyc_bit, noretcomp_bit;
> u32 tsc_ctc_ratio_n, tsc_ctc_ratio_d;
> unsigned long max_non_turbo_ratio;
> @@ -369,8 +375,6 @@ static int intel_pt_info_fill(struct auxtrace_record *itr,
> ui__warning("Intel Processor Trace: TSC not available\n");
> }
>
> - per_cpu_mmaps = !perf_cpu_map__empty(session->evlist->core.user_requested_cpus);
> -
> auxtrace_info->type = PERF_AUXTRACE_INTEL_PT;
> auxtrace_info->priv[INTEL_PT_PMU_TYPE] = intel_pt_pmu->type;
> auxtrace_info->priv[INTEL_PT_TIME_SHIFT] = tc.time_shift;
> @@ -604,8 +608,8 @@ static int intel_pt_recording_options(struct auxtrace_record *itr,
> struct perf_pmu *intel_pt_pmu = ptr->intel_pt_pmu;
> bool have_timing_info, need_immediate = false;
> struct evsel *evsel, *intel_pt_evsel = NULL;
> - const struct perf_cpu_map *cpus = evlist->core.user_requested_cpus;
> bool privileged = perf_event_paranoid_check(-1);
> + bool per_cpu_mmaps = intel_pt_per_cpu(evlist);
> u64 tsc_bit;
> int err;
>
> @@ -774,8 +778,7 @@ static int intel_pt_recording_options(struct auxtrace_record *itr,
> * Per-cpu recording needs sched_switch events to distinguish different
> * threads.
> */
> - if (have_timing_info && !perf_cpu_map__empty(cpus) &&
> - !record_opts__no_switch_events(opts)) {
> + if (have_timing_info && per_cpu_mmaps && !record_opts__no_switch_events(opts)) {
> if (perf_can_record_switch_events()) {
> bool cpu_wide = !target__none(&opts->target) &&
> !target__has_task(&opts->target);
> @@ -832,7 +835,7 @@ static int intel_pt_recording_options(struct auxtrace_record *itr,
> * In the case of per-cpu mmaps, we need the CPU on the
> * AUX event.
> */
> - if (!perf_cpu_map__empty(cpus))
> + if (per_cpu_mmaps)
> evsel__set_sample_bit(intel_pt_evsel, CPU);
> }
>
> @@ -858,7 +861,7 @@ static int intel_pt_recording_options(struct auxtrace_record *itr,
> tracking_evsel->immediate = true;
>
> /* In per-cpu case, always need the time of mmap events etc */
> - if (!perf_cpu_map__empty(cpus)) {
> + if (per_cpu_mmaps) {
> evsel__set_sample_bit(tracking_evsel, TIME);
> /* And the CPU for switch events */
> evsel__set_sample_bit(tracking_evsel, CPU);
> @@ -870,7 +873,7 @@ static int intel_pt_recording_options(struct auxtrace_record *itr,
> * Warn the user when we do not have enough information to decode i.e.
> * per-cpu with no sched_switch (except workload-only).
> */
> - if (!ptr->have_sched_switch && !perf_cpu_map__empty(cpus) &&
> + if (!ptr->have_sched_switch && per_cpu_mmaps &&
> !target__none(&opts->target) &&
> !intel_pt_evsel->core.attr.exclude_user)
> ui__warning("Intel Processor Trace decoding will not be possible except for kernel tracing!\n");
> --
> 2.34.1
>
>
>

--

- Arnaldo

2023-12-18 20:35:08

by Arnaldo Carvalho de Melo

[permalink] [raw]
Subject: Re: [PATCH v1 10/14] perf top: Avoid repeated function calls

On Tue, Dec 12, 2023 at 03:11:53PM +0000, James Clark wrote:
>
>
> On 29/11/2023 06:02, Ian Rogers wrote:
> > Add a local variable to avoid repeated calls to perf_cpu_map__nr.
> >
> > Signed-off-by: Ian Rogers <[email protected]>
>
> Reviewed-by: James Clark <[email protected]>

Thanks, applied to perf-tools-next.

- Arnaldo


> > ---
> > tools/perf/util/top.c | 9 ++++-----
> > 1 file changed, 4 insertions(+), 5 deletions(-)
> >
> > diff --git a/tools/perf/util/top.c b/tools/perf/util/top.c
> > index be7157de0451..4db3d1bd686c 100644
> > --- a/tools/perf/util/top.c
> > +++ b/tools/perf/util/top.c
> > @@ -28,6 +28,7 @@ size_t perf_top__header_snprintf(struct perf_top *top, char *bf, size_t size)
> > struct record_opts *opts = &top->record_opts;
> > struct target *target = &opts->target;
> > size_t ret = 0;
> > + int nr_cpus;
> >
> > if (top->samples) {
> > samples_per_sec = top->samples / top->delay_secs;
> > @@ -93,19 +94,17 @@ size_t perf_top__header_snprintf(struct perf_top *top, char *bf, size_t size)
> > else
> > ret += SNPRINTF(bf + ret, size - ret, " (all");
> >
> > + nr_cpus = perf_cpu_map__nr(top->evlist->core.user_requested_cpus);
> > if (target->cpu_list)
> > ret += SNPRINTF(bf + ret, size - ret, ", CPU%s: %s)",
> > - perf_cpu_map__nr(top->evlist->core.user_requested_cpus) > 1
> > - ? "s" : "",
> > + nr_cpus > 1 ? "s" : "",
> > target->cpu_list);
> > else {
> > if (target->tid)
> > ret += SNPRINTF(bf + ret, size - ret, ")");
> > else
> > ret += SNPRINTF(bf + ret, size - ret, ", %d CPU%s)",
> > - perf_cpu_map__nr(top->evlist->core.user_requested_cpus),
> > - perf_cpu_map__nr(top->evlist->core.user_requested_cpus) > 1
> > - ? "s" : "");
> > + nr_cpus, nr_cpus > 1 ? "s" : "");
> > }
> >
> > perf_top__reset_sample_counters(top);
>

--

- Arnaldo

2023-12-18 20:36:49

by Arnaldo Carvalho de Melo

[permalink] [raw]
Subject: Re: [PATCH v1 14/14] libperf cpumap: Document perf_cpu_map__nr's behavior

On Tue, Dec 12, 2023 at 03:20:47PM +0000, James Clark wrote:
> On 29/11/2023 06:02, Ian Rogers wrote:
> > LIBPERF_API void perf_cpu_map__put(struct perf_cpu_map *map);
> > +/**
> > + * perf_cpu_map__cpu - get the CPU value at the given index. Returns -1 if index
> > + * is invalid.
> > + */
> > LIBPERF_API struct perf_cpu perf_cpu_map__cpu(const struct perf_cpu_map *cpus, int idx);
> > +/**
> > + * perf_cpu_map__nr - for an empty map returns 1, as perf_cpu_map__cpu returns a
> > + * cpu of -1 for an invalid index, this makes an empty map
> > + * look like it contains the "any CPU"/dummy value. Otherwise
> > + * the result is the number of CPUs in the map plus one if the
> > + * "any CPU"/dummy value is present.
>
> Hmmm... I'm not sure whether to laugh or cry at that API.
>
> Reviewed-by: James Clark <[email protected]>
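
To make the documented quirk concrete, here is a minimal usage sketch
(not part of the patch; it assumes the post-series API names such as
perf_cpu_map__new_any_cpu()):

#include <perf/cpumap.h>
#include <stdio.h>

int main(void)
{
	struct perf_cpu_map *cpus = perf_cpu_map__new("0-3");   /* four CPUs */
	struct perf_cpu_map *any = perf_cpu_map__new_any_cpu(); /* "any CPU"/dummy map */
	struct perf_cpu cpu;
	int idx;

	printf("nr(cpus) = %d\n", perf_cpu_map__nr(cpus)); /* 4 */
	printf("nr(any)  = %d\n", perf_cpu_map__nr(any));  /* 1: the -1 dummy value */
	printf("nr(NULL) = %d\n", perf_cpu_map__nr(NULL)); /* 1: an empty map looks like "any CPU" */

	/* Iterating the "any CPU" map yields a single entry with cpu.cpu == -1. */
	perf_cpu_map__for_each_cpu(cpu, idx, any)
		printf("idx %d -> cpu %d\n", idx, cpu.cpu);

	/* An out-of-range index also comes back as a cpu of -1. */
	printf("cpu at idx 99 = %d\n", perf_cpu_map__cpu(cpus, 99).cpu);

	perf_cpu_map__put(cpus);
	perf_cpu_map__put(any);
	return 0;
}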



Thanks, applied to perf-tools-next.

- Arnaldo


2024-02-01 02:13:12

by Ian Rogers

[permalink] [raw]
Subject: Re: [PATCH v1 07/14] perf arm-spe/cs-etm: Directly iterate CPU maps

On Tue, Dec 12, 2023 at 6:36 AM James Clark <[email protected]> wrote:
>
>
>
> On 12/12/2023 14:17, James Clark wrote:
> >
> >
> > On 29/11/2023 06:02, Ian Rogers wrote:
> >> Rather than iterate all CPUs and see if they are in CPU maps, directly
> >> iterate the CPU map. Similarly make use of the intersect
> >> function. Switch perf_cpu_map__has_any_cpu_or_is_empty to more
> >> appropriate alternatives.
> >>
> >> Signed-off-by: Ian Rogers <[email protected]>
> >> ---
> >> tools/perf/arch/arm/util/cs-etm.c | 77 ++++++++++++----------------
> >> tools/perf/arch/arm64/util/arm-spe.c | 4 +-
> >> 2 files changed, 34 insertions(+), 47 deletions(-)
> >>
> >> diff --git a/tools/perf/arch/arm/util/cs-etm.c b/tools/perf/arch/arm/util/cs-etm.c
> >> index 77e6663c1703..a68a72f2f668 100644
> >> --- a/tools/perf/arch/arm/util/cs-etm.c
> >> +++ b/tools/perf/arch/arm/util/cs-etm.c
> >> @@ -197,38 +197,32 @@ static int cs_etm_validate_timestamp(struct auxtrace_record *itr,
> >> static int cs_etm_validate_config(struct auxtrace_record *itr,
> >> struct evsel *evsel)
> >> {
> >> - int i, err = -EINVAL;
> >> + int idx, err = -EINVAL;
> >> struct perf_cpu_map *event_cpus = evsel->evlist->core.user_requested_cpus;
> >> struct perf_cpu_map *online_cpus = perf_cpu_map__new_online_cpus();
> >> + struct perf_cpu_map *intersect_cpus = perf_cpu_map__intersect(event_cpus, online_cpus);
> >> + struct perf_cpu cpu;
> >>
> >> - /* Set option of each CPU we have */
> >> - for (i = 0; i < cpu__max_cpu().cpu; i++) {
> >> - struct perf_cpu cpu = { .cpu = i, };
> >> -
> >> - /*
> >> - * In per-cpu case, do the validation for CPUs to work with.
> >> - * In per-thread case, the CPU map is empty. Since the traced
> >> - * program can run on any CPUs in this case, thus don't skip
> >> - * validation.
> >> - */
> >> - if (!perf_cpu_map__has_any_cpu_or_is_empty(event_cpus) &&
> >> - !perf_cpu_map__has(event_cpus, cpu))
> >> - continue;
> >
> > This has broken validation for per-thread sessions.
> > perf_cpu_map__intersect() doesn't seem to handle the case where an
> > 'any' map is intersected with an online map: for this to work it would
> > have to return the online map, and it seems to make sense for it to
> > behave that way.
> >
> > At least that was my initial impression, but I only debugged it and saw
> > that the loop is now skipped entirely.
> >
> >> -
> >> - if (!perf_cpu_map__has(online_cpus, cpu))
> >> - continue;
> >> + perf_cpu_map__put(online_cpus);
> >>
> >> - err = cs_etm_validate_context_id(itr, evsel, i);
> >> + /*
> >> + * Set option of each CPU we have. In per-cpu case, do the validation
> >> + * for CPUs to work with. In per-thread case, the CPU map is empty.
> >> + * Since the traced program can run on any CPUs in this case, thus don't
> >> + * skip validation.
> >> + */
> >> + perf_cpu_map__for_each_cpu_skip_any(cpu, idx, intersect_cpus) {
> >> + err = cs_etm_validate_context_id(itr, evsel, cpu.cpu);
> >> if (err)
> >> goto out;
> >> - err = cs_etm_validate_timestamp(itr, evsel, i);
> >> + err = cs_etm_validate_timestamp(itr, evsel, idx);

I think this is an error, idx shouldn't be used here, cpu.cpu should.

> >> if (err)
> >> goto out;
> >> }
> >>
> >> err = 0;
> >> out:
> >> - perf_cpu_map__put(online_cpus);
> >> + perf_cpu_map__put(intersect_cpus);
> >> return err;
> >> }
> >>
> >> @@ -435,7 +429,7 @@ static int cs_etm_recording_options(struct auxtrace_record *itr,
> >> * Also the case of per-cpu mmaps, need the contextID in order to be notified
> >> * when a context switch happened.
> >> */
> >> - if (!perf_cpu_map__has_any_cpu_or_is_empty(cpus)) {
> >> + if (!perf_cpu_map__is_any_cpu_or_is_empty(cpus)) {
> >> evsel__set_config_if_unset(cs_etm_pmu, cs_etm_evsel,
> >> "timestamp", 1);
> >> evsel__set_config_if_unset(cs_etm_pmu, cs_etm_evsel,
> >> @@ -461,7 +455,7 @@ static int cs_etm_recording_options(struct auxtrace_record *itr,
> >> evsel->core.attr.sample_period = 1;
> >>
> >> /* In per-cpu case, always need the time of mmap events etc */
> >> - if (!perf_cpu_map__has_any_cpu_or_is_empty(cpus))
> >> + if (!perf_cpu_map__is_any_cpu_or_is_empty(cpus))
> >> evsel__set_sample_bit(evsel, TIME);
> >>
> >> err = cs_etm_validate_config(itr, cs_etm_evsel);
> >> @@ -533,38 +527,32 @@ static size_t
> >> cs_etm_info_priv_size(struct auxtrace_record *itr __maybe_unused,
> >> struct evlist *evlist __maybe_unused)
> >> {
> >> - int i;
> >> + int idx;
> >> int etmv3 = 0, etmv4 = 0, ete = 0;
> >> struct perf_cpu_map *event_cpus = evlist->core.user_requested_cpus;
> >> struct perf_cpu_map *online_cpus = perf_cpu_map__new_online_cpus();
> >> + struct perf_cpu cpu;
> >>
> >> /* cpu map is not empty, we have specific CPUs to work with */
> >> - if (!perf_cpu_map__has_any_cpu_or_is_empty(event_cpus)) {
> >> - for (i = 0; i < cpu__max_cpu().cpu; i++) {
> >> - struct perf_cpu cpu = { .cpu = i, };
> >> -
> >> - if (!perf_cpu_map__has(event_cpus, cpu) ||
> >> - !perf_cpu_map__has(online_cpus, cpu))
> >> - continue;
> >> + if (!perf_cpu_map__is_empty(event_cpus)) {
> >> + struct perf_cpu_map *intersect_cpus =
> >> + perf_cpu_map__intersect(event_cpus, online_cpus);
> >>
> >> - if (cs_etm_is_ete(itr, i))
> >> + perf_cpu_map__for_each_cpu_skip_any(cpu, idx, intersect_cpus) {
> >> + if (cs_etm_is_ete(itr, cpu.cpu))
>
> Similar problem here. For a per-thread session, the CPU map is not empty
> (it's an 'any' map, presumably length 1), so it comes into this first
> if, rather than the else below which is for the 'any' scenario.
>
> Then the intersect with online CPUs results in an empty map, so no CPU
> metadata is recorded, then the session fails.
>
> If you made the intersect work in the way I mentioned above we could
> also delete the else below, because that's just another way to convert
> from 'any' to 'all online'.

I don't think an intersect of "all online" with an "any CPU" map should
return "all online", as these would be quite different options to
perf_event_open. Let's see if fixing the issue above resolves this;
otherwise I can revert it to a more mechanical translation of the
existing code into the new APIs.
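
For context, the reason the two maps are quite different options to
perf_event_open can be sketched as follows (illustration only, not part
of the patch):

#include <linux/perf_event.h>
#include <sys/syscall.h>
#include <sys/types.h>
#include <unistd.h>
#include <string.h>

static int open_cycles(pid_t pid, int cpu)
{
	struct perf_event_attr attr;

	memset(&attr, 0, sizeof(attr));
	attr.size = sizeof(attr);
	attr.type = PERF_TYPE_HARDWARE;
	attr.config = PERF_COUNT_HW_CPU_CYCLES;

	return syscall(SYS_perf_event_open, &attr, pid, cpu, -1, 0);
}

/*
 * Per-thread ("any CPU"/dummy map): count the task wherever it runs.
 *	open_cycles(pid, -1);
 *
 * Per-CPU (a concrete CPU taken from an online map): count on CPU 2,
 * for all tasks.
 *	open_cycles(-1, 2);
 *
 * Turning the dummy map into the online map via intersection would
 * silently convert the first kind of request into the second.
 */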

Thanks,
Ian

> >> ete++;
> >> - else if (cs_etm_is_etmv4(itr, i))
> >> + else if (cs_etm_is_etmv4(itr, cpu.cpu))
> >> etmv4++;
> >> else
> >> etmv3++;
> >> }
> >> + perf_cpu_map__put(intersect_cpus);
> >> } else {
> >> /* get configuration for all CPUs in the system */
> >> - for (i = 0; i < cpu__max_cpu().cpu; i++) {
> >> - struct perf_cpu cpu = { .cpu = i, };
> >> -
> >> - if (!perf_cpu_map__has(online_cpus, cpu))
> >> - continue;
> >> -
> >> - if (cs_etm_is_ete(itr, i))
> >> + perf_cpu_map__for_each_cpu(cpu, idx, online_cpus) {
> >> + if (cs_etm_is_ete(itr, cpu.cpu))
> >> ete++;
> >> - else if (cs_etm_is_etmv4(itr, i))
> >> + else if (cs_etm_is_etmv4(itr, cpu.cpu))
> >> etmv4++;
> >> else
> >> etmv3++;
> >> @@ -814,15 +802,14 @@ static int cs_etm_info_fill(struct auxtrace_record *itr,
> >> return -EINVAL;
> >>
> >> /* If the cpu_map is empty all online CPUs are involved */
> >> - if (perf_cpu_map__has_any_cpu_or_is_empty(event_cpus)) {
> >> + if (perf_cpu_map__is_empty(event_cpus)) {
> >> cpu_map = online_cpus;
> >> } else {
> >> /* Make sure all specified CPUs are online */
> >> - for (i = 0; i < perf_cpu_map__nr(event_cpus); i++) {
> >> - struct perf_cpu cpu = { .cpu = i, };
> >> + struct perf_cpu cpu;
> >>
> >> - if (perf_cpu_map__has(event_cpus, cpu) &&
> >> - !perf_cpu_map__has(online_cpus, cpu))
> >> + perf_cpu_map__for_each_cpu(cpu, i, event_cpus) {
> >> + if (!perf_cpu_map__has(online_cpus, cpu))
> >> return -EINVAL;
> >> }
> >>
> >> diff --git a/tools/perf/arch/arm64/util/arm-spe.c b/tools/perf/arch/arm64/util/arm-spe.c
> >> index 51ccbfd3d246..0b52e67edb3b 100644
> >> --- a/tools/perf/arch/arm64/util/arm-spe.c
> >> +++ b/tools/perf/arch/arm64/util/arm-spe.c
> >> @@ -232,7 +232,7 @@ static int arm_spe_recording_options(struct auxtrace_record *itr,
> >> * In the case of per-cpu mmaps, sample CPU for AUX event;
> >> * also enable the timestamp tracing for samples correlation.
> >> */
> >> - if (!perf_cpu_map__has_any_cpu_or_is_empty(cpus)) {
> >> + if (!perf_cpu_map__is_any_cpu_or_is_empty(cpus)) {
> >> evsel__set_sample_bit(arm_spe_evsel, CPU);
> >> evsel__set_config_if_unset(arm_spe_pmu, arm_spe_evsel,
> >> "ts_enable", 1);
> >> @@ -265,7 +265,7 @@ static int arm_spe_recording_options(struct auxtrace_record *itr,
> >> tracking_evsel->core.attr.sample_period = 1;
> >>
> >> /* In per-cpu case, always need the time of mmap events etc */
> >> - if (!perf_cpu_map__has_any_cpu_or_is_empty(cpus)) {
> >> + if (!perf_cpu_map__is_any_cpu_or_is_empty(cpus)) {
> >> evsel__set_sample_bit(tracking_evsel, TIME);
> >> evsel__set_sample_bit(tracking_evsel, CPU);
> >>

2024-02-01 11:13:06

by James Clark

[permalink] [raw]
Subject: Re: [PATCH v1 07/14] perf arm-spe/cs-etm: Directly iterate CPU maps



On 01/02/2024 02:12, Ian Rogers wrote:
> On Tue, Dec 12, 2023 at 6:36 AM James Clark <[email protected]> wrote:
>>
>>
>>
>> On 12/12/2023 14:17, James Clark wrote:
>>>
>>>
>>> On 29/11/2023 06:02, Ian Rogers wrote:
>>>> Rather than iterate all CPUs and see if they are in CPU maps, directly
>>>> iterate the CPU map. Similarly make use of the intersect
>>>> function. Switch perf_cpu_map__has_any_cpu_or_is_empty to more
>>>> appropriate alternatives.
>>>>
>>>> Signed-off-by: Ian Rogers <[email protected]>
>>>> ---
>>>> tools/perf/arch/arm/util/cs-etm.c | 77 ++++++++++++----------------
>>>> tools/perf/arch/arm64/util/arm-spe.c | 4 +-
>>>> 2 files changed, 34 insertions(+), 47 deletions(-)
>>>>
>>>> diff --git a/tools/perf/arch/arm/util/cs-etm.c b/tools/perf/arch/arm/util/cs-etm.c
>>>> index 77e6663c1703..a68a72f2f668 100644
>>>> --- a/tools/perf/arch/arm/util/cs-etm.c
>>>> +++ b/tools/perf/arch/arm/util/cs-etm.c
>>>> @@ -197,38 +197,32 @@ static int cs_etm_validate_timestamp(struct auxtrace_record *itr,
>>>> static int cs_etm_validate_config(struct auxtrace_record *itr,
>>>> struct evsel *evsel)
>>>> {
>>>> - int i, err = -EINVAL;
>>>> + int idx, err = -EINVAL;
>>>> struct perf_cpu_map *event_cpus = evsel->evlist->core.user_requested_cpus;
>>>> struct perf_cpu_map *online_cpus = perf_cpu_map__new_online_cpus();
>>>> + struct perf_cpu_map *intersect_cpus = perf_cpu_map__intersect(event_cpus, online_cpus);
>>>> + struct perf_cpu cpu;
>>>>
>>>> - /* Set option of each CPU we have */
>>>> - for (i = 0; i < cpu__max_cpu().cpu; i++) {
>>>> - struct perf_cpu cpu = { .cpu = i, };
>>>> -
>>>> - /*
>>>> - * In per-cpu case, do the validation for CPUs to work with.
>>>> - * In per-thread case, the CPU map is empty. Since the traced
>>>> - * program can run on any CPUs in this case, thus don't skip
>>>> - * validation.
>>>> - */
>>>> - if (!perf_cpu_map__has_any_cpu_or_is_empty(event_cpus) &&
>>>> - !perf_cpu_map__has(event_cpus, cpu))
>>>> - continue;
>>>
>>> This has broken validation for per-thread sessions.
> >> perf_cpu_map__intersect() doesn't seem to handle the case where an
> >> 'any' map is intersected with an online map: for this to work it would
> >> have to return the online map, and it seems to make sense for it to
> >> behave that way.
>>>
>>> At least that was my initial impression, but I only debugged it and saw
>>> that the loop is now skipped entirely.
>>>
>>>> -
>>>> - if (!perf_cpu_map__has(online_cpus, cpu))
>>>> - continue;
>>>> + perf_cpu_map__put(online_cpus);
>>>>
>>>> - err = cs_etm_validate_context_id(itr, evsel, i);
>>>> + /*
>>>> + * Set option of each CPU we have. In per-cpu case, do the validation
>>>> + * for CPUs to work with. In per-thread case, the CPU map is empty.
>>>> + * Since the traced program can run on any CPUs in this case, thus don't
>>>> + * skip validation.
>>>> + */
>>>> + perf_cpu_map__for_each_cpu_skip_any(cpu, idx, intersect_cpus) {
>>>> + err = cs_etm_validate_context_id(itr, evsel, cpu.cpu);
>>>> if (err)
>>>> goto out;
>>>> - err = cs_etm_validate_timestamp(itr, evsel, i);
>>>> + err = cs_etm_validate_timestamp(itr, evsel, idx);
>
> I think this is an error, idx shouldn't be used here, cpu.cpu should.
>

Yes, I think you're right. But when I tested this it was on a machine
with all CPUs online and all of them traced, so idx == cpu.cpu. So
although this might need to be fixed, it didn't cause the breakage.

Also, this line of code was never hit, because the real issue was the
intersect returning an empty map before this point.
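
For reference, the corrected hunk would pass the CPU number to both
validation calls; a fragment of cs_etm_validate_config() as it would
read with that fix (not a standalone program):

	perf_cpu_map__for_each_cpu_skip_any(cpu, idx, intersect_cpus) {
		err = cs_etm_validate_context_id(itr, evsel, cpu.cpu);
		if (err)
			goto out;
		/* Use the CPU number, not the map index, here as well. */
		err = cs_etm_validate_timestamp(itr, evsel, cpu.cpu);
		if (err)
			goto out;
	}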

>>>> if (err)
>>>> goto out;
>>>> }
>>>>
>>>> err = 0;
>>>> out:
>>>> - perf_cpu_map__put(online_cpus);
>>>> + perf_cpu_map__put(intersect_cpus);
>>>> return err;
>>>> }
>>>>
>>>> @@ -435,7 +429,7 @@ static int cs_etm_recording_options(struct auxtrace_record *itr,
>>>> * Also the case of per-cpu mmaps, need the contextID in order to be notified
>>>> * when a context switch happened.
>>>> */
>>>> - if (!perf_cpu_map__has_any_cpu_or_is_empty(cpus)) {
>>>> + if (!perf_cpu_map__is_any_cpu_or_is_empty(cpus)) {
>>>> evsel__set_config_if_unset(cs_etm_pmu, cs_etm_evsel,
>>>> "timestamp", 1);
>>>> evsel__set_config_if_unset(cs_etm_pmu, cs_etm_evsel,
>>>> @@ -461,7 +455,7 @@ static int cs_etm_recording_options(struct auxtrace_record *itr,
>>>> evsel->core.attr.sample_period = 1;
>>>>
>>>> /* In per-cpu case, always need the time of mmap events etc */
>>>> - if (!perf_cpu_map__has_any_cpu_or_is_empty(cpus))
>>>> + if (!perf_cpu_map__is_any_cpu_or_is_empty(cpus))
>>>> evsel__set_sample_bit(evsel, TIME);
>>>>
>>>> err = cs_etm_validate_config(itr, cs_etm_evsel);
>>>> @@ -533,38 +527,32 @@ static size_t
>>>> cs_etm_info_priv_size(struct auxtrace_record *itr __maybe_unused,
>>>> struct evlist *evlist __maybe_unused)
>>>> {
>>>> - int i;
>>>> + int idx;
>>>> int etmv3 = 0, etmv4 = 0, ete = 0;
>>>> struct perf_cpu_map *event_cpus = evlist->core.user_requested_cpus;
>>>> struct perf_cpu_map *online_cpus = perf_cpu_map__new_online_cpus();
>>>> + struct perf_cpu cpu;
>>>>
>>>> /* cpu map is not empty, we have specific CPUs to work with */
>>>> - if (!perf_cpu_map__has_any_cpu_or_is_empty(event_cpus)) {
>>>> - for (i = 0; i < cpu__max_cpu().cpu; i++) {
>>>> - struct perf_cpu cpu = { .cpu = i, };
>>>> -
>>>> - if (!perf_cpu_map__has(event_cpus, cpu) ||
>>>> - !perf_cpu_map__has(online_cpus, cpu))
>>>> - continue;
>>>> + if (!perf_cpu_map__is_empty(event_cpus)) {
>>>> + struct perf_cpu_map *intersect_cpus =
>>>> + perf_cpu_map__intersect(event_cpus, online_cpus);
>>>>
>>>> - if (cs_etm_is_ete(itr, i))
>>>> + perf_cpu_map__for_each_cpu_skip_any(cpu, idx, intersect_cpus) {
>>>> + if (cs_etm_is_ete(itr, cpu.cpu))
>>
>> Similar problem here. For a per-thread session, the CPU map is not empty
>> (it's an 'any' map, presumably length 1), so it comes into this first
>> if, rather than the else below which is for the 'any' scenario.
>>
>> Then the intersect with online CPUs results in an empty map, so no CPU
>> metadata is recorded, then the session fails.
>>
>> If you made the intersect work in the way I mentioned above we could
>> also delete the else below, because that's just another way to convert
>> from 'any' to 'all online'.
>
> I don't think an intersect of "all online" with an "any CPU" map should
> return "all online", as these would be quite different options to
> perf_event_open. Let's see if fixing the issue above resolves this;
> otherwise I can revert it to a more mechanical translation of the
> existing code into the new APIs.
>

If you're not ok with the intersection behavior that I suggested, then
maybe we need a new function like perf_cpu_map__flatten() or
perf_cpu_map__expand() that does behave that way and can be used in
these two cases. It doesn't look like anything else needs rewriting;
it's just that perf_cpu_map__intersect() needs to behave slightly
differently in this case.
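
As an illustration of that suggestion only (the helper name is
hypothetical, not an existing libperf API; perf_cpu_map__has_any_cpu()
and perf_cpu_map__intersect() are the existing calls), such a function
might look roughly like:

#include <perf/cpumap.h>

/*
 * Hypothetical sketch of the suggested helper: like
 * perf_cpu_map__intersect(), except that a map containing the
 * "any CPU"/dummy value is treated as matching every CPU in @other
 * (e.g. all online CPUs).
 */
static struct perf_cpu_map *cpu_map__expand(struct perf_cpu_map *map,
					    struct perf_cpu_map *other)
{
	if (perf_cpu_map__has_any_cpu(map))
		return perf_cpu_map__get(other);

	return perf_cpu_map__intersect(map, other);
}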

It's because ETM cares about validating and saving the configuration
per-core, even if you finally open the event with CPU==-1, because later
we'll need all that info to decode the trace.

Although I can't see a scenario in which you wouldn't want ('any'
intersect '1,2') == '1,2'. Is there any code in Perf already doing that
intersection that would be broken by the change? It might be best to add
that behavior sooner rather than later, in case something does start
depending on it.

As far as I know you can't do both per-thread and per-cpu sessions in
Perf, so it doesn't really matter how the intersection behaves in that
scenario.

> Thanks,
> Ian
>
>>>> ete++;
>>>> - else if (cs_etm_is_etmv4(itr, i))
>>>> + else if (cs_etm_is_etmv4(itr, cpu.cpu))
>>>> etmv4++;
>>>> else
>>>> etmv3++;
>>>> }
>>>> + perf_cpu_map__put(intersect_cpus);
>>>> } else {
>>>> /* get configuration for all CPUs in the system */
>>>> - for (i = 0; i < cpu__max_cpu().cpu; i++) {
>>>> - struct perf_cpu cpu = { .cpu = i, };
>>>> -
>>>> - if (!perf_cpu_map__has(online_cpus, cpu))
>>>> - continue;
>>>> -
>>>> - if (cs_etm_is_ete(itr, i))
>>>> + perf_cpu_map__for_each_cpu(cpu, idx, online_cpus) {
>>>> + if (cs_etm_is_ete(itr, cpu.cpu))
>>>> ete++;
>>>> - else if (cs_etm_is_etmv4(itr, i))
>>>> + else if (cs_etm_is_etmv4(itr, cpu.cpu))
>>>> etmv4++;
>>>> else
>>>> etmv3++;
>>>> @@ -814,15 +802,14 @@ static int cs_etm_info_fill(struct auxtrace_record *itr,
>>>> return -EINVAL;
>>>>
>>>> /* If the cpu_map is empty all online CPUs are involved */
>>>> - if (perf_cpu_map__has_any_cpu_or_is_empty(event_cpus)) {
>>>> + if (perf_cpu_map__is_empty(event_cpus)) {
>>>> cpu_map = online_cpus;
>>>> } else {
>>>> /* Make sure all specified CPUs are online */
>>>> - for (i = 0; i < perf_cpu_map__nr(event_cpus); i++) {
>>>> - struct perf_cpu cpu = { .cpu = i, };
>>>> + struct perf_cpu cpu;
>>>>
>>>> - if (perf_cpu_map__has(event_cpus, cpu) &&
>>>> - !perf_cpu_map__has(online_cpus, cpu))
>>>> + perf_cpu_map__for_each_cpu(cpu, i, event_cpus) {
>>>> + if (!perf_cpu_map__has(online_cpus, cpu))
>>>> return -EINVAL;
>>>> }
>>>>
>>>> diff --git a/tools/perf/arch/arm64/util/arm-spe.c b/tools/perf/arch/arm64/util/arm-spe.c
>>>> index 51ccbfd3d246..0b52e67edb3b 100644
>>>> --- a/tools/perf/arch/arm64/util/arm-spe.c
>>>> +++ b/tools/perf/arch/arm64/util/arm-spe.c
>>>> @@ -232,7 +232,7 @@ static int arm_spe_recording_options(struct auxtrace_record *itr,
>>>> * In the case of per-cpu mmaps, sample CPU for AUX event;
>>>> * also enable the timestamp tracing for samples correlation.
>>>> */
>>>> - if (!perf_cpu_map__has_any_cpu_or_is_empty(cpus)) {
>>>> + if (!perf_cpu_map__is_any_cpu_or_is_empty(cpus)) {
>>>> evsel__set_sample_bit(arm_spe_evsel, CPU);
>>>> evsel__set_config_if_unset(arm_spe_pmu, arm_spe_evsel,
>>>> "ts_enable", 1);
>>>> @@ -265,7 +265,7 @@ static int arm_spe_recording_options(struct auxtrace_record *itr,
>>>> tracking_evsel->core.attr.sample_period = 1;
>>>>
>>>> /* In per-cpu case, always need the time of mmap events etc */
>>>> - if (!perf_cpu_map__has_any_cpu_or_is_empty(cpus)) {
>>>> + if (!perf_cpu_map__is_any_cpu_or_is_empty(cpus)) {
>>>> evsel__set_sample_bit(tracking_evsel, TIME);
>>>> evsel__set_sample_bit(tracking_evsel, CPU);
>>>>