2024-02-01 04:23:02

by Ian Rogers

[permalink] [raw]
Subject: [PATCH v2 0/8] Clean up libperf cpumap's empty function

Rename and clean up the use of libperf CPU map functions particularly
focussing on perf_cpu_map__empty that may return true for maps
containing CPUs but also with an "any CPU"/dummy value.

perf_cpu_map__nr is also troubling in that iterating an empty CPU map
will yield the "any CPU"/dummy value. Reduce the appearance of some
calls to this by using the perf_cpu_map__for_each_cpu macro.

v2: 6 patches were merged by Arnaldo. A new patch was added to ensure
empty maps are allocated as NULL (suggested by James Clark). Hopefully
includes a fix to "perf arm-spe/cs-etm: Directly iterate CPU maps".

Ian Rogers (8):
libperf cpumap: Add any, empty and min helpers
libperf cpumap: Ensure empty cpumap is NULL from alloc
perf arm-spe/cs-etm: Directly iterate CPU maps
perf intel-pt/intel-bts: Switch perf_cpu_map__has_any_cpu_or_is_empty
use
perf cpumap: Clean up use of perf_cpu_map__has_any_cpu_or_is_empty
perf arm64 header: Remove unnecessary CPU map get and put
perf stat: Remove duplicate cpus_map_matched function
perf cpumap: Use perf_cpu_map__for_each_cpu when possible

tools/lib/perf/cpumap.c | 33 +++++++-
tools/lib/perf/include/perf/cpumap.h | 16 ++++
tools/lib/perf/libperf.map | 4 +
tools/perf/arch/arm/util/cs-etm.c | 77 ++++++++-----------
tools/perf/arch/arm64/util/arm-spe.c | 4 +-
tools/perf/arch/arm64/util/header.c | 13 +---
tools/perf/arch/x86/util/intel-bts.c | 4 +-
tools/perf/arch/x86/util/intel-pt.c | 10 +--
tools/perf/builtin-c2c.c | 6 +-
tools/perf/builtin-stat.c | 31 ++------
tools/perf/tests/bitmap.c | 13 ++--
tools/perf/tests/topology.c | 46 +++++------
tools/perf/util/auxtrace.c | 4 +-
tools/perf/util/bpf_kwork.c | 16 ++--
tools/perf/util/bpf_kwork_top.c | 12 +--
tools/perf/util/cpumap.c | 12 ++-
tools/perf/util/record.c | 2 +-
.../scripting-engines/trace-event-python.c | 12 +--
tools/perf/util/session.c | 5 +-
tools/perf/util/stat.c | 2 +-
tools/perf/util/svghelper.c | 20 +++--
21 files changed, 175 insertions(+), 167 deletions(-)

--
2.43.0.429.g432eaa2c6b-goog



2024-02-01 04:23:20

by Ian Rogers

[permalink] [raw]
Subject: [PATCH v2 1/8] libperf cpumap: Add any, empty and min helpers

Additional helpers to better replace
perf_cpu_map__has_any_cpu_or_is_empty.

Signed-off-by: Ian Rogers <[email protected]>
---
tools/lib/perf/cpumap.c | 27 +++++++++++++++++++++++++++
tools/lib/perf/include/perf/cpumap.h | 16 ++++++++++++++++
tools/lib/perf/libperf.map | 4 ++++
3 files changed, 47 insertions(+)

diff --git a/tools/lib/perf/cpumap.c b/tools/lib/perf/cpumap.c
index 4adcd7920d03..ba49552952c5 100644
--- a/tools/lib/perf/cpumap.c
+++ b/tools/lib/perf/cpumap.c
@@ -316,6 +316,19 @@ bool perf_cpu_map__has_any_cpu_or_is_empty(const struct perf_cpu_map *map)
return map ? __perf_cpu_map__cpu(map, 0).cpu == -1 : true;
}

+bool perf_cpu_map__is_any_cpu_or_is_empty(const struct perf_cpu_map *map)
+{
+ if (!map)
+ return true;
+
+ return __perf_cpu_map__nr(map) == 1 && __perf_cpu_map__cpu(map, 0).cpu == -1;
+}
+
+bool perf_cpu_map__is_empty(const struct perf_cpu_map *map)
+{
+ return map == NULL;
+}
+
int perf_cpu_map__idx(const struct perf_cpu_map *cpus, struct perf_cpu cpu)
{
int low, high;
@@ -372,6 +385,20 @@ bool perf_cpu_map__has_any_cpu(const struct perf_cpu_map *map)
return map && __perf_cpu_map__cpu(map, 0).cpu == -1;
}

+struct perf_cpu perf_cpu_map__min(const struct perf_cpu_map *map)
+{
+ struct perf_cpu cpu, result = {
+ .cpu = -1
+ };
+ int idx;
+
+ perf_cpu_map__for_each_cpu_skip_any(cpu, idx, map) {
+ result = cpu;
+ break;
+ }
+ return result;
+}
+
struct perf_cpu perf_cpu_map__max(const struct perf_cpu_map *map)
{
struct perf_cpu result = {
diff --git a/tools/lib/perf/include/perf/cpumap.h b/tools/lib/perf/include/perf/cpumap.h
index 228c6c629b0c..90457d17fb2f 100644
--- a/tools/lib/perf/include/perf/cpumap.h
+++ b/tools/lib/perf/include/perf/cpumap.h
@@ -61,6 +61,22 @@ LIBPERF_API int perf_cpu_map__nr(const struct perf_cpu_map *cpus);
* perf_cpu_map__has_any_cpu_or_is_empty - is map either empty or has the "any CPU"/dummy value.
*/
LIBPERF_API bool perf_cpu_map__has_any_cpu_or_is_empty(const struct perf_cpu_map *map);
+/**
+ * perf_cpu_map__is_any_cpu_or_is_empty - is map either empty or the "any CPU"/dummy value.
+ */
+LIBPERF_API bool perf_cpu_map__is_any_cpu_or_is_empty(const struct perf_cpu_map *map);
+/**
+ * perf_cpu_map__is_empty - does the map contain no values and it doesn't
+ * contain the special "any CPU"/dummy value.
+ */
+LIBPERF_API bool perf_cpu_map__is_empty(const struct perf_cpu_map *map);
+/**
+ * perf_cpu_map__min - the minimum CPU value or -1 if empty or just the "any CPU"/dummy value.
+ */
+LIBPERF_API struct perf_cpu perf_cpu_map__min(const struct perf_cpu_map *map);
+/**
+ * perf_cpu_map__max - the maximum CPU value or -1 if empty or just the "any CPU"/dummy value.
+ */
LIBPERF_API struct perf_cpu perf_cpu_map__max(const struct perf_cpu_map *map);
LIBPERF_API bool perf_cpu_map__has(const struct perf_cpu_map *map, struct perf_cpu cpu);
LIBPERF_API bool perf_cpu_map__equal(const struct perf_cpu_map *lhs,
diff --git a/tools/lib/perf/libperf.map b/tools/lib/perf/libperf.map
index 10b3f3722642..2aa79b696032 100644
--- a/tools/lib/perf/libperf.map
+++ b/tools/lib/perf/libperf.map
@@ -10,6 +10,10 @@ LIBPERF_0.0.1 {
perf_cpu_map__nr;
perf_cpu_map__cpu;
perf_cpu_map__has_any_cpu_or_is_empty;
+ perf_cpu_map__is_any_cpu_or_is_empty;
+ perf_cpu_map__is_empty;
+ perf_cpu_map__has_any_cpu;
+ perf_cpu_map__min;
perf_cpu_map__max;
perf_cpu_map__has;
perf_thread_map__new_array;
--
2.43.0.429.g432eaa2c6b-goog


2024-02-01 04:23:29

by Ian Rogers

[permalink] [raw]
Subject: [PATCH v2 2/8] libperf cpumap: Ensure empty cpumap is NULL from alloc

Potential corner cases could cause a cpumap to be allocated with size
0, but an empty cpumap should be represented as NULL. Add a path in
perf_cpu_map__alloc to ensure this.

Suggested-by: James Clark <[email protected]>
Closes: https://lore.kernel.org/lkml/[email protected]/
Signed-off-by: Ian Rogers <[email protected]>
---
tools/lib/perf/cpumap.c | 6 +++++-
1 file changed, 5 insertions(+), 1 deletion(-)

diff --git a/tools/lib/perf/cpumap.c b/tools/lib/perf/cpumap.c
index ba49552952c5..cae799ad44e1 100644
--- a/tools/lib/perf/cpumap.c
+++ b/tools/lib/perf/cpumap.c
@@ -18,9 +18,13 @@ void perf_cpu_map__set_nr(struct perf_cpu_map *map, int nr_cpus)

struct perf_cpu_map *perf_cpu_map__alloc(int nr_cpus)
{
- RC_STRUCT(perf_cpu_map) *cpus = malloc(sizeof(*cpus) + sizeof(struct perf_cpu) * nr_cpus);
+ RC_STRUCT(perf_cpu_map) *cpus;
struct perf_cpu_map *result;

+ if (nr_cpus == 0)
+ return NULL;
+
+ cpus = malloc(sizeof(*cpus) + sizeof(struct perf_cpu) * nr_cpus);
if (ADD_RC_CHK(result, cpus)) {
cpus->nr = nr_cpus;
refcount_set(&cpus->refcnt, 1);
--
2.43.0.429.g432eaa2c6b-goog


2024-02-01 04:24:09

by Ian Rogers

[permalink] [raw]
Subject: [PATCH v2 4/8] perf intel-pt/intel-bts: Switch perf_cpu_map__has_any_cpu_or_is_empty use

Switch perf_cpu_map__has_any_cpu_or_is_empty to
perf_cpu_map__is_any_cpu_or_is_empty as a CPU map may contain CPUs as
well as the dummy event and perf_cpu_map__is_any_cpu_or_is_empty is a
more correct alternative.

Signed-off-by: Ian Rogers <[email protected]>
---
tools/perf/arch/x86/util/intel-bts.c | 4 ++--
tools/perf/arch/x86/util/intel-pt.c | 10 +++++-----
2 files changed, 7 insertions(+), 7 deletions(-)

diff --git a/tools/perf/arch/x86/util/intel-bts.c b/tools/perf/arch/x86/util/intel-bts.c
index af8ae4647585..34696f3d3d5d 100644
--- a/tools/perf/arch/x86/util/intel-bts.c
+++ b/tools/perf/arch/x86/util/intel-bts.c
@@ -143,7 +143,7 @@ static int intel_bts_recording_options(struct auxtrace_record *itr,
if (!opts->full_auxtrace)
return 0;

- if (opts->full_auxtrace && !perf_cpu_map__has_any_cpu_or_is_empty(cpus)) {
+ if (opts->full_auxtrace && !perf_cpu_map__is_any_cpu_or_is_empty(cpus)) {
pr_err(INTEL_BTS_PMU_NAME " does not support per-cpu recording\n");
return -EINVAL;
}
@@ -224,7 +224,7 @@ static int intel_bts_recording_options(struct auxtrace_record *itr,
* In the case of per-cpu mmaps, we need the CPU on the
* AUX event.
*/
- if (!perf_cpu_map__has_any_cpu_or_is_empty(cpus))
+ if (!perf_cpu_map__is_any_cpu_or_is_empty(cpus))
evsel__set_sample_bit(intel_bts_evsel, CPU);
}

diff --git a/tools/perf/arch/x86/util/intel-pt.c b/tools/perf/arch/x86/util/intel-pt.c
index d199619df3ab..6de7e2d21075 100644
--- a/tools/perf/arch/x86/util/intel-pt.c
+++ b/tools/perf/arch/x86/util/intel-pt.c
@@ -369,7 +369,7 @@ static int intel_pt_info_fill(struct auxtrace_record *itr,
ui__warning("Intel Processor Trace: TSC not available\n");
}

- per_cpu_mmaps = !perf_cpu_map__has_any_cpu_or_is_empty(session->evlist->core.user_requested_cpus);
+ per_cpu_mmaps = !perf_cpu_map__is_any_cpu_or_is_empty(session->evlist->core.user_requested_cpus);

auxtrace_info->type = PERF_AUXTRACE_INTEL_PT;
auxtrace_info->priv[INTEL_PT_PMU_TYPE] = intel_pt_pmu->type;
@@ -774,7 +774,7 @@ static int intel_pt_recording_options(struct auxtrace_record *itr,
* Per-cpu recording needs sched_switch events to distinguish different
* threads.
*/
- if (have_timing_info && !perf_cpu_map__has_any_cpu_or_is_empty(cpus) &&
+ if (have_timing_info && !perf_cpu_map__is_any_cpu_or_is_empty(cpus) &&
!record_opts__no_switch_events(opts)) {
if (perf_can_record_switch_events()) {
bool cpu_wide = !target__none(&opts->target) &&
@@ -832,7 +832,7 @@ static int intel_pt_recording_options(struct auxtrace_record *itr,
* In the case of per-cpu mmaps, we need the CPU on the
* AUX event.
*/
- if (!perf_cpu_map__has_any_cpu_or_is_empty(cpus))
+ if (!perf_cpu_map__is_any_cpu_or_is_empty(cpus))
evsel__set_sample_bit(intel_pt_evsel, CPU);
}

@@ -858,7 +858,7 @@ static int intel_pt_recording_options(struct auxtrace_record *itr,
tracking_evsel->immediate = true;

/* In per-cpu case, always need the time of mmap events etc */
- if (!perf_cpu_map__has_any_cpu_or_is_empty(cpus)) {
+ if (!perf_cpu_map__is_any_cpu_or_is_empty(cpus)) {
evsel__set_sample_bit(tracking_evsel, TIME);
/* And the CPU for switch events */
evsel__set_sample_bit(tracking_evsel, CPU);
@@ -870,7 +870,7 @@ static int intel_pt_recording_options(struct auxtrace_record *itr,
* Warn the user when we do not have enough information to decode i.e.
* per-cpu with no sched_switch (except workload-only).
*/
- if (!ptr->have_sched_switch && !perf_cpu_map__has_any_cpu_or_is_empty(cpus) &&
+ if (!ptr->have_sched_switch && !perf_cpu_map__is_any_cpu_or_is_empty(cpus) &&
!target__none(&opts->target) &&
!intel_pt_evsel->core.attr.exclude_user)
ui__warning("Intel Processor Trace decoding will not be possible except for kernel tracing!\n");
--
2.43.0.429.g432eaa2c6b-goog


2024-02-01 04:24:48

by Ian Rogers

[permalink] [raw]
Subject: [PATCH v2 6/8] perf arm64 header: Remove unnecessary CPU map get and put

In both cases the CPU map is known to be owned by either the caller or
a PMU.

Signed-off-by: Ian Rogers <[email protected]>
Reviewed-by: James Clark <[email protected]>
---
tools/perf/arch/arm64/util/header.c | 3 ---
1 file changed, 3 deletions(-)

diff --git a/tools/perf/arch/arm64/util/header.c b/tools/perf/arch/arm64/util/header.c
index 97037499152e..a9de0b5187dd 100644
--- a/tools/perf/arch/arm64/util/header.c
+++ b/tools/perf/arch/arm64/util/header.c
@@ -25,8 +25,6 @@ static int _get_cpuid(char *buf, size_t sz, struct perf_cpu_map *cpus)
if (!sysfs || sz < MIDR_SIZE)
return EINVAL;

- cpus = perf_cpu_map__get(cpus);
-
for (cpu = 0; cpu < perf_cpu_map__nr(cpus); cpu++) {
char path[PATH_MAX];
FILE *file;
@@ -51,7 +49,6 @@ static int _get_cpuid(char *buf, size_t sz, struct perf_cpu_map *cpus)
break;
}

- perf_cpu_map__put(cpus);
return ret;
}

--
2.43.0.429.g432eaa2c6b-goog


2024-02-01 04:25:05

by Ian Rogers

[permalink] [raw]
Subject: [PATCH v2 7/8] perf stat: Remove duplicate cpus_map_matched function

Use libperf's perf_cpu_map__equal that performs the same function.

Signed-off-by: Ian Rogers <[email protected]>
Reviewed-by: James Clark <[email protected]>
---
tools/perf/builtin-stat.c | 22 +---------------------
1 file changed, 1 insertion(+), 21 deletions(-)

diff --git a/tools/perf/builtin-stat.c b/tools/perf/builtin-stat.c
index 280eb0c99d2b..d80bad7c73e4 100644
--- a/tools/perf/builtin-stat.c
+++ b/tools/perf/builtin-stat.c
@@ -164,26 +164,6 @@ static struct perf_stat_config stat_config = {
.iostat_run = false,
};

-static bool cpus_map_matched(struct evsel *a, struct evsel *b)
-{
- if (!a->core.cpus && !b->core.cpus)
- return true;
-
- if (!a->core.cpus || !b->core.cpus)
- return false;
-
- if (perf_cpu_map__nr(a->core.cpus) != perf_cpu_map__nr(b->core.cpus))
- return false;
-
- for (int i = 0; i < perf_cpu_map__nr(a->core.cpus); i++) {
- if (perf_cpu_map__cpu(a->core.cpus, i).cpu !=
- perf_cpu_map__cpu(b->core.cpus, i).cpu)
- return false;
- }
-
- return true;
-}
-
static void evlist__check_cpu_maps(struct evlist *evlist)
{
struct evsel *evsel, *warned_leader = NULL;
@@ -194,7 +174,7 @@ static void evlist__check_cpu_maps(struct evlist *evlist)
/* Check that leader matches cpus with each member. */
if (leader == evsel)
continue;
- if (cpus_map_matched(leader, evsel))
+ if (perf_cpu_map__equal(leader->core.cpus, evsel->core.cpus))
continue;

/* If there's mismatch disable the group and warn user. */
--
2.43.0.429.g432eaa2c6b-goog


2024-02-01 04:25:25

by Ian Rogers

[permalink] [raw]
Subject: [PATCH v2 8/8] perf cpumap: Use perf_cpu_map__for_each_cpu when possible

Rather than manually iterating the CPU map, use
perf_cpu_map__for_each_cpu. When possible tidy local variables.

Signed-off-by: Ian Rogers <[email protected]>
Reviewed-by: James Clark <[email protected]>
---
tools/perf/arch/arm64/util/header.c | 10 ++--
tools/perf/tests/bitmap.c | 13 +++---
tools/perf/tests/topology.c | 46 +++++++++----------
tools/perf/util/bpf_kwork.c | 16 ++++---
tools/perf/util/bpf_kwork_top.c | 12 ++---
tools/perf/util/cpumap.c | 12 ++---
.../scripting-engines/trace-event-python.c | 12 +++--
tools/perf/util/session.c | 5 +-
tools/perf/util/svghelper.c | 20 ++++----
9 files changed, 72 insertions(+), 74 deletions(-)

diff --git a/tools/perf/arch/arm64/util/header.c b/tools/perf/arch/arm64/util/header.c
index a9de0b5187dd..741df3614a09 100644
--- a/tools/perf/arch/arm64/util/header.c
+++ b/tools/perf/arch/arm64/util/header.c
@@ -4,8 +4,6 @@
#include <stdio.h>
#include <stdlib.h>
#include <perf/cpumap.h>
-#include <util/cpumap.h>
-#include <internal/cpumap.h>
#include <api/fs/fs.h>
#include <errno.h>
#include "debug.h"
@@ -19,18 +17,18 @@
static int _get_cpuid(char *buf, size_t sz, struct perf_cpu_map *cpus)
{
const char *sysfs = sysfs__mountpoint();
- int cpu;
- int ret = EINVAL;
+ struct perf_cpu cpu;
+ int idx, ret = EINVAL;

if (!sysfs || sz < MIDR_SIZE)
return EINVAL;

- for (cpu = 0; cpu < perf_cpu_map__nr(cpus); cpu++) {
+ perf_cpu_map__for_each_cpu(cpu, idx, cpus) {
char path[PATH_MAX];
FILE *file;

scnprintf(path, PATH_MAX, "%s/devices/system/cpu/cpu%d" MIDR,
- sysfs, RC_CHK_ACCESS(cpus)->map[cpu].cpu);
+ sysfs, cpu.cpu);

file = fopen(path, "r");
if (!file) {
diff --git a/tools/perf/tests/bitmap.c b/tools/perf/tests/bitmap.c
index 0173f5402a35..98956e0e0765 100644
--- a/tools/perf/tests/bitmap.c
+++ b/tools/perf/tests/bitmap.c
@@ -11,18 +11,19 @@
static unsigned long *get_bitmap(const char *str, int nbits)
{
struct perf_cpu_map *map = perf_cpu_map__new(str);
- unsigned long *bm = NULL;
- int i;
+ unsigned long *bm;

bm = bitmap_zalloc(nbits);

if (map && bm) {
- for (i = 0; i < perf_cpu_map__nr(map); i++)
- __set_bit(perf_cpu_map__cpu(map, i).cpu, bm);
+ int i;
+ struct perf_cpu cpu;
+
+ perf_cpu_map__for_each_cpu(cpu, i, map)
+ __set_bit(cpu.cpu, bm);
}

- if (map)
- perf_cpu_map__put(map);
+ perf_cpu_map__put(map);
return bm;
}

diff --git a/tools/perf/tests/topology.c b/tools/perf/tests/topology.c
index 2a842f53fbb5..a8cb5ba898ab 100644
--- a/tools/perf/tests/topology.c
+++ b/tools/perf/tests/topology.c
@@ -68,6 +68,7 @@ static int check_cpu_topology(char *path, struct perf_cpu_map *map)
};
int i;
struct aggr_cpu_id id;
+ struct perf_cpu cpu;

session = perf_session__new(&data, NULL);
TEST_ASSERT_VAL("can't get session", !IS_ERR(session));
@@ -113,8 +114,7 @@ static int check_cpu_topology(char *path, struct perf_cpu_map *map)
TEST_ASSERT_VAL("Session header CPU map not set", session->header.env.cpu);

for (i = 0; i < session->header.env.nr_cpus_avail; i++) {
- struct perf_cpu cpu = { .cpu = i };
-
+ cpu.cpu = i;
if (!perf_cpu_map__has(map, cpu))
continue;
pr_debug("CPU %d, core %d, socket %d\n", i,
@@ -123,48 +123,48 @@ static int check_cpu_topology(char *path, struct perf_cpu_map *map)
}

// Test that CPU ID contains socket, die, core and CPU
- for (i = 0; i < perf_cpu_map__nr(map); i++) {
- id = aggr_cpu_id__cpu(perf_cpu_map__cpu(map, i), NULL);
+ perf_cpu_map__for_each_cpu(cpu, i, map) {
+ id = aggr_cpu_id__cpu(cpu, NULL);
TEST_ASSERT_VAL("Cpu map - CPU ID doesn't match",
- perf_cpu_map__cpu(map, i).cpu == id.cpu.cpu);
+ cpu.cpu == id.cpu.cpu);

TEST_ASSERT_VAL("Cpu map - Core ID doesn't match",
- session->header.env.cpu[perf_cpu_map__cpu(map, i).cpu].core_id == id.core);
+ session->header.env.cpu[cpu.cpu].core_id == id.core);
TEST_ASSERT_VAL("Cpu map - Socket ID doesn't match",
- session->header.env.cpu[perf_cpu_map__cpu(map, i).cpu].socket_id ==
+ session->header.env.cpu[cpu.cpu].socket_id ==
id.socket);

TEST_ASSERT_VAL("Cpu map - Die ID doesn't match",
- session->header.env.cpu[perf_cpu_map__cpu(map, i).cpu].die_id == id.die);
+ session->header.env.cpu[cpu.cpu].die_id == id.die);
TEST_ASSERT_VAL("Cpu map - Node ID is set", id.node == -1);
TEST_ASSERT_VAL("Cpu map - Thread IDX is set", id.thread_idx == -1);
}

// Test that core ID contains socket, die and core
- for (i = 0; i < perf_cpu_map__nr(map); i++) {
- id = aggr_cpu_id__core(perf_cpu_map__cpu(map, i), NULL);
+ perf_cpu_map__for_each_cpu(cpu, i, map) {
+ id = aggr_cpu_id__core(cpu, NULL);
TEST_ASSERT_VAL("Core map - Core ID doesn't match",
- session->header.env.cpu[perf_cpu_map__cpu(map, i).cpu].core_id == id.core);
+ session->header.env.cpu[cpu.cpu].core_id == id.core);

TEST_ASSERT_VAL("Core map - Socket ID doesn't match",
- session->header.env.cpu[perf_cpu_map__cpu(map, i).cpu].socket_id ==
+ session->header.env.cpu[cpu.cpu].socket_id ==
id.socket);

TEST_ASSERT_VAL("Core map - Die ID doesn't match",
- session->header.env.cpu[perf_cpu_map__cpu(map, i).cpu].die_id == id.die);
+ session->header.env.cpu[cpu.cpu].die_id == id.die);
TEST_ASSERT_VAL("Core map - Node ID is set", id.node == -1);
TEST_ASSERT_VAL("Core map - Thread IDX is set", id.thread_idx == -1);
}

// Test that die ID contains socket and die
- for (i = 0; i < perf_cpu_map__nr(map); i++) {
- id = aggr_cpu_id__die(perf_cpu_map__cpu(map, i), NULL);
+ perf_cpu_map__for_each_cpu(cpu, i, map) {
+ id = aggr_cpu_id__die(cpu, NULL);
TEST_ASSERT_VAL("Die map - Socket ID doesn't match",
- session->header.env.cpu[perf_cpu_map__cpu(map, i).cpu].socket_id ==
+ session->header.env.cpu[cpu.cpu].socket_id ==
id.socket);

TEST_ASSERT_VAL("Die map - Die ID doesn't match",
- session->header.env.cpu[perf_cpu_map__cpu(map, i).cpu].die_id == id.die);
+ session->header.env.cpu[cpu.cpu].die_id == id.die);

TEST_ASSERT_VAL("Die map - Node ID is set", id.node == -1);
TEST_ASSERT_VAL("Die map - Core is set", id.core == -1);
@@ -173,10 +173,10 @@ static int check_cpu_topology(char *path, struct perf_cpu_map *map)
}

// Test that socket ID contains only socket
- for (i = 0; i < perf_cpu_map__nr(map); i++) {
- id = aggr_cpu_id__socket(perf_cpu_map__cpu(map, i), NULL);
+ perf_cpu_map__for_each_cpu(cpu, i, map) {
+ id = aggr_cpu_id__socket(cpu, NULL);
TEST_ASSERT_VAL("Socket map - Socket ID doesn't match",
- session->header.env.cpu[perf_cpu_map__cpu(map, i).cpu].socket_id ==
+ session->header.env.cpu[cpu.cpu].socket_id ==
id.socket);

TEST_ASSERT_VAL("Socket map - Node ID is set", id.node == -1);
@@ -187,10 +187,10 @@ static int check_cpu_topology(char *path, struct perf_cpu_map *map)
}

// Test that node ID contains only node
- for (i = 0; i < perf_cpu_map__nr(map); i++) {
- id = aggr_cpu_id__node(perf_cpu_map__cpu(map, i), NULL);
+ perf_cpu_map__for_each_cpu(cpu, i, map) {
+ id = aggr_cpu_id__node(cpu, NULL);
TEST_ASSERT_VAL("Node map - Node ID doesn't match",
- cpu__get_node(perf_cpu_map__cpu(map, i)) == id.node);
+ cpu__get_node(cpu) == id.node);
TEST_ASSERT_VAL("Node map - Socket is set", id.socket == -1);
TEST_ASSERT_VAL("Node map - Die ID is set", id.die == -1);
TEST_ASSERT_VAL("Node map - Core is set", id.core == -1);
diff --git a/tools/perf/util/bpf_kwork.c b/tools/perf/util/bpf_kwork.c
index 6eb2c78fd7f4..44f0f708a15d 100644
--- a/tools/perf/util/bpf_kwork.c
+++ b/tools/perf/util/bpf_kwork.c
@@ -147,12 +147,12 @@ static bool valid_kwork_class_type(enum kwork_class_type type)

static int setup_filters(struct perf_kwork *kwork)
{
- u8 val = 1;
- int i, nr_cpus, key, fd;
- struct perf_cpu_map *map;
-
if (kwork->cpu_list != NULL) {
- fd = bpf_map__fd(skel->maps.perf_kwork_cpu_filter);
+ int idx, nr_cpus;
+ struct perf_cpu_map *map;
+ struct perf_cpu cpu;
+ int fd = bpf_map__fd(skel->maps.perf_kwork_cpu_filter);
+
if (fd < 0) {
pr_debug("Invalid cpu filter fd\n");
return -1;
@@ -165,8 +165,8 @@ static int setup_filters(struct perf_kwork *kwork)
}

nr_cpus = libbpf_num_possible_cpus();
- for (i = 0; i < perf_cpu_map__nr(map); i++) {
- struct perf_cpu cpu = perf_cpu_map__cpu(map, i);
+ perf_cpu_map__for_each_cpu(cpu, idx, map) {
+ u8 val = 1;

if (cpu.cpu >= nr_cpus) {
perf_cpu_map__put(map);
@@ -181,6 +181,8 @@ static int setup_filters(struct perf_kwork *kwork)
}

if (kwork->profile_name != NULL) {
+ int key, fd;
+
if (strlen(kwork->profile_name) >= MAX_KWORKNAME) {
pr_err("Requested name filter %s too large, limit to %d\n",
kwork->profile_name, MAX_KWORKNAME - 1);
diff --git a/tools/perf/util/bpf_kwork_top.c b/tools/perf/util/bpf_kwork_top.c
index 035e02272790..22a3b00a1e23 100644
--- a/tools/perf/util/bpf_kwork_top.c
+++ b/tools/perf/util/bpf_kwork_top.c
@@ -122,11 +122,11 @@ static bool valid_kwork_class_type(enum kwork_class_type type)

static int setup_filters(struct perf_kwork *kwork)
{
- u8 val = 1;
- int i, nr_cpus, fd;
- struct perf_cpu_map *map;
-
if (kwork->cpu_list) {
+ int idx, nr_cpus, fd;
+ struct perf_cpu_map *map;
+ struct perf_cpu cpu;
+
fd = bpf_map__fd(skel->maps.kwork_top_cpu_filter);
if (fd < 0) {
pr_debug("Invalid cpu filter fd\n");
@@ -140,8 +140,8 @@ static int setup_filters(struct perf_kwork *kwork)
}

nr_cpus = libbpf_num_possible_cpus();
- for (i = 0; i < perf_cpu_map__nr(map); i++) {
- struct perf_cpu cpu = perf_cpu_map__cpu(map, i);
+ perf_cpu_map__for_each_cpu(cpu, idx, map) {
+ u8 val = 1;

if (cpu.cpu >= nr_cpus) {
perf_cpu_map__put(map);
diff --git a/tools/perf/util/cpumap.c b/tools/perf/util/cpumap.c
index 0581ee0fa5f2..e2287187babd 100644
--- a/tools/perf/util/cpumap.c
+++ b/tools/perf/util/cpumap.c
@@ -629,10 +629,10 @@ static char hex_char(unsigned char val)

size_t cpu_map__snprint_mask(struct perf_cpu_map *map, char *buf, size_t size)
{
- int i, cpu;
+ int idx;
char *ptr = buf;
unsigned char *bitmap;
- struct perf_cpu last_cpu = perf_cpu_map__cpu(map, perf_cpu_map__nr(map) - 1);
+ struct perf_cpu c, last_cpu = perf_cpu_map__max(map);

if (buf == NULL)
return 0;
@@ -643,12 +643,10 @@ size_t cpu_map__snprint_mask(struct perf_cpu_map *map, char *buf, size_t size)
return 0;
}

- for (i = 0; i < perf_cpu_map__nr(map); i++) {
- cpu = perf_cpu_map__cpu(map, i).cpu;
- bitmap[cpu / 8] |= 1 << (cpu % 8);
- }
+ perf_cpu_map__for_each_cpu(c, idx, map)
+ bitmap[c.cpu / 8] |= 1 << (c.cpu % 8);

- for (cpu = last_cpu.cpu / 4 * 4; cpu >= 0; cpu -= 4) {
+ for (int cpu = last_cpu.cpu / 4 * 4; cpu >= 0; cpu -= 4) {
unsigned char bits = bitmap[cpu / 8];

if (cpu % 8)
diff --git a/tools/perf/util/scripting-engines/trace-event-python.c b/tools/perf/util/scripting-engines/trace-event-python.c
index 860e1837ba96..8ef0e5ac03c2 100644
--- a/tools/perf/util/scripting-engines/trace-event-python.c
+++ b/tools/perf/util/scripting-engines/trace-event-python.c
@@ -1693,13 +1693,15 @@ static void python_process_stat(struct perf_stat_config *config,
{
struct perf_thread_map *threads = counter->core.threads;
struct perf_cpu_map *cpus = counter->core.cpus;
- int cpu, thread;

- for (thread = 0; thread < perf_thread_map__nr(threads); thread++) {
- for (cpu = 0; cpu < perf_cpu_map__nr(cpus); cpu++) {
- process_stat(counter, perf_cpu_map__cpu(cpus, cpu),
+ for (int thread = 0; thread < perf_thread_map__nr(threads); thread++) {
+ int idx;
+ struct perf_cpu cpu;
+
+ perf_cpu_map__for_each_cpu(cpu, idx, cpus) {
+ process_stat(counter, cpu,
perf_thread_map__pid(threads, thread), tstamp,
- perf_counts(counter->counts, cpu, thread));
+ perf_counts(counter->counts, idx, thread));
}
}
}
diff --git a/tools/perf/util/session.c b/tools/perf/util/session.c
index 199d3e8df315..d52b58344dbc 100644
--- a/tools/perf/util/session.c
+++ b/tools/perf/util/session.c
@@ -2738,6 +2738,7 @@ int perf_session__cpu_bitmap(struct perf_session *session,
int i, err = -1;
struct perf_cpu_map *map;
int nr_cpus = min(session->header.env.nr_cpus_avail, MAX_NR_CPUS);
+ struct perf_cpu cpu;

for (i = 0; i < PERF_TYPE_MAX; ++i) {
struct evsel *evsel;
@@ -2759,9 +2760,7 @@ int perf_session__cpu_bitmap(struct perf_session *session,
return -1;
}

- for (i = 0; i < perf_cpu_map__nr(map); i++) {
- struct perf_cpu cpu = perf_cpu_map__cpu(map, i);
-
+ perf_cpu_map__for_each_cpu(cpu, i, map) {
if (cpu.cpu >= nr_cpus) {
pr_err("Requested CPU %d too large. "
"Consider raising MAX_NR_CPUS\n", cpu.cpu);
diff --git a/tools/perf/util/svghelper.c b/tools/perf/util/svghelper.c
index 1892e9b6aa7f..2b04f47f4db0 100644
--- a/tools/perf/util/svghelper.c
+++ b/tools/perf/util/svghelper.c
@@ -725,26 +725,24 @@ static void scan_core_topology(int *map, struct topology *t, int nr_cpus)

static int str_to_bitmap(char *s, cpumask_t *b, int nr_cpus)
{
- int i;
- int ret = 0;
- struct perf_cpu_map *m;
- struct perf_cpu c;
+ int idx, ret = 0;
+ struct perf_cpu_map *map;
+ struct perf_cpu cpu;

- m = perf_cpu_map__new(s);
- if (!m)
+ map = perf_cpu_map__new(s);
+ if (!map)
return -1;

- for (i = 0; i < perf_cpu_map__nr(m); i++) {
- c = perf_cpu_map__cpu(m, i);
- if (c.cpu >= nr_cpus) {
+ perf_cpu_map__for_each_cpu(cpu, idx, map) {
+ if (cpu.cpu >= nr_cpus) {
ret = -1;
break;
}

- __set_bit(c.cpu, cpumask_bits(b));
+ __set_bit(cpu.cpu, cpumask_bits(b));
}

- perf_cpu_map__put(m);
+ perf_cpu_map__put(map);

return ret;
}
--
2.43.0.429.g432eaa2c6b-goog


2024-02-01 04:32:40

by Ian Rogers

[permalink] [raw]
Subject: [PATCH v2 3/8] perf arm-spe/cs-etm: Directly iterate CPU maps

Rather than iterate all CPUs and see if they are in CPU maps, directly
iterate the CPU map. Similarly make use of the intersect
function. Switch perf_cpu_map__has_any_cpu_or_is_empty to more
appropriate alternatives.

Signed-off-by: Ian Rogers <[email protected]>
---
tools/perf/arch/arm/util/cs-etm.c | 77 ++++++++++++----------------
tools/perf/arch/arm64/util/arm-spe.c | 4 +-
2 files changed, 34 insertions(+), 47 deletions(-)

diff --git a/tools/perf/arch/arm/util/cs-etm.c b/tools/perf/arch/arm/util/cs-etm.c
index 77e6663c1703..f4378ba0b8d6 100644
--- a/tools/perf/arch/arm/util/cs-etm.c
+++ b/tools/perf/arch/arm/util/cs-etm.c
@@ -197,38 +197,32 @@ static int cs_etm_validate_timestamp(struct auxtrace_record *itr,
static int cs_etm_validate_config(struct auxtrace_record *itr,
struct evsel *evsel)
{
- int i, err = -EINVAL;
+ int idx, err = -EINVAL;
struct perf_cpu_map *event_cpus = evsel->evlist->core.user_requested_cpus;
struct perf_cpu_map *online_cpus = perf_cpu_map__new_online_cpus();
+ struct perf_cpu_map *intersect_cpus = perf_cpu_map__intersect(event_cpus, online_cpus);
+ struct perf_cpu cpu;

- /* Set option of each CPU we have */
- for (i = 0; i < cpu__max_cpu().cpu; i++) {
- struct perf_cpu cpu = { .cpu = i, };
-
- /*
- * In per-cpu case, do the validation for CPUs to work with.
- * In per-thread case, the CPU map is empty. Since the traced
- * program can run on any CPUs in this case, thus don't skip
- * validation.
- */
- if (!perf_cpu_map__has_any_cpu_or_is_empty(event_cpus) &&
- !perf_cpu_map__has(event_cpus, cpu))
- continue;
-
- if (!perf_cpu_map__has(online_cpus, cpu))
- continue;
+ perf_cpu_map__put(online_cpus);

- err = cs_etm_validate_context_id(itr, evsel, i);
+ /*
+ * Set option of each CPU we have. In per-cpu case, do the validation
+ * for CPUs to work with. In per-thread case, the CPU map is empty.
+ * Since the traced program can run on any CPUs in this case, thus don't
+ * skip validation.
+ */
+ perf_cpu_map__for_each_cpu_skip_any(cpu, idx, intersect_cpus) {
+ err = cs_etm_validate_context_id(itr, evsel, cpu.cpu);
if (err)
goto out;
- err = cs_etm_validate_timestamp(itr, evsel, i);
+ err = cs_etm_validate_timestamp(itr, evsel, cpu.cpu);
if (err)
goto out;
}

err = 0;
out:
- perf_cpu_map__put(online_cpus);
+ perf_cpu_map__put(intersect_cpus);
return err;
}

@@ -435,7 +429,7 @@ static int cs_etm_recording_options(struct auxtrace_record *itr,
* Also the case of per-cpu mmaps, need the contextID in order to be notified
* when a context switch happened.
*/
- if (!perf_cpu_map__has_any_cpu_or_is_empty(cpus)) {
+ if (!perf_cpu_map__is_any_cpu_or_is_empty(cpus)) {
evsel__set_config_if_unset(cs_etm_pmu, cs_etm_evsel,
"timestamp", 1);
evsel__set_config_if_unset(cs_etm_pmu, cs_etm_evsel,
@@ -461,7 +455,7 @@ static int cs_etm_recording_options(struct auxtrace_record *itr,
evsel->core.attr.sample_period = 1;

/* In per-cpu case, always need the time of mmap events etc */
- if (!perf_cpu_map__has_any_cpu_or_is_empty(cpus))
+ if (!perf_cpu_map__is_any_cpu_or_is_empty(cpus))
evsel__set_sample_bit(evsel, TIME);

err = cs_etm_validate_config(itr, cs_etm_evsel);
@@ -533,38 +527,32 @@ static size_t
cs_etm_info_priv_size(struct auxtrace_record *itr __maybe_unused,
struct evlist *evlist __maybe_unused)
{
- int i;
+ int idx;
int etmv3 = 0, etmv4 = 0, ete = 0;
struct perf_cpu_map *event_cpus = evlist->core.user_requested_cpus;
struct perf_cpu_map *online_cpus = perf_cpu_map__new_online_cpus();
+ struct perf_cpu cpu;

/* cpu map is not empty, we have specific CPUs to work with */
- if (!perf_cpu_map__has_any_cpu_or_is_empty(event_cpus)) {
- for (i = 0; i < cpu__max_cpu().cpu; i++) {
- struct perf_cpu cpu = { .cpu = i, };
-
- if (!perf_cpu_map__has(event_cpus, cpu) ||
- !perf_cpu_map__has(online_cpus, cpu))
- continue;
+ if (!perf_cpu_map__is_empty(event_cpus)) {
+ struct perf_cpu_map *intersect_cpus =
+ perf_cpu_map__intersect(event_cpus, online_cpus);

- if (cs_etm_is_ete(itr, i))
+ perf_cpu_map__for_each_cpu_skip_any(cpu, idx, intersect_cpus) {
+ if (cs_etm_is_ete(itr, cpu.cpu))
ete++;
- else if (cs_etm_is_etmv4(itr, i))
+ else if (cs_etm_is_etmv4(itr, cpu.cpu))
etmv4++;
else
etmv3++;
}
+ perf_cpu_map__put(intersect_cpus);
} else {
/* get configuration for all CPUs in the system */
- for (i = 0; i < cpu__max_cpu().cpu; i++) {
- struct perf_cpu cpu = { .cpu = i, };
-
- if (!perf_cpu_map__has(online_cpus, cpu))
- continue;
-
- if (cs_etm_is_ete(itr, i))
+ perf_cpu_map__for_each_cpu(cpu, idx, online_cpus) {
+ if (cs_etm_is_ete(itr, cpu.cpu))
ete++;
- else if (cs_etm_is_etmv4(itr, i))
+ else if (cs_etm_is_etmv4(itr, cpu.cpu))
etmv4++;
else
etmv3++;
@@ -814,15 +802,14 @@ static int cs_etm_info_fill(struct auxtrace_record *itr,
return -EINVAL;

/* If the cpu_map is empty all online CPUs are involved */
- if (perf_cpu_map__has_any_cpu_or_is_empty(event_cpus)) {
+ if (perf_cpu_map__is_empty(event_cpus)) {
cpu_map = online_cpus;
} else {
/* Make sure all specified CPUs are online */
- for (i = 0; i < perf_cpu_map__nr(event_cpus); i++) {
- struct perf_cpu cpu = { .cpu = i, };
+ struct perf_cpu cpu;

- if (perf_cpu_map__has(event_cpus, cpu) &&
- !perf_cpu_map__has(online_cpus, cpu))
+ perf_cpu_map__for_each_cpu(cpu, i, event_cpus) {
+ if (!perf_cpu_map__has(online_cpus, cpu))
return -EINVAL;
}

diff --git a/tools/perf/arch/arm64/util/arm-spe.c b/tools/perf/arch/arm64/util/arm-spe.c
index 51ccbfd3d246..0b52e67edb3b 100644
--- a/tools/perf/arch/arm64/util/arm-spe.c
+++ b/tools/perf/arch/arm64/util/arm-spe.c
@@ -232,7 +232,7 @@ static int arm_spe_recording_options(struct auxtrace_record *itr,
* In the case of per-cpu mmaps, sample CPU for AUX event;
* also enable the timestamp tracing for samples correlation.
*/
- if (!perf_cpu_map__has_any_cpu_or_is_empty(cpus)) {
+ if (!perf_cpu_map__is_any_cpu_or_is_empty(cpus)) {
evsel__set_sample_bit(arm_spe_evsel, CPU);
evsel__set_config_if_unset(arm_spe_pmu, arm_spe_evsel,
"ts_enable", 1);
@@ -265,7 +265,7 @@ static int arm_spe_recording_options(struct auxtrace_record *itr,
tracking_evsel->core.attr.sample_period = 1;

/* In per-cpu case, always need the time of mmap events etc */
- if (!perf_cpu_map__has_any_cpu_or_is_empty(cpus)) {
+ if (!perf_cpu_map__is_any_cpu_or_is_empty(cpus)) {
evsel__set_sample_bit(tracking_evsel, TIME);
evsel__set_sample_bit(tracking_evsel, CPU);

--
2.43.0.429.g432eaa2c6b-goog


2024-02-01 04:35:09

by Ian Rogers

[permalink] [raw]
Subject: [PATCH v2 5/8] perf cpumap: Clean up use of perf_cpu_map__has_any_cpu_or_is_empty

Most uses of what was perf_cpu_map__empty but is now
perf_cpu_map__has_any_cpu_or_is_empty want to do something with the
CPU map if it contains CPUs. Replace uses of
perf_cpu_map__has_any_cpu_or_is_empty with other helpers so that CPUs
within the map can be handled.

Signed-off-by: Ian Rogers <[email protected]>
Reviewed-by: James Clark <[email protected]>
---
tools/perf/builtin-c2c.c | 6 +-----
tools/perf/builtin-stat.c | 9 ++++-----
tools/perf/util/auxtrace.c | 4 ++--
tools/perf/util/record.c | 2 +-
tools/perf/util/stat.c | 2 +-
5 files changed, 9 insertions(+), 14 deletions(-)

diff --git a/tools/perf/builtin-c2c.c b/tools/perf/builtin-c2c.c
index 16b40f5d43db..24107062c43e 100644
--- a/tools/perf/builtin-c2c.c
+++ b/tools/perf/builtin-c2c.c
@@ -2319,11 +2319,7 @@ static int setup_nodes(struct perf_session *session)

nodes[node] = set;

- /* empty node, skip */
- if (perf_cpu_map__has_any_cpu_or_is_empty(map))
- continue;
-
- perf_cpu_map__for_each_cpu(cpu, idx, map) {
+ perf_cpu_map__for_each_cpu_skip_any(cpu, idx, map) {
__set_bit(cpu.cpu, set);

if (WARN_ONCE(cpu2node[cpu.cpu] != -1, "node/cpu topology bug"))
diff --git a/tools/perf/builtin-stat.c b/tools/perf/builtin-stat.c
index 5fe9abc6a524..280eb0c99d2b 100644
--- a/tools/perf/builtin-stat.c
+++ b/tools/perf/builtin-stat.c
@@ -1317,10 +1317,9 @@ static int cpu__get_cache_id_from_map(struct perf_cpu cpu, char *map)
* be the first online CPU in the cache domain else use the
* first online CPU of the cache domain as the ID.
*/
- if (perf_cpu_map__has_any_cpu_or_is_empty(cpu_map))
+ id = perf_cpu_map__min(cpu_map).cpu;
+ if (id == -1)
id = cpu.cpu;
- else
- id = perf_cpu_map__cpu(cpu_map, 0).cpu;

/* Free the perf_cpu_map used to find the cache ID */
perf_cpu_map__put(cpu_map);
@@ -1623,7 +1622,7 @@ static int perf_stat_init_aggr_mode(void)
* taking the highest cpu number to be the size of
* the aggregation translate cpumap.
*/
- if (!perf_cpu_map__has_any_cpu_or_is_empty(evsel_list->core.user_requested_cpus))
+ if (!perf_cpu_map__is_any_cpu_or_is_empty(evsel_list->core.user_requested_cpus))
nr = perf_cpu_map__max(evsel_list->core.user_requested_cpus).cpu;
else
nr = 0;
@@ -2290,7 +2289,7 @@ int process_stat_config_event(struct perf_session *session,

perf_event__read_stat_config(&stat_config, &event->stat_config);

- if (perf_cpu_map__has_any_cpu_or_is_empty(st->cpus)) {
+ if (perf_cpu_map__is_empty(st->cpus)) {
if (st->aggr_mode != AGGR_UNSET)
pr_warning("warning: processing task data, aggregation mode not set\n");
} else if (st->aggr_mode != AGGR_UNSET) {
diff --git a/tools/perf/util/auxtrace.c b/tools/perf/util/auxtrace.c
index 3684e6009b63..6b1d4bafad59 100644
--- a/tools/perf/util/auxtrace.c
+++ b/tools/perf/util/auxtrace.c
@@ -174,7 +174,7 @@ void auxtrace_mmap_params__set_idx(struct auxtrace_mmap_params *mp,
struct evlist *evlist,
struct evsel *evsel, int idx)
{
- bool per_cpu = !perf_cpu_map__has_any_cpu_or_is_empty(evlist->core.user_requested_cpus);
+ bool per_cpu = !perf_cpu_map__has_any_cpu(evlist->core.user_requested_cpus);

mp->mmap_needed = evsel->needs_auxtrace_mmap;

@@ -648,7 +648,7 @@ int auxtrace_parse_snapshot_options(struct auxtrace_record *itr,

static int evlist__enable_event_idx(struct evlist *evlist, struct evsel *evsel, int idx)
{
- bool per_cpu_mmaps = !perf_cpu_map__has_any_cpu_or_is_empty(evlist->core.user_requested_cpus);
+ bool per_cpu_mmaps = !perf_cpu_map__has_any_cpu(evlist->core.user_requested_cpus);

if (per_cpu_mmaps) {
struct perf_cpu evlist_cpu = perf_cpu_map__cpu(evlist->core.all_cpus, idx);
diff --git a/tools/perf/util/record.c b/tools/perf/util/record.c
index 87e817b3cf7e..e867de8ddaaa 100644
--- a/tools/perf/util/record.c
+++ b/tools/perf/util/record.c
@@ -237,7 +237,7 @@ bool evlist__can_select_event(struct evlist *evlist, const char *str)

evsel = evlist__last(temp_evlist);

- if (!evlist || perf_cpu_map__has_any_cpu_or_is_empty(evlist->core.user_requested_cpus)) {
+ if (!evlist || perf_cpu_map__is_any_cpu_or_is_empty(evlist->core.user_requested_cpus)) {
struct perf_cpu_map *cpus = perf_cpu_map__new_online_cpus();

if (cpus)
diff --git a/tools/perf/util/stat.c b/tools/perf/util/stat.c
index b0bcf92f0f9c..0bd5467389e4 100644
--- a/tools/perf/util/stat.c
+++ b/tools/perf/util/stat.c
@@ -315,7 +315,7 @@ static int check_per_pkg(struct evsel *counter, struct perf_counts_values *vals,
if (!counter->per_pkg)
return 0;

- if (perf_cpu_map__has_any_cpu_or_is_empty(cpus))
+ if (perf_cpu_map__is_any_cpu_or_is_empty(cpus))
return 0;

if (!mask) {
--
2.43.0.429.g432eaa2c6b-goog


2024-02-01 11:33:52

by James Clark

[permalink] [raw]
Subject: Re: [PATCH v2 3/8] perf arm-spe/cs-etm: Directly iterate CPU maps



On 01/02/2024 04:22, Ian Rogers wrote:
> Rather than iterate all CPUs and see if they are in CPU maps, directly
> iterate the CPU map. Similarly make use of the intersect
> function. Switch perf_cpu_map__has_any_cpu_or_is_empty to more
> appropriate alternatives.
>
> Signed-off-by: Ian Rogers <[email protected]>
> ---
> tools/perf/arch/arm/util/cs-etm.c | 77 ++++++++++++----------------
> tools/perf/arch/arm64/util/arm-spe.c | 4 +-
> 2 files changed, 34 insertions(+), 47 deletions(-)
>
> diff --git a/tools/perf/arch/arm/util/cs-etm.c b/tools/perf/arch/arm/util/cs-etm.c
> index 77e6663c1703..f4378ba0b8d6 100644
> --- a/tools/perf/arch/arm/util/cs-etm.c
> +++ b/tools/perf/arch/arm/util/cs-etm.c
> @@ -197,38 +197,32 @@ static int cs_etm_validate_timestamp(struct auxtrace_record *itr,
> static int cs_etm_validate_config(struct auxtrace_record *itr,
> struct evsel *evsel)
> {
> - int i, err = -EINVAL;
> + int idx, err = -EINVAL;
> struct perf_cpu_map *event_cpus = evsel->evlist->core.user_requested_cpus;
> struct perf_cpu_map *online_cpus = perf_cpu_map__new_online_cpus();
> + struct perf_cpu_map *intersect_cpus = perf_cpu_map__intersect(event_cpus, online_cpus);

Hi Ian,

This has the same issue as V1. 'any' intersect 'online' == empty. Now no
validation happens at all. For this to be the same as it used to be,
validation has to happen on _all_ cores when event_cpus == -1. So it
needs to be 'any' intersect 'online' == 'online'.

Same issue below with cs_etm_info_priv_size()

Thanks
James

> + struct perf_cpu cpu;
>
> - /* Set option of each CPU we have */
> - for (i = 0; i < cpu__max_cpu().cpu; i++) {
> - struct perf_cpu cpu = { .cpu = i, };
> -
> - /*
> - * In per-cpu case, do the validation for CPUs to work with.
> - * In per-thread case, the CPU map is empty. Since the traced
> - * program can run on any CPUs in this case, thus don't skip
> - * validation.
> - */
> - if (!perf_cpu_map__has_any_cpu_or_is_empty(event_cpus) &&
> - !perf_cpu_map__has(event_cpus, cpu))
> - continue;
> -
> - if (!perf_cpu_map__has(online_cpus, cpu))
> - continue;
> + perf_cpu_map__put(online_cpus);
>
> - err = cs_etm_validate_context_id(itr, evsel, i);
> + /*
> + * Set option of each CPU we have. In per-cpu case, do the validation
> + * for CPUs to work with. In per-thread case, the CPU map is empty.
> + * Since the traced program can run on any CPUs in this case, thus don't
> + * skip validation.
> + */
> + perf_cpu_map__for_each_cpu_skip_any(cpu, idx, intersect_cpus) {
> + err = cs_etm_validate_context_id(itr, evsel, cpu.cpu);
> if (err)
> goto out;
> - err = cs_etm_validate_timestamp(itr, evsel, i);
> + err = cs_etm_validate_timestamp(itr, evsel, cpu.cpu);
> if (err)
> goto out;
> }
>
> err = 0;
> out:
> - perf_cpu_map__put(online_cpus);
> + perf_cpu_map__put(intersect_cpus);
> return err;
> }
>
> @@ -435,7 +429,7 @@ static int cs_etm_recording_options(struct auxtrace_record *itr,
> * Also the case of per-cpu mmaps, need the contextID in order to be notified
> * when a context switch happened.
> */
> - if (!perf_cpu_map__has_any_cpu_or_is_empty(cpus)) {
> + if (!perf_cpu_map__is_any_cpu_or_is_empty(cpus)) {
> evsel__set_config_if_unset(cs_etm_pmu, cs_etm_evsel,
> "timestamp", 1);
> evsel__set_config_if_unset(cs_etm_pmu, cs_etm_evsel,
> @@ -461,7 +455,7 @@ static int cs_etm_recording_options(struct auxtrace_record *itr,
> evsel->core.attr.sample_period = 1;
>
> /* In per-cpu case, always need the time of mmap events etc */
> - if (!perf_cpu_map__has_any_cpu_or_is_empty(cpus))
> + if (!perf_cpu_map__is_any_cpu_or_is_empty(cpus))
> evsel__set_sample_bit(evsel, TIME);
>
> err = cs_etm_validate_config(itr, cs_etm_evsel);
> @@ -533,38 +527,32 @@ static size_t
> cs_etm_info_priv_size(struct auxtrace_record *itr __maybe_unused,
> struct evlist *evlist __maybe_unused)
> {
> - int i;
> + int idx;
> int etmv3 = 0, etmv4 = 0, ete = 0;
> struct perf_cpu_map *event_cpus = evlist->core.user_requested_cpus;
> struct perf_cpu_map *online_cpus = perf_cpu_map__new_online_cpus();
> + struct perf_cpu cpu;
>
> /* cpu map is not empty, we have specific CPUs to work with */
> - if (!perf_cpu_map__has_any_cpu_or_is_empty(event_cpus)) {
> - for (i = 0; i < cpu__max_cpu().cpu; i++) {
> - struct perf_cpu cpu = { .cpu = i, };
> -
> - if (!perf_cpu_map__has(event_cpus, cpu) ||
> - !perf_cpu_map__has(online_cpus, cpu))
> - continue;
> + if (!perf_cpu_map__is_empty(event_cpus)) {
> + struct perf_cpu_map *intersect_cpus =
> + perf_cpu_map__intersect(event_cpus, online_cpus);
>
> - if (cs_etm_is_ete(itr, i))
> + perf_cpu_map__for_each_cpu_skip_any(cpu, idx, intersect_cpus) {
> + if (cs_etm_is_ete(itr, cpu.cpu))
> ete++;
> - else if (cs_etm_is_etmv4(itr, i))
> + else if (cs_etm_is_etmv4(itr, cpu.cpu))
> etmv4++;
> else
> etmv3++;
> }
> + perf_cpu_map__put(intersect_cpus);
> } else {
> /* get configuration for all CPUs in the system */
> - for (i = 0; i < cpu__max_cpu().cpu; i++) {
> - struct perf_cpu cpu = { .cpu = i, };
> -
> - if (!perf_cpu_map__has(online_cpus, cpu))
> - continue;
> -
> - if (cs_etm_is_ete(itr, i))
> + perf_cpu_map__for_each_cpu(cpu, idx, online_cpus) {
> + if (cs_etm_is_ete(itr, cpu.cpu))
> ete++;
> - else if (cs_etm_is_etmv4(itr, i))
> + else if (cs_etm_is_etmv4(itr, cpu.cpu))
> etmv4++;
> else
> etmv3++;
> @@ -814,15 +802,14 @@ static int cs_etm_info_fill(struct auxtrace_record *itr,
> return -EINVAL;
>
> /* If the cpu_map is empty all online CPUs are involved */
> - if (perf_cpu_map__has_any_cpu_or_is_empty(event_cpus)) {
> + if (perf_cpu_map__is_empty(event_cpus)) {
> cpu_map = online_cpus;
> } else {
> /* Make sure all specified CPUs are online */
> - for (i = 0; i < perf_cpu_map__nr(event_cpus); i++) {
> - struct perf_cpu cpu = { .cpu = i, };
> + struct perf_cpu cpu;
>
> - if (perf_cpu_map__has(event_cpus, cpu) &&
> - !perf_cpu_map__has(online_cpus, cpu))
> + perf_cpu_map__for_each_cpu(cpu, i, event_cpus) {
> + if (!perf_cpu_map__has(online_cpus, cpu))
> return -EINVAL;
> }
>
> diff --git a/tools/perf/arch/arm64/util/arm-spe.c b/tools/perf/arch/arm64/util/arm-spe.c
> index 51ccbfd3d246..0b52e67edb3b 100644
> --- a/tools/perf/arch/arm64/util/arm-spe.c
> +++ b/tools/perf/arch/arm64/util/arm-spe.c
> @@ -232,7 +232,7 @@ static int arm_spe_recording_options(struct auxtrace_record *itr,
> * In the case of per-cpu mmaps, sample CPU for AUX event;
> * also enable the timestamp tracing for samples correlation.
> */
> - if (!perf_cpu_map__has_any_cpu_or_is_empty(cpus)) {
> + if (!perf_cpu_map__is_any_cpu_or_is_empty(cpus)) {
> evsel__set_sample_bit(arm_spe_evsel, CPU);
> evsel__set_config_if_unset(arm_spe_pmu, arm_spe_evsel,
> "ts_enable", 1);
> @@ -265,7 +265,7 @@ static int arm_spe_recording_options(struct auxtrace_record *itr,
> tracking_evsel->core.attr.sample_period = 1;
>
> /* In per-cpu case, always need the time of mmap events etc */
> - if (!perf_cpu_map__has_any_cpu_or_is_empty(cpus)) {
> + if (!perf_cpu_map__is_any_cpu_or_is_empty(cpus)) {
> evsel__set_sample_bit(tracking_evsel, TIME);
> evsel__set_sample_bit(tracking_evsel, CPU);
>

2024-02-01 14:25:32

by Ian Rogers

[permalink] [raw]
Subject: Re: [PATCH v2 3/8] perf arm-spe/cs-etm: Directly iterate CPU maps

On Thu, Feb 1, 2024 at 3:25 AM James Clark <[email protected]> wrote:
>
>
>
> On 01/02/2024 04:22, Ian Rogers wrote:
> > Rather than iterate all CPUs and see if they are in CPU maps, directly
> > iterate the CPU map. Similarly make use of the intersect
> > function. Switch perf_cpu_map__has_any_cpu_or_is_empty to more
> > appropriate alternatives.
> >
> > Signed-off-by: Ian Rogers <[email protected]>
> > ---
> > tools/perf/arch/arm/util/cs-etm.c | 77 ++++++++++++----------------
> > tools/perf/arch/arm64/util/arm-spe.c | 4 +-
> > 2 files changed, 34 insertions(+), 47 deletions(-)
> >
> > diff --git a/tools/perf/arch/arm/util/cs-etm.c b/tools/perf/arch/arm/util/cs-etm.c
> > index 77e6663c1703..f4378ba0b8d6 100644
> > --- a/tools/perf/arch/arm/util/cs-etm.c
> > +++ b/tools/perf/arch/arm/util/cs-etm.c
> > @@ -197,38 +197,32 @@ static int cs_etm_validate_timestamp(struct auxtrace_record *itr,
> > static int cs_etm_validate_config(struct auxtrace_record *itr,
> > struct evsel *evsel)
> > {
> > - int i, err = -EINVAL;
> > + int idx, err = -EINVAL;
> > struct perf_cpu_map *event_cpus = evsel->evlist->core.user_requested_cpus;
> > struct perf_cpu_map *online_cpus = perf_cpu_map__new_online_cpus();
> > + struct perf_cpu_map *intersect_cpus = perf_cpu_map__intersect(event_cpus, online_cpus);
>
> Hi Ian,
>
> This has the same issue as V1. 'any' intersect 'online' == empty. Now no
> validation happens anymore. For this to be the same as it used to be,
> validation has to happen on _all_ cores when event_cpus == -1. So it
> needs to be 'any' intersect 'online' == 'online'.
>
> Same issue below with cs_etm_info_priv_size()

Thanks James, sorry for the churn. I'll work on that for v3.

Ian

> Thanks
> James
>
> > + struct perf_cpu cpu;
> >
> > - /* Set option of each CPU we have */
> > - for (i = 0; i < cpu__max_cpu().cpu; i++) {
> > - struct perf_cpu cpu = { .cpu = i, };
> > -
> > - /*
> > - * In per-cpu case, do the validation for CPUs to work with.
> > - * In per-thread case, the CPU map is empty. Since the traced
> > - * program can run on any CPUs in this case, thus don't skip
> > - * validation.
> > - */
> > - if (!perf_cpu_map__has_any_cpu_or_is_empty(event_cpus) &&
> > - !perf_cpu_map__has(event_cpus, cpu))
> > - continue;
> > -
> > - if (!perf_cpu_map__has(online_cpus, cpu))
> > - continue;
> > + perf_cpu_map__put(online_cpus);
> >
> > - err = cs_etm_validate_context_id(itr, evsel, i);
> > + /*
> > + * Set option of each CPU we have. In per-cpu case, do the validation
> > + * for CPUs to work with. In per-thread case, the CPU map is empty.
> > + * Since the traced program can run on any CPUs in this case, thus don't
> > + * skip validation.
> > + */
> > + perf_cpu_map__for_each_cpu_skip_any(cpu, idx, intersect_cpus) {
> > + err = cs_etm_validate_context_id(itr, evsel, cpu.cpu);
> > if (err)
> > goto out;
> > - err = cs_etm_validate_timestamp(itr, evsel, i);
> > + err = cs_etm_validate_timestamp(itr, evsel, cpu.cpu);
> > if (err)
> > goto out;
> > }
> >
> > err = 0;
> > out:
> > - perf_cpu_map__put(online_cpus);
> > + perf_cpu_map__put(intersect_cpus);
> > return err;
> > }
> >
> > @@ -435,7 +429,7 @@ static int cs_etm_recording_options(struct auxtrace_record *itr,
> > * Also the case of per-cpu mmaps, need the contextID in order to be notified
> > * when a context switch happened.
> > */
> > - if (!perf_cpu_map__has_any_cpu_or_is_empty(cpus)) {
> > + if (!perf_cpu_map__is_any_cpu_or_is_empty(cpus)) {
> > evsel__set_config_if_unset(cs_etm_pmu, cs_etm_evsel,
> > "timestamp", 1);
> > evsel__set_config_if_unset(cs_etm_pmu, cs_etm_evsel,
> > @@ -461,7 +455,7 @@ static int cs_etm_recording_options(struct auxtrace_record *itr,
> > evsel->core.attr.sample_period = 1;
> >
> > /* In per-cpu case, always need the time of mmap events etc */
> > - if (!perf_cpu_map__has_any_cpu_or_is_empty(cpus))
> > + if (!perf_cpu_map__is_any_cpu_or_is_empty(cpus))
> > evsel__set_sample_bit(evsel, TIME);
> >
> > err = cs_etm_validate_config(itr, cs_etm_evsel);
> > @@ -533,38 +527,32 @@ static size_t
> > cs_etm_info_priv_size(struct auxtrace_record *itr __maybe_unused,
> > struct evlist *evlist __maybe_unused)
> > {
> > - int i;
> > + int idx;
> > int etmv3 = 0, etmv4 = 0, ete = 0;
> > struct perf_cpu_map *event_cpus = evlist->core.user_requested_cpus;
> > struct perf_cpu_map *online_cpus = perf_cpu_map__new_online_cpus();
> > + struct perf_cpu cpu;
> >
> > /* cpu map is not empty, we have specific CPUs to work with */
> > - if (!perf_cpu_map__has_any_cpu_or_is_empty(event_cpus)) {
> > - for (i = 0; i < cpu__max_cpu().cpu; i++) {
> > - struct perf_cpu cpu = { .cpu = i, };
> > -
> > - if (!perf_cpu_map__has(event_cpus, cpu) ||
> > - !perf_cpu_map__has(online_cpus, cpu))
> > - continue;
> > + if (!perf_cpu_map__is_empty(event_cpus)) {
> > + struct perf_cpu_map *intersect_cpus =
> > + perf_cpu_map__intersect(event_cpus, online_cpus);
> >
> > - if (cs_etm_is_ete(itr, i))
> > + perf_cpu_map__for_each_cpu_skip_any(cpu, idx, intersect_cpus) {
> > + if (cs_etm_is_ete(itr, cpu.cpu))
> > ete++;
> > - else if (cs_etm_is_etmv4(itr, i))
> > + else if (cs_etm_is_etmv4(itr, cpu.cpu))
> > etmv4++;
> > else
> > etmv3++;
> > }
> > + perf_cpu_map__put(intersect_cpus);
> > } else {
> > /* get configuration for all CPUs in the system */
> > - for (i = 0; i < cpu__max_cpu().cpu; i++) {
> > - struct perf_cpu cpu = { .cpu = i, };
> > -
> > - if (!perf_cpu_map__has(online_cpus, cpu))
> > - continue;
> > -
> > - if (cs_etm_is_ete(itr, i))
> > + perf_cpu_map__for_each_cpu(cpu, idx, online_cpus) {
> > + if (cs_etm_is_ete(itr, cpu.cpu))
> > ete++;
> > - else if (cs_etm_is_etmv4(itr, i))
> > + else if (cs_etm_is_etmv4(itr, cpu.cpu))
> > etmv4++;
> > else
> > etmv3++;
> > @@ -814,15 +802,14 @@ static int cs_etm_info_fill(struct auxtrace_record *itr,
> > return -EINVAL;
> >
> > /* If the cpu_map is empty all online CPUs are involved */
> > - if (perf_cpu_map__has_any_cpu_or_is_empty(event_cpus)) {
> > + if (perf_cpu_map__is_empty(event_cpus)) {
> > cpu_map = online_cpus;
> > } else {
> > /* Make sure all specified CPUs are online */
> > - for (i = 0; i < perf_cpu_map__nr(event_cpus); i++) {
> > - struct perf_cpu cpu = { .cpu = i, };
> > + struct perf_cpu cpu;
> >
> > - if (perf_cpu_map__has(event_cpus, cpu) &&
> > - !perf_cpu_map__has(online_cpus, cpu))
> > + perf_cpu_map__for_each_cpu(cpu, i, event_cpus) {
> > + if (!perf_cpu_map__has(online_cpus, cpu))
> > return -EINVAL;
> > }
> >
> > diff --git a/tools/perf/arch/arm64/util/arm-spe.c b/tools/perf/arch/arm64/util/arm-spe.c
> > index 51ccbfd3d246..0b52e67edb3b 100644
> > --- a/tools/perf/arch/arm64/util/arm-spe.c
> > +++ b/tools/perf/arch/arm64/util/arm-spe.c
> > @@ -232,7 +232,7 @@ static int arm_spe_recording_options(struct auxtrace_record *itr,
> > * In the case of per-cpu mmaps, sample CPU for AUX event;
> > * also enable the timestamp tracing for samples correlation.
> > */
> > - if (!perf_cpu_map__has_any_cpu_or_is_empty(cpus)) {
> > + if (!perf_cpu_map__is_any_cpu_or_is_empty(cpus)) {
> > evsel__set_sample_bit(arm_spe_evsel, CPU);
> > evsel__set_config_if_unset(arm_spe_pmu, arm_spe_evsel,
> > "ts_enable", 1);
> > @@ -265,7 +265,7 @@ static int arm_spe_recording_options(struct auxtrace_record *itr,
> > tracking_evsel->core.attr.sample_period = 1;
> >
> > /* In per-cpu case, always need the time of mmap events etc */
> > - if (!perf_cpu_map__has_any_cpu_or_is_empty(cpus)) {
> > + if (!perf_cpu_map__is_any_cpu_or_is_empty(cpus)) {
> > evsel__set_sample_bit(tracking_evsel, TIME);
> > evsel__set_sample_bit(tracking_evsel, CPU);
> >