Rename and clean up the use of libperf CPU map functions, particularly
focusing on perf_cpu_map__empty, which may return true for maps that
contain CPUs alongside the "any CPU"/dummy value.
perf_cpu_map__nr is also troubling in that iterating an empty CPU map
will yield the "any CPU"/dummy value. Reduce the number of such calls
by using the perf_cpu_map__for_each_cpu macro.
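As an illustration, a minimal sketch against the public libperf API of
the iteration pattern the series moves towards (the _skip_any variant
also excludes the "any CPU"/dummy value):

  #include <perf/cpumap.h>
  #include <stdio.h>

  /* Iterate via the macro rather than open-coded perf_cpu_map__nr()/
   * perf_cpu_map__cpu() indexing; _skip_any skips the "any CPU" (-1)
   * value. */
  static void print_cpus(struct perf_cpu_map *map)
  {
          struct perf_cpu cpu;
          int idx;

          perf_cpu_map__for_each_cpu_skip_any(cpu, idx, map)
                  printf("idx %d: cpu %d\n", idx, cpu.cpu);
  }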
v2: 6 patches were merged by Arnaldo. A new patch was added to ensure
empty maps are allocated as NULL (suggested by James Clark). Hopefully
this also fixes "perf arm-spe/cs-etm: Directly iterate CPU maps".
Ian Rogers (8):
libperf cpumap: Add any, empty and min helpers
libperf cpumap: Ensure empty cpumap is NULL from alloc
perf arm-spe/cs-etm: Directly iterate CPU maps
perf intel-pt/intel-bts: Switch perf_cpu_map__has_any_cpu_or_is_empty
use
perf cpumap: Clean up use of perf_cpu_map__has_any_cpu_or_is_empty
perf arm64 header: Remove unnecessary CPU map get and put
perf stat: Remove duplicate cpus_map_matched function
perf cpumap: Use perf_cpu_map__for_each_cpu when possible
tools/lib/perf/cpumap.c | 33 +++++++-
tools/lib/perf/include/perf/cpumap.h | 16 ++++
tools/lib/perf/libperf.map | 4 +
tools/perf/arch/arm/util/cs-etm.c | 77 ++++++++-----------
tools/perf/arch/arm64/util/arm-spe.c | 4 +-
tools/perf/arch/arm64/util/header.c | 13 +---
tools/perf/arch/x86/util/intel-bts.c | 4 +-
tools/perf/arch/x86/util/intel-pt.c | 10 +--
tools/perf/builtin-c2c.c | 6 +-
tools/perf/builtin-stat.c | 31 ++------
tools/perf/tests/bitmap.c | 13 ++--
tools/perf/tests/topology.c | 46 +++++------
tools/perf/util/auxtrace.c | 4 +-
tools/perf/util/bpf_kwork.c | 16 ++--
tools/perf/util/bpf_kwork_top.c | 12 +--
tools/perf/util/cpumap.c | 12 ++-
tools/perf/util/record.c | 2 +-
.../scripting-engines/trace-event-python.c | 12 +--
tools/perf/util/session.c | 5 +-
tools/perf/util/stat.c | 2 +-
tools/perf/util/svghelper.c | 20 +++--
21 files changed, 175 insertions(+), 167 deletions(-)
--
2.43.0.429.g432eaa2c6b-goog
Add additional helpers to more precisely replace uses of
perf_cpu_map__has_any_cpu_or_is_empty.
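A sketch of the intended semantics (assuming the
perf_cpu_map__new_any_cpu() constructor from the earlier renames; an
empty map is represented as NULL):

  #include <perf/cpumap.h>
  #include <assert.h>

  int main(void)
  {
          struct perf_cpu_map *any = perf_cpu_map__new_any_cpu(); /* { -1 } */
          struct perf_cpu_map *cpus = perf_cpu_map__new("0-1");   /* { 0, 1 } */

          /* True only for NULL or the lone "any CPU"/dummy value. */
          assert(perf_cpu_map__is_any_cpu_or_is_empty(any));
          assert(!perf_cpu_map__is_any_cpu_or_is_empty(cpus));
          /* A plain NULL check. */
          assert(perf_cpu_map__is_empty(NULL) && !perf_cpu_map__is_empty(any));
          /* min skips the dummy value, returning -1 when nothing remains. */
          assert(perf_cpu_map__min(cpus).cpu == 0);
          assert(perf_cpu_map__min(any).cpu == -1);

          perf_cpu_map__put(any);
          perf_cpu_map__put(cpus);
          return 0;
  }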
Signed-off-by: Ian Rogers <[email protected]>
---
tools/lib/perf/cpumap.c | 27 +++++++++++++++++++++++++++
tools/lib/perf/include/perf/cpumap.h | 16 ++++++++++++++++
tools/lib/perf/libperf.map | 4 ++++
3 files changed, 47 insertions(+)
diff --git a/tools/lib/perf/cpumap.c b/tools/lib/perf/cpumap.c
index 4adcd7920d03..ba49552952c5 100644
--- a/tools/lib/perf/cpumap.c
+++ b/tools/lib/perf/cpumap.c
@@ -316,6 +316,19 @@ bool perf_cpu_map__has_any_cpu_or_is_empty(const struct perf_cpu_map *map)
return map ? __perf_cpu_map__cpu(map, 0).cpu == -1 : true;
}
+bool perf_cpu_map__is_any_cpu_or_is_empty(const struct perf_cpu_map *map)
+{
+ if (!map)
+ return true;
+
+ return __perf_cpu_map__nr(map) == 1 && __perf_cpu_map__cpu(map, 0).cpu == -1;
+}
+
+bool perf_cpu_map__is_empty(const struct perf_cpu_map *map)
+{
+ return map == NULL;
+}
+
int perf_cpu_map__idx(const struct perf_cpu_map *cpus, struct perf_cpu cpu)
{
int low, high;
@@ -372,6 +385,20 @@ bool perf_cpu_map__has_any_cpu(const struct perf_cpu_map *map)
return map && __perf_cpu_map__cpu(map, 0).cpu == -1;
}
+struct perf_cpu perf_cpu_map__min(const struct perf_cpu_map *map)
+{
+ struct perf_cpu cpu, result = {
+ .cpu = -1
+ };
+ int idx;
+
+ perf_cpu_map__for_each_cpu_skip_any(cpu, idx, map) {
+ result = cpu;
+ break;
+ }
+ return result;
+}
+
struct perf_cpu perf_cpu_map__max(const struct perf_cpu_map *map)
{
struct perf_cpu result = {
diff --git a/tools/lib/perf/include/perf/cpumap.h b/tools/lib/perf/include/perf/cpumap.h
index 228c6c629b0c..90457d17fb2f 100644
--- a/tools/lib/perf/include/perf/cpumap.h
+++ b/tools/lib/perf/include/perf/cpumap.h
@@ -61,6 +61,22 @@ LIBPERF_API int perf_cpu_map__nr(const struct perf_cpu_map *cpus);
* perf_cpu_map__has_any_cpu_or_is_empty - is map either empty or has the "any CPU"/dummy value.
*/
LIBPERF_API bool perf_cpu_map__has_any_cpu_or_is_empty(const struct perf_cpu_map *map);
+/**
+ * perf_cpu_map__is_any_cpu_or_is_empty - is map either empty or just the "any CPU"/dummy value.
+ */
+LIBPERF_API bool perf_cpu_map__is_any_cpu_or_is_empty(const struct perf_cpu_map *map);
+/**
+ * perf_cpu_map__is_empty - does the map contain no values, not even the
+ * special "any CPU"/dummy value.
+ */
+LIBPERF_API bool perf_cpu_map__is_empty(const struct perf_cpu_map *map);
+/**
+ * perf_cpu_map__min - the minimum CPU value or -1 if empty or just the "any CPU"/dummy value.
+ */
+LIBPERF_API struct perf_cpu perf_cpu_map__min(const struct perf_cpu_map *map);
+/**
+ * perf_cpu_map__max - the maximum CPU value or -1 if empty or just the "any CPU"/dummy value.
+ */
LIBPERF_API struct perf_cpu perf_cpu_map__max(const struct perf_cpu_map *map);
LIBPERF_API bool perf_cpu_map__has(const struct perf_cpu_map *map, struct perf_cpu cpu);
LIBPERF_API bool perf_cpu_map__equal(const struct perf_cpu_map *lhs,
diff --git a/tools/lib/perf/libperf.map b/tools/lib/perf/libperf.map
index 10b3f3722642..2aa79b696032 100644
--- a/tools/lib/perf/libperf.map
+++ b/tools/lib/perf/libperf.map
@@ -10,6 +10,10 @@ LIBPERF_0.0.1 {
perf_cpu_map__nr;
perf_cpu_map__cpu;
perf_cpu_map__has_any_cpu_or_is_empty;
+ perf_cpu_map__is_any_cpu_or_is_empty;
+ perf_cpu_map__is_empty;
+ perf_cpu_map__has_any_cpu;
+ perf_cpu_map__min;
perf_cpu_map__max;
perf_cpu_map__has;
perf_thread_map__new_array;
--
2.43.0.429.g432eaa2c6b-goog
Potential corner cases could cause a cpumap to be allocated with size
0, but an empty cpumap should be represented as NULL. Add a path in
perf_cpu_map__alloc to ensure this.
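A sketch of the guarded corner case (perf_cpu_map__alloc is the
internal constructor declared in internal/cpumap.h):

  #include <internal/cpumap.h>
  #include <assert.h>

  /* A zero-sized request now yields NULL, preserving the invariant
   * that an empty map is NULL rather than a 0-entry allocation. */
  void check_empty_alloc(void)
  {
          assert(perf_cpu_map__alloc(0) == NULL);
  }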
Suggested-by: James Clark <[email protected]>
Closes: https://lore.kernel.org/lkml/[email protected]/
Signed-off-by: Ian Rogers <[email protected]>
---
tools/lib/perf/cpumap.c | 6 +++++-
1 file changed, 5 insertions(+), 1 deletion(-)
diff --git a/tools/lib/perf/cpumap.c b/tools/lib/perf/cpumap.c
index ba49552952c5..cae799ad44e1 100644
--- a/tools/lib/perf/cpumap.c
+++ b/tools/lib/perf/cpumap.c
@@ -18,9 +18,13 @@ void perf_cpu_map__set_nr(struct perf_cpu_map *map, int nr_cpus)
struct perf_cpu_map *perf_cpu_map__alloc(int nr_cpus)
{
- RC_STRUCT(perf_cpu_map) *cpus = malloc(sizeof(*cpus) + sizeof(struct perf_cpu) * nr_cpus);
+ RC_STRUCT(perf_cpu_map) *cpus;
struct perf_cpu_map *result;
+ if (nr_cpus == 0)
+ return NULL;
+
+ cpus = malloc(sizeof(*cpus) + sizeof(struct perf_cpu) * nr_cpus);
if (ADD_RC_CHK(result, cpus)) {
cpus->nr = nr_cpus;
refcount_set(&cpus->refcnt, 1);
--
2.43.0.429.g432eaa2c6b-goog
Switch perf_cpu_map__has_any_cpu_or_is_empty to
perf_cpu_map__is_any_cpu_or_is_empty: a CPU map may contain CPUs as
well as the "any CPU"/dummy value, making
perf_cpu_map__is_any_cpu_or_is_empty the more correct alternative.
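A minimal sketch of the distinction (per_cpu_recording() is a
hypothetical wrapper mirroring the checks below; a map such as
{ -1, 0, 1 } holds the dummy value alongside real CPUs):

  #include <perf/cpumap.h>
  #include <stdbool.h>

  /* has_any_cpu_or_is_empty() is true for { -1, 0, 1 } even though
   * real CPUs are present; is_any_cpu_or_is_empty() is true only for
   * NULL or the lone dummy value. */
  static bool per_cpu_recording(struct perf_cpu_map *user_requested_cpus)
  {
          return !perf_cpu_map__is_any_cpu_or_is_empty(user_requested_cpus);
  }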
Signed-off-by: Ian Rogers <[email protected]>
---
tools/perf/arch/x86/util/intel-bts.c | 4 ++--
tools/perf/arch/x86/util/intel-pt.c | 10 +++++-----
2 files changed, 7 insertions(+), 7 deletions(-)
diff --git a/tools/perf/arch/x86/util/intel-bts.c b/tools/perf/arch/x86/util/intel-bts.c
index af8ae4647585..34696f3d3d5d 100644
--- a/tools/perf/arch/x86/util/intel-bts.c
+++ b/tools/perf/arch/x86/util/intel-bts.c
@@ -143,7 +143,7 @@ static int intel_bts_recording_options(struct auxtrace_record *itr,
if (!opts->full_auxtrace)
return 0;
- if (opts->full_auxtrace && !perf_cpu_map__has_any_cpu_or_is_empty(cpus)) {
+ if (opts->full_auxtrace && !perf_cpu_map__is_any_cpu_or_is_empty(cpus)) {
pr_err(INTEL_BTS_PMU_NAME " does not support per-cpu recording\n");
return -EINVAL;
}
@@ -224,7 +224,7 @@ static int intel_bts_recording_options(struct auxtrace_record *itr,
* In the case of per-cpu mmaps, we need the CPU on the
* AUX event.
*/
- if (!perf_cpu_map__has_any_cpu_or_is_empty(cpus))
+ if (!perf_cpu_map__is_any_cpu_or_is_empty(cpus))
evsel__set_sample_bit(intel_bts_evsel, CPU);
}
diff --git a/tools/perf/arch/x86/util/intel-pt.c b/tools/perf/arch/x86/util/intel-pt.c
index d199619df3ab..6de7e2d21075 100644
--- a/tools/perf/arch/x86/util/intel-pt.c
+++ b/tools/perf/arch/x86/util/intel-pt.c
@@ -369,7 +369,7 @@ static int intel_pt_info_fill(struct auxtrace_record *itr,
ui__warning("Intel Processor Trace: TSC not available\n");
}
- per_cpu_mmaps = !perf_cpu_map__has_any_cpu_or_is_empty(session->evlist->core.user_requested_cpus);
+ per_cpu_mmaps = !perf_cpu_map__is_any_cpu_or_is_empty(session->evlist->core.user_requested_cpus);
auxtrace_info->type = PERF_AUXTRACE_INTEL_PT;
auxtrace_info->priv[INTEL_PT_PMU_TYPE] = intel_pt_pmu->type;
@@ -774,7 +774,7 @@ static int intel_pt_recording_options(struct auxtrace_record *itr,
* Per-cpu recording needs sched_switch events to distinguish different
* threads.
*/
- if (have_timing_info && !perf_cpu_map__has_any_cpu_or_is_empty(cpus) &&
+ if (have_timing_info && !perf_cpu_map__is_any_cpu_or_is_empty(cpus) &&
!record_opts__no_switch_events(opts)) {
if (perf_can_record_switch_events()) {
bool cpu_wide = !target__none(&opts->target) &&
@@ -832,7 +832,7 @@ static int intel_pt_recording_options(struct auxtrace_record *itr,
* In the case of per-cpu mmaps, we need the CPU on the
* AUX event.
*/
- if (!perf_cpu_map__has_any_cpu_or_is_empty(cpus))
+ if (!perf_cpu_map__is_any_cpu_or_is_empty(cpus))
evsel__set_sample_bit(intel_pt_evsel, CPU);
}
@@ -858,7 +858,7 @@ static int intel_pt_recording_options(struct auxtrace_record *itr,
tracking_evsel->immediate = true;
/* In per-cpu case, always need the time of mmap events etc */
- if (!perf_cpu_map__has_any_cpu_or_is_empty(cpus)) {
+ if (!perf_cpu_map__is_any_cpu_or_is_empty(cpus)) {
evsel__set_sample_bit(tracking_evsel, TIME);
/* And the CPU for switch events */
evsel__set_sample_bit(tracking_evsel, CPU);
@@ -870,7 +870,7 @@ static int intel_pt_recording_options(struct auxtrace_record *itr,
* Warn the user when we do not have enough information to decode i.e.
* per-cpu with no sched_switch (except workload-only).
*/
- if (!ptr->have_sched_switch && !perf_cpu_map__has_any_cpu_or_is_empty(cpus) &&
+ if (!ptr->have_sched_switch && !perf_cpu_map__is_any_cpu_or_is_empty(cpus) &&
!target__none(&opts->target) &&
!intel_pt_evsel->core.attr.exclude_user)
ui__warning("Intel Processor Trace decoding will not be possible except for kernel tracing!\n");
--
2.43.0.429.g432eaa2c6b-goog
In both cases the CPU map is known to be owned by either the caller
or a PMU, making the get/put pair unnecessary.
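A sketch of the borrowed-reference pattern this relies on:

  #include <perf/cpumap.h>

  /* The caller (or owning PMU) guarantees the map outlives the call,
   * so read-only use needs no perf_cpu_map__get()/put() pair. */
  int count_entries(const struct perf_cpu_map *cpus)
  {
          return perf_cpu_map__nr(cpus);
  }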
Signed-off-by: Ian Rogers <[email protected]>
Reviewed-by: James Clark <[email protected]>
---
tools/perf/arch/arm64/util/header.c | 3 ---
1 file changed, 3 deletions(-)
diff --git a/tools/perf/arch/arm64/util/header.c b/tools/perf/arch/arm64/util/header.c
index 97037499152e..a9de0b5187dd 100644
--- a/tools/perf/arch/arm64/util/header.c
+++ b/tools/perf/arch/arm64/util/header.c
@@ -25,8 +25,6 @@ static int _get_cpuid(char *buf, size_t sz, struct perf_cpu_map *cpus)
if (!sysfs || sz < MIDR_SIZE)
return EINVAL;
- cpus = perf_cpu_map__get(cpus);
-
for (cpu = 0; cpu < perf_cpu_map__nr(cpus); cpu++) {
char path[PATH_MAX];
FILE *file;
@@ -51,7 +49,6 @@ static int _get_cpuid(char *buf, size_t sz, struct perf_cpu_map *cpus)
break;
}
- perf_cpu_map__put(cpus);
return ret;
}
--
2.43.0.429.g432eaa2c6b-goog
Use libperf's perf_cpu_map__equal, which performs the same function.
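A sketch of the cases the helper covers (the same ones the removed
cpus_map_matched() open-coded):

  #include <perf/cpumap.h>
  #include <assert.h>

  /* perf_cpu_map__equal() handles NULL maps, differing sizes and
   * differing values. */
  void check_equal(void)
  {
          struct perf_cpu_map *a = perf_cpu_map__new("0-1");
          struct perf_cpu_map *b = perf_cpu_map__new("0,1");

          assert(perf_cpu_map__equal(a, b));       /* same values */
          assert(perf_cpu_map__equal(NULL, NULL)); /* both empty */

          perf_cpu_map__put(a);
          perf_cpu_map__put(b);
  }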
Signed-off-by: Ian Rogers <[email protected]>
Reviewed-by: James Clark <[email protected]>
---
tools/perf/builtin-stat.c | 22 +---------------------
1 file changed, 1 insertion(+), 21 deletions(-)
diff --git a/tools/perf/builtin-stat.c b/tools/perf/builtin-stat.c
index 280eb0c99d2b..d80bad7c73e4 100644
--- a/tools/perf/builtin-stat.c
+++ b/tools/perf/builtin-stat.c
@@ -164,26 +164,6 @@ static struct perf_stat_config stat_config = {
.iostat_run = false,
};
-static bool cpus_map_matched(struct evsel *a, struct evsel *b)
-{
- if (!a->core.cpus && !b->core.cpus)
- return true;
-
- if (!a->core.cpus || !b->core.cpus)
- return false;
-
- if (perf_cpu_map__nr(a->core.cpus) != perf_cpu_map__nr(b->core.cpus))
- return false;
-
- for (int i = 0; i < perf_cpu_map__nr(a->core.cpus); i++) {
- if (perf_cpu_map__cpu(a->core.cpus, i).cpu !=
- perf_cpu_map__cpu(b->core.cpus, i).cpu)
- return false;
- }
-
- return true;
-}
-
static void evlist__check_cpu_maps(struct evlist *evlist)
{
struct evsel *evsel, *warned_leader = NULL;
@@ -194,7 +174,7 @@ static void evlist__check_cpu_maps(struct evlist *evlist)
/* Check that leader matches cpus with each member. */
if (leader == evsel)
continue;
- if (cpus_map_matched(leader, evsel))
+ if (perf_cpu_map__equal(leader->core.cpus, evsel->core.cpus))
continue;
/* If there's mismatch disable the group and warn user. */
--
2.43.0.429.g432eaa2c6b-goog
Rather than manually iterating the CPU map, use
perf_cpu_map__for_each_cpu. Tidy local variables where possible.
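A sketch of the resulting shape (process() is a hypothetical
callback). Note that per-index storage such as perf counts is keyed
by idx, while sysfs paths and bitmaps are keyed by cpu.cpu:

  #include <perf/cpumap.h>

  static void walk(struct perf_cpu_map *cpus,
                   void (*process)(int idx, int cpu))
  {
          struct perf_cpu cpu;
          int idx;

          perf_cpu_map__for_each_cpu(cpu, idx, cpus)
                  process(idx, cpu.cpu);
  }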
Signed-off-by: Ian Rogers <[email protected]>
Reviewed-by: James Clark <[email protected]>
---
tools/perf/arch/arm64/util/header.c | 10 ++--
tools/perf/tests/bitmap.c | 13 +++---
tools/perf/tests/topology.c | 46 +++++++++----------
tools/perf/util/bpf_kwork.c | 16 ++++---
tools/perf/util/bpf_kwork_top.c | 12 ++---
tools/perf/util/cpumap.c | 12 ++---
.../scripting-engines/trace-event-python.c | 12 +++--
tools/perf/util/session.c | 5 +-
tools/perf/util/svghelper.c | 20 ++++----
9 files changed, 72 insertions(+), 74 deletions(-)
diff --git a/tools/perf/arch/arm64/util/header.c b/tools/perf/arch/arm64/util/header.c
index a9de0b5187dd..741df3614a09 100644
--- a/tools/perf/arch/arm64/util/header.c
+++ b/tools/perf/arch/arm64/util/header.c
@@ -4,8 +4,6 @@
#include <stdio.h>
#include <stdlib.h>
#include <perf/cpumap.h>
-#include <util/cpumap.h>
-#include <internal/cpumap.h>
#include <api/fs/fs.h>
#include <errno.h>
#include "debug.h"
@@ -19,18 +17,18 @@
static int _get_cpuid(char *buf, size_t sz, struct perf_cpu_map *cpus)
{
const char *sysfs = sysfs__mountpoint();
- int cpu;
- int ret = EINVAL;
+ struct perf_cpu cpu;
+ int idx, ret = EINVAL;
if (!sysfs || sz < MIDR_SIZE)
return EINVAL;
- for (cpu = 0; cpu < perf_cpu_map__nr(cpus); cpu++) {
+ perf_cpu_map__for_each_cpu(cpu, idx, cpus) {
char path[PATH_MAX];
FILE *file;
scnprintf(path, PATH_MAX, "%s/devices/system/cpu/cpu%d" MIDR,
- sysfs, RC_CHK_ACCESS(cpus)->map[cpu].cpu);
+ sysfs, cpu.cpu);
file = fopen(path, "r");
if (!file) {
diff --git a/tools/perf/tests/bitmap.c b/tools/perf/tests/bitmap.c
index 0173f5402a35..98956e0e0765 100644
--- a/tools/perf/tests/bitmap.c
+++ b/tools/perf/tests/bitmap.c
@@ -11,18 +11,19 @@
static unsigned long *get_bitmap(const char *str, int nbits)
{
struct perf_cpu_map *map = perf_cpu_map__new(str);
- unsigned long *bm = NULL;
- int i;
+ unsigned long *bm;
bm = bitmap_zalloc(nbits);
if (map && bm) {
- for (i = 0; i < perf_cpu_map__nr(map); i++)
- __set_bit(perf_cpu_map__cpu(map, i).cpu, bm);
+ int i;
+ struct perf_cpu cpu;
+
+ perf_cpu_map__for_each_cpu(cpu, i, map)
+ __set_bit(cpu.cpu, bm);
}
- if (map)
- perf_cpu_map__put(map);
+ perf_cpu_map__put(map);
return bm;
}
diff --git a/tools/perf/tests/topology.c b/tools/perf/tests/topology.c
index 2a842f53fbb5..a8cb5ba898ab 100644
--- a/tools/perf/tests/topology.c
+++ b/tools/perf/tests/topology.c
@@ -68,6 +68,7 @@ static int check_cpu_topology(char *path, struct perf_cpu_map *map)
};
int i;
struct aggr_cpu_id id;
+ struct perf_cpu cpu;
session = perf_session__new(&data, NULL);
TEST_ASSERT_VAL("can't get session", !IS_ERR(session));
@@ -113,8 +114,7 @@ static int check_cpu_topology(char *path, struct perf_cpu_map *map)
TEST_ASSERT_VAL("Session header CPU map not set", session->header.env.cpu);
for (i = 0; i < session->header.env.nr_cpus_avail; i++) {
- struct perf_cpu cpu = { .cpu = i };
-
+ cpu.cpu = i;
if (!perf_cpu_map__has(map, cpu))
continue;
pr_debug("CPU %d, core %d, socket %d\n", i,
@@ -123,48 +123,48 @@ static int check_cpu_topology(char *path, struct perf_cpu_map *map)
}
// Test that CPU ID contains socket, die, core and CPU
- for (i = 0; i < perf_cpu_map__nr(map); i++) {
- id = aggr_cpu_id__cpu(perf_cpu_map__cpu(map, i), NULL);
+ perf_cpu_map__for_each_cpu(cpu, i, map) {
+ id = aggr_cpu_id__cpu(cpu, NULL);
TEST_ASSERT_VAL("Cpu map - CPU ID doesn't match",
- perf_cpu_map__cpu(map, i).cpu == id.cpu.cpu);
+ cpu.cpu == id.cpu.cpu);
TEST_ASSERT_VAL("Cpu map - Core ID doesn't match",
- session->header.env.cpu[perf_cpu_map__cpu(map, i).cpu].core_id == id.core);
+ session->header.env.cpu[cpu.cpu].core_id == id.core);
TEST_ASSERT_VAL("Cpu map - Socket ID doesn't match",
- session->header.env.cpu[perf_cpu_map__cpu(map, i).cpu].socket_id ==
+ session->header.env.cpu[cpu.cpu].socket_id ==
id.socket);
TEST_ASSERT_VAL("Cpu map - Die ID doesn't match",
- session->header.env.cpu[perf_cpu_map__cpu(map, i).cpu].die_id == id.die);
+ session->header.env.cpu[cpu.cpu].die_id == id.die);
TEST_ASSERT_VAL("Cpu map - Node ID is set", id.node == -1);
TEST_ASSERT_VAL("Cpu map - Thread IDX is set", id.thread_idx == -1);
}
// Test that core ID contains socket, die and core
- for (i = 0; i < perf_cpu_map__nr(map); i++) {
- id = aggr_cpu_id__core(perf_cpu_map__cpu(map, i), NULL);
+ perf_cpu_map__for_each_cpu(cpu, i, map) {
+ id = aggr_cpu_id__core(cpu, NULL);
TEST_ASSERT_VAL("Core map - Core ID doesn't match",
- session->header.env.cpu[perf_cpu_map__cpu(map, i).cpu].core_id == id.core);
+ session->header.env.cpu[cpu.cpu].core_id == id.core);
TEST_ASSERT_VAL("Core map - Socket ID doesn't match",
- session->header.env.cpu[perf_cpu_map__cpu(map, i).cpu].socket_id ==
+ session->header.env.cpu[cpu.cpu].socket_id ==
id.socket);
TEST_ASSERT_VAL("Core map - Die ID doesn't match",
- session->header.env.cpu[perf_cpu_map__cpu(map, i).cpu].die_id == id.die);
+ session->header.env.cpu[cpu.cpu].die_id == id.die);
TEST_ASSERT_VAL("Core map - Node ID is set", id.node == -1);
TEST_ASSERT_VAL("Core map - Thread IDX is set", id.thread_idx == -1);
}
// Test that die ID contains socket and die
- for (i = 0; i < perf_cpu_map__nr(map); i++) {
- id = aggr_cpu_id__die(perf_cpu_map__cpu(map, i), NULL);
+ perf_cpu_map__for_each_cpu(cpu, i, map) {
+ id = aggr_cpu_id__die(cpu, NULL);
TEST_ASSERT_VAL("Die map - Socket ID doesn't match",
- session->header.env.cpu[perf_cpu_map__cpu(map, i).cpu].socket_id ==
+ session->header.env.cpu[cpu.cpu].socket_id ==
id.socket);
TEST_ASSERT_VAL("Die map - Die ID doesn't match",
- session->header.env.cpu[perf_cpu_map__cpu(map, i).cpu].die_id == id.die);
+ session->header.env.cpu[cpu.cpu].die_id == id.die);
TEST_ASSERT_VAL("Die map - Node ID is set", id.node == -1);
TEST_ASSERT_VAL("Die map - Core is set", id.core == -1);
@@ -173,10 +173,10 @@ static int check_cpu_topology(char *path, struct perf_cpu_map *map)
}
// Test that socket ID contains only socket
- for (i = 0; i < perf_cpu_map__nr(map); i++) {
- id = aggr_cpu_id__socket(perf_cpu_map__cpu(map, i), NULL);
+ perf_cpu_map__for_each_cpu(cpu, i, map) {
+ id = aggr_cpu_id__socket(cpu, NULL);
TEST_ASSERT_VAL("Socket map - Socket ID doesn't match",
- session->header.env.cpu[perf_cpu_map__cpu(map, i).cpu].socket_id ==
+ session->header.env.cpu[cpu.cpu].socket_id ==
id.socket);
TEST_ASSERT_VAL("Socket map - Node ID is set", id.node == -1);
@@ -187,10 +187,10 @@ static int check_cpu_topology(char *path, struct perf_cpu_map *map)
}
// Test that node ID contains only node
- for (i = 0; i < perf_cpu_map__nr(map); i++) {
- id = aggr_cpu_id__node(perf_cpu_map__cpu(map, i), NULL);
+ perf_cpu_map__for_each_cpu(cpu, i, map) {
+ id = aggr_cpu_id__node(cpu, NULL);
TEST_ASSERT_VAL("Node map - Node ID doesn't match",
- cpu__get_node(perf_cpu_map__cpu(map, i)) == id.node);
+ cpu__get_node(cpu) == id.node);
TEST_ASSERT_VAL("Node map - Socket is set", id.socket == -1);
TEST_ASSERT_VAL("Node map - Die ID is set", id.die == -1);
TEST_ASSERT_VAL("Node map - Core is set", id.core == -1);
diff --git a/tools/perf/util/bpf_kwork.c b/tools/perf/util/bpf_kwork.c
index 6eb2c78fd7f4..44f0f708a15d 100644
--- a/tools/perf/util/bpf_kwork.c
+++ b/tools/perf/util/bpf_kwork.c
@@ -147,12 +147,12 @@ static bool valid_kwork_class_type(enum kwork_class_type type)
static int setup_filters(struct perf_kwork *kwork)
{
- u8 val = 1;
- int i, nr_cpus, key, fd;
- struct perf_cpu_map *map;
-
if (kwork->cpu_list != NULL) {
- fd = bpf_map__fd(skel->maps.perf_kwork_cpu_filter);
+ int idx, nr_cpus;
+ struct perf_cpu_map *map;
+ struct perf_cpu cpu;
+ int fd = bpf_map__fd(skel->maps.perf_kwork_cpu_filter);
+
if (fd < 0) {
pr_debug("Invalid cpu filter fd\n");
return -1;
@@ -165,8 +165,8 @@ static int setup_filters(struct perf_kwork *kwork)
}
nr_cpus = libbpf_num_possible_cpus();
- for (i = 0; i < perf_cpu_map__nr(map); i++) {
- struct perf_cpu cpu = perf_cpu_map__cpu(map, i);
+ perf_cpu_map__for_each_cpu(cpu, idx, map) {
+ u8 val = 1;
if (cpu.cpu >= nr_cpus) {
perf_cpu_map__put(map);
@@ -181,6 +181,8 @@ static int setup_filters(struct perf_kwork *kwork)
}
if (kwork->profile_name != NULL) {
+ int key, fd;
+
if (strlen(kwork->profile_name) >= MAX_KWORKNAME) {
pr_err("Requested name filter %s too large, limit to %d\n",
kwork->profile_name, MAX_KWORKNAME - 1);
diff --git a/tools/perf/util/bpf_kwork_top.c b/tools/perf/util/bpf_kwork_top.c
index 035e02272790..22a3b00a1e23 100644
--- a/tools/perf/util/bpf_kwork_top.c
+++ b/tools/perf/util/bpf_kwork_top.c
@@ -122,11 +122,11 @@ static bool valid_kwork_class_type(enum kwork_class_type type)
static int setup_filters(struct perf_kwork *kwork)
{
- u8 val = 1;
- int i, nr_cpus, fd;
- struct perf_cpu_map *map;
-
if (kwork->cpu_list) {
+ int idx, nr_cpus, fd;
+ struct perf_cpu_map *map;
+ struct perf_cpu cpu;
+
fd = bpf_map__fd(skel->maps.kwork_top_cpu_filter);
if (fd < 0) {
pr_debug("Invalid cpu filter fd\n");
@@ -140,8 +140,8 @@ static int setup_filters(struct perf_kwork *kwork)
}
nr_cpus = libbpf_num_possible_cpus();
- for (i = 0; i < perf_cpu_map__nr(map); i++) {
- struct perf_cpu cpu = perf_cpu_map__cpu(map, i);
+ perf_cpu_map__for_each_cpu(cpu, idx, map) {
+ u8 val = 1;
if (cpu.cpu >= nr_cpus) {
perf_cpu_map__put(map);
diff --git a/tools/perf/util/cpumap.c b/tools/perf/util/cpumap.c
index 0581ee0fa5f2..e2287187babd 100644
--- a/tools/perf/util/cpumap.c
+++ b/tools/perf/util/cpumap.c
@@ -629,10 +629,10 @@ static char hex_char(unsigned char val)
size_t cpu_map__snprint_mask(struct perf_cpu_map *map, char *buf, size_t size)
{
- int i, cpu;
+ int idx;
char *ptr = buf;
unsigned char *bitmap;
- struct perf_cpu last_cpu = perf_cpu_map__cpu(map, perf_cpu_map__nr(map) - 1);
+ struct perf_cpu c, last_cpu = perf_cpu_map__max(map);
if (buf == NULL)
return 0;
@@ -643,12 +643,10 @@ size_t cpu_map__snprint_mask(struct perf_cpu_map *map, char *buf, size_t size)
return 0;
}
- for (i = 0; i < perf_cpu_map__nr(map); i++) {
- cpu = perf_cpu_map__cpu(map, i).cpu;
- bitmap[cpu / 8] |= 1 << (cpu % 8);
- }
+ perf_cpu_map__for_each_cpu(c, idx, map)
+ bitmap[c.cpu / 8] |= 1 << (c.cpu % 8);
- for (cpu = last_cpu.cpu / 4 * 4; cpu >= 0; cpu -= 4) {
+ for (int cpu = last_cpu.cpu / 4 * 4; cpu >= 0; cpu -= 4) {
unsigned char bits = bitmap[cpu / 8];
if (cpu % 8)
diff --git a/tools/perf/util/scripting-engines/trace-event-python.c b/tools/perf/util/scripting-engines/trace-event-python.c
index 860e1837ba96..8ef0e5ac03c2 100644
--- a/tools/perf/util/scripting-engines/trace-event-python.c
+++ b/tools/perf/util/scripting-engines/trace-event-python.c
@@ -1693,13 +1693,15 @@ static void python_process_stat(struct perf_stat_config *config,
{
struct perf_thread_map *threads = counter->core.threads;
struct perf_cpu_map *cpus = counter->core.cpus;
- int cpu, thread;
- for (thread = 0; thread < perf_thread_map__nr(threads); thread++) {
- for (cpu = 0; cpu < perf_cpu_map__nr(cpus); cpu++) {
- process_stat(counter, perf_cpu_map__cpu(cpus, cpu),
+ for (int thread = 0; thread < perf_thread_map__nr(threads); thread++) {
+ int idx;
+ struct perf_cpu cpu;
+
+ perf_cpu_map__for_each_cpu(cpu, idx, cpus) {
+ process_stat(counter, cpu,
perf_thread_map__pid(threads, thread), tstamp,
- perf_counts(counter->counts, cpu, thread));
+ perf_counts(counter->counts, idx, thread));
}
}
}
diff --git a/tools/perf/util/session.c b/tools/perf/util/session.c
index 199d3e8df315..d52b58344dbc 100644
--- a/tools/perf/util/session.c
+++ b/tools/perf/util/session.c
@@ -2738,6 +2738,7 @@ int perf_session__cpu_bitmap(struct perf_session *session,
int i, err = -1;
struct perf_cpu_map *map;
int nr_cpus = min(session->header.env.nr_cpus_avail, MAX_NR_CPUS);
+ struct perf_cpu cpu;
for (i = 0; i < PERF_TYPE_MAX; ++i) {
struct evsel *evsel;
@@ -2759,9 +2760,7 @@ int perf_session__cpu_bitmap(struct perf_session *session,
return -1;
}
- for (i = 0; i < perf_cpu_map__nr(map); i++) {
- struct perf_cpu cpu = perf_cpu_map__cpu(map, i);
-
+ perf_cpu_map__for_each_cpu(cpu, i, map) {
if (cpu.cpu >= nr_cpus) {
pr_err("Requested CPU %d too large. "
"Consider raising MAX_NR_CPUS\n", cpu.cpu);
diff --git a/tools/perf/util/svghelper.c b/tools/perf/util/svghelper.c
index 1892e9b6aa7f..2b04f47f4db0 100644
--- a/tools/perf/util/svghelper.c
+++ b/tools/perf/util/svghelper.c
@@ -725,26 +725,24 @@ static void scan_core_topology(int *map, struct topology *t, int nr_cpus)
static int str_to_bitmap(char *s, cpumask_t *b, int nr_cpus)
{
- int i;
- int ret = 0;
- struct perf_cpu_map *m;
- struct perf_cpu c;
+ int idx, ret = 0;
+ struct perf_cpu_map *map;
+ struct perf_cpu cpu;
- m = perf_cpu_map__new(s);
- if (!m)
+ map = perf_cpu_map__new(s);
+ if (!map)
return -1;
- for (i = 0; i < perf_cpu_map__nr(m); i++) {
- c = perf_cpu_map__cpu(m, i);
- if (c.cpu >= nr_cpus) {
+ perf_cpu_map__for_each_cpu(cpu, idx, map) {
+ if (cpu.cpu >= nr_cpus) {
ret = -1;
break;
}
- __set_bit(c.cpu, cpumask_bits(b));
+ __set_bit(cpu.cpu, cpumask_bits(b));
}
- perf_cpu_map__put(m);
+ perf_cpu_map__put(map);
return ret;
}
--
2.43.0.429.g432eaa2c6b-goog
Rather than iterating all CPUs and checking whether each is in the
CPU map, directly iterate the CPU map. Similarly, make use of the
intersect function. Switch perf_cpu_map__has_any_cpu_or_is_empty to
more appropriate alternatives.
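A sketch of the intersection pattern (validate() is a hypothetical
per-CPU callback; note, per the review below, that an "any CPU" (-1)
map intersects with the online map to an empty map):

  #include <perf/cpumap.h>

  int validate_requested_online(struct perf_cpu_map *event_cpus,
                                struct perf_cpu_map *online_cpus,
                                int (*validate)(int cpu))
  {
          struct perf_cpu_map *both =
                  perf_cpu_map__intersect(event_cpus, online_cpus);
          struct perf_cpu cpu;
          int idx, err = 0;

          perf_cpu_map__for_each_cpu_skip_any(cpu, idx, both) {
                  err = validate(cpu.cpu);
                  if (err)
                          break;
          }
          perf_cpu_map__put(both);
          return err;
  }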
Signed-off-by: Ian Rogers <[email protected]>
---
tools/perf/arch/arm/util/cs-etm.c | 77 ++++++++++++----------------
tools/perf/arch/arm64/util/arm-spe.c | 4 +-
2 files changed, 34 insertions(+), 47 deletions(-)
diff --git a/tools/perf/arch/arm/util/cs-etm.c b/tools/perf/arch/arm/util/cs-etm.c
index 77e6663c1703..f4378ba0b8d6 100644
--- a/tools/perf/arch/arm/util/cs-etm.c
+++ b/tools/perf/arch/arm/util/cs-etm.c
@@ -197,38 +197,32 @@ static int cs_etm_validate_timestamp(struct auxtrace_record *itr,
static int cs_etm_validate_config(struct auxtrace_record *itr,
struct evsel *evsel)
{
- int i, err = -EINVAL;
+ int idx, err = -EINVAL;
struct perf_cpu_map *event_cpus = evsel->evlist->core.user_requested_cpus;
struct perf_cpu_map *online_cpus = perf_cpu_map__new_online_cpus();
+ struct perf_cpu_map *intersect_cpus = perf_cpu_map__intersect(event_cpus, online_cpus);
+ struct perf_cpu cpu;
- /* Set option of each CPU we have */
- for (i = 0; i < cpu__max_cpu().cpu; i++) {
- struct perf_cpu cpu = { .cpu = i, };
-
- /*
- * In per-cpu case, do the validation for CPUs to work with.
- * In per-thread case, the CPU map is empty. Since the traced
- * program can run on any CPUs in this case, thus don't skip
- * validation.
- */
- if (!perf_cpu_map__has_any_cpu_or_is_empty(event_cpus) &&
- !perf_cpu_map__has(event_cpus, cpu))
- continue;
-
- if (!perf_cpu_map__has(online_cpus, cpu))
- continue;
+ perf_cpu_map__put(online_cpus);
- err = cs_etm_validate_context_id(itr, evsel, i);
+ /*
+ * Set option of each CPU we have. In per-cpu case, do the validation
+ * for CPUs to work with. In per-thread case, the CPU map is empty.
+ * Since the traced program can run on any CPUs in this case, thus don't
+ * skip validation.
+ */
+ perf_cpu_map__for_each_cpu_skip_any(cpu, idx, intersect_cpus) {
+ err = cs_etm_validate_context_id(itr, evsel, cpu.cpu);
if (err)
goto out;
- err = cs_etm_validate_timestamp(itr, evsel, i);
+ err = cs_etm_validate_timestamp(itr, evsel, cpu.cpu);
if (err)
goto out;
}
err = 0;
out:
- perf_cpu_map__put(online_cpus);
+ perf_cpu_map__put(intersect_cpus);
return err;
}
@@ -435,7 +429,7 @@ static int cs_etm_recording_options(struct auxtrace_record *itr,
* Also the case of per-cpu mmaps, need the contextID in order to be notified
* when a context switch happened.
*/
- if (!perf_cpu_map__has_any_cpu_or_is_empty(cpus)) {
+ if (!perf_cpu_map__is_any_cpu_or_is_empty(cpus)) {
evsel__set_config_if_unset(cs_etm_pmu, cs_etm_evsel,
"timestamp", 1);
evsel__set_config_if_unset(cs_etm_pmu, cs_etm_evsel,
@@ -461,7 +455,7 @@ static int cs_etm_recording_options(struct auxtrace_record *itr,
evsel->core.attr.sample_period = 1;
/* In per-cpu case, always need the time of mmap events etc */
- if (!perf_cpu_map__has_any_cpu_or_is_empty(cpus))
+ if (!perf_cpu_map__is_any_cpu_or_is_empty(cpus))
evsel__set_sample_bit(evsel, TIME);
err = cs_etm_validate_config(itr, cs_etm_evsel);
@@ -533,38 +527,32 @@ static size_t
cs_etm_info_priv_size(struct auxtrace_record *itr __maybe_unused,
struct evlist *evlist __maybe_unused)
{
- int i;
+ int idx;
int etmv3 = 0, etmv4 = 0, ete = 0;
struct perf_cpu_map *event_cpus = evlist->core.user_requested_cpus;
struct perf_cpu_map *online_cpus = perf_cpu_map__new_online_cpus();
+ struct perf_cpu cpu;
/* cpu map is not empty, we have specific CPUs to work with */
- if (!perf_cpu_map__has_any_cpu_or_is_empty(event_cpus)) {
- for (i = 0; i < cpu__max_cpu().cpu; i++) {
- struct perf_cpu cpu = { .cpu = i, };
-
- if (!perf_cpu_map__has(event_cpus, cpu) ||
- !perf_cpu_map__has(online_cpus, cpu))
- continue;
+ if (!perf_cpu_map__is_empty(event_cpus)) {
+ struct perf_cpu_map *intersect_cpus =
+ perf_cpu_map__intersect(event_cpus, online_cpus);
- if (cs_etm_is_ete(itr, i))
+ perf_cpu_map__for_each_cpu_skip_any(cpu, idx, intersect_cpus) {
+ if (cs_etm_is_ete(itr, cpu.cpu))
ete++;
- else if (cs_etm_is_etmv4(itr, i))
+ else if (cs_etm_is_etmv4(itr, cpu.cpu))
etmv4++;
else
etmv3++;
}
+ perf_cpu_map__put(intersect_cpus);
} else {
/* get configuration for all CPUs in the system */
- for (i = 0; i < cpu__max_cpu().cpu; i++) {
- struct perf_cpu cpu = { .cpu = i, };
-
- if (!perf_cpu_map__has(online_cpus, cpu))
- continue;
-
- if (cs_etm_is_ete(itr, i))
+ perf_cpu_map__for_each_cpu(cpu, idx, online_cpus) {
+ if (cs_etm_is_ete(itr, cpu.cpu))
ete++;
- else if (cs_etm_is_etmv4(itr, i))
+ else if (cs_etm_is_etmv4(itr, cpu.cpu))
etmv4++;
else
etmv3++;
@@ -814,15 +802,14 @@ static int cs_etm_info_fill(struct auxtrace_record *itr,
return -EINVAL;
/* If the cpu_map is empty all online CPUs are involved */
- if (perf_cpu_map__has_any_cpu_or_is_empty(event_cpus)) {
+ if (perf_cpu_map__is_empty(event_cpus)) {
cpu_map = online_cpus;
} else {
/* Make sure all specified CPUs are online */
- for (i = 0; i < perf_cpu_map__nr(event_cpus); i++) {
- struct perf_cpu cpu = { .cpu = i, };
+ struct perf_cpu cpu;
- if (perf_cpu_map__has(event_cpus, cpu) &&
- !perf_cpu_map__has(online_cpus, cpu))
+ perf_cpu_map__for_each_cpu(cpu, i, event_cpus) {
+ if (!perf_cpu_map__has(online_cpus, cpu))
return -EINVAL;
}
diff --git a/tools/perf/arch/arm64/util/arm-spe.c b/tools/perf/arch/arm64/util/arm-spe.c
index 51ccbfd3d246..0b52e67edb3b 100644
--- a/tools/perf/arch/arm64/util/arm-spe.c
+++ b/tools/perf/arch/arm64/util/arm-spe.c
@@ -232,7 +232,7 @@ static int arm_spe_recording_options(struct auxtrace_record *itr,
* In the case of per-cpu mmaps, sample CPU for AUX event;
* also enable the timestamp tracing for samples correlation.
*/
- if (!perf_cpu_map__has_any_cpu_or_is_empty(cpus)) {
+ if (!perf_cpu_map__is_any_cpu_or_is_empty(cpus)) {
evsel__set_sample_bit(arm_spe_evsel, CPU);
evsel__set_config_if_unset(arm_spe_pmu, arm_spe_evsel,
"ts_enable", 1);
@@ -265,7 +265,7 @@ static int arm_spe_recording_options(struct auxtrace_record *itr,
tracking_evsel->core.attr.sample_period = 1;
/* In per-cpu case, always need the time of mmap events etc */
- if (!perf_cpu_map__has_any_cpu_or_is_empty(cpus)) {
+ if (!perf_cpu_map__is_any_cpu_or_is_empty(cpus)) {
evsel__set_sample_bit(tracking_evsel, TIME);
evsel__set_sample_bit(tracking_evsel, CPU);
--
2.43.0.429.g432eaa2c6b-goog
Most uses of what was perf_cpu_map__empty, but is now
perf_cpu_map__has_any_cpu_or_is_empty, want to do something with the
CPU map if it contains CPUs. Replace uses of
perf_cpu_map__has_any_cpu_or_is_empty with other helpers so that CPUs
within the map can be handled.
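For instance, a sketch of the builtin-stat change below, which uses
the new perf_cpu_map__min() helper:

  #include <perf/cpumap.h>

  /* Use the first real CPU in the cache domain as the aggregation ID,
   * falling back to the current CPU when the map holds no real CPUs
   * (perf_cpu_map__min() returns -1 in that case). */
  int cache_id(struct perf_cpu_map *domain, int current_cpu)
  {
          int id = perf_cpu_map__min(domain).cpu;

          return id == -1 ? current_cpu : id;
  }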
Signed-off-by: Ian Rogers <[email protected]>
Reviewed-by: James Clark <[email protected]>
---
tools/perf/builtin-c2c.c | 6 +-----
tools/perf/builtin-stat.c | 9 ++++-----
tools/perf/util/auxtrace.c | 4 ++--
tools/perf/util/record.c | 2 +-
tools/perf/util/stat.c | 2 +-
5 files changed, 9 insertions(+), 14 deletions(-)
diff --git a/tools/perf/builtin-c2c.c b/tools/perf/builtin-c2c.c
index 16b40f5d43db..24107062c43e 100644
--- a/tools/perf/builtin-c2c.c
+++ b/tools/perf/builtin-c2c.c
@@ -2319,11 +2319,7 @@ static int setup_nodes(struct perf_session *session)
nodes[node] = set;
- /* empty node, skip */
- if (perf_cpu_map__has_any_cpu_or_is_empty(map))
- continue;
-
- perf_cpu_map__for_each_cpu(cpu, idx, map) {
+ perf_cpu_map__for_each_cpu_skip_any(cpu, idx, map) {
__set_bit(cpu.cpu, set);
if (WARN_ONCE(cpu2node[cpu.cpu] != -1, "node/cpu topology bug"))
diff --git a/tools/perf/builtin-stat.c b/tools/perf/builtin-stat.c
index 5fe9abc6a524..280eb0c99d2b 100644
--- a/tools/perf/builtin-stat.c
+++ b/tools/perf/builtin-stat.c
@@ -1317,10 +1317,9 @@ static int cpu__get_cache_id_from_map(struct perf_cpu cpu, char *map)
* be the first online CPU in the cache domain else use the
* first online CPU of the cache domain as the ID.
*/
- if (perf_cpu_map__has_any_cpu_or_is_empty(cpu_map))
+ id = perf_cpu_map__min(cpu_map).cpu;
+ if (id == -1)
id = cpu.cpu;
- else
- id = perf_cpu_map__cpu(cpu_map, 0).cpu;
/* Free the perf_cpu_map used to find the cache ID */
perf_cpu_map__put(cpu_map);
@@ -1623,7 +1622,7 @@ static int perf_stat_init_aggr_mode(void)
* taking the highest cpu number to be the size of
* the aggregation translate cpumap.
*/
- if (!perf_cpu_map__has_any_cpu_or_is_empty(evsel_list->core.user_requested_cpus))
+ if (!perf_cpu_map__is_any_cpu_or_is_empty(evsel_list->core.user_requested_cpus))
nr = perf_cpu_map__max(evsel_list->core.user_requested_cpus).cpu;
else
nr = 0;
@@ -2290,7 +2289,7 @@ int process_stat_config_event(struct perf_session *session,
perf_event__read_stat_config(&stat_config, &event->stat_config);
- if (perf_cpu_map__has_any_cpu_or_is_empty(st->cpus)) {
+ if (perf_cpu_map__is_empty(st->cpus)) {
if (st->aggr_mode != AGGR_UNSET)
pr_warning("warning: processing task data, aggregation mode not set\n");
} else if (st->aggr_mode != AGGR_UNSET) {
diff --git a/tools/perf/util/auxtrace.c b/tools/perf/util/auxtrace.c
index 3684e6009b63..6b1d4bafad59 100644
--- a/tools/perf/util/auxtrace.c
+++ b/tools/perf/util/auxtrace.c
@@ -174,7 +174,7 @@ void auxtrace_mmap_params__set_idx(struct auxtrace_mmap_params *mp,
struct evlist *evlist,
struct evsel *evsel, int idx)
{
- bool per_cpu = !perf_cpu_map__has_any_cpu_or_is_empty(evlist->core.user_requested_cpus);
+ bool per_cpu = !perf_cpu_map__has_any_cpu(evlist->core.user_requested_cpus);
mp->mmap_needed = evsel->needs_auxtrace_mmap;
@@ -648,7 +648,7 @@ int auxtrace_parse_snapshot_options(struct auxtrace_record *itr,
static int evlist__enable_event_idx(struct evlist *evlist, struct evsel *evsel, int idx)
{
- bool per_cpu_mmaps = !perf_cpu_map__has_any_cpu_or_is_empty(evlist->core.user_requested_cpus);
+ bool per_cpu_mmaps = !perf_cpu_map__has_any_cpu(evlist->core.user_requested_cpus);
if (per_cpu_mmaps) {
struct perf_cpu evlist_cpu = perf_cpu_map__cpu(evlist->core.all_cpus, idx);
diff --git a/tools/perf/util/record.c b/tools/perf/util/record.c
index 87e817b3cf7e..e867de8ddaaa 100644
--- a/tools/perf/util/record.c
+++ b/tools/perf/util/record.c
@@ -237,7 +237,7 @@ bool evlist__can_select_event(struct evlist *evlist, const char *str)
evsel = evlist__last(temp_evlist);
- if (!evlist || perf_cpu_map__has_any_cpu_or_is_empty(evlist->core.user_requested_cpus)) {
+ if (!evlist || perf_cpu_map__is_any_cpu_or_is_empty(evlist->core.user_requested_cpus)) {
struct perf_cpu_map *cpus = perf_cpu_map__new_online_cpus();
if (cpus)
diff --git a/tools/perf/util/stat.c b/tools/perf/util/stat.c
index b0bcf92f0f9c..0bd5467389e4 100644
--- a/tools/perf/util/stat.c
+++ b/tools/perf/util/stat.c
@@ -315,7 +315,7 @@ static int check_per_pkg(struct evsel *counter, struct perf_counts_values *vals,
if (!counter->per_pkg)
return 0;
- if (perf_cpu_map__has_any_cpu_or_is_empty(cpus))
+ if (perf_cpu_map__is_any_cpu_or_is_empty(cpus))
return 0;
if (!mask) {
--
2.43.0.429.g432eaa2c6b-goog
On 01/02/2024 04:22, Ian Rogers wrote:
> Rather than iterate all CPUs and see if they are in CPU maps, directly
> iterate the CPU map. Similarly make use of the intersect
> function. Switch perf_cpu_map__has_any_cpu_or_is_empty to more
> appropriate alternatives.
>
> Signed-off-by: Ian Rogers <[email protected]>
> ---
> tools/perf/arch/arm/util/cs-etm.c | 77 ++++++++++++----------------
> tools/perf/arch/arm64/util/arm-spe.c | 4 +-
> 2 files changed, 34 insertions(+), 47 deletions(-)
>
> diff --git a/tools/perf/arch/arm/util/cs-etm.c b/tools/perf/arch/arm/util/cs-etm.c
> index 77e6663c1703..f4378ba0b8d6 100644
> --- a/tools/perf/arch/arm/util/cs-etm.c
> +++ b/tools/perf/arch/arm/util/cs-etm.c
> @@ -197,38 +197,32 @@ static int cs_etm_validate_timestamp(struct auxtrace_record *itr,
> static int cs_etm_validate_config(struct auxtrace_record *itr,
> struct evsel *evsel)
> {
> - int i, err = -EINVAL;
> + int idx, err = -EINVAL;
> struct perf_cpu_map *event_cpus = evsel->evlist->core.user_requested_cpus;
> struct perf_cpu_map *online_cpus = perf_cpu_map__new_online_cpus();
> + struct perf_cpu_map *intersect_cpus = perf_cpu_map__intersect(event_cpus, online_cpus);
Hi Ian,
This has the same issue as V1. 'any' intersect 'online' == empty. Now no
validation happens anymore. For this to be the same as it used to be,
validation has to happen on _all_ cores when event_cpus == -1. So it
needs to be 'any' intersect 'online' == 'online'.
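A minimal illustration, assuming perf_cpu_map__new_any_cpu() yields
{ -1 }:

  #include <perf/cpumap.h>

  /* -1 is never an online CPU, so the intersection is empty and a
   * loop over it runs no validation at all. */
  struct perf_cpu_map *any_intersect_online(void)
  {
          struct perf_cpu_map *any = perf_cpu_map__new_any_cpu();
          struct perf_cpu_map *online = perf_cpu_map__new_online_cpus();
          struct perf_cpu_map *inter = perf_cpu_map__intersect(any, online);

          perf_cpu_map__put(any);
          perf_cpu_map__put(online);
          return inter;   /* empty */
  }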
Same issue with cs_etm_info_priv_size()
Thanks
James
On Thu, Feb 1, 2024 at 3:25 AM James Clark <[email protected]> wrote:
>
>
>
> On 01/02/2024 04:22, Ian Rogers wrote:
> > [...]
> > @@ -197,38 +197,32 @@ static int cs_etm_validate_timestamp(struct auxtrace_record *itr,
> > static int cs_etm_validate_config(struct auxtrace_record *itr,
> > struct evsel *evsel)
> > {
> > - int i, err = -EINVAL;
> > + int idx, err = -EINVAL;
> > struct perf_cpu_map *event_cpus = evsel->evlist->core.user_requested_cpus;
> > struct perf_cpu_map *online_cpus = perf_cpu_map__new_online_cpus();
> > + struct perf_cpu_map *intersect_cpus = perf_cpu_map__intersect(event_cpus, online_cpus);
>
> Hi Ian,
>
> This has the same issue as V1. 'any' intersect 'online' == empty. Now no
> validation happens anymore. For this to be the same as it used to be,
> validation has to happen on _all_ cores when event_cpus == -1. So it
> needs to be 'any' intersect 'online' == 'online'.
>
> Same issue with cs_etm_info_priv_size()
Thanks James, sorry for the churn. I'll work on that for v3.
Ian