2019-11-05 00:26:39

by Andi Kleen

Subject: Optimize perf stat for large number of events/cpus

[v4: Address review feedback as far as possible.
Fix bug with unsorted CPUs specified by user.]

This patch kit optimizes perf stat for a large number of events
on systems with many CPUs and PMUs.

Some profiling shows that most of the overhead is the IPIs sent to
all the target CPUs. We can optimize this by using sched_setaffinity
to set the affinity to a target CPU once and then doing
the perf operations for all events on that CPU. This requires
some restructuring, but cuts the setup time quite a bit.
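
A minimal, self-contained sketch of the batching idea (illustrative only;
the helper function and the fixed CPU count are placeholders, not code from
this series):

#define _GNU_SOURCE
#include <sched.h>
#include <stdio.h>

/* stands in for opening/enabling/reading/closing all events on one CPU */
static void do_all_events_on_cpu(int cpu)
{
	printf("perf operations for CPU %d\n", cpu);
}

int main(void)
{
	cpu_set_t orig, set;
	int cpu;

	/* remember the original mask so it can be restored at the end */
	sched_getaffinity(0, sizeof(orig), &orig);

	for (cpu = 0; cpu < 4; cpu++) {
		CPU_ZERO(&set);
		CPU_SET(cpu, &set);
		/* errors are ignored: affinity is only an optimization */
		sched_setaffinity(0, sizeof(set), &set);
		/* the per-CPU syscalls done here no longer need cross-CPU IPIs */
		do_all_events_on_cpu(cpu);
	}

	sched_setaffinity(0, sizeof(orig), &orig);
	return 0;
}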

In theory we could go further by parallelizing these setups too,
but that would be much more complicated; for now, batching per CPU
seems to be sufficient. At some point, with many more cores,
parallelization or a better bulk perf setup API might be needed, though.

In addition, perf does a lot of redundant /sys accesses when there are
many PMUs, which can also be expensive. This is optimized as well.

On a large test case (>700 events with many weak groups) on a 94 CPU
system I go from

real 0m8.607s
user 0m0.550s
sys 0m8.041s

to

real 0m3.269s
user 0m0.760s
sys 0m1.694s

so shaving ~6 seconds off the system time, at slightly higher cost
in perf stat itself. On a 4-socket system the savings
are more dramatic:

real 0m15.641s
user 0m0.873s
sys 0m14.729s

to

real 0m4.493s
user 0m1.578s
sys 0m2.444s

so an ~11s difference in the user-visible setup time.

Also available in

git://git.kernel.org/pub/scm/linux/kernel/git/ak/linux-misc perf/stat-scale-7

v1: Initial post.
v2: Rebase. Fix some minor issues.
v3: Rebase. Address review feedback. Fix one minor issue.
v4: Modified based on review feedback. Now all_cpus is maintained
per evlist. A cpu_index iteration is still needed
to get the correct index for indexing the file descriptors.
Fix a bug with unsorted cpu maps; they are now always sorted.
Some cleanups and refactoring.

-Andi


2019-11-05 00:26:46

by Andi Kleen

Subject: [PATCH v4 6/9] perf stat: Factor out open error handling

From: Andi Kleen <[email protected]>

Factor out the open error handling into a separate function.
This is useful for follow-on patches that need to duplicate it.

No behavior change intended.

Signed-off-by: Andi Kleen <[email protected]>
---
tools/perf/builtin-stat.c | 100 +++++++++++++++++++++++---------------
1 file changed, 60 insertions(+), 40 deletions(-)

diff --git a/tools/perf/builtin-stat.c b/tools/perf/builtin-stat.c
index c88d4e118409..1a586009e5a7 100644
--- a/tools/perf/builtin-stat.c
+++ b/tools/perf/builtin-stat.c
@@ -420,6 +420,57 @@ static bool is_target_alive(struct target *_target,
return false;
}

+enum counter_recovery {
+ COUNTER_SKIP,
+ COUNTER_RETRY,
+ COUNTER_FATAL,
+};
+
+static enum counter_recovery stat_handle_error(struct evsel *counter)
+{
+ char msg[BUFSIZ];
+ /*
+ * PPC returns ENXIO for HW counters until 2.6.37
+ * (behavior changed with commit b0a873e).
+ */
+ if (errno == EINVAL || errno == ENOSYS ||
+ errno == ENOENT || errno == EOPNOTSUPP ||
+ errno == ENXIO) {
+ if (verbose > 0)
+ ui__warning("%s event is not supported by the kernel.\n",
+ perf_evsel__name(counter));
+ counter->supported = false;
+
+ if ((counter->leader != counter) ||
+ !(counter->leader->core.nr_members > 1))
+ return COUNTER_SKIP;
+ } else if (perf_evsel__fallback(counter, errno, msg, sizeof(msg))) {
+ if (verbose > 0)
+ ui__warning("%s\n", msg);
+ return COUNTER_RETRY;
+ } else if (target__has_per_thread(&target) &&
+ evsel_list->core.threads &&
+ evsel_list->core.threads->err_thread != -1) {
+ /*
+ * For global --per-thread case, skip current
+ * error thread.
+ */
+ if (!thread_map__remove(evsel_list->core.threads,
+ evsel_list->core.threads->err_thread)) {
+ evsel_list->core.threads->err_thread = -1;
+ return COUNTER_RETRY;
+ }
+ }
+
+ perf_evsel__open_strerror(counter, &target,
+ errno, msg, sizeof(msg));
+ ui__error("%s\n", msg);
+
+ if (child_pid != -1)
+ kill(child_pid, SIGTERM);
+ return COUNTER_FATAL;
+}
+
static int __run_perf_stat(int argc, const char **argv, int run_idx)
{
int interval = stat_config.interval;
@@ -469,47 +520,16 @@ static int __run_perf_stat(int argc, const char **argv, int run_idx)
goto try_again;
}

- /*
- * PPC returns ENXIO for HW counters until 2.6.37
- * (behavior changed with commit b0a873e).
- */
- if (errno == EINVAL || errno == ENOSYS ||
- errno == ENOENT || errno == EOPNOTSUPP ||
- errno == ENXIO) {
- if (verbose > 0)
- ui__warning("%s event is not supported by the kernel.\n",
- perf_evsel__name(counter));
- counter->supported = false;
-
- if ((counter->leader != counter) ||
- !(counter->leader->core.nr_members > 1))
- continue;
- } else if (perf_evsel__fallback(counter, errno, msg, sizeof(msg))) {
- if (verbose > 0)
- ui__warning("%s\n", msg);
- goto try_again;
- } else if (target__has_per_thread(&target) &&
- evsel_list->core.threads &&
- evsel_list->core.threads->err_thread != -1) {
- /*
- * For global --per-thread case, skip current
- * error thread.
- */
- if (!thread_map__remove(evsel_list->core.threads,
- evsel_list->core.threads->err_thread)) {
- evsel_list->core.threads->err_thread = -1;
- goto try_again;
- }
+ switch (stat_handle_error(counter)) {
+ case COUNTER_FATAL:
+ return -1;
+ case COUNTER_RETRY:
+ goto try_again;
+ case COUNTER_SKIP:
+ continue;
+ default:
+ break;
}
-
- perf_evsel__open_strerror(counter, &target,
- errno, msg, sizeof(msg));
- ui__error("%s\n", msg);
-
- if (child_pid != -1)
- kill(child_pid, SIGTERM);
-
- return -1;
}
counter->supported = true;

--
2.23.0

2019-11-05 00:27:39

by Andi Kleen

Subject: [PATCH v4 2/9] perf affinity: Add infrastructure to save/restore affinity

From: Andi Kleen <[email protected]>

The kernel perf subsystem has to IPI the target CPU for many
operations. On systems with many CPUs and when managing many events,
the overhead can be dominated by these IPIs.

An alternative is to set up the CPU affinity in the perf tool, then set
up all the events for that CPU, and then move on to the next CPU.

Add some affinity management infrastructure to enable such a model.
It is used in follow-on patches.
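
The intended usage pattern looks roughly like this (a sketch only; the
evlist__for_each_cpu iterator and the actual callers are added in later
patches of this series):

	struct affinity affinity;
	int cpu, i;

	if (affinity__setup(&affinity) < 0)
		return;
	evlist__for_each_cpu (evlist, i, cpu) {
		/* pin the tool to the target CPU once ... */
		affinity__set(&affinity, cpu);
		/* ... then do all per-event syscalls for this CPU, IPI-free */
	}
	/* restore the original affinity mask and free the bitmaps */
	affinity__cleanup(&affinity);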

Signed-off-by: Andi Kleen <[email protected]>

---

v2: Use linux/bitmap.h functions.
---
tools/perf/util/Build | 1 +
tools/perf/util/affinity.c | 72 ++++++++++++++++++++++++++++++++++++++
tools/perf/util/affinity.h | 15 ++++++++
3 files changed, 88 insertions(+)
create mode 100644 tools/perf/util/affinity.c
create mode 100644 tools/perf/util/affinity.h

diff --git a/tools/perf/util/Build b/tools/perf/util/Build
index 2c1504fe924c..c7d4eab017e5 100644
--- a/tools/perf/util/Build
+++ b/tools/perf/util/Build
@@ -76,6 +76,7 @@ perf-y += sort.o
perf-y += hist.o
perf-y += util.o
perf-y += cpumap.o
+perf-y += affinity.o
perf-y += cputopo.o
perf-y += cgroup.o
perf-y += target.o
diff --git a/tools/perf/util/affinity.c b/tools/perf/util/affinity.c
new file mode 100644
index 000000000000..e197b0416f56
--- /dev/null
+++ b/tools/perf/util/affinity.c
@@ -0,0 +1,72 @@
+// SPDX-License-Identifier: GPL-2.0
+/* Manage affinity to optimize IPIs inside the kernel perf API. */
+#define _GNU_SOURCE 1
+#include <sched.h>
+#include <stdlib.h>
+#include <linux/bitmap.h>
+#include "perf.h"
+#include "cpumap.h"
+#include "affinity.h"
+
+static int get_cpu_set_size(void)
+{
+ int sz = cpu__max_cpu() + 8 - 1;
+ /*
+ * sched_getaffinity doesn't like masks smaller than the kernel.
+ * Hopefully that's big enough.
+ */
+ if (sz < 4096)
+ sz = 4096;
+ return sz/8;
+}
+
+int affinity__setup(struct affinity *a)
+{
+ int cpu_set_size = get_cpu_set_size();
+
+ a->orig_cpus = bitmap_alloc(cpu_set_size*8);
+ if (!a->orig_cpus)
+ return -1;
+ sched_getaffinity(0, cpu_set_size, (cpu_set_t *)a->orig_cpus);
+ a->sched_cpus = bitmap_alloc(cpu_set_size*8);
+ if (!a->sched_cpus) {
+ free(a->orig_cpus);
+ return -1;
+ }
+ bitmap_zero((unsigned long *)a->sched_cpus, cpu_set_size);
+ a->changed = false;
+ return 0;
+}
+
+/*
+ * perf_event_open does an IPI internally to the target CPU.
+ * It is more efficient to change perf's affinity to the target
+ * CPU and then set up all events on that CPU, so we amortize
+ * CPU communication.
+ */
+void affinity__set(struct affinity *a, int cpu)
+{
+ int cpu_set_size = get_cpu_set_size();
+
+ if (cpu == -1)
+ return;
+ a->changed = true;
+ set_bit(cpu, a->sched_cpus);
+ /*
+ * We ignore errors because affinity is just an optimization.
+ * This could happen for example with isolated CPUs or cpusets.
+ * In this case the IPIs inside the kernel's perf API still work.
+ */
+ sched_setaffinity(0, cpu_set_size, (cpu_set_t *)a->sched_cpus);
+ clear_bit(cpu, a->sched_cpus);
+}
+
+void affinity__cleanup(struct affinity *a)
+{
+ int cpu_set_size = get_cpu_set_size();
+
+ if (a->changed)
+ sched_setaffinity(0, cpu_set_size, (cpu_set_t *)a->orig_cpus);
+ free(a->sched_cpus);
+ free(a->orig_cpus);
+}
diff --git a/tools/perf/util/affinity.h b/tools/perf/util/affinity.h
new file mode 100644
index 000000000000..008e2c3995b9
--- /dev/null
+++ b/tools/perf/util/affinity.h
@@ -0,0 +1,15 @@
+// SPDX-License-Identifier: GPL-2.0
+#ifndef AFFINITY_H
+#define AFFINITY_H 1
+
+struct affinity {
+ unsigned long *orig_cpus;
+ unsigned long *sched_cpus;
+ bool changed;
+};
+
+void affinity__cleanup(struct affinity *a);
+void affinity__set(struct affinity *a, int cpu);
+int affinity__setup(struct affinity *a);
+
+#endif
--
2.23.0

2019-11-05 00:27:54

by Andi Kleen

Subject: [PATCH v4 5/9] perf stat: Use affinity for closing file descriptors

From: Andi Kleen <[email protected]>

Closing a perf fd can also trigger an IPI to the target CPU.
Use the same affinity technique as we use for reading/enabling events
when closing file descriptors, to optimize the CPU transitions.

Before on a large test case with 94 CPUs:

% time seconds usecs/call calls errors syscall
------ ----------- ----------- --------- --------- ----------------
32.56 3.085463 50 61483 close

After:

10.54 0.735704 11 61485 close

Signed-off-by: Andi Kleen <[email protected]>

---

v2: Use new iterator macros
v3: Use new iterator macros
Add missing affinity__cleanup
---
tools/perf/lib/evsel.c | 27 +++++++++++++++++++++------
tools/perf/lib/include/perf/evsel.h | 1 +
tools/perf/util/evlist.c | 29 +++++++++++++++++++++++++++--
tools/perf/util/evsel.h | 1 +
4 files changed, 50 insertions(+), 8 deletions(-)

diff --git a/tools/perf/lib/evsel.c b/tools/perf/lib/evsel.c
index 5a89857b0381..ea775dacbd2d 100644
--- a/tools/perf/lib/evsel.c
+++ b/tools/perf/lib/evsel.c
@@ -114,16 +114,23 @@ int perf_evsel__open(struct perf_evsel *evsel, struct perf_cpu_map *cpus,
return err;
}

+static void perf_evsel__close_fd_cpu(struct perf_evsel *evsel, int cpu)
+{
+ int thread;
+
+ for (thread = 0; thread < xyarray__max_y(evsel->fd); ++thread) {
+ if (FD(evsel, cpu, thread) >= 0)
+ close(FD(evsel, cpu, thread));
+ FD(evsel, cpu, thread) = -1;
+ }
+}
+
void perf_evsel__close_fd(struct perf_evsel *evsel)
{
- int cpu, thread;
+ int cpu;

for (cpu = 0; cpu < xyarray__max_x(evsel->fd); cpu++)
- for (thread = 0; thread < xyarray__max_y(evsel->fd); ++thread) {
- if (FD(evsel, cpu, thread) >= 0)
- close(FD(evsel, cpu, thread));
- FD(evsel, cpu, thread) = -1;
- }
+ perf_evsel__close_fd_cpu(evsel, cpu);
}

void perf_evsel__free_fd(struct perf_evsel *evsel)
@@ -141,6 +148,14 @@ void perf_evsel__close(struct perf_evsel *evsel)
perf_evsel__free_fd(evsel);
}

+void perf_evsel__close_cpu(struct perf_evsel *evsel, int cpu)
+{
+ if (evsel->fd == NULL)
+ return;
+
+ perf_evsel__close_fd_cpu(evsel, cpu);
+}
+
int perf_evsel__read_size(struct perf_evsel *evsel)
{
u64 read_format = evsel->attr.read_format;
diff --git a/tools/perf/lib/include/perf/evsel.h b/tools/perf/lib/include/perf/evsel.h
index 557f5815a9c9..e7add554f861 100644
--- a/tools/perf/lib/include/perf/evsel.h
+++ b/tools/perf/lib/include/perf/evsel.h
@@ -26,6 +26,7 @@ LIBPERF_API void perf_evsel__delete(struct perf_evsel *evsel);
LIBPERF_API int perf_evsel__open(struct perf_evsel *evsel, struct perf_cpu_map *cpus,
struct perf_thread_map *threads);
LIBPERF_API void perf_evsel__close(struct perf_evsel *evsel);
+LIBPERF_API void perf_evsel__close_cpu(struct perf_evsel *evsel, int cpu);
LIBPERF_API int perf_evsel__read(struct perf_evsel *evsel, int cpu, int thread,
struct perf_counts_values *count);
LIBPERF_API int perf_evsel__enable(struct perf_evsel *evsel);
diff --git a/tools/perf/util/evlist.c b/tools/perf/util/evlist.c
index dccf96e0d01b..8f3bfbe277b5 100644
--- a/tools/perf/util/evlist.c
+++ b/tools/perf/util/evlist.c
@@ -18,6 +18,7 @@
#include "debug.h"
#include "units.h"
#include <internal/lib.h> // page_size
+#include "affinity.h"
#include "../perf.h"
#include "asm/bug.h"
#include "bpf-event.h"
@@ -1165,9 +1166,33 @@ void perf_evlist__set_selected(struct evlist *evlist,
void evlist__close(struct evlist *evlist)
{
struct evsel *evsel;
+ struct affinity affinity;
+ int cpu, i;

- evlist__for_each_entry_reverse(evlist, evsel)
- evsel__close(evsel);
+ if (!evlist->core.cpus) {
+ evlist__for_each_entry_reverse(evlist, evsel)
+ evsel__close(evsel);
+ return;
+ }
+
+ if (affinity__setup(&affinity) < 0)
+ return;
+ evlist__cpu_iter_start(evlist);
+ evlist__for_each_cpu (evlist, i, cpu) {
+ affinity__set(&affinity, cpu);
+
+ evlist__for_each_entry_reverse(evlist, evsel) {
+ if (evlist__cpu_iter_skip(evsel, cpu))
+ continue;
+ perf_evsel__close_cpu(&evsel->core, evsel->cpu_index);
+ evlist__cpu_iter_next(evsel);
+ }
+ }
+ affinity__cleanup(&affinity);
+ evlist__for_each_entry_reverse(evlist, evsel) {
+ perf_evsel__free_fd(&evsel->core);
+ perf_evsel__free_id(&evsel->core);
+ }
}

static int perf_evlist__create_syswide_maps(struct evlist *evlist)
diff --git a/tools/perf/util/evsel.h b/tools/perf/util/evsel.h
index cf90019ae744..2e3b011ed09e 100644
--- a/tools/perf/util/evsel.h
+++ b/tools/perf/util/evsel.h
@@ -391,4 +391,5 @@ static inline bool evsel__has_callchain(const struct evsel *evsel)
struct perf_env *perf_evsel__env(struct evsel *evsel);

int perf_evsel__store_ids(struct evsel *evsel, struct evlist *evlist);
+
#endif /* __PERF_EVSEL_H */
--
2.23.0

2019-11-05 00:28:05

by Andi Kleen

Subject: [PATCH v4 1/9] perf pmu: Use file system cache to optimize sysfs access

From: Andi Kleen <[email protected]>

pmu.c does a lot of redundant /sys accesses while parsing aliases
and probing for PMUs. On large systems with a lot of PMUs this
can get expensive (>2s):

% time seconds usecs/call calls errors syscall
------ ----------- ----------- --------- --------- ----------------
27.25 1.227847 8 160888 16976 openat
26.42 1.190481 7 164224 164077 stat

Add a cache to remember if specific file names exist or don't
exist, which eliminates most of this overhead.

Also optimize some stat() calls into slightly cheaper access() calls.

Resulting in:

0.18 0.004166 2 1851 305 open
0.08 0.001970 2 829 622 access

Signed-off-by: Andi Kleen <[email protected]>

---

v2: Use single lookup function as API (Jiri)
---
tools/perf/util/Build | 1 +
tools/perf/util/fncache.c | 63 +++++++++++++++++++++++++++++++++++++++
tools/perf/util/fncache.h | 7 +++++
tools/perf/util/pmu.c | 34 +++++++--------------
tools/perf/util/srccode.c | 9 +-----
5 files changed, 83 insertions(+), 31 deletions(-)
create mode 100644 tools/perf/util/fncache.c
create mode 100644 tools/perf/util/fncache.h

diff --git a/tools/perf/util/Build b/tools/perf/util/Build
index 39814b1806a6..2c1504fe924c 100644
--- a/tools/perf/util/Build
+++ b/tools/perf/util/Build
@@ -48,6 +48,7 @@ perf-y += header.o
perf-y += callchain.o
perf-y += values.o
perf-y += debug.o
+perf-y += fncache.o
perf-y += machine.o
perf-y += map.o
perf-y += pstack.o
diff --git a/tools/perf/util/fncache.c b/tools/perf/util/fncache.c
new file mode 100644
index 000000000000..5afcd7edbe7a
--- /dev/null
+++ b/tools/perf/util/fncache.c
@@ -0,0 +1,63 @@
+// SPDX-License-Identifier: GPL-2.0-only
+/* Manage a cache of file names' existence */
+#include <stdlib.h>
+#include <unistd.h>
+#include <string.h>
+#include <linux/list.h>
+#include "fncache.h"
+
+struct fncache {
+ struct hlist_node nd;
+ bool res;
+ char name[];
+};
+
+#define FNHSIZE 61
+
+static struct hlist_head fncache_hash[FNHSIZE];
+
+unsigned shash(const unsigned char *s)
+{
+ unsigned h = 0;
+ while (*s)
+ h = 65599 * h + *s++;
+ return h ^ (h >> 16);
+}
+
+static bool lookup_fncache(const char *name, bool *res)
+{
+ int h = shash((const unsigned char *)name) % FNHSIZE;
+ struct fncache *n;
+
+ hlist_for_each_entry (n, &fncache_hash[h], nd) {
+ if (!strcmp(n->name, name)) {
+ *res = n->res;
+ return true;
+ }
+ }
+ return false;
+}
+
+static void update_fncache(const char *name, bool res)
+{
+ struct fncache *n = malloc(sizeof(struct fncache) + strlen(name) + 1);
+ int h = shash((const unsigned char *)name) % FNHSIZE;
+
+ if (!n)
+ return;
+ strcpy(n->name, name);
+ n->res = res;
+ hlist_add_head(&n->nd, &fncache_hash[h]);
+}
+
+/* No LRU, only use when bounded in some other way. */
+bool file_available(const char *name)
+{
+ bool res;
+
+ if (lookup_fncache(name, &res))
+ return res;
+ res = access(name, R_OK) == 0;
+ update_fncache(name, res);
+ return res;
+}
diff --git a/tools/perf/util/fncache.h b/tools/perf/util/fncache.h
new file mode 100644
index 000000000000..fe020beaefb1
--- /dev/null
+++ b/tools/perf/util/fncache.h
@@ -0,0 +1,7 @@
+#ifndef _FCACHE_H
+#define _FCACHE_H 1
+
+unsigned shash(const unsigned char *s);
+bool file_available(const char *name);
+
+#endif
diff --git a/tools/perf/util/pmu.c b/tools/perf/util/pmu.c
index adbe97e941dd..81357cc3d59a 100644
--- a/tools/perf/util/pmu.c
+++ b/tools/perf/util/pmu.c
@@ -24,6 +24,7 @@
#include "pmu-events/pmu-events.h"
#include "string2.h"
#include "strbuf.h"
+#include "fncache.h"

struct perf_pmu_format {
char *name;
@@ -82,7 +83,6 @@ int perf_pmu__format_parse(char *dir, struct list_head *head)
*/
static int pmu_format(const char *name, struct list_head *format)
{
- struct stat st;
char path[PATH_MAX];
const char *sysfs = sysfs__mountpoint();

@@ -92,8 +92,8 @@ static int pmu_format(const char *name, struct list_head *format)
snprintf(path, PATH_MAX,
"%s" EVENT_SOURCE_DEVICE_PATH "%s/format", sysfs, name);

- if (stat(path, &st) < 0)
- return 0; /* no error if format does not exist */
+ if (!file_available(path))
+ return 0;

if (perf_pmu__format_parse(path, format))
return -1;
@@ -475,7 +475,6 @@ static int pmu_aliases_parse(char *dir, struct list_head *head)
*/
static int pmu_aliases(const char *name, struct list_head *head)
{
- struct stat st;
char path[PATH_MAX];
const char *sysfs = sysfs__mountpoint();

@@ -485,8 +484,8 @@ static int pmu_aliases(const char *name, struct list_head *head)
snprintf(path, PATH_MAX,
"%s/bus/event_source/devices/%s/events", sysfs, name);

- if (stat(path, &st) < 0)
- return 0; /* no error if 'events' does not exist */
+ if (!file_available(path))
+ return 0;

if (pmu_aliases_parse(path, head))
return -1;
@@ -525,7 +524,6 @@ static int pmu_alias_terms(struct perf_pmu_alias *alias,
*/
static int pmu_type(const char *name, __u32 *type)
{
- struct stat st;
char path[PATH_MAX];
FILE *file;
int ret = 0;
@@ -537,7 +535,7 @@ static int pmu_type(const char *name, __u32 *type)
snprintf(path, PATH_MAX,
"%s" EVENT_SOURCE_DEVICE_PATH "%s/type", sysfs, name);

- if (stat(path, &st) < 0)
+ if (access(path, R_OK) < 0)
return -1;

file = fopen(path, "r");
@@ -628,14 +626,11 @@ static struct perf_cpu_map *pmu_cpumask(const char *name)
static bool pmu_is_uncore(const char *name)
{
char path[PATH_MAX];
- struct perf_cpu_map *cpus;
- const char *sysfs = sysfs__mountpoint();
+ const char *sysfs;

+ sysfs = sysfs__mountpoint();
snprintf(path, PATH_MAX, CPUS_TEMPLATE_UNCORE, sysfs, name);
- cpus = __pmu_cpumask(path);
- perf_cpu_map__put(cpus);
-
- return !!cpus;
+ return file_available(path);
}

/*
@@ -645,7 +640,6 @@ static bool pmu_is_uncore(const char *name)
*/
static int is_arm_pmu_core(const char *name)
{
- struct stat st;
char path[PATH_MAX];
const char *sysfs = sysfs__mountpoint();

@@ -655,10 +649,7 @@ static int is_arm_pmu_core(const char *name)
/* Look for cpu sysfs (specific to arm) */
scnprintf(path, PATH_MAX, "%s/bus/event_source/devices/%s/cpus",
sysfs, name);
- if (stat(path, &st) == 0)
- return 1;
-
- return 0;
+ return file_available(path);
}

static char *perf_pmu__getcpuid(struct perf_pmu *pmu)
@@ -1528,7 +1519,6 @@ bool pmu_have_event(const char *pname, const char *name)

static FILE *perf_pmu__open_file(struct perf_pmu *pmu, const char *name)
{
- struct stat st;
char path[PATH_MAX];
const char *sysfs;

@@ -1538,10 +1528,8 @@ static FILE *perf_pmu__open_file(struct perf_pmu *pmu, const char *name)

snprintf(path, PATH_MAX,
"%s" EVENT_SOURCE_DEVICE_PATH "%s/%s", sysfs, pmu->name, name);
-
- if (stat(path, &st) < 0)
+ if (!file_available(path))
return NULL;
-
return fopen(path, "r");
}

diff --git a/tools/perf/util/srccode.c b/tools/perf/util/srccode.c
index d84ed8b6caaa..c29edaaca863 100644
--- a/tools/perf/util/srccode.c
+++ b/tools/perf/util/srccode.c
@@ -16,6 +16,7 @@
#include "srccode.h"
#include "debug.h"
#include <internal/lib.h> // page_size
+#include "fncache.h"

#define MAXSRCCACHE (32*1024*1024)
#define MAXSRCFILES 64
@@ -36,14 +37,6 @@ static LIST_HEAD(srcfile_list);
static long map_total_sz;
static int num_srcfiles;

-static unsigned shash(unsigned char *s)
-{
- unsigned h = 0;
- while (*s)
- h = 65599 * h + *s++;
- return h ^ (h >> 16);
-}
-
static int countlines(char *map, int maplen)
{
int numl;
--
2.23.0

2019-11-05 00:28:26

by Andi Kleen

Subject: [PATCH v4 3/9] perf evlist: Maintain evlist->all_cpus

From: Andi Kleen <[email protected]>

Maintain a cpumap in the evlist that is the union of all the cpus
of the events.

This needs a cpumap merge operation. To make the merge operation
work efficiently maintain the cpumap in a sorted state.

The sorting is also needed for the affinity loop changes later.
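
For illustration only (this snippet is not part of the patch; it just
exercises the API added here), merging the cpu maps of two events gives a
single sorted map covering both:

	struct perf_cpu_map *a = perf_cpu_map__new("0,2,4");
	struct perf_cpu_map *b = perf_cpu_map__new("1,2,3");
	/* all now lists the cpus of both maps in ascending order */
	struct perf_cpu_map *all = perf_cpu_map__update(a, b);
	/* drop the references with perf_cpu_map__put() when done */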

Signed-off-by: Andi Kleen <[email protected]>
---
tools/perf/lib/cpumap.c | 42 ++++++++++++++++++++++++
tools/perf/lib/evlist.c | 1 +
tools/perf/lib/include/internal/evlist.h | 1 +
tools/perf/lib/include/perf/cpumap.h | 2 ++
4 files changed, 46 insertions(+)

diff --git a/tools/perf/lib/cpumap.c b/tools/perf/lib/cpumap.c
index 2ca1fafa620d..5ca26a9d318f 100644
--- a/tools/perf/lib/cpumap.c
+++ b/tools/perf/lib/cpumap.c
@@ -68,6 +68,11 @@ static struct perf_cpu_map *cpu_map__default_new(void)
return cpus;
}

+static int cmp_int(const void *a, const void *b)
+{
+ return *(const int *)a - *(const int*)b;
+}
+
static struct perf_cpu_map *cpu_map__trim_new(int nr_cpus, int *tmp_cpus)
{
size_t payload_size = nr_cpus * sizeof(int);
@@ -76,6 +81,7 @@ static struct perf_cpu_map *cpu_map__trim_new(int nr_cpus, int *tmp_cpus)
if (cpus != NULL) {
cpus->nr = nr_cpus;
memcpy(cpus->map, tmp_cpus, payload_size);
+ qsort(cpus->map, nr_cpus, sizeof(int), cmp_int);
refcount_set(&cpus->refcnt, 1);
}

@@ -272,3 +278,39 @@ int perf_cpu_map__max(struct perf_cpu_map *map)

return max;
}
+
+struct perf_cpu_map *perf_cpu_map__update(struct perf_cpu_map *orig,
+ struct perf_cpu_map *other)
+{
+ int *tmp_cpus;
+ int tmp_len;
+ int i, j, k;
+ struct perf_cpu_map *merged;
+
+ if (!orig) {
+ perf_cpu_map__get(other);
+ return other;
+ }
+ if (orig->nr == other->nr &&
+ !memcmp(orig->map, other->map, orig->nr * sizeof(int)))
+ return orig;
+ tmp_len = orig->nr + other->nr;
+ tmp_cpus = malloc(tmp_len * sizeof(int));
+ if (!tmp_cpus)
+ return NULL;
+ i = j = k = 0;
+ while (i < orig->nr && j < other->nr) {
+ if (orig->map[i] <= other->map[j])
+ tmp_cpus[k++] = orig->map[i++];
+ else
+ tmp_cpus[k++] = other->map[j++];
+ }
+ while (i < orig->nr)
+ tmp_cpus[k++] = orig->map[i++];
+ while (j < other->nr)
+ tmp_cpus[k++] = other->map[j++];
+ assert(k < tmp_len);
+ merged = cpu_map__trim_new(k, tmp_cpus);
+ free(tmp_cpus);
+ return merged;
+}
diff --git a/tools/perf/lib/evlist.c b/tools/perf/lib/evlist.c
index 205ddbb80bc1..35a627283122 100644
--- a/tools/perf/lib/evlist.c
+++ b/tools/perf/lib/evlist.c
@@ -54,6 +54,7 @@ static void __perf_evlist__propagate_maps(struct perf_evlist *evlist,

perf_thread_map__put(evsel->threads);
evsel->threads = perf_thread_map__get(evlist->threads);
+ evlist->all_cpus = perf_cpu_map__update(evlist->all_cpus, evsel->cpus);
}

static void perf_evlist__propagate_maps(struct perf_evlist *evlist)
diff --git a/tools/perf/lib/include/internal/evlist.h b/tools/perf/lib/include/internal/evlist.h
index a2fbccf1922f..74dc8c3f0b66 100644
--- a/tools/perf/lib/include/internal/evlist.h
+++ b/tools/perf/lib/include/internal/evlist.h
@@ -18,6 +18,7 @@ struct perf_evlist {
int nr_entries;
bool has_user_cpus;
struct perf_cpu_map *cpus;
+ struct perf_cpu_map *all_cpus;
struct perf_thread_map *threads;
int nr_mmaps;
size_t mmap_len;
diff --git a/tools/perf/lib/include/perf/cpumap.h b/tools/perf/lib/include/perf/cpumap.h
index ac9aa497f84a..dcf61da64267 100644
--- a/tools/perf/lib/include/perf/cpumap.h
+++ b/tools/perf/lib/include/perf/cpumap.h
@@ -12,6 +12,8 @@ LIBPERF_API struct perf_cpu_map *perf_cpu_map__dummy_new(void);
LIBPERF_API struct perf_cpu_map *perf_cpu_map__new(const char *cpu_list);
LIBPERF_API struct perf_cpu_map *perf_cpu_map__read(FILE *file);
LIBPERF_API struct perf_cpu_map *perf_cpu_map__get(struct perf_cpu_map *map);
+LIBPERF_API struct perf_cpu_map *perf_cpu_map__update(struct perf_cpu_map *orig,
+ struct perf_cpu_map *other);
LIBPERF_API void perf_cpu_map__put(struct perf_cpu_map *map);
LIBPERF_API int perf_cpu_map__cpu(const struct perf_cpu_map *cpus, int idx);
LIBPERF_API int perf_cpu_map__nr(const struct perf_cpu_map *cpus);
--
2.23.0

2019-11-05 00:29:32

by Andi Kleen

Subject: [PATCH v4 9/9] perf stat: Use affinity for enabling/disabling events

From: Andi Kleen <[email protected]>

Restructure event enabling/disabling to use affinity, which
minimizes the number of IPIs needed.

Before on a large test case with 94 CPUs:

% time seconds usecs/call calls errors syscall
------ ----------- ----------- --------- --------- ----------------
54.65 1.899986 22 84812 660 ioctl

after:

39.21 0.930451 10 84796 644 ioctl

Signed-off-by: Andi Kleen <[email protected]>

---

v2: Use new iterator macros
v3: Use new iterator macros
---
tools/perf/lib/evsel.c | 49 +++++++++++++++++++++--------
tools/perf/lib/include/perf/evsel.h | 2 ++
tools/perf/util/evlist.c | 46 ++++++++++++++++++++++++---
tools/perf/util/evsel.c | 13 ++++++++
tools/perf/util/evsel.h | 2 ++
5 files changed, 94 insertions(+), 18 deletions(-)

diff --git a/tools/perf/lib/evsel.c b/tools/perf/lib/evsel.c
index ea775dacbd2d..89ddfade0b96 100644
--- a/tools/perf/lib/evsel.c
+++ b/tools/perf/lib/evsel.c
@@ -198,38 +198,61 @@ int perf_evsel__read(struct perf_evsel *evsel, int cpu, int thread,
}

static int perf_evsel__run_ioctl(struct perf_evsel *evsel,
- int ioc, void *arg)
+ int ioc, void *arg,
+ int cpu)
{
- int cpu, thread;
+ int thread;

- for (cpu = 0; cpu < xyarray__max_x(evsel->fd); cpu++) {
- for (thread = 0; thread < xyarray__max_y(evsel->fd); thread++) {
- int fd = FD(evsel, cpu, thread),
- err = ioctl(fd, ioc, arg);
+ for (thread = 0; thread < xyarray__max_y(evsel->fd); thread++) {
+ int fd = FD(evsel, cpu, thread),
+ err = ioctl(fd, ioc, arg);

- if (err)
- return err;
- }
+ if (err)
+ return err;
}

return 0;
}

+int perf_evsel__enable_cpu(struct perf_evsel *evsel, int cpu)
+{
+ return perf_evsel__run_ioctl(evsel, PERF_EVENT_IOC_ENABLE, 0, cpu);
+}
+
int perf_evsel__enable(struct perf_evsel *evsel)
{
- return perf_evsel__run_ioctl(evsel, PERF_EVENT_IOC_ENABLE, 0);
+ int i;
+ int err = 0;
+
+ for (i = 0; i < evsel->cpus->nr && !err; i++)
+ err = perf_evsel__run_ioctl(evsel, PERF_EVENT_IOC_ENABLE, 0, i);
+ return err;
+}
+
+int perf_evsel__disable_cpu(struct perf_evsel *evsel, int cpu)
+{
+ return perf_evsel__run_ioctl(evsel, PERF_EVENT_IOC_DISABLE, 0, cpu);
}

int perf_evsel__disable(struct perf_evsel *evsel)
{
- return perf_evsel__run_ioctl(evsel, PERF_EVENT_IOC_DISABLE, 0);
+ int i;
+ int err = 0;
+
+ for (i = 0; i < evsel->cpus->nr && !err; i++)
+ err = perf_evsel__run_ioctl(evsel, PERF_EVENT_IOC_DISABLE, 0, i);
+ return err;
}

int perf_evsel__apply_filter(struct perf_evsel *evsel, const char *filter)
{
- return perf_evsel__run_ioctl(evsel,
+ int err = 0, i;
+
+ for (i = 0; i < evsel->cpus->nr && !err; i++)
+ err = perf_evsel__run_ioctl(evsel,
PERF_EVENT_IOC_SET_FILTER,
- (void *)filter);
+ (void *)filter, i);
+ return err;
}

struct perf_cpu_map *perf_evsel__cpus(struct perf_evsel *evsel)
diff --git a/tools/perf/lib/include/perf/evsel.h b/tools/perf/lib/include/perf/evsel.h
index e7add554f861..c82ec39a4ad0 100644
--- a/tools/perf/lib/include/perf/evsel.h
+++ b/tools/perf/lib/include/perf/evsel.h
@@ -30,7 +30,9 @@ LIBPERF_API void perf_evsel__close_cpu(struct perf_evsel *evsel, int cpu);
LIBPERF_API int perf_evsel__read(struct perf_evsel *evsel, int cpu, int thread,
struct perf_counts_values *count);
LIBPERF_API int perf_evsel__enable(struct perf_evsel *evsel);
+LIBPERF_API int perf_evsel__enable_cpu(struct perf_evsel *evsel, int cpu);
LIBPERF_API int perf_evsel__disable(struct perf_evsel *evsel);
+LIBPERF_API int perf_evsel__disable_cpu(struct perf_evsel *evsel, int cpu);
LIBPERF_API struct perf_cpu_map *perf_evsel__cpus(struct perf_evsel *evsel);
LIBPERF_API struct perf_thread_map *perf_evsel__threads(struct perf_evsel *evsel);
LIBPERF_API struct perf_event_attr *perf_evsel__attr(struct perf_evsel *evsel);
diff --git a/tools/perf/util/evlist.c b/tools/perf/util/evlist.c
index 146e6d126966..01caacf3349b 100644
--- a/tools/perf/util/evlist.c
+++ b/tools/perf/util/evlist.c
@@ -374,26 +374,62 @@ void evlist__cpu_iter_next(struct evsel *ev)
void evlist__disable(struct evlist *evlist)
{
struct evsel *pos;
+ struct affinity affinity;
+ int cpu, i;
+
+ if (affinity__setup(&affinity) < 0)
+ return;
+
+ evlist__cpu_iter_start(evlist);
+ evlist__for_each_cpu (evlist, i, cpu) {
+ affinity__set(&affinity, cpu);

+ evlist__for_each_entry(evlist, pos) {
+ if (evlist__cpu_iter_skip(pos, cpu))
+ continue;
+ if (pos->disabled || !perf_evsel__is_group_leader(pos) || !pos->core.fd)
+ continue;
+ evsel__disable_cpu(pos, pos->cpu_index);
+ evlist__cpu_iter_next(pos);
+ }
+ }
+ affinity__cleanup(&affinity);
evlist__for_each_entry(evlist, pos) {
- if (pos->disabled || !perf_evsel__is_group_leader(pos) || !pos->core.fd)
+ if (!perf_evsel__is_group_leader(pos) || !pos->core.fd)
continue;
- evsel__disable(pos);
+ pos->disabled = true;
}
-
evlist->enabled = false;
}

void evlist__enable(struct evlist *evlist)
{
struct evsel *pos;
+ struct affinity affinity;
+ int cpu, i;
+
+ if (affinity__setup(&affinity) < 0)
+ return;

+ evlist__cpu_iter_start(evlist);
+ evlist__for_each_cpu (evlist, i, cpu) {
+ affinity__set(&affinity, cpu);
+
+ evlist__for_each_entry(evlist, pos) {
+ if (evlist__cpu_iter_skip(pos, cpu))
+ continue;
+ if (!perf_evsel__is_group_leader(pos) || !pos->core.fd)
+ continue;
+ evsel__enable_cpu(pos, pos->cpu_index);
+ evlist__cpu_iter_next(pos);
+ }
+ }
+ affinity__cleanup(&affinity);
evlist__for_each_entry(evlist, pos) {
if (!perf_evsel__is_group_leader(pos) || !pos->core.fd)
continue;
- evsel__enable(pos);
+ pos->disabled = false;
}
-
evlist->enabled = true;
}

diff --git a/tools/perf/util/evsel.c b/tools/perf/util/evsel.c
index 7106f9a067df..79050a6f4991 100644
--- a/tools/perf/util/evsel.c
+++ b/tools/perf/util/evsel.c
@@ -1205,13 +1205,26 @@ int perf_evsel__append_addr_filter(struct evsel *evsel, const char *filter)
return perf_evsel__append_filter(evsel, "%s,%s", filter);
}

+/* Caller has to clear disabled after going through all CPUs. */
+int evsel__enable_cpu(struct evsel *evsel, int cpu)
+{
+ int err = perf_evsel__enable_cpu(&evsel->core, cpu);
+ return err;
+}
+
int evsel__enable(struct evsel *evsel)
{
int err = perf_evsel__enable(&evsel->core);

if (!err)
evsel->disabled = false;
+ return err;
+}

+/* Caller has to set disabled after going through all CPUs. */
+int evsel__disable_cpu(struct evsel *evsel, int cpu)
+{
+ int err = perf_evsel__disable_cpu(&evsel->core, cpu);
return err;
}

diff --git a/tools/perf/util/evsel.h b/tools/perf/util/evsel.h
index 9fc9f6698aa4..15977bbe7b63 100644
--- a/tools/perf/util/evsel.h
+++ b/tools/perf/util/evsel.h
@@ -222,8 +222,10 @@ int perf_evsel__set_filter(struct evsel *evsel, const char *filter);
int perf_evsel__append_tp_filter(struct evsel *evsel, const char *filter);
int perf_evsel__append_addr_filter(struct evsel *evsel,
const char *filter);
+int evsel__enable_cpu(struct evsel *evsel, int cpu);
int evsel__enable(struct evsel *evsel);
int evsel__disable(struct evsel *evsel);
+int evsel__disable_cpu(struct evsel *evsel, int cpu);

int perf_evsel__open_per_cpu(struct evsel *evsel,
struct perf_cpu_map *cpus,
--
2.23.0

2019-11-06 16:35:35

by Jiri Olsa

Subject: Re: [PATCH v4 5/9] perf stat: Use affinity for closing file descriptors

On Mon, Nov 04, 2019 at 04:25:18PM -0800, Andi Kleen wrote:

SNIP

> diff --git a/tools/perf/util/evlist.c b/tools/perf/util/evlist.c
> index dccf96e0d01b..8f3bfbe277b5 100644
> --- a/tools/perf/util/evlist.c
> +++ b/tools/perf/util/evlist.c
> @@ -18,6 +18,7 @@
> #include "debug.h"
> #include "units.h"
> #include <internal/lib.h> // page_size
> +#include "affinity.h"
> #include "../perf.h"
> #include "asm/bug.h"
> #include "bpf-event.h"
> @@ -1165,9 +1166,33 @@ void perf_evlist__set_selected(struct evlist *evlist,
> void evlist__close(struct evlist *evlist)
> {
> struct evsel *evsel;
> + struct affinity affinity;
> + int cpu, i;
>
> - evlist__for_each_entry_reverse(evlist, evsel)
> - evsel__close(evsel);
> + if (!evlist->core.cpus) {
> + evlist__for_each_entry_reverse(evlist, evsel)
> + evsel__close(evsel);
> + return;
> + }
> +
> + if (affinity__setup(&affinity) < 0)
> + return;
> + evlist__cpu_iter_start(evlist);
> + evlist__for_each_cpu (evlist, i, cpu) {
> + affinity__set(&affinity, cpu);
> +
> + evlist__for_each_entry_reverse(evlist, evsel) {
> + if (evlist__cpu_iter_skip(evsel, cpu))
> + continue;
> + perf_evsel__close_cpu(&evsel->core, evsel->cpu_index);
> + evlist__cpu_iter_next(evsel);
> + }
> + }

looks much better, how about we make it even more compact
than the above pattern, like:

affinity__setup

evlist__cpu_iter(evlist, i, cpu) {
affinity__set(&affinity, cpu);

evlist__for_each_entry_reverse(evlist, evsel) {
if (evsel__cpu_iter_skip(evsel, cpu))
continue;
perf_evsel__close_cpu(&evsel->core, evsel->cpu_iter);
}
}

affinity__cleanup

where:
- evlist__cpu_iter would call evlist__cpu_iter_start

- evlist__cpu_iter_skip could increment evsel->cpu_index in the false case,
so there's no need to have evlist__cpu_iter_next (a rough sketch of such a
helper follows below)

- rename evsel->cpu_index to evsel->cpu_iter

- rename evlist__cpu_iter_skip to evsel__cpu_iter_skip
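
A rough sketch of what such a combined skip helper could look like
(hypothetical; the field and function names follow the renames suggested
above, not existing code):

static bool evsel__cpu_iter_skip(struct evsel *ev, int cpu)
{
	/* this evsel has nothing to do on this cpu */
	if (ev->cpu_iter >= ev->core.cpus->nr ||
	    ev->core.cpus->map[ev->cpu_iter] != cpu)
		return true;
	/* will be processed: advance so no separate _next call is needed */
	ev->cpu_iter++;
	return false;
}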

jirka

2019-11-06 16:36:02

by Jiri Olsa

Subject: Re: [PATCH v4 3/9] perf evlist: Maintain evlist->all_cpus

On Mon, Nov 04, 2019 at 04:25:16PM -0800, Andi Kleen wrote:
> From: Andi Kleen <[email protected]>
>
> Maintain a cpumap in the evlist that is the union of all the cpus
> of the events.
>
> This needs a cpumap merge operation. To make the merge operation
> work efficiently maintain the cpumap in a sorted state.
>
> The sorting is also needed for the affinity loop changes later.
>
> Signed-off-by: Andi Kleen <[email protected]>
> ---
> tools/perf/lib/cpumap.c | 42 ++++++++++++++++++++++++
> tools/perf/lib/evlist.c | 1 +
> tools/perf/lib/include/internal/evlist.h | 1 +
> tools/perf/lib/include/perf/cpumap.h | 2 ++
> 4 files changed, 46 insertions(+)
>
> diff --git a/tools/perf/lib/cpumap.c b/tools/perf/lib/cpumap.c
> index 2ca1fafa620d..5ca26a9d318f 100644
> --- a/tools/perf/lib/cpumap.c
> +++ b/tools/perf/lib/cpumap.c
> @@ -68,6 +68,11 @@ static struct perf_cpu_map *cpu_map__default_new(void)
> return cpus;
> }
>
> +static int cmp_int(const void *a, const void *b)
> +{
> + return *(const int *)a - *(const int*)b;
> +}
> +
> static struct perf_cpu_map *cpu_map__trim_new(int nr_cpus, int *tmp_cpus)
> {
> size_t payload_size = nr_cpus * sizeof(int);
> @@ -76,6 +81,7 @@ static struct perf_cpu_map *cpu_map__trim_new(int nr_cpus, int *tmp_cpus)
> if (cpus != NULL) {
> cpus->nr = nr_cpus;
> memcpy(cpus->map, tmp_cpus, payload_size);
> + qsort(cpus->map, nr_cpus, sizeof(int), cmp_int);
> refcount_set(&cpus->refcnt, 1);
> }

please move the sort into separate change

>
> @@ -272,3 +278,39 @@ int perf_cpu_map__max(struct perf_cpu_map *map)
>
> return max;
> }
> +
> +struct perf_cpu_map *perf_cpu_map__update(struct perf_cpu_map *orig,
> + struct perf_cpu_map *other)
> +{

it's more like perf_cpu_map__merge, right?

> + int *tmp_cpus;
> + int tmp_len;
> + int i, j, k;
> + struct perf_cpu_map *merged;
> +
> + if (!orig) {
> + perf_cpu_map__get(other);
> + return other;
> + }
> + if (orig->nr == other->nr &&
> + !memcmp(orig->map, other->map, orig->nr * sizeof(int)))
> + return orig;
> + tmp_len = orig->nr + other->nr;
> + tmp_cpus = malloc(tmp_len * sizeof(int));
> + if (!tmp_cpus)
> + return NULL;
> + i = j = k = 0;
> + while (i < orig->nr && j < other->nr) {
> + if (orig->map[i] <= other->map[j])
> + tmp_cpus[k++] = orig->map[i++];
> + else
> + tmp_cpus[k++] = other->map[j++];
> + }
> + while (i < orig->nr)
> + tmp_cpus[k++] = orig->map[i++];
> + while (j < other->nr)
> + tmp_cpus[k++] = other->map[j++];
> + assert(k < tmp_len);
> + merged = cpu_map__trim_new(k, tmp_cpus);
> + free(tmp_cpus);
> + return merged;
> +}

this is a great function for an automated test, please add one

thanks,
jirka

2019-11-06 16:36:16

by Jiri Olsa

Subject: Re: [PATCH v4 5/9] perf stat: Use affinity for closing file descriptors

On Mon, Nov 04, 2019 at 04:25:18PM -0800, Andi Kleen wrote:

SNIP

> ---
> tools/perf/lib/evsel.c | 27 +++++++++++++++++++++------
> tools/perf/lib/include/perf/evsel.h | 1 +
> tools/perf/util/evlist.c | 29 +++++++++++++++++++++++++++--
> tools/perf/util/evsel.h | 1 +
> 4 files changed, 50 insertions(+), 8 deletions(-)
>
> diff --git a/tools/perf/lib/evsel.c b/tools/perf/lib/evsel.c
> index 5a89857b0381..ea775dacbd2d 100644
> --- a/tools/perf/lib/evsel.c
> +++ b/tools/perf/lib/evsel.c
> @@ -114,16 +114,23 @@ int perf_evsel__open(struct perf_evsel *evsel, struct perf_cpu_map *cpus,
> return err;
> }
>
> +static void perf_evsel__close_fd_cpu(struct perf_evsel *evsel, int cpu)
> +{
> + int thread;
> +
> + for (thread = 0; thread < xyarray__max_y(evsel->fd); ++thread) {
> + if (FD(evsel, cpu, thread) >= 0)
> + close(FD(evsel, cpu, thread));
> + FD(evsel, cpu, thread) = -1;
> + }
> +}
> +
> void perf_evsel__close_fd(struct perf_evsel *evsel)
> {
> - int cpu, thread;
> + int cpu;
>
> for (cpu = 0; cpu < xyarray__max_x(evsel->fd); cpu++)
> - for (thread = 0; thread < xyarray__max_y(evsel->fd); ++thread) {
> - if (FD(evsel, cpu, thread) >= 0)
> - close(FD(evsel, cpu, thread));
> - FD(evsel, cpu, thread) = -1;
> - }
> + perf_evsel__close_fd_cpu(evsel, cpu);
> }
>
> void perf_evsel__free_fd(struct perf_evsel *evsel)
> @@ -141,6 +148,14 @@ void perf_evsel__close(struct perf_evsel *evsel)
> perf_evsel__free_fd(evsel);
> }
>
> +void perf_evsel__close_cpu(struct perf_evsel *evsel, int cpu)
> +{
> + if (evsel->fd == NULL)
> + return;
> +
> + perf_evsel__close_fd_cpu(evsel, cpu);
> +}
> +

please move the perf_evsel__close_cpu addition into separate patch

thanks,
jirka

2019-11-06 16:37:37

by Jiri Olsa

Subject: Re: [PATCH v4 6/9] perf stat: Factor out open error handling

On Mon, Nov 04, 2019 at 04:25:19PM -0800, Andi Kleen wrote:

SNIP

> goto try_again;
> }
>
> - /*
> - * PPC returns ENXIO for HW counters until 2.6.37
> - * (behavior changed with commit b0a873e).
> - */
> - if (errno == EINVAL || errno == ENOSYS ||
> - errno == ENOENT || errno == EOPNOTSUPP ||
> - errno == ENXIO) {
> - if (verbose > 0)
> - ui__warning("%s event is not supported by the kernel.\n",
> - perf_evsel__name(counter));
> - counter->supported = false;
> -
> - if ((counter->leader != counter) ||
> - !(counter->leader->core.nr_members > 1))
> - continue;
> - } else if (perf_evsel__fallback(counter, errno, msg, sizeof(msg))) {
> - if (verbose > 0)
> - ui__warning("%s\n", msg);
> - goto try_again;
> - } else if (target__has_per_thread(&target) &&
> - evsel_list->core.threads &&
> - evsel_list->core.threads->err_thread != -1) {
> - /*
> - * For global --per-thread case, skip current
> - * error thread.
> - */
> - if (!thread_map__remove(evsel_list->core.threads,
> - evsel_list->core.threads->err_thread)) {
> - evsel_list->core.threads->err_thread = -1;
> - goto try_again;
> - }
> + switch (stat_handle_error(counter)) {
> + case COUNTER_FATAL:
> + return -1;
> + case COUNTER_RETRY:
> + goto try_again;
> + case COUNTER_SKIP:
> + continue;
> + default:
> + break;
> }

great, looks good, thanks

jirka

> -
> - perf_evsel__open_strerror(counter, &target,
> - errno, msg, sizeof(msg));
> - ui__error("%s\n", msg);
> -
> - if (child_pid != -1)
> - kill(child_pid, SIGTERM);
> -
> - return -1;
> }
> counter->supported = true;
>
> --
> 2.23.0
>

2019-11-06 16:39:33

by Jiri Olsa

Subject: Re: [PATCH v4 9/9] perf stat: Use affinity for enabling/disabling events

On Mon, Nov 04, 2019 at 04:25:22PM -0800, Andi Kleen wrote:

SNIP

> }
>
> static int perf_evsel__run_ioctl(struct perf_evsel *evsel,
> - int ioc, void *arg)
> + int ioc, void *arg,
> + int cpu)
> {
> - int cpu, thread;
> + int thread;
>
> - for (cpu = 0; cpu < xyarray__max_x(evsel->fd); cpu++) {
> - for (thread = 0; thread < xyarray__max_y(evsel->fd); thread++) {
> - int fd = FD(evsel, cpu, thread),
> - err = ioctl(fd, ioc, arg);
> + for (thread = 0; thread < xyarray__max_y(evsel->fd); thread++) {
> + int fd = FD(evsel, cpu, thread),
> + err = ioctl(fd, ioc, arg);
>
> - if (err)
> - return err;
> - }
> + if (err)
> + return err;
> }
>
> return 0;
> }
>
> +int perf_evsel__enable_cpu(struct perf_evsel *evsel, int cpu)
> +{
> + return perf_evsel__run_ioctl(evsel, PERF_EVENT_IOC_ENABLE, 0, cpu);
> +}
> +
> int perf_evsel__enable(struct perf_evsel *evsel)
> {
> - return perf_evsel__run_ioctl(evsel, PERF_EVENT_IOC_ENABLE, 0);
> + int i;
> + int err = 0;
> +
> + for (i = 0; i < evsel->cpus->nr && !err; i++)
> + err = perf_evsel__run_ioctl(evsel, PERF_EVENT_IOC_ENABLE, 0, i);
> + return err;
> +}
> +
> +int perf_evsel__disable_cpu(struct perf_evsel *evsel, int cpu)
> +{
> + return perf_evsel__run_ioctl(evsel, PERF_EVENT_IOC_DISABLE, 0, cpu);
> }
>
> int perf_evsel__disable(struct perf_evsel *evsel)
> {
> - return perf_evsel__run_ioctl(evsel, PERF_EVENT_IOC_DISABLE, 0);
> + int i;
> + int err = 0;
> +
> + for (i = 0; i < evsel->cpus->nr && !err; i++)
> + err = perf_evsel__run_ioctl(evsel, PERF_EVENT_IOC_DISABLE, 0, i);
> + return err;
> }
>
> int perf_evsel__apply_filter(struct perf_evsel *evsel, const char *filter)
> {
> - return perf_evsel__run_ioctl(evsel,
> + int err = 0, i;
> +
> + for (i = 0; i < evsel->cpus->nr && !err; i++)
> + err = perf_evsel__run_ioctl(evsel,
> PERF_EVENT_IOC_SET_FILTER,
> - (void *)filter);
> + (void *)filter, i);
> + return err;
> }

please move the new function additions to separate patches

thanks,
jirka