2019-11-12 01:01:26

by Andi Kleen

[permalink] [raw]
Subject: [PATCH v6 10/12] perf stat: Use affinity for reading

From: Andi Kleen <[email protected]>

Restructure event reading to use affinity to minimize the number
of IPIs needed.

Before on a large test case with 94 CPUs:

% time seconds usecs/call calls errors syscall
------ ----------- ----------- --------- --------- ----------------
3.16 0.106079 4 22082 read

After:

3.43 0.081295 3 22082 read

Signed-off-by: Andi Kleen <[email protected]>

---

v2: Use new iterator macros
v3: Use new iterator macros
v4: Change iterator macros even more
v5: Preserve counter->err in all cases
---
tools/perf/builtin-stat.c | 95 ++++++++++++++++++++++-----------------
tools/perf/util/evsel.h | 1 +
2 files changed, 55 insertions(+), 41 deletions(-)

diff --git a/tools/perf/builtin-stat.c b/tools/perf/builtin-stat.c
index 039aefb07777..7784f5a93944 100644
--- a/tools/perf/builtin-stat.c
+++ b/tools/perf/builtin-stat.c
@@ -266,15 +266,10 @@ static int read_single_counter(struct evsel *counter, int cpu,
* Read out the results of a single counter:
* do not aggregate counts across CPUs in system-wide mode
*/
-static int read_counter(struct evsel *counter, struct timespec *rs)
+static int read_counter(struct evsel *counter, struct timespec *rs, int cpu)
{
int nthreads = perf_thread_map__nr(evsel_list->core.threads);
- int ncpus, cpu, thread;
-
- if (target__has_cpu(&target) && !target__has_per_thread(&target))
- ncpus = perf_evsel__nr_cpus(counter);
- else
- ncpus = 1;
+ int thread;

if (!counter->supported)
return -ENOENT;
@@ -283,40 +278,38 @@ static int read_counter(struct evsel *counter, struct timespec *rs)
nthreads = 1;

for (thread = 0; thread < nthreads; thread++) {
- for (cpu = 0; cpu < ncpus; cpu++) {
- struct perf_counts_values *count;
-
- count = perf_counts(counter->counts, cpu, thread);
-
- /*
- * The leader's group read loads data into its group members
- * (via perf_evsel__read_counter) and sets threir count->loaded.
- */
- if (!perf_counts__is_loaded(counter->counts, cpu, thread) &&
- read_single_counter(counter, cpu, thread, rs)) {
- counter->counts->scaled = -1;
- perf_counts(counter->counts, cpu, thread)->ena = 0;
- perf_counts(counter->counts, cpu, thread)->run = 0;
- return -1;
- }
+ struct perf_counts_values *count;

- perf_counts__set_loaded(counter->counts, cpu, thread, false);
+ count = perf_counts(counter->counts, cpu, thread);

- if (STAT_RECORD) {
- if (perf_evsel__write_stat_event(counter, cpu, thread, count)) {
- pr_err("failed to write stat event\n");
- return -1;
- }
- }
+ /*
+ * The leader's group read loads data into its group members
+ * (via perf_evsel__read_counter) and sets threir count->loaded.
+ */
+ if (!perf_counts__is_loaded(counter->counts, cpu, thread) &&
+ read_single_counter(counter, cpu, thread, rs)) {
+ counter->counts->scaled = -1;
+ perf_counts(counter->counts, cpu, thread)->ena = 0;
+ perf_counts(counter->counts, cpu, thread)->run = 0;
+ return -1;
+ }
+
+ perf_counts__set_loaded(counter->counts, cpu, thread, false);

- if (verbose > 1) {
- fprintf(stat_config.output,
- "%s: %d: %" PRIu64 " %" PRIu64 " %" PRIu64 "\n",
- perf_evsel__name(counter),
- cpu,
- count->val, count->ena, count->run);
+ if (STAT_RECORD) {
+ if (perf_evsel__write_stat_event(counter, cpu, thread, count)) {
+ pr_err("failed to write stat event\n");
+ return -1;
}
}
+
+ if (verbose > 1) {
+ fprintf(stat_config.output,
+ "%s: %d: %" PRIu64 " %" PRIu64 " %" PRIu64 "\n",
+ perf_evsel__name(counter),
+ cpu,
+ count->val, count->ena, count->run);
+ }
}

return 0;
@@ -325,15 +318,35 @@ static int read_counter(struct evsel *counter, struct timespec *rs)
static void read_counters(struct timespec *rs)
{
struct evsel *counter;
- int ret;
+ struct affinity affinity;
+ int i, ncpus, cpu;
+
+ if (affinity__setup(&affinity) < 0)
+ return;
+
+ ncpus = evsel_list->core.all_cpus->nr;
+ if (!(target__has_cpu(&target) && !target__has_per_thread(&target)))
+ ncpus = 1;
+ evlist__for_each_cpu (evsel_list, i, cpu) {
+ if (i >= ncpus)
+ break;
+ affinity__set(&affinity, cpu);
+
+ evlist__for_each_entry(evsel_list, counter) {
+ if (evsel__cpu_iter_skip(counter, cpu))
+ continue;
+ if (!counter->err)
+ counter->err = read_counter(counter, rs, counter->cpu_iter - 1);
+ }
+ }
+ affinity__cleanup(&affinity);

evlist__for_each_entry(evsel_list, counter) {
- ret = read_counter(counter, rs);
- if (ret)
+ if (counter->err)
pr_debug("failed to read counter %s\n", counter->name);
-
- if (ret == 0 && perf_stat_process_counter(&stat_config, counter))
+ if (counter->err == 0 && perf_stat_process_counter(&stat_config, counter))
pr_warning("failed to process counter %s\n", counter->name);
+ counter->err = 0;
}
}

diff --git a/tools/perf/util/evsel.h b/tools/perf/util/evsel.h
index ca82a93960cd..c8af4bc23f8f 100644
--- a/tools/perf/util/evsel.h
+++ b/tools/perf/util/evsel.h
@@ -86,6 +86,7 @@ struct evsel {
struct list_head config_terms;
struct bpf_object *bpf_obj;
int bpf_fd;
+ int err;
bool auto_merge_stats;
bool merged_stat;
const char * metric_expr;
--
2.23.0


2019-11-15 14:57:07

by Jiri Olsa

[permalink] [raw]
Subject: Re: [PATCH v6 10/12] perf stat: Use affinity for reading

On Mon, Nov 11, 2019 at 04:59:39PM -0800, Andi Kleen wrote:
> From: Andi Kleen <[email protected]>
>
> Restructure event reading to use affinity to minimize the number
> of IPIs needed.
>
> Before on a large test case with 94 CPUs:
>
> % time seconds usecs/call calls errors syscall
> ------ ----------- ----------- --------- --------- ----------------
> 3.16 0.106079 4 22082 read
>
> After:
>
> 3.43 0.081295 3 22082 read
>
> Signed-off-by: Andi Kleen <[email protected]>
>
> ---
>
> v2: Use new iterator macros
> v3: Use new iterator macros
> v4: Change iterator macros even more
> v5: Preserve counter->err in all cases
> ---
> tools/perf/builtin-stat.c | 95 ++++++++++++++++++++++-----------------
> tools/perf/util/evsel.h | 1 +
> 2 files changed, 55 insertions(+), 41 deletions(-)
>
> diff --git a/tools/perf/builtin-stat.c b/tools/perf/builtin-stat.c
> index 039aefb07777..7784f5a93944 100644
> --- a/tools/perf/builtin-stat.c
> +++ b/tools/perf/builtin-stat.c
> @@ -266,15 +266,10 @@ static int read_single_counter(struct evsel *counter, int cpu,
> * Read out the results of a single counter:
> * do not aggregate counts across CPUs in system-wide mode
> */
> -static int read_counter(struct evsel *counter, struct timespec *rs)
> +static int read_counter(struct evsel *counter, struct timespec *rs, int cpu)

please rename this to read_counter_cpu

thanks,
jirka

2019-11-15 15:00:08

by Jiri Olsa

[permalink] [raw]
Subject: Re: [PATCH v6 10/12] perf stat: Use affinity for reading

On Mon, Nov 11, 2019 at 04:59:39PM -0800, Andi Kleen wrote:

SNIP

>
> evlist__for_each_entry(evsel_list, counter) {
> - ret = read_counter(counter, rs);
> - if (ret)
> + if (counter->err)
> pr_debug("failed to read counter %s\n", counter->name);
> -
> - if (ret == 0 && perf_stat_process_counter(&stat_config, counter))
> + if (counter->err == 0 && perf_stat_process_counter(&stat_config, counter))
> pr_warning("failed to process counter %s\n", counter->name);
> + counter->err = 0;
> }
> }
>
> diff --git a/tools/perf/util/evsel.h b/tools/perf/util/evsel.h
> index ca82a93960cd..c8af4bc23f8f 100644
> --- a/tools/perf/util/evsel.h
> +++ b/tools/perf/util/evsel.h
> @@ -86,6 +86,7 @@ struct evsel {
> struct list_head config_terms;
> struct bpf_object *bpf_obj;
> int bpf_fd;
> + int err;

I was wondering what would be the best place for this,
and all the previous variables u added and this one
are stat specific, so I think this all belongs to

(struct perf_stat_evsel*) evsel->stat

jirka

2019-11-15 15:00:16

by Jiri Olsa

[permalink] [raw]
Subject: Re: [PATCH v6 10/12] perf stat: Use affinity for reading

On Mon, Nov 11, 2019 at 04:59:39PM -0800, Andi Kleen wrote:

SNIP

>
> return 0;
> @@ -325,15 +318,35 @@ static int read_counter(struct evsel *counter, struct timespec *rs)
> static void read_counters(struct timespec *rs)
> {
> struct evsel *counter;
> - int ret;
> + struct affinity affinity;
> + int i, ncpus, cpu;
> +
> + if (affinity__setup(&affinity) < 0)
> + return;
> +
> + ncpus = evsel_list->core.all_cpus->nr;
> + if (!(target__has_cpu(&target) && !target__has_per_thread(&target)))
> + ncpus = 1;

hum, could we propagate the negation inside and make this more readable?

if (!target__has_cpu(&target) || target__has_per_thread(&target))

jirka

> + evlist__for_each_cpu (evsel_list, i, cpu) {
> + if (i >= ncpus)
> + break;
> + affinity__set(&affinity, cpu);
> +
> + evlist__for_each_entry(evsel_list, counter) {
> + if (evsel__cpu_iter_skip(counter, cpu))
> + continue;
> + if (!counter->err)
> + counter->err = read_counter(counter, rs, counter->cpu_iter - 1);
> + }
> + }
> + affinity__cleanup(&affinity);

SNIP

2019-11-15 18:39:37

by Andi Kleen

[permalink] [raw]
Subject: Re: [PATCH v6 10/12] perf stat: Use affinity for reading

> > diff --git a/tools/perf/util/evsel.h b/tools/perf/util/evsel.h
> > index ca82a93960cd..c8af4bc23f8f 100644
> > --- a/tools/perf/util/evsel.h
> > +++ b/tools/perf/util/evsel.h
> > @@ -86,6 +86,7 @@ struct evsel {
> > struct list_head config_terms;
> > struct bpf_object *bpf_obj;
> > int bpf_fd;
> > + int err;
>
> I was wondering what would be the best place for this,
> and all the previous variables u added and this one
> are stat specific, so I think this all belongs to
>
> (struct perf_stat_evsel*) evsel->stat

I hope to eventually make perf record use affinity too.
Just not in this patchkit. So I'll keep them in the generic
evsel for now.

-Andi