Return-Path: Received: (majordomo@vger.kernel.org) by vger.kernel.org via listexpand id S1753220AbcD2Eq5 (ORCPT ); Fri, 29 Apr 2016 00:46:57 -0400 Received: from mail-pf0-f176.google.com ([209.85.192.176]:35681 "EHLO mail-pf0-f176.google.com" rhost-flags-OK-OK-OK-OK) by vger.kernel.org with ESMTP id S1753189AbcD2Eqw (ORCPT ); Fri, 29 Apr 2016 00:46:52 -0400 From: David Carrillo-Cisneros To: Peter Zijlstra , Alexander Shishkin , Arnaldo Carvalho de Melo , Ingo Molnar Cc: Vikas Shivappa , Matt Fleming , Tony Luck , Stephane Eranian , Paul Turner , David Carrillo-Cisneros , x86@kernel.org, linux-kernel@vger.kernel.org Subject: [PATCH 32/32] perf/stat: revamp error handling for snapshot and per_pkg events Date: Thu, 28 Apr 2016 21:43:38 -0700 Message-Id: <1461905018-86355-33-git-send-email-davidcc@google.com> X-Mailer: git-send-email 2.8.0.rc3.226.g39d4020 In-Reply-To: <1461905018-86355-1-git-send-email-davidcc@google.com> References: <1461905018-86355-1-git-send-email-davidcc@google.com> Sender: linux-kernel-owner@vger.kernel.org List-ID: X-Mailing-List: linux-kernel@vger.kernel.org Content-Length: 12399 Lines: 368 A package wide event can return a valid read even if it has not run in a specific cpu, this does not fit well with the assumption that run == 0 is equivalent to a . To fix the problem, this patch defines special error values for val, run and ena (~0ULL), and use them to signal read errors, allowing run == 0 to be a valid value for package events. A new value, NA, is output on read error and when event has not been enabled (timed enabled == 0). Finally, this patch revamps calculation of deltas and scaling for snapshot events, removing the calculation of deltas for time running and enabled in snapshot event, as should be. Reviewed-by: Stephane Eranian Signed-off-by: David Carrillo-Cisneros --- tools/perf/builtin-stat.c | 37 ++++++++++++++++++++++++++----------- tools/perf/util/counts.h | 19 +++++++++++++++++++ tools/perf/util/evsel.c | 44 +++++++++++++++++++++++++++++++++----------- tools/perf/util/evsel.h | 8 ++++++-- tools/perf/util/stat.c | 35 +++++++++++------------------------ 5 files changed, 95 insertions(+), 48 deletions(-) diff --git a/tools/perf/builtin-stat.c b/tools/perf/builtin-stat.c index a4e5610..f1c2166 100644 --- a/tools/perf/builtin-stat.c +++ b/tools/perf/builtin-stat.c @@ -63,6 +63,7 @@ #include "util/tool.h" #include "asm/bug.h" +#include #include #include #include @@ -290,10 +291,8 @@ static int read_counter(struct perf_evsel *counter) count = perf_counts(counter->counts, cpu, thread); if (perf_evsel__read(counter, cpu, thread, count)) { - counter->counts->scaled = -1; - perf_counts(counter->counts, cpu, thread)->ena = 0; - perf_counts(counter->counts, cpu, thread)->run = 0; - return -1; + /* do not write stat for failed reads. */ + continue; } if (STAT_RECORD) { @@ -668,12 +667,16 @@ static int run_perf_stat(int argc, const char **argv) static void print_running(u64 run, u64 ena) { + bool is_na = run == PERF_COUNTS_NA || ena == PERF_COUNTS_NA || !ena; + if (csv_output) { - fprintf(stat_config.output, "%s%" PRIu64 "%s%.2f", - csv_sep, - run, - csv_sep, - ena ? 100.0 * run / ena : 100.0); + if (is_na) + fprintf(stat_config.output, "%sNA%sNA", csv_sep, csv_sep); + else + fprintf(stat_config.output, "%s%" PRIu64 "%s%.2f", + csv_sep, run, csv_sep, 100.0 * run / ena); + } else if (is_na) { + fprintf(stat_config.output, " (NA)"); } else if (run != ena) { fprintf(stat_config.output, " (%.2f%%)", 100.0 * run / ena); } @@ -1046,7 +1049,7 @@ static void printout(int id, int nr, struct perf_evsel *counter, double uval, if (counter->cgrp) os.nfields++; } - if (run == 0 || ena == 0 || counter->counts->scaled == -1) { + if (run == PERF_COUNTS_NA || ena == PERF_COUNTS_NA || counter->counts->scaled == -1) { if (metric_only) { pm(&os, NULL, "", "", 0); return; @@ -1152,12 +1155,17 @@ static void print_aggr(char *prefix) id = aggr_map->map[s]; first = true; evlist__for_each(evsel_list, counter) { + bool all_nan = true; val = ena = run = 0; nr = 0; for (cpu = 0; cpu < perf_evsel__nr_cpus(counter); cpu++) { s2 = aggr_get_id(perf_evsel__cpus(counter), cpu); if (s2 != id) continue; + /* skip NA reads. */ + if (perf_counts_values__is_na(perf_counts(counter->counts, cpu, 0))) + continue; + all_nan = false; val += perf_counts(counter->counts, cpu, 0)->val; ena += perf_counts(counter->counts, cpu, 0)->ena; run += perf_counts(counter->counts, cpu, 0)->run; @@ -1171,6 +1179,10 @@ static void print_aggr(char *prefix) fprintf(output, "%s", prefix); uval = val * counter->scale; + if (all_nan) { + run = PERF_COUNTS_NA; + ena = PERF_COUNTS_NA; + } printout(id, nr, counter, uval, prefix, run, ena, 1.0); if (!metric_only) fputc('\n', output); @@ -1249,7 +1261,10 @@ static void print_counter(struct perf_evsel *counter, char *prefix) if (prefix) fprintf(output, "%s", prefix); - uval = val * counter->scale; + if (val != PERF_COUNTS_NA) + uval = val * counter->scale; + else + uval = NAN; printout(cpu, 0, counter, uval, prefix, run, ena, 1.0); fputc('\n', output); diff --git a/tools/perf/util/counts.h b/tools/perf/util/counts.h index 34d8baa..b65e97a 100644 --- a/tools/perf/util/counts.h +++ b/tools/perf/util/counts.h @@ -3,6 +3,9 @@ #include "xyarray.h" +/* Not Available (NA) value. Any operation with a NA equals a NA. */ +#define PERF_COUNTS_NA ((u64)~0ULL) + struct perf_counts_values { union { struct { @@ -14,6 +17,22 @@ struct perf_counts_values { }; }; +static inline void +perf_counts_values__make_na(struct perf_counts_values *values) +{ + values->val = PERF_COUNTS_NA; + values->ena = PERF_COUNTS_NA; + values->run = PERF_COUNTS_NA; +} + +static inline bool +perf_counts_values__is_na(struct perf_counts_values *values) +{ + return values->val == PERF_COUNTS_NA || + values->ena == PERF_COUNTS_NA || + values->run == PERF_COUNTS_NA; +} + struct perf_counts { s8 scaled; struct perf_counts_values aggr; diff --git a/tools/perf/util/evsel.c b/tools/perf/util/evsel.c index 52a0c35..da63a87 100644 --- a/tools/perf/util/evsel.c +++ b/tools/perf/util/evsel.c @@ -1109,6 +1109,9 @@ void perf_evsel__compute_deltas(struct perf_evsel *evsel, int cpu, int thread, if (!evsel->prev_raw_counts) return; + if (perf_counts_values__is_na(count)) + return; + if (cpu == -1) { tmp = evsel->prev_raw_counts->aggr; evsel->prev_raw_counts->aggr = *count; @@ -1117,26 +1120,38 @@ void perf_evsel__compute_deltas(struct perf_evsel *evsel, int cpu, int thread, *perf_counts(evsel->prev_raw_counts, cpu, thread) = *count; } - count->val = count->val - tmp.val; + /* Snapshot events do not calculate deltas for count values. */ + if (!evsel->snapshot) + count->val = count->val - tmp.val; count->ena = count->ena - tmp.ena; count->run = count->run - tmp.run; } void perf_counts_values__scale(struct perf_counts_values *count, - bool scale, s8 *pscaled) + bool scale, bool per_pkg, bool snapshot, s8 *pscaled) { s8 scaled = 0; + if (perf_counts_values__is_na(count)) { + if (pscaled) + *pscaled = -1; + return; + } + if (scale) { - if (count->run == 0) { + /* per-pkg events can have run == 0 and be valid. */ + if (count->run == 0 && !per_pkg) { scaled = -1; count->val = 0; } else if (count->run < count->ena) { scaled = 1; - count->val = (u64)((double) count->val * count->ena / count->run + 0.5); + /* Snapshot events do not scale counts values. */ + if (!snapshot && count->run) + count->val = (u64)((double) count->val * count->ena / + count->run + 0.5); } - } else - count->ena = count->run = 0; + } + count->run = count->ena; if (pscaled) *pscaled = scaled; @@ -1150,8 +1165,10 @@ int perf_evsel__read(struct perf_evsel *evsel, int cpu, int thread, if (FD(evsel, cpu, thread) < 0) return -EINVAL; - if (readn(FD(evsel, cpu, thread), count, sizeof(*count)) <= 0) + if (readn(FD(evsel, cpu, thread), count, sizeof(*count)) <= 0) { + perf_counts_values__make_na(count); return -errno; + } return 0; } @@ -1159,6 +1176,7 @@ int perf_evsel__read(struct perf_evsel *evsel, int cpu, int thread, int __perf_evsel__read_on_cpu(struct perf_evsel *evsel, int cpu, int thread, bool scale) { + int ret = 0; struct perf_counts_values count; size_t nv = scale ? 3 : 1; @@ -1168,13 +1186,17 @@ int __perf_evsel__read_on_cpu(struct perf_evsel *evsel, if (evsel->counts == NULL && perf_evsel__alloc_counts(evsel, cpu + 1, thread + 1) < 0) return -ENOMEM; - if (readn(FD(evsel, cpu, thread), &count, nv * sizeof(u64)) <= 0) - return -errno; + if (readn(FD(evsel, cpu, thread), &count, nv * sizeof(u64)) <= 0) { + perf_counts_values__make_na(&count); + ret = -errno; + goto exit; + } perf_evsel__compute_deltas(evsel, cpu, thread, &count); - perf_counts_values__scale(&count, scale, NULL); + perf_counts_values__scale(&count, scale, evsel->per_pkg, evsel->snapshot, NULL); +exit: *perf_counts(evsel->counts, cpu, thread) = count; - return 0; + return ret; } static int get_group_fd(struct perf_evsel *evsel, int cpu, int thread) diff --git a/tools/perf/util/evsel.h b/tools/perf/util/evsel.h index b993218..e6a5854 100644 --- a/tools/perf/util/evsel.h +++ b/tools/perf/util/evsel.h @@ -74,6 +74,10 @@ struct perf_evsel_config_term { * @is_pos: the position (counting backwards) of the event id (PERF_SAMPLE_ID or * PERF_SAMPLE_IDENTIFIER) in a non-sample event i.e. if sample_id_all * is used there is an id sample appended to non-sample events + * @snapshot: an event that whose raw value cannot be extrapolated based on + * the ratio of running/enabled time. + * @per_pkg: an event that runs package wide. All cores in same package will + * read the same value, even if running time == 0. * @priv: And what is in its containing unnamed union are tool specific */ struct perf_evsel { @@ -144,8 +148,8 @@ static inline int perf_evsel__nr_cpus(struct perf_evsel *evsel) return perf_evsel__cpus(evsel)->nr; } -void perf_counts_values__scale(struct perf_counts_values *count, - bool scale, s8 *pscaled); +void perf_counts_values__scale(struct perf_counts_values *count, bool scale, + bool per_pkg, bool snapshot, s8 *pscaled); void perf_evsel__compute_deltas(struct perf_evsel *evsel, int cpu, int thread, struct perf_counts_values *count); diff --git a/tools/perf/util/stat.c b/tools/perf/util/stat.c index 4d9b481..b0f0d41 100644 --- a/tools/perf/util/stat.c +++ b/tools/perf/util/stat.c @@ -197,7 +197,7 @@ static void zero_per_pkg(struct perf_evsel *counter) } static int check_per_pkg(struct perf_evsel *counter, - struct perf_counts_values *vals, int cpu, bool *skip) + int cpu, bool *skip) { unsigned long *mask = counter->per_pkg_mask; struct cpu_map *cpus = perf_evsel__cpus(counter); @@ -219,17 +219,6 @@ static int check_per_pkg(struct perf_evsel *counter, counter->per_pkg_mask = mask; } - /* - * we do not consider an event that has not run as a good - * instance to mark a package as used (skip=1). Otherwise - * we may run into a situation where the first CPU in a package - * is not running anything, yet the second is, and this function - * would mark the package as used after the first CPU and would - * not read the values from the second CPU. - */ - if (!(vals->run && vals->ena)) - return 0; - s = cpu_map__get_socket(cpus, cpu, NULL); if (s < 0) return -1; @@ -244,30 +233,27 @@ process_counter_values(struct perf_stat_config *config, struct perf_evsel *evsel struct perf_counts_values *count) { struct perf_counts_values *aggr = &evsel->counts->aggr; - static struct perf_counts_values zero; bool skip = false; - if (check_per_pkg(evsel, count, cpu, &skip)) { + if (check_per_pkg(evsel, cpu, &skip)) { pr_err("failed to read per-pkg counter\n"); return -1; } - if (skip) - count = &zero; - switch (config->aggr_mode) { case AGGR_THREAD: case AGGR_CORE: case AGGR_SOCKET: case AGGR_NONE: - if (!evsel->snapshot) - perf_evsel__compute_deltas(evsel, cpu, thread, count); - perf_counts_values__scale(count, config->scale, NULL); + perf_evsel__compute_deltas(evsel, cpu, thread, count); + perf_counts_values__scale(count, config->scale, + evsel->per_pkg, evsel->snapshot, NULL); if (config->aggr_mode == AGGR_NONE) perf_stat__update_shadow_stats(evsel, count->values, cpu); break; case AGGR_GLOBAL: - aggr->val += count->val; + if (!skip) + aggr->val += count->val; if (config->scale) { aggr->ena += count->ena; aggr->run += count->run; @@ -331,9 +317,10 @@ int perf_stat_process_counter(struct perf_stat_config *config, if (config->aggr_mode != AGGR_GLOBAL) return 0; - if (!counter->snapshot) - perf_evsel__compute_deltas(counter, -1, -1, aggr); - perf_counts_values__scale(aggr, config->scale, &counter->counts->scaled); + perf_evsel__compute_deltas(counter, -1, -1, aggr); + perf_counts_values__scale(aggr, config->scale, + counter->per_pkg, counter->snapshot, + &counter->counts->scaled); for (i = 0; i < 3; i++) update_stats(&ps->res_stats[i], count[i]); -- 2.8.0.rc3.226.g39d4020