Return-Path: Received: (majordomo@vger.kernel.org) by vger.kernel.org via listexpand id S934025AbbLPOcx (ORCPT ); Wed, 16 Dec 2015 09:32:53 -0500 Received: from mail-wm0-f49.google.com ([74.125.82.49]:38279 "EHLO mail-wm0-f49.google.com" rhost-flags-OK-OK-OK-OK) by vger.kernel.org with ESMTP id S1753680AbbLPOcv (ORCPT ); Wed, 16 Dec 2015 09:32:51 -0500 MIME-Version: 1.0 In-Reply-To: <1450227266-2501-6-git-send-email-andi@firstfloor.org> References: <1450227266-2501-1-git-send-email-andi@firstfloor.org> <1450227266-2501-6-git-send-email-andi@firstfloor.org> Date: Wed, 16 Dec 2015 06:32:50 -0800 Message-ID: Subject: Re: [PATCH 05/10] perf, tools, stat: Basic support for TopDown in perf stat From: Stephane Eranian To: Andi Kleen Cc: Arnaldo Carvalho de Melo , Peter Zijlstra , Jiri Olsa , Ingo Molnar , LKML , Namhyung Kim , Andi Kleen Content-Type: text/plain; charset=UTF-8 Sender: linux-kernel-owner@vger.kernel.org List-ID: X-Mailing-List: linux-kernel@vger.kernel.org Content-Length: 8649 Lines: 236 On Tue, Dec 15, 2015 at 4:54 PM, Andi Kleen wrote: > From: Andi Kleen > > Add basic plumbing for TopDown in perf stat > > Add a new --topdown options to enable events. > When --topdown is specified set up events for all topdown > events supported by the kernel. > Add topdown-* as a special case to the event parser, as is > needed for all events containing -. > > The actual code to compute the metrics is in follow-on patches. > > v2: > Use standard sysctl read function. > Signed-off-by: Andi Kleen > --- > tools/perf/Documentation/perf-stat.txt | 8 +++ > tools/perf/builtin-stat.c | 100 ++++++++++++++++++++++++++++++++- > tools/perf/util/parse-events.l | 1 + > 3 files changed, 108 insertions(+), 1 deletion(-) > > diff --git a/tools/perf/Documentation/perf-stat.txt b/tools/perf/Documentation/perf-stat.txt > index 58296e7..9be9ac8 100644 > --- a/tools/perf/Documentation/perf-stat.txt > +++ b/tools/perf/Documentation/perf-stat.txt > @@ -163,6 +163,14 @@ filter out the startup phase of the program, which is often very different. > > Print statistics of transactional execution if supported. > > +--topdown:: > + > +Print top down level 1 metrics if supported by the CPU. This allows to > +determine bottle necks in the CPU pipeline for CPU bound workloads, > +by breaking it down into frontend bound, backend bound, bad speculation > +and retiring. Specifying the option multiple times shows metrics even > +if the don't cross a threshold. > + > EXAMPLES > -------- > > diff --git a/tools/perf/builtin-stat.c b/tools/perf/builtin-stat.c > index 1faa6fc..e10198c 100644 > --- a/tools/perf/builtin-stat.c > +++ b/tools/perf/builtin-stat.c > @@ -60,6 +60,7 @@ > #include "util/thread_map.h" > #include "util/counts.h" > > +#include > #include > #include > #include > @@ -95,6 +96,15 @@ static const char * transaction_limited_attrs = { > "}" > }; > > +static const char * topdown_attrs[] = { > + "topdown-total-slots", > + "topdown-fetch-bubbles", > + "topdown-slots-retired", > + "topdown-recovery-bubbles", > + "topdown-slots-issued", > + NULL, > +}; > + > static struct perf_evlist *evsel_list; > > static struct target target = { > @@ -109,6 +119,7 @@ static volatile pid_t child_pid = -1; > static bool null_run = false; > static int detailed_run = 0; > static bool transaction_run; > +static int topdown_run = 0; > static bool big_num = true; > static int big_num_opt = -1; > static const char *csv_sep = NULL; > @@ -1283,6 +1294,8 @@ static const struct option stat_options[] = { > "ms to wait before starting measurement after program start"), > OPT_BOOLEAN(0, "metric-only", &metric_only, > "Only print computed metrics. No raw values"), > + OPT_INCR(0, "topdown", &topdown_run, > + "measure topdown level 1 statistics"), > OPT_END() > }; > > @@ -1380,12 +1393,68 @@ static void perf_stat__exit_aggr_mode(void) > cpus_aggr_map = NULL; > } > > +static void filter_events(const char **attr, char **str, bool use_group) > +{ > + int off = 0; > + int i; > + int len = 0; > + char *s; > + > + for (i = 0; attr[i]; i++) { > + if (pmu_have_event("cpu", attr[i])) { > + len += strlen(attr[i]) + 1; > + attr[i - off] = attr[i]; > + } else > + off++; > + } > + attr[i - off] = NULL; > + > + *str = malloc(len + 1 + 2); > + if (!*str) > + return; > + s = *str; > + if (i - off == 0) { > + *s = 0; > + return; > + } > + if (use_group) > + *s++ = '{'; > + for (i = 0; attr[i]; i++) { > + strcpy(s, attr[i]); > + s += strlen(s); > + *s++ = ','; > + } > + if (use_group) { > + s[-1] = '}'; > + *s = 0; > + } else > + s[-1] = 0; > +} > + > +/* > + * Check whether we can use a group for top down. > + * Without a group may get bad results due to multiplexing. > + */ That is not because you have a counter used by the NMI that you cannot group. If HT is off you have plenty of counters to do this. > +static bool check_group(bool *warn) > +{ > + int n; > + > + if (sysctl__read_int("kernel/nmi_watchdog", &n) < 0) > + return false; > + if (n > 0) { > + *warn = true; > + return false; > + } > + return true; > +} > + I do not like this part very much. You are pulling in x86 specific knowledge into builtin_stat.c. Why not move this into an x86 specific file? > /* > * Add default attributes, if there were no attributes specified or > * if -d/--detailed, -d -d or -d -d -d is used: > */ > static int add_default_attributes(void) > { > + int err; > struct perf_event_attr default_attrs[] = { > > { .type = PERF_TYPE_SOFTWARE, .config = PERF_COUNT_SW_TASK_CLOCK }, > @@ -1498,7 +1567,6 @@ static int add_default_attributes(void) > return 0; > > if (transaction_run) { > - int err; > if (pmu_have_event("cpu", "cycles-ct") && > pmu_have_event("cpu", "el-start")) > err = parse_events(evsel_list, transaction_attrs, NULL); > @@ -1511,6 +1579,36 @@ static int add_default_attributes(void) > return 0; > } > > + if (topdown_run) { > + char *str = NULL; > + bool warn = false; > + > + filter_events(topdown_attrs, &str, check_group(&warn)); > + if (topdown_attrs[0] && str) { > + if (warn) > + fprintf(stderr, > + "nmi_watchdog enabled with topdown. May give wrong results.\n" > + "Disable with echo 0 > /proc/sys/kernel/nmi_watchdog\n"); This is x86 specific. Why not just try it out and in case of error suggest checking if pinned system-wide events exist (such as NMI watchdog on x86). that would be more generic. > + err = parse_events(evsel_list, str, NULL); > + if (err) { > + fprintf(stderr, > + "Cannot set up top down events %s: %d\n", > + str, err); > + free(str); > + return -1; > + } > + } else { > + fprintf(stderr, "System does not support topdown\n"); > + return -1; > + } > + free(str); > + /* > + * Right now combining with the other attributes breaks group > + * semantics. > + */ > + return 0; > + } > + > if (!evsel_list->nr_entries) { > if (perf_evlist__add_default_attrs(evsel_list, default_attrs) < 0) > return -1; > diff --git a/tools/perf/util/parse-events.l b/tools/perf/util/parse-events.l > index 58c5831..3e65d61 100644 > --- a/tools/perf/util/parse-events.l > +++ b/tools/perf/util/parse-events.l > @@ -248,6 +248,7 @@ cycles-ct { return str(yyscanner, PE_KERNEL_PMU_EVENT); } > cycles-t { return str(yyscanner, PE_KERNEL_PMU_EVENT); } > mem-loads { return str(yyscanner, PE_KERNEL_PMU_EVENT); } > mem-stores { return str(yyscanner, PE_KERNEL_PMU_EVENT); } > +topdown-[a-z-]+ { return str(yyscanner, PE_KERNEL_PMU_EVENT); } > > L1-dcache|l1-d|l1d|L1-data | > L1-icache|l1-i|l1i|L1-instruction | > -- > 2.4.3 > -- To unsubscribe from this list: send the line "unsubscribe linux-kernel" in the body of a message to majordomo@vger.kernel.org More majordomo info at http://vger.kernel.org/majordomo-info.html Please read the FAQ at http://www.tux.org/lkml/