Return-Path: Received: (majordomo@vger.kernel.org) by vger.kernel.org via listexpand id S1751327AbaLQQiN (ORCPT ); Wed, 17 Dec 2014 11:38:13 -0500 Received: from mail-oi0-f52.google.com ([209.85.218.52]:51625 "EHLO mail-oi0-f52.google.com" rhost-flags-OK-OK-OK-OK) by vger.kernel.org with ESMTP id S1751011AbaLQQiM (ORCPT ); Wed, 17 Dec 2014 11:38:12 -0500 MIME-Version: 1.0 In-Reply-To: <20141217160749.GH11607@kernel.org> References: <20141217152355.GA10053@thinkpad> <20141217160749.GH11607@kernel.org> Date: Wed, 17 Dec 2014 11:38:11 -0500 Message-ID: Subject: Re: [PATCH v3] perf mem: enable sampling loads and stores simultaneously From: Stephane Eranian To: Arnaldo Carvalho de Melo Cc: LKML , Peter Zijlstra , "mingo@elte.hu" , "ak@linux.intel.com" , Jiri Olsa , David Ahern , Don Zickus , Richard Fowles , Joe Mario , Namhyung Kim Content-Type: text/plain; charset=UTF-8 Sender: linux-kernel-owner@vger.kernel.org List-ID: X-Mailing-List: linux-kernel@vger.kernel.org On Wed, Dec 17, 2014 at 11:07 AM, Arnaldo Carvalho de Melo wrote: > Em Wed, Dec 17, 2014 at 04:23:55PM +0100, Stephane Eranian escreveu: >> >> This patch modifies perf mem to default to sampling loads >> and stores simultaneously. It could only do one or the other >> before yet there was no hardware restriction preventing >> simultaneous collection. With this patch, one run is sufficient >> to collect both. >> >> It is still possible to sample only loads or stores by using the >> -t option: >> $ perf mem -t load rec >> $ perf mem -t load rep >> Or >> $ perf mem -t store rec >> $ perf mem -t store rep >> >> The perf report TUI will show one event at a time. The store >> output will contain a Weight column which will be empty. >> >> In V2, we updated the man pages to reflect the change and >> also simplify the initialization of the argv vector passed >> to the cmd_*() functions as per LKML feedback. >> >> In V3, we fixed typos in the changelog. >> >> Signed-off-by: Stephane Eranian > > So here it goes a v4, just a minor change to avoid adding another global > variable and make this operation parm like the other parms inside that > struct perf_mem, attached goes the interdiff from your patch to this one > here, ok? > So you want me to post V4 with this one? I am fine if you add yours directly on top of mine. Acked-by: Stephane Eranian > diff --git a/tools/perf/Documentation/perf-mem.txt b/tools/perf/Documentation/perf-mem.txt > index 1d78a4064da4..43310d8661fe 100644 > --- a/tools/perf/Documentation/perf-mem.txt > +++ b/tools/perf/Documentation/perf-mem.txt > @@ -12,11 +12,12 @@ SYNOPSIS > > DESCRIPTION > ----------- > -"perf mem -t record" runs a command and gathers memory operation data > +"perf mem record" runs a command and gathers memory operation data > from it, into perf.data. Perf record options are accepted and are passed through. > > -"perf mem -t report" displays the result. It invokes perf report with the > -right set of options to display a memory access profile. > +"perf mem report" displays the result. It invokes perf report with the > +right set of options to display a memory access profile. By default, loads > +and stores are sampled. Use the -t option to limit to loads or stores. > > Note that on Intel systems the memory latency reported is the use-latency, > not the pure load (or store latency). Use latency includes any pipeline > @@ -29,7 +30,7 @@ OPTIONS > > -t:: > --type=:: > - Select the memory operation type: load or store (default: load) > + Select the memory operation type: load or store (default: load,store) > > -D:: > --dump-raw-samples=:: > diff --git a/tools/perf/builtin-mem.c b/tools/perf/builtin-mem.c > index 24db6ffe2957..9b5663950a4d 100644 > --- a/tools/perf/builtin-mem.c > +++ b/tools/perf/builtin-mem.c > @@ -7,44 +7,47 @@ > #include "util/session.h" > #include "util/data.h" > > -#define MEM_OPERATION_LOAD "load" > -#define MEM_OPERATION_STORE "store" > - > -static const char *mem_operation = MEM_OPERATION_LOAD; > +#define MEM_OPERATION_LOAD 0x1 > +#define MEM_OPERATION_STORE 0x2 > > struct perf_mem { > struct perf_tool tool; > char const *input_name; > bool hide_unresolved; > bool dump_raw; > + int operation; > const char *cpu_list; > DECLARE_BITMAP(cpu_bitmap, MAX_NR_CPUS); > }; > > -static int __cmd_record(int argc, const char **argv) > +static int __cmd_record(int argc, const char **argv, struct perf_mem *mem) > { > int rec_argc, i = 0, j; > const char **rec_argv; > - char event[64]; > int ret; > > - rec_argc = argc + 4; > + rec_argc = argc + 7; /* max number of arguments */ > rec_argv = calloc(rec_argc + 1, sizeof(char *)); > if (!rec_argv) > return -1; > > - rec_argv[i++] = strdup("record"); > - if (!strcmp(mem_operation, MEM_OPERATION_LOAD)) > - rec_argv[i++] = strdup("-W"); > - rec_argv[i++] = strdup("-d"); > - rec_argv[i++] = strdup("-e"); > + rec_argv[i++] = "record"; > > - if (strcmp(mem_operation, MEM_OPERATION_LOAD)) > - sprintf(event, "cpu/mem-stores/pp"); > - else > - sprintf(event, "cpu/mem-loads/pp"); > + if (mem->operation & MEM_OPERATION_LOAD) > + rec_argv[i++] = "-W"; > + > + rec_argv[i++] = "-d"; > + > + if (mem->operation & MEM_OPERATION_LOAD) { > + rec_argv[i++] = "-e"; > + rec_argv[i++] = "cpu/mem-loads/pp"; > + } > + > + if (mem->operation & MEM_OPERATION_STORE) { > + rec_argv[i++] = "-e"; > + rec_argv[i++] = "cpu/mem-stores/pp"; > + } > > - rec_argv[i++] = strdup(event); > for (j = 1; j < argc; j++, i++) > rec_argv[i] = argv[j]; > > @@ -162,17 +165,17 @@ static int report_events(int argc, const char **argv, struct perf_mem *mem) > if (!rep_argv) > return -1; > > - rep_argv[i++] = strdup("report"); > - rep_argv[i++] = strdup("--mem-mode"); > - rep_argv[i++] = strdup("-n"); /* display number of samples */ > + rep_argv[i++] = "report"; > + rep_argv[i++] = "--mem-mode"; > + rep_argv[i++] = "-n"; /* display number of samples */ > > /* > * there is no weight (cost) associated with stores, so don't print > * the column > */ > - if (strcmp(mem_operation, MEM_OPERATION_LOAD)) > - rep_argv[i++] = strdup("--sort=mem,sym,dso,symbol_daddr," > - "dso_daddr,tlb,locked"); > + if (!(mem->operation & MEM_OPERATION_LOAD)) > + rep_argv[i++] = "--sort=mem,sym,dso,symbol_daddr," > + "dso_daddr,tlb,locked"; > > for (j = 1; j < argc; j++, i++) > rep_argv[i] = argv[j]; > @@ -182,6 +185,75 @@ static int report_events(int argc, const char **argv, struct perf_mem *mem) > return ret; > } > > +struct mem_mode { > + const char *name; > + int mode; > +}; > + > +#define MEM_OPT(n, m) \ > + { .name = n, .mode = (m) } > + > +#define MEM_END { .name = NULL } > + > +static const struct mem_mode mem_modes[]={ > + MEM_OPT("load", MEM_OPERATION_LOAD), > + MEM_OPT("store", MEM_OPERATION_STORE), > + MEM_END > +}; > + > +static int > +parse_mem_ops(const struct option *opt, const char *str, int unset) > +{ > + int *mode = (int *)opt->value; > + const struct mem_mode *m; > + char *s, *os = NULL, *p; > + int ret = -1; > + > + if (unset) > + return 0; > + > + /* str may be NULL in case no arg is passed to -t */ > + if (str) { > + /* because str is read-only */ > + s = os = strdup(str); > + if (!s) > + return -1; > + > + /* reset mode */ > + *mode = 0; > + > + for (;;) { > + p = strchr(s, ','); > + if (p) > + *p = '\0'; > + > + for (m = mem_modes; m->name; m++) { > + if (!strcasecmp(s, m->name)) > + break; > + } > + if (!m->name) { > + fprintf(stderr, "unknown sampling op %s," > + " check man page\n", s); > + goto error; > + } > + > + *mode |= m->mode; > + > + if (!p) > + break; > + > + s = p + 1; > + } > + } > + ret = 0; > + > + if (*mode == 0) > + *mode = MEM_OPERATION_LOAD; > +error: > + free(os); > + return ret; > +} > + > int cmd_mem(int argc, const char **argv, const char *prefix __maybe_unused) > { > struct stat st; > @@ -197,10 +269,15 @@ int cmd_mem(int argc, const char **argv, const char *prefix __maybe_unused) > .ordered_events = true, > }, > .input_name = "perf.data", > + /* > + * default to both load an store sampling > + */ > + .operation = MEM_OPERATION_LOAD | MEM_OPERATION_STORE, > }; > const struct option mem_options[] = { > - OPT_STRING('t', "type", &mem_operation, > - "type", "memory operations(load/store)"), > + OPT_CALLBACK('t', "type", &mem.operation, > + "type", "memory operations(load,store) Default load,store", > + parse_mem_ops), > OPT_BOOLEAN('D', "dump-raw-samples", &mem.dump_raw, > "dump raw samples in ASCII"), > OPT_BOOLEAN('U', "hide-unresolved", &mem.hide_unresolved, > @@ -225,7 +302,7 @@ int cmd_mem(int argc, const char **argv, const char *prefix __maybe_unused) > argc = parse_options_subcommand(argc, argv, mem_options, mem_subcommands, > mem_usage, PARSE_OPT_STOP_AT_NON_OPTION); > > - if (!argc || !(strncmp(argv[0], "rec", 3) || mem_operation)) > + if (!argc || !(strncmp(argv[0], "rec", 3) || mem.operation)) > usage_with_options(mem_usage, mem_options); > > if (!mem.input_name || !strlen(mem.input_name)) { > @@ -236,7 +313,7 @@ int cmd_mem(int argc, const char **argv, const char *prefix __maybe_unused) > } > > if (!strncmp(argv[0], "rec", 3)) > - return __cmd_record(argc, argv); > + return __cmd_record(argc, argv, &mem); > else if (!strncmp(argv[0], "rep", 3)) > return report_events(argc, argv, &mem); > else -- To unsubscribe from this list: send the line "unsubscribe linux-kernel" in the body of a message to majordomo@vger.kernel.org More majordomo info at http://vger.kernel.org/majordomo-info.html Please read the FAQ at http://www.tux.org/lkml/