The first one fixes the problem that counters are never enabled when initial
delay configured.
The remaining two reuse the common field target::initial_delay for
record/ftrace/trace subcommands.
v2:
- introduce common filed target::initial_delay
Changbin Du (3):
perf stat: fix counting when initial delay configured
perf record: reuse target::initial_delay
perf: ftrace: reuse target::initial_delay
tools/perf/builtin-ftrace.c | 10 +++++-----
tools/perf/builtin-record.c | 12 ++++++------
tools/perf/builtin-stat.c | 15 +++++----------
tools/perf/builtin-trace.c | 8 ++++----
tools/perf/util/evlist.c | 6 +++---
tools/perf/util/evsel.c | 2 +-
tools/perf/util/ftrace.h | 1 -
tools/perf/util/record.h | 1 -
tools/perf/util/stat.c | 6 +-----
tools/perf/util/stat.h | 1 -
tools/perf/util/target.h | 12 ++++++++++++
11 files changed, 37 insertions(+), 37 deletions(-)
--
2.25.1
When creating counters with initial delay configured, the enable_on_exec
field is not set. So we need to enable the counters later. The problem
is, when a workload is specified the target__none() is true. So we also
need to check stat_config.initial_delay.
In this change, we add a new field 'initial_delay' for struct target which
could be shared by other subcommands. And define target__enable_on_exec()
which returns whether enable_on_exec should be set on normal cases.
Before this fix the event is not counted:
$ ./perf stat -e instructions -D 100 sleep 2
Events disabled
Events enabled
Performance counter stats for 'sleep 2':
<not counted> instructions
1.901661124 seconds time elapsed
0.001602000 seconds user
0.000000000 seconds sys
After fix it works:
$ ./perf stat -e instructions -D 100 sleep 2
Events disabled
Events enabled
Performance counter stats for 'sleep 2':
404,214 instructions
1.901743475 seconds time elapsed
0.001617000 seconds user
0.000000000 seconds sys
Fixes: c587e77e100f ("perf stat: Do not delay the workload with --delay")
Signed-off-by: Changbin Du <[email protected]>
---
tools/perf/builtin-stat.c | 15 +++++----------
tools/perf/util/stat.c | 6 +-----
tools/perf/util/stat.h | 1 -
tools/perf/util/target.h | 12 ++++++++++++
4 files changed, 18 insertions(+), 16 deletions(-)
diff --git a/tools/perf/builtin-stat.c b/tools/perf/builtin-stat.c
index 5d18a5a6f662..fa7c40956d0f 100644
--- a/tools/perf/builtin-stat.c
+++ b/tools/perf/builtin-stat.c
@@ -539,12 +539,7 @@ static int enable_counters(void)
return err;
}
- /*
- * We need to enable counters only if:
- * - we don't have tracee (attaching to task or cpu)
- * - we have initial delay configured
- */
- if (!target__none(&target)) {
+ if (!target__enable_on_exec(&target)) {
if (!all_counters_use_bpf)
evlist__enable(evsel_list);
}
@@ -914,7 +909,7 @@ static int __run_perf_stat(int argc, const char **argv, int run_idx)
return err;
}
- if (stat_config.initial_delay) {
+ if (target.initial_delay) {
pr_info(EVLIST_DISABLED_MSG);
} else {
err = enable_counters();
@@ -926,8 +921,8 @@ static int __run_perf_stat(int argc, const char **argv, int run_idx)
if (forks)
evlist__start_workload(evsel_list);
- if (stat_config.initial_delay > 0) {
- usleep(stat_config.initial_delay * USEC_PER_MSEC);
+ if (target.initial_delay > 0) {
+ usleep(target.initial_delay * USEC_PER_MSEC);
err = enable_counters();
if (err)
return -1;
@@ -1248,7 +1243,7 @@ static struct option stat_options[] = {
"aggregate counts per thread", AGGR_THREAD),
OPT_SET_UINT(0, "per-node", &stat_config.aggr_mode,
"aggregate counts per numa node", AGGR_NODE),
- OPT_INTEGER('D', "delay", &stat_config.initial_delay,
+ OPT_INTEGER('D', "delay", &target.initial_delay,
"ms to wait before starting measurement after program start (-1: start with events disabled)"),
OPT_CALLBACK_NOOPT(0, "metric-only", &stat_config.metric_only, NULL,
"Only print computed metrics. No raw values", enable_metric_only),
diff --git a/tools/perf/util/stat.c b/tools/perf/util/stat.c
index 534d36d26fc3..a07473703c6d 100644
--- a/tools/perf/util/stat.c
+++ b/tools/perf/util/stat.c
@@ -842,11 +842,7 @@ int create_perf_stat_counter(struct evsel *evsel,
if (evsel__is_group_leader(evsel)) {
attr->disabled = 1;
- /*
- * In case of initial_delay we enable tracee
- * events manually.
- */
- if (target__none(target) && !config->initial_delay)
+ if (target__enable_on_exec(target))
attr->enable_on_exec = 1;
}
diff --git a/tools/perf/util/stat.h b/tools/perf/util/stat.h
index b1c29156c560..bf1794ebc916 100644
--- a/tools/perf/util/stat.h
+++ b/tools/perf/util/stat.h
@@ -166,7 +166,6 @@ struct perf_stat_config {
FILE *output;
unsigned int interval;
unsigned int timeout;
- int initial_delay;
unsigned int unit_width;
unsigned int metric_only_len;
int times;
diff --git a/tools/perf/util/target.h b/tools/perf/util/target.h
index daec6cba500d..880f1af7f6ad 100644
--- a/tools/perf/util/target.h
+++ b/tools/perf/util/target.h
@@ -18,6 +18,7 @@ struct target {
bool per_thread;
bool use_bpf;
bool hybrid;
+ int initial_delay;
const char *attr_map;
};
@@ -72,6 +73,17 @@ static inline bool target__none(struct target *target)
return !target__has_task(target) && !target__has_cpu(target);
}
+static inline bool target__enable_on_exec(struct target *target)
+{
+ /*
+ * Normally enable_on_exec should be set if:
+ * 1) The tracee process is forked (not attaching to existed task or cpu).
+ * 2) And initial_delay is not configured.
+ * Otherwise, we enable tracee events manually.
+ */
+ return target__none(target) && !target->initial_delay;
+}
+
static inline bool target__has_per_thread(struct target *target)
{
return target->system_wide && target->per_thread;
--
2.25.1
This just simply replace record_opts::initial_delay with
target::initial_delay. Nothing else is changed.
Signed-off-by: Changbin Du <[email protected]>
---
tools/perf/builtin-record.c | 12 ++++++------
tools/perf/builtin-trace.c | 8 ++++----
tools/perf/util/evlist.c | 6 +++---
tools/perf/util/evsel.c | 2 +-
tools/perf/util/record.h | 1 -
5 files changed, 14 insertions(+), 15 deletions(-)
diff --git a/tools/perf/builtin-record.c b/tools/perf/builtin-record.c
index 8374117e66f6..bc978bb38890 100644
--- a/tools/perf/builtin-record.c
+++ b/tools/perf/builtin-record.c
@@ -1292,7 +1292,7 @@ static int record__open(struct record *rec)
* dummy event so that we can track PERF_RECORD_MMAP to cover the delay
* of waiting or event synthesis.
*/
- if (opts->initial_delay || target__has_cpu(&opts->target) ||
+ if (opts->target.initial_delay || target__has_cpu(&opts->target) ||
perf_pmu__has_hybrid()) {
pos = evlist__get_tracking_event(evlist);
if (!evsel__is_dummy_event(pos)) {
@@ -1307,7 +1307,7 @@ static int record__open(struct record *rec)
* Enable the dummy event when the process is forked for
* initial_delay, immediately for system wide.
*/
- if (opts->initial_delay && !pos->immediate &&
+ if (opts->target.initial_delay && !pos->immediate &&
!target__has_cpu(&opts->target))
pos->core.attr.enable_on_exec = 1;
else
@@ -2522,7 +2522,7 @@ static int __cmd_record(struct record *rec, int argc, const char **argv)
* (apart from group members) have enable_on_exec=1 set,
* so don't spoil it by prematurely enabling them.
*/
- if (!target__none(&opts->target) && !opts->initial_delay)
+ if (!target__none(&opts->target) && !opts->target.initial_delay)
evlist__enable(rec->evlist);
/*
@@ -2574,10 +2574,10 @@ static int __cmd_record(struct record *rec, int argc, const char **argv)
evlist__start_workload(rec->evlist);
}
- if (opts->initial_delay) {
+ if (opts->target.initial_delay) {
pr_info(EVLIST_DISABLED_MSG);
- if (opts->initial_delay > 0) {
- usleep(opts->initial_delay * USEC_PER_MSEC);
+ if (opts->target.initial_delay > 0) {
+ usleep(opts->target.initial_delay * USEC_PER_MSEC);
evlist__enable(rec->evlist);
pr_info(EVLIST_ENABLED_MSG);
}
diff --git a/tools/perf/builtin-trace.c b/tools/perf/builtin-trace.c
index 610fb60b1c0d..b363c609818b 100644
--- a/tools/perf/builtin-trace.c
+++ b/tools/perf/builtin-trace.c
@@ -3993,14 +3993,14 @@ static int trace__run(struct trace *trace, int argc, const char **argv)
if (err < 0)
goto out_error_mmap;
- if (!target__none(&trace->opts.target) && !trace->opts.initial_delay)
+ if (!target__none(&trace->opts.target) && !trace->opts.target.initial_delay)
evlist__enable(evlist);
if (forks)
evlist__start_workload(evlist);
- if (trace->opts.initial_delay) {
- usleep(trace->opts.initial_delay * 1000);
+ if (trace->opts.target.initial_delay) {
+ usleep(trace->opts.target.initial_delay * 1000);
evlist__enable(evlist);
}
@@ -4788,7 +4788,7 @@ int cmd_trace(int argc, const char **argv)
"per thread proc mmap processing timeout in ms"),
OPT_CALLBACK('G', "cgroup", &trace, "name", "monitor event in cgroup name only",
trace__parse_cgroups),
- OPT_INTEGER('D', "delay", &trace.opts.initial_delay,
+ OPT_INTEGER('D', "delay", &trace.opts.target.initial_delay,
"ms to wait before starting measurement after program "
"start"),
OPTS_EVSWITCH(&trace.evswitch),
diff --git a/tools/perf/util/evlist.c b/tools/perf/util/evlist.c
index 817df2504a1e..9e4b2bb0e6fa 100644
--- a/tools/perf/util/evlist.c
+++ b/tools/perf/util/evlist.c
@@ -2262,8 +2262,8 @@ int evlist__parse_event_enable_time(struct evlist *evlist, struct record_opts *o
if (unset)
return 0;
- opts->initial_delay = str_to_delay(str);
- if (opts->initial_delay)
+ opts->target.initial_delay = str_to_delay(str);
+ if (opts->target.initial_delay)
return 0;
ret = parse_event_enable_times(str, NULL);
@@ -2306,7 +2306,7 @@ int evlist__parse_event_enable_time(struct evlist *evlist, struct record_opts *o
eet->evlist = evlist;
evlist->eet = eet;
- opts->initial_delay = eet->times[0].start;
+ opts->target.initial_delay = eet->times[0].start;
return 0;
diff --git a/tools/perf/util/evsel.c b/tools/perf/util/evsel.c
index 51e8ce6edddc..462cc0673cee 100644
--- a/tools/perf/util/evsel.c
+++ b/tools/perf/util/evsel.c
@@ -1334,7 +1334,7 @@ void evsel__config(struct evsel *evsel, struct record_opts *opts,
* group leaders for traced executed by perf.
*/
if (target__none(&opts->target) && evsel__is_group_leader(evsel) &&
- !opts->initial_delay)
+ !opts->target.initial_delay)
attr->enable_on_exec = 1;
if (evsel->immediate) {
diff --git a/tools/perf/util/record.h b/tools/perf/util/record.h
index 46212bf020cf..a6566134e09e 100644
--- a/tools/perf/util/record.h
+++ b/tools/perf/util/record.h
@@ -65,7 +65,6 @@ struct record_opts {
const char *auxtrace_snapshot_opts;
const char *auxtrace_sample_opts;
bool sample_transaction;
- int initial_delay;
bool use_clockid;
clockid_t clockid;
u64 clockid_res_ns;
--
2.25.1
Replace perf_ftrace::initial_delay with target::initial_delay. Specifying
a negative initial_delay is meaningless for ftrace in practice but allowed
here.
Signed-off-by: Changbin Du <[email protected]>
---
tools/perf/builtin-ftrace.c | 10 +++++-----
tools/perf/util/ftrace.h | 1 -
2 files changed, 5 insertions(+), 6 deletions(-)
diff --git a/tools/perf/builtin-ftrace.c b/tools/perf/builtin-ftrace.c
index d7fe00f66b83..73d51ce84c3a 100644
--- a/tools/perf/builtin-ftrace.c
+++ b/tools/perf/builtin-ftrace.c
@@ -623,7 +623,7 @@ static int __cmd_ftrace(struct perf_ftrace *ftrace)
/* display column headers */
read_tracing_file_to_stdout("trace");
- if (!ftrace->initial_delay) {
+ if (!ftrace->target.initial_delay) {
if (write_tracing_file("tracing_on", "1") < 0) {
pr_err("can't enable tracing\n");
goto out_close_fd;
@@ -632,8 +632,8 @@ static int __cmd_ftrace(struct perf_ftrace *ftrace)
evlist__start_workload(ftrace->evlist);
- if (ftrace->initial_delay) {
- usleep(ftrace->initial_delay * 1000);
+ if (ftrace->target.initial_delay > 0) {
+ usleep(ftrace->target.initial_delay * 1000);
if (write_tracing_file("tracing_on", "1") < 0) {
pr_err("can't enable tracing\n");
goto out_close_fd;
@@ -1164,8 +1164,8 @@ int cmd_ftrace(int argc, const char **argv)
"Size of per cpu buffer, needs to use a B, K, M or G suffix.", parse_buffer_size),
OPT_BOOLEAN(0, "inherit", &ftrace.inherit,
"Trace children processes"),
- OPT_UINTEGER('D', "delay", &ftrace.initial_delay,
- "Number of milliseconds to wait before starting tracing after program start"),
+ OPT_INTEGER('D', "delay", &ftrace.target.initial_delay,
+ "Number of milliseconds to wait before starting tracing after program start"),
OPT_PARENT(common_options),
};
const struct option latency_options[] = {
diff --git a/tools/perf/util/ftrace.h b/tools/perf/util/ftrace.h
index a34cd15733b8..558efcb98d25 100644
--- a/tools/perf/util/ftrace.h
+++ b/tools/perf/util/ftrace.h
@@ -25,7 +25,6 @@ struct perf_ftrace {
int graph_noirqs;
int graph_verbose;
int graph_thresh;
- unsigned int initial_delay;
};
struct filter_entry {
--
2.25.1
Hello,
On Wed, Mar 1, 2023 at 7:12 PM Changbin Du <[email protected]> wrote:
>
> The first one fixes the problem that counters are never enabled when initial
> delay configured.
> The remaining two reuse the common field target::initial_delay for
> record/ftrace/trace subcommands.
>
> v2:
> - introduce common filed target::initial_delay
>
> Changbin Du (3):
> perf stat: fix counting when initial delay configured
> perf record: reuse target::initial_delay
> perf: ftrace: reuse target::initial_delay
Acked-by: Namhyung Kim <[email protected]>
Thanks,
Namhyung
>
> tools/perf/builtin-ftrace.c | 10 +++++-----
> tools/perf/builtin-record.c | 12 ++++++------
> tools/perf/builtin-stat.c | 15 +++++----------
> tools/perf/builtin-trace.c | 8 ++++----
> tools/perf/util/evlist.c | 6 +++---
> tools/perf/util/evsel.c | 2 +-
> tools/perf/util/ftrace.h | 1 -
> tools/perf/util/record.h | 1 -
> tools/perf/util/stat.c | 6 +-----
> tools/perf/util/stat.h | 1 -
> tools/perf/util/target.h | 12 ++++++++++++
> 11 files changed, 37 insertions(+), 37 deletions(-)
>
> --
> 2.25.1
>
Em Thu, Mar 02, 2023 at 11:22:37AM -0800, Namhyung Kim escreveu:
> Hello,
>
> On Wed, Mar 1, 2023 at 7:12 PM Changbin Du <[email protected]> wrote:
> >
> > The first one fixes the problem that counters are never enabled when initial
> > delay configured.
> > The remaining two reuse the common field target::initial_delay for
> > record/ftrace/trace subcommands.
> >
> > v2:
> > - introduce common filed target::initial_delay
> >
> > Changbin Du (3):
> > perf stat: fix counting when initial delay configured
> > perf record: reuse target::initial_delay
> > perf: ftrace: reuse target::initial_delay
>
> Acked-by: Namhyung Kim <[email protected]>
Thanks, applying the first to perf-tools (old perf/urgent) and the rest
to perf-tools-next (old perf/core).
- Arnaldo
> Thanks,
> Namhyung
>
> >
> > tools/perf/builtin-ftrace.c | 10 +++++-----
> > tools/perf/builtin-record.c | 12 ++++++------
> > tools/perf/builtin-stat.c | 15 +++++----------
> > tools/perf/builtin-trace.c | 8 ++++----
> > tools/perf/util/evlist.c | 6 +++---
> > tools/perf/util/evsel.c | 2 +-
> > tools/perf/util/ftrace.h | 1 -
> > tools/perf/util/record.h | 1 -
> > tools/perf/util/stat.c | 6 +-----
> > tools/perf/util/stat.h | 1 -
> > tools/perf/util/target.h | 12 ++++++++++++
> > 11 files changed, 37 insertions(+), 37 deletions(-)
> >
> > --
> > 2.25.1
> >
--
- Arnaldo
On Thu, Mar 02, 2023 at 05:40:16PM -0300, Arnaldo Carvalho de Melo wrote:
> Em Thu, Mar 02, 2023 at 11:22:37AM -0800, Namhyung Kim escreveu:
> > Hello,
> >
> > On Wed, Mar 1, 2023 at 7:12 PM Changbin Du <[email protected]> wrote:
> > >
> > > The first one fixes the problem that counters are never enabled when initial
> > > delay configured.
> > > The remaining two reuse the common field target::initial_delay for
> > > record/ftrace/trace subcommands.
> > >
> > > v2:
> > > - introduce common filed target::initial_delay
> > >
> > > Changbin Du (3):
> > > perf stat: fix counting when initial delay configured
> > > perf record: reuse target::initial_delay
> > > perf: ftrace: reuse target::initial_delay
> >
> > Acked-by: Namhyung Kim <[email protected]>
>
> Thanks, applying the first to perf-tools (old perf/urgent) and the rest
> to perf-tools-next (old perf/core).
>
> - Arnaldo
>
Hi Arnaldo, it seems only the first one is applied. The remaining two patches
are missed in your tree.
- perf record: reuse target::initial_delay
- perf: ftrace: reuse target::initial_delay
--
Cheers,
Changbin Du
Em Fri, Mar 10, 2023 at 10:37:14AM +0800, Changbin Du escreveu:
> On Thu, Mar 02, 2023 at 05:40:16PM -0300, Arnaldo Carvalho de Melo wrote:
> > Em Thu, Mar 02, 2023 at 11:22:37AM -0800, Namhyung Kim escreveu:
> > > Hello,
> > >
> > > On Wed, Mar 1, 2023 at 7:12 PM Changbin Du <[email protected]> wrote:
> > > >
> > > > The first one fixes the problem that counters are never enabled when initial
> > > > delay configured.
> > > > The remaining two reuse the common field target::initial_delay for
> > > > record/ftrace/trace subcommands.
> > > >
> > > > v2:
> > > > - introduce common filed target::initial_delay
> > > >
> > > > Changbin Du (3):
> > > > perf stat: fix counting when initial delay configured
> > > > perf record: reuse target::initial_delay
> > > > perf: ftrace: reuse target::initial_delay
> > >
> > > Acked-by: Namhyung Kim <[email protected]>
> >
> > Thanks, applying the first to perf-tools (old perf/urgent) and the rest
> > to perf-tools-next (old perf/core).
> >
> > - Arnaldo
> >
> Hi Arnaldo, it seems only the first one is applied. The remaining two patches
> are missed in your tree.
> - perf record: reuse target::initial_delay
> - perf: ftrace: reuse target::initial_delay
Those are not fixes, right? I mentioned that I would apply it after the
first is merged.
- Arnaldo
On Fri, Mar 10, 2023 at 12:05:38PM -0300, Arnaldo Carvalho de Melo wrote:
> Em Fri, Mar 10, 2023 at 10:37:14AM +0800, Changbin Du escreveu:
> > On Thu, Mar 02, 2023 at 05:40:16PM -0300, Arnaldo Carvalho de Melo wrote:
> > > Em Thu, Mar 02, 2023 at 11:22:37AM -0800, Namhyung Kim escreveu:
> > > > Hello,
> > > >
> > > > On Wed, Mar 1, 2023 at 7:12 PM Changbin Du <[email protected]> wrote:
> > > > >
> > > > > The first one fixes the problem that counters are never enabled when initial
> > > > > delay configured.
> > > > > The remaining two reuse the common field target::initial_delay for
> > > > > record/ftrace/trace subcommands.
> > > > >
> > > > > v2:
> > > > > - introduce common filed target::initial_delay
> > > > >
> > > > > Changbin Du (3):
> > > > > perf stat: fix counting when initial delay configured
> > > > > perf record: reuse target::initial_delay
> > > > > perf: ftrace: reuse target::initial_delay
> > > >
> > > > Acked-by: Namhyung Kim <[email protected]>
> > >
> > > Thanks, applying the first to perf-tools (old perf/urgent) and the rest
> > > to perf-tools-next (old perf/core).
> > >
> > > - Arnaldo
> > >
> > Hi Arnaldo, it seems only the first one is applied. The remaining two patches
> > are missed in your tree.
> > - perf record: reuse target::initial_delay
> > - perf: ftrace: reuse target::initial_delay
>
> Those are not fixes, right? I mentioned that I would apply it after the
> first is merged.
>
Sorry, I misunderstood your previous reply :)
> - Arnaldo
--
Cheers,
Changbin Du