This patchset is a first in a series of perf-lock enhancements I am working
on. Overall it is pretty straightforward, dealing with cleanups, and some
cosmetic changes.
Patches 1-3 deal with some minor fixes and cleanups.
Patch 4 fixes a segfault in the report command when the perf.data
file is generated by a non perf-lock command.
Patch 5 and 6 are cosmetic changes.
Patch 7 allows the tool to report the average wait times.
Thanks!
Davidlohr Bueso (7):
perf lock: remove dead code
perf lock: return proper code in report_lock_*_event
perf lock: plug some memleaks
perf lock: redo __cmd_report
perf lock: do not cut lock name
perf lock: limit bad rate precision
perf lock: account for lock average wait time
tools/perf/Documentation/perf-lock.txt | 2 +-
tools/perf/builtin-lock.c | 142 ++++++++++++++++-----------------
2 files changed, 70 insertions(+), 74 deletions(-)
--
1.7.11.7
Two decimal precision should be enough for this.
Signed-off-by: Davidlohr Bueso <[email protected]>
---
tools/perf/builtin-lock.c | 2 +-
1 file changed, 1 insertion(+), 1 deletion(-)
diff --git a/tools/perf/builtin-lock.c b/tools/perf/builtin-lock.c
index 2674a42..5adebe6 100644
--- a/tools/perf/builtin-lock.c
+++ b/tools/perf/builtin-lock.c
@@ -686,7 +686,7 @@ static void print_bad_events(int bad, int total)
pr_info("\n=== output for debug===\n\n");
pr_info("bad: %d, total: %d\n", bad, total);
- pr_info("bad rate: %f %%\n", (double)bad / (double)total * 100);
+ pr_info("bad rate: %.2f %%\n", (double)bad / (double)total * 100);
pr_info("histogram of events caused bad sequence\n");
for (i = 0; i < BROKEN_MAX; i++)
pr_info(" %10s: %d\n", name[i], bad_hist[i]);
--
1.7.11.7
While perf-lock currently reports both the total wait time
and the number of contentions, it doesn't explicitly show
the average wait time. Having this value immediately in the
report can be quite useful when looking into performance issues.
Furthermore, allowing report to sort by averages is another
handy feature to have - and thus do not only print the value, but
add it to the lock_stat structure.
Signed-off-by: Davidlohr Bueso <[email protected]>
---
tools/perf/Documentation/perf-lock.txt | 2 +-
tools/perf/builtin-lock.c | 10 +++++++++-
2 files changed, 10 insertions(+), 2 deletions(-)
diff --git a/tools/perf/Documentation/perf-lock.txt b/tools/perf/Documentation/perf-lock.txt
index c7f5f55..ab25be2 100644
--- a/tools/perf/Documentation/perf-lock.txt
+++ b/tools/perf/Documentation/perf-lock.txt
@@ -48,7 +48,7 @@ REPORT OPTIONS
-k::
--key=<value>::
Sorting key. Possible values: acquired (default), contended,
- wait_total, wait_max, wait_min.
+ avg_wait, wait_total, wait_max, wait_min.
INFO OPTIONS
------------
diff --git a/tools/perf/builtin-lock.c b/tools/perf/builtin-lock.c
index 5adebe6..f0a5871 100644
--- a/tools/perf/builtin-lock.c
+++ b/tools/perf/builtin-lock.c
@@ -56,7 +56,9 @@ struct lock_stat {
unsigned int nr_readlock;
unsigned int nr_trylock;
+
/* these times are in nano sec. */
+ u64 avg_wait_time;
u64 wait_time_total;
u64 wait_time_min;
u64 wait_time_max;
@@ -208,6 +210,7 @@ static struct thread_stat *thread_stat_findnew_first(u32 tid)
SINGLE_KEY(nr_acquired)
SINGLE_KEY(nr_contended)
+SINGLE_KEY(avg_wait_time)
SINGLE_KEY(wait_time_total)
SINGLE_KEY(wait_time_max)
@@ -244,6 +247,7 @@ static struct rb_root result; /* place to store sorted data */
struct lock_key keys[] = {
DEF_KEY_LOCK(acquired, nr_acquired),
DEF_KEY_LOCK(contended, nr_contended),
+ DEF_KEY_LOCK(avg_wait, avg_wait_time),
DEF_KEY_LOCK(wait_total, wait_time_total),
DEF_KEY_LOCK(wait_min, wait_time_min),
DEF_KEY_LOCK(wait_max, wait_time_max),
@@ -516,6 +520,7 @@ static int report_lock_acquired_event(struct perf_evsel *evsel,
seq->state = SEQ_STATE_ACQUIRED;
ls->nr_acquired++;
+ ls->avg_wait_time = ls->nr_contended ? ls->wait_time_total/ls->nr_contended : 0;
seq->prev_event_time = sample->time;
end:
return 0;
@@ -570,6 +575,7 @@ static int report_lock_contended_event(struct perf_evsel *evsel,
seq->state = SEQ_STATE_CONTENDED;
ls->nr_contended++;
+ ls->avg_wait_time = ls->wait_time_total/ls->nr_contended;
seq->prev_event_time = sample->time;
end:
return 0;
@@ -702,6 +708,7 @@ static void print_result(void)
pr_info("%10s ", "acquired");
pr_info("%10s ", "contended");
+ pr_info("%15s ", "avg wait (ns)");
pr_info("%15s ", "total wait (ns)");
pr_info("%15s ", "max wait (ns)");
pr_info("%15s ", "min wait (ns)");
@@ -720,6 +727,7 @@ static void print_result(void)
pr_info("%10u ", st->nr_acquired);
pr_info("%10u ", st->nr_contended);
+ pr_info("%15" PRIu64 " ", st->avg_wait_time);
pr_info("%15" PRIu64 " ", st->wait_time_total);
pr_info("%15" PRIu64 " ", st->wait_time_max);
pr_info("%15" PRIu64 " ", st->wait_time_min == ULLONG_MAX ?
@@ -926,7 +934,7 @@ int cmd_lock(int argc, const char **argv, const char *prefix __maybe_unused)
};
const struct option report_options[] = {
OPT_STRING('k', "key", &sort_key, "acquired",
- "key for sorting (acquired / contended / wait_total / wait_max / wait_min)"),
+ "key for sorting (acquired / contended / avg_wait / wait_total / wait_max / wait_min)"),
/* TODO: type */
OPT_END()
};
--
1.7.11.7
Address some trivial leaks.
Signed-off-by: Davidlohr Bueso <[email protected]>
---
tools/perf/builtin-lock.c | 14 +++++++++-----
1 file changed, 9 insertions(+), 5 deletions(-)
diff --git a/tools/perf/builtin-lock.c b/tools/perf/builtin-lock.c
index d318862..7784347 100644
--- a/tools/perf/builtin-lock.c
+++ b/tools/perf/builtin-lock.c
@@ -321,10 +321,12 @@ static struct lock_stat *lock_stat_findnew(void *addr, const char *name)
new->addr = addr;
new->name = zalloc(sizeof(char) * strlen(name) + 1);
- if (!new->name)
+ if (!new->name) {
+ free(new);
goto alloc_failed;
- strcpy(new->name, name);
+ }
+ strcpy(new->name, name);
new->wait_time_min = ULLONG_MAX;
list_add(&new->hash_entry, entry);
@@ -875,7 +877,7 @@ static int __cmd_record(int argc, const char **argv)
const char *record_args[] = {
"record", "-R", "-m", "1024", "-c", "1",
};
- unsigned int rec_argc, i, j;
+ unsigned int rec_argc, i, j, ret;
const char **rec_argv;
for (i = 0; i < ARRAY_SIZE(lock_tracepoints); i++) {
@@ -892,7 +894,7 @@ static int __cmd_record(int argc, const char **argv)
rec_argc += 2 * ARRAY_SIZE(lock_tracepoints);
rec_argv = calloc(rec_argc + 1, sizeof(char *));
- if (rec_argv == NULL)
+ if (!rec_argv)
return -ENOMEM;
for (i = 0; i < ARRAY_SIZE(record_args); i++)
@@ -908,7 +910,9 @@ static int __cmd_record(int argc, const char **argv)
BUG_ON(i != rec_argc);
- return cmd_record(i, rec_argv, NULL);
+ ret = cmd_record(i, rec_argv, NULL);
+ free(rec_argv);
+ return ret;
}
int cmd_lock(int argc, const char **argv, const char *prefix __maybe_unused)
--
1.7.11.7
This function should be straightforward, and we can remove some
trivial logic by moving the functionality of read_events() into
__cmd_report() - thus allowing a new session to be properly deleted.
Since the 'info' subcommand also needs to process the recorded events,
add a 'display_info' flag to differentiate between report and info
commands.
Furthermore, this patch also calls perf_session__has_traces(),
making sure that we don't compare apples and oranges, fixing a segfault
when using an perf.data file generated by a different subcommand. ie:
./perf mem record sleep 1
[ perf record: Woken up 1 times to write data ]
[ perf record: Captured and wrote 0.017 MB perf.data (~724 samples) ]
./perf lock report
Segmentation fault (core dumped)
Signed-off-by: Davidlohr Bueso <[email protected]>
---
tools/perf/builtin-lock.c | 68 +++++++++++++++++++++++++----------------------
1 file changed, 36 insertions(+), 32 deletions(-)
diff --git a/tools/perf/builtin-lock.c b/tools/perf/builtin-lock.c
index 7784347..780484f 100644
--- a/tools/perf/builtin-lock.c
+++ b/tools/perf/builtin-lock.c
@@ -818,6 +818,18 @@ static int process_sample_event(struct perf_tool *tool __maybe_unused,
return 0;
}
+static void sort_result(void)
+{
+ unsigned int i;
+ struct lock_stat *st;
+
+ for (i = 0; i < LOCKHASH_SIZE; i++) {
+ list_for_each_entry(st, &lockhash_table[i], hash_entry) {
+ insert_to_result(st, compare);
+ }
+ }
+}
+
static const struct perf_evsel_str_handler lock_tracepoints[] = {
{ "lock:lock_acquire", perf_evsel__process_lock_acquire, }, /* CONFIG_LOCKDEP */
{ "lock:lock_acquired", perf_evsel__process_lock_acquired, }, /* CONFIG_LOCKDEP, CONFIG_LOCK_STAT */
@@ -825,51 +837,47 @@ static const struct perf_evsel_str_handler lock_tracepoints[] = {
{ "lock:lock_release", perf_evsel__process_lock_release, }, /* CONFIG_LOCKDEP */
};
-static int read_events(void)
+static int __cmd_report(bool display_info)
{
+ int err = -EINVAL;
struct perf_tool eops = {
.sample = process_sample_event,
.comm = perf_event__process_comm,
.ordered_samples = true,
};
+
session = perf_session__new(input_name, O_RDONLY, 0, false, &eops);
if (!session) {
pr_err("Initializing perf session failed\n");
- return -1;
+ return -ENOMEM;
}
+ if (!perf_session__has_traces(session, "lock record"))
+ goto out_delete;
+
if (perf_session__set_tracepoints_handlers(session, lock_tracepoints)) {
pr_err("Initializing perf session tracepoint handlers failed\n");
- return -1;
+ goto out_delete;
}
- return perf_session__process_events(session, &eops);
-}
+ if (select_key())
+ goto out_delete;
-static void sort_result(void)
-{
- unsigned int i;
- struct lock_stat *st;
-
- for (i = 0; i < LOCKHASH_SIZE; i++) {
- list_for_each_entry(st, &lockhash_table[i], hash_entry) {
- insert_to_result(st, compare);
- }
- }
-}
+ err = perf_session__process_events(session, &eops);
+ if (err)
+ goto out_delete;
-static int __cmd_report(void)
-{
setup_pager();
+ if (display_info) /* used for info subcommand */
+ err = dump_info();
+ else {
+ sort_result();
+ print_result();
+ }
- if ((select_key() != 0) ||
- (read_events() != 0))
- return -1;
-
- sort_result();
- print_result();
-
- return 0;
+out_delete:
+ perf_session__delete(session);
+ return err;
}
static int __cmd_record(int argc, const char **argv)
@@ -970,7 +978,7 @@ int cmd_lock(int argc, const char **argv, const char *prefix __maybe_unused)
if (argc)
usage_with_options(report_usage, report_options);
}
- __cmd_report();
+ rc = __cmd_report(false);
} else if (!strcmp(argv[0], "script")) {
/* Aliased to 'perf script' */
return cmd_script(argc, argv, prefix);
@@ -983,11 +991,7 @@ int cmd_lock(int argc, const char **argv, const char *prefix __maybe_unused)
}
/* recycling report_lock_ops */
trace_handler = &report_lock_ops;
- setup_pager();
- if (read_events() != 0)
- rc = -1;
- else
- rc = dump_info();
+ rc = __cmd_report(true);
} else {
usage_with_options(lock_usage, lock_options);
}
--
1.7.11.7
While this could be seen as personal taste, there really isn't any
reason for being so stingy printing the lock name. Furthermore, some
symbol names are really just too long, and cutting them at 16 characters
doesn't help at all.
Signed-off-by: Davidlohr Bueso <[email protected]>
---
tools/perf/builtin-lock.c | 18 ++----------------
1 file changed, 2 insertions(+), 16 deletions(-)
diff --git a/tools/perf/builtin-lock.c b/tools/perf/builtin-lock.c
index 780484f..2674a42 100644
--- a/tools/perf/builtin-lock.c
+++ b/tools/perf/builtin-lock.c
@@ -696,10 +696,9 @@ static void print_bad_events(int bad, int total)
static void print_result(void)
{
struct lock_stat *st;
- char cut_name[20];
int bad, total;
- pr_info("%20s ", "Name");
+ pr_info("%40s ", "Name");
pr_info("%10s ", "acquired");
pr_info("%10s ", "contended");
@@ -716,21 +715,8 @@ static void print_result(void)
bad++;
continue;
}
- bzero(cut_name, 20);
-
- if (strlen(st->name) < 16) {
- /* output raw name */
- pr_info("%20s ", st->name);
- } else {
- strncpy(cut_name, st->name, 16);
- cut_name[16] = '.';
- cut_name[17] = '.';
- cut_name[18] = '.';
- cut_name[19] = '\0';
- /* cut off name for saving output style */
- pr_info("%20s ", cut_name);
- }
+ pr_info("%40s ", st->name);
pr_info("%10u ", st->nr_acquired);
pr_info("%10u ", st->nr_contended);
--
1.7.11.7
No need for break statements after goto jumps.
Signed-off-by: Davidlohr Bueso <[email protected]>
---
tools/perf/builtin-lock.c | 6 ------
1 file changed, 6 deletions(-)
diff --git a/tools/perf/builtin-lock.c b/tools/perf/builtin-lock.c
index ee33ba2..148f7e2 100644
--- a/tools/perf/builtin-lock.c
+++ b/tools/perf/builtin-lock.c
@@ -446,7 +446,6 @@ broken:
list_del(&seq->list);
free(seq);
goto end;
- break;
default:
BUG_ON("Unknown state of lock sequence found!\n");
break;
@@ -508,8 +507,6 @@ static int report_lock_acquired_event(struct perf_evsel *evsel,
list_del(&seq->list);
free(seq);
goto end;
- break;
-
default:
BUG_ON("Unknown state of lock sequence found!\n");
break;
@@ -564,7 +561,6 @@ static int report_lock_contended_event(struct perf_evsel *evsel,
list_del(&seq->list);
free(seq);
goto end;
- break;
default:
BUG_ON("Unknown state of lock sequence found!\n");
break;
@@ -606,7 +602,6 @@ static int report_lock_release_event(struct perf_evsel *evsel,
switch (seq->state) {
case SEQ_STATE_UNINITIALIZED:
goto end;
- break;
case SEQ_STATE_ACQUIRED:
break;
case SEQ_STATE_READ_ACQUIRED:
@@ -624,7 +619,6 @@ static int report_lock_release_event(struct perf_evsel *evsel,
ls->discard = 1;
bad_hist[BROKEN_RELEASE]++;
goto free_seq;
- break;
default:
BUG_ON("Unknown state of lock sequence found!\n");
break;
--
1.7.11.7
The report_lock_*_event() functions return -1 when lock_stat_findnew(),
thread_stat_findnew() or get_seq() return NULL. These functions only return
this value when failing to allocate memory, this return -ENOMEM instead.
Signed-off-by: Davidlohr Bueso <[email protected]>
---
tools/perf/builtin-lock.c | 24 ++++++++++++------------
1 file changed, 12 insertions(+), 12 deletions(-)
diff --git a/tools/perf/builtin-lock.c b/tools/perf/builtin-lock.c
index 148f7e2..d318862 100644
--- a/tools/perf/builtin-lock.c
+++ b/tools/perf/builtin-lock.c
@@ -400,17 +400,17 @@ static int report_lock_acquire_event(struct perf_evsel *evsel,
ls = lock_stat_findnew(addr, name);
if (!ls)
- return -1;
+ return -ENOMEM;
if (ls->discard)
return 0;
ts = thread_stat_findnew(sample->tid);
if (!ts)
- return -1;
+ return -ENOMEM;
seq = get_seq(ts, addr);
if (!seq)
- return -1;
+ return -ENOMEM;
switch (seq->state) {
case SEQ_STATE_UNINITIALIZED:
@@ -472,17 +472,17 @@ static int report_lock_acquired_event(struct perf_evsel *evsel,
ls = lock_stat_findnew(addr, name);
if (!ls)
- return -1;
+ return -ENOMEM;
if (ls->discard)
return 0;
ts = thread_stat_findnew(sample->tid);
if (!ts)
- return -1;
+ return -ENOMEM;
seq = get_seq(ts, addr);
if (!seq)
- return -1;
+ return -ENOMEM;
switch (seq->state) {
case SEQ_STATE_UNINITIALIZED:
@@ -533,17 +533,17 @@ static int report_lock_contended_event(struct perf_evsel *evsel,
ls = lock_stat_findnew(addr, name);
if (!ls)
- return -1;
+ return -ENOMEM;
if (ls->discard)
return 0;
ts = thread_stat_findnew(sample->tid);
if (!ts)
- return -1;
+ return -ENOMEM;
seq = get_seq(ts, addr);
if (!seq)
- return -1;
+ return -ENOMEM;
switch (seq->state) {
case SEQ_STATE_UNINITIALIZED:
@@ -587,17 +587,17 @@ static int report_lock_release_event(struct perf_evsel *evsel,
ls = lock_stat_findnew(addr, name);
if (!ls)
- return -1;
+ return -ENOMEM;
if (ls->discard)
return 0;
ts = thread_stat_findnew(sample->tid);
if (!ts)
- return -1;
+ return -ENOMEM;
seq = get_seq(ts, addr);
if (!seq)
- return -1;
+ return -ENOMEM;
switch (seq->state) {
case SEQ_STATE_UNINITIALIZED:
--
1.7.11.7
Davidlohr Bueso wrote:
> diff --git a/tools/perf/builtin-lock.c b/tools/perf/builtin-lock.c
> index d318862..7784347 100644
> --- a/tools/perf/builtin-lock.c
> +++ b/tools/perf/builtin-lock.c
> @@ -321,10 +321,12 @@ static struct lock_stat *lock_stat_findnew(void *addr, const char *name)
>
> new->addr = addr;
> new->name = zalloc(sizeof(char) * strlen(name) + 1);
> - if (!new->name)
> + if (!new->name) {
> + free(new);
> goto alloc_failed;
> - strcpy(new->name, name);
> + }
Why is the strcpy() missing indent?
> + strcpy(new->name, name);
> new->wait_time_min = ULLONG_MAX;
>
> list_add(&new->hash_entry, entry);
> @@ -875,7 +877,7 @@ static int __cmd_record(int argc, const char **argv)
> const char *record_args[] = {
> "record", "-R", "-m", "1024", "-c", "1",
> };
> - unsigned int rec_argc, i, j;
> + unsigned int rec_argc, i, j, ret;
> const char **rec_argv;
>
> for (i = 0; i < ARRAY_SIZE(lock_tracepoints); i++) {
> @@ -892,7 +894,7 @@ static int __cmd_record(int argc, const char **argv)
> rec_argc += 2 * ARRAY_SIZE(lock_tracepoints);
>
> rec_argv = calloc(rec_argc + 1, sizeof(char *));
> - if (rec_argv == NULL)
> + if (!rec_argv)
It's better to check explicitly, so we know rec_argv isn't an integer.
> return -ENOMEM;
>
> for (i = 0; i < ARRAY_SIZE(record_args); i++)
> @@ -908,7 +910,9 @@ static int __cmd_record(int argc, const char **argv)
>
> BUG_ON(i != rec_argc);
>
> - return cmd_record(i, rec_argv, NULL);
> + ret = cmd_record(i, rec_argv, NULL);
> + free(rec_argv);
> + return ret;
> }
Patch looks good otherwise; thanks.
Hi Davidlohr, thanks for your enhancements and cleanings!
At Sun, 8 Sep 2013 19:19:17 -0700,
Bueso wrote:
>
> While this could be seen as personal taste, there really isn't any
> reason for being so stingy printing the lock name. Furthermore, some
> symbol names are really just too long, and cutting them at 16 characters
> doesn't help at all.
I agree with your opinion that 16 chars are too short for lock names,
but I think if we should print "..." for indicating the abbreviation.
Thanks,
Hitoshi
>
> Signed-off-by: Davidlohr Bueso <[email protected]>
> ---
> tools/perf/builtin-lock.c | 18 ++----------------
> 1 file changed, 2 insertions(+), 16 deletions(-)
>
> diff --git a/tools/perf/builtin-lock.c b/tools/perf/builtin-lock.c
> index 780484f..2674a42 100644
> --- a/tools/perf/builtin-lock.c
> +++ b/tools/perf/builtin-lock.c
> @@ -696,10 +696,9 @@ static void print_bad_events(int bad, int total)
> static void print_result(void)
> {
> struct lock_stat *st;
> - char cut_name[20];
> int bad, total;
>
> - pr_info("%20s ", "Name");
> + pr_info("%40s ", "Name");
> pr_info("%10s ", "acquired");
> pr_info("%10s ", "contended");
>
> @@ -716,21 +715,8 @@ static void print_result(void)
> bad++;
> continue;
> }
> - bzero(cut_name, 20);
> -
> - if (strlen(st->name) < 16) {
> - /* output raw name */
> - pr_info("%20s ", st->name);
> - } else {
> - strncpy(cut_name, st->name, 16);
> - cut_name[16] = '.';
> - cut_name[17] = '.';
> - cut_name[18] = '.';
> - cut_name[19] = '\0';
> - /* cut off name for saving output style */
> - pr_info("%20s ", cut_name);
> - }
>
> + pr_info("%40s ", st->name);
> pr_info("%10u ", st->nr_acquired);
> pr_info("%10u ", st->nr_contended);
>
> --
> 1.7.11.7
>
> --
> To unsubscribe from this list: send the line "unsubscribe linux-kernel" in
> the body of a message to [email protected]
> More majordomo info at http://vger.kernel.org/majordomo-info.html
> Please read the FAQ at http://www.tux.org/lkml/
At Mon, 9 Sep 2013 08:31:38 +0530,
Ramkumar Ramachandra wrote:
>
> Davidlohr Bueso wrote:
> > diff --git a/tools/perf/builtin-lock.c b/tools/perf/builtin-lock.c
> > index d318862..7784347 100644
> > --- a/tools/perf/builtin-lock.c
> > +++ b/tools/perf/builtin-lock.c
> > @@ -321,10 +321,12 @@ static struct lock_stat *lock_stat_findnew(void *addr, const char *name)
> >
> > new->addr = addr;
> > new->name = zalloc(sizeof(char) * strlen(name) + 1);
> > - if (!new->name)
> > + if (!new->name) {
> > + free(new);
> > goto alloc_failed;
> > - strcpy(new->name, name);
> > + }
>
> Why is the strcpy() missing indent?
The strcpy() is removed. It doesn't miss indantation.
Thanks,
Hitoshi
Em Sun, Sep 08, 2013 at 07:19:12PM -0700, Davidlohr Bueso escreveu:
> This patchset is a first in a series of perf-lock enhancements I am working
> on. Overall it is pretty straightforward, dealing with cleanups, and some
> cosmetic changes.
>
> Patches 1-3 deal with some minor fixes and cleanups.
>
> Patch 4 fixes a segfault in the report command when the perf.data
> file is generated by a non perf-lock command.
>
> Patch 5 and 6 are cosmetic changes.
Applied all but patch 5, since Mitake-san had concerns about it, lemme
know if that changed.
Thanks!
- Arnaldo
> Patch 7 allows the tool to report the average wait times.
>
>
> Thanks!
>
> Davidlohr Bueso (7):
> perf lock: remove dead code
> perf lock: return proper code in report_lock_*_event
> perf lock: plug some memleaks
> perf lock: redo __cmd_report
> perf lock: do not cut lock name
> perf lock: limit bad rate precision
> perf lock: account for lock average wait time
>
> tools/perf/Documentation/perf-lock.txt | 2 +-
> tools/perf/builtin-lock.c | 142 ++++++++++++++++-----------------
> 2 files changed, 70 insertions(+), 74 deletions(-)
>
> --
> 1.7.11.7
On Thu, 2013-09-19 at 16:00 -0300, Arnaldo Carvalho de Melo wrote:
> Em Sun, Sep 08, 2013 at 07:19:12PM -0700, Davidlohr Bueso escreveu:
> > This patchset is a first in a series of perf-lock enhancements I am working
> > on. Overall it is pretty straightforward, dealing with cleanups, and some
> > cosmetic changes.
> >
> > Patches 1-3 deal with some minor fixes and cleanups.
> >
> > Patch 4 fixes a segfault in the report command when the perf.data
> > file is generated by a non perf-lock command.
> >
> > Patch 5 and 6 are cosmetic changes.
>
> Applied all but patch 5, since Mitake-san had concerns about it, lemme
> know if that changed.
>
Thanks Arnaldo! By the way, we probably want patch 4 in stable, since it
does fix a day 1 bug.
Commit-ID: 02ad0702e54f9b82b697718e7e8662eb3f2266ee
Gitweb: http://git.kernel.org/tip/02ad0702e54f9b82b697718e7e8662eb3f2266ee
Author: Davidlohr Bueso <[email protected]>
AuthorDate: Sun, 8 Sep 2013 19:19:13 -0700
Committer: Arnaldo Carvalho de Melo <[email protected]>
CommitDate: Wed, 9 Oct 2013 11:23:21 -0300
perf lock: Remove dead code
No need for break statements after goto jumps.
Signed-off-by: Davidlohr Bueso <[email protected]>
Cc: Aswin Chandramouleeswaran <[email protected]>
Cc: Frederic Weisbecker <[email protected]>
Cc: Hitoshi Mitake <[email protected]>
Cc: Ingo Molnar <[email protected]>
Cc: Peter Zijlstra <[email protected]>
Link: http://lkml.kernel.org/r/[email protected]
Signed-off-by: Arnaldo Carvalho de Melo <[email protected]>
---
tools/perf/builtin-lock.c | 6 ------
1 file changed, 6 deletions(-)
diff --git a/tools/perf/builtin-lock.c b/tools/perf/builtin-lock.c
index ee33ba2..148f7e2 100644
--- a/tools/perf/builtin-lock.c
+++ b/tools/perf/builtin-lock.c
@@ -446,7 +446,6 @@ broken:
list_del(&seq->list);
free(seq);
goto end;
- break;
default:
BUG_ON("Unknown state of lock sequence found!\n");
break;
@@ -508,8 +507,6 @@ static int report_lock_acquired_event(struct perf_evsel *evsel,
list_del(&seq->list);
free(seq);
goto end;
- break;
-
default:
BUG_ON("Unknown state of lock sequence found!\n");
break;
@@ -564,7 +561,6 @@ static int report_lock_contended_event(struct perf_evsel *evsel,
list_del(&seq->list);
free(seq);
goto end;
- break;
default:
BUG_ON("Unknown state of lock sequence found!\n");
break;
@@ -606,7 +602,6 @@ static int report_lock_release_event(struct perf_evsel *evsel,
switch (seq->state) {
case SEQ_STATE_UNINITIALIZED:
goto end;
- break;
case SEQ_STATE_ACQUIRED:
break;
case SEQ_STATE_READ_ACQUIRED:
@@ -624,7 +619,6 @@ static int report_lock_release_event(struct perf_evsel *evsel,
ls->discard = 1;
bad_hist[BROKEN_RELEASE]++;
goto free_seq;
- break;
default:
BUG_ON("Unknown state of lock sequence found!\n");
break;
Commit-ID: b33492ade49a223a666e582d0c63566609e7014b
Gitweb: http://git.kernel.org/tip/b33492ade49a223a666e582d0c63566609e7014b
Author: Davidlohr Bueso <[email protected]>
AuthorDate: Sun, 8 Sep 2013 19:19:14 -0700
Committer: Arnaldo Carvalho de Melo <[email protected]>
CommitDate: Wed, 9 Oct 2013 11:23:32 -0300
perf lock: Return proper code in report_lock_*_event
The report_lock_*_event() functions return -1 when lock_stat_findnew(),
thread_stat_findnew() or get_seq() return NULL. These functions only
return this value when failing to allocate memory, this return -ENOMEM
instead.
Signed-off-by: Davidlohr Bueso <[email protected]>
Cc: Aswin Chandramouleeswaran <[email protected]>
Cc: Frederic Weisbecker <[email protected]>
Cc: Hitoshi Mitake <[email protected]>
Cc: Ingo Molnar <[email protected]>
Cc: Peter Zijlstra <[email protected]>
Link: http://lkml.kernel.org/r/[email protected]
Signed-off-by: Arnaldo Carvalho de Melo <[email protected]>
---
tools/perf/builtin-lock.c | 24 ++++++++++++------------
1 file changed, 12 insertions(+), 12 deletions(-)
diff --git a/tools/perf/builtin-lock.c b/tools/perf/builtin-lock.c
index 148f7e2..d318862 100644
--- a/tools/perf/builtin-lock.c
+++ b/tools/perf/builtin-lock.c
@@ -400,17 +400,17 @@ static int report_lock_acquire_event(struct perf_evsel *evsel,
ls = lock_stat_findnew(addr, name);
if (!ls)
- return -1;
+ return -ENOMEM;
if (ls->discard)
return 0;
ts = thread_stat_findnew(sample->tid);
if (!ts)
- return -1;
+ return -ENOMEM;
seq = get_seq(ts, addr);
if (!seq)
- return -1;
+ return -ENOMEM;
switch (seq->state) {
case SEQ_STATE_UNINITIALIZED:
@@ -472,17 +472,17 @@ static int report_lock_acquired_event(struct perf_evsel *evsel,
ls = lock_stat_findnew(addr, name);
if (!ls)
- return -1;
+ return -ENOMEM;
if (ls->discard)
return 0;
ts = thread_stat_findnew(sample->tid);
if (!ts)
- return -1;
+ return -ENOMEM;
seq = get_seq(ts, addr);
if (!seq)
- return -1;
+ return -ENOMEM;
switch (seq->state) {
case SEQ_STATE_UNINITIALIZED:
@@ -533,17 +533,17 @@ static int report_lock_contended_event(struct perf_evsel *evsel,
ls = lock_stat_findnew(addr, name);
if (!ls)
- return -1;
+ return -ENOMEM;
if (ls->discard)
return 0;
ts = thread_stat_findnew(sample->tid);
if (!ts)
- return -1;
+ return -ENOMEM;
seq = get_seq(ts, addr);
if (!seq)
- return -1;
+ return -ENOMEM;
switch (seq->state) {
case SEQ_STATE_UNINITIALIZED:
@@ -587,17 +587,17 @@ static int report_lock_release_event(struct perf_evsel *evsel,
ls = lock_stat_findnew(addr, name);
if (!ls)
- return -1;
+ return -ENOMEM;
if (ls->discard)
return 0;
ts = thread_stat_findnew(sample->tid);
if (!ts)
- return -1;
+ return -ENOMEM;
seq = get_seq(ts, addr);
if (!seq)
- return -1;
+ return -ENOMEM;
switch (seq->state) {
case SEQ_STATE_UNINITIALIZED:
Commit-ID: 0a98c7febf55325ebac4f28289a9433f4b66ed0e
Gitweb: http://git.kernel.org/tip/0a98c7febf55325ebac4f28289a9433f4b66ed0e
Author: Davidlohr Bueso <[email protected]>
AuthorDate: Sun, 8 Sep 2013 19:19:15 -0700
Committer: Arnaldo Carvalho de Melo <[email protected]>
CommitDate: Wed, 9 Oct 2013 11:23:39 -0300
perf lock: Plug some memleaks
Address some trivial leaks.
Signed-off-by: Davidlohr Bueso <[email protected]>
Cc: Aswin Chandramouleeswaran <[email protected]>
Cc: Frederic Weisbecker <[email protected]>
Cc: Hitoshi Mitake <[email protected]>
Cc: Ingo Molnar <[email protected]>
Cc: Peter Zijlstra <[email protected]>
Link: http://lkml.kernel.org/r/[email protected]
Signed-off-by: Arnaldo Carvalho de Melo <[email protected]>
---
tools/perf/builtin-lock.c | 14 +++++++++-----
1 file changed, 9 insertions(+), 5 deletions(-)
diff --git a/tools/perf/builtin-lock.c b/tools/perf/builtin-lock.c
index d318862..7784347 100644
--- a/tools/perf/builtin-lock.c
+++ b/tools/perf/builtin-lock.c
@@ -321,10 +321,12 @@ static struct lock_stat *lock_stat_findnew(void *addr, const char *name)
new->addr = addr;
new->name = zalloc(sizeof(char) * strlen(name) + 1);
- if (!new->name)
+ if (!new->name) {
+ free(new);
goto alloc_failed;
- strcpy(new->name, name);
+ }
+ strcpy(new->name, name);
new->wait_time_min = ULLONG_MAX;
list_add(&new->hash_entry, entry);
@@ -875,7 +877,7 @@ static int __cmd_record(int argc, const char **argv)
const char *record_args[] = {
"record", "-R", "-m", "1024", "-c", "1",
};
- unsigned int rec_argc, i, j;
+ unsigned int rec_argc, i, j, ret;
const char **rec_argv;
for (i = 0; i < ARRAY_SIZE(lock_tracepoints); i++) {
@@ -892,7 +894,7 @@ static int __cmd_record(int argc, const char **argv)
rec_argc += 2 * ARRAY_SIZE(lock_tracepoints);
rec_argv = calloc(rec_argc + 1, sizeof(char *));
- if (rec_argv == NULL)
+ if (!rec_argv)
return -ENOMEM;
for (i = 0; i < ARRAY_SIZE(record_args); i++)
@@ -908,7 +910,9 @@ static int __cmd_record(int argc, const char **argv)
BUG_ON(i != rec_argc);
- return cmd_record(i, rec_argv, NULL);
+ ret = cmd_record(i, rec_argv, NULL);
+ free(rec_argv);
+ return ret;
}
int cmd_lock(int argc, const char **argv, const char *prefix __maybe_unused)
Commit-ID: f37376cd721a539ac398cbb7718b72fce83cd49b
Gitweb: http://git.kernel.org/tip/f37376cd721a539ac398cbb7718b72fce83cd49b
Author: Davidlohr Bueso <[email protected]>
AuthorDate: Sun, 8 Sep 2013 19:19:19 -0700
Committer: Arnaldo Carvalho de Melo <[email protected]>
CommitDate: Wed, 9 Oct 2013 11:24:01 -0300
perf lock: Account for lock average wait time
While perf-lock currently reports both the total wait time and the
number of contentions, it doesn't explicitly show the average wait time.
Having this value immediately in the report can be quite useful when
looking into performance issues.
Furthermore, allowing report to sort by averages is another handy
feature to have - and thus do not only print the value, but add it to
the lock_stat structure.
Signed-off-by: Davidlohr Bueso <[email protected]>
Cc: Aswin Chandramouleeswaran <[email protected]>
Cc: Frederic Weisbecker <[email protected]>
Cc: Hitoshi Mitake <[email protected]>
Cc: Ingo Molnar <[email protected]>
Cc: Peter Zijlstra <[email protected]>
Link: http://lkml.kernel.org/r/[email protected]
Signed-off-by: Arnaldo Carvalho de Melo <[email protected]>
---
tools/perf/Documentation/perf-lock.txt | 2 +-
tools/perf/builtin-lock.c | 10 +++++++++-
2 files changed, 10 insertions(+), 2 deletions(-)
diff --git a/tools/perf/Documentation/perf-lock.txt b/tools/perf/Documentation/perf-lock.txt
index c7f5f55..ab25be2 100644
--- a/tools/perf/Documentation/perf-lock.txt
+++ b/tools/perf/Documentation/perf-lock.txt
@@ -48,7 +48,7 @@ REPORT OPTIONS
-k::
--key=<value>::
Sorting key. Possible values: acquired (default), contended,
- wait_total, wait_max, wait_min.
+ avg_wait, wait_total, wait_max, wait_min.
INFO OPTIONS
------------
diff --git a/tools/perf/builtin-lock.c b/tools/perf/builtin-lock.c
index 972310c..6a9076f 100644
--- a/tools/perf/builtin-lock.c
+++ b/tools/perf/builtin-lock.c
@@ -56,7 +56,9 @@ struct lock_stat {
unsigned int nr_readlock;
unsigned int nr_trylock;
+
/* these times are in nano sec. */
+ u64 avg_wait_time;
u64 wait_time_total;
u64 wait_time_min;
u64 wait_time_max;
@@ -208,6 +210,7 @@ static struct thread_stat *thread_stat_findnew_first(u32 tid)
SINGLE_KEY(nr_acquired)
SINGLE_KEY(nr_contended)
+SINGLE_KEY(avg_wait_time)
SINGLE_KEY(wait_time_total)
SINGLE_KEY(wait_time_max)
@@ -244,6 +247,7 @@ static struct rb_root result; /* place to store sorted data */
struct lock_key keys[] = {
DEF_KEY_LOCK(acquired, nr_acquired),
DEF_KEY_LOCK(contended, nr_contended),
+ DEF_KEY_LOCK(avg_wait, avg_wait_time),
DEF_KEY_LOCK(wait_total, wait_time_total),
DEF_KEY_LOCK(wait_min, wait_time_min),
DEF_KEY_LOCK(wait_max, wait_time_max),
@@ -516,6 +520,7 @@ static int report_lock_acquired_event(struct perf_evsel *evsel,
seq->state = SEQ_STATE_ACQUIRED;
ls->nr_acquired++;
+ ls->avg_wait_time = ls->nr_contended ? ls->wait_time_total/ls->nr_contended : 0;
seq->prev_event_time = sample->time;
end:
return 0;
@@ -570,6 +575,7 @@ static int report_lock_contended_event(struct perf_evsel *evsel,
seq->state = SEQ_STATE_CONTENDED;
ls->nr_contended++;
+ ls->avg_wait_time = ls->wait_time_total/ls->nr_contended;
seq->prev_event_time = sample->time;
end:
return 0;
@@ -703,6 +709,7 @@ static void print_result(void)
pr_info("%10s ", "acquired");
pr_info("%10s ", "contended");
+ pr_info("%15s ", "avg wait (ns)");
pr_info("%15s ", "total wait (ns)");
pr_info("%15s ", "max wait (ns)");
pr_info("%15s ", "min wait (ns)");
@@ -734,6 +741,7 @@ static void print_result(void)
pr_info("%10u ", st->nr_acquired);
pr_info("%10u ", st->nr_contended);
+ pr_info("%15" PRIu64 " ", st->avg_wait_time);
pr_info("%15" PRIu64 " ", st->wait_time_total);
pr_info("%15" PRIu64 " ", st->wait_time_max);
pr_info("%15" PRIu64 " ", st->wait_time_min == ULLONG_MAX ?
@@ -940,7 +948,7 @@ int cmd_lock(int argc, const char **argv, const char *prefix __maybe_unused)
};
const struct option report_options[] = {
OPT_STRING('k', "key", &sort_key, "acquired",
- "key for sorting (acquired / contended / wait_total / wait_max / wait_min)"),
+ "key for sorting (acquired / contended / avg_wait / wait_total / wait_max / wait_min)"),
/* TODO: type */
OPT_END()
};
Commit-ID: 60a25cbc4a167fc0129296c3c640d8506a57acc5
Gitweb: http://git.kernel.org/tip/60a25cbc4a167fc0129296c3c640d8506a57acc5
Author: Davidlohr Bueso <[email protected]>
AuthorDate: Sun, 8 Sep 2013 19:19:18 -0700
Committer: Arnaldo Carvalho de Melo <[email protected]>
CommitDate: Wed, 9 Oct 2013 11:23:54 -0300
perf lock: Limit bad rate precision
Two decimal precision should be enough for this.
Signed-off-by: Davidlohr Bueso <[email protected]>
Cc: Aswin Chandramouleeswaran <[email protected]>
Cc: Frederic Weisbecker <[email protected]>
Cc: Hitoshi Mitake <[email protected]>
Cc: Ingo Molnar <[email protected]>
Cc: Peter Zijlstra <[email protected]>
Link: http://lkml.kernel.org/r/[email protected]
Signed-off-by: Arnaldo Carvalho de Melo <[email protected]>
---
tools/perf/builtin-lock.c | 2 +-
1 file changed, 1 insertion(+), 1 deletion(-)
diff --git a/tools/perf/builtin-lock.c b/tools/perf/builtin-lock.c
index 780484f..972310c 100644
--- a/tools/perf/builtin-lock.c
+++ b/tools/perf/builtin-lock.c
@@ -686,7 +686,7 @@ static void print_bad_events(int bad, int total)
pr_info("\n=== output for debug===\n\n");
pr_info("bad: %d, total: %d\n", bad, total);
- pr_info("bad rate: %f %%\n", (double)bad / (double)total * 100);
+ pr_info("bad rate: %.2f %%\n", (double)bad / (double)total * 100);
pr_info("histogram of events caused bad sequence\n");
for (i = 0; i < BROKEN_MAX; i++)
pr_info(" %10s: %d\n", name[i], bad_hist[i]);
Commit-ID: 375eb2be5584b8182a917124ca217b74e43d2dc4
Gitweb: http://git.kernel.org/tip/375eb2be5584b8182a917124ca217b74e43d2dc4
Author: Davidlohr Bueso <[email protected]>
AuthorDate: Sun, 8 Sep 2013 19:19:16 -0700
Committer: Arnaldo Carvalho de Melo <[email protected]>
CommitDate: Wed, 9 Oct 2013 11:23:48 -0300
perf lock: Redo __cmd_report
This function should be straightforward, and we can remove some trivial
logic by moving the functionality of read_events() into __cmd_report() -
thus allowing a new session to be properly deleted.
Since the 'info' subcommand also needs to process the recorded events,
add a 'display_info' flag to differentiate between report and info
commands.
Furthermore, this patch also calls perf_session__has_traces(), making
sure that we don't compare apples and oranges, fixing a segfault when
using an perf.data file generated by a different subcommand. ie:
./perf mem record sleep 1
[ perf record: Woken up 1 times to write data ]
[ perf record: Captured and wrote 0.017 MB perf.data (~724 samples) ]
./perf lock report
Segmentation fault (core dumped)
Signed-off-by: Davidlohr Bueso <[email protected]>
Cc: Aswin Chandramouleeswaran <[email protected]>
Cc: Frederic Weisbecker <[email protected]>
Cc: Hitoshi Mitake <[email protected]>
Cc: Ingo Molnar <[email protected]>
Cc: Peter Zijlstra <[email protected]>
Link: http://lkml.kernel.org/r/[email protected]
Signed-off-by: Arnaldo Carvalho de Melo <[email protected]>
---
tools/perf/builtin-lock.c | 68 +++++++++++++++++++++++++----------------------
1 file changed, 36 insertions(+), 32 deletions(-)
diff --git a/tools/perf/builtin-lock.c b/tools/perf/builtin-lock.c
index 7784347..780484f 100644
--- a/tools/perf/builtin-lock.c
+++ b/tools/perf/builtin-lock.c
@@ -818,6 +818,18 @@ static int process_sample_event(struct perf_tool *tool __maybe_unused,
return 0;
}
+static void sort_result(void)
+{
+ unsigned int i;
+ struct lock_stat *st;
+
+ for (i = 0; i < LOCKHASH_SIZE; i++) {
+ list_for_each_entry(st, &lockhash_table[i], hash_entry) {
+ insert_to_result(st, compare);
+ }
+ }
+}
+
static const struct perf_evsel_str_handler lock_tracepoints[] = {
{ "lock:lock_acquire", perf_evsel__process_lock_acquire, }, /* CONFIG_LOCKDEP */
{ "lock:lock_acquired", perf_evsel__process_lock_acquired, }, /* CONFIG_LOCKDEP, CONFIG_LOCK_STAT */
@@ -825,51 +837,47 @@ static const struct perf_evsel_str_handler lock_tracepoints[] = {
{ "lock:lock_release", perf_evsel__process_lock_release, }, /* CONFIG_LOCKDEP */
};
-static int read_events(void)
+static int __cmd_report(bool display_info)
{
+ int err = -EINVAL;
struct perf_tool eops = {
.sample = process_sample_event,
.comm = perf_event__process_comm,
.ordered_samples = true,
};
+
session = perf_session__new(input_name, O_RDONLY, 0, false, &eops);
if (!session) {
pr_err("Initializing perf session failed\n");
- return -1;
+ return -ENOMEM;
}
+ if (!perf_session__has_traces(session, "lock record"))
+ goto out_delete;
+
if (perf_session__set_tracepoints_handlers(session, lock_tracepoints)) {
pr_err("Initializing perf session tracepoint handlers failed\n");
- return -1;
+ goto out_delete;
}
- return perf_session__process_events(session, &eops);
-}
+ if (select_key())
+ goto out_delete;
-static void sort_result(void)
-{
- unsigned int i;
- struct lock_stat *st;
-
- for (i = 0; i < LOCKHASH_SIZE; i++) {
- list_for_each_entry(st, &lockhash_table[i], hash_entry) {
- insert_to_result(st, compare);
- }
- }
-}
+ err = perf_session__process_events(session, &eops);
+ if (err)
+ goto out_delete;
-static int __cmd_report(void)
-{
setup_pager();
+ if (display_info) /* used for info subcommand */
+ err = dump_info();
+ else {
+ sort_result();
+ print_result();
+ }
- if ((select_key() != 0) ||
- (read_events() != 0))
- return -1;
-
- sort_result();
- print_result();
-
- return 0;
+out_delete:
+ perf_session__delete(session);
+ return err;
}
static int __cmd_record(int argc, const char **argv)
@@ -970,7 +978,7 @@ int cmd_lock(int argc, const char **argv, const char *prefix __maybe_unused)
if (argc)
usage_with_options(report_usage, report_options);
}
- __cmd_report();
+ rc = __cmd_report(false);
} else if (!strcmp(argv[0], "script")) {
/* Aliased to 'perf script' */
return cmd_script(argc, argv, prefix);
@@ -983,11 +991,7 @@ int cmd_lock(int argc, const char **argv, const char *prefix __maybe_unused)
}
/* recycling report_lock_ops */
trace_handler = &report_lock_ops;
- setup_pager();
- if (read_events() != 0)
- rc = -1;
- else
- rc = dump_info();
+ rc = __cmd_report(true);
} else {
usage_with_options(lock_usage, lock_options);
}