Collecting lock stats from BPF can fail for various reasons.  For
example, I got a report that the time calculation sometimes looks wrong
for contended spinlocks; I suspect the time delta went negative for
some reason.

Count the failures separately by reason and show them in the output
like below:
  $ sudo perf lock contention -abE5 sleep 10
   contended   total wait     max wait     avg wait         type   caller

          13    785.61 us     79.36 us     60.43 us     spinlock   remove_wait_queue+0x14
          10    469.02 us     87.51 us     46.90 us     spinlock   prepare_to_wait+0x27
           9    289.09 us     69.08 us     32.12 us     spinlock   finish_wait+0x36
         114    251.05 us      8.56 us      2.20 us     spinlock   try_to_wake_up+0x1f5
         132    188.63 us      5.01 us      1.43 us     spinlock   __wake_up_common_lock+0x62

  === output for debug ===

  bad: 1, total: 279
  bad rate: 0.36 %
  histogram of failure reasons
           task: 1
          stack: 0
           time: 0
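
For reference, the "bad rate" in the debug output is simple arithmetic over
the new per-reason counters.  The standalone sketch below mirrors what the
new print_bpf_events() helper computes (minus the quiet/verbose handling);
show_bad_rate() and the sample values in main() are made up for
illustration only:

  #include <stdio.h>

  /* same layout as the new struct in lock-contention.h */
  struct lock_contention_fails {
          int task;
          int stack;
          int time;
  };

  /* hypothetical helper: sum the per-reason counters and report the rate */
  static void show_bad_rate(int total, struct lock_contention_fails *fails)
  {
          int broken = fails->task + fails->stack + fails->time;

          if (total + broken == 0)
                  return;

          /* like print_bpf_events(), count the failures into the total */
          total += broken;
          printf("bad: %d, total: %d\n", broken, total);
          printf("bad rate: %.2f %%\n", (double)broken / (double)total * 100);
  }

  int main(void)
  {
          /* values matching the example above: 278 good entries, 1 failure */
          struct lock_contention_fails fails = { .task = 1, .stack = 0, .time = 0 };

          show_bad_rate(278, &fails);
          return 0;
  }

Running it prints "bad: 1, total: 279" and "bad rate: 0.36 %", matching the
debug output shown above.
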
Signed-off-by: Namhyung Kim <[email protected]>
---
 tools/perf/builtin-lock.c                     | 28 +++++++++++++++----
 tools/perf/util/bpf_lock_contention.c         |  4 ++-
 .../perf/util/bpf_skel/lock_contention.bpf.c  | 13 +++++++--
 tools/perf/util/lock-contention.h             |  8 +++++-
 4 files changed, 42 insertions(+), 11 deletions(-)
diff --git a/tools/perf/builtin-lock.c b/tools/perf/builtin-lock.c
index 82dd2dfe5692..32ec58fb80e4 100644
--- a/tools/perf/builtin-lock.c
+++ b/tools/perf/builtin-lock.c
@@ -1619,6 +1619,24 @@ static void sort_contention_result(void)
sort_result();
}
+static void print_bpf_events(int total, struct lock_contention_fails *fails)
+{
+	/* Output for debug, this has to be removed */
+ int broken = fails->task + fails->stack + fails->time;
+
+ if (quiet || total == 0 || (broken == 0 && verbose <= 0))
+ return;
+
+ total += broken;
+ pr_info("\n=== output for debug ===\n\n");
+ pr_info("bad: %d, total: %d\n", broken, total);
+ pr_info("bad rate: %.2f %%\n", (double)broken / (double)total * 100);
+
+ pr_info("histogram of failure reasons\n");
+ pr_info(" %10s: %d\n", "task", fails->task);
+ pr_info(" %10s: %d\n", "stack", fails->stack);
+ pr_info(" %10s: %d\n", "time", fails->time);
+}
static void print_contention_result(struct lock_contention *con)
{
struct lock_stat *st;
@@ -1646,8 +1664,6 @@ static void print_contention_result(struct lock_contention *con)
}
bad = total = printed = 0;
- if (use_bpf)
- bad = bad_hist[BROKEN_CONTENDED];
while ((st = pop_from_result())) {
struct thread *t;
@@ -1704,7 +1720,10 @@ static void print_contention_result(struct lock_contention *con)
break;
}
- print_bad_events(bad, total);
+ if (use_bpf)
+ print_bpf_events(total, &con->fails);
+ else
+ print_bad_events(bad, total);
}
static bool force;
@@ -1931,9 +1950,6 @@ static int __cmd_contention(int argc, const char **argv)
lock_contention_stop();
lock_contention_read(&con);
-
- /* abuse bad hist stats for lost entries */
- bad_hist[BROKEN_CONTENDED] = con.lost;
} else {
err = perf_session__process_events(session);
if (err)
diff --git a/tools/perf/util/bpf_lock_contention.c b/tools/perf/util/bpf_lock_contention.c
index 5927bf0bd92b..b3e8aa9cd011 100644
--- a/tools/perf/util/bpf_lock_contention.c
+++ b/tools/perf/util/bpf_lock_contention.c
@@ -259,7 +259,9 @@ int lock_contention_read(struct lock_contention *con)
fd = bpf_map__fd(skel->maps.lock_stat);
stack = bpf_map__fd(skel->maps.stacks);
- con->lost = skel->bss->lost;
+ con->fails.task = skel->bss->task_fail;
+ con->fails.stack = skel->bss->stack_fail;
+ con->fails.time = skel->bss->time_fail;
stack_trace = zalloc(stack_size);
if (stack_trace == NULL)
diff --git a/tools/perf/util/bpf_skel/lock_contention.bpf.c b/tools/perf/util/bpf_skel/lock_contention.bpf.c
index ed9160999c32..3f4ee3992e81 100644
--- a/tools/perf/util/bpf_skel/lock_contention.bpf.c
+++ b/tools/perf/util/bpf_skel/lock_contention.bpf.c
@@ -124,7 +124,9 @@ int lock_owner;
int aggr_mode;
/* error stat */
-int lost;
+int task_fail;
+int stack_fail;
+int time_fail;
static inline int can_record(u64 *ctx)
{
@@ -283,7 +285,7 @@ int contention_begin(u64 *ctx)
bpf_map_update_elem(&tstamp, &pid, &zero, BPF_ANY);
pelem = bpf_map_lookup_elem(&tstamp, &pid);
if (pelem == NULL) {
- lost++;
+ __sync_fetch_and_add(&task_fail, 1);
return 0;
}
}
@@ -296,7 +298,7 @@ int contention_begin(u64 *ctx)
pelem->stack_id = bpf_get_stackid(ctx, &stacks,
BPF_F_FAST_STACK_CMP | stack_skip);
if (pelem->stack_id < 0)
- lost++;
+ __sync_fetch_and_add(&stack_fail, 1);
} else if (aggr_mode == LOCK_AGGR_TASK) {
struct task_struct *task;
@@ -340,6 +342,11 @@ int contention_end(u64 *ctx)
return 0;
duration = bpf_ktime_get_ns() - pelem->timestamp;
+ if ((__s64)duration < 0) {
+ bpf_map_delete_elem(&tstamp, &pid);
+ __sync_fetch_and_add(&time_fail, 1);
+ return 0;
+ }
switch (aggr_mode) {
case LOCK_AGGR_CALLER:
diff --git a/tools/perf/util/lock-contention.h b/tools/perf/util/lock-contention.h
index 040b618b2215..10c28302420c 100644
--- a/tools/perf/util/lock-contention.h
+++ b/tools/perf/util/lock-contention.h
@@ -122,14 +122,20 @@ struct evlist;
struct machine;
struct target;
+struct lock_contention_fails {
+ int task;
+ int stack;
+ int time;
+};
+
struct lock_contention {
struct evlist *evlist;
struct target *target;
struct machine *machine;
struct hlist_head *result;
struct lock_filter *filters;
+ struct lock_contention_fails fails;
unsigned long map_nr_entries;
- int lost;
int max_stack;
int stack_skip;
int aggr_mode;
--
2.40.0.348.gf938b09366-goog