2022-12-09 19:34:08

by Namhyung Kim

[permalink] [raw]
Subject: [PATCH 0/4] perf lock contention: Support task/addr aggregation mode (v1)

Hello,

This patchset adds two more aggregation modes for perf lock contention.

The first one is the per-task mode with -t/--threads option. The option
was there already but it remained broken with BPF for a while. Now it
supports the output with and without BPF.

# perf lock contention -a -b -t -- sleep 1
contended total wait max wait avg wait pid comm

11 11.85 us 2.23 us 1.08 us 0 swapper
2 11.13 us 10.22 us 5.56 us 749053 ThreadPoolForeg
1 8.15 us 8.15 us 8.15 us 321353 Chrome_ChildIOT
2 2.73 us 1.77 us 1.37 us 320761 Chrome_ChildIOT
1 1.40 us 1.40 us 1.40 us 320502 chrome
1 379 ns 379 ns 379 ns 321227 chrome

The other one is the per-lock-instance mode with -l/--lock-addr option.
If the lock has a symbol, it will be displayed as well.

# perf lock contention -a -b -l -- sleep 1
contended total wait max wait avg wait address symbol

3 4.79 us 2.33 us 1.60 us ffffffffbaed50c0 rcu_state
4 4.19 us 1.62 us 1.05 us ffffffffbae07a40 jiffies_lock
1 1.94 us 1.94 us 1.94 us ffff9262277861e0
1 387 ns 387 ns 387 ns ffff9260bfda4f60

It's based on the current acme/tmp.perf/core branch.
You can find the code in the 'perf/lock-con-aggr-v1' branch in

git://git.kernel.org/pub/scm/linux/kernel/git/namhyung/linux-perf.git

Thanks,
Namhyung


Namhyung Kim (4):
perf lock contention: Add lock_data.h for common data
perf lock contention: Implement -t/--threads option for BPF
perf lock contention: Add -l/--lock-addr option
perf test: Update perf lock contention test

tools/perf/Documentation/perf-lock.txt | 4 +
tools/perf/builtin-lock.c | 95 ++++++++++++++-----
tools/perf/tests/shell/lock_contention.sh | 48 ++++++++++
tools/perf/util/bpf_lock_contention.c | 72 ++++++++++----
.../perf/util/bpf_skel/lock_contention.bpf.c | 67 +++++++++----
tools/perf/util/bpf_skel/lock_data.h | 30 ++++++
tools/perf/util/lock-contention.h | 1 +
7 files changed, 255 insertions(+), 62 deletions(-)
create mode 100644 tools/perf/util/bpf_skel/lock_data.h


base-commit: b22802e295a80ec16e355d7208d2fbbd7bbc1b7a
--
2.39.0.rc1.256.g54fd8350bd-goog


2022-12-09 19:39:11

by Namhyung Kim

[permalink] [raw]
Subject: [PATCH 1/4] perf lock contention: Add lock_data.h for common data

Accessing BPF maps should use the same data types. Add bpf_skel/lock_data.h
to define the common data structures. No functional changes.

Signed-off-by: Namhyung Kim <[email protected]>
---
tools/perf/util/bpf_lock_contention.c | 19 ++++--------
.../perf/util/bpf_skel/lock_contention.bpf.c | 17 ++---------
tools/perf/util/bpf_skel/lock_data.h | 30 +++++++++++++++++++
3 files changed, 38 insertions(+), 28 deletions(-)
create mode 100644 tools/perf/util/bpf_skel/lock_data.h

diff --git a/tools/perf/util/bpf_lock_contention.c b/tools/perf/util/bpf_lock_contention.c
index f4ebb9a2e380..b6a8eb7164b3 100644
--- a/tools/perf/util/bpf_lock_contention.c
+++ b/tools/perf/util/bpf_lock_contention.c
@@ -12,17 +12,10 @@
#include <bpf/bpf.h>

#include "bpf_skel/lock_contention.skel.h"
+#include "bpf_skel/lock_data.h"

static struct lock_contention_bpf *skel;

-struct lock_contention_data {
- u64 total_time;
- u64 min_time;
- u64 max_time;
- u32 count;
- u32 flags;
-};
-
int lock_contention_prepare(struct lock_contention *con)
{
int i, fd;
@@ -110,8 +103,8 @@ int lock_contention_stop(void)
int lock_contention_read(struct lock_contention *con)
{
int fd, stack, err = 0;
- s32 prev_key, key;
- struct lock_contention_data data = {};
+ struct contention_key *prev_key, key;
+ struct contention_data data = {};
struct lock_stat *st = NULL;
struct machine *machine = con->machine;
u64 *stack_trace;
@@ -126,8 +119,8 @@ int lock_contention_read(struct lock_contention *con)
if (stack_trace == NULL)
return -1;

- prev_key = 0;
- while (!bpf_map_get_next_key(fd, &prev_key, &key)) {
+ prev_key = NULL;
+ while (!bpf_map_get_next_key(fd, prev_key, &key)) {
struct map *kmap;
struct symbol *sym;
int idx = 0;
@@ -184,7 +177,7 @@ int lock_contention_read(struct lock_contention *con)
}

hlist_add_head(&st->hash_entry, con->result);
- prev_key = key;
+ prev_key = &key;

/* we're fine now, reset the values */
st = NULL;
diff --git a/tools/perf/util/bpf_skel/lock_contention.bpf.c b/tools/perf/util/bpf_skel/lock_contention.bpf.c
index 9681cb59b0df..0f63cc28ccba 100644
--- a/tools/perf/util/bpf_skel/lock_contention.bpf.c
+++ b/tools/perf/util/bpf_skel/lock_contention.bpf.c
@@ -5,24 +5,11 @@
#include <bpf/bpf_tracing.h>
#include <bpf/bpf_core_read.h>

-/* maximum stack trace depth */
-#define MAX_STACKS 8
+#include "lock_data.h"

/* default buffer size */
#define MAX_ENTRIES 10240

-struct contention_key {
- __s32 stack_id;
-};
-
-struct contention_data {
- __u64 total_time;
- __u64 min_time;
- __u64 max_time;
- __u32 count;
- __u32 flags;
-};
-
struct tstamp_data {
__u64 timestamp;
__u64 lock;
@@ -34,7 +21,7 @@ struct tstamp_data {
struct {
__uint(type, BPF_MAP_TYPE_STACK_TRACE);
__uint(key_size, sizeof(__u32));
- __uint(value_size, MAX_STACKS * sizeof(__u64));
+ __uint(value_size, sizeof(__u64));
__uint(max_entries, MAX_ENTRIES);
} stacks SEC(".maps");

diff --git a/tools/perf/util/bpf_skel/lock_data.h b/tools/perf/util/bpf_skel/lock_data.h
new file mode 100644
index 000000000000..dbdf4caedc4a
--- /dev/null
+++ b/tools/perf/util/bpf_skel/lock_data.h
@@ -0,0 +1,30 @@
+// SPDX-License-Identifier: (GPL-2.0-only OR BSD-2-Clause)
+/* Data structures shared between BPF and tools. */
+#ifndef UTIL_BPF_SKEL_LOCK_DATA_H
+#define UTIL_BPF_SKEL_LOCK_DATA_H
+
+struct contention_key {
+ s32 stack_or_task_id;
+};
+
+#define TASK_COMM_LEN 16
+
+struct contention_task_data {
+ char comm[TASK_COMM_LEN];
+};
+
+struct contention_data {
+ u64 total_time;
+ u64 min_time;
+ u64 max_time;
+ u32 count;
+ u32 flags;
+};
+
+enum lock_aggr_mode {
+ LOCK_AGGR_ADDR = 0,
+ LOCK_AGGR_TASK,
+ LOCK_AGGR_CALLER,
+};
+
+#endif /* UTIL_BPF_SKEL_LOCK_DATA_H */
--
2.39.0.rc1.256.g54fd8350bd-goog

2022-12-09 19:58:20

by Namhyung Kim

[permalink] [raw]
Subject: [PATCH 2/4] perf lock contention: Implement -t/--threads option for BPF

The BPF didn't show the per-thread stat properly. Use task's thread id (PID)
as a key instead of stack_id and add a task_data map to save task comm names.

$ sudo ./perf lock con -abt -E 5 sleep 1
contended total wait max wait avg wait pid comm

1 740.66 ms 740.66 ms 740.66 ms 1950 nv_queue
3 305.50 ms 298.19 ms 101.83 ms 1884 nvidia-modeset/
1 25.14 us 25.14 us 25.14 us 2725038 EventManager_De
12 23.09 us 9.30 us 1.92 us 0 swapper
1 20.18 us 20.18 us 20.18 us 2725033 EventManager_De

Signed-off-by: Namhyung Kim <[email protected]>
---
tools/perf/builtin-lock.c | 13 ++----
tools/perf/util/bpf_lock_contention.c | 40 ++++++++++++++++--
.../perf/util/bpf_skel/lock_contention.bpf.c | 41 +++++++++++++++++--
tools/perf/util/lock-contention.h | 1 +
4 files changed, 78 insertions(+), 17 deletions(-)

diff --git a/tools/perf/builtin-lock.c b/tools/perf/builtin-lock.c
index 15ce6358f127..6fa3cdfec5cb 100644
--- a/tools/perf/builtin-lock.c
+++ b/tools/perf/builtin-lock.c
@@ -12,6 +12,7 @@
#include "util/target.h"
#include "util/callchain.h"
#include "util/lock-contention.h"
+#include "util/bpf_skel/lock_data.h"

#include <subcmd/pager.h>
#include <subcmd/parse-options.h>
@@ -61,11 +62,7 @@ static int max_stack_depth = CONTENTION_STACK_DEPTH;
static int stack_skip = CONTENTION_STACK_SKIP;
static int print_nr_entries = INT_MAX / 2;

-static enum {
- LOCK_AGGR_ADDR,
- LOCK_AGGR_TASK,
- LOCK_AGGR_CALLER,
-} aggr_mode = LOCK_AGGR_ADDR;
+static enum lock_aggr_mode aggr_mode = LOCK_AGGR_ADDR;

static struct thread_stat *thread_stat_find(u32 tid)
{
@@ -1619,6 +1616,7 @@ static int __cmd_contention(int argc, const char **argv)
.map_nr_entries = bpf_map_entries,
.max_stack = max_stack_depth,
.stack_skip = stack_skip,
+ .aggr_mode = show_thread_stats ? LOCK_AGGR_TASK : LOCK_AGGR_CALLER,
};

session = perf_session__new(use_bpf ? NULL : &data, &eops);
@@ -1691,11 +1689,6 @@ static int __cmd_contention(int argc, const char **argv)
if (select_key(true))
goto out_delete;

- if (show_thread_stats)
- aggr_mode = LOCK_AGGR_TASK;
- else
- aggr_mode = LOCK_AGGR_CALLER;
-
if (use_bpf) {
lock_contention_start();
if (argc)
diff --git a/tools/perf/util/bpf_lock_contention.c b/tools/perf/util/bpf_lock_contention.c
index b6a8eb7164b3..1590a9f05145 100644
--- a/tools/perf/util/bpf_lock_contention.c
+++ b/tools/perf/util/bpf_lock_contention.c
@@ -5,6 +5,7 @@
#include "util/map.h"
#include "util/symbol.h"
#include "util/target.h"
+#include "util/thread.h"
#include "util/thread_map.h"
#include "util/lock-contention.h"
#include <linux/zalloc.h>
@@ -30,10 +31,17 @@ int lock_contention_prepare(struct lock_contention *con)
}

bpf_map__set_value_size(skel->maps.stacks, con->max_stack * sizeof(u64));
- bpf_map__set_max_entries(skel->maps.stacks, con->map_nr_entries);
bpf_map__set_max_entries(skel->maps.lock_stat, con->map_nr_entries);
bpf_map__set_max_entries(skel->maps.tstamp, con->map_nr_entries);

+ if (con->aggr_mode == LOCK_AGGR_TASK) {
+ bpf_map__set_max_entries(skel->maps.task_data, con->map_nr_entries);
+ bpf_map__set_max_entries(skel->maps.stacks, 1);
+ } else {
+ bpf_map__set_max_entries(skel->maps.task_data, 1);
+ bpf_map__set_max_entries(skel->maps.stacks, con->map_nr_entries);
+ }
+
if (target__has_cpu(target))
ncpus = perf_cpu_map__nr(evlist->core.user_requested_cpus);
if (target__has_task(target))
@@ -82,7 +90,9 @@ int lock_contention_prepare(struct lock_contention *con)
bpf_map_update_elem(fd, &pid, &val, BPF_ANY);
}

+ /* these don't work well if in the rodata section */
skel->bss->stack_skip = con->stack_skip;
+ skel->bss->aggr_mode = con->aggr_mode;

lock_contention_bpf__attach(skel);
return 0;
@@ -102,7 +112,7 @@ int lock_contention_stop(void)

int lock_contention_read(struct lock_contention *con)
{
- int fd, stack, err = 0;
+ int fd, stack, task_fd, err = 0;
struct contention_key *prev_key, key;
struct contention_data data = {};
struct lock_stat *st = NULL;
@@ -112,6 +122,7 @@ int lock_contention_read(struct lock_contention *con)

fd = bpf_map__fd(skel->maps.lock_stat);
stack = bpf_map__fd(skel->maps.stacks);
+ task_fd = bpf_map__fd(skel->maps.task_data);

con->lost = skel->bss->lost;

@@ -119,6 +130,13 @@ int lock_contention_read(struct lock_contention *con)
if (stack_trace == NULL)
return -1;

+ if (con->aggr_mode == LOCK_AGGR_TASK) {
+ struct thread *idle = __machine__findnew_thread(machine,
+ /*pid=*/0,
+ /*tid=*/0);
+ thread__set_comm(idle, "swapper", /*timestamp=*/0);
+ }
+
prev_key = NULL;
while (!bpf_map_get_next_key(fd, prev_key, &key)) {
struct map *kmap;
@@ -143,6 +161,22 @@ int lock_contention_read(struct lock_contention *con)

st->flags = data.flags;

+ if (con->aggr_mode == LOCK_AGGR_TASK) {
+ struct contention_task_data task;
+ struct thread *t;
+
+ st->addr = key.stack_or_task_id;
+
+ /* do not update idle comm which contains CPU number */
+ if (st->addr) {
+ bpf_map_lookup_elem(task_fd, &key, &task);
+ t = __machine__findnew_thread(machine, /*pid=*/-1,
+ key.stack_or_task_id);
+ thread__set_comm(t, task.comm, /*timestamp=*/0);
+ }
+ goto next;
+ }
+
bpf_map_lookup_elem(stack, &key, stack_trace);

/* skip lock internal functions */
@@ -175,7 +209,7 @@ int lock_contention_read(struct lock_contention *con)
if (st->callstack == NULL)
break;
}
-
+next:
hlist_add_head(&st->hash_entry, con->result);
prev_key = &key;

diff --git a/tools/perf/util/bpf_skel/lock_contention.bpf.c b/tools/perf/util/bpf_skel/lock_contention.bpf.c
index 0f63cc28ccba..cd405adcd252 100644
--- a/tools/perf/util/bpf_skel/lock_contention.bpf.c
+++ b/tools/perf/util/bpf_skel/lock_contention.bpf.c
@@ -41,6 +41,13 @@ struct {
__uint(max_entries, MAX_ENTRIES);
} lock_stat SEC(".maps");

+struct {
+ __uint(type, BPF_MAP_TYPE_HASH);
+ __uint(key_size, sizeof(__u32));
+ __uint(value_size, sizeof(struct contention_task_data));
+ __uint(max_entries, MAX_ENTRIES);
+} task_data SEC(".maps");
+
struct {
__uint(type, BPF_MAP_TYPE_HASH);
__uint(key_size, sizeof(__u32));
@@ -61,6 +68,9 @@ int has_cpu;
int has_task;
int stack_skip;

+/* determine the key of lock stat */
+int aggr_mode;
+
/* error stat */
int lost;

@@ -87,6 +97,19 @@ static inline int can_record(void)
return 1;
}

+static inline void update_task_data(__u32 pid)
+{
+ struct contention_task_data *p;
+
+ p = bpf_map_lookup_elem(&task_data, &pid);
+ if (p == NULL) {
+ struct contention_task_data data;
+
+ bpf_get_current_comm(data.comm, sizeof(data.comm));
+ bpf_map_update_elem(&task_data, &pid, &data, BPF_NOEXIST);
+ }
+}
+
SEC("tp_btf/contention_begin")
int contention_begin(u64 *ctx)
{
@@ -115,10 +138,14 @@ int contention_begin(u64 *ctx)
pelem->timestamp = bpf_ktime_get_ns();
pelem->lock = (__u64)ctx[0];
pelem->flags = (__u32)ctx[1];
- pelem->stack_id = bpf_get_stackid(ctx, &stacks, BPF_F_FAST_STACK_CMP | stack_skip);

- if (pelem->stack_id < 0)
- lost++;
+ if (aggr_mode == LOCK_AGGR_CALLER) {
+ pelem->stack_id = bpf_get_stackid(ctx, &stacks,
+ BPF_F_FAST_STACK_CMP | stack_skip);
+ if (pelem->stack_id < 0)
+ lost++;
+ }
+
return 0;
}

@@ -141,7 +168,13 @@ int contention_end(u64 *ctx)

duration = bpf_ktime_get_ns() - pelem->timestamp;

- key.stack_id = pelem->stack_id;
+ if (aggr_mode == LOCK_AGGR_CALLER) {
+ key.stack_or_task_id = pelem->stack_id;
+ } else {
+ key.stack_or_task_id = pid;
+ update_task_data(pid);
+ }
+
data = bpf_map_lookup_elem(&lock_stat, &key);
if (!data) {
struct contention_data first = {
diff --git a/tools/perf/util/lock-contention.h b/tools/perf/util/lock-contention.h
index a2346875098d..47fd47fb56c1 100644
--- a/tools/perf/util/lock-contention.h
+++ b/tools/perf/util/lock-contention.h
@@ -117,6 +117,7 @@ struct lock_contention {
int lost;
int max_stack;
int stack_skip;
+ int aggr_mode;
};

#ifdef HAVE_BPF_SKEL
--
2.39.0.rc1.256.g54fd8350bd-goog

2022-12-12 20:05:51

by Arnaldo Carvalho de Melo

[permalink] [raw]
Subject: Re: [PATCH 1/4] perf lock contention: Add lock_data.h for common data

Em Fri, Dec 09, 2022 at 11:07:24AM -0800, Namhyung Kim escreveu:
> Accessing BPF maps should use the same data types. Add bpf_skel/lock_data.h
> to define the common data structures. No functional changes.

You forgot to update one of the stack_id users, that field got renamed:

util/bpf_skel/lock_contention.bpf.c:144:6: error: no member named 'stack_id' in 'struct contention_key'
key.stack_id = pelem->stack_id;
~~~ ^
1 error generated.
make[2]: *** [Makefile.perf:1075: /tmp/build/perf/util/bpf_skel/.tmp/lock_contention.bpf.o] Error 1
make[1]: *** [Makefile.perf:236: sub-make] Error 2
make: *** [Makefile:113: install-bin] Error 2
make: Leaving directory '/var/home/acme/git/perf/tools/perf'

Performance counter stats for 'make -k NO_LIBTRACEEVENT=1 BUILD_BPF_SKEL=1 CORESIGHT=1 O=/tmp/build/perf -C tools/perf install-bin':

7,005,216,342 cycles:u
11,851,225,594 instructions:u # 1.69 insn per cycle

3.168945139 seconds time elapsed

1.730964000 seconds user
1.578932000 seconds sys


⬢[acme@toolbox perf]$ git log --oneline -4
f6e7a5f1db49dc8e (HEAD) perf lock contention: Add lock_data.h for common data
5d9b55713c5c037f perf python: Account for multiple words in CC
d9078bf3f3320457 perf off_cpu: Fix a typo in BTF tracepoint name, it should be 'btf_trace_sched_switch'
3b7ea76f0f7844f5 perf test: Update event group check for support of uncore event
⬢[acme@toolbox perf]$

After some point it builds.

I'm fixing this to keep it bisectable.

- Arnaldo

> Signed-off-by: Namhyung Kim <[email protected]>
> ---
> tools/perf/util/bpf_lock_contention.c | 19 ++++--------
> .../perf/util/bpf_skel/lock_contention.bpf.c | 17 ++---------
> tools/perf/util/bpf_skel/lock_data.h | 30 +++++++++++++++++++
> 3 files changed, 38 insertions(+), 28 deletions(-)
> create mode 100644 tools/perf/util/bpf_skel/lock_data.h
>
> diff --git a/tools/perf/util/bpf_lock_contention.c b/tools/perf/util/bpf_lock_contention.c
> index f4ebb9a2e380..b6a8eb7164b3 100644
> --- a/tools/perf/util/bpf_lock_contention.c
> +++ b/tools/perf/util/bpf_lock_contention.c
> @@ -12,17 +12,10 @@
> #include <bpf/bpf.h>
>
> #include "bpf_skel/lock_contention.skel.h"
> +#include "bpf_skel/lock_data.h"
>
> static struct lock_contention_bpf *skel;
>
> -struct lock_contention_data {
> - u64 total_time;
> - u64 min_time;
> - u64 max_time;
> - u32 count;
> - u32 flags;
> -};
> -
> int lock_contention_prepare(struct lock_contention *con)
> {
> int i, fd;
> @@ -110,8 +103,8 @@ int lock_contention_stop(void)
> int lock_contention_read(struct lock_contention *con)
> {
> int fd, stack, err = 0;
> - s32 prev_key, key;
> - struct lock_contention_data data = {};
> + struct contention_key *prev_key, key;
> + struct contention_data data = {};
> struct lock_stat *st = NULL;
> struct machine *machine = con->machine;
> u64 *stack_trace;
> @@ -126,8 +119,8 @@ int lock_contention_read(struct lock_contention *con)
> if (stack_trace == NULL)
> return -1;
>
> - prev_key = 0;
> - while (!bpf_map_get_next_key(fd, &prev_key, &key)) {
> + prev_key = NULL;
> + while (!bpf_map_get_next_key(fd, prev_key, &key)) {
> struct map *kmap;
> struct symbol *sym;
> int idx = 0;
> @@ -184,7 +177,7 @@ int lock_contention_read(struct lock_contention *con)
> }
>
> hlist_add_head(&st->hash_entry, con->result);
> - prev_key = key;
> + prev_key = &key;
>
> /* we're fine now, reset the values */
> st = NULL;
> diff --git a/tools/perf/util/bpf_skel/lock_contention.bpf.c b/tools/perf/util/bpf_skel/lock_contention.bpf.c
> index 9681cb59b0df..0f63cc28ccba 100644
> --- a/tools/perf/util/bpf_skel/lock_contention.bpf.c
> +++ b/tools/perf/util/bpf_skel/lock_contention.bpf.c
> @@ -5,24 +5,11 @@
> #include <bpf/bpf_tracing.h>
> #include <bpf/bpf_core_read.h>
>
> -/* maximum stack trace depth */
> -#define MAX_STACKS 8
> +#include "lock_data.h"
>
> /* default buffer size */
> #define MAX_ENTRIES 10240
>
> -struct contention_key {
> - __s32 stack_id;
> -};
> -
> -struct contention_data {
> - __u64 total_time;
> - __u64 min_time;
> - __u64 max_time;
> - __u32 count;
> - __u32 flags;
> -};
> -
> struct tstamp_data {
> __u64 timestamp;
> __u64 lock;
> @@ -34,7 +21,7 @@ struct tstamp_data {
> struct {
> __uint(type, BPF_MAP_TYPE_STACK_TRACE);
> __uint(key_size, sizeof(__u32));
> - __uint(value_size, MAX_STACKS * sizeof(__u64));
> + __uint(value_size, sizeof(__u64));
> __uint(max_entries, MAX_ENTRIES);
> } stacks SEC(".maps");
>
> diff --git a/tools/perf/util/bpf_skel/lock_data.h b/tools/perf/util/bpf_skel/lock_data.h
> new file mode 100644
> index 000000000000..dbdf4caedc4a
> --- /dev/null
> +++ b/tools/perf/util/bpf_skel/lock_data.h
> @@ -0,0 +1,30 @@
> +// SPDX-License-Identifier: (GPL-2.0-only OR BSD-2-Clause)
> +/* Data structures shared between BPF and tools. */
> +#ifndef UTIL_BPF_SKEL_LOCK_DATA_H
> +#define UTIL_BPF_SKEL_LOCK_DATA_H
> +
> +struct contention_key {
> + s32 stack_or_task_id;
> +};
> +
> +#define TASK_COMM_LEN 16
> +
> +struct contention_task_data {
> + char comm[TASK_COMM_LEN];
> +};
> +
> +struct contention_data {
> + u64 total_time;
> + u64 min_time;
> + u64 max_time;
> + u32 count;
> + u32 flags;
> +};
> +
> +enum lock_aggr_mode {
> + LOCK_AGGR_ADDR = 0,
> + LOCK_AGGR_TASK,
> + LOCK_AGGR_CALLER,
> +};
> +
> +#endif /* UTIL_BPF_SKEL_LOCK_DATA_H */
> --
> 2.39.0.rc1.256.g54fd8350bd-goog

--

- Arnaldo

2022-12-12 20:13:42

by Namhyung Kim

[permalink] [raw]
Subject: Re: [PATCH 1/4] perf lock contention: Add lock_data.h for common data

Hi Arnaldo,

On Mon, Dec 12, 2022 at 11:45 AM Arnaldo Carvalho de Melo
<[email protected]> wrote:
>
> Em Mon, Dec 12, 2022 at 04:43:43PM -0300, Arnaldo Carvalho de Melo escreveu:
> > Em Mon, Dec 12, 2022 at 04:42:30PM -0300, Arnaldo Carvalho de Melo escreveu:
> > > Em Fri, Dec 09, 2022 at 11:07:24AM -0800, Namhyung Kim escreveu:
> > > > Accessing BPF maps should use the same data types. Add bpf_skel/lock_data.h
> > > > to define the common data structures. No functional changes.
> > >
> > > You forgot to update one of the stack_id users, that field got renamed:
> > >
> > > util/bpf_skel/lock_contention.bpf.c:144:6: error: no member named 'stack_id' in 'struct contention_key'
> > > key.stack_id = pelem->stack_id;
> > > ~~~ ^
> > > 1 error generated.
> > > make[2]: *** [Makefile.perf:1075: /tmp/build/perf/util/bpf_skel/.tmp/lock_contention.bpf.o] Error 1
> > > make[1]: *** [Makefile.perf:236: sub-make] Error 2
> > > make: *** [Makefile:113: install-bin] Error 2
> > > make: Leaving directory '/var/home/acme/git/perf/tools/perf'

Oops, right.

> > >
> > > Performance counter stats for 'make -k NO_LIBTRACEEVENT=1 BUILD_BPF_SKEL=1 CORESIGHT=1 O=/tmp/build/perf -C tools/perf install-bin':
> > >
> > > 7,005,216,342 cycles:u
> > > 11,851,225,594 instructions:u # 1.69 insn per cycle
> > >
> > > 3.168945139 seconds time elapsed
> > >
> > > 1.730964000 seconds user
> > > 1.578932000 seconds sys
> > >
> > >
> > > ⬢[acme@toolbox perf]$ git log --oneline -4
> > > f6e7a5f1db49dc8e (HEAD) perf lock contention: Add lock_data.h for common data
> > > 5d9b55713c5c037f perf python: Account for multiple words in CC
> > > d9078bf3f3320457 perf off_cpu: Fix a typo in BTF tracepoint name, it should be 'btf_trace_sched_switch'
> > > 3b7ea76f0f7844f5 perf test: Update event group check for support of uncore event
> > > ⬢[acme@toolbox perf]$
> > >
> > > After some point it builds.
> > >
> > > I'm fixing this to keep it bisectable.
> >
> > I folded this:
> >
> > diff --git a/tools/perf/util/bpf_skel/lock_contention.bpf.c b/tools/perf/util/bpf_skel/lock_contention.bpf.c
> > index 0f63cc28ccbabd21..64fd1e040ac86e58 100644
> > --- a/tools/perf/util/bpf_skel/lock_contention.bpf.c
> > +++ b/tools/perf/util/bpf_skel/lock_contention.bpf.c
> > @@ -141,7 +141,7 @@ int contention_end(u64 *ctx)
> >
> > duration = bpf_ktime_get_ns() - pelem->timestamp;
> >
> > - key.stack_id = pelem->stack_id;
> > + key.stack_or_task_id = pelem->stack_id;
> > data = bpf_map_lookup_elem(&lock_stat, &key);
> > if (!data) {
> > struct contention_data first = {

Thanks for fixing this.

>
>
> And then fixed up this:
>
> Could not apply 3d4947c7bd10beba... perf lock contention: Implement -t/--threads option for BPF
> ⬢[acme@toolbox perf]$
> ⬢[acme@toolbox perf]$
> ⬢[acme@toolbox perf]$ git diff
> diff --cc tools/perf/util/bpf_skel/lock_contention.bpf.c
> index 64fd1e040ac86e58,cd405adcd252b82d..0000000000000000
> --- a/tools/perf/util/bpf_skel/lock_contention.bpf.c
> +++ b/tools/perf/util/bpf_skel/lock_contention.bpf.c
> @@@ -141,7 -168,13 +168,17 @@@ int contention_end(u64 *ctx
>
> duration = bpf_ktime_get_ns() - pelem->timestamp;
>
> ++<<<<<<< HEAD
> + key.stack_or_task_id = pelem->stack_id;
> ++=======
> + if (aggr_mode == LOCK_AGGR_CALLER) {
> + key.stack_or_task_id = pelem->stack_id;
> + } else {
> + key.stack_or_task_id = pid;
> + update_task_data(pid);
> + }
> +
> ++>>>>>>> 3d4947c7bd10beba (perf lock contention: Implement -t/--threads option for BPF)

Sure, it looks good to me.

Thanks,
Namhyung

2022-12-12 20:35:50

by Arnaldo Carvalho de Melo

[permalink] [raw]
Subject: Re: [PATCH 1/4] perf lock contention: Add lock_data.h for common data

Em Mon, Dec 12, 2022 at 04:43:43PM -0300, Arnaldo Carvalho de Melo escreveu:
> Em Mon, Dec 12, 2022 at 04:42:30PM -0300, Arnaldo Carvalho de Melo escreveu:
> > Em Fri, Dec 09, 2022 at 11:07:24AM -0800, Namhyung Kim escreveu:
> > > Accessing BPF maps should use the same data types. Add bpf_skel/lock_data.h
> > > to define the common data structures. No functional changes.
> >
> > You forgot to update one of the stack_id users, that field got renamed:
> >
> > util/bpf_skel/lock_contention.bpf.c:144:6: error: no member named 'stack_id' in 'struct contention_key'
> > key.stack_id = pelem->stack_id;
> > ~~~ ^
> > 1 error generated.
> > make[2]: *** [Makefile.perf:1075: /tmp/build/perf/util/bpf_skel/.tmp/lock_contention.bpf.o] Error 1
> > make[1]: *** [Makefile.perf:236: sub-make] Error 2
> > make: *** [Makefile:113: install-bin] Error 2
> > make: Leaving directory '/var/home/acme/git/perf/tools/perf'
> >
> > Performance counter stats for 'make -k NO_LIBTRACEEVENT=1 BUILD_BPF_SKEL=1 CORESIGHT=1 O=/tmp/build/perf -C tools/perf install-bin':
> >
> > 7,005,216,342 cycles:u
> > 11,851,225,594 instructions:u # 1.69 insn per cycle
> >
> > 3.168945139 seconds time elapsed
> >
> > 1.730964000 seconds user
> > 1.578932000 seconds sys
> >
> >
> > ⬢[acme@toolbox perf]$ git log --oneline -4
> > f6e7a5f1db49dc8e (HEAD) perf lock contention: Add lock_data.h for common data
> > 5d9b55713c5c037f perf python: Account for multiple words in CC
> > d9078bf3f3320457 perf off_cpu: Fix a typo in BTF tracepoint name, it should be 'btf_trace_sched_switch'
> > 3b7ea76f0f7844f5 perf test: Update event group check for support of uncore event
> > ⬢[acme@toolbox perf]$
> >
> > After some point it builds.
> >
> > I'm fixing this to keep it bisectable.
>
> I folded this:
>
> diff --git a/tools/perf/util/bpf_skel/lock_contention.bpf.c b/tools/perf/util/bpf_skel/lock_contention.bpf.c
> index 0f63cc28ccbabd21..64fd1e040ac86e58 100644
> --- a/tools/perf/util/bpf_skel/lock_contention.bpf.c
> +++ b/tools/perf/util/bpf_skel/lock_contention.bpf.c
> @@ -141,7 +141,7 @@ int contention_end(u64 *ctx)
>
> duration = bpf_ktime_get_ns() - pelem->timestamp;
>
> - key.stack_id = pelem->stack_id;
> + key.stack_or_task_id = pelem->stack_id;
> data = bpf_map_lookup_elem(&lock_stat, &key);
> if (!data) {
> struct contention_data first = {


And then fixed up this:

Could not apply 3d4947c7bd10beba... perf lock contention: Implement -t/--threads option for BPF
⬢[acme@toolbox perf]$
⬢[acme@toolbox perf]$
⬢[acme@toolbox perf]$ git diff
diff --cc tools/perf/util/bpf_skel/lock_contention.bpf.c
index 64fd1e040ac86e58,cd405adcd252b82d..0000000000000000
--- a/tools/perf/util/bpf_skel/lock_contention.bpf.c
+++ b/tools/perf/util/bpf_skel/lock_contention.bpf.c
@@@ -141,7 -168,13 +168,17 @@@ int contention_end(u64 *ctx

duration = bpf_ktime_get_ns() - pelem->timestamp;

++<<<<<<< HEAD
+ key.stack_or_task_id = pelem->stack_id;
++=======
+ if (aggr_mode == LOCK_AGGR_CALLER) {
+ key.stack_or_task_id = pelem->stack_id;
+ } else {
+ key.stack_or_task_id = pid;
+ update_task_data(pid);
+ }
+
++>>>>>>> 3d4947c7bd10beba (perf lock contention: Implement -t/--threads option for BPF)

2022-12-12 20:38:10

by Arnaldo Carvalho de Melo

[permalink] [raw]
Subject: Re: [PATCH 1/4] perf lock contention: Add lock_data.h for common data

Em Mon, Dec 12, 2022 at 04:42:30PM -0300, Arnaldo Carvalho de Melo escreveu:
> Em Fri, Dec 09, 2022 at 11:07:24AM -0800, Namhyung Kim escreveu:
> > Accessing BPF maps should use the same data types. Add bpf_skel/lock_data.h
> > to define the common data structures. No functional changes.
>
> You forgot to update one of the stack_id users, that field got renamed:
>
> util/bpf_skel/lock_contention.bpf.c:144:6: error: no member named 'stack_id' in 'struct contention_key'
> key.stack_id = pelem->stack_id;
> ~~~ ^
> 1 error generated.
> make[2]: *** [Makefile.perf:1075: /tmp/build/perf/util/bpf_skel/.tmp/lock_contention.bpf.o] Error 1
> make[1]: *** [Makefile.perf:236: sub-make] Error 2
> make: *** [Makefile:113: install-bin] Error 2
> make: Leaving directory '/var/home/acme/git/perf/tools/perf'
>
> Performance counter stats for 'make -k NO_LIBTRACEEVENT=1 BUILD_BPF_SKEL=1 CORESIGHT=1 O=/tmp/build/perf -C tools/perf install-bin':
>
> 7,005,216,342 cycles:u
> 11,851,225,594 instructions:u # 1.69 insn per cycle
>
> 3.168945139 seconds time elapsed
>
> 1.730964000 seconds user
> 1.578932000 seconds sys
>
>
> ⬢[acme@toolbox perf]$ git log --oneline -4
> f6e7a5f1db49dc8e (HEAD) perf lock contention: Add lock_data.h for common data
> 5d9b55713c5c037f perf python: Account for multiple words in CC
> d9078bf3f3320457 perf off_cpu: Fix a typo in BTF tracepoint name, it should be 'btf_trace_sched_switch'
> 3b7ea76f0f7844f5 perf test: Update event group check for support of uncore event
> ⬢[acme@toolbox perf]$
>
> After some point it builds.
>
> I'm fixing this to keep it bisectable.

I folded this:


diff --git a/tools/perf/util/bpf_skel/lock_contention.bpf.c b/tools/perf/util/bpf_skel/lock_contention.bpf.c
index 0f63cc28ccbabd21..64fd1e040ac86e58 100644
--- a/tools/perf/util/bpf_skel/lock_contention.bpf.c
+++ b/tools/perf/util/bpf_skel/lock_contention.bpf.c
@@ -141,7 +141,7 @@ int contention_end(u64 *ctx)

duration = bpf_ktime_get_ns() - pelem->timestamp;

- key.stack_id = pelem->stack_id;
+ key.stack_or_task_id = pelem->stack_id;
data = bpf_map_lookup_elem(&lock_stat, &key);
if (!data) {
struct contention_data first = {

2022-12-12 20:45:06

by Arnaldo Carvalho de Melo

[permalink] [raw]
Subject: Re: [PATCH 0/4] perf lock contention: Support task/addr aggregation mode (v1)

Em Fri, Dec 09, 2022 at 11:07:23AM -0800, Namhyung Kim escreveu:
> Hello,
>
> This patchset adds two more aggregation modes for perf lock contention.


Thanks, applied.

- Arnaldo


> The first one is the per-task mode with -t/--threads option. The option
> was there already but it remained broken with BPF for a while. Now it
> supports the output with and without BPF.
>
> # perf lock contention -a -b -t -- sleep 1
> contended total wait max wait avg wait pid comm
>
> 11 11.85 us 2.23 us 1.08 us 0 swapper
> 2 11.13 us 10.22 us 5.56 us 749053 ThreadPoolForeg
> 1 8.15 us 8.15 us 8.15 us 321353 Chrome_ChildIOT
> 2 2.73 us 1.77 us 1.37 us 320761 Chrome_ChildIOT
> 1 1.40 us 1.40 us 1.40 us 320502 chrome
> 1 379 ns 379 ns 379 ns 321227 chrome
>
> The other one is the per-lock-instance mode with -l/--lock-addr option.
> If the lock has a symbol, it will be displayed as well.
>
> # perf lock contention -a -b -l -- sleep 1
> contended total wait max wait avg wait address symbol
>
> 3 4.79 us 2.33 us 1.60 us ffffffffbaed50c0 rcu_state
> 4 4.19 us 1.62 us 1.05 us ffffffffbae07a40 jiffies_lock
> 1 1.94 us 1.94 us 1.94 us ffff9262277861e0
> 1 387 ns 387 ns 387 ns ffff9260bfda4f60
>
> It's based on the current acme/tmp.perf/core branch.
> You can find the code in the 'perf/lock-con-aggr-v1' branch in
>
> git://git.kernel.org/pub/scm/linux/kernel/git/namhyung/linux-perf.git
>
> Thanks,
> Namhyung
>
>
> Namhyung Kim (4):
> perf lock contention: Add lock_data.h for common data
> perf lock contention: Implement -t/--threads option for BPF
> perf lock contention: Add -l/--lock-addr option
> perf test: Update perf lock contention test
>
> tools/perf/Documentation/perf-lock.txt | 4 +
> tools/perf/builtin-lock.c | 95 ++++++++++++++-----
> tools/perf/tests/shell/lock_contention.sh | 48 ++++++++++
> tools/perf/util/bpf_lock_contention.c | 72 ++++++++++----
> .../perf/util/bpf_skel/lock_contention.bpf.c | 67 +++++++++----
> tools/perf/util/bpf_skel/lock_data.h | 30 ++++++
> tools/perf/util/lock-contention.h | 1 +
> 7 files changed, 255 insertions(+), 62 deletions(-)
> create mode 100644 tools/perf/util/bpf_skel/lock_data.h
>
>
> base-commit: b22802e295a80ec16e355d7208d2fbbd7bbc1b7a
> --
> 2.39.0.rc1.256.g54fd8350bd-goog

--

- Arnaldo