2022-01-04 18:21:00

by Namhyung Kim

[permalink] [raw]
Subject: [PATCHSET 0/6] perf lock: Random updates for the locking analysis (v1)

Hello,

I have some updates to the perf lock command (focused on 'report').
The main change is to add a -c (or --combine-locks) option that
aggregates results based on lock class name.

Without this option, the result deals with lock addresses so instances
in the same lock class will have separate entries like below:

# perf lock report
Name acquired contended avg wait (ns) total wait (ns) max wait (ns) min wait (ns)

rcu_read_lock 251225 0 0 0 0 0
&(ei->i_block_re... 8731 0 0 0 0 0
&sb->s_type->i_l... 8731 0 0 0 0 0
hrtimer_bases.lo... 5261 0 0 0 0 0
hrtimer_bases.lo... 2626 0 0 0 0 0
hrtimer_bases.lo... 1953 0 0 0 0 0
hrtimer_bases.lo... 1382 0 0 0 0 0
cpu_hotplug_lock... 1350 0 0 0 0 0
hrtimer_bases.lo... 1273 0 0 0 0 0
hrtimer_bases.lo... 1269 0 0 0 0 0
hrtimer_bases.lo... 1198 0 0 0 0 0
hrtimer_bases.lo... 1116 0 0 0 0 0
&base->lock 1109 0 0 0 0 0
hrtimer_bases.lo... 1067 0 0 0 0 0
hrtimer_bases.lo... 1052 0 0 0 0 0
hrtimer_bases.lo... 957 0 0 0 0 0
hrtimer_bases.lo... 948 0 0 0 0 0
css_set_lock 791 0 0 0 0 0
hrtimer_bases.lo... 752 0 0 0 0 0
&lruvec->lru_loc... 747 5 11254 56272 18317 1412
hrtimer_bases.lo... 738 0 0 0 0 0
&newf->file_lock... 706 15 1025 15388 2279 618
hrtimer_bases.lo... 702 0 0 0 0 0
hrtimer_bases.lo... 694 0 0 0 0 0
...

With -c option, the hrtimer_bases.lock would be combined into a single
entry. Also note that the lock names are correctly displayed now.

# perf lock report -c
Name acquired contended avg wait (ns) total wait (ns) max wait (ns) min wait (ns)

rcu_read_lock 251225 0 0 0 0 0
hrtimer_bases.lock 39449 0 0 0 0 0
&sb->s_type->i_l... 10301 1 662 662 662 662
ptlock_ptr(page) 10173 2 701 1402 760 642
&(ei->i_block_re... 8732 0 0 0 0 0
&base->lock 6705 0 0 0 0 0
&p->pi_lock 5549 0 0 0 0 0
&dentry->d_lockr... 5010 4 1274 5097 1844 789
&ep->lock 2750 0 0 0 0 0
&(__futex_data.q... 2331 0 0 0 0 0
(null) 1878 0 0 0 0 0
cpu_hotplug_lock 1350 0 0 0 0 0
&____s->seqcount 1349 0 0 0 0 0
&newf->file_lock 1001 15 1025 15388 2279 618
...

Maybe we can make it default later (with a config and --no-combine-locks).

You can get it from 'perf/lock-combine-v1' branch at

git://git.kernel.org/pub/scm/linux/kernel/git/namhyung/linux-perf.git

Thanks,
Namhyung


Namhyung Kim (6):
perf lock: Convert lockhash_table to use hlist
perf lock: Change type of lock_stat->addr to u64
perf lock: Sort map info based on class name
perf lock: Fix lock name length check for printing
perf lock: Add -c/--combine-locks option
perf lock: Carefully combine lock stats for discarded entries

tools/perf/Documentation/perf-lock.txt | 4 +
tools/perf/builtin-lock.c | 155 +++++++++++++++++++------
2 files changed, 124 insertions(+), 35 deletions(-)


base-commit: b9f6fbb3b2c29736970ae9fcc0e82b0bd459442b
--
2.34.1.448.ga2b2bfdf31-goog



2022-01-04 18:21:05

by Namhyung Kim

[permalink] [raw]
Subject: [PATCH 1/6] perf lock: Convert lockhash_table to use hlist

An hlist_head holds a single pointer (a list_head holds two), so using
it for the hash table buckets saves some memory.

Signed-off-by: Namhyung Kim <[email protected]>
---
tools/perf/builtin-lock.c | 16 ++++++++--------
1 file changed, 8 insertions(+), 8 deletions(-)

diff --git a/tools/perf/builtin-lock.c b/tools/perf/builtin-lock.c
index d70131b7b1b1..43139166f02e 100644
--- a/tools/perf/builtin-lock.c
+++ b/tools/perf/builtin-lock.c
@@ -38,13 +38,13 @@ static struct perf_session *session;
#define LOCKHASH_BITS 12
#define LOCKHASH_SIZE (1UL << LOCKHASH_BITS)

-static struct list_head lockhash_table[LOCKHASH_SIZE];
+static struct hlist_head lockhash_table[LOCKHASH_SIZE];

#define __lockhashfn(key) hash_long((unsigned long)key, LOCKHASH_BITS)
#define lockhashentry(key) (lockhash_table + __lockhashfn((key)))

struct lock_stat {
- struct list_head hash_entry;
+ struct hlist_node hash_entry;
struct rb_node rb; /* used for sorting */

/*
@@ -317,10 +317,10 @@ static struct lock_stat *pop_from_result(void)

static struct lock_stat *lock_stat_findnew(void *addr, const char *name)
{
- struct list_head *entry = lockhashentry(addr);
+ struct hlist_head *entry = lockhashentry(addr);
struct lock_stat *ret, *new;

- list_for_each_entry(ret, entry, hash_entry) {
+ hlist_for_each_entry(ret, entry, hash_entry) {
if (ret->addr == addr)
return ret;
}
@@ -339,7 +339,7 @@ static struct lock_stat *lock_stat_findnew(void *addr, const char *name)
strcpy(new->name, name);
new->wait_time_min = ULLONG_MAX;

- list_add(&new->hash_entry, entry);
+ hlist_add_head(&new->hash_entry, entry);
return new;

alloc_failed:
@@ -781,7 +781,7 @@ static void dump_map(void)

pr_info("Address of instance: name of class\n");
for (i = 0; i < LOCKHASH_SIZE; i++) {
- list_for_each_entry(st, &lockhash_table[i], hash_entry) {
+ hlist_for_each_entry(st, &lockhash_table[i], hash_entry) {
pr_info(" %p: %s\n", st->addr, st->name);
}
}
@@ -838,7 +838,7 @@ static void sort_result(void)
struct lock_stat *st;

for (i = 0; i < LOCKHASH_SIZE; i++) {
- list_for_each_entry(st, &lockhash_table[i], hash_entry) {
+ hlist_for_each_entry(st, &lockhash_table[i], hash_entry) {
insert_to_result(st, compare);
}
}
@@ -990,7 +990,7 @@ int cmd_lock(int argc, const char **argv)
int rc = 0;

for (i = 0; i < LOCKHASH_SIZE; i++)
- INIT_LIST_HEAD(lockhash_table + i);
+ INIT_HLIST_HEAD(lockhash_table + i);

argc = parse_options_subcommand(argc, argv, lock_options, lock_subcommands,
lock_usage, PARSE_OPT_STOP_AT_NON_OPTION);
--
2.34.1.448.ga2b2bfdf31-goog


2022-01-04 18:21:08

by Namhyung Kim

[permalink] [raw]
Subject: [PATCH 2/6] perf lock: Change type of lock_stat->addr to u64

As evsel__intval() returns u64, we can just use it as is.

Signed-off-by: Namhyung Kim <[email protected]>
---
tools/perf/builtin-lock.c | 35 +++++++++--------------------------
1 file changed, 9 insertions(+), 26 deletions(-)

diff --git a/tools/perf/builtin-lock.c b/tools/perf/builtin-lock.c
index 43139166f02e..c4b5c3d71ae3 100644
--- a/tools/perf/builtin-lock.c
+++ b/tools/perf/builtin-lock.c
@@ -47,12 +47,7 @@ struct lock_stat {
struct hlist_node hash_entry;
struct rb_node rb; /* used for sorting */

- /*
- * FIXME: evsel__intval() returns u64,
- * so address of lockdep_map should be treated as 64bit.
- * Is there more better solution?
- */
- void *addr; /* address of lockdep_map, used as ID */
+ u64 addr; /* address of lockdep_map, used as ID */
char *name; /* for strcpy(), we cannot use const */

unsigned int nr_acquire;
@@ -106,7 +101,7 @@ struct lock_seq_stat {
struct list_head list;
int state;
u64 prev_event_time;
- void *addr;
+ u64 addr;

int read_count;
};
@@ -315,7 +310,7 @@ static struct lock_stat *pop_from_result(void)
return container_of(node, struct lock_stat, rb);
}

-static struct lock_stat *lock_stat_findnew(void *addr, const char *name)
+static struct lock_stat *lock_stat_findnew(u64 addr, const char *name)
{
struct hlist_head *entry = lockhashentry(addr);
struct lock_stat *ret, *new;
@@ -361,7 +356,7 @@ struct trace_lock_handler {
struct perf_sample *sample);
};

-static struct lock_seq_stat *get_seq(struct thread_stat *ts, void *addr)
+static struct lock_seq_stat *get_seq(struct thread_stat *ts, u64 addr)
{
struct lock_seq_stat *seq;

@@ -400,16 +395,13 @@ enum acquire_flags {
static int report_lock_acquire_event(struct evsel *evsel,
struct perf_sample *sample)
{
- void *addr;
struct lock_stat *ls;
struct thread_stat *ts;
struct lock_seq_stat *seq;
const char *name = evsel__strval(evsel, sample, "name");
- u64 tmp = evsel__intval(evsel, sample, "lockdep_addr");
+ u64 addr = evsel__intval(evsel, sample, "lockdep_addr");
int flag = evsel__intval(evsel, sample, "flags");

- memcpy(&addr, &tmp, sizeof(void *));
-
ls = lock_stat_findnew(addr, name);
if (!ls)
return -ENOMEM;
@@ -472,15 +464,12 @@ static int report_lock_acquire_event(struct evsel *evsel,
static int report_lock_acquired_event(struct evsel *evsel,
struct perf_sample *sample)
{
- void *addr;
struct lock_stat *ls;
struct thread_stat *ts;
struct lock_seq_stat *seq;
u64 contended_term;
const char *name = evsel__strval(evsel, sample, "name");
- u64 tmp = evsel__intval(evsel, sample, "lockdep_addr");
-
- memcpy(&addr, &tmp, sizeof(void *));
+ u64 addr = evsel__intval(evsel, sample, "lockdep_addr");

ls = lock_stat_findnew(addr, name);
if (!ls)
@@ -535,14 +524,11 @@ static int report_lock_acquired_event(struct evsel *evsel,
static int report_lock_contended_event(struct evsel *evsel,
struct perf_sample *sample)
{
- void *addr;
struct lock_stat *ls;
struct thread_stat *ts;
struct lock_seq_stat *seq;
const char *name = evsel__strval(evsel, sample, "name");
- u64 tmp = evsel__intval(evsel, sample, "lockdep_addr");
-
- memcpy(&addr, &tmp, sizeof(void *));
+ u64 addr = evsel__intval(evsel, sample, "lockdep_addr");

ls = lock_stat_findnew(addr, name);
if (!ls)
@@ -590,14 +576,11 @@ static int report_lock_contended_event(struct evsel *evsel,
static int report_lock_release_event(struct evsel *evsel,
struct perf_sample *sample)
{
- void *addr;
struct lock_stat *ls;
struct thread_stat *ts;
struct lock_seq_stat *seq;
const char *name = evsel__strval(evsel, sample, "name");
- u64 tmp = evsel__intval(evsel, sample, "lockdep_addr");
-
- memcpy(&addr, &tmp, sizeof(void *));
+ u64 addr = evsel__intval(evsel, sample, "lockdep_addr");

ls = lock_stat_findnew(addr, name);
if (!ls)
@@ -782,7 +765,7 @@ static void dump_map(void)
pr_info("Address of instance: name of class\n");
for (i = 0; i < LOCKHASH_SIZE; i++) {
hlist_for_each_entry(st, &lockhash_table[i], hash_entry) {
- pr_info(" %p: %s\n", st->addr, st->name);
+ pr_info(" %#llx: %s\n", (unsigned long long)st->addr, st->name);
}
}
}
--
2.34.1.448.ga2b2bfdf31-goog


2022-01-04 18:21:10

by Namhyung Kim

[permalink] [raw]
Subject: [PATCH 3/6] perf lock: Sort map info based on class name

Instead of printing the map info in random (hash table) order, sort it
by lock class name, and then by address within the same class.

Before:
# perf lock info -m
Address of instance: name of class
0xffffa0d940ac5310: &dentry->d_lockref.lock
0xffffa0c20b0e1cb0: &dentry->d_lockref.lock
0xffffa0d8e051cc48: &base->lock
0xffffa0d94f992110: &anon_vma->rwsem
0xffffa0d947a4f278: (null)
0xffffa0c208f6e108: &map->lock
0xffffa0c213ad32c8: &cfs_rq->removed.lock
0xffffa0c20d695888: &parent->list_lock
0xffffa0c278775278: (null)
0xffffa0c212ad4690: &dentry->d_lockref.lock

After:
# perf lock info -m
Address of instance: name of class
0xffffa0c20d538800: &(&sig->stats_lock)->lock
0xffffa0c216d4ec40: &(&sig->stats_lock)->lock
0xffffa1fe4cb04610: &(__futex_data.queues)[i].lock
0xffffa1fe4cb07750: &(__futex_data.queues)[i].lock
0xffffa1fe4cb07b50: &(__futex_data.queues)[i].lock
0xffffa1fe4cb0b850: &(__futex_data.queues)[i].lock
0xffffa1fe4cb0bcd0: &(__futex_data.queues)[i].lock
0xffffa1fe4cb0e5d0: &(__futex_data.queues)[i].lock
0xffffa1fe4cb11ad0: &(__futex_data.queues)[i].lock

Signed-off-by: Namhyung Kim <[email protected]>
---
tools/perf/builtin-lock.c | 20 +++++++++++++++++++-
1 file changed, 19 insertions(+), 1 deletion(-)

diff --git a/tools/perf/builtin-lock.c b/tools/perf/builtin-lock.c
index c4b5c3d71ae3..8078f7ca826d 100644
--- a/tools/perf/builtin-lock.c
+++ b/tools/perf/builtin-lock.c
@@ -757,6 +757,21 @@ static void dump_threads(void)
}
}

+static int compare_maps(struct lock_stat *a, struct lock_stat *b)
+{
+ int ret;
+
+ if (a->name && b->name)
+ ret = strcmp(a->name, b->name);
+ else
+ ret = !!a->name - !!b->name;
+
+ if (!ret)
+ return a->addr < b->addr;
+ else
+ return ret < 0;
+}
+
static void dump_map(void)
{
unsigned int i;
@@ -765,9 +780,12 @@ static void dump_map(void)
pr_info("Address of instance: name of class\n");
for (i = 0; i < LOCKHASH_SIZE; i++) {
hlist_for_each_entry(st, &lockhash_table[i], hash_entry) {
- pr_info(" %#llx: %s\n", (unsigned long long)st->addr, st->name);
+ insert_to_result(st, compare_maps);
}
}
+
+ while ((st = pop_from_result()))
+ pr_info(" %#llx: %s\n", (unsigned long long)st->addr, st->name);
}

static int dump_info(void)
--
2.34.1.448.ga2b2bfdf31-goog


2022-01-04 18:21:14

by Namhyung Kim

[permalink] [raw]
Subject: [PATCH 4/6] perf lock: Fix lock name length check for printing

The name column is 20 characters wide, so lock names shorter than 20
characters should be printed without an ellipsis.

Before:
# perf lock report
Name acquired contended avg wait (ns) total wait (ns) max wait (ns) min wait (ns)

rcu_read_lock 251225 0 0 0 0 0
&(ei->i_block_re... 8731 0 0 0 0 0
&sb->s_type->i_l... 8731 0 0 0 0 0
hrtimer_bases.lo... 5261 0 0 0 0 0
hrtimer_bases.lo... 2626 0 0 0 0 0
hrtimer_bases.lo... 1953 0 0 0 0 0
hrtimer_bases.lo... 1382 0 0 0 0 0
cpu_hotplug_lock... 1350 0 0 0 0 0

After:
# perf lock report
Name acquired contended avg wait (ns) total wait (ns) max wait (ns) min wait (ns)

rcu_read_lock 251225 0 0 0 0 0
&(ei->i_block_re... 8731 0 0 0 0 0
&sb->s_type->i_l... 8731 0 0 0 0 0
hrtimer_bases.lock 5261 0 0 0 0 0
hrtimer_bases.lock 2626 0 0 0 0 0
hrtimer_bases.lock 1953 0 0 0 0 0
hrtimer_bases.lock 1382 0 0 0 0 0
cpu_hotplug_lock 1350 0 0 0 0 0

Signed-off-by: Namhyung Kim <[email protected]>
---
tools/perf/builtin-lock.c | 2 +-
1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/tools/perf/builtin-lock.c b/tools/perf/builtin-lock.c
index 8078f7ca826d..f6adf3cdd1e2 100644
--- a/tools/perf/builtin-lock.c
+++ b/tools/perf/builtin-lock.c
@@ -710,7 +710,7 @@ static void print_result(void)
}
bzero(cut_name, 20);

- if (strlen(st->name) < 16) {
+ if (strlen(st->name) < 20) {
/* output raw name */
pr_info("%20s ", st->name);
} else {
--
2.34.1.448.ga2b2bfdf31-goog


2022-01-04 18:21:15

by Namhyung Kim

[permalink] [raw]
Subject: [PATCH 5/6] perf lock: Add -c/--combine-locks option

The -c (or --combine-locks) option merges lock instances in the
same class into a single entry. It compares the names of the locks
and marks duplicate entries using lock_stat->combined.

# perf lock report
Name acquired contended avg wait (ns) total wait (ns) max wait (ns) min wait (ns)

rcu_read_lock 251225 0 0 0 0 0
&(ei->i_block_re... 8731 0 0 0 0 0
&sb->s_type->i_l... 8731 0 0 0 0 0
hrtimer_bases.lock 5261 0 0 0 0 0
hrtimer_bases.lock 2626 0 0 0 0 0
hrtimer_bases.lock 1953 0 0 0 0 0
hrtimer_bases.lock 1382 0 0 0 0 0
cpu_hotplug_lock 1350 0 0 0 0 0
hrtimer_bases.lock 1273 0 0 0 0 0
hrtimer_bases.lock 1269 0 0 0 0 0

# perf lock report -c
Name acquired contended avg wait (ns) total wait (ns) max wait (ns) min wait (ns)

rcu_read_lock 251225 0 0 0 0 0
hrtimer_bases.lock 39450 0 0 0 0 0
&sb->s_type->i_l... 10301 1 662 662 662 662
ptlock_ptr(page) 10173 2 701 1402 760 642
&(ei->i_block_re... 8732 0 0 0 0 0
&xa->xa_lock 8088 0 0 0 0 0
&base->lock 6705 0 0 0 0 0
&p->pi_lock 5549 0 0 0 0 0
&dentry->d_lockr... 5010 4 1274 5097 1844 789
&ep->lock 3958 0 0 0 0 0

Signed-off-by: Namhyung Kim <[email protected]>
---
tools/perf/Documentation/perf-lock.txt | 4 ++
tools/perf/builtin-lock.c | 68 ++++++++++++++++++++++++++
2 files changed, 72 insertions(+)

diff --git a/tools/perf/Documentation/perf-lock.txt b/tools/perf/Documentation/perf-lock.txt
index 1b4d452923d7..f5eb95788969 100644
--- a/tools/perf/Documentation/perf-lock.txt
+++ b/tools/perf/Documentation/perf-lock.txt
@@ -54,6 +54,10 @@ REPORT OPTIONS
Sorting key. Possible values: acquired (default), contended,
avg_wait, wait_total, wait_max, wait_min.

+-c::
+--combine-locks::
+ Merge lock instances in the same class (based on name).
+
INFO OPTIONS
------------

diff --git a/tools/perf/builtin-lock.c b/tools/perf/builtin-lock.c
index f6adf3cdd1e2..bbfeba79426a 100644
--- a/tools/perf/builtin-lock.c
+++ b/tools/perf/builtin-lock.c
@@ -65,6 +65,7 @@ struct lock_stat {
u64 wait_time_max;

int discard; /* flag of blacklist */
+ int combined;
};

/*
@@ -115,6 +116,8 @@ struct thread_stat {

static struct rb_root thread_stats;

+static bool combine_locks;
+
static struct thread_stat *thread_stat_find(u32 tid)
{
struct rb_node *node;
@@ -241,6 +244,7 @@ static const char *sort_key = "acquired";

static int (*compare)(struct lock_stat *, struct lock_stat *);

+static struct rb_root sorted; /* place to store intermediate data */
static struct rb_root result; /* place to store sorted data */

#define DEF_KEY_LOCK(name, fn_suffix) \
@@ -274,6 +278,49 @@ static int select_key(void)
return -1;
}

+static void combine_lock_stats(struct lock_stat *st)
+{
+ struct rb_node **rb = &sorted.rb_node;
+ struct rb_node *parent = NULL;
+ struct lock_stat *p;
+ int ret;
+
+ while (*rb) {
+ p = container_of(*rb, struct lock_stat, rb);
+ parent = *rb;
+
+ if (st->name && p->name)
+ ret = strcmp(st->name, p->name);
+ else
+ ret = !!st->name - !!p->name;
+
+ if (ret == 0) {
+ p->nr_acquired += st->nr_acquired;
+ p->nr_contended += st->nr_contended;
+ p->wait_time_total += st->wait_time_total;
+
+ if (p->nr_contended)
+ p->avg_wait_time = p->wait_time_total / p->nr_contended;
+
+ if (p->wait_time_min > st->wait_time_min)
+ p->wait_time_min = st->wait_time_min;
+ if (p->wait_time_max < st->wait_time_max)
+ p->wait_time_max = st->wait_time_max;
+
+ st->combined = 1;
+ return;
+ }
+
+ if (ret < 0)
+ rb = &(*rb)->rb_left;
+ else
+ rb = &(*rb)->rb_right;
+ }
+
+ rb_link_node(&st->rb, parent, rb);
+ rb_insert_color(&st->rb, &sorted);
+}
+
static void insert_to_result(struct lock_stat *st,
int (*bigger)(struct lock_stat *, struct lock_stat *))
{
@@ -281,6 +328,9 @@ static void insert_to_result(struct lock_stat *st,
struct rb_node *parent = NULL;
struct lock_stat *p;

+ if (combine_locks && st->combined)
+ return;
+
while (*rb) {
p = container_of(*rb, struct lock_stat, rb);
parent = *rb;
@@ -833,6 +883,21 @@ static int process_sample_event(struct perf_tool *tool __maybe_unused,
return err;
}

+static void combine_result(void)
+{
+ unsigned int i;
+ struct lock_stat *st;
+
+ if (!combine_locks)
+ return;
+
+ for (i = 0; i < LOCKHASH_SIZE; i++) {
+ hlist_for_each_entry(st, &lockhash_table[i], hash_entry) {
+ combine_lock_stats(st);
+ }
+ }
+}
+
static void sort_result(void)
{
unsigned int i;
@@ -896,6 +961,7 @@ static int __cmd_report(bool display_info)
if (display_info) /* used for info subcommand */
err = dump_info();
else {
+ combine_result();
sort_result();
print_result();
}
@@ -970,6 +1036,8 @@ int cmd_lock(int argc, const char **argv)
OPT_STRING('k', "key", &sort_key, "acquired",
"key for sorting (acquired / contended / avg_wait / wait_total / wait_max / wait_min)"),
/* TODO: type */
+ OPT_BOOLEAN('c', "combine-locks", &combine_locks,
+ "combine locks in the same class"),
OPT_PARENT(lock_options)
};

--
2.34.1.448.ga2b2bfdf31-goog


2022-01-04 18:21:20

by Namhyung Kim

[permalink] [raw]
Subject: [PATCH 6/6] perf lock: Carefully combine lock stats for discarded entries

Stats from discarded entries should be omitted, but a lock class may
have both good and bad (discarded) entries. If the first entry seen
for a class is bad, zero-fill its stats and only add stats from good
entries, if any. The combined entry clears its discard state once a
good entry is merged into it.

Signed-off-by: Namhyung Kim <[email protected]>
---
tools/perf/builtin-lock.c | 16 ++++++++++++++++
1 file changed, 16 insertions(+)

diff --git a/tools/perf/builtin-lock.c b/tools/perf/builtin-lock.c
index bbfeba79426a..57b9ebd7118a 100644
--- a/tools/perf/builtin-lock.c
+++ b/tools/perf/builtin-lock.c
@@ -295,6 +295,9 @@ static void combine_lock_stats(struct lock_stat *st)
ret = !!st->name - !!p->name;

if (ret == 0) {
+ if (st->discard)
+ goto out;
+
p->nr_acquired += st->nr_acquired;
p->nr_contended += st->nr_contended;
p->wait_time_total += st->wait_time_total;
@@ -307,6 +310,10 @@ static void combine_lock_stats(struct lock_stat *st)
if (p->wait_time_max < st->wait_time_max)
p->wait_time_max = st->wait_time_max;

+ /* now it got a new !discard record */
+ p->discard = 0;
+
+out:
st->combined = 1;
return;
}
@@ -319,6 +326,15 @@ static void combine_lock_stats(struct lock_stat *st)

rb_link_node(&st->rb, parent, rb);
rb_insert_color(&st->rb, &sorted);
+
+ if (st->discard) {
+ st->nr_acquired = 0;
+ st->nr_contended = 0;
+ st->wait_time_total = 0;
+ st->avg_wait_time = 0;
+ st->wait_time_min = ULLONG_MAX;
+ st->wait_time_max = 0;
+ }
}

static void insert_to_result(struct lock_stat *st,
--
2.34.1.448.ga2b2bfdf31-goog


2022-01-09 13:13:17

by Jiri Olsa

[permalink] [raw]
Subject: Re: [PATCHSET 0/6] perf lock: Random updates for the locking analysis (v1)

On Tue, Jan 04, 2022 at 10:20:48AM -0800, Namhyung Kim wrote:

SNIP

> With -c option, the hrtimer_bases.lock would be combined into a single
> entry. Also note that the lock names are correctly displayed now.
>
> # perf lock report -c
> Name acquired contended avg wait (ns) total wait (ns) max wait (ns) min wait (ns)
>
> rcu_read_lock 251225 0 0 0 0 0
> hrtimer_bases.lock 39449 0 0 0 0 0
> &sb->s_type->i_l... 10301 1 662 662 662 662
> ptlock_ptr(page) 10173 2 701 1402 760 642
> &(ei->i_block_re... 8732 0 0 0 0 0
> &base->lock 6705 0 0 0 0 0
> &p->pi_lock 5549 0 0 0 0 0
> &dentry->d_lockr... 5010 4 1274 5097 1844 789
> &ep->lock 2750 0 0 0 0 0
> &(__futex_data.q... 2331 0 0 0 0 0
> (null) 1878 0 0 0 0 0
> cpu_hotplug_lock 1350 0 0 0 0 0
> &____s->seqcount 1349 0 0 0 0 0
> &newf->file_lock 1001 15 1025 15388 2279 618
> ...
>
> Maybe we can make it default later (with a config and --no-combine-locks).
>
> You can get it from 'perf/lock-combine-v1' branch at
>
> git://git.kernel.org/pub/scm/linux/kernel/git/namhyung/linux-perf.git
>
> Thanks,
> Namhyung
>
>
> Namhyung Kim (6):
> perf lock: Convert lockhash_table to use hlist
> perf lock: Change type of lock_stat->addr to u64
> perf lock: Sort map info based on class name
> perf lock: Fix lock name length check for printing
> perf lock: Add -c/--combine-locks option
> perf lock: Carefully combine lock stats for discarded entries
>
> tools/perf/Documentation/perf-lock.txt | 4 +
> tools/perf/builtin-lock.c | 155 +++++++++++++++++++------
> 2 files changed, 124 insertions(+), 35 deletions(-)

LGTM

Acked-by: Jiri Olsa <[email protected]>

thanks,
jirka

>
>
> base-commit: b9f6fbb3b2c29736970ae9fcc0e82b0bd459442b
> --
> 2.34.1.448.ga2b2bfdf31-goog
>