2007-05-20 10:30:53

by Peter Zijlstra

[permalink] [raw]
Subject: [PATCH] lockdep: lock contention tracking


Add lock contention tracking to lockdep

[root@taijtu ~]# cat /proc/lockdep_contentions | sort -rnk 2 | head
dcache_lock: 3000 0 [618] [<ffffffff8033badd>] _atomic_dec_and_lock+0x39/0x58 [17] [<ffffffff802d6dce>] sysfs_open_file+0x28/0x25a [160] [<ffffffff802a5b61>] d_instantiate+0x2a/0xad [52] [<ffffffff8029d926>] __link_path_walk+0x270/0xde9
&inode->i_mutex: 235 0 [15] [<ffffffff8029b053>] pipe_read+0x74/0x39d [23] [<ffffffff8029a9f8>] pipe_wait+0x86/0x8d [105] [<ffffffff8029bc9a>] do_lookup+0x81/0x1ae [85] [<ffffffff802a0be7>] vfs_readdir+0x5c/0xa9
&zone->lru_lock: 141 0 [11] [<ffffffff8027366b>] activate_page+0x37/0xd6 [75] [<ffffffff80273327>] release_pages+0xb2/0x16c [42] [<ffffffff80273443>] __pagevec_lru_add_active+0x62/0xe5 [12] [<ffffffff80273529>] __pagevec_lru_add+0x63/0xe2
&rq->rq_lock_key: 93 0 [66] [<ffffffff8022b202>] task_rq_lock+0x42/0x74 [24] [<ffffffff80461f2a>] __sched_text_start+0x17a/0x869 [2] [<ffffffff8022a58e>] double_rq_lock+0x38/0x3d [1] [<ffffffff8022b1af>] double_lock_balance+0x41/0x52
&rq->rq_lock_key: 59 0 [15] [<ffffffff80461f2a>] __sched_text_start+0x17a/0x869 [27] [<ffffffff8022b202>] task_rq_lock+0x42/0x74 [17] [<ffffffff80462090>] __sched_text_start+0x2e0/0x869
files_lock: 57 0 [28] [<ffffffff8029581d>] file_move+0x1d/0x51 [29] [<ffffffff802957dc>] file_kill+0x15/0x39
&inode->i_data.i_mmap_lock: 52 0 [16] [<ffffffff8023149f>] copy_process+0xe10/0x1684 [5] [<ffffffff8027e38d>] vma_link+0x40/0x107 [20] [<ffffffff8027d866>] unlink_file_vma+0x2c/0x53 [11] [<ffffffff8027da53>] vma_adjust+0x154/0x452
&q->__queue_lock: 47 0 [16] [<ffffffff8033348a>] __make_request+0x5e/0x663 [4] [<ffffffff803339e8>] __make_request+0x5bc/0x663 [2] [<ffffffff803320c3>] generic_unplug_device+0x18/0x25 [1] [<ffffffff803320bb>] generic_unplug_device+0x10/0x25
&dentry->d_lock: 26 0 [22] [<ffffffff802a58be>] __d_lookup+0x7d/0x136 [4] [<ffffffff802ccd61>] dnotify_parent+0x1f/0x6d
vfsmount_lock: 17 0 [16] [<ffffffff802ab31d>] lookup_mnt+0x19/0x4c [1] [<ffffffff802a51df>] __d_path+0xae/0x14c

read as:
<lock name> <(write) contentiond> <read contentions>
(<contentions> <ip> <symbol>){,4}

The 4 points are the first 4 unique callsites that cause lock contention
for the specified lock class.

writing a 0 to /proc/lockdep_contentions clears the stats

Signed-off-by: Peter Zijlstra <[email protected]>
---
include/linux/lockdep.h | 14 +++++++
kernel/lockdep.c | 96 ++++++++++++++++++++++++++++++++++++++++++++++++
kernel/lockdep_proc.c | 77 ++++++++++++++++++++++++++++++++++++++
kernel/mutex.c | 7 +++
kernel/rwsem.c | 20 ++++++++--
kernel/spinlock.c | 70 ++++++++++++++++++++++++++++-------
6 files changed, 266 insertions(+), 18 deletions(-)

Index: linux-2.6/include/linux/lockdep.h
===================================================================
--- linux-2.6.orig/include/linux/lockdep.h 2007-04-03 13:58:27.000000000 +0200
+++ linux-2.6/include/linux/lockdep.h 2007-05-19 10:26:44.000000000 +0200
@@ -16,6 +16,7 @@ struct task_struct;
#include <linux/list.h>
#include <linux/debug_locks.h>
#include <linux/stacktrace.h>
+#include <asm/atomic.h>

/*
* Lock-class usage-state bits:
@@ -71,6 +72,11 @@ struct lock_class_key {
struct lockdep_subclass_key subkeys[MAX_LOCKDEP_SUBCLASSES];
};

+struct lockdep_contention_point {
+ unsigned long ip;
+ atomic_t count;
+};
+
/*
* The lock-class itself:
*/
@@ -114,6 +120,10 @@ struct lock_class {

const char *name;
int name_version;
+
+ atomic_t read_contentions;
+ atomic_t write_contentions;
+ struct lockdep_contention_point contention_point[4];
};

/*
@@ -243,6 +253,9 @@ extern void lock_acquire(struct lockdep_
extern void lock_release(struct lockdep_map *lock, int nested,
unsigned long ip);

+extern void lock_contended(struct lockdep_map *lock, int read,
+ unsigned long ip);
+
# define INIT_LOCKDEP .lockdep_recursion = 0,

#define lockdep_depth(tsk) (debug_locks ? (tsk)->lockdep_depth : 0)
@@ -259,6 +272,7 @@ static inline void lockdep_on(void)

# define lock_acquire(l, s, t, r, c, i) do { } while (0)
# define lock_release(l, n, i) do { } while (0)
+# define lock_contended(l, r, i) do { } while (0)
# define lockdep_init() do { } while (0)
# define lockdep_info() do { } while (0)
# define lockdep_init_map(lock, name, key, sub) do { (void)(key); } while (0)
Index: linux-2.6/kernel/lockdep.c
===================================================================
--- linux-2.6.orig/kernel/lockdep.c 2007-05-10 17:24:41.000000000 +0200
+++ linux-2.6/kernel/lockdep.c 2007-05-19 10:58:40.000000000 +0200
@@ -2261,6 +2261,33 @@ print_unlock_inbalance_bug(struct task_s
return 0;
}

+static int
+print_lock_contention_bug(struct task_struct *curr, struct lockdep_map *lock,
+ unsigned long ip)
+{
+ if (!debug_locks_off())
+ return 0;
+ if (debug_locks_silent)
+ return 0;
+
+ printk("\n=================================\n");
+ printk( "[ BUG: bad contention detected! ]\n");
+ printk( "---------------------------------\n");
+ printk("%s/%d is trying to content lock (",
+ curr->comm, curr->pid);
+ print_lockdep_cache(lock);
+ printk(") at:\n");
+ print_ip_sym(ip);
+ printk("but there are no locks held!\n");
+ printk("\nother info that might help us debug this:\n");
+ lockdep_print_held_locks(curr);
+
+ printk("\nstack backtrace:\n");
+ dump_stack();
+
+ return 0;
+}
+
/*
* Common debugging checks for both nested and non-nested unlock:
*/
@@ -2401,6 +2428,59 @@ __lock_release(struct lockdep_map *lock,
check_chain_key(curr);
}

+static void
+__lock_contended(struct lockdep_map *lock, int read, unsigned long ip)
+{
+ struct task_struct *curr = current;
+ struct held_lock *hlock, *prev_hlock;
+ struct lock_class *class;
+ unsigned int depth;
+ int i;
+
+ class = lock->class_cache;
+ if (class)
+ goto found_it;
+
+ depth = curr->lockdep_depth;
+ if (DEBUG_LOCKS_WARN_ON(!depth))
+ return;
+
+ prev_hlock = NULL;
+ for (i = depth-1; i >= 0; i--) {
+ hlock = curr->held_locks + i;
+ /*
+ * We must not cross into another context:
+ */
+ if (prev_hlock && prev_hlock->irq_context != hlock->irq_context)
+ break;
+ if (hlock->instance == lock) {
+ class = hlock->class;
+ goto found_it;
+ }
+ prev_hlock = hlock;
+ }
+ print_lock_contention_bug(curr, lock, ip);
+ return;
+
+found_it:
+ if (read)
+ atomic_inc(&class->read_contentions);
+ else
+ atomic_inc(&class->write_contentions);
+
+ for (i = 0; i < ARRAY_SIZE(class->contention_point); i++) {
+ if (class->contention_point[i].ip == 0) {
+ class->contention_point[i].ip = ip;
+ atomic_set(&class->contention_point[i].count, 1);
+ break;
+ }
+ if (class->contention_point[i].ip == ip) {
+ atomic_inc(&class->contention_point[i].count);
+ break;
+ }
+ }
+}
+
/*
* Check whether we follow the irq-flags state precisely:
*/
@@ -2473,6 +2553,22 @@ void lock_release(struct lockdep_map *lo

EXPORT_SYMBOL_GPL(lock_release);

+void lock_contended(struct lockdep_map *lock, int read, unsigned long ip)
+{
+ unsigned long flags;
+
+ if (unlikely(current->lockdep_recursion))
+ return;
+
+ raw_local_irq_save(flags);
+ check_flags(flags);
+ current->lockdep_recursion = 1;
+ __lock_contended(lock, read, ip);
+ current->lockdep_recursion = 0;
+ raw_local_irq_restore(flags);
+}
+EXPORT_SYMBOL_GPL(lock_contended);
+
/*
* Used by the testsuite, sanitize the validator state
* after a simulated failure:
Index: linux-2.6/kernel/mutex.c
===================================================================
--- linux-2.6.orig/kernel/mutex.c 2007-05-10 10:19:03.000000000 +0200
+++ linux-2.6/kernel/mutex.c 2007-05-18 21:47:38.000000000 +0200
@@ -139,6 +139,12 @@ __mutex_lock_common(struct mutex *lock,
list_add_tail(&waiter.list, &lock->wait_list);
waiter.task = task;

+ old_val = atomic_xchg(&lock->count, -1);
+ if (old_val == 1)
+ goto done;
+
+ lock_contended(&lock->dep_map, 0, _RET_IP_);
+
for (;;) {
/*
* Lets try to take the lock again - this is needed even if
@@ -174,6 +180,7 @@ __mutex_lock_common(struct mutex *lock,
spin_lock_mutex(&lock->wait_lock, flags);
}

+done:
/* got the lock - rejoice! */
mutex_remove_waiter(lock, &waiter, task_thread_info(task));
debug_mutex_set_owner(lock, task_thread_info(task));
Index: linux-2.6/kernel/lockdep_proc.c
===================================================================
--- linux-2.6.orig/kernel/lockdep_proc.c 2007-04-03 13:58:27.000000000 +0200
+++ linux-2.6/kernel/lockdep_proc.c 2007-05-19 10:50:18.000000000 +0200
@@ -15,6 +15,7 @@
#include <linux/seq_file.h>
#include <linux/kallsyms.h>
#include <linux/debug_locks.h>
+#include <asm/uaccess.h>

#include "lockdep_internals.h"

@@ -342,6 +343,78 @@ static const struct file_operations proc
.release = seq_release,
};

+static int lockdep_contentions_show(struct seq_file *m, void *v)
+{
+ struct lock_class *class;
+
+ list_for_each_entry(class, &all_lock_classes, lock_entry) {
+ int r, w;
+
+ r = atomic_read(&class->read_contentions);
+ w = atomic_read(&class->write_contentions);
+
+ if (r || w) {
+ int i;
+ char sym[KSYM_SYMBOL_LEN];
+
+ seq_printf(m, "%s: %d %d", class->name, w, r);
+ for (i = 0; i < ARRAY_SIZE(class->contention_point);
+ i++) {
+ struct lockdep_contention_point *cp =
+ &class->contention_point[i];
+
+ if (cp->ip == 0)
+ break;
+ sprint_symbol(sym, cp->ip);
+ seq_printf(m, " [%d] [<%p>] %s",
+ atomic_read(&cp->count),
+ (void *)cp->ip, sym);
+ }
+ seq_printf(m, "\n");
+ }
+ }
+
+ return 0;
+}
+
+static int lockdep_contentions_open(struct inode *inode, struct file *file)
+{
+ return single_open(file, lockdep_contentions_show, NULL);
+}
+
+ssize_t lockdep_contentions_write(struct file *file, const char __user *buf,
+ size_t count, loff_t *ppos)
+{
+ struct lock_class *class;
+ char c;
+ int i;
+
+ if (count) {
+ if (get_user(c, buf))
+ return -EFAULT;
+
+ if (c != '0')
+ return count;
+
+ list_for_each_entry(class, &all_lock_classes, lock_entry) {
+ atomic_set(&class->write_contentions, 0);
+ atomic_set(&class->read_contentions, 0);
+ for (i = 0; i < ARRAY_SIZE(class->contention_point);
+ i++)
+ class->contention_point[i].ip = 0;
+ }
+ }
+ return count;
+}
+
+static const struct file_operations proc_lockdep_contentions_operations = {
+ .open = lockdep_contentions_open,
+ .write = lockdep_contentions_write,
+ .read = seq_read,
+ .llseek = seq_lseek,
+ .release = seq_release,
+};
+
static int __init lockdep_proc_init(void)
{
struct proc_dir_entry *entry;
@@ -354,6 +427,10 @@ static int __init lockdep_proc_init(void
if (entry)
entry->proc_fops = &proc_lockdep_stats_operations;

+ entry = create_proc_entry("lockdep_contentions", S_IRUSR, NULL);
+ if (entry)
+ entry->proc_fops = &proc_lockdep_contentions_operations;
+
return 0;
}

Index: linux-2.6/kernel/rwsem.c
===================================================================
--- linux-2.6.orig/kernel/rwsem.c 2007-05-19 10:06:44.000000000 +0200
+++ linux-2.6/kernel/rwsem.c 2007-05-19 10:37:59.000000000 +0200
@@ -20,7 +20,10 @@ void down_read(struct rw_semaphore *sem)
might_sleep();
rwsem_acquire_read(&sem->dep_map, 0, 0, _RET_IP_);

- __down_read(sem);
+ if (!__down_read_trylock(sem)) {
+ lock_contended(&sem->dep_map, 1, _RET_IP_);
+ __down_read(sem);
+ }
}

EXPORT_SYMBOL(down_read);
@@ -47,7 +50,10 @@ void down_write(struct rw_semaphore *sem
might_sleep();
rwsem_acquire(&sem->dep_map, 0, 0, _RET_IP_);

- __down_write(sem);
+ if (!__down_write_trylock(sem)) {
+ lock_contended(&sem->dep_map, 0, _RET_IP_);
+ __down_write(sem);
+ }
}

EXPORT_SYMBOL(down_write);
@@ -111,7 +117,10 @@ void down_read_nested(struct rw_semaphor
might_sleep();
rwsem_acquire_read(&sem->dep_map, subclass, 0, _RET_IP_);

- __down_read(sem);
+ if (!__down_read_trylock(sem)) {
+ lock_contended(&sem->dep_map, 1, _RET_IP_);
+ __down_read(sem);
+ }
}

EXPORT_SYMBOL(down_read_nested);
@@ -130,7 +139,10 @@ void down_write_nested(struct rw_semapho
might_sleep();
rwsem_acquire(&sem->dep_map, subclass, 0, _RET_IP_);

- __down_write_nested(sem, subclass);
+ if (!__down_write_trylock(sem)) {
+ lock_contended(&sem->dep_map, 0, _RET_IP_);
+ __down_write_nested(sem, subclass);
+ }
}

EXPORT_SYMBOL(down_write_nested);
Index: linux-2.6/kernel/spinlock.c
===================================================================
--- linux-2.6.orig/kernel/spinlock.c 2007-05-10 10:19:03.000000000 +0200
+++ linux-2.6/kernel/spinlock.c 2007-05-19 10:20:47.000000000 +0200
@@ -72,7 +72,10 @@ void __lockfunc _read_lock(rwlock_t *loc
{
preempt_disable();
rwlock_acquire_read(&lock->dep_map, 0, 0, _RET_IP_);
- _raw_read_lock(lock);
+ if (!_raw_read_trylock(lock)) {
+ lock_contended(&lock->dep_map, 1, _RET_IP_);
+ _raw_read_lock(lock);
+ }
}
EXPORT_SYMBOL(_read_lock);

@@ -89,7 +92,10 @@ unsigned long __lockfunc _spin_lock_irqs
* that interrupts are not re-enabled during lock-acquire:
*/
#ifdef CONFIG_PROVE_LOCKING
- _raw_spin_lock(lock);
+ if (!_raw_spin_trylock(lock)) {
+ lock_contended(&lock->dep_map, 0, _RET_IP_);
+ _raw_spin_lock(lock);
+ }
#else
_raw_spin_lock_flags(lock, &flags);
#endif
@@ -102,7 +108,10 @@ void __lockfunc _spin_lock_irq(spinlock_
local_irq_disable();
preempt_disable();
spin_acquire(&lock->dep_map, 0, 0, _RET_IP_);
- _raw_spin_lock(lock);
+ if (!_raw_spin_trylock(lock)) {
+ lock_contended(&lock->dep_map, 0, _RET_IP_);
+ _raw_spin_lock(lock);
+ }
}
EXPORT_SYMBOL(_spin_lock_irq);

@@ -111,7 +120,10 @@ void __lockfunc _spin_lock_bh(spinlock_t
local_bh_disable();
preempt_disable();
spin_acquire(&lock->dep_map, 0, 0, _RET_IP_);
- _raw_spin_lock(lock);
+ if (!_raw_spin_trylock(lock)) {
+ lock_contended(&lock->dep_map, 0, _RET_IP_);
+ _raw_spin_lock(lock);
+ }
}
EXPORT_SYMBOL(_spin_lock_bh);

@@ -122,7 +134,10 @@ unsigned long __lockfunc _read_lock_irqs
local_irq_save(flags);
preempt_disable();
rwlock_acquire_read(&lock->dep_map, 0, 0, _RET_IP_);
- _raw_read_lock(lock);
+ if (!_raw_read_trylock(lock)) {
+ lock_contended(&lock->dep_map, 1, _RET_IP_);
+ _raw_read_lock(lock);
+ }
return flags;
}
EXPORT_SYMBOL(_read_lock_irqsave);
@@ -132,7 +147,10 @@ void __lockfunc _read_lock_irq(rwlock_t
local_irq_disable();
preempt_disable();
rwlock_acquire_read(&lock->dep_map, 0, 0, _RET_IP_);
- _raw_read_lock(lock);
+ if (!_raw_read_trylock(lock)) {
+ lock_contended(&lock->dep_map, 1, _RET_IP_);
+ _raw_read_lock(lock);
+ }
}
EXPORT_SYMBOL(_read_lock_irq);

@@ -141,7 +159,10 @@ void __lockfunc _read_lock_bh(rwlock_t *
local_bh_disable();
preempt_disable();
rwlock_acquire_read(&lock->dep_map, 0, 0, _RET_IP_);
- _raw_read_lock(lock);
+ if (!_raw_read_trylock(lock)) {
+ lock_contended(&lock->dep_map, 1, _RET_IP_);
+ _raw_read_lock(lock);
+ }
}
EXPORT_SYMBOL(_read_lock_bh);

@@ -152,7 +173,10 @@ unsigned long __lockfunc _write_lock_irq
local_irq_save(flags);
preempt_disable();
rwlock_acquire(&lock->dep_map, 0, 0, _RET_IP_);
- _raw_write_lock(lock);
+ if (!_raw_write_trylock(lock)) {
+ lock_contended(&lock->dep_map, 0, _RET_IP_);
+ _raw_write_lock(lock);
+ }
return flags;
}
EXPORT_SYMBOL(_write_lock_irqsave);
@@ -162,7 +186,10 @@ void __lockfunc _write_lock_irq(rwlock_t
local_irq_disable();
preempt_disable();
rwlock_acquire(&lock->dep_map, 0, 0, _RET_IP_);
- _raw_write_lock(lock);
+ if (!_raw_write_trylock(lock)) {
+ lock_contended(&lock->dep_map, 0, _RET_IP_);
+ _raw_write_lock(lock);
+ }
}
EXPORT_SYMBOL(_write_lock_irq);

@@ -171,7 +198,10 @@ void __lockfunc _write_lock_bh(rwlock_t
local_bh_disable();
preempt_disable();
rwlock_acquire(&lock->dep_map, 0, 0, _RET_IP_);
- _raw_write_lock(lock);
+ if (!_raw_write_trylock(lock)) {
+ lock_contended(&lock->dep_map, 0, _RET_IP_);
+ _raw_write_lock(lock);
+ }
}
EXPORT_SYMBOL(_write_lock_bh);

@@ -179,7 +209,10 @@ void __lockfunc _spin_lock(spinlock_t *l
{
preempt_disable();
spin_acquire(&lock->dep_map, 0, 0, _RET_IP_);
- _raw_spin_lock(lock);
+ if (!_raw_spin_trylock(lock)) {
+ lock_contended(&lock->dep_map, 0, _RET_IP_);
+ _raw_spin_lock(lock);
+ }
}

EXPORT_SYMBOL(_spin_lock);
@@ -188,7 +221,10 @@ void __lockfunc _write_lock(rwlock_t *lo
{
preempt_disable();
rwlock_acquire(&lock->dep_map, 0, 0, _RET_IP_);
- _raw_write_lock(lock);
+ if (!_raw_write_trylock(lock)) {
+ lock_contended(&lock->dep_map, 0, _RET_IP_);
+ _raw_write_lock(lock);
+ }
}

EXPORT_SYMBOL(_write_lock);
@@ -289,7 +325,10 @@ void __lockfunc _spin_lock_nested(spinlo
{
preempt_disable();
spin_acquire(&lock->dep_map, subclass, 0, _RET_IP_);
- _raw_spin_lock(lock);
+ if (!_raw_spin_trylock(lock)) {
+ lock_contended(&lock->dep_map, 0, _RET_IP_);
+ _raw_spin_lock(lock);
+ }
}

EXPORT_SYMBOL(_spin_lock_nested);
@@ -306,7 +345,10 @@ unsigned long __lockfunc _spin_lock_irqs
* that interrupts are not re-enabled during lock-acquire:
*/
#ifdef CONFIG_PROVE_SPIN_LOCKING
- _raw_spin_lock(lock);
+ if (!_raw_spin_trylock(lock)) {
+ lock_contended(&lock->dep_map, 0, _RET_IP_);
+ _raw_spin_lock(lock);
+ }
#else
_raw_spin_lock_flags(lock, &flags);
#endif



2007-05-20 18:05:48

by Ingo Molnar

[permalink] [raw]
Subject: Re: [PATCH] lockdep: lock contention tracking


* Peter Zijlstra <[email protected]> wrote:

> Add lock contention tracking to lockdep
>
> [root@taijtu ~]# cat /proc/lockdep_contentions | sort -rnk 2 | head
> dcache_lock: 3000 0 [618] [<ffffffff8033badd>] _atomic_dec_and_lock+0x39/0x58 [17] [<ffffffff802d6dce>] sysfs_open_file+0x28/0x25a [160] [<ffffffff802a5b61>] d_instantiate+0x2a/0xad [52] [<ffffffff8029d926>] __link_path_walk+0x270/0xde9
> &inode->i_mutex: 235 0 [15] [<ffffffff8029b053>] pipe_read+0x74/0x39d [23] [<ffffffff8029a9f8>] pipe_wait+0x86/0x8d [105] [<ffffffff8029bc9a>] do_lookup+0x81/0x1ae [85] [<ffffffff802a0be7>] vfs_readdir+0x5c/0xa9
> &zone->lru_lock: 141 0 [11] [<ffffffff8027366b>] activate_page+0x37/0xd6 [75] [<ffffffff80273327>] release_pages+0xb2/0x16c [42] [<ffffffff80273443>] __pagevec_lru_add_active+0x62/0xe5 [12] [<ffffffff80273529>] __pagevec_lru_add+0x63/0xe2

looks really nice!

> 6 files changed, 266 insertions(+), 18 deletions(-)

wow, that's really simple!

in general i like this alot because it cleverly reuses all the lock API
wrappers and lock tracking infrastructure that lockdep already adds.

Acked-by: Ingo Molnar <[email protected]>

i'd suggest to add a CONFIG_DEBUG_LOCK_STAT Kconfig method to turn this
on without any of the other lock-dependency tracking costs that lockdep
has.

here are a few minor nits about the code:

> +struct lockdep_contention_point {
> + unsigned long ip;
> + atomic_t count;
> +};

i'd say rename this to "lock_contention_point" - this is not really a
lockdep thing. Lockdep offers the machinery to track lock usage, but
there's no "dependency" portion of this.

> + printk("%s/%d is trying to content lock (",

s/content/contend

> +static int lockdep_contentions_show(struct seq_file *m, void *v)
> +ssize_t lockdep_contentions_write(struct file *file, const char __user *buf,

s/lockdep/lock

> + for (i = 0; i < ARRAY_SIZE(class->contention_point);
> + i++)
> + class->contention_point[i].ip = 0;

(loop body needs curly braces because the whole construct is 3-line.)

> + entry = create_proc_entry("lockdep_contentions", S_IRUSR, NULL);

s/lockdep/lock

> @@ -20,7 +20,10 @@ void down_read(struct rw_semaphore *sem)
> might_sleep();
> rwsem_acquire_read(&sem->dep_map, 0, 0, _RET_IP_);
>
> - __down_read(sem);
> + if (!__down_read_trylock(sem)) {
> + lock_contended(&sem->dep_map, 1, _RET_IP_);
> + __down_read(sem);
> + }

heh, clever trick that avoids the touching of dozens of architectures
;-) I guess make this dependent on CONFIG_LOCK_STAT though, because gcc
cannot optimize this away even if lock_contended() is a NOP.

(same for the other trylock additions in rwsem.c and spinlock.c)

Ingo

2007-05-21 06:05:19

by Bill Huey

[permalink] [raw]
Subject: Re: [PATCH] lockdep: lock contention tracking

On Sun, May 20, 2007 at 12:30:26PM +0200, Peter Zijlstra wrote:
> The 4 points are the first 4 unique callsites that cause lock contention
> for the specified lock class.
>
> writing a 0 to /proc/lockdep_contentions clears the stats

We should talk about unifying it with my lockstat work for -rt so that
we have a comprehensive solution for the "world". But you know that
already :)

Unifying lock initializer hash key initialization functions is a key
first step to that. Keep in mind, we can do more with this mechanism
than just kernel locks and we should probably keep that open and not
code into a corner.

bill

2007-05-21 06:08:42

by Ingo Molnar

[permalink] [raw]
Subject: Re: [PATCH] lockdep: lock contention tracking


* Bill Huey <[email protected]> wrote:

> On Sun, May 20, 2007 at 12:30:26PM +0200, Peter Zijlstra wrote:
> > The 4 points are the first 4 unique callsites that cause lock contention
> > for the specified lock class.
> >
> > writing a 0 to /proc/lockdep_contentions clears the stats
>
> We should talk about unifying it with my lockstat work for -rt so that
> we have a comprehensive solution for the "world". But you know that
> already :)

To me it appears Peter's stuff is already a pretty complete solution on
its own, and it's a whole lot simpler (and less duplicative) than your
lockstat patch. Could you list the specific items/features that you
think Peter's stuff doesnt have?

Ingo

2007-05-21 06:42:21

by Bill Huey

[permalink] [raw]
Subject: Re: [PATCH] lockdep: lock contention tracking

On Mon, May 21, 2007 at 08:08:28AM +0200, Ingo Molnar wrote:
> To me it appears Peter's stuff is already a pretty complete solution on
> its own, and it's a whole lot simpler (and less duplicative) than your
> lockstat patch. Could you list the specific items/features that you
> think Peter's stuff doesnt have?

First of all, this isn't an either/or kind of thing nor should it be thought
of in that way.

Precise file/function/line placement for one thing. My patch is specifically
for -rt which does checks that Peter's doesn't and is needed to characterize
-rt better. My stuff is potentially more extensible since I have other ideas
for it that really are outside of the lockdep logic currently. These can be
unified, but not so that one overrides the intended features of other. That's
why I was hessitant to completely unify with lockdep in the manner you
suggested.

bill

2007-05-21 07:50:31

by Ingo Molnar

[permalink] [raw]
Subject: Re: [PATCH] lockdep: lock contention tracking


* Bill Huey <[email protected]> wrote:

> On Mon, May 21, 2007 at 08:08:28AM +0200, Ingo Molnar wrote:
> > To me it appears Peter's stuff is already a pretty complete solution on
> > its own, and it's a whole lot simpler (and less duplicative) than your
> > lockstat patch. Could you list the specific items/features that you
> > think Peter's stuff doesnt have?
>
> First of all, this isn't an either/or kind of thing nor should it be
> thought of in that way.

why? Please be specific.

> Precise file/function/line placement for one thing. [...]

Have you looked at the output Peter's patch produces? It prints out
precise symbols:

dcache_lock: 3000 0 [618] [<ffffffff8033badd>] _atomic_dec_and_lock+0x39/0x58

which can easily be turned into line numbers using debuginfo packages or
using gdb. (But normally one only needs the symbol name, and we
certainly do not want to burden the kernel source with tracking
__FILE__/__LINE__ metadata, if the same is already available via
CONFIG_DEBUG_INFO.)

anything else?

> [...] My stuff is potentially more extensible since I have other ideas
> for it that really are outside of the lockdep logic currently. [...]

what do you mean, specifically?

> That's why I was hessitant to completely unify with lockdep in the
> manner you suggested.

i really need specifics. Currently i have the choice between your stuff:

17 files changed, 1425 insertions(+), 80 deletions(-)

and Peter's patch:

6 files changed, 266 insertions(+), 18 deletions(-)

and Peter's patch (if it works out fine in testing - and it seemed fine
so far on my testbox), is smaller, more maintainable, better integrated
and thus the clear candidate for merging into -rt and merging upstream
as well. It's far cleaner than i hoped this whole lock-stats thing could
be done based on lockdep, so i'm pretty happy with Peter's current patch
already.

Ingo

2007-05-21 09:19:23

by Bill Huey

[permalink] [raw]
Subject: Re: [PATCH] lockdep: lock contention tracking

On Mon, May 21, 2007 at 09:50:13AM +0200, Ingo Molnar wrote:
> Have you looked at the output Peter's patch produces? It prints out
> precise symbols:
>
> dcache_lock: 3000 0 [618] [<ffffffff8033badd>] _atomic_dec_and_lock+0x39/0x58
>
> which can easily be turned into line numbers using debuginfo packages or
> using gdb. (But normally one only needs the symbol name, and we
> certainly do not want to burden the kernel source with tracking
> __FILE__/__LINE__ metadata, if the same is already available via
> CONFIG_DEBUG_INFO.)
>
> anything else?

If his hashing scheme can produce precise locations of where locks are
initialized both by a initializer function or a statically allocated
object then my code is baroque and you should use Peter's code.

I write lockstat without the knowledge that lockdep was replicating the
same work and I audited 1600 something lock points in the kernel to
convert the usage of C99 style initializers to something more regular.

I also did this without consideration of things like debuginfo since
I don't use those things.

> > [...] My stuff is potentially more extensible since I have other ideas
> > for it that really are outside of the lockdep logic currently. [...]
>
> what do you mean, specifically?

Better if I show you the patches in the future instead of saying now.

> i really need specifics. Currently i have the choice between your stuff:
>
> 17 files changed, 1425 insertions(+), 80 deletions(-)
>
> and Peter's patch:
>
> 6 files changed, 266 insertions(+), 18 deletions(-)
>
> and Peter's patch (if it works out fine in testing - and it seemed fine
> so far on my testbox), is smaller, more maintainable, better integrated
> and thus the clear candidate for merging into -rt and merging upstream
> as well. It's far cleaner than i hoped this whole lock-stats thing could
> be done based on lockdep, so i'm pretty happy with Peter's current patch
> already.

If it meets your criteria and what you mentioned about is completely
accurate, then use it instead of mine. I'll just finish up what I have
done with reader tracking in my lockstat and migrate my -rt specific
goodies to his infrastructure.

bill

2007-05-21 09:37:01

by Ingo Molnar

[permalink] [raw]
Subject: Re: [PATCH] lockdep: lock contention tracking


* Bill Huey <[email protected]> wrote:

> I write lockstat without the knowledge that lockdep was replicating
> the same work and I audited 1600 something lock points in the kernel
> to convert the usage of C99 style initializers to something more
> regular.

you got the history wrong i think: the first version of lockdep was
released to lkml a year ago (May 2006), while the first time you
mentioned your lock contention patch was November 2006 and you released
it to lkml in December 2006 - so it was _you_ who was "replicating the
same work", not lockdep :-) And this was pointed out to you very early
on, many months ago.

and regarding C99 style lock initializers: the -rt project has been
removing a whole heap of them in the past 2.5 years, since Oct 2004 or
so, and regularly cleansed the upstream kernel for old-style
initializers ever since then - so i'm not sure what you are referring
to.

> I also did this without consideration of things like debuginfo since I
> don't use those things.

btw., you dont even need CONFIG_DEBUG_INFO to get usable symbol names,
CONFIG_KALLSYMS alone will do it too. (It's only if you really cannot
tell from the lock symbol name and the function name what the entry is
about - which is very rare - that you need to look at any debug-info)

Ingo

2007-05-21 09:56:02

by Christoph Hellwig

[permalink] [raw]
Subject: Re: [PATCH] lockdep: lock contention tracking

On Mon, May 21, 2007 at 11:36:39AM +0200, Ingo Molnar wrote:
> you got the history wrong i think: the first version of lockdep was
> released to lkml a year ago (May 2006), while the first time you
> mentioned your lock contention patch was November 2006 and you released
> it to lkml in December 2006 - so it was _you_ who was "replicating the
> same work", not lockdep :-) And this was pointed out to you very early
> on, many months ago.

And lockmeter, the very first patch of this sort is from the 90s, but
got mostly ignored here on lkml, of course :)

2007-05-21 10:22:04

by Bill Huey

[permalink] [raw]
Subject: Re: [PATCH] lockdep: lock contention tracking

On Mon, May 21, 2007 at 11:36:39AM +0200, Ingo Molnar wrote:
> you got the history wrong i think: the first version of lockdep was
> released to lkml a year ago (May 2006), while the first time you
> mentioned your lock contention patch was November 2006 and you released
> it to lkml in December 2006 - so it was _you_ who was "replicating the
> same work", not lockdep :-) And this was pointed out to you very early
> on, many months ago.

Yeah, and where do we disagree here again ? So I take it you're disagreeing
with my agreement with you that lockdep came first ? Geez, think about that
one for a bit. (chuckle) :)

I'd like to remind you that I mapped out the lock hierarchy for a fully
preemptive -rt kernel while you and *others* were wanking around with
voluntary preempt remember ? :) Keep in mind, I'm single obsessed with -rt.

[back to the topic]

> and regarding C99 style lock initializers: the -rt project has been
> removing a whole heap of them in the past 2.5 years, since Oct 2004 or
> so, and regularly cleansed the upstream kernel for old-style
> initializers ever since then - so i'm not sure what you are referring
> to.

Don't worry about it. I did the same work only to realize that there wasn't
much left to convert over.

> btw., you dont even need CONFIG_DEBUG_INFO to get usable symbol names,
> CONFIG_KALLSYMS alone will do it too. (It's only if you really cannot
> tell from the lock symbol name and the function name what the entry is
> about - which is very rare - that you need to look at any debug-info)

I'm anal about these things. I thought that you can do more magic than that
from your previous email, but it just confirms my understanding of how
symbols work already, unless there was a meltdown of the universal physical
laws here or something. That's why I made the choices I did.

The inode initialization code is ambiguous which is why having a specific
line number was very useful. It showed that one of the locks protecting a
tree was heavily hit. There was multipule places in which it could have
been if I hadn't had this information.

Sleep time...

bill

2007-05-21 10:22:33

by Bill Huey

[permalink] [raw]
Subject: Re: [PATCH] lockdep: lock contention tracking

On Mon, May 21, 2007 at 10:55:47AM +0100, Christoph Hellwig wrote:
> On Mon, May 21, 2007 at 11:36:39AM +0200, Ingo Molnar wrote:
> > you got the history wrong i think: the first version of lockdep was
> > released to lkml a year ago (May 2006), while the first time you
> > mentioned your lock contention patch was November 2006 and you released
> > it to lkml in December 2006 - so it was _you_ who was "replicating the
> > same work", not lockdep :-) And this was pointed out to you very early
> > on, many months ago.
>
> And lockmeter, the very first patch of this sort is from the 90s, but
> got mostly ignored here on lkml, of course :)

Unfortunately, it's not nearly as cool as my patch by default because I wrote
it. :)

bill

2007-05-21 10:29:58

by Bill Huey

[permalink] [raw]
Subject: Re: [PATCH] lockdep: lock contention tracking

On Mon, May 21, 2007 at 03:19:46AM -0700, Bill Huey wrote:
> On Mon, May 21, 2007 at 11:36:39AM +0200, Ingo Molnar wrote:
> > you got the history wrong i think: the first version of lockdep was
> > released to lkml a year ago (May 2006), while the first time you
> > mentioned your lock contention patch was November 2006 and you released
> > it to lkml in December 2006 - so it was _you_ who was "replicating the
> > same work", not lockdep :-) And this was pointed out to you very early
> > on, many months ago.
>
> Yeah, and where do we disagree here again ? So I take it you're disagreeing
> with my agreement with you that lockdep came first ? Geez, think about that
> one for a bit. (chuckle) :)

Yeah, sorry about the wording reversal. It was unintentional. I tend to drop
minor words like "not" and stuff which can create confusion.

bill

2007-05-21 10:58:21

by Ingo Molnar

[permalink] [raw]
Subject: Re: [PATCH] lockdep: lock contention tracking


* Christoph Hellwig <[email protected]> wrote:

> On Mon, May 21, 2007 at 11:36:39AM +0200, Ingo Molnar wrote:
> > you got the history wrong i think: the first version of lockdep was
> > released to lkml a year ago (May 2006), while the first time you
> > mentioned your lock contention patch was November 2006 and you released
> > it to lkml in December 2006 - so it was _you_ who was "replicating the
> > same work", not lockdep :-) And this was pointed out to you very early
> > on, many months ago.
>
> And lockmeter, the very first patch of this sort is from the 90s, but
> got mostly ignored here on lkml, of course :)

i certainly did not ignore lockmeter - i periodically tried it and even
considered adding it to -rt. But it was always pretty problematic from
an impact POV:

23 files changed, 2659 insertions(+), 13 deletions(-)

and nobody pushed strong enough to get it included. But ... Peter's
patch could perhaps be extended to cover similar stats as lockmeter,
ontop of the existing lockdep instrumentation. Peter, can you see any
particular roadblocks with that?

Ingo

2007-05-21 12:47:21

by Ingo Molnar

[permalink] [raw]
Subject: Re: [PATCH] lockdep: lock contention tracking


* Bill Huey <[email protected]> wrote:

> > and regarding C99 style lock initializers: the -rt project has been
> > removing a whole heap of them in the past 2.5 years, since Oct 2004
> > or so, and regularly cleansed the upstream kernel for old-style
> > initializers ever since then - so i'm not sure what you are
> > referring to.
>
> Don't worry about it. I did the same work only to realize that there
> wasn't much left to convert over.

i dont worry about it the least, i only replied to this sentence of
yours:

> > > I write lockstat without the knowledge that lockdep was
> > > replicating the same work and I audited 1600 something lock points
> > > in the kernel to convert the usage of C99 style initializers to
> > > something more regular.

which combines into this statement of yours: "I audited 1600 something
lock points in the kernel to convert the usage of C99 style initializers
something more regular, only to find out that there wasn't much left to
convert over?", correct? Which begs the question: why did you mention
this then at all? I usually reply to points made by others in the
assumption that there's some meaning behind them ;-)

Ingo

2007-05-21 19:09:54

by Bill Huey

[permalink] [raw]
Subject: Re: [PATCH] lockdep: lock contention tracking

On Mon, May 21, 2007 at 02:46:55PM +0200, Ingo Molnar wrote:
> which combines into this statement of yours: "I audited 1600 something
> lock points in the kernel to convert the usage of C99 style initializers
> something more regular, only to find out that there wasn't much left to
> convert over?", correct? Which begs the question: why did you mention
> this then at all? I usually reply to points made by others in the
> assumption that there's some meaning behind them ;-)

It was about how much time I wasted replicating work that I didn't know
about. Some folks have different communication styles and it isn't ment
to be anything further than that.

Me and Peter are talking about possibly merging parts of each other's
patch. I've been working on splitting up the reader/writer paths so that
each type of contention is logged as well for tracking reader contentions
against that existing rwsem problem and the like, but my stuff doesn't
boot yet (working part time drags things). I/We'll keep you updated.

bill

2007-05-21 19:18:03

by Bill Huey

[permalink] [raw]
Subject: Re: [PATCH] lockdep: lock contention tracking

On Mon, May 21, 2007 at 12:58:03PM +0200, Ingo Molnar wrote:
> and nobody pushed strong enough to get it included. But ... Peter's
> patch could perhaps be extended to cover similar stats as lockmeter,
> ontop of the existing lockdep instrumentation. Peter, can you see any
> particular roadblocks with that?

Definitely. Lockmeter isn't terribly Linux-ish from my examination of
that patch a while back. Doing it against lockdep is definitely the
right thing to do in that it unifies lock handling through initializer
keys that lockmeter doesn't, from my memory.

The spin time tracking can be put into the slow path of the spin, like
what peter has now, so that it has minimal impact against the uncontended
case. Updating the times would then be a trivial pointer dereference
plus add and hopefully won't have instrumentation side effects against
the rest of the locking behavior in the system.

bill

2007-05-21 21:18:46

by Jason Baron

[permalink] [raw]
Subject: Re: [PATCH] lockdep: lock contention tracking


On Sun, 20 May 2007, Peter Zijlstra wrote:

>
> Add lock contention tracking to lockdep
>

looks really nice to me.

To me, in addition to the number of times the lock is contended we also
need the total number of times the lock was acquired, to make the
statistics significant. I've included one such patch below, which also
introduces a 'event', thiking of things like lock timings that were
mentioned, but this is not necessary.

thanks,

-Jason


Signed-off-by: Jason Baron <[email protected]>

--- linux-2.6.22-rc2-lockdep/include/linux/lockdep.h.bak 2007-05-21 15:55:30.000000000 -0400
+++ linux-2.6.22-rc2-lockdep/include/linux/lockdep.h 2007-05-21 16:30:13.000000000 -0400
@@ -36,6 +36,17 @@
};

/*
+ * lock contention types
+ */
+
+enum lock_contention_event
+{
+ LOCK_CONTENTION_WRITE = 0,
+ LOCK_CONTENTION_READ,
+ LOCK_CONTENTION_TOTAL
+};
+
+/*
* Usage-state bitmasks:
*/
#define LOCKF_USED (1 << LOCK_USED)
@@ -121,6 +132,7 @@
const char *name;
int name_version;

+ atomic_t total;
atomic_t read_contentions;
atomic_t write_contentions;
struct lockdep_contention_point contention_point[4];
@@ -253,7 +265,7 @@
extern void lock_release(struct lockdep_map *lock, int nested,
unsigned long ip);

-extern void lock_contended(struct lockdep_map *lock, int read,
+extern void lock_contended(struct lockdep_map *lock, enum lock_contention_event,
unsigned long ip);

# define INIT_LOCKDEP .lockdep_recursion = 0,
--- linux-2.6.22-rc2-lockdep/kernel/lockdep_proc.c.bak 2007-05-21 16:10:12.000000000 -0400
+++ linux-2.6.22-rc2-lockdep/kernel/lockdep_proc.c 2007-05-21 16:11:38.000000000 -0400
@@ -348,16 +348,17 @@
struct lock_class *class;

list_for_each_entry(class, &all_lock_classes, lock_entry) {
- int r, w;
+ int r, w, t;

r = atomic_read(&class->read_contentions);
w = atomic_read(&class->write_contentions);
+ t = atomic_read(&class->total);

if (r || w) {
int i;
char sym[KSYM_SYMBOL_LEN];

- seq_printf(m, "%s: %d %d", class->name, w, r);
+ seq_printf(m, "%s: %d %d %d", class->name, w, r, t);
for (i = 0; i < ARRAY_SIZE(class->contention_point);
i++) {
struct lockdep_contention_point *cp =
--- linux-2.6.22-rc2-lockdep/kernel/lockdep.c.bak 2007-05-21 16:02:05.000000000 -0400
+++ linux-2.6.22-rc2-lockdep/kernel/lockdep.c 2007-05-21 16:29:35.000000000 -0400
@@ -2429,7 +2429,7 @@
}

static void
-__lock_contended(struct lockdep_map *lock, int read, unsigned long ip)
+__lock_contended(struct lockdep_map *lock, enum lock_contention_event event, unsigned long ip)
{
struct task_struct *curr = current;
struct held_lock *hlock, *prev_hlock;
@@ -2463,10 +2463,17 @@
return;

found_it:
- if (read)
+ switch (event) {
+ case LOCK_CONTENTION_READ:
atomic_inc(&class->read_contentions);
- else
+ break;
+ case LOCK_CONTENTION_WRITE:
atomic_inc(&class->write_contentions);
+ break;
+ case LOCK_CONTENTION_TOTAL:
+ atomic_inc(&class->total);
+ return;
+ }

for (i = 0; i < ARRAY_SIZE(class->contention_point); i++) {
if (class->contention_point[i].ip == 0) {
@@ -2530,6 +2537,7 @@
current->lockdep_recursion = 1;
__lock_acquire(lock, subclass, trylock, read, check,
irqs_disabled_flags(flags), ip);
+ __lock_contended(lock, LOCK_CONTENTION_TOTAL, 0);
current->lockdep_recursion = 0;
raw_local_irq_restore(flags);
}
@@ -2553,7 +2561,7 @@

EXPORT_SYMBOL_GPL(lock_release);

-void lock_contended(struct lockdep_map *lock, int read, unsigned long ip)
+void lock_contended(struct lockdep_map *lock, enum lock_contention_event event, unsigned long ip)
{
unsigned long flags;

@@ -2563,7 +2571,7 @@
raw_local_irq_save(flags);
check_flags(flags);
current->lockdep_recursion = 1;
- __lock_contended(lock, read, ip);
+ __lock_contended(lock, event, ip);
current->lockdep_recursion = 0;
raw_local_irq_restore(flags);
}