2014-06-26 17:39:34

by Waiman Long

[permalink] [raw]
Subject: [PATCH v5 0/2] lockdep: add support for queued rwlock

v4->v5:
- Add patch 2 to update the locking selftest code to handle recursive
read_lock correctly. Patch 1 has no change.

v3->v4:
- Document the new read state and move the conditional compilation code
to lockdep.h.

v2->v3:
- Add a new read mode (3) for rwlock (used in
lock_acquire_shared_cond_recursive()) to avoid conflict with other
use cases of lock_acquire_shared_recursive().

v1->v2:
- Use less conditional & make it easier to read

With the merging of qrwlock into 3.16, it was found that the btrfs
filesystem hung readily. A fix was devised and merged into rc2, and
the use of recursive read_lock calls was part of the problem.

This patch series adds code to the lockdep subsystem to catch this
kind of recursive read_lock call in kernel code. It also updates
the locking selftest to handle recursive read_lock correctly so that
it won't complain about test failures whether or not queue rwlock
is configured.

Waiman Long (2):
lockdep: restrict the use of recursive read_lock with qrwlock
locking-selftest: Support queue rwlock

include/linux/lockdep.h | 12 ++++++++++++
kernel/locking/lockdep.c | 6 ++++++
lib/locking-selftest.c | 16 +++++++++++++---
3 files changed, 31 insertions(+), 3 deletions(-)


2014-06-26 17:39:37

by Waiman Long

[permalink] [raw]
Subject: [PATCH v5 1/2] lockdep: restrict the use of recursive read_lock with qrwlock

Unlike the original unfair rwlock implementation, queued rwlock
will grant lock according to the chronological sequence of the lock
requests except when the lock requester is in the interrupt context.
Consequently, recursive read_lock calls will now hang the process if
there is a write_lock call somewhere in between the read_lock calls.

This patch updates the lockdep implementation to look for recursive
read_lock calls when queued rwlock is being used. A new read state (3)
is used to mark those read_lock calls that cannot be recursively made
except in the interrupt context. The new read state does exhaust the
2 bits available in the held_lock:read bit field. The addition of any new
read state in the future may require a redesign of how all those bits
are squeezed together in the held_lock structure.

Signed-off-by: Waiman Long <[email protected]>
---
include/linux/lockdep.h | 12 ++++++++++++
kernel/locking/lockdep.c | 6 ++++++
2 files changed, 18 insertions(+), 0 deletions(-)

diff --git a/include/linux/lockdep.h b/include/linux/lockdep.h
index 008388f..c7fd62d 100644
--- a/include/linux/lockdep.h
+++ b/include/linux/lockdep.h
@@ -478,16 +478,28 @@ static inline void print_irqtrace_events(struct task_struct *curr)
* on the per lock-class debug mode:
*/

+/*
+ * Read states in the 2-bit held_lock:read field:
+ * 0: Exclusive lock
+ * 1: Shareable lock, cannot be recursively called
+ * 2: Shareable lock, can be recursively called
+ * 3: Shareable lock, cannot be recursively called except in interrupt context
+ */
#define lock_acquire_exclusive(l, s, t, n, i) lock_acquire(l, s, t, 0, 1, n, i)
#define lock_acquire_shared(l, s, t, n, i) lock_acquire(l, s, t, 1, 1, n, i)
#define lock_acquire_shared_recursive(l, s, t, n, i) lock_acquire(l, s, t, 2, 1, n, i)
+#define lock_acquire_shared_irecursive(l, s, t, n, i) lock_acquire(l, s, t, 3, 1, n, i)

#define spin_acquire(l, s, t, i) lock_acquire_exclusive(l, s, t, NULL, i)
#define spin_acquire_nest(l, s, t, n, i) lock_acquire_exclusive(l, s, t, n, i)
#define spin_release(l, n, i) lock_release(l, n, i)

#define rwlock_acquire(l, s, t, i) lock_acquire_exclusive(l, s, t, NULL, i)
+#ifdef CONFIG_QUEUE_RWLOCK
+#define rwlock_acquire_read(l, s, t, i) lock_acquire_shared_irecursive(l, s, t, NULL, i)
+#else
#define rwlock_acquire_read(l, s, t, i) lock_acquire_shared_recursive(l, s, t, NULL, i)
+#endif
#define rwlock_release(l, n, i) lock_release(l, n, i)

#define seqcount_acquire(l, s, t, i) lock_acquire_exclusive(l, s, t, NULL, i)
diff --git a/kernel/locking/lockdep.c b/kernel/locking/lockdep.c
index d24e433..879bb4c 100644
--- a/kernel/locking/lockdep.c
+++ b/kernel/locking/lockdep.c
@@ -1774,6 +1774,12 @@ check_deadlock(struct task_struct *curr, struct held_lock *next,
return 2;

/*
+ * Recursive read-lock allowed only in interrupt context
+ */
+ if ((read == 3) && prev->read && in_interrupt())
+ return 2;
+
+ /*
* We're holding the nest_lock, which serializes this lock's
* nesting behaviour.
*/
--
1.7.1

2014-06-26 17:39:41

by Waiman Long

[permalink] [raw]
Subject: [PATCH v5 2/2] locking-selftest: Support queue rwlock

The queue rwlock does not support the use of recursive read-lock in
the process context. With changes in the lockdep code to check and
disallow recursive read-lock when queue rwlock is configured, it
is also necessary for the locking selftest to be updated to change
the process context recursive read locking results from SUCCESS to
FAILURE for queue rwlock.

Signed-off-by: Waiman Long <[email protected]>
---
lib/locking-selftest.c | 16 +++++++++++++---
1 files changed, 13 insertions(+), 3 deletions(-)

diff --git a/lib/locking-selftest.c b/lib/locking-selftest.c
index 872a15a..0ba8816 100644
--- a/lib/locking-selftest.c
+++ b/lib/locking-selftest.c
@@ -940,6 +940,16 @@ GENERATE_PERMUTATIONS_3_EVENTS(irq_read_recursion_soft)
init_rwsem(&rwsem_##x); \
} while (0)

+/*
+ * If queue rwlock is used, recursive read-lock is not allowed in the
+ * process context. It is allowed in the interrupt context.
+ */
+#ifdef CONFIG_QUEUE_RWLOCK
+#define RRLOCK_RESULT FAILURE
+#else
+#define RRLOCK_RESULT SUCCESS
+#endif
+
static void reset_locks(void)
{
local_irq_disable();
@@ -1069,7 +1079,7 @@ static inline void print_testname(const char *testname)
print_testname(desc); \
dotest(name##_spin, FAILURE, LOCKTYPE_SPIN); \
dotest(name##_wlock, FAILURE, LOCKTYPE_RWLOCK); \
- dotest(name##_rlock, SUCCESS, LOCKTYPE_RWLOCK); \
+ dotest(name##_rlock, RRLOCK_RESULT, LOCKTYPE_RWLOCK); \
dotest(name##_mutex, FAILURE, LOCKTYPE_MUTEX); \
dotest(name##_wsem, FAILURE, LOCKTYPE_RWSEM); \
dotest(name##_rsem, FAILURE, LOCKTYPE_RWSEM); \
@@ -1830,14 +1840,14 @@ void locking_selftest(void)
printk(" --------------------------------------------------------------------------\n");
print_testname("recursive read-lock");
printk(" |");
- dotest(rlock_AA1, SUCCESS, LOCKTYPE_RWLOCK);
+ dotest(rlock_AA1, RRLOCK_RESULT, LOCKTYPE_RWLOCK);
printk(" |");
dotest(rsem_AA1, FAILURE, LOCKTYPE_RWSEM);
printk("\n");

print_testname("recursive read-lock #2");
printk(" |");
- dotest(rlock_AA1B, SUCCESS, LOCKTYPE_RWLOCK);
+ dotest(rlock_AA1B, RRLOCK_RESULT, LOCKTYPE_RWLOCK);
printk(" |");
dotest(rsem_AA1B, FAILURE, LOCKTYPE_RWSEM);
printk("\n");
--
1.7.1

2014-07-07 12:50:30

by Peter Zijlstra

[permalink] [raw]
Subject: Re: [PATCH v5 0/2] lockdep: add support for queued rwlock

On Thu, Jun 26, 2014 at 01:39:09PM -0400, Waiman Long wrote:
> v4->v5:
> - Add patch 2 to update the locking selftest code to handle recursive
> read_lock correctly. Patch 1 has no change.

I removed all CONFIG_QUEUE_RWLOCK dependencies and made lockdep
unconditionally assume the stronger constraints.

Since we want all code 'clean' for the strongest possible
implementation, everybody should run with those semantics, it doesn't
make sense to have that configurable.

Eg. someone on (say ARM, which doesn't -- yet -- have QUEUE_RWLOCK)
could unwittingly introduce faulty code.


Attachments:
(No filename) (584.00 B)
(No filename) (836.00 B)
Download all attachments

2014-07-15 19:20:04

by Waiman Long

[permalink] [raw]
Subject: Re: [PATCH v5 0/2] lockdep: add support for queued rwlock

On 07/07/2014 08:50 AM, Peter Zijlstra wrote:
> On Thu, Jun 26, 2014 at 01:39:09PM -0400, Waiman Long wrote:
>> v4->v5:
>> - Add patch 2 to update the locking selftest code to handle recursive
>> read_lock correctly. Patch 1 has no change.
> I removed all CONFIG_QUEUE_RWLOCK dependencies and made lockdep
> unconditionally assume the stronger constraints.
>
> Since we want all code 'clean' for the strongest possible
> implementation, everybody should run with those semantics, it doesn't
> make sense to have that configurable.
>
> Eg. someone on (say ARM, which doesn't -- yet -- have QUEUE_RWLOCK)
> could unwittingly introduce faulty code.

I think this is a better way to go. I didn't take this way in my patch
for fear that I may be pushing too much.

-Longman

Subject: [tip:locking/core] locking/selftest: Support queued rwlock

Commit-ID: 586fefe5bbdc931fb0725b850f7002f6d71a1aa3
Gitweb: http://git.kernel.org/tip/586fefe5bbdc931fb0725b850f7002f6d71a1aa3
Author: Waiman Long <[email protected]>
AuthorDate: Thu, 26 Jun 2014 13:39:11 -0400
Committer: Ingo Molnar <[email protected]>
CommitDate: Thu, 17 Jul 2014 12:32:53 +0200

locking/selftest: Support queued rwlock

The queued rwlock does not support the use of recursive read-lock in
the process context. With changes in the lockdep code to check and
disallow recursive read-lock when queued rwlock is configured, it
is also necessary for the locking selftest to be updated to change
the process context recursive read locking results from SUCCESS to
FAILURE for queued rwlock.

Cc: Scott J Norton <[email protected]>
Cc: Fengguang Wu <[email protected]>
Cc: Maarten Lankhorst <[email protected]>
Cc: Rik van Riel <[email protected]>
Signed-off-by: Waiman Long <[email protected]>
Signed-off-by: Peter Zijlstra <[email protected]>
Link: http://lkml.kernel.org/r/[email protected]
Cc: Linus Torvalds <[email protected]>
Cc: [email protected]
Signed-off-by: Ingo Molnar <[email protected]>
---
lib/locking-selftest.c | 6 +++---
1 file changed, 3 insertions(+), 3 deletions(-)

diff --git a/lib/locking-selftest.c b/lib/locking-selftest.c
index 872a15a..596934d 100644
--- a/lib/locking-selftest.c
+++ b/lib/locking-selftest.c
@@ -1069,7 +1069,7 @@ static inline void print_testname(const char *testname)
print_testname(desc); \
dotest(name##_spin, FAILURE, LOCKTYPE_SPIN); \
dotest(name##_wlock, FAILURE, LOCKTYPE_RWLOCK); \
- dotest(name##_rlock, SUCCESS, LOCKTYPE_RWLOCK); \
+ dotest(name##_rlock, FAILURE, LOCKTYPE_RWLOCK); \
dotest(name##_mutex, FAILURE, LOCKTYPE_MUTEX); \
dotest(name##_wsem, FAILURE, LOCKTYPE_RWSEM); \
dotest(name##_rsem, FAILURE, LOCKTYPE_RWSEM); \
@@ -1830,14 +1830,14 @@ void locking_selftest(void)
printk(" --------------------------------------------------------------------------\n");
print_testname("recursive read-lock");
printk(" |");
- dotest(rlock_AA1, SUCCESS, LOCKTYPE_RWLOCK);
+ dotest(rlock_AA1, FAILURE, LOCKTYPE_RWLOCK);
printk(" |");
dotest(rsem_AA1, FAILURE, LOCKTYPE_RWSEM);
printk("\n");

print_testname("recursive read-lock #2");
printk(" |");
- dotest(rlock_AA1B, SUCCESS, LOCKTYPE_RWLOCK);
+ dotest(rlock_AA1B, FAILURE, LOCKTYPE_RWLOCK);
printk(" |");
dotest(rsem_AA1B, FAILURE, LOCKTYPE_RWSEM);
printk("\n");

Subject: [tip:locking/core] locking/lockdep: Restrict the use of recursive read_lock() with qrwlock

Commit-ID: e0645a111cb44e01adc6bfff34f683323863f4d2
Gitweb: http://git.kernel.org/tip/e0645a111cb44e01adc6bfff34f683323863f4d2
Author: Waiman Long <[email protected]>
AuthorDate: Thu, 26 Jun 2014 13:39:10 -0400
Committer: Ingo Molnar <[email protected]>
CommitDate: Thu, 17 Jul 2014 12:32:52 +0200

locking/lockdep: Restrict the use of recursive read_lock() with qrwlock

Unlike the original unfair rwlock implementation, queued rwlock
will grant lock according to the chronological sequence of the lock
requests except when the lock requester is in the interrupt context.
Consequently, recursive read_lock calls will now hang the process if
there is a write_lock call somewhere in between the read_lock calls.

This patch updates the lockdep implementation to look for recursive
read_lock calls when queued rwlock is being used. A new read state (3)
is used to mark those read_lock calls that cannot be recursively made
except in the interrupt context. The new read state does exhaust the
2 bits available in the held_lock:read bit field. The addition of any new
read state in the future may require a redesign of how all those bits
are squeezed together in the held_lock structure.

Signed-off-by: Waiman Long <[email protected]>
Signed-off-by: Peter Zijlstra <[email protected]>
Cc: Scott J Norton <[email protected]>
Cc: Fengguang Wu <[email protected]>
Cc: Maarten Lankhorst <[email protected]>
Cc: Rik van Riel <[email protected]>
Cc: Linus Torvalds <[email protected]>
Link: http://lkml.kernel.org/r/[email protected]
Signed-off-by: Ingo Molnar <[email protected]>
---
include/linux/lockdep.h | 10 +++++++++-
kernel/locking/lockdep.c | 6 ++++++
2 files changed, 15 insertions(+), 1 deletion(-)

diff --git a/include/linux/lockdep.h b/include/linux/lockdep.h
index 008388f9..dadd6ba 100644
--- a/include/linux/lockdep.h
+++ b/include/linux/lockdep.h
@@ -478,16 +478,24 @@ static inline void print_irqtrace_events(struct task_struct *curr)
* on the per lock-class debug mode:
*/

+/*
+ * Read states in the 2-bit held_lock:read field:
+ * 0: Exclusive lock
+ * 1: Shareable lock, cannot be recursively called
+ * 2: Shareable lock, can be recursively called
+ * 3: Shareable lock, cannot be recursively called except in interrupt context
+ */
#define lock_acquire_exclusive(l, s, t, n, i) lock_acquire(l, s, t, 0, 1, n, i)
#define lock_acquire_shared(l, s, t, n, i) lock_acquire(l, s, t, 1, 1, n, i)
#define lock_acquire_shared_recursive(l, s, t, n, i) lock_acquire(l, s, t, 2, 1, n, i)
+#define lock_acquire_shared_irecursive(l, s, t, n, i) lock_acquire(l, s, t, 3, 1, n, i)

#define spin_acquire(l, s, t, i) lock_acquire_exclusive(l, s, t, NULL, i)
#define spin_acquire_nest(l, s, t, n, i) lock_acquire_exclusive(l, s, t, n, i)
#define spin_release(l, n, i) lock_release(l, n, i)

#define rwlock_acquire(l, s, t, i) lock_acquire_exclusive(l, s, t, NULL, i)
-#define rwlock_acquire_read(l, s, t, i) lock_acquire_shared_recursive(l, s, t, NULL, i)
+#define rwlock_acquire_read(l, s, t, i) lock_acquire_shared_irecursive(l, s, t, NULL, i)
#define rwlock_release(l, n, i) lock_release(l, n, i)

#define seqcount_acquire(l, s, t, i) lock_acquire_exclusive(l, s, t, NULL, i)
diff --git a/kernel/locking/lockdep.c b/kernel/locking/lockdep.c
index 88d0d44..be83c3c 100644
--- a/kernel/locking/lockdep.c
+++ b/kernel/locking/lockdep.c
@@ -1776,6 +1776,12 @@ check_deadlock(struct task_struct *curr, struct held_lock *next,
return 2;

/*
+ * Recursive read-lock allowed only in interrupt context
+ */
+ if ((read == 3) && prev->read && in_interrupt())
+ return 2;
+
+ /*
* We're holding the nest_lock, which serializes this lock's
* nesting behaviour.
*/