Subject: rt_mutex based ww_mutex implementation

On -RT we use struct rt_mutex instead of struct mutex for a regular "mutex".
And since people use GPUs there, too, we needed a port of ww_mutex based
on rt_mutex. I snipped the implementation out of -RT and tried to fit it
onto mainline without breaking too much. It doesn't look that bad now :)
#1 is here to keep the changes in #2 a little smaller.

Not sure if #3 makes sense without #2, but it looks like resetting the
task state was simply forgotten.

Sebastian


Subject: [PATCH 1/3] locking: ww_mutex: add one level of indirection for access of the lock

This level of indirection is one step towards replacing the "mutex" with
a different kind of locking mechanism.
The new functions of the form "__.*_lock" provide access to whatever is
hidden behind the "base" member of struct ww_mutex.
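
To illustrate, after this patch the ww_mutex entry points only ever see the
wrapper, so a later patch can change what hides behind ->base without touching
the call sites. A minimal sketch (names as introduced in the diff below):

	/* the wrapper is the only place that knows ->base is a struct mutex */
	static inline int __ww_mutex_trylock_lock(struct mutex *lock)
	{
		return mutex_trylock(lock);
	}

	static inline int __must_check ww_mutex_trylock(struct ww_mutex *lock)
	{
		/* stays the same even if ->base changes type later */
		return __ww_mutex_trylock_lock(&lock->base);
	}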

Signed-off-by: Sebastian Andrzej Siewior <[email protected]>
---
include/linux/ww_mutex.h | 48 ++++++++++++++++++++++++++++++++++++------
lib/locking-selftest.c | 54 ++++++++++++++++++++++++------------------------
2 files changed, 69 insertions(+), 33 deletions(-)

diff --git a/include/linux/ww_mutex.h b/include/linux/ww_mutex.h
index 760399a470bd..6e5d5ee3138d 100644
--- a/include/linux/ww_mutex.h
+++ b/include/linux/ww_mutex.h
@@ -72,6 +72,42 @@ struct ww_mutex {
#define DEFINE_WW_MUTEX(mutexname, ww_class) \
struct ww_mutex mutexname = __WW_MUTEX_INITIALIZER(mutexname, ww_class)

+static inline void __ww_mutex_init(struct mutex *lock,
+ struct ww_class *ww_class)
+{
+ __mutex_init(lock, ww_class->mutex_name, &ww_class->mutex_key);
+}
+
+static inline void __ww_mutex_lock_lock(struct mutex *lock)
+{
+ mutex_lock(lock);
+}
+
+static inline int __ww_mutex_lock_interruptible_lock(struct mutex *lock)
+{
+ return mutex_lock_interruptible(lock);
+}
+
+static inline int __ww_mutex_trylock_lock(struct mutex *lock)
+{
+ return mutex_trylock(lock);
+}
+
+static inline void __ww_mutex_destroy_lock(struct mutex *lock)
+{
+ mutex_destroy(lock);
+}
+
+static inline bool __ww_mutex_is_locked_lock(struct mutex *lock)
+{
+ return mutex_is_locked(lock);
+}
+
+static inline void __ww_mutex_unlock_lock(struct mutex *lock)
+{
+ mutex_unlock(lock);
+}
+
/**
* ww_mutex_init - initialize the w/w mutex
* @lock: the mutex to be initialized
@@ -85,7 +121,7 @@ struct ww_mutex {
static inline void ww_mutex_init(struct ww_mutex *lock,
struct ww_class *ww_class)
{
- __mutex_init(&lock->base, ww_class->mutex_name, &ww_class->mutex_key);
+ __ww_mutex_init(&lock->base, ww_class);
lock->ctx = NULL;
#ifdef CONFIG_DEBUG_MUTEXES
lock->ww_class = ww_class;
@@ -225,7 +261,7 @@ static inline int ww_mutex_lock(struct ww_mutex *lock, struct ww_acquire_ctx *ct
if (ctx)
return __ww_mutex_lock(lock, ctx);

- mutex_lock(&lock->base);
+ __ww_mutex_lock_lock(&lock->base);
return 0;
}

@@ -265,7 +301,7 @@ static inline int __must_check ww_mutex_lock_interruptible(struct ww_mutex *lock
if (ctx)
return __ww_mutex_lock_interruptible(lock, ctx);
else
- return mutex_lock_interruptible(&lock->base);
+ return __ww_mutex_lock_interruptible_lock(&lock->base);
}

/**
@@ -348,7 +384,7 @@ extern void ww_mutex_unlock(struct ww_mutex *lock);
*/
static inline int __must_check ww_mutex_trylock(struct ww_mutex *lock)
{
- return mutex_trylock(&lock->base);
+ return __ww_mutex_trylock_lock(&lock->base);
}

/***
@@ -361,7 +397,7 @@ static inline int __must_check ww_mutex_trylock(struct ww_mutex *lock)
*/
static inline void ww_mutex_destroy(struct ww_mutex *lock)
{
- mutex_destroy(&lock->base);
+ __ww_mutex_destroy_lock(&lock->base);
}

/**
@@ -372,7 +408,7 @@ static inline void ww_mutex_destroy(struct ww_mutex *lock)
*/
static inline bool ww_mutex_is_locked(struct ww_mutex *lock)
{
- return mutex_is_locked(&lock->base);
+ return __ww_mutex_is_locked_lock(&lock->base);
}

#endif
diff --git a/lib/locking-selftest.c b/lib/locking-selftest.c
index 872a15a2a637..2c35e38b4013 100644
--- a/lib/locking-selftest.c
+++ b/lib/locking-selftest.c
@@ -1179,15 +1179,15 @@ static void ww_test_normal(void)

/* mutex_lock (and indirectly, mutex_lock_nested) */
o.ctx = (void *)~0UL;
- mutex_lock(&o.base);
- mutex_unlock(&o.base);
+ __ww_mutex_lock_lock(&o.base);
+ __ww_mutex_unlock_lock(&o.base);
WARN_ON(o.ctx != (void *)~0UL);

/* mutex_lock_interruptible (and *_nested) */
o.ctx = (void *)~0UL;
- ret = mutex_lock_interruptible(&o.base);
+ ret = __ww_mutex_lock_interruptible_lock(&o.base);
if (!ret)
- mutex_unlock(&o.base);
+ __ww_mutex_unlock_lock(&o.base);
else
WARN_ON(1);
WARN_ON(o.ctx != (void *)~0UL);
@@ -1196,33 +1196,33 @@ static void ww_test_normal(void)
o.ctx = (void *)~0UL;
ret = mutex_lock_killable(&o.base);
if (!ret)
- mutex_unlock(&o.base);
+ __ww_mutex_unlock_lock(&o.base);
else
WARN_ON(1);
WARN_ON(o.ctx != (void *)~0UL);

/* trylock, succeeding */
o.ctx = (void *)~0UL;
- ret = mutex_trylock(&o.base);
+ ret = __ww_mutex_trylock_lock(&o.base);
WARN_ON(!ret);
if (ret)
- mutex_unlock(&o.base);
+ __ww_mutex_unlock_lock(&o.base);
else
WARN_ON(1);
WARN_ON(o.ctx != (void *)~0UL);

/* trylock, failing */
o.ctx = (void *)~0UL;
- mutex_lock(&o.base);
- ret = mutex_trylock(&o.base);
+ __ww_mutex_lock_lock(&o.base);
+ ret = __ww_mutex_trylock_lock(&o.base);
WARN_ON(ret);
- mutex_unlock(&o.base);
+ __ww_mutex_unlock_lock(&o.base);
WARN_ON(o.ctx != (void *)~0UL);

/* nest_lock */
o.ctx = (void *)~0UL;
mutex_lock_nest_lock(&o.base, &t);
- mutex_unlock(&o.base);
+ __ww_mutex_unlock_lock(&o.base);
WARN_ON(o.ctx != (void *)~0UL);
}

@@ -1299,7 +1299,7 @@ static void ww_test_edeadlk_normal(void)
{
int ret;

- mutex_lock(&o2.base);
+ __ww_mutex_lock_lock(&o2.base);
o2.ctx = &t2;
mutex_release(&o2.base.dep_map, 1, _THIS_IP_);

@@ -1315,7 +1315,7 @@ static void ww_test_edeadlk_normal(void)

o2.ctx = NULL;
mutex_acquire(&o2.base.dep_map, 0, 1, _THIS_IP_);
- mutex_unlock(&o2.base);
+ __ww_mutex_unlock_lock(&o2.base);
WWU(&o);

WWL(&o2, &t);
@@ -1325,7 +1325,7 @@ static void ww_test_edeadlk_normal_slow(void)
{
int ret;

- mutex_lock(&o2.base);
+ __ww_mutex_lock_lock(&o2.base);
mutex_release(&o2.base.dep_map, 1, _THIS_IP_);
o2.ctx = &t2;

@@ -1341,7 +1341,7 @@ static void ww_test_edeadlk_normal_slow(void)

o2.ctx = NULL;
mutex_acquire(&o2.base.dep_map, 0, 1, _THIS_IP_);
- mutex_unlock(&o2.base);
+ __ww_mutex_unlock_lock(&o2.base);
WWU(&o);

ww_mutex_lock_slow(&o2, &t);
@@ -1351,7 +1351,7 @@ static void ww_test_edeadlk_no_unlock(void)
{
int ret;

- mutex_lock(&o2.base);
+ __ww_mutex_lock_lock(&o2.base);
o2.ctx = &t2;
mutex_release(&o2.base.dep_map, 1, _THIS_IP_);

@@ -1367,7 +1367,7 @@ static void ww_test_edeadlk_no_unlock(void)

o2.ctx = NULL;
mutex_acquire(&o2.base.dep_map, 0, 1, _THIS_IP_);
- mutex_unlock(&o2.base);
+ __ww_mutex_unlock_lock(&o2.base);

WWL(&o2, &t);
}
@@ -1376,7 +1376,7 @@ static void ww_test_edeadlk_no_unlock_slow(void)
{
int ret;

- mutex_lock(&o2.base);
+ __ww_mutex_lock_lock(&o2.base);
mutex_release(&o2.base.dep_map, 1, _THIS_IP_);
o2.ctx = &t2;

@@ -1392,7 +1392,7 @@ static void ww_test_edeadlk_no_unlock_slow(void)

o2.ctx = NULL;
mutex_acquire(&o2.base.dep_map, 0, 1, _THIS_IP_);
- mutex_unlock(&o2.base);
+ __ww_mutex_unlock_lock(&o2.base);

ww_mutex_lock_slow(&o2, &t);
}
@@ -1401,7 +1401,7 @@ static void ww_test_edeadlk_acquire_more(void)
{
int ret;

- mutex_lock(&o2.base);
+ __ww_mutex_lock_lock(&o2.base);
mutex_release(&o2.base.dep_map, 1, _THIS_IP_);
o2.ctx = &t2;

@@ -1422,7 +1422,7 @@ static void ww_test_edeadlk_acquire_more_slow(void)
{
int ret;

- mutex_lock(&o2.base);
+ __ww_mutex_lock_lock(&o2.base);
mutex_release(&o2.base.dep_map, 1, _THIS_IP_);
o2.ctx = &t2;

@@ -1443,11 +1443,11 @@ static void ww_test_edeadlk_acquire_more_edeadlk(void)
{
int ret;

- mutex_lock(&o2.base);
+ __ww_mutex_lock_lock(&o2.base);
mutex_release(&o2.base.dep_map, 1, _THIS_IP_);
o2.ctx = &t2;

- mutex_lock(&o3.base);
+ __ww_mutex_lock_lock(&o3.base);
mutex_release(&o3.base.dep_map, 1, _THIS_IP_);
o3.ctx = &t2;

@@ -1469,11 +1469,11 @@ static void ww_test_edeadlk_acquire_more_edeadlk_slow(void)
{
int ret;

- mutex_lock(&o2.base);
+ __ww_mutex_lock_lock(&o2.base);
mutex_release(&o2.base.dep_map, 1, _THIS_IP_);
o2.ctx = &t2;

- mutex_lock(&o3.base);
+ __ww_mutex_lock_lock(&o3.base);
mutex_release(&o3.base.dep_map, 1, _THIS_IP_);
o3.ctx = &t2;

@@ -1494,7 +1494,7 @@ static void ww_test_edeadlk_acquire_wrong(void)
{
int ret;

- mutex_lock(&o2.base);
+ __ww_mutex_lock_lock(&o2.base);
mutex_release(&o2.base.dep_map, 1, _THIS_IP_);
o2.ctx = &t2;

@@ -1519,7 +1519,7 @@ static void ww_test_edeadlk_acquire_wrong_slow(void)
{
int ret;

- mutex_lock(&o2.base);
+ __ww_mutex_lock_lock(&o2.base);
mutex_release(&o2.base.dep_map, 1, _THIS_IP_);
o2.ctx = &t2;

--
2.1.4

Subject: [PATCH 2/3] locking: ww_mutex: Allow rt_mutex to be used instead of mutex for the base lock

This patch makes it possible to replace the base mutex with an rt_mutex. In
general one would not do this. On -RT we use rt_mutex instead of the
mutex by default, which means we need the ww_mutex functionality based
on rt_mutex.
This patch includes a slightly modified version of what we have in the
-RT tree, including Mike Galbraith's latest lockdep annotation fixups and
proper deadlock detection, which was broken after the rt_mutex rework.
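
The deadlock detection itself is the usual wait/wound stamp comparison, now
done in the rt_mutex slow path as well. A simplified sketch of the check (see
__mutex_lock_check_stamp() in the diff below); the "younger" context is the
one that backs off:

	if (unlikely(ctx == hold_ctx))
		return -EALREADY;	/* same ctx already holds this lock */

	if (ctx->stamp - hold_ctx->stamp <= LONG_MAX &&
	    (ctx->stamp != hold_ctx->stamp || ctx > hold_ctx))
		return -EDEADLK;	/* we are younger: back off and retry */

	return 0;			/* we are older: keep waiting */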

Cc: Mike Galbraith <[email protected]>
Signed-off-by: Sebastian Andrzej Siewior <[email protected]>
---
include/linux/ww_mutex.h | 87 ++++++++++++++++-
kernel/locking/mutex.c | 22 ++++-
kernel/locking/rtmutex.c | 237 ++++++++++++++++++++++++++++++++++++++++++++---
lib/Kconfig.debug | 6 ++
lib/locking-selftest.c | 4 +
5 files changed, 339 insertions(+), 17 deletions(-)

diff --git a/include/linux/ww_mutex.h b/include/linux/ww_mutex.h
index 6e5d5ee3138d..99793f00755c 100644
--- a/include/linux/ww_mutex.h
+++ b/include/linux/ww_mutex.h
@@ -15,6 +15,7 @@
#define __LINUX_WW_MUTEX_H

#include <linux/mutex.h>
+#include <linux/rtmutex.h>

struct ww_class {
atomic_long_t stamp;
@@ -42,8 +43,19 @@ struct ww_acquire_ctx {
#endif
};

+struct rt_mutex_base_lock {
+ struct rt_mutex lock;
+#ifdef CONFIG_DEBUG_LOCK_ALLOC
+ struct lockdep_map dep_map;
+#endif
+};
+
struct ww_mutex {
+#ifdef CONFIG_WW_MUTEX_RTMUTEX
+ struct rt_mutex_base_lock base;
+#else
struct mutex base;
+#endif
struct ww_acquire_ctx *ctx;
#ifdef CONFIG_DEBUG_MUTEXES
struct ww_class *ww_class;
@@ -62,9 +74,15 @@ struct ww_mutex {
, .acquire_name = #ww_class "_acquire" \
, .mutex_name = #ww_class "_mutex" }

-#define __WW_MUTEX_INITIALIZER(lockname, class) \
+#ifdef CONFIG_WW_MUTEX_RTMUTEX
+# define __WW_MUTEX_INITIALIZER(lockname, class) \
+ { .base = { \__RT_MUTEX_INITIALIZER(lockname) } \
+ __WW_CLASS_MUTEX_INITIALIZER(lockname, class) }
+#else
+# define __WW_MUTEX_INITIALIZER(lockname, class) \
{ .base = { \__MUTEX_INITIALIZER(lockname) } \
__WW_CLASS_MUTEX_INITIALIZER(lockname, class) }
+#endif

#define DEFINE_WW_CLASS(classname) \
struct ww_class classname = __WW_CLASS_INITIALIZER(classname)
@@ -72,6 +90,71 @@ struct ww_mutex {
#define DEFINE_WW_MUTEX(mutexname, ww_class) \
struct ww_mutex mutexname = __WW_MUTEX_INITIALIZER(mutexname, ww_class)

+#ifdef CONFIG_WW_MUTEX_RTMUTEX
+
+static inline void __ww_mutex_init(struct rt_mutex_base_lock *lock,
+ struct ww_class *ww_class)
+{
+ __rt_mutex_init(&lock->lock, ww_class->mutex_name);
+#ifdef CONFIG_DEBUG_LOCK_ALLOC
+ /*
+ * Make sure we are not reinitializing a held lock:
+ */
+ debug_check_no_locks_freed(lock, sizeof(*lock));
+ lockdep_init_map(&lock->dep_map, ww_class->mutex_name,
+ &ww_class->mutex_key, 0);
+#endif
+#ifdef CONFIG_DEBUG_RT_MUTEXES
+ lock->lock.save_state = 0;
+#endif
+}
+
+static inline void __ww_mutex_lock_lock(struct rt_mutex_base_lock *lock)
+{
+ mutex_acquire(&lock->dep_map, 0, 0, _RET_IP_);
+ rt_mutex_lock(&lock->lock);
+}
+
+static inline int
+__ww_mutex_lock_interruptible_lock(struct rt_mutex_base_lock *lock)
+{
+ int ret;
+
+ mutex_acquire(&lock->dep_map, 0, 0, _RET_IP_);
+ ret = rt_mutex_lock_interruptible(&lock->lock);
+ if (ret)
+ mutex_release(&lock->dep_map, 1, _RET_IP_);
+ return ret;
+}
+
+static inline int __ww_mutex_trylock_lock(struct rt_mutex_base_lock *lock)
+{
+ int ret;
+
+ ret = rt_mutex_trylock(&lock->lock);
+ if (ret)
+ mutex_acquire(&lock->dep_map, 0, 1, _RET_IP_);
+ return ret;
+}
+
+static inline void __ww_mutex_destroy_lock(struct rt_mutex_base_lock *lock)
+{
+ rt_mutex_destroy(&lock->lock);
+}
+
+static inline bool __ww_mutex_is_locked_lock(struct rt_mutex_base_lock *lock)
+{
+ return rt_mutex_is_locked(&lock->lock);
+}
+
+static inline void __ww_mutex_unlock_lock(struct rt_mutex_base_lock *lock)
+{
+ mutex_release(&lock->dep_map, 1, _RET_IP_);
+ rt_mutex_unlock(&lock->lock);
+}
+
+#else
+
static inline void __ww_mutex_init(struct mutex *lock,
struct ww_class *ww_class)
{
@@ -108,6 +191,8 @@ static inline void __ww_mutex_unlock_lock(struct mutex *lock)
mutex_unlock(lock);
}

+#endif
+
/**
* ww_mutex_init - initialize the w/w mutex
* @lock: the mutex to be initialized
diff --git a/kernel/locking/mutex.c b/kernel/locking/mutex.c
index 16b2d3cc88b0..0a652ba46081 100644
--- a/kernel/locking/mutex.c
+++ b/kernel/locking/mutex.c
@@ -106,6 +106,7 @@ void __sched mutex_lock(struct mutex *lock)
EXPORT_SYMBOL(mutex_lock);
#endif

+#ifndef CONFIG_WW_MUTEX_RTMUTEX
static __always_inline void ww_mutex_lock_acquired(struct ww_mutex *ww,
struct ww_acquire_ctx *ww_ctx)
{
@@ -215,6 +216,7 @@ ww_mutex_set_context_slowpath(struct ww_mutex *lock,
wake_up_process(cur->task);
}
}
+#endif

#ifdef CONFIG_MUTEX_SPIN_ON_OWNER
/*
@@ -328,6 +330,7 @@ static bool mutex_optimistic_spin(struct mutex *lock,
while (true) {
struct task_struct *owner;

+#ifndef CONFIG_WW_MUTEX_RTMUTEX
if (use_ww_ctx && ww_ctx->acquired > 0) {
struct ww_mutex *ww;

@@ -343,7 +346,7 @@ static bool mutex_optimistic_spin(struct mutex *lock,
if (READ_ONCE(ww->ctx))
break;
}
-
+#endif
/*
* If there's an owner, wait for it to either
* release the lock or go to sleep.
@@ -356,12 +359,14 @@ static bool mutex_optimistic_spin(struct mutex *lock,
if (mutex_try_to_acquire(lock)) {
lock_acquired(&lock->dep_map, ip);

+#ifndef CONFIG_WW_MUTEX_RTMUTEX
if (use_ww_ctx) {
struct ww_mutex *ww;
ww = container_of(lock, struct ww_mutex, base);

ww_mutex_set_context_fastpath(ww, ww_ctx);
}
+#endif

mutex_set_owner(lock);
osq_unlock(&lock->osq);
@@ -445,6 +450,7 @@ void __sched mutex_unlock(struct mutex *lock)

EXPORT_SYMBOL(mutex_unlock);

+#ifndef CONFIG_WW_MUTEX_RTMUTEX
/**
* ww_mutex_unlock - release the w/w mutex
* @lock: the mutex to be released
@@ -506,6 +512,7 @@ __ww_mutex_lock_check_stamp(struct mutex *lock, struct ww_acquire_ctx *ctx)

return 0;
}
+#endif

/*
* Lock a mutex (possibly interruptible), slowpath:
@@ -570,12 +577,13 @@ __mutex_lock_common(struct mutex *lock, long state, unsigned int subclass,
ret = -EINTR;
goto err;
}
-
+#ifndef CONFIG_WW_MUTEX_RTMUTEX
if (use_ww_ctx && ww_ctx->acquired > 0) {
ret = __ww_mutex_lock_check_stamp(lock, ww_ctx);
if (ret)
goto err;
}
+#endif

__set_task_state(task, state);

@@ -597,11 +605,12 @@ __mutex_lock_common(struct mutex *lock, long state, unsigned int subclass,
lock_acquired(&lock->dep_map, ip);
mutex_set_owner(lock);

+#ifndef CONFIG_WW_MUTEX_RTMUTEX
if (use_ww_ctx) {
struct ww_mutex *ww = container_of(lock, struct ww_mutex, base);
ww_mutex_set_context_slowpath(ww, ww_ctx);
}
-
+#endif
spin_unlock_mutex(&lock->wait_lock, flags);
preempt_enable();
return 0;
@@ -655,6 +664,7 @@ mutex_lock_interruptible_nested(struct mutex *lock, unsigned int subclass)

EXPORT_SYMBOL_GPL(mutex_lock_interruptible_nested);

+#ifndef CONFIG_WW_MUTEX_RTMUTEX
static inline int
ww_mutex_deadlock_injection(struct ww_mutex *lock, struct ww_acquire_ctx *ctx)
{
@@ -711,7 +721,7 @@ __ww_mutex_lock_interruptible(struct ww_mutex *lock, struct ww_acquire_ctx *ctx)
return ret;
}
EXPORT_SYMBOL_GPL(__ww_mutex_lock_interruptible);
-
+#endif
#endif

/*
@@ -840,6 +850,7 @@ __mutex_lock_interruptible_slowpath(struct mutex *lock)
NULL, _RET_IP_, NULL, 0);
}

+#ifndef CONFIG_WW_MUTEX_RTMUTEX
static noinline int __sched
__ww_mutex_lock_slowpath(struct ww_mutex *lock, struct ww_acquire_ctx *ctx)
{
@@ -854,6 +865,7 @@ __ww_mutex_lock_interruptible_slowpath(struct ww_mutex *lock,
return __mutex_lock_common(&lock->base, TASK_INTERRUPTIBLE, 0,
NULL, _RET_IP_, ctx, 1);
}
+#endif

#endif

@@ -915,6 +927,7 @@ int __sched mutex_trylock(struct mutex *lock)
EXPORT_SYMBOL(mutex_trylock);

#ifndef CONFIG_DEBUG_LOCK_ALLOC
+#ifndef CONFIG_WW_MUTEX_RTMUTEX
int __sched
__ww_mutex_lock(struct ww_mutex *lock, struct ww_acquire_ctx *ctx)
{
@@ -952,6 +965,7 @@ __ww_mutex_lock_interruptible(struct ww_mutex *lock, struct ww_acquire_ctx *ctx)
EXPORT_SYMBOL(__ww_mutex_lock_interruptible);

#endif
+#endif

/**
* atomic_dec_and_mutex_lock - return holding mutex if we dec to 0
diff --git a/kernel/locking/rtmutex.c b/kernel/locking/rtmutex.c
index e16e5542bf13..6d7d72ffa619 100644
--- a/kernel/locking/rtmutex.c
+++ b/kernel/locking/rtmutex.c
@@ -16,6 +16,7 @@
#include <linux/sched/rt.h>
#include <linux/sched/deadline.h>
#include <linux/timer.h>
+#include <linux/ww_mutex.h>

#include "rtmutex_common.h"

@@ -738,6 +739,31 @@ static int rt_mutex_adjust_prio_chain(struct task_struct *task,
return ret;
}

+static int __sched __mutex_lock_check_stamp(struct rt_mutex *lock,
+ struct ww_acquire_ctx *ctx)
+{
+#ifdef CONFIG_WW_MUTEX_RTMUTEX
+ struct ww_mutex *ww = container_of(lock, struct ww_mutex, base.lock);
+ struct ww_acquire_ctx *hold_ctx = ACCESS_ONCE(ww->ctx);
+
+ if (!hold_ctx)
+ return 0;
+
+ if (unlikely(ctx == hold_ctx))
+ return -EALREADY;
+
+ if (ctx->stamp - hold_ctx->stamp <= LONG_MAX &&
+ (ctx->stamp != hold_ctx->stamp || ctx > hold_ctx)) {
+#ifdef CONFIG_DEBUG_MUTEXES
+ DEBUG_LOCKS_WARN_ON(ctx->contending_lock);
+ ctx->contending_lock = ww;
+#endif
+ return -EDEADLK;
+ }
+#endif
+ return 0;
+}
+
/*
* Try to take an rt-mutex
*
@@ -1097,7 +1123,8 @@ void rt_mutex_adjust_pi(struct task_struct *task)
static int __sched
__rt_mutex_slowlock(struct rt_mutex *lock, int state,
struct hrtimer_sleeper *timeout,
- struct rt_mutex_waiter *waiter)
+ struct rt_mutex_waiter *waiter,
+ struct ww_acquire_ctx *ww_ctx)
{
int ret = 0;

@@ -1120,6 +1147,12 @@ __rt_mutex_slowlock(struct rt_mutex *lock, int state,
break;
}

+ if (ww_ctx && ww_ctx->acquired > 0) {
+ ret = __mutex_lock_check_stamp(lock, ww_ctx);
+ if (ret)
+ break;
+ }
+
raw_spin_unlock(&lock->wait_lock);

debug_rt_mutex_print_deadlock(waiter);
@@ -1154,13 +1187,84 @@ static void rt_mutex_handle_deadlock(int res, int detect_deadlock,
}
}

+#ifdef CONFIG_WW_MUTEX_RTMUTEX
+static void ww_mutex_lock_acquired(struct ww_mutex *ww,
+ struct ww_acquire_ctx *ww_ctx)
+{
+#ifdef CONFIG_DEBUG_MUTEXES
+ /*
+ * If this WARN_ON triggers, you used ww_mutex_lock to acquire,
+ * but released with a normal mutex_unlock in this call.
+ *
+ * This should never happen, always use ww_mutex_unlock.
+ */
+ DEBUG_LOCKS_WARN_ON(ww->ctx);
+
+ /*
+ * Not quite done after calling ww_acquire_done() ?
+ */
+ DEBUG_LOCKS_WARN_ON(ww_ctx->done_acquire);
+
+ if (ww_ctx->contending_lock) {
+ /*
+ * After -EDEADLK you tried to
+ * acquire a different ww_mutex? Bad!
+ */
+ DEBUG_LOCKS_WARN_ON(ww_ctx->contending_lock != ww);
+
+ /*
+ * You called ww_mutex_lock after receiving -EDEADLK,
+ * but 'forgot' to unlock everything else first?
+ */
+ DEBUG_LOCKS_WARN_ON(ww_ctx->acquired > 0);
+ ww_ctx->contending_lock = NULL;
+ }
+
+ /*
+ * Naughty, using a different class will lead to undefined behavior!
+ */
+ DEBUG_LOCKS_WARN_ON(ww_ctx->ww_class != ww->ww_class);
+#endif
+ ww_ctx->acquired++;
+}
+#endif
+
+static void ww_mutex_account_lock(struct rt_mutex *lock,
+ struct ww_acquire_ctx *ww_ctx)
+{
+#ifdef CONFIG_WW_MUTEX_RTMUTEX
+ struct ww_mutex *ww = container_of(lock, struct ww_mutex, base.lock);
+ struct rt_mutex_waiter *waiter, *n;
+
+ /*
+ * This branch gets optimized out for the common case,
+ * and is only important for ww_mutex_lock.
+ */
+ ww_mutex_lock_acquired(ww, ww_ctx);
+ ww->ctx = ww_ctx;
+
+ /*
+ * Give any possible sleeping processes the chance to wake up,
+ * so they can recheck if they have to back off.
+ */
+ rbtree_postorder_for_each_entry_safe(waiter, n, &lock->waiters,
+ tree_entry) {
+ /* XXX debug rt mutex waiter wakeup */
+
+ BUG_ON(waiter->lock != lock);
+ wake_up_process(waiter->task);
+ }
+#endif
+}
+
/*
* Slow path lock function:
*/
static int __sched
rt_mutex_slowlock(struct rt_mutex *lock, int state,
struct hrtimer_sleeper *timeout,
- enum rtmutex_chainwalk chwalk)
+ enum rtmutex_chainwalk chwalk,
+ struct ww_acquire_ctx *ww_ctx)
{
struct rt_mutex_waiter waiter;
int ret = 0;
@@ -1173,6 +1277,8 @@ rt_mutex_slowlock(struct rt_mutex *lock, int state,

/* Try to acquire the lock again: */
if (try_to_take_rt_mutex(lock, current, NULL)) {
+ if (ww_ctx)
+ ww_mutex_account_lock(lock, ww_ctx);
raw_spin_unlock(&lock->wait_lock);
return 0;
}
@@ -1190,12 +1296,22 @@ rt_mutex_slowlock(struct rt_mutex *lock, int state,

if (likely(!ret))
/* sleep on the mutex */
- ret = __rt_mutex_slowlock(lock, state, timeout, &waiter);
+ ret = __rt_mutex_slowlock(lock, state, timeout, &waiter,
+ ww_ctx);
+ else if (ww_ctx) {
+ /* ww_mutex may need to return EALREADY instead of EDEADLK */
+ ret = __mutex_lock_check_stamp(lock, ww_ctx);
+ BUG_ON(!ret);
+ }

if (unlikely(ret)) {
if (rt_mutex_has_waiters(lock))
remove_waiter(lock, &waiter);
- rt_mutex_handle_deadlock(ret, chwalk, &waiter);
+ /* ww_mutex needs the error reported */
+ if (!ww_ctx)
+ rt_mutex_handle_deadlock(ret, chwalk, &waiter);
+ } else if (ww_ctx) {
+ ww_mutex_account_lock(lock, ww_ctx);
}

/*
@@ -1320,31 +1436,36 @@ rt_mutex_slowunlock(struct rt_mutex *lock)
*/
static inline int
rt_mutex_fastlock(struct rt_mutex *lock, int state,
+ struct ww_acquire_ctx *ww_ctx,
int (*slowfn)(struct rt_mutex *lock, int state,
struct hrtimer_sleeper *timeout,
- enum rtmutex_chainwalk chwalk))
+ enum rtmutex_chainwalk chwalk,
+ struct ww_acquire_ctx *ww_ctx))
{
if (likely(rt_mutex_cmpxchg(lock, NULL, current))) {
rt_mutex_deadlock_account_lock(lock, current);
return 0;
} else
- return slowfn(lock, state, NULL, RT_MUTEX_MIN_CHAINWALK);
+ return slowfn(lock, state, NULL, RT_MUTEX_MIN_CHAINWALK,
+ ww_ctx);
}

static inline int
rt_mutex_timed_fastlock(struct rt_mutex *lock, int state,
struct hrtimer_sleeper *timeout,
enum rtmutex_chainwalk chwalk,
+ struct ww_acquire_ctx *ww_ctx,
int (*slowfn)(struct rt_mutex *lock, int state,
struct hrtimer_sleeper *timeout,
- enum rtmutex_chainwalk chwalk))
+ enum rtmutex_chainwalk chwalk,
+ struct ww_acquire_ctx *ww_ctx))
{
if (chwalk == RT_MUTEX_MIN_CHAINWALK &&
likely(rt_mutex_cmpxchg(lock, NULL, current))) {
rt_mutex_deadlock_account_lock(lock, current);
return 0;
} else
- return slowfn(lock, state, timeout, chwalk);
+ return slowfn(lock, state, timeout, chwalk, ww_ctx);
}

static inline int
@@ -1377,7 +1498,7 @@ void __sched rt_mutex_lock(struct rt_mutex *lock)
{
might_sleep();

- rt_mutex_fastlock(lock, TASK_UNINTERRUPTIBLE, rt_mutex_slowlock);
+ rt_mutex_fastlock(lock, TASK_UNINTERRUPTIBLE, NULL, rt_mutex_slowlock);
}
EXPORT_SYMBOL_GPL(rt_mutex_lock);

@@ -1394,7 +1515,8 @@ int __sched rt_mutex_lock_interruptible(struct rt_mutex *lock)
{
might_sleep();

- return rt_mutex_fastlock(lock, TASK_INTERRUPTIBLE, rt_mutex_slowlock);
+ return rt_mutex_fastlock(lock, TASK_INTERRUPTIBLE, NULL,
+ rt_mutex_slowlock);
}
EXPORT_SYMBOL_GPL(rt_mutex_lock_interruptible);

@@ -1407,7 +1529,7 @@ int rt_mutex_timed_futex_lock(struct rt_mutex *lock,
might_sleep();

return rt_mutex_timed_fastlock(lock, TASK_INTERRUPTIBLE, timeout,
- RT_MUTEX_FULL_CHAINWALK,
+ RT_MUTEX_FULL_CHAINWALK, NULL,
rt_mutex_slowlock);
}

@@ -1431,6 +1553,7 @@ rt_mutex_timed_lock(struct rt_mutex *lock, struct hrtimer_sleeper *timeout)

return rt_mutex_timed_fastlock(lock, TASK_INTERRUPTIBLE, timeout,
RT_MUTEX_MIN_CHAINWALK,
+ NULL,
rt_mutex_slowlock);
}
EXPORT_SYMBOL_GPL(rt_mutex_timed_lock);
@@ -1628,7 +1751,7 @@ int rt_mutex_finish_proxy_lock(struct rt_mutex *lock,
set_current_state(TASK_INTERRUPTIBLE);

/* sleep on the mutex */
- ret = __rt_mutex_slowlock(lock, TASK_INTERRUPTIBLE, to, waiter);
+ ret = __rt_mutex_slowlock(lock, TASK_INTERRUPTIBLE, to, waiter, NULL);

if (unlikely(ret))
remove_waiter(lock, waiter);
@@ -1643,3 +1766,93 @@ int rt_mutex_finish_proxy_lock(struct rt_mutex *lock,

return ret;
}
+
+#ifdef CONFIG_WW_MUTEX_RTMUTEX
+static int ww_mutex_deadlock_injection(struct ww_mutex *lock,
+ struct ww_acquire_ctx *ctx)
+{
+#ifdef CONFIG_DEBUG_WW_MUTEX_SLOWPATH
+ unsigned tmp;
+
+ if (ctx->deadlock_inject_countdown-- == 0) {
+ tmp = ctx->deadlock_inject_interval;
+ if (tmp > UINT_MAX/4)
+ tmp = UINT_MAX;
+ else
+ tmp = tmp*2 + tmp + tmp/2;
+
+ ctx->deadlock_inject_interval = tmp;
+ ctx->deadlock_inject_countdown = tmp;
+ ctx->contending_lock = lock;
+
+ ww_mutex_unlock(lock);
+
+ return -EDEADLK;
+ }
+#endif
+
+ return 0;
+}
+
+int __sched __ww_mutex_lock_interruptible(struct ww_mutex *lock,
+ struct ww_acquire_ctx *ww_ctx)
+{
+ int ret;
+
+ might_sleep();
+
+ mutex_acquire_nest(&lock->base.dep_map, 0, 0, &ww_ctx->dep_map,
+ _RET_IP_);
+ ret = rt_mutex_slowlock(&lock->base.lock, TASK_INTERRUPTIBLE, NULL, 0,
+ ww_ctx);
+ if (ret)
+ mutex_release(&lock->base.dep_map, 1, _RET_IP_);
+ else if (!ret && ww_ctx->acquired > 1)
+ return ww_mutex_deadlock_injection(lock, ww_ctx);
+
+ return ret;
+}
+EXPORT_SYMBOL_GPL(__ww_mutex_lock_interruptible);
+
+int __sched __ww_mutex_lock(struct ww_mutex *lock,
+ struct ww_acquire_ctx *ww_ctx)
+{
+ int ret;
+
+ might_sleep();
+
+ mutex_acquire_nest(&lock->base.dep_map, 0, 0, &ww_ctx->dep_map,
+ _RET_IP_);
+ ret = rt_mutex_slowlock(&lock->base.lock, TASK_UNINTERRUPTIBLE, NULL, 0,
+ ww_ctx);
+ if (ret)
+ mutex_release(&lock->base.dep_map, 1, _RET_IP_);
+ else if (!ret && ww_ctx->acquired > 1)
+ return ww_mutex_deadlock_injection(lock, ww_ctx);
+
+ return ret;
+}
+EXPORT_SYMBOL_GPL(__ww_mutex_lock);
+
+void __sched ww_mutex_unlock(struct ww_mutex *lock)
+{
+ int nest = !!lock->ctx;
+
+ /*
+ * The unlocking fastpath is the 0->1 transition from 'locked'
+ * into 'unlocked' state:
+ */
+ if (nest) {
+#ifdef CONFIG_DEBUG_MUTEXES
+ DEBUG_LOCKS_WARN_ON(!lock->ctx->acquired);
+#endif
+ if (lock->ctx->acquired > 0)
+ lock->ctx->acquired--;
+ lock->ctx = NULL;
+ }
+
+ mutex_release(&lock->base.dep_map, nest, _RET_IP_);
+ rt_mutex_unlock(&lock->base.lock);
+}
+EXPORT_SYMBOL(ww_mutex_unlock);
+#endif
diff --git a/lib/Kconfig.debug b/lib/Kconfig.debug
index c5cefb3c009c..05b6a63f7218 100644
--- a/lib/Kconfig.debug
+++ b/lib/Kconfig.debug
@@ -921,6 +921,12 @@ config DEBUG_MUTEXES
This feature allows mutex semantics violations to be detected and
reported.

+config WW_MUTEX_RTMUTEX
+ bool "Wait/wound mutex: use RT-Mutex instead of the regular mutex"
+ help
+ This replaces the mutex in ww_mutex by an rt_mutex. This is probably
+ only useful for testing.
+
config DEBUG_WW_MUTEX_SLOWPATH
bool "Wait/wound mutex debugging: Slowpath testing"
depends on DEBUG_KERNEL && TRACE_IRQFLAGS_SUPPORT && STACKTRACE_SUPPORT && LOCKDEP_SUPPORT
diff --git a/lib/locking-selftest.c b/lib/locking-selftest.c
index 2c35e38b4013..84c017ac9c39 100644
--- a/lib/locking-selftest.c
+++ b/lib/locking-selftest.c
@@ -1192,6 +1192,7 @@ static void ww_test_normal(void)
WARN_ON(1);
WARN_ON(o.ctx != (void *)~0UL);

+#ifndef CONFIG_WW_MUTEX_RTMUTEX
/* mutex_lock_killable (and *_nested) */
o.ctx = (void *)~0UL;
ret = mutex_lock_killable(&o.base);
@@ -1200,6 +1201,7 @@ static void ww_test_normal(void)
else
WARN_ON(1);
WARN_ON(o.ctx != (void *)~0UL);
+#endif

/* trylock, succeeding */
o.ctx = (void *)~0UL;
@@ -1219,11 +1221,13 @@ static void ww_test_normal(void)
__ww_mutex_unlock_lock(&o.base);
WARN_ON(o.ctx != (void *)~0UL);

+#ifndef CONFIG_WW_MUTEX_RTMUTEX
/* nest_lock */
o.ctx = (void *)~0UL;
mutex_lock_nest_lock(&o.base, &t);
__ww_mutex_unlock_lock(&o.base);
WARN_ON(o.ctx != (void *)~0UL);
+#endif
}

static void ww_test_two_contexts(void)
--
2.1.4

Subject: [PATCH 3/3] locking: rtmutex: set state back to running on error

The "usual" path is:
- rt_mutex_slowlock()
- set_current_state()
- task_blocks_on_rt_mutex() (ret 0)
- __rt_mutex_slowlock()
- sleep or not but do return with __set_current_state(TASK_RUNNING)
- back to caller.

In the early error case, where task_blocks_on_rt_mutex() returns -EDEADLK,
we never change the task's state back to RUNNING. I assume this is
intended. Without this change, with ww_mutex based on rt_mutex the selftest
passes, but later I get plenty of
| bad: scheduling from the idle thread!
backtraces.
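
Roughly, the shape of rt_mutex_slowlock() on this path (a simplified sketch,
not the exact code):

	set_current_state(state);	/* about to sleep */
	ret = task_blocks_on_rt_mutex(lock, &waiter, current, chwalk);
	if (likely(!ret))
		/* returns with __set_current_state(TASK_RUNNING) */
		ret = __rt_mutex_slowlock(lock, state, timeout, &waiter);
	/* early -EDEADLK: nothing restored TASK_RUNNING, hence the hunk below */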

Signed-off-by: Sebastian Andrzej Siewior <[email protected]>
---
kernel/locking/rtmutex.c | 1 +
1 file changed, 1 insertion(+)

diff --git a/kernel/locking/rtmutex.c b/kernel/locking/rtmutex.c
index 6d7d72ffa619..c4d07f254bb4 100644
--- a/kernel/locking/rtmutex.c
+++ b/kernel/locking/rtmutex.c
@@ -1305,6 +1305,7 @@ rt_mutex_slowlock(struct rt_mutex *lock, int state,
}

if (unlikely(ret)) {
+ set_current_state(TASK_RUNNING);
if (rt_mutex_has_waiters(lock))
remove_waiter(lock, &waiter);
/* ww_mutex needs the error reported */
--
2.1.4

From: Maarten Lankhorst
Date: 2015-02-27 18:28:56
Subject: Re: [PATCH 1/3] locking: ww_mutex: add one level of indirection for access of the lock

Assuming it's compile tested, can't really test that here. :)

Reviewed-by: Maarten Lankhorst <[email protected]>

I can't review the second or third patch, I don't understand rt mutexes well enough.

Subject: Re: [PATCH 1/3] locking: ww_mutex: add one level of indirection for access of the lock

On 02/27/2015 07:20 PM, Maarten Lankhorst wrote:
> Assuming it's compile tested, can't really test that here. :)

It passes the self-test, apart from the killable and nest_lock cases which I
switched off. And the version we have in -RT has been reported to work with
the nouveau driver.
There is a Kconfig switch which enables it, so you could use it. Not sure if
we should keep it that way or hide it better :)
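
(The switch in question would be the one added in #2, i.e. roughly

	CONFIG_WW_MUTEX_RTMUTEX=y

in the kernel config.)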

> Reviewed-by: Maarten Lankhorst <[email protected]>

Thanks.

> I can't review the second or third patch, I don't understand rt mutexes well enough.

Sebastian

Subject: [tip:locking/urgent] locking/rtmutex: Set state back to running on error

Commit-ID: 8671e69f00c2733c995d188b9f8d6f9cbb67897c
Gitweb: http://git.kernel.org/tip/8671e69f00c2733c995d188b9f8d6f9cbb67897c
Author: Sebastian Andrzej Siewior <[email protected]>
AuthorDate: Fri, 27 Feb 2015 17:57:09 +0100
Committer: Ingo Molnar <[email protected]>
CommitDate: Sat, 28 Feb 2015 10:47:37 +0100

locking/rtmutex: Set state back to running on error

The "usual" path is:

- rt_mutex_slowlock()
- set_current_state()
- task_blocks_on_rt_mutex() (ret 0)
- __rt_mutex_slowlock()
- sleep or not but do return with __set_current_state(TASK_RUNNING)
- back to caller.

In the early error case where task_blocks_on_rt_mutex() returns
-EDEADLK we never change the task's state back to RUNNING. I
assume this is intended. Without this change, with ww_mutex
based on rt_mutex the selftest passes, but later I get plenty of:

| bad: scheduling from the idle thread!

backtraces.

Signed-off-by: Sebastian Andrzej Siewior <[email protected]>
Cc: Linus Torvalds <[email protected]>
Cc: Maarten Lankhorst <[email protected]>
Cc: Peter Zijlstra <[email protected]>
Cc: Thomas Gleixner <[email protected]>
Link: http://lkml.kernel.org/r/[email protected]
Signed-off-by: Ingo Molnar <[email protected]>
---
kernel/locking/rtmutex.c | 1 +
1 file changed, 1 insertion(+)

diff --git a/kernel/locking/rtmutex.c b/kernel/locking/rtmutex.c
index e16e554..5845068 100644
--- a/kernel/locking/rtmutex.c
+++ b/kernel/locking/rtmutex.c
@@ -1193,6 +1193,7 @@ rt_mutex_slowlock(struct rt_mutex *lock, int state,
ret = __rt_mutex_slowlock(lock, state, timeout, &waiter);

if (unlikely(ret)) {
+ set_current_state(TASK_RUNNING);
if (rt_mutex_has_waiters(lock))
remove_waiter(lock, &waiter);
rt_mutex_handle_deadlock(ret, chwalk, &waiter);