2021-08-05 15:49:13

by Thomas Gleixner

Subject: [patch V3 64/64] locking/rtmutex: Add adaptive spinwait mechanism

From: Steven Rostedt <[email protected]>

Going to sleep when locks are contended can be quite inefficient when the
contention time is short and the lock owner is running on a different CPU.

The MCS mechanism cannot be used because MCS is strictly FIFO ordered, while
rtmutex-based locks order their waiters by priority.
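
To illustrate the ordering difference, here is a minimal user-space sketch
(not kernel code; the list-based queue and the enqueue helpers are invented
for the example, whereas the real rtmutex keeps its waiters in a
priority-sorted rb-tree):

#include <stdio.h>

/* Toy waiter node; the real rtmutex keeps waiters in an rb-tree. */
struct waiter {
	int prio;			/* lower value = higher priority */
	struct waiter *next;
};

/* MCS-style FIFO enqueue: strictly arrival ordered. */
static void enqueue_fifo(struct waiter **head, struct waiter *w)
{
	while (*head)
		head = &(*head)->next;
	w->next = NULL;
	*head = w;
}

/*
 * rtmutex-style enqueue: keep the list sorted so the top-priority
 * waiter is always first in line.
 */
static void enqueue_prio(struct waiter **head, struct waiter *w)
{
	while (*head && (*head)->prio <= w->prio)
		head = &(*head)->next;
	w->next = *head;
	*head = w;
}

int main(void)
{
	struct waiter f[3] = { { .prio = 20 }, { .prio = 5 }, { .prio = 10 } };
	struct waiter p[3] = { { .prio = 20 }, { .prio = 5 }, { .prio = 10 } };
	struct waiter *fifo = NULL, *prio = NULL, *w;
	int i;

	for (i = 0; i < 3; i++) {
		enqueue_fifo(&fifo, &f[i]);
		enqueue_prio(&prio, &p[i]);
	}

	printf("FIFO release order:");	/* 20 5 10: arrival order */
	for (w = fifo; w; w = w->next)
		printf(" %d", w->prio);
	printf("\nprio release order:");	/* 5 10 20: priority order */
	for (w = prio; w; w = w->next)
		printf(" %d", w->prio);
	printf("\n");
	return 0;
}

With FIFO ordering a late-arriving high-priority task queues up behind
lower-priority waiters, which is exactly what the priority-ordered rtmutex
waiter list has to prevent.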

Provide a simple adaptive spinwait mechanism which currently restricts the
spinning to the top priority waiter.
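
As a rough user-space analogue of that spin-or-sleep decision (a sketch only;
toy_lock, owner_running and the pthread fallback are invented stand-ins for
rt_mutex_owner()/owner->on_cpu and schedule(), and the real code below is
stricter: only the top-priority waiter may spin, and the owner pointer is
dereferenced under rcu_read_lock() after re-checking ownership):

#include <pthread.h>
#include <sched.h>
#include <stdatomic.h>
#include <stdbool.h>

/*
 * Toy lock: a mutex plus a hint telling waiters whether the current
 * owner is actively running (the kernel checks owner->on_cpu instead).
 * Initialize mtx with PTHREAD_MUTEX_INITIALIZER and owner_running with false.
 */
struct toy_lock {
	pthread_mutex_t mtx;
	atomic_bool owner_running;
};

static void toy_lock_acquire(struct toy_lock *l)
{
	/*
	 * Adaptive spinwait: as long as the owner is running somewhere,
	 * contention is likely short, so keep retrying instead of sleeping.
	 */
	while (atomic_load(&l->owner_running)) {
		if (pthread_mutex_trylock(&l->mtx) == 0)
			goto owned;
		sched_yield();		/* stand-in for cpu_relax() */
	}

	/* Owner is blocked or gone: sleeping is cheaper than spinning. */
	pthread_mutex_lock(&l->mtx);
owned:
	atomic_store(&l->owner_running, true);
}

static void toy_lock_release(struct toy_lock *l)
{
	atomic_store(&l->owner_running, false);
	pthread_mutex_unlock(&l->mtx);
}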

[ tglx: Provide a contemporary changelog ]

Originally-by: Gregory Haskins <[email protected]>
Signed-off-by: Steven Rostedt <[email protected]>
Signed-off-by: Thomas Gleixner <[email protected]>
---
V3: Fold the extension for regular sleeping locks and add the missing spin
wait checks (PeterZ)
---
kernel/locking/rtmutex.c | 59 +++++++++++++++++++++++++++++++++++++++++++++--
1 file changed, 57 insertions(+), 2 deletions(-)
---
--- a/kernel/locking/rtmutex.c
+++ b/kernel/locking/rtmutex.c
@@ -8,6 +8,11 @@
  * Copyright (C) 2005-2006 Timesys Corp., Thomas Gleixner <[email protected]>
  * Copyright (C) 2005 Kihon Technologies Inc., Steven Rostedt
  * Copyright (C) 2006 Esben Nielsen
+ * Adaptive Spinlocks:
+ *  Copyright (C) 2008 Novell, Inc., Gregory Haskins, Sven Dietrich,
+ *                                   and Peter Morreale,
+ * Adaptive Spinlocks simplification:
+ *  Copyright (C) 2008 Red Hat, Inc., Steven Rostedt <[email protected]>
  *
  * See Documentation/locking/rt-mutex-design.rst for details.
  */
@@ -1278,6 +1283,44 @@ static __always_inline void __rt_mutex_u
 	rt_mutex_slowunlock(lock);
 }
 
+#ifdef CONFIG_SMP
+/*
+ * Note that owner is a speculative pointer and dereferencing relies
+ * on rcu_read_lock() and the check against the lock owner.
+ */
+static bool rtmutex_adaptive_spinwait(struct rt_mutex_base *lock,
+				      struct task_struct *owner)
+{
+	bool res = true;
+
+	rcu_read_lock();
+	for (;;) {
+		/* Owner changed. Trylock again */
+		if (owner != rt_mutex_owner(lock))
+			break;
+		/*
+		 * Ensure that owner->on_cpu is dereferenced _after_
+		 * checking the above to be valid.
+		 */
+		barrier();
+		if (!owner->on_cpu || need_resched() ||
+		    vcpu_is_preempted(task_cpu(owner))) {
+			res = false;
+			break;
+		}
+		cpu_relax();
+	}
+	rcu_read_unlock();
+	return res;
+}
+#else
+static bool rtmutex_adaptive_spinwait(struct rt_mutex_base *lock,
+				      struct task_struct *owner)
+{
+	return false;
+}
+#endif
+
 #ifdef RT_MUTEX_BUILD_MUTEX
 /*
  * Functions required for:
@@ -1362,6 +1405,7 @@ static int __sched rt_mutex_slowlock_blo
 					   struct rt_mutex_waiter *waiter)
 {
 	struct rt_mutex *rtm = container_of(lock, struct rt_mutex, rtmutex);
+	struct task_struct *owner;
 	int ret = 0;
 
 	for (;;) {
@@ -1384,9 +1428,14 @@ static int __sched rt_mutex_slowlock_blo
 			break;
 		}
 
+		if (waiter == rt_mutex_top_waiter(lock))
+			owner = rt_mutex_owner(lock);
+		else
+			owner = NULL;
 		raw_spin_unlock_irq(&lock->wait_lock);
 
-		schedule();
+		if (!owner || !rtmutex_adaptive_spinwait(lock, owner))
+			schedule();
 
 		raw_spin_lock_irq(&lock->wait_lock);
 		set_current_state(state);
@@ -1542,6 +1591,7 @@ static __always_inline int __rt_mutex_lo
 static void __sched rtlock_slowlock_locked(struct rt_mutex_base *lock)
 {
 	struct rt_mutex_waiter waiter;
+	struct task_struct *owner;
 
 	lockdep_assert_held(&lock->wait_lock);
 
@@ -1560,9 +1610,14 @@ static void __sched rtlock_slowlock_lock
 		if (try_to_take_rt_mutex(lock, current, &waiter))
 			break;
 
+		if (&waiter == rt_mutex_top_waiter(lock))
+			owner = rt_mutex_owner(lock);
+		else
+			owner = NULL;
 		raw_spin_unlock_irq(&lock->wait_lock);
 
-		schedule_rtlock();
+		if (!owner || !rtmutex_adaptive_spinwait(lock, owner))
+			schedule_rtlock();
 
 		raw_spin_lock_irq(&lock->wait_lock);
 		set_current_state(TASK_RTLOCK_WAIT);


2021-08-09 22:36:15

by Davidlohr Bueso

Subject: Re: [patch V3 64/64] locking/rtmutex: Add adaptive spinwait mechanism

On Thu, 05 Aug 2021, Thomas Gleixner wrote:

>+#ifdef CONFIG_SMP
>+/*
>+ * Note that owner is a speculative pointer and dereferencing relies
>+ * on rcu_read_lock() and the check against the lock owner.
>+ */

I think the description we have in mutex.c is better.

>+static bool rtmutex_adaptive_spinwait(struct rt_mutex_base *lock,
>+ struct task_struct *owner)

I realize that adaptive spinning is the original term from Greg Haskins,
but as Peter suggested, rt_mutex_spin_on_owner() would probably be a
better name upstream considering all our other sleeping locks.

>+{
>+ bool res = true;
>+
>+ rcu_read_lock();
>+ for (;;) {
>+ /* Owner changed. Trylock again */
>+ if (owner != rt_mutex_owner(lock))
>+ break;
>+ /*
>+ * Ensure that owner->on_cpu is dereferenced _after_
>+ * checking the above to be valid.
>+ */
>+ barrier();
>+ if (!owner->on_cpu || need_resched() ||
>+ vcpu_is_preempted(task_cpu(owner))) {

I'm thinking we should also check whether the spinning waiter is still the
top waiter, since that can change while busy-waiting.

Thanks,
Davidlohr

diff --git a/kernel/locking/rtmutex.c b/kernel/locking/rtmutex.c
index 7522c3abacb6..c6925ebb3c9f 100644
--- a/kernel/locking/rtmutex.c
+++ b/kernel/locking/rtmutex.c
@@ -1284,12 +1284,9 @@ static __always_inline void __rt_mutex_unlock(struct rt_mutex_base *lock)
 }
 
 #ifdef CONFIG_SMP
-/*
- * Note that owner is a speculative pointer and dereferencing relies
- * on rcu_read_lock() and the check against the lock owner.
- */
 static bool rtmutex_adaptive_spinwait(struct rt_mutex_base *lock,
-				      struct task_struct *owner)
+				      struct rt_mutex_waiter *waiter,
+				      struct task_struct *owner)
 {
 	bool res = true;
 
@@ -1299,11 +1296,18 @@ static bool rtmutex_adaptive_spinwait(struct rt_mutex_base *lock,
 		if (owner != rt_mutex_owner(lock))
 			break;
 		/*
-		 * Ensure that owner->on_cpu is dereferenced _after_
-		 * checking the above to be valid.
+		 * Ensure we emit the owner->on_cpu, dereference _after_
+		 * checking lock->owner still matches owner. If that fails,
+		 * owner might point to freed memory. If it still matches,
+		 * the rcu_read_lock() ensures the memory stays valid.
+		 *
+		 * Also account for changes in the lock's top-waiter, if
+		 * it's not us, it was updated while busy waiting.
 		 */
 		barrier();
+
 		if (!owner->on_cpu || need_resched() ||
+		    waiter != rt_mutex_top_waiter(lock) ||
 		    vcpu_is_preempted(task_cpu(owner))) {
 			res = false;
 			break;
@@ -1315,7 +1319,8 @@ static bool rtmutex_adaptive_spinwait(struct rt_mutex_base *lock,
 }
 #else
 static bool rtmutex_adaptive_spinwait(struct rt_mutex_base *lock,
-				      struct task_struct *owner)
+				      struct rt_mutex_waiter *waiter,
+				      struct task_struct *owner)
 {
 	return false;
 }
@@ -1434,7 +1439,7 @@ static int __sched rt_mutex_slowlock_block(struct rt_mutex_base *lock,
 			owner = NULL;
 		raw_spin_unlock_irq(&lock->wait_lock);
 
-		if (!owner || !rtmutex_adaptive_spinwait(lock, owner))
+		if (!owner || !rtmutex_adaptive_spinwait(lock, waiter, owner))
 			schedule();
 
 		raw_spin_lock_irq(&lock->wait_lock);
@@ -1616,7 +1621,7 @@ static void __sched rtlock_slowlock_locked(struct rt_mutex_base *lock)
 			owner = NULL;
 		raw_spin_unlock_irq(&lock->wait_lock);
 
-		if (!owner || !rtmutex_adaptive_spinwait(lock, owner))
+		if (!owner || !rtmutex_adaptive_spinwait(lock, waiter, owner))
 			schedule_rtlock();
 
 		raw_spin_lock_irq(&lock->wait_lock);