Return-Path: Received: (majordomo@vger.kernel.org) by vger.kernel.org via listexpand id S1755179AbbESRZf (ORCPT ); Tue, 19 May 2015 13:25:35 -0400 Received: from smtp2.provo.novell.com ([137.65.250.81]:52132 "EHLO smtp2.provo.novell.com" rhost-flags-OK-OK-OK-OK) by vger.kernel.org with ESMTP id S1751619AbbESRZb (ORCPT ); Tue, 19 May 2015 13:25:31 -0400 From: Davidlohr Bueso To: Thomas Gleixner , Peter Zijlstra , Ingo Molnar Cc: Steven Rostedt , Mike Galbraith , "Paul E. McKenney" , Sebastian Andrzej Siewior , Davidlohr Bueso , linux-kernel@vger.kernel.org, Davidlohr Bueso Subject: [PATCH -rfc 4/4] locking/rtmutex: Support spin on owner (osq) Date: Tue, 19 May 2015 10:24:58 -0700 Message-Id: <1432056298-18738-5-git-send-email-dave@stgolabs.net> X-Mailer: git-send-email 2.1.4 In-Reply-To: <1432056298-18738-1-git-send-email-dave@stgolabs.net> References: <1432056298-18738-1-git-send-email-dave@stgolabs.net> Sender: linux-kernel-owner@vger.kernel.org List-ID: X-Mailing-List: linux-kernel@vger.kernel.org Content-Length: 7051 Lines: 241 Similar to what we have in other locks, particularly regular mutexes, the idea is that as long as the owner is running, there is a fair chance it'll release the lock soon, and thus a task trying to acquire the rtmutex will better off spinning instead of blocking immediately after the fastpath. Conditions to stop spinning and enter the slowpath are simple: (1) Upon need_resched() (2) Current lock owner blocks Spinning tasks performance is further enhanced by queuing in cancelable mcs (osq). Because rtmutexes track the lock owner atomically, we can extend the fastpath to continue polling on the lock owner via cmpxchg(lock->owner, NULL, current). This is a conservative approach, such that upon entering the slowpath, we force all lock spinners (including future ones) to serialize on the wait_lock as mark_rt_mutex_waiters() is called, unconditionally. CPU0 CPU1 optimistic_spin() (failed) try_to_take_rt_mutex() mark_rt_mutex_waiters() optimistic_spin() (failed cmpxchg) spin_lock(&lock->wait_lock) As such we check for RT_MUTEX_HAS_WAITERS bit0 (rt_mutex_has_waiters_fast()). This allows priority boosting to take precedence over spinning, as otherwise we could starve a higher priority queued-up task (ie: top waiter) if spinners constantly steal the lock. Another alternative would be to stop spinning when we should really wakeup a higher priority waiter, but of course we do not hold the wait_lock, so that is racy. Signed-off-by: Davidlohr Bueso --- include/linux/rtmutex.h | 4 ++ kernel/Kconfig.locks | 4 ++ kernel/locking/rtmutex.c | 132 +++++++++++++++++++++++++++++++++++++++++++++++ 3 files changed, 140 insertions(+) diff --git a/include/linux/rtmutex.h b/include/linux/rtmutex.h index 1abba5c..b5e85ca 100644 --- a/include/linux/rtmutex.h +++ b/include/linux/rtmutex.h @@ -15,6 +15,7 @@ #include #include #include +#include extern int max_lock_depth; /* for sysctl */ @@ -31,6 +32,9 @@ struct rt_mutex { struct rb_root waiters; struct rb_node *waiters_leftmost; struct task_struct *owner; +#ifdef CONFIG_RT_MUTEX_SPIN_ON_OWNER + struct optimistic_spin_queue osq; /* Spinner MCS lock */ +#endif #ifdef CONFIG_DEBUG_RT_MUTEXES int save_state; const char *name, *file; diff --git a/kernel/Kconfig.locks b/kernel/Kconfig.locks index ebdb004..628915d 100644 --- a/kernel/Kconfig.locks +++ b/kernel/Kconfig.locks @@ -227,6 +227,10 @@ config MUTEX_SPIN_ON_OWNER def_bool y depends on SMP && !DEBUG_MUTEXES && ARCH_SUPPORTS_ATOMIC_RMW +config RT_MUTEX_SPIN_ON_OWNER + def_bool y + depends on SMP && RT_MUTEXES && !CONFIG_DEBUG_RT_MUTEXES && ARCH_SUPPORTS_ATOMIC_RMW + config RWSEM_SPIN_ON_OWNER def_bool y depends on SMP && RWSEM_XCHGADD_ALGORITHM && ARCH_SUPPORTS_ATOMIC_RMW diff --git a/kernel/locking/rtmutex.c b/kernel/locking/rtmutex.c index 81aad32..6524c7c 100644 --- a/kernel/locking/rtmutex.c +++ b/kernel/locking/rtmutex.c @@ -63,6 +63,20 @@ static inline void clear_rt_mutex_waiters(struct rt_mutex *lock) ((unsigned long)lock->owner & ~RT_MUTEX_HAS_WAITERS); } +/* + * Lockless alternative to rt_mutex_has_waiters() as we do not need the + * wait_lock to check if we are in, for instance, a transitional state + * after calling mark_rt_mutex_waiters(). + */ +static inline bool rt_mutex_has_waiters_fast(struct rt_mutex *lock) +{ + unsigned long val = (unsigned long)lock->owner; + + if (!val) + return false; + return val & RT_MUTEX_HAS_WAITERS; +} + static void fixup_rt_mutex_waiters(struct rt_mutex *lock) { if (!rt_mutex_has_waiters(lock)) @@ -1152,6 +1166,121 @@ static void rt_mutex_handle_deadlock(int res, int detect_deadlock, } } +#ifdef CONFIG_RT_MUTEX_SPIN_ON_OWNER +static noinline +bool rt_mutex_spin_on_owner(struct rt_mutex *lock, struct task_struct *owner) +{ + bool ret = true; + + rcu_read_lock(); + while (rt_mutex_owner(lock) == owner) { + /* + * Ensure we emit the owner->on_cpu, dereference _after_ + * checking lock->owner still matches owner. If that fails, + * owner might point to freed memory. If it still matches, + * the rcu_read_lock() ensures the memory stays valid. + */ + barrier(); + + if (!owner->on_cpu || need_resched()) { + ret = false; + break; + } + + cpu_relax_lowlatency(); + } + rcu_read_unlock(); + + return ret; +} + +/* + * Initial check for entering the mutex spinning loop + */ +static inline bool rt_mutex_can_spin_on_owner(struct rt_mutex *lock) +{ + struct task_struct *owner; + /* default return to spin: if no owner, the lock is free */ + int ret = true; + + if (need_resched()) + return 0; + + rcu_read_lock(); + owner = rt_mutex_owner(lock); + if (owner) + ret = owner->on_cpu; + rcu_read_unlock(); + + return ret; +} + +static bool rt_mutex_optimistic_spin(struct rt_mutex *lock) +{ + bool taken = false; + + preempt_disable(); + + if (!rt_mutex_can_spin_on_owner(lock)) + goto done; + /* + * In order to avoid a stampede of mutex spinners trying to + * acquire the mutex all at once, the spinners need to take a + * MCS (queued) lock first before spinning on the owner field. + */ + if (!osq_lock(&lock->osq)) + goto done; + + while (true) { + struct task_struct *owner; + + /* + * When another task is competing for the lock in the + * slowpath (either transitional or not), avoid the + * cmpxchg and immediately block. If the situation is + * later fixed by clearing the waiters bit, future + * tasks that atempt to take the rtmutex can spin. + */ + if (rt_mutex_has_waiters_fast(lock)) + goto done_unlock; + + /* + * If there's an owner, wait for it to either + * release the lock or go to sleep. + */ + owner = rt_mutex_owner(lock); + if (owner && !rt_mutex_spin_on_owner(lock, owner)) + break; + + /* Try to acquire the lock, if it is unlocked. */ + if (rt_mutex_cmpxchg(lock, NULL, current)) { + taken = true; + goto done_unlock; + } + + /* + * The cpu_relax() call is a compiler barrier which forces + * everything in this loop to be re-loaded. We don't need + * memory barriers as we'll eventually observe the right + * values at the cost of a few extra spins. + */ + cpu_relax_lowlatency(); + } + +done_unlock: + osq_unlock(&lock->osq); +done: + preempt_enable(); + return taken; +} + +#else +static bool rt_mutex_optimistic_spin(struct rt_mutex *lock) +{ + return false; +} +#endif + /* * Slow path lock function: */ @@ -1163,6 +1292,9 @@ rt_mutex_slowlock(struct rt_mutex *lock, int state, struct rt_mutex_waiter waiter; int ret = 0; + if (rt_mutex_optimistic_spin(lock)) + return ret; + debug_rt_mutex_init_waiter(&waiter); RB_CLEAR_NODE(&waiter.pi_tree_entry); RB_CLEAR_NODE(&waiter.tree_entry); -- 2.1.4 -- To unsubscribe from this list: send the line "unsubscribe linux-kernel" in the body of a message to majordomo@vger.kernel.org More majordomo info at http://vger.kernel.org/majordomo-info.html Please read the FAQ at http://www.tux.org/lkml/