Return-Path: Received: (majordomo@vger.kernel.org) by vger.kernel.org via listexpand id S1756516AbbFRUbs (ORCPT ); Thu, 18 Jun 2015 16:31:48 -0400 Received: from terminus.zytor.com ([198.137.202.10]:37264 "EHLO terminus.zytor.com" rhost-flags-OK-OK-OK-OK) by vger.kernel.org with ESMTP id S1754505AbbFRUb3 (ORCPT ); Thu, 18 Jun 2015 16:31:29 -0400 Date: Thu, 18 Jun 2015 13:30:54 -0700 From: tip-bot for Sebastian Andrzej Siewior Message-ID: Cc: rostedt@goodmis.org, tglx@linutronix.de, mingo@kernel.org, hpa@zytor.com, bigeasy@linutronix.de, dave@stgolabs.net, peterz@infradead.org, linux-kernel@vger.kernel.org, umgwanakikbuti@gmail.com, paulmck@linux.vnet.ibm.com Reply-To: bigeasy@linutronix.de, tglx@linutronix.de, mingo@kernel.org, hpa@zytor.com, rostedt@goodmis.org, paulmck@linux.vnet.ibm.com, linux-kernel@vger.kernel.org, peterz@infradead.org, umgwanakikbuti@gmail.com, dave@stgolabs.net In-Reply-To: <20150617083350.GA2433@linutronix.de> References: <20150617083350.GA2433@linutronix.de> To: linux-tip-commits@vger.kernel.org Subject: [tip:sched/core] futex: Lower the lock contention on the HB lock during wake up Git-Commit-ID: 881bd58d6e9eba4240b9dbc49fdc03a3374d7508 X-Mailer: tip-git-log-daemon Robot-ID: Robot-Unsubscribe: Contact to get blacklisted from these emails MIME-Version: 1.0 Content-Transfer-Encoding: 8bit Content-Type: text/plain; charset=UTF-8 Content-Disposition: inline Sender: linux-kernel-owner@vger.kernel.org List-ID: X-Mailing-List: linux-kernel@vger.kernel.org Content-Length: 7366 Lines: 228 Commit-ID: 881bd58d6e9eba4240b9dbc49fdc03a3374d7508 Gitweb: http://git.kernel.org/tip/881bd58d6e9eba4240b9dbc49fdc03a3374d7508 Author: Sebastian Andrzej Siewior AuthorDate: Wed, 17 Jun 2015 10:33:50 +0200 Committer: Thomas Gleixner CommitDate: Thu, 18 Jun 2015 22:27:46 +0200 futex: Lower the lock contention on the HB lock during wake up wake_futex_pi() wakes the task before releasing the hash bucket lock (HB). The first thing the woken up task usually does is to acquire the lock which requires the HB lock. On SMP Systems this leads to blocking on the HB lock which is released by the owner shortly after. This patch rearranges the unlock path by first releasing the HB lock and then waking up the task. Originally-from: Thomas Gleixner Signed-off-by: Sebastian Andrzej Siewior Cc: Peter Zijlstra Cc: Steven Rostedt Cc: Mike Galbraith Cc: Paul E. McKenney Cc: Davidlohr Bueso Link: http://lkml.kernel.org/r/20150617083350.GA2433@linutronix.de Signed-off-by: Thomas Gleixner --- kernel/futex.c | 32 +++++++++++++++++++++++--- kernel/locking/rtmutex.c | 51 +++++++++++++++++++++++++++++------------ kernel/locking/rtmutex_common.h | 3 +++ 3 files changed, 68 insertions(+), 18 deletions(-) diff --git a/kernel/futex.c b/kernel/futex.c index f9984c3..a0cf6fa 100644 --- a/kernel/futex.c +++ b/kernel/futex.c @@ -1117,11 +1117,14 @@ static void mark_wake_futex(struct wake_q_head *wake_q, struct futex_q *q) q->lock_ptr = NULL; } -static int wake_futex_pi(u32 __user *uaddr, u32 uval, struct futex_q *this) +static int wake_futex_pi(u32 __user *uaddr, u32 uval, struct futex_q *this, + struct futex_hash_bucket *hb) { struct task_struct *new_owner; struct futex_pi_state *pi_state = this->pi_state; u32 uninitialized_var(curval), newval; + WAKE_Q(wake_q); + bool deboost; int ret = 0; if (!pi_state) @@ -1173,7 +1176,19 @@ static int wake_futex_pi(u32 __user *uaddr, u32 uval, struct futex_q *this) raw_spin_unlock_irq(&new_owner->pi_lock); raw_spin_unlock(&pi_state->pi_mutex.wait_lock); - rt_mutex_unlock(&pi_state->pi_mutex); + + deboost = rt_mutex_futex_unlock(&pi_state->pi_mutex, &wake_q); + + /* + * First unlock HB so the waiter does not spin on it once he got woken + * up. Second wake up the waiter before the priority is adjusted. If we + * deboost first (and lose our higher priority), then the task might get + * scheduled away before the wake up can take place. + */ + spin_unlock(&hb->lock); + wake_up_q(&wake_q); + if (deboost) + rt_mutex_adjust_prio(current); return 0; } @@ -2413,13 +2428,23 @@ retry: */ match = futex_top_waiter(hb, &key); if (match) { - ret = wake_futex_pi(uaddr, uval, match); + ret = wake_futex_pi(uaddr, uval, match, hb); + /* + * In case of success wake_futex_pi dropped the hash + * bucket lock. + */ + if (!ret) + goto out_putkey; /* * The atomic access to the futex value generated a * pagefault, so retry the user-access and the wakeup: */ if (ret == -EFAULT) goto pi_faulted; + /* + * wake_futex_pi has detected invalid state. Tell user + * space. + */ goto out_unlock; } @@ -2440,6 +2465,7 @@ retry: out_unlock: spin_unlock(&hb->lock); +out_putkey: put_futex_key(&key); return ret; diff --git a/kernel/locking/rtmutex.c b/kernel/locking/rtmutex.c index 44ee8f8..1130130 100644 --- a/kernel/locking/rtmutex.c +++ b/kernel/locking/rtmutex.c @@ -300,7 +300,7 @@ static void __rt_mutex_adjust_prio(struct task_struct *task) * of task. We do not use the spin_xx_mutex() variants here as we are * outside of the debug path.) */ -static void rt_mutex_adjust_prio(struct task_struct *task) +void rt_mutex_adjust_prio(struct task_struct *task) { unsigned long flags; @@ -1247,13 +1247,12 @@ static inline int rt_mutex_slowtrylock(struct rt_mutex *lock) } /* - * Slow path to release a rt-mutex: + * Slow path to release a rt-mutex. + * Return whether the current task needs to undo a potential priority boosting. */ -static void __sched -rt_mutex_slowunlock(struct rt_mutex *lock) +static bool __sched rt_mutex_slowunlock(struct rt_mutex *lock, + struct wake_q_head *wake_q) { - WAKE_Q(wake_q); - raw_spin_lock(&lock->wait_lock); debug_rt_mutex_unlock(lock); @@ -1294,7 +1293,7 @@ rt_mutex_slowunlock(struct rt_mutex *lock) while (!rt_mutex_has_waiters(lock)) { /* Drops lock->wait_lock ! */ if (unlock_rt_mutex_safe(lock) == true) - return; + return false; /* Relock the rtmutex and try again */ raw_spin_lock(&lock->wait_lock); } @@ -1305,13 +1304,12 @@ rt_mutex_slowunlock(struct rt_mutex *lock) * * Queue the next waiter for wakeup once we release the wait_lock. */ - mark_wakeup_next_waiter(&wake_q, lock); + mark_wakeup_next_waiter(wake_q, lock); raw_spin_unlock(&lock->wait_lock); - wake_up_q(&wake_q); - /* Undo pi boosting if necessary: */ - rt_mutex_adjust_prio(current); + /* check PI boosting */ + return true; } /* @@ -1362,12 +1360,18 @@ rt_mutex_fasttrylock(struct rt_mutex *lock, static inline void rt_mutex_fastunlock(struct rt_mutex *lock, - void (*slowfn)(struct rt_mutex *lock)) + bool (*slowfn)(struct rt_mutex *lock, + struct wake_q_head *wqh)) { - if (likely(rt_mutex_cmpxchg(lock, current, NULL))) + WAKE_Q(wake_q); + + if (likely(rt_mutex_cmpxchg(lock, current, NULL))) { rt_mutex_deadlock_account_unlock(current); - else - slowfn(lock); + + } else if (slowfn(lock, &wake_q)) { + /* Undo pi boosting if necessary: */ + rt_mutex_adjust_prio(current); + } } /** @@ -1462,6 +1466,23 @@ void __sched rt_mutex_unlock(struct rt_mutex *lock) EXPORT_SYMBOL_GPL(rt_mutex_unlock); /** + * rt_mutex_futex_unlock - Futex variant of rt_mutex_unlock + * @lock: the rt_mutex to be unlocked + * + * Returns: true/false indicating whether priority adjustment is + * required or not. + */ +bool __sched rt_mutex_futex_unlock(struct rt_mutex *lock, + struct wake_q_head *wqh) +{ + if (likely(rt_mutex_cmpxchg(lock, current, NULL))) { + rt_mutex_deadlock_account_unlock(current); + return false; + } + return rt_mutex_slowunlock(lock, wqh); +} + +/** * rt_mutex_destroy - mark a mutex unusable * @lock: the mutex to be destroyed * diff --git a/kernel/locking/rtmutex_common.h b/kernel/locking/rtmutex_common.h index 8552125..7844f8f 100644 --- a/kernel/locking/rtmutex_common.h +++ b/kernel/locking/rtmutex_common.h @@ -131,6 +131,9 @@ extern int rt_mutex_finish_proxy_lock(struct rt_mutex *lock, struct hrtimer_sleeper *to, struct rt_mutex_waiter *waiter); extern int rt_mutex_timed_futex_lock(struct rt_mutex *l, struct hrtimer_sleeper *to); +extern bool rt_mutex_futex_unlock(struct rt_mutex *lock, + struct wake_q_head *wqh); +extern void rt_mutex_adjust_prio(struct task_struct *task); #ifdef CONFIG_DEBUG_RT_MUTEXES # include "rtmutex-debug.h" -- To unsubscribe from this list: send the line "unsubscribe linux-kernel" in the body of a message to majordomo@vger.kernel.org More majordomo info at http://vger.kernel.org/majordomo-info.html Please read the FAQ at http://www.tux.org/lkml/