2016-10-07 15:34:07

by Peter Zijlstra

Subject: [PATCH -v4 5/8] locking/mutex: Add lock handoff to avoid starvation

Implement lock handoff to avoid lock starvation.

Lock starvation is possible because mutex_lock() allows lock stealing,
where a running (or optimistic spinning) task beats the woken waiter
to the acquire.

Lock stealing is an important performance optimization because waiting
for a waiter to wake up and get runtime can take a significant time,
during which everybody would stall on the lock.

The down-side is of course that it allows for starvation.

This patch has the waiter requesting a handoff if it fails to acquire
the lock upon waking. This re-introduces some of the wait time,
because once we do a handoff we have to wait for the waiter to wake up
again.

A future patch will add a round of optimistic spinning to attempt to
alleviate this penalty, but if that turns out to not be enough, we can
add a counter and only request handoff after multiple failed wakeups.
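
For illustration only, such a counter might look roughly like the sketch
below inside the wait loop; the nr_failed local (declared alongside
'first') and the threshold name are made up for this sketch and are not
part of the patch:

	/* Sketch: only request the handoff after several failed wakeups. */
	if (!first && __mutex_waiter_is_first(lock, &waiter) &&
	    ++nr_failed >= MUTEX_HANDOFF_THRESHOLD) {
		first = true;
		__mutex_set_flag(lock, MUTEX_FLAG_HANDOFF);
	}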

There are a few tricky implementation details:

- accepting a handoff must only be done in the wait-loop. Since the
handoff condition is owner == current, it can easily cause
recursive locking trouble.

- accepting the handoff must be careful to provide the ACQUIRE
semantics.

- having the HANDOFF bit set on unlock requires care, we must not
clear the owner.

- we must be careful to not leave HANDOFF set after we've acquired
the lock. The tricky scenario is setting the HANDOFF bit on an
unlocked mutex.
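
To illustrate that last point (this illustration is not part of the patch
text), the interleaving being guarded against is roughly:

	1. waiter wakes, __mutex_trylock() fails (owner still set)
	2. owner releases the lock (owner task cleared)
	3. waiter sets MUTEX_FLAG_HANDOFF, i.e. on a now-unlocked mutex
	4. waiter's next __mutex_trylock() installs itself via cmpxchg;
	   if it did not strip HANDOFF from the preserved flags, the bit
	   would survive into the locked state and the next unlock would
	   wrongly try to hand the lock off.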

Signed-off-by: Peter Zijlstra (Intel) <[email protected]>
---
kernel/locking/mutex.c | 142 +++++++++++++++++++++++++++++++++++++++++--------
1 file changed, 119 insertions(+), 23 deletions(-)

--- a/kernel/locking/mutex.c
+++ b/kernel/locking/mutex.c
@@ -54,8 +54,10 @@ EXPORT_SYMBOL(__mutex_init);
* bits to store extra state.
*
* Bit0 indicates a non-empty waiter list; unlock must issue a wakeup.
+ * Bit1 indicates unlock needs to hand the lock to the top-waiter
*/
#define MUTEX_FLAG_WAITERS 0x01
+#define MUTEX_FLAG_HANDOFF 0x02

#define MUTEX_FLAGS 0x03

@@ -71,20 +73,48 @@ static inline unsigned long __owner_flag

/*
* Actual trylock that will work on any unlocked state.
+ *
+ * When setting the owner field, we must preserve the low flag bits.
+ *
+ * Be careful with @handoff, only set that in a wait-loop (where you set
+ * HANDOFF) to avoid recursive lock attempts.
*/
-static inline bool __mutex_trylock(struct mutex *lock)
+static inline bool __mutex_trylock(struct mutex *lock, const bool handoff)
{
unsigned long owner, curr = (unsigned long)current;

owner = atomic_long_read(&lock->owner);
for (;;) { /* must loop, can race against a flag */
- unsigned long old;
+ unsigned long old, flags = __owner_flags(owner);
+
+ if (__owner_task(owner)) {
+ if (handoff && unlikely(__owner_task(owner) == current)) {
+ /*
+ * Provide ACQUIRE semantics for the lock-handoff.
+ *
+ * We cannot easily use load-acquire here, since
+ * the actual load is a failed cmpxchg, which
+ * doesn't imply any barriers.
+ *
+ * Also, this is a fairly unlikely scenario, and
+ * this contains the cost.
+ */
+ smp_mb(); /* ACQUIRE */
+ return true;
+ }

- if (__owner_task(owner))
return false;
+ }

- old = atomic_long_cmpxchg_acquire(&lock->owner, owner,
- curr | __owner_flags(owner));
+ /*
+ * We set the HANDOFF bit, we must make sure it doesn't live
+ * past the point where we acquire it. This would be possible
+ * if we (accidentally) set the bit on an unlocked mutex.
+ */
+ if (handoff)
+ flags &= ~MUTEX_FLAG_HANDOFF;
+
+ old = atomic_long_cmpxchg_acquire(&lock->owner, owner, curr | flags);
if (old == owner)
return true;

@@ -134,6 +164,39 @@ static inline void __mutex_clear_flag(st
atomic_long_andnot(flag, &lock->owner);
}

+static inline bool __mutex_waiter_is_first(struct mutex *lock, struct mutex_waiter *waiter)
+{
+ return list_first_entry(&lock->wait_list, struct mutex_waiter, list) == waiter;
+}
+
+/*
+ * Give up ownership to a specific task; when @task == NULL this is equivalent
+ * to a regular unlock. Clears HANDOFF, preserves WAITERS. Provides RELEASE
+ * semantics like a regular unlock, the __mutex_trylock() provides matching
+ * ACQUIRE semantics for the handoff.
+ */
+static void __mutex_handoff(struct mutex *lock, struct task_struct *task)
+{
+ unsigned long owner = atomic_long_read(&lock->owner);
+
+ for (;;) {
+ unsigned long old, new;
+
+#ifdef CONFIG_DEBUG_MUTEXES
+ DEBUG_LOCKS_WARN_ON(__owner_task(owner) != current);
+#endif
+
+ new = (owner & MUTEX_FLAG_WAITERS);
+ new |= (unsigned long)task;
+
+ old = atomic_long_cmpxchg_release(&lock->owner, owner, new);
+ if (old == owner)
+ break;
+
+ owner = old;
+ }
+}
+
#ifndef CONFIG_DEBUG_LOCK_ALLOC
/*
* We split the mutex lock/unlock logic into separate fastpath and
@@ -398,7 +461,7 @@ static bool mutex_optimistic_spin(struct
break;

/* Try to acquire the mutex if it is unlocked. */
- if (__mutex_trylock(lock)) {
+ if (__mutex_trylock(lock, false)) {
osq_unlock(&lock->osq);
return true;
}
@@ -523,6 +586,7 @@ __mutex_lock_common(struct mutex *lock,
struct task_struct *task = current;
struct mutex_waiter waiter;
unsigned long flags;
+ bool first = false;
int ret;

if (use_ww_ctx) {
@@ -534,7 +598,8 @@ __mutex_lock_common(struct mutex *lock,
preempt_disable();
mutex_acquire_nest(&lock->dep_map, subclass, 0, nest_lock, ip);

- if (__mutex_trylock(lock) || mutex_optimistic_spin(lock, ww_ctx, use_ww_ctx)) {
+ if (__mutex_trylock(lock, false) ||
+ mutex_optimistic_spin(lock, ww_ctx, use_ww_ctx)) {
/* got the lock, yay! */
lock_acquired(&lock->dep_map, ip);
if (use_ww_ctx) {
@@ -551,7 +616,7 @@ __mutex_lock_common(struct mutex *lock,
/*
* After waiting to acquire the wait_lock, try again.
*/
- if (__mutex_trylock(lock))
+ if (__mutex_trylock(lock, false))
goto skip_wait;

debug_mutex_lock_common(lock, &waiter);
@@ -561,13 +626,13 @@ __mutex_lock_common(struct mutex *lock,
list_add_tail(&waiter.list, &lock->wait_list);
waiter.task = task;

- if (list_first_entry(&lock->wait_list, struct mutex_waiter, list) == &waiter)
+ if (__mutex_waiter_is_first(lock, &waiter))
__mutex_set_flag(lock, MUTEX_FLAG_WAITERS);

lock_contended(&lock->dep_map, ip);

for (;;) {
- if (__mutex_trylock(lock))
+ if (__mutex_trylock(lock, first))
break;

/*
@@ -586,17 +651,20 @@ __mutex_lock_common(struct mutex *lock,
}

__set_task_state(task, state);
-
- /* didn't get the lock, go to sleep: */
spin_unlock_mutex(&lock->wait_lock, flags);
schedule_preempt_disabled();
spin_lock_mutex(&lock->wait_lock, flags);
+
+ if (!first && __mutex_waiter_is_first(lock, &waiter)) {
+ first = true;
+ __mutex_set_flag(lock, MUTEX_FLAG_HANDOFF);
+ }
}
__set_task_state(task, TASK_RUNNING);

mutex_remove_waiter(lock, &waiter, task);
if (likely(list_empty(&lock->wait_list)))
- __mutex_clear_flag(lock, MUTEX_FLAG_WAITERS);
+ __mutex_clear_flag(lock, MUTEX_FLAGS);

debug_mutex_free_waiter(&waiter);

@@ -724,33 +792,61 @@ EXPORT_SYMBOL_GPL(__ww_mutex_lock_interr
*/
static noinline void __sched __mutex_unlock_slowpath(struct mutex *lock, unsigned long ip)
{
+ struct task_struct *next = NULL;
unsigned long owner, flags;
WAKE_Q(wake_q);

mutex_release(&lock->dep_map, 1, ip);

/*
- * Release the lock before (potentially) taking the spinlock
- * such that other contenders can get on with things ASAP.
+ * Release the lock before (potentially) taking the spinlock such that
+ * other contenders can get on with things ASAP.
+ *
+ * Except when HANDOFF, in that case we must not clear the owner field,
+ * but instead set it to the top waiter.
*/
- owner = atomic_long_fetch_and_release(MUTEX_FLAGS, &lock->owner);
- if (!__owner_flags(owner))
- return;
+ owner = atomic_long_read(&lock->owner);
+ for (;;) {
+ unsigned long old;
+
+#ifdef CONFIG_DEBUG_MUTEXES
+ DEBUG_LOCKS_WARN_ON(__owner_task(owner) != current);
+#endif
+
+ if (owner & MUTEX_FLAG_HANDOFF)
+ break;
+
+ old = atomic_long_cmpxchg_release(&lock->owner, owner,
+ __owner_flags(owner));
+ if (old == owner) {
+ if (owner & MUTEX_FLAG_WAITERS)
+ break;
+
+ return;
+ }
+
+ owner = old;
+ }

spin_lock_mutex(&lock->wait_lock, flags);
debug_mutex_unlock(lock);
-
if (!list_empty(&lock->wait_list)) {
/* get the first entry from the wait-list: */
struct mutex_waiter *waiter =
- list_entry(lock->wait_list.next,
- struct mutex_waiter, list);
+ list_first_entry(&lock->wait_list,
+ struct mutex_waiter, list);
+
+ next = waiter->task;

debug_mutex_wake_waiter(lock, waiter);
- wake_q_add(&wake_q, waiter->task);
+ wake_q_add(&wake_q, next);
}

+ if (owner & MUTEX_FLAG_HANDOFF)
+ __mutex_handoff(lock, next);
+
spin_unlock_mutex(&lock->wait_lock, flags);
+
wake_up_q(&wake_q);
}

@@ -853,7 +949,7 @@ __ww_mutex_lock_interruptible_slowpath(s
*/
int __sched mutex_trylock(struct mutex *lock)
{
- bool locked = __mutex_trylock(lock);
+ bool locked = __mutex_trylock(lock, false);

if (locked)
mutex_acquire(&lock->dep_map, 0, 1, _RET_IP_);



2016-10-13 15:16:12

by Will Deacon

Subject: Re: [PATCH -v4 5/8] locking/mutex: Add lock handoff to avoid starvation

Hi Peter,

Just one comment below.

On Fri, Oct 07, 2016 at 04:52:48PM +0200, Peter Zijlstra wrote:
> --- a/kernel/locking/mutex.c
> +++ b/kernel/locking/mutex.c
> @@ -54,8 +54,10 @@ EXPORT_SYMBOL(__mutex_init);
> * bits to store extra state.
> *
> * Bit0 indicates a non-empty waiter list; unlock must issue a wakeup.
> + * Bit1 indicates unlock needs to hand the lock to the top-waiter
> */
> #define MUTEX_FLAG_WAITERS 0x01
> +#define MUTEX_FLAG_HANDOFF 0x02
>
> #define MUTEX_FLAGS 0x03
>
> @@ -71,20 +73,48 @@ static inline unsigned long __owner_flag
>
> /*
> * Actual trylock that will work on any unlocked state.
> + *
> + * When setting the owner field, we must preserve the low flag bits.
> + *
> + * Be careful with @handoff, only set that in a wait-loop (where you set
> + * HANDOFF) to avoid recursive lock attempts.
> */
> -static inline bool __mutex_trylock(struct mutex *lock)
> +static inline bool __mutex_trylock(struct mutex *lock, const bool handoff)
> {
> unsigned long owner, curr = (unsigned long)current;
>
> owner = atomic_long_read(&lock->owner);
> for (;;) { /* must loop, can race against a flag */
> - unsigned long old;
> + unsigned long old, flags = __owner_flags(owner);
> +
> + if (__owner_task(owner)) {
> + if (handoff && unlikely(__owner_task(owner) == current)) {
> + /*
> + * Provide ACQUIRE semantics for the lock-handoff.
> + *
> + * We cannot easily use load-acquire here, since
> + * the actual load is a failed cmpxchg, which
> + * doesn't imply any barriers.
> + *
> + * Also, this is a fairly unlikely scenario, and
> + * this contains the cost.
> + */
> + smp_mb(); /* ACQUIRE */

As we discussed on another thread recently, a failed cmpxchg_acquire
will always give you ACQUIRE semantics in practice. Maybe we should update
the documentation to allow this? The only special case is the full-barrier
version.

Will

2016-10-17 09:22:50

by Peter Zijlstra

Subject: Re: [PATCH -v4 5/8] locking/mutex: Add lock handoff to avoid starvation

On Thu, Oct 13, 2016 at 04:14:47PM +0100, Will Deacon wrote:

> > + if (__owner_task(owner)) {
> > + if (handoff && unlikely(__owner_task(owner) == current)) {
> > + /*
> > + * Provide ACQUIRE semantics for the lock-handoff.
> > + *
> > + * We cannot easily use load-acquire here, since
> > + * the actual load is a failed cmpxchg, which
> > + * doesn't imply any barriers.
> > + *
> > + * Also, this is a fairly unlikely scenario, and
> > + * this contains the cost.
> > + */
> > + smp_mb(); /* ACQUIRE */
>
> As we discussed on another thread recently, a failed cmpxchg_acquire
> will always give you ACQUIRE semantics in practice. Maybe we should update
> the documentation to allow this? The only special case is the full-barrier
> version.

So on PPC we do:

static __always_inline unsigned long
__cmpxchg_u32_acquire(u32 *p, unsigned long old, unsigned long new)
{
unsigned long prev;

__asm__ __volatile__ (
"1: lwarx %0,0,%2 # __cmpxchg_u32_acquire\n"
" cmpw 0,%0,%3\n"
" bne- 2f\n"
PPC405_ERR77(0, %2)
" stwcx. %4,0,%2\n"
" bne- 1b\n"
PPC_ACQUIRE_BARRIER
"\n"
"2:"
: "=&r" (prev), "+m" (*p)
: "r" (p), "r" (old), "r" (new)
: "cc", "memory");

return prev;
}

which I read to skip over the ACQUIRE_BARRIER on fail.


Similarly, we _could_ make the generic version skip the barrier entirely
(it seems we currently do not).


And while I agree that it makes semantic sense (we defined ACQUIRE to
apply to the LOADs only, and since we always issue the LOAD, we should
also always provide ACQUIRE semantics), I'm not entirely convinced we
should go there just yet. It would make failed cmpxchg_acquire()'s more
expensive, and this really is the only place we care about those.


So I would propose that, for now, we keep these explicit barriers, both
here and in the other place you mentioned, but keep this in mind.

Also, I don't feel we need more complexity in this patch set just now.

2016-10-17 18:46:06

by Waiman Long

Subject: Re: [PATCH -v4 5/8] locking/mutex: Add lock handoff to avoid starvation

On 10/07/2016 10:52 AM, Peter Zijlstra wrote:
> /*
> * Actual trylock that will work on any unlocked state.
> + *
> + * When setting the owner field, we must preserve the low flag bits.
> + *
> + * Be careful with @handoff, only set that in a wait-loop (where you set
> + * HANDOFF) to avoid recursive lock attempts.
> */
> -static inline bool __mutex_trylock(struct mutex *lock)
> +static inline bool __mutex_trylock(struct mutex *lock, const bool handoff)
> {
> unsigned long owner, curr = (unsigned long)current;
>
> owner = atomic_long_read(&lock->owner);
> for (;;) { /* must loop, can race against a flag */
> - unsigned long old;
> + unsigned long old, flags = __owner_flags(owner);
> +
> + if (__owner_task(owner)) {
> + if (handoff && unlikely(__owner_task(owner) == current)) {
> + /*
> + * Provide ACQUIRE semantics for the lock-handoff.
> + *
> + * We cannot easily use load-acquire here, since
> + * the actual load is a failed cmpxchg, which
> + * doesn't imply any barriers.
> + *
> + * Also, this is a fairly unlikely scenario, and
> + * this contains the cost.
> + */

I am not so sure about your comment here. I guess you are referring to
the atomic_long_cmpxchg_acquire below for the failed cmpxchg. However,
it is also possible that the path can be triggered on the first round
without cmpxchg. Maybe we can do a load_acquire on the owner again to
satisfy this requirement without a smp_mb().

> + smp_mb(); /* ACQUIRE */
> + return true;
> + }
>
> - if (__owner_task(owner))
> return false;
> + }
>
> - old = atomic_long_cmpxchg_acquire(&lock->owner, owner,
> - curr | __owner_flags(owner));
> + /*
> + * We set the HANDOFF bit, we must make sure it doesn't live
> + * past the point where we acquire it. This would be possible
> + * if we (accidentally) set the bit on an unlocked mutex.
> + */
> + if (handoff)
> + flags &= ~MUTEX_FLAG_HANDOFF;
> +
> + old = atomic_long_cmpxchg_acquire(&lock->owner, owner, curr | flags);
> if (old == owner)
> return true;
>
>

Other than that, the code is fine.

Reviewed-by: Waiman Long <[email protected]>

2016-10-17 19:08:13

by Waiman Long

Subject: Re: [PATCH -v4 5/8] locking/mutex: Add lock handoff to avoid starvation

On 10/17/2016 02:45 PM, Waiman Long wrote:
> On 10/07/2016 10:52 AM, Peter Zijlstra wrote:
>> /*
>> * Actual trylock that will work on any unlocked state.
>> + *
>> + * When setting the owner field, we must preserve the low flag bits.
>> + *
>> + * Be careful with @handoff, only set that in a wait-loop (where you
>> set
>> + * HANDOFF) to avoid recursive lock attempts.
>> */
>> -static inline bool __mutex_trylock(struct mutex *lock)
>> +static inline bool __mutex_trylock(struct mutex *lock, const bool
>> handoff)
>> {
>> unsigned long owner, curr = (unsigned long)current;
>>
>> owner = atomic_long_read(&lock->owner);
>> for (;;) { /* must loop, can race against a flag */
>> - unsigned long old;
>> + unsigned long old, flags = __owner_flags(owner);
>> +
>> + if (__owner_task(owner)) {
>> + if (handoff && unlikely(__owner_task(owner) == current)) {
>> + /*
>> + * Provide ACQUIRE semantics for the lock-handoff.
>> + *
>> + * We cannot easily use load-acquire here, since
>> + * the actual load is a failed cmpxchg, which
>> + * doesn't imply any barriers.
>> + *
>> + * Also, this is a fairly unlikely scenario, and
>> + * this contains the cost.
>> + */
>
> I am not so sure about your comment here. I guess you are referring to
> the atomic_long_cmpxchg_acquire below for the failed cmpxchg. However,
> it is also possible that the path can be triggered on the first round
> without cmpxchg. Maybe we can do a load_acquire on the owner again to
> satisfy this requirement without a smp_mb().
>
>> + smp_mb(); /* ACQUIRE */
>> + return true;
>> + }
>>
>> - if (__owner_task(owner))
>> return false;
>> + }
>>
>> - old = atomic_long_cmpxchg_acquire(&lock->owner, owner,
>> - curr | __owner_flags(owner));
>> + /*
>> + * We set the HANDOFF bit, we must make sure it doesn't live
>> + * past the point where we acquire it. This would be possible
>> + * if we (accidentally) set the bit on an unlocked mutex.
>> + */
>> + if (handoff)
>> + flags &= ~MUTEX_FLAG_HANDOFF;
>> +
>> + old = atomic_long_cmpxchg_acquire(&lock->owner, owner, curr
>> | flags);
>> if (old == owner)
>> return true;
>>
>>
>
> Other than that, the code is fine.
>
> Reviewed-by: Waiman Long <[email protected]>
>

One more thing, I think it may be worthwhile to add another comment
about what happens when the HANDOFF bit was set while we take the error
path (goto err). As the actual handoff is serialized by the wait_lock,
the code will still do the right thing. Either the next one in the queue
will be handed off or it will be unlocked if the queue is empty.
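
Something along these lines near the err: label, say (wording is only a
suggestion):

	/*
	 * Leaving with MUTEX_FLAG_HANDOFF set is fine: the actual
	 * hand-off is serialized by wait_lock, so the next unlock will
	 * either hand the lock to the new top-waiter or, if the wait
	 * list went empty, clear the flags and release the lock
	 * normally.
	 */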

Cheers,
Longman

2016-10-18 12:37:09

by Peter Zijlstra

Subject: Re: [PATCH -v4 5/8] locking/mutex: Add lock handoff to avoid starvation

On Mon, Oct 17, 2016 at 02:45:50PM -0400, Waiman Long wrote:
> On 10/07/2016 10:52 AM, Peter Zijlstra wrote:
> > /*
> > * Actual trylock that will work on any unlocked state.
> >+ *
> >+ * When setting the owner field, we must preserve the low flag bits.
> >+ *
> >+ * Be careful with @handoff, only set that in a wait-loop (where you set
> >+ * HANDOFF) to avoid recursive lock attempts.
> > */
> >-static inline bool __mutex_trylock(struct mutex *lock)
> >+static inline bool __mutex_trylock(struct mutex *lock, const bool handoff)
> > {
> > unsigned long owner, curr = (unsigned long)current;
> >
> > owner = atomic_long_read(&lock->owner);
> > for (;;) { /* must loop, can race against a flag */
> >- unsigned long old;
> >+ unsigned long old, flags = __owner_flags(owner);
> >+
> >+ if (__owner_task(owner)) {
> >+ if (handoff && unlikely(__owner_task(owner) == current)) {
> >+ /*
> >+ * Provide ACQUIRE semantics for the lock-handoff.
> >+ *
> >+ * We cannot easily use load-acquire here, since
> >+ * the actual load is a failed cmpxchg, which
> >+ * doesn't imply any barriers.
> >+ *
> >+ * Also, this is a fairly unlikely scenario, and
> >+ * this contains the cost.
> >+ */
>
> I am not so sure about your comment here. I guess you are referring to the
> atomic_long_cmpxchg_acquire below for the failed cmpxchg. However, it is
> also possible that the path can be triggered on the first round without
> cmpxchg. Maybe we can do a load_acquire on the owner again to satisfy this
> requirement without a smp_mb().

Yes, I refer to the atomic_long_cmpxchg_acquire() below. If that cmpxchg
fails, no barriers are implied.

Yes we could fix that, but that would make all cmpxchg_acquire() loops
more expensive. And only fixing the initial load doesn't help, since we
still need to deal with the cmpxchg case failing.

We _could_ re-issue the load I suppose, since if owner == current nobody
else is going to change it, but I feel slightly uneasy with that.
Also, it's a relatively slow path, so for now let's keep it simple and
explicit like so.
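
For completeness, a sketch of what that re-issued load could look like
(assuming the atomic_long_read_acquire() helper; illustrative only, not
proposed here):

	if (handoff && unlikely(__owner_task(owner) == current)) {
		/*
		 * Sketch: a re-load of the owner with ACQUIRE semantics
		 * instead of smp_mb(); the value is known to be current
		 * (nobody else can change it), we only want the ordering.
		 */
		atomic_long_read_acquire(&lock->owner);
		return true;
	}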

>
> >+ smp_mb(); /* ACQUIRE */
> >+ return true;
> >+ }
> >
> >- if (__owner_task(owner))
> > return false;
> >+ }
> >
> >- old = atomic_long_cmpxchg_acquire(&lock->owner, owner,
> >- curr | __owner_flags(owner));
> >+ /*
> >+ * We set the HANDOFF bit, we must make sure it doesn't live
> >+ * past the point where we acquire it. This would be possible
> >+ * if we (accidentally) set the bit on an unlocked mutex.
> >+ */
> >+ if (handoff)
> >+ flags &= ~MUTEX_FLAG_HANDOFF;
> >+
> >+ old = atomic_long_cmpxchg_acquire(&lock->owner, owner, curr | flags);
> > if (old == owner)
> > return true;
> >
> >
>
> Other than that, the code is fine.
>
> Reviewed-by: Waiman Long <[email protected]>

Thanks!

2016-10-18 13:03:07

by Peter Zijlstra

Subject: Re: [PATCH -v4 5/8] locking/mutex: Add lock handoff to avoid starvation

On Mon, Oct 17, 2016 at 03:07:54PM -0400, Waiman Long wrote:
> One more thing, I think it may be worthwhile to add another comment about
> what happens when the HANDOFF bit was set while we take the error path (goto
> err). As the actual handoff is serialized by the wait_lock, the code will
> still do the right thing. Either the next one in the queue will be handed
> off or it will be unlocked if the queue is empty.

Doesn't the next patch add just such a comment?

2016-12-27 13:56:27

by Chris Wilson

Subject: Re: [PATCH -v4 5/8] locking/mutex: Add lock handoff to avoid starvation

On Fri, Oct 07, 2016 at 04:52:48PM +0200, Peter Zijlstra wrote:
> Implement lock handoff to avoid lock starvation.
>
> Lock starvation is possible because mutex_lock() allows lock stealing,
> where a running (or optimistic spinning) task beats the woken waiter
> to the acquire.
>
> Lock stealing is an important performance optimization because waiting
> for a waiter to wake up and get runtime can take a significant time,
> during which everybody would stall on the lock.
>
> The down-side is of course that it allows for starvation.
>
> This patch has the waiter requesting a handoff if it fails to acquire
> the lock upon waking. This re-introduces some of the wait time,
> because once we do a handoff we have to wait for the waiter to wake up
> again.
>
> A future patch will add a round of optimistic spinning to attempt to
> alleviate this penalty, but if that turns out to not be enough, we can
> add a counter and only request handoff after multiple failed wakeups.
>
> There are a few tricky implementation details:
>
> - accepting a handoff must only be done in the wait-loop. Since the
> handoff condition is owner == current, it can easily cause
> recursive locking trouble.
>
> - accepting the handoff must be careful to provide the ACQUIRE
> semantics.
>
> - having the HANDOFF bit set on unlock requires care, we must not
> clear the owner.
>
> - we must be careful to not leave HANDOFF set after we've acquired
> the lock. The tricky scenario is setting the HANDOFF bit on an
> unlocked mutex.

There's a hole along the interruptible path - we leave the HANDOFF bit
set, even though the first waiter returns with -EINTR. The unlock then
sees the HANDOFF, assigns it to the next waiter, but that waiter does a
racy check to decide if it is first, decides it is not and so skips the
trylock and also returns with -EINTR. (i.e. invalidating the

/*
* Here we order against unlock; we must either see it change
* state back to RUNNING and fall through the next schedule(),
* or we must see its unlock and acquire.
*/

as we may not reach the next schedule). Repeating the
__mutex_waiter_is_first() after acquiring the wait_lock is sufficient,
as is clearing the HANDOFF bit before -EINTR.

diff --git a/kernel/locking/mutex.c b/kernel/locking/mutex.c
index 9b349619f431..6f7e3bf0d595 100644
--- a/kernel/locking/mutex.c
+++ b/kernel/locking/mutex.c
@@ -684,6 +684,8 @@ __mutex_lock_common(struct mutex *lock, long state, unsigned int subclass,
* against mutex_unlock() and wake-ups do not go missing.
*/
if (unlikely(signal_pending_state(state, task))) {
+ if (first)
+ __mutex_clear_flag(lock, MUTEX_FLAG_HANDOFF);
ret = -EINTR;
goto err;
}

Though I expect you will be able to find a better solution.
-Chris

--
Chris Wilson, Intel Open Source Technology Centre