[patch1] locking: remove rcu_read_lock/unlock as we already disabled preemption
v1->v2: also remove the unnecessary rcu_read_lock/unlock in rwsem
[patch2] locking/rwsem: disable preemption for spinning region
[patch3] locking/rwsem: fix comments about reader optimistic lock stealing conditions
These two patches were sent to the mailing list some weeks ago but seem
to have been missed, so I am sending them again.
Yanfei Xu (3):
locking: remove rcu_read_lock/unlock as we already disabled preemption
locking/rwsem: disable preemption for spinning region
locking/rwsem: fix comments about reader optimistic lock stealing
conditions
kernel/locking/mutex.c | 22 +++++++++++++++-------
kernel/locking/rwsem.c | 28 ++++++++++++++++++----------
2 files changed, 33 insertions(+), 17 deletions(-)
--
2.27.0
A preempt_disable/enable() pair is equivalent to an RCU read-side
critical section, and the spinning code in mutex and rwsem already
ensures that preemption is disabled there. So remove the unnecessary
rcu_read_lock/unlock to save some cycles in these hot paths.
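To illustrate the reasoning, here is a minimal sketch (the helper name and
the simplified loop are mine, not the actual mutex code): an RCU grace
period cannot complete while a CPU runs with preemption disabled, so the
owner's task_struct cannot be freed under a spinner that already has
preemption off, and the explicit rcu_read_lock()/rcu_read_unlock() pair
adds nothing.

/*
 * Illustrative sketch only, assuming the caller (the optimistic spinning
 * path) has already disabled preemption.
 */
static bool spin_on_owner_sketch(struct mutex *lock, struct task_struct *owner)
{
	/* Replaces rcu_read_lock(): document and check the real requirement. */
	lockdep_assert_preemption_disabled();

	while (__mutex_owner(lock) == owner) {
		/* Re-read owner->on_cpu only after the owner match above. */
		barrier();
		if (!owner->on_cpu || need_resched())
			return false;	/* owner off CPU or we must reschedule */
		cpu_relax();
	}

	return true;	/* owner changed; the caller may try to take the lock */
}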
Signed-off-by: Yanfei Xu <[email protected]>
---
kernel/locking/mutex.c | 22 +++++++++++++++-------
kernel/locking/rwsem.c | 14 +++++++++-----
2 files changed, 24 insertions(+), 12 deletions(-)
diff --git a/kernel/locking/mutex.c b/kernel/locking/mutex.c
index 2fede72b6af5..db1913611192 100644
--- a/kernel/locking/mutex.c
+++ b/kernel/locking/mutex.c
@@ -351,13 +351,16 @@ bool mutex_spin_on_owner(struct mutex *lock, struct task_struct *owner,
{
bool ret = true;
- rcu_read_lock();
+ lockdep_assert_preemption_disabled();
+
while (__mutex_owner(lock) == owner) {
/*
* Ensure we emit the owner->on_cpu, dereference _after_
- * checking lock->owner still matches owner. If that fails,
- * owner might point to freed memory. If it still matches,
- * the rcu_read_lock() ensures the memory stays valid.
+ * checking lock->owner still matches owner. We already
+ * disabled preemption, which is equivalent to an RCU
+ * read-side critical section in the optimistic spinning
+ * code. Thus the task_struct structure won't go away
+ * during the spinning period.
*/
barrier();
@@ -377,7 +380,6 @@ bool mutex_spin_on_owner(struct mutex *lock, struct task_struct *owner,
cpu_relax();
}
- rcu_read_unlock();
return ret;
}
@@ -390,19 +392,25 @@ static inline int mutex_can_spin_on_owner(struct mutex *lock)
struct task_struct *owner;
int retval = 1;
+ lockdep_assert_preemption_disabled();
+
if (need_resched())
return 0;
- rcu_read_lock();
+ /*
+ * We already disabled preemption, which is equivalent to an RCU
+ * read-side critical section in the optimistic spinning code. Thus
+ * the task_struct structure won't go away during the spinning period.
+ */
owner = __mutex_owner(lock);
/*
* As lock holder preemption issue, we both skip spinning if task is not
* on cpu or its cpu is preempted
*/
+
if (owner)
retval = owner->on_cpu && !vcpu_is_preempted(task_cpu(owner));
- rcu_read_unlock();
/*
* If lock->owner is not set, the mutex has been released. Return true
diff --git a/kernel/locking/rwsem.c b/kernel/locking/rwsem.c
index 000e8d5a2884..7b5af452ace2 100644
--- a/kernel/locking/rwsem.c
+++ b/kernel/locking/rwsem.c
@@ -617,7 +617,10 @@ static inline bool rwsem_can_spin_on_owner(struct rw_semaphore *sem)
}
preempt_disable();
- rcu_read_lock();
+ /*
+ * Disabling preemption is equivalent to an RCU read-side critical
+ * section, thus the task_struct structure won't go away.
+ */
owner = rwsem_owner_flags(sem, &flags);
/*
* Don't check the read-owner as the entry may be stale.
@@ -625,7 +628,6 @@ static inline bool rwsem_can_spin_on_owner(struct rw_semaphore *sem)
if ((flags & RWSEM_NONSPINNABLE) ||
(owner && !(flags & RWSEM_READER_OWNED) && !owner_on_cpu(owner)))
ret = false;
- rcu_read_unlock();
preempt_enable();
lockevent_cond_inc(rwsem_opt_fail, !ret);
@@ -670,12 +672,13 @@ rwsem_spin_on_owner(struct rw_semaphore *sem)
unsigned long flags, new_flags;
enum owner_state state;
+ lockdep_assert_preemption_disabled();
+
owner = rwsem_owner_flags(sem, &flags);
state = rwsem_owner_state(owner, flags);
if (state != OWNER_WRITER)
return state;
- rcu_read_lock();
for (;;) {
/*
* When a waiting writer set the handoff flag, it may spin
@@ -693,7 +696,9 @@ rwsem_spin_on_owner(struct rw_semaphore *sem)
* Ensure we emit the owner->on_cpu, dereference _after_
* checking sem->owner still matches owner, if that fails,
* owner might point to free()d memory, if it still matches,
- * the rcu_read_lock() ensures the memory stays valid.
+ * our spinning context already disabled preemption, which is
+ * equivalent to an RCU read-side critical section and ensures
+ * the memory stays valid.
*/
barrier();
@@ -704,7 +709,6 @@ rwsem_spin_on_owner(struct rw_semaphore *sem)
cpu_relax();
}
- rcu_read_unlock();
return state;
}
--
2.27.0
After commit 617f3ef95177 ("locking/rwsem: Remove reader optimistic
spinning"), readers no longer do optimistic spinning, so there is no
need to meet the condition that the OSQ is empty.
While at it, add an unlikely() to the max-reader wakeup check in the loop.
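As a rough illustration of the remaining conditions, a hypothetical helper
(the name is made up; RWSEM_WRITER_LOCKED and RWSEM_FLAG_HANDOFF are the
real count bits used by rwsem.c):

/*
 * Sketch only: after 617f3ef95177, a reader on the fast path may steal the
 * lock when the rwsem is not currently writer owned and no waiter has set
 * the handoff bit.  The OSQ no longer plays a role for readers.
 */
static bool reader_can_steal_sketch(unsigned long count)
{
	if (count & RWSEM_WRITER_LOCKED)	/* writer currently owns the rwsem */
		return false;
	if (count & RWSEM_FLAG_HANDOFF)		/* handoff has been requested */
		return false;
	return true;
}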
Signed-off-by: Yanfei Xu <[email protected]>
---
kernel/locking/rwsem.c | 3 +--
1 file changed, 1 insertion(+), 2 deletions(-)
diff --git a/kernel/locking/rwsem.c b/kernel/locking/rwsem.c
index 06925b43c3e7..2bd914187399 100644
--- a/kernel/locking/rwsem.c
+++ b/kernel/locking/rwsem.c
@@ -56,7 +56,6 @@
*
* A fast path reader optimistic lock stealing is supported when the rwsem
* is previously owned by a writer and the following conditions are met:
- * - OSQ is empty
* - rwsem is not currently writer owned
* - the handoff isn't set.
*/
@@ -485,7 +484,7 @@ static void rwsem_mark_wake(struct rw_semaphore *sem,
/*
* Limit # of readers that can be woken up per wakeup call.
*/
- if (woken >= MAX_READERS_WAKEUP)
+ if (unlikely(woken >= MAX_READERS_WAKEUP))
break;
}
--
2.27.0
The spinning region in rwsem_spin_on_owner() should not be preempted,
however rwsem_down_write_slowpath() invokes it without disabling
preemption. Fix it by adding a preempt_disable/enable() pair around the call.
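A condensed sketch of the resulting call pattern (hypothetical wrapper
name; rwsem_spin_on_owner() itself now asserts, via the first patch, that
preemption is disabled):

/* Sketch of the WRITER_HANDOFF retry path after this fix. */
static bool writer_handoff_retry_sketch(struct rw_semaphore *sem)
{
	enum owner_state owner_state;

	preempt_disable();			/* satisfy the spin helper's requirement */
	owner_state = rwsem_spin_on_owner(sem);
	preempt_enable();

	return owner_state == OWNER_NULL;	/* true: retry the lock without sleeping */
}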
Signed-off-by: Yanfei Xu <[email protected]>
---
kernel/locking/rwsem.c | 11 ++++++++---
1 file changed, 8 insertions(+), 3 deletions(-)
diff --git a/kernel/locking/rwsem.c b/kernel/locking/rwsem.c
index 7b5af452ace2..06925b43c3e7 100644
--- a/kernel/locking/rwsem.c
+++ b/kernel/locking/rwsem.c
@@ -1024,6 +1024,7 @@ rwsem_down_write_slowpath(struct rw_semaphore *sem, int state)
enum writer_wait_state wstate;
struct rwsem_waiter waiter;
struct rw_semaphore *ret = sem;
+ enum owner_state owner_state;
DEFINE_WAKE_Q(wake_q);
/* do optimistic spinning and steal lock if possible */
@@ -1099,9 +1100,13 @@ rwsem_down_write_slowpath(struct rw_semaphore *sem, int state)
* In this case, we attempt to acquire the lock again
* without sleeping.
*/
- if (wstate == WRITER_HANDOFF &&
- rwsem_spin_on_owner(sem) == OWNER_NULL)
- goto trylock_again;
+ if (wstate == WRITER_HANDOFF) {
+ preempt_disable();
+ owner_state = rwsem_spin_on_owner(sem);
+ preempt_enable();
+ if (owner_state == OWNER_NULL)
+ goto trylock_again;
+ }
/* Block until there are no active lockers. */
for (;;) {
--
2.27.0
On 10/13/21 9:41 AM, Yanfei Xu wrote:
> [patch1] locking: remove rcu_read_lock/unlock as we already disabled preemption
> v1->v2: also remove the unnecessary rcu_read_lock/unlock in rwsem
>
> [patch2] locking/rwsem: disable preemption for spinning region
> [patch3] locking/rwsem: fix comments about reader optimistic lock stealing conditions
> These two patches were sent to the mailing list some weeks ago but seem
> to have been missed, so I am sending them again.
>
> Yanfei Xu (3):
> locking: remove rcu_read_lock/unlock as we already disabled preemption
> locking/rwsem: disable preemption for spinning region
> locking/rwsem: fix comments about reader optimistic lock stealing
> conditions
>
> kernel/locking/mutex.c | 22 +++++++++++++++-------
> kernel/locking/rwsem.c | 28 ++++++++++++++++++----------
> 2 files changed, 33 insertions(+), 17 deletions(-)
>
The patches look good to me. Thanks!
For the series,
Acked-by: Waiman Long <[email protected]>
On Wed, Oct 13, 2021 at 09:41:53PM +0800, Yanfei Xu wrote:
> The spinning region in rwsem_spin_on_owner() should not be preempted,
> however rwsem_down_write_slowpath() invokes it without disabling
> preemption. Fix it by adding a preempt_disable/enable() pair around the call.
I'm thinking we should do this patch before #1, otherwise we have a
single patch window where we'll trigger the assertion, no?
>
> Signed-off-by: Yanfei Xu <[email protected]>
> ---
> kernel/locking/rwsem.c | 11 ++++++++---
> 1 file changed, 8 insertions(+), 3 deletions(-)
>
> diff --git a/kernel/locking/rwsem.c b/kernel/locking/rwsem.c
> index 7b5af452ace2..06925b43c3e7 100644
> --- a/kernel/locking/rwsem.c
> +++ b/kernel/locking/rwsem.c
> @@ -1024,6 +1024,7 @@ rwsem_down_write_slowpath(struct rw_semaphore *sem, int state)
> enum writer_wait_state wstate;
> struct rwsem_waiter waiter;
> struct rw_semaphore *ret = sem;
> + enum owner_state owner_state;
> DEFINE_WAKE_Q(wake_q);
>
> /* do optimistic spinning and steal lock if possible */
> @@ -1099,9 +1100,13 @@ rwsem_down_write_slowpath(struct rw_semaphore *sem, int state)
> * In this case, we attempt to acquire the lock again
> * without sleeping.
> */
> - if (wstate == WRITER_HANDOFF &&
> - rwsem_spin_on_owner(sem) == OWNER_NULL)
> - goto trylock_again;
> + if (wstate == WRITER_HANDOFF) {
> + preempt_disable();
> + owner_state = rwsem_spin_on_owner(sem);
> + preempt_enable();
> + if (owner_state == OWNER_NULL)
> + goto trylock_again;
> + }
>
> /* Block until there are no active lockers. */
> for (;;) {
> --
> 2.27.0
>
On Fri, Oct 15, 2021 at 12:13:59PM +0200, Peter Zijlstra wrote:
> On Wed, Oct 13, 2021 at 09:41:53PM +0800, Yanfei Xu wrote:
> > The spinning region in rwsem_spin_on_owner() should not be preempted,
> > however rwsem_down_write_slowpath() invokes it without disabling
> > preemption. Fix it by adding a preempt_disable/enable() pair around the call.
>
> I'm thinking we should do this patch before #1, otherwise we have a
> single patch window where we'll trigger the assertion, no?
Anyway, I've stuck the lot (reordered) into my locking/core branch, let's
see what the robots make of it ;-)
On 10/15/21 6:20 PM, Peter Zijlstra wrote:
> On Fri, Oct 15, 2021 at 12:13:59PM +0200, Peter Zijlstra wrote:
>> On Wed, Oct 13, 2021 at 09:41:53PM +0800, Yanfei Xu wrote:
>>> The spinning region in rwsem_spin_on_owner() should not be preempted,
>>> however rwsem_down_write_slowpath() invokes it without disabling
>>> preemption. Fix it by adding a preempt_disable/enable() pair around the call.
>>
>> I'm thinking we should do this patch before #1, otherwise we have a
>> single patch window where we'll trigger the assertion, no?
>
> Anyway, I've stuck the lot (reordered) into my locking/core branch, let's
> see what the robots make of it ;-)
>
Well, thanks for reordering them :)
Cheers,
Yanfei
The following commit has been merged into the locking/core branch of tip:
Commit-ID: 5197fcd09ab6dcc4df79edec7e8e27575276374c
Gitweb: https://git.kernel.org/tip/5197fcd09ab6dcc4df79edec7e8e27575276374c
Author: Yanfei Xu <[email protected]>
AuthorDate: Wed, 13 Oct 2021 21:41:54 +08:00
Committer: Peter Zijlstra <[email protected]>
CommitterDate: Tue, 19 Oct 2021 17:27:06 +02:00
locking/rwsem: Fix comments about reader optimistic lock stealing conditions
After commit 617f3ef95177 ("locking/rwsem: Remove reader optimistic
spinning"), readers no longer do optimistic spinning, so there is no
need to meet the condition that the OSQ is empty.
While at it, add an unlikely() to the max-reader wakeup check in the loop.
Signed-off-by: Yanfei Xu <[email protected]>
Signed-off-by: Peter Zijlstra (Intel) <[email protected]>
Acked-by: Waiman Long <[email protected]>
Link: https://lore.kernel.org/r/[email protected]
---
kernel/locking/rwsem.c | 3 +--
1 file changed, 1 insertion(+), 2 deletions(-)
diff --git a/kernel/locking/rwsem.c b/kernel/locking/rwsem.c
index 884aa08..c51387a 100644
--- a/kernel/locking/rwsem.c
+++ b/kernel/locking/rwsem.c
@@ -56,7 +56,6 @@
*
* A fast path reader optimistic lock stealing is supported when the rwsem
* is previously owned by a writer and the following conditions are met:
- * - OSQ is empty
* - rwsem is not currently writer owned
* - the handoff isn't set.
*/
@@ -485,7 +484,7 @@ static void rwsem_mark_wake(struct rw_semaphore *sem,
/*
* Limit # of readers that can be woken up per wakeup call.
*/
- if (woken >= MAX_READERS_WAKEUP)
+ if (unlikely(woken >= MAX_READERS_WAKEUP))
break;
}
The following commit has been merged into the locking/core branch of tip:
Commit-ID: 7cdacc5f52d68a9370f182c844b5b3e6cc975cc1
Gitweb: https://git.kernel.org/tip/7cdacc5f52d68a9370f182c844b5b3e6cc975cc1
Author: Yanfei Xu <[email protected]>
AuthorDate: Wed, 13 Oct 2021 21:41:53 +08:00
Committer: Peter Zijlstra <[email protected]>
CommitterDate: Tue, 19 Oct 2021 17:27:05 +02:00
locking/rwsem: Disable preemption for spinning region
The spinning region in rwsem_spin_on_owner() should not be preempted,
however rwsem_down_write_slowpath() invokes it without disabling
preemption. Fix it by adding a preempt_disable/enable() pair around the call.
Signed-off-by: Yanfei Xu <[email protected]>
[peterz: Fix CONFIG_RWSEM_SPIN_ON_OWNER=n build]
Signed-off-by: Peter Zijlstra (Intel) <[email protected]>
Acked-by: Waiman Long <[email protected]>
Link: https://lore.kernel.org/r/[email protected]
---
kernel/locking/rwsem.c | 53 +++++++++++++++++++++++------------------
1 file changed, 30 insertions(+), 23 deletions(-)
diff --git a/kernel/locking/rwsem.c b/kernel/locking/rwsem.c
index 000e8d5..29eea50 100644
--- a/kernel/locking/rwsem.c
+++ b/kernel/locking/rwsem.c
@@ -577,6 +577,24 @@ static inline bool rwsem_try_write_lock(struct rw_semaphore *sem,
return true;
}
+/*
+ * The rwsem_spin_on_owner() function returns the following 4 values
+ * depending on the lock owner state.
+ * OWNER_NULL : owner is currently NULL
+ * OWNER_WRITER: when owner changes and is a writer
+ * OWNER_READER: when owner changes and the new owner may be a reader.
+ * OWNER_NONSPINNABLE:
+ * when optimistic spinning has to stop because either the
+ * owner stops running, is unknown, or its timeslice has
+ * been used up.
+ */
+enum owner_state {
+ OWNER_NULL = 1 << 0,
+ OWNER_WRITER = 1 << 1,
+ OWNER_READER = 1 << 2,
+ OWNER_NONSPINNABLE = 1 << 3,
+};
+
#ifdef CONFIG_RWSEM_SPIN_ON_OWNER
/*
* Try to acquire write lock before the writer has been put on wait queue.
@@ -632,23 +650,6 @@ static inline bool rwsem_can_spin_on_owner(struct rw_semaphore *sem)
return ret;
}
-/*
- * The rwsem_spin_on_owner() function returns the following 4 values
- * depending on the lock owner state.
- * OWNER_NULL : owner is currently NULL
- * OWNER_WRITER: when owner changes and is a writer
- * OWNER_READER: when owner changes and the new owner may be a reader.
- * OWNER_NONSPINNABLE:
- * when optimistic spinning has to stop because either the
- * owner stops running, is unknown, or its timeslice has
- * been used up.
- */
-enum owner_state {
- OWNER_NULL = 1 << 0,
- OWNER_WRITER = 1 << 1,
- OWNER_READER = 1 << 2,
- OWNER_NONSPINNABLE = 1 << 3,
-};
#define OWNER_SPINNABLE (OWNER_NULL | OWNER_WRITER | OWNER_READER)
static inline enum owner_state
@@ -878,12 +879,11 @@ static inline bool rwsem_optimistic_spin(struct rw_semaphore *sem)
static inline void clear_nonspinnable(struct rw_semaphore *sem) { }
-static inline int
+static inline enum owner_state
rwsem_spin_on_owner(struct rw_semaphore *sem)
{
- return 0;
+ return OWNER_NONSPINNABLE;
}
-#define OWNER_NULL 1
#endif
/*
@@ -1095,9 +1095,16 @@ wait:
* In this case, we attempt to acquire the lock again
* without sleeping.
*/
- if (wstate == WRITER_HANDOFF &&
- rwsem_spin_on_owner(sem) == OWNER_NULL)
- goto trylock_again;
+ if (wstate == WRITER_HANDOFF) {
+ enum owner_state owner_state;
+
+ preempt_disable();
+ owner_state = rwsem_spin_on_owner(sem);
+ preempt_enable();
+
+ if (owner_state == OWNER_NULL)
+ goto trylock_again;
+ }
/* Block until there are no active lockers. */
for (;;) {
The following commit has been merged into the locking/core branch of tip:
Commit-ID: 6c2787f2a20ceb49c98bd06f7dad1589eed1c951
Gitweb: https://git.kernel.org/tip/6c2787f2a20ceb49c98bd06f7dad1589eed1c951
Author: Yanfei Xu <[email protected]>
AuthorDate: Wed, 13 Oct 2021 21:41:52 +08:00
Committer: Peter Zijlstra <[email protected]>
CommitterDate: Tue, 19 Oct 2021 17:27:06 +02:00
locking: Remove rcu_read_{,un}lock() for preempt_{dis,en}able()
A preempt_disable/enable() pair is equivalent to an RCU read-side
critical section, and the spinning code in mutex and rwsem already
ensures that preemption is disabled there. So remove the unnecessary
rcu_read_lock/unlock to save some cycles in these hot paths.
Signed-off-by: Yanfei Xu <[email protected]>
Signed-off-by: Peter Zijlstra (Intel) <[email protected]>
Acked-by: Waiman Long <[email protected]>
Link: https://lore.kernel.org/r/[email protected]
---
kernel/locking/mutex.c | 22 +++++++++++++++-------
kernel/locking/rwsem.c | 14 +++++++++-----
2 files changed, 24 insertions(+), 12 deletions(-)
diff --git a/kernel/locking/mutex.c b/kernel/locking/mutex.c
index 2fede72..db19136 100644
--- a/kernel/locking/mutex.c
+++ b/kernel/locking/mutex.c
@@ -351,13 +351,16 @@ bool mutex_spin_on_owner(struct mutex *lock, struct task_struct *owner,
{
bool ret = true;
- rcu_read_lock();
+ lockdep_assert_preemption_disabled();
+
while (__mutex_owner(lock) == owner) {
/*
* Ensure we emit the owner->on_cpu, dereference _after_
- * checking lock->owner still matches owner. If that fails,
- * owner might point to freed memory. If it still matches,
- * the rcu_read_lock() ensures the memory stays valid.
+ * checking lock->owner still matches owner. We already
+ * disabled preemption, which is equivalent to an RCU
+ * read-side critical section in the optimistic spinning
+ * code. Thus the task_struct structure won't go away
+ * during the spinning period.
*/
barrier();
@@ -377,7 +380,6 @@ bool mutex_spin_on_owner(struct mutex *lock, struct task_struct *owner,
cpu_relax();
}
- rcu_read_unlock();
return ret;
}
@@ -390,19 +392,25 @@ static inline int mutex_can_spin_on_owner(struct mutex *lock)
struct task_struct *owner;
int retval = 1;
+ lockdep_assert_preemption_disabled();
+
if (need_resched())
return 0;
- rcu_read_lock();
+ /*
+ * We already disabled preemption, which is equivalent to an RCU
+ * read-side critical section in the optimistic spinning code. Thus
+ * the task_struct structure won't go away during the spinning period.
+ */
owner = __mutex_owner(lock);
/*
* As lock holder preemption issue, we both skip spinning if task is not
* on cpu or its cpu is preempted
*/
+
if (owner)
retval = owner->on_cpu && !vcpu_is_preempted(task_cpu(owner));
- rcu_read_unlock();
/*
* If lock->owner is not set, the mutex has been released. Return true
diff --git a/kernel/locking/rwsem.c b/kernel/locking/rwsem.c
index 29eea50..884aa08 100644
--- a/kernel/locking/rwsem.c
+++ b/kernel/locking/rwsem.c
@@ -635,7 +635,10 @@ static inline bool rwsem_can_spin_on_owner(struct rw_semaphore *sem)
}
preempt_disable();
- rcu_read_lock();
+ /*
+ * Disabling preemption is equivalent to an RCU read-side critical
+ * section, thus the task_struct structure won't go away.
+ */
owner = rwsem_owner_flags(sem, &flags);
/*
* Don't check the read-owner as the entry may be stale.
@@ -643,7 +646,6 @@ static inline bool rwsem_can_spin_on_owner(struct rw_semaphore *sem)
if ((flags & RWSEM_NONSPINNABLE) ||
(owner && !(flags & RWSEM_READER_OWNED) && !owner_on_cpu(owner)))
ret = false;
- rcu_read_unlock();
preempt_enable();
lockevent_cond_inc(rwsem_opt_fail, !ret);
@@ -671,12 +673,13 @@ rwsem_spin_on_owner(struct rw_semaphore *sem)
unsigned long flags, new_flags;
enum owner_state state;
+ lockdep_assert_preemption_disabled();
+
owner = rwsem_owner_flags(sem, &flags);
state = rwsem_owner_state(owner, flags);
if (state != OWNER_WRITER)
return state;
- rcu_read_lock();
for (;;) {
/*
* When a waiting writer set the handoff flag, it may spin
@@ -694,7 +697,9 @@ rwsem_spin_on_owner(struct rw_semaphore *sem)
* Ensure we emit the owner->on_cpu, dereference _after_
* checking sem->owner still matches owner, if that fails,
* owner might point to free()d memory, if it still matches,
- * the rcu_read_lock() ensures the memory stays valid.
+ * our spinning context already disabled preemption, which is
+ * equivalent to an RCU read-side critical section and ensures
+ * the memory stays valid.
*/
barrier();
@@ -705,7 +710,6 @@ rwsem_spin_on_owner(struct rw_semaphore *sem)
cpu_relax();
}
- rcu_read_unlock();
return state;
}