Now that the reader and writer wait loops are identical, share the
code.
Signed-off-by: Peter Zijlstra (Intel) <[email protected]>
---
kernel/locking/rwsem.c | 117 +++++++++++++++++++------------------------------
1 file changed, 47 insertions(+), 70 deletions(-)
--- a/kernel/locking/rwsem.c
+++ b/kernel/locking/rwsem.c
@@ -650,13 +650,11 @@ static void rwsem_mark_wake(struct rw_se
* optionally wake up waiters before it returns.
*/
static inline void
-rwsem_del_wake_waiter(struct rw_semaphore *sem, struct rwsem_waiter *waiter,
- struct wake_q_head *wake_q)
+rwsem_del_wake_waiter(struct rw_semaphore *sem, struct rwsem_waiter *waiter)
__releases(&sem->wait_lock)
{
bool first = rwsem_first_waiter(sem) == waiter;
-
- wake_q_init(wake_q);
+ DEFINE_WAKE_Q(wake_q);
/*
* If the wait_list isn't empty and the waiter to be deleted is
@@ -664,10 +662,10 @@ rwsem_del_wake_waiter(struct rw_semaphor
* be eligible to acquire or spin on the lock.
*/
if (rwsem_del_waiter(sem, waiter) && first)
- rwsem_mark_wake(sem, RWSEM_WAKE_ANY, wake_q);
+ rwsem_mark_wake(sem, RWSEM_WAKE_ANY, &wake_q);
raw_spin_unlock_irq(&sem->wait_lock);
- if (!wake_q_empty(wake_q))
- wake_up_q(wake_q);
+ if (!wake_q_empty(&wake_q))
+ wake_up_q(&wake_q);
}
/*
@@ -993,6 +991,46 @@ static inline void rwsem_cond_wake_waite
rwsem_mark_wake(sem, wake_type, wake_q);
}
+#define waiter_type(_waiter, _r, _w) \
+ ((_waiter)->type == RWSEM_WAITING_FOR_READ ? (_r) : (_w))
+
+static __always_inline struct rw_semaphore *
+rwsem_waiter_wait(struct rw_semaphore *sem, struct rwsem_waiter *waiter, int state)
+{
+ trace_contention_begin(sem, waiter_type(waiter, LCB_F_READ, LCB_F_WRITE));
+
+ /* wait to be given the lock */
+ for (;;) {
+ set_current_state(state);
+ if (!smp_load_acquire(&waiter->task)) {
+ /* Matches rwsem_waiter_wake()'s smp_store_release(). */
+ break;
+ }
+ if (signal_pending_state(state, current)) {
+ raw_spin_lock_irq(&sem->wait_lock);
+ if (waiter->task)
+ goto out_nolock;
+ raw_spin_unlock_irq(&sem->wait_lock);
+ /* Ordered by sem->wait_lock against rwsem_mark_wake(). */
+ break;
+ }
+ schedule_preempt_disabled();
+ lockevent_inc(waiter_type(waiter, rwsem_sleep_reader, rwsem_sleep_writer));
+ }
+
+ __set_current_state(TASK_RUNNING);
+ lockevent_inc(waiter_type(waiter, rwsem_rlock, rwsem_wlock));
+ trace_contention_end(sem, 0);
+ return sem;
+
+out_nolock:
+ rwsem_del_wake_waiter(sem, waiter);
+ __set_current_state(TASK_RUNNING);
+ lockevent_inc(waiter_type(waiter, rwsem_rlock_fail, rwsem_wlock_fail));
+ trace_contention_end(sem, -EINTR);
+ return ERR_PTR(-EINTR);
+}
+
/*
* Wait for the read lock to be granted
*/
@@ -1071,38 +1109,7 @@ rwsem_down_read_slowpath(struct rw_semap
if (!wake_q_empty(&wake_q))
wake_up_q(&wake_q);
- trace_contention_begin(sem, LCB_F_READ);
-
- /* wait to be given the lock */
- for (;;) {
- set_current_state(state);
- if (!smp_load_acquire(&waiter.task)) {
- /* Matches rwsem_waiter_wake()'s smp_store_release(). */
- break;
- }
- if (signal_pending_state(state, current)) {
- raw_spin_lock_irq(&sem->wait_lock);
- if (waiter.task)
- goto out_nolock;
- raw_spin_unlock_irq(&sem->wait_lock);
- /* Ordered by sem->wait_lock against rwsem_mark_wake(). */
- break;
- }
- schedule_preempt_disabled();
- lockevent_inc(rwsem_sleep_reader);
- }
-
- __set_current_state(TASK_RUNNING);
- lockevent_inc(rwsem_rlock);
- trace_contention_end(sem, 0);
- return sem;
-
-out_nolock:
- rwsem_del_wake_waiter(sem, &waiter, &wake_q);
- __set_current_state(TASK_RUNNING);
- lockevent_inc(rwsem_rlock_fail);
- trace_contention_end(sem, -EINTR);
- return ERR_PTR(-EINTR);
+ return rwsem_waiter_wait(sem, &waiter, state);
}
/*
@@ -1150,37 +1157,7 @@ rwsem_down_write_slowpath(struct rw_sema
}
raw_spin_unlock_irq(&sem->wait_lock);
- /* wait until we successfully acquire the lock */
- trace_contention_begin(sem, LCB_F_WRITE);
-
- for (;;) {
- set_current_state(state);
- if (!smp_load_acquire(&waiter.task)) {
- /* Matches rwsem_waiter_wake()'s smp_store_release(). */
- break;
- }
- if (signal_pending_state(state, current)) {
- raw_spin_lock_irq(&sem->wait_lock);
- if (waiter.task)
- goto out_nolock;
- raw_spin_unlock_irq(&sem->wait_lock);
- /* Ordered by sem->wait_lock against rwsem_mark_wake(). */
- break;
- }
- schedule_preempt_disabled();
- lockevent_inc(rwsem_sleep_writer);
- }
- __set_current_state(TASK_RUNNING);
- lockevent_inc(rwsem_wlock);
- trace_contention_end(sem, 0);
- return sem;
-
-out_nolock:
- rwsem_del_wake_waiter(sem, &waiter, &wake_q);
- __set_current_state(TASK_RUNNING);
- lockevent_inc(rwsem_wlock_fail);
- trace_contention_end(sem, -EINTR);
- return ERR_PTR(-EINTR);
+ return rwsem_waiter_wait(sem, &waiter, state);
}
/*
On Thu, Feb 23, 2023 at 01:26:47PM +0100, Peter Zijlstra wrote:
> Now that the reader and writer wait loops are identical, share the
> code.
>
> Signed-off-by: Peter Zijlstra (Intel) <[email protected]>
> ---
> kernel/locking/rwsem.c | 117 +++++++++++++++++++------------------------------
> 1 file changed, 47 insertions(+), 70 deletions(-)
>
> --- a/kernel/locking/rwsem.c
> +++ b/kernel/locking/rwsem.c
> @@ -650,13 +650,11 @@ static void rwsem_mark_wake(struct rw_se
> * optionally wake up waiters before it returns.
> */
> static inline void
> -rwsem_del_wake_waiter(struct rw_semaphore *sem, struct rwsem_waiter *waiter,
> - struct wake_q_head *wake_q)
> +rwsem_del_wake_waiter(struct rw_semaphore *sem, struct rwsem_waiter *waiter)
> __releases(&sem->wait_lock)
> {
> bool first = rwsem_first_waiter(sem) == waiter;
> -
> - wake_q_init(wake_q);
> + DEFINE_WAKE_Q(wake_q);
>
> /*
> * If the wait_list isn't empty and the waiter to be deleted is
> @@ -664,10 +662,10 @@ rwsem_del_wake_waiter(struct rw_semaphor
> * be eligible to acquire or spin on the lock.
> */
> if (rwsem_del_waiter(sem, waiter) && first)
> - rwsem_mark_wake(sem, RWSEM_WAKE_ANY, wake_q);
> + rwsem_mark_wake(sem, RWSEM_WAKE_ANY, &wake_q);
> raw_spin_unlock_irq(&sem->wait_lock);
> - if (!wake_q_empty(wake_q))
> - wake_up_q(wake_q);
> + if (!wake_q_empty(&wake_q))
> + wake_up_q(&wake_q);
> }
>
> /*
> @@ -993,6 +991,46 @@ static inline void rwsem_cond_wake_waite
> rwsem_mark_wake(sem, wake_type, wake_q);
> }
>
> +#define waiter_type(_waiter, _r, _w) \
> + ((_waiter)->type == RWSEM_WAITING_FOR_READ ? (_r) : (_w))
> +
> +static __always_inline struct rw_semaphore *
> +rwsem_waiter_wait(struct rw_semaphore *sem, struct rwsem_waiter *waiter, int state)
> +{
> + trace_contention_begin(sem, waiter_type(waiter, LCB_F_READ, LCB_F_WRITE));
> +
> + /* wait to be given the lock */
> + for (;;) {
> + set_current_state(state);
> + if (!smp_load_acquire(&waiter->task)) {
> + /* Matches rwsem_waiter_wake()'s smp_store_release(). */
> + break;
> + }
> + if (signal_pending_state(state, current)) {
> + raw_spin_lock_irq(&sem->wait_lock);
Move the below __set_current_state(TASK_RUNNING)s up here? I think we
need the preemption protection when changing the task state here.
> + if (waiter->task)
> + goto out_nolock;
I originally wanted to suggest renaming the label to "out_locked", but I
think we can just move the labeled code up here, and even open-code
rwsem_del_wake_waiter() since it now has only one caller.
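Something like this, perhaps (completely untested; the lockevent/trace
accounting of the out_nolock tail elided):

		if (signal_pending_state(state, current)) {
			raw_spin_lock_irq(&sem->wait_lock);
			if (waiter->task) {
				/* Still queued; clean up under wait_lock. */
				bool first = rwsem_first_waiter(sem) == waiter;
				DEFINE_WAKE_Q(wake_q);

				__set_current_state(TASK_RUNNING);
				if (rwsem_del_waiter(sem, waiter) && first)
					rwsem_mark_wake(sem, RWSEM_WAKE_ANY, &wake_q);
				raw_spin_unlock_irq(&sem->wait_lock);
				if (!wake_q_empty(&wake_q))
					wake_up_q(&wake_q);
				return ERR_PTR(-EINTR);
			}
			raw_spin_unlock_irq(&sem->wait_lock);
			/* Ordered by sem->wait_lock against rwsem_mark_wake(). */
			break;
		}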
Regards,
Boqun
On 2/23/23 07:26, Peter Zijlstra wrote:
> [...]
> +#define waiter_type(_waiter, _r, _w) \
> + ((_waiter)->type == RWSEM_WAITING_FOR_READ ? (_r) : (_w))
> +
> +static __always_inline struct rw_semaphore *
> +rwsem_waiter_wait(struct rw_semaphore *sem, struct rwsem_waiter *waiter, int state)
> +{
> + trace_contention_begin(sem, waiter_type(waiter, LCB_F_READ, LCB_F_WRITE));
> +
> + /* wait to be given the lock */
> + for (;;) {
> + set_current_state(state);
> + if (!smp_load_acquire(&waiter->task)) {
> + /* Matches rwsem_waiter_wake()'s smp_store_release(). */
> + break;
> + }
> + if (signal_pending_state(state, current)) {
> + raw_spin_lock_irq(&sem->wait_lock);
> + if (waiter->task)
> + goto out_nolock;
> + raw_spin_unlock_irq(&sem->wait_lock);
> + /* Ordered by sem->wait_lock against rwsem_mark_wake(). */
> + break;
> + }
> + schedule_preempt_disabled();
> + lockevent_inc(waiter_type(waiter, rwsem_sleep_reader, rwsem_sleep_writer));
> + }
> +
> + __set_current_state(TASK_RUNNING);
> + lockevent_inc(waiter_type(waiter, rwsem_rlock, rwsem_wlock));
> + trace_contention_end(sem, 0);
> + return sem;
> +
> +out_nolock:
> + rwsem_del_wake_waiter(sem, waiter);
> + __set_current_state(TASK_RUNNING);
Similar to Boqun's comment, we should move __set_current_state() before
rwsem_del_wake_waiter().
Unfortunately, lockevent_inc() doesn't work with waiter_type() like
that: the compilation will fail if CONFIG_LOCK_EVENT_COUNTS is enabled.
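The event name is token-pasted into the per-cpu counter lookup, so it
has to be a plain identifier. A simplified model of the failure mode
(not the exact lock_events.h macros):

	#define lockevent_inc(ev)  this_cpu_inc(lockevents[LOCKEVENT_ ## ev])

	/*
	 * lockevent_inc(waiter_type(waiter, rwsem_sleep_reader, rwsem_sleep_writer))
	 * pastes LOCKEVENT_ onto the *unexpanded* argument, yielding the
	 * undeclared identifier LOCKEVENT_waiter_type(...); the ternary
	 * never gets a chance to expand.
	 */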
Could you include the attached patch in your series and make the
following changes?
diff --git a/kernel/locking/rwsem.c b/kernel/locking/rwsem.c
index deb0d016a6ce..5b14b0d076fd 100644
--- a/kernel/locking/rwsem.c
+++ b/kernel/locking/rwsem.c
@@ -1021,13 +1021,14 @@ static inline void rwsem_cond_wake_waiter(struct rw_semaphore *sem, long count,
 	rwsem_mark_wake(sem, wake_type, wake_q);
 }
-#define waiter_type(_waiter, _r, _w)	\
-	((_waiter)->type == RWSEM_WAITING_FOR_READ ? (_r) : (_w))
+#define waiter_type(_reader, _r, _w)	((_reader) ? (_r) : (_w))
 static __always_inline struct rw_semaphore *
 rwsem_waiter_wait(struct rw_semaphore *sem, struct rwsem_waiter *waiter, int state)
 {
-	trace_contention_begin(sem, waiter_type(waiter, LCB_F_READ, LCB_F_WRITE));
+	bool reader = waiter->type == RWSEM_WAITING_FOR_READ;
+
+	trace_contention_begin(sem, waiter_type(reader, LCB_F_READ, LCB_F_WRITE));
 	/* wait to be given the lock */
 	for (;;) {
@@ -1045,18 +1046,18 @@ rwsem_waiter_wait(struct rw_semaphore *sem, struct rwsem_waiter *waiter, int state)
 			break;
 		}
 		schedule_preempt_disabled();
-		lockevent_inc(waiter_type(waiter, rwsem_sleep_reader, rwsem_sleep_writer));
+		lockevent_cond_inc2(reader, rwsem_sleep_reader, rwsem_sleep_writer);
 	}
 	__set_current_state(TASK_RUNNING);
-	lockevent_inc(waiter_type(waiter, rwsem_rlock, rwsem_wlock));
+	lockevent_cond_inc2(reader, rwsem_rlock, rwsem_wlock);
 	trace_contention_end(sem, 0);
 	return sem;
 out_nolock:
 	rwsem_del_wake_waiter(sem, waiter);
 	__set_current_state(TASK_RUNNING);
-	lockevent_inc(waiter_type(waiter, rwsem_rlock_fail, rwsem_wlock_fail));
+	lockevent_cond_inc2(reader, rwsem_rlock_fail, rwsem_wlock_fail);
 	trace_contention_end(sem, -EINTR);
 	return ERR_PTR(-EINTR);
 }
Thanks,
Longman
On Thu, Feb 23, 2023 at 11:31:47AM -0800, Boqun Feng wrote:
[..]
> > +#define waiter_type(_waiter, _r, _w) \
> > + ((_waiter)->type == RWSEM_WAITING_FOR_READ ? (_r) : (_w))
> > +
> > +static __always_inline struct rw_semaphore *
> > +rwsem_waiter_wait(struct rw_semaphore *sem, struct rwsem_waiter *waiter, int state)
> > +{
> > + trace_contention_begin(sem, waiter_type(waiter, LCB_F_READ, LCB_F_WRITE));
> > +
> > + /* wait to be given the lock */
> > + for (;;) {
> > + set_current_state(state);
> > + if (!smp_load_acquire(&waiter->task)) {
> > + /* Matches rwsem_waiter_wake()'s smp_store_release(). */
> > + break;
> > + }
> > + if (signal_pending_state(state, current)) {
> > + raw_spin_lock_irq(&sem->wait_lock);
>
> Move the below __set_current_state(TASK_RUNNING)s up here? I think we
> need the preemption protection when changing the task state here.
>
Nevermind since we have the preemption protection for the whole
function... but merging two __set_current_state()s into one still looks
good.
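E.g. (completely untested sketch, lockevent accounting elided):

	bool nolock = false;

	for (;;) {
		set_current_state(state);
		if (!smp_load_acquire(&waiter->task))
			break;
		if (signal_pending_state(state, current)) {
			raw_spin_lock_irq(&sem->wait_lock);
			nolock = !!waiter->task;
			if (!nolock)
				raw_spin_unlock_irq(&sem->wait_lock);
			break;
		}
		schedule_preempt_disabled();
	}

	__set_current_state(TASK_RUNNING);	/* the one and only */

	if (unlikely(nolock)) {
		/* Entered with wait_lock held; drops it. */
		rwsem_del_wake_waiter(sem, waiter);
		trace_contention_end(sem, -EINTR);
		return ERR_PTR(-EINTR);
	}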
Regards,
Boqun
On Thu, Feb 23, 2023 at 05:33:53PM -0800, Boqun Feng wrote:
> On Thu, Feb 23, 2023 at 11:31:47AM -0800, Boqun Feng wrote:
> [..]
> > > [...]
> > > + if (signal_pending_state(state, current)) {
> > > + raw_spin_lock_irq(&sem->wait_lock);
> >
> > Move the below __set_current_state(TASK_RUNNING)s up here? I think we
> > need the preemption protection when changing the task state here.
> >
>
> Nevermind since we have the preemption protection for the whole
> function... but merging two __set_current_state()s into one still looks
> good.
Even if it were not, I still don't understand the concern. Preemption
ignores task state.
On Thu, Feb 23, 2023 at 05:45:56PM -0500, Waiman Long wrote:
> Unfortunately, lockevent_inc() doesn't work with waiter_type() like that:
> the compilation will fail if CONFIG_LOCK_EVENT_COUNTS is enabled. Could you
> include the attached patch in your series and make the following changes?
Yeah, robot told me; fixed it like so:
--- a/kernel/locking/rwsem.c
+++ b/kernel/locking/rwsem.c
@@ -995,13 +995,16 @@ static inline void rwsem_cond_wake_waite
rwsem_mark_wake(sem, wake_type, wake_q);
}
-#define waiter_type(_waiter, _r, _w) \
- ((_waiter)->type == RWSEM_WAITING_FOR_READ ? (_r) : (_w))
+#define lockevent_rw_inc(rd, evr, evw) do { \
+ lockevent_cond_inc(evr, (rd)); \
+ lockevent_cond_inc(evw, !(rd)); \
+} while (0)
static __always_inline struct rw_semaphore *
-rwsem_waiter_wait(struct rw_semaphore *sem, struct rwsem_waiter *waiter, int state)
+rwsem_waiter_wait(struct rw_semaphore *sem, struct rwsem_waiter *waiter,
+ int state, bool reader)
{
- trace_contention_begin(sem, waiter_type(waiter, LCB_F_READ, LCB_F_WRITE));
+ trace_contention_begin(sem, reader ? LCB_F_READ : LCB_F_WRITE);
/* wait to be given the lock */
for (;;) {
@@ -1019,18 +1022,19 @@ rwsem_waiter_wait(struct rw_semaphore *s
break;
}
schedule_preempt_disabled();
- lockevent_inc(waiter_type(waiter, rwsem_sleep_reader, rwsem_sleep_writer));
+ lockevent_rw_inc(reader, rwsem_sleep_reader, rwsem_sleep_writer);
}
__set_current_state(TASK_RUNNING);
- lockevent_inc(waiter_type(waiter, rwsem_rlock, rwsem_wlock));
+
+ lockevent_rw_inc(reader, rwsem_rlock, rwsem_wlock);
trace_contention_end(sem, 0);
return sem;
out_nolock:
rwsem_del_wake_waiter(sem, waiter);
__set_current_state(TASK_RUNNING);
- lockevent_inc(waiter_type(waiter, rwsem_rlock_fail, rwsem_wlock_fail));
+	lockevent_rw_inc(reader, rwsem_rlock_fail, rwsem_wlock_fail);
trace_contention_end(sem, -EINTR);
return ERR_PTR(-EINTR);
}
@@ -1112,7 +1116,7 @@ rwsem_down_read_slowpath(struct rw_semap
if (!wake_q_empty(&wake_q))
wake_up_q(&wake_q);
- return rwsem_waiter_wait(sem, &waiter, state);
+ return rwsem_waiter_wait(sem, &waiter, state, true);
}
/*
@@ -1162,7 +1166,7 @@ rwsem_down_write_slowpath(struct rw_sema
}
raw_spin_unlock_irq(&sem->wait_lock);
- return rwsem_waiter_wait(sem, &waiter, state);
+ return rwsem_waiter_wait(sem, &waiter, state, false);
}
/*
On Sun, Feb 26, 2023 at 01:01:10PM +0100, Peter Zijlstra wrote:
> On Thu, Feb 23, 2023 at 05:33:53PM -0800, Boqun Feng wrote:
> > On Thu, Feb 23, 2023 at 11:31:47AM -0800, Boqun Feng wrote:
> > [..]
> > > > [...]
> > > > + if (signal_pending_state(state, current)) {
> > > > + raw_spin_lock_irq(&sem->wait_lock);
> > >
> > > Move the below __set_current_state(TASK_RUNNING)s up here? I think we
> > > need the preemption protection when changing the task state here.
> > >
> >
> > Nevermind since we have the preemption protection for the whole
> > function... but merging two __set_current_state()s into one still looks
> > good.
>
> Even if it were not; I still don't understand the concern. Preemption
> ignores task state.
Because I missed the exact thing you just mentioned... ;-)
I was worried about the following case:
	ttwu();
	set_current_state(TASK_UNINTERRUPTIBLE);
	...
	<preemption enabled>
	<preempted>
	  preempt_schedule_irq():
	    __schedule(...):
	      deactivate_task(); // Wakeup missed.
However, this is not true, since the __schedule() in
preempt_schedule_irq() is an SM_PREEMPT one.
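FWIW, the gate in __schedule() is (paraphrased from kernel/sched/core.c,
details elided):

	prev_state = READ_ONCE(prev->__state);
	if (!(sched_mode & SM_MASK_PREEMPT) && prev_state) {
		/* Only a voluntary schedule() dequeues a sleeping task. */
		...
		deactivate_task(rq, prev, DEQUEUE_SLEEP | DEQUEUE_NOCLOCK);
	}

so a preempting __schedule() leaves the task on the runqueue no matter
what ->__state says.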
Sorry for the noise then. But it was good to revisit this stuff ;-)
Regards,
Boqun