2013-06-21 23:51:34

by Tim Chen

[permalink] [raw]
Subject: [PATCH 1/2] rwsem: check the lock before cmpxchg in down_write_trylock and rwsem_do_wake

Doing cmpxchg will cause cache bouncing when checking
sem->count. This could cause a scalability issue
on a large machine (e.g. an 80-core box).

A pre-read of sem->count can mitigate this.

Signed-off-by: Alex Shi <[email protected]>
Signed-off-by: Tim Chen <[email protected]>
---
include/asm-generic/rwsem.h | 8 ++++----
lib/rwsem.c | 21 +++++++++++++--------
2 files changed, 17 insertions(+), 12 deletions(-)

diff --git a/include/asm-generic/rwsem.h b/include/asm-generic/rwsem.h
index bb1e2cd..052d973 100644
--- a/include/asm-generic/rwsem.h
+++ b/include/asm-generic/rwsem.h
@@ -70,11 +70,11 @@ static inline void __down_write(struct rw_semaphore *sem)

static inline int __down_write_trylock(struct rw_semaphore *sem)
{
- long tmp;
+ if (unlikely(&sem->count != RWSEM_UNLOCKED_VALUE))
+ return 0;

- tmp = cmpxchg(&sem->count, RWSEM_UNLOCKED_VALUE,
- RWSEM_ACTIVE_WRITE_BIAS);
- return tmp == RWSEM_UNLOCKED_VALUE;
+ return cmpxchg(&sem->count, RWSEM_UNLOCKED_VALUE,
+ RWSEM_ACTIVE_WRITE_BIAS) == RWSEM_UNLOCKED_VALUE;
}

/*
diff --git a/lib/rwsem.c b/lib/rwsem.c
index 19c5fa9..2072af5 100644
--- a/lib/rwsem.c
+++ b/lib/rwsem.c
@@ -75,7 +75,7 @@ __rwsem_do_wake(struct rw_semaphore *sem, enum rwsem_wake_type wake_type)
* will block as they will notice the queued writer.
*/
wake_up_process(waiter->task);
- goto out;
+ return sem;
}

/* Writers might steal the lock before we grant it to the next reader.
@@ -85,15 +85,21 @@ __rwsem_do_wake(struct rw_semaphore *sem, enum rwsem_wake_type wake_type)
adjustment = 0;
if (wake_type != RWSEM_WAKE_READ_OWNED) {
adjustment = RWSEM_ACTIVE_READ_BIAS;
- try_reader_grant:
- oldcount = rwsem_atomic_update(adjustment, sem) - adjustment;
- if (unlikely(oldcount < RWSEM_WAITING_BIAS)) {
- /* A writer stole the lock. Undo our reader grant. */
+ while (1) {
+ /* A writer stole the lock. */
+ if (sem->count < RWSEM_WAITING_BIAS)
+ return sem;
+
+ oldcount = rwsem_atomic_update(adjustment, sem)
+ - adjustment;
+ if (likely(oldcount >= RWSEM_WAITING_BIAS))
+ break;
+
+ /* A writer stole the lock. Undo our reader grant. */
if (rwsem_atomic_update(-adjustment, sem) &
RWSEM_ACTIVE_MASK)
- goto out;
+ return sem;
/* Last active locker left. Retry waking readers. */
- goto try_reader_grant;
}
}

@@ -136,7 +142,6 @@ __rwsem_do_wake(struct rw_semaphore *sem, enum rwsem_wake_type wake_type)
sem->wait_list.next = next;
next->prev = &sem->wait_list;

- out:
return sem;
}

--
1.7.4.4



2013-06-22 00:10:34

by Alex Shi

[permalink] [raw]
Subject: Re: [PATCH 1/2] rwsem: check the lock before cmpxchg in down_write_trylock and rwsem_do_wake

On 06/22/2013 07:51 AM, Tim Chen wrote:
> Doing cmpxchg will cause cache bouncing when checking
> sem->count. This could cause scalability issue
> in a large machine (e.g. a 80 cores box).
>
> A pre-read of sem->count can mitigate this.
>
> Signed-off-by: Alex Shi <[email protected]>
> Signed-off-by: Tim Chen <[email protected]>

Hi Tim,
there is a technical error in this patch:
the "From: " line should be 'Alex Shi', since he provided most of the input
for this patch.

And I still think splitting this patch into 4 smaller ones would make it
simpler to review — I had already sent those to you and Davidlohr.

Would you like to re-send with my 4-patch version? :)

> ---
> include/asm-generic/rwsem.h | 8 ++++----
> lib/rwsem.c | 21 +++++++++++++--------
> 2 files changed, 17 insertions(+), 12 deletions(-)
>
> diff --git a/include/asm-generic/rwsem.h b/include/asm-generic/rwsem.h
> index bb1e2cd..052d973 100644
> --- a/include/asm-generic/rwsem.h
> +++ b/include/asm-generic/rwsem.h
> @@ -70,11 +70,11 @@ static inline void __down_write(struct rw_semaphore *sem)
>
> static inline int __down_write_trylock(struct rw_semaphore *sem)
> {
> - long tmp;
> + if (unlikely(&sem->count != RWSEM_UNLOCKED_VALUE))
> + return 0;
>
> - tmp = cmpxchg(&sem->count, RWSEM_UNLOCKED_VALUE,
> - RWSEM_ACTIVE_WRITE_BIAS);
> - return tmp == RWSEM_UNLOCKED_VALUE;
> + return cmpxchg(&sem->count, RWSEM_UNLOCKED_VALUE,
> + RWSEM_ACTIVE_WRITE_BIAS) == RWSEM_UNLOCKED_VALUE;
> }
>
> /*
> diff --git a/lib/rwsem.c b/lib/rwsem.c
> index 19c5fa9..2072af5 100644
> --- a/lib/rwsem.c
> +++ b/lib/rwsem.c
> @@ -75,7 +75,7 @@ __rwsem_do_wake(struct rw_semaphore *sem, enum rwsem_wake_type wake_type)
> * will block as they will notice the queued writer.
> */
> wake_up_process(waiter->task);
> - goto out;
> + return sem;
> }
>
> /* Writers might steal the lock before we grant it to the next reader.
> @@ -85,15 +85,21 @@ __rwsem_do_wake(struct rw_semaphore *sem, enum rwsem_wake_type wake_type)
> adjustment = 0;
> if (wake_type != RWSEM_WAKE_READ_OWNED) {
> adjustment = RWSEM_ACTIVE_READ_BIAS;
> - try_reader_grant:
> - oldcount = rwsem_atomic_update(adjustment, sem) - adjustment;
> - if (unlikely(oldcount < RWSEM_WAITING_BIAS)) {
> - /* A writer stole the lock. Undo our reader grant. */
> + while (1) {
> + /* A writer stole the lock. */
> + if (sem->count < RWSEM_WAITING_BIAS)
> + return sem;
> +
> + oldcount = rwsem_atomic_update(adjustment, sem)
> + - adjustment;
> + if (likely(oldcount >= RWSEM_WAITING_BIAS))
> + break;
> +
> + /* A writer stole the lock. Undo our reader grant. */
> if (rwsem_atomic_update(-adjustment, sem) &
> RWSEM_ACTIVE_MASK)
> - goto out;
> + return sem;
> /* Last active locker left. Retry waking readers. */
> - goto try_reader_grant;
> }
> }
>
> @@ -136,7 +142,6 @@ __rwsem_do_wake(struct rw_semaphore *sem, enum rwsem_wake_type wake_type)
> sem->wait_list.next = next;
> next->prev = &sem->wait_list;
>
> - out:
> return sem;
> }
>
>


--
Thanks
Alex

2013-06-22 00:15:38

by Davidlohr Bueso

[permalink] [raw]
Subject: Re: [PATCH 1/2] rwsem: check the lock before cmpxchg in down_write_trylock and rwsem_do_wake

On Sat, 2013-06-22 at 08:10 +0800, Alex Shi wrote:
> On 06/22/2013 07:51 AM, Tim Chen wrote:
> > Doing cmpxchg will cause cache bouncing when checking
> > sem->count. This could cause scalability issue
> > in a large machine (e.g. a 80 cores box).
> >
> > A pre-read of sem->count can mitigate this.
> >
> > Signed-off-by: Alex Shi <[email protected]>
> > Signed-off-by: Tim Chen <[email protected]>
>
> Hi Tim,
> there is a technical error in this patch.
> the "From: " line should be 'Alex Shi', since he made the most input of
> this patch.
>
> And I still think split this patch to 4 smaller will make it more simple
> to review, that I had sent you and Davidlohr.

Yep, and you had updated the changelog for 1/4: rwsem: check the lock
before cmpxchg in down_write_trylock to:

"cmpxchg will cause cache bouncing when do the value checking, that
cause scalability issue in a large machine (like a 80 cores box).

A lock status pre-read can relief this."

>
> could you like to re-send with my 4 patch version? :)

For those 4 patches:
Acked-by: Davidlohr Bueso <[email protected]>

2013-06-22 07:21:46

by Peter Hurley

[permalink] [raw]
Subject: Re: [PATCH 1/2] rwsem: check the lock before cmpxchg in down_write_trylock and rwsem_do_wake

On 06/21/2013 07:51 PM, Tim Chen wrote:
> Doing cmpxchg will cause cache bouncing when checking
> sem->count. This could cause scalability issue
> in a large machine (e.g. a 80 cores box).
>
> A pre-read of sem->count can mitigate this.
>
> Signed-off-by: Alex Shi <[email protected]>
> Signed-off-by: Tim Chen <[email protected]>
> ---
> include/asm-generic/rwsem.h | 8 ++++----
> lib/rwsem.c | 21 +++++++++++++--------
> 2 files changed, 17 insertions(+), 12 deletions(-)
>
> diff --git a/include/asm-generic/rwsem.h b/include/asm-generic/rwsem.h
> index bb1e2cd..052d973 100644
> --- a/include/asm-generic/rwsem.h
> +++ b/include/asm-generic/rwsem.h
> @@ -70,11 +70,11 @@ static inline void __down_write(struct rw_semaphore *sem)
>
> static inline int __down_write_trylock(struct rw_semaphore *sem)
> {
> - long tmp;
> + if (unlikely(&sem->count != RWSEM_UNLOCKED_VALUE))
^^^^^^^^^^^

This is probably not what you want.


> + return 0;
>
> - tmp = cmpxchg(&sem->count, RWSEM_UNLOCKED_VALUE,
> - RWSEM_ACTIVE_WRITE_BIAS);
> - return tmp == RWSEM_UNLOCKED_VALUE;
> + return cmpxchg(&sem->count, RWSEM_UNLOCKED_VALUE,
> + RWSEM_ACTIVE_WRITE_BIAS) == RWSEM_UNLOCKED_VALUE;
> }
>
> /*
> diff --git a/lib/rwsem.c b/lib/rwsem.c
> index 19c5fa9..2072af5 100644
> --- a/lib/rwsem.c
> +++ b/lib/rwsem.c
> @@ -75,7 +75,7 @@ __rwsem_do_wake(struct rw_semaphore *sem, enum rwsem_wake_type wake_type)
> * will block as they will notice the queued writer.
> */
> wake_up_process(waiter->task);
> - goto out;
> + return sem;

Please put these flow control changes in a separate patch.


> }
>
> /* Writers might steal the lock before we grant it to the next reader.
> @@ -85,15 +85,21 @@ __rwsem_do_wake(struct rw_semaphore *sem, enum rwsem_wake_type wake_type)
> adjustment = 0;
> if (wake_type != RWSEM_WAKE_READ_OWNED) {
> adjustment = RWSEM_ACTIVE_READ_BIAS;
> - try_reader_grant:
> - oldcount = rwsem_atomic_update(adjustment, sem) - adjustment;
> - if (unlikely(oldcount < RWSEM_WAITING_BIAS)) {
> - /* A writer stole the lock. Undo our reader grant. */
> + while (1) {
> + /* A writer stole the lock. */
> + if (sem->count < RWSEM_WAITING_BIAS)
> + return sem;

I'm all for structured looping instead of goto labels but this optimization
is only useful on the 1st iteration. IOW, on the second iteration you already
know that you need to try for reclaiming the lock.


> +
> + oldcount = rwsem_atomic_update(adjustment, sem)
> + - adjustment;
> + if (likely(oldcount >= RWSEM_WAITING_BIAS))
> + break;
> +
> + /* A writer stole the lock. Undo our reader grant. */
> if (rwsem_atomic_update(-adjustment, sem) &
> RWSEM_ACTIVE_MASK)
> - goto out;
> + return sem;
> /* Last active locker left. Retry waking readers. */
> - goto try_reader_grant;
> }
> }
>
> @@ -136,7 +142,6 @@ __rwsem_do_wake(struct rw_semaphore *sem, enum rwsem_wake_type wake_type)
> sem->wait_list.next = next;
> next->prev = &sem->wait_list;
>
> - out:
> return sem;
> }


Alex and Tim,

Was there a v1 of this series; ie., is this v2 (or higher)?

How are you validating lock correctness/behavior with this series?

Regards,
Peter Hurley

2013-06-23 01:16:25

by Alex Shi

[permalink] [raw]
Subject: Re: [PATCH 1/2] rwsem: check the lock before cmpxchg in down_write_trylock and rwsem_do_wake

On 06/22/2013 03:21 PM, Peter Hurley wrote:
> On 06/21/2013 07:51 PM, Tim Chen wrote:
>> Doing cmpxchg will cause cache bouncing when checking
>> sem->count. This could cause scalability issue
>> in a large machine (e.g. a 80 cores box).
>>
>> A pre-read of sem->count can mitigate this.
>>
>> Signed-off-by: Alex Shi <[email protected]>
>> Signed-off-by: Tim Chen <[email protected]>
>> ---
>> include/asm-generic/rwsem.h | 8 ++++----
>> lib/rwsem.c | 21 +++++++++++++--------
>> 2 files changed, 17 insertions(+), 12 deletions(-)
>>
>> diff --git a/include/asm-generic/rwsem.h b/include/asm-generic/rwsem.h
>> index bb1e2cd..052d973 100644
>> --- a/include/asm-generic/rwsem.h
>> +++ b/include/asm-generic/rwsem.h
>> @@ -70,11 +70,11 @@ static inline void __down_write(struct
>> rw_semaphore *sem)
>>
>> static inline int __down_write_trylock(struct rw_semaphore *sem)
>> {
>> - long tmp;
>> + if (unlikely(&sem->count != RWSEM_UNLOCKED_VALUE))
> ^^^^^^^^^^^
>
> This is probably not what you want.
>

This function's logic is quite simple: checking sem->count before the
cmpxchg does no harm to that logic.

So could you tell us what it is we should want?

>
>> + return 0;
>>
>> - tmp = cmpxchg(&sem->count, RWSEM_UNLOCKED_VALUE,
>> - RWSEM_ACTIVE_WRITE_BIAS);
>> - return tmp == RWSEM_UNLOCKED_VALUE;
>> + return cmpxchg(&sem->count, RWSEM_UNLOCKED_VALUE,
>> + RWSEM_ACTIVE_WRITE_BIAS) == RWSEM_UNLOCKED_VALUE;
>> }
>>
>> /*
>> diff --git a/lib/rwsem.c b/lib/rwsem.c
>> index 19c5fa9..2072af5 100644
>> --- a/lib/rwsem.c
>> +++ b/lib/rwsem.c
>> @@ -75,7 +75,7 @@ __rwsem_do_wake(struct rw_semaphore *sem, enum
>> rwsem_wake_type wake_type)
>> * will block as they will notice the queued writer.
>> */
>> wake_up_process(waiter->task);
>> - goto out;
>> + return sem;
>
> Please put these flow control changes in a separate patch.

I had sent the split patches to Tim&Davidlohr. They will send them out
as a single patchset.
>
>
>> }
>>
>> /* Writers might steal the lock before we grant it to the next
>> reader.
>> @@ -85,15 +85,21 @@ __rwsem_do_wake(struct rw_semaphore *sem, enum
>> rwsem_wake_type wake_type)
>> adjustment = 0;
>> if (wake_type != RWSEM_WAKE_READ_OWNED) {
>> adjustment = RWSEM_ACTIVE_READ_BIAS;
>> - try_reader_grant:
>> - oldcount = rwsem_atomic_update(adjustment, sem) - adjustment;
>> - if (unlikely(oldcount < RWSEM_WAITING_BIAS)) {
>> - /* A writer stole the lock. Undo our reader grant. */
>> + while (1) {
>> + /* A writer stole the lock. */
>> + if (sem->count < RWSEM_WAITING_BIAS)
>> + return sem;
>
> I'm all for structured looping instead of goto labels but this optimization
> is only useful on the 1st iteration. IOW, on the second iteration you
> already
> know that you need to try for reclaiming the lock.
>

Sorry, could you explain more clearly what the 1st or 2nd iteration
(or any other) refers to?
>
>> +
>> + oldcount = rwsem_atomic_update(adjustment, sem)
>> + - adjustment;
>> + if (likely(oldcount >= RWSEM_WAITING_BIAS))
>> + break;
>> +
>> + /* A writer stole the lock. Undo our reader grant. */
>> if (rwsem_atomic_update(-adjustment, sem) &
>> RWSEM_ACTIVE_MASK)
>> - goto out;
>> + return sem;
>> /* Last active locker left. Retry waking readers. */
>> - goto try_reader_grant;
>> }
>> }
>>
>> @@ -136,7 +142,6 @@ __rwsem_do_wake(struct rw_semaphore *sem, enum
>> rwsem_wake_type wake_type)
>> sem->wait_list.next = next;
>> next->prev = &sem->wait_list;
>>
>> - out:
>> return sem;
>> }
>
>
> Alex and Tim,
>
> Was there a v1 of this series; ie., is this v2 (or higher)?
>
> How are you validating lock correctness/behavior with this series?

Some benchmarks were run against this patch, mainly aim7. Plus, by visual
inspection, we didn't change the logic except to check the lock value before locking.
>
> Regards,
> Peter Hurley
>


--
Thanks
Alex

2013-06-23 05:10:23

by Andi Kleen

[permalink] [raw]
Subject: Re: [PATCH 1/2] rwsem: check the lock before cmpxchg in down_write_trylock and rwsem_do_wake

> >> diff --git a/include/asm-generic/rwsem.h b/include/asm-generic/rwsem.h
> >> index bb1e2cd..052d973 100644
> >> --- a/include/asm-generic/rwsem.h
> >> +++ b/include/asm-generic/rwsem.h
> >> @@ -70,11 +70,11 @@ static inline void __down_write(struct
> >> rw_semaphore *sem)
> >>
> >> static inline int __down_write_trylock(struct rw_semaphore *sem)
> >> {
> >> - long tmp;
> >> + if (unlikely(&sem->count != RWSEM_UNLOCKED_VALUE))
> > ^^^^^^^^^^^
> >
> > This is probably not what you want.
> >
>
> this function logical is quite simple. check the sem->count before
> cmpxchg is no harm this logical.
>
> So could you like to tell us what should we want?

You are comparing the address, not the value. Remove the &
This was a nop too.

-Andi
--
[email protected] -- Speaking for myself only.

2013-06-23 11:52:34

by Alex Shi

[permalink] [raw]
Subject: Re: [PATCH 1/2] rwsem: check the lock before cmpxchg in down_write_trylock and rwsem_do_wake

On 06/23/2013 01:10 PM, Andi Kleen wrote:
>>>> diff --git a/include/asm-generic/rwsem.h b/include/asm-generic/rwsem.h
>>>> > >> index bb1e2cd..052d973 100644
>>>> > >> --- a/include/asm-generic/rwsem.h
>>>> > >> +++ b/include/asm-generic/rwsem.h
>>>> > >> @@ -70,11 +70,11 @@ static inline void __down_write(struct
>>>> > >> rw_semaphore *sem)
>>>> > >>
>>>> > >> static inline int __down_write_trylock(struct rw_semaphore *sem)
>>>> > >> {
>>>> > >> - long tmp;
>>>> > >> + if (unlikely(&sem->count != RWSEM_UNLOCKED_VALUE))
>>> > > ^^^^^^^^^^^
>>> > >
>>> > > This is probably not what you want.
>>> > >
>> >
>> > this function logical is quite simple. check the sem->count before
>> > cmpxchg is no harm this logical.
>> >
>> > So could you like to tell us what should we want?
> You are comparing the address, not the value. Remove the &
> This was a nop too.

Oops, so stupid of me! :(

--
Thanks
Alex

2013-06-24 16:34:22

by Tim Chen

[permalink] [raw]
Subject: Re: [PATCH 1/2] rwsem: check the lock before cmpxchg in down_write_trylock and rwsem_do_wake

On Sat, 2013-06-22 at 08:10 +0800, Alex Shi wrote:
> On 06/22/2013 07:51 AM, Tim Chen wrote:
> > Doing cmpxchg will cause cache bouncing when checking
> > sem->count. This could cause scalability issue
> > in a large machine (e.g. a 80 cores box).
> >
> > A pre-read of sem->count can mitigate this.
> >
> > Signed-off-by: Alex Shi <[email protected]>
> > Signed-off-by: Tim Chen <[email protected]>
>
> Hi Tim,
> there is a technical error in this patch.
> the "From: " line should be 'Alex Shi', since he made the most input of
> this patch.
>

Actually, in my local git repository, I have the From line as from you,
and that's reflected in the change log of patch [0/2].
I'll make this your signed-off only if you prefer that for v2. The
"From" field was you in my git, but it changes when I send the patches out.


> And I still think split this patch to 4 smaller will make it more simple
> to review, that I had sent you and Davidlohr.
>
> could you like to re-send with my 4 patch version? :)
>
> > ---
> > include/asm-generic/rwsem.h | 8 ++++----
> > lib/rwsem.c | 21 +++++++++++++--------
> > 2 files changed, 17 insertions(+), 12 deletions(-)
> >
> > diff --git a/include/asm-generic/rwsem.h b/include/asm-generic/rwsem.h
> > index bb1e2cd..052d973 100644
> > --- a/include/asm-generic/rwsem.h
> > +++ b/include/asm-generic/rwsem.h
> > @@ -70,11 +70,11 @@ static inline void __down_write(struct rw_semaphore *sem)
> >
> > static inline int __down_write_trylock(struct rw_semaphore *sem)
> > {
> > - long tmp;
> > + if (unlikely(&sem->count != RWSEM_UNLOCKED_VALUE))
> > + return 0;
> >
> > - tmp = cmpxchg(&sem->count, RWSEM_UNLOCKED_VALUE,
> > - RWSEM_ACTIVE_WRITE_BIAS);
> > - return tmp == RWSEM_UNLOCKED_VALUE;
> > + return cmpxchg(&sem->count, RWSEM_UNLOCKED_VALUE,
> > + RWSEM_ACTIVE_WRITE_BIAS) == RWSEM_UNLOCKED_VALUE;
> > }
> >
> > /*
> > diff --git a/lib/rwsem.c b/lib/rwsem.c
> > index 19c5fa9..2072af5 100644
> > --- a/lib/rwsem.c
> > +++ b/lib/rwsem.c
> > @@ -75,7 +75,7 @@ __rwsem_do_wake(struct rw_semaphore *sem, enum rwsem_wake_type wake_type)
> > * will block as they will notice the queued writer.
> > */
> > wake_up_process(waiter->task);
> > - goto out;
> > + return sem;
> > }
> >
> > /* Writers might steal the lock before we grant it to the next reader.
> > @@ -85,15 +85,21 @@ __rwsem_do_wake(struct rw_semaphore *sem, enum rwsem_wake_type wake_type)
> > adjustment = 0;
> > if (wake_type != RWSEM_WAKE_READ_OWNED) {
> > adjustment = RWSEM_ACTIVE_READ_BIAS;
> > - try_reader_grant:
> > - oldcount = rwsem_atomic_update(adjustment, sem) - adjustment;
> > - if (unlikely(oldcount < RWSEM_WAITING_BIAS)) {
> > - /* A writer stole the lock. Undo our reader grant. */
> > + while (1) {
> > + /* A writer stole the lock. */
> > + if (sem->count < RWSEM_WAITING_BIAS)
> > + return sem;
> > +
> > + oldcount = rwsem_atomic_update(adjustment, sem)
> > + - adjustment;
> > + if (likely(oldcount >= RWSEM_WAITING_BIAS))
> > + break;
> > +
> > + /* A writer stole the lock. Undo our reader grant. */
> > if (rwsem_atomic_update(-adjustment, sem) &
> > RWSEM_ACTIVE_MASK)
> > - goto out;
> > + return sem;
> > /* Last active locker left. Retry waking readers. */
> > - goto try_reader_grant;
> > }
> > }
> >
> > @@ -136,7 +142,6 @@ __rwsem_do_wake(struct rw_semaphore *sem, enum rwsem_wake_type wake_type)
> > sem->wait_list.next = next;
> > next->prev = &sem->wait_list;
> >
> > - out:
> > return sem;
> > }
> >
> >
>
>