2024-03-25 16:21:21

by Uros Bizjak

[permalink] [raw]
Subject: [PATCH 1/2] locking/pvqspinlock: Use try_cmpxchg_acquire() in trylock_clear_pending()

Use try_cmpxchg_acquire(*ptr, &old, new) instead of
cmpxchg_relaxed(*ptr, old, new) == old in trylock_clear_pending().
x86 CMPXCHG instruction returns success in ZF flag, so this change
saves a compare after cmpxchg.

Also change the return type of the function to bool.

No functional change intended.

Signed-off-by: Uros Bizjak <[email protected]>
Cc: Peter Zijlstra <[email protected]>
Cc: Ingo Molnar <[email protected]>
Cc: Will Deacon <[email protected]>
Cc: Waiman Long <[email protected]>
Cc: Boqun Feng <[email protected]>
---
kernel/locking/qspinlock_paravirt.h | 31 ++++++++++++-----------------
1 file changed, 13 insertions(+), 18 deletions(-)

diff --git a/kernel/locking/qspinlock_paravirt.h b/kernel/locking/qspinlock_paravirt.h
index 169950fe1aad..77ba80bd95f9 100644
--- a/kernel/locking/qspinlock_paravirt.h
+++ b/kernel/locking/qspinlock_paravirt.h
@@ -116,11 +116,12 @@ static __always_inline void set_pending(struct qspinlock *lock)
* barrier. Therefore, an atomic cmpxchg_acquire() is used to acquire the
* lock just to be sure that it will get it.
*/
-static __always_inline int trylock_clear_pending(struct qspinlock *lock)
+static __always_inline bool trylock_clear_pending(struct qspinlock *lock)
{
+ u16 old = _Q_PENDING_VAL;
+
return !READ_ONCE(lock->locked) &&
- (cmpxchg_acquire(&lock->locked_pending, _Q_PENDING_VAL,
- _Q_LOCKED_VAL) == _Q_PENDING_VAL);
+ try_cmpxchg_acquire(&lock->locked_pending, &old, _Q_LOCKED_VAL);
}
#else /* _Q_PENDING_BITS == 8 */
static __always_inline void set_pending(struct qspinlock *lock)
@@ -128,27 +129,21 @@ static __always_inline void set_pending(struct qspinlock *lock)
atomic_or(_Q_PENDING_VAL, &lock->val);
}

-static __always_inline int trylock_clear_pending(struct qspinlock *lock)
+static __always_inline bool trylock_clear_pending(struct qspinlock *lock)
{
- int val = atomic_read(&lock->val);
-
- for (;;) {
- int old, new;
-
- if (val & _Q_LOCKED_MASK)
- break;
+ int old, new;

+ old = atomic_read(&lock->val);
+ do {
+ if (old & _Q_LOCKED_MASK)
+ return false;
/*
* Try to clear pending bit & set locked bit
*/
- old = val;
- new = (val & ~_Q_PENDING_MASK) | _Q_LOCKED_VAL;
- val = atomic_cmpxchg_acquire(&lock->val, old, new);
+ new = (old & ~_Q_PENDING_MASK) | _Q_LOCKED_VAL;
+ } while (!atomic_try_cmpxchg_acquire (&lock->val, &old, new));

- if (val == old)
- return 1;
- }
- return 0;
+ return true;
}
#endif /* _Q_PENDING_BITS == 8 */

--
2.44.0



2024-03-25 16:28:59

by Uros Bizjak

[permalink] [raw]
Subject: [PATCH 2/2] locking/pvqspinlock: Use try_cmpxchg() in qspinlock_paravirt.h

Use try_cmpxchg(*ptr, &old, new) instead of
cmpxchg(*ptr, old, new) == old in qspinlock_paravirt.h
x86 CMPXCHG instruction returns success in ZF flag, so
this change saves a compare after cmpxchg.

No functional change intended.

Signed-off-by: Uros Bizjak <[email protected]>
Cc: Peter Zijlstra <[email protected]>
Cc: Ingo Molnar <[email protected]>
Cc: Will Deacon <[email protected]>
Cc: Waiman Long <[email protected]>
Cc: Boqun Feng <[email protected]>
---
kernel/locking/qspinlock_paravirt.h | 16 ++++++++--------
1 file changed, 8 insertions(+), 8 deletions(-)

diff --git a/kernel/locking/qspinlock_paravirt.h b/kernel/locking/qspinlock_paravirt.h
index 77ba80bd95f9..3db5f811260f 100644
--- a/kernel/locking/qspinlock_paravirt.h
+++ b/kernel/locking/qspinlock_paravirt.h
@@ -86,9 +86,10 @@ static inline bool pv_hybrid_queued_unfair_trylock(struct qspinlock *lock)
*/
for (;;) {
int val = atomic_read(&lock->val);
+ u8 old = 0;

if (!(val & _Q_LOCKED_PENDING_MASK) &&
- (cmpxchg_acquire(&lock->locked, 0, _Q_LOCKED_VAL) == 0)) {
+ try_cmpxchg_acquire(&lock->locked, &old, _Q_LOCKED_VAL)) {
lockevent_inc(pv_lock_stealing);
return true;
}
@@ -211,8 +212,9 @@ static struct qspinlock **pv_hash(struct qspinlock *lock, struct pv_node *node)
int hopcnt = 0;

for_each_hash_entry(he, offset, hash) {
+ struct qspinlock *old = NULL;
hopcnt++;
- if (!cmpxchg(&he->lock, NULL, lock)) {
+ if (try_cmpxchg(&he->lock, &old, lock)) {
WRITE_ONCE(he->node, node);
lockevent_pv_hop(hopcnt);
return &he->lock;
@@ -355,7 +357,7 @@ static void pv_wait_node(struct mcs_spinlock *node, struct mcs_spinlock *prev)
static void pv_kick_node(struct qspinlock *lock, struct mcs_spinlock *node)
{
struct pv_node *pn = (struct pv_node *)node;
-
+ enum vcpu_state old = vcpu_halted;
/*
* If the vCPU is indeed halted, advance its state to match that of
* pv_wait_node(). If OTOH this fails, the vCPU was running and will
@@ -372,8 +374,7 @@ static void pv_kick_node(struct qspinlock *lock, struct mcs_spinlock *node)
* subsequent writes.
*/
smp_mb__before_atomic();
- if (cmpxchg_relaxed(&pn->state, vcpu_halted, vcpu_hashed)
- != vcpu_halted)
+ if (!try_cmpxchg_relaxed(&pn->state, &old, vcpu_hashed))
return;

/*
@@ -541,15 +542,14 @@ __pv_queued_spin_unlock_slowpath(struct qspinlock *lock, u8 locked)
#ifndef __pv_queued_spin_unlock
__visible __lockfunc void __pv_queued_spin_unlock(struct qspinlock *lock)
{
- u8 locked;
+ u8 locked = _Q_LOCKED_VAL;

/*
* We must not unlock if SLOW, because in that case we must first
* unhash. Otherwise it would be possible to have multiple @lock
* entries, which would be BAD.
*/
- locked = cmpxchg_release(&lock->locked, _Q_LOCKED_VAL, 0);
- if (likely(locked == _Q_LOCKED_VAL))
+ if (try_cmpxchg_release(&lock->locked, &locked, 0);
return;

__pv_queued_spin_unlock_slowpath(lock, locked);
--
2.44.0


2024-03-28 01:53:14

by Waiman Long

[permalink] [raw]
Subject: Re: [PATCH 1/2] locking/pvqspinlock: Use try_cmpxchg_acquire() in trylock_clear_pending()

On 3/25/24 10:09, Uros Bizjak wrote:
> Use try_cmpxchg_acquire(*ptr, &old, new) instead of
> cmpxchg_relaxed(*ptr, old, new) == old in trylock_clear_pending().
> x86 CMPXCHG instruction returns success in ZF flag, so this change
> saves a compare after cmpxchg.
>
> Also change the return type of the function to bool.
>
> No functional change intended.
>
> Signed-off-by: Uros Bizjak <[email protected]>
> Cc: Peter Zijlstra <[email protected]>
> Cc: Ingo Molnar <[email protected]>
> Cc: Will Deacon <[email protected]>
> Cc: Waiman Long <[email protected]>
> Cc: Boqun Feng <[email protected]>
> ---
> kernel/locking/qspinlock_paravirt.h | 31 ++++++++++++-----------------
> 1 file changed, 13 insertions(+), 18 deletions(-)
>
> diff --git a/kernel/locking/qspinlock_paravirt.h b/kernel/locking/qspinlock_paravirt.h
> index 169950fe1aad..77ba80bd95f9 100644
> --- a/kernel/locking/qspinlock_paravirt.h
> +++ b/kernel/locking/qspinlock_paravirt.h
> @@ -116,11 +116,12 @@ static __always_inline void set_pending(struct qspinlock *lock)
> * barrier. Therefore, an atomic cmpxchg_acquire() is used to acquire the
> * lock just to be sure that it will get it.
> */
> -static __always_inline int trylock_clear_pending(struct qspinlock *lock)
> +static __always_inline bool trylock_clear_pending(struct qspinlock *lock)
> {
> + u16 old = _Q_PENDING_VAL;
> +
> return !READ_ONCE(lock->locked) &&
> - (cmpxchg_acquire(&lock->locked_pending, _Q_PENDING_VAL,
> - _Q_LOCKED_VAL) == _Q_PENDING_VAL);
> + try_cmpxchg_acquire(&lock->locked_pending, &old, _Q_LOCKED_VAL);
> }
> #else /* _Q_PENDING_BITS == 8 */
> static __always_inline void set_pending(struct qspinlock *lock)
> @@ -128,27 +129,21 @@ static __always_inline void set_pending(struct qspinlock *lock)
> atomic_or(_Q_PENDING_VAL, &lock->val);
> }
>
> -static __always_inline int trylock_clear_pending(struct qspinlock *lock)
> +static __always_inline bool trylock_clear_pending(struct qspinlock *lock)
> {
> - int val = atomic_read(&lock->val);
> -
> - for (;;) {
> - int old, new;
> -
> - if (val & _Q_LOCKED_MASK)
> - break;
> + int old, new;
>
> + old = atomic_read(&lock->val);
> + do {
> + if (old & _Q_LOCKED_MASK)
> + return false;
> /*
> * Try to clear pending bit & set locked bit
> */
> - old = val;
> - new = (val & ~_Q_PENDING_MASK) | _Q_LOCKED_VAL;
> - val = atomic_cmpxchg_acquire(&lock->val, old, new);
> + new = (old & ~_Q_PENDING_MASK) | _Q_LOCKED_VAL;
> + } while (!atomic_try_cmpxchg_acquire (&lock->val, &old, new));
>
> - if (val == old)
> - return 1;
> - }
> - return 0;
> + return true;
> }
> #endif /* _Q_PENDING_BITS == 8 */
>
Reviewed-by: Waiman Long <[email protected]>


2024-03-28 02:23:00

by Waiman Long

[permalink] [raw]
Subject: Re: [PATCH 2/2] locking/pvqspinlock: Use try_cmpxchg() in qspinlock_paravirt.h


On 3/25/24 10:09, Uros Bizjak wrote:
> Use try_cmpxchg(*ptr, &old, new) instead of
> cmpxchg(*ptr, old, new) == old in qspinlock_paravirt.h
> x86 CMPXCHG instruction returns success in ZF flag, so
> this change saves a compare after cmpxchg.
>
> No functional change intended.
>
> Signed-off-by: Uros Bizjak <[email protected]>
> Cc: Peter Zijlstra <[email protected]>
> Cc: Ingo Molnar <[email protected]>
> Cc: Will Deacon <[email protected]>
> Cc: Waiman Long <[email protected]>
> Cc: Boqun Feng <[email protected]>
> ---
> kernel/locking/qspinlock_paravirt.h | 16 ++++++++--------
> 1 file changed, 8 insertions(+), 8 deletions(-)
>
> diff --git a/kernel/locking/qspinlock_paravirt.h b/kernel/locking/qspinlock_paravirt.h
> index 77ba80bd95f9..3db5f811260f 100644
> --- a/kernel/locking/qspinlock_paravirt.h
> +++ b/kernel/locking/qspinlock_paravirt.h
> @@ -86,9 +86,10 @@ static inline bool pv_hybrid_queued_unfair_trylock(struct qspinlock *lock)
> */
> for (;;) {
> int val = atomic_read(&lock->val);
> + u8 old = 0;
>
> if (!(val & _Q_LOCKED_PENDING_MASK) &&
> - (cmpxchg_acquire(&lock->locked, 0, _Q_LOCKED_VAL) == 0)) {
> + try_cmpxchg_acquire(&lock->locked, &old, _Q_LOCKED_VAL)) {
> lockevent_inc(pv_lock_stealing);
> return true;
> }
> @@ -211,8 +212,9 @@ static struct qspinlock **pv_hash(struct qspinlock *lock, struct pv_node *node)
> int hopcnt = 0;
>
> for_each_hash_entry(he, offset, hash) {
> + struct qspinlock *old = NULL;
> hopcnt++;
> - if (!cmpxchg(&he->lock, NULL, lock)) {
> + if (try_cmpxchg(&he->lock, &old, lock)) {
> WRITE_ONCE(he->node, node);
> lockevent_pv_hop(hopcnt);
> return &he->lock;
> @@ -355,7 +357,7 @@ static void pv_wait_node(struct mcs_spinlock *node, struct mcs_spinlock *prev)
> static void pv_kick_node(struct qspinlock *lock, struct mcs_spinlock *node)
> {
> struct pv_node *pn = (struct pv_node *)node;
> -
> + enum vcpu_state old = vcpu_halted;
> /*
> * If the vCPU is indeed halted, advance its state to match that of
> * pv_wait_node(). If OTOH this fails, the vCPU was running and will
> @@ -372,8 +374,7 @@ static void pv_kick_node(struct qspinlock *lock, struct mcs_spinlock *node)
> * subsequent writes.
> */
> smp_mb__before_atomic();
> - if (cmpxchg_relaxed(&pn->state, vcpu_halted, vcpu_hashed)
> - != vcpu_halted)
> + if (!try_cmpxchg_relaxed(&pn->state, &old, vcpu_hashed))
> return;
>
> /*
> @@ -541,15 +542,14 @@ __pv_queued_spin_unlock_slowpath(struct qspinlock *lock, u8 locked)
> #ifndef __pv_queued_spin_unlock
> __visible __lockfunc void __pv_queued_spin_unlock(struct qspinlock *lock)
> {
> - u8 locked;
> + u8 locked = _Q_LOCKED_VAL;
>
> /*
> * We must not unlock if SLOW, because in that case we must first
> * unhash. Otherwise it would be possible to have multiple @lock
> * entries, which would be BAD.
> */
> - locked = cmpxchg_release(&lock->locked, _Q_LOCKED_VAL, 0);
> - if (likely(locked == _Q_LOCKED_VAL))
> + if (try_cmpxchg_release(&lock->locked, &locked, 0);
> return;
>
> __pv_queued_spin_unlock_slowpath(lock, locked);
Reviewed-by: Waiman Long <[email protected]>


2024-04-11 13:24:40

by Ingo Molnar

[permalink] [raw]
Subject: Re: [PATCH 2/2] locking/pvqspinlock: Use try_cmpxchg() in qspinlock_paravirt.h


* Uros Bizjak <[email protected]> wrote:

> - locked = cmpxchg_release(&lock->locked, _Q_LOCKED_VAL, 0);
> - if (likely(locked == _Q_LOCKED_VAL))
> + if (try_cmpxchg_release(&lock->locked, &locked, 0);
> return; ^------------ ???

This doesn't appear to be a particularly well-tested patch. ;-)

Thanks,

Ingo

Subject: [tip: locking/core] locking/pvqspinlock: Use try_cmpxchg_acquire() in trylock_clear_pending()

The following commit has been merged into the locking/core branch of tip:

Commit-ID: 7316aec453e4f5cb7bd0d7b856c01ed71d64eafc
Gitweb: https://git.kernel.org/tip/7316aec453e4f5cb7bd0d7b856c01ed71d64eafc
Author: Uros Bizjak <[email protected]>
AuthorDate: Mon, 25 Mar 2024 15:09:32 +01:00
Committer: Ingo Molnar <[email protected]>
CommitterDate: Thu, 11 Apr 2024 15:19:08 +02:00

locking/pvqspinlock: Use try_cmpxchg_acquire() in trylock_clear_pending()

Use try_cmpxchg_acquire(*ptr, &old, new) instead of
cmpxchg_relaxed(*ptr, old, new) == old in trylock_clear_pending().

x86 CMPXCHG instruction returns success in ZF flag, so this change
saves a compare after CMPXCHG.

Also change the return type of the function to bool.

No functional change intended.

Signed-off-by: Uros Bizjak <[email protected]>
Signed-off-by: Ingo Molnar <[email protected]>
Reviewed-by: Waiman Long <[email protected]>
Cc: Linus Torvalds <[email protected]>
Link: https://lore.kernel.org/r/[email protected]
---
kernel/locking/qspinlock_paravirt.h | 31 +++++++++++-----------------
1 file changed, 13 insertions(+), 18 deletions(-)

diff --git a/kernel/locking/qspinlock_paravirt.h b/kernel/locking/qspinlock_paravirt.h
index 169950f..77ba80b 100644
--- a/kernel/locking/qspinlock_paravirt.h
+++ b/kernel/locking/qspinlock_paravirt.h
@@ -116,11 +116,12 @@ static __always_inline void set_pending(struct qspinlock *lock)
* barrier. Therefore, an atomic cmpxchg_acquire() is used to acquire the
* lock just to be sure that it will get it.
*/
-static __always_inline int trylock_clear_pending(struct qspinlock *lock)
+static __always_inline bool trylock_clear_pending(struct qspinlock *lock)
{
+ u16 old = _Q_PENDING_VAL;
+
return !READ_ONCE(lock->locked) &&
- (cmpxchg_acquire(&lock->locked_pending, _Q_PENDING_VAL,
- _Q_LOCKED_VAL) == _Q_PENDING_VAL);
+ try_cmpxchg_acquire(&lock->locked_pending, &old, _Q_LOCKED_VAL);
}
#else /* _Q_PENDING_BITS == 8 */
static __always_inline void set_pending(struct qspinlock *lock)
@@ -128,27 +129,21 @@ static __always_inline void set_pending(struct qspinlock *lock)
atomic_or(_Q_PENDING_VAL, &lock->val);
}

-static __always_inline int trylock_clear_pending(struct qspinlock *lock)
+static __always_inline bool trylock_clear_pending(struct qspinlock *lock)
{
- int val = atomic_read(&lock->val);
-
- for (;;) {
- int old, new;
-
- if (val & _Q_LOCKED_MASK)
- break;
+ int old, new;

+ old = atomic_read(&lock->val);
+ do {
+ if (old & _Q_LOCKED_MASK)
+ return false;
/*
* Try to clear pending bit & set locked bit
*/
- old = val;
- new = (val & ~_Q_PENDING_MASK) | _Q_LOCKED_VAL;
- val = atomic_cmpxchg_acquire(&lock->val, old, new);
+ new = (old & ~_Q_PENDING_MASK) | _Q_LOCKED_VAL;
+ } while (!atomic_try_cmpxchg_acquire (&lock->val, &old, new));

- if (val == old)
- return 1;
- }
- return 0;
+ return true;
}
#endif /* _Q_PENDING_BITS == 8 */


2024-04-11 13:36:17

by Uros Bizjak

[permalink] [raw]
Subject: Re: [PATCH 2/2] locking/pvqspinlock: Use try_cmpxchg() in qspinlock_paravirt.h

On Thu, Apr 11, 2024 at 3:24 PM Ingo Molnar <[email protected]> wrote:
>
>
> * Uros Bizjak <[email protected]> wrote:
>
> > - locked = cmpxchg_release(&lock->locked, _Q_LOCKED_VAL, 0);
> > - if (likely(locked == _Q_LOCKED_VAL))
> > + if (try_cmpxchg_release(&lock->locked, &locked, 0);
> > return; ^------------ ???
>
> This doesn't appear to be a particularly well-tested patch. ;-)

Ouch, embarrassing... oh it is a generic function, conditionally compiled with

#ifndef __pv_queued_spin_lock
#endif

and x86 defines its own function ...

I concentrated on different settings of _Q_PENDING_BITS and the above
slipped through.

Thanks,
Uros.

2024-04-11 17:54:53

by Linus Torvalds

[permalink] [raw]
Subject: Re: [tip: locking/core] locking/pvqspinlock: Use try_cmpxchg_acquire() in trylock_clear_pending()

On Thu, 11 Apr 2024 at 06:33, tip-bot2 for Uros Bizjak
<[email protected]> wrote:
>
> Use try_cmpxchg_acquire(*ptr, &old, new) instead of
> cmpxchg_relaxed(*ptr, old, new) == old in trylock_clear_pending().

The above commit message is horribly confusing and wrong.

I was going "that's not right", because it says "use acquire instead
of relaxed" memory ordering, and then goes on to say "No functional
change intended".

But it turns out the *code* was always acquire, and it's only the
commit message that is wrong, presumably due to a bit too much
cut-and-paste.

But please fix the commit message, and use the right memory ordering
in the explanations too.

Linus

2024-04-11 19:08:44

by Uros Bizjak

[permalink] [raw]
Subject: Re: [PATCH 2/2] locking/pvqspinlock: Use try_cmpxchg() in qspinlock_paravirt.h

On Thu, Apr 11, 2024 at 3:35 PM Uros Bizjak <[email protected]> wrote:
>
> On Thu, Apr 11, 2024 at 3:24 PM Ingo Molnar <[email protected]> wrote:
> >
> >
> > * Uros Bizjak <[email protected]> wrote:
> >
> > > - locked = cmpxchg_release(&lock->locked, _Q_LOCKED_VAL, 0);
> > > - if (likely(locked == _Q_LOCKED_VAL))
> > > + if (try_cmpxchg_release(&lock->locked, &locked, 0);
> > > return; ^------------ ???
> >
> > This doesn't appear to be a particularly well-tested patch. ;-)
>
> Ouch, embarrassing... oh it is a generic function, conditionally compiled with
>
> #ifndef __pv_queued_spin_lock
> #endif
>
> and x86 defines its own function ...

Looking at the assembly of the fixed function, it looks that improved
generic function is better than x86_64 special asm:

This is the new generic function:

0000000000000750 <__pv_queued_spin_unlock>:
750: f3 0f 1e fa endbr64
754: b8 01 00 00 00 mov $0x1,%eax
759: 31 d2 xor %edx,%edx
75b: f0 0f b0 17 lock cmpxchg %dl,(%rdi)
75f: 75 05 jne 766 <__pv_queued_spin_unlock+0x16>
761: e9 00 00 00 00 jmp 766 <__pv_queued_spin_unlock+0x16>
762: R_X86_64_PLT32 __x86_return_thunk-0x4
766: 0f b6 f0 movzbl %al,%esi
769: e9 02 ff ff ff jmp 670 <__pv_queued_spin_unlock_slowpath>

and the x86_64 asm version:

0000000000000050 <__raw_callee_save___pv_queued_spin_unlock>:
50: f3 0f 1e fa endbr64
54: 52 push %rdx
55: b8 01 00 00 00 mov $0x1,%eax
5a: 31 d2 xor %edx,%edx
5c: f0 0f b0 17 lock cmpxchg %dl,(%rdi)
60: 3c 01 cmp $0x1,%al
62: 75 06 jne 6a <.slowpath>
64: 5a pop %rdx
65: e9 00 00 00 00 jmp 6a <.slowpath>

I didn't investigate slowpath, but the generic fast-path part is
certainly better than x86_64 special asm.

Uros.

Subject: [tip: locking/core] locking/pvqspinlock: Use try_cmpxchg_acquire() in trylock_clear_pending()

The following commit has been merged into the locking/core branch of tip:

Commit-ID: 6a97734f2222e0352f1900e3eb3167e9069b91bb
Gitweb: https://git.kernel.org/tip/6a97734f2222e0352f1900e3eb3167e9069b91bb
Author: Uros Bizjak <[email protected]>
AuthorDate: Mon, 25 Mar 2024 15:09:32 +01:00
Committer: Ingo Molnar <[email protected]>
CommitterDate: Fri, 12 Apr 2024 11:40:51 +02:00

locking/pvqspinlock: Use try_cmpxchg_acquire() in trylock_clear_pending()

Replace this pattern in trylock_clear_pending():

cmpxchg_acquire(*ptr, old, new) == old

.. with the simpler and faster:

try_cmpxchg_acquire(*ptr, &old, new)

The x86 CMPXCHG instruction returns success in the ZF flag, so this change
saves a compare after the CMPXCHG.

Also change the return type of the function to bool and streamline
the control flow in the _Q_PENDING_BITS == 8 variant a bit.

No functional change intended.

Signed-off-by: Uros Bizjak <[email protected]>
Signed-off-by: Ingo Molnar <[email protected]>
Reviewed-by: Waiman Long <[email protected]>
Reviewed-by: Linus Torvalds <[email protected]>
Link: https://lore.kernel.org/r/[email protected]
---
kernel/locking/qspinlock_paravirt.h | 31 +++++++++++-----------------
1 file changed, 13 insertions(+), 18 deletions(-)

diff --git a/kernel/locking/qspinlock_paravirt.h b/kernel/locking/qspinlock_paravirt.h
index 169950f..77ba80b 100644
--- a/kernel/locking/qspinlock_paravirt.h
+++ b/kernel/locking/qspinlock_paravirt.h
@@ -116,11 +116,12 @@ static __always_inline void set_pending(struct qspinlock *lock)
* barrier. Therefore, an atomic cmpxchg_acquire() is used to acquire the
* lock just to be sure that it will get it.
*/
-static __always_inline int trylock_clear_pending(struct qspinlock *lock)
+static __always_inline bool trylock_clear_pending(struct qspinlock *lock)
{
+ u16 old = _Q_PENDING_VAL;
+
return !READ_ONCE(lock->locked) &&
- (cmpxchg_acquire(&lock->locked_pending, _Q_PENDING_VAL,
- _Q_LOCKED_VAL) == _Q_PENDING_VAL);
+ try_cmpxchg_acquire(&lock->locked_pending, &old, _Q_LOCKED_VAL);
}
#else /* _Q_PENDING_BITS == 8 */
static __always_inline void set_pending(struct qspinlock *lock)
@@ -128,27 +129,21 @@ static __always_inline void set_pending(struct qspinlock *lock)
atomic_or(_Q_PENDING_VAL, &lock->val);
}

-static __always_inline int trylock_clear_pending(struct qspinlock *lock)
+static __always_inline bool trylock_clear_pending(struct qspinlock *lock)
{
- int val = atomic_read(&lock->val);
-
- for (;;) {
- int old, new;
-
- if (val & _Q_LOCKED_MASK)
- break;
+ int old, new;

+ old = atomic_read(&lock->val);
+ do {
+ if (old & _Q_LOCKED_MASK)
+ return false;
/*
* Try to clear pending bit & set locked bit
*/
- old = val;
- new = (val & ~_Q_PENDING_MASK) | _Q_LOCKED_VAL;
- val = atomic_cmpxchg_acquire(&lock->val, old, new);
+ new = (old & ~_Q_PENDING_MASK) | _Q_LOCKED_VAL;
+ } while (!atomic_try_cmpxchg_acquire (&lock->val, &old, new));

- if (val == old)
- return 1;
- }
- return 0;
+ return true;
}
#endif /* _Q_PENDING_BITS == 8 */


2024-04-12 10:00:45

by Ingo Molnar

[permalink] [raw]
Subject: [PATCH v2] locking/pvqspinlock: Use try_cmpxchg_acquire() in trylock_clear_pending()


* Linus Torvalds <[email protected]> wrote:

> On Thu, 11 Apr 2024 at 06:33, tip-bot2 for Uros Bizjak
> <[email protected]> wrote:
> >
> > Use try_cmpxchg_acquire(*ptr, &old, new) instead of
> > cmpxchg_relaxed(*ptr, old, new) == old in trylock_clear_pending().
>
> The above commit message is horribly confusing and wrong.
>
> I was going "that's not right", because it says "use acquire instead
> of relaxed" memory ordering, and then goes on to say "No functional
> change intended".
>
> But it turns out the *code* was always acquire, and it's only the
> commit message that is wrong, presumably due to a bit too much
> cut-and-paste.

Yeah, the replacement is cmpxchg_acquire() => try_cmpxchg_acquire(), with
no change to memory ordering.

> But please fix the commit message, and use the right memory ordering
> in the explanations too.

Done, find below the new patch, which a hopefully better commit message.

I also added your Reviewed-by tag optimistically :)

Thanks,

Ingo

===========================>
From: Uros Bizjak <[email protected]>
Date: Mon, 25 Mar 2024 15:09:32 +0100
Subject: [PATCH] locking/pvqspinlock: Use try_cmpxchg_acquire() in trylock_clear_pending()

Replace this pattern in trylock_clear_pending():

cmpxchg_acquire(*ptr, old, new) == old

.. with the simpler and faster:

try_cmpxchg_acquire(*ptr, &old, new)

The x86 CMPXCHG instruction returns success in the ZF flag, so this change
saves a compare after the CMPXCHG.

Also change the return type of the function to bool and streamline
the control flow in the _Q_PENDING_BITS == 8 variant a bit.

No functional change intended.

Signed-off-by: Uros Bizjak <[email protected]>
Signed-off-by: Ingo Molnar <[email protected]>
Reviewed-by: Waiman Long <[email protected]>
Reviewed-by: Linus Torvalds <[email protected]>
Link: https://lore.kernel.org/r/[email protected]
---
kernel/locking/qspinlock_paravirt.h | 31 +++++++++++++------------------
1 file changed, 13 insertions(+), 18 deletions(-)

diff --git a/kernel/locking/qspinlock_paravirt.h b/kernel/locking/qspinlock_paravirt.h
index 169950fe1aad..77ba80bd95f9 100644
--- a/kernel/locking/qspinlock_paravirt.h
+++ b/kernel/locking/qspinlock_paravirt.h
@@ -116,11 +116,12 @@ static __always_inline void set_pending(struct qspinlock *lock)
* barrier. Therefore, an atomic cmpxchg_acquire() is used to acquire the
* lock just to be sure that it will get it.
*/
-static __always_inline int trylock_clear_pending(struct qspinlock *lock)
+static __always_inline bool trylock_clear_pending(struct qspinlock *lock)
{
+ u16 old = _Q_PENDING_VAL;
+
return !READ_ONCE(lock->locked) &&
- (cmpxchg_acquire(&lock->locked_pending, _Q_PENDING_VAL,
- _Q_LOCKED_VAL) == _Q_PENDING_VAL);
+ try_cmpxchg_acquire(&lock->locked_pending, &old, _Q_LOCKED_VAL);
}
#else /* _Q_PENDING_BITS == 8 */
static __always_inline void set_pending(struct qspinlock *lock)
@@ -128,27 +129,21 @@ static __always_inline void set_pending(struct qspinlock *lock)
atomic_or(_Q_PENDING_VAL, &lock->val);
}

-static __always_inline int trylock_clear_pending(struct qspinlock *lock)
+static __always_inline bool trylock_clear_pending(struct qspinlock *lock)
{
- int val = atomic_read(&lock->val);
-
- for (;;) {
- int old, new;
-
- if (val & _Q_LOCKED_MASK)
- break;
+ int old, new;

+ old = atomic_read(&lock->val);
+ do {
+ if (old & _Q_LOCKED_MASK)
+ return false;
/*
* Try to clear pending bit & set locked bit
*/
- old = val;
- new = (val & ~_Q_PENDING_MASK) | _Q_LOCKED_VAL;
- val = atomic_cmpxchg_acquire(&lock->val, old, new);
+ new = (old & ~_Q_PENDING_MASK) | _Q_LOCKED_VAL;
+ } while (!atomic_try_cmpxchg_acquire (&lock->val, &old, new));

- if (val == old)
- return 1;
- }
- return 0;
+ return true;
}
#endif /* _Q_PENDING_BITS == 8 */