2024-04-11 19:24:58

by Uros Bizjak

[permalink] [raw]
Subject: [PATCH v2 1/2] locking/pvqspinlock: Use try_cmpxchg_acquire() in trylock_clear_pending()

Use try_cmpxchg_acquire(*ptr, &old, new) instead of
cmpxchg_acquire(*ptr, old, new) == old in trylock_clear_pending().
x86 CMPXCHG instruction returns success in ZF flag, so this change
saves a compare after cmpxchg.

Also change the return type of the function to bool.

No functional change intended.

Signed-off-by: Uros Bizjak <[email protected]>
Cc: Peter Zijlstra <[email protected]>
Cc: Ingo Molnar <[email protected]>
Cc: Will Deacon <[email protected]>
Cc: Boqun Feng <[email protected]>
Cc: Linus Torvalds <[email protected]>
Reviewed-by: Waiman Long <[email protected]>
---
v2: Correct cut-n-pasto in the ChangeLog.
---
kernel/locking/qspinlock_paravirt.h | 31 ++++++++++++-----------------
1 file changed, 13 insertions(+), 18 deletions(-)

diff --git a/kernel/locking/qspinlock_paravirt.h b/kernel/locking/qspinlock_paravirt.h
index 169950fe1aad..77ba80bd95f9 100644
--- a/kernel/locking/qspinlock_paravirt.h
+++ b/kernel/locking/qspinlock_paravirt.h
@@ -116,11 +116,12 @@ static __always_inline void set_pending(struct qspinlock *lock)
* barrier. Therefore, an atomic cmpxchg_acquire() is used to acquire the
* lock just to be sure that it will get it.
*/
-static __always_inline int trylock_clear_pending(struct qspinlock *lock)
+static __always_inline bool trylock_clear_pending(struct qspinlock *lock)
{
+ u16 old = _Q_PENDING_VAL;
+
return !READ_ONCE(lock->locked) &&
- (cmpxchg_acquire(&lock->locked_pending, _Q_PENDING_VAL,
- _Q_LOCKED_VAL) == _Q_PENDING_VAL);
+ try_cmpxchg_acquire(&lock->locked_pending, &old, _Q_LOCKED_VAL);
}
#else /* _Q_PENDING_BITS == 8 */
static __always_inline void set_pending(struct qspinlock *lock)
@@ -128,27 +129,21 @@ static __always_inline void set_pending(struct qspinlock *lock)
atomic_or(_Q_PENDING_VAL, &lock->val);
}

-static __always_inline int trylock_clear_pending(struct qspinlock *lock)
+static __always_inline bool trylock_clear_pending(struct qspinlock *lock)
{
- int val = atomic_read(&lock->val);
-
- for (;;) {
- int old, new;
-
- if (val & _Q_LOCKED_MASK)
- break;
+ int old, new;

+ old = atomic_read(&lock->val);
+ do {
+ if (old & _Q_LOCKED_MASK)
+ return false;
/*
* Try to clear pending bit & set locked bit
*/
- old = val;
- new = (val & ~_Q_PENDING_MASK) | _Q_LOCKED_VAL;
- val = atomic_cmpxchg_acquire(&lock->val, old, new);
+ new = (old & ~_Q_PENDING_MASK) | _Q_LOCKED_VAL;
+ } while (!atomic_try_cmpxchg_acquire (&lock->val, &old, new));

- if (val == old)
- return 1;
- }
- return 0;
+ return true;
}
#endif /* _Q_PENDING_BITS == 8 */

--
2.42.0



2024-04-11 19:25:14

by Uros Bizjak

[permalink] [raw]
Subject: [PATCH v2 2/2] locking/pvqspinlock: Use try_cmpxchg() in qspinlock_paravirt.h

Use try_cmpxchg(*ptr, &old, new) instead of
cmpxchg(*ptr, old, new) == old in qspinlock_paravirt.h
x86 CMPXCHG instruction returns success in ZF flag, so
this change saves a compare after cmpxchg.

No functional change intended.

Signed-off-by: Uros Bizjak <[email protected]>
Cc: Peter Zijlstra <[email protected]>
Cc: Ingo Molnar <[email protected]>
Cc: Will Deacon <[email protected]>
Cc: Boqun Feng <[email protected]>
Cc: Linus Torvalds <[email protected]>
Reviewed-by: Waiman Long <[email protected]>
---
v2: Correct a build error in __pv_queued_spin_unlock().
---
kernel/locking/qspinlock_paravirt.h | 16 ++++++++--------
1 file changed, 8 insertions(+), 8 deletions(-)

diff --git a/kernel/locking/qspinlock_paravirt.h b/kernel/locking/qspinlock_paravirt.h
index 77ba80bd95f9..f5a36e67b593 100644
--- a/kernel/locking/qspinlock_paravirt.h
+++ b/kernel/locking/qspinlock_paravirt.h
@@ -86,9 +86,10 @@ static inline bool pv_hybrid_queued_unfair_trylock(struct qspinlock *lock)
*/
for (;;) {
int val = atomic_read(&lock->val);
+ u8 old = 0;

if (!(val & _Q_LOCKED_PENDING_MASK) &&
- (cmpxchg_acquire(&lock->locked, 0, _Q_LOCKED_VAL) == 0)) {
+ try_cmpxchg_acquire(&lock->locked, &old, _Q_LOCKED_VAL)) {
lockevent_inc(pv_lock_stealing);
return true;
}
@@ -211,8 +212,9 @@ static struct qspinlock **pv_hash(struct qspinlock *lock, struct pv_node *node)
int hopcnt = 0;

for_each_hash_entry(he, offset, hash) {
+ struct qspinlock *old = NULL;
hopcnt++;
- if (!cmpxchg(&he->lock, NULL, lock)) {
+ if (try_cmpxchg(&he->lock, &old, lock)) {
WRITE_ONCE(he->node, node);
lockevent_pv_hop(hopcnt);
return &he->lock;
@@ -355,7 +357,7 @@ static void pv_wait_node(struct mcs_spinlock *node, struct mcs_spinlock *prev)
static void pv_kick_node(struct qspinlock *lock, struct mcs_spinlock *node)
{
struct pv_node *pn = (struct pv_node *)node;
-
+ enum vcpu_state old = vcpu_halted;
/*
* If the vCPU is indeed halted, advance its state to match that of
* pv_wait_node(). If OTOH this fails, the vCPU was running and will
@@ -372,8 +374,7 @@ static void pv_kick_node(struct qspinlock *lock, struct mcs_spinlock *node)
* subsequent writes.
*/
smp_mb__before_atomic();
- if (cmpxchg_relaxed(&pn->state, vcpu_halted, vcpu_hashed)
- != vcpu_halted)
+ if (!try_cmpxchg_relaxed(&pn->state, &old, vcpu_hashed))
return;

/*
@@ -541,15 +542,14 @@ __pv_queued_spin_unlock_slowpath(struct qspinlock *lock, u8 locked)
#ifndef __pv_queued_spin_unlock
__visible __lockfunc void __pv_queued_spin_unlock(struct qspinlock *lock)
{
- u8 locked;
+ u8 locked = _Q_LOCKED_VAL;

/*
* We must not unlock if SLOW, because in that case we must first
* unhash. Otherwise it would be possible to have multiple @lock
* entries, which would be BAD.
*/
- locked = cmpxchg_release(&lock->locked, _Q_LOCKED_VAL, 0);
- if (likely(locked == _Q_LOCKED_VAL))
+ if (try_cmpxchg_release(&lock->locked, &locked, 0))
return;

__pv_queued_spin_unlock_slowpath(lock, locked);
--
2.42.0


Subject: [tip: locking/core] locking/pvqspinlock: Use try_cmpxchg() in qspinlock_paravirt.h

The following commit has been merged into the locking/core branch of tip:

Commit-ID: e361be1ff2621493f5c638719d706e9789f7ab4c
Gitweb: https://git.kernel.org/tip/e361be1ff2621493f5c638719d706e9789f7ab4c
Author: Uros Bizjak <[email protected]>
AuthorDate: Thu, 11 Apr 2024 21:22:55 +02:00
Committer: Ingo Molnar <[email protected]>
CommitterDate: Fri, 12 Apr 2024 10:56:45 +02:00

locking/pvqspinlock: Use try_cmpxchg() in qspinlock_paravirt.h

Use try_cmpxchg(*ptr, &old, new) instead of
cmpxchg(*ptr, old, new) == old in qspinlock_paravirt.h
x86 CMPXCHG instruction returns success in ZF flag, so
this change saves a compare after cmpxchg.

No functional change intended.

Signed-off-by: Uros Bizjak <[email protected]>
Signed-off-by: Ingo Molnar <[email protected]>
Reviewed-by: Waiman Long <[email protected]>
Cc: Linus Torvalds <[email protected]>
Link: https://lore.kernel.org/r/[email protected]
---
kernel/locking/qspinlock_paravirt.h | 16 ++++++++--------
1 file changed, 8 insertions(+), 8 deletions(-)

diff --git a/kernel/locking/qspinlock_paravirt.h b/kernel/locking/qspinlock_paravirt.h
index 77ba80b..f5a36e6 100644
--- a/kernel/locking/qspinlock_paravirt.h
+++ b/kernel/locking/qspinlock_paravirt.h
@@ -86,9 +86,10 @@ static inline bool pv_hybrid_queued_unfair_trylock(struct qspinlock *lock)
*/
for (;;) {
int val = atomic_read(&lock->val);
+ u8 old = 0;

if (!(val & _Q_LOCKED_PENDING_MASK) &&
- (cmpxchg_acquire(&lock->locked, 0, _Q_LOCKED_VAL) == 0)) {
+ try_cmpxchg_acquire(&lock->locked, &old, _Q_LOCKED_VAL)) {
lockevent_inc(pv_lock_stealing);
return true;
}
@@ -211,8 +212,9 @@ static struct qspinlock **pv_hash(struct qspinlock *lock, struct pv_node *node)
int hopcnt = 0;

for_each_hash_entry(he, offset, hash) {
+ struct qspinlock *old = NULL;
hopcnt++;
- if (!cmpxchg(&he->lock, NULL, lock)) {
+ if (try_cmpxchg(&he->lock, &old, lock)) {
WRITE_ONCE(he->node, node);
lockevent_pv_hop(hopcnt);
return &he->lock;
@@ -355,7 +357,7 @@ static void pv_wait_node(struct mcs_spinlock *node, struct mcs_spinlock *prev)
static void pv_kick_node(struct qspinlock *lock, struct mcs_spinlock *node)
{
struct pv_node *pn = (struct pv_node *)node;
-
+ enum vcpu_state old = vcpu_halted;
/*
* If the vCPU is indeed halted, advance its state to match that of
* pv_wait_node(). If OTOH this fails, the vCPU was running and will
@@ -372,8 +374,7 @@ static void pv_kick_node(struct qspinlock *lock, struct mcs_spinlock *node)
* subsequent writes.
*/
smp_mb__before_atomic();
- if (cmpxchg_relaxed(&pn->state, vcpu_halted, vcpu_hashed)
- != vcpu_halted)
+ if (!try_cmpxchg_relaxed(&pn->state, &old, vcpu_hashed))
return;

/*
@@ -541,15 +542,14 @@ __pv_queued_spin_unlock_slowpath(struct qspinlock *lock, u8 locked)
#ifndef __pv_queued_spin_unlock
__visible __lockfunc void __pv_queued_spin_unlock(struct qspinlock *lock)
{
- u8 locked;
+ u8 locked = _Q_LOCKED_VAL;

/*
* We must not unlock if SLOW, because in that case we must first
* unhash. Otherwise it would be possible to have multiple @lock
* entries, which would be BAD.
*/
- locked = cmpxchg_release(&lock->locked, _Q_LOCKED_VAL, 0);
- if (likely(locked == _Q_LOCKED_VAL))
+ if (try_cmpxchg_release(&lock->locked, &locked, 0))
return;

__pv_queued_spin_unlock_slowpath(lock, locked);

Subject: [tip: locking/core] locking/pvqspinlock: Use try_cmpxchg() in qspinlock_paravirt.h

The following commit has been merged into the locking/core branch of tip:

Commit-ID: fea0e1820b51fff95c85518eb9cf3386f367908d
Gitweb: https://git.kernel.org/tip/fea0e1820b51fff95c85518eb9cf3386f367908d
Author: Uros Bizjak <[email protected]>
AuthorDate: Thu, 11 Apr 2024 21:22:55 +02:00
Committer: Ingo Molnar <[email protected]>
CommitterDate: Fri, 12 Apr 2024 11:42:39 +02:00

locking/pvqspinlock: Use try_cmpxchg() in qspinlock_paravirt.h

Use try_cmpxchg(*ptr, &old, new) instead of
cmpxchg(*ptr, old, new) == old in qspinlock_paravirt.h
x86 CMPXCHG instruction returns success in ZF flag, so
this change saves a compare after cmpxchg.

No functional change intended.

Signed-off-by: Uros Bizjak <[email protected]>
Signed-off-by: Ingo Molnar <[email protected]>
Reviewed-by: Waiman Long <[email protected]>
Cc: Linus Torvalds <[email protected]>
Link: https://lore.kernel.org/r/[email protected]
---
kernel/locking/qspinlock_paravirt.h | 16 ++++++++--------
1 file changed, 8 insertions(+), 8 deletions(-)

diff --git a/kernel/locking/qspinlock_paravirt.h b/kernel/locking/qspinlock_paravirt.h
index 77ba80b..f5a36e6 100644
--- a/kernel/locking/qspinlock_paravirt.h
+++ b/kernel/locking/qspinlock_paravirt.h
@@ -86,9 +86,10 @@ static inline bool pv_hybrid_queued_unfair_trylock(struct qspinlock *lock)
*/
for (;;) {
int val = atomic_read(&lock->val);
+ u8 old = 0;

if (!(val & _Q_LOCKED_PENDING_MASK) &&
- (cmpxchg_acquire(&lock->locked, 0, _Q_LOCKED_VAL) == 0)) {
+ try_cmpxchg_acquire(&lock->locked, &old, _Q_LOCKED_VAL)) {
lockevent_inc(pv_lock_stealing);
return true;
}
@@ -211,8 +212,9 @@ static struct qspinlock **pv_hash(struct qspinlock *lock, struct pv_node *node)
int hopcnt = 0;

for_each_hash_entry(he, offset, hash) {
+ struct qspinlock *old = NULL;
hopcnt++;
- if (!cmpxchg(&he->lock, NULL, lock)) {
+ if (try_cmpxchg(&he->lock, &old, lock)) {
WRITE_ONCE(he->node, node);
lockevent_pv_hop(hopcnt);
return &he->lock;
@@ -355,7 +357,7 @@ static void pv_wait_node(struct mcs_spinlock *node, struct mcs_spinlock *prev)
static void pv_kick_node(struct qspinlock *lock, struct mcs_spinlock *node)
{
struct pv_node *pn = (struct pv_node *)node;
-
+ enum vcpu_state old = vcpu_halted;
/*
* If the vCPU is indeed halted, advance its state to match that of
* pv_wait_node(). If OTOH this fails, the vCPU was running and will
@@ -372,8 +374,7 @@ static void pv_kick_node(struct qspinlock *lock, struct mcs_spinlock *node)
* subsequent writes.
*/
smp_mb__before_atomic();
- if (cmpxchg_relaxed(&pn->state, vcpu_halted, vcpu_hashed)
- != vcpu_halted)
+ if (!try_cmpxchg_relaxed(&pn->state, &old, vcpu_hashed))
return;

/*
@@ -541,15 +542,14 @@ __pv_queued_spin_unlock_slowpath(struct qspinlock *lock, u8 locked)
#ifndef __pv_queued_spin_unlock
__visible __lockfunc void __pv_queued_spin_unlock(struct qspinlock *lock)
{
- u8 locked;
+ u8 locked = _Q_LOCKED_VAL;

/*
* We must not unlock if SLOW, because in that case we must first
* unhash. Otherwise it would be possible to have multiple @lock
* entries, which would be BAD.
*/
- locked = cmpxchg_release(&lock->locked, _Q_LOCKED_VAL, 0);
- if (likely(locked == _Q_LOCKED_VAL))
+ if (try_cmpxchg_release(&lock->locked, &locked, 0))
return;

__pv_queued_spin_unlock_slowpath(lock, locked);