2022-07-04 14:51:25

by Nicholas Piggin

Subject: [PATCH 02/13] locking/qspinlock: inline mcs_spinlock functions into qspinlock

qspinlock uses mcs_spinlock for the struct type (.next, .locked, and the
misplaced .count), and arch_mcs_spin_{un}lock_contended(). These can be
trivially inlined into qspinlock, and the unused mcs_spinlock code
removed.
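
IOW, after this patch the per-CPU node and the contended handover are
open-coded directly in qspinlock.c, along the lines of the hunks below:

  struct qnode {
          struct qnode *next;     /* MCS queue successor */
          int locked;             /* 1 if lock acquired */
          int count;              /* nesting count */
  #ifdef CONFIG_PARAVIRT_SPINLOCKS
          int cpu;
          u8 state;
  #endif
  };

  /* was arch_mcs_spin_lock_contended(&node->locked) */
  smp_cond_load_acquire(&node->locked, VAL);

  /* was arch_mcs_spin_unlock_contended(&next->locked) */
  smp_store_release(&next->locked, 1);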

Signed-off-by: Nicholas Piggin <[email protected]>
---
arch/alpha/include/asm/Kbuild | 1 -
arch/arc/include/asm/Kbuild | 1 -
arch/arm/include/asm/mcs_spinlock.h | 24 ------
arch/arm64/include/asm/Kbuild | 1 -
arch/hexagon/include/asm/Kbuild | 1 -
arch/ia64/include/asm/Kbuild | 1 -
arch/m68k/include/asm/Kbuild | 1 -
arch/microblaze/include/asm/Kbuild | 1 -
arch/mips/include/asm/Kbuild | 1 -
arch/nios2/include/asm/Kbuild | 1 -
arch/parisc/include/asm/Kbuild | 1 -
arch/powerpc/include/asm/Kbuild | 1 -
arch/s390/include/asm/Kbuild | 1 -
arch/sh/include/asm/Kbuild | 1 -
arch/sparc/include/asm/Kbuild | 1 -
arch/um/include/asm/Kbuild | 1 -
arch/x86/include/asm/Kbuild | 1 -
arch/xtensa/include/asm/Kbuild | 1 -
include/asm-generic/mcs_spinlock.h | 13 ---
kernel/locking/mcs_spinlock.h | 121 ----------------------------
kernel/locking/qspinlock.c | 38 ++++-----
kernel/locking/qspinlock_paravirt.h | 55 ++++++-------
22 files changed, 43 insertions(+), 225 deletions(-)
delete mode 100644 arch/arm/include/asm/mcs_spinlock.h
delete mode 100644 include/asm-generic/mcs_spinlock.h
delete mode 100644 kernel/locking/mcs_spinlock.h

diff --git a/arch/alpha/include/asm/Kbuild b/arch/alpha/include/asm/Kbuild
index 42911c8340c7..d21cf7b3173a 100644
--- a/arch/alpha/include/asm/Kbuild
+++ b/arch/alpha/include/asm/Kbuild
@@ -3,4 +3,3 @@
generated-y += syscall_table.h
generic-y += export.h
generic-y += kvm_para.h
-generic-y += mcs_spinlock.h
diff --git a/arch/arc/include/asm/Kbuild b/arch/arc/include/asm/Kbuild
index 3c1afa524b9c..5ae4337a9301 100644
--- a/arch/arc/include/asm/Kbuild
+++ b/arch/arc/include/asm/Kbuild
@@ -1,6 +1,5 @@
# SPDX-License-Identifier: GPL-2.0
generic-y += extable.h
generic-y += kvm_para.h
-generic-y += mcs_spinlock.h
generic-y += parport.h
generic-y += user.h
diff --git a/arch/arm/include/asm/mcs_spinlock.h b/arch/arm/include/asm/mcs_spinlock.h
deleted file mode 100644
index 529d2cf4d06f..000000000000
--- a/arch/arm/include/asm/mcs_spinlock.h
+++ /dev/null
@@ -1,24 +0,0 @@
-/* SPDX-License-Identifier: GPL-2.0 */
-#ifndef __ASM_MCS_LOCK_H
-#define __ASM_MCS_LOCK_H
-
-#ifdef CONFIG_SMP
-#include <asm/spinlock.h>
-
-/* MCS spin-locking. */
-#define arch_mcs_spin_lock_contended(lock) \
-do { \
- /* Ensure prior stores are observed before we enter wfe. */ \
- smp_mb(); \
- while (!(smp_load_acquire(lock))) \
- wfe(); \
-} while (0) \
-
-#define arch_mcs_spin_unlock_contended(lock) \
-do { \
- smp_store_release(lock, 1); \
- dsb_sev(); \
-} while (0)
-
-#endif /* CONFIG_SMP */
-#endif /* __ASM_MCS_LOCK_H */
diff --git a/arch/arm64/include/asm/Kbuild b/arch/arm64/include/asm/Kbuild
index 5c8ee5a541d2..57e9ad366d25 100644
--- a/arch/arm64/include/asm/Kbuild
+++ b/arch/arm64/include/asm/Kbuild
@@ -1,6 +1,5 @@
# SPDX-License-Identifier: GPL-2.0
generic-y += early_ioremap.h
-generic-y += mcs_spinlock.h
generic-y += qrwlock.h
generic-y += qspinlock.h
generic-y += parport.h
diff --git a/arch/hexagon/include/asm/Kbuild b/arch/hexagon/include/asm/Kbuild
index 3ece3c93fe08..37bbf99f66d4 100644
--- a/arch/hexagon/include/asm/Kbuild
+++ b/arch/hexagon/include/asm/Kbuild
@@ -2,4 +2,3 @@
generic-y += extable.h
generic-y += iomap.h
generic-y += kvm_para.h
-generic-y += mcs_spinlock.h
diff --git a/arch/ia64/include/asm/Kbuild b/arch/ia64/include/asm/Kbuild
index f994c1daf9d4..a0198c12e339 100644
--- a/arch/ia64/include/asm/Kbuild
+++ b/arch/ia64/include/asm/Kbuild
@@ -1,5 +1,4 @@
# SPDX-License-Identifier: GPL-2.0
generated-y += syscall_table.h
generic-y += kvm_para.h
-generic-y += mcs_spinlock.h
generic-y += vtime.h
diff --git a/arch/m68k/include/asm/Kbuild b/arch/m68k/include/asm/Kbuild
index 1b720299deb1..8dbef73ce01d 100644
--- a/arch/m68k/include/asm/Kbuild
+++ b/arch/m68k/include/asm/Kbuild
@@ -3,5 +3,4 @@ generated-y += syscall_table.h
generic-y += export.h
generic-y += extable.h
generic-y += kvm_para.h
-generic-y += mcs_spinlock.h
generic-y += spinlock.h
diff --git a/arch/microblaze/include/asm/Kbuild b/arch/microblaze/include/asm/Kbuild
index a055f5dbe00a..7615a27e0851 100644
--- a/arch/microblaze/include/asm/Kbuild
+++ b/arch/microblaze/include/asm/Kbuild
@@ -3,7 +3,6 @@ generated-y += syscall_table.h
generic-y += cmpxchg.h
generic-y += extable.h
generic-y += kvm_para.h
-generic-y += mcs_spinlock.h
generic-y += parport.h
generic-y += syscalls.h
generic-y += tlb.h
diff --git a/arch/mips/include/asm/Kbuild b/arch/mips/include/asm/Kbuild
index dee172716581..65cedca08771 100644
--- a/arch/mips/include/asm/Kbuild
+++ b/arch/mips/include/asm/Kbuild
@@ -9,7 +9,6 @@ generated-y += unistd_nr_o32.h

generic-y += export.h
generic-y += kvm_para.h
-generic-y += mcs_spinlock.h
generic-y += parport.h
generic-y += qrwlock.h
generic-y += qspinlock.h
diff --git a/arch/nios2/include/asm/Kbuild b/arch/nios2/include/asm/Kbuild
index 7fe7437555fb..5718eee9665c 100644
--- a/arch/nios2/include/asm/Kbuild
+++ b/arch/nios2/include/asm/Kbuild
@@ -2,6 +2,5 @@
generic-y += cmpxchg.h
generic-y += extable.h
generic-y += kvm_para.h
-generic-y += mcs_spinlock.h
generic-y += spinlock.h
generic-y += user.h
diff --git a/arch/parisc/include/asm/Kbuild b/arch/parisc/include/asm/Kbuild
index e6e7f74c8ac9..1f0c28d74c88 100644
--- a/arch/parisc/include/asm/Kbuild
+++ b/arch/parisc/include/asm/Kbuild
@@ -2,5 +2,4 @@
generated-y += syscall_table_32.h
generated-y += syscall_table_64.h
generic-y += kvm_para.h
-generic-y += mcs_spinlock.h
generic-y += user.h
diff --git a/arch/powerpc/include/asm/Kbuild b/arch/powerpc/include/asm/Kbuild
index bcf95ce0964f..813a8c3405ad 100644
--- a/arch/powerpc/include/asm/Kbuild
+++ b/arch/powerpc/include/asm/Kbuild
@@ -4,7 +4,6 @@ generated-y += syscall_table_64.h
generated-y += syscall_table_spu.h
generic-y += export.h
generic-y += kvm_types.h
-generic-y += mcs_spinlock.h
generic-y += qrwlock.h
generic-y += vtime.h
generic-y += early_ioremap.h
diff --git a/arch/s390/include/asm/Kbuild b/arch/s390/include/asm/Kbuild
index 1a18d7b82f86..8b036a4ee2ca 100644
--- a/arch/s390/include/asm/Kbuild
+++ b/arch/s390/include/asm/Kbuild
@@ -7,4 +7,3 @@ generated-y += unistd_nr.h
generic-y += asm-offsets.h
generic-y += export.h
generic-y += kvm_types.h
-generic-y += mcs_spinlock.h
diff --git a/arch/sh/include/asm/Kbuild b/arch/sh/include/asm/Kbuild
index fc44d9c88b41..3192f19bcf85 100644
--- a/arch/sh/include/asm/Kbuild
+++ b/arch/sh/include/asm/Kbuild
@@ -1,5 +1,4 @@
# SPDX-License-Identifier: GPL-2.0
generated-y += syscall_table.h
generic-y += kvm_para.h
-generic-y += mcs_spinlock.h
generic-y += parport.h
diff --git a/arch/sparc/include/asm/Kbuild b/arch/sparc/include/asm/Kbuild
index 0b9d98ced34a..f0b913f7ba05 100644
--- a/arch/sparc/include/asm/Kbuild
+++ b/arch/sparc/include/asm/Kbuild
@@ -3,4 +3,3 @@ generated-y += syscall_table_32.h
generated-y += syscall_table_64.h
generic-y += export.h
generic-y += kvm_para.h
-generic-y += mcs_spinlock.h
diff --git a/arch/um/include/asm/Kbuild b/arch/um/include/asm/Kbuild
index b2d834a29f3a..04080c0c1aec 100644
--- a/arch/um/include/asm/Kbuild
+++ b/arch/um/include/asm/Kbuild
@@ -14,7 +14,6 @@ generic-y += hw_irq.h
generic-y += irq_regs.h
generic-y += irq_work.h
generic-y += kdebug.h
-generic-y += mcs_spinlock.h
generic-y += mmiowb.h
generic-y += module.lds.h
generic-y += param.h
diff --git a/arch/x86/include/asm/Kbuild b/arch/x86/include/asm/Kbuild
index 1e51650b79d7..beb7683f7b8f 100644
--- a/arch/x86/include/asm/Kbuild
+++ b/arch/x86/include/asm/Kbuild
@@ -10,4 +10,3 @@ generated-y += xen-hypercalls.h

generic-y += early_ioremap.h
generic-y += export.h
-generic-y += mcs_spinlock.h
diff --git a/arch/xtensa/include/asm/Kbuild b/arch/xtensa/include/asm/Kbuild
index fa07c686cbcc..29ae65cb68c2 100644
--- a/arch/xtensa/include/asm/Kbuild
+++ b/arch/xtensa/include/asm/Kbuild
@@ -2,7 +2,6 @@
generated-y += syscall_table.h
generic-y += extable.h
generic-y += kvm_para.h
-generic-y += mcs_spinlock.h
generic-y += param.h
generic-y += parport.h
generic-y += qrwlock.h
diff --git a/include/asm-generic/mcs_spinlock.h b/include/asm-generic/mcs_spinlock.h
deleted file mode 100644
index 10cd4ffc6ba2..000000000000
--- a/include/asm-generic/mcs_spinlock.h
+++ /dev/null
@@ -1,13 +0,0 @@
-#ifndef __ASM_MCS_SPINLOCK_H
-#define __ASM_MCS_SPINLOCK_H
-
-/*
- * Architectures can define their own:
- *
- * arch_mcs_spin_lock_contended(l)
- * arch_mcs_spin_unlock_contended(l)
- *
- * See kernel/locking/mcs_spinlock.c.
- */
-
-#endif /* __ASM_MCS_SPINLOCK_H */
diff --git a/kernel/locking/mcs_spinlock.h b/kernel/locking/mcs_spinlock.h
deleted file mode 100644
index 85251d8771d9..000000000000
--- a/kernel/locking/mcs_spinlock.h
+++ /dev/null
@@ -1,121 +0,0 @@
-/* SPDX-License-Identifier: GPL-2.0 */
-/*
- * MCS lock defines
- *
- * This file contains the main data structure and API definitions of MCS lock.
- *
- * The MCS lock (proposed by Mellor-Crummey and Scott) is a simple spin-lock
- * with the desirable properties of being fair, and with each cpu trying
- * to acquire the lock spinning on a local variable.
- * It avoids expensive cache bounces that common test-and-set spin-lock
- * implementations incur.
- */
-#ifndef __LINUX_MCS_SPINLOCK_H
-#define __LINUX_MCS_SPINLOCK_H
-
-#include <asm/mcs_spinlock.h>
-
-struct mcs_spinlock {
- struct mcs_spinlock *next;
- int locked; /* 1 if lock acquired */
- int count; /* nesting count, see qspinlock.c */
-};
-
-#ifndef arch_mcs_spin_lock_contended
-/*
- * Using smp_cond_load_acquire() provides the acquire semantics
- * required so that subsequent operations happen after the
- * lock is acquired. Additionally, some architectures such as
- * ARM64 would like to do spin-waiting instead of purely
- * spinning, and smp_cond_load_acquire() provides that behavior.
- */
-#define arch_mcs_spin_lock_contended(l) \
-do { \
- smp_cond_load_acquire(l, VAL); \
-} while (0)
-#endif
-
-#ifndef arch_mcs_spin_unlock_contended
-/*
- * smp_store_release() provides a memory barrier to ensure all
- * operations in the critical section has been completed before
- * unlocking.
- */
-#define arch_mcs_spin_unlock_contended(l) \
- smp_store_release((l), 1)
-#endif
-
-/*
- * Note: the smp_load_acquire/smp_store_release pair is not
- * sufficient to form a full memory barrier across
- * cpus for many architectures (except x86) for mcs_unlock and mcs_lock.
- * For applications that need a full barrier across multiple cpus
- * with mcs_unlock and mcs_lock pair, smp_mb__after_unlock_lock() should be
- * used after mcs_lock.
- */
-
-/*
- * In order to acquire the lock, the caller should declare a local node and
- * pass a reference of the node to this function in addition to the lock.
- * If the lock has already been acquired, then this will proceed to spin
- * on this node->locked until the previous lock holder sets the node->locked
- * in mcs_spin_unlock().
- */
-static inline
-void mcs_spin_lock(struct mcs_spinlock **lock, struct mcs_spinlock *node)
-{
- struct mcs_spinlock *prev;
-
- /* Init node */
- node->locked = 0;
- node->next = NULL;
-
- /*
- * We rely on the full barrier with global transitivity implied by the
- * below xchg() to order the initialization stores above against any
- * observation of @node. And to provide the ACQUIRE ordering associated
- * with a LOCK primitive.
- */
- prev = xchg(lock, node);
- if (likely(prev == NULL)) {
- /*
- * Lock acquired, don't need to set node->locked to 1. Threads
- * only spin on its own node->locked value for lock acquisition.
- * However, since this thread can immediately acquire the lock
- * and does not proceed to spin on its own node->locked, this
- * value won't be used. If a debug mode is needed to
- * audit lock status, then set node->locked value here.
- */
- return;
- }
- WRITE_ONCE(prev->next, node);
-
- /* Wait until the lock holder passes the lock down. */
- arch_mcs_spin_lock_contended(&node->locked);
-}
-
-/*
- * Releases the lock. The caller should pass in the corresponding node that
- * was used to acquire the lock.
- */
-static inline
-void mcs_spin_unlock(struct mcs_spinlock **lock, struct mcs_spinlock *node)
-{
- struct mcs_spinlock *next = READ_ONCE(node->next);
-
- if (likely(!next)) {
- /*
- * Release the lock by setting it to NULL
- */
- if (likely(cmpxchg_release(lock, node, NULL) == node))
- return;
- /* Wait until the next pointer is set */
- while (!(next = READ_ONCE(node->next)))
- cpu_relax();
- }
-
- /* Pass lock to next waiter. */
- arch_mcs_spin_unlock_contended(&next->locked);
-}
-
-#endif /* __LINUX_MCS_SPINLOCK_H */
diff --git a/kernel/locking/qspinlock.c b/kernel/locking/qspinlock.c
index a0fc21d99199..32f401e966ab 100644
--- a/kernel/locking/qspinlock.c
+++ b/kernel/locking/qspinlock.c
@@ -66,11 +66,10 @@
*
*/

-#include "mcs_spinlock.h"
#define MAX_NODES 4

/*
- * On 64-bit architectures, the mcs_spinlock structure will be 16 bytes in
+ * On 64-bit architectures, the qnode structure will be 16 bytes in
* size and four of them will fit nicely in one 64-byte cacheline. For
* pvqspinlock, however, we need more space for extra data. To accommodate
* that, we insert two more long words to pad it up to 32 bytes. IOW, only
@@ -80,7 +79,9 @@
* qspinlocks.
*/
struct qnode {
- struct mcs_spinlock mcs;
+ struct qnode *next;
+ int locked; /* 1 if lock acquired */
+ int count; /* nesting count */
#ifdef CONFIG_PARAVIRT_SPINLOCKS
int cpu;
u8 state;
@@ -124,18 +125,18 @@ static inline __pure u32 encode_tail(int cpu, int idx)
return tail;
}

-static inline __pure struct mcs_spinlock *decode_tail(u32 tail)
+static inline __pure struct qnode *decode_tail(u32 tail)
{
int cpu = (tail >> _Q_TAIL_CPU_OFFSET) - 1;
int idx = (tail & _Q_TAIL_IDX_MASK) >> _Q_TAIL_IDX_OFFSET;

- return per_cpu_ptr(&qnodes[idx].mcs, cpu);
+ return per_cpu_ptr(&qnodes[idx], cpu);
}

static inline __pure
-struct mcs_spinlock *grab_mcs_node(struct mcs_spinlock *base, int idx)
+struct qnode *grab_qnode(struct qnode *base, int idx)
{
- return &((struct qnode *)base + idx)->mcs;
+ return &base[idx];
}

#define _Q_LOCKED_PENDING_MASK (_Q_LOCKED_MASK | _Q_PENDING_MASK)
@@ -271,13 +272,13 @@ static __always_inline void set_locked(struct qspinlock *lock)
* all the PV callbacks.
*/

-static __always_inline void __pv_init_node(struct mcs_spinlock *node) { }
-static __always_inline void __pv_wait_node(struct mcs_spinlock *node,
- struct mcs_spinlock *prev) { }
+static __always_inline void __pv_init_node(struct qnode *node) { }
+static __always_inline void __pv_wait_node(struct qnode *node,
+ struct qnode *prev) { }
static __always_inline void __pv_kick_node(struct qspinlock *lock,
- struct mcs_spinlock *node) { }
+ struct qnode *node) { }
static __always_inline u32 __pv_wait_head_or_lock(struct qspinlock *lock,
- struct mcs_spinlock *node)
+ struct qnode *node)
{ return 0; }

#define pv_enabled() false
@@ -316,7 +317,7 @@ static __always_inline u32 __pv_wait_head_or_lock(struct qspinlock *lock,
*/
void queued_spin_lock_slowpath(struct qspinlock *lock, u32 val)
{
- struct mcs_spinlock *prev, *next, *node;
+ struct qnode *prev, *next, *node;
u32 old, tail;
int idx;

@@ -399,7 +400,7 @@ void queued_spin_lock_slowpath(struct qspinlock *lock, u32 val)
queue:
lockevent_inc(lock_slowpath);
pv_queue:
- node = this_cpu_ptr(&qnodes[0].mcs);
+ node = this_cpu_ptr(&qnodes[0]);
idx = node->count++;
tail = encode_tail(smp_processor_id(), idx);

@@ -421,7 +422,7 @@ void queued_spin_lock_slowpath(struct qspinlock *lock, u32 val)
goto release;
}

- node = grab_mcs_node(node, idx);
+ node = grab_qnode(node, idx);

/*
* Keep counts of non-zero index values:
@@ -475,7 +476,8 @@ void queued_spin_lock_slowpath(struct qspinlock *lock, u32 val)
WRITE_ONCE(prev->next, node);

pv_wait_node(node, prev);
- arch_mcs_spin_lock_contended(&node->locked);
+ /* Wait for mcs node lock to be released */
+ smp_cond_load_acquire(&node->locked, VAL);

/*
* While waiting for the MCS lock, the next pointer may have
@@ -554,7 +556,7 @@ void queued_spin_lock_slowpath(struct qspinlock *lock, u32 val)
if (!next)
next = smp_cond_load_relaxed(&node->next, (VAL));

- arch_mcs_spin_unlock_contended(&next->locked);
+ smp_store_release(&next->locked, 1); /* unlock the mcs node lock */
pv_kick_node(lock, next);

release:
@@ -563,7 +565,7 @@ void queued_spin_lock_slowpath(struct qspinlock *lock, u32 val)
/*
* release the node
*/
- __this_cpu_dec(qnodes[0].mcs.count);
+ __this_cpu_dec(qnodes[0].count);
}
EXPORT_SYMBOL(queued_spin_lock_slowpath);

diff --git a/kernel/locking/qspinlock_paravirt.h b/kernel/locking/qspinlock_paravirt.h
index b6a175155f36..cce3d3dde216 100644
--- a/kernel/locking/qspinlock_paravirt.h
+++ b/kernel/locking/qspinlock_paravirt.h
@@ -267,16 +267,12 @@ pv_wait_early(struct qnode *prev, int loop)
}

/*
- * Initialize the PV part of the mcs_spinlock node.
+ * Initialize the PV part of the qnode.
*/
-static void pv_init_node(struct mcs_spinlock *node)
+static void pv_init_node(struct qnode *node)
{
- struct qnode *pn = (struct qnode *)node;
-
- BUILD_BUG_ON(sizeof(struct qnode) > sizeof(struct qnode));
-
- pn->cpu = smp_processor_id();
- pn->state = vcpu_running;
+ node->cpu = smp_processor_id();
+ node->state = vcpu_running;
}

/*
@@ -284,10 +280,8 @@ static void pv_init_node(struct mcs_spinlock *node)
* pv_kick_node() is used to set _Q_SLOW_VAL and fill in hash table on its
* behalf.
*/
-static void pv_wait_node(struct mcs_spinlock *node, struct mcs_spinlock *prev)
+static void pv_wait_node(struct qnode *node, struct qnode *prev)
{
- struct qnode *pn = (struct qnode *)node;
- struct qnode *pp = (struct qnode *)prev;
int loop;
bool wait_early;

@@ -295,7 +289,7 @@ static void pv_wait_node(struct mcs_spinlock *node, struct mcs_spinlock *prev)
for (wait_early = false, loop = SPIN_THRESHOLD; loop; loop--) {
if (READ_ONCE(node->locked))
return;
- if (pv_wait_early(pp, loop)) {
+ if (pv_wait_early(prev, loop)) {
wait_early = true;
break;
}
@@ -303,20 +297,20 @@ static void pv_wait_node(struct mcs_spinlock *node, struct mcs_spinlock *prev)
}

/*
- * Order pn->state vs pn->locked thusly:
+ * Order node->state vs node->locked thusly:
*
- * [S] pn->state = vcpu_halted [S] next->locked = 1
- * MB MB
- * [L] pn->locked [RmW] pn->state = vcpu_hashed
+ * [S] node->state = vcpu_halted [S] next->locked = 1
+ * MB MB
+ * [L] node->locked [RmW] node->state = vcpu_hashed
*
* Matches the cmpxchg() from pv_kick_node().
*/
- smp_store_mb(pn->state, vcpu_halted);
+ smp_store_mb(node->state, vcpu_halted);

if (!READ_ONCE(node->locked)) {
lockevent_inc(pv_wait_node);
lockevent_cond_inc(pv_wait_early, wait_early);
- pv_wait(&pn->state, vcpu_halted);
+ pv_wait(&node->state, vcpu_halted);
}

/*
@@ -324,7 +318,7 @@ static void pv_wait_node(struct mcs_spinlock *node, struct mcs_spinlock *prev)
* value so that pv_wait_head_or_lock() knows to not also try
* to hash this lock.
*/
- cmpxchg(&pn->state, vcpu_halted, vcpu_running);
+ cmpxchg(&node->state, vcpu_halted, vcpu_running);

/*
* If the locked flag is still not set after wakeup, it is a
@@ -351,10 +345,8 @@ static void pv_wait_node(struct mcs_spinlock *node, struct mcs_spinlock *prev)
* such that they're waiting in pv_wait_head_or_lock(), this avoids a
* wake/sleep cycle.
*/
-static void pv_kick_node(struct qspinlock *lock, struct mcs_spinlock *node)
+static void pv_kick_node(struct qspinlock *lock, struct qnode *node)
{
- struct qnode *pn = (struct qnode *)node;
-
/*
* If the vCPU is indeed halted, advance its state to match that of
* pv_wait_node(). If OTOH this fails, the vCPU was running and will
@@ -363,15 +355,15 @@ static void pv_kick_node(struct qspinlock *lock, struct mcs_spinlock *node)
* Matches with smp_store_mb() and cmpxchg() in pv_wait_node()
*
* The write to next->locked in arch_mcs_spin_unlock_contended()
- * must be ordered before the read of pn->state in the cmpxchg()
+ * must be ordered before the read of node->state in the cmpxchg()
* below for the code to work correctly. To guarantee full ordering
* irrespective of the success or failure of the cmpxchg(),
* a relaxed version with explicit barrier is used. The control
- * dependency will order the reading of pn->state before any
+ * dependency will order the reading of node->state before any
* subsequent writes.
*/
smp_mb__before_atomic();
- if (cmpxchg_relaxed(&pn->state, vcpu_halted, vcpu_hashed)
+ if (cmpxchg_relaxed(&node->state, vcpu_halted, vcpu_hashed)
!= vcpu_halted)
return;

@@ -383,7 +375,7 @@ static void pv_kick_node(struct qspinlock *lock, struct mcs_spinlock *node)
* needed.
*/
WRITE_ONCE(lock->locked, _Q_SLOW_VAL);
- (void)pv_hash(lock, pn);
+ (void)pv_hash(lock, node);
}

/*
@@ -394,9 +386,8 @@ static void pv_kick_node(struct qspinlock *lock, struct mcs_spinlock *node)
* The current value of the lock will be returned for additional processing.
*/
static u32
-pv_wait_head_or_lock(struct qspinlock *lock, struct mcs_spinlock *node)
+pv_wait_head_or_lock(struct qspinlock *lock, struct qnode *node)
{
- struct qnode *pn = (struct qnode *)node;
struct qspinlock **lp = NULL;
int waitcnt = 0;
int loop;
@@ -405,7 +396,7 @@ pv_wait_head_or_lock(struct qspinlock *lock, struct mcs_spinlock *node)
* If pv_kick_node() already advanced our state, we don't need to
* insert ourselves into the hash table anymore.
*/
- if (READ_ONCE(pn->state) == vcpu_hashed)
+ if (READ_ONCE(node->state) == vcpu_hashed)
lp = (struct qspinlock **)1;

/*
@@ -418,7 +409,7 @@ pv_wait_head_or_lock(struct qspinlock *lock, struct mcs_spinlock *node)
* Set correct vCPU state to be used by queue node wait-early
* mechanism.
*/
- WRITE_ONCE(pn->state, vcpu_running);
+ WRITE_ONCE(node->state, vcpu_running);

/*
* Set the pending bit in the active lock spinning loop to
@@ -434,7 +425,7 @@ pv_wait_head_or_lock(struct qspinlock *lock, struct mcs_spinlock *node)


if (!lp) { /* ONCE */
- lp = pv_hash(lock, pn);
+ lp = pv_hash(lock, node);

/*
* We must hash before setting _Q_SLOW_VAL, such that
@@ -458,7 +449,7 @@ pv_wait_head_or_lock(struct qspinlock *lock, struct mcs_spinlock *node)
goto gotlock;
}
}
- WRITE_ONCE(pn->state, vcpu_hashed);
+ WRITE_ONCE(node->state, vcpu_hashed);
lockevent_inc(pv_wait_head);
lockevent_cond_inc(pv_wait_again, waitcnt);
pv_wait(&lock->locked, _Q_SLOW_VAL);
--
2.35.1


2022-07-05 17:29:19

by Peter Zijlstra

Subject: Re: [PATCH 02/13] locking/qspinlock: inline mcs_spinlock functions into qspinlock

On Tue, Jul 05, 2022 at 12:38:09AM +1000, Nicholas Piggin wrote:
> qspinlock uses mcs_spinlock for the struct type (.next, .locked, and the
> misplaced .count), and arch_mcs_spin_{un}lock_contended(). These can be
> trivially inlined into qspinlock, and the unused mcs_spinlock code
> removed.
>
> Signed-off-by: Nicholas Piggin <[email protected]>
> ---

> arch/arm/include/asm/mcs_spinlock.h | 24 ------

> --- a/arch/arm/include/asm/mcs_spinlock.h
> +++ /dev/null
> @@ -1,24 +0,0 @@
> -/* SPDX-License-Identifier: GPL-2.0 */
> -#ifndef __ASM_MCS_LOCK_H
> -#define __ASM_MCS_LOCK_H
> -
> -#ifdef CONFIG_SMP
> -#include <asm/spinlock.h>
> -
> -/* MCS spin-locking. */
> -#define arch_mcs_spin_lock_contended(lock) \
> -do { \
> - /* Ensure prior stores are observed before we enter wfe. */ \
> - smp_mb(); \
> - while (!(smp_load_acquire(lock))) \
> - wfe(); \
> -} while (0) \
> -
> -#define arch_mcs_spin_unlock_contended(lock) \
> -do { \
> - smp_store_release(lock, 1); \
> - dsb_sev(); \
> -} while (0)
> -
> -#endif /* CONFIG_SMP */
> -#endif /* __ASM_MCS_LOCK_H */

> @@ -475,7 +476,8 @@ void queued_spin_lock_slowpath(struct qspinlock *lock, u32 val)
> WRITE_ONCE(prev->next, node);
>
> pv_wait_node(node, prev);
> - arch_mcs_spin_lock_contended(&node->locked);
> + /* Wait for mcs node lock to be released */
> + smp_cond_load_acquire(&node->locked, VAL);
>
> /*
> * While waiting for the MCS lock, the next pointer may have
> @@ -554,7 +556,7 @@ void queued_spin_lock_slowpath(struct qspinlock *lock, u32 val)
> if (!next)
> next = smp_cond_load_relaxed(&node->next, (VAL));
>
> - arch_mcs_spin_unlock_contended(&next->locked);
> + smp_store_release(&next->locked, 1); /* unlock the mcs node lock */

These are not equivalent. Now it so happens that ARM doesn't use
qspinlock and the other mcs lock users are gone by now, but something
like this should at least have been called out in the Changelog I think.
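
For illustration, the two forms being swapped boil down to roughly this
(arm definitions as quoted above, generic fallbacks as in the deleted
kernel/locking/mcs_spinlock.h):

  /* arm lock side: full barrier, then wait-for-event until granted */
  smp_mb();
  while (!(smp_load_acquire(lock)))
          wfe();
  /* arm unlock side: hand over, then signal the event to leave wfe() */
  smp_store_release(lock, 1);
  dsb_sev();

  /* generic fallbacks, which is what qspinlock.c now open-codes */
  smp_cond_load_acquire(lock, VAL);
  smp_store_release(lock, 1);

The generic pair gives the acquire/release ordering but has no wfe()
wait or event signalling, which is what makes the two not
interchangeable in general.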

2022-07-12 00:09:22

by Nicholas Piggin

Subject: Re: [PATCH 02/13] locking/qspinlock: inline mcs_spinlock functions into qspinlock

Excerpts from Peter Zijlstra's message of July 6, 2022 2:57 am:
> On Tue, Jul 05, 2022 at 12:38:09AM +1000, Nicholas Piggin wrote:
>> qspinlock uses mcs_spinlock for the struct type (.next, .locked, and the
>> misplaced .count), and arch_mcs_spin_{un}lock_contended(). These can be
>> trivially inlined into qspinlock, and the unused mcs_spinlock code
>> removed.
>>
>> Signed-off-by: Nicholas Piggin <[email protected]>
>> ---
>
>> arch/arm/include/asm/mcs_spinlock.h | 24 ------
>
>> --- a/arch/arm/include/asm/mcs_spinlock.h
>> +++ /dev/null
>> @@ -1,24 +0,0 @@
>> -/* SPDX-License-Identifier: GPL-2.0 */
>> -#ifndef __ASM_MCS_LOCK_H
>> -#define __ASM_MCS_LOCK_H
>> -
>> -#ifdef CONFIG_SMP
>> -#include <asm/spinlock.h>
>> -
>> -/* MCS spin-locking. */
>> -#define arch_mcs_spin_lock_contended(lock) \
>> -do { \
>> - /* Ensure prior stores are observed before we enter wfe. */ \
>> - smp_mb(); \
>> - while (!(smp_load_acquire(lock))) \
>> - wfe(); \
>> -} while (0) \
>> -
>> -#define arch_mcs_spin_unlock_contended(lock) \
>> -do { \
>> - smp_store_release(lock, 1); \
>> - dsb_sev(); \
>> -} while (0)
>> -
>> -#endif /* CONFIG_SMP */
>> -#endif /* __ASM_MCS_LOCK_H */
>
>> @@ -475,7 +476,8 @@ void queued_spin_lock_slowpath(struct qspinlock *lock, u32 val)
>> WRITE_ONCE(prev->next, node);
>>
>> pv_wait_node(node, prev);
>> - arch_mcs_spin_lock_contended(&node->locked);
>> + /* Wait for mcs node lock to be released */
>> + smp_cond_load_acquire(&node->locked, VAL);
>>
>> /*
>> * While waiting for the MCS lock, the next pointer may have
>> @@ -554,7 +556,7 @@ void queued_spin_lock_slowpath(struct qspinlock *lock, u32 val)
>> if (!next)
>> next = smp_cond_load_relaxed(&node->next, (VAL));
>>
>> - arch_mcs_spin_unlock_contended(&next->locked);
>> + smp_store_release(&next->locked, 1); /* unlock the mcs node lock */
>
> These are not equivalent. Now it so happens that ARM doesn't use
> qspinlock and the other mcs lock users are gone by now, but something
> like this should at least have been called out in the Changelog I think.
>

Yeah, I could mention it in the changelog; I just didn't because it is
practically equivalent: arm is the only architecture with its own
arch_mcs_spin_{un}lock_contended() and it doesn't use qspinlock, so
nothing that actually uses qspinlock ends up generating different code.

Thanks,
Nick