2024-05-28 15:12:05

by Alexandre Ghiti

Subject: [PATCH 0/7] Zacas/Zabha support and qspinlocks

This implements [cmp]xchgXX() macros using Zacas and Zabha extensions
and finally uses those newly introduced macros to add support for
qspinlocks: note that this implementation of qspinlocks satisfies the
forward progress guarantee.
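
As a quick reminder of the semantics the series builds on: cmpxchg()
returns the value that was previously in *ptr, and the operation
succeeded iff that value equals the expected old value. A minimal,
hypothetical usage sketch (try_take_flag() is a made-up name, not part
of this series; the byte-sized case is exactly what Zabha serves without
the LR/SC emulation):

#include <linux/atomic.h>
#include <linux/types.h>

/* Hypothetical example, not from this series: atomically take a byte flag. */
static bool try_take_flag(u8 *flag)
{
	/* cmpxchg() returns the old value; success iff it was 0. */
	return cmpxchg(flag, 0, 1) == 0;
}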

Thanks to Guo and Leonardo for their work!

Alexandre Ghiti (5):
riscv: Implement cmpxchg32/64() using Zacas
riscv: Implement cmpxchg8/16() using Zabha
riscv: Implement arch_cmpxchg128() using Zacas
riscv: Implement xchg8/16() using Zabha
riscv: Add qspinlock support based on Zabha extension

Guo Ren (2):
asm-generic: ticket-lock: Reuse arch_spinlock_t of qspinlock
asm-generic: ticket-lock: Add separate ticket-lock.h

.../locking/queued-spinlocks/arch-support.txt | 2 +-
arch/riscv/Kconfig | 35 ++++++
arch/riscv/Makefile | 21 ++++
arch/riscv/include/asm/Kbuild | 4 +-
arch/riscv/include/asm/cmpxchg.h | 114 ++++++++++++++++--
arch/riscv/include/asm/hwcap.h | 1 +
arch/riscv/include/asm/spinlock.h | 39 ++++++
arch/riscv/kernel/cpufeature.c | 1 +
arch/riscv/kernel/setup.c | 18 +++
include/asm-generic/qspinlock.h | 2 +
include/asm-generic/spinlock.h | 87 +------------
include/asm-generic/spinlock_types.h | 12 +-
include/asm-generic/ticket_spinlock.h | 105 ++++++++++++++++
13 files changed, 336 insertions(+), 105 deletions(-)
create mode 100644 arch/riscv/include/asm/spinlock.h
create mode 100644 include/asm-generic/ticket_spinlock.h

--
2.39.2



2024-05-28 15:13:16

by Alexandre Ghiti

Subject: [PATCH 1/7] riscv: Implement cmpxchg32/64() using Zacas

This adds runtime support for Zacas in cmpxchg operations.

Signed-off-by: Alexandre Ghiti <[email protected]>
---
arch/riscv/Kconfig | 17 +++++++++++++++++
arch/riscv/Makefile | 11 +++++++++++
arch/riscv/include/asm/cmpxchg.h | 23 ++++++++++++++++++++---
3 files changed, 48 insertions(+), 3 deletions(-)

diff --git a/arch/riscv/Kconfig b/arch/riscv/Kconfig
index 8a0f403432e8..b443def70139 100644
--- a/arch/riscv/Kconfig
+++ b/arch/riscv/Kconfig
@@ -579,6 +579,23 @@ config RISCV_ISA_V_PREEMPTIVE
preemption. Enabling this config will result in higher memory
consumption due to the allocation of per-task's kernel Vector context.

+config TOOLCHAIN_HAS_ZACAS
+ bool
+ default y
+ depends on !64BIT || $(cc-option,-mabi=lp64 -march=rv64ima_zacas)
+ depends on !32BIT || $(cc-option,-mabi=ilp32 -march=rv32ima_zacas)
+ depends on AS_HAS_OPTION_ARCH
+
+config RISCV_ISA_ZACAS
+ bool "Zacas extension support for atomic CAS"
+ depends on TOOLCHAIN_HAS_ZACAS
+ default y
+ help
+ Adds support to use atomic CAS instead of LR/SC to implement kernel
+ atomic cmpxchg operation.
+
+ If you don't know what to do here, say Y.
+
config TOOLCHAIN_HAS_ZBB
bool
default y
diff --git a/arch/riscv/Makefile b/arch/riscv/Makefile
index 5b3115a19852..d5b60b87998c 100644
--- a/arch/riscv/Makefile
+++ b/arch/riscv/Makefile
@@ -78,6 +78,17 @@ endif
# Check if the toolchain supports Zihintpause extension
riscv-march-$(CONFIG_TOOLCHAIN_HAS_ZIHINTPAUSE) := $(riscv-march-y)_zihintpause

+# Check if the toolchain supports Zacas
+ifdef CONFIG_AS_IS_LLVM
+# Support for experimental Zacas was merged in LLVM 17, but the removal of
+# the "experimental" was merged in LLVM 19.
+KBUILD_CFLAGS += -menable-experimental-extensions
+KBUILD_AFLAGS += -menable-experimental-extensions
+riscv-march-y := $(riscv-march-y)_zacas1p0
+else
+riscv-march-$(CONFIG_TOOLCHAIN_HAS_ZACAS) := $(riscv-march-y)_zacas
+endif
+
# Remove F,D,V from isa string for all. Keep extensions between "fd" and "v" by
# matching non-v and non-multi-letter extensions out with the filter ([^v_]*)
KBUILD_CFLAGS += -march=$(shell echo $(riscv-march-y) | sed -E 's/(rv32ima|rv64ima)fd([^v_]*)v?/\1\2/')
diff --git a/arch/riscv/include/asm/cmpxchg.h b/arch/riscv/include/asm/cmpxchg.h
index 4d23f0c35b94..1c50b4821ac8 100644
--- a/arch/riscv/include/asm/cmpxchg.h
+++ b/arch/riscv/include/asm/cmpxchg.h
@@ -9,6 +9,7 @@
#include <linux/bug.h>

#include <asm/fence.h>
+#include <asm/alternative.h>

#define __arch_xchg_masked(prepend, append, r, p, n) \
({ \
@@ -132,21 +133,37 @@
r = (__typeof__(*(p)))((__retx & __mask) >> __s); \
})

-#define __arch_cmpxchg(lr_sfx, sc_sfx, prepend, append, r, p, co, o, n) \
+#define __arch_cmpxchg(lr_sfx, sc_cas_sfx, prepend, append, r, p, co, o, n) \
({ \
+ __label__ zacas, end; \
register unsigned int __rc; \
\
+ asm goto(ALTERNATIVE("nop", "j %[zacas]", 0, \
+ RISCV_ISA_EXT_ZACAS, 1) \
+ : : : : zacas); \
+ \
__asm__ __volatile__ ( \
prepend \
"0: lr" lr_sfx " %0, %2\n" \
" bne %0, %z3, 1f\n" \
- " sc" sc_sfx " %1, %z4, %2\n" \
+ " sc" sc_cas_sfx " %1, %z4, %2\n" \
" bnez %1, 0b\n" \
append \
"1:\n" \
: "=&r" (r), "=&r" (__rc), "+A" (*(p)) \
: "rJ" (co o), "rJ" (n) \
: "memory"); \
+ goto end; \
+ \
+zacas: \
+ __asm__ __volatile__ ( \
+ prepend \
+ " amocas" sc_cas_sfx " %0, %z2, %1\n" \
+ append \
+ : "+&r" (r), "+A" (*(p)) \
+ : "rJ" (n) \
+ : "memory"); \
+end: \
})

#define _arch_cmpxchg(ptr, old, new, sc_sfx, prepend, append) \
@@ -154,7 +171,7 @@
__typeof__(ptr) __ptr = (ptr); \
__typeof__(*(__ptr)) __old = (old); \
__typeof__(*(__ptr)) __new = (new); \
- __typeof__(*(__ptr)) __ret; \
+ __typeof__(*(__ptr)) __ret = (old); \
\
switch (sizeof(*__ptr)) { \
case 1: \
--
2.39.2
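
For readers less familiar with the asm goto/ALTERNATIVE() pattern used in
this patch: at boot, when Zacas is detected, the patched "nop" becomes a
jump to the zacas label, so the LR/SC retry loop is bypassed and a single
amocas instruction performs the compare-and-swap. A rough standalone
sketch of that control flow, with an ordinary runtime flag standing in
for the boot-time code patching (cmpxchg32_sketch() and have_zacas are
made-up names; this is an illustration, not the kernel macro, barriers
for the ordered variants are omitted, and it needs a Zacas-capable
toolchain to assemble):

#include <linux/types.h>

static int cmpxchg32_sketch(int *p, int old, int new, bool have_zacas)
{
	int ret = old;	/* amocas compares against this and returns the old value in it */
	int rc;

	if (have_zacas) {
		/* Single-instruction CAS, as in the zacas: branch above. */
		asm volatile("	amocas.w %0, %z2, %1\n"
			     : "+&r" (ret), "+A" (*p)
			     : "rJ" (new)
			     : "memory");
		return ret;
	}

	/* Fallback: the classic LR/SC retry loop. */
	asm volatile("0:	lr.w %0, %2\n"
		     "	bne  %0, %z3, 1f\n"
		     "	sc.w %1, %z4, %2\n"
		     "	bnez %1, 0b\n"
		     "1:\n"
		     : "=&r" (ret), "=&r" (rc), "+A" (*p)
		     : "rJ" (old), "rJ" (new)
		     : "memory");
	return ret;
}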


2024-05-28 15:14:49

by Alexandre Ghiti

Subject: [PATCH 2/7] riscv: Implement cmpxchg8/16() using Zabha

This adds runtime support for Zabha in cmpxchg8/16 operations.

Signed-off-by: Alexandre Ghiti <[email protected]>
---
arch/riscv/Kconfig | 16 ++++++++++++++++
arch/riscv/Makefile | 10 ++++++++++
arch/riscv/include/asm/cmpxchg.h | 26 ++++++++++++++++++++++++--
3 files changed, 50 insertions(+), 2 deletions(-)

diff --git a/arch/riscv/Kconfig b/arch/riscv/Kconfig
index b443def70139..05597719bb1c 100644
--- a/arch/riscv/Kconfig
+++ b/arch/riscv/Kconfig
@@ -579,6 +579,22 @@ config RISCV_ISA_V_PREEMPTIVE
preemption. Enabling this config will result in higher memory
consumption due to the allocation of per-task's kernel Vector context.

+config TOOLCHAIN_HAS_ZABHA
+ bool
+ default y
+ depends on !64BIT || $(cc-option,-mabi=lp64 -march=rv64ima_zabha)
+ depends on !32BIT || $(cc-option,-mabi=ilp32 -march=rv32ima_zabha)
+ depends on AS_HAS_OPTION_ARCH
+
+config RISCV_ISA_ZABHA
+ bool "Zabha extension support for atomic byte/half-word operations"
+ depends on TOOLCHAIN_HAS_ZABHA
+ default y
+ help
+ Adds support to use atomic byte/half-word operations in the kernel.
+
+ If you don't know what to do here, say Y.
+
config TOOLCHAIN_HAS_ZACAS
bool
default y
diff --git a/arch/riscv/Makefile b/arch/riscv/Makefile
index d5b60b87998c..f58ac921dece 100644
--- a/arch/riscv/Makefile
+++ b/arch/riscv/Makefile
@@ -89,6 +89,16 @@ else
riscv-march-$(CONFIG_TOOLCHAIN_HAS_ZACAS) := $(riscv-march-y)_zacas
endif

+# Check if the toolchain supports Zabha
+ifdef CONFIG_AS_IS_LLVM
+# Support for experimental Zabha was merged in LLVM 19.
+KBUILD_CFLAGS += -menable-experimental-extensions
+KBUILD_AFLAGS += -menable-experimental-extensions
+riscv-march-y := $(riscv-march-y)_zabha1p0
+else
+riscv-march-$(CONFIG_TOOLCHAIN_HAS_ZABHA) := $(riscv-march-y)_zabha
+endif
+
# Remove F,D,V from isa string for all. Keep extensions between "fd" and "v" by
# matching non-v and non-multi-letter extensions out with the filter ([^v_]*)
KBUILD_CFLAGS += -march=$(shell echo $(riscv-march-y) | sed -E 's/(rv32ima|rv64ima)fd([^v_]*)v?/\1\2/')
diff --git a/arch/riscv/include/asm/cmpxchg.h b/arch/riscv/include/asm/cmpxchg.h
index 1c50b4821ac8..65de9771078e 100644
--- a/arch/riscv/include/asm/cmpxchg.h
+++ b/arch/riscv/include/asm/cmpxchg.h
@@ -103,8 +103,14 @@
* indicated by comparing RETURN with OLD.
*/

-#define __arch_cmpxchg_masked(sc_sfx, prepend, append, r, p, o, n) \
+#define __arch_cmpxchg_masked(sc_sfx, cas_sfx, prepend, append, r, p, o, n) \
({ \
+ __label__ zabha, end; \
+ \
+ asm goto(ALTERNATIVE("nop", "j %[zabha]", 0, \
+ RISCV_ISA_EXT_ZABHA, 1) \
+ : : : : zabha); \
+ \
u32 *__ptr32b = (u32 *)((ulong)(p) & ~0x3); \
ulong __s = ((ulong)(p) & (0x4 - sizeof(*p))) * BITS_PER_BYTE; \
ulong __mask = GENMASK(((sizeof(*p)) * BITS_PER_BYTE) - 1, 0) \
@@ -131,6 +137,17 @@
: "memory"); \
\
r = (__typeof__(*(p)))((__retx & __mask) >> __s); \
+ goto end; \
+ \
+zabha: \
+ __asm__ __volatile__ ( \
+ prepend \
+ " amocas" cas_sfx " %0, %z2, %1\n" \
+ append \
+ : "+&r" (r), "+A" (*(p)) \
+ : "rJ" (n) \
+ : "memory"); \
+end: \
})

#define __arch_cmpxchg(lr_sfx, sc_cas_sfx, prepend, append, r, p, co, o, n) \
@@ -175,8 +192,13 @@ end: \
\
switch (sizeof(*__ptr)) { \
case 1: \
+ __arch_cmpxchg_masked(sc_sfx, ".b" sc_sfx, \
+ prepend, append, \
+ __ret, __ptr, __old, __new); \
+ break; \
case 2: \
- __arch_cmpxchg_masked(sc_sfx, prepend, append, \
+ __arch_cmpxchg_masked(sc_sfx, ".h" sc_sfx, \
+ prepend, append, \
__ret, __ptr, __old, __new); \
break; \
case 4: \
--
2.39.2
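
For reference, the LR/SC path kept above emulates a 1- or 2-byte cmpxchg
on top of the naturally aligned 32-bit word that contains the object. A
small illustrative helper with the same address/shift/mask arithmetic as
the macro (masked_view() is a made-up name, not part of the patch):

#include <linux/bits.h>
#include <linux/types.h>

/*
 * Illustration only: for a 1- or 2-byte object at p, compute the aligned
 * 32-bit word containing it, the bit offset of the object within that
 * word, and a mask covering it.
 */
static void masked_view(void *p, size_t size, u32 **word,
			unsigned long *shift, unsigned long *mask)
{
	*word  = (u32 *)((unsigned long)p & ~0x3);
	*shift = ((unsigned long)p & (0x4 - size)) * BITS_PER_BYTE;
	*mask  = GENMASK(size * BITS_PER_BYTE - 1, 0) << *shift;
}

For example, a u16 living in the upper half of its word ((p & 0x3) == 2)
yields shift = 16 and mask = 0xffff0000; the fallback then runs the whole
LR/SC sequence on *word and extracts the sub-word with
(__retx & __mask) >> __s, while the Zabha path above operates on the
sub-word directly with a single amocas.b/amocas.h.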


2024-05-28 15:16:03

by Alexandre Ghiti

Subject: [PATCH 3/7] riscv: Implement arch_cmpxchg128() using Zacas

Now that Zacas is supported in the kernel, let's use the double word
atomic version of amocas to improve the SLUB allocator.

Note that we have to select fixed registers, otherwise gcc fails to pick
even-numbered registers and then produces a reserved encoding which fails
to assemble.

Signed-off-by: Alexandre Ghiti <[email protected]>
---
arch/riscv/Kconfig | 1 +
arch/riscv/include/asm/cmpxchg.h | 41 ++++++++++++++++++++++++++++++++
2 files changed, 42 insertions(+)

diff --git a/arch/riscv/Kconfig b/arch/riscv/Kconfig
index 05597719bb1c..184a9edb04e0 100644
--- a/arch/riscv/Kconfig
+++ b/arch/riscv/Kconfig
@@ -101,6 +101,7 @@ config RISCV
select GENERIC_VDSO_TIME_NS if HAVE_GENERIC_VDSO
select HARDIRQS_SW_RESEND
select HAS_IOPORT if MMU
+ select HAVE_ALIGNED_STRUCT_PAGE
select HAVE_ARCH_AUDITSYSCALL
select HAVE_ARCH_HUGE_VMALLOC if HAVE_ARCH_HUGE_VMAP
select HAVE_ARCH_HUGE_VMAP if MMU && 64BIT && !XIP_KERNEL
diff --git a/arch/riscv/include/asm/cmpxchg.h b/arch/riscv/include/asm/cmpxchg.h
index 65de9771078e..0789fbe38b23 100644
--- a/arch/riscv/include/asm/cmpxchg.h
+++ b/arch/riscv/include/asm/cmpxchg.h
@@ -242,4 +242,45 @@ end: \
arch_cmpxchg_relaxed((ptr), (o), (n)); \
})

+#ifdef CONFIG_RISCV_ISA_ZACAS
+
+#define system_has_cmpxchg128() \
+ riscv_has_extension_unlikely(RISCV_ISA_EXT_ZACAS)
+
+union __u128_halves {
+ u128 full;
+ struct {
+ u64 low, high;
+ };
+};
+
+#define __arch_cmpxchg128(p, o, n, prepend, append) \
+({ \
+ __typeof__(*(p)) __o = (o); \
+ union __u128_halves new = { .full = (n) }; \
+ union __u128_halves old = { .full = (__o) }; \
+ register unsigned long x6 asm ("x6") = new.low; \
+ register unsigned long x7 asm ("x7") = new.high; \
+ register unsigned long x28 asm ("x28") = old.low; \
+ register unsigned long x29 asm ("x29") = old.high; \
+ \
+ __asm__ __volatile__ ( \
+ prepend \
+ " amocas.q %0, %z2, %1\n" \
+ append \
+ : "+&r" (x28), "+A" (*(p)) \
+ : "rJ" (x6) \
+ : "memory"); \
+ \
+ __o; \
+})
+
+#define arch_cmpxchg128(ptr, o, n) \
+ __arch_cmpxchg128((ptr), (o), (n), "", " fence rw, rw\n")
+
+#define arch_cmpxchg128_local(ptr, o, n) \
+ __arch_cmpxchg128((ptr), (o), (n), "", "")
+
+#endif /* CONFIG_RISCV_ISA_ZACAS */
+
#endif /* _ASM_RISCV_CMPXCHG_H */
--
2.39.2
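
A hypothetical usage sketch of the new 128-bit primitive (illustrative
only, not from this series; try_update_pair() is a made-up name): callers
are expected to gate on system_has_cmpxchg128(), since the macros above
are only compiled in with CONFIG_RISCV_ISA_ZACAS and only report the
capability when the extension was detected at boot.

#include <linux/atomic.h>
#include <linux/types.h>

/* Hypothetical example: update a 128-bit slot only on Zacas-capable hardware. */
static bool try_update_pair(u128 *slot, u128 old, u128 new)
{
	if (!system_has_cmpxchg128())
		return false;

	/* arch_cmpxchg128() returns the previous value; success iff it was 'old'. */
	return arch_cmpxchg128(slot, old, new) == old;
}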


2024-05-28 15:16:36

by Alexandre Ghiti

Subject: [PATCH 4/7] riscv: Implement xchg8/16() using Zabha

This adds runtime support for Zabha in xchg8/16() operations.

Signed-off-by: Alexandre Ghiti <[email protected]>
---
arch/riscv/include/asm/cmpxchg.h | 24 ++++++++++++++++++++++--
arch/riscv/include/asm/hwcap.h | 1 +
arch/riscv/kernel/cpufeature.c | 1 +
3 files changed, 24 insertions(+), 2 deletions(-)

diff --git a/arch/riscv/include/asm/cmpxchg.h b/arch/riscv/include/asm/cmpxchg.h
index 0789fbe38b23..43696d9e13aa 100644
--- a/arch/riscv/include/asm/cmpxchg.h
+++ b/arch/riscv/include/asm/cmpxchg.h
@@ -11,8 +11,14 @@
#include <asm/fence.h>
#include <asm/alternative.h>

-#define __arch_xchg_masked(prepend, append, r, p, n) \
+#define __arch_xchg_masked(swap_sfx, prepend, append, r, p, n) \
({ \
+ __label__ zabha, end; \
+ \
+ asm goto(ALTERNATIVE("nop", "j %[zabha]", 0, \
+ RISCV_ISA_EXT_ZABHA, 1) \
+ : : : : zabha); \
+ \
u32 *__ptr32b = (u32 *)((ulong)(p) & ~0x3); \
ulong __s = ((ulong)(p) & (0x4 - sizeof(*p))) * BITS_PER_BYTE; \
ulong __mask = GENMASK(((sizeof(*p)) * BITS_PER_BYTE) - 1, 0) \
@@ -34,6 +40,17 @@
: "memory"); \
\
r = (__typeof__(*(p)))((__retx & __mask) >> __s); \
+ goto end; \
+ \
+zabha: \
+ __asm__ __volatile__ ( \
+ prepend \
+ " amoswap" swap_sfx " %0, %z2, %1\n" \
+ append \
+ : "=&r" (r), "+A" (*(p)) \
+ : "rJ" (n) \
+ : "memory"); \
+end: \
})

#define __arch_xchg(sfx, prepend, append, r, p, n) \
@@ -55,8 +72,11 @@
\
switch (sizeof(*__ptr)) { \
case 1: \
+ __arch_xchg_masked(".b" sfx, prepend, append, \
+ __ret, __ptr, __new); \
+ break; \
case 2: \
- __arch_xchg_masked(prepend, append, \
+ __arch_xchg_masked(".h" sfx, prepend, append, \
__ret, __ptr, __new); \
break; \
case 4: \
diff --git a/arch/riscv/include/asm/hwcap.h b/arch/riscv/include/asm/hwcap.h
index e17d0078a651..f71ddd2ca163 100644
--- a/arch/riscv/include/asm/hwcap.h
+++ b/arch/riscv/include/asm/hwcap.h
@@ -81,6 +81,7 @@
#define RISCV_ISA_EXT_ZTSO 72
#define RISCV_ISA_EXT_ZACAS 73
#define RISCV_ISA_EXT_XANDESPMU 74
+#define RISCV_ISA_EXT_ZABHA 75

#define RISCV_ISA_EXT_XLINUXENVCFG 127

diff --git a/arch/riscv/kernel/cpufeature.c b/arch/riscv/kernel/cpufeature.c
index 3ed2359eae35..8d0f56dd2f53 100644
--- a/arch/riscv/kernel/cpufeature.c
+++ b/arch/riscv/kernel/cpufeature.c
@@ -257,6 +257,7 @@ const struct riscv_isa_ext_data riscv_isa_ext[] = {
__RISCV_ISA_EXT_DATA(zihintpause, RISCV_ISA_EXT_ZIHINTPAUSE),
__RISCV_ISA_EXT_DATA(zihpm, RISCV_ISA_EXT_ZIHPM),
__RISCV_ISA_EXT_DATA(zacas, RISCV_ISA_EXT_ZACAS),
+ __RISCV_ISA_EXT_DATA(zabha, RISCV_ISA_EXT_ZABHA),
__RISCV_ISA_EXT_DATA(zfa, RISCV_ISA_EXT_ZFA),
__RISCV_ISA_EXT_DATA(zfh, RISCV_ISA_EXT_ZFH),
__RISCV_ISA_EXT_DATA(zfhmin, RISCV_ISA_EXT_ZFHMIN),
--
2.39.2


2024-05-28 15:17:27

by Alexandre Ghiti

Subject: [PATCH 5/7] asm-generic: ticket-lock: Reuse arch_spinlock_t of qspinlock

From: Guo Ren <[email protected]>

The arch_spinlock_t of qspinlock has contained the atomic_t val, which
satisfies the ticket-lock requirement. Thus, unify the arch_spinlock_t
into qspinlock_types.h. This is the preparation for the next combo
spinlock.

Reviewed-by: Leonardo Bras <[email protected]>
Suggested-by: Arnd Bergmann <[email protected]>
Link: https://lore.kernel.org/linux-riscv/CAK8P3a2rnz9mQqhN6-e0CGUUv9rntRELFdxt_weiD7FxH7fkfQ@mail.gmail.com/
Signed-off-by: Guo Ren <[email protected]>
Signed-off-by: Guo Ren <[email protected]>
---
include/asm-generic/spinlock.h | 14 +++++++-------
include/asm-generic/spinlock_types.h | 12 ++----------
2 files changed, 9 insertions(+), 17 deletions(-)

diff --git a/include/asm-generic/spinlock.h b/include/asm-generic/spinlock.h
index 90803a826ba0..4773334ee638 100644
--- a/include/asm-generic/spinlock.h
+++ b/include/asm-generic/spinlock.h
@@ -32,7 +32,7 @@

static __always_inline void arch_spin_lock(arch_spinlock_t *lock)
{
- u32 val = atomic_fetch_add(1<<16, lock);
+ u32 val = atomic_fetch_add(1<<16, &lock->val);
u16 ticket = val >> 16;

if (ticket == (u16)val)
@@ -46,31 +46,31 @@ static __always_inline void arch_spin_lock(arch_spinlock_t *lock)
* have no outstanding writes due to the atomic_fetch_add() the extra
* orderings are free.
*/
- atomic_cond_read_acquire(lock, ticket == (u16)VAL);
+ atomic_cond_read_acquire(&lock->val, ticket == (u16)VAL);
smp_mb();
}

static __always_inline bool arch_spin_trylock(arch_spinlock_t *lock)
{
- u32 old = atomic_read(lock);
+ u32 old = atomic_read(&lock->val);

if ((old >> 16) != (old & 0xffff))
return false;

- return atomic_try_cmpxchg(lock, &old, old + (1<<16)); /* SC, for RCsc */
+ return atomic_try_cmpxchg(&lock->val, &old, old + (1<<16)); /* SC, for RCsc */
}

static __always_inline void arch_spin_unlock(arch_spinlock_t *lock)
{
u16 *ptr = (u16 *)lock + IS_ENABLED(CONFIG_CPU_BIG_ENDIAN);
- u32 val = atomic_read(lock);
+ u32 val = atomic_read(&lock->val);

smp_store_release(ptr, (u16)val + 1);
}

static __always_inline int arch_spin_value_unlocked(arch_spinlock_t lock)
{
- u32 val = lock.counter;
+ u32 val = lock.val.counter;

return ((val >> 16) == (val & 0xffff));
}
@@ -84,7 +84,7 @@ static __always_inline int arch_spin_is_locked(arch_spinlock_t *lock)

static __always_inline int arch_spin_is_contended(arch_spinlock_t *lock)
{
- u32 val = atomic_read(lock);
+ u32 val = atomic_read(&lock->val);

return (s16)((val >> 16) - (val & 0xffff)) > 1;
}
diff --git a/include/asm-generic/spinlock_types.h b/include/asm-generic/spinlock_types.h
index 8962bb730945..f534aa5de394 100644
--- a/include/asm-generic/spinlock_types.h
+++ b/include/asm-generic/spinlock_types.h
@@ -3,15 +3,7 @@
#ifndef __ASM_GENERIC_SPINLOCK_TYPES_H
#define __ASM_GENERIC_SPINLOCK_TYPES_H

-#include <linux/types.h>
-typedef atomic_t arch_spinlock_t;
-
-/*
- * qrwlock_types depends on arch_spinlock_t, so we must typedef that before the
- * include.
- */
-#include <asm/qrwlock_types.h>
-
-#define __ARCH_SPIN_LOCK_UNLOCKED ATOMIC_INIT(0)
+#include <asm-generic/qspinlock_types.h>
+#include <asm-generic/qrwlock_types.h>

#endif /* __ASM_GENERIC_SPINLOCK_TYPES_H */
--
2.39.2
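
For readers decoding the bit-twiddling above: the single 32-bit lock
value packs the "next" ticket in its upper 16 bits and the "owner"
(now-serving) ticket in its lower 16 bits; arch_spin_lock() takes a
ticket by adding 1 << 16 and then spins until the owner half reaches its
ticket. A tiny illustrative decoder (helper names are made up, not part
of the patch):

#include <linux/types.h>

/* Illustration only: how the 32-bit ticket-lock value is interpreted. */
static inline u16 ticket_next(u32 val)
{
	return val >> 16;
}

static inline u16 ticket_owner(u32 val)
{
	return val & 0xffff;
}

/* The lock is free exactly when no tickets are outstanding. */
static inline bool ticket_is_unlocked(u32 val)
{
	return ticket_next(val) == ticket_owner(val);
}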


2024-05-28 15:18:13

by Alexandre Ghiti

Subject: [PATCH 6/7] asm-generic: ticket-lock: Add separate ticket-lock.h

From: Guo Ren <[email protected]>

Add a separate ticket-lock.h to include multiple spinlock versions and
select one at compile time or runtime.

Reviewed-by: Leonardo Bras <[email protected]>
Suggested-by: Arnd Bergmann <[email protected]>
Link: https://lore.kernel.org/linux-riscv/CAK8P3a2rnz9mQqhN6-e0CGUUv9rntRELFdxt_weiD7FxH7fkfQ@mail.gmail.com/
Signed-off-by: Guo Ren <[email protected]>
Signed-off-by: Guo Ren <[email protected]>
---
include/asm-generic/spinlock.h | 87 +---------------------
include/asm-generic/ticket_spinlock.h | 103 ++++++++++++++++++++++++++
2 files changed, 104 insertions(+), 86 deletions(-)
create mode 100644 include/asm-generic/ticket_spinlock.h

diff --git a/include/asm-generic/spinlock.h b/include/asm-generic/spinlock.h
index 4773334ee638..970590baf61b 100644
--- a/include/asm-generic/spinlock.h
+++ b/include/asm-generic/spinlock.h
@@ -1,94 +1,9 @@
/* SPDX-License-Identifier: GPL-2.0 */

-/*
- * 'Generic' ticket-lock implementation.
- *
- * It relies on atomic_fetch_add() having well defined forward progress
- * guarantees under contention. If your architecture cannot provide this, stick
- * to a test-and-set lock.
- *
- * It also relies on atomic_fetch_add() being safe vs smp_store_release() on a
- * sub-word of the value. This is generally true for anything LL/SC although
- * you'd be hard pressed to find anything useful in architecture specifications
- * about this. If your architecture cannot do this you might be better off with
- * a test-and-set.
- *
- * It further assumes atomic_*_release() + atomic_*_acquire() is RCpc and hence
- * uses atomic_fetch_add() which is RCsc to create an RCsc hot path, along with
- * a full fence after the spin to upgrade the otherwise-RCpc
- * atomic_cond_read_acquire().
- *
- * The implementation uses smp_cond_load_acquire() to spin, so if the
- * architecture has WFE like instructions to sleep instead of poll for word
- * modifications be sure to implement that (see ARM64 for example).
- *
- */
-
#ifndef __ASM_GENERIC_SPINLOCK_H
#define __ASM_GENERIC_SPINLOCK_H

-#include <linux/atomic.h>
-#include <asm-generic/spinlock_types.h>
-
-static __always_inline void arch_spin_lock(arch_spinlock_t *lock)
-{
- u32 val = atomic_fetch_add(1<<16, &lock->val);
- u16 ticket = val >> 16;
-
- if (ticket == (u16)val)
- return;
-
- /*
- * atomic_cond_read_acquire() is RCpc, but rather than defining a
- * custom cond_read_rcsc() here we just emit a full fence. We only
- * need the prior reads before subsequent writes ordering from
- * smb_mb(), but as atomic_cond_read_acquire() just emits reads and we
- * have no outstanding writes due to the atomic_fetch_add() the extra
- * orderings are free.
- */
- atomic_cond_read_acquire(&lock->val, ticket == (u16)VAL);
- smp_mb();
-}
-
-static __always_inline bool arch_spin_trylock(arch_spinlock_t *lock)
-{
- u32 old = atomic_read(&lock->val);
-
- if ((old >> 16) != (old & 0xffff))
- return false;
-
- return atomic_try_cmpxchg(&lock->val, &old, old + (1<<16)); /* SC, for RCsc */
-}
-
-static __always_inline void arch_spin_unlock(arch_spinlock_t *lock)
-{
- u16 *ptr = (u16 *)lock + IS_ENABLED(CONFIG_CPU_BIG_ENDIAN);
- u32 val = atomic_read(&lock->val);
-
- smp_store_release(ptr, (u16)val + 1);
-}
-
-static __always_inline int arch_spin_value_unlocked(arch_spinlock_t lock)
-{
- u32 val = lock.val.counter;
-
- return ((val >> 16) == (val & 0xffff));
-}
-
-static __always_inline int arch_spin_is_locked(arch_spinlock_t *lock)
-{
- arch_spinlock_t val = READ_ONCE(*lock);
-
- return !arch_spin_value_unlocked(val);
-}
-
-static __always_inline int arch_spin_is_contended(arch_spinlock_t *lock)
-{
- u32 val = atomic_read(&lock->val);
-
- return (s16)((val >> 16) - (val & 0xffff)) > 1;
-}
-
+#include <asm-generic/ticket_spinlock.h>
#include <asm/qrwlock.h>

#endif /* __ASM_GENERIC_SPINLOCK_H */
diff --git a/include/asm-generic/ticket_spinlock.h b/include/asm-generic/ticket_spinlock.h
new file mode 100644
index 000000000000..cfcff22b37b3
--- /dev/null
+++ b/include/asm-generic/ticket_spinlock.h
@@ -0,0 +1,103 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+
+/*
+ * 'Generic' ticket-lock implementation.
+ *
+ * It relies on atomic_fetch_add() having well defined forward progress
+ * guarantees under contention. If your architecture cannot provide this, stick
+ * to a test-and-set lock.
+ *
+ * It also relies on atomic_fetch_add() being safe vs smp_store_release() on a
+ * sub-word of the value. This is generally true for anything LL/SC although
+ * you'd be hard pressed to find anything useful in architecture specifications
+ * about this. If your architecture cannot do this you might be better off with
+ * a test-and-set.
+ *
+ * It further assumes atomic_*_release() + atomic_*_acquire() is RCpc and hence
+ * uses atomic_fetch_add() which is RCsc to create an RCsc hot path, along with
+ * a full fence after the spin to upgrade the otherwise-RCpc
+ * atomic_cond_read_acquire().
+ *
+ * The implementation uses smp_cond_load_acquire() to spin, so if the
+ * architecture has WFE like instructions to sleep instead of poll for word
+ * modifications be sure to implement that (see ARM64 for example).
+ *
+ */
+
+#ifndef __ASM_GENERIC_TICKET_SPINLOCK_H
+#define __ASM_GENERIC_TICKET_SPINLOCK_H
+
+#include <linux/atomic.h>
+#include <asm-generic/spinlock_types.h>
+
+static __always_inline void ticket_spin_lock(arch_spinlock_t *lock)
+{
+ u32 val = atomic_fetch_add(1<<16, &lock->val);
+ u16 ticket = val >> 16;
+
+ if (ticket == (u16)val)
+ return;
+
+ /*
+ * atomic_cond_read_acquire() is RCpc, but rather than defining a
+ * custom cond_read_rcsc() here we just emit a full fence. We only
+ * need the prior reads before subsequent writes ordering from
+ * smb_mb(), but as atomic_cond_read_acquire() just emits reads and we
+ * have no outstanding writes due to the atomic_fetch_add() the extra
+ * orderings are free.
+ */
+ atomic_cond_read_acquire(&lock->val, ticket == (u16)VAL);
+ smp_mb();
+}
+
+static __always_inline bool ticket_spin_trylock(arch_spinlock_t *lock)
+{
+ u32 old = atomic_read(&lock->val);
+
+ if ((old >> 16) != (old & 0xffff))
+ return false;
+
+ return atomic_try_cmpxchg(&lock->val, &old, old + (1<<16)); /* SC, for RCsc */
+}
+
+static __always_inline void ticket_spin_unlock(arch_spinlock_t *lock)
+{
+ u16 *ptr = (u16 *)lock + IS_ENABLED(CONFIG_CPU_BIG_ENDIAN);
+ u32 val = atomic_read(&lock->val);
+
+ smp_store_release(ptr, (u16)val + 1);
+}
+
+static __always_inline int ticket_spin_value_unlocked(arch_spinlock_t lock)
+{
+ u32 val = lock.val.counter;
+
+ return ((val >> 16) == (val & 0xffff));
+}
+
+static __always_inline int ticket_spin_is_locked(arch_spinlock_t *lock)
+{
+ arch_spinlock_t val = READ_ONCE(*lock);
+
+ return !ticket_spin_value_unlocked(val);
+}
+
+static __always_inline int ticket_spin_is_contended(arch_spinlock_t *lock)
+{
+ u32 val = atomic_read(&lock->val);
+
+ return (s16)((val >> 16) - (val & 0xffff)) > 1;
+}
+
+/*
+ * Remapping spinlock architecture specific functions to the corresponding
+ * ticket spinlock functions.
+ */
+#define arch_spin_is_locked(l) ticket_spin_is_locked(l)
+#define arch_spin_is_contended(l) ticket_spin_is_contended(l)
+#define arch_spin_value_unlocked(l) ticket_spin_value_unlocked(l)
+#define arch_spin_lock(l) ticket_spin_lock(l)
+#define arch_spin_trylock(l) ticket_spin_trylock(l)
+#define arch_spin_unlock(l) ticket_spin_unlock(l)
+
+#endif /* __ASM_GENERIC_TICKET_SPINLOCK_H */
--
2.39.2


2024-05-28 15:18:48

by Alexandre Ghiti

Subject: [PATCH 7/7] riscv: Add qspinlock support based on Zabha extension

In order to produce a generic kernel, a user can select
CONFIG_QUEUED_SPINLOCKS, which will fall back at runtime to the ticket
spinlock implementation if Zabha is not present.

Note that we can't use alternatives here: the discovery of extensions
happens too late, and we need to start with the qspinlock implementation
because the ticket spinlock implementation would pollute the spinlock
value. So let's use static keys instead.

This is largely based on Guo's work and Leonardo reviews at [1].

Link: https://lore.kernel.org/linux-riscv/[email protected]/ [1]
Signed-off-by: Alexandre Ghiti <[email protected]>
---
.../locking/queued-spinlocks/arch-support.txt | 2 +-
arch/riscv/Kconfig | 1 +
arch/riscv/include/asm/Kbuild | 4 +-
arch/riscv/include/asm/spinlock.h | 39 +++++++++++++++++++
arch/riscv/kernel/setup.c | 18 +++++++++
include/asm-generic/qspinlock.h | 2 +
include/asm-generic/ticket_spinlock.h | 2 +
7 files changed, 66 insertions(+), 2 deletions(-)
create mode 100644 arch/riscv/include/asm/spinlock.h

diff --git a/Documentation/features/locking/queued-spinlocks/arch-support.txt b/Documentation/features/locking/queued-spinlocks/arch-support.txt
index 22f2990392ff..cf26042480e2 100644
--- a/Documentation/features/locking/queued-spinlocks/arch-support.txt
+++ b/Documentation/features/locking/queued-spinlocks/arch-support.txt
@@ -20,7 +20,7 @@
| openrisc: | ok |
| parisc: | TODO |
| powerpc: | ok |
- | riscv: | TODO |
+ | riscv: | ok |
| s390: | TODO |
| sh: | TODO |
| sparc: | ok |
diff --git a/arch/riscv/Kconfig b/arch/riscv/Kconfig
index 184a9edb04e0..ccf1703edeb9 100644
--- a/arch/riscv/Kconfig
+++ b/arch/riscv/Kconfig
@@ -59,6 +59,7 @@ config RISCV
select ARCH_SUPPORTS_SHADOW_CALL_STACK if HAVE_SHADOW_CALL_STACK
select ARCH_USE_MEMTEST
select ARCH_USE_QUEUED_RWLOCKS
+ select ARCH_USE_QUEUED_SPINLOCKS if TOOLCHAIN_HAS_ZABHA
select ARCH_USES_CFI_TRAPS if CFI_CLANG
select ARCH_WANT_BATCHED_UNMAP_TLB_FLUSH if SMP && MMU
select ARCH_WANT_DEFAULT_TOPDOWN_MMAP_LAYOUT if MMU
diff --git a/arch/riscv/include/asm/Kbuild b/arch/riscv/include/asm/Kbuild
index 504f8b7e72d4..ad72f2bd4cc9 100644
--- a/arch/riscv/include/asm/Kbuild
+++ b/arch/riscv/include/asm/Kbuild
@@ -2,10 +2,12 @@
generic-y += early_ioremap.h
generic-y += flat.h
generic-y += kvm_para.h
+generic-y += mcs_spinlock.h
generic-y += parport.h
-generic-y += spinlock.h
generic-y += spinlock_types.h
+generic-y += ticket_spinlock.h
generic-y += qrwlock.h
generic-y += qrwlock_types.h
+generic-y += qspinlock.h
generic-y += user.h
generic-y += vmlinux.lds.h
diff --git a/arch/riscv/include/asm/spinlock.h b/arch/riscv/include/asm/spinlock.h
new file mode 100644
index 000000000000..e00429ac20ed
--- /dev/null
+++ b/arch/riscv/include/asm/spinlock.h
@@ -0,0 +1,39 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+
+#ifndef __ASM_RISCV_SPINLOCK_H
+#define __ASM_RISCV_SPINLOCK_H
+
+#ifdef CONFIG_QUEUED_SPINLOCKS
+#define _Q_PENDING_LOOPS (1 << 9)
+
+#define __no_arch_spinlock_redefine
+#include <asm/ticket_spinlock.h>
+#include <asm/qspinlock.h>
+#include <asm/alternative.h>
+
+DECLARE_STATIC_KEY_TRUE(qspinlock_key);
+
+#define SPINLOCK_BASE_DECLARE(op, type, type_lock) \
+static __always_inline type arch_spin_##op(type_lock lock) \
+{ \
+ if (static_branch_unlikely(&qspinlock_key)) \
+ return queued_spin_##op(lock); \
+ return ticket_spin_##op(lock); \
+}
+
+SPINLOCK_BASE_DECLARE(lock, void, arch_spinlock_t *)
+SPINLOCK_BASE_DECLARE(unlock, void, arch_spinlock_t *)
+SPINLOCK_BASE_DECLARE(is_locked, int, arch_spinlock_t *)
+SPINLOCK_BASE_DECLARE(is_contended, int, arch_spinlock_t *)
+SPINLOCK_BASE_DECLARE(trylock, bool, arch_spinlock_t *)
+SPINLOCK_BASE_DECLARE(value_unlocked, int, arch_spinlock_t)
+
+#else
+
+#include <asm/ticket_spinlock.h>
+
+#endif
+
+#include <asm/qrwlock.h>
+
+#endif /* __ASM_RISCV_SPINLOCK_H */
diff --git a/arch/riscv/kernel/setup.c b/arch/riscv/kernel/setup.c
index 4f73c0ae44b2..31ce75522fd4 100644
--- a/arch/riscv/kernel/setup.c
+++ b/arch/riscv/kernel/setup.c
@@ -244,6 +244,23 @@ static void __init parse_dtb(void)
#endif
}

+DEFINE_STATIC_KEY_TRUE(qspinlock_key);
+EXPORT_SYMBOL(qspinlock_key);
+
+static void __init riscv_spinlock_init(void)
+{
+ asm goto(ALTERNATIVE("nop", "j %[qspinlock]", 0, RISCV_ISA_EXT_ZABHA, 1)
+ : : : : qspinlock);
+
+ static_branch_disable(&qspinlock_key);
+ pr_info("Ticket spinlock: enabled\n");
+
+ return;
+
+qspinlock:
+ pr_info("Queued spinlock: enabled\n");
+}
+
extern void __init init_rt_signal_env(void);

void __init setup_arch(char **cmdline_p)
@@ -295,6 +312,7 @@ void __init setup_arch(char **cmdline_p)
riscv_set_dma_cache_alignment();

riscv_user_isa_enable();
+ riscv_spinlock_init();
}

bool arch_cpu_is_hotpluggable(int cpu)
diff --git a/include/asm-generic/qspinlock.h b/include/asm-generic/qspinlock.h
index 0655aa5b57b2..bf47cca2c375 100644
--- a/include/asm-generic/qspinlock.h
+++ b/include/asm-generic/qspinlock.h
@@ -136,6 +136,7 @@ static __always_inline bool virt_spin_lock(struct qspinlock *lock)
}
#endif

+#ifndef __no_arch_spinlock_redefine
/*
* Remapping spinlock architecture specific functions to the corresponding
* queued spinlock functions.
@@ -146,5 +147,6 @@ static __always_inline bool virt_spin_lock(struct qspinlock *lock)
#define arch_spin_lock(l) queued_spin_lock(l)
#define arch_spin_trylock(l) queued_spin_trylock(l)
#define arch_spin_unlock(l) queued_spin_unlock(l)
+#endif

#endif /* __ASM_GENERIC_QSPINLOCK_H */
diff --git a/include/asm-generic/ticket_spinlock.h b/include/asm-generic/ticket_spinlock.h
index cfcff22b37b3..325779970d8a 100644
--- a/include/asm-generic/ticket_spinlock.h
+++ b/include/asm-generic/ticket_spinlock.h
@@ -89,6 +89,7 @@ static __always_inline int ticket_spin_is_contended(arch_spinlock_t *lock)
return (s16)((val >> 16) - (val & 0xffff)) > 1;
}

+#ifndef __no_arch_spinlock_redefine
/*
* Remapping spinlock architecture specific functions to the corresponding
* ticket spinlock functions.
@@ -99,5 +100,6 @@ static __always_inline int ticket_spin_is_contended(arch_spinlock_t *lock)
#define arch_spin_lock(l) ticket_spin_lock(l)
#define arch_spin_trylock(l) ticket_spin_trylock(l)
#define arch_spin_unlock(l) ticket_spin_unlock(l)
+#endif

#endif /* __ASM_GENERIC_TICKET_SPINLOCK_H */
--
2.39.2
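
To make the dispatch concrete, this is roughly what
SPINLOCK_BASE_DECLARE(lock, void, arch_spinlock_t *) above expands to (a
hand expansion for illustration, not extra code in the patch):

static __always_inline void arch_spin_lock(arch_spinlock_t *lock)
{
	if (static_branch_unlikely(&qspinlock_key))
		return queued_spin_lock(lock);
	return ticket_spin_lock(lock);
}

qspinlock_key is defined true, so a kernel built with
CONFIG_QUEUED_SPINLOCKS starts out on the queued implementation;
riscv_spinlock_init() disables the key when Zabha is not detected, after
which every arch_spin_*() call falls through to the ticket
implementation.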


2024-05-28 15:22:57

by Conor Dooley

Subject: Re: [PATCH 4/7] riscv: Implement xchg8/16() using Zabha

On Tue, May 28, 2024 at 05:10:49PM +0200, Alexandre Ghiti wrote:
\
> diff --git a/arch/riscv/include/asm/hwcap.h b/arch/riscv/include/asm/hwcap.h
> index e17d0078a651..f71ddd2ca163 100644
> --- a/arch/riscv/include/asm/hwcap.h
> +++ b/arch/riscv/include/asm/hwcap.h
> @@ -81,6 +81,7 @@
> #define RISCV_ISA_EXT_ZTSO 72
> #define RISCV_ISA_EXT_ZACAS 73
> #define RISCV_ISA_EXT_XANDESPMU 74
> +#define RISCV_ISA_EXT_ZABHA 75
>
> #define RISCV_ISA_EXT_XLINUXENVCFG 127
>
> diff --git a/arch/riscv/kernel/cpufeature.c b/arch/riscv/kernel/cpufeature.c
> index 3ed2359eae35..8d0f56dd2f53 100644
> --- a/arch/riscv/kernel/cpufeature.c
> +++ b/arch/riscv/kernel/cpufeature.c
> @@ -257,6 +257,7 @@ const struct riscv_isa_ext_data riscv_isa_ext[] = {
> __RISCV_ISA_EXT_DATA(zihintpause, RISCV_ISA_EXT_ZIHINTPAUSE),
> __RISCV_ISA_EXT_DATA(zihpm, RISCV_ISA_EXT_ZIHPM),
> __RISCV_ISA_EXT_DATA(zacas, RISCV_ISA_EXT_ZACAS),
> + __RISCV_ISA_EXT_DATA(zabha, RISCV_ISA_EXT_ZABHA),
> __RISCV_ISA_EXT_DATA(zfa, RISCV_ISA_EXT_ZFA),
> __RISCV_ISA_EXT_DATA(zfh, RISCV_ISA_EXT_ZFH),
> __RISCV_ISA_EXT_DATA(zfhmin, RISCV_ISA_EXT_ZFHMIN),

You're missing a dt-binding patch in this series adding zabha.

Thanks,
Conor.



2024-05-28 15:35:15

by Conor Dooley

Subject: Re: [PATCH 1/7] riscv: Implement cmpxchg32/64() using Zacas

On Tue, May 28, 2024 at 05:10:46PM +0200, Alexandre Ghiti wrote:
> This adds runtime support for Zacas in cmpxchg operations.
>
> Signed-off-by: Alexandre Ghiti <[email protected]>
> ---
> arch/riscv/Kconfig | 17 +++++++++++++++++
> arch/riscv/Makefile | 11 +++++++++++
> arch/riscv/include/asm/cmpxchg.h | 23 ++++++++++++++++++++---
> 3 files changed, 48 insertions(+), 3 deletions(-)
>
> diff --git a/arch/riscv/Kconfig b/arch/riscv/Kconfig
> index 8a0f403432e8..b443def70139 100644
> --- a/arch/riscv/Kconfig
> +++ b/arch/riscv/Kconfig
> @@ -579,6 +579,23 @@ config RISCV_ISA_V_PREEMPTIVE
> preemption. Enabling this config will result in higher memory
> consumption due to the allocation of per-task's kernel Vector context.
>
> +config TOOLCHAIN_HAS_ZACAS
> + bool
> + default y
> + depends on !64BIT || $(cc-option,-mabi=lp64 -march=rv64ima_zacas)
> + depends on !32BIT || $(cc-option,-mabi=ilp32 -march=rv32ima_zacas)
> + depends on AS_HAS_OPTION_ARCH
> +
> +config RISCV_ISA_ZACAS
> + bool "Zacas extension support for atomic CAS"
> + depends on TOOLCHAIN_HAS_ZACAS
> + default y
> + help
> + Adds support to use atomic CAS instead of LR/SC to implement kernel
> + atomic cmpxchg operation.

If you were a person compiling a kernel, would you be able to read this
and realise that this is safe to enable when their system does not
support atomic CAS? Please take a look at how other extensions
handle this, or the patch that I have been sending that tries to make
things clearer:
https://patchwork.kernel.org/project/linux-riscv/patch/20240528-varnish-status-9c22973093a0@spud/

> +
> + If you don't know what to do here, say Y.
> +
> config TOOLCHAIN_HAS_ZBB
> bool
> default y
> diff --git a/arch/riscv/Makefile b/arch/riscv/Makefile
> index 5b3115a19852..d5b60b87998c 100644
> --- a/arch/riscv/Makefile
> +++ b/arch/riscv/Makefile
> @@ -78,6 +78,17 @@ endif
> # Check if the toolchain supports Zihintpause extension
> riscv-march-$(CONFIG_TOOLCHAIN_HAS_ZIHINTPAUSE) := $(riscv-march-y)_zihintpause
>
> +# Check if the toolchain supports Zacas
> +ifdef CONFIG_AS_IS_LLVM
> +# Support for experimental Zacas was merged in LLVM 17, but the removal of
> +# the "experimental" was merged in LLVM 19.
> +KBUILD_CFLAGS += -menable-experimental-extensions
> +KBUILD_AFLAGS += -menable-experimental-extensions
> +riscv-march-y := $(riscv-march-y)_zacas1p0
> +else
> +riscv-march-$(CONFIG_TOOLCHAIN_HAS_ZACAS) := $(riscv-march-y)_zacas
> +endif

I'm almost certain that we discussed this before for vector and it was
decided to not enable experimental extensions (particularly as it is a
global option), and instead require the non-experimental versions.
This isn't even consistent with your TOOLCHAIN_HAS_ZACAS checks, that
will only enable the option for the ratified version. I think we should
continue to avoid enabling experimental extensions, even if that imposes
a requirement of having a bleeding edge toolchain to actually use the
extension.

Thanks,
Conor.



2024-05-28 18:00:43

by Andrea Parri

Subject: Re: [PATCH 4/7] riscv: Implement xchg8/16() using Zabha

> -#define __arch_xchg_masked(prepend, append, r, p, n) \
> +#define __arch_xchg_masked(swap_sfx, prepend, append, r, p, n) \

This actually indicates a problem in the current (aka, no Zabha)
implementation: without your series, xchg16() gets mapped to

lr.w a2,(a3)
and a1,a2,a5
or a1,a1,a4
sc.w a1,a1,(a3)
bnez a1,43c <.L0^B1>

which is clearly wrong... (other "fully-ordered LR/SC sequences"
instead follow the mapping

lr.w a2,(a3)
and a1,a2,a5
or a1,a1,a4
sc.w.rl a1,a1,(a3)
bnez a1,43c <.L0^B1>
fence rw,rw )

A similar consideration for xchg8().

Andrea

2024-05-28 18:16:48

by Andrea Parri

Subject: Re: [PATCH 1/7] riscv: Implement cmpxchg32/64() using Zacas

> + asm goto(ALTERNATIVE("nop", "j %[zacas]", 0, \
> + RISCV_ISA_EXT_ZACAS, 1) \
> + : : : : zacas); \
> + \
> __asm__ __volatile__ ( \
> prepend \
> "0: lr" lr_sfx " %0, %2\n" \
> " bne %0, %z3, 1f\n" \
> - " sc" sc_sfx " %1, %z4, %2\n" \
> + " sc" sc_cas_sfx " %1, %z4, %2\n" \
> " bnez %1, 0b\n" \
> append \
> "1:\n" \
> : "=&r" (r), "=&r" (__rc), "+A" (*(p)) \
> : "rJ" (co o), "rJ" (n) \
> : "memory"); \
> + goto end; \
> + \
> +zacas: \
> + __asm__ __volatile__ ( \
> + prepend \
> + " amocas" sc_cas_sfx " %0, %z2, %1\n" \
> + append \
> + : "+&r" (r), "+A" (*(p)) \
> + : "rJ" (n) \
> + : "memory"); \

With this, a cmpxchg32() will result in something like

amocas.w.rl a5,a4,(s1)
fence rw,rw

(cf. my remarks in patch #4); this will/should provide enough sync,
but you might want to try the alternative and currently more common
mapping for "fully-ordered AMO sequences", aka

amocas.w.aqrl a5,a4,(s1)

Similarly for cmpxchg64 and other sizes.

Andrea

2024-05-28 19:31:21

by Nathan Chancellor

Subject: Re: [PATCH 2/7] riscv: Implement cmpxchg8/16() using Zabha

Hi Alexandre,

On Tue, May 28, 2024 at 05:10:47PM +0200, Alexandre Ghiti wrote:
> This adds runtime support for Zabha in cmpxchg8/16 operations.
>
> Signed-off-by: Alexandre Ghiti <[email protected]>
> ---
> arch/riscv/Kconfig | 16 ++++++++++++++++
> arch/riscv/Makefile | 10 ++++++++++
> arch/riscv/include/asm/cmpxchg.h | 26 ++++++++++++++++++++++++--
> 3 files changed, 50 insertions(+), 2 deletions(-)
>
> diff --git a/arch/riscv/Kconfig b/arch/riscv/Kconfig
> index b443def70139..05597719bb1c 100644
> --- a/arch/riscv/Kconfig
> +++ b/arch/riscv/Kconfig
> @@ -579,6 +579,22 @@ config RISCV_ISA_V_PREEMPTIVE
> preemption. Enabling this config will result in higher memory
> consumption due to the allocation of per-task's kernel Vector context.
>
> +config TOOLCHAIN_HAS_ZABHA
> + bool
> + default y
> + depends on !64BIT || $(cc-option,-mabi=lp64 -march=rv64ima_zabha)
> + depends on !32BIT || $(cc-option,-mabi=ilp32 -march=rv32ima_zabha)

This test does not take into account the need for
'-menable-experimental-extensions' and '1p0' in the '-march=' value with
clang 19, so it can never be enabled even if it is available.

I am not really sure how to succinctly account for this though, other
than duplicating and modifying the cc-option checks with a dependency on
either CC_IS_GCC or CC_IS_CLANG. Another option is taking the same
approach as the _SUPPORTS_DYNAMIC_FTRACE symbols and introduce
CLANG_HAS_ZABHA and GCC_HAS_ZABHA? That might not make it too ugly.

I think the ZACAS patch has a similar issue, it just isn't noticeable
with clang 19 but it should be with clang 17 and 18.

> + depends on AS_HAS_OPTION_ARCH
> +
> +config RISCV_ISA_ZABHA
> + bool "Zabha extension support for atomic byte/half-word operations"
> + depends on TOOLCHAIN_HAS_ZABHA
> + default y
> + help
> + Adds support to use atomic byte/half-word operations in the kernel.
> +
> + If you don't know what to do here, say Y.
> +
> config TOOLCHAIN_HAS_ZACAS
> bool
> default y
> diff --git a/arch/riscv/Makefile b/arch/riscv/Makefile
> index d5b60b87998c..f58ac921dece 100644
> --- a/arch/riscv/Makefile
> +++ b/arch/riscv/Makefile
> @@ -89,6 +89,16 @@ else
> riscv-march-$(CONFIG_TOOLCHAIN_HAS_ZACAS) := $(riscv-march-y)_zacas
> endif
>
> +# Check if the toolchain supports Zabha
> +ifdef CONFIG_AS_IS_LLVM
> +# Support for experimental Zabha was merged in LLVM 19.
> +KBUILD_CFLAGS += -menable-experimental-extensions
> +KBUILD_AFLAGS += -menable-experimental-extensions
> +riscv-march-y := $(riscv-march-y)_zabha1p0

This block should have some dependency on CONFIG_TOOLCHAIN_HAS_ZABHA as
well right? Otherwise, the build breaks with LLVM toolchains that do not
support zabha, like LLVM 18.1.x:

clang: error: invalid arch name 'rv64imac_zihintpause_zacas1p0_zabha1p0', unsupported version number 1.0 for extension 'zabha'

I think the zacas patch has the same bug.

I think that it would be good to consolidate the adding of
'-menable-experimental-extensions' to the compiler and assembler flags
to perhaps having a hidden symbol like CONFIG_EXPERIMENTAL_EXTENSIONS
that is selected by any extension that is experimental for the
particular toolchain version.

config EXPERIMENTAL_EXTENSIONS
bool

config TOOLCHAIN_HAS_ZABHA
def_bool y
select EXPERIMENTAL_EXTENSIONS if CC_IS_CLANG
...

config TOOLCHAIN_HAS_ZACAS
def_bool y
# ZACAS was experimental until Clang 19: https://github.com/llvm/llvm-project/commit/95aab69c109adf29e183090c25dc95c773215746
select EXPERIMENTAL_EXTENSIONS if CC_IS_CLANG && CLANG_VERSION < 190000
...

Then in the Makefile:

ifdef CONFIG_EXPERIMENTAL_EXTENSIONS
KBUILD_AFLAGS += -menable-experimental-extensions
KBUILD_CFLAGS += -menable-experimental-extensions
endif

> +else
> +riscv-march-$(CONFIG_TOOLCHAIN_HAS_ZABHA) := $(riscv-march-y)_zabha
> +endif
> +
> # Remove F,D,V from isa string for all. Keep extensions between "fd" and "v" by
> # matching non-v and non-multi-letter extensions out with the filter ([^v_]*)
> KBUILD_CFLAGS += -march=$(shell echo $(riscv-march-y) | sed -E 's/(rv32ima|rv64ima)fd([^v_]*)v?/\1\2/')
> diff --git a/arch/riscv/include/asm/cmpxchg.h b/arch/riscv/include/asm/cmpxchg.h
> index 1c50b4821ac8..65de9771078e 100644
> --- a/arch/riscv/include/asm/cmpxchg.h
> +++ b/arch/riscv/include/asm/cmpxchg.h
> @@ -103,8 +103,14 @@
> * indicated by comparing RETURN with OLD.
> */
>
> -#define __arch_cmpxchg_masked(sc_sfx, prepend, append, r, p, o, n) \
> +#define __arch_cmpxchg_masked(sc_sfx, cas_sfx, prepend, append, r, p, o, n) \
> ({ \
> + __label__ zabha, end; \
> + \
> + asm goto(ALTERNATIVE("nop", "j %[zabha]", 0, \
> + RISCV_ISA_EXT_ZABHA, 1) \
> + : : : : zabha); \
> + \
> u32 *__ptr32b = (u32 *)((ulong)(p) & ~0x3); \
> ulong __s = ((ulong)(p) & (0x4 - sizeof(*p))) * BITS_PER_BYTE; \
> ulong __mask = GENMASK(((sizeof(*p)) * BITS_PER_BYTE) - 1, 0) \
> @@ -131,6 +137,17 @@
> : "memory"); \
> \
> r = (__typeof__(*(p)))((__retx & __mask) >> __s); \
> + goto end; \
> + \
> +zabha: \
> + __asm__ __volatile__ ( \
> + prepend \
> + " amocas" cas_sfx " %0, %z2, %1\n" \

This should probably have some dependency on CONFIG_RISCV_ISA_ZABHA? I get the
following with GCC 13.2.0:

include/linux/atomic/atomic-arch-fallback.h: Assembler messages:
include/linux/atomic/atomic-arch-fallback.h:2108: Error: unrecognized opcode `amocas.w a4,a3,0(s1)'

> + append \
> + : "+&r" (r), "+A" (*(p)) \
> + : "rJ" (n) \
> + : "memory"); \
> +end: \

I get a lot of warnings from this statement and the one added by the
previous patch for zacas, which is a C23 extension:

include/linux/atomic/atomic-arch-fallback.h:4234:9: warning: label at end of compound statement is a C23 extension [-Wc23-extensions]
include/linux/atomic/atomic-arch-fallback.h:89:29: note: expanded from macro 'raw_cmpxchg_relaxed'
89 | #define raw_cmpxchg_relaxed arch_cmpxchg_relaxed
| ^
arch/riscv/include/asm/cmpxchg.h:219:2: note: expanded from macro 'arch_cmpxchg_relaxed'
219 | _arch_cmpxchg((ptr), (o), (n), "", "", "")
| ^
arch/riscv/include/asm/cmpxchg.h:200:3: note: expanded from macro '_arch_cmpxchg'
200 | __arch_cmpxchg_masked(sc_sfx, ".h" sc_sfx, \
| ^
arch/riscv/include/asm/cmpxchg.h:150:14: note: expanded from macro '__arch_cmpxchg_masked'
150 | end: \
| ^

This resolves it:

diff --git a/arch/riscv/include/asm/cmpxchg.h b/arch/riscv/include/asm/cmpxchg.h
index ba3ffc2fcdd0..57aa4a554278 100644
--- a/arch/riscv/include/asm/cmpxchg.h
+++ b/arch/riscv/include/asm/cmpxchg.h
@@ -147,7 +147,7 @@ zabha: \
: "+&r" (r), "+A" (*(p)) \
: "rJ" (n) \
: "memory"); \
-end: \
+end:; \
})

#define __arch_cmpxchg(lr_sfx, sc_cas_sfx, prepend, append, r, p, co, o, n) \
@@ -180,7 +180,7 @@ zacas: \
: "+&r" (r), "+A" (*(p)) \
: "rJ" (n) \
: "memory"); \
-end: \
+end:; \
})

#define _arch_cmpxchg(ptr, old, new, sc_sfx, prepend, append) \

> })
>
> #define __arch_cmpxchg(lr_sfx, sc_cas_sfx, prepend, append, r, p, co, o, n) \
> @@ -175,8 +192,13 @@ end: \
> \
> switch (sizeof(*__ptr)) { \
> case 1: \
> + __arch_cmpxchg_masked(sc_sfx, ".b" sc_sfx, \
> + prepend, append, \
> + __ret, __ptr, __old, __new); \
> + break; \
> case 2: \
> - __arch_cmpxchg_masked(sc_sfx, prepend, append, \
> + __arch_cmpxchg_masked(sc_sfx, ".h" sc_sfx, \
> + prepend, append, \
> __ret, __ptr, __old, __new); \
> break; \
> case 4: \
> --
> 2.39.2
>
>
> _______________________________________________
> linux-riscv mailing list
> [email protected]
> http://lists.infradead.org/mailman/listinfo/linux-riscv

2024-05-28 23:56:00

by Andrea Parri

Subject: Re: [PATCH 2/7] riscv: Implement cmpxchg8/16() using Zabha

> +zabha: \
> + __asm__ __volatile__ ( \
> + prepend \
> + " amocas" cas_sfx " %0, %z2, %1\n" \
> + append \
> + : "+&r" (r), "+A" (*(p)) \
> + : "rJ" (n) \
> + : "memory"); \

Couldn't a platform have Zabha but not have Zacas? I don't see how this
asm goto could work in such case, what am I missing?

Andrea

2024-05-29 00:56:41

by Andrea Parri

Subject: Re: [PATCH 7/7] riscv: Add qspinlock support based on Zabha extension

> + select ARCH_USE_QUEUED_SPINLOCKS if TOOLCHAIN_HAS_ZABHA

IIUC, we should make sure qspinlocks run with ARCH_WEAK_RELEASE_ACQUIRE,
perhaps a similar select for the latter? (not a kconfig expert)

Andrea

2024-05-29 06:16:18

by Alexandre Ghiti

Subject: Re: [PATCH 4/7] riscv: Implement xchg8/16() using Zabha

Hi Conor,

On Tue, May 28, 2024 at 5:22 PM Conor Dooley <[email protected]> wrote:
>
> On Tue, May 28, 2024 at 05:10:49PM +0200, Alexandre Ghiti wrote:
> \
> > diff --git a/arch/riscv/include/asm/hwcap.h b/arch/riscv/include/asm/hwcap.h
> > index e17d0078a651..f71ddd2ca163 100644
> > --- a/arch/riscv/include/asm/hwcap.h
> > +++ b/arch/riscv/include/asm/hwcap.h
> > @@ -81,6 +81,7 @@
> > #define RISCV_ISA_EXT_ZTSO 72
> > #define RISCV_ISA_EXT_ZACAS 73
> > #define RISCV_ISA_EXT_XANDESPMU 74
> > +#define RISCV_ISA_EXT_ZABHA 75
> >
> > #define RISCV_ISA_EXT_XLINUXENVCFG 127
> >
> > diff --git a/arch/riscv/kernel/cpufeature.c b/arch/riscv/kernel/cpufeature.c
> > index 3ed2359eae35..8d0f56dd2f53 100644
> > --- a/arch/riscv/kernel/cpufeature.c
> > +++ b/arch/riscv/kernel/cpufeature.c
> > @@ -257,6 +257,7 @@ const struct riscv_isa_ext_data riscv_isa_ext[] = {
> > __RISCV_ISA_EXT_DATA(zihintpause, RISCV_ISA_EXT_ZIHINTPAUSE),
> > __RISCV_ISA_EXT_DATA(zihpm, RISCV_ISA_EXT_ZIHPM),
> > __RISCV_ISA_EXT_DATA(zacas, RISCV_ISA_EXT_ZACAS),
> > + __RISCV_ISA_EXT_DATA(zabha, RISCV_ISA_EXT_ZABHA),
> > __RISCV_ISA_EXT_DATA(zfa, RISCV_ISA_EXT_ZFA),
> > __RISCV_ISA_EXT_DATA(zfh, RISCV_ISA_EXT_ZFH),
> > __RISCV_ISA_EXT_DATA(zfhmin, RISCV_ISA_EXT_ZFHMIN),
>
> You're missing a dt-binding patch in this series adding zabha.

Thanks, I will add that to the v2.

>
> Thanks,
> Conor.

2024-05-29 08:18:25

by Alexandre Ghiti

Subject: Re: [PATCH 4/7] riscv: Implement xchg8/16() using Zabha

Hi Andrea,

On Tue, May 28, 2024 at 8:00 PM Andrea Parri <[email protected]> wrote:
>
> > -#define __arch_xchg_masked(prepend, append, r, p, n) \
> > +#define __arch_xchg_masked(swap_sfx, prepend, append, r, p, n) \
>
> This actually indicates a problem in the current (aka, no Zabha)
> implementation: without your series, xchg16() gets mapped to
>
> lr.w a2,(a3)
> and a1,a2,a5
> or a1,a1,a4
> sc.w a1,a1,(a3)
> bnez a1,43c <.L0^B1>
>
> which is clearly wrong... (other "fully-ordered LR/SC sequences"
> instead follow the mapping
>
> lr.w a2,(a3)
> and a1,a2,a5
> or a1,a1,a4
> sc.w.rl a1,a1,(a3)
> bnez a1,43c <.L0^B1>
> fence rw,rw )
>
> A similar consideration for xchg8().

Ok, I will fix that separately and get it merged in -rc2.

Thanks,

Alex

>
> Andrea

2024-05-29 09:24:15

by Guo Ren

Subject: Re: [PATCH 7/7] riscv: Add qspinlock support based on Zabha extension

On Tue, May 28, 2024 at 11:18 PM Alexandre Ghiti <[email protected]> wrote:
>
> In order to produce a generic kernel, a user can select
> CONFIG_QUEUED_SPINLOCKS, which will fall back at runtime to the ticket
> spinlock implementation if Zabha is not present.
>
> Note that we can't use alternatives here: the discovery of extensions
> happens too late, and we need to start with the qspinlock implementation
> because the ticket spinlock implementation would pollute the spinlock
> value. So let's use static keys instead.
>
> This is largely based on Guo's work and Leonardo reviews at [1].
>
> Link: https://lore.kernel.org/linux-riscv/[email protected]/ [1]
> Signed-off-by: Alexandre Ghiti <[email protected]>
> ---
> .../locking/queued-spinlocks/arch-support.txt | 2 +-
> arch/riscv/Kconfig | 1 +
> arch/riscv/include/asm/Kbuild | 4 +-
> arch/riscv/include/asm/spinlock.h | 39 +++++++++++++++++++
> arch/riscv/kernel/setup.c | 18 +++++++++
> include/asm-generic/qspinlock.h | 2 +
> include/asm-generic/ticket_spinlock.h | 2 +
> 7 files changed, 66 insertions(+), 2 deletions(-)
> create mode 100644 arch/riscv/include/asm/spinlock.h
>
> diff --git a/Documentation/features/locking/queued-spinlocks/arch-support.txt b/Documentation/features/locking/queued-spinlocks/arch-support.txt
> index 22f2990392ff..cf26042480e2 100644
> --- a/Documentation/features/locking/queued-spinlocks/arch-support.txt
> +++ b/Documentation/features/locking/queued-spinlocks/arch-support.txt
> @@ -20,7 +20,7 @@
> | openrisc: | ok |
> | parisc: | TODO |
> | powerpc: | ok |
> - | riscv: | TODO |
> + | riscv: | ok |
> | s390: | TODO |
> | sh: | TODO |
> | sparc: | ok |
> diff --git a/arch/riscv/Kconfig b/arch/riscv/Kconfig
> index 184a9edb04e0..ccf1703edeb9 100644
> --- a/arch/riscv/Kconfig
> +++ b/arch/riscv/Kconfig
> @@ -59,6 +59,7 @@ config RISCV
> select ARCH_SUPPORTS_SHADOW_CALL_STACK if HAVE_SHADOW_CALL_STACK
> select ARCH_USE_MEMTEST
> select ARCH_USE_QUEUED_RWLOCKS
> + select ARCH_USE_QUEUED_SPINLOCKS if TOOLCHAIN_HAS_ZABHA
Using qspinlock or not should depend on the real hardware capabilities,
not on a compiler flag. That's why I introduced three Kconfig options
(combo-spinlock, ticket-spinlock and qspinlock); the combo-spinlock is
compatible with all hardware platforms but wastes some qspinlock code size.

> select ARCH_USES_CFI_TRAPS if CFI_CLANG
> select ARCH_WANT_BATCHED_UNMAP_TLB_FLUSH if SMP && MMU
> select ARCH_WANT_DEFAULT_TOPDOWN_MMAP_LAYOUT if MMU
> diff --git a/arch/riscv/include/asm/Kbuild b/arch/riscv/include/asm/Kbuild
> index 504f8b7e72d4..ad72f2bd4cc9 100644
> --- a/arch/riscv/include/asm/Kbuild
> +++ b/arch/riscv/include/asm/Kbuild
> @@ -2,10 +2,12 @@
> generic-y += early_ioremap.h
> generic-y += flat.h
> generic-y += kvm_para.h
> +generic-y += mcs_spinlock.h
> generic-y += parport.h
> -generic-y += spinlock.h
> generic-y += spinlock_types.h
> +generic-y += ticket_spinlock.h
> generic-y += qrwlock.h
> generic-y += qrwlock_types.h
> +generic-y += qspinlock.h
> generic-y += user.h
> generic-y += vmlinux.lds.h
> diff --git a/arch/riscv/include/asm/spinlock.h b/arch/riscv/include/asm/spinlock.h
> new file mode 100644
> index 000000000000..e00429ac20ed
> --- /dev/null
> +++ b/arch/riscv/include/asm/spinlock.h
> @@ -0,0 +1,39 @@
> +/* SPDX-License-Identifier: GPL-2.0 */
> +
> +#ifndef __ASM_RISCV_SPINLOCK_H
> +#define __ASM_RISCV_SPINLOCK_H
> +
> +#ifdef CONFIG_QUEUED_SPINLOCKS
> +#define _Q_PENDING_LOOPS (1 << 9)
> +
> +#define __no_arch_spinlock_redefine
> +#include <asm/ticket_spinlock.h>
> +#include <asm/qspinlock.h>
> +#include <asm/alternative.h>
> +
> +DECLARE_STATIC_KEY_TRUE(qspinlock_key);
> +
> +#define SPINLOCK_BASE_DECLARE(op, type, type_lock) \
> +static __always_inline type arch_spin_##op(type_lock lock) \
> +{ \
> + if (static_branch_unlikely(&qspinlock_key)) \
> + return queued_spin_##op(lock); \
> + return ticket_spin_##op(lock); \
> +}
> +
> +SPINLOCK_BASE_DECLARE(lock, void, arch_spinlock_t *)
> +SPINLOCK_BASE_DECLARE(unlock, void, arch_spinlock_t *)
> +SPINLOCK_BASE_DECLARE(is_locked, int, arch_spinlock_t *)
> +SPINLOCK_BASE_DECLARE(is_contended, int, arch_spinlock_t *)
> +SPINLOCK_BASE_DECLARE(trylock, bool, arch_spinlock_t *)
> +SPINLOCK_BASE_DECLARE(value_unlocked, int, arch_spinlock_t)
> +
> +#else
> +
> +#include <asm/ticket_spinlock.h>
> +
> +#endif
> +
> +#include <asm/qrwlock.h>
> +
> +#endif /* __ASM_RISCV_SPINLOCK_H */
> diff --git a/arch/riscv/kernel/setup.c b/arch/riscv/kernel/setup.c
> index 4f73c0ae44b2..31ce75522fd4 100644
> --- a/arch/riscv/kernel/setup.c
> +++ b/arch/riscv/kernel/setup.c
> @@ -244,6 +244,23 @@ static void __init parse_dtb(void)
> #endif
> }
>
> +DEFINE_STATIC_KEY_TRUE(qspinlock_key);
> +EXPORT_SYMBOL(qspinlock_key);
> +
> +static void __init riscv_spinlock_init(void)
> +{
> + asm goto(ALTERNATIVE("nop", "j %[qspinlock]", 0, RISCV_ISA_EXT_ZABHA, 1)
> + : : : : qspinlock);
> +
> + static_branch_disable(&qspinlock_key);
> + pr_info("Ticket spinlock: enabled\n");
> +
> + return;
> +
> +qspinlock:
> + pr_info("Queued spinlock: enabled\n");
> +}
> +
> extern void __init init_rt_signal_env(void);
>
> void __init setup_arch(char **cmdline_p)
> @@ -295,6 +312,7 @@ void __init setup_arch(char **cmdline_p)
> riscv_set_dma_cache_alignment();
>
> riscv_user_isa_enable();
> + riscv_spinlock_init();
> }
>
> bool arch_cpu_is_hotpluggable(int cpu)
> diff --git a/include/asm-generic/qspinlock.h b/include/asm-generic/qspinlock.h
> index 0655aa5b57b2..bf47cca2c375 100644
> --- a/include/asm-generic/qspinlock.h
> +++ b/include/asm-generic/qspinlock.h
> @@ -136,6 +136,7 @@ static __always_inline bool virt_spin_lock(struct qspinlock *lock)
> }
> #endif
>
> +#ifndef __no_arch_spinlock_redefine
> /*
> * Remapping spinlock architecture specific functions to the corresponding
> * queued spinlock functions.
> @@ -146,5 +147,6 @@ static __always_inline bool virt_spin_lock(struct qspinlock *lock)
> #define arch_spin_lock(l) queued_spin_lock(l)
> #define arch_spin_trylock(l) queued_spin_trylock(l)
> #define arch_spin_unlock(l) queued_spin_unlock(l)
> +#endif
>
> #endif /* __ASM_GENERIC_QSPINLOCK_H */
> diff --git a/include/asm-generic/ticket_spinlock.h b/include/asm-generic/ticket_spinlock.h
> index cfcff22b37b3..325779970d8a 100644
> --- a/include/asm-generic/ticket_spinlock.h
> +++ b/include/asm-generic/ticket_spinlock.h
> @@ -89,6 +89,7 @@ static __always_inline int ticket_spin_is_contended(arch_spinlock_t *lock)
> return (s16)((val >> 16) - (val & 0xffff)) > 1;
> }
>
> +#ifndef __no_arch_spinlock_redefine
> /*
> * Remapping spinlock architecture specific functions to the corresponding
> * ticket spinlock functions.
> @@ -99,5 +100,6 @@ static __always_inline int ticket_spin_is_contended(arch_spinlock_t *lock)
> #define arch_spin_lock(l) ticket_spin_lock(l)
> #define arch_spin_trylock(l) ticket_spin_trylock(l)
> #define arch_spin_unlock(l) ticket_spin_unlock(l)
> +#endif
>
> #endif /* __ASM_GENERIC_TICKET_SPINLOCK_H */
> --
> 2.39.2
>


--
Best Regards
Guo Ren

2024-05-29 12:21:14

by Alexandre Ghiti

Subject: Re: [PATCH 1/7] riscv: Implement cmpxchg32/64() using Zacas

Hi Conor,

On Tue, May 28, 2024 at 5:34 PM Conor Dooley <[email protected]> wrote:
>
> On Tue, May 28, 2024 at 05:10:46PM +0200, Alexandre Ghiti wrote:
> > This adds runtime support for Zacas in cmpxchg operations.
> >
> > Signed-off-by: Alexandre Ghiti <[email protected]>
> > ---
> > arch/riscv/Kconfig | 17 +++++++++++++++++
> > arch/riscv/Makefile | 11 +++++++++++
> > arch/riscv/include/asm/cmpxchg.h | 23 ++++++++++++++++++++---
> > 3 files changed, 48 insertions(+), 3 deletions(-)
> >
> > diff --git a/arch/riscv/Kconfig b/arch/riscv/Kconfig
> > index 8a0f403432e8..b443def70139 100644
> > --- a/arch/riscv/Kconfig
> > +++ b/arch/riscv/Kconfig
> > @@ -579,6 +579,23 @@ config RISCV_ISA_V_PREEMPTIVE
> > preemption. Enabling this config will result in higher memory
> > consumption due to the allocation of per-task's kernel Vector context.
> >
> > +config TOOLCHAIN_HAS_ZACAS
> > + bool
> > + default y
> > + depends on !64BIT || $(cc-option,-mabi=lp64 -march=rv64ima_zacas)
> > + depends on !32BIT || $(cc-option,-mabi=ilp32 -march=rv32ima_zacas)
> > + depends on AS_HAS_OPTION_ARCH
> > +
> > +config RISCV_ISA_ZACAS
> > + bool "Zacas extension support for atomic CAS"
> > + depends on TOOLCHAIN_HAS_ZACAS
> > + default y
> > + help
> > + Adds support to use atomic CAS instead of LR/SC to implement kernel
> > + atomic cmpxchg operation.
>
> If you were a person compiling a kernel, would you be able to read this
> and realise that this is safe to enable when their system does not
> support atomic CAS? Please take a look at other how other extensions
> handle this, or the patch that I have been sending that tries to make
> things clearer:
> https://patchwork.kernel.org/project/linux-riscv/patch/20240528-varnish-status-9c22973093a0@spud/

Ok, I will go for: "Enable the use of the Zacas ISA-extension to
implement atomic cmpxchg operations when it is detected at boot."
And I will do the same for Zabha.

>
> > +
> > + If you don't know what to do here, say Y.
> > +
> > config TOOLCHAIN_HAS_ZBB
> > bool
> > default y
> > diff --git a/arch/riscv/Makefile b/arch/riscv/Makefile
> > index 5b3115a19852..d5b60b87998c 100644
> > --- a/arch/riscv/Makefile
> > +++ b/arch/riscv/Makefile
> > @@ -78,6 +78,17 @@ endif
> > # Check if the toolchain supports Zihintpause extension
> > riscv-march-$(CONFIG_TOOLCHAIN_HAS_ZIHINTPAUSE) := $(riscv-march-y)_zihintpause
> >
> > +# Check if the toolchain supports Zacas
> > +ifdef CONFIG_AS_IS_LLVM
> > +# Support for experimental Zacas was merged in LLVM 17, but the removal of
> > +# the "experimental" was merged in LLVM 19.
> > +KBUILD_CFLAGS += -menable-experimental-extensions
> > +KBUILD_AFLAGS += -menable-experimental-extensions
> > +riscv-march-y := $(riscv-march-y)_zacas1p0
> > +else
> > +riscv-march-$(CONFIG_TOOLCHAIN_HAS_ZACAS) := $(riscv-march-y)_zacas
> > +endif
>
> I'm almost certain that we discussed this before for vector and it was
> decided to not enable experimental extensions (particularly as it is a
> global option), and instead require the non-experimental versions.
> This isn't even consistent with your TOOLCHAIN_HAS_ZACAS checks, that
> will only enable the option for the ratified version.

Zacas was ratified, hence the removal of "experimental" in LLVM 19.
But unfortunately Zabha lacks such changes in LLVM, so that will make
this inconsistent (ratified extension but still experimental).

I'll remove the enablement of the experimental extensions then, so that
it will fail for LLVM < 19. And for Zabha, I'll try to push for the
removal of the experimental status in LLVM.

> I think we should
> continue to avoid enabling experimental extensions, even if that imposes
> a requirement of having a bleeding edge toolchain to actually use the
> extension.

Would it make sense to have a
CONFIG_RISCV_LLVM_ENABLE_EXPERIMENTAL_EXTENSIONS or similar? So that
people who want to play with those extensions will still be able to do
so without patching the kernel?

Thanks,

Alex

>
> Thanks,
> Conor.

2024-05-29 12:29:25

by Alexandre Ghiti

[permalink] [raw]
Subject: Re: [PATCH 2/7] riscv: Implement cmpxchg8/16() using Zabha

On Wed, May 29, 2024 at 1:54 AM Andrea Parri <[email protected]> wrote:
>
> > +zabha: \
> > + __asm__ __volatile__ ( \
> > + prepend \
> > + " amocas" cas_sfx " %0, %z2, %1\n" \
> > + append \
> > + : "+&r" (r), "+A" (*(p)) \
> > + : "rJ" (n) \
> > + : "memory"); \
>
> Couldn't a platform have Zabha but not have Zacas? I don't see how this
> asm goto could work in such case, what am I missing?

Zabha amocas.[b|h] instructions are only implemented if Zacas is
present, as the specification states: "If Zacas [2] extension is also
implemented, Zabha further provides the AMOCAS.[B|H] instructions."

But the code you mention is only for 8 and 16bit operations, so I
think we are good anyway?

Thanks,

Alex

>
> Andrea

2024-05-29 12:56:27

by Alexandre Ghiti

[permalink] [raw]
Subject: Re: [PATCH 2/7] riscv: Implement cmpxchg8/16() using Zabha

On Wed, May 29, 2024 at 2:29 PM Alexandre Ghiti <alexghiti@rivosinc.com> wrote:
>
> On Wed, May 29, 2024 at 1:54 AM Andrea Parri <[email protected]> wrote:
> >
> > > +zabha: \
> > > + __asm__ __volatile__ ( \
> > > + prepend \
> > > + " amocas" cas_sfx " %0, %z2, %1\n" \
> > > + append \
> > > + : "+&r" (r), "+A" (*(p)) \
> > > + : "rJ" (n) \
> > > + : "memory"); \
> >
> > Couldn't a platform have Zabha but not have Zacas? I don't see how this
> > asm goto could work in such case, what am I missing?
>
> Zabha amocas.[b|h] instructions are only implemented if Zacas is
> present, as the specification states: "If Zacas [2] extension is also
> implemented, Zabha further provides the AMOCAS.[B|H] instructions."
>
> But the code you mention is only for 8 and 16bit operations, so I
> think we are good anyway?

And I was wrong like Andrea noted privately. So I'll fix that too, thanks!
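
For the record, here is a minimal sketch of how the byte/half-word fast
path could be gated on both extensions. This is only an illustration of
the idea, not the actual fix for the next revision, and the LR/SC
fallback body is elided; it reuses the asm-goto/ALTERNATIVE pattern and
the amocas sequence already shown in this patch:

#define __arch_cmpxchg_masked(sc_sfx, cas_sfx, prepend, append, r, p, o, n) \
({									\
	__label__ no_zacas, zabha, end;					\
									\
	/* No Zacas means no AMOCAS at all: skip the fast path. */	\
	asm goto(ALTERNATIVE("j %[no_zacas]", "nop", 0,			\
			     RISCV_ISA_EXT_ZACAS, 1)			\
		 : : : : no_zacas);					\
	/* Zacas is there, so take the Zabha path if it is implemented. */ \
	asm goto(ALTERNATIVE("nop", "j %[zabha]", 0,			\
			     RISCV_ISA_EXT_ZABHA, 1)			\
		 : : : : zabha);					\
									\
no_zacas:								\
	/* ... masked LR/SC implementation, unchanged ... */		\
	goto end;							\
									\
zabha:									\
	__asm__ __volatile__ (						\
		prepend							\
		"	amocas" cas_sfx " %0, %z2, %1\n"		\
		append							\
		: "+&r" (r), "+A" (*(p))				\
		: "rJ" (n)						\
		: "memory");						\
end:;									\
})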

>
> Thanks,
>
> Alex
>
> >
> > Andrea

2024-05-29 12:59:05

by Alexandre Ghiti

[permalink] [raw]
Subject: Re: [PATCH 2/7] riscv: Implement cmpxchg8/16() using Zabha

Hi Nathan,

On Tue, May 28, 2024 at 9:31 PM Nathan Chancellor <[email protected]> wrote:
>
> Hi Alexandre,
>
> On Tue, May 28, 2024 at 05:10:47PM +0200, Alexandre Ghiti wrote:
> > This adds runtime support for Zabha in cmpxchg8/16 operations.
> >
> > Signed-off-by: Alexandre Ghiti <[email protected]>
> > ---
> > arch/riscv/Kconfig | 16 ++++++++++++++++
> > arch/riscv/Makefile | 10 ++++++++++
> > arch/riscv/include/asm/cmpxchg.h | 26 ++++++++++++++++++++++++--
> > 3 files changed, 50 insertions(+), 2 deletions(-)
> >
> > diff --git a/arch/riscv/Kconfig b/arch/riscv/Kconfig
> > index b443def70139..05597719bb1c 100644
> > --- a/arch/riscv/Kconfig
> > +++ b/arch/riscv/Kconfig
> > @@ -579,6 +579,22 @@ config RISCV_ISA_V_PREEMPTIVE
> > preemption. Enabling this config will result in higher memory
> > consumption due to the allocation of per-task's kernel Vector context.
> >
> > +config TOOLCHAIN_HAS_ZABHA
> > + bool
> > + default y
> > + depends on !64BIT || $(cc-option,-mabi=lp64 -march=rv64ima_zabha)
> > + depends on !32BIT || $(cc-option,-mabi=ilp32 -march=rv32ima_zabha)
>
> This test does not take into account the need for
> '-menable-experimental-extensions' and '1p0' in the '-march=' value with
> clang 19, so it can never be enabled even if it is available.

Then I missed that, I should have checked the generated code. Is the
extension version "1p0" in '-march=' only required for experimental
extensions?

>
> I am not really sure how to succinctly account for this though, other
> than duplicating and modifying the cc-option checks with a dependency on
> either CC_IS_GCC or CC_IS_CLANG. Another option is taking the same
> approach as the _SUPPORTS_DYNAMIC_FTRACE symbols and introduce
> CLANG_HAS_ZABHA and GCC_HAS_ZABHA? That might not make it too ugly.
>
> I think the ZACAS patch has a similar issue, it just isn't noticeable
> with clang 19 but it should be with clang 17 and 18.

But from Conor's comment here [1], we should not enable extensions that
are only experimental. In that case, we should be good with this.

[1] https://lore.kernel.org/linux-riscv/[email protected]/T/#mefb283477bce852f3713cbbb4ff002252281c9d5

>
> > + depends on AS_HAS_OPTION_ARCH
> > +
> > +config RISCV_ISA_ZABHA
> > + bool "Zabha extension support for atomic byte/half-word operations"
> > + depends on TOOLCHAIN_HAS_ZABHA
> > + default y
> > + help
> > + Adds support to use atomic byte/half-word operations in the kernel.
> > +
> > + If you don't know what to do here, say Y.
> > +
> > config TOOLCHAIN_HAS_ZACAS
> > bool
> > default y
> > diff --git a/arch/riscv/Makefile b/arch/riscv/Makefile
> > index d5b60b87998c..f58ac921dece 100644
> > --- a/arch/riscv/Makefile
> > +++ b/arch/riscv/Makefile
> > @@ -89,6 +89,16 @@ else
> > riscv-march-$(CONFIG_TOOLCHAIN_HAS_ZACAS) := $(riscv-march-y)_zacas
> > endif
> >
> > +# Check if the toolchain supports Zabha
> > +ifdef CONFIG_AS_IS_LLVM
> > +# Support for experimental Zabha was merged in LLVM 19.
> > +KBUILD_CFLAGS += -menable-experimental-extensions
> > +KBUILD_AFLAGS += -menable-experimental-extensions
> > +riscv-march-y := $(riscv-march-y)_zabha1p0
>
> This block should have some dependency on CONFIG_TOOLCHAIN_HAS_ZABHA as
> well right? Otherwise, the build breaks with LLVM toolchains that do not
> support zabha, like LLVM 18.1.x:
>
> clang: error: invalid arch name 'rv64imac_zihintpause_zacas1p0_zabha1p0', unsupported version number 1.0 for extension 'zabha'
>
> I think the zacas patch has the same bug.

Ok, I will fix that, thanks.

>
> I think that it would be good to consolidate the adding of
> '-menable-experimental-extensions' to the compiler and assembler flags
> to perhaps having a hidden symbol like CONFIG_EXPERIMENTAL_EXTENSIONS
> that is selected by any extension that is experimental for the
> particular toolchain version.
>
> config EXPERIMENTAL_EXTENSIONS
> bool
>
> config TOOLCHAIN_HAS_ZABHA
> def_bool y
> select EXPERIMENTAL_EXTENSIONS if CC_IS_CLANG
> ...
>
> config TOOLCHAIN_HAS_ZACAS
> def_bool y
> # ZACAS was experimental until Clang 19: https://github.com/llvm/llvm-project/commit/95aab69c109adf29e183090c25dc95c773215746
> select EXPERIMENTAL_EXTENSIONS if CC_IS_CLANG && CLANG_VERSION < 190000
> ...
>
> Then in the Makefile:
>
> ifdef CONFIG_EXPERIMENTAL_EXTENSIONS
> KBUILD_AFLAGS += -menable-experimental-extensions
> KBUILD_CFLAGS += -menable-experimental-extensions
> endif

That's a good idea to me, let's see what Conor thinks [2]

[2] https://lore.kernel.org/linux-riscv/[email protected]/T/#m1d798dfc4c27e5b6d9e14117d81b577ace123322

>
> > +else
> > +riscv-march-$(CONFIG_TOOLCHAIN_HAS_ZABHA) := $(riscv-march-y)_zabha
> > +endif
> > +
> > # Remove F,D,V from isa string for all. Keep extensions between "fd" and "v" by
> > # matching non-v and non-multi-letter extensions out with the filter ([^v_]*)
> > KBUILD_CFLAGS += -march=$(shell echo $(riscv-march-y) | sed -E 's/(rv32ima|rv64ima)fd([^v_]*)v?/\1\2/')
> > diff --git a/arch/riscv/include/asm/cmpxchg.h b/arch/riscv/include/asm/cmpxchg.h
> > index 1c50b4821ac8..65de9771078e 100644
> > --- a/arch/riscv/include/asm/cmpxchg.h
> > +++ b/arch/riscv/include/asm/cmpxchg.h
> > @@ -103,8 +103,14 @@
> > * indicated by comparing RETURN with OLD.
> > */
> >
> > -#define __arch_cmpxchg_masked(sc_sfx, prepend, append, r, p, o, n) \
> > +#define __arch_cmpxchg_masked(sc_sfx, cas_sfx, prepend, append, r, p, o, n) \
> > ({ \
> > + __label__ zabha, end; \
> > + \
> > + asm goto(ALTERNATIVE("nop", "j %[zabha]", 0, \
> > + RISCV_ISA_EXT_ZABHA, 1) \
> > + : : : : zabha); \
> > + \
> > u32 *__ptr32b = (u32 *)((ulong)(p) & ~0x3); \
> > ulong __s = ((ulong)(p) & (0x4 - sizeof(*p))) * BITS_PER_BYTE; \
> > ulong __mask = GENMASK(((sizeof(*p)) * BITS_PER_BYTE) - 1, 0) \
> > @@ -131,6 +137,17 @@
> > : "memory"); \
> > \
> > r = (__typeof__(*(p)))((__retx & __mask) >> __s); \
> > + goto end; \
> > + \
> > +zabha: \
> > + __asm__ __volatile__ ( \
> > + prepend \
> > + " amocas" cas_sfx " %0, %z2, %1\n" \
>
> This should probably have some dependency on CONFIG_RISCV_ISA_ZABHA? I get the
> following with GCC 13.2.0:
>
> include/linux/atomic/atomic-arch-fallback.h: Assembler messages:
> include/linux/atomic/atomic-arch-fallback.h:2108: Error: unrecognized opcode `amocas.w a4,a3,0(s1)'

Indeed, my test setup lacks a few things apparently, I will fix that, thanks.

>
> > + append \
> > + : "+&r" (r), "+A" (*(p)) \
> > + : "rJ" (n) \
> > + : "memory"); \
> > +end: \
>
> I get a lot of warnings from this statement and the one added by the
> previous patch for zacas, which is a C23 extension:
>
> include/linux/atomic/atomic-arch-fallback.h:4234:9: warning: label at end of compound statement is a C23 extension [-Wc23-extensions]
> include/linux/atomic/atomic-arch-fallback.h:89:29: note: expanded from macro 'raw_cmpxchg_relaxed'
> 89 | #define raw_cmpxchg_relaxed arch_cmpxchg_relaxed
> | ^
> arch/riscv/include/asm/cmpxchg.h:219:2: note: expanded from macro 'arch_cmpxchg_relaxed'
> 219 | _arch_cmpxchg((ptr), (o), (n), "", "", "")
> | ^
> arch/riscv/include/asm/cmpxchg.h:200:3: note: expanded from macro '_arch_cmpxchg'
> 200 | __arch_cmpxchg_masked(sc_sfx, ".h" sc_sfx, \
> | ^
> arch/riscv/include/asm/cmpxchg.h:150:14: note: expanded from macro '__arch_cmpxchg_masked'
> 150 | end: \
> | ^
>
> This resolves it:
>
> diff --git a/arch/riscv/include/asm/cmpxchg.h b/arch/riscv/include/asm/cmpxchg.h
> index ba3ffc2fcdd0..57aa4a554278 100644
> --- a/arch/riscv/include/asm/cmpxchg.h
> +++ b/arch/riscv/include/asm/cmpxchg.h
> @@ -147,7 +147,7 @@ zabha: \
> : "+&r" (r), "+A" (*(p)) \
> : "rJ" (n) \
> : "memory"); \
> -end: \
> +end:; \
> })
>
> #define __arch_cmpxchg(lr_sfx, sc_cas_sfx, prepend, append, r, p, co, o, n) \
> @@ -180,7 +180,7 @@ zacas: \
> : "+&r" (r), "+A" (*(p)) \
> : "rJ" (n) \
> : "memory"); \
> -end: \
> +end:; \
> })
>
> #define _arch_cmpxchg(ptr, old, new, sc_sfx, prepend, append) \

Weird, I missed this too, I will fix that, thanks.
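
For anyone wondering why the extra semicolon is needed: before C23 a
label must be followed by a statement, so a label sitting right before
the closing brace is what triggers the warning. A tiny standalone
illustration (not kernel code):

/* clang warns: label at end of compound statement is a C23 extension;
 * older compilers may reject this form outright.
 */
void before_fix(void)
{
	goto end;
end:
}

/* The labelled null statement is also accepted by C11/C17. */
void after_fix(void)
{
	goto end;
end:;
}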

>
> > })
> >
> > #define __arch_cmpxchg(lr_sfx, sc_cas_sfx, prepend, append, r, p, co, o, n) \
> > @@ -175,8 +192,13 @@ end: \
> > \
> > switch (sizeof(*__ptr)) { \
> > case 1: \
> > + __arch_cmpxchg_masked(sc_sfx, ".b" sc_sfx, \
> > + prepend, append, \
> > + __ret, __ptr, __old, __new); \
> > + break; \
> > case 2: \
> > - __arch_cmpxchg_masked(sc_sfx, prepend, append, \
> > + __arch_cmpxchg_masked(sc_sfx, ".h" sc_sfx, \
> > + prepend, append, \
> > __ret, __ptr, __old, __new); \
> > break; \
> > case 4: \
> > --
> > 2.39.2
> >
> >
> > _______________________________________________
> > linux-riscv mailing list
> > [email protected]
> > http://lists.infradead.org/mailman/listinfo/linux-riscv

Thanks for your thorough review!

Alex

2024-05-29 13:03:27

by Alexandre Ghiti

[permalink] [raw]
Subject: Re: [PATCH 7/7] riscv: Add qspinlock support based on Zabha extension

Hi Guo,

On Wed, May 29, 2024 at 11:24 AM Guo Ren <[email protected]> wrote:
>
> On Tue, May 28, 2024 at 11:18 PM Alexandre Ghiti <[email protected]> wrote:
> >
> > In order to produce a generic kernel, a user can select
> > CONFIG_QUEUED_SPINLOCKS which will fallback at runtime to the ticket
> > spinlock implementation if Zabha is not present.
> >
> > Note that we can't use alternatives here because the discovery of
> > extensions is done too late and we need to start with the qspinlock
> > implementation because the ticket spinlock implementation would pollute
> > the spinlock value, so let's use static keys.
> >
> > This is largely based on Guo's work and Leonardo reviews at [1].
> >
> > Link: https://lore.kernel.org/linux-riscv/[email protected]/ [1]
> > Signed-off-by: Alexandre Ghiti <[email protected]>
> > ---
> > .../locking/queued-spinlocks/arch-support.txt | 2 +-
> > arch/riscv/Kconfig | 1 +
> > arch/riscv/include/asm/Kbuild | 4 +-
> > arch/riscv/include/asm/spinlock.h | 39 +++++++++++++++++++
> > arch/riscv/kernel/setup.c | 18 +++++++++
> > include/asm-generic/qspinlock.h | 2 +
> > include/asm-generic/ticket_spinlock.h | 2 +
> > 7 files changed, 66 insertions(+), 2 deletions(-)
> > create mode 100644 arch/riscv/include/asm/spinlock.h
> >
> > diff --git a/Documentation/features/locking/queued-spinlocks/arch-support.txt b/Documentation/features/locking/queued-spinlocks/arch-support.txt
> > index 22f2990392ff..cf26042480e2 100644
> > --- a/Documentation/features/locking/queued-spinlocks/arch-support.txt
> > +++ b/Documentation/features/locking/queued-spinlocks/arch-support.txt
> > @@ -20,7 +20,7 @@
> > | openrisc: | ok |
> > | parisc: | TODO |
> > | powerpc: | ok |
> > - | riscv: | TODO |
> > + | riscv: | ok |
> > | s390: | TODO |
> > | sh: | TODO |
> > | sparc: | ok |
> > diff --git a/arch/riscv/Kconfig b/arch/riscv/Kconfig
> > index 184a9edb04e0..ccf1703edeb9 100644
> > --- a/arch/riscv/Kconfig
> > +++ b/arch/riscv/Kconfig
> > @@ -59,6 +59,7 @@ config RISCV
> > select ARCH_SUPPORTS_SHADOW_CALL_STACK if HAVE_SHADOW_CALL_STACK
> > select ARCH_USE_MEMTEST
> > select ARCH_USE_QUEUED_RWLOCKS
> > + select ARCH_USE_QUEUED_SPINLOCKS if TOOLCHAIN_HAS_ZABHA
> Using qspinlock or not depends on real hardware capabilities, not the
> compiler flag. That's why I introduced combo-spinlock, ticket-spinlock
> & qspinlock three Kconfigs, and the combo-spinlock would compat all
> hardware platforms but waste some qspinlock code size.

You're right, and I think your comment matches what Conor mentioned
about the lack of clarity with some extensions: TOOLCHAIN_HAS_ZABHA
will allow a platform with Zabha capability to use qspinlocks. But if
the hardware does not support it, the kernel will fall back to the ticket spinlocks.

But I agree that looking at the config alone may be misleading, even
though it will work as expected at runtime. So I agree with you:
unless anyone is strongly against the combo spinlocks, I will do what
you suggest and add them.

Thanks again for your initial work,

Alex

>
> > select ARCH_USES_CFI_TRAPS if CFI_CLANG
> > select ARCH_WANT_BATCHED_UNMAP_TLB_FLUSH if SMP && MMU
> > select ARCH_WANT_DEFAULT_TOPDOWN_MMAP_LAYOUT if MMU
> > diff --git a/arch/riscv/include/asm/Kbuild b/arch/riscv/include/asm/Kbuild
> > index 504f8b7e72d4..ad72f2bd4cc9 100644
> > --- a/arch/riscv/include/asm/Kbuild
> > +++ b/arch/riscv/include/asm/Kbuild
> > @@ -2,10 +2,12 @@
> > generic-y += early_ioremap.h
> > generic-y += flat.h
> > generic-y += kvm_para.h
> > +generic-y += mcs_spinlock.h
> > generic-y += parport.h
> > -generic-y += spinlock.h
> > generic-y += spinlock_types.h
> > +generic-y += ticket_spinlock.h
> > generic-y += qrwlock.h
> > generic-y += qrwlock_types.h
> > +generic-y += qspinlock.h
> > generic-y += user.h
> > generic-y += vmlinux.lds.h
> > diff --git a/arch/riscv/include/asm/spinlock.h b/arch/riscv/include/asm/spinlock.h
> > new file mode 100644
> > index 000000000000..e00429ac20ed
> > --- /dev/null
> > +++ b/arch/riscv/include/asm/spinlock.h
> > @@ -0,0 +1,39 @@
> > +/* SPDX-License-Identifier: GPL-2.0 */
> > +
> > +#ifndef __ASM_RISCV_SPINLOCK_H
> > +#define __ASM_RISCV_SPINLOCK_H
> > +
> > +#ifdef CONFIG_QUEUED_SPINLOCKS
> > +#define _Q_PENDING_LOOPS (1 << 9)
> > +
> > +#define __no_arch_spinlock_redefine
> > +#include <asm/ticket_spinlock.h>
> > +#include <asm/qspinlock.h>
> > +#include <asm/alternative.h>
> > +
> > +DECLARE_STATIC_KEY_TRUE(qspinlock_key);
> > +
> > +#define SPINLOCK_BASE_DECLARE(op, type, type_lock) \
> > +static __always_inline type arch_spin_##op(type_lock lock) \
> > +{ \
> > + if (static_branch_unlikely(&qspinlock_key)) \
> > + return queued_spin_##op(lock); \
> > + return ticket_spin_##op(lock); \
> > +}
> > +
> > +SPINLOCK_BASE_DECLARE(lock, void, arch_spinlock_t *)
> > +SPINLOCK_BASE_DECLARE(unlock, void, arch_spinlock_t *)
> > +SPINLOCK_BASE_DECLARE(is_locked, int, arch_spinlock_t *)
> > +SPINLOCK_BASE_DECLARE(is_contended, int, arch_spinlock_t *)
> > +SPINLOCK_BASE_DECLARE(trylock, bool, arch_spinlock_t *)
> > +SPINLOCK_BASE_DECLARE(value_unlocked, int, arch_spinlock_t)
> > +
> > +#else
> > +
> > +#include <asm/ticket_spinlock.h>
> > +
> > +#endif
> > +
> > +#include <asm/qrwlock.h>
> > +
> > +#endif /* __ASM_RISCV_SPINLOCK_H */
> > diff --git a/arch/riscv/kernel/setup.c b/arch/riscv/kernel/setup.c
> > index 4f73c0ae44b2..31ce75522fd4 100644
> > --- a/arch/riscv/kernel/setup.c
> > +++ b/arch/riscv/kernel/setup.c
> > @@ -244,6 +244,23 @@ static void __init parse_dtb(void)
> > #endif
> > }
> >
> > +DEFINE_STATIC_KEY_TRUE(qspinlock_key);
> > +EXPORT_SYMBOL(qspinlock_key);
> > +
> > +static void __init riscv_spinlock_init(void)
> > +{
> > + asm goto(ALTERNATIVE("nop", "j %[qspinlock]", 0, RISCV_ISA_EXT_ZABHA, 1)
> > + : : : : qspinlock);
> > +
> > + static_branch_disable(&qspinlock_key);
> > + pr_info("Ticket spinlock: enabled\n");
> > +
> > + return;
> > +
> > +qspinlock:
> > + pr_info("Queued spinlock: enabled\n");
> > +}
> > +
> > extern void __init init_rt_signal_env(void);
> >
> > void __init setup_arch(char **cmdline_p)
> > @@ -295,6 +312,7 @@ void __init setup_arch(char **cmdline_p)
> > riscv_set_dma_cache_alignment();
> >
> > riscv_user_isa_enable();
> > + riscv_spinlock_init();
> > }
> >
> > bool arch_cpu_is_hotpluggable(int cpu)
> > diff --git a/include/asm-generic/qspinlock.h b/include/asm-generic/qspinlock.h
> > index 0655aa5b57b2..bf47cca2c375 100644
> > --- a/include/asm-generic/qspinlock.h
> > +++ b/include/asm-generic/qspinlock.h
> > @@ -136,6 +136,7 @@ static __always_inline bool virt_spin_lock(struct qspinlock *lock)
> > }
> > #endif
> >
> > +#ifndef __no_arch_spinlock_redefine
> > /*
> > * Remapping spinlock architecture specific functions to the corresponding
> > * queued spinlock functions.
> > @@ -146,5 +147,6 @@ static __always_inline bool virt_spin_lock(struct qspinlock *lock)
> > #define arch_spin_lock(l) queued_spin_lock(l)
> > #define arch_spin_trylock(l) queued_spin_trylock(l)
> > #define arch_spin_unlock(l) queued_spin_unlock(l)
> > +#endif
> >
> > #endif /* __ASM_GENERIC_QSPINLOCK_H */
> > diff --git a/include/asm-generic/ticket_spinlock.h b/include/asm-generic/ticket_spinlock.h
> > index cfcff22b37b3..325779970d8a 100644
> > --- a/include/asm-generic/ticket_spinlock.h
> > +++ b/include/asm-generic/ticket_spinlock.h
> > @@ -89,6 +89,7 @@ static __always_inline int ticket_spin_is_contended(arch_spinlock_t *lock)
> > return (s16)((val >> 16) - (val & 0xffff)) > 1;
> > }
> >
> > +#ifndef __no_arch_spinlock_redefine
> > /*
> > * Remapping spinlock architecture specific functions to the corresponding
> > * ticket spinlock functions.
> > @@ -99,5 +100,6 @@ static __always_inline int ticket_spin_is_contended(arch_spinlock_t *lock)
> > #define arch_spin_lock(l) ticket_spin_lock(l)
> > #define arch_spin_trylock(l) ticket_spin_trylock(l)
> > #define arch_spin_unlock(l) ticket_spin_unlock(l)
> > +#endif
> >
> > #endif /* __ASM_GENERIC_TICKET_SPINLOCK_H */
> > --
> > 2.39.2
> >
>
>
> --
> Best Regards
> Guo Ren

2024-05-29 15:58:01

by Nathan Chancellor

[permalink] [raw]
Subject: Re: [PATCH 2/7] riscv: Implement cmpxchg8/16() using Zabha

On Wed, May 29, 2024 at 02:49:58PM +0200, Alexandre Ghiti wrote:
> Then I missed that, I should have checked the generated code. Is the
> extension version "1p0" in '-march=' only required for experimental
> extensions?

I think so, if my understanding of the message is correct.

> But from Conor comment here [1], we should not enable extensions that
> are only experimental. In that case, we should be good with this.
>
> [1] https://lore.kernel.org/linux-riscv/[email protected]/T/#mefb283477bce852f3713cbbb4ff002252281c9d5

Yeah, I tend to agree with Conor on that front. I had not noticed that
part of the message when I was looking at other parts of this thread. I
could see an argument for allowing experimental extensions for
qualification purposes but I think it does create a bit of a support
nightmare, especially when there are breaking changes across revisions.

> > config EXPERIMENTAL_EXTENSIONS
> > bool
> >
> > config TOOLCHAIN_HAS_ZABHA
> > def_bool y
> > select EXPERIMENTAL_EXTENSIONS if CC_IS_CLANG
> > ...
> >
> > config TOOLCHAIN_HAS_ZACAS
> > def_bool y
> > # ZACAS was experimental until Clang 19: https://github.com/llvm/llvm-project/commit/95aab69c109adf29e183090c25dc95c773215746
> > select EXPERIMENTAL_EXTENSIONS if CC_IS_CLANG && CLANG_VERSION < 190000
> > ...
> >
> > Then in the Makefile:
> >
> > ifdef CONFIG_EXPERIMENTAL_EXTENSIONS
> > KBUILD_AFLAGS += -menable-experimental-extensions
> > KBUILD_CFLAGS += -menable-experimental-extensions
> > endif

Perhaps with that in mind, maybe EXPERIMENTAL_EXTENSIONS (or whatever)
should be a user-selectable option, and the TOOLCHAIN values should
depend on it when the user has a clang version that does not support
the ratified version.

> That's a good idea to me, let's see what Conor thinks [2]
>
> [2] https://lore.kernel.org/linux-riscv/[email protected]/T/#m1d798dfc4c27e5b6d9e14117d81b577ace123322

FWIW, I think your plan of removing support for the experimental version
of the extension and pushing to remove the experimental status in LLVM
(especially since it seems like it is ratified like zacas?
https://jira.riscv.org/browse/RVS-1685) is probably the best thing going
forward. If the LLVM folks are made aware soon, it should be easy to get
that change into clang-19, which is branching at the end of July I
believe.

> Thanks for your thorough review!

Thanks for taking LLVM support into consideration :)

Cheers,
Nathan

2024-05-30 01:55:15

by Guo Ren

[permalink] [raw]
Subject: Re: [PATCH 7/7] riscv: Add qspinlock support based on Zabha extension

On Wed, May 29, 2024 at 9:03 PM Alexandre Ghiti <alexghiti@rivosinc.com> wrote:
>
> Hi Guo,
>
> On Wed, May 29, 2024 at 11:24 AM Guo Ren <[email protected]> wrote:
> >
> > On Tue, May 28, 2024 at 11:18 PM Alexandre Ghiti <[email protected]> wrote:
> > >
> > > In order to produce a generic kernel, a user can select
> > > CONFIG_QUEUED_SPINLOCKS which will fallback at runtime to the ticket
> > > spinlock implementation if Zabha is not present.
> > >
> > > Note that we can't use alternatives here because the discovery of
> > > extensions is done too late and we need to start with the qspinlock
> > > implementation because the ticket spinlock implementation would pollute
> > > the spinlock value, so let's use static keys.
> > >
> > > This is largely based on Guo's work and Leonardo reviews at [1].
> > >
> > > Link: https://lore.kernel.org/linux-riscv/[email protected]/ [1]
> > > Signed-off-by: Alexandre Ghiti <[email protected]>
> > > ---
> > > .../locking/queued-spinlocks/arch-support.txt | 2 +-
> > > arch/riscv/Kconfig | 1 +
> > > arch/riscv/include/asm/Kbuild | 4 +-
> > > arch/riscv/include/asm/spinlock.h | 39 +++++++++++++++++++
> > > arch/riscv/kernel/setup.c | 18 +++++++++
> > > include/asm-generic/qspinlock.h | 2 +
> > > include/asm-generic/ticket_spinlock.h | 2 +
> > > 7 files changed, 66 insertions(+), 2 deletions(-)
> > > create mode 100644 arch/riscv/include/asm/spinlock.h
> > >
> > > diff --git a/Documentation/features/locking/queued-spinlocks/arch-support.txt b/Documentation/features/locking/queued-spinlocks/arch-support.txt
> > > index 22f2990392ff..cf26042480e2 100644
> > > --- a/Documentation/features/locking/queued-spinlocks/arch-support.txt
> > > +++ b/Documentation/features/locking/queued-spinlocks/arch-support.txt
> > > @@ -20,7 +20,7 @@
> > > | openrisc: | ok |
> > > | parisc: | TODO |
> > > | powerpc: | ok |
> > > - | riscv: | TODO |
> > > + | riscv: | ok |
> > > | s390: | TODO |
> > > | sh: | TODO |
> > > | sparc: | ok |
> > > diff --git a/arch/riscv/Kconfig b/arch/riscv/Kconfig
> > > index 184a9edb04e0..ccf1703edeb9 100644
> > > --- a/arch/riscv/Kconfig
> > > +++ b/arch/riscv/Kconfig
> > > @@ -59,6 +59,7 @@ config RISCV
> > > select ARCH_SUPPORTS_SHADOW_CALL_STACK if HAVE_SHADOW_CALL_STACK
> > > select ARCH_USE_MEMTEST
> > > select ARCH_USE_QUEUED_RWLOCKS
> > > + select ARCH_USE_QUEUED_SPINLOCKS if TOOLCHAIN_HAS_ZABHA
> > Using qspinlock or not depends on real hardware capabilities, not the
> > compiler flag. That's why I introduced combo-spinlock, ticket-spinlock
> > & qspinlock three Kconfigs, and the combo-spinlock would compat all
> > hardware platforms but waste some qspinlock code size.
>
> You're right, and I think your comment matches what Conor mentioned
> about the lack of clarity with some extensions: TOOLCHAIN_HAS_ZABHA
> will allow a platform with Zabha capability to use qspinlocks. But if
> the hardware does not, it will fallback to the ticket spinlocks.
>
> But I agree that looking at the config alone may be misleading, even
> though it will work as expected at runtime. So I agree with you:
> unless anyone is strongly against the combo spinlocks, I will do what
> you suggest and add them.
The problem with the v12 combo-spinlock is that it uses a static_branch
instead of the full ALTERNATIVE. Frankly, that's a bad example that
costs more code space. I found that your cmpxchg32/64 also uses a
conditional branch, which has a similar problem, right?

Anyway, your patch series inspired me to update the v13
combo-spinlock. My plan is:
1. Separate native-qspinlock out of paravirt-qspinlock.
2. Re-design the selection as ALTERNATIVE (asm) code instead of a
static_branch between the generic ticket-lock and qspinlock.

What do you think?
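
To make the comparison concrete, here is a rough sketch (not code from
either series) of what an ALTERNATIVE-based arch_spin_lock() could look
like, reusing the names from this patch; it deliberately ignores the
boot-ordering question discussed below, i.e. which implementation can
safely be the default before the extensions are known:

static __always_inline void arch_spin_lock(arch_spinlock_t *lock)
{
	/*
	 * Default path: ticket lock. Patched into a direct jump to the
	 * queued implementation once RISCV_ISA_EXT_ZABHA is detected,
	 * following the asm-goto pattern used elsewhere in this series.
	 */
	asm goto(ALTERNATIVE("nop", "j %[qspinlock]", 0,
			     RISCV_ISA_EXT_ZABHA, 1)
		 : : : : qspinlock);

	ticket_spin_lock(lock);
	return;

qspinlock:
	queued_spin_lock(lock);
}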


>
> Thanks again for your initial work,
>
> Alex
>
> >
> > > select ARCH_USES_CFI_TRAPS if CFI_CLANG
> > > select ARCH_WANT_BATCHED_UNMAP_TLB_FLUSH if SMP && MMU
> > > select ARCH_WANT_DEFAULT_TOPDOWN_MMAP_LAYOUT if MMU
> > > diff --git a/arch/riscv/include/asm/Kbuild b/arch/riscv/include/asm/Kbuild
> > > index 504f8b7e72d4..ad72f2bd4cc9 100644
> > > --- a/arch/riscv/include/asm/Kbuild
> > > +++ b/arch/riscv/include/asm/Kbuild
> > > @@ -2,10 +2,12 @@
> > > generic-y += early_ioremap.h
> > > generic-y += flat.h
> > > generic-y += kvm_para.h
> > > +generic-y += mcs_spinlock.h
> > > generic-y += parport.h
> > > -generic-y += spinlock.h
> > > generic-y += spinlock_types.h
> > > +generic-y += ticket_spinlock.h
> > > generic-y += qrwlock.h
> > > generic-y += qrwlock_types.h
> > > +generic-y += qspinlock.h
> > > generic-y += user.h
> > > generic-y += vmlinux.lds.h
> > > diff --git a/arch/riscv/include/asm/spinlock.h b/arch/riscv/include/asm/spinlock.h
> > > new file mode 100644
> > > index 000000000000..e00429ac20ed
> > > --- /dev/null
> > > +++ b/arch/riscv/include/asm/spinlock.h
> > > @@ -0,0 +1,39 @@
> > > +/* SPDX-License-Identifier: GPL-2.0 */
> > > +
> > > +#ifndef __ASM_RISCV_SPINLOCK_H
> > > +#define __ASM_RISCV_SPINLOCK_H
> > > +
> > > +#ifdef CONFIG_QUEUED_SPINLOCKS
> > > +#define _Q_PENDING_LOOPS (1 << 9)
> > > +
> > > +#define __no_arch_spinlock_redefine
> > > +#include <asm/ticket_spinlock.h>
> > > +#include <asm/qspinlock.h>
> > > +#include <asm/alternative.h>
> > > +
> > > +DECLARE_STATIC_KEY_TRUE(qspinlock_key);
> > > +
> > > +#define SPINLOCK_BASE_DECLARE(op, type, type_lock) \
> > > +static __always_inline type arch_spin_##op(type_lock lock) \
> > > +{ \
> > > + if (static_branch_unlikely(&qspinlock_key)) \
> > > + return queued_spin_##op(lock); \
> > > + return ticket_spin_##op(lock); \
> > > +}
> > > +
> > > +SPINLOCK_BASE_DECLARE(lock, void, arch_spinlock_t *)
> > > +SPINLOCK_BASE_DECLARE(unlock, void, arch_spinlock_t *)
> > > +SPINLOCK_BASE_DECLARE(is_locked, int, arch_spinlock_t *)
> > > +SPINLOCK_BASE_DECLARE(is_contended, int, arch_spinlock_t *)
> > > +SPINLOCK_BASE_DECLARE(trylock, bool, arch_spinlock_t *)
> > > +SPINLOCK_BASE_DECLARE(value_unlocked, int, arch_spinlock_t)
> > > +
> > > +#else
> > > +
> > > +#include <asm/ticket_spinlock.h>
> > > +
> > > +#endif
> > > +
> > > +#include <asm/qrwlock.h>
> > > +
> > > +#endif /* __ASM_RISCV_SPINLOCK_H */
> > > diff --git a/arch/riscv/kernel/setup.c b/arch/riscv/kernel/setup.c
> > > index 4f73c0ae44b2..31ce75522fd4 100644
> > > --- a/arch/riscv/kernel/setup.c
> > > +++ b/arch/riscv/kernel/setup.c
> > > @@ -244,6 +244,23 @@ static void __init parse_dtb(void)
> > > #endif
> > > }
> > >
> > > +DEFINE_STATIC_KEY_TRUE(qspinlock_key);
> > > +EXPORT_SYMBOL(qspinlock_key);
> > > +
> > > +static void __init riscv_spinlock_init(void)
> > > +{
> > > + asm goto(ALTERNATIVE("nop", "j %[qspinlock]", 0, RISCV_ISA_EXT_ZABHA, 1)
> > > + : : : : qspinlock);
> > > +
> > > + static_branch_disable(&qspinlock_key);
> > > + pr_info("Ticket spinlock: enabled\n");
> > > +
> > > + return;
> > > +
> > > +qspinlock:
> > > + pr_info("Queued spinlock: enabled\n");
> > > +}
> > > +
> > > extern void __init init_rt_signal_env(void);
> > >
> > > void __init setup_arch(char **cmdline_p)
> > > @@ -295,6 +312,7 @@ void __init setup_arch(char **cmdline_p)
> > > riscv_set_dma_cache_alignment();
> > >
> > > riscv_user_isa_enable();
> > > + riscv_spinlock_init();
> > > }
> > >
> > > bool arch_cpu_is_hotpluggable(int cpu)
> > > diff --git a/include/asm-generic/qspinlock.h b/include/asm-generic/qspinlock.h
> > > index 0655aa5b57b2..bf47cca2c375 100644
> > > --- a/include/asm-generic/qspinlock.h
> > > +++ b/include/asm-generic/qspinlock.h
> > > @@ -136,6 +136,7 @@ static __always_inline bool virt_spin_lock(struct qspinlock *lock)
> > > }
> > > #endif
> > >
> > > +#ifndef __no_arch_spinlock_redefine
> > > /*
> > > * Remapping spinlock architecture specific functions to the corresponding
> > > * queued spinlock functions.
> > > @@ -146,5 +147,6 @@ static __always_inline bool virt_spin_lock(struct qspinlock *lock)
> > > #define arch_spin_lock(l) queued_spin_lock(l)
> > > #define arch_spin_trylock(l) queued_spin_trylock(l)
> > > #define arch_spin_unlock(l) queued_spin_unlock(l)
> > > +#endif
> > >
> > > #endif /* __ASM_GENERIC_QSPINLOCK_H */
> > > diff --git a/include/asm-generic/ticket_spinlock.h b/include/asm-generic/ticket_spinlock.h
> > > index cfcff22b37b3..325779970d8a 100644
> > > --- a/include/asm-generic/ticket_spinlock.h
> > > +++ b/include/asm-generic/ticket_spinlock.h
> > > @@ -89,6 +89,7 @@ static __always_inline int ticket_spin_is_contended(arch_spinlock_t *lock)
> > > return (s16)((val >> 16) - (val & 0xffff)) > 1;
> > > }
> > >
> > > +#ifndef __no_arch_spinlock_redefine
> > > /*
> > > * Remapping spinlock architecture specific functions to the corresponding
> > > * ticket spinlock functions.
> > > @@ -99,5 +100,6 @@ static __always_inline int ticket_spin_is_contended(arch_spinlock_t *lock)
> > > #define arch_spin_lock(l) ticket_spin_lock(l)
> > > #define arch_spin_trylock(l) ticket_spin_trylock(l)
> > > #define arch_spin_unlock(l) ticket_spin_unlock(l)
> > > +#endif
> > >
> > > #endif /* __ASM_GENERIC_TICKET_SPINLOCK_H */
> > > --
> > > 2.39.2
> > >
> >
> >
> > --
> > Best Regards
> > Guo Ren



--
Best Regards
Guo Ren

2024-05-30 05:30:35

by Alexandre Ghiti

[permalink] [raw]
Subject: Re: [PATCH 7/7] riscv: Add qspinlock support based on Zabha extension

Hi Guo,

On Thu, May 30, 2024 at 3:55 AM Guo Ren <[email protected]> wrote:
>
> On Wed, May 29, 2024 at 9:03 PM Alexandre Ghiti <[email protected]> wrote:
> >
> > Hi Guo,
> >
> > On Wed, May 29, 2024 at 11:24 AM Guo Ren <[email protected]> wrote:
> > >
> > > On Tue, May 28, 2024 at 11:18 PM Alexandre Ghiti <[email protected]> wrote:
> > > >
> > > > In order to produce a generic kernel, a user can select
> > > > CONFIG_QUEUED_SPINLOCKS which will fallback at runtime to the ticket
> > > > spinlock implementation if Zabha is not present.
> > > >
> > > > Note that we can't use alternatives here because the discovery of
> > > > extensions is done too late and we need to start with the qspinlock
> > > > implementation because the ticket spinlock implementation would pollute
> > > > the spinlock value, so let's use static keys.
> > > >
> > > > This is largely based on Guo's work and Leonardo reviews at [1].
> > > >
> > > > Link: https://lore.kernel.org/linux-riscv/[email protected]/ [1]
> > > > Signed-off-by: Alexandre Ghiti <[email protected]>
> > > > ---
> > > > .../locking/queued-spinlocks/arch-support.txt | 2 +-
> > > > arch/riscv/Kconfig | 1 +
> > > > arch/riscv/include/asm/Kbuild | 4 +-
> > > > arch/riscv/include/asm/spinlock.h | 39 +++++++++++++++++++
> > > > arch/riscv/kernel/setup.c | 18 +++++++++
> > > > include/asm-generic/qspinlock.h | 2 +
> > > > include/asm-generic/ticket_spinlock.h | 2 +
> > > > 7 files changed, 66 insertions(+), 2 deletions(-)
> > > > create mode 100644 arch/riscv/include/asm/spinlock.h
> > > >
> > > > diff --git a/Documentation/features/locking/queued-spinlocks/arch-support.txt b/Documentation/features/locking/queued-spinlocks/arch-support.txt
> > > > index 22f2990392ff..cf26042480e2 100644
> > > > --- a/Documentation/features/locking/queued-spinlocks/arch-support.txt
> > > > +++ b/Documentation/features/locking/queued-spinlocks/arch-support.txt
> > > > @@ -20,7 +20,7 @@
> > > > | openrisc: | ok |
> > > > | parisc: | TODO |
> > > > | powerpc: | ok |
> > > > - | riscv: | TODO |
> > > > + | riscv: | ok |
> > > > | s390: | TODO |
> > > > | sh: | TODO |
> > > > | sparc: | ok |
> > > > diff --git a/arch/riscv/Kconfig b/arch/riscv/Kconfig
> > > > index 184a9edb04e0..ccf1703edeb9 100644
> > > > --- a/arch/riscv/Kconfig
> > > > +++ b/arch/riscv/Kconfig
> > > > @@ -59,6 +59,7 @@ config RISCV
> > > > select ARCH_SUPPORTS_SHADOW_CALL_STACK if HAVE_SHADOW_CALL_STACK
> > > > select ARCH_USE_MEMTEST
> > > > select ARCH_USE_QUEUED_RWLOCKS
> > > > + select ARCH_USE_QUEUED_SPINLOCKS if TOOLCHAIN_HAS_ZABHA
> > > Using qspinlock or not depends on real hardware capabilities, not the
> > > compiler flag. That's why I introduced combo-spinlock, ticket-spinlock
> > > & qspinlock three Kconfigs, and the combo-spinlock would compat all
> > > hardware platforms but waste some qspinlock code size.
> >
> > You're right, and I think your comment matches what Conor mentioned
> > about the lack of clarity with some extensions: TOOLCHAIN_HAS_ZABHA
> > will allow a platform with Zabha capability to use qspinlocks. But if
> > the hardware does not, it will fallback to the ticket spinlocks.
> >
> > But I agree that looking at the config alone may be misleading, even
> > though it will work as expected at runtime. So I agree with you:
> > unless anyone is strongly against the combo spinlocks, I will do what
> > you suggest and add them.
> The problem with the v12 combo-spinlock is using a static_branch
> instead of the full ALTERNATIVE. Frankly, that's a bad example that
> costs more code space. I found that your cmpxchg32/64 also uses a
> condition branch, which has a similar problem, right?
>
> Anyway, your patch series inspired me to update the v13
> combo-spinlock. My plan is:
> 1. Separate native-qspinlock out of paravirt-qspinlock.
> 2. Re-design an ALTERNATIVE(asm) code instead of static_branch generic
> ticket-lock or qspinlock.

What's your plan to make use of alternatives here? The alternatives
patching depends on the discovery of the extensions, which is done too
late, at least after the first use of a spinlock (the printk
spinlock). So you'd need to find a way to first use qspinlocks (but
without knowing Zabha is available) and then do the correct patching:
an idea here could be to add an "init" value to the alternatives and
let the patching process do the right thing when the extensions are
known.

Another solution would be the early discovery of the extensions, but I
took a look and it's easy with a device tree, but not with ACPI.

Let me know what you plan to do and how I can help!

Thanks,

Alex

>
> What do you think?
>
>
> >
> > Thanks again for your initial work,
> >
> > Alex
> >
> > >
> > > > select ARCH_USES_CFI_TRAPS if CFI_CLANG
> > > > select ARCH_WANT_BATCHED_UNMAP_TLB_FLUSH if SMP && MMU
> > > > select ARCH_WANT_DEFAULT_TOPDOWN_MMAP_LAYOUT if MMU
> > > > diff --git a/arch/riscv/include/asm/Kbuild b/arch/riscv/include/asm/Kbuild
> > > > index 504f8b7e72d4..ad72f2bd4cc9 100644
> > > > --- a/arch/riscv/include/asm/Kbuild
> > > > +++ b/arch/riscv/include/asm/Kbuild
> > > > @@ -2,10 +2,12 @@
> > > > generic-y += early_ioremap.h
> > > > generic-y += flat.h
> > > > generic-y += kvm_para.h
> > > > +generic-y += mcs_spinlock.h
> > > > generic-y += parport.h
> > > > -generic-y += spinlock.h
> > > > generic-y += spinlock_types.h
> > > > +generic-y += ticket_spinlock.h
> > > > generic-y += qrwlock.h
> > > > generic-y += qrwlock_types.h
> > > > +generic-y += qspinlock.h
> > > > generic-y += user.h
> > > > generic-y += vmlinux.lds.h
> > > > diff --git a/arch/riscv/include/asm/spinlock.h b/arch/riscv/include/asm/spinlock.h
> > > > new file mode 100644
> > > > index 000000000000..e00429ac20ed
> > > > --- /dev/null
> > > > +++ b/arch/riscv/include/asm/spinlock.h
> > > > @@ -0,0 +1,39 @@
> > > > +/* SPDX-License-Identifier: GPL-2.0 */
> > > > +
> > > > +#ifndef __ASM_RISCV_SPINLOCK_H
> > > > +#define __ASM_RISCV_SPINLOCK_H
> > > > +
> > > > +#ifdef CONFIG_QUEUED_SPINLOCKS
> > > > +#define _Q_PENDING_LOOPS (1 << 9)
> > > > +
> > > > +#define __no_arch_spinlock_redefine
> > > > +#include <asm/ticket_spinlock.h>
> > > > +#include <asm/qspinlock.h>
> > > > +#include <asm/alternative.h>
> > > > +
> > > > +DECLARE_STATIC_KEY_TRUE(qspinlock_key);
> > > > +
> > > > +#define SPINLOCK_BASE_DECLARE(op, type, type_lock) \
> > > > +static __always_inline type arch_spin_##op(type_lock lock) \
> > > > +{ \
> > > > + if (static_branch_unlikely(&qspinlock_key)) \
> > > > + return queued_spin_##op(lock); \
> > > > + return ticket_spin_##op(lock); \
> > > > +}
> > > > +
> > > > +SPINLOCK_BASE_DECLARE(lock, void, arch_spinlock_t *)
> > > > +SPINLOCK_BASE_DECLARE(unlock, void, arch_spinlock_t *)
> > > > +SPINLOCK_BASE_DECLARE(is_locked, int, arch_spinlock_t *)
> > > > +SPINLOCK_BASE_DECLARE(is_contended, int, arch_spinlock_t *)
> > > > +SPINLOCK_BASE_DECLARE(trylock, bool, arch_spinlock_t *)
> > > > +SPINLOCK_BASE_DECLARE(value_unlocked, int, arch_spinlock_t)
> > > > +
> > > > +#else
> > > > +
> > > > +#include <asm/ticket_spinlock.h>
> > > > +
> > > > +#endif
> > > > +
> > > > +#include <asm/qrwlock.h>
> > > > +
> > > > +#endif /* __ASM_RISCV_SPINLOCK_H */
> > > > diff --git a/arch/riscv/kernel/setup.c b/arch/riscv/kernel/setup.c
> > > > index 4f73c0ae44b2..31ce75522fd4 100644
> > > > --- a/arch/riscv/kernel/setup.c
> > > > +++ b/arch/riscv/kernel/setup.c
> > > > @@ -244,6 +244,23 @@ static void __init parse_dtb(void)
> > > > #endif
> > > > }
> > > >
> > > > +DEFINE_STATIC_KEY_TRUE(qspinlock_key);
> > > > +EXPORT_SYMBOL(qspinlock_key);
> > > > +
> > > > +static void __init riscv_spinlock_init(void)
> > > > +{
> > > > + asm goto(ALTERNATIVE("nop", "j %[qspinlock]", 0, RISCV_ISA_EXT_ZABHA, 1)
> > > > + : : : : qspinlock);
> > > > +
> > > > + static_branch_disable(&qspinlock_key);
> > > > + pr_info("Ticket spinlock: enabled\n");
> > > > +
> > > > + return;
> > > > +
> > > > +qspinlock:
> > > > + pr_info("Queued spinlock: enabled\n");
> > > > +}
> > > > +
> > > > extern void __init init_rt_signal_env(void);
> > > >
> > > > void __init setup_arch(char **cmdline_p)
> > > > @@ -295,6 +312,7 @@ void __init setup_arch(char **cmdline_p)
> > > > riscv_set_dma_cache_alignment();
> > > >
> > > > riscv_user_isa_enable();
> > > > + riscv_spinlock_init();
> > > > }
> > > >
> > > > bool arch_cpu_is_hotpluggable(int cpu)
> > > > diff --git a/include/asm-generic/qspinlock.h b/include/asm-generic/qspinlock.h
> > > > index 0655aa5b57b2..bf47cca2c375 100644
> > > > --- a/include/asm-generic/qspinlock.h
> > > > +++ b/include/asm-generic/qspinlock.h
> > > > @@ -136,6 +136,7 @@ static __always_inline bool virt_spin_lock(struct qspinlock *lock)
> > > > }
> > > > #endif
> > > >
> > > > +#ifndef __no_arch_spinlock_redefine
> > > > /*
> > > > * Remapping spinlock architecture specific functions to the corresponding
> > > > * queued spinlock functions.
> > > > @@ -146,5 +147,6 @@ static __always_inline bool virt_spin_lock(struct qspinlock *lock)
> > > > #define arch_spin_lock(l) queued_spin_lock(l)
> > > > #define arch_spin_trylock(l) queued_spin_trylock(l)
> > > > #define arch_spin_unlock(l) queued_spin_unlock(l)
> > > > +#endif
> > > >
> > > > #endif /* __ASM_GENERIC_QSPINLOCK_H */
> > > > diff --git a/include/asm-generic/ticket_spinlock.h b/include/asm-generic/ticket_spinlock.h
> > > > index cfcff22b37b3..325779970d8a 100644
> > > > --- a/include/asm-generic/ticket_spinlock.h
> > > > +++ b/include/asm-generic/ticket_spinlock.h
> > > > @@ -89,6 +89,7 @@ static __always_inline int ticket_spin_is_contended(arch_spinlock_t *lock)
> > > > return (s16)((val >> 16) - (val & 0xffff)) > 1;
> > > > }
> > > >
> > > > +#ifndef __no_arch_spinlock_redefine
> > > > /*
> > > > * Remapping spinlock architecture specific functions to the corresponding
> > > > * ticket spinlock functions.
> > > > @@ -99,5 +100,6 @@ static __always_inline int ticket_spin_is_contended(arch_spinlock_t *lock)
> > > > #define arch_spin_lock(l) ticket_spin_lock(l)
> > > > #define arch_spin_trylock(l) ticket_spin_trylock(l)
> > > > #define arch_spin_unlock(l) ticket_spin_unlock(l)
> > > > +#endif
> > > >
> > > > #endif /* __ASM_GENERIC_TICKET_SPINLOCK_H */
> > > > --
> > > > 2.39.2
> > > >
> > >
> > >
> > > --
> > > Best Regards
> > > Guo Ren
>
>
>
> --
> Best Regards
> Guo Ren

2024-05-30 14:43:27

by Conor Dooley

[permalink] [raw]
Subject: Re: [PATCH 1/7] riscv: Implement cmpxchg32/64() using Zacas

On Wed, May 29, 2024 at 02:20:33PM +0200, Alexandre Ghiti wrote:
> On Tue, May 28, 2024 at 5:34 PM Conor Dooley <[email protected]> wrote:
> > On Tue, May 28, 2024 at 05:10:46PM +0200, Alexandre Ghiti wrote:

> > > config TOOLCHAIN_HAS_ZBB
> > > bool
> > > default y
> > > diff --git a/arch/riscv/Makefile b/arch/riscv/Makefile
> > > index 5b3115a19852..d5b60b87998c 100644
> > > --- a/arch/riscv/Makefile
> > > +++ b/arch/riscv/Makefile
> > > @@ -78,6 +78,17 @@ endif
> > > # Check if the toolchain supports Zihintpause extension
> > > riscv-march-$(CONFIG_TOOLCHAIN_HAS_ZIHINTPAUSE) := $(riscv-march-y)_zihintpause
> > >
> > > +# Check if the toolchain supports Zacas
> > > +ifdef CONFIG_AS_IS_LLVM
> > > +# Support for experimental Zacas was merged in LLVM 17, but the removal of
> > > +# the "experimental" was merged in LLVM 19.
> > > +KBUILD_CFLAGS += -menable-experimental-extensions
> > > +KBUILD_AFLAGS += -menable-experimental-extensions
> > > +riscv-march-y := $(riscv-march-y)_zacas1p0
> > > +else
> > > +riscv-march-$(CONFIG_TOOLCHAIN_HAS_ZACAS) := $(riscv-march-y)_zacas
> > > +endif
> >
> > I'm almost certain that we discussed this before for vector and it was
> > decided to not enable experimental extensions (particularly as it is a
> > global option), and instead require the non-experimental versions.
> > This isn't even consistent with your TOOLCHAIN_HAS_ZACAS checks, that
> > will only enable the option for the ratified version.
>
> Zacas was ratified, hence the removal of "experimental" in LLVM 19.
> But unfortunately Zabha lacks such changes in LLVM, so that will make
> this inconsistent (ratified extension but still experimental).
>
> I'll remove the enablement of the experimental extensions then so that
> will fail for LLVM < 19. And for Zabha, I'll try to push the removal
> of experimental from LLVM.

Ye, as Nathan mentioned there may yet be some time. It'd be great if you
could.

> > I think we should
> > continue to avoid enabling experimental extensions, even if that imposes
> > a requirement of having a bleeding edge toolchain to actually use the
> > extension.
>
> Would it make sense to have a
> CONFIG_RISCV_LLVM_ENABLE_EXPERIMENTAL_EXTENSIONS or similar? So that
> people who want to play with those extensions will still be able to do
> so without patching the kernel?

Maybe, but I think something like that should depend on BROKEN and only
be done when the extension hasn't had its experimental status removed by
a released version of LLVM and is not supported by a release of GCC.
Given we only allow frozen extensions into the kernel, actually having to
do this would be rather rare.

I think we should also reserve the right to drop support for the
experimental version as soon as it does get its status changed, and
depending on BROKEN would let us do that without any regressions in terms
of toolchain version support.

Yes, making the option depend on BROKEN would require patching a Kconfig
file but this would be a facility for kernel developers to test prior to
the release of a toolchain that actually supports the extension, and the
"hard" part in the Makefile to hook it up would already be done. I think
if you're capable of messing about with experimental extensions in the
kernel you're capable of also modifying a Kconfig file locally ;)

Cheers,
Conor.



2024-05-31 01:57:26

by Guo Ren

[permalink] [raw]
Subject: Re: [PATCH 7/7] riscv: Add qspinlock support based on Zabha extension

On Thu, May 30, 2024 at 1:30 PM Alexandre Ghiti <alexghiti@rivosinc.com> wrote:
>
> Hi Guo,
>
> On Thu, May 30, 2024 at 3:55 AM Guo Ren <[email protected]> wrote:
> >
> > On Wed, May 29, 2024 at 9:03 PM Alexandre Ghiti <[email protected]> wrote:
> > >
> > > Hi Guo,
> > >
> > > On Wed, May 29, 2024 at 11:24 AM Guo Ren <[email protected]> wrote:
> > > >
> > > > On Tue, May 28, 2024 at 11:18 PM Alexandre Ghiti <[email protected]> wrote:
> > > > >
> > > > > In order to produce a generic kernel, a user can select
> > > > > CONFIG_QUEUED_SPINLOCKS which will fallback at runtime to the ticket
> > > > > spinlock implementation if Zabha is not present.
> > > > >
> > > > > Note that we can't use alternatives here because the discovery of
> > > > > extensions is done too late and we need to start with the qspinlock
> > > > > implementation because the ticket spinlock implementation would pollute
> > > > > the spinlock value, so let's use static keys.
> > > > >
> > > > > This is largely based on Guo's work and Leonardo reviews at [1].
> > > > >
> > > > > Link: https://lore.kernel.org/linux-riscv/[email protected]/ [1]
> > > > > Signed-off-by: Alexandre Ghiti <[email protected]>
> > > > > ---
> > > > > .../locking/queued-spinlocks/arch-support.txt | 2 +-
> > > > > arch/riscv/Kconfig | 1 +
> > > > > arch/riscv/include/asm/Kbuild | 4 +-
> > > > > arch/riscv/include/asm/spinlock.h | 39 +++++++++++++++++++
> > > > > arch/riscv/kernel/setup.c | 18 +++++++++
> > > > > include/asm-generic/qspinlock.h | 2 +
> > > > > include/asm-generic/ticket_spinlock.h | 2 +
> > > > > 7 files changed, 66 insertions(+), 2 deletions(-)
> > > > > create mode 100644 arch/riscv/include/asm/spinlock.h
> > > > >
> > > > > diff --git a/Documentation/features/locking/queued-spinlocks/arch-support.txt b/Documentation/features/locking/queued-spinlocks/arch-support.txt
> > > > > index 22f2990392ff..cf26042480e2 100644
> > > > > --- a/Documentation/features/locking/queued-spinlocks/arch-support.txt
> > > > > +++ b/Documentation/features/locking/queued-spinlocks/arch-support.txt
> > > > > @@ -20,7 +20,7 @@
> > > > > | openrisc: | ok |
> > > > > | parisc: | TODO |
> > > > > | powerpc: | ok |
> > > > > - | riscv: | TODO |
> > > > > + | riscv: | ok |
> > > > > | s390: | TODO |
> > > > > | sh: | TODO |
> > > > > | sparc: | ok |
> > > > > diff --git a/arch/riscv/Kconfig b/arch/riscv/Kconfig
> > > > > index 184a9edb04e0..ccf1703edeb9 100644
> > > > > --- a/arch/riscv/Kconfig
> > > > > +++ b/arch/riscv/Kconfig
> > > > > @@ -59,6 +59,7 @@ config RISCV
> > > > > select ARCH_SUPPORTS_SHADOW_CALL_STACK if HAVE_SHADOW_CALL_STACK
> > > > > select ARCH_USE_MEMTEST
> > > > > select ARCH_USE_QUEUED_RWLOCKS
> > > > > + select ARCH_USE_QUEUED_SPINLOCKS if TOOLCHAIN_HAS_ZABHA
> > > > Using qspinlock or not depends on real hardware capabilities, not the
> > > > compiler flag. That's why I introduced combo-spinlock, ticket-spinlock
> > > > & qspinlock three Kconfigs, and the combo-spinlock would compat all
> > > > hardware platforms but waste some qspinlock code size.
> > >
> > > You're right, and I think your comment matches what Conor mentioned
> > > about the lack of clarity with some extensions: TOOLCHAIN_HAS_ZABHA
> > > will allow a platform with Zabha capability to use qspinlocks. But if
> > > the hardware does not, it will fallback to the ticket spinlocks.
> > >
> > > But I agree that looking at the config alone may be misleading, even
> > > though it will work as expected at runtime. So I agree with you:
> > > unless anyone is strongly against the combo spinlocks, I will do what
> > > you suggest and add them.
> > The problem with the v12 combo-spinlock is using a static_branch
> > instead of the full ALTERNATIVE. Frankly, that's a bad example that
> > costs more code space. I found that your cmpxchg32/64 also uses a
> > condition branch, which has a similar problem, right?
> >
> > Anyway, your patch series inspired me to update the v13
> > combo-spinlock. My plan is:
> > 1. Separate native-qspinlock out of paravirt-qspinlock.
> > 2. Re-design an ALTERNATIVE(asm) code instead of static_branch generic
> > ticket-lock or qspinlock.
>
> What's your plan to make use of alternatives here? The alternatives
> patching depends on the discovery of the extensions, which is done too
> late, at least after the first use of a spinlock (the printk
> spinlock). So you'd need to find a way to first use qspinlocks (but
> without knowing Zabha is available) and then do the correct patching:
I did that in v12:
1. Use qspinlock as the initial implementation.
2. Then switch to ticket-lock or keep qspinlock.
(Only qspinlock -> ticket-lock, no reverse direction.)

If there is no contention, qspinlock is fine on all platforms before
SMP bringup and in a no-IRQ environment.
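
For context, the uncontended fast path of the generic queued_spin_lock()
is just a full-word cmpxchg, which is why running qspinlock before the
ISA string is known is harmless as long as nothing contends; roughly
(paraphrased from include/asm-generic/qspinlock.h, double-check the
exact form):

static __always_inline void queued_spin_lock(struct qspinlock *lock)
{
	int val = 0;

	/* Uncontended case: a single acquire cmpxchg on the whole lock word. */
	if (likely(atomic_try_cmpxchg_acquire(&lock->val, &val, _Q_LOCKED_VAL)))
		return;

	/* Only contended lockers ever reach the MCS-based slow path. */
	queued_spin_lock_slowpath(lock, val);
}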

> an idea here could be to add an "init" value to the alternatives and
> let the patching process do the right thing when the extensions are
> known.
>
> Another solution would be the early discovery of the extensions, but I
> took a look and it's easy with a device tree, but not with ACPI.
>
> Let me know what you plan to do and how I can help!
>
> Thanks,
>
> Alex
>
> >
> > What do you think?
> >
> >
> > >
> > > Thanks again for your initial work,
> > >
> > > Alex
> > >
> > > >
> > > > > select ARCH_USES_CFI_TRAPS if CFI_CLANG
> > > > > select ARCH_WANT_BATCHED_UNMAP_TLB_FLUSH if SMP && MMU
> > > > > select ARCH_WANT_DEFAULT_TOPDOWN_MMAP_LAYOUT if MMU
> > > > > diff --git a/arch/riscv/include/asm/Kbuild b/arch/riscv/include/asm/Kbuild
> > > > > index 504f8b7e72d4..ad72f2bd4cc9 100644
> > > > > --- a/arch/riscv/include/asm/Kbuild
> > > > > +++ b/arch/riscv/include/asm/Kbuild
> > > > > @@ -2,10 +2,12 @@
> > > > > generic-y += early_ioremap.h
> > > > > generic-y += flat.h
> > > > > generic-y += kvm_para.h
> > > > > +generic-y += mcs_spinlock.h
> > > > > generic-y += parport.h
> > > > > -generic-y += spinlock.h
> > > > > generic-y += spinlock_types.h
> > > > > +generic-y += ticket_spinlock.h
> > > > > generic-y += qrwlock.h
> > > > > generic-y += qrwlock_types.h
> > > > > +generic-y += qspinlock.h
> > > > > generic-y += user.h
> > > > > generic-y += vmlinux.lds.h
> > > > > diff --git a/arch/riscv/include/asm/spinlock.h b/arch/riscv/include/asm/spinlock.h
> > > > > new file mode 100644
> > > > > index 000000000000..e00429ac20ed
> > > > > --- /dev/null
> > > > > +++ b/arch/riscv/include/asm/spinlock.h
> > > > > @@ -0,0 +1,39 @@
> > > > > +/* SPDX-License-Identifier: GPL-2.0 */
> > > > > +
> > > > > +#ifndef __ASM_RISCV_SPINLOCK_H
> > > > > +#define __ASM_RISCV_SPINLOCK_H
> > > > > +
> > > > > +#ifdef CONFIG_QUEUED_SPINLOCKS
> > > > > +#define _Q_PENDING_LOOPS (1 << 9)
> > > > > +
> > > > > +#define __no_arch_spinlock_redefine
> > > > > +#include <asm/ticket_spinlock.h>
> > > > > +#include <asm/qspinlock.h>
> > > > > +#include <asm/alternative.h>
> > > > > +
> > > > > +DECLARE_STATIC_KEY_TRUE(qspinlock_key);
> > > > > +
> > > > > +#define SPINLOCK_BASE_DECLARE(op, type, type_lock) \
> > > > > +static __always_inline type arch_spin_##op(type_lock lock) \
> > > > > +{ \
> > > > > + if (static_branch_unlikely(&qspinlock_key)) \
> > > > > + return queued_spin_##op(lock); \
> > > > > + return ticket_spin_##op(lock); \
> > > > > +}
> > > > > +
> > > > > +SPINLOCK_BASE_DECLARE(lock, void, arch_spinlock_t *)
> > > > > +SPINLOCK_BASE_DECLARE(unlock, void, arch_spinlock_t *)
> > > > > +SPINLOCK_BASE_DECLARE(is_locked, int, arch_spinlock_t *)
> > > > > +SPINLOCK_BASE_DECLARE(is_contended, int, arch_spinlock_t *)
> > > > > +SPINLOCK_BASE_DECLARE(trylock, bool, arch_spinlock_t *)
> > > > > +SPINLOCK_BASE_DECLARE(value_unlocked, int, arch_spinlock_t)
> > > > > +
> > > > > +#else
> > > > > +
> > > > > +#include <asm/ticket_spinlock.h>
> > > > > +
> > > > > +#endif
> > > > > +
> > > > > +#include <asm/qrwlock.h>
> > > > > +
> > > > > +#endif /* __ASM_RISCV_SPINLOCK_H */
> > > > > diff --git a/arch/riscv/kernel/setup.c b/arch/riscv/kernel/setup.c
> > > > > index 4f73c0ae44b2..31ce75522fd4 100644
> > > > > --- a/arch/riscv/kernel/setup.c
> > > > > +++ b/arch/riscv/kernel/setup.c
> > > > > @@ -244,6 +244,23 @@ static void __init parse_dtb(void)
> > > > > #endif
> > > > > }
> > > > >
> > > > > +DEFINE_STATIC_KEY_TRUE(qspinlock_key);
> > > > > +EXPORT_SYMBOL(qspinlock_key);
> > > > > +
> > > > > +static void __init riscv_spinlock_init(void)
> > > > > +{
> > > > > + asm goto(ALTERNATIVE("nop", "j %[qspinlock]", 0, RISCV_ISA_EXT_ZABHA, 1)
> > > > > + : : : : qspinlock);
> > > > > +
> > > > > + static_branch_disable(&qspinlock_key);
> > > > > + pr_info("Ticket spinlock: enabled\n");
> > > > > +
> > > > > + return;
> > > > > +
> > > > > +qspinlock:
> > > > > + pr_info("Queued spinlock: enabled\n");
> > > > > +}
> > > > > +
> > > > > extern void __init init_rt_signal_env(void);
> > > > >
> > > > > void __init setup_arch(char **cmdline_p)
> > > > > @@ -295,6 +312,7 @@ void __init setup_arch(char **cmdline_p)
> > > > > riscv_set_dma_cache_alignment();
> > > > >
> > > > > riscv_user_isa_enable();
> > > > > + riscv_spinlock_init();
> > > > > }
> > > > >
> > > > > bool arch_cpu_is_hotpluggable(int cpu)
> > > > > diff --git a/include/asm-generic/qspinlock.h b/include/asm-generic/qspinlock.h
> > > > > index 0655aa5b57b2..bf47cca2c375 100644
> > > > > --- a/include/asm-generic/qspinlock.h
> > > > > +++ b/include/asm-generic/qspinlock.h
> > > > > @@ -136,6 +136,7 @@ static __always_inline bool virt_spin_lock(struct qspinlock *lock)
> > > > > }
> > > > > #endif
> > > > >
> > > > > +#ifndef __no_arch_spinlock_redefine
> > > > > /*
> > > > > * Remapping spinlock architecture specific functions to the corresponding
> > > > > * queued spinlock functions.
> > > > > @@ -146,5 +147,6 @@ static __always_inline bool virt_spin_lock(struct qspinlock *lock)
> > > > > #define arch_spin_lock(l) queued_spin_lock(l)
> > > > > #define arch_spin_trylock(l) queued_spin_trylock(l)
> > > > > #define arch_spin_unlock(l) queued_spin_unlock(l)
> > > > > +#endif
> > > > >
> > > > > #endif /* __ASM_GENERIC_QSPINLOCK_H */
> > > > > diff --git a/include/asm-generic/ticket_spinlock.h b/include/asm-generic/ticket_spinlock.h
> > > > > index cfcff22b37b3..325779970d8a 100644
> > > > > --- a/include/asm-generic/ticket_spinlock.h
> > > > > +++ b/include/asm-generic/ticket_spinlock.h
> > > > > @@ -89,6 +89,7 @@ static __always_inline int ticket_spin_is_contended(arch_spinlock_t *lock)
> > > > > return (s16)((val >> 16) - (val & 0xffff)) > 1;
> > > > > }
> > > > >
> > > > > +#ifndef __no_arch_spinlock_redefine
> > > > > /*
> > > > > * Remapping spinlock architecture specific functions to the corresponding
> > > > > * ticket spinlock functions.
> > > > > @@ -99,5 +100,6 @@ static __always_inline int ticket_spin_is_contended(arch_spinlock_t *lock)
> > > > > #define arch_spin_lock(l) ticket_spin_lock(l)
> > > > > #define arch_spin_trylock(l) ticket_spin_trylock(l)
> > > > > #define arch_spin_unlock(l) ticket_spin_unlock(l)
> > > > > +#endif
> > > > >
> > > > > #endif /* __ASM_GENERIC_TICKET_SPINLOCK_H */
> > > > > --
> > > > > 2.39.2
> > > > >
> > > >
> > > >
> > > > --
> > > > Best Regards
> > > > Guo Ren
> >
> >
> >
> > --
> > Best Regards
> > Guo Ren



--
Best Regards
Guo Ren

2024-05-31 06:22:30

by Alexandre Ghiti

[permalink] [raw]
Subject: Re: [PATCH 7/7] riscv: Add qspinlock support based on Zabha extension

On Fri, May 31, 2024 at 3:57 AM Guo Ren <[email protected]> wrote:
>
> On Thu, May 30, 2024 at 1:30 PM Alexandre Ghiti <[email protected]> wrote:
> >
> > Hi Guo,
> >
> > On Thu, May 30, 2024 at 3:55 AM Guo Ren <[email protected]> wrote:
> > >
> > > On Wed, May 29, 2024 at 9:03 PM Alexandre Ghiti <[email protected]> wrote:
> > > >
> > > > Hi Guo,
> > > >
> > > > On Wed, May 29, 2024 at 11:24 AM Guo Ren <[email protected]> wrote:
> > > > >
> > > > > On Tue, May 28, 2024 at 11:18 PM Alexandre Ghiti <[email protected]> wrote:
> > > > > >
> > > > > > In order to produce a generic kernel, a user can select
> > > > > > CONFIG_QUEUED_SPINLOCKS which will fallback at runtime to the ticket
> > > > > > spinlock implementation if Zabha is not present.
> > > > > >
> > > > > > Note that we can't use alternatives here because the discovery of
> > > > > > extensions is done too late and we need to start with the qspinlock
> > > > > > implementation because the ticket spinlock implementation would pollute
> > > > > > the spinlock value, so let's use static keys.
> > > > > >
> > > > > > This is largely based on Guo's work and Leonardo reviews at [1].
> > > > > >
> > > > > > Link: https://lore.kernel.org/linux-riscv/[email protected]/ [1]
> > > > > > Signed-off-by: Alexandre Ghiti <[email protected]>
> > > > > > ---
> > > > > > .../locking/queued-spinlocks/arch-support.txt | 2 +-
> > > > > > arch/riscv/Kconfig | 1 +
> > > > > > arch/riscv/include/asm/Kbuild | 4 +-
> > > > > > arch/riscv/include/asm/spinlock.h | 39 +++++++++++++++++++
> > > > > > arch/riscv/kernel/setup.c | 18 +++++++++
> > > > > > include/asm-generic/qspinlock.h | 2 +
> > > > > > include/asm-generic/ticket_spinlock.h | 2 +
> > > > > > 7 files changed, 66 insertions(+), 2 deletions(-)
> > > > > > create mode 100644 arch/riscv/include/asm/spinlock.h
> > > > > >
> > > > > > diff --git a/Documentation/features/locking/queued-spinlocks/arch-support.txt b/Documentation/features/locking/queued-spinlocks/arch-support.txt
> > > > > > index 22f2990392ff..cf26042480e2 100644
> > > > > > --- a/Documentation/features/locking/queued-spinlocks/arch-support.txt
> > > > > > +++ b/Documentation/features/locking/queued-spinlocks/arch-support.txt
> > > > > > @@ -20,7 +20,7 @@
> > > > > > | openrisc: | ok |
> > > > > > | parisc: | TODO |
> > > > > > | powerpc: | ok |
> > > > > > - | riscv: | TODO |
> > > > > > + | riscv: | ok |
> > > > > > | s390: | TODO |
> > > > > > | sh: | TODO |
> > > > > > | sparc: | ok |
> > > > > > diff --git a/arch/riscv/Kconfig b/arch/riscv/Kconfig
> > > > > > index 184a9edb04e0..ccf1703edeb9 100644
> > > > > > --- a/arch/riscv/Kconfig
> > > > > > +++ b/arch/riscv/Kconfig
> > > > > > @@ -59,6 +59,7 @@ config RISCV
> > > > > > select ARCH_SUPPORTS_SHADOW_CALL_STACK if HAVE_SHADOW_CALL_STACK
> > > > > > select ARCH_USE_MEMTEST
> > > > > > select ARCH_USE_QUEUED_RWLOCKS
> > > > > > + select ARCH_USE_QUEUED_SPINLOCKS if TOOLCHAIN_HAS_ZABHA
> > > > > Using qspinlock or not depends on real hardware capabilities, not the
> > > > > compiler flag. That's why I introduced combo-spinlock, ticket-spinlock
> > > > > & qspinlock three Kconfigs, and the combo-spinlock would compat all
> > > > > hardware platforms but waste some qspinlock code size.
> > > >
> > > > You're right, and I think your comment matches what Conor mentioned
> > > > about the lack of clarity with some extensions: TOOLCHAIN_HAS_ZABHA
> > > > will allow a platform with Zabha capability to use qspinlocks. But if
> > > > the hardware does not, it will fallback to the ticket spinlocks.
> > > >
> > > > But I agree that looking at the config alone may be misleading, even
> > > > though it will work as expected at runtime. So I agree with you:
> > > > unless anyone is strongly against the combo spinlocks, I will do what
> > > > you suggest and add them.
> > > The problem with the v12 combo-spinlock is using a static_branch
> > > instead of the full ALTERNATIVE. Frankly, that's a bad example that
> > > costs more code space. I found that your cmpxchg32/64 also uses a
> > > condition branch, which has a similar problem, right?
> > >
> > > Anyway, your patch series inspired me to update the v13
> > > combo-spinlock. My plan is:
> > > 1. Separate native-qspinlock out of paravirt-qspinlock.
> > > 2. Re-design an ALTERNATIVE(asm) code instead of static_branch generic
> > > ticket-lock or qspinlock.
> >
> > What's your plan to make use of alternatives here? The alternatives
> > patching depends on the discovery of the extensions, which is done too
> > late, at least after the first use of a spinlock (the printk
> > spinlock). So you'd need to find a way to first use qspinlocks (but
> > without knowing Zabha is available) and then do the correct patching:
> I do that in v12:
> 1. Use qspinlock as init.
> 2. Change to ticket-lock or not.
> (Only qspinlock -> ticket-lock, No reverse direction)
>
> If there is no contention, Qspinlock is okay for all platforms before
> smp bringup & no-irq environment.
>

Yes, by using static keys, not alternatives. My question was: how do
you plan to use alternatives here instead of static keys? To me, it's
not that simple, hence my suggestions in my previous answer.

Thanks,

Alex

> > an idea here could be to add an "init" value to the alternatives and
> > let the patching process do the right thing when the extensions are
> > known.
> >
> > Another solution would be the early discovery of the extensions, but I
> > took a look and it's easy with a device tree, but not with ACPI.
> >
> > Let me know what you plan to do and how I can help!
> >
> > Thanks,
> >
> > Alex
> >
> > >
> > > What do you think?
> > >
> > >
> > > >
> > > > Thanks again for your initial work,
> > > >
> > > > Alex
> > > >
> > > > >
> > > > > > select ARCH_USES_CFI_TRAPS if CFI_CLANG
> > > > > > select ARCH_WANT_BATCHED_UNMAP_TLB_FLUSH if SMP && MMU
> > > > > > select ARCH_WANT_DEFAULT_TOPDOWN_MMAP_LAYOUT if MMU
> > > > > > diff --git a/arch/riscv/include/asm/Kbuild b/arch/riscv/include/asm/Kbuild
> > > > > > index 504f8b7e72d4..ad72f2bd4cc9 100644
> > > > > > --- a/arch/riscv/include/asm/Kbuild
> > > > > > +++ b/arch/riscv/include/asm/Kbuild
> > > > > > @@ -2,10 +2,12 @@
> > > > > > generic-y += early_ioremap.h
> > > > > > generic-y += flat.h
> > > > > > generic-y += kvm_para.h
> > > > > > +generic-y += mcs_spinlock.h
> > > > > > generic-y += parport.h
> > > > > > -generic-y += spinlock.h
> > > > > > generic-y += spinlock_types.h
> > > > > > +generic-y += ticket_spinlock.h
> > > > > > generic-y += qrwlock.h
> > > > > > generic-y += qrwlock_types.h
> > > > > > +generic-y += qspinlock.h
> > > > > > generic-y += user.h
> > > > > > generic-y += vmlinux.lds.h
> > > > > > diff --git a/arch/riscv/include/asm/spinlock.h b/arch/riscv/include/asm/spinlock.h
> > > > > > new file mode 100644
> > > > > > index 000000000000..e00429ac20ed
> > > > > > --- /dev/null
> > > > > > +++ b/arch/riscv/include/asm/spinlock.h
> > > > > > @@ -0,0 +1,39 @@
> > > > > > +/* SPDX-License-Identifier: GPL-2.0 */
> > > > > > +
> > > > > > +#ifndef __ASM_RISCV_SPINLOCK_H
> > > > > > +#define __ASM_RISCV_SPINLOCK_H
> > > > > > +
> > > > > > +#ifdef CONFIG_QUEUED_SPINLOCKS
> > > > > > +#define _Q_PENDING_LOOPS (1 << 9)
> > > > > > +
> > > > > > +#define __no_arch_spinlock_redefine
> > > > > > +#include <asm/ticket_spinlock.h>
> > > > > > +#include <asm/qspinlock.h>
> > > > > > +#include <asm/alternative.h>
> > > > > > +
> > > > > > +DECLARE_STATIC_KEY_TRUE(qspinlock_key);
> > > > > > +
> > > > > > +#define SPINLOCK_BASE_DECLARE(op, type, type_lock) \
> > > > > > +static __always_inline type arch_spin_##op(type_lock lock) \
> > > > > > +{ \
> > > > > > + if (static_branch_unlikely(&qspinlock_key)) \
> > > > > > + return queued_spin_##op(lock); \
> > > > > > + return ticket_spin_##op(lock); \
> > > > > > +}
> > > > > > +
> > > > > > +SPINLOCK_BASE_DECLARE(lock, void, arch_spinlock_t *)
> > > > > > +SPINLOCK_BASE_DECLARE(unlock, void, arch_spinlock_t *)
> > > > > > +SPINLOCK_BASE_DECLARE(is_locked, int, arch_spinlock_t *)
> > > > > > +SPINLOCK_BASE_DECLARE(is_contended, int, arch_spinlock_t *)
> > > > > > +SPINLOCK_BASE_DECLARE(trylock, bool, arch_spinlock_t *)
> > > > > > +SPINLOCK_BASE_DECLARE(value_unlocked, int, arch_spinlock_t)
> > > > > > +
> > > > > > +#else
> > > > > > +
> > > > > > +#include <asm/ticket_spinlock.h>
> > > > > > +
> > > > > > +#endif
> > > > > > +
> > > > > > +#include <asm/qrwlock.h>
> > > > > > +
> > > > > > +#endif /* __ASM_RISCV_SPINLOCK_H */
> > > > > > diff --git a/arch/riscv/kernel/setup.c b/arch/riscv/kernel/setup.c
> > > > > > index 4f73c0ae44b2..31ce75522fd4 100644
> > > > > > --- a/arch/riscv/kernel/setup.c
> > > > > > +++ b/arch/riscv/kernel/setup.c
> > > > > > @@ -244,6 +244,23 @@ static void __init parse_dtb(void)
> > > > > > #endif
> > > > > > }
> > > > > >
> > > > > > +DEFINE_STATIC_KEY_TRUE(qspinlock_key);
> > > > > > +EXPORT_SYMBOL(qspinlock_key);
> > > > > > +
> > > > > > +static void __init riscv_spinlock_init(void)
> > > > > > +{
> > > > > > + asm goto(ALTERNATIVE("nop", "j %[qspinlock]", 0, RISCV_ISA_EXT_ZABHA, 1)
> > > > > > + : : : : qspinlock);
> > > > > > +
> > > > > > + static_branch_disable(&qspinlock_key);
> > > > > > + pr_info("Ticket spinlock: enabled\n");
> > > > > > +
> > > > > > + return;
> > > > > > +
> > > > > > +qspinlock:
> > > > > > + pr_info("Queued spinlock: enabled\n");
> > > > > > +}
> > > > > > +
> > > > > > extern void __init init_rt_signal_env(void);
> > > > > >
> > > > > > void __init setup_arch(char **cmdline_p)
> > > > > > @@ -295,6 +312,7 @@ void __init setup_arch(char **cmdline_p)
> > > > > > riscv_set_dma_cache_alignment();
> > > > > >
> > > > > > riscv_user_isa_enable();
> > > > > > + riscv_spinlock_init();
> > > > > > }
> > > > > >
> > > > > > bool arch_cpu_is_hotpluggable(int cpu)
> > > > > > diff --git a/include/asm-generic/qspinlock.h b/include/asm-generic/qspinlock.h
> > > > > > index 0655aa5b57b2..bf47cca2c375 100644
> > > > > > --- a/include/asm-generic/qspinlock.h
> > > > > > +++ b/include/asm-generic/qspinlock.h
> > > > > > @@ -136,6 +136,7 @@ static __always_inline bool virt_spin_lock(struct qspinlock *lock)
> > > > > > }
> > > > > > #endif
> > > > > >
> > > > > > +#ifndef __no_arch_spinlock_redefine
> > > > > > /*
> > > > > > * Remapping spinlock architecture specific functions to the corresponding
> > > > > > * queued spinlock functions.
> > > > > > @@ -146,5 +147,6 @@ static __always_inline bool virt_spin_lock(struct qspinlock *lock)
> > > > > > #define arch_spin_lock(l) queued_spin_lock(l)
> > > > > > #define arch_spin_trylock(l) queued_spin_trylock(l)
> > > > > > #define arch_spin_unlock(l) queued_spin_unlock(l)
> > > > > > +#endif
> > > > > >
> > > > > > #endif /* __ASM_GENERIC_QSPINLOCK_H */
> > > > > > diff --git a/include/asm-generic/ticket_spinlock.h b/include/asm-generic/ticket_spinlock.h
> > > > > > index cfcff22b37b3..325779970d8a 100644
> > > > > > --- a/include/asm-generic/ticket_spinlock.h
> > > > > > +++ b/include/asm-generic/ticket_spinlock.h
> > > > > > @@ -89,6 +89,7 @@ static __always_inline int ticket_spin_is_contended(arch_spinlock_t *lock)
> > > > > > return (s16)((val >> 16) - (val & 0xffff)) > 1;
> > > > > > }
> > > > > >
> > > > > > +#ifndef __no_arch_spinlock_redefine
> > > > > > /*
> > > > > > * Remapping spinlock architecture specific functions to the corresponding
> > > > > > * ticket spinlock functions.
> > > > > > @@ -99,5 +100,6 @@ static __always_inline int ticket_spin_is_contended(arch_spinlock_t *lock)
> > > > > > #define arch_spin_lock(l) ticket_spin_lock(l)
> > > > > > #define arch_spin_trylock(l) ticket_spin_trylock(l)
> > > > > > #define arch_spin_unlock(l) ticket_spin_unlock(l)
> > > > > > +#endif
> > > > > >
> > > > > > #endif /* __ASM_GENERIC_TICKET_SPINLOCK_H */
> > > > > > --
> > > > > > 2.39.2
> > > > > >
> > > > >
> > > > >
> > > > > --
> > > > > Best Regards
> > > > > Guo Ren
> > >
> > >
> > >
> > > --
> > > Best Regards
> > > Guo Ren
>
>
>
> --
> Best Regards
> Guo Ren

2024-05-31 06:42:57

by Guo Ren

[permalink] [raw]
Subject: Re: [PATCH 7/7] riscv: Add qspinlock support based on Zabha extension

On Fri, May 31, 2024 at 2:22 PM Alexandre Ghiti <[email protected]> wrote:
>
> On Fri, May 31, 2024 at 3:57 AM Guo Ren <[email protected]> wrote:
> >
> > On Thu, May 30, 2024 at 1:30 PM Alexandre Ghiti <[email protected]> wrote:
> > >
> > > Hi Guo,
> > >
> > > On Thu, May 30, 2024 at 3:55 AM Guo Ren <[email protected]> wrote:
> > > >
> > > > On Wed, May 29, 2024 at 9:03 PM Alexandre Ghiti <[email protected]> wrote:
> > > > >
> > > > > Hi Guo,
> > > > >
> > > > > On Wed, May 29, 2024 at 11:24 AM Guo Ren <[email protected]> wrote:
> > > > > >
> > > > > > On Tue, May 28, 2024 at 11:18 PM Alexandre Ghiti <[email protected]> wrote:
> > > > > > >
> > > > > > > In order to produce a generic kernel, a user can select
> > > > > > > CONFIG_QUEUED_SPINLOCKS which will fallback at runtime to the ticket
> > > > > > > spinlock implementation if Zabha is not present.
> > > > > > >
> > > > > > > Note that we can't use alternatives here because the discovery of
> > > > > > > extensions is done too late and we need to start with the qspinlock
> > > > > > > implementation because the ticket spinlock implementation would pollute
> > > > > > > the spinlock value, so let's use static keys.
> > > > > > >
> > > > > > > This is largely based on Guo's work and Leonardo reviews at [1].
> > > > > > >
> > > > > > > Link: https://lore.kernel.org/linux-riscv/[email protected]/ [1]
> > > > > > > Signed-off-by: Alexandre Ghiti <[email protected]>
> > > > > > > ---
> > > > > > > .../locking/queued-spinlocks/arch-support.txt | 2 +-
> > > > > > > arch/riscv/Kconfig | 1 +
> > > > > > > arch/riscv/include/asm/Kbuild | 4 +-
> > > > > > > arch/riscv/include/asm/spinlock.h | 39 +++++++++++++++++++
> > > > > > > arch/riscv/kernel/setup.c | 18 +++++++++
> > > > > > > include/asm-generic/qspinlock.h | 2 +
> > > > > > > include/asm-generic/ticket_spinlock.h | 2 +
> > > > > > > 7 files changed, 66 insertions(+), 2 deletions(-)
> > > > > > > create mode 100644 arch/riscv/include/asm/spinlock.h
> > > > > > >
> > > > > > > diff --git a/Documentation/features/locking/queued-spinlocks/arch-support.txt b/Documentation/features/locking/queued-spinlocks/arch-support.txt
> > > > > > > index 22f2990392ff..cf26042480e2 100644
> > > > > > > --- a/Documentation/features/locking/queued-spinlocks/arch-support.txt
> > > > > > > +++ b/Documentation/features/locking/queued-spinlocks/arch-support.txt
> > > > > > > @@ -20,7 +20,7 @@
> > > > > > > | openrisc: | ok |
> > > > > > > | parisc: | TODO |
> > > > > > > | powerpc: | ok |
> > > > > > > - | riscv: | TODO |
> > > > > > > + | riscv: | ok |
> > > > > > > | s390: | TODO |
> > > > > > > | sh: | TODO |
> > > > > > > | sparc: | ok |
> > > > > > > diff --git a/arch/riscv/Kconfig b/arch/riscv/Kconfig
> > > > > > > index 184a9edb04e0..ccf1703edeb9 100644
> > > > > > > --- a/arch/riscv/Kconfig
> > > > > > > +++ b/arch/riscv/Kconfig
> > > > > > > @@ -59,6 +59,7 @@ config RISCV
> > > > > > > select ARCH_SUPPORTS_SHADOW_CALL_STACK if HAVE_SHADOW_CALL_STACK
> > > > > > > select ARCH_USE_MEMTEST
> > > > > > > select ARCH_USE_QUEUED_RWLOCKS
> > > > > > > + select ARCH_USE_QUEUED_SPINLOCKS if TOOLCHAIN_HAS_ZABHA
> > > > > > Using qspinlock or not depends on real hardware capabilities, not the
> > > > > > compiler flag. That's why I introduced combo-spinlock, ticket-spinlock
> > > > > > & qspinlock three Kconfigs, and the combo-spinlock would compat all
> > > > > > hardware platforms but waste some qspinlock code size.
> > > > >
> > > > > You're right, and I think your comment matches what Conor mentioned
> > > > > about the lack of clarity with some extensions: TOOLCHAIN_HAS_ZABHA
> > > > > will allow a platform with Zabha capability to use qspinlocks. But if
> > > > > the hardware does not, it will fallback to the ticket spinlocks.
> > > > >
> > > > > But I agree that looking at the config alone may be misleading, even
> > > > > though it will work as expected at runtime. So I agree with you:
> > > > > unless anyone is strongly against the combo spinlocks, I will do what
> > > > > you suggest and add them.
> > > > The problem with the v12 combo-spinlock is using a static_branch
> > > > instead of the full ALTERNATIVE. Frankly, that's a bad example that
> > > > costs more code space. I found that your cmpxchg32/64 also uses a
> > > > condition branch, which has a similar problem, right?
> > > >
> > > > Anyway, your patch series inspired me to update the v13
> > > > combo-spinlock. My plan is:
> > > > 1. Separate native-qspinlock out of paravirt-qspinlock.
> > > > 2. Re-design an ALTERNATIVE(asm) code instead of static_branch generic
> > > > ticket-lock or qspinlock.
> > >
> > > What's your plan to make use of alternatives here? The alternatives
> > > patching depends on the discovery of the extensions, which is done too
> > > late, at least after the first use of a spinlock (the printk
> > > spinlock). So you'd need to find a way to first use qspinlocks (but
> > > without knowing Zabha is available) and then do the correct patching:
> > I do that in v12:
> > 1. Use qspinlock as init.
> > 2. Change to ticket-lock or not.
> > (Only qspinlock -> ticket-lock, No reverse direction)
> >
> > If there is no contention, Qspinlock is okay for all platforms before
> > smp bringup & no-irq environment.
> >
>
> Yes, by using static keys not alternatives. My question was: how do
> you plan to use alternatives here instead of static keys? To me, it's
> not that simple, hence my suggestions in my previous answer.
Yes, it's not that simple. The current framework doesn't support that
and has two problems:
1. We need to re-implement ticket-lock & qspinlock-fast-path with assembly code.
2. The current alternatives patching is only for extensions, but qspinlock is
not a formal extension. Could we accept
__RISCV_ISA_EXT_DATA(xqspinlock, RISCV_ISA_EXT_XQSPINLOCK)?
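
Something like the sketch below, purely hypothetical: RISCV_ISA_EXT_XQSPINLOCK
does not exist today and would have to be invented as a "pseudo" extension so
that ALTERNATIVE() could key on it; the other array entries are elided.

/* arch/riscv/kernel/cpufeature.c (sketch only) */
const struct riscv_isa_ext_data riscv_isa_ext[] = {
	/* ... real extensions elided ... */
	__RISCV_ISA_EXT_DATA(zabha, RISCV_ISA_EXT_ZABHA),
	/* hypothetical pseudo-extension the alternatives could patch on */
	__RISCV_ISA_EXT_DATA(xqspinlock, RISCV_ISA_EXT_XQSPINLOCK),
};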

>
> Thanks,
>
> Alex
>
> > > an idea here could be to add an "init" value to the alternatives and
> > > let the patching process do the right thing when the extensions are
> > > known.
> > >
> > > Another solution would be the early discovery of the extensions, but I
> > > took a look and it's easy with a device tree, but not with ACPI.
> > >
> > > Let me know what you plan to do and how I can help!
> > >
> > > Thanks,
> > >
> > > Alex
> > >
> > > >
> > > > What do you think?
> > > >
> > > >
> > > > >
> > > > > Thanks again for your initial work,
> > > > >
> > > > > Alex
> > > > >
> > > > > >
> > > > > > > select ARCH_USES_CFI_TRAPS if CFI_CLANG
> > > > > > > select ARCH_WANT_BATCHED_UNMAP_TLB_FLUSH if SMP && MMU
> > > > > > > select ARCH_WANT_DEFAULT_TOPDOWN_MMAP_LAYOUT if MMU
> > > > > > > diff --git a/arch/riscv/include/asm/Kbuild b/arch/riscv/include/asm/Kbuild
> > > > > > > index 504f8b7e72d4..ad72f2bd4cc9 100644
> > > > > > > --- a/arch/riscv/include/asm/Kbuild
> > > > > > > +++ b/arch/riscv/include/asm/Kbuild
> > > > > > > @@ -2,10 +2,12 @@
> > > > > > > generic-y += early_ioremap.h
> > > > > > > generic-y += flat.h
> > > > > > > generic-y += kvm_para.h
> > > > > > > +generic-y += mcs_spinlock.h
> > > > > > > generic-y += parport.h
> > > > > > > -generic-y += spinlock.h
> > > > > > > generic-y += spinlock_types.h
> > > > > > > +generic-y += ticket_spinlock.h
> > > > > > > generic-y += qrwlock.h
> > > > > > > generic-y += qrwlock_types.h
> > > > > > > +generic-y += qspinlock.h
> > > > > > > generic-y += user.h
> > > > > > > generic-y += vmlinux.lds.h
> > > > > > > diff --git a/arch/riscv/include/asm/spinlock.h b/arch/riscv/include/asm/spinlock.h
> > > > > > > new file mode 100644
> > > > > > > index 000000000000..e00429ac20ed
> > > > > > > --- /dev/null
> > > > > > > +++ b/arch/riscv/include/asm/spinlock.h
> > > > > > > @@ -0,0 +1,39 @@
> > > > > > > +/* SPDX-License-Identifier: GPL-2.0 */
> > > > > > > +
> > > > > > > +#ifndef __ASM_RISCV_SPINLOCK_H
> > > > > > > +#define __ASM_RISCV_SPINLOCK_H
> > > > > > > +
> > > > > > > +#ifdef CONFIG_QUEUED_SPINLOCKS
> > > > > > > +#define _Q_PENDING_LOOPS (1 << 9)
> > > > > > > +
> > > > > > > +#define __no_arch_spinlock_redefine
> > > > > > > +#include <asm/ticket_spinlock.h>
> > > > > > > +#include <asm/qspinlock.h>
> > > > > > > +#include <asm/alternative.h>
> > > > > > > +
> > > > > > > +DECLARE_STATIC_KEY_TRUE(qspinlock_key);
> > > > > > > +
> > > > > > > +#define SPINLOCK_BASE_DECLARE(op, type, type_lock) \
> > > > > > > +static __always_inline type arch_spin_##op(type_lock lock) \
> > > > > > > +{ \
> > > > > > > + if (static_branch_unlikely(&qspinlock_key)) \
> > > > > > > + return queued_spin_##op(lock); \
> > > > > > > + return ticket_spin_##op(lock); \
> > > > > > > +}
> > > > > > > +
> > > > > > > +SPINLOCK_BASE_DECLARE(lock, void, arch_spinlock_t *)
> > > > > > > +SPINLOCK_BASE_DECLARE(unlock, void, arch_spinlock_t *)
> > > > > > > +SPINLOCK_BASE_DECLARE(is_locked, int, arch_spinlock_t *)
> > > > > > > +SPINLOCK_BASE_DECLARE(is_contended, int, arch_spinlock_t *)
> > > > > > > +SPINLOCK_BASE_DECLARE(trylock, bool, arch_spinlock_t *)
> > > > > > > +SPINLOCK_BASE_DECLARE(value_unlocked, int, arch_spinlock_t)
> > > > > > > +
> > > > > > > +#else
> > > > > > > +
> > > > > > > +#include <asm/ticket_spinlock.h>
> > > > > > > +
> > > > > > > +#endif
> > > > > > > +
> > > > > > > +#include <asm/qrwlock.h>
> > > > > > > +
> > > > > > > +#endif /* __ASM_RISCV_SPINLOCK_H */
> > > > > > > diff --git a/arch/riscv/kernel/setup.c b/arch/riscv/kernel/setup.c
> > > > > > > index 4f73c0ae44b2..31ce75522fd4 100644
> > > > > > > --- a/arch/riscv/kernel/setup.c
> > > > > > > +++ b/arch/riscv/kernel/setup.c
> > > > > > > @@ -244,6 +244,23 @@ static void __init parse_dtb(void)
> > > > > > > #endif
> > > > > > > }
> > > > > > >
> > > > > > > +DEFINE_STATIC_KEY_TRUE(qspinlock_key);
> > > > > > > +EXPORT_SYMBOL(qspinlock_key);
> > > > > > > +
> > > > > > > +static void __init riscv_spinlock_init(void)
> > > > > > > +{
> > > > > > > + asm goto(ALTERNATIVE("nop", "j %[qspinlock]", 0, RISCV_ISA_EXT_ZABHA, 1)
> > > > > > > + : : : : qspinlock);
> > > > > > > +
> > > > > > > + static_branch_disable(&qspinlock_key);
> > > > > > > + pr_info("Ticket spinlock: enabled\n");
> > > > > > > +
> > > > > > > + return;
> > > > > > > +
> > > > > > > +qspinlock:
> > > > > > > + pr_info("Queued spinlock: enabled\n");
> > > > > > > +}
> > > > > > > +
> > > > > > > extern void __init init_rt_signal_env(void);
> > > > > > >
> > > > > > > void __init setup_arch(char **cmdline_p)
> > > > > > > @@ -295,6 +312,7 @@ void __init setup_arch(char **cmdline_p)
> > > > > > > riscv_set_dma_cache_alignment();
> > > > > > >
> > > > > > > riscv_user_isa_enable();
> > > > > > > + riscv_spinlock_init();
> > > > > > > }
> > > > > > >
> > > > > > > bool arch_cpu_is_hotpluggable(int cpu)
> > > > > > > diff --git a/include/asm-generic/qspinlock.h b/include/asm-generic/qspinlock.h
> > > > > > > index 0655aa5b57b2..bf47cca2c375 100644
> > > > > > > --- a/include/asm-generic/qspinlock.h
> > > > > > > +++ b/include/asm-generic/qspinlock.h
> > > > > > > @@ -136,6 +136,7 @@ static __always_inline bool virt_spin_lock(struct qspinlock *lock)
> > > > > > > }
> > > > > > > #endif
> > > > > > >
> > > > > > > +#ifndef __no_arch_spinlock_redefine
> > > > > > > /*
> > > > > > > * Remapping spinlock architecture specific functions to the corresponding
> > > > > > > * queued spinlock functions.
> > > > > > > @@ -146,5 +147,6 @@ static __always_inline bool virt_spin_lock(struct qspinlock *lock)
> > > > > > > #define arch_spin_lock(l) queued_spin_lock(l)
> > > > > > > #define arch_spin_trylock(l) queued_spin_trylock(l)
> > > > > > > #define arch_spin_unlock(l) queued_spin_unlock(l)
> > > > > > > +#endif
> > > > > > >
> > > > > > > #endif /* __ASM_GENERIC_QSPINLOCK_H */
> > > > > > > diff --git a/include/asm-generic/ticket_spinlock.h b/include/asm-generic/ticket_spinlock.h
> > > > > > > index cfcff22b37b3..325779970d8a 100644
> > > > > > > --- a/include/asm-generic/ticket_spinlock.h
> > > > > > > +++ b/include/asm-generic/ticket_spinlock.h
> > > > > > > @@ -89,6 +89,7 @@ static __always_inline int ticket_spin_is_contended(arch_spinlock_t *lock)
> > > > > > > return (s16)((val >> 16) - (val & 0xffff)) > 1;
> > > > > > > }
> > > > > > >
> > > > > > > +#ifndef __no_arch_spinlock_redefine
> > > > > > > /*
> > > > > > > * Remapping spinlock architecture specific functions to the corresponding
> > > > > > > * ticket spinlock functions.
> > > > > > > @@ -99,5 +100,6 @@ static __always_inline int ticket_spin_is_contended(arch_spinlock_t *lock)
> > > > > > > #define arch_spin_lock(l) ticket_spin_lock(l)
> > > > > > > #define arch_spin_trylock(l) ticket_spin_trylock(l)
> > > > > > > #define arch_spin_unlock(l) ticket_spin_unlock(l)
> > > > > > > +#endif
> > > > > > >
> > > > > > > #endif /* __ASM_GENERIC_TICKET_SPINLOCK_H */
> > > > > > > --
> > > > > > > 2.39.2
> > > > > > >
> > > > > >
> > > > > >
> > > > > > --
> > > > > > Best Regards
> > > > > > Guo Ren
> > > >
> > > >
> > > >
> > > > --
> > > > Best Regards
> > > > Guo Ren
> >
> >
> >
> > --
> > Best Regards
> > Guo Ren



--
Best Regards
Guo Ren

2024-05-31 13:10:45

by Guo Ren

[permalink] [raw]
Subject: Re: [PATCH 7/7] riscv: Add qspinlock support based on Zabha extension

On Wed, May 29, 2024 at 9:03 PM Alexandre Ghiti <[email protected]> wrote:
>
> Hi Guo,
>
> On Wed, May 29, 2024 at 11:24 AM Guo Ren <[email protected]> wrote:
> >
> > On Tue, May 28, 2024 at 11:18 PM Alexandre Ghiti <[email protected]> wrote:
> > >
> > > In order to produce a generic kernel, a user can select
> > > CONFIG_QUEUED_SPINLOCKS which will fallback at runtime to the ticket
> > > spinlock implementation if Zabha is not present.
> > >
> > > Note that we can't use alternatives here because the discovery of
> > > extensions is done too late and we need to start with the qspinlock
> > > implementation because the ticket spinlock implementation would pollute
> > > the spinlock value, so let's use static keys.
Zabha is not a prerequisite for qspinlock; the prerequisite for
qspinlock is the *forward progress guarantee* in the atomic operation
loop during intense contention. Even with Zabha enabled to meet the
requirements of xchg_tail, that still only applies when the number of
CPUs is less than 16K. The qspinlock implements xchg_tail with a cmpxchg
loop when the number of cores is more than 16K. Thus, hardware
support for Zabha does not equate to the safe use of qspinlock.
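
(For reference, the two xchg_tail() flavours in kernel/locking/qspinlock.c
look roughly like the sketch below. This is abridged and paraphrased, not the
verbatim upstream code, but it shows why Zabha only helps the sub-16K-CPU
case.)

#if _Q_PENDING_BITS == 8		/* NR_CPUS < 16K */
static __always_inline u32 xchg_tail(struct qspinlock *lock, u32 tail)
{
	/*
	 * Single 16-bit exchange on the tail halfword: with Zabha this can
	 * be a single AMO instruction, which by itself provides the forward
	 * progress guarantee.
	 */
	return (u32)xchg_relaxed(&lock->tail,
				 tail >> _Q_TAIL_OFFSET) << _Q_TAIL_OFFSET;
}
#else					/* NR_CPUS >= 16K */
static __always_inline u32 xchg_tail(struct qspinlock *lock, u32 tail)
{
	u32 old, new, val = atomic_read(&lock->val);

	/*
	 * cmpxchg loop on the whole lock word: even with Zabha/Zacas this
	 * can fail to make progress under heavy contention unless the
	 * hardware guarantees the CAS eventually succeeds.
	 */
	for (;;) {
		new = (val & _Q_LOCKED_PENDING_MASK) | tail;
		old = atomic_cmpxchg_relaxed(&lock->val, val, new);
		if (old == val)
			break;
		val = old;
	}

	return old;
}
#endif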

Therefore, I would like to propose a new ISA extension: Zafpg (Atomic
Forward Progress Guarantee). If RISC-V vendors can ensure the progress
of LR/SC or CMPXCHG LOOP at the microarchitectural level or if cache
lines are sufficiently sticky, they could then claim support for this
extension. Linux could then select different spinlock implementations
based on this extension's support or not.

> > >
> > > This is largely based on Guo's work and Leonardo reviews at [1].
> > >
> > > Link: https://lore.kernel.org/linux-riscv/[email protected]/ [1]
> > > Signed-off-by: Alexandre Ghiti <[email protected]>
> > > ---
> > > .../locking/queued-spinlocks/arch-support.txt | 2 +-
> > > arch/riscv/Kconfig | 1 +
> > > arch/riscv/include/asm/Kbuild | 4 +-
> > > arch/riscv/include/asm/spinlock.h | 39 +++++++++++++++++++
> > > arch/riscv/kernel/setup.c | 18 +++++++++
> > > include/asm-generic/qspinlock.h | 2 +
> > > include/asm-generic/ticket_spinlock.h | 2 +
> > > 7 files changed, 66 insertions(+), 2 deletions(-)
> > > create mode 100644 arch/riscv/include/asm/spinlock.h
> > >
> > > diff --git a/Documentation/features/locking/queued-spinlocks/arch-support.txt b/Documentation/features/locking/queued-spinlocks/arch-support.txt
> > > index 22f2990392ff..cf26042480e2 100644
> > > --- a/Documentation/features/locking/queued-spinlocks/arch-support.txt
> > > +++ b/Documentation/features/locking/queued-spinlocks/arch-support.txt
> > > @@ -20,7 +20,7 @@
> > > | openrisc: | ok |
> > > | parisc: | TODO |
> > > | powerpc: | ok |
> > > - | riscv: | TODO |
> > > + | riscv: | ok |
> > > | s390: | TODO |
> > > | sh: | TODO |
> > > | sparc: | ok |
> > > diff --git a/arch/riscv/Kconfig b/arch/riscv/Kconfig
> > > index 184a9edb04e0..ccf1703edeb9 100644
> > > --- a/arch/riscv/Kconfig
> > > +++ b/arch/riscv/Kconfig
> > > @@ -59,6 +59,7 @@ config RISCV
> > > select ARCH_SUPPORTS_SHADOW_CALL_STACK if HAVE_SHADOW_CALL_STACK
> > > select ARCH_USE_MEMTEST
> > > select ARCH_USE_QUEUED_RWLOCKS
> > > + select ARCH_USE_QUEUED_SPINLOCKS if TOOLCHAIN_HAS_ZABHA
> > Using qspinlock or not depends on real hardware capabilities, not the
> > compiler flag. That's why I introduced combo-spinlock, ticket-spinlock
> > & qspinlock three Kconfigs, and the combo-spinlock would compat all
> > hardware platforms but waste some qspinlock code size.
>
> You're right, and I think your comment matches what Conor mentioned
> about the lack of clarity with some extensions: TOOLCHAIN_HAS_ZABHA
> will allow a platform with Zabha capability to use qspinlocks. But if
> the hardware does not, it will fallback to the ticket spinlocks.
>
> But I agree that looking at the config alone may be misleading, even
> though it will work as expected at runtime. So I agree with you:
> unless anyone is strongly against the combo spinlocks, I will do what
> you suggest and add them.
>
> Thanks again for your initial work,
>
> Alex
>
> >
> > > select ARCH_USES_CFI_TRAPS if CFI_CLANG
> > > select ARCH_WANT_BATCHED_UNMAP_TLB_FLUSH if SMP && MMU
> > > select ARCH_WANT_DEFAULT_TOPDOWN_MMAP_LAYOUT if MMU
> > > diff --git a/arch/riscv/include/asm/Kbuild b/arch/riscv/include/asm/Kbuild
> > > index 504f8b7e72d4..ad72f2bd4cc9 100644
> > > --- a/arch/riscv/include/asm/Kbuild
> > > +++ b/arch/riscv/include/asm/Kbuild
> > > @@ -2,10 +2,12 @@
> > > generic-y += early_ioremap.h
> > > generic-y += flat.h
> > > generic-y += kvm_para.h
> > > +generic-y += mcs_spinlock.h
> > > generic-y += parport.h
> > > -generic-y += spinlock.h
> > > generic-y += spinlock_types.h
> > > +generic-y += ticket_spinlock.h
> > > generic-y += qrwlock.h
> > > generic-y += qrwlock_types.h
> > > +generic-y += qspinlock.h
> > > generic-y += user.h
> > > generic-y += vmlinux.lds.h
> > > diff --git a/arch/riscv/include/asm/spinlock.h b/arch/riscv/include/asm/spinlock.h
> > > new file mode 100644
> > > index 000000000000..e00429ac20ed
> > > --- /dev/null
> > > +++ b/arch/riscv/include/asm/spinlock.h
> > > @@ -0,0 +1,39 @@
> > > +/* SPDX-License-Identifier: GPL-2.0 */
> > > +
> > > +#ifndef __ASM_RISCV_SPINLOCK_H
> > > +#define __ASM_RISCV_SPINLOCK_H
> > > +
> > > +#ifdef CONFIG_QUEUED_SPINLOCKS
> > > +#define _Q_PENDING_LOOPS (1 << 9)
> > > +
> > > +#define __no_arch_spinlock_redefine
> > > +#include <asm/ticket_spinlock.h>
> > > +#include <asm/qspinlock.h>
> > > +#include <asm/alternative.h>
> > > +
> > > +DECLARE_STATIC_KEY_TRUE(qspinlock_key);
> > > +
> > > +#define SPINLOCK_BASE_DECLARE(op, type, type_lock) \
> > > +static __always_inline type arch_spin_##op(type_lock lock) \
> > > +{ \
> > > + if (static_branch_unlikely(&qspinlock_key)) \
> > > + return queued_spin_##op(lock); \
> > > + return ticket_spin_##op(lock); \
> > > +}
> > > +
> > > +SPINLOCK_BASE_DECLARE(lock, void, arch_spinlock_t *)
> > > +SPINLOCK_BASE_DECLARE(unlock, void, arch_spinlock_t *)
> > > +SPINLOCK_BASE_DECLARE(is_locked, int, arch_spinlock_t *)
> > > +SPINLOCK_BASE_DECLARE(is_contended, int, arch_spinlock_t *)
> > > +SPINLOCK_BASE_DECLARE(trylock, bool, arch_spinlock_t *)
> > > +SPINLOCK_BASE_DECLARE(value_unlocked, int, arch_spinlock_t)
> > > +
> > > +#else
> > > +
> > > +#include <asm/ticket_spinlock.h>
> > > +
> > > +#endif
> > > +
> > > +#include <asm/qrwlock.h>
> > > +
> > > +#endif /* __ASM_RISCV_SPINLOCK_H */
> > > diff --git a/arch/riscv/kernel/setup.c b/arch/riscv/kernel/setup.c
> > > index 4f73c0ae44b2..31ce75522fd4 100644
> > > --- a/arch/riscv/kernel/setup.c
> > > +++ b/arch/riscv/kernel/setup.c
> > > @@ -244,6 +244,23 @@ static void __init parse_dtb(void)
> > > #endif
> > > }
> > >
> > > +DEFINE_STATIC_KEY_TRUE(qspinlock_key);
> > > +EXPORT_SYMBOL(qspinlock_key);
> > > +
> > > +static void __init riscv_spinlock_init(void)
> > > +{
> > > + asm goto(ALTERNATIVE("nop", "j %[qspinlock]", 0, RISCV_ISA_EXT_ZABHA, 1)
> > > + : : : : qspinlock);
> > > +
> > > + static_branch_disable(&qspinlock_key);
> > > + pr_info("Ticket spinlock: enabled\n");
> > > +
> > > + return;
> > > +
> > > +qspinlock:
> > > + pr_info("Queued spinlock: enabled\n");
> > > +}
> > > +
> > > extern void __init init_rt_signal_env(void);
> > >
> > > void __init setup_arch(char **cmdline_p)
> > > @@ -295,6 +312,7 @@ void __init setup_arch(char **cmdline_p)
> > > riscv_set_dma_cache_alignment();
> > >
> > > riscv_user_isa_enable();
> > > + riscv_spinlock_init();
> > > }
> > >
> > > bool arch_cpu_is_hotpluggable(int cpu)
> > > diff --git a/include/asm-generic/qspinlock.h b/include/asm-generic/qspinlock.h
> > > index 0655aa5b57b2..bf47cca2c375 100644
> > > --- a/include/asm-generic/qspinlock.h
> > > +++ b/include/asm-generic/qspinlock.h
> > > @@ -136,6 +136,7 @@ static __always_inline bool virt_spin_lock(struct qspinlock *lock)
> > > }
> > > #endif
> > >
> > > +#ifndef __no_arch_spinlock_redefine
> > > /*
> > > * Remapping spinlock architecture specific functions to the corresponding
> > > * queued spinlock functions.
> > > @@ -146,5 +147,6 @@ static __always_inline bool virt_spin_lock(struct qspinlock *lock)
> > > #define arch_spin_lock(l) queued_spin_lock(l)
> > > #define arch_spin_trylock(l) queued_spin_trylock(l)
> > > #define arch_spin_unlock(l) queued_spin_unlock(l)
> > > +#endif
> > >
> > > #endif /* __ASM_GENERIC_QSPINLOCK_H */
> > > diff --git a/include/asm-generic/ticket_spinlock.h b/include/asm-generic/ticket_spinlock.h
> > > index cfcff22b37b3..325779970d8a 100644
> > > --- a/include/asm-generic/ticket_spinlock.h
> > > +++ b/include/asm-generic/ticket_spinlock.h
> > > @@ -89,6 +89,7 @@ static __always_inline int ticket_spin_is_contended(arch_spinlock_t *lock)
> > > return (s16)((val >> 16) - (val & 0xffff)) > 1;
> > > }
> > >
> > > +#ifndef __no_arch_spinlock_redefine
> > > /*
> > > * Remapping spinlock architecture specific functions to the corresponding
> > > * ticket spinlock functions.
> > > @@ -99,5 +100,6 @@ static __always_inline int ticket_spin_is_contended(arch_spinlock_t *lock)
> > > #define arch_spin_lock(l) ticket_spin_lock(l)
> > > #define arch_spin_trylock(l) ticket_spin_trylock(l)
> > > #define arch_spin_unlock(l) ticket_spin_unlock(l)
> > > +#endif
> > >
> > > #endif /* __ASM_GENERIC_TICKET_SPINLOCK_H */
> > > --
> > > 2.39.2
> > >
> >
> >
> > --
> > Best Regards
> > Guo Ren



--
Best Regards
Guo Ren

2024-05-31 13:40:32

by Alexandre Ghiti

[permalink] [raw]
Subject: Re: [PATCH 7/7] riscv: Add qspinlock support based on Zabha extension

Hi Andrea,

On 29/05/2024 02:55, Andrea Parri wrote:
>> + select ARCH_USE_QUEUED_SPINLOCKS if TOOLCHAIN_HAS_ZABHA
> IIUC, we should make sure qspinlocks run with ARCH_WEAK_RELEASE_ACQUIRE,
> perhaps a similar select for the latter? (not a kconfig expert)


Where did you see this dependency? And if that is really a dependency of
qspinlocks, shouldn't this be under CONFIG_QUEUED_SPINLOCKS? (not a
Kconfig expert either).


>
> Andrea
>
> _______________________________________________
> linux-riscv mailing list
> [email protected]
> http://lists.infradead.org/mailman/listinfo/linux-riscv

2024-05-31 16:23:22

by Andrea Parri

[permalink] [raw]
Subject: Re: [PATCH 7/7] riscv: Add qspinlock support based on Zabha extension

> > > + select ARCH_USE_QUEUED_SPINLOCKS if TOOLCHAIN_HAS_ZABHA
> > IIUC, we should make sure qspinlocks run with ARCH_WEAK_RELEASE_ACQUIRE,
> > perhaps a similar select for the latter? (not a kconfig expert)
>
>
> Where did you see this dependency? And if that is really a dependency of
> qspinlocks, shouldn't this be under CONFIG_QUEUED_SPINLOCKS? (not a Kconfig
> expert too).

The comment on smp_mb__after_unlock_lock() in include/linux/rcupdate.h
(the barrier is currently only used by the RCU subsystem) recalls:

/*
* Place this after a lock-acquisition primitive to guarantee that
* an UNLOCK+LOCK pair acts as a full barrier. This guarantee applies
* if the UNLOCK and LOCK are executed by the same CPU or if the
* UNLOCK and LOCK operate on the same lock variable.
*/
#ifdef CONFIG_ARCH_WEAK_RELEASE_ACQUIRE
#define smp_mb__after_unlock_lock() smp_mb() /* Full ordering for lock. */
#else /* #ifdef CONFIG_ARCH_WEAK_RELEASE_ACQUIRE */
#define smp_mb__after_unlock_lock() do { } while (0)
#endif /* #else #ifdef CONFIG_ARCH_WEAK_RELEASE_ACQUIRE */

Architectures whose UNLOCK+LOCK implementation does not (already) meet
the required "full barrier" ordering property (currently, only powerpc)
can overwrite the "default"/common #define for this barrier (NOP) and
meet the ordering by opting in for ARCH_WEAK_RELEASE_ACQUIRE.

The (current) "generic" ticket lock implementation provides "the full
barrier" in its LOCK operations (hence in part. in UNLOCK+LOCK), cf.

arch_spin_trylock() -> atomic_try_cmpxchg()
arch_spin_lock() -> atomic_fetch_add()
-> atomic_cond_read_acquire(); smp_mb()

but the "UNLOCK+LOCK pairs act as a full barrier" property doesn't hold
true for riscv (and powerpc) when switching over to queued spinlocks.
OTOH, I see no particular reason for other "users" of queued spinlocks
(notably, x86 and arm64) to select ARCH_WEAK_RELEASE_ACQUIRE.
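
(To make the call chain above concrete, here is an abridged sketch of the
generic ticket lock fast path from include/asm-generic/ticket_spinlock.h;
slightly simplified, so treat it as illustrative rather than verbatim.)

static __always_inline void ticket_spin_lock(arch_spinlock_t *lock)
{
	/* Fully ordered RMW: this is where the "full barrier" comes from. */
	u32 val = atomic_fetch_add(1 << 16, &lock->val);
	u16 ticket = val >> 16;

	if (ticket == (u16)val)
		return;	/* uncontended: the fetch_add above is a full fence */

	/*
	 * Contended: wait for our ticket, then restore full ordering with
	 * an explicit smp_mb(), since the cond_read is only an acquire.
	 */
	atomic_cond_read_acquire(&lock->val, ticket == (u16)VAL);
	smp_mb();
}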

But does this address your concern? Let me know if I misunderstood it.

Andrea

2024-06-01 06:18:53

by Guo Ren

[permalink] [raw]
Subject: Re: [PATCH 7/7] riscv: Add qspinlock support based on Zabha extension

On Fri, May 31, 2024 at 11:52 PM Andrea Parri <[email protected]> wrote:
>
> > > > + select ARCH_USE_QUEUED_SPINLOCKS if TOOLCHAIN_HAS_ZABHA
> > > IIUC, we should make sure qspinlocks run with ARCH_WEAK_RELEASE_ACQUIRE,
> > > perhaps a similar select for the latter? (not a kconfig expert)
> >
> >
> > Where did you see this dependency? And if that is really a dependency of
> > qspinlocks, shouldn't this be under CONFIG_QUEUED_SPINLOCKS? (not a Kconfig
> > expert too).
>
> The comment on smp_mb__after_unlock_lock() in include/linux/rcupdate.h
> (the barrier is currently only used by the RCU subsystem) recalls:
>
> /*
> * Place this after a lock-acquisition primitive to guarantee that
> * an UNLOCK+LOCK pair acts as a full barrier. This guarantee applies
> * if the UNLOCK and LOCK are executed by the same CPU or if the
> * UNLOCK and LOCK operate on the same lock variable.
> */
> #ifdef CONFIG_ARCH_WEAK_RELEASE_ACQUIRE
> #define smp_mb__after_unlock_lock() smp_mb() /* Full ordering for lock. */
> #else /* #ifdef CONFIG_ARCH_WEAK_RELEASE_ACQUIRE */
> #define smp_mb__after_unlock_lock() do { } while (0)
> #endif /* #else #ifdef CONFIG_ARCH_WEAK_RELEASE_ACQUIRE */
>
> Architectures whose UNLOCK+LOCK implementation does not (already) meet
> the required "full barrier" ordering property (currently, only powerpc)
> can overwrite the "default"/common #define for this barrier (NOP) and
> meet the ordering by opting in for ARCH_WEAK_RELEASE_ACQUIRE.
>
> The (current) "generic" ticket lock implementation provides "the full
> barrier" in its LOCK operations (hence in part. in UNLOCK+LOCK), cf.
>
> arch_spin_trylock() -> atomic_try_cmpxchg()
> arch_spin_lock() -> atomic_fetch_add()
> -> atomic_cond_read_acquire(); smp_mb()
>
> but the "UNLOCK+LOCK pairs act as a full barrier" property doesn't hold
> true for riscv (and powerpc) when switching over to queued spinlocks.
Yes.

> OTOH, I see no particular reason for other "users" of queued spinlocks
> (notably, x86 and arm64) to select ARCH_WEAK_RELEASE_ACQUIRE.
I looked at the riscv-unprivileged PPO section; it seems RISC-V orders
.rl -> .aq when both carry RCsc annotations.
ref: Explicit Synchronization rules:
5. a has an acquire annotation
6. b has a release annotation
7. a and b both have RCsc annotations

And for qspinlock:
unlock:
smp_store_release(&lock->locked, 0);

lock:
if (likely(atomic_try_cmpxchg_acquire(&lock->val, &val, _Q_LOCKED_VAL)))

If the hardware has Store-Release and CAS instructions, they all obey
Explicit Synchronization rules. Then RISC-V "UNLOCK+LOCK" pairs act as
a full barrier, right?

>
> But does this address your concern? Let me know if I misunderstood it.
>
> Andrea



--
Best Regards
Guo Ren

2024-06-03 00:41:54

by Andrea Parri

[permalink] [raw]
Subject: Re: [PATCH 7/7] riscv: Add qspinlock support based on Zabha extension

> I looked at the riscv-unprivileged PPO section; it seems RISC-V orders
> .rl -> .aq when both carry RCsc annotations.
> ref: Explicit Synchronization rules:
> 5. a has an acquire annotation
> 6. b has a release annotation
> 7. a and b both have RCsc annotations
>
> And for qspinlock:
> unlock:
> smp_store_release(&lock->locked, 0);
>
> lock:
> if (likely(atomic_try_cmpxchg_acquire(&lock->val, &val, _Q_LOCKED_VAL)))
>
> If the hardware has Store-Release and CAS instructions, they all obey
> Explicit Synchronization rules. Then RISC-V "UNLOCK+LOCK" pairs act as
> a full barrier, right?

Presuming you were thinking of CAS.aq (based on your previous remarks
above), that all seems right to me. In fact, the (putative) Store.rl
and an LR.aq would also do it (by the same/mentioned rules).

Andrea

2024-06-03 09:22:20

by Alexandre Ghiti

[permalink] [raw]
Subject: Re: [PATCH 7/7] riscv: Add qspinlock support based on Zabha extension

Hi Guo,

On 31/05/2024 08:42, Guo Ren wrote:
> On Fri, May 31, 2024 at 2:22 PM Alexandre Ghiti <[email protected]> wrote:
>> On Fri, May 31, 2024 at 3:57 AM Guo Ren <[email protected]> wrote:
>>> On Thu, May 30, 2024 at 1:30 PM Alexandre Ghiti <[email protected]> wrote:
>>>> Hi Guo,
>>>>
>>>> On Thu, May 30, 2024 at 3:55 AM Guo Ren <[email protected]> wrote:
>>>>> On Wed, May 29, 2024 at 9:03 PM Alexandre Ghiti <[email protected]> wrote:
>>>>>> Hi Guo,
>>>>>>
>>>>>> On Wed, May 29, 2024 at 11:24 AM Guo Ren <[email protected]> wrote:
>>>>>>> On Tue, May 28, 2024 at 11:18 PM Alexandre Ghiti <[email protected]> wrote:
>>>>>>>> In order to produce a generic kernel, a user can select
>>>>>>>> CONFIG_QUEUED_SPINLOCKS which will fallback at runtime to the ticket
>>>>>>>> spinlock implementation if Zabha is not present.
>>>>>>>>
>>>>>>>> Note that we can't use alternatives here because the discovery of
>>>>>>>> extensions is done too late and we need to start with the qspinlock
>>>>>>>> implementation because the ticket spinlock implementation would pollute
>>>>>>>> the spinlock value, so let's use static keys.
>>>>>>>>
>>>>>>>> This is largely based on Guo's work and Leonardo reviews at [1].
>>>>>>>>
>>>>>>>> Link: https://lore.kernel.org/linux-riscv/[email protected]/ [1]
>>>>>>>> Signed-off-by: Alexandre Ghiti <[email protected]>
>>>>>>>> ---
>>>>>>>> .../locking/queued-spinlocks/arch-support.txt | 2 +-
>>>>>>>> arch/riscv/Kconfig | 1 +
>>>>>>>> arch/riscv/include/asm/Kbuild | 4 +-
>>>>>>>> arch/riscv/include/asm/spinlock.h | 39 +++++++++++++++++++
>>>>>>>> arch/riscv/kernel/setup.c | 18 +++++++++
>>>>>>>> include/asm-generic/qspinlock.h | 2 +
>>>>>>>> include/asm-generic/ticket_spinlock.h | 2 +
>>>>>>>> 7 files changed, 66 insertions(+), 2 deletions(-)
>>>>>>>> create mode 100644 arch/riscv/include/asm/spinlock.h
>>>>>>>>
>>>>>>>> diff --git a/Documentation/features/locking/queued-spinlocks/arch-support.txt b/Documentation/features/locking/queued-spinlocks/arch-support.txt
>>>>>>>> index 22f2990392ff..cf26042480e2 100644
>>>>>>>> --- a/Documentation/features/locking/queued-spinlocks/arch-support.txt
>>>>>>>> +++ b/Documentation/features/locking/queued-spinlocks/arch-support.txt
>>>>>>>> @@ -20,7 +20,7 @@
>>>>>>>> | openrisc: | ok |
>>>>>>>> | parisc: | TODO |
>>>>>>>> | powerpc: | ok |
>>>>>>>> - | riscv: | TODO |
>>>>>>>> + | riscv: | ok |
>>>>>>>> | s390: | TODO |
>>>>>>>> | sh: | TODO |
>>>>>>>> | sparc: | ok |
>>>>>>>> diff --git a/arch/riscv/Kconfig b/arch/riscv/Kconfig
>>>>>>>> index 184a9edb04e0..ccf1703edeb9 100644
>>>>>>>> --- a/arch/riscv/Kconfig
>>>>>>>> +++ b/arch/riscv/Kconfig
>>>>>>>> @@ -59,6 +59,7 @@ config RISCV
>>>>>>>> select ARCH_SUPPORTS_SHADOW_CALL_STACK if HAVE_SHADOW_CALL_STACK
>>>>>>>> select ARCH_USE_MEMTEST
>>>>>>>> select ARCH_USE_QUEUED_RWLOCKS
>>>>>>>> + select ARCH_USE_QUEUED_SPINLOCKS if TOOLCHAIN_HAS_ZABHA
>>>>>>> Using qspinlock or not depends on real hardware capabilities, not the
>>>>>>> compiler flag. That's why I introduced combo-spinlock, ticket-spinlock
>>>>>>> & qspinlock three Kconfigs, and the combo-spinlock would compat all
>>>>>>> hardware platforms but waste some qspinlock code size.
>>>>>> You're right, and I think your comment matches what Conor mentioned
>>>>>> about the lack of clarity with some extensions: TOOLCHAIN_HAS_ZABHA
>>>>>> will allow a platform with Zabha capability to use qspinlocks. But if
>>>>>> the hardware does not, it will fallback to the ticket spinlocks.
>>>>>>
>>>>>> But I agree that looking at the config alone may be misleading, even
>>>>>> though it will work as expected at runtime. So I agree with you:
>>>>>> unless anyone is strongly against the combo spinlocks, I will do what
>>>>>> you suggest and add them.
>>>>> The problem with the v12 combo-spinlock is using a static_branch
>>>>> instead of the full ALTERNATIVE. Frankly, that's a bad example that
>>>>> costs more code space. I found that your cmpxchg32/64 also uses a
>>>>> condition branch, which has a similar problem, right?
>>>>>
>>>>> Anyway, your patch series inspired me to update the v13
>>>>> combo-spinlock. My plan is:
>>>>> 1. Separate native-qspinlock out of paravirt-qspinlock.
>>>>> 2. Re-design an ALTERNATIVE(asm) code instead of static_branch generic
>>>>> ticket-lock or qspinlock.
>>>> What's your plan to make use of alternatives here? The alternatives
>>>> patching depends on the discovery of the extensions, which is done too
>>>> late, at least after the first use of a spinlock (the printk
>>>> spinlock). So you'd need to find a way to first use qspinlocks (but
>>>> without knowing Zabha is available) and then do the correct patching:
>>> I do that in v12:
>>> 1. Use qspinlock as init.
>>> 2. Change to ticket-lock or not.
>>> (Only qspinlock -> ticket-lock, No reverse direction)
>>>
>>> If there is no contention, Qspinlock is okay for all platforms before
>>> smp bringup & no-irq environment.
>>>
>> Yes, by using static keys not alternatives. My question was: how do
>> you plan to use alternatives here instead of static keys? To me, it's
>> not that simple, hence my suggestions in my previous answer.
> Yes, it's not that simple. The current framework doesn't support that
> and has two problems:
> 1. We need to re-implement ticket-lock & qspinlock-fast-path with assembly code.
> 2. The current alternatives patching is only for extensions, but qspinlock is
> not a formal extension. Could we accept
> __RISCV_ISA_EXT_DATA(xqspinlock, RISCV_ISA_EXT_XQSPINLOCK)?


But the problem is that the alternatives need to patch the code very
early in the boot process, which is not possible since we don't have the
list of extensions yet (for ACPI systems), so your
RISCV_ISA_EXT_XQSPINLOCK proposal would not help.

Thanks,

Alex


>
>> Thanks,
>>
>> Alex
>>
>>>> an idea here could be to add an "init" value to the alternatives and
>>>> let the patching process do the right thing when the extensions are
>>>> known.
>>>>
>>>> Another solution would be the early discovery of the extensions, but I
>>>> took a look and it's easy with a device tree, but not with ACPI.
>>>>
>>>> Let me know what you plan to do and how I can help!
>>>>
>>>> Thanks,
>>>>
>>>> Alex
>>>>
>>>>> What do you think?
>>>>>
>>>>>
>>>>>> Thanks again for your initial work,
>>>>>>
>>>>>> Alex
>>>>>>
>>>>>>>> select ARCH_USES_CFI_TRAPS if CFI_CLANG
>>>>>>>> select ARCH_WANT_BATCHED_UNMAP_TLB_FLUSH if SMP && MMU
>>>>>>>> select ARCH_WANT_DEFAULT_TOPDOWN_MMAP_LAYOUT if MMU
>>>>>>>> diff --git a/arch/riscv/include/asm/Kbuild b/arch/riscv/include/asm/Kbuild
>>>>>>>> index 504f8b7e72d4..ad72f2bd4cc9 100644
>>>>>>>> --- a/arch/riscv/include/asm/Kbuild
>>>>>>>> +++ b/arch/riscv/include/asm/Kbuild
>>>>>>>> @@ -2,10 +2,12 @@
>>>>>>>> generic-y += early_ioremap.h
>>>>>>>> generic-y += flat.h
>>>>>>>> generic-y += kvm_para.h
>>>>>>>> +generic-y += mcs_spinlock.h
>>>>>>>> generic-y += parport.h
>>>>>>>> -generic-y += spinlock.h
>>>>>>>> generic-y += spinlock_types.h
>>>>>>>> +generic-y += ticket_spinlock.h
>>>>>>>> generic-y += qrwlock.h
>>>>>>>> generic-y += qrwlock_types.h
>>>>>>>> +generic-y += qspinlock.h
>>>>>>>> generic-y += user.h
>>>>>>>> generic-y += vmlinux.lds.h
>>>>>>>> diff --git a/arch/riscv/include/asm/spinlock.h b/arch/riscv/include/asm/spinlock.h
>>>>>>>> new file mode 100644
>>>>>>>> index 000000000000..e00429ac20ed
>>>>>>>> --- /dev/null
>>>>>>>> +++ b/arch/riscv/include/asm/spinlock.h
>>>>>>>> @@ -0,0 +1,39 @@
>>>>>>>> +/* SPDX-License-Identifier: GPL-2.0 */
>>>>>>>> +
>>>>>>>> +#ifndef __ASM_RISCV_SPINLOCK_H
>>>>>>>> +#define __ASM_RISCV_SPINLOCK_H
>>>>>>>> +
>>>>>>>> +#ifdef CONFIG_QUEUED_SPINLOCKS
>>>>>>>> +#define _Q_PENDING_LOOPS (1 << 9)
>>>>>>>> +
>>>>>>>> +#define __no_arch_spinlock_redefine
>>>>>>>> +#include <asm/ticket_spinlock.h>
>>>>>>>> +#include <asm/qspinlock.h>
>>>>>>>> +#include <asm/alternative.h>
>>>>>>>> +
>>>>>>>> +DECLARE_STATIC_KEY_TRUE(qspinlock_key);
>>>>>>>> +
>>>>>>>> +#define SPINLOCK_BASE_DECLARE(op, type, type_lock) \
>>>>>>>> +static __always_inline type arch_spin_##op(type_lock lock) \
>>>>>>>> +{ \
>>>>>>>> + if (static_branch_unlikely(&qspinlock_key)) \
>>>>>>>> + return queued_spin_##op(lock); \
>>>>>>>> + return ticket_spin_##op(lock); \
>>>>>>>> +}
>>>>>>>> +
>>>>>>>> +SPINLOCK_BASE_DECLARE(lock, void, arch_spinlock_t *)
>>>>>>>> +SPINLOCK_BASE_DECLARE(unlock, void, arch_spinlock_t *)
>>>>>>>> +SPINLOCK_BASE_DECLARE(is_locked, int, arch_spinlock_t *)
>>>>>>>> +SPINLOCK_BASE_DECLARE(is_contended, int, arch_spinlock_t *)
>>>>>>>> +SPINLOCK_BASE_DECLARE(trylock, bool, arch_spinlock_t *)
>>>>>>>> +SPINLOCK_BASE_DECLARE(value_unlocked, int, arch_spinlock_t)
>>>>>>>> +
>>>>>>>> +#else
>>>>>>>> +
>>>>>>>> +#include <asm/ticket_spinlock.h>
>>>>>>>> +
>>>>>>>> +#endif
>>>>>>>> +
>>>>>>>> +#include <asm/qrwlock.h>
>>>>>>>> +
>>>>>>>> +#endif /* __ASM_RISCV_SPINLOCK_H */
>>>>>>>> diff --git a/arch/riscv/kernel/setup.c b/arch/riscv/kernel/setup.c
>>>>>>>> index 4f73c0ae44b2..31ce75522fd4 100644
>>>>>>>> --- a/arch/riscv/kernel/setup.c
>>>>>>>> +++ b/arch/riscv/kernel/setup.c
>>>>>>>> @@ -244,6 +244,23 @@ static void __init parse_dtb(void)
>>>>>>>> #endif
>>>>>>>> }
>>>>>>>>
>>>>>>>> +DEFINE_STATIC_KEY_TRUE(qspinlock_key);
>>>>>>>> +EXPORT_SYMBOL(qspinlock_key);
>>>>>>>> +
>>>>>>>> +static void __init riscv_spinlock_init(void)
>>>>>>>> +{
>>>>>>>> + asm goto(ALTERNATIVE("nop", "j %[qspinlock]", 0, RISCV_ISA_EXT_ZABHA, 1)
>>>>>>>> + : : : : qspinlock);
>>>>>>>> +
>>>>>>>> + static_branch_disable(&qspinlock_key);
>>>>>>>> + pr_info("Ticket spinlock: enabled\n");
>>>>>>>> +
>>>>>>>> + return;
>>>>>>>> +
>>>>>>>> +qspinlock:
>>>>>>>> + pr_info("Queued spinlock: enabled\n");
>>>>>>>> +}
>>>>>>>> +
>>>>>>>> extern void __init init_rt_signal_env(void);
>>>>>>>>
>>>>>>>> void __init setup_arch(char **cmdline_p)
>>>>>>>> @@ -295,6 +312,7 @@ void __init setup_arch(char **cmdline_p)
>>>>>>>> riscv_set_dma_cache_alignment();
>>>>>>>>
>>>>>>>> riscv_user_isa_enable();
>>>>>>>> + riscv_spinlock_init();
>>>>>>>> }
>>>>>>>>
>>>>>>>> bool arch_cpu_is_hotpluggable(int cpu)
>>>>>>>> diff --git a/include/asm-generic/qspinlock.h b/include/asm-generic/qspinlock.h
>>>>>>>> index 0655aa5b57b2..bf47cca2c375 100644
>>>>>>>> --- a/include/asm-generic/qspinlock.h
>>>>>>>> +++ b/include/asm-generic/qspinlock.h
>>>>>>>> @@ -136,6 +136,7 @@ static __always_inline bool virt_spin_lock(struct qspinlock *lock)
>>>>>>>> }
>>>>>>>> #endif
>>>>>>>>
>>>>>>>> +#ifndef __no_arch_spinlock_redefine
>>>>>>>> /*
>>>>>>>> * Remapping spinlock architecture specific functions to the corresponding
>>>>>>>> * queued spinlock functions.
>>>>>>>> @@ -146,5 +147,6 @@ static __always_inline bool virt_spin_lock(struct qspinlock *lock)
>>>>>>>> #define arch_spin_lock(l) queued_spin_lock(l)
>>>>>>>> #define arch_spin_trylock(l) queued_spin_trylock(l)
>>>>>>>> #define arch_spin_unlock(l) queued_spin_unlock(l)
>>>>>>>> +#endif
>>>>>>>>
>>>>>>>> #endif /* __ASM_GENERIC_QSPINLOCK_H */
>>>>>>>> diff --git a/include/asm-generic/ticket_spinlock.h b/include/asm-generic/ticket_spinlock.h
>>>>>>>> index cfcff22b37b3..325779970d8a 100644
>>>>>>>> --- a/include/asm-generic/ticket_spinlock.h
>>>>>>>> +++ b/include/asm-generic/ticket_spinlock.h
>>>>>>>> @@ -89,6 +89,7 @@ static __always_inline int ticket_spin_is_contended(arch_spinlock_t *lock)
>>>>>>>> return (s16)((val >> 16) - (val & 0xffff)) > 1;
>>>>>>>> }
>>>>>>>>
>>>>>>>> +#ifndef __no_arch_spinlock_redefine
>>>>>>>> /*
>>>>>>>> * Remapping spinlock architecture specific functions to the corresponding
>>>>>>>> * ticket spinlock functions.
>>>>>>>> @@ -99,5 +100,6 @@ static __always_inline int ticket_spin_is_contended(arch_spinlock_t *lock)
>>>>>>>> #define arch_spin_lock(l) ticket_spin_lock(l)
>>>>>>>> #define arch_spin_trylock(l) ticket_spin_trylock(l)
>>>>>>>> #define arch_spin_unlock(l) ticket_spin_unlock(l)
>>>>>>>> +#endif
>>>>>>>>
>>>>>>>> #endif /* __ASM_GENERIC_TICKET_SPINLOCK_H */
>>>>>>>> --
>>>>>>>> 2.39.2
>>>>>>>>
>>>>>>>
>>>>>>> --
>>>>>>> Best Regards
>>>>>>> Guo Ren
>>>>>
>>>>>
>>>>> --
>>>>> Best Regards
>>>>> Guo Ren
>>>
>>>
>>> --
>>> Best Regards
>>> Guo Ren
>
>

2024-06-03 10:24:19

by Alexandre Ghiti

[permalink] [raw]
Subject: Re: [PATCH 7/7] riscv: Add qspinlock support based on Zabha extension

Hi Guo,

On 31/05/2024 15:10, Guo Ren wrote:
> On Wed, May 29, 2024 at 9:03 PM Alexandre Ghiti <[email protected]> wrote:
>> Hi Guo,
>>
>> On Wed, May 29, 2024 at 11:24 AM Guo Ren <[email protected]> wrote:
>>> On Tue, May 28, 2024 at 11:18 PM Alexandre Ghiti <[email protected]> wrote:
>>>> In order to produce a generic kernel, a user can select
>>>> CONFIG_QUEUED_SPINLOCKS which will fallback at runtime to the ticket
>>>> spinlock implementation if Zabha is not present.
>>>>
>>>> Note that we can't use alternatives here because the discovery of
>>>> extensions is done too late and we need to start with the qspinlock
>>>> implementation because the ticket spinlock implementation would pollute
>>>> the spinlock value, so let's use static keys.
> Zabha is not a prerequisite for qspinlock; the prerequisite for
> qspinlock is the *forward progress guarantee* in the atomic operation
> loop during intense contention. Even with Zabha enabled to meet the
> requirements of xchg_tail, that still only applies when the number of
> CPUs is less than 16K. The qspinlock implements xchg_tail with a cmpxchg
> loop when the number of cores is more than 16K. Thus, hardware
> support for Zabha does not equate to the safe use of qspinlock.


But if we have Zacas to implement cmpxchg(), then we still provide the
"forward progress guarantee", right? Let me know if I missed something.

Thanks,

Alex


>
> Therefore, I would like to propose a new ISA extension: Zafpg (Atomic
> Forward Progress Guarantee). If RISC-V vendors can ensure the progress
> of LR/SC or CMPXCHG LOOP at the microarchitectural level or if cache
> lines are sufficiently sticky, they could then claim support for this
> extension. Linux could then select different spinlock implementations
> based on this extension's support or not.
>
>>>> This is largely based on Guo's work and Leonardo reviews at [1].
>>>>
>>>> Link: https://lore.kernel.org/linux-riscv/[email protected]/ [1]
>>>> Signed-off-by: Alexandre Ghiti <[email protected]>
>>>> ---
>>>> .../locking/queued-spinlocks/arch-support.txt | 2 +-
>>>> arch/riscv/Kconfig | 1 +
>>>> arch/riscv/include/asm/Kbuild | 4 +-
>>>> arch/riscv/include/asm/spinlock.h | 39 +++++++++++++++++++
>>>> arch/riscv/kernel/setup.c | 18 +++++++++
>>>> include/asm-generic/qspinlock.h | 2 +
>>>> include/asm-generic/ticket_spinlock.h | 2 +
>>>> 7 files changed, 66 insertions(+), 2 deletions(-)
>>>> create mode 100644 arch/riscv/include/asm/spinlock.h
>>>>
>>>> diff --git a/Documentation/features/locking/queued-spinlocks/arch-support.txt b/Documentation/features/locking/queued-spinlocks/arch-support.txt
>>>> index 22f2990392ff..cf26042480e2 100644
>>>> --- a/Documentation/features/locking/queued-spinlocks/arch-support.txt
>>>> +++ b/Documentation/features/locking/queued-spinlocks/arch-support.txt
>>>> @@ -20,7 +20,7 @@
>>>> | openrisc: | ok |
>>>> | parisc: | TODO |
>>>> | powerpc: | ok |
>>>> - | riscv: | TODO |
>>>> + | riscv: | ok |
>>>> | s390: | TODO |
>>>> | sh: | TODO |
>>>> | sparc: | ok |
>>>> diff --git a/arch/riscv/Kconfig b/arch/riscv/Kconfig
>>>> index 184a9edb04e0..ccf1703edeb9 100644
>>>> --- a/arch/riscv/Kconfig
>>>> +++ b/arch/riscv/Kconfig
>>>> @@ -59,6 +59,7 @@ config RISCV
>>>> select ARCH_SUPPORTS_SHADOW_CALL_STACK if HAVE_SHADOW_CALL_STACK
>>>> select ARCH_USE_MEMTEST
>>>> select ARCH_USE_QUEUED_RWLOCKS
>>>> + select ARCH_USE_QUEUED_SPINLOCKS if TOOLCHAIN_HAS_ZABHA
>>> Using qspinlock or not depends on real hardware capabilities, not the
>>> compiler flag. That's why I introduced combo-spinlock, ticket-spinlock
>>> & qspinlock three Kconfigs, and the combo-spinlock would compat all
>>> hardware platforms but waste some qspinlock code size.
>> You're right, and I think your comment matches what Conor mentioned
>> about the lack of clarity with some extensions: TOOLCHAIN_HAS_ZABHA
>> will allow a platform with Zabha capability to use qspinlocks. But if
>> the hardware does not, it will fallback to the ticket spinlocks.
>>
>> But I agree that looking at the config alone may be misleading, even
>> though it will work as expected at runtime. So I agree with you:
>> unless anyone is strongly against the combo spinlocks, I will do what
>> you suggest and add them.
>>
>> Thanks again for your initial work,
>>
>> Alex
>>
>>>> select ARCH_USES_CFI_TRAPS if CFI_CLANG
>>>> select ARCH_WANT_BATCHED_UNMAP_TLB_FLUSH if SMP && MMU
>>>> select ARCH_WANT_DEFAULT_TOPDOWN_MMAP_LAYOUT if MMU
>>>> diff --git a/arch/riscv/include/asm/Kbuild b/arch/riscv/include/asm/Kbuild
>>>> index 504f8b7e72d4..ad72f2bd4cc9 100644
>>>> --- a/arch/riscv/include/asm/Kbuild
>>>> +++ b/arch/riscv/include/asm/Kbuild
>>>> @@ -2,10 +2,12 @@
>>>> generic-y += early_ioremap.h
>>>> generic-y += flat.h
>>>> generic-y += kvm_para.h
>>>> +generic-y += mcs_spinlock.h
>>>> generic-y += parport.h
>>>> -generic-y += spinlock.h
>>>> generic-y += spinlock_types.h
>>>> +generic-y += ticket_spinlock.h
>>>> generic-y += qrwlock.h
>>>> generic-y += qrwlock_types.h
>>>> +generic-y += qspinlock.h
>>>> generic-y += user.h
>>>> generic-y += vmlinux.lds.h
>>>> diff --git a/arch/riscv/include/asm/spinlock.h b/arch/riscv/include/asm/spinlock.h
>>>> new file mode 100644
>>>> index 000000000000..e00429ac20ed
>>>> --- /dev/null
>>>> +++ b/arch/riscv/include/asm/spinlock.h
>>>> @@ -0,0 +1,39 @@
>>>> +/* SPDX-License-Identifier: GPL-2.0 */
>>>> +
>>>> +#ifndef __ASM_RISCV_SPINLOCK_H
>>>> +#define __ASM_RISCV_SPINLOCK_H
>>>> +
>>>> +#ifdef CONFIG_QUEUED_SPINLOCKS
>>>> +#define _Q_PENDING_LOOPS (1 << 9)
>>>> +
>>>> +#define __no_arch_spinlock_redefine
>>>> +#include <asm/ticket_spinlock.h>
>>>> +#include <asm/qspinlock.h>
>>>> +#include <asm/alternative.h>
>>>> +
>>>> +DECLARE_STATIC_KEY_TRUE(qspinlock_key);
>>>> +
>>>> +#define SPINLOCK_BASE_DECLARE(op, type, type_lock) \
>>>> +static __always_inline type arch_spin_##op(type_lock lock) \
>>>> +{ \
>>>> + if (static_branch_unlikely(&qspinlock_key)) \
>>>> + return queued_spin_##op(lock); \
>>>> + return ticket_spin_##op(lock); \
>>>> +}
>>>> +
>>>> +SPINLOCK_BASE_DECLARE(lock, void, arch_spinlock_t *)
>>>> +SPINLOCK_BASE_DECLARE(unlock, void, arch_spinlock_t *)
>>>> +SPINLOCK_BASE_DECLARE(is_locked, int, arch_spinlock_t *)
>>>> +SPINLOCK_BASE_DECLARE(is_contended, int, arch_spinlock_t *)
>>>> +SPINLOCK_BASE_DECLARE(trylock, bool, arch_spinlock_t *)
>>>> +SPINLOCK_BASE_DECLARE(value_unlocked, int, arch_spinlock_t)
>>>> +
>>>> +#else
>>>> +
>>>> +#include <asm/ticket_spinlock.h>
>>>> +
>>>> +#endif
>>>> +
>>>> +#include <asm/qrwlock.h>
>>>> +
>>>> +#endif /* __ASM_RISCV_SPINLOCK_H */
>>>> diff --git a/arch/riscv/kernel/setup.c b/arch/riscv/kernel/setup.c
>>>> index 4f73c0ae44b2..31ce75522fd4 100644
>>>> --- a/arch/riscv/kernel/setup.c
>>>> +++ b/arch/riscv/kernel/setup.c
>>>> @@ -244,6 +244,23 @@ static void __init parse_dtb(void)
>>>> #endif
>>>> }
>>>>
>>>> +DEFINE_STATIC_KEY_TRUE(qspinlock_key);
>>>> +EXPORT_SYMBOL(qspinlock_key);
>>>> +
>>>> +static void __init riscv_spinlock_init(void)
>>>> +{
>>>> + asm goto(ALTERNATIVE("nop", "j %[qspinlock]", 0, RISCV_ISA_EXT_ZABHA, 1)
>>>> + : : : : qspinlock);
>>>> +
>>>> + static_branch_disable(&qspinlock_key);
>>>> + pr_info("Ticket spinlock: enabled\n");
>>>> +
>>>> + return;
>>>> +
>>>> +qspinlock:
>>>> + pr_info("Queued spinlock: enabled\n");
>>>> +}
>>>> +
>>>> extern void __init init_rt_signal_env(void);
>>>>
>>>> void __init setup_arch(char **cmdline_p)
>>>> @@ -295,6 +312,7 @@ void __init setup_arch(char **cmdline_p)
>>>> riscv_set_dma_cache_alignment();
>>>>
>>>> riscv_user_isa_enable();
>>>> + riscv_spinlock_init();
>>>> }
>>>>
>>>> bool arch_cpu_is_hotpluggable(int cpu)
>>>> diff --git a/include/asm-generic/qspinlock.h b/include/asm-generic/qspinlock.h
>>>> index 0655aa5b57b2..bf47cca2c375 100644
>>>> --- a/include/asm-generic/qspinlock.h
>>>> +++ b/include/asm-generic/qspinlock.h
>>>> @@ -136,6 +136,7 @@ static __always_inline bool virt_spin_lock(struct qspinlock *lock)
>>>> }
>>>> #endif
>>>>
>>>> +#ifndef __no_arch_spinlock_redefine
>>>> /*
>>>> * Remapping spinlock architecture specific functions to the corresponding
>>>> * queued spinlock functions.
>>>> @@ -146,5 +147,6 @@ static __always_inline bool virt_spin_lock(struct qspinlock *lock)
>>>> #define arch_spin_lock(l) queued_spin_lock(l)
>>>> #define arch_spin_trylock(l) queued_spin_trylock(l)
>>>> #define arch_spin_unlock(l) queued_spin_unlock(l)
>>>> +#endif
>>>>
>>>> #endif /* __ASM_GENERIC_QSPINLOCK_H */
>>>> diff --git a/include/asm-generic/ticket_spinlock.h b/include/asm-generic/ticket_spinlock.h
>>>> index cfcff22b37b3..325779970d8a 100644
>>>> --- a/include/asm-generic/ticket_spinlock.h
>>>> +++ b/include/asm-generic/ticket_spinlock.h
>>>> @@ -89,6 +89,7 @@ static __always_inline int ticket_spin_is_contended(arch_spinlock_t *lock)
>>>> return (s16)((val >> 16) - (val & 0xffff)) > 1;
>>>> }
>>>>
>>>> +#ifndef __no_arch_spinlock_redefine
>>>> /*
>>>> * Remapping spinlock architecture specific functions to the corresponding
>>>> * ticket spinlock functions.
>>>> @@ -99,5 +100,6 @@ static __always_inline int ticket_spin_is_contended(arch_spinlock_t *lock)
>>>> #define arch_spin_lock(l) ticket_spin_lock(l)
>>>> #define arch_spin_trylock(l) ticket_spin_trylock(l)
>>>> #define arch_spin_unlock(l) ticket_spin_unlock(l)
>>>> +#endif
>>>>
>>>> #endif /* __ASM_GENERIC_TICKET_SPINLOCK_H */
>>>> --
>>>> 2.39.2
>>>>
>>>
>>> --
>>> Best Regards
>>> Guo Ren
>
>

2024-06-03 11:21:16

by Guo Ren

[permalink] [raw]
Subject: Re: [PATCH 7/7] riscv: Add qspinlock support based on Zabha extension

On Mon, Jun 3, 2024 at 5:22 PM Alexandre Ghiti <[email protected]> wrote:
>
> Hi Guo,
>
> On 31/05/2024 08:42, Guo Ren wrote:
> > On Fri, May 31, 2024 at 2:22 PM Alexandre Ghiti <[email protected]> wrote:
> >> On Fri, May 31, 2024 at 3:57 AM Guo Ren <[email protected]> wrote:
> >>> On Thu, May 30, 2024 at 1:30 PM Alexandre Ghiti <[email protected]> wrote:
> >>>> Hi Guo,
> >>>>
> >>>> On Thu, May 30, 2024 at 3:55 AM Guo Ren <[email protected]> wrote:
> >>>>> On Wed, May 29, 2024 at 9:03 PM Alexandre Ghiti <[email protected]> wrote:
> >>>>>> Hi Guo,
> >>>>>>
> >>>>>> On Wed, May 29, 2024 at 11:24 AM Guo Ren <[email protected]> wrote:
> >>>>>>> On Tue, May 28, 2024 at 11:18 PM Alexandre Ghiti <[email protected]> wrote:
> >>>>>>>> In order to produce a generic kernel, a user can select
> >>>>>>>> CONFIG_QUEUED_SPINLOCKS which will fallback at runtime to the ticket
> >>>>>>>> spinlock implementation if Zabha is not present.
> >>>>>>>>
> >>>>>>>> Note that we can't use alternatives here because the discovery of
> >>>>>>>> extensions is done too late and we need to start with the qspinlock
> >>>>>>>> implementation because the ticket spinlock implementation would pollute
> >>>>>>>> the spinlock value, so let's use static keys.
> >>>>>>>>
> >>>>>>>> This is largely based on Guo's work and Leonardo reviews at [1].
> >>>>>>>>
> >>>>>>>> Link: https://lore.kernel.org/linux-riscv/[email protected]/ [1]
> >>>>>>>> Signed-off-by: Alexandre Ghiti <[email protected]>
> >>>>>>>> ---
> >>>>>>>> .../locking/queued-spinlocks/arch-support.txt | 2 +-
> >>>>>>>> arch/riscv/Kconfig | 1 +
> >>>>>>>> arch/riscv/include/asm/Kbuild | 4 +-
> >>>>>>>> arch/riscv/include/asm/spinlock.h | 39 +++++++++++++++++++
> >>>>>>>> arch/riscv/kernel/setup.c | 18 +++++++++
> >>>>>>>> include/asm-generic/qspinlock.h | 2 +
> >>>>>>>> include/asm-generic/ticket_spinlock.h | 2 +
> >>>>>>>> 7 files changed, 66 insertions(+), 2 deletions(-)
> >>>>>>>> create mode 100644 arch/riscv/include/asm/spinlock.h
> >>>>>>>>
> >>>>>>>> diff --git a/Documentation/features/locking/queued-spinlocks/arch-support.txt b/Documentation/features/locking/queued-spinlocks/arch-support.txt
> >>>>>>>> index 22f2990392ff..cf26042480e2 100644
> >>>>>>>> --- a/Documentation/features/locking/queued-spinlocks/arch-support.txt
> >>>>>>>> +++ b/Documentation/features/locking/queued-spinlocks/arch-support.txt
> >>>>>>>> @@ -20,7 +20,7 @@
> >>>>>>>> | openrisc: | ok |
> >>>>>>>> | parisc: | TODO |
> >>>>>>>> | powerpc: | ok |
> >>>>>>>> - | riscv: | TODO |
> >>>>>>>> + | riscv: | ok |
> >>>>>>>> | s390: | TODO |
> >>>>>>>> | sh: | TODO |
> >>>>>>>> | sparc: | ok |
> >>>>>>>> diff --git a/arch/riscv/Kconfig b/arch/riscv/Kconfig
> >>>>>>>> index 184a9edb04e0..ccf1703edeb9 100644
> >>>>>>>> --- a/arch/riscv/Kconfig
> >>>>>>>> +++ b/arch/riscv/Kconfig
> >>>>>>>> @@ -59,6 +59,7 @@ config RISCV
> >>>>>>>> select ARCH_SUPPORTS_SHADOW_CALL_STACK if HAVE_SHADOW_CALL_STACK
> >>>>>>>> select ARCH_USE_MEMTEST
> >>>>>>>> select ARCH_USE_QUEUED_RWLOCKS
> >>>>>>>> + select ARCH_USE_QUEUED_SPINLOCKS if TOOLCHAIN_HAS_ZABHA
> >>>>>>> Using qspinlock or not depends on real hardware capabilities, not the
> >>>>>>> compiler flag. That's why I introduced combo-spinlock, ticket-spinlock
> >>>>>>> & qspinlock three Kconfigs, and the combo-spinlock would compat all
> >>>>>>> hardware platforms but waste some qspinlock code size.
> >>>>>> You're right, and I think your comment matches what Conor mentioned
> >>>>>> about the lack of clarity with some extensions: TOOLCHAIN_HAS_ZABHA
> >>>>>> will allow a platform with Zabha capability to use qspinlocks. But if
> >>>>>> the hardware does not, it will fallback to the ticket spinlocks.
> >>>>>>
> >>>>>> But I agree that looking at the config alone may be misleading, even
> >>>>>> though it will work as expected at runtime. So I agree with you:
> >>>>>> unless anyone is strongly against the combo spinlocks, I will do what
> >>>>>> you suggest and add them.
> >>>>> The problem with the v12 combo-spinlock is using a static_branch
> >>>>> instead of the full ALTERNATIVE. Frankly, that's a bad example that
> >>>>> costs more code space. I found that your cmpxchg32/64 also uses a
> >>>>> condition branch, which has a similar problem, right?
> >>>>>
> >>>>> Anyway, your patch series inspired me to update the v13
> >>>>> combo-spinlock. My plan is:
> >>>>> 1. Separate native-qspinlock out of paravirt-qspinlock.
> >>>>> 2. Re-design an ALTERNATIVE(asm) code instead of static_branch generic
> >>>>> ticket-lock or qspinlock.
> >>>> What's your plan to make use of alternatives here? The alternatives
> >>>> patching depends on the discovery of the extensions, which is done too
> >>>> late, at least after the first use of a spinlock (the printk
> >>>> spinlock). So you'd need to find a way to first use qspinlocks (but
> >>>> without knowing Zabha is available) and then do the correct patching:
> >>> I do that in v12:
> >>> 1. Use qspinlock as init.
> >>> 2. Change to ticket-lock or not.
> >>> (Only qspinlock -> ticket-lock, No reverse direction)
> >>>
> >>> If there is no contention, Qspinlock is okay for all platforms before
> >>> smp bringup & no-irq environment.
> >>>
> >> Yes, by using static keys not alternatives. My question was: how do
> >> you plan to use alternatives here instead of static keys? To me, it's
> >> not that simple, hence my suggestions in my previous answer.
> > Yes, it's not that simple. The current framework doesn't support that
> > and has two problems:
> > 1. We need to re-implement ticket-lock & qspinlock-fast-path with assembly code.
> > 2. Current alternatives patching only for extensions, but qspinlock is
> > not a formal extension. Could we accept
> > __RISCV_ISA_EXT_DATA(xqspinlock, RISCV_ISA_EXT_XQSPINLOCK)?
>
>
> But the problem is that the alternatives needs to patch the code very
> early in the boot process which is not possible since we don't have the
> list of extensions yet (for ACPI systems), so your
> RISCV_ISA_EXT_XQSPINLOCK proposal would not help.
I think doing it in setup_arch()->apply_boot_alternatives() is okay. I can
do that in v13.
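
For reference, here is a simplified sketch of the setup_arch() ordering being
discussed; only riscv_spinlock_init() comes from this series, the rest is an
abridged view of the existing boot flow, and the comments are illustrative:

/*
 * Sketch only: apply_boot_alternatives() already runs inside setup_arch()
 * after the ISA string has been parsed, so an ALTERNATIVE()-based lock
 * switch patched there would cover spinlocks taken after that point, while
 * anything earlier (e.g. the printk lock) still runs the boot-time default.
 */
void __init setup_arch(char **cmdline_p)
{
	/* ... early setup, DT/ACPI parsing ... */
	riscv_fill_hwcap();		/* ISA extensions (Zabha/Zacas) known from here */
	apply_boot_alternatives();	/* ALTERNATIVE() sites can be patched now */
	/* ... */
	riscv_user_isa_enable();
	riscv_spinlock_init();		/* static-key selection added by this series */
}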

>
> Thanks,
>
> Alex
>
>
> >
> >> Thanks,
> >>
> >> Alex
> >>
> >>>> an idea here could be to add an "init" value to the alternatives and
> >>>> let the patching process do the right thing when the extensions are
> >>>> known.
> >>>>
> >>>> Another solution would be the early discovery of the extensions, but I
> >>>> took a look and it's easy with a device tree, but not with ACPI.
> >>>>
> >>>> Let me know what you plan to do and how I can help!
> >>>>
> >>>> Thanks,
> >>>>
> >>>> Alex
> >>>>
> >>>>> What do you think?
> >>>>>
> >>>>>
> >>>>>> Thanks again for your initial work,
> >>>>>>
> >>>>>> Alex
> >>>>>>
> >>>>>>>> select ARCH_USES_CFI_TRAPS if CFI_CLANG
> >>>>>>>> select ARCH_WANT_BATCHED_UNMAP_TLB_FLUSH if SMP && MMU
> >>>>>>>> select ARCH_WANT_DEFAULT_TOPDOWN_MMAP_LAYOUT if MMU
> >>>>>>>> diff --git a/arch/riscv/include/asm/Kbuild b/arch/riscv/include/asm/Kbuild
> >>>>>>>> index 504f8b7e72d4..ad72f2bd4cc9 100644
> >>>>>>>> --- a/arch/riscv/include/asm/Kbuild
> >>>>>>>> +++ b/arch/riscv/include/asm/Kbuild
> >>>>>>>> @@ -2,10 +2,12 @@
> >>>>>>>> generic-y += early_ioremap.h
> >>>>>>>> generic-y += flat.h
> >>>>>>>> generic-y += kvm_para.h
> >>>>>>>> +generic-y += mcs_spinlock.h
> >>>>>>>> generic-y += parport.h
> >>>>>>>> -generic-y += spinlock.h
> >>>>>>>> generic-y += spinlock_types.h
> >>>>>>>> +generic-y += ticket_spinlock.h
> >>>>>>>> generic-y += qrwlock.h
> >>>>>>>> generic-y += qrwlock_types.h
> >>>>>>>> +generic-y += qspinlock.h
> >>>>>>>> generic-y += user.h
> >>>>>>>> generic-y += vmlinux.lds.h
> >>>>>>>> diff --git a/arch/riscv/include/asm/spinlock.h b/arch/riscv/include/asm/spinlock.h
> >>>>>>>> new file mode 100644
> >>>>>>>> index 000000000000..e00429ac20ed
> >>>>>>>> --- /dev/null
> >>>>>>>> +++ b/arch/riscv/include/asm/spinlock.h
> >>>>>>>> @@ -0,0 +1,39 @@
> >>>>>>>> +/* SPDX-License-Identifier: GPL-2.0 */
> >>>>>>>> +
> >>>>>>>> +#ifndef __ASM_RISCV_SPINLOCK_H
> >>>>>>>> +#define __ASM_RISCV_SPINLOCK_H
> >>>>>>>> +
> >>>>>>>> +#ifdef CONFIG_QUEUED_SPINLOCKS
> >>>>>>>> +#define _Q_PENDING_LOOPS (1 << 9)
> >>>>>>>> +
> >>>>>>>> +#define __no_arch_spinlock_redefine
> >>>>>>>> +#include <asm/ticket_spinlock.h>
> >>>>>>>> +#include <asm/qspinlock.h>
> >>>>>>>> +#include <asm/alternative.h>
> >>>>>>>> +
> >>>>>>>> +DECLARE_STATIC_KEY_TRUE(qspinlock_key);
> >>>>>>>> +
> >>>>>>>> +#define SPINLOCK_BASE_DECLARE(op, type, type_lock) \
> >>>>>>>> +static __always_inline type arch_spin_##op(type_lock lock) \
> >>>>>>>> +{ \
> >>>>>>>> + if (static_branch_unlikely(&qspinlock_key)) \
> >>>>>>>> + return queued_spin_##op(lock); \
> >>>>>>>> + return ticket_spin_##op(lock); \
> >>>>>>>> +}
> >>>>>>>> +
> >>>>>>>> +SPINLOCK_BASE_DECLARE(lock, void, arch_spinlock_t *)
> >>>>>>>> +SPINLOCK_BASE_DECLARE(unlock, void, arch_spinlock_t *)
> >>>>>>>> +SPINLOCK_BASE_DECLARE(is_locked, int, arch_spinlock_t *)
> >>>>>>>> +SPINLOCK_BASE_DECLARE(is_contended, int, arch_spinlock_t *)
> >>>>>>>> +SPINLOCK_BASE_DECLARE(trylock, bool, arch_spinlock_t *)
> >>>>>>>> +SPINLOCK_BASE_DECLARE(value_unlocked, int, arch_spinlock_t)
> >>>>>>>> +
> >>>>>>>> +#else
> >>>>>>>> +
> >>>>>>>> +#include <asm/ticket_spinlock.h>
> >>>>>>>> +
> >>>>>>>> +#endif
> >>>>>>>> +
> >>>>>>>> +#include <asm/qrwlock.h>
> >>>>>>>> +
> >>>>>>>> +#endif /* __ASM_RISCV_SPINLOCK_H */
> >>>>>>>> diff --git a/arch/riscv/kernel/setup.c b/arch/riscv/kernel/setup.c
> >>>>>>>> index 4f73c0ae44b2..31ce75522fd4 100644
> >>>>>>>> --- a/arch/riscv/kernel/setup.c
> >>>>>>>> +++ b/arch/riscv/kernel/setup.c
> >>>>>>>> @@ -244,6 +244,23 @@ static void __init parse_dtb(void)
> >>>>>>>> #endif
> >>>>>>>> }
> >>>>>>>>
> >>>>>>>> +DEFINE_STATIC_KEY_TRUE(qspinlock_key);
> >>>>>>>> +EXPORT_SYMBOL(qspinlock_key);
> >>>>>>>> +
> >>>>>>>> +static void __init riscv_spinlock_init(void)
> >>>>>>>> +{
> >>>>>>>> + asm goto(ALTERNATIVE("nop", "j %[qspinlock]", 0, RISCV_ISA_EXT_ZABHA, 1)
> >>>>>>>> + : : : : qspinlock);
> >>>>>>>> +
> >>>>>>>> + static_branch_disable(&qspinlock_key);
> >>>>>>>> + pr_info("Ticket spinlock: enabled\n");
> >>>>>>>> +
> >>>>>>>> + return;
> >>>>>>>> +
> >>>>>>>> +qspinlock:
> >>>>>>>> + pr_info("Queued spinlock: enabled\n");
> >>>>>>>> +}
> >>>>>>>> +
> >>>>>>>> extern void __init init_rt_signal_env(void);
> >>>>>>>>
> >>>>>>>> void __init setup_arch(char **cmdline_p)
> >>>>>>>> @@ -295,6 +312,7 @@ void __init setup_arch(char **cmdline_p)
> >>>>>>>> riscv_set_dma_cache_alignment();
> >>>>>>>>
> >>>>>>>> riscv_user_isa_enable();
> >>>>>>>> + riscv_spinlock_init();
> >>>>>>>> }
> >>>>>>>>
> >>>>>>>> bool arch_cpu_is_hotpluggable(int cpu)
> >>>>>>>> diff --git a/include/asm-generic/qspinlock.h b/include/asm-generic/qspinlock.h
> >>>>>>>> index 0655aa5b57b2..bf47cca2c375 100644
> >>>>>>>> --- a/include/asm-generic/qspinlock.h
> >>>>>>>> +++ b/include/asm-generic/qspinlock.h
> >>>>>>>> @@ -136,6 +136,7 @@ static __always_inline bool virt_spin_lock(struct qspinlock *lock)
> >>>>>>>> }
> >>>>>>>> #endif
> >>>>>>>>
> >>>>>>>> +#ifndef __no_arch_spinlock_redefine
> >>>>>>>> /*
> >>>>>>>> * Remapping spinlock architecture specific functions to the corresponding
> >>>>>>>> * queued spinlock functions.
> >>>>>>>> @@ -146,5 +147,6 @@ static __always_inline bool virt_spin_lock(struct qspinlock *lock)
> >>>>>>>> #define arch_spin_lock(l) queued_spin_lock(l)
> >>>>>>>> #define arch_spin_trylock(l) queued_spin_trylock(l)
> >>>>>>>> #define arch_spin_unlock(l) queued_spin_unlock(l)
> >>>>>>>> +#endif
> >>>>>>>>
> >>>>>>>> #endif /* __ASM_GENERIC_QSPINLOCK_H */
> >>>>>>>> diff --git a/include/asm-generic/ticket_spinlock.h b/include/asm-generic/ticket_spinlock.h
> >>>>>>>> index cfcff22b37b3..325779970d8a 100644
> >>>>>>>> --- a/include/asm-generic/ticket_spinlock.h
> >>>>>>>> +++ b/include/asm-generic/ticket_spinlock.h
> >>>>>>>> @@ -89,6 +89,7 @@ static __always_inline int ticket_spin_is_contended(arch_spinlock_t *lock)
> >>>>>>>> return (s16)((val >> 16) - (val & 0xffff)) > 1;
> >>>>>>>> }
> >>>>>>>>
> >>>>>>>> +#ifndef __no_arch_spinlock_redefine
> >>>>>>>> /*
> >>>>>>>> * Remapping spinlock architecture specific functions to the corresponding
> >>>>>>>> * ticket spinlock functions.
> >>>>>>>> @@ -99,5 +100,6 @@ static __always_inline int ticket_spin_is_contended(arch_spinlock_t *lock)
> >>>>>>>> #define arch_spin_lock(l) ticket_spin_lock(l)
> >>>>>>>> #define arch_spin_trylock(l) ticket_spin_trylock(l)
> >>>>>>>> #define arch_spin_unlock(l) ticket_spin_unlock(l)
> >>>>>>>> +#endif
> >>>>>>>>
> >>>>>>>> #endif /* __ASM_GENERIC_TICKET_SPINLOCK_H */
> >>>>>>>> --
> >>>>>>>> 2.39.2
> >>>>>>>>
> >>>>>>>
> >>>>>>> --
> >>>>>>> Best Regards
> >>>>>>> Guo Ren
> >>>>>
> >>>>>
> >>>>> --
> >>>>> Best Regards
> >>>>> Guo Ren
> >>>
> >>>
> >>> --
> >>> Best Regards
> >>> Guo Ren
> >
> >



--
Best Regards
Guo Ren

2024-06-03 11:28:39

by Guo Ren

[permalink] [raw]
Subject: Re: [PATCH 7/7] riscv: Add qspinlock support based on Zabha extension

On Mon, Jun 3, 2024 at 5:49 PM Alexandre Ghiti <[email protected]> wrote:
>
> Hi Guo,
>
> On 31/05/2024 15:10, Guo Ren wrote:
> > On Wed, May 29, 2024 at 9:03 PM Alexandre Ghiti <[email protected]> wrote:
> >> Hi Guo,
> >>
> >> On Wed, May 29, 2024 at 11:24 AM Guo Ren <[email protected]> wrote:
> >>> On Tue, May 28, 2024 at 11:18 PM Alexandre Ghiti <[email protected]> wrote:
> >>>> In order to produce a generic kernel, a user can select
> >>>> CONFIG_QUEUED_SPINLOCKS which will fallback at runtime to the ticket
> >>>> spinlock implementation if Zabha is not present.
> >>>>
> >>>> Note that we can't use alternatives here because the discovery of
> >>>> extensions is done too late and we need to start with the qspinlock
> >>>> implementation because the ticket spinlock implementation would pollute
> >>>> the spinlock value, so let's use static keys.
> > Zabha is not a prerequisite for qspinlock; the prerequisite for
> > qspinlock is the *forward progress guarantee* in the atomic operation
> > loop during intense contention. Even with Zabha enabled to meet the
> > requirements of xchg_tail, that still only applies when the number of
> > CPUs is less than 16K. The qspinlock uses cmpxchg loop instead of
> > xchg_tail when the number of cores is more than 16K. Thus, hardware
> > support for Zabha does not equate to the safe use of qspinlock.
>
>
> But if we have Zacas to implement cmpxchg(), we still provide the
> "forward progress guarantee" then right? Let me know if I missed something.
The qspinlock needs a "forward progress guarantee", not Zacas as such, and
Zabha can give that guarantee to qspinlock's xchg_tail() (CPUs < 16K) with
the AMOSWAP.H instruction. But "LR/SC pairs" can also give enough forward
progress guarantee; that depends on the micro-arch design of the riscv core.
I think the benefit of AMO over LR/SC is that it can off-load the AMO
operations from the LSU to the CIU (next-level cache or interconnect), which
gives better performance: "LR/SC pairs" only provide near-atomics, while AMO
additionally gives far-atomics.
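
To make that contrast concrete, here is a minimal sketch of the two shapes a
16-bit exchange can take; it assumes only the amoswap.h encoding defined by
Zabha, and the helper name is invented for illustration, it is not from the
patch series:

#include <linux/types.h>

/*
 * Sketch, assuming Zabha: the whole exchange is a single far-atomic AMO
 * that the LSU can hand off to the next-level cache or interconnect.
 */
static inline u16 xchg16_zabha_sketch(u16 *p, u16 newval)
{
	u16 old;

	asm volatile("amoswap.h %0, %2, %1"
		     : "=r" (old), "+A" (*p)
		     : "r" (newval)
		     : "memory");
	return old;
}

/*
 * Without Zabha, the same exchange becomes an lr.w/sc.w retry loop on the
 * containing word (shift + mask the halfword); whether that loop makes
 * forward progress under heavy contention is exactly the micro-architectural
 * property (how "sticky" the reserved cache line is) discussed in this thread.
 */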


>
> Thanks,
>
> Alex
>
>
> >
> > Therefore, I would like to propose a new ISA extension: Zafpg(Atomic
> > Forward Progress Guarantee). If RISC-V vendors can ensure the progress
> > of LR/SC or CMPXCHG LOOP at the microarchitectural level or if cache
> > lines are sufficiently sticky, they could then claim support for this
> > extension. Linux could then select different spinlock implementations
> > based on this extension's support or not.
> >
> >>>> This is largely based on Guo's work and Leonardo reviews at [1].
> >>>>
> >>>> Link: https://lore.kernel.org/linux-riscv/[email protected]/ [1]
> >>>> Signed-off-by: Alexandre Ghiti <[email protected]>
> >>>> ---
> >>>> .../locking/queued-spinlocks/arch-support.txt | 2 +-
> >>>> arch/riscv/Kconfig | 1 +
> >>>> arch/riscv/include/asm/Kbuild | 4 +-
> >>>> arch/riscv/include/asm/spinlock.h | 39 +++++++++++++++++++
> >>>> arch/riscv/kernel/setup.c | 18 +++++++++
> >>>> include/asm-generic/qspinlock.h | 2 +
> >>>> include/asm-generic/ticket_spinlock.h | 2 +
> >>>> 7 files changed, 66 insertions(+), 2 deletions(-)
> >>>> create mode 100644 arch/riscv/include/asm/spinlock.h
> >>>>
> >>>> diff --git a/Documentation/features/locking/queued-spinlocks/arch-support.txt b/Documentation/features/locking/queued-spinlocks/arch-support.txt
> >>>> index 22f2990392ff..cf26042480e2 100644
> >>>> --- a/Documentation/features/locking/queued-spinlocks/arch-support.txt
> >>>> +++ b/Documentation/features/locking/queued-spinlocks/arch-support.txt
> >>>> @@ -20,7 +20,7 @@
> >>>> | openrisc: | ok |
> >>>> | parisc: | TODO |
> >>>> | powerpc: | ok |
> >>>> - | riscv: | TODO |
> >>>> + | riscv: | ok |
> >>>> | s390: | TODO |
> >>>> | sh: | TODO |
> >>>> | sparc: | ok |
> >>>> diff --git a/arch/riscv/Kconfig b/arch/riscv/Kconfig
> >>>> index 184a9edb04e0..ccf1703edeb9 100644
> >>>> --- a/arch/riscv/Kconfig
> >>>> +++ b/arch/riscv/Kconfig
> >>>> @@ -59,6 +59,7 @@ config RISCV
> >>>> select ARCH_SUPPORTS_SHADOW_CALL_STACK if HAVE_SHADOW_CALL_STACK
> >>>> select ARCH_USE_MEMTEST
> >>>> select ARCH_USE_QUEUED_RWLOCKS
> >>>> + select ARCH_USE_QUEUED_SPINLOCKS if TOOLCHAIN_HAS_ZABHA
> >>> Using qspinlock or not depends on real hardware capabilities, not the
> >>> compiler flag. That's why I introduced combo-spinlock, ticket-spinlock
> >>> & qspinlock three Kconfigs, and the combo-spinlock would compat all
> >>> hardware platforms but waste some qspinlock code size.
> >> You're right, and I think your comment matches what Conor mentioned
> >> about the lack of clarity with some extensions: TOOLCHAIN_HAS_ZABHA
> >> will allow a platform with Zabha capability to use qspinlocks. But if
> >> the hardware does not, it will fallback to the ticket spinlocks.
> >>
> >> But I agree that looking at the config alone may be misleading, even
> >> though it will work as expected at runtime. So I agree with you:
> >> unless anyone is strongly against the combo spinlocks, I will do what
> >> you suggest and add them.
> >>
> >> Thanks again for your initial work,
> >>
> >> Alex
> >>
> >>>> select ARCH_USES_CFI_TRAPS if CFI_CLANG
> >>>> select ARCH_WANT_BATCHED_UNMAP_TLB_FLUSH if SMP && MMU
> >>>> select ARCH_WANT_DEFAULT_TOPDOWN_MMAP_LAYOUT if MMU
> >>>> diff --git a/arch/riscv/include/asm/Kbuild b/arch/riscv/include/asm/Kbuild
> >>>> index 504f8b7e72d4..ad72f2bd4cc9 100644
> >>>> --- a/arch/riscv/include/asm/Kbuild
> >>>> +++ b/arch/riscv/include/asm/Kbuild
> >>>> @@ -2,10 +2,12 @@
> >>>> generic-y += early_ioremap.h
> >>>> generic-y += flat.h
> >>>> generic-y += kvm_para.h
> >>>> +generic-y += mcs_spinlock.h
> >>>> generic-y += parport.h
> >>>> -generic-y += spinlock.h
> >>>> generic-y += spinlock_types.h
> >>>> +generic-y += ticket_spinlock.h
> >>>> generic-y += qrwlock.h
> >>>> generic-y += qrwlock_types.h
> >>>> +generic-y += qspinlock.h
> >>>> generic-y += user.h
> >>>> generic-y += vmlinux.lds.h
> >>>> diff --git a/arch/riscv/include/asm/spinlock.h b/arch/riscv/include/asm/spinlock.h
> >>>> new file mode 100644
> >>>> index 000000000000..e00429ac20ed
> >>>> --- /dev/null
> >>>> +++ b/arch/riscv/include/asm/spinlock.h
> >>>> @@ -0,0 +1,39 @@
> >>>> +/* SPDX-License-Identifier: GPL-2.0 */
> >>>> +
> >>>> +#ifndef __ASM_RISCV_SPINLOCK_H
> >>>> +#define __ASM_RISCV_SPINLOCK_H
> >>>> +
> >>>> +#ifdef CONFIG_QUEUED_SPINLOCKS
> >>>> +#define _Q_PENDING_LOOPS (1 << 9)
> >>>> +
> >>>> +#define __no_arch_spinlock_redefine
> >>>> +#include <asm/ticket_spinlock.h>
> >>>> +#include <asm/qspinlock.h>
> >>>> +#include <asm/alternative.h>
> >>>> +
> >>>> +DECLARE_STATIC_KEY_TRUE(qspinlock_key);
> >>>> +
> >>>> +#define SPINLOCK_BASE_DECLARE(op, type, type_lock) \
> >>>> +static __always_inline type arch_spin_##op(type_lock lock) \
> >>>> +{ \
> >>>> + if (static_branch_unlikely(&qspinlock_key)) \
> >>>> + return queued_spin_##op(lock); \
> >>>> + return ticket_spin_##op(lock); \
> >>>> +}
> >>>> +
> >>>> +SPINLOCK_BASE_DECLARE(lock, void, arch_spinlock_t *)
> >>>> +SPINLOCK_BASE_DECLARE(unlock, void, arch_spinlock_t *)
> >>>> +SPINLOCK_BASE_DECLARE(is_locked, int, arch_spinlock_t *)
> >>>> +SPINLOCK_BASE_DECLARE(is_contended, int, arch_spinlock_t *)
> >>>> +SPINLOCK_BASE_DECLARE(trylock, bool, arch_spinlock_t *)
> >>>> +SPINLOCK_BASE_DECLARE(value_unlocked, int, arch_spinlock_t)
> >>>> +
> >>>> +#else
> >>>> +
> >>>> +#include <asm/ticket_spinlock.h>
> >>>> +
> >>>> +#endif
> >>>> +
> >>>> +#include <asm/qrwlock.h>
> >>>> +
> >>>> +#endif /* __ASM_RISCV_SPINLOCK_H */
> >>>> diff --git a/arch/riscv/kernel/setup.c b/arch/riscv/kernel/setup.c
> >>>> index 4f73c0ae44b2..31ce75522fd4 100644
> >>>> --- a/arch/riscv/kernel/setup.c
> >>>> +++ b/arch/riscv/kernel/setup.c
> >>>> @@ -244,6 +244,23 @@ static void __init parse_dtb(void)
> >>>> #endif
> >>>> }
> >>>>
> >>>> +DEFINE_STATIC_KEY_TRUE(qspinlock_key);
> >>>> +EXPORT_SYMBOL(qspinlock_key);
> >>>> +
> >>>> +static void __init riscv_spinlock_init(void)
> >>>> +{
> >>>> + asm goto(ALTERNATIVE("nop", "j %[qspinlock]", 0, RISCV_ISA_EXT_ZABHA, 1)
> >>>> + : : : : qspinlock);
> >>>> +
> >>>> + static_branch_disable(&qspinlock_key);
> >>>> + pr_info("Ticket spinlock: enabled\n");
> >>>> +
> >>>> + return;
> >>>> +
> >>>> +qspinlock:
> >>>> + pr_info("Queued spinlock: enabled\n");
> >>>> +}
> >>>> +
> >>>> extern void __init init_rt_signal_env(void);
> >>>>
> >>>> void __init setup_arch(char **cmdline_p)
> >>>> @@ -295,6 +312,7 @@ void __init setup_arch(char **cmdline_p)
> >>>> riscv_set_dma_cache_alignment();
> >>>>
> >>>> riscv_user_isa_enable();
> >>>> + riscv_spinlock_init();
> >>>> }
> >>>>
> >>>> bool arch_cpu_is_hotpluggable(int cpu)
> >>>> diff --git a/include/asm-generic/qspinlock.h b/include/asm-generic/qspinlock.h
> >>>> index 0655aa5b57b2..bf47cca2c375 100644
> >>>> --- a/include/asm-generic/qspinlock.h
> >>>> +++ b/include/asm-generic/qspinlock.h
> >>>> @@ -136,6 +136,7 @@ static __always_inline bool virt_spin_lock(struct qspinlock *lock)
> >>>> }
> >>>> #endif
> >>>>
> >>>> +#ifndef __no_arch_spinlock_redefine
> >>>> /*
> >>>> * Remapping spinlock architecture specific functions to the corresponding
> >>>> * queued spinlock functions.
> >>>> @@ -146,5 +147,6 @@ static __always_inline bool virt_spin_lock(struct qspinlock *lock)
> >>>> #define arch_spin_lock(l) queued_spin_lock(l)
> >>>> #define arch_spin_trylock(l) queued_spin_trylock(l)
> >>>> #define arch_spin_unlock(l) queued_spin_unlock(l)
> >>>> +#endif
> >>>>
> >>>> #endif /* __ASM_GENERIC_QSPINLOCK_H */
> >>>> diff --git a/include/asm-generic/ticket_spinlock.h b/include/asm-generic/ticket_spinlock.h
> >>>> index cfcff22b37b3..325779970d8a 100644
> >>>> --- a/include/asm-generic/ticket_spinlock.h
> >>>> +++ b/include/asm-generic/ticket_spinlock.h
> >>>> @@ -89,6 +89,7 @@ static __always_inline int ticket_spin_is_contended(arch_spinlock_t *lock)
> >>>> return (s16)((val >> 16) - (val & 0xffff)) > 1;
> >>>> }
> >>>>
> >>>> +#ifndef __no_arch_spinlock_redefine
> >>>> /*
> >>>> * Remapping spinlock architecture specific functions to the corresponding
> >>>> * ticket spinlock functions.
> >>>> @@ -99,5 +100,6 @@ static __always_inline int ticket_spin_is_contended(arch_spinlock_t *lock)
> >>>> #define arch_spin_lock(l) ticket_spin_lock(l)
> >>>> #define arch_spin_trylock(l) ticket_spin_trylock(l)
> >>>> #define arch_spin_unlock(l) ticket_spin_unlock(l)
> >>>> +#endif
> >>>>
> >>>> #endif /* __ASM_GENERIC_TICKET_SPINLOCK_H */
> >>>> --
> >>>> 2.39.2
> >>>>
> >>>
> >>> --
> >>> Best Regards
> >>> Guo Ren
> >
> >



--
Best Regards
Guo Ren

2024-06-03 11:35:03

by Alexandre Ghiti

[permalink] [raw]
Subject: Re: [PATCH 7/7] riscv: Add qspinlock support based on Zabha extension

On 03/06/2024 13:28, Guo Ren wrote:
> On Mon, Jun 3, 2024 at 5:49 PM Alexandre Ghiti <[email protected]> wrote:
>> Hi Guo,
>>
>> On 31/05/2024 15:10, Guo Ren wrote:
>>> On Wed, May 29, 2024 at 9:03 PM Alexandre Ghiti <[email protected]> wrote:
>>>> Hi Guo,
>>>>
>>>> On Wed, May 29, 2024 at 11:24 AM Guo Ren <[email protected]> wrote:
>>>>> On Tue, May 28, 2024 at 11:18 PM Alexandre Ghiti <[email protected]> wrote:
>>>>>> In order to produce a generic kernel, a user can select
>>>>>> CONFIG_QUEUED_SPINLOCKS which will fallback at runtime to the ticket
>>>>>> spinlock implementation if Zabha is not present.
>>>>>>
>>>>>> Note that we can't use alternatives here because the discovery of
>>>>>> extensions is done too late and we need to start with the qspinlock
>>>>>> implementation because the ticket spinlock implementation would pollute
>>>>>> the spinlock value, so let's use static keys.
>>> Zabha is not a prerequisite for qspinlock; the prerequisite for
>>> qspinlock is the *forward progress guarantee* in the atomic operation
>>> loop during intense contention. Even with Zabha enabled to meet the
>>> requirements of xchg_tail, that still only applies when the number of
>>> CPUs is less than 16K. The qspinlock uses cmpxchg loop instead of
>>> xchg_tail when the number of cores is more than 16K. Thus, hardware
>>> support for Zabha does not equate to the safe use of qspinlock.
>>
>> But if we have Zacas to implement cmpxchg(), we still provide the
>> "forward progress guarantee" then right? Let me know if I missed something.
> The qspinlock needs a "forward progress guarantee," not Zacas, and
> Zabha could give a guarantee to qspinlock xchg_tail (CPUs < 16K) with
> AMOSWAP.H instruction. But, using "LR/SC pairs" also could give enough
> fwd guarantee that depends on the micro-arch design of the riscv core.
> I think the help of AMO instead of LR/SC is it could off-load AMO
> operations from LSU to CIU(Next Level Cache or Interconnect), which
> gains better performance. "LR/SC pairs" only provide Near-Atomic, but
> AMO gives Far-Atomic additionally.


I understand that qspinlocks require forward progress and that your company's
LR/SC implementations provide such a guarantee; I'm not arguing against
your new extension proposal.

It seemed to me that your previous message implied that when
NR_CPUS > 16k, we should not use qspinlocks. My question was: "Doesn't
Zacas provide such a guarantee?" I think it does, so qspinlocks should
actually depend on Zabha *and* Zacas. Does that sound correct to you?

Let me know if I misunderstood something again.

Thanks,

Alex


>
>
>> Thanks,
>>
>> Alex
>>
>>
>>> Therefore, I would like to propose a new ISA extension: Zafpg(Atomic
>>> Forward Progress Guarantee). If RISC-V vendors can ensure the progress
>>> of LR/SC or CMPXCHG LOOP at the microarchitectural level or if cache
>>> lines are sufficiently sticky, they could then claim support for this
>>> extension. Linux could then select different spinlock implementations
>>> based on this extension's support or not.
>>>
>>>>>> This is largely based on Guo's work and Leonardo reviews at [1].
>>>>>>
>>>>>> Link: https://lore.kernel.org/linux-riscv/[email protected]/ [1]
>>>>>> Signed-off-by: Alexandre Ghiti <[email protected]>
>>>>>> ---
>>>>>> .../locking/queued-spinlocks/arch-support.txt | 2 +-
>>>>>> arch/riscv/Kconfig | 1 +
>>>>>> arch/riscv/include/asm/Kbuild | 4 +-
>>>>>> arch/riscv/include/asm/spinlock.h | 39 +++++++++++++++++++
>>>>>> arch/riscv/kernel/setup.c | 18 +++++++++
>>>>>> include/asm-generic/qspinlock.h | 2 +
>>>>>> include/asm-generic/ticket_spinlock.h | 2 +
>>>>>> 7 files changed, 66 insertions(+), 2 deletions(-)
>>>>>> create mode 100644 arch/riscv/include/asm/spinlock.h
>>>>>>
>>>>>> diff --git a/Documentation/features/locking/queued-spinlocks/arch-support.txt b/Documentation/features/locking/queued-spinlocks/arch-support.txt
>>>>>> index 22f2990392ff..cf26042480e2 100644
>>>>>> --- a/Documentation/features/locking/queued-spinlocks/arch-support.txt
>>>>>> +++ b/Documentation/features/locking/queued-spinlocks/arch-support.txt
>>>>>> @@ -20,7 +20,7 @@
>>>>>> | openrisc: | ok |
>>>>>> | parisc: | TODO |
>>>>>> | powerpc: | ok |
>>>>>> - | riscv: | TODO |
>>>>>> + | riscv: | ok |
>>>>>> | s390: | TODO |
>>>>>> | sh: | TODO |
>>>>>> | sparc: | ok |
>>>>>> diff --git a/arch/riscv/Kconfig b/arch/riscv/Kconfig
>>>>>> index 184a9edb04e0..ccf1703edeb9 100644
>>>>>> --- a/arch/riscv/Kconfig
>>>>>> +++ b/arch/riscv/Kconfig
>>>>>> @@ -59,6 +59,7 @@ config RISCV
>>>>>> select ARCH_SUPPORTS_SHADOW_CALL_STACK if HAVE_SHADOW_CALL_STACK
>>>>>> select ARCH_USE_MEMTEST
>>>>>> select ARCH_USE_QUEUED_RWLOCKS
>>>>>> + select ARCH_USE_QUEUED_SPINLOCKS if TOOLCHAIN_HAS_ZABHA
>>>>> Using qspinlock or not depends on real hardware capabilities, not the
>>>>> compiler flag. That's why I introduced combo-spinlock, ticket-spinlock
>>>>> & qspinlock three Kconfigs, and the combo-spinlock would compat all
>>>>> hardware platforms but waste some qspinlock code size.
>>>> You're right, and I think your comment matches what Conor mentioned
>>>> about the lack of clarity with some extensions: TOOLCHAIN_HAS_ZABHA
>>>> will allow a platform with Zabha capability to use qspinlocks. But if
>>>> the hardware does not, it will fallback to the ticket spinlocks.
>>>>
>>>> But I agree that looking at the config alone may be misleading, even
>>>> though it will work as expected at runtime. So I agree with you:
>>>> unless anyone is strongly against the combo spinlocks, I will do what
>>>> you suggest and add them.
>>>>
>>>> Thanks again for your initial work,
>>>>
>>>> Alex
>>>>
>>>>>> select ARCH_USES_CFI_TRAPS if CFI_CLANG
>>>>>> select ARCH_WANT_BATCHED_UNMAP_TLB_FLUSH if SMP && MMU
>>>>>> select ARCH_WANT_DEFAULT_TOPDOWN_MMAP_LAYOUT if MMU
>>>>>> diff --git a/arch/riscv/include/asm/Kbuild b/arch/riscv/include/asm/Kbuild
>>>>>> index 504f8b7e72d4..ad72f2bd4cc9 100644
>>>>>> --- a/arch/riscv/include/asm/Kbuild
>>>>>> +++ b/arch/riscv/include/asm/Kbuild
>>>>>> @@ -2,10 +2,12 @@
>>>>>> generic-y += early_ioremap.h
>>>>>> generic-y += flat.h
>>>>>> generic-y += kvm_para.h
>>>>>> +generic-y += mcs_spinlock.h
>>>>>> generic-y += parport.h
>>>>>> -generic-y += spinlock.h
>>>>>> generic-y += spinlock_types.h
>>>>>> +generic-y += ticket_spinlock.h
>>>>>> generic-y += qrwlock.h
>>>>>> generic-y += qrwlock_types.h
>>>>>> +generic-y += qspinlock.h
>>>>>> generic-y += user.h
>>>>>> generic-y += vmlinux.lds.h
>>>>>> diff --git a/arch/riscv/include/asm/spinlock.h b/arch/riscv/include/asm/spinlock.h
>>>>>> new file mode 100644
>>>>>> index 000000000000..e00429ac20ed
>>>>>> --- /dev/null
>>>>>> +++ b/arch/riscv/include/asm/spinlock.h
>>>>>> @@ -0,0 +1,39 @@
>>>>>> +/* SPDX-License-Identifier: GPL-2.0 */
>>>>>> +
>>>>>> +#ifndef __ASM_RISCV_SPINLOCK_H
>>>>>> +#define __ASM_RISCV_SPINLOCK_H
>>>>>> +
>>>>>> +#ifdef CONFIG_QUEUED_SPINLOCKS
>>>>>> +#define _Q_PENDING_LOOPS (1 << 9)
>>>>>> +
>>>>>> +#define __no_arch_spinlock_redefine
>>>>>> +#include <asm/ticket_spinlock.h>
>>>>>> +#include <asm/qspinlock.h>
>>>>>> +#include <asm/alternative.h>
>>>>>> +
>>>>>> +DECLARE_STATIC_KEY_TRUE(qspinlock_key);
>>>>>> +
>>>>>> +#define SPINLOCK_BASE_DECLARE(op, type, type_lock) \
>>>>>> +static __always_inline type arch_spin_##op(type_lock lock) \
>>>>>> +{ \
>>>>>> + if (static_branch_unlikely(&qspinlock_key)) \
>>>>>> + return queued_spin_##op(lock); \
>>>>>> + return ticket_spin_##op(lock); \
>>>>>> +}
>>>>>> +
>>>>>> +SPINLOCK_BASE_DECLARE(lock, void, arch_spinlock_t *)
>>>>>> +SPINLOCK_BASE_DECLARE(unlock, void, arch_spinlock_t *)
>>>>>> +SPINLOCK_BASE_DECLARE(is_locked, int, arch_spinlock_t *)
>>>>>> +SPINLOCK_BASE_DECLARE(is_contended, int, arch_spinlock_t *)
>>>>>> +SPINLOCK_BASE_DECLARE(trylock, bool, arch_spinlock_t *)
>>>>>> +SPINLOCK_BASE_DECLARE(value_unlocked, int, arch_spinlock_t)
>>>>>> +
>>>>>> +#else
>>>>>> +
>>>>>> +#include <asm/ticket_spinlock.h>
>>>>>> +
>>>>>> +#endif
>>>>>> +
>>>>>> +#include <asm/qrwlock.h>
>>>>>> +
>>>>>> +#endif /* __ASM_RISCV_SPINLOCK_H */
>>>>>> diff --git a/arch/riscv/kernel/setup.c b/arch/riscv/kernel/setup.c
>>>>>> index 4f73c0ae44b2..31ce75522fd4 100644
>>>>>> --- a/arch/riscv/kernel/setup.c
>>>>>> +++ b/arch/riscv/kernel/setup.c
>>>>>> @@ -244,6 +244,23 @@ static void __init parse_dtb(void)
>>>>>> #endif
>>>>>> }
>>>>>>
>>>>>> +DEFINE_STATIC_KEY_TRUE(qspinlock_key);
>>>>>> +EXPORT_SYMBOL(qspinlock_key);
>>>>>> +
>>>>>> +static void __init riscv_spinlock_init(void)
>>>>>> +{
>>>>>> + asm goto(ALTERNATIVE("nop", "j %[qspinlock]", 0, RISCV_ISA_EXT_ZABHA, 1)
>>>>>> + : : : : qspinlock);
>>>>>> +
>>>>>> + static_branch_disable(&qspinlock_key);
>>>>>> + pr_info("Ticket spinlock: enabled\n");
>>>>>> +
>>>>>> + return;
>>>>>> +
>>>>>> +qspinlock:
>>>>>> + pr_info("Queued spinlock: enabled\n");
>>>>>> +}
>>>>>> +
>>>>>> extern void __init init_rt_signal_env(void);
>>>>>>
>>>>>> void __init setup_arch(char **cmdline_p)
>>>>>> @@ -295,6 +312,7 @@ void __init setup_arch(char **cmdline_p)
>>>>>> riscv_set_dma_cache_alignment();
>>>>>>
>>>>>> riscv_user_isa_enable();
>>>>>> + riscv_spinlock_init();
>>>>>> }
>>>>>>
>>>>>> bool arch_cpu_is_hotpluggable(int cpu)
>>>>>> diff --git a/include/asm-generic/qspinlock.h b/include/asm-generic/qspinlock.h
>>>>>> index 0655aa5b57b2..bf47cca2c375 100644
>>>>>> --- a/include/asm-generic/qspinlock.h
>>>>>> +++ b/include/asm-generic/qspinlock.h
>>>>>> @@ -136,6 +136,7 @@ static __always_inline bool virt_spin_lock(struct qspinlock *lock)
>>>>>> }
>>>>>> #endif
>>>>>>
>>>>>> +#ifndef __no_arch_spinlock_redefine
>>>>>> /*
>>>>>> * Remapping spinlock architecture specific functions to the corresponding
>>>>>> * queued spinlock functions.
>>>>>> @@ -146,5 +147,6 @@ static __always_inline bool virt_spin_lock(struct qspinlock *lock)
>>>>>> #define arch_spin_lock(l) queued_spin_lock(l)
>>>>>> #define arch_spin_trylock(l) queued_spin_trylock(l)
>>>>>> #define arch_spin_unlock(l) queued_spin_unlock(l)
>>>>>> +#endif
>>>>>>
>>>>>> #endif /* __ASM_GENERIC_QSPINLOCK_H */
>>>>>> diff --git a/include/asm-generic/ticket_spinlock.h b/include/asm-generic/ticket_spinlock.h
>>>>>> index cfcff22b37b3..325779970d8a 100644
>>>>>> --- a/include/asm-generic/ticket_spinlock.h
>>>>>> +++ b/include/asm-generic/ticket_spinlock.h
>>>>>> @@ -89,6 +89,7 @@ static __always_inline int ticket_spin_is_contended(arch_spinlock_t *lock)
>>>>>> return (s16)((val >> 16) - (val & 0xffff)) > 1;
>>>>>> }
>>>>>>
>>>>>> +#ifndef __no_arch_spinlock_redefine
>>>>>> /*
>>>>>> * Remapping spinlock architecture specific functions to the corresponding
>>>>>> * ticket spinlock functions.
>>>>>> @@ -99,5 +100,6 @@ static __always_inline int ticket_spin_is_contended(arch_spinlock_t *lock)
>>>>>> #define arch_spin_lock(l) ticket_spin_lock(l)
>>>>>> #define arch_spin_trylock(l) ticket_spin_trylock(l)
>>>>>> #define arch_spin_unlock(l) ticket_spin_unlock(l)
>>>>>> +#endif
>>>>>>
>>>>>> #endif /* __ASM_GENERIC_TICKET_SPINLOCK_H */
>>>>>> --
>>>>>> 2.39.2
>>>>>>
>>>>> --
>>>>> Best Regards
>>>>> Guo Ren
>>>
>
>

2024-06-03 11:44:45

by Guo Ren

[permalink] [raw]
Subject: Re: [PATCH 7/7] riscv: Add qspinlock support based on Zabha extension

On Mon, Jun 3, 2024 at 7:34 PM Alexandre Ghiti <[email protected]> wrote:
>
> On 03/06/2024 13:28, Guo Ren wrote:
> > On Mon, Jun 3, 2024 at 5:49 PM Alexandre Ghiti <[email protected]> wrote:
> >> Hi Guo,
> >>
> >> On 31/05/2024 15:10, Guo Ren wrote:
> >>> On Wed, May 29, 2024 at 9:03 PM Alexandre Ghiti <[email protected]> wrote:
> >>>> Hi Guo,
> >>>>
> >>>> On Wed, May 29, 2024 at 11:24 AM Guo Ren <[email protected]> wrote:
> >>>>> On Tue, May 28, 2024 at 11:18 PM Alexandre Ghiti <[email protected]> wrote:
> >>>>>> In order to produce a generic kernel, a user can select
> >>>>>> CONFIG_QUEUED_SPINLOCKS which will fallback at runtime to the ticket
> >>>>>> spinlock implementation if Zabha is not present.
> >>>>>>
> >>>>>> Note that we can't use alternatives here because the discovery of
> >>>>>> extensions is done too late and we need to start with the qspinlock
> >>>>>> implementation because the ticket spinlock implementation would pollute
> >>>>>> the spinlock value, so let's use static keys.
> >>> Zabha is not a prerequisite for qspinlock; the prerequisite for
> >>> qspinlock is the *forward progress guarantee* in the atomic operation
> >>> loop during intense contention. Even with Zabha enabled to meet the
> >>> requirements of xchg_tail, that still only applies when the number of
> >>> CPUs is less than 16K. The qspinlock uses cmpxchg loop instead of
> >>> xchg_tail when the number of cores is more than 16K. Thus, hardware
> >>> support for Zabha does not equate to the safe use of qspinlock.
> >>
> >> But if we have Zacas to implement cmpxchg(), we still provide the
> >> "forward progress guarantee" then right? Let me know if I missed something.
> > The qspinlock needs a "forward progress guarantee," not Zacas, and
> > Zabha could give a guarantee to qspinlock xchg_tail (CPUs < 16K) with
> > AMOSWAP.H instruction. But, using "LR/SC pairs" also could give enough
> > fwd guarantee that depends on the micro-arch design of the riscv core.
> > I think the help of AMO instead of LR/SC is it could off-load AMO
> > operations from LSU to CIU(Next Level Cache or Interconnect), which
> > gains better performance. "LR/SC pairs" only provide Near-Atomic, but
> > AMO gives Far-Atomic additionally.
>
>
> I understand qspinlocks require forward progress and that your company's
> LR/SC implementations provide such guarantee, I'm not arguing against
> your new extension proposal.
>
> It seemed to me in your previous message that you implied that when
> NR_CPUS > 16k, we should not use qspinlocks. My question was: "Don't
> Zacas provide such guarantee"? I think it does, so qspinlocks should
> actually depend on Zabha *and* Zacas. Is that correct to you?
See kernel/locking/qspinlock.c
#if _Q_PENDING_BITS == 8	/* NR_CPUS < 16K */
static __always_inline u32 xchg_tail(struct qspinlock *lock, u32 tail)
{
	/*
	 * We can use relaxed semantics since the caller ensures that the
	 * MCS node is properly initialized before updating the tail.
	 */
	return (u32)xchg_relaxed(&lock->tail,
				 tail >> _Q_TAIL_OFFSET) << _Q_TAIL_OFFSET;
}
#else /* NR_CPUS >= 16K */
static __always_inline u32 xchg_tail(struct qspinlock *lock, u32 tail)
{
	u32 old, new;

	old = atomic_read(&lock->val);
	do {
		new = (old & _Q_LOCKED_PENDING_MASK) | tail;
		/*
		 * We can use relaxed semantics since the caller ensures that
		 * the MCS node is properly initialized before updating the
		 * tail.
		 */
	} while (!atomic_try_cmpxchg_relaxed(&lock->val, &old, new));

	return old;
}
#endif

Look: even with Zacas, you still need an additional forward progress
guarantee to break out of the loop. That is, how *sticky* is your cache line?
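
For illustration, here is a rough sketch (not from the patch series; the
helper names are invented) of what that cmpxchg loop looks like when
cmpxchg() itself is backed by Zacas: each amocas.w is atomic on its own, but
the retry loop around it is plain software, so a given hart's eventual
success still depends on the micro-architecture, e.g. on how sticky the
contended cache line is.

#include <linux/types.h>
#include <linux/compiler.h>

/*
 * Sketch, assuming Zacas: amocas.w compares the value at the address with
 * "old" and stores "new" only on a match; "old" always receives the value
 * observed in memory.
 */
static inline u32 cmpxchg32_zacas_sketch(u32 *p, u32 old, u32 new)
{
	asm volatile("amocas.w %0, %2, %1"
		     : "+&r" (old), "+A" (*p)
		     : "r" (new)
		     : "memory");
	return old;
}

/* An xchg_tail()-style caller: atomicity is per amocas.w, the loop is not. */
static inline u32 xchg_tail_sketch(u32 *val, u32 tail, u32 locked_pending_mask)
{
	u32 old = READ_ONCE(*val);

	for (;;) {
		u32 new = (old & locked_pending_mask) | tail;
		u32 prev = cmpxchg32_zacas_sketch(val, old, new);

		if (prev == old)	/* our amocas.w won: tail installed */
			return prev;
		old = prev;		/* lost the race: recompute and retry */
	}
}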

>
> Let me know if I misunderstood something again.
>
> Thanks,
>
> Alex
>
>
> >
> >
> >> Thanks,
> >>
> >> Alex
> >>
> >>
> >>> Therefore, I would like to propose a new ISA extension: Zafpg(Atomic
> >>> Forward Progress Guarantee). If RISC-V vendors can ensure the progress
> >>> of LR/SC or CMPXCHG LOOP at the microarchitectural level or if cache
> >>> lines are sufficiently sticky, they could then claim support for this
> >>> extension. Linux could then select different spinlock implementations
> >>> based on this extension's support or not.
> >>>
> >>>>>> This is largely based on Guo's work and Leonardo reviews at [1].
> >>>>>>
> >>>>>> Link: https://lore.kernel.org/linux-riscv/[email protected]/ [1]
> >>>>>> Signed-off-by: Alexandre Ghiti <[email protected]>
> >>>>>> ---
> >>>>>> .../locking/queued-spinlocks/arch-support.txt | 2 +-
> >>>>>> arch/riscv/Kconfig | 1 +
> >>>>>> arch/riscv/include/asm/Kbuild | 4 +-
> >>>>>> arch/riscv/include/asm/spinlock.h | 39 +++++++++++++++++++
> >>>>>> arch/riscv/kernel/setup.c | 18 +++++++++
> >>>>>> include/asm-generic/qspinlock.h | 2 +
> >>>>>> include/asm-generic/ticket_spinlock.h | 2 +
> >>>>>> 7 files changed, 66 insertions(+), 2 deletions(-)
> >>>>>> create mode 100644 arch/riscv/include/asm/spinlock.h
> >>>>>>
> >>>>>> diff --git a/Documentation/features/locking/queued-spinlocks/arch-support.txt b/Documentation/features/locking/queued-spinlocks/arch-support.txt
> >>>>>> index 22f2990392ff..cf26042480e2 100644
> >>>>>> --- a/Documentation/features/locking/queued-spinlocks/arch-support.txt
> >>>>>> +++ b/Documentation/features/locking/queued-spinlocks/arch-support.txt
> >>>>>> @@ -20,7 +20,7 @@
> >>>>>> | openrisc: | ok |
> >>>>>> | parisc: | TODO |
> >>>>>> | powerpc: | ok |
> >>>>>> - | riscv: | TODO |
> >>>>>> + | riscv: | ok |
> >>>>>> | s390: | TODO |
> >>>>>> | sh: | TODO |
> >>>>>> | sparc: | ok |
> >>>>>> diff --git a/arch/riscv/Kconfig b/arch/riscv/Kconfig
> >>>>>> index 184a9edb04e0..ccf1703edeb9 100644
> >>>>>> --- a/arch/riscv/Kconfig
> >>>>>> +++ b/arch/riscv/Kconfig
> >>>>>> @@ -59,6 +59,7 @@ config RISCV
> >>>>>> select ARCH_SUPPORTS_SHADOW_CALL_STACK if HAVE_SHADOW_CALL_STACK
> >>>>>> select ARCH_USE_MEMTEST
> >>>>>> select ARCH_USE_QUEUED_RWLOCKS
> >>>>>> + select ARCH_USE_QUEUED_SPINLOCKS if TOOLCHAIN_HAS_ZABHA
> >>>>> Using qspinlock or not depends on real hardware capabilities, not the
> >>>>> compiler flag. That's why I introduced combo-spinlock, ticket-spinlock
> >>>>> & qspinlock three Kconfigs, and the combo-spinlock would compat all
> >>>>> hardware platforms but waste some qspinlock code size.
> >>>> You're right, and I think your comment matches what Conor mentioned
> >>>> about the lack of clarity with some extensions: TOOLCHAIN_HAS_ZABHA
> >>>> will allow a platform with Zabha capability to use qspinlocks. But if
> >>>> the hardware does not, it will fallback to the ticket spinlocks.
> >>>>
> >>>> But I agree that looking at the config alone may be misleading, even
> >>>> though it will work as expected at runtime. So I agree with you:
> >>>> unless anyone is strongly against the combo spinlocks, I will do what
> >>>> you suggest and add them.
> >>>>
> >>>> Thanks again for your initial work,
> >>>>
> >>>> Alex
> >>>>
> >>>>>> select ARCH_USES_CFI_TRAPS if CFI_CLANG
> >>>>>> select ARCH_WANT_BATCHED_UNMAP_TLB_FLUSH if SMP && MMU
> >>>>>> select ARCH_WANT_DEFAULT_TOPDOWN_MMAP_LAYOUT if MMU
> >>>>>> diff --git a/arch/riscv/include/asm/Kbuild b/arch/riscv/include/asm/Kbuild
> >>>>>> index 504f8b7e72d4..ad72f2bd4cc9 100644
> >>>>>> --- a/arch/riscv/include/asm/Kbuild
> >>>>>> +++ b/arch/riscv/include/asm/Kbuild
> >>>>>> @@ -2,10 +2,12 @@
> >>>>>> generic-y += early_ioremap.h
> >>>>>> generic-y += flat.h
> >>>>>> generic-y += kvm_para.h
> >>>>>> +generic-y += mcs_spinlock.h
> >>>>>> generic-y += parport.h
> >>>>>> -generic-y += spinlock.h
> >>>>>> generic-y += spinlock_types.h
> >>>>>> +generic-y += ticket_spinlock.h
> >>>>>> generic-y += qrwlock.h
> >>>>>> generic-y += qrwlock_types.h
> >>>>>> +generic-y += qspinlock.h
> >>>>>> generic-y += user.h
> >>>>>> generic-y += vmlinux.lds.h
> >>>>>> diff --git a/arch/riscv/include/asm/spinlock.h b/arch/riscv/include/asm/spinlock.h
> >>>>>> new file mode 100644
> >>>>>> index 000000000000..e00429ac20ed
> >>>>>> --- /dev/null
> >>>>>> +++ b/arch/riscv/include/asm/spinlock.h
> >>>>>> @@ -0,0 +1,39 @@
> >>>>>> +/* SPDX-License-Identifier: GPL-2.0 */
> >>>>>> +
> >>>>>> +#ifndef __ASM_RISCV_SPINLOCK_H
> >>>>>> +#define __ASM_RISCV_SPINLOCK_H
> >>>>>> +
> >>>>>> +#ifdef CONFIG_QUEUED_SPINLOCKS
> >>>>>> +#define _Q_PENDING_LOOPS (1 << 9)
> >>>>>> +
> >>>>>> +#define __no_arch_spinlock_redefine
> >>>>>> +#include <asm/ticket_spinlock.h>
> >>>>>> +#include <asm/qspinlock.h>
> >>>>>> +#include <asm/alternative.h>
> >>>>>> +
> >>>>>> +DECLARE_STATIC_KEY_TRUE(qspinlock_key);
> >>>>>> +
> >>>>>> +#define SPINLOCK_BASE_DECLARE(op, type, type_lock) \
> >>>>>> +static __always_inline type arch_spin_##op(type_lock lock) \
> >>>>>> +{ \
> >>>>>> + if (static_branch_unlikely(&qspinlock_key)) \
> >>>>>> + return queued_spin_##op(lock); \
> >>>>>> + return ticket_spin_##op(lock); \
> >>>>>> +}
> >>>>>> +
> >>>>>> +SPINLOCK_BASE_DECLARE(lock, void, arch_spinlock_t *)
> >>>>>> +SPINLOCK_BASE_DECLARE(unlock, void, arch_spinlock_t *)
> >>>>>> +SPINLOCK_BASE_DECLARE(is_locked, int, arch_spinlock_t *)
> >>>>>> +SPINLOCK_BASE_DECLARE(is_contended, int, arch_spinlock_t *)
> >>>>>> +SPINLOCK_BASE_DECLARE(trylock, bool, arch_spinlock_t *)
> >>>>>> +SPINLOCK_BASE_DECLARE(value_unlocked, int, arch_spinlock_t)
> >>>>>> +
> >>>>>> +#else
> >>>>>> +
> >>>>>> +#include <asm/ticket_spinlock.h>
> >>>>>> +
> >>>>>> +#endif
> >>>>>> +
> >>>>>> +#include <asm/qrwlock.h>
> >>>>>> +
> >>>>>> +#endif /* __ASM_RISCV_SPINLOCK_H */
> >>>>>> diff --git a/arch/riscv/kernel/setup.c b/arch/riscv/kernel/setup.c
> >>>>>> index 4f73c0ae44b2..31ce75522fd4 100644
> >>>>>> --- a/arch/riscv/kernel/setup.c
> >>>>>> +++ b/arch/riscv/kernel/setup.c
> >>>>>> @@ -244,6 +244,23 @@ static void __init parse_dtb(void)
> >>>>>> #endif
> >>>>>> }
> >>>>>>
> >>>>>> +DEFINE_STATIC_KEY_TRUE(qspinlock_key);
> >>>>>> +EXPORT_SYMBOL(qspinlock_key);
> >>>>>> +
> >>>>>> +static void __init riscv_spinlock_init(void)
> >>>>>> +{
> >>>>>> + asm goto(ALTERNATIVE("nop", "j %[qspinlock]", 0, RISCV_ISA_EXT_ZABHA, 1)
> >>>>>> + : : : : qspinlock);
> >>>>>> +
> >>>>>> + static_branch_disable(&qspinlock_key);
> >>>>>> + pr_info("Ticket spinlock: enabled\n");
> >>>>>> +
> >>>>>> + return;
> >>>>>> +
> >>>>>> +qspinlock:
> >>>>>> + pr_info("Queued spinlock: enabled\n");
> >>>>>> +}
> >>>>>> +
> >>>>>> extern void __init init_rt_signal_env(void);
> >>>>>>
> >>>>>> void __init setup_arch(char **cmdline_p)
> >>>>>> @@ -295,6 +312,7 @@ void __init setup_arch(char **cmdline_p)
> >>>>>> riscv_set_dma_cache_alignment();
> >>>>>>
> >>>>>> riscv_user_isa_enable();
> >>>>>> + riscv_spinlock_init();
> >>>>>> }
> >>>>>>
> >>>>>> bool arch_cpu_is_hotpluggable(int cpu)
> >>>>>> diff --git a/include/asm-generic/qspinlock.h b/include/asm-generic/qspinlock.h
> >>>>>> index 0655aa5b57b2..bf47cca2c375 100644
> >>>>>> --- a/include/asm-generic/qspinlock.h
> >>>>>> +++ b/include/asm-generic/qspinlock.h
> >>>>>> @@ -136,6 +136,7 @@ static __always_inline bool virt_spin_lock(struct qspinlock *lock)
> >>>>>> }
> >>>>>> #endif
> >>>>>>
> >>>>>> +#ifndef __no_arch_spinlock_redefine
> >>>>>> /*
> >>>>>> * Remapping spinlock architecture specific functions to the corresponding
> >>>>>> * queued spinlock functions.
> >>>>>> @@ -146,5 +147,6 @@ static __always_inline bool virt_spin_lock(struct qspinlock *lock)
> >>>>>> #define arch_spin_lock(l) queued_spin_lock(l)
> >>>>>> #define arch_spin_trylock(l) queued_spin_trylock(l)
> >>>>>> #define arch_spin_unlock(l) queued_spin_unlock(l)
> >>>>>> +#endif
> >>>>>>
> >>>>>> #endif /* __ASM_GENERIC_QSPINLOCK_H */
> >>>>>> diff --git a/include/asm-generic/ticket_spinlock.h b/include/asm-generic/ticket_spinlock.h
> >>>>>> index cfcff22b37b3..325779970d8a 100644
> >>>>>> --- a/include/asm-generic/ticket_spinlock.h
> >>>>>> +++ b/include/asm-generic/ticket_spinlock.h
> >>>>>> @@ -89,6 +89,7 @@ static __always_inline int ticket_spin_is_contended(arch_spinlock_t *lock)
> >>>>>> return (s16)((val >> 16) - (val & 0xffff)) > 1;
> >>>>>> }
> >>>>>>
> >>>>>> +#ifndef __no_arch_spinlock_redefine
> >>>>>> /*
> >>>>>> * Remapping spinlock architecture specific functions to the corresponding
> >>>>>> * ticket spinlock functions.
> >>>>>> @@ -99,5 +100,6 @@ static __always_inline int ticket_spin_is_contended(arch_spinlock_t *lock)
> >>>>>> #define arch_spin_lock(l) ticket_spin_lock(l)
> >>>>>> #define arch_spin_trylock(l) ticket_spin_trylock(l)
> >>>>>> #define arch_spin_unlock(l) ticket_spin_unlock(l)
> >>>>>> +#endif
> >>>>>>
> >>>>>> #endif /* __ASM_GENERIC_TICKET_SPINLOCK_H */
> >>>>>> --
> >>>>>> 2.39.2
> >>>>>>
> >>>>> --
> >>>>> Best Regards
> >>>>> Guo Ren
> >>>
> >
> >



--
Best Regards
Guo Ren

2024-06-03 11:51:17

by Alexandre Ghiti

[permalink] [raw]
Subject: Re: [PATCH 7/7] riscv: Add qspinlock support based on Zabha extension

On 03/06/2024 13:44, Guo Ren wrote:
> On Mon, Jun 3, 2024 at 7:34 PM Alexandre Ghiti <[email protected]> wrote:
>> On 03/06/2024 13:28, Guo Ren wrote:
>>> On Mon, Jun 3, 2024 at 5:49 PM Alexandre Ghiti <[email protected]> wrote:
>>>> Hi Guo,
>>>>
>>>> On 31/05/2024 15:10, Guo Ren wrote:
>>>>> On Wed, May 29, 2024 at 9:03 PM Alexandre Ghiti <[email protected]> wrote:
>>>>>> Hi Guo,
>>>>>>
>>>>>> On Wed, May 29, 2024 at 11:24 AM Guo Ren <[email protected]> wrote:
>>>>>>> On Tue, May 28, 2024 at 11:18 PM Alexandre Ghiti <[email protected]> wrote:
>>>>>>>> In order to produce a generic kernel, a user can select
>>>>>>>> CONFIG_QUEUED_SPINLOCKS which will fallback at runtime to the ticket
>>>>>>>> spinlock implementation if Zabha is not present.
>>>>>>>>
>>>>>>>> Note that we can't use alternatives here because the discovery of
>>>>>>>> extensions is done too late and we need to start with the qspinlock
>>>>>>>> implementation because the ticket spinlock implementation would pollute
>>>>>>>> the spinlock value, so let's use static keys.
>>>>> Zabha is not a prerequisite for qspinlock; the prerequisite for
>>>>> qspinlock is the *forward progress guarantee* in the atomic operation
>>>>> loop during intense contention. Even with Zabha enabled to meet the
>>>>> requirements of xchg_tail, that still only applies when the number of
>>>>> CPUs is less than 16K. The qspinlock uses cmpxchg loop instead of
>>>>> xchg_tail when the number of cores is more than 16K. Thus, hardware
>>>>> support for Zabha does not equate to the safe use of qspinlock.
>>>> But if we have Zacas to implement cmpxchg(), we still provide the
>>>> "forward progress guarantee" then right? Let me know if I missed something.
>>> The qspinlock needs a "forward progress guarantee," not Zacas, and
>>> Zabha could give a guarantee to qspinlock xchg_tail (CPUs < 16K) with
>>> AMOSWAP.H instruction. But, using "LR/SC pairs" also could give enough
>>> fwd guarantee that depends on the micro-arch design of the riscv core.
>>> I think the help of AMO instead of LR/SC is it could off-load AMO
>>> operations from LSU to CIU(Next Level Cache or Interconnect), which
>>> gains better performance. "LR/SC pairs" only provide Near-Atomic, but
>>> AMO gives Far-Atomic additionally.
>>
>> I understand qspinlocks require forward progress and that your company's
>> LR/SC implementations provide such guarantee, I'm not arguing against
>> your new extension proposal.
>>
>> It seemed to me in your previous message that you implied that when
>> NR_CPUS > 16k, we should not use qspinlocks. My question was: "Don't
>> Zacas provide such guarantee"? I think it does, so qspinlocks should
>> actually depend on Zabha *and* Zacas. Is that correct to you?
> See kernel/locking/qspinlock.c
> #if _Q_PENDING_BITS == 8 (NR_CPUS < 16K)
> static __always_inline u32 xchg_tail(struct qspinlock *lock, u32 tail)
> {
> /*
> * We can use relaxed semantics since the caller ensures that the
> * MCS node is properly initialized before updating the tail.
> */
> return (u32)xchg_relaxed(&lock->tail,
> tail >> _Q_TAIL_OFFSET) << _Q_TAIL_OFFSET;
> }
> #else /* NR_CPUS >= 16K */
> static __always_inline u32 xchg_tail(struct qspinlock *lock, u32 tail)
> {
> u32 old, new;
>
> old = atomic_read(&lock->val);
> do {
> new = (old & _Q_LOCKED_PENDING_MASK) | tail;
> /*
> * We can use relaxed semantics since the caller ensures that
> * the MCS node is properly initialized before updating the
> * tail.
> */
> } while (!atomic_try_cmpxchg_relaxed(&lock->val, &old, new));
>
> return old;
> }
> #endif
>
> Look! Even with Zacas, you still need an additional forward progress
> guarantee to break out of the loop. That is, how *sticky* is your cache line?


But then the problem comes from this generic implementation of
xchg_tail(), not from the arch cas implementation, right?
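
For reference, the distinction matters because with Zabha the NR_CPUS < 16K
variant of xchg_tail() needs no retry loop at all: the 16-bit exchange can be
a single AMO. A minimal sketch of that lowering (hypothetical helper name, not
the actual arch/riscv/include/asm/cmpxchg.h macro):

/*
 * Illustrative sketch only: a 16-bit relaxed exchange built on the Zabha
 * amoswap.h instruction. The helper name and exact constraints are
 * assumptions made for this example.
 */
static inline u16 __xchg16_relaxed_zabha(u16 *ptr, u16 newval)
{
	u16 prev;

	__asm__ __volatile__ ("amoswap.h %0, %2, %1"
			      : "=&r" (prev), "+A" (*ptr)
			      : "r" (newval)
			      : "memory");
	return prev;
}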


>
>> Let me know if I misunderstood something again.
>>
>> Thanks,
>>
>> Alex
>>
>>
>>>
>>>> Thanks,
>>>>
>>>> Alex
>>>>
>>>>
>>>>> Therefore, I would like to propose a new ISA extension: Zafpg(Atomic
>>>>> Forward Progress Guarantee). If RISC-V vendors can ensure the progress
>>>>> of LR/SC or CMPXCHG LOOP at the microarchitectural level or if cache
>>>>> lines are sufficiently sticky, they could then claim support for this
>>>>> extension. Linux could then select different spinlock implementations
>>>>> based on this extension's support or not.
>>>>>
>>>>>>>> This is largely based on Guo's work and Leonardo reviews at [1].
>>>>>>>>
>>>>>>>> Link: https://lore.kernel.org/linux-riscv/[email protected]/ [1]
>>>>>>>> Signed-off-by: Alexandre Ghiti <[email protected]>
>>>>>>>> ---
>>>>>>>> .../locking/queued-spinlocks/arch-support.txt | 2 +-
>>>>>>>> arch/riscv/Kconfig | 1 +
>>>>>>>> arch/riscv/include/asm/Kbuild | 4 +-
>>>>>>>> arch/riscv/include/asm/spinlock.h | 39 +++++++++++++++++++
>>>>>>>> arch/riscv/kernel/setup.c | 18 +++++++++
>>>>>>>> include/asm-generic/qspinlock.h | 2 +
>>>>>>>> include/asm-generic/ticket_spinlock.h | 2 +
>>>>>>>> 7 files changed, 66 insertions(+), 2 deletions(-)
>>>>>>>> create mode 100644 arch/riscv/include/asm/spinlock.h
>>>>>>>>
>>>>>>>> diff --git a/Documentation/features/locking/queued-spinlocks/arch-support.txt b/Documentation/features/locking/queued-spinlocks/arch-support.txt
>>>>>>>> index 22f2990392ff..cf26042480e2 100644
>>>>>>>> --- a/Documentation/features/locking/queued-spinlocks/arch-support.txt
>>>>>>>> +++ b/Documentation/features/locking/queued-spinlocks/arch-support.txt
>>>>>>>> @@ -20,7 +20,7 @@
>>>>>>>> | openrisc: | ok |
>>>>>>>> | parisc: | TODO |
>>>>>>>> | powerpc: | ok |
>>>>>>>> - | riscv: | TODO |
>>>>>>>> + | riscv: | ok |
>>>>>>>> | s390: | TODO |
>>>>>>>> | sh: | TODO |
>>>>>>>> | sparc: | ok |
>>>>>>>> diff --git a/arch/riscv/Kconfig b/arch/riscv/Kconfig
>>>>>>>> index 184a9edb04e0..ccf1703edeb9 100644
>>>>>>>> --- a/arch/riscv/Kconfig
>>>>>>>> +++ b/arch/riscv/Kconfig
>>>>>>>> @@ -59,6 +59,7 @@ config RISCV
>>>>>>>> select ARCH_SUPPORTS_SHADOW_CALL_STACK if HAVE_SHADOW_CALL_STACK
>>>>>>>> select ARCH_USE_MEMTEST
>>>>>>>> select ARCH_USE_QUEUED_RWLOCKS
>>>>>>>> + select ARCH_USE_QUEUED_SPINLOCKS if TOOLCHAIN_HAS_ZABHA
>>>>>>> Using qspinlock or not depends on real hardware capabilities, not the
>>>>>>> compiler flag. That's why I introduced combo-spinlock, ticket-spinlock
>>>>>>> & qspinlock three Kconfigs, and the combo-spinlock would compat all
>>>>>>> hardware platforms but waste some qspinlock code size.
>>>>>> You're right, and I think your comment matches what Conor mentioned
>>>>>> about the lack of clarity with some extensions: TOOLCHAIN_HAS_ZABHA
>>>>>> will allow a platform with Zabha capability to use qspinlocks. But if
>>>>>> the hardware does not, it will fallback to the ticket spinlocks.
>>>>>>
>>>>>> But I agree that looking at the config alone may be misleading, even
>>>>>> though it will work as expected at runtime. So I agree with you:
>>>>>> unless anyone is strongly against the combo spinlocks, I will do what
>>>>>> you suggest and add them.
>>>>>>
>>>>>> Thanks again for your initial work,
>>>>>>
>>>>>> Alex
>>>>>>
>>>>>>>> select ARCH_USES_CFI_TRAPS if CFI_CLANG
>>>>>>>> select ARCH_WANT_BATCHED_UNMAP_TLB_FLUSH if SMP && MMU
>>>>>>>> select ARCH_WANT_DEFAULT_TOPDOWN_MMAP_LAYOUT if MMU
>>>>>>>> diff --git a/arch/riscv/include/asm/Kbuild b/arch/riscv/include/asm/Kbuild
>>>>>>>> index 504f8b7e72d4..ad72f2bd4cc9 100644
>>>>>>>> --- a/arch/riscv/include/asm/Kbuild
>>>>>>>> +++ b/arch/riscv/include/asm/Kbuild
>>>>>>>> @@ -2,10 +2,12 @@
>>>>>>>> generic-y += early_ioremap.h
>>>>>>>> generic-y += flat.h
>>>>>>>> generic-y += kvm_para.h
>>>>>>>> +generic-y += mcs_spinlock.h
>>>>>>>> generic-y += parport.h
>>>>>>>> -generic-y += spinlock.h
>>>>>>>> generic-y += spinlock_types.h
>>>>>>>> +generic-y += ticket_spinlock.h
>>>>>>>> generic-y += qrwlock.h
>>>>>>>> generic-y += qrwlock_types.h
>>>>>>>> +generic-y += qspinlock.h
>>>>>>>> generic-y += user.h
>>>>>>>> generic-y += vmlinux.lds.h
>>>>>>>> diff --git a/arch/riscv/include/asm/spinlock.h b/arch/riscv/include/asm/spinlock.h
>>>>>>>> new file mode 100644
>>>>>>>> index 000000000000..e00429ac20ed
>>>>>>>> --- /dev/null
>>>>>>>> +++ b/arch/riscv/include/asm/spinlock.h
>>>>>>>> @@ -0,0 +1,39 @@
>>>>>>>> +/* SPDX-License-Identifier: GPL-2.0 */
>>>>>>>> +
>>>>>>>> +#ifndef __ASM_RISCV_SPINLOCK_H
>>>>>>>> +#define __ASM_RISCV_SPINLOCK_H
>>>>>>>> +
>>>>>>>> +#ifdef CONFIG_QUEUED_SPINLOCKS
>>>>>>>> +#define _Q_PENDING_LOOPS (1 << 9)
>>>>>>>> +
>>>>>>>> +#define __no_arch_spinlock_redefine
>>>>>>>> +#include <asm/ticket_spinlock.h>
>>>>>>>> +#include <asm/qspinlock.h>
>>>>>>>> +#include <asm/alternative.h>
>>>>>>>> +
>>>>>>>> +DECLARE_STATIC_KEY_TRUE(qspinlock_key);
>>>>>>>> +
>>>>>>>> +#define SPINLOCK_BASE_DECLARE(op, type, type_lock) \
>>>>>>>> +static __always_inline type arch_spin_##op(type_lock lock) \
>>>>>>>> +{ \
>>>>>>>> + if (static_branch_unlikely(&qspinlock_key)) \
>>>>>>>> + return queued_spin_##op(lock); \
>>>>>>>> + return ticket_spin_##op(lock); \
>>>>>>>> +}
>>>>>>>> +
>>>>>>>> +SPINLOCK_BASE_DECLARE(lock, void, arch_spinlock_t *)
>>>>>>>> +SPINLOCK_BASE_DECLARE(unlock, void, arch_spinlock_t *)
>>>>>>>> +SPINLOCK_BASE_DECLARE(is_locked, int, arch_spinlock_t *)
>>>>>>>> +SPINLOCK_BASE_DECLARE(is_contended, int, arch_spinlock_t *)
>>>>>>>> +SPINLOCK_BASE_DECLARE(trylock, bool, arch_spinlock_t *)
>>>>>>>> +SPINLOCK_BASE_DECLARE(value_unlocked, int, arch_spinlock_t)
>>>>>>>> +
>>>>>>>> +#else
>>>>>>>> +
>>>>>>>> +#include <asm/ticket_spinlock.h>
>>>>>>>> +
>>>>>>>> +#endif
>>>>>>>> +
>>>>>>>> +#include <asm/qrwlock.h>
>>>>>>>> +
>>>>>>>> +#endif /* __ASM_RISCV_SPINLOCK_H */
>>>>>>>> diff --git a/arch/riscv/kernel/setup.c b/arch/riscv/kernel/setup.c
>>>>>>>> index 4f73c0ae44b2..31ce75522fd4 100644
>>>>>>>> --- a/arch/riscv/kernel/setup.c
>>>>>>>> +++ b/arch/riscv/kernel/setup.c
>>>>>>>> @@ -244,6 +244,23 @@ static void __init parse_dtb(void)
>>>>>>>> #endif
>>>>>>>> }
>>>>>>>>
>>>>>>>> +DEFINE_STATIC_KEY_TRUE(qspinlock_key);
>>>>>>>> +EXPORT_SYMBOL(qspinlock_key);
>>>>>>>> +
>>>>>>>> +static void __init riscv_spinlock_init(void)
>>>>>>>> +{
>>>>>>>> + asm goto(ALTERNATIVE("nop", "j %[qspinlock]", 0, RISCV_ISA_EXT_ZABHA, 1)
>>>>>>>> + : : : : qspinlock);
>>>>>>>> +
>>>>>>>> + static_branch_disable(&qspinlock_key);
>>>>>>>> + pr_info("Ticket spinlock: enabled\n");
>>>>>>>> +
>>>>>>>> + return;
>>>>>>>> +
>>>>>>>> +qspinlock:
>>>>>>>> + pr_info("Queued spinlock: enabled\n");
>>>>>>>> +}
>>>>>>>> +
>>>>>>>> extern void __init init_rt_signal_env(void);
>>>>>>>>
>>>>>>>> void __init setup_arch(char **cmdline_p)
>>>>>>>> @@ -295,6 +312,7 @@ void __init setup_arch(char **cmdline_p)
>>>>>>>> riscv_set_dma_cache_alignment();
>>>>>>>>
>>>>>>>> riscv_user_isa_enable();
>>>>>>>> + riscv_spinlock_init();
>>>>>>>> }
>>>>>>>>
>>>>>>>> bool arch_cpu_is_hotpluggable(int cpu)
>>>>>>>> diff --git a/include/asm-generic/qspinlock.h b/include/asm-generic/qspinlock.h
>>>>>>>> index 0655aa5b57b2..bf47cca2c375 100644
>>>>>>>> --- a/include/asm-generic/qspinlock.h
>>>>>>>> +++ b/include/asm-generic/qspinlock.h
>>>>>>>> @@ -136,6 +136,7 @@ static __always_inline bool virt_spin_lock(struct qspinlock *lock)
>>>>>>>> }
>>>>>>>> #endif
>>>>>>>>
>>>>>>>> +#ifndef __no_arch_spinlock_redefine
>>>>>>>> /*
>>>>>>>> * Remapping spinlock architecture specific functions to the corresponding
>>>>>>>> * queued spinlock functions.
>>>>>>>> @@ -146,5 +147,6 @@ static __always_inline bool virt_spin_lock(struct qspinlock *lock)
>>>>>>>> #define arch_spin_lock(l) queued_spin_lock(l)
>>>>>>>> #define arch_spin_trylock(l) queued_spin_trylock(l)
>>>>>>>> #define arch_spin_unlock(l) queued_spin_unlock(l)
>>>>>>>> +#endif
>>>>>>>>
>>>>>>>> #endif /* __ASM_GENERIC_QSPINLOCK_H */
>>>>>>>> diff --git a/include/asm-generic/ticket_spinlock.h b/include/asm-generic/ticket_spinlock.h
>>>>>>>> index cfcff22b37b3..325779970d8a 100644
>>>>>>>> --- a/include/asm-generic/ticket_spinlock.h
>>>>>>>> +++ b/include/asm-generic/ticket_spinlock.h
>>>>>>>> @@ -89,6 +89,7 @@ static __always_inline int ticket_spin_is_contended(arch_spinlock_t *lock)
>>>>>>>> return (s16)((val >> 16) - (val & 0xffff)) > 1;
>>>>>>>> }
>>>>>>>>
>>>>>>>> +#ifndef __no_arch_spinlock_redefine
>>>>>>>> /*
>>>>>>>> * Remapping spinlock architecture specific functions to the corresponding
>>>>>>>> * ticket spinlock functions.
>>>>>>>> @@ -99,5 +100,6 @@ static __always_inline int ticket_spin_is_contended(arch_spinlock_t *lock)
>>>>>>>> #define arch_spin_lock(l) ticket_spin_lock(l)
>>>>>>>> #define arch_spin_trylock(l) ticket_spin_trylock(l)
>>>>>>>> #define arch_spin_unlock(l) ticket_spin_unlock(l)
>>>>>>>> +#endif
>>>>>>>>
>>>>>>>> #endif /* __ASM_GENERIC_TICKET_SPINLOCK_H */
>>>>>>>> --
>>>>>>>> 2.39.2
>>>>>>>>
>>>>>>> --
>>>>>>> Best Regards
>>>>>>> Guo Ren
>>>
>
>

2024-06-03 11:57:37

by Guo Ren

[permalink] [raw]
Subject: Re: [PATCH 7/7] riscv: Add qspinlock support based on Zabha extension

On Mon, Jun 3, 2024 at 7:49 PM Alexandre Ghiti <[email protected]> wrote:
>
> On 03/06/2024 13:44, Guo Ren wrote:
> > On Mon, Jun 3, 2024 at 7:34 PM Alexandre Ghiti <[email protected]> wrote:
> >> On 03/06/2024 13:28, Guo Ren wrote:
> >>> On Mon, Jun 3, 2024 at 5:49 PM Alexandre Ghiti <[email protected]> wrote:
> >>>> Hi Guo,
> >>>>
> >>>> On 31/05/2024 15:10, Guo Ren wrote:
> >>>>> On Wed, May 29, 2024 at 9:03 PM Alexandre Ghiti <[email protected]> wrote:
> >>>>>> Hi Guo,
> >>>>>>
> >>>>>> On Wed, May 29, 2024 at 11:24 AM Guo Ren <[email protected]> wrote:
> >>>>>>> On Tue, May 28, 2024 at 11:18 PM Alexandre Ghiti <[email protected]> wrote:
> >>>>>>>> In order to produce a generic kernel, a user can select
> >>>>>>>> CONFIG_QUEUED_SPINLOCKS which will fallback at runtime to the ticket
> >>>>>>>> spinlock implementation if Zabha is not present.
> >>>>>>>>
> >>>>>>>> Note that we can't use alternatives here because the discovery of
> >>>>>>>> extensions is done too late and we need to start with the qspinlock
> >>>>>>>> implementation because the ticket spinlock implementation would pollute
> >>>>>>>> the spinlock value, so let's use static keys.
> >>>>> Zabha is not a prerequisite for qspinlock; the prerequisite for
> >>>>> qspinlock is the *forward progress guarantee* in the atomic operation
> >>>>> loop during intense contention. Even with Zabha enabled to meet the
> >>>>> requirements of xchg_tail, that still only applies when the number of
> >>>>> CPUs is less than 16K. The qspinlock uses cmpxchg loop instead of
> >>>>> xchg_tail when the number of cores is more than 16K. Thus, hardware
> >>>>> support for Zabha does not equate to the safe use of qspinlock.
> >>>> But if we have Zacas to implement cmpxchg(), we still provide the
> >>>> "forward progress guarantee" then right? Let me know if I missed something.
> >>> The qspinlock needs a "forward progress guarantee," not Zacas, and
> >>> Zabha could give a guarantee to qspinlock xchg_tail (CPUs < 16K) with
> >>> AMOSWAP.H instruction. But, using "LR/SC pairs" also could give enough
> >>> fwd guarantee that depends on the micro-arch design of the riscv core.
> >>> I think the help of AMO instead of LR/SC is it could off-load AMO
> >>> operations from LSU to CIU(Next Level Cache or Interconnect), which
> >>> gains better performance. "LR/SC pairs" only provide Near-Atomic, but
> >>> AMO gives Far-Atomic additionally.
> >>
> >> I understand qspinlocks require forward progress and that your company's
> >> LR/SC implementations provide such guarantee, I'm not arguing against
> >> your new extension proposal.
> >>
> >> It seemed to me in your previous message that you implied that when
> >> NR_CPUS > 16k, we should not use qspinlocks. My question was: "Don't
> >> Zacas provide such guarantee"? I think it does, so qspinlocks should
> >> actually depend on Zabha *and* Zacas. Is that correct to you?
> > See kernel/locking/qspinlock.c
> > #if _Q_PENDING_BITS == 8 (NR_CPUS < 16K)
> > static __always_inline u32 xchg_tail(struct qspinlock *lock, u32 tail)
> > {
> > /*
> > * We can use relaxed semantics since the caller ensures that the
> > * MCS node is properly initialized before updating the tail.
> > */
> > return (u32)xchg_relaxed(&lock->tail,
> > tail >> _Q_TAIL_OFFSET) << _Q_TAIL_OFFSET;
> > }
> > #else /* NR_CPUS >= 16K */
> > static __always_inline u32 xchg_tail(struct qspinlock *lock, u32 tail)
> > {
> > u32 old, new;
> >
> > old = atomic_read(&lock->val);
> > do {
> > new = (old & _Q_LOCKED_PENDING_MASK) | tail;
> > /*
> > * We can use relaxed semantics since the caller ensures that
> > * the MCS node is properly initialized before updating the
> > * tail.
> > */
> > } while (!atomic_try_cmpxchg_relaxed(&lock->val, &old, new));
> >
> > return old;
> > }
> > #endif
> >
> > Look! Even with Zacas, you still need an additional forward progress
> > guarantee to break out of the loop. That is, how *sticky* is your cache line?
>
>
> But then the problem comes from this generic implementation of
> xchg_tail(), not from the arch cas implementation right?
The cmpxchg-loop forward progress guarantee is needed throughout the whole
of Linux, not only in qspinlock. If a machine can't provide that forward
progress guarantee, it seems like a pretty poor one.
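
To illustrate the pattern being referred to here, a minimal sketch of the
cmpxchg retry loop found all over the kernel (the helper name is made up for
the example; it is not an existing kernel function):

#include <linux/atomic.h>

/*
 * Sketch of the ubiquitous cmpxchg retry loop. Without a forward progress
 * guarantee from the hardware, a CPU whose cache line keeps being stolen
 * between the read and the cmpxchg can retry here indefinitely under heavy
 * contention.
 */
static inline int atomic_fetch_or_sketch(atomic_t *v, int mask)
{
	int old = atomic_read(v);

	do {
		/* 'old' is refreshed by try_cmpxchg on every failed attempt */
	} while (!atomic_try_cmpxchg_relaxed(v, &old, old | mask));

	return old;
}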

>
>
> >
> >> Let me know if I misunderstood something again.
> >>
> >> Thanks,
> >>
> >> Alex
> >>
> >>
> >>>
> >>>> Thanks,
> >>>>
> >>>> Alex
> >>>>
> >>>>
> >>>>> Therefore, I would like to propose a new ISA extension: Zafpg(Atomic
> >>>>> Forward Progress Guarantee). If RISC-V vendors can ensure the progress
> >>>>> of LR/SC or CMPXCHG LOOP at the microarchitectural level or if cache
> >>>>> lines are sufficiently sticky, they could then claim support for this
> >>>>> extension. Linux could then select different spinlock implementations
> >>>>> based on this extension's support or not.
> >>>>>
> >>>>>>>> This is largely based on Guo's work and Leonardo reviews at [1].
> >>>>>>>>
> >>>>>>>> Link: https://lore.kernel.org/linux-riscv/[email protected]/ [1]
> >>>>>>>> Signed-off-by: Alexandre Ghiti <[email protected]>
> >>>>>>>> ---
> >>>>>>>> .../locking/queued-spinlocks/arch-support.txt | 2 +-
> >>>>>>>> arch/riscv/Kconfig | 1 +
> >>>>>>>> arch/riscv/include/asm/Kbuild | 4 +-
> >>>>>>>> arch/riscv/include/asm/spinlock.h | 39 +++++++++++++++++++
> >>>>>>>> arch/riscv/kernel/setup.c | 18 +++++++++
> >>>>>>>> include/asm-generic/qspinlock.h | 2 +
> >>>>>>>> include/asm-generic/ticket_spinlock.h | 2 +
> >>>>>>>> 7 files changed, 66 insertions(+), 2 deletions(-)
> >>>>>>>> create mode 100644 arch/riscv/include/asm/spinlock.h
> >>>>>>>>
> >>>>>>>> diff --git a/Documentation/features/locking/queued-spinlocks/arch-support.txt b/Documentation/features/locking/queued-spinlocks/arch-support.txt
> >>>>>>>> index 22f2990392ff..cf26042480e2 100644
> >>>>>>>> --- a/Documentation/features/locking/queued-spinlocks/arch-support.txt
> >>>>>>>> +++ b/Documentation/features/locking/queued-spinlocks/arch-support.txt
> >>>>>>>> @@ -20,7 +20,7 @@
> >>>>>>>> | openrisc: | ok |
> >>>>>>>> | parisc: | TODO |
> >>>>>>>> | powerpc: | ok |
> >>>>>>>> - | riscv: | TODO |
> >>>>>>>> + | riscv: | ok |
> >>>>>>>> | s390: | TODO |
> >>>>>>>> | sh: | TODO |
> >>>>>>>> | sparc: | ok |
> >>>>>>>> diff --git a/arch/riscv/Kconfig b/arch/riscv/Kconfig
> >>>>>>>> index 184a9edb04e0..ccf1703edeb9 100644
> >>>>>>>> --- a/arch/riscv/Kconfig
> >>>>>>>> +++ b/arch/riscv/Kconfig
> >>>>>>>> @@ -59,6 +59,7 @@ config RISCV
> >>>>>>>> select ARCH_SUPPORTS_SHADOW_CALL_STACK if HAVE_SHADOW_CALL_STACK
> >>>>>>>> select ARCH_USE_MEMTEST
> >>>>>>>> select ARCH_USE_QUEUED_RWLOCKS
> >>>>>>>> + select ARCH_USE_QUEUED_SPINLOCKS if TOOLCHAIN_HAS_ZABHA
> >>>>>>> Using qspinlock or not depends on real hardware capabilities, not the
> >>>>>>> compiler flag. That's why I introduced combo-spinlock, ticket-spinlock
> >>>>>>> & qspinlock three Kconfigs, and the combo-spinlock would compat all
> >>>>>>> hardware platforms but waste some qspinlock code size.
> >>>>>> You're right, and I think your comment matches what Conor mentioned
> >>>>>> about the lack of clarity with some extensions: TOOLCHAIN_HAS_ZABHA
> >>>>>> will allow a platform with Zabha capability to use qspinlocks. But if
> >>>>>> the hardware does not, it will fallback to the ticket spinlocks.
> >>>>>>
> >>>>>> But I agree that looking at the config alone may be misleading, even
> >>>>>> though it will work as expected at runtime. So I agree with you:
> >>>>>> unless anyone is strongly against the combo spinlocks, I will do what
> >>>>>> you suggest and add them.
> >>>>>>
> >>>>>> Thanks again for your initial work,
> >>>>>>
> >>>>>> Alex
> >>>>>>
> >>>>>>>> select ARCH_USES_CFI_TRAPS if CFI_CLANG
> >>>>>>>> select ARCH_WANT_BATCHED_UNMAP_TLB_FLUSH if SMP && MMU
> >>>>>>>> select ARCH_WANT_DEFAULT_TOPDOWN_MMAP_LAYOUT if MMU
> >>>>>>>> diff --git a/arch/riscv/include/asm/Kbuild b/arch/riscv/include/asm/Kbuild
> >>>>>>>> index 504f8b7e72d4..ad72f2bd4cc9 100644
> >>>>>>>> --- a/arch/riscv/include/asm/Kbuild
> >>>>>>>> +++ b/arch/riscv/include/asm/Kbuild
> >>>>>>>> @@ -2,10 +2,12 @@
> >>>>>>>> generic-y += early_ioremap.h
> >>>>>>>> generic-y += flat.h
> >>>>>>>> generic-y += kvm_para.h
> >>>>>>>> +generic-y += mcs_spinlock.h
> >>>>>>>> generic-y += parport.h
> >>>>>>>> -generic-y += spinlock.h
> >>>>>>>> generic-y += spinlock_types.h
> >>>>>>>> +generic-y += ticket_spinlock.h
> >>>>>>>> generic-y += qrwlock.h
> >>>>>>>> generic-y += qrwlock_types.h
> >>>>>>>> +generic-y += qspinlock.h
> >>>>>>>> generic-y += user.h
> >>>>>>>> generic-y += vmlinux.lds.h
> >>>>>>>> diff --git a/arch/riscv/include/asm/spinlock.h b/arch/riscv/include/asm/spinlock.h
> >>>>>>>> new file mode 100644
> >>>>>>>> index 000000000000..e00429ac20ed
> >>>>>>>> --- /dev/null
> >>>>>>>> +++ b/arch/riscv/include/asm/spinlock.h
> >>>>>>>> @@ -0,0 +1,39 @@
> >>>>>>>> +/* SPDX-License-Identifier: GPL-2.0 */
> >>>>>>>> +
> >>>>>>>> +#ifndef __ASM_RISCV_SPINLOCK_H
> >>>>>>>> +#define __ASM_RISCV_SPINLOCK_H
> >>>>>>>> +
> >>>>>>>> +#ifdef CONFIG_QUEUED_SPINLOCKS
> >>>>>>>> +#define _Q_PENDING_LOOPS (1 << 9)
> >>>>>>>> +
> >>>>>>>> +#define __no_arch_spinlock_redefine
> >>>>>>>> +#include <asm/ticket_spinlock.h>
> >>>>>>>> +#include <asm/qspinlock.h>
> >>>>>>>> +#include <asm/alternative.h>
> >>>>>>>> +
> >>>>>>>> +DECLARE_STATIC_KEY_TRUE(qspinlock_key);
> >>>>>>>> +
> >>>>>>>> +#define SPINLOCK_BASE_DECLARE(op, type, type_lock) \
> >>>>>>>> +static __always_inline type arch_spin_##op(type_lock lock) \
> >>>>>>>> +{ \
> >>>>>>>> + if (static_branch_unlikely(&qspinlock_key)) \
> >>>>>>>> + return queued_spin_##op(lock); \
> >>>>>>>> + return ticket_spin_##op(lock); \
> >>>>>>>> +}
> >>>>>>>> +
> >>>>>>>> +SPINLOCK_BASE_DECLARE(lock, void, arch_spinlock_t *)
> >>>>>>>> +SPINLOCK_BASE_DECLARE(unlock, void, arch_spinlock_t *)
> >>>>>>>> +SPINLOCK_BASE_DECLARE(is_locked, int, arch_spinlock_t *)
> >>>>>>>> +SPINLOCK_BASE_DECLARE(is_contended, int, arch_spinlock_t *)
> >>>>>>>> +SPINLOCK_BASE_DECLARE(trylock, bool, arch_spinlock_t *)
> >>>>>>>> +SPINLOCK_BASE_DECLARE(value_unlocked, int, arch_spinlock_t)
> >>>>>>>> +
> >>>>>>>> +#else
> >>>>>>>> +
> >>>>>>>> +#include <asm/ticket_spinlock.h>
> >>>>>>>> +
> >>>>>>>> +#endif
> >>>>>>>> +
> >>>>>>>> +#include <asm/qrwlock.h>
> >>>>>>>> +
> >>>>>>>> +#endif /* __ASM_RISCV_SPINLOCK_H */
> >>>>>>>> diff --git a/arch/riscv/kernel/setup.c b/arch/riscv/kernel/setup.c
> >>>>>>>> index 4f73c0ae44b2..31ce75522fd4 100644
> >>>>>>>> --- a/arch/riscv/kernel/setup.c
> >>>>>>>> +++ b/arch/riscv/kernel/setup.c
> >>>>>>>> @@ -244,6 +244,23 @@ static void __init parse_dtb(void)
> >>>>>>>> #endif
> >>>>>>>> }
> >>>>>>>>
> >>>>>>>> +DEFINE_STATIC_KEY_TRUE(qspinlock_key);
> >>>>>>>> +EXPORT_SYMBOL(qspinlock_key);
> >>>>>>>> +
> >>>>>>>> +static void __init riscv_spinlock_init(void)
> >>>>>>>> +{
> >>>>>>>> + asm goto(ALTERNATIVE("nop", "j %[qspinlock]", 0, RISCV_ISA_EXT_ZABHA, 1)
> >>>>>>>> + : : : : qspinlock);
> >>>>>>>> +
> >>>>>>>> + static_branch_disable(&qspinlock_key);
> >>>>>>>> + pr_info("Ticket spinlock: enabled\n");
> >>>>>>>> +
> >>>>>>>> + return;
> >>>>>>>> +
> >>>>>>>> +qspinlock:
> >>>>>>>> + pr_info("Queued spinlock: enabled\n");
> >>>>>>>> +}
> >>>>>>>> +
> >>>>>>>> extern void __init init_rt_signal_env(void);
> >>>>>>>>
> >>>>>>>> void __init setup_arch(char **cmdline_p)
> >>>>>>>> @@ -295,6 +312,7 @@ void __init setup_arch(char **cmdline_p)
> >>>>>>>> riscv_set_dma_cache_alignment();
> >>>>>>>>
> >>>>>>>> riscv_user_isa_enable();
> >>>>>>>> + riscv_spinlock_init();
> >>>>>>>> }
> >>>>>>>>
> >>>>>>>> bool arch_cpu_is_hotpluggable(int cpu)
> >>>>>>>> diff --git a/include/asm-generic/qspinlock.h b/include/asm-generic/qspinlock.h
> >>>>>>>> index 0655aa5b57b2..bf47cca2c375 100644
> >>>>>>>> --- a/include/asm-generic/qspinlock.h
> >>>>>>>> +++ b/include/asm-generic/qspinlock.h
> >>>>>>>> @@ -136,6 +136,7 @@ static __always_inline bool virt_spin_lock(struct qspinlock *lock)
> >>>>>>>> }
> >>>>>>>> #endif
> >>>>>>>>
> >>>>>>>> +#ifndef __no_arch_spinlock_redefine
> >>>>>>>> /*
> >>>>>>>> * Remapping spinlock architecture specific functions to the corresponding
> >>>>>>>> * queued spinlock functions.
> >>>>>>>> @@ -146,5 +147,6 @@ static __always_inline bool virt_spin_lock(struct qspinlock *lock)
> >>>>>>>> #define arch_spin_lock(l) queued_spin_lock(l)
> >>>>>>>> #define arch_spin_trylock(l) queued_spin_trylock(l)
> >>>>>>>> #define arch_spin_unlock(l) queued_spin_unlock(l)
> >>>>>>>> +#endif
> >>>>>>>>
> >>>>>>>> #endif /* __ASM_GENERIC_QSPINLOCK_H */
> >>>>>>>> diff --git a/include/asm-generic/ticket_spinlock.h b/include/asm-generic/ticket_spinlock.h
> >>>>>>>> index cfcff22b37b3..325779970d8a 100644
> >>>>>>>> --- a/include/asm-generic/ticket_spinlock.h
> >>>>>>>> +++ b/include/asm-generic/ticket_spinlock.h
> >>>>>>>> @@ -89,6 +89,7 @@ static __always_inline int ticket_spin_is_contended(arch_spinlock_t *lock)
> >>>>>>>> return (s16)((val >> 16) - (val & 0xffff)) > 1;
> >>>>>>>> }
> >>>>>>>>
> >>>>>>>> +#ifndef __no_arch_spinlock_redefine
> >>>>>>>> /*
> >>>>>>>> * Remapping spinlock architecture specific functions to the corresponding
> >>>>>>>> * ticket spinlock functions.
> >>>>>>>> @@ -99,5 +100,6 @@ static __always_inline int ticket_spin_is_contended(arch_spinlock_t *lock)
> >>>>>>>> #define arch_spin_lock(l) ticket_spin_lock(l)
> >>>>>>>> #define arch_spin_trylock(l) ticket_spin_trylock(l)
> >>>>>>>> #define arch_spin_unlock(l) ticket_spin_unlock(l)
> >>>>>>>> +#endif
> >>>>>>>>
> >>>>>>>> #endif /* __ASM_GENERIC_TICKET_SPINLOCK_H */
> >>>>>>>> --
> >>>>>>>> 2.39.2
> >>>>>>>>
> >>>>>>> --
> >>>>>>> Best Regards
> >>>>>>> Guo Ren
> >>>
> >
> >
>


--
Best Regards
Guo Ren

2024-06-03 15:33:00

by Alexandre Ghiti

[permalink] [raw]
Subject: Re: [PATCH 2/7] riscv: Implement cmpxchg8/16() using Zabha

Hi Conor, Nathan,

On 29/05/2024 17:57, Nathan Chancellor wrote:
> On Wed, May 29, 2024 at 02:49:58PM +0200, Alexandre Ghiti wrote:
>> Then I missed that, I should have checked the generated code. Is the
>> extension version "1p0" in '-march=' only required for experimental
>> extensions?
> I think so, if my understanding of the message is correct.
>
>> But from Conor comment here [1], we should not enable extensions that
>> are only experimental. In that case, we should be good with this.
>>
>> [1] https://lore.kernel.org/linux-riscv/[email protected]/T/#mefb283477bce852f3713cbbb4ff002252281c9d5
> Yeah, I tend to agree with Conor on that front. I had not noticed that
> part of the message when I was looking at other parts of this thread. I
> could see an argument for allowing experimental extensions for
> qualification purposes but I think it does create a bit of a support
> nightmare, especially when there are breaking changes across revisions.
>
>>> config EXPERIMENTAL_EXTENSIONS
>>> bool
>>>
>>> config TOOLCHAIN_HAS_ZABHA
>>> def_bool y
>>> select EXPERIMENTAL_EXTENSIONS if CC_IS_CLANG
>>> ...
>>>
>>> config TOOLCHAIN_HAS_ZACAS
>>> def_bool y
>>> # ZACAS was experimental until Clang 19: https://github.com/llvm/llvm-project/commit/95aab69c109adf29e183090c25dc95c773215746
>>> select EXPERIMENTAL_EXTENSIONS if CC_IS_CLANG && CLANG_VERSION < 190000
>>> ...
>>>
>>> Then in the Makefile:
>>>
>>> ifdef CONFIG_EXPERIMENTAL_EXTENSIONS
>>> KBUILD_AFLAGS += -menable-experimental-extensions
>>> KBUILD_CFLAGS += -menable-experimental-extensions
>>> endif
> Perhaps with that in mind, maybe EXPERIMENTAL_EXTENSIONS (or whatever)
> should be a user selectable option and the TOOLCHAIN values depend on it
> when the user has a clang version that does not support the ratified
> version.
>
>> That's a good idea to me, let's see what Conor thinks [2]
>>
>> [2] https://lore.kernel.org/linux-riscv/[email protected]/T/#m1d798dfc4c27e5b6d9e14117d81b577ace123322
> FWIW, I think your plan of removing support for the experimental version
> of the extension and pushing to remove the experimental status in LLVM
> (especially since it seems like it is ratified like zacas?
> https://jira.riscv.org/browse/RVS-1685) is probably the best thing going
> forward. If the LLVM folks are made aware soon, it should be easy to get
> that change into clang-19, which is branching at the end of July I
> believe.


FYI, it was just merged https://github.com/llvm/llvm-project/pull/93831
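
To make the user-selectable option suggested above concrete, a rough Kconfig
sketch (the symbol name and version check are assumptions, not what was
actually merged):

config RISCV_EXPERIMENTAL_EXTENSIONS
	bool "Enable experimental toolchain extensions"
	help
	  Pass -menable-experimental-extensions to the toolchain so that
	  extensions LLVM still marks as experimental can be used.

config TOOLCHAIN_HAS_ZACAS
	def_bool y
	# Zacas stopped being experimental in LLVM 19
	depends on CC_IS_GCC || CLANG_VERSION >= 190000 || RISCV_EXPERIMENTAL_EXTENSIONS
	depends on AS_HAS_OPTION_ARCH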

Thanks again,

Alex


>
>> Thanks for your thorough review!
> Thanks for taking LLVM support into consideration :)
>
> Cheers,
> Nathan
>