I unified previous patchsets into a single one, since the work is related.
While studying riscv's cmpxchg.h file, I got really interested in
understanding how RISC-V asm implements the different versions of
{cmp,}xchg.
When I understood the pattern, it made sense to remove the
duplications and create macros that make it easier to see what exactly
changes between the versions: instruction suffixes & barriers.
I also did the same kind of work on atomic.h.
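For illustration, after the cmpxchg patch the four memory-ordering
variants become one-liners over a single parameterized macro (this
mirrors patch 2 below):
#define arch_cmpxchg_relaxed(ptr, o, n)	_arch_cmpxchg((ptr), (o), (n), "", "", "")
#define arch_cmpxchg_acquire(ptr, o, n)	_arch_cmpxchg((ptr), (o), (n), "", "", RISCV_ACQUIRE_BARRIER)
#define arch_cmpxchg_release(ptr, o, n)	_arch_cmpxchg((ptr), (o), (n), "", RISCV_RELEASE_BARRIER, "")
#define arch_cmpxchg(ptr, o, n)		_arch_cmpxchg((ptr), (o), (n), ".rl", "", " fence rw, rw\n")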
Note to Guo Ren:
I did some further improvements after your previous reviews, so I was
hesitant to include your Reviewed-by before checking whether the changes
are ok with you. Please check it out again; I just removed some helper
macros that were not being used elsewhere in the kernel.
Thanks!
Leo
Changes since squashed cmpxchg:
- Unified with atomic.c patchset
- Rebased on top of torvalds/master (thanks Andrea Parri!)
- Removed helper macros that were not being used elsewhere in the kernel.
Changes since (cmpxchg) RFCv3:
- Squashed the 6 original patches into 2: one for cmpxchg and one for xchg
https://lore.kernel.org/all/[email protected]/
Changes since (cmpxchg) RFCv2:
- Fixed macros that depend on having a local variable with a magic name
- Previous cast to (long) is now only applied on 4-bytes cmpxchg
https://lore.kernel.org/all/[email protected]/
Changes since (cmpxchg) RFCv1:
- Fixed patch 4/6 suffix from 'w.aqrl' to '.w.aqrl', to avoid build error
https://lore.kernel.org/all/[email protected]/
Leonardo Bras (3):
riscv/cmpxchg: Deduplicate xchg() asm functions
riscv/cmpxchg: Deduplicate cmpxchg() asm and macros
riscv/atomic.h : Deduplicate arch_atomic.*
arch/riscv/include/asm/atomic.h | 164 ++++++++--------
arch/riscv/include/asm/cmpxchg.h | 318 +++++--------------------------
2 files changed, 123 insertions(+), 359 deletions(-)
--
2.41.0
Some functions use mostly the same asm for their 32-bit and 64-bit
versions.
Make a macro that is generic enough to cover both and avoid code
duplication.
(This did not cause any change in the generated asm.)
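For example, the 32-bit and 64-bit fetch_add_unless bodies then differ
only in the instruction suffix passed to the shared macro (excerpt from
the diff below):
	_arch_atomic_fetch_add_unless(prev, rc, v->counter, a, u, "w");
	_arch_atomic_fetch_add_unless(prev, rc, v->counter, a, u, "d");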
Signed-off-by: Leonardo Bras <[email protected]>
---
arch/riscv/include/asm/atomic.h | 164 +++++++++++++++-----------------
1 file changed, 76 insertions(+), 88 deletions(-)
diff --git a/arch/riscv/include/asm/atomic.h b/arch/riscv/include/asm/atomic.h
index f5dfef6c2153..80cca7ac16fd 100644
--- a/arch/riscv/include/asm/atomic.h
+++ b/arch/riscv/include/asm/atomic.h
@@ -196,22 +196,28 @@ ATOMIC_OPS(xor, xor, i)
#undef ATOMIC_FETCH_OP
#undef ATOMIC_OP_RETURN
+#define _arch_atomic_fetch_add_unless(_prev, _rc, counter, _a, _u, sfx) \
+({ \
+ __asm__ __volatile__ ( \
+ "0: lr." sfx " %[p], %[c]\n" \
+ " beq %[p], %[u], 1f\n" \
+ " add %[rc], %[p], %[a]\n" \
+ " sc." sfx ".rl %[rc], %[rc], %[c]\n" \
+ " bnez %[rc], 0b\n" \
+ " fence rw, rw\n" \
+ "1:\n" \
+ : [p]"=&r" (_prev), [rc]"=&r" (_rc), [c]"+A" (counter) \
+ : [a]"r" (_a), [u]"r" (_u) \
+ : "memory"); \
+})
+
/* This is required to provide a full barrier on success. */
static __always_inline int arch_atomic_fetch_add_unless(atomic_t *v, int a, int u)
{
int prev, rc;
- __asm__ __volatile__ (
- "0: lr.w %[p], %[c]\n"
- " beq %[p], %[u], 1f\n"
- " add %[rc], %[p], %[a]\n"
- " sc.w.rl %[rc], %[rc], %[c]\n"
- " bnez %[rc], 0b\n"
- " fence rw, rw\n"
- "1:\n"
- : [p]"=&r" (prev), [rc]"=&r" (rc), [c]"+A" (v->counter)
- : [a]"r" (a), [u]"r" (u)
- : "memory");
+ _arch_atomic_fetch_add_unless(prev, rc, v->counter, a, u, "w");
+
return prev;
}
#define arch_atomic_fetch_add_unless arch_atomic_fetch_add_unless
@@ -222,77 +228,86 @@ static __always_inline s64 arch_atomic64_fetch_add_unless(atomic64_t *v, s64 a,
s64 prev;
long rc;
- __asm__ __volatile__ (
- "0: lr.d %[p], %[c]\n"
- " beq %[p], %[u], 1f\n"
- " add %[rc], %[p], %[a]\n"
- " sc.d.rl %[rc], %[rc], %[c]\n"
- " bnez %[rc], 0b\n"
- " fence rw, rw\n"
- "1:\n"
- : [p]"=&r" (prev), [rc]"=&r" (rc), [c]"+A" (v->counter)
- : [a]"r" (a), [u]"r" (u)
- : "memory");
+ _arch_atomic_fetch_add_unless(prev, rc, v->counter, a, u, "d");
+
return prev;
}
#define arch_atomic64_fetch_add_unless arch_atomic64_fetch_add_unless
#endif
+#define _arch_atomic_inc_unless_negative(_prev, _rc, counter, sfx) \
+({ \
+ __asm__ __volatile__ ( \
+ "0: lr." sfx " %[p], %[c]\n" \
+ " bltz %[p], 1f\n" \
+ " addi %[rc], %[p], 1\n" \
+ " sc." sfx ".rl %[rc], %[rc], %[c]\n" \
+ " bnez %[rc], 0b\n" \
+ " fence rw, rw\n" \
+ "1:\n" \
+ : [p]"=&r" (_prev), [rc]"=&r" (_rc), [c]"+A" (counter) \
+ : \
+ : "memory"); \
+})
+
static __always_inline bool arch_atomic_inc_unless_negative(atomic_t *v)
{
int prev, rc;
- __asm__ __volatile__ (
- "0: lr.w %[p], %[c]\n"
- " bltz %[p], 1f\n"
- " addi %[rc], %[p], 1\n"
- " sc.w.rl %[rc], %[rc], %[c]\n"
- " bnez %[rc], 0b\n"
- " fence rw, rw\n"
- "1:\n"
- : [p]"=&r" (prev), [rc]"=&r" (rc), [c]"+A" (v->counter)
- :
- : "memory");
+ _arch_atomic_inc_unless_negative(prev, rc, v->counter, "w");
+
return !(prev < 0);
}
#define arch_atomic_inc_unless_negative arch_atomic_inc_unless_negative
+#define _arch_atomic_dec_unless_positive(_prev, _rc, counter, sfx) \
+({ \
+ __asm__ __volatile__ ( \
+ "0: lr." sfx " %[p], %[c]\n" \
+ " bgtz %[p], 1f\n" \
+ " addi %[rc], %[p], -1\n" \
+ " sc." sfx ".rl %[rc], %[rc], %[c]\n" \
+ " bnez %[rc], 0b\n" \
+ " fence rw, rw\n" \
+ "1:\n" \
+ : [p]"=&r" (_prev), [rc]"=&r" (_rc), [c]"+A" (counter) \
+ : \
+ : "memory"); \
+})
+
static __always_inline bool arch_atomic_dec_unless_positive(atomic_t *v)
{
int prev, rc;
- __asm__ __volatile__ (
- "0: lr.w %[p], %[c]\n"
- " bgtz %[p], 1f\n"
- " addi %[rc], %[p], -1\n"
- " sc.w.rl %[rc], %[rc], %[c]\n"
- " bnez %[rc], 0b\n"
- " fence rw, rw\n"
- "1:\n"
- : [p]"=&r" (prev), [rc]"=&r" (rc), [c]"+A" (v->counter)
- :
- : "memory");
+ _arch_atomic_dec_unless_positive(prev, rc, v->counter, "w");
+
return !(prev > 0);
}
#define arch_atomic_dec_unless_positive arch_atomic_dec_unless_positive
+#define _arch_atomic_dec_if_positive(_prev, _rc, counter, sfx) \
+({ \
+ __asm__ __volatile__ ( \
+ "0: lr." sfx " %[p], %[c]\n" \
+ " addi %[rc], %[p], -1\n" \
+ " bltz %[rc], 1f\n" \
+ " sc." sfx ".rl %[rc], %[rc], %[c]\n" \
+ " bnez %[rc], 0b\n" \
+ " fence rw, rw\n" \
+ "1:\n" \
+ : [p]"=&r" (_prev), [rc]"=&r" (_rc), [c]"+A" (counter) \
+ : \
+ : "memory"); \
+})
+
static __always_inline int arch_atomic_dec_if_positive(atomic_t *v)
{
int prev, rc;
- __asm__ __volatile__ (
- "0: lr.w %[p], %[c]\n"
- " addi %[rc], %[p], -1\n"
- " bltz %[rc], 1f\n"
- " sc.w.rl %[rc], %[rc], %[c]\n"
- " bnez %[rc], 0b\n"
- " fence rw, rw\n"
- "1:\n"
- : [p]"=&r" (prev), [rc]"=&r" (rc), [c]"+A" (v->counter)
- :
- : "memory");
+ _arch_atomic_dec_if_positive(prev, rc, v->counter, "w");
+
return prev - 1;
}
@@ -304,17 +319,8 @@ static __always_inline bool arch_atomic64_inc_unless_negative(atomic64_t *v)
s64 prev;
long rc;
- __asm__ __volatile__ (
- "0: lr.d %[p], %[c]\n"
- " bltz %[p], 1f\n"
- " addi %[rc], %[p], 1\n"
- " sc.d.rl %[rc], %[rc], %[c]\n"
- " bnez %[rc], 0b\n"
- " fence rw, rw\n"
- "1:\n"
- : [p]"=&r" (prev), [rc]"=&r" (rc), [c]"+A" (v->counter)
- :
- : "memory");
+ _arch_atomic_inc_unless_negative(prev, rc, v->counter, "d");
+
return !(prev < 0);
}
@@ -325,17 +331,8 @@ static __always_inline bool arch_atomic64_dec_unless_positive(atomic64_t *v)
s64 prev;
long rc;
- __asm__ __volatile__ (
- "0: lr.d %[p], %[c]\n"
- " bgtz %[p], 1f\n"
- " addi %[rc], %[p], -1\n"
- " sc.d.rl %[rc], %[rc], %[c]\n"
- " bnez %[rc], 0b\n"
- " fence rw, rw\n"
- "1:\n"
- : [p]"=&r" (prev), [rc]"=&r" (rc), [c]"+A" (v->counter)
- :
- : "memory");
+ _arch_atomic_dec_unless_positive(prev, rc, v->counter, "d");
+
return !(prev > 0);
}
@@ -346,17 +343,8 @@ static __always_inline s64 arch_atomic64_dec_if_positive(atomic64_t *v)
s64 prev;
long rc;
- __asm__ __volatile__ (
- "0: lr.d %[p], %[c]\n"
- " addi %[rc], %[p], -1\n"
- " bltz %[rc], 1f\n"
- " sc.d.rl %[rc], %[rc], %[c]\n"
- " bnez %[rc], 0b\n"
- " fence rw, rw\n"
- "1:\n"
- : [p]"=&r" (prev), [rc]"=&r" (rc), [c]"+A" (v->counter)
- :
- : "memory");
+ _arch_atomic_dec_if_positive(prev, rc, v->counter, "d");
+
return prev - 1;
}
--
2.41.0
In this header, every cmpxchg define (_relaxed, _acquire, _release,
vanilla) contains its own asm block, both for 4-byte and for 8-byte
variables, for a total of 8 versions of mostly the same asm.
This is usually bad, as it means any change may need to be made in up
to 8 different places.
Unify those versions by creating a new define with enough parameters
to generate any of the previous 8 versions.
Then unify the result under a more general define, and simplify the
arch_cmpxchg* generation.
(This did not cause any change in the generated asm.)
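For example, the size dispatch then reduces to (excerpt from the diff
below):
	case 4:
		__arch_cmpxchg(".w", ".w" sc_sfx, prepend, append,
			       __ret, __rc, __ptr, (long), __old, __new);
		break;
	case 8:
		__arch_cmpxchg(".d", ".d" sc_sfx, prepend, append,
			       __ret, __rc, __ptr, /**/, __old, __new);
		break;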
Signed-off-by: Leonardo Bras <[email protected]>
---
arch/riscv/include/asm/cmpxchg.h | 182 +++++--------------------------
1 file changed, 25 insertions(+), 157 deletions(-)
diff --git a/arch/riscv/include/asm/cmpxchg.h b/arch/riscv/include/asm/cmpxchg.h
index ec4ea4f3f908..bb45ef83592f 100644
--- a/arch/riscv/include/asm/cmpxchg.h
+++ b/arch/riscv/include/asm/cmpxchg.h
@@ -71,81 +71,37 @@
* store NEW in MEM. Return the initial value in MEM. Success is
* indicated by comparing RETURN with OLD.
*/
-#define __cmpxchg_relaxed(ptr, old, new, size) \
-({ \
- __typeof__(ptr) __ptr = (ptr); \
- __typeof__(*(ptr)) __old = (old); \
- __typeof__(*(ptr)) __new = (new); \
- __typeof__(*(ptr)) __ret; \
- register unsigned int __rc; \
- switch (size) { \
- case 4: \
- __asm__ __volatile__ ( \
- "0: lr.w %0, %2\n" \
- " bne %0, %z3, 1f\n" \
- " sc.w %1, %z4, %2\n" \
- " bnez %1, 0b\n" \
- "1:\n" \
- : "=&r" (__ret), "=&r" (__rc), "+A" (*__ptr) \
- : "rJ" ((long)__old), "rJ" (__new) \
- : "memory"); \
- break; \
- case 8: \
- __asm__ __volatile__ ( \
- "0: lr.d %0, %2\n" \
- " bne %0, %z3, 1f\n" \
- " sc.d %1, %z4, %2\n" \
- " bnez %1, 0b\n" \
- "1:\n" \
- : "=&r" (__ret), "=&r" (__rc), "+A" (*__ptr) \
- : "rJ" (__old), "rJ" (__new) \
- : "memory"); \
- break; \
- default: \
- BUILD_BUG(); \
- } \
- __ret; \
-})
-#define arch_cmpxchg_relaxed(ptr, o, n) \
+#define __arch_cmpxchg(lr_sfx, sc_sfx, prepend, append, r, rc, p, co, o, n)\
({ \
- __typeof__(*(ptr)) _o_ = (o); \
- __typeof__(*(ptr)) _n_ = (n); \
- (__typeof__(*(ptr))) __cmpxchg_relaxed((ptr), \
- _o_, _n_, sizeof(*(ptr))); \
+ __asm__ __volatile__ ( \
+ prepend \
+ "0: lr" lr_sfx " %0, %2\n" \
+ " bne %0, %z3, 1f\n" \
+ " sc" sc_sfx " %1, %z4, %2\n" \
+ " bnez %1, 0b\n" \
+ append \
+ "1:\n" \
+ : "=&r" (r), "=&r" (rc), "+A" (*(p)) \
+ : "rJ" (co o), "rJ" (n) \
+ : "memory"); \
})
-#define __cmpxchg_acquire(ptr, old, new, size) \
+#define _arch_cmpxchg(ptr, old, new, sc_sfx, prepend, append) \
({ \
__typeof__(ptr) __ptr = (ptr); \
__typeof__(*(ptr)) __old = (old); \
__typeof__(*(ptr)) __new = (new); \
__typeof__(*(ptr)) __ret; \
register unsigned int __rc; \
- switch (size) { \
+ switch (sizeof(*__ptr)) { \
case 4: \
- __asm__ __volatile__ ( \
- "0: lr.w %0, %2\n" \
- " bne %0, %z3, 1f\n" \
- " sc.w %1, %z4, %2\n" \
- " bnez %1, 0b\n" \
- RISCV_ACQUIRE_BARRIER \
- "1:\n" \
- : "=&r" (__ret), "=&r" (__rc), "+A" (*__ptr) \
- : "rJ" ((long)__old), "rJ" (__new) \
- : "memory"); \
+ __arch_cmpxchg(".w", ".w" sc_sfx, prepend, append, \
+ __ret, __rc, __ptr, (long), __old, __new); \
break; \
case 8: \
- __asm__ __volatile__ ( \
- "0: lr.d %0, %2\n" \
- " bne %0, %z3, 1f\n" \
- " sc.d %1, %z4, %2\n" \
- " bnez %1, 0b\n" \
- RISCV_ACQUIRE_BARRIER \
- "1:\n" \
- : "=&r" (__ret), "=&r" (__rc), "+A" (*__ptr) \
- : "rJ" (__old), "rJ" (__new) \
- : "memory"); \
+ __arch_cmpxchg(".d", ".d" sc_sfx, prepend, append, \
+ __ret, __rc, __ptr, /**/, __old, __new); \
break; \
default: \
BUILD_BUG(); \
@@ -153,108 +109,20 @@
__ret; \
})
-#define arch_cmpxchg_acquire(ptr, o, n) \
-({ \
- __typeof__(*(ptr)) _o_ = (o); \
- __typeof__(*(ptr)) _n_ = (n); \
- (__typeof__(*(ptr))) __cmpxchg_acquire((ptr), \
- _o_, _n_, sizeof(*(ptr))); \
-})
+#define arch_cmpxchg_relaxed(ptr, o, n) \
+ _arch_cmpxchg((ptr), (o), (n), "", "", "")
-#define __cmpxchg_release(ptr, old, new, size) \
-({ \
- __typeof__(ptr) __ptr = (ptr); \
- __typeof__(*(ptr)) __old = (old); \
- __typeof__(*(ptr)) __new = (new); \
- __typeof__(*(ptr)) __ret; \
- register unsigned int __rc; \
- switch (size) { \
- case 4: \
- __asm__ __volatile__ ( \
- RISCV_RELEASE_BARRIER \
- "0: lr.w %0, %2\n" \
- " bne %0, %z3, 1f\n" \
- " sc.w %1, %z4, %2\n" \
- " bnez %1, 0b\n" \
- "1:\n" \
- : "=&r" (__ret), "=&r" (__rc), "+A" (*__ptr) \
- : "rJ" ((long)__old), "rJ" (__new) \
- : "memory"); \
- break; \
- case 8: \
- __asm__ __volatile__ ( \
- RISCV_RELEASE_BARRIER \
- "0: lr.d %0, %2\n" \
- " bne %0, %z3, 1f\n" \
- " sc.d %1, %z4, %2\n" \
- " bnez %1, 0b\n" \
- "1:\n" \
- : "=&r" (__ret), "=&r" (__rc), "+A" (*__ptr) \
- : "rJ" (__old), "rJ" (__new) \
- : "memory"); \
- break; \
- default: \
- BUILD_BUG(); \
- } \
- __ret; \
-})
+#define arch_cmpxchg_acquire(ptr, o, n) \
+ _arch_cmpxchg((ptr), (o), (n), "", "", RISCV_ACQUIRE_BARRIER)
#define arch_cmpxchg_release(ptr, o, n) \
-({ \
- __typeof__(*(ptr)) _o_ = (o); \
- __typeof__(*(ptr)) _n_ = (n); \
- (__typeof__(*(ptr))) __cmpxchg_release((ptr), \
- _o_, _n_, sizeof(*(ptr))); \
-})
-
-#define __cmpxchg(ptr, old, new, size) \
-({ \
- __typeof__(ptr) __ptr = (ptr); \
- __typeof__(*(ptr)) __old = (old); \
- __typeof__(*(ptr)) __new = (new); \
- __typeof__(*(ptr)) __ret; \
- register unsigned int __rc; \
- switch (size) { \
- case 4: \
- __asm__ __volatile__ ( \
- "0: lr.w %0, %2\n" \
- " bne %0, %z3, 1f\n" \
- " sc.w.rl %1, %z4, %2\n" \
- " bnez %1, 0b\n" \
- " fence rw, rw\n" \
- "1:\n" \
- : "=&r" (__ret), "=&r" (__rc), "+A" (*__ptr) \
- : "rJ" ((long)__old), "rJ" (__new) \
- : "memory"); \
- break; \
- case 8: \
- __asm__ __volatile__ ( \
- "0: lr.d %0, %2\n" \
- " bne %0, %z3, 1f\n" \
- " sc.d.rl %1, %z4, %2\n" \
- " bnez %1, 0b\n" \
- " fence rw, rw\n" \
- "1:\n" \
- : "=&r" (__ret), "=&r" (__rc), "+A" (*__ptr) \
- : "rJ" (__old), "rJ" (__new) \
- : "memory"); \
- break; \
- default: \
- BUILD_BUG(); \
- } \
- __ret; \
-})
+ _arch_cmpxchg((ptr), (o), (n), "", RISCV_RELEASE_BARRIER, "")
#define arch_cmpxchg(ptr, o, n) \
-({ \
- __typeof__(*(ptr)) _o_ = (o); \
- __typeof__(*(ptr)) _n_ = (n); \
- (__typeof__(*(ptr))) __cmpxchg((ptr), \
- _o_, _n_, sizeof(*(ptr))); \
-})
+ _arch_cmpxchg((ptr), (o), (n), ".rl", "", " fence rw, rw\n")
#define arch_cmpxchg_local(ptr, o, n) \
- (__cmpxchg_relaxed((ptr), (o), (n), sizeof(*(ptr))))
+ arch_cmpxchg_relaxed((ptr), (o), (n))
#define arch_cmpxchg64(ptr, o, n) \
({ \
--
2.41.0
On Thu, Aug 03, 2023 at 02:14:00AM -0300, Leonardo Bras wrote:
> Some functions use mostly the same asm for their 32-bit and 64-bit
> versions.
>
> [...]
I have no problem with this optimization.
Reviewed-by: Guo Ren <[email protected]>
On Thu, Aug 03, 2023 at 02:13:57AM -0300, Leonardo Bras wrote:
> [...]
>
LGTM. For the series,
Reviewed-by: Andrea Parri <[email protected]>
Andrea
On Thu, Aug 3, 2023 at 1:14 PM Leonardo Bras <[email protected]> wrote:
>
> [...]
I found this optimization conflicts with the patches below:
https://lore.kernel.org/linux-riscv/[email protected]/
https://lore.kernel.org/linux-riscv/[email protected]/
If yours is merged, how do we support the inline cmpxchg/xchg_small
functions? It is a real struggle to implement complex functions with
macros.
Could you consider a more relaxed optimization in which we could
insert inline cmpxchg/xchg_small?
--
Best Regards
Guo Ren
On Thu, Aug 3, 2023 at 9:53 PM Guo Ren <[email protected]> wrote:
>
> On Thu, Aug 3, 2023 at 1:14 PM Leonardo Bras <[email protected]> wrote:
> > [...]
>
> I found this optimization conflicts with the patches below:
> https://lore.kernel.org/linux-riscv/[email protected]/
> https://lore.kernel.org/linux-riscv/[email protected]/
>
> If yours is merged, how do we support the inline cmpxchg/xchg_small
> functions?
Oh, I actually introduced my series so I could add new xchg and
cmpxchg for sizes 1 and 2. That's what your patches are about, right?
I was working on that yesterday, and decided to send the patchset
without them because I was still not sure enough about them.
About implementation strategy, I was introducing new macros for xchg
& cmpxchg with asm that would work for both size 1 and size 2, using
a switch-case to create the mask and and_value.
Do you think that works?
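Something like this (just a sketch of the idea; the helper name and
exact layout are made up for illustration, not the final patch):
/*
 * Sketch: build the mask for a 1- or 2-byte value from its offset
 * within the aligned 32-bit word that contains it (RISC-V is
 * little-endian, so the bit offset is just the byte offset * 8).
 */
static inline unsigned long __small_mask(volatile void *ptr, int size,
					 unsigned int *shift)
{
	*shift = ((unsigned long)ptr & 0x3) * 8;
	switch (size) {
	case 1:
		return 0xffUL << *shift;
	case 2:
		return 0xffffUL << *shift;
	default:
		return 0;	/* the real code would BUILD_BUG() here */
	}
}
The lr.w/sc.w loop would then run on the aligned 32-bit word, using the
mask to splice the sub-word in and out.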
> It is a real struggle to implement complex functions with macros.
I agree, but with this we can achieve more generic code, which makes
it clearer what the pattern is for a given function.
> Could you consider a more relaxed optimization in which we could
> insert inline cmpxchg/xchg_small?
What about this: I finish the patches I have been working on
(cmpxchg & xchg for sizes 1 and 2), and if they are fine, we expand
this patchset with them. If not, I try relaxing them a little so we
can merge with your set.
Does that work for you?
Best regards,
Leo
On Fri, Aug 4, 2023 at 10:20 AM Leonardo Bras Soares Passos
<[email protected]> wrote:
>
> On Thu, Aug 3, 2023 at 9:53 PM Guo Ren <[email protected]> wrote:
> [...]
>
> About implementation strategy, I was introducing new macros for xchg
> & cmpxchg with asm that would work for both size 1 and size 2, using
> a switch-case to create the mask and and_value.
>
> Do you think that works?
Good, go ahead.
>
> [...]
>
> What about this: I finish the patches I have been working on
> (cmpxchg & xchg for sizes 1 and 2), and if they are fine, we expand
> this patchset with them. If not, I try relaxing them a little so we
> can merge with your set.
>
> Does that work for you?
Great, you could provide cmpxchg & xchg for sizes 1 and 2, then my
patch series would be based on yours. After testing, I would give you
a Tested-by.
--
Best Regards
Guo Ren
On Thu, Aug 3, 2023 at 10:53 AM Andrea Parri <[email protected]> wrote:
>
> On Thu, Aug 03, 2023 at 02:13:57AM -0300, Leonardo Bras wrote:
> > [...]
>
> LGTM. For the series,
>
> Reviewed-by: Andrea Parri <[email protected]>
>
> Andrea
Thanks Andrea!
On Fri, Aug 4, 2023 at 5:05 AM Leonardo Bras Soares Passos
<[email protected]> wrote:
>
> [...]
>
> Awesome! Thanks!
>
> I will send it shortly, just compile-testing the kernel.
>
v3:
https://patchwork.kernel.org/project/linux-riscv/list/?series=772986&state=%2A&archive=both
On Thu, Aug 3, 2023 at 11:29 PM Guo Ren <[email protected]> wrote:
>
> On Fri, Aug 4, 2023 at 10:20 AM Leonardo Bras Soares Passos
> <[email protected]> wrote:
> > [...]
> >
> > What about this: I finish the patches I have been working on
> > (cmpxchg & xchg for sizes 1 and 2), and if they are fine, we expand
> > this patchset with them. If not, I try relaxing them a little so we
> > can merge with your set.
> >
> > Does that work for you?
> Great, you could provide cmpxchg & xchg for sizes 1 and 2, then my
> patch series would be based on yours. After testing, I would give you
> a Tested-by.
Awesome! Thanks!
I will send it shortly, just compile-testing the kernel.