2022-11-11 08:48:12

by Vincent MAILHOL

Subject: [RFC PATCH] lib: test_bitops: add compile-time optimization/evaluations assertions

Add a function to the bitops test suite to assert that the bitops
helpers correctly fold constant expressions (or trigger a build bug
otherwise). This should work on all the optimization levels supported
by Kbuild.

The function doesn't perform any runtime tests and gets optimized out to
nothing after passing the build assertions.

At the time of writing, we have not yet asserted that this will work on all
architectures. Architectures which fail that test should adjust their
arch/*/include/asm/bitops.h by either:

- using __builtin_constant_p() to select between the architecture
specific asm and the generic __builtin implementation if the
architecture specific asm produces better code (similar to [1]; a
rough sketch of this pattern is shown below).

- always calling the generic __builtin implementation if the architecture
specific asm does not produce better code (similar to [2]).

[1] commit 146034fed6ee ("x86/asm/bitops: Use __builtin_ffs() to evaluate
constant expressions")

[2] patch "x86/asm/bitops: Replace __fls() by its generic builtin
implementation"
Link: https://lore.kernel.org/lkml/[email protected]/

Suggested-by: Yury Norov <[email protected]>
Signed-off-by: Vincent Mailhol <[email protected]>
---
This is a follow-up to this thread:
https://lore.kernel.org/all/Yw8hJS9f6SofG4%2F6@yury-laptop/

in which I promised to eventually implement a check.

This patch requires the series below in order to work on x86:
https://lore.kernel.org/lkml/[email protected]/

Because that series is not yet merged, I am sending this as an RFC for
now.

Concerning architectures other than x86, I am fine with writing patches,
but I will not be able to test them (I do not have the build environments).

One idea would be to add this patch to any kind of CI which runs on
all architectures and see which ones need to be fixed before making
this mainstream. Tell me what you think.
---
lib/Kconfig.debug | 4 ++++
lib/test_bitops.c | 46 ++++++++++++++++++++++++++++++++++++++++++++++
2 files changed, 50 insertions(+)

diff --git a/lib/Kconfig.debug b/lib/Kconfig.debug
index 3fc7abffc7aa..233a82cd3b6e 100644
--- a/lib/Kconfig.debug
+++ b/lib/Kconfig.debug
@@ -2300,6 +2300,10 @@ config TEST_BITOPS
compilations. It has no dependencies and doesn't run or load unless
explicitly requested by name. for example: modprobe test_bitops.

+ In addition, check that the compiler is able to fold the bitops
+ function into a compile-time constant (given that the argument is also
+ a compile-time constant) and trigger a build bug otherwise.
+
If unsure, say N.

config TEST_VMALLOC
diff --git a/lib/test_bitops.c b/lib/test_bitops.c
index 3b7bcbee84db..e6e3d22ce52a 100644
--- a/lib/test_bitops.c
+++ b/lib/test_bitops.c
@@ -50,6 +50,50 @@ static unsigned long order_comb_long[][2] = {
};
#endif

+static void __init test_bitops_const_eval(void)
+{
+ int res;
+ u64 res64;
+
+ /*
+ * On any supported optimization level (-O2, -Os) and if
+ * invoked with a compile-time constant argument, the compiler
+ * must be able to fold into a constant expression all the bit
+ * find functions. Namely: __ffs(), ffs(), ffz(), __fls(),
+ * fls() and fls64(). Otherwise, trigger a build bug.
+ */
+
+ /* __ffs(BIT(10)) == 10 */
+ res = __ffs(BIT(10)) == 10;
+ BUILD_BUG_ON(!__builtin_constant_p(res));
+ BUILD_BUG_ON(!res);
+
+ /* ffs(BIT(10)) == 11 */
+ res = ffs(BIT(10)) == 11;
+ BUILD_BUG_ON(!__builtin_constant_p(res));
+ BUILD_BUG_ON(!res);
+
+ /* ffz(~BIT(10)) == 10 */
+ res = ffz(~BIT(10)) == 10;
+ BUILD_BUG_ON(!__builtin_constant_p(res));
+ BUILD_BUG_ON(!res);
+
+ /* __fls(BIT(10)) == 10 */
+ res = __fls(BIT(10)) == 10;
+ BUILD_BUG_ON(!__builtin_constant_p(res));
+ BUILD_BUG_ON(!res);
+
+ /* fls(BIT(10)) == 11 */
+ res = fls(BIT(10)) == 11;
+ BUILD_BUG_ON(!__builtin_constant_p(res));
+ BUILD_BUG_ON(!res);
+
+ /* fls64(BIT_ULL(10)) == 11 */
+ res64 = fls64(BIT_ULL(10)) == 11;
+ BUILD_BUG_ON(!__builtin_constant_p(res64));
+ BUILD_BUG_ON(!res64);
+}
+
static int __init test_bitops_startup(void)
{
int i, bit_set;
@@ -94,6 +138,8 @@ static int __init test_bitops_startup(void)
if (bit_set != BITOPS_LAST)
pr_err("ERROR: FOUND SET BIT %d\n", bit_set);

+ test_bitops_const_eval();
+
pr_info("Completed bitops test\n");

return 0;
--
2.35.1



2023-11-30 10:34:24

by Vincent MAILHOL

Subject: [PATCH v2] lib: test_bitops: add compile-time optimization/evaluations assertions

Add a function in the bitops test suite to assert that the bitops
helpers correctly fold constant expressions (or trigger a build bug
otherwise). This should work on all the optimization levels supported
by Kbuild.

The function doesn't perform any runtime tests and gets optimized out to
nothing after passing the build assertions.

Architectures which fail that test should adjust their
arch/*/include/asm/bitops.h in order to use the compiler's generic
__builtin implementation if the argument is a constant expression
(similar to [1]).

[1] commit 146034fed6ee ("x86/asm/bitops: Use __builtin_ffs() to evaluate
constant expressions")

Suggested-by: Yury Norov <[email protected]>
Signed-off-by: Vincent Mailhol <[email protected]>
---
This is tested on x86, x86_64, arm and arm64.

Other architectures were not tested. My idea would be to add this
patch to any kind of CI which runs on all architectures (linux-next?)
and see if anything breaks.

Or maybe I should send a message to the maintainers of all of the
arch/*/include/asm/bitops.h files?

Tell me what you think.

** Changelog **

v1 -> v2:

- Drop the RFC patch. v1 was not ready to be applied on x86 because
of pending changes in arch/x86/include/asm/bitops.h. This was
finally fixed by Nick in commit 3dae5c43badf ("x86/asm/bitops: Use
__builtin_clz{l|ll} to evaluate constant expressions"). Thanks Nick!

- Update the commit description.

- Introduce the test_const_eval() macro to factorize code.

- No functional changes.

Link: https://lore.kernel.org/all/[email protected]/
---
lib/Kconfig.debug | 4 ++++
lib/test_bitops.c | 31 +++++++++++++++++++++++++++++++
2 files changed, 35 insertions(+)

diff --git a/lib/Kconfig.debug b/lib/Kconfig.debug
index cc7d53d9dc01..c97d818dbc30 100644
--- a/lib/Kconfig.debug
+++ b/lib/Kconfig.debug
@@ -2454,6 +2454,10 @@ config TEST_BITOPS
compilations. It has no dependencies and doesn't run or load unless
explicitly requested by name. for example: modprobe test_bitops.

+ In addition, check that the compiler is able to fold the bitops
+ function into a compile-time constant (given that the argument is also
+ a compile-time constant) and trigger a build bug otherwise.
+
If unsure, say N.

config TEST_VMALLOC
diff --git a/lib/test_bitops.c b/lib/test_bitops.c
index 3b7bcbee84db..49e2f76575e4 100644
--- a/lib/test_bitops.c
+++ b/lib/test_bitops.c
@@ -50,6 +50,35 @@ static unsigned long order_comb_long[][2] = {
};
#endif

+/* Assert that a boolean expression can be folded in a constant and is true. */
+#define test_const_eval(test_expr) \
+({ \
+ /* Evaluate once so that compiler can fold it. */ \
+ bool __test_expr = test_expr; \
+ \
+ BUILD_BUG_ON(!__builtin_constant_p(__test_expr)); \
+ BUILD_BUG_ON(!__test_expr); \
+})
+
+static void test_bitops_const_eval(void)
+{
+ /*
+ * On any supported optimization level (-O2, -Os) and if
+ * invoked with a compile-time constant argument, the compiler
+ * must be able to fold into a constant expression all the bit
+ * find functions. Namely: __ffs(), ffs(), ffz(), __fls(),
+ * fls() and fls64(). Otherwise, trigger a build bug.
+ */
+ const int n = 10;
+
+ test_const_eval(__ffs(BIT(n)) == n);
+ test_const_eval(ffs(BIT(n)) == n + 1);
+ test_const_eval(ffz(~BIT(n)) == n);
+ test_const_eval(__fls(BIT(n)) == n);
+ test_const_eval(fls(BIT(n)) == n + 1);
+ test_const_eval(fls64(BIT_ULL(n)) == n + 1);
+}
+
static int __init test_bitops_startup(void)
{
int i, bit_set;
@@ -94,6 +123,8 @@ static int __init test_bitops_startup(void)
if (bit_set != BITOPS_LAST)
pr_err("ERROR: FOUND SET BIT %d\n", bit_set);

+ test_bitops_const_eval();
+
pr_info("Completed bitops test\n");

return 0;
--
2.25.1

2023-11-30 17:11:16

by kernel test robot

Subject: Re: [PATCH v2] lib: test_bitops: add compile-time optimization/evaluations assertions

Hi Vincent,

kernel test robot noticed the following build errors:

[auto build test ERROR on akpm-mm/mm-everything]
[also build test ERROR on linus/master v6.7-rc3 next-20231130]
[If your patch is applied to the wrong git tree, kindly drop us a note.
And when submitting patch, we suggest to use '--base' as documented in
https://git-scm.com/docs/git-format-patch#_base_tree_information]

url: https://github.com/intel-lab-lkp/linux/commits/Vincent-Mailhol/lib-test_bitops-add-compile-time-optimization-evaluations-assertions/20231130-182837
base: https://git.kernel.org/pub/scm/linux/kernel/git/akpm/mm.git mm-everything
patch link: https://lore.kernel.org/r/20231130102717.1297492-1-mailhol.vincent%40wanadoo.fr
patch subject: [PATCH v2] lib: test_bitops: add compile-time optimization/evaluations assertions
config: m68k-allmodconfig (https://download.01.org/0day-ci/archive/20231201/[email protected]/config)
compiler: m68k-linux-gcc (GCC) 13.2.0
reproduce (this is a W=1 build): (https://download.01.org/0day-ci/archive/20231201/[email protected]/reproduce)

If you fix the issue in a separate patch/commit (i.e. not just a new version of
the same patch/commit), kindly add following tags
| Reported-by: kernel test robot <[email protected]>
| Closes: https://lore.kernel.org/oe-kbuild-all/[email protected]/

All errors (new ones prefixed by >>):

In file included from <command-line>:
In function 'test_bitops_const_eval',
inlined from 'test_bitops_startup' at lib/test_bitops.c:126:2:
>> include/linux/compiler_types.h:435:45: error: call to '__compiletime_assert_183' declared with attribute error: BUILD_BUG_ON failed: !__builtin_constant_p(__test_expr)
435 | _compiletime_assert(condition, msg, __compiletime_assert_, __COUNTER__)
| ^
include/linux/compiler_types.h:416:25: note: in definition of macro '__compiletime_assert'
416 | prefix ## suffix(); \
| ^~~~~~
include/linux/compiler_types.h:435:9: note: in expansion of macro '_compiletime_assert'
435 | _compiletime_assert(condition, msg, __compiletime_assert_, __COUNTER__)
| ^~~~~~~~~~~~~~~~~~~
include/linux/build_bug.h:39:37: note: in expansion of macro 'compiletime_assert'
39 | #define BUILD_BUG_ON_MSG(cond, msg) compiletime_assert(!(cond), msg)
| ^~~~~~~~~~~~~~~~~~
include/linux/build_bug.h:50:9: note: in expansion of macro 'BUILD_BUG_ON_MSG'
50 | BUILD_BUG_ON_MSG(condition, "BUILD_BUG_ON failed: " #condition)
| ^~~~~~~~~~~~~~~~
lib/test_bitops.c:59:9: note: in expansion of macro 'BUILD_BUG_ON'
59 | BUILD_BUG_ON(!__builtin_constant_p(__test_expr)); \
| ^~~~~~~~~~~~
lib/test_bitops.c:74:9: note: in expansion of macro 'test_const_eval'
74 | test_const_eval(__ffs(BIT(n)) == n);
| ^~~~~~~~~~~~~~~


vim +/__compiletime_assert_183 +435 include/linux/compiler_types.h

eb5c2d4b45e3d2 Will Deacon 2020-07-21 421
eb5c2d4b45e3d2 Will Deacon 2020-07-21 422 #define _compiletime_assert(condition, msg, prefix, suffix) \
eb5c2d4b45e3d2 Will Deacon 2020-07-21 423 __compiletime_assert(condition, msg, prefix, suffix)
eb5c2d4b45e3d2 Will Deacon 2020-07-21 424
eb5c2d4b45e3d2 Will Deacon 2020-07-21 425 /**
eb5c2d4b45e3d2 Will Deacon 2020-07-21 426 * compiletime_assert - break build and emit msg if condition is false
eb5c2d4b45e3d2 Will Deacon 2020-07-21 427 * @condition: a compile-time constant condition to check
eb5c2d4b45e3d2 Will Deacon 2020-07-21 428 * @msg: a message to emit if condition is false
eb5c2d4b45e3d2 Will Deacon 2020-07-21 429 *
eb5c2d4b45e3d2 Will Deacon 2020-07-21 430 * In tradition of POSIX assert, this macro will break the build if the
eb5c2d4b45e3d2 Will Deacon 2020-07-21 431 * supplied condition is *false*, emitting the supplied error message if the
eb5c2d4b45e3d2 Will Deacon 2020-07-21 432 * compiler has support to do so.
eb5c2d4b45e3d2 Will Deacon 2020-07-21 433 */
eb5c2d4b45e3d2 Will Deacon 2020-07-21 434 #define compiletime_assert(condition, msg) \
eb5c2d4b45e3d2 Will Deacon 2020-07-21 @435 _compiletime_assert(condition, msg, __compiletime_assert_, __COUNTER__)
eb5c2d4b45e3d2 Will Deacon 2020-07-21 436

--
0-DAY CI Kernel Test Service
https://github.com/intel/lkp-tests/wiki

2023-12-06 13:29:20

by kernel test robot

Subject: Re: [PATCH v2] lib: test_bitops: add compile-time optimization/evaluations assertions

Hi Vincent,

kernel test robot noticed the following build errors:

[auto build test ERROR on akpm-mm/mm-everything]
[also build test ERROR on linus/master v6.7-rc4 next-20231206]
[If your patch is applied to the wrong git tree, kindly drop us a note.
And when submitting patch, we suggest to use '--base' as documented in
https://git-scm.com/docs/git-format-patch#_base_tree_information]

url: https://github.com/intel-lab-lkp/linux/commits/Vincent-Mailhol/lib-test_bitops-add-compile-time-optimization-evaluations-assertions/20231130-182837
base: https://git.kernel.org/pub/scm/linux/kernel/git/akpm/mm.git mm-everything
patch link: https://lore.kernel.org/r/20231130102717.1297492-1-mailhol.vincent%40wanadoo.fr
patch subject: [PATCH v2] lib: test_bitops: add compile-time optimization/evaluations assertions
config: hexagon-allmodconfig (https://download.01.org/0day-ci/archive/20231206/[email protected]/config)
compiler: clang version 17.0.0 (https://github.com/llvm/llvm-project.git 4a5ac14ee968ff0ad5d2cc1ffa0299048db4c88a)
reproduce (this is a W=1 build): (https://download.01.org/0day-ci/archive/20231206/[email protected]/reproduce)

If you fix the issue in a separate patch/commit (i.e. not just a new version of
the same patch/commit), kindly add following tags
| Reported-by: kernel test robot <[email protected]>
| Closes: https://lore.kernel.org/oe-kbuild-all/[email protected]/

All errors (new ones prefixed by >>):

>> lib/test_bitops.c:74:2: error: call to '__compiletime_assert_178' declared with 'error' attribute: BUILD_BUG_ON failed: !__builtin_constant_p(__test_expr)
74 | test_const_eval(__ffs(BIT(n)) == n);
| ^
lib/test_bitops.c:59:2: note: expanded from macro 'test_const_eval'
59 | BUILD_BUG_ON(!__builtin_constant_p(__test_expr)); \
| ^
include/linux/build_bug.h:50:2: note: expanded from macro 'BUILD_BUG_ON'
50 | BUILD_BUG_ON_MSG(condition, "BUILD_BUG_ON failed: " #condition)
| ^
include/linux/build_bug.h:39:37: note: expanded from macro 'BUILD_BUG_ON_MSG'
39 | #define BUILD_BUG_ON_MSG(cond, msg) compiletime_assert(!(cond), msg)
| ^
note: (skipping 1 expansions in backtrace; use -fmacro-backtrace-limit=0 to see all)
include/linux/compiler_types.h:423:2: note: expanded from macro '_compiletime_assert'
423 | __compiletime_assert(condition, msg, prefix, suffix)
| ^
include/linux/compiler_types.h:416:4: note: expanded from macro '__compiletime_assert'
416 | prefix ## suffix(); \
| ^
<scratch space>:18:1: note: expanded from here
18 | __compiletime_assert_178
| ^
1 error generated.


vim +74 lib/test_bitops.c

62
63 static void test_bitops_const_eval(void)
64 {
65 /*
66 * On any supported optimization level (-O2, -Os) and if
67 * invoked with a compile-time constant argument, the compiler
68 * must be able to fold into a constant expression all the bit
69 * find functions. Namely: __ffs(), ffs(), ffz(), __fls(),
70 * fls() and fls64(). Otherwise, trigger a build bug.
71 */
72 const int n = 10;
73
> 74 test_const_eval(__ffs(BIT(n)) == n);
75 test_const_eval(ffs(BIT(n)) == n + 1);
76 test_const_eval(ffz(~BIT(n)) == n);
77 test_const_eval(__fls(BIT(n)) == n);
78 test_const_eval(fls(BIT(n)) == n + 1);
79 test_const_eval(fls64(BIT_ULL(n)) == n + 1);
80 }
81

--
0-DAY CI Kernel Test Service
https://github.com/intel/lkp-tests/wiki

2023-12-17 07:13:18

by Vincent MAILHOL

Subject: [PATCH v3 0/5] bitops: optimize code and add tests

This series makes sure that all the bitops operations (namely __ffs(),
ffs(), ffz(), __fls(), fls(), fls64()) correctly fold constant
expressions given that their argument is also a constant expression.

The first two patches optimize the m68k architecture bitops functions,
the third and fourth optimize the hexagon architecture bitops
functions, and the fifth and final patch adds tests to assert that the
constant folding occurs and that the result is accurate.

This is tested on arm, arm64, hexagon, m68k, x86 and x86_64. For other
architectures, I am putting my trust in the kernel test robot to send
a report if any of the other architectures still lacks bitops
optimizations.
---

** Changelog **

v2 -> v3:

- Add patches 1/5 and 2/5 to optimize m68k architecture bitops.
Thanks to the kernel test robot for reporting!

- Add patches 3/5 and 4/5 to optimize hexagon architecture bitops.
Thanks to the kernel test robot for reporting!

- Patch 5/5: mark test_bitops_const_eval() as __always_inline so that
n (the tested bit number) can be passed as a parameter. Previously,
only BIT(10) was tested. Add tests for BIT(0) and BIT(31).

Link: https://lore.kernel.org/all/[email protected]/

v1 -> v2:

- Drop the RFC patch. v1 was not ready to be applied on x86 because
of pending changes in arch/x86/include/asm/bitops.h. This was
finally fixed by Nick in commit 3dae5c43badf ("x86/asm/bitops: Use
__builtin_clz{l|ll} to evaluate constant expressions").
Thanks Nick!

- Update the commit description.

- Introduce the test_const_eval() macro to factorize code.

- No functional change.

Link: https://lore.kernel.org/all/[email protected]/

Vincent Mailhol (5):
m68k/bitops: force inlining of all bitops functions
m68k/bitops: use __builtin_{clz,ctzl,ffs} to evaluate constant
expressions
hexagon/bitops: force inlining of all bitops functions
hexagon/bitops: use __builtin_{clz,ctzl,ffs} to evaluate constant
expressions
lib: test_bitops: add compile-time optimization/evaluations assertions

arch/hexagon/include/asm/bitops.h | 37 ++++++++----
arch/m68k/include/asm/bitops.h | 99 +++++++++++++++++--------------
lib/Kconfig.debug | 4 ++
lib/test_bitops.c | 32 ++++++++++
4 files changed, 118 insertions(+), 54 deletions(-)

--
2.25.1


2023-12-17 07:13:32

by Vincent MAILHOL

Subject: [PATCH v3 1/5] m68k/bitops: force inlining of all bitops functions

The inline keyword actually does not guarantee that the compiler will
inline a function. Whenever the goal is to actually inline a
function, __always_inline should always be preferred instead.

On an allyesconfig, with GCC 13.2.1, it saves roughly 5 KB.

$ size --format=GNU vmlinux.before vmlinux.after
text data bss total filename
60449738 70975612 2288988 133714338 vmlinux.before
60446534 70972412 2289596 133708542 vmlinux.after

Reference: commit 8dd5032d9c54 ("x86/asm/bitops: Force inlining of
test_and_set_bit and friends")
Link: https://git.kernel.org/torvalds/c/8dd5032d9c54

Signed-off-by: Vincent Mailhol <[email protected]>
---
arch/m68k/include/asm/bitops.h | 87 +++++++++++++++++-----------------
1 file changed, 44 insertions(+), 43 deletions(-)

diff --git a/arch/m68k/include/asm/bitops.h b/arch/m68k/include/asm/bitops.h
index 14c64a6f1217..ae0457d582b8 100644
--- a/arch/m68k/include/asm/bitops.h
+++ b/arch/m68k/include/asm/bitops.h
@@ -28,7 +28,7 @@
* So we use the best form possible on a given platform.
*/

-static inline void bset_reg_set_bit(int nr, volatile unsigned long *vaddr)
+static __always_inline void bset_reg_set_bit(int nr, volatile unsigned long *vaddr)
{
char *p = (char *)vaddr + (nr ^ 31) / 8;

@@ -38,7 +38,7 @@ static inline void bset_reg_set_bit(int nr, volatile unsigned long *vaddr)
: "memory");
}

-static inline void bset_mem_set_bit(int nr, volatile unsigned long *vaddr)
+static __always_inline void bset_mem_set_bit(int nr, volatile unsigned long *vaddr)
{
char *p = (char *)vaddr + (nr ^ 31) / 8;

@@ -47,7 +47,7 @@ static inline void bset_mem_set_bit(int nr, volatile unsigned long *vaddr)
: "di" (nr & 7));
}

-static inline void bfset_mem_set_bit(int nr, volatile unsigned long *vaddr)
+static __always_inline void bfset_mem_set_bit(int nr, volatile unsigned long *vaddr)
{
__asm__ __volatile__ ("bfset %1{%0:#1}"
:
@@ -71,7 +71,7 @@ arch___set_bit(unsigned long nr, volatile unsigned long *addr)
set_bit(nr, addr);
}

-static inline void bclr_reg_clear_bit(int nr, volatile unsigned long *vaddr)
+static __always_inline void bclr_reg_clear_bit(int nr, volatile unsigned long *vaddr)
{
char *p = (char *)vaddr + (nr ^ 31) / 8;

@@ -81,7 +81,7 @@ static inline void bclr_reg_clear_bit(int nr, volatile unsigned long *vaddr)
: "memory");
}

-static inline void bclr_mem_clear_bit(int nr, volatile unsigned long *vaddr)
+static __always_inline void bclr_mem_clear_bit(int nr, volatile unsigned long *vaddr)
{
char *p = (char *)vaddr + (nr ^ 31) / 8;

@@ -90,7 +90,7 @@ static inline void bclr_mem_clear_bit(int nr, volatile unsigned long *vaddr)
: "di" (nr & 7));
}

-static inline void bfclr_mem_clear_bit(int nr, volatile unsigned long *vaddr)
+static __always_inline void bfclr_mem_clear_bit(int nr, volatile unsigned long *vaddr)
{
__asm__ __volatile__ ("bfclr %1{%0:#1}"
:
@@ -114,7 +114,7 @@ arch___clear_bit(unsigned long nr, volatile unsigned long *addr)
clear_bit(nr, addr);
}

-static inline void bchg_reg_change_bit(int nr, volatile unsigned long *vaddr)
+static __always_inline void bchg_reg_change_bit(int nr, volatile unsigned long *vaddr)
{
char *p = (char *)vaddr + (nr ^ 31) / 8;

@@ -124,7 +124,7 @@ static inline void bchg_reg_change_bit(int nr, volatile unsigned long *vaddr)
: "memory");
}

-static inline void bchg_mem_change_bit(int nr, volatile unsigned long *vaddr)
+static __always_inline void bchg_mem_change_bit(int nr, volatile unsigned long *vaddr)
{
char *p = (char *)vaddr + (nr ^ 31) / 8;

@@ -133,7 +133,7 @@ static inline void bchg_mem_change_bit(int nr, volatile unsigned long *vaddr)
: "di" (nr & 7));
}

-static inline void bfchg_mem_change_bit(int nr, volatile unsigned long *vaddr)
+static __always_inline void bfchg_mem_change_bit(int nr, volatile unsigned long *vaddr)
{
__asm__ __volatile__ ("bfchg %1{%0:#1}"
:
@@ -160,8 +160,8 @@ arch___change_bit(unsigned long nr, volatile unsigned long *addr)
#define arch_test_bit generic_test_bit
#define arch_test_bit_acquire generic_test_bit_acquire

-static inline int bset_reg_test_and_set_bit(int nr,
- volatile unsigned long *vaddr)
+static __always_inline int
+bset_reg_test_and_set_bit(int nr, volatile unsigned long *vaddr)
{
char *p = (char *)vaddr + (nr ^ 31) / 8;
char retval;
@@ -173,8 +173,8 @@ static inline int bset_reg_test_and_set_bit(int nr,
return retval;
}

-static inline int bset_mem_test_and_set_bit(int nr,
- volatile unsigned long *vaddr)
+static __always_inline int
+bset_mem_test_and_set_bit(int nr, volatile unsigned long *vaddr)
{
char *p = (char *)vaddr + (nr ^ 31) / 8;
char retval;
@@ -185,8 +185,8 @@ static inline int bset_mem_test_and_set_bit(int nr,
return retval;
}

-static inline int bfset_mem_test_and_set_bit(int nr,
- volatile unsigned long *vaddr)
+static __always_inline int
+bfset_mem_test_and_set_bit(int nr, volatile unsigned long *vaddr)
{
char retval;

@@ -213,8 +213,8 @@ arch___test_and_set_bit(unsigned long nr, volatile unsigned long *addr)
return test_and_set_bit(nr, addr);
}

-static inline int bclr_reg_test_and_clear_bit(int nr,
- volatile unsigned long *vaddr)
+static __always_inline int
+bclr_reg_test_and_clear_bit(int nr, volatile unsigned long *vaddr)
{
char *p = (char *)vaddr + (nr ^ 31) / 8;
char retval;
@@ -226,8 +226,8 @@ static inline int bclr_reg_test_and_clear_bit(int nr,
return retval;
}

-static inline int bclr_mem_test_and_clear_bit(int nr,
- volatile unsigned long *vaddr)
+static __always_inline int
+bclr_mem_test_and_clear_bit(int nr, volatile unsigned long *vaddr)
{
char *p = (char *)vaddr + (nr ^ 31) / 8;
char retval;
@@ -238,8 +238,8 @@ static inline int bclr_mem_test_and_clear_bit(int nr,
return retval;
}

-static inline int bfclr_mem_test_and_clear_bit(int nr,
- volatile unsigned long *vaddr)
+static __always_inline int
+bfclr_mem_test_and_clear_bit(int nr, volatile unsigned long *vaddr)
{
char retval;

@@ -266,8 +266,8 @@ arch___test_and_clear_bit(unsigned long nr, volatile unsigned long *addr)
return test_and_clear_bit(nr, addr);
}

-static inline int bchg_reg_test_and_change_bit(int nr,
- volatile unsigned long *vaddr)
+static __always_inline int
+bchg_reg_test_and_change_bit(int nr, volatile unsigned long *vaddr)
{
char *p = (char *)vaddr + (nr ^ 31) / 8;
char retval;
@@ -279,8 +279,8 @@ static inline int bchg_reg_test_and_change_bit(int nr,
return retval;
}

-static inline int bchg_mem_test_and_change_bit(int nr,
- volatile unsigned long *vaddr)
+static __always_inline int
+bchg_mem_test_and_change_bit(int nr, volatile unsigned long *vaddr)
{
char *p = (char *)vaddr + (nr ^ 31) / 8;
char retval;
@@ -291,8 +291,8 @@ static inline int bchg_mem_test_and_change_bit(int nr,
return retval;
}

-static inline int bfchg_mem_test_and_change_bit(int nr,
- volatile unsigned long *vaddr)
+static __always_inline int
+bfchg_mem_test_and_change_bit(int nr, volatile unsigned long *vaddr)
{
char retval;

@@ -319,8 +319,8 @@ arch___test_and_change_bit(unsigned long nr, volatile unsigned long *addr)
return test_and_change_bit(nr, addr);
}

-static inline bool xor_unlock_is_negative_byte(unsigned long mask,
- volatile unsigned long *p)
+static __always_inline bool
+xor_unlock_is_negative_byte(unsigned long mask, volatile unsigned long *p)
{
#ifdef CONFIG_COLDFIRE
__asm__ __volatile__ ("eorl %1, %0"
@@ -350,8 +350,8 @@ static inline bool xor_unlock_is_negative_byte(unsigned long mask,
#include <asm-generic/bitops/ffz.h>
#else

-static inline int find_first_zero_bit(const unsigned long *vaddr,
- unsigned size)
+static __always_inline int
+find_first_zero_bit(const unsigned long *vaddr, unsigned size)
{
const unsigned long *p = vaddr;
int res = 32;
@@ -376,8 +376,8 @@ static inline int find_first_zero_bit(const unsigned long *vaddr,
}
#define find_first_zero_bit find_first_zero_bit

-static inline int find_next_zero_bit(const unsigned long *vaddr, int size,
- int offset)
+static __always_inline int
+find_next_zero_bit(const unsigned long *vaddr, int size, int offset)
{
const unsigned long *p = vaddr + (offset >> 5);
int bit = offset & 31UL, res;
@@ -406,7 +406,8 @@ static inline int find_next_zero_bit(const unsigned long *vaddr, int size,
}
#define find_next_zero_bit find_next_zero_bit

-static inline int find_first_bit(const unsigned long *vaddr, unsigned size)
+static __always_inline int
+find_first_bit(const unsigned long *vaddr, unsigned size)
{
const unsigned long *p = vaddr;
int res = 32;
@@ -431,8 +432,8 @@ static inline int find_first_bit(const unsigned long *vaddr, unsigned size)
}
#define find_first_bit find_first_bit

-static inline int find_next_bit(const unsigned long *vaddr, int size,
- int offset)
+static __always_inline int
+find_next_bit(const unsigned long *vaddr, int size, int offset)
{
const unsigned long *p = vaddr + (offset >> 5);
int bit = offset & 31UL, res;
@@ -465,7 +466,7 @@ static inline int find_next_bit(const unsigned long *vaddr, int size,
* ffz = Find First Zero in word. Undefined if no zero exists,
* so code should check against ~0UL first..
*/
-static inline unsigned long ffz(unsigned long word)
+static __always_inline unsigned long ffz(unsigned long word)
{
int res;

@@ -488,7 +489,7 @@ static inline unsigned long ffz(unsigned long word)
*/
#if (defined(__mcfisaaplus__) || defined(__mcfisac__)) && \
!defined(CONFIG_M68000)
-static inline unsigned long __ffs(unsigned long x)
+static __always_inline unsigned long __ffs(unsigned long x)
{
__asm__ __volatile__ ("bitrev %0; ff1 %0"
: "=d" (x)
@@ -496,7 +497,7 @@ static inline unsigned long __ffs(unsigned long x)
return x;
}

-static inline int ffs(int x)
+static __always_inline int ffs(int x)
{
if (!x)
return 0;
@@ -518,7 +519,7 @@ static inline int ffs(int x)
* the libc and compiler builtin ffs routines, therefore
* differs in spirit from the above ffz (man ffs).
*/
-static inline int ffs(int x)
+static __always_inline int ffs(int x)
{
int cnt;

@@ -528,7 +529,7 @@ static inline int ffs(int x)
return 32 - cnt;
}

-static inline unsigned long __ffs(unsigned long x)
+static __always_inline unsigned long __ffs(unsigned long x)
{
return ffs(x) - 1;
}
@@ -536,7 +537,7 @@ static inline unsigned long __ffs(unsigned long x)
/*
* fls: find last bit set.
*/
-static inline int fls(unsigned int x)
+static __always_inline int fls(unsigned int x)
{
int cnt;

@@ -546,7 +547,7 @@ static inline int fls(unsigned int x)
return 32 - cnt;
}

-static inline unsigned long __fls(unsigned long x)
+static __always_inline unsigned long __fls(unsigned long x)
{
return fls(x) - 1;
}
--
2.25.1


2023-12-17 07:13:52

by Vincent MAILHOL

Subject: [PATCH v3 2/5] m68k/bitops: use __builtin_{clz,ctzl,ffs} to evaluate constant expressions

The compiler is not able to do constant folding on "asm volatile" code.

Evaluate whether or not the function argument is a constant expression
and if this is the case, return an equivalent builtin expression.

On an allyesconfig, with GCC 13.2.1, it saves roughly 8 KB.

$ size --format=GNU vmlinux.before vmlinux.after
text data bss total filename
60446534 70972412 2289596 133708542 vmlinux.before
60429746 70978876 2291676 133700298 vmlinux.after

Reference: commit fdb6649ab7c1 ("x86/asm/bitops: Use __builtin_ctzl()
to evaluate constant expressions")
Link: https://git.kernel.org/torvalds/c/fdb6649ab7c1

Signed-off-by: Vincent Mailhol <[email protected]>
---
arch/m68k/include/asm/bitops.h | 12 ++++++++++++
1 file changed, 12 insertions(+)

diff --git a/arch/m68k/include/asm/bitops.h b/arch/m68k/include/asm/bitops.h
index ae0457d582b8..3f89b9dccc33 100644
--- a/arch/m68k/include/asm/bitops.h
+++ b/arch/m68k/include/asm/bitops.h
@@ -470,6 +470,9 @@ static __always_inline unsigned long ffz(unsigned long word)
{
int res;

+ if (__builtin_constant_p(word))
+ return __builtin_ctzl(~word);
+
__asm__ __volatile__ ("bfffo %1{#0,#0},%0"
: "=d" (res) : "d" (~word & -~word));
return res ^ 31;
@@ -491,6 +494,9 @@ static __always_inline unsigned long ffz(unsigned long word)
!defined(CONFIG_M68000)
static __always_inline unsigned long __ffs(unsigned long x)
{
+ if (__builtin_constant_p(x))
+ return __builtin_ctzl(x);
+
__asm__ __volatile__ ("bitrev %0; ff1 %0"
: "=d" (x)
: "0" (x));
@@ -523,6 +529,9 @@ static __always_inline int ffs(int x)
{
int cnt;

+ if (__builtin_constant_p(x))
+ return __builtin_ffs(x);
+
__asm__ ("bfffo %1{#0:#0},%0"
: "=d" (cnt)
: "dm" (x & -x));
@@ -541,6 +550,9 @@ static __always_inline int fls(unsigned int x)
{
int cnt;

+ if (__builtin_constant_p(x))
+ return x ? BITS_PER_TYPE(x) - __builtin_clz(x) : 0;
+
__asm__ ("bfffo %1{#0,#0},%0"
: "=d" (cnt)
: "dm" (x));
--
2.25.1


2023-12-17 07:14:08

by Vincent MAILHOL

Subject: [PATCH v3 3/5] hexagon/bitops: force inlining of all bitops functions

The inline keyword actually does not guarantee that the compiler will
inline a function. Whenever the goal is to actually inline a
function, __always_inline should always be preferred instead.

Reference: commit 8dd5032d9c54 ("x86/asm/bitops: Force inlining of
test_and_set_bit and friends")
Link: https://git.kernel.org/torvalds/c/8dd5032d9c54

Signed-off-by: Vincent Mailhol <[email protected]>
---
arch/hexagon/include/asm/bitops.h | 22 +++++++++++-----------
1 file changed, 11 insertions(+), 11 deletions(-)

diff --git a/arch/hexagon/include/asm/bitops.h b/arch/hexagon/include/asm/bitops.h
index 160d8f37fa1a..950d4acc2edc 100644
--- a/arch/hexagon/include/asm/bitops.h
+++ b/arch/hexagon/include/asm/bitops.h
@@ -28,7 +28,7 @@
* @nr: bit number to clear
* @addr: pointer to memory
*/
-static inline int test_and_clear_bit(int nr, volatile void *addr)
+static __always_inline int test_and_clear_bit(int nr, volatile void *addr)
{
int oldval;

@@ -52,7 +52,7 @@ static inline int test_and_clear_bit(int nr, volatile void *addr)
* @nr: bit number to set
* @addr: pointer to memory
*/
-static inline int test_and_set_bit(int nr, volatile void *addr)
+static __always_inline int test_and_set_bit(int nr, volatile void *addr)
{
int oldval;

@@ -78,7 +78,7 @@ static inline int test_and_set_bit(int nr, volatile void *addr)
* @nr: bit number to set
* @addr: pointer to memory
*/
-static inline int test_and_change_bit(int nr, volatile void *addr)
+static __always_inline int test_and_change_bit(int nr, volatile void *addr)
{
int oldval;

@@ -103,17 +103,17 @@ static inline int test_and_change_bit(int nr, volatile void *addr)
* Rewrite later to save a cycle or two.
*/

-static inline void clear_bit(int nr, volatile void *addr)
+static __always_inline void clear_bit(int nr, volatile void *addr)
{
test_and_clear_bit(nr, addr);
}

-static inline void set_bit(int nr, volatile void *addr)
+static __always_inline void set_bit(int nr, volatile void *addr)
{
test_and_set_bit(nr, addr);
}

-static inline void change_bit(int nr, volatile void *addr)
+static __always_inline void change_bit(int nr, volatile void *addr)
{
test_and_change_bit(nr, addr);
}
@@ -200,7 +200,7 @@ arch_test_bit_acquire(unsigned long nr, const volatile unsigned long *addr)
*
* Undefined if no zero exists, so code should check against ~0UL first.
*/
-static inline long ffz(int x)
+static __always_inline long ffz(int x)
{
int r;

@@ -217,7 +217,7 @@ static inline long ffz(int x)
* This is defined the same way as ffs.
* Note fls(0) = 0, fls(1) = 1, fls(0x80000000) = 32.
*/
-static inline int fls(unsigned int x)
+static __always_inline int fls(unsigned int x)
{
int r;

@@ -238,7 +238,7 @@ static inline int fls(unsigned int x)
* the libc and compiler builtin ffs routines, therefore
* differs in spirit from the above ffz (man ffs).
*/
-static inline int ffs(int x)
+static __always_inline int ffs(int x)
{
int r;

@@ -260,7 +260,7 @@ static inline int ffs(int x)
* bits_per_long assumed to be 32
* numbering starts at 0 I think (instead of 1 like ffs)
*/
-static inline unsigned long __ffs(unsigned long word)
+static __always_inline unsigned long __ffs(unsigned long word)
{
int num;

@@ -278,7 +278,7 @@ static inline unsigned long __ffs(unsigned long word)
* Undefined if no set bit exists, so code should check against 0 first.
* bits_per_long assumed to be 32
*/
-static inline unsigned long __fls(unsigned long word)
+static __always_inline unsigned long __fls(unsigned long word)
{
int num;

--
2.25.1


2023-12-17 07:14:23

by Vincent MAILHOL

Subject: [PATCH v3 4/5] hexagon/bitops: use __builtin_{clz,ctzl,ffs} to evaluate constant expressions

The compiler is not able to do constant folding on "asm volatile" code.

Evaluate whether or not the function argument is a constant expression
and if this is the case, return an equivalent builtin expression.

Reference: commit fdb6649ab7c1 ("x86/asm/bitops: Use __builtin_ctzl()
to evaluate constant expressions")
Link: https://git.kernel.org/torvalds/c/fdb6649ab7c1

Signed-off-by: Vincent Mailhol <[email protected]>
---
arch/hexagon/include/asm/bitops.h | 15 +++++++++++++++
1 file changed, 15 insertions(+)

diff --git a/arch/hexagon/include/asm/bitops.h b/arch/hexagon/include/asm/bitops.h
index 950d4acc2edc..12c6ad1ea2ed 100644
--- a/arch/hexagon/include/asm/bitops.h
+++ b/arch/hexagon/include/asm/bitops.h
@@ -204,6 +204,9 @@ static __always_inline long ffz(int x)
{
int r;

+ if (__builtin_constant_p(x))
+ return __builtin_ctzl(~x);
+
asm("%0 = ct1(%1);\n"
: "=&r" (r)
: "r" (x));
@@ -221,6 +224,9 @@ static __always_inline int fls(unsigned int x)
{
int r;

+ if (__builtin_constant_p(x))
+ return x ? BITS_PER_TYPE(x) - __builtin_clz(x) : 0;
+
asm("{ %0 = cl0(%1);}\n"
"%0 = sub(#32,%0);\n"
: "=&r" (r)
@@ -242,6 +248,9 @@ static __always_inline int ffs(int x)
{
int r;

+ if (__builtin_constant_p(x))
+ return __builtin_ffs(x);
+
asm("{ P0 = cmp.eq(%1,#0); %0 = ct0(%1);}\n"
"{ if (P0) %0 = #0; if (!P0) %0 = add(%0,#1);}\n"
: "=&r" (r)
@@ -264,6 +273,9 @@ static __always_inline unsigned long __ffs(unsigned long word)
{
int num;

+ if (__builtin_constant_p(word))
+ return __builtin_ctzl(word);
+
asm("%0 = ct0(%1);\n"
: "=&r" (num)
: "r" (word));
@@ -282,6 +294,9 @@ static __always_inline unsigned long __fls(unsigned long word)
{
int num;

+ if (__builtin_constant_p(word))
+ return BITS_PER_LONG - 1 - __builtin_clzl(word);
+
asm("%0 = cl0(%1);\n"
"%0 = sub(#31,%0);\n"
: "=&r" (num)
--
2.25.1


2023-12-17 07:14:30

by Vincent MAILHOL

Subject: [PATCH v3 5/5] lib: test_bitops: add compile-time optimization/evaluations assertions

Add a function in the bitops test suite to assert that the bitops
helpers correctly fold constant expressions (or trigger a build bug
otherwise). This should work on all the optimization levels supported
by Kbuild.

The added function doesn't perform any runtime tests and gets
optimized out to nothing after passing the build assertions.

Suggested-by: Yury Norov <[email protected]>
Signed-off-by: Vincent Mailhol <[email protected]>
---
lib/Kconfig.debug | 4 ++++
lib/test_bitops.c | 32 ++++++++++++++++++++++++++++++++
2 files changed, 36 insertions(+)

diff --git a/lib/Kconfig.debug b/lib/Kconfig.debug
index cc7d53d9dc01..c97d818dbc30 100644
--- a/lib/Kconfig.debug
+++ b/lib/Kconfig.debug
@@ -2454,6 +2454,10 @@ config TEST_BITOPS
compilations. It has no dependencies and doesn't run or load unless
explicitly requested by name. for example: modprobe test_bitops.

+ In addition, check that the compiler is able to fold the bitops
+ function into a compile-time constant (given that the argument is also
+ a compile-time constant) and trigger a build bug otherwise.
+
If unsure, say N.

config TEST_VMALLOC
diff --git a/lib/test_bitops.c b/lib/test_bitops.c
index 3b7bcbee84db..99b612515eb6 100644
--- a/lib/test_bitops.c
+++ b/lib/test_bitops.c
@@ -50,6 +50,34 @@ static unsigned long order_comb_long[][2] = {
};
#endif

+/* Assert that a boolean expression can be folded in a constant and is true. */
+#define test_const_eval(test_expr) \
+({ \
+ /* Evaluate once so that compiler can fold it. */ \
+ bool __test_expr = test_expr; \
+ \
+ BUILD_BUG_ON(!__builtin_constant_p(__test_expr)); \
+ BUILD_BUG_ON(!__test_expr); \
+})
+
+/*
+ * On any supported optimization level (-O2, -Os) and if invoked with
+ * a compile-time constant argument, the compiler must be able to fold
+ * into a constant expression all the bit find functions. Namely:
+ * __ffs(), ffs(), ffz(), __fls(), fls() and fls64(). Otherwise,
+ * trigger a build bug.
+ */
+static __always_inline void test_bitops_const_eval(unsigned int n)
+{
+ test_const_eval(__ffs(BIT(n)) == n);
+ test_const_eval(ffs(BIT(n)) == n + 1);
+ test_const_eval(ffz(~BIT(n)) == n);
+ test_const_eval(__fls(BIT(n)) == n);
+ test_const_eval(fls(BIT(n)) == n + 1);
+ test_const_eval(fls64(BIT_ULL(n)) == n + 1);
+ test_const_eval(fls64(BIT_ULL(n + 32)) == n + 33);
+}
+
static int __init test_bitops_startup(void)
{
int i, bit_set;
@@ -94,6 +122,10 @@ static int __init test_bitops_startup(void)
if (bit_set != BITOPS_LAST)
pr_err("ERROR: FOUND SET BIT %d\n", bit_set);

+ test_bitops_const_eval(0);
+ test_bitops_const_eval(10);
+ test_bitops_const_eval(31);
+
pr_info("Completed bitops test\n");

return 0;
--
2.25.1


2024-01-02 10:28:31

by Geert Uytterhoeven

Subject: Re: [PATCH v3 1/5] m68k/bitops: force inlining of all bitops functions

Hi Vincent,

Thanks for your patch!

On Sun, Dec 17, 2023 at 8:13 AM Vincent Mailhol
<[email protected]> wrote:
> The inline keyword actually does not guarantee that the compiler will
> inline a function. Whenever the goal is to actually inline a
> function, __always_inline should always be preferred instead.
>
> On an allyesconfig, with GCC 13.2.1, it saves roughly 5 KB.
>
> $ size --format=GNU vmlinux.before vmlinux.after
> text data bss total filename
> 60449738 70975612 2288988 133714338 vmlinux.before
> 60446534 70972412 2289596 133708542 vmlinux.after

With gcc 9.5.0-1ubuntu1~22.04, the figures are completely different
(i.e. a size increase):

allyesconfig:

text data bss total filename
58878600 72415994 2283652 133578246 vmlinux.before
58882250 72419706 2284004 133585960 vmlinux.after

atari_defconfig:

text data bss total filename
4112060 1579862 151680 5843602 vmlinux-v6.7-rc8
4117008 1579350 151680 5848038
vmlinux-v6.7-rc8-1-m68k-bitops-force-inlining

The next patch offsets that for allyesconfig, but not for atari_defconfig.

> Reference: commit 8dd5032d9c54 ("x86/asm/bitops: Force inlining of
> test_and_set_bit and friends")

Please don't split lines containing tags.

> Link: https://git.kernel.org/torvalds/c/8dd5032d9c54
>
> Signed-off-by: Vincent Mailhol <[email protected]>

Reviewed-by: Geert Uytterhoeven <[email protected]>

Gr{oetje,eeting}s,

Geert

--
Geert Uytterhoeven -- There's lots of Linux beyond ia32 -- [email protected]

In personal conversations with technical people, I call myself a hacker. But
when I'm talking to journalists I just say "programmer" or something like that.
-- Linus Torvalds

2024-01-02 10:49:26

by Geert Uytterhoeven

Subject: Re: [PATCH v3 2/5] m68k/bitops: use __builtin_{clz,ctzl,ffs} to evaluate constant expressions

On Sun, Dec 17, 2023 at 8:13 AM Vincent Mailhol
<[email protected]> wrote:
>
> The compiler is not able to do constant folding on "asm volatile" code.
>
> Evaluate whether or not the function argument is a constant expression
> and if this is the case, return an equivalent builtin expression.
>
> On an allyesconfig, with GCC 13.2.1, it saves roughly 8 KB.
>
> $ size --format=GNU vmlinux.before vmlinux.after
> text data bss total filename
> 60446534 70972412 2289596 133708542 vmlinux.before
> 60429746 70978876 2291676 133700298 vmlinux.after

Still a win with gcc 9.5.0-1ubuntu1~22.04:

allyesconfig:

text data bss total filename
58882250 72419706 2284004 133585960
vmlinux-v6.7-rc8-1-m68k-bitops-force-inlining
58863570 72419546 2285860 133568976
vmlinux-v6.7-rc8-2-m68k-bitops-use-__builtin_clz,ctzl,ffs

So an even bigger win...

atari_defconfig:

text data bss total filename
4117008 1579350 151680 5848038
vmlinux-v6.7-rc8-1-m68k-bitops-force-inlining
4116940 1579534 151712 5848186
vmlinux-v6.7-rc8-2-m68k-bitops-use-__builtin_clz,ctzl,ffs

... but a small size increase here.


>
> Reference: commit fdb6649ab7c1 ("x86/asm/bitops: Use __builtin_ctzl()
> to evaluate constant expressions")

Please don't split lines containing tags.

> Link: https://git.kernel.org/torvalds/c/fdb6649ab7c1
>
> Signed-off-by: Vincent Mailhol <[email protected]>

Reviewed-by: Geert Uytterhoeven <[email protected]>

Gr{oetje,eeting}s,

Geert

--
Geert Uytterhoeven -- There's lots of Linux beyond ia32 -- [email protected]

In personal conversations with technical people, I call myself a hacker. But
when I'm talking to journalists I just say "programmer" or something like that.
-- Linus Torvalds

2024-01-07 12:02:01

by Vincent MAILHOL

Subject: Re: [PATCH v3 1/5] m68k/bitops: force inlining of all bitops functions

On Tue. 2 janv. 2024 at 19:28, Geert Uytterhoeven <[email protected]> wrote:
>
> Hi Vincent,
>
> Thanks for your patch!

Thanks for the review and for running the benchmark.

> On Sun, Dec 17, 2023 at 8:13 AM Vincent Mailhol
> <[email protected]> wrote:
> > The inline keyword actually does not guarantee that the compiler will
> > inline a function. Whenever the goal is to actually inline a
> > function, __always_inline should always be preferred instead.
> >
> > On an allyesconfig, with GCC 13.2.1, it saves roughly 5 KB.
> >
> > $ size --format=GNU vmlinux.before vmlinux.after
> > text data bss total filename
> > 60449738 70975612 2288988 133714338 vmlinux.before
> > 60446534 70972412 2289596 133708542 vmlinux.after
>
> With gcc 9.5.0-1ubuntu1~22.04, the figures are completely different
> (i.e. a size increase):

Those results are not normal; there should not be such a big
discrepancy between two versions of the same compiler. I double
checked everything and found out that I made a mistake when computing
the figures: I am not sure what exactly, but at some point the ASLR
seeds (or other similar randomization feature) got reset, and so the
decrease I witnessed was just a "lucky roll".

After rerunning the benchmark (making sure to keep all the seeds), I
got similar results to yours:

text data bss total filename
60449738 70975356 2288988 133714082
vmlinux_allyesconfig.before_this_series
60446534 70979068 2289596 133715198
vmlinux_allyesconfig.after_first_patch
60429746 70979132 2291676 133700554
vmlinux_allyesconfig.final_second_patch

Note that there is still some kind of randomness in the data segment,
as shown in these other benchmarks I ran:

text data bss total filename
60449738 70976124 2288988 133714850
vmlinux_allyesconfig.before_this_series
60446534 70980092 2289596 133716222
vmlinux_allyesconfig.after_first_patch
60429746 70979388 2291676 133700810
vmlinux_allyesconfig.after_second_patch

text data bss total filename
60449738 70975612 2288988 133714338
vmlinux_allyesconfig.before_this_series
60446534 70980348 2289596 133716478
vmlinux_allyesconfig.after_first_patch
60429746 70979900 2291676 133701322
vmlinux_allyesconfig.after_second_patch

But the error margin is within 1K.

So, in short, I inlined some functions which I shouldn't have. I am
preparing a v4 in which I will only inline the bit-find functions
(namely: __ffs(), ffs(), ffz(), __fls(), fls() and fls64()). Here are
the new figures:

text data bss total filename
60453552 70955485 2288620 133697657
vmlinux_allyesconfig.before_this_series
60450304 70953085 2289260 133692649
vmlinux_allyesconfig.after_first_patch
60433536 70952637 2291340 133677513
vmlinux_allyesconfig.after_second_patch

N.B. The new figures were after a rebase, so do not try to compare
with the previous benchmarks. I will send the v4 soon, after I finish
updating the patch comments and double checking things.

Concerning the other functions in bitops.h, there may be some other
ones worth a __always_inline. But I will narrow the scope of this
series only to the bit-find functions. If a good samaritan wants to
investigate the other functions, go ahead!

Yours sincerely,
Vincent Mailhol




> allyesconfig:
>
> text data bss total filename
> 58878600 72415994 2283652 133578246 vmlinux.before
> 58882250 72419706 2284004 133585960 vmlinux.after
>
> atari_defconfig:
>
> text data bss total filename
> 4112060 1579862 151680 5843602 vmlinux-v6.7-rc8
> 4117008 1579350 151680 5848038
> vmlinux-v6.7-rc8-1-m68k-bitops-force-inlining
>
> The next patch offsets that for allyesconfig, but not for atari_defconfig.
>
> > Reference: commit 8dd5032d9c54 ("x86/asm/bitops: Force inlining of
> > test_and_set_bit and friends")
>
> Please don't split lines containing tags.
>
> > Link: https://git.kernel.org/torvalds/c/8dd5032d9c54
> >
> > Signed-off-by: Vincent Mailhol <[email protected]>
>
> Reviewed-by: Geert Uytterhoeven <[email protected]>
>
> Gr{oetje,eeting}s,
>
> Geert
>
> --
> Geert Uytterhoeven -- There's lots of Linux beyond ia32 -- [email protected]
>
> In personal conversations with technical people, I call myself a hacker. But
> when I'm talking to journalists I just say "programmer" or something like that.
> -- Linus Torvalds

2024-01-28 05:06:16

by Vincent MAILHOL

Subject: [PATCH v4 0/5] bitops: optimize code and add tests

This series adds a compile test to make sure that all the bitops
operations (namely __ffs(), ffs(), ffz(), __fls(), fls(), fls64())
correctly fold constant expressions given that their argument is also
a constant expression. The other functions from bitops.h are out of
scope.

So far, only the m68k and the hexagon architectures lack such
optimization. To this end, the first two patches optimize the m68k
architecture bitops functions, and the third and fourth optimize the
hexagon architecture bitops functions.

The fifth and final patch adds the compile-time tests to assert that
the constant folding occurs and that the result is accurate.

This is tested on arm, arm64, hexagon, m68k, x86 and x86_64. For other
architectures, I am putting my trust in the kernel test robot to send
a report if any of them still lacks bitops optimizations. The kernel
test robot did not complain on v3, giving me confidence that all
architectures are now properly optimized.
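
As a side note, beyond the size savings shown in the m68k and hexagon
patches, the folding also benefits callers which pass constant
arguments: the compiler can evaluate the whole expression and drop
dead branches at build time. A hypothetical example (names made up,
not part of this series):

#define MY_FIFO_DEPTH 64

static int my_check_fifo(void)
{
	/*
	 * fls(64) folds to the constant 7, so the compiler can prove
	 * the branch below dead and remove it entirely.
	 */
	if (fls(MY_FIFO_DEPTH) > 8)
		return -EINVAL;

	return 0;
}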
---

** Changelog **

v3 -> v4:

- Only apply the __always_inline to the bit-find functions, and do not
touch the other functions from bitops.h. I discovered that the
benchmark done in the v3 was incorrect (refer to the thread for
details). The scope was thus narrowed down to the bit-find
functions for which I could demonstrate the gain in the benchmark.

- Add benchmarks for hexagon (patches 3/5 and 4/5). Contrary to the
m68k benchmark, which uses an allyesconfig, the hexagon benchmark
uses a defconfig. The reason is just that the allyesconfig did not
work on the first try in my environment (even before applying this
series), and I did not spend effort troubleshooting it.

- Add Geert's review tag to patch 2/5. Despite also receiving the tag
for patch 1/5, I did not apply it there due to new changes in that patch.

- Do not split the lines containing tags.

Link: https://lore.kernel.org/all/[email protected]/

v2 -> v3:

- Add patches 1/5 and 2/5 to optimize m68k architecture bitops.
Thanks to the kernel test robot for reporting!

- Add patches 3/5 and 4/5 to optimize hexagon architecture bitops.
Thanks to the kernel test robot for reporting!

- Patch 5/5: mark test_bitops_const_eval() as __always_inline so that
n (the tested bit number) can be passed as a parameter. Previously,
only BIT(10) was tested. Add tests for BIT(0) and BIT(31).

Link: https://lore.kernel.org/all/[email protected]/

v1 -> v2:

- Drop the RFC patch. v1 was not ready to be applied on x86 because
of pending changes in arch/x86/include/asm/bitops.h. This was
finally fixed by Nick in commit 3dae5c43badf ("x86/asm/bitops: Use
__builtin_clz{l|ll} to evaluate constant expressions").
Thanks Nick!

- Update the commit description.

- Introduce the test_const_eval() macro to factorize code.

- No functional change.

Link: https://lore.kernel.org/all/[email protected]/

Vincent Mailhol (5):
m68k/bitops: force inlining of all bit-find functions
m68k/bitops: use __builtin_{clz,ctzl,ffs} to evaluate constant
expressions
hexagon/bitops: force inlining of all bit-find functions
hexagon/bitops: use __builtin_{clz,ctzl,ffs} to evaluate constant
expressions
lib: test_bitops: add compile-time optimization/evaluations assertions

arch/hexagon/include/asm/bitops.h | 25 +++++++++++++++++++-----
arch/m68k/include/asm/bitops.h | 26 ++++++++++++++++++-------
lib/Kconfig.debug | 4 ++++
lib/test_bitops.c | 32 +++++++++++++++++++++++++++++++
4 files changed, 75 insertions(+), 12 deletions(-)

--
2.43.0


2024-01-28 05:06:34

by Vincent MAILHOL

Subject: [PATCH v4 1/5] m68k/bitops: force inlining of all bit-find functions

The inline keyword actually does not guarantee that the compiler will
inline a function. Whenever the goal is to actually inline a
function, __always_inline should always be preferred instead.

__always_inline is also needed for further optimizations which will
come up in a follow-up patch.

Inline all the bit-find functions which have a custom m68k assembly
implementation, namely: __ffs(), ffs(), ffz(), __fls(), fls().

On linux v6.7 allyesconfig with GCC 13.2.1, it does not impact the
final size, meaning that, overall, those functions were already inlined
on modern GCCs:

$ size --format=GNU vmlinux.before vmlinux.after
text data bss total filename
60457956 70953665 2288644 133700265 vmlinux.before
60457964 70953697 2288644 133700305 vmlinux.after

Reference: commit 8dd5032d9c54 ("x86/asm/bitops: Force inlining of test_and_set_bit and friends")
Link: https://git.kernel.org/torvalds/c/8dd5032d9c54

Signed-off-by: Vincent Mailhol <[email protected]>
---
arch/m68k/include/asm/bitops.h | 14 +++++++-------
1 file changed, 7 insertions(+), 7 deletions(-)

diff --git a/arch/m68k/include/asm/bitops.h b/arch/m68k/include/asm/bitops.h
index 14c64a6f1217..a8b23f897f24 100644
--- a/arch/m68k/include/asm/bitops.h
+++ b/arch/m68k/include/asm/bitops.h
@@ -465,7 +465,7 @@ static inline int find_next_bit(const unsigned long *vaddr, int size,
* ffz = Find First Zero in word. Undefined if no zero exists,
* so code should check against ~0UL first..
*/
-static inline unsigned long ffz(unsigned long word)
+static __always_inline unsigned long ffz(unsigned long word)
{
int res;

@@ -488,7 +488,7 @@ static inline unsigned long ffz(unsigned long word)
*/
#if (defined(__mcfisaaplus__) || defined(__mcfisac__)) && \
!defined(CONFIG_M68000)
-static inline unsigned long __ffs(unsigned long x)
+static __always_inline unsigned long __ffs(unsigned long x)
{
__asm__ __volatile__ ("bitrev %0; ff1 %0"
: "=d" (x)
@@ -496,7 +496,7 @@ static inline unsigned long __ffs(unsigned long x)
return x;
}

-static inline int ffs(int x)
+static __always_inline int ffs(int x)
{
if (!x)
return 0;
@@ -518,7 +518,7 @@ static inline int ffs(int x)
* the libc and compiler builtin ffs routines, therefore
* differs in spirit from the above ffz (man ffs).
*/
-static inline int ffs(int x)
+static __always_inline int ffs(int x)
{
int cnt;

@@ -528,7 +528,7 @@ static inline int ffs(int x)
return 32 - cnt;
}

-static inline unsigned long __ffs(unsigned long x)
+static __always_inline unsigned long __ffs(unsigned long x)
{
return ffs(x) - 1;
}
@@ -536,7 +536,7 @@ static inline unsigned long __ffs(unsigned long x)
/*
* fls: find last bit set.
*/
-static inline int fls(unsigned int x)
+static __always_inline int fls(unsigned int x)
{
int cnt;

@@ -546,7 +546,7 @@ static inline int fls(unsigned int x)
return 32 - cnt;
}

-static inline unsigned long __fls(unsigned long x)
+static __always_inline unsigned long __fls(unsigned long x)
{
return fls(x) - 1;
}
--
2.43.0


2024-01-28 05:07:02

by Vincent MAILHOL

Subject: [PATCH v4 2/5] m68k/bitops: use __builtin_{clz,ctzl,ffs} to evaluate constant expressions

The compiler is not able to do constant folding on "asm volatile" code.

Evaluate whether or not the function argument is a constant expression
and if this is the case, return an equivalent builtin expression.

On linux 6.7 with an allyesconfig and GCC 13.2.1, it saves roughly 11 KB.

$ size --format=GNU vmlinux.before vmlinux.after
text data bss total filename
60457964 70953697 2288644 133700305 vmlinux.before
60441196 70957057 2290724 133688977 vmlinux.after

Reference: commit fdb6649ab7c1 ("x86/asm/bitops: Use __builtin_ctzl() to evaluate constant expressions")
Link: https://git.kernel.org/torvalds/c/fdb6649ab7c1

Reviewed-by: Geert Uytterhoeven <[email protected]>
Signed-off-by: Vincent Mailhol <[email protected]>
---
arch/m68k/include/asm/bitops.h | 12 ++++++++++++
1 file changed, 12 insertions(+)

diff --git a/arch/m68k/include/asm/bitops.h b/arch/m68k/include/asm/bitops.h
index a8b23f897f24..02ec8a193b96 100644
--- a/arch/m68k/include/asm/bitops.h
+++ b/arch/m68k/include/asm/bitops.h
@@ -469,6 +469,9 @@ static __always_inline unsigned long ffz(unsigned long word)
{
int res;

+ if (__builtin_constant_p(word))
+ return __builtin_ctzl(~word);
+
__asm__ __volatile__ ("bfffo %1{#0,#0},%0"
: "=d" (res) : "d" (~word & -~word));
return res ^ 31;
@@ -490,6 +493,9 @@ static __always_inline unsigned long ffz(unsigned long word)
!defined(CONFIG_M68000)
static __always_inline unsigned long __ffs(unsigned long x)
{
+ if (__builtin_constant_p(x))
+ return __builtin_ctzl(x);
+
__asm__ __volatile__ ("bitrev %0; ff1 %0"
: "=d" (x)
: "0" (x));
@@ -522,6 +528,9 @@ static __always_inline int ffs(int x)
{
int cnt;

+ if (__builtin_constant_p(x))
+ return __builtin_ffs(x);
+
__asm__ ("bfffo %1{#0:#0},%0"
: "=d" (cnt)
: "dm" (x & -x));
@@ -540,6 +549,9 @@ static __always_inline int fls(unsigned int x)
{
int cnt;

+ if (__builtin_constant_p(x))
+ return x ? BITS_PER_TYPE(x) - __builtin_clz(x) : 0;
+
__asm__ ("bfffo %1{#0,#0},%0"
: "=d" (cnt)
: "dm" (x));
--
2.43.0


2024-01-28 05:07:07

by Vincent MAILHOL

Subject: [PATCH v4 3/5] hexagon/bitops: force inlining of all bit-find functions

The inline keyword actually does not guarantee that the compiler will
inline a function. Whenever the goal is to actually inline a
function, __always_inline should always be preferred instead.

__always_inline is also needed for further optimizations which will
come up in a follow-up patch.

Inline all the bit-find functions which have a custom hexagon assembly
implementation, namely: __ffs(), ffs(), ffz(), __fls(), fls().

On linux v6.7 defconfig with clang 17.0.6, it does not impact the
final size, meaning that, overall, those functions were already inlined
on modern clangs:

$ size --format=GNU vmlinux.before vmlinux.after
text data bss total filename
4827900 1798340 364057 6990297 vmlinux.before
4827900 1798340 364057 6990297 vmlinux.after

Reference: commit 8dd5032d9c54 ("x86/asm/bitops: Force inlining of test_and_set_bit and friends")
Link: https://git.kernel.org/torvalds/c/8dd5032d9c54

Signed-off-by: Vincent Mailhol <[email protected]>
---
arch/hexagon/include/asm/bitops.h | 10 +++++-----
1 file changed, 5 insertions(+), 5 deletions(-)

diff --git a/arch/hexagon/include/asm/bitops.h b/arch/hexagon/include/asm/bitops.h
index 160d8f37fa1a..e856d6dbfe16 100644
--- a/arch/hexagon/include/asm/bitops.h
+++ b/arch/hexagon/include/asm/bitops.h
@@ -200,7 +200,7 @@ arch_test_bit_acquire(unsigned long nr, const volatile unsigned long *addr)
*
* Undefined if no zero exists, so code should check against ~0UL first.
*/
-static inline long ffz(int x)
+static __always_inline long ffz(int x)
{
int r;

@@ -217,7 +217,7 @@ static inline long ffz(int x)
* This is defined the same way as ffs.
* Note fls(0) = 0, fls(1) = 1, fls(0x80000000) = 32.
*/
-static inline int fls(unsigned int x)
+static __always_inline int fls(unsigned int x)
{
int r;

@@ -238,7 +238,7 @@ static inline int fls(unsigned int x)
* the libc and compiler builtin ffs routines, therefore
* differs in spirit from the above ffz (man ffs).
*/
-static inline int ffs(int x)
+static __always_inline int ffs(int x)
{
int r;

@@ -260,7 +260,7 @@ static inline int ffs(int x)
* bits_per_long assumed to be 32
* numbering starts at 0 I think (instead of 1 like ffs)
*/
-static inline unsigned long __ffs(unsigned long word)
+static __always_inline unsigned long __ffs(unsigned long word)
{
int num;

@@ -278,7 +278,7 @@ static inline unsigned long __ffs(unsigned long word)
* Undefined if no set bit exists, so code should check against 0 first.
* bits_per_long assumed to be 32
*/
-static inline unsigned long __fls(unsigned long word)
+static __always_inline unsigned long __fls(unsigned long word)
{
int num;

--
2.43.0


2024-01-28 05:07:42

by Vincent MAILHOL

[permalink] [raw]
Subject: [PATCH v4 5/5] lib: test_bitops: add compile-time optimization/evaluations assertions

Add a function in the bitops test suite to assert that the bitops
helper functions correctly fold into constant expressions (or trigger
a build bug otherwise). This should work on all the optimization
levels supported by Kbuild.

The added function does not perform any runtime tests and gets
optimized out to nothing after passing the build assertions.

Suggested-by: Yury Norov <[email protected]>
Signed-off-by: Vincent Mailhol <[email protected]>
---
lib/Kconfig.debug | 4 ++++
lib/test_bitops.c | 32 ++++++++++++++++++++++++++++++++
2 files changed, 36 insertions(+)

diff --git a/lib/Kconfig.debug b/lib/Kconfig.debug
index 4405f81248fb..85f8638b3ae6 100644
--- a/lib/Kconfig.debug
+++ b/lib/Kconfig.debug
@@ -2439,6 +2439,10 @@ config TEST_BITOPS
compilations. It has no dependencies and doesn't run or load unless
explicitly requested by name. for example: modprobe test_bitops.

+ In addition, check that the compiler is able to fold the bitops
+ function into a compile-time constant (given that the argument is also
+ a compile-time constant) and trigger a build bug otherwise.
+
If unsure, say N.

config TEST_VMALLOC
diff --git a/lib/test_bitops.c b/lib/test_bitops.c
index 3b7bcbee84db..99b612515eb6 100644
--- a/lib/test_bitops.c
+++ b/lib/test_bitops.c
@@ -50,6 +50,34 @@ static unsigned long order_comb_long[][2] = {
};
#endif

+/* Assert that a boolean expression can be folded in a constant and is true. */
+#define test_const_eval(test_expr) \
+({ \
+ /* Evaluate once so that compiler can fold it. */ \
+ bool __test_expr = test_expr; \
+ \
+ BUILD_BUG_ON(!__builtin_constant_p(__test_expr)); \
+ BUILD_BUG_ON(!__test_expr); \
+})
+
+/*
+ * On any supported optimization level (-O2, -Os) and if invoked with
+ * a compile-time constant argument, the compiler must be able to fold
+ * into a constant expression all the bit find functions. Namely:
+ * __ffs(), ffs(), ffz(), __fls(), fls() and fls64(). Otherwise,
+ * trigger a build bug.
+ */
+static __always_inline void test_bitops_const_eval(unsigned int n)
+{
+ test_const_eval(__ffs(BIT(n)) == n);
+ test_const_eval(ffs(BIT(n)) == n + 1);
+ test_const_eval(ffz(~BIT(n)) == n);
+ test_const_eval(__fls(BIT(n)) == n);
+ test_const_eval(fls(BIT(n)) == n + 1);
+ test_const_eval(fls64(BIT_ULL(n)) == n + 1);
+ test_const_eval(fls64(BIT_ULL(n + 32)) == n + 33);
+}
+
static int __init test_bitops_startup(void)
{
int i, bit_set;
@@ -94,6 +122,10 @@ static int __init test_bitops_startup(void)
if (bit_set != BITOPS_LAST)
pr_err("ERROR: FOUND SET BIT %d\n", bit_set);

+ test_bitops_const_eval(0);
+ test_bitops_const_eval(10);
+ test_bitops_const_eval(31);
+
pr_info("Completed bitops test\n");

return 0;
--
2.43.0


2024-01-28 05:07:47

by Vincent MAILHOL

[permalink] [raw]
Subject: [PATCH v4 4/5] hexagon/bitops: use __builtin_{clz,ctzl,ffs} to evaluate constant expressions

The compiler is not able to do constant folding on "asm volatile" code.

Evaluate whether or not the function argument is a constant expression
and if this is the case, return an equivalent builtin expression.

On linux 6.7 with an allyesconfig and clang 17.0.6, it saves roughly
4 KB.

$ size --format=GNU vmlinux.before vmlinux.after
text data bss total filename
4827900 1798340 364057 6990297 vmlinux.before
4827072 1795060 364057 6986189 vmlinux.after

Reference: commit fdb6649ab7c1 ("x86/asm/bitops: Use __builtin_ctzl() to evaluate constant expressions")
Link: https://git.kernel.org/torvalds/c/fdb6649ab7c1

Signed-off-by: Vincent Mailhol <[email protected]>
---
arch/hexagon/include/asm/bitops.h | 15 +++++++++++++++
1 file changed, 15 insertions(+)

diff --git a/arch/hexagon/include/asm/bitops.h b/arch/hexagon/include/asm/bitops.h
index e856d6dbfe16..c74a639c84f3 100644
--- a/arch/hexagon/include/asm/bitops.h
+++ b/arch/hexagon/include/asm/bitops.h
@@ -204,6 +204,9 @@ static __always_inline long ffz(int x)
{
int r;

+ if (__builtin_constant_p(x))
+ return __builtin_ctzl(~x);
+
asm("%0 = ct1(%1);\n"
: "=&r" (r)
: "r" (x));
@@ -221,6 +224,9 @@ static __always_inline int fls(unsigned int x)
{
int r;

+ if (__builtin_constant_p(x))
+ return x ? BITS_PER_TYPE(x) - __builtin_clz(x) : 0;
+
asm("{ %0 = cl0(%1);}\n"
"%0 = sub(#32,%0);\n"
: "=&r" (r)
@@ -242,6 +248,9 @@ static __always_inline int ffs(int x)
{
int r;

+ if (__builtin_constant_p(x))
+ return __builtin_ffs(x);
+
asm("{ P0 = cmp.eq(%1,#0); %0 = ct0(%1);}\n"
"{ if (P0) %0 = #0; if (!P0) %0 = add(%0,#1);}\n"
: "=&r" (r)
@@ -264,6 +273,9 @@ static __always_inline unsigned long __ffs(unsigned long word)
{
int num;

+ if (__builtin_constant_p(word))
+ return __builtin_ctzl(word);
+
asm("%0 = ct0(%1);\n"
: "=&r" (num)
: "r" (word));
@@ -282,6 +294,9 @@ static __always_inline unsigned long __fls(unsigned long word)
{
int num;

+ if (__builtin_constant_p(word))
+ return BITS_PER_LONG - 1 - __builtin_clzl(word);
+
asm("%0 = cl0(%1);\n"
"%0 = sub(#31,%0);\n"
: "=&r" (num)
--
2.43.0


2024-01-28 05:39:34

by Finn Thain

[permalink] [raw]
Subject: Re: [PATCH v4 2/5] m68k/bitops: use __builtin_{clz,ctzl,ffs} to evaluate constant expressions


On Sun, 28 Jan 2024, Vincent Mailhol wrote:

> The compiler is not able to do constant folding on "asm volatile" code.
>
> Evaluate whether or not the function argument is a constant expression
> and if this is the case, return an equivalent builtin expression.
>
> On linux 6.7 with an allyesconfig and GCC 13.2.1, it saves roughly 11 KB.
>
> $ size --format=GNU vmlinux.before vmlinux.after
> text data bss total filename
> 60457964 70953697 2288644 133700305 vmlinux.before
> 60441196 70957057 2290724 133688977 vmlinux.after
>
> Reference: commit fdb6649ab7c1 ("x86/asm/bitops: Use __builtin_ctzl() to evaluate constant expressions")
> Link: https://git.kernel.org/torvalds/c/fdb6649ab7c1
>
> Reviewed-by: Geert Uytterhoeven <[email protected]>
> Signed-off-by: Vincent Mailhol <[email protected]>
> ---
> arch/m68k/include/asm/bitops.h | 12 ++++++++++++
> 1 file changed, 12 insertions(+)
>
> diff --git a/arch/m68k/include/asm/bitops.h b/arch/m68k/include/asm/bitops.h
> index a8b23f897f24..02ec8a193b96 100644
> --- a/arch/m68k/include/asm/bitops.h
> +++ b/arch/m68k/include/asm/bitops.h
> @@ -469,6 +469,9 @@ static __always_inline unsigned long ffz(unsigned long word)
> {
> int res;
>
> + if (__builtin_constant_p(word))
> + return __builtin_ctzl(~word);
> +
> __asm__ __volatile__ ("bfffo %1{#0,#0},%0"
> : "=d" (res) : "d" (~word & -~word));
> return res ^ 31;

If the builtin has the desired behaviour, why do we reimplement it in asm?
Shouldn't we abandon one or the other to avoid having to prove (and
maintain) their equivalence?

> @@ -490,6 +493,9 @@ static __always_inline unsigned long ffz(unsigned long word)
> !defined(CONFIG_M68000)
> static __always_inline unsigned long __ffs(unsigned long x)
> {
> + if (__builtin_constant_p(x))
> + return __builtin_ctzl(x);
> +
> __asm__ __volatile__ ("bitrev %0; ff1 %0"
> : "=d" (x)
> : "0" (x));
> @@ -522,6 +528,9 @@ static __always_inline int ffs(int x)
> {
> int cnt;
>
> + if (__builtin_constant_p(x))
> + return __builtin_ffs(x);
> +
> __asm__ ("bfffo %1{#0:#0},%0"
> : "=d" (cnt)
> : "dm" (x & -x));
> @@ -540,6 +549,9 @@ static __always_inline int fls(unsigned int x)
> {
> int cnt;
>
> + if (__builtin_constant_p(x))
> + return x ? BITS_PER_TYPE(x) - __builtin_clz(x) : 0;
> +
> __asm__ ("bfffo %1{#0,#0},%0"
> : "=d" (cnt)
> : "dm" (x));
>

2024-01-28 06:27:00

by Vincent MAILHOL

[permalink] [raw]
Subject: Re: [PATCH v4 2/5] m68k/bitops: use __builtin_{clz,ctzl,ffs} to evaluate constant expressions

On Sun. 28 Jan. 2024 at 14:39, Finn Thain <[email protected]> wrote:
> On Sun, 28 Jan 2024, Vincent Mailhol wrote:
>
> > The compiler is not able to do constant folding on "asm volatile" code.
> >
> > Evaluate whether or not the function argument is a constant expression
> > and if this is the case, return an equivalent builtin expression.
> >
> > On linux 6.7 with an allyesconfig and GCC 13.2.1, it saves roughly 11 KB.
> >
> > $ size --format=GNU vmlinux.before vmlinux.after
> > text data bss total filename
> > 60457964 70953697 2288644 133700305 vmlinux.before
> > 60441196 70957057 2290724 133688977 vmlinux.after
> >
> > Reference: commit fdb6649ab7c1 ("x86/asm/bitops: Use __builtin_ctzl() to evaluate constant expressions")
> > Link: https://git.kernel.org/torvalds/c/fdb6649ab7c1
> >
> > Reviewed-by: Geert Uytterhoeven <[email protected]>
> > Signed-off-by: Vincent Mailhol <[email protected]>
> > ---
> > arch/m68k/include/asm/bitops.h | 12 ++++++++++++
> > 1 file changed, 12 insertions(+)
> >
> > diff --git a/arch/m68k/include/asm/bitops.h b/arch/m68k/include/asm/bitops.h
> > index a8b23f897f24..02ec8a193b96 100644
> > --- a/arch/m68k/include/asm/bitops.h
> > +++ b/arch/m68k/include/asm/bitops.h
> > @@ -469,6 +469,9 @@ static __always_inline unsigned long ffz(unsigned long word)
> > {
> > int res;
> >
> > + if (__builtin_constant_p(word))
> > + return __builtin_ctzl(~word);
> > +
> > __asm__ __volatile__ ("bfffo %1{#0,#0},%0"
> > : "=d" (res) : "d" (~word & -~word));
> > return res ^ 31;
>
> If the builtin has the desired behaviour, why do we reimplement it in asm?
> Shouldn't we abandon one or the other to avoid having to prove (and
> maintain) their equivalence?

The asm is meant to produce better results when the argument is not a
constant expression. The commit below is a good illustration of why we
want both the asm and the builtin:

https://git.kernel.org/torvalds/c/146034fed6ee

I say "is meant", because I did not assert whether this is still true.
Note that there are some cases in which the asm is not better anymore,
for example, see this thread:

https://lore.kernel.org/lkml/[email protected]/

but I did not receive more answers, so I stopped trying to investigate
the subject.

If you want, you can check the produced assembly of both the asm and
the builtin for both clang and gcc, and if the builtin is always
either better or equivalent, then the asm can be removed. That said, I
am not spending more effort there after being ghosted once (c.f. above
thread).
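
To make the trade-off concrete, here is a minimal, standalone sketch of
the pattern used throughout this series (plain C, with a hypothetical
example_fls(); it is not code taken from any of the patches): the builtin
path is only taken when the compiler can prove the argument is a
compile-time constant, so the hand-written asm still covers runtime
values.

static inline int example_fls(unsigned int x)
{
        int r = 0;

        /* Folds to a constant: fls(0) == 0, fls(1 << n) == n + 1. */
        if (__builtin_constant_p(x))
                return x ? 32 - __builtin_clz(x) : 0;

        /* Stand-in for the architecture-specific asm (portable fallback). */
        while (x) {
                x >>= 1;
                r++;
        }
        return r;
}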

> > @@ -490,6 +493,9 @@ static __always_inline unsigned long ffz(unsigned long word)
> > !defined(CONFIG_M68000)
> > static __always_inline unsigned long __ffs(unsigned long x)
> > {
> > + if (__builtin_constant_p(x))
> > + return __builtin_ctzl(x);
> > +
> > __asm__ __volatile__ ("bitrev %0; ff1 %0"
> > : "=d" (x)
> > : "0" (x));
> > @@ -522,6 +528,9 @@ static __always_inline int ffs(int x)
> > {
> > int cnt;
> >
> > + if (__builtin_constant_p(x))
> > + return __builtin_ffs(x);
> > +
> > __asm__ ("bfffo %1{#0:#0},%0"
> > : "=d" (cnt)
> > : "dm" (x & -x));
> > @@ -540,6 +549,9 @@ static __always_inline int fls(unsigned int x)
> > {
> > int cnt;
> >
> > + if (__builtin_constant_p(x))
> > + return x ? BITS_PER_TYPE(x) - __builtin_clz(x) : 0;
> > +
> > __asm__ ("bfffo %1{#0,#0},%0"
> > : "=d" (cnt)
> > : "dm" (x));
> >

2024-01-28 12:17:26

by David Laight

[permalink] [raw]
Subject: RE: [PATCH v4 2/5] m68k/bitops: use __builtin_{clz,ctzl,ffs} to evaluate constant expressions

From: Vincent MAILHOL
> Sent: 28 January 2024 06:27
>
> On Sun. 28 Jan. 2024 at 14:39, Finn Thain <[email protected]> wrote:
> > On Sun, 28 Jan 2024, Vincent Mailhol wrote:
> >
> > > The compiler is not able to do constant folding on "asm volatile" code.
> > >
> > > Evaluate whether or not the function argument is a constant expression
> > > and if this is the case, return an equivalent builtin expression.
> > >
...
> > If the builtin has the desired behaviour, why do we reimplement it in asm?
> > Shouldn't we abandon one or the other to avoid having to prove (and
> > maintain) their equivalence?
>
> The asm is meant to produce better results when the argument is not a
> constant expression. The commit below is a good illustration of why we
> want both the asm and the builtin:
>
> https://git.kernel.org/torvalds/c/146034fed6ee
>
> I say "is meant", because I did not assert whether this is still true.
> Note that there are some cases in which the asm is not better anymore,
> for example, see this thread:
>
> https://lore.kernel.org/lkml/[email protected]/
>
> but I did not receive more answers, so I stopped trying to investigate
> the subject.
>
> If you want, you can check the produced assembly of both the asm and
> the builtin for both clang and gcc, and if the builtin is always
> either better or equivalent, then the asm can be removed. That said, I
> am not spending more effort there after being ghosted once (c.f. above
> thread).

I don't see any example there of why the __builtin_xxx() versions
shouldn't be used all the time.
(The x86-64 asm blocks contain unrelated call instructions and objdump
wasn't passed -d to show what they were.
One even has the 'return thunk' pessimisation showing.)

I actually suspect the asm versions predate the builtins.

Does (or can) the outer common header use the __builtin functions
if no asm version exists?

David

-
Registered Address Lakeside, Bramley Road, Mount Farm, Milton Keynes, MK1 1PT, UK
Registration No: 1397386 (Wales)

2024-01-28 13:27:54

by Vincent MAILHOL

[permalink] [raw]
Subject: Re: [PATCH v4 2/5] m68k/bitops: use __builtin_{clz,ctzl,ffs} to evaluate constant expressions

On Sun. 28 Jan. 2024 at 21:16, David Laight <[email protected]> wrote:
> From: Vincent MAILHOL
> > Sent: 28 January 2024 06:27
> >
> > On Sun. 28 Jan. 2024 at 14:39, Finn Thain <[email protected]> wrote:
> > > On Sun, 28 Jan 2024, Vincent Mailhol wrote:
> > >
> > > > The compiler is not able to do constant folding on "asm volatile" code.
> > > >
> > > > Evaluate whether or not the function argument is a constant expression
> > > > and if this is the case, return an equivalent builtin expression.
> > > >
> ...
> > > If the builtin has the desired behaviour, why do we reimplement it in asm?
> > > Shouldn't we abandon one or the other to avoid having to prove (and
> > > maintain) their equivalence?
> >
> > The asm is meant to produce better results when the argument is not a
> > constant expression. The commit below is a good illustration of why we
> > want both the asm and the builtin:
> >
> > https://git.kernel.org/torvalds/c/146034fed6ee
> >
> > I say "is meant", because I did not assert whether this is still true.
> > Note that there are some cases in which the asm is not better anymore,
> > for example, see this thread:
> >
> > https://lore.kernel.org/lkml/[email protected]/
> >
> > but I did not receive more answers, so I stopped trying to investigate
> > the subject.
> >
> > If you want, you can check the produced assembly of both the asm and
> > the builtin for both clang and gcc, and if the builtin is always
> > either better or equivalent, then the asm can be removed. That said, I
> > am not spending more effort there after being ghosted once (c.f. above
> > thread).
>
> I don't see any example there of why the __builtin_xxx() versions
> shouldn't be used all the time.
> (The x86-64 asm blocks contain unrelated call instructions and objdump
> wasn't passed -d to show what they were.
> One even has the 'return thunk' pessimisation showing.)

Fair. My goal was not to point to the assembly code but to this sentence:

However, for non constant expressions, the kernel's ffs() asm
version remains better for x86_64 because, contrary to GCC, it
doesn't emit the CMOV assembly instruction

I should have been more clear. Sorry for that.

But the fact remains, on x86, some of the asm produced more optimized
code than the builtin.

> I actually suspect the asm versions predate the builtins.

This seems true. The __builtins were introduced in:

generic: Implement generic ffs/fls using __builtin_* functions
https://git.kernel.org/torvalds/c/048fa2df92c3

when the asm implementation already existed in m68k.

> Does (or can) the outer common header use the __builtin functions
> if no asm version exists?

Yes, this would be extremely easy. You just need to

#include <asm-generic/bitops/builtin-__ffs.h>
#include <asm-generic/bitops/builtin-ffs.h>
#include <asm-generic/bitops/builtin-__fls.h>
#include <asm-generic/bitops/builtin-fls.h>

and remove all the asm implementations. If you give me your green
light, I can do that change. This is trivial. The only thing I am not
ready to do is to compare the produced assembly code and confirm
whether or not it is better to remove asm code.
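
For reference, those asm-generic headers are only thin wrappers around
the compiler builtins. Paraphrased from memory (the real headers may
differ in detail), builtin-__ffs.h and builtin-fls.h look roughly like:

static __always_inline unsigned long __ffs(unsigned long word)
{
        return __builtin_ctzl(word);
}

static __always_inline int fls(unsigned int x)
{
        return x ? sizeof(x) * 8 - __builtin_clz(x) : 0;
}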

Thoughts?

Yours sincerely,
Vincent Mailhol

2024-01-28 19:02:33

by David Laight

[permalink] [raw]
Subject: RE: [PATCH v4 2/5] m68k/bitops: use __builtin_{clz,ctzl,ffs} to evaluate constant expressions

From: Vincent MAILHOL
> Sent: 28 January 2024 13:28
...
> Fair. My goal was not to point to the assembly code but to this sentence:
>
> However, for non constant expressions, the kernel's ffs() asm
> version remains better for x86_64 because, contrary to GCC, it
> doesn't emit the CMOV assembly instruction

The comment in the code is:
* AMD64 says BSFL won't clobber the dest reg if x==0; Intel64 says the
* dest reg is undefined if x==0, but their CPU architect says its
* value is written to set it to the same as before, except that the
* top 32 bits will be cleared.
I guess gcc isn't willing to act on hearsay!

>
> I should have been more clear. Sorry for that.
>
> But the fact remains, on x86, some of the asm produced more optimized
> code than the builtin.
>
> > I actually suspect the asm versions predate the builtins.
>
> This seems true. The __builtins were introduced in:
>
> generic: Implement generic ffs/fls using __builtin_* functions
> https://git.kernel.org/torvalds/c/048fa2df92c3

I was thinking of compiler versions not kernel source commits.
You'd need to be doing some serious software archaeology.

> when the asm implementation already existed in m68k.
>
> > Does (or can) the outer common header use the __builtin functions
> > if no asm version exists?
>
> Yes, this would be extremely easy. You just need to
>
> #include <asm-generic/bitops/builtin-__ffs.h>
> #include <asm-generic/bitops/builtin-ffs.h>
> #include <asm-generic/bitops/builtin-__fls.h>
> #include <asm-generic/bitops/builtin-fls.h>
>
> and remove all the asm implementations. If you give me your green
> light, I can do that change. This is trivial. The only thing I am not
> ready to do is to compare the produced assembly code and confirm
> whether or not it is better to remove asm code.
>
> Thoughts?

Not for me to say.
But the builtins are likely to generate reasonable code.
So unless the asm can be better (like trusting undocumented
x86 cpu behaviour) using them is probably best.

For the ones you are changing it may be best to propose using
the builtins all the time.

David

-
Registered Address Lakeside, Bramley Road, Mount Farm, Milton Keynes, MK1 1PT, UK
Registration No: 1397386 (Wales)

2024-01-28 22:35:39

by Finn Thain

[permalink] [raw]
Subject: Re: [PATCH v4 2/5] m68k/bitops: use __builtin_{clz,ctzl,ffs} to evaluate constant expressions


On Sun, 28 Jan 2024, Vincent MAILHOL wrote:

> > > The asm is meant to produce better results when the argument is not
> > > a constant expression.

Is that because gcc's implementation has to satisfy requirements that are
excessively stringent for the kernel's purposes? Or is it a compiler
deficiency only affecting certain architectures?

> ... The only thing I am not ready to do is to compare the produced
> assembly code and confirm whether or not it is better to remove asm
> code.
>

If you do the comparison and find no change, you get to say so in the
commit log, and everyone is happy.

2024-02-04 13:56:54

by Vincent MAILHOL

[permalink] [raw]
Subject: Re: [PATCH v4 2/5] m68k/bitops: use __builtin_{clz,ctzl,ffs} to evaluate constant expressions

Hi Finn and David,

Sorry for the late feedback, I did not have much time during weekdays.

On Monday. 29 Jan. 2024 at 07:34, Finn Thain <[email protected]> wrote:
> On Sun, 28 Jan 2024, Vincent MAILHOL wrote:
> > > > The asm is meant to produce better results when the argument is not
> > > > a constant expression.
>
> Is that because gcc's implementation has to satisfy requirements that are
> excessively stringent for the kernel's purposes? Or is it a compiler
> deficiency only affecting certain architectures?

I just guess that GCC guys followed the Intel datasheet while the
kernel guys like to live a bit more dangerously and rely on some not
so well defined behaviours... But I am really not the person to whom
you should ask.

I just want to optimize the constant folding and this is the only
purpose of this series. I am absolutely not an asm expert. That's also
why I am reluctant to compare the asm outputs.

> > ... The only thing I am not ready to do is to compare the produced
> > assembly code and confirm whether or not it is better to remove asm
> > code.
> >
>
> If you do the comparison and find no change, you get to say so in the
> commit log, and everyone is happy.

Without getting into details, here is a quick comparison of the gcc and
clang generated asm for all the bitops builtins:

https://godbolt.org/z/Yb8nMKnYf

To the extent of my limited knowledge, it looks rather OK for gcc, but
for clang... seems that this is the default software implementation.
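
For anyone who prefers to reproduce the comparison locally rather than on
godbolt, a hypothetical standalone snippet along these lines, compiled to
assembly with an m68k cross compiler (e.g. -O2 -S), shows what each
compiler emits for the builtins:

/* Hypothetical test file, not part of the series. */
int test_ffs(int x)
{
        return __builtin_ffs(x);
}

unsigned long test_ctzl(unsigned long x)
{
        return __builtin_ctzl(x);
}

int test_fls(unsigned int x)
{
        return x ? 32 - __builtin_clz(x) : 0;
}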

So are we fine with the current patch? Or maybe clang support is not
important for m68k? I do not know so tell me :)


Yours sincerely,
Vincent Mailhol

2024-02-04 23:13:32

by Finn Thain

[permalink] [raw]
Subject: Re: [PATCH v4 2/5] m68k/bitops: use __builtin_{clz,ctzl,ffs} to evaluate constant expressions

On Sun, 4 Feb 2024, Vincent MAILHOL wrote:

> Sorry for the late feedback, I did not have much time during weekdays.
>
> On Monday. 29 Jan. 2024 at 07:34, Finn Thain <[email protected]> wrote:
> > On Sun, 28 Jan 2024, Vincent MAILHOL wrote:
> > > > > The asm is meant to produce better results when the argument is not
> > > > > a constant expression.
> >
> > Is that because gcc's implementation has to satisfy requirements that are
> > excessively stringent for the kernel's purposes? Or is it a compiler
> > deficiency only affecting certain architectures?
>
> I just guess that GCC guys followed the Intel datasheet while the
> kernel guys like to live a bit more dangerously and rely on some not
> so well defined behaviours... But I am really not the person to whom
> you should ask.
>
> I just want to optimize the constant folding and this is the only
> purpose of this series. I am absolutely not an asm expert. That's also
> why I am reluctant to compare the asm outputs.
>

How does replacing asm with a builtin prevent constant folding?

> > > ... The only thing I am not ready to do is to compare the produced
> > > assembly code and confirm whether or not it is better to remove asm
> > > code.
> > >
> >
> > If you do the comparison and find no change, you get to say so in the
> > commit log, and everyone is happy.
>
> Without getting into details, here is a quick comparison of the gcc and
> clang generated asm for all the bitops builtins:
>
> https://godbolt.org/z/Yb8nMKnYf
>
> To the extent of my limited knowledge, it looks rather OK for gcc, but
> for clang... seems that this is the default software implementation.
>
> So are we fine with the current patch? Or maybe clang support is not
> important for m68k? I do not know so tell me :)
>

Let's see if I understand.

You are proposing that the kernel source carry an unquantified
optimization, with inherent complexity and maintenance costs, just for the
benefit of users who choose a compiler that doesn't work as well as the
standard compiler. Is that it?

At some point in the future when clang comes up to scratch with gcc and the
builtin reaches parity with the asm, I wonder if you will then remove both
your optimization and the asm, to eliminate the aforementioned complexity
and maintenance costs. Is there an incentive for that work?

2024-02-05 09:17:39

by Vincent MAILHOL

[permalink] [raw]
Subject: Re: [PATCH v4 2/5] m68k/bitops: use __builtin_{clz,ctzl,ffs} to evaluate constant expressions

On Mon. 5 Feb. 2024 at 08:13, Finn Thain <[email protected]> wrote:
> On Sun, 4 Feb 2024, Vincent MAILHOL wrote:
(...)
> Let's see if I understand.
>
> You are proposing that the kernel source carry an unquantified
> optimization, with inherent complexity and maintenance costs, just for the
> benefit of users who choose a compiler that doesn't work as well as the
> standard compiler. Is that it?

My proposal is quantified, c.f. my commit message:

On an allyesconfig, with GCC 13.2.1, it saves roughly 8 KB.

"Saving roughly 8kb" is a quantification.

And clang use in the kernel is standardized:

https://www.kernel.org/doc/html/latest/process/changes.html#current-minimal-requirements

GCC may be predominant, but, although optional, clang v11+ is
officially supported, and thus my patches should not neglect it.

This is why I am asking whether or not clang support is important
for m68k. If you tell me it is not, then fine, I will remove all
the asm (by the way, the patch is already ready). But if there are
even a few users who care about clang for m68k, then I do not think we
should penalize them and I would not sign-off a change which
negatively impacts some users.

The linux-m68k mailing list should know better than me if people care
about clang support. So let me reiterate the question from my previous
message: is clang support important for you?

I would like a formal acknowledgement from the linux-m68k mailing list
before sending a patch that may negatively impact some users. Thank
you.

> At some point in the future when clang comes up to scrach with gcc and the
> builtin reaches parity with the asm, I wonder if you will then remove both
> your optimization and the asm, to eliminate the afore-mentioned complexity
> and maintenance costs. Is there an incentive for that work?

I will not follow up on the evolution of clang support for m68k builtins.
The goal of this series is to add a test to assert that all
architectures correctly do the constant folding on the bit find
operations (fifth patch of this series). It happens that m68k and
hexagon architectures are failing that test, so I am fixing this as a
one shot. After this series, I have no plan to do further work on m68k
and hexagon architectures. Thank you for your understanding.


Yours sincerely,
Vincent Mailhol

2024-02-05 09:48:59

by Finn Thain

[permalink] [raw]
Subject: Re: [PATCH v4 2/5] m68k/bitops: use __builtin_{clz,ctzl,ffs} to evaluate constant expressions


On Mon, 5 Feb 2024, Vincent MAILHOL wrote:

>
> This is why I am asking whether or not clang support is important
> for m68k. If you tell me it is not, then fine, I will remove all the asm
> (by the way, the patch is already ready). But if there are even a few
> users who care about clang for m68k, then I do not think we should
> penalize them and I would not sign-off a change which negatively impacts
> some users.
>

If clang support is important then clang's builtins are important. So why
not improve those instead? That would resolve the issue in a win-win.

2024-02-05 11:21:41

by Vincent MAILHOL

[permalink] [raw]
Subject: Re: [PATCH v4 2/5] m68k/bitops: use __builtin_{clz,ctzl,ffs} to evaluate constant expressions

On Mon. 5 Feb. 2024 at 18:48, Finn Thain <[email protected]> wrote:
> On Mon, 5 Feb 2024, Vincent MAILHOL wrote:
>
> >
> > This is why I am asking whether or not clang support is important
> > for m68k. If you tell me it is not, then fine, I will remove all the asm
> > (by the way, the patch is already ready). But if there are even a few
> > users who care about clang for m68k, then I do not think we should
> > penalize them and I would not sign-off a change which negatively impacts
> > some users.
> >
>
> If clang support is important then clang's builtins are important. So why
> not improve those instead? That would resolve the issue in a win-win.

I am deeply sorry but, with all due respect, this request is unfair.
I will not fix the compiler.

Let me repeat my question for the third time: are you (or any other
m68k maintainer) ready to acknowledge that we can degrade the
performance for clang m68k users? With that acknowledgement, I will
remove the asm and replace it with the builtins.


Yours sincerely,
Vincent Mailhol

2024-02-05 16:11:48

by Vincent MAILHOL

[permalink] [raw]
Subject: [PATCH] m68k/bitops: always use compiler's builtin for bit finding functions

GCC and clang provide a set of builtin functions which can be used as
a replacement for ffs(), __ffs(), fls() and __fls().

Using the builtin comes as a trade-off.

Pros:

- Simpler code, easier to maintain
- Guarantee the constant folding

Cons:

- clang does not provide optimized code. Ref:
https://godbolt.org/z/Yb8nMKnYf

Despite the cons, use the builtins unconditionally and remove any
existing assembly implementation.

With this done, also remove the find_first_zero_bit(), find_next_zero_bit(),
find_first_bit() and find_next_bit() assembly implementations.
Instead, rely on the generic functions from linux/find.h, which will
themselves rely on the builtins we just set up.

Not-signed-off-by: Vincent Mailhol <[email protected]>
---
As written earlier, I do not want to sign off a patch which can
degrade the performance for m68k clang users. But because it is
already written, there it is.

If someone wants to pick this up, go ahead. Just make sure to remove
any reference to me. I hereby grant permission for anyone to reuse
this patch, with or without modifications, under the sole condition
that my name gets removed.
---
arch/m68k/include/asm/bitops.h | 215 +--------------------------------
1 file changed, 5 insertions(+), 210 deletions(-)

diff --git a/arch/m68k/include/asm/bitops.h b/arch/m68k/include/asm/bitops.h
index 14c64a6f1217..44aac8ced9fc 100644
--- a/arch/m68k/include/asm/bitops.h
+++ b/arch/m68k/include/asm/bitops.h
@@ -340,218 +340,13 @@ static inline bool xor_unlock_is_negative_byte(unsigned long mask,
#endif
}

-/*
- * The true 68020 and more advanced processors support the "bfffo"
- * instruction for finding bits. ColdFire and simple 68000 parts
- * (including CPU32) do not support this. They simply use the generic
- * functions.
- */
-#if defined(CONFIG_CPU_HAS_NO_BITFIELDS)
-#include <asm-generic/bitops/ffz.h>
-#else
-
-static inline int find_first_zero_bit(const unsigned long *vaddr,
- unsigned size)
-{
- const unsigned long *p = vaddr;
- int res = 32;
- unsigned int words;
- unsigned long num;
-
- if (!size)
- return 0;
-
- words = (size + 31) >> 5;
- while (!(num = ~*p++)) {
- if (!--words)
- goto out;
- }
-
- __asm__ __volatile__ ("bfffo %1{#0,#0},%0"
- : "=d" (res) : "d" (num & -num));
- res ^= 31;
-out:
- res += ((long)p - (long)vaddr - 4) * 8;
- return res < size ? res : size;
-}
-#define find_first_zero_bit find_first_zero_bit
-
-static inline int find_next_zero_bit(const unsigned long *vaddr, int size,
- int offset)
-{
- const unsigned long *p = vaddr + (offset >> 5);
- int bit = offset & 31UL, res;
-
- if (offset >= size)
- return size;
-
- if (bit) {
- unsigned long num = ~*p++ & (~0UL << bit);
- offset -= bit;
-
- /* Look for zero in first longword */
- __asm__ __volatile__ ("bfffo %1{#0,#0},%0"
- : "=d" (res) : "d" (num & -num));
- if (res < 32) {
- offset += res ^ 31;
- return offset < size ? offset : size;
- }
- offset += 32;
-
- if (offset >= size)
- return size;
- }
- /* No zero yet, search remaining full bytes for a zero */
- return offset + find_first_zero_bit(p, size - offset);
-}
-#define find_next_zero_bit find_next_zero_bit
-
-static inline int find_first_bit(const unsigned long *vaddr, unsigned size)
-{
- const unsigned long *p = vaddr;
- int res = 32;
- unsigned int words;
- unsigned long num;
-
- if (!size)
- return 0;
-
- words = (size + 31) >> 5;
- while (!(num = *p++)) {
- if (!--words)
- goto out;
- }
-
- __asm__ __volatile__ ("bfffo %1{#0,#0},%0"
- : "=d" (res) : "d" (num & -num));
- res ^= 31;
-out:
- res += ((long)p - (long)vaddr - 4) * 8;
- return res < size ? res : size;
-}
-#define find_first_bit find_first_bit
-
-static inline int find_next_bit(const unsigned long *vaddr, int size,
- int offset)
-{
- const unsigned long *p = vaddr + (offset >> 5);
- int bit = offset & 31UL, res;
-
- if (offset >= size)
- return size;
-
- if (bit) {
- unsigned long num = *p++ & (~0UL << bit);
- offset -= bit;
-
- /* Look for one in first longword */
- __asm__ __volatile__ ("bfffo %1{#0,#0},%0"
- : "=d" (res) : "d" (num & -num));
- if (res < 32) {
- offset += res ^ 31;
- return offset < size ? offset : size;
- }
- offset += 32;
-
- if (offset >= size)
- return size;
- }
- /* No one yet, search remaining full bytes for a one */
- return offset + find_first_bit(p, size - offset);
-}
-#define find_next_bit find_next_bit
-
-/*
- * ffz = Find First Zero in word. Undefined if no zero exists,
- * so code should check against ~0UL first..
- */
-static inline unsigned long ffz(unsigned long word)
-{
- int res;
-
- __asm__ __volatile__ ("bfffo %1{#0,#0},%0"
- : "=d" (res) : "d" (~word & -~word));
- return res ^ 31;
-}
-
-#endif
-
#ifdef __KERNEL__

-#if defined(CONFIG_CPU_HAS_NO_BITFIELDS)
-
-/*
- * The newer ColdFire family members support a "bitrev" instruction
- * and we can use that to implement a fast ffs. Older Coldfire parts,
- * and normal 68000 parts don't have anything special, so we use the
- * generic functions for those.
- */
-#if (defined(__mcfisaaplus__) || defined(__mcfisac__)) && \
- !defined(CONFIG_M68000)
-static inline unsigned long __ffs(unsigned long x)
-{
- __asm__ __volatile__ ("bitrev %0; ff1 %0"
- : "=d" (x)
- : "0" (x));
- return x;
-}
-
-static inline int ffs(int x)
-{
- if (!x)
- return 0;
- return __ffs(x) + 1;
-}
-
-#else
-#include <asm-generic/bitops/ffs.h>
-#include <asm-generic/bitops/__ffs.h>
-#endif
-
-#include <asm-generic/bitops/fls.h>
-#include <asm-generic/bitops/__fls.h>
-
-#else
-
-/*
- * ffs: find first bit set. This is defined the same way as
- * the libc and compiler builtin ffs routines, therefore
- * differs in spirit from the above ffz (man ffs).
- */
-static inline int ffs(int x)
-{
- int cnt;
-
- __asm__ ("bfffo %1{#0:#0},%0"
- : "=d" (cnt)
- : "dm" (x & -x));
- return 32 - cnt;
-}
-
-static inline unsigned long __ffs(unsigned long x)
-{
- return ffs(x) - 1;
-}
-
-/*
- * fls: find last bit set.
- */
-static inline int fls(unsigned int x)
-{
- int cnt;
-
- __asm__ ("bfffo %1{#0,#0},%0"
- : "=d" (cnt)
- : "dm" (x));
- return 32 - cnt;
-}
-
-static inline unsigned long __fls(unsigned long x)
-{
- return fls(x) - 1;
-}
-
-#endif
+#include <asm-generic/bitops/builtin-ffs.h>
+#include <asm-generic/bitops/builtin-__ffs.h>
+#include <asm-generic/bitops/builtin-fls.h>
+#include <asm-generic/bitops/builtin-__fls.h>
+#include <asm-generic/bitops/ffz.h>

/* Simple test-and-set bit locks */
#define test_and_set_bit_lock test_and_set_bit
--
2.43.0


2024-02-07 09:35:35

by Finn Thain

[permalink] [raw]
Subject: Re: [PATCH v4 2/5] m68k/bitops: use __builtin_{clz,ctzl,ffs} to evaluate constant expressions


On Mon, 5 Feb 2024, Vincent MAILHOL wrote:

> On Mon. 5 Feb. 2024 at 18:48, Finn Thain <[email protected]> wrote:
> > On Mon, 5 Feb 2024, Vincent MAILHOL wrote:
> >
> > If clang support is important then clang's builtins are important. So
> > why not improve those instead? That would resolve the issue in a
> > win-win.
>
> I am deeply sorry but, with all due respect, this request is unfair. I
> will not fix the compiler.
>

Absolutely. It is unfair. Just as your proposal was unfair to maintainers.
That patch would make a compiler deficiency into a maintenance burden.