Subject: [0/3] Improve generic fls64 for 64-bit machines

This series of patches:

[1/3] adds __fls.h to asm-generic
[2/3] modifies asm-*/bitops.h for 64-bit archs to implement __fls
[3/3] modifies asm-generic/fls64.h to make use of __fls

I have compiled i386 and x86_64, and they generate the same code as
before the change. The changes to the other archs are a best effort.
Please comment.

If this patch series is accepted, it will make one tiny bit of
the x86-unification a tiny bit cleaner. The patches are against
Linus' current tree.

Andrew, if no concensus can be reached that this is a bad patch
series, would you be willing to add this to your tree?

Greetings,
Alexander


Subject: [1/3] Introduce a generic __fls implementation

Add a generic __fls implementation in the same spirit as
the generic __ffs one. It finds the last (most significant)
set bit in the given long value.

Signed-off-by: Alexander van Heukelum <[email protected]>
---
include/asm-generic/bitops/__fls.h | 43 ++++++++++++++++++++++++++++++++++++
1 files changed, 43 insertions(+), 0 deletions(-)
create mode 100644 include/asm-generic/bitops/__fls.h

diff --git a/include/asm-generic/bitops/__fls.h b/include/asm-generic/bitops/__fls.h
new file mode 100644
index 0000000..be24465
--- /dev/null
+++ b/include/asm-generic/bitops/__fls.h
@@ -0,0 +1,43 @@
+#ifndef _ASM_GENERIC_BITOPS___FLS_H_
+#define _ASM_GENERIC_BITOPS___FLS_H_
+
+#include <asm/types.h>
+
+/**
+ * __fls - find last (most-significant) set bit in a long word
+ * @word: the word to search
+ *
+ * Undefined if no set bit exists, so code should check against 0 first.
+ */
+static inline unsigned long __fls(unsigned long word)
+{
+ int num = BITS_PER_LONG - 1;
+
+#if BITS_PER_LONG == 64
+ if (!(word & (~0ul << 32))) {
+ num -= 32;
+ word <<= 32;
+ }
+#endif
+ if (!(word & (~0ul << (BITS_PER_LONG-16)))) {
+ num -= 16;
+ word <<= 16;
+ }
+ if (!(word & (~0ul << (BITS_PER_LONG-8)))) {
+ num -= 8;
+ word <<= 8;
+ }
+ if (!(word & (~0ul << (BITS_PER_LONG-4)))) {
+ num -= 4;
+ word <<= 4;
+ }
+ if (!(word & (~0ul << (BITS_PER_LONG-2)))) {
+ num -= 2;
+ word <<= 2;
+ }
+ if (!(word & (~0ul << (BITS_PER_LONG-1))))
+ num -= 1;
+ return num;
+}
+
+#endif /* _ASM_GENERIC_BITOPS___FLS_H_ */

Subject: [2/3] Implement __fls on all 64-bit archs

Implement __fls on all 64-bit archs:

alpha has an implementation of fls64.
Added __fls(x) = fls64(x) - 1.

ia64 has fls, but not __fls.
Added __fls based on code of fls.

mips and powerpc have __ilog2, which is the same as __fls.
Added __fls = __ilog2.

parisc, s390, sh and sparc64:
Include generic __fls.

x86_64 already has __fls.

Signed-off-by: Alexander van Heukelum <[email protected]>
---
include/asm-alpha/bitops.h | 5 +++++
include/asm-ia64/bitops.h | 16 ++++++++++++++++
include/asm-mips/bitops.h | 5 +++++
include/asm-parisc/bitops.h | 1 +
include/asm-powerpc/bitops.h | 5 +++++
include/asm-s390/bitops.h | 1 +
include/asm-sh/bitops.h | 1 +
include/asm-sparc64/bitops.h | 1 +
8 files changed, 35 insertions(+), 0 deletions(-)

diff --git a/include/asm-alpha/bitops.h b/include/asm-alpha/bitops.h
index 9e19a70..15f3ae2 100644
--- a/include/asm-alpha/bitops.h
+++ b/include/asm-alpha/bitops.h
@@ -388,6 +388,11 @@ static inline int fls64(unsigned long x)
}
#endif

+static inline unsigned long __fls(unsigned long x)
+{
+ return fls64(x) - 1;
+}
+
static inline int fls(int x)
{
return fls64((unsigned int) x);
diff --git a/include/asm-ia64/bitops.h b/include/asm-ia64/bitops.h
index 953d3df..e2ca800 100644
--- a/include/asm-ia64/bitops.h
+++ b/include/asm-ia64/bitops.h
@@ -407,6 +407,22 @@ fls (int t)
return ia64_popcnt(x);
}

+/*
+ * Find the last (most significant) bit set. Undefined for x==0.
+ * Bits are numbered from 0..63 (e.g., __fls(9) == 3).
+ */
+static inline unsigned long
+__fls (unsigned long x)
+{
+ x |= x >> 1;
+ x |= x >> 2;
+ x |= x >> 4;
+ x |= x >> 8;
+ x |= x >> 16;
+ x |= x >> 32;
+ return ia64_popcnt(x) - 1;
+}
+
#include <asm-generic/bitops/fls64.h>

/*
diff --git a/include/asm-mips/bitops.h b/include/asm-mips/bitops.h
index ec75ce4..c2bd126 100644
--- a/include/asm-mips/bitops.h
+++ b/include/asm-mips/bitops.h
@@ -591,6 +591,11 @@ static inline int __ilog2(unsigned long x)
return 63 - lz;
}

+static inline unsigned long __fls(unsigned long x)
+{
+ return __ilog2(x);
+}
+
#if defined(CONFIG_CPU_MIPS32) || defined(CONFIG_CPU_MIPS64)

/*
diff --git a/include/asm-parisc/bitops.h b/include/asm-parisc/bitops.h
index f8eebcb..7a6ea10 100644
--- a/include/asm-parisc/bitops.h
+++ b/include/asm-parisc/bitops.h
@@ -210,6 +210,7 @@ static __inline__ int fls(int x)
return ret;
}

+#include <asm-generic/bitops/__fls.h>
#include <asm-generic/bitops/fls64.h>
#include <asm-generic/bitops/hweight.h>
#include <asm-generic/bitops/lock.h>
diff --git a/include/asm-powerpc/bitops.h b/include/asm-powerpc/bitops.h
index 220d9a7..2fc0c45 100644
--- a/include/asm-powerpc/bitops.h
+++ b/include/asm-powerpc/bitops.h
@@ -312,6 +312,11 @@ static __inline__ int fls(unsigned int x)
asm ("cntlzw %0,%1" : "=r" (lz) : "r" (x));
return 32 - lz;
}
+
+static __inline__ unsigned long __fls(unsigned long x)
+{
+ return __ilog2(x);
+}
#include <asm-generic/bitops/fls64.h>

#include <asm-generic/bitops/hweight.h>
diff --git a/include/asm-s390/bitops.h b/include/asm-s390/bitops.h
index 965394e..b4eb24a 100644
--- a/include/asm-s390/bitops.h
+++ b/include/asm-s390/bitops.h
@@ -769,6 +769,7 @@ static inline int sched_find_first_bit(unsigned long *b)
}

#include <asm-generic/bitops/fls.h>
+#include <asm-generic/bitops/__fls.h>
#include <asm-generic/bitops/fls64.h>

#include <asm-generic/bitops/hweight.h>
diff --git a/include/asm-sh/bitops.h b/include/asm-sh/bitops.h
index b6ba5a6..d7d382f 100644
--- a/include/asm-sh/bitops.h
+++ b/include/asm-sh/bitops.h
@@ -95,6 +95,7 @@ static inline unsigned long ffz(unsigned long word)
#include <asm-generic/bitops/ext2-atomic.h>
#include <asm-generic/bitops/minix.h>
#include <asm-generic/bitops/fls.h>
+#include <asm-generic/bitops/__fls.h>
#include <asm-generic/bitops/fls64.h>

#endif /* __KERNEL__ */
diff --git a/include/asm-sparc64/bitops.h b/include/asm-sparc64/bitops.h
index 982ce89..11f9d81 100644
--- a/include/asm-sparc64/bitops.h
+++ b/include/asm-sparc64/bitops.h
@@ -34,6 +34,7 @@ extern void change_bit(unsigned long nr, volatile unsigned long *addr);
#include <asm-generic/bitops/ffz.h>
#include <asm-generic/bitops/__ffs.h>
#include <asm-generic/bitops/fls.h>
+#include <asm-generic/bitops/__fls.h>
#include <asm-generic/bitops/fls64.h>

#ifdef __KERNEL__

Subject: [3/3] Use __fls for fls64 on 64-bit archs

Use __fls for fls64 on 64-bit archs. The implementation for
64-bit archs is moved from x86_64 to asm-generic.

Signed-off-by: Alexander van Heukelum <[email protected]>
---
include/asm-generic/bitops/fls64.h | 22 ++++++++++++++++++++++
include/asm-x86/bitops_64.h | 15 ++-------------
2 files changed, 24 insertions(+), 13 deletions(-)

diff --git a/include/asm-generic/bitops/fls64.h b/include/asm-generic/bitops/fls64.h
index 1b6b17c..86d403f 100644
--- a/include/asm-generic/bitops/fls64.h
+++ b/include/asm-generic/bitops/fls64.h
@@ -3,6 +3,18 @@

#include <asm/types.h>

+/**
+ * fls64 - find last set bit in a 64-bit word
+ * @x: the word to search
+ *
+ * This is defined in a similar way as the libc and compiler builtin
+ * ffsll, but returns the position of the most significant set bit.
+ *
+ * fls64(value) returns 0 if value is 0 or the position of the last
+ * set bit if value is nonzero. The last (most significant) bit is
+ * at position 64.
+ */
+#if BITS_PER_LONG == 32
static inline int fls64(__u64 x)
{
__u32 h = x >> 32;
@@ -10,5 +22,15 @@ static inline int fls64(__u64 x)
return fls(h) + 32;
return fls(x);
}
+#elif BITS_PER_LONG == 64
+static inline int fls64(__u64 x)
+{
+ if (x == 0)
+ return 0;
+ return __fls(x) + 1;
+}
+#else
+#error BITS_PER_LONG not 32 or 64
+#endif

#endif /* _ASM_GENERIC_BITOPS_FLS64_H_ */
diff --git a/include/asm-x86/bitops_64.h b/include/asm-x86/bitops_64.h
index aaf1519..1f1f796 100644
--- a/include/asm-x86/bitops_64.h
+++ b/include/asm-x86/bitops_64.h
@@ -112,19 +112,6 @@ static inline int ffs(int x)
}

/**
- * fls64 - find last bit set in 64 bit word
- * @x: the word to search
- *
- * This is defined the same way as fls.
- */
-static inline int fls64(__u64 x)
-{
- if (x == 0)
- return 0;
- return __fls(x) + 1;
-}
-
-/**
* fls - find last bit set
* @x: the word to search
*
@@ -146,6 +133,8 @@ static inline int fls(int x)

#endif /* __KERNEL__ */

+#include <asm-generic/bitops/fls64.h>
+
#ifdef __KERNEL__

#include <asm-generic/bitops/ext2-non-atomic.h>

2008-03-21 13:11:20

by Ingo Molnar

[permalink] [raw]
Subject: Re: [0/3] Improve generic fls64 for 64-bit machines


* Alexander van Heukelum <[email protected]> wrote:

> This series of patches:
>
> [1/3] adds __fls.h to asm-generic
> [2/3] modifies asm-*/bitops.h for 64-bit archs to implement __fls
> [3/3] modifies asm-generic/fls64.h to make use of __fls
>
> I have compiled i386 and x86_64, and they generate the same code as
> before the change. The changes to the other archs are a best effort.
> Please comment.

i've applied #1 and #3 to x86.git/testing, to see how this works out on
x86. But it looks good to me in general.

Ingo

2008-07-05 16:56:48

by Ricardo M. Correia

[permalink] [raw]
Subject: Re: [3/3] Use __fls for fls64 on 64-bit archs

(Sorry, sending this again as I screwed up the previous mail).

Hi,

I have a question about fls64() which I hope you or someone else could
clarify, please see below.

On Sáb, 2008-03-15 at 18:32 +0100, Alexander van Heukelum wrote:
> +#elif BITS_PER_LONG == 64
> +static inline int fls64(__u64 x)
> +{
> + if (x == 0)
> + return 0;
> + return __fls(x) + 1;
> +}

It seems fls64() is implemented on top of __fls(), however the __fls()
implementation on the x86-64 architecture states that the result is
undefined if the argument does not have any zero bits.
So if I understand correctly, the statement "fls64(~0ULL)" would return
an undefined result on x64-64 instead of 64 as one would expect.

Wouldn't it make sense to check for ~0ULL in fls64()?

Thanks,
Ricardo

Subject: [PATCH] x86: fix description of __fls(): __fls(0) is undefined

Ricardo M. Correia spotted that the use of __fls() in fls64() did
not seem to make sense. In fact fls64()'s implementation is fine,
but the description of __fls() was wrong. Fix that.

Reported-by: "Ricardo M. Correia" <[email protected]>
Signed-off-by: Alexander van Heukelum <[email protected]>

---

On Sat, Jul 05, 2008 at 05:56:37PM +0100, Ricardo M. Correia wrote:
> Hi,
>
> I have a question about fls64() which I hope you or someone else could
> clarify, please see below.
>
> On Sáb, 2008-03-15 at 18:32 +0100, Alexander van Heukelum wrote:
> > +#elif BITS_PER_LONG == 64
> > +static inline int fls64(__u64 x)
> > +{
> > + if (x == 0)
> > + return 0;
> > + return __fls(x) + 1;
> > +}
>
> It seems fls64() is implemented on top of __fls(), however the __fls()
> implementation on the x86-64 architecture states that the result is
> undefined if the argument does not have any zero bits.

You have found a bug. It's not in fls64, though, but a copy/paste
one in the comment preceding __fls(). __fls() gives an undefined
result if there are no _set_ bits: only __fls(0) gives an undefined
result.

The inconsistency is well-spotted, though, thanks.

Patch is against current -tip.

Greetings,
Alexander

> So if I understand correctly, the statement "fls64(~0ULL)" would return
> an undefined result on x64-64 instead of 64 as one would expect.
>
> Wouldn't it make sense to check for ~0ULL in fls64()?
>
> Thanks,
> Ricardo

---

diff --git a/include/asm-x86/bitops.h b/include/asm-x86/bitops.h
index 96b1829..cfb2b64 100644
--- a/include/asm-x86/bitops.h
+++ b/include/asm-x86/bitops.h
@@ -356,7 +356,7 @@ static inline unsigned long ffz(unsigned long word)
* __fls: find last set bit in word
* @word: The word to search
*
- * Undefined if no zero exists, so code should check against ~0UL first.
+ * Undefined if no set bit exists, so code should check against 0 first.
*/
static inline unsigned long __fls(unsigned long word)
{

2008-07-18 12:33:29

by Ingo Molnar

[permalink] [raw]
Subject: Re: [PATCH] x86: fix description of __fls(): __fls(0) is undefined


* Alexander van Heukelum <[email protected]> wrote:

> Ricardo M. Correia spotted that the use of __fls() in fls64() did not
> seem to make sense. In fact fls64()'s implementation is fine, but the
> description of __fls() was wrong. Fix that.
>
> Reported-by: "Ricardo M. Correia" <[email protected]>
> Signed-off-by: Alexander van Heukelum <[email protected]>

applied to tip/x86/cleanups, thanks!

Ingo