This series of patches:
[1/3] adds __fls.h to asm-generic
[2/3] modifies asm-*/bitops.h for 64-bit archs to implement __fls
[3/3] modifies asm-generic/fls64.h to make use of __fls
I have compiled i386 and x86_64, and they generate the same code as
before the change. The changes to the other archs are a best effort.
Please comment.
If this patch series is accepted, it will make one tiny bit of
the x86-unification a tiny bit cleaner. The patches are against
Linus' current tree.
Andrew, if no concensus can be reached that this is a bad patch
series, would you be willing to add this to your tree?
Greetings,
Alexander
Add a generic __fls implementation in the same spirit as
the generic __ffs one. It finds the last (most significant)
set bit in the given long value.
Signed-off-by: Alexander van Heukelum <[email protected]>
---
include/asm-generic/bitops/__fls.h | 43 ++++++++++++++++++++++++++++++++++++
1 files changed, 43 insertions(+), 0 deletions(-)
create mode 100644 include/asm-generic/bitops/__fls.h
diff --git a/include/asm-generic/bitops/__fls.h b/include/asm-generic/bitops/__fls.h
new file mode 100644
index 0000000..be24465
--- /dev/null
+++ b/include/asm-generic/bitops/__fls.h
@@ -0,0 +1,43 @@
+#ifndef _ASM_GENERIC_BITOPS___FLS_H_
+#define _ASM_GENERIC_BITOPS___FLS_H_
+
+#include <asm/types.h>
+
+/**
+ * __fls - find last (most-significant) set bit in a long word
+ * @word: the word to search
+ *
+ * Undefined if no set bit exists, so code should check against 0 first.
+ */
+static inline unsigned long __fls(unsigned long word)
+{
+ int num = BITS_PER_LONG - 1;
+
+#if BITS_PER_LONG == 64
+ if (!(word & (~0ul << 32))) {
+ num -= 32;
+ word <<= 32;
+ }
+#endif
+ if (!(word & (~0ul << (BITS_PER_LONG-16)))) {
+ num -= 16;
+ word <<= 16;
+ }
+ if (!(word & (~0ul << (BITS_PER_LONG-8)))) {
+ num -= 8;
+ word <<= 8;
+ }
+ if (!(word & (~0ul << (BITS_PER_LONG-4)))) {
+ num -= 4;
+ word <<= 4;
+ }
+ if (!(word & (~0ul << (BITS_PER_LONG-2)))) {
+ num -= 2;
+ word <<= 2;
+ }
+ if (!(word & (~0ul << (BITS_PER_LONG-1))))
+ num -= 1;
+ return num;
+}
+
+#endif /* _ASM_GENERIC_BITOPS___FLS_H_ */
Implement __fls on all 64-bit archs:
alpha has an implementation of fls64.
Added __fls(x) = fls64(x) - 1.
ia64 has fls, but not __fls.
Added __fls based on code of fls.
mips and powerpc have __ilog2, which is the same as __fls.
Added __fls = __ilog2.
parisc, s390, sh and sparc64:
Include generic __fls.
x86_64 already has __fls.
Signed-off-by: Alexander van Heukelum <[email protected]>
---
include/asm-alpha/bitops.h | 5 +++++
include/asm-ia64/bitops.h | 16 ++++++++++++++++
include/asm-mips/bitops.h | 5 +++++
include/asm-parisc/bitops.h | 1 +
include/asm-powerpc/bitops.h | 5 +++++
include/asm-s390/bitops.h | 1 +
include/asm-sh/bitops.h | 1 +
include/asm-sparc64/bitops.h | 1 +
8 files changed, 35 insertions(+), 0 deletions(-)
diff --git a/include/asm-alpha/bitops.h b/include/asm-alpha/bitops.h
index 9e19a70..15f3ae2 100644
--- a/include/asm-alpha/bitops.h
+++ b/include/asm-alpha/bitops.h
@@ -388,6 +388,11 @@ static inline int fls64(unsigned long x)
}
#endif
+static inline unsigned long __fls(unsigned long x)
+{
+ return fls64(x) - 1;
+}
+
static inline int fls(int x)
{
return fls64((unsigned int) x);
diff --git a/include/asm-ia64/bitops.h b/include/asm-ia64/bitops.h
index 953d3df..e2ca800 100644
--- a/include/asm-ia64/bitops.h
+++ b/include/asm-ia64/bitops.h
@@ -407,6 +407,22 @@ fls (int t)
return ia64_popcnt(x);
}
+/*
+ * Find the last (most significant) bit set. Undefined for x==0.
+ * Bits are numbered from 0..63 (e.g., __fls(9) == 3).
+ */
+static inline unsigned long
+__fls (unsigned long x)
+{
+ x |= x >> 1;
+ x |= x >> 2;
+ x |= x >> 4;
+ x |= x >> 8;
+ x |= x >> 16;
+ x |= x >> 32;
+ return ia64_popcnt(x) - 1;
+}
+
#include <asm-generic/bitops/fls64.h>
/*
diff --git a/include/asm-mips/bitops.h b/include/asm-mips/bitops.h
index ec75ce4..c2bd126 100644
--- a/include/asm-mips/bitops.h
+++ b/include/asm-mips/bitops.h
@@ -591,6 +591,11 @@ static inline int __ilog2(unsigned long x)
return 63 - lz;
}
+static inline unsigned long __fls(unsigned long x)
+{
+ return __ilog2(x);
+}
+
#if defined(CONFIG_CPU_MIPS32) || defined(CONFIG_CPU_MIPS64)
/*
diff --git a/include/asm-parisc/bitops.h b/include/asm-parisc/bitops.h
index f8eebcb..7a6ea10 100644
--- a/include/asm-parisc/bitops.h
+++ b/include/asm-parisc/bitops.h
@@ -210,6 +210,7 @@ static __inline__ int fls(int x)
return ret;
}
+#include <asm-generic/bitops/__fls.h>
#include <asm-generic/bitops/fls64.h>
#include <asm-generic/bitops/hweight.h>
#include <asm-generic/bitops/lock.h>
diff --git a/include/asm-powerpc/bitops.h b/include/asm-powerpc/bitops.h
index 220d9a7..2fc0c45 100644
--- a/include/asm-powerpc/bitops.h
+++ b/include/asm-powerpc/bitops.h
@@ -312,6 +312,11 @@ static __inline__ int fls(unsigned int x)
asm ("cntlzw %0,%1" : "=r" (lz) : "r" (x));
return 32 - lz;
}
+
+static __inline__ unsigned long __fls(unsigned long x)
+{
+ return __ilog2(x);
+}
#include <asm-generic/bitops/fls64.h>
#include <asm-generic/bitops/hweight.h>
diff --git a/include/asm-s390/bitops.h b/include/asm-s390/bitops.h
index 965394e..b4eb24a 100644
--- a/include/asm-s390/bitops.h
+++ b/include/asm-s390/bitops.h
@@ -769,6 +769,7 @@ static inline int sched_find_first_bit(unsigned long *b)
}
#include <asm-generic/bitops/fls.h>
+#include <asm-generic/bitops/__fls.h>
#include <asm-generic/bitops/fls64.h>
#include <asm-generic/bitops/hweight.h>
diff --git a/include/asm-sh/bitops.h b/include/asm-sh/bitops.h
index b6ba5a6..d7d382f 100644
--- a/include/asm-sh/bitops.h
+++ b/include/asm-sh/bitops.h
@@ -95,6 +95,7 @@ static inline unsigned long ffz(unsigned long word)
#include <asm-generic/bitops/ext2-atomic.h>
#include <asm-generic/bitops/minix.h>
#include <asm-generic/bitops/fls.h>
+#include <asm-generic/bitops/__fls.h>
#include <asm-generic/bitops/fls64.h>
#endif /* __KERNEL__ */
diff --git a/include/asm-sparc64/bitops.h b/include/asm-sparc64/bitops.h
index 982ce89..11f9d81 100644
--- a/include/asm-sparc64/bitops.h
+++ b/include/asm-sparc64/bitops.h
@@ -34,6 +34,7 @@ extern void change_bit(unsigned long nr, volatile unsigned long *addr);
#include <asm-generic/bitops/ffz.h>
#include <asm-generic/bitops/__ffs.h>
#include <asm-generic/bitops/fls.h>
+#include <asm-generic/bitops/__fls.h>
#include <asm-generic/bitops/fls64.h>
#ifdef __KERNEL__
Use __fls for fls64 on 64-bit archs. The implementation for
64-bit archs is moved from x86_64 to asm-generic.
Signed-off-by: Alexander van Heukelum <[email protected]>
---
include/asm-generic/bitops/fls64.h | 22 ++++++++++++++++++++++
include/asm-x86/bitops_64.h | 15 ++-------------
2 files changed, 24 insertions(+), 13 deletions(-)
diff --git a/include/asm-generic/bitops/fls64.h b/include/asm-generic/bitops/fls64.h
index 1b6b17c..86d403f 100644
--- a/include/asm-generic/bitops/fls64.h
+++ b/include/asm-generic/bitops/fls64.h
@@ -3,6 +3,18 @@
#include <asm/types.h>
+/**
+ * fls64 - find last set bit in a 64-bit word
+ * @x: the word to search
+ *
+ * This is defined in a similar way as the libc and compiler builtin
+ * ffsll, but returns the position of the most significant set bit.
+ *
+ * fls64(value) returns 0 if value is 0 or the position of the last
+ * set bit if value is nonzero. The last (most significant) bit is
+ * at position 64.
+ */
+#if BITS_PER_LONG == 32
static inline int fls64(__u64 x)
{
__u32 h = x >> 32;
@@ -10,5 +22,15 @@ static inline int fls64(__u64 x)
return fls(h) + 32;
return fls(x);
}
+#elif BITS_PER_LONG == 64
+static inline int fls64(__u64 x)
+{
+ if (x == 0)
+ return 0;
+ return __fls(x) + 1;
+}
+#else
+#error BITS_PER_LONG not 32 or 64
+#endif
#endif /* _ASM_GENERIC_BITOPS_FLS64_H_ */
diff --git a/include/asm-x86/bitops_64.h b/include/asm-x86/bitops_64.h
index aaf1519..1f1f796 100644
--- a/include/asm-x86/bitops_64.h
+++ b/include/asm-x86/bitops_64.h
@@ -112,19 +112,6 @@ static inline int ffs(int x)
}
/**
- * fls64 - find last bit set in 64 bit word
- * @x: the word to search
- *
- * This is defined the same way as fls.
- */
-static inline int fls64(__u64 x)
-{
- if (x == 0)
- return 0;
- return __fls(x) + 1;
-}
-
-/**
* fls - find last bit set
* @x: the word to search
*
@@ -146,6 +133,8 @@ static inline int fls(int x)
#endif /* __KERNEL__ */
+#include <asm-generic/bitops/fls64.h>
+
#ifdef __KERNEL__
#include <asm-generic/bitops/ext2-non-atomic.h>
* Alexander van Heukelum <[email protected]> wrote:
> This series of patches:
>
> [1/3] adds __fls.h to asm-generic
> [2/3] modifies asm-*/bitops.h for 64-bit archs to implement __fls
> [3/3] modifies asm-generic/fls64.h to make use of __fls
>
> I have compiled i386 and x86_64, and they generate the same code as
> before the change. The changes to the other archs are a best effort.
> Please comment.
i've applied #1 and #3 to x86.git/testing, to see how this works out on
x86. But it looks good to me in general.
Ingo
(Sorry, sending this again as I screwed up the previous mail).
Hi,
I have a question about fls64() which I hope you or someone else could
clarify, please see below.
On Sáb, 2008-03-15 at 18:32 +0100, Alexander van Heukelum wrote:
> +#elif BITS_PER_LONG == 64
> +static inline int fls64(__u64 x)
> +{
> + if (x == 0)
> + return 0;
> + return __fls(x) + 1;
> +}
It seems fls64() is implemented on top of __fls(), however the __fls()
implementation on the x86-64 architecture states that the result is
undefined if the argument does not have any zero bits.
So if I understand correctly, the statement "fls64(~0ULL)" would return
an undefined result on x64-64 instead of 64 as one would expect.
Wouldn't it make sense to check for ~0ULL in fls64()?
Thanks,
Ricardo
Ricardo M. Correia spotted that the use of __fls() in fls64() did
not seem to make sense. In fact fls64()'s implementation is fine,
but the description of __fls() was wrong. Fix that.
Reported-by: "Ricardo M. Correia" <[email protected]>
Signed-off-by: Alexander van Heukelum <[email protected]>
---
On Sat, Jul 05, 2008 at 05:56:37PM +0100, Ricardo M. Correia wrote:
> Hi,
>
> I have a question about fls64() which I hope you or someone else could
> clarify, please see below.
>
> On Sáb, 2008-03-15 at 18:32 +0100, Alexander van Heukelum wrote:
> > +#elif BITS_PER_LONG == 64
> > +static inline int fls64(__u64 x)
> > +{
> > + if (x == 0)
> > + return 0;
> > + return __fls(x) + 1;
> > +}
>
> It seems fls64() is implemented on top of __fls(), however the __fls()
> implementation on the x86-64 architecture states that the result is
> undefined if the argument does not have any zero bits.
You have found a bug. It's not in fls64, though, but a copy/paste
one in the comment preceding __fls(). __fls() gives an undefined
result if there are no _set_ bits: only __fls(0) gives an undefined
result.
The inconsistency is well-spotted, though, thanks.
Patch is against current -tip.
Greetings,
Alexander
> So if I understand correctly, the statement "fls64(~0ULL)" would return
> an undefined result on x64-64 instead of 64 as one would expect.
>
> Wouldn't it make sense to check for ~0ULL in fls64()?
>
> Thanks,
> Ricardo
---
diff --git a/include/asm-x86/bitops.h b/include/asm-x86/bitops.h
index 96b1829..cfb2b64 100644
--- a/include/asm-x86/bitops.h
+++ b/include/asm-x86/bitops.h
@@ -356,7 +356,7 @@ static inline unsigned long ffz(unsigned long word)
* __fls: find last set bit in word
* @word: The word to search
*
- * Undefined if no zero exists, so code should check against ~0UL first.
+ * Undefined if no set bit exists, so code should check against 0 first.
*/
static inline unsigned long __fls(unsigned long word)
{
* Alexander van Heukelum <[email protected]> wrote:
> Ricardo M. Correia spotted that the use of __fls() in fls64() did not
> seem to make sense. In fact fls64()'s implementation is fine, but the
> description of __fls() was wrong. Fix that.
>
> Reported-by: "Ricardo M. Correia" <[email protected]>
> Signed-off-by: Alexander van Heukelum <[email protected]>
applied to tip/x86/cleanups, thanks!
Ingo