2010-01-22 16:00:31

by Peter Zijlstra

Subject: [PATCH 07/10] bitops: Provide compile time HWEIGHT{8,16,32,64}

Provide compile time versions of hweight.

Signed-off-by: Peter Zijlstra <[email protected]>
LKML-Reference: <new-submission>
---
include/linux/bitops.h | 14 ++++++++++++++
1 file changed, 14 insertions(+)

Index: linux-2.6/include/linux/bitops.h
===================================================================
--- linux-2.6.orig/include/linux/bitops.h
+++ linux-2.6/include/linux/bitops.h
@@ -45,6 +45,20 @@ static inline unsigned long hweight_long
return sizeof(w) == 4 ? hweight32(w) : hweight64(w);
}

+#define HWEIGHT8(w) \
+ ( (!!((w) & (1ULL << 0))) + \
+ (!!((w) & (1ULL << 1))) + \
+ (!!((w) & (1ULL << 2))) + \
+ (!!((w) & (1ULL << 3))) + \
+ (!!((w) & (1ULL << 4))) + \
+ (!!((w) & (1ULL << 5))) + \
+ (!!((w) & (1ULL << 6))) + \
+ (!!((w) & (1ULL << 7))) )
+
+#define HWEIGHT16(w) (HWEIGHT8(w) + HWEIGHT8(w >> 8))
+#define HWEIGHT32(w) (HWEIGHT16(w) + HWEIGHT16(w >> 16))
+#define HWEIGHT64(w) (HWEIGHT32(w) + HWEIGHT32(w >> 32))
+
/**
* rol32 - rotate a 32-bit value left
* @word: value to rotate

--
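
For a sense of what the compile-time versions buy, a minimal userspace
sketch (the buffer and values are illustrative, not part of the patch):
every operand is an integer constant expression, so the result can be
used anywhere C demands a constant.

#include <stdio.h>

#define HWEIGHT8(w)			\
	( (!!((w) & (1ULL << 0))) +	\
	(!!((w) & (1ULL << 1))) +	\
	(!!((w) & (1ULL << 2))) +	\
	(!!((w) & (1ULL << 3))) +	\
	(!!((w) & (1ULL << 4))) +	\
	(!!((w) & (1ULL << 5))) +	\
	(!!((w) & (1ULL << 6))) +	\
	(!!((w) & (1ULL << 7))) )
#define HWEIGHT16(w) (HWEIGHT8(w) + HWEIGHT8((w) >> 8))
#define HWEIGHT32(w) (HWEIGHT16(w) + HWEIGHT16((w) >> 16))

/* Legal only because HWEIGHT32(0xdeadbeef) folds to the constant 24. */
static char buf[HWEIGHT32(0xdeadbeefUL)];

int main(void)
{
	printf("%zu\n", sizeof(buf));	/* prints 24 */
	return 0;
}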


2010-01-29 09:29:52

by Peter Zijlstra

Subject: [tip:perf/core] bitops: Provide compile time HWEIGHT{8,16,32,64}

Commit-ID: 9f41699ed067fa695faff8e2e9981b2550abec62
Gitweb: http://git.kernel.org/tip/9f41699ed067fa695faff8e2e9981b2550abec62
Author: Peter Zijlstra <[email protected]>
AuthorDate: Fri, 22 Jan 2010 15:59:29 +0100
Committer: Ingo Molnar <[email protected]>
CommitDate: Fri, 29 Jan 2010 09:01:39 +0100

bitops: Provide compile time HWEIGHT{8,16,32,64}

Provide compile time versions of hweight.

Signed-off-by: Peter Zijlstra <[email protected]>
Cc: Stephane Eranian <[email protected]>
Cc: Linus Torvalds <[email protected]>
Cc: Andrew Morton <[email protected]>
Cc: Thomas Gleixner <[email protected]>
LKML-Reference: <[email protected]>
[ Remove some whitespace damage while we are at it ]
Signed-off-by: Ingo Molnar <[email protected]>
---
include/linux/bitops.h | 18 ++++++++++++++++--
1 files changed, 16 insertions(+), 2 deletions(-)

diff --git a/include/linux/bitops.h b/include/linux/bitops.h
index c05a29c..ba0fd1e 100644
--- a/include/linux/bitops.h
+++ b/include/linux/bitops.h
@@ -25,7 +25,7 @@
static __inline__ int get_bitmask_order(unsigned int count)
{
int order;
-
+
order = fls(count);
return order; /* We could be slightly more clever with -1 here... */
}
@@ -33,7 +33,7 @@ static __inline__ int get_bitmask_order(unsigned int count)
static __inline__ int get_count_order(unsigned int count)
{
int order;
-
+
order = fls(count) - 1;
if (count & (count - 1))
order++;
@@ -45,6 +45,20 @@ static inline unsigned long hweight_long(unsigned long w)
return sizeof(w) == 4 ? hweight32(w) : hweight64(w);
}

+#define HWEIGHT8(w) \
+ ( (!!((w) & (1ULL << 0))) + \
+ (!!((w) & (1ULL << 1))) + \
+ (!!((w) & (1ULL << 2))) + \
+ (!!((w) & (1ULL << 3))) + \
+ (!!((w) & (1ULL << 4))) + \
+ (!!((w) & (1ULL << 5))) + \
+ (!!((w) & (1ULL << 6))) + \
+ (!!((w) & (1ULL << 7))) )
+
+#define HWEIGHT16(w) (HWEIGHT8(w) + HWEIGHT8(w >> 8))
+#define HWEIGHT32(w) (HWEIGHT16(w) + HWEIGHT16(w >> 16))
+#define HWEIGHT64(w) (HWEIGHT32(w) + HWEIGHT32(w >> 32))
+
/**
* rol32 - rotate a 32-bit value left
* @word: value to rotate

2010-01-29 10:02:11

by Andrew Morton

Subject: Re: [tip:perf/core] bitops: Provide compile time HWEIGHT{8,16,32,64}

On Fri, 29 Jan 2010 09:28:04 GMT tip-bot for Peter Zijlstra <[email protected]> wrote:

> +#define HWEIGHT8(w) \
> + ( (!!((w) & (1ULL << 0))) + \
> + (!!((w) & (1ULL << 1))) + \
> + (!!((w) & (1ULL << 2))) + \
> + (!!((w) & (1ULL << 3))) + \
> + (!!((w) & (1ULL << 4))) + \
> + (!!((w) & (1ULL << 5))) + \
> + (!!((w) & (1ULL << 6))) + \
> + (!!((w) & (1ULL << 7))) )
> +
> +#define HWEIGHT16(w) (HWEIGHT8(w) + HWEIGHT8(w >> 8))
> +#define HWEIGHT32(w) (HWEIGHT16(w) + HWEIGHT16(w >> 16))
> +#define HWEIGHT64(w) (HWEIGHT32(w) + HWEIGHT32(w >> 32))

Would be nice if it had a comment explaining why it exists. If people
accidentally use this with non-constant arguments, the generated code
will be pretty ghastly.

Or add some barf-if-not-__constant_p() thing, perhaps.
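
A sketch of such a barf, using the kernel's existing BUILD_BUG_ON_ZERO()
idiom (HWEIGHT8_CHECKED is a made-up name; the same construction shows
up later in this thread):

/* From linux/kernel.h: 0 if e is zero, build error otherwise. */
#define BUILD_BUG_ON_ZERO(e) (sizeof(struct { int:-!!(e); }))

/* Refuse non-constant arguments at compile time. */
#define HWEIGHT8_CHECKED(w) \
	(BUILD_BUG_ON_ZERO(!__builtin_constant_p(w)) + HWEIGHT8(w))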

2010-01-29 10:04:45

by Ingo Molnar

Subject: Re: [tip:perf/core] bitops: Provide compile time HWEIGHT{8,16,32,64}


* Andrew Morton <[email protected]> wrote:

> On Fri, 29 Jan 2010 09:28:04 GMT tip-bot for Peter Zijlstra <[email protected]> wrote:
>
> > +#define HWEIGHT8(w) \
> > + ( (!!((w) & (1ULL << 0))) + \
> > + (!!((w) & (1ULL << 1))) + \
> > + (!!((w) & (1ULL << 2))) + \
> > + (!!((w) & (1ULL << 3))) + \
> > + (!!((w) & (1ULL << 4))) + \
> > + (!!((w) & (1ULL << 5))) + \
> > + (!!((w) & (1ULL << 6))) + \
> > + (!!((w) & (1ULL << 7))) )
> > +
> > +#define HWEIGHT16(w) (HWEIGHT8(w) + HWEIGHT8(w >> 8))
> > +#define HWEIGHT32(w) (HWEIGHT16(w) + HWEIGHT16(w >> 16))
> > +#define HWEIGHT64(w) (HWEIGHT32(w) + HWEIGHT32(w >> 32))
>
> Would be nice if it had a comment explaining why it exists. If people
> accidentally use this with non-constant arguments, the generated code
> will be pretty ghastly.
>
> Or add some barf-if-not-__constant_p() thing, perhaps.

Yeah, agreed.

Ingo

2010-01-29 10:15:53

by Andrew Morton

Subject: Re: [tip:perf/core] bitops: Provide compile time HWEIGHT{8,16,32,64}

On Fri, 29 Jan 2010 11:04:31 +0100 Ingo Molnar <[email protected]> wrote:

>
> * Andrew Morton <[email protected]> wrote:
>
> > On Fri, 29 Jan 2010 09:28:04 GMT tip-bot for Peter Zijlstra <[email protected]> wrote:
> >
> > > +#define HWEIGHT8(w) \
> > > + ( (!!((w) & (1ULL << 0))) + \
> > > + (!!((w) & (1ULL << 1))) + \
> > > + (!!((w) & (1ULL << 2))) + \
> > > + (!!((w) & (1ULL << 3))) + \
> > > + (!!((w) & (1ULL << 4))) + \
> > > + (!!((w) & (1ULL << 5))) + \
> > > + (!!((w) & (1ULL << 6))) + \
> > > + (!!((w) & (1ULL << 7))) )
> > > +
> > > +#define HWEIGHT16(w) (HWEIGHT8(w) + HWEIGHT8(w >> 8))
> > > +#define HWEIGHT32(w) (HWEIGHT16(w) + HWEIGHT16(w >> 16))
> > > +#define HWEIGHT64(w) (HWEIGHT32(w) + HWEIGHT32(w >> 32))
> >
> > Would be nice if it had a comment explaining why it exists. If people
> > accidentally use this with non-constant arguments, the generated code
> > will be pretty ghastly.
> >
> > Or add some barf-if-not-__constant_p() thing, perhaps.
>
> Yeah, agreed.
>

Also...

Should we just do

#define HWEIGHT(x) HWEIGHT64(x)

and make HWEIGHT() the sole officially-exported interface? I mean, all
it does is emit an obfuscated constant - perhaps we can save users from
having to pick which one of the above to use by giving them a "this one
always works" interface.

That might require some casting to suppress "shift out of range"
warnings though.

<wonders if we'd otherwise end up needing an HWEIGHT_LONG()>
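
A sketch of the casting in question, which is the form later versions
in this thread adopt:

#define HWEIGHT(w) HWEIGHT64((u64)(w))

Without the widening cast, something like HWEIGHT64(1U) would expand to
a right shift of an unsigned int by 32, which is undefined and produces
exactly the "shift out of range" warning mentioned above.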

2010-01-29 10:32:05

by John Kacur

Subject: Re: [PATCH 07/10] bitops: Provide compile time HWEIGHT{8,16,32,64}

On Fri, Jan 22, 2010 at 4:50 PM, Peter Zijlstra <[email protected]> wrote:
> Provide compile time versions of hweight.
>
> Signed-off-by: Peter Zijlstra <[email protected]>
> LKML-Reference: <new-submission>
> ---
>  include/linux/bitops.h |   14 ++++++++++++++
>  1 file changed, 14 insertions(+)
>
> Index: linux-2.6/include/linux/bitops.h
> ===================================================================
> --- linux-2.6.orig/include/linux/bitops.h
> +++ linux-2.6/include/linux/bitops.h
> @@ -45,6 +45,20 @@ static inline unsigned long hweight_long
>	return sizeof(w) == 4 ? hweight32(w) : hweight64(w);
>  }

I like it. Maybe provide a comment that this provides the Hamming weight, it
took me a second to realize what this was.

>
> +#define HWEIGHT8(w)			\
> +	( (!!((w) & (1ULL << 0))) +	\
> +	(!!((w) & (1ULL << 1))) +	\
> +	(!!((w) & (1ULL << 2))) +	\
> +	(!!((w) & (1ULL << 3))) +	\
> +	(!!((w) & (1ULL << 4))) +	\
> +	(!!((w) & (1ULL << 5))) +	\
> +	(!!((w) & (1ULL << 6))) +	\
> +	(!!((w) & (1ULL << 7))) )
> +
> +#define HWEIGHT16(w) (HWEIGHT8(w)  + HWEIGHT8(w >> 8))
> +#define HWEIGHT32(w) (HWEIGHT16(w) + HWEIGHT16(w >> 16))
> +#define HWEIGHT64(w) (HWEIGHT32(w) + HWEIGHT32(w >> 32))
> +
>  /**
>   * rol32 - rotate a 32-bit value left
>   * @word: value to rotate
>
> --
>

2010-01-29 11:04:14

by Peter Zijlstra

Subject: Re: [tip:perf/core] bitops: Provide compile time HWEIGHT{8,16,32,64}

On Fri, 2010-01-29 at 02:01 -0800, Andrew Morton wrote:
> On Fri, 29 Jan 2010 09:28:04 GMT tip-bot for Peter Zijlstra <[email protected]> wrote:
>
> > +#define HWEIGHT8(w) \
> > + ( (!!((w) & (1ULL << 0))) + \
> > + (!!((w) & (1ULL << 1))) + \
> > + (!!((w) & (1ULL << 2))) + \
> > + (!!((w) & (1ULL << 3))) + \
> > + (!!((w) & (1ULL << 4))) + \
> > + (!!((w) & (1ULL << 5))) + \
> > + (!!((w) & (1ULL << 6))) + \
> > + (!!((w) & (1ULL << 7))) )
> > +
> > +#define HWEIGHT16(w) (HWEIGHT8(w) + HWEIGHT8(w >> 8))
> > +#define HWEIGHT32(w) (HWEIGHT16(w) + HWEIGHT16(w >> 16))
> > +#define HWEIGHT64(w) (HWEIGHT32(w) + HWEIGHT32(w >> 32))
>
> Would be nice if it had a comment explaining why it exists. If people
> accidentally use this with non-constant arguments, the generated code
> will be pretty ghastly.

*sigh* and here I thought it being placed right next to hweight_long()
which uses the arch hweightN() would be clue enough.

If people are so clueless, who says they'll read a comment.. but sure I
guess I can add one.

> Or add some barf-if-not-__constant_p() thing, perhaps.

I've actually sneaked one non-constant usage in, but since it's in an
init path I didn't care to fix that, but I guess here goes:

---
Subject: bitops: Dummyify the compile-time hweight versions

Because it seems allowed to not think and write kernel code.

Signed-off-by: Peter Zijlstra <[email protected]>
---
Index: linux-2.6/arch/x86/kernel/cpu/perf_event.c
===================================================================
--- linux-2.6.orig/arch/x86/kernel/cpu/perf_event.c
+++ linux-2.6/arch/x86/kernel/cpu/perf_event.c
@@ -93,13 +93,16 @@ struct cpu_hw_events {
struct perf_event *event_list[X86_PMC_IDX_MAX]; /* in enabled order */
};

-#define EVENT_CONSTRAINT(c, n, m) { \
+#define __EVENT_CONSTRAINT(c, n, m, w) {\
{ .idxmsk64[0] = (n) }, \
.code = (c), \
.cmask = (m), \
- .weight = HWEIGHT64((u64)(n)), \
+ .weight = (w), \
}

+#define EVENT_CONSTRAINT(c, n, m) \
+ __EVENT_CONSTRAINT(c, n, m, HWEIGHT(n))
+
#define INTEL_EVENT_CONSTRAINT(c, n) \
EVENT_CONSTRAINT(c, n, INTEL_ARCH_EVTSEL_MASK)

@@ -2646,7 +2649,8 @@ void __init init_hw_perf_events(void)
register_die_notifier(&perf_event_nmi_notifier);

unconstrained = (struct event_constraint)
- EVENT_CONSTRAINT(0, (1ULL << x86_pmu.num_events) - 1, 0);
+ __EVENT_CONSTRAINT(0, (1ULL << x86_pmu.num_events) - 1,
+ 0, x86_pmu.num_events);

pr_info("... version: %d\n", x86_pmu.version);
pr_info("... bit width: %d\n", x86_pmu.event_bits);
Index: linux-2.6/include/linux/bitops.h
===================================================================
--- linux-2.6.orig/include/linux/bitops.h
+++ linux-2.6/include/linux/bitops.h
@@ -45,19 +45,29 @@ static inline unsigned long hweight_long
return sizeof(w) == 4 ? hweight32(w) : hweight64(w);
}

-#define HWEIGHT8(w) \
- ( (!!((w) & (1ULL << 0))) + \
- (!!((w) & (1ULL << 1))) + \
- (!!((w) & (1ULL << 2))) + \
- (!!((w) & (1ULL << 3))) + \
- (!!((w) & (1ULL << 4))) + \
- (!!((w) & (1ULL << 5))) + \
- (!!((w) & (1ULL << 6))) + \
+/*
+ * Clearly slow versions of the hweightN() functions, their benefit is
+ * of course compile time evaluation of constant arguments.
+ */
+#define HWEIGHT8(w) \
+ ( BUILD_BUG_ON_ZERO(!__builtin_constant_p(w)) + \
+ (!!((w) & (1ULL << 0))) + \
+ (!!((w) & (1ULL << 1))) + \
+ (!!((w) & (1ULL << 2))) + \
+ (!!((w) & (1ULL << 3))) + \
+ (!!((w) & (1ULL << 4))) + \
+ (!!((w) & (1ULL << 5))) + \
+ (!!((w) & (1ULL << 6))) + \
(!!((w) & (1ULL << 7))) )

-#define HWEIGHT16(w) (HWEIGHT8(w) + HWEIGHT8(w >> 8))
-#define HWEIGHT32(w) (HWEIGHT16(w) + HWEIGHT16(w >> 16))
-#define HWEIGHT64(w) (HWEIGHT32(w) + HWEIGHT32(w >> 32))
+#define HWEIGHT16(w) (HWEIGHT8(w) + HWEIGHT8((w) >> 8))
+#define HWEIGHT32(w) (HWEIGHT16(w) + HWEIGHT16((w) >> 16))
+#define HWEIGHT64(w) (HWEIGHT32(w) + HWEIGHT32((w) >> 32))
+
+/*
+ * For us lazy bastards
+ */
+#define HWEIGHT(w) HWEIGHT64((u64)(w))

/**
* rol32 - rotate a 32-bit value left

2010-01-29 11:06:09

by Peter Zijlstra

Subject: Re: [PATCH 07/10] bitops: Provide compile time HWEIGHT{8,16,32,64}

On Fri, 2010-01-29 at 11:32 +0100, John Kacur wrote:
>
> I like it. Maybe provide a comment that this provides the Hamming weight, it
> took me a second to realize what this was.

it's called _H_weight for crying out loud.

2010-01-29 11:13:27

by John Kacur

Subject: Re: [PATCH 07/10] bitops: Provide compile time HWEIGHT{8,16,32,64}

On Fri, Jan 29, 2010 at 12:05 PM, Peter Zijlstra <[email protected]> wrote:
> On Fri, 2010-01-29 at 11:32 +0100, John Kacur wrote:
>>
>> I like it. Maybe provide a comment that this provides the Hamming weight, it
>> took me a second to realize what this was.
>
> it's called _H_weight for crying out loud.
>

Ha. :) You're right, but it's always obvious when your hands are dirty with
algorithms that can use it. Easy to forget when you're working on other stuff.
Put the term in there so we can google it more easily.

2010-01-29 16:25:58

by Linus Torvalds

Subject: Re: [tip:perf/core] bitops: Provide compile time HWEIGHT{8,16,32,64}



On Fri, 29 Jan 2010, Peter Zijlstra wrote:
>
> *sigh* and here I thought it being placed right next to hweight_long()
> which uses the arch hweightN() would be clue enough.

No. People who add new uses may be copying old uses, without looking at
the definition.

Also, people who _change_ uses may be changing a value that used to be a
constant into a variable when something is made more dynamic. At which
point it really makes sense to have a function that requires a constant to
_check_ that it gets a constant.

> If people are so clueless, who says they'll read a comment.. but sure I
> guess I can add one.

The comment nobody cares about. But surprisingly crap code generation?
That's bad.

> Subject: bitops: Dummyify the compile-time hweight versions
>
> Because it seems allowed to not think and write kernel code.

I would suggest you look in the mirror at some point.

Linus

2010-01-29 22:55:41

by H. Peter Anvin

Subject: Re: [tip:perf/core] bitops: Provide compile time HWEIGHT{8,16,32,64}

On 01/29/2010 03:03 AM, Peter Zijlstra wrote:
>
> *sigh* and here I thought it being placed right next to hweight_long()
> which uses the arch hweightN() would be clue enough.
>
> If people are so clueless, who says they'll read a comment.. but sure I
> guess I can add one.
>

I would personally say that the Right Way[TM] to do this is to call
these __constant_hweightX() -- so the name reflects the function -- and
then have

#define hweight(x) (__builtin_constant_p(x) ? __constant_hweight(x) :
__arch_hweight(x))

[example does not reflect actual naming]

-hpa
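
A compilable sketch of that dispatch for the 8-bit case (names per
hpa's suggestion; the runtime body is the generic one from
lib/hweight.c):

unsigned int __arch_hweight8(unsigned int w)
{
	unsigned int res = w - ((w >> 1) & 0x55);
	res = (res & 0x33) + ((res >> 2) & 0x33);
	return (res + (res >> 4)) & 0x0F;
}

#define __const_hweight8(w) \
	((!!((w) & 0x01)) + (!!((w) & 0x02)) + (!!((w) & 0x04)) + \
	 (!!((w) & 0x08)) + (!!((w) & 0x10)) + (!!((w) & 0x20)) + \
	 (!!((w) & 0x40)) + (!!((w) & 0x80)))

/* Constants fold away entirely; variables take the runtime path. */
#define hweight8(w) \
	(__builtin_constant_p(w) ? __const_hweight8(w) : __arch_hweight8(w))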

2010-01-30 00:13:37

by H. Peter Anvin

Subject: Re: [PATCH 07/10] bitops: Provide compile time HWEIGHT{8,16,32,64}

On 01/29/2010 03:05 AM, Peter Zijlstra wrote:
> On Fri, 2010-01-29 at 11:32 +0100, John Kacur wrote:
>>
>> I like it. Maybe provide a comment that this provides the Hamming weight, it
>> took me a second to realize what this was.
>
> it's called _H_weight for crying out loud.
>

True... although in computer architecture, when talking about bits, I
think you find that the term population count (popcount) is more readily
recognized.

-hpa

2010-01-30 07:36:03

by Ingo Molnar

Subject: Re: [PATCH 07/10] bitops: Provide compile time HWEIGHT{8,16,32,64}


* Peter Zijlstra <[email protected]> wrote:

> On Fri, 2010-01-29 at 11:32 +0100, John Kacur wrote:
> >
> > I like it. Maybe provide a comment that this provides the Hamming weight,
> > it took me a second to realize what this was.
>
> it's called _H_weight for crying out loud.

I always forget such abbreviations :-/

I think when a single kernel hacker requests a clarifying comment that's
reason enough to add it. After all he broke through a barrier of laziness to
write that email ;-)

Ingo

2010-01-30 16:28:32

by Peter Zijlstra

Subject: Re: [tip:perf/core] bitops: Provide compile time HWEIGHT{8,16,32,64}

On Fri, 2010-01-29 at 14:50 -0800, H. Peter Anvin wrote:

> I would personally say that the Right Way[TM] to do this is to call
> these __constant_hweightX() -- so the name reflects the function -- and
> then have
>
> #define hweight(x) (__builtin_constant_p(x) ? __constant_hweight(x) :
> __arch_hweight(x))

I actually considered that, but since I didn't have a full cross compile
set around I wasn't sure.

The trouble is that asm/bitops.h used to be sufficient for hweightN(),
but with such a scheme we'd need linux/bitops.h.

Anyway, something like the below, I'll try and run it through the cross
compilers I have on Monday or something.


---
arch/alpha/include/asm/bitops.h | 14 +++++++-------
arch/sparc/include/asm/bitops_64.h | 8 ++++----
include/asm-generic/bitops/hweight.h | 8 ++++----
include/linux/bitops.h | 33 +++++++++++++++++++--------------
lib/hweight.c | 19 ++++++++++---------
5 files changed, 44 insertions(+), 38 deletions(-)

Index: linux-2.6/arch/alpha/include/asm/bitops.h
===================================================================
--- linux-2.6.orig/arch/alpha/include/asm/bitops.h
+++ linux-2.6/arch/alpha/include/asm/bitops.h
@@ -405,24 +405,24 @@ static inline int fls(int x)

#if defined(CONFIG_ALPHA_EV6) && defined(CONFIG_ALPHA_EV67)
/* Whee. EV67 can calculate it directly. */
-static inline unsigned long hweight64(unsigned long w)
+static inline unsigned long __arch_hweight64(unsigned long w)
{
return __kernel_ctpop(w);
}

-static inline unsigned int hweight32(unsigned int w)
+static inline unsigned int __arch_hweight32(unsigned int w)
{
- return hweight64(w);
+ return __arch_hweight64(w);
}

-static inline unsigned int hweight16(unsigned int w)
+static inline unsigned int __arch_hweight16(unsigned int w)
{
- return hweight64(w & 0xffff);
+ return __arch_hweight64(w & 0xffff);
}

-static inline unsigned int hweight8(unsigned int w)
+static inline unsigned int __arch_hweight8(unsigned int w)
{
- return hweight64(w & 0xff);
+ return __arch_hweight64(w & 0xff);
}
#else
#include <asm-generic/bitops/hweight.h>
Index: linux-2.6/arch/sparc/include/asm/bitops_64.h
===================================================================
--- linux-2.6.orig/arch/sparc/include/asm/bitops_64.h
+++ linux-2.6/arch/sparc/include/asm/bitops_64.h
@@ -44,7 +44,7 @@ extern void change_bit(unsigned long nr,

#ifdef ULTRA_HAS_POPULATION_COUNT

-static inline unsigned int hweight64(unsigned long w)
+static inline unsigned int __arch_hweight64(unsigned long w)
{
unsigned int res;

@@ -52,7 +52,7 @@ static inline unsigned int hweight64(uns
return res;
}

-static inline unsigned int hweight32(unsigned int w)
+static inline unsigned int __arch_hweight32(unsigned int w)
{
unsigned int res;

@@ -60,7 +60,7 @@ static inline unsigned int hweight32(uns
return res;
}

-static inline unsigned int hweight16(unsigned int w)
+static inline unsigned int __arch_hweight16(unsigned int w)
{
unsigned int res;

@@ -68,7 +68,7 @@ static inline unsigned int hweight16(uns
return res;
}

-static inline unsigned int hweight8(unsigned int w)
+static inline unsigned int __arch_hweight8(unsigned int w)
{
unsigned int res;

Index: linux-2.6/include/asm-generic/bitops/hweight.h
===================================================================
--- linux-2.6.orig/include/asm-generic/bitops/hweight.h
+++ linux-2.6/include/asm-generic/bitops/hweight.h
@@ -3,9 +3,9 @@

#include <asm/types.h>

-extern unsigned int hweight32(unsigned int w);
-extern unsigned int hweight16(unsigned int w);
-extern unsigned int hweight8(unsigned int w);
-extern unsigned long hweight64(__u64 w);
+extern unsigned int __arch_hweight32(unsigned int w);
+extern unsigned int __arch_hweight16(unsigned int w);
+extern unsigned int __arch_hweight8(unsigned int w);
+extern unsigned long __arch_hweight64(__u64 w);

#endif /* _ASM_GENERIC_BITOPS_HWEIGHT_H_ */
Index: linux-2.6/include/linux/bitops.h
===================================================================
--- linux-2.6.orig/include/linux/bitops.h
+++ linux-2.6/include/linux/bitops.h
@@ -40,16 +40,7 @@ static __inline__ int get_count_order(un
return order;
}

-static inline unsigned long hweight_long(unsigned long w)
-{
- return sizeof(w) == 4 ? hweight32(w) : hweight64(w);
-}
-
-/*
- * Clearly slow versions of the hweightN() functions, their benefit is
- * of course compile time evaluation of constant arguments.
- */
-#define HWEIGHT8(w) \
+#define __const_hweight8(w) \
( BUILD_BUG_ON_ZERO(!__builtin_constant_p(w)) + \
(!!((w) & (1ULL << 0))) + \
(!!((w) & (1ULL << 1))) + \
@@ -60,15 +51,29 @@ static inline unsigned long hweight_long
(!!((w) & (1ULL << 6))) + \
(!!((w) & (1ULL << 7))) )

-#define HWEIGHT16(w) (HWEIGHT8(w) + HWEIGHT8((w) >> 8))
-#define HWEIGHT32(w) (HWEIGHT16(w) + HWEIGHT16((w) >> 16))
-#define HWEIGHT64(w) (HWEIGHT32(w) + HWEIGHT32((w) >> 32))
+#define __const_hweight16(w) (__const_hweight8(w) + __const_hweight8((w) >> 8))
+#define __const_hweight32(w) (__const_hweight16(w) + __const_hweight16((w) >> 16))
+#define __const_hweight64(w) (__const_hweight32(w) + __const_hweight32((w) >> 32))
+
+#define hweight8(w) \
+ (__builtin_constant_p(w) ? __const_hweight8(w) : __arch_hweight8(w))
+#define hweight16(w) \
+ (__builtin_constant_p(w) ? __const_hweight16(w) : __arch_hweight16(w))
+#define hweight32(w) \
+ (__builtin_constant_p(w) ? __const_hweight32(w) : __arch_hweight32(w))
+#define hweight64(w) \
+ (__builtin_constant_p(w) ? __const_hweight64(w) : __arch_hweight64(w))

/*
* Type invariant version that simply casts things to the
* largest type.
*/
-#define HWEIGHT(w) HWEIGHT64((u64)(w))
+#define HWEIGHT(w) __const_hweight64((u64)(w))
+
+static inline unsigned long hweight_long(unsigned long w)
+{
+ return sizeof(w) == 4 ? hweight32(w) : hweight64(w);
+}

/**
* rol32 - rotate a 32-bit value left
Index: linux-2.6/lib/hweight.c
===================================================================
--- linux-2.6.orig/lib/hweight.c
+++ linux-2.6/lib/hweight.c
@@ -9,7 +9,7 @@
* The Hamming Weight of a number is the total number of bits set in it.
*/

-unsigned int hweight32(unsigned int w)
+unsigned int __arch_hweight32(unsigned int w)
{
#ifdef ARCH_HAS_FAST_MULTIPLIER
w -= (w >> 1) & 0x55555555;
@@ -24,29 +24,30 @@ unsigned int hweight32(unsigned int w)
return (res + (res >> 16)) & 0x000000FF;
#endif
}
-EXPORT_SYMBOL(hweight32);
+EXPORT_SYMBOL(__arch_hweight32);

-unsigned int hweight16(unsigned int w)
+unsigned int __arch_hweight16(unsigned int w)
{
unsigned int res = w - ((w >> 1) & 0x5555);
res = (res & 0x3333) + ((res >> 2) & 0x3333);
res = (res + (res >> 4)) & 0x0F0F;
return (res + (res >> 8)) & 0x00FF;
}
-EXPORT_SYMBOL(hweight16);
+EXPORT_SYMBOL(__arch_hweight16);

-unsigned int hweight8(unsigned int w)
+unsigned int __arch_hweight8(unsigned int w)
{
unsigned int res = w - ((w >> 1) & 0x55);
res = (res & 0x33) + ((res >> 2) & 0x33);
return (res + (res >> 4)) & 0x0F;
}
-EXPORT_SYMBOL(hweight8);
+EXPORT_SYMBOL(__arch_hweight8);

-unsigned long hweight64(__u64 w)
+unsigned long __arch_hweight64(__u64 w)
{
#if BITS_PER_LONG == 32
- return hweight32((unsigned int)(w >> 32)) + hweight32((unsigned int)w);
+ return __arch_hweight32((unsigned int)(w >> 32)) +
+ __arch_hweight32((unsigned int)w);
#elif BITS_PER_LONG == 64
#ifdef ARCH_HAS_FAST_MULTIPLIER
w -= (w >> 1) & 0x5555555555555555ul;
@@ -63,4 +64,4 @@ unsigned long hweight64(__u64 w)
#endif
#endif
}
-EXPORT_SYMBOL(hweight64);
+EXPORT_SYMBOL(__arch_hweight64);

2010-02-01 12:44:26

by Peter Zijlstra

Subject: Re: [tip:perf/core] bitops: Provide compile time HWEIGHT{8,16,32,64}

On Sat, 2010-01-30 at 17:28 +0100, Peter Zijlstra wrote:
> On Fri, 2010-01-29 at 14:50 -0800, H. Peter Anvin wrote:
>
> > I would personally say that the Right Way[TM] to do this is to call
> > these __constant_hweightX() -- so the name reflects the function -- and
> > then have
> >
> > #define hweight(x) (__builtin_constant_p(x) ? __constant_hweight(x) :
> > __arch_hweight(x))
>
> I actually considered that, but since I didn't have a full cross compile
> set around I wasn't sure.
>
> The trouble is that asm/bitops.h used to be sufficient for hweightN(),
> but with such a scheme we'd need linux/bitops.h.
>
> Anyway, something like the below, I'll try and run it through the cross
> compilers I have on Monday or something.

Ok, that didn't compile, utter include dependency hell.

The below does work, but is still a tad ugly in that if you want to use
any of the HWEIGHT functions that use BUILD_BUG_ON_ZERO() you have to
have included linux/kernel.h yourself.

But at least it builds on x86_64, alpha and sparc64 (didn't have an ia64
compiler around).

FWIW I was tempted to change the return type of hweight64() from
unsigned long to unsigned int, it's not as if it'll ever return a value
larger than 64.

---
arch/alpha/include/asm/bitops.h | 18 ++++++------
arch/ia64/include/asm/bitops.h | 11 ++++---
arch/sparc/include/asm/bitops_64.h | 11 ++++---
include/asm-generic/bitops/arch_hweight.h | 11 +++++++
include/asm-generic/bitops/const_hweight.h | 42 +++++++++++++++++++++++++++++
include/asm-generic/bitops/hweight.h | 8 +----
include/linux/bitops.h | 25 -----------------
lib/hweight.c | 19 ++++++-------
8 files changed, 87 insertions(+), 58 deletions(-)

Index: linux-2.6/arch/alpha/include/asm/bitops.h
===================================================================
--- linux-2.6.orig/arch/alpha/include/asm/bitops.h
+++ linux-2.6/arch/alpha/include/asm/bitops.h
@@ -405,29 +405,31 @@ static inline int fls(int x)

#if defined(CONFIG_ALPHA_EV6) && defined(CONFIG_ALPHA_EV67)
/* Whee. EV67 can calculate it directly. */
-static inline unsigned long hweight64(unsigned long w)
+static inline unsigned long __arch_hweight64(unsigned long w)
{
return __kernel_ctpop(w);
}

-static inline unsigned int hweight32(unsigned int w)
+static inline unsigned int __arch_hweight32(unsigned int w)
{
- return hweight64(w);
+ return __arch_hweight64(w);
}

-static inline unsigned int hweight16(unsigned int w)
+static inline unsigned int __arch_hweight16(unsigned int w)
{
- return hweight64(w & 0xffff);
+ return __arch_hweight64(w & 0xffff);
}

-static inline unsigned int hweight8(unsigned int w)
+static inline unsigned int __arch_hweight8(unsigned int w)
{
- return hweight64(w & 0xff);
+ return __arch_hweight64(w & 0xff);
}
#else
-#include <asm-generic/bitops/hweight.h>
+#include <asm-generic/bitops/arch_hweight.h>
#endif

+#include <asm-generic/bitops/const_hweight.h>
+
#endif /* __KERNEL__ */

#include <asm-generic/bitops/find.h>
Index: linux-2.6/arch/ia64/include/asm/bitops.h
===================================================================
--- linux-2.6.orig/arch/ia64/include/asm/bitops.h
+++ linux-2.6/arch/ia64/include/asm/bitops.h
@@ -437,17 +437,18 @@ __fls (unsigned long x)
* hweightN: returns the hamming weight (i.e. the number
* of bits set) of a N-bit word
*/
-static __inline__ unsigned long
-hweight64 (unsigned long x)
+static __inline__ unsigned long __arch_hweight64(unsigned long x)
{
unsigned long result;
result = ia64_popcnt(x);
return result;
}

-#define hweight32(x) (unsigned int) hweight64((x) & 0xfffffffful)
-#define hweight16(x) (unsigned int) hweight64((x) & 0xfffful)
-#define hweight8(x) (unsigned int) hweight64((x) & 0xfful)
+#define __arch_hweight32(x) ((unsigned int) __arch_hweight64((x) & 0xfffffffful))
+#define __arch_hweight16(x) ((unsigned int) __arch_hweight64((x) & 0xfffful))
+#define __arch_hweight8(x) ((unsigned int) __arch_hweight64((x) & 0xfful))
+
+#include <asm-generic/bitops/const_hweight.h>

#endif /* __KERNEL__ */

Index: linux-2.6/arch/sparc/include/asm/bitops_64.h
===================================================================
--- linux-2.6.orig/arch/sparc/include/asm/bitops_64.h
+++ linux-2.6/arch/sparc/include/asm/bitops_64.h
@@ -44,7 +44,7 @@ extern void change_bit(unsigned long nr,

#ifdef ULTRA_HAS_POPULATION_COUNT

-static inline unsigned int hweight64(unsigned long w)
+static inline unsigned int __arch_hweight64(unsigned long w)
{
unsigned int res;

@@ -52,7 +52,7 @@ static inline unsigned int hweight64(uns
return res;
}

-static inline unsigned int hweight32(unsigned int w)
+static inline unsigned int __arch_hweight32(unsigned int w)
{
unsigned int res;

@@ -60,7 +60,7 @@ static inline unsigned int hweight32(uns
return res;
}

-static inline unsigned int hweight16(unsigned int w)
+static inline unsigned int __arch_hweight16(unsigned int w)
{
unsigned int res;

@@ -68,7 +68,7 @@ static inline unsigned int hweight16(uns
return res;
}

-static inline unsigned int hweight8(unsigned int w)
+static inline unsigned int __arch_hweight8(unsigned int w)
{
unsigned int res;

@@ -78,9 +78,10 @@ static inline unsigned int hweight8(unsi

#else

-#include <asm-generic/bitops/hweight.h>
+#include <asm-generic/bitops/arch_hweight.h>

#endif
+#include <asm-generic/bitops/const_hweight.h>
#include <asm-generic/bitops/lock.h>
#endif /* __KERNEL__ */

Index: linux-2.6/include/asm-generic/bitops/arch_hweight.h
===================================================================
--- /dev/null
+++ linux-2.6/include/asm-generic/bitops/arch_hweight.h
@@ -0,0 +1,11 @@
+#ifndef _ASM_GENERIC_BITOPS_ARCH_HWEIGHT_H_
+#define _ASM_GENERIC_BITOPS_ARCH_HWEIGHT_H_
+
+#include <asm/types.h>
+
+extern unsigned int __arch_hweight32(unsigned int w);
+extern unsigned int __arch_hweight16(unsigned int w);
+extern unsigned int __arch_hweight8(unsigned int w);
+extern unsigned long __arch_hweight64(__u64 w);
+
+#endif /* _ASM_GENERIC_BITOPS_ARCH_HWEIGHT_H_ */
Index: linux-2.6/include/asm-generic/bitops/const_hweight.h
===================================================================
--- /dev/null
+++ linux-2.6/include/asm-generic/bitops/const_hweight.h
@@ -0,0 +1,42 @@
+#ifndef _ASM_GENERIC_BITOPS_CONST_HWEIGHT_H_
+#define _ASM_GENERIC_BITOPS_CONST_HWEIGHT_H_
+
+/*
+ * Compile time versions of __arch_hweightN()
+ */
+#define __const_hweight8(w) \
+ ( (!!((w) & (1ULL << 0))) + \
+ (!!((w) & (1ULL << 1))) + \
+ (!!((w) & (1ULL << 2))) + \
+ (!!((w) & (1ULL << 3))) + \
+ (!!((w) & (1ULL << 4))) + \
+ (!!((w) & (1ULL << 5))) + \
+ (!!((w) & (1ULL << 6))) + \
+ (!!((w) & (1ULL << 7))) )
+
+#define __const_hweight16(w) (__const_hweight8(w) + __const_hweight8((w) >> 8 ))
+#define __const_hweight32(w) (__const_hweight16(w) + __const_hweight16((w) >> 16))
+#define __const_hweight64(w) (__const_hweight32(w) + __const_hweight32((w) >> 32))
+
+/*
+ * Generic interface.
+ */
+#define hweight8(w) (__builtin_constant_p(w) ? __const_hweight8(w) : __arch_hweight8(w))
+#define hweight16(w) (__builtin_constant_p(w) ? __const_hweight16(w) : __arch_hweight16(w))
+#define hweight32(w) (__builtin_constant_p(w) ? __const_hweight32(w) : __arch_hweight32(w))
+#define hweight64(w) (__builtin_constant_p(w) ? __const_hweight64(w) : __arch_hweight64(w))
+
+/*
+ * Interface for known constant arguments
+ */
+#define HWEIGHT8(w) (BUILD_BUG_ON_ZERO(!__builtin_constant_p(w)) + __const_hweight8(w))
+#define HWEIGHT16(w) (BUILD_BUG_ON_ZERO(!__builtin_constant_p(w)) + __const_hweight16(w))
+#define HWEIGHT32(w) (BUILD_BUG_ON_ZERO(!__builtin_constant_p(w)) + __const_hweight32(w))
+#define HWEIGHT64(w) (BUILD_BUG_ON_ZERO(!__builtin_constant_p(w)) + __const_hweight64(w))
+
+/*
+ * Type invariant interface to the compile time constant hweight functions.
+ */
+#define HWEIGHT(w) HWEIGHT64((u64)(w))
+
+#endif /* _ASM_GENERIC_BITOPS_CONST_HWEIGHT_H_ */
Index: linux-2.6/include/asm-generic/bitops/hweight.h
===================================================================
--- linux-2.6.orig/include/asm-generic/bitops/hweight.h
+++ linux-2.6/include/asm-generic/bitops/hweight.h
@@ -1,11 +1,7 @@
#ifndef _ASM_GENERIC_BITOPS_HWEIGHT_H_
#define _ASM_GENERIC_BITOPS_HWEIGHT_H_

-#include <asm/types.h>
-
-extern unsigned int hweight32(unsigned int w);
-extern unsigned int hweight16(unsigned int w);
-extern unsigned int hweight8(unsigned int w);
-extern unsigned long hweight64(__u64 w);
+#include <asm-generic/bitops/arch_hweight.h>
+#include <asm-generic/bitops/const_hweight.h>

#endif /* _ASM_GENERIC_BITOPS_HWEIGHT_H_ */
Index: linux-2.6/include/linux/bitops.h
===================================================================
--- linux-2.6.orig/include/linux/bitops.h
+++ linux-2.6/include/linux/bitops.h
@@ -45,31 +45,6 @@ static inline unsigned long hweight_long
return sizeof(w) == 4 ? hweight32(w) : hweight64(w);
}

-/*
- * Clearly slow versions of the hweightN() functions, their benefit is
- * of course compile time evaluation of constant arguments.
- */
-#define HWEIGHT8(w) \
- ( BUILD_BUG_ON_ZERO(!__builtin_constant_p(w)) + \
- (!!((w) & (1ULL << 0))) + \
- (!!((w) & (1ULL << 1))) + \
- (!!((w) & (1ULL << 2))) + \
- (!!((w) & (1ULL << 3))) + \
- (!!((w) & (1ULL << 4))) + \
- (!!((w) & (1ULL << 5))) + \
- (!!((w) & (1ULL << 6))) + \
- (!!((w) & (1ULL << 7))) )
-
-#define HWEIGHT16(w) (HWEIGHT8(w) + HWEIGHT8((w) >> 8))
-#define HWEIGHT32(w) (HWEIGHT16(w) + HWEIGHT16((w) >> 16))
-#define HWEIGHT64(w) (HWEIGHT32(w) + HWEIGHT32((w) >> 32))
-
-/*
- * Type invariant version that simply casts things to the
- * largest type.
- */
-#define HWEIGHT(w) HWEIGHT64((u64)(w))
-
/**
* rol32 - rotate a 32-bit value left
* @word: value to rotate
Index: linux-2.6/lib/hweight.c
===================================================================
--- linux-2.6.orig/lib/hweight.c
+++ linux-2.6/lib/hweight.c
@@ -9,7 +9,7 @@
* The Hamming Weight of a number is the total number of bits set in it.
*/

-unsigned int hweight32(unsigned int w)
+unsigned int __arch_hweight32(unsigned int w)
{
#ifdef ARCH_HAS_FAST_MULTIPLIER
w -= (w >> 1) & 0x55555555;
@@ -24,29 +24,30 @@ unsigned int hweight32(unsigned int w)
return (res + (res >> 16)) & 0x000000FF;
#endif
}
-EXPORT_SYMBOL(hweight32);
+EXPORT_SYMBOL(__arch_hweight32);

-unsigned int hweight16(unsigned int w)
+unsigned int __arch_hweight16(unsigned int w)
{
unsigned int res = w - ((w >> 1) & 0x5555);
res = (res & 0x3333) + ((res >> 2) & 0x3333);
res = (res + (res >> 4)) & 0x0F0F;
return (res + (res >> 8)) & 0x00FF;
}
-EXPORT_SYMBOL(hweight16);
+EXPORT_SYMBOL(__arch_hweight16);

-unsigned int hweight8(unsigned int w)
+unsigned int __arch_hweight8(unsigned int w)
{
unsigned int res = w - ((w >> 1) & 0x55);
res = (res & 0x33) + ((res >> 2) & 0x33);
return (res + (res >> 4)) & 0x0F;
}
-EXPORT_SYMBOL(hweight8);
+EXPORT_SYMBOL(__arch_hweight8);

-unsigned long hweight64(__u64 w)
+unsigned long __arch_hweight64(__u64 w)
{
#if BITS_PER_LONG == 32
- return hweight32((unsigned int)(w >> 32)) + hweight32((unsigned int)w);
+ return __arch_hweight32((unsigned int)(w >> 32)) +
+ __arch_hweight32((unsigned int)w);
#elif BITS_PER_LONG == 64
#ifdef ARCH_HAS_FAST_MULTIPLIER
w -= (w >> 1) & 0x5555555555555555ul;
@@ -63,4 +64,4 @@ unsigned long hweight64(__u64 w)
#endif
#endif
}
-EXPORT_SYMBOL(hweight64);
+EXPORT_SYMBOL(__arch_hweight64);

2010-02-01 19:12:07

by H. Peter Anvin

Subject: Re: [tip:perf/core] bitops: Provide compile time HWEIGHT{8,16,32,64}

On 02/01/2010 04:43 AM, Peter Zijlstra wrote:
>
> The below does work, but is still a tad ugly in that if you want to use
> any of the HWEIGHT functions that use BUILD_BUG_ON_ZERO() you have to
> have included linux/kernel.h yourself.
>
> But at least it builds on x86_64, alpha and sparc64 (didn't have an ia64
> compiler around).
>
> FWIW I was tempted to change the return type of hweight64() from
> unsigned long to unsigned int, it's not as if it'll ever return a value
> larger than 64.
>

That might generate worse code in some cases, though (something that
needs it as a 64-bit value would have to extend it unnecessarily), but
probably the easiest is to just compile and see what happens.

x86 also has a POPCNT instruction now, which we should be able to use
via alternatives. That's something to do after your cleanup is in.

Anyway, I like the cleanup.

Acked-by: H. Peter Anvin <[email protected]>

-hpa
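
For reference, a hedged sketch of what a POPCNT-based
__arch_hweight32() could look like (the eventual in-tree version wires
this up through the alternatives mechanism hpa mentions, so take this
as illustration only):

static inline unsigned int __arch_hweight32(unsigned int w)
{
	unsigned int res;

	/* Assumes the CPU has POPCNT; real code patches this in via
	 * alternatives at boot. */
	asm ("popcnt %1, %0" : "=r" (res) : "r" (w));
	return res;
}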

2010-04-06 23:04:26

by Peter Zijlstra

Subject: [tip:core/hweight] bitops: Optimize hweight() by making use of compile-time evaluation

Commit-ID: 1527bc8b928dd1399c3d3467dd47d9ede210978a
Gitweb: http://git.kernel.org/tip/1527bc8b928dd1399c3d3467dd47d9ede210978a
Author: Peter Zijlstra <[email protected]>
AuthorDate: Mon, 1 Feb 2010 15:03:07 +0100
Committer: H. Peter Anvin <[email protected]>
CommitDate: Tue, 6 Apr 2010 15:52:11 -0700

bitops: Optimize hweight() by making use of compile-time evaluation

Rename the existing runtime hweight() implementations to
__arch_hweight(), rename the compile-time versions to __const_hweight()
and then have hweight() pick between them.

Suggested-by: H. Peter Anvin <[email protected]>
Signed-off-by: Peter Zijlstra <[email protected]>
LKML-Reference: <20100318111929.GB11152@aftab>
Acked-by: H. Peter Anvin <[email protected]>
LKML-Reference: <1265028224.24455.154.camel@laptop>
Signed-off-by: H. Peter Anvin <[email protected]>
---
arch/alpha/include/asm/bitops.h | 18 ++++++-----
arch/ia64/include/asm/bitops.h | 11 ++++---
arch/sparc/include/asm/bitops_64.h | 11 ++++---
include/asm-generic/bitops/arch_hweight.h | 11 +++++++
include/asm-generic/bitops/const_hweight.h | 42 ++++++++++++++++++++++++++++
include/asm-generic/bitops/hweight.h | 8 +----
include/linux/bitops.h | 25 ----------------
lib/hweight.c | 19 ++++++------
8 files changed, 87 insertions(+), 58 deletions(-)

diff --git a/arch/alpha/include/asm/bitops.h b/arch/alpha/include/asm/bitops.h
index 15f3ae2..296da1d 100644
--- a/arch/alpha/include/asm/bitops.h
+++ b/arch/alpha/include/asm/bitops.h
@@ -405,29 +405,31 @@ static inline int fls(int x)

#if defined(CONFIG_ALPHA_EV6) && defined(CONFIG_ALPHA_EV67)
/* Whee. EV67 can calculate it directly. */
-static inline unsigned long hweight64(unsigned long w)
+static inline unsigned long __arch_hweight64(unsigned long w)
{
return __kernel_ctpop(w);
}

-static inline unsigned int hweight32(unsigned int w)
+static inline unsigned int __arch_hweight32(unsigned int w)
{
- return hweight64(w);
+ return __arch_hweight64(w);
}

-static inline unsigned int hweight16(unsigned int w)
+static inline unsigned int __arch_hweight16(unsigned int w)
{
- return hweight64(w & 0xffff);
+ return __arch_hweight64(w & 0xffff);
}

-static inline unsigned int hweight8(unsigned int w)
+static inline unsigned int __arch_hweight8(unsigned int w)
{
- return hweight64(w & 0xff);
+ return __arch_hweight64(w & 0xff);
}
#else
-#include <asm-generic/bitops/hweight.h>
+#include <asm-generic/bitops/arch_hweight.h>
#endif

+#include <asm-generic/bitops/const_hweight.h>
+
#endif /* __KERNEL__ */

#include <asm-generic/bitops/find.h>
diff --git a/arch/ia64/include/asm/bitops.h b/arch/ia64/include/asm/bitops.h
index 6ebc229..9da3df6 100644
--- a/arch/ia64/include/asm/bitops.h
+++ b/arch/ia64/include/asm/bitops.h
@@ -437,17 +437,18 @@ __fls (unsigned long x)
* hweightN: returns the hamming weight (i.e. the number
* of bits set) of a N-bit word
*/
-static __inline__ unsigned long
-hweight64 (unsigned long x)
+static __inline__ unsigned long __arch_hweight64(unsigned long x)
{
unsigned long result;
result = ia64_popcnt(x);
return result;
}

-#define hweight32(x) (unsigned int) hweight64((x) & 0xfffffffful)
-#define hweight16(x) (unsigned int) hweight64((x) & 0xfffful)
-#define hweight8(x) (unsigned int) hweight64((x) & 0xfful)
+#define __arch_hweight32(x) ((unsigned int) __arch_hweight64((x) & 0xfffffffful))
+#define __arch_hweight16(x) ((unsigned int) __arch_hweight64((x) & 0xfffful))
+#define __arch_hweight8(x) ((unsigned int) __arch_hweight64((x) & 0xfful))
+
+#include <asm-generic/bitops/const_hweight.h>

#endif /* __KERNEL__ */

diff --git a/arch/sparc/include/asm/bitops_64.h b/arch/sparc/include/asm/bitops_64.h
index e72ac9c..766121a 100644
--- a/arch/sparc/include/asm/bitops_64.h
+++ b/arch/sparc/include/asm/bitops_64.h
@@ -44,7 +44,7 @@ extern void change_bit(unsigned long nr, volatile unsigned long *addr);

#ifdef ULTRA_HAS_POPULATION_COUNT

-static inline unsigned int hweight64(unsigned long w)
+static inline unsigned int __arch_hweight64(unsigned long w)
{
unsigned int res;

@@ -52,7 +52,7 @@ static inline unsigned int hweight64(unsigned long w)
return res;
}

-static inline unsigned int hweight32(unsigned int w)
+static inline unsigned int __arch_hweight32(unsigned int w)
{
unsigned int res;

@@ -60,7 +60,7 @@ static inline unsigned int hweight32(unsigned int w)
return res;
}

-static inline unsigned int hweight16(unsigned int w)
+static inline unsigned int __arch_hweight16(unsigned int w)
{
unsigned int res;

@@ -68,7 +68,7 @@ static inline unsigned int hweight16(unsigned int w)
return res;
}

-static inline unsigned int hweight8(unsigned int w)
+static inline unsigned int __arch_hweight8(unsigned int w)
{
unsigned int res;

@@ -78,9 +78,10 @@ static inline unsigned int hweight8(unsigned int w)

#else

-#include <asm-generic/bitops/hweight.h>
+#include <asm-generic/bitops/arch_hweight.h>

#endif
+#include <asm-generic/bitops/const_hweight.h>
#include <asm-generic/bitops/lock.h>
#endif /* __KERNEL__ */

diff --git a/include/asm-generic/bitops/arch_hweight.h b/include/asm-generic/bitops/arch_hweight.h
new file mode 100644
index 0000000..3a7be84
--- /dev/null
+++ b/include/asm-generic/bitops/arch_hweight.h
@@ -0,0 +1,11 @@
+#ifndef _ASM_GENERIC_BITOPS_ARCH_HWEIGHT_H_
+#define _ASM_GENERIC_BITOPS_ARCH_HWEIGHT_H_
+
+#include <asm/types.h>
+
+extern unsigned int __arch_hweight32(unsigned int w);
+extern unsigned int __arch_hweight16(unsigned int w);
+extern unsigned int __arch_hweight8(unsigned int w);
+extern unsigned long __arch_hweight64(__u64 w);
+
+#endif /* _ASM_GENERIC_BITOPS_ARCH_HWEIGHT_H_ */
diff --git a/include/asm-generic/bitops/const_hweight.h b/include/asm-generic/bitops/const_hweight.h
new file mode 100644
index 0000000..fa2a50b
--- /dev/null
+++ b/include/asm-generic/bitops/const_hweight.h
@@ -0,0 +1,42 @@
+#ifndef _ASM_GENERIC_BITOPS_CONST_HWEIGHT_H_
+#define _ASM_GENERIC_BITOPS_CONST_HWEIGHT_H_
+
+/*
+ * Compile time versions of __arch_hweightN()
+ */
+#define __const_hweight8(w) \
+ ( (!!((w) & (1ULL << 0))) + \
+ (!!((w) & (1ULL << 1))) + \
+ (!!((w) & (1ULL << 2))) + \
+ (!!((w) & (1ULL << 3))) + \
+ (!!((w) & (1ULL << 4))) + \
+ (!!((w) & (1ULL << 5))) + \
+ (!!((w) & (1ULL << 6))) + \
+ (!!((w) & (1ULL << 7))) )
+
+#define __const_hweight16(w) (__const_hweight8(w) + __const_hweight8((w) >> 8 ))
+#define __const_hweight32(w) (__const_hweight16(w) + __const_hweight16((w) >> 16))
+#define __const_hweight64(w) (__const_hweight32(w) + __const_hweight32((w) >> 32))
+
+/*
+ * Generic interface.
+ */
+#define hweight8(w) (__builtin_constant_p(w) ? __const_hweight8(w) : __arch_hweight8(w))
+#define hweight16(w) (__builtin_constant_p(w) ? __const_hweight16(w) : __arch_hweight16(w))
+#define hweight32(w) (__builtin_constant_p(w) ? __const_hweight32(w) : __arch_hweight32(w))
+#define hweight64(w) (__builtin_constant_p(w) ? __const_hweight64(w) : __arch_hweight64(w))
+
+/*
+ * Interface for known constant arguments
+ */
+#define HWEIGHT8(w) (BUILD_BUG_ON_ZERO(!__builtin_constant_p(w)) + __const_hweight8(w))
+#define HWEIGHT16(w) (BUILD_BUG_ON_ZERO(!__builtin_constant_p(w)) + __const_hweight16(w))
+#define HWEIGHT32(w) (BUILD_BUG_ON_ZERO(!__builtin_constant_p(w)) + __const_hweight32(w))
+#define HWEIGHT64(w) (BUILD_BUG_ON_ZERO(!__builtin_constant_p(w)) + __const_hweight64(w))
+
+/*
+ * Type invariant interface to the compile time constant hweight functions.
+ */
+#define HWEIGHT(w) HWEIGHT64((u64)(w))
+
+#endif /* _ASM_GENERIC_BITOPS_CONST_HWEIGHT_H_ */
diff --git a/include/asm-generic/bitops/hweight.h b/include/asm-generic/bitops/hweight.h
index fbbc383..a94d651 100644
--- a/include/asm-generic/bitops/hweight.h
+++ b/include/asm-generic/bitops/hweight.h
@@ -1,11 +1,7 @@
#ifndef _ASM_GENERIC_BITOPS_HWEIGHT_H_
#define _ASM_GENERIC_BITOPS_HWEIGHT_H_

-#include <asm/types.h>
-
-extern unsigned int hweight32(unsigned int w);
-extern unsigned int hweight16(unsigned int w);
-extern unsigned int hweight8(unsigned int w);
-extern unsigned long hweight64(__u64 w);
+#include <asm-generic/bitops/arch_hweight.h>
+#include <asm-generic/bitops/const_hweight.h>

#endif /* _ASM_GENERIC_BITOPS_HWEIGHT_H_ */
diff --git a/include/linux/bitops.h b/include/linux/bitops.h
index b793898..c55d5bc 100644
--- a/include/linux/bitops.h
+++ b/include/linux/bitops.h
@@ -47,31 +47,6 @@ static inline unsigned long hweight_long(unsigned long w)
return sizeof(w) == 4 ? hweight32(w) : hweight64(w);
}

-/*
- * Clearly slow versions of the hweightN() functions, their benefit is
- * of course compile time evaluation of constant arguments.
- */
-#define HWEIGHT8(w) \
- ( BUILD_BUG_ON_ZERO(!__builtin_constant_p(w)) + \
- (!!((w) & (1ULL << 0))) + \
- (!!((w) & (1ULL << 1))) + \
- (!!((w) & (1ULL << 2))) + \
- (!!((w) & (1ULL << 3))) + \
- (!!((w) & (1ULL << 4))) + \
- (!!((w) & (1ULL << 5))) + \
- (!!((w) & (1ULL << 6))) + \
- (!!((w) & (1ULL << 7))) )
-
-#define HWEIGHT16(w) (HWEIGHT8(w) + HWEIGHT8((w) >> 8))
-#define HWEIGHT32(w) (HWEIGHT16(w) + HWEIGHT16((w) >> 16))
-#define HWEIGHT64(w) (HWEIGHT32(w) + HWEIGHT32((w) >> 32))
-
-/*
- * Type invariant version that simply casts things to the
- * largest type.
- */
-#define HWEIGHT(w) HWEIGHT64((u64)(w))
-
/**
* rol32 - rotate a 32-bit value left
* @word: value to rotate
diff --git a/lib/hweight.c b/lib/hweight.c
index 63ee4eb..a6927e7 100644
--- a/lib/hweight.c
+++ b/lib/hweight.c
@@ -9,7 +9,7 @@
* The Hamming Weight of a number is the total number of bits set in it.
*/

-unsigned int hweight32(unsigned int w)
+unsigned int __arch_hweight32(unsigned int w)
{
#ifdef ARCH_HAS_FAST_MULTIPLIER
w -= (w >> 1) & 0x55555555;
@@ -24,29 +24,30 @@ unsigned int hweight32(unsigned int w)
return (res + (res >> 16)) & 0x000000FF;
#endif
}
-EXPORT_SYMBOL(hweight32);
+EXPORT_SYMBOL(__arch_hweight32);

-unsigned int hweight16(unsigned int w)
+unsigned int __arch_hweight16(unsigned int w)
{
unsigned int res = w - ((w >> 1) & 0x5555);
res = (res & 0x3333) + ((res >> 2) & 0x3333);
res = (res + (res >> 4)) & 0x0F0F;
return (res + (res >> 8)) & 0x00FF;
}
-EXPORT_SYMBOL(hweight16);
+EXPORT_SYMBOL(__arch_hweight16);

-unsigned int hweight8(unsigned int w)
+unsigned int __arch_hweight8(unsigned int w)
{
unsigned int res = w - ((w >> 1) & 0x55);
res = (res & 0x33) + ((res >> 2) & 0x33);
return (res + (res >> 4)) & 0x0F;
}
-EXPORT_SYMBOL(hweight8);
+EXPORT_SYMBOL(__arch_hweight8);

-unsigned long hweight64(__u64 w)
+unsigned long __arch_hweight64(__u64 w)
{
#if BITS_PER_LONG == 32
- return hweight32((unsigned int)(w >> 32)) + hweight32((unsigned int)w);
+ return __arch_hweight32((unsigned int)(w >> 32)) +
+ __arch_hweight32((unsigned int)w);
#elif BITS_PER_LONG == 64
#ifdef ARCH_HAS_FAST_MULTIPLIER
w -= (w >> 1) & 0x5555555555555555ul;
@@ -63,4 +64,4 @@ unsigned long hweight64(__u64 w)
#endif
#endif
}
-EXPORT_SYMBOL(hweight64);
+EXPORT_SYMBOL(__arch_hweight64);
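
With that commit in place, the interface splits cleanly in three; a
short usage sketch (MY_MASK and count_bits() are made-up names, the new
kernel headers assumed):

#define MY_MASK 0xf0ULL

/* HWEIGHT(): constant arguments only -- BUILD_BUG_ON_ZERO() breaks the
 * build otherwise; here it folds to 4 at compile time. */
static const int mask_weight = HWEIGHT(MY_MASK);

unsigned int count_bits(unsigned long w)
{
	/* hweight_long() picks hweight32()/hweight64() by word size;
	 * those fold constants via __const_hweightN() and fall back to
	 * the runtime __arch_hweightN() for everything else. */
	return hweight_long(w);
}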