num_online_cpus() and num_possible_cpus() are not performance
critical and are quite large.
Uninlining them shrinks kernel text size by 7523 bytes on x86
if NR_CPUS > 32.
Signed-off-by: Eric Dumazet <[email protected]>
---
include/linux/cpumask.h | 4 ++--
init/main.c | 12 ++++++++++++
2 files changed, 14 insertions(+), 2 deletions(-)
diff --git a/include/linux/cpumask.h b/include/linux/cpumask.h
index 21e1dd4..f9b2b51 100644
--- a/include/linux/cpumask.h
+++ b/include/linux/cpumask.h
@@ -507,8 +507,8 @@ extern cpumask_t cpu_present_map;
extern cpumask_t cpu_active_map;
#if NR_CPUS > 1
-#define num_online_cpus() cpus_weight_nr(cpu_online_map)
-#define num_possible_cpus() cpus_weight_nr(cpu_possible_map)
+extern int num_online_cpus(void);
+extern int num_possible_cpus(void);
#define num_present_cpus() cpus_weight_nr(cpu_present_map)
#define cpu_online(cpu) cpu_isset((cpu), cpu_online_map)
#define cpu_possible(cpu) cpu_isset((cpu), cpu_possible_map)
diff --git a/init/main.c b/init/main.c
index 7e117a2..a1a3e55 100644
--- a/init/main.c
+++ b/init/main.c
@@ -376,6 +376,18 @@ EXPORT_SYMBOL(cpu_mask_all);
int nr_cpu_ids __read_mostly = NR_CPUS;
EXPORT_SYMBOL(nr_cpu_ids);
+int num_online_cpus(void)
+{
+ return cpus_weight_nr(cpu_online_map);
+}
+EXPORT_SYMBOL(num_online_cpus);
+
+int num_possible_cpus(void)
+{
+ return cpus_weight_nr(cpu_possible_map);
+}
+EXPORT_SYMBOL(num_possible_cpus);
+
/* An arch may set nr_cpu_ids earlier if needed, so this would be redundant */
static void __init setup_nr_cpu_ids(void)
{
On Fri, 05 Dec 2008 18:33:44 +0100
Eric Dumazet <[email protected]> wrote:
> num_online_cpus() and num_possible_cpus() are not performance
> critical and are quite large.
>
> Unlining them shrinks kernel text size by 7523 bytes on x86,
> if NR_CPUS>32
>
> Signed-off-by: Eric Dumazet <[email protected]>
> ---
> include/linux/cpumask.h | 4 ++--
> init/main.c | 12 ++++++++++++
> 2 files changed, 14 insertions(+), 2 deletions(-)
>
> diff --git a/include/linux/cpumask.h b/include/linux/cpumask.h
> index 21e1dd4..f9b2b51 100644
> --- a/include/linux/cpumask.h
> +++ b/include/linux/cpumask.h
> @@ -507,8 +507,8 @@ extern cpumask_t cpu_present_map;
> extern cpumask_t cpu_active_map;
>
> #if NR_CPUS > 1
> -#define num_online_cpus() cpus_weight_nr(cpu_online_map)
> -#define num_possible_cpus() cpus_weight_nr(cpu_possible_map)
> +extern int num_online_cpus(void);
> +extern int num_possible_cpus(void);
> #define num_present_cpus() cpus_weight_nr(cpu_present_map)
> #define cpu_online(cpu) cpu_isset((cpu), cpu_online_map)
> #define cpu_possible(cpu) cpu_isset((cpu), cpu_possible_map)
> diff --git a/init/main.c b/init/main.c
> index 7e117a2..a1a3e55 100644
> --- a/init/main.c
> +++ b/init/main.c
> @@ -376,6 +376,18 @@ EXPORT_SYMBOL(cpu_mask_all);
> int nr_cpu_ids __read_mostly = NR_CPUS;
> EXPORT_SYMBOL(nr_cpu_ids);
>
> +int num_online_cpus(void)
> +{
> + return cpus_weight_nr(cpu_online_map);
> +}
> +EXPORT_SYMBOL(num_online_cpus);
> +
> +int num_possible_cpus(void)
> +{
> + return cpus_weight_nr(cpu_possible_map);
> +}
> +EXPORT_SYMBOL(num_possible_cpus);
> +
> /* An arch may set nr_cpu_ids earlier if needed, so this would be redundant */
> static void __init setup_nr_cpu_ids(void)
> {
Looks OK.
That area in init/main.c is horrid - it took quite some staring through
the ifdef tangle for me to convince myself that the code you added was
reliably SMP-only.
Perhaps sometime a lot of this cpu masky code should be moved over to
kernel/cpu.c and cleaned up.
On Saturday 06 December 2008 04:03:44 Eric Dumazet wrote:
> num_online_cpus() and num_possible_cpus() are not performance
> critical and are quite large.
>
> Unlining them shrinks kernel text size by 7523 bytes on x86,
> if NR_CPUS>32
Hi Eric!
Slight misdiagnosis, I think. One base problem is addressed in fixing
the bitmap operators (see "[PATCH] bitmap: test for constant as well as
small size for inline versions" on lkml Message-Id: <[email protected]>). This is already in
linux-next, and I've pasted it below.
Worse, you used the obsolete cpumask operators :)
Thanks!
Rusty.
bitmap: test for constant as well as small size for inline versions
bitmap_zero et al have a fastpath for nbits <= BITS_PER_LONG, but this
should really only apply where nbits is known at compile time.
This only saves about 1200 bytes on an allyesconfig kernel, but with
cpumasks going variable that number will increase.
text data bss dec hex filename
35327852 5035607 6782976 47146435 2cf65c3 vmlinux-before
35326640 5035607 6782976 47145223 2cf6107 vmlinux-after
Signed-off-by: Rusty Russell <[email protected]>
---
include/linux/bitmap.h | 35 +++++++++++++++++++----------------
1 file changed, 19 insertions(+), 16 deletions(-)
diff -r b4540ad329c1 include/linux/bitmap.h
--- a/include/linux/bitmap.h Thu Nov 06 13:00:51 2008 +1100
+++ b/include/linux/bitmap.h Thu Nov 06 14:34:07 2008 +1100
@@ -137,9 +137,12 @@ extern void bitmap_copy_le(void *dst, co
(1UL<<((nbits) % BITS_PER_LONG))-1 : ~0UL \
)
+#define small_const_nbits(nbits) \
+ (__builtin_constant_p(nbits) && (nbits) <= BITS_PER_LONG)
+
static inline void bitmap_zero(unsigned long *dst, int nbits)
{
- if (nbits <= BITS_PER_LONG)
+ if (small_const_nbits(nbits))
*dst = 0UL;
else {
int len = BITS_TO_LONGS(nbits) * sizeof(unsigned long);
@@ -150,7 +153,7 @@ static inline void bitmap_fill(unsigned
static inline void bitmap_fill(unsigned long *dst, int nbits)
{
size_t nlongs = BITS_TO_LONGS(nbits);
- if (nlongs > 1) {
+ if (!small_const_nbits(nbits)) {
int len = (nlongs - 1) * sizeof(unsigned long);
memset(dst, 0xff, len);
}
@@ -160,7 +163,7 @@ static inline void bitmap_copy(unsigned
static inline void bitmap_copy(unsigned long *dst, const unsigned long *src,
int nbits)
{
- if (nbits <= BITS_PER_LONG)
+ if (small_const_nbits(nbits))
*dst = *src;
else {
int len = BITS_TO_LONGS(nbits) * sizeof(unsigned long);
@@ -171,7 +174,7 @@ static inline void bitmap_and(unsigned l
static inline void bitmap_and(unsigned long *dst, const unsigned long *src1,
const unsigned long *src2, int nbits)
{
- if (nbits <= BITS_PER_LONG)
+ if (small_const_nbits(nbits))
*dst = *src1 & *src2;
else
__bitmap_and(dst, src1, src2, nbits);
@@ -180,7 +183,7 @@ static inline void bitmap_or(unsigned lo
static inline void bitmap_or(unsigned long *dst, const unsigned long *src1,
const unsigned long *src2, int nbits)
{
- if (nbits <= BITS_PER_LONG)
+ if (small_const_nbits(nbits))
*dst = *src1 | *src2;
else
__bitmap_or(dst, src1, src2, nbits);
@@ -189,7 +192,7 @@ static inline void bitmap_xor(unsigned l
static inline void bitmap_xor(unsigned long *dst, const unsigned long *src1,
const unsigned long *src2, int nbits)
{
- if (nbits <= BITS_PER_LONG)
+ if (small_const_nbits(nbits))
*dst = *src1 ^ *src2;
else
__bitmap_xor(dst, src1, src2, nbits);
@@ -198,7 +201,7 @@ static inline void bitmap_andnot(unsigne
static inline void bitmap_andnot(unsigned long *dst, const unsigned long *src1,
const unsigned long *src2, int nbits)
{
- if (nbits <= BITS_PER_LONG)
+ if (small_const_nbits(nbits))
*dst = *src1 & ~(*src2);
else
__bitmap_andnot(dst, src1, src2, nbits);
@@ -207,7 +210,7 @@ static inline void bitmap_complement(uns
static inline void bitmap_complement(unsigned long *dst, const unsigned long *src,
int nbits)
{
- if (nbits <= BITS_PER_LONG)
+ if (small_const_nbits(nbits))
*dst = ~(*src) & BITMAP_LAST_WORD_MASK(nbits);
else
__bitmap_complement(dst, src, nbits);
@@ -216,7 +219,7 @@ static inline int bitmap_equal(const uns
static inline int bitmap_equal(const unsigned long *src1,
const unsigned long *src2, int nbits)
{
- if (nbits <= BITS_PER_LONG)
+ if (small_const_nbits(nbits))
return ! ((*src1 ^ *src2) & BITMAP_LAST_WORD_MASK(nbits));
else
return __bitmap_equal(src1, src2, nbits);
@@ -225,7 +228,7 @@ static inline int bitmap_intersects(cons
static inline int bitmap_intersects(const unsigned long *src1,
const unsigned long *src2, int nbits)
{
- if (nbits <= BITS_PER_LONG)
+ if (small_const_nbits(nbits))
return ((*src1 & *src2) & BITMAP_LAST_WORD_MASK(nbits)) != 0;
else
return __bitmap_intersects(src1, src2, nbits);
@@ -234,7 +237,7 @@ static inline int bitmap_subset(const un
static inline int bitmap_subset(const unsigned long *src1,
const unsigned long *src2, int nbits)
{
- if (nbits <= BITS_PER_LONG)
+ if (small_const_nbits(nbits))
return ! ((*src1 & ~(*src2)) & BITMAP_LAST_WORD_MASK(nbits));
else
return __bitmap_subset(src1, src2, nbits);
@@ -242,7 +245,7 @@ static inline int bitmap_subset(const un
static inline int bitmap_empty(const unsigned long *src, int nbits)
{
- if (nbits <= BITS_PER_LONG)
+ if (small_const_nbits(nbits))
return ! (*src & BITMAP_LAST_WORD_MASK(nbits));
else
return __bitmap_empty(src, nbits);
@@ -250,7 +253,7 @@ static inline int bitmap_empty(const uns
static inline int bitmap_full(const unsigned long *src, int nbits)
{
- if (nbits <= BITS_PER_LONG)
+ if (small_const_nbits(nbits))
return ! (~(*src) & BITMAP_LAST_WORD_MASK(nbits));
else
return __bitmap_full(src, nbits);
@@ -258,7 +261,7 @@ static inline int bitmap_full(const unsi
static inline int bitmap_weight(const unsigned long *src, int nbits)
{
- if (nbits <= BITS_PER_LONG)
+ if (small_const_nbits(nbits))
return hweight_long(*src & BITMAP_LAST_WORD_MASK(nbits));
return __bitmap_weight(src, nbits);
}
@@ -266,7 +269,7 @@ static inline void bitmap_shift_right(un
static inline void bitmap_shift_right(unsigned long *dst,
const unsigned long *src, int n, int nbits)
{
- if (nbits <= BITS_PER_LONG)
+ if (small_const_nbits(nbits))
*dst = *src >> n;
else
__bitmap_shift_right(dst, src, n, nbits);
@@ -275,7 +278,7 @@ static inline void bitmap_shift_left(uns
static inline void bitmap_shift_left(unsigned long *dst,
const unsigned long *src, int n, int nbits)
{
- if (nbits <= BITS_PER_LONG)
+ if (small_const_nbits(nbits))
*dst = (*src << n) & BITMAP_LAST_WORD_MASK(nbits);
else
__bitmap_shift_left(dst, src, n, nbits);
Rusty Russell a écrit :
> On Saturday 06 December 2008 04:03:44 Eric Dumazet wrote:
>> num_online_cpus() and num_possible_cpus() are not performance
>> critical and are quite large.
>>
>> Unlining them shrinks kernel text size by 7523 bytes on x86,
>> if NR_CPUS>32
>
> Hi Eric!
>
> Slight misdiagnosis, I think. One base problem is addressed in fixing
> the bitmap operators (see "[PATCH] bitmap: test for constant as well as
> small size for inline versions" on lkml Message-Id: <[email protected]>). This is already in
> linux-next, and I've pasted it below.
>
> Worse, you used the obsolete cpumask operators :)
>
I see ! Good work ;)
So the gain would be 11 bytes per call site, across about one hundred call sites;
maybe not worth it :)
Thanks