2008-12-05 16:05:16

by Eric Dumazet

[permalink] [raw]
Subject: [PATCH] percpu_counter: FBC_BATCH might be too big

For NR_CPUS >= 16 values, FBC_BATCH is 2*NR_CPUS

Considering more and more distros are using high NR_CPUS values,
it makes sense to use a more sensible value for FBC_BATCH.

A sensible value is 2*num_online_cpus(), with a minimum value of 32
(This minimum value helps branch prediction in __percpu_counter_add())

We already have a hotcpu notifier, so we can adjust FBC_BATCH dynamically.

Signed-off-by: Eric Dumazet <[email protected]>
---
include/linux/percpu_counter.h | 6 +-----
lib/percpu_counter.c | 14 +++++++++++++-
2 files changed, 14 insertions(+), 6 deletions(-)

diff --git a/include/linux/percpu_counter.h b/include/linux/percpu_counter.h
index 9007ccd..c42a184 100644
--- a/include/linux/percpu_counter.h
+++ b/include/linux/percpu_counter.h
@@ -24,11 +24,7 @@ struct percpu_counter {
s32 *counters;
};

-#if NR_CPUS >= 16
-#define FBC_BATCH (NR_CPUS*2)
-#else
-#define FBC_BATCH (NR_CPUS*4)
-#endif
+extern int FBC_BATCH;

int percpu_counter_init(struct percpu_counter *fbc, s64 amount);
int percpu_counter_init_irq(struct percpu_counter *fbc, s64 amount);
diff --git a/lib/percpu_counter.c b/lib/percpu_counter.c
index a866389..2fcf943 100644
--- a/lib/percpu_counter.c
+++ b/lib/percpu_counter.c
@@ -114,6 +114,16 @@ void percpu_counter_destroy(struct percpu_counter *fbc)
}
EXPORT_SYMBOL(percpu_counter_destroy);

+int FBC_BATCH __read_mostly = 32;
+EXPORT_SYMBOL(FBC_BATCH);
+
+static void compute_batch_value(void)
+{
+ int nr = num_online_cpus();
+
+ FBC_BATCH = max(32, nr*2);
+}
+
#ifdef CONFIG_HOTPLUG_CPU
static int __cpuinit percpu_counter_hotcpu_callback(struct notifier_block *nb,
unsigned long action, void *hcpu)
@@ -121,6 +131,7 @@ static int __cpuinit percpu_counter_hotcpu_callback(struct notifier_block *nb,
unsigned int cpu;
struct percpu_counter *fbc;

+ compute_batch_value();
if (action != CPU_DEAD)
return NOTIFY_OK;

@@ -139,11 +150,12 @@ static int __cpuinit percpu_counter_hotcpu_callback(struct notifier_block *nb,
mutex_unlock(&percpu_counters_lock);
return NOTIFY_OK;
}
+#endif

static int __init percpu_counter_startup(void)
{
+ compute_batch_value();
hotcpu_notifier(percpu_counter_hotcpu_callback, 0);
return 0;
}
module_init(percpu_counter_startup);
-#endif


2008-12-05 16:40:43

by Peter Zijlstra

[permalink] [raw]
Subject: Re: [PATCH] percpu_counter: FBC_BATCH might be too big

On Fri, 2008-12-05 at 17:05 +0100, Eric Dumazet wrote:
> For NR_CPUS >= 16 values, FBC_BATCH is 2*NR_CPUS
>
> Considering more and more distros are using high NR_CPUS values,
> it makes sense to use a more sensible value for FBC_BATCH.
>
> A sensible value is 2*num_online_cpus(), with a minimum value of 32
> (This minimum value helps branch prediction in __percpu_counter_add())
>
> We already have a hotcpu notifier, so we can adjust FBC_BATCH dynamically.

Agreed, that is one of the reasons the proportion code doesn't use the
FBC_BATCH stuff.

Acked-by: Peter Zijlstra <[email protected]>

> Signed-off-by: Eric Dumazet <[email protected]>
> ---
> include/linux/percpu_counter.h | 6 +-----
> lib/percpu_counter.c | 14 +++++++++++++-
> 2 files changed, 14 insertions(+), 6 deletions(-)
>
> diff --git a/include/linux/percpu_counter.h b/include/linux/percpu_counter.h
> index 9007ccd..c42a184 100644
> --- a/include/linux/percpu_counter.h
> +++ b/include/linux/percpu_counter.h
> @@ -24,11 +24,7 @@ struct percpu_counter {
> s32 *counters;
> };
>
> -#if NR_CPUS >= 16
> -#define FBC_BATCH (NR_CPUS*2)
> -#else
> -#define FBC_BATCH (NR_CPUS*4)
> -#endif
> +extern int FBC_BATCH;
>
> int percpu_counter_init(struct percpu_counter *fbc, s64 amount);
> int percpu_counter_init_irq(struct percpu_counter *fbc, s64 amount);
> diff --git a/lib/percpu_counter.c b/lib/percpu_counter.c
> index a866389..2fcf943 100644
> --- a/lib/percpu_counter.c
> +++ b/lib/percpu_counter.c
> @@ -114,6 +114,16 @@ void percpu_counter_destroy(struct percpu_counter *fbc)
> }
> EXPORT_SYMBOL(percpu_counter_destroy);
>
> +int FBC_BATCH __read_mostly = 32;
> +EXPORT_SYMBOL(FBC_BATCH);
> +
> +static void compute_batch_value(void)
> +{
> + int nr = num_online_cpus();
> +
> + FBC_BATCH = max(32, nr*2);
> +}
> +
> #ifdef CONFIG_HOTPLUG_CPU
> static int __cpuinit percpu_counter_hotcpu_callback(struct notifier_block *nb,
> unsigned long action, void *hcpu)
> @@ -121,6 +131,7 @@ static int __cpuinit percpu_counter_hotcpu_callback(struct notifier_block *nb,
> unsigned int cpu;
> struct percpu_counter *fbc;
>
> + compute_batch_value();
> if (action != CPU_DEAD)
> return NOTIFY_OK;
>
> @@ -139,11 +150,12 @@ static int __cpuinit percpu_counter_hotcpu_callback(struct notifier_block *nb,
> mutex_unlock(&percpu_counters_lock);
> return NOTIFY_OK;
> }
> +#endif
>
> static int __init percpu_counter_startup(void)
> {
> + compute_batch_value();
> hotcpu_notifier(percpu_counter_hotcpu_callback, 0);
> return 0;
> }
> module_init(percpu_counter_startup);
> -#endif

2008-12-05 20:32:02

by David Miller

[permalink] [raw]
Subject: Re: [PATCH] percpu_counter: FBC_BATCH might be too big

From: Eric Dumazet <[email protected]>
Date: Fri, 05 Dec 2008 17:05:16 +0100

> For NR_CPUS >= 16 values, FBC_BATCH is 2*NR_CPUS
>
> Considering more and more distros are using high NR_CPUS values,
> it makes sense to use a more sensible value for FBC_BATCH.
>
> A sensible value is 2*num_online_cpus(), with a minimum value of 32
> (This minimum value helps branch prediction in __percpu_counter_add())
>
> We already have a hotcpu notifier, so we can adjust FBC_BATCH dynamically.
>
> Signed-off-by: Eric Dumazet <[email protected]>

The downside is now we must load this value in these common
routines. But I think the gain outweights the loss so:

Acked-by: David S. Miller <[email protected]>

2008-12-07 09:36:00

by Eric Dumazet

[permalink] [raw]
Subject: [PATCH, take2] percpu_counter: FBC_BATCH might be too big

In this second version I guarded hotcpu_notifier() call by
a #ifdef CONFIG_HOTPLUG_CPU

I wonder why hotcpu_notifier() is not a null op if !CONFIG_HOTPLUG_CPU

Thank you

[PATCH] percpu_counter: FBC_BATCH might be too big

For NR_CPUS >= 16 values, FBC_BATCH is 2*NR_CPUS

Considering more and more distros are using high NR_CPUS values,
it makes sense to use a more sensible value for FBC_BATCH.

A sensible value is 2*num_online_cpus(), with a minimum value of 32
(This minimum value helps branch prediction in __percpu_counter_add())

We already have a hotcpu notifier, so we can adjust FBC_BATCH dynamically.

Signed-off-by: Eric Dumazet <[email protected]>
Acked-by: David S. Miller <[email protected]>
Acked-by: Peter Zijlstra <[email protected]>
---
include/linux/percpu_counter.h | 6 +-----
lib/percpu_counter.c | 16 +++++++++++++++-
2 files changed, 16 insertions(+), 6 deletions(-)

diff --git a/include/linux/percpu_counter.h b/include/linux/percpu_counter.h
index 9007ccd..c42a184 100644
--- a/include/linux/percpu_counter.h
+++ b/include/linux/percpu_counter.h
@@ -24,11 +24,7 @@ struct percpu_counter {
s32 *counters;
};

-#if NR_CPUS >= 16
-#define FBC_BATCH (NR_CPUS*2)
-#else
-#define FBC_BATCH (NR_CPUS*4)
-#endif
+extern int FBC_BATCH;

int percpu_counter_init(struct percpu_counter *fbc, s64 amount);
int percpu_counter_init_irq(struct percpu_counter *fbc, s64 amount);
diff --git a/lib/percpu_counter.c b/lib/percpu_counter.c
index a866389..e21ce7c 100644
--- a/lib/percpu_counter.c
+++ b/lib/percpu_counter.c
@@ -114,6 +114,16 @@ void percpu_counter_destroy(struct percpu_counter *fbc)
}
EXPORT_SYMBOL(percpu_counter_destroy);

+int FBC_BATCH __read_mostly = 32;
+EXPORT_SYMBOL(FBC_BATCH);
+
+static void compute_batch_value(void)
+{
+ int nr = num_online_cpus();
+
+ FBC_BATCH = max(32, nr*2);
+}
+
#ifdef CONFIG_HOTPLUG_CPU
static int __cpuinit percpu_counter_hotcpu_callback(struct notifier_block *nb,
unsigned long action, void *hcpu)
@@ -121,6 +131,7 @@ static int __cpuinit percpu_counter_hotcpu_callback(struct notifier_block *nb,
unsigned int cpu;
struct percpu_counter *fbc;

+ compute_batch_value();
if (action != CPU_DEAD)
return NOTIFY_OK;

@@ -139,11 +150,14 @@ static int __cpuinit percpu_counter_hotcpu_callback(struct notifier_block *nb,
mutex_unlock(&percpu_counters_lock);
return NOTIFY_OK;
}
+#endif

static int __init percpu_counter_startup(void)
{
+ compute_batch_value();
+#ifdef CONFIG_HOTPLUG_CPU
hotcpu_notifier(percpu_counter_hotcpu_callback, 0);
+#endif
return 0;
}
module_init(percpu_counter_startup);
-#endif

2008-12-07 17:10:03

by Andrew Morton

[permalink] [raw]
Subject: Re: [PATCH, take2] percpu_counter: FBC_BATCH might be too big

On Sun, 07 Dec 2008 10:25:45 +0100 Eric Dumazet <[email protected]> wrote:

> In this second version I guarded hotcpu_notifier() call by
> a #ifdef CONFIG_HOTPLUG_CPU
>
> I wonder why hotcpu_notifier() is not a null op if !CONFIG_HOTPLUG_CPU
>
> Thank you
>
> [PATCH] percpu_counter: FBC_BATCH might be too big
>
> For NR_CPUS >= 16 values, FBC_BATCH is 2*NR_CPUS
>
> Considering more and more distros are using high NR_CPUS values,
> it makes sense to use a more sensible value for FBC_BATCH.
>
> A sensible value is 2*num_online_cpus(), with a minimum value of 32
> (This minimum value helps branch prediction in __percpu_counter_add())
>
> We already have a hotcpu notifier, so we can adjust FBC_BATCH dynamically.

Yup, anything using NR_CPUS is probably wrong.

> diff --git a/include/linux/percpu_counter.h b/include/linux/percpu_counter.h
> index 9007ccd..c42a184 100644
> --- a/include/linux/percpu_counter.h
> +++ b/include/linux/percpu_counter.h
> @@ -24,11 +24,7 @@ struct percpu_counter {
> s32 *counters;
> };
>
> -#if NR_CPUS >= 16
> -#define FBC_BATCH (NR_CPUS*2)
> -#else
> -#define FBC_BATCH (NR_CPUS*4)
> -#endif
> +extern int FBC_BATCH;

y:/usr/src/linux-2.6.28-rc7> grep -r FBC_BATCH . | wc -l
7

Can we fix this properly please? It should now become lower case, and
it was a pretty dopey name anyway - now would be a good time to improve
it. `percpu_counter_batch'?

> int percpu_counter_init(struct percpu_counter *fbc, s64 amount);
> int percpu_counter_init_irq(struct percpu_counter *fbc, s64 amount);
> diff --git a/lib/percpu_counter.c b/lib/percpu_counter.c
> index a866389..e21ce7c 100644
> --- a/lib/percpu_counter.c
> +++ b/lib/percpu_counter.c
> @@ -114,6 +114,16 @@ void percpu_counter_destroy(struct percpu_counter *fbc)
> }
> EXPORT_SYMBOL(percpu_counter_destroy);
>
> +int FBC_BATCH __read_mostly = 32;
> +EXPORT_SYMBOL(FBC_BATCH);
> +
> +static void compute_batch_value(void)
> +{
> + int nr = num_online_cpus();
> +
> + FBC_BATCH = max(32, nr*2);
> +}
> +
> #ifdef CONFIG_HOTPLUG_CPU
> static int __cpuinit percpu_counter_hotcpu_callback(struct notifier_block *nb,
> unsigned long action, void *hcpu)
> @@ -121,6 +131,7 @@ static int __cpuinit percpu_counter_hotcpu_callback(struct notifier_block *nb,
> unsigned int cpu;
> struct percpu_counter *fbc;
>
> + compute_batch_value();
> if (action != CPU_DEAD)
> return NOTIFY_OK;
>
> @@ -139,11 +150,14 @@ static int __cpuinit percpu_counter_hotcpu_callback(struct notifier_block *nb,
> mutex_unlock(&percpu_counters_lock);
> return NOTIFY_OK;
> }
> +#endif
>
> static int __init percpu_counter_startup(void)
> {
> + compute_batch_value();
> +#ifdef CONFIG_HOTPLUG_CPU
> hotcpu_notifier(percpu_counter_hotcpu_callback, 0);
> +#endif
> return 0;
> }
> module_init(percpu_counter_startup);
> -#endif

hm, now what's going on in there? We should be able to drop the #ifdef
CONFIG_HOTPLUG_CPU from lib/percpu_counter.c altogether.
hotcpu_notifier() will do the right thing, and the compiler should
generate no code for percpu_counter_hotcpu_callback() if
CONFIG_HOTPLUG_CPU=n.

Do

$EDITOR $(grep -l hotcpu_notifier */*.c)

and you'll see lots of code gets it right, and lots of code gets it wrong.

2008-12-07 18:40:25

by Eric Dumazet

[permalink] [raw]
Subject: Re: [PATCH, take2] percpu_counter: FBC_BATCH might be too big

Andrew Morton a ?crit :
> On Sun, 07 Dec 2008 10:25:45 +0100 Eric Dumazet <[email protected]> wrote:
>
>> In this second version I guarded hotcpu_notifier() call by
>> a #ifdef CONFIG_HOTPLUG_CPU
>>
>> I wonder why hotcpu_notifier() is not a null op if !CONFIG_HOTPLUG_CPU
>>
>> Thank you
>>
>> [PATCH] percpu_counter: FBC_BATCH might be too big
>>
>> For NR_CPUS >= 16 values, FBC_BATCH is 2*NR_CPUS
>>
>> Considering more and more distros are using high NR_CPUS values,
>> it makes sense to use a more sensible value for FBC_BATCH.
>>
>> A sensible value is 2*num_online_cpus(), with a minimum value of 32
>> (This minimum value helps branch prediction in __percpu_counter_add())
>>
>> We already have a hotcpu notifier, so we can adjust FBC_BATCH dynamically.
>
> Yup, anything using NR_CPUS is probably wrong.
>
>> diff --git a/include/linux/percpu_counter.h b/include/linux/percpu_counter.h
>> index 9007ccd..c42a184 100644
>> --- a/include/linux/percpu_counter.h
>> +++ b/include/linux/percpu_counter.h
>> @@ -24,11 +24,7 @@ struct percpu_counter {
>> s32 *counters;
>> };
>>
>> -#if NR_CPUS >= 16
>> -#define FBC_BATCH (NR_CPUS*2)
>> -#else
>> -#define FBC_BATCH (NR_CPUS*4)
>> -#endif
>> +extern int FBC_BATCH;
>
> y:/usr/src/linux-2.6.28-rc7> grep -r FBC_BATCH . | wc -l
> 7
>
> Can we fix this properly please? It should now become lower case, and
> it was a pretty dopey name anyway - now would be a good time to improve
> it. `percpu_counter_batch'?

Yes

>
>> int percpu_counter_init(struct percpu_counter *fbc, s64 amount);
>> int percpu_counter_init_irq(struct percpu_counter *fbc, s64 amount);
>> diff --git a/lib/percpu_counter.c b/lib/percpu_counter.c
>> index a866389..e21ce7c 100644
>> --- a/lib/percpu_counter.c
>> +++ b/lib/percpu_counter.c
>> @@ -114,6 +114,16 @@ void percpu_counter_destroy(struct percpu_counter *fbc)
>> }
>> EXPORT_SYMBOL(percpu_counter_destroy);
>>
>> +int FBC_BATCH __read_mostly = 32;
>> +EXPORT_SYMBOL(FBC_BATCH);
>> +
>> +static void compute_batch_value(void)
>> +{
>> + int nr = num_online_cpus();
>> +
>> + FBC_BATCH = max(32, nr*2);
>> +}
>> +
>> #ifdef CONFIG_HOTPLUG_CPU
>> static int __cpuinit percpu_counter_hotcpu_callback(struct notifier_block *nb,
>> unsigned long action, void *hcpu)
>> @@ -121,6 +131,7 @@ static int __cpuinit percpu_counter_hotcpu_callback(struct notifier_block *nb,
>> unsigned int cpu;
>> struct percpu_counter *fbc;
>>
>> + compute_batch_value();
>> if (action != CPU_DEAD)
>> return NOTIFY_OK;
>>
>> @@ -139,11 +150,14 @@ static int __cpuinit percpu_counter_hotcpu_callback(struct notifier_block *nb,
>> mutex_unlock(&percpu_counters_lock);
>> return NOTIFY_OK;
>> }
>> +#endif
>>
>> static int __init percpu_counter_startup(void)
>> {
>> + compute_batch_value();
>> +#ifdef CONFIG_HOTPLUG_CPU
>> hotcpu_notifier(percpu_counter_hotcpu_callback, 0);
>> +#endif
>> return 0;
>> }
>> module_init(percpu_counter_startup);
>> -#endif
>
> hm, now what's going on in there? We should be able to drop the #ifdef
> CONFIG_HOTPLUG_CPU from lib/percpu_counter.c altogether.
> hotcpu_notifier() will do the right thing, and the compiler should
> generate no code for percpu_counter_hotcpu_callback() if
> CONFIG_HOTPLUG_CPU=n.
>
> Do
>
> $EDITOR $(grep -l hotcpu_notifier */*.c)
>
> and you'll see lots of code gets it right, and lots of code gets it wrong.

I see nothing interesting, I must be blind.

lib/percpu_counter.c: In function 'percpu_counter_startup':
lib/percpu_counter.c:158: error: 'percpu_counter_hotcpu_callback' undeclared (first use in this function)
lib/percpu_counter.c:158: error: (Each undeclared identifier is reported only once
lib/percpu_counter.c:158: error: for each function it appears in.)
make[1]: *** [lib/percpu_counter.o] Error 1

static int __init percpu_counter_startup(void)
{
compute_batch_value();
hotcpu_notifier(percpu_counter_hotcpu_callback, 0); << ERROR >>
return 0;
}
module_init(percpu_counter_startup);

# grep hotcpu_notifier include/linux/cpu.h
#define hotcpu_notifier(fn, pri) { \
#define hotcpu_notifier(fn, pri) do { (void)(fn); } while (0) << ERROR >>


If changed to :

static struct notifier_block __cpuinitdata percpu_counter_cpu_notifier = {
.notifier_call = percpu_counter_hotcpu_callback,
};
static int __init percpu_counter_startup(void)
{
compute_batch_value();
register_hotcpu_notifier(&percpu_counter_cpu_notifier);
return 0;
}
module_init(percpu_counter_startup);

Then error is :
lib/percpu_counter.c:156: error: 'percpu_counter_hotcpu_callback' undeclared here (not in a function)
make[1]: *** [lib/percpu_counter.o] Error 1


So the only way seems to add an ugly

#define percpu_counter_hotcpu_callback NULL

Is is what you name "the right thing" ?

Thanks

[PATCH] percpu_counter: FBC_BATCH should be a variable

For NR_CPUS >= 16 values, FBC_BATCH is 2*NR_CPUS

Considering more and more distros are using high NR_CPUS values,
it makes sense to use a more sensible value for FBC_BATCH.

A sensible value is 2*num_online_cpus(), with a minimum value of 32
(This minimum value helps branch prediction in __percpu_counter_add())

We already have a hotcpu notifier, so we can adjust FBC_BATCH dynamically.

We rename FBC_BATCH to percpu_counter_batch since its not a constant
anymore.

Signed-off-by: Eric Dumazet <[email protected]>
Acked-by: David S. Miller <[email protected]>
Acked-by: Peter Zijlstra <[email protected]>
---
fs/ext4/ext4.h | 6 +++---
fs/ext4/inode.c | 2 +-
include/linux/percpu_counter.h | 8 ++------
lib/percpu_counter.c | 16 +++++++++++++++-
4 files changed, 21 insertions(+), 11 deletions(-)

diff --git a/fs/ext4/ext4.h b/fs/ext4/ext4.h
index b0537c8..6c46c64 100644
--- a/fs/ext4/ext4.h
+++ b/fs/ext4/ext4.h
@@ -1225,11 +1225,11 @@ do { \
} while (0)

#ifdef CONFIG_SMP
-/* Each CPU can accumulate FBC_BATCH blocks in their local
+/* Each CPU can accumulate percpu_counter_batch blocks in their local
* counters. So we need to make sure we have free blocks more
- * than FBC_BATCH * nr_cpu_ids. Also add a window of 4 times.
+ * than percpu_counter_batch * nr_cpu_ids. Also add a window of 4 times.
*/
-#define EXT4_FREEBLOCKS_WATERMARK (4 * (FBC_BATCH * nr_cpu_ids))
+#define EXT4_FREEBLOCKS_WATERMARK (4 * (percpu_counter_batch * nr_cpu_ids))
#else
#define EXT4_FREEBLOCKS_WATERMARK 0
#endif
diff --git a/fs/ext4/inode.c b/fs/ext4/inode.c
index be21a5a..db661e7 100644
--- a/fs/ext4/inode.c
+++ b/fs/ext4/inode.c
@@ -2497,7 +2497,7 @@ static int ext4_nonda_switch(struct super_block *sb)
/*
* switch to non delalloc mode if we are running low
* on free block. The free block accounting via percpu
- * counters can get slightly wrong with FBC_BATCH getting
+ * counters can get slightly wrong with percpu_counter_batch getting
* accumulated on each CPU without updating global counters
* Delalloc need an accurate free block accounting. So switch
* to non delalloc when we are near to error range.
diff --git a/include/linux/percpu_counter.h b/include/linux/percpu_counter.h
index 9007ccd..99de7a3 100644
--- a/include/linux/percpu_counter.h
+++ b/include/linux/percpu_counter.h
@@ -24,11 +24,7 @@ struct percpu_counter {
s32 *counters;
};

-#if NR_CPUS >= 16
-#define FBC_BATCH (NR_CPUS*2)
-#else
-#define FBC_BATCH (NR_CPUS*4)
-#endif
+extern int percpu_counter_batch;

int percpu_counter_init(struct percpu_counter *fbc, s64 amount);
int percpu_counter_init_irq(struct percpu_counter *fbc, s64 amount);
@@ -39,7 +35,7 @@ s64 __percpu_counter_sum(struct percpu_counter *fbc);

static inline void percpu_counter_add(struct percpu_counter *fbc, s64 amount)
{
- __percpu_counter_add(fbc, amount, FBC_BATCH);
+ __percpu_counter_add(fbc, amount, percpu_counter_batch);
}

static inline s64 percpu_counter_sum_positive(struct percpu_counter *fbc)
diff --git a/lib/percpu_counter.c b/lib/percpu_counter.c
index a866389..519b877 100644
--- a/lib/percpu_counter.c
+++ b/lib/percpu_counter.c
@@ -114,6 +114,16 @@ void percpu_counter_destroy(struct percpu_counter *fbc)
}
EXPORT_SYMBOL(percpu_counter_destroy);

+int percpu_counter_batch __read_mostly = 32;
+EXPORT_SYMBOL(percpu_counter_batch);
+
+static void compute_batch_value(void)
+{
+ int nr = num_online_cpus();
+
+ percpu_counter_batch = max(32, nr*2);
+}
+
#ifdef CONFIG_HOTPLUG_CPU
static int __cpuinit percpu_counter_hotcpu_callback(struct notifier_block *nb,
unsigned long action, void *hcpu)
@@ -121,6 +131,7 @@ static int __cpuinit percpu_counter_hotcpu_callback(struct notifier_block *nb,
unsigned int cpu;
struct percpu_counter *fbc;

+ compute_batch_value();
if (action != CPU_DEAD)
return NOTIFY_OK;

@@ -139,11 +150,14 @@ static int __cpuinit percpu_counter_hotcpu_callback(struct notifier_block *nb,
mutex_unlock(&percpu_counters_lock);
return NOTIFY_OK;
}
+#else
+#define percpu_counter_hotcpu_callback NULL
+#endif /* CONFIG_HOTPLUG_CPU */

static int __init percpu_counter_startup(void)
{
+ compute_batch_value();
hotcpu_notifier(percpu_counter_hotcpu_callback, 0);
return 0;
}
module_init(percpu_counter_startup);
-#endif

2008-12-08 04:33:02

by Andrew Morton

[permalink] [raw]
Subject: Re: [PATCH, take2] percpu_counter: FBC_BATCH might be too big

On Sun, 07 Dec 2008 19:30:10 +0100 Eric Dumazet <[email protected]> wrote:

> >
> > Do
> >
> > $EDITOR $(grep -l hotcpu_notifier */*.c)
> >
> > and you'll see lots of code gets it right, and lots of code gets it wrong.
>
> I see nothing interesting, I must be blind.
>
> lib/percpu_counter.c: In function 'percpu_counter_startup':
> lib/percpu_counter.c:158: error: 'percpu_counter_hotcpu_callback' undeclared (first use in this function)
> lib/percpu_counter.c:158: error: (Each undeclared identifier is reported only once
> lib/percpu_counter.c:158: error: for each function it appears in.)
> make[1]: *** [lib/percpu_counter.o] Error 1

Perhaps you still had percpu_counter_hotcpu_callback inside #ifdef.

That a look at kernel/workqueue.c, fs/buffer.c. No #ifdef CONFIG_HOTPLUG_CPU
needed at all.


2008-12-08 08:10:53

by Eric Dumazet

[permalink] [raw]
Subject: [PATCH] percpu_counter: FBC_BATCH should be a variable

Andrew Morton a ?crit :
> On Sun, 07 Dec 2008 19:30:10 +0100 Eric Dumazet <[email protected]> wrote:
>
>>> Do
>>>
>>> $EDITOR $(grep -l hotcpu_notifier */*.c)
>>>
>>> and you'll see lots of code gets it right, and lots of code gets it wrong.
>> I see nothing interesting, I must be blind.
>>
>> lib/percpu_counter.c: In function 'percpu_counter_startup':
>> lib/percpu_counter.c:158: error: 'percpu_counter_hotcpu_callback' undeclared (first use in this function)
>> lib/percpu_counter.c:158: error: (Each undeclared identifier is reported only once
>> lib/percpu_counter.c:158: error: for each function it appears in.)
>> make[1]: *** [lib/percpu_counter.o] Error 1
>
> Perhaps you still had percpu_counter_hotcpu_callback inside #ifdef.
>
> That a look at kernel/workqueue.c, fs/buffer.c. No #ifdef CONFIG_HOTPLUG_CPU
> needed at all.

We still need some #ifdef or we also must also delete them around "struct list_head list"
in include/linux/percpu_counter.h and grow struct percpu_counter.

lib/percpu_counter.c: In function 'percpu_counter_hotcpu_callback':
lib/percpu_counter.c:137: error: 'struct percpu_counter' has no member named 'list'
lib/percpu_counter.c:137: warning: type defaults to 'int' in declaration of '__mptr'
lib/percpu_counter.c:137: warning: initialization from incompatible pointer type
...


Thank you Andrew


[PATCH] percpu_counter: FBC_BATCH should be a variable

For NR_CPUS >= 16 values, FBC_BATCH is 2*NR_CPUS

Considering more and more distros are using high NR_CPUS values,
it makes sense to use a more sensible value for FBC_BATCH, and
get rid of NR_CPUS.

A sensible value is 2*num_online_cpus(), with a minimum value of 32
(This minimum value helps branch prediction in __percpu_counter_add())

We already have a hotcpu notifier, so we can adjust FBC_BATCH dynamically.

We rename FBC_BATCH to percpu_counter_batch since its not a constant
anymore.

Signed-off-by: Eric Dumazet <[email protected]>
Acked-by: David S. Miller <[email protected]>
Acked-by: Peter Zijlstra <[email protected]>
---
fs/ext4/ext4.h | 6 +++---
fs/ext4/inode.c | 2 +-
include/linux/percpu_counter.h | 8 ++------
lib/percpu_counter.c | 18 ++++++++++++++----
4 files changed, 20 insertions(+), 14 deletions(-)

diff --git a/fs/ext4/ext4.h b/fs/ext4/ext4.h
index b0537c8..6c46c64 100644
--- a/fs/ext4/ext4.h
+++ b/fs/ext4/ext4.h
@@ -1225,11 +1225,11 @@ do { \
} while (0)

#ifdef CONFIG_SMP
-/* Each CPU can accumulate FBC_BATCH blocks in their local
+/* Each CPU can accumulate percpu_counter_batch blocks in their local
* counters. So we need to make sure we have free blocks more
- * than FBC_BATCH * nr_cpu_ids. Also add a window of 4 times.
+ * than percpu_counter_batch * nr_cpu_ids. Also add a window of 4 times.
*/
-#define EXT4_FREEBLOCKS_WATERMARK (4 * (FBC_BATCH * nr_cpu_ids))
+#define EXT4_FREEBLOCKS_WATERMARK (4 * (percpu_counter_batch * nr_cpu_ids))
#else
#define EXT4_FREEBLOCKS_WATERMARK 0
#endif
diff --git a/fs/ext4/inode.c b/fs/ext4/inode.c
index be21a5a..db661e7 100644
--- a/fs/ext4/inode.c
+++ b/fs/ext4/inode.c
@@ -2497,7 +2497,7 @@ static int ext4_nonda_switch(struct super_block *sb)
/*
* switch to non delalloc mode if we are running low
* on free block. The free block accounting via percpu
- * counters can get slightly wrong with FBC_BATCH getting
+ * counters can get slightly wrong with percpu_counter_batch getting
* accumulated on each CPU without updating global counters
* Delalloc need an accurate free block accounting. So switch
* to non delalloc when we are near to error range.
diff --git a/include/linux/percpu_counter.h b/include/linux/percpu_counter.h
index 9007ccd..99de7a3 100644
--- a/include/linux/percpu_counter.h
+++ b/include/linux/percpu_counter.h
@@ -24,11 +24,7 @@ struct percpu_counter {
s32 *counters;
};

-#if NR_CPUS >= 16
-#define FBC_BATCH (NR_CPUS*2)
-#else
-#define FBC_BATCH (NR_CPUS*4)
-#endif
+extern int percpu_counter_batch;

int percpu_counter_init(struct percpu_counter *fbc, s64 amount);
int percpu_counter_init_irq(struct percpu_counter *fbc, s64 amount);
@@ -39,7 +35,7 @@ s64 __percpu_counter_sum(struct percpu_counter *fbc);

static inline void percpu_counter_add(struct percpu_counter *fbc, s64 amount)
{
- __percpu_counter_add(fbc, amount, FBC_BATCH);
+ __percpu_counter_add(fbc, amount, percpu_counter_batch);
}

static inline s64 percpu_counter_sum_positive(struct percpu_counter *fbc)
diff --git a/lib/percpu_counter.c b/lib/percpu_counter.c
index a866389..e73eb5b 100644
--- a/lib/percpu_counter.c
+++ b/lib/percpu_counter.c
@@ -9,10 +9,8 @@
#include <linux/cpu.h>
#include <linux/module.h>

-#ifdef CONFIG_HOTPLUG_CPU
static LIST_HEAD(percpu_counters);
static DEFINE_MUTEX(percpu_counters_lock);
-#endif

void percpu_counter_set(struct percpu_counter *fbc, s64 amount)
{
@@ -114,13 +112,24 @@ void percpu_counter_destroy(struct percpu_counter *fbc)
}
EXPORT_SYMBOL(percpu_counter_destroy);

-#ifdef CONFIG_HOTPLUG_CPU
+int percpu_counter_batch __read_mostly = 32;
+EXPORT_SYMBOL(percpu_counter_batch);
+
+static void compute_batch_value(void)
+{
+ int nr = num_online_cpus();
+
+ percpu_counter_batch = max(32, nr*2);
+}
+
static int __cpuinit percpu_counter_hotcpu_callback(struct notifier_block *nb,
unsigned long action, void *hcpu)
{
+#ifdef CONFIG_HOTPLUG_CPU
unsigned int cpu;
struct percpu_counter *fbc;

+ compute_batch_value();
if (action != CPU_DEAD)
return NOTIFY_OK;

@@ -137,13 +146,14 @@ static int __cpuinit percpu_counter_hotcpu_callback(struct notifier_block *nb,
spin_unlock_irqrestore(&fbc->lock, flags);
}
mutex_unlock(&percpu_counters_lock);
+#endif
return NOTIFY_OK;
}

static int __init percpu_counter_startup(void)
{
+ compute_batch_value();
hotcpu_notifier(percpu_counter_hotcpu_callback, 0);
return 0;
}
module_init(percpu_counter_startup);
-#endif

2008-12-08 08:19:58

by Andrew Morton

[permalink] [raw]
Subject: Re: [PATCH] percpu_counter: FBC_BATCH should be a variable

On Mon, 08 Dec 2008 09:05:04 +0100 Eric Dumazet <[email protected]> wrote:

> Andrew Morton a __crit :
> > On Sun, 07 Dec 2008 19:30:10 +0100 Eric Dumazet <[email protected]> wrote:
> >
> >>> Do
> >>>
> >>> $EDITOR $(grep -l hotcpu_notifier */*.c)
> >>>
> >>> and you'll see lots of code gets it right, and lots of code gets it wrong.
> >> I see nothing interesting, I must be blind.
> >>
> >> lib/percpu_counter.c: In function 'percpu_counter_startup':
> >> lib/percpu_counter.c:158: error: 'percpu_counter_hotcpu_callback' undeclared (first use in this function)
> >> lib/percpu_counter.c:158: error: (Each undeclared identifier is reported only once
> >> lib/percpu_counter.c:158: error: for each function it appears in.)
> >> make[1]: *** [lib/percpu_counter.o] Error 1
> >
> > Perhaps you still had percpu_counter_hotcpu_callback inside #ifdef.
> >
> > That a look at kernel/workqueue.c, fs/buffer.c. No #ifdef CONFIG_HOTPLUG_CPU
> > needed at all.
>
> We still need some #ifdef or we also must also delete them around "struct list_head list"
> in include/linux/percpu_counter.h and grow struct percpu_counter.
>
> lib/percpu_counter.c: In function 'percpu_counter_hotcpu_callback':
> lib/percpu_counter.c:137: error: 'struct percpu_counter' has no member named 'list'
> lib/percpu_counter.c:137: warning: type defaults to 'int' in declaration of '__mptr'
> lib/percpu_counter.c:137: warning: initialization from incompatible pointer type
> ...
>

Yes, this conditionality:

struct percpu_counter {
spinlock_t lock;
s64 count;
#ifdef CONFIG_HOTPLUG_CPU
struct list_head list; /* All percpu_counters are on a list */
#endif
s32 *counters;
};

mucked up our nice scheme. Oh well.

> +static void compute_batch_value(void)
> +{
> + int nr = num_online_cpus();
> +
> + percpu_counter_batch = max(32, nr*2);
> +}
> +
> static int __cpuinit percpu_counter_hotcpu_callback(struct notifier_block *nb,
> unsigned long action, void *hcpu)
> {
> +#ifdef CONFIG_HOTPLUG_CPU
> unsigned int cpu;
> struct percpu_counter *fbc;
>
> + compute_batch_value();
> if (action != CPU_DEAD)
> return NOTIFY_OK;
>
> @@ -137,13 +146,14 @@ static int __cpuinit percpu_counter_hotcpu_callback(struct notifier_block *nb,
> spin_unlock_irqrestore(&fbc->lock, flags);
> }
> mutex_unlock(&percpu_counters_lock);
> +#endif
> return NOTIFY_OK;
> }
>
> static int __init percpu_counter_startup(void)
> {
> + compute_batch_value();
> hotcpu_notifier(percpu_counter_hotcpu_callback, 0);
> return 0;
> }
> module_init(percpu_counter_startup);

compute_batch_value() can be __cpuinit, but I think we're close enough
here ;)