2013-08-19 16:13:39

by Peter Zijlstra

Subject: [PATCH 02/10] sched: Factor out code to should_we_balance()

From: Joonsoo Kim <[email protected]>

Checking whether this cpu is an appropriate one to balance is currently
embedded in update_sg_lb_stats(), although this check has no direct
relationship to that function. There is not enough reason to place
the check in update_sg_lb_stats(), except saving one iteration over
sched_group_cpus.

In this patch, I factor out this check into a should_we_balance() function,
and before doing the actual load-balancing work, check whether this cpu is
appropriate to balance via should_we_balance(). If this cpu is not
a candidate for balancing, load_balance() quits immediately.

With this change, we save two memset calls and can expect better
compiler optimization.

Below is the result of this patch.

* Vanilla *
text data bss dec hex filename
34499 1136 116 35751 8ba7 kernel/sched/fair.o

* Patched *
text data bss dec hex filename
34243 1136 116 35495 8aa7 kernel/sched/fair.o

In addition, rename @balance to @should_balance in order to represent
its purpose more clearly.

Signed-off-by: Joonsoo Kim <[email protected]>
[peterz: style changes and a fix in should_we_balance() ]
Signed-off-by: Peter Zijlstra <[email protected]>
Link: http://lkml.kernel.org/r/[email protected]
---
kernel/sched/fair.c | 111 +++++++++++++++++++++++++---------------------------
1 file changed, 54 insertions(+), 57 deletions(-)

--- a/kernel/sched/fair.c
+++ b/kernel/sched/fair.c
@@ -4510,22 +4510,17 @@ fix_small_capacity(struct sched_domain *
* @group: sched_group whose statistics are to be updated.
* @load_idx: Load index of sched_domain of this_cpu for load calc.
* @local_group: Does group contain this_cpu.
- * @balance: Should we balance.
* @sgs: variable to hold the statistics for this group.
*/
static inline void update_sg_lb_stats(struct lb_env *env,
struct sched_group *group, int load_idx,
- int local_group, int *balance, struct sg_lb_stats *sgs)
+ int local_group, struct sg_lb_stats *sgs)
{
unsigned long nr_running, max_nr_running, min_nr_running;
unsigned long load, max_cpu_load, min_cpu_load;
- unsigned int balance_cpu = -1, first_idle_cpu = 0;
unsigned long avg_load_per_task = 0;
int i;

- if (local_group)
- balance_cpu = group_balance_cpu(group);
-
/* Tally up the load of all CPUs in the group */
max_cpu_load = 0;
min_cpu_load = ~0UL;
@@ -4538,15 +4533,9 @@ static inline void update_sg_lb_stats(st
nr_running = rq->nr_running;

/* Bias balancing toward cpus of our domain */
- if (local_group) {
- if (idle_cpu(i) && !first_idle_cpu &&
- cpumask_test_cpu(i, sched_group_mask(group))) {
- first_idle_cpu = 1;
- balance_cpu = i;
- }
-
+ if (local_group)
load = target_load(i, load_idx);
- } else {
+ else {
load = source_load(i, load_idx);
if (load > max_cpu_load)
max_cpu_load = load;
@@ -4566,22 +4555,9 @@ static inline void update_sg_lb_stats(st
sgs->idle_cpus++;
}

- /*
- * First idle cpu or the first cpu(busiest) in this sched group
- * is eligible for doing load balancing at this and above
- * domains. In the newly idle case, we will allow all the cpu's
- * to do the newly idle load balance.
- */
- if (local_group) {
- if (env->idle != CPU_NEWLY_IDLE) {
- if (balance_cpu != env->dst_cpu) {
- *balance = 0;
- return;
- }
- update_group_power(env->sd, env->dst_cpu);
- } else if (time_after_eq(jiffies, group->sgp->next_update))
- update_group_power(env->sd, env->dst_cpu);
- }
+ if (local_group && (env->idle != CPU_NEWLY_IDLE ||
+ time_after_eq(jiffies, group->sgp->next_update)))
+ update_group_power(env->sd, env->dst_cpu);

/* Adjust by relative CPU power of the group */
sgs->avg_load = (sgs->group_load*SCHED_POWER_SCALE) / group->sgp->power;
@@ -4663,7 +4639,7 @@ static bool update_sd_pick_busiest(struc
* @sds: variable to hold the statistics for this sched_domain.
*/
static inline void update_sd_lb_stats(struct lb_env *env,
- int *balance, struct sd_lb_stats *sds)
+ struct sd_lb_stats *sds)
{
struct sched_domain *child = env->sd->child;
struct sched_group *sg = env->sd->groups;
@@ -4680,10 +4656,7 @@ static inline void update_sd_lb_stats(st

local_group = cpumask_test_cpu(env->dst_cpu, sched_group_cpus(sg));
memset(&sgs, 0, sizeof(sgs));
- update_sg_lb_stats(env, sg, load_idx, local_group, balance, &sgs);
-
- if (local_group && !(*balance))
- return;
+ update_sg_lb_stats(env, sg, load_idx, local_group, &sgs);

sds->total_load += sgs.group_load;
sds->total_pwr += sg->sgp->power;
@@ -4916,8 +4889,6 @@ static inline void calculate_imbalance(s
* to restore balance.
*
* @env: The load balancing environment.
- * @balance: Pointer to a variable indicating if this_cpu
- * is the appropriate cpu to perform load balancing at this_level.
*
* Return: - The busiest group if imbalance exists.
* - If no imbalance and user has opted for power-savings balance,
@@ -4925,7 +4896,7 @@ static inline void calculate_imbalance(s
* put to idle by rebalancing its tasks onto our group.
*/
static struct sched_group *
-find_busiest_group(struct lb_env *env, int *balance)
+find_busiest_group(struct lb_env *env)
{
struct sd_lb_stats sds;

@@ -4935,14 +4906,7 @@ find_busiest_group(struct lb_env *env, i
* Compute the various statistics relavent for load balancing at
* this level.
*/
- update_sd_lb_stats(env, balance, &sds);
-
- /*
- * this_cpu is not the appropriate cpu to perform load balancing at
- * this level.
- */
- if (!(*balance))
- goto ret;
+ update_sd_lb_stats(env, &sds);

if ((env->idle == CPU_IDLE || env->idle == CPU_NEWLY_IDLE) &&
check_asym_packing(env, &sds))
@@ -5006,7 +4970,6 @@ find_busiest_group(struct lb_env *env, i
return sds.busiest;

out_balanced:
-ret:
env->imbalance = 0;
return NULL;
}
@@ -5088,13 +5051,47 @@ static int need_active_balance(struct lb

static int active_load_balance_cpu_stop(void *data);

+static int should_we_balance(struct lb_env *env)
+{
+ struct sched_group *sg = env->sd->groups;
+ struct cpumask *sg_cpus, *sg_mask;
+ int cpu, balance_cpu = -1;
+
+ /*
+ * In the newly idle case, we will allow all the cpu's
+ * to do the newly idle load balance.
+ */
+ if (env->idle == CPU_NEWLY_IDLE)
+ return 1;
+
+ sg_cpus = sched_group_cpus(sg);
+ sg_mask = sched_group_mask(sg);
+ /* Try to find first idle cpu */
+ for_each_cpu_and(cpu, sg_cpus, env->cpus) {
+ if (!cpumask_test_cpu(cpu, sg_mask) || !idle_cpu(cpu))
+ continue;
+
+ balance_cpu = cpu;
+ break;
+ }
+
+ if (balance_cpu == -1)
+ balance_cpu = group_balance_cpu(sg);
+
+ /*
+ * First idle cpu or the first cpu(busiest) in this sched group
+ * is eligible for doing load balancing at this and above domains.
+ */
+ return balance_cpu != env->dst_cpu;
+}
+
/*
* Check this_cpu to ensure it is balanced within domain. Attempt to move
* tasks if there is an imbalance.
*/
static int load_balance(int this_cpu, struct rq *this_rq,
struct sched_domain *sd, enum cpu_idle_type idle,
- int *balance)
+ int *should_balance)
{
int ld_moved, cur_ld_moved, active_balance = 0;
struct sched_group *group;
@@ -5123,12 +5120,11 @@ static int load_balance(int this_cpu, st

schedstat_inc(sd, lb_count[idle]);

-redo:
- group = find_busiest_group(&env, balance);
-
- if (*balance == 0)
+ if (!(*should_balance = should_we_balance(&env)))
goto out_balanced;

+redo:
+ group = find_busiest_group(&env);
if (!group) {
schedstat_inc(sd, lb_nobusyg[idle]);
goto out_balanced;
@@ -5340,7 +5336,7 @@ void idle_balance(int this_cpu, struct r
rcu_read_lock();
for_each_domain(this_cpu, sd) {
unsigned long interval;
- int balance = 1;
+ int should_balance;

if (!(sd->flags & SD_LOAD_BALANCE))
continue;
@@ -5348,7 +5344,8 @@ void idle_balance(int this_cpu, struct r
if (sd->flags & SD_BALANCE_NEWIDLE) {
/* If we've pulled tasks over stop searching: */
pulled_task = load_balance(this_cpu, this_rq,
- sd, CPU_NEWLY_IDLE, &balance);
+ sd, CPU_NEWLY_IDLE,
+ &should_balance);
}

interval = msecs_to_jiffies(sd->balance_interval);
@@ -5586,7 +5583,7 @@ void update_max_interval(void)
*/
static void rebalance_domains(int cpu, enum cpu_idle_type idle)
{
- int balance = 1;
+ int should_balance = 1;
struct rq *rq = cpu_rq(cpu);
unsigned long interval;
struct sched_domain *sd;
@@ -5618,7 +5615,7 @@ static void rebalance_domains(int cpu, e
}

if (time_after_eq(jiffies, sd->last_balance + interval)) {
- if (load_balance(cpu, rq, sd, idle, &balance)) {
+ if (load_balance(cpu, rq, sd, idle, &should_balance)) {
/*
* The LBF_SOME_PINNED logic could have changed
* env->dst_cpu, so we can't know our idle
@@ -5641,7 +5638,7 @@ static void rebalance_domains(int cpu, e
* CPU in our sched group which is doing load balancing more
* actively.
*/
- if (!balance)
+ if (!should_balance)
break;
}
rcu_read_unlock();


2013-08-22 09:59:03

by Paul Turner

Subject: Re: [PATCH 02/10] sched: Factor out code to should_we_balance()

On Mon, Aug 19, 2013 at 9:01 AM, Peter Zijlstra <[email protected]> wrote:
> From: Joonsoo Kim <[email protected]>
>
> Checking whether this cpu is an appropriate one to balance is currently
> embedded in update_sg_lb_stats(), although this check has no direct
> relationship to that function. There is not enough reason to place
> the check in update_sg_lb_stats(), except saving one iteration over
> sched_group_cpus.
>
> In this patch, I factor out this check into a should_we_balance() function,
> and before doing the actual load-balancing work, check whether this cpu is
> appropriate to balance via should_we_balance(). If this cpu is not
> a candidate for balancing, load_balance() quits immediately.
>
> With this change, we save two memset calls and can expect better
> compiler optimization.
>
> Below is the result of this patch.
>
> * Vanilla *
> text data bss dec hex filename
> 34499 1136 116 35751 8ba7 kernel/sched/fair.o
>
> * Patched *
> text data bss dec hex filename
> 34243 1136 116 35495 8aa7 kernel/sched/fair.o
>
> In addition, rename @balance to @should_balance in order to represent
> its purpose more clearly.
>
> Signed-off-by: Joonsoo Kim <[email protected]>
> [peterz: style changes and a fix in should_we_balance() ]
> Signed-off-by: Peter Zijlstra <[email protected]>
> Link: http://lkml.kernel.org/r/[email protected]
> ---
> kernel/sched/fair.c | 111 +++++++++++++++++++++++++---------------------------
> 1 file changed, 54 insertions(+), 57 deletions(-)
>
> --- a/kernel/sched/fair.c
> +++ b/kernel/sched/fair.c
> @@ -4510,22 +4510,17 @@ fix_small_capacity(struct sched_domain *
> * @group: sched_group whose statistics are to be updated.
> * @load_idx: Load index of sched_domain of this_cpu for load calc.
> * @local_group: Does group contain this_cpu.
> - * @balance: Should we balance.
> * @sgs: variable to hold the statistics for this group.
> */
> static inline void update_sg_lb_stats(struct lb_env *env,
> struct sched_group *group, int load_idx,
> - int local_group, int *balance, struct sg_lb_stats *sgs)
> + int local_group, struct sg_lb_stats *sgs)
> {
> unsigned long nr_running, max_nr_running, min_nr_running;
> unsigned long load, max_cpu_load, min_cpu_load;
> - unsigned int balance_cpu = -1, first_idle_cpu = 0;
> unsigned long avg_load_per_task = 0;
> int i;
>
> - if (local_group)
> - balance_cpu = group_balance_cpu(group);
> -
> /* Tally up the load of all CPUs in the group */
> max_cpu_load = 0;
> min_cpu_load = ~0UL;
> @@ -4538,15 +4533,9 @@ static inline void update_sg_lb_stats(st
> nr_running = rq->nr_running;
>
> /* Bias balancing toward cpus of our domain */
> - if (local_group) {
> - if (idle_cpu(i) && !first_idle_cpu &&
> - cpumask_test_cpu(i, sched_group_mask(group))) {
> - first_idle_cpu = 1;
> - balance_cpu = i;
> - }
> -
> + if (local_group)
> load = target_load(i, load_idx);

Keep the braces here:

if (local_group) {
load = target_load(i, load_idx);
} else {
...

> - } else {
> + else {
> load = source_load(i, load_idx);
> if (load > max_cpu_load)
> max_cpu_load = load;
> @@ -4566,22 +4555,9 @@ static inline void update_sg_lb_stats(st
> sgs->idle_cpus++;
> }
>
> - /*
> - * First idle cpu or the first cpu(busiest) in this sched group
> - * is eligible for doing load balancing at this and above
> - * domains. In the newly idle case, we will allow all the cpu's
> - * to do the newly idle load balance.
> - */
> - if (local_group) {
> - if (env->idle != CPU_NEWLY_IDLE) {
> - if (balance_cpu != env->dst_cpu) {
> - *balance = 0;
> - return;
> - }
> - update_group_power(env->sd, env->dst_cpu);
> - } else if (time_after_eq(jiffies, group->sgp->next_update))
> - update_group_power(env->sd, env->dst_cpu);
> - }
> + if (local_group && (env->idle != CPU_NEWLY_IDLE ||
> + time_after_eq(jiffies, group->sgp->next_update)))
> + update_group_power(env->sd, env->dst_cpu);
>
> /* Adjust by relative CPU power of the group */
> sgs->avg_load = (sgs->group_load*SCHED_POWER_SCALE) / group->sgp->power;
> @@ -4663,7 +4639,7 @@ static bool update_sd_pick_busiest(struc
> * @sds: variable to hold the statistics for this sched_domain.
> */
> static inline void update_sd_lb_stats(struct lb_env *env,
> - int *balance, struct sd_lb_stats *sds)
> + struct sd_lb_stats *sds)
> {
> struct sched_domain *child = env->sd->child;
> struct sched_group *sg = env->sd->groups;
> @@ -4680,10 +4656,7 @@ static inline void update_sd_lb_stats(st
>
> local_group = cpumask_test_cpu(env->dst_cpu, sched_group_cpus(sg));
> memset(&sgs, 0, sizeof(sgs));
> - update_sg_lb_stats(env, sg, load_idx, local_group, balance, &sgs);
> -
> - if (local_group && !(*balance))
> - return;
> + update_sg_lb_stats(env, sg, load_idx, local_group, &sgs);
>
> sds->total_load += sgs.group_load;
> sds->total_pwr += sg->sgp->power;
> @@ -4916,8 +4889,6 @@ static inline void calculate_imbalance(s
> * to restore balance.
> *
> * @env: The load balancing environment.
> - * @balance: Pointer to a variable indicating if this_cpu
> - * is the appropriate cpu to perform load balancing at this_level.
> *
> * Return: - The busiest group if imbalance exists.
> * - If no imbalance and user has opted for power-savings balance,
> @@ -4925,7 +4896,7 @@ static inline void calculate_imbalance(s
> * put to idle by rebalancing its tasks onto our group.
> */
> static struct sched_group *
> -find_busiest_group(struct lb_env *env, int *balance)
> +find_busiest_group(struct lb_env *env)
> {
> struct sd_lb_stats sds;
>
> @@ -4935,14 +4906,7 @@ find_busiest_group(struct lb_env *env, i
> * Compute the various statistics relavent for load balancing at
> * this level.
> */
> - update_sd_lb_stats(env, balance, &sds);
> -
> - /*
> - * this_cpu is not the appropriate cpu to perform load balancing at
> - * this level.
> - */
> - if (!(*balance))
> - goto ret;
> + update_sd_lb_stats(env, &sds);
>
> if ((env->idle == CPU_IDLE || env->idle == CPU_NEWLY_IDLE) &&
> check_asym_packing(env, &sds))
> @@ -5006,7 +4970,6 @@ find_busiest_group(struct lb_env *env, i
> return sds.busiest;
>
> out_balanced:
> -ret:
> env->imbalance = 0;
> return NULL;
> }
> @@ -5088,13 +5051,47 @@ static int need_active_balance(struct lb
>
> static int active_load_balance_cpu_stop(void *data);
>
> +static int should_we_balance(struct lb_env *env)
> +{
> + struct sched_group *sg = env->sd->groups;
> + struct cpumask *sg_cpus, *sg_mask;
> + int cpu, balance_cpu = -1;
> +
> + /*
> + * In the newly idle case, we will allow all the cpu's
> + * to do the newly idle load balance.
> + */
> + if (env->idle == CPU_NEWLY_IDLE)
> + return 1;
> +
> + sg_cpus = sched_group_cpus(sg);
> + sg_mask = sched_group_mask(sg);
> + /* Try to find first idle cpu */
> + for_each_cpu_and(cpu, sg_cpus, env->cpus) {
> + if (!cpumask_test_cpu(cpu, sg_mask) || !idle_cpu(cpu))
> + continue;
> +
> + balance_cpu = cpu;
> + break;
> + }
> +
> + if (balance_cpu == -1)
> + balance_cpu = group_balance_cpu(sg);
> +
> + /*
> + * First idle cpu or the first cpu(busiest) in this sched group
> + * is eligible for doing load balancing at this and above domains.
> + */
> + return balance_cpu != env->dst_cpu;
> +}
> +
> /*
> * Check this_cpu to ensure it is balanced within domain. Attempt to move
> * tasks if there is an imbalance.
> */
> static int load_balance(int this_cpu, struct rq *this_rq,
> struct sched_domain *sd, enum cpu_idle_type idle,
> - int *balance)
> + int *should_balance)
> {
> int ld_moved, cur_ld_moved, active_balance = 0;
> struct sched_group *group;
> @@ -5123,12 +5120,11 @@ static int load_balance(int this_cpu, st
>
> schedstat_inc(sd, lb_count[idle]);
>
> -redo:
> - group = find_busiest_group(&env, balance);
> -
> - if (*balance == 0)
> + if (!(*should_balance = should_we_balance(&env)))
> goto out_balanced;

Given we always initialize *should_balance where we care, it might be
more readable to write this as:

if (!should_we_balance(&env)) {
*continue_balancing = 0;
goto out_balanced;
}

[ With a rename to can_balance ]

>
> +redo:

One behavioral change worth noting here is that in the redo case if a
CPU has become idle we'll continue trying to load-balance in the
!new-idle case.

This could be unpleasant in the case where a package has a pinned busy
core allowing this and a newly idle cpu to start dueling for load.

While more deterministically bad in this case now, it could racily do
this before anyway so perhaps not worth worrying about immediately.

> + group = find_busiest_group(&env);
> if (!group) {
> schedstat_inc(sd, lb_nobusyg[idle]);
> goto out_balanced;
> @@ -5340,7 +5336,7 @@ void idle_balance(int this_cpu, struct r
> rcu_read_lock();
> for_each_domain(this_cpu, sd) {
> unsigned long interval;
> - int balance = 1;
> + int should_balance;
>
> if (!(sd->flags & SD_LOAD_BALANCE))
> continue;
> @@ -5348,7 +5344,8 @@ void idle_balance(int this_cpu, struct r
> if (sd->flags & SD_BALANCE_NEWIDLE) {
> /* If we've pulled tasks over stop searching: */
> pulled_task = load_balance(this_cpu, this_rq,
> - sd, CPU_NEWLY_IDLE, &balance);
> + sd, CPU_NEWLY_IDLE,
> + &should_balance);
> }
>
> interval = msecs_to_jiffies(sd->balance_interval);
> @@ -5586,7 +5583,7 @@ void update_max_interval(void)
> */
> static void rebalance_domains(int cpu, enum cpu_idle_type idle)
> {
> - int balance = 1;
> + int should_balance = 1;
> struct rq *rq = cpu_rq(cpu);
> unsigned long interval;
> struct sched_domain *sd;
> @@ -5618,7 +5615,7 @@ static void rebalance_domains(int cpu, e
> }
>
> if (time_after_eq(jiffies, sd->last_balance + interval)) {
> - if (load_balance(cpu, rq, sd, idle, &balance)) {
> + if (load_balance(cpu, rq, sd, idle, &should_balance)) {
> /*
> * The LBF_SOME_PINNED logic could have changed
> * env->dst_cpu, so we can't know our idle
> @@ -5641,7 +5638,7 @@ static void rebalance_domains(int cpu, e
> * CPU in our sched group which is doing load balancing more
> * actively.
> */
> - if (!balance)
> + if (!should_balance)
> break;
> }
> rcu_read_unlock();
>
>

Reviewed-by: Paul Turner <[email protected]>

2013-08-22 10:43:13

by Peter Zijlstra

Subject: Re: [PATCH 02/10] sched: Factor out code to should_we_balance()

On Thu, Aug 22, 2013 at 02:58:27AM -0700, Paul Turner wrote:
> On Mon, Aug 19, 2013 at 9:01 AM, Peter Zijlstra <[email protected]> wrote:

> > + if (local_group)
> > load = target_load(i, load_idx);
>
> Keep the braces here:
>
> if (local_group) {
> load = target_load(i, load_idx);
> } else {
> ...

Right you are, I misplaced that hunk in the next patch.

> > - } else {
> > + else {
> > load = source_load(i, load_idx);
> > if (load > max_cpu_load)
> > max_cpu_load = load;


> > @@ -5123,12 +5120,11 @@ static int load_balance(int this_cpu, st
> >
> > schedstat_inc(sd, lb_count[idle]);
> >
> > -redo:
> > - group = find_busiest_group(&env, balance);
> > -
> > - if (*balance == 0)
> > + if (!(*should_balance = should_we_balance(&env)))
> > goto out_balanced;
>
> Given we always initialize *should_balance where we care, it might be
> more readable to write this as:

Ah, but we don't actually, idle_balance() doesn't initialize
should_balance -- easy enough to fix though.

> if (!should_we_balance(&env)) {
> *continue_balancing = 0;
> goto out_balanced;
> }
>
> [ With a rename to can_balance ]

You confused me, your code implied
%s/should_balance/continue_balancing/g but now you suggest
%%s/should_balance/can_balance/g ?

I'm fine with either.

> >
> > +redo:
>
> One behavioral change worth noting here is that in the redo case if a
> CPU has become idle we'll continue trying to load-balance in the
> !new-idle case.
>
> This could be unpleasant in the case where a package has a pinned busy
> core allowing this and a newly idle cpu to start dueling for load.
>
> While more deterministically bad in this case now, it could racily do
> this before anyway so perhaps not worth worrying about immediately.

Ah, because the old code would effectively redo the check and find the
idle cpu and thereby our cpu would no longer be the balance_cpu.

Indeed. And I don't think this was an intentional change. I'll go put
the redo back before should_we_balance().
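Roughly (sketch only; this keeps the *should_balance name from the posting
above, the rename question is separate):

redo:
	if (!(*should_balance = should_we_balance(&env)))
		goto out_balanced;

	group = find_busiest_group(&env);
	if (!group) {
		schedstat_inc(sd, lb_nobusyg[idle]);
		goto out_balanced;
	}

That way a goto redo re-evaluates whether this cpu is still the one that
should be doing the balancing, like the old code effectively did.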

2013-08-23 04:51:45

by Joonsoo Kim

Subject: Re: [PATCH 02/10] sched: Factor out code to should_we_balance()

On Thu, Aug 22, 2013 at 12:42:57PM +0200, Peter Zijlstra wrote:
> > >
> > > +redo:
> >
> > One behavioral change worth noting here is that in the redo case if a
> > CPU has become idle we'll continue trying to load-balance in the
> > !new-idle case.
> >
> > This could be unpleasant in the case where a package has a pinned busy
> > core allowing this and a newly idle cpu to start dueling for load.
> >
> > While more deterministically bad in this case now, it could racily do
> > this before anyway so perhaps not worth worrying about immediately.
>
> Ah, because the old code would effectively redo the check and find the
> idle cpu and thereby our cpu would no longer be the balance_cpu.
>
> Indeed. And I don't think this was an intentional change. I'll go put
> the redo back before should_we_balance().

Ah, yes.
That wasn't my intention. Please fix it.

Thanks.

2013-08-23 11:38:11

by Paul Turner

Subject: Re: [PATCH 02/10] sched: Factor out code to should_we_balance()

On Thu, Aug 22, 2013 at 3:42 AM, Peter Zijlstra <[email protected]> wrote:
> On Thu, Aug 22, 2013 at 02:58:27AM -0700, Paul Turner wrote:
>> On Mon, Aug 19, 2013 at 9:01 AM, Peter Zijlstra <[email protected]> wrote:
>
>> > + if (local_group)
>> > load = target_load(i, load_idx);
>>
>> Keep the braces here:
>>
>> if (local_group) {
>> load = target_load(i, load_idx);
>> } else {
>> ...
>
> Right you are, I misplaced that hunk in the next patch.
>
>> > - } else {
>> > + else {
>> > load = source_load(i, load_idx);
>> > if (load > max_cpu_load)
>> > max_cpu_load = load;
>
>
>> > @@ -5123,12 +5120,11 @@ static int load_balance(int this_cpu, st
>> >
>> > schedstat_inc(sd, lb_count[idle]);
>> >
>> > -redo:
>> > - group = find_busiest_group(&env, balance);
>> > -
>> > - if (*balance == 0)
>> > + if (!(*should_balance = should_we_balance(&env)))
>> > goto out_balanced;
>>
>> Given we always initialize *should_balance where we care, it might be
>> more readable to write this as:
>
> Ah, but we don't actually, idle_balance() doesn't initialize
> should_balance -- easy enough to fix though.

I should have been more explicit here.

idle_balance() doesn't initialize it, but it completely ignores the
result; it's just passing something in case a write occurs.
(Arguably it should be int unused = 1 instead of int balance = 1)
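i.e., something like this in idle_balance() (just a sketch):

	int unused = 1;	/* load_balance() may write here; idle_balance() never reads it */

	pulled_task = load_balance(this_cpu, this_rq,
				   sd, CPU_NEWLY_IDLE, &unused);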

>
>> if (!should_we_balance(&env)) {
>> *continue_balancing = 0;
>> goto out_balanced;
>> }
>>
>> [ With a rename to can_balance ]
>
> You confused me, your code implied
> %s/should_balance/continue_balancing/g but now you suggest
> %%s/should_balance/can_balance/g ?
>
> I'm fine with either.

My bad, I started typing this comment, then went and looked at other
parts of the patch and came back to it :)

I think continue_balancing is more clear.
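For the callers that do consume the result, e.g. rebalance_domains(), that
would then read something like (sketch):

	int continue_balancing = 1;
	...
	if (load_balance(cpu, rq, sd, idle, &continue_balancing))
		...
	/*
	 * Stop the load balance at this level: another CPU in our
	 * sched group is doing load balancing more actively.
	 */
	if (!continue_balancing)
		break;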

>
>> >
>> > +redo:
>>
>> One behavioral change worth noting here is that in the redo case if a
>> CPU has become idle we'll continue trying to load-balance in the
>> !new-idle case.
>>
>> This could be unpleasant in the case where a package has a pinned busy
>> core allowing this and a newly idle cpu to start dueling for load.
>>
>> While more deterministically bad in this case now, it could racily do
>> this before anyway so perhaps not worth worrying about immediately.
>
> Ah, because the old code would effectively redo the check and find the
> idle cpu and thereby our cpu would no longer be the balance_cpu.
>
> Indeed. And I don't think this was an intentional change. I'll go put
> the redo back before should_we_balance().
>

I was trying to get through the rest of these today but I'm out of
time. I should be able to finish reviewing the other patches in the
set tomorrow.