2009-09-01 08:51:38

by Peter Zijlstra

[permalink] [raw]
Subject: [RFC][PATCH 7/8] sched: try to deal with low capacity

When the capacity drops low, we want to migrate load away. Allow the
load-balancer to remove all tasks when we hit rock bottom.

Signed-off-by: Peter Zijlstra <[email protected]>
---
kernel/sched.c | 33 ++++++++++++++++++++++++++++-----
1 file changed, 28 insertions(+), 5 deletions(-)

Index: linux-2.6/kernel/sched.c
===================================================================
--- linux-2.6.orig/kernel/sched.c
+++ linux-2.6/kernel/sched.c
@@ -3908,8 +3908,8 @@ static inline void update_sg_lb_stats(st
if ((max_cpu_load - min_cpu_load) > 2*avg_load_per_task)
sgs->group_imb = 1;

- sgs->group_capacity = group->__cpu_power / SCHED_LOAD_SCALE;
-
+ sgs->group_capacity =
+ DIV_ROUND_CLOSEST(group->__cpu_power, SCHED_LOAD_SCALE);
}

/**
@@ -3959,7 +3959,7 @@ static inline void update_sd_lb_stats(st
* and move all the excess tasks away.
*/
if (prefer_sibling)
- sgs.group_capacity = 1;
+ sgs.group_capacity = min(sgs.group_capacity, 1UL);

if (local_group) {
sds->this_load = sgs.avg_load;
@@ -4191,6 +4191,26 @@ ret:
return NULL;
}

+static struct sched_group *group_of(int cpu)
+{
+ struct sched_domain *sd = rcu_dereference(cpu_rq(cpu)->sd);
+
+ if (!sd)
+ return NULL;
+
+ return sd->groups;
+}
+
+static unsigned long power_of(int cpu)
+{
+ struct sched_group *group = group_of(cpu);
+
+ if (!group)
+ return SCHED_LOAD_SCALE;
+
+ return group->__cpu_power;
+}
+
/*
* find_busiest_queue - find the busiest runqueue among the cpus in group.
*/
@@ -4203,15 +4223,18 @@ find_busiest_queue(struct sched_group *g
int i;

for_each_cpu(i, sched_group_cpus(group)) {
+ unsigned long power = power_of(i);
+ unsigned long capacity = DIV_ROUND_CLOSEST(power, SCHED_LOAD_SCALE);
unsigned long wl;

if (!cpumask_test_cpu(i, cpus))
continue;

rq = cpu_rq(i);
- wl = weighted_cpuload(i);
+ wl = weighted_cpuload(i) * SCHED_LOAD_SCALE;
+ wl /= power;

- if (rq->nr_running == 1 && wl > imbalance)
+ if (capacity && rq->nr_running == 1 && wl > imbalance)
continue;

if (wl > max_load) {

--


Subject: Re: [RFC][PATCH 7/8] sched: try to deal with low capacity

On Tue, Sep 01, 2009 at 10:34:38AM +0200, Peter Zijlstra wrote:
> When the capacity drops low, we want to migrate load away. Allow the
> load-balancer to remove all tasks when we hit rock bottom.
>
> Signed-off-by: Peter Zijlstra <[email protected]>
> ---
> kernel/sched.c | 33 ++++++++++++++++++++++++++++-----
> 1 file changed, 28 insertions(+), 5 deletions(-)
>
> Index: linux-2.6/kernel/sched.c
> ===================================================================
> --- linux-2.6.orig/kernel/sched.c
> +++ linux-2.6/kernel/sched.c
> @@ -3908,8 +3908,8 @@ static inline void update_sg_lb_stats(st
> if ((max_cpu_load - min_cpu_load) > 2*avg_load_per_task)
> sgs->group_imb = 1;
>
> - sgs->group_capacity = group->__cpu_power / SCHED_LOAD_SCALE;
> -
> + sgs->group_capacity =
> + DIV_ROUND_CLOSEST(group->__cpu_power, SCHED_LOAD_SCALE);
> }
>
> /**
> @@ -3959,7 +3959,7 @@ static inline void update_sd_lb_stats(st
> * and move all the excess tasks away.
> */
> if (prefer_sibling)
> - sgs.group_capacity = 1;
> + sgs.group_capacity = min(sgs.group_capacity, 1UL);

Ok, this means that sgs.group_capacity can now be 0, if for some reason
group->__cpu_power happens to be less than SCHED_LOAD_SCALE/2.

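In that case, we need the following hunk to make it work for
update_sd_power_savings_stats(). That's because both sum_nr_running and
group_capacity are unsigned longs: with group_capacity == 0, the
expression "group_capacity - 1" wraps around to ULONG_MAX, so the
existing check can never be true and we would never take the early
return.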

---->
diff --git a/kernel/sched.c b/kernel/sched.c
index 12fdbb8..9952292 100644
--- a/kernel/sched.c
+++ b/kernel/sched.c
@@ -3580,7 +3580,7 @@ static inline void update_sd_power_savings_stats(struct sched_group *group,
* capacity but still has some space to pick up some load
* from other group and save more power
*/
- if (sgs->sum_nr_running > sgs->group_capacity - 1)
+ if (sgs->sum_nr_running + 1 > sgs->group_capacity)
return;

if (sgs->sum_nr_running > sds->leader_nr_running ||

---->


>
> if (local_group) {
> sds->this_load = sgs.avg_load;
> @@ -4191,6 +4191,26 @@ ret:
> return NULL;
> }
>

--
Thanks and Regards
gautham

2009-09-04 08:56:57

by Peter Zijlstra

[permalink] [raw]
Subject: [tip:sched/balancing] sched: Try to deal with low capacity

Commit-ID: bdb94aa5dbd8b55e75f5a50b61312fe589e2c2d1
Gitweb: http://git.kernel.org/tip/bdb94aa5dbd8b55e75f5a50b61312fe589e2c2d1
Author: Peter Zijlstra <[email protected]>
AuthorDate: Tue, 1 Sep 2009 10:34:38 +0200
Committer: Ingo Molnar <[email protected]>
CommitDate: Fri, 4 Sep 2009 10:09:55 +0200

sched: Try to deal with low capacity

When the capacity drops low, we want to migrate load away.
Allow the load-balancer to remove all tasks when we hit rock
bottom.

Signed-off-by: Peter Zijlstra <[email protected]>
Tested-by: Andreas Herrmann <[email protected]>
Acked-by: Andreas Herrmann <[email protected]>
Acked-by: Gautham R Shenoy <[email protected]>
Cc: Balbir Singh <[email protected]>
LKML-Reference: <[email protected]>
Signed-off-by: Ingo Molnar <[email protected]>


---
kernel/sched.c | 33 ++++++++++++++++++++++++++++-----
1 files changed, 28 insertions(+), 5 deletions(-)

diff --git a/kernel/sched.c b/kernel/sched.c
index ab532b5..5f5b359 100644
--- a/kernel/sched.c
+++ b/kernel/sched.c
@@ -3908,8 +3908,8 @@ static inline void update_sg_lb_stats(struct sched_domain *sd,
if ((max_cpu_load - min_cpu_load) > 2*avg_load_per_task)
sgs->group_imb = 1;

- sgs->group_capacity = group->__cpu_power / SCHED_LOAD_SCALE;
-
+ sgs->group_capacity =
+ DIV_ROUND_CLOSEST(group->__cpu_power, SCHED_LOAD_SCALE);
}

/**
@@ -3959,7 +3959,7 @@ static inline void update_sd_lb_stats(struct sched_domain *sd, int this_cpu,
* and move all the excess tasks away.
*/
if (prefer_sibling)
- sgs.group_capacity = 1;
+ sgs.group_capacity = min(sgs.group_capacity, 1UL);

if (local_group) {
sds->this_load = sgs.avg_load;
@@ -4191,6 +4191,26 @@ ret:
return NULL;
}

+static struct sched_group *group_of(int cpu)
+{
+ struct sched_domain *sd = rcu_dereference(cpu_rq(cpu)->sd);
+
+ if (!sd)
+ return NULL;
+
+ return sd->groups;
+}
+
+static unsigned long power_of(int cpu)
+{
+ struct sched_group *group = group_of(cpu);
+
+ if (!group)
+ return SCHED_LOAD_SCALE;
+
+ return group->__cpu_power;
+}
+
/*
* find_busiest_queue - find the busiest runqueue among the cpus in group.
*/
@@ -4203,15 +4223,18 @@ find_busiest_queue(struct sched_group *group, enum cpu_idle_type idle,
int i;

for_each_cpu(i, sched_group_cpus(group)) {
+ unsigned long power = power_of(i);
+ unsigned long capacity = DIV_ROUND_CLOSEST(power, SCHED_LOAD_SCALE);
unsigned long wl;

if (!cpumask_test_cpu(i, cpus))
continue;

rq = cpu_rq(i);
- wl = weighted_cpuload(i);
+ wl = weighted_cpuload(i) * SCHED_LOAD_SCALE;
+ wl /= power;

- if (rq->nr_running == 1 && wl > imbalance)
+ if (capacity && rq->nr_running == 1 && wl > imbalance)
continue;

if (wl > max_load) {