In order to prepare for a more dynamic cpu_power, update the group sum
while walking the sched domains during load-balance.
Signed-off-by: Peter Zijlstra <[email protected]>
---
kernel/sched.c | 33 +++++++++++++++++++++++++++++----
1 file changed, 29 insertions(+), 4 deletions(-)
Index: linux-2.6/kernel/sched.c
===================================================================
--- linux-2.6.orig/kernel/sched.c
+++ linux-2.6/kernel/sched.c
@@ -3699,6 +3699,28 @@ static inline int check_power_save_busie
}
#endif /* CONFIG_SCHED_MC || CONFIG_SCHED_SMT */
+static void update_sched_power(struct sched_domain *sd)
+{
+ struct sched_domain *child = sd->child;
+ struct sched_group *group, *sdg = sd->groups;
+ unsigned long power = sdg->__cpu_power;
+
+ if (!child) {
+ /* compute cpu power for this cpu */
+ return;
+ }
+
+ sdg->__cpu_power = 0;
+
+ group = child->groups;
+ do {
+ sdg->__cpu_power += group->__cpu_power;
+ group = group->next;
+ } while (group != child->groups);
+
+ if (power != sdg->__cpu_power)
+ sdg->reciprocal_cpu_power = reciprocal_value(sdg->__cpu_power);
+}
/**
* update_sg_lb_stats - Update sched_group's statistics for load balancing.
@@ -3712,7 +3734,8 @@ static inline int check_power_save_busie
* @balance: Should we balance.
* @sgs: variable to hold the statistics for this group.
*/
-static inline void update_sg_lb_stats(struct sched_group *group, int this_cpu,
+static inline void update_sg_lb_stats(struct sched_domain *sd,
+ struct sched_group *group, int this_cpu,
enum cpu_idle_type idle, int load_idx, int *sd_idle,
int local_group, const struct cpumask *cpus,
int *balance, struct sg_lb_stats *sgs)
@@ -3723,8 +3746,11 @@ static inline void update_sg_lb_stats(st
unsigned long sum_avg_load_per_task;
unsigned long avg_load_per_task;
- if (local_group)
+ if (local_group) {
balance_cpu = group_first_cpu(group);
+ if (balance_cpu == this_cpu)
+ update_sched_power(sd);
+ }
/* Tally up the load of all CPUs in the group */
sum_avg_load_per_task = avg_load_per_task = 0;
@@ -3828,7 +3854,7 @@ static inline void update_sd_lb_stats(st
local_group = cpumask_test_cpu(this_cpu,
sched_group_cpus(group));
memset(&sgs, 0, sizeof(sgs));
- update_sg_lb_stats(group, this_cpu, idle, load_idx, sd_idle,
+ update_sg_lb_stats(sd, group, this_cpu, idle, load_idx, sd_idle,
local_group, cpus, balance, &sgs);
if (local_group && balance && !(*balance))
@@ -3863,7 +3889,6 @@ static inline void update_sd_lb_stats(st
update_sd_power_savings_stats(group, sds, local_group, &sgs);
group = group->next;
} while (group != sd->groups);
-
}
/**
--
On Tue, Sep 01, 2009 at 10:34:34AM +0200, Peter Zijlstra wrote:
> In order to prepare for a more dynamic cpu_power, update the group sum
> while walking the sched domains during load-balance.
>
> Signed-off-by: Peter Zijlstra <[email protected]>
> ---
> kernel/sched.c | 33 +++++++++++++++++++++++++++++----
> 1 file changed, 29 insertions(+), 4 deletions(-)
>
> Index: linux-2.6/kernel/sched.c
> ===================================================================
> --- linux-2.6.orig/kernel/sched.c
> +++ linux-2.6/kernel/sched.c
> @@ -3699,6 +3699,28 @@ static inline int check_power_save_busie
> }
> #endif /* CONFIG_SCHED_MC || CONFIG_SCHED_SMT */
>
> +static void update_sched_power(struct sched_domain *sd)
> +{
> + struct sched_domain *child = sd->child;
> + struct sched_group *group, *sdg = sd->groups;
> + unsigned long power = sdg->__cpu_power;
> +
> + if (!child) {
> + /* compute cpu power for this cpu */
> + return;
> + }
> +
> + sdg->__cpu_power = 0;
> +
> + group = child->groups;
> + do {
> + sdg->__cpu_power += group->__cpu_power;
> + group = group->next;
> + } while (group != child->groups);
> +
> + if (power != sdg->__cpu_power)
> + sdg->reciprocal_cpu_power = reciprocal_value(sdg->__cpu_power);
> +}
>
> /**
> * update_sg_lb_stats - Update sched_group's statistics for load balancing.
> @@ -3712,7 +3734,8 @@ static inline int check_power_save_busie
> * @balance: Should we balance.
> * @sgs: variable to hold the statistics for this group.
> */
> -static inline void update_sg_lb_stats(struct sched_group *group, int this_cpu,
> +static inline void update_sg_lb_stats(struct sched_domain *sd,
> + struct sched_group *group, int this_cpu,
> enum cpu_idle_type idle, int load_idx, int *sd_idle,
> int local_group, const struct cpumask *cpus,
> int *balance, struct sg_lb_stats *sgs)
> @@ -3723,8 +3746,11 @@ static inline void update_sg_lb_stats(st
> unsigned long sum_avg_load_per_task;
> unsigned long avg_load_per_task;
>
> - if (local_group)
> + if (local_group) {
> balance_cpu = group_first_cpu(group);
> + if (balance_cpu == this_cpu)
> + update_sched_power(sd);
> + }
I guess the intention of this check is to ensure that the cpu_power for
the group of sd is updated only by a specific member of the group, and
that would ideally be the first member of the group.
Thus, this check has more to do with this_cpu being the
group_first_cpu() than with this_cpu being the balance_cpu. Correct?
>
> /* Tally up the load of all CPUs in the group */
> sum_avg_load_per_task = avg_load_per_task = 0;
> @@ -3828,7 +3854,7 @@ static inline void update_sd_lb_stats(st
> local_group = cpumask_test_cpu(this_cpu,
> sched_group_cpus(group));
> memset(&sgs, 0, sizeof(sgs));
> - update_sg_lb_stats(group, this_cpu, idle, load_idx, sd_idle,
> + update_sg_lb_stats(sd, group, this_cpu, idle, load_idx, sd_idle,
> local_group, cpus, balance, &sgs);
>
> if (local_group && balance && !(*balance))
> @@ -3863,7 +3889,6 @@ static inline void update_sd_lb_stats(st
> update_sd_power_savings_stats(group, sds, local_group, &sgs);
> group = group->next;
> } while (group != sd->groups);
> -
> }
>
> /**
>
> --
--
Thanks and Regards
gautham
On Wed, 2009-09-02 at 16:47 +0530, Gautham R Shenoy wrote:
> > + if (local_group) {
> > balance_cpu = group_first_cpu(group);
> > + if (balance_cpu == this_cpu)
> > + update_sched_power(sd);
> > + }
>
> I guess the intention of this check is to ensure that the cpu_power for
> the group of sd is updated only by a specific member of the group, and
> that would ideally be the first member of the group.
>
> Thus, this check has more to do with this_cpu being the
> group_first_cpu() than with this_cpu being the balance_cpu. Correct?
Right, so the load-balancer walks the sd tree in such a fashion that only
the first cpu of the domain goes up a level. So I made that cpu also
aggregate the new cpu-power.
Commit-ID: cc9fba7d7672fa3ed58d9d9ecb6c45b1351c29a6
Gitweb: http://git.kernel.org/tip/cc9fba7d7672fa3ed58d9d9ecb6c45b1351c29a6
Author: Peter Zijlstra <[email protected]>
AuthorDate: Tue, 1 Sep 2009 10:34:34 +0200
Committer: Ingo Molnar <[email protected]>
CommitDate: Fri, 4 Sep 2009 10:09:53 +0200
sched: Update the cpu_power sum during load-balance
In order to prepare for a more dynamic cpu_power, update the
group sum while walking the sched domains during load-balance.
Signed-off-by: Peter Zijlstra <[email protected]>
Tested-by: Andreas Herrmann <[email protected]>
Acked-by: Andreas Herrmann <[email protected]>
Acked-by: Gautham R Shenoy <[email protected]>
Cc: Balbir Singh <[email protected]>
LKML-Reference: <[email protected]>
Signed-off-by: Ingo Molnar <[email protected]>
---
kernel/sched.c | 33 +++++++++++++++++++++++++++++----
1 files changed, 29 insertions(+), 4 deletions(-)
diff --git a/kernel/sched.c b/kernel/sched.c
index 9d64cec..ecb4a47 100644
--- a/kernel/sched.c
+++ b/kernel/sched.c
@@ -3699,6 +3699,28 @@ static inline int check_power_save_busiest_group(struct sd_lb_stats *sds,
}
#endif /* CONFIG_SCHED_MC || CONFIG_SCHED_SMT */
+static void update_sched_power(struct sched_domain *sd)
+{
+ struct sched_domain *child = sd->child;
+ struct sched_group *group, *sdg = sd->groups;
+ unsigned long power = sdg->__cpu_power;
+
+ if (!child) {
+ /* compute cpu power for this cpu */
+ return;
+ }
+
+ sdg->__cpu_power = 0;
+
+ group = child->groups;
+ do {
+ sdg->__cpu_power += group->__cpu_power;
+ group = group->next;
+ } while (group != child->groups);
+
+ if (power != sdg->__cpu_power)
+ sdg->reciprocal_cpu_power = reciprocal_value(sdg->__cpu_power);
+}
/**
* update_sg_lb_stats - Update sched_group's statistics for load balancing.
@@ -3712,7 +3734,8 @@ static inline int check_power_save_busiest_group(struct sd_lb_stats *sds,
* @balance: Should we balance.
* @sgs: variable to hold the statistics for this group.
*/
-static inline void update_sg_lb_stats(struct sched_group *group, int this_cpu,
+static inline void update_sg_lb_stats(struct sched_domain *sd,
+ struct sched_group *group, int this_cpu,
enum cpu_idle_type idle, int load_idx, int *sd_idle,
int local_group, const struct cpumask *cpus,
int *balance, struct sg_lb_stats *sgs)
@@ -3723,8 +3746,11 @@ static inline void update_sg_lb_stats(struct sched_group *group, int this_cpu,
unsigned long sum_avg_load_per_task;
unsigned long avg_load_per_task;
- if (local_group)
+ if (local_group) {
balance_cpu = group_first_cpu(group);
+ if (balance_cpu == this_cpu)
+ update_sched_power(sd);
+ }
/* Tally up the load of all CPUs in the group */
sum_avg_load_per_task = avg_load_per_task = 0;
@@ -3828,7 +3854,7 @@ static inline void update_sd_lb_stats(struct sched_domain *sd, int this_cpu,
local_group = cpumask_test_cpu(this_cpu,
sched_group_cpus(group));
memset(&sgs, 0, sizeof(sgs));
- update_sg_lb_stats(group, this_cpu, idle, load_idx, sd_idle,
+ update_sg_lb_stats(sd, group, this_cpu, idle, load_idx, sd_idle,
local_group, cpus, balance, &sgs);
if (local_group && balance && !(*balance))
@@ -3863,7 +3889,6 @@ static inline void update_sd_lb_stats(struct sched_domain *sd, int this_cpu,
update_sd_power_savings_stats(group, sds, local_group, &sgs);
group = group->next;
} while (group != sd->groups);
-
}
/**