2022-01-22 00:33:46

by Chitti Babu Theegala

[permalink] [raw]
Subject: [PATCH v2] sched/fair: Prefer small idle cores for forkees

Newly forked threads don't have any useful utilization data yet and
it's not possible to forecast their impact on energy consumption.
These forkees (though very small, most times) end up waking big
cores from deep sleep for that very small durations.

Bias all forkees to small cores to prevent waking big cores from deep
sleep to save power.

Signed-off-by: Chitti Babu Theegala <[email protected]>
---
Changes in v2:
1. Enclosed the EAS check for this small core preferring logic
---
kernel/sched/fair.c | 18 +++++++++++++-----
1 file changed, 13 insertions(+), 5 deletions(-)

diff --git a/kernel/sched/fair.c b/kernel/sched/fair.c
index c6046446c50b3..72f9ea7c98c05 100644
--- a/kernel/sched/fair.c
+++ b/kernel/sched/fair.c
@@ -5872,7 +5872,7 @@ static int wake_affine(struct sched_domain *sd, struct task_struct *p,
}

static struct sched_group *
-find_idlest_group(struct sched_domain *sd, struct task_struct *p, int this_cpu);
+find_idlest_group(struct sched_domain *sd, struct task_struct *p, int this_cpu, int sd_flag);

/*
* find_idlest_group_cpu - find the idlest CPU among the CPUs in the group.
@@ -5959,7 +5959,7 @@ static inline int find_idlest_cpu(struct sched_domain *sd, struct task_struct *p
continue;
}

- group = find_idlest_group(sd, p, cpu);
+ group = find_idlest_group(sd, p, cpu, sd_flag);
if (!group) {
sd = sd->child;
continue;
@@ -8885,7 +8885,8 @@ static inline void update_sg_wakeup_stats(struct sched_domain *sd,
static bool update_pick_idlest(struct sched_group *idlest,
struct sg_lb_stats *idlest_sgs,
struct sched_group *group,
- struct sg_lb_stats *sgs)
+ struct sg_lb_stats *sgs,
+ int sd_flag)
{
if (sgs->group_type < idlest_sgs->group_type)
return true;
@@ -8922,6 +8923,13 @@ static bool update_pick_idlest(struct sched_group *idlest,
if (idlest_sgs->idle_cpus > sgs->idle_cpus)
return false;

+ if (sched_energy_enabled()) {
+ /* Select smaller cpu group for newly woken up forkees */
+ if ((sd_flag & SD_BALANCE_FORK) && (idlest_sgs->idle_cpus &&
+ !capacity_greater(idlest->sgc->max_capacity, group->sgc->max_capacity)))
+ return false;
+ }
+
/* Select group with lowest group_util */
if (idlest_sgs->idle_cpus == sgs->idle_cpus &&
idlest_sgs->group_util <= sgs->group_util)
@@ -8940,7 +8948,7 @@ static bool update_pick_idlest(struct sched_group *idlest,
* Assumes p is allowed on at least one CPU in sd.
*/
static struct sched_group *
-find_idlest_group(struct sched_domain *sd, struct task_struct *p, int this_cpu)
+find_idlest_group(struct sched_domain *sd, struct task_struct *p, int this_cpu, int sd_flag)
{
struct sched_group *idlest = NULL, *local = NULL, *group = sd->groups;
struct sg_lb_stats local_sgs, tmp_sgs;
@@ -8978,7 +8986,7 @@ find_idlest_group(struct sched_domain *sd, struct task_struct *p, int this_cpu)

update_sg_wakeup_stats(sd, group, sgs, p);

- if (!local_group && update_pick_idlest(idlest, &idlest_sgs, group, sgs)) {
+ if (!local_group && update_pick_idlest(idlest, &idlest_sgs, group, sgs, sd_flag)) {
idlest = group;
idlest_sgs = *sgs;
}
--
2.17.1


2022-01-22 01:42:48

by Vincent Guittot

[permalink] [raw]
Subject: Re: [PATCH v2] sched/fair: Prefer small idle cores for forkees

On Fri, 21 Jan 2022 at 06:03, Chitti Babu Theegala
<[email protected]> wrote:
>
> Newly forked threads don't have any useful utilization data yet and
> it's not possible to forecast their impact on energy consumption.

It would be worth mentioning that you consider only EAS mode in the patch

> These forkees (though very small, most times) end up waking big

The assumption that " (though very small, most times)" is maybe true
for the cases that you are interested in and you monitor, but it's not
always true. It seems that Vincent D. raised some concerns about
forkee which would not be small at the end.

> cores from deep sleep for that very small durations.
>
> Bias all forkees to small cores to prevent waking big cores from deep

you are testing idlest_sgs->idle_cpus so you don't bias to small cores
but small & idle cores but if there is no small idle core then you
will wake up a big core though the forkees are small most times

> sleep to save power.

Then why do you want to wake up a small core from deep state instead
of packing in one of these already running cpus? If you care about
power, selecting a small idle core may not always be the best choice.
Would it be better to select a non idle cpu with largest spare
capacity at the current opp ?

>
> Signed-off-by: Chitti Babu Theegala <[email protected]>
> ---
> Changes in v2:
> 1. Enclosed the EAS check for this small core preferring logic
> ---
> kernel/sched/fair.c | 18 +++++++++++++-----
> 1 file changed, 13 insertions(+), 5 deletions(-)
>
> diff --git a/kernel/sched/fair.c b/kernel/sched/fair.c
> index c6046446c50b3..72f9ea7c98c05 100644
> --- a/kernel/sched/fair.c
> +++ b/kernel/sched/fair.c
> @@ -5872,7 +5872,7 @@ static int wake_affine(struct sched_domain *sd, struct task_struct *p,
> }
>
> static struct sched_group *
> -find_idlest_group(struct sched_domain *sd, struct task_struct *p, int this_cpu);
> +find_idlest_group(struct sched_domain *sd, struct task_struct *p, int this_cpu, int sd_flag);
>
> /*
> * find_idlest_group_cpu - find the idlest CPU among the CPUs in the group.
> @@ -5959,7 +5959,7 @@ static inline int find_idlest_cpu(struct sched_domain *sd, struct task_struct *p
> continue;
> }
>
> - group = find_idlest_group(sd, p, cpu);
> + group = find_idlest_group(sd, p, cpu, sd_flag);
> if (!group) {
> sd = sd->child;
> continue;
> @@ -8885,7 +8885,8 @@ static inline void update_sg_wakeup_stats(struct sched_domain *sd,
> static bool update_pick_idlest(struct sched_group *idlest,
> struct sg_lb_stats *idlest_sgs,
> struct sched_group *group,
> - struct sg_lb_stats *sgs)
> + struct sg_lb_stats *sgs,
> + int sd_flag)
> {
> if (sgs->group_type < idlest_sgs->group_type)
> return true;
> @@ -8922,6 +8923,13 @@ static bool update_pick_idlest(struct sched_group *idlest,
> if (idlest_sgs->idle_cpus > sgs->idle_cpus)
> return false;
>
> + if (sched_energy_enabled()) {

This is not enough, the find_energy_efficient_cpu() returns early to
fallback to the default performance mode when the system is
overutilized


> + /* Select smaller cpu group for newly woken up forkees */
> + if ((sd_flag & SD_BALANCE_FORK) && (idlest_sgs->idle_cpus &&
> + !capacity_greater(idlest->sgc->max_capacity, group->sgc->max_capacity)))
> + return false;
> + }
> +
> /* Select group with lowest group_util */
> if (idlest_sgs->idle_cpus == sgs->idle_cpus &&
> idlest_sgs->group_util <= sgs->group_util)
> @@ -8940,7 +8948,7 @@ static bool update_pick_idlest(struct sched_group *idlest,
> * Assumes p is allowed on at least one CPU in sd.
> */
> static struct sched_group *
> -find_idlest_group(struct sched_domain *sd, struct task_struct *p, int this_cpu)
> +find_idlest_group(struct sched_domain *sd, struct task_struct *p, int this_cpu, int sd_flag)
> {
> struct sched_group *idlest = NULL, *local = NULL, *group = sd->groups;
> struct sg_lb_stats local_sgs, tmp_sgs;
> @@ -8978,7 +8986,7 @@ find_idlest_group(struct sched_domain *sd, struct task_struct *p, int this_cpu)
>
> update_sg_wakeup_stats(sd, group, sgs, p);
>
> - if (!local_group && update_pick_idlest(idlest, &idlest_sgs, group, sgs)) {
> + if (!local_group && update_pick_idlest(idlest, &idlest_sgs, group, sgs, sd_flag)) {
> idlest = group;
> idlest_sgs = *sgs;
> }
> --
> 2.17.1
>

2022-01-25 09:12:23

by Chitti Babu Theegala

[permalink] [raw]
Subject: Re: [PATCH v2] sched/fair: Prefer small idle cores for forkees


On 1/21/2022 7:57 PM, Vincent Guittot wrote:
> On Fri, 21 Jan 2022 at 06:03, Chitti Babu Theegala
> <[email protected]> wrote:
>>
>> Newly forked threads don't have any useful utilization data yet and
>> it's not possible to forecast their impact on energy consumption.
>
> It would be worth mentioning that you consider only EAS mode in the patch
>
>> These forkees (though very small, most times) end up waking big
>
> The assumption that " (though very small, most times)" is maybe true
> for the cases that you are interested in and you monitor, but it's not
> always true. It seems that Vincent D. raised some concerns about
> forkee which would not be small at the end
Agreed.

>> cores from deep sleep for that very small durations.
>>
>> Bias all forkees to small cores to prevent waking big cores from deep
>
> you are testing idlest_sgs->idle_cpus so you don't bias to small cores
> but small & idle cores but if there is no small idle core then you
> will wake up a big core though the forkees are small most times
>

The function "find_idlest_cpu" expected to return idle cpu. So, I
followed the same. If idle small cpu is available, then we can use it
otherwise its ok to wakeup big cpu for newly forked tasks.
I felt that using idle CPUs for new tasks will be better as that would
give them a faster chance to run immediately.

>> sleep to save power.
>
> Then why do you want to wake up a small core from deep state instead
> of packing in one of these already running cpus? If you care about
> power, selecting a small idle core may not always be the best choice.
> Would it be better to select a non idle cpu with largest spare
> capacity at the current opp ?
>

How about running find_energy_efficient_cpu() for newly forked tasks as
well (with some default util value configured) ?

>>
>> Signed-off-by: Chitti Babu Theegala <[email protected]>
>> ---
>> Changes in v2:
>> 1. Enclosed the EAS check for this small core preferring logic
>> ---
>> kernel/sched/fair.c | 18 +++++++++++++-----
>> 1 file changed, 13 insertions(+), 5 deletions(-)
>>
>> diff --git a/kernel/sched/fair.c b/kernel/sched/fair.c
>> index c6046446c50b3..72f9ea7c98c05 100644
>> --- a/kernel/sched/fair.c
>> +++ b/kernel/sched/fair.c
>> @@ -5872,7 +5872,7 @@ static int wake_affine(struct sched_domain *sd, struct task_struct *p,
>> }
>>
>> static struct sched_group *
>> -find_idlest_group(struct sched_domain *sd, struct task_struct *p, int this_cpu);
>> +find_idlest_group(struct sched_domain *sd, struct task_struct *p, int this_cpu, int sd_flag);
>>
>> /*
>> * find_idlest_group_cpu - find the idlest CPU among the CPUs in the group.
>> @@ -5959,7 +5959,7 @@ static inline int find_idlest_cpu(struct sched_domain *sd, struct task_struct *p
>> continue;
>> }
>>
>> - group = find_idlest_group(sd, p, cpu);
>> + group = find_idlest_group(sd, p, cpu, sd_flag);
>> if (!group) {
>> sd = sd->child;
>> continue;
>> @@ -8885,7 +8885,8 @@ static inline void update_sg_wakeup_stats(struct sched_domain *sd,
>> static bool update_pick_idlest(struct sched_group *idlest,
>> struct sg_lb_stats *idlest_sgs,
>> struct sched_group *group,
>> - struct sg_lb_stats *sgs)
>> + struct sg_lb_stats *sgs,
>> + int sd_flag)
>> {
>> if (sgs->group_type < idlest_sgs->group_type)
>> return true;
>> @@ -8922,6 +8923,13 @@ static bool update_pick_idlest(struct sched_group *idlest,
>> if (idlest_sgs->idle_cpus > sgs->idle_cpus)
>> return false;
>>
>> + if (sched_energy_enabled()) {
>
> This is not enough, the find_energy_efficient_cpu() returns early to
> fallback to the default performance mode when the system is
> overutilized
>
>
>> + /* Select smaller cpu group for newly woken up forkees */
>> + if ((sd_flag & SD_BALANCE_FORK) && (idlest_sgs->idle_cpus &&
>> + !capacity_greater(idlest->sgc->max_capacity, group->sgc->max_capacity)))
>> + return false;
>> + }
>> +
>> /* Select group with lowest group_util */
>> if (idlest_sgs->idle_cpus == sgs->idle_cpus &&
>> idlest_sgs->group_util <= sgs->group_util)
>> @@ -8940,7 +8948,7 @@ static bool update_pick_idlest(struct sched_group *idlest,
>> * Assumes p is allowed on at least one CPU in sd.
>> */
>> static struct sched_group *
>> -find_idlest_group(struct sched_domain *sd, struct task_struct *p, int this_cpu)
>> +find_idlest_group(struct sched_domain *sd, struct task_struct *p, int this_cpu, int sd_flag)
>> {
>> struct sched_group *idlest = NULL, *local = NULL, *group = sd->groups;
>> struct sg_lb_stats local_sgs, tmp_sgs;
>> @@ -8978,7 +8986,7 @@ find_idlest_group(struct sched_domain *sd, struct task_struct *p, int this_cpu)
>>
>> update_sg_wakeup_stats(sd, group, sgs, p);
>>
>> - if (!local_group && update_pick_idlest(idlest, &idlest_sgs, group, sgs)) {
>> + if (!local_group && update_pick_idlest(idlest, &idlest_sgs, group, sgs, sd_flag)) {
>> idlest = group;
>> idlest_sgs = *sgs;
>> }
>> --
>> 2.17.1
>>

2022-01-25 19:39:21

by Vincent Guittot

[permalink] [raw]
Subject: Re: [PATCH v2] sched/fair: Prefer small idle cores for forkees

On Tue, 25 Jan 2022 at 08:09, Chitti Babu Theegala
<[email protected]> wrote:
>
>
> On 1/21/2022 7:57 PM, Vincent Guittot wrote:
> > On Fri, 21 Jan 2022 at 06:03, Chitti Babu Theegala
> > <[email protected]> wrote:
> >>
> >> Newly forked threads don't have any useful utilization data yet and
> >> it's not possible to forecast their impact on energy consumption.
> >
> > It would be worth mentioning that you consider only EAS mode in the patch
> >
> >> These forkees (though very small, most times) end up waking big
> >
> > The assumption that " (though very small, most times)" is maybe true
> > for the cases that you are interested in and you monitor, but it's not
> > always true. It seems that Vincent D. raised some concerns about
> > forkee which would not be small at the end
> Agreed.
>
> >> cores from deep sleep for that very small durations.
> >>
> >> Bias all forkees to small cores to prevent waking big cores from deep
> >
> > you are testing idlest_sgs->idle_cpus so you don't bias to small cores
> > but small & idle cores but if there is no small idle core then you
> > will wake up a big core though the forkees are small most times
> >
>
> The function "find_idlest_cpu" expected to return idle cpu. So, I
> followed the same. If idle small cpu is available, then we can use it

find_idlest_cpu() returns the idlest cpu which may not be idle

> otherwise its ok to wakeup big cpu for newly forked tasks.
> I felt that using idle CPUs for new tasks will be better as that would
> give them a faster chance to run immediately.

But this goes at the opposite of your 1st goal which is to not wake up
a big core in deep idle state. Just to say that this doesn't seem that
clear when it's worth waking up a big core

>
> >> sleep to save power.
> >
> > Then why do you want to wake up a small core from deep state instead
> > of packing in one of these already running cpus? If you care about
> > power, selecting a small idle core may not always be the best choice.
> > Would it be better to select a non idle cpu with largest spare
> > capacity at the current opp ?
> >
>
> How about running find_energy_efficient_cpu() for newly forked tasks as
> well (with some default util value configured) ?

Regarding what you are trying to do, I feel that it could be better to
modify and use feec to select a cpu for newly forked task when not
overutilized. Then you can use more input to select the most efficient
cpu

>
> >>
> >> Signed-off-by: Chitti Babu Theegala <[email protected]>
> >> ---
> >> Changes in v2:
> >> 1. Enclosed the EAS check for this small core preferring logic
> >> ---
> >> kernel/sched/fair.c | 18 +++++++++++++-----
> >> 1 file changed, 13 insertions(+), 5 deletions(-)
> >>
> >> diff --git a/kernel/sched/fair.c b/kernel/sched/fair.c
> >> index c6046446c50b3..72f9ea7c98c05 100644
> >> --- a/kernel/sched/fair.c
> >> +++ b/kernel/sched/fair.c
> >> @@ -5872,7 +5872,7 @@ static int wake_affine(struct sched_domain *sd, struct task_struct *p,
> >> }
> >>
> >> static struct sched_group *
> >> -find_idlest_group(struct sched_domain *sd, struct task_struct *p, int this_cpu);
> >> +find_idlest_group(struct sched_domain *sd, struct task_struct *p, int this_cpu, int sd_flag);
> >>
> >> /*
> >> * find_idlest_group_cpu - find the idlest CPU among the CPUs in the group.
> >> @@ -5959,7 +5959,7 @@ static inline int find_idlest_cpu(struct sched_domain *sd, struct task_struct *p
> >> continue;
> >> }
> >>
> >> - group = find_idlest_group(sd, p, cpu);
> >> + group = find_idlest_group(sd, p, cpu, sd_flag);
> >> if (!group) {
> >> sd = sd->child;
> >> continue;
> >> @@ -8885,7 +8885,8 @@ static inline void update_sg_wakeup_stats(struct sched_domain *sd,
> >> static bool update_pick_idlest(struct sched_group *idlest,
> >> struct sg_lb_stats *idlest_sgs,
> >> struct sched_group *group,
> >> - struct sg_lb_stats *sgs)
> >> + struct sg_lb_stats *sgs,
> >> + int sd_flag)
> >> {
> >> if (sgs->group_type < idlest_sgs->group_type)
> >> return true;
> >> @@ -8922,6 +8923,13 @@ static bool update_pick_idlest(struct sched_group *idlest,
> >> if (idlest_sgs->idle_cpus > sgs->idle_cpus)
> >> return false;
> >>
> >> + if (sched_energy_enabled()) {
> >
> > This is not enough, the find_energy_efficient_cpu() returns early to
> > fallback to the default performance mode when the system is
> > overutilized
> >
> >
> >> + /* Select smaller cpu group for newly woken up forkees */
> >> + if ((sd_flag & SD_BALANCE_FORK) && (idlest_sgs->idle_cpus &&
> >> + !capacity_greater(idlest->sgc->max_capacity, group->sgc->max_capacity)))
> >> + return false;
> >> + }
> >> +
> >> /* Select group with lowest group_util */
> >> if (idlest_sgs->idle_cpus == sgs->idle_cpus &&
> >> idlest_sgs->group_util <= sgs->group_util)
> >> @@ -8940,7 +8948,7 @@ static bool update_pick_idlest(struct sched_group *idlest,
> >> * Assumes p is allowed on at least one CPU in sd.
> >> */
> >> static struct sched_group *
> >> -find_idlest_group(struct sched_domain *sd, struct task_struct *p, int this_cpu)
> >> +find_idlest_group(struct sched_domain *sd, struct task_struct *p, int this_cpu, int sd_flag)
> >> {
> >> struct sched_group *idlest = NULL, *local = NULL, *group = sd->groups;
> >> struct sg_lb_stats local_sgs, tmp_sgs;
> >> @@ -8978,7 +8986,7 @@ find_idlest_group(struct sched_domain *sd, struct task_struct *p, int this_cpu)
> >>
> >> update_sg_wakeup_stats(sd, group, sgs, p);
> >>
> >> - if (!local_group && update_pick_idlest(idlest, &idlest_sgs, group, sgs)) {
> >> + if (!local_group && update_pick_idlest(idlest, &idlest_sgs, group, sgs, sd_flag)) {
> >> idlest = group;
> >> idlest_sgs = *sgs;
> >> }
> >> --
> >> 2.17.1
> >>