2021-05-04 09:09:06

by Pierre Gondois

[permalink] [raw]
Subject: [PATCH v3 0/2] sched/fair: find_energy_efficient_cpu() enhancements

V2:
- Split the patch in 2. [Quentin]
- Add testing results to the cover-letter. [Dietmar]
- Put back 'rcu_read_unlock()' to unlock the rcu
earlier. [Dietmar]
- Various comments. [Dietmar/Quentin]

V3:
- Layout/phrasing. [Dietmar]

This patchset prevents underflows in find_energy_efficient_cpu().
This is done in the second patch:
sched/fair: Fix negative energy delta in find_energy_efficient_cpu()

The first patch:
sched/fair: Only compute base_energy_pd if necessary
prevents an unnecessary call to compute_energy() if no CPU is available
in a performance domain (pd).
When looping over the pds, it also groups the calls to compute_energy()
closer together, reducing the chance that utilization signals are
concurrently updated between them and produce a 'negative delta'.

The energy tests of the initial EAS enablement at:
https://lkml.kernel.org/r/[email protected]
have been executed using LISA on a Juno-r2 (2xA57 + 4xA53).

To recall the test:
"10 iterations of between 10 and 50 periodic rt-app tasks (16ms period,
5% duty-cycle) for 30 seconds with energy measurement. Unit is Joules.
The goal is to save energy, so lower is better."
"Energy is measured with the onboard energy meter. Numbers include
consumption of big and little CPUs."

+----------+-----------------+-------------------------+
|          | Without patches |      With patches       |
+----------+--------+--------+------------------+------+
| Tasks nb |  Mean  |  CI*   |       Mean       | CI*  |
+----------+--------+--------+------------------+------+
|       10 |   6.57 |   0.24 |  6.46 (-1.63%)   | 0.27 |
|       20 |  12.44 |   0.21 | 12.44 (-0.01%)   | 0.14 |
|       30 |  19.10 |   0.78 | 18.75 (-1.85%)   | 0.15 |
|       40 |  27.27 |   0.53 | 27.35 (+0.31%)   | 0.33 |
|       50 |  36.55 |   0.42 | 36.28 (-0.74%)   | 0.42 |
+----------+-----------------+-------------------------+
*CI: confidence interval

For each line, the intervals of values with and without the patches
overlap (consider Mean +/- CI). Thus, the energy results shouldn't
have been impacted.

Pierre Gondois (2):
sched/fair: Only compute base_energy_pd if necessary
sched/fair: Fix negative energy delta in find_energy_efficient_cpu()

kernel/sched/fair.c | 68 ++++++++++++++++++++++++++-------------------
1 file changed, 39 insertions(+), 29 deletions(-)

--
2.17.1


2021-05-04 09:10:25

by Pierre Gondois

[permalink] [raw]
Subject: [PATCH v3 2/2] sched/fair: Fix negative energy delta in find_energy_efficient_cpu()

find_energy_efficient_cpu() (feec()) searches the best energy CPU
to place a task on. To do so, compute_energy() estimates the energy
impact of placing the task on a CPU, based on CPU and task utilization
signals.

Utilization signals can be concurrently updated while evaluating a
performance domain (pd). In some cases, this leads to having a
'negative delta', i.e. placing the task in the pd is seen as an
energy gain. Thus, any further energy comparison is biased.

In case of a 'negative delta', return prev_cpu since:
1. a 'negative delta' happens in less than 0.5% of feec() calls,
on a Juno with 6 CPUs (4 little, 2 big)
2. it is unlikely to have two consecutive 'negative delta' for
a task, so if the first call fails, feec() will correctly
place the task in the next feec() call
3. EAS current behavior tends to select prev_cpu if the task
doesn't raise the OPP of its current pd. prev_cpu is EAS's
generic decision
4. prev_cpu should be preferred to returning an error code.
In the latter case, select_idle_sibling() would do the placement,
selecting a big (and not energy efficient) CPU. As in 3., the task
would potentially reside on the big CPU for a long time

Reported-by: Xuewen Yan <[email protected]>
Suggested-by: Xuewen Yan <[email protected]>
Signed-off-by: Pierre Gondois <[email protected]>
Reviewed-by: Dietmar Eggemann <[email protected]>
Reviewed-by: Lukasz Luba <[email protected]>
---
kernel/sched/fair.c | 27 +++++++++++++++------------
1 file changed, 15 insertions(+), 12 deletions(-)

diff --git a/kernel/sched/fair.c b/kernel/sched/fair.c
index fcb9595fc123..c20c6e1acf41 100644
--- a/kernel/sched/fair.c
+++ b/kernel/sched/fair.c
@@ -6594,15 +6594,15 @@ static int find_energy_efficient_cpu(struct task_struct *p, int prev_cpu)
{
unsigned long prev_delta = ULONG_MAX, best_delta = ULONG_MAX;
struct root_domain *rd = cpu_rq(smp_processor_id())->rd;
+ int cpu, best_energy_cpu = prev_cpu, target = -1;
unsigned long cpu_cap, util, base_energy = 0;
- int cpu, best_energy_cpu = prev_cpu;
struct sched_domain *sd;
struct perf_domain *pd;

rcu_read_lock();
pd = rcu_dereference(rd->pd);
if (!pd || READ_ONCE(rd->overutilized))
- goto fail;
+ goto unlock;

/*
* Energy-aware wake-up happens on the lowest sched_domain starting
@@ -6612,7 +6612,9 @@ static int find_energy_efficient_cpu(struct task_struct *p, int prev_cpu)
while (sd && !cpumask_test_cpu(prev_cpu, sched_domain_span(sd)))
sd = sd->parent;
if (!sd)
- goto fail;
+ goto unlock;
+
+ target = prev_cpu;

sync_entity_load_avg(&p->se);
if (!task_util_est(p))
@@ -6667,6 +6669,8 @@ static int find_energy_efficient_cpu(struct task_struct *p, int prev_cpu)
/* Evaluate the energy impact of using prev_cpu. */
if (compute_prev_delta) {
prev_delta = compute_energy(p, prev_cpu, pd);
+ if (prev_delta < base_energy_pd)
+ goto unlock;
prev_delta -= base_energy_pd;
best_delta = min(best_delta, prev_delta);
}
@@ -6674,6 +6678,8 @@ static int find_energy_efficient_cpu(struct task_struct *p, int prev_cpu)
/* Evaluate the energy impact of using max_spare_cap_cpu. */
if (max_spare_cap_cpu >= 0) {
cur_delta = compute_energy(p, max_spare_cap_cpu, pd);
+ if (cur_delta < base_energy_pd)
+ goto unlock;
cur_delta -= base_energy_pd;
if (cur_delta < best_delta) {
best_delta = cur_delta;
@@ -6681,25 +6687,22 @@ static int find_energy_efficient_cpu(struct task_struct *p, int prev_cpu)
}
}
}
-unlock:
rcu_read_unlock();

/*
* Pick the best CPU if prev_cpu cannot be used, or if it saves at
* least 6% of the energy used by prev_cpu.
*/
- if (prev_delta == ULONG_MAX)
- return best_energy_cpu;
+ if ((prev_delta == ULONG_MAX) ||
+ (prev_delta - best_delta) > ((prev_delta + base_energy) >> 4))
+ target = best_energy_cpu;

- if ((prev_delta - best_delta) > ((prev_delta + base_energy) >> 4))
- return best_energy_cpu;
-
- return prev_cpu;
+ return target;

-fail:
+unlock:
rcu_read_unlock();

- return -1;
+ return target;
}

/*
--
2.17.1

2021-05-04 09:11:36

by Pierre Gondois

[permalink] [raw]
Subject: Re: [PATCH v3 0/2] sched/fair: find_energy_efficient_cpu() enhancements

Hi Xuewen,

Please let me know if you have some comments,
Pierre

On 5/4/21 10:07 AM, Pierre Gondois wrote:
> V2:
> - Split the patch in 2. [Quentin]
> - Add testing results to the cover-letter. [Dietmar]
> - Put back 'rcu_read_unlock()' to unlock the rcu
> earlier. [Dietmar]
> - Various comments. [Dietmar/Quentin]
>
> V3:
> - Layout/phrasing. [Dietmar]
>
> This patchset prevents underflows in find_energy_efficient_cpu().
> This is done in the second patch:
> sched/fair: Fix negative energy delta in find_energy_efficient_cpu()
>
> The first patch:
> sched/fair: Only compute base_energy_pd if necessary
> prevents an unnecessary call to compute_energy() if no CPU is available
> in a performance domain (pd).
> When looping over the pds, it also allows to gather the calls
> to compute_energy(), reducing the chances of having utilization signals
> being concurrently updated and having a 'negative delta'.
>
> The energy tests of the initial EAS enablement at:
> https://lkml.kernel.org/r/[email protected]
> have been executed using LISA on a Juno-r2 (2xA57 + 4xA53).
>
> To recall the test:
> "10 iterations of between 10 and 50 periodic rt-app tasks (16ms period,
> 5% duty-cycle) for 30 seconds with energy measurement. Unit is Joules.
> The goal is to save energy, so lower is better."
> "Energy is measured with the onboard energy meter. Numbers include
> consumption of big and little CPUs."
>
> +----------+-----------------+-------------------------+
> |          | Without patches |      With patches       |
> +----------+--------+--------+------------------+------+
> | Tasks nb |  Mean  |  CI*   |       Mean       | CI*  |
> +----------+--------+--------+------------------+------+
> |       10 |   6.57 |   0.24 |  6.46 (-1.63%)   | 0.27 |
> |       20 |  12.44 |   0.21 | 12.44 (-0.01%)   | 0.14 |
> |       30 |  19.10 |   0.78 | 18.75 (-1.85%)   | 0.15 |
> |       40 |  27.27 |   0.53 | 27.35 (+0.31%)   | 0.33 |
> |       50 |  36.55 |   0.42 | 36.28 (-0.74%)   | 0.42 |
> +----------+-----------------+-------------------------+
> *CI: confidence interval
>
> For each line, the intervals of values w/ w/o the patches are
> overlapping (consider Mean +/- CI). Thus, the energy results shouldn't
> have been impacted.
>
> Pierre Gondois (2):
> sched/fair: Only compute base_energy_pd if necessary
> sched/fair: Fix negative energy delta in find_energy_efficient_cpu()
>
> kernel/sched/fair.c | 68 ++++++++++++++++++++++++++-------------------
> 1 file changed, 39 insertions(+), 29 deletions(-)
>

2021-05-04 09:34:49

by Pierre Gondois

[permalink] [raw]
Subject: [PATCH v3 1/2] sched/fair: Only compute base_energy_pd if necessary

find_energy_efficient_cpu() searches the best energy CPU
to place a task on. To do so, the energy of each performance domain
(pd) is computed w/ and w/o the task placed on it.

The energy of a pd w/o the task (base_energy_pd) is computed before
knowing whether a CPU is available in the pd.

Move the base_energy_pd computation after looping through the CPUs
of a pd and only compute it if at least one CPU is available.

Suggested-by: Xuewen Yan <[email protected]>
Signed-off-by: Pierre Gondois <[email protected]>
Reviewed-by: Dietmar Eggemann <[email protected]>
Reviewed-by: Lukasz Luba <[email protected]>
---
kernel/sched/fair.c | 41 ++++++++++++++++++++++++-----------------
1 file changed, 24 insertions(+), 17 deletions(-)

diff --git a/kernel/sched/fair.c b/kernel/sched/fair.c
index 0dba0ebc3657..fcb9595fc123 100644
--- a/kernel/sched/fair.c
+++ b/kernel/sched/fair.c
@@ -6620,13 +6620,10 @@ static int find_energy_efficient_cpu(struct task_struct *p, int prev_cpu)

for (; pd; pd = pd->next) {
unsigned long cur_delta, spare_cap, max_spare_cap = 0;
+ bool compute_prev_delta = false;
unsigned long base_energy_pd;
int max_spare_cap_cpu = -1;

- /* Compute the 'base' energy of the pd, without @p */
- base_energy_pd = compute_energy(p, -1, pd);
- base_energy += base_energy_pd;
-
for_each_cpu_and(cpu, perf_domain_span(pd), sched_domain_span(sd)) {
if (!cpumask_test_cpu(cpu, p->cpus_ptr))
continue;
@@ -6647,25 +6644,35 @@ static int find_energy_efficient_cpu(struct task_struct *p, int prev_cpu)
if (!fits_capacity(util, cpu_cap))
continue;

- /* Always use prev_cpu as a candidate. */
if (cpu == prev_cpu) {
- prev_delta = compute_energy(p, prev_cpu, pd);
- prev_delta -= base_energy_pd;
- best_delta = min(best_delta, prev_delta);
- }
-
- /*
- * Find the CPU with the maximum spare capacity in
- * the performance domain
- */
- if (spare_cap > max_spare_cap) {
+ /* Always use prev_cpu as a candidate. */
+ compute_prev_delta = true;
+ } else if (spare_cap > max_spare_cap) {
+ /*
+ * Find the CPU with the maximum spare capacity
+ * in the performance domain.
+ */
max_spare_cap = spare_cap;
max_spare_cap_cpu = cpu;
}
}

- /* Evaluate the energy impact of using this CPU. */
- if (max_spare_cap_cpu >= 0 && max_spare_cap_cpu != prev_cpu) {
+ if (max_spare_cap_cpu < 0 && !compute_prev_delta)
+ continue;
+
+ /* Compute the 'base' energy of the pd, without @p */
+ base_energy_pd = compute_energy(p, -1, pd);
+ base_energy += base_energy_pd;
+
+ /* Evaluate the energy impact of using prev_cpu. */
+ if (compute_prev_delta) {
+ prev_delta = compute_energy(p, prev_cpu, pd);
+ prev_delta -= base_energy_pd;
+ best_delta = min(best_delta, prev_delta);
+ }
+
+ /* Evaluate the energy impact of using max_spare_cap_cpu. */
+ if (max_spare_cap_cpu >= 0) {
cur_delta = compute_energy(p, max_spare_cap_cpu, pd);
cur_delta -= base_energy_pd;
if (cur_delta < best_delta) {
--
2.17.1

2021-05-04 10:46:35

by Peter Zijlstra

[permalink] [raw]
Subject: Re: [PATCH v3 0/2] sched/fair: find_energy_efficient_cpu() enhancements

On Tue, May 04, 2021 at 10:07:41AM +0100, Pierre Gondois wrote:
> Pierre Gondois (2):
> sched/fair: Only compute base_energy_pd if necessary
> sched/fair: Fix negative energy delta in find_energy_efficient_cpu()
>
> kernel/sched/fair.c | 68 ++++++++++++++++++++++++++-------------------
> 1 file changed, 39 insertions(+), 29 deletions(-)

Thanks!

2021-05-04 11:53:37

by Xuewen Yan

[permalink] [raw]
Subject: Re: [PATCH v3 0/2] sched/fair: find_energy_efficient_cpu() enhancements

Hi Pierre,

The two patches look good to me.

Thanks
Xuewen Yan

On Tue, May 4, 2021 at 5:11 PM Pierre Gondois <[email protected]> wrote:
>
> Hi Xuewen,
>
> Please let me know if you have some comments,
> Pierre
>
> On 5/4/21 10:07 AM, Pierre Gondois wrote:
> > V2:
> > - Split the patch in 2. [Quentin]
> > - Add testing results to the cover-letter. [Dietmar]
> > - Put back 'rcu_read_unlock()' to unlock the rcu
> > earlier. [Dietmar]
> > - Various comments. [Dietmar/Quentin]
> >
> > V3:
> > - Layout/phrasing. [Dietmar]
> >
> > This patchset prevents underflows in find_energy_efficient_cpu().
> > This is done in the second patch:
> > sched/fair: Fix negative energy delta in find_energy_efficient_cpu()
> >
> > The first patch:
> > sched/fair: Only compute base_energy_pd if necessary
> > prevents an unnecessary call to compute_energy() if no CPU is available
> > in a performance domain (pd).
> > When looping over the pds, it also allows to gather the calls
> > to compute_energy(), reducing the chances of having utilization signals
> > being concurrently updated and having a 'negative delta'.
> >
> > The energy tests of the initial EAS enablement at:
> > https://lkml.kernel.org/r/[email protected]
> > have been executed using LISA on a Juno-r2 (2xA57 + 4xA53).
> >
> > To recall the test:
> > "10 iterations of between 10 and 50 periodic rt-app tasks (16ms period,
> > 5% duty-cycle) for 30 seconds with energy measurement. Unit is Joules.
> > The goal is to save energy, so lower is better."
> > "Energy is measured with the onboard energy meter. Numbers include
> > consumption of big and little CPUs."
> >
> > +----------+-----------------+-------------------------+
> > |          | Without patches |      With patches       |
> > +----------+--------+--------+------------------+------+
> > | Tasks nb |  Mean  |  CI*   |       Mean       | CI*  |
> > +----------+--------+--------+------------------+------+
> > |       10 |   6.57 |   0.24 |  6.46 (-1.63%)   | 0.27 |
> > |       20 |  12.44 |   0.21 | 12.44 (-0.01%)   | 0.14 |
> > |       30 |  19.10 |   0.78 | 18.75 (-1.85%)   | 0.15 |
> > |       40 |  27.27 |   0.53 | 27.35 (+0.31%)   | 0.33 |
> > |       50 |  36.55 |   0.42 | 36.28 (-0.74%)   | 0.42 |
> > +----------+-----------------+-------------------------+
> > *CI: confidence interval
> >
> > For each line, the intervals of values w/ w/o the patches are
> > overlapping (consider Mean +/- CI). Thus, the energy results shouldn't
> > have been impacted.
> >
> > Pierre Gondois (2):
> > sched/fair: Only compute base_energy_pd if necessary
> > sched/fair: Fix negative energy delta in find_energy_efficient_cpu()
> >
> > kernel/sched/fair.c | 68 ++++++++++++++++++++++++++-------------------
> > 1 file changed, 39 insertions(+), 29 deletions(-)
> >

Subject: [tip: sched/core] sched/fair: Only compute base_energy_pd if necessary

The following commit has been merged into the sched/core branch of tip:

Commit-ID: 8d4c97c105ca0735b0d972d1025cb150a7008451
Gitweb: https://git.kernel.org/tip/8d4c97c105ca0735b0d972d1025cb150a7008451
Author: Pierre Gondois <[email protected]>
AuthorDate: Tue, 04 May 2021 10:07:42 +01:00
Committer: Peter Zijlstra <[email protected]>
CommitterDate: Wed, 12 May 2021 11:43:23 +02:00

sched/fair: Only compute base_energy_pd if necessary

find_energy_efficient_cpu() searches the best energy CPU
to place a task on. To do so, the energy of each performance domain
(pd) is computed w/ and w/o the task placed on it.

The energy of a pd w/o the task (base_energy_pd) is computed before
knowing whether a CPU is available in the pd.

Move the base_energy_pd computation after looping through the CPUs
of a pd and only compute it if at least one CPU is available.

Suggested-by: Xuewen Yan <[email protected]>
Signed-off-by: Pierre Gondois <[email protected]>
Signed-off-by: Peter Zijlstra (Intel) <[email protected]>
Reviewed-by: Lukasz Luba <[email protected]>
Reviewed-by: Dietmar Eggemann <[email protected]>
Reviewed-by: Vincent Donnefort <[email protected]>
Link: https://lkml.kernel.org/r/[email protected]
---
kernel/sched/fair.c | 41 ++++++++++++++++++++++++-----------------
1 file changed, 24 insertions(+), 17 deletions(-)

diff --git a/kernel/sched/fair.c b/kernel/sched/fair.c
index d10c6cc..b229d0c 100644
--- a/kernel/sched/fair.c
+++ b/kernel/sched/fair.c
@@ -6687,13 +6687,10 @@ static int find_energy_efficient_cpu(struct task_struct *p, int prev_cpu)

for (; pd; pd = pd->next) {
unsigned long cur_delta, spare_cap, max_spare_cap = 0;
+ bool compute_prev_delta = false;
unsigned long base_energy_pd;
int max_spare_cap_cpu = -1;

- /* Compute the 'base' energy of the pd, without @p */
- base_energy_pd = compute_energy(p, -1, pd);
- base_energy += base_energy_pd;
-
for_each_cpu_and(cpu, perf_domain_span(pd), sched_domain_span(sd)) {
if (!cpumask_test_cpu(cpu, p->cpus_ptr))
continue;
@@ -6714,25 +6711,35 @@ static int find_energy_efficient_cpu(struct task_struct *p, int prev_cpu)
if (!fits_capacity(util, cpu_cap))
continue;

- /* Always use prev_cpu as a candidate. */
if (cpu == prev_cpu) {
- prev_delta = compute_energy(p, prev_cpu, pd);
- prev_delta -= base_energy_pd;
- best_delta = min(best_delta, prev_delta);
- }
-
- /*
- * Find the CPU with the maximum spare capacity in
- * the performance domain
- */
- if (spare_cap > max_spare_cap) {
+ /* Always use prev_cpu as a candidate. */
+ compute_prev_delta = true;
+ } else if (spare_cap > max_spare_cap) {
+ /*
+ * Find the CPU with the maximum spare capacity
+ * in the performance domain.
+ */
max_spare_cap = spare_cap;
max_spare_cap_cpu = cpu;
}
}

- /* Evaluate the energy impact of using this CPU. */
- if (max_spare_cap_cpu >= 0 && max_spare_cap_cpu != prev_cpu) {
+ if (max_spare_cap_cpu < 0 && !compute_prev_delta)
+ continue;
+
+ /* Compute the 'base' energy of the pd, without @p */
+ base_energy_pd = compute_energy(p, -1, pd);
+ base_energy += base_energy_pd;
+
+ /* Evaluate the energy impact of using prev_cpu. */
+ if (compute_prev_delta) {
+ prev_delta = compute_energy(p, prev_cpu, pd);
+ prev_delta -= base_energy_pd;
+ best_delta = min(best_delta, prev_delta);
+ }
+
+ /* Evaluate the energy impact of using max_spare_cap_cpu. */
+ if (max_spare_cap_cpu >= 0) {
cur_delta = compute_energy(p, max_spare_cap_cpu, pd);
cur_delta -= base_energy_pd;
if (cur_delta < best_delta) {

Subject: [tip: sched/core] sched/fair: Fix negative energy delta in find_energy_efficient_cpu()

The following commit has been merged into the sched/core branch of tip:

Commit-ID: 619e090c8e409e09bd3e8edcd5a73d83f689890c
Gitweb: https://git.kernel.org/tip/619e090c8e409e09bd3e8edcd5a73d83f689890c
Author: Pierre Gondois <[email protected]>
AuthorDate: Tue, 04 May 2021 10:07:43 +01:00
Committer: Peter Zijlstra <[email protected]>
CommitterDate: Wed, 12 May 2021 11:43:23 +02:00

sched/fair: Fix negative energy delta in find_energy_efficient_cpu()

find_energy_efficient_cpu() (feec()) searches the best energy CPU
to place a task on. To do so, compute_energy() estimates the energy
impact of placing the task on a CPU, based on CPU and task utilization
signals.

Utilization signals can be concurrently updated while evaluating a
performance domain (pd). In some cases, this leads to having a
'negative delta', i.e. placing the task in the pd is seen as an
energy gain. Thus, any further energy comparison is biased.

In case of a 'negative delta', return prev_cpu since:
1. a 'negative delta' happens in less than 0.5% of feec() calls,
on a Juno with 6 CPUs (4 little, 2 big)
2. it is unlikely to have two consecutive 'negative delta' for
a task, so if the first call fails, feec() will correctly
place the task in the next feec() call
3. EAS current behavior tends to select prev_cpu if the task
doesn't raise the OPP of its current pd. prev_cpu is EAS's
generic decision
4. prev_cpu should be preferred to returning an error code.
In the latter case, select_idle_sibling() would do the placement,
selecting a big (and not energy efficient) CPU. As in 3., the task
would potentially reside on the big CPU for a long time

Reported-by: Xuewen Yan <[email protected]>
Suggested-by: Xuewen Yan <[email protected]>
Signed-off-by: Pierre Gondois <[email protected]>
Signed-off-by: Peter Zijlstra (Intel) <[email protected]>
Reviewed-by: Lukasz Luba <[email protected]>
Reviewed-by: Dietmar Eggemann <[email protected]>
Reviewed-by: Vincent Donnefort <[email protected]>
Link: https://lkml.kernel.org/r/[email protected]
---
kernel/sched/fair.c | 27 +++++++++++++++------------
1 file changed, 15 insertions(+), 12 deletions(-)

diff --git a/kernel/sched/fair.c b/kernel/sched/fair.c
index b229d0c..c209f68 100644
--- a/kernel/sched/fair.c
+++ b/kernel/sched/fair.c
@@ -6661,15 +6661,15 @@ static int find_energy_efficient_cpu(struct task_struct *p, int prev_cpu)
{
unsigned long prev_delta = ULONG_MAX, best_delta = ULONG_MAX;
struct root_domain *rd = cpu_rq(smp_processor_id())->rd;
+ int cpu, best_energy_cpu = prev_cpu, target = -1;
unsigned long cpu_cap, util, base_energy = 0;
- int cpu, best_energy_cpu = prev_cpu;
struct sched_domain *sd;
struct perf_domain *pd;

rcu_read_lock();
pd = rcu_dereference(rd->pd);
if (!pd || READ_ONCE(rd->overutilized))
- goto fail;
+ goto unlock;

/*
* Energy-aware wake-up happens on the lowest sched_domain starting
@@ -6679,7 +6679,9 @@ static int find_energy_efficient_cpu(struct task_struct *p, int prev_cpu)
while (sd && !cpumask_test_cpu(prev_cpu, sched_domain_span(sd)))
sd = sd->parent;
if (!sd)
- goto fail;
+ goto unlock;
+
+ target = prev_cpu;

sync_entity_load_avg(&p->se);
if (!task_util_est(p))
@@ -6734,6 +6736,8 @@ static int find_energy_efficient_cpu(struct task_struct *p, int prev_cpu)
/* Evaluate the energy impact of using prev_cpu. */
if (compute_prev_delta) {
prev_delta = compute_energy(p, prev_cpu, pd);
+ if (prev_delta < base_energy_pd)
+ goto unlock;
prev_delta -= base_energy_pd;
best_delta = min(best_delta, prev_delta);
}
@@ -6741,6 +6745,8 @@ static int find_energy_efficient_cpu(struct task_struct *p, int prev_cpu)
/* Evaluate the energy impact of using max_spare_cap_cpu. */
if (max_spare_cap_cpu >= 0) {
cur_delta = compute_energy(p, max_spare_cap_cpu, pd);
+ if (cur_delta < base_energy_pd)
+ goto unlock;
cur_delta -= base_energy_pd;
if (cur_delta < best_delta) {
best_delta = cur_delta;
@@ -6748,25 +6754,22 @@ static int find_energy_efficient_cpu(struct task_struct *p, int prev_cpu)
}
}
}
-unlock:
rcu_read_unlock();

/*
* Pick the best CPU if prev_cpu cannot be used, or if it saves at
* least 6% of the energy used by prev_cpu.
*/
- if (prev_delta == ULONG_MAX)
- return best_energy_cpu;
+ if ((prev_delta == ULONG_MAX) ||
+ (prev_delta - best_delta) > ((prev_delta + base_energy) >> 4))
+ target = best_energy_cpu;

- if ((prev_delta - best_delta) > ((prev_delta + base_energy) >> 4))
- return best_energy_cpu;
-
- return prev_cpu;
+ return target;

-fail:
+unlock:
rcu_read_unlock();

- return -1;
+ return target;
}

/*