Patch 1 addresses a bug because forcing a task on a small CPU to honour
uclamp_max hint means we can end up with spare_capacity = 0; but the logic is
constructed such that spare_capacity = 0 leads to ignoring this CPU as
a candidate to compute_energy().
Patch 2 addresses a bug due to an optimization in feec() that could lead to
ignoring tasks whose uclamp_max = 0 but task_util(0) != 0.
Patch 3 adds a new tracepoint in compute_energy() as it was helpful in
debugging these two problems.
This is based on tip/sched/core + Vincent's v4 of
Unlink util_fits_cpu()... patch [1]
[1] https://lore.kernel.org/lkml/[email protected]/
Qais Yousef (3):
sched/uclamp: Set max_spare_cap_cpu even if max_spare_cap is 0
sched/uclamp: Ignore (util == 0) optimization in feec() when
p_util_max = 0
sched/tp: Add new tracepoint to track compute energy computation
include/trace/events/sched.h | 4 ++++
kernel/sched/core.c | 1 +
kernel/sched/fair.c | 23 +++++++++++++++--------
3 files changed, 20 insertions(+), 8 deletions(-)
--
2.25.1
find_energy_efficient_cpu() bails out early if effective util of the
task is 0. When uclamp is being used, this could lead to wrong decisions
when uclamp_max is set to 0. Cater for that.
Fixes: d81304bc6193 ("sched/uclamp: Cater for uclamp in find_energy_efficient_cpu()'s early exit condition")
Signed-off-by: Qais Yousef (Google) <[email protected]>
---
kernel/sched/fair.c | 2 +-
1 file changed, 1 insertion(+), 1 deletion(-)
diff --git a/kernel/sched/fair.c b/kernel/sched/fair.c
index ca2c389d3180..3521aad67aa0 100644
--- a/kernel/sched/fair.c
+++ b/kernel/sched/fair.c
@@ -7382,7 +7382,7 @@ static int find_energy_efficient_cpu(struct task_struct *p, int prev_cpu)
target = prev_cpu;
sync_entity_load_avg(&p->se);
- if (!uclamp_task_util(p, p_util_min, p_util_max))
+ if (!uclamp_task_util(p, p_util_min, p_util_max) && p_util_max != 0)
goto unlock;
eenv_task_busy_time(&eenv, p, prev_cpu);
--
2.25.1
It was useful to track feec() placement decision and debug the spare
capacity and optimization issues vs uclamp_max.
Signed-off-by: Qais Yousef (Google) <[email protected]>
---
include/trace/events/sched.h | 4 ++++
kernel/sched/core.c | 1 +
kernel/sched/fair.c | 7 ++++++-
3 files changed, 11 insertions(+), 1 deletion(-)
diff --git a/include/trace/events/sched.h b/include/trace/events/sched.h
index fbb99a61f714..20cc884f72ff 100644
--- a/include/trace/events/sched.h
+++ b/include/trace/events/sched.h
@@ -735,6 +735,10 @@ DECLARE_TRACE(sched_update_nr_running_tp,
TP_PROTO(struct rq *rq, int change),
TP_ARGS(rq, change));
+DECLARE_TRACE(sched_compute_energy_tp,
+ TP_PROTO(struct task_struct *p, int dst_cpu, unsigned long energy),
+ TP_ARGS(p, dst_cpu, energy));
+
#endif /* _TRACE_SCHED_H */
/* This part must be outside protection */
diff --git a/kernel/sched/core.c b/kernel/sched/core.c
index 03b8529db73f..79ad6b8ea93e 100644
--- a/kernel/sched/core.c
+++ b/kernel/sched/core.c
@@ -110,6 +110,7 @@ EXPORT_TRACEPOINT_SYMBOL_GPL(sched_overutilized_tp);
EXPORT_TRACEPOINT_SYMBOL_GPL(sched_util_est_cfs_tp);
EXPORT_TRACEPOINT_SYMBOL_GPL(sched_util_est_se_tp);
EXPORT_TRACEPOINT_SYMBOL_GPL(sched_update_nr_running_tp);
+EXPORT_TRACEPOINT_SYMBOL_GPL(sched_compute_energy_tp);
DEFINE_PER_CPU_SHARED_ALIGNED(struct rq, runqueues);
diff --git a/kernel/sched/fair.c b/kernel/sched/fair.c
index 3521aad67aa0..3b55dbb0fcfe 100644
--- a/kernel/sched/fair.c
+++ b/kernel/sched/fair.c
@@ -7303,11 +7303,16 @@ compute_energy(struct energy_env *eenv, struct perf_domain *pd,
{
unsigned long max_util = eenv_pd_max_util(eenv, pd_cpus, p, dst_cpu);
unsigned long busy_time = eenv->pd_busy_time;
+ unsigned long energy;
if (dst_cpu >= 0)
busy_time = min(eenv->pd_cap, busy_time + eenv->task_busy_time);
- return em_cpu_energy(pd->em_pd, max_util, busy_time, eenv->cpu_cap);
+ energy = em_cpu_energy(pd->em_pd, max_util, busy_time, eenv->cpu_cap);
+
+ trace_sched_compute_energy_tp(p, dst_cpu, energy);
+
+ return energy;
}
/*
--
2.25.1
When uclamp_max is being used, the util of the task could be higher than
the spare capacity of the CPU, but due to uclamp_max value we force fit
it there.
The way the condition for checking for max_spare_cap in
find_energy_efficient_cpu() was constructed; it ignored any CPU that has
its spare_cap less than or _equal_ to max_spare_cap. Since we initialize
max_spare_cap to 0; this lead to never setting max_spare_cap_cpu and
hence ending up never performing compute_energy() for this cluster and
missing an opportunity for a better energy efficient placement to honour
uclamp_max setting.
max_spare_cap = 0;
cpu_cap = capacity_of(cpu) - task_util(p); // 0 if task_util(p) is high
...
util_fits_cpu(...); // will return true if uclamp_max forces it to fit
...
// this logic will fail to update max_spare_cap_cpu if cpu_cap is 0
if (cpu_cap > max_spare_cap) {
max_spare_cap = cpu_cap;
max_spare_cap_cpu = cpu;
}
prev_spare_cap suffers from a similar problem.
Fix the logic by treating -1UL value as 'not populated' instead of
0 which is a viable and correct spare capacity value.
Fixes: 1d42509e475c ("sched/fair: Make EAS wakeup placement consider uclamp restrictions")
Signed-off-by: Qais Yousef (Google) <[email protected]>
---
kernel/sched/fair.c | 14 ++++++++------
1 file changed, 8 insertions(+), 6 deletions(-)
diff --git a/kernel/sched/fair.c b/kernel/sched/fair.c
index e29e9ea4cde8..ca2c389d3180 100644
--- a/kernel/sched/fair.c
+++ b/kernel/sched/fair.c
@@ -7390,9 +7390,9 @@ static int find_energy_efficient_cpu(struct task_struct *p, int prev_cpu)
for (; pd; pd = pd->next) {
unsigned long util_min = p_util_min, util_max = p_util_max;
unsigned long cpu_cap, cpu_thermal_cap, util;
- unsigned long cur_delta, max_spare_cap = 0;
+ unsigned long cur_delta, max_spare_cap = -1UL;
unsigned long rq_util_min, rq_util_max;
- unsigned long prev_spare_cap = 0;
+ unsigned long prev_spare_cap = -1UL;
int max_spare_cap_cpu = -1;
unsigned long base_energy;
int fits, max_fits = -1;
@@ -7457,7 +7457,8 @@ static int find_energy_efficient_cpu(struct task_struct *p, int prev_cpu)
prev_spare_cap = cpu_cap;
prev_fits = fits;
} else if ((fits > max_fits) ||
- ((fits == max_fits) && (cpu_cap > max_spare_cap))) {
+ ((fits == max_fits) &&
+ (cpu_cap > max_spare_cap || max_spare_cap == -1UL) {
/*
* Find the CPU with the maximum spare capacity
* among the remaining CPUs in the performance
@@ -7469,7 +7470,7 @@ static int find_energy_efficient_cpu(struct task_struct *p, int prev_cpu)
}
}
- if (max_spare_cap_cpu < 0 && prev_spare_cap == 0)
+ if (max_spare_cap_cpu < 0 && prev_spare_cap == -1UL)
continue;
eenv_pd_busy_time(&eenv, cpus, p);
@@ -7477,7 +7478,7 @@ static int find_energy_efficient_cpu(struct task_struct *p, int prev_cpu)
base_energy = compute_energy(&eenv, pd, cpus, p, -1);
/* Evaluate the energy impact of using prev_cpu. */
- if (prev_spare_cap > 0) {
+ if (prev_spare_cap != -1UL) {
prev_delta = compute_energy(&eenv, pd, cpus, p,
prev_cpu);
/* CPU utilization has changed */
@@ -7489,7 +7490,8 @@ static int find_energy_efficient_cpu(struct task_struct *p, int prev_cpu)
}
/* Evaluate the energy impact of using max_spare_cap_cpu. */
- if (max_spare_cap_cpu >= 0 && max_spare_cap > prev_spare_cap) {
+ if (max_spare_cap_cpu >= 0 &&
+ (max_spare_cap > prev_spare_cap || prev_spare_cap == -1UL)) {
/* Current best energy cpu fits better */
if (max_fits < best_fits)
continue;
--
2.25.1
On 01/29/23 16:14, Qais Yousef wrote:
> When uclamp_max is being used, the util of the task could be higher than
> the spare capacity of the CPU, but due to uclamp_max value we force fit
> it there.
>
> The way the condition for checking for max_spare_cap in
> find_energy_efficient_cpu() was constructed; it ignored any CPU that has
> its spare_cap less than or _equal_ to max_spare_cap. Since we initialize
> max_spare_cap to 0; this lead to never setting max_spare_cap_cpu and
> hence ending up never performing compute_energy() for this cluster and
> missing an opportunity for a better energy efficient placement to honour
> uclamp_max setting.
>
> max_spare_cap = 0;
> cpu_cap = capacity_of(cpu) - task_util(p); // 0 if task_util(p) is high
>
> ...
>
> util_fits_cpu(...); // will return true if uclamp_max forces it to fit
>
> ...
>
> // this logic will fail to update max_spare_cap_cpu if cpu_cap is 0
> if (cpu_cap > max_spare_cap) {
> max_spare_cap = cpu_cap;
> max_spare_cap_cpu = cpu;
> }
>
> prev_spare_cap suffers from a similar problem.
>
> Fix the logic by treating -1UL value as 'not populated' instead of
> 0 which is a viable and correct spare capacity value.
>
> Fixes: 1d42509e475c ("sched/fair: Make EAS wakeup placement consider uclamp restrictions")
> Signed-off-by: Qais Yousef (Google) <[email protected]>
> ---
> kernel/sched/fair.c | 14 ++++++++------
> 1 file changed, 8 insertions(+), 6 deletions(-)
>
> diff --git a/kernel/sched/fair.c b/kernel/sched/fair.c
> index e29e9ea4cde8..ca2c389d3180 100644
> --- a/kernel/sched/fair.c
> +++ b/kernel/sched/fair.c
> @@ -7390,9 +7390,9 @@ static int find_energy_efficient_cpu(struct task_struct *p, int prev_cpu)
> for (; pd; pd = pd->next) {
> unsigned long util_min = p_util_min, util_max = p_util_max;
> unsigned long cpu_cap, cpu_thermal_cap, util;
> - unsigned long cur_delta, max_spare_cap = 0;
> + unsigned long cur_delta, max_spare_cap = -1UL;
> unsigned long rq_util_min, rq_util_max;
> - unsigned long prev_spare_cap = 0;
> + unsigned long prev_spare_cap = -1UL;
> int max_spare_cap_cpu = -1;
> unsigned long base_energy;
> int fits, max_fits = -1;
> @@ -7457,7 +7457,8 @@ static int find_energy_efficient_cpu(struct task_struct *p, int prev_cpu)
> prev_spare_cap = cpu_cap;
> prev_fits = fits;
> } else if ((fits > max_fits) ||
> - ((fits == max_fits) && (cpu_cap > max_spare_cap))) {
> + ((fits == max_fits) &&
> + (cpu_cap > max_spare_cap || max_spare_cap == -1UL) {
Oops. Sorry I just realized I bodged this while rebasing and preparing the
patches for posting. There are missing termination parenthesis that will cause
compilation errors.
Apologies..
--
Qais Yousef
> /*
> * Find the CPU with the maximum spare capacity
> * among the remaining CPUs in the performance
> @@ -7469,7 +7470,7 @@ static int find_energy_efficient_cpu(struct task_struct *p, int prev_cpu)
> }
> }
>
> - if (max_spare_cap_cpu < 0 && prev_spare_cap == 0)
> + if (max_spare_cap_cpu < 0 && prev_spare_cap == -1UL)
> continue;
>
> eenv_pd_busy_time(&eenv, cpus, p);
> @@ -7477,7 +7478,7 @@ static int find_energy_efficient_cpu(struct task_struct *p, int prev_cpu)
> base_energy = compute_energy(&eenv, pd, cpus, p, -1);
>
> /* Evaluate the energy impact of using prev_cpu. */
> - if (prev_spare_cap > 0) {
> + if (prev_spare_cap != -1UL) {
> prev_delta = compute_energy(&eenv, pd, cpus, p,
> prev_cpu);
> /* CPU utilization has changed */
> @@ -7489,7 +7490,8 @@ static int find_energy_efficient_cpu(struct task_struct *p, int prev_cpu)
> }
>
> /* Evaluate the energy impact of using max_spare_cap_cpu. */
> - if (max_spare_cap_cpu >= 0 && max_spare_cap > prev_spare_cap) {
> + if (max_spare_cap_cpu >= 0 &&
> + (max_spare_cap > prev_spare_cap || prev_spare_cap == -1UL)) {
> /* Current best energy cpu fits better */
> if (max_fits < best_fits)
> continue;
> --
> 2.25.1
>
On Sun, 29 Jan 2023 at 17:14, Qais Yousef <[email protected]> wrote:
>
> When uclamp_max is being used, the util of the task could be higher than
> the spare capacity of the CPU, but due to uclamp_max value we force fit
> it there.
>
> The way the condition for checking for max_spare_cap in
> find_energy_efficient_cpu() was constructed; it ignored any CPU that has
> its spare_cap less than or _equal_ to max_spare_cap. Since we initialize
> max_spare_cap to 0; this lead to never setting max_spare_cap_cpu and
> hence ending up never performing compute_energy() for this cluster and
> missing an opportunity for a better energy efficient placement to honour
> uclamp_max setting.
>
> max_spare_cap = 0;
> cpu_cap = capacity_of(cpu) - task_util(p); // 0 if task_util(p) is high
>
> ...
>
> util_fits_cpu(...); // will return true if uclamp_max forces it to fit
>
> ...
>
> // this logic will fail to update max_spare_cap_cpu if cpu_cap is 0
> if (cpu_cap > max_spare_cap) {
> max_spare_cap = cpu_cap;
> max_spare_cap_cpu = cpu;
> }
>
> prev_spare_cap suffers from a similar problem.
>
> Fix the logic by treating -1UL value as 'not populated' instead of
> 0 which is a viable and correct spare capacity value.
>
> Fixes: 1d42509e475c ("sched/fair: Make EAS wakeup placement consider uclamp restrictions")
> Signed-off-by: Qais Yousef (Google) <[email protected]>
> ---
> kernel/sched/fair.c | 14 ++++++++------
> 1 file changed, 8 insertions(+), 6 deletions(-)
>
> diff --git a/kernel/sched/fair.c b/kernel/sched/fair.c
> index e29e9ea4cde8..ca2c389d3180 100644
> --- a/kernel/sched/fair.c
> +++ b/kernel/sched/fair.c
> @@ -7390,9 +7390,9 @@ static int find_energy_efficient_cpu(struct task_struct *p, int prev_cpu)
> for (; pd; pd = pd->next) {
> unsigned long util_min = p_util_min, util_max = p_util_max;
> unsigned long cpu_cap, cpu_thermal_cap, util;
> - unsigned long cur_delta, max_spare_cap = 0;
> + unsigned long cur_delta, max_spare_cap = -1UL;
> unsigned long rq_util_min, rq_util_max;
> - unsigned long prev_spare_cap = 0;
> + unsigned long prev_spare_cap = -1UL;
> int max_spare_cap_cpu = -1;
> unsigned long base_energy;
> int fits, max_fits = -1;
> @@ -7457,7 +7457,8 @@ static int find_energy_efficient_cpu(struct task_struct *p, int prev_cpu)
> prev_spare_cap = cpu_cap;
> prev_fits = fits;
> } else if ((fits > max_fits) ||
> - ((fits == max_fits) && (cpu_cap > max_spare_cap))) {
> + ((fits == max_fits) &&
> + (cpu_cap > max_spare_cap || max_spare_cap == -1UL) {
Can't we use a signed comparison to include the case of max_spare_cap
== -1 in cpu_cap > max_spare_cap ?
> /*
> * Find the CPU with the maximum spare capacity
> * among the remaining CPUs in the performance
> @@ -7469,7 +7470,7 @@ static int find_energy_efficient_cpu(struct task_struct *p, int prev_cpu)
> }
> }
>
> - if (max_spare_cap_cpu < 0 && prev_spare_cap == 0)
> + if (max_spare_cap_cpu < 0 && prev_spare_cap == -1UL)
> continue;
>
> eenv_pd_busy_time(&eenv, cpus, p);
> @@ -7477,7 +7478,7 @@ static int find_energy_efficient_cpu(struct task_struct *p, int prev_cpu)
> base_energy = compute_energy(&eenv, pd, cpus, p, -1);
>
> /* Evaluate the energy impact of using prev_cpu. */
> - if (prev_spare_cap > 0) {
> + if (prev_spare_cap != -1UL) {
> prev_delta = compute_energy(&eenv, pd, cpus, p,
> prev_cpu);
> /* CPU utilization has changed */
> @@ -7489,7 +7490,8 @@ static int find_energy_efficient_cpu(struct task_struct *p, int prev_cpu)
> }
>
> /* Evaluate the energy impact of using max_spare_cap_cpu. */
> - if (max_spare_cap_cpu >= 0 && max_spare_cap > prev_spare_cap) {
> + if (max_spare_cap_cpu >= 0 &&
> + (max_spare_cap > prev_spare_cap || prev_spare_cap == -1UL)) {
> /* Current best energy cpu fits better */
> if (max_fits < best_fits)
> continue;
> --
> 2.25.1
>
On 01/30/23 15:44, Vincent Guittot wrote:
> On Sun, 29 Jan 2023 at 17:14, Qais Yousef <[email protected]> wrote:
> >
> > When uclamp_max is being used, the util of the task could be higher than
> > the spare capacity of the CPU, but due to uclamp_max value we force fit
> > it there.
> >
> > The way the condition for checking for max_spare_cap in
> > find_energy_efficient_cpu() was constructed; it ignored any CPU that has
> > its spare_cap less than or _equal_ to max_spare_cap. Since we initialize
> > max_spare_cap to 0; this lead to never setting max_spare_cap_cpu and
> > hence ending up never performing compute_energy() for this cluster and
> > missing an opportunity for a better energy efficient placement to honour
> > uclamp_max setting.
> >
> > max_spare_cap = 0;
> > cpu_cap = capacity_of(cpu) - task_util(p); // 0 if task_util(p) is high
> >
> > ...
> >
> > util_fits_cpu(...); // will return true if uclamp_max forces it to fit
> >
> > ...
> >
> > // this logic will fail to update max_spare_cap_cpu if cpu_cap is 0
> > if (cpu_cap > max_spare_cap) {
> > max_spare_cap = cpu_cap;
> > max_spare_cap_cpu = cpu;
> > }
> >
> > prev_spare_cap suffers from a similar problem.
> >
> > Fix the logic by treating -1UL value as 'not populated' instead of
> > 0 which is a viable and correct spare capacity value.
> >
> > Fixes: 1d42509e475c ("sched/fair: Make EAS wakeup placement consider uclamp restrictions")
> > Signed-off-by: Qais Yousef (Google) <[email protected]>
> > ---
> > kernel/sched/fair.c | 14 ++++++++------
> > 1 file changed, 8 insertions(+), 6 deletions(-)
> >
> > diff --git a/kernel/sched/fair.c b/kernel/sched/fair.c
> > index e29e9ea4cde8..ca2c389d3180 100644
> > --- a/kernel/sched/fair.c
> > +++ b/kernel/sched/fair.c
> > @@ -7390,9 +7390,9 @@ static int find_energy_efficient_cpu(struct task_struct *p, int prev_cpu)
> > for (; pd; pd = pd->next) {
> > unsigned long util_min = p_util_min, util_max = p_util_max;
> > unsigned long cpu_cap, cpu_thermal_cap, util;
> > - unsigned long cur_delta, max_spare_cap = 0;
> > + unsigned long cur_delta, max_spare_cap = -1UL;
> > unsigned long rq_util_min, rq_util_max;
> > - unsigned long prev_spare_cap = 0;
> > + unsigned long prev_spare_cap = -1UL;
> > int max_spare_cap_cpu = -1;
> > unsigned long base_energy;
> > int fits, max_fits = -1;
> > @@ -7457,7 +7457,8 @@ static int find_energy_efficient_cpu(struct task_struct *p, int prev_cpu)
> > prev_spare_cap = cpu_cap;
> > prev_fits = fits;
> > } else if ((fits > max_fits) ||
> > - ((fits == max_fits) && (cpu_cap > max_spare_cap))) {
> > + ((fits == max_fits) &&
> > + (cpu_cap > max_spare_cap || max_spare_cap == -1UL) {
>
> Can't we use a signed comparison to include the case of max_spare_cap
> == -1 in cpu_cap > max_spare_cap ?
By converting max_spare_cap to long, right?
My memory could be failing me, but I seem to remember we had mixed usage and
consolidated into unsigned long. That's why I didn't want to break the trend.
Anyway. If no one shouts against that, I don't mind going for that.
Thanks
--
Qais Yousef
>
> > /*
> > * Find the CPU with the maximum spare capacity
> > * among the remaining CPUs in the performance
> > @@ -7469,7 +7470,7 @@ static int find_energy_efficient_cpu(struct task_struct *p, int prev_cpu)
> > }
> > }
> >
> > - if (max_spare_cap_cpu < 0 && prev_spare_cap == 0)
> > + if (max_spare_cap_cpu < 0 && prev_spare_cap == -1UL)
> > continue;
> >
> > eenv_pd_busy_time(&eenv, cpus, p);
> > @@ -7477,7 +7478,7 @@ static int find_energy_efficient_cpu(struct task_struct *p, int prev_cpu)
> > base_energy = compute_energy(&eenv, pd, cpus, p, -1);
> >
> > /* Evaluate the energy impact of using prev_cpu. */
> > - if (prev_spare_cap > 0) {
> > + if (prev_spare_cap != -1UL) {
> > prev_delta = compute_energy(&eenv, pd, cpus, p,
> > prev_cpu);
> > /* CPU utilization has changed */
> > @@ -7489,7 +7490,8 @@ static int find_energy_efficient_cpu(struct task_struct *p, int prev_cpu)
> > }
> >
> > /* Evaluate the energy impact of using max_spare_cap_cpu. */
> > - if (max_spare_cap_cpu >= 0 && max_spare_cap > prev_spare_cap) {
> > + if (max_spare_cap_cpu >= 0 &&
> > + (max_spare_cap > prev_spare_cap || prev_spare_cap == -1UL)) {
> > /* Current best energy cpu fits better */
> > if (max_fits < best_fits)
> > continue;
> > --
> > 2.25.1
> >