From: Vincent Wang <[email protected]>
When a task that is in in_iowait state is enqueued, cpufreq_update_util()
will be invoked with the SCHED_CPUFREQ_IOWAIT flag. In this case, the
values of util and cap, which are the parameters passed to
map_util_freq(), may be CPU frequencies instead of CPU utilization and
capacity.
On 32-bit architectures, unsigned long is 32 bits wide. When calculating
freq, this expression may therefore overflow:
freq = (freq + (freq >> 2)) * util / cap;
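For example (illustrative numbers only): with a 2 GHz OPP, freq and util
are both around 2,000,000 (kHz), so (freq + (freq >> 2)) * util is about
2,500,000 * 2,000,000 = 5 * 10^12, far beyond the 32-bit limit of roughly
4.29 * 10^9, and the intermediate product wraps around before the
division by cap is done.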
To fix the issue, a new member, min, is added to struct sugov_cpu to
store the capacity corresponding to the policy's minimum frequency.
iowait_boost and iowait_boost_max are then initialized with capacities
instead of frequencies.
Signed-off-by: Vincent Wang <[email protected]>
Signed-off-by: Chunyan Zhang <[email protected]>
---
Changes from V3 (https://lkml.org/lkml/2019/1/29/346):
* Switch to a new method that does not add extra calculation.
Changes from v2 (https://lkml.org/lkml/2019/1/9/1227):
* Fix for 32bit architectures only.
Changes from V1 (https://lkml.org/lkml/2018/12/24/22):
* Rebased onto v5.0-rc1;
* Addressed comments from Quentin Perret.
---
kernel/sched/cpufreq_schedutil.c | 18 +++++++++++-------
1 file changed, 11 insertions(+), 7 deletions(-)
diff --git a/kernel/sched/cpufreq_schedutil.c b/kernel/sched/cpufreq_schedutil.c
index 033ec7c45f13..04eb44a9b550 100644
--- a/kernel/sched/cpufreq_schedutil.c
+++ b/kernel/sched/cpufreq_schedutil.c
@@ -53,6 +53,7 @@ struct sugov_cpu {
unsigned long bw_dl;
unsigned long max;
+ unsigned long min;
/* The field below is for single-CPU policies only: */
#ifdef CONFIG_NO_HZ_COMMON
@@ -275,12 +276,11 @@ static unsigned long sugov_get_util(struct sugov_cpu *sg_cpu)
{
struct rq *rq = cpu_rq(sg_cpu->cpu);
unsigned long util = cpu_util_cfs(rq);
- unsigned long max = arch_scale_cpu_capacity(NULL, sg_cpu->cpu);
- sg_cpu->max = max;
sg_cpu->bw_dl = cpu_bw_dl(rq);
- return schedutil_freq_util(sg_cpu->cpu, util, max, FREQUENCY_UTIL);
+ return schedutil_freq_util(sg_cpu->cpu, util, sg_cpu->max,
+ FREQUENCY_UTIL);
}
/**
@@ -304,7 +304,7 @@ static bool sugov_iowait_reset(struct sugov_cpu *sg_cpu, u64 time,
return false;
sg_cpu->iowait_boost = set_iowait_boost
- ? sg_cpu->sg_policy->policy->min : 0;
+ ? sg_cpu->min : 0;
sg_cpu->iowait_boost_pending = set_iowait_boost;
return true;
@@ -351,7 +351,7 @@ static void sugov_iowait_boost(struct sugov_cpu *sg_cpu, u64 time,
}
/* First wakeup after IO: start with minimum boost */
- sg_cpu->iowait_boost = sg_cpu->sg_policy->policy->min;
+ sg_cpu->iowait_boost = sg_cpu->min;
}
/**
@@ -398,7 +398,7 @@ static void sugov_iowait_apply(struct sugov_cpu *sg_cpu, u64 time,
* reach the minimum.
*/
sg_cpu->iowait_boost >>= 1;
- if (sg_cpu->iowait_boost < sg_cpu->sg_policy->policy->min) {
+ if (sg_cpu->iowait_boost < sg_cpu->min) {
sg_cpu->iowait_boost = 0;
return;
}
@@ -823,6 +823,8 @@ static int sugov_start(struct cpufreq_policy *policy)
{
struct sugov_policy *sg_policy = policy->governor_data;
unsigned int cpu;
+ unsigned long max_cap = arch_scale_cpu_capacity(NULL, policy->cpu);
+ unsigned long min_cap = max_cap * policy->min / policy->cpuinfo.max_freq;
sg_policy->freq_update_delay_ns = sg_policy->tunables->rate_limit_us * NSEC_PER_USEC;
sg_policy->last_freq_update_time = 0;
@@ -837,7 +839,9 @@ static int sugov_start(struct cpufreq_policy *policy)
memset(sg_cpu, 0, sizeof(*sg_cpu));
sg_cpu->cpu = cpu;
sg_cpu->sg_policy = sg_policy;
- sg_cpu->iowait_boost_max = policy->cpuinfo.max_freq;
+ sg_cpu->max = max_cap;
+ sg_cpu->min = min_cap;
+ sg_cpu->iowait_boost_max = max_cap;
}
for_each_cpu(cpu, policy->cpus) {
--
2.17.1
On Friday 22 Feb 2019 at 18:37:46 (+0800), Chunyan Zhang wrote:
> @@ -823,6 +823,8 @@ static int sugov_start(struct cpufreq_policy *policy)
> {
> struct sugov_policy *sg_policy = policy->governor_data;
> unsigned int cpu;
> + unsigned long max_cap = arch_scale_cpu_capacity(NULL, policy->cpu);
> + unsigned long min_cap = max_cap * policy->min / policy->cpuinfo.max_freq;
>
> sg_policy->freq_update_delay_ns = sg_policy->tunables->rate_limit_us * NSEC_PER_USEC;
> sg_policy->last_freq_update_time = 0;
> @@ -837,7 +839,9 @@ static int sugov_start(struct cpufreq_policy *policy)
> memset(sg_cpu, 0, sizeof(*sg_cpu));
> sg_cpu->cpu = cpu;
> sg_cpu->sg_policy = sg_policy;
> - sg_cpu->iowait_boost_max = policy->cpuinfo.max_freq;
> + sg_cpu->max = max_cap;
> + sg_cpu->min = min_cap;
> + sg_cpu->iowait_boost_max = max_cap;
Unfortunately, I don't think you can do that only here. The return value
of arch_scale_cpu_capacity() can change at run time. And it does on arm64,
see drivers/base/arch_topology.c.
> }
>
> for_each_cpu(cpu, policy->cpus) {
> --
> 2.17.1
>
Thanks,
Quentin
Did you mean that the value of arch_scale_cpu_capacity() is changed in cpu_capacity_store()?
If so, I can restart the schedutil governor after the new capacity is updated.
________________________________________
From: Quentin Perret <[email protected]>
Sent: 22 February 2019 18:59
To: Zhang, Chunyan
Cc: Ingo Molnar; Peter Zijlstra; Wang, Vincent; [email protected]; Chunyan Zhang
Subject: Re: [PATCH V4] sched/cpufreq: initialize iowait_boost_max and iowait_boost with cpu capacity
On Friday 22 Feb 2019 at 18:37:46 (+0800), Chunyan Zhang wrote:
> @@ -823,6 +823,8 @@ static int sugov_start(struct cpufreq_policy *policy)
> {
> struct sugov_policy *sg_policy = policy->governor_data;
> unsigned int cpu;
> + unsigned long max_cap = arch_scale_cpu_capacity(NULL, policy->cpu);
> + unsigned long min_cap = max_cap * policy->min / policy->cpuinfo.max_freq;
>
> sg_policy->freq_update_delay_ns = sg_policy->tunables->rate_limit_us * NSEC_PER_USEC;
> sg_policy->last_freq_update_time = 0;
> @@ -837,7 +839,9 @@ static int sugov_start(struct cpufreq_policy *policy)
> memset(sg_cpu, 0, sizeof(*sg_cpu));
> sg_cpu->cpu = cpu;
> sg_cpu->sg_policy = sg_policy;
> - sg_cpu->iowait_boost_max = policy->cpuinfo.max_freq;
> + sg_cpu->max = max_cap;
> + sg_cpu->min = min_cap;
> + sg_cpu->iowait_boost_max = max_cap;
Unfortunately, I don't think you can do that only here. The return value
of arch_scale_cpu_capacity() can change at run time. And it does on arm64,
see drivers/base/arch_topology.c.
> }
>
> for_each_cpu(cpu, policy->cpus) {
> --
> 2.17.1
>
Thanks,
Quentin
On Monday 04 Mar 2019 at 07:35:04 (+0000), Wang, Vincent (王争) wrote:
> Did you mean the value of arch_scale_cpu_capacity() is changed in
> cpu_capacity_store()?
Yes, there's that, but more importantly topology_normalize_cpu_scale()
is called during boot. With printks() in the relevant functions, the
boot log on my system with two CPUFreq policies looks like this:
[ 2.393085] init_cpu_capacity_callback: policy0
[ 2.397714] sugov_start: policy0
[ 2.403734] init_cpu_capacity_callback: policy1
[ 2.407901] topology_normalize_cpu_scale: done
[ 2.412581] sugov_start: policy1
So, the lack of ordering between sugov_start() and topology_normalize_cpu_scale()
is a problem, I think.
> If so, I can restart schedutil governor after new capacity is updated.
Hmm, that feels a bit overkill, but that should at least be a correct
way of updating sg_cpu->{min, max} in a non-racy way. And CPU capacity
changes are infrequent, so the overhead of re-starting the governor
isn't a major issue, I suppose.
You could also update the values in sugov_get_util() at the cost of a
small overhead to compute 'min'. I'm not sure what's preferable since
we wanted to avoid that kind of overhead in the first place ...
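A minimal sketch of that alternative, reusing the helpers visible in the
V4 patch above (illustrative only, not a tested change):

static unsigned long sugov_get_util(struct sugov_cpu *sg_cpu)
{
	struct rq *rq = cpu_rq(sg_cpu->cpu);
	unsigned long max = arch_scale_cpu_capacity(NULL, sg_cpu->cpu);
	unsigned long util = cpu_util_cfs(rq);
	struct cpufreq_policy *policy = sg_cpu->sg_policy->policy;

	/*
	 * Refresh the cached capacities on every update; the extra cost
	 * is the multiplication and division needed to recompute 'min'.
	 */
	sg_cpu->max = max;
	sg_cpu->min = max * policy->min / policy->cpuinfo.max_freq;

	sg_cpu->bw_dl = cpu_bw_dl(rq);
	return schedutil_freq_util(sg_cpu->cpu, util, max, FREQUENCY_UTIL);
}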
Thanks,
Quentin
On Mon, Mar 04, 2019 at 01:58:12PM +0000, Quentin Perret wrote:
> You could also update the values in sugov_get_util() at the cost of a
> small overhead to compute 'min'. I'm not sure what's preferable since
> we wanted to avoid that kind of overhead in the first place ...
Or,... we could actually make things simpler.
How's the below? I have a few questions wrt min, mostly:
- what's the difference between policy->min and
policy->cpuinfo.min_freq; it used to be the former, the below uses
the latter.
- should we have a min_freq based value, instead of a constant; the
difference being that with this the actual boost speed depends on the
gap between min/max.
Anyway; the below converts iowait_boost to capacity scale (from kHz), it
side-steps the whole issue you guys are bickering about by limiting it
to SCHED_CAPACITY_SCALE (aka. 1024) on the boost path, and then limiting
it to @max by the time we figured out we ought to use it.
And since that means we never change @max anymore, we can simplify the
whole return value thing.
---
kernel/sched/cpufreq_schedutil.c | 57 ++++++++++++++--------------------------
1 file changed, 20 insertions(+), 37 deletions(-)
diff --git a/kernel/sched/cpufreq_schedutil.c b/kernel/sched/cpufreq_schedutil.c
index 2efe629425be..d1b8e7aeed44 100644
--- a/kernel/sched/cpufreq_schedutil.c
+++ b/kernel/sched/cpufreq_schedutil.c
@@ -48,10 +48,10 @@ struct sugov_cpu {
bool iowait_boost_pending;
unsigned int iowait_boost;
- unsigned int iowait_boost_max;
u64 last_update;
unsigned long bw_dl;
+ unsigned long min;
unsigned long max;
/* The field below is for single-CPU policies only: */
@@ -303,8 +303,7 @@ static bool sugov_iowait_reset(struct sugov_cpu *sg_cpu, u64 time,
if (delta_ns <= TICK_NSEC)
return false;
- sg_cpu->iowait_boost = set_iowait_boost
- ? sg_cpu->sg_policy->policy->min : 0;
+ sg_cpu->iowait_boost = set_iowait_boost ? sg_cpu->min : 0;
sg_cpu->iowait_boost_pending = set_iowait_boost;
return true;
@@ -344,14 +343,12 @@ static void sugov_iowait_boost(struct sugov_cpu *sg_cpu, u64 time,
/* Double the boost at each request */
if (sg_cpu->iowait_boost) {
- sg_cpu->iowait_boost <<= 1;
- if (sg_cpu->iowait_boost > sg_cpu->iowait_boost_max)
- sg_cpu->iowait_boost = sg_cpu->iowait_boost_max;
+ sg_cpu->iowait_boost = min(sg_cpu->iowait_boost << 1, SCHED_CAPACITY_SCALE);
return;
}
/* First wakeup after IO: start with minimum boost */
- sg_cpu->iowait_boost = sg_cpu->sg_policy->policy->min;
+ sg_cpu->iowait_boost = sg_cpu->min;
}
/**
@@ -373,47 +370,31 @@ static void sugov_iowait_boost(struct sugov_cpu *sg_cpu, u64 time,
* This mechanism is designed to boost high frequently IO waiting tasks, while
* being more conservative on tasks which does sporadic IO operations.
*/
-static void sugov_iowait_apply(struct sugov_cpu *sg_cpu, u64 time,
- unsigned long *util, unsigned long *max)
+static unsigned long sugov_iowait_apply(struct sugov_cpu *sg_cpu, u64 time,
+ unsigned long util, unsigned long max)
{
- unsigned int boost_util, boost_max;
-
/* No boost currently required */
if (!sg_cpu->iowait_boost)
- return;
+ return util;
/* Reset boost if the CPU appears to have been idle enough */
if (sugov_iowait_reset(sg_cpu, time, false))
- return;
+ return util;
- /*
- * An IO waiting task has just woken up:
- * allow to further double the boost value
- */
- if (sg_cpu->iowait_boost_pending) {
- sg_cpu->iowait_boost_pending = false;
- } else {
+ if (!sg_cpu->iowait_boost_pending) {
/*
- * Otherwise: reduce the boost value and disable it when we
- * reach the minimum.
+ * No boost pending; reduce the boost value.
*/
sg_cpu->iowait_boost >>= 1;
- if (sg_cpu->iowait_boost < sg_cpu->sg_policy->policy->min) {
+ if (sg_cpu->iowait_boost < sg_cpu->min) {
sg_cpu->iowait_boost = 0;
- return;
+ return util;
}
}
- /*
- * Apply the current boost value: a CPU is boosted only if its current
- * utilization is smaller then the current IO boost level.
- */
- boost_util = sg_cpu->iowait_boost;
- boost_max = sg_cpu->iowait_boost_max;
- if (*util * boost_max < *max * boost_util) {
- *util = boost_util;
- *max = boost_max;
- }
+ sg_cpu->iowait_boost_pending = false;
+
+ return min(max(util, sg_cpu->iowait_boost), max);
}
#ifdef CONFIG_NO_HZ_COMMON
@@ -460,7 +441,7 @@ static void sugov_update_single(struct update_util_data *hook, u64 time,
util = sugov_get_util(sg_cpu);
max = sg_cpu->max;
- sugov_iowait_apply(sg_cpu, time, &util, &max);
+ util = sugov_iowait_apply(sg_cpu, time, util, max);
next_f = get_next_freq(sg_policy, util, max);
/*
* Do not reduce the frequency if the CPU has not been idle
@@ -500,7 +481,7 @@ static unsigned int sugov_next_freq_shared(struct sugov_cpu *sg_cpu, u64 time)
j_util = sugov_get_util(j_sg_cpu);
j_max = j_sg_cpu->max;
- sugov_iowait_apply(j_sg_cpu, time, &j_util, &j_max);
+ j_util = sugov_iowait_apply(j_sg_cpu, time, j_util, j_max);
if (j_util * max > j_max * util) {
util = j_util;
@@ -837,7 +818,9 @@ static int sugov_start(struct cpufreq_policy *policy)
memset(sg_cpu, 0, sizeof(*sg_cpu));
sg_cpu->cpu = cpu;
sg_cpu->sg_policy = sg_policy;
- sg_cpu->iowait_boost_max = policy->cpuinfo.max_freq;
+ sg_cpu->min =
+ (SCHED_CAPACITY_SCALE * policy->cpuinfo.min_freq) /
+ policy->cpuinfo.max_freq;
}
for_each_cpu(cpu, policy->cpus) {
On Mon, Mar 04, 2019 at 04:48:16PM +0000, Quentin Perret wrote:
> > @@ -837,7 +818,9 @@ static int sugov_start(struct cpufreq_policy *policy)
> > memset(sg_cpu, 0, sizeof(*sg_cpu));
> > sg_cpu->cpu = cpu;
> > sg_cpu->sg_policy = sg_policy;
> > - sg_cpu->iowait_boost_max = policy->cpuinfo.max_freq;
> > + sg_cpu->min =
> > + (SCHED_CAPACITY_SCALE * policy->cpuinfo.min_freq) /
> > + policy->cpuinfo.max_freq;
>
> The 'funny' thing is that on big little this 'min' can end up being
> larger than 'max' ...
>
> On juno r0 for example, min_freq and max_freq for little CPUs are
> respectively 450MHz and 850MHz. So you get sg_cpu->min=542, but
> sg_cpu->max=446 ... So you'll max out after the first IO wakeup.
> And since iowait_boost is reset whenever it is smaller than sg_cpu->min,
> you end up with something that can either force max freq or apply no
> boost at all ...
>
> Perhaps you could keep the 'util' and 'max' pointers in
> sugov_iowait_apply() and overwrite them like before, but in the
> SCHED_CAPACITY_SCALE scale as you suggest ?
Urgh; but then we're back to having that bootstrap problem.
Now; at this time; @max is in fact scale_cpu_capacity, so can't we
change this:
- /*
- * Apply the current boost value: a CPU is boosted only if its current
- * utilization is smaller then the current IO boost level.
- */
- boost_util = sg_cpu->iowait_boost;
- boost_max = sg_cpu->iowait_boost_max;
- if (*util * boost_max < *max * boost_util) {
- *util = boost_util;
- *max = boost_max;
- }
+ sg_cpu->iowait_boost_pending = false;
+
+ return min(max(util, sg_cpu->iowait_boost), max);
}
to something like:
/*
* @util is already in capacity scale, convert iowait_boost
* into the same scale so we can compare.
*/
boost = (sg_cpu->iowait_boost * max) >> SCHED_CAPACITY_SHIFT;
util = max(boost, util);
return min(util, max);
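Plugging in the juno r0 numbers quoted above as an illustration: with
max = 446 and a fully ramped iowait_boost = 1024, boost becomes
(1024 * 446) >> 10 = 446, i.e. the boost saturates at the CPU's own
capacity instead of overshooting it.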
On Mon, Mar 04, 2019 at 06:40:28PM +0100, Peter Zijlstra wrote:
> /*
> * @util is already in capacity scale, convert iowait_boost
> * into the same scale so we can compare.
> */
> boost = (sg_cpu->iowait_boost * max) >> SCHED_CAPACITY_SHIFT;
> util = max(boost, util);
> return min(util, max);
Ah, I don't think we need that last min in this case; both terms should
already be <= SCHED_CAPACITY_SCALE.
On Monday 04 Mar 2019 at 18:40:28 (+0100), Peter Zijlstra wrote:
> > Perhaps you could keep the 'util' and 'max' pointers in
> > sugov_iowait_apply() and overwrite them like before, but in the
> > SCHED_CAPACITY_SCALE scale as you suggest ?
>
> Urgh; but then we're back to having that boostrap problem.
Hmm, I don't understand :/
> Now; at this time; @max is in fact scale_cpu_capacity, so can't we
> change this:
>
> - /*
> - * Apply the current boost value: a CPU is boosted only if its current
> - * utilization is smaller then the current IO boost level.
> - */
> - boost_util = sg_cpu->iowait_boost;
> - boost_max = sg_cpu->iowait_boost_max;
I was basically suggesting to do 'boost_max = 1024;' here and you
should be good with your way of computing 'min', no?
> - if (*util * boost_max < *max * boost_util) {
> - *util = boost_util;
> - *max = boost_max;
> - }
> + sg_cpu->iowait_boost_pending = false;
> +
> + return min(max(util, sg_cpu->iowait_boost), max);
> }
>
> to something like:
>
> /*
> * @util is already in capacity scale, convert iowait_boost
> * into the same scale so we can compare.
> */
> boost = (sg_cpu->iowait_boost * max) >> SCHED_CAPACITY_SHIFT;
> util = max(boost, util);
> return min(util, max);
>
But this should work too, I think.
Thanks,
Quentin
On Monday 04 Mar 2019 at 16:26:16 (+0100), Peter Zijlstra wrote:
> On Mon, Mar 04, 2019 at 01:58:12PM +0000, Quentin Perret wrote:
> > You could also update the values in sugov_get_util() at the cost of a
> > small overhead to compute 'min'. I'm not sure what's preferable since
> > we wanted to avoid that kind of overhead in the first place ...
>
> Or,... we could actually make things simpler.
>
> How's the below? I have a feq questions wrt min, mostly:
>
> - what's the difference between policy->min and
> policy->cpuinfo.min_freq; it used to be the former, the below uses
> the latter.
As mentioned on IRC, IIRC policy->min is something that can be written
from userspace (for example) to cap the min freq. OTOH, cpuinfo.min_freq
is read-only and just reports the lowest OPP.
Rafael, is this correct?
> - should we have a min_freq based value, instead of a constant; the
> difference being that with this the actual boost speed depends in the
> gap between min/max.
If the above is correct, then I agree. Looking at min_freq simplifies
things quite a bit since it doesn't need to be updated all the time,
and the whole policy->min stuff is dealt with at the CPUFreq core level
so it's not obvious sugov should care.
> Anyway; the below converts iowait_boost to capacity scale (from kHz), it
> side-steps the whole issue you guys are bickering about by limiting it
> to SCHED_CAPACITY_SCALE (aka. 1024) on the boost path, and then limiting
> it to @max by the time we figured out we ought to use it.
>
> And since that means we never change @max anymore; we can simplify whole
> return value thing.
[...]
> @@ -837,7 +818,9 @@ static int sugov_start(struct cpufreq_policy *policy)
> memset(sg_cpu, 0, sizeof(*sg_cpu));
> sg_cpu->cpu = cpu;
> sg_cpu->sg_policy = sg_policy;
> - sg_cpu->iowait_boost_max = policy->cpuinfo.max_freq;
> + sg_cpu->min =
> + (SCHED_CAPACITY_SCALE * policy->cpuinfo.min_freq) /
> + policy->cpuinfo.max_freq;
The 'funny' thing is that on big little this 'min' can end up being
larger than 'max' ...
On juno r0 for example, min_freq and max_freq for little CPUs are
respectively 450MHz and 850MHz. So you get sg_cpu->min=542, but
sg_cpu->max=446 ... So you'll max out after the first IO wakeup.
And since iowait_boost is reset whenever it is smaller than sg_cpu->min,
you end up with something that can either force max freq or apply no
boost at all ...
Perhaps you could keep the 'util' and 'max' pointers in
sugov_iowait_apply() and overwrite them like before, but in the
SCHED_CAPACITY_SCALE scale as you suggest ?
Thanks,
Quentin
On Mon, 4 Mar 2019 at 17:48, Quentin Perret <[email protected]> wrote:
>
> On Monday 04 Mar 2019 at 16:26:16 (+0100), Peter Zijlstra wrote:
> > On Mon, Mar 04, 2019 at 01:58:12PM +0000, Quentin Perret wrote:
> > > You could also update the values in sugov_get_util() at the cost of a
> > > small overhead to compute 'min'. I'm not sure what's preferable since
> > > we wanted to avoid that kind of overhead in the first place ...
> >
> > Or,... we could actually make things simpler.
> >
> > How's the below? I have a feq questions wrt min, mostly:
> >
> > - what's the difference between policy->min and
> > policy->cpuinfo.min_freq; it used to be the former, the below uses
> > the latter.
>
> As mentioned on IRC, IIRC policy->min is something that can be written
> from userspace (for example) to cap the min freq. OTOH, cpuinfo.min_freq
> is read-only and just reports the lowest OPP.
>
> Rafael is this correct ?
>
> > - should we have a min_freq based value, instead of a constant; the
> > difference being that with this the actual boost speed depends in the
> > gap between min/max.
>
> If the above is correct, then I agree. Looking at min_freq simplifies
> things quite a bit since it doesn't need to be updated all the time,
> and the whole policy->min stuff is dealt with at the CPUFreq core level
> so it's not obvious sugov should care.
>
> > Anyway; the below converts iowait_boost to capacity scale (from kHz), it
> > side-steps the whole issue you guys are bickering about by limiting it
> > to SCHED_CAPACITY_SCALE (aka. 1024) on the boost path, and then limiting
> > it to @max by the time we figured out we ought to use it.
> >
> > And since that means we never change @max anymore; we can simplify whole
> > return value thing.
>
> [...]
>
> > @@ -837,7 +818,9 @@ static int sugov_start(struct cpufreq_policy *policy)
> > memset(sg_cpu, 0, sizeof(*sg_cpu));
> > sg_cpu->cpu = cpu;
> > sg_cpu->sg_policy = sg_policy;
> > - sg_cpu->iowait_boost_max = policy->cpuinfo.max_freq;
> > + sg_cpu->min =
> > + (SCHED_CAPACITY_SCALE * policy->cpuinfo.min_freq) /
> > + policy->cpuinfo.max_freq;
>
> The 'funny' thing is that on big little this 'min' can end up being
> larger than 'max' ...
Yes, arch_scale_cpu_capacity() should be used instead of
SCHED_CAPACITY_SCALE so the value can be compared with sg_cpu->max and
sg_cpu->util.
>
> On juno r0 for example, min_freq and max_freq for little CPUs are
> respectively 450MHz and 850MHz. So you get sg_cpu->min=542, but
> sg_cpu->max=446 ... So you'll max out after the first IO wakeup.
> And since iowait_boost is reset whenever it is smaller than sg_cpu->min,
> you end up with something that can either force max freq or apply no
> boost at all ...
>
> Perhaps you could keep the 'util' and 'max' pointers in
> sugov_iowait_apply() and overwrite them like before, but in the
> SCHED_CAPACITY_SCALE scale as you suggest ?
>
> Thanks,
> Quentin
On Mon, Mar 04, 2019 at 04:48:16PM +0000, Quentin Perret wrote:
> On Monday 04 Mar 2019 at 16:26:16 (+0100), Peter Zijlstra wrote:
> > On Mon, Mar 04, 2019 at 01:58:12PM +0000, Quentin Perret wrote:
> > > You could also update the values in sugov_get_util() at the cost of a
> > > small overhead to compute 'min'. I'm not sure what's preferable since
> > > we wanted to avoid that kind of overhead in the first place ...
> >
> > Or,... we could actually make things simpler.
> >
> > How's the below? I have a feq questions wrt min, mostly:
> >
> > - what's the difference between policy->min and
> > policy->cpuinfo.min_freq; it used to be the former, the below uses
> > the latter.
>
> As mentioned on IRC, IIRC policy->min is something that can be written
> from userspace (for example) to cap the min freq. OTOH, cpuinfo.min_freq
> is read-only and just reports the lowest OPP.
>
> Rafael is this correct ?
>
> > - should we have a min_freq based value, instead of a constant; the
> > difference being that with this the actual boost speed depends in the
> > gap between min/max.
>
> If the above is correct, then I agree. Looking at min_freq simplifies
> things quite a bit since it doesn't need to be updated all the time,
> and the whole policy->min stuff is dealt with at the CPUFreq core level
> so it's not obvious sugov should care.
Using a constant value (my dice seem to like 128 for some reason) would
result in the boost curve being independent of the available frequencies
-- and thus the same for all machines.
With that particular value, we need 9 consecutive IOWAIT wakeups to
reach MAX, instead of some random number (7 for your juno r0).
On Mon, Mar 04, 2019 at 05:50:32PM +0000, Quentin Perret wrote:
> On Monday 04 Mar 2019 at 18:40:28 (+0100), Peter Zijlstra wrote:
> > > Perhaps you could keep the 'util' and 'max' pointers in
> > > sugov_iowait_apply() and overwrite them like before, but in the
> > > SCHED_CAPACITY_SCALE scale as you suggest ?
> >
> > Urgh; but then we're back to having that boostrap problem.
>
> Hmm, I don't understand :/
Yeah, I seem to have reading comprehension issues today. Ignore that.
> > Now; at this time; @max is in fact scale_cpu_capacity, so can't we
> > change this:
> >
> > - /*
> > - * Apply the current boost value: a CPU is boosted only if its current
> > - * utilization is smaller then the current IO boost level.
> > - */
> > - boost_util = sg_cpu->iowait_boost;
> > - boost_max = sg_cpu->iowait_boost_max;
>
> I was basically suggesting to do 'boost_max = 1024;' here and you
> should be good with you way of computing 'min' no ?
Right, but then we keep having to retain those two mults.
> > - if (*util * boost_max < *max * boost_util) {
> > - *util = boost_util;
> > - *max = boost_max;
> > - }
> > + sg_cpu->iowait_boost_pending = false;
> > +
> > + return min(max(util, sg_cpu->iowait_boost), max);
> > }
> >
> > to something like:
> >
> > /*
> > * @util is already in capacity scale, convert iowait_boost
> > * into the same scale so we can compare.
> > */
> > boost = (sg_cpu->iowait_boost * max) >> SCHED_CAPACITY_SHIFT;
> > util = max(boost, util);
> > return min(util, max);
> >
>
> But this should work too, I think.
While that is only a single mult.
On Monday 04 Mar 2019 at 19:47:08 (+0100), Peter Zijlstra wrote:
> On Mon, Mar 04, 2019 at 05:50:32PM +0000, Quentin Perret wrote:
> > On Monday 04 Mar 2019 at 18:40:28 (+0100), Peter Zijlstra wrote:
> > > > Perhaps you could keep the 'util' and 'max' pointers in
> > > > sugov_iowait_apply() and overwrite them like before, but in the
> > > > SCHED_CAPACITY_SCALE scale as you suggest ?
> > >
> > > Urgh; but then we're back to having that boostrap problem.
> >
> > Hmm, I don't understand :/
>
> Yeah, I seen to have reading comprehension issues today. Ignore that.
>
> > > Now; at this time; @max is in fact scale_cpu_capacity, so can't we
> > > change this:
> > >
> > > - /*
> > > - * Apply the current boost value: a CPU is boosted only if its current
> > > - * utilization is smaller then the current IO boost level.
> > > - */
> > > - boost_util = sg_cpu->iowait_boost;
> > > - boost_max = sg_cpu->iowait_boost_max;
> >
> > I was basically suggesting to do 'boost_max = 1024;' here and you
> > should be good with you way of computing 'min' no ?
>
> Right, but then we keep having to retain those two mults.
>
> > > - if (*util * boost_max < *max * boost_util) {
> > > - *util = boost_util;
> > > - *max = boost_max;
> > > - }
> > > + sg_cpu->iowait_boost_pending = false;
> > > +
> > > + return min(max(util, sg_cpu->iowait_boost), max);
> > > }
> > >
> > > to something like:
> > >
> > > /*
> > > * @util is already in capacity scale, convert iowait_boost
> > > * into the same scale so we can compare.
> > > */
> > > boost = (sg_cpu->iowait_boost * max) >> SCHED_CAPACITY_SHIFT;
> > > util = max(boost, util);
> > > return min(util, max);
> > >
> >
> > But this should work too, I think.
>
> While that is only a single mult.
Yes, and that's also easier to understand (IMO) because all the
requests going to get_next_freq() are in the scale_cpu_capacity range,
which keeps things consistent regardless of the iowait stuff.
So yeah, that works for me.
Thanks,
Quentin
On Mon, Mar 04, 2019 at 07:11:01PM +0000, Quentin Perret wrote:
> So yeah, that works for me.
Chunyan, Vincent; can you verify the below cures your ill?
---
Subject: sched/cpufreq: Fix 32bit math overflow
Vincent Wang reported that get_next_freq() has a mult overflow issue on
32bit platforms in the IOWAIT boost case, since in that case {util,max}
are in freq units instead of capacity units.
Solve this by moving the IOWAIT boost to capacity units. And since this
means @max is constant; simplify the code.
Cc: Chunyan Zhang <[email protected]>
Reported-by: Vincent Wang <[email protected]>
Signed-off-by: Peter Zijlstra (Intel) <[email protected]>
---
kernel/sched/cpufreq_schedutil.c | 58 +++++++++++++++++-----------------------
1 file changed, 24 insertions(+), 34 deletions(-)
diff --git a/kernel/sched/cpufreq_schedutil.c b/kernel/sched/cpufreq_schedutil.c
index 2efe629425be..72b62ac1c7c2 100644
--- a/kernel/sched/cpufreq_schedutil.c
+++ b/kernel/sched/cpufreq_schedutil.c
@@ -48,10 +48,10 @@ struct sugov_cpu {
bool iowait_boost_pending;
unsigned int iowait_boost;
- unsigned int iowait_boost_max;
u64 last_update;
unsigned long bw_dl;
+ unsigned long min;
unsigned long max;
/* The field below is for single-CPU policies only: */
@@ -303,8 +303,7 @@ static bool sugov_iowait_reset(struct sugov_cpu *sg_cpu, u64 time,
if (delta_ns <= TICK_NSEC)
return false;
- sg_cpu->iowait_boost = set_iowait_boost
- ? sg_cpu->sg_policy->policy->min : 0;
+ sg_cpu->iowait_boost = set_iowait_boost ? sg_cpu->min : 0;
sg_cpu->iowait_boost_pending = set_iowait_boost;
return true;
@@ -344,14 +343,12 @@ static void sugov_iowait_boost(struct sugov_cpu *sg_cpu, u64 time,
/* Double the boost at each request */
if (sg_cpu->iowait_boost) {
- sg_cpu->iowait_boost <<= 1;
- if (sg_cpu->iowait_boost > sg_cpu->iowait_boost_max)
- sg_cpu->iowait_boost = sg_cpu->iowait_boost_max;
+ sg_cpu->iowait_boost = min(sg_cpu->iowait_boost << 1, SCHED_CAPACITY_SCALE);
return;
}
/* First wakeup after IO: start with minimum boost */
- sg_cpu->iowait_boost = sg_cpu->sg_policy->policy->min;
+ sg_cpu->iowait_boost = sg_cpu->min;
}
/**
@@ -373,47 +370,38 @@ static void sugov_iowait_boost(struct sugov_cpu *sg_cpu, u64 time,
* This mechanism is designed to boost high frequently IO waiting tasks, while
* being more conservative on tasks which does sporadic IO operations.
*/
-static void sugov_iowait_apply(struct sugov_cpu *sg_cpu, u64 time,
- unsigned long *util, unsigned long *max)
+static unsigned long sugov_iowait_apply(struct sugov_cpu *sg_cpu, u64 time,
+ unsigned long util, unsigned long max)
{
- unsigned int boost_util, boost_max;
+ unsigned long boost;
/* No boost currently required */
if (!sg_cpu->iowait_boost)
- return;
+ return util;
/* Reset boost if the CPU appears to have been idle enough */
if (sugov_iowait_reset(sg_cpu, time, false))
- return;
+ return util;
- /*
- * An IO waiting task has just woken up:
- * allow to further double the boost value
- */
- if (sg_cpu->iowait_boost_pending) {
- sg_cpu->iowait_boost_pending = false;
- } else {
+ if (!sg_cpu->iowait_boost_pending) {
/*
- * Otherwise: reduce the boost value and disable it when we
- * reach the minimum.
+ * No boost pending; reduce the boost value.
*/
sg_cpu->iowait_boost >>= 1;
- if (sg_cpu->iowait_boost < sg_cpu->sg_policy->policy->min) {
+ if (sg_cpu->iowait_boost < sg_cpu->min) {
sg_cpu->iowait_boost = 0;
- return;
+ return util;
}
}
+ sg_cpu->iowait_boost_pending = false;
+
/*
- * Apply the current boost value: a CPU is boosted only if its current
- * utilization is smaller then the current IO boost level.
+ * @util is already in capacity scale; convert iowait_boost
+ * into the same scale so we can compare.
*/
- boost_util = sg_cpu->iowait_boost;
- boost_max = sg_cpu->iowait_boost_max;
- if (*util * boost_max < *max * boost_util) {
- *util = boost_util;
- *max = boost_max;
- }
+ boost = (sg_cpu->iowait_boost * max) >> SCHED_CAPACITY_SHIFT;
+ return max(boost, util);
}
#ifdef CONFIG_NO_HZ_COMMON
@@ -460,7 +448,7 @@ static void sugov_update_single(struct update_util_data *hook, u64 time,
util = sugov_get_util(sg_cpu);
max = sg_cpu->max;
- sugov_iowait_apply(sg_cpu, time, &util, &max);
+ util = sugov_iowait_apply(sg_cpu, time, util, max);
next_f = get_next_freq(sg_policy, util, max);
/*
* Do not reduce the frequency if the CPU has not been idle
@@ -500,7 +488,7 @@ static unsigned int sugov_next_freq_shared(struct sugov_cpu *sg_cpu, u64 time)
j_util = sugov_get_util(j_sg_cpu);
j_max = j_sg_cpu->max;
- sugov_iowait_apply(j_sg_cpu, time, &j_util, &j_max);
+ j_util = sugov_iowait_apply(j_sg_cpu, time, j_util, j_max);
if (j_util * max > j_max * util) {
util = j_util;
@@ -837,7 +825,9 @@ static int sugov_start(struct cpufreq_policy *policy)
memset(sg_cpu, 0, sizeof(*sg_cpu));
sg_cpu->cpu = cpu;
sg_cpu->sg_policy = sg_policy;
- sg_cpu->iowait_boost_max = policy->cpuinfo.max_freq;
+ sg_cpu->min =
+ (SCHED_CAPACITY_SCALE * policy->cpuinfo.min_freq) /
+ policy->cpuinfo.max_freq;
}
for_each_cpu(cpu, policy->cpus) {
On Tuesday, March 5, 2019 9:32:02 AM CET Peter Zijlstra wrote:
> On Mon, Mar 04, 2019 at 07:11:01PM +0000, Quentin Perret wrote:
>
> > So yeah, that works for me.
>
> Chunyan, Vincent; can you verify the below cures your ill?
>
> ---
> Subject: sched/cpufreq: Fix 32bit math overflow
>
> Vincent Wang reported that get_next_freq() has a mult overflow issue on
> 32bit platforms in the IOWAIT boost case, since in that case {util,max}
> are in freq units instead of capacity units.
>
> Solve this by moving the IOWAIT boost to capacity units. And since this
> means @max is constant; simplify the code.
>
> Cc: Chunyan Zhang <[email protected]>
> Reported-by: Vincent Wang <[email protected]>
> Signed-off-by: Peter Zijlstra (Intel) <[email protected]>
Acked-by: Rafael J. Wysocki <[email protected]>
> ---
> kernel/sched/cpufreq_schedutil.c | 58 +++++++++++++++++-----------------------
> 1 file changed, 24 insertions(+), 34 deletions(-)
>
> diff --git a/kernel/sched/cpufreq_schedutil.c b/kernel/sched/cpufreq_schedutil.c
> index 2efe629425be..72b62ac1c7c2 100644
> --- a/kernel/sched/cpufreq_schedutil.c
> +++ b/kernel/sched/cpufreq_schedutil.c
> @@ -48,10 +48,10 @@ struct sugov_cpu {
>
> bool iowait_boost_pending;
> unsigned int iowait_boost;
> - unsigned int iowait_boost_max;
> u64 last_update;
>
> unsigned long bw_dl;
> + unsigned long min;
> unsigned long max;
>
> /* The field below is for single-CPU policies only: */
> @@ -303,8 +303,7 @@ static bool sugov_iowait_reset(struct sugov_cpu *sg_cpu, u64 time,
> if (delta_ns <= TICK_NSEC)
> return false;
>
> - sg_cpu->iowait_boost = set_iowait_boost
> - ? sg_cpu->sg_policy->policy->min : 0;
> + sg_cpu->iowait_boost = set_iowait_boost ? sg_cpu->min : 0;
> sg_cpu->iowait_boost_pending = set_iowait_boost;
>
> return true;
> @@ -344,14 +343,12 @@ static void sugov_iowait_boost(struct sugov_cpu *sg_cpu, u64 time,
>
> /* Double the boost at each request */
> if (sg_cpu->iowait_boost) {
> - sg_cpu->iowait_boost <<= 1;
> - if (sg_cpu->iowait_boost > sg_cpu->iowait_boost_max)
> - sg_cpu->iowait_boost = sg_cpu->iowait_boost_max;
> + sg_cpu->iowait_boost = min(sg_cpu->iowait_boost << 1, SCHED_CAPACITY_SCALE);
> return;
> }
>
> /* First wakeup after IO: start with minimum boost */
> - sg_cpu->iowait_boost = sg_cpu->sg_policy->policy->min;
> + sg_cpu->iowait_boost = sg_cpu->min;
> }
>
> /**
> @@ -373,47 +370,38 @@ static void sugov_iowait_boost(struct sugov_cpu *sg_cpu, u64 time,
> * This mechanism is designed to boost high frequently IO waiting tasks, while
> * being more conservative on tasks which does sporadic IO operations.
> */
> -static void sugov_iowait_apply(struct sugov_cpu *sg_cpu, u64 time,
> - unsigned long *util, unsigned long *max)
> +static unsigned long sugov_iowait_apply(struct sugov_cpu *sg_cpu, u64 time,
> + unsigned long util, unsigned long max)
> {
> - unsigned int boost_util, boost_max;
> + unsigned long boost;
>
> /* No boost currently required */
> if (!sg_cpu->iowait_boost)
> - return;
> + return util;
>
> /* Reset boost if the CPU appears to have been idle enough */
> if (sugov_iowait_reset(sg_cpu, time, false))
> - return;
> + return util;
>
> - /*
> - * An IO waiting task has just woken up:
> - * allow to further double the boost value
> - */
> - if (sg_cpu->iowait_boost_pending) {
> - sg_cpu->iowait_boost_pending = false;
> - } else {
> + if (!sg_cpu->iowait_boost_pending) {
> /*
> - * Otherwise: reduce the boost value and disable it when we
> - * reach the minimum.
> + * No boost pending; reduce the boost value.
> */
> sg_cpu->iowait_boost >>= 1;
> - if (sg_cpu->iowait_boost < sg_cpu->sg_policy->policy->min) {
> + if (sg_cpu->iowait_boost < sg_cpu->min) {
> sg_cpu->iowait_boost = 0;
> - return;
> + return util;
> }
> }
>
> + sg_cpu->iowait_boost_pending = false;
> +
> /*
> - * Apply the current boost value: a CPU is boosted only if its current
> - * utilization is smaller then the current IO boost level.
> + * @util is already in capacity scale; convert iowait_boost
> + * into the same scale so we can compare.
> */
> - boost_util = sg_cpu->iowait_boost;
> - boost_max = sg_cpu->iowait_boost_max;
> - if (*util * boost_max < *max * boost_util) {
> - *util = boost_util;
> - *max = boost_max;
> - }
> + boost = (sg_cpu->iowait_boost * max) >> SCHED_CAPACITY_SHIFT;
> + return max(boost, util);
> }
>
> #ifdef CONFIG_NO_HZ_COMMON
> @@ -460,7 +448,7 @@ static void sugov_update_single(struct update_util_data *hook, u64 time,
>
> util = sugov_get_util(sg_cpu);
> max = sg_cpu->max;
> - sugov_iowait_apply(sg_cpu, time, &util, &max);
> + util = sugov_iowait_apply(sg_cpu, time, util, max);
> next_f = get_next_freq(sg_policy, util, max);
> /*
> * Do not reduce the frequency if the CPU has not been idle
> @@ -500,7 +488,7 @@ static unsigned int sugov_next_freq_shared(struct sugov_cpu *sg_cpu, u64 time)
>
> j_util = sugov_get_util(j_sg_cpu);
> j_max = j_sg_cpu->max;
> - sugov_iowait_apply(j_sg_cpu, time, &j_util, &j_max);
> + j_util = sugov_iowait_apply(j_sg_cpu, time, j_util, j_max);
>
> if (j_util * max > j_max * util) {
> util = j_util;
> @@ -837,7 +825,9 @@ static int sugov_start(struct cpufreq_policy *policy)
> memset(sg_cpu, 0, sizeof(*sg_cpu));
> sg_cpu->cpu = cpu;
> sg_cpu->sg_policy = sg_policy;
> - sg_cpu->iowait_boost_max = policy->cpuinfo.max_freq;
> + sg_cpu->min =
> + (SCHED_CAPACITY_SCALE * policy->cpuinfo.min_freq) /
> + policy->cpuinfo.max_freq;
> }
>
> for_each_cpu(cpu, policy->cpus) {
>
On Tue, 5 Mar 2019 at 16:32, Peter Zijlstra <[email protected]> wrote:
>
> On Mon, Mar 04, 2019 at 07:11:01PM +0000, Quentin Perret wrote:
>
> > So yeah, that works for me.
>
> Chunyan, Vincent; can you verify the below cures your ill?
>
Hi Peter,
Sure, we will port the changes below onto our 4.14 development kernel
for testing, and will get back to you once the test is done.
Thanks,
Chunyan
> ---
> Subject: sched/cpufreq: Fix 32bit math overflow
>
> Vincent Wang reported that get_next_freq() has a mult overflow issue on
> 32bit platforms in the IOWAIT boost case, since in that case {util,max}
> are in freq units instead of capacity units.
>
> Solve this by moving the IOWAIT boost to capacity units. And since this
> means @max is constant; simplify the code.
>
> Cc: Chunyan Zhang <[email protected]>
> Reported-by: Vincent Wang <[email protected]>
> Signed-off-by: Peter Zijlstra (Intel) <[email protected]>
> ---
> kernel/sched/cpufreq_schedutil.c | 58 +++++++++++++++++-----------------------
> 1 file changed, 24 insertions(+), 34 deletions(-)
>
> diff --git a/kernel/sched/cpufreq_schedutil.c b/kernel/sched/cpufreq_schedutil.c
> index 2efe629425be..72b62ac1c7c2 100644
> --- a/kernel/sched/cpufreq_schedutil.c
> +++ b/kernel/sched/cpufreq_schedutil.c
> @@ -48,10 +48,10 @@ struct sugov_cpu {
>
> bool iowait_boost_pending;
> unsigned int iowait_boost;
> - unsigned int iowait_boost_max;
> u64 last_update;
>
> unsigned long bw_dl;
> + unsigned long min;
> unsigned long max;
>
> /* The field below is for single-CPU policies only: */
> @@ -303,8 +303,7 @@ static bool sugov_iowait_reset(struct sugov_cpu *sg_cpu, u64 time,
> if (delta_ns <= TICK_NSEC)
> return false;
>
> - sg_cpu->iowait_boost = set_iowait_boost
> - ? sg_cpu->sg_policy->policy->min : 0;
> + sg_cpu->iowait_boost = set_iowait_boost ? sg_cpu->min : 0;
> sg_cpu->iowait_boost_pending = set_iowait_boost;
>
> return true;
> @@ -344,14 +343,12 @@ static void sugov_iowait_boost(struct sugov_cpu *sg_cpu, u64 time,
>
> /* Double the boost at each request */
> if (sg_cpu->iowait_boost) {
> - sg_cpu->iowait_boost <<= 1;
> - if (sg_cpu->iowait_boost > sg_cpu->iowait_boost_max)
> - sg_cpu->iowait_boost = sg_cpu->iowait_boost_max;
> + sg_cpu->iowait_boost = min(sg_cpu->iowait_boost << 1, SCHED_CAPACITY_SCALE);
> return;
> }
>
> /* First wakeup after IO: start with minimum boost */
> - sg_cpu->iowait_boost = sg_cpu->sg_policy->policy->min;
> + sg_cpu->iowait_boost = sg_cpu->min;
> }
>
> /**
> @@ -373,47 +370,38 @@ static void sugov_iowait_boost(struct sugov_cpu *sg_cpu, u64 time,
> * This mechanism is designed to boost high frequently IO waiting tasks, while
> * being more conservative on tasks which does sporadic IO operations.
> */
> -static void sugov_iowait_apply(struct sugov_cpu *sg_cpu, u64 time,
> - unsigned long *util, unsigned long *max)
> +static unsigned long sugov_iowait_apply(struct sugov_cpu *sg_cpu, u64 time,
> + unsigned long util, unsigned long max)
> {
> - unsigned int boost_util, boost_max;
> + unsigned long boost;
>
> /* No boost currently required */
> if (!sg_cpu->iowait_boost)
> - return;
> + return util;
>
> /* Reset boost if the CPU appears to have been idle enough */
> if (sugov_iowait_reset(sg_cpu, time, false))
> - return;
> + return util;
>
> - /*
> - * An IO waiting task has just woken up:
> - * allow to further double the boost value
> - */
> - if (sg_cpu->iowait_boost_pending) {
> - sg_cpu->iowait_boost_pending = false;
> - } else {
> + if (!sg_cpu->iowait_boost_pending) {
> /*
> - * Otherwise: reduce the boost value and disable it when we
> - * reach the minimum.
> + * No boost pending; reduce the boost value.
> */
> sg_cpu->iowait_boost >>= 1;
> - if (sg_cpu->iowait_boost < sg_cpu->sg_policy->policy->min) {
> + if (sg_cpu->iowait_boost < sg_cpu->min) {
> sg_cpu->iowait_boost = 0;
> - return;
> + return util;
> }
> }
>
> + sg_cpu->iowait_boost_pending = false;
> +
> /*
> - * Apply the current boost value: a CPU is boosted only if its current
> - * utilization is smaller then the current IO boost level.
> + * @util is already in capacity scale; convert iowait_boost
> + * into the same scale so we can compare.
> */
> - boost_util = sg_cpu->iowait_boost;
> - boost_max = sg_cpu->iowait_boost_max;
> - if (*util * boost_max < *max * boost_util) {
> - *util = boost_util;
> - *max = boost_max;
> - }
> + boost = (sg_cpu->iowait_boost * max) >> SCHED_CAPACITY_SHIFT;
> + return max(boost, util);
> }
>
> #ifdef CONFIG_NO_HZ_COMMON
> @@ -460,7 +448,7 @@ static void sugov_update_single(struct update_util_data *hook, u64 time,
>
> util = sugov_get_util(sg_cpu);
> max = sg_cpu->max;
> - sugov_iowait_apply(sg_cpu, time, &util, &max);
> + util = sugov_iowait_apply(sg_cpu, time, util, max);
> next_f = get_next_freq(sg_policy, util, max);
> /*
> * Do not reduce the frequency if the CPU has not been idle
> @@ -500,7 +488,7 @@ static unsigned int sugov_next_freq_shared(struct sugov_cpu *sg_cpu, u64 time)
>
> j_util = sugov_get_util(j_sg_cpu);
> j_max = j_sg_cpu->max;
> - sugov_iowait_apply(j_sg_cpu, time, &j_util, &j_max);
> + j_util = sugov_iowait_apply(j_sg_cpu, time, j_util, j_max);
>
> if (j_util * max > j_max * util) {
> util = j_util;
> @@ -837,7 +825,9 @@ static int sugov_start(struct cpufreq_policy *policy)
> memset(sg_cpu, 0, sizeof(*sg_cpu));
> sg_cpu->cpu = cpu;
> sg_cpu->sg_policy = sg_policy;
> - sg_cpu->iowait_boost_max = policy->cpuinfo.max_freq;
> + sg_cpu->min =
> + (SCHED_CAPACITY_SCALE * policy->cpuinfo.min_freq) /
> + policy->cpuinfo.max_freq;
> }
>
> for_each_cpu(cpu, policy->cpus) {
On Tue, Mar 5, 2019 at 4:32 PM Peter Zijlstra <[email protected]> wrote:
>
> On Mon, Mar 04, 2019 at 07:11:01PM +0000, Quentin Perret wrote:
>
> > So yeah, that works for me.
>
> Chunyan, Vincent; can you verify the below cures your ill?
Vincent has verified that the patch below fixes the problem he found
on our platform.
Thanks,
Chunyan
>
> ---
> Subject: sched/cpufreq: Fix 32bit math overflow
>
> Vincent Wang reported that get_next_freq() has a mult overflow issue on
> 32bit platforms in the IOWAIT boost case, since in that case {util,max}
> are in freq units instead of capacity units.
>
> Solve this by moving the IOWAIT boost to capacity units. And since this
> means @max is constant; simplify the code.
>
> Cc: Chunyan Zhang <[email protected]>
> Reported-by: Vincent Wang <[email protected]>
> Signed-off-by: Peter Zijlstra (Intel) <[email protected]>
> ---
> kernel/sched/cpufreq_schedutil.c | 58 +++++++++++++++++-----------------------
> 1 file changed, 24 insertions(+), 34 deletions(-)
>
> diff --git a/kernel/sched/cpufreq_schedutil.c b/kernel/sched/cpufreq_schedutil.c
> index 2efe629425be..72b62ac1c7c2 100644
> --- a/kernel/sched/cpufreq_schedutil.c
> +++ b/kernel/sched/cpufreq_schedutil.c
> @@ -48,10 +48,10 @@ struct sugov_cpu {
>
> bool iowait_boost_pending;
> unsigned int iowait_boost;
> - unsigned int iowait_boost_max;
> u64 last_update;
>
> unsigned long bw_dl;
> + unsigned long min;
> unsigned long max;
>
> /* The field below is for single-CPU policies only: */
> @@ -303,8 +303,7 @@ static bool sugov_iowait_reset(struct sugov_cpu *sg_cpu, u64 time,
> if (delta_ns <= TICK_NSEC)
> return false;
>
> - sg_cpu->iowait_boost = set_iowait_boost
> - ? sg_cpu->sg_policy->policy->min : 0;
> + sg_cpu->iowait_boost = set_iowait_boost ? sg_cpu->min : 0;
> sg_cpu->iowait_boost_pending = set_iowait_boost;
>
> return true;
> @@ -344,14 +343,12 @@ static void sugov_iowait_boost(struct sugov_cpu *sg_cpu, u64 time,
>
> /* Double the boost at each request */
> if (sg_cpu->iowait_boost) {
> - sg_cpu->iowait_boost <<= 1;
> - if (sg_cpu->iowait_boost > sg_cpu->iowait_boost_max)
> - sg_cpu->iowait_boost = sg_cpu->iowait_boost_max;
> + sg_cpu->iowait_boost = min(sg_cpu->iowait_boost << 1, SCHED_CAPACITY_SCALE);
> return;
> }
>
> /* First wakeup after IO: start with minimum boost */
> - sg_cpu->iowait_boost = sg_cpu->sg_policy->policy->min;
> + sg_cpu->iowait_boost = sg_cpu->min;
> }
>
> /**
> @@ -373,47 +370,38 @@ static void sugov_iowait_boost(struct sugov_cpu *sg_cpu, u64 time,
> * This mechanism is designed to boost high frequently IO waiting tasks, while
> * being more conservative on tasks which does sporadic IO operations.
> */
> -static void sugov_iowait_apply(struct sugov_cpu *sg_cpu, u64 time,
> - unsigned long *util, unsigned long *max)
> +static unsigned long sugov_iowait_apply(struct sugov_cpu *sg_cpu, u64 time,
> + unsigned long util, unsigned long max)
> {
> - unsigned int boost_util, boost_max;
> + unsigned long boost;
>
> /* No boost currently required */
> if (!sg_cpu->iowait_boost)
> - return;
> + return util;
>
> /* Reset boost if the CPU appears to have been idle enough */
> if (sugov_iowait_reset(sg_cpu, time, false))
> - return;
> + return util;
>
> - /*
> - * An IO waiting task has just woken up:
> - * allow to further double the boost value
> - */
> - if (sg_cpu->iowait_boost_pending) {
> - sg_cpu->iowait_boost_pending = false;
> - } else {
> + if (!sg_cpu->iowait_boost_pending) {
> /*
> - * Otherwise: reduce the boost value and disable it when we
> - * reach the minimum.
> + * No boost pending; reduce the boost value.
> */
> sg_cpu->iowait_boost >>= 1;
> - if (sg_cpu->iowait_boost < sg_cpu->sg_policy->policy->min) {
> + if (sg_cpu->iowait_boost < sg_cpu->min) {
> sg_cpu->iowait_boost = 0;
> - return;
> + return util;
> }
> }
>
> + sg_cpu->iowait_boost_pending = false;
> +
> /*
> - * Apply the current boost value: a CPU is boosted only if its current
> - * utilization is smaller then the current IO boost level.
> + * @util is already in capacity scale; convert iowait_boost
> + * into the same scale so we can compare.
> */
> - boost_util = sg_cpu->iowait_boost;
> - boost_max = sg_cpu->iowait_boost_max;
> - if (*util * boost_max < *max * boost_util) {
> - *util = boost_util;
> - *max = boost_max;
> - }
> + boost = (sg_cpu->iowait_boost * max) >> SCHED_CAPACITY_SHIFT;
> + return max(boost, util);
> }
>
> #ifdef CONFIG_NO_HZ_COMMON
> @@ -460,7 +448,7 @@ static void sugov_update_single(struct update_util_data *hook, u64 time,
>
> util = sugov_get_util(sg_cpu);
> max = sg_cpu->max;
> - sugov_iowait_apply(sg_cpu, time, &util, &max);
> + util = sugov_iowait_apply(sg_cpu, time, util, max);
> next_f = get_next_freq(sg_policy, util, max);
> /*
> * Do not reduce the frequency if the CPU has not been idle
> @@ -500,7 +488,7 @@ static unsigned int sugov_next_freq_shared(struct sugov_cpu *sg_cpu, u64 time)
>
> j_util = sugov_get_util(j_sg_cpu);
> j_max = j_sg_cpu->max;
> - sugov_iowait_apply(j_sg_cpu, time, &j_util, &j_max);
> + j_util = sugov_iowait_apply(j_sg_cpu, time, j_util, j_max);
>
> if (j_util * max > j_max * util) {
> util = j_util;
> @@ -837,7 +825,9 @@ static int sugov_start(struct cpufreq_policy *policy)
> memset(sg_cpu, 0, sizeof(*sg_cpu));
> sg_cpu->cpu = cpu;
> sg_cpu->sg_policy = sg_policy;
> - sg_cpu->iowait_boost_max = policy->cpuinfo.max_freq;
> + sg_cpu->min =
> + (SCHED_CAPACITY_SCALE * policy->cpuinfo.min_freq) /
> + policy->cpuinfo.max_freq;
> }
>
> for_each_cpu(cpu, policy->cpus) {
Commit-ID: f1212844e9dc3a31d41f99713c5522acf92ff291
Gitweb: https://git.kernel.org/tip/f1212844e9dc3a31d41f99713c5522acf92ff291
Author: Peter Zijlstra <[email protected]>
AuthorDate: Tue, 5 Mar 2019 09:32:02 +0100
Committer: Ingo Molnar <[email protected]>
CommitDate: Sat, 9 Mar 2019 14:03:51 +0100
sched/cpufreq: Fix 32-bit math overflow
Vincent Wang reported that get_next_freq() has a mult overflow bug on
32-bit platforms in the IOWAIT boost case, since in that case {util,max}
are in freq units instead of capacity units.
Solve this by moving the IOWAIT boost to capacity units. And since this
means @max is constant; simplify the code.
Reported-by: Vincent Wang <[email protected]>
Tested-by: Vincent Wang <[email protected]>
Signed-off-by: Peter Zijlstra (Intel) <[email protected]>
Acked-by: Rafael J. Wysocki <[email protected]>
Cc: Andy Lutomirski <[email protected]>
Cc: Borislav Petkov <[email protected]>
Cc: Chunyan Zhang <[email protected]>
Cc: Dave Hansen <[email protected]>
Cc: H. Peter Anvin <[email protected]>
Cc: Linus Torvalds <[email protected]>
Cc: Peter Zijlstra <[email protected]>
Cc: Quentin Perret <[email protected]>
Cc: Rafael J. Wysocki <[email protected]>
Cc: Rik van Riel <[email protected]>
Cc: Thomas Gleixner <[email protected]>
Link: https://lkml.kernel.org/r/[email protected]
Signed-off-by: Ingo Molnar <[email protected]>
---
kernel/sched/cpufreq_schedutil.c | 58 +++++++++++++++++-----------------------
1 file changed, 24 insertions(+), 34 deletions(-)
diff --git a/kernel/sched/cpufreq_schedutil.c b/kernel/sched/cpufreq_schedutil.c
index 033ec7c45f13..5a8932ee5112 100644
--- a/kernel/sched/cpufreq_schedutil.c
+++ b/kernel/sched/cpufreq_schedutil.c
@@ -48,10 +48,10 @@ struct sugov_cpu {
bool iowait_boost_pending;
unsigned int iowait_boost;
- unsigned int iowait_boost_max;
u64 last_update;
unsigned long bw_dl;
+ unsigned long min;
unsigned long max;
/* The field below is for single-CPU policies only: */
@@ -303,8 +303,7 @@ static bool sugov_iowait_reset(struct sugov_cpu *sg_cpu, u64 time,
if (delta_ns <= TICK_NSEC)
return false;
- sg_cpu->iowait_boost = set_iowait_boost
- ? sg_cpu->sg_policy->policy->min : 0;
+ sg_cpu->iowait_boost = set_iowait_boost ? sg_cpu->min : 0;
sg_cpu->iowait_boost_pending = set_iowait_boost;
return true;
@@ -344,14 +343,12 @@ static void sugov_iowait_boost(struct sugov_cpu *sg_cpu, u64 time,
/* Double the boost at each request */
if (sg_cpu->iowait_boost) {
- sg_cpu->iowait_boost <<= 1;
- if (sg_cpu->iowait_boost > sg_cpu->iowait_boost_max)
- sg_cpu->iowait_boost = sg_cpu->iowait_boost_max;
+ sg_cpu->iowait_boost = min(sg_cpu->iowait_boost << 1, SCHED_CAPACITY_SCALE);
return;
}
/* First wakeup after IO: start with minimum boost */
- sg_cpu->iowait_boost = sg_cpu->sg_policy->policy->min;
+ sg_cpu->iowait_boost = sg_cpu->min;
}
/**
@@ -373,47 +370,38 @@ static void sugov_iowait_boost(struct sugov_cpu *sg_cpu, u64 time,
* This mechanism is designed to boost high frequently IO waiting tasks, while
* being more conservative on tasks which does sporadic IO operations.
*/
-static void sugov_iowait_apply(struct sugov_cpu *sg_cpu, u64 time,
- unsigned long *util, unsigned long *max)
+static unsigned long sugov_iowait_apply(struct sugov_cpu *sg_cpu, u64 time,
+ unsigned long util, unsigned long max)
{
- unsigned int boost_util, boost_max;
+ unsigned long boost;
/* No boost currently required */
if (!sg_cpu->iowait_boost)
- return;
+ return util;
/* Reset boost if the CPU appears to have been idle enough */
if (sugov_iowait_reset(sg_cpu, time, false))
- return;
+ return util;
- /*
- * An IO waiting task has just woken up:
- * allow to further double the boost value
- */
- if (sg_cpu->iowait_boost_pending) {
- sg_cpu->iowait_boost_pending = false;
- } else {
+ if (!sg_cpu->iowait_boost_pending) {
/*
- * Otherwise: reduce the boost value and disable it when we
- * reach the minimum.
+ * No boost pending; reduce the boost value.
*/
sg_cpu->iowait_boost >>= 1;
- if (sg_cpu->iowait_boost < sg_cpu->sg_policy->policy->min) {
+ if (sg_cpu->iowait_boost < sg_cpu->min) {
sg_cpu->iowait_boost = 0;
- return;
+ return util;
}
}
+ sg_cpu->iowait_boost_pending = false;
+
/*
- * Apply the current boost value: a CPU is boosted only if its current
- * utilization is smaller then the current IO boost level.
+ * @util is already in capacity scale; convert iowait_boost
+ * into the same scale so we can compare.
*/
- boost_util = sg_cpu->iowait_boost;
- boost_max = sg_cpu->iowait_boost_max;
- if (*util * boost_max < *max * boost_util) {
- *util = boost_util;
- *max = boost_max;
- }
+ boost = (sg_cpu->iowait_boost * max) >> SCHED_CAPACITY_SHIFT;
+ return max(boost, util);
}
#ifdef CONFIG_NO_HZ_COMMON
@@ -460,7 +448,7 @@ static void sugov_update_single(struct update_util_data *hook, u64 time,
util = sugov_get_util(sg_cpu);
max = sg_cpu->max;
- sugov_iowait_apply(sg_cpu, time, &util, &max);
+ util = sugov_iowait_apply(sg_cpu, time, util, max);
next_f = get_next_freq(sg_policy, util, max);
/*
* Do not reduce the frequency if the CPU has not been idle
@@ -500,7 +488,7 @@ static unsigned int sugov_next_freq_shared(struct sugov_cpu *sg_cpu, u64 time)
j_util = sugov_get_util(j_sg_cpu);
j_max = j_sg_cpu->max;
- sugov_iowait_apply(j_sg_cpu, time, &j_util, &j_max);
+ j_util = sugov_iowait_apply(j_sg_cpu, time, j_util, j_max);
if (j_util * max > j_max * util) {
util = j_util;
@@ -837,7 +825,9 @@ static int sugov_start(struct cpufreq_policy *policy)
memset(sg_cpu, 0, sizeof(*sg_cpu));
sg_cpu->cpu = cpu;
sg_cpu->sg_policy = sg_policy;
- sg_cpu->iowait_boost_max = policy->cpuinfo.max_freq;
+ sg_cpu->min =
+ (SCHED_CAPACITY_SCALE * policy->cpuinfo.min_freq) /
+ policy->cpuinfo.max_freq;
}
for_each_cpu(cpu, policy->cpus) {
Commit-ID: a23314e9d88d89d49e69db08f60b7caa470f04e1
Gitweb: https://git.kernel.org/tip/a23314e9d88d89d49e69db08f60b7caa470f04e1
Author: Peter Zijlstra <[email protected]>
AuthorDate: Tue, 5 Mar 2019 09:32:02 +0100
Committer: Ingo Molnar <[email protected]>
CommitDate: Tue, 19 Mar 2019 12:06:11 +0100
sched/cpufreq: Fix 32-bit math overflow
Vincent Wang reported that get_next_freq() has a mult overflow bug on
32-bit platforms in the IOWAIT boost case, since in that case {util,max}
are in freq units instead of capacity units.
Solve this by moving the IOWAIT boost to capacity units. And since this
means @max is constant; simplify the code.
Reported-by: Vincent Wang <[email protected]>
Tested-by: Vincent Wang <[email protected]>
Signed-off-by: Peter Zijlstra (Intel) <[email protected]>
Acked-by: Rafael J. Wysocki <[email protected]>
Cc: Andy Lutomirski <[email protected]>
Cc: Borislav Petkov <[email protected]>
Cc: Chunyan Zhang <[email protected]>
Cc: Dave Hansen <[email protected]>
Cc: H. Peter Anvin <[email protected]>
Cc: Linus Torvalds <[email protected]>
Cc: Peter Zijlstra <[email protected]>
Cc: Quentin Perret <[email protected]>
Cc: Rafael J. Wysocki <[email protected]>
Cc: Rik van Riel <[email protected]>
Cc: Thomas Gleixner <[email protected]>
Link: https://lkml.kernel.org/r/[email protected]
Signed-off-by: Ingo Molnar <[email protected]>
---
kernel/sched/cpufreq_schedutil.c | 59 +++++++++++++++++-----------------------
1 file changed, 25 insertions(+), 34 deletions(-)
diff --git a/kernel/sched/cpufreq_schedutil.c b/kernel/sched/cpufreq_schedutil.c
index 033ec7c45f13..1ccf77f6d346 100644
--- a/kernel/sched/cpufreq_schedutil.c
+++ b/kernel/sched/cpufreq_schedutil.c
@@ -48,10 +48,10 @@ struct sugov_cpu {
bool iowait_boost_pending;
unsigned int iowait_boost;
- unsigned int iowait_boost_max;
u64 last_update;
unsigned long bw_dl;
+ unsigned long min;
unsigned long max;
/* The field below is for single-CPU policies only: */
@@ -303,8 +303,7 @@ static bool sugov_iowait_reset(struct sugov_cpu *sg_cpu, u64 time,
if (delta_ns <= TICK_NSEC)
return false;
- sg_cpu->iowait_boost = set_iowait_boost
- ? sg_cpu->sg_policy->policy->min : 0;
+ sg_cpu->iowait_boost = set_iowait_boost ? sg_cpu->min : 0;
sg_cpu->iowait_boost_pending = set_iowait_boost;
return true;
@@ -344,14 +343,13 @@ static void sugov_iowait_boost(struct sugov_cpu *sg_cpu, u64 time,
/* Double the boost at each request */
if (sg_cpu->iowait_boost) {
- sg_cpu->iowait_boost <<= 1;
- if (sg_cpu->iowait_boost > sg_cpu->iowait_boost_max)
- sg_cpu->iowait_boost = sg_cpu->iowait_boost_max;
+ sg_cpu->iowait_boost =
+ min_t(unsigned int, sg_cpu->iowait_boost << 1, SCHED_CAPACITY_SCALE);
return;
}
/* First wakeup after IO: start with minimum boost */
- sg_cpu->iowait_boost = sg_cpu->sg_policy->policy->min;
+ sg_cpu->iowait_boost = sg_cpu->min;
}
/**
@@ -373,47 +371,38 @@ static void sugov_iowait_boost(struct sugov_cpu *sg_cpu, u64 time,
* This mechanism is designed to boost high frequently IO waiting tasks, while
* being more conservative on tasks which does sporadic IO operations.
*/
-static void sugov_iowait_apply(struct sugov_cpu *sg_cpu, u64 time,
- unsigned long *util, unsigned long *max)
+static unsigned long sugov_iowait_apply(struct sugov_cpu *sg_cpu, u64 time,
+ unsigned long util, unsigned long max)
{
- unsigned int boost_util, boost_max;
+ unsigned long boost;
/* No boost currently required */
if (!sg_cpu->iowait_boost)
- return;
+ return util;
/* Reset boost if the CPU appears to have been idle enough */
if (sugov_iowait_reset(sg_cpu, time, false))
- return;
+ return util;
- /*
- * An IO waiting task has just woken up:
- * allow to further double the boost value
- */
- if (sg_cpu->iowait_boost_pending) {
- sg_cpu->iowait_boost_pending = false;
- } else {
+ if (!sg_cpu->iowait_boost_pending) {
/*
- * Otherwise: reduce the boost value and disable it when we
- * reach the minimum.
+ * No boost pending; reduce the boost value.
*/
sg_cpu->iowait_boost >>= 1;
- if (sg_cpu->iowait_boost < sg_cpu->sg_policy->policy->min) {
+ if (sg_cpu->iowait_boost < sg_cpu->min) {
sg_cpu->iowait_boost = 0;
- return;
+ return util;
}
}
+ sg_cpu->iowait_boost_pending = false;
+
/*
- * Apply the current boost value: a CPU is boosted only if its current
- * utilization is smaller then the current IO boost level.
+ * @util is already in capacity scale; convert iowait_boost
+ * into the same scale so we can compare.
*/
- boost_util = sg_cpu->iowait_boost;
- boost_max = sg_cpu->iowait_boost_max;
- if (*util * boost_max < *max * boost_util) {
- *util = boost_util;
- *max = boost_max;
- }
+ boost = (sg_cpu->iowait_boost * max) >> SCHED_CAPACITY_SHIFT;
+ return max(boost, util);
}
#ifdef CONFIG_NO_HZ_COMMON
@@ -460,7 +449,7 @@ static void sugov_update_single(struct update_util_data *hook, u64 time,
util = sugov_get_util(sg_cpu);
max = sg_cpu->max;
- sugov_iowait_apply(sg_cpu, time, &util, &max);
+ util = sugov_iowait_apply(sg_cpu, time, util, max);
next_f = get_next_freq(sg_policy, util, max);
/*
* Do not reduce the frequency if the CPU has not been idle
@@ -500,7 +489,7 @@ static unsigned int sugov_next_freq_shared(struct sugov_cpu *sg_cpu, u64 time)
j_util = sugov_get_util(j_sg_cpu);
j_max = j_sg_cpu->max;
- sugov_iowait_apply(j_sg_cpu, time, &j_util, &j_max);
+ j_util = sugov_iowait_apply(j_sg_cpu, time, j_util, j_max);
if (j_util * max > j_max * util) {
util = j_util;
@@ -837,7 +826,9 @@ static int sugov_start(struct cpufreq_policy *policy)
memset(sg_cpu, 0, sizeof(*sg_cpu));
sg_cpu->cpu = cpu;
sg_cpu->sg_policy = sg_policy;
- sg_cpu->iowait_boost_max = policy->cpuinfo.max_freq;
+ sg_cpu->min =
+ (SCHED_CAPACITY_SCALE * policy->cpuinfo.min_freq) /
+ policy->cpuinfo.max_freq;
}
for_each_cpu(cpu, policy->cpus) {