2016-03-24 22:26:15

by Steve Muckle

Subject: [RFC PATCH] sched/fair: call cpufreq hook in additional paths

The cpufreq hook should be called any time the root CFS rq utilization
changes. This can occur when a task is switched to or from the fair
class, or a task moves between groups or CPUs, but these paths
currently do not call the cpufreq hook.

Fix this by adding the hook to attach_entity_load_avg() and
detach_entity_load_avg().
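
For reference, the affected paths reach the new call sites roughly as
follows (a simplified sketch of the current fair.c, not an exhaustive
list):

	switched_to_fair()     -> attach_task_cfs_rq()  -> attach_entity_load_avg()
	switched_from_fair()   -> detach_task_cfs_rq()  -> detach_entity_load_avg()
	task_move_group_fair() -> detach_task_cfs_rq()  -> detach_entity_load_avg(),
	                          then attach_task_cfs_rq() on the new group
	migration enqueue      -> enqueue_entity_load_avg() -> attach_entity_load_avg()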

Suggested-by: Vincent Guittot <[email protected]>
Signed-off-by: Steve Muckle <[email protected]>
---
Unfortunately this means that in the migration case,
enqueue_entity_load_avg() will end up calling the cpufreq hook twice -
once via update_cfs_rq_load_avg() and once via
attach_entity_load_avg(). This should ideally get filtered out before
the cpufreq driver is invoked but nevertheless is wasteful. Possible
alternatives to this are

- moving the cpufreq hook from update_cfs_rq_load_avg() into
its callers (there are three) and also putting the hook
in attach_task_cfs_rq and detach_task_cfs_rq, resulting in
five call sites of the hook in fair.c as opposed to three

- passing an argument into attach_entity_load_avg() to indicate
whether calling the cpufreq hook is necessary

Both of these are ugly in their own way but would avoid a runtime
cost. Opinions welcome.
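
For illustration, the migration enqueue path with this patch applied ends
up looking roughly like the sketch below (simplified, not the literal
code):

	static void
	enqueue_entity_load_avg(struct cfs_rq *cfs_rq, struct sched_entity *se)
	{
		int migrated = !se->avg.last_update_time;

		/* 1st hook call: the removed/decayed utilization makes
		 * update_cfs_rq_load_avg() invoke cfs_rq_util_change(). */
		update_cfs_rq_load_avg(cfs_rq_clock_task(cfs_rq), cfs_rq);

		cfs_rq->runnable_load_avg += se->avg.load_avg;
		cfs_rq->runnable_load_sum += se->avg.load_sum;

		if (migrated)
			/* 2nd hook call: attach_entity_load_avg() adds the
			 * entity's utilization and, with this patch, calls
			 * cfs_rq_util_change() again. */
			attach_entity_load_avg(cfs_rq, se);
	}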

Note that this patch depends on the 2 patches I sent several days ago:
http://thread.gmane.org/gmane.linux.kernel/2181498

kernel/sched/fair.c | 62 +++++++++++++++++++++++++++++++----------------------
1 file changed, 36 insertions(+), 26 deletions(-)

diff --git a/kernel/sched/fair.c b/kernel/sched/fair.c
index af58e826cffe..73f18f2fc9f5 100644
--- a/kernel/sched/fair.c
+++ b/kernel/sched/fair.c
@@ -2821,13 +2821,40 @@ static inline void update_tg_load_avg(struct cfs_rq *cfs_rq, int force) {}

static inline u64 cfs_rq_clock_task(struct cfs_rq *cfs_rq);

+static inline void cfs_rq_util_change(struct cfs_rq *cfs_rq)
+{
+ struct rq *rq = rq_of(cfs_rq);
+ int cpu = cpu_of(rq);
+
+ if (cpu == smp_processor_id() && &rq->cfs == cfs_rq) {
+ unsigned long max = rq->cpu_capacity_orig;
+
+ /*
+ * There are a few boundary cases this might miss but it should
+ * get called often enough that that should (hopefully) not be
+ * a real problem -- added to that it only calls on the local
+ * CPU, so if we enqueue remotely we'll miss an update, but
+ * the next tick/schedule should update.
+ *
+ * It will not get called when we go idle, because the idle
+ * thread is a different class (!fair), nor will the utilization
+ * number include things like RT tasks.
+ *
+ * As is, the util number is not freq-invariant (we'd have to
+ * implement arch_scale_freq_capacity() for that).
+ *
+ * See cpu_util().
+ */
+ cpufreq_update_util(rq_clock(rq),
+ min(cfs_rq->avg.util_avg, max), max);
+ }
+}
+
/* Group cfs_rq's load_avg is used for task_h_load and update_cfs_share */
static inline int update_cfs_rq_load_avg(u64 now, struct cfs_rq *cfs_rq)
{
struct sched_avg *sa = &cfs_rq->avg;
- struct rq *rq = rq_of(cfs_rq);
int decayed, removed_load = 0, removed_util = 0;
- int cpu = cpu_of(rq);

if (atomic_long_read(&cfs_rq->removed_load_avg)) {
s64 r = atomic_long_xchg(&cfs_rq->removed_load_avg, 0);
@@ -2843,7 +2870,7 @@ static inline int update_cfs_rq_load_avg(u64 now, struct cfs_rq *cfs_rq)
removed_util = 1;
}

- decayed = __update_load_avg(now, cpu, sa,
+ decayed = __update_load_avg(now, cpu_of(rq_of(cfs_rq)), sa,
scale_load_down(cfs_rq->load.weight), cfs_rq->curr != NULL, cfs_rq);

#ifndef CONFIG_64BIT
@@ -2851,29 +2878,8 @@ static inline int update_cfs_rq_load_avg(u64 now, struct cfs_rq *cfs_rq)
cfs_rq->load_last_update_time_copy = sa->last_update_time;
#endif

- if (cpu == smp_processor_id() && &rq->cfs == cfs_rq &&
- (decayed || removed_util)) {
- unsigned long max = rq->cpu_capacity_orig;
-
- /*
- * There are a few boundary cases this might miss but it should
- * get called often enough that that should (hopefully) not be
- * a real problem -- added to that it only calls on the local
- * CPU, so if we enqueue remotely we'll miss an update, but
- * the next tick/schedule should update.
- *
- * It will not get called when we go idle, because the idle
- * thread is a different class (!fair), nor will the utilization
- * number include things like RT tasks.
- *
- * As is, the util number is not freq-invariant (we'd have to
- * implement arch_scale_freq_capacity() for that).
- *
- * See cpu_util().
- */
- cpufreq_update_util(rq_clock(rq),
- min(sa->util_avg, max), max);
- }
+ if (decayed || removed_util)
+ cfs_rq_util_change(cfs_rq);

return decayed || removed_load;
}
@@ -2923,6 +2929,8 @@ skip_aging:
cfs_rq->avg.load_sum += se->avg.load_sum;
cfs_rq->avg.util_avg += se->avg.util_avg;
cfs_rq->avg.util_sum += se->avg.util_sum;
+
+ cfs_rq_util_change(cfs_rq);
}

static void detach_entity_load_avg(struct cfs_rq *cfs_rq, struct sched_entity *se)
@@ -2935,6 +2943,8 @@ static void detach_entity_load_avg(struct cfs_rq *cfs_rq, struct sched_entity *s
cfs_rq->avg.load_sum = max_t(s64, cfs_rq->avg.load_sum - se->avg.load_sum, 0);
cfs_rq->avg.util_avg = max_t(long, cfs_rq->avg.util_avg - se->avg.util_avg, 0);
cfs_rq->avg.util_sum = max_t(s32, cfs_rq->avg.util_sum - se->avg.util_sum, 0);
+
+ cfs_rq_util_change(cfs_rq);
}

/* Add the load generated by se into cfs_rq's load average */
--
2.4.10
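
For context on the hook's arguments: cpufreq_update_util() is handed the
current rq clock, the clamped utilization and the CPU's original capacity
(rq->cpu_capacity_orig). A hypothetical, standalone illustration (not the
kernel governor code; all names here are made up) of the proportional
mapping a consumer might apply to (util, max):

	#include <stdio.h>

	/* Scale the CPU's maximum frequency by util/max. */
	static unsigned long pick_freq_khz(unsigned long util, unsigned long max,
					   unsigned long max_freq_khz)
	{
		if (util > max)		/* the hook already passes min(util_avg, max) */
			util = max;
		return max_freq_khz * util / max;
	}

	int main(void)
	{
		/* e.g. util_avg = 512 against cpu_capacity_orig = 1024 on a 2 GHz CPU */
		printf("%lu kHz\n", pick_freq_khz(512, 1024, 2000000UL));
		return 0;
	}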


2016-03-31 07:59:58

by Peter Zijlstra

Subject: Re: [RFC PATCH] sched/fair: call cpufreq hook in additional paths

On Thu, Mar 24, 2016 at 03:26:07PM -0700, Steve Muckle wrote:
> Note that this patch depends on the 2 patches I sent several days ago:
> http://thread.gmane.org/gmane.linux.kernel/2181498

Right; I had something similar for a little while..

> Unfortunately this means that in the migration case,
> enqueue_entity_load_avg() will end up calling the cpufreq hook twice -
> once via update_cfs_rq_load_avg() and once via
> attach_entity_load_avg().

Right; this is the point where I gave up when I looked at that.

> This should ideally get filtered out before
> the cpufreq driver is invoked but nevertheless is wasteful. Possible
> alternatives to this are
>
> - moving the cpufreq hook from update_cfs_rq_load_avg() into
> its callers (there are three) and also putting the hook
> in attach_task_cfs_rq and detach_task_cfs_rq, resulting in
> five call sites of the hook in fair.c as opposed to three

It's worse; you get a whole nest of extra logic to determine when to call
what. See below (now arguably it's still early, so I might have made it
more complex than it needed to be..).

>
> - passing an argument into attach_entity_load_avg() to indicate
> whether calling the cpufreq hook is necessary
>
> Both of these are ugly in their own way but would avoid a runtime
> cost. Opinions welcome.

Lemme see what this would look like while I throw the below into the bit
bucket.



---
--- a/kernel/sched/fair.c
+++ b/kernel/sched/fair.c
@@ -2854,23 +2854,23 @@ static inline void cfs_rq_util_change(st
static inline int update_cfs_rq_load_avg(u64 now, struct cfs_rq *cfs_rq)
{
struct sched_avg *sa = &cfs_rq->avg;
- int decayed, removed_load = 0, removed_util = 0;
+ int ret = 0;

if (atomic_long_read(&cfs_rq->removed_load_avg)) {
s64 r = atomic_long_xchg(&cfs_rq->removed_load_avg, 0);
sa->load_avg = max_t(long, sa->load_avg - r, 0);
sa->load_sum = max_t(s64, sa->load_sum - r * LOAD_AVG_MAX, 0);
- removed_load = 1;
+ ret |= 1 << 1;
}

if (atomic_long_read(&cfs_rq->removed_util_avg)) {
long r = atomic_long_xchg(&cfs_rq->removed_util_avg, 0);
sa->util_avg = max_t(long, sa->util_avg - r, 0);
sa->util_sum = max_t(s32, sa->util_sum - r * LOAD_AVG_MAX, 0);
- removed_util = 1;
+ ret |= 1 << 2;
}

- decayed = __update_load_avg(now, cpu_of(rq_of(cfs_rq)), sa,
+ ret |= __update_load_avg(now, cpu_of(rq_of(cfs_rq)), sa,
scale_load_down(cfs_rq->load.weight), cfs_rq->curr != NULL, cfs_rq);

#ifndef CONFIG_64BIT
@@ -2878,10 +2878,7 @@ static inline int update_cfs_rq_load_avg
cfs_rq->load_last_update_time_copy = sa->last_update_time;
#endif

- if (decayed || removed_util)
- cfs_rq_util_change(cfs_rq);
-
- return decayed || removed_load;
+ return ret;
}

/* Update task and its cfs_rq load average */
@@ -2891,6 +2888,7 @@ static inline void update_load_avg(struc
u64 now = cfs_rq_clock_task(cfs_rq);
struct rq *rq = rq_of(cfs_rq);
int cpu = cpu_of(rq);
+ int updated;

/*
* Track task load average for carrying it to new CPU after migrated, and
@@ -2900,9 +2898,14 @@ static inline void update_load_avg(struc
se->on_rq * scale_load_down(se->load.weight),
cfs_rq->curr == se, NULL);

- if (update_cfs_rq_load_avg(now, cfs_rq) && update_tg)
+ updated = update_cfs_rq_load_avg(now, cfs_rq);
+
+ if (updated & 5)
+ cfs_rq_util_change(cfs_rq);
+
+ if (update_tg && (updated & 3))
update_tg_load_avg(cfs_rq, 0);
}

static void attach_entity_load_avg(struct cfs_rq *cfs_rq, struct sched_entity *se)
{
@@ -2953,7 +2956,7 @@ enqueue_entity_load_avg(struct cfs_rq *c
{
struct sched_avg *sa = &se->avg;
u64 now = cfs_rq_clock_task(cfs_rq);
- int migrated, decayed;
+ int migrated, updated;

migrated = !sa->last_update_time;
if (!migrated) {
@@ -2962,16 +2965,18 @@ enqueue_entity_load_avg(struct cfs_rq *c
cfs_rq->curr == se, NULL);
}

- decayed = update_cfs_rq_load_avg(now, cfs_rq);
+ updated = update_cfs_rq_load_avg(now, cfs_rq);

cfs_rq->runnable_load_avg += sa->load_avg;
cfs_rq->runnable_load_sum += sa->load_sum;

- if (migrated)
+ if (migrated) {
attach_entity_load_avg(cfs_rq, se);
-
- if (decayed || migrated)
- update_tg_load_avg(cfs_rq, 0);
+ if (updated & 3)
+ update_tg_load_avg(cfs_rq, 0);
+ } else if (updated & 5) {
+ cfs_rq_util_change(cfs_rq);
+ }
}

/* Remove the runnable load generated by se from cfs_rq's runnable load average */
@@ -6157,6 +6162,7 @@ static void update_blocked_averages(int
struct rq *rq = cpu_rq(cpu);
struct cfs_rq *cfs_rq;
unsigned long flags;
+ int updated;

raw_spin_lock_irqsave(&rq->lock, flags);
update_rq_clock(rq);
@@ -6170,7 +6176,10 @@ static void update_blocked_averages(int
if (throttled_hierarchy(cfs_rq))
continue;

- if (update_cfs_rq_load_avg(cfs_rq_clock_task(cfs_rq), cfs_rq))
+ updated = update_cfs_rq_load_avg(cfs_rq_clock_task(cfs_rq), cfs_rq);
+ if (updated & 5)
+ cfs_rq_util_change(cfs_rq);
+ if (updated & 3)
update_tg_load_avg(cfs_rq, 0);
}
raw_spin_unlock_irqrestore(&rq->lock, flags);
@@ -6228,10 +6237,13 @@ static inline void update_blocked_averag
struct rq *rq = cpu_rq(cpu);
struct cfs_rq *cfs_rq = &rq->cfs;
unsigned long flags;
+ int updated;

raw_spin_lock_irqsave(&rq->lock, flags);
update_rq_clock(rq);
- update_cfs_rq_load_avg(cfs_rq_clock_task(cfs_rq), cfs_rq);
+ updated = update_cfs_rq_load_avg(cfs_rq_clock_task(cfs_rq), cfs_rq);
+ if (updated & 5)
+ cfs_rq_util_change(cfs_rq);
raw_spin_unlock_irqrestore(&rq->lock, flags);
}
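
To decode the masks above: update_cfs_rq_load_avg() now returns a small
bitmask instead of a boolean. A minimal reading of the encoding used in
the hunks above:

	/*
	 * bit 0: the averages decayed          (the __update_load_avg() return)
	 * bit 1: removed_load was non-zero     (ret |= 1 << 1)
	 * bit 2: removed_util was non-zero     (ret |= 1 << 2)
	 *
	 * updated & 5 == decayed || removed_util -> cfs_rq_util_change()
	 * updated & 3 == decayed || removed_load -> update_tg_load_avg()
	 */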


2016-03-31 09:14:38

by Peter Zijlstra

Subject: Re: [RFC PATCH] sched/fair: call cpufreq hook in additional paths

On Thu, Mar 31, 2016 at 09:59:51AM +0200, Peter Zijlstra wrote:
> > - passing an argument into attach_entity_load_avg() to indicate
> > whether calling the cpufreq hook is necessary
> >
> > Both of these are ugly in their own way but would avoid a runtime
> > cost. Opinions welcome.
>
> Lemme see what this would look like while I throw the below into the bit
> bucket.

OK, so the below looks a lot more sane; and has the surprising benefit
of actually shrinking the text size..

43675 1226 24 44925 af7d defconfig-build/kernel/sched/fair.o.base
43723 1226 24 44973 afad defconfig-build/kernel/sched/fair.o.patch
43595 1226 24 44845 af2d defconfig-build/kernel/sched/fair.o.patch+

---
--- a/kernel/sched/fair.c
+++ b/kernel/sched/fair.c
@@ -2851,7 +2851,8 @@ static inline void cfs_rq_util_change(st
}

/* Group cfs_rq's load_avg is used for task_h_load and update_cfs_share */
-static inline int update_cfs_rq_load_avg(u64 now, struct cfs_rq *cfs_rq)
+static inline int
+update_cfs_rq_load_avg(u64 now, struct cfs_rq *cfs_rq, bool update_freq)
{
struct sched_avg *sa = &cfs_rq->avg;
int decayed, removed_load = 0, removed_util = 0;
@@ -2878,7 +2879,7 @@ static inline int update_cfs_rq_load_avg
cfs_rq->load_last_update_time_copy = sa->last_update_time;
#endif

- if (decayed || removed_util)
+ if (update_freq && (decayed || removed_util))
cfs_rq_util_change(cfs_rq);

return decayed || removed_load;
@@ -2900,7 +2901,7 @@ static inline void update_load_avg(struc
se->on_rq * scale_load_down(se->load.weight),
cfs_rq->curr == se, NULL);

- if (update_cfs_rq_load_avg(now, cfs_rq) && update_tg)
+ if (update_cfs_rq_load_avg(now, cfs_rq, true) && update_tg)
update_tg_load_avg(cfs_rq, 0);
}

@@ -2962,7 +2963,7 @@ enqueue_entity_load_avg(struct cfs_rq *c
cfs_rq->curr == se, NULL);
}

- decayed = update_cfs_rq_load_avg(now, cfs_rq);
+ decayed = update_cfs_rq_load_avg(now, cfs_rq, !migrated);

cfs_rq->runnable_load_avg += sa->load_avg;
cfs_rq->runnable_load_sum += sa->load_sum;
@@ -6170,7 +6171,7 @@ static void update_blocked_averages(int
if (throttled_hierarchy(cfs_rq))
continue;

- if (update_cfs_rq_load_avg(cfs_rq_clock_task(cfs_rq), cfs_rq))
+ if (update_cfs_rq_load_avg(cfs_rq_clock_task(cfs_rq), cfs_rq, true))
update_tg_load_avg(cfs_rq, 0);
}
raw_spin_unlock_irqrestore(&rq->lock, flags);
@@ -6231,7 +6232,7 @@ static inline void update_blocked_averag

raw_spin_lock_irqsave(&rq->lock, flags);
update_rq_clock(rq);
- update_cfs_rq_load_avg(cfs_rq_clock_task(cfs_rq), cfs_rq);
+ update_cfs_rq_load_avg(cfs_rq_clock_task(cfs_rq), cfs_rq, true);
raw_spin_unlock_irqrestore(&rq->lock, flags);
}
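
The interesting part for the double-call problem is the enqueue path:
update_cfs_rq_load_avg(now, cfs_rq, !migrated) suppresses the cpufreq
update while a migrating entity's utilization has not been added yet;
attach_entity_load_avg(), called right afterwards for that entity, then
invokes cfs_rq_util_change() exactly once. Roughly:

	migrated = !sa->last_update_time;
	decayed = update_cfs_rq_load_avg(now, cfs_rq, !migrated);
					/* no hook call if migrating */
	/* runnable load accounting unchanged */
	if (migrated)
		attach_entity_load_avg(cfs_rq, se);
					/* hook fires here instead, after the
					 * entity's utilization has been added */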


2016-03-31 11:50:28

by Rafael J. Wysocki

Subject: Re: [RFC PATCH] sched/fair: call cpufreq hook in additional paths

On Thu, Mar 31, 2016 at 11:14 AM, Peter Zijlstra <[email protected]> wrote:
> On Thu, Mar 31, 2016 at 09:59:51AM +0200, Peter Zijlstra wrote:
>> > - passing an argument into attach_entity_load_avg() to indicate
>> > whether calling the cpufreq hook is necessary
>> >
>> > Both of these are ugly in their own way but would avoid a runtime
>> > cost. Opinions welcome.
>>
>> Lemme see what this would look like while I throw the below into the bit
>> bucket.
>
> OK, so the below looks a lot more sane;

Yup.

> and has the surprising benefit of actually shrinking the text size..
>
> 43675 1226 24 44925 af7d defconfig-build/kernel/sched/fair.o.base
> 43723 1226 24 44973 afad defconfig-build/kernel/sched/fair.o.patch
> 43595 1226 24 44845 af2d defconfig-build/kernel/sched/fair.o.patch+

Interesting. :-)

2016-04-01 21:40:37

by Steve Muckle

Subject: Re: [RFC PATCH] sched/fair: call cpufreq hook in additional paths

On 03/31/2016 02:14 AM, Peter Zijlstra wrote:
> On Thu, Mar 31, 2016 at 09:59:51AM +0200, Peter Zijlstra wrote:
>>> > > - passing an argument into attach_entity_load_avg() to indicate
>>> > > whether calling the cpufreq hook is necessary
>>> > >
>>> > > Both of these are ugly in their own way but would avoid a runtime
>>> > > cost. Opinions welcome.
>> >
>> > Lemme see what this would look like while I throw the below into the bit
>> > bucket.
> OK, so the below looks a lot more sane; and has the surprising benefit
> of actually shrinking the text size..
>
> 43675 1226 24 44925 af7d defconfig-build/kernel/sched/fair.o.base
> 43723 1226 24 44973 afad defconfig-build/kernel/sched/fair.o.patch
> 43595 1226 24 44845 af2d defconfig-build/kernel/sched/fair.o.patch+
>
> ---
> --- a/kernel/sched/fair.c
> +++ b/kernel/sched/fair.c

Cool, thanks. Shall I fold this into this patch and resend the series of
3? Or would you prefer to add this change separately?

thanks,
Steve

2016-04-01 21:54:09

by Peter Zijlstra

Subject: Re: [RFC PATCH] sched/fair: call cpufreq hook in additional paths

On Fri, Apr 01, 2016 at 02:40:32PM -0700, Steve Muckle wrote:
> On 03/31/2016 02:14 AM, Peter Zijlstra wrote:
> > On Thu, Mar 31, 2016 at 09:59:51AM +0200, Peter Zijlstra wrote:
> >>> > > - passing an argument into attach_entity_load_avg() to indicate
> >>> > > whether calling the cpufreq hook is necessary
> >>> > >
> >>> > > Both of these are ugly in their own way but would avoid a runtime
> >>> > > cost. Opinions welcome.
> >> >
> >> > Lemme see what this would look like while I throw the below into the bit
> >> > bucket.
> > OK, so the below looks a lot more sane; and has the surprising benefit
> > of actually shrinking the text size..
> >
> > 43675 1226 24 44925 af7d defconfig-build/kernel/sched/fair.o.base
> > 43723 1226 24 44973 afad defconfig-build/kernel/sched/fair.o.patch
> > 43595 1226 24 44845 af2d defconfig-build/kernel/sched/fair.o.patch+
> >
> > ---
> > --- a/kernel/sched/fair.c
> > +++ b/kernel/sched/fair.c
>
> Cool, thanks. Shall I fold this into this patch and resend the series of
> 3? Or would you prefer to add this change separately?

I think I've got it folded into your third patch; I haven't published
the queue yet, though. I'll try and sort through the stuff I have on
Monday or so.

Subject: [tip:sched/core] sched/fair: Call cpufreq hook in additional paths

Commit-ID: a2c6c91f98247fef0fe75216d607812485aeb0df
Gitweb: http://git.kernel.org/tip/a2c6c91f98247fef0fe75216d607812485aeb0df
Author: Steve Muckle <[email protected]>
AuthorDate: Thu, 24 Mar 2016 15:26:07 -0700
Committer: Ingo Molnar <[email protected]>
CommitDate: Sat, 23 Apr 2016 14:20:40 +0200

sched/fair: Call cpufreq hook in additional paths

The cpufreq hook should be called any time the root CFS rq utilization
changes. This can occur when a task is switched to or from the fair
class, or a task moves between groups or CPUs, but these paths
currently do not call the cpufreq hook.

Fix this by adding the hook to attach_entity_load_avg() and
detach_entity_load_avg().

Suggested-by: Vincent Guittot <[email protected]>
Signed-off-by: Steve Muckle <[email protected]>
[ Added the .update_freq argument to update_cfs_rq_load_avg() to avoid a double cpufreq call. ]
Signed-off-by: Peter Zijlstra (Intel) <[email protected]>
Cc: Byungchul Park <[email protected]>
Cc: Dietmar Eggemann <[email protected]>
Cc: Juri Lelli <[email protected]>
Cc: Michael Turquette <[email protected]>
Cc: Mike Galbraith <[email protected]>
Cc: Morten Rasmussen <[email protected]>
Cc: Patrick Bellasi <[email protected]>
Cc: Peter Zijlstra <[email protected]>
Cc: Rafael J. Wysocki <[email protected]>
Cc: Thomas Gleixner <[email protected]>
Link: http://lkml.kernel.org/r/[email protected]
Signed-off-by: Ingo Molnar <[email protected]>
---
kernel/sched/fair.c | 73 ++++++++++++++++++++++++++++++-----------------------
1 file changed, 42 insertions(+), 31 deletions(-)

diff --git a/kernel/sched/fair.c b/kernel/sched/fair.c
index 8155281..c328bd7 100644
--- a/kernel/sched/fair.c
+++ b/kernel/sched/fair.c
@@ -2874,13 +2874,41 @@ static inline void update_tg_load_avg(struct cfs_rq *cfs_rq, int force) {}

static inline u64 cfs_rq_clock_task(struct cfs_rq *cfs_rq);

+static inline void cfs_rq_util_change(struct cfs_rq *cfs_rq)
+{
+ struct rq *rq = rq_of(cfs_rq);
+ int cpu = cpu_of(rq);
+
+ if (cpu == smp_processor_id() && &rq->cfs == cfs_rq) {
+ unsigned long max = rq->cpu_capacity_orig;
+
+ /*
+ * There are a few boundary cases this might miss but it should
+ * get called often enough that that should (hopefully) not be
+ * a real problem -- added to that it only calls on the local
+ * CPU, so if we enqueue remotely we'll miss an update, but
+ * the next tick/schedule should update.
+ *
+ * It will not get called when we go idle, because the idle
+ * thread is a different class (!fair), nor will the utilization
+ * number include things like RT tasks.
+ *
+ * As is, the util number is not freq-invariant (we'd have to
+ * implement arch_scale_freq_capacity() for that).
+ *
+ * See cpu_util().
+ */
+ cpufreq_update_util(rq_clock(rq),
+ min(cfs_rq->avg.util_avg, max), max);
+ }
+}
+
/* Group cfs_rq's load_avg is used for task_h_load and update_cfs_share */
-static inline int update_cfs_rq_load_avg(u64 now, struct cfs_rq *cfs_rq)
+static inline int
+update_cfs_rq_load_avg(u64 now, struct cfs_rq *cfs_rq, bool update_freq)
{
struct sched_avg *sa = &cfs_rq->avg;
- struct rq *rq = rq_of(cfs_rq);
int decayed, removed_load = 0, removed_util = 0;
- int cpu = cpu_of(rq);

if (atomic_long_read(&cfs_rq->removed_load_avg)) {
s64 r = atomic_long_xchg(&cfs_rq->removed_load_avg, 0);
@@ -2896,7 +2924,7 @@ static inline int update_cfs_rq_load_avg(u64 now, struct cfs_rq *cfs_rq)
removed_util = 1;
}

- decayed = __update_load_avg(now, cpu, sa,
+ decayed = __update_load_avg(now, cpu_of(rq_of(cfs_rq)), sa,
scale_load_down(cfs_rq->load.weight), cfs_rq->curr != NULL, cfs_rq);

#ifndef CONFIG_64BIT
@@ -2904,29 +2932,8 @@ static inline int update_cfs_rq_load_avg(u64 now, struct cfs_rq *cfs_rq)
cfs_rq->load_last_update_time_copy = sa->last_update_time;
#endif

- if (cpu == smp_processor_id() && &rq->cfs == cfs_rq &&
- (decayed || removed_util)) {
- unsigned long max = rq->cpu_capacity_orig;
-
- /*
- * There are a few boundary cases this might miss but it should
- * get called often enough that that should (hopefully) not be
- * a real problem -- added to that it only calls on the local
- * CPU, so if we enqueue remotely we'll miss an update, but
- * the next tick/schedule should update.
- *
- * It will not get called when we go idle, because the idle
- * thread is a different class (!fair), nor will the utilization
- * number include things like RT tasks.
- *
- * As is, the util number is not freq-invariant (we'd have to
- * implement arch_scale_freq_capacity() for that).
- *
- * See cpu_util().
- */
- cpufreq_update_util(rq_clock(rq),
- min(sa->util_avg, max), max);
- }
+ if (update_freq && (decayed || removed_util))
+ cfs_rq_util_change(cfs_rq);

return decayed || removed_load;
}
@@ -2947,7 +2954,7 @@ static inline void update_load_avg(struct sched_entity *se, int update_tg)
se->on_rq * scale_load_down(se->load.weight),
cfs_rq->curr == se, NULL);

- if (update_cfs_rq_load_avg(now, cfs_rq) && update_tg)
+ if (update_cfs_rq_load_avg(now, cfs_rq, true) && update_tg)
update_tg_load_avg(cfs_rq, 0);
}

@@ -2976,6 +2983,8 @@ skip_aging:
cfs_rq->avg.load_sum += se->avg.load_sum;
cfs_rq->avg.util_avg += se->avg.util_avg;
cfs_rq->avg.util_sum += se->avg.util_sum;
+
+ cfs_rq_util_change(cfs_rq);
}

static void detach_entity_load_avg(struct cfs_rq *cfs_rq, struct sched_entity *se)
@@ -2988,6 +2997,8 @@ static void detach_entity_load_avg(struct cfs_rq *cfs_rq, struct sched_entity *s
cfs_rq->avg.load_sum = max_t(s64, cfs_rq->avg.load_sum - se->avg.load_sum, 0);
cfs_rq->avg.util_avg = max_t(long, cfs_rq->avg.util_avg - se->avg.util_avg, 0);
cfs_rq->avg.util_sum = max_t(s32, cfs_rq->avg.util_sum - se->avg.util_sum, 0);
+
+ cfs_rq_util_change(cfs_rq);
}

/* Add the load generated by se into cfs_rq's load average */
@@ -3005,7 +3016,7 @@ enqueue_entity_load_avg(struct cfs_rq *cfs_rq, struct sched_entity *se)
cfs_rq->curr == se, NULL);
}

- decayed = update_cfs_rq_load_avg(now, cfs_rq);
+ decayed = update_cfs_rq_load_avg(now, cfs_rq, !migrated);

cfs_rq->runnable_load_avg += sa->load_avg;
cfs_rq->runnable_load_sum += sa->load_sum;
@@ -6213,7 +6224,7 @@ static void update_blocked_averages(int cpu)
if (throttled_hierarchy(cfs_rq))
continue;

- if (update_cfs_rq_load_avg(cfs_rq_clock_task(cfs_rq), cfs_rq))
+ if (update_cfs_rq_load_avg(cfs_rq_clock_task(cfs_rq), cfs_rq, true))
update_tg_load_avg(cfs_rq, 0);
}
raw_spin_unlock_irqrestore(&rq->lock, flags);
@@ -6274,7 +6285,7 @@ static inline void update_blocked_averages(int cpu)

raw_spin_lock_irqsave(&rq->lock, flags);
update_rq_clock(rq);
- update_cfs_rq_load_avg(cfs_rq_clock_task(cfs_rq), cfs_rq);
+ update_cfs_rq_load_avg(cfs_rq_clock_task(cfs_rq), cfs_rq, true);
raw_spin_unlock_irqrestore(&rq->lock, flags);
}
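
Summarizing the committed diff above, cfs_rq_util_change() is now invoked
from three places in fair.c:

	/*
	 * - update_cfs_rq_load_avg(), when update_freq && (decayed || removed_util)
	 * - attach_entity_load_avg(), unconditionally
	 * - detach_entity_load_avg(), unconditionally
	 */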