2021-03-18 21:30:36

by Peter Zijlstra

[permalink] [raw]
Subject: [RFC][PATCH] sched: Optimize cpufreq_update_util


Hi,

The below replaces cpufreq_update_util()'s indirect call with a
static_call(). The patch is quite gross, and we definitely need
static_call_update_cpuslocked().

cpufreq folks, is there a better way to do that optimize pass? That is,
we need to know when all CPUs have the *same* function set. Otherwise we
obviously cannot rewrite the text, which is shared by all CPUs.

Also, is there a lock order comment in cpufreq somewhere? I tried
following it, but eventually gave up and figured 'asking' lockdep was
far simpler.

---
include/linux/sched/cpufreq.h | 9 ++++---
kernel/sched/cpufreq.c | 55 +++++++++++++++++++++++++++++++++++++++++++
kernel/sched/sched.h | 28 ++++++++++++++++++++--
kernel/static_call.c | 4 ++--
4 files changed, 89 insertions(+), 7 deletions(-)

diff --git a/include/linux/sched/cpufreq.h b/include/linux/sched/cpufreq.h
index 6205578ab6ee..6d2972b67fa0 100644
--- a/include/linux/sched/cpufreq.h
+++ b/include/linux/sched/cpufreq.h
@@ -12,14 +12,17 @@

#ifdef CONFIG_CPU_FREQ
struct cpufreq_policy;
+struct update_util_data;
+
+typedef void (*cpu_util_update_f)(struct update_util_data *data,
+ u64 time, unsigned int flags);

struct update_util_data {
- void (*func)(struct update_util_data *data, u64 time, unsigned int flags);
+ cpu_util_update_f func;
};

void cpufreq_add_update_util_hook(int cpu, struct update_util_data *data,
- void (*func)(struct update_util_data *data, u64 time,
- unsigned int flags));
+ cpu_util_update_f func);
void cpufreq_remove_update_util_hook(int cpu);
bool cpufreq_this_cpu_can_update(struct cpufreq_policy *policy);

diff --git a/kernel/sched/cpufreq.c b/kernel/sched/cpufreq.c
index 7c2fe50fd76d..b362a04e04d1 100644
--- a/kernel/sched/cpufreq.c
+++ b/kernel/sched/cpufreq.c
@@ -6,11 +6,60 @@
* Author: Rafael J. Wysocki <[email protected]>
*/
#include <linux/cpufreq.h>
+#include <linux/static_call.h>

#include "sched.h"

DEFINE_PER_CPU(struct update_util_data __rcu *, cpufreq_update_util_data);

+#ifdef CONFIG_HAVE_STATIC_CALL
+
+void cpufreq_update_indirect(struct update_util_data *data,
+ u64 time, unsigned int flags)
+{
+ if (data)
+ data->func(data, time, flags);
+}
+
+DEFINE_STATIC_CALL(cpufreq_update_util, cpufreq_update_indirect);
+
+static void cpufreq_update_safe(void)
+{
+ static_call_update(cpufreq_update_util, cpufreq_update_indirect);
+}
+
+static void cpufreq_update_optimize(void)
+{
+ struct update_util_data *data;
+ cpu_util_update_f func = NULL, dfunc;
+ int cpu;
+
+ for_each_online_cpu(cpu) {
+ data = per_cpu(cpufreq_update_util_data, cpu);
+ dfunc = data ? READ_ONCE(data->func) : NULL;
+
+ if (dfunc) {
+ if (!func)
+ func = dfunc;
+ else if (func != dfunc)
+ return;
+ } else if (func)
+ return;
+ }
+
+ pr_info("sched: optimized cpufreq_update_util\n");
+
+ /* all CPUs have the same @func */
+ static_call_update(cpufreq_update_util, func);
+}
+
+#else
+
+static inline void cpufreq_update_safe(void) { }
+static inline void cpufreq_update_optimize(void) { }
+
+#endif
+
/**
* cpufreq_add_update_util_hook - Populate the CPU's update_util_data pointer.
* @cpu: The CPU to set the pointer for.
@@ -39,8 +88,12 @@ void cpufreq_add_update_util_hook(int cpu, struct update_util_data *data,
if (WARN_ON(per_cpu(cpufreq_update_util_data, cpu)))
return;

+ cpufreq_update_safe();
+
data->func = func;
rcu_assign_pointer(per_cpu(cpufreq_update_util_data, cpu), data);
+
+ cpufreq_update_optimize();
}
EXPORT_SYMBOL_GPL(cpufreq_add_update_util_hook);

@@ -56,7 +109,9 @@ EXPORT_SYMBOL_GPL(cpufreq_add_update_util_hook);
*/
void cpufreq_remove_update_util_hook(int cpu)
{
+ cpufreq_update_safe();
rcu_assign_pointer(per_cpu(cpufreq_update_util_data, cpu), NULL);
+ cpufreq_update_optimize();
}
EXPORT_SYMBOL_GPL(cpufreq_remove_update_util_hook);

diff --git a/kernel/sched/sched.h b/kernel/sched/sched.h
index 24ac31b40b55..333e33c3d496 100644
--- a/kernel/sched/sched.h
+++ b/kernel/sched/sched.h
@@ -2473,6 +2473,30 @@ static inline u64 irq_time_read(int cpu)
#ifdef CONFIG_CPU_FREQ
DECLARE_PER_CPU(struct update_util_data __rcu *, cpufreq_update_util_data);

+#ifdef CONFIG_HAVE_STATIC_CALL
+
+extern void cpufreq_update_indirect(struct update_util_data *data,
+ u64 time, unsigned int flags);
+
+DECLARE_STATIC_CALL(cpufreq_update_util, cpufreq_update_indirect);
+
+static inline void cpufreq_update_util_call(struct update_util_data *data,
+ u64 time, unsigned int flags)
+{
+ static_call_cond(cpufreq_update_util)(data, time, flags);
+}
+
+#else
+
+static inline void cpufreq_update_util_call(struct update_util_data *data,
+ u64 time, unsigned int flags)
+{
+ if (data)
+ data->func(data, time, flags);
+}
+
+#endif
+
/**
* cpufreq_update_util - Take a note about CPU utilization changes.
* @rq: Runqueue to carry out the update for.
@@ -2501,9 +2525,9 @@ static inline void cpufreq_update_util(struct rq *rq, unsigned int flags)

data = rcu_dereference_sched(*per_cpu_ptr(&cpufreq_update_util_data,
cpu_of(rq)));
- if (data)
- data->func(data, rq_clock(rq), flags);
+ cpufreq_update_util_call(data, rq_clock(rq), flags);
}
+
#else
static inline void cpufreq_update_util(struct rq *rq, unsigned int flags) {}
#endif /* CONFIG_CPU_FREQ */
diff --git a/kernel/static_call.c b/kernel/static_call.c
index ae825295cf68..fd73dfce3e50 100644
--- a/kernel/static_call.c
+++ b/kernel/static_call.c
@@ -122,7 +122,7 @@ void __static_call_update(struct static_call_key *key, void *tramp, void *func)
struct static_call_site *site, *stop;
struct static_call_mod *site_mod, first;

- cpus_read_lock();
+// cpus_read_lock();
static_call_lock();

if (key->func == func)
@@ -196,7 +196,7 @@ void __static_call_update(struct static_call_key *key, void *tramp, void *func)

done:
static_call_unlock();
- cpus_read_unlock();
+// cpus_read_unlock();
}
EXPORT_SYMBOL_GPL(__static_call_update);


2021-03-19 07:41:26

by Viresh Kumar

[permalink] [raw]
Subject: Re: [RFC][PATCH] sched: Optimize cpufreq_update_util

On 18-03-21, 22:28, Peter Zijlstra wrote:
> Also, is there a lock order comment in cpufreq somewhere?

I don't think so.

> I tried
> following it, but eventually gave up and figured 'asking' lockdep was
> far simpler.

This will get called from CPU's online/offline path at worst, nothing more.

> +static void cpufreq_update_optimize(void)
> +{
> + struct update_util_data *data;
> + cpu_util_update_f func = NULL, dfunc;
> + int cpu;
> +
> + for_each_online_cpu(cpu) {
> + data = per_cpu(cpufreq_update_util_data, cpu);
> + dfunc = data ? READ_ONCE(data->func) : NULL;
> +
> + if (dfunc) {
> + if (!func)
> + func = dfunc;
> + else if (func != dfunc)
> + return;
> + } else if (func)
> + return;
> + }

So there is nothing cpufreq specific IIRC that can help make this better, this
is basically per policy.

For example, on an ARM platform we have two cpufreq policies with one policy
covering 4 CPUs, while the other one covering only 1 (maybe because we didn't
add those CPUs in DT or something else), then also we will end up separate
routines.

Or if we take all CPUs of a policy offline and then bring them up one by one, I
think for the first CPU online event in that policy we will end up using the
sugov_update_single_freq() variant for some time, until the time more CPUs come
up.

So traversing the way you did this is probably something that will work properly
in all corner cases.

--
viresh

2021-03-19 14:37:45

by Rafael J. Wysocki

[permalink] [raw]
Subject: Re: [RFC][PATCH] sched: Optimize cpufreq_update_util

On Friday, March 19, 2021 8:37:51 AM CET Viresh Kumar wrote:
> On 18-03-21, 22:28, Peter Zijlstra wrote:
> > Also, is there a lock order comment in cpufreq somewhere?
>
> I don't think so.
>
> > I tried
> > following it, but eventually gave up and figured 'asking' lockdep was
> > far simpler.
>
> This will get called from CPU's online/offline path at worst, nothing more.

I'm not sure if I understand you correctly, but for completeness the callback
is also set/unset on driver registration and governor switch.

> > +static void cpufreq_update_optimize(void)
> > +{
> > + struct update_util_data *data;
> > + cpu_util_update_f func = NULL, dfunc;
> > + int cpu;
> > +
> > + for_each_online_cpu(cpu) {
> > + data = per_cpu(cpufreq_update_util_data, cpu);
> > + dfunc = data ? READ_ONCE(data->func) : NULL;
> > +
> > + if (dfunc) {
> > + if (!func)
> > + func = dfunc;
> > + else if (func != dfunc)
> > + return;
> > + } else if (func)
> > + return;
> > + }
>
> So there is nothing cpufreq specific IIRC that can help make this better, this
> is basically per policy.

Well, in some cases the driver knows that there will never be more that 1 CPU
per policy and so schedutil will never use the "shared" variant.

For instance, with intel_pstate all CPUs will always use the same callback.

> For example, on an ARM platform we have two cpufreq policies with one policy
> covering 4 CPUs, while the other one covering only 1 (maybe because we didn't
> add those CPUs in DT or something else), then also we will end up separate
> routines.
>
> Or if we take all CPUs of a policy offline and then bring them up one by one, I
> think for the first CPU online event in that policy we will end up using the
> sugov_update_single_freq() variant for some time, until the time more CPUs come
> up.
>
> So traversing the way you did this is probably something that will work properly
> in all corner cases.

Agreed.

It might be simplified in some cases, though, AFAICS.



2021-03-19 15:25:10

by Viresh Kumar

[permalink] [raw]
Subject: Re: [RFC][PATCH] sched: Optimize cpufreq_update_util

On 19-03-21, 15:35, Rafael J. Wysocki wrote:
> On Friday, March 19, 2021 8:37:51 AM CET Viresh Kumar wrote:
> > On 18-03-21, 22:28, Peter Zijlstra wrote:
> > > Also, is there a lock order comment in cpufreq somewhere?
> >
> > I don't think so.
> >
> > > I tried
> > > following it, but eventually gave up and figured 'asking' lockdep was
> > > far simpler.
> >
> > This will get called from CPU's online/offline path at worst, nothing more.
>
> I'm not sure if I understand you correctly, but for completeness the callback
> is also set/unset on driver registration and governor switch.

Right, I believe that those cases don't have any specific locking constraints
and so scheduler code doesn't need to worry about them. cpuslocked stuff needs
to be considered though.

> > > +static void cpufreq_update_optimize(void)
> > > +{
> > > + struct update_util_data *data;
> > > + cpu_util_update_f func = NULL, dfunc;
> > > + int cpu;
> > > +
> > > + for_each_online_cpu(cpu) {
> > > + data = per_cpu(cpufreq_update_util_data, cpu);
> > > + dfunc = data ? READ_ONCE(data->func) : NULL;
> > > +
> > > + if (dfunc) {
> > > + if (!func)
> > > + func = dfunc;
> > > + else if (func != dfunc)
> > > + return;
> > > + } else if (func)
> > > + return;
> > > + }
> >
> > So there is nothing cpufreq specific IIRC that can help make this better, this
> > is basically per policy.
>
> Well, in some cases the driver knows that there will never be more that 1 CPU
> per policy and so schedutil will never use the "shared" variant.
>
> For instance, with intel_pstate all CPUs will always use the same callback.

Right, only for single policy cases we can have some optimization (though I
don't feel its worth here) as this isn't going to happen in hotpath.

--
viresh