2023-10-05 15:14:04

by Pierre Gondois

[permalink] [raw]
Subject: [PATCH v2] cpufreq: Rebuild sched-domains when removing cpufreq driver

The Energy Aware Scheduler (EAS) relies on the schedutil governor.
When moving to/from the schedutil governor, sched domains must be
rebuilt to allow re-evaluating the enablement conditions of EAS.
This is done through sched_cpufreq_governor_change().

Having a cpufreq governor assumes a cpufreq driver is running.
Inserting/removing a cpufreq driver should trigger a re-evaluation
of the EAS enablement conditions, so that EAS does not remain enabled
after a running cpufreq driver is removed.

Rebuild the sched domains in schedutil's sugov_init()/sugov_exit(),
so that EAS's enablement conditions are re-checked whenever the
schedutil governor is initialized or exited.
Move the relevant code up in cpufreq_schedutil.c to avoid a split and
conditional function declaration.
Rename sched_cpufreq_governor_change() to sugov_eas_rebuild_sd().

Signed-off-by: Pierre Gondois <[email protected]>
---
drivers/cpufreq/cpufreq.c | 3 +-
include/linux/cpufreq.h | 8 -----
kernel/sched/cpufreq_schedutil.c | 55 +++++++++++++++++---------------
3 files changed, 30 insertions(+), 36 deletions(-)

diff --git a/drivers/cpufreq/cpufreq.c b/drivers/cpufreq/cpufreq.c
index 60ed89000e82..4bc15634d49c 100644
--- a/drivers/cpufreq/cpufreq.c
+++ b/drivers/cpufreq/cpufreq.c
@@ -1544,7 +1544,7 @@ static int cpufreq_online(unsigned int cpu)

/*
* Register with the energy model before
- * sched_cpufreq_governor_change() is called, which will result
+ * sugov_eas_rebuild_sd() is called, which will result
* in rebuilding of the sched domains, which should only be done
* once the energy model is properly initialized for the policy
* first.
@@ -2652,7 +2652,6 @@ static int cpufreq_set_policy(struct cpufreq_policy *policy,
ret = cpufreq_start_governor(policy);
if (!ret) {
pr_debug("governor change\n");
- sched_cpufreq_governor_change(policy, old_gov);
return 0;
}
cpufreq_exit_governor(policy);
diff --git a/include/linux/cpufreq.h b/include/linux/cpufreq.h
index 71d186d6933a..1c5ca92a0555 100644
--- a/include/linux/cpufreq.h
+++ b/include/linux/cpufreq.h
@@ -1193,14 +1193,6 @@ static inline int of_perf_domain_get_sharing_cpumask(int pcpu, const char *list_
}
#endif

-#if defined(CONFIG_ENERGY_MODEL) && defined(CONFIG_CPU_FREQ_GOV_SCHEDUTIL)
-void sched_cpufreq_governor_change(struct cpufreq_policy *policy,
- struct cpufreq_governor *old_gov);
-#else
-static inline void sched_cpufreq_governor_change(struct cpufreq_policy *policy,
- struct cpufreq_governor *old_gov) { }
-#endif
-
extern unsigned int arch_freq_get_on_cpu(int cpu);

#ifndef arch_set_freq_scale
diff --git a/kernel/sched/cpufreq_schedutil.c b/kernel/sched/cpufreq_schedutil.c
index 4492608b7d7f..901cada51ba7 100644
--- a/kernel/sched/cpufreq_schedutil.c
+++ b/kernel/sched/cpufreq_schedutil.c
@@ -555,6 +555,31 @@ static const struct kobj_type sugov_tunables_ktype = {

/********************** cpufreq governor interface *********************/

+#ifdef CONFIG_ENERGY_MODEL
+static void rebuild_sd_workfn(struct work_struct *work)
+{
+ rebuild_sched_domains_energy();
+}
+
+static DECLARE_WORK(rebuild_sd_work, rebuild_sd_workfn);
+
+/*
+ * EAS shouldn't be attempted without sugov, so rebuild the sched_domains
+ * on governor changes to make sure the scheduler knows about it.
+ */
+static void sugov_eas_rebuild_sd(void)
+{
+ /*
+ * When called from the cpufreq_register_driver() path, the
+ * cpu_hotplug_lock is already held, so use a work item to
+ * avoid nested locking in rebuild_sched_domains().
+ */
+ schedule_work(&rebuild_sd_work);
+}
+#else
+static inline void sugov_eas_rebuild_sd(void) { };
+#endif
+
struct cpufreq_governor schedutil_gov;

static struct sugov_policy *sugov_policy_alloc(struct cpufreq_policy *policy)
@@ -709,6 +734,8 @@ static int sugov_init(struct cpufreq_policy *policy)
if (ret)
goto fail;

+ sugov_eas_rebuild_sd();
+
out:
mutex_unlock(&global_tunables_lock);
return 0;
@@ -750,6 +777,8 @@ static void sugov_exit(struct cpufreq_policy *policy)
sugov_kthread_stop(sg_policy);
sugov_policy_free(sg_policy);
cpufreq_disable_fast_switch(policy);
+
+ sugov_eas_rebuild_sd();
}

static int sugov_start(struct cpufreq_policy *policy)
@@ -838,29 +867,3 @@ struct cpufreq_governor *cpufreq_default_governor(void)
#endif

cpufreq_governor_init(schedutil_gov);
-
-#ifdef CONFIG_ENERGY_MODEL
-static void rebuild_sd_workfn(struct work_struct *work)
-{
- rebuild_sched_domains_energy();
-}
-static DECLARE_WORK(rebuild_sd_work, rebuild_sd_workfn);
-
-/*
- * EAS shouldn't be attempted without sugov, so rebuild the sched_domains
- * on governor changes to make sure the scheduler knows about it.
- */
-void sched_cpufreq_governor_change(struct cpufreq_policy *policy,
- struct cpufreq_governor *old_gov)
-{
- if (old_gov == &schedutil_gov || policy->governor == &schedutil_gov) {
- /*
- * When called from the cpufreq_register_driver() path, the
- * cpu_hotplug_lock is already held, so use a work item to
- * avoid nested locking in rebuild_sched_domains().
- */
- schedule_work(&rebuild_sd_work);
- }
-
-}
-#endif
--
2.25.1


2023-10-06 07:07:18

by Viresh Kumar

[permalink] [raw]
Subject: Re: [PATCH v2] cpufreq: Rebuild sched-domains when removing cpufreq driver

On 05-10-23, 15:41, Pierre Gondois wrote:
> The Energy Aware Scheduler (EAS) relies on the schedutil governor.
> When moving to/from the schedutil governor, sched domains must be
> rebuilt to allow re-evaluating the enablement conditions of EAS.
> This is done through sched_cpufreq_governor_change().
>
> Having a cpufreq governor assumes a cpufreq driver is running.
> Inserting/removing a cpufreq driver should trigger a re-evaluation
> of EAS enablement conditions, avoiding to see EAS enabled when
> removing a running cpufreq driver.
>
> Rebuild the sched domains in schedutil's sugov_init()/sugov_exit(),
> allowing to check EAS's enablement condition whenever schedutil
> governor is initialized/exited from.
> Move relevant code up in schedutil.c to avoid a split and conditional
> function declaration.
> Rename sched_cpufreq_governor_change() to sugov_eas_rebuild_sd().
>
> Signed-off-by: Pierre Gondois <[email protected]>
> ---
> drivers/cpufreq/cpufreq.c | 3 +-
> include/linux/cpufreq.h | 8 -----
> kernel/sched/cpufreq_schedutil.c | 55 +++++++++++++++++---------------
> 3 files changed, 30 insertions(+), 36 deletions(-)
>
> diff --git a/drivers/cpufreq/cpufreq.c b/drivers/cpufreq/cpufreq.c
> index 60ed89000e82..4bc15634d49c 100644
> --- a/drivers/cpufreq/cpufreq.c
> +++ b/drivers/cpufreq/cpufreq.c
> @@ -1544,7 +1544,7 @@ static int cpufreq_online(unsigned int cpu)
>
> /*
> * Register with the energy model before
> - * sched_cpufreq_governor_change() is called, which will result
> + * sugov_eas_rebuild_sd() is called, which will result
> * in rebuilding of the sched domains, which should only be done
> * once the energy model is properly initialized for the policy
> * first.
> @@ -2652,7 +2652,6 @@ static int cpufreq_set_policy(struct cpufreq_policy *policy,
> ret = cpufreq_start_governor(policy);
> if (!ret) {
> pr_debug("governor change\n");
> - sched_cpufreq_governor_change(policy, old_gov);
> return 0;
> }
> cpufreq_exit_governor(policy);
> diff --git a/include/linux/cpufreq.h b/include/linux/cpufreq.h
> index 71d186d6933a..1c5ca92a0555 100644
> --- a/include/linux/cpufreq.h
> +++ b/include/linux/cpufreq.h
> @@ -1193,14 +1193,6 @@ static inline int of_perf_domain_get_sharing_cpumask(int pcpu, const char *list_
> }
> #endif
>
> -#if defined(CONFIG_ENERGY_MODEL) && defined(CONFIG_CPU_FREQ_GOV_SCHEDUTIL)
> -void sched_cpufreq_governor_change(struct cpufreq_policy *policy,
> - struct cpufreq_governor *old_gov);
> -#else
> -static inline void sched_cpufreq_governor_change(struct cpufreq_policy *policy,
> - struct cpufreq_governor *old_gov) { }
> -#endif
> -
> extern unsigned int arch_freq_get_on_cpu(int cpu);
>
> #ifndef arch_set_freq_scale
> diff --git a/kernel/sched/cpufreq_schedutil.c b/kernel/sched/cpufreq_schedutil.c
> index 4492608b7d7f..901cada51ba7 100644
> --- a/kernel/sched/cpufreq_schedutil.c
> +++ b/kernel/sched/cpufreq_schedutil.c
> @@ -555,6 +555,31 @@ static const struct kobj_type sugov_tunables_ktype = {
>
> /********************** cpufreq governor interface *********************/
>
> +#ifdef CONFIG_ENERGY_MODEL
> +static void rebuild_sd_workfn(struct work_struct *work)
> +{
> + rebuild_sched_domains_energy();
> +}
> +
> +static DECLARE_WORK(rebuild_sd_work, rebuild_sd_workfn);
> +
> +/*
> + * EAS shouldn't be attempted without sugov, so rebuild the sched_domains
> + * on governor changes to make sure the scheduler knows about it.
> + */
> +static void sugov_eas_rebuild_sd(void)
> +{
> + /*
> + * When called from the cpufreq_register_driver() path, the
> + * cpu_hotplug_lock is already held, so use a work item to
> + * avoid nested locking in rebuild_sched_domains().
> + */
> + schedule_work(&rebuild_sd_work);
> +}
> +#else
> +static inline void sugov_eas_rebuild_sd(void) { };
> +#endif
> +
> struct cpufreq_governor schedutil_gov;
>
> static struct sugov_policy *sugov_policy_alloc(struct cpufreq_policy *policy)
> @@ -709,6 +734,8 @@ static int sugov_init(struct cpufreq_policy *policy)
> if (ret)
> goto fail;
>
> + sugov_eas_rebuild_sd();
> +
> out:
> mutex_unlock(&global_tunables_lock);
> return 0;
> @@ -750,6 +777,8 @@ static void sugov_exit(struct cpufreq_policy *policy)
> sugov_kthread_stop(sg_policy);
> sugov_policy_free(sg_policy);
> cpufreq_disable_fast_switch(policy);
> +
> + sugov_eas_rebuild_sd();
> }
>
> static int sugov_start(struct cpufreq_policy *policy)
> @@ -838,29 +867,3 @@ struct cpufreq_governor *cpufreq_default_governor(void)
> #endif
>
> cpufreq_governor_init(schedutil_gov);
> -
> -#ifdef CONFIG_ENERGY_MODEL
> -static void rebuild_sd_workfn(struct work_struct *work)
> -{
> - rebuild_sched_domains_energy();
> -}
> -static DECLARE_WORK(rebuild_sd_work, rebuild_sd_workfn);
> -
> -/*
> - * EAS shouldn't be attempted without sugov, so rebuild the sched_domains
> - * on governor changes to make sure the scheduler knows about it.
> - */
> -void sched_cpufreq_governor_change(struct cpufreq_policy *policy,
> - struct cpufreq_governor *old_gov)
> -{
> - if (old_gov == &schedutil_gov || policy->governor == &schedutil_gov) {
> - /*
> - * When called from the cpufreq_register_driver() path, the
> - * cpu_hotplug_lock is already held, so use a work item to
> - * avoid nested locking in rebuild_sched_domains().
> - */
> - schedule_work(&rebuild_sd_work);
> - }
> -
> -}
> -#endif

Acked-by: Viresh Kumar <[email protected]>

--
viresh

2023-10-06 20:08:03

by Rafael J. Wysocki

[permalink] [raw]
Subject: Re: [PATCH v2] cpufreq: Rebuild sched-domains when removing cpufreq driver

On Fri, Oct 6, 2023 at 9:07 AM Viresh Kumar <[email protected]> wrote:
>
> On 05-10-23, 15:41, Pierre Gondois wrote:
> > The Energy Aware Scheduler (EAS) relies on the schedutil governor.
> > When moving to/from the schedutil governor, sched domains must be
> > rebuilt to allow re-evaluating the enablement conditions of EAS.
> > This is done through sched_cpufreq_governor_change().
> >
> > Having a cpufreq governor assumes a cpufreq driver is running.
> > Inserting/removing a cpufreq driver should trigger a re-evaluation
> > of EAS enablement conditions, avoiding to see EAS enabled when
> > removing a running cpufreq driver.
> >
> > Rebuild the sched domains in schedutil's sugov_init()/sugov_exit(),
> > allowing to check EAS's enablement condition whenever schedutil
> > governor is initialized/exited from.
> > Move relevant code up in schedutil.c to avoid a split and conditional
> > function declaration.
> > Rename sched_cpufreq_governor_change() to sugov_eas_rebuild_sd().
> >
> > Signed-off-by: Pierre Gondois <[email protected]>
> > ---
> > drivers/cpufreq/cpufreq.c | 3 +-
> > include/linux/cpufreq.h | 8 -----
> > kernel/sched/cpufreq_schedutil.c | 55 +++++++++++++++++---------------
> > 3 files changed, 30 insertions(+), 36 deletions(-)
> >
> > diff --git a/drivers/cpufreq/cpufreq.c b/drivers/cpufreq/cpufreq.c
> > index 60ed89000e82..4bc15634d49c 100644
> > --- a/drivers/cpufreq/cpufreq.c
> > +++ b/drivers/cpufreq/cpufreq.c
> > @@ -1544,7 +1544,7 @@ static int cpufreq_online(unsigned int cpu)
> >
> > /*
> > * Register with the energy model before
> > - * sched_cpufreq_governor_change() is called, which will result
> > + * sugov_eas_rebuild_sd() is called, which will result
> > * in rebuilding of the sched domains, which should only be done
> > * once the energy model is properly initialized for the policy
> > * first.
> > @@ -2652,7 +2652,6 @@ static int cpufreq_set_policy(struct cpufreq_policy *policy,
> > ret = cpufreq_start_governor(policy);
> > if (!ret) {
> > pr_debug("governor change\n");
> > - sched_cpufreq_governor_change(policy, old_gov);
> > return 0;
> > }
> > cpufreq_exit_governor(policy);
> > diff --git a/include/linux/cpufreq.h b/include/linux/cpufreq.h
> > index 71d186d6933a..1c5ca92a0555 100644
> > --- a/include/linux/cpufreq.h
> > +++ b/include/linux/cpufreq.h
> > @@ -1193,14 +1193,6 @@ static inline int of_perf_domain_get_sharing_cpumask(int pcpu, const char *list_
> > }
> > #endif
> >
> > -#if defined(CONFIG_ENERGY_MODEL) && defined(CONFIG_CPU_FREQ_GOV_SCHEDUTIL)
> > -void sched_cpufreq_governor_change(struct cpufreq_policy *policy,
> > - struct cpufreq_governor *old_gov);
> > -#else
> > -static inline void sched_cpufreq_governor_change(struct cpufreq_policy *policy,
> > - struct cpufreq_governor *old_gov) { }
> > -#endif
> > -
> > extern unsigned int arch_freq_get_on_cpu(int cpu);
> >
> > #ifndef arch_set_freq_scale
> > diff --git a/kernel/sched/cpufreq_schedutil.c b/kernel/sched/cpufreq_schedutil.c
> > index 4492608b7d7f..901cada51ba7 100644
> > --- a/kernel/sched/cpufreq_schedutil.c
> > +++ b/kernel/sched/cpufreq_schedutil.c
> > @@ -555,6 +555,31 @@ static const struct kobj_type sugov_tunables_ktype = {
> >
> > /********************** cpufreq governor interface *********************/
> >
> > +#ifdef CONFIG_ENERGY_MODEL
> > +static void rebuild_sd_workfn(struct work_struct *work)
> > +{
> > + rebuild_sched_domains_energy();
> > +}
> > +
> > +static DECLARE_WORK(rebuild_sd_work, rebuild_sd_workfn);
> > +
> > +/*
> > + * EAS shouldn't be attempted without sugov, so rebuild the sched_domains
> > + * on governor changes to make sure the scheduler knows about it.
> > + */
> > +static void sugov_eas_rebuild_sd(void)
> > +{
> > + /*
> > + * When called from the cpufreq_register_driver() path, the
> > + * cpu_hotplug_lock is already held, so use a work item to
> > + * avoid nested locking in rebuild_sched_domains().
> > + */
> > + schedule_work(&rebuild_sd_work);
> > +}
> > +#else
> > +static inline void sugov_eas_rebuild_sd(void) { };
> > +#endif
> > +
> > struct cpufreq_governor schedutil_gov;
> >
> > static struct sugov_policy *sugov_policy_alloc(struct cpufreq_policy *policy)
> > @@ -709,6 +734,8 @@ static int sugov_init(struct cpufreq_policy *policy)
> > if (ret)
> > goto fail;
> >
> > + sugov_eas_rebuild_sd();
> > +
> > out:
> > mutex_unlock(&global_tunables_lock);
> > return 0;
> > @@ -750,6 +777,8 @@ static void sugov_exit(struct cpufreq_policy *policy)
> > sugov_kthread_stop(sg_policy);
> > sugov_policy_free(sg_policy);
> > cpufreq_disable_fast_switch(policy);
> > +
> > + sugov_eas_rebuild_sd();
> > }
> >
> > static int sugov_start(struct cpufreq_policy *policy)
> > @@ -838,29 +867,3 @@ struct cpufreq_governor *cpufreq_default_governor(void)
> > #endif
> >
> > cpufreq_governor_init(schedutil_gov);
> > -
> > -#ifdef CONFIG_ENERGY_MODEL
> > -static void rebuild_sd_workfn(struct work_struct *work)
> > -{
> > - rebuild_sched_domains_energy();
> > -}
> > -static DECLARE_WORK(rebuild_sd_work, rebuild_sd_workfn);
> > -
> > -/*
> > - * EAS shouldn't be attempted without sugov, so rebuild the sched_domains
> > - * on governor changes to make sure the scheduler knows about it.
> > - */
> > -void sched_cpufreq_governor_change(struct cpufreq_policy *policy,
> > - struct cpufreq_governor *old_gov)
> > -{
> > - if (old_gov == &schedutil_gov || policy->governor == &schedutil_gov) {
> > - /*
> > - * When called from the cpufreq_register_driver() path, the
> > - * cpu_hotplug_lock is already held, so use a work item to
> > - * avoid nested locking in rebuild_sched_domains().
> > - */
> > - schedule_work(&rebuild_sd_work);
> > - }
> > -
> > -}
> > -#endif
>
> Acked-by: Viresh Kumar <[email protected]>

Applied as 6.7 material, thanks!