Hi Everyone,
This series reworks the handling of disabling turbo in intel_pstate
on top of the previous series of cleanups
https://lore.kernel.org/linux-pm/12409658.O9o76ZdvQC@kreacher/
The underlying problem is that disabling turbo is handled quite consistently
in intel_pstate and basically it can get disabled at any time (through
MSR_IA32_MISC_ENABLE_TURBO_DISABLE) without much coordination with the
cpufreq core or anything else.
Disabling turbo through the "no_turbo" sysfs attribute is more consistent,
but it has issues too (for example, if turbo is disabled via "no_turbo",
the frequency-invariance code gets notified on the turbo status change,
but the actual maximum frequency of the CPU is only updated if the
MSR_IA32_MISC_ENABLE_TURBO_DISABLE value changes either, which need not
happen at the same time or even at all).
The first patch is not really related to the rest of the series, it's
just a cleanup and can be applied separately.
Patch [2/6] uses the observation that it should be necessary to read
MSR_IA32_MISC_ENABLE_TURBO_DISABLE after driver initialization to remove
in-flight reads on that MSR and turbo state updates related to them.
Patch [3/6] builds on top of the previous one to adjust the "no_turbo"
attribute "store" and "show" callbacks.
Patch [4/6] adds READ_ONCE() annotations to global.no_turbo accesses and
makes some related simplifications.
Patch [5/6] replaces the cached MSR_IA32_MISC_ENABLE_TURBO_DISABLE
value in some checks with global.no_turbo for consistency.
Patch [6/6] makes all of the code paths where the maximum CPU frequency
can change to do that consistently by using the same set of functions.
Details are described in the individual patch changelogs.
Thanks!
From: Rafael J. Wysocki <[email protected]>
There are 3 places at which the maximum CPU frequency may change,
store_no_turbo(), intel_pstate_update_limits() (when called by the
cpufreq core) and intel_pstate_notify_work() (when handling a HWP
change notification). Currently, cpuinfo.max_freq is only updated by
store_no_turbo(), although it principle it may be necessary to update
it at the other 2 places too.
Make all of them mutually consistent.
Signed-off-by: Rafael J. Wysocki <[email protected]>
---
drivers/cpufreq/intel_pstate.c | 23 ++++++++++++++++++-----
1 file changed, 18 insertions(+), 5 deletions(-)
Index: linux-pm/drivers/cpufreq/intel_pstate.c
===================================================================
--- linux-pm.orig/drivers/cpufreq/intel_pstate.c
+++ linux-pm/drivers/cpufreq/intel_pstate.c
@@ -1153,18 +1153,32 @@ static void intel_pstate_update_policies
static void __intel_pstate_update_max_freq(struct cpudata *cpudata,
struct cpufreq_policy *policy)
{
+ intel_pstate_get_hwp_cap(cpudata);
+
policy->cpuinfo.max_freq = READ_ONCE(global.no_turbo) ?
cpudata->pstate.max_freq : cpudata->pstate.turbo_freq;
+
refresh_frequency_limits(policy);
}
static void intel_pstate_update_limits(unsigned int cpu)
{
- mutex_lock(&intel_pstate_driver_lock);
+ struct cpufreq_policy *policy = cpufreq_cpu_acquire(cpu);
- cpufreq_update_policy(cpu);
+ if (!policy)
+ return;
- mutex_unlock(&intel_pstate_driver_lock);
+ __intel_pstate_update_max_freq(all_cpu_data[cpu], policy);
+
+ cpufreq_cpu_release(policy);
+}
+
+static void intel_pstate_update_limits_for_all(void)
+{
+ int cpu;
+
+ for_each_possible_cpu(cpu)
+ intel_pstate_update_limits(cpu);
}
/************************** sysfs begin ************************/
@@ -1311,7 +1325,7 @@ static ssize_t store_no_turbo(struct kob
mutex_unlock(&intel_pstate_limits_lock);
- intel_pstate_update_policies();
+ intel_pstate_update_limits_for_all();
arch_set_max_freq_ratio(no_turbo);
unlock_driver:
@@ -1595,7 +1609,6 @@ static void intel_pstate_notify_work(str
struct cpufreq_policy *policy = cpufreq_cpu_acquire(cpudata->cpu);
if (policy) {
- intel_pstate_get_hwp_cap(cpudata);
__intel_pstate_update_max_freq(cpudata, policy);
cpufreq_cpu_release(policy);
From: Rafael J. Wysocki <[email protected]>
Because global.no_turbo is generally not read under intel_pstate_driver_lock
make store_no_turbo() use WRITE_ONCE() for updating it (this is the only
place at which it is updated except for the initialization) and make the
majority of places reading it use READ_ONCE().
Also remove redundant global.turbo_disabled checks from places that
depend on the 'true' value of global.no_turbo because it can only be
'true' if global.turbo_disabled is also 'true'.
Signed-off-by: Rafael J. Wysocki <[email protected]>
---
drivers/cpufreq/intel_pstate.c | 12 ++++++------
1 file changed, 6 insertions(+), 6 deletions(-)
Index: linux-pm/drivers/cpufreq/intel_pstate.c
===================================================================
--- linux-pm.orig/drivers/cpufreq/intel_pstate.c
+++ linux-pm/drivers/cpufreq/intel_pstate.c
@@ -1296,7 +1296,7 @@ static ssize_t store_no_turbo(struct kob
goto unlock_driver;
}
- global.no_turbo = no_turbo;
+ WRITE_ONCE(global.no_turbo, no_turbo);
mutex_lock(&intel_pstate_limits_lock);
@@ -1748,7 +1748,7 @@ static u64 atom_get_val(struct cpudata *
u32 vid;
val = (u64)pstate << 8;
- if (global.no_turbo && !global.turbo_disabled)
+ if (READ_ONCE(global.no_turbo) && !global.turbo_disabled)
val |= (u64)1 << 32;
vid_fp = cpudata->vid.min + mul_fp(
@@ -1913,7 +1913,7 @@ static u64 core_get_val(struct cpudata *
u64 val;
val = (u64)pstate << 8;
- if (global.no_turbo && !global.turbo_disabled)
+ if (READ_ONCE(global.no_turbo) && !global.turbo_disabled)
val |= (u64)1 << 32;
return val;
@@ -2211,7 +2211,7 @@ static inline int32_t get_target_pstate(
sample->busy_scaled = busy_frac * 100;
- target = global.no_turbo || global.turbo_disabled ?
+ target = READ_ONCE(global.no_turbo) ?
cpu->pstate.max_pstate : cpu->pstate.turbo_pstate;
target += target >> 2;
target = mul_fp(target, busy_frac);
@@ -2473,7 +2473,7 @@ static void intel_pstate_clear_update_ut
static int intel_pstate_get_max_freq(struct cpudata *cpu)
{
- return global.turbo_disabled || global.no_turbo ?
+ return READ_ONCE(global.no_turbo) ?
cpu->pstate.max_freq : cpu->pstate.turbo_freq;
}
@@ -2610,7 +2610,7 @@ static void intel_pstate_verify_cpu_poli
if (hwp_active) {
intel_pstate_get_hwp_cap(cpu);
- max_freq = global.no_turbo || global.turbo_disabled ?
+ max_freq = READ_ONCE(global.no_turbo) ?
cpu->pstate.max_freq : cpu->pstate.turbo_freq;
} else {
max_freq = intel_pstate_get_max_freq(cpu);
On Mon, 2024-03-25 at 18:06 +0100, Rafael J. Wysocki wrote:
> From: Rafael J. Wysocki <[email protected]>
>
> There are 3 places at which the maximum CPU frequency may change,
> store_no_turbo(), intel_pstate_update_limits() (when called by the
> cpufreq core) and intel_pstate_notify_work() (when handling a HWP
> change notification). Currently, cpuinfo.max_freq is only updated by
> store_no_turbo(), although it principle it may be necessary to update
> it at the other 2 places too.
It also works for intel_pstate_notify_work() path as is without this
change.
To start with:
$ sudo rdmsr 0x771
6080c14
$ cat /sys/devices/system/cpu/cpu0/cpufreq/cpuinfo_*
2000000
800000
0
Now trigger a max frequency change via SST. intel_pstate_notify_work()
called because guaranteed also changed. We didn't subscribe for max
change only (although we should).
Max changed from 2GHz to 1.9 GHz.
$ sudo rdmsr 0x771
6080e13
[labuser@gnr-bkc ~]$ cat /sys/devices/system/cpu/cpu0/cpufreq/cpuinfo_*
1900000
800000
0
Now trigger SST to change to max frequency to 2GHz.
sudo rdmsr 0x771
6080c14
cat /sys/devices/system/cpu/cpu0/cpufreq/cpuinfo_*
2000000
800000
0
May be you mean something else.
Thanks,
Srinivas
>
> Make all of them mutually consistent.
>
> Signed-off-by: Rafael J. Wysocki <[email protected]>
> ---
> drivers/cpufreq/intel_pstate.c | 23 ++++++++++++++++++-----
> 1 file changed, 18 insertions(+), 5 deletions(-)
>
> Index: linux-pm/drivers/cpufreq/intel_pstate.c
> ===================================================================
> --- linux-pm.orig/drivers/cpufreq/intel_pstate.c
> +++ linux-pm/drivers/cpufreq/intel_pstate.c
> @@ -1153,18 +1153,32 @@ static void intel_pstate_update_policies
> static void __intel_pstate_update_max_freq(struct cpudata *cpudata,
> struct cpufreq_policy
> *policy)
> {
> + intel_pstate_get_hwp_cap(cpudata);
> +
> policy->cpuinfo.max_freq = READ_ONCE(global.no_turbo) ?
> cpudata->pstate.max_freq : cpudata-
> >pstate.turbo_freq;
> +
> refresh_frequency_limits(policy);
> }
>
> static void intel_pstate_update_limits(unsigned int cpu)
> {
> - mutex_lock(&intel_pstate_driver_lock);
> + struct cpufreq_policy *policy = cpufreq_cpu_acquire(cpu);
>
> - cpufreq_update_policy(cpu);
> + if (!policy)
> + return;
>
> - mutex_unlock(&intel_pstate_driver_lock);
> + __intel_pstate_update_max_freq(all_cpu_data[cpu], policy);
> +
> + cpufreq_cpu_release(policy);
> +}
> +
> +static void intel_pstate_update_limits_for_all(void)
> +{
> + int cpu;
> +
> + for_each_possible_cpu(cpu)
> + intel_pstate_update_limits(cpu);
> }
>
> /************************** sysfs begin ************************/
> @@ -1311,7 +1325,7 @@ static ssize_t store_no_turbo(struct kob
>
> mutex_unlock(&intel_pstate_limits_lock);
>
> - intel_pstate_update_policies();
> + intel_pstate_update_limits_for_all();
> arch_set_max_freq_ratio(no_turbo);
>
> unlock_driver:
> @@ -1595,7 +1609,6 @@ static void intel_pstate_notify_work(str
> struct cpufreq_policy *policy = cpufreq_cpu_acquire(cpudata-
> >cpu);
>
> if (policy) {
> - intel_pstate_get_hwp_cap(cpudata);
> __intel_pstate_update_max_freq(cpudata, policy);
>
> cpufreq_cpu_release(policy);
>
>
>
On Wed, Mar 27, 2024 at 7:08 PM srinivas pandruvada
<[email protected]> wrote:
>
> On Mon, 2024-03-25 at 18:06 +0100, Rafael J. Wysocki wrote:
> > From: Rafael J. Wysocki <[email protected]>
> >
> > There are 3 places at which the maximum CPU frequency may change,
> > store_no_turbo(), intel_pstate_update_limits() (when called by the
> > cpufreq core) and intel_pstate_notify_work() (when handling a HWP
> > change notification). Currently, cpuinfo.max_freq is only updated by
> > store_no_turbo(), although it principle it may be necessary to update
> > it at the other 2 places too.
>
> It also works for intel_pstate_notify_work() path as is without this
> change.
>
> To start with:
>
> $ sudo rdmsr 0x771
> 6080c14
> $ cat /sys/devices/system/cpu/cpu0/cpufreq/cpuinfo_*
> 2000000
> 800000
> 0
>
> Now trigger a max frequency change via SST. intel_pstate_notify_work()
> called because guaranteed also changed. We didn't subscribe for max
> change only (although we should).
>
> Max changed from 2GHz to 1.9 GHz.
>
> $ sudo rdmsr 0x771
> 6080e13
> [labuser@gnr-bkc ~]$ cat /sys/devices/system/cpu/cpu0/cpufreq/cpuinfo_*
> 1900000
> 800000
> 0
>
> Now trigger SST to change to max frequency to 2GHz.
>
> sudo rdmsr 0x771
> 6080c14
>
> cat /sys/devices/system/cpu/cpu0/cpufreq/cpuinfo_*
> 2000000
> 800000
> 0
>
> May be you mean something else.
No, I don't, and you are right.
When I was writing the changelog, I somehow forgot that
intel_pstate_notify_work() called __intel_pstate_update_max_freq(),
even though the code changes in the patch obviously take that into
account (I can't really explain what happened :-/).
I'll fix the changelog.
Cheers,
Rafael
From: Rafael J. Wysocki <[email protected]>
There are 3 places at which the maximum CPU frequency may change,
store_no_turbo(), intel_pstate_update_limits() (when called by the
cpufreq core) and intel_pstate_notify_work() (when handling a HWP
change notification). Currently, cpuinfo.max_freq is only updated by
store_no_turbo() and intel_pstate_notify_work(), although it principle
it may be necessary to update it in intel_pstate_update_limits() either.
Make all of them mutually consistent.
Signed-off-by: Rafael J. Wysocki <[email protected]>
---
v1 -> v1.1: Fix changelog (Srinivas).
---
drivers/cpufreq/intel_pstate.c | 23 ++++++++++++++++++-----
1 file changed, 18 insertions(+), 5 deletions(-)
Index: linux-pm/drivers/cpufreq/intel_pstate.c
===================================================================
--- linux-pm.orig/drivers/cpufreq/intel_pstate.c
+++ linux-pm/drivers/cpufreq/intel_pstate.c
@@ -1153,18 +1153,32 @@ static void intel_pstate_update_policies
static void __intel_pstate_update_max_freq(struct cpudata *cpudata,
struct cpufreq_policy *policy)
{
+ intel_pstate_get_hwp_cap(cpudata);
+
policy->cpuinfo.max_freq = READ_ONCE(global.no_turbo) ?
cpudata->pstate.max_freq : cpudata->pstate.turbo_freq;
+
refresh_frequency_limits(policy);
}
static void intel_pstate_update_limits(unsigned int cpu)
{
- mutex_lock(&intel_pstate_driver_lock);
+ struct cpufreq_policy *policy = cpufreq_cpu_acquire(cpu);
- cpufreq_update_policy(cpu);
+ if (!policy)
+ return;
- mutex_unlock(&intel_pstate_driver_lock);
+ __intel_pstate_update_max_freq(all_cpu_data[cpu], policy);
+
+ cpufreq_cpu_release(policy);
+}
+
+static void intel_pstate_update_limits_for_all(void)
+{
+ int cpu;
+
+ for_each_possible_cpu(cpu)
+ intel_pstate_update_limits(cpu);
}
/************************** sysfs begin ************************/
@@ -1311,7 +1325,7 @@ static ssize_t store_no_turbo(struct kob
mutex_unlock(&intel_pstate_limits_lock);
- intel_pstate_update_policies();
+ intel_pstate_update_limits_for_all();
arch_set_max_freq_ratio(no_turbo);
unlock_driver:
@@ -1595,7 +1609,6 @@ static void intel_pstate_notify_work(str
struct cpufreq_policy *policy = cpufreq_cpu_acquire(cpudata->cpu);
if (policy) {
- intel_pstate_get_hwp_cap(cpudata);
__intel_pstate_update_max_freq(cpudata, policy);
cpufreq_cpu_release(policy);
On Mon, 2024-03-25 at 18:00 +0100, Rafael J. Wysocki wrote:
> Hi Everyone,
>
> This series reworks the handling of disabling turbo in intel_pstate
> on top of the previous series of cleanups
>
> https://lore.kernel.org/linux-pm/12409658.O9o76ZdvQC@kreacher/
>
> The underlying problem is that disabling turbo is handled quite
> consistently
> in intel_pstate and basically it can get disabled at any time
> (through
> MSR_IA32_MISC_ENABLE_TURBO_DISABLE) without much coordination with
> the
> cpufreq core or anything else.
>
> Disabling turbo through the "no_turbo" sysfs attribute is more
> consistent,
> but it has issues too (for example, if turbo is disabled via
> "no_turbo",
> the frequency-invariance code gets notified on the turbo status
> change,
> but the actual maximum frequency of the CPU is only updated if the
> MSR_IA32_MISC_ENABLE_TURBO_DISABLE value changes either, which need
> not
> happen at the same time or even at all).
>
> The first patch is not really related to the rest of the series, it's
> just a cleanup and can be applied separately.
>
> Patch [2/6] uses the observation that it should be necessary to read
> MSR_IA32_MISC_ENABLE_TURBO_DISABLE after driver initialization to
> remove
> in-flight reads on that MSR and turbo state updates related to them.
>
> Patch [3/6] builds on top of the previous one to adjust the
> "no_turbo"
> attribute "store" and "show" callbacks.
>
> Patch [4/6] adds READ_ONCE() annotations to global.no_turbo accesses
> and
> makes some related simplifications.
>
> Patch [5/6] replaces the cached MSR_IA32_MISC_ENABLE_TURBO_DISABLE
> value in some checks with global.no_turbo for consistency.
>
> Patch [6/6] makes all of the code paths where the maximum CPU
> frequency
> can change to do that consistently by using the same set of
> functions.
>
> Details are described in the individual patch changelogs.
>
Acked-by: Srinivas Pandruvada <[email protected]>
Thanks,
Srinivas
> Thanks!
>
>
>
>