Hi All,
The v2 is here to address feedback from Doug and one issue found by me.
The purpose of this series is to address some peculiarities related to
taking CPUs offline/online and switching between different operation
modes with HWP enabled that have become visible after allowing the
driver to work in the passive mode with HWP enabled in 5.9-rc1 (and
one that was there earlier, but can be addressed easily after the
changes made in 5.9-rc1).
Please refer to the patch changelogs for details.
For easier testing/review, the series is available from the git branch at:
git://git.kernel.org/pub/scm/linux/kernel/git/rafael/linux-pm.git \
intel_pstate-testing
Thanks,
Rafael
From: "Rafael J. Wysocki" <[email protected]>
Add ->offline and ->online driver callbacks to prepare for taking a
CPU offline and to restore its working configuration when it goes
back online, respectively, to avoid invoking the ->init callback on
every CPU online which is quite a bit of unnecessary overhead.
Define ->offline and ->online so that they can be used in the
passive mode as well as in the active mode and because ->offline
will do the majority of ->stop_cpu work, the passive mode does
not need that callback any more, so drop it.
Signed-off-by: Rafael J. Wysocki <[email protected]>
---
-> v2: Typo fixes and changelog edits (Doug).
---
drivers/cpufreq/intel_pstate.c | 38 ++++++++++++++++++++++++++++------
1 file changed, 32 insertions(+), 6 deletions(-)
diff --git a/drivers/cpufreq/intel_pstate.c b/drivers/cpufreq/intel_pstate.c
index 3d18934fa975..98836ac299db 100644
--- a/drivers/cpufreq/intel_pstate.c
+++ b/drivers/cpufreq/intel_pstate.c
@@ -2297,28 +2297,51 @@ static int intel_pstate_verify_policy(struct cpufreq_policy_data *policy)
return 0;
}
-static void intel_cpufreq_stop_cpu(struct cpufreq_policy *policy)
+static int intel_pstate_cpu_offline(struct cpufreq_policy *policy)
{
+ pr_debug("CPU %d going offline\n", policy->cpu);
+
+ intel_pstate_exit_perf_limits(policy);
+
+ /*
+ * If the CPU is an SMT thread and it goes offline with the performance
+ * settings different from the minimum, it will prevent its sibling
+ * from getting to lower performance levels, so force the minimum
+ * performance on CPU offline to prevent that from happening.
+ */
if (hwp_active)
intel_pstate_hwp_force_min_perf(policy->cpu);
else
intel_pstate_set_min_pstate(all_cpu_data[policy->cpu]);
+
+ return 0;
+}
+
+static int intel_pstate_cpu_online(struct cpufreq_policy *policy)
+{
+ pr_debug("CPU %d going online\n", policy->cpu);
+
+ intel_pstate_init_acpi_perf_limits(policy);
+
+ if (hwp_active)
+ wrmsrl_on_cpu(policy->cpu, MSR_HWP_REQUEST,
+ all_cpu_data[policy->cpu]->hwp_req_cached);
+
+ return 0;
}
static void intel_pstate_stop_cpu(struct cpufreq_policy *policy)
{
- pr_debug("CPU %d exiting\n", policy->cpu);
+ pr_debug("CPU %d stopping\n", policy->cpu);
intel_pstate_clear_update_util_hook(policy->cpu);
if (hwp_active)
intel_pstate_hwp_save_state(policy);
-
- intel_cpufreq_stop_cpu(policy);
}
static int intel_pstate_cpu_exit(struct cpufreq_policy *policy)
{
- intel_pstate_exit_perf_limits(policy);
+ pr_debug("CPU %d exiting\n", policy->cpu);
policy->fast_switch_possible = false;
@@ -2398,6 +2421,8 @@ static struct cpufreq_driver intel_pstate = {
.init = intel_pstate_cpu_init,
.exit = intel_pstate_cpu_exit,
.stop_cpu = intel_pstate_stop_cpu,
+ .offline = intel_pstate_cpu_offline,
+ .online = intel_pstate_cpu_online,
.update_limits = intel_pstate_update_limits,
.name = "intel_pstate",
};
@@ -2652,7 +2677,8 @@ static struct cpufreq_driver intel_cpufreq = {
.fast_switch = intel_cpufreq_fast_switch,
.init = intel_cpufreq_cpu_init,
.exit = intel_cpufreq_cpu_exit,
- .stop_cpu = intel_cpufreq_stop_cpu,
+ .offline = intel_pstate_cpu_offline,
+ .online = intel_pstate_cpu_online,
.update_limits = intel_pstate_update_limits,
.name = "intel_cpufreq",
};
--
2.26.2
From: "Rafael J. Wysocki" <[email protected]>
Because hwp_req_cached contains the effective EPP value (0) when the
"performance" scaling algorithm is used in the active mode, replace
it with the cached EPP value during CPU offline to prevent it from
being used (unexpectedly) after switching over from the active mode
to the passive mode.
Also rename intel_pstate_hwp_force_min_perf() because it will do more
than just forcing the minimum performance now.
Signed-off-by: Rafael J. Wysocki <[email protected]>
---
-> v2: New patch.
---
drivers/cpufreq/intel_pstate.c | 21 ++++++++++++++++-----
1 file changed, 16 insertions(+), 5 deletions(-)
diff --git a/drivers/cpufreq/intel_pstate.c b/drivers/cpufreq/intel_pstate.c
index 37731d45f0ea..61d7179bccdd 100644
--- a/drivers/cpufreq/intel_pstate.c
+++ b/drivers/cpufreq/intel_pstate.c
@@ -904,14 +904,25 @@ static void intel_pstate_hwp_set(unsigned int cpu)
wrmsrl_on_cpu(cpu, MSR_HWP_REQUEST, value);
}
-static void intel_pstate_hwp_force_min_perf(int cpu)
+static void intel_pstate_hwp_offline(int cpu)
{
- u64 value;
+ struct cpudata *cpudata = all_cpu_data[cpu];
+ u64 value = READ_ONCE(cpudata->hwp_req_cached);
int min_perf;
- value = all_cpu_data[cpu]->hwp_req_cached;
+ if (boot_cpu_has(X86_FEATURE_HWP_EPP)) {
+ /*
+ * In case the EPP has been set to "performance" by the
+ * active mode "performance" scaling algorithm, replace that
+ * temporary value with the cached EPP one.
+ */
+ value &= ~GENMASK_ULL(31, 24);
+ value |= HWP_ENERGY_PERF_PREFERENCE(cpudata->epp_cached);
+ WRITE_ONCE(cpudata->hwp_req_cached, value);
+ }
+
value &= ~GENMASK_ULL(31, 0);
- min_perf = HWP_LOWEST_PERF(all_cpu_data[cpu]->hwp_cap_cached);
+ min_perf = HWP_LOWEST_PERF(cpudata->hwp_cap_cached);
/* Set hwp_max = hwp_min */
value |= HWP_MAX_PERF(min_perf);
@@ -2313,7 +2324,7 @@ static int intel_pstate_cpu_offline(struct cpufreq_policy *policy)
* performance on CPU offline to prevent that from happening.
*/
if (hwp_active)
- intel_pstate_hwp_force_min_perf(policy->cpu);
+ intel_pstate_hwp_offline(policy->cpu);
else
intel_pstate_set_min_pstate(all_cpu_data[policy->cpu]);
--
2.26.2
From: "Rafael J. Wysocki" <[email protected]>
When intel_pstate switches the operation mode from "active" to
"passive" or the other way around, freeing its data structures
representing CPUs and allocating them again from scratch is not
necessary and wasteful. Moreover, if these data structures are
preserved, the cached HWP Request MSR value from there may be
written to the MSR to start with to reinitialize it and help to
restore the EPP value set previously (it is set to 0xFF when CPUs
go offline to allow their SMT siblings to use the full range of
EPP values and that also happens when the driver gets unregistered).
Accordingly, modify the driver to only do a full cleanup on driver
object registration errors and when its status is changed to "off"
via sysfs and to write the cached HWP Request MSR value back to
the MSR on CPU init if the data structure representing the given
CPU is still there.
Signed-off-by: Rafael J. Wysocki <[email protected]>
---
-> v2: Rearrange intel_pstate_init_cpu() to restore some of the previous
behavior of it to retain the current active-mode EPP management.
---
drivers/cpufreq/intel_pstate.c | 54 +++++++++++++---------------------
1 file changed, 21 insertions(+), 33 deletions(-)
diff --git a/drivers/cpufreq/intel_pstate.c b/drivers/cpufreq/intel_pstate.c
index 98836ac299db..37731d45f0ea 100644
--- a/drivers/cpufreq/intel_pstate.c
+++ b/drivers/cpufreq/intel_pstate.c
@@ -2098,25 +2098,28 @@ static int intel_pstate_init_cpu(unsigned int cpunum)
all_cpu_data[cpunum] = cpu;
- cpu->epp_default = -EINVAL;
- cpu->epp_powersave = -EINVAL;
- cpu->epp_saved = -EINVAL;
- }
+ cpu->cpu = cpunum;
- cpu = all_cpu_data[cpunum];
-
- cpu->cpu = cpunum;
+ cpu->epp_default = -EINVAL;
- if (hwp_active) {
- const struct x86_cpu_id *id;
+ if (hwp_active) {
+ const struct x86_cpu_id *id;
- intel_pstate_hwp_enable(cpu);
+ intel_pstate_hwp_enable(cpu);
- id = x86_match_cpu(intel_pstate_hwp_boost_ids);
- if (id && intel_pstate_acpi_pm_profile_server())
- hwp_boost = true;
+ id = x86_match_cpu(intel_pstate_hwp_boost_ids);
+ if (id && intel_pstate_acpi_pm_profile_server())
+ hwp_boost = true;
+ }
+ } else if (hwp_active) {
+ cpu->epp_policy = 0;
+ wrmsrl_on_cpu(cpunum, MSR_HWP_REQUEST,
+ cpu->hwp_req_cached);
}
+ cpu->epp_powersave = -EINVAL;
+ cpu->epp_saved = -EINVAL;
+
intel_pstate_get_cpu_pstates(cpu);
pr_debug("controlling: cpu %d\n", cpunum);
@@ -2701,9 +2704,6 @@ static void intel_pstate_driver_cleanup(void)
}
put_online_cpus();
- if (intel_pstate_driver == &intel_pstate)
- intel_pstate_sysfs_hide_hwp_dynamic_boost();
-
intel_pstate_driver = NULL;
}
@@ -2729,14 +2729,6 @@ static int intel_pstate_register_driver(struct cpufreq_driver *driver)
return 0;
}
-static int intel_pstate_unregister_driver(void)
-{
- cpufreq_unregister_driver(intel_pstate_driver);
- intel_pstate_driver_cleanup();
-
- return 0;
-}
-
static ssize_t intel_pstate_show_status(char *buf)
{
if (!intel_pstate_driver)
@@ -2748,8 +2740,6 @@ static ssize_t intel_pstate_show_status(char *buf)
static int intel_pstate_update_status(const char *buf, size_t size)
{
- int ret;
-
if (size == 3 && !strncmp(buf, "off", size)) {
if (!intel_pstate_driver)
return -EINVAL;
@@ -2757,7 +2747,8 @@ static int intel_pstate_update_status(const char *buf, size_t size)
if (hwp_active)
return -EBUSY;
- return intel_pstate_unregister_driver();
+ cpufreq_unregister_driver(intel_pstate_driver);
+ intel_pstate_driver_cleanup();
}
if (size == 6 && !strncmp(buf, "active", size)) {
@@ -2765,9 +2756,7 @@ static int intel_pstate_update_status(const char *buf, size_t size)
if (intel_pstate_driver == &intel_pstate)
return 0;
- ret = intel_pstate_unregister_driver();
- if (ret)
- return ret;
+ cpufreq_unregister_driver(intel_pstate_driver);
}
return intel_pstate_register_driver(&intel_pstate);
@@ -2778,9 +2767,8 @@ static int intel_pstate_update_status(const char *buf, size_t size)
if (intel_pstate_driver == &intel_cpufreq)
return 0;
- ret = intel_pstate_unregister_driver();
- if (ret)
- return ret;
+ cpufreq_unregister_driver(intel_pstate_driver);
+ intel_pstate_sysfs_hide_hwp_dynamic_boost();
}
return intel_pstate_register_driver(&intel_cpufreq);
--
2.26.2
On Mon, 2020-08-24 at 19:39 +0200, Rafael J. Wysocki wrote:
> Hi All,
>
> The v2 is here to address feedback from Doug and one issue found by
> me.
>
> The purpose of this series is to address some peculiarities related
> to
> taking CPUs offline/online and switching between different operation
> modes with HWP enabled that have become visible after allowing the
> driver to work in the passive mode with HWP enabled in 5.9-rc1 (and
> one that was there earlier, but can be addressed easily after the
> changes madein 5.9-rc1).
>
> Please refer to the patch changelogs for details.
>
> For easier testing/review, the series is available from the git
> branch at:
>
> git://git.kernel.org/pub/scm/linux/kernel/git/rafael/linux-pm.git \
> intel_pstate-testing
>
Applied these patches to 5.9-rc2
- After S3, the limits got messed up.
# cat /sys/power/mem_sleep
s2idle [deep]
- In dmesg there is an "unchecked MSR" message for the HWP register
1.
Before test
sudo rdmsr -a 0x774
80002b0c
80002b0c
80002d0c
80002b0c
80002b0c
80002b0c
80002b0c
80002b0c
80002d0c
80002b0c
80002b0c
80002b0c
80002d0c
80002b0c
80002b0c
80002b0c
80002b0c
80002b0c
80002d0d
7f002b0c
cd /sys/devices/system/cpu/intel_pstate/
[root@otcpl-perf-test-skx-i9 intel_pstate]# grep . *
hwp_dynamic_boost:0
max_perf_pct:100
min_perf_pct:27
no_turbo:0
num_pstates:32
status:active
turbo_pct:32
cd ../cpu1/cpufreq/
[root@otcpl-perf-test-skx-i9 cpufreq]# grep . *
affected_cpus:1
base_frequency:3300000
cpuinfo_max_freq:4300000
cpuinfo_min_freq:1200000
cpuinfo_transition_latency:0
energy_performance_available_preferences:default performance
balance_performance balance_power power
energy_performance_preference:balance_performance
related_cpus:1
scaling_available_governors:performance powersave
scaling_cur_freq:1200000
scaling_driver:intel_pstate
scaling_governor:powersave
scaling_max_freq:4300000
scaling_min_freq:1200000
scaling_setspeed:<unsupported>
2. Now change the EPP
# echo 127 > energy_performance_preference
sudo rdmsr -a 0x774
80002b0c
80002b0c
80002d0c
80002b0c
80002b0c
80002b0c
80002b0c
80002b0c
80002d0c
80002b0c
80002b0c
80002b0c
80002d0c
80002b0c
80002b0c
80002b0c
80002b0c
80002b0c
80002d0d
Good here
3. Offline/online good
[root@otcpl-perf-test-skx-i9 cpufreq]# echo 0 >
/sys/devices/system/cpu/cpu1/online
[root@otcpl-perf-test-skx-i9 cpufreq]# echo ` >
/sys/devices/system/cpu/cpu1/online
> echo ` > /sys/devices/system/cpu/cpu1/online ^C
[root@otcpl-perf-test-skx-i9 cpufreq]# echo 1 >
/sys/devices/system/cpu/cpu1/online
sudo rdmsr -a 0x774
80002b0c
80002b0c
80002d0c
80002b0c
80002b0c
80002b0c
80002b0c
80002b0c
80002d0c
80002b0c
80002b0c
80002b0c
80002d0c
80002b0c
80002b0c
80002b0c
80002b0c
80002b0c
80002d0d
7f002b0c
Good. Online restored the setting
4. Now S3
rtcwake -m mem -s 10
All limits are now messed up
sudo rdmsr -a 0x774
80002b0c
8000ff00
8000ff00
8000ff00
8000ff00
8000ff00
8000ff00
8000ff00
8000ff00
8000ff00
8000ff00
8000ff00
8000ff00
8000ff00
8000ff00
8000ff00
8000ff00
8000ff00
8000ff00
8000ff00
5. Now switch to passive
Again bad, some CPU max/min is 0
sudo rdmsr -a 0x774
80002b0d
7f002b0f
80002b0c
80002d0e
80002b0c
80002b0d
80002b0f
80002b2b
80002b0c
80002d1d
80000000
80002b0c
80002b0c
80000000
80000000
80000000
80000000
80000000
80000000
80000000
6.
Switched back to active to restore back
Lost EPP setting but rest are good.
sudo rdmsr -a 0x774
80002b0c
7f002b0c
80002b0c
80002d0c
80002b0c
80002b0c
80002b0c
80002b0c
80002b0c
80002d0c
80002b0c
80002b0c
80002b0c
80002d0c
80002b0c
80002b0c
80002b0c
80002b0c
80002b0c
80002d0d
7. S3 again
rtcwake -m mem -s 10
Again messed up
sudo rdmsr -a 0x774
80002b0c
8000ff00
8000ff00
8000ff00
8000ff00
8000ff00
8000ff00
8000ff00
8000ff00
8000ff00
8000ff00
8000ff00
8000ff00
8000ff00
8000ff00
8000ff00
8000ff00
8000ff00
8000ff00
8000ff00
This time dmesg didn't have the unchecked MSR message (I think because I
didn't change the EPP beforehand)
Thanks,
Srinivas
> Thanks,
> Rafael
>
>
>
Hi Srinivas,
I think there is a disconnect between your written
description of what is going on and your supporting MSR reads.
On 2020.08.24 16:56 Srinivas Pandruvada wrote:
> On Mon, 2020-08-24 at 19:39 +0200, Rafael J. Wysocki wrote:
> > Hi All,
> >
> > The v2 is here to address feedback from Doug and one issue found by
> > me.
> >
> > The purpose of this series is to address some peculiarities related
> > to
> > taking CPUs offline/online and switching between different operation
> > modes with HWP enabled that have become visible after allowing the
> > driver to work in the passive mode with HWP enabled in 5.9-rc1 (and
> > one that was there earlier, but can be addressed easily after the
> > changes madein 5.9-rc1).
> >
> > Please refer to the patch changelogs for details.
> >
> > For easier testing/review, the series is available from the git
> > branch at:
> >
> > git://git.kernel.org/pub/scm/linux/kernel/git/rafael/linux-pm.git \
> > intel_pstate-testing
> >
>
> Applied these patches to 5.9-rc2
So did I, and the issues I reported the other day are fine now.
I did try a few of the things you were doing.
>
> - After s3 limits got messed up.
> # cat /sys/power/mem_sleep
> s2idle [deep]
>
> - In the dmesg unchecked MSR for HWP register
>
> 1.
> Before test
>
> sudo rdmsr -a 0x774
> 80002b0c
> 80002b0c
> 80002d0c
> 80002b0c
> 80002b0c
> 80002b0c
> 80002b0c
> 80002b0c
> 80002d0c
> 80002b0c
> 80002b0c
> 80002b0c
> 80002d0c
> 80002b0c
> 80002b0c
> 80002b0c
> 80002b0c
> 80002b0c
> 80002d0d
> 7f002b0c
?? This looks like the MSR
read for further below, and for
CPU 19 instead of 1.
> cd /sys/devices/system/cpu/intel_pstate/
> [root@otcpl-perf-test-skx-i9 intel_pstate]# grep . *
> hwp_dynamic_boost:0
> max_perf_pct:100
> min_perf_pct:27
> no_turbo:0
> num_pstates:32
> status:active
> turbo_pct:32
>
> cd ../cpu1/cpufreq/
> [root@otcpl-perf-test-skx-i9 cpufreq]# grep . *
> affected_cpus:1
> base_frequency:3300000
> cpuinfo_max_freq:4300000
> cpuinfo_min_freq:1200000
> cpuinfo_transition_latency:0
> energy_performance_available_preferences:default performance
> balance_performance balance_power power
> energy_performance_preference:balance_performance
> related_cpus:1
> scaling_available_governors:performance powersave
> scaling_cur_freq:1200000
> scaling_driver:intel_pstate
> scaling_governor:powersave
> scaling_max_freq:4300000
> scaling_min_freq:1200000
> scaling_setspeed:<unsupported>
>
>
> 2. Now change the EPP
>
> # echo 127 > energy_performance_preference
> sudo rdmsr -a 0x774
> 80002b0c
> 80002b0c
> 80002d0c
> 80002b0c
> 80002b0c
> 80002b0c
> 80002b0c
> 80002b0c
> 80002d0c
> 80002b0c
> 80002b0c
> 80002b0c
> 80002d0c
> 80002b0c
> 80002b0c
> 80002b0c
> 80002b0c
> 80002b0c
> 80002d0d
This looks like the original MSR read.
>
> Good here
>
> 3. Offline/online good
>
> [root@otcpl-perf-test-skx-i9 cpufreq]# echo 0 >
> /sys/devices/system/cpu/cpu1/online
> [root@otcpl-perf-test-skx-i9 cpufreq]# echo ` >
> /sys/devices/system/cpu/cpu1/online
> > echo ` > /sys/devices/system/cpu/cpu1/online ^C
> [root@otcpl-perf-test-skx-i9 cpufreq]# echo 1 >
> /sys/devices/system/cpu/cpu1/online
>
> sudo rdmsr -a 0x774
> 80002b0c
> 80002b0c
> 80002d0c
> 80002b0c
> 80002b0c
> 80002b0c
> 80002b0c
> 80002b0c
> 80002d0c
> 80002b0c
> 80002b0c
> 80002b0c
> 80002d0c
> 80002b0c
> 80002b0c
> 80002b0c
> 80002b0c
> 80002b0c
> 80002d0d
> 7f002b0c
O.K.
>
> Good. Online restored the setting
>
> 4. Now S3
>
> rtcwake -m mem -s 10
Cool command. I did not know about it.
I tried it.
>
> All limits are now messed up
>
> sudo rdmsr -a 0x774
> 80002b0c
> 8000ff00
> 8000ff00
> 8000ff00
> 8000ff00
> 8000ff00
> 8000ff00
> 8000ff00
> 8000ff00
> 8000ff00
> 8000ff00
> 8000ff00
> 8000ff00
> 8000ff00
> 8000ff00
> 8000ff00
> 8000ff00
> 8000ff00
> 8000ff00
> 8000ff00
>
Yes, I got the same:
# /home/doug/c/msr-decoder (edited)
6.) 0x774: IA32_HWP_REQUEST: CPU 0-5 :
raw: 80002E08 : 8000FF01 : 8000FF01 : 8000FF01 : 8000FF01 : 8000FF01 :
min: 8 : 1 : 1 : 1 : 1 : 1 :
max: 46 : 255 : 255 : 255 : 255 : 255 :
des: 0 : 0 : 0 : 0 : 0 : 0 :
epp: 128 : 128 : 128 : 128 : 128 : 128 :
act: 0 : 0 : 0 : 0 : 0 : 0 :
> 5. Now switch to passive
> Again bad, some CPU max/min is 0
>
> sudo rdmsr -a 0x774
> 80002b0d
> 7f002b0f
Hmmm... Now seems to be CPU 1
> 80002b0c
> 80002d0e
> 80002b0c
> 80002b0d
> 80002b0f
> 80002b2b
> 80002b0c
> 80002d1d
> 80000000
> 80002b0c
> 80002b0c
> 80000000
> 80000000
> 80000000
> 80000000
> 80000000
> 80000000
> 80000000
MSR 774 was good for me, but in general my decoder was having troubles.
0x774: IA32_HWP_REQUEST: CPU 0-5 :
sh: 0: getcwd() failed: No such file or directory
raw: 80002E2E : 7F002E2E : 80002E2E : 80002E2E : 80002E2E : 80002E2E :
min: 46 : 46 : 46 : 46 : 46 : 46 :
max: 46 : 46 : 46 : 46 : 46 : 46 :
des: 0 : 0 : 0 : 0 : 0 : 0 :
epp: 128 : 127 : 128 : 128 : 128 : 128 :
act: 0 : 0 : 0 : 0 : 0 : 0 :
>
> 6.
> Switched back to active to restore back
>
> Lost EPP setting but rest are good.
>
> sudo rdmsr -a 0x774
> 80002b0c
> 7f002b0c
And again, now seems to be CPU1.
> 80002b0c
> 80002d0c
> 80002b0c
> 80002b0c
> 80002b0c
> 80002b0c
> 80002b0c
> 80002d0c
> 80002b0c
> 80002b0c
> 80002b0c
> 80002d0c
> 80002b0c
> 80002b0c
> 80002b0c
> 80002b0c
> 80002b0c
> 80002d0d
>
> 7. S3 again
>
> rtcwake -m mem -s 10
>
> Again messed up
>
> sudo rdmsr -a 0x774
> 80002b0c
> 8000ff00
> 8000ff00
> 8000ff00
> 8000ff00
> 8000ff00
> 8000ff00
> 8000ff00
> 8000ff00
> 8000ff00
> 8000ff00
> 8000ff00
> 8000ff00
> 8000ff00
> 8000ff00
> 8000ff00
> 8000ff00
> 8000ff00
> 8000ff00
> 8000ff00
>
> This time dmesg didn't have unchecked MSR in dmesg (I think because I
> didn't change EPP before)
>
> Thanks,
> Srinivas
>
> > Thanks,
> > Rafael
> >
> >
> >
Hi Doug,
On Mon, 2020-08-24 at 18:00 -0700, Doug Smythies wrote:
> Hi Srinivas,
>
> I think there is a disconnect between your written
> description of what is going on and your supporting MSR reads.
>
I reproduced again.
I see that the individual copy-pasted outputs got swapped the first time.
I pasted the full output by direct copy - paste from the screen.
But the issues are still there.
[labuser@otcpl-perf-test-skx-i9 ~]$ sudo -s
[root@otcpl-perf-test-skx-i9 labuser]# rdmsr -a 0x774
80002b0c
80002b0c
80002b0c
80002d0c
80002b0c
80002b0c
80002b0c
80002b0c
80002b0c
80002d0c
80002b0c
80002b0c
80002b0c
80002d0c
80002b0c
80002b0c
80002b0c
80002b0c
80002b0c
80002d0d
[root@otcpl-perf-test-skx-i9 labuser]# cd /sys/devices/system/cpu/cpu1
[root@otcpl-perf-test-skx-i9 cpu1]# cd cpufreq/
[root@otcpl-perf-test-skx-i9 cpufreq]# echo 127 >
energy_performance_preference
[root@otcpl-perf-test-skx-i9 cpufreq]# cat
energy_performance_preference
127
[root@otcpl-perf-test-skx-i9 cpufreq]# rdmsr -a 0x774
80002b0c
7f002b0c
80002b0c
80002d0c
80002b0c
80002b0c
80002b0c
80002b0c
80002b0c
80002d0c
80002b0c
80002b0c
80002b0c
80002d0c
80002b0c
80002b0c
80002b0c
80002b0c
80002b0c
80002d0d
[root@otcpl-perf-test-skx-i9 cpufreq]# echo 0 >
/sys/devices/system/cpu/cpu1/online
[root@otcpl-perf-test-skx-i9 cpufreq]# echo 1 >
/sys/devices/system/cpu/cpu1/online
[root@otcpl-perf-test-skx-i9 cpufreq]# rdmsr -a 0x774
80002b0c
80002b0c
80002d0c
80002b0c
80002b0c
80002b0c
80002b0c
80002b0c
80002d0c
80002b0c
80002b0c
80002b0c
80002d0c
80002b0c
80002b0c
80002b0c
80002b0c
80002b0c
80002d0d
7f002b0c
[root@otcpl-perf-test-skx-i9 cpufreq]# rdmsr -p 1 0x774
7f002b0c
[root@otcpl-perf-test-skx-i9 cpufreq]# rdmsr -p 19 0x774
80002d0d
[root@otcpl-perf-test-skx-i9 cpufreq]# rtcwake -m mem -s 10
rtcwake: wakeup from "mem" using /dev/rtc0 at Tue Aug 25 15:04:02 2020
[root@otcpl-perf-test-skx-i9 cpufreq]# rdmsr -a 0x774
80002b0c
8000ff00
8000ff00
8000ff00
8000ff00
8000ff00
8000ff00
8000ff00
8000ff00
8000ff00
8000ff00
8000ff00
8000ff00
8000ff00
8000ff00
8000ff00
8000ff00
8000ff00
8000ff00
8000ff00
[root@otcpl-perf-test-skx-i9 cpufreq]# dmesg > ~/temp/dmesg.txt
[root@otcpl-perf-test-skx-i9 cpufreq]# cat
energy_performance_preference
127
[root@otcpl-perf-test-skx-i9 cpufreq]# rdmsr -p 1 0x774
8000ff00
[root@otcpl-perf-test-skx-i9 cpufreq]# echo passive >
/sys/devices/system/
clockevents/ clocksource/ container/ cpu/ edac/ m
achinecheck/ memory/ node/
[root@otcpl-perf-test-skx-i9 cpufreq]# echo passive >
/sys/devices/system/cpu/intel_pstate/status
[root@otcpl-perf-test-skx-i9 cpufreq]# rdmsr -a 0x774
80002b0c
7f000000
80000000
80002d0c
80002b0c
80000000
80000000
80002b0c
80000000
80002d0c
80000000
80002b0c
80002b0d
80002d0c
80000000
80000000
80000000
80002b0d
80000000
80000000
[root@otcpl-perf-test-skx-i9 cpufreq]# rdmsr -a 0x774
80002b0c
7f000000
80000000
80002d0c
80002b0c
80000000
80000000
80002b0c
80000000
80002d0c
80000000
80002b0c
80002b0d
80002d0c
80000000
80002b0c
80000000
80002b0d
80000000
80000000
> On 2020.08.24 16:56 Srinivas Pandruvada wrote:
> > On Mon, 2020-08-24 at 19:39 +0200, Rafael J. Wysocki wrote:
> > > Hi All,
> > >
> > > The v2 is here to address feedback from Doug and one issue found
> > > by
> > > me.
> > >
> > > The purpose of this series is to address some peculiarities
> > > related
> > > to
> > > taking CPUs offline/online and switching between different
> > > operation
> > > modes with HWP enabled that have become visible after allowing
> > > the
> > > driver to work in the passive mode with HWP enabled in 5.9-rc1
> > > (and
> > > one that was there earlier, but can be addressed easily after the
> > > changes madein 5.9-rc1).
> > >
> > > Please refer to the patch changelogs for details.
> > >
> > > For easier testing/review, the series is available from the git
> > > branch at:
> > >
> > > git://git.kernel.org/pub/scm/linux/kernel/git/rafael/linux-
> > > pm.git \
> > > intel_pstate-testing
> > >
> >
> > Applied these patches to 5.9-rc2
>
> So did I, and the issues I reported the other day are fine now.
> I did try a few of the things you were doing.
>
> > - After s3 limits got messed up.
> > # cat /sys/power/mem_sleep
> > s2idle [deep]
> >
> > - In the dmesg unchecked MSR for HWP register
> >
> > 1.
> > Before test
> >
> > sudo rdmsr -a 0x774
> > 80002b0c
> > 80002b0c
> > 80002d0c
> > 80002b0c
> > 80002b0c
> > 80002b0c
> > 80002b0c
> > 80002b0c
> > 80002d0c
> > 80002b0c
> > 80002b0c
> > 80002b0c
> > 80002d0c
> > 80002b0c
> > 80002b0c
> > 80002b0c
> > 80002b0c
> > 80002b0c
> > 80002d0d
> > 7f002b0c
>
> ?? This looks like the MSR
> read for further below, and for
> CPU 19 instead of 1.
>
> > cd /sys/devices/system/cpu/intel_pstate/
> > [root@otcpl-perf-test-skx-i9 intel_pstate]# grep . *
> > hwp_dynamic_boost:0
> > max_perf_pct:100
> > min_perf_pct:27
> > no_turbo:0
> > num_pstates:32
> > status:active
> > turbo_pct:32
> >
> > cd ../cpu1/cpufreq/
> > [root@otcpl-perf-test-skx-i9 cpufreq]# grep . *
> > affected_cpus:1
> > base_frequency:3300000
> > cpuinfo_max_freq:4300000
> > cpuinfo_min_freq:1200000
> > cpuinfo_transition_latency:0
> > energy_performance_available_preferences:default performance
> > balance_performance balance_power power
> > energy_performance_preference:balance_performance
> > related_cpus:1
> > scaling_available_governors:performance powersave
> > scaling_cur_freq:1200000
> > scaling_driver:intel_pstate
> > scaling_governor:powersave
> > scaling_max_freq:4300000
> > scaling_min_freq:1200000
> > scaling_setspeed:<unsupported>
> >
> >
> > 2. Now change the EPP
> >
> > # echo 127 > energy_performance_preference
> > sudo rdmsr -a 0x774
> > 80002b0c
> > 80002b0c
> > 80002d0c
> > 80002b0c
> > 80002b0c
> > 80002b0c
> > 80002b0c
> > 80002b0c
> > 80002d0c
> > 80002b0c
> > 80002b0c
> > 80002b0c
> > 80002d0c
> > 80002b0c
> > 80002b0c
> > 80002b0c
> > 80002b0c
> > 80002b0c
> > 80002d0d
>
> This looks like the original MSR read.
>
> > Good here
> >
> > 3. Offline/online good
> >
> > [root@otcpl-perf-test-skx-i9 cpufreq]# echo 0 >
> > /sys/devices/system/cpu/cpu1/online
> > [root@otcpl-perf-test-skx-i9 cpufreq]# echo ` >
> > /sys/devices/system/cpu/cpu1/online
> > > echo ` > /sys/devices/system/cpu/cpu1/online ^C
> > [root@otcpl-perf-test-skx-i9 cpufreq]# echo 1 >
> > /sys/devices/system/cpu/cpu1/online
> >
> > sudo rdmsr -a 0x774
> > 80002b0c
> > 80002b0c
> > 80002d0c
> > 80002b0c
> > 80002b0c
> > 80002b0c
> > 80002b0c
> > 80002b0c
> > 80002d0c
> > 80002b0c
> > 80002b0c
> > 80002b0c
> > 80002d0c
> > 80002b0c
> > 80002b0c
> > 80002b0c
> > 80002b0c
> > 80002b0c
> > 80002d0d
> > 7f002b0c
>
> O.K.
>
> > Good. Online restored the setting
> >
> > 4. Now S3
> >
> > rtcwake -m mem -s 10
>
> Cool command. I did not know about it.
> I tried it.
> > All limits are now messed up
> >
> > sudo rdmsr -a 0x774
> > 80002b0c
> > 8000ff00
> > 8000ff00
> > 8000ff00
> > 8000ff00
> > 8000ff00
> > 8000ff00
> > 8000ff00
> > 8000ff00
> > 8000ff00
> > 8000ff00
> > 8000ff00
> > 8000ff00
> > 8000ff00
> > 8000ff00
> > 8000ff00
> > 8000ff00
> > 8000ff00
> > 8000ff00
> > 8000ff00
> >
>
> Yes, I got the same:
>
> # /home/doug/c/msr-decoder (edited)
> 6.) 0x774: IA32_HWP_REQUEST: CPU 0-5 :
> raw: 80002E08 : 8000FF01 : 8000FF01 : 8000FF01 : 8000FF01 :
> 8000FF01 :
> min: 8 : 1 : 1 : 1 : 1
> : 1 :
> max: 46 : 255 : 255 : 255 : 255
> : 255 :
> des: 0 : 0 : 0 : 0 : 0
> : 0 :
> epp: 128 : 128 : 128 : 128 : 128
> : 128 :
> act: 0 : 0 : 0 : 0 : 0
> : 0 :
>
> > 5. Now switch to passive
> > Again bad, some CPU max/min is 0
> >
> > sudo rdmsr -a 0x774
> > 80002b0d
> > 7f002b0f
>
> Hmmm... Now seems to be CPU 1
>
> > 80002b0c
> > 80002d0e
> > 80002b0c
> > 80002b0d
> > 80002b0f
> > 80002b2b
> > 80002b0c
> > 80002d1d
> > 80000000
> > 80002b0c
> > 80002b0c
> > 80000000
> > 80000000
> > 80000000
> > 80000000
> > 80000000
> > 80000000
> > 80000000
>
> MSR 774 was good for me, but in general my decoder was having
> troubles.
>
> 0x774: IA32_HWP_REQUEST: CPU 0-5 :
> sh: 0: getcwd() failed: No such file or directory
> raw: 80002E2E : 7F002E2E : 80002E2E : 80002E2E : 80002E2E :
> 80002E2E :
> min: 46 : 46 : 46 : 46 : 46
> : 46 :
> max: 46 : 46 : 46 : 46 : 46
> : 46 :
> des: 0 : 0 : 0 : 0 : 0
> : 0 :
> epp: 128 : 127 : 128 : 128 : 128
> : 128 :
> act: 0 : 0 : 0 : 0 : 0
> : 0 :
>
> > 6.
> > Switched back to active to restore back
> >
> > Lost EPP setting but rest are good.
> >
> > sudo rdmsr -a 0x774
> > 80002b0c
> > 7f002b0c
>
> And again, now seems to be CPU1.
>
> > 80002b0c
> > 80002d0c
> > 80002b0c
> > 80002b0c
> > 80002b0c
> > 80002b0c
> > 80002b0c
> > 80002d0c
> > 80002b0c
> > 80002b0c
> > 80002b0c
> > 80002d0c
> > 80002b0c
> > 80002b0c
> > 80002b0c
> > 80002b0c
> > 80002b0c
> > 80002d0d
> >
> > 7. S3 again
> >
> > rtcwake -m mem -s 10
> >
> > Again messed up
> >
> > sudo rdmsr -a 0x774
> > 80002b0c
> > 8000ff00
> > 8000ff00
> > 8000ff00
> > 8000ff00
> > 8000ff00
> > 8000ff00
> > 8000ff00
> > 8000ff00
> > 8000ff00
> > 8000ff00
> > 8000ff00
> > 8000ff00
> > 8000ff00
> > 8000ff00
> > 8000ff00
> > 8000ff00
> > 8000ff00
> > 8000ff00
> > 8000ff00
> >
> > This time dmesg didn't have unchecked MSR in dmesg (I think because
> > I
> > didn't change EPP before)
> >
> > Thanks,
> > Srinivas
> >
> > > Thanks,
> > > Rafael
> > >
> > >
> > >
Hi Srinivas,
Thanks for your reply.
On 2020.08.25 08:12 Srinivas Pandruvada wrote:
> On Mon, 2020-08-24 at 18:00 -0700, Doug Smythies wrote:
> > I think there is a disconnect between your written
> > description of what is going on and your supporting MSR reads.
> >
> I reproduced again.
> I see the copy paste individual at the first place swapped.
Yes, and that had me confused, initially.
> I pasted the full output by direct copy - paste from the screen.
>
> But the issues are still there.
Agreed.
I didn't try your offline/online of CPU 1 part previously,
but did now, and get the same results as you.
I did not know that "rdmsr -a 0x774" lists
stuff in the order that the CPUs were last brought on-line.
I had assumed the list was in CPU order. Weird.
My example (nothing new here, just me catching up.
The offline/online order was cpu1, then cpu3, then cpu2):
root@s18:/sys/devices/system/cpu# grep . cpu*/cpufreq/energy_performance_preference
cpu0/cpufreq/energy_performance_preference:balance_performance
cpu1/cpufreq/energy_performance_preference:127
cpu2/cpufreq/energy_performance_preference:125
cpu3/cpufreq/energy_performance_preference:126
cpu4/cpufreq/energy_performance_preference:balance_performance
cpu5/cpufreq/energy_performance_preference:balance_performance
root@s18:/sys/devices/system/cpu# rdmsr -p 0 0x774
80002e2e
root@s18:/sys/devices/system/cpu# rdmsr -p 1 0x774
7f002e2e
root@s18:/sys/devices/system/cpu# rdmsr -p 2 0x774
7d002e2e
root@s18:/sys/devices/system/cpu# rdmsr -p 3 0x774
7e002e2e
root@s18:/sys/devices/system/cpu# rdmsr -p 4 0x774
80002e2e
root@s18:/sys/devices/system/cpu# rdmsr -p 5 0x774
80002e2e
root@s18:/sys/devices/system/cpu# rdmsr -a 0x774
80002e2e
80002e2e
80002e2e
7f002e2e
7e002e2e
7d002e2e
... Doug
On 2020/8/25 1:43, Rafael J. Wysocki wrote:
> From: "Rafael J. Wysocki" <[email protected]>
>
> Add ->offline and ->online driver callbacks to prepare for taking a
> CPU offline and to restore its working configuration when it goes
> back online, respectively, to avoid invoking the ->init callback on
> every CPU online which is quite a bit of unnecessary overhead.
>
> Define ->offline and ->online so that they can be used in the
> passive mode as well as in the active mode and because ->offline
> will do the majority of ->stop_cpu work, the passive mode does
> not need that callback any more, so drop it.
>
> Signed-off-by: Rafael J. Wysocki <[email protected]>
> ---
>
> -> v2: Typo fixes and changelog edits (Doug).
>
> ---
> drivers/cpufreq/intel_pstate.c | 38 ++++++++++++++++++++++++++++------
> 1 file changed, 32 insertions(+), 6 deletions(-)
>
> diff --git a/drivers/cpufreq/intel_pstate.c b/drivers/cpufreq/intel_pstate.c
> index 3d18934fa975..98836ac299db 100644
> --- a/drivers/cpufreq/intel_pstate.c
> +++ b/drivers/cpufreq/intel_pstate.c
> @@ -2297,28 +2297,51 @@ static int intel_pstate_verify_policy(struct cpufreq_policy_data *policy)
> return 0;
> }
>
> -static void intel_cpufreq_stop_cpu(struct cpufreq_policy *policy)
> +static int intel_pstate_cpu_offline(struct cpufreq_policy *policy)
> {
> + pr_debug("CPU %d going offline\n", policy->cpu);
> +
> + intel_pstate_exit_perf_limits(policy);
> +
> + /*
> + * If the CPU is an SMT thread and it goes offline with the performance
> + * settings different from the minimum, it will prevent its sibling
> + * from getting to lower performance levels, so force the minimum
> + * performance on CPU offline to prevent that from happening.
> + */
> if (hwp_active)
> intel_pstate_hwp_force_min_perf(policy->cpu);
> else
> intel_pstate_set_min_pstate(all_cpu_data[policy->cpu]);
> +
> + return 0;
> +}
> +
> +static int intel_pstate_cpu_online(struct cpufreq_policy *policy)
> +{
> + pr_debug("CPU %d going online\n", policy->cpu);
> +
> + intel_pstate_init_acpi_perf_limits(policy);
> +
> + if (hwp_active)
> + wrmsrl_on_cpu(policy->cpu, MSR_HWP_REQUEST,
> + all_cpu_data[policy->cpu]->hwp_req_cached);
> +
> + return 0;
> }
On an Ice Lake server, there seems to be a bug when CONFIG_X86_INTEL_PSTATE=y
is set and no intel_pstate=xxx option is given on the kernel command line.
Although the performance governor is used, the CPU ends up at the lowest
frequency in scaling_cur_freq after it goes offline and then comes back
online, while running the same infinite loop load.
How to reproduce:
echo performance > /sys/devices/system/cpu/cpu12/cpufreq/scaling_governor
cat while_true.c
#include <stdio.h>
void main(void)
{
while(1);
}
[root@localhost freq_test]# cat test.sh
#!/bin/bash
cat /sys/devices/system/cpu/cpu${1}/cpufreq/scaling_cur_freq
cat /sys/devices/system/cpu/cpu${1}/cpufreq/scaling_governor
taskset -c ${1} ./while_true &
sleep 1s
cat /sys/devices/system/cpu/cpu${1}/cpufreq/scaling_cur_freq
echo 0 > /sys/devices/system/cpu/cpu${1}/online
sleep 1s
cat /sys/devices/system/cpu/cpu${1}/cpufreq/scaling_cur_freq
sleep 1s
echo 1 > /sys/devices/system/cpu/cpu${1}/online
cat /sys/devices/system/cpu/cpu${1}/cpufreq/scaling_cur_freq
taskset -c ${1} ./while_true &
sleep 1s
cat /sys/devices/system/cpu/cpu${1}/cpufreq/scaling_cur_freq
sleep 1s
cat /sys/devices/system/cpu/cpu${1}/cpufreq/scaling_cur_freq
sleep 1s
cat /sys/devices/system/cpu/cpu${1}/cpufreq/scaling_cur_freq
[root@localhost freq_test]# sh test.sh 40
2300000
performance
2299977
cat: /sys/devices/system/cpu/cpu40/cpufreq/scaling_cur_freq: Device or
resource busy
2300000
2300022
2300000
2299953
[root@localhost freq_test]# sh test.sh 50
2300000
performance
2300000
cat: /sys/devices/system/cpu/cpu50/cpufreq/scaling_cur_freq: Device or
resource busy
2300000
2299977
2300022
2299977
[root@localhost freq_test]# sh test.sh 20
2300000
performance
2299977
cat: /sys/devices/system/cpu/cpu20/cpufreq/scaling_cur_freq: Device or
resource busy
800000
800000
800000
799992
[root@localhost freq_test]# sh test.sh 21
2300000
performance
2300000
cat: /sys/devices/system/cpu/cpu21/cpufreq/scaling_cur_freq: Device or
resource busy
800000
800000
800000
800000
[root@localhost freq_test]# cat
/sys/devices/system/cpu/cpu21/cpufreq/scaling_max_freq
2300000
[root@localhost freq_test]# cat
/sys/devices/system/cpu/cpu21/cpufreq/scaling_min_freq
800000
>
> static void intel_pstate_stop_cpu(struct cpufreq_policy *policy)
> {
> - pr_debug("CPU %d exiting\n", policy->cpu);
> + pr_debug("CPU %d stopping\n", policy->cpu);
>
> intel_pstate_clear_update_util_hook(policy->cpu);
> if (hwp_active)
> intel_pstate_hwp_save_state(policy);
> -
> - intel_cpufreq_stop_cpu(policy);
> }
>
> static int intel_pstate_cpu_exit(struct cpufreq_policy *policy)
> {
> - intel_pstate_exit_perf_limits(policy);
> + pr_debug("CPU %d exiting\n", policy->cpu);
>
> policy->fast_switch_possible = false;
>
> @@ -2398,6 +2421,8 @@ static struct cpufreq_driver intel_pstate = {
> .init = intel_pstate_cpu_init,
> .exit = intel_pstate_cpu_exit,
> .stop_cpu = intel_pstate_stop_cpu,
> + .offline = intel_pstate_cpu_offline,
> + .online = intel_pstate_cpu_online,
> .update_limits = intel_pstate_update_limits,
> .name = "intel_pstate",
> };
> @@ -2652,7 +2677,8 @@ static struct cpufreq_driver intel_cpufreq = {
> .fast_switch = intel_cpufreq_fast_switch,
> .init = intel_cpufreq_cpu_init,
> .exit = intel_cpufreq_cpu_exit,
> - .stop_cpu = intel_cpufreq_stop_cpu,
> + .offline = intel_pstate_cpu_offline,
> + .online = intel_pstate_cpu_online,
> .update_limits = intel_pstate_update_limits,
> .name = "intel_cpufreq",
> };
On Fri, Nov 3, 2023 at 3:57 AM Jinjie Ruan <[email protected]> wrote:
> On 2020/8/25 1:43, Rafael J. Wysocki wrote:
> > From: "Rafael J. Wysocki" <[email protected]>
> >
> > Add ->offline and ->online driver callbacks to prepare for taking a
> > CPU offline and to restore its working configuration when it goes
> > back online, respectively, to avoid invoking the ->init callback on
> > every CPU online which is quite a bit of unnecessary overhead.
> >
> > Define ->offline and ->online so that they can be used in the
> > passive mode as well as in the active mode and because ->offline
> > will do the majority of ->stop_cpu work, the passive mode does
> > not need that callback any more, so drop it.
> >
> > Signed-off-by: Rafael J. Wysocki <[email protected]>
> > ---
> >
> > -> v2: Typo fixes and changelog edits (Doug).
> >
> > ---
> > drivers/cpufreq/intel_pstate.c | 38 ++++++++++++++++++++++++++++------
> > 1 file changed, 32 insertions(+), 6 deletions(-)
> >
> > diff --git a/drivers/cpufreq/intel_pstate.c b/drivers/cpufreq/intel_pstate.c
> > index 3d18934fa975..98836ac299db 100644
> > --- a/drivers/cpufreq/intel_pstate.c
> > +++ b/drivers/cpufreq/intel_pstate.c
> > @@ -2297,28 +2297,51 @@ static int intel_pstate_verify_policy(struct cpufreq_policy_data *policy)
> > return 0;
> > }
> >
> > -static void intel_cpufreq_stop_cpu(struct cpufreq_policy *policy)
> > +static int intel_pstate_cpu_offline(struct cpufreq_policy *policy)
> > {
> > + pr_debug("CPU %d going offline\n", policy->cpu);
> > +
> > + intel_pstate_exit_perf_limits(policy);
> > +
> > + /*
> > + * If the CPU is an SMT thread and it goes offline with the performance
> > + * settings different from the minimum, it will prevent its sibling
> > + * from getting to lower performance levels, so force the minimum
> > + * performance on CPU offline to prevent that from happening.
> > + */
> > if (hwp_active)
> > intel_pstate_hwp_force_min_perf(policy->cpu);
> > else
> > intel_pstate_set_min_pstate(all_cpu_data[policy->cpu]);
> > +
> > + return 0;
> > +}
> > +
> > +static int intel_pstate_cpu_online(struct cpufreq_policy *policy)
> > +{
> > + pr_debug("CPU %d going online\n", policy->cpu);
> > +
> > + intel_pstate_init_acpi_perf_limits(policy);
> > +
> > + if (hwp_active)
> > + wrmsrl_on_cpu(policy->cpu, MSR_HWP_REQUEST,
> > + all_cpu_data[policy->cpu]->hwp_req_cached);
> > +
> > + return 0;
> > }
>
> On Ice Lake server, there seems a bug when CONFIG_X86_INTEL_PSTATE=y and
> not configure intel_pstate=xxx in command line.
>
> Although the Performance tuner is used, the CPU have the lowest
> frequency in scaling_cur_freq after the CPU goes offline and then goes
> online, running the same infinite loop load.
>
> How to produce:
>
> echo performance > /sys/devices/system/cpu/cpu12/cpufreq/scaling_governor
>
> cat while_true.c
> #include <stdio.h>
> void main(void)
> {
> while(1);
> }
>
>
> [root@localhost freq_test]# cat test.sh
> #!/bin/bash
>
> cat /sys/devices/system/cpu/cpu${1}/cpufreq/scaling_cur_freq
> cat /sys/devices/system/cpu/cpu${1}/cpufreq/scaling_governor
> taskset -c ${1} ./while_true &
> sleep 1s
>
> cat /sys/devices/system/cpu/cpu${1}/cpufreq/scaling_cur_freq
>
> echo 0 > /sys/devices/system/cpu/cpu${1}/online
>
> sleep 1s
> cat /sys/devices/system/cpu/cpu${1}/cpufreq/scaling_cur_freq
>
> sleep 1s
>
> echo 1 > /sys/devices/system/cpu/cpu${1}/online
> cat /sys/devices/system/cpu/cpu${1}/cpufreq/scaling_cur_freq
>
> taskset -c ${1} ./while_true &
>
> sleep 1s
> cat /sys/devices/system/cpu/cpu${1}/cpufreq/scaling_cur_freq
>
> sleep 1s
> cat /sys/devices/system/cpu/cpu${1}/cpufreq/scaling_cur_freq
>
> sleep 1s
> cat /sys/devices/system/cpu/cpu${1}/cpufreq/scaling_cur_freq
>
>
> [root@localhost freq_test]# sh test.sh 40
> 2300000
> performance
> 2299977
> cat: /sys/devices/system/cpu/cpu40/cpufreq/scaling_cur_freq: Device or
> resource busy
> 2300000
> 2300022
> 2300000
> 2299953
> [root@localhost freq_test]# sh test.sh 50
> 2300000
> performance
> 2300000
> cat: /sys/devices/system/cpu/cpu50/cpufreq/scaling_cur_freq: Device or
> resource busy
> 2300000
> 2299977
> 2300022
> 2299977
> [root@localhost freq_test]# sh test.sh 20
> 2300000
> performance
> 2299977
> cat: /sys/devices/system/cpu/cpu20/cpufreq/scaling_cur_freq: Device or
> resource busy
> 800000
> 800000
> 800000
> 799992
> [root@localhost freq_test]# sh test.sh 21
> 2300000
> performance
> 2300000
> cat: /sys/devices/system/cpu/cpu21/cpufreq/scaling_cur_freq: Device or
> resource busy
> 800000
> 800000
> 800000
> 800000
>
> [root@localhost freq_test]# cat
> /sys/devices/system/cpu/cpu21/cpufreq/scaling_max_freq
> 2300000
> [root@localhost freq_test]# cat
> /sys/devices/system/cpu/cpu21/cpufreq/scaling_min_freq
> 800000
Hi,
I followed your "how to reproduce" notes exactly.
So far, I have been unable to reproduce your issue.
I am using kernel 6.6.
My processor is:
Intel(R) Core(TM) i5-10600K CPU @ 4.10GHz
Results:
root@s19:/home/doug/pstate# ./test.sh 8
800000
performance
4799994
cat: /sys/devices/system/cpu/cpu8/cpufreq/scaling_cur_freq: Device or
resource busy
4799999
4800000
4800001
4799996
root@s19:/home/doug/pstate# ./test.sh 7
800000
performance
4800001
cat: /sys/devices/system/cpu/cpu7/cpufreq/scaling_cur_freq: Device or
resource busy
4799967
4800028
4800006
4799997
root@s19:/home/doug/pstate# ./test.sh 6
800000
performance
4800001
cat: /sys/devices/system/cpu/cpu6/cpufreq/scaling_cur_freq: Device or
resource busy
4799983
4800001
4799993
4800002
root@s19:/home/doug/pstate# ./test.sh 5
800000
performance
4799990
cat: /sys/devices/system/cpu/cpu5/cpufreq/scaling_cur_freq: Device or
resource busy
4800006
4800002
4800011
4799980
root@s19:/home/doug/pstate# ./test.sh 4
4799940
performance
4799985
cat: /sys/devices/system/cpu/cpu4/cpufreq/scaling_cur_freq: Device or
resource busy
4799975
4799994
4799984
4799996
root@s19:/home/doug/pstate# ./test.sh 3
4799986
performance
4799990
cat: /sys/devices/system/cpu/cpu3/cpufreq/scaling_cur_freq: Device or
resource busy
4799976
4800015
4800000
4799995
... Doug
On 2023/11/3 23:56, Doug Smythies wrote:
> On Fri, Nov 3, 2023 at 3:57 AM Jinjie Ruan <[email protected]> wrote:
>> On 2020/8/25 1:43, Rafael J. Wysocki wrote:
>>> From: "Rafael J. Wysocki" <[email protected]>
>>>
>>> Add ->offline and ->online driver callbacks to prepare for taking a
>>> CPU offline and to restore its working configuration when it goes
>>> back online, respectively, to avoid invoking the ->init callback on
>>> every CPU online which is quite a bit of unnecessary overhead.
>>>
>>> Define ->offline and ->online so that they can be used in the
>>> passive mode as well as in the active mode and because ->offline
>>> will do the majority of ->stop_cpu work, the passive mode does
>>> not need that callback any more, so drop it.
>>>
>>> Signed-off-by: Rafael J. Wysocki <[email protected]>
>>> ---
>>>
>>> -> v2: Typo fixes and changelog edits (Doug).
>>>
>>> ---
>>> drivers/cpufreq/intel_pstate.c | 38 ++++++++++++++++++++++++++++------
>>> 1 file changed, 32 insertions(+), 6 deletions(-)
>>>
>>> diff --git a/drivers/cpufreq/intel_pstate.c b/drivers/cpufreq/intel_pstate.c
>>> index 3d18934fa975..98836ac299db 100644
>>> --- a/drivers/cpufreq/intel_pstate.c
>>> +++ b/drivers/cpufreq/intel_pstate.c
>>> @@ -2297,28 +2297,51 @@ static int intel_pstate_verify_policy(struct cpufreq_policy_data *policy)
>>> return 0;
>>> }
>>>
>>> -static void intel_cpufreq_stop_cpu(struct cpufreq_policy *policy)
>>> +static int intel_pstate_cpu_offline(struct cpufreq_policy *policy)
>>> {
>>> + pr_debug("CPU %d going offline\n", policy->cpu);
>>> +
>>> + intel_pstate_exit_perf_limits(policy);
>>> +
>>> + /*
>>> + * If the CPU is an SMT thread and it goes offline with the performance
>>> + * settings different from the minimum, it will prevent its sibling
>>> + * from getting to lower performance levels, so force the minimum
>>> + * performance on CPU offline to prevent that from happening.
>>> + */
>>> if (hwp_active)
>>> intel_pstate_hwp_force_min_perf(policy->cpu);
>>> else
>>> intel_pstate_set_min_pstate(all_cpu_data[policy->cpu]);
>>> +
>>> + return 0;
>>> +}
>>> +
>>> +static int intel_pstate_cpu_online(struct cpufreq_policy *policy)
>>> +{
>>> + pr_debug("CPU %d going online\n", policy->cpu);
>>> +
>>> + intel_pstate_init_acpi_perf_limits(policy);
>>> +
>>> + if (hwp_active)
>>> + wrmsrl_on_cpu(policy->cpu, MSR_HWP_REQUEST,
>>> + all_cpu_data[policy->cpu]->hwp_req_cached);
>>> +
>>> + return 0;
>>> }
>>
>> On Ice Lake server, there seems a bug when CONFIG_X86_INTEL_PSTATE=y and
>> not configure intel_pstate=xxx in command line.
>>
>> Although the Performance tuner is used, the CPU have the lowest
>> frequency in scaling_cur_freq after the CPU goes offline and then goes
>> online, running the same infinite loop load.
>>
>> How to produce:
>>
>> echo performance > /sys/devices/system/cpu/cpu12/cpufreq/scaling_governor
>>
>> cat while_true.c
>> #include <stdio.h>
>> void main(void)
>> {
>> while(1);
>> }
>>
>>
>> [root@localhost freq_test]# cat test.sh
>> #!/bin/bash
>>
>> cat /sys/devices/system/cpu/cpu${1}/cpufreq/scaling_cur_freq
>> cat /sys/devices/system/cpu/cpu${1}/cpufreq/scaling_governor
>> taskset -c ${1} ./while_true &
>> sleep 1s
>>
>> cat /sys/devices/system/cpu/cpu${1}/cpufreq/scaling_cur_freq
>>
>> echo 0 > /sys/devices/system/cpu/cpu${1}/online
>>
>> sleep 1s
>> cat /sys/devices/system/cpu/cpu${1}/cpufreq/scaling_cur_freq
>>
>> sleep 1s
>>
>> echo 1 > /sys/devices/system/cpu/cpu${1}/online
>> cat /sys/devices/system/cpu/cpu${1}/cpufreq/scaling_cur_freq
>>
>> taskset -c ${1} ./while_true &
>>
>> sleep 1s
>> cat /sys/devices/system/cpu/cpu${1}/cpufreq/scaling_cur_freq
>>
>> sleep 1s
>> cat /sys/devices/system/cpu/cpu${1}/cpufreq/scaling_cur_freq
>>
>> sleep 1s
>> cat /sys/devices/system/cpu/cpu${1}/cpufreq/scaling_cur_freq
>>
>>
>> [root@localhost freq_test]# sh test.sh 40
>> 2300000
>> performance
>> 2299977
>> cat: /sys/devices/system/cpu/cpu40/cpufreq/scaling_cur_freq: Device or
>> resource busy
>> 2300000
>> 2300022
>> 2300000
>> 2299953
>> [root@localhost freq_test]# sh test.sh 50
>> 2300000
>> performance
>> 2300000
>> cat: /sys/devices/system/cpu/cpu50/cpufreq/scaling_cur_freq: Device or
>> resource busy
>> 2300000
>> 2299977
>> 2300022
>> 2299977
>> [root@localhost freq_test]# sh test.sh 20
>> 2300000
>> performance
>> 2299977
>> cat: /sys/devices/system/cpu/cpu20/cpufreq/scaling_cur_freq: Device or
>> resource busy
>> 800000
>> 800000
>> 800000
>> 799992
>> [root@localhost freq_test]# sh test.sh 21
>> 2300000
>> performance
>> 2300000
>> cat: /sys/devices/system/cpu/cpu21/cpufreq/scaling_cur_freq: Device or
>> resource busy
>> 800000
>> 800000
>> 800000
>> 800000
>>
>> [root@localhost freq_test]# cat
>> /sys/devices/system/cpu/cpu21/cpufreq/scaling_max_freq
>> 2300000
>> [root@localhost freq_test]# cat
>> /sys/devices/system/cpu/cpu21/cpufreq/scaling_min_freq
>> 800000
>
> Hi,
>
> I followed your "how to reproduce" notes exactly.
> So far, I have been unable to reproduce your issue.
It seems that this issue is platform-specific.
The following CPU families have the issue:
1. Products formerly Haswell
2. Model name: Intel(R) Xeon(R) Platinum 8380 CPU @
2.30GHz (Ice Lake server)
But the following CPU families do not have the issue:
1. Model name: Intel(R) Xeon(R) CPU E5-2620 v2 @
2.10GHz
2. Model name: Intel(R) Xeon(R) CPU E5-2698 v3 @ 2.30GHz
>
> I am using kernel 6.6.
> My processor is:
> Intel(R) Core(TM) i5-10600K CPU @ 4.10GHz
>
> Results:
> root@s19:/home/doug/pstate# ./test.sh 8
> 800000
> performance
> 4799994
> cat: /sys/devices/system/cpu/cpu8/cpufreq/scaling_cur_freq: Device or
> resource busy
> 4799999
> 4800000
> 4800001
> 4799996
> root@s19:/home/doug/pstate# ./test.sh 7
> 800000
> performance
> 4800001
> cat: /sys/devices/system/cpu/cpu7/cpufreq/scaling_cur_freq: Device or
> resource busy
> 4799967
> 4800028
> 4800006
> 4799997
> root@s19:/home/doug/pstate# ./test.sh 6
> 800000
> performance
> 4800001
> cat: /sys/devices/system/cpu/cpu6/cpufreq/scaling_cur_freq: Device or
> resource busy
> 4799983
> 4800001
> 4799993
> 4800002
> root@s19:/home/doug/pstate# ./test.sh 5
> 800000
> performance
> 4799990
> cat: /sys/devices/system/cpu/cpu5/cpufreq/scaling_cur_freq: Device or
> resource busy
> 4800006
> 4800002
> 4800011
> 4799980
> root@s19:/home/doug/pstate# ./test.sh 4
> 4799940
> performance
> 4799985
> cat: /sys/devices/system/cpu/cpu4/cpufreq/scaling_cur_freq: Device or
> resource busy
> 4799975
> 4799994
> 4799984
> 4799996
> root@s19:/home/doug/pstate# ./test.sh 3
> 4799986
> performance
> 4799990
> cat: /sys/devices/system/cpu/cpu3/cpufreq/scaling_cur_freq: Device or
> resource busy
> 4799976
> 4800015
> 4800000
> 4799995
>
> ... Doug
>