This patchsets adds support for precision boost hardware control
for AMD processors.
Meanwhile the patchset fixs lowest perf query and desired perf scope issues.
Update transition delay and latency default value to meet SMU firmware requirement.
and do some code cleanups,
It also exports cpufreq cpu release and acquire for coming amd-pstate epp mode driver
The patch series are tested on the AMD mobile and EYPC server systems
v3->v4:
- check cached bit for core performance boost from hardware configuration
register
- pick up the Acked-by flags from Viresh and Sudeep
v2->v3:
- drop cpufreq cpu release and acquire export patch
- remove the clamp_t in the amd_pstate_adjust_perf()
v1->v2:
- add two new patches to remove the acpi_disabled check
- fix some typos in the commit info
- move the clamp_t() into amd_pstate_update() function
- rebased to 5.19-rc5
[v3] https://lore.kernel.org/all/20220713052944.xpk4fi7nuj6rmekd@vireshk-i7/t/
[v2]:https://lore.kernel.org/lkml/[email protected]/
[v1]:https://patchwork.kernel.org/project/linux-pm/list/?series=657510
Perry Yuan (13):
x86/msr: Add the MSR definition for AMD CPPC hardware control.
cpufreq: amd-pstate: enable AMD Precision Boost mode switch
cpufreq: amd-pstate: cleanup the unused and duplicated headers
declaration
cpufreq: amd-pstate: prefetch cppc_req_cached value in
amd_pstate_cpu_init()
cpufreq: amd-pstate: simplify cpudata pointer assignment
cpufreq: amd_pstate: fix wrong lowest perf fetch
cpufreq: amd_pstate: map desired perf into pstate scope for powersave
governor
cpufreq: amd-pstate: fix white-space
cpufreq: amd-pstate: update pstate frequency transition delay time
cpufreq: amd-pstate: add ACPI disabled check in acpi_cpc_valid()
cpufreq: amd_pstate: update transition delay time to 1ms
arch_topology: remove the acpi_disabled check
cpufreq: CPPC: remove the acpi_disabled check
arch/x86/include/asm/msr-index.h | 3 ++
drivers/acpi/cppc_acpi.c | 3 ++
drivers/base/arch_topology.c | 2 +-
drivers/cpufreq/amd-pstate.c | 62 +++++++++++++++++++-------------
drivers/cpufreq/cppc_cpufreq.c | 2 +-
5 files changed, 45 insertions(+), 27 deletions(-)
--
2.32.0
This MSR can be used for controlling whether the CPU boost state
is enabled in the hardware.
AMD Processor Programming Reference (PPR)
Link: https://www.amd.com/system/files/TechDocs/40332.pdf [p1095]
Link: https://www.amd.com/system/files/TechDocs/56569-A1-PUB.zip [p162]
Signed-off-by: Perry Yuan <[email protected]>
---
arch/x86/include/asm/msr-index.h | 1 +
1 file changed, 1 insertion(+)
diff --git a/arch/x86/include/asm/msr-index.h b/arch/x86/include/asm/msr-index.h
index d27e0581b777..869508de8269 100644
--- a/arch/x86/include/asm/msr-index.h
+++ b/arch/x86/include/asm/msr-index.h
@@ -548,6 +548,7 @@
#define MSR_AMD_CPPC_CAP2 0xc00102b2
#define MSR_AMD_CPPC_REQ 0xc00102b3
#define MSR_AMD_CPPC_STATUS 0xc00102b4
+#define MSR_AMD_CPPC_HW_CTL 0xc0010015
#define AMD_CPPC_LOWEST_PERF(x) (((x) >> 0) & 0xff)
#define AMD_CPPC_LOWNONLIN_PERF(x) (((x) >> 8) & 0xff)
--
2.32.0
Add support to switch AMD precision boost state to scale cpu max
frequency that will help to improve the processor throughput.
when set boost state to be enabled, user will need to execute below commands,
the CPU will reach absolute maximum performance level or the highest perf which
CPU physical support. This performance level may not be sustainable for
long durations, it will help to improve the IO workload tasks.
* turn on CPU boost state under root
echo 1 > /sys/devices/system/cpu/cpufreq/boost
If user set boost off,the CPU can reach to the maximum sustained
performance level of the process, that level is the process can maintain
continously working and definitely it can save some power compared to
boost on mode.
* turn off CPU boost state under root
echo 0 > /sys/devices/system/cpu/cpufreq/boost
Signed-off-by: Perry Yuan <[email protected]>
---
arch/x86/include/asm/msr-index.h | 2 ++
drivers/cpufreq/amd-pstate.c | 22 +++++++++++++++++++---
2 files changed, 21 insertions(+), 3 deletions(-)
diff --git a/arch/x86/include/asm/msr-index.h b/arch/x86/include/asm/msr-index.h
index 869508de8269..b952fd6d6916 100644
--- a/arch/x86/include/asm/msr-index.h
+++ b/arch/x86/include/asm/msr-index.h
@@ -559,6 +559,8 @@
#define AMD_CPPC_MIN_PERF(x) (((x) & 0xff) << 8)
#define AMD_CPPC_DES_PERF(x) (((x) & 0xff) << 16)
#define AMD_CPPC_ENERGY_PERF_PREF(x) (((x) & 0xff) << 24)
+#define AMD_CPPC_PRECISION_BOOST_BIT 25
+#define AMD_CPPC_PRECISION_BOOST_ENABLED BIT_ULL(AMD_CPPC_PRECISION_BOOST_BIT)
/* AMD Performance Counter Global Status and Control MSRs */
#define MSR_AMD64_PERF_CNTR_GLOBAL_STATUS 0xc0000300
diff --git a/drivers/cpufreq/amd-pstate.c b/drivers/cpufreq/amd-pstate.c
index 9ac75c1cde9c..188e055e24a2 100644
--- a/drivers/cpufreq/amd-pstate.c
+++ b/drivers/cpufreq/amd-pstate.c
@@ -122,6 +122,7 @@ struct amd_cpudata {
u64 freq;
bool boost_supported;
+ u64 cppc_hw_conf_cached;
};
static inline int pstate_enable(bool enable)
@@ -438,18 +439,27 @@ static int amd_pstate_set_boost(struct cpufreq_policy *policy, int state)
{
struct amd_cpudata *cpudata = policy->driver_data;
int ret;
+ u64 value;
if (!cpudata->boost_supported) {
pr_err("Boost mode is not supported by this processor or SBIOS\n");
return -EINVAL;
}
- if (state)
+ ret = rdmsrl_on_cpu(cpudata->cpu, MSR_AMD_CPPC_HW_CTL, &value);
+ if (ret)
+ return ret;
+
+ if (state) {
+ value |= AMD_CPPC_PRECISION_BOOST_ENABLED;
policy->cpuinfo.max_freq = cpudata->max_freq;
- else
+ } else {
+ value &= ~AMD_CPPC_PRECISION_BOOST_ENABLED;
policy->cpuinfo.max_freq = cpudata->nominal_freq;
-
+ }
policy->max = policy->cpuinfo.max_freq;
+ WRITE_ONCE(cpudata->cppc_hw_conf_cached, value);
+ wrmsrl_on_cpu(cpudata->cpu, MSR_AMD_CPPC_HW_CTL, value);
ret = freq_qos_update_request(&cpudata->req[1],
policy->cpuinfo.max_freq);
@@ -478,6 +488,7 @@ static int amd_pstate_cpu_init(struct cpufreq_policy *policy)
int min_freq, max_freq, nominal_freq, lowest_nonlinear_freq, ret;
struct device *dev;
struct amd_cpudata *cpudata;
+ u64 value;
dev = get_cpu_device(policy->cpu);
if (!dev)
@@ -542,6 +553,11 @@ static int amd_pstate_cpu_init(struct cpufreq_policy *policy)
policy->driver_data = cpudata;
+ ret = rdmsrl_on_cpu(cpudata->cpu, MSR_AMD_CPPC_HW_CTL, &value);
+ if (ret)
+ return ret;
+ WRITE_ONCE(cpudata->cppc_hw_conf_cached, value);
+
amd_pstate_boost_init(cpudata);
return 0;
--
2.32.0
Cleanup the headers declaration which are not used
actually and some duplicated declaration which is declarated in some
other headers already, it will help to simplify the header part.
Acked-by: Viresh Kumar <[email protected]>
Signed-off-by: Perry Yuan <[email protected]>
---
drivers/cpufreq/amd-pstate.c | 5 -----
1 file changed, 5 deletions(-)
diff --git a/drivers/cpufreq/amd-pstate.c b/drivers/cpufreq/amd-pstate.c
index 188e055e24a2..43e6df9f67f6 100644
--- a/drivers/cpufreq/amd-pstate.c
+++ b/drivers/cpufreq/amd-pstate.c
@@ -31,19 +31,14 @@
#include <linux/compiler.h>
#include <linux/dmi.h>
#include <linux/slab.h>
-#include <linux/acpi.h>
#include <linux/io.h>
#include <linux/delay.h>
#include <linux/uaccess.h>
#include <linux/static_call.h>
-#include <acpi/processor.h>
#include <acpi/cppc_acpi.h>
#include <asm/msr.h>
-#include <asm/processor.h>
-#include <asm/cpufeature.h>
-#include <asm/cpu_device_id.h>
#include "amd-pstate-trace.h"
#define AMD_PSTATE_TRANSITION_LATENCY 0x20000
--
2.32.0
This cppc_req_cached valued should be prefetched during
amd_pstate_cpu_init call period, then the amd_pstate_update() will get
correct cached value before updating the perf to change the cpu perf
level.The cached values are read through MSR interface, so here use
shared_mem flag to check the registers are accessible
Also the core performance boost state will be initialized through
hardware configuration register
* shared_mem flag is used for the shared memory type CPPC implementation
which dose not support MSR interface operation
Signed-off-by: Perry Yuan <[email protected]>
---
drivers/cpufreq/amd-pstate.c | 18 +++++++++++++-----
1 file changed, 13 insertions(+), 5 deletions(-)
diff --git a/drivers/cpufreq/amd-pstate.c b/drivers/cpufreq/amd-pstate.c
index 43e6df9f67f6..d8c4153dbe4f 100644
--- a/drivers/cpufreq/amd-pstate.c
+++ b/drivers/cpufreq/amd-pstate.c
@@ -92,6 +92,8 @@ struct amd_aperf_mperf {
* @prev: Last Aperf/Mperf/tsc count value read from register
* @freq: current cpu frequency value
* @boost_supported: check whether the Processor or SBIOS supports boost mode
+ * @precision_boost_off: the core performance boost disabled state
+ * @cppc_hw_conf_cached: the cached hardware configuration register
*
* The amd_cpudata is key private data for each CPU thread in AMD P-State, and
* represents all the attributes and goals that AMD P-State requests at runtime.
@@ -117,6 +119,7 @@ struct amd_cpudata {
u64 freq;
bool boost_supported;
+ bool precision_boost_off;
u64 cppc_hw_conf_cached;
};
@@ -547,12 +550,17 @@ static int amd_pstate_cpu_init(struct cpufreq_policy *policy)
cpudata->lowest_nonlinear_freq = lowest_nonlinear_freq;
policy->driver_data = cpudata;
+ if (!shared_mem) {
+ ret = rdmsrl_on_cpu(cpudata->cpu, MSR_AMD_CPPC_HW_CTL, &value);
+ if (ret)
+ return ret;
+ cpudata->precision_boost_off = value & AMD_CPPC_PRECISION_BOOST_ENABLED;
- ret = rdmsrl_on_cpu(cpudata->cpu, MSR_AMD_CPPC_HW_CTL, &value);
- if (ret)
- return ret;
- WRITE_ONCE(cpudata->cppc_hw_conf_cached, value);
-
+ ret = rdmsrl_on_cpu(cpudata->cpu, MSR_AMD_CPPC_REQ, &value);
+ if (ret)
+ return ret;
+ WRITE_ONCE(cpudata->cppc_req_cached, value);
+ }
amd_pstate_boost_init(cpudata);
return 0;
--
2.32.0
move the cpudata assignment to cpudata declaration which
will simplify the functions.
No functional change intended.
Acked-by: Viresh Kumar <[email protected]>
Signed-off-by: Perry Yuan <[email protected]>
---
drivers/cpufreq/amd-pstate.c | 12 +++---------
1 file changed, 3 insertions(+), 9 deletions(-)
diff --git a/drivers/cpufreq/amd-pstate.c b/drivers/cpufreq/amd-pstate.c
index d8c4153dbe4f..0c8256638ed7 100644
--- a/drivers/cpufreq/amd-pstate.c
+++ b/drivers/cpufreq/amd-pstate.c
@@ -574,9 +574,7 @@ static int amd_pstate_cpu_init(struct cpufreq_policy *policy)
static int amd_pstate_cpu_exit(struct cpufreq_policy *policy)
{
- struct amd_cpudata *cpudata;
-
- cpudata = policy->driver_data;
+ struct amd_cpudata *cpudata = policy->driver_data;
freq_qos_remove_request(&cpudata->req[1]);
freq_qos_remove_request(&cpudata->req[0]);
@@ -618,9 +616,7 @@ static ssize_t show_amd_pstate_max_freq(struct cpufreq_policy *policy,
char *buf)
{
int max_freq;
- struct amd_cpudata *cpudata;
-
- cpudata = policy->driver_data;
+ struct amd_cpudata *cpudata = policy->driver_data;
max_freq = amd_get_max_freq(cpudata);
if (max_freq < 0)
@@ -633,9 +629,7 @@ static ssize_t show_amd_pstate_lowest_nonlinear_freq(struct cpufreq_policy *poli
char *buf)
{
int freq;
- struct amd_cpudata *cpudata;
-
- cpudata = policy->driver_data;
+ struct amd_cpudata *cpudata = policy->driver_data;
freq = amd_get_lowest_nonlinear_freq(cpudata);
if (freq < 0)
--
2.32.0
Update transition delay time to 1ms, in the AMD CPU autonomous mode and
non-autonomous mode, CPPC firmware will decide frequency at 1ms timescale
based on the workload utilization.
Acked-by: Viresh Kumar <[email protected]>
Signed-off-by: Perry Yuan <[email protected]>
Signed-off-by: Su Jinzhou <[email protected]>
---
drivers/cpufreq/amd-pstate.c | 2 +-
1 file changed, 1 insertion(+), 1 deletion(-)
diff --git a/drivers/cpufreq/amd-pstate.c b/drivers/cpufreq/amd-pstate.c
index 4f8600a36194..d3bc441b3923 100644
--- a/drivers/cpufreq/amd-pstate.c
+++ b/drivers/cpufreq/amd-pstate.c
@@ -42,7 +42,7 @@
#include "amd-pstate-trace.h"
#define AMD_PSTATE_TRANSITION_LATENCY 20000
-#define AMD_PSTATE_TRANSITION_DELAY 500
+#define AMD_PSTATE_TRANSITION_DELAY 1000
/*
* TODO: We need more time to fine tune processors with shared memory solution
--
2.32.0
Fix the wrong lowest perf value reading which is used for new
des_perf calculation by governor requested, the incorrect min_perf will
get incorrect des_perf to be set , that will cause the system frequency
changing unexpectedly.
Acked-by: Viresh Kumar <[email protected]>
Signed-off-by: Perry Yuan <[email protected]>
Signed-off-by: Su Jinzhou <[email protected]>
---
drivers/cpufreq/amd-pstate.c | 2 +-
1 file changed, 1 insertion(+), 1 deletion(-)
diff --git a/drivers/cpufreq/amd-pstate.c b/drivers/cpufreq/amd-pstate.c
index 0c8256638ed7..4b764870035e 100644
--- a/drivers/cpufreq/amd-pstate.c
+++ b/drivers/cpufreq/amd-pstate.c
@@ -311,7 +311,7 @@ static int amd_pstate_target(struct cpufreq_policy *policy,
return -ENODEV;
cap_perf = READ_ONCE(cpudata->highest_perf);
- min_perf = READ_ONCE(cpudata->lowest_nonlinear_perf);
+ min_perf = READ_ONCE(cpudata->lowest_perf);
max_perf = cap_perf;
freqs.old = policy->cur;
--
2.32.0
The patch will fix the invalid desired perf value for powersave
governor. This issue is found when testing on one AMD EPYC system, the
actual des_perf is smaller than the min_perf value, that is invalid
value. because the min_perf is the lowest_perf system can support in
idle state.
Signed-off-by: Perry Yuan <[email protected]>
---
drivers/cpufreq/amd-pstate.c | 3 +--
1 file changed, 1 insertion(+), 2 deletions(-)
diff --git a/drivers/cpufreq/amd-pstate.c b/drivers/cpufreq/amd-pstate.c
index 4b764870035e..ebc7c6d389be 100644
--- a/drivers/cpufreq/amd-pstate.c
+++ b/drivers/cpufreq/amd-pstate.c
@@ -268,6 +268,7 @@ static void amd_pstate_update(struct amd_cpudata *cpudata, u32 min_perf,
u64 prev = READ_ONCE(cpudata->cppc_req_cached);
u64 value = prev;
+ des_perf = clamp_t(unsigned long, des_perf, min_perf, max_perf);
value &= ~AMD_CPPC_MIN_PERF(~0L);
value |= AMD_CPPC_MIN_PERF(min_perf);
@@ -356,8 +357,6 @@ static void amd_pstate_adjust_perf(unsigned int cpu,
if (max_perf < min_perf)
max_perf = min_perf;
- des_perf = clamp_t(unsigned long, des_perf, min_perf, max_perf);
-
amd_pstate_update(cpudata, min_perf, des_perf, max_perf, true);
}
--
2.32.0
"acpi_cpc_valid" function already includes the acpi_disabled check and we can
remove the duplicated check here
Acked-by: Viresh Kumar <[email protected]>
Signed-off-by: Perry Yuan <[email protected]>
---
drivers/cpufreq/cppc_cpufreq.c | 2 +-
1 file changed, 1 insertion(+), 1 deletion(-)
diff --git a/drivers/cpufreq/cppc_cpufreq.c b/drivers/cpufreq/cppc_cpufreq.c
index 24eaf0ec344d..9adb7612993e 100644
--- a/drivers/cpufreq/cppc_cpufreq.c
+++ b/drivers/cpufreq/cppc_cpufreq.c
@@ -947,7 +947,7 @@ static int __init cppc_cpufreq_init(void)
{
int ret;
- if ((acpi_disabled) || !acpi_cpc_valid())
+ if (!acpi_cpc_valid())
return -ENODEV;
cppc_check_hisi_workaround();
--
2.32.0
"acpi_cpc_valid" function already includes the acpi_disabled check and we can
remove the duplicated check here
Acked-by: Viresh Kumar <[email protected]>
Acked-by: Sudeep Holla <[email protected]>
Signed-off-by: Perry Yuan <[email protected]>
---
drivers/base/arch_topology.c | 2 +-
1 file changed, 1 insertion(+), 1 deletion(-)
diff --git a/drivers/base/arch_topology.c b/drivers/base/arch_topology.c
index 579c851a2bd7..73a8cb31529d 100644
--- a/drivers/base/arch_topology.c
+++ b/drivers/base/arch_topology.c
@@ -352,7 +352,7 @@ void topology_init_cpu_capacity_cppc(void)
struct cppc_perf_caps perf_caps;
int cpu;
- if (likely(acpi_disabled || !acpi_cpc_valid()))
+ if (likely(!acpi_cpc_valid()))
return;
raw_capacity = kcalloc(num_possible_cpus(), sizeof(*raw_capacity),
--
2.32.0
Remove the white space and correct mixed-up indentation
Acked-by: Viresh Kumar <[email protected]>
Signed-off-by: Perry Yuan <[email protected]>
---
drivers/cpufreq/amd-pstate.c | 4 ++--
1 file changed, 2 insertions(+), 2 deletions(-)
diff --git a/drivers/cpufreq/amd-pstate.c b/drivers/cpufreq/amd-pstate.c
index ebc7c6d389be..acb62ec5ebab 100644
--- a/drivers/cpufreq/amd-pstate.c
+++ b/drivers/cpufreq/amd-pstate.c
@@ -117,7 +117,7 @@ struct amd_cpudata {
struct amd_aperf_mperf cur;
struct amd_aperf_mperf prev;
- u64 freq;
+ u64 freq;
bool boost_supported;
bool precision_boost_off;
u64 cppc_hw_conf_cached;
@@ -674,7 +674,7 @@ static struct cpufreq_driver amd_pstate_driver = {
.resume = amd_pstate_cpu_resume,
.set_boost = amd_pstate_set_boost,
.name = "amd-pstate",
- .attr = amd_pstate_attr,
+ .attr = amd_pstate_attr,
};
static int __init amd_pstate_init(void)
--
2.32.0
Add acpi function check in case ACPI is not enabled, that will cause
pstate driver failed to call cppc acpi to change perf or update epp
value for shared memory solution processors.
When CPPC or ACPI is invalid, warning log will be needed to tell
user that AMD pstate driver failed to load and what is wrong.
Signed-off-by: Perry Yuan <[email protected]>
---
drivers/acpi/cppc_acpi.c | 3 +++
drivers/cpufreq/amd-pstate.c | 2 +-
2 files changed, 4 insertions(+), 1 deletion(-)
diff --git a/drivers/acpi/cppc_acpi.c b/drivers/acpi/cppc_acpi.c
index 6ff1901d7d43..17d67e3ededf 100644
--- a/drivers/acpi/cppc_acpi.c
+++ b/drivers/acpi/cppc_acpi.c
@@ -424,6 +424,9 @@ bool acpi_cpc_valid(void)
struct cpc_desc *cpc_ptr;
int cpu;
+ if (acpi_disabled)
+ return false;
+
for_each_present_cpu(cpu) {
cpc_ptr = per_cpu(cpc_desc_ptr, cpu);
if (!cpc_ptr)
diff --git a/drivers/cpufreq/amd-pstate.c b/drivers/cpufreq/amd-pstate.c
index c29d36f56ab4..4f8600a36194 100644
--- a/drivers/cpufreq/amd-pstate.c
+++ b/drivers/cpufreq/amd-pstate.c
@@ -685,7 +685,7 @@ static int __init amd_pstate_init(void)
return -ENODEV;
if (!acpi_cpc_valid()) {
- pr_debug("the _CPC object is not present in SBIOS\n");
+ pr_warn_once("the _CPC object is not present in SBIOS or ACPI disabled\n");
return -ENODEV;
}
--
2.32.0
change the default transition latency to be 20ms that is more
reasonable transition delay for AMD processors in non-EPP driver mode.
Acked-by: Viresh Kumar <[email protected]>
Signed-off-by: Perry Yuan <[email protected]>
---
drivers/cpufreq/amd-pstate.c | 2 +-
1 file changed, 1 insertion(+), 1 deletion(-)
diff --git a/drivers/cpufreq/amd-pstate.c b/drivers/cpufreq/amd-pstate.c
index acb62ec5ebab..c29d36f56ab4 100644
--- a/drivers/cpufreq/amd-pstate.c
+++ b/drivers/cpufreq/amd-pstate.c
@@ -41,7 +41,7 @@
#include <asm/msr.h>
#include "amd-pstate-trace.h"
-#define AMD_PSTATE_TRANSITION_LATENCY 0x20000
+#define AMD_PSTATE_TRANSITION_LATENCY 20000
#define AMD_PSTATE_TRANSITION_DELAY 500
/*
--
2.32.0
On Fri, Jul 15, 2022 at 06:04:21PM +0800, Yuan, Perry wrote:
> Add support to switch AMD precision boost state to scale cpu max
> frequency that will help to improve the processor throughput.
>
> when set boost state to be enabled, user will need to execute below commands,
> the CPU will reach absolute maximum performance level or the highest perf which
> CPU physical support. This performance level may not be sustainable for
> long durations, it will help to improve the IO workload tasks.
>
> * turn on CPU boost state under root
> echo 1 > /sys/devices/system/cpu/cpufreq/boost
>
> If user set boost off,the CPU can reach to the maximum sustained
> performance level of the process, that level is the process can maintain
> continously working and definitely it can save some power compared to
> boost on mode.
>
> * turn off CPU boost state under root
> echo 0 > /sys/devices/system/cpu/cpufreq/boost
>
> Signed-off-by: Perry Yuan <[email protected]>
> ---
> arch/x86/include/asm/msr-index.h | 2 ++
> drivers/cpufreq/amd-pstate.c | 22 +++++++++++++++++++---
> 2 files changed, 21 insertions(+), 3 deletions(-)
>
> diff --git a/arch/x86/include/asm/msr-index.h b/arch/x86/include/asm/msr-index.h
> index 869508de8269..b952fd6d6916 100644
> --- a/arch/x86/include/asm/msr-index.h
> +++ b/arch/x86/include/asm/msr-index.h
> @@ -559,6 +559,8 @@
> #define AMD_CPPC_MIN_PERF(x) (((x) & 0xff) << 8)
> #define AMD_CPPC_DES_PERF(x) (((x) & 0xff) << 16)
> #define AMD_CPPC_ENERGY_PERF_PREF(x) (((x) & 0xff) << 24)
> +#define AMD_CPPC_PRECISION_BOOST_BIT 25
> +#define AMD_CPPC_PRECISION_BOOST_ENABLED BIT_ULL(AMD_CPPC_PRECISION_BOOST_BIT)
The bit 25 (CpbDis) of MSRC001_0015 [Hardware Configuration] indicates the
core performance boost disable flag.
Please see the section 17.2 Core Performance Boost of PPR:
https://www.amd.com/system/files/TechDocs/40332.pdf
Core performance boost (CPB) dynamically monitors processor activity to
create an estimate of power consumption. If the estimated processor
consumption is below an internally defined power limit and software has
requested P0 on a given core, hardware may transition the core to a
frequency and voltage beyond those defined for P0. If the estimated power
consumption exceeds the defined power limit, some or all cores are limited
to the frequency and voltage defined by P0.
The boost state is designed for legacy ACPI P-State function which is
to request higher frequency beyond P0 State (it's equal to nominal
frequency in CPPC), and we already have the operation like
MSR_K7_HWCR_CPB_DIS in acpi-cpufreq driver. However, in CPPC, we can modify
the performance hint beyond the nominal perf to reach the goal. That won't
need this control anymore. And furthermore, this function for legacy ACPI
P-State should not be mixed them up with CPPC policy. We should prevent the
effect for this flag in CPPC.
Thanks,
Ray
>
> /* AMD Performance Counter Global Status and Control MSRs */
> #define MSR_AMD64_PERF_CNTR_GLOBAL_STATUS 0xc0000300
> diff --git a/drivers/cpufreq/amd-pstate.c b/drivers/cpufreq/amd-pstate.c
> index 9ac75c1cde9c..188e055e24a2 100644
> --- a/drivers/cpufreq/amd-pstate.c
> +++ b/drivers/cpufreq/amd-pstate.c
> @@ -122,6 +122,7 @@ struct amd_cpudata {
>
> u64 freq;
> bool boost_supported;
> + u64 cppc_hw_conf_cached;
> };
>
> static inline int pstate_enable(bool enable)
> @@ -438,18 +439,27 @@ static int amd_pstate_set_boost(struct cpufreq_policy *policy, int state)
> {
> struct amd_cpudata *cpudata = policy->driver_data;
> int ret;
> + u64 value;
>
> if (!cpudata->boost_supported) {
> pr_err("Boost mode is not supported by this processor or SBIOS\n");
> return -EINVAL;
> }
>
> - if (state)
> + ret = rdmsrl_on_cpu(cpudata->cpu, MSR_AMD_CPPC_HW_CTL, &value);
> + if (ret)
> + return ret;
> +
> + if (state) {
> + value |= AMD_CPPC_PRECISION_BOOST_ENABLED;
> policy->cpuinfo.max_freq = cpudata->max_freq;
> - else
> + } else {
> + value &= ~AMD_CPPC_PRECISION_BOOST_ENABLED;
> policy->cpuinfo.max_freq = cpudata->nominal_freq;
> -
> + }
> policy->max = policy->cpuinfo.max_freq;
> + WRITE_ONCE(cpudata->cppc_hw_conf_cached, value);
> + wrmsrl_on_cpu(cpudata->cpu, MSR_AMD_CPPC_HW_CTL, value);
>
> ret = freq_qos_update_request(&cpudata->req[1],
> policy->cpuinfo.max_freq);
> @@ -478,6 +488,7 @@ static int amd_pstate_cpu_init(struct cpufreq_policy *policy)
> int min_freq, max_freq, nominal_freq, lowest_nonlinear_freq, ret;
> struct device *dev;
> struct amd_cpudata *cpudata;
> + u64 value;
>
> dev = get_cpu_device(policy->cpu);
> if (!dev)
> @@ -542,6 +553,11 @@ static int amd_pstate_cpu_init(struct cpufreq_policy *policy)
>
> policy->driver_data = cpudata;
>
> + ret = rdmsrl_on_cpu(cpudata->cpu, MSR_AMD_CPPC_HW_CTL, &value);
> + if (ret)
> + return ret;
> + WRITE_ONCE(cpudata->cppc_hw_conf_cached, value);
> +
> amd_pstate_boost_init(cpudata);
>
> return 0;
> --
> 2.32.0
>
On Fri, Jul 15, 2022 at 06:04:22PM +0800, Yuan, Perry wrote:
> Cleanup the headers declaration which are not used
> actually and some duplicated declaration which is declarated in some
> other headers already, it will help to simplify the header part.
>
> Acked-by: Viresh Kumar <[email protected]>
> Signed-off-by: Perry Yuan <[email protected]>
Reviewed-by: Huang Rui <[email protected]>
> ---
> drivers/cpufreq/amd-pstate.c | 5 -----
> 1 file changed, 5 deletions(-)
>
> diff --git a/drivers/cpufreq/amd-pstate.c b/drivers/cpufreq/amd-pstate.c
> index 188e055e24a2..43e6df9f67f6 100644
> --- a/drivers/cpufreq/amd-pstate.c
> +++ b/drivers/cpufreq/amd-pstate.c
> @@ -31,19 +31,14 @@
> #include <linux/compiler.h>
> #include <linux/dmi.h>
> #include <linux/slab.h>
> -#include <linux/acpi.h>
> #include <linux/io.h>
> #include <linux/delay.h>
> #include <linux/uaccess.h>
> #include <linux/static_call.h>
>
> -#include <acpi/processor.h>
> #include <acpi/cppc_acpi.h>
>
> #include <asm/msr.h>
> -#include <asm/processor.h>
> -#include <asm/cpufeature.h>
> -#include <asm/cpu_device_id.h>
> #include "amd-pstate-trace.h"
>
> #define AMD_PSTATE_TRANSITION_LATENCY 0x20000
> --
> 2.32.0
>
On Fri, Jul 15, 2022 at 06:04:24PM +0800, Yuan, Perry wrote:
> move the cpudata assignment to cpudata declaration which
> will simplify the functions.
>
> No functional change intended.
>
> Acked-by: Viresh Kumar <[email protected]>
> Signed-off-by: Perry Yuan <[email protected]>
Reviewed-by: Huang Rui <[email protected]>
> ---
> drivers/cpufreq/amd-pstate.c | 12 +++---------
> 1 file changed, 3 insertions(+), 9 deletions(-)
>
> diff --git a/drivers/cpufreq/amd-pstate.c b/drivers/cpufreq/amd-pstate.c
> index d8c4153dbe4f..0c8256638ed7 100644
> --- a/drivers/cpufreq/amd-pstate.c
> +++ b/drivers/cpufreq/amd-pstate.c
> @@ -574,9 +574,7 @@ static int amd_pstate_cpu_init(struct cpufreq_policy *policy)
>
> static int amd_pstate_cpu_exit(struct cpufreq_policy *policy)
> {
> - struct amd_cpudata *cpudata;
> -
> - cpudata = policy->driver_data;
> + struct amd_cpudata *cpudata = policy->driver_data;
>
> freq_qos_remove_request(&cpudata->req[1]);
> freq_qos_remove_request(&cpudata->req[0]);
> @@ -618,9 +616,7 @@ static ssize_t show_amd_pstate_max_freq(struct cpufreq_policy *policy,
> char *buf)
> {
> int max_freq;
> - struct amd_cpudata *cpudata;
> -
> - cpudata = policy->driver_data;
> + struct amd_cpudata *cpudata = policy->driver_data;
>
> max_freq = amd_get_max_freq(cpudata);
> if (max_freq < 0)
> @@ -633,9 +629,7 @@ static ssize_t show_amd_pstate_lowest_nonlinear_freq(struct cpufreq_policy *poli
> char *buf)
> {
> int freq;
> - struct amd_cpudata *cpudata;
> -
> - cpudata = policy->driver_data;
> + struct amd_cpudata *cpudata = policy->driver_data;
>
> freq = amd_get_lowest_nonlinear_freq(cpudata);
> if (freq < 0)
> --
> 2.32.0
>
On Fri, Jul 15, 2022 at 06:04:30PM +0800, Yuan, Perry wrote:
> Update transition delay time to 1ms, in the AMD CPU autonomous mode and
> non-autonomous mode, CPPC firmware will decide frequency at 1ms timescale
> based on the workload utilization.
>
> Acked-by: Viresh Kumar <[email protected]>
> Signed-off-by: Perry Yuan <[email protected]>
> Signed-off-by: Su Jinzhou <[email protected]>
Please squeeze this patch into patch 9. I don't really want to separate the
transition marco update as two patches.
Thanks,
Ray
> ---
> drivers/cpufreq/amd-pstate.c | 2 +-
> 1 file changed, 1 insertion(+), 1 deletion(-)
>
> diff --git a/drivers/cpufreq/amd-pstate.c b/drivers/cpufreq/amd-pstate.c
> index 4f8600a36194..d3bc441b3923 100644
> --- a/drivers/cpufreq/amd-pstate.c
> +++ b/drivers/cpufreq/amd-pstate.c
> @@ -42,7 +42,7 @@
> #include "amd-pstate-trace.h"
>
> #define AMD_PSTATE_TRANSITION_LATENCY 20000
> -#define AMD_PSTATE_TRANSITION_DELAY 500
> +#define AMD_PSTATE_TRANSITION_DELAY 1000
>
> /*
> * TODO: We need more time to fine tune processors with shared memory solution
> --
> 2.32.0
>
On Fri, Jul 15, 2022 at 06:04:26PM +0800, Yuan, Perry wrote:
> The patch will fix the invalid desired perf value for powersave
> governor. This issue is found when testing on one AMD EPYC system, the
> actual des_perf is smaller than the min_perf value, that is invalid
> value. because the min_perf is the lowest_perf system can support in
> idle state.
>
> Signed-off-by: Perry Yuan <[email protected]>
Reviewed-by: Huang Rui <[email protected]>
> ---
> drivers/cpufreq/amd-pstate.c | 3 +--
> 1 file changed, 1 insertion(+), 2 deletions(-)
>
> diff --git a/drivers/cpufreq/amd-pstate.c b/drivers/cpufreq/amd-pstate.c
> index 4b764870035e..ebc7c6d389be 100644
> --- a/drivers/cpufreq/amd-pstate.c
> +++ b/drivers/cpufreq/amd-pstate.c
> @@ -268,6 +268,7 @@ static void amd_pstate_update(struct amd_cpudata *cpudata, u32 min_perf,
> u64 prev = READ_ONCE(cpudata->cppc_req_cached);
> u64 value = prev;
>
> + des_perf = clamp_t(unsigned long, des_perf, min_perf, max_perf);
> value &= ~AMD_CPPC_MIN_PERF(~0L);
> value |= AMD_CPPC_MIN_PERF(min_perf);
>
> @@ -356,8 +357,6 @@ static void amd_pstate_adjust_perf(unsigned int cpu,
> if (max_perf < min_perf)
> max_perf = min_perf;
>
> - des_perf = clamp_t(unsigned long, des_perf, min_perf, max_perf);
> -
> amd_pstate_update(cpudata, min_perf, des_perf, max_perf, true);
> }
>
> --
> 2.32.0
>
On Fri, Jul 15, 2022 at 06:04:23PM +0800, Yuan, Perry wrote:
> This cppc_req_cached valued should be prefetched during
> amd_pstate_cpu_init call period, then the amd_pstate_update() will get
> correct cached value before updating the perf to change the cpu perf
> level.The cached values are read through MSR interface, so here use
> shared_mem flag to check the registers are accessible
>
> Also the core performance boost state will be initialized through
> hardware configuration register
>
> * shared_mem flag is used for the shared memory type CPPC implementation
> which dose not support MSR interface operation
Please check the comment of patch 2.
>
> Signed-off-by: Perry Yuan <[email protected]>
> ---
> drivers/cpufreq/amd-pstate.c | 18 +++++++++++++-----
> 1 file changed, 13 insertions(+), 5 deletions(-)
>
> diff --git a/drivers/cpufreq/amd-pstate.c b/drivers/cpufreq/amd-pstate.c
> index 43e6df9f67f6..d8c4153dbe4f 100644
> --- a/drivers/cpufreq/amd-pstate.c
> +++ b/drivers/cpufreq/amd-pstate.c
> @@ -92,6 +92,8 @@ struct amd_aperf_mperf {
> * @prev: Last Aperf/Mperf/tsc count value read from register
> * @freq: current cpu frequency value
> * @boost_supported: check whether the Processor or SBIOS supports boost mode
> + * @precision_boost_off: the core performance boost disabled state
> + * @cppc_hw_conf_cached: the cached hardware configuration register
> *
> * The amd_cpudata is key private data for each CPU thread in AMD P-State, and
> * represents all the attributes and goals that AMD P-State requests at runtime.
> @@ -117,6 +119,7 @@ struct amd_cpudata {
>
> u64 freq;
> bool boost_supported;
> + bool precision_boost_off;
> u64 cppc_hw_conf_cached;
> };
>
> @@ -547,12 +550,17 @@ static int amd_pstate_cpu_init(struct cpufreq_policy *policy)
> cpudata->lowest_nonlinear_freq = lowest_nonlinear_freq;
>
> policy->driver_data = cpudata;
> + if (!shared_mem) {
> + ret = rdmsrl_on_cpu(cpudata->cpu, MSR_AMD_CPPC_HW_CTL, &value);
> + if (ret)
> + return ret;
> + cpudata->precision_boost_off = value & AMD_CPPC_PRECISION_BOOST_ENABLED;
>
> - ret = rdmsrl_on_cpu(cpudata->cpu, MSR_AMD_CPPC_HW_CTL, &value);
> - if (ret)
> - return ret;
> - WRITE_ONCE(cpudata->cppc_hw_conf_cached, value);
> -
> + ret = rdmsrl_on_cpu(cpudata->cpu, MSR_AMD_CPPC_REQ, &value);
> + if (ret)
> + return ret;
> + WRITE_ONCE(cpudata->cppc_req_cached, value);
> + }
> amd_pstate_boost_init(cpudata);
>
> return 0;
> --
> 2.32.0
>
On Fri, Jul 15, 2022 at 06:04:25PM +0800, Yuan, Perry wrote:
> Fix the wrong lowest perf value reading which is used for new
> des_perf calculation by governor requested, the incorrect min_perf will
> get incorrect des_perf to be set , that will cause the system frequency
> changing unexpectedly.
>
> Acked-by: Viresh Kumar <[email protected]>
> Signed-off-by: Perry Yuan <[email protected]>
> Signed-off-by: Su Jinzhou <[email protected]>
Reviewed-by: Huang Rui <[email protected]>
> ---
> drivers/cpufreq/amd-pstate.c | 2 +-
> 1 file changed, 1 insertion(+), 1 deletion(-)
>
> diff --git a/drivers/cpufreq/amd-pstate.c b/drivers/cpufreq/amd-pstate.c
> index 0c8256638ed7..4b764870035e 100644
> --- a/drivers/cpufreq/amd-pstate.c
> +++ b/drivers/cpufreq/amd-pstate.c
> @@ -311,7 +311,7 @@ static int amd_pstate_target(struct cpufreq_policy *policy,
> return -ENODEV;
>
> cap_perf = READ_ONCE(cpudata->highest_perf);
> - min_perf = READ_ONCE(cpudata->lowest_nonlinear_perf);
> + min_perf = READ_ONCE(cpudata->lowest_perf);
> max_perf = cap_perf;
>
> freqs.old = policy->cur;
> --
> 2.32.0
>
On Fri, Jul 15, 2022 at 06:04:27PM +0800, Yuan, Perry wrote:
> Remove the white space and correct mixed-up indentation
>
> Acked-by: Viresh Kumar <[email protected]>
> Signed-off-by: Perry Yuan <[email protected]>
Reviewed-by: Huang Rui <[email protected]>
> ---
> drivers/cpufreq/amd-pstate.c | 4 ++--
> 1 file changed, 2 insertions(+), 2 deletions(-)
>
> diff --git a/drivers/cpufreq/amd-pstate.c b/drivers/cpufreq/amd-pstate.c
> index ebc7c6d389be..acb62ec5ebab 100644
> --- a/drivers/cpufreq/amd-pstate.c
> +++ b/drivers/cpufreq/amd-pstate.c
> @@ -117,7 +117,7 @@ struct amd_cpudata {
> struct amd_aperf_mperf cur;
> struct amd_aperf_mperf prev;
>
> - u64 freq;
> + u64 freq;
> bool boost_supported;
> bool precision_boost_off;
> u64 cppc_hw_conf_cached;
> @@ -674,7 +674,7 @@ static struct cpufreq_driver amd_pstate_driver = {
> .resume = amd_pstate_cpu_resume,
> .set_boost = amd_pstate_set_boost,
> .name = "amd-pstate",
> - .attr = amd_pstate_attr,
> + .attr = amd_pstate_attr,
> };
>
> static int __init amd_pstate_init(void)
> --
> 2.32.0
>
On Fri, Jul 15, 2022 at 06:04:20PM +0800, Yuan, Perry wrote:
> This MSR can be used for controlling whether the CPU boost state
> is enabled in the hardware.
>
> AMD Processor Programming Reference (PPR)
> Link: https://www.amd.com/system/files/TechDocs/40332.pdf [p1095]
> Link: https://www.amd.com/system/files/TechDocs/56569-A1-PUB.zip [p162]
>
> Signed-off-by: Perry Yuan <[email protected]>
> ---
> arch/x86/include/asm/msr-index.h | 1 +
> 1 file changed, 1 insertion(+)
>
> diff --git a/arch/x86/include/asm/msr-index.h b/arch/x86/include/asm/msr-index.h
> index d27e0581b777..869508de8269 100644
> --- a/arch/x86/include/asm/msr-index.h
> +++ b/arch/x86/include/asm/msr-index.h
> @@ -548,6 +548,7 @@
> #define MSR_AMD_CPPC_CAP2 0xc00102b2
> #define MSR_AMD_CPPC_REQ 0xc00102b3
> #define MSR_AMD_CPPC_STATUS 0xc00102b4
> +#define MSR_AMD_CPPC_HW_CTL 0xc0010015
It's actually the duplicated macro definition with MSR_K7_HWCR:
#define MSR_K7_HWCR 0xc0010015
Thanks,
Ray
>
> #define AMD_CPPC_LOWEST_PERF(x) (((x) >> 0) & 0xff)
> #define AMD_CPPC_LOWNONLIN_PERF(x) (((x) >> 8) & 0xff)
> --
> 2.32.0
>
On Fri, Jul 15, 2022 at 06:04:32PM +0800, Yuan, Perry wrote:
> "acpi_cpc_valid" function already includes the acpi_disabled check and we can
> remove the duplicated check here
>
> Acked-by: Viresh Kumar <[email protected]>
> Signed-off-by: Perry Yuan <[email protected]>
Patch 10, 12, and 13 should be one function update to move the
acpi_disabled check into acpi_cpc_valid(), should we squeeze them as one
patch?
Thanks,
Ray
> ---
> drivers/cpufreq/cppc_cpufreq.c | 2 +-
> 1 file changed, 1 insertion(+), 1 deletion(-)
>
> diff --git a/drivers/cpufreq/cppc_cpufreq.c b/drivers/cpufreq/cppc_cpufreq.c
> index 24eaf0ec344d..9adb7612993e 100644
> --- a/drivers/cpufreq/cppc_cpufreq.c
> +++ b/drivers/cpufreq/cppc_cpufreq.c
> @@ -947,7 +947,7 @@ static int __init cppc_cpufreq_init(void)
> {
> int ret;
>
> - if ((acpi_disabled) || !acpi_cpc_valid())
> + if (!acpi_cpc_valid())
> return -ENODEV;
>
> cppc_check_hisi_workaround();
> --
> 2.32.0
>
[AMD Official Use Only - General]
Hi Ray.
> -----Original Message-----
> From: Huang, Ray <[email protected]>
> Sent: Tuesday, July 19, 2022 9:08 AM
> To: Yuan, Perry <[email protected]>
> Cc: [email protected]; [email protected]; Sharma, Deepak
> <[email protected]>; Limonciello, Mario
> <[email protected]>; Fontenot, Nathan
> <[email protected]>; Deucher, Alexander
> <[email protected]>; Su, Jinzhou (Joe) <[email protected]>;
> Huang, Shimmer <[email protected]>; Du, Xiaojian
> <[email protected]>; Meng, Li (Jassmine) <[email protected]>; linux-
> [email protected]; [email protected]
> Subject: Re: [PATCH v4 13/13] cpufreq: CPPC: remove the acpi_disabled check
>
> On Fri, Jul 15, 2022 at 06:04:32PM +0800, Yuan, Perry wrote:
> > "acpi_cpc_valid" function already includes the acpi_disabled check and
> > we can remove the duplicated check here
> >
> > Acked-by: Viresh Kumar <[email protected]>
> > Signed-off-by: Perry Yuan <[email protected]>
>
> Patch 10, 12, and 13 should be one function update to move the acpi_disabled
> check into acpi_cpc_valid(), should we squeeze them as one patch?
>
> Thanks,
> Ray
Sure, will merge the changes into one patch in V5.
Thanks.
Perry.
>
> > ---
> > drivers/cpufreq/cppc_cpufreq.c | 2 +-
> > 1 file changed, 1 insertion(+), 1 deletion(-)
> >
> > diff --git a/drivers/cpufreq/cppc_cpufreq.c
> > b/drivers/cpufreq/cppc_cpufreq.c index 24eaf0ec344d..9adb7612993e
> > 100644
> > --- a/drivers/cpufreq/cppc_cpufreq.c
> > +++ b/drivers/cpufreq/cppc_cpufreq.c
> > @@ -947,7 +947,7 @@ static int __init cppc_cpufreq_init(void) {
> > int ret;
> >
> > - if ((acpi_disabled) || !acpi_cpc_valid())
> > + if (!acpi_cpc_valid())
> > return -ENODEV;
> >
> > cppc_check_hisi_workaround();
> > --
> > 2.32.0
> >
[AMD Official Use Only - General]
Hi Ray.
> -----Original Message-----
> From: Huang, Ray <[email protected]>
> Sent: Tuesday, July 19, 2022 9:05 AM
> To: Yuan, Perry <[email protected]>
> Cc: [email protected]; [email protected]; Sharma, Deepak
> <[email protected]>; Limonciello, Mario
> <[email protected]>; Fontenot, Nathan
> <[email protected]>; Deucher, Alexander
> <[email protected]>; Su, Jinzhou (Joe) <[email protected]>;
> Huang, Shimmer <[email protected]>; Du, Xiaojian
> <[email protected]>; Meng, Li (Jassmine) <[email protected]>; linux-
> [email protected]; [email protected]
> Subject: Re: [PATCH v4 11/13] cpufreq: amd_pstate: update transition delay
> time to 1ms
>
> On Fri, Jul 15, 2022 at 06:04:30PM +0800, Yuan, Perry wrote:
> > Update transition delay time to 1ms, in the AMD CPU autonomous mode
> > and non-autonomous mode, CPPC firmware will decide frequency at 1ms
> > timescale based on the workload utilization.
> >
> > Acked-by: Viresh Kumar <[email protected]>
> > Signed-off-by: Perry Yuan <[email protected]>
> > Signed-off-by: Su Jinzhou <[email protected]>
>
> Please squeeze this patch into patch 9. I don't really want to separate the
> transition marco update as two patches.
>
> Thanks,
> Ray
Will update this in V5.
Thanks.
Perry.
>
> > ---
> > drivers/cpufreq/amd-pstate.c | 2 +-
> > 1 file changed, 1 insertion(+), 1 deletion(-)
> >
> > diff --git a/drivers/cpufreq/amd-pstate.c
> > b/drivers/cpufreq/amd-pstate.c index 4f8600a36194..d3bc441b3923 100644
> > --- a/drivers/cpufreq/amd-pstate.c
> > +++ b/drivers/cpufreq/amd-pstate.c
> > @@ -42,7 +42,7 @@
> > #include "amd-pstate-trace.h"
> >
> > #define AMD_PSTATE_TRANSITION_LATENCY 20000
> > -#define AMD_PSTATE_TRANSITION_DELAY 500
> > +#define AMD_PSTATE_TRANSITION_DELAY 1000
> >
> > /*
> > * TODO: We need more time to fine tune processors with shared memory
> > solution
> > --
> > 2.32.0
> >
[AMD Official Use Only - General]
Hi Ray.
> -----Original Message-----
> From: Huang, Ray <[email protected]>
> Sent: Tuesday, July 19, 2022 8:46 AM
> To: Yuan, Perry <[email protected]>
> Cc: [email protected]; [email protected]; Sharma, Deepak
> <[email protected]>; Limonciello, Mario
> <[email protected]>; Fontenot, Nathan
> <[email protected]>; Deucher, Alexander
> <[email protected]>; Su, Jinzhou (Joe) <[email protected]>;
> Huang, Shimmer <[email protected]>; Du, Xiaojian
> <[email protected]>; Meng, Li (Jassmine) <[email protected]>; linux-
> [email protected]; [email protected]
> Subject: Re: [PATCH v4 02/13] cpufreq: amd-pstate: enable AMD Precision Boost
> mode switch
>
> On Fri, Jul 15, 2022 at 06:04:21PM +0800, Yuan, Perry wrote:
> > Add support to switch AMD precision boost state to scale cpu max
> > frequency that will help to improve the processor throughput.
> >
> > when set boost state to be enabled, user will need to execute below
> > commands, the CPU will reach absolute maximum performance level or the
> > highest perf which CPU physical support. This performance level may
> > not be sustainable for long durations, it will help to improve the IO workload
> tasks.
> >
> > * turn on CPU boost state under root
> > echo 1 > /sys/devices/system/cpu/cpufreq/boost
> >
> > If user set boost off,the CPU can reach to the maximum sustained
> > performance level of the process, that level is the process can
> > maintain continously working and definitely it can save some power
> > compared to boost on mode.
> >
> > * turn off CPU boost state under root
> > echo 0 > /sys/devices/system/cpu/cpufreq/boost
> >
> > Signed-off-by: Perry Yuan <[email protected]>
> > ---
> > arch/x86/include/asm/msr-index.h | 2 ++
> > drivers/cpufreq/amd-pstate.c | 22 +++++++++++++++++++---
> > 2 files changed, 21 insertions(+), 3 deletions(-)
> >
> > diff --git a/arch/x86/include/asm/msr-index.h
> > b/arch/x86/include/asm/msr-index.h
> > index 869508de8269..b952fd6d6916 100644
> > --- a/arch/x86/include/asm/msr-index.h
> > +++ b/arch/x86/include/asm/msr-index.h
> > @@ -559,6 +559,8 @@
> > #define AMD_CPPC_MIN_PERF(x) (((x) & 0xff) << 8)
> > #define AMD_CPPC_DES_PERF(x) (((x) & 0xff) << 16)
> > #define AMD_CPPC_ENERGY_PERF_PREF(x) (((x) & 0xff) << 24)
> > +#define AMD_CPPC_PRECISION_BOOST_BIT 25
> > +#define AMD_CPPC_PRECISION_BOOST_ENABLED
> BIT_ULL(AMD_CPPC_PRECISION_BOOST_BIT)
>
> The bit 25 (CpbDis) of MSRC001_0015 [Hardware Configuration] indicates the
> core performance boost disable flag.
>
> Please see the section 17.2 Core Performance Boost of PPR:
>
> https://www.amd.com/system/files/TechDocs/40332.pdf
>
> Core performance boost (CPB) dynamically monitors processor activity to create
> an estimate of power consumption. If the estimated processor consumption is
> below an internally defined power limit and software has requested P0 on a
> given core, hardware may transition the core to a frequency and voltage beyond
> those defined for P0. If the estimated power consumption exceeds the defined
> power limit, some or all cores are limited to the frequency and voltage defined
> by P0.
>
> The boost state is designed for legacy ACPI P-State function which is to request
> higher frequency beyond P0 State (it's equal to nominal frequency in CPPC), and
> we already have the operation like MSR_K7_HWCR_CPB_DIS in acpi-cpufreq
> driver. However, in CPPC, we can modify the performance hint beyond the
> nominal perf to reach the goal. That won't need this control anymore. And
> furthermore, this function for legacy ACPI P-State should not be mixed them up
> with CPPC policy. We should prevent the effect for this flag in CPPC.
>
> Thanks,
> Ray
I did not notice that acpi_cpufreq already use this bit to control performance boost.
Seems like the patch is not needed for CPPC like you said.
I will drop the patch in V5 and use target perf to get target perf to firmware.
That will also do the same thing to limit the perf level and power consumption.
Thanks for your feedback. Will send V5 soon.
Perry .
>
> >
> > /* AMD Performance Counter Global Status and Control MSRs */
> > #define MSR_AMD64_PERF_CNTR_GLOBAL_STATUS 0xc0000300
> > diff --git a/drivers/cpufreq/amd-pstate.c
> > b/drivers/cpufreq/amd-pstate.c index 9ac75c1cde9c..188e055e24a2 100644
> > --- a/drivers/cpufreq/amd-pstate.c
> > +++ b/drivers/cpufreq/amd-pstate.c
> > @@ -122,6 +122,7 @@ struct amd_cpudata {
> >
> > u64 freq;
> > bool boost_supported;
> > + u64 cppc_hw_conf_cached;
> > };
> >
> > static inline int pstate_enable(bool enable) @@ -438,18 +439,27 @@
> > static int amd_pstate_set_boost(struct cpufreq_policy *policy, int
> > state) {
> > struct amd_cpudata *cpudata = policy->driver_data;
> > int ret;
> > + u64 value;
> >
> > if (!cpudata->boost_supported) {
> > pr_err("Boost mode is not supported by this processor or
> SBIOS\n");
> > return -EINVAL;
> > }
> >
> > - if (state)
> > + ret = rdmsrl_on_cpu(cpudata->cpu, MSR_AMD_CPPC_HW_CTL, &value);
> > + if (ret)
> > + return ret;
> > +
> > + if (state) {
> > + value |= AMD_CPPC_PRECISION_BOOST_ENABLED;
> > policy->cpuinfo.max_freq = cpudata->max_freq;
> > - else
> > + } else {
> > + value &= ~AMD_CPPC_PRECISION_BOOST_ENABLED;
> > policy->cpuinfo.max_freq = cpudata->nominal_freq;
> > -
> > + }
> > policy->max = policy->cpuinfo.max_freq;
> > + WRITE_ONCE(cpudata->cppc_hw_conf_cached, value);
> > + wrmsrl_on_cpu(cpudata->cpu, MSR_AMD_CPPC_HW_CTL, value);
> >
> > ret = freq_qos_update_request(&cpudata->req[1],
> > policy->cpuinfo.max_freq);
> > @@ -478,6 +488,7 @@ static int amd_pstate_cpu_init(struct cpufreq_policy
> *policy)
> > int min_freq, max_freq, nominal_freq, lowest_nonlinear_freq, ret;
> > struct device *dev;
> > struct amd_cpudata *cpudata;
> > + u64 value;
> >
> > dev = get_cpu_device(policy->cpu);
> > if (!dev)
> > @@ -542,6 +553,11 @@ static int amd_pstate_cpu_init(struct
> > cpufreq_policy *policy)
> >
> > policy->driver_data = cpudata;
> >
> > + ret = rdmsrl_on_cpu(cpudata->cpu, MSR_AMD_CPPC_HW_CTL, &value);
> > + if (ret)
> > + return ret;
> > + WRITE_ONCE(cpudata->cppc_hw_conf_cached, value);
> > +
> > amd_pstate_boost_init(cpudata);
> >
> > return 0;
> > --
> > 2.32.0
> >