2023-08-08 18:45:31

by Meng, Li (Jassmine)

[permalink] [raw]
Subject: [PATCH V1 0/6] AMD Pstate Preferred Core

Hi all:

The core frequency is subject to process variation in semiconductors.
Not all cores are able to reach the maximum frequency respecting the
infrastructure limits. Consequently, AMD has redefined the concept of
maximum frequency of a part. This means that a fraction of cores can reach
maximum frequency. To find the best process scheduling policy for a given
scenario, OS needs to know the core ordering informed by the platform through
highest performance capability register of the CPPC interface.

Earlier implementations of AMD Pstate Preferred Core only support a static
core ranking and targeted performance. Now it has the ability to dynamically
change the preferred core based on the workload and platform conditions and
accounting for thermals and aging.

AMD Pstate driver utilizes the functions and data structures provided by
the ITMT architecture to enable the scheduler to favor scheduling on cores
which can reach a higher frequency at a lower voltage.
We call it AMD Pstate Preferred Core.

Here sched_set_itmt_core_prio() is called to set priorities and
sched_set_itmt_support() is called to enable ITMT feature.
AMD Pstate driver uses the highest performance value to indicate
the priority of CPU. The higher value has a higher priority.

AMD Pstate driver will provide an initial core ordering at boot time.
It relies on the CPPC interface to communicate the core ranking to the
operating system and scheduler so that the OS chooses the cores with the
highest performance first when scheduling processes. When the AMD Pstate
driver receives a notification that the highest performance has changed,
it will update the core ranking.

Meng Li (6):
ACPI: CPPC: Add get the highest performance cppc control
cpufreq: amd-pstate: Enable AMD Pstate Preferred Core Supporting.
cpufreq: Add a notification message that the highest perf has changed
cpufreq: amd-pstate: Update AMD Pstate Preferred Core ranking
dynamically
Documentation: amd-pstate: introduce AMD Pstate Preferred Core
Documentation: introduce AMD Pstate Preferrd Core mode kernel command
line options

.../admin-guide/kernel-parameters.txt | 5 +
Documentation/admin-guide/pm/amd-pstate.rst | 55 ++++++
drivers/acpi/cppc_acpi.c | 13 ++
drivers/acpi/processor_driver.c | 6 +
drivers/cpufreq/amd-pstate.c | 181 ++++++++++++++++--
drivers/cpufreq/cpufreq.c | 13 ++
include/acpi/cppc_acpi.h | 5 +
include/linux/amd-pstate.h | 1 +
include/linux/cpufreq.h | 4 +
9 files changed, 267 insertions(+), 16 deletions(-)

--
2.34.1



2023-08-08 18:46:51

by Meng, Li (Jassmine)

[permalink] [raw]
Subject: [PATCH V1 4/6] cpufreq: amd-pstate: Update AMD Pstate Preferred Core ranking dynamically

Preferred core rankings can be changed dynamically by the
platform based on the workload and platform conditions and
accounting for thermals and aging.
When this occurs, the CPU priority needs to be updated.

Signed-off-by: Meng Li <[email protected]>
---
drivers/cpufreq/amd-pstate.c | 32 ++++++++++++++++++++++++++++++++
include/linux/amd-pstate.h | 1 +
2 files changed, 33 insertions(+)

diff --git a/drivers/cpufreq/amd-pstate.c b/drivers/cpufreq/amd-pstate.c
index e919b3f4ab18..ba10aa971dcb 100644
--- a/drivers/cpufreq/amd-pstate.c
+++ b/drivers/cpufreq/amd-pstate.c
@@ -320,6 +320,7 @@ static int pstate_init_perf(struct amd_cpudata *cpudata)
WRITE_ONCE(cpudata->nominal_perf, AMD_CPPC_NOMINAL_PERF(cap1));
WRITE_ONCE(cpudata->lowest_nonlinear_perf, AMD_CPPC_LOWNONLIN_PERF(cap1));
WRITE_ONCE(cpudata->lowest_perf, AMD_CPPC_LOWEST_PERF(cap1));
+ WRITE_ONCE(cpudata->prefcore_highest_perf, AMD_CPPC_HIGHEST_PERF(cap1));

return 0;
}
@@ -341,6 +342,7 @@ static int cppc_init_perf(struct amd_cpudata *cpudata)
WRITE_ONCE(cpudata->lowest_nonlinear_perf,
cppc_perf.lowest_nonlinear_perf);
WRITE_ONCE(cpudata->lowest_perf, cppc_perf.lowest_perf);
+ WRITE_ONCE(cpudata->prefcore_highest_perf, cppc_perf.highest_perf);

if (cppc_state == AMD_PSTATE_ACTIVE)
return 0;
@@ -743,6 +745,34 @@ static void amd_pstate_init_prefcore(void)
schedule_work(&sched_prefcore_work);
}

+/*
+ * amd_pstate_update_highest_perf - refresh the preferred-core priority of a CPU
+ * @cpu: CPU whose highest performance value is re-read.
+ *
+ * Does nothing when preferred core is disabled, when the highest performance
+ * value cannot be read, or when @cpu has no cpufreq policy. Otherwise, if the
+ * value differs from the cached prefcore_highest_perf, the cache is updated
+ * and the new value is handed to sched_set_itmt_core_prio() as the priority
+ * of @cpu.
+ */
+static void amd_pstate_update_highest_perf(unsigned int cpu)
+{
+	struct cpufreq_policy *policy;
+	struct amd_cpudata *cpudata;
+	u32 prev_high, cur_high;
+	u64 highest_perf;
+
+	if (!prefcore_enabled)
+		return;
+
+	if (amd_pstate_get_highest_perf(cpu, &highest_perf))
+		return;
+
+	/* cpufreq_cpu_get() may return NULL; never dereference it unchecked. */
+	policy = cpufreq_cpu_get(cpu);
+	if (!policy)
+		return;
+
+	cpudata = policy->driver_data;
+	cur_high = highest_perf;
+	prev_high = READ_ONCE(cpudata->prefcore_highest_perf);
+
+	/* Only touch the scheduler priority when the ranking really changed. */
+	if (prev_high != cur_high) {
+		WRITE_ONCE(cpudata->prefcore_highest_perf, cur_high);
+		sched_set_itmt_core_prio(cur_high, cpu);
+	}
+
+	/* Drop the reference taken by cpufreq_cpu_get(). */
+	cpufreq_cpu_put(policy);
+}
+
/*
* Check if AMD Pstate Preferred core feature is supported and enabled
* 1) no_prefcore is used to enable or disable AMD Pstate Preferred Core
@@ -1497,6 +1527,7 @@ static struct cpufreq_driver amd_pstate_driver = {
.suspend = amd_pstate_cpu_suspend,
.resume = amd_pstate_cpu_resume,
.set_boost = amd_pstate_set_boost,
+ .update_highest_perf = amd_pstate_update_highest_perf,
.name = "amd-pstate",
.attr = amd_pstate_attr,
};
@@ -1511,6 +1542,7 @@ static struct cpufreq_driver amd_pstate_epp_driver = {
.online = amd_pstate_epp_cpu_online,
.suspend = amd_pstate_epp_suspend,
.resume = amd_pstate_epp_resume,
+ .update_highest_perf = amd_pstate_update_highest_perf,
.name = "amd-pstate-epp",
.attr = amd_pstate_epp_attr,
};
diff --git a/include/linux/amd-pstate.h b/include/linux/amd-pstate.h
index 446394f84606..fa86bc953d3e 100644
--- a/include/linux/amd-pstate.h
+++ b/include/linux/amd-pstate.h
@@ -70,6 +70,7 @@ struct amd_cpudata {
u32 nominal_perf;
u32 lowest_nonlinear_perf;
u32 lowest_perf;
+ u32 prefcore_highest_perf;

u32 max_freq;
u32 min_freq;
--
2.34.1


2023-08-08 18:59:46

by Meng, Li (Jassmine)

[permalink] [raw]
Subject: [PATCH V1 5/6] Documentation: amd-pstate: introduce AMD Pstate Preferred Core

Introduce AMD Pstate Preferred Core.

check preferred core state:
$ cat /sys/devices/system/cpu/amd-pstate/prefcore_state

Signed-off-by: Meng Li <[email protected]>
---
Documentation/admin-guide/pm/amd-pstate.rst | 55 +++++++++++++++++++++
1 file changed, 55 insertions(+)

diff --git a/Documentation/admin-guide/pm/amd-pstate.rst b/Documentation/admin-guide/pm/amd-pstate.rst
index 1cf40f69278c..4a30cf235425 100644
--- a/Documentation/admin-guide/pm/amd-pstate.rst
+++ b/Documentation/admin-guide/pm/amd-pstate.rst
@@ -353,6 +353,49 @@ is activated. In this mode, driver requests minimum and maximum performance
level and the platform autonomously selects a performance level in this range
and appropriate to the current workload.

+AMD Pstate Preferred Core
+=================================
+
+The core frequency is subject to process variation in semiconductors.
+Not all cores are able to reach the maximum frequency respecting the
+infrastructure limits. Consequently, AMD has redefined the concept of
+maximum frequency of a part. This means that a fraction of cores can reach
+maximum frequency. To find the best process scheduling policy for a given
+scenario, OS needs to know the core ordering informed by the platform through
+highest performance capability register of the CPPC interface.
+
+``AMD Pstate Preferred Core`` uses the functions and data structures provided by
+the ITMT architecture to enable the scheduler to favor scheduling on cores that can
+reach a higher frequency at a lower voltage. It also has the ability to dynamically
+change the preferred core based on the workload and platform conditions,
+accounting for thermals and aging.
+
+The priority metric will be initialized by the AMD Pstate driver. The AMD Pstate
+driver will also determine whether or not ``AMD Pstate Preferred Core`` is
+supported by the platform.
+
+AMD Pstate driver will provide an initial core ordering when the system boots.
+The platform uses the CPPC interfaces to communicate the core ranking to the
+operating system and scheduler so that the OS chooses the cores with the
+highest performance first when scheduling processes. When the AMD Pstate
+driver receives a notification that the highest performance has changed, it
+will update the core ranking and set the CPU's priority.
+
+AMD Preferred Core Switch
+=================================
+Kernel Parameters
+-----------------
+
+``AMD Pstate Preferred Core`` has two states: enable and disable.
+Enable/disable states can be chosen by different kernel parameters.
+``AMD Pstate Preferred Core`` is disabled by default.
+
+``amd_prefcore=enable``
+
+If ``amd_prefcore=enable`` is passed on the kernel command line,
+``AMD Pstate Preferred Core`` is enabled, provided that the processor and
+power firmware support the preferred core feature.
+
User Space Interface in ``sysfs`` - General
===========================================

@@ -385,6 +428,18 @@ control its functionality at the system level. They are located in the
to the operation mode represented by that string - or to be
unregistered in the "disable" case.

+``prefcore_state``
+ Preferred Core state of the driver: "enabled" or "disabled".
+
+ "enabled"
+ The AMD Preferred Core is enabled.
+
+ "disabled"
+ The AMD Preferred Core is disabled.
+
+
+ This attribute is read-only to check the state of Preferred Core.
+
``cpupower`` tool support for ``amd-pstate``
===============================================

--
2.34.1


2023-08-08 19:04:47

by Meng, Li (Jassmine)

[permalink] [raw]
Subject: [PATCH V1 3/6] cpufreq: Add a notification message that the highest perf has changed

Please refer to the ACPI_Spec for details on the highest
performance and notify events of CPPC.

Signed-off-by: Meng Li <[email protected]>
Link: https://uefi.org/htmlspecs/ACPI_Spec_6_4_html/08_Processor_Configuration_and_Control/declaring-processors.html?highlight=0x85#highest-performance
---
drivers/acpi/processor_driver.c | 6 ++++++
drivers/cpufreq/cpufreq.c | 13 +++++++++++++
include/linux/cpufreq.h | 4 ++++
3 files changed, 23 insertions(+)

diff --git a/drivers/acpi/processor_driver.c b/drivers/acpi/processor_driver.c
index 4bd16b3f0781..29b2fb68a35d 100644
--- a/drivers/acpi/processor_driver.c
+++ b/drivers/acpi/processor_driver.c
@@ -27,6 +27,7 @@
#define ACPI_PROCESSOR_NOTIFY_PERFORMANCE 0x80
#define ACPI_PROCESSOR_NOTIFY_POWER 0x81
#define ACPI_PROCESSOR_NOTIFY_THROTTLING 0x82
+#define ACPI_PROCESSOR_NOTIFY_HIGEST_PERF_CHANGED 0x85

MODULE_AUTHOR("Paul Diefenbaugh");
MODULE_DESCRIPTION("ACPI Processor Driver");
@@ -83,6 +84,11 @@ static void acpi_processor_notify(acpi_handle handle, u32 event, void *data)
acpi_bus_generate_netlink_event(device->pnp.device_class,
dev_name(&device->dev), event, 0);
break;
+ case ACPI_PROCESSOR_NOTIFY_HIGEST_PERF_CHANGED:
+ cpufreq_update_highest_perf(pr->id);
+ acpi_bus_generate_netlink_event(device->pnp.device_class,
+ dev_name(&device->dev), event, 0);
+ break;
default:
acpi_handle_debug(handle, "Unsupported event [0x%x]\n", event);
break;
diff --git a/drivers/cpufreq/cpufreq.c b/drivers/cpufreq/cpufreq.c
index 50bbc969ffe5..842357abfae6 100644
--- a/drivers/cpufreq/cpufreq.c
+++ b/drivers/cpufreq/cpufreq.c
@@ -2675,6 +2675,19 @@ void cpufreq_update_limits(unsigned int cpu)
}
EXPORT_SYMBOL_GPL(cpufreq_update_limits);

+/**
+ * cpufreq_update_highest_perf - Update highest performance for a given CPU.
+ * @cpu: CPU to update the highest performance for.
+ *
+ * Invoke the driver's ->update_highest_perf() callback if a cpufreq driver is
+ * set and provides one; otherwise do nothing.
+ */
+void cpufreq_update_highest_perf(unsigned int cpu)
+{
+	/*
+	 * This is called from the ACPI processor notify handler.
+	 * NOTE(review): cpufreq_driver is presumably NULL while no cpufreq
+	 * driver is registered - confirm; the guard keeps the callback lookup
+	 * below from dereferencing a NULL pointer in that case.
+	 */
+	if (!cpufreq_driver)
+		return;
+
+	if (cpufreq_driver->update_highest_perf)
+		cpufreq_driver->update_highest_perf(cpu);
+}
+EXPORT_SYMBOL_GPL(cpufreq_update_highest_perf);
+
/*********************************************************************
* BOOST *
*********************************************************************/
diff --git a/include/linux/cpufreq.h b/include/linux/cpufreq.h
index 172ff51c1b2a..766c83a4fae7 100644
--- a/include/linux/cpufreq.h
+++ b/include/linux/cpufreq.h
@@ -231,6 +231,7 @@ int cpufreq_get_policy(struct cpufreq_policy *policy, unsigned int cpu);
void refresh_frequency_limits(struct cpufreq_policy *policy);
void cpufreq_update_policy(unsigned int cpu);
void cpufreq_update_limits(unsigned int cpu);
+void cpufreq_update_highest_perf(unsigned int cpu);
bool have_governor_per_policy(void);
bool cpufreq_supports_freq_invariance(void);
struct kobject *get_governor_parent_kobj(struct cpufreq_policy *policy);
@@ -376,6 +377,9 @@ struct cpufreq_driver {
/* Called to update policy limits on firmware notifications. */
void (*update_limits)(unsigned int cpu);

+ /* Called to update highest performance on firmware notifications. */
+ void (*update_highest_perf)(unsigned int cpu);
+
/* optional */
int (*bios_limit)(int cpu, unsigned int *limit);

--
2.34.1