For Zhaoxin CPUs, the cores' highest frequencies may be different, which
means that cores may run at different max frequencies.
According to ACPI-spec6 chapter 8.4.7, the per-core highest frequency
value can be obtained via cppc.
A core with a higher frequency has better performance and can be called
a preferred core. Better overall performance can be achieved by making
the scheduler run tasks on these preferred cores.
The cpufreq driver can use the highest frequency value as the priority of
each core to make the scheduler try to get better performance.
More specifically:
Add cppc_get_highest_perf function in CPPC driver.
Export two functions which will be used in the acpi-cpufreq driver.
In the acpi-cpufreq driver, use cppc_get_highest_perf() to get the highest
frequency value of each core, use sched_set_itmt_core_prio() to set the
highest frequency value as the core priority, and use sched_set_itmt_support()
provided by ITMT to tell the scheduler to favor the preferred cores.
Tony W Wang-oc (3):
ACPI: CPPC: Add get the highest perf register value support
x86/sched/itmt: Export two API symbols
ACPI: cpufreq: Add ITMT support when CPPC enabled for Zhaoxin CPUs
arch/x86/kernel/itmt.c | 2 ++
drivers/acpi/cppc_acpi.c | 13 ++++++++
drivers/cpufreq/acpi-cpufreq.c | 56 +++++++++++++++++++++++++++++++++-
include/acpi/cppc_acpi.h | 5 +++
4 files changed, 75 insertions(+), 1 deletion(-)
--
2.25.1
Add function cppc_get_highest_perf in generic CPPC driver to get the
highest perf register value for specified core.
Signed-off-by: Tony W Wang-oc <[email protected]>
---
drivers/acpi/cppc_acpi.c | 13 +++++++++++++
include/acpi/cppc_acpi.h | 5 +++++
2 files changed, 18 insertions(+)
diff --git a/drivers/acpi/cppc_acpi.c b/drivers/acpi/cppc_acpi.c
index 7ff269a78c20..1a77c514d007 100644
--- a/drivers/acpi/cppc_acpi.c
+++ b/drivers/acpi/cppc_acpi.c
@@ -1154,6 +1154,19 @@ int cppc_get_nominal_perf(int cpunum, u64 *nominal_perf)
return cppc_get_perf(cpunum, NOMINAL_PERF, nominal_perf);
}
+/**
+ * cppc_get_highest_perf - Get the highest performance register value.
+ * @cpunum: CPU from which to get highest performance.
+ * @highest_perf: Return address.
+ *
+ * Return: 0 for success, -EIO otherwise.
+ */
+int cppc_get_highest_perf(int cpunum, u64 *highest_perf)
+{
+ return cppc_get_perf(cpunum, HIGHEST_PERF, highest_perf);
+}
+EXPORT_SYMBOL_GPL(cppc_get_highest_perf);
+
/**
* cppc_get_epp_perf - Get the epp register value.
* @cpunum: CPU from which to get epp preference value.
diff --git a/include/acpi/cppc_acpi.h b/include/acpi/cppc_acpi.h
index 6126c977ece0..c0b69ffe7bdb 100644
--- a/include/acpi/cppc_acpi.h
+++ b/include/acpi/cppc_acpi.h
@@ -139,6 +139,7 @@ struct cppc_cpudata {
#ifdef CONFIG_ACPI_CPPC_LIB
extern int cppc_get_desired_perf(int cpunum, u64 *desired_perf);
extern int cppc_get_nominal_perf(int cpunum, u64 *nominal_perf);
+extern int cppc_get_highest_perf(int cpunum, u64 *highest_perf);
extern int cppc_get_perf_ctrs(int cpu, struct cppc_perf_fb_ctrs *perf_fb_ctrs);
extern int cppc_set_perf(int cpu, struct cppc_perf_ctrls *perf_ctrls);
extern int cppc_set_enable(int cpu, bool enable);
@@ -165,6 +166,10 @@ static inline int cppc_get_nominal_perf(int cpunum, u64 *nominal_perf)
{
return -ENOTSUPP;
}
+static inline int cppc_get_highest_perf(int cpunum, u64 *highest_perf)
+{
+ return -ENOTSUPP;
+}
static inline int cppc_get_perf_ctrs(int cpu, struct cppc_perf_fb_ctrs *perf_fb_ctrs)
{
return -ENOTSUPP;
--
2.25.1
Mark two API symbols to be exported to kernel loadable GPL modules,
namely:
sched_set_itmt_support;
sched_set_itmt_core_prio;
Signed-off-by: Tony W Wang-oc <[email protected]>
---
arch/x86/kernel/itmt.c | 2 ++
1 file changed, 2 insertions(+)
diff --git a/arch/x86/kernel/itmt.c b/arch/x86/kernel/itmt.c
index 9a7c03d47861..96c74fa8ea9c 100644
--- a/arch/x86/kernel/itmt.c
+++ b/arch/x86/kernel/itmt.c
@@ -121,6 +121,7 @@ int sched_set_itmt_support(void)
return 0;
}
+EXPORT_SYMBOL_GPL(sched_set_itmt_support);
/**
* sched_clear_itmt_support() - Revoke platform's support of ITMT
@@ -180,3 +181,4 @@ void sched_set_itmt_core_prio(int prio, int cpu)
{
per_cpu(sched_core_priority, cpu) = prio;
}
+EXPORT_SYMBOL_GPL(sched_set_itmt_core_prio);
--
2.25.1
For Zhaoxin CPUs, the cores' highest frequencies may be different, which
means that cores may run at different max frequencies.
According to ACPI-spec6 chapter 8.4.7, the per-core highest frequency
value can be obtained via cppc.
A core with a higher frequency has better performance and can be called
a preferred core. Better overall performance can be achieved by making
the scheduler run tasks on these preferred cores.
The cpufreq driver can use the highest frequency value as the priority of
each core to make the scheduler try to get better performance. More specifically,
in the acpi-cpufreq driver, use cppc_get_highest_perf() to get the highest
frequency value of each core, use sched_set_itmt_core_prio() to set the
highest frequency value as the core priority, and use sched_set_itmt_support()
provided by ITMT to tell the scheduler to favor the preferred cores.
Signed-off-by: Tony W Wang-oc <[email protected]>
---
drivers/cpufreq/acpi-cpufreq.c | 56 +++++++++++++++++++++++++++++++++-
1 file changed, 55 insertions(+), 1 deletion(-)
diff --git a/drivers/cpufreq/acpi-cpufreq.c b/drivers/cpufreq/acpi-cpufreq.c
index 37f1cdf46d29..f4c1ff9e4bb0 100644
--- a/drivers/cpufreq/acpi-cpufreq.c
+++ b/drivers/cpufreq/acpi-cpufreq.c
@@ -663,8 +663,56 @@ static u64 get_max_boost_ratio(unsigned int cpu)
return div_u64(highest_perf << SCHED_CAPACITY_SHIFT, nominal_perf);
}
+
+/* The work item is needed to avoid CPU hotplug locking issues */
+static void sched_itmt_work_fn(struct work_struct *work)
+{
+ sched_set_itmt_support();
+}
+
+static DECLARE_WORK(sched_itmt_work, sched_itmt_work_fn);
+
+static void set_itmt_prio(int cpu)
+{
+ static bool cppc_highest_perf_diff;
+ static struct cpumask core_prior_mask;
+ u64 highest_perf;
+ static u64 max_highest_perf = 0, min_highest_perf = U64_MAX;
+ int ret;
+
+ ret = cppc_get_highest_perf(cpu, &highest_perf);
+ if (ret)
+ return;
+
+ sched_set_itmt_core_prio(highest_perf, cpu);
+ cpumask_set_cpu(cpu, &core_prior_mask);
+
+ if (max_highest_perf <= min_highest_perf) {
+ if (highest_perf > max_highest_perf)
+ max_highest_perf = highest_perf;
+
+ if (highest_perf < min_highest_perf)
+ min_highest_perf = highest_perf;
+
+ if (max_highest_perf > min_highest_perf) {
+ /*
+ * This code can be run during CPU online under the
+ * CPU hotplug locks, so sched_set_itmt_support()
+ * cannot be called from here. Queue up a work item
+ * to invoke it.
+ */
+ cppc_highest_perf_diff = true;
+ }
+ }
+
+ if (cppc_highest_perf_diff && cpumask_equal(&core_prior_mask, cpu_online_mask)) {
+ pr_debug("queue a work to set itmt enabled\n");
+ schedule_work(&sched_itmt_work);
+ }
+}
#else
static inline u64 get_max_boost_ratio(unsigned int cpu) { return 0; }
+static void set_itmt_prio(int cpu) { }
#endif
static int acpi_cpufreq_cpu_init(struct cpufreq_policy *policy)
@@ -677,7 +725,7 @@ static int acpi_cpufreq_cpu_init(struct cpufreq_policy *policy)
unsigned int valid_states = 0;
unsigned int result = 0;
u64 max_boost_ratio;
- unsigned int i;
+ unsigned int i, j;
#ifdef CONFIG_SMP
static int blacklisted;
#endif
@@ -742,6 +790,12 @@ static int acpi_cpufreq_cpu_init(struct cpufreq_policy *policy)
}
#endif
+ if (c->x86_vendor == X86_VENDOR_CENTAUR || c->x86_vendor == X86_VENDOR_ZHAOXIN) {
+ for_each_cpu(j, policy->cpus) {
+ set_itmt_prio(j);
+ }
+ }
+
/* capability check */
if (perf->state_count <= 1) {
pr_debug("No P-States\n");
--
2.25.1
On Thu, Dec 28, 2023 at 8:57 AM Tony W Wang-oc <[email protected]> wrote:
>
> For Zhaoxin CPUs, the cores' highest frequencies may be different, which
> means that cores may run at different max frequencies,
>
> According to ACPI-spec6 chapter 8.4.7, the per-core highest frequency
> value can be obtained via cppc.
>
> The core with the higher frequency have better performance, which can be
> called as preferred core. And better performance can be achieved by
> making the scheduler to run tasks on these preferred cores.
>
> The cpufreq driver can use the highest frequency value as the prioriy of
> core to make the scheduler try to get better performace. More specifically,
> in the acpi-cpufreq driver use cppc_get_highest_perf() to get highest
> frequency value of each core, use sched_set_itmt_core_prio() to set
> highest frequency value as core priority, and use sched_set_itmt_support()
> provided by ITMT to tell the scheduler to favor on the preferred cores.
>
> Signed-off-by: Tony W Wang-oc <[email protected]>
> ---
> drivers/cpufreq/acpi-cpufreq.c | 56 +++++++++++++++++++++++++++++++++-
> 1 file changed, 55 insertions(+), 1 deletion(-)
>
> diff --git a/drivers/cpufreq/acpi-cpufreq.c b/drivers/cpufreq/acpi-cpufreq.c
> index 37f1cdf46d29..f4c1ff9e4bb0 100644
> --- a/drivers/cpufreq/acpi-cpufreq.c
> +++ b/drivers/cpufreq/acpi-cpufreq.c
> @@ -663,8 +663,56 @@ static u64 get_max_boost_ratio(unsigned int cpu)
>
> return div_u64(highest_perf << SCHED_CAPACITY_SHIFT, nominal_perf);
> }
> +
> +/* The work item is needed to avoid CPU hotplug locking issues */
> +static void sched_itmt_work_fn(struct work_struct *work)
> +{
> + sched_set_itmt_support();
> +}
> +
> +static DECLARE_WORK(sched_itmt_work, sched_itmt_work_fn);
> +
> +static void set_itmt_prio(int cpu)
> +{
> + static bool cppc_highest_perf_diff;
> + static struct cpumask core_prior_mask;
> + u64 highest_perf;
> + static u64 max_highest_perf = 0, min_highest_perf = U64_MAX;
> + int ret;
> +
> + ret = cppc_get_highest_perf(cpu, &highest_perf);
> + if (ret)
> + return;
> +
> + sched_set_itmt_core_prio(highest_perf, cpu);
> + cpumask_set_cpu(cpu, &core_prior_mask);
> +
> + if (max_highest_perf <= min_highest_perf) {
> + if (highest_perf > max_highest_perf)
> + max_highest_perf = highest_perf;
> +
> + if (highest_perf < min_highest_perf)
> + min_highest_perf = highest_perf;
> +
> + if (max_highest_perf > min_highest_perf) {
> + /*
> + * This code can be run during CPU online under the
> + * CPU hotplug locks, so sched_set_itmt_support()
> + * cannot be called from here. Queue up a work item
> + * to invoke it.
> + */
> + cppc_highest_perf_diff = true;
> + }
> + }
> +
> + if (cppc_highest_perf_diff && cpumask_equal(&core_prior_mask, cpu_online_mask)) {
> + pr_debug("queue a work to set itmt enabled\n");
> + schedule_work(&sched_itmt_work);
> + }
> +}
> #else
> static inline u64 get_max_boost_ratio(unsigned int cpu) { return 0; }
> +static void set_itmt_prio(int cpu) { }
> #endif
>
> static int acpi_cpufreq_cpu_init(struct cpufreq_policy *policy)
> @@ -677,7 +725,7 @@ static int acpi_cpufreq_cpu_init(struct cpufreq_policy *policy)
> unsigned int valid_states = 0;
> unsigned int result = 0;
> u64 max_boost_ratio;
> - unsigned int i;
> + unsigned int i, j;
> #ifdef CONFIG_SMP
> static int blacklisted;
> #endif
> @@ -742,6 +790,12 @@ static int acpi_cpufreq_cpu_init(struct cpufreq_policy *policy)
> }
> #endif
>
> + if (c->x86_vendor == X86_VENDOR_CENTAUR || c->x86_vendor == X86_VENDOR_ZHAOXIN) {
> + for_each_cpu(j, policy->cpus) {
> + set_itmt_prio(j);
> + }
> + }
> +
> /* capability check */
> if (perf->state_count <= 1) {
> pr_debug("No P-States\n");
> --
Have you considered using the CPPC cpufreq driver on those platforms?
On 2024/1/3 23:37, Rafael J. Wysocki wrote:
> On Thu, Dec 28, 2023 at 8:57 AM Tony W Wang-oc <[email protected]> wrote:
>> For Zhaoxin CPUs, the cores' highest frequencies may be different, which
>> means that cores may run at different max frequencies,
>>
>> According to ACPI-spec6 chapter 8.4.7, the per-core highest frequency
>> value can be obtained via cppc.
>>
>> The core with the higher frequency have better performance, which can be
>> called as preferred core. And better performance can be achieved by
>> making the scheduler to run tasks on these preferred cores.
>>
>> The cpufreq driver can use the highest frequency value as the prioriy of
>> core to make the scheduler try to get better performace. More specifically,
>> in the acpi-cpufreq driver use cppc_get_highest_perf() to get highest
>> frequency value of each core, use sched_set_itmt_core_prio() to set
>> highest frequency value as core priority, and use sched_set_itmt_support()
>> provided by ITMT to tell the scheduler to favor on the preferred cores.
>>
>> Signed-off-by: Tony W Wang-oc <[email protected]>
>> ---
>> drivers/cpufreq/acpi-cpufreq.c | 56 +++++++++++++++++++++++++++++++++-
>> 1 file changed, 55 insertions(+), 1 deletion(-)
>>
>> diff --git a/drivers/cpufreq/acpi-cpufreq.c b/drivers/cpufreq/acpi-cpufreq.c
>> index 37f1cdf46d29..f4c1ff9e4bb0 100644
>> --- a/drivers/cpufreq/acpi-cpufreq.c
>> +++ b/drivers/cpufreq/acpi-cpufreq.c
>> @@ -663,8 +663,56 @@ static u64 get_max_boost_ratio(unsigned int cpu)
>>
>> return div_u64(highest_perf << SCHED_CAPACITY_SHIFT, nominal_perf);
>> }
>> +
>> +/* The work item is needed to avoid CPU hotplug locking issues */
>> +static void sched_itmt_work_fn(struct work_struct *work)
>> +{
>> + sched_set_itmt_support();
>> +}
>> +
>> +static DECLARE_WORK(sched_itmt_work, sched_itmt_work_fn);
>> +
>> +static void set_itmt_prio(int cpu)
>> +{
>> + static bool cppc_highest_perf_diff;
>> + static struct cpumask core_prior_mask;
>> + u64 highest_perf;
>> + static u64 max_highest_perf = 0, min_highest_perf = U64_MAX;
>> + int ret;
>> +
>> + ret = cppc_get_highest_perf(cpu, &highest_perf);
>> + if (ret)
>> + return;
>> +
>> + sched_set_itmt_core_prio(highest_perf, cpu);
>> + cpumask_set_cpu(cpu, &core_prior_mask);
>> +
>> + if (max_highest_perf <= min_highest_perf) {
>> + if (highest_perf > max_highest_perf)
>> + max_highest_perf = highest_perf;
>> +
>> + if (highest_perf < min_highest_perf)
>> + min_highest_perf = highest_perf;
>> +
>> + if (max_highest_perf > min_highest_perf) {
>> + /*
>> + * This code can be run during CPU online under the
>> + * CPU hotplug locks, so sched_set_itmt_support()
>> + * cannot be called from here. Queue up a work item
>> + * to invoke it.
>> + */
>> + cppc_highest_perf_diff = true;
>> + }
>> + }
>> +
>> + if (cppc_highest_perf_diff && cpumask_equal(&core_prior_mask, cpu_online_mask)) {
>> + pr_debug("queue a work to set itmt enabled\n");
>> + schedule_work(&sched_itmt_work);
>> + }
>> +}
>> #else
>> static inline u64 get_max_boost_ratio(unsigned int cpu) { return 0; }
>> +static void set_itmt_prio(int cpu) { }
>> #endif
>>
>> static int acpi_cpufreq_cpu_init(struct cpufreq_policy *policy)
>> @@ -677,7 +725,7 @@ static int acpi_cpufreq_cpu_init(struct cpufreq_policy *policy)
>> unsigned int valid_states = 0;
>> unsigned int result = 0;
>> u64 max_boost_ratio;
>> - unsigned int i;
>> + unsigned int i, j;
>> #ifdef CONFIG_SMP
>> static int blacklisted;
>> #endif
>> @@ -742,6 +790,12 @@ static int acpi_cpufreq_cpu_init(struct cpufreq_policy *policy)
>> }
>> #endif
>>
>> + if (c->x86_vendor == X86_VENDOR_CENTAUR || c->x86_vendor == X86_VENDOR_ZHAOXIN) {
>> + for_each_cpu(j, policy->cpus) {
>> + set_itmt_prio(j);
>> + }
>> + }
>> +
>> /* capability check */
>> if (perf->state_count <= 1) {
>> pr_debug("No P-States\n");
>> --
> Have you considered using the CPPC cpufreq driver on those platforms?
Thanks for your reply.
The ACPI cpufreq driver is used by default on Zhaoxin platforms. We added
Zhaoxin preferred core support and did related tests based on the ACPI
cpufreq driver.
The CPPC cpufreq driver is currently used on ARM platforms. We have
not yet considered using the CPPC cpufreq driver to support the Zhaoxin
preferred core feature, and we are also unclear how well the CPPC cpufreq
driver works on the x86 platform.
At the moment, it seems more appropriate to add Zhaoxin preferred
core support to the ACPI cpufreq driver.
Sincerely
TonyWWangoc
On Thu, Dec 28, 2023 at 03:57:03PM +0800, Tony W Wang-oc wrote:
> Add function cppc_get_highest_perf in generic CPPC driver to get the
> highest perf register value for specified core.
>
> Signed-off-by: Tony W Wang-oc <[email protected]>
> ---
> drivers/acpi/cppc_acpi.c | 13 +++++++++++++
> include/acpi/cppc_acpi.h | 5 +++++
> 2 files changed, 18 insertions(+)
>
> diff --git a/drivers/acpi/cppc_acpi.c b/drivers/acpi/cppc_acpi.c
> index 7ff269a78c20..1a77c514d007 100644
> --- a/drivers/acpi/cppc_acpi.c
> +++ b/drivers/acpi/cppc_acpi.c
> @@ -1154,6 +1154,19 @@ int cppc_get_nominal_perf(int cpunum, u64 *nominal_perf)
> return cppc_get_perf(cpunum, NOMINAL_PERF, nominal_perf);
> }
>
> +/**
> + * cppc_get_highest_perf - Get the highest performance register value.
> + * @cpunum: CPU from which to get highest performance.
> + * @highest_perf: Return address.
> + *
> + * Return: 0 for success, -EIO otherwise.
> + */
> +int cppc_get_highest_perf(int cpunum, u64 *highest_perf)
> +{
> + return cppc_get_perf(cpunum, HIGHEST_PERF, highest_perf);
> +}
> +EXPORT_SYMBOL_GPL(cppc_get_highest_perf);
The Intel P-State driver uses cppc_get_perf_caps(). You would not need to
export this function.
Having said that, the Intel P-State driver could also use this new
function, IMO. AFAICS, it does not *have* to use cppc_get_perf_caps().
On Thu, Dec 28, 2023 at 03:57:05PM +0800, Tony W Wang-oc wrote:
> For Zhaoxin CPUs, the cores' highest frequencies may be different, which
> means that cores may run at different max frequencies,
>
> According to ACPI-spec6 chapter 8.4.7, the per-core highest frequency
> value can be obtained via cppc.
>
> The core with the higher frequency have better performance, which can be
> called as preferred core. And better performance can be achieved by
> making the scheduler to run tasks on these preferred cores.
>
> The cpufreq driver can use the highest frequency value as the prioriy of
> core to make the scheduler try to get better performace. More specifically,
> in the acpi-cpufreq driver use cppc_get_highest_perf() to get highest
> frequency value of each core, use sched_set_itmt_core_prio() to set
> highest frequency value as core priority, and use sched_set_itmt_support()
> provided by ITMT to tell the scheduler to favor on the preferred cores.
>
> Signed-off-by: Tony W Wang-oc <[email protected]>
> ---
> drivers/cpufreq/acpi-cpufreq.c | 56 +++++++++++++++++++++++++++++++++-
> 1 file changed, 55 insertions(+), 1 deletion(-)
>
> diff --git a/drivers/cpufreq/acpi-cpufreq.c b/drivers/cpufreq/acpi-cpufreq.c
> index 37f1cdf46d29..f4c1ff9e4bb0 100644
> --- a/drivers/cpufreq/acpi-cpufreq.c
> +++ b/drivers/cpufreq/acpi-cpufreq.c
> @@ -663,8 +663,56 @@ static u64 get_max_boost_ratio(unsigned int cpu)
>
> return div_u64(highest_perf << SCHED_CAPACITY_SHIFT, nominal_perf);
> }
> +
> +/* The work item is needed to avoid CPU hotplug locking issues */
> +static void sched_itmt_work_fn(struct work_struct *work)
> +{
> + sched_set_itmt_support();
> +}
> +
> +static DECLARE_WORK(sched_itmt_work, sched_itmt_work_fn);
> +
> +static void set_itmt_prio(int cpu)
> +{
> + static bool cppc_highest_perf_diff;
> + static struct cpumask core_prior_mask;
> + u64 highest_perf;
> + static u64 max_highest_perf = 0, min_highest_perf = U64_MAX;
> + int ret;
> +
> + ret = cppc_get_highest_perf(cpu, &highest_perf);
> + if (ret)
> + return;
> +
> + sched_set_itmt_core_prio(highest_perf, cpu);
> + cpumask_set_cpu(cpu, &core_prior_mask);
> +
> + if (max_highest_perf <= min_highest_perf) {
> + if (highest_perf > max_highest_perf)
> + max_highest_perf = highest_perf;
> +
> + if (highest_perf < min_highest_perf)
> + min_highest_perf = highest_perf;
> +
> + if (max_highest_perf > min_highest_perf) {
> + /*
> + * This code can be run during CPU online under the
> + * CPU hotplug locks, so sched_set_itmt_support()
> + * cannot be called from here. Queue up a work item
> + * to invoke it.
> + */
> + cppc_highest_perf_diff = true;
> + }
> + }
> +
> + if (cppc_highest_perf_diff && cpumask_equal(&core_prior_mask, cpu_online_mask)) {
> + pr_debug("queue a work to set itmt enabled\n");
> + schedule_work(&sched_itmt_work);
> + }
> +}
sched_itmt_work and this function is a duplicate of what the intel_pstate
driver already does. It might be good if consolidate in a single place
if you are going to pursue this approach.
On 2024/1/18 06:07, Ricardo Neri wrote:
>
> [这封邮件来自外部发件人]
>
> On Thu, Dec 28, 2023 at 03:57:03PM +0800, Tony W Wang-oc wrote:
>> Add function cppc_get_highest_perf in generic CPPC driver to get the
>> highest perf register value for specified core.
>>
>> Signed-off-by: Tony W Wang-oc <[email protected]>
>> ---
>> drivers/acpi/cppc_acpi.c | 13 +++++++++++++
>> include/acpi/cppc_acpi.h | 5 +++++
>> 2 files changed, 18 insertions(+)
>>
>> diff --git a/drivers/acpi/cppc_acpi.c b/drivers/acpi/cppc_acpi.c
>> index 7ff269a78c20..1a77c514d007 100644
>> --- a/drivers/acpi/cppc_acpi.c
>> +++ b/drivers/acpi/cppc_acpi.c
>> @@ -1154,6 +1154,19 @@ int cppc_get_nominal_perf(int cpunum, u64 *nominal_perf)
>> return cppc_get_perf(cpunum, NOMINAL_PERF, nominal_perf);
>> }
>>
>> +/**
>> + * cppc_get_highest_perf - Get the highest performance register value.
>> + * @cpunum: CPU from which to get highest performance.
>> + * @highest_perf: Return address.
>> + *
>> + * Return: 0 for success, -EIO otherwise.
>> + */
>> +int cppc_get_highest_perf(int cpunum, u64 *highest_perf)
>> +{
>> + return cppc_get_perf(cpunum, HIGHEST_PERF, highest_perf);
>> +}
>> +EXPORT_SYMBOL_GPL(cppc_get_highest_perf);
> The Intel P-State driver uses cppc_get_perf_caps(). You would not need to
> export this function.
>
> Having said that, the Intel P-State driver could also use this new
> function, IMO. AFAICS, it does not *have* to use cppc_get_perf_caps().
Yes, you are right.
On 2024/1/18 06:51, Ricardo Neri wrote:
>
> [这封邮件来自外部发件人]
>
> On Thu, Dec 28, 2023 at 03:57:05PM +0800, Tony W Wang-oc wrote:
>> For Zhaoxin CPUs, the cores' highest frequencies may be different, which
>> means that cores may run at different max frequencies,
>>
>> According to ACPI-spec6 chapter 8.4.7, the per-core highest frequency
>> value can be obtained via cppc.
>>
>> The core with the higher frequency have better performance, which can be
>> called as preferred core. And better performance can be achieved by
>> making the scheduler to run tasks on these preferred cores.
>>
>> The cpufreq driver can use the highest frequency value as the prioriy of
>> core to make the scheduler try to get better performace. More specifically,
>> in the acpi-cpufreq driver use cppc_get_highest_perf() to get highest
>> frequency value of each core, use sched_set_itmt_core_prio() to set
>> highest frequency value as core priority, and use sched_set_itmt_support()
>> provided by ITMT to tell the scheduler to favor on the preferred cores.
>>
>> Signed-off-by: Tony W Wang-oc <[email protected]>
>> ---
>> drivers/cpufreq/acpi-cpufreq.c | 56 +++++++++++++++++++++++++++++++++-
>> 1 file changed, 55 insertions(+), 1 deletion(-)
>>
>> diff --git a/drivers/cpufreq/acpi-cpufreq.c b/drivers/cpufreq/acpi-cpufreq.c
>> index 37f1cdf46d29..f4c1ff9e4bb0 100644
>> --- a/drivers/cpufreq/acpi-cpufreq.c
>> +++ b/drivers/cpufreq/acpi-cpufreq.c
>> @@ -663,8 +663,56 @@ static u64 get_max_boost_ratio(unsigned int cpu)
>>
>> return div_u64(highest_perf << SCHED_CAPACITY_SHIFT, nominal_perf);
>> }
>> +
>> +/* The work item is needed to avoid CPU hotplug locking issues */
>> +static void sched_itmt_work_fn(struct work_struct *work)
>> +{
>> + sched_set_itmt_support();
>> +}
>> +
>> +static DECLARE_WORK(sched_itmt_work, sched_itmt_work_fn);
>> +
>> +static void set_itmt_prio(int cpu)
>> +{
>> + static bool cppc_highest_perf_diff;
>> + static struct cpumask core_prior_mask;
>> + u64 highest_perf;
>> + static u64 max_highest_perf = 0, min_highest_perf = U64_MAX;
>> + int ret;
>> +
>> + ret = cppc_get_highest_perf(cpu, &highest_perf);
>> + if (ret)
>> + return;
>> +
>> + sched_set_itmt_core_prio(highest_perf, cpu);
>> + cpumask_set_cpu(cpu, &core_prior_mask);
>> +
>> + if (max_highest_perf <= min_highest_perf) {
>> + if (highest_perf > max_highest_perf)
>> + max_highest_perf = highest_perf;
>> +
>> + if (highest_perf < min_highest_perf)
>> + min_highest_perf = highest_perf;
>> +
>> + if (max_highest_perf > min_highest_perf) {
>> + /*
>> + * This code can be run during CPU online under the
>> + * CPU hotplug locks, so sched_set_itmt_support()
>> + * cannot be called from here. Queue up a work item
>> + * to invoke it.
>> + */
>> + cppc_highest_perf_diff = true;
>> + }
>> + }
>> +
>> + if (cppc_highest_perf_diff && cpumask_equal(&core_prior_mask, cpu_online_mask)) {
>> + pr_debug("queue a work to set itmt enabled\n");
>> + schedule_work(&sched_itmt_work);
>> + }
>> +}
> sched_itmt_work and this function is a duplicate of what the intel_pstate
> driver already does. It might be good if consolidate in a single place
> if you are going to pursue this approach.
Thanks for your suggestion, will change the patch code in v2.
Sorry for the late reply.
Hi ,Ricardo
About this duplicated ITMT code in the intel-pstate and acpi-cpufreq drivers:
how about we put these common codes in arch/x86/kernel/itmt.c?
In the intel-pstate/acpi-cpufreq driver, it only gets the highest frequency through the CPPC interface and calls sched_set_itmt_core_prio() provided by itmt.c;
in sched_set_itmt_core_prio(), do the following:
1) set the core priority according to the highest frequency;
2) check whether the cores' highest frequencies are different; if yes, set ITMT capable & enabled;
3) check whether all online cores have updated their core priority, which guarantees that rebuild_sched_domains() will get the correct priority info for each online core.
Following this rule, the patch looks like this:
diff --git a/arch/x86/kernel/itmt.c b/arch/x86/kernel/itmt.c
index 9a7c03d47861..eb24df2826bc 100644
--- a/arch/x86/kernel/itmt.c
+++ b/arch/x86/kernel/itmt.c
@@ -22,6 +22,7 @@
#include <linux/mutex.h>
#include <linux/sysctl.h>
#include <linux/nodemask.h>
+#include <linux/itmt.h>
static DEFINE_MUTEX(itmt_update_mutex);
DEFINE_PER_CPU_READ_MOSTLY(int, sched_core_priority);
@@ -162,6 +163,13 @@ int arch_asym_cpu_priority(int cpu)
return per_cpu(sched_core_priority, cpu);
}
+/* The work item is needed to avoid CPU hotplug locking issues */
+static void sched_itmt_work_fn(struct work_struct *work)
+{
+ sched_set_itmt_support();
+}
+static DECLARE_WORK(sched_itmt_work, sched_itmt_work_fn);
+
/**
* sched_set_itmt_core_prio() - Set CPU priority based on ITMT
* @prio: Priority of @cpu
@@ -176,7 +184,36 @@ int arch_asym_cpu_priority(int cpu)
* the CPU priorities. The sched domains have no
* dependency on CPU priorities.
*/
+
+static u64 max_highest_prio = 0, min_highest_prio = U64_MAX;
+static bool core_priority_diff=false;
+static struct cpumask core_prio_cpumask;
void sched_set_itmt_core_prio(int prio, int cpu)
{
per_cpu(sched_core_priority, cpu) = prio;
+ cpumask_set_cpu(cpu, &core_prio_cpumask);
+
+ if (max_highest_prio <= min_highest_prio)
+ {
+ if (prio > max_highest_prio)
+ max_highest_prio = prio;
+
+ if (prio < min_highest_prio)
+ min_highest_prio = prio;
+
+ if (max_highest_prio > min_highest_prio)
+ core_priority_diff = true;
+ }
+
+ if (core_priority_diff && cpumask_equal(&core_prio_cpumask, cpu_online_mask))
+ {
+ /*
+ * This code can be run during CPU online under the CPU hotplug locks,
+ * so sched_set_itmt cannot be called from here.
+ * queue a work item to invoke it
+ */
+ pr_debug("queue a work to set itmt support and enable\n");
+ schedule_work(&sched_itmt_work);
+ }
+
}
diff --git a/drivers/cpufreq/acpi-cpufreq.c b/drivers/cpufreq/acpi-cpufreq.c
index 37f1cdf46d29..3e5e0f66b2ed 100644
--- a/drivers/cpufreq/acpi-cpufreq.c
+++ b/drivers/cpufreq/acpi-cpufreq.c
@@ -30,6 +30,7 @@
#include <acpi/processor.h>
#include <acpi/cppc_acpi.h>
+#include <linux/itmt.h>
#include <asm/msr.h>
#include <asm/processor.h>
#include <asm/cpufeature.h>
@@ -663,8 +664,17 @@ static u64 get_max_boost_ratio(unsigned int cpu)
return div_u64(highest_perf << SCHED_CAPACITY_SHIFT, nominal_perf);
}
+
+static void core_set_itmt_prio(int cpu)
+{
+ u64 highest_perf;
+ cppc_get_highest_perf(cpu, &highest_perf);
+ sched_set_itmt_core_prio(highest_perf, cpu);
+}
+
#else
static inline u64 get_max_boost_ratio(unsigned int cpu) { return 0; }
+static void core_set_itmt_prio(int cpu) {}
#endif
static int acpi_cpufreq_cpu_init(struct cpufreq_policy *policy)
@@ -677,7 +687,7 @@ static int acpi_cpufreq_cpu_init(struct cpufreq_policy *policy)
unsigned int valid_states = 0;
unsigned int result = 0;
u64 max_boost_ratio;
- unsigned int i;
+ unsigned int i,j;
#ifdef CONFIG_SMP
static int blacklisted;
#endif
@@ -741,6 +751,10 @@ static int acpi_cpufreq_cpu_init(struct cpufreq_policy *policy)
pr_info_once("overriding BIOS provided _PSD data\n");
}
#endif
+ for_each_cpu(j,policy->cpus)
+ {
+ core_set_itmt_prio(j);
+ }
/* capability check */
if (perf->state_count <= 1) {
diff --git a/drivers/cpufreq/intel_pstate.c b/drivers/cpufreq/intel_pstate.c
index dbbf299f4219..4b04e6db9d5b 100644
--- a/drivers/cpufreq/intel_pstate.c
+++ b/drivers/cpufreq/intel_pstate.c
@@ -27,6 +27,7 @@
#include <linux/pm_qos.h>
#include <linux/bitfield.h>
#include <trace/events/power.h>
+#include <linux/itmt.h>
#include <asm/cpu.h>
#include <asm/div64.h>
@@ -340,23 +341,14 @@ static bool intel_pstate_get_ppc_enable_status(void)
#ifdef CONFIG_ACPI_CPPC_LIB
-/* The work item is needed to avoid CPU hotplug locking issues */
-static void intel_pstste_sched_itmt_work_fn(struct work_struct *work)
-{
- sched_set_itmt_support();
-}
-
-static DECLARE_WORK(sched_itmt_work, intel_pstste_sched_itmt_work_fn);
-
#define CPPC_MAX_PERF U8_MAX
static void intel_pstate_set_itmt_prio(int cpu)
{
- struct cppc_perf_caps cppc_perf;
- static u32 max_highest_perf = 0, min_highest_perf = U32_MAX;
+ u64 highest_perf;
int ret;
- ret = cppc_get_perf_caps(cpu, &cppc_perf);
+ ret = cppc_get_highest_perf(cpu,&highest_perf);
if (ret)
return;
@@ -365,33 +357,15 @@ static void intel_pstate_set_itmt_prio(int cpu)
* In this case we can't use CPPC.highest_perf to enable ITMT.
* In this case we can look at MSR_HWP_CAPABILITIES bits [8:0] to decide.
*/
- if (cppc_perf.highest_perf == CPPC_MAX_PERF)
- cppc_perf.highest_perf = HWP_HIGHEST_PERF(READ_ONCE(all_cpu_data[cpu]->hwp_cap_cached));
+ if (highest_perf == CPPC_MAX_PERF)
+ highest_perf = HWP_HIGHEST_PERF(READ_ONCE(all_cpu_data[cpu]->hwp_cap_cached));
/*
* The priorities can be set regardless of whether or not
* sched_set_itmt_support(true) has been called and it is valid to
* update them at any time after it has been called.
*/
- sched_set_itmt_core_prio(cppc_perf.highest_perf, cpu);
-
- if (max_highest_perf <= min_highest_perf) {
- if (cppc_perf.highest_perf > max_highest_perf)
- max_highest_perf = cppc_perf.highest_perf;
-
- if (cppc_perf.highest_perf < min_highest_perf)
- min_highest_perf = cppc_perf.highest_perf;
-
- if (max_highest_perf > min_highest_perf) {
- /*
- * This code can be run during CPU online under the
- * CPU hotplug locks, so sched_set_itmt_support()
- * cannot be called from here. Queue up a work item
- * to invoke it.
- */
- schedule_work(&sched_itmt_work);
- }
- }
+ sched_set_itmt_core_prio(highest_perf, cpu);
}
static int intel_pstate_get_cppc_guaranteed(int cpu)
Thanks
Linda
保密声明:
本邮件含有保密或专有信息,仅供指定收件人使用。严禁对本邮件或其内容做任何未经授权的查阅、使用、复制或转发。
CONFIDENTIAL NOTE:
This email contains confidential or legally privileged information and is for the sole use of its intended recipient. Any unauthorized review, use, copying or forwarding of this email or the content of this email is strictly prohibited.