The _CPC method can get per-core highest frequency.
The highest frequency may varies between cores which mean cores can
running at different max frequency, so can use it as a core priority
and give a hint to scheduler in order to put critical task to the
higher priority core.
Signed-off-by: Tony W Wang-oc <[email protected]>
---
v1->v2: Fix build errors reported by kernel test robot
arch/x86/kernel/itmt.c | 2 ++
drivers/cpufreq/acpi-cpufreq.c | 59 ++++++++++++++++++++++++++++++----
2 files changed, 54 insertions(+), 7 deletions(-)
diff --git a/arch/x86/kernel/itmt.c b/arch/x86/kernel/itmt.c
index ee4fe8cdb857..b49ac8ecbbd6 100644
--- a/arch/x86/kernel/itmt.c
+++ b/arch/x86/kernel/itmt.c
@@ -122,6 +122,7 @@ int sched_set_itmt_support(void)
return 0;
}
+EXPORT_SYMBOL_GPL(sched_set_itmt_support);
/**
* sched_clear_itmt_support() - Revoke platform's support of ITMT
@@ -181,3 +182,4 @@ void sched_set_itmt_core_prio(int prio, int cpu)
{
per_cpu(sched_core_priority, cpu) = prio;
}
+EXPORT_SYMBOL_GPL(sched_set_itmt_core_prio);
diff --git a/drivers/cpufreq/acpi-cpufreq.c b/drivers/cpufreq/acpi-cpufreq.c
index b2f05d27167e..5733323e04ac 100644
--- a/drivers/cpufreq/acpi-cpufreq.c
+++ b/drivers/cpufreq/acpi-cpufreq.c
@@ -628,28 +628,35 @@ static int acpi_cpufreq_blacklist(struct cpuinfo_x86 *c)
#endif
#ifdef CONFIG_ACPI_CPPC_LIB
-static u64 get_max_boost_ratio(unsigned int cpu)
+static void cpufreq_get_core_perf(int cpu, u64 *highest_perf, u64 *nominal_perf)
{
struct cppc_perf_caps perf_caps;
- u64 highest_perf, nominal_perf;
int ret;
if (acpi_pstate_strict)
- return 0;
+ return;
ret = cppc_get_perf_caps(cpu, &perf_caps);
if (ret) {
pr_debug("CPU%d: Unable to get performance capabilities (%d)\n",
cpu, ret);
- return 0;
+ return;
}
if (boot_cpu_data.x86_vendor == X86_VENDOR_AMD)
- highest_perf = amd_get_highest_perf();
+ *highest_perf = amd_get_highest_perf();
else
- highest_perf = perf_caps.highest_perf;
+ *highest_perf = perf_caps.highest_perf;
+
+ *nominal_perf = perf_caps.nominal_perf;
+ return;
+}
- nominal_perf = perf_caps.nominal_perf;
+static u64 get_max_boost_ratio(unsigned int cpu)
+{
+ u64 highest_perf, nominal_perf;
+
+ cpufreq_get_core_perf(cpu, &highest_perf, &nominal_perf);
if (!highest_perf || !nominal_perf) {
pr_debug("CPU%d: highest or nominal performance missing\n", cpu);
@@ -663,8 +670,44 @@ static u64 get_max_boost_ratio(unsigned int cpu)
return div_u64(highest_perf << SCHED_CAPACITY_SHIFT, nominal_perf);
}
+
+static void cpufreq_sched_itmt_work_fn(struct work_struct *work)
+{
+ sched_set_itmt_support();
+}
+
+static DECLARE_WORK(sched_itmt_work, cpufreq_sched_itmt_work_fn);
+
+static void cpufreq_set_itmt_prio(int cpu)
+{
+ u64 highest_perf, nominal_perf;
+ static u32 max_highest_perf = 0, min_highest_perf = U32_MAX;
+
+ cpufreq_get_core_perf(cpu, &highest_perf, &nominal_perf);
+
+ sched_set_itmt_core_prio(highest_perf, cpu);
+
+ if (max_highest_perf <= min_highest_perf) {
+ if (highest_perf > max_highest_perf)
+ max_highest_perf = highest_perf;
+
+ if (highest_perf < min_highest_perf)
+ min_highest_perf = highest_perf;
+
+ if (max_highest_perf > min_highest_perf) {
+ /*
+ * This code can be run during CPU online under the
+ * CPU hotplug locks, so sched_set_itmt_support()
+ * cannot be called from here. Queue up a work item
+ * to invoke it.
+ */
+ schedule_work(&sched_itmt_work);
+ }
+ }
+}
#else
static inline u64 get_max_boost_ratio(unsigned int cpu) { return 0; }
+static void cpufreq_set_itmt_prio(int cpu) { return; }
#endif
static int acpi_cpufreq_cpu_init(struct cpufreq_policy *policy)
@@ -870,6 +913,8 @@ static int acpi_cpufreq_cpu_init(struct cpufreq_policy *policy)
/* notify BIOS that we exist */
acpi_processor_notify_smm(THIS_MODULE);
+ cpufreq_set_itmt_prio(cpu);
+
pr_debug("CPU%u - ACPI performance management activated.\n", cpu);
for (i = 0; i < perf->state_count; i++)
pr_debug(" %cP%d: %d MHz, %d mW, %d uS\n",
--
2.17.1
On Tue, Aug 8, 2023 at 1:13 PM Tony W Wang-oc <[email protected]> wrote:
>
> The _CPC method can get per-core highest frequency.
Well, not exactly. A more precise way to say this would be "The
per-core highest frequency can be obtained via CPPC."
> The highest frequency may varies between cores which mean cores can
"may vary" and "which means"
> running at different max frequency, so can use it as a core priority
"can run", but it would be better to say "may run".
> and give a hint to scheduler in order to put critical task to the
> higher priority core.
Well, roughly speaking ...
You should really talk about ITMT and how it can be hooked up to this.
> Signed-off-by: Tony W Wang-oc <[email protected]>
> ---
> v1->v2: Fix build errors reported by kernel test robot
>
> arch/x86/kernel/itmt.c | 2 ++
> drivers/cpufreq/acpi-cpufreq.c | 59 ++++++++++++++++++++++++++++++----
> 2 files changed, 54 insertions(+), 7 deletions(-)
>
> diff --git a/arch/x86/kernel/itmt.c b/arch/x86/kernel/itmt.c
> index ee4fe8cdb857..b49ac8ecbbd6 100644
> --- a/arch/x86/kernel/itmt.c
> +++ b/arch/x86/kernel/itmt.c
> @@ -122,6 +122,7 @@ int sched_set_itmt_support(void)
>
> return 0;
> }
> +EXPORT_SYMBOL_GPL(sched_set_itmt_support);
This requires an ACK from the x86 maintainers.
>
> /**
> * sched_clear_itmt_support() - Revoke platform's support of ITMT
> @@ -181,3 +182,4 @@ void sched_set_itmt_core_prio(int prio, int cpu)
> {
> per_cpu(sched_core_priority, cpu) = prio;
> }
> +EXPORT_SYMBOL_GPL(sched_set_itmt_core_prio);
And same here.
> diff --git a/drivers/cpufreq/acpi-cpufreq.c b/drivers/cpufreq/acpi-cpufreq.c
> index b2f05d27167e..5733323e04ac 100644
> --- a/drivers/cpufreq/acpi-cpufreq.c
> +++ b/drivers/cpufreq/acpi-cpufreq.c
> @@ -628,28 +628,35 @@ static int acpi_cpufreq_blacklist(struct cpuinfo_x86 *c)
> #endif
>
> #ifdef CONFIG_ACPI_CPPC_LIB
> -static u64 get_max_boost_ratio(unsigned int cpu)
> +static void cpufreq_get_core_perf(int cpu, u64 *highest_perf, u64 *nominal_perf)
This is not a cpufreq core function, so please use a different prefix
in its name.
> {
> struct cppc_perf_caps perf_caps;
> - u64 highest_perf, nominal_perf;
> int ret;
>
> if (acpi_pstate_strict)
> - return 0;
> + return;
>
> ret = cppc_get_perf_caps(cpu, &perf_caps);
> if (ret) {
> pr_debug("CPU%d: Unable to get performance capabilities (%d)\n",
> cpu, ret);
> - return 0;
> + return;
> }
>
> if (boot_cpu_data.x86_vendor == X86_VENDOR_AMD)
> - highest_perf = amd_get_highest_perf();
> + *highest_perf = amd_get_highest_perf();
> else
> - highest_perf = perf_caps.highest_perf;
> + *highest_perf = perf_caps.highest_perf;
> +
> + *nominal_perf = perf_caps.nominal_perf;
> + return;
> +}
>
> - nominal_perf = perf_caps.nominal_perf;
> +static u64 get_max_boost_ratio(unsigned int cpu)
> +{
> + u64 highest_perf, nominal_perf;
> +
> + cpufreq_get_core_perf(cpu, &highest_perf, &nominal_perf);
>
> if (!highest_perf || !nominal_perf) {
> pr_debug("CPU%d: highest or nominal performance missing\n", cpu);
> @@ -663,8 +670,44 @@ static u64 get_max_boost_ratio(unsigned int cpu)
>
> return div_u64(highest_perf << SCHED_CAPACITY_SHIFT, nominal_perf);
> }
> +
> +static void cpufreq_sched_itmt_work_fn(struct work_struct *work)
A similar comment applies here.
> +{
> + sched_set_itmt_support();
> +}
> +
> +static DECLARE_WORK(sched_itmt_work, cpufreq_sched_itmt_work_fn);
> +
> +static void cpufreq_set_itmt_prio(int cpu)
> +{
> + u64 highest_perf, nominal_perf;
> + static u32 max_highest_perf = 0, min_highest_perf = U32_MAX;
> +
> + cpufreq_get_core_perf(cpu, &highest_perf, &nominal_perf);
> +
> + sched_set_itmt_core_prio(highest_perf, cpu);
> +
> + if (max_highest_perf <= min_highest_perf) {
> + if (highest_perf > max_highest_perf)
> + max_highest_perf = highest_perf;
> +
> + if (highest_perf < min_highest_perf)
> + min_highest_perf = highest_perf;
> +
> + if (max_highest_perf > min_highest_perf) {
> + /*
> + * This code can be run during CPU online under the
> + * CPU hotplug locks, so sched_set_itmt_support()
> + * cannot be called from here. Queue up a work item
> + * to invoke it.
> + */
> + schedule_work(&sched_itmt_work);
> + }
This potentially runs before ITMT priorities are set for all CPUs.
Isn't it a problem?
> + }
> +}
> #else
> static inline u64 get_max_boost_ratio(unsigned int cpu) { return 0; }
> +static void cpufreq_set_itmt_prio(int cpu) { return; }
> #endif
>
> static int acpi_cpufreq_cpu_init(struct cpufreq_policy *policy)
> @@ -870,6 +913,8 @@ static int acpi_cpufreq_cpu_init(struct cpufreq_policy *policy)
> /* notify BIOS that we exist */
> acpi_processor_notify_smm(THIS_MODULE);
>
> + cpufreq_set_itmt_prio(cpu);
> +
> pr_debug("CPU%u - ACPI performance management activated.\n", cpu);
> for (i = 0; i < perf->state_count; i++)
> pr_debug(" %cP%d: %d MHz, %d mW, %d uS\n",
> --
On 8/23/23 04:01, Rafael J. Wysocki wrote:
> On Tue, Aug 8, 2023 at 1:13 PM Tony W Wang-oc <[email protected]> wrote:
>>
>> The _CPC method can get per-core highest frequency.
>
> Well, not exactly. A more precise way to say this would be "The
> per-core highest frequency can be obtained via CPPC."
>
Thanks for your reply, will rewrite the commit message in the next version.
>> The highest frequency may varies between cores which mean cores can
>
> "may vary" and "which means"
>
>> running at different max frequency, so can use it as a core priority
>
> "can run", but it would be better to say "may run".
>
>> and give a hint to scheduler in order to put critical task to the
>> higher priority core.
>
> Well, roughly speaking ...
>
> You should really talk about ITMT and how it can be hooked up to this.
>
Ok, Got it.
>> Signed-off-by: Tony W Wang-oc <[email protected]>
>> ---
>> v1->v2: Fix build errors reported by kernel test robot
>>
>> arch/x86/kernel/itmt.c | 2 ++
>> drivers/cpufreq/acpi-cpufreq.c | 59 ++++++++++++++++++++++++++++++----
>> 2 files changed, 54 insertions(+), 7 deletions(-)
>>
>> diff --git a/arch/x86/kernel/itmt.c b/arch/x86/kernel/itmt.c
>> index ee4fe8cdb857..b49ac8ecbbd6 100644
>> --- a/arch/x86/kernel/itmt.c
>> +++ b/arch/x86/kernel/itmt.c
>> @@ -122,6 +122,7 @@ int sched_set_itmt_support(void)
>>
>> return 0;
>> }
>> +EXPORT_SYMBOL_GPL(sched_set_itmt_support);
>
> This requires an ACK from the x86 maintainers.
>
>>
>> /**
>> * sched_clear_itmt_support() - Revoke platform's support of ITMT
>> @@ -181,3 +182,4 @@ void sched_set_itmt_core_prio(int prio, int cpu)
>> {
>> per_cpu(sched_core_priority, cpu) = prio;
>> }
>> +EXPORT_SYMBOL_GPL(sched_set_itmt_core_prio);
>
> And same here.
>
>> diff --git a/drivers/cpufreq/acpi-cpufreq.c b/drivers/cpufreq/acpi-cpufreq.c
>> index b2f05d27167e..5733323e04ac 100644
>> --- a/drivers/cpufreq/acpi-cpufreq.c
>> +++ b/drivers/cpufreq/acpi-cpufreq.c
>> @@ -628,28 +628,35 @@ static int acpi_cpufreq_blacklist(struct cpuinfo_x86 *c)
>> #endif
>>
>> #ifdef CONFIG_ACPI_CPPC_LIB
>> -static u64 get_max_boost_ratio(unsigned int cpu)
>> +static void cpufreq_get_core_perf(int cpu, u64 *highest_perf, u64 *nominal_perf)
>
> This is not a cpufreq core function, so please use a different prefix
> in its name.
>
Ok. Will remove the prefix of "cpufreq_".
>> {
>> struct cppc_perf_caps perf_caps;
>> - u64 highest_perf, nominal_perf;
>> int ret;
>>
>> if (acpi_pstate_strict)
>> - return 0;
>> + return;
>>
>> ret = cppc_get_perf_caps(cpu, &perf_caps);
>> if (ret) {
>> pr_debug("CPU%d: Unable to get performance capabilities (%d)\n",
>> cpu, ret);
>> - return 0;
>> + return;
>> }
>>
>> if (boot_cpu_data.x86_vendor == X86_VENDOR_AMD)
>> - highest_perf = amd_get_highest_perf();
>> + *highest_perf = amd_get_highest_perf();
>> else
>> - highest_perf = perf_caps.highest_perf;
>> + *highest_perf = perf_caps.highest_perf;
>> +
>> + *nominal_perf = perf_caps.nominal_perf;
>> + return;
>> +}
>>
>> - nominal_perf = perf_caps.nominal_perf;
>> +static u64 get_max_boost_ratio(unsigned int cpu)
>> +{
>> + u64 highest_perf, nominal_perf;
>> +
>> + cpufreq_get_core_perf(cpu, &highest_perf, &nominal_perf);
>>
>> if (!highest_perf || !nominal_perf) {
>> pr_debug("CPU%d: highest or nominal performance missing\n", cpu);
>> @@ -663,8 +670,44 @@ static u64 get_max_boost_ratio(unsigned int cpu)
>>
>> return div_u64(highest_perf << SCHED_CAPACITY_SHIFT, nominal_perf);
>> }
>> +
>> +static void cpufreq_sched_itmt_work_fn(struct work_struct *work)
>
> A similar comment applies here.
>
>> +{
>> + sched_set_itmt_support();
>> +}
>> +
>> +static DECLARE_WORK(sched_itmt_work, cpufreq_sched_itmt_work_fn);
>> +
>> +static void cpufreq_set_itmt_prio(int cpu)
>> +{
>> + u64 highest_perf, nominal_perf;
>> + static u32 max_highest_perf = 0, min_highest_perf = U32_MAX;
>> +
>> + cpufreq_get_core_perf(cpu, &highest_perf, &nominal_perf);
>> +
>> + sched_set_itmt_core_prio(highest_perf, cpu);
>> +
>> + if (max_highest_perf <= min_highest_perf) {
>> + if (highest_perf > max_highest_perf)
>> + max_highest_perf = highest_perf;
>> +
>> + if (highest_perf < min_highest_perf)
>> + min_highest_perf = highest_perf;
>> +
>> + if (max_highest_perf > min_highest_perf) {
>> + /*
>> + * This code can be run during CPU online under the
>> + * CPU hotplug locks, so sched_set_itmt_support()
>> + * cannot be called from here. Queue up a work item
>> + * to invoke it.
>> + */
>> + schedule_work(&sched_itmt_work);
>> + }
>
> This potentially runs before ITMT priorities are set for all CPUs.
> Isn't it a problem?
>
Yes, you are right.
Will use schedule_delayed_work(&sched_itmt_work, msecs_to_jiffies(500))
to fix this.
Sincerely.
TonyWWang-oc
>> + }
>> +}
>> #else
>> static inline u64 get_max_boost_ratio(unsigned int cpu) { return 0; }
>> +static void cpufreq_set_itmt_prio(int cpu) { return; }
>> #endif
>>
>> static int acpi_cpufreq_cpu_init(struct cpufreq_policy *policy)
>> @@ -870,6 +913,8 @@ static int acpi_cpufreq_cpu_init(struct cpufreq_policy *policy)
>> /* notify BIOS that we exist */
>> acpi_processor_notify_smm(THIS_MODULE);
>>
>> + cpufreq_set_itmt_prio(cpu);
>> +
>> pr_debug("CPU%u - ACPI performance management activated.\n", cpu);
>> for (i = 0; i < perf->state_count; i++)
>> pr_debug(" %cP%d: %d MHz, %d mW, %d uS\n",
>> --
On Thu, Aug 31, 2023 at 12:19 PM Tony W Wang-oc
<[email protected]> wrote:
>
>
> On 8/23/23 04:01, Rafael J. Wysocki wrote:
> > On Tue, Aug 8, 2023 at 1:13 PM Tony W Wang-oc <[email protected]> wrote:
> >>
> >> The _CPC method can get per-core highest frequency.
> >
> > Well, not exactly. A more precise way to say this would be "The
> > per-core highest frequency can be obtained via CPPC."
> >
>
> Thanks for your reply, will rewrite the commit in next version.
>
> >> The highest frequency may varies between cores which mean cores can
> >
> > "may vary" and "which means"
> >
> >> running at different max frequency, so can use it as a core priority
> >
> > "can run", but it would be better to say "may run".
> >
> >> and give a hint to scheduler in order to put critical task to the
> >> higher priority core.
> >
> > Well, roughly speaking ...
> >
> > You should really talk about ITMT and how it can be hooked up to this.
> >
>
> Ok, Got it.
>
> >> Signed-off-by: Tony W Wang-oc <[email protected]>
> >> ---
> >> v1->v2: Fix build errors reported by kernel test robot
> >>
> >> arch/x86/kernel/itmt.c | 2 ++
> >> drivers/cpufreq/acpi-cpufreq.c | 59 ++++++++++++++++++++++++++++++----
> >> 2 files changed, 54 insertions(+), 7 deletions(-)
> >>
> >> diff --git a/arch/x86/kernel/itmt.c b/arch/x86/kernel/itmt.c
> >> index ee4fe8cdb857..b49ac8ecbbd6 100644
> >> --- a/arch/x86/kernel/itmt.c
> >> +++ b/arch/x86/kernel/itmt.c
> >> @@ -122,6 +122,7 @@ int sched_set_itmt_support(void)
> >>
> >> return 0;
> >> }
> >> +EXPORT_SYMBOL_GPL(sched_set_itmt_support);
> >
> > This requires an ACK from the x86 maintainers.
> >
> >>
> >> /**
> >> * sched_clear_itmt_support() - Revoke platform's support of ITMT
> >> @@ -181,3 +182,4 @@ void sched_set_itmt_core_prio(int prio, int cpu)
> >> {
> >> per_cpu(sched_core_priority, cpu) = prio;
> >> }
> >> +EXPORT_SYMBOL_GPL(sched_set_itmt_core_prio);
> >
> > And same here.
> >
> >> diff --git a/drivers/cpufreq/acpi-cpufreq.c b/drivers/cpufreq/acpi-cpufreq.c
> >> index b2f05d27167e..5733323e04ac 100644
> >> --- a/drivers/cpufreq/acpi-cpufreq.c
> >> +++ b/drivers/cpufreq/acpi-cpufreq.c
> >> @@ -628,28 +628,35 @@ static int acpi_cpufreq_blacklist(struct cpuinfo_x86 *c)
> >> #endif
> >>
> >> #ifdef CONFIG_ACPI_CPPC_LIB
> >> -static u64 get_max_boost_ratio(unsigned int cpu)
> >> +static void cpufreq_get_core_perf(int cpu, u64 *highest_perf, u64 *nominal_perf)
> >
> > This is not a cpufreq core function, so please use a different prefix
> > in its name.
> >
>
> Ok. Will remove the prefix of "cpufreq_".
>
> >> {
> >> struct cppc_perf_caps perf_caps;
> >> - u64 highest_perf, nominal_perf;
> >> int ret;
> >>
> >> if (acpi_pstate_strict)
> >> - return 0;
> >> + return;
> >>
> >> ret = cppc_get_perf_caps(cpu, &perf_caps);
> >> if (ret) {
> >> pr_debug("CPU%d: Unable to get performance capabilities (%d)\n",
> >> cpu, ret);
> >> - return 0;
> >> + return;
> >> }
> >>
> >> if (boot_cpu_data.x86_vendor == X86_VENDOR_AMD)
> >> - highest_perf = amd_get_highest_perf();
> >> + *highest_perf = amd_get_highest_perf();
> >> else
> >> - highest_perf = perf_caps.highest_perf;
> >> + *highest_perf = perf_caps.highest_perf;
> >> +
> >> + *nominal_perf = perf_caps.nominal_perf;
> >> + return;
> >> +}
> >>
> >> - nominal_perf = perf_caps.nominal_perf;
> >> +static u64 get_max_boost_ratio(unsigned int cpu)
> >> +{
> >> + u64 highest_perf, nominal_perf;
> >> +
> >> + cpufreq_get_core_perf(cpu, &highest_perf, &nominal_perf);
> >>
> >> if (!highest_perf || !nominal_perf) {
> >> pr_debug("CPU%d: highest or nominal performance missing\n", cpu);
> >> @@ -663,8 +670,44 @@ static u64 get_max_boost_ratio(unsigned int cpu)
> >>
> >> return div_u64(highest_perf << SCHED_CAPACITY_SHIFT, nominal_perf);
> >> }
> >> +
> >> +static void cpufreq_sched_itmt_work_fn(struct work_struct *work)
> >
> > A similar comment applies here.
> >
> >> +{
> >> + sched_set_itmt_support();
> >> +}
> >> +
> >> +static DECLARE_WORK(sched_itmt_work, cpufreq_sched_itmt_work_fn);
> >> +
> >> +static void cpufreq_set_itmt_prio(int cpu)
> >> +{
> >> + u64 highest_perf, nominal_perf;
> >> + static u32 max_highest_perf = 0, min_highest_perf = U32_MAX;
> >> +
> >> + cpufreq_get_core_perf(cpu, &highest_perf, &nominal_perf);
> >> +
> >> + sched_set_itmt_core_prio(highest_perf, cpu);
> >> +
> >> + if (max_highest_perf <= min_highest_perf) {
> >> + if (highest_perf > max_highest_perf)
> >> + max_highest_perf = highest_perf;
> >> +
> >> + if (highest_perf < min_highest_perf)
> >> + min_highest_perf = highest_perf;
> >> +
> >> + if (max_highest_perf > min_highest_perf) {
> >> + /*
> >> + * This code can be run during CPU online under the
> >> + * CPU hotplug locks, so sched_set_itmt_support()
> >> + * cannot be called from here. Queue up a work item
> >> + * to invoke it.
> >> + */
> >> + schedule_work(&sched_itmt_work);
> >> + }
> >
> > This potentially runs before ITMT priorities are set for all CPUs.
> > Isn't it a problem?
> >
>
> Yes, you are right.
> Will use schedule_delayed_work(&sched_itmt_work, msecs_to_jiffies(500))
> to fix this.
If the ordering matters, it is better to enforce it directly (through
an explicit code dependency, for example) than to rely on the timing
to do the right thing.
If you do the above, then it will not be clear why it is done (a
comment may help to address that, though) and why the delay is 500 ms
in particular.
On 8/31/23 21:03, Rafael J. Wysocki wrote:
> On Thu, Aug 31, 2023 at 12:19 PM Tony W Wang-oc
> <[email protected]> wrote:
>>
>>
>> On 8/23/23 04:01, Rafael J. Wysocki wrote:
>>> On Tue, Aug 8, 2023 at 1:13 PM Tony W Wang-oc <[email protected]> wrote:
>>>>
>>>> The _CPC method can get per-core highest frequency.
>>>
>>> Well, not exactly. A more precise way to say this would be "The
>>> per-core highest frequency can be obtained via CPPC."
>>>
>>
>> Thanks for your reply, will rewrite the commit in next version.
>>
>>>> The highest frequency may varies between cores which mean cores can
>>>
>>> "may vary" and "which means"
>>>
>>>> running at different max frequency, so can use it as a core priority
>>>
>>> "can run", but it would be better to say "may run".
>>>
>>>> and give a hint to scheduler in order to put critical task to the
>>>> higher priority core.
>>>
>>> Well, roughly speaking ...
>>>
>>> You should really talk about ITMT and how it can be hooked up to this.
>>>
>>
>> Ok, Got it.
>>
>>>> Signed-off-by: Tony W Wang-oc <[email protected]>
>>>> ---
>>>> v1->v2: Fix build errors reported by kernel test robot
>>>>
>>>> arch/x86/kernel/itmt.c | 2 ++
>>>> drivers/cpufreq/acpi-cpufreq.c | 59 ++++++++++++++++++++++++++++++----
>>>> 2 files changed, 54 insertions(+), 7 deletions(-)
>>>>
>>>> diff --git a/arch/x86/kernel/itmt.c b/arch/x86/kernel/itmt.c
>>>> index ee4fe8cdb857..b49ac8ecbbd6 100644
>>>> --- a/arch/x86/kernel/itmt.c
>>>> +++ b/arch/x86/kernel/itmt.c
>>>> @@ -122,6 +122,7 @@ int sched_set_itmt_support(void)
>>>>
>>>> return 0;
>>>> }
>>>> +EXPORT_SYMBOL_GPL(sched_set_itmt_support);
>>>
>>> This requires an ACK from the x86 maintainers.
>>>
>>>>
>>>> /**
>>>> * sched_clear_itmt_support() - Revoke platform's support of ITMT
>>>> @@ -181,3 +182,4 @@ void sched_set_itmt_core_prio(int prio, int cpu)
>>>> {
>>>> per_cpu(sched_core_priority, cpu) = prio;
>>>> }
>>>> +EXPORT_SYMBOL_GPL(sched_set_itmt_core_prio);
>>>
>>> And same here.
>>>
>>>> diff --git a/drivers/cpufreq/acpi-cpufreq.c b/drivers/cpufreq/acpi-cpufreq.c
>>>> index b2f05d27167e..5733323e04ac 100644
>>>> --- a/drivers/cpufreq/acpi-cpufreq.c
>>>> +++ b/drivers/cpufreq/acpi-cpufreq.c
>>>> @@ -628,28 +628,35 @@ static int acpi_cpufreq_blacklist(struct cpuinfo_x86 *c)
>>>> #endif
>>>>
>>>> #ifdef CONFIG_ACPI_CPPC_LIB
>>>> -static u64 get_max_boost_ratio(unsigned int cpu)
>>>> +static void cpufreq_get_core_perf(int cpu, u64 *highest_perf, u64 *nominal_perf)
>>>
>>> This is not a cpufreq core function, so please use a different prefix
>>> in its name.
>>>
>>
>> Ok. Will remove the prefix of "cpufreq_".
>>
>>>> {
>>>> struct cppc_perf_caps perf_caps;
>>>> - u64 highest_perf, nominal_perf;
>>>> int ret;
>>>>
>>>> if (acpi_pstate_strict)
>>>> - return 0;
>>>> + return;
>>>>
>>>> ret = cppc_get_perf_caps(cpu, &perf_caps);
>>>> if (ret) {
>>>> pr_debug("CPU%d: Unable to get performance capabilities (%d)\n",
>>>> cpu, ret);
>>>> - return 0;
>>>> + return;
>>>> }
>>>>
>>>> if (boot_cpu_data.x86_vendor == X86_VENDOR_AMD)
>>>> - highest_perf = amd_get_highest_perf();
>>>> + *highest_perf = amd_get_highest_perf();
>>>> else
>>>> - highest_perf = perf_caps.highest_perf;
>>>> + *highest_perf = perf_caps.highest_perf;
>>>> +
>>>> + *nominal_perf = perf_caps.nominal_perf;
>>>> + return;
>>>> +}
>>>>
>>>> - nominal_perf = perf_caps.nominal_perf;
>>>> +static u64 get_max_boost_ratio(unsigned int cpu)
>>>> +{
>>>> + u64 highest_perf, nominal_perf;
>>>> +
>>>> + cpufreq_get_core_perf(cpu, &highest_perf, &nominal_perf);
>>>>
>>>> if (!highest_perf || !nominal_perf) {
>>>> pr_debug("CPU%d: highest or nominal performance missing\n", cpu);
>>>> @@ -663,8 +670,44 @@ static u64 get_max_boost_ratio(unsigned int cpu)
>>>>
>>>> return div_u64(highest_perf << SCHED_CAPACITY_SHIFT, nominal_perf);
>>>> }
>>>> +
>>>> +static void cpufreq_sched_itmt_work_fn(struct work_struct *work)
>>>
>>> A similar comment applies here.
>>>
>>>> +{
>>>> + sched_set_itmt_support();
>>>> +}
>>>> +
>>>> +static DECLARE_WORK(sched_itmt_work, cpufreq_sched_itmt_work_fn);
>>>> +
>>>> +static void cpufreq_set_itmt_prio(int cpu)
>>>> +{
>>>> + u64 highest_perf, nominal_perf;
>>>> + static u32 max_highest_perf = 0, min_highest_perf = U32_MAX;
>>>> +
>>>> + cpufreq_get_core_perf(cpu, &highest_perf, &nominal_perf);
>>>> +
>>>> + sched_set_itmt_core_prio(highest_perf, cpu);
>>>> +
>>>> + if (max_highest_perf <= min_highest_perf) {
>>>> + if (highest_perf > max_highest_perf)
>>>> + max_highest_perf = highest_perf;
>>>> +
>>>> + if (highest_perf < min_highest_perf)
>>>> + min_highest_perf = highest_perf;
>>>> +
>>>> + if (max_highest_perf > min_highest_perf) {
>>>> + /*
>>>> + * This code can be run during CPU online under the
>>>> + * CPU hotplug locks, so sched_set_itmt_support()
>>>> + * cannot be called from here. Queue up a work item
>>>> + * to invoke it.
>>>> + */
>>>> + schedule_work(&sched_itmt_work);
>>>> + }
>>>
>>> This potentially runs before ITMT priorities are set for all CPUs.
>>> Isn't it a problem?
>>>
>>
>> Yes, you are right.
>> Will use schedule_delayed_work(&sched_itmt_work, msecs_to_jiffies(500))
>> to fix this.
>
> If the ordering matters, it is better to enforce it directly (through
> an explicit code dependency, for example) than to rely on the timing
> to do the right thing.
>
> If you do the above, then it will not be clear why it is done (a
> comment may help to address that, though) and why the delay is 500 ms
> in particular.
Yes, you are right. Relying on the timing is not exact.
Will find another way to enforce the order.
Sincerely
TonyWWang-oc