2015-07-16 18:17:13

by Konstantin Khlebnikov

[permalink] [raw]
Subject: [PATCH RFC] intel_pstate: play well with frequency limits set by acpi

IPMI can control CPU P-states remotely: configuration is reported via
common ACPI interface (_PPC/_PSS/etc). This patch adds required minimal
support in intel_pstate to receive and use these P-state limits.

* ignore limit of top state in _PPC: it lower than turbo boost frequency
* register intel_pstate in acpi-processor to get states from _PSS
* link acpi_processor_get_bios_limit: this adds attribute "bios_limit"

Signed-off-by: Konstantin Khlebnikov <[email protected]>
---
drivers/acpi/processor_perflib.c | 3 +-
drivers/cpufreq/intel_pstate.c | 57 ++++++++++++++++++++++++++++++++++++++
2 files changed, 59 insertions(+), 1 deletion(-)

diff --git a/drivers/acpi/processor_perflib.c b/drivers/acpi/processor_perflib.c
index cfc8aba72f86..781e328c9d5f 100644
--- a/drivers/acpi/processor_perflib.c
+++ b/drivers/acpi/processor_perflib.c
@@ -98,7 +98,8 @@ static int acpi_processor_ppc_notifier(struct notifier_block *nb,

ppc = (unsigned int)pr->performance_platform_limit;

- if (ppc >= pr->performance->state_count)
+ /* Ignore limit of top state: it lower than turbo boost frequency */
+ if (!ppc || ppc >= pr->performance->state_count)
goto out;

cpufreq_verify_within_limits(policy, 0,
diff --git a/drivers/cpufreq/intel_pstate.c b/drivers/cpufreq/intel_pstate.c
index 15ada47bb720..4a34ddf4fa73 100644
--- a/drivers/cpufreq/intel_pstate.c
+++ b/drivers/cpufreq/intel_pstate.c
@@ -26,6 +26,7 @@
#include <linux/fs.h>
#include <linux/debugfs.h>
#include <linux/acpi.h>
+#include <acpi/processor.h>
#include <linux/vmalloc.h>
#include <trace/events/power.h>

@@ -113,6 +114,9 @@ struct cpudata {
u64 prev_mperf;
u64 prev_tsc;
struct sample sample;
+#ifdef CONFIG_ACPI_PROCESSOR
+ struct acpi_processor_performance acpi_data;
+#endif
};

static struct cpudata **all_cpu_data;
@@ -145,6 +149,7 @@ static int hwp_active;

struct perf_limits {
int no_turbo;
+ int no_acpi;
int turbo_disabled;
int max_perf_pct;
int min_perf_pct;
@@ -158,6 +163,7 @@ struct perf_limits {

static struct perf_limits limits = {
.no_turbo = 0,
+ .no_acpi = !IS_ENABLED(CONFIG_ACPI_PROCESSOR),
.turbo_disabled = 0,
.max_perf_pct = 100,
.max_perf = int_tofp(1),
@@ -449,6 +455,18 @@ static ssize_t store_min_perf_pct(struct kobject *a, struct attribute *b,
return count;
}

+static ssize_t store_no_acpi(struct kobject *a, struct attribute *b,
+ const char *buf, size_t count)
+{
+#ifdef CONFIG_ACPI_PROCESSOR
+ return kstrtouint(buf, 0, &limits.no_acpi) ?: count;
+#else
+ return -ENODEV;
+#endif
+}
+show_one(no_acpi, no_acpi);
+define_one_global_rw(no_acpi);
+
show_one(max_perf_pct, max_perf_pct);
show_one(min_perf_pct, min_perf_pct);

@@ -460,6 +478,7 @@ define_one_global_ro(num_pstates);

static struct attribute *intel_pstate_attributes[] = {
&no_turbo.attr,
+ &no_acpi.attr,
&max_perf_pct.attr,
&min_perf_pct.attr,
&turbo_pct.attr,
@@ -1049,6 +1068,38 @@ static int intel_pstate_cpu_init(struct cpufreq_policy *policy)
policy->cpuinfo.transition_latency = CPUFREQ_ETERNAL;
cpumask_set_cpu(policy->cpu, policy->cpus);

+#ifdef CONFIG_ACPI_PROCESSOR
+ if (!limits.no_acpi) {
+ /*
+ * Minimum necessary to get acpi_processor_ppc_notifier() and
+ * acpi_processor_get_bios_limit() working.
+ */
+ if (!zalloc_cpumask_var(&cpu->acpi_data.shared_cpu_map,
+ GFP_KERNEL))
+ rc = -ENOMEM;
+ else
+ rc = acpi_processor_register_performance(
+ &cpu->acpi_data, policy->cpu);
+ if (rc) {
+ pr_err("intel_pstate: acpi init failed: %d\n", rc);
+ free_cpumask_var(cpu->acpi_data.shared_cpu_map);
+ limits.no_acpi = 1;
+ }
+ }
+#endif
+ return 0;
+}
+
+static int intel_pstate_cpu_exit(struct cpufreq_policy *policy)
+{
+#ifdef CONFIG_ACPI_PROCESSOR
+ struct cpudata *cpu = all_cpu_data[policy->cpu];
+
+ if (cpu->acpi_data.state_count)
+ acpi_processor_unregister_performance(&cpu->acpi_data,
+ policy->cpu);
+ free_cpumask_var(cpu->acpi_data.shared_cpu_map);
+#endif
return 0;
}

@@ -1057,7 +1108,11 @@ static struct cpufreq_driver intel_pstate_driver = {
.verify = intel_pstate_verify_policy,
.setpolicy = intel_pstate_set_policy,
.get = intel_pstate_get,
+#ifdef CONFIG_ACPI_PROCESSOR
+ .bios_limit = acpi_processor_get_bios_limit,
+#endif
.init = intel_pstate_cpu_init,
+ .exit = intel_pstate_cpu_exit,
.stop_cpu = intel_pstate_stop_cpu,
.name = "intel_pstate",
};
@@ -1286,6 +1341,8 @@ static int __init intel_pstate_setup(char *str)
force_load = 1;
if (!strcmp(str, "hwp_only"))
hwp_only = 1;
+ if (!strcmp(str, "no_acpi"))
+ limits.no_acpi = 1;
return 0;
}
early_param("intel_pstate", intel_pstate_setup);


2015-07-16 22:11:21

by srinivas pandruvada

[permalink] [raw]
Subject: Re: [PATCH RFC] intel_pstate: play well with frequency limits set by acpi

On Thu, 2015-07-16 at 21:17 +0300, Konstantin Khlebnikov wrote:
> IPMI can control CPU P-states remotely: configuration is reported via
> common ACPI interface (_PPC/_PSS/etc). This patch adds required minimal
> support in intel_pstate to receive and use these P-state limits.
>
> * ignore limit of top state in _PPC: it lower than turbo boost frequency
> * register intel_pstate in acpi-processor to get states from _PSS
> * link acpi_processor_get_bios_limit: this adds attribute "bios_limit"
>
> Signed-off-by: Konstantin Khlebnikov <[email protected]>
> ---
> drivers/acpi/processor_perflib.c | 3 +-
> drivers/cpufreq/intel_pstate.c | 57 ++++++++++++++++++++++++++++++++++++++
> 2 files changed, 59 insertions(+), 1 deletion(-)
>
> diff --git a/drivers/acpi/processor_perflib.c b/drivers/acpi/processor_perflib.c
> index cfc8aba72f86..781e328c9d5f 100644
> --- a/drivers/acpi/processor_perflib.c
> +++ b/drivers/acpi/processor_perflib.c
> @@ -98,7 +98,8 @@ static int acpi_processor_ppc_notifier(struct notifier_block *nb,
>
> ppc = (unsigned int)pr->performance_platform_limit;
>
> - if (ppc >= pr->performance->state_count)
> + /* Ignore limit of top state: it lower than turbo boost frequency */
> + if (!ppc || ppc >= pr->performance->state_count)
Why? Isn't the previous check enough?
> goto out;
>
> cpufreq_verify_within_limits(policy, 0,
> diff --git a/drivers/cpufreq/intel_pstate.c b/drivers/cpufreq/intel_pstate.c
> index 15ada47bb720..4a34ddf4fa73 100644
> --- a/drivers/cpufreq/intel_pstate.c
> +++ b/drivers/cpufreq/intel_pstate.c
> @@ -26,6 +26,7 @@
> #include <linux/fs.h>
> #include <linux/debugfs.h>
> #include <linux/acpi.h>
> +#include <acpi/processor.h>
> #include <linux/vmalloc.h>
> #include <trace/events/power.h>
>
> @@ -113,6 +114,9 @@ struct cpudata {
> u64 prev_mperf;
> u64 prev_tsc;
> struct sample sample;
> +#ifdef CONFIG_ACPI_PROCESSOR
> + struct acpi_processor_performance acpi_data;
> +#endif
> };
>
> static struct cpudata **all_cpu_data;
> @@ -145,6 +149,7 @@ static int hwp_active;
>
> struct perf_limits {
> int no_turbo;
> + int no_acpi;
> int turbo_disabled;
> int max_perf_pct;
> int min_perf_pct;
> @@ -158,6 +163,7 @@ struct perf_limits {
>
> static struct perf_limits limits = {
> .no_turbo = 0,
> + .no_acpi = !IS_ENABLED(CONFIG_ACPI_PROCESSOR),
> .turbo_disabled = 0,
> .max_perf_pct = 100,
> .max_perf = int_tofp(1),
> @@ -449,6 +455,18 @@ static ssize_t store_min_perf_pct(struct kobject *a, struct attribute *b,
> return count;
> }
>
> +static ssize_t store_no_acpi(struct kobject *a, struct attribute *b,
> + const char *buf, size_t count)
> +{
> +#ifdef CONFIG_ACPI_PROCESSOR
> + return kstrtouint(buf, 0, &limits.no_acpi) ?: count;
> +#else
> + return -ENODEV;
> +#endif
> +}
> +show_one(no_acpi, no_acpi);
> +define_one_global_rw(no_acpi);
> +
> show_one(max_perf_pct, max_perf_pct);
> show_one(min_perf_pct, min_perf_pct);
>
> @@ -460,6 +478,7 @@ define_one_global_ro(num_pstates);
>
> static struct attribute *intel_pstate_attributes[] = {
> &no_turbo.attr,
> + &no_acpi.attr,
> &max_perf_pct.attr,
> &min_perf_pct.attr,
> &turbo_pct.attr,
> @@ -1049,6 +1068,38 @@ static int intel_pstate_cpu_init(struct cpufreq_policy *policy)
> policy->cpuinfo.transition_latency = CPUFREQ_ETERNAL;
> cpumask_set_cpu(policy->cpu, policy->cpus);
>
> +#ifdef CONFIG_ACPI_PROCESSOR
> + if (!limits.no_acpi) {
> + /*
> + * Minimum necessary to get acpi_processor_ppc_notifier() and
> + * acpi_processor_get_bios_limit() working.
> + */
> + if (!zalloc_cpumask_var(&cpu->acpi_data.shared_cpu_map,
> + GFP_KERNEL))
> + rc = -ENOMEM;
> + else
> + rc = acpi_processor_register_performance(
> + &cpu->acpi_data, policy->cpu);
> + if (rc) {
> + pr_err("intel_pstate: acpi init failed: %d\n", rc);
> + free_cpumask_var(cpu->acpi_data.shared_cpu_map);
> + limits.no_acpi = 1;
> + }
> + }
> +#endif
> + return 0;
> +}
> +
> +static int intel_pstate_cpu_exit(struct cpufreq_policy *policy)
> +{
> +#ifdef CONFIG_ACPI_PROCESSOR
> + struct cpudata *cpu = all_cpu_data[policy->cpu];
> +
> + if (cpu->acpi_data.state_count)
> + acpi_processor_unregister_performance(&cpu->acpi_data,
> + policy->cpu);
> + free_cpumask_var(cpu->acpi_data.shared_cpu_map);
> +#endif
> return 0;
> }
>
> @@ -1057,7 +1108,11 @@ static struct cpufreq_driver intel_pstate_driver = {
> .verify = intel_pstate_verify_policy,
> .setpolicy = intel_pstate_set_policy,
> .get = intel_pstate_get,
> +#ifdef CONFIG_ACPI_PROCESSOR
> + .bios_limit = acpi_processor_get_bios_limit,
> +#endif
> .init = intel_pstate_cpu_init,
> + .exit = intel_pstate_cpu_exit,
> .stop_cpu = intel_pstate_stop_cpu,
> .name = "intel_pstate",
> };
> @@ -1286,6 +1341,8 @@ static int __init intel_pstate_setup(char *str)
> force_load = 1;
> if (!strcmp(str, "hwp_only"))
> hwp_only = 1;
> + if (!strcmp(str, "no_acpi"))
> + limits.no_acpi = 1;
> return 0;
> }
> early_param("intel_pstate", intel_pstate_setup);
>
_PPC is index into _PSS. Since intel P state doesn't follow _PSS, the
states may not be 1:1 matching. So we have to harmonize them.


> --
> To unsubscribe from this list: send the line "unsubscribe linux-pm" in
> the body of a message to [email protected]
> More majordomo info at http://vger.kernel.org/majordomo-info.html

2015-07-17 04:36:32

by Konstantin Khlebnikov

[permalink] [raw]
Subject: Re: [PATCH RFC] intel_pstate: play well with frequency limits set by acpi

On Fri, Jul 17, 2015 at 1:08 AM, Srinivas Pandruvada
<[email protected]> wrote:
> On Thu, 2015-07-16 at 21:17 +0300, Konstantin Khlebnikov wrote:
>> IPMI can control CPU P-states remotely: configuration is reported via
>> common ACPI interface (_PPC/_PSS/etc). This patch adds required minimal
>> support in intel_pstate to receive and use these P-state limits.
>>
>> * ignore limit of top state in _PPC: it lower than turbo boost frequency
>> * register intel_pstate in acpi-processor to get states from _PSS
>> * link acpi_processor_get_bios_limit: this adds attribute "bios_limit"
>>
>> Signed-off-by: Konstantin Khlebnikov <[email protected]>
>> ---
>> drivers/acpi/processor_perflib.c | 3 +-
>> drivers/cpufreq/intel_pstate.c | 57 ++++++++++++++++++++++++++++++++++++++
>> 2 files changed, 59 insertions(+), 1 deletion(-)
>>
>> diff --git a/drivers/acpi/processor_perflib.c b/drivers/acpi/processor_perflib.c
>> index cfc8aba72f86..781e328c9d5f 100644
>> --- a/drivers/acpi/processor_perflib.c
>> +++ b/drivers/acpi/processor_perflib.c
>> @@ -98,7 +98,8 @@ static int acpi_processor_ppc_notifier(struct notifier_block *nb,
>>
>> ppc = (unsigned int)pr->performance_platform_limit;
>>
>> - if (ppc >= pr->performance->state_count)
>> + /* Ignore limit of top state: it lower than turbo boost frequency */
>> + if (!ppc || ppc >= pr->performance->state_count)
> Why? Isn't the previous check enough?

Zero _PPC state must be top performance state but as I see frequency in
_PSS is lower than maximum possible turbo frequency. So, in this case
intel_pstate cannnot get "100%" for max bound even it there is no limit set.

For example: I saw _PSS[0] = 2601 Mhz, PSS[1] = 2600 Mhz while turbo
state is 3400 Mhz.

>> goto out;
>>
>> cpufreq_verify_within_limits(policy, 0,
>> diff --git a/drivers/cpufreq/intel_pstate.c b/drivers/cpufreq/intel_pstate.c
>> index 15ada47bb720..4a34ddf4fa73 100644
>> --- a/drivers/cpufreq/intel_pstate.c
>> +++ b/drivers/cpufreq/intel_pstate.c
>> @@ -26,6 +26,7 @@
>> #include <linux/fs.h>
>> #include <linux/debugfs.h>
>> #include <linux/acpi.h>
>> +#include <acpi/processor.h>
>> #include <linux/vmalloc.h>
>> #include <trace/events/power.h>
>>
>> @@ -113,6 +114,9 @@ struct cpudata {
>> u64 prev_mperf;
>> u64 prev_tsc;
>> struct sample sample;
>> +#ifdef CONFIG_ACPI_PROCESSOR
>> + struct acpi_processor_performance acpi_data;
>> +#endif
>> };
>>
>> static struct cpudata **all_cpu_data;
>> @@ -145,6 +149,7 @@ static int hwp_active;
>>
>> struct perf_limits {
>> int no_turbo;
>> + int no_acpi;
>> int turbo_disabled;
>> int max_perf_pct;
>> int min_perf_pct;
>> @@ -158,6 +163,7 @@ struct perf_limits {
>>
>> static struct perf_limits limits = {
>> .no_turbo = 0,
>> + .no_acpi = !IS_ENABLED(CONFIG_ACPI_PROCESSOR),
>> .turbo_disabled = 0,
>> .max_perf_pct = 100,
>> .max_perf = int_tofp(1),
>> @@ -449,6 +455,18 @@ static ssize_t store_min_perf_pct(struct kobject *a, struct attribute *b,
>> return count;
>> }
>>
>> +static ssize_t store_no_acpi(struct kobject *a, struct attribute *b,
>> + const char *buf, size_t count)
>> +{
>> +#ifdef CONFIG_ACPI_PROCESSOR
>> + return kstrtouint(buf, 0, &limits.no_acpi) ?: count;
>> +#else
>> + return -ENODEV;
>> +#endif
>> +}
>> +show_one(no_acpi, no_acpi);
>> +define_one_global_rw(no_acpi);
>> +
>> show_one(max_perf_pct, max_perf_pct);
>> show_one(min_perf_pct, min_perf_pct);
>>
>> @@ -460,6 +478,7 @@ define_one_global_ro(num_pstates);
>>
>> static struct attribute *intel_pstate_attributes[] = {
>> &no_turbo.attr,
>> + &no_acpi.attr,
>> &max_perf_pct.attr,
>> &min_perf_pct.attr,
>> &turbo_pct.attr,
>> @@ -1049,6 +1068,38 @@ static int intel_pstate_cpu_init(struct cpufreq_policy *policy)
>> policy->cpuinfo.transition_latency = CPUFREQ_ETERNAL;
>> cpumask_set_cpu(policy->cpu, policy->cpus);
>>
>> +#ifdef CONFIG_ACPI_PROCESSOR
>> + if (!limits.no_acpi) {
>> + /*
>> + * Minimum necessary to get acpi_processor_ppc_notifier() and
>> + * acpi_processor_get_bios_limit() working.
>> + */
>> + if (!zalloc_cpumask_var(&cpu->acpi_data.shared_cpu_map,
>> + GFP_KERNEL))
>> + rc = -ENOMEM;
>> + else
>> + rc = acpi_processor_register_performance(
>> + &cpu->acpi_data, policy->cpu);
>> + if (rc) {
>> + pr_err("intel_pstate: acpi init failed: %d\n", rc);
>> + free_cpumask_var(cpu->acpi_data.shared_cpu_map);
>> + limits.no_acpi = 1;
>> + }
>> + }
>> +#endif
>> + return 0;
>> +}
>> +
>> +static int intel_pstate_cpu_exit(struct cpufreq_policy *policy)
>> +{
>> +#ifdef CONFIG_ACPI_PROCESSOR
>> + struct cpudata *cpu = all_cpu_data[policy->cpu];
>> +
>> + if (cpu->acpi_data.state_count)
>> + acpi_processor_unregister_performance(&cpu->acpi_data,
>> + policy->cpu);
>> + free_cpumask_var(cpu->acpi_data.shared_cpu_map);
>> +#endif
>> return 0;
>> }
>>
>> @@ -1057,7 +1108,11 @@ static struct cpufreq_driver intel_pstate_driver = {
>> .verify = intel_pstate_verify_policy,
>> .setpolicy = intel_pstate_set_policy,
>> .get = intel_pstate_get,
>> +#ifdef CONFIG_ACPI_PROCESSOR
>> + .bios_limit = acpi_processor_get_bios_limit,
>> +#endif
>> .init = intel_pstate_cpu_init,
>> + .exit = intel_pstate_cpu_exit,
>> .stop_cpu = intel_pstate_stop_cpu,
>> .name = "intel_pstate",
>> };
>> @@ -1286,6 +1341,8 @@ static int __init intel_pstate_setup(char *str)
>> force_load = 1;
>> if (!strcmp(str, "hwp_only"))
>> hwp_only = 1;
>> + if (!strcmp(str, "no_acpi"))
>> + limits.no_acpi = 1;
>> return 0;
>> }
>> early_param("intel_pstate", intel_pstate_setup);
>>
> _PPC is index into _PSS. Since intel P state doesn't follow _PSS, the
> states may not be 1:1 matching. So we have to harmonize them.
>
>
>> --
>> To unsubscribe from this list: send the line "unsubscribe linux-pm" in
>> the body of a message to [email protected]
>> More majordomo info at http://vger.kernel.org/majordomo-info.html
>
>
> --
> To unsubscribe from this list: send the line "unsubscribe linux-kernel" in
> the body of a message to [email protected]
> More majordomo info at http://vger.kernel.org/majordomo-info.html
> Please read the FAQ at http://www.tux.org/lkml/

2015-07-17 06:00:50

by Ethan Zhao

[permalink] [raw]
Subject: Re: [PATCH RFC] intel_pstate: play well with frequency limits set by acpi


On 2015/7/17 2:17, Konstantin Khlebnikov wrote:
> IPMI can control CPU P-states remotely: configuration is reported via
> common ACPI interface (_PPC/_PSS/etc). This patch adds required minimal
> support in intel_pstate to receive and use these P-state limits.
>
> * ignore limit of top state in _PPC: it lower than turbo boost frequency
> * register intel_pstate in acpi-processor to get states from _PSS
> * link acpi_processor_get_bios_limit: this adds attribute "bios_limit"
>
> Signed-off-by: Konstantin Khlebnikov <[email protected]>
> ---
> drivers/acpi/processor_perflib.c | 3 +-
> drivers/cpufreq/intel_pstate.c | 57 ++++++++++++++++++++++++++++++++++++++
> 2 files changed, 59 insertions(+), 1 deletion(-)
>
> diff --git a/drivers/acpi/processor_perflib.c b/drivers/acpi/processor_perflib.c
> index cfc8aba72f86..781e328c9d5f 100644
> --- a/drivers/acpi/processor_perflib.c
> +++ b/drivers/acpi/processor_perflib.c
> @@ -98,7 +98,8 @@ static int acpi_processor_ppc_notifier(struct notifier_block *nb,
>
> ppc = (unsigned int)pr->performance_platform_limit;
>
> - if (ppc >= pr->performance->state_count)
> + /* Ignore limit of top state: it lower than turbo boost frequency */
> + if (!ppc || ppc >= pr->performance->state_count)
Perhaps the !ppc is wrong if we check it against ACPI spec.
Zero value of ppc means:

"0 – States 0 through Nth state are available (all states available)"

> goto out;
>
> cpufreq_verify_within_limits(policy, 0,
> diff --git a/drivers/cpufreq/intel_pstate.c b/drivers/cpufreq/intel_pstate.c
> index 15ada47bb720..4a34ddf4fa73 100644
> --- a/drivers/cpufreq/intel_pstate.c
> +++ b/drivers/cpufreq/intel_pstate.c
> @@ -26,6 +26,7 @@
> #include <linux/fs.h>
> #include <linux/debugfs.h>
> #include <linux/acpi.h>
> +#include <acpi/processor.h>
> #include <linux/vmalloc.h>
> #include <trace/events/power.h>
>
> @@ -113,6 +114,9 @@ struct cpudata {
> u64 prev_mperf;
> u64 prev_tsc;
> struct sample sample;
> +#ifdef CONFIG_ACPI_PROCESSOR
> + struct acpi_processor_performance acpi_data;
> +#endif
> };
>
> static struct cpudata **all_cpu_data;
> @@ -145,6 +149,7 @@ static int hwp_active;
>
> struct perf_limits {
> int no_turbo;
> + int no_acpi;
> int turbo_disabled;
> int max_perf_pct;
> int min_perf_pct;
> @@ -158,6 +163,7 @@ struct perf_limits {
>
> static struct perf_limits limits = {
> .no_turbo = 0,
> + .no_acpi = !IS_ENABLED(CONFIG_ACPI_PROCESSOR),
> .turbo_disabled = 0,
> .max_perf_pct = 100,
> .max_perf = int_tofp(1),
> @@ -449,6 +455,18 @@ static ssize_t store_min_perf_pct(struct kobject *a, struct attribute *b,
> return count;
> }
>
> +static ssize_t store_no_acpi(struct kobject *a, struct attribute *b,
> + const char *buf, size_t count)
> +{
> +#ifdef CONFIG_ACPI_PROCESSOR
> + return kstrtouint(buf, 0, &limits.no_acpi) ?: count;
> +#else
> + return -ENODEV;
> +#endif
> +}
> +show_one(no_acpi, no_acpi);
> +define_one_global_rw(no_acpi);
> +
> show_one(max_perf_pct, max_perf_pct);
> show_one(min_perf_pct, min_perf_pct);
>
> @@ -460,6 +478,7 @@ define_one_global_ro(num_pstates);
>
> static struct attribute *intel_pstate_attributes[] = {
> &no_turbo.attr,
> + &no_acpi.attr,
> &max_perf_pct.attr,
> &min_perf_pct.attr,
> &turbo_pct.attr,
> @@ -1049,6 +1068,38 @@ static int intel_pstate_cpu_init(struct cpufreq_policy *policy)
> policy->cpuinfo.transition_latency = CPUFREQ_ETERNAL;
> cpumask_set_cpu(policy->cpu, policy->cpus);
>
> +#ifdef CONFIG_ACPI_PROCESSOR
> + if (!limits.no_acpi) {
> + /*
> + * Minimum necessary to get acpi_processor_ppc_notifier() and
> + * acpi_processor_get_bios_limit() working.
> + */
> + if (!zalloc_cpumask_var(&cpu->acpi_data.shared_cpu_map,
> + GFP_KERNEL))
> + rc = -ENOMEM;
> + else
> + rc = acpi_processor_register_performance(
> + &cpu->acpi_data, policy->cpu);
> + if (rc) {
> + pr_err("intel_pstate: acpi init failed: %d\n", rc);
> + free_cpumask_var(cpu->acpi_data.shared_cpu_map);
> + limits.no_acpi = 1;
> + }
> + }
> +#endif
> + return 0;
> +}
> +
> +static int intel_pstate_cpu_exit(struct cpufreq_policy *policy)
> +{
> +#ifdef CONFIG_ACPI_PROCESSOR
> + struct cpudata *cpu = all_cpu_data[policy->cpu];
> +
> + if (cpu->acpi_data.state_count)
> + acpi_processor_unregister_performance(&cpu->acpi_data,
> + policy->cpu);
> + free_cpumask_var(cpu->acpi_data.shared_cpu_map);
> +#endif
> return 0;
> }
>
> @@ -1057,7 +1108,11 @@ static struct cpufreq_driver intel_pstate_driver = {
> .verify = intel_pstate_verify_policy,
> .setpolicy = intel_pstate_set_policy,
> .get = intel_pstate_get,
> +#ifdef CONFIG_ACPI_PROCESSOR
> + .bios_limit = acpi_processor_get_bios_limit,
> +#endif
> .init = intel_pstate_cpu_init,
> + .exit = intel_pstate_cpu_exit,
> .stop_cpu = intel_pstate_stop_cpu,
> .name = "intel_pstate",
> };
> @@ -1286,6 +1341,8 @@ static int __init intel_pstate_setup(char *str)
> force_load = 1;
> if (!strcmp(str, "hwp_only"))
> hwp_only = 1;
> + if (!strcmp(str, "no_acpi"))
> + limits.no_acpi = 1;
> return 0;
> }
> early_param("intel_pstate", intel_pstate_setup);
>


Thanks,
Ethan

2015-07-17 07:10:58

by Konstantin Khlebnikov

[permalink] [raw]
Subject: Re: [PATCH RFC] intel_pstate: play well with frequency limits set by acpi

On Fri, Jul 17, 2015 at 9:00 AM, ethan zhao <[email protected]> wrote:
>
> On 2015/7/17 2:17, Konstantin Khlebnikov wrote:
>>
>> IPMI can control CPU P-states remotely: configuration is reported via
>> common ACPI interface (_PPC/_PSS/etc). This patch adds required minimal
>> support in intel_pstate to receive and use these P-state limits.
>>
>> * ignore limit of top state in _PPC: it lower than turbo boost frequency
>> * register intel_pstate in acpi-processor to get states from _PSS
>> * link acpi_processor_get_bios_limit: this adds attribute "bios_limit"
>>
>> Signed-off-by: Konstantin Khlebnikov <[email protected]>
>> ---
>> drivers/acpi/processor_perflib.c | 3 +-
>> drivers/cpufreq/intel_pstate.c | 57
>> ++++++++++++++++++++++++++++++++++++++
>> 2 files changed, 59 insertions(+), 1 deletion(-)
>>
>> diff --git a/drivers/acpi/processor_perflib.c
>> b/drivers/acpi/processor_perflib.c
>> index cfc8aba72f86..781e328c9d5f 100644
>> --- a/drivers/acpi/processor_perflib.c
>> +++ b/drivers/acpi/processor_perflib.c
>> @@ -98,7 +98,8 @@ static int acpi_processor_ppc_notifier(struct
>> notifier_block *nb,
>> ppc = (unsigned int)pr->performance_platform_limit;
>> - if (ppc >= pr->performance->state_count)
>> + /* Ignore limit of top state: it lower than turbo boost frequency
>> */
>> + if (!ppc || ppc >= pr->performance->state_count)
>
> Perhaps the !ppc is wrong if we check it against ACPI spec.
> Zero value of ppc means:
>
> "0 – States 0 through Nth state are available (all states available)"

Yep, also states are ordered by power - state0 must must have highest
performance.
This code below clamps range of available frequences to 0.._PSS[_PPC].frequency.
So if _PPC = 0 then there should be no limit but then turbo is enabled frequency
of state0 is actually could be lower than effective turbo frequency:
for hardware I have it's _PSS[0] = 2601 Mhz, PSS[1] = 2600 Mhz while turbo
state is 3400 Mhz.

>
>
>> goto out;
>> cpufreq_verify_within_limits(policy, 0,
>> diff --git a/drivers/cpufreq/intel_pstate.c
>> b/drivers/cpufreq/intel_pstate.c
>> index 15ada47bb720..4a34ddf4fa73 100644
>> --- a/drivers/cpufreq/intel_pstate.c
>> +++ b/drivers/cpufreq/intel_pstate.c
>> @@ -26,6 +26,7 @@
>> #include <linux/fs.h>
>> #include <linux/debugfs.h>
>> #include <linux/acpi.h>
>> +#include <acpi/processor.h>
>> #include <linux/vmalloc.h>
>> #include <trace/events/power.h>
>> @@ -113,6 +114,9 @@ struct cpudata {
>> u64 prev_mperf;
>> u64 prev_tsc;
>> struct sample sample;
>> +#ifdef CONFIG_ACPI_PROCESSOR
>> + struct acpi_processor_performance acpi_data;
>> +#endif
>> };
>> static struct cpudata **all_cpu_data;
>> @@ -145,6 +149,7 @@ static int hwp_active;
>> struct perf_limits {
>> int no_turbo;
>> + int no_acpi;
>> int turbo_disabled;
>> int max_perf_pct;
>> int min_perf_pct;
>> @@ -158,6 +163,7 @@ struct perf_limits {
>> static struct perf_limits limits = {
>> .no_turbo = 0,
>> + .no_acpi = !IS_ENABLED(CONFIG_ACPI_PROCESSOR),
>> .turbo_disabled = 0,
>> .max_perf_pct = 100,
>> .max_perf = int_tofp(1),
>> @@ -449,6 +455,18 @@ static ssize_t store_min_perf_pct(struct kobject *a,
>> struct attribute *b,
>> return count;
>> }
>> +static ssize_t store_no_acpi(struct kobject *a, struct attribute *b,
>> + const char *buf, size_t count)
>> +{
>> +#ifdef CONFIG_ACPI_PROCESSOR
>> + return kstrtouint(buf, 0, &limits.no_acpi) ?: count;
>> +#else
>> + return -ENODEV;
>> +#endif
>> +}
>> +show_one(no_acpi, no_acpi);
>> +define_one_global_rw(no_acpi);
>> +
>> show_one(max_perf_pct, max_perf_pct);
>> show_one(min_perf_pct, min_perf_pct);
>> @@ -460,6 +478,7 @@ define_one_global_ro(num_pstates);
>> static struct attribute *intel_pstate_attributes[] = {
>> &no_turbo.attr,
>> + &no_acpi.attr,
>> &max_perf_pct.attr,
>> &min_perf_pct.attr,
>> &turbo_pct.attr,
>> @@ -1049,6 +1068,38 @@ static int intel_pstate_cpu_init(struct
>> cpufreq_policy *policy)
>> policy->cpuinfo.transition_latency = CPUFREQ_ETERNAL;
>> cpumask_set_cpu(policy->cpu, policy->cpus);
>> +#ifdef CONFIG_ACPI_PROCESSOR
>> + if (!limits.no_acpi) {
>> + /*
>> + * Minimum necessary to get acpi_processor_ppc_notifier()
>> and
>> + * acpi_processor_get_bios_limit() working.
>> + */
>> + if (!zalloc_cpumask_var(&cpu->acpi_data.shared_cpu_map,
>> + GFP_KERNEL))
>> + rc = -ENOMEM;
>> + else
>> + rc = acpi_processor_register_performance(
>> + &cpu->acpi_data, policy->cpu);
>> + if (rc) {
>> + pr_err("intel_pstate: acpi init failed: %d\n",
>> rc);
>> + free_cpumask_var(cpu->acpi_data.shared_cpu_map);
>> + limits.no_acpi = 1;
>> + }
>> + }
>> +#endif
>> + return 0;
>> +}
>> +
>> +static int intel_pstate_cpu_exit(struct cpufreq_policy *policy)
>> +{
>> +#ifdef CONFIG_ACPI_PROCESSOR
>> + struct cpudata *cpu = all_cpu_data[policy->cpu];
>> +
>> + if (cpu->acpi_data.state_count)
>> + acpi_processor_unregister_performance(&cpu->acpi_data,
>> + policy->cpu);
>> + free_cpumask_var(cpu->acpi_data.shared_cpu_map);
>> +#endif
>> return 0;
>> }
>> @@ -1057,7 +1108,11 @@ static struct cpufreq_driver intel_pstate_driver
>> = {
>> .verify = intel_pstate_verify_policy,
>> .setpolicy = intel_pstate_set_policy,
>> .get = intel_pstate_get,
>> +#ifdef CONFIG_ACPI_PROCESSOR
>> + .bios_limit = acpi_processor_get_bios_limit,
>> +#endif
>> .init = intel_pstate_cpu_init,
>> + .exit = intel_pstate_cpu_exit,
>> .stop_cpu = intel_pstate_stop_cpu,
>> .name = "intel_pstate",
>> };
>> @@ -1286,6 +1341,8 @@ static int __init intel_pstate_setup(char *str)
>> force_load = 1;
>> if (!strcmp(str, "hwp_only"))
>> hwp_only = 1;
>> + if (!strcmp(str, "no_acpi"))
>> + limits.no_acpi = 1;
>> return 0;
>> }
>> early_param("intel_pstate", intel_pstate_setup);
>>
>
>
> Thanks,
> Ethan
> --
> To unsubscribe from this list: send the line "unsubscribe linux-kernel" in
> the body of a message to [email protected]
> More majordomo info at http://vger.kernel.org/majordomo-info.html
> Please read the FAQ at http://www.tux.org/lkml/

2015-07-20 21:10:48

by srinivas pandruvada

[permalink] [raw]
Subject: Re: [PATCH RFC] intel_pstate: play well with frequency limits set by acpi

On Fri, 2015-07-17 at 07:36 +0300, Konstantin Khlebnikov wrote:
> On Fri, Jul 17, 2015 at 1:08 AM, Srinivas Pandruvada
> <[email protected]> wrote:
> > On Thu, 2015-07-16 at 21:17 +0300, Konstantin Khlebnikov wrote:
> >> IPMI can control CPU P-states remotely: configuration is reported via
> >> common ACPI interface (_PPC/_PSS/etc). This patch adds required minimal
> >> support in intel_pstate to receive and use these P-state limits.
> >>
> >> * ignore limit of top state in _PPC: it lower than turbo boost frequency
> >> * register intel_pstate in acpi-processor to get states from _PSS
> >> * link acpi_processor_get_bios_limit: this adds attribute "bios_limit"
> >>
> >> Signed-off-by: Konstantin Khlebnikov <[email protected]>
> >> ---
> >> drivers/acpi/processor_perflib.c | 3 +-
> >> drivers/cpufreq/intel_pstate.c | 57 ++++++++++++++++++++++++++++++++++++++
> >> 2 files changed, 59 insertions(+), 1 deletion(-)
> >>
> >> diff --git a/drivers/acpi/processor_perflib.c b/drivers/acpi/processor_perflib.c
> >> index cfc8aba72f86..781e328c9d5f 100644
> >> --- a/drivers/acpi/processor_perflib.c
> >> +++ b/drivers/acpi/processor_perflib.c
> >> @@ -98,7 +98,8 @@ static int acpi_processor_ppc_notifier(struct notifier_block *nb,
> >>
> >> ppc = (unsigned int)pr->performance_platform_limit;
> >>
> >> - if (ppc >= pr->performance->state_count)
> >> + /* Ignore limit of top state: it lower than turbo boost frequency */
> >> + if (!ppc || ppc >= pr->performance->state_count)
> > Why? Isn't the previous check enough?
>
> Zero _PPC state must be top performance state but as I see frequency in
> _PSS is lower than maximum possible turbo frequency. So, in this case
> intel_pstate cannnot get "100%" for max bound even it there is no limit set.
>
> For example: I saw _PSS[0] = 2601 Mhz, PSS[1] = 2600 Mhz while turbo
> state is 3400 Mhz.
>
Have you tested dynamic _PPC modification with acpi cpufreq with this
change (after boot)? Suppose _PPC is changed from 3 to 0, then
cpufreq_verify_within_limits will not be called to change to new max
turbo performance state.

Thanks,
Srinivas
> >> goto out;
> >>
> >> cpufreq_verify_within_limits(policy, 0,
> >> diff --git a/drivers/cpufreq/intel_pstate.c b/drivers/cpufreq/intel_pstate.c
> >> index 15ada47bb720..4a34ddf4fa73 100644
> >> --- a/drivers/cpufreq/intel_pstate.c
> >> +++ b/drivers/cpufreq/intel_pstate.c
> >> @@ -26,6 +26,7 @@
> >> #include <linux/fs.h>
> >> #include <linux/debugfs.h>
> >> #include <linux/acpi.h>
> >> +#include <acpi/processor.h>
> >> #include <linux/vmalloc.h>
> >> #include <trace/events/power.h>
> >>
> >> @@ -113,6 +114,9 @@ struct cpudata {
> >> u64 prev_mperf;
> >> u64 prev_tsc;
> >> struct sample sample;
> >> +#ifdef CONFIG_ACPI_PROCESSOR
> >> + struct acpi_processor_performance acpi_data;
> >> +#endif
> >> };
> >>
> >> static struct cpudata **all_cpu_data;
> >> @@ -145,6 +149,7 @@ static int hwp_active;
> >>
> >> struct perf_limits {
> >> int no_turbo;
> >> + int no_acpi;
> >> int turbo_disabled;
> >> int max_perf_pct;
> >> int min_perf_pct;
> >> @@ -158,6 +163,7 @@ struct perf_limits {
> >>
> >> static struct perf_limits limits = {
> >> .no_turbo = 0,
> >> + .no_acpi = !IS_ENABLED(CONFIG_ACPI_PROCESSOR),
> >> .turbo_disabled = 0,
> >> .max_perf_pct = 100,
> >> .max_perf = int_tofp(1),
> >> @@ -449,6 +455,18 @@ static ssize_t store_min_perf_pct(struct kobject *a, struct attribute *b,
> >> return count;
> >> }
> >>
> >> +static ssize_t store_no_acpi(struct kobject *a, struct attribute *b,
> >> + const char *buf, size_t count)
> >> +{
> >> +#ifdef CONFIG_ACPI_PROCESSOR
> >> + return kstrtouint(buf, 0, &limits.no_acpi) ?: count;
> >> +#else
> >> + return -ENODEV;
> >> +#endif
> >> +}
> >> +show_one(no_acpi, no_acpi);
> >> +define_one_global_rw(no_acpi);
> >> +
> >> show_one(max_perf_pct, max_perf_pct);
> >> show_one(min_perf_pct, min_perf_pct);
> >>
> >> @@ -460,6 +478,7 @@ define_one_global_ro(num_pstates);
> >>
> >> static struct attribute *intel_pstate_attributes[] = {
> >> &no_turbo.attr,
> >> + &no_acpi.attr,
> >> &max_perf_pct.attr,
> >> &min_perf_pct.attr,
> >> &turbo_pct.attr,
> >> @@ -1049,6 +1068,38 @@ static int intel_pstate_cpu_init(struct cpufreq_policy *policy)
> >> policy->cpuinfo.transition_latency = CPUFREQ_ETERNAL;
> >> cpumask_set_cpu(policy->cpu, policy->cpus);
> >>
> >> +#ifdef CONFIG_ACPI_PROCESSOR
> >> + if (!limits.no_acpi) {
> >> + /*
> >> + * Minimum necessary to get acpi_processor_ppc_notifier() and
> >> + * acpi_processor_get_bios_limit() working.
> >> + */
> >> + if (!zalloc_cpumask_var(&cpu->acpi_data.shared_cpu_map,
> >> + GFP_KERNEL))
> >> + rc = -ENOMEM;
> >> + else
> >> + rc = acpi_processor_register_performance(
> >> + &cpu->acpi_data, policy->cpu);
> >> + if (rc) {
> >> + pr_err("intel_pstate: acpi init failed: %d\n", rc);
> >> + free_cpumask_var(cpu->acpi_data.shared_cpu_map);
> >> + limits.no_acpi = 1;
> >> + }
> >> + }
> >> +#endif
> >> + return 0;
> >> +}
> >> +
> >> +static int intel_pstate_cpu_exit(struct cpufreq_policy *policy)
> >> +{
> >> +#ifdef CONFIG_ACPI_PROCESSOR
> >> + struct cpudata *cpu = all_cpu_data[policy->cpu];
> >> +
> >> + if (cpu->acpi_data.state_count)
> >> + acpi_processor_unregister_performance(&cpu->acpi_data,
> >> + policy->cpu);
> >> + free_cpumask_var(cpu->acpi_data.shared_cpu_map);
> >> +#endif
> >> return 0;
> >> }
> >>
> >> @@ -1057,7 +1108,11 @@ static struct cpufreq_driver intel_pstate_driver = {
> >> .verify = intel_pstate_verify_policy,
> >> .setpolicy = intel_pstate_set_policy,
> >> .get = intel_pstate_get,
> >> +#ifdef CONFIG_ACPI_PROCESSOR
> >> + .bios_limit = acpi_processor_get_bios_limit,
> >> +#endif
> >> .init = intel_pstate_cpu_init,
> >> + .exit = intel_pstate_cpu_exit,
> >> .stop_cpu = intel_pstate_stop_cpu,
> >> .name = "intel_pstate",
> >> };
> >> @@ -1286,6 +1341,8 @@ static int __init intel_pstate_setup(char *str)
> >> force_load = 1;
> >> if (!strcmp(str, "hwp_only"))
> >> hwp_only = 1;
> >> + if (!strcmp(str, "no_acpi"))
> >> + limits.no_acpi = 1;
> >> return 0;
> >> }
> >> early_param("intel_pstate", intel_pstate_setup);
> >>
> > _PPC is index into _PSS. Since intel P state doesn't follow _PSS, the
> > states may not be 1:1 matching. So we have to harmonize them.
> >
> >
> >> --
> >> To unsubscribe from this list: send the line "unsubscribe linux-pm" in
> >> the body of a message to [email protected]
> >> More majordomo info at http://vger.kernel.org/majordomo-info.html
> >
> >
> > --
> > To unsubscribe from this list: send the line "unsubscribe linux-kernel" in
> > the body of a message to [email protected]
> > More majordomo info at http://vger.kernel.org/majordomo-info.html
> > Please read the FAQ at http://www.tux.org/lkml/
> --
> To unsubscribe from this list: send the line "unsubscribe linux-pm" in
> the body of a message to [email protected]
> More majordomo info at http://vger.kernel.org/majordomo-info.html

2015-07-21 10:25:07

by Konstantin Khlebnikov

[permalink] [raw]
Subject: Re: [PATCH RFC] intel_pstate: play well with frequency limits set by acpi

On 21.07.2015 00:08, Srinivas Pandruvada wrote:
> On Fri, 2015-07-17 at 07:36 +0300, Konstantin Khlebnikov wrote:
>> On Fri, Jul 17, 2015 at 1:08 AM, Srinivas Pandruvada
>> <[email protected]> wrote:
>>> On Thu, 2015-07-16 at 21:17 +0300, Konstantin Khlebnikov wrote:
>>>> IPMI can control CPU P-states remotely: configuration is reported via
>>>> common ACPI interface (_PPC/_PSS/etc). This patch adds required minimal
>>>> support in intel_pstate to receive and use these P-state limits.
>>>>
>>>> * ignore limit of top state in _PPC: it lower than turbo boost frequency
>>>> * register intel_pstate in acpi-processor to get states from _PSS
>>>> * link acpi_processor_get_bios_limit: this adds attribute "bios_limit"
>>>>
>>>> Signed-off-by: Konstantin Khlebnikov <[email protected]>
>>>> ---
>>>> drivers/acpi/processor_perflib.c | 3 +-
>>>> drivers/cpufreq/intel_pstate.c | 57 ++++++++++++++++++++++++++++++++++++++
>>>> 2 files changed, 59 insertions(+), 1 deletion(-)
>>>>
>>>> diff --git a/drivers/acpi/processor_perflib.c b/drivers/acpi/processor_perflib.c
>>>> index cfc8aba72f86..781e328c9d5f 100644
>>>> --- a/drivers/acpi/processor_perflib.c
>>>> +++ b/drivers/acpi/processor_perflib.c
>>>> @@ -98,7 +98,8 @@ static int acpi_processor_ppc_notifier(struct notifier_block *nb,
>>>>
>>>> ppc = (unsigned int)pr->performance_platform_limit;
>>>>
>>>> - if (ppc >= pr->performance->state_count)
>>>> + /* Ignore limit of top state: it lower than turbo boost frequency */
>>>> + if (!ppc || ppc >= pr->performance->state_count)
>>> Why? Isn't the previous check enough?
>>
>> Zero _PPC state must be top performance state but as I see frequency in
>> _PSS is lower than maximum possible turbo frequency. So, in this case
>> intel_pstate cannnot get "100%" for max bound even it there is no limit set.
>>
>> For example: I saw _PSS[0] = 2601 Mhz, PSS[1] = 2600 Mhz while turbo
>> state is 3400 Mhz.
>>
> Have you tested dynamic _PPC modification with acpi cpufreq with this
> change (after boot)? Suppose _PPC is changed from 3 to 0, then
> cpufreq_verify_within_limits will not be called to change to new max
> turbo performance state.
>

I haven't checked that but as I see acpi_processor_ppc_notifier()
can only reduce maximum frequency. So, there should be no problem
in this case.

--
Konstantin

2015-07-21 15:40:13

by srinivas pandruvada

[permalink] [raw]
Subject: Re: [PATCH RFC] intel_pstate: play well with frequency limits set by acpi

On Tue, 2015-07-21 at 13:25 +0300, Konstantin Khlebnikov wrote:
> On 21.07.2015 00:08, Srinivas Pandruvada wrote:
> > On Fri, 2015-07-17 at 07:36 +0300, Konstantin Khlebnikov wrote:
> >> On Fri, Jul 17, 2015 at 1:08 AM, Srinivas Pandruvada
> >> <[email protected]> wrote:
> >>> On Thu, 2015-07-16 at 21:17 +0300, Konstantin Khlebnikov wrote:
> >>>> IPMI can control CPU P-states remotely: configuration is reported via
> >>>> common ACPI interface (_PPC/_PSS/etc). This patch adds required minimal
> >>>> support in intel_pstate to receive and use these P-state limits.
> >>>>
> >>>> * ignore limit of top state in _PPC: it lower than turbo boost frequency
> >>>> * register intel_pstate in acpi-processor to get states from _PSS
> >>>> * link acpi_processor_get_bios_limit: this adds attribute "bios_limit"
> >>>>
> >>>> Signed-off-by: Konstantin Khlebnikov <[email protected]>
> >>>> ---
> >>>> drivers/acpi/processor_perflib.c | 3 +-
> >>>> drivers/cpufreq/intel_pstate.c | 57 ++++++++++++++++++++++++++++++++++++++
> >>>> 2 files changed, 59 insertions(+), 1 deletion(-)
> >>>>
> >>>> diff --git a/drivers/acpi/processor_perflib.c b/drivers/acpi/processor_perflib.c
> >>>> index cfc8aba72f86..781e328c9d5f 100644
> >>>> --- a/drivers/acpi/processor_perflib.c
> >>>> +++ b/drivers/acpi/processor_perflib.c
> >>>> @@ -98,7 +98,8 @@ static int acpi_processor_ppc_notifier(struct notifier_block *nb,
> >>>>
> >>>> ppc = (unsigned int)pr->performance_platform_limit;
> >>>>
> >>>> - if (ppc >= pr->performance->state_count)
> >>>> + /* Ignore limit of top state: it lower than turbo boost frequency */
> >>>> + if (!ppc || ppc >= pr->performance->state_count)
> >>> Why? Isn't the previous check enough?
> >>
> >> Zero _PPC state must be top performance state but as I see frequency in
> >> _PSS is lower than maximum possible turbo frequency. So, in this case
> >> intel_pstate cannnot get "100%" for max bound even it there is no limit set.
> >>
> >> For example: I saw _PSS[0] = 2601 Mhz, PSS[1] = 2600 Mhz while turbo
> >> state is 3400 Mhz.
> >>
> > Have you tested dynamic _PPC modification with acpi cpufreq with this
> > change (after boot)? Suppose _PPC is changed from 3 to 0, then
> > cpufreq_verify_within_limits will not be called to change to new max
> > turbo performance state.
> >
>
> I haven't checked that but as I see acpi_processor_ppc_notifier()
> can only reduce maximum frequency. So, there should be no problem
> in this case.
No, it can also be used in both ways. Once reduced, it can increase as
well. _PPC can be dynamically modified by BIOS to reduce and also to
increase.


2015-07-21 16:37:50

by Konstantin Khlebnikov

[permalink] [raw]
Subject: Re: [PATCH RFC] intel_pstate: play well with frequency limits set by acpi

On 21.07.2015 18:37, Srinivas Pandruvada wrote:
> On Tue, 2015-07-21 at 13:25 +0300, Konstantin Khlebnikov wrote:
>> On 21.07.2015 00:08, Srinivas Pandruvada wrote:
>>> On Fri, 2015-07-17 at 07:36 +0300, Konstantin Khlebnikov wrote:
>>>> On Fri, Jul 17, 2015 at 1:08 AM, Srinivas Pandruvada
>>>> <[email protected]> wrote:
>>>>> On Thu, 2015-07-16 at 21:17 +0300, Konstantin Khlebnikov wrote:
>>>>>> IPMI can control CPU P-states remotely: configuration is reported via
>>>>>> common ACPI interface (_PPC/_PSS/etc). This patch adds required minimal
>>>>>> support in intel_pstate to receive and use these P-state limits.
>>>>>>
>>>>>> * ignore limit of top state in _PPC: it lower than turbo boost frequency
>>>>>> * register intel_pstate in acpi-processor to get states from _PSS
>>>>>> * link acpi_processor_get_bios_limit: this adds attribute "bios_limit"
>>>>>>
>>>>>> Signed-off-by: Konstantin Khlebnikov <[email protected]>
>>>>>> ---
>>>>>> drivers/acpi/processor_perflib.c | 3 +-
>>>>>> drivers/cpufreq/intel_pstate.c | 57 ++++++++++++++++++++++++++++++++++++++
>>>>>> 2 files changed, 59 insertions(+), 1 deletion(-)
>>>>>>
>>>>>> diff --git a/drivers/acpi/processor_perflib.c b/drivers/acpi/processor_perflib.c
>>>>>> index cfc8aba72f86..781e328c9d5f 100644
>>>>>> --- a/drivers/acpi/processor_perflib.c
>>>>>> +++ b/drivers/acpi/processor_perflib.c
>>>>>> @@ -98,7 +98,8 @@ static int acpi_processor_ppc_notifier(struct notifier_block *nb,
>>>>>>
>>>>>> ppc = (unsigned int)pr->performance_platform_limit;
>>>>>>
>>>>>> - if (ppc >= pr->performance->state_count)
>>>>>> + /* Ignore limit of top state: it lower than turbo boost frequency */
>>>>>> + if (!ppc || ppc >= pr->performance->state_count)
>>>>> Why? Isn't the previous check enough?
>>>>
>>>> Zero _PPC state must be top performance state but as I see frequency in
>>>> _PSS is lower than maximum possible turbo frequency. So, in this case
>>>> intel_pstate cannnot get "100%" for max bound even it there is no limit set.
>>>>
>>>> For example: I saw _PSS[0] = 2601 Mhz, PSS[1] = 2600 Mhz while turbo
>>>> state is 3400 Mhz.
>>>>
>>> Have you tested dynamic _PPC modification with acpi cpufreq with this
>>> change (after boot)? Suppose _PPC is changed from 3 to 0, then
>>> cpufreq_verify_within_limits will not be called to change to new max
>>> turbo performance state.
>>>
>>
>> I haven't checked that but as I see acpi_processor_ppc_notifier()
>> can only reduce maximum frequency. So, there should be no problem
>> in this case.
> No, it can also be used in both ways. Once reduced, it can increase as
> well. _PPC can be dynamically modified by BIOS to reduce and also to
> increase.

Well, in this case BIOS will trigger ACPI_PROCESSOR_NOTIFY_PERFORMANCE:
kernel evaluate new _PPC and call cpufreq_update_policy()
which set initial frequency min/max range according to user setup and
apply all limits after that. Initial policy->user_policy.min/max stay
unchanged. So, that dynamic modification works in both ways.

--
Konstantin

2015-07-21 18:56:08

by srinivas pandruvada

[permalink] [raw]
Subject: Re: [PATCH RFC] intel_pstate: play well with frequency limits set by acpi

On Tue, 2015-07-21 at 19:37 +0300, Konstantin Khlebnikov wrote:
> On 21.07.2015 18:37, Srinivas Pandruvada wrote:
> > On Tue, 2015-07-21 at 13:25 +0300, Konstantin Khlebnikov wrote:
> >> On 21.07.2015 00:08, Srinivas Pandruvada wrote:
> >>> On Fri, 2015-07-17 at 07:36 +0300, Konstantin Khlebnikov wrote:
> >>>> On Fri, Jul 17, 2015 at 1:08 AM, Srinivas Pandruvada
> >>>> <[email protected]> wrote:
> >>>>> On Thu, 2015-07-16 at 21:17 +0300, Konstantin Khlebnikov wrote:
> >>>>>> IPMI can control CPU P-states remotely: configuration is reported via
> >>>>>> common ACPI interface (_PPC/_PSS/etc). This patch adds required minimal
> >>>>>> support in intel_pstate to receive and use these P-state limits.
> >>>>>>
> >>>>>> * ignore limit of top state in _PPC: it lower than turbo boost frequency
> >>>>>> * register intel_pstate in acpi-processor to get states from _PSS
> >>>>>> * link acpi_processor_get_bios_limit: this adds attribute "bios_limit"
> >>>>>>
> >>>>>> Signed-off-by: Konstantin Khlebnikov <[email protected]>
> >>>>>> ---
> >>>>>> drivers/acpi/processor_perflib.c | 3 +-
> >>>>>> drivers/cpufreq/intel_pstate.c | 57 ++++++++++++++++++++++++++++++++++++++
> >>>>>> 2 files changed, 59 insertions(+), 1 deletion(-)
> >>>>>>
> >>>>>> diff --git a/drivers/acpi/processor_perflib.c b/drivers/acpi/processor_perflib.c
> >>>>>> index cfc8aba72f86..781e328c9d5f 100644
> >>>>>> --- a/drivers/acpi/processor_perflib.c
> >>>>>> +++ b/drivers/acpi/processor_perflib.c
> >>>>>> @@ -98,7 +98,8 @@ static int acpi_processor_ppc_notifier(struct notifier_block *nb,
> >>>>>>
> >>>>>> ppc = (unsigned int)pr->performance_platform_limit;
> >>>>>>
> >>>>>> - if (ppc >= pr->performance->state_count)
> >>>>>> + /* Ignore limit of top state: it lower than turbo boost frequency */
> >>>>>> + if (!ppc || ppc >= pr->performance->state_count)
> >>>>> Why? Isn't the previous check enough?
> >>>>
> >>>> Zero _PPC state must be top performance state but as I see frequency in
> >>>> _PSS is lower than maximum possible turbo frequency. So, in this case
> >>>> intel_pstate cannnot get "100%" for max bound even it there is no limit set.
> >>>>
> >>>> For example: I saw _PSS[0] = 2601 Mhz, PSS[1] = 2600 Mhz while turbo
> >>>> state is 3400 Mhz.
> >>>>
> >>> Have you tested dynamic _PPC modification with acpi cpufreq with this
> >>> change (after boot)? Suppose _PPC is changed from 3 to 0, then
> >>> cpufreq_verify_within_limits will not be called to change to new max
> >>> turbo performance state.
> >>>
> >>
> >> I haven't checked that but as I see acpi_processor_ppc_notifier()
> >> can only reduce maximum frequency. So, there should be no problem
> >> in this case.
> > No, it can also be used in both ways. Once reduced, it can increase as
> > well. _PPC can be dynamically modified by BIOS to reduce and also to
> > increase.
>
> Well, in this case BIOS will trigger ACPI_PROCESSOR_NOTIFY_PERFORMANCE:
> kernel evaluate new _PPC and call cpufreq_update_policy()
> which set initial frequency min/max range according to user setup and
> apply all limits after that. Initial policy->user_policy.min/max stay
> unchanged. So, that dynamic modification works in both ways.
>
Fair enough. We need to take account for _PSS. We have some changes for
this, but not gone through test cycle. I will post them as RFC, please
check. Thanks for your patience.

- Srinivas