This patchset adds an AMD-specific powersave bias function to the ondemand
governor, which can be used to help the ondemand governor make more
power-conscious frequency change decisions based on feedback from hardware
(available on AMD Family 16h and above).
Hardware feedback tells software how "sensitive" to frequency changes the
workloads are. CPU-bound workloads will be more sensitive -- they will
perform better as frequency increases. Memory/IO-bound workloads will be less
sensitive -- they will not necessarily perform better as frequency increases.
This patchset was compared against the ondemand governor without powersave
bias and did not show any performance degradation on CPU-bound workloads such
as kernbench and unixbench, while saving power on memory-bound workloads such
as stream.
V4:
* Added proper documentation to Documentation/cpu-freq/
* Revised so that when this driver loads, the feature is enabled by
default with a sane tunable value.
V3:
* Added the CPUID bit to cpufeature.h
* Added MODULE_DEVICE_TABLE to autoload this driver.
* Other small changes per feedback from:
https://lkml.org/lkml/2013/4/2/349
V2:
* Added proper include files to amd_freq_sensitivity.c
* Only register powersave_bias_target function pointer and not the entire
od_ops.
Jacob Shin (2):
cpufreq: ondemand: allow custom powersave_bias_target handler to be
registered
cpufreq: AMD "frequency sensitivity feedback" powersave bias for
ondemand governor
Documentation/cpu-freq/governors.txt | 21 +++++
arch/x86/include/asm/cpufeature.h | 1 +
arch/x86/kernel/cpu/scattered.c | 3 +-
drivers/cpufreq/Kconfig.x86 | 17 ++++
drivers/cpufreq/Makefile | 1 +
drivers/cpufreq/amd_freq_sensitivity.c | 148 ++++++++++++++++++++++++++++++++
drivers/cpufreq/cpufreq_governor.h | 4 +
drivers/cpufreq/cpufreq_ondemand.c | 56 ++++++++++--
8 files changed, 242 insertions(+), 9 deletions(-)
create mode 100644 drivers/cpufreq/amd_freq_sensitivity.c
--
1.7.9.5
This allows for another [arch specific] driver to hook into existing
powersave bias function of the ondemand governor. i.e. This allows AMD
specific powersave bias function (in a separate AMD specific driver)
to aid ondemand governor's frequency transition decisions.
Signed-off-by: Jacob Shin <[email protected]>
---
drivers/cpufreq/cpufreq_governor.h | 4 +++
drivers/cpufreq/cpufreq_ondemand.c | 56 ++++++++++++++++++++++++++++++------
2 files changed, 52 insertions(+), 8 deletions(-)
diff --git a/drivers/cpufreq/cpufreq_governor.h b/drivers/cpufreq/cpufreq_governor.h
index 6593769..8ac3353 100644
--- a/drivers/cpufreq/cpufreq_governor.h
+++ b/drivers/cpufreq/cpufreq_governor.h
@@ -263,4 +263,8 @@ int cpufreq_governor_dbs(struct cpufreq_policy *policy,
struct common_dbs_data *cdata, unsigned int event);
void gov_queue_work(struct dbs_data *dbs_data, struct cpufreq_policy *policy,
unsigned int delay, bool all_cpus);
+void od_register_powersave_bias_handler(unsigned int (*f)
+ (struct cpufreq_policy *, unsigned int, unsigned int),
+ unsigned int powersave_bias);
+void od_unregister_powersave_bias_handler(void);
#endif /* _CPUFREQ_GOVERNOR_H */
diff --git a/drivers/cpufreq/cpufreq_ondemand.c b/drivers/cpufreq/cpufreq_ondemand.c
index 1471478..e5d1e8c 100644
--- a/drivers/cpufreq/cpufreq_ondemand.c
+++ b/drivers/cpufreq/cpufreq_ondemand.c
@@ -40,6 +40,8 @@
static DEFINE_PER_CPU(struct od_cpu_dbs_info_s, od_cpu_dbs_info);
+static struct od_ops od_ops;
+
#ifndef CONFIG_CPU_FREQ_DEFAULT_GOV_ONDEMAND
static struct cpufreq_governor cpufreq_gov_ondemand;
#endif
@@ -80,7 +82,7 @@ static int should_io_be_busy(void)
* Returns the freq_hi to be used right now and will set freq_hi_jiffies,
* freq_lo, and freq_lo_jiffies in percpu area for averaging freqs.
*/
-static unsigned int powersave_bias_target(struct cpufreq_policy *policy,
+static unsigned int generic_powersave_bias_target(struct cpufreq_policy *policy,
unsigned int freq_next, unsigned int relation)
{
unsigned int freq_req, freq_reduc, freq_avg;
@@ -145,7 +147,8 @@ static void dbs_freq_increase(struct cpufreq_policy *p, unsigned int freq)
struct od_dbs_tuners *od_tuners = dbs_data->tuners;
if (od_tuners->powersave_bias)
- freq = powersave_bias_target(p, freq, CPUFREQ_RELATION_H);
+ freq = od_ops.powersave_bias_target(p, freq,
+ CPUFREQ_RELATION_H);
else if (p->cur == p->max)
return;
@@ -205,12 +208,12 @@ static void od_check_cpu(int cpu, unsigned int load_freq)
if (!od_tuners->powersave_bias) {
__cpufreq_driver_target(policy, freq_next,
CPUFREQ_RELATION_L);
- } else {
- int freq = powersave_bias_target(policy, freq_next,
- CPUFREQ_RELATION_L);
- __cpufreq_driver_target(policy, freq,
- CPUFREQ_RELATION_L);
+ return;
}
+
+ freq_next = od_ops.powersave_bias_target(policy, freq_next,
+ CPUFREQ_RELATION_L);
+ __cpufreq_driver_target(policy, freq_next, CPUFREQ_RELATION_L);
}
}
@@ -557,7 +560,7 @@ define_get_cpu_dbs_routines(od_cpu_dbs_info);
static struct od_ops od_ops = {
.powersave_bias_init_cpu = ondemand_powersave_bias_init_cpu,
- .powersave_bias_target = powersave_bias_target,
+ .powersave_bias_target = generic_powersave_bias_target,
.freq_increase = dbs_freq_increase,
};
@@ -574,6 +577,43 @@ static struct common_dbs_data od_dbs_cdata = {
.exit = od_exit,
};
+static void od_set_powersave_bias(unsigned int powersave_bias)
+{
+ unsigned int cpu;
+ struct od_dbs_tuners *od_tuners;
+
+ if (!have_governor_per_policy()) {
+ od_tuners = od_dbs_cdata.gdbs_data->tuners;
+ od_tuners->powersave_bias = powersave_bias;
+ return;
+ }
+
+ for_each_online_cpu(cpu) {
+ struct cpufreq_policy *policy;
+ struct dbs_data *dbs_data;
+ policy = per_cpu(od_cpu_dbs_info, cpu).cdbs.cur_policy;
+ dbs_data = policy->governor_data;
+ od_tuners = dbs_data->tuners;
+ od_tuners->powersave_bias = powersave_bias;
+ }
+}
+
+void od_register_powersave_bias_handler(unsigned int (*f)
+ (struct cpufreq_policy *, unsigned int, unsigned int),
+ unsigned int powersave_bias)
+{
+ od_ops.powersave_bias_target = f;
+ od_set_powersave_bias(powersave_bias);
+}
+EXPORT_SYMBOL_GPL(od_register_powersave_bias_handler);
+
+void od_unregister_powersave_bias_handler(void)
+{
+ od_ops.powersave_bias_target = generic_powersave_bias_target;
+ od_set_powersave_bias(0);
+}
+EXPORT_SYMBOL_GPL(od_unregister_powersave_bias_handler);
+
static int od_cpufreq_governor_dbs(struct cpufreq_policy *policy,
unsigned int event)
{
--
1.7.9.5
Future AMD processors, starting with Family 16h, can provide software
with feedback on how the workload may respond to frequency change --
memory-bound workloads will not benefit from higher frequency, whereas
compute-bound workloads will. This patch enables this "frequency
sensitivity feedback" to aid the ondemand governor to make better
frequency change decisions by hooking into the powersave bias.
Signed-off-by: Jacob Shin <[email protected]>
---
Documentation/cpu-freq/governors.txt | 21 +++++
arch/x86/include/asm/cpufeature.h | 1 +
arch/x86/kernel/cpu/scattered.c | 3 +-
drivers/cpufreq/Kconfig.x86 | 17 ++++
drivers/cpufreq/Makefile | 1 +
drivers/cpufreq/amd_freq_sensitivity.c | 148 ++++++++++++++++++++++++++++++++
6 files changed, 190 insertions(+), 1 deletion(-)
create mode 100644 drivers/cpufreq/amd_freq_sensitivity.c
diff --git a/Documentation/cpu-freq/governors.txt b/Documentation/cpu-freq/governors.txt
index 4dfed30..66f9cc3 100644
--- a/Documentation/cpu-freq/governors.txt
+++ b/Documentation/cpu-freq/governors.txt
@@ -167,6 +167,27 @@ of load evaluation and helping the CPU stay at its top speed when truly
busy, rather than shifting back and forth in speed. This tunable has no
effect on behavior at lower speeds/lower CPU loads.
+powersave_bias: this parameter takes a value between 0 and 1000. It
+defines the percentage (times 10) value of the target frequency that
+will be shaved off of the target. For example, when set to 100 -- 10%,
+when ondemand governor would have targeted 1000 MHz, it will target
+1000 MHz - (10% of 1000 MHz) = 900 MHz instead. This is set to 0
+(disabled) by default.
+When AMD frequency sensitivity powersave bias driver --
+drivers/cpufreq/amd_freq_sensitivity.c is loaded, this parameter
+defines the workload frequency sensitivity threshold in which a lower
+frequency is chosen instead of ondemand governor's original target.
+The frequency sensitivity is a hardware reported (on AMD Family 16h
+Processors and above) value between 0 and 100% that tells software how
+the performance of the workload running on a CPU will change when
+frequency changes. A workload with sensitivity of 0% (memory/IO-bound)
+will not perform any better on higher core frequency, whereas a
+workload with sensitivity of 100% (CPU-bound) will perform better
+the higher the frequency. When the driver is loaded, this is set to 400
+by default -- for CPUs running workloads with sensitivity value below
+40%, a lower frequency is chosen. Unloading the driver or writing 0
+will disable this feature.
+
2.5 Conservative
----------------
diff --git a/arch/x86/include/asm/cpufeature.h b/arch/x86/include/asm/cpufeature.h
index 93fe929..9e22520 100644
--- a/arch/x86/include/asm/cpufeature.h
+++ b/arch/x86/include/asm/cpufeature.h
@@ -182,6 +182,7 @@
#define X86_FEATURE_PTS (7*32+ 6) /* Intel Package Thermal Status */
#define X86_FEATURE_DTHERM (7*32+ 7) /* Digital Thermal Sensor */
#define X86_FEATURE_HW_PSTATE (7*32+ 8) /* AMD HW-PState */
+#define X86_FEATURE_PROC_FEEDBACK (7*32+ 9) /* AMD ProcFeedbackInterface */
/* Virtualization flags: Linux defined, word 8 */
#define X86_FEATURE_TPR_SHADOW (8*32+ 0) /* Intel TPR Shadow */
diff --git a/arch/x86/kernel/cpu/scattered.c b/arch/x86/kernel/cpu/scattered.c
index ee8e9ab..d92b5da 100644
--- a/arch/x86/kernel/cpu/scattered.c
+++ b/arch/x86/kernel/cpu/scattered.c
@@ -39,8 +39,9 @@ void __cpuinit init_scattered_cpuid_features(struct cpuinfo_x86 *c)
{ X86_FEATURE_APERFMPERF, CR_ECX, 0, 0x00000006, 0 },
{ X86_FEATURE_EPB, CR_ECX, 3, 0x00000006, 0 },
{ X86_FEATURE_XSAVEOPT, CR_EAX, 0, 0x0000000d, 1 },
- { X86_FEATURE_CPB, CR_EDX, 9, 0x80000007, 0 },
{ X86_FEATURE_HW_PSTATE, CR_EDX, 7, 0x80000007, 0 },
+ { X86_FEATURE_CPB, CR_EDX, 9, 0x80000007, 0 },
+ { X86_FEATURE_PROC_FEEDBACK, CR_EDX,11, 0x80000007, 0 },
{ X86_FEATURE_NPT, CR_EDX, 0, 0x8000000a, 0 },
{ X86_FEATURE_LBRV, CR_EDX, 1, 0x8000000a, 0 },
{ X86_FEATURE_SVML, CR_EDX, 2, 0x8000000a, 0 },
diff --git a/drivers/cpufreq/Kconfig.x86 b/drivers/cpufreq/Kconfig.x86
index d7dc0ed..2b8a8c3 100644
--- a/drivers/cpufreq/Kconfig.x86
+++ b/drivers/cpufreq/Kconfig.x86
@@ -129,6 +129,23 @@ config X86_POWERNOW_K8
For details, take a look at <file:Documentation/cpu-freq/>.
+config X86_AMD_FREQ_SENSITIVITY
+ tristate "AMD frequency sensitivity feedback powersave bias"
+ depends on CPU_FREQ_GOV_ONDEMAND && X86_ACPI_CPUFREQ && CPU_SUP_AMD
+ help
+ This adds AMD-specific powersave bias function to the ondemand
+ governor, which allows it to make more power-conscious frequency
+ change decisions based on feedback from hardware (available on AMD
+ Family 16h and above).
+
+ Hardware feedback tells software how "sensitive" to frequency changes
+ the CPUs' workloads are. CPU-bound workloads will be more sensitive
+ -- they will perform better as frequency increases. Memory/IO-bound
+ workloads will be less sensitive -- they will not necessarily perform
+ better as frequency increases.
+
+ If in doubt, say N.
+
config X86_GX_SUSPMOD
tristate "Cyrix MediaGX/NatSemi Geode Suspend Modulation"
depends on X86_32 && PCI
diff --git a/drivers/cpufreq/Makefile b/drivers/cpufreq/Makefile
index ba9a3e1..aea81f2 100644
--- a/drivers/cpufreq/Makefile
+++ b/drivers/cpufreq/Makefile
@@ -41,6 +41,7 @@ obj-$(CONFIG_X86_SPEEDSTEP_CENTRINO) += speedstep-centrino.o
obj-$(CONFIG_X86_P4_CLOCKMOD) += p4-clockmod.o
obj-$(CONFIG_X86_CPUFREQ_NFORCE2) += cpufreq-nforce2.o
obj-$(CONFIG_X86_INTEL_PSTATE) += intel_pstate.o
+obj-$(CONFIG_X86_AMD_FREQ_SENSITIVITY) += amd_freq_sensitivity.o
##################################################################################
# ARM SoC drivers
diff --git a/drivers/cpufreq/amd_freq_sensitivity.c b/drivers/cpufreq/amd_freq_sensitivity.c
new file mode 100644
index 0000000..f6b79ab
--- /dev/null
+++ b/drivers/cpufreq/amd_freq_sensitivity.c
@@ -0,0 +1,148 @@
+/*
+ * amd_freq_sensitivity.c: AMD frequency sensitivity feedback powersave bias
+ * for the ondemand governor.
+ *
+ * Copyright (C) 2013 Advanced Micro Devices, Inc.
+ *
+ * Author: Jacob Shin <[email protected]>
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2 as
+ * published by the Free Software Foundation.
+ */
+
+#include <linux/kernel.h>
+#include <linux/module.h>
+#include <linux/types.h>
+#include <linux/percpu-defs.h>
+#include <linux/init.h>
+#include <linux/mod_devicetable.h>
+
+#include <asm/msr.h>
+#include <asm/cpufeature.h>
+
+#include "cpufreq_governor.h"
+
+#define MSR_AMD64_FREQ_SENSITIVITY_ACTUAL 0xc0010080
+#define MSR_AMD64_FREQ_SENSITIVITY_REFERENCE 0xc0010081
+#define CLASS_CODE_SHIFT 56
+#define POWERSAVE_BIAS_MAX 1000
+#define POWERSAVE_BIAS_DEF 400
+
+struct cpu_data_t {
+ u64 actual;
+ u64 reference;
+ unsigned int freq_prev;
+};
+
+static DEFINE_PER_CPU(struct cpu_data_t, cpu_data);
+
+static unsigned int amd_powersave_bias_target(struct cpufreq_policy *policy,
+ unsigned int freq_next,
+ unsigned int relation)
+{
+ int sensitivity;
+ long d_actual, d_reference;
+ struct msr actual, reference;
+ struct cpu_data_t *data = &per_cpu(cpu_data, policy->cpu);
+ struct dbs_data *od_data = policy->governor_data;
+ struct od_dbs_tuners *od_tuners = od_data->tuners;
+ struct od_cpu_dbs_info_s *od_info =
+ od_data->cdata->get_cpu_dbs_info_s(policy->cpu);
+
+ if (!od_info->freq_table)
+ return freq_next;
+
+ rdmsr_on_cpu(policy->cpu, MSR_AMD64_FREQ_SENSITIVITY_ACTUAL,
+ &actual.l, &actual.h);
+ rdmsr_on_cpu(policy->cpu, MSR_AMD64_FREQ_SENSITIVITY_REFERENCE,
+ &reference.l, &reference.h);
+ actual.h &= 0x00ffffff;
+ reference.h &= 0x00ffffff;
+
+ /* counter wrapped around, so stay on current frequency */
+ if (actual.q < data->actual || reference.q < data->reference) {
+ freq_next = policy->cur;
+ goto out;
+ }
+
+ d_actual = actual.q - data->actual;
+ d_reference = reference.q - data->reference;
+
+ /* divide by 0, so stay on current frequency as well */
+ if (d_reference == 0) {
+ freq_next = policy->cur;
+ goto out;
+ }
+
+ sensitivity = POWERSAVE_BIAS_MAX -
+ (POWERSAVE_BIAS_MAX * (d_reference - d_actual) / d_reference);
+
+ clamp(sensitivity, 0, POWERSAVE_BIAS_MAX);
+
+ /* this workload is not CPU bound, so choose a lower freq */
+ if (sensitivity < od_tuners->powersave_bias) {
+ if (data->freq_prev == policy->cur)
+ freq_next = policy->cur;
+
+ if (freq_next > policy->cur)
+ freq_next = policy->cur;
+ else if (freq_next < policy->cur)
+ freq_next = policy->min;
+ else {
+ unsigned int index;
+
+ cpufreq_frequency_table_target(policy,
+ od_info->freq_table, policy->cur - 1,
+ CPUFREQ_RELATION_H, &index);
+ freq_next = od_info->freq_table[index].frequency;
+ }
+
+ data->freq_prev = freq_next;
+ } else
+ data->freq_prev = 0;
+
+out:
+ data->actual = actual.q;
+ data->reference = reference.q;
+ return freq_next;
+}
+
+static int __init amd_freq_sensitivity_init(void)
+{
+ u64 val;
+
+ if (boot_cpu_data.x86_vendor != X86_VENDOR_AMD)
+ return -ENODEV;
+
+ if (!static_cpu_has(X86_FEATURE_PROC_FEEDBACK))
+ return -ENODEV;
+
+ if (rdmsrl_safe(MSR_AMD64_FREQ_SENSITIVITY_ACTUAL, &val))
+ return -ENODEV;
+
+ if (!(val >> CLASS_CODE_SHIFT))
+ return -ENODEV;
+
+ od_register_powersave_bias_handler(amd_powersave_bias_target,
+ POWERSAVE_BIAS_DEF);
+ return 0;
+}
+late_initcall(amd_freq_sensitivity_init);
+
+static void __exit amd_freq_sensitivity_exit(void)
+{
+ od_unregister_powersave_bias_handler();
+}
+module_exit(amd_freq_sensitivity_exit);
+
+static const struct x86_cpu_id amd_freq_sensitivity_ids[] = {
+ X86_FEATURE_MATCH(X86_FEATURE_PROC_FEEDBACK),
+ {}
+};
+MODULE_DEVICE_TABLE(x86cpu, amd_freq_sensitivity_ids);
+
+MODULE_AUTHOR("Jacob Shin <[email protected]>");
+MODULE_DESCRIPTION("AMD frequency sensitivity feedback powersave bias for "
+ "the ondemand governor.");
+MODULE_LICENSE("GPL");
--
1.7.9.5
On 4 April 2013 21:49, Jacob Shin <[email protected]> wrote:
> diff --git a/drivers/cpufreq/cpufreq_ondemand.c b/drivers/cpufreq/cpufreq_ondemand.c
> +static void od_set_powersave_bias(unsigned int powersave_bias)
> +{
> + unsigned int cpu;
> + struct od_dbs_tuners *od_tuners;
> +
> + if (!have_governor_per_policy()) {
> + od_tuners = od_dbs_cdata.gdbs_data->tuners;
> + od_tuners->powersave_bias = powersave_bias;
> + return;
> + }
> +
> + for_each_online_cpu(cpu) {
> + struct cpufreq_policy *policy;
> + struct dbs_data *dbs_data;
> + policy = per_cpu(od_cpu_dbs_info, cpu).cdbs.cur_policy;
> + dbs_data = policy->governor_data;
> + od_tuners = dbs_data->tuners;
> + od_tuners->powersave_bias = powersave_bias;
> + }
You can keep only the for_each_online_cpu() loop and remove the other
one. And in that one also, you don't have to do this for every cpu...
something like this will help you...
cpus_processed = NULL;
for_each_online_cpu(cpu) {
if cpu-is-present-in cpus_processed
continue;
cpu-set-mask(cpus_processed, policy->cpus);
}
Syntax is poor, please choose the correct one.
> +}
On Thursday, April 04, 2013 11:19:02 AM Jacob Shin wrote:
> This patchset adds AMD specific powersave bias function to the ondemand
> governor; which can be used to help ondemand governor make more power
> conscious frequency change decisions based on feedback from hardware
> (availble on AMD Family 16h and above).
Either the one way:
1) Documenting powersave_bias and add the stuff there, best with a default
set so that the stuff gets used
or
2) Marking powersave_bias deprecated and embed things into ondemand
directly
should be fine.
As you give this some usefulness now and it's going to get
used (automatically) and the stuff is even documented, I cannot suggest
anything anymore how to integrate that better.
Acked-by: Thomas Renninger <[email protected]>
On Thu, Apr 04, 2013 at 10:06:35PM +0530, Viresh Kumar wrote:
> On 4 April 2013 21:49, Jacob Shin <[email protected]> wrote:
> > diff --git a/drivers/cpufreq/cpufreq_ondemand.c b/drivers/cpufreq/cpufreq_ondemand.c
>
> > +static void od_set_powersave_bias(unsigned int powersave_bias)
> > +{
> > + unsigned int cpu;
> > + struct od_dbs_tuners *od_tuners;
> > +
> > + if (!have_governor_per_policy()) {
> > + od_tuners = od_dbs_cdata.gdbs_data->tuners;
> > + od_tuners->powersave_bias = powersave_bias;
> > + return;
> > + }
> > +
> > + for_each_online_cpu(cpu) {
> > + struct cpufreq_policy *policy;
> > + struct dbs_data *dbs_data;
> > + policy = per_cpu(od_cpu_dbs_info, cpu).cdbs.cur_policy;
> > + dbs_data = policy->governor_data;
> > + od_tuners = dbs_data->tuners;
> > + od_tuners->powersave_bias = powersave_bias;
> > + }
>
> You can keep only the for_each_online_cpu() loop and remove the other
> one. And in that one also, you don't have to do this for every cpu...
>
> something like this will help you...
>
> cpus_processed = NULL;
>
> for_each_online_cpu(cpu) {
> if cpu-is-present-in cpus_processed
> continue;
>
> cpu-set-mask(cpus_processed, policy->cpus);
>
> }
>
> Syntax is poor, please choose the correct one.
Ah okay, thanks for the hint, here:
>From 59728d09d0dc5403c9bb0238336ecb367c04694f Mon Sep 17 00:00:00 2001
From: Jacob Shin <[email protected]>
Date: Tue, 2 Apr 2013 09:56:56 -0500
Subject: [PATCH 1/2] cpufreq: ondemand: allow custom powersave_bias_target
handler to be registered
This allows for another [arch specific] driver to hook into existing
powersave bias function of the ondemand governor. i.e. This allows AMD
specific powersave bias function (in a separate AMD specific driver)
to aid ondemand governor's frequency transition decisions.
Signed-off-by: Jacob Shin <[email protected]>
---
drivers/cpufreq/cpufreq_governor.h | 4 +++
drivers/cpufreq/cpufreq_ondemand.c | 58 +++++++++++++++++++++++++++++++-----
2 files changed, 54 insertions(+), 8 deletions(-)
diff --git a/drivers/cpufreq/cpufreq_governor.h b/drivers/cpufreq/cpufreq_governor.h
index 6593769..8ac3353 100644
--- a/drivers/cpufreq/cpufreq_governor.h
+++ b/drivers/cpufreq/cpufreq_governor.h
@@ -263,4 +263,8 @@ int cpufreq_governor_dbs(struct cpufreq_policy *policy,
struct common_dbs_data *cdata, unsigned int event);
void gov_queue_work(struct dbs_data *dbs_data, struct cpufreq_policy *policy,
unsigned int delay, bool all_cpus);
+void od_register_powersave_bias_handler(unsigned int (*f)
+ (struct cpufreq_policy *, unsigned int, unsigned int),
+ unsigned int powersave_bias);
+void od_unregister_powersave_bias_handler(void);
#endif /* _CPUFREQ_GOVERNOR_H */
diff --git a/drivers/cpufreq/cpufreq_ondemand.c b/drivers/cpufreq/cpufreq_ondemand.c
index 1471478..80fb624 100644
--- a/drivers/cpufreq/cpufreq_ondemand.c
+++ b/drivers/cpufreq/cpufreq_ondemand.c
@@ -40,6 +40,8 @@
static DEFINE_PER_CPU(struct od_cpu_dbs_info_s, od_cpu_dbs_info);
+static struct od_ops od_ops;
+
#ifndef CONFIG_CPU_FREQ_DEFAULT_GOV_ONDEMAND
static struct cpufreq_governor cpufreq_gov_ondemand;
#endif
@@ -80,7 +82,7 @@ static int should_io_be_busy(void)
* Returns the freq_hi to be used right now and will set freq_hi_jiffies,
* freq_lo, and freq_lo_jiffies in percpu area for averaging freqs.
*/
-static unsigned int powersave_bias_target(struct cpufreq_policy *policy,
+static unsigned int generic_powersave_bias_target(struct cpufreq_policy *policy,
unsigned int freq_next, unsigned int relation)
{
unsigned int freq_req, freq_reduc, freq_avg;
@@ -145,7 +147,8 @@ static void dbs_freq_increase(struct cpufreq_policy *p, unsigned int freq)
struct od_dbs_tuners *od_tuners = dbs_data->tuners;
if (od_tuners->powersave_bias)
- freq = powersave_bias_target(p, freq, CPUFREQ_RELATION_H);
+ freq = od_ops.powersave_bias_target(p, freq,
+ CPUFREQ_RELATION_H);
else if (p->cur == p->max)
return;
@@ -205,12 +208,12 @@ static void od_check_cpu(int cpu, unsigned int load_freq)
if (!od_tuners->powersave_bias) {
__cpufreq_driver_target(policy, freq_next,
CPUFREQ_RELATION_L);
- } else {
- int freq = powersave_bias_target(policy, freq_next,
- CPUFREQ_RELATION_L);
- __cpufreq_driver_target(policy, freq,
- CPUFREQ_RELATION_L);
+ return;
}
+
+ freq_next = od_ops.powersave_bias_target(policy, freq_next,
+ CPUFREQ_RELATION_L);
+ __cpufreq_driver_target(policy, freq_next, CPUFREQ_RELATION_L);
}
}
@@ -557,7 +560,7 @@ define_get_cpu_dbs_routines(od_cpu_dbs_info);
static struct od_ops od_ops = {
.powersave_bias_init_cpu = ondemand_powersave_bias_init_cpu,
- .powersave_bias_target = powersave_bias_target,
+ .powersave_bias_target = generic_powersave_bias_target,
.freq_increase = dbs_freq_increase,
};
@@ -574,6 +577,45 @@ static struct common_dbs_data od_dbs_cdata = {
.exit = od_exit,
};
+static void od_set_powersave_bias(unsigned int powersave_bias)
+{
+ struct cpufreq_policy *policy;
+ struct dbs_data *dbs_data;
+ struct od_dbs_tuners *od_tuners;
+ unsigned int cpu;
+ cpumask_t done;
+
+ cpumask_clear(&done);
+
+ for_each_online_cpu(cpu) {
+ if (cpumask_test_cpu(cpu, &done))
+ continue;
+
+ policy = per_cpu(od_cpu_dbs_info, cpu).cdbs.cur_policy;
+ dbs_data = policy->governor_data;
+ od_tuners = dbs_data->tuners;
+ od_tuners->powersave_bias = powersave_bias;
+
+ cpumask_or(&done, &done, policy->cpus);
+ }
+}
+
+void od_register_powersave_bias_handler(unsigned int (*f)
+ (struct cpufreq_policy *, unsigned int, unsigned int),
+ unsigned int powersave_bias)
+{
+ od_ops.powersave_bias_target = f;
+ od_set_powersave_bias(powersave_bias);
+}
+EXPORT_SYMBOL_GPL(od_register_powersave_bias_handler);
+
+void od_unregister_powersave_bias_handler(void)
+{
+ od_ops.powersave_bias_target = generic_powersave_bias_target;
+ od_set_powersave_bias(0);
+}
+EXPORT_SYMBOL_GPL(od_unregister_powersave_bias_handler);
+
static int od_cpufreq_governor_dbs(struct cpufreq_policy *policy,
unsigned int event)
{
--
1.7.9.5
On Thu, Apr 04, 2013 at 12:18:04PM -0500, Jacob Shin wrote:
> @@ -574,6 +577,45 @@ static struct common_dbs_data od_dbs_cdata = {
> .exit = od_exit,
> };
>
> +static void od_set_powersave_bias(unsigned int powersave_bias)
> +{
> + struct cpufreq_policy *policy;
> + struct dbs_data *dbs_data;
> + struct od_dbs_tuners *od_tuners;
> + unsigned int cpu;
> + cpumask_t done;
> +
> + cpumask_clear(&done);
> +
get_online_cpus();
> + for_each_online_cpu(cpu) {
> + if (cpumask_test_cpu(cpu, &done))
> + continue;
> +
> + policy = per_cpu(od_cpu_dbs_info, cpu).cdbs.cur_policy;
> + dbs_data = policy->governor_data;
> + od_tuners = dbs_data->tuners;
> + od_tuners->powersave_bias = powersave_bias;
> +
> + cpumask_or(&done, &done, policy->cpus);
> + }
put_online_cpus();
--
Regards/Gruss,
Boris.
Sent from a fat crate under my desk. Formatting is fine.
--
On Thu, Apr 04, 2013 at 11:19:04AM -0500, Jacob Shin wrote:
> Future AMD processors, starting with Family 16h, can provide software
> with feedback on how the workload may respond to frequency change --
> memory-bound workloads will not benefit from higher frequency, where
> as compute-bound workloads will. This patch enables this "frequency
> sensitivity feedback" to aid the ondemand governor to make better
> frequency change decisions by hooking into the powersave bias.
>
> Signed-off-by: Jacob Shin <[email protected]>
Looks good to me.
Acked-by: Borislav Petkov <[email protected]>
--
Regards/Gruss,
Boris.
Sent from a fat crate under my desk. Formatting is fine.
--
On Thu, Apr 04, 2013 at 09:12:25PM +0200, Borislav Petkov wrote:
> On Thu, Apr 04, 2013 at 12:18:04PM -0500, Jacob Shin wrote:
> > @@ -574,6 +577,45 @@ static struct common_dbs_data od_dbs_cdata = {
> > .exit = od_exit,
> > };
> >
> > +static void od_set_powersave_bias(unsigned int powersave_bias)
> > +{
> > + struct cpufreq_policy *policy;
> > + struct dbs_data *dbs_data;
> > + struct od_dbs_tuners *od_tuners;
> > + unsigned int cpu;
> > + cpumask_t done;
> > +
> > + cpumask_clear(&done);
> > +
>
> get_online_cpus();
>
> > + for_each_online_cpu(cpu) {
> > + if (cpumask_test_cpu(cpu, &done))
> > + continue;
> > +
> > + policy = per_cpu(od_cpu_dbs_info, cpu).cdbs.cur_policy;
> > + dbs_data = policy->governor_data;
> > + od_tuners = dbs_data->tuners;
> > + od_tuners->powersave_bias = powersave_bias;
> > +
> > + cpumask_or(&done, &done, policy->cpus);
> > + }
>
> put_online_cpus();
>
> --
> Regards/Gruss,
> Boris.
Ah, okay .. here is the fixup:
>From 7236287faa1a499686c9aac1d3f3f224516a7bbf Mon Sep 17 00:00:00 2001
From: Jacob Shin <[email protected]>
Date: Tue, 2 Apr 2013 09:56:56 -0500
Subject: [PATCH 1/2] cpufreq: ondemand: allow custom powersave_bias_target
handler to be registered
This allows for another [arch specific] driver to hook into existing
powersave bias function of the ondemand governor. i.e. This allows AMD
specific powersave bias function (in a separate AMD specific driver)
to aid ondemand governor's frequency transition decisions.
Signed-off-by: Jacob Shin <[email protected]>
---
drivers/cpufreq/cpufreq_governor.h | 4 +++
drivers/cpufreq/cpufreq_ondemand.c | 61 +++++++++++++++++++++++++++++++-----
2 files changed, 57 insertions(+), 8 deletions(-)
diff --git a/drivers/cpufreq/cpufreq_governor.h b/drivers/cpufreq/cpufreq_governor.h
index 6593769..8ac3353 100644
--- a/drivers/cpufreq/cpufreq_governor.h
+++ b/drivers/cpufreq/cpufreq_governor.h
@@ -263,4 +263,8 @@ int cpufreq_governor_dbs(struct cpufreq_policy *policy,
struct common_dbs_data *cdata, unsigned int event);
void gov_queue_work(struct dbs_data *dbs_data, struct cpufreq_policy *policy,
unsigned int delay, bool all_cpus);
+void od_register_powersave_bias_handler(unsigned int (*f)
+ (struct cpufreq_policy *, unsigned int, unsigned int),
+ unsigned int powersave_bias);
+void od_unregister_powersave_bias_handler(void);
#endif /* _CPUFREQ_GOVERNOR_H */
diff --git a/drivers/cpufreq/cpufreq_ondemand.c b/drivers/cpufreq/cpufreq_ondemand.c
index 1471478..b0ffef9 100644
--- a/drivers/cpufreq/cpufreq_ondemand.c
+++ b/drivers/cpufreq/cpufreq_ondemand.c
@@ -24,6 +24,7 @@
#include <linux/sysfs.h>
#include <linux/tick.h>
#include <linux/types.h>
+#include <linux/cpu.h>
#include "cpufreq_governor.h"
@@ -40,6 +41,8 @@
static DEFINE_PER_CPU(struct od_cpu_dbs_info_s, od_cpu_dbs_info);
+static struct od_ops od_ops;
+
#ifndef CONFIG_CPU_FREQ_DEFAULT_GOV_ONDEMAND
static struct cpufreq_governor cpufreq_gov_ondemand;
#endif
@@ -80,7 +83,7 @@ static int should_io_be_busy(void)
* Returns the freq_hi to be used right now and will set freq_hi_jiffies,
* freq_lo, and freq_lo_jiffies in percpu area for averaging freqs.
*/
-static unsigned int powersave_bias_target(struct cpufreq_policy *policy,
+static unsigned int generic_powersave_bias_target(struct cpufreq_policy *policy,
unsigned int freq_next, unsigned int relation)
{
unsigned int freq_req, freq_reduc, freq_avg;
@@ -145,7 +148,8 @@ static void dbs_freq_increase(struct cpufreq_policy *p, unsigned int freq)
struct od_dbs_tuners *od_tuners = dbs_data->tuners;
if (od_tuners->powersave_bias)
- freq = powersave_bias_target(p, freq, CPUFREQ_RELATION_H);
+ freq = od_ops.powersave_bias_target(p, freq,
+ CPUFREQ_RELATION_H);
else if (p->cur == p->max)
return;
@@ -205,12 +209,12 @@ static void od_check_cpu(int cpu, unsigned int load_freq)
if (!od_tuners->powersave_bias) {
__cpufreq_driver_target(policy, freq_next,
CPUFREQ_RELATION_L);
- } else {
- int freq = powersave_bias_target(policy, freq_next,
- CPUFREQ_RELATION_L);
- __cpufreq_driver_target(policy, freq,
- CPUFREQ_RELATION_L);
+ return;
}
+
+ freq_next = od_ops.powersave_bias_target(policy, freq_next,
+ CPUFREQ_RELATION_L);
+ __cpufreq_driver_target(policy, freq_next, CPUFREQ_RELATION_L);
}
}
@@ -557,7 +561,7 @@ define_get_cpu_dbs_routines(od_cpu_dbs_info);
static struct od_ops od_ops = {
.powersave_bias_init_cpu = ondemand_powersave_bias_init_cpu,
- .powersave_bias_target = powersave_bias_target,
+ .powersave_bias_target = generic_powersave_bias_target,
.freq_increase = dbs_freq_increase,
};
@@ -574,6 +578,47 @@ static struct common_dbs_data od_dbs_cdata = {
.exit = od_exit,
};
+static void od_set_powersave_bias(unsigned int powersave_bias)
+{
+ struct cpufreq_policy *policy;
+ struct dbs_data *dbs_data;
+ struct od_dbs_tuners *od_tuners;
+ unsigned int cpu;
+ cpumask_t done;
+
+ cpumask_clear(&done);
+
+ get_online_cpus();
+ for_each_online_cpu(cpu) {
+ if (cpumask_test_cpu(cpu, &done))
+ continue;
+
+ policy = per_cpu(od_cpu_dbs_info, cpu).cdbs.cur_policy;
+ dbs_data = policy->governor_data;
+ od_tuners = dbs_data->tuners;
+ od_tuners->powersave_bias = powersave_bias;
+
+ cpumask_or(&done, &done, policy->cpus);
+ }
+ put_online_cpus();
+}
+
+void od_register_powersave_bias_handler(unsigned int (*f)
+ (struct cpufreq_policy *, unsigned int, unsigned int),
+ unsigned int powersave_bias)
+{
+ od_ops.powersave_bias_target = f;
+ od_set_powersave_bias(powersave_bias);
+}
+EXPORT_SYMBOL_GPL(od_register_powersave_bias_handler);
+
+void od_unregister_powersave_bias_handler(void)
+{
+ od_ops.powersave_bias_target = generic_powersave_bias_target;
+ od_set_powersave_bias(0);
+}
+EXPORT_SYMBOL_GPL(od_unregister_powersave_bias_handler);
+
static int od_cpufreq_governor_dbs(struct cpufreq_policy *policy,
unsigned int event)
{
--
1.7.9.5
On Thu, Apr 04, 2013 at 09:23:23PM +0200, Borislav Petkov wrote:
> On Thu, Apr 04, 2013 at 11:19:04AM -0500, Jacob Shin wrote:
> > Future AMD processors, starting with Family 16h, can provide software
> > with feedback on how the workload may respond to frequency change --
> > memory-bound workloads will not benefit from higher frequency, whereas
> > compute-bound workloads will. This patch enables this "frequency
> > sensitivity feedback" to aid the ondemand governor to make better
> > frequency change decisions by hooking into the powersave bias.
> >
> > Signed-off-by: Jacob Shin <[email protected]>
>
> Looks good to me.
>
> Acked-by: Borislav Petkov <[email protected]>
Rafael, got acks from both Boris and Thomas. Please commit to
linux-next when you get the chance.
Thanks,
-Jacob
>
> --
> Regards/Gruss,
> Boris.
>
> Sent from a fat crate under my desk. Formatting is fine.
> --
>
On Thu, Apr 04, 2013 at 03:28:13PM -0500, Jacob Shin wrote:
> Ah, okay .. here is the fixup:
>
> From 7236287faa1a499686c9aac1d3f3f224516a7bbf Mon Sep 17 00:00:00 2001
> From: Jacob Shin <[email protected]>
> Date: Tue, 2 Apr 2013 09:56:56 -0500
> Subject: [PATCH 1/2] cpufreq: ondemand: allow custom powersave_bias_target
> handler to be registered
>
> This allows for another [arch specific] driver to hook into existing
> powersave bias function of the ondemand governor. i.e. This allows AMD
> specific powersave bias function (in a separate AMD specific driver)
> to aid ondemand governor's frequency transition decisions.
>
> Signed-off-by: Jacob Shin <[email protected]>
Acked-by: Borislav Petkov <[email protected]>
--
Regards/Gruss,
Boris.
Sent from a fat crate under my desk. Formatting is fine.
--
On 4 April 2013 21:49, Jacob Shin <[email protected]> wrote:
> This patchset adds AMD specific powersave bias function to the ondemand
> governor; which can be used to help ondemand governor make more power conscious
> frequency change decisions based on feedback from hardware (available on AMD
> Family 16h and above).
>
> Hardware feedback tells software how "sensitive" to frequency changes the
> workloads are. CPU-bound workloads will be more sensitive -- they will
> perform better as frequency increases. Memory/IO-bound workloads will be less
> sensitive -- they will not necessarily perform better as frequency increases.
>
> This patchset was compared against ondemand governor without powersave bias
> and did not show any performance degradation on CPU-bound workloads such as
> kernbench and unixbench, while saving power on memory-bound workloads such as
> stream.
>
> V4:
> * Added proper documentation to Documentation/cpu-freq/
> * Revised so that when this driver loads, the feature is enabled by
> default with a sane tunable value.
>
> V3:
> * Added the CPUID bit to cpufeature.h
> * Added MODULE_DEVICE_TABLE to autoload this driver.
> * Other small changes per feedback from:
> https://lkml.org/lkml/2013/4/2/349
>
> V2:
> * Added proper include files to amd_freq_sensitivity.c
> * Only register powersave_bias_target function pointer and not the entire
> od_ops.
>
> Jacob Shin (2):
> cpufreq: ondemand: allow custom powersave_bias_target handler to be
> registered
> cpufreq: AMD "frequency sensitivity feedback" powersave bias for
> ondemand governor
>
> Documentation/cpu-freq/governors.txt | 21 +++++
> arch/x86/include/asm/cpufeature.h | 1 +
> arch/x86/kernel/cpu/scattered.c | 3 +-
> drivers/cpufreq/Kconfig.x86 | 17 ++++
> drivers/cpufreq/Makefile | 1 +
> drivers/cpufreq/amd_freq_sensitivity.c | 148 ++++++++++++++++++++++++++++++++
> drivers/cpufreq/cpufreq_governor.h | 4 +
> drivers/cpufreq/cpufreq_ondemand.c | 56 ++++++++++--
> 8 files changed, 242 insertions(+), 9 deletions(-)
> create mode 100644 drivers/cpufreq/amd_freq_sensitivity.c
For the latest versions sent by you (including {get|put}_online_cpus()):
Acked-by: Viresh Kumar <[email protected]>
On 5 April 2013 10:19, Viresh Kumar <[email protected]> wrote:
> For the latest versions sent by you (including {get|put}_online_cpus()):
>
> Acked-by: Viresh Kumar <[email protected]>
Hi Rafael,
I see that you have applied this patchset to bleeding-edge, but in
the wrong order. You have applied 2/2 first and then 1/2, which is
wrong as after 2/2 we will have a compilation failure.
On Friday, April 05, 2013 12:19:36 PM Viresh Kumar wrote:
> On 5 April 2013 10:19, Viresh Kumar <[email protected]> wrote:
> > For the latest versions sent by you (including {get|put}_online_cpus()):
> >
> > Acked-by: Viresh Kumar <[email protected]>
>
> Hi Rafael,
>
> I see that you have applied this patchset to bleeding-edge, but in
> the wrong order. You have applied 2/2 first and then 1/2, which is
> wrong as after 2/2 we will have a compilation failure.
Ah, good catch. I didn't notice that patchwork changed the ordering when
creating a bundle.
Should be fixed now.
Thanks,
Rafael
--
I speak only for myself.
Rafael J. Wysocki, Intel Open Source Technology Center.