2011-06-17 19:51:20

by Matthew Garrett

[permalink] [raw]
Subject: [PATCH V2 1/6] x86: Add AMD HW_PSTATE cpu feature bit and MSRs

AMD CPUs that support hardware P-state setting expose this via cpuid. Add
the bit to the generic code so the ACPI cpufreq driver can make use of it,
and also add the related MSRs.

Signed-off-by: Matthew Garrett <[email protected]>
---
arch/x86/include/asm/cpufeature.h | 1 +
arch/x86/include/asm/msr-index.h | 2 ++
arch/x86/kernel/cpu/scattered.c | 1 +
3 files changed, 4 insertions(+), 0 deletions(-)

diff --git a/arch/x86/include/asm/cpufeature.h b/arch/x86/include/asm/cpufeature.h
index 71cc380..312f455 100644
--- a/arch/x86/include/asm/cpufeature.h
+++ b/arch/x86/include/asm/cpufeature.h
@@ -174,6 +174,7 @@
#define X86_FEATURE_PLN (7*32+ 5) /* Intel Power Limit Notification */
#define X86_FEATURE_PTS (7*32+ 6) /* Intel Package Thermal Status */
#define X86_FEATURE_DTS (7*32+ 7) /* Digital Thermal Sensor */
+#define X86_FEATURE_HW_PSTATE (7*32+ 8) /* AMD frequency scaling */

/* Virtualization flags: Linux defined, word 8 */
#define X86_FEATURE_TPR_SHADOW (8*32+ 0) /* Intel TPR Shadow */
diff --git a/arch/x86/include/asm/msr-index.h b/arch/x86/include/asm/msr-index.h
index 485b4f1..1772978 100644
--- a/arch/x86/include/asm/msr-index.h
+++ b/arch/x86/include/asm/msr-index.h
@@ -234,6 +234,8 @@

#define MSR_IA32_PERF_STATUS 0x00000198
#define MSR_IA32_PERF_CTL 0x00000199
+#define MSR_AMD_PERF_STATUS 0xc0010063
+#define MSR_AMD_PERF_CTL 0xc0010062

#define MSR_IA32_MPERF 0x000000e7
#define MSR_IA32_APERF 0x000000e8
diff --git a/arch/x86/kernel/cpu/scattered.c b/arch/x86/kernel/cpu/scattered.c
index c7f64e6..4adffc4 100644
--- a/arch/x86/kernel/cpu/scattered.c
+++ b/arch/x86/kernel/cpu/scattered.c
@@ -39,6 +39,7 @@ void __cpuinit init_scattered_cpuid_features(struct cpuinfo_x86 *c)
{ X86_FEATURE_APERFMPERF, CR_ECX, 0, 0x00000006, 0 },
{ X86_FEATURE_EPB, CR_ECX, 3, 0x00000006, 0 },
{ X86_FEATURE_XSAVEOPT, CR_EAX, 0, 0x0000000d, 1 },
+ { X86_FEATURE_HW_PSTATE, CR_EDX, 7, 0x80000007, 0 },
{ X86_FEATURE_CPB, CR_EDX, 9, 0x80000007, 0 },
{ X86_FEATURE_NPT, CR_EDX, 0, 0x8000000a, 0 },
{ X86_FEATURE_LBRV, CR_EDX, 1, 0x8000000a, 0 },
--
1.7.5.2


2011-06-17 19:51:22

by Matthew Garrett

[permalink] [raw]
Subject: [PATCH V2 2/6] acpi-cpufreq: Add support for modern AMD CPUs

The programming model for P-states on modern AMD CPUs is very similar to
that of Intel and VIA. It makes sense to consolidate this support into one
driver rather than duplicating functionality between two of them. This
patch adds support for AMD CPUs with hardware P-state control to acpi-cpufreq.

Signed-off-by: Matthew Garrett <[email protected]>
---
drivers/cpufreq/acpi-cpufreq.c | 43 ++++++++++++++++++++++++++++++++++-----
1 files changed, 37 insertions(+), 6 deletions(-)

diff --git a/drivers/cpufreq/acpi-cpufreq.c b/drivers/cpufreq/acpi-cpufreq.c
index 4e04e12..298a947 100644
--- a/drivers/cpufreq/acpi-cpufreq.c
+++ b/drivers/cpufreq/acpi-cpufreq.c
@@ -54,10 +54,12 @@ MODULE_LICENSE("GPL");
enum {
UNDEFINED_CAPABLE = 0,
SYSTEM_INTEL_MSR_CAPABLE,
+ SYSTEM_AMD_MSR_CAPABLE,
SYSTEM_IO_CAPABLE,
};

#define INTEL_MSR_RANGE (0xffff)
+#define AMD_MSR_RANGE (0x7)

struct acpi_cpufreq_data {
struct acpi_processor_performance *acpi_data;
@@ -82,6 +84,13 @@ static int check_est_cpu(unsigned int cpuid)
return cpu_has(cpu, X86_FEATURE_EST);
}

+static int check_amd_hwpstate_cpu(unsigned int cpuid)
+{
+ struct cpuinfo_x86 *cpu = &cpu_data(cpuid);
+
+ return cpu_has(cpu, X86_FEATURE_HW_PSTATE);
+}
+
static unsigned extract_io(u32 value, struct acpi_cpufreq_data *data)
{
struct acpi_processor_performance *perf;
@@ -101,7 +110,11 @@ static unsigned extract_msr(u32 msr, struct acpi_cpufreq_data *data)
int i;
struct acpi_processor_performance *perf;

- msr &= INTEL_MSR_RANGE;
+ if (boot_cpu_data.x86_vendor == X86_VENDOR_AMD)
+ msr &= AMD_MSR_RANGE;
+ else
+ msr &= INTEL_MSR_RANGE;
+
perf = data->acpi_data;

for (i = 0; data->freq_table[i].frequency != CPUFREQ_TABLE_END; i++) {
@@ -115,6 +128,7 @@ static unsigned extract_freq(u32 val, struct acpi_cpufreq_data *data)
{
switch (data->cpu_feature) {
case SYSTEM_INTEL_MSR_CAPABLE:
+ case SYSTEM_AMD_MSR_CAPABLE:
return extract_msr(val, data);
case SYSTEM_IO_CAPABLE:
return extract_io(val, data);
@@ -150,6 +164,7 @@ static void do_drv_read(void *_cmd)

switch (cmd->type) {
case SYSTEM_INTEL_MSR_CAPABLE:
+ case SYSTEM_AMD_MSR_CAPABLE:
rdmsr(cmd->addr.msr.reg, cmd->val, h);
break;
case SYSTEM_IO_CAPABLE:
@@ -174,6 +189,9 @@ static void do_drv_write(void *_cmd)
lo = (lo & ~INTEL_MSR_RANGE) | (cmd->val & INTEL_MSR_RANGE);
wrmsr(cmd->addr.msr.reg, lo, hi);
break;
+ case SYSTEM_AMD_MSR_CAPABLE:
+ wrmsr(cmd->addr.msr.reg, cmd->val, 0);
+ break;
case SYSTEM_IO_CAPABLE:
acpi_os_write_port((acpi_io_address)cmd->addr.io.port,
cmd->val,
@@ -217,6 +235,10 @@ static u32 get_cur_val(const struct cpumask *mask)
cmd.type = SYSTEM_INTEL_MSR_CAPABLE;
cmd.addr.msr.reg = MSR_IA32_PERF_STATUS;
break;
+ case SYSTEM_AMD_MSR_CAPABLE:
+ cmd.type = SYSTEM_AMD_MSR_CAPABLE;
+ cmd.addr.msr.reg = MSR_AMD_PERF_STATUS;
+ break;
case SYSTEM_IO_CAPABLE:
cmd.type = SYSTEM_IO_CAPABLE;
perf = per_cpu(acfreq_data, cpumask_first(mask))->acpi_data;
@@ -326,6 +348,11 @@ static int acpi_cpufreq_target(struct cpufreq_policy *policy,
cmd.addr.msr.reg = MSR_IA32_PERF_CTL;
cmd.val = (u32) perf->states[next_perf_state].control;
break;
+ case SYSTEM_AMD_MSR_CAPABLE:
+ cmd.type = SYSTEM_AMD_MSR_CAPABLE;
+ cmd.addr.msr.reg = MSR_AMD_PERF_CTL;
+ cmd.val = (u32) perf->states[next_perf_state].control;
+ break;
case SYSTEM_IO_CAPABLE:
cmd.type = SYSTEM_IO_CAPABLE;
cmd.addr.io.port = perf->control_register.address;
@@ -580,12 +607,16 @@ static int acpi_cpufreq_cpu_init(struct cpufreq_policy *policy)
break;
case ACPI_ADR_SPACE_FIXED_HARDWARE:
pr_debug("HARDWARE addr space\n");
- if (!check_est_cpu(cpu)) {
- result = -ENODEV;
- goto err_unreg;
+ if (check_est_cpu(cpu)) {
+ data->cpu_feature = SYSTEM_INTEL_MSR_CAPABLE;
+ break;
}
- data->cpu_feature = SYSTEM_INTEL_MSR_CAPABLE;
- break;
+ if (check_amd_hwpstate_cpu(cpu)) {
+ data->cpu_feature = SYSTEM_AMD_MSR_CAPABLE;
+ break;
+ }
+ result = -ENODEV;
+ goto err_unreg;
default:
pr_debug("Unknown addr space %d\n",
(u32) (perf->control_register.space_id));
--
1.7.5.2

2011-06-17 19:51:27

by Matthew Garrett

[permalink] [raw]
Subject: [PATCH V2 3/6] acpi-cpufreq: Add support for disabling dynamic overclocking

One feature present in powernow-k8 that isn't present in acpi-cpufreq is
support for enabling or disabling AMD's core performance boost technology.
This patch adds that support to acpi-cpufreq, but also extends it to allow
Intel's dynamic acceleration to be disabled via the same interface. The
sysfs entry retains the cpb name for compatibility purposes.

Signed-off-by: Matthew Garrett <[email protected]>
---
drivers/cpufreq/acpi-cpufreq.c | 160 ++++++++++++++++++++++++++++++++++++++++
1 files changed, 160 insertions(+), 0 deletions(-)

diff --git a/drivers/cpufreq/acpi-cpufreq.c b/drivers/cpufreq/acpi-cpufreq.c
index 298a947..8e6d89e 100644
--- a/drivers/cpufreq/acpi-cpufreq.c
+++ b/drivers/cpufreq/acpi-cpufreq.c
@@ -76,6 +76,86 @@ static struct acpi_processor_performance __percpu *acpi_perf_data;
static struct cpufreq_driver acpi_cpufreq_driver;

static unsigned int acpi_pstate_strict;
+static bool cpb_enabled, cpb_supported;
+static struct msr __percpu *msrs;
+
+static void _cpb_toggle_msrs(unsigned int cpu, bool t)
+{
+ struct acpi_cpufreq_data *data = per_cpu(acfreq_data, cpu);
+
+ get_online_cpus();
+
+ switch (data->cpu_feature) {
+ case SYSTEM_INTEL_MSR_CAPABLE:
+ rdmsr_on_cpus(cpu_online_mask, MSR_IA32_MISC_ENABLE, msrs);
+
+ for_each_cpu(cpu, cpu_online_mask) {
+ struct msr *reg = per_cpu_ptr(msrs, cpu);
+ if (t)
+ reg->q &= ~MSR_IA32_MISC_ENABLE_TURBO_DISABLE;
+ else
+ reg->q |= MSR_IA32_MISC_ENABLE_TURBO_DISABLE;
+ }
+
+ wrmsr_on_cpus(cpu_online_mask, MSR_IA32_MISC_ENABLE, msrs);
+ break;
+ case SYSTEM_AMD_MSR_CAPABLE:
+ rdmsr_on_cpus(cpu_online_mask, MSR_K7_HWCR, msrs);
+
+ for_each_cpu(cpu, cpu_online_mask) {
+ struct msr *reg = per_cpu_ptr(msrs, cpu);
+ if (t)
+ reg->l &= ~BIT(25);
+ else
+ reg->l |= BIT(25);
+ }
+
+ wrmsr_on_cpus(cpu_online_mask, MSR_K7_HWCR, msrs);
+ break;
+ }
+
+ put_online_cpus();
+}
+
+static void cpb_toggle(unsigned int cpu, bool t)
+{
+ if (t && !cpb_enabled) {
+ cpb_enabled = true;
+ _cpb_toggle_msrs(cpu, t);
+ pr_debug("Core Boosting enabled.\n");
+ } else if (!t && cpb_enabled) {
+ cpb_enabled = false;
+ _cpb_toggle_msrs(cpu, t);
+ pr_debug("Core Boosting disabled.\n");
+ }
+}
+
+static ssize_t store_cpb(struct cpufreq_policy *policy, const char *buf,
+ size_t count)
+{
+ int ret = -EINVAL;
+ unsigned long val = 0;
+ unsigned int cpu = policy->cpu;
+
+ ret = strict_strtoul(buf, 10, &val);
+ if (!ret && (val == 0 || val == 1) && cpb_supported)
+ cpb_toggle(cpu, val);
+ else
+ return -EINVAL;
+
+ return count;
+}
+
+static ssize_t show_cpb(struct cpufreq_policy *policy, char *buf)
+{
+ return sprintf(buf, "%u\n", cpb_enabled);
+}
+
+#define define_one_rw(_name) \
+static struct freq_attr _name = \
+ __ATTR(_name, 0644, show_##_name, store_##_name)
+
+define_one_rw(cpb);

static int check_est_cpu(unsigned int cpuid)
{
@@ -446,6 +526,63 @@ static void free_acpi_perf_data(void)
free_percpu(acpi_perf_data);
}

+static int cpb_notify(struct notifier_block *nb, unsigned long action,
+ void *hcpu)
+{
+ unsigned cpu = (long)hcpu;
+ u32 lo, hi;
+ struct acpi_cpufreq_data *data = per_cpu(acfreq_data, cpu);
+ int msr;
+ u64 bit;
+
+ switch (data->cpu_feature) {
+ case SYSTEM_INTEL_MSR_CAPABLE:
+ msr = MSR_IA32_MISC_ENABLE;
+ bit = MSR_IA32_MISC_ENABLE_TURBO_DISABLE;
+ break;
+ case SYSTEM_AMD_MSR_CAPABLE:
+ msr = MSR_K7_HWCR;
+ bit = BIT(25);
+ break;
+ default:
+ return NOTIFY_OK;
+ }
+
+ switch (action) {
+ case CPU_UP_PREPARE:
+ case CPU_UP_PREPARE_FROZEN:
+ if (!cpb_enabled) {
+ rdmsr_on_cpu(cpu, msr, &lo, &hi);
+ if (bit < BIT(32))
+ lo |= bit;
+ else
+ hi |= (bit >> 32);
+ wrmsr_on_cpu(cpu, msr, lo, hi);
+ }
+ break;
+
+ case CPU_DOWN_PREPARE:
+ case CPU_DOWN_PREPARE_FROZEN:
+ rdmsr_on_cpu(cpu, msr, &lo, &hi);
+ if (bit < BIT(32))
+ lo &= ~bit;
+ else
+ hi &= ~(bit >> 32);
+ wrmsr_on_cpu(cpu, msr, lo, hi);
+ break;
+
+ default:
+ break;
+ }
+
+ return NOTIFY_OK;
+}
+
+
+static struct notifier_block cpb_nb = {
+ .notifier_call = cpb_notify,
+};
+
/*
* acpi_cpufreq_early_init - initialize ACPI P-States library
*
@@ -666,6 +803,21 @@ static int acpi_cpufreq_cpu_init(struct cpufreq_policy *policy)
if (result)
goto err_freqfree;

+ if (boot_cpu_has(X86_FEATURE_CPB) || boot_cpu_has(X86_FEATURE_IDA)) {
+ msrs = msrs_alloc();
+
+ if (!msrs) {
+ result = -ENOMEM;
+ goto err_freqfree;
+ }
+
+ cpb_supported = true;
+
+ register_cpu_notifier(&cpb_nb);
+
+ cpb_toggle(0, true);
+ }
+
if (perf->states[0].core_frequency * 1000 != policy->cpuinfo.max_freq)
printk(KERN_WARNING FW_WARN "P-state 0 is not max freq\n");

@@ -749,6 +901,7 @@ static int acpi_cpufreq_resume(struct cpufreq_policy *policy)

static struct freq_attr *acpi_cpufreq_attr[] = {
&cpufreq_freq_attr_scaling_available_freqs,
+ &cpb,
NULL,
};

@@ -788,6 +941,13 @@ static void __exit acpi_cpufreq_exit(void)
{
pr_debug("acpi_cpufreq_exit\n");

+ if (msrs) {
+ unregister_cpu_notifier(&cpb_nb);
+
+ msrs_free(msrs);
+ msrs = NULL;
+ }
+
cpufreq_unregister_driver(&acpi_cpufreq_driver);

free_percpu(acpi_perf_data);
--
1.7.5.2

2011-06-17 19:52:13

by Matthew Garrett

[permalink] [raw]
Subject: [PATCH V2 4/6] ACPI: Add fixups for AMD P-state figures

Some AMD systems may round the frequencies in ACPI tables to 100MHz
boundaries. We can obtain the real frequencies from MSRs, so add a quirk
to fix these frequencies up on AMD systems.

Signed-off-by: Matthew Garrett <[email protected]>
---
drivers/acpi/processor_perflib.c | 30 ++++++++++++++++++++++++++++++
1 files changed, 30 insertions(+), 0 deletions(-)

diff --git a/drivers/acpi/processor_perflib.c b/drivers/acpi/processor_perflib.c
index 85b3237..9e18173 100644
--- a/drivers/acpi/processor_perflib.c
+++ b/drivers/acpi/processor_perflib.c
@@ -302,6 +302,34 @@ static int acpi_processor_get_performance_control(struct acpi_processor *pr)
return result;
}

+#ifdef CONFIG_X86
+/*
+ * Some AMDs have 50MHz frequency multiples, but only provide 100MHz rounding
+ * in their ACPI data. Calculate the real values and fix up the _PSS data.
+ */
+static void amd_fixup_frequency(struct acpi_processor_px *px, int i)
+{
+ u32 hi, lo, fid, did;
+ int index = px->control & 0x00000007;
+
+ if (boot_cpu_data.x86_vendor != X86_VENDOR_AMD)
+ return;
+
+ if ((boot_cpu_data.x86 == 0x10 && boot_cpu_data.x86_model < 10)
+ || boot_cpu_data.x86 == 0x11) {
+ rdmsr(0xc0010064 + index, lo, hi);
+ fid = lo & 0x3f;
+ did = (lo >> 6) & 7;
+ if (boot_cpu_data.x86 == 0x10)
+ px->core_frequency = (100 * (fid + 0x10)) >> did;
+ else
+ px->core_frequency = (100 * (fid + 8)) >> did;
+ }
+}
+#else
+static void amd_fixup_frequency(struct acpi_processor_px *px, int i) {};
+#endif
+
static int acpi_processor_get_performance_states(struct acpi_processor *pr)
{
int result = 0;
@@ -356,6 +384,8 @@ static int acpi_processor_get_performance_states(struct acpi_processor *pr)
goto end;
}

+ amd_fixup_frequency(px, i);
+
ACPI_DEBUG_PRINT((ACPI_DB_INFO,
"State [%d]: core_frequency[%d] power[%d] transition_latency[%d] bus_master_latency[%d] control[0x%x] status[0x%x]\n",
i,
--
1.7.5.2

2011-06-17 19:51:32

by Matthew Garrett

[permalink] [raw]
Subject: [PATCH V2 5/6] cpufreq: Add compatibility hack to powernow-k8

cpufreq modules are often loaded from init scripts that assume that all
recent AMD systems will use powernow-k8, so we should ensure that loading
it triggers a load of acpi-cpufreq if the latter is built as a module.
This avoids the problem of users ending up without any cpufreq support
after the transition.

Signed-off-by: Matthew Garrett <[email protected]>
---
drivers/cpufreq/powernow-k8.c | 6 +++++-
1 files changed, 5 insertions(+), 1 deletions(-)

diff --git a/drivers/cpufreq/powernow-k8.c b/drivers/cpufreq/powernow-k8.c
index 83479b6..8591ec8 100644
--- a/drivers/cpufreq/powernow-k8.c
+++ b/drivers/cpufreq/powernow-k8.c
@@ -1545,8 +1545,12 @@ static int __cpuinit powernowk8_init(void)
supported_cpus++;
}

- if (supported_cpus != num_online_cpus())
+ if (supported_cpus != num_online_cpus()) {
+ if (cpu_has(0, X86_FEATURE_HW_PSTATE))
+ request_module("acpi_cpufreq");
+
return -ENODEV;
+ }

printk(KERN_INFO PFX "Found %d %s (%d cpu cores) (" VERSION ")\n",
num_online_nodes(), boot_cpu_data.x86_model_id, supported_cpus);
--
1.7.5.2

2011-06-17 19:51:49

by Matthew Garrett

[permalink] [raw]
Subject: [PATCH V2 6/6] cpufreq: Remove support for hardware P-state chips from powernow-k8

These chips are now supported by acpi-cpufreq, so we can delete all the
code handling them.

Signed-off-by: Matthew Garrett <[email protected]>
---
drivers/cpufreq/Makefile | 2 +-
drivers/cpufreq/powernow-k8.c | 376 +++--------------------------------------
drivers/cpufreq/powernow-k8.h | 32 ----
3 files changed, 27 insertions(+), 383 deletions(-)

diff --git a/drivers/cpufreq/Makefile b/drivers/cpufreq/Makefile
index e2fc2d2..f8813d4 100644
--- a/drivers/cpufreq/Makefile
+++ b/drivers/cpufreq/Makefile
@@ -19,7 +19,7 @@ obj-$(CONFIG_CPU_FREQ_TABLE) += freq_table.o
# K8 systems. ACPI is preferred to all other hardware-specific drivers.
# speedstep-* is preferred over p4-clockmod.

-obj-$(CONFIG_X86_POWERNOW_K8) += powernow-k8.o mperf.o
+obj-$(CONFIG_X86_POWERNOW_K8) += powernow-k8.o
obj-$(CONFIG_X86_ACPI_CPUFREQ) += acpi-cpufreq.o mperf.o
obj-$(CONFIG_X86_PCC_CPUFREQ) += pcc-cpufreq.o
obj-$(CONFIG_X86_POWERNOW_K6) += powernow-k6.o
diff --git a/drivers/cpufreq/powernow-k8.c b/drivers/cpufreq/powernow-k8.c
index 8591ec8..1dacf09 100644
--- a/drivers/cpufreq/powernow-k8.c
+++ b/drivers/cpufreq/powernow-k8.c
@@ -45,19 +45,12 @@
#define PFX "powernow-k8: "
#define VERSION "version 2.20.00"
#include "powernow-k8.h"
-#include "mperf.h"

/* serialize freq changes */
static DEFINE_MUTEX(fidvid_mutex);

static DEFINE_PER_CPU(struct powernow_k8_data *, powernow_data);

-static int cpu_family = CPU_OPTERON;
-
-/* core performance boost */
-static bool cpb_capable, cpb_enabled;
-static struct msr __percpu *msrs;
-
static struct cpufreq_driver cpufreq_amd64_driver;

#ifndef CONFIG_SMP
@@ -79,12 +72,6 @@ static u32 find_khz_freq_from_fid(u32 fid)
return 1000 * find_freq_from_fid(fid);
}

-static u32 find_khz_freq_from_pstate(struct cpufreq_frequency_table *data,
- u32 pstate)
-{
- return data[pstate].frequency;
-}
-
/* Return the vco fid for an input fid
*
* Each "low" fid has corresponding "high" fid, and you can get to "low" fids
@@ -107,9 +94,6 @@ static int pending_bit_stuck(void)
{
u32 lo, hi;

- if (cpu_family == CPU_HW_PSTATE)
- return 0;
-
rdmsr(MSR_FIDVID_STATUS, lo, hi);
return lo & MSR_S_LO_CHANGE_PENDING ? 1 : 0;
}
@@ -123,20 +107,6 @@ static int query_current_values_with_pending_wait(struct powernow_k8_data *data)
u32 lo, hi;
u32 i = 0;

- if (cpu_family == CPU_HW_PSTATE) {
- rdmsr(MSR_PSTATE_STATUS, lo, hi);
- i = lo & HW_PSTATE_MASK;
- data->currpstate = i;
-
- /*
- * a workaround for family 11h erratum 311 might cause
- * an "out-of-range Pstate if the core is in Pstate-0
- */
- if ((boot_cpu_data.x86 == 0x11) && (i >= data->numps))
- data->currpstate = HW_PSTATE_0;
-
- return 0;
- }
do {
if (i++ > 10000) {
pr_debug("detected change pending stuck\n");
@@ -293,14 +263,6 @@ static int decrease_vid_code_by_step(struct powernow_k8_data *data,
return 0;
}

-/* Change hardware pstate by single MSR write */
-static int transition_pstate(struct powernow_k8_data *data, u32 pstate)
-{
- wrmsr(MSR_PSTATE_CTRL, pstate, 0);
- data->currpstate = pstate;
- return 0;
-}
-
/* Change Opteron/Athlon64 fid and vid, by the 3 phases. */
static int transition_fid_vid(struct powernow_k8_data *data,
u32 reqfid, u32 reqvid)
@@ -551,15 +513,8 @@ static void check_supported_cpu(void *_rc)
"Power state transitions not supported\n");
return;
}
- } else { /* must be a HW Pstate capable processor */
- cpuid(CPUID_FREQ_VOLT_CAPABILITIES, &eax, &ebx, &ecx, &edx);
- if ((edx & USE_HW_PSTATE) == USE_HW_PSTATE)
- cpu_family = CPU_HW_PSTATE;
- else
- return;
+ *rc = 0;
}
-
- *rc = 0;
}

static int check_pst_table(struct powernow_k8_data *data, struct pst_s *pst,
@@ -623,18 +578,11 @@ static void print_basics(struct powernow_k8_data *data)
for (j = 0; j < data->numps; j++) {
if (data->powernow_table[j].frequency !=
CPUFREQ_ENTRY_INVALID) {
- if (cpu_family == CPU_HW_PSTATE) {
- printk(KERN_INFO PFX
- " %d : pstate %d (%d MHz)\n", j,
- data->powernow_table[j].index,
- data->powernow_table[j].frequency/1000);
- } else {
printk(KERN_INFO PFX
"fid 0x%x (%d MHz), vid 0x%x\n",
data->powernow_table[j].index & 0xff,
data->powernow_table[j].frequency/1000,
data->powernow_table[j].index >> 8);
- }
}
}
if (data->batps)
@@ -642,20 +590,6 @@ static void print_basics(struct powernow_k8_data *data)
data->batps);
}

-static u32 freq_from_fid_did(u32 fid, u32 did)
-{
- u32 mhz = 0;
-
- if (boot_cpu_data.x86 == 0x10)
- mhz = (100 * (fid + 0x10)) >> did;
- else if (boot_cpu_data.x86 == 0x11)
- mhz = (100 * (fid + 8)) >> did;
- else
- BUG();
-
- return mhz * 1000;
-}
-
static int fill_powernow_table(struct powernow_k8_data *data,
struct pst_s *pst, u8 maxvid)
{
@@ -815,7 +749,7 @@ static void powernow_k8_acpi_pst_values(struct powernow_k8_data *data,
{
u64 control;

- if (!data->acpi_data.state_count || (cpu_family == CPU_HW_PSTATE))
+ if (!data->acpi_data.state_count)
return;

control = data->acpi_data.states[index].control;
@@ -866,10 +800,7 @@ static int powernow_k8_cpu_init_acpi(struct powernow_k8_data *data)
data->numps = data->acpi_data.state_count;
powernow_k8_acpi_pst_values(data, 0);

- if (cpu_family == CPU_HW_PSTATE)
- ret_val = fill_powernow_table_pstate(data, powernow_table);
- else
- ret_val = fill_powernow_table_fidvid(data, powernow_table);
+ ret_val = fill_powernow_table_fidvid(data, powernow_table);
if (ret_val)
goto err_out_mem;

@@ -906,47 +837,6 @@ err_out:
return ret_val;
}

-static int fill_powernow_table_pstate(struct powernow_k8_data *data,
- struct cpufreq_frequency_table *powernow_table)
-{
- int i;
- u32 hi = 0, lo = 0;
- rdmsr(MSR_PSTATE_CUR_LIMIT, lo, hi);
- data->max_hw_pstate = (lo & HW_PSTATE_MAX_MASK) >> HW_PSTATE_MAX_SHIFT;
-
- for (i = 0; i < data->acpi_data.state_count; i++) {
- u32 index;
-
- index = data->acpi_data.states[i].control & HW_PSTATE_MASK;
- if (index > data->max_hw_pstate) {
- printk(KERN_ERR PFX "invalid pstate %d - "
- "bad value %d.\n", i, index);
- printk(KERN_ERR PFX "Please report to BIOS "
- "manufacturer\n");
- invalidate_entry(powernow_table, i);
- continue;
- }
- rdmsr(MSR_PSTATE_DEF_BASE + index, lo, hi);
- if (!(hi & HW_PSTATE_VALID_MASK)) {
- pr_debug("invalid pstate %d, ignoring\n", index);
- invalidate_entry(powernow_table, i);
- continue;
- }
-
- powernow_table[i].index = index;
-
- /* Frequency may be rounded for these */
- if ((boot_cpu_data.x86 == 0x10 && boot_cpu_data.x86_model < 10)
- || boot_cpu_data.x86 == 0x11) {
- powernow_table[i].frequency =
- freq_from_fid_did(lo & 0x3f, (lo >> 6) & 7);
- } else
- powernow_table[i].frequency =
- data->acpi_data.states[i].core_frequency * 1000;
- }
- return 0;
-}
-
static int fill_powernow_table_fidvid(struct powernow_k8_data *data,
struct cpufreq_frequency_table *powernow_table)
{
@@ -1023,15 +913,8 @@ static int get_transition_latency(struct powernow_k8_data *data)
max_latency = cur_latency;
}
if (max_latency == 0) {
- /*
- * Fam 11h and later may return 0 as transition latency. This
- * is intended and means "very fast". While cpufreq core and
- * governors currently can handle that gracefully, better set it
- * to 1 to avoid problems in the future.
- */
- if (boot_cpu_data.x86 < 0x11)
- printk(KERN_ERR FW_WARN PFX "Invalid zero transition "
- "latency\n");
+ printk(KERN_ERR FW_WARN PFX "Invalid zero transition "
+ "latency\n");
max_latency = 1;
}
/* value in usecs, needs to be in nanoseconds */
@@ -1088,39 +971,6 @@ static int transition_frequency_fidvid(struct powernow_k8_data *data,
return res;
}

-/* Take a frequency, and issue the hardware pstate transition command */
-static int transition_frequency_pstate(struct powernow_k8_data *data,
- unsigned int index)
-{
- u32 pstate = 0;
- int res, i;
- struct cpufreq_freqs freqs;
-
- pr_debug("cpu %d transition to index %u\n", smp_processor_id(), index);
-
- /* get MSR index for hardware pstate transition */
- pstate = index & HW_PSTATE_MASK;
- if (pstate > data->max_hw_pstate)
- return 0;
- freqs.old = find_khz_freq_from_pstate(data->powernow_table,
- data->currpstate);
- freqs.new = find_khz_freq_from_pstate(data->powernow_table, pstate);
-
- for_each_cpu(i, data->available_cores) {
- freqs.cpu = i;
- cpufreq_notify_transition(&freqs, CPUFREQ_PRECHANGE);
- }
-
- res = transition_pstate(data, pstate);
- freqs.new = find_khz_freq_from_pstate(data->powernow_table, pstate);
-
- for_each_cpu(i, data->available_cores) {
- freqs.cpu = i;
- cpufreq_notify_transition(&freqs, CPUFREQ_POSTCHANGE);
- }
- return res;
-}
-
/* Driver entry point to switch to the target frequency */
static int powernowk8_target(struct cpufreq_policy *pol,
unsigned targfreq, unsigned relation)
@@ -1162,18 +1012,16 @@ static int powernowk8_target(struct cpufreq_policy *pol,
if (query_current_values_with_pending_wait(data))
goto err_out;

- if (cpu_family != CPU_HW_PSTATE) {
- pr_debug("targ: curr fid 0x%x, vid 0x%x\n",
- data->currfid, data->currvid);
+ pr_debug("targ: curr fid 0x%x, vid 0x%x\n",
+ data->currfid, data->currvid);

- if ((checkvid != data->currvid) ||
- (checkfid != data->currfid)) {
- printk(KERN_INFO PFX
- "error - out of sync, fix 0x%x 0x%x, "
- "vid 0x%x 0x%x\n",
- checkfid, data->currfid,
- checkvid, data->currvid);
- }
+ if ((checkvid != data->currvid) ||
+ (checkfid != data->currfid)) {
+ printk(KERN_INFO PFX
+ "error - out of sync, fix 0x%x 0x%x, "
+ "vid 0x%x 0x%x\n",
+ checkfid, data->currfid,
+ checkvid, data->currvid);
}

if (cpufreq_frequency_table_target(pol, data->powernow_table,
@@ -1184,10 +1032,8 @@ static int powernowk8_target(struct cpufreq_policy *pol,

powernow_k8_acpi_pst_values(data, newstate);

- if (cpu_family == CPU_HW_PSTATE)
- ret = transition_frequency_pstate(data, newstate);
- else
- ret = transition_frequency_fidvid(data, newstate);
+ ret = transition_frequency_fidvid(data, newstate);
+
if (ret) {
printk(KERN_ERR PFX "transition frequency failed\n");
ret = 1;
@@ -1196,11 +1042,7 @@ static int powernowk8_target(struct cpufreq_policy *pol,
}
mutex_unlock(&fidvid_mutex);

- if (cpu_family == CPU_HW_PSTATE)
- pol->cur = find_khz_freq_from_pstate(data->powernow_table,
- newstate);
- else
- pol->cur = find_khz_freq_from_fid(data->currfid);
+ pol->cur = find_khz_freq_from_fid(data->currfid);
ret = 0;

err_out:
@@ -1240,8 +1082,7 @@ static void __cpuinit powernowk8_cpu_init_on_cpu(void *_init_on_cpu)
return;
}

- if (cpu_family == CPU_OPTERON)
- fidvid_msr_init();
+ fidvid_msr_init();

init_on_cpu->rc = 0;
}
@@ -1255,7 +1096,6 @@ static int __cpuinit powernowk8_cpu_init(struct cpufreq_policy *pol)
struct powernow_k8_data *data;
struct init_on_cpu init_on_cpu;
int rc;
- struct cpuinfo_x86 *c = &cpu_data(pol->cpu);

if (!cpu_online(pol->cpu))
return -ENODEV;
@@ -1271,7 +1111,6 @@ static int __cpuinit powernowk8_cpu_init(struct cpufreq_policy *pol)
}

data->cpu = pol->cpu;
- data->currpstate = HW_PSTATE_INVALID;

if (powernow_k8_cpu_init_acpi(data)) {
/*
@@ -1308,17 +1147,10 @@ static int __cpuinit powernowk8_cpu_init(struct cpufreq_policy *pol)
if (rc != 0)
goto err_out_exit_acpi;

- if (cpu_family == CPU_HW_PSTATE)
- cpumask_copy(pol->cpus, cpumask_of(pol->cpu));
- else
- cpumask_copy(pol->cpus, cpu_core_mask(pol->cpu));
+ cpumask_copy(pol->cpus, cpu_core_mask(pol->cpu));
data->available_cores = pol->cpus;

- if (cpu_family == CPU_HW_PSTATE)
- pol->cur = find_khz_freq_from_pstate(data->powernow_table,
- data->currpstate);
- else
- pol->cur = find_khz_freq_from_fid(data->currfid);
+ pol->cur = find_khz_freq_from_fid(data->currfid);
pr_debug("policy current frequency %d kHz\n", pol->cur);

/* min/max the cpu is capable of */
@@ -1330,18 +1162,10 @@ static int __cpuinit powernowk8_cpu_init(struct cpufreq_policy *pol)
return -EINVAL;
}

- /* Check for APERF/MPERF support in hardware */
- if (cpu_has(c, X86_FEATURE_APERFMPERF))
- cpufreq_amd64_driver.getavg = cpufreq_get_measured_perf;
-
cpufreq_frequency_table_get_attr(data->powernow_table, pol->cpu);

- if (cpu_family == CPU_HW_PSTATE)
- pr_debug("cpu_init done, current pstate 0x%x\n",
- data->currpstate);
- else
- pr_debug("cpu_init done, current fid 0x%x, vid 0x%x\n",
- data->currfid, data->currvid);
+ pr_debug("cpu_init done, current fid 0x%x, vid 0x%x\n",
+ data->currfid, data->currvid);

per_cpu(powernow_data, pol->cpu) = data;

@@ -1394,88 +1218,15 @@ static unsigned int powernowk8_get(unsigned int cpu)
if (err)
goto out;

- if (cpu_family == CPU_HW_PSTATE)
- khz = find_khz_freq_from_pstate(data->powernow_table,
- data->currpstate);
- else
- khz = find_khz_freq_from_fid(data->currfid);
+ khz = find_khz_freq_from_fid(data->currfid);


out:
return khz;
}

-static void _cpb_toggle_msrs(bool t)
-{
- int cpu;
-
- get_online_cpus();
-
- rdmsr_on_cpus(cpu_online_mask, MSR_K7_HWCR, msrs);
-
- for_each_cpu(cpu, cpu_online_mask) {
- struct msr *reg = per_cpu_ptr(msrs, cpu);
- if (t)
- reg->l &= ~BIT(25);
- else
- reg->l |= BIT(25);
- }
- wrmsr_on_cpus(cpu_online_mask, MSR_K7_HWCR, msrs);
-
- put_online_cpus();
-}
-
-/*
- * Switch on/off core performance boosting.
- *
- * 0=disable
- * 1=enable.
- */
-static void cpb_toggle(bool t)
-{
- if (!cpb_capable)
- return;
-
- if (t && !cpb_enabled) {
- cpb_enabled = true;
- _cpb_toggle_msrs(t);
- printk(KERN_INFO PFX "Core Boosting enabled.\n");
- } else if (!t && cpb_enabled) {
- cpb_enabled = false;
- _cpb_toggle_msrs(t);
- printk(KERN_INFO PFX "Core Boosting disabled.\n");
- }
-}
-
-static ssize_t store_cpb(struct cpufreq_policy *policy, const char *buf,
- size_t count)
-{
- int ret = -EINVAL;
- unsigned long val = 0;
-
- ret = strict_strtoul(buf, 10, &val);
- if (!ret && (val == 0 || val == 1) && cpb_capable)
- cpb_toggle(val);
- else
- return -EINVAL;
-
- return count;
-}
-
-static ssize_t show_cpb(struct cpufreq_policy *policy, char *buf)
-{
- return sprintf(buf, "%u\n", cpb_enabled);
-}
-
-#define define_one_rw(_name) \
-static struct freq_attr _name = \
-__ATTR(_name, 0644, show_##_name, store_##_name)
-
-define_one_rw(cpb);
-
static struct freq_attr *powernow_k8_attr[] = {
&cpufreq_freq_attr_scaling_available_freqs,
- &cpb,
NULL,
};

@@ -1491,51 +1242,10 @@ static struct cpufreq_driver cpufreq_amd64_driver = {
.attr = powernow_k8_attr,
};

-/*
- * Clear the boost-disable flag on the CPU_DOWN path so that this cpu
- * cannot block the remaining ones from boosting. On the CPU_UP path we
- * simply keep the boost-disable flag in sync with the current global
- * state.
- */
-static int cpb_notify(struct notifier_block *nb, unsigned long action,
- void *hcpu)
-{
- unsigned cpu = (long)hcpu;
- u32 lo, hi;
-
- switch (action) {
- case CPU_UP_PREPARE:
- case CPU_UP_PREPARE_FROZEN:
-
- if (!cpb_enabled) {
- rdmsr_on_cpu(cpu, MSR_K7_HWCR, &lo, &hi);
- lo |= BIT(25);
- wrmsr_on_cpu(cpu, MSR_K7_HWCR, lo, hi);
- }
- break;
-
- case CPU_DOWN_PREPARE:
- case CPU_DOWN_PREPARE_FROZEN:
- rdmsr_on_cpu(cpu, MSR_K7_HWCR, &lo, &hi);
- lo &= ~BIT(25);
- wrmsr_on_cpu(cpu, MSR_K7_HWCR, lo, hi);
- break;
-
- default:
- break;
- }
-
- return NOTIFY_OK;
-}
-
-static struct notifier_block cpb_nb = {
- .notifier_call = cpb_notify,
-};
-
/* driver entry point for init */
static int __cpuinit powernowk8_init(void)
{
- unsigned int i, supported_cpus = 0, cpu;
+ unsigned int i, supported_cpus = 0;
int rv;

for_each_online_cpu(i) {
@@ -1546,7 +1256,7 @@ static int __cpuinit powernowk8_init(void)
}

if (supported_cpus != num_online_cpus()) {
- if (cpu_has(0, X86_FEATURE_HW_PSTATE))
+ if (static_cpu_has(X86_FEATURE_HW_PSTATE))
request_module("acpi_cpufreq");

return -ENODEV;
@@ -1555,35 +1265,8 @@ static int __cpuinit powernowk8_init(void)
printk(KERN_INFO PFX "Found %d %s (%d cpu cores) (" VERSION ")\n",
num_online_nodes(), boot_cpu_data.x86_model_id, supported_cpus);

- if (boot_cpu_has(X86_FEATURE_CPB)) {
-
- cpb_capable = true;
-
- msrs = msrs_alloc();
- if (!msrs) {
- printk(KERN_ERR "%s: Error allocating msrs!\n", __func__);
- return -ENOMEM;
- }
-
- register_cpu_notifier(&cpb_nb);
-
- rdmsr_on_cpus(cpu_online_mask, MSR_K7_HWCR, msrs);
-
- for_each_cpu(cpu, cpu_online_mask) {
- struct msr *reg = per_cpu_ptr(msrs, cpu);
- cpb_enabled |= !(!!(reg->l & BIT(25)));
- }
-
- printk(KERN_INFO PFX "Core Performance Boosting: %s.\n",
- (cpb_enabled ? "on" : "off"));
- }
-
rv = cpufreq_register_driver(&cpufreq_amd64_driver);
- if (rv < 0 && boot_cpu_has(X86_FEATURE_CPB)) {
- unregister_cpu_notifier(&cpb_nb);
- msrs_free(msrs);
- msrs = NULL;
- }
+
return rv;
}

@@ -1592,13 +1275,6 @@ static void __exit powernowk8_exit(void)
{
pr_debug("exit\n");

- if (boot_cpu_has(X86_FEATURE_CPB)) {
- msrs_free(msrs);
- msrs = NULL;
-
- unregister_cpu_notifier(&cpb_nb);
- }
-
cpufreq_unregister_driver(&cpufreq_amd64_driver);
}

diff --git a/drivers/cpufreq/powernow-k8.h b/drivers/cpufreq/powernow-k8.h
index 3744d26..79329d4 100644
--- a/drivers/cpufreq/powernow-k8.h
+++ b/drivers/cpufreq/powernow-k8.h
@@ -5,24 +5,11 @@
* http://www.gnu.org/licenses/gpl.html
*/

-enum pstate {
- HW_PSTATE_INVALID = 0xff,
- HW_PSTATE_0 = 0,
- HW_PSTATE_1 = 1,
- HW_PSTATE_2 = 2,
- HW_PSTATE_3 = 3,
- HW_PSTATE_4 = 4,
- HW_PSTATE_5 = 5,
- HW_PSTATE_6 = 6,
- HW_PSTATE_7 = 7,
-};
-
struct powernow_k8_data {
unsigned int cpu;

u32 numps; /* number of p-states */
u32 batps; /* number of p-states supported on battery */
- u32 max_hw_pstate; /* maximum legal hardware pstate */

/* these values are constant when the PSB is used to determine
* vid/fid pairings, but are modified during the ->target() call
@@ -37,7 +24,6 @@ struct powernow_k8_data {
/* keep track of the current fid / vid or pstate */
u32 currvid;
u32 currfid;
- enum pstate currpstate;

/* the powernow_table includes all frequency and vid/fid pairings:
* fid are the lower 8 bits of the index, vid are the upper 8 bits.
@@ -97,23 +83,6 @@ struct powernow_k8_data {
#define MSR_S_HI_CURRENT_VID 0x0000003f
#define MSR_C_HI_STP_GNT_BENIGN 0x00000001

-
-/* Hardware Pstate _PSS and MSR definitions */
-#define USE_HW_PSTATE 0x00000080
-#define HW_PSTATE_MASK 0x00000007
-#define HW_PSTATE_VALID_MASK 0x80000000
-#define HW_PSTATE_MAX_MASK 0x000000f0
-#define HW_PSTATE_MAX_SHIFT 4
-#define MSR_PSTATE_DEF_BASE 0xc0010064 /* base of Pstate MSRs */
-#define MSR_PSTATE_STATUS 0xc0010063 /* Pstate Status MSR */
-#define MSR_PSTATE_CTRL 0xc0010062 /* Pstate control MSR */
-#define MSR_PSTATE_CUR_LIMIT 0xc0010061 /* pstate current limit MSR */
-
-/* define the two driver architectures */
-#define CPU_OPTERON 0
-#define CPU_HW_PSTATE 1
-
-
/*
* There are restrictions frequencies have to follow:
* - only 1 entry in the low fid table ( <=1.4GHz )
@@ -218,5 +187,4 @@ static int core_frequency_transition(struct powernow_k8_data *data, u32 reqfid);

static void powernow_k8_acpi_pst_values(struct powernow_k8_data *data, unsigned int index);

-static int fill_powernow_table_pstate(struct powernow_k8_data *data, struct cpufreq_frequency_table *powernow_table);
static int fill_powernow_table_fidvid(struct powernow_k8_data *data, struct cpufreq_frequency_table *powernow_table);
--
1.7.5.2

2011-06-18 11:25:45

by Borislav Petkov

[permalink] [raw]
Subject: Re: [PATCH V2 1/6] x86: Add AMD HW_PSTATE cpu feature bit and MSRs

On Fri, Jun 17, 2011 at 03:50:52PM -0400, Matthew Garrett wrote:
> AMD CPUs that support hardware P-state setting expose this via cpuid. Add
> the bit to the generic code so the ACPI cpufreq driver can make use of it,
> and also add the related MSRs.
>
> Signed-off-by: Matthew Garrett <[email protected]>
> ---
> arch/x86/include/asm/cpufeature.h | 1 +
> arch/x86/include/asm/msr-index.h | 2 ++
> arch/x86/kernel/cpu/scattered.c | 1 +
> 3 files changed, 4 insertions(+), 0 deletions(-)
>
> diff --git a/arch/x86/include/asm/cpufeature.h b/arch/x86/include/asm/cpufeature.h
> index 71cc380..312f455 100644
> --- a/arch/x86/include/asm/cpufeature.h
> +++ b/arch/x86/include/asm/cpufeature.h
> @@ -174,6 +174,7 @@
> #define X86_FEATURE_PLN (7*32+ 5) /* Intel Power Limit Notification */
> #define X86_FEATURE_PTS (7*32+ 6) /* Intel Package Thermal Status */
> #define X86_FEATURE_DTS (7*32+ 7) /* Digital Thermal Sensor */
> +#define X86_FEATURE_HW_PSTATE (7*32+ 8) /* AMD frequency scaling */

Yeah, this is actually called /* AMD hardware P-state control */

because older CPUs also do frequency scaling but use a different method.

Other than that

Acked-by: Borislav Petkov <[email protected]>

>
> /* Virtualization flags: Linux defined, word 8 */
> #define X86_FEATURE_TPR_SHADOW (8*32+ 0) /* Intel TPR Shadow */
> diff --git a/arch/x86/include/asm/msr-index.h b/arch/x86/include/asm/msr-index.h
> index 485b4f1..1772978 100644
> --- a/arch/x86/include/asm/msr-index.h
> +++ b/arch/x86/include/asm/msr-index.h
> @@ -234,6 +234,8 @@
>
> #define MSR_IA32_PERF_STATUS 0x00000198
> #define MSR_IA32_PERF_CTL 0x00000199
> +#define MSR_AMD_PERF_STATUS 0xc0010063
> +#define MSR_AMD_PERF_CTL 0xc0010062
>
> #define MSR_IA32_MPERF 0x000000e7
> #define MSR_IA32_APERF 0x000000e8
> diff --git a/arch/x86/kernel/cpu/scattered.c b/arch/x86/kernel/cpu/scattered.c
> index c7f64e6..4adffc4 100644
> --- a/arch/x86/kernel/cpu/scattered.c
> +++ b/arch/x86/kernel/cpu/scattered.c
> @@ -39,6 +39,7 @@ void __cpuinit init_scattered_cpuid_features(struct cpuinfo_x86 *c)
> { X86_FEATURE_APERFMPERF, CR_ECX, 0, 0x00000006, 0 },
> { X86_FEATURE_EPB, CR_ECX, 3, 0x00000006, 0 },
> { X86_FEATURE_XSAVEOPT, CR_EAX, 0, 0x0000000d, 1 },
> + { X86_FEATURE_HW_PSTATE, CR_EDX, 7, 0x80000007, 0 },
> { X86_FEATURE_CPB, CR_EDX, 9, 0x80000007, 0 },
> { X86_FEATURE_NPT, CR_EDX, 0, 0x8000000a, 0 },
> { X86_FEATURE_LBRV, CR_EDX, 1, 0x8000000a, 0 },
> --
> 1.7.5.2
>
> --
> To unsubscribe from this list: send the line "unsubscribe linux-kernel" in
> the body of a message to [email protected]
> More majordomo info at http://vger.kernel.org/majordomo-info.html
> Please read the FAQ at http://www.tux.org/lkml/

--
Regards/Gruss,
Boris.

2011-06-18 11:28:14

by Borislav Petkov

[permalink] [raw]
Subject: Re: [PATCH V2 3/6] acpi-cpufreq: Add support for disabling dynamic overclocking

On Fri, Jun 17, 2011 at 03:50:54PM -0400, Matthew Garrett wrote:
> One feature present in powernow-k8 that isn't present in acpi-cpufreq is
> support for enabling or disabling AMD's core performance boost technology.
> This patch adds that support to acpi-cpufreq, but also extends it to allow
> Intel's dynamic acceleration to be disabled via the same interface. The
> sysfs entry retains the cpb name for compatibility purposes.

Yeah, thinking in retrospect 'cpb' doesn't tell you anything. We
should've called it 'boost' from the beginning. And it makes much more
sense to call it that especially now, when the two methods get merged.

Maybe we should add a 'boost' sysfs node too and slowly phase out 'cpb'
or even leave 'cpb' in powernow-k8 and call it 'boost' here from the
get-go... Hmm...

> Signed-off-by: Matthew Garrett <[email protected]>
> ---
> drivers/cpufreq/acpi-cpufreq.c | 160 ++++++++++++++++++++++++++++++++++++++++
> 1 files changed, 160 insertions(+), 0 deletions(-)
>
> diff --git a/drivers/cpufreq/acpi-cpufreq.c b/drivers/cpufreq/acpi-cpufreq.c
> index 298a947..8e6d89e 100644
> --- a/drivers/cpufreq/acpi-cpufreq.c
> +++ b/drivers/cpufreq/acpi-cpufreq.c
> @@ -76,6 +76,86 @@ static struct acpi_processor_performance __percpu *acpi_perf_data;
> static struct cpufreq_driver acpi_cpufreq_driver;
>
> static unsigned int acpi_pstate_strict;
> +static bool cpb_enabled, cpb_supported;
> +static struct msr __percpu *msrs;
> +
> +static void _cpb_toggle_msrs(unsigned int cpu, bool t)
> +{
> + struct acpi_cpufreq_data *data = per_cpu(acfreq_data, cpu);
> +
> + get_online_cpus();
> +
> + switch (data->cpu_feature) {
> + case SYSTEM_INTEL_MSR_CAPABLE:
> + rdmsr_on_cpus(cpu_online_mask, MSR_IA32_MISC_ENABLE, msrs);
> +
> + for_each_cpu(cpu, cpu_online_mask) {
> + struct msr *reg = per_cpu_ptr(msrs, cpu);
> + if (t)
> + reg->q &= ~MSR_IA32_MISC_ENABLE_TURBO_DISABLE;
> + else
> + reg->q |= MSR_IA32_MISC_ENABLE_TURBO_DISABLE;
> + }
> +
> + wrmsr_on_cpus(cpu_online_mask, MSR_IA32_MISC_ENABLE, msrs);
> + break;
> + case SYSTEM_AMD_MSR_CAPABLE:
> + rdmsr_on_cpus(cpu_online_mask, MSR_K7_HWCR, msrs);
> +
> + for_each_cpu(cpu, cpu_online_mask) {
> + struct msr *reg = per_cpu_ptr(msrs, cpu);
> + if (t)
> + reg->l &= ~BIT(25);
> + else
> + reg->l |= BIT(25);
> + }
> +
> + wrmsr_on_cpus(cpu_online_mask, MSR_K7_HWCR, msrs);
> + break;
> + }
> +
> + put_online_cpus();
> +}
> +
> +static void cpb_toggle(unsigned int cpu, bool t)
> +{
> + if (t && !cpb_enabled) {
> + cpb_enabled = true;
> + _cpb_toggle_msrs(cpu, t);
> + pr_debug("Core Boosting enabled.\n");
> + } else if (!t && cpb_enabled) {
> + cpb_enabled = false;
> + _cpb_toggle_msrs(cpu, t);
> + pr_debug("Core Boosting disabled.\n");
> + }
> +}
> +
> +static ssize_t store_cpb(struct cpufreq_policy *policy, const char *buf,
> + size_t count)
> +{
> + int ret = -EINVAL;
> + unsigned long val = 0;
> + unsigned int cpu = policy->cpu;
> +
> + ret = strict_strtoul(buf, 10, &val);
> + if (!ret && (val == 0 || val == 1) && cpb_supported)
> + cpb_toggle(cpu, val);
> + else
> + return -EINVAL;
> +
> + return count;
> +}
> +
> +static ssize_t show_cpb(struct cpufreq_policy *policy, char *buf)
> +{
> + return sprintf(buf, "%u\n", cpb_enabled);
> +}
> +
> +#define define_one_rw(_name) \
> +static struct freq_attr _name = \
> + __ATTR(_name, 0644, show_##_name, store_##_name)
> +
> +define_one_rw(cpb);
>
> static int check_est_cpu(unsigned int cpuid)
> {
> @@ -446,6 +526,63 @@ static void free_acpi_perf_data(void)
> free_percpu(acpi_perf_data);
> }
>
> +static int cpb_notify(struct notifier_block *nb, unsigned long action,
> + void *hcpu)
> +{
> + unsigned cpu = (long)hcpu;
> + u32 lo, hi;
> + struct acpi_cpufreq_data *data = per_cpu(acfreq_data, cpu);
> + int msr;
> + u64 bit;
> +
> + switch (data->cpu_feature) {
> + case SYSTEM_INTEL_MSR_CAPABLE:
> + msr = MSR_IA32_MISC_ENABLE;
> + bit = MSR_IA32_MISC_ENABLE_TURBO_DISABLE;
> + break;
> + case SYSTEM_AMD_MSR_CAPABLE:
> + msr = MSR_K7_HWCR;
> + bit = BIT(25);
> + break;
> + default:
> + return NOTIFY_OK;
> + }

Yeah, can we move the powernow-k8 comment about the boost-state because
it is important:

/*
* On AMD clear the boost-disable flag on the CPU_DOWN path so that this cpu
* cannot block the remaining ones from boosting. On the CPU_UP path we
* simply keep the boost-disable flag in sync with the current global
* state.
*/

> + switch (action) {
> + case CPU_UP_PREPARE:
> + case CPU_UP_PREPARE_FROZEN:
> + if (!cpb_enabled) {
> + rdmsr_on_cpu(cpu, msr, &lo, &hi);
> + if (bit < BIT(32))
> + lo |= bit;
> + else
> + hi |= (bit >> 32);
> + wrmsr_on_cpu(cpu, msr, lo, hi);
> + }
> + break;
> +
> + case CPU_DOWN_PREPARE:
> + case CPU_DOWN_PREPARE_FROZEN:
> + rdmsr_on_cpu(cpu, msr, &lo, &hi);
> + if (bit < BIT(32))
> + lo &= ~bit;
> + else
> + hi &= ~(bit >> 32);
> + wrmsr_on_cpu(cpu, msr, lo, hi);
> + break;
> +
> + default:
> + break;
> + }
> +
> + return NOTIFY_OK;
> +}
> +
> +
> +static struct notifier_block cpb_nb = {
> + .notifier_call = cpb_notify,
> +};
> +
> /*
> * acpi_cpufreq_early_init - initialize ACPI P-States library
> *
> @@ -666,6 +803,21 @@ static int acpi_cpufreq_cpu_init(struct cpufreq_policy *policy)
> if (result)
> goto err_freqfree;
>
> + if (boot_cpu_has(X86_FEATURE_CPB) || boot_cpu_has(X86_FEATURE_IDA)) {
> + msrs = msrs_alloc();
> +
> + if (!msrs) {
> + result = -ENOMEM;
> + goto err_freqfree;
> + }
> +
> + cpb_supported = true;
> +
> + register_cpu_notifier(&cpb_nb);
> +
> + cpb_toggle(0, true);

You're unconditionally enabling boosting on driver load here, while
powernow-k8 simply stated the current boost state. Also, a normal setup
should always come up with boosting enabled (I suppose on Intel too).
However, if you do this here, you're overwriting the boosting settings
a user changed in the BIOS because she wanted to turn it off for
whatever reason.

So let's only report the boost state here instead, huh?

> + }
> +
> if (perf->states[0].core_frequency * 1000 != policy->cpuinfo.max_freq)
> printk(KERN_WARNING FW_WARN "P-state 0 is not max freq\n");
>
> @@ -749,6 +901,7 @@ static int acpi_cpufreq_resume(struct cpufreq_policy *policy)
>
> static struct freq_attr *acpi_cpufreq_attr[] = {
> &cpufreq_freq_attr_scaling_available_freqs,
> + &cpb,
> NULL,
> };
>
> @@ -788,6 +941,13 @@ static void __exit acpi_cpufreq_exit(void)
> {
> pr_debug("acpi_cpufreq_exit\n");
>
> + if (msrs) {
> + unregister_cpu_notifier(&cpb_nb);
> +
> + msrs_free(msrs);
> + msrs = NULL;
> + }
> +
> cpufreq_unregister_driver(&acpi_cpufreq_driver);
>
> free_percpu(acpi_perf_data);
> --
> 1.7.5.2
>
> --
> To unsubscribe from this list: send the line "unsubscribe linux-kernel" in
> the body of a message to [email protected]
> More majordomo info at http://vger.kernel.org/majordomo-info.html
> Please read the FAQ at http://www.tux.org/lkml/

--
Regards/Gruss,
Boris.

2011-06-18 11:28:32

by Borislav Petkov

[permalink] [raw]
Subject: Re: [PATCH V2 4/6] ACPI: Add fixups for AMD P-state figures

On Fri, Jun 17, 2011 at 03:50:55PM -0400, Matthew Garrett wrote:
> Some AMD systems may round the frequencies in ACPI tables to 100MHz
> boundaries. We can obtain the real frequencies from MSRs, so add a quirk
> to fix these frequencies up on AMD systems.
>
> Signed-off-by: Matthew Garrett <[email protected]>
> ---
> drivers/acpi/processor_perflib.c | 30 ++++++++++++++++++++++++++++++
> 1 files changed, 30 insertions(+), 0 deletions(-)
>
> diff --git a/drivers/acpi/processor_perflib.c b/drivers/acpi/processor_perflib.c
> index 85b3237..9e18173 100644
> --- a/drivers/acpi/processor_perflib.c
> +++ b/drivers/acpi/processor_perflib.c
> @@ -302,6 +302,34 @@ static int acpi_processor_get_performance_control(struct acpi_processor *pr)
> return result;
> }
>
> +#ifdef CONFIG_X86
> +/*
> + * Some AMDs have 50MHz frequency multiples, but only provide 100MHz rounding
> + * in their ACPI data. Calculate the real values and fix up the _PSS data.
> + */
> +static void amd_fixup_frequency(struct acpi_processor_px *px, int i)
> +{
> + u32 hi, lo, fid, did;
> + int index = px->control & 0x00000007;
> +
> + if (boot_cpu_data.x86_vendor != X86_VENDOR_AMD)
> + return;
> +
> + if ((boot_cpu_data.x86 == 0x10 && boot_cpu_data.x86_model < 10)
> + || boot_cpu_data.x86 == 0x11) {
> + rdmsr(0xc0010064 + index, lo, hi);

MSR_PSTATE_DEF_BASE

> + fid = lo & 0x3f;
> + did = (lo >> 6) & 7;
> + if (boot_cpu_data.x86 == 0x10)
> + px->core_frequency = (100 * (fid + 0x10)) >> did;
> + else
> + px->core_frequency = (100 * (fid + 8)) >> did;
> + }
> +}
> +#else
> +static void amd_fixup_frequency(struct acpi_processor_px *px, int i) {};
> +#endif
> +
> static int acpi_processor_get_performance_states(struct acpi_processor *pr)
> {
> int result = 0;
> @@ -356,6 +384,8 @@ static int acpi_processor_get_performance_states(struct acpi_processor *pr)
> goto end;
> }
>
> + amd_fixup_frequency(px, i);
> +
> ACPI_DEBUG_PRINT((ACPI_DB_INFO,
> "State [%d]: core_frequency[%d] power[%d] transition_latency[%d] bus_master_latency[%d] control[0x%x] status[0x%x]\n",
> i,
> --
> 1.7.5.2
>
> --
> To unsubscribe from this list: send the line "unsubscribe linux-kernel" in
> the body of a message to [email protected]
> More majordomo info at http://vger.kernel.org/majordomo-info.html
> Please read the FAQ at http://www.tux.org/lkml/

--
Regards/Gruss,
Boris.

2011-06-18 11:28:42

by Borislav Petkov

[permalink] [raw]
Subject: Re: [PATCH V2 5/6] cpufreq: Add compatibility hack to powernow-k8

On Fri, Jun 17, 2011 at 03:50:56PM -0400, Matthew Garrett wrote:
> cpufreq modules are often loaded from init scripts that assume that all
> recent AMD systems will use powernow-k8, so we should ensure that loading
> it triggers a load of acpi-cpufreq if the latter is built as a module.
> This avoids the problem of users ending up without any cpufreq support
> after the transition.
>
> Signed-off-by: Matthew Garrett <[email protected]>
> ---
> drivers/cpufreq/powernow-k8.c | 6 +++++-
> 1 files changed, 5 insertions(+), 1 deletions(-)
>
> diff --git a/drivers/cpufreq/powernow-k8.c b/drivers/cpufreq/powernow-k8.c
> index 83479b6..8591ec8 100644
> --- a/drivers/cpufreq/powernow-k8.c
> +++ b/drivers/cpufreq/powernow-k8.c
> @@ -1545,8 +1545,12 @@ static int __cpuinit powernowk8_init(void)
> supported_cpus++;
> }
>
> - if (supported_cpus != num_online_cpus())
> + if (supported_cpus != num_online_cpus()) {
> + if (cpu_has(0, X86_FEATURE_HW_PSTATE))

you mean here
if (boot_cpu_has(X86_FEATURE_HW_PSTATE))

otherwise:

drivers/cpufreq/powernow-k8.c: In function ‘powernowk8_init’:
drivers/cpufreq/powernow-k8.c:1553: error: invalid type argument of ‘->’ (have ‘int’)
drivers/cpufreq/powernow-k8.c:1553: error: invalid type argument of ‘->’ (have ‘int’)
make[2]: *** [drivers/cpufreq/powernow-k8.o] Error 1
make[1]: *** [drivers/cpufreq] Error 2
make: *** [drivers] Error 2
make: *** Waiting for unfinished jobs....


> + request_module("acpi_cpufreq");
> +
> return -ENODEV;
> + }
>
> printk(KERN_INFO PFX "Found %d %s (%d cpu cores) (" VERSION ")\n",
> num_online_nodes(), boot_cpu_data.x86_model_id, supported_cpus);
> --
> 1.7.5.2

--
Regards/Gruss,
Boris.