Hi,
This patch to 2.6.24-rc5 enables AMD Barcelona CPUs to register thermal throttling events as machine checks, in the
same fashion as the analogous Intel code.
Changed files:
arch/x86/kernel/cpu/mcheck/Makefile
arch/x86/kernel/cpu/mcheck/mce_amd_64.c
arch/x86/kernel/cpu/mcheck/mce_intel_64.c
include/asm-x86/mce.h
New files:
arch/x86/kernel/cpu/mcheck/mce_thermal.c
Signed-off-by Russell Leidich <[email protected]>
--
Russell Leidich
diff -uprN linux-2.6.24-rc5.orig/arch/x86/kernel/cpu/mcheck/Makefile linux-2.6.24-rc5/arch/x86/kernel/cpu/mcheck/Makefile
--- linux-2.6.24-rc5.orig/arch/x86/kernel/cpu/mcheck/Makefile 2007-12-11 14:30:53.533297000 -0800
+++ linux-2.6.24-rc5/arch/x86/kernel/cpu/mcheck/Makefile 2007-12-11 11:56:20.549185000 -0800
@@ -1,4 +1,4 @@
-obj-y = mce_$(BITS).o therm_throt.o
+obj-y = mce_$(BITS).o therm_throt.o mce_thermal.o
obj-$(CONFIG_X86_32) += k7.o p4.o p5.o p6.o winchip.o
obj-$(CONFIG_X86_MCE_INTEL) += mce_intel_64.o
diff -uprN linux-2.6.24-rc5.orig/arch/x86/kernel/cpu/mcheck/mce_amd_64.c linux-2.6.24-rc5/arch/x86/kernel/cpu/mcheck/mce_amd_64.c
--- linux-2.6.24-rc5.orig/arch/x86/kernel/cpu/mcheck/mce_amd_64.c 2007-12-11 14:30:53.559298000 -0800
+++ linux-2.6.24-rc5/arch/x86/kernel/cpu/mcheck/mce_amd_64.c 2007-12-14 17:34:18.749040000 -0800
@@ -20,6 +20,8 @@
#include <linux/interrupt.h>
#include <linux/kobject.h>
#include <linux/notifier.h>
+#include <linux/pci.h>
+#include <linux/pci_ids.h>
#include <linux/sched.h>
#include <linux/smp.h>
#include <linux/sysdev.h>
@@ -29,6 +31,7 @@
#include <asm/msr.h>
#include <asm/percpu.h>
#include <asm/idle.h>
+#include <asm/therm_throt.h>
#define PFX "mce_threshold: "
#define VERSION "version 1.1.1"
@@ -46,6 +49,19 @@
#define MASK_ERR_COUNT_HI 0x00000FFF
#define MASK_BLKPTR_LO 0xFF000000
#define MCG_XBLK_ADDR 0xC0000400
+#define THERM_CTL_F3X64 0x64
+#define PSL_APIC_LO_EN 0x80
+#define PSL_APIC_HI_EN 0x40
+#define HTC_ACTIVE 0x10
+#define HTC_EN 1
+#define NB_PCI_DEV_BASE 0x18
+
+extern int num_k8_northbridges;
+extern struct pci_dev **k8_northbridges;
+
+static int smp_thermal_interrupt_init(void);
+static int thermal_apic_init_allowed;
+static void thermal_apic_init(void *unused);
struct threshold_block {
unsigned int block;
@@ -644,20 +660,31 @@ static void threshold_remove_device(unsi
}
/* get notified when a cpu comes on/off */
-static int threshold_cpu_callback(struct notifier_block *nfb,
+static int amd_cpu_callback(struct notifier_block *nfb,
unsigned long action, void *hcpu)
{
/* cpu was unsigned int to begin with */
unsigned int cpu = (unsigned long)hcpu;
- if (cpu >= NR_CPUS)
- goto out;
-
switch (action) {
case CPU_ONLINE:
case CPU_ONLINE_FROZEN:
threshold_create_device(cpu);
- break;
+ if (thermal_apic_init_allowed) {
+ /*
+ * We need to run thermal_apic_init() on the core that
+ * just came online. If we're already on that core,
+ * then directly run it. Otherwise
+ * smp_call_function_single() to that core.
+ */
+ if (cpu == get_cpu())
+ thermal_apic_init(NULL);
+ else
+ smp_call_function_single(cpu,
+ &thermal_apic_init, NULL, 1, 0);
+ put_cpu();
+ }
+ break;
case CPU_DEAD:
case CPU_DEAD_FROZEN:
threshold_remove_device(cpu);
@@ -665,12 +692,11 @@ static int threshold_cpu_callback(struct
default:
break;
}
- out:
return NOTIFY_OK;
}
-static struct notifier_block threshold_cpu_notifier = {
- .notifier_call = threshold_cpu_callback,
+static struct notifier_block amd_cpu_notifier = {
+ .notifier_call = amd_cpu_callback,
};
static __init int threshold_init_device(void)
@@ -683,8 +709,191 @@ static __init int threshold_init_device(
if (err)
return err;
}
- register_hotcpu_notifier(&threshold_cpu_notifier);
+ register_hotcpu_notifier(&amd_cpu_notifier);
return 0;
}
device_initcall(threshold_init_device);
+
+/*
+ * AMD-specific thermal interrupt handler.
+ */
+void amd_smp_thermal_interrupt(void)
+{
+ unsigned int cpu = smp_processor_id();
+
+ /*
+ * We're here because thermal throttling has just been activated -- not
+ * deactivated -- hence therm_throt_process(1).
+ */
+ if (therm_throt_process(1))
+ mce_log_therm_throt_event(cpu, 1);
+ /*
+ * We'll still get subsequent interrupts even if we don't clear the
+ * status bit in THERM_CTL_F3X64. Take advantage of this fact to avoid
+ * touching PCI space. (If this assumption fails at some point, we'll
+ * need to schedule_work() in order to enter a process context, so that
+ * PCI locks can be asserted for proper access. This requirement, in
+ * turn, creates the need to remember which core interrupted, as the
+ * core which ultimately takes the scheduled work may be different.
+ * With any luck, we'll never need to do this.)
+ */
+}
+
+/*
+ * Initialize each northbridge's thermal throttling logic.
+ */
+static void smp_thermal_northbridge_init(void)
+{
+ int nb_num;
+ u32 therm_ctl_f3x64;
+
+ for (nb_num = 0; nb_num < num_k8_northbridges; nb_num++) {
+ /*
+ * Configure the thermal interrupt for this northbridge.
+ */
+ pci_read_config_dword(k8_northbridges[nb_num],
+ THERM_CTL_F3X64, &therm_ctl_f3x64);
+ therm_ctl_f3x64 |= PSL_APIC_HI_EN | HTC_EN;
+ therm_ctl_f3x64 &= (~PSL_APIC_LO_EN);
+ pci_write_config_dword(k8_northbridges[nb_num],
+ THERM_CTL_F3X64, therm_ctl_f3x64);
+ printk(KERN_INFO "Northbridge at PCI device 0x%x: "
+ "thermal monitoring enabled.\n", NB_PCI_DEV_BASE +
+ nb_num);
+ }
+}
+
+/*
+ * Enable the delivery of thermal interrupts via the local APIC.
+ */
+static void thermal_apic_init(void *unused) {
+ unsigned int apic_lv_therm;
+
+ /* Set up APIC_LVTTHMR to issue THERMAL_APIC_VECTOR. */
+ apic_lv_therm = apic_read(APIC_LVTTHMR);
+ /*
+ * See if some agent other than this routine has already initialized
+ * APIC_LVTTHMR, i.e. if it's unmasked, but not equal to the value that
+ * we would have programmed, had we been here before on this core.
+ */
+ if ((!(apic_lv_therm & APIC_LVT_MASKED)) && ((apic_lv_therm &
+ (APIC_MODE_MASK | APIC_VECTOR_MASK)) != (APIC_DM_FIXED |
+ THERMAL_APIC_VECTOR))) {
+ unsigned int cpu = smp_processor_id();
+
+ printk(KERN_CRIT "CPU 0x%x: Thermal monitoring not "
+ "functional.\n", cpu);
+ if ((apic_lv_therm & APIC_MODE_MASK) == APIC_DM_SMI)
+ printk(KERN_DEBUG "Thermal interrupts already "
+ "handled by SMI according to (((local APIC "
+ "base) + 0x330) bit 0x9).\n");
+ else
+ printk(KERN_DEBUG "Thermal interrupts unexpectedly "
+ "enabled at (((local APIC base) + 0x330) bit "
+ "0x10).\n");
+ } else {
+ /*
+ * Configure the Local Thermal Vector Table Entry to issue
+ * issue thermal interrupts to THERMAL_APIC_VECTOR.
+ *
+ * Start by masking off Delivery Mode and Vector.
+ */
+ apic_lv_therm &= ~(APIC_MODE_MASK | APIC_VECTOR_MASK);
+ /* Fixed interrupt, masked for now. */
+ apic_lv_therm |= APIC_LVT_MASKED | APIC_DM_FIXED |
+ THERMAL_APIC_VECTOR;
+ apic_write(APIC_LVTTHMR, apic_lv_therm);
+ /*
+ * The Intel thermal kernel code implies that there may be a
+ * race involving the mask bit, so clear it only now, after
+ * the other bits have settled.
+ */
+ apic_write(APIC_LVTTHMR, apic_lv_therm & ~APIC_LVT_MASKED);
+ }
+}
+
+/*
+* This function is intended to be called just after thermal throttling has
+ * been enabled. It warns the user if throttling is already active, which
+ * could indicate a failed cooling system. It may be the last chance to get
+ * a warning out before thermal shutdown occurs.
+ */
+static void smp_thermal_early_throttle_check(void)
+{
+ int nb_num;
+ u32 therm_ctl_f3x64;
+
+ for (nb_num = 0; nb_num < num_k8_northbridges; nb_num++) {
+ /*
+ * Read back THERM_CTL_F3X64 to check whther HTC_ACTIVE is
+ * asserted, in which case, warn the user.
+ */
+ pci_read_config_dword(k8_northbridges[nb_num],
+ THERM_CTL_F3X64, &therm_ctl_f3x64);
+ if (therm_ctl_f3x64 & HTC_ACTIVE)
+ printk(KERN_WARNING "High temperature on northbridge "
+ "at PCI device 0x%x. Throttling enabled.\n",
+ NB_PCI_DEV_BASE + nb_num);
+ }
+}
+
+/*
+ * Determine whether or not the northbridges support thermal throttling
+ * interrupts. If so, initialize them for receiving the same, then perform
+ * corresponding APIC initialization on each core.
+ */
+static int smp_thermal_interrupt_init(void)
+{
+ int nb_num;
+ int thermal_registers_functional;
+
+ /*
+ * If there are no recognized northbridges, then we can't talk to the
+ * thermal registers.
+ */
+ thermal_registers_functional = num_k8_northbridges;
+ /*
+ * If any of the northbridges has PCI ID 0x1103, then its thermal
+ * hardware suffers from an erratum which prevents this code from working,
+ * so abort.
+ */
+ for (nb_num = 0; nb_num < num_k8_northbridges; nb_num++) {
+ if ((k8_northbridges[nb_num]->device) == 0x1103) {
+ thermal_registers_functional = 0;
+ break;
+ }
+ }
+ if (thermal_registers_functional) {
+ /*
+ * Assert that we should log thermal throttling events, whenever
+ * we eventually get around to enabling them.
+ */
+ atomic_set(&therm_throt_en, 1);
+ /*
+ * Bind cpu_specific_smp_thermal_interrupt() to
+ * amd_smp_thermal_interrupt().
+ */
+ cpu_specific_smp_thermal_interrupt = amd_smp_thermal_interrupt;
+ smp_thermal_northbridge_init();
+ /*
+ * We've now initialized sufficient fabric to permit the
+ * initialization of the thermal interrupt APIC vectors, such as
+ * when a core comes online and calls amd_cpu_callback().
+ */
+ thermal_apic_init_allowed = 1;
+ /*
+ * Call thermal_apic_init() on each core.
+ */
+ on_each_cpu(&thermal_apic_init, NULL, 1, 0);
+ smp_thermal_early_throttle_check();
+ }
+ return 0;
+}
+
+/*
+ * smp_thermal_interrupt_init cannot execute until PCI has been fully
+ * initialized, hence late_initcall().
+ */
+late_initcall(smp_thermal_interrupt_init);
+
diff -uprN linux-2.6.24-rc5.orig/arch/x86/kernel/cpu/mcheck/mce_intel_64.c linux-2.6.24-rc5/arch/x86/kernel/cpu/mcheck/mce_intel_64.c
--- linux-2.6.24-rc5.orig/arch/x86/kernel/cpu/mcheck/mce_intel_64.c 2007-12-11 14:30:53.564303000 -0800
+++ linux-2.6.24-rc5/arch/x86/kernel/cpu/mcheck/mce_intel_64.c 2007-12-13 12:31:55.311145000 -0800
@@ -13,21 +13,15 @@
#include <asm/idle.h>
#include <asm/therm_throt.h>
-asmlinkage void smp_thermal_interrupt(void)
+void intel_smp_thermal_interrupt(void)
{
__u64 msr_val;
- ack_APIC_irq();
-
- exit_idle();
- irq_enter();
-
rdmsrl(MSR_IA32_THERM_STATUS, msr_val);
if (therm_throt_process(msr_val & 1))
mce_log_therm_throt_event(smp_processor_id(), msr_val);
add_pda(irq_thermal_count, 1);
- irq_exit();
}
static void __cpuinit intel_init_thermal(struct cpuinfo_x86 *c)
@@ -75,6 +69,11 @@ static void __cpuinit intel_init_thermal
wrmsr(MSR_IA32_MISC_ENABLE, l | (1 << 3), h);
l = apic_read(APIC_LVTTHMR);
+ /*
+ * Bind the cpu_specific_smp_thermal_interrupt trampoline to
+ * intel_smp_thermal_interrupt.
+ */
+ cpu_specific_smp_thermal_interrupt = intel_smp_thermal_interrupt;
apic_write(APIC_LVTTHMR, l & ~APIC_LVT_MASKED);
printk(KERN_INFO "CPU%d: Thermal monitoring enabled (%s)\n",
cpu, tm2 ? "TM2" : "TM1");
diff -uprN linux-2.6.24-rc5.orig/arch/x86/kernel/cpu/mcheck/mce_thermal.c linux-2.6.24-rc5/arch/x86/kernel/cpu/mcheck/mce_thermal.c
--- linux-2.6.24-rc5.orig/arch/x86/kernel/cpu/mcheck/mce_thermal.c 1969-12-31 16:00:00.000000000 -0800
+++ linux-2.6.24-rc5/arch/x86/kernel/cpu/mcheck/mce_thermal.c 2007-12-13 12:26:13.057412000 -0800
@@ -0,0 +1,32 @@
+/*
+ * Copyright (c) 2007 Google Inc.
+ *
+ * Written by Mike Waychison <[email protected]> and Russell Leidich <[email protected]>.
+ *
+ * CPU-independent thermal interrupt handler.
+ */
+
+#include <linux/init.h>
+#include <linux/interrupt.h>
+#include "mce.h"
+#include <asm/hw_irq.h>
+#include <asm/idle.h>
+
+static void default_smp_thermal_interrupt(void) {}
+
+cpu_specific_smp_thermal_interrupt_callback cpu_specific_smp_thermal_interrupt =
+ default_smp_thermal_interrupt;
+
+/*
+ * Wrapper for the CPU-specific thermal interrupt service routine. Without
+ * this, we'd have to discern the CPU brand at runtime (because support could
+ * be installed for more than one).
+ */
+asmlinkage void smp_thermal_interrupt(void)
+{
+ ack_APIC_irq();
+ exit_idle();
+ irq_enter();
+ cpu_specific_smp_thermal_interrupt();
+ irq_exit();
+}
diff -uprN linux-2.6.24-rc5.orig/include/asm-x86/mce.h linux-2.6.24-rc5/include/asm-x86/mce.h
--- linux-2.6.24-rc5.orig/include/asm-x86/mce.h 2007-12-11 14:31:34.263515000 -0800
+++ linux-2.6.24-rc5/include/asm-x86/mce.h 2007-12-13 14:10:37.923970000 -0800
@@ -113,7 +113,10 @@ static inline void mce_amd_feature_init(
#endif
void mce_log_therm_throt_event(unsigned int cpu, __u64 status);
+typedef void (*cpu_specific_smp_thermal_interrupt_callback)(void);
+extern cpu_specific_smp_thermal_interrupt_callback
+ cpu_specific_smp_thermal_interrupt;
extern atomic_t mce_entry;
extern void do_machine_check(struct pt_regs *, long);
On Mon, 17 Dec 2007 10:54:53 -0800 (PST) [email protected] (Russell Leidich) wrote:
> Hi,
>
> This patch to 2.6.24-rc5 enables AMD Barcelona CPUs to register thermal throttling events as machine checks, in the
> same fashion as the analogous Intel code.
>
> Changed files:
>
> arch/x86/kernel/cpu/mcheck/Makefile
> arch/x86/kernel/cpu/mcheck/mce_amd_64.c
> arch/x86/kernel/cpu/mcheck/mce_intel_64.c
> include/asm-x86/mce.h
>
> New files:
>
> arch/x86/kernel/cpu/mcheck/mce_thermal.c
>
> ...
>
> +extern int num_k8_northbridges;
> +extern struct pci_dev **k8_northbridges;
Please never add extern declarations in C files - find a suitable header
which is seen by the definition and by all users.
> -static int threshold_cpu_callback(struct notifier_block *nfb,
> +static int amd_cpu_callback(struct notifier_block *nfb,
> unsigned long action, void *hcpu)
> {
> /* cpu was unsigned int to begin with */
> unsigned int cpu = (unsigned long)hcpu;
>
> - if (cpu >= NR_CPUS)
> - goto out;
> -
> switch (action) {
> case CPU_ONLINE:
> case CPU_ONLINE_FROZEN:
> threshold_create_device(cpu);
> - break;
> + if (thermal_apic_init_allowed) {
> + /*
> + * We need to run thermal_apic_init() on the core that
> + * just came online. If we're already on that core,
> + * then directly run it. Otherwise
> + * smp_call_function_single() to that core.
> + */
> + if (cpu == get_cpu())
> + thermal_apic_init(NULL);
> + else
> + smp_call_function_single(cpu,
> + &thermal_apic_init, NULL, 1, 0);
> + put_cpu();
> + }
smp_call_function_single() already takes care of the
called-on-the-right-cpu case.
> + break;
> case CPU_DEAD:
> case CPU_DEAD_FROZEN:
> threshold_remove_device(cpu);
> @@ -665,12 +692,11 @@ static int threshold_cpu_callback(struct
> default:
> break;
> }
> - out:
> return NOTIFY_OK;
> }
>
> +
> +/*
> + * Enable the delivery of thermal interrupts via the local APIC.
> + */
> +static void thermal_apic_init(void *unused) {
eek, google coding "style" ;)
Please feed patches through scripts/checkpatch.pl.
> + unsigned int apic_lv_therm;
> +
> + /* Set up APIC_LVTTHMR to issue THERMAL_APIC_VECTOR. */
> + apic_lv_therm = apic_read(APIC_LVTTHMR);
> + /*
> + * See if some agent other than this routine has already initialized
> + * APIC_LVTTHMR, i.e. if it's unmasked, but not equal to the value that
> + * we would have programmed, had we been here before on this core.
> + */
> + if ((!(apic_lv_therm & APIC_LVT_MASKED)) && ((apic_lv_therm &
> + (APIC_MODE_MASK | APIC_VECTOR_MASK)) != (APIC_DM_FIXED |
> + THERMAL_APIC_VECTOR))) {
> + unsigned int cpu = smp_processor_id();
afaict this function is called while the calling thread is running
preemptibly. This smp_processor_id() call should have generated a runtime
warning if it was tested with all debug options enabled?
Please see Documentation/SumitChecklist.
> + printk(KERN_CRIT "CPU 0x%x: Thermal monitoring not "
> + "functional.\n", cpu);
> + if ((apic_lv_therm & APIC_MODE_MASK) == APIC_DM_SMI)
> + printk(KERN_DEBUG "Thermal interrupts already "
> + "handled by SMI according to (((local APIC "
> + "base) + 0x330) bit 0x9).\n");
> + else
> + printk(KERN_DEBUG "Thermal interrupts unexpectedly "
> + "enabled at (((local APIC base) + 0x330) bit "
> + "0x10).\n");
> + } else {
> + /*
> + * Configure the Local Thermal Vector Table Entry to issue
> + * issue thermal interrupts to THERMAL_APIC_VECTOR.
> + *
> + * Start by masking off Delivery Mode and Vector.
> + */
> + apic_lv_therm &= ~(APIC_MODE_MASK | APIC_VECTOR_MASK);
> + /* Fixed interrupt, masked for now. */
> + apic_lv_therm |= APIC_LVT_MASKED | APIC_DM_FIXED |
> + THERMAL_APIC_VECTOR;
> + apic_write(APIC_LVTTHMR, apic_lv_therm);
> + /*
> + * The Intel thermal kernel code implies that there may be a
> + * race involving the mask bit, so clear it only now, after
> + * the other bits have settled.
> + */
> + apic_write(APIC_LVTTHMR, apic_lv_therm & ~APIC_LVT_MASKED);
> + }
> +}
> +
> +
> +/*
> + * Determine whether or not the northbridges support thermal throttling
> + * interrupts. If so, initialize them for receiving the same, then perform
> + * corresponding APIC initialization on each core.
> + */
> +static int smp_thermal_interrupt_init(void)
> +{
> + int nb_num;
> + int thermal_registers_functional;
> +
> + /*
> + * If there are no recognized northbridges, then we can't talk to the
> + * thermal registers.
> + */
> + thermal_registers_functional = num_k8_northbridges;
> + /*
> + * If any of the northbridges has PCI ID 0x1103, then its thermal
> + * hardware suffers from an erratum which prevents this code from working,
> + * so abort.
> + */
> + for (nb_num = 0; nb_num < num_k8_northbridges; nb_num++) {
> + if ((k8_northbridges[nb_num]->device) == 0x1103) {
> + thermal_registers_functional = 0;
> + break;
> + }
> + }
> + if (thermal_registers_functional) {
> + /*
> + * Assert that we should log thermal throttling events, whenever
> + * we eventually get around to enabling them.
> + */
> + atomic_set(&therm_throt_en, 1);
> + /*
> + * Bind cpu_specific_smp_thermal_interrupt() to
> + * amd_smp_thermal_interrupt().
> + */
> + cpu_specific_smp_thermal_interrupt = amd_smp_thermal_interrupt;
> + smp_thermal_northbridge_init();
> + /*
> + * We've now initialized sufficient fabric to permit the
> + * initialization of the thermal interrupt APIC vectors, such as
> + * when a core comes online and calls amd_cpu_callback().
> + */
> + thermal_apic_init_allowed = 1;
> + /*
> + * Call thermal_apic_init() on each core.
> + */
> + on_each_cpu(&thermal_apic_init, NULL, 1, 0);
> + smp_thermal_early_throttle_check();
> + }
> + return 0;
> +}
The logic in this function looks more complex than it needs to be.
if (num_k8_northbridges == 0)
goto out;
for (nb_num = 0; nb_num < num_k8_northbridges; nb_num++) {
if ((k8_northbridges[nb_num]->device) == 0x1103)
goto out;
}
maybe?
> --- linux-2.6.24-rc5.orig/arch/x86/kernel/cpu/mcheck/mce_thermal.c 1969-12-31 16:00:00.000000000 -0800
> +++ linux-2.6.24-rc5/arch/x86/kernel/cpu/mcheck/mce_thermal.c 2007-12-13 12:26:13.057412000 -0800
> @@ -0,0 +1,32 @@
> +/*
> + * Copyright (c) 2007 Google Inc.
> + *
> + * Written by Mike Waychison <[email protected]> and Russell Leidich <[email protected]>.
> + *
> + * CPU-independent thermal interrupt handler.
> + */
> +
> +#include <linux/init.h>
> +#include <linux/interrupt.h>
> +#include "mce.h"
> +#include <asm/hw_irq.h>
> +#include <asm/idle.h>
> +
> +static void default_smp_thermal_interrupt(void) {}
static void default_smp_thermal_interrupt(void)
{
}
please.
Can this function ever actually be called?
> +cpu_specific_smp_thermal_interrupt_callback cpu_specific_smp_thermal_interrupt =
> + default_smp_thermal_interrupt;
> +
> +/*
> + * Wrapper for the CPU-specific thermal interrupt service routine. Without
> + * this, we'd have to discern the CPU brand at runtime (because support could
> + * be installed for more than one).
> + */
> +asmlinkage void smp_thermal_interrupt(void)
> +{
> + ack_APIC_irq();
> + exit_idle();
> + irq_enter();
> + cpu_specific_smp_thermal_interrupt();
> + irq_exit();
> +}
> diff -uprN linux-2.6.24-rc5.orig/include/asm-x86/mce.h linux-2.6.24-rc5/include/asm-x86/mce.h
> --- linux-2.6.24-rc5.orig/include/asm-x86/mce.h 2007-12-11 14:31:34.263515000 -0800
> +++ linux-2.6.24-rc5/include/asm-x86/mce.h 2007-12-13 14:10:37.923970000 -0800
> @@ -113,7 +113,10 @@ static inline void mce_amd_feature_init(
> #endif
>
> void mce_log_therm_throt_event(unsigned int cpu, __u64 status);
> +typedef void (*cpu_specific_smp_thermal_interrupt_callback)(void);
Yes, this is the case where typedefs are OK. But please put a _t at the end
of the name.
> +extern cpu_specific_smp_thermal_interrupt_callback
> + cpu_specific_smp_thermal_interrupt;
> extern atomic_t mce_entry;
>
> extern void do_machine_check(struct pt_regs *, long);
Thanks Andrew. I have a few questions on your comments...
On Dec 25, 2007 2:04 PM, Andrew Morton <[email protected]> wrote:
> > + unsigned int apic_lv_therm;
> > +
> > + /* Set up APIC_LVTTHMR to issue THERMAL_APIC_VECTOR. */
> > + apic_lv_therm = apic_read(APIC_LVTTHMR);
> > + /*
> > + * See if some agent other than this routine has already initialized
> > + * APIC_LVTTHMR, i.e. if it's unmasked, but not equal to the value that
> > + * we would have programmed, had we been here before on this core.
> > + */
> > + if ((!(apic_lv_therm & APIC_LVT_MASKED)) && ((apic_lv_therm &
> > + (APIC_MODE_MASK | APIC_VECTOR_MASK)) != (APIC_DM_FIXED |
> > + THERMAL_APIC_VECTOR))) {
> > + unsigned int cpu = smp_processor_id();
>
> afaict this function is called while the calling thread is running
> preemptibly. This smp_processor_id() call should have generated a runtime
> warning if it was tested with all debug options enabled?
I thought that the whole point of on_each_cpu(&thermal_apic_init,
NULL, 1, 0) was to ensure that thermal_apic_init() runs
(nonpreemptibly) on each core. No?
> > +static void default_smp_thermal_interrupt(void) {}
>
> static void default_smp_thermal_interrupt(void)
> {
> }
>
> please.
>
>
> Can this function ever actually be called?
My colleauge was concerned that we have a do-nothing handler in case
we get a spurious thermal interrupt. In my view, there's no point
programming for the possibility of broken hardware. On the other
hand, I do need some sort of indirection to bind the entry.S thermal
handler from assembly language to either Intel or AMD C code. Trouble
is, at compiletime, we might have both Intel and AMD support
installed, but only one of them should actually receive the interrupt
at runtime. So I think I need to do runtime binding, unless I want to
do CPUID inside the ISR. What do you think?
--
Russell Leidich
On Thu, 27 Dec 2007 10:57:20 -0800 "Russell Leidich" <[email protected]> wrote:
> Thanks Andrew. I have a few questions on your comments...
>
> On Dec 25, 2007 2:04 PM, Andrew Morton <[email protected]> wrote:
> > > + unsigned int apic_lv_therm;
> > > +
> > > + /* Set up APIC_LVTTHMR to issue THERMAL_APIC_VECTOR. */
> > > + apic_lv_therm = apic_read(APIC_LVTTHMR);
> > > + /*
> > > + * See if some agent other than this routine has already initialized
> > > + * APIC_LVTTHMR, i.e. if it's unmasked, but not equal to the value that
> > > + * we would have programmed, had we been here before on this core.
> > > + */
> > > + if ((!(apic_lv_therm & APIC_LVT_MASKED)) && ((apic_lv_therm &
> > > + (APIC_MODE_MASK | APIC_VECTOR_MASK)) != (APIC_DM_FIXED |
> > > + THERMAL_APIC_VECTOR))) {
> > > + unsigned int cpu = smp_processor_id();
> >
> > afaict this function is called while the calling thread is running
> > preemptibly. This smp_processor_id() call should have generated a runtime
> > warning if it was tested with all debug options enabled?
>
> I thought that the whole point of on_each_cpu(&thermal_apic_init,
> NULL, 1, 0) was to ensure that thermal_apic_init() runs
> (nonpreemptibly) on each core. No?
oh, OK.
> > > +static void default_smp_thermal_interrupt(void) {}
> >
> > static void default_smp_thermal_interrupt(void)
> > {
> > }
> >
> > please.
> >
> >
> > Can this function ever actually be called?
>
> My colleauge was concerned that we have a do-nothing handler in case
> we get a spurious thermal interrupt. In my view, there's no point
> programming for the possibility of broken hardware. On the other
> hand, I do need some sort of indirection to bind the entry.S thermal
> handler from assembly language to either Intel or AMD C code. Trouble
> is, at compiletime, we might have both Intel and AMD support
> installed, but only one of them should actually receive the interrupt
> at runtime. So I think I need to do runtime binding, unless I want to
> do CPUID inside the ISR. What do you think?
If it "can't happen" then we can just leave the vector pointing at 0 until
it gets set up properly. But nobody ever got fired for mistrusting
hardware (and BIOS) so perhaps what we should do is to leave that there, but
make it emit a suitable diagnostic? A printk_ratelimit()ed one, probably.
OK, given our discussion, perhaps the attached revised patch will be
more to your liking.
If so, let me know and I'll give it one last paranoid test, then mail
you a Signed-off-by patch.
--
Russell Leidich
On Friday 28 December 2007 21:40:28 Russell Leidich wrote:
> OK, given our discussion, perhaps the attached revised patch will be
> more to your liking.
>
> If so, let me know and I'll give it one last paranoid test, then mail
> you a Signed-off-by patch.
>
In general you seem to have a lot (too many?) of low level comments,
but no high level "strategy" comment anywhere what this code actually
tries to do. I find the high level comments usually more useful.
amd_cpu_callback:
- if (cpu >= NR_CPUS)
- goto out;
-
that change seems to be unrelated cleanup. Such patches should be always separate.
Same with the rename.
I find it is quite common to do smp_call_function_single in CPU up/down callbacks.
It would be better to fix that generically in the high level code (provide
a new callback that is guaranteed to run on the target CPU) than to always reimplement
it.
thermal_apic_init:
+ printk(KERN_CRIT "CPU 0x%x: Thermal monitoring not "
+ "functional.\n", cpu);
Why is that KERN_CRIT? Does not seem that critical to me.
smp_thermal_interrupt_init:
The erratum number/part number should be documented and the kernel ought to print
why it didn't initialize thermal on that hardware.
I'm not sure which erratum that is, but since that PCI-ID is used by all K8s
you excluded all of them, don't you? Is that whole code only for Fam10h?
That seems a little drastic. Perhaps it should just check steppings etc.
And needs more comments.
You're technically racy against parallel cpu hotplug happening from initrd
(which already runs during initcall -- yes that is a deathtrap)
although that is typically hard to fix.
thermal_apic_init_allowed seems like a hack. A separate notifier would
be cleaner.
+/*
+ * smp_thermal_interrupt_init cannot execute until PCI has been fully
+ * initialized, hence late_initcall().
+ */
+late_initcall(smp_thermal_interrupt_init);
PCI is already initialized before normal initcall, otherwise pretty much
all drivers would break.
mce_thermal.c seems to be just unnecessary to me. It would be cleaner
to just keep the separate own handlers, especially since they do quite
different things anyways. Don't mesh together what is quite different.
Also "cpu_specific_smp_thermal_interrupt_callback_t" is definitely too long.
-Andi
On Sat, 29 Dec 2007 03:11:51 +0100, Andi Kleen said:
> On Friday 28 December 2007 21:40:28 Russell Leidich wrote:
> + printk(KERN_CRIT "CPU 0x%x: Thermal monitoring not "
> + "functional.\n", cpu);
>
> Why is that KERN_CRIT? Does not seem that critical to me.
If you think you're running on a chipset that *should* support thermal
monitoring, and it isn't there in a usable state, that seems pretty critical
to me. If that didn't work, you probably can't trust the "oh, the chip will
thermal-limit itself if it gets to 100C or whatever" either.
Of course, I'm just speaking as somebody who quite recently had a system do a
thermal throttle when it hit 85C due to a cooling system failure. I'm pretty
sure that if thermal monitoring wasn't functional, it wouldn't have throttled
either (after all, how can you throttle when you hit a given temp when you
don't have a working way to tell what the temp even is?), and I'd be looking at
extensive hardware damage...
On Saturday 29 December 2007 03:30:17 [email protected] wrote:
> On Sat, 29 Dec 2007 03:11:51 +0100, Andi Kleen said:
> > On Friday 28 December 2007 21:40:28 Russell Leidich wrote:
>
> > + printk(KERN_CRIT "CPU 0x%x: Thermal monitoring not "
> > + "functional.\n", cpu);
> >
> > Why is that KERN_CRIT? Does not seem that critical to me.
>
> If you think you're running on a chipset that *should* support thermal
> monitoring, and it isn't there in a usable state, that seems pretty critical
> to me. If that didn't work, you probably can't trust the "oh, the chip will
> thermal-limit itself if it gets to 100C or whatever" either.
Thermal shutdown in emergency uses quite different mechanisms (e.g. it goes
directly through pins to the motherboard); i don't think that code checks for
that.
-Andi
On Sat, 29 Dec 2007 03:34:34 +0100, Andi Kleen said:
> On Saturday 29 December 2007 03:30:17 [email protected] wrote:
> > On Sat, 29 Dec 2007 03:11:51 +0100, Andi Kleen said:
> > > On Friday 28 December 2007 21:40:28 Russell Leidich wrote:
> >
> > > + printk(KERN_CRIT "CPU 0x%x: Thermal monitoring not "
> > > + "functional.\n", cpu);
> > >
> > > Why is that KERN_CRIT? Does not seem that critical to me.
> >
> > If you think you're running on a chipset that *should* support thermal
> > monitoring, and it isn't there in a usable state, that seems pretty critical
> > to me. If that didn't work, you probably can't trust the "oh, the chip will
> > thermal-limit itself if it gets to 100C or whatever" either.
>
> Thermal shutdown in emergency uses quite different mechanisms (e.g. it goes
> directly through pins to the motherboard); i don't think that code checks for
> that.
Right. My point is that if monitoring *should* be working, and it isn't, then
you don't have a lot of reason to be 100% confident that those pins are working
either. Unless there's two totally separate temperature sensors - otherwise,
if that sensor goes out, thermal monitoring and the emergency stuff *both*
break.
Of course, if somebody wise on the actual hardware design can definitively
state that even if the thermal sensor the monitoring uses dies, the chipset
will still thermal-throttle correctly, then I'd agree that the message could
go down to KERN_ERR or KERN_WARN.....
"Russell Leidich" <[email protected]> writes:
Not sure you have addressed any of my feedback; don't see many changes.
When you repost stuff can you please add a changelog or if you decide
to not address some review comment say why at least.
Also the patch changelog description is missing anyways?
Biggest issue I raised is still not addressed:
> + /*
> + * If any of the northbridges has PCI ID 0x1103, then its thermal
> + * hardware suffers from an erratum which prevents this code from
> + * working, so abort.
> + */
> + for (nb_num = 0; nb_num < num_k8_northbridges; nb_num++) {
> + if ((k8_northbridges[nb_num]->device) == 0x1103)
> + goto out;
> + }
AFAIK that's all K8s so the code will never work on them.
-Andi
On Dec 30, 2007 10:39 AM, Andi Kleen <[email protected]> wrote:
> "Russell Leidich" <[email protected]> writes:
>
> Not sure you have addressed any of my feedback; don't see many changes.
I emailed you on 12/7/2007 and never heard back, so I assumed that you
had no further issues. Anyway, thanks for getting back to me.
>
> When you repost stuff can you please add a changelog or if you decide
> to not address some review comment say why at least.
>
> Also the patch changelog description is missing anyways?
Sorry. I'll add a robust description with the next revision of the patch.
> In general you seem to have a lot (too many?) of low level comments,
> but no high level "strategy" comment anywhere what this code actually
> tries to do. I find the high level comments usually more useful.
>
I tried to add high-level comments at the top of each function. But I
can add more.
>
> Biggest issue I raised is still not addressed:
>
> > + /*
> > + * If any of the northbridges has PCI ID 0x1103, then its thermal
> > + * hardware suffers from an erratum which prevents this code from
> > + * working, so abort.
> > + */
> > + for (nb_num = 0; nb_num < num_k8_northbridges; nb_num++) {
> > + if ((k8_northbridges[nb_num]->device) == 0x1103)
> > + goto out;
> > + }
>
> AFAIK that's all K8s so the code will never work on them.
Well, yes, this is Barcelona-only at the moment (and in all
likelihood, will extend to some future CPUs). Indeed, as far as my
testing has determined, there is no stepping of K8s which properly
implements thermal throttling. Even Rev F has a crippling erratum.
>
> amd_cpu_callback:
>
> - if (cpu >= NR_CPUS)
> - goto out;
> -
>
> that change seems to be unrelated cleanup. Such patches should be always separate.
My buddy at Google suggested that I might as well fix this while I'm
tearing up the code. But OK, I'll remove it. I may or may not submit
a further patch.
> Same with the rename.
I disagree here. The original code was exclusively focussed on
setting some MCE-related "threshold". With my changes, it's more
generic. "amd_" might not be the most appropriate prefix, but
"threshold_" certainly is not.
> + printk(KERN_CRIT "CPU 0x%x: Thermal monitoring not "
> + "functional.\n", cpu);
>
> Why is that KERN_CRIT? Does not seem that critical to me.
So what this message really says is: "The OS cannot hook the thermal
interrupt because it has been hijacked or misconfigured by the BIOS.
Therefore, the throttling MCEs which you might naturally expect to see
on other Barcelona systems will not occur on this one. Therefore, if
your cooling policy relies on these MCEs (bad idea, but legal), it
will fail, potentially resulting in fire or the loss of user data."
For example, if you're expecting to be warned at 50C that your CPU has
tripped the throttling threshold, you may never receive this warning,
and therefore may never turn on the fan. Just because the CPU itself
may automatically shut down at 100C does not mean that the system
itself can withstand CPU temperatures this high, so fire is a remote
possibility. If that's not CRIT, then tell me what level is
appropriate, and I'll change it. To Valdis' point: the code is only
checking here for whether or not the BIOS has preempted the OS'
ability to hook the interrupt; there is no way for the code to
determine whether or not the sensor is actually functional, however
any client code which relies on its production of MCEs would fail, as
I just explained.
> I find it is quite common to do smp_call_function_single in CPU up/down callbacks.
> It would be better to fix that generically in the high level code (provide
> a new callback that is guaranteed to run on the target CPU) than to always reimplement
> it.
I agree, but it sounds like that should be the subject of another
patch which touches lots of other components.
> The erratum number/part number should be documented and the kernel ought to print
> why it didn't initialize thermal on that hardware.
I don't think there's a need for this, because 0x1103 came before
Barcelona; therefore, we can just consider this as a
Barcelona-and-later feature.
> You're technically racy against parallel cpu hotplug happening from initrd
> (which already runs during initcall -- yes that is a deathtrap)
> although that is typically hard to fix.
Can you elaborate? I'm assuming that I still need to fix this in
order to get the patch accepted, no?
> thermal_apic_init_allowed seems like a hack. A separate notifier would
> be cleaner.
A variable is always lighter than a notifier. I'm just trying to make
sure that if a CPU comes online before I'm ready to hook the thermal
interrupt, that it does not attempt to do so prematurely. I'm not
sure what a notifier would do, other than set
thermal_apic_init_allowed anyway.
> PCI is already initialized before normal initcall, otherwise pretty much
> all drivers would break.
I'll change the comment. I still want the convenience of a process
context, so I plan to keep the late_initcall().
> mce_thermal.c seems to be just unnecessary to me. It would be cleaner
> to just keep the separate own handlers, especially since they do quite
> different things anyways. Don't mesh together what is quite different.
As I mentioned to Andrew Morton, this is not easily avoidable without
some gross runtime CPUID hack. Specifically, how do you handle a
kernel build which supports both AMD and Intel thermal throttling,
wherein you don't know which CPU -- if either -- is present until
runtime? The root of the problem is that both architectures share the
same APIC vector, but implement throttling in incompatible ways.
> Also "cpu_specific_smp_thermal_interrupt_callback_t" is definitely too long.
I'll delete some characters to make it more obscure and Linux-like.
--
Russell Leidich
On Wed, Jan 02, 2008 at 11:43:08AM -0800, Russell Leidich wrote:
> > > + */
> > > + for (nb_num = 0; nb_num < num_k8_northbridges; nb_num++) {
> > > + if ((k8_northbridges[nb_num]->device) == 0x1103)
> > > + goto out;
> > > + }
> >
> > AFAIK that's all K8s so the code will never work on them.
>
> Well, yes, this is Barcelona-only at the moment (and in all
Ok, but that was totally unclear to me which suggests it should
be somewhere in the description/changelog and possibly in the comments.
> likelihood, will extend to some future CPUs). Indeed, as far as my
> testing has determined, there is no stepping of K8s which properly
> implements thermal throttling. Even Rev F has a crippling erratum.
How about RevG?
>
> > Same with the rename.
>
> I disagree here. The original code was exclusively focussed on
> setting some MCE-related "threshold". With my changes, it's more
> generic. "amd_" might not be the most appropriate prefix, but
> "threshold_" certainly is not.
But such changes should be separate.
>
> > + printk(KERN_CRIT "CPU 0x%x: Thermal monitoring not "
> > + "functional.\n", cpu);
> >
> > Why is that KERN_CRIT? Does not seem that critical to me.
>
> So what this message really says is: "The OS cannot hook the thermal
> interrupt because it has been hijacked or misconfigured by the BIOS.
> Therefore, the throttling MCEs which you might naturally expect to see
> on other Barcelona systems will not occur on this one. Therefore, if
> your cooling policy relies on these MCEs (bad idea, but legal), it
I don't think any Linux relies on that MCE for cooling so that seems like
a highly hypothetical situation. You would need a kernel driver for it
because there is no way to get it in user space even using a mce trigger.
If anything that should be handled through ACPI thermal trip anyways.
I think my point stands that this is not critical.
> I agree, but it sounds like that should be the subject of another
> patch which touches lots of other components.
Sure.
>
> > The erratum number/part number should be documented and the kernel ought to print
> > why it didn't initialize thermal on that hardware.
>
> I don't think there's a need for this, because 0x1103 came before
> Barcelona; therefore, we can just consider this as a
> Barcelona-and-later feature.
Ok, but that needs to be documented somewhere. Otherwise people will
eventually find out and then require lots of research to find this out.
e.g. if you had a high level comment that says
/* AMD Fam10h supports thermal throttling. When such a event happens we do
* .... because of X Y Z. Implement this in the following code.
* We don't do it on K8 due to crippling errata.
*/
it would have been all clear.
>
> > You're technically racy against parallel cpu hotplug happening from initrd
> > (which already runs during initcall -- yes that is a deathtrap)
> > although that is typically hard to fix.
>
> Can you elaborate? I'm assuming that I still need to fix this in
> order to get the patch accepted, no?
initrd user space can run in parallel with __initcall and in theory
it could trigger CPU hotplug events (and do lots of other things too)
It's quite unexpected and regularly causes bugs.
A lot of subsystems are racy this way. Also it's not easily fixable because
of locking problems in the cpu hotplug subsystem.
I just mentioned it for completeness; fixing it is probably beyond
scope of your patch and not a merging requirement.
>
> > thermal_apic_init_allowed seems like a hack. A separate notifier would
> > be cleaner.
>
> A variable is always lighter than a notifier. I'm just trying to make
Lighter but still a hack. I don't insist on it, it just makes
your code uglier than it could be.
> sure that if a CPU comes online before I'm ready to hook the thermal
> interrupt, that it does not attempt to do so prematurely. I'm not
> sure what a notifier would do, other than set
> thermal_apic_init_allowed anyway.
>
> > PCI is already initialized before normal initcall, otherwise pretty much
> > all drivers would break.
>
> I'll change the comment. I still want the convenience of a process
> context, so I plan to keep the late_initcall().
All __initcalls are in process context. late etc. just changes the
ordering inside them.
>
> > mce_thermal.c seems to be just unnecessary to me. It would be cleaner
> > to just keep the separate own handlers, especially since they do quite
> > different things anyways. Don't mesh together what is quite different.
>
> As I mentioned to Andrew Morton, this is not easily avoidable without
> some gross runtime CPUID hack. Specifically, how do you handle a
> kernel build which supports both AMD and Intel thermal throttling,
> wherein you don't know which CPU -- if either -- is present until
> runtime? The root of the problem is that both architectures share the
> same APIC vector, but implement throttling in incompatible ways.
You set different handlers depending on the CPU type.
>
> > Also "cpu_specific_smp_thermal_interrupt_callback_t" is definitely too long.
>
> I'll delete some characters to make it more obscure and Linux-like.
If you just set different handlers you don't need it at all.
-Andi
On Jan 2, 2008 12:00 PM, Andi Kleen <[email protected]> wrote:
> On Wed, Jan 02, 2008 at 11:43:08AM -0800, Russell Leidich wrote:
> > > > + */
> > > > + for (nb_num = 0; nb_num < num_k8_northbridges; nb_num++) {
> > > > + if ((k8_northbridges[nb_num]->device) == 0x1103)
> > > > + goto out;
> > > > + }
> > >
> > > AFAIK that's all K8s so the code will never work on them.
> >
> > Well, yes, this is Barcelona-only at the moment (and in all
>
> Ok, but that was totally unclear to me which suggests it should
> be somewhere in the description/changelog and possibly in the comments.
Deal.
>
> > likelihood, will extend to some future CPUs). Indeed, as far as my
> > testing has determined, there is no stepping of K8s which properly
> > implements thermal throttling. Even Rev F has a crippling erratum.
>
> How about RevG?
I don't know of any RevG. Please explain.
> >
> > > Same with the rename.
> >
> > I disagree here. The original code was exclusively focussed on
> > setting some MCE-related "threshold". With my changes, it's more
> > generic. "amd_" might not be the most appropriate prefix, but
> > "threshold_" certainly is not.
>
> But such changes should be separate.
OK, I'll back it out.
>
> >
> > > + printk(KERN_CRIT "CPU 0x%x: Thermal monitoring not "
> > > + "functional.\n", cpu);
> > >
> > > Why is that KERN_CRIT? Does not seem that critical to me.
> >
> > So what this message really says is: "The OS cannot hook the thermal
> > interrupt because it has been hijacked or misconfigured by the BIOS.
> > Therefore, the throttling MCEs which you might naturally expect to see
> > on other Barcelona systems will not occur on this one. Therefore, if
> > your cooling policy relies on these MCEs (bad idea, but legal), it
>
> I don't think any Linux relies on that MCE for cooling so that seems like
> a highly hypothetical situation. You would need a kernel driver for it
> because there is no way to get it in user space even using a mce trigger.
>
> If anything that should be handled through ACPI thermal trip anyways.
>
> I think my point stands that this is not critical.
OK, KERN_WARN then.
>
> > I agree, but it sounds like that should be the subject of another
> > patch which touches lots of other components.
>
> Sure.
>
> >
> > > The erratum number/part number should be documented and the kernel ought to print
> > > why it didn't initialize thermal on that hardware.
> >
> > I don't think there's a need for this, because 0x1103 came before
> > Barcelona; therefore, we can just consider this as a
> > Barcelona-and-later feature.
>
> Ok, but that needs to be documented somewhere. Otherwise people will
> eventually find out and then require lots of research to find this out.
>
> e.g. if you had a high level comment that says
>
> /* AMD Fam10h supports thermal throttling. When such a event happens we do
> * .... because of X Y Z. Implement this in the following code.
> * We don't do it on K8 due to crippling errata.
> */
>
> it would have been all clear.
Will do.
>
> >
> > > You're technically racy against parallel cpu hotplug happening from initrd
> > > (which already runs during initcall -- yes that is a deathtrap)
> > > although that is typically hard to fix.
> >
> > Can you elaborate? I'm assuming that I still need to fix this in
> > order to get the patch accepted, no?
>
> initrd user space can run in parallel with __initcall and in theory
> it could trigger CPU hotplug events (and do lots of other things too)
>
> It's quite unexpected and regularly causes bugs.
>
> A lot of subsystems are racy this way. Also it's not easily fixable because
> of locking problems in the cpu hotplug subsystem.
>
> I just mentioned it for completeness; fixing it is probably beyond
> scope of your patch and not a merging requirement.
OK, well it's good to have your comment in the mail archives for when
someone tries to fix this globally.
>
> >
> > > thermal_apic_init_allowed seems like a hack. A separate notifier would
> > > be cleaner.
> >
> > A variable is always lighter than a notifier. I'm just trying to make
>
> Lighter but still a hack. I don't insist on it, it just makes
> your code uglier than it could be.
I'm going to be ugly, but if someone to beautify it later, I won't oppose.
>
> > sure that if a CPU comes online before I'm ready to hook the thermal
> > interrupt, that it does not attempt to do so prematurely. I'm not
> > sure what a notifier would do, other than set
> > thermal_apic_init_allowed anyway.
> >
> > > PCI is already initialized before normal initcall, otherwise pretty much
> > > all drivers would break.
> >
> > I'll change the comment. I still want the convenience of a process
> > context, so I plan to keep the late_initcall().
>
> All __initcalls are in process context. late etc. just changes the
> ordering inside them.
>
> >
> > > mce_thermal.c seems to be just unnecessary to me. It would be cleaner
> > > to just keep the separate own handlers, especially since they do quite
> > > different things anyways. Don't mesh together what is quite different.
> >
> > As I mentioned to Andrew Morton, this is not easily avoidable without
> > some gross runtime CPUID hack. Specifically, how do you handle a
> > kernel build which supports both AMD and Intel thermal throttling,
> > wherein you don't know which CPU -- if either -- is present until
> > runtime? The root of the problem is that both architectures share the
> > same APIC vector, but implement throttling in incompatible ways.
>
> You set different handlers depending on the CPU type.
Right. But that's exactly what I'm doing. If I understand correctly,
your complaint is just that I'm doing the CPU-independent stuff in one
place (before branching to the CPU-specific code), and it would be
better to create duplicate code? If I do it your way, I'll need to do
something gross in entry_64.S. Specifically, thermal_interrupt() will
need to change from:
--------------------------
ENTRY(thermal_interrupt)
apicinterrupt THERMAL_APIC_VECTOR,smp_thermal_interrupt
END(thermal_interrupt)
--------------------------
to:
--------------------------
ENTRY(thermal_interrupt)
push %rax
mov ($some_memory_location), %rax
apicinterrupt THERMAL_APIC_VECTOR,%rax
pop %rax
END(thermal_interrupt)
--------------------------
which might not work, depending on how apicinterrupt is defined, and
how easily I can get $some_memory_location loaded with the right
value.
Is this the fix you have in mind?
>
> >
> > > Also "cpu_specific_smp_thermal_interrupt_callback_t" is definitely too long.
> >
> > I'll delete some characters to make it more obscure and Linux-like.
>
> If you just set different handlers you don't need it at all.
True.
>
> -Andi
>
--
Russell Leidich
On Jan 2, 2008 10:12 PM, Russell Leidich <[email protected]> wrote:
> On Jan 2, 2008 12:00 PM, Andi Kleen <[email protected]> wrote:
> > On Wed, Jan 02, 2008 at 11:43:08AM -0800, Russell Leidich wrote:
> > > likelihood, will extend to some future CPUs). Indeed, as far as my
> > > testing has determined, there is no stepping of K8s which properly
> > > implements thermal throttling. Even Rev F has a crippling erratum.
> >
> > How about RevG?
>
> I don't know of any RevG. Please explain.
http://products.amd.com/en-us/DesktopCPUResult.aspx?f1=&f2=&f3=&f4=&f5=AM2&f6=G1&f7=65nm+SOI&f8=&f9=&
http://products.amd.com/en-us/DesktopCPUResult.aspx?f1=&f2=&f3=&f4=512&f5=AM2&f6=G2&f7=65nm+SOI&f8=&f9=&
65nm Athlons come in G1 and G2 steppings.
HTH
Torsten
On Jan 2, 2008 1:33 PM, Torsten Kaiser <[email protected]> wrote:
> On Jan 2, 2008 10:12 PM, Russell Leidich <[email protected]> wrote:
> > On Jan 2, 2008 12:00 PM, Andi Kleen <[email protected]> wrote:
> > > On Wed, Jan 02, 2008 at 11:43:08AM -0800, Russell Leidich wrote:
> > > > likelihood, will extend to some future CPUs). Indeed, as far as my
> > > > testing has determined, there is no stepping of K8s which properly
> > > > implements thermal throttling. Even Rev F has a crippling erratum.
> > >
> > > How about RevG?
> >
> > I don't know of any RevG. Please explain.
>
> http://products.amd.com/en-us/DesktopCPUResult.aspx?f1=&f2=&f3=&f4=&f5=AM2&f6=G1&f7=65nm+SOI&f8=&f9=&
> http://products.amd.com/en-us/DesktopCPUResult.aspx?f1=&f2=&f3=&f4=512&f5=AM2&f6=G2&f7=65nm+SOI&f8=&f9=&
>
> 65nm Athlons come in G1 and G2 steppings.
Thanks. My code doesn't support these CPUs because their northbridge
IDs are not 0x1203.
>
> HTH
>
> Torsten
>
--
Russell Leidich
> Thanks. My code doesn't support these CPUs because their northbridge
> IDs are not 0x1203.
Sure, but do they have the crippling erratum too?
-Andi
On Jan 2, 2008 1:54 PM, Andi Kleen <[email protected]> wrote:
> > Thanks. My code doesn't support these CPUs because their northbridge
> > IDs are not 0x1203.
>
> Sure, but do they have the crippling erratum too?
We don't seem to have erratum documentation on this chip, so I can't
confirm one way or another. In any event, I have no way to test this
CPU. I think it's a small sacrifice to assume that it doesn't work.
Someone could always add support later.
>
> -Andi
>
--
Russell Leidich
Here's a new version of the patch. Since I didn't explain its purpose
very well in my previous submission, I'll do so here:
This patch adds thermal machine check event (MCE) support to AMD
Barcelona (and probably later, if new PCI IDs are added to
k8_northbridges[]), styled after the same in the Intel code. The
initialization consists of 3 parts: (1) northbridge, which enables the
delivery of the interrupt to the local APIC, (2) APIC, which enables
delivery to X86, and (3) hotplug support in threshold_cpu_callback(),
which accomplishes #2 for CPUs which (re)enter the OS later.
Whenever the temperature reaches the throttling threshold programmed
into a northbridge register (by the BIOS -- my code doesn't change
this value), a thermal interrupt is delivered. The interrupt is
delivered to the vector specified in the thermal APIC register at
(APIC_BASE + 0x330), which is identical to Intel. Because the vector
register is the same between both architectures, and because I don't
know which brand of CPU is present until runtime (if either),
thermal_interrupt in entry_64.S will branch to smp_thermal_interrupt
in mce_thermal.c. In turn, smp_thermal_interrupt will branch to the
CPU-specific code for handling the interrupt. (Apart from the common
vector location, AMD and Intel use radically different architectures
for the purpose of reporting throttling events.) At that point, an
MCE is logged if and only if the temperature has made a low-to-high
transition. Rate limiting is employed so as not to spam the log.
New with this patch: I added some comments, as Andi suggested. I also
restored a redundant check for (cpu < NR_CPUS) which happened to be in
mce_amd_64.c, and I originally removed; I restored it because it was
deemed unrelated to my thermal throttling enhancement.
As to the question of Athlon RevG, it turns out that it has the same
thermal erratum as RevF. Therefore the fact that this code only works
with Barcelona and up is not a defficiency.
There are 2 pending issues, to which I have received insufficient feedback:
1. Andi would like to eliminate the trampoline in mce_thermal.c, but
no one has responded to my proposed disgusting hack on entry_64.S in
order to do so.
2. Ingo pointed out that a given config file did not build. But when
I tried to use it, I got all kinds of questions about the required
states of various build flags. I accepted all the defaults, and the
build completed normally. His error message suggests to me that
somehow he's not including include/asm-x86/mce.h in the build, but I'm
at a loss as to why. So I reverted my changes to this file, and
created mce_thermal.h instead, for tighter control.
--
Russell Leidich
> As to the question of Athlon RevG, it turns out that it has the same
> thermal erratum as RevF. Therefore the fact that this code only works
> with Barcelona and up is not a defficiency.
Thanks.
>
> There are 2 pending issues, to which I have received insufficient feedback:
>
> 1. Andi would like to eliminate the trampoline in mce_thermal.c, but
> no one has responded to my proposed disgusting hack on entry_64.S in
> order to do so.
There are two ways to do it:
either duplicate the entry point in entry_64.S and set different
vectors or let the asm glue jump through a call vector (use
apicinterrupt ...,*thermal_vector(%rip) ) Later is probably better.
>
> 2. Ingo pointed out that a given config file did not build. But when
The patch that was in git-x86 didn't build on 32bit. Perhaps it was a
32bit config file? Run it under "linux32" or equivalent on your distro.
> + */
> + if (therm_throt_process(1))
> + mce_log_therm_throt_event(cpu, 1);
> + /*
> + * We'll still get subsequent interrupts even if we don't clear the
> + * status bit in THERM_CTL_F3X64. Take advantage of this fact to avoid
> + * touching PCI space. (If this assumption fails at some point, we'll
> + * need to schedule_work() in order to enter a process context, so that
> + * PCI locks can be asserted for proper access. This requirement, in
PCI locks are spinlocks so they don't need process context. In PREEMPT-RT
kernels they can sleep, but there the handler will be likely already
a thread. Touching config space from interrupt context is legal, although
not very popular because it tends to be slow (but a few drivers do it)
> + /*
> + * If any of the northbridges has PCI ID 0x1103, then its thermal
> + * hardware suffers from an erratum which prevents this code from
> + * working, so abort.
Please add
* This implies it only works on Family 10h aka AMD Quad Core.
Otherwise I can just see the support questions of people asking why this
doesn't work for them.
Anyways I'm unsure about the blacklist here -- white list would
probably have been better. k8_northbridges[] will certainly include
Griffin northtbridges and who knows if TT will work there or not.
[sorry for mentioning that not earlier]
-Andi
On Jan 4, 2008 2:26 PM, Andi Kleen <[email protected]> wrote:
> > There are 2 pending issues, to which I have received insufficient feedback:
> >
> > 1. Andi would like to eliminate the trampoline in mce_thermal.c, but
> > no one has responded to my proposed disgusting hack on entry_64.S in
> > order to do so.
>
> There are two ways to do it:
>
> either duplicate the entry point in entry_64.S and set different
> vectors or let the asm glue jump through a call vector (use
> apicinterrupt ...,*thermal_vector(%rip) ) Later is probably better.
OK, I did it the latter way. Your thermal_vector is my
smp_thermal_interrupt, which I have converted from a function to a
pointer to the CPU-specific function.
>
> >
> > 2. Ingo pointed out that a given config file did not build. But when
>
> The patch that was in git-x86 didn't build on 32bit. Perhaps it was a
> 32bit config file? Run it under "linux32" or equivalent on your distro.
I _think_ it's fixed now. I'll let him rerun it at his end for verification.
>
> > + */
> > + if (therm_throt_process(1))
> > + mce_log_therm_throt_event(cpu, 1);
> > + /*
> > + * We'll still get subsequent interrupts even if we don't clear the
> > + * status bit in THERM_CTL_F3X64. Take advantage of this fact to avoid
> > + * touching PCI space. (If this assumption fails at some point, we'll
> > + * need to schedule_work() in order to enter a process context, so that
> > + * PCI locks can be asserted for proper access. This requirement, in
>
> PCI locks are spinlocks so they don't need process context. In PREEMPT-RT
> kernels they can sleep, but there the handler will be likely already
> a thread. Touching config space from interrupt context is legal, although
> not very popular because it tends to be slow (but a few drivers do it)
But if PCI locks are spinlocks, then how can one access config space
in an interrupt handler, as it might be locked by the foreground? (No
locks would be required at all, if everyone just saved 0xCF8 and
0xCFC, but I digress.) And it's one thing to be "likely" already in a
thread, and another to be sure. If you can address these issues, I'll
change or remove the comment. I just want to prevent a
reasonable-looking but bad coding change from happening in the future.
>
> > + /*
> > + * If any of the northbridges has PCI ID 0x1103, then its thermal
> > + * hardware suffers from an erratum which prevents this code from
> > + * working, so abort.
>
> Please add
>
> * This implies it only works on Family 10h aka AMD Quad Core.
>
> Otherwise I can just see the support questions of people asking why this
> doesn't work for them.
Agreed. I had it at the top of the function, but now I've worked it
into both places.
>
> Anyways I'm unsure about the blacklist here -- white list would
> probably have been better. k8_northbridges[] will certainly include
> Griffin northtbridges and who knows if TT will work there or not.
> [sorry for mentioning that not earlier]
Ideally, every ID in the k8_northbridges[] whitelist would have
functional thermal throttling. If more IDs than 0x1103 turn out to
have this feature broken, then we may need to add a blacklist as well.
But I expect that most future IDs will function correctly, hence my
reliance on the whitelist. In my view, anyone who adds an ID to a
whitelist (or just a list, for that matter) is obligated to perform a
static analysis (i.e. grep for "k8_northbridges") of the implications
of such act; therefore, I'm not too concerned about Griffin.
I've attached an updated patch. In the unlikely event that you want
to check this in, let me know so I can give it a final test.
--
Russell Leidich
>
> But if PCI locks are spinlocks, then how can one access config space
> in an interrupt handler, as it might be locked by the foreground? (No
They disable interrupts
> locks would be required at all, if everyone just saved 0xCF8 and
> 0xCFC, but I digress.)
Not sure what you mean? Since it is two non atomic accesses even
saving restoring the registers would not make the accesses safe for lockless.
If someone changes 0xcf8 before you can access 0xcfc you always have
a problem.
In fact we had (or still have) races with some older user land accessing these
ports directly.
The only access method that is lockless is mmconfig, which will work
on most (but not all) Fam10h systems.
> And it's one thing to be "likely" already in a
> thread, and another to be sure. If you can address these issues, I'll
> change or remove the comment. I just want to prevent a
> reasonable-looking but bad coding change from happening in the future.
Well at least as written the comment is not quite correct.
>
>
> Agreed. I had it at the top of the function, but now I've worked it
> into both places.
>
> >
> > Anyways I'm unsure about the blacklist here -- white list would
> > probably have been better. k8_northbridges[] will certainly include
> > Griffin northtbridges and who knows if TT will work there or not.
> > [sorry for mentioning that not earlier]
>
> Ideally, every ID in the k8_northbridges[] whitelist would have
k8_northbridges[] is used by various subsystems, most of them
do not care at all about thermal throttling.
> functional thermal throttling. If more IDs than 0x1103 turn out to
> have this feature broken, then we may need to add a blacklist as well.
? you already got a blacklist ?
> But I expect that most future IDs will function correctly, hence my
> reliance on the whitelist.
? but you don't have a whitelist, you have a black list.
> In my view, anyone who adds an ID to a
> whitelist (or just a list, for that matter) is obligated to perform a
> static analysis (i.e. grep for "k8_northbridges") of the implications
> of such act;
That view would require the whitelist.
-Andi
On Jan 5, 2008 5:24 AM, Andi Kleen <[email protected]> wrote:
> >
> > But if PCI locks are spinlocks, then how can one access config space
> > in an interrupt handler, as it might be locked by the foreground? (No
>
> They disable interrupts
Got it.
>
> > locks would be required at all, if everyone just saved 0xCF8 and
> > 0xCFC, but I digress.)
>
> Not sure what you mean? Since it is two non atomic accesses even
> saving restoring the registers would not make the accesses safe for lockless.
> If someone changes 0xcf8 before you can access 0xcfc you always have
> a problem.
No, it would be safe, because the interrupting process doesn't
actually need to save 0xCFC (since it's a function of 0xCF8). SMI
handlers use this technique to ensure they don't destroy OS config
space accesses. Anyway, it doesn't matter. As long as everyone CLIs
across the whole business, there is no possibility of races.
>
> In fact we had (or still have) races with some older user land accessing these
> ports directly.
Yuck. I'm going to assume that I can ignore this fact, as they're
already broken with respect to lots of other kernel code.
>
> The only access method that is lockless is mmconfig, which will work
> on most (but not all) Fam10h systems.
>
> > And it's one thing to be "likely" already in a
> > thread, and another to be sure. If you can address these issues, I'll
> > change or remove the comment. I just want to prevent a
> > reasonable-looking but bad coding change from happening in the future.
>
> Well at least as written the comment is not quite correct.
OK, I buy that now. I'll fix the comment and resend.
> >
> >
> > Agreed. I had it at the top of the function, but now I've worked it
> > into both places.
> >
> > >
> > > Anyways I'm unsure about the blacklist here -- white list would
> > > probably have been better. k8_northbridges[] will certainly include
> > > Griffin northtbridges and who knows if TT will work there or not.
> > > [sorry for mentioning that not earlier]
> >
> > Ideally, every ID in the k8_northbridges[] whitelist would have
>
> k8_northbridges[] is used by various subsystems, most of them
> do not care at all about thermal throttling.
>
> > functional thermal throttling. If more IDs than 0x1103 turn out to
> > have this feature broken, then we may need to add a blacklist as well.
>
> ? you already got a blacklist ?
Yes, technically. It's a single "if" statement which tests for 0x1103
:). What I was saying was that if lots more thermally-broken CPUs
show up in the wild, then we can expand this into a blacklist array.
>
> > But I expect that most future IDs will function correctly, hence my
> > reliance on the whitelist.
>
> ? but you don't have a whitelist, you have a black list.
I'm using k8_northbridges[] as my de-facto whitelist, because I
believe that future CPUs will probably function properly. (It's hard
to imagine AMD removing or breaking a perfectly functional system,
apart from maybe adding new features. Worst case, as I said, we can
create a blacklist. But right now, that's unnecessary.)
Any other concerns?
--
Russell Leidich
OK all, I fixed the comments about accessing PCI config space in an
interrupt context.
Can someone please give me a thumbs up or down about checking this in?
If it's thumbs up, I will do a final retest and mail back a
Signed-off-by patch.
--
Russell Leidich
> ENTRY(thermal_interrupt)
> - apicinterrupt THERMAL_APIC_VECTOR,smp_thermal_interrupt
> + apicinterrupt THERMAL_APIC_VECTOR,smp_thermal_interrupt(%rip)
Are you sure a * is not needed? I would have thought it would jump
to the variable instead of through it. But if it works it's ok for me.
The rest of the patch looks ok to to me.
-Andi
On Jan 8, 2008 3:52 PM, Andi Kleen <[email protected]> wrote:
> > ENTRY(thermal_interrupt)
> > - apicinterrupt THERMAL_APIC_VECTOR,smp_thermal_interrupt
> > + apicinterrupt THERMAL_APIC_VECTOR,smp_thermal_interrupt(%rip)
>
> Are you sure a * is not needed? I would have thought it would jump
> to the variable instead of through it. But if it works it's ok for me.
I will test to make sure it works. I don't think stars mean anything
in AT&T-style X86-64.
>
> The rest of the patch looks ok to to me.
Thank you! I will give it a final test and submit the official patch this week.
>
> -Andi
>
>
--
Russell Leidich
On Tue, Jan 08, 2008 at 06:28:18PM -0800, Russell Leidich wrote:
> On Jan 8, 2008 3:52 PM, Andi Kleen <[email protected]> wrote:
> > > ENTRY(thermal_interrupt)
> > > - apicinterrupt THERMAL_APIC_VECTOR,smp_thermal_interrupt
> > > + apicinterrupt THERMAL_APIC_VECTOR,smp_thermal_interrupt(%rip)
> >
> > Are you sure a * is not needed? I would have thought it would jump
> > to the variable instead of through it. But if it works it's ok for me.
>
> I will test to make sure it works. I don't think stars mean anything
> in AT&T-style X86-64.
% cat t.s
call foo
call *foo
% as -o t.o t.s
% objdump -S t.o
t.o: file format elf64-x86-64
Disassembly of section .text:
0000000000000000 <.text>:
0: e8 00 00 00 00 callq 0x5
5: ff 14 25 00 00 00 00 callq *0x0
-Andi
All,
Here's the hopefully-final version of the patch, which I have just
tested on Intel and AMD.
In my AMD test, I happened to discover that although MCEs were being
logged, the MCE counter at
/sys/devices/system/cpu/cpu(whatever)/thermal_throttle/count was not
being updated. I fixed this by (1) moving smp_thermal_init from
late_initcall to device_initcall in mce_amd_64.c (2) moving
thermal_throttle_init_device from device_initcall to late_initcall in
therm_throt.c.
Thanks to Andi for his notes on how to hack up an indirect call in
AT&T-style X86-64.
To reiterate my earlier general description of the patch:
This patch adds thermal machine check event (MCE) support to AMD
Barcelona (and probably later, if new PCI IDs are added to
k8_northbridges[]), styled after the same in the Intel code. The
initialization consists of 3 parts: (1) northbridge, which enables the
delivery of the interrupt to the local APIC, (2) APIC, which enables
delivery to X86, and (3) hotplug support in threshold_cpu_callback(),
which accomplishes #2 for CPUs which (re)enter the OS later.
Whenever the temperature reaches the throttling threshold programmed
into a northbridge register (by the BIOS -- my code doesn't change
this value), a thermal interrupt is delivered. The interrupt is
delivered to the vector specified in the thermal APIC register at
(APIC_BASE + 0x330), which is identical to Intel. Because the vector
register is the same between both architectures, and because I don't
know which brand of CPU is present until runtime (if either),
thermal_interrupt in entry_64.S will branch to smp_thermal_interrupt
in mce_thermal.c. In turn, smp_thermal_interrupt will branch to the
CPU-specific code for handling the interrupt. (Apart from the common
vector location, AMD and Intel use radically different architectures
for the purpose of reporting throttling events.) At that point, an
MCE is logged if and only if the temperature has made a low-to-high
transition. Rate limiting is employed so as not to spam the log.
--
Russell Leidich
All,
Sorry, but the Google code review process has turned up a subtle issue
with my formerly submitted patch. It is fixed in this newer patch
(attached). Otherwise, the functional description is exactly as
explained below.
The issue is that there were 2 printk()s which printed the northbridge
PCI device number associated with throttling initialization and
detection. These printk()s assumed that the northbridges were
enumerated in-order. While this is highly likely to be the case, it
should not have been assumed. In the event that it is _not_ the case,
then the PCI device numbers printed out would have been wrong.
Since I had to respin the patch, I took the opportunity to fix a few
minor issues as well: (1) Changed the contents of the aforementioned
printk()s to reveal the PCI bus, function, and register which relate
to the throttling message -- not just the device. (2) Deleted all
mention of "AMD Quad Core", leaving only "Family 0x10". (3) Fixed one
comment with incorrect grammar. (4) Fixed punctuation in a printk().
I sincerely hope that this will be the last iteration. Thanks to all
who contributed to the review, here and at Google!
On Jan 10, 2008 6:21 PM, Russell Leidich <[email protected]> wrote:
> All,
>
> Here's the hopefully-final version of the patch, which I have just
> tested on Intel and AMD.
>
> In my AMD test, I happened to discover that although MCEs were being
> logged, the MCE counter at
> /sys/devices/system/cpu/cpu(whatever)/thermal_throttle/count was not
> being updated. I fixed this by (1) moving smp_thermal_init from
> late_initcall to device_initcall in mce_amd_64.c (2) moving
> thermal_throttle_init_device from device_initcall to late_initcall in
> therm_throt.c.
>
> Thanks to Andi for his notes on how to hack up an indirect call in
> AT&T-style X86-64.
>
> To reiterate my earlier general description of the patch:
>
> This patch adds thermal machine check event (MCE) support to AMD
> Barcelona (and probably later, if new PCI IDs are added to
> k8_northbridges[]), styled after the same in the Intel code. The
> initialization consists of 3 parts: (1) northbridge, which enables the
> delivery of the interrupt to the local APIC, (2) APIC, which enables
> delivery to X86, and (3) hotplug support in threshold_cpu_callback(),
> which accomplishes #2 for CPUs which (re)enter the OS later.
>
> Whenever the temperature reaches the throttling threshold programmed
> into a northbridge register (by the BIOS -- my code doesn't change
> this value), a thermal interrupt is delivered. The interrupt is
> delivered to the vector specified in the thermal APIC register at
> (APIC_BASE + 0x330), which is identical to Intel. Because the vector
> register is the same between both architectures, and because I don't
> know which brand of CPU is present until runtime (if either),
> thermal_interrupt in entry_64.S will branch to smp_thermal_interrupt
> in mce_thermal.c. In turn, smp_thermal_interrupt will branch to the
> CPU-specific code for handling the interrupt. (Apart from the common
> vector location, AMD and Intel use radically different architectures
> for the purpose of reporting throttling events.) At that point, an
> MCE is logged if and only if the temperature has made a low-to-high
> transition. Rate limiting is employed so as not to spam the log.
>
> --
> Russell Leidich
>
--
Russell Leidich
On Thu, 17 Jan 2008 17:06:33 -0800 "Russell Leidich" <[email protected]> wrote:
> > Here's the hopefully-final version of the patch, which I have just
> > tested on Intel and AMD.
Curious. This just broke.
i386 allmodconfig:
arch/x86/kernel/cpu/mcheck/p4.o: In function `smp_thermal_interrupt':
arch/x86/kernel/cpu/mcheck/p4.c:61: multiple definition of `smp_thermal_interrupt'
arch/x86/kernel/cpu/mcheck/mce_thermal.o:arch/x86/kernel/cpu/mcheck/mce_thermal.c:17: first defined here
/opt/crosstool/gcc-4.1.0-glibc-2.3.6/i686-unknown-linux-gnu/bin/i686-unknown-linux-gnu-ld: Warning: size of symbol `smp_thermal_interrupt' changed from 4 in arch/x86/kernel/cpu/mcheck/mce_thermal.o to 43 in arch/x86/kernel/cpu/mcheck/p4.o
/opt/crosstool/gcc-4.1.0-glibc-2.3.6/i686-unknown-linux-gnu/bin/i686-unknown-linux-gnu-ld: Warning: type of symbol `smp_thermal_interrupt' changed from 1 to 2 in arch/x86/kernel/cpu/mcheck/p4.o
In mainline we just did this:
--- linux-2.6.24/arch/x86/kernel/cpu/mcheck/p4.c 2008-02-01 14:29:18.000000000 -0800
+++ devel/arch/x86/kernel/cpu/mcheck/p4.c 2008-02-02 15:38:06.000000000 -0800
@@ -57,7 +57,7 @@ static void intel_thermal_interrupt(stru
/* Thermal interrupt handler for this CPU setup */
static void (*vendor_thermal_interrupt)(struct pt_regs *regs) = unexpected_thermal_interrupt;
-fastcall void smp_thermal_interrupt(struct pt_regs *regs)
+void smp_thermal_interrupt(struct pt_regs *regs)
which I think was always wrong and has just now tripped up the linker. I
_assume_ that function should be __attribute__((weak)). Then again, maybe
I'm full of it. But hey, it compiles now.
On Sat, 2008-02-02 at 16:10 -0800, Andrew Morton wrote:
> On Thu, 17 Jan 2008 17:06:33 -0800 "Russell Leidich" <[email protected]> wrote:
>
> > > Here's the hopefully-final version of the patch, which I have just
> > > tested on Intel and AMD.
>
> Curious. This just broke.
>
> i386 allmodconfig:
>
> arch/x86/kernel/cpu/mcheck/p4.o: In function `smp_thermal_interrupt':
> arch/x86/kernel/cpu/mcheck/p4.c:61: multiple definition of `smp_thermal_interrupt'
> arch/x86/kernel/cpu/mcheck/mce_thermal.o:arch/x86/kernel/cpu/mcheck/mce_thermal.c:17: first defined here
> /opt/crosstool/gcc-4.1.0-glibc-2.3.6/i686-unknown-linux-gnu/bin/i686-unknown-linux-gnu-ld: Warning: size of symbol `smp_thermal_interrupt' changed from 4 in arch/x86/kernel/cpu/mcheck/mce_thermal.o to 43 in arch/x86/kernel/cpu/mcheck/p4.o
> /opt/crosstool/gcc-4.1.0-glibc-2.3.6/i686-unknown-linux-gnu/bin/i686-unknown-linux-gnu-ld: Warning: type of symbol `smp_thermal_interrupt' changed from 1 to 2 in arch/x86/kernel/cpu/mcheck/p4.o
>
> In mainline we just did this:
>
> --- linux-2.6.24/arch/x86/kernel/cpu/mcheck/p4.c 2008-02-01 14:29:18.000000000 -0800
> +++ devel/arch/x86/kernel/cpu/mcheck/p4.c 2008-02-02 15:38:06.000000000 -0800
> @@ -57,7 +57,7 @@ static void intel_thermal_interrupt(stru
> /* Thermal interrupt handler for this CPU setup */
> static void (*vendor_thermal_interrupt)(struct pt_regs *regs) = unexpected_thermal_interrupt;
>
> -fastcall void smp_thermal_interrupt(struct pt_regs *regs)
> +void smp_thermal_interrupt(struct pt_regs *regs)
>
> which I think was always wrong and has just now tripped up the linker. I
> _assume_ that function should be __attribute__((weak)). Then again, maybe
> I'm full of it. But hey, it compiles now.
>
What tree fails to compile?
grepping mainline:
./arch/x86/kernel/cpu/mcheck/mce_intel_64.c:asmlinkage void smp_thermal_interrupt(void)
./arch/x86/kernel/cpu/mcheck/p4.c:void smp_thermal_interrupt(struct pt_regs *regs)
./arch/x86/kernel/entry_64.S: apicinterrupt THERMAL_APIC_VECTOR,smp_thermal_interrupt
./arch/x86/kernel/traps_64.c:asmlinkage void __attribute__((weak)) smp_thermal_interrupt(void)
Notice the one in p4.c doesn't take void args.
I'll look into it.
Harvey
On Sat, 02 Feb 2008 16:27:40 -0800 Harvey Harrison <[email protected]> wrote:
> On Sat, 2008-02-02 at 16:10 -0800, Andrew Morton wrote:
> > On Thu, 17 Jan 2008 17:06:33 -0800 "Russell Leidich" <[email protected]> wrote:
> >
> > > > Here's the hopefully-final version of the patch, which I have just
> > > > tested on Intel and AMD.
> >
> > Curious. This just broke.
> >
> > i386 allmodconfig:
> >
> > arch/x86/kernel/cpu/mcheck/p4.o: In function `smp_thermal_interrupt':
> > arch/x86/kernel/cpu/mcheck/p4.c:61: multiple definition of `smp_thermal_interrupt'
> > arch/x86/kernel/cpu/mcheck/mce_thermal.o:arch/x86/kernel/cpu/mcheck/mce_thermal.c:17: first defined here
> > /opt/crosstool/gcc-4.1.0-glibc-2.3.6/i686-unknown-linux-gnu/bin/i686-unknown-linux-gnu-ld: Warning: size of symbol `smp_thermal_interrupt' changed from 4 in arch/x86/kernel/cpu/mcheck/mce_thermal.o to 43 in arch/x86/kernel/cpu/mcheck/p4.o
> > /opt/crosstool/gcc-4.1.0-glibc-2.3.6/i686-unknown-linux-gnu/bin/i686-unknown-linux-gnu-ld: Warning: type of symbol `smp_thermal_interrupt' changed from 1 to 2 in arch/x86/kernel/cpu/mcheck/p4.o
> >
> > In mainline we just did this:
> >
> > --- linux-2.6.24/arch/x86/kernel/cpu/mcheck/p4.c 2008-02-01 14:29:18.000000000 -0800
> > +++ devel/arch/x86/kernel/cpu/mcheck/p4.c 2008-02-02 15:38:06.000000000 -0800
> > @@ -57,7 +57,7 @@ static void intel_thermal_interrupt(stru
> > /* Thermal interrupt handler for this CPU setup */
> > static void (*vendor_thermal_interrupt)(struct pt_regs *regs) = unexpected_thermal_interrupt;
> >
> > -fastcall void smp_thermal_interrupt(struct pt_regs *regs)
> > +void smp_thermal_interrupt(struct pt_regs *regs)
> >
> > which I think was always wrong and has just now tripped up the linker. I
> > _assume_ that function should be __attribute__((weak)). Then again, maybe
> > I'm full of it. But hey, it compiles now.
> >
>
> What tree fails to compile?
Current mainline plus current -mm mess. I don't think there's anything
else apart from this patch involved though.
> grepping mainline:
>
> ./arch/x86/kernel/cpu/mcheck/mce_intel_64.c:asmlinkage void smp_thermal_interrupt(void)
> ./arch/x86/kernel/cpu/mcheck/p4.c:void smp_thermal_interrupt(struct pt_regs *regs)
> ./arch/x86/kernel/entry_64.S: apicinterrupt THERMAL_APIC_VECTOR,smp_thermal_interrupt
> ./arch/x86/kernel/traps_64.c:asmlinkage void __attribute__((weak)) smp_thermal_interrupt(void)
>
> Notice the one in p4.c doesn't take void args.
Yeah. I didn't really look into what all this is supposed to do.
Something seems a bit broken.
> I'll look into it.
Thanks. My current queue is (as usual) at http://userweb.kernel.org/~akpm/mmotm/
The pt_regs arg is never used, make it agree with the other
definitions of smp_thermal_interrupt.
It doesn't look like smp_thermal_interrupt is even called on
32-bit...
Signed-off-by: Harvey Harrison <[email protected]>
---
Andrew, this is a fairly dumb patch, but works-for-me(tm)
arch/x86/kernel/cpu/mcheck/p4.c | 8 ++++----
1 files changed, 4 insertions(+), 4 deletions(-)
diff --git a/arch/x86/kernel/cpu/mcheck/p4.c b/arch/x86/kernel/cpu/mcheck/p4.c
index cb03345..866105a 100644
--- a/arch/x86/kernel/cpu/mcheck/p4.c
+++ b/arch/x86/kernel/cpu/mcheck/p4.c
@@ -36,7 +36,7 @@ static int mce_num_extended_msrs = 0;
#ifdef CONFIG_X86_MCE_P4THERMAL
-static void unexpected_thermal_interrupt(struct pt_regs *regs)
+static void unexpected_thermal_interrupt(void)
{
printk(KERN_ERR "CPU%d: Unexpected LVT TMR interrupt!\n",
smp_processor_id());
@@ -44,7 +44,7 @@ static void unexpected_thermal_interrupt(struct pt_regs *regs)
}
/* P4/Xeon Thermal transition interrupt handler */
-static void intel_thermal_interrupt(struct pt_regs *regs)
+static void intel_thermal_interrupt(void)
{
__u64 msr_val;
@@ -55,12 +55,12 @@ static void intel_thermal_interrupt(struct pt_regs *regs)
}
/* Thermal interrupt handler for this CPU setup */
-static void (*vendor_thermal_interrupt)(struct pt_regs *regs) = unexpected_thermal_interrupt;
+static void (*vendor_thermal_interrupt)(void) = unexpected_thermal_interrupt;
void smp_thermal_interrupt(struct pt_regs *regs)
{
irq_enter();
- vendor_thermal_interrupt(regs);
+ vendor_thermal_interrupt();
__get_cpu_var(irq_stat).irq_thermal_count++;
irq_exit();
}
--
1.5.4.rc4.1142.gf5a97
The pt_regs arg is never used, make it agree with the other
definitions of smp_thermal_interrupt.
It doesn't look like smp_thermal_interrupt is even called on
32-bit...
Signed-off-by: Harvey Harrison <[email protected]>
---
How about I actually send the patch that worked this time?
Sorry about the last one.
arch/x86/kernel/cpu/mcheck/p4.c | 10 +++++-----
1 files changed, 5 insertions(+), 5 deletions(-)
diff --git a/arch/x86/kernel/cpu/mcheck/p4.c b/arch/x86/kernel/cpu/mcheck/p4.c
index cb03345..1f76b98 100644
--- a/arch/x86/kernel/cpu/mcheck/p4.c
+++ b/arch/x86/kernel/cpu/mcheck/p4.c
@@ -36,7 +36,7 @@ static int mce_num_extended_msrs = 0;
#ifdef CONFIG_X86_MCE_P4THERMAL
-static void unexpected_thermal_interrupt(struct pt_regs *regs)
+static void unexpected_thermal_interrupt(void)
{
printk(KERN_ERR "CPU%d: Unexpected LVT TMR interrupt!\n",
smp_processor_id());
@@ -44,7 +44,7 @@ static void unexpected_thermal_interrupt(struct pt_regs *regs)
}
/* P4/Xeon Thermal transition interrupt handler */
-static void intel_thermal_interrupt(struct pt_regs *regs)
+static void intel_thermal_interrupt(void)
{
__u64 msr_val;
@@ -55,12 +55,12 @@ static void intel_thermal_interrupt(struct pt_regs *regs)
}
/* Thermal interrupt handler for this CPU setup */
-static void (*vendor_thermal_interrupt)(struct pt_regs *regs) = unexpected_thermal_interrupt;
+static void (*vendor_thermal_interrupt)(void) = unexpected_thermal_interrupt;
-void smp_thermal_interrupt(struct pt_regs *regs)
+void smp_thermal_interrupt(void)
{
irq_enter();
- vendor_thermal_interrupt(regs);
+ vendor_thermal_interrupt();
__get_cpu_var(irq_stat).irq_thermal_count++;
irq_exit();
}
--
1.5.4.rc4.1142.gf5a97
On Sat, Feb 02, 2008 at 05:01:53PM -0800, Harvey Harrison wrote:
> The pt_regs arg is never used, make it agree with the other
> definitions of smp_thermal_interrupt.
They are all completely independent.
>
> It doesn't look like smp_thermal_interrupt is even called on
> 32-bit...
It is called from a macro in include/asm-x86/mach-default/entry_arch.h
-Andi