2010-02-12 22:20:53

by Don Zickus

[permalink] [raw]
Subject: [PATCH 1/4] nmi_watchdog: use a boolean config flag for compiling

Determines if an arch has set up arch-specific perf_events and nmi_watchdog
code. This should restrict compiles to only those arches that are ready.

Signed-off-by: Don Zickus <[email protected]>
---
arch/x86/Kconfig | 1 +
init/Kconfig | 5 +++++
lib/Kconfig.debug | 3 +--
3 files changed, 7 insertions(+), 2 deletions(-)

diff --git a/arch/x86/Kconfig b/arch/x86/Kconfig
index cbcbfde..4f9685f 100644
--- a/arch/x86/Kconfig
+++ b/arch/x86/Kconfig
@@ -52,6 +52,7 @@ config X86
select HAVE_KERNEL_LZO
select HAVE_HW_BREAKPOINT
select PERF_EVENTS
+ select PERF_EVENTS_NMI
select ANON_INODES
select HAVE_ARCH_KMEMCHECK
select HAVE_USER_RETURN_NOTIFIER
diff --git a/init/Kconfig b/init/Kconfig
index ada4844..7331a16 100644
--- a/init/Kconfig
+++ b/init/Kconfig
@@ -946,6 +946,11 @@ config PERF_USE_VMALLOC
help
See tools/perf/design.txt for details

+config PERF_EVENTS_NMI
+ bool
+ help
+ Arch has support for nmi_watchdog
+
menu "Kernel Performance Events And Counters"

config PERF_EVENTS
diff --git a/lib/Kconfig.debug b/lib/Kconfig.debug
index acef882..01a4d85 100644
--- a/lib/Kconfig.debug
+++ b/lib/Kconfig.debug
@@ -172,8 +172,7 @@ config DETECT_SOFTLOCKUP

config NMI_WATCHDOG
bool "Detect Hard Lockups with an NMI Watchdog"
- depends on DEBUG_KERNEL && PERF_EVENTS
- depends on X86
+ depends on DEBUG_KERNEL && PERF_EVENTS && PERF_EVENTS_NMI
default y
help
Say Y here to enable the kernel to use the NMI as a watchdog
--
1.6.6.83.gc9a2


2010-02-12 22:20:20

by Don Zickus

[permalink] [raw]
Subject: [PATCH 2/4] nmi_watchdog: compile and portability fixes

The original patch was x86_64 centric. Changed the code to make it less so.

Tested by building and running on a powerpc.

Signed-off-by: Don Zickus <[email protected]>
---
arch/x86/include/asm/nmi.h | 2 +
arch/x86/kernel/apic/hw_nmi.c | 21 ++++++++++++----
include/linux/nmi.h | 9 +++++++
kernel/nmi_watchdog.c | 52 ++++++++++++++++++++++++++++++++--------
kernel/sysctl.c | 15 +++++++++++-
5 files changed, 82 insertions(+), 17 deletions(-)

diff --git a/arch/x86/include/asm/nmi.h b/arch/x86/include/asm/nmi.h
index 93da9c3..5b41b0f 100644
--- a/arch/x86/include/asm/nmi.h
+++ b/arch/x86/include/asm/nmi.h
@@ -17,7 +17,9 @@ int do_nmi_callback(struct pt_regs *regs, int cpu);

extern void die_nmi(char *str, struct pt_regs *regs, int do_panic);
extern int check_nmi_watchdog(void);
+#if !defined(CONFIG_NMI_WATCHDOG)
extern int nmi_watchdog_enabled;
+#endif
extern int avail_to_resrv_perfctr_nmi_bit(unsigned int);
extern int reserve_perfctr_nmi(unsigned int);
extern void release_perfctr_nmi(unsigned int);
diff --git a/arch/x86/kernel/apic/hw_nmi.c b/arch/x86/kernel/apic/hw_nmi.c
index 8c0e6a4..312d772 100644
--- a/arch/x86/kernel/apic/hw_nmi.c
+++ b/arch/x86/kernel/apic/hw_nmi.c
@@ -32,8 +32,13 @@ static DEFINE_PER_CPU(unsigned, last_irq_sum);
*/
static inline unsigned int get_timer_irqs(int cpu)
{
- return per_cpu(irq_stat, cpu).apic_timer_irqs +
- per_cpu(irq_stat, cpu).irq0_irqs;
+ unsigned int irqs = per_cpu(irq_stat, cpu).irq0_irqs;
+
+#if defined(CONFIG_X86_LOCAL_APIC)
+ irqs += per_cpu(irq_stat, cpu).apic_timer_irqs;
+#endif
+
+ return irqs;
}

static inline int mce_in_progress(void)
@@ -82,6 +87,11 @@ int hw_nmi_is_cpu_stuck(struct pt_regs *regs)
}
}

+u64 hw_nmi_get_sample_period(void)
+{
+ return cpu_khz * 1000;
+}
+
void arch_trigger_all_cpu_backtrace(void)
{
int i;
@@ -100,15 +110,16 @@ void arch_trigger_all_cpu_backtrace(void)
}

/* STUB calls to mimic old nmi_watchdog behaviour */
+#if defined(CONFIG_X86_LOCAL_APIC)
unsigned int nmi_watchdog = NMI_NONE;
EXPORT_SYMBOL(nmi_watchdog);
+void acpi_nmi_enable(void) { return; }
+void acpi_nmi_disable(void) { return; }
+#endif
atomic_t nmi_active = ATOMIC_INIT(0); /* oprofile uses this */
EXPORT_SYMBOL(nmi_active);
-int nmi_watchdog_enabled;
int unknown_nmi_panic;
void cpu_nmi_set_wd_enabled(void) { return; }
-void acpi_nmi_enable(void) { return; }
-void acpi_nmi_disable(void) { return; }
void stop_apic_nmi_watchdog(void *unused) { return; }
void setup_apic_nmi_watchdog(void *unused) { return; }
int __init check_nmi_watchdog(void) { return 0; }
diff --git a/include/linux/nmi.h b/include/linux/nmi.h
index a42ff0b..794e735 100644
--- a/include/linux/nmi.h
+++ b/include/linux/nmi.h
@@ -20,10 +20,14 @@ extern void touch_nmi_watchdog(void);
extern void acpi_nmi_disable(void);
extern void acpi_nmi_enable(void);
#else
+#ifndef CONFIG_NMI_WATCHDOG
static inline void touch_nmi_watchdog(void)
{
touch_softlockup_watchdog();
}
+#else
+extern void touch_nmi_watchdog(void);
+#endif
static inline void acpi_nmi_disable(void) { }
static inline void acpi_nmi_enable(void) { }
#endif
@@ -49,6 +53,11 @@ static inline bool trigger_all_cpu_backtrace(void)

#ifdef CONFIG_NMI_WATCHDOG
int hw_nmi_is_cpu_stuck(struct pt_regs *);
+u64 hw_nmi_get_sample_period(void);
+extern int nmi_watchdog_enabled;
+struct ctl_table;
+extern int proc_nmi_enabled(struct ctl_table *, int ,
+ void __user *, size_t *, loff_t *);
#endif

#endif
diff --git a/kernel/nmi_watchdog.c b/kernel/nmi_watchdog.c
index 36817b2..73c1954 100644
--- a/kernel/nmi_watchdog.c
+++ b/kernel/nmi_watchdog.c
@@ -30,6 +30,8 @@ static DEFINE_PER_CPU(struct perf_event *, nmi_watchdog_ev);
static DEFINE_PER_CPU(int, nmi_watchdog_touch);
static DEFINE_PER_CPU(long, alert_counter);

+static int panic_on_timeout;
+
void touch_nmi_watchdog(void)
{
__raw_get_cpu_var(nmi_watchdog_touch) = 1;
@@ -46,19 +48,49 @@ void touch_all_nmi_watchdog(void)
touch_softlockup_watchdog();
}

+static int __init setup_nmi_watchdog(char *str)
+{
+ if (!strncmp(str, "panic", 5)) {
+ panic_on_timeout = 1;
+ str = strchr(str, ',');
+ if (!str)
+ return 1;
+ ++str;
+ }
+ return 1;
+}
+__setup("nmi_watchdog=", setup_nmi_watchdog);
+
#ifdef CONFIG_SYSCTL
/*
* proc handler for /proc/sys/kernel/nmi_watchdog
*/
+int nmi_watchdog_enabled;
+
int proc_nmi_enabled(struct ctl_table *table, int write,
void __user *buffer, size_t *length, loff_t *ppos)
{
int cpu;

- if (per_cpu(nmi_watchdog_ev, smp_processor_id()) == NULL)
+ if (!write) {
+ struct perf_event *event;
+ for_each_online_cpu(cpu) {
+ event = per_cpu(nmi_watchdog_ev, cpu);
+ if (event->state > PERF_EVENT_STATE_OFF) {
+ nmi_watchdog_enabled = 1;
+ break;
+ }
+ }
+ proc_dointvec(table, write, buffer, length, ppos);
+ return 0;
+ }
+
+ if (per_cpu(nmi_watchdog_ev, smp_processor_id()) == NULL) {
nmi_watchdog_enabled = 0;
- else
- nmi_watchdog_enabled = 1;
+ proc_dointvec(table, write, buffer, length, ppos);
+ printk("NMI watchdog failed configuration, can not be enabled\n");
+ return 0;
+ }

touch_all_nmi_watchdog();
proc_dointvec(table, write, buffer, length, ppos);
@@ -81,8 +113,6 @@ struct perf_event_attr wd_attr = {
.disabled = 1,
};

-static int panic_on_timeout;
-
void wd_overflow(struct perf_event *event, int nmi,
struct perf_sample_data *data,
struct pt_regs *regs)
@@ -103,11 +133,11 @@ void wd_overflow(struct perf_event *event, int nmi,
*/
per_cpu(alert_counter,cpu) += 1;
if (per_cpu(alert_counter,cpu) == 5) {
- /*
- * die_nmi will return ONLY if NOTIFY_STOP happens..
- */
- die_nmi("BUG: NMI Watchdog detected LOCKUP",
- regs, panic_on_timeout);
+ if (panic_on_timeout) {
+ panic("NMI Watchdog detected LOCKUP on cpu %d", cpu);
+ } else {
+ WARN(1, "NMI Watchdog detected LOCKUP on cpu %d", cpu);
+ }
}
} else {
per_cpu(alert_counter,cpu) = 0;
@@ -133,7 +163,7 @@ cpu_callback(struct notifier_block *nfb, unsigned long action, void *hcpu)
case CPU_ONLINE:
case CPU_ONLINE_FROZEN:
/* originally wanted the below chunk to be in CPU_UP_PREPARE, but caps is unpriv for non-CPU0 */
- wd_attr.sample_period = cpu_khz * 1000;
+ wd_attr.sample_period = hw_nmi_get_sample_period();
event = perf_event_create_kernel_counter(&wd_attr, hotcpu, -1, wd_overflow);
if (IS_ERR(event)) {
printk(KERN_ERR "nmi watchdog failed to create perf event on %i: %p\n", hotcpu, event);
diff --git a/kernel/sysctl.c b/kernel/sysctl.c
index 8a68b24..ac72c9e 100644
--- a/kernel/sysctl.c
+++ b/kernel/sysctl.c
@@ -60,6 +60,10 @@
#include <asm/io.h>
#endif

+#ifdef CONFIG_NMI_WATCHDOG
+#include <linux/nmi.h>
+#endif
+

#if defined(CONFIG_SYSCTL)

@@ -692,7 +696,16 @@ static struct ctl_table kern_table[] = {
.mode = 0444,
.proc_handler = proc_dointvec,
},
-#if defined(CONFIG_X86_LOCAL_APIC) && defined(CONFIG_X86)
+#if defined(CONFIG_NMI_WATCHDOG)
+ {
+ .procname = "nmi_watchdog",
+ .data = &nmi_watchdog_enabled,
+ .maxlen = sizeof (int),
+ .mode = 0644,
+ .proc_handler = proc_nmi_enabled,
+ },
+#endif
+#if defined(CONFIG_X86_LOCAL_APIC) && defined(CONFIG_X86) && !defined(CONFIG_NMI_WATCHDOG)
{
.procname = "unknown_nmi_panic",
.data = &unknown_nmi_panic,
--
1.6.6.83.gc9a2

2010-02-12 22:20:32

by Don Zickus

[permalink] [raw]
Subject: [PATCH 3/4] nmi_watchdog: fallback to software events when no hardware pmu detected

Not all arches have a PMU or have perf_event support for their PMU. The
nmi_watchdog will fail in those cases. Fallback to using software events to
generate nmi_watchdog traffic with local apic interrupts.

Tested on a Pentium4 and it worked as expected, except for detecting cpu
lockups.

The problem with using software events as a cpu lock up detector is the
nmi_watchdog uses the logic that if local apic interrupts stop incrementing
then the cpu is probably locked up. But with software events we use the
local apic to trigger the nmi_watchdog callback to see if local apic
interrupts are still firing, which obviously they are otherwise we wouldn't
have been triggered.

The algorithm to detect cpu lock ups is the same as the old nmi_watchdog.
Perhaps we need to find a better way to detect lock ups?

Signed-off-by: Don Zickus <[email protected]>
---
kernel/nmi_watchdog.c | 8 ++++++--
1 files changed, 6 insertions(+), 2 deletions(-)

diff --git a/kernel/nmi_watchdog.c b/kernel/nmi_watchdog.c
index 73c1954..4f23505 100644
--- a/kernel/nmi_watchdog.c
+++ b/kernel/nmi_watchdog.c
@@ -166,8 +166,12 @@ cpu_callback(struct notifier_block *nfb, unsigned long action, void *hcpu)
wd_attr.sample_period = hw_nmi_get_sample_period();
event = perf_event_create_kernel_counter(&wd_attr, hotcpu, -1, wd_overflow);
if (IS_ERR(event)) {
- printk(KERN_ERR "nmi watchdog failed to create perf event on %i: %p\n", hotcpu, event);
- return NOTIFY_BAD;
+ wd_attr.type = PERF_TYPE_SOFTWARE;
+ event = perf_event_create_kernel_counter(&wd_attr, hotcpu, -1, wd_overflow);
+ if (IS_ERR(event)) {
+ printk(KERN_ERR "nmi watchdog failed to create perf event on %i: %p\n", hotcpu, event);
+ return NOTIFY_BAD;
+ }
}
per_cpu(nmi_watchdog_ev, hotcpu) = event;
perf_event_enable(per_cpu(nmi_watchdog_ev, hotcpu));
--
1.6.6.83.gc9a2

2010-02-12 22:20:41

by Don Zickus

[permalink] [raw]
Subject: [PATCH 4/4] [RFC][powerpc] nmi_watchdog: support for powerpc

In order to make sure other arches compiled properly, I used powerpc as a
test bed. It was just basic sanity checking code to see if the
infrastructure bits worked ok. The lock up detection logic is non-existent
and the sample_period is some large made up number.

The interesting piece of the patch is the change to ppc970-pmc.c. I had to
move the initcall from arch_init to early_init to allow the nmi_watchdog to
register with the perf_event subsystem as cpus were coming online.
Otherwise it failed with ENXIO.

This patch is just a conversation starter. I am not sure if powerpc has a
true NMI and if this is really needed. But since I spent some time making
it work, I thought I would throw it out there.

Signed-off-by: Don Zickus <[email protected]>
---
arch/powerpc/Kconfig | 1 +
arch/powerpc/kernel/Makefile | 1 +
arch/powerpc/kernel/hw_nmi.c | 45 ++++++++++++++++++++++++++++++++++++++
arch/powerpc/kernel/ppc970-pmu.c | 2 +-
4 files changed, 48 insertions(+), 1 deletions(-)
create mode 100644 arch/powerpc/kernel/hw_nmi.c

diff --git a/arch/powerpc/Kconfig b/arch/powerpc/Kconfig
index ba3948c..146b0b5 100644
--- a/arch/powerpc/Kconfig
+++ b/arch/powerpc/Kconfig
@@ -140,6 +140,7 @@ config PPC
select HAVE_SYSCALL_WRAPPERS if PPC64
select GENERIC_ATOMIC64 if PPC32
select HAVE_PERF_EVENTS
+ select PERF_EVENTS_NMI

config EARLY_PRINTK
bool
diff --git a/arch/powerpc/kernel/Makefile b/arch/powerpc/kernel/Makefile
index c002b04..08e3d2d 100644
--- a/arch/powerpc/kernel/Makefile
+++ b/arch/powerpc/kernel/Makefile
@@ -86,6 +86,7 @@ obj-$(CONFIG_KPROBES) += kprobes.o
obj-$(CONFIG_PPC_UDBG_16550) += legacy_serial.o udbg_16550.o
obj-$(CONFIG_STACKTRACE) += stacktrace.o
obj-$(CONFIG_SWIOTLB) += dma-swiotlb.o
+obj-$(CONFIG_NMI_WATCHDOG) += hw_nmi.o

pci64-$(CONFIG_PPC64) += pci_dn.o isa-bridge.o
obj-$(CONFIG_PCI) += pci_$(CONFIG_WORD_SIZE).o $(pci64-y) \
diff --git a/arch/powerpc/kernel/hw_nmi.c b/arch/powerpc/kernel/hw_nmi.c
new file mode 100644
index 0000000..2313724
--- /dev/null
+++ b/arch/powerpc/kernel/hw_nmi.c
@@ -0,0 +1,45 @@
+/*
+ * HW NMI watchdog support
+ *
+ * started by Don Zickus, Copyright (C) 2010 Red Hat, Inc.
+ *
+ * Arch specific calls to support NMI watchdog
+ *
+ * Bits copied from original nmi.c file
+ *
+ */
+
+#include <linux/smp.h>
+//#include <linux/cpumask.h>
+//#include <linux/sched.h>
+#include <linux/percpu.h>
+#include <linux/kernel_stat.h>
+
+//#include <linux/nmi.h>
+
+static DEFINE_PER_CPU(unsigned, last_irq_sum);
+
+int hw_nmi_is_cpu_stuck(struct pt_regs *regs)
+{
+ unsigned int sum;
+ int cpu = smp_processor_id();
+
+ /* We determine if the cpu is stuck by checking whether any
+ * interrupts have happened since we last checked. Of course
+ * an nmi storm could create false positives, but the higher
+ * level logic should account for that
+ */
+ return 0;
+ sum = kstat_irqs_cpu(0, cpu);
+ if (__get_cpu_var(last_irq_sum) == sum) {
+ return 1;
+ } else {
+ __get_cpu_var(last_irq_sum) = sum;
+ return 0;
+ }
+}
+
+u64 hw_nmi_get_sample_period(void)
+{
+ return 10000 * 1000;
+}
diff --git a/arch/powerpc/kernel/ppc970-pmu.c b/arch/powerpc/kernel/ppc970-pmu.c
index 8eff48e..d91aaf1 100644
--- a/arch/powerpc/kernel/ppc970-pmu.c
+++ b/arch/powerpc/kernel/ppc970-pmu.c
@@ -492,4 +492,4 @@ static int init_ppc970_pmu(void)
return register_power_pmu(&ppc970_pmu);
}

-arch_initcall(init_ppc970_pmu);
+early_initcall(init_ppc970_pmu);
--
1.6.6.83.gc9a2

2010-02-14 09:13:41

by Don Zickus

[permalink] [raw]
Subject: [tip:perf/nmi] nmi_watchdog: Use a boolean config flag for compiling

Commit-ID: c3128fb6ad39b0edda6675d20585a64846cf89ea
Gitweb: http://git.kernel.org/tip/c3128fb6ad39b0edda6675d20585a64846cf89ea
Author: Don Zickus <[email protected]>
AuthorDate: Fri, 12 Feb 2010 17:19:18 -0500
Committer: Ingo Molnar <[email protected]>
CommitDate: Sun, 14 Feb 2010 09:19:43 +0100

nmi_watchdog: Use a boolean config flag for compiling

Determines if an arch has set up arch-specific perf_events and
nmi_watchdog code. This should restrict compiles to only those
arches that are ready.

Signed-off-by: Don Zickus <[email protected]>
Cc: [email protected]
Cc: [email protected]
Cc: [email protected]
LKML-Reference: <[email protected]>
Signed-off-by: Ingo Molnar <[email protected]>
---
arch/x86/Kconfig | 1 +
init/Kconfig | 5 +++++
lib/Kconfig.debug | 3 +--
3 files changed, 7 insertions(+), 2 deletions(-)

diff --git a/arch/x86/Kconfig b/arch/x86/Kconfig
index cbcbfde..4f9685f 100644
--- a/arch/x86/Kconfig
+++ b/arch/x86/Kconfig
@@ -52,6 +52,7 @@ config X86
select HAVE_KERNEL_LZO
select HAVE_HW_BREAKPOINT
select PERF_EVENTS
+ select PERF_EVENTS_NMI
select ANON_INODES
select HAVE_ARCH_KMEMCHECK
select HAVE_USER_RETURN_NOTIFIER
diff --git a/init/Kconfig b/init/Kconfig
index ada4844..7331a16 100644
--- a/init/Kconfig
+++ b/init/Kconfig
@@ -946,6 +946,11 @@ config PERF_USE_VMALLOC
help
See tools/perf/design.txt for details

+config PERF_EVENTS_NMI
+ bool
+ help
+ Arch has support for nmi_watchdog
+
menu "Kernel Performance Events And Counters"

config PERF_EVENTS
diff --git a/lib/Kconfig.debug b/lib/Kconfig.debug
index acef882..01a4d85 100644
--- a/lib/Kconfig.debug
+++ b/lib/Kconfig.debug
@@ -172,8 +172,7 @@ config DETECT_SOFTLOCKUP

config NMI_WATCHDOG
bool "Detect Hard Lockups with an NMI Watchdog"
- depends on DEBUG_KERNEL && PERF_EVENTS
- depends on X86
+ depends on DEBUG_KERNEL && PERF_EVENTS && PERF_EVENTS_NMI
default y
help
Say Y here to enable the kernel to use the NMI as a watchdog

2010-02-14 09:13:56

by Don Zickus

[permalink] [raw]
Subject: [tip:perf/nmi] nmi_watchdog: Compile and portability fixes

Commit-ID: 504d7cf10ee42bb76b9556859f23d4121dee0a77
Gitweb: http://git.kernel.org/tip/504d7cf10ee42bb76b9556859f23d4121dee0a77
Author: Don Zickus <[email protected]>
AuthorDate: Fri, 12 Feb 2010 17:19:19 -0500
Committer: Ingo Molnar <[email protected]>
CommitDate: Sun, 14 Feb 2010 09:19:43 +0100

nmi_watchdog: Compile and portability fixes

The original patch was x86_64 centric. Changed the code to make
it less so.

Tested by building and running on a powerpc.

Signed-off-by: Don Zickus <[email protected]>
Cc: [email protected]
Cc: [email protected]
Cc: [email protected]
LKML-Reference: <[email protected]>
Signed-off-by: Ingo Molnar <[email protected]>
---
arch/x86/include/asm/nmi.h | 2 +
arch/x86/kernel/apic/hw_nmi.c | 21 ++++++++++++----
include/linux/nmi.h | 9 +++++++
kernel/nmi_watchdog.c | 52 ++++++++++++++++++++++++++++++++--------
kernel/sysctl.c | 15 +++++++++++-
5 files changed, 82 insertions(+), 17 deletions(-)

diff --git a/arch/x86/include/asm/nmi.h b/arch/x86/include/asm/nmi.h
index 93da9c3..5b41b0f 100644
--- a/arch/x86/include/asm/nmi.h
+++ b/arch/x86/include/asm/nmi.h
@@ -17,7 +17,9 @@ int do_nmi_callback(struct pt_regs *regs, int cpu);

extern void die_nmi(char *str, struct pt_regs *regs, int do_panic);
extern int check_nmi_watchdog(void);
+#if !defined(CONFIG_NMI_WATCHDOG)
extern int nmi_watchdog_enabled;
+#endif
extern int avail_to_resrv_perfctr_nmi_bit(unsigned int);
extern int reserve_perfctr_nmi(unsigned int);
extern void release_perfctr_nmi(unsigned int);
diff --git a/arch/x86/kernel/apic/hw_nmi.c b/arch/x86/kernel/apic/hw_nmi.c
index 8c0e6a4..312d772 100644
--- a/arch/x86/kernel/apic/hw_nmi.c
+++ b/arch/x86/kernel/apic/hw_nmi.c
@@ -32,8 +32,13 @@ static DEFINE_PER_CPU(unsigned, last_irq_sum);
*/
static inline unsigned int get_timer_irqs(int cpu)
{
- return per_cpu(irq_stat, cpu).apic_timer_irqs +
- per_cpu(irq_stat, cpu).irq0_irqs;
+ unsigned int irqs = per_cpu(irq_stat, cpu).irq0_irqs;
+
+#if defined(CONFIG_X86_LOCAL_APIC)
+ irqs += per_cpu(irq_stat, cpu).apic_timer_irqs;
+#endif
+
+ return irqs;
}

static inline int mce_in_progress(void)
@@ -82,6 +87,11 @@ int hw_nmi_is_cpu_stuck(struct pt_regs *regs)
}
}

+u64 hw_nmi_get_sample_period(void)
+{
+ return cpu_khz * 1000;
+}
+
void arch_trigger_all_cpu_backtrace(void)
{
int i;
@@ -100,15 +110,16 @@ void arch_trigger_all_cpu_backtrace(void)
}

/* STUB calls to mimic old nmi_watchdog behaviour */
+#if defined(CONFIG_X86_LOCAL_APIC)
unsigned int nmi_watchdog = NMI_NONE;
EXPORT_SYMBOL(nmi_watchdog);
+void acpi_nmi_enable(void) { return; }
+void acpi_nmi_disable(void) { return; }
+#endif
atomic_t nmi_active = ATOMIC_INIT(0); /* oprofile uses this */
EXPORT_SYMBOL(nmi_active);
-int nmi_watchdog_enabled;
int unknown_nmi_panic;
void cpu_nmi_set_wd_enabled(void) { return; }
-void acpi_nmi_enable(void) { return; }
-void acpi_nmi_disable(void) { return; }
void stop_apic_nmi_watchdog(void *unused) { return; }
void setup_apic_nmi_watchdog(void *unused) { return; }
int __init check_nmi_watchdog(void) { return 0; }
diff --git a/include/linux/nmi.h b/include/linux/nmi.h
index a42ff0b..794e735 100644
--- a/include/linux/nmi.h
+++ b/include/linux/nmi.h
@@ -20,10 +20,14 @@ extern void touch_nmi_watchdog(void);
extern void acpi_nmi_disable(void);
extern void acpi_nmi_enable(void);
#else
+#ifndef CONFIG_NMI_WATCHDOG
static inline void touch_nmi_watchdog(void)
{
touch_softlockup_watchdog();
}
+#else
+extern void touch_nmi_watchdog(void);
+#endif
static inline void acpi_nmi_disable(void) { }
static inline void acpi_nmi_enable(void) { }
#endif
@@ -49,6 +53,11 @@ static inline bool trigger_all_cpu_backtrace(void)

#ifdef CONFIG_NMI_WATCHDOG
int hw_nmi_is_cpu_stuck(struct pt_regs *);
+u64 hw_nmi_get_sample_period(void);
+extern int nmi_watchdog_enabled;
+struct ctl_table;
+extern int proc_nmi_enabled(struct ctl_table *, int ,
+ void __user *, size_t *, loff_t *);
#endif

#endif
diff --git a/kernel/nmi_watchdog.c b/kernel/nmi_watchdog.c
index 36817b2..73c1954 100644
--- a/kernel/nmi_watchdog.c
+++ b/kernel/nmi_watchdog.c
@@ -30,6 +30,8 @@ static DEFINE_PER_CPU(struct perf_event *, nmi_watchdog_ev);
static DEFINE_PER_CPU(int, nmi_watchdog_touch);
static DEFINE_PER_CPU(long, alert_counter);

+static int panic_on_timeout;
+
void touch_nmi_watchdog(void)
{
__raw_get_cpu_var(nmi_watchdog_touch) = 1;
@@ -46,19 +48,49 @@ void touch_all_nmi_watchdog(void)
touch_softlockup_watchdog();
}

+static int __init setup_nmi_watchdog(char *str)
+{
+ if (!strncmp(str, "panic", 5)) {
+ panic_on_timeout = 1;
+ str = strchr(str, ',');
+ if (!str)
+ return 1;
+ ++str;
+ }
+ return 1;
+}
+__setup("nmi_watchdog=", setup_nmi_watchdog);
+
#ifdef CONFIG_SYSCTL
/*
* proc handler for /proc/sys/kernel/nmi_watchdog
*/
+int nmi_watchdog_enabled;
+
int proc_nmi_enabled(struct ctl_table *table, int write,
void __user *buffer, size_t *length, loff_t *ppos)
{
int cpu;

- if (per_cpu(nmi_watchdog_ev, smp_processor_id()) == NULL)
+ if (!write) {
+ struct perf_event *event;
+ for_each_online_cpu(cpu) {
+ event = per_cpu(nmi_watchdog_ev, cpu);
+ if (event->state > PERF_EVENT_STATE_OFF) {
+ nmi_watchdog_enabled = 1;
+ break;
+ }
+ }
+ proc_dointvec(table, write, buffer, length, ppos);
+ return 0;
+ }
+
+ if (per_cpu(nmi_watchdog_ev, smp_processor_id()) == NULL) {
nmi_watchdog_enabled = 0;
- else
- nmi_watchdog_enabled = 1;
+ proc_dointvec(table, write, buffer, length, ppos);
+ printk("NMI watchdog failed configuration, can not be enabled\n");
+ return 0;
+ }

touch_all_nmi_watchdog();
proc_dointvec(table, write, buffer, length, ppos);
@@ -81,8 +113,6 @@ struct perf_event_attr wd_attr = {
.disabled = 1,
};

-static int panic_on_timeout;
-
void wd_overflow(struct perf_event *event, int nmi,
struct perf_sample_data *data,
struct pt_regs *regs)
@@ -103,11 +133,11 @@ void wd_overflow(struct perf_event *event, int nmi,
*/
per_cpu(alert_counter,cpu) += 1;
if (per_cpu(alert_counter,cpu) == 5) {
- /*
- * die_nmi will return ONLY if NOTIFY_STOP happens..
- */
- die_nmi("BUG: NMI Watchdog detected LOCKUP",
- regs, panic_on_timeout);
+ if (panic_on_timeout) {
+ panic("NMI Watchdog detected LOCKUP on cpu %d", cpu);
+ } else {
+ WARN(1, "NMI Watchdog detected LOCKUP on cpu %d", cpu);
+ }
}
} else {
per_cpu(alert_counter,cpu) = 0;
@@ -133,7 +163,7 @@ cpu_callback(struct notifier_block *nfb, unsigned long action, void *hcpu)
case CPU_ONLINE:
case CPU_ONLINE_FROZEN:
/* originally wanted the below chunk to be in CPU_UP_PREPARE, but caps is unpriv for non-CPU0 */
- wd_attr.sample_period = cpu_khz * 1000;
+ wd_attr.sample_period = hw_nmi_get_sample_period();
event = perf_event_create_kernel_counter(&wd_attr, hotcpu, -1, wd_overflow);
if (IS_ERR(event)) {
printk(KERN_ERR "nmi watchdog failed to create perf event on %i: %p\n", hotcpu, event);
diff --git a/kernel/sysctl.c b/kernel/sysctl.c
index 8a68b24..ac72c9e 100644
--- a/kernel/sysctl.c
+++ b/kernel/sysctl.c
@@ -60,6 +60,10 @@
#include <asm/io.h>
#endif

+#ifdef CONFIG_NMI_WATCHDOG
+#include <linux/nmi.h>
+#endif
+

#if defined(CONFIG_SYSCTL)

@@ -692,7 +696,16 @@ static struct ctl_table kern_table[] = {
.mode = 0444,
.proc_handler = proc_dointvec,
},
-#if defined(CONFIG_X86_LOCAL_APIC) && defined(CONFIG_X86)
+#if defined(CONFIG_NMI_WATCHDOG)
+ {
+ .procname = "nmi_watchdog",
+ .data = &nmi_watchdog_enabled,
+ .maxlen = sizeof (int),
+ .mode = 0644,
+ .proc_handler = proc_nmi_enabled,
+ },
+#endif
+#if defined(CONFIG_X86_LOCAL_APIC) && defined(CONFIG_X86) && !defined(CONFIG_NMI_WATCHDOG)
{
.procname = "unknown_nmi_panic",
.data = &unknown_nmi_panic,

2010-02-14 09:14:16

by Don Zickus

[permalink] [raw]
Subject: [tip:perf/nmi] nmi_watchdog: Fallback to software events when no hardware pmu detected

Commit-ID: cf454aecb31741a0438ed1201b3dd153c7c7b19a
Gitweb: http://git.kernel.org/tip/cf454aecb31741a0438ed1201b3dd153c7c7b19a
Author: Don Zickus <[email protected]>
AuthorDate: Fri, 12 Feb 2010 17:19:20 -0500
Committer: Ingo Molnar <[email protected]>
CommitDate: Sun, 14 Feb 2010 09:19:44 +0100

nmi_watchdog: Fallback to software events when no hardware pmu detected

Not all arches have a PMU or have perf_event support for their
PMU. The nmi_watchdog will fail in those cases. Fallback to
using software events to generate nmi_watchdog traffic with
local apic interrupts.

Tested on a Pentium4 and it worked as expected, except for
detecting cpu lockups.

The problem with using software events as a cpu lock up detector
is the nmi_watchdog uses the logic that if local apic interrupts
stop incrementing then the cpu is probably locked up. But with
software events we use the local apic to trigger the
nmi_watchdog callback to see if local apic interrupts are still
firing, which obviously they are otherwise we wouldn't have been
triggered.

The algorithm to detect cpu lock ups is the same as the old
nmi_watchdog. Perhaps we need to find a better way to detect
lock ups?

Signed-off-by: Don Zickus <[email protected]>
Cc: [email protected]
Cc: [email protected]
Cc: [email protected]
LKML-Reference: <[email protected]>
Signed-off-by: Ingo Molnar <[email protected]>
---
kernel/nmi_watchdog.c | 8 ++++++--
1 files changed, 6 insertions(+), 2 deletions(-)

diff --git a/kernel/nmi_watchdog.c b/kernel/nmi_watchdog.c
index 73c1954..4f23505 100644
--- a/kernel/nmi_watchdog.c
+++ b/kernel/nmi_watchdog.c
@@ -166,8 +166,12 @@ cpu_callback(struct notifier_block *nfb, unsigned long action, void *hcpu)
wd_attr.sample_period = hw_nmi_get_sample_period();
event = perf_event_create_kernel_counter(&wd_attr, hotcpu, -1, wd_overflow);
if (IS_ERR(event)) {
- printk(KERN_ERR "nmi watchdog failed to create perf event on %i: %p\n", hotcpu, event);
- return NOTIFY_BAD;
+ wd_attr.type = PERF_TYPE_SOFTWARE;
+ event = perf_event_create_kernel_counter(&wd_attr, hotcpu, -1, wd_overflow);
+ if (IS_ERR(event)) {
+ printk(KERN_ERR "nmi watchdog failed to create perf event on %i: %p\n", hotcpu, event);
+ return NOTIFY_BAD;
+ }
}
per_cpu(nmi_watchdog_ev, hotcpu) = event;
perf_event_enable(per_cpu(nmi_watchdog_ev, hotcpu));

2010-02-14 17:00:11

by Ingo Molnar

[permalink] [raw]
Subject: Re: [PATCH 1/4] nmi_watchdog: use a boolean config flag for compiling


i'm still getting build failures:

arch/x86/built-in.o: In function `arch_trigger_all_cpu_backtrace':
(.text+0x151f5): undefined reference to `apic'

config attached.

Ingo


Attachments:
(No filename) (175.00 B)
config (66.86 kB)
Download all attachments

2010-02-14 18:13:44

by Ingo Molnar

[permalink] [raw]
Subject: Re: [PATCH 1/4] nmi_watchdog: use a boolean config flag for compiling


i'm also getting this:

[ 11.101892] NET: Registered protocol family 5
[ 19.293996] INFO: RCU detected CPU stalls: 1 (detected by 0, t=10002 jiffies)
[ 19.293996] sending NMI to all CPUs:
[ 19.294339] NMI backtrace for cpu 0
[ 19.294339] CPU 0
[ 19.294339] Pid: 0, comm: swapper Not tainted 2.6.33-rc8-tip+ #16697 A8N-E/System Product Name
[ 19.294339] RIP: 0010:[<ffffffff81009f3f>] [<ffffffff81009f3f>] native_read_tsc+0x6/0x16
[ 19.294339] RSP: 0018:ffff880003a03df8 EFLAGS: 00000046
[ 19.294339] RAX: 0000000082d602a7 RBX: ffffffff82c1aeae RCX: 0000000082d60282
[ 19.294339] RDX: 0000000000000017 RSI: 0000000000000002 RDI: 00000000001eadb1
[ 19.294339] RBP: ffff880003a03df8 R08: 0000000000000000 R09: 0000000000000003
[ 19.294339] R10: ffffffff824a65a0 R11: 0000000000000000 R12: 00000000001eadb1
[ 19.294339] R13: 0000000000000000 R14: 0000000000000000 R15: 0000000000093c70
[ 19.294339] FS: 0000000000000000(0000) GS:ffff880003a00000(0000) knlGS:0000000000000000
[ 19.294339] CS: 0010 DS: 0000 ES: 0000 CR0: 000000008005003b
[ 19.294339] CR2: 0000000000000000 CR3: 0000000002494000 CR4: 00000000000006f0
[ 19.294339] DR0: 0000000000000000 DR1: 0000000000000000 DR2: 0000000000000000
[ 19.294339] DR3: 0000000000000000 DR6: 00000000ffff0ff0 DR7: 0000000000000400
[ 19.294339] Process swapper (pid: 0, threadinfo ffffffff82400000, task ffffffff8249c020)
[ 19.294339] Stack:
[ 19.294339] ffff880003a03e28 ffffffff813e9039 00000000000004a6 ffffffff824f2000
[ 19.294339] <0> 0000000000000000 ffffffff824f2000 ffff880003a03e38 ffffffff813e8fa5
[ 19.294339] <0> ffff880003a03e48 ffffffff813e8fe4 ffff880003a03e68 ffffffff8101a1a7
[ 19.294339] Call Trace:
[ 19.294339] <IRQ>
[ 19.294339] [<ffffffff813e9039>] delay_tsc+0x32/0x79
[ 19.294339] [<ffffffff813e8fa5>] __delay+0xa/0xc
[ 19.294339] [<ffffffff813e8fe4>] __const_udelay+0x3d/0x3f
[ 19.294339] [<ffffffff8101a1a7>] arch_trigger_all_cpu_backtrace+0x73/0x7f
[ 19.294339] [<ffffffff81095468>] print_other_cpu_stall+0xf7/0x10a
[ 19.294339] [<ffffffff81095540>] check_cpu_stall+0xc5/0xc9
[ 19.294339] [<ffffffff81095567>] __rcu_pending+0x23/0xd9
[ 19.294339] [<ffffffff8109564d>] rcu_check_callbacks+0x30/0x107
[ 19.294339] [<ffffffff81050219>] update_process_times+0x3c/0x5b
[ 19.294339] [<ffffffff81068492>] tick_periodic+0x63/0x6f
[ 19.294339] [<ffffffff810684bf>] tick_handle_periodic+0x21/0x6e
[ 19.294339] [<ffffffff81019b1a>] smp_apic_timer_interrupt+0x7c/0x8f
[ 19.294339] [<ffffffff81003553>] apic_timer_interrupt+0x13/0x20
[ 19.294339] <EOI>
[ 19.294339] [<ffffffff81020564>] ? native_safe_halt+0x6/0x8
[ 19.294339] [<ffffffff8106da5a>] ? trace_hardirqs_on+0xd/0xf
[ 19.294339] [<ffffffff8100a988>] default_idle+0x36/0x58
[ 19.294339] [<ffffffff81001bd6>] cpu_idle+0xdc/0x11d
[ 19.294339] [<ffffffff81b22580>] rest_init+0x74/0x76
[ 19.294339] [<ffffffff82a54d56>] start_kernel+0x372/0x37a
[ 19.294339] [<ffffffff82a542a0>] x86_64_start_reservations+0xa7/0xab
[ 19.294339] [<ffffffff82a5439c>] x86_64_start_kernel+0xf8/0x107
[ 19.294339] Code: 00 e8 8d f0 3d 00 c9 c3 55 40 88 f8 48 89 e5 e6 70 e4 71 c9 c3 55 40 88 f0 48 89 e5 e6 70 40 88 f8 e6 71 c9 c3 55 48 89 e5 0f 31 <89> c1 48 89 d0 48 c1 e0 20 89 c9 48 09 c8 c9 c3 55 48 89 e5 41
[ 19.294339] Call Trace:
[ 19.294339] <IRQ> [<ffffffff813e9039>] delay_tsc+0x32/0x79
[ 19.294339] [<ffffffff813e8fa5>] __delay+0xa/0xc
[ 19.294339] [<ffffffff813e8fe4>] __const_udelay+0x3d/0x3f
[ 19.294339] [<ffffffff8101a1a7>] arch_trigger_all_cpu_backtrace+0x73/0x7f
[ 19.294339] [<ffffffff81095468>] print_other_cpu_stall+0xf7/0x10a
[ 19.294339] [<ffffffff81095540>] check_cpu_stall+0xc5/0xc9
[ 19.294339] [<ffffffff81095567>] __rcu_pending+0x23/0xd9
[ 19.294339] [<ffffffff8109564d>] rcu_check_callbacks+0x30/0x107
[ 19.294339] [<ffffffff81050219>] update_process_times+0x3c/0x5b
[ 19.294339] [<ffffffff81068492>] tick_periodic+0x63/0x6f
[ 19.294339] [<ffffffff810684bf>] tick_handle_periodic+0x21/0x6e
[ 19.294339] [<ffffffff81019b1a>] smp_apic_timer_interrupt+0x7c/0x8f
[ 19.294339] [<ffffffff81003553>] apic_timer_interrupt+0x13/0x20
[ 19.294339] <EOI> [<ffffffff81020564>] ? native_safe_halt+0x6/0x8
[ 19.294339] [<ffffffff8106da5a>] ? trace_hardirqs_on+0xd/0xf
[ 19.294339] [<ffffffff8100a988>] default_idle+0x36/0x58
[ 19.294339] [<ffffffff81001bd6>] cpu_idle+0xdc/0x11d
[ 19.294339] [<ffffffff81b22580>] rest_init+0x74/0x76
[ 19.294339] [<ffffffff82a54d56>] start_kernel+0x372/0x37a
[ 19.294339] [<ffffffff82a542a0>] x86_64_start_reservations+0xa7/0xab
[ 19.294339] [<ffffffff82a5439c>] x86_64_start_kernel+0xf8/0x107
[ 19.294339] Pid: 0, comm: swapper Not tainted 2.6.33-rc8-tip+ #16697
[ 19.294339] Call Trace:
[ 19.294339] <NMI> [<ffffffff8100a778>] ? show_regs+0x26/0x2b
[ 19.294339] [<ffffffff8101a23c>] hw_nmi_is_cpu_stuck+0x4f/0xb7
[ 19.294339] [<ffffffff810907ab>] wd_overflow+0x43/0xc3
[ 19.294339] [<ffffffff810ad21d>] __perf_event_overflow+0x184/0x1fd
[ 19.294339] [<ffffffff810a9de1>] ? rcu_read_unlock+0x1c/0x1e
[ 19.294339] [<ffffffff810ad6ce>] perf_event_overflow+0x14/0x16
[ 19.294339] [<ffffffff810104ff>] x86_pmu_handle_irq+0x16d/0x1b6
[ 19.294339] [<ffffffff8100f4b8>] perf_event_nmi_handler+0x42/0x4f
[ 19.294339] [<ffffffff81060857>] notifier_call_chain+0x63/0x97
[ 19.294339] [<ffffffff81060ccb>] __atomic_notifier_call_chain+0x59/0x86
[ 19.294339] [<ffffffff81060c72>] ? __atomic_notifier_call_chain+0x0/0x86
[ 19.294339] [<ffffffff81060d07>] atomic_notifier_call_chain+0xf/0x11
[ 19.294339] [<ffffffff81060d37>] notify_die+0x2e/0x30
[ 19.294339] [<ffffffff81004956>] default_do_nmi+0x57/0x1ee
[ 19.294339] [<ffffffff81004b4d>] do_nmi+0x60/0x9b
[ 19.294339] [<ffffffff81b8ba60>] nmi+0x20/0x39
[ 19.294339] [<ffffffff81009f3f>] ? native_read_tsc+0x6/0x16
[ 19.294339] <<EOE>> <IRQ> [<ffffffff813e9039>] delay_tsc+0x32/0x79
[ 19.294339] [<ffffffff813e8fa5>] __delay+0xa/0xc
[ 19.294339] [<ffffffff813e8fe4>] __const_udelay+0x3d/0x3f
[ 19.294339] [<ffffffff8101a1a7>] arch_trigger_all_cpu_backtrace+0x73/0x7f
[ 19.294339] [<ffffffff81095468>] print_other_cpu_stall+0xf7/0x10a
[ 19.294339] [<ffffffff81095540>] check_cpu_stall+0xc5/0xc9
[ 19.294339] [<ffffffff81095567>] __rcu_pending+0x23/0xd9
[ 19.294339] [<ffffffff8109564d>] rcu_check_callbacks+0x30/0x107
[ 19.294339] [<ffffffff81050219>] update_process_times+0x3c/0x5b
[ 19.294339] [<ffffffff81068492>] tick_periodic+0x63/0x6f
[ 19.294339] [<ffffffff810684bf>] tick_handle_periodic+0x21/0x6e
[ 19.294339] [<ffffffff81019b1a>] smp_apic_timer_interrupt+0x7c/0x8f
[ 19.294339] [<ffffffff81003553>] apic_timer_interrupt+0x13/0x20
[ 19.294339] <EOI> [<ffffffff81020564>] ? native_safe_halt+0x6/0x8
[ 19.294339] [<ffffffff8106da5a>] ? trace_hardirqs_on+0xd/0xf
[ 19.294339] [<ffffffff8100a988>] default_idle+0x36/0x58
[ 19.294339] [<ffffffff81001bd6>] cpu_idle+0xdc/0x11d
[ 19.294339] [<ffffffff81b22580>] rest_init+0x74/0x76
[ 19.294339] [<ffffffff82a54d56>] start_kernel+0x372/0x37a
[ 19.294339] [<ffffffff82a542a0>] x86_64_start_reservations+0xa7/0xab
[ 19.294339] [<ffffffff82a5439c>] x86_64_start_kernel+0xf8/0x107
[ 19.294339] ------------[ cut here ]------------
[ 19.294339] WARNING: at kernel/nmi_watchdog.c:139 wd_overflow+0xa3/0xc3()
[ 19.294339] Hardware name: System Product Name
[ 19.294339] NMI Watchdog detected LOCKUP on cpu 0Pid: 0, comm: swapper Not tainted 2.6.33-rc8-tip+ #16697
[ 19.294339] Call Trace:
[ 19.294339] <NMI> [<ffffffff8104331c>] warn_slowpath_common+0x72/0x8a
[ 19.294339] [<ffffffff81043381>] warn_slowpath_fmt+0x3c/0x3e
[ 19.294339] [<ffffffff8109080b>] wd_overflow+0xa3/0xc3
[ 19.294339] [<ffffffff810ad21d>] __perf_event_overflow+0x184/0x1fd
[ 19.294339] [<ffffffff810a9de1>] ? rcu_read_unlock+0x1c/0x1e
[ 19.294339] [<ffffffff810ad6ce>] perf_event_overflow+0x14/0x16
[ 19.294339] [<ffffffff810104ff>] x86_pmu_handle_irq+0x16d/0x1b6
[ 19.294339] [<ffffffff8100f4b8>] perf_event_nmi_handler+0x42/0x4f
[ 19.294339] [<ffffffff81060857>] notifier_call_chain+0x63/0x97
[ 19.294339] [<ffffffff81060ccb>] __atomic_notifier_call_chain+0x59/0x86
[ 19.294339] [<ffffffff81060c72>] ? __atomic_notifier_call_chain+0x0/0x86
[ 19.294339] [<ffffffff81060d07>] atomic_notifier_call_chain+0xf/0x11
[ 19.294339] [<ffffffff81060d37>] notify_die+0x2e/0x30
[ 19.294339] [<ffffffff81004956>] default_do_nmi+0x57/0x1ee
[ 19.294339] [<ffffffff81004b4d>] do_nmi+0x60/0x9b
[ 19.294339] [<ffffffff81b8ba60>] nmi+0x20/0x39
[ 19.294339] [<ffffffff81009f3f>] ? native_read_tsc+0x6/0x16
[ 19.294339] <<EOE>> <IRQ> [<ffffffff813e9039>] delay_tsc+0x32/0x79
[ 19.294339] [<ffffffff813e8fa5>] __delay+0xa/0xc
[ 19.294339] [<ffffffff813e8fe4>] __const_udelay+0x3d/0x3f
[ 19.294339] [<ffffffff8101a1a7>] arch_trigger_all_cpu_backtrace+0x73/0x7f
[ 19.294339] [<ffffffff81095468>] print_other_cpu_stall+0xf7/0x10a
[ 19.294339] [<ffffffff81095540>] check_cpu_stall+0xc5/0xc9
[ 19.294339] [<ffffffff81095567>] __rcu_pending+0x23/0xd9
[ 19.294339] [<ffffffff8109564d>] rcu_check_callbacks+0x30/0x107
[ 19.294339] [<ffffffff81050219>] update_process_times+0x3c/0x5b
[ 19.294339] [<ffffffff81068492>] tick_periodic+0x63/0x6f
[ 19.294339] [<ffffffff810684bf>] tick_handle_periodic+0x21/0x6e
[ 19.294339] [<ffffffff81019b1a>] smp_apic_timer_interrupt+0x7c/0x8f
[ 19.294339] [<ffffffff81003553>] apic_timer_interrupt+0x13/0x20
[ 19.294339] <EOI> [<ffffffff81020564>] ? native_safe_halt+0x6/0x8
[ 19.294339] [<ffffffff8106da5a>] ? trace_hardirqs_on+0xd/0xf
[ 19.294339] [<ffffffff8100a988>] default_idle+0x36/0x58
[ 19.294339] [<ffffffff81001bd6>] cpu_idle+0xdc/0x11d
[ 19.294339] [<ffffffff81b22580>] rest_init+0x74/0x76
[ 19.294339] [<ffffffff82a54d56>] start_kernel+0x372/0x37a
[ 19.294339] [<ffffffff82a542a0>] x86_64_start_reservations+0xa7/0xab
[ 19.294339] [<ffffffff82a5439c>] x86_64_start_kernel+0xf8/0x107
[ 19.294339] ---[ end trace 12a5a6fbc564882f ]---
[ 19.295052] async_waiting @ 2046
[ 19.296176] async_continuing @ 2046 after 0 usec
[ 19.298241] scsi 0:0:0:0: Direct-Access ATA HDS722525VLAT80 V36O PQ: 0 ANSI: 5
[ 19.308341] device: 'target0:0:0': device_add
[ 19.310536] PM: Adding info for No Bus:target0:0:0
[ 19.312112] device: '0:0:0:0': device_add
[ 19.314823] bus: 'scsi': add device 0:0:0:0
[ 19.315821] PM: Adding info for scsi:0:0:0:0
[ 19.318189] bus: 'scsi': driver_probe_device: matched device 0:0:0:0 with driver st
[ 19.325106] bus: 'scsi': really_probe: probing driver st with device 0:0:0:0

Ingo


Attachments:
(No filename) (10.53 kB)
config (67.46 kB)
1 (432.11 kB)
Download all attachments

2010-02-15 00:33:58

by Paul Mackerras

[permalink] [raw]
Subject: Re: [PATCH 3/4] nmi_watchdog: fallback to software events when no hardware pmu detected

On Fri, Feb 12, 2010 at 05:19:20PM -0500, Don Zickus wrote:

> diff --git a/kernel/nmi_watchdog.c b/kernel/nmi_watchdog.c
> index 73c1954..4f23505 100644
> --- a/kernel/nmi_watchdog.c
> +++ b/kernel/nmi_watchdog.c
> @@ -166,8 +166,12 @@ cpu_callback(struct notifier_block *nfb, unsigned long action, void *hcpu)
> wd_attr.sample_period = hw_nmi_get_sample_period();
> event = perf_event_create_kernel_counter(&wd_attr, hotcpu, -1, wd_overflow);
> if (IS_ERR(event)) {
> - printk(KERN_ERR "nmi watchdog failed to create perf event on %i: %p\n", hotcpu, event);
> - return NOTIFY_BAD;
> + wd_attr.type = PERF_TYPE_SOFTWARE;
> + event = perf_event_create_kernel_counter(&wd_attr, hotcpu, -1, wd_overflow);

Here you don't explicitly set wd_attr.config or wd_attr.sample_period
for the software event. So PERF_COUNT_HW_CPU_CYCLES (which is 0)
becomes PERF_COUNT_SW_CPU_CLOCK (also 0). Which is either a happy
accident or really really subtle. :) I suggest you either set
wd_attr.config or add a comment pointing out that you are using the
fact that PERF_COUNT_HW_CPU_CYCLES == PERF_COUNT_SW_CPU_CLOCK.

Also you don't explicitly set wd_attr.sample_period, so it will be
whatever hw_nmi_get_sample_period() returned, except now measured in
nanoseconds rather than (presumably) cpu clock cycles. Are you aiming
for 1 interrupt per second? If so you should set .sample_period to
NSEC_PER_SEC.

Paul.

2010-02-15 15:38:53

by Don Zickus

[permalink] [raw]
Subject: Re: [PATCH 3/4] nmi_watchdog: fallback to software events when no hardware pmu detected

On Mon, Feb 15, 2010 at 11:33:41AM +1100, Paul Mackerras wrote:
> On Fri, Feb 12, 2010 at 05:19:20PM -0500, Don Zickus wrote:
>
> > diff --git a/kernel/nmi_watchdog.c b/kernel/nmi_watchdog.c
> > index 73c1954..4f23505 100644
> > --- a/kernel/nmi_watchdog.c
> > +++ b/kernel/nmi_watchdog.c
> > @@ -166,8 +166,12 @@ cpu_callback(struct notifier_block *nfb, unsigned long action, void *hcpu)
> > wd_attr.sample_period = hw_nmi_get_sample_period();
> > event = perf_event_create_kernel_counter(&wd_attr, hotcpu, -1, wd_overflow);
> > if (IS_ERR(event)) {
> > - printk(KERN_ERR "nmi watchdog failed to create perf event on %i: %p\n", hotcpu, event);
> > - return NOTIFY_BAD;
> > + wd_attr.type = PERF_TYPE_SOFTWARE;
> > + event = perf_event_create_kernel_counter(&wd_attr, hotcpu, -1, wd_overflow);
>
> Here you don't explicitly set wd_attr.config or wd_attr.sample_period
> for the software event. So PERF_COUNT_HW_CPU_CYCLES (which is 0)
> becomes PERF_COUNT_SW_CPU_CLOCK (also 0). Which is either a happy
> accident or really really subtle. :) I suggest you either set
> wd_attr.config or add a comment pointing out that you are using the
> fact that PERF_COUNT_HW_CPU_CYCLES == PERF_COUNT_SW_CPU_CLOCK.
>
> Also you don't explicitly set wd_attr.sample_period, so it will be
> whatever hw_nmi_get_sample_period() returned, except now measured in
> nanoseconds rather than (presumably) cpu clock cycles. Are you aiming
> for 1 interrupt per second? If so you should set .sample_period to
> NSEC_PER_SEC.

Hmm, good points, I'll work on that and add it to my next round of
changes.

Thanks,
Don

2010-02-15 17:52:45

by Don Zickus

[permalink] [raw]
Subject: Re: [PATCH 1/4] nmi_watchdog: use a boolean config flag for compiling

On Sun, Feb 14, 2010 at 05:59:41PM +0100, Ingo Molnar wrote:
>
> i'm still getting build failures:
>
> arch/x86/built-in.o: In function `arch_trigger_all_cpu_backtrace':
> (.text+0x151f5): undefined reference to `apic'
>
> config attached.

Hmm, I can't get the config to match up properly when I run 'make
oldconfig', even though I am using the HEAD of perf/nmi. I took the
defaults anyway and used 'ARCH=i386 make all' but couldn't reproduce your
build failure.

Looking through the code, is there ever a case where 'apic' is undefined?
The arch_trigger_all_cpu_backtrace should match identically to the old nmi
code, so I am stuck on how to fix this.

Thoughts?

Cheers,
Don

2010-02-15 18:20:52

by Cyrill Gorcunov

[permalink] [raw]
Subject: Re: [PATCH 1/4] nmi_watchdog: use a boolean config flag for compiling

On Mon, Feb 15, 2010 at 12:51:06PM -0500, Don Zickus wrote:
> On Sun, Feb 14, 2010 at 05:59:41PM +0100, Ingo Molnar wrote:
> >
> > i'm still getting build failures:
> >
> > arch/x86/built-in.o: In function `arch_trigger_all_cpu_backtrace':
> > (.text+0x151f5): undefined reference to `apic'
> >
> > config attached.
>
> Hmm, I can't get the config to match up properly when I run 'make
> oldconfig', even though I am using the HEAD of perf/nmi. I took the
> defaults anyway and used 'ARCH=i386 make all' but couldn't reproduce your
> build failure.
>
> Looking through the code, is there ever case where 'apic' is undefined?
> The arch_trigger_all_cpu_backtrace should match identically to the old nmi
> code, so I am stuck on how to fix this.
>
> Thoughts?
>

It looks similar to a problem with the HW breakpoints "Kconfig select"
issue that Frederic fixed not that long ago, if I recall correctly.

So due to select we choose to compile hw_nmi.c, but CONFIG_LOCAL_APIC
(or APIC on UP) is turned off. So apic.c is not compiled. And at the
linking stage we get an unresolved symbol.

I don't remember the details how Frederic fixed HW breakpoints "select"
issue, CC'ed :) But you may google to find it out.

> Cheers,
> Don
>

n.b.: i remember about p4 pmu, still out of time :(

-- Cyrill

2010-02-15 18:21:22

by Cyrill Gorcunov

[permalink] [raw]
Subject: Re: [PATCH 1/4] nmi_watchdog: use a boolean config flag for compiling

On Mon, Feb 15, 2010 at 09:13:01PM +0300, Cyrill Gorcunov wrote:
...
> > Looking through the code, is there ever case where 'apic' is undefined?
> > The arch_trigger_all_cpu_backtrace should match identically to the old nmi
> > code, so I am stuck on how to fix this.
> >
> > Thoughts?
> >
>
> It looks familiar to one problem with HW breakpoints "Kconfig select"
> issue Frederic fixed once not that long ago, if I recall correctly.
>
> So due to select we choose to compile hw_nmi.c, but CONFIG_LOCAL_APIC
> (or APIC on UP) is turned off. So apic.c is not compiled. And at stage
> of linking we get unresolved symbol.
>
> I don't remember the details how Frederic fixed HW breakpoints "select"
> issue, CC'ed :) But you may google to find it out.
>

Here is what I mean http://patchwork.kernel.org/patch/67973/

-- Cyrill

2010-02-15 18:45:22

by Don Zickus

[permalink] [raw]
Subject: Re: [PATCH 1/4] nmi_watchdog: use a boolean config flag for compiling

On Mon, Feb 15, 2010 at 09:21:17PM +0300, Cyrill Gorcunov wrote:
> On Mon, Feb 15, 2010 at 09:13:01PM +0300, Cyrill Gorcunov wrote:
> ...
> > > Looking through the code, is there ever case where 'apic' is undefined?
> > > The arch_trigger_all_cpu_backtrace should match identically to the old nmi
> > > code, so I am stuck on how to fix this.
> > >
> > > Thoughts?
> > >
> >
> > It looks familiar to one problem with HW breakpoints "Kconfig select"
> > issue Frederic fixed once not that long ago, if I recall correctly.
> >
> > So due to select we choose to compile hw_nmi.c, but CONFIG_LOCAL_APIC
> > (or APIC on UP) is turned off. So apic.c is not compiled. And at stage
> > of linking we get unresolved symbol.


Hmm thanks for the feedback. The code can handle CONFIG_LOCAL_APIC being
turned off. I'll look more at the UP stuff.

> >
> > I don't remember the details how Frederic fixed HW breakpoints "select"
> > issue, CC'ed :) But you may google to find it out.
> >
>
> Here is what I mean http://patchwork.kernel.org/patch/67973/

I don't believe I have the same issue. Thanks for the pointer.

Cheers,
Don

2010-02-15 23:03:17

by Don Zickus

[permalink] [raw]
Subject: Re: [PATCH 1/4] nmi_watchdog: use a boolean config flag for compiling

On Sun, Feb 14, 2010 at 07:12:19PM +0100, Ingo Molnar wrote:
>
> i'm also getting this:
>
> [ 11.101892] NET: Registered protocol family 5
> [ 19.293996] INFO: RCU detected CPU stalls: 1 (detected by 0, t=10002 jiffies)

I guess part of the reason I am having trouble reproducing this is
that I am not sure where you are getting the config that you sent
me. The above clearly shows RCU has stall detection on, but the config you
provided me has:

# CONFIG_RCU_CPU_STALL_DETECTOR is not set

I guess I assumed I could just plug the config files you gave me into the
source tree. Perhaps I misunderstood your intentions?

Cheers,
Don

2010-02-16 14:31:14

by Ingo Molnar

[permalink] [raw]
Subject: Re: [PATCH 1/4] nmi_watchdog: use a boolean config flag for compiling


* Don Zickus <[email protected]> wrote:

> On Sun, Feb 14, 2010 at 05:59:41PM +0100, Ingo Molnar wrote:
> >
> > i'm still getting build failures:
> >
> > arch/x86/built-in.o: In function `arch_trigger_all_cpu_backtrace':
> > (.text+0x151f5): undefined reference to `apic'
> >
> > config attached.
>
> Hmm, I can't get the config to match up properly when I run 'make
> oldconfig', even though I am using the HEAD of perf/nmi. I took the
> defaults anyway and used 'ARCH=i386 make all' but couldn't reproduce your
> build failure.

Hm, neither can i reproduce it with the config i sent. Maybe i sent the wrong
config, or it somehow got solved meanwhile. I'll re-add perf/nmi to
tip:master and we'll see it shortly whether there are any build failures
left.

Thanks,

Ingo

2010-02-18 19:28:11

by Ingo Molnar

[permalink] [raw]
Subject: Re: [PATCH 1/4] nmi_watchdog: use a boolean config flag for compiling


* Ingo Molnar <[email protected]> wrote:

>
> * Don Zickus <[email protected]> wrote:
>
> > On Sun, Feb 14, 2010 at 05:59:41PM +0100, Ingo Molnar wrote:
> > >
> > > i'm still getting build failures:
> > >
> > > arch/x86/built-in.o: In function `arch_trigger_all_cpu_backtrace':
> > > (.text+0x151f5): undefined reference to `apic'
> > >
> > > config attached.
> >
> > Hmm, I can't get the config to match up properly when I run 'make
> > oldconfig', even though I am using the HEAD of perf/nmi. I took the
> > defaults anyway and used 'ARCH=i386 make all' but couldn't reproduce your
> > build failure.
>
> Hm, neither can i reproduce it with the config i sent. Maybe i sent the
> wrong config, or it somehow got solved meanwhile. I'll re-add perf/nmi to
> tip:master and we'll see it shortly whether there are any build failures
> left.

i reproduced it again on tip-bb22c6d:

CC init/version.o
LD init/built-in.o
LD .tmp_vmlinux1
arch/x86/built-in.o: In function `arch_trigger_all_cpu_backtrace':
(.text+0x18778): undefined reference to `apic'
make: *** [.tmp_vmlinux1] Error 1

config attached.

Thanks,

Ingo


Attachments:
(No filename) (1.12 kB)
config (63.13 kB)
Download all attachments