2002-10-24 13:21:50

by Corey Minyard

[permalink] [raw]
Subject: [PATCH] NMI request/release, version 5 - I think this one's ready

diff -ur linux.orig/arch/i386/kernel/i386_ksyms.c linux/arch/i386/kernel/i386_ksyms.c
--- linux.orig/arch/i386/kernel/i386_ksyms.c Mon Oct 21 13:25:58 2002
+++ linux/arch/i386/kernel/i386_ksyms.c Tue Oct 22 12:19:59 2002
@@ -90,6 +90,9 @@
EXPORT_SYMBOL(cpu_khz);
EXPORT_SYMBOL(apm_info);

+EXPORT_SYMBOL(request_nmi);
+EXPORT_SYMBOL(release_nmi);
+
#ifdef CONFIG_DEBUG_IOVIRT
EXPORT_SYMBOL(__io_virt_debug);
#endif
diff -ur linux.orig/arch/i386/kernel/irq.c linux/arch/i386/kernel/irq.c
--- linux.orig/arch/i386/kernel/irq.c Mon Oct 21 13:25:58 2002
+++ linux/arch/i386/kernel/irq.c Tue Oct 22 12:08:20 2002
@@ -131,6 +131,8 @@
* Generic, controller-independent functions:
*/

+extern void nmi_append_user_names(struct seq_file *p);
+
int show_interrupts(struct seq_file *p, void *v)
{
int i, j;
@@ -166,6 +168,8 @@
for (j = 0; j < NR_CPUS; j++)
if (cpu_online(j))
p += seq_printf(p, "%10u ", nmi_count(j));
+ seq_printf(p, " ");
+ nmi_append_user_names(p);
seq_putc(p, '\n');
#if CONFIG_X86_LOCAL_APIC
seq_printf(p, "LOC: ");
diff -ur linux.orig/arch/i386/kernel/nmi.c linux/arch/i386/kernel/nmi.c
--- linux.orig/arch/i386/kernel/nmi.c Mon Oct 21 13:25:45 2002
+++ linux/arch/i386/kernel/nmi.c Wed Oct 23 13:57:55 2002
@@ -20,6 +20,7 @@
#include <linux/interrupt.h>
#include <linux/mc146818rtc.h>
#include <linux/kernel_stat.h>
+#include <linux/notifier.h>

#include <asm/smp.h>
#include <asm/mtrr.h>
@@ -102,6 +103,17 @@
return 0;
}

+static int nmi_watchdog_tick (void * dev_id, struct pt_regs * regs);
+
+static struct nmi_handler nmi_watchdog_handler =
+{
+ .link = LIST_HEAD_INIT(nmi_watchdog_handler.link),
+ .dev_name = "nmi_watchdog",
+ .dev_id = NULL,
+ .handler = nmi_watchdog_tick,
+ .priority = 255, /* We want to be relatively high priority. */
+};
+
static int __init setup_nmi_watchdog(char *str)
{
int nmi;
@@ -110,6 +122,12 @@

if (nmi >= NMI_INVALID)
return 0;
+
+ if (request_nmi(&nmi_watchdog_handler) != 0) {
+ /* Couldn't add a watchdog handler, give up. */
+ return 0;
+ }
+
if (nmi == NMI_NONE)
nmi_watchdog = nmi;
/*
@@ -350,7 +368,7 @@
alert_counter[i] = 0;
}

-void nmi_watchdog_tick (struct pt_regs * regs)
+static int nmi_watchdog_tick (void * dev_id, struct pt_regs * regs)
{

/*
@@ -401,4 +419,6 @@
}
wrmsr(nmi_perfctr_msr, -(cpu_khz/nmi_hz*1000), -1);
}
+
+ return NOTIFY_OK;
}
diff -ur linux.orig/arch/i386/kernel/traps.c linux/arch/i386/kernel/traps.c
--- linux.orig/arch/i386/kernel/traps.c Mon Oct 21 13:25:45 2002
+++ linux/arch/i386/kernel/traps.c Thu Oct 24 08:11:14 2002
@@ -23,6 +23,10 @@
#include <linux/spinlock.h>
#include <linux/interrupt.h>
#include <linux/highmem.h>
+#include <linux/notifier.h>
+#include <linux/seq_file.h>
+#include <linux/slab.h>
+#include <linux/rcupdate.h>

#ifdef CONFIG_EISA
#include <linux/ioport.h>
@@ -485,21 +489,125 @@
printk("Do you have a strange power saving mode enabled?\n");
}

+/*
+ * A list of handlers for NMIs. This list will be called in order
+ * when an NMI from an otherwise unidentifiable source somes in. If
+ * one of these handles the NMI, it should return NOTIFY_OK, otherwise
+ * it should return NOTIFY_DONE. NMI handlers cannot claim spinlocks,
+ * so we have to handle freeing these in a different manner. A
+ * spinlock protects the list from multiple writers. When something
+ * is removed from the list, it is thrown into another list (with
+ * another link, so the "next" element stays valid) and scheduled to
+ * run as an rcu. When the rcu runs, it is guaranteed that nothing in
+ * the NMI code will be using it.
+ */
+static struct list_head nmi_handler_list = LIST_HEAD_INIT(nmi_handler_list);
+static spinlock_t nmi_handler_lock = SPIN_LOCK_UNLOCKED;
+
+/*
+ * To free the list item, we use an rcu. The rcu-function will not
+ * run until all processors have done a context switch, gone idle, or
+ * gone to a user process, so it's guaranteed that when this runs, any
+ * NMI handler running at release time has completed and the list item
+ * can be safely freed.
+ */
+static void free_nmi_handler(void *arg)
+{
+ struct nmi_handler *handler = arg;
+
+ INIT_LIST_HEAD(&(handler->link));
+ complete(&(handler->complete));
+}
+
+int request_nmi(struct nmi_handler *handler)
+{
+ struct list_head *curr;
+ struct nmi_handler *curr_h = NULL;
+
+ if (!list_empty(&(handler->link)))
+ return EBUSY;
+
+ spin_lock(&nmi_handler_lock);
+
+ __list_for_each(curr, &nmi_handler_list) {
+ curr_h = list_entry(curr, struct nmi_handler, link);
+ if (curr_h->priority <= handler->priority)
+ break;
+ }
+
+ if (curr_h)
+ /* list_add_rcu takes care of memory barrier */
+ list_add_rcu(&(handler->link), curr_h->link.prev);
+ else
+ list_add_rcu(&(handler->link), &nmi_handler_list);
+
+ spin_unlock(&nmi_handler_lock);
+ return 0;
+}
+
+void release_nmi(struct nmi_handler *handler)
+{
+ spin_lock(&nmi_handler_lock);
+ list_del_rcu(&(handler->link));
+ init_completion(&(handler->complete));
+ call_rcu(&(handler->rcu), free_nmi_handler, handler);
+ spin_unlock(&nmi_handler_lock);
+
+ /* Wait for handler to finish being freed. This can't be
+ interrupted, we must wait until it finished. */
+ wait_for_completion(&(handler->complete));
+}
+
+static int call_nmi_handlers(struct pt_regs * regs)
+{
+ struct list_head *curr;
+ struct nmi_handler *curr_h;
+ int handled = 0;
+ int val;
+
+ __list_for_each_rcu(curr, &nmi_handler_list) {
+ curr_h = list_entry(curr, struct nmi_handler, link);
+ val = curr_h->handler(curr_h->dev_id, regs);
+ switch (val & ~NOTIFY_STOP_MASK) {
+ case NOTIFY_OK:
+ handled = 1;
+ break;
+
+ case NOTIFY_DONE:
+ default:
+ }
+ if (val & NOTIFY_STOP_MASK)
+ break;
+ }
+ return handled;
+}
+
+void nmi_append_user_names(struct seq_file *p)
+{
+ struct list_head *curr;
+ struct nmi_handler *curr_h;
+
+ spin_lock(&nmi_handler_lock);
+ __list_for_each(curr, &nmi_handler_list) {
+ curr_h = list_entry(curr, struct nmi_handler, link);
+ if (curr_h->dev_name)
+ p += seq_printf(p, " %s", curr_h->dev_name);
+ }
+ spin_unlock(&nmi_handler_lock);
+}
+
static void default_do_nmi(struct pt_regs * regs)
{
unsigned char reason = inb(0x61);

if (!(reason & 0xc0)) {
-#if CONFIG_X86_LOCAL_APIC
/*
- * Ok, so this is none of the documented NMI sources,
- * so it must be the NMI watchdog.
+ * Check the handler list to see if anyone can handle this
+ * nmi.
*/
- if (nmi_watchdog) {
- nmi_watchdog_tick(regs);
+ if (call_nmi_handlers(regs))
return;
- }
-#endif
+
unknown_nmi_error(reason, regs);
return;
}
diff -ur linux.orig/include/asm-i386/apic.h linux/include/asm-i386/apic.h
--- linux.orig/include/asm-i386/apic.h Mon Oct 21 13:26:04 2002
+++ linux/include/asm-i386/apic.h Tue Oct 22 12:40:16 2002
@@ -79,7 +79,6 @@
extern void setup_boot_APIC_clock (void);
extern void setup_secondary_APIC_clock (void);
extern void setup_apic_nmi_watchdog (void);
-extern inline void nmi_watchdog_tick (struct pt_regs * regs);
extern int APIC_init_uniprocessor (void);
extern void disable_APIC_timer(void);
extern void enable_APIC_timer(void);
diff -ur linux.orig/include/asm-i386/irq.h linux/include/asm-i386/irq.h
--- linux.orig/include/asm-i386/irq.h Mon Oct 21 13:24:41 2002
+++ linux/include/asm-i386/irq.h Wed Oct 23 16:47:24 2002
@@ -12,6 +12,7 @@

#include <linux/config.h>
#include <linux/sched.h>
+#include <linux/rcupdate.h>
/* include comes from machine specific directory */
#include "irq_vectors.h"

@@ -27,5 +28,36 @@
#ifdef CONFIG_X86_LOCAL_APIC
#define ARCH_HAS_NMI_WATCHDOG /* See include/linux/nmi.h */
#endif
+
+
+/**
+ * Register a handler to get called when an NMI occurs. If the
+ * handler actually handles the NMI, it should return NOTIFY_OK. If
+ * it did not handle the NMI, it should return NOTIFY_DONE. It may "or"
+ * on NOTIFY_STOP_MASK to the return value if it does not want other
+ * handlers after it to be notified. Note that this is a slow-path
+ * handler, if you have a fast-path handler there's another tie-in
+ * for you. See the oprofile code.
+ */
+#define HAVE_NMI_HANDLER 1
+struct nmi_handler
+{
+ struct list_head link; /* You must init this before use. */
+
+ char *dev_name;
+ void *dev_id;
+ int (*handler)(void *dev_id, struct pt_regs *regs);
+ int priority; /* Handlers called in priority order. */
+
+ /* Don't mess with anything below here. */
+
+ struct rcu_head rcu;
+ struct completion complete;
+};
+
+int request_nmi(struct nmi_handler *handler);
+
+/* Release will block until the handler is completely free. */
+void release_nmi(struct nmi_handler *handler);

#endif /* _ASM_IRQ_H */


Attachments:
linux-nmi-v5.diff (8.51 kB)

2002-10-24 14:40:29

by John Levon

[permalink] [raw]
Subject: Re: [PATCH] NMI request/release, version 5 - I think this one's ready

On Thu, Oct 24, 2002 at 08:28:20AM -0500, Corey Minyard wrote:

> diff -ur linux.orig/arch/i386/kernel/traps.c linux/arch/i386/kernel/traps.c
> --- linux.orig/arch/i386/kernel/traps.c Mon Oct 21 13:25:45 2002
> +++ linux/arch/i386/kernel/traps.c Thu Oct 24 08:11:14 2002

At this point I'd quite like to see :

mv nmi.c nmi_watchdog.c

and put all this stuff in always-compiled nmi.c. traps.c is getting
bloated.

> static void default_do_nmi(struct pt_regs * regs)
> {
> unsigned char reason = inb(0x61);
>
> if (!(reason & 0xc0)) {
> -#if CONFIG_X86_LOCAL_APIC
> /*
> - * Ok, so this is none of the documented NMI sources,
> - * so it must be the NMI watchdog.
> + * Check the handler list to see if anyone can handle this
> + * nmi.
> */
> - if (nmi_watchdog) {
> - nmi_watchdog_tick(regs);
> + if (call_nmi_handlers(regs))

Now you're using RCU, it's a real pity that we have the inb() first -
if it wasn't for that, there would be no reason at all to have the "fast
path" setting code too (the latter code is ugly, which is one reason I
want to ditch it).

How about adding default_do_nmi as the minimal-priority handler, then
add the watchdog with higher priority above that ? Then oprofile can add
itself on top of those both and return NOTIFY_OK to indicate it should
break out of the loop. As a bonus, you lose the inb() for the watchdog
too.

> +++ linux/include/asm-i386/irq.h Wed Oct 23 16:47:24 2002

I thought you agreed the stuff should be in asm/nmi.h ?

regards
john
--
"This is playing, not work, therefore it's not a waste of time."
- Zath

2002-10-24 15:29:51

by Corey Minyard

[permalink] [raw]
Subject: Re: [PATCH] NMI request/release, version 5 - I think this one's ready

John Levon wrote:

>On Thu, Oct 24, 2002 at 08:28:20AM -0500, Corey Minyard wrote:
>
>
>
>>diff -ur linux.orig/arch/i386/kernel/traps.c linux/arch/i386/kernel/traps.c
>>--- linux.orig/arch/i386/kernel/traps.c Mon Oct 21 13:25:45 2002
>>+++ linux/arch/i386/kernel/traps.c Thu Oct 24 08:11:14 2002
>>
>>
>
>At this point I'd quite like to see :
>
> mv nmi.c nmi_watchdog.c
>
>and put all this stuff in always-compiled nmi.c. traps.c is getting
>bloated.
>
I agree.

>
>
>
>> static void default_do_nmi(struct pt_regs * regs)
>> {
>> unsigned char reason = inb(0x61);
>>
>> if (!(reason & 0xc0)) {
>>-#if CONFIG_X86_LOCAL_APIC
>> /*
>>- * Ok, so this is none of the documented NMI sources,
>>- * so it must be the NMI watchdog.
>>+ * Check the handler list to see if anyone can handle this
>>+ * nmi.
>> */
>>- if (nmi_watchdog) {
>>- nmi_watchdog_tick(regs);
>>+ if (call_nmi_handlers(regs))
>>
>>
>
>Now you're using RCU, it's a real pity that we have the inb() first -
>if it wasn't for that, there would be no reason at all to have the "fast
>path" setting code too (the latter code is ugly, which is one reason I
>want to ditch it).
>
>How about adding default_do_nmi as the minimal-priority handler, then
>add the watchdog with higher priority above that ? Then oprofile can add
>itself on top of those both and return NOTIFY_OK to indicate it should
>break out of the loop. As a bonus, you lose the inb() for the watchdog
>too.
>
Is there any way to detect if the nmi watchdog actually caused the
timeout? I don't understand the hardware well enough to do it without
some work, but it would be a VERY good idea to make sure the nmi
watchdog actully caused the operation.

Plus, can't you get more than one cause of an NMI? Shouldn't you check
them all?

>>+++ linux/include/asm-i386/irq.h Wed Oct 23 16:47:24 2002
>>
>>
>
>I thought you agreed the stuff should be in asm/nmi.h ?
>
I will (I had forgotten), and I will move nmi.h to nmi_watchdog.h.

-Corey

2002-10-24 17:12:04

by John Levon

[permalink] [raw]
Subject: Re: [PATCH] NMI request/release, version 5 - I think this one's ready

On Thu, Oct 24, 2002 at 10:36:22AM -0500, Corey Minyard wrote:

> Is there any way to detect if the nmi watchdog actually caused the
> timeout? I don't understand the hardware well enough to do it without

You can check if the counter used overflowed :

#define CTR_OVERFLOWED(n) (!((n) & (1U<<31)))
#define CTRL_READ(l,h,msrs,c) do {rdmsr(MSR_P6_PERFCTR0, (l), (h));} while (0)

CTR_READ(low, high, msrs, i);
if (CTR_OVERFLOWED(low)) {
... found

like oprofile does.

I've accidentally deleted your patch, but weren't you unconditionally
returning "break out of loop" from the watchdog ? I'm not very clear on
the difference between NOTIFY_DONE and NOTIFY_OK anyway...

> Plus, can't you get more than one cause of an NMI? Shouldn't you check
> them all?

Shouldn't the NMI stay asserted ? At least with perfctr, two counters
causes two interrupts (actually there's a bug in mainline oprofile on
that that I'll fix when Linus is back)

regards
john
--
"This is playing, not work, therefore it's not a waste of time."
- Zath

2002-10-24 17:37:23

by Corey Minyard

[permalink] [raw]
Subject: Re: [PATCH] NMI request/release, version 5 - I think this one's ready

John Levon wrote:

>On Thu, Oct 24, 2002 at 10:36:22AM -0500, Corey Minyard wrote:
>
>
>
>>Is there any way to detect if the nmi watchdog actually caused the
>>timeout? I don't understand the hardware well enough to do it without
>>
>>
>
>You can check if the counter used overflowed :
>
>#define CTR_OVERFLOWED(n) (!((n) & (1U<<31)))
>#define CTRL_READ(l,h,msrs,c) do {rdmsr(MSR_P6_PERFCTR0, (l), (h));} while (0)
>
> CTR_READ(low, high, msrs, i);
> if (CTR_OVERFLOWED(low)) {
> ... found
>
>like oprofile does.
>
Ok, thanks, I'll add that to the nmi_watchdog code.

>
>I've accidentally deleted your patch, but weren't you unconditionally
>returning "break out of loop" from the watchdog ? I'm not very clear on
>the difference between NOTIFY_DONE and NOTIFY_OK anyway...
>
The comments on these are:
#define NOTIFY_DONE 0x0000 /* Don't care */
#define NOTIFY_OK 0x0001 /* Suits me */
#define NOTIFY_STOP_MASK 0x8000 /* Don't call further */

I'mt taking these to mean that NOTIFY_DONE means you didn't handle it,
NOTIFY_OK means you did handle it, and you "or" on NOTIFY_STOP_MASK if
you want it to stop. I'm thinking that stopping is probably a bad idea,
if the NMI is really edge triggered.

>
>
>
>>Plus, can't you get more than one cause of an NMI? Shouldn't you check
>>them all?
>>
>>
>
>Shouldn't the NMI stay asserted ? At least with perfctr, two counters
>causes two interrupts (actually there's a bug in mainline oprofile on
>that that I'll fix when Linus is back)
>
There's a comment in do_nmi() that says that the NMI is edge triggered.
If it is, then you have to call everything. I'd really like a manual
on how the timer chip works, I'll see if I can hunt one down.

-Corey

2002-10-24 17:58:31

by John Levon

[permalink] [raw]
Subject: Re: [PATCH] NMI request/release, version 5 - I think this one's ready

On Thu, Oct 24, 2002 at 12:43:50PM -0500, Corey Minyard wrote:

> #define NOTIFY_DONE 0x0000 /* Don't care */
> #define NOTIFY_OK 0x0001 /* Suits me */
> #define NOTIFY_STOP_MASK 0x8000 /* Don't call further */
>
> I'mt taking these to mean that NOTIFY_DONE means you didn't handle it,
> NOTIFY_OK means you did handle it, and you "or" on NOTIFY_STOP_MASK if
> you want it to stop. I'm thinking that stopping is probably a bad idea,
> if the NMI is really edge triggered.

> There's a comment in do_nmi() that says that the NMI is edge triggered.

So there is. Darn. You could special case default_do_nmi, only printing
out "Unknown NMI" iff the reason inb() check fails, /and/ no previous
handlers set handled. Theoretically we have to take the cost of the
inb() every time otherwise we can lose one of the NMISC-based things in
default_do_nmi ... I can always see if this makes a noticable practical
difference for oprofile under high interrupt load

(I also need to be able to remove the NMI watchdog handler, but that's
an oprofile-specific problem).

Perhaps things will end being best to leave the current fast-path thing,
but we should make sure that we've explored the possibilities of
removing it first ;)

thanks
john

--
"This is playing, not work, therefore it's not a waste of time."
- Zath

2002-10-24 18:26:09

by Corey Minyard

[permalink] [raw]
Subject: Re: [PATCH] NMI request/release, version 5 - I think this one's ready

John Levon wrote:

>On Thu, Oct 24, 2002 at 12:43:50PM -0500, Corey Minyard wrote:
>
>
>>There's a comment in do_nmi() that says that the NMI is edge triggered.
>>
>>
>
>So there is. Darn. You could special case default_do_nmi, only printing
>out "Unknown NMI" iff the reason inb() check fails, /and/ no previous
>handlers set handled. Theoretically we have to take the cost of the
>inb() every time otherwise we can lose one of the NMISC-based things in
>default_do_nmi ... I can always see if this makes a noticable practical
>difference for oprofile under high interrupt load
>
>(I also need to be able to remove the NMI watchdog handler, but that's
>an oprofile-specific problem).
>
>Perhaps things will end being best to leave the current fast-path thing,
>but we should make sure that we've explored the possibilities of
>removing it first ;)
>
This also means that the current code is actually wrong, since the
current code just returns at the first thing it finds.

I'll work on redoing this all in one list.

Do you know if nmi_shutdown in oprofile/nmi_int.c can be called from
interrupt context? Because nmi_release() can block, so I couldn't use
it as-is if it ran in interrupt context.

-Corey


2002-10-24 18:41:05

by John Levon

[permalink] [raw]
Subject: Re: [PATCH] NMI request/release, version 5 - I think this one's ready

On Thu, Oct 24, 2002 at 01:32:40PM -0500, Corey Minyard wrote:

> Do you know if nmi_shutdown in oprofile/nmi_int.c can be called from

It can only be called in process context, from the event buffer
release() method (and open(), on the failure path)

regards
john
--
"This is playing, not work, therefore it's not a waste of time."
- Zath

2002-10-24 19:58:37

by Corey Minyard

[permalink] [raw]
Subject: Re: [PATCH] NMI request/release, version 5 - I think this one's ready

John Levon wrote:

>On Thu, Oct 24, 2002 at 10:36:22AM -0500, Corey Minyard wrote:
>
>
>
>>Is there any way to detect if the nmi watchdog actually caused the
>>timeout? I don't understand the hardware well enough to do it without
>>
>>
>
>You can check if the counter used overflowed :
>
>#define CTR_OVERFLOWED(n) (!((n) & (1U<<31)))
>#define CTRL_READ(l,h,msrs,c) do {rdmsr(MSR_P6_PERFCTR0, (l), (h));} while (0)
>
> CTR_READ(low, high, msrs, i);
> if (CTR_OVERFLOWED(low)) {
> ... found
>
Any way to do this for the IO_APIC case?

-Corey


2002-10-24 20:22:58

by John Levon

[permalink] [raw]
Subject: Re: [PATCH] NMI request/release, version 5 - I think this one's ready

On Thu, Oct 24, 2002 at 03:03:31PM -0500, Corey Minyard wrote:

> > if (CTR_OVERFLOWED(low)) {
> > ... found
> >
> Any way to do this for the IO_APIC case?

Ugh, forgot about that. I have no idea, sorry

regards
john

--
"This is playing, not work, therefore it's not a waste of time."
- Zath