2002-12-09 21:55:18

by Stephen Hemminger

[permalink] [raw]
Subject: [PATCH] Notifier for significant events on i386

This is a successor to the previous patch for notifier callback when NMI
watchdog occurs. It is a port of x86_64 code (thanks for the suggestion
Andi Kleen <[email protected]>) with extensions for watchdog, and integration
of panic handling.

To get notified for panic, oops, NMI and other events the caller needs
to insert itself in the notify_die chain. The callback can then filter
out which events are of interest.

This started out as a way to hook in LKCD, but it is general enough that
kprobe, kdb, and other utilities can use it as well.

Please consider this because it makes integration of other components
much easier.

diff -urN -X dontdiff linux-2.5/include/asm-i386/kdebug.h
linux-2.5-lkcd/include/asm-i386/kdebug.h
--- linux-2.5/include/asm-i386/kdebug.h 1969-12-31 16:00:00.000000000
-0800
+++ linux-2.5-lkcd/include/asm-i386/kdebug.h 2002-12-06
11:52:34.000000000 -0800
@@ -0,0 +1,55 @@
+#ifndef _I386_KDEBUG_H
+#define _I386_KDEBUG_H 1
+
+#include <linux/notifier.h>
+
+struct pt_regs;
+
+struct die_args {
+ struct pt_regs *regs;
+ const char *str;
+ long err;
+};
+
+extern struct notifier_block *die_chain;
+
+/* Grossly misnamed. */
+enum die_val {
+ DIE_OOPS = 1,
+ DIE_INT3,
+ DIE_DEBUG,
+ DIE_PANIC,
+ DIE_NMI,
+ DIE_DIE,
+ DIE_CPUINIT, /* not really a die, but .. */
+ DIE_TRAPINIT, /* not really a die, but .. */
+ DIE_STOP,
+ DIE_WATCHDOG,
+};
+
+static inline int notify_die(enum die_val val, const char *str,
+ struct pt_regs *regs,long err)
+{
+ struct die_args args = { regs: regs, str: str, err: err };
+ return notifier_call_chain(&die_chain, val, &args);
+}
+
+static inline void get_current_regs(struct pt_regs *regs)
+{
+ __asm__ __volatile__("movl %%ebx,%0" : "=m"(regs->ebx));
+ __asm__ __volatile__("movl %%ecx,%0" : "=m"(regs->ecx));
+ __asm__ __volatile__("movl %%edx,%0" : "=m"(regs->edx));
+ __asm__ __volatile__("movl %%esi,%0" : "=m"(regs->esi));
+ __asm__ __volatile__("movl %%edi,%0" : "=m"(regs->edi));
+ __asm__ __volatile__("movl %%ebp,%0" : "=m"(regs->ebp));
+ __asm__ __volatile__("movl %%eax,%0" : "=m"(regs->eax));
+ __asm__ __volatile__("movl %%esp,%0" : "=m"(regs->esp));
+ __asm__ __volatile__("movw %%ss, %%ax;" :"=a"(regs->xss));
+ __asm__ __volatile__("movw %%cs, %%ax;" :"=a"(regs->xcs));
+ __asm__ __volatile__("movw %%ds, %%ax;" :"=a"(regs->xds));
+ __asm__ __volatile__("movw %%es, %%ax;" :"=a"(regs->xes));
+ __asm__ __volatile__("pushfl; popl %0" :"=m"(regs->eflags));
+ regs->eip = (unsigned long)current_text_addr();
+}
+
+#endif
diff -urN -X dontdiff linux-2.5/arch/i386/kernel/nmi.c
linux-2.5-lkcd/arch/i386/kernel/nmi.c
--- linux-2.5/arch/i386/kernel/nmi.c 2002-12-02 09:38:13.000000000 -0800
+++ linux-2.5-lkcd/arch/i386/kernel/nmi.c 2002-12-06 11:52:13.000000000
-0800
@@ -21,6 +21,7 @@
#include <linux/mc146818rtc.h>
#include <linux/kernel_stat.h>

+#include <asm/kdebug.h>
#include <asm/smp.h>
#include <asm/mtrr.h>
#include <asm/mpspec.h>
@@ -377,6 +378,7 @@
bust_spinlocks(1);
printk("NMI Watchdog detected LOCKUP on CPU%d, eip %08lx,
registers:\n", cpu, regs->eip);
show_registers(regs);
+ notify_die(DIE_WATCHDOG, "nmi_watchdog", regs, 0);
printk("console shuts up ...\n");
console_silent();
spin_unlock(&nmi_print_lock);
diff -urN -X dontdiff linux-2.5/arch/i386/kernel/reboot.c
linux-2.5-lkcd/arch/i386/kernel/reboot.c
--- linux-2.5/arch/i386/kernel/reboot.c 2002-12-02 09:38:13.000000000
-0800
+++ linux-2.5-lkcd/arch/i386/kernel/reboot.c 2002-12-06
11:52:13.000000000 -0800
@@ -8,6 +8,7 @@
#include <linux/interrupt.h>
#include <linux/mc146818rtc.h>
#include <asm/uaccess.h>
+#include <asm/kdebug.h>

/*
* Power off function, if any
@@ -256,7 +257,8 @@
* Stop all CPUs and turn off local APICs and the IO-APIC, so
* other OSs see a clean IRQ state.
*/
- smp_send_stop();
+ if (notify_die(DIE_STOP,"cpustop",0,0) != NOTIFY_BAD)
+ smp_send_stop();
disable_IO_APIC();
#endif

diff -urN -X dontdiff linux-2.5/arch/i386/kernel/setup.c
linux-2.5-lkcd/arch/i386/kernel/setup.c
--- linux-2.5/arch/i386/kernel/setup.c 2002-12-02 09:38:13.000000000
-0800
+++ linux-2.5-lkcd/arch/i386/kernel/setup.c 2002-12-06
11:52:13.000000000 -0800
@@ -36,6 +36,7 @@
#include <linux/highmem.h>
#include <asm/e820.h>
#include <asm/mpspec.h>
+#include <asm/kdebug.h>
#include <asm/edd.h>
#include <asm/setup.h>
#include <asm/arch_hooks.h>
@@ -43,6 +44,12 @@

static inline char * __init machine_specific_memory_setup(void);

+extern struct notifier_block *panic_notifier_list;
+static int panic_event(struct notifier_block *, unsigned long, void *);
+static struct notifier_block panic_block = {
+ .notifier_call = panic_event,
+};
+
/*
* Machine setup..
*/
@@ -910,6 +917,9 @@
#endif
#endif
dmi_scan_machine();
+
+ /* Register a call for panic conditions into notify_die. */
+ notifier_chain_register(&panic_notifier_list, &panic_block);
}

static int __init highio_setup(char *str)
@@ -920,6 +930,16 @@
}
__setup("nohighio", highio_setup);

+static int panic_event(struct notifier_block *this,
+ unsigned long event,
+ void *ptr)
+{
+ struct pt_regs regs;
+ get_current_regs(&regs);
+
+ return notify_die(DIE_PANIC, (const char *)ptr, &regs, event);
+}
+

#include "setup_arch_post.h"
/*
diff -urN -X dontdiff linux-2.5/arch/i386/kernel/smpboot.c
linux-2.5-lkcd/arch/i386/kernel/smpboot.c
--- linux-2.5/arch/i386/kernel/smpboot.c 2002-12-02 09:38:13.000000000
-0800
+++ linux-2.5-lkcd/arch/i386/kernel/smpboot.c 2002-12-06
11:52:13.000000000 -0800
@@ -49,6 +49,7 @@
#include <asm/tlbflush.h>
#include <asm/smpboot.h>
#include <asm/desc.h>
+#include <asm/kdebug.h>
#include <asm/arch_hooks.h>
#include "smpboot_hooks.h"
#include "mach_apic.h"
@@ -418,6 +419,8 @@
*/
smp_store_cpu_info(cpuid);

+ notify_die(DIE_CPUINIT, "cpuinit", NULL, 0);
+
disable_APIC_timer();
local_irq_disable();
/*

diff -urN -X dontdiff linux-2.5/arch/i386/kernel/traps.c
linux-2.5-lkcd/arch/i386/kernel/traps.c
--- linux-2.5/arch/i386/kernel/traps.c 2002-12-02 09:38:13.000000000
-0800
+++ linux-2.5-lkcd/arch/i386/kernel/traps.c 2002-12-06
11:52:13.000000000 -0800
@@ -41,6 +41,7 @@
#include <asm/debugreg.h>
#include <asm/desc.h>
#include <asm/i387.h>
+#include <asm/kdebug.h>
#include <asm/nmi.h>

#include <asm/smp.h>
@@ -85,8 +86,9 @@
asmlinkage void spurious_interrupt_bug(void);
asmlinkage void machine_check(void);

-static int kstack_depth_to_print = 24;
+struct notifier_block *die_chain;

+static int kstack_depth_to_print = 24;

/*
* If the address is either in the .text section of the
@@ -291,7 +293,12 @@

void die(const char * str, struct pt_regs * regs, long err)
{
+ struct die_args args = { regs, str, err };
+
console_verbose();
+ if (notifier_call_chain(&die_chain, DIE_DIE, &args) == NOTIFY_BAD)
+ goto segv;
+
spin_lock_irq(&die_lock);
bust_spinlocks(1);
handle_BUG(regs);
@@ -299,6 +306,9 @@
show_registers(regs);
bust_spinlocks(0);
spin_unlock_irq(&die_lock);
+
+ notifier_call_chain(&die_chain, DIE_OOPS, &args);
+ segv:
do_exit(SIGSEGV);
}

@@ -404,7 +414,6 @@
}

DO_VM86_ERROR_INFO( 0, SIGFPE, "divide error", divide_error,
FPE_INTDIV, regs->eip)
-DO_VM86_ERROR( 3, SIGTRAP, "int3", int3)
DO_VM86_ERROR( 4, SIGSEGV, "overflow", overflow)
DO_VM86_ERROR( 5, SIGSEGV, "bounds", bounds)
DO_ERROR_INFO( 6, SIGILL, "invalid operand", invalid_op, ILL_ILLOPN,
regs->eip)
@@ -471,6 +480,7 @@
outb(reason, 0x61);
}

+
static void unknown_nmi_error(unsigned char reason, struct pt_regs *
regs)
{
#ifdef CONFIG_MCA
@@ -505,10 +515,13 @@
unknown_nmi_error(reason, regs);
return;
}
+ if (notify_die(DIE_NMI, "nmi", regs, reason) == NOTIFY_BAD)
+ return;
if (reason & 0x80)
mem_parity_error(reason, regs);
if (reason & 0x40)
io_check_error(reason, regs);
+
/*
* Reassert NMI in case it became active meanwhile
* as it's edge-triggered.
@@ -551,6 +564,15 @@
nmi_callback = dummy_nmi_callback;
}

+
+asmlinkage void do_int3(struct pt_regs *regs, long error_code)
+{
+ if (notify_die(DIE_INT3, "int3", regs, error_code) == NOTIFY_BAD)
+ return;
+
+ do_trap(3, SIGTRAP, "int3", 1, regs, error_code, NULL);
+}
+
/*
* Our handling of the processor debug registers is non-trivial.
* We do not clear them on entry and exit from the kernel. Therefore
@@ -581,6 +603,9 @@

__asm__ __volatile__("movl %%db6,%0" : "=r" (condition));

+ if (notify_die(DIE_DEBUG, "debug", regs, error_code) == NOTIFY_BAD)
+ return;
+
/* Mask out spurious debug traps due to lazy DR7 setting */
if (condition & (DR_TRAP0|DR_TRAP1|DR_TRAP2|DR_TRAP3)) {
if (!tsk->thread.debugreg[7])
@@ -927,6 +952,7 @@
set_call_gate(&default_ldt[0],lcall7);
set_call_gate(&default_ldt[4],lcall27);

+ notify_die(DIE_TRAPINIT, "traps initialized", 0, 0);
/*
* Should be a barrier for any external CPU state.
*/





2002-12-11 10:59:00

by Vamsi Krishna S .

[permalink] [raw]
Subject: Re: [PATCH] Notifier for significant events on i386

On Mon, Dec 09, 2002 at 10:08:11PM +0000, Stephen Hemminger wrote:
> This is a successor to the previous patch for notifier callback when NMI
> watchdog occurs. It is a port of x86_64 code (thanks for the suggestion
> Andi Kleen <[email protected]>) with extensions for watchdog, and integration
> of panic handling.
>
> To get notified for panic, oops, NMI and other events the caller needs
> to insert itself in the notify_die chain. The callback can then filter
> out which events are of interest.
>
> This started out as a way to hook in LKCD, but it is general enough that
> kprobe, kdb, and other utilities can use it as well.
>
I support this, it makes all kernel-space debug tools less intrusive.
It may be out of scope for this work but there are a couple of
other issues to consider here:

- turn trap1/trap3 to interrupt gates: kprobes does this, kgdb turns
off interrupts in its own handler, I suppose other tools too need
this.
- notifier lists are racy on SMP, IFAICT, read_lock(&notifier_lock)
needs to be taken in notifier_call_chain(), but that too is
deadlock prone.

Andi,

Isn't this a problem on x86_64 too? What is there to prevent a
handler from being removed from the notifier list while it
is being used to call the handler on another CPU?

I am considering using a RCU-based list for notifier chains.
Corey has done some work on these lines to add NMI notifier
chain, I think it should be generalised on for all notifiers.

Thoughts? Comments?
--
Vamsi Krishna S.
Linux Technology Center,
IBM Software Lab, Bangalore.
Ph: +91 80 5044959
Internet: [email protected]

2002-12-11 11:09:28

by William Lee Irwin III

[permalink] [raw]
Subject: Re: [PATCH] Notifier for significant events on i386

On Wed, Dec 11, 2002 at 04:51:53PM +0530, Vamsi Krishna S . wrote:
> I support this, it makes all kernel-space debug tools less intrusive.
> It may be out of scope for this work but there are a couple of
> other issues to consider here:
> - turn trap1/trap3 to interrupt gates: kprobes does this, kgdb turns
> off interrupts in its own handler, I suppose other tools too need
> this.
> - notifier lists are racy on SMP, IFAICT, read_lock(&notifier_lock)
> needs to be taken in notifier_call_chain(), but that too is
> deadlock prone.
> Andi,
> Isn't this a problem on x86_64 too? What is there to prevent a
> handler from being removed from the notifier list while it
> is being used to call the handler on another CPU?
> I am considering using a RCU-based list for notifier chains.
> Corey has done some work on these lines to add NMI notifier
> chain, I think it should be generalised on for all notifiers.

A coherent explanation of how notifier locking is supposed to work
would be wonderful to have. I'd like to register notifiers but am
pig ignorant of how to lock my structures down to work with it.

Bill

2002-12-11 11:21:12

by Vamsi Krishna S .

[permalink] [raw]
Subject: Re: [PATCH] Notifier for significant events on i386

On Wed, Dec 11, 2002 at 03:16:39AM -0800, William Lee Irwin III wrote:
> On Wed, Dec 11, 2002 at 04:51:53PM +0530, Vamsi Krishna S . wrote:
> > <snip>
> >
> > I am considering using a RCU-based list for notifier chains.
> > Corey has done some work on these lines to add NMI notifier
> > chain, I think it should be generalised on for all notifiers.
>
> A coherent explanation of how notifier locking is supposed to work
> would be wonderful to have. I'd like to register notifiers but am
> pig ignorant of how to lock my structures down to work with it.
>
Unless I am missing something, notifiers have always been racy.
No amount of locking you do in individual modules to prevent
races will help as the notifier chain is walked inside
notifier_call_chain() in kernel/sys.c. One would need to
add some form of locking there (*) so that users of notifier
chains need not worry about races/locking at all.

(*) converting the notifier chain to an RCU-based list guarentees
to modules using the notifier chains that their handlers will
not be called once the handler is unregistered.

> Bill
Vamsi.
--
Vamsi Krishna S.
Linux Technology Center,
IBM Software Lab, Bangalore.
Ph: +91 80 5044959
Internet: [email protected]

2002-12-11 13:48:44

by Corey Minyard

[permalink] [raw]
Subject: Re: [PATCH] Notifier for significant events on i386

Vamsi Krishna S . wrote:

>Andi,
>
>Isn't this a problem on x86_64 too? What is there to prevent a
>handler from being removed from the notifier list while it
>is being used to call the handler on another CPU?
>
>I am considering using a RCU-based list for notifier chains.
>Corey has done some work on these lines to add NMI notifier
>chain, I think it should be generalised on for all notifiers.
>
>Thoughts? Comments?
>
>
This is probably a good idea. I won't be able to work on it for a
while, but you can grab
my patch at http://sourceforge.net/projects/openipmi/, look under the
2.5 releases for
the most current linux-nmi-2.5.xx-vyy.diff.

-Corey


2002-12-11 16:48:26

by Stephen Hemminger

[permalink] [raw]
Subject: Re: [lkcd-devel] Re: [PATCH] Notifier for significant events on i386

On Wed, 2002-12-11 at 03:43, Vamsi Krishna S . wrote:
> On Wed, Dec 11, 2002 at 03:16:39AM -0800, William Lee Irwin III wrote:
> > On Wed, Dec 11, 2002 at 04:51:53PM +0530, Vamsi Krishna S . wrote:
> > > <snip>
> > >
> > > I am considering using a RCU-based list for notifier chains.
> > > Corey has done some work on these lines to add NMI notifier
> > > chain, I think it should be generalised on for all notifiers.
> >
> > A coherent explanation of how notifier locking is supposed to work
> > would be wonderful to have. I'd like to register notifiers but am
> > pig ignorant of how to lock my structures down to work with it.
> >
> Unless I am missing something, notifiers have always been racy.
> No amount of locking you do in individual modules to prevent
> races will help as the notifier chain is walked inside
> notifier_call_chain() in kernel/sys.c. One would need to
> add some form of locking there (*) so that users of notifier
> chains need not worry about races/locking at all.
>
> (*) converting the notifier chain to an RCU-based list guarentees
> to modules using the notifier chains that their handlers will
> not be called once the handler is unregistered.
>
> > Bill
> Vamsi.

There should be a register/unregister interface that uses RCU. That
makes sense. This assumes that this is an uncommon thing that happens
at init/load and unload. The notifier callback's better not change
themselves! Especially, since they get called when the system may be in
a unstable state like in a panic or other error handling.

--
Stephen Hemminger <[email protected]>
Open Source Devlopment Lab

2002-12-11 20:19:42

by John Levon

[permalink] [raw]
Subject: Re: [PATCH] Notifier for significant events on i386

On Wed, Dec 11, 2002 at 05:13:37PM +0530, Vamsi Krishna S . wrote:

> Unless I am missing something, notifiers have always been racy.
> No amount of locking you do in individual modules to prevent
> races will help as the notifier chain is walked inside
> notifier_call_chain() in kernel/sys.c. One would need to
> add some form of locking there (*) so that users of notifier
> chains need not worry about races/locking at all.

There are notifiers being used that sleep inside the called notifiers.

You could easily make a __notifier_call_chain that is lockless and
another one that readlocks the notifier_lock ...

regards
john
--
"Anyone who says you can have a lot of widely dispersed people hack away on
a complicated piece of code and avoid total anarchy has never managed a
software project."
- Andy Tanenbaum

2002-12-11 20:30:19

by Alan

[permalink] [raw]
Subject: Re: [PATCH] Notifier for significant events on i386

On Wed, 2002-12-11 at 20:27, John Levon wrote:
> There are notifiers being used that sleep inside the called notifiers.
>
> You could easily make a __notifier_call_chain that is lockless and
> another one that readlocks the notifier_lock ...

The notifier chains assume the users will do the locking needed for
them. It might be possible to do cool things there with RCU

2002-12-12 00:12:28

by Stephen Hemminger

[permalink] [raw]
Subject: Re: [PATCH] Notifier for significant events on i386


On Wed, 2002-12-11 at 13:15, Alan Cox wrote:
> On Wed, 2002-12-11 at 20:27, John Levon wrote:
> > There are notifiers being used that sleep inside the called notifiers.
> >
> > You could easily make a __notifier_call_chain that is lockless and
> > another one that readlocks the notifier_lock ...
>
> The notifier chains assume the users will do the locking needed for
> them. It might be possible to do cool things there with RCU

This patch changes notifier to use RCU. No interface change, just a little
more memory in each notifier_block. Also some formatting cleanup.
Please review and give comments.

diff -Nru a/include/linux/notifier.h b/include/linux/notifier.h
--- a/include/linux/notifier.h Wed Dec 11 15:46:05 2002
+++ b/include/linux/notifier.h Wed Dec 11 15:46:05 2002
@@ -9,13 +9,19 @@

#ifndef _LINUX_NOTIFIER_H
#define _LINUX_NOTIFIER_H
+
#include <linux/errno.h>
+#include <linux/rcupdate.h>
+#include <linux/completion.h>

struct notifier_block
{
int (*notifier_call)(struct notifier_block *self, unsigned long, void *);
struct notifier_block *next;
int priority;
+
+ struct rcu_head rcu;
+ struct completion complete;
};


diff -Nru a/kernel/sys.c b/kernel/sys.c
--- a/kernel/sys.c Wed Dec 11 15:46:05 2002
+++ b/kernel/sys.c Wed Dec 11 15:46:05 2002
@@ -78,7 +78,7 @@
*/

static struct notifier_block *reboot_notifier_list;
-rwlock_t notifier_lock = RW_LOCK_UNLOCKED;
+static spinlock_t notifier_lock = SPIN_LOCK_UNLOCKED;

/**
* notifier_chain_register - Add notifier to a notifier chain
@@ -89,46 +89,60 @@
*
* Currently always returns zero.
*/
-
int notifier_chain_register(struct notifier_block **list, struct notifier_block *n)
{
- write_lock(&notifier_lock);
- while(*list)
- {
+ spin_lock(&notifier_lock);
+ while (*list) {
if(n->priority > (*list)->priority)
break;
list= &((*list)->next);
}
+
n->next = *list;
- *list=n;
- write_unlock(&notifier_lock);
+ *list = n;
+ wmb();
+
+ spin_unlock(&notifier_lock);
return 0;
}

+static void notifier_unreg_complete(void *arg) {
+ struct notifier_block *n = arg;
+
+ n->next = NULL;
+ complete(&n->complete);
+}
+
/**
* notifier_chain_unregister - Remove notifier from a notifier chain
- * @nl: Pointer to root list pointer
+ * @list: Pointer to root list pointer
* @n: New entry in notifier chain
*
* Removes a notifier from a notifier chain.
*
* Returns zero on success, or %-ENOENT on failure.
*/
-
-int notifier_chain_unregister(struct notifier_block **nl, struct notifier_block *n)
+int notifier_chain_unregister(struct notifier_block **list, struct notifier_block *n)
{
- write_lock(&notifier_lock);
- while((*nl)!=NULL)
- {
- if((*nl)==n)
- {
- *nl=n->next;
- write_unlock(&notifier_lock);
+ spin_lock(&notifier_lock);
+ while(*list) {
+ if (*list == n) {
+ *list = n->next;
+ wmb();
+
+ init_completion(&n->complete);
+ call_rcu(&n->rcu, notifier_unreg_complete, n);
+ spin_unlock(&notifier_lock);
+
+ wait_for_completion(&n->complete);
+
return 0;
}
- nl=&((*nl)->next);
+
+ list = &((*list)->next);
}
- write_unlock(&notifier_lock);
+
+ spin_unlock(&notifier_lock);
return -ENOENT;
}

@@ -147,21 +161,19 @@
* Otherwise, the return value is the return value
* of the last notifier function called.
*/
-
int notifier_call_chain(struct notifier_block **n, unsigned long val, void *v)
{
- int ret=NOTIFY_DONE;
+ int ret = NOTIFY_DONE;
struct notifier_block *nb = *n;

- while(nb)
- {
- ret=nb->notifier_call(nb,val,v);
- if(ret&NOTIFY_STOP_MASK)
- {
+ while (nb) {
+ ret = nb->notifier_call(nb,val,v);
+ if (ret & NOTIFY_STOP_MASK)
return ret;
- }
- nb=nb->next;
+ nb = nb->next;
+ read_barrier_depends();
}
+
return ret;
}





2002-12-12 07:13:29

by Vamsi Krishna S .

[permalink] [raw]
Subject: Re: [PATCH] Notifier for significant events on i386

On Thu, Dec 12, 2002 at 12:25:47AM +0000, Stephen Hemminger wrote:
>
> This patch changes notifier to use RCU. No interface change, just a little
> more memory in each notifier_block. Also some formatting cleanup.
> Please review and give comments.
>
> <snip patch>

This looks good. I have a few of comments:

- add read_lock_rcu() / read_unlock_rcu() around the loop in
notifier_call_chain() to be preempt-safe.

- I would suggest using struct list_head in the notifier_block
and use the RCU list routines from include/linux/list.h
instead of spreading subtle RCU memory-barrier black magic.

- Even though RCU list reading is lockless, premption needs to
be disabled while reading as mentioned above. So, we do
need an __notifier_call_chain() version for those handlers
that could sleep inside the handler: they will have to
handle the required locking themselves.
--
Vamsi Krishna S.
Linux Technology Center,
IBM Software Lab, Bangalore.
Ph: +91 80 5044959
Internet: [email protected]

2002-12-12 07:52:16

by Vamsi Krishna S .

[permalink] [raw]
Subject: Re: [PATCH] Notifier for significant events on i386

On Wed, Dec 11, 2002 at 10:13:33PM +0000, Alan Cox wrote:
> On Wed, 2002-12-11 at 20:27, John Levon wrote:
> > There are notifiers being used that sleep inside the called notifiers.
> >
> > You could easily make a __notifier_call_chain that is lockless and
> > another one that readlocks the notifier_lock ...
>
> The notifier chains assume the users will do the locking needed for
> them. It might be possible to do cool things there with RCU
>
Hmm. If the called notifiers sleep, RCU can't prevent the notifiers
from being unregistered while they are executing. I suppose then that
RCU is not suitable for generic notifiers. It may still be useful
for managing notifiers in very fast paths where acquiring a
read lock may be undesirable (like the oprofile's NMI handler).

Instead of a lockless __notifier_call_chain, I would rather add
a notifier_call_chain_safe() to avoid changing the existing
handlers. Something like the patch below.

The safe version should be used for the trap handlers: we know
that the called notifiers will not sleep and the smp-safeness
makes the handlers simpler.

Thanks,
Vamsi.
--
Vamsi Krishna S.
Linux Technology Center,
IBM Software Lab, Bangalore.
Ph: +91 80 5044959
Internet: [email protected]
--
diff -urN -X /home/vamsi/.dontdiff 51-pure/include/linux/notifier.h 51-notifier/include/linux/notifier.h
--- 51-pure/include/linux/notifier.h 2002-12-10 08:15:43.000000000 +0530
+++ 51-notifier/include/linux/notifier.h 2002-12-12 11:53:53.000000000 +0530
@@ -24,6 +24,7 @@
extern int notifier_chain_register(struct notifier_block **list, struct notifier_block *n);
extern int notifier_chain_unregister(struct notifier_block **nl, struct notifier_block *n);
extern int notifier_call_chain(struct notifier_block **n, unsigned long val, void *v);
+extern int notifier_call_chain_safe(struct notifier_block **n, unsigned long val, void *v);

#define NOTIFY_DONE 0x0000 /* Don't care */
#define NOTIFY_OK 0x0001 /* Suits me */
diff -urN -X /home/vamsi/.dontdiff 51-pure/kernel/sys.c 51-notifier/kernel/sys.c
--- 51-pure/kernel/sys.c 2002-12-10 08:15:43.000000000 +0530
+++ 51-notifier/kernel/sys.c 2002-12-12 11:52:45.000000000 +0530
@@ -166,6 +166,29 @@
}

/**
+ * notifier_call_chain_safe - Call functions in a notifier chain
+ * @n: Pointer to root pointer of notifier chain
+ * @val: Value passed unmodified to notifier function
+ * @v: Pointer passed unmodified to notifier function
+ *
+ * Calls each function in a notifier chain in turn while ensuring
+ * that a notifier cannot be unregistered while it is being
+ * executed. Because a read_lock is taken, the called notifiers
+ * must not sleep.
+ */
+
+int notifier_call_chain_safe(struct notifier_block **n, unsigned long val, void *v)
+{
+ int ret;
+
+ read_lock(&notifier_lock);
+ ret = notifier_call_chain(n, val, v);
+ read_unlock(&notifier_lock);
+
+ return ret;
+}
+
+/**
* register_reboot_notifier - Register function to be called at reboot time
* @nb: Info about notifier function to be called
*

2002-12-12 17:46:23

by Stephen Hemminger

[permalink] [raw]
Subject: Re: [PATCH] Notifier for significant events on i386

On Wed, 2002-12-11 at 23:34, Vamsi Krishna S . wrote:
> On Thu, Dec 12, 2002 at 12:25:47AM +0000, Stephen Hemminger wrote:
> >
> > This patch changes notifier to use RCU. No interface change, just a little
> > more memory in each notifier_block. Also some formatting cleanup.
> > Please review and give comments.
> >
> > <snip patch>
>
> This looks good. I have a few of comments:
>
> - add read_lock_rcu() / read_unlock_rcu() around the loop in
> notifier_call_chain() to be preempt-safe.
>
> - I would suggest using struct list_head in the notifier_block
> and use the RCU list routines from include/linux/list.h
> instead of spreading subtle RCU memory-barrier black magic.

That would be good for a new interface, but the existing code depends on
the single linked behavior. Many initializer's are pre-C99 style, and
more importantly there is no distinction between a list element and a
list head. To work with list macros the head has to be initialized
correctly. It is better not to worry about changing the interface and
avoid having to change all the calling code.

The only advantage to the doubly-linked list (besides std macros) is
that it is possible to unregister without knowing the head. There was a
patch several months ago to do singly-linked list macros but it looks
like it never got accepted. If the obscurity of the macro's is desired
then maybe the way to go is creating a slist.h with RCU extensions.


> - Even though RCU list reading is lockless, premption needs to
> be disabled while reading as mentioned above. So, we do
> need an __notifier_call_chain() version for those handlers
> that could sleep inside the handler: they will have to
> handle the required locking themselves.

The use of notifier today is limited to things that can't sleep. As far
as I can tell, it is intended for system events like reboot, panic;
where sleeping doesn't make sense. I think that is why the original
notifier_call_chain did not grab the read_lock.

--
Stephen Hemminger <[email protected]>
Open Source Devlopment Lab

2002-12-12 17:50:17

by John Levon

[permalink] [raw]
Subject: Re: [PATCH] Notifier for significant events on i386

On Thu, Dec 12, 2002 at 09:53:35AM -0800, Stephen Hemminger wrote:

> The use of notifier today is limited to things that can't sleep. As far

kernel/profile.c

You'd have to move that to a different API if you want to force notifier
callbacks non-sleepable

regards
john

--
"Anyone who says you can have a lot of widely dispersed people hack away on
a complicated piece of code and avoid total anarchy has never managed a
software project."
- Andy Tanenbaum

2002-12-13 12:12:09

by Vamsi Krishna S .

[permalink] [raw]
Subject: Re: [PATCH] Notifier for significant events on i386

On Thu, Dec 12, 2002 at 05:58:04PM +0000, John Levon wrote:
> On Thu, Dec 12, 2002 at 09:53:35AM -0800, Stephen Hemminger wrote:
>
> > The use of notifier today is limited to things that can't sleep. As far
>
> kernel/profile.c
>
> You'd have to move that to a different API if you want to force notifier
> callbacks non-sleepable
>
Yes, indeed most of the existing notifiers potentially sleep. That is why
I think we should, may be, leave the existing notifiers alone. Add a
notifier_call_chain_safe() and use that to run trap1/trap3/NMI etc
notifiers.

--
Vamsi Krishna S.
Linux Technology Center,
IBM Software Lab, Bangalore.
Ph: +91 80 5044959
Internet: [email protected]