2002-12-18 22:08:36

by Stephen Hemminger

Subject: [PATCH] (4/5) improved notifier callback mechanism - read copy update

The notifier interface was only partially locked. notifier_call_chain
needs to be callable from places where it is impossible to take locks
safely without risking deadlock; for example, the NMI watchdog timeout.

This patch uses read-copy-update to manage the list. One extra bit of
safety is a reference count on the notifier_blocks, which allows for
cases like oprofile that need to sleep in a callback.
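
For illustration, here is a minimal sketch of a callback under the new
scheme (the chain head and function names are hypothetical; only
struct notifier_block and the chain calls come from this patch):

#include <linux/kernel.h>
#include <linux/init.h>
#include <linux/notifier.h>

/* Hypothetical chain head; real chains such as reboot_notifier_list
 * live in kernel/sys.c. */
static LIST_HEAD(example_notifier_list);

static int example_event(struct notifier_block *self,
			 unsigned long val, void *data)
{
	/* This callback may sleep: the inuse reference count keeps
	 * notifier_chain_unregister() from returning (and the caller
	 * from freeing the block) until we are done. */
	printk(KERN_INFO "example notifier: event %lu\n", val);
	return NOTIFY_DONE;
}

static struct notifier_block example_nb = {
	.notifier_call	= example_event,
	.priority	= 0,
};

static int __init example_init(void)
{
	/* Registration is unchanged for callers. */
	return notifier_chain_register(&example_notifier_list, &example_nb);
}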

# This is a BitKeeper generated patch for the following project:
# Project Name: Linux kernel tree
# This patch format is intended for GNU patch command version 2.5 or higher.
# This patch includes the following deltas:
# ChangeSet 1.983 -> 1.984
# include/linux/notifier.h 1.7 -> 1.8
# kernel/sys.c 1.39 -> 1.40
#
# The following is the BitKeeper ChangeSet Log
# --------------------------------------------
# 02/12/18 [email protected] 1.984
# 04-notifier-rcu.patch
# --------------------------------------------
#
diff -Nru a/include/linux/notifier.h b/include/linux/notifier.h
--- a/include/linux/notifier.h Wed Dec 18 10:00:43 2002
+++ b/include/linux/notifier.h Wed Dec 18 10:00:43 2002
@@ -11,14 +11,15 @@
#define _LINUX_NOTIFIER_H

#include <linux/list.h>
+#include <asm/atomic.h>

struct notifier_block {
int (*notifier_call)(struct notifier_block *, unsigned long, void *);
- int priority;
+ int priority;

+ atomic_t inuse;
struct list_head link;
-};
-
+} ____cacheline_aligned;

#ifdef __KERNEL__

diff -Nru a/kernel/sys.c b/kernel/sys.c
--- a/kernel/sys.c Wed Dec 18 10:00:43 2002
+++ b/kernel/sys.c Wed Dec 18 10:00:43 2002
@@ -22,6 +22,7 @@
#include <linux/security.h>
#include <linux/dcookies.h>
#include <linux/suspend.h>
+#include <linux/rcupdate.h>

#include <asm/uaccess.h>
#include <asm/io.h>
@@ -78,7 +79,7 @@
*/

static LIST_HEAD(reboot_notifier_list);
-static rwlock_t notifier_lock = RW_LOCK_UNLOCKED;
+static spinlock_t notifier_lock = SPIN_LOCK_UNLOCKED;

/**
* notifier_chain_register - Add notifier to a notifier chain
@@ -95,7 +96,9 @@
struct list_head *p;

INIT_LIST_HEAD(&n->link);
- write_lock(&notifier_lock);
+ atomic_set(&n->inuse, 0);
+
+ spin_lock(&notifier_lock);
list_for_each(p, list) {
struct notifier_block *e
= list_entry(p, struct notifier_block, link);
@@ -104,7 +107,7 @@
}

list_add(&n->link, p);
- write_unlock(&notifier_lock);
+ spin_unlock(&notifier_lock);
return 0;
}

@@ -122,15 +125,24 @@
{
struct list_head *cur;

- write_lock(&notifier_lock);
+ might_sleep();
+
+ spin_lock(&notifier_lock);
list_for_each(cur, list) {
if (n == list_entry(cur, struct notifier_block, link)) {
- list_del(cur);
- write_unlock(&notifier_lock);
+ list_del_rcu(cur);
+ spin_unlock(&notifier_lock);
+
+ synchronize_kernel();
+
+ /* notifier may be sleeping */
+ while (atomic_read(&n->inuse) > 0)
+ yield();
+
return 0;
}
}
- write_unlock(&notifier_lock);
+ spin_unlock(&notifier_lock);
return -ENOENT;
}

@@ -148,23 +160,31 @@
* the notifier function which halted execution.
* Otherwise, the return value is the return value
* of the last notifier function called.
+ *
+ * This might be called from NMI or other context where it
+ * is impossible to sleep or spin.
*/

int notifier_call_chain(struct list_head *list, unsigned long val, void *v)
{
- struct list_head *p;
+ struct list_head *p, *nxtp;
int ret = NOTIFY_DONE;

- list_for_each(p, list) {
+ rcu_read_lock();
+ list_for_each_safe_rcu(p, nxtp, list) {
struct notifier_block *nb =
list_entry(p, struct notifier_block, link);

+ atomic_inc(&nb->inuse);
ret = nb->notifier_call(nb,val,v);
+ atomic_dec(&nb->inuse);
+
if (ret & NOTIFY_STOP_MASK)
goto end_loop;
}

end_loop:
+ rcu_read_unlock();
return ret;
}
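
In other words, the teardown ordering the diff above establishes
(a sketch; the two columns are CPUs running concurrently, and the
reader could be in NMI context):

/*
 * unregistering CPU                       reader CPU
 * -----------------                       ----------
 * spin_lock(&notifier_lock);              rcu_read_lock();
 * list_del_rcu(cur);                      atomic_inc(&nb->inuse);
 * spin_unlock(&notifier_lock);            nb->notifier_call(nb, val, v);
 * synchronize_kernel();                   atomic_dec(&nb->inuse);
 *   (waits for all read-side              rcu_read_unlock();
 *    sections to finish)
 * while (atomic_read(&n->inuse) > 0)
 *         yield();
 * return 0;   caller may now free n
 */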





2002-12-19 12:26:30

by Vamsi Krishna S .

Subject: Re: [PATCH] (4/5) improved notifier callback mechanism - read copy update

Hi Stephen,

On Wed, Dec 18, 2002 at 11:06:08PM +0000, Stephen Hemminger wrote:
> The notifier interface was only partially locked. notifier_call_chain
> needs to be callable from places where it is impossible to take locks
> safely without risking deadlock; for example, the NMI watchdog timeout.
>
> This patch uses read-copy-update to manage the list. One extra bit of
> safety is a reference count on the notifier_blocks, which allows for
> cases like oprofile that need to sleep in a callback.
>
<snip>
>
> int notifier_call_chain(struct list_head *list, unsigned long val, void *v)
> {
> - struct list_head *p;
> + struct list_head *p, *nxtp;
> int ret = NOTIFY_DONE;
>
> - list_for_each(p, list) {
> + rcu_read_lock();
> + list_for_each_safe_rcu(p, nxtp, list) {
> struct notifier_block *nb =
> list_entry(p, struct notifier_block, link);
>
> + atomic_inc(&nb->inuse);
> ret = nb->notifier_call(nb,val,v);
> + atomic_dec(&nb->inuse);
> +

There could be a small problem here. When rcu_read_lock() is called,
it bumps the preempt_count, so when the called handler attempts
to sleep, it will oops with "Bad: scheduling in atomic region".
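
Roughly, with CONFIG_PREEMPT, the RCU read-side primitives in this tree
reduce to the following (a paraphrase of include/linux/rcupdate.h and
the check in kernel/sched.c, not the exact source):

#define rcu_read_lock()		preempt_disable()
#define rcu_read_unlock()	preempt_enable()

	/* ... so schedule() complains if a handler blocks in between: */
	if (unlikely(in_atomic()))
		printk(KERN_ERR "bad: scheduling while atomic!\n");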

--
Vamsi Krishna S.
Linux Technology Center,
IBM Software Lab, Bangalore.
Ph: +91 80 5044959
Internet: [email protected]

2002-12-19 17:29:22

by Stephen Hemminger

Subject: Re: [PATCH] (4/5) improved notifier callback mechanism - read copy update

Thanks, will look into it in more detail. Perhaps figuring out how to do
oprofile without sleeping would be best.

On Thu, 2002-12-19 at 04:49, Vamsi Krishna S . wrote:
> Hi Stephen,
>
> On Wed, Dec 18, 2002 at 11:06:08PM +0000, Stephen Hemminger wrote:
> > The notifier interface was only partially locked. notifier_call_chain
> > needs to be callable from places where it is impossible to take locks
> > safely without risking deadlock; for example, the NMI watchdog timeout.
> >
> > This patch uses read-copy-update to manage the list. One extra bit of
> > safety is a reference count on the notifier_blocks, which allows for
> > cases like oprofile that need to sleep in a callback.
> >
> <snip>
> >
> > int notifier_call_chain(struct list_head *list, unsigned long val, void *v)
> > {
> > - struct list_head *p;
> > + struct list_head *p, *nxtp;
> > int ret = NOTIFY_DONE;
> >
> > - list_for_each(p, list) {
> > + rcu_read_lock();
> > + list_for_each_safe_rcu(p, nxtp, list) {
> > struct notifier_block *nb =
> > list_entry(p, struct notifier_block, link);
> >
> > + atomic_inc(&nb->inuse);
> > ret = nb->notifier_call(nb,val,v);
> > + atomic_dec(&nb->inuse);
> > +
>
> There could be a small problem here. When rcu_read_lock() is called,
> it bumps the preempt_count, so when the called handler attempts
> to sleep, it will oops with "Bad: scheduling in atomic region".
--
Stephen Hemminger <[email protected]>
Open Source Development Lab

2002-12-19 18:00:23

by John Levon

Subject: Re: [PATCH] (4/5) improved notifier callback mechanism - read copy update

On Thu, Dec 19, 2002 at 09:37:10AM -0800, Stephen Hemminger wrote:

> Thanks, will look into it in more detail. Perhaps figuring out how to do
> oprofile without sleeping would be best.

You can try, but I don't think you'll get far ... maintaining the
necessary "no samples present for exited process" invariant is difficult
without sleeping.

It would seem better just to force the profiling hooks to use a
different API.

regards
john

--
"ALL television is children's television."
- Richard Adler

2002-12-19 22:59:52

by Stephen Hemminger

Subject: [PATCH] (4/5) notifier callback mechanism - read copy update V2

Here is the third try at using RCU for notifier callbacks. The
difference is that this version has a separate entry point for use by
the kernel profiling code, which does its own locking and can sleep.
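
To make the split explicit, the two entry points end up used like this
(profile_exit_task is from the diff below; x86_profile_hook is shown as
it looks after patch 5/5):

/* NMI/interrupt context: RCU-protected walk, handlers must not sleep. */
void x86_profile_hook(struct pt_regs *regs)
{
	notifier_call_chain(&profile_listeners, 0, regs);
}

/* Process context, serialized by profile_rwsem: handlers may sleep. */
void profile_exit_task(struct task_struct *task)
{
	down_read(&profile_rwsem);
	notifier_call_chain_safe(&exit_task_notifier, 0, task);
	up_read(&profile_rwsem);
}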

diff -Nru a/include/linux/notifier.h b/include/linux/notifier.h
--- a/include/linux/notifier.h Thu Dec 19 14:51:07 2002
+++ b/include/linux/notifier.h Thu Dec 19 14:51:07 2002
@@ -25,6 +25,7 @@
extern int notifier_chain_register(struct list_head *, struct notifier_block *);
extern int notifier_chain_unregister(struct list_head *, struct notifier_block *);
extern int notifier_call_chain(struct list_head *, unsigned long, void *);
+extern int notifier_call_chain_safe(struct list_head *, unsigned long, void *);

extern int register_panic_notifier(struct notifier_block *);
extern int unregister_panic_notifier(struct notifier_block *);
diff -Nru a/kernel/profile.c b/kernel/profile.c
--- a/kernel/profile.c Thu Dec 19 14:51:07 2002
+++ b/kernel/profile.c Thu Dec 19 14:51:07 2002
@@ -55,21 +55,21 @@
void profile_exit_task(struct task_struct * task)
{
down_read(&profile_rwsem);
- notifier_call_chain(&exit_task_notifier, 0, task);
+ notifier_call_chain_safe(&exit_task_notifier, 0, task);
up_read(&profile_rwsem);
}

void profile_exit_mmap(struct mm_struct * mm)
{
down_read(&profile_rwsem);
- notifier_call_chain(&exit_mmap_notifier, 0, mm);
+ notifier_call_chain_safe(&exit_mmap_notifier, 0, mm);
up_read(&profile_rwsem);
}

void profile_exec_unmap(struct mm_struct * mm)
{
down_read(&profile_rwsem);
- notifier_call_chain(&exec_unmap_notifier, 0, mm);
+ notifier_call_chain_safe(&exec_unmap_notifier, 0, mm);
up_read(&profile_rwsem);
}

diff -Nru a/kernel/sys.c b/kernel/sys.c
--- a/kernel/sys.c Thu Dec 19 14:51:07 2002
+++ b/kernel/sys.c Thu Dec 19 14:51:07 2002
@@ -22,6 +22,7 @@
#include <linux/security.h>
#include <linux/dcookies.h>
#include <linux/suspend.h>
+#include <linux/rcupdate.h>

#include <asm/uaccess.h>
#include <asm/io.h>
@@ -78,7 +79,7 @@
*/

static LIST_HEAD(reboot_notifier_list);
-static rwlock_t notifier_lock = RW_LOCK_UNLOCKED;
+static spinlock_t notifier_lock = SPIN_LOCK_UNLOCKED;

/**
* notifier_chain_register - Add notifier to a notifier chain
@@ -95,7 +96,8 @@
struct list_head *p;

INIT_LIST_HEAD(&n->link);
- write_lock(&notifier_lock);
+
+ spin_lock(&notifier_lock);
list_for_each(p, list) {
struct notifier_block *e
= list_entry(p, struct notifier_block, link);
@@ -104,7 +106,7 @@
}

list_add(&n->link, p);
- write_unlock(&notifier_lock);
+ spin_unlock(&notifier_lock);
return 0;
}

@@ -122,15 +124,18 @@
{
struct list_head *cur;

- write_lock(&notifier_lock);
+ spin_lock(&notifier_lock);
list_for_each(cur, list) {
if (n == list_entry(cur, struct notifier_block, link)) {
- list_del(cur);
- write_unlock(&notifier_lock);
+ list_del_rcu(cur);
+ spin_unlock(&notifier_lock);
+
+ synchronize_kernel();
+
return 0;
}
}
- write_unlock(&notifier_lock);
+ spin_unlock(&notifier_lock);
return -ENOENT;
}

@@ -148,18 +153,62 @@
* the notifier function which halted execution.
* Otherwise, the return value is the return value
* of the last notifier function called.
+ *
+ * This might be called from NMI or other context where it
+ * is impossible to sleep or spin. The restriction is that the
+ * handler must not sleep since rcu_read_lock disables preempt.
*/
-
int notifier_call_chain(struct list_head *list, unsigned long val, void *v)
{
- struct list_head *p;
+ struct list_head *p, *nxtp;
int ret = NOTIFY_DONE;

- list_for_each(p, list) {
+ rcu_read_lock();
+ list_for_each_safe_rcu(p, nxtp, list) {
+ struct notifier_block *nb =
+ list_entry(p, struct notifier_block, link);
+
+ ret = nb->notifier_call(nb,val,v);
+
+ if (ret & NOTIFY_STOP_MASK)
+ goto end_loop;
+ }
+
+ end_loop:
+ rcu_read_unlock();
+ return ret;
+}
+
+/**
+ * notifier_call_chain_safe - Call functions in a notifier chain
+ * @n: Pointer to root pointer of notifier chain
+ * @val: Value passed unmodified to notifier function
+ * @v: Pointer passed unmodified to notifier function
+ *
+ * Calls each function in a notifier chain in turn.
+ *
+ * If the return value of the notifier can be and'd
+ * with %NOTIFY_STOP_MASK, then notifier_call_chain
+ * will return immediately, with the return value of
+ * the notifier function which halted execution.
+ * Otherwise, the return value is the return value
+ * of the last notifier function called.
+ *
+ * This differs from notifier_call_chain because it assumes
+ * that the caller has done its own mutual exclusion and
+ * does not want to use read-copy-update.
+ */
+int notifier_call_chain_safe(struct list_head *list, unsigned long val, void *v)
+{
+ struct list_head *p, *nxtp;
+ int ret = NOTIFY_DONE;
+
+ list_for_each_safe(p, nxtp, list) {
struct notifier_block *nb =
list_entry(p, struct notifier_block, link);

ret = nb->notifier_call(nb,val,v);
+
if (ret & NOTIFY_STOP_MASK)
goto end_loop;
}



2002-12-19 23:03:13

by Stephen Hemminger

Subject: [PATCH] (5/5) improved notifier callback mechanism -- remove old locking V2

After the previous patches, the notifier interface now has its own
locking. Therefore the existing locking done by the oprofile interface
is superfluous.

diff -Nru a/arch/i386/kernel/profile.c b/arch/i386/kernel/profile.c
--- a/arch/i386/kernel/profile.c Tue Dec 17 11:25:47 2002
+++ b/arch/i386/kernel/profile.c Tue Dec 17 11:25:47 2002
@@ -6,40 +6,25 @@
*/

#include <linux/profile.h>
-#include <linux/spinlock.h>
#include <linux/notifier.h>
#include <linux/irq.h>
#include <asm/hw_irq.h>

static LIST_HEAD(profile_listeners);
-static rwlock_t profile_lock = RW_LOCK_UNLOCKED;

int register_profile_notifier(struct notifier_block * nb)
{
- int err;
- write_lock_irq(&profile_lock);
- err = notifier_chain_register(&profile_listeners, nb);
- write_unlock_irq(&profile_lock);
- return err;
+ return notifier_chain_register(&profile_listeners, nb);
}


int unregister_profile_notifier(struct notifier_block * nb)
{
- int err;
- write_lock_irq(&profile_lock);
- err = notifier_chain_unregister(&profile_listeners, nb);
- write_unlock_irq(&profile_lock);
- return err;
+ return notifier_chain_unregister(&profile_listeners, nb);
}


void x86_profile_hook(struct pt_regs * regs)
{
- /* we would not even need this lock if
- * we had a global cli() on register/unregister
- */
- read_lock(&profile_lock);
notifier_call_chain(&profile_listeners, 0, regs);
- read_unlock(&profile_lock);
}