If poll or select is waiting on /proc/mdstat when md-mod is unloaded
an oops will ensure when the poll/select completes.
This is because the wait_queue_head which is registered with poll_wait()
is local to the module and no longer exists when the poll completes and
detaches that wait_queue_head (in poll_free_wait -> remove_wait_queue).
To fix this we need the wait_queue_head to have (at least) the same life
time as the proc_dir_entry. So this patch places it in that structure.
We:
- add pde_poll_wait to struct proc_dir_entry
- call poll_wait() passing this when poll() is called on the proc file
- export a function proc_wake_up which will call wake_up() on pde_poll_wait
and make use of all that in md.c
Reported-by: "majianpeng" <[email protected]>
Signed-off-by: NeilBrown <[email protected]>
--
Do we have a maintainer for fs/proc ??
If I could get a couple of Acks, or constructive comments, on this,
I would appreciate it.
Thanks,
NeilBrown
diff --git a/drivers/md/md.c b/drivers/md/md.c
index 4ad5cc4e63e8..1bf70d9c55d3 100644
--- a/drivers/md/md.c
+++ b/drivers/md/md.c
@@ -193,12 +193,12 @@ EXPORT_SYMBOL_GPL(bio_clone_mddev);
* start array, stop array, error, add device, remove device,
* start build, activate spare
*/
-static DECLARE_WAIT_QUEUE_HEAD(md_event_waiters);
+static struct proc_dir_entry *mdstat_pde;
static atomic_t md_event_count;
void md_new_event(struct mddev *mddev)
{
atomic_inc(&md_event_count);
- wake_up(&md_event_waiters);
+ proc_wake_up(mdstat_pde);
}
EXPORT_SYMBOL_GPL(md_new_event);
@@ -208,7 +208,7 @@ EXPORT_SYMBOL_GPL(md_new_event);
static void md_new_event_inintr(struct mddev *mddev)
{
atomic_inc(&md_event_count);
- wake_up(&md_event_waiters);
+ proc_wake_up(mdstat_pde);
}
/*
@@ -7187,8 +7187,6 @@ static unsigned int mdstat_poll(struct file *filp, poll_table *wait)
struct seq_file *seq = filp->private_data;
int mask;
- poll_wait(filp, &md_event_waiters, wait);
-
/* always allow read */
mask = POLLIN | POLLRDNORM;
@@ -8557,7 +8555,7 @@ static void md_geninit(void)
{
pr_debug("md: sizeof(mdp_super_t) = %d\n", (int)sizeof(mdp_super_t));
- proc_create("mdstat", S_IRUGO, NULL, &md_seq_fops);
+ mdstat_pde = proc_create("mdstat", S_IRUGO, NULL, &md_seq_fops);
}
static int __init md_init(void)
diff --git a/fs/proc/generic.c b/fs/proc/generic.c
index b7f268eb5f45..c579da4cd765 100644
--- a/fs/proc/generic.c
+++ b/fs/proc/generic.c
@@ -357,6 +357,7 @@ static struct proc_dir_entry *__proc_create(struct proc_dir_entry **parent,
atomic_set(&ent->count, 1);
spin_lock_init(&ent->pde_unload_lock);
INIT_LIST_HEAD(&ent->pde_openers);
+ init_waitqueue_head(&ent->pde_poll_wait);
out:
return ent;
}
diff --git a/fs/proc/inode.c b/fs/proc/inode.c
index 124fc43c7090..353fc199e8b5 100644
--- a/fs/proc/inode.c
+++ b/fs/proc/inode.c
@@ -234,13 +234,21 @@ static unsigned int proc_reg_poll(struct file *file, struct poll_table_struct *p
unsigned int (*poll)(struct file *, struct poll_table_struct *);
if (use_pde(pde)) {
poll = pde->proc_fops->poll;
- if (poll)
+ if (poll) {
+ poll_wait(file, &pde->pde_poll_wait, pts);
rv = poll(file, pts);
+ }
unuse_pde(pde);
}
return rv;
}
+void proc_wake_up(struct proc_dir_entry *pde)
+{
+ wake_up(&pde->pde_poll_wait);
+}
+EXPORT_SYMBOL_GPL(proc_wake_up);
+
static long proc_reg_unlocked_ioctl(struct file *file, unsigned int cmd, unsigned long arg)
{
struct proc_dir_entry *pde = PDE(file_inode(file));
diff --git a/fs/proc/internal.h b/fs/proc/internal.h
index 651d09a11dde..6f9f84eecded 100644
--- a/fs/proc/internal.h
+++ b/fs/proc/internal.h
@@ -46,6 +46,7 @@ struct proc_dir_entry {
struct completion *pde_unload_completion;
struct list_head pde_openers; /* who did ->open, but not ->release */
spinlock_t pde_unload_lock; /* proc_fops checks and pde_users bumps */
+ wait_queue_head_t pde_poll_wait; /* For proc_reg_poll */
u8 namelen;
char name[];
};
diff --git a/include/linux/proc_fs.h b/include/linux/proc_fs.h
index 608e60a74c3c..a4a3d5f001ef 100644
--- a/include/linux/proc_fs.h
+++ b/include/linux/proc_fs.h
@@ -34,6 +34,7 @@ static inline struct proc_dir_entry *proc_create(
return proc_create_data(name, mode, parent, proc_fops, NULL);
}
+extern void proc_wake_up(struct proc_dir_entry *pde);
extern void proc_set_size(struct proc_dir_entry *, loff_t);
extern void proc_set_user(struct proc_dir_entry *, kuid_t, kgid_t);
extern void *PDE_DATA(const struct inode *);
On Thu, 27 Feb 2014 17:24:45 +1100 NeilBrown <[email protected]> wrote:
> If poll or select is waiting on /proc/mdstat when md-mod is unloaded
> an oops will ensure when the poll/select completes.
>
> This is because the wait_queue_head which is registered with poll_wait()
> is local to the module and no longer exists when the poll completes and
> detaches that wait_queue_head (in poll_free_wait -> remove_wait_queue).
>
> To fix this we need the wait_queue_head to have (at least) the same life
> time as the proc_dir_entry. So this patch places it in that structure.
>
> We:
> - add pde_poll_wait to struct proc_dir_entry
> - call poll_wait() passing this when poll() is called on the proc file
> - export a function proc_wake_up which will call wake_up() on pde_poll_wait
>
> and make use of all that in md.c
This sounds wrong. If a userspace process is waiting on
md_event_waiters then the md module is "busy" and the rmmod attempt
should fail?
On Thu, 27 Feb 2014 12:58:07 -0800 Andrew Morton <[email protected]>
wrote:
> On Thu, 27 Feb 2014 17:24:45 +1100 NeilBrown <[email protected]> wrote:
>
> > If poll or select is waiting on /proc/mdstat when md-mod is unloaded
> > an oops will ensure when the poll/select completes.
> >
> > This is because the wait_queue_head which is registered with poll_wait()
> > is local to the module and no longer exists when the poll completes and
> > detaches that wait_queue_head (in poll_free_wait -> remove_wait_queue).
> >
> > To fix this we need the wait_queue_head to have (at least) the same life
> > time as the proc_dir_entry. So this patch places it in that structure.
> >
> > We:
> > - add pde_poll_wait to struct proc_dir_entry
> > - call poll_wait() passing this when poll() is called on the proc file
> > - export a function proc_wake_up which will call wake_up() on pde_poll_wait
> >
> > and make use of all that in md.c
>
> This sounds wrong. If a userspace process is waiting on
> md_event_waiters then the md module is "busy" and the rmmod attempt
> should fail?
Al Viro says "no" quite firmly.
I think the core argument is that
rmmod md-mod < /proc/mdstat
would deadlock.
http://marc.info/?l=linux-fsdevel&m=133024267507384
Thanks,
NeilBrown
On Fri, 28 Feb 2014 08:34:43 +1100 NeilBrown <[email protected]> wrote:
> On Thu, 27 Feb 2014 12:58:07 -0800 Andrew Morton <[email protected]>
> wrote:
>
> > On Thu, 27 Feb 2014 17:24:45 +1100 NeilBrown <[email protected]> wrote:
> >
> > > If poll or select is waiting on /proc/mdstat when md-mod is unloaded
> > > an oops will ensure when the poll/select completes.
> > >
> > > This is because the wait_queue_head which is registered with poll_wait()
> > > is local to the module and no longer exists when the poll completes and
> > > detaches that wait_queue_head (in poll_free_wait -> remove_wait_queue).
> > >
> > > To fix this we need the wait_queue_head to have (at least) the same life
> > > time as the proc_dir_entry. So this patch places it in that structure.
> > >
> > > We:
> > > - add pde_poll_wait to struct proc_dir_entry
> > > - call poll_wait() passing this when poll() is called on the proc file
> > > - export a function proc_wake_up which will call wake_up() on pde_poll_wait
> > >
> > > and make use of all that in md.c
> >
> > This sounds wrong. If a userspace process is waiting on
> > md_event_waiters then the md module is "busy" and the rmmod attempt
> > should fail?
>
> Al Viro says "no" quite firmly.
>
> I think the core argument is that
>
> rmmod md-mod < /proc/mdstat
>
> would deadlock.
Well, only if the rmmod hangs around waiting for the module to go idle.
I'm thinking rmmod should fail. EBUSY.
> http://marc.info/?l=linux-fsdevel&m=133024267507384
Why don't a billion blocking-procfs-read sites have this problem?
On Thu, 27 Feb 2014 13:51:25 -0800 Andrew Morton <[email protected]>
wrote:
> On Fri, 28 Feb 2014 08:34:43 +1100 NeilBrown <[email protected]> wrote:
>
> > On Thu, 27 Feb 2014 12:58:07 -0800 Andrew Morton <[email protected]>
> > wrote:
> >
> > > On Thu, 27 Feb 2014 17:24:45 +1100 NeilBrown <[email protected]> wrote:
> > >
> > > > If poll or select is waiting on /proc/mdstat when md-mod is unloaded
> > > > an oops will ensure when the poll/select completes.
> > > >
> > > > This is because the wait_queue_head which is registered with poll_wait()
> > > > is local to the module and no longer exists when the poll completes and
> > > > detaches that wait_queue_head (in poll_free_wait -> remove_wait_queue).
> > > >
> > > > To fix this we need the wait_queue_head to have (at least) the same life
> > > > time as the proc_dir_entry. So this patch places it in that structure.
> > > >
> > > > We:
> > > > - add pde_poll_wait to struct proc_dir_entry
> > > > - call poll_wait() passing this when poll() is called on the proc file
> > > > - export a function proc_wake_up which will call wake_up() on pde_poll_wait
> > > >
> > > > and make use of all that in md.c
> > >
> > > This sounds wrong. If a userspace process is waiting on
> > > md_event_waiters then the md module is "busy" and the rmmod attempt
> > > should fail?
> >
> > Al Viro says "no" quite firmly.
> >
> > I think the core argument is that
> >
> > rmmod md-mod < /proc/mdstat
> >
> > would deadlock.
>
> Well, only if the rmmod hangs around waiting for the module to go idle.
> I'm thinking rmmod should fail. EBUSY.
>
> > http://marc.info/?l=linux-fsdevel&m=133024267507384
>
> Why don't a billion blocking-procfs-read sites have this problem?
>
This issue of a file in /proc (actually /proc/fs/nfsd which is a separate
filesystem) blocking came up on the NFS list recently.
Experiments suggested that no other files in /proc block reads
(though /proc/vmcore takes somewhat longer to 'cat' than most).
So the offending file was changed to never block.
i.e. I think your 'billion' estimate is a little high, by one billion.
:-)
NeilBrown
On Fri, 28 Feb 2014 10:07:57 +1100 NeilBrown <[email protected]> wrote:
> On Thu, 27 Feb 2014 13:51:25 -0800 Andrew Morton <[email protected]>
> wrote:
>
> > On Fri, 28 Feb 2014 08:34:43 +1100 NeilBrown <[email protected]> wrote:
> >
> > > On Thu, 27 Feb 2014 12:58:07 -0800 Andrew Morton <[email protected]>
> > > wrote:
> > >
> > > > On Thu, 27 Feb 2014 17:24:45 +1100 NeilBrown <[email protected]> wrote:
> > > >
> > > > > If poll or select is waiting on /proc/mdstat when md-mod is unloaded
> > > > > an oops will ensure when the poll/select completes.
> > > > >
> > > > > This is because the wait_queue_head which is registered with poll_wait()
> > > > > is local to the module and no longer exists when the poll completes and
> > > > > detaches that wait_queue_head (in poll_free_wait -> remove_wait_queue).
> > > > >
> > > > > To fix this we need the wait_queue_head to have (at least) the same life
> > > > > time as the proc_dir_entry. So this patch places it in that structure.
> > > > >
> > > > > We:
> > > > > - add pde_poll_wait to struct proc_dir_entry
> > > > > - call poll_wait() passing this when poll() is called on the proc file
> > > > > - export a function proc_wake_up which will call wake_up() on pde_poll_wait
> > > > >
> > > > > and make use of all that in md.c
> > > >
> > > > This sounds wrong. If a userspace process is waiting on
> > > > md_event_waiters then the md module is "busy" and the rmmod attempt
> > > > should fail?
> > >
> > > Al Viro says "no" quite firmly.
> > >
> > > I think the core argument is that
> > >
> > > rmmod md-mod < /proc/mdstat
> > >
> > > would deadlock.
> >
> > Well, only if the rmmod hangs around waiting for the module to go idle.
> > I'm thinking rmmod should fail. EBUSY.
This? What happens if we just fail the rmmod when the module is busy
(which it is).