--- ./fs/dcache.c.dcacher 2005-05-10 16:10:25.000000000 +0400
+++ ./fs/dcache.c 2005-05-10 17:52:54.000000000 +0400
@@ -111,6 +111,78 @@ static inline void dentry_iput(struct de
}
}
+struct dcache_shrinker {
+ struct list_head list;
+ struct dentry *dentry;
+};
+
+DECLARE_WAIT_QUEUE_HEAD(dcache_shrinker_wq);
+
+/* called under dcache_lock */
+static void dcache_shrinker_add(struct dcache_shrinker *ds,
+ struct dentry *parent, struct dentry *dentry)
+{
+ if (parent != dentry) {
+ struct super_block *sb;
+
+ sb = parent->d_sb;
+ ds->dentry = parent;
+ list_add(&ds->list, &sb->s_dshrinkers);
+ } else
+ INIT_LIST_HEAD(&ds->list);
+}
+
+/* called under dcache_lock */
+static void dcache_shrinker_del(struct dcache_shrinker *ds)
+{
+ if (ds == NULL || list_empty(&ds->list))
+ return;
+
+ list_del_init(&ds->list);
+ wake_up_all(&dcache_shrinker_wq);
+}
+
+/* called under dcache_lock, drops inside */
+static void dcache_shrinker_wait(struct super_block *sb)
+{
+ DECLARE_WAITQUEUE(wq, current);
+
+ __set_current_state(TASK_UNINTERRUPTIBLE);
+ add_wait_queue(&dcache_shrinker_wq, &wq);
+ spin_unlock(&dcache_lock);
+
+ schedule();
+ remove_wait_queue(&dcache_shrinker_wq, &wq);
+ __set_current_state(TASK_RUNNING);
+}
+
+void dcache_shrinker_wait_sb(struct super_block *sb)
+{
+ /* the root dentry can be held in dput_recursive */
+ spin_lock(&dcache_lock);
+ while (!list_empty(&sb->s_dshrinkers)) {
+ dcache_shrinker_wait(sb);
+ spin_lock(&dcache_lock);
+ }
+ spin_unlock(&dcache_lock);
+}
+
+/* dcache_lock protects shrinker's list */
+static void shrink_dcache_racecheck(struct dentry *parent, int *racecheck)
+{
+ struct super_block *sb;
+ struct dcache_shrinker *ds;
+
+ sb = parent->d_sb;
+ list_for_each_entry(ds, &sb->s_dshrinkers, list) {
+ /* is one of dcache shrinkers working on the dentry? */
+ if (ds->dentry == parent) {
+ *racecheck = 1;
+ break;
+ }
+ }
+}
+
/*
* This is dput
*
@@ -129,8 +201,9 @@ static inline void dentry_iput(struct de
*/
/*
- * dput - release a dentry
- * @dentry: dentry to release
+ * dput_recursive - go upward through the dentry tree and release dentries
+ * @dentry: starting dentry
+ * @ds: shrinker to be added to active list (see shrink_dcache_parent)
*
* Release a dentry. This will drop the usage count and if appropriate
* call the dentry unlink method as well as removing it from the queues and
@@ -140,17 +213,15 @@ static inline void dentry_iput(struct de
* no dcache lock, please.
*/
-void dput(struct dentry *dentry)
+static void dput_recursive(struct dentry *dentry, struct dcache_shrinker *ds)
{
- if (!dentry)
- return;
-
-repeat:
if (atomic_read(&dentry->d_count) == 1)
might_sleep();
if (!atomic_dec_and_lock(&dentry->d_count, &dcache_lock))
return;
+ dcache_shrinker_del(ds);
+repeat:
spin_lock(&dentry->d_lock);
if (atomic_read(&dentry->d_count)) {
spin_unlock(&dentry->d_lock);
@@ -182,6 +253,7 @@ unhash_it:
kill_it: {
struct dentry *parent;
+ struct dcache_shrinker lds;
/* If dentry was on d_lru list
* delete it from there
@@ -191,18 +263,43 @@ kill_it: {
dentry_stat.nr_unused--;
}
list_del(&dentry->d_child);
+ parent = dentry->d_parent;
+ dcache_shrinker_add(&lds, parent, dentry);
dentry_stat.nr_dentry--; /* For d_free, below */
/*drops the locks, at that point nobody can reach this dentry */
dentry_iput(dentry);
- parent = dentry->d_parent;
d_free(dentry);
if (dentry == parent)
return;
dentry = parent;
- goto repeat;
+ spin_lock(&dcache_lock);
+ dcache_shrinker_del(&lds);
+ if (atomic_dec_and_test(&dentry->d_count))
+ goto repeat;
+ spin_unlock(&dcache_lock);
}
}
+/*
+ * dput - release a dentry
+ * @dentry: dentry to release
+ *
+ * Release a dentry. This will drop the usage count and if appropriate
+ * call the dentry unlink method as well as removing it from the queues and
+ * releasing its resources. If the parent dentries were scheduled for release
+ * they too may now get deleted.
+ *
+ * no dcache lock, please.
+ */
+
+void dput(struct dentry *dentry)
+{
+ if (!dentry)
+ return;
+
+ dput_recursive(dentry, NULL);
+}
+
/**
* d_invalidate - invalidate a dentry
* @dentry: dentry to invalidate
@@ -361,19 +458,23 @@ restart:
* removed.
* Called with dcache_lock, drops it and then regains.
*/
-static inline void prune_one_dentry(struct dentry * dentry)
+static void prune_one_dentry(struct dentry * dentry)
{
struct dentry * parent;
+ struct dcache_shrinker ds;
__d_drop(dentry);
list_del(&dentry->d_child);
+ parent = dentry->d_parent;
+ dcache_shrinker_add(&ds, parent, dentry);
dentry_stat.nr_dentry--; /* For d_free, below */
dentry_iput(dentry);
parent = dentry->d_parent;
d_free(dentry);
if (parent != dentry)
- dput(parent);
+ dput_recursive(parent, &ds);
spin_lock(&dcache_lock);
+ dcache_shrinker_del(&ds);
}
/**
@@ -562,13 +663,12 @@ positive:
* drop the lock and return early due to latency
* constraints.
*/
-static int select_parent(struct dentry * parent)
+static int select_parent(struct dentry * parent, int * racecheck)
{
struct dentry *this_parent = parent;
struct list_head *next;
int found = 0;
- spin_lock(&dcache_lock);
repeat:
next = this_parent->d_subdirs.next;
resume:
@@ -610,6 +710,9 @@ dentry->d_parent->d_name.name, dentry->d
#endif
goto repeat;
}
+
+ if (!found && racecheck != NULL)
+ shrink_dcache_racecheck(dentry, racecheck);
}
/*
* All done at this level ... ascend and resume the search.
@@ -624,7 +727,6 @@ this_parent->d_parent->d_name.name, this
goto resume;
}
out:
- spin_unlock(&dcache_lock);
return found;
}
@@ -637,10 +739,66 @@ out:
void shrink_dcache_parent(struct dentry * parent)
{
- int found;
+ int found, r;
+
+ while (1) {
+ spin_lock(&dcache_lock);
+ found = select_parent(parent, NULL);
+ if (found)
+ goto found;
- while ((found = select_parent(parent)) != 0)
+ /*
+ * try again with a dput_recursive() race check.
+ * it returns quickly if everything really was shrunk
+ */
+ r = 0;
+ found = select_parent(parent, &r);
+ if (found)
+ goto found;
+ if (!r)
+ break;
+
+ /* drops the lock inside */
+ dcache_shrinker_wait(parent->d_sb);
+ continue;
+
+found:
+ spin_unlock(&dcache_lock);
prune_dcache(found);
+ }
+ spin_unlock(&dcache_lock);
+}
+
+/*
+ * Move any unused anon dentries to the end of the unused list.
+ * called under dcache_lock
+ */
+static int select_anon(struct hlist_head *head, int *racecheck)
+{
+ struct hlist_node *lp;
+ int found = 0;
+
+ hlist_for_each(lp, head) {
+ struct dentry *this = hlist_entry(lp, struct dentry, d_hash);
+ if (!list_empty(&this->d_lru)) {
+ dentry_stat.nr_unused--;
+ list_del_init(&this->d_lru);
+ }
+
+ /*
+ * move only zero ref count dentries to the end
+ * of the unused list for prune_dcache
+ */
+ if (!atomic_read(&this->d_count)) {
+ list_add_tail(&this->d_lru, &dentry_unused);
+ dentry_stat.nr_unused++;
+ found++;
+ }
+
+ if (!found && racecheck != NULL)
+ shrink_dcache_racecheck(this, racecheck);
+ }
+ return found;
}
/**
@@ -653,33 +811,36 @@ void shrink_dcache_parent(struct dentry
* done under dcache_lock.
*
*/
-void shrink_dcache_anon(struct hlist_head *head)
+void shrink_dcache_anon(struct super_block *sb)
{
- struct hlist_node *lp;
- int found;
- do {
- found = 0;
+ int found, r;
+
+ while (1) {
spin_lock(&dcache_lock);
- hlist_for_each(lp, head) {
- struct dentry *this = hlist_entry(lp, struct dentry, d_hash);
- if (!list_empty(&this->d_lru)) {
- dentry_stat.nr_unused--;
- list_del_init(&this->d_lru);
- }
+ found = select_anon(&sb->s_anon, NULL);
+ if (found)
+ goto found;
- /*
- * move only zero ref count dentries to the end
- * of the unused list for prune_dcache
- */
- if (!atomic_read(&this->d_count)) {
- list_add_tail(&this->d_lru, &dentry_unused);
- dentry_stat.nr_unused++;
- found++;
- }
- }
+ /*
+ * try again with a dput_recursive() race check.
+ * it returns quickly if everything really was shrunk
+ */
+ r = 0;
+ found = select_anon(&sb->s_anon, &r);
+ if (found)
+ goto found;
+ if (!r)
+ break;
+
+ /* drops the lock inside */
+ dcache_shrinker_wait(sb);
+ continue;
+
+found:
spin_unlock(&dcache_lock);
prune_dcache(found);
- } while(found);
+ }
+ spin_unlock(&dcache_lock);
}
/*
--- ./fs/super.c.dcacher 2005-05-10 16:10:29.000000000 +0400
+++ ./fs/super.c 2005-05-10 17:53:36.000000000 +0400
@@ -69,6 +69,7 @@ static struct super_block *alloc_super(v
INIT_LIST_HEAD(&s->s_io);
INIT_LIST_HEAD(&s->s_files);
INIT_LIST_HEAD(&s->s_instances);
+ INIT_LIST_HEAD(&s->s_dshrinkers);
INIT_HLIST_HEAD(&s->s_anon);
INIT_LIST_HEAD(&s->s_inodes);
init_rwsem(&s->s_umount);
@@ -230,8 +231,9 @@ void generic_shutdown_super(struct super
if (root) {
sb->s_root = NULL;
shrink_dcache_parent(root);
- shrink_dcache_anon(&sb->s_anon);
+ shrink_dcache_anon(sb);
dput(root);
+ dcache_shrinker_wait_sb(sb);
fsync_super(sb);
lock_super(sb);
sb->s_flags &= ~MS_ACTIVE;
--- ./include/linux/dcache.h.dcacher 2005-05-10 16:10:38.000000000 +0400
+++ ./include/linux/dcache.h 2005-05-10 17:53:36.000000000 +0400
@@ -209,7 +209,8 @@ extern struct dentry * d_alloc_anon(stru
extern struct dentry * d_splice_alias(struct inode *, struct dentry *);
extern void shrink_dcache_sb(struct super_block *);
extern void shrink_dcache_parent(struct dentry *);
-extern void shrink_dcache_anon(struct hlist_head *);
+extern void shrink_dcache_anon(struct super_block *);
+extern void dcache_shrinker_wait_sb(struct super_block *sb);
extern int d_invalidate(struct dentry *);
/* only used at mount-time */
--- ./include/linux/fs.h.dcacher 2005-05-10 16:10:38.000000000 +0400
+++ ./include/linux/fs.h 2005-05-10 17:47:40.000000000 +0400
@@ -783,6 +783,7 @@ struct super_block {
struct list_head s_io; /* parked for writeback */
struct hlist_head s_anon; /* anonymous dentries for (nfs) exporting */
struct list_head s_files;
+ struct list_head s_dshrinkers; /* active dcache shrinkers */
struct block_device *s_bdev;
struct list_head s_instances;
Kirill Korotaev <[email protected]> wrote:
>
> This patch fixes a dcache race between the shrink_dcache_XXX functions
> and dput().
> -void dput(struct dentry *dentry)
> +static void dput_recursive(struct dentry *dentry, struct dcache_shrinker *ds)
> {
> - if (!dentry)
> - return;
> -
> -repeat:
> if (atomic_read(&dentry->d_count) == 1)
> might_sleep();
> if (!atomic_dec_and_lock(&dentry->d_count, &dcache_lock))
> return;
> + dcache_shrinker_del(ds);
>
> +repeat:
> spin_lock(&dentry->d_lock);
> if (atomic_read(&dentry->d_count)) {
> spin_unlock(&dentry->d_lock);
> @@ -182,6 +253,7 @@ unhash_it:
>
> kill_it: {
> struct dentry *parent;
> + struct dcache_shrinker lds;
>
> /* If dentry was on d_lru list
> * delete it from there
> @@ -191,18 +263,43 @@ kill_it: {
> dentry_stat.nr_unused--;
> }
> list_del(&dentry->d_child);
> + parent = dentry->d_parent;
> + dcache_shrinker_add(&lds, parent, dentry);
> dentry_stat.nr_dentry--; /* For d_free, below */
> /*drops the locks, at that point nobody can reach this dentry */
> dentry_iput(dentry);
> - parent = dentry->d_parent;
> d_free(dentry);
> if (dentry == parent)
Aren't we leaving local variable `lds' on the global list here?
> return;
> dentry = parent;
> - goto repeat;
> + spin_lock(&dcache_lock);
> + dcache_shrinker_del(&lds);
> + if (atomic_dec_and_test(&dentry->d_count))
> + goto repeat;
> + spin_unlock(&dcache_lock);
> }
> }
>
--- 25/fs/dcache.c~fix-of-dcache-race-leading-to-busy-inodes-on-umount-fix 2005-05-11 19:24:23.000000000 -0700
+++ 25-akpm/fs/dcache.c 2005-05-11 19:27:27.000000000 -0700
@@ -269,8 +269,12 @@ kill_it: {
/*drops the locks, at that point nobody can reach this dentry */
dentry_iput(dentry);
d_free(dentry);
- if (dentry == parent)
+ if (unlikely(dentry == parent)) {
+ spin_lock(&dcache_lock);
+ dcache_shrinker_del(&lds);
+ spin_unlock(&dcache_lock);
return;
+ }
dentry = parent;
spin_lock(&dcache_lock);
dcache_shrinker_del(&lds);
_
Kirill Korotaev <[email protected]> wrote:
>
> This patch fixes a dcache race between the shrink_dcache_XXX functions
> and dput().
>
> Example race scenario:
>
> CPU 0                           CPU 1
> umount /dev/sda1
> generic_shutdown_super          shrink_dcache_memory()
>  shrink_dcache_parent            dput dentry
>   select_parent                   prune_one_dentry()
>                                    <<<< child is dead, locks are released,
>                                         but parent is still referenced!!! >>>>
>
>   skip dentry->parent,
>   since its d_count > 0
>
> message: BUSY inodes after umount...
>          <<< parent is left on dentry_unused
>              list, referencing freed super block
>
> We faced these messages about busy inodes constantly after some stress
> testing with mount/umount operations run in parallel with other activity.
> This patch fixed the problem.
>
> The patch was heavily tested on 2.6.8 for two months;
> this forward-ported version boots and works OK as well.
>
You've provided no description of how the patch solves the problem.
> /* dcache_lock protects shrinker's list */
> static void shrink_dcache_racecheck(struct dentry *parent, int *racecheck)
> {
> struct super_block *sb;
> struct dcache_shrinker *ds;
>
> sb = parent->d_sb;
> list_for_each_entry(ds, &sb->s_dshrinkers, list) {
> /* is one of dcache shrinkers working on the dentry? */
> if (ds->dentry == parent) {
> *racecheck = 1;
> break;
> }
> }
> }
>
>
This all looks awfully hacky. Why is it done this way, and is there no
cleaner solution?
>>This patch fixes a dcache race between the shrink_dcache_XXX functions
>>and dput().
>
>
>
>>-void dput(struct dentry *dentry)
>>+static void dput_recursive(struct dentry *dentry, struct dcache_shrinker *ds)
>> {
>>- if (!dentry)
>>- return;
>>-
>>-repeat:
>> if (atomic_read(&dentry->d_count) == 1)
>> might_sleep();
>> if (!atomic_dec_and_lock(&dentry->d_count, &dcache_lock))
>> return;
>>+ dcache_shrinker_del(ds);
>>
>>+repeat:
>> spin_lock(&dentry->d_lock);
>> if (atomic_read(&dentry->d_count)) {
>> spin_unlock(&dentry->d_lock);
>>@@ -182,6 +253,7 @@ unhash_it:
>>
>> kill_it: {
>> struct dentry *parent;
>>+ struct dcache_shrinker lds;
>>
>> /* If dentry was on d_lru list
>> * delete it from there
>>@@ -191,18 +263,43 @@ kill_it: {
>> dentry_stat.nr_unused--;
>> }
>> list_del(&dentry->d_child);
>>+ parent = dentry->d_parent;
>>+ dcache_shrinker_add(&lds, parent, dentry);
>> dentry_stat.nr_dentry--; /* For d_free, below */
>> /*drops the locks, at that point nobody can reach this dentry */
>> dentry_iput(dentry);
>>- parent = dentry->d_parent;
>> d_free(dentry);
>> if (dentry == parent)
>
>
> Aren't we leaving local variable `lds' on the global list here?
yup, my fault.
Your patch solves this.
Kirill
> Kirill Korotaev <[email protected]> wrote:
>
>>This patch fixes a dcache race between the shrink_dcache_XXX functions
>>and dput().
>>
>>Example race scenario:
>>
>> CPU 0                          CPU 1
>> umount /dev/sda1
>> generic_shutdown_super         shrink_dcache_memory()
>>  shrink_dcache_parent           dput dentry
>>   select_parent                  prune_one_dentry()
>>                                   <<<< child is dead, locks are released,
>>                                        but parent is still referenced!!! >>>>
>>
>>   skip dentry->parent,
>>   since its d_count > 0
>>
>> message: BUSY inodes after umount...
>>          <<< parent is left on dentry_unused
>>              list, referencing freed super block
>>
>>We faced these messages about busy inodes constantly after some stress
>>testing with mount/umount operations run in parallel with other activity.
>>This patch fixed the problem.
>>
>>The patch was heavily tested on 2.6.8 for two months;
>>this forward-ported version boots and works OK as well.
>>
>
>
> You've provided no description of how the patch solves the problem.
below
>>/* dcache_lock protects shrinker's list */
>>static void shrink_dcache_racecheck(struct dentry *parent, int *racecheck)
>>{
>> struct super_block *sb;
>> struct dcache_shrinker *ds;
>>
>> sb = parent->d_sb;
>> list_for_each_entry(ds, &sb->s_dshrinkers, list) {
>> /* is one of dcache shrinkers working on the dentry? */
>> if (ds->dentry == parent) {
>> *racecheck = 1;
>> break;
>> }
>> }
>>}
>>
>>
>
>
> This all looks awfully hacky. Why is it done this way, and is there no
> cleaner solution?
Additional patch info:
This patch solves the race mentioned above by introducing a per-super-block
list of dentries that are held indirectly by a child which is going away.
This usually happens when a child receives its final dput() while still
holding a reference to its parent, even though the child itself is no
longer on any list and can't be found anywhere. To tell whether
shrink_dcache_parent() failed to shrink the dcache tree because of a real
reference rather than such an indirect one, we check for the race by
walking the shrinker list. If the current dentry is found on that list, it
is being referenced indirectly as described above and should be waited
for; otherwise the tree really can't be shrunk.
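
To make the mechanism concrete, here is a simplified user-space sketch of
the same handshake (illustration only, not kernel code): a pthread mutex
stands in for dcache_lock and a condition variable for dcache_shrinker_wq,
and the names marker, release_child, parent_busy and wait_until_released
are invented for the example.

#include <pthread.h>

struct marker {                         /* role of struct dcache_shrinker */
        struct marker *next;
        void *parent;                   /* object a dying child still pins */
};

static pthread_mutex_t lock = PTHREAD_MUTEX_INITIALIZER; /* "dcache_lock" */
static pthread_cond_t wq = PTHREAD_COND_INITIALIZER;     /* the wait queue */
static struct marker *active;                            /* "s_dshrinkers" */

/* Releasing side: publish which parent we still hold, drop the lock, do
 * the slow teardown of the child, then unpublish and wake any waiters. */
void release_child(void *parent, void (*slow_teardown)(void *))
{
        struct marker m = { 0, parent };
        struct marker **p;

        pthread_mutex_lock(&lock);
        m.next = active;
        active = &m;                    /* like dcache_shrinker_add() */
        pthread_mutex_unlock(&lock);

        slow_teardown(parent);          /* like dentry_iput() + d_free() */

        pthread_mutex_lock(&lock);
        for (p = &active; *p != 0; p = &(*p)->next)
                if (*p == &m) {
                        *p = m.next;    /* like dcache_shrinker_del() */
                        break;
                }
        pthread_cond_broadcast(&wq);    /* like wake_up_all() */
        pthread_mutex_unlock(&lock);
}

/* Shrinker side, like shrink_dcache_racecheck(); call with `lock' held. */
static int parent_busy(void *parent)
{
        struct marker *m;

        for (m = active; m != 0; m = m->next)
                if (m->parent == parent)
                        return 1;
        return 0;
}

/* Like dcache_shrinker_wait_sb(): if the only reference to the parent is
 * the hidden one held by a releaser, sleep until it drops it and retry. */
void wait_until_released(void *parent)
{
        pthread_mutex_lock(&lock);
        while (parent_busy(parent))
                pthread_cond_wait(&wq, &lock);
        pthread_mutex_unlock(&lock);
}

The point to notice is that the marker lives on the releaser's stack, so
the common dput() path pays for no allocation; only a shrinker that has
already failed to free anything walks the list, and the broadcast after
the removal is what lets the waiters recheck the list and retry.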
Why is it so hacky? We tried to develop a solution that affects
performance as little as possible, without slowing down the fast path and
without introducing lock contention by widening existing locks. I'll be
happy if someone fixes it more efficiently.
Kirill