2006-05-24 11:00:30

by David Chinner

Subject: [PATCH] Per-superblock unused dentry LRU lists V2

Per superblock dentry LRU lists.

As originally described in:

http://marc.theaimsgroup.com/?l=linux-kernel&m=114491661527656&w=2

shrink_dcache_sb() becomes a severe bottleneck when the unused dentry
list grows long and mounts and unmounts occur frequently on the
machine, because it has to walk the entire global unused dentry list
to find the dentries belonging to the superblock being shrunk.

The patch attached below is based on the suggestion made by Andrew
Morton here:

http://marc.theaimsgroup.com/?l=linux-fsdevel&m=114499224412427&w=2

I've attempted to make reclaim fair by keeping track of the last superblock
we pruned, and starting from the next one in the list each time.
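
As a rough illustration of that rotor idea, here is a small,
self-contained userspace sketch (all names in it are made up; the
real implementation, which works on the super_blocks list under
sb_lock and handles restarts, is in the patch below):

/*
 * Toy userspace illustration of the pruning rotor: a persistent
 * "hand" remembers which superblock was pruned last, so successive
 * calls spread the work around instead of always hammering the
 * first superblock in the list.  All names here are hypothetical.
 */
#include <stdio.h>

#define NR_SB 4

struct toy_sb {
	int nr_unused;			/* dentries on this sb's LRU */
};

static struct toy_sb sbs[NR_SB] = { {10}, {10}, {10}, {10} };
static int sb_hand;			/* next superblock to prune */

static void toy_prune(int count)
{
	int visited;

	for (visited = 0; visited < NR_SB && count > 0; visited++) {
		struct toy_sb *sb = &sbs[sb_hand];
		int take = sb->nr_unused < count ? sb->nr_unused : count;

		sb->nr_unused -= take;
		count -= take;
		/* advance the hand so the next call starts on the next sb */
		sb_hand = (sb_hand + 1) % NR_SB;
	}
}

int main(void)
{
	int i;

	toy_prune(15);	/* drains sb0 and part of sb1 */
	toy_prune(15);	/* picks up at sb2, not back at sb0 */
	for (i = 0; i < NR_SB; i++)
		printf("sb%d: %d unused\n", i, sbs[i].nr_unused);
	return 0;
}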

Signed-off-by: Dave Chinner <[email protected]>

Version 2:

o cleaned up coding style
o added comments where required
o ported to 2.6.17-rc4-mm3
o added s_umount locking to prune_dcache()
o added counter for number of dentries on a superblock

---
Note: compile-tested only; sending out for review to determine
whether the s_umount locking is correct w.r.t. the umount/prune_dcache
race fixed in the -mm tree.

fs/dcache.c | 291 +++++++++++++++++++++++++----------------------------
fs/super.c | 1
include/linux/fs.h | 2
3 files changed, 145 insertions(+), 149 deletions(-)

Index: linux-2.6.16/fs/dcache.c
===================================================================
--- linux-2.6.16.orig/fs/dcache.c 2006-05-24 19:16:10.906422272 +1000
+++ linux-2.6.16/fs/dcache.c 2006-05-24 20:32:25.510976672 +1000
@@ -61,7 +61,6 @@ static kmem_cache_t *dentry_cache __read
static unsigned int d_hash_mask __read_mostly;
static unsigned int d_hash_shift __read_mostly;
static struct hlist_head *dentry_hashtable __read_mostly;
-static LIST_HEAD(dentry_unused);

/* Statistics gathering. */
struct dentry_stat_t dentry_stat = {
@@ -113,6 +112,38 @@ static void dentry_iput(struct dentry *
}
}

+static void dentry_lru_add(struct dentry *dentry)
+{
+ list_add(&dentry->d_lru, &dentry->d_sb->s_dentry_lru);
+ dentry->d_sb->s_dentry_lru_nr++;
+ dentry_stat.nr_unused++;
+}
+
+static void dentry_lru_add_tail(struct dentry *dentry)
+{
+ list_add_tail(&dentry->d_lru, &dentry->d_sb->s_dentry_lru);
+ dentry->d_sb->s_dentry_lru_nr++;
+ dentry_stat.nr_unused++;
+}
+
+static void dentry_lru_del(struct dentry *dentry)
+{
+ if (!list_empty(&dentry->d_lru)) {
+ list_del(&dentry->d_lru);
+ dentry->d_sb->s_dentry_lru_nr--;
+ dentry_stat.nr_unused--;
+ }
+}
+
+static void dentry_lru_del_init(struct dentry *dentry)
+{
+ if (likely(!list_empty(&dentry->d_lru))) {
+ list_del_init(&dentry->d_lru);
+ dentry->d_sb->s_dentry_lru_nr--;
+ dentry_stat.nr_unused--;
+ }
+}
+
/*
* This is dput
*
@@ -172,8 +203,7 @@ repeat:
goto kill_it;
if (list_empty(&dentry->d_lru)) {
dentry->d_flags |= DCACHE_REFERENCED;
- list_add(&dentry->d_lru, &dentry_unused);
- dentry_stat.nr_unused++;
+ dentry_lru_add(dentry);
}
spin_unlock(&dentry->d_lock);
spin_unlock(&dcache_lock);
@@ -185,13 +215,8 @@ unhash_it:
kill_it: {
struct dentry *parent;

- /* If dentry was on d_lru list
- * delete it from there
- */
- if (!list_empty(&dentry->d_lru)) {
- list_del(&dentry->d_lru);
- dentry_stat.nr_unused--;
- }
+ /* If dentry was on d_lru list delete it from there */
+ dentry_lru_del(dentry);
list_del(&dentry->d_u.d_child);
dentry_stat.nr_dentry--; /* For d_free, below */
/*drops the locks, at that point nobody can reach this dentry */
@@ -267,10 +292,7 @@ int d_invalidate(struct dentry * dentry)
static inline struct dentry * __dget_locked(struct dentry *dentry)
{
atomic_inc(&dentry->d_count);
- if (!list_empty(&dentry->d_lru)) {
- dentry_stat.nr_unused--;
- list_del_init(&dentry->d_lru);
- }
+ dentry_lru_del_init(dentry);
return dentry;
}

@@ -380,6 +402,51 @@ static void prune_one_dentry(struct dent
spin_lock(&dcache_lock);
}

+/*
+ * Shrink the dentry LRU on æ given superblock.
+ */
+static void
+__shrink_dcache_sb(struct super_block *sb, int *count, int flags)
+{
+ struct dentry *dentry;
+ int cnt = *count;
+
+ spin_lock(&dcache_lock);
+ while (!list_empty(&sb->s_dentry_lru) && cnt--) {
+ dentry = list_entry(sb->s_dentry_lru.prev,
+ struct dentry, d_lru);
+ dentry_lru_del_init(dentry);
+ BUG_ON(dentry->d_sb != sb);
+ prefetch(sb->s_dentry_lru.prev);
+
+ spin_lock(&dentry->d_lock);
+ /*
+ * We found an inuse dentry which was not removed from
+ * the LRU because of laziness during lookup. Do not free
+ * it - just keep it off the LRU list.
+ */
+ if (atomic_read(&dentry->d_count)) {
+ spin_unlock(&dentry->d_lock);
+ continue;
+ }
+ /*
+ * If we are honouring the DCACHE_REFERENCED flag and it is
+ * present on this dentry, clear the flag and put it back on
+ * the lru.
+ */
+ if ((flags & DCACHE_REFERENCED) &&
+ (dentry->d_flags & DCACHE_REFERENCED)) {
+ dentry->d_flags &= ~DCACHE_REFERENCED;
+ dentry_lru_add(dentry);
+ spin_unlock(&dentry->d_lock);
+ continue;
+ }
+ prune_one_dentry(dentry);
+ }
+ spin_unlock(&dcache_lock);
+ *count = cnt;
+}
+
/**
* prune_dcache - shrink the dcache
* @count: number of entries to try and free
@@ -395,95 +462,61 @@ static void prune_one_dentry(struct dent
* all the dentries are in use.
*/

-static void prune_dcache(int count, struct super_block *sb)
+static void prune_dcache(int count)
{
- spin_lock(&dcache_lock);
- for (; count ; count--) {
- struct dentry *dentry;
- struct list_head *tmp;
- struct rw_semaphore *s_umount;
-
- cond_resched_lock(&dcache_lock);
-
- tmp = dentry_unused.prev;
- if (unlikely(sb)) {
- /* Try to find a dentry for this sb, but don't try
- * too hard, if they aren't near the tail they will
- * be moved down again soon
- */
- int skip = count;
- while (skip && tmp != &dentry_unused &&
- list_entry(tmp, struct dentry, d_lru)->d_sb != sb) {
- skip--;
- tmp = tmp->prev;
- }
- }
- if (tmp == &dentry_unused)
- break;
- list_del_init(tmp);
- prefetch(dentry_unused.prev);
- dentry_stat.nr_unused--;
- dentry = list_entry(tmp, struct dentry, d_lru);
-
- spin_lock(&dentry->d_lock);
- /*
- * We found an inuse dentry which was not removed from
- * dentry_unused because of laziness during lookup. Do not free
- * it - just keep it off the dentry_unused list.
- */
- if (atomic_read(&dentry->d_count)) {
- spin_unlock(&dentry->d_lock);
- continue;
- }
- /* If the dentry was recently referenced, don't free it. */
- if (dentry->d_flags & DCACHE_REFERENCED) {
- dentry->d_flags &= ~DCACHE_REFERENCED;
- list_add(&dentry->d_lru, &dentry_unused);
- dentry_stat.nr_unused++;
- spin_unlock(&dentry->d_lock);
+ struct super_block *sb;
+ static struct super_block *sb_hand = NULL;
+ int work_done = 0;
+
+ spin_lock(&sb_lock);
+ if (sb_hand == NULL)
+ sb_hand = sb_entry(super_blocks.next);
+restart:
+ list_for_each_entry(sb, &super_blocks, s_list) {
+ if (sb != sb_hand)
continue;
- }
/*
- * If the dentry is not DCACHED_REFERENCED, it is time
- * to remove it from the dcache, provided the super block is
- * NULL (which means we are trying to reclaim memory)
- * or this dentry belongs to the same super block that
- * we want to shrink.
+ * Found the next superblock to work on. Move the hand
+ * forwards so that parallel pruners work on a different sb
*/
+ work_done++;
+ sb_hand = sb_entry(sb->s_list.next);
+ sb->s_count++;
+ spin_unlock(&sb_lock);
+
/*
- * If this dentry is for "my" filesystem, then I can prune it
- * without taking the s_umount lock (I already hold it).
- */
- if (sb && dentry->d_sb == sb) {
- prune_one_dentry(dentry);
- continue;
- }
- /*
- * ...otherwise we need to be sure this filesystem isn't being
- * unmounted, otherwise we could race with
- * generic_shutdown_super(), and end up holding a reference to
- * an inode while the filesystem is unmounted.
- * So we try to get s_umount, and make sure s_root isn't NULL.
- * (Take a local copy of s_umount to avoid a use-after-free of
- * `dentry').
+ * We need to be sure this filesystem isn't being unmounted,
+ * otherwise we could race with generic_shutdown_super(), and
+ * end up holding a reference to an inode while the filesystem
+ * is unmounted. So we try to get s_umount, and make sure
+ * s_root isn't NULL.
*/
- s_umount = &dentry->d_sb->s_umount;
- if (down_read_trylock(s_umount)) {
- if (dentry->d_sb->s_root != NULL) {
- prune_one_dentry(dentry);
- up_read(s_umount);
- continue;
+ if (down_read_trylock(&sb->s_umount)) {
+ if ((sb->s_root != NULL) &&
+ (!list_empty(&sb->s_dentry_lru))) {
+ __shrink_dcache_sb(sb, &count,
+ DCACHE_REFERENCED);
}
- up_read(s_umount);
+ up_read(&sb->s_umount);
}
- spin_unlock(&dentry->d_lock);
- /* Cannot remove the first dentry, and it isn't appropriate
- * to move it to the head of the list, so give up, and try
- * later
+
+ /*
+ * Restart if more work to do and this sb has been
+ * removed from the list
*/
- break;
+ spin_lock(&sb_lock);
+ if (__put_super_and_need_restart(sb) && count)
+ goto restart;
+ if (count <= 0)
+ break;
+ }
+ if (!work_done) {
+ /* sb_hand is stale. Start at the beginning of the
+ * list again. */
+ sb_hand = sb_entry(super_blocks.next);
+ goto restart;
}
- spin_unlock(&dcache_lock);
+ spin_unlock(&sb_lock);
}

/*
@@ -510,41 +543,7 @@ static void prune_dcache(int count, stru

void shrink_dcache_sb(struct super_block * sb)
{
- struct list_head *tmp, *next;
- struct dentry *dentry;
-
- /*
- * Pass one ... move the dentries for the specified
- * superblock to the most recent end of the unused list.
- */
- spin_lock(&dcache_lock);
- list_for_each_safe(tmp, next, &dentry_unused) {
- dentry = list_entry(tmp, struct dentry, d_lru);
- if (dentry->d_sb != sb)
- continue;
- list_move(tmp, &dentry_unused);
- }
-
- /*
- * Pass two ... free the dentries for this superblock.
- */
-repeat:
- list_for_each_safe(tmp, next, &dentry_unused) {
- dentry = list_entry(tmp, struct dentry, d_lru);
- if (dentry->d_sb != sb)
- continue;
- dentry_stat.nr_unused--;
- list_del_init(tmp);
- spin_lock(&dentry->d_lock);
- if (atomic_read(&dentry->d_count)) {
- spin_unlock(&dentry->d_lock);
- continue;
- }
- prune_one_dentry(dentry);
- cond_resched_lock(&dcache_lock);
- goto repeat;
- }
- spin_unlock(&dcache_lock);
+ __shrink_dcache_sb(sb, &sb->s_dentry_lru_nr, 0);
}

/*
@@ -628,19 +627,14 @@ resume:
struct dentry *dentry = list_entry(tmp, struct dentry, d_u.d_child);
next = tmp->next;

- if (!list_empty(&dentry->d_lru)) {
- dentry_stat.nr_unused--;
- list_del_init(&dentry->d_lru);
- }
+ dentry_lru_del_init(dentry);
+
/*
* move only zero ref count dentries to the end
* of the unused list for prune_dcache
*/
- if (!atomic_read(&dentry->d_count)) {
- list_add_tail(&dentry->d_lru, &dentry_unused);
- dentry_stat.nr_unused++;
- found++;
- }
+ if (!atomic_read(&dentry->d_count))
+ dentry_lru_add_tail(dentry);

/*
* We can return to the caller if we have found some (this
@@ -680,10 +674,11 @@ out:

void shrink_dcache_parent(struct dentry * parent)
{
+ struct super_block *sb = parent->d_sb;
int found;

while ((found = select_parent(parent)) != 0)
- prune_dcache(found, parent->d_sb);
+ __shrink_dcache_sb(sb, &found, DCACHE_REFERENCED);
}

/**
@@ -701,29 +696,27 @@ void shrink_dcache_anon(struct super_blo
struct hlist_node *lp;
struct hlist_head *head = &sb->s_anon;
int found;
- do {
+ for (;;) {
found = 0;
spin_lock(&dcache_lock);
hlist_for_each(lp, head) {
struct dentry *this = hlist_entry(lp, struct dentry, d_hash);
- if (!list_empty(&this->d_lru)) {
- dentry_stat.nr_unused--;
- list_del_init(&this->d_lru);
- }

+ dentry_lru_del_init(this);
/*
* move only zero ref count dentries to the end
* of the unused list for prune_dcache
*/
if (!atomic_read(&this->d_count)) {
- list_add_tail(&this->d_lru, &dentry_unused);
- dentry_stat.nr_unused++;
+ dentry_lru_add_tail(this);
found++;
}
}
spin_unlock(&dcache_lock);
- prune_dcache(found, sb);
- } while(found);
+ if (!found)
+ break;
+ __shrink_dcache_sb(sb, &found, DCACHE_REFERENCED);
+ }
}

/*
@@ -743,7 +736,7 @@ static int shrink_dcache_memory(int nr,
if (nr) {
if (!(gfp_mask & __GFP_FS))
return -1;
- prune_dcache(nr, NULL);
+ prune_dcache(nr);
}
return (dentry_stat.nr_unused / 100) * sysctl_vfs_cache_pressure;
}
@@ -1072,7 +1065,7 @@ struct dentry *d_splice_alias(struct ino
* rcu_read_lock() and rcu_read_unlock() are used to disable preemption while
* lookup is going on.
*
- * dentry_unused list is not updated even if lookup finds the required dentry
+ * The dentry unused LRU is not updated even if lookup finds the required dentry
* in there. It is updated in places such as prune_dcache, shrink_dcache_sb,
* select_parent and __dget_locked. This laziness saves lookup from dcache_lock
* acquisition.
Index: linux-2.6.16/include/linux/fs.h
===================================================================
--- linux-2.6.16.orig/include/linux/fs.h 2006-05-24 19:16:12.349202936 +1000
+++ linux-2.6.16/include/linux/fs.h 2006-05-24 20:28:50.268698472 +1000
@@ -846,6 +846,8 @@ struct super_block {
struct list_head s_io; /* parked for writeback */
struct hlist_head s_anon; /* anonymous dentries for (nfs) exporting */
struct list_head s_files;
+ struct list_head s_dentry_lru; /* unused dentry lru */
+ int s_dentry_lru_nr;/* # of dentries on lru */

struct block_device *s_bdev;
struct list_head s_instances;
Index: linux-2.6.16/fs/super.c
===================================================================
--- linux-2.6.16.orig/fs/super.c 2006-05-24 19:16:11.267367400 +1000
+++ linux-2.6.16/fs/super.c 2006-05-24 19:58:14.433787800 +1000
@@ -71,6 +71,7 @@ static struct super_block *alloc_super(v
INIT_LIST_HEAD(&s->s_instances);
INIT_HLIST_HEAD(&s->s_anon);
INIT_LIST_HEAD(&s->s_inodes);
+ INIT_LIST_HEAD(&s->s_dentry_lru);
init_rwsem(&s->s_umount);
mutex_init(&s->s_lock);
down_write(&s->s_umount);


2006-05-25 04:10:27

by Balbir Singh

Subject: Re: [PATCH] Per-superblock unused dentry LRU lists V2

> +/*
> + * Shrink the dentry LRU on æ given superblock.
^^^
This character (ae) looks strange.

The other changes look fine. Do you have any performance numbers, any
results from stress tests (for version 2 of the patch)?

Balbir Singh,
Linux Technology Center,
IBM Software Labs

2006-05-25 06:16:33

by David Chinner

Subject: Re: [PATCH] Per-superblock unused dentry LRU lists V2

On Thu, May 25, 2006 at 09:36:04AM +0530, Balbir Singh wrote:
> > +/*
> > + * Shrink the dentry LRU on æ given superblock.
> ^^^
> This character (ae) looks strange.

Fixed. It slipped through when I switched to the -mm tree.

> The other changes look fine. Do you have any performance numbers, any
> results from stress tests (for version 2 of the patch)?

Not yet - I've just started the stress tests now. I had to wait for
the storage and then reconfigure it which took some time. It's
currently running a create/unlink workload across 8 filesystems in
parallel. I'll run some dbench loads after this has run for a few
hours.

FWIW, this create/unlink load has been triggering reliable "Busy
inodes after unmount" errors that I've slowly been tracking down.
After I fixed the last problem in XFS late last week, I've
been getting a failure that I think is the unmount/prune_dcache
races that you and Neil have recently fixed.

Basically, I'm seeing a transient elevated reference count on
the root inode of the XFS filesystem during the final put_super()
in generic_shutdown_super(). If I trigger a BUG_ON() when that
elevated reference count is detected, by the time all the cpus
are stopped and I'm in kdb, the reference count on the root inode
is only 1. The next thing I was going to track was where the dentry
for the root inode was.

I'll know if this really is the same race soon, as the create/unlink
test would trip it under an hour.....

Cheers,

Dave.
--
Dave Chinner
R&D Software Engineer
SGI Australian Software Group

2006-05-25 06:34:26

by David Chinner

Subject: Re: [PATCH] Per-superblock unused dentry LRU lists V2

On Thu, May 25, 2006 at 04:15:53PM +1000, David Chinner wrote:
> On Thu, May 25, 2006 at 09:36:04AM +0530, Balbir Singh wrote:
> > > +/*
> > > + * Shrink the dentry LRU on æ given superblock.
> > ^^^
> > This character (ae) looks strange.
>
> Fixed. It slipped through when I switched to the -mm tree.
>
> > The other changes look fine. Do you have any performance numbers, any
> > results from stress tests (for version 2 of the patch)?
>
> Not yet - I've just started the stress tests now. I had to wait for
> the storage and then reconfigure it which took some time. It's
> currently running a create/unlink workload across 8 filesystems in
> parallel. I'll run some dbench loads after this has run for a few
> hours.
>
> FWIW, this create/unlink load has been triggering reliable "Busy
> inodes after unmount" errors that I've slowly been tracking down.
> After I fixed the last problem in XFS late last week, I've
> been getting a failure that I think is the unmount/prune_dcache
> races that you and Neil have recently fixed.

I just had all 8 filesystems come up with:

May 25 15:55:18 budgie kernel: XFS unmount got error 16
May 25 15:55:18 budgie kernel: xfs_fs_put_super: vfsp/0xe00000b006339280 left dangling!
May 25 15:55:18 budgie kernel: VFS: Busy inodes after unmount of dm-9. Self-destruct in 5 seconds. Have a nice day...
May 25 15:55:19 budgie kernel: XFS: Invalid device [/dev/mapper/dm8], error=-16
May 25 15:55:35 budgie kernel: XFS unmount got error 16
May 25 15:55:35 budgie kernel: xfs_fs_put_super: vfsp/0xe000003015a18c80 left dangling!
May 25 15:55:35 budgie kernel: VFS: Busy inodes after unmount of dm-15. Self-destruct in 5 seconds. Have a nice day...
May 25 15:55:35 budgie kernel: XFS unmount got error 16
May 25 15:55:35 budgie kernel: xfs_fs_put_super: vfsp/0xe000003015ea6280 left dangling!
May 25 15:55:35 budgie kernel: VFS: Busy inodes after unmount of dm-13. Self-destruct in 5 seconds. Have a nice day...
May 25 15:55:36 budgie kernel: XFS: Invalid device [/dev/mapper/dm14], error=-16
May 25 15:55:36 budgie kernel: XFS: Invalid device [/dev/mapper/dm12], error=-16
May 25 15:55:42 budgie kernel: XFS unmount got error 16
May 25 15:55:42 budgie kernel: xfs_fs_put_super: vfsp/0xe00000b00633b580 left dangling!
May 25 15:55:42 budgie kernel: VFS: Busy inodes after unmount of dm-5. Self-destruct in 5 seconds. Have a nice day...
May 25 15:55:44 budgie kernel: XFS: Invalid device [/dev/mapper/dm4], error=-16
May 25 15:55:44 budgie kernel: XFS unmount got error 16
May 25 15:55:44 budgie kernel: xfs_fs_put_super: vfsp/0xe00000b00633bc80 left dangling!
May 25 15:55:44 budgie kernel: VFS: Busy inodes after unmount of dm-11. Self-destruct in 5 seconds. Have a nice day...
May 25 15:55:44 budgie kernel: XFS: Invalid device [/dev/mapper/dm10], error=-16
May 25 15:55:55 budgie kernel: XFS unmount got error 16
May 25 15:55:55 budgie kernel: xfs_fs_put_super: vfsp/0xe000003015d4a080 left dangling!
May 25 15:55:55 budgie kernel: VFS: Busy inodes after unmount of dm-7. Self-destruct in 5 seconds. Have a nice day...
May 25 15:55:56 budgie kernel: XFS: Invalid device [/dev/mapper/dm6], error=-16
May 25 15:56:16 budgie kernel: XFS unmount got error 16
May 25 15:56:16 budgie kernel: xfs_fs_put_super: vfsp/0xe00000b9edeacf80 left dangling!
May 25 15:56:16 budgie kernel: VFS: Busy inodes after unmount of dm-3. Self-destruct in 5 seconds. Have a nice day...
May 25 15:56:16 budgie kernel: XFS unmount got error 16
May 25 15:56:16 budgie kernel: xfs_fs_put_super: vfsp/0xe000003015a2a280 left dangling!
May 25 15:56:16 budgie kernel: VFS: Busy inodes after unmount of dm-1. Self-destruct in 5 seconds. Have a nice day...

On the second test iteration. On 2.6.16, it takes about 10 iterations to get one
filesystem to do this. I'll need to look into this one further. I'm going to
reboot the machine and run some dbench tests (which typically don't trigger
this problem) and then come back to this one with added debugging....

Cheers,

Dave.
--
Dave Chinner
R&D Software Engineer
SGI Australian Software Group

2006-05-25 06:34:20

by Balbir Singh

Subject: Re: [PATCH] Per-superblock unused dentry LRU lists V2

>
> FWIW, this create/unlink load has been triggering reliable "Busy
> inodes after unmount" errors that I've slowly been tracking down.
> After I fixed the last problem in XFS late last week, I've
> been getting a failure that I think is the unmount/prune_dcache
> races that you and Neil have recently fixed.

Good. We were not able to reproduce the problem to test the fix;
I guess we have a more reliable way of testing it now.

>
> Basically, I'm seeing a transient elevated reference count on
> the root inode of the XFS filesystem during the final put_super()
> in generic_shutdown_super(). If I trigger a BUG_ON() when that
> elevated reference count is detected, by the time all the cpus
> are stopped and I'm in kdb, the reference count on the root inode
> is only 1. The next thing I was going to track was where the dentry
> for the root inode was.

The dput() would happen eventually, in this case after the umount.
kprobes might be a more reliable way of extracting information
here.
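
For example, a minimal kprobes module along these lines could dump a
stack trace whenever a chosen function is entered. This is only a
sketch: the probe point (generic_shutdown_super) is just an example,
and the .symbol_name style of registration shown here comes from
later kernels, so the details would need adjusting for the kernel
actually being debugged.

/*
 * Illustrative kprobes sketch: dump a stack trace whenever the
 * probed function is entered.  Probe point and registration style
 * are assumptions; pick a symbol that narrows down the culprit.
 */
#include <linux/module.h>
#include <linux/kernel.h>
#include <linux/kprobes.h>

static int handler_pre(struct kprobe *p, struct pt_regs *regs)
{
	printk(KERN_INFO "kprobe hit %s\n", p->symbol_name);
	dump_stack();
	return 0;
}

static struct kprobe kp = {
	.symbol_name	= "generic_shutdown_super",	/* example probe point */
	.pre_handler	= handler_pre,
};

static int __init probe_init(void)
{
	int ret = register_kprobe(&kp);

	if (ret < 0)
		printk(KERN_ERR "register_kprobe failed: %d\n", ret);
	return ret;
}

static void __exit probe_exit(void)
{
	unregister_kprobe(&kp);
}

module_init(probe_init);
module_exit(probe_exit);
MODULE_LICENSE("GPL");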

>
> I'll know if this really is the same race soon, as the create/unlink
> test would trip it under an hour.....
>
> Cheers,
>
> Dave.

Cheers,
Balbir Singh,
Linux Technology Center,
IBM Software Labs

2006-05-25 06:56:44

by Balbir Singh

Subject: Re: [PATCH] Per-superblock unused dentry LRU lists V2

On Thu, May 25, 2006 at 04:33:50PM +1000, David Chinner wrote:
> On Thu, May 25, 2006 at 04:15:53PM +1000, David Chinner wrote:
> > On Thu, May 25, 2006 at 09:36:04AM +0530, Balbir Singh wrote:
> > > > +/*
> > > > + * Shrink the dentry LRU on æ given superblock.
> > > ^^^
> > > This character (ae) looks strange.
> >
> > Fixed. It slipped through when I switched to the -mm tree.
> >
> > > The other changes look fine. Do you have any performance numbers, any
> > > results from stress tests (for version 2 of the patch)?
> >
> > Not yet - I've just started the stress tests now. I had to wait for
> > the storage and then reconfigure it which took some time. It's
> > currently running a create/unlink workload across 8 filesystems in
> > parallel. I'll run some dbench loads after this has run for a few
> > hours.
> >
> > FWIW, this create/unlink load has been triggering reliable "Busy
> > inodes after unmount" errors that I've slowly been tracking down.
> > After I fixed the last problem in XFS late last week, I've
> > > been getting a failure that I think is the unmount/prune_dcache
> > races that you and Neil have recently fixed.
>
> I just had all 8 filesystems come up with:
>
> May 25 15:55:18 budgie kernel: XFS unmount got error 16
> May 25 15:55:18 budgie kernel: xfs_fs_put_super: vfsp/0xe00000b006339280 left dangling!
> May 25 15:55:18 budgie kernel: VFS: Busy inodes after unmount of dm-9. Self-destruct in 5 seconds. Have a nice day...
> May 25 15:55:19 budgie kernel: XFS: Invalid device [/dev/mapper/dm8], error=-16
> May 25 15:55:35 budgie kernel: XFS unmount got error 16
> May 25 15:55:35 budgie kernel: xfs_fs_put_super: vfsp/0xe000003015a18c80 left dangling!
> May 25 15:55:35 budgie kernel: VFS: Busy inodes after unmount of dm-15. Self-destruct in 5 seconds. Have a nice day...
> May 25 15:55:35 budgie kernel: XFS unmount got error 16
> May 25 15:55:35 budgie kernel: xfs_fs_put_super: vfsp/0xe000003015ea6280 left dangling!
> May 25 15:55:35 budgie kernel: VFS: Busy inodes after unmount of dm-13. Self-destruct in 5 seconds. Have a nice day...
> May 25 15:55:36 budgie kernel: XFS: Invalid device [/dev/mapper/dm14], error=-16
> May 25 15:55:36 budgie kernel: XFS: Invalid device [/dev/mapper/dm12], error=-16
> May 25 15:55:42 budgie kernel: XFS unmount got error 16
> May 25 15:55:42 budgie kernel: xfs_fs_put_super: vfsp/0xe00000b00633b580 left dangling!
> May 25 15:55:42 budgie kernel: VFS: Busy inodes after unmount of dm-5. Self-destruct in 5 seconds. Have a nice day...
> May 25 15:55:44 budgie kernel: XFS: Invalid device [/dev/mapper/dm4], error=-16
> May 25 15:55:44 budgie kernel: XFS unmount got error 16
> May 25 15:55:44 budgie kernel: xfs_fs_put_super: vfsp/0xe00000b00633bc80 left dangling!
> May 25 15:55:44 budgie kernel: VFS: Busy inodes after unmount of dm-11. Self-destruct in 5 seconds. Have a nice day...
> May 25 15:55:44 budgie kernel: XFS: Invalid device [/dev/mapper/dm10], error=-16
> May 25 15:55:55 budgie kernel: XFS unmount got error 16
> May 25 15:55:55 budgie kernel: xfs_fs_put_super: vfsp/0xe000003015d4a080 left dangling!
> May 25 15:55:55 budgie kernel: VFS: Busy inodes after unmount of dm-7. Self-destruct in 5 seconds. Have a nice day...
> May 25 15:55:56 budgie kernel: XFS: Invalid device [/dev/mapper/dm6], error=-16
> May 25 15:56:16 budgie kernel: XFS unmount got error 16
> May 25 15:56:16 budgie kernel: xfs_fs_put_super: vfsp/0xe00000b9edeacf80 left dangling!
> May 25 15:56:16 budgie kernel: VFS: Busy inodes after unmount of dm-3. Self-destruct in 5 seconds. Have a nice day...
> May 25 15:56:16 budgie kernel: XFS unmount got error 16
> May 25 15:56:16 budgie kernel: xfs_fs_put_super: vfsp/0xe000003015a2a280 left dangling!
> May 25 15:56:16 budgie kernel: VFS: Busy inodes after unmount of dm-1. Self-destruct in 5 seconds. Have a nice day...
>
> On the second test iteration. On 2.6.16, it takes about 10 iterations to get one
> filesystem to do this. I'll need to look into this one further. I'm going to
> reboot the machine and run some dbench tests (which typically don't trigger
> this problem) and then come back to this one with added debugging....
>

Is this with version 2 of your patch? Could you also try the -mm tree
and see if the problem goes away. We have a set of patches to address
exactly this problem.

Balbir Singh,
Linux Technology Center,
IBM Software Labs

2006-05-25 08:13:50

by David Chinner

Subject: Re: [PATCH] Per-superblock unused dentry LRU lists V2

On Thu, May 25, 2006 at 12:22:20PM +0530, Balbir Singh wrote:
> On Thu, May 25, 2006 at 04:33:50PM +1000, David Chinner wrote:
> > On Thu, May 25, 2006 at 04:15:53PM +1000, David Chinner wrote:
> > >
> > > FWIW, this create/unlink load has been triggering reliable "Busy
> > > inodes after unmount" errors that I've slowly been tracking down.
> > > After I fixed the last problem in XFS late last week, I've
> > > > been getting a failure that I think is the unmount/prune_dcache
> > > races that you and Neil have recently fixed.
> >
> > I just had all 8 filesystems come up with:
> >
> > May 25 15:55:18 budgie kernel: XFS unmount got error 16
> > May 25 15:55:18 budgie kernel: xfs_fs_put_super: vfsp/0xe00000b006339280 left dangling!
> > May 25 15:55:18 budgie kernel: VFS: Busy inodes after unmount of dm-9. Self-destruct in 5 seconds. Have a nice day...

.....

> > On the second test iteration. On 2.6.16, it takes about 10 iterations to get one
> > filesystem to do this. I'll need to look into this one further. I'm going to
> > reboot the machine and run some dbench tests (which typically don't trigger
> > this problem) and then come back to this one with added debugging....
>
> Is this with version 2 of your patch?

That's with version 2 on -rc4-mm3.

> Could you also try the -mm tree
> and see if the problem goes away. We have a set of patches to address
> exactly this problem.

Well, the patches overlap and I thought that I had taken into account
the fixes that were applied to the -mm tree.

I'm rerunning on 2.6.16 with the s_umount locking fix so I know that it's
not something else in -mm that is being tripped over....

Cheers,

Dave.
--
Dave Chinner
R&D Software Engineer
SGI Australian Software Group

2006-05-26 00:25:05

by David Chinner

Subject: Re: [PATCH] Per-superblock unused dentry LRU lists V2

On Thu, May 25, 2006 at 06:13:12PM +1000, David Chinner wrote:
> On Thu, May 25, 2006 at 12:22:20PM +0530, Balbir Singh wrote:
> > On Thu, May 25, 2006 at 04:33:50PM +1000, David Chinner wrote:
> > > On Thu, May 25, 2006 at 04:15:53PM +1000, David Chinner wrote:
> > > >
> > > > FWIW, this create/unlink load has been triggering reliable "Busy
> > > > inodes after unmount" errors that I've slowly been tracking down.
> > > > After I fixed the last problem in XFS late last week, I've
> > > > been getting a failure that I think is the unmount/prune_dcache
> > > > races that you and Neil have recently fixed.
> > >
> > > I just had all 8 filesystems come up with:
> > >
> > > May 25 15:55:18 budgie kernel: XFS unmount got error 16
> > > May 25 15:55:18 budgie kernel: xfs_fs_put_super: vfsp/0xe00000b006339280 left dangling!
> > > May 25 15:55:18 budgie kernel: VFS: Busy inodes after unmount of dm-9. Self-destruct in 5 seconds. Have a nice day...
>
> .....
>
> > > On the second test iteration. On 2.6.16, it takes about 10 iterations to get one
> > > filesystem to do this. I'll need to look into this one further. I'm going to
> > > reboot the machine and run some dbench tests (which typically don't trigger
> > > this problem) and then come back to this one with added debugging....
> >
> > Is this with version 2 of your patch?
>
> That's with version 2 on -rc4-mm3.
>
> > Could you also try the -mm tree
> > and see if the problem goes away. We have a set of patches to address
> > exactly this problem.
>
> Well, the patches overlap and I thought that I had taken into account
> the fixes that were applied to the -mm tree.
>
> I'm rerunning on 2.6.16 with the s_umount locking fix so I know that it's
> not something else in -mm that is being tripped over....

2.6.16 ran all night on the above test with the s_umount locking fix in it.
The prune_dcache/unmount race was indeed the cause of the errors I have
been seeing over the last week.

I just found a bug in the -mm patch which broke shrink_dcache_parent().
This is the likely cause of the above problems, and I'm about to retest
the -mm kernel with the fixed patch.....

Cheers,

Dave.
--
Dave Chinner
R&D Software Engineer
SGI Australian Software Group