2004-09-09 15:28:22

by Kirill Korotaev

Subject: [PATCH] adding per sb inode list to make invalidate_inodes() faster

--- ./include/linux/fs.h.invl 2004-08-14 14:55:09.000000000 +0400
+++ ./include/linux/fs.h 2004-09-08 18:47:46.989249792 +0400
@@ -419,6 +419,7 @@ static inline int mapping_writably_mappe
struct inode {
struct hlist_node i_hash;
struct list_head i_list;
+ struct list_head i_sb_list;
struct list_head i_dentry;
unsigned long i_ino;
atomic_t i_count;
@@ -750,6 +751,7 @@ struct super_block {
atomic_t s_active;
void *s_security;

+ struct list_head s_inodes; /* all inodes */
struct list_head s_dirty; /* dirty inodes */
struct list_head s_io; /* parked for writeback */
struct hlist_head s_anon; /* anonymous dentries for (nfs) exporting */
--- ./fs/hugetlbfs/inode.c.invl 2004-08-14 14:56:14.000000000 +0400
+++ ./fs/hugetlbfs/inode.c 2004-09-08 19:17:05.787871760 +0400
@@ -198,6 +198,7 @@ static void hugetlbfs_delete_inode(struc
struct hugetlbfs_sb_info *sbinfo = HUGETLBFS_SB(inode->i_sb);

hlist_del_init(&inode->i_hash);
+ list_del(&inode->i_sb_list);
list_del_init(&inode->i_list);
inode->i_state |= I_FREEING;
inodes_stat.nr_inodes--;
@@ -240,6 +241,7 @@ static void hugetlbfs_forget_inode(struc
inodes_stat.nr_unused--;
hlist_del_init(&inode->i_hash);
out_truncate:
+ list_del(&inode->i_sb_list);
list_del_init(&inode->i_list);
inode->i_state |= I_FREEING;
inodes_stat.nr_inodes--;
--- ./fs/inode.c.invl 2004-09-08 18:35:48.082540224 +0400
+++ ./fs/inode.c 2004-09-08 18:57:45.873205592 +0400
@@ -295,7 +295,7 @@ static void dispose_list(struct list_hea
/*
* Invalidate all inodes for a device.
*/
-static int invalidate_list(struct list_head *head, struct super_block * sb, struct list_head * dispose)
+static int invalidate_list(struct list_head *head, struct list_head * dispose)
{
struct list_head *next;
int busy = 0, count = 0;
@@ -308,12 +308,11 @@ static int invalidate_list(struct list_h
next = next->next;
if (tmp == head)
break;
- inode = list_entry(tmp, struct inode, i_list);
- if (inode->i_sb != sb)
- continue;
+ inode = list_entry(tmp, struct inode, i_sb_list);
invalidate_inode_buffers(inode);
if (!atomic_read(&inode->i_count)) {
hlist_del_init(&inode->i_hash);
+ list_del(&inode->i_sb_list);
list_move(&inode->i_list, dispose);
inode->i_state |= I_FREEING;
count++;
@@ -349,10 +348,7 @@ int invalidate_inodes(struct super_block

down(&iprune_sem);
spin_lock(&inode_lock);
- busy = invalidate_list(&inode_in_use, sb, &throw_away);
- busy |= invalidate_list(&inode_unused, sb, &throw_away);
- busy |= invalidate_list(&sb->s_dirty, sb, &throw_away);
- busy |= invalidate_list(&sb->s_io, sb, &throw_away);
+ busy = invalidate_list(&sb->s_inodes, &throw_away);
spin_unlock(&inode_lock);

dispose_list(&throw_away);
@@ -452,6 +448,7 @@ static void prune_icache(int nr_to_scan)
continue;
}
hlist_del_init(&inode->i_hash);
+ list_del(&inode->i_sb_list);
list_move(&inode->i_list, &freeable);
inode->i_state |= I_FREEING;
nr_pruned++;
@@ -561,6 +558,7 @@ struct inode *new_inode(struct super_blo
if (inode) {
spin_lock(&inode_lock);
inodes_stat.nr_inodes++;
+ list_add(&inode->i_sb_list, &sb->s_inodes);
list_add(&inode->i_list, &inode_in_use);
inode->i_ino = ++last_ino;
inode->i_state = 0;
@@ -609,6 +607,7 @@ static struct inode * get_new_inode(stru
goto set_failed;

inodes_stat.nr_inodes++;
+ list_add(&inode->i_sb_list, &sb->s_inodes);
list_add(&inode->i_list, &inode_in_use);
hlist_add_head(&inode->i_hash, head);
inode->i_state = I_LOCK|I_NEW;
@@ -657,6 +656,7 @@ static struct inode * get_new_inode_fast
if (!old) {
inode->i_ino = ino;
inodes_stat.nr_inodes++;
+ list_add(&inode->i_sb_list, &sb->s_inodes);
list_add(&inode->i_list, &inode_in_use);
hlist_add_head(&inode->i_hash, head);
inode->i_state = I_LOCK|I_NEW;
@@ -993,6 +993,7 @@ void generic_delete_inode(struct inode *
{
struct super_operations *op = inode->i_sb->s_op;

+ list_del(&inode->i_sb_list);
list_del_init(&inode->i_list);
inode->i_state|=I_FREEING;
inodes_stat.nr_inodes--;
@@ -1038,6 +1039,7 @@ static void generic_forget_inode(struct
inodes_stat.nr_unused--;
hlist_del_init(&inode->i_hash);
}
+ list_del(&inode->i_sb_list);
list_del_init(&inode->i_list);
inode->i_state|=I_FREEING;
inodes_stat.nr_inodes--;
@@ -1230,33 +1232,15 @@ int remove_inode_dquot_ref(struct inode
void remove_dquot_ref(struct super_block *sb, int type, struct list_head *tofree_head)
{
struct inode *inode;
- struct list_head *act_head;

if (!sb->dq_op)
return; /* nothing to do */
- spin_lock(&inode_lock); /* This lock is for inodes code */

+ spin_lock(&inode_lock); /* This lock is for inodes code */
/* We hold dqptr_sem so we are safe against the quota code */
- list_for_each(act_head, &inode_in_use) {
- inode = list_entry(act_head, struct inode, i_list);
- if (inode->i_sb == sb && !IS_NOQUOTA(inode))
- remove_inode_dquot_ref(inode, type, tofree_head);
- }
- list_for_each(act_head, &inode_unused) {
- inode = list_entry(act_head, struct inode, i_list);
- if (inode->i_sb == sb && !IS_NOQUOTA(inode))
- remove_inode_dquot_ref(inode, type, tofree_head);
- }
- list_for_each(act_head, &sb->s_dirty) {
- inode = list_entry(act_head, struct inode, i_list);
- if (!IS_NOQUOTA(inode))
- remove_inode_dquot_ref(inode, type, tofree_head);
- }
- list_for_each(act_head, &sb->s_io) {
- inode = list_entry(act_head, struct inode, i_list);
+ list_for_each_entry(inode, &sb->s_inodes, i_sb_list)
if (!IS_NOQUOTA(inode))
remove_inode_dquot_ref(inode, type, tofree_head);
- }
spin_unlock(&inode_lock);
}

--- ./fs/super.c.invl 2004-08-14 14:55:22.000000000 +0400
+++ ./fs/super.c 2004-09-08 18:46:47.227334984 +0400
@@ -65,6 +65,7 @@ static struct super_block *alloc_super(v
}
INIT_LIST_HEAD(&s->s_dirty);
INIT_LIST_HEAD(&s->s_io);
+ INIT_LIST_HEAD(&s->s_inodes);
INIT_LIST_HEAD(&s->s_files);
INIT_LIST_HEAD(&s->s_instances);
INIT_HLIST_HEAD(&s->s_anon);


Attachments:
diff-inode-invalidate-20040908 (5.88 kB)

2004-09-09 15:53:41

by Linus Torvalds

Subject: Re: [PATCH] adding per sb inode list to make invalidate_inodes() faster



On Thu, 9 Sep 2004, Kirill Korotaev wrote:
>
> This patch fixes the problem that a huge inode cache
> can make invalidate_inodes() calls very long. Meanwhile
> lock_kernel() and inode_lock are being held and the system
> can freeze for seconds.

Hmm.. I don't mind the approach per se, but I get very nervous about the
fact that I don't see any initialization of "inode->i_sb_list".

Yes, you do a

list_add(&inode->i_sb_list, &sb->s_inodes);

in new_inode(), but there are a ton of users that allocate inodes other
ways, and more importantly, even if this was the only allocation function,
you do various "list_del(&inode->i_sb_list)" things which leaves the inode
around but with an invalid superblock list.

So at the very _least_, you should document why all of this is safe very
carefully (I get nervous about fundamental FS infrastructure changes), and
it should be left to simmer in -mm for a longish time to make sure it
really works..

Call me chicken.

Linus "tweet tweet" Torvalds

2004-09-09 17:21:42

by William Lee Irwin III

Subject: Re: [PATCH] adding per sb inode list to make invalidate_inodes() faster

On Thu, Sep 09, 2004 at 08:51:45AM -0700, Linus Torvalds wrote:
> Hmm.. I don't mind the approach per se, but I get very nervous about the
> fact that I don't see any initialization of "inode->i_sb_list".
> Yes, you do a
> list_add(&inode->i_sb_list, &sb->s_inodes);
> in new_inode(), but there are a ton of users that allocate inodes other
> ways, and more importantly, even if this was the only allocation function,
> you do various "list_del(&inode->i_sb_list)" things which leaves the inode
> around but with an invalid superblock list.
> So at the very _least_, you should document why all of this is safe very
> carefully (I get nervous about fundamental FS infrastructure changes), and
> it should be left to simmer in -mm for a longish time to make sure it
> really works..
> Call me chicken.

Some version of this patch has been in 2.6.x-mm for a long while. I've
not reviewed this version of the patch for differences with the -mm
code. It would probably be best to look at the -mm bits as they've had
sustained exposure for quite some time.


-- wli

2004-09-09 18:29:54

by William Lee Irwin III

Subject: Re: [PATCH] adding per sb inode list to make invalidate_inodes() faster

William Lee Irwin III <[email protected]> wrote:
>> I've not reviewed this version of the patch for differences with the -mm
>> code. It would probably be best to look at the -mm bits as they've had
>> sustained exposure for quite some time.

On Thu, Sep 09, 2004 at 11:06:22AM -0700, Andrew Morton wrote:
> Yes.
> I have not merged it up because it seems rather dopey to add eight bytes to
> the inode to speed up something as rare as umount.
> Is there a convincing reason for proceeding with the change?

The only motive I'm aware of is for latency in the presence of things
such as autofs. It's also worth noting that in the presence of things
such as removable media umount is also much more common. I personally
find this sufficiently compelling. Kirill may have additional ammunition.

Also, the additional sizeof(struct list_head) is only a requirement
while the global inode LRU is maintained. I believed it would have
been beneficial to have localized the LRU to the sb also, which would
have maintained sizeof(struct inode) at parity with current mainline.


-- wli

2004-09-09 18:25:17

by Andrew Morton

Subject: Re: [PATCH] adding per sb inode list to make invalidate_inodes() faster

William Lee Irwin III <[email protected]> wrote:
>
> On Thu, Sep 09, 2004 at 08:51:45AM -0700, Linus Torvalds wrote:
> > Hmm.. I don't mind the approach per se, but I get very nervous about the
> > fact that I don't see any initialization of "inode->i_sb_list".
> > Yes, you do a
> > list_add(&inode->i_sb_list, &sb->s_inodes);
> > in new_inode(), but there are a ton of users that allocate inodes other
> > ways, and more importantly, even if this was the only allocation function,
> > you do various "list_del(&inode->i_sb_list)" things which leaves the inode
> > around but with an invalid superblock list.
> > So at the very _least_, you should document why all of this is safe very
> > carefully (I get nervous about fundamental FS infrastructure changes), and
> > it should be left to simmer in -mm for a longish time to make sure it
> > really works..
> > Call me chicken.
>
> Some version of this patch has been in 2.6.x-mm for a long while.

One year.

> I've
> not reviewed this version of the patch for differences with the -mm
> code. It would probably be best to look at the -mm bits as they've had
> sustained exposure for quite some time.

Yes.

I have not merged it up because it seems rather dopey to add eight bytes to
the inode to speed up something as rare as umount.

Is there a convincing reason for proceeding with the change?

2004-09-09 19:39:33

by Andrew Morton

Subject: Re: [PATCH] adding per sb inode list to make invalidate_inodes() faster

William Lee Irwin III <[email protected]> wrote:
>
> On Thu, Sep 09, 2004 at 11:06:22AM -0700, Andrew Morton wrote:
> > Yes.
> > I have not merged it up because it seems rather dopey to add eight bytes to
> > the inode to speed up something as rare as umount.
> > Is there a convincing reason for proceeding with the change?
>
> The only motive I'm aware of is for latency in the presence of things
> such as autofs. It's also worth noting that in the presence of things
> such as removable media umount is also much more common. I personally
> find this sufficiently compelling. Kirill may have additional ammunition.

Well. That's why I'm keeping the patch alive-but-unmerged. Waiting to see
who wants it.

There are people who have large machines which are automounting hundreds of
different NFS servers. I'd certainly expect such a machine to experience
ongoing umount glitches. But no reports have yet been sighted by this
little black duck.

> Also, the additional sizeof(struct list_head) is only a requirement
> while the global inode LRU is maintained. I believed it would have
> been beneficial to have localized the LRU to the sb also, which would
> have maintained sizeof(struct inode) at parity with current mainline.

Could be. We would give each superblock its own shrinker callback and
everything should balance out nicely (hah).
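
(For concreteness, a rough sketch of what that could look like. The
2.6-era API is set_shrinker(DEFAULT_SEEKS, fn), where fn is an
int (*)(int nr_to_scan, unsigned int gfp_mask) callback that carries no
private pointer, so a genuinely per-sb shrinker would need the
registration extended; set_shrinker_data(), prune_sb_icache() and
s_nr_unused below are all hypothetical, not existing interfaces:)

/* HYPOTHETICAL SKETCH: assumes an extended set_shrinker_data() that
 * passes a private cookie, which the real 2.6 shrinker_t does not. */
static int sb_icache_shrink(void *data, int nr_to_scan, unsigned int gfp_mask)
{
        struct super_block *sb = data;

        if (nr_to_scan)
                prune_sb_icache(sb, nr_to_scan);  /* hypothetical: walk
                                                   * an sb-local inode
                                                   * LRU */
        return sb->s_nr_unused;         /* hypothetical count of inodes
                                         * still prunable on this sb */
}

/* at mount:  sb->s_icache_shrinker =
 *                set_shrinker_data(DEFAULT_SEEKS, sb_icache_shrink, sb);
 * at umount: remove_shrinker(sb->s_icache_shrinker);
 */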

2004-09-09 19:44:13

by William Lee Irwin III

Subject: Re: [PATCH] adding per sb inode list to make invalidate_inodes() faster

William Lee Irwin III <[email protected]> wrote:
>> The only motive I'm aware of is for latency in the presence of things
>> such as autofs. It's also worth noting that in the presence of things
>> such as removable media umount is also much more common. I personally
>> find this sufficiently compelling. Kirill may have additional ammunition.

On Thu, Sep 09, 2004 at 12:08:18PM -0700, Andrew Morton wrote:
> Well. That's why I'm keeping the patch alive-but-unmerged. Waiting to see
> who wants it.
> There are people who have large machines which are automounting hundreds of
> different NFS servers. I'd certainly expect such a machine to experience
> ongoing umount glitches. But no reports have yet been sighted by this
> little black duck.

Unfortunately all the scenarios I'm aware of where this is an ongoing
issue involve extremely downrev and vendor-centric kernel versions along
with the usual ultraconservative system administration, so this specific
concern is somewhat downplayed as other ongoing functional concerns
(e.g. direct IO on nfs breaking, deadlocks, getting zillions of fs's to
mount at all, etc.) have far higher priority than performance concerns.


William Lee Irwin III <[email protected]> wrote:
>> Also, the additional sizeof(struct list_head) is only a requirement
>> while the global inode LRU is maintained. I believed it would have
>> been beneficial to have localized the LRU to the sb also, which would
>> have maintained sizeof(struct inode) at parity with current mainline.

On Thu, Sep 09, 2004 at 12:08:18PM -0700, Andrew Morton wrote:
> Could be. We would give each superblock its own shrinker callback and
> everything should balance out nicely (hah).

Hmm. My first leaning and implementation was to hierarchically LRU
inodes within sb's, but I suppose that's plausible. Let me know if you
want the shrinker callbacks or something else in particular done.


-- wli

2004-09-10 08:21:52

by Kirill Korotaev

Subject: Re: [PATCH] adding per sb inode list to make invalidate_inodes() faster

Linus Torvalds wrote:
>
> On Thu, 9 Sep 2004, Kirill Korotaev wrote:
>
>>This patch fixes the problem that a huge inode cache
>>can make invalidate_inodes() calls very long. Meanwhile
>>lock_kernel() and inode_lock are being held and the system
>>can freeze for seconds.
>
>
> Hmm.. I don't mind the approach per se, but I get very nervous about the
> fact that I don't see any initialization of "inode->i_sb_list".
inode->i_sb_list is a link list_head, not a real list head (the real
list head is sb->s_inodes, and it's initialized), i.e. it doesn't
require initialization.

all the operations I perform on i_sb_list are
- list_add(&inode->i_sb_list, ...);
- list_del(&inode->i_sb_list);

So it's all safe.
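
(A runnable userspace sketch of the semantics being claimed here,
mirroring the 2.6 list.h; all names below are local to the sketch, not
kernel code. It shows why list_add() alone really does need no prior
initialization of the link:)

#include <stdio.h>

/* minimal userspace mirror of the kernel's struct list_head */
struct list_head {
        struct list_head *next, *prev;
};

#define LIST_HEAD(name) struct list_head name = { &(name), &(name) }

static void list_add(struct list_head *new, struct list_head *head)
{
        /* writes BOTH pointers of 'new', so the link itself needs no
         * prior INIT_LIST_HEAD() -- the property relied on above */
        new->next = head->next;
        new->prev = head;
        head->next->prev = new;
        head->next = new;
}

LIST_HEAD(s_inodes);    /* the head: initialized at its definition */

int main(void)
{
        struct list_head i_sb_list;     /* the link: uninitialized */

        list_add(&i_sb_list, &s_inodes);
        printf("on list: %d\n", s_inodes.next == &i_sb_list);
        return 0;
}

(Whether list_del() is equally forgiving is exactly what the rest of
the thread turns on.)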

> Yes, you do a
> list_add(&inode->i_sb_list, &sb->s_inodes);
>
> in new_inode(), but there are a ton of users that allocate inodes other
> ways, and more importantly, even if this was the only allocation function,
> you do various "list_del(&inode->i_sb_list)" things which leaves the inode
> around but with an invalid superblock list.
1. struct inode is allocated in only one place!
It's alloc_inode(). Further, alloc_inode() is static and is called from
3 places: new_inode(), get_new_inode() and get_new_inode_fast().

All 3 of the above functions do list_add(&inode->i_sb_list, &sb->s_inodes);
i.e. newly allocated inodes are always on the super block list.

2. list_del(&inode->i_sb_list) doesn't leave the super block list invalid!

I state that I remove inodes from the sb list only when the usual
inode->i_list is removed; after that moment the inode can't be found
either in my per-sb list or in any other list (unused_inodes,
inodes_in_use, sb->s_io, etc.)

See the details below.

> So at the very _least_, you should document why all of this is safe very
> carefully (I get nervous about fundamental FS infrastructure changes), and
> it should be left to simmer in -mm for a longish time to make sure it
> really works..
Ok. This patch is safe because the use of the new inode->i_sb_list list
is fully symmetric to the use of inode->i_list, i.e.:

- when an inode is created, it's added via inode->i_list to one of the
std lists (inodes_in_use, unused_inodes, sb->s_io). It lives in one of
these lists during its whole lifetime. So in the places where an inode
is created I've just added list_add(&inode->i_sb_list, &sb->s_inodes).
There are 3 such places: new_inode(), get_new_inode() and
get_new_inode_fast()

- when an inode is about to be destroyed, it's removed from the std
lists (and sometimes moved to a 'freeable' list). These are the places
where the inode is removed from the hash as well. In such places I've
just inserted list_del(&inode->i_sb_list). These places are
generic_forget_inode(), generic_delete_inode(), invalidate_list(),
prune_icache(), hugetlbfs_delete_inode() and hugetlbfs_forget_inode().

So as you can see from the description, the lifetime of an inode on the
sb->s_inodes list is the same as in the hash and the other std lists.
And this new per-sb list is protected by the same inode_lock.

To be sure that there are no other places where the i_list field is
used in other ways, I've simply grepped for it.
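
(A self-contained userspace model of the symmetry just described;
'inode', 's_inodes' and 'inode_in_use' here are toy stand-ins, not the
kernel structures. The invariant is that i_sb_list membership always
tracks i_list membership:)

#include <stdio.h>
#include <stdlib.h>

struct list_head { struct list_head *next, *prev; };
#define LIST_HEAD(name) struct list_head name = { &(name), &(name) }

static void list_add(struct list_head *new, struct list_head *head)
{
        new->next = head->next; new->prev = head;
        head->next->prev = new; head->next = new;
}

static void list_del(struct list_head *entry)
{
        entry->next->prev = entry->prev;
        entry->prev->next = entry->next;
}

struct inode {
        struct list_head i_list;        /* std lists (in_use/unused/...) */
        struct list_head i_sb_list;     /* the new per-sb list */
};

LIST_HEAD(inode_in_use);        /* stand-in for the global std list */
LIST_HEAD(s_inodes);            /* stand-in for sb->s_inodes */

/* creation: both links enter their lists together (new_inode() et al.) */
static struct inode *toy_new_inode(void)
{
        struct inode *inode = malloc(sizeof(*inode));
        if (!inode)
                exit(1);
        list_add(&inode->i_sb_list, &s_inodes);
        list_add(&inode->i_list, &inode_in_use);
        return inode;
}

/* destruction: both links leave together (generic_delete_inode() et al.) */
static void toy_delete_inode(struct inode *inode)
{
        list_del(&inode->i_sb_list);
        list_del(&inode->i_list);
        free(inode);
}

int main(void)
{
        struct inode *inode = toy_new_inode();
        toy_delete_inode(inode);
        printf("both lists empty together: %d\n",
               s_inodes.next == &s_inodes &&
               inode_in_use.next == &inode_in_use);
        return 0;
}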

Kirill

2004-09-10 08:43:24

by Kirill Korotaev

Subject: Re: [PATCH] adding per sb inode list to make invalidate_inodes() faster

Andrew Morton wrote:

> William Lee Irwin III <[email protected]> wrote:
>>On Thu, Sep 09, 2004 at 11:06:22AM -0700, Andrew Morton wrote:
>> > Yes.
>> > I have not merged it up because it seems rather dopey to add eight bytes to
>> > the inode to speed up something as rare as umount.
>> > Is there a convincing reason for proceeding with the change?
>>
>> The only motive I'm aware of is for latency in the presence of things
>> such as autofs. It's also worth noting that in the presence of things
>> such as removable media umount is also much more common. I personally
>> find this sufficiently compelling. Kirill may have additional ammunition.

> Well. That's why I'm keeping the patch alive-but-unmerged. Waiting to see
> who wants it.

> There are people who have large machines which are automounting hundreds of
> different NFS servers. I'd certainly expect such a machine to experience
> ongoing umount glitches. But no reports have yet been sighted by this
> little black duck.
I think it's not always evident where the problem is. For many people
waiting 2 seconds is ok, and they pay little attention to these small
hangs.

1. I saw the bug in bugzilla from the NFS people that you yourself
pointed me to last time, where the same problem was detected.

2. We came across this problem accidentally. After we started using the
4GB split in production systems we faced small hangs in umount and
quota on/off. We have hundreds of super blocks in these systems and do
mount/umount and quota on/off quite often, so the hangs were quite
noticeable for us, though not fatal.

We extracted the problem into a test case and resolved the issue. The
patch I posted here a year ago (for 2.4 kernels) has been used by us
for about a year in ALL our production systems.

For sure, this bug can be triggered only on really big servers with a
huge amount of memory and cache.
It's up to you whether to apply it or not. I understand your position
on the 8 bytes, but it's probably just a question of how the kernel is
used, whether it's a user or a server system.
Perhaps we could introduce some config option which would enable
features such as this one for enterprise systems.

>> Also, the additional sizeof(struct list_head) is only a requirement
>> while the global inode LRU is maintained. I believed it would have
>> been beneficial to have localized the LRU to the sb also, which would
>> have maintained sizeof(struct inode) at parity with current mainline.
>
> Could be. We would give each superblock its own shrinker callback and
> everything should balance out nicely (hah).
heh, and how do you plan to make the per-sb LRU fair?

Kirill

2004-09-10 09:08:00

by Andrew Morton

Subject: Re: [PATCH] adding per sb inode list to make invalidate_inodes() faster

Kirill Korotaev <[email protected]> wrote:
>
> For sure, this bug can be triggered only on really big servers with a
> huge amount of memory and cache.
> It's up to you whether to apply it or not. I understand your position
> on the 8 bytes, but it's probably just a question of how the kernel is
> used, whether it's a user or a server system.
> Perhaps we could introduce some config option which would enable
> features such as this one for enterprise systems.

I am paralysed by indecision!

It would be nice if we had evidence that more than one site in the world
was affected by this :(

I can't see a less space-consuming alternative here (apart from per-sb lru).

> >> Also, the additional sizeof(struct list_head) is only a requirement
> >> while the global inode LRU is maintained. I believed it would have
> >> been beneficial to have localized the LRU to the sb also, which would
> >> have maintained sizeof(struct inode) at parity with current mainline.
> >
> > Could be. We would give each superblock its own shrinker callback and
> > everything should balance out nicely (hah).
>
> heh, and how do you plan to make the per-sb LRU fair?

Good point.

2004-09-10 14:24:13

by Linus Torvalds

Subject: Re: [PATCH] adding per sb inode list to make invalidate_inodes() faster



On Fri, 10 Sep 2004, Kirill Korotaev wrote:
> >
> > Hmm.. I don't mind the approach per se, but I get very nervous about
> > the fact that I don't see any initialization of "inode->i_sb_list".
>
> inode->i_sb_list is a link list_head, not a real list head (the real
> list head is sb->s_inodes, and it's initialized), i.e. it doesn't
> require initialization.

It _does_ require initialization. And no, there is no difference between a
"real" head and an entry "link" in the list. They both need to be
initialized.

> all the operations I perform on i_sb_list are
> - list_add(&inode->i_sb_list, ...);

This one is ok without initializing the entry, since it will do so itself.

> - list_del(&inode->i_sb_list);

This one is NOT ok. If list_del() is ever done on a link entry that hasn't
been initialized, you crash. If "list_del()" is ever done twice on an
entry, you will crash and/or corrupt memory elsewhere.
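
(The same userspace mirror makes the failure mode concrete: list_del()
dereferences the entry's own pointers, and the 2.6 implementation then
poisons them, so both cases named here fault rather than quietly
succeed. Sketch only, not kernel source:)

struct list_head {
        struct list_head *next, *prev;
};

/* the 2.6 list_del() poisons deleted links with these magic addresses */
#define LIST_POISON1 ((struct list_head *)0x00100100)
#define LIST_POISON2 ((struct list_head *)0x00200200)

static void list_del(struct list_head *entry)
{
        /* dereferences the entry's OWN pointers -- garbage if the link
         * was never initialized and never list_add()ed anywhere */
        entry->next->prev = entry->prev;
        entry->prev->next = entry->next;
        /* ...then poisons the link, so a second list_del() on the same
         * entry faults at a recognizable address instead of silently
         * corrupting a live list */
        entry->next = LIST_POISON1;
        entry->prev = LIST_POISON2;
}

int main(void)
{
        struct list_head stray; /* never initialized, never added */

        list_del(&stray);       /* dereferences garbage pointers: crash */
        return 0;
}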

> So it's all safe.

It's not "all safe". You had better explain _why_ it's safe. You do so
later:

> 1. struct inode is allocated only in one place!
> it's alloc_inode(). Next alloc_inode() is static and is called from 3
> places:
> new_inode(), get_new_inode() and get_new_inode_fast().
>
> All 3 above functions do list_add(&inode->i_sb_list, &sb->s_inodes);
> i.e. newly allocated inodes are always in super block list.

Good. _This_ is what I was after.

> 2. list_del(&inode->i_sb_list) doesn't leave super block list invalid!

No, but it leaves _itself_ invalid. There had better not be anything that
touches it ever after without an initialization. That wasn't obvious...

Linus

2004-09-10 16:54:30

by Kirill Korotaev

Subject: Re: [PATCH] adding per sb inode list to make invalidate_inodes() faster

Linus Torvalds wrote:
> On Fri, 10 Sep 2004, Kirill Korotaev wrote:

>>>Hmm.. I don't mind the approach per se, but I get very nervous about
>>>the fact that I don't see any initialization of "inode->i_sb_list".
>>
>>inode->i_sb_list is a link list_head, not a real list head (the real
>>list head is sb->s_inodes, and it's initialized), i.e. it doesn't
>>require initialization.
>
> It _does_ require initialization. And no, there is no difference between a
> "real" head and an entry "link" in the list. They both need to be
> initialized.
>
>>all the operations I perform on i_sb_list are
>>- list_add(&inode->i_sb_list, ...);
>
> This one is ok without initializing the entry, since it will do so itself.
>
>>- list_del(&inode->i_sb_list);
>
> This one is NOT ok. If list_del() is ever done on a link entry that
> hasn't been initialized, you crash. If "list_del()" is ever done twice
> on an entry, you will crash and/or corrupt memory elsewhere.
We never do list_del twice, nor do we do list_del on uninitialized inodes!

>>1. struct inode is allocated in only one place!
>>It's alloc_inode(). Further, alloc_inode() is static and is called from
>>3 places: new_inode(), get_new_inode() and get_new_inode_fast().
>>
>>All 3 of the above functions do list_add(&inode->i_sb_list, &sb->s_inodes);
>>i.e. newly allocated inodes are always on the super block list.

> Good. _This_ is what I was after.
>
>
>>2. list_del(&inode->i_sb_list) doesn't leave the super block list invalid!
>
> No, but it leaves _itself_ invalid. There had better not be anything
> that touches it ever after without an initialization. That wasn't
> obvious...
ok. But now I hope I've proved that it's ok?
No one does list_del() twice, and no one does list_del() on uninitialized
inodes.

If you want i_sb_list to really ALWAYS be initialized, then we can
replace list_del with list_del_init and insert INIT_LIST_HEAD(i_sb_list)
in inode_init_once().
But I don't think it's a good idea.
Moreover, I think that list_del_init() and other initialization of link
list_heads in such places usually only hides the problems, not solves
them.
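
(For contrast, the declined alternative in the same userspace-mirror
style: list_del_init() re-points the entry at itself, so repeating it
is a silent no-op -- the masking effect objected to above. Sketch, not
kernel source:)

#include <stdio.h>

struct list_head {
        struct list_head *next, *prev;
};

static void INIT_LIST_HEAD(struct list_head *list)
{
        list->next = list;
        list->prev = list;
}

static void list_add(struct list_head *new, struct list_head *head)
{
        new->next = head->next;
        new->prev = head;
        head->next->prev = new;
        head->next = new;
}

static void list_del_init(struct list_head *entry)
{
        entry->next->prev = entry->prev;
        entry->prev->next = entry->next;
        INIT_LIST_HEAD(entry);  /* entry points at itself again */
}

int main(void)
{
        struct list_head head, link;

        INIT_LIST_HEAD(&head);
        list_add(&link, &head);

        list_del_init(&link);   /* legitimate removal */
        list_del_init(&link);   /* double removal: silent no-op -- the
                                 * bug a poisoning list_del() would
                                 * have caught */
        printf("head empty: %d\n", head.next == &head);
        return 0;
}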

Kirill

2004-09-10 20:14:35

by Denis Vlasenko

Subject: Re: [PATCH] adding per sb inode list to make invalidate_inodes() faster

> >> The only motive I'm aware of is for latency in the presence of things
> >> such as autofs. It's also worth noting that in the presence of things
> >> such as removable media umount is also much more common. I personally
> >> find this sufficiently compelling. Kirill may have additional
> >> ammunition.
> >
> > Well. That's why I'm keeping the patch alive-but-unmerged. Waiting to
> > see who wants it.
> >
> > There are people who have large machines which are automounting hundreds
> > of different NFS servers. I'd certainly expect such a machine to
> > experience ongoing umount glitches. But no reports have yet been sighted
> > by this little black duck.
>
> I think it's not always evident where the problem is. For many people
> waiting 2 seconds is ok, and they pay little attention to these small
> hangs.
>
> 1. I saw the bug in bugzilla from the NFS people that you yourself
> pointed me to last time, where the same problem was detected.

What bug? Are you talking about umount being not so fast or something else?
--
vda

2004-09-11 09:11:49

by Kirill Korotaev

Subject: Re[2]: [PATCH] adding per sb inode list to make invalidate_inodes() faster

Hello Denis,

Saturday, September 11, 2004, 12:14:24 AM, you wrote:

>> >> The only motive I'm aware of is for latency in the presence of things
>> >> such as autofs. It's also worth noting that in the presence of things
>> >> such as removable media umount is also much more common. I personally
>> >> find this sufficiently compelling. Kirill may have additional
>> >> ammunition.
>> >
>> > Well. That's why I'm keeping the patch alive-but-unmerged. Waiting to
>> > see who wants it.
>> >
>> > There are people who have large machines which are automounting hundreds
>> > of different NFS servers. I'd certainly expect such a machine to
>> > experience ongoing umount glitches. But no reports have yet been sighted
>> > by this little black duck.
>>
>> I think it's not always evident where the problem is. For many people
>> waiting 2 seconds is ok, and they pay little attention to these small
>> hangs.
>>
>> 1. I saw the bug in bugzilla from the NFS people that you yourself
>> pointed me to last time, where the same problem was detected.

DV> What bug? Are you talking about umount being not so fast or something else?
yup. umount requires traversing ALL inodes in the system (even inodes
from other super blocks). So umount and quota off can take a very, very
LONG time.
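
(Restating the fs/inode.c hunk from the top of the thread, since it
answers the question directly:)

/* before: four scans; inode_in_use and inode_unused are system-wide
 * lists holding every superblock's inodes, filtered on inode->i_sb */
busy = invalidate_list(&inode_in_use, sb, &throw_away);
busy |= invalidate_list(&inode_unused, sb, &throw_away);
busy |= invalidate_list(&sb->s_dirty, sb, &throw_away);
busy |= invalidate_list(&sb->s_io, sb, &throw_away);

/* after: a single scan over this superblock's own inodes */
busy = invalidate_list(&sb->s_inodes, &throw_away);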

Have you faced this bug before?

--
Best regards,
Kirill mailto:[email protected]