2004-04-23 14:15:26

by Steve Dickson

[permalink] [raw]
Subject: [PATCH] Reinstantiating stale inodes

--- linux-2.4.21/fs/nfs/inode.c.org 2004-04-17 18:26:32.000000000 -0400
+++ linux-2.4.21/fs/nfs/inode.c 2004-04-23 03:19:51.000000000 -0400
@@ -953,13 +953,57 @@ nfs_wait_on_inode(struct inode *inode, i
}

/*
+ * Reinstantiate an inode that has gone stale
+ */
+static int
+nfs_reinstantiate(
+ struct inode *dir,
+ struct dentry *dentry,
+ struct nfs_fattr *fattr)
+{
+ int error;
+ struct nfs_fh fhandle;
+ struct inode *inode;
+
+ error = NFS_PROTO(dir)->lookup(dir, &dentry->d_name, &fhandle, &fattr);
+ if (!error) {
+ error = -ENOMEM;
+ inode = nfs_fhget(dentry, &fhandle, &fattr);
+ if (inode) {
+ d_drop(dentry);
+ dput(dentry);
+ d_instantiate(dentry, inode);
+ dentry->d_time = jiffies;
+ error = 0;
+ }
+ }
+ return error;
+}
+
+/*
* Externally visible revalidation function
*/
int
nfs_revalidate(struct dentry *dentry)
{
struct inode *inode = dentry->d_inode;
- return nfs_revalidate_inode(NFS_SERVER(inode), inode);
+ struct inode *pinode;
+ struct nfs_fattr fattr;
+ int error;
+
+ error = nfs_revalidate_inode(NFS_SERVER(inode), inode);
+ if (!error || error != -ESTALE)
+ return error;
+ /*
+ * We have a stale fh so ask the server for another one
+ */
+ pinode = dentry->d_parent->d_inode;
+ if (nfs_reinstantiate(pinode, dentry, &fattr) == 0) {
+ inode = dentry->d_inode;
+ if (nfs_refresh_inode(inode, &fattr) == 0)
+ error = 0;
+ }
+ return error;
}

/*


Attachments:
linux-2.4.21-nfs-estale.patch (1.38 kB)

2004-04-23 14:33:31

by Olaf Kirch

[permalink] [raw]
Subject: Re: [PATCH] Reinstantiating stale inodes

On Fri, Apr 23, 2004 at 10:15:35AM -0400, Steve Dickson wrote:
> Here is a 2.4 patch that will reinstantiate an inode
> when a ESTALE error is returned on a getattr. When
> the error occurs, a lookup is immediately issued
> to get a new fh.

Brrr. Are you sure this is such a good idea? It will have all sorts of
bad side effects. For instance, you may be writing a file named "foo".
Someone else replaces foo with their copy (mv foo-new foo) and your file
handle becomes stale.

If you issue a lookup immediately, you will continue writing, but
now your writes go to the new file and produce garbage.

At a minimum, the lookup should occur at file open, and only if
there are no other users of the inode.

Olaf
--
Olaf Kirch | The Hardware Gods hate me.
[email protected] |
---------------+


-------------------------------------------------------
This SF.net email is sponsored by: The Robotic Monkeys at ThinkGeek
For a limited time only, get FREE Ground shipping on all orders of $35
or more. Hurry up and shop folks, this offer expires April 30th!
http://www.thinkgeek.com/freeshipping/?cpg=12297
_______________________________________________
NFS maillist - [email protected]
https://lists.sourceforge.net/lists/listinfo/nfs

2004-04-23 14:36:18

by Trond Myklebust

[permalink] [raw]
Subject: Re: [PATCH] Reinstantiating stale inodes

On Fri, 2004-04-23 at 10:15, Steve Dickson wrote:
> Here is a 2.4 patch that will reinstantiate an inode
> when a ESTALE error is returned on a getattr. When
> the error occurs, a lookup is immediately issued
> to get a new fh.
>
> The fixes the problem of a server rsync -a directory
> that a client has mounted. The key being the -a flag
> since it causes the server not to update the mtime on
> the directory.
>
> My initial efforts was to make nfs_lookup_revalidate()
> a bit smarter with the use of ctimes but turns out
> that when nfs_lookup_revalidate() does no caching
> (i.e. an otw lookup is issued on every call) the
> ESTALEs still occurred.
>
> Then I turned my attention to __nfs_refresh_inode() and
> had it used ctime in its calculations of what is
> and is not valid... This did work, but it cause a significant
> amount of extra otw traffic (using the cthon test suite) for
> the non error cases. The one thing good about this patch (imho)
> is the extra lookups only occur after an error that generally
> does not happen...
>
> Comments would be appreciated, especially about how
> I'm reinstantiating the dentry....

There are several problems here, but the main one is that you have no
guarantees that you are the exclusive user of that dentry.

This again means that people who think they have open files on the "old"
inode may suddenly find their program Oopsing or corrupting the new
file. Ditto for all those shrink_dcache_*() which are likely to Oops.

Cheers,
Trond


-------------------------------------------------------
This SF.net email is sponsored by: The Robotic Monkeys at ThinkGeek
For a limited time only, get FREE Ground shipping on all orders of $35
or more. Hurry up and shop folks, this offer expires April 30th!
http://www.thinkgeek.com/freeshipping/?cpg=12297
_______________________________________________
NFS maillist - [email protected]
https://lists.sourceforge.net/lists/listinfo/nfs

2004-04-23 14:48:26

by Lever, Charles

[permalink] [raw]
Subject: RE: [PATCH] Reinstantiating stale inodes

> On Fri, 2004-04-23 at 10:15, Steve Dickson wrote:
> > Here is a 2.4 patch that will reinstantiate an inode
> > when a ESTALE error is returned on a getattr. When
> > the error occurs, a lookup is immediately issued
> > to get a new fh.
> >=20
> > The fixes the problem of a server rsync -a directory
> > that a client has mounted. The key being the -a flag
> > since it causes the server not to update the mtime on
> > the directory.
> >=20
> > My initial efforts was to make nfs_lookup_revalidate()
> > a bit smarter with the use of ctimes but turns out
> > that when nfs_lookup_revalidate() does no caching
> > (i.e. an otw lookup is issued on every call) the
> > ESTALEs still occurred.
> >=20
> > Then I turned my attention to __nfs_refresh_inode() and
> > had it used ctime in its calculations of what is
> > and is not valid... This did work, but it cause a significant
> > amount of extra otw traffic (using the cthon test suite) for
> > the non error cases. The one thing good about this patch (imho)
> > is the extra lookups only occur after an error that generally
> > does not happen...
> >=20
> > Comments would be appreciated, especially about how
> > I'm reinstantiating the dentry....
>=20
> There are several problems here, but the main one is that you have no
> guarantees that you are the exclusive user of that dentry.
>=20
> This again means that people who think they have open files=20
> on the "old"
> inode may suddenly find their program Oopsing or corrupting the new
> file. Ditto for all those shrink_dcache_*() which are likely to Oops.

yes, i was wondering about that when i saw the patch.

there appears to be a real problem when restoring from a backup,
or using rsync. the file size and the mtime stay precisely the
same, but the file handle changes. i'm not sure anything can be
done about this in NFSv2/3?


-------------------------------------------------------
This SF.net email is sponsored by: The Robotic Monkeys at ThinkGeek
For a limited time only, get FREE Ground shipping on all orders of $35
or more. Hurry up and shop folks, this offer expires April 30th!
http://www.thinkgeek.com/freeshipping/?cpg=12297
_______________________________________________
NFS maillist - [email protected]
https://lists.sourceforge.net/lists/listinfo/nfs

2004-04-23 15:00:31

by Trond Myklebust

[permalink] [raw]
Subject: RE: [PATCH] Reinstantiating stale inodes

On Fri, 2004-04-23 at 10:48, Lever, Charles wrote:
> there appears to be a real problem when restoring from a backup,
> or using rsync. the file size and the mtime stay precisely the
> same, but the file handle changes. i'm not sure anything can be
> done about this in NFSv2/3?

The only way to distinguish the two is to replace the use of the mtime
with the ctime in nfs_check_verifier() and friends.

Under ordinary circumstances, the mtime and ctime should be more or less
identical, so I'm not sure why Steve was seeing extra revalidations when
he was running the Connectathon suite. Were you perhaps changing the
algorithm in nfs_refresh_inode() instead, Steve?

Cheers,
Trond


-------------------------------------------------------
This SF.net email is sponsored by: The Robotic Monkeys at ThinkGeek
For a limited time only, get FREE Ground shipping on all orders of $35
or more. Hurry up and shop folks, this offer expires April 30th!
http://www.thinkgeek.com/freeshipping/?cpg=12297
_______________________________________________
NFS maillist - [email protected]
https://lists.sourceforge.net/lists/listinfo/nfs

2004-04-23 15:08:55

by Olaf Kirch

[permalink] [raw]
Subject: Re: [PATCH] Reinstantiating stale inodes

On Fri, Apr 23, 2004 at 07:48:11AM -0700, Lever, Charles wrote:
> there appears to be a real problem when restoring from a backup,
> or using rsync. the file size and the mtime stay precisely the
> same, but the file handle changes. i'm not sure anything can be
> done about this in NFSv2/3?

Well, namei_open could interpret an ESTALE error as "retry the
lookup and open, ignoring the cache".
I think this would solve 99% of all problems.

Olaf
--
Olaf Kirch | The Hardware Gods hate me.
[email protected] |
---------------+


-------------------------------------------------------
This SF.net email is sponsored by: The Robotic Monkeys at ThinkGeek
For a limited time only, get FREE Ground shipping on all orders of $35
or more. Hurry up and shop folks, this offer expires April 30th!
http://www.thinkgeek.com/freeshipping/?cpg=12297
_______________________________________________
NFS maillist - [email protected]
https://lists.sourceforge.net/lists/listinfo/nfs

2004-04-23 15:17:37

by Lever, Charles

[permalink] [raw]
Subject: RE: [PATCH] Reinstantiating stale inodes

> On Fri, Apr 23, 2004 at 07:48:11AM -0700, Lever, Charles wrote:
> > there appears to be a real problem when restoring from a backup,
> > or using rsync. the file size and the mtime stay precisely the
> > same, but the file handle changes. i'm not sure anything can be
> > done about this in NFSv2/3?
>=20
> Well, namei_open could interpret an ESTALE error as "retry the
> lookup and open, ignoring the cache".
> I think this would solve 99% of all problems.

my impression was that viro would wretch at the idea of adding
file-system specific logic to the VFS layer. %^)


-------------------------------------------------------
This SF.net email is sponsored by: The Robotic Monkeys at ThinkGeek
For a limited time only, get FREE Ground shipping on all orders of $35
or more. Hurry up and shop folks, this offer expires April 30th!
http://www.thinkgeek.com/freeshipping/?cpg=12297
_______________________________________________
NFS maillist - [email protected]
https://lists.sourceforge.net/lists/listinfo/nfs

2004-04-23 15:50:27

by Steve Dickson

[permalink] [raw]
Subject: Re: [PATCH] Reinstantiating stale inodes

Hi Olaf,

Olaf Kirch wrote:

>On Fri, Apr 23, 2004 at 10:15:35AM -0400, Steve Dickson wrote:
>
>
>>Here is a 2.4 patch that will reinstantiate an inode
>>when a ESTALE error is returned on a getattr. When
>>the error occurs, a lookup is immediately issued
>>to get a new fh.
>>
>>
>
>Brrr. Are you sure this is such a good idea? It will have all sorts of
>bad side effects.
>
This is why I'm asking.... In the beginning I also thought this ideas was
a bit brain dead ... but after a bunch of trials and tribulations and not
being able to do it any other way that didn't effect normal
traffic... it kinda grew on me... :)

>For instance, you may be writing a file named "foo".
>Someone else replaces foo with their copy (mv foo-new foo) and your file
>handle becomes stale.
>
>
True... but the write or commit will still fail with ESTALE. This
patch only effects getattrs (or stat()s). I guess my subject line
was a bit miss leading...

>If you issue a lookup immediately, you will continue writing, but
>now your writes go to the new file and produce garbage.
>
>
Here are the tests I ran:

client: writes 100m file called foo
server: rm foo
client: the write or commit failed with ESTALE

client: writes 100m file called foo
server: renames foo to foo.old and immediately creates foo
client: failed with ESTALE

client: writes 100m file called foo
Same client: rm foo
client: finishes writing then the file is removed

client: writes 100m file called foo
Same client: renames foo to foo.old and immediately creates foo
client: both write to foo and foo.old complete and sum -r shows
they are identical

Again only getattrs will be followed up with lookups..

>At a minimum, the lookup should occur at file open, and only if
>there are no other users of the inode.
>
>
Unfortunately, in this particular case, there were no opens...


SteveD.


-------------------------------------------------------
This SF.net email is sponsored by: The Robotic Monkeys at ThinkGeek
For a limited time only, get FREE Ground shipping on all orders of $35
or more. Hurry up and shop folks, this offer expires April 30th!
http://www.thinkgeek.com/freeshipping/?cpg=12297
_______________________________________________
NFS maillist - [email protected]
https://lists.sourceforge.net/lists/listinfo/nfs

2004-04-23 16:01:39

by Steve Dickson

[permalink] [raw]
Subject: Re: [PATCH] Reinstantiating stale inodes

Trond Myklebust wrote:

>There are several problems here, but the main one is that you have no
>guarantees that you are the exclusive user of that dentry.
>
>
True, there are no guarantees the user has exclusive access but
what does it matter? Its a stale inode if they try to used its
going to fail...

>This again means that people who think they have open files on the "old"
>inode may suddenly find their program Oopsing or corrupting the new
>file.
>
Again, this new fh is only gotten on getattrs any other ops will still
fail with ESTALE...


SteveD


-------------------------------------------------------
This SF.net email is sponsored by: The Robotic Monkeys at ThinkGeek
For a limited time only, get FREE Ground shipping on all orders of $35
or more. Hurry up and shop folks, this offer expires April 30th!
http://www.thinkgeek.com/freeshipping/?cpg=12297
_______________________________________________
NFS maillist - [email protected]
https://lists.sourceforge.net/lists/listinfo/nfs

2004-04-23 16:04:15

by Steve Dickson

[permalink] [raw]
Subject: Re: [PATCH] Reinstantiating stale inodes

Neil Horman wrote:

> Should ENOMEM be whats returned in the event that we don't get a file
> handle after we refresh the directory? Shouldn't that be ENOENT?

It really matter since the original ESTALE error will be
returned if any part of the lookup fails...

SteveD.


-------------------------------------------------------
This SF.net email is sponsored by: The Robotic Monkeys at ThinkGeek
For a limited time only, get FREE Ground shipping on all orders of $35
or more. Hurry up and shop folks, this offer expires April 30th!
http://www.thinkgeek.com/freeshipping/?cpg=12297
_______________________________________________
NFS maillist - [email protected]
https://lists.sourceforge.net/lists/listinfo/nfs

2004-04-23 16:16:00

by Steve Dickson

[permalink] [raw]
Subject: Re: [PATCH] Reinstantiating stale inodes

Trond Myklebust wrote:

>On Fri, 2004-04-23 at 10:48, Lever, Charles wrote:
>
>
>>there appears to be a real problem when restoring from a backup,
>>or using rsync. the file size and the mtime stay precisely the
>>same, but the file handle changes. i'm not sure anything can be
>>done about this in NFSv2/3?
>>
>>
>
>The only way to distinguish the two is to replace the use of the mtime
>with the ctime in nfs_check_verifier() and friends.
>
>
Again.. this was the first place I looked... And I just can't figure how
to use NFS_CACHE_CTIME() in any meaningful calculation...

>Under ordinary circumstances, the mtime and ctime should be more or less
>identical,
>
In this particular case they aren't because the server explicitly
maintains the "old" mtime and since mtime does not change we'll never
get the new ctime...

>so I'm not sure why Steve was seeing extra revalidations when
>he was running the Connectathon suite. Were you perhaps changing the
>algorithm in nfs_refresh_inode() instead, Steve?
>
>
A very keen observation... Yes, when I introduced ctime into the valid or
not valid calculation, the getattrs (i.e revalidations) but even more
surprising (at least to me) was the number of reads sky rocked when
I ran the cthon tests...

SteveD.


-------------------------------------------------------
This SF.net email is sponsored by: The Robotic Monkeys at ThinkGeek
For a limited time only, get FREE Ground shipping on all orders of $35
or more. Hurry up and shop folks, this offer expires April 30th!
http://www.thinkgeek.com/freeshipping/?cpg=12297
_______________________________________________
NFS maillist - [email protected]
https://lists.sourceforge.net/lists/listinfo/nfs

2004-04-23 16:17:11

by Lever, Charles

[permalink] [raw]
Subject: RE: [PATCH] Reinstantiating stale inodes

> >This again means that people who think they have open files=20
> on the "old"
> >inode may suddenly find their program Oopsing or corrupting the new
> >file.=20
> >
> Again, this new fh is only gotten on getattrs any other ops will still
> fail with ESTALE...

so you need to invalidate all the cached pages for this inode
explicitly. nfs_refresh_inode will not detect any change in
mtime or i_size so it will not do this for you.

but this exposes us to the "unable to invalidate locked pages"
bug again.

and i think this is only a valid thing to do iff the mtime
and i_size are the same as our cached values but the fh has
changed. otherwise, you can fool yourself into thinking that
an entirely new file is the same as the old one.



-------------------------------------------------------
This SF.net email is sponsored by: The Robotic Monkeys at ThinkGeek
For a limited time only, get FREE Ground shipping on all orders of $35
or more. Hurry up and shop folks, this offer expires April 30th!
http://www.thinkgeek.com/freeshipping/?cpg=12297
_______________________________________________
NFS maillist - [email protected]
https://lists.sourceforge.net/lists/listinfo/nfs

2004-04-23 16:21:12

by Trond Myklebust

[permalink] [raw]
Subject: Re: [PATCH] Reinstantiating stale inodes

On Fri, 2004-04-23 at 12:01, Steve Dickson wrote:
> >This again means that people who think they have open files on the "old"
> >inode may suddenly find their program Oopsing or corrupting the new
> >file.
> >
> Again, this new fh is only gotten on getattrs any other ops will still
> fail with ESTALE...

No! That dentry is a shared object, and you may be changing it while
other ops are referencing/using it. There are no locks or any other
mechanisms here that ensure this is not the case.

Even if you do solve the above race, you still have another bug: you are
changing the object that the "struct file" refers to, and hence at best
you are going to cause all the existing open files to start reading
from/writing to the new file (which is a bug!) instead of returning
ESTALE.

Cheers,
Trond


-------------------------------------------------------
This SF.net email is sponsored by: The Robotic Monkeys at ThinkGeek
For a limited time only, get FREE Ground shipping on all orders of $35
or more. Hurry up and shop folks, this offer expires April 30th!
http://www.thinkgeek.com/freeshipping/?cpg=12297
_______________________________________________
NFS maillist - [email protected]
https://lists.sourceforge.net/lists/listinfo/nfs

2004-04-23 16:27:35

by Steve Dickson

[permalink] [raw]
Subject: Re: [PATCH] Reinstantiating stale inodes

Lever, Charles wrote:

>so you need to invalidate all the cached pages for this inode
>explicitly. nfs_refresh_inode will not detect any change in
>mtime or i_size so it will not do this for you.
>
>
Yes I tried invaliding the caches using both nfs_zap_caches()
and NFS_CACHEINV()... but just took care of some the ESTALEs
in a very random way...

SteveD.


-------------------------------------------------------
This SF.net email is sponsored by: The Robotic Monkeys at ThinkGeek
For a limited time only, get FREE Ground shipping on all orders of $35
or more. Hurry up and shop folks, this offer expires April 30th!
http://www.thinkgeek.com/freeshipping/?cpg=12297
_______________________________________________
NFS maillist - [email protected]
https://lists.sourceforge.net/lists/listinfo/nfs

2004-04-23 17:21:13

by Steve Dickson

[permalink] [raw]
Subject: Re: [PATCH] Reinstantiating stale inodes



Trond Myklebust wrote:

>Even if you do solve the above race, you still have another bug: you are
>changing the object that the "struct file" refers to, and hence at best
>you are going to cause all the existing open files to start reading
>from/writing to the new file (which is a bug!) instead of returning
>ESTALE.
>
>
Ok... this is the silver bullet that got me... :(

Question: if (atomic_read(&dentry->d_count) < 2)
can I assume that the current process is the only one
attached to the dentry and it would be safe reinstantiate
the dentry?

SteveD.



-------------------------------------------------------
This SF.net email is sponsored by: The Robotic Monkeys at ThinkGeek
For a limited time only, get FREE Ground shipping on all orders of $35
or more. Hurry up and shop folks, this offer expires April 30th!
http://www.thinkgeek.com/freeshipping/?cpg=12297
_______________________________________________
NFS maillist - [email protected]
https://lists.sourceforge.net/lists/listinfo/nfs

2004-04-23 17:49:11

by Trond Myklebust

[permalink] [raw]
Subject: Re: [PATCH] Reinstantiating stale inodes

On Fri, 2004-04-23 at 13:21, Steve Dickson wrote:

> Question: if (atomic_read(&dentry->d_count) < 2)
> can I assume that the current process is the only one
> attached to the dentry and it would be safe reinstantiate
> the dentry?

You can if and only if

- The dentry is unhashed, and you are holding the semaphore dir->i_sem
on the parent directory.
OR
- You are holding the spin lock dcache_lock.

Both of these two methods will prevent the VFS from looking the dentry
up.

Note: on 2.6.x, the second condition needs to be modified a bit: you
probably need to hold dentry->d_lock too since the 2.6.x version of
d_lookup() is by and large lockless...

Cheers,
Trond


-------------------------------------------------------
This SF.net email is sponsored by: The Robotic Monkeys at ThinkGeek
For a limited time only, get FREE Ground shipping on all orders of $35
or more. Hurry up and shop folks, this offer expires April 30th!
http://www.thinkgeek.com/freeshipping/?cpg=12297
_______________________________________________
NFS maillist - [email protected]
https://lists.sourceforge.net/lists/listinfo/nfs

2004-04-23 17:55:57

by Olaf Kirch

[permalink] [raw]
Subject: Re: [PATCH] Reinstantiating stale inodes

On Fri, Apr 23, 2004 at 11:50:41AM -0400, Steve Dickson wrote:
> client: writes 100m file called foo
> server: renames foo to foo.old and immediately creates foo
> client: failed with ESTALE

This shouldn't fail, because the file still exists, just
under a different name.

What will get you into trouble is if your client writes
to "foo" and you do a "mv bar foo".

> Again only getattrs will be followed up with lookups..

Yes, but it's just a matter of probabilities if the first
call that returns ESTALE is a write or a getattr (called from
revalidate_inode).

> >At a minimum, the lookup should occur at file open, and only if
> >there are no other users of the inode.
> >
> >
> Unfortunately, in this particular case, there were no opens...

So what exactly went wrong? A stat() on the files that just got
replaced?

Olaf
--
Olaf Kirch | The Hardware Gods hate me.
[email protected] |
---------------+


-------------------------------------------------------
This SF.net email is sponsored by: The Robotic Monkeys at ThinkGeek
For a limited time only, get FREE Ground shipping on all orders of $35
or more. Hurry up and shop folks, this offer expires April 30th!
http://www.thinkgeek.com/freeshipping/?cpg=12297
_______________________________________________
NFS maillist - [email protected]
https://lists.sourceforge.net/lists/listinfo/nfs

2004-04-23 18:43:54

by Steve Dickson

[permalink] [raw]
Subject: Re: [PATCH] Reinstantiating stale inodes

Olaf Kirch wrote:

>On Fri, Apr 23, 2004 at 11:50:41AM -0400, Steve Dickson wrote:
>
>
>>client: writes 100m file called foo
>>server: renames foo to foo.old and immediately creates foo
>>client: failed with ESTALE
>>
>>
>This shouldn't fail, because the file still exists, just
>under a different name.
>
>
This must have been cut and paste error... Because I ran the
test again and didn't see a ESTALE error...

>What will get you into trouble is if your client writes
>to "foo" and you do a "mv bar foo".
>
>
This also seem to work... I figured since foo was open it
got renamed to .nfs file until the first write was finished...

>So what exactly went wrong? A stat() on the files that just got
>replaced?
>
>
Yes... problem is the files get changed but the mtime on the directory
does not...

SteveD.


-------------------------------------------------------
This SF.net email is sponsored by: The Robotic Monkeys at ThinkGeek
For a limited time only, get FREE Ground shipping on all orders of $35
or more. Hurry up and shop folks, this offer expires April 30th!
http://www.thinkgeek.com/freeshipping/?cpg=12297
_______________________________________________
NFS maillist - [email protected]
https://lists.sourceforge.net/lists/listinfo/nfs

2004-04-23 18:50:30

by Olaf Kirch

[permalink] [raw]
Subject: Re: [PATCH] Reinstantiating stale inodes

On Fri, Apr 23, 2004 at 02:43:44PM -0400, Steve Dickson wrote:
> >What will get you into trouble is if your client writes
> >to "foo" and you do a "mv bar foo".
> >
> >
> This also seem to work... I figured since foo was open it
> got renamed to .nfs file until the first write was finished...

It will, as long as the mv is executed on the NFS client
where your writing process runs. But in the particular case
that prompted you do create that patch, rsync ran on the
NFS server, didn't it?

Olaf
--
Olaf Kirch | The Hardware Gods hate me.
[email protected] |
---------------+


-------------------------------------------------------
This SF.net email is sponsored by: The Robotic Monkeys at ThinkGeek
For a limited time only, get FREE Ground shipping on all orders of $35
or more. Hurry up and shop folks, this offer expires April 30th!
http://www.thinkgeek.com/freeshipping/?cpg=12297
_______________________________________________
NFS maillist - [email protected]
https://lists.sourceforge.net/lists/listinfo/nfs

2004-04-23 19:14:14

by Steve Dickson

[permalink] [raw]
Subject: Re: [PATCH] Reinstantiating stale inodes

--- linux-2.4.21/fs/nfs/inode.c.org 2004-04-17 18:26:32.000000000 -0400
+++ linux-2.4.21/fs/nfs/inode.c 2004-04-23 14:44:59.000000000 -0400
@@ -953,13 +953,74 @@ nfs_wait_on_inode(struct inode *inode, i
}

/*
+ * Reinstantiate an inode that has gone stale
+ */
+static int
+nfs_reinstantiate(
+ struct inode *dir,
+ struct dentry *dentry,
+ struct nfs_fattr *fattr)
+{
+ int error;
+ struct nfs_fh fhandle;
+ struct inode *inode;
+
+ /*
+ * We can only reinstantiate the inode if we are the
+ * only ones accessing it.
+ */
+ if (atomic_read(&dentry->d_count) > 1)
+ return EACCES;
+
+ error = NFS_PROTO(dir)->lookup(dir, &dentry->d_name, &fhandle, &fattr);
+ if (!error) {
+ error = -ENOMEM;
+ inode = nfs_fhget(dentry, &fhandle, &fattr);
+ if (inode) {
+ /*
+ * Check again this time holding the i_sem semaphore
+ * to make sure we are the only ones accessing this
+ * dentry
+ */
+ down(&dir->i_sem);
+ if (atomic_read(&dentry->d_count) < 2) {
+ d_drop(dentry);
+ dput(dentry);
+ d_instantiate(dentry, inode);
+ dentry->d_time = jiffies;
+ error = 0;
+ } else
+ iput(inode);
+ up(&dir->i_sem);
+ }
+ }
+ return error;
+}
+
+/*
* Externally visible revalidation function
*/
int
nfs_revalidate(struct dentry *dentry)
{
struct inode *inode = dentry->d_inode;
- return nfs_revalidate_inode(NFS_SERVER(inode), inode);
+ struct inode *pinode;
+ struct nfs_fattr fattr;
+ int error;
+
+ error = nfs_revalidate_inode(NFS_SERVER(inode), inode);
+ if (!error || error != -ESTALE)
+ return error;
+ /*
+ * We have a stale fh so ask the server for another one
+ */
+ pinode = dentry->d_parent->d_inode;
+ if (nfs_reinstantiate(pinode, dentry, &fattr) == 0) {
+ inode = dentry->d_inode;
+ if (nfs_refresh_inode(inode, &fattr) == 0)
+ error = 0;
+ }
+ return error;
}

/*


Attachments:
linux-2.4.21-nfs-estale.patch (1.79 kB)

2004-04-23 20:07:25

by Steve Dickson

[permalink] [raw]
Subject: Re: [PATCH] Reinstantiating stale inodes



Olaf Kirch wrote:

>But in the particular case that prompted you do create that patch, rsync ran on the
>NFS server, didn't it?
>
>
Yes...

SteveD.


-------------------------------------------------------
This SF.net email is sponsored by: The Robotic Monkeys at ThinkGeek
For a limited time only, get FREE Ground shipping on all orders of $35
or more. Hurry up and shop folks, this offer expires April 30th!
http://www.thinkgeek.com/freeshipping/?cpg=12297
_______________________________________________
NFS maillist - [email protected]
https://lists.sourceforge.net/lists/listinfo/nfs

2004-05-01 16:13:50

by Steve Dickson

[permalink] [raw]
Subject: Re: [PATCH] Reinstantiating stale inodes

--- linux-2.4.21/fs/nfs/inode.c.org 2004-04-17 18:26:32.000000000 -0400
+++ linux-2.4.21/fs/nfs/inode.c 2004-05-01 11:11:12.000000000 -0400
@@ -953,13 +953,48 @@ nfs_wait_on_inode(struct inode *inode, i
}

/*
+ * Reinstantiate an inode that has gone stale
+ */
+int
+nfs_reinstantiate(struct dentry *dentry)
+{
+ int error;
+ struct nfs_fh fhandle;
+ struct inode *dir = dentry->d_parent->d_inode;
+ struct inode *inode = dentry->d_inode;
+ struct nfs_fattr fattr;
+
+ nfs_zap_caches(dir);
+ nfs_zap_caches(inode);
+
+ error = NFS_PROTO(dir)->lookup(dir, &dentry->d_name, &fhandle, &fattr);
+ if (!error) {
+ memcpy(&inode->u.nfs_i.fh, &fhandle, sizeof(struct nfs_fh));
+ nfs_refresh_inode(inode, &fattr);
+ }
+
+ return error;
+}
+
+/*
* Externally visible revalidation function
*/
int
nfs_revalidate(struct dentry *dentry)
{
struct inode *inode = dentry->d_inode;
- return nfs_revalidate_inode(NFS_SERVER(inode), inode);
+ int error;
+
+ error = nfs_revalidate_inode(NFS_SERVER(inode), inode);
+ if (!error || error != -ESTALE)
+ return error;
+ /*
+ * We have a stale fh so ask the server for another one
+ */
+ if (nfs_reinstantiate(dentry) == 0)
+ error = 0;
+
+ return error;
}

/*
--- linux-2.4.21/include/linux/nfs_fs.h.org 2004-04-17 18:40:02.000000000 -0400
+++ linux-2.4.21/include/linux/nfs_fs.h 2004-05-01 10:55:59.000000000 -0400
@@ -159,6 +159,7 @@ extern struct inode *nfs_fhget(struct de
struct nfs_fattr *);
extern int __nfs_refresh_inode(struct inode *, struct nfs_fattr *);
extern int nfs_revalidate(struct dentry *);
+extern int nfs_reinstantiate(struct dentry *);
extern int nfs_permission(struct inode *, int);
extern int nfs_open(struct inode *, struct file *);
extern int nfs_release(struct inode *, struct file *);


Attachments:
linux-2.4.21-nfs-estale2.patch (1.73 kB)

2004-05-01 19:25:20

by Trond Myklebust

[permalink] [raw]
Subject: Re: [PATCH] Reinstantiating stale inodes

On Sat, 2004-05-01 at 12:13, Steve Dickson wrote:

> Is this something Marcelo might be interested in?

Vetoed!

You are not listening to what I am saying: you are NOT allowed to change
inodes in this manner. The filehandle defines which file you are
referencing. By changing the filehandle, you are changing files from
beneath other processes and your own process.

Imagine if I do

rm blah; ln /etc/passwd blah

Your patch means that anyone that was writing to file "blah" before you
deleted it, will suddenly find themselves overwriting /etc/passwd. That
is NOT POSIX-compatible behaviour!

The ONLY way to overcome a stale inode is to d_drop() the dentry, unhash
the inode, and then force the VFS to look up a new inode. That way only
new calls to open() end up overwriting /etc/passwd in the above case.

Cheers,
Trond


-------------------------------------------------------
This SF.Net email is sponsored by: Oracle 10g
Get certified on the hottest thing ever to hit the market... Oracle 10g.
Take an Oracle 10g class now, and we'll give you the exam FREE.
http://ads.osdn.com/?ad_id=3149&alloc_id=8166&op=click
_______________________________________________
NFS maillist - [email protected]
https://lists.sourceforge.net/lists/listinfo/nfs

2004-05-01 23:57:59

by Steve Dickson

[permalink] [raw]
Subject: Re: [PATCH] Reinstantiating stale inodes

Trond Myklebust wrote:

>You are not listening to what I am saying: you are NOT allowed to change
>inodes in this manner. The filehandle defines which file you are
>referencing. By changing the filehandle, you are changing files from
>beneath other processes and your own process.
>
>
Believe me... I hear you.... and do understand what I'm trying to do....
And I also apologize for being so persistent.... but just bothers the
hell out of me that other clients can recover from ESTALEs and we don't...

>Imagine if I do
>
>rm blah; ln /etc/passwd blah
>
>Your patch means that anyone that was writing to file "blah" before you
>deleted it, will suddenly find themselves overwriting /etc/passwd. That
>is NOT POSIX-compatible behaviour!
>
>
I see this point in theory, but in my testing, bad things don't seem to
happen.
The writes always fails with ESTALE... but I can't deny there is not a
window
just because I can't reproduce it...

>The ONLY way to overcome a stale inode is to d_drop() the dentry, unhash
>the inode, and then force the VFS to look up a new inode. That way only
>new calls to open() end up overwriting /etc/passwd in the above case.
>
>
I guess I would be interested on how other implementation handle this
problem,
since they seem to recover... Maybe their VFS layers is more friendly to
these types of things....

SteveD.



-------------------------------------------------------
This SF.Net email is sponsored by: Oracle 10g
Get certified on the hottest thing ever to hit the market... Oracle 10g.
Take an Oracle 10g class now, and we'll give you the exam FREE.
http://ads.osdn.com/?ad_id=3149&alloc_id=8166&op=click
_______________________________________________
NFS maillist - [email protected]
https://lists.sourceforge.net/lists/listinfo/nfs

2004-05-02 00:22:21

by Trond Myklebust

[permalink] [raw]
Subject: Re: [PATCH] Reinstantiating stale inodes

On Sat, 2004-05-01 at 19:57, Steve Dickson wrote:

> Believe me... I hear you.... and do understand what I'm trying to do....
> And I also apologize for being so persistent.... but just bothers the
> hell out of me that other clients can recover from ESTALEs and we don't...

So please tell me why the following patch (which addresses the
particular problem that you raised of someone resetting mtime on the
parent directory) does not suffice?

Cheers,
Trond


Attachments:
gnurr.dif (3.04 kB)

2004-05-02 03:19:20

by Steve Dickson

[permalink] [raw]
Subject: Re: [PATCH] Reinstantiating stale inodes

Trond Myklebust wrote:

>So please tell me why the following patch (which addresses the
>particular problem that you raised of someone resetting mtime on the
>parent directory) does not suffice?
>
>
Again this is were I started... and this patch does take care of the ESTALEs
but it also increases normal traffic by 2% to 3% (mostly getattrs and
lookups)
when I ran the connectathon04 tests... Granted 2% to 3% is not that much
of an increase and my testing is not that exact... but.... any increase
for an
error that generally does not happen, I didn't think would be acceptable...

But if this the patch thats going to make it into the 2.4 tree... so be
it....
since it does avoid the ESTALE issues and maybe things will be a bit
more coherent since we do send a few more getattrs and lookups...

Thanks for your guidance.... it is definitely appreciated!

SteveD.




-------------------------------------------------------
This SF.Net email is sponsored by: Oracle 10g
Get certified on the hottest thing ever to hit the market... Oracle 10g.
Take an Oracle 10g class now, and we'll give you the exam FREE.
http://ads.osdn.com/?ad_id=3149&alloc_id=8166&op=click
_______________________________________________
NFS maillist - [email protected]
https://lists.sourceforge.net/lists/listinfo/nfs

2004-05-02 03:28:39

by Trond Myklebust

[permalink] [raw]
Subject: Re: [PATCH] Reinstantiating stale inodes

# This is a BitKeeper generated diff -Nru style patch.
#
# ChangeSet
# 2004/02/09 11:09:22-06:00 [email protected]
# JFS: rename should update mtime on source and target directories
#
# fs/jfs/namei.c
# 2004/02/09 11:07:24-06:00 [email protected] +2 -2
# rename should update i_mtime on affected directories
#
diff -Nru a/fs/jfs/namei.c b/fs/jfs/namei.c
--- a/fs/jfs/namei.c Sat May 1 23:28:13 2004
+++ b/fs/jfs/namei.c Sat May 1 23:28:13 2004
@@ -1223,7 +1223,7 @@
old_ip->i_ctime = CURRENT_TIME;
mark_inode_dirty(old_ip);

- new_dir->i_ctime = CURRENT_TIME;
+ new_dir->i_ctime = new_dir->i_mtime = CURRENT_TIME;
mark_inode_dirty(new_dir);

/* Build list of inodes modified by this transaction */
@@ -1235,7 +1235,7 @@

if (old_dir != new_dir) {
iplist[ipcount++] = new_dir;
- old_dir->i_ctime = CURRENT_TIME;
+ old_dir->i_ctime = old_dir->i_mtime = CURRENT_TIME;
mark_inode_dirty(old_dir);
}


Attachments:
gnarf.dif (0.98 kB)

2004-05-03 19:51:00

by Steve Dickson

[permalink] [raw]
Subject: Re: [PATCH] Reinstantiating stale inodes

--- linux-2.4.21/fs/nfs/inode.c.org 2004-05-03 08:57:36.000000000 -0400
+++ linux-2.4.21/fs/nfs/inode.c 2004-05-03 15:03:57.000000000 -0400
@@ -958,8 +958,16 @@ nfs_wait_on_inode(struct inode *inode, i
int
nfs_revalidate(struct dentry *dentry)
{
+ int error;
struct inode *inode = dentry->d_inode;
- return nfs_revalidate_inode(NFS_SERVER(inode), inode);
+
+ error = nfs_revalidate_inode(NFS_SERVER(inode), inode);
+ if (error == -ESTALE) {
+ struct inode *dir = dentry->d_parent->d_inode;
+ nfs_zap_caches(dir);
+ }
+
+ return error;
}

/*


Attachments:
linux-2.4.21-nfs-estale3.patch (553.00 B)

2004-05-03 20:33:40

by Steve Dickson

[permalink] [raw]
Subject: Re: [PATCH] Reinstantiating stale inodes

Trond Myklebust wrote:

>On Mon, 2004-05-03 at 15:50, Steve Dickson wrote:
>
>
>
>>So I would like to propose a less intrusive patch what will simply zap
>>the directories caches when nfs_revalidate() sees an ESTALE.
>>
>>This patch does not effect traffic and only the first dir entry is
>>failed with ESTALE
>>and finally subsequent ls will not fail with ESTALEs....
>>
>>Comments?
>>
>>
>
>That one looks fine (although it will not apply to 2.6.x).
>
>
Its a 2.4 patch...

>I'm still curious to find out which operations are changing ctime on the
>parent directory without changing mtime, though.
>
>
Here are two if the runs I've seen... The first one is without the
patch, the second
one is with the patch...

client rpc stats:
calls retrans authrefrsh
30073 7 0
Client nfs v3:
null getattr setattr lookup access readlink
0 0% 2280 7% 1199 3% 2595 8% 1173 3% 250 0%
read write create mkdir symlink mknod
3954 13% 11892 39% 893 2% 175 0% 250 0% 0 0%
remove rmdir rename link readdir readdirplus
1391 4% 175 0% 376 1% 250 0% 0 0% 1119 3%
fsstat fsinfo pathconf commit
1500 4% 1 0% 0 0% 600 1%


Client rpc stats:
calls retrans authrefrsh
34134 22 0
Client nfs v3:
null getattr setattr lookup access readlink
0 0% 2124 6% 1199 3% 2979 8% 1174 3% 250 0%
read write create mkdir symlink mknod
3954 11% 15752 46% 893 2% 175 0% 250 0% 0 0%
remove rmdir rename link readdir readdirplus
1391 4% 175 0% 380 1% 250 0% 0 0% 1119 3%
fsstat fsinfo pathconf commit
1500 4% 1 0% 0 0% 568 1%

The lookups and writes seem to sky rocketing.... and I've seen other
runs (with using both mtime and ctimes) where the lookups, getattrs
and reads sky rocket

SteveD.


-------------------------------------------------------
This SF.Net email is sponsored by: Oracle 10g
Get certified on the hottest thing ever to hit the market... Oracle 10g.
Take an Oracle 10g class now, and we'll give you the exam FREE.
http://ads.osdn.com/?ad_id=3149&alloc_id=8166&op=click
_______________________________________________
NFS maillist - [email protected]
https://lists.sourceforge.net/lists/listinfo/nfs

2004-05-03 21:28:00

by Trond Myklebust

[permalink] [raw]
Subject: Re: [PATCH] Reinstantiating stale inodes

On Mon, 2004-05-03 at 16:33, Steve Dickson wrote:
> >
> Here are two if the runs I've seen... The first one is without the
> patch, the second
> one is with the patch...
>
> client rpc stats:
> calls retrans authrefrsh
> 30073 7 0
> Client nfs v3:
> null getattr setattr lookup access readlink
> 0 0% 2280 7% 1199 3% 2595 8% 1173 3% 250 0%
> read write create mkdir symlink mknod
> 3954 13% 11892 39% 893 2% 175 0% 250 0% 0 0%
> remove rmdir rename link readdir readdirplus
> 1391 4% 175 0% 376 1% 250 0% 0 0% 1119 3%
> fsstat fsinfo pathconf commit
> 1500 4% 1 0% 0 0% 600 1%
>
>
> Client rpc stats:
> calls retrans authrefrsh
> 34134 22 0
> Client nfs v3:
> null getattr setattr lookup access readlink
> 0 0% 2124 6% 1199 3% 2979 8% 1174 3% 250 0%
> read write create mkdir symlink mknod
> 3954 11% 15752 46% 893 2% 175 0% 250 0% 0 0%
> remove rmdir rename link readdir readdirplus
> 1391 4% 175 0% 380 1% 250 0% 0 0% 1119 3%
> fsstat fsinfo pathconf commit
> 1500 4% 1 0% 0 0% 568 1%
>
> The lookups and writes seem to sky rocketing....

Interesting... You've got a few more renames too.

I assume those are just extra sillyrenames() if this was a standard
Connectathon test workload. OK... So the extra sillyrename() might
explain why you get more lookups() (assuming at least one of the
sillyrenames is on one of those large directories that Cthon creates).
The extra writes probably account for the sillyrenames() because they
change the timing of close()...

So the big question is: *why do you get all those extra writes*?

Hmm... Just wanting to check: the above results were indeed made with
only the patch I sent you applied?
There weren't any extra changes to nfs_refresh_inode() that might cause
the actual page cache invalidation to depend on the inode ctime (as
opposed to just the lookup cache)?

Cheers,
Trond


-------------------------------------------------------
This SF.Net email is sponsored by: Oracle 10g
Get certified on the hottest thing ever to hit the market... Oracle 10g.
Take an Oracle 10g class now, and we'll give you the exam FREE.
http://ads.osdn.com/?ad_id=3149&alloc_id=8166&op=click
_______________________________________________
NFS maillist - [email protected]
https://lists.sourceforge.net/lists/listinfo/nfs

2004-05-03 20:16:12

by Trond Myklebust

[permalink] [raw]
Subject: Re: [PATCH] Reinstantiating stale inodes

On Mon, 2004-05-03 at 15:50, Steve Dickson wrote:

> So I would like to propose a less intrusive patch what will simply zap
> the directories caches when nfs_revalidate() sees an ESTALE.
>
> This patch does not effect traffic and only the first dir entry is
> failed with ESTALE
> and finally subsequent ls will not fail with ESTALEs....
>
> Comments?

That one looks fine (although it will not apply to 2.6.x).

I'm still curious to find out which operations are changing ctime on the
parent directory without changing mtime, though.

I've identified only two more (beside utime()), so far: link()/unlink()
change the ctime on the file but both do set mtime and ctime on the
parent directory.

Cheers,
Trond


-------------------------------------------------------
This SF.Net email is sponsored by: Oracle 10g
Get certified on the hottest thing ever to hit the market... Oracle 10g.
Take an Oracle 10g class now, and we'll give you the exam FREE.
http://ads.osdn.com/?ad_id=3149&alloc_id=8166&op=click
_______________________________________________
NFS maillist - [email protected]
https://lists.sourceforge.net/lists/listinfo/nfs

2004-05-04 19:05:35

by Steve Dickson

[permalink] [raw]
Subject: Re: [PATCH] Reinstantiating stale inodes

--- linux-2.4/fs/nfs/dir.c.orig 2003-11-11 16:55:40.000000000 -0500
+++ linux-2.4/fs/nfs/dir.c 2004-05-04 13:26:20.000000000 -0400
@@ -421,7 +421,7 @@ int nfs_check_verifier(struct inode *dir
return 1;
if (nfs_revalidate_inode(NFS_SERVER(dir), dir))
return 0;
- return time_after(dentry->d_time, NFS_MTIME_UPDATE(dir));
+ return time_after(dentry->d_time, NFS_CTIME_UPDATE(dir));
}

/*
--- linux-2.4/fs/nfs/inode.c.orig 2004-05-04 13:26:21.000000000 -0400
+++ linux-2.4/fs/nfs/inode.c 2004-05-04 13:28:20.000000000 -0400
@@ -821,8 +821,16 @@ nfs_wait_on_inode(struct inode *inode, i
int
nfs_revalidate(struct dentry *dentry)
{
+ int error;
struct inode *inode = dentry->d_inode;
- return nfs_revalidate_inode(NFS_SERVER(inode), inode);
+
+ error = nfs_revalidate_inode(NFS_SERVER(inode), inode);
+ if (error == -ESTALE) {
+ struct inode *dir = dentry->d_parent->d_inode;
+ nfs_zap_caches(dir);
+ }
+
+ return error;
}

/*
@@ -1054,16 +1062,16 @@ __nfs_refresh_inode(struct inode *inode,
if (nfs_have_writebacks(inode) && new_isize < inode->i_size)
new_isize = inode->i_size;

- NFS_CACHE_CTIME(inode) = fattr->ctime;
- inode->i_ctime = nfs_time_to_secs(fattr->ctime);
+ if (NFS_CACHE_CTIME(inode) != fattr->ctime) {
+ NFS_CACHE_CTIME(inode) = fattr->ctime;
+ inode->i_ctime = nfs_time_to_secs(fattr->ctime);
+ NFS_CTIME_UPDATE(inode) = jiffies;
+ }

inode->i_atime = new_atime;

- if (NFS_CACHE_MTIME(inode) != new_mtime) {
- NFS_MTIME_UPDATE(inode) = jiffies;
- NFS_CACHE_MTIME(inode) = new_mtime;
- inode->i_mtime = nfs_time_to_secs(new_mtime);
- }
+ NFS_CACHE_MTIME(inode) = new_mtime;
+ inode->i_mtime = nfs_time_to_secs(new_mtime);

NFS_CACHE_ISIZE(inode) = new_size;
inode->i_size = new_isize;
--- linux-2.4/include/linux/nfs_fs.h.orig 2004-05-04 13:26:23.000000000 -0400
+++ linux-2.4/include/linux/nfs_fs.h 2004-05-04 13:26:24.000000000 -0400
@@ -78,7 +78,7 @@ static inline struct nfs_inode_info *NFS
#define NFS_CONGESTED(inode) (RPC_CONGESTED(NFS_CLIENT(inode)))
#define NFS_COOKIEVERF(inode) ((inode)->u.nfs_i.cookieverf)
#define NFS_READTIME(inode) ((inode)->u.nfs_i.read_cache_jiffies)
-#define NFS_MTIME_UPDATE(inode) ((inode)->u.nfs_i.cache_mtime_jiffies)
+#define NFS_CTIME_UPDATE(inode) ((inode)->u.nfs_i.cache_ctime_jiffies)
#define NFS_CACHE_CTIME(inode) ((inode)->u.nfs_i.read_cache_ctime)
#define NFS_CACHE_MTIME(inode) ((inode)->u.nfs_i.read_cache_mtime)
#define NFS_CACHE_ISIZE(inode) ((inode)->u.nfs_i.read_cache_isize)
--- linux-2.4/include/linux/nfs_fs_i.h.orig 2004-05-04 13:26:24.000000000 -0400
+++ linux-2.4/include/linux/nfs_fs_i.h 2004-05-04 13:26:24.000000000 -0400
@@ -49,10 +49,10 @@ struct nfs_inode_info {
unsigned long attrtimeo_timestamp;

/*
- * Timestamp that dates the change made to read_cache_mtime.
+ * Timestamp that dates the change made to read_cache_ctime.
* This is of use for dentry revalidation
*/
- unsigned long cache_mtime_jiffies;
+ unsigned long cache_ctime_jiffies;

/*
* This is the cookie verifier used for NFSv3 readdir


Attachments:
linux-2.4-nfs-estale.patch (2.99 kB)

2004-05-06 17:38:43

by Steve Dickson

[permalink] [raw]
Subject: Re: [PATCH] Reinstantiating stale inodes

--- linux/fs/nfs/inode.c.orig 2004-05-06 11:31:23.000000000 -0400
+++ linux/fs/nfs/inode.c 2004-05-06 13:03:27.000000000 -0400
@@ -958,8 +958,17 @@ nfs_wait_on_inode(struct inode *inode, i
int
nfs_revalidate(struct dentry *dentry)
{
+ int error;
struct inode *inode = dentry->d_inode;
- return nfs_revalidate_inode(NFS_SERVER(inode), inode);
+
+ error = nfs_revalidate_inode(NFS_SERVER(inode), inode);
+ if (error == -ESTALE) {
+ struct inode *dir = dentry->d_parent->d_inode;
+ nfs_zap_caches(dir);
+ d_drop(dentry);
+ }
+
+ return error;
}

/*
--- linux/fs/stat.c.orig 2004-05-06 11:31:23.000000000 -0400
+++ linux/fs/stat.c 2004-05-06 13:03:27.000000000 -0400
@@ -143,8 +143,9 @@ static int cp_new_stat(struct inode * in
asmlinkage long sys_stat(char * filename, struct __old_kernel_stat * statbuf)
{
struct nameidata nd;
- int error;
+ int error, errcnt = 0;

+again:
error = user_path_walk(filename, &nd);
if (!error) {
error = do_revalidate(nd.dentry);
@@ -152,6 +153,10 @@ asmlinkage long sys_stat(char * filename
error = cp_old_stat(nd.dentry->d_inode, statbuf);
path_release(&nd);
}
+ if (error == -ESTALE && !errcnt) {
+ errcnt++;
+ goto again;
+ }
return error;
}
#endif
@@ -159,8 +164,9 @@ asmlinkage long sys_stat(char * filename
asmlinkage long sys_newstat(char * filename, struct stat * statbuf)
{
struct nameidata nd;
- int error;
+ int error, errcnt = 0;

+again:
error = user_path_walk(filename, &nd);
if (!error) {
error = do_revalidate(nd.dentry);
@@ -168,6 +174,11 @@ asmlinkage long sys_newstat(char * filen
error = cp_new_stat(nd.dentry->d_inode, statbuf);
path_release(&nd);
}
+ if (error == -ESTALE && !errcnt) {
+ errcnt++;
+ goto again;
+ }
+
return error;
}

@@ -180,8 +191,9 @@ asmlinkage long sys_newstat(char * filen
asmlinkage long sys_lstat(char * filename, struct __old_kernel_stat * statbuf)
{
struct nameidata nd;
- int error;
+ int error, errcnt = 0;

+again:
error = user_path_walk_link(filename, &nd);
if (!error) {
error = do_revalidate(nd.dentry);
@@ -189,6 +201,11 @@ asmlinkage long sys_lstat(char * filenam
error = cp_old_stat(nd.dentry->d_inode, statbuf);
path_release(&nd);
}
+ if (error == -ESTALE && !errcnt) {
+ errcnt++;
+ goto again;
+ }
+
return error;
}

@@ -197,8 +214,9 @@ asmlinkage long sys_lstat(char * filenam
asmlinkage long sys_newlstat(char * filename, struct stat * statbuf)
{
struct nameidata nd;
- int error;
+ int error, errcnt = 0;

+again:
error = user_path_walk_link(filename, &nd);
if (!error) {
error = do_revalidate(nd.dentry);
@@ -206,6 +224,12 @@ asmlinkage long sys_newlstat(char * file
error = cp_new_stat(nd.dentry->d_inode, statbuf);
path_release(&nd);
}
+
+ if (error == -ESTALE && !errcnt) {
+ errcnt++;
+ goto again;
+ }
+
return error;
}

@@ -218,8 +242,10 @@ asmlinkage long sys_newlstat(char * file
asmlinkage long sys_fstat(unsigned int fd, struct __old_kernel_stat * statbuf)
{
struct file * f;
- int err = -EBADF;
+ int err, errcnt = 0;

+again:
+ err = -EBADF;
f = fget(fd);
if (f) {
struct dentry * dentry = f->f_dentry;
@@ -229,6 +255,11 @@ asmlinkage long sys_fstat(unsigned int f
err = cp_old_stat(dentry->d_inode, statbuf);
fput(f);
}
+ if (err == -ESTALE && !errcnt) {
+ errcnt++;
+ goto again;
+ }
+
return err;
}

@@ -237,8 +268,10 @@ asmlinkage long sys_fstat(unsigned int f
asmlinkage long sys_newfstat(unsigned int fd, struct stat * statbuf)
{
struct file * f;
- int err = -EBADF;
+ int err, errcnt = 0;

+again:
+ err = -EBADF;
f = fget(fd);
if (f) {
struct dentry * dentry = f->f_dentry;
@@ -248,6 +281,11 @@ asmlinkage long sys_newfstat(unsigned in
err = cp_new_stat(dentry->d_inode, statbuf);
fput(f);
}
+ if (err == -ESTALE && !errcnt) {
+ errcnt++;
+ goto again;
+ }
+
return err;
}

@@ -340,8 +378,9 @@ static long cp_new_stat64(struct inode *
asmlinkage long sys_stat64(char * filename, struct stat64 * statbuf, long flags)
{
struct nameidata nd;
- int error;
+ int error, errcnt = 0;

+again:
error = user_path_walk(filename, &nd);
if (!error) {
error = do_revalidate(nd.dentry);
@@ -349,14 +388,20 @@ asmlinkage long sys_stat64(char * filena
error = cp_new_stat64(nd.dentry->d_inode, statbuf);
path_release(&nd);
}
+ if (error == -ESTALE && !errcnt) {
+ errcnt++;
+ goto again;
+ }
+
return error;
}

asmlinkage long sys_lstat64(char * filename, struct stat64 * statbuf, long flags)
{
struct nameidata nd;
- int error;
+ int error, errcnt= 0;

+again:
error = user_path_walk_link(filename, &nd);
if (!error) {
error = do_revalidate(nd.dentry);
@@ -364,14 +409,21 @@ asmlinkage long sys_lstat64(char * filen
error = cp_new_stat64(nd.dentry->d_inode, statbuf);
path_release(&nd);
}
+ if (error == -ESTALE && !errcnt) {
+ errcnt++;
+ goto again;
+ }
+
return error;
}

asmlinkage long sys_fstat64(unsigned long fd, struct stat64 * statbuf, long flags)
{
struct file * f;
- int err = -EBADF;
+ int err, errcnt = 0;

+again:
+ err = -EBADF;
f = fget(fd);
if (f) {
struct dentry * dentry = f->f_dentry;
@@ -381,6 +433,10 @@ asmlinkage long sys_fstat64(unsigned lon
err = cp_new_stat64(dentry->d_inode, statbuf);
fput(f);
}
+ if (err == -ESTALE && !errcnt) {
+ errcnt++;
+ goto again;
+ }
return err;
}


Attachments:
linux-2.4.21-nfs-estale4.patch (5.24 kB)