2024-05-22 22:19:34

by Scott Mayhew

[permalink] [raw]
Subject: [PATCH] nfs: don't invalidate dentries on transient errors

This is a slight variation on a patch previously proposed by Neil Brown
that never got merged.

Prior to commit 5ceb9d7fdaaf ("NFS: Refactor nfs_lookup_revalidate()"),
any error from nfs_lookup_verify_inode() other than -ESTALE would result
in nfs_lookup_revalidate() returning that error (-ESTALE is mapped to
zero).

Since that commit, all errors result in nfs_lookup_revalidate()
returning zero, resulting in dentries being invalidated where they
previously were not (particularly in the case of -ERESTARTSYS).

Fix it by passing the actual error code to nfs_lookup_revalidate_done(),
and leaving the decision on whether to map the error code to zero or
one to nfs_lookup_revalidate_done().

A simple reproducer is to run the following python code in a
subdirectory of an NFS mount (not in the root of the NFS mount):

---8<---
# Reproducer: run from a subdirectory of an NFS mount (not the mount root).
import os
import multiprocessing
import time

if __name__=="__main__":
    multiprocessing.set_start_method("spawn")

    # Repeatedly resolve the current directory while creating and tearing
    # down a spawn-based worker pool; stop and report on the first exception.
    count = 0
    while True:
        try:
            os.getcwd()
            pool = multiprocessing.Pool(10)
            pool.close()
            pool.terminate()
            count += 1
        except Exception as e:
            print(f"Failed after {count} iterations")
            print(e)
            break
---8<---

Prior to commit 5ceb9d7fdaaf, the above code would run indefinitely.
After commit 5ceb9d7fdaaf, it fails almost immediately with -ENOENT.

Signed-off-by: Scott Mayhew <[email protected]>
---
fs/nfs/dir.c | 24 +++++++++++-------------
1 file changed, 11 insertions(+), 13 deletions(-)

diff --git a/fs/nfs/dir.c b/fs/nfs/dir.c
index ac505671efbd..d9264ed4ac52 100644
--- a/fs/nfs/dir.c
+++ b/fs/nfs/dir.c
@@ -1635,6 +1635,14 @@ nfs_lookup_revalidate_done(struct inode *dir, struct dentry *dentry,
if (inode && IS_ROOT(dentry))
error = 1;
break;
+ case -ESTALE:
+ case -ENOENT:
+ error = 0;
+ break;
+ case -ETIMEDOUT:
+ if (NFS_SERVER(inode)->flags & NFS_MOUNT_SOFTREVAL)
+ error = 1;
+ break;
}
trace_nfs_lookup_revalidate_exit(dir, dentry, 0, error);
return error;
@@ -1680,18 +1688,8 @@ static int nfs_lookup_revalidate_dentry(struct inode *dir,

dir_verifier = nfs_save_change_attribute(dir);
ret = NFS_PROTO(dir)->lookup(dir, dentry, fhandle, fattr);
- if (ret < 0) {
- switch (ret) {
- case -ESTALE:
- case -ENOENT:
- ret = 0;
- break;
- case -ETIMEDOUT:
- if (NFS_SERVER(inode)->flags & NFS_MOUNT_SOFTREVAL)
- ret = 1;
- }
+ if (ret < 0)
goto out;
- }

/* Request help from readdirplus */
nfs_lookup_advise_force_readdirplus(dir, flags);
@@ -1735,7 +1733,7 @@ nfs_do_lookup_revalidate(struct inode *dir, struct dentry *dentry,
unsigned int flags)
{
struct inode *inode;
- int error;
+ int error = 0;

nfs_inc_stats(dir, NFSIOS_DENTRYREVALIDATE);
inode = d_inode(dentry);
@@ -1780,7 +1778,7 @@ nfs_do_lookup_revalidate(struct inode *dir, struct dentry *dentry,
out_bad:
if (flags & LOOKUP_RCU)
return -ECHILD;
- return nfs_lookup_revalidate_done(dir, dentry, inode, 0);
+ return nfs_lookup_revalidate_done(dir, dentry, inode, error);
}

static int
--
2.44.0



2024-05-22 22:50:06

by Trond Myklebust

[permalink] [raw]
Subject: Re: [PATCH] nfs: don't invalidate dentries on transient errors

On Wed, 2024-05-22 at 18:19 -0400, Scott Mayhew wrote:
> This is a slight variation on a patch previously proposed by Neil
> Brown
> that never got merged.
>
> Prior to commit 5ceb9d7fdaaf ("NFS: Refactor
> nfs_lookup_revalidate()"),
> any error from nfs_lookup_verify_inode() other than -ESTALE would
> result
> in nfs_lookup_revalidate() returning that error (-ESTALE is mapped to
> zero).
>
> Since that commit, all errors result in nfs_lookup_revalidate()
> returning zero, resulting in dentries being invalidated where they
> previously were not (particularly in the case of -ERESTARTSYS).
>
> Fix it by passing the actual error code to
> nfs_lookup_revalidate_done(),
> and leaving the decision on whether to map the error code to zero or
> one to nfs_lookup_revalidate_done().
>
> A simple reproducer is to run the following python code in a
> subdirectory of an NFS mount (not in the root of the NFS mount):
>
> ---8<---
> import os
> import multiprocessing
> import time
>
> if __name__=="__main__":
>     multiprocessing.set_start_method("spawn")
>
>     count = 0
>     while True:
>         try:
>             os.getcwd()
>             pool = multiprocessing.Pool(10)
>             pool.close()
>             pool.terminate()
>             count += 1
>         except Exception as e:
>             print(f"Failed after {count} iterations")
>             print(e)
>             break
> ---8<---
>
> Prior to commit 5ceb9d7fdaaf, the above code would run indefinitely.
> After commit 5ceb9d7fdaaf, it fails almost immediately with -ENOENT.
>
> Signed-off-by: Scott Mayhew <[email protected]>
> ---
>  fs/nfs/dir.c | 24 +++++++++++-------------
>  1 file changed, 11 insertions(+), 13 deletions(-)
>
> diff --git a/fs/nfs/dir.c b/fs/nfs/dir.c
> index ac505671efbd..d9264ed4ac52 100644
> --- a/fs/nfs/dir.c
> +++ b/fs/nfs/dir.c
> @@ -1635,6 +1635,14 @@ nfs_lookup_revalidate_done(struct inode *dir,
> struct dentry *dentry,
>   if (inode && IS_ROOT(dentry))
>   error = 1;
>   break;
> + case -ESTALE:
> + case -ENOENT:
> + error = 0;
> + break;
> + case -ETIMEDOUT:
> + if (NFS_SERVER(inode)->flags & NFS_MOUNT_SOFTREVAL)
> + error = 1;
> + break;
>   }
>   trace_nfs_lookup_revalidate_exit(dir, dentry, 0, error);
>   return error;
> @@ -1680,18 +1688,8 @@ static int nfs_lookup_revalidate_dentry(struct
> inode *dir,
>  
>   dir_verifier = nfs_save_change_attribute(dir);
>   ret = NFS_PROTO(dir)->lookup(dir, dentry, fhandle, fattr);
> - if (ret < 0) {
> - switch (ret) {
> - case -ESTALE:
> - case -ENOENT:
> - ret = 0;
> - break;
> - case -ETIMEDOUT:
> - if (NFS_SERVER(inode)->flags &
> NFS_MOUNT_SOFTREVAL)
> - ret = 1;
> - }
> + if (ret < 0)
>   goto out;
> - }
>  
>   /* Request help from readdirplus */
>   nfs_lookup_advise_force_readdirplus(dir, flags);
> @@ -1735,7 +1733,7 @@ nfs_do_lookup_revalidate(struct inode *dir,
> struct dentry *dentry,
>   unsigned int flags)
>  {
>   struct inode *inode;
> - int error;
> + int error = 0;
>  
>   nfs_inc_stats(dir, NFSIOS_DENTRYREVALIDATE);
>   inode = d_inode(dentry);
> @@ -1780,7 +1778,7 @@ nfs_do_lookup_revalidate(struct inode *dir,
> struct dentry *dentry,
>  out_bad:
>   if (flags & LOOKUP_RCU)
>   return -ECHILD;
> - return nfs_lookup_revalidate_done(dir, dentry, inode, 0);
> + return nfs_lookup_revalidate_done(dir, dentry, inode,
> error);

Won't this now cause us to skip the special handling of the root
directory in nfs_lookup_revalidate_done() if the call to
nfs_lookup_verify_inode() fails with an error?

>  }
>  
>  static int

--
Trond Myklebust
Linux NFS client maintainer, Hammerspace
[email protected]


2024-05-23 12:18:27

by Scott Mayhew

[permalink] [raw]
Subject: Re: [PATCH] nfs: don't invalidate dentries on transient errors

On Wed, 22 May 2024, Trond Myklebust wrote:

> On Wed, 2024-05-22 at 18:19 -0400, Scott Mayhew wrote:
> > This is a slight variation on a patch previously proposed by Neil
> > Brown
> > that never got merged.
> >
> > Prior to commit 5ceb9d7fdaaf ("NFS: Refactor
> > nfs_lookup_revalidate()"),
> > any error from nfs_lookup_verify_inode() other than -ESTALE would
> > result
> > in nfs_lookup_revalidate() returning that error (-ESTALE is mapped to
> > zero).
> >
> > Since that commit, all errors result in nfs_lookup_revalidate()
> > returning zero, resulting in dentries being invalidated where they
> > previously were not (particularly in the case of -ERESTARTSYS).
> >
> > Fix it by passing the actual error code to
> > nfs_lookup_revalidate_done(),
> > and leaving the decision on whether to map the error code to zero or
> > one to nfs_lookup_revalidate_done().
> >
> > A simple reproducer is to run the following python code in a
> > subdirectory of an NFS mount (not in the root of the NFS mount):
> >
> > ---8<---
> > import os
> > import multiprocessing
> > import time
> >
> > if __name__=="__main__":
> >     multiprocessing.set_start_method("spawn")
> >
> >     count = 0
> >     while True:
> >         try:
> >             os.getcwd()
> >             pool = multiprocessing.Pool(10)
> >             pool.close()
> >             pool.terminate()
> >             count += 1
> >         except Exception as e:
> >             print(f"Failed after {count} iterations")
> >             print(e)
> >             break
> > ---8<---
> >
> > Prior to commit 5ceb9d7fdaaf, the above code would run indefinitely.
> > After commit 5ceb9d7fdaaf, it fails almost immediately with -ENOENT.
> >
> > Signed-off-by: Scott Mayhew <[email protected]>
> > ---
> >  fs/nfs/dir.c | 24 +++++++++++-------------
> >  1 file changed, 11 insertions(+), 13 deletions(-)
> >
> > diff --git a/fs/nfs/dir.c b/fs/nfs/dir.c
> > index ac505671efbd..d9264ed4ac52 100644
> > --- a/fs/nfs/dir.c
> > +++ b/fs/nfs/dir.c
> > @@ -1635,6 +1635,14 @@ nfs_lookup_revalidate_done(struct inode *dir,
> > struct dentry *dentry,
> >   if (inode && IS_ROOT(dentry))
> >   error = 1;
> >   break;
> > + case -ESTALE:
> > + case -ENOENT:
> > + error = 0;
> > + break;
> > + case -ETIMEDOUT:
> > + if (NFS_SERVER(inode)->flags & NFS_MOUNT_SOFTREVAL)
> > + error = 1;
> > + break;
> >   }
> >   trace_nfs_lookup_revalidate_exit(dir, dentry, 0, error);
> >   return error;
> > @@ -1680,18 +1688,8 @@ static int nfs_lookup_revalidate_dentry(struct
> > inode *dir,
> >
> >   dir_verifier = nfs_save_change_attribute(dir);
> >   ret = NFS_PROTO(dir)->lookup(dir, dentry, fhandle, fattr);
> > - if (ret < 0) {
> > - switch (ret) {
> > - case -ESTALE:
> > - case -ENOENT:
> > - ret = 0;
> > - break;
> > - case -ETIMEDOUT:
> > - if (NFS_SERVER(inode)->flags &
> > NFS_MOUNT_SOFTREVAL)
> > - ret = 1;
> > - }
> > + if (ret < 0)
> >   goto out;
> > - }
> >
> >   /* Request help from readdirplus */
> >   nfs_lookup_advise_force_readdirplus(dir, flags);
> > @@ -1735,7 +1733,7 @@ nfs_do_lookup_revalidate(struct inode *dir,
> > struct dentry *dentry,
> >   unsigned int flags)
> >  {
> >   struct inode *inode;
> > - int error;
> > + int error = 0;
> >
> >   nfs_inc_stats(dir, NFSIOS_DENTRYREVALIDATE);
> >   inode = d_inode(dentry);
> > @@ -1780,7 +1778,7 @@ nfs_do_lookup_revalidate(struct inode *dir,
> > struct dentry *dentry,
> >  out_bad:
> >   if (flags & LOOKUP_RCU)
> >   return -ECHILD;
> > - return nfs_lookup_revalidate_done(dir, dentry, inode, 0);
> > + return nfs_lookup_revalidate_done(dir, dentry, inode,
> > error);
>
> Won't this now cause us to skip the special handling of the root
> directory in nfs_lookup_revalidate_done() if the call to
> nfs_lookup_verify_inode() fails with an error?

Yes, it will. I'll send a v2 in a bit.

-Scott
>
> >  }
> >
> >  static int
>
> --
> Trond Myklebust
> Linux NFS client maintainer, Hammerspace
> [email protected]
>
>