2008-01-18 15:36:25

by Peter Staubach

[permalink] [raw]
Subject: [PATCH 1/3] enhanced ESTALE error handling

--- linux-2.6.23.i686/fs/namei.c.org
+++ linux-2.6.23.i686/fs/namei.c
@@ -741,7 +741,7 @@ static __always_inline void follow_dotdo
{
struct fs_struct *fs = current->fs;

- while(1) {
+ while (1) {
struct vfsmount *parent;
struct dentry *old = nd->dentry;

@@ -840,7 +840,7 @@ static fastcall int __link_path_walk(con
lookup_flags = LOOKUP_FOLLOW | (nd->flags & LOOKUP_CONTINUE);

/* At this point we know we have a real path component. */
- for(;;) {
+ for (;;) {
unsigned long hash;
struct qstr this;
unsigned int c;
@@ -992,7 +992,7 @@ return_reval:
*/
if (nd->dentry && nd->dentry->d_sb &&
(nd->dentry->d_sb->s_type->fs_flags & FS_REVAL_DOT)) {
- err = -ESTALE;
+ err = -ENOENT;
/* Note: we do not d_invalidate() */
if (!nd->dentry->d_op->d_revalidate(nd->dentry, nd))
break;
@@ -1003,6 +1003,8 @@ out_dput:
dput_path(&next, nd);
break;
}
+ if (err == -ESTALE)
+ d_drop(nd->dentry);
path_release(nd);
return_err:
return err;
@@ -1025,12 +1027,27 @@ static int fastcall link_path_walk(const
mntget(save.mnt);

result = __link_path_walk(name, nd);
- if (result == -ESTALE) {
+ while (result == -ESTALE) {
+ /*
+ * If no progress was made looking up the pathname,
+ * then stop and return ENOENT instead of ESTALE.
+ */
+ if (nd->dentry == save.dentry) {
+ result = -ENOENT;
+ break;
+ }
*nd = save;
dget(nd->dentry);
mntget(nd->mnt);
nd->flags |= LOOKUP_REVAL;
result = __link_path_walk(name, nd);
+ /*
+ * If no progress was made this time, then return
+ * ENOENT instead of ESTALE because no recovery
+ * is possible to recover the stale file handle.
+ */
+ if (result == -ESTALE && nd->dentry == save.dentry)
+ result = -ENOENT;
}

dput(save.dentry);
@@ -1268,8 +1285,8 @@ int path_lookup_open(int dfd, const char
* @create_mode: create intent flags
*/
static int path_lookup_create(int dfd, const char *name,
- unsigned int lookup_flags, struct nameidata *nd,
- int open_flags, int create_mode)
+ unsigned int lookup_flags, struct nameidata *nd,
+ int open_flags, int create_mode)
{
return __path_lookup_intent_open(dfd, name, lookup_flags|LOOKUP_CREATE,
nd, open_flags, create_mode);
@@ -1712,7 +1729,10 @@ int open_namei(int dfd, const char *path
int acc_mode, error;
struct path path;
struct dentry *dir;
- int count = 0;
+ int count;
+
+top:
+ count = 0;

acc_mode = ACC_MODE(flag);

@@ -1739,7 +1759,8 @@ int open_namei(int dfd, const char *path
/*
* Create - we need to know the parent.
*/
- error = path_lookup_create(dfd,pathname,LOOKUP_PARENT,nd,flag,mode);
+ error = path_lookup_create(dfd, pathname, LOOKUP_PARENT, nd,
+ flag, mode);
if (error)
return error;

@@ -1812,10 +1833,17 @@ ok:
return 0;

exit_dput:
+ if (error == -ESTALE)
+ d_drop(path.dentry);
dput_path(&path, nd);
exit:
if (!IS_ERR(nd->intent.open.file))
release_open_intent(nd);
+ if (error == -ESTALE) {
+ d_drop(nd->dentry);
+ path_release(nd);
+ goto top;
+ }
path_release(nd);
return error;

@@ -1825,7 +1853,7 @@ do_link:
goto exit_dput;
/*
* This is subtle. Instead of calling do_follow_link() we do the
- * thing by hands. The reason is that this way we have zero link_count
+ * thing by hand. The reason is that this way we have zero link_count
* and path_walk() (called from ->follow_link) honoring LOOKUP_PARENT.
* After that we have the parent and last component, i.e.
* we are in the same situation as after the first path_walk().
@@ -1844,6 +1872,8 @@ do_link:
* with "intent.open".
*/
release_open_intent(nd);
+ if (error == ESTALE)
+ goto top;
return error;
}
nd->flags &= ~LOOKUP_PARENT;
@@ -1857,7 +1887,7 @@ do_link:
goto exit;
}
error = -ELOOP;
- if (count++==32) {
+ if (count++ == 32) {
__putname(nd->last.name);
goto exit;
}


Attachments:
estale.lookup (3.79 kB)

2008-01-18 16:14:44

by Matthew Wilcox

[permalink] [raw]
Subject: Re: [PATCH 1/3] enhanced ESTALE error handling

On Fri, Jan 18, 2008 at 10:36:01AM -0500, Peter Staubach wrote:
> @@ -1025,12 +1027,27 @@ static int fastcall link_path_walk(const
> mntget(save.mnt);
>
> result = __link_path_walk(name, nd);
> - if (result == -ESTALE) {
> + while (result == -ESTALE) {
> + /*
> + * If no progress was made looking up the pathname,
> + * then stop and return ENOENT instead of ESTALE.
> + */
> + if (nd->dentry == save.dentry) {
> + result = -ENOENT;
> + break;
> + }
> *nd = save;
> dget(nd->dentry);
> mntget(nd->mnt);
> nd->flags |= LOOKUP_REVAL;
> result = __link_path_walk(name, nd);
> + /*
> + * If no progress was made this time, then return
> + * ENOENT instead of ESTALE because no recovery
> + * is possible to recover the stale file handle.
> + */
> + if (result == -ESTALE && nd->dentry == save.dentry)
> + result = -ENOENT;
> }
>
> dput(save.dentry);

Why do you need both of these tests? The first one should be enough,
surely?

> @@ -1268,8 +1285,8 @@ int path_lookup_open(int dfd, const char
> * @create_mode: create intent flags
> */
> static int path_lookup_create(int dfd, const char *name,
> - unsigned int lookup_flags, struct nameidata *nd,
> - int open_flags, int create_mode)
> + unsigned int lookup_flags, struct nameidata *nd,
> + int open_flags, int create_mode)

Gratuitous reformatting?

> @@ -1712,7 +1729,10 @@ int open_namei(int dfd, const char *path
> int acc_mode, error;
> struct path path;
> struct dentry *dir;
> - int count = 0;
> + int count;
> +
> +top:
> + count = 0;
>
> acc_mode = ACC_MODE(flag);
>
> @@ -1739,7 +1759,8 @@ int open_namei(int dfd, const char *path
> /*
> * Create - we need to know the parent.
> */
> - error = path_lookup_create(dfd,pathname,LOOKUP_PARENT,nd,flag,mode);
> + error = path_lookup_create(dfd, pathname, LOOKUP_PARENT, nd,
> + flag, mode);
> if (error)
> return error;
>
> @@ -1812,10 +1833,17 @@ ok:
> return 0;
>
> exit_dput:
> + if (error == -ESTALE)
> + d_drop(path.dentry);
> dput_path(&path, nd);
> exit:
> if (!IS_ERR(nd->intent.open.file))
> release_open_intent(nd);
> + if (error == -ESTALE) {
> + d_drop(nd->dentry);
> + path_release(nd);
> + goto top;
> + }

I wonder if a tail-call might not work better here.

--
Intel are signing my paycheques ... these opinions are still mine
"Bill, look, we understand that you're interested in selling us this
operating system, but compare it to ours. We can't possibly take such
a retrograde step."

2008-02-01 20:57:51

by Peter Staubach

[permalink] [raw]
Subject: [PATCH 1/3] enhanced lookup ESTALE error handling (v2)

--- linux-2.6.24.i686/fs/namei.c.org
+++ linux-2.6.24.i686/fs/namei.c
@@ -741,7 +741,7 @@ static __always_inline void follow_dotdo
{
struct fs_struct *fs = current->fs;

- while(1) {
+ while (1) {
struct vfsmount *parent;
struct dentry *old = nd->dentry;

@@ -840,7 +840,7 @@ static fastcall int __link_path_walk(con
lookup_flags = LOOKUP_FOLLOW | (nd->flags & LOOKUP_CONTINUE);

/* At this point we know we have a real path component. */
- for(;;) {
+ for (;;) {
unsigned long hash;
struct qstr this;
unsigned int c;
@@ -992,7 +992,7 @@ return_reval:
*/
if (nd->dentry && nd->dentry->d_sb &&
(nd->dentry->d_sb->s_type->fs_flags & FS_REVAL_DOT)) {
- err = -ESTALE;
+ err = -ENOENT;
/* Note: we do not d_invalidate() */
if (!nd->dentry->d_op->d_revalidate(nd->dentry, nd))
break;
@@ -1003,6 +1003,8 @@ out_dput:
dput_path(&next, nd);
break;
}
+ if (err == -ESTALE)
+ d_drop(nd->dentry);
path_release(nd);
return_err:
return err;
@@ -1019,13 +1021,24 @@ static int fastcall link_path_walk(const
{
struct nameidata save = *nd;
int result;
+ struct dentry *svd;

/* make sure the stuff we saved doesn't go away */
dget(save.dentry);
mntget(save.mnt);

+ svd = nd->dentry;
result = __link_path_walk(name, nd);
- if (result == -ESTALE) {
+ while (result == -ESTALE) {
+ /*
+ * If no progress was made looking up the pathname,
+ * then stop and return ENOENT instead of ESTALE.
+ */
+ if (nd->dentry == svd) {
+ result = -ENOENT;
+ break;
+ }
+ svd = nd->dentry;
*nd = save;
dget(nd->dentry);
mntget(nd->mnt);
@@ -1712,7 +1725,10 @@ int open_namei(int dfd, const char *path
int acc_mode, error;
struct path path;
struct dentry *dir;
- int count = 0;
+ int count;
+
+top:
+ count = 0;

acc_mode = ACC_MODE(flag);

@@ -1739,7 +1755,8 @@ int open_namei(int dfd, const char *path
/*
* Create - we need to know the parent.
*/
- error = path_lookup_create(dfd,pathname,LOOKUP_PARENT,nd,flag,mode);
+ error = path_lookup_create(dfd, pathname, LOOKUP_PARENT, nd,
+ flag, mode);
if (error)
return error;

@@ -1812,10 +1829,17 @@ ok:
return 0;

exit_dput:
+ if (error == -ESTALE)
+ d_drop(path.dentry);
dput_path(&path, nd);
exit:
if (!IS_ERR(nd->intent.open.file))
release_open_intent(nd);
+ if (error == -ESTALE) {
+ d_drop(nd->dentry);
+ path_release(nd);
+ goto top;
+ }
path_release(nd);
return error;

@@ -1825,7 +1849,7 @@ do_link:
goto exit_dput;
/*
* This is subtle. Instead of calling do_follow_link() we do the
- * thing by hands. The reason is that this way we have zero link_count
+ * thing by hand. The reason is that this way we have zero link_count
* and path_walk() (called from ->follow_link) honoring LOOKUP_PARENT.
* After that we have the parent and last component, i.e.
* we are in the same situation as after the first path_walk().
@@ -1844,6 +1868,8 @@ do_link:
* with "intent.open".
*/
release_open_intent(nd);
+ if (error == ESTALE)
+ goto top;
return error;
}
nd->flags &= ~LOOKUP_PARENT;
@@ -1857,7 +1883,7 @@ do_link:
goto exit;
}
error = -ELOOP;
- if (count++==32) {
+ if (count++ == 32) {
__putname(nd->last.name);
goto exit;
}


Attachments:
estale.lookup.2 (3.18 kB)

2008-03-10 20:24:12

by Peter Staubach

[permalink] [raw]
Subject: [PATCH 1/3] enhanced lookup ESTALE error handling (v3)

--- linux-2.6.24.i686/fs/namei.c.org
+++ linux-2.6.24.i686/fs/namei.c
@@ -750,7 +750,7 @@ static __always_inline void follow_dotdo
{
struct fs_struct *fs = current->fs;

- while(1) {
+ while (1) {
struct vfsmount *parent;
struct dentry *old = nd->path.dentry;

@@ -849,7 +849,7 @@ static int __link_path_walk(const char *
lookup_flags = LOOKUP_FOLLOW | (nd->flags & LOOKUP_CONTINUE);

/* At this point we know we have a real path component. */
- for(;;) {
+ for (;;) {
unsigned long hash;
struct qstr this;
unsigned int c;
@@ -1003,7 +1003,7 @@ return_reval:
*/
if (nd->path.dentry && nd->path.dentry->d_sb &&
(nd->path.dentry->d_sb->s_type->fs_flags & FS_REVAL_DOT)) {
- err = -ESTALE;
+ err = -ENOENT;
/* Note: we do not d_invalidate() */
if (!nd->path.dentry->d_op->d_revalidate(
nd->path.dentry, nd))
@@ -1015,6 +1015,8 @@ out_dput:
path_put_conditional(&next, nd);
break;
}
+ if (err == -ESTALE)
+ d_drop(nd->path.dentry);
path_put(&nd->path);
return_err:
return err;
@@ -1031,13 +1033,24 @@ static int link_path_walk(const char *na
{
struct nameidata save = *nd;
int result;
+ struct dentry *svd;

/* make sure the stuff we saved doesn't go away */
dget(save.path.dentry);
mntget(save.path.mnt);

+ svd = nd->path.dentry;
result = __link_path_walk(name, nd);
- if (result == -ESTALE) {
+ while (result == -ESTALE) {
+ /*
+ * If no progress was made looking up the pathname,
+ * then stop and return ENOENT instead of ESTALE.
+ */
+ if (nd->path.dentry == svd) {
+ result = -ENOENT;
+ break;
+ }
+ svd = nd->path.dentry;
*nd = save;
dget(nd->path.dentry);
mntget(nd->path.mnt);
@@ -1714,7 +1727,10 @@ int open_namei(int dfd, const char *path
int acc_mode, error;
struct path path;
struct dentry *dir;
- int count = 0;
+ int count;
+
+top:
+ count = 0;

acc_mode = ACC_MODE(flag);

@@ -1741,7 +1757,8 @@ int open_namei(int dfd, const char *path
/*
* Create - we need to know the parent.
*/
- error = path_lookup_create(dfd,pathname,LOOKUP_PARENT,nd,flag,mode);
+ error = path_lookup_create(dfd, pathname, LOOKUP_PARENT, nd,
+ flag, mode);
if (error)
return error;

@@ -1814,10 +1831,17 @@ ok:
return 0;

exit_dput:
+ if (error == -ESTALE)
+ d_drop(path.dentry);
path_put_conditional(&path, nd);
exit:
if (!IS_ERR(nd->intent.open.file))
release_open_intent(nd);
+ if (error == -ESTALE) {
+ d_drop(nd->path.dentry);
+ path_put(&nd->path);
+ goto top;
+ }
path_put(&nd->path);
return error;

@@ -1827,7 +1851,7 @@ do_link:
goto exit_dput;
/*
* This is subtle. Instead of calling do_follow_link() we do the
- * thing by hands. The reason is that this way we have zero link_count
+ * thing by hand. The reason is that this way we have zero link_count
* and path_walk() (called from ->follow_link) honoring LOOKUP_PARENT.
* After that we have the parent and last component, i.e.
* we are in the same situation as after the first path_walk().
@@ -1846,6 +1870,8 @@ do_link:
* with "intent.open".
*/
release_open_intent(nd);
+ if (error == ESTALE)
+ goto top;
return error;
}
nd->flags &= ~LOOKUP_PARENT;
@@ -1859,7 +1885,7 @@ do_link:
goto exit;
}
error = -ELOOP;
- if (count++==32) {
+ if (count++ == 32) {
__putname(nd->last.name);
goto exit;
}


Attachments:
estale.lookup.3 (3.28 kB)

2008-03-10 21:48:08

by Andrew Morton

[permalink] [raw]
Subject: Re: [PATCH 1/3] enhanced lookup ESTALE error handling (v3)

On Mon, 10 Mar 2008 16:23:33 -0400
Peter Staubach <[email protected]> wrote:

> This is a patch to enhance ESTALE error handling during the
> lookup process. The error, ESTALE, can occur when out of data
> dentries, stored in the dcache, is used to translate a pathname
> component to a dentry. When this occurs, the dentry which
> contains the pointer to the inode which refers to the non-existent
> file is dropped from the dcache and then the lookup process
> started again. Care is taken to ensure that forward process is
> always being made. If forward process is not detected, then the
> lookup process is terminated and the error, ENOENT, is returned
> to the caller.

This collides in non-trivial ways with the always-coming-never-arrives
r-o-bind-mounts patches.

I have an old version of those patches in -mm and I believe that Al
is/was/has set up some git tree with these patches and perhaps other stuff.

So some coordination is required please. I'd suggest that if Al indeed
does have such a tree, he hand over the URL so I can get it into -mm and
that you then redo these patches on top of that.

Please also Cc Al and Christoph on these patches, as they are hitting files
which they maintain and develop.

Thanks.


2008-03-10 22:04:03

by Miklos Szeredi

[permalink] [raw]
Subject: Re: [PATCH 1/3] enhanced lookup ESTALE error handling (v3)

> > This is a patch to enhance ESTALE error handling during the
> > lookup process. The error, ESTALE, can occur when out of data
> > dentries, stored in the dcache, is used to translate a pathname
> > component to a dentry. When this occurs, the dentry which
> > contains the pointer to the inode which refers to the non-existent
> > file is dropped from the dcache and then the lookup process
> > started again. Care is taken to ensure that forward process is
> > always being made. If forward process is not detected, then the
> > lookup process is terminated and the error, ENOENT, is returned
> > to the caller.
>
> This collides in non-trivial ways with the always-coming-never-arrives
> r-o-bind-mounts patches.
>
> I have an old version of those patches in -mm and I believe that Al
> is/was/has set up some git tree with these patches and perhaps other stuff.
>
> So some coordination is required please. I'd suggest that if Al indeed
> does have such a tree, he hand over the URL so I can get it into -mm and
> that you then redo these patches on top of that.
>
> Please also Cc Al and Christoph on these patches, as they are hitting files
> which they maintain and develop.

Also my objection about breaking backward compatibility for fuse
filesystems is still seemingly unaddressed.

I know it would be more convenient to push this problem into the fuse
filesystems, but they are unfortunately on the other side of the
stable kernel ABI, so this is not an option.

I've already presented a solution (which wasn't greeted with big
enthusiasm), and I'm open to discussion about other ways to solve
this.

Miklos

2008-03-10 22:19:50

by Dave Hansen

[permalink] [raw]
Subject: Re: [PATCH 1/3] enhanced lookup ESTALE error handling (v3)

On Mon, 2008-03-10 at 23:03 +0100, Miklos Szeredi wrote:
> > > This is a patch to enhance ESTALE error handling during the
> > > lookup process. The error, ESTALE, can occur when out of data
> > > dentries, stored in the dcache, is used to translate a pathname
> > > component to a dentry. When this occurs, the dentry which
> > > contains the pointer to the inode which refers to the non-existent
> > > file is dropped from the dcache and then the lookup process
> > > started again. Care is taken to ensure that forward process is
> > > always being made. If forward process is not detected, then the
> > > lookup process is terminated and the error, ENOENT, is returned
> > > to the caller.
> >
> > This collides in non-trivial ways with the always-coming-never-arrives
> > r-o-bind-mounts patches.
> >
> > I have an old version of those patches in -mm and I believe that Al
> > is/was/has set up some git tree with these patches and perhaps other stuff.

Al's tree is here:

http://git.kernel.org/gitweb.cgi?p=linux/kernel/git/viro/vfs-2.6.git;a=summary

It's probably best to try and base your patches on top of there.

-- Dave


2008-03-11 04:14:09

by Andrew Morton

[permalink] [raw]
Subject: Re: [PATCH 1/3] enhanced lookup ESTALE error handling (v3)

On Mon, 10 Mar 2008 15:19:44 -0700 Dave Hansen <[email protected]> wrote:

> On Mon, 2008-03-10 at 23:03 +0100, Miklos Szeredi wrote:
> > > > This is a patch to enhance ESTALE error handling during the
> > > > lookup process. The error, ESTALE, can occur when out of data
> > > > dentries, stored in the dcache, is used to translate a pathname
> > > > component to a dentry. When this occurs, the dentry which
> > > > contains the pointer to the inode which refers to the non-existent
> > > > file is dropped from the dcache and then the lookup process
> > > > started again. Care is taken to ensure that forward process is
> > > > always being made. If forward process is not detected, then the
> > > > lookup process is terminated and the error, ENOENT, is returned
> > > > to the caller.
> > >
> > > This collides in non-trivial ways with the always-coming-never-arrives
> > > r-o-bind-mounts patches.
> > >
> > > I have an old version of those patches in -mm and I believe that Al
> > > is/was/has set up some git tree with these patches and perhaps other stuff.
>
> Al's tree is here:
>
> http://git.kernel.org/gitweb.cgi?p=linux/kernel/git/viro/vfs-2.6.git;a=summary
>
> It's probably best to try and base your patches on top of there.
>

Al has a unique versioning scheme which I am not smart enough to
understand, so I'd be reluctant to pick this up without coordinating with
him.

afacit the "b1" branch there is the latest, but it doesn't appear to have
been touched for a couple of weeks.