LinuxLists.cc - [PATCH 2/3] enhanced ESTALE error handling

2008-01-18 15:37:30

Subject: [PATCH 2/3] enhanced ESTALE error handling

--- linux-2.6.23.i686/fs/namei.c.org
+++ linux-2.6.23.i686/fs/namei.c
@@ -1956,6 +1986,7 @@ asmlinkage long sys_mknodat(int dfd, con
if (IS_ERR(tmp))
return PTR_ERR(tmp);

+top:
error = do_path_lookup(dfd, tmp, LOOKUP_PARENT, &nd);
if (error)
goto out;
@@ -1986,6 +2017,8 @@ asmlinkage long sys_mknodat(int dfd, con
}
mutex_unlock(&nd.dentry->d_inode->i_mutex);
path_release(&nd);
+ if (error == -ESTALE)
+ goto top;
out:
putname(tmp);

@@ -2021,8 +2054,8 @@ int vfs_mkdir(struct inode *dir, struct

asmlinkage long sys_mkdirat(int dfd, const char __user *pathname, int mode)
{
- int error = 0;
- char * tmp;
+ int error;
+ char *tmp;
struct dentry *dentry;
struct nameidata nd;

@@ -2031,6 +2064,7 @@ asmlinkage long sys_mkdirat(int dfd, con
if (IS_ERR(tmp))
goto out_err;

+top:
error = do_path_lookup(dfd, tmp, LOOKUP_PARENT, &nd);
if (error)
goto out;
@@ -2046,6 +2080,8 @@ asmlinkage long sys_mkdirat(int dfd, con
out_unlock:
mutex_unlock(&nd.dentry->d_inode->i_mutex);
path_release(&nd);
+ if (error == -ESTALE)
+ goto top;
out:
putname(tmp);
out_err:
@@ -2125,23 +2161,24 @@ static long do_rmdir(int dfd, const char
struct nameidata nd;

name = getname(pathname);
- if(IS_ERR(name))
+ if (IS_ERR(name))
return PTR_ERR(name);

+top:
error = do_path_lookup(dfd, name, LOOKUP_PARENT, &nd);
if (error)
goto exit;

- switch(nd.last_type) {
- case LAST_DOTDOT:
- error = -ENOTEMPTY;
- goto exit1;
- case LAST_DOT:
- error = -EINVAL;
- goto exit1;
- case LAST_ROOT:
- error = -EBUSY;
- goto exit1;
+ switch (nd.last_type) {
+ case LAST_DOTDOT:
+ error = -ENOTEMPTY;
+ goto exit1;
+ case LAST_DOT:
+ error = -EINVAL;
+ goto exit1;
+ case LAST_ROOT:
+ error = -EBUSY;
+ goto exit1;
}
mutex_lock_nested(&nd.dentry->d_inode->i_mutex, I_MUTEX_PARENT);
dentry = lookup_hash(&nd);
@@ -2154,6 +2191,8 @@ exit2:
mutex_unlock(&nd.dentry->d_inode->i_mutex);
exit1:
path_release(&nd);
+ if (error == -ESTALE)
+ goto top;
exit:
putname(name);
return error;
@@ -2206,12 +2245,14 @@ static long do_unlinkat(int dfd, const c
char * name;
struct dentry *dentry;
struct nameidata nd;
- struct inode *inode = NULL;
+ struct inode *inode;

name = getname(pathname);
if(IS_ERR(name))
return PTR_ERR(name);

+top:
+ inode = NULL;
error = do_path_lookup(dfd, name, LOOKUP_PARENT, &nd);
if (error)
goto exit;
@@ -2237,6 +2278,8 @@ static long do_unlinkat(int dfd, const c
iput(inode); /* truncate the inode here */
exit1:
path_release(&nd);
+ if (error == -ESTALE)
+ goto top;
exit:
putname(name);
return error;
@@ -2301,6 +2344,7 @@ asmlinkage long sys_symlinkat(const char
if (IS_ERR(to))
goto out_putname;

+top:
error = do_path_lookup(newdfd, to, LOOKUP_PARENT, &nd);
if (error)
goto out;
@@ -2314,6 +2358,8 @@ asmlinkage long sys_symlinkat(const char
out_unlock:
mutex_unlock(&nd.dentry->d_inode->i_mutex);
path_release(&nd);
+ if (error == -ESTALE)
+ goto top;
out:
putname(to);
out_putname:
@@ -2389,6 +2435,7 @@ asmlinkage long sys_linkat(int olddfd, c
if (IS_ERR(to))
return PTR_ERR(to);

+top:
error = __user_walk_fd(olddfd, oldname,
flags & AT_SYMLINK_FOLLOW ? LOOKUP_FOLLOW : 0,
&old_nd);
@@ -2408,6 +2455,11 @@ asmlinkage long sys_linkat(int olddfd, c
dput(new_dentry);
out_unlock:
mutex_unlock(&nd.dentry->d_inode->i_mutex);
+ if (error == -ESTALE) {
+ path_release(&nd);
+ path_release(&old_nd);
+ goto top;
+ }
out_release:
path_release(&nd);
out:
@@ -2578,6 +2630,7 @@ static int do_rename(int olddfd, const c
struct dentry * trap;
struct nameidata oldnd, newnd;

+top:
error = do_path_lookup(olddfd, oldname, LOOKUP_PARENT, &oldnd);
if (error)
goto exit;
@@ -2638,6 +2691,11 @@ exit4:
dput(old_dentry);
exit3:
unlock_rename(new_dir, old_dir);
+ if (error == -ESTALE) {
+ path_release(&newnd);
+ path_release(&oldnd);
+ goto top;
+ }
exit2:
path_release(&newnd);
exit1:
--- linux-2.6.23.i686/fs/open.c.org
+++ linux-2.6.23.i686/fs/open.c
@@ -124,6 +124,7 @@ asmlinkage long sys_statfs(const char __
struct nameidata nd;
int error;

+top:
error = user_path_walk(path, &nd);
if (!error) {
struct statfs tmp;
@@ -131,6 +132,8 @@ asmlinkage long sys_statfs(const char __
if (!error && copy_to_user(buf, &tmp, sizeof(tmp)))
error = -EFAULT;
path_release(&nd);
+ if (error == -ESTALE)
+ goto top;
}
return error;
}
@@ -143,6 +146,7 @@ asmlinkage long sys_statfs64(const char

if (sz != sizeof(*buf))
return -EINVAL;
+top:
error = user_path_walk(path, &nd);
if (!error) {
struct statfs64 tmp;
@@ -150,6 +154,8 @@ asmlinkage long sys_statfs64(const char
if (!error && copy_to_user(buf, &tmp, sizeof(tmp)))
error = -EFAULT;
path_release(&nd);
+ if (error == -ESTALE)
+ goto top;
}
return error;
}
@@ -230,6 +236,7 @@ static long do_sys_truncate(const char _
if (length < 0) /* sorry, but loff_t says... */
goto out;

+top:
error = user_path_walk(path, &nd);
if (error)
goto out;
@@ -278,6 +285,8 @@ put_write_and_out:
put_write_access(inode);
dput_and_out:
path_release(&nd);
+ if (error == -ESTALE)
+ goto top;
out:
return error;
}
@@ -448,21 +457,24 @@ asmlinkage long sys_faccessat(int dfd, c
else
current->cap_effective = current->cap_permitted;

+top:
res = __user_walk_fd(dfd, filename, LOOKUP_FOLLOW|LOOKUP_ACCESS, &nd);
if (res)
goto out;

res = vfs_permission(&nd, mode);
/* SuS v2 requires we report a read only fs too */
- if(res || !(mode & S_IWOTH) ||
+ if (res || !(mode & S_IWOTH) ||
special_file(nd.dentry->d_inode->i_mode))
goto out_path_release;

- if(IS_RDONLY(nd.dentry->d_inode))
+ if (IS_RDONLY(nd.dentry->d_inode))
res = -EROFS;

out_path_release:
path_release(&nd);
+ if (res == -ESTALE)
+ goto top;
out:
current->fsuid = old_fsuid;
current->fsgid = old_fsgid;
@@ -481,6 +493,7 @@ asmlinkage long sys_chdir(const char __u
struct nameidata nd;
int error;

+top:
error = __user_walk(filename,
LOOKUP_FOLLOW|LOOKUP_DIRECTORY|LOOKUP_CHDIR, &nd);
if (error)
@@ -494,6 +507,8 @@ asmlinkage long sys_chdir(const char __u

dput_and_out:
path_release(&nd);
+ if (error == -ESTALE)
+ goto top;
out:
return error;
}
@@ -533,6 +548,7 @@ asmlinkage long sys_chroot(const char __
struct nameidata nd;
int error;

+top:
error = __user_walk(filename, LOOKUP_FOLLOW | LOOKUP_DIRECTORY | LOOKUP_NOALT, &nd);
if (error)
goto out;
@@ -550,6 +566,8 @@ asmlinkage long sys_chroot(const char __
error = 0;
dput_and_out:
path_release(&nd);
+ if (error == -ESTALE)
+ goto top;
out:
return error;
}
@@ -599,6 +617,7 @@ asmlinkage long sys_fchmodat(int dfd, co
int error;
struct iattr newattrs;

+top:
error = __user_walk_fd(dfd, filename, LOOKUP_FOLLOW, &nd);
if (error)
goto out;
@@ -622,6 +641,8 @@ asmlinkage long sys_fchmodat(int dfd, co

dput_and_out:
path_release(&nd);
+ if (error == -ESTALE)
+ goto top;
out:
return error;
}
@@ -672,11 +693,14 @@ asmlinkage long sys_chown(const char __u
struct nameidata nd;
int error;

+top:
error = user_path_walk(filename, &nd);
if (error)
goto out;
error = chown_common(nd.dentry, user, group);
path_release(&nd);
+ if (error == -ESTALE)
+ goto top;
out:
return error;
}
@@ -692,11 +716,14 @@ asmlinkage long sys_fchownat(int dfd, co
goto out;

follow = (flag & AT_SYMLINK_NOFOLLOW) ? 0 : LOOKUP_FOLLOW;
+top:
error = __user_walk_fd(dfd, filename, follow, &nd);
if (error)
goto out;
error = chown_common(nd.dentry, user, group);
path_release(&nd);
+ if (error == -ESTALE)
+ goto top;
out:
return error;
}
@@ -706,11 +733,14 @@ asmlinkage long sys_lchown(const char __
struct nameidata nd;
int error;

+top:
error = user_path_walk_link(filename, &nd);
if (error)
goto out;
error = chown_common(nd.dentry, user, group);
path_release(&nd);
+ if (error == -ESTALE)
+ goto top;
out:
return error;
}
@@ -819,16 +849,22 @@ static struct file *do_filp_open(int dfd
{
int namei_flags, error;
struct nameidata nd;
+ struct file *res;

+top:
namei_flags = flags;
if ((namei_flags+1) & O_ACCMODE)
namei_flags++;

error = open_namei(dfd, filename, namei_flags, mode, &nd);
- if (!error)
- return nameidata_to_filp(&nd, flags);
+ if (error)
+ return ERR_PTR(error);
+
+ res = nameidata_to_filp(&nd, flags);
+ if (IS_ERR(res) && res == ERR_PTR(-ESTALE))
+ goto top;
+ return res;

- return ERR_PTR(error);
}

struct file *filp_open(const char *filename, int flags, int mode)
--- linux-2.6.23.i686/fs/inotify_user.c.org
+++ linux-2.6.23.i686/fs/inotify_user.c
@@ -346,13 +346,17 @@ static int find_inode(const char __user
{
int error;

+top:
error = __user_walk(dirname, flags, nd);
if (error)
return error;
/* you can only watch an inode if you have read permissions on it */
error = vfs_permission(nd, MAY_READ);
- if (error)
+ if (error) {
path_release(nd);
+ if (error == -ESTALE)
+ goto top;
+ }
return error;
}

--- linux-2.6.23.i686/fs/stat.c.org
+++ linux-2.6.23.i686/fs/stat.c
@@ -60,10 +60,13 @@ int vfs_stat_fd(int dfd, char __user *na
struct nameidata nd;
int error;

+top:
error = __user_walk_fd(dfd, name, LOOKUP_FOLLOW, &nd);
if (!error) {
error = vfs_getattr(nd.mnt, nd.dentry, stat);
path_release(&nd);
+ if (error == -ESTALE)
+ goto top;
}
return error;
}
@@ -80,10 +83,13 @@ int vfs_lstat_fd(int dfd, char __user *n
struct nameidata nd;
int error;

+top:
error = __user_walk_fd(dfd, name, 0, &nd);
if (!error) {
error = vfs_getattr(nd.mnt, nd.dentry, stat);
path_release(&nd);
+ if (error == -ESTALE)
+ goto top;
}
return error;
}
@@ -300,6 +306,7 @@ asmlinkage long sys_readlinkat(int dfd,
if (bufsiz <= 0)
return -EINVAL;

+top:
error = __user_walk_fd(dfd, path, 0, &nd);
if (!error) {
struct inode * inode = nd.dentry->d_inode;
@@ -313,6 +320,8 @@ asmlinkage long sys_readlinkat(int dfd,
}
}
path_release(&nd);
+ if (error == -ESTALE)
+ goto top;
}
return error;
}
--- linux-2.6.23.i686/fs/exec.c.org
+++ linux-2.6.23.i686/fs/exec.c
@@ -107,6 +107,7 @@ asmlinkage long sys_uselib(const char __
struct nameidata nd;
int error;

+top:
error = __user_path_lookup_open(library, LOOKUP_FOLLOW, &nd, FMODE_READ|FMODE_EXEC);
if (error)
goto out;
@@ -149,6 +150,8 @@ out:
exit:
release_open_intent(&nd);
path_release(&nd);
+ if (error == -ESTALE)
+ goto top;
goto out;
}

@@ -648,14 +651,16 @@ struct file *open_exec(const char *name)
int err;
struct file *file;

- err = path_lookup_open(AT_FDCWD, name, LOOKUP_FOLLOW, &nd, FMODE_READ|FMODE_EXEC);
+top:
+ err = path_lookup_open(AT_FDCWD, name, LOOKUP_FOLLOW, &nd,
+ FMODE_READ|FMODE_EXEC);
file = ERR_PTR(err);

if (!err) {
struct inode *inode = nd.dentry->d_inode;
file = ERR_PTR(-EACCES);
if (S_ISREG(inode->i_mode)) {
- int err = vfs_permission(&nd, MAY_EXEC);
+ err = vfs_permission(&nd, MAY_EXEC);
file = ERR_PTR(err);
if (!err) {
file = nameidata_to_filp(&nd, O_RDONLY);
@@ -665,15 +670,17 @@ struct file *open_exec(const char *name)
fput(file);
file = ERR_PTR(err);
}
- }
-out:
+ } else if (file == ERR_PTR(-ESTALE))
+ goto top;
return file;
}
}
release_open_intent(&nd);
path_release(&nd);
+ if (err == -ESTALE)
+ goto top;
}
- goto out;
+ return file;
}

EXPORT_SYMBOL(open_exec);
--- linux-2.6.23.i686/fs/utimes.c.org
+++ linux-2.6.23.i686/fs/utimes.c
@@ -79,6 +79,7 @@ long do_utimes(int dfd, char __user *fil
goto out;
dentry = f->f_path.dentry;
} else {
+top:
error = __user_walk_fd(dfd, filename, (flags & AT_SYMLINK_NOFOLLOW) ? 0 : LOOKUP_FOLLOW, &nd);
if (error)
goto out;
@@ -136,8 +137,11 @@ long do_utimes(int dfd, char __user *fil
dput_and_out:
if (f)
fput(f);
- else
+ else {
path_release(&nd);
+ if (error == -ESTALE)
+ goto top;
+ }
out:
return error;
}
--- linux-2.6.23.i686/fs/compat.c.org
+++ linux-2.6.23.i686/fs/compat.c
@@ -238,6 +238,7 @@ asmlinkage long compat_sys_statfs(const
struct nameidata nd;
int error;

+top:
error = user_path_walk(path, &nd);
if (!error) {
struct kstatfs tmp;
@@ -245,6 +246,8 @@ asmlinkage long compat_sys_statfs(const
if (!error)
error = put_compat_statfs(buf, &tmp);
path_release(&nd);
+ if (error == -ESTALE)
+ goto top;
}
return error;
}
@@ -306,6 +309,7 @@ asmlinkage long compat_sys_statfs64(cons
if (sz != sizeof(*buf))
return -EINVAL;

+top:
error = user_path_walk(path, &nd);
if (!error) {
struct kstatfs tmp;
@@ -313,6 +317,8 @@ asmlinkage long compat_sys_statfs64(cons
if (!error)
error = put_compat_statfs64(buf, &tmp);
path_release(&nd);
+ if (error == -ESTALE)
+ goto top;
}
return error;
}
--- linux-2.6.23.i686/fs/xattr.c.org
+++ linux-2.6.23.i686/fs/xattr.c
@@ -232,11 +232,14 @@ sys_setxattr(char __user *path, char __u
struct nameidata nd;
int error;

+top:
error = user_path_walk(path, &nd);
if (error)
return error;
error = setxattr(nd.dentry, name, value, size, flags);
path_release(&nd);
+ if (error == -ESTALE)
+ goto top;
return error;
}

@@ -247,11 +250,14 @@ sys_lsetxattr(char __user *path, char __
struct nameidata nd;
int error;

+top:
error = user_path_walk_link(path, &nd);
if (error)
return error;
error = setxattr(nd.dentry, name, value, size, flags);
path_release(&nd);
+ if (error == -ESTALE)
+ goto top;
return error;
}

@@ -317,11 +323,14 @@ sys_getxattr(char __user *path, char __u
struct nameidata nd;
ssize_t error;

+top:
error = user_path_walk(path, &nd);
if (error)
return error;
error = getxattr(nd.dentry, name, value, size);
path_release(&nd);
+ if (error == -ESTALE)
+ goto top;
return error;
}

@@ -332,11 +341,14 @@ sys_lgetxattr(char __user *path, char __
struct nameidata nd;
ssize_t error;

+top:
error = user_path_walk_link(path, &nd);
if (error)
return error;
error = getxattr(nd.dentry, name, value, size);
path_release(&nd);
+ if (error == -ESTALE)
+ goto top;
return error;
}

@@ -391,11 +403,14 @@ sys_listxattr(char __user *path, char __
struct nameidata nd;
ssize_t error;

+top:
error = user_path_walk(path, &nd);
if (error)
return error;
error = listxattr(nd.dentry, list, size);
path_release(&nd);
+ if (error == -ESTALE)
+ goto top;
return error;
}

@@ -405,11 +420,14 @@ sys_llistxattr(char __user *path, char _
struct nameidata nd;
ssize_t error;

+top:
error = user_path_walk_link(path, &nd);
if (error)
return error;
error = listxattr(nd.dentry, list, size);
path_release(&nd);
+ if (error == -ESTALE)
+ goto top;
return error;
}

@@ -452,11 +470,14 @@ sys_removexattr(char __user *path, char
struct nameidata nd;
int error;

+top:
error = user_path_walk(path, &nd);
if (error)
return error;
error = removexattr(nd.dentry, name);
path_release(&nd);
+ if (error == -ESTALE)
+ goto top;
return error;
}

@@ -466,11 +487,14 @@ sys_lremovexattr(char __user *path, char
struct nameidata nd;
int error;

+top:
error = user_path_walk_link(path, &nd);
if (error)
return error;
error = removexattr(nd.dentry, name);
path_release(&nd);
+ if (error == -ESTALE)
+ goto top;
return error;
}

Attachments:

estale.syscall (15.03 kB)

2008-02-01 20:58:43

by Peter Staubach

[permalink] [raw]

Subject: [PATCH 2/3] enhanced syscall ESTALE error handling (v2)

--- linux-2.6.24.i686/fs/namei.c.org
+++ linux-2.6.24.i686/fs/namei.c
@@ -1956,6 +1982,7 @@ asmlinkage long sys_mknodat(int dfd, con
if (IS_ERR(tmp))
return PTR_ERR(tmp);

+top:
error = do_path_lookup(dfd, tmp, LOOKUP_PARENT, &nd);
if (error)
goto out;
@@ -1986,6 +2013,8 @@ asmlinkage long sys_mknodat(int dfd, con
}
mutex_unlock(&nd.dentry->d_inode->i_mutex);
path_release(&nd);
+ if (error == -ESTALE)
+ goto top;
out:
putname(tmp);

@@ -2021,8 +2050,8 @@ int vfs_mkdir(struct inode *dir, struct

asmlinkage long sys_mkdirat(int dfd, const char __user *pathname, int mode)
{
- int error = 0;
- char * tmp;
+ int error;
+ char *tmp;
struct dentry *dentry;
struct nameidata nd;

@@ -2031,6 +2060,7 @@ asmlinkage long sys_mkdirat(int dfd, con
if (IS_ERR(tmp))
goto out_err;

+top:
error = do_path_lookup(dfd, tmp, LOOKUP_PARENT, &nd);
if (error)
goto out;
@@ -2046,6 +2076,8 @@ asmlinkage long sys_mkdirat(int dfd, con
out_unlock:
mutex_unlock(&nd.dentry->d_inode->i_mutex);
path_release(&nd);
+ if (error == -ESTALE)
+ goto top;
out:
putname(tmp);
out_err:
@@ -2125,23 +2157,24 @@ static long do_rmdir(int dfd, const char
struct nameidata nd;

name = getname(pathname);
- if(IS_ERR(name))
+ if (IS_ERR(name))
return PTR_ERR(name);

+top:
error = do_path_lookup(dfd, name, LOOKUP_PARENT, &nd);
if (error)
goto exit;

- switch(nd.last_type) {
- case LAST_DOTDOT:
- error = -ENOTEMPTY;
- goto exit1;
- case LAST_DOT:
- error = -EINVAL;
- goto exit1;
- case LAST_ROOT:
- error = -EBUSY;
- goto exit1;
+ switch (nd.last_type) {
+ case LAST_DOTDOT:
+ error = -ENOTEMPTY;
+ goto exit1;
+ case LAST_DOT:
+ error = -EINVAL;
+ goto exit1;
+ case LAST_ROOT:
+ error = -EBUSY;
+ goto exit1;
}
mutex_lock_nested(&nd.dentry->d_inode->i_mutex, I_MUTEX_PARENT);
dentry = lookup_hash(&nd);
@@ -2154,6 +2187,8 @@ exit2:
mutex_unlock(&nd.dentry->d_inode->i_mutex);
exit1:
path_release(&nd);
+ if (error == -ESTALE)
+ goto top;
exit:
putname(name);
return error;
@@ -2206,12 +2241,14 @@ static long do_unlinkat(int dfd, const c
char * name;
struct dentry *dentry;
struct nameidata nd;
- struct inode *inode = NULL;
+ struct inode *inode;

name = getname(pathname);
if(IS_ERR(name))
return PTR_ERR(name);

+top:
+ inode = NULL;
error = do_path_lookup(dfd, name, LOOKUP_PARENT, &nd);
if (error)
goto exit;
@@ -2237,6 +2274,8 @@ static long do_unlinkat(int dfd, const c
iput(inode); /* truncate the inode here */
exit1:
path_release(&nd);
+ if (error == -ESTALE)
+ goto top;
exit:
putname(name);
return error;
@@ -2301,6 +2340,7 @@ asmlinkage long sys_symlinkat(const char
if (IS_ERR(to))
goto out_putname;

+top:
error = do_path_lookup(newdfd, to, LOOKUP_PARENT, &nd);
if (error)
goto out;
@@ -2314,6 +2354,8 @@ asmlinkage long sys_symlinkat(const char
out_unlock:
mutex_unlock(&nd.dentry->d_inode->i_mutex);
path_release(&nd);
+ if (error == -ESTALE)
+ goto top;
out:
putname(to);
out_putname:
@@ -2389,6 +2431,7 @@ asmlinkage long sys_linkat(int olddfd, c
if (IS_ERR(to))
return PTR_ERR(to);

+top:
error = __user_walk_fd(olddfd, oldname,
flags & AT_SYMLINK_FOLLOW ? LOOKUP_FOLLOW : 0,
&old_nd);
@@ -2408,6 +2451,11 @@ asmlinkage long sys_linkat(int olddfd, c
dput(new_dentry);
out_unlock:
mutex_unlock(&nd.dentry->d_inode->i_mutex);
+ if (error == -ESTALE) {
+ path_release(&nd);
+ path_release(&old_nd);
+ goto top;
+ }
out_release:
path_release(&nd);
out:
@@ -2578,6 +2626,7 @@ static int do_rename(int olddfd, const c
struct dentry * trap;
struct nameidata oldnd, newnd;

+top:
error = do_path_lookup(olddfd, oldname, LOOKUP_PARENT, &oldnd);
if (error)
goto exit;
@@ -2638,6 +2687,11 @@ exit4:
dput(old_dentry);
exit3:
unlock_rename(new_dir, old_dir);
+ if (error == -ESTALE) {
+ path_release(&newnd);
+ path_release(&oldnd);
+ goto top;
+ }
exit2:
path_release(&newnd);
exit1:
--- linux-2.6.24.i686/fs/open.c.org
+++ linux-2.6.24.i686/fs/open.c
@@ -124,6 +124,7 @@ asmlinkage long sys_statfs(const char __
struct nameidata nd;
int error;

+top:
error = user_path_walk(path, &nd);
if (!error) {
struct statfs tmp;
@@ -131,6 +132,8 @@ asmlinkage long sys_statfs(const char __
if (!error && copy_to_user(buf, &tmp, sizeof(tmp)))
error = -EFAULT;
path_release(&nd);
+ if (error == -ESTALE)
+ goto top;
}
return error;
}
@@ -143,6 +146,7 @@ asmlinkage long sys_statfs64(const char

if (sz != sizeof(*buf))
return -EINVAL;
+top:
error = user_path_walk(path, &nd);
if (!error) {
struct statfs64 tmp;
@@ -150,6 +154,8 @@ asmlinkage long sys_statfs64(const char
if (!error && copy_to_user(buf, &tmp, sizeof(tmp)))
error = -EFAULT;
path_release(&nd);
+ if (error == -ESTALE)
+ goto top;
}
return error;
}
@@ -230,6 +236,7 @@ static long do_sys_truncate(const char _
if (length < 0) /* sorry, but loff_t says... */
goto out;

+top:
error = user_path_walk(path, &nd);
if (error)
goto out;
@@ -278,6 +285,8 @@ put_write_and_out:
put_write_access(inode);
dput_and_out:
path_release(&nd);
+ if (error == -ESTALE)
+ goto top;
out:
return error;
}
@@ -448,21 +457,24 @@ asmlinkage long sys_faccessat(int dfd, c
else
current->cap_effective = current->cap_permitted;

+top:
res = __user_walk_fd(dfd, filename, LOOKUP_FOLLOW|LOOKUP_ACCESS, &nd);
if (res)
goto out;

res = vfs_permission(&nd, mode);
/* SuS v2 requires we report a read only fs too */
- if(res || !(mode & S_IWOTH) ||
+ if (res || !(mode & S_IWOTH) ||
special_file(nd.dentry->d_inode->i_mode))
goto out_path_release;

- if(IS_RDONLY(nd.dentry->d_inode))
+ if (IS_RDONLY(nd.dentry->d_inode))
res = -EROFS;

out_path_release:
path_release(&nd);
+ if (res == -ESTALE)
+ goto top;
out:
current->fsuid = old_fsuid;
current->fsgid = old_fsgid;
@@ -481,6 +493,7 @@ asmlinkage long sys_chdir(const char __u
struct nameidata nd;
int error;

+top:
error = __user_walk(filename,
LOOKUP_FOLLOW|LOOKUP_DIRECTORY|LOOKUP_CHDIR, &nd);
if (error)
@@ -494,6 +507,8 @@ asmlinkage long sys_chdir(const char __u

dput_and_out:
path_release(&nd);
+ if (error == -ESTALE)
+ goto top;
out:
return error;
}
@@ -533,6 +548,7 @@ asmlinkage long sys_chroot(const char __
struct nameidata nd;
int error;

+top:
error = __user_walk(filename, LOOKUP_FOLLOW | LOOKUP_DIRECTORY | LOOKUP_NOALT, &nd);
if (error)
goto out;
@@ -550,6 +566,8 @@ asmlinkage long sys_chroot(const char __
error = 0;
dput_and_out:
path_release(&nd);
+ if (error == -ESTALE)
+ goto top;
out:
return error;
}
@@ -599,6 +617,7 @@ asmlinkage long sys_fchmodat(int dfd, co
int error;
struct iattr newattrs;

+top:
error = __user_walk_fd(dfd, filename, LOOKUP_FOLLOW, &nd);
if (error)
goto out;
@@ -622,6 +641,8 @@ asmlinkage long sys_fchmodat(int dfd, co

dput_and_out:
path_release(&nd);
+ if (error == -ESTALE)
+ goto top;
out:
return error;
}
@@ -672,11 +693,14 @@ asmlinkage long sys_chown(const char __u
struct nameidata nd;
int error;

+top:
error = user_path_walk(filename, &nd);
if (error)
goto out;
error = chown_common(nd.dentry, user, group);
path_release(&nd);
+ if (error == -ESTALE)
+ goto top;
out:
return error;
}
@@ -692,11 +716,14 @@ asmlinkage long sys_fchownat(int dfd, co
goto out;

follow = (flag & AT_SYMLINK_NOFOLLOW) ? 0 : LOOKUP_FOLLOW;
+top:
error = __user_walk_fd(dfd, filename, follow, &nd);
if (error)
goto out;
error = chown_common(nd.dentry, user, group);
path_release(&nd);
+ if (error == -ESTALE)
+ goto top;
out:
return error;
}
@@ -706,11 +733,14 @@ asmlinkage long sys_lchown(const char __
struct nameidata nd;
int error;

+top:
error = user_path_walk_link(filename, &nd);
if (error)
goto out;
error = chown_common(nd.dentry, user, group);
path_release(&nd);
+ if (error == -ESTALE)
+ goto top;
out:
return error;
}
@@ -819,16 +849,22 @@ static struct file *do_filp_open(int dfd
{
int namei_flags, error;
struct nameidata nd;
+ struct file *res;

+top:
namei_flags = flags;
if ((namei_flags+1) & O_ACCMODE)
namei_flags++;

error = open_namei(dfd, filename, namei_flags, mode, &nd);
- if (!error)
- return nameidata_to_filp(&nd, flags);
+ if (error)
+ return ERR_PTR(error);
+
+ res = nameidata_to_filp(&nd, flags);
+ if (IS_ERR(res) && res == ERR_PTR(-ESTALE))
+ goto top;
+ return res;

- return ERR_PTR(error);
}

struct file *filp_open(const char *filename, int flags, int mode)
--- linux-2.6.24.i686/fs/inotify_user.c.org
+++ linux-2.6.24.i686/fs/inotify_user.c
@@ -346,13 +346,17 @@ static int find_inode(const char __user
{
int error;

+top:
error = __user_walk(dirname, flags, nd);
if (error)
return error;
/* you can only watch an inode if you have read permissions on it */
error = vfs_permission(nd, MAY_READ);
- if (error)
+ if (error) {
path_release(nd);
+ if (error == -ESTALE)
+ goto top;
+ }
return error;
}

--- linux-2.6.24.i686/fs/stat.c.org
+++ linux-2.6.24.i686/fs/stat.c
@@ -60,10 +60,13 @@ int vfs_stat_fd(int dfd, char __user *na
struct nameidata nd;
int error;

+top:
error = __user_walk_fd(dfd, name, LOOKUP_FOLLOW, &nd);
if (!error) {
error = vfs_getattr(nd.mnt, nd.dentry, stat);
path_release(&nd);
+ if (error == -ESTALE)
+ goto top;
}
return error;
}
@@ -80,10 +83,13 @@ int vfs_lstat_fd(int dfd, char __user *n
struct nameidata nd;
int error;

+top:
error = __user_walk_fd(dfd, name, 0, &nd);
if (!error) {
error = vfs_getattr(nd.mnt, nd.dentry, stat);
path_release(&nd);
+ if (error == -ESTALE)
+ goto top;
}
return error;
}
@@ -300,6 +306,7 @@ asmlinkage long sys_readlinkat(int dfd,
if (bufsiz <= 0)
return -EINVAL;

+top:
error = __user_walk_fd(dfd, path, 0, &nd);
if (!error) {
struct inode * inode = nd.dentry->d_inode;
@@ -313,6 +320,8 @@ asmlinkage long sys_readlinkat(int dfd,
}
}
path_release(&nd);
+ if (error == -ESTALE)
+ goto top;
}
return error;
}
--- linux-2.6.24.i686/fs/exec.c.org
+++ linux-2.6.24.i686/fs/exec.c
@@ -107,6 +107,7 @@ asmlinkage long sys_uselib(const char __
struct nameidata nd;
int error;

+top:
error = __user_path_lookup_open(library, LOOKUP_FOLLOW, &nd, FMODE_READ|FMODE_EXEC);
if (error)
goto out;
@@ -149,6 +150,8 @@ out:
exit:
release_open_intent(&nd);
path_release(&nd);
+ if (error == -ESTALE)
+ goto top;
goto out;
}

@@ -648,14 +651,16 @@ struct file *open_exec(const char *name)
int err;
struct file *file;

- err = path_lookup_open(AT_FDCWD, name, LOOKUP_FOLLOW, &nd, FMODE_READ|FMODE_EXEC);
+top:
+ err = path_lookup_open(AT_FDCWD, name, LOOKUP_FOLLOW, &nd,
+ FMODE_READ|FMODE_EXEC);
file = ERR_PTR(err);

if (!err) {
struct inode *inode = nd.dentry->d_inode;
file = ERR_PTR(-EACCES);
if (S_ISREG(inode->i_mode)) {
- int err = vfs_permission(&nd, MAY_EXEC);
+ err = vfs_permission(&nd, MAY_EXEC);
file = ERR_PTR(err);
if (!err) {
file = nameidata_to_filp(&nd, O_RDONLY);
@@ -665,15 +670,17 @@ struct file *open_exec(const char *name)
fput(file);
file = ERR_PTR(err);
}
- }
-out:
+ } else if (file == ERR_PTR(-ESTALE))
+ goto top;
return file;
}
}
release_open_intent(&nd);
path_release(&nd);
+ if (err == -ESTALE)
+ goto top;
}
- goto out;
+ return file;
}

EXPORT_SYMBOL(open_exec);
--- linux-2.6.24.i686/fs/utimes.c.org
+++ linux-2.6.24.i686/fs/utimes.c
@@ -79,6 +79,7 @@ long do_utimes(int dfd, char __user *fil
goto out;
dentry = f->f_path.dentry;
} else {
+top:
error = __user_walk_fd(dfd, filename, (flags & AT_SYMLINK_NOFOLLOW) ? 0 : LOOKUP_FOLLOW, &nd);
if (error)
goto out;
@@ -136,8 +137,11 @@ long do_utimes(int dfd, char __user *fil
dput_and_out:
if (f)
fput(f);
- else
+ else {
path_release(&nd);
+ if (error == -ESTALE)
+ goto top;
+ }
out:
return error;
}
--- linux-2.6.24.i686/fs/compat.c.org
+++ linux-2.6.24.i686/fs/compat.c
@@ -238,6 +238,7 @@ asmlinkage long compat_sys_statfs(const
struct nameidata nd;
int error;

+top:
error = user_path_walk(path, &nd);
if (!error) {
struct kstatfs tmp;
@@ -245,6 +246,8 @@ asmlinkage long compat_sys_statfs(const
if (!error)
error = put_compat_statfs(buf, &tmp);
path_release(&nd);
+ if (error == -ESTALE)
+ goto top;
}
return error;
}
@@ -306,6 +309,7 @@ asmlinkage long compat_sys_statfs64(cons
if (sz != sizeof(*buf))
return -EINVAL;

+top:
error = user_path_walk(path, &nd);
if (!error) {
struct kstatfs tmp;
@@ -313,6 +317,8 @@ asmlinkage long compat_sys_statfs64(cons
if (!error)
error = put_compat_statfs64(buf, &tmp);
path_release(&nd);
+ if (error == -ESTALE)
+ goto top;
}
return error;
}
--- linux-2.6.24.i686/fs/xattr.c.org
+++ linux-2.6.24.i686/fs/xattr.c
@@ -232,11 +232,14 @@ sys_setxattr(char __user *path, char __u
struct nameidata nd;
int error;

+top:
error = user_path_walk(path, &nd);
if (error)
return error;
error = setxattr(nd.dentry, name, value, size, flags);
path_release(&nd);
+ if (error == -ESTALE)
+ goto top;
return error;
}

@@ -247,11 +250,14 @@ sys_lsetxattr(char __user *path, char __
struct nameidata nd;
int error;

+top:
error = user_path_walk_link(path, &nd);
if (error)
return error;
error = setxattr(nd.dentry, name, value, size, flags);
path_release(&nd);
+ if (error == -ESTALE)
+ goto top;
return error;
}

@@ -317,11 +323,14 @@ sys_getxattr(char __user *path, char __u
struct nameidata nd;
ssize_t error;

+top:
error = user_path_walk(path, &nd);
if (error)
return error;
error = getxattr(nd.dentry, name, value, size);
path_release(&nd);
+ if (error == -ESTALE)
+ goto top;
return error;
}

@@ -332,11 +341,14 @@ sys_lgetxattr(char __user *path, char __
struct nameidata nd;
ssize_t error;

+top:
error = user_path_walk_link(path, &nd);
if (error)
return error;
error = getxattr(nd.dentry, name, value, size);
path_release(&nd);
+ if (error == -ESTALE)
+ goto top;
return error;
}

@@ -391,11 +403,14 @@ sys_listxattr(char __user *path, char __
struct nameidata nd;
ssize_t error;

+top:
error = user_path_walk(path, &nd);
if (error)
return error;
error = listxattr(nd.dentry, list, size);
path_release(&nd);
+ if (error == -ESTALE)
+ goto top;
return error;
}

@@ -405,11 +420,14 @@ sys_llistxattr(char __user *path, char _
struct nameidata nd;
ssize_t error;

+top:
error = user_path_walk_link(path, &nd);
if (error)
return error;
error = listxattr(nd.dentry, list, size);
path_release(&nd);
+ if (error == -ESTALE)
+ goto top;
return error;
}

@@ -452,11 +470,14 @@ sys_removexattr(char __user *path, char
struct nameidata nd;
int error;

+top:
error = user_path_walk(path, &nd);
if (error)
return error;
error = removexattr(nd.dentry, name);
path_release(&nd);
+ if (error == -ESTALE)
+ goto top;
return error;
}

@@ -466,11 +487,14 @@ sys_lremovexattr(char __user *path, char
struct nameidata nd;
int error;

+top:
error = user_path_walk_link(path, &nd);
if (error)
return error;
error = removexattr(nd.dentry, name);
path_release(&nd);
+ if (error == -ESTALE)
+ goto top;
return error;
}

Attachments:

estale.syscall.2 (15.03 kB)

2008-02-01 21:38:24

by Miklos Szeredi

[permalink] [raw]

Subject: Re: [PATCH 2/3] enhanced syscall ESTALE error handling (v2)

This doesn't apply to -mm, because the ro-mounts stuff touches a lot
of the same places as this patch. You probably need to rebase this on
top of those changes.

> This patch adds handling for the error, ESTALE, to the system
> calls which take pathnames as arguments. The algorithm used
> is to detect that an ESTALE error has occurred during an
> operation subsequent to the lookup process and then to unwind
> appropriately and then to perform the lookup process again.
> Eventually, either the lookup process will return an error
> or a valid dentry/inode combination and then the operation can
> succeed or fail based on its own merits.

If a broken NFS server or FUSE filesystem keeps returning ESTALE, this
goes into an infinite loop. How are we planning to deal with that?

And it has to be dealt with either in the VFS, or in the kernel parts
of the relevant filesystems. We can't just say, fix the broken
servers, especially not with FUSE, where the server is totally
untrusted.

Miklos

2008-02-01 21:52:15

by Peter Staubach

[permalink] [raw]

Subject: Re: [PATCH 2/3] enhanced syscall ESTALE error handling (v2)

Miklos Szeredi wrote:
> This doesn't apply to -mm, because the ro-mounts stuff touches a lot
> of the same places as this patch. You probably need to rebase this on
> top of those changes.
>
>
>> This patch adds handling for the error, ESTALE, to the system
>> calls which take pathnames as arguments. The algorithm used
>> is to detect that an ESTALE error has occurred during an
>> operation subsequent to the lookup process and then to unwind
>> appropriately and then to perform the lookup process again.
>> Eventually, either the lookup process will return an error
>> or a valid dentry/inode combination and then the operation can
>> succeed or fail based on its own merits.
>>
>
> If a broken NFS server or FUSE filesystem keeps returning ESTALE, this
> goes into an infinite loop. How are we planning to deal with that?
>
>

Would you describe the situation that would cause the kernel to
go into an infinite loop, please?

Please note that, at least for NFS, this looping is interruptible
by the user, so the system can't hang without anything that can
be done.

> And it has to be dealt with either in the VFS, or in the kernel parts
> of the relevant filesystems. We can't just say, fix the broken
> servers, especially not with FUSE, where the server is totally
> untrusted.

Nope, certainly can't depend upon fixing servers. The client
should not depend upon the server to avoid things like looping.

Thanx...

ps

2008-02-01 22:03:55

by Miklos Szeredi

[permalink] [raw]

Subject: Re: [PATCH 2/3] enhanced syscall ESTALE error handling (v2)

> > This doesn't apply to -mm, because the ro-mounts stuff touches a lot
> > of the same places as this patch. You probably need to rebase this on
> > top of those changes.
> >
> >
> >> This patch adds handling for the error, ESTALE, to the system
> >> calls which take pathnames as arguments. The algorithm used
> >> is to detect that an ESTALE error has occurred during an
> >> operation subsequent to the lookup process and then to unwind
> >> appropriately and then to perform the lookup process again.
> >> Eventually, either the lookup process will return an error
> >> or a valid dentry/inode combination and then the operation can
> >> succeed or fail based on its own merits.
> >>
> >
> > If a broken NFS server or FUSE filesystem keeps returning ESTALE, this
> > goes into an infinite loop. How are we planning to deal with that?
> >
> >
>
> Would you describe the situation that would cause the kernel to
> go into an infinite loop, please?

The patch basically does:

do {
...
error = inode->i_op->foo()
...
} while (error == ESTALE);

What is the guarantee, that ->foo() will not always return ESTALE?

Miklos

2008-02-01 22:31:09

by Peter Staubach

[permalink] [raw]

Subject: Re: [PATCH 2/3] enhanced syscall ESTALE error handling (v2)

Miklos Szeredi wrote:
>>> This doesn't apply to -mm, because the ro-mounts stuff touches a lot
>>> of the same places as this patch. You probably need to rebase this on
>>> top of those changes.
>>>
>>>
>>>
>>>> This patch adds handling for the error, ESTALE, to the system
>>>> calls which take pathnames as arguments. The algorithm used
>>>> is to detect that an ESTALE error has occurred during an
>>>> operation subsequent to the lookup process and then to unwind
>>>> appropriately and then to perform the lookup process again.
>>>> Eventually, either the lookup process will return an error
>>>> or a valid dentry/inode combination and then the operation can
>>>> succeed or fail based on its own merits.
>>>>
>>>>
>>> If a broken NFS server or FUSE filesystem keeps returning ESTALE, this
>>> goes into an infinite loop. How are we planning to deal with that?
>>>
>>>
>>>
>> Would you describe the situation that would cause the kernel to
>> go into an infinite loop, please?
>>
>
> The patch basically does:
>
> do {
> ...
> error = inode->i_op->foo()
> ...
> } while (error == ESTALE);
>
> What is the guarantee, that ->foo() will not always return ESTALE?

You skimmed over some stuff, like the pathname lookup component
contained in the first set of dots...

I can't guarantee that ->foo() won't always return ESTALE.

That said, the loop is not unbreakable. At least for NFS, a signal
to the process will interrupt the loop because the error returned
will change from ESTALE to EINTR.

These changes include the base assumption that the components of
the underlying file system are basically reliable, that there is
a way to deal with bugs and/or malicious entities in the short
term, and that these things will be dealt with appropriately
in the longer term.

The short term resolution is a signal. The longer term fix is
to hunt down the bug or the malicious entity and either make it
go away or fence it off via some security measure or another to
prevent it from causing another problem.

If the underlying file system is the type that could potentially
return ESTALE, then it needs to be aware of the system architecture
and handle things appropriately.

Thanx...

ps

2008-02-02 08:01:28

by Miklos Szeredi

[permalink] [raw]

Subject: Re: [PATCH 2/3] enhanced syscall ESTALE error handling (v2)

> >>>
> >> Would you describe the situation that would cause the kernel to
> >> go into an infinite loop, please?
> >>
> >
> > The patch basically does:
> >
> > do {
> > ...
> > error = inode->i_op->foo()
> > ...
> > } while (error == ESTALE);
> >
> > What is the guarantee, that ->foo() will not always return ESTALE?
>
> You skimmed over some stuff, like the pathname lookup component
> contained in the first set of dots...
>
> I can't guarantee that ->foo() won't always return ESTALE.
>
> That said, the loop is not unbreakable. At least for NFS, a signal
> to the process will interrupt the loop because the error returned
> will change from ESTALE to EINTR.

In FUSE interrupts are sent to userspace, and the filesystem decides
what to do with them. So it is entirely possible and valid for a
filesystem to ignore an interrupt. If an operation was non-blocking
(such as one returning an error), then there would in fact be no
purpose in checking interrupts.

So while sending a signal might reliably work in NFS to break out of
the loop, it does not necessarily work for other filesystems, and fuse
may not be the only one affected.

Also up till now, returning ESTALE in a fuse filesystem was a
perfectly valid thing to do. This patch changes the behavior of that
rather drastically. There might be installed systems that rely on
current behavior, and we want to avoid breaking those on a kernel
upgrade.

A few solutions come to mind, perhaps the best is to introduce a
kernel internal errno value (ERETRYSTALE), that forces the relevant
system calls to be retried.

NFS could transform ESTALE errors to ERETRYSTALE and get the desired
behavior, while other filesystems would not be affected.

Miklos

2008-02-04 15:55:56

by Peter Staubach

[permalink] [raw]

Subject: Re: [PATCH 2/3] enhanced syscall ESTALE error handling (v2)

Miklos Szeredi wrote:
>>>>>
>>>>>
>>>> Would you describe the situation that would cause the kernel to
>>>> go into an infinite loop, please?
>>>>
>>>>
>>> The patch basically does:
>>>
>>> do {
>>> ...
>>> error = inode->i_op->foo()
>>> ...
>>> } while (error == ESTALE);
>>>
>>> What is the guarantee, that ->foo() will not always return ESTALE?
>>>
>> You skimmed over some stuff, like the pathname lookup component
>> contained in the first set of dots...
>>
>> I can't guarantee that ->foo() won't always return ESTALE.
>>
>> That said, the loop is not unbreakable. At least for NFS, a signal
>> to the process will interrupt the loop because the error returned
>> will change from ESTALE to EINTR.
>>
>
> In FUSE interrupts are sent to userspace, and the filesystem decides
> what to do with them. So it is entirely possible and valid for a
> filesystem to ignore an interrupt. If an operation was non-blocking
> (such as one returning an error), then there would in fact be no
> purpose in checking interrupts.
>
>

Why do you think that it is valid to ignore pending signals?
You seem to be asserting that it is okay for processes to hang,
uninterruptibly, when accessing files on fuse mounted file
systems?

Perhaps the right error to return when there is a signal
pending is EINTR and not ESTALE or some other error? There
has to be some way for the application to detect that its
system call was interrupted due to a signal pending.

> So while sending a signal might reliably work in NFS to break out of
> the loop, it does not necessarily work for other filesystems, and fuse
> may not be the only one affected.
>
>

Have you noticed another one? I would be happy to chat with the
developers for that file system to see if this support would
negatively impact them.

> Also up till now, returning ESTALE in a fuse filesystem was a
> perfectly valid thing to do. This patch changes the behavior of that
> rather drastically. There might be installed systems that rely on
> current behavior, and we want to avoid breaking those on a kernel
> upgrade.
>
>

Perhaps the explanation for what ESTALE means was not clear?
If there are fuse file systems which really do support the
notion of ESTALE, then it seems to me that they would also
benefit from this support, ie. the ability to do some recovery
from the situation.

> A few solutions come to mind, perhaps the best is to introduce a
> kernel internal errno value (ERETRYSTALE), that forces the relevant
> system calls to be retried.
>
> NFS could transform ESTALE errors to ERETRYSTALE and get the desired
> behavior, while other filesystems would not be affected.

We don't need more error numbers, we've got plenty already. :-)

Do you have anything more specific about any real problems?
I see lots of "mays" and "coulds", but I don't see anything
that I can do to make this support better.

Thanx...

ps

2008-02-04 17:38:51

by Miklos Szeredi

[permalink] [raw]

Subject: Re: [PATCH 2/3] enhanced syscall ESTALE error handling (v2)

> > In FUSE interrupts are sent to userspace, and the filesystem decides
> > what to do with them. So it is entirely possible and valid for a
> > filesystem to ignore an interrupt. If an operation was non-blocking
> > (such as one returning an error), then there would in fact be no
> > purpose in checking interrupts.
> >
> >
>
> Why do you think that it is valid to ignore pending signals?
> You seem to be asserting that it is okay for processes to hang,
> uninterruptibly, when accessing files on fuse mounted file
> systems?
>
> Perhaps the right error to return when there is a signal
> pending is EINTR and not ESTALE or some other error? There
> has to be some way for the application to detect that its
> system call was interrupted due to a signal pending.

Traditionally a lot of filesystem related system calls are not
interruptible, and for good reason. For example what happens, if an
app receives a signal, while the filesystem is performing a rename()
request? It would be very confusing if the call returned EINTR, but
the rename would successfully complete regardless.

We had a related problem with the open(O_CREAT) call in fuse, which
was interruptible between the creation and the actual open because of
a design mistake. So it could return EINTR, after the file was
created, and this broke a real world application (don't have details
at hand, but could dig them out if you are interested).

I don't know what NFS does, but returning EINTR without actually
canceling an operation in the server is generally not a good idea.

> > So while sending a signal might reliably work in NFS to break out of
> > the loop, it does not necessarily work for other filesystems, and fuse
> > may not be the only one affected.
> >
> >
>
> Have you noticed another one? I would be happy to chat with the
> developers for that file system to see if this support would
> negatively impact them.

Oh, I have no idea. And I wouldn't want to do a full audit of all the
filesystems to find out. But if you do, please go ahead.

> > A few solutions come to mind, perhaps the best is to introduce a
> > kernel internal errno value (ERETRYSTALE), that forces the relevant
> > system calls to be retried.
> >
> > NFS could transform ESTALE errors to ERETRYSTALE and get the desired
> > behavior, while other filesystems would not be affected.
>
> We don't need more error numbers, we've got plenty already. :-)

That's a rather poor excuse against a simple solution which would
spare us some backward compatibility problems.

> Do you have anything more specific about any real problems?
> I see lots of "mays" and "coulds", but I don't see anything
> that I can do to make this support better.

Implement the above suggestion? Or something else.

Otherwise I have to NAK this patch due to the possibility of it
breaking existing fuse installations.

Thanks,
Miklos

2008-02-04 18:44:13

by Peter Staubach

[permalink] [raw]

Subject: Re: [PATCH 2/3] enhanced syscall ESTALE error handling (v2)

Miklos Szeredi wrote:
>>> In FUSE interrupts are sent to userspace, and the filesystem decides
>>> what to do with them. So it is entirely possible and valid for a
>>> filesystem to ignore an interrupt. If an operation was non-blocking
>>> (such as one returning an error), then there would in fact be no
>>> purpose in checking interrupts.
>>>
>>>
>>>
>> Why do you think that it is valid to ignore pending signals?
>> You seem to be asserting that it is okay for processes to hang,
>> uninterruptibly, when accessing files on fuse mounted file
>> systems?
>>
>> Perhaps the right error to return when there is a signal
>> pending is EINTR and not ESTALE or some other error? There
>> has to be some way for the application to detect that its
>> system call was interrupted due to a signal pending.
>>
>
> Traditionally a lot of filesystem related system calls are not
> interruptible, and for good reason. For example what happens, if an
> app receives a signal, while the filesystem is performing a rename()
> request? It would be very confusing if the call returned EINTR, but
> the rename would successfully complete regardless.
>
> We had a related problem with the open(O_CREAT) call in fuse, which
> was interruptible between the creation and the actual open because of
> a design mistake. So it could return EINTR, after the file was
> created, and this broke a real world application (don't have details
> at hand, but could dig them out if you are interested).
>
> I don't know what NFS does, but returning EINTR without actually
> canceling an operation in the server is generally not a good idea.
>
>

This is what NFS has been doing, for several decades, and no one
has complained yet. It is just generally accepted. I do agree
that it isn't the best of semantics, but it does seem to work and
does solve a real problem which exists if you don't allow an
operation to be interrupted. The alternative, for NFS clients,
was potentially to block an application until a server, which
might never come back up, comes back up. It was a serious
problem and worse than this resolution.

Yes, I'd like to hear the details and find out why it was a
problem. If you allow the fuse file system to block waiting
on things which may never occur, then you are going to have a
problem. I would suggest considering this now instead of waiting
until it is too late. We can learn from the NFS experience instead
of just dismissing it.

>>> So while sending a signal might reliably work in NFS to break out of
>>> the loop, it does not necessarily work for other filesystems, and fuse
>>> may not be the only one affected.
>>>
>>>
>>>
>> Have you noticed another one? I would be happy to chat with the
>> developers for that file system to see if this support would
>> negatively impact them.
>>
>
> Oh, I have no idea. And I wouldn't want to do a full audit of all the
> filesystems to find out. But if you do, please go ahead.
>
>

Well, you brought it up. I thought that perhaps you had something
other than FUD.

>>> A few solutions come to mind, perhaps the best is to introduce a
>>> kernel internal errno value (ERETRYSTALE), that forces the relevant
>>> system calls to be retried.
>>>
>>> NFS could transform ESTALE errors to ERETRYSTALE and get the desired
>>> behavior, while other filesystems would not be affected.
>>>
>> We don't need more error numbers, we've got plenty already. :-)
>>
>
> That's a rather poor excuse against a simple solution which would
> spare us some backward compatibility problems.
>
>

Potential backwards compatibility problems and none are even known
or even considered.

The solution here isn't to create more hacks and a new error number
for this purpose is just a hack.

>> Do you have anything more specific about any real problems?
>> I see lots of "mays" and "coulds", but I don't see anything
>> that I can do to make this support better.
>>
>
> Implement the above suggestion? Or something else.
>
> Otherwise I have to NAK this patch due to the possibility of it
> breaking existing fuse installations.

Please describe this real and existing fuse installation so that I can
better understand the situation and the real requirements here.

Instead of attempting to block this proposal, what about considering
how to architect fuse to handle the situation instead of pretending
that fuse won't have the same problem to solve if it isn't solved
here? I have a real problem to solve and I need to get it resolved.
I have real customers, with real problems, and not just theoretical
and vague ones.

ps

2008-02-04 19:03:25

by Miklos Szeredi

[permalink] [raw]

Subject: Re: [PATCH 2/3] enhanced syscall ESTALE error handling (v2)

> > I don't know what NFS does, but returning EINTR without actually
> > canceling an operation in the server is generally not a good idea.
> >
> >
>
> This is what NFS has been doing, for several decades, and no one
> has complained yet.

Is it really? Man nfs says something quite different (emphasis mine):

intr If an NFS file operation has a *major timeout* and it is
hard mounted, then allow signals to interupt the file
operation and cause it to return EINTR to the calling
program. The *default* is to *not* allow file operations to
be *interrupted*.

> >> Have you noticed another one? I would be happy to chat with the
> >> developers for that file system to see if this support would
> >> negatively impact them.
> >>
> >
> > Oh, I have no idea. And I wouldn't want to do a full audit of all the
> > filesystems to find out. But if you do, please go ahead.
> >
> >
>
> Well, you brought it up. I thought that perhaps you had something
> other than FUD.

It's not FUD, it's being careful not to break an implementation when
changing an API in a backward incompatible way.

> Please describe this real and existing fuse installation so that I can
> better understand the situation and the real requirements here.

I have already done so:

"Also up till now, returning ESTALE in a fuse filesystem was a
perfectly valid thing to do. This patch changes the behavior of
that rather drastically. There might be installed systems that
rely on current behavior, and we want to avoid breaking those on a
kernel upgrade."

Miklos

2008-03-10 20:25:04

by Peter Staubach

[permalink] [raw]

Subject: [PATCH 2/3] enhanced syscall ESTALE error handling (v3)

--- linux-2.6.24.i686/fs/namei.c.org
+++ linux-2.6.24.i686/fs/namei.c
@@ -1958,6 +1984,7 @@ asmlinkage long sys_mknodat(int dfd, con
if (IS_ERR(tmp))
return PTR_ERR(tmp);

+top:
error = do_path_lookup(dfd, tmp, LOOKUP_PARENT, &nd);
if (error)
goto out;
@@ -1988,6 +2015,8 @@ asmlinkage long sys_mknodat(int dfd, con
}
mutex_unlock(&nd.path.dentry->d_inode->i_mutex);
path_put(&nd.path);
+ if (error == -ESTALE)
+ goto top;
out:
putname(tmp);

@@ -2023,8 +2052,8 @@ int vfs_mkdir(struct inode *dir, struct

asmlinkage long sys_mkdirat(int dfd, const char __user *pathname, int mode)
{
- int error = 0;
- char * tmp;
+ int error;
+ char *tmp;
struct dentry *dentry;
struct nameidata nd;

@@ -2033,6 +2062,7 @@ asmlinkage long sys_mkdirat(int dfd, con
if (IS_ERR(tmp))
goto out_err;

+top:
error = do_path_lookup(dfd, tmp, LOOKUP_PARENT, &nd);
if (error)
goto out;
@@ -2048,6 +2078,8 @@ asmlinkage long sys_mkdirat(int dfd, con
out_unlock:
mutex_unlock(&nd.path.dentry->d_inode->i_mutex);
path_put(&nd.path);
+ if (error == -ESTALE)
+ goto top;
out:
putname(tmp);
out_err:
@@ -2127,23 +2159,24 @@ static long do_rmdir(int dfd, const char
struct nameidata nd;

name = getname(pathname);
- if(IS_ERR(name))
+ if (IS_ERR(name))
return PTR_ERR(name);

+top:
error = do_path_lookup(dfd, name, LOOKUP_PARENT, &nd);
if (error)
goto exit;

- switch(nd.last_type) {
- case LAST_DOTDOT:
- error = -ENOTEMPTY;
- goto exit1;
- case LAST_DOT:
- error = -EINVAL;
- goto exit1;
- case LAST_ROOT:
- error = -EBUSY;
- goto exit1;
+ switch (nd.last_type) {
+ case LAST_DOTDOT:
+ error = -ENOTEMPTY;
+ goto exit1;
+ case LAST_DOT:
+ error = -EINVAL;
+ goto exit1;
+ case LAST_ROOT:
+ error = -EBUSY;
+ goto exit1;
}
mutex_lock_nested(&nd.path.dentry->d_inode->i_mutex, I_MUTEX_PARENT);
dentry = lookup_hash(&nd);
@@ -2156,6 +2189,8 @@ exit2:
mutex_unlock(&nd.path.dentry->d_inode->i_mutex);
exit1:
path_put(&nd.path);
+ if (error == -ESTALE)
+ goto top;
exit:
putname(name);
return error;
@@ -2209,12 +2244,14 @@ static long do_unlinkat(int dfd, const c
char * name;
struct dentry *dentry;
struct nameidata nd;
- struct inode *inode = NULL;
+ struct inode *inode;

name = getname(pathname);
if(IS_ERR(name))
return PTR_ERR(name);

+top:
+ inode = NULL;
error = do_path_lookup(dfd, name, LOOKUP_PARENT, &nd);
if (error)
goto exit;
@@ -2240,6 +2277,8 @@ static long do_unlinkat(int dfd, const c
iput(inode); /* truncate the inode here */
exit1:
path_put(&nd.path);
+ if (error == -ESTALE)
+ goto top;
exit:
putname(name);
return error;
@@ -2304,6 +2343,7 @@ asmlinkage long sys_symlinkat(const char
if (IS_ERR(to))
goto out_putname;

+top:
error = do_path_lookup(newdfd, to, LOOKUP_PARENT, &nd);
if (error)
goto out;
@@ -2317,6 +2357,8 @@ asmlinkage long sys_symlinkat(const char
out_unlock:
mutex_unlock(&nd.path.dentry->d_inode->i_mutex);
path_put(&nd.path);
+ if (error == -ESTALE)
+ goto top;
out:
putname(to);
out_putname:
@@ -2392,6 +2434,7 @@ asmlinkage long sys_linkat(int olddfd, c
if (IS_ERR(to))
return PTR_ERR(to);

+top:
error = __user_walk_fd(olddfd, oldname,
flags & AT_SYMLINK_FOLLOW ? LOOKUP_FOLLOW : 0,
&old_nd);
@@ -2411,6 +2454,11 @@ asmlinkage long sys_linkat(int olddfd, c
dput(new_dentry);
out_unlock:
mutex_unlock(&nd.path.dentry->d_inode->i_mutex);
+ if (error == -ESTALE) {
+ path_put(&nd.path);
+ path_put(&old_nd.path);
+ goto top;
+ }
out_release:
path_put(&nd.path);
out:
@@ -2581,6 +2629,7 @@ static int do_rename(int olddfd, const c
struct dentry * trap;
struct nameidata oldnd, newnd;

+top:
error = do_path_lookup(olddfd, oldname, LOOKUP_PARENT, &oldnd);
if (error)
goto exit;
@@ -2641,6 +2690,11 @@ exit4:
dput(old_dentry);
exit3:
unlock_rename(new_dir, old_dir);
+ if (error == -ESTALE) {
+ path_put(&newnd.path);
+ path_put(&oldnd.path);
+ goto top;
+ }
exit2:
path_put(&newnd.path);
exit1:
--- linux-2.6.24.i686/fs/open.c.org
+++ linux-2.6.24.i686/fs/open.c
@@ -124,6 +124,7 @@ asmlinkage long sys_statfs(const char __
struct nameidata nd;
int error;

+top:
error = user_path_walk(path, &nd);
if (!error) {
struct statfs tmp;
@@ -131,6 +132,8 @@ asmlinkage long sys_statfs(const char __
if (!error && copy_to_user(buf, &tmp, sizeof(tmp)))
error = -EFAULT;
path_put(&nd.path);
+ if (error == -ESTALE)
+ goto top;
}
return error;
}
@@ -143,6 +146,7 @@ asmlinkage long sys_statfs64(const char

if (sz != sizeof(*buf))
return -EINVAL;
+top:
error = user_path_walk(path, &nd);
if (!error) {
struct statfs64 tmp;
@@ -150,6 +154,8 @@ asmlinkage long sys_statfs64(const char
if (!error && copy_to_user(buf, &tmp, sizeof(tmp)))
error = -EFAULT;
path_put(&nd.path);
+ if (error == -ESTALE)
+ goto top;
}
return error;
}
@@ -230,6 +236,7 @@ static long do_sys_truncate(const char _
if (length < 0) /* sorry, but loff_t says... */
goto out;

+top:
error = user_path_walk(path, &nd);
if (error)
goto out;
@@ -278,6 +285,8 @@ put_write_and_out:
put_write_access(inode);
dput_and_out:
path_put(&nd.path);
+ if (error == -ESTALE)
+ goto top;
out:
return error;
}
@@ -448,6 +457,7 @@ asmlinkage long sys_faccessat(int dfd, c
else
current->cap_effective = current->cap_permitted;

+top:
res = __user_walk_fd(dfd, filename, LOOKUP_FOLLOW|LOOKUP_ACCESS, &nd);
if (res)
goto out;
@@ -463,6 +473,8 @@ asmlinkage long sys_faccessat(int dfd, c

out_path_release:
path_put(&nd.path);
+ if (res == -ESTALE)
+ goto top;
out:
current->fsuid = old_fsuid;
current->fsgid = old_fsgid;
@@ -481,6 +493,7 @@ asmlinkage long sys_chdir(const char __u
struct nameidata nd;
int error;

+top:
error = __user_walk(filename,
LOOKUP_FOLLOW|LOOKUP_DIRECTORY|LOOKUP_CHDIR, &nd);
if (error)
@@ -494,6 +507,8 @@ asmlinkage long sys_chdir(const char __u

dput_and_out:
path_put(&nd.path);
+ if (error == -ESTALE)
+ goto top;
out:
return error;
}
@@ -529,6 +544,7 @@ asmlinkage long sys_chroot(const char __
struct nameidata nd;
int error;

+top:
error = __user_walk(filename, LOOKUP_FOLLOW | LOOKUP_DIRECTORY | LOOKUP_NOALT, &nd);
if (error)
goto out;
@@ -546,6 +562,8 @@ asmlinkage long sys_chroot(const char __
error = 0;
dput_and_out:
path_put(&nd.path);
+ if (error == -ESTALE)
+ goto top;
out:
return error;
}
@@ -595,6 +613,7 @@ asmlinkage long sys_fchmodat(int dfd, co
int error;
struct iattr newattrs;

+top:
error = __user_walk_fd(dfd, filename, LOOKUP_FOLLOW, &nd);
if (error)
goto out;
@@ -618,6 +637,8 @@ asmlinkage long sys_fchmodat(int dfd, co

dput_and_out:
path_put(&nd.path);
+ if (error == -ESTALE)
+ goto top;
out:
return error;
}
@@ -668,11 +689,14 @@ asmlinkage long sys_chown(const char __u
struct nameidata nd;
int error;

+top:
error = user_path_walk(filename, &nd);
if (error)
goto out;
error = chown_common(nd.path.dentry, user, group);
path_put(&nd.path);
+ if (error == -ESTALE)
+ goto top;
out:
return error;
}
@@ -688,11 +712,14 @@ asmlinkage long sys_fchownat(int dfd, co
goto out;

follow = (flag & AT_SYMLINK_NOFOLLOW) ? 0 : LOOKUP_FOLLOW;
+top:
error = __user_walk_fd(dfd, filename, follow, &nd);
if (error)
goto out;
error = chown_common(nd.path.dentry, user, group);
path_put(&nd.path);
+ if (error == -ESTALE)
+ goto top;
out:
return error;
}
@@ -702,11 +729,14 @@ asmlinkage long sys_lchown(const char __
struct nameidata nd;
int error;

+top:
error = user_path_walk_link(filename, &nd);
if (error)
goto out;
error = chown_common(nd.path.dentry, user, group);
path_put(&nd.path);
+ if (error == -ESTALE)
+ goto top;
out:
return error;
}
@@ -815,16 +845,22 @@ static struct file *do_filp_open(int dfd
{
int namei_flags, error;
struct nameidata nd;
+ struct file *res;

+top:
namei_flags = flags;
if ((namei_flags+1) & O_ACCMODE)
namei_flags++;

error = open_namei(dfd, filename, namei_flags, mode, &nd);
- if (!error)
- return nameidata_to_filp(&nd, flags);
+ if (error)
+ return ERR_PTR(error);
+
+ res = nameidata_to_filp(&nd, flags);
+ if (IS_ERR(res) && res == ERR_PTR(-ESTALE))
+ goto top;
+ return res;

- return ERR_PTR(error);
}

struct file *filp_open(const char *filename, int flags, int mode)
--- linux-2.6.24.i686/fs/inotify_user.c.org
+++ linux-2.6.24.i686/fs/inotify_user.c
@@ -361,13 +361,17 @@ static int find_inode(const char __user
{
int error;

+top:
error = __user_walk(dirname, flags, nd);
if (error)
return error;
/* you can only watch an inode if you have read permissions on it */
error = vfs_permission(nd, MAY_READ);
- if (error)
+ if (error) {
path_put(&nd->path);
+ if (error == -ESTALE)
+ goto top;
+ }
return error;
}

--- linux-2.6.24.i686/fs/stat.c.org
+++ linux-2.6.24.i686/fs/stat.c
@@ -60,10 +60,13 @@ int vfs_stat_fd(int dfd, char __user *na
struct nameidata nd;
int error;

+top:
error = __user_walk_fd(dfd, name, LOOKUP_FOLLOW, &nd);
if (!error) {
error = vfs_getattr(nd.path.mnt, nd.path.dentry, stat);
path_put(&nd.path);
+ if (error == -ESTALE)
+ goto top;
}
return error;
}
@@ -80,10 +83,13 @@ int vfs_lstat_fd(int dfd, char __user *n
struct nameidata nd;
int error;

+top:
error = __user_walk_fd(dfd, name, 0, &nd);
if (!error) {
error = vfs_getattr(nd.path.mnt, nd.path.dentry, stat);
path_put(&nd.path);
+ if (error == -ESTALE)
+ goto top;
}
return error;
}
@@ -300,6 +306,7 @@ asmlinkage long sys_readlinkat(int dfd,
if (bufsiz <= 0)
return -EINVAL;

+top:
error = __user_walk_fd(dfd, path, 0, &nd);
if (!error) {
struct inode *inode = nd.path.dentry->d_inode;
@@ -314,6 +321,8 @@ asmlinkage long sys_readlinkat(int dfd,
}
}
path_put(&nd.path);
+ if (error == -ESTALE)
+ goto top;
}
return error;
}
--- linux-2.6.24.i686/fs/exec.c.org
+++ linux-2.6.24.i686/fs/exec.c
@@ -107,6 +107,7 @@ asmlinkage long sys_uselib(const char __
struct nameidata nd;
int error;

+top:
error = __user_path_lookup_open(library, LOOKUP_FOLLOW, &nd, FMODE_READ|FMODE_EXEC);
if (error)
goto out;
@@ -149,6 +150,8 @@ out:
exit:
release_open_intent(&nd);
path_put(&nd.path);
+ if (error == -ESTALE)
+ goto top;
goto out;
}

@@ -656,6 +659,7 @@ struct file *open_exec(const char *name)
int err;
struct file *file;

+top:
err = path_lookup_open(AT_FDCWD, name, LOOKUP_FOLLOW, &nd, FMODE_READ|FMODE_EXEC);
file = ERR_PTR(err);

@@ -663,7 +667,7 @@ struct file *open_exec(const char *name)
struct inode *inode = nd.path.dentry->d_inode;
file = ERR_PTR(-EACCES);
if (S_ISREG(inode->i_mode)) {
- int err = vfs_permission(&nd, MAY_EXEC);
+ err = vfs_permission(&nd, MAY_EXEC);
file = ERR_PTR(err);
if (!err) {
file = nameidata_to_filp(&nd,
@@ -674,13 +678,16 @@ struct file *open_exec(const char *name)
fput(file);
file = ERR_PTR(err);
}
- }
+ } else if (file == ERR_PTR(-ESTALE))
+ goto top;
out:
return file;
}
}
release_open_intent(&nd);
path_put(&nd.path);
+ if (err == -ESTALE)
+ goto top;
}
goto out;
}
--- linux-2.6.24.i686/fs/utimes.c.org
+++ linux-2.6.24.i686/fs/utimes.c
@@ -80,6 +80,7 @@ long do_utimes(int dfd, char __user *fil
goto out;
dentry = f->f_path.dentry;
} else {
+top:
error = __user_walk_fd(dfd, filename, (flags & AT_SYMLINK_NOFOLLOW) ? 0 : LOOKUP_FOLLOW, &nd);
if (error)
goto out;
@@ -137,8 +138,11 @@ long do_utimes(int dfd, char __user *fil
dput_and_out:
if (f)
fput(f);
- else
+ else {
path_put(&nd.path);
+ if (error == -ESTALE)
+ goto top;
+ }
out:
return error;
}
--- linux-2.6.24.i686/fs/compat.c.org
+++ linux-2.6.24.i686/fs/compat.c
@@ -238,6 +238,7 @@ asmlinkage long compat_sys_statfs(const
struct nameidata nd;
int error;

+top:
error = user_path_walk(path, &nd);
if (!error) {
struct kstatfs tmp;
@@ -245,6 +246,8 @@ asmlinkage long compat_sys_statfs(const
if (!error)
error = put_compat_statfs(buf, &tmp);
path_put(&nd.path);
+ if (error == -ESTALE)
+ goto top;
}
return error;
}
@@ -306,6 +309,7 @@ asmlinkage long compat_sys_statfs64(cons
if (sz != sizeof(*buf))
return -EINVAL;

+top:
error = user_path_walk(path, &nd);
if (!error) {
struct kstatfs tmp;
@@ -313,6 +317,8 @@ asmlinkage long compat_sys_statfs64(cons
if (!error)
error = put_compat_statfs64(buf, &tmp);
path_put(&nd.path);
+ if (error == -ESTALE)
+ goto top;
}
return error;
}
--- linux-2.6.24.i686/fs/xattr.c.org
+++ linux-2.6.24.i686/fs/xattr.c
@@ -259,11 +259,14 @@ sys_setxattr(char __user *path, char __u
struct nameidata nd;
int error;

+top:
error = user_path_walk(path, &nd);
if (error)
return error;
error = setxattr(nd.path.dentry, name, value, size, flags);
path_put(&nd.path);
+ if (error == -ESTALE)
+ goto top;
return error;
}

@@ -274,11 +277,14 @@ sys_lsetxattr(char __user *path, char __
struct nameidata nd;
int error;

+top:
error = user_path_walk_link(path, &nd);
if (error)
return error;
error = setxattr(nd.path.dentry, name, value, size, flags);
path_put(&nd.path);
+ if (error == -ESTALE)
+ goto top;
return error;
}

@@ -344,11 +350,14 @@ sys_getxattr(char __user *path, char __u
struct nameidata nd;
ssize_t error;

+top:
error = user_path_walk(path, &nd);
if (error)
return error;
error = getxattr(nd.path.dentry, name, value, size);
path_put(&nd.path);
+ if (error == -ESTALE)
+ goto top;
return error;
}

@@ -359,11 +368,14 @@ sys_lgetxattr(char __user *path, char __
struct nameidata nd;
ssize_t error;

+top:
error = user_path_walk_link(path, &nd);
if (error)
return error;
error = getxattr(nd.path.dentry, name, value, size);
path_put(&nd.path);
+ if (error == -ESTALE)
+ goto top;
return error;
}

@@ -418,11 +430,14 @@ sys_listxattr(char __user *path, char __
struct nameidata nd;
ssize_t error;

+top:
error = user_path_walk(path, &nd);
if (error)
return error;
error = listxattr(nd.path.dentry, list, size);
path_put(&nd.path);
+ if (error == -ESTALE)
+ goto top;
return error;
}

@@ -432,11 +447,14 @@ sys_llistxattr(char __user *path, char _
struct nameidata nd;
ssize_t error;

+top:
error = user_path_walk_link(path, &nd);
if (error)
return error;
error = listxattr(nd.path.dentry, list, size);
path_put(&nd.path);
+ if (error == -ESTALE)
+ goto top;
return error;
}

@@ -479,11 +497,14 @@ sys_removexattr(char __user *path, char
struct nameidata nd;
int error;

+top:
error = user_path_walk(path, &nd);
if (error)
return error;
error = removexattr(nd.path.dentry, name);
path_put(&nd.path);
+ if (error == -ESTALE)
+ goto top;
return error;
}

@@ -493,11 +514,14 @@ sys_lremovexattr(char __user *path, char
struct nameidata nd;
int error;

+top:
error = user_path_walk_link(path, &nd);
if (error)
return error;
error = removexattr(nd.path.dentry, name);
path_put(&nd.path);
+ if (error == -ESTALE)
+ goto top;
return error;
}

Attachments:

estale.syscall.3 (14.82 kB)