Return-Path: Received: (majordomo@vger.kernel.org) by vger.kernel.org via listexpand id S1760249AbYBAU7X (ORCPT ); Fri, 1 Feb 2008 15:59:23 -0500 Received: (majordomo@vger.kernel.org) by vger.kernel.org id S1760781AbYBAU6Y (ORCPT ); Fri, 1 Feb 2008 15:58:24 -0500 Received: from mx1.redhat.com ([66.187.233.31]:44894 "EHLO mx1.redhat.com" rhost-flags-OK-OK-OK-OK) by vger.kernel.org with ESMTP id S1760560AbYBAU6L (ORCPT ); Fri, 1 Feb 2008 15:58:11 -0500 Message-ID: <47A387D9.1070206@redhat.com> Date: Fri, 01 Feb 2008 15:58:01 -0500 From: Peter Staubach User-Agent: Thunderbird 1.5.0.12 (X11/20071018) MIME-Version: 1.0 To: Linux Kernel Mailing List CC: linux-nfs@vger.kernel.org, Andrew Morton , Trond Myklebust , linux-fsdevel Subject: [PATCH 3/3] enhanced NFS ESTALE error handling (v2) References: <4790C76F.2050105@redhat.com> In-Reply-To: <4790C76F.2050105@redhat.com> Content-Type: multipart/mixed; boundary="------------020804050301060300010907" Sender: linux-kernel-owner@vger.kernel.org List-ID: X-Mailing-List: linux-kernel@vger.kernel.org Content-Length: 15831 Lines: 540 This is a multi-part message in MIME format. --------------020804050301060300010907 Content-Type: text/plain; charset=ISO-8859-1; format=flowed Content-Transfer-Encoding: 7bit Hi. The patch enhanced the ESTALE error handling for NFS mounted file systems. It expands the number of places that the NFS client checks for ESTALE returns from the server. It also enhances the ESTALE handling for directories by occasionally retrying revalidation to check to see whether the directory becomes valid again. This sounds odd, but can occur when a systems administrator, accidently or unknowingly, unexports a file system which is in use. All active non-directory files become permanently inaccessible, but directories can be become accessible again after the administrator re-exports the file system. This is a situation that users have been complaining about for years and this support can help to alleviate their situations. Thanx... ps Signed-off-by: Peter Staubach --------------020804050301060300010907 Content-Type: text/plain; name="estale.nfs.2" Content-Transfer-Encoding: 7bit Content-Disposition: inline; filename="estale.nfs.2" --- linux-2.6.24.i686/fs/nfs/inode.c.org +++ linux-2.6.24.i686/fs/nfs/inode.c @@ -192,7 +192,7 @@ void nfs_invalidate_atime(struct inode * */ static void nfs_invalidate_inode(struct inode *inode) { - set_bit(NFS_INO_STALE, &NFS_FLAGS(inode)); + nfs_handle_estale(-ESTALE, inode); nfs_zap_caches_locked(inode); } @@ -385,6 +385,8 @@ nfs_setattr(struct dentry *dentry, struc error = NFS_PROTO(inode)->setattr(dentry, &fattr, attr); if (error == 0) nfs_refresh_inode(inode, &fattr); + else + nfs_handle_estale(error, inode); unlock_kernel(); return error; } @@ -629,7 +631,7 @@ int nfs_release(struct inode *inode, str int __nfs_revalidate_inode(struct nfs_server *server, struct inode *inode) { - int status = -ESTALE; + int status = -ESTALE; struct nfs_fattr fattr; struct nfs_inode *nfsi = NFS_I(inode); @@ -640,15 +642,25 @@ __nfs_revalidate_inode(struct nfs_server lock_kernel(); if (is_bad_inode(inode)) goto out_nowait; - if (NFS_STALE(inode)) + if (NFS_STALE(inode) && !S_ISDIR(inode->i_mode)) goto out_nowait; status = nfs_wait_on_inode(inode); if (status < 0) goto out; + /* + * Do we believe that the file handle is still stale? + * For non-directories, once stale, always stale. + * For directories, believe the stale status for the + * attribute cache timeout period, and then try again. + * This will help to address the problem of the server + * admin "accidently" unexporting a file system without + * stopping the NFS server first. + */ status = -ESTALE; - if (NFS_STALE(inode)) + if (NFS_STALE(inode) && + (!S_ISDIR(inode->i_mode) || !nfs_attribute_timeout(inode))) goto out; status = NFS_PROTO(inode)->getattr(server, NFS_FH(inode), &fattr); @@ -656,11 +668,9 @@ __nfs_revalidate_inode(struct nfs_server dfprintk(PAGECACHE, "nfs_revalidate_inode: (%s/%Ld) getattr failed, error=%d\n", inode->i_sb->s_id, (long long)NFS_FILEID(inode), status); - if (status == -ESTALE) { + nfs_handle_estale(status, inode); + if (status == -ESTALE) nfs_zap_caches(inode); - if (!S_ISDIR(inode->i_mode)) - set_bit(NFS_INO_STALE, &NFS_FLAGS(inode)); - } goto out; } @@ -986,14 +996,28 @@ static int nfs_update_inode(struct inode __FUNCTION__, inode->i_sb->s_id, inode->i_ino, atomic_read(&inode->i_count), fattr->valid); - if (nfsi->fileid != fattr->fileid) - goto out_fileid; + if (nfsi->fileid != fattr->fileid) { + printk(KERN_ERR "NFS: server %s error: fileid changed\n" + "fsid %s: expected fileid 0x%Lx, got 0x%Lx\n", + NFS_SERVER(inode)->nfs_client->cl_hostname, + inode->i_sb->s_id, + (long long)nfsi->fileid, (long long)fattr->fileid); + goto out_err; + } /* * Make sure the inode's type hasn't changed. */ - if ((inode->i_mode & S_IFMT) != (fattr->mode & S_IFMT)) - goto out_changed; + if ((inode->i_mode & S_IFMT) != (fattr->mode & S_IFMT)) { + /* + * Big trouble! The inode has become a different object. + */ + printk(KERN_DEBUG "%s: inode %ld mode changed, %07o to %07o\n", + __FUNCTION__, inode->i_ino, inode->i_mode, fattr->mode); + goto out_err; + } + + nfs_clear_estale(inode); server = NFS_SERVER(inode); /* Update the fsid? */ @@ -1099,12 +1123,7 @@ static int nfs_update_inode(struct inode nfsi->cache_validity &= ~NFS_INO_REVAL_FORCED; return 0; - out_changed: - /* - * Big trouble! The inode has become a different object. - */ - printk(KERN_DEBUG "%s: inode %ld mode changed, %07o to %07o\n", - __FUNCTION__, inode->i_ino, inode->i_mode, fattr->mode); + out_err: /* * No need to worry about unhashing the dentry, as the @@ -1113,13 +1132,6 @@ static int nfs_update_inode(struct inode */ nfs_invalidate_inode(inode); return -ESTALE; - - out_fileid: - printk(KERN_ERR "NFS: server %s error: fileid changed\n" - "fsid %s: expected fileid 0x%Lx, got 0x%Lx\n", - NFS_SERVER(inode)->nfs_client->cl_hostname, inode->i_sb->s_id, - (long long)nfsi->fileid, (long long)fattr->fileid); - goto out_err; } --- linux-2.6.24.i686/fs/nfs/dir.c.org +++ linux-2.6.24.i686/fs/nfs/dir.c @@ -186,8 +186,9 @@ int nfs_readdir_filler(nfs_readdir_descr again: timestamp = jiffies; - error = NFS_PROTO(inode)->readdir(file->f_path.dentry, cred, desc->entry->cookie, page, - NFS_SERVER(inode)->dtsize, desc->plus); + error = NFS_PROTO(inode)->readdir(file->f_path.dentry, cred, + desc->entry->cookie, page, + NFS_SERVER(inode)->dtsize, desc->plus); if (error < 0) { /* We requested READDIRPLUS, but the server doesn't grok it */ if (error == -ENOTSUPP && desc->plus) { @@ -213,6 +214,7 @@ int nfs_readdir_filler(nfs_readdir_descr return 0; error: unlock_page(page); + nfs_handle_estale(error, inode); desc->error = error; return -EIO; } @@ -494,8 +496,10 @@ int uncached_readdir(nfs_readdir_descrip desc->timestamp_valid = 1; if ((status = dir_decode(desc)) == 0) desc->entry->prev_cookie = *desc->dir_cookie; - } else + } else { + nfs_handle_estale(desc->error, inode); status = -EIO; + } if (status < 0) goto out_release; @@ -748,7 +752,7 @@ static int nfs_lookup_revalidate(struct struct inode *dir; struct inode *inode; struct dentry *parent; - int error; + int error = 0; struct nfs_fh fhandle; struct nfs_fattr fattr; @@ -779,13 +783,16 @@ static int nfs_lookup_revalidate(struct } if (NFS_STALE(inode)) - goto out_bad; + goto out_zap_parent; error = NFS_PROTO(dir)->lookup(dir, &dentry->d_name, &fhandle, &fattr); - if (error) + if (error) { + if (error == -ESTALE) + goto out_zap_parent; goto out_bad; + } if (nfs_compare_fh(NFS_FH(inode), &fhandle)) - goto out_bad; + goto out_zap_parent; if ((error = nfs_refresh_inode(inode, &fattr)) != 0) goto out_bad; @@ -800,6 +807,7 @@ static int nfs_lookup_revalidate(struct out_zap_parent: nfs_zap_caches(dir); out_bad: + nfs_handle_estale(error, dir); nfs_mark_for_revalidate(dir); if (inode && S_ISDIR(inode->i_mode)) { /* Purge readdir caches. */ @@ -883,7 +891,10 @@ static struct dentry *nfs_lookup(struct if (dentry->d_name.len > NFS_SERVER(dir)->namelen) goto out; - res = ERR_PTR(-ENOMEM); + res = ERR_PTR(-ESTALE); + if (NFS_STALE(dir)) + goto out; + dentry->d_op = NFS_PROTO(dir)->dentry_ops; lock_kernel(); @@ -902,9 +913,10 @@ static struct dentry *nfs_lookup(struct /* Protect against concurrent sillydeletes */ nfs_block_sillyrename(parent); error = NFS_PROTO(dir)->lookup(dir, &dentry->d_name, &fhandle, &fattr); - if (error == -ENOENT) - goto no_entry; if (error < 0) { + if (error == -ENOENT) + goto no_entry; + nfs_handle_estale(error, dir); res = ERR_PTR(error); goto out_unblock_sillyrename; } @@ -1154,8 +1166,10 @@ int nfs_instantiate(struct dentry *dentr goto out; if (fhandle->size == 0) { error = NFS_PROTO(dir)->lookup(dir, &dentry->d_name, fhandle, fattr); - if (error) + if (error) { + nfs_handle_estale(error, dir); goto out_error; + } } nfs_set_verifier(dentry, nfs_save_change_attribute(dir)); if (!(fattr->valid & NFS_ATTR_FATTR)) { @@ -1207,6 +1221,7 @@ static int nfs_create(struct inode *dir, unlock_kernel(); return 0; out_err: + nfs_handle_estale(error, dir); unlock_kernel(); d_drop(dentry); return error; @@ -1237,6 +1252,7 @@ nfs_mknod(struct inode *dir, struct dent unlock_kernel(); return 0; out_err: + nfs_handle_estale(status, dir); unlock_kernel(); d_drop(dentry); return status; @@ -1263,6 +1279,7 @@ static int nfs_mkdir(struct inode *dir, unlock_kernel(); return 0; out_err: + nfs_handle_estale(error, dir); d_drop(dentry); unlock_kernel(); return error; @@ -1280,6 +1297,7 @@ static int nfs_rmdir(struct inode *dir, /* Ensure the VFS deletes this inode */ if (error == 0 && dentry->d_inode != NULL) clear_nlink(dentry->d_inode); + nfs_handle_estale(error, dir); unlock_kernel(); return error; @@ -1349,7 +1367,8 @@ static int nfs_sillyrename(struct inode d_move(dentry, sdentry); error = nfs_async_unlink(dir, dentry); /* If we return 0 we don't unlink */ - } + } else + nfs_handle_estale(error, dir); dput(sdentry); out: return error; @@ -1386,6 +1405,7 @@ static int nfs_safe_remove(struct dentry nfs_mark_for_revalidate(inode); } else error = NFS_PROTO(dir)->remove(dir, &dentry->d_name); + nfs_handle_estale(error, dir); out: return error; } @@ -1482,6 +1502,7 @@ static int nfs_symlink(struct inode *dir dfprintk(VFS, "NFS: symlink(%s/%ld, %s, %s) error %d\n", dir->i_sb->s_id, dir->i_ino, dentry->d_name.name, symname, error); + nfs_handle_estale(error, dir); d_drop(dentry); __free_page(page); unlock_kernel(); @@ -1866,8 +1887,10 @@ static int nfs_do_access(struct inode *i cache.cred = cred; cache.jiffies = jiffies; status = NFS_PROTO(inode)->access(inode, &cache); - if (status != 0) + if (status != 0) { + nfs_handle_estale(status, inode); return status; + } nfs_access_add_cache(inode, &cache); out: if ((cache.mask & mask) == mask) --- linux-2.6.24.i686/fs/nfs/nfs4proc.c.org +++ linux-2.6.24.i686/fs/nfs/nfs4proc.c @@ -3627,6 +3627,7 @@ int nfs4_setxattr(struct dentry *dentry, size_t buflen, int flags) { struct inode *inode = dentry->d_inode; + int error; if (strcmp(key, XATTR_NAME_NFSV4_ACL) != 0) return -EOPNOTSUPP; @@ -3635,7 +3636,9 @@ int nfs4_setxattr(struct dentry *dentry, (!S_ISDIR(inode->i_mode) || inode->i_mode & S_ISVTX)) return -EPERM; - return nfs4_proc_set_acl(inode, buf, buflen); + error = nfs4_proc_set_acl(inode, buf, buflen); + nfs_handle_estale(error, inode); + return error; } /* The getxattr man page suggests returning -ENODATA for unknown attributes, @@ -3646,11 +3649,14 @@ ssize_t nfs4_getxattr(struct dentry *den size_t buflen) { struct inode *inode = dentry->d_inode; + int error; if (strcmp(key, XATTR_NAME_NFSV4_ACL) != 0) return -EOPNOTSUPP; - return nfs4_proc_get_acl(inode, buf, buflen); + error = nfs4_proc_get_acl(inode, buf, buflen); + nfs_handle_estale(error, inode); + return error; } ssize_t nfs4_listxattr(struct dentry *dentry, char *buf, size_t buflen) --- linux-2.6.24.i686/fs/nfs/namespace.c.org +++ linux-2.6.24.i686/fs/nfs/namespace.c @@ -115,6 +115,7 @@ static void * nfs_follow_mountpoint(stru err = server->nfs_client->rpc_ops->lookup(parent->d_inode, &nd->dentry->d_name, &fh, &fattr); + nfs_handle_estale(err, parent->d_inode); dput(parent); if (err != 0) goto out_err; --- linux-2.6.24.i686/fs/nfs/symlink.c.org +++ linux-2.6.24.i686/fs/nfs/symlink.c @@ -40,6 +40,7 @@ static int nfs_symlink_filler(struct ino return 0; error: + nfs_handle_estale(error, inode); SetPageError(page); unlock_page(page); return -EIO; --- linux-2.6.24.i686/fs/nfs/nfs3acl.c.org +++ linux-2.6.24.i686/fs/nfs/nfs3acl.c @@ -22,8 +22,10 @@ ssize_t nfs3_listxattr(struct dentry *de } while(0) acl = nfs3_proc_getacl(inode, ACL_TYPE_ACCESS); - if (IS_ERR(acl)) + if (IS_ERR(acl)) { + nfs_handle_estale(PTR_ERR(acl), inode); return PTR_ERR(acl); + } if (acl) { output("system.posix_acl_access"); posix_acl_release(acl); @@ -31,8 +33,10 @@ ssize_t nfs3_listxattr(struct dentry *de if (S_ISDIR(inode->i_mode)) { acl = nfs3_proc_getacl(inode, ACL_TYPE_DEFAULT); - if (IS_ERR(acl)) + if (IS_ERR(acl)) { + nfs_handle_estale(PTR_ERR(acl), inode); return PTR_ERR(acl); + } if (acl) { output("system.posix_acl_default"); posix_acl_release(acl); @@ -61,9 +65,11 @@ ssize_t nfs3_getxattr(struct dentry *den return -EOPNOTSUPP; acl = nfs3_proc_getacl(inode, type); - if (IS_ERR(acl)) + if (IS_ERR(acl)) { + nfs_handle_estale(PTR_ERR(acl), inode); return PTR_ERR(acl); - else if (acl) { + } + if (acl) { if (type == ACL_TYPE_ACCESS && acl->a_count == 0) error = -ENODATA; else @@ -92,6 +98,7 @@ int nfs3_setxattr(struct dentry *dentry, acl = posix_acl_from_xattr(value, size); if (IS_ERR(acl)) return PTR_ERR(acl); + error = nfs3_proc_setacl(inode, type, acl); posix_acl_release(acl); @@ -355,16 +362,22 @@ int nfs3_proc_setacl(struct inode *inode case ACL_TYPE_ACCESS: alloc = dfacl = nfs3_proc_getacl(inode, ACL_TYPE_DEFAULT); - if (IS_ERR(alloc)) + if (IS_ERR(alloc)) { + nfs_handle_estale(PTR_ERR(alloc), + inode); goto fail; + } break; case ACL_TYPE_DEFAULT: dfacl = acl; alloc = acl = nfs3_proc_getacl(inode, ACL_TYPE_ACCESS); - if (IS_ERR(alloc)) + if (IS_ERR(alloc)) { + nfs_handle_estale(PTR_ERR(alloc), + inode); goto fail; + } break; default: @@ -380,6 +393,7 @@ int nfs3_proc_setacl(struct inode *inode } status = nfs3_proc_setacls(inode, acl, dfacl); posix_acl_release(alloc); + nfs_handle_estale(status, inode); return status; fail: @@ -395,6 +409,7 @@ int nfs3_proc_set_default_acl(struct ino dfacl = nfs3_proc_getacl(dir, ACL_TYPE_DEFAULT); if (IS_ERR(dfacl)) { error = PTR_ERR(dfacl); + nfs_handle_estale(error, dir); return (error == -EOPNOTSUPP) ? 0 : error; } if (!dfacl) @@ -408,6 +423,7 @@ int nfs3_proc_set_default_acl(struct ino goto out_release_acl; error = nfs3_proc_setacls(inode, acl, S_ISDIR(inode->i_mode) ? dfacl : NULL); + nfs_handle_estale(error, inode); out_release_acl: posix_acl_release(acl); out_release_dfacl: --- linux-2.6.24.i686/include/linux/nfs_fs.h.org +++ linux-2.6.24.i686/include/linux/nfs_fs.h @@ -219,6 +219,31 @@ static inline struct nfs_inode *NFS_I(st #define NFS_FILEID(inode) (NFS_I(inode)->fileid) +static inline void nfs_handle_estale(int error, struct inode *inode) +{ + struct nfs_inode *nfsi = NFS_I(inode); + + if (error == -ESTALE) { + if (!NFS_STALE(inode)) + set_bit(NFS_INO_STALE, &NFS_FLAGS(inode)); + /* + * If a directory is being marked as stale, then + * reset the attribute cache timeout. This controls + * the duration that the stale condition is believed + * before being verified again with another call to + * the server. + */ + if (S_ISDIR(inode->i_mode)) + nfsi->read_cache_jiffies = jiffies; + } +} + +static inline void nfs_clear_estale(struct inode *inode) +{ + if (NFS_STALE(inode)) + clear_bit(NFS_INO_STALE, &NFS_FLAGS(inode)); +} + static inline void nfs_mark_for_revalidate(struct inode *inode) { struct nfs_inode *nfsi = NFS_I(inode); --------------020804050301060300010907-- -- To unsubscribe from this list: send the line "unsubscribe linux-kernel" in the body of a message to majordomo@vger.kernel.org More majordomo info at http://vger.kernel.org/majordomo-info.html Please read the FAQ at http://www.tux.org/lkml/