Hey Bruce,
Here is the latest version of the knfsd sync changes that I've been spamming
you with. I have addressed the latest suggestions provided by Christoph,
Trond, Dave, and Alex. This version of the commit_metadata export operation
takes only one inode as suggested by Christoph and it turns out to be much
cleaner this way. Now we've gone back to committing all of the time in
nfsd_create_setattr and don't bother with the added argument. A couple extra
comments to explain the commit ordering.
Thanks,
Ben
---
Ben Myers (2):
commit_metadata export operation replacing nfsd_sync_dir
xfs_export_operations.commit_metadata
fs/nfsd/nfs4recover.c | 4 --
fs/nfsd/vfs.c | 109 ++++++++++++++++++++---------------------
fs/xfs/linux-2.6/xfs_export.c | 24 +++++++++
include/linux/exportfs.h | 5 ++
4 files changed, 83 insertions(+), 59 deletions(-)
--
On Tue, Feb 16, 2010 at 03:04:08PM -0600, Ben Myers wrote:
> Here is the latest version of the knfsd sync changes that I've been spamming
> you with.
Don't worry about spamming--it's good that people have lots of comments!
--b.
> I have addressed the latest suggestions provided by Christoph,
> Trond, Dave, and Alex. This version of the commit_metadata export operation
> takes only one inode as suggested by Christoph and it turns out to be much
> cleaner this way. Now we've gone back to committing all of the time in
> nfsd_create_setattr and don't bother with the added argument. A couple extra
> comments to explain the commit ordering.
>
> Thanks,
> Ben
>
> ---
>
> Ben Myers (2):
> commit_metadata export operation replacing nfsd_sync_dir
> xfs_export_operations.commit_metadata
>
>
> fs/nfsd/nfs4recover.c | 4 --
> fs/nfsd/vfs.c | 109 ++++++++++++++++++++---------------------
> fs/xfs/linux-2.6/xfs_export.c | 24 +++++++++
> include/linux/exportfs.h | 5 ++
> 4 files changed, 83 insertions(+), 59 deletions(-)
>
> --
- Add commit_metadata export_operation to allow the underlying filesystem to
decide how to commit an inode most efficiently.
- Usage of nfsd_sync_dir and write_inode_now has been replaced with the
commit_metadata function that takes a svc_fh.
- The commit_metadata function calls the commit_metadata export_op if it's
there, or else falls back to sync_inode instead of fsync and write_inode_now
because only metadata need be synced here.
- nfsd4_sync_rec_dir now uses vfs_fsync so that commit_metadata can be static
Signed-off-by: Ben Myers <[email protected]>
---
fs/nfsd/nfs4recover.c | 4 --
fs/nfsd/vfs.c | 109 ++++++++++++++++++++++------------------------
include/linux/exportfs.h | 5 ++
3 files changed, 59 insertions(+), 59 deletions(-)
diff --git a/fs/nfsd/nfs4recover.c b/fs/nfsd/nfs4recover.c
index 5a754f7..98fb98e 100644
--- a/fs/nfsd/nfs4recover.c
+++ b/fs/nfsd/nfs4recover.c
@@ -119,9 +119,7 @@ out_no_tfm:
static void
nfsd4_sync_rec_dir(void)
{
- mutex_lock(&rec_dir.dentry->d_inode->i_mutex);
- nfsd_sync_dir(rec_dir.dentry);
- mutex_unlock(&rec_dir.dentry->d_inode->i_mutex);
+ vfs_fsync(NULL, rec_dir.dentry, 0);
}
int
diff --git a/fs/nfsd/vfs.c b/fs/nfsd/vfs.c
index ed024d3..cde275b 100644
--- a/fs/nfsd/vfs.c
+++ b/fs/nfsd/vfs.c
@@ -27,6 +27,8 @@
#include <linux/jhash.h>
#include <linux/ima.h>
#include <asm/uaccess.h>
+#include <linux/exportfs.h>
+#include <linux/writeback.h>
#ifdef CONFIG_NFSD_V3
#include "xdr3.h"
@@ -271,6 +273,31 @@ out:
return err;
}
+/*
+ * Commit metadata changes to stable storage. You pay pass NULL for dchild.
+ */
+static int
+commit_metadata(struct svc_fh *fhp)
+{
+ struct inode *inode = fhp->fh_dentry->d_inode;
+ const struct export_operations *export_ops = inode->i_sb->s_export_op;
+ struct writeback_control wbc = {
+ .sync_mode = WB_SYNC_ALL,
+ .nr_to_write = 0, /* metadata only */
+ };
+ int error = 0;
+
+ if (!EX_ISSYNC(fhp->fh_export))
+ return 0;
+
+ if (export_ops->commit_metadata) {
+ error = export_ops->commit_metadata(inode);
+ } else {
+ error = sync_inode(inode, &wbc);
+ }
+
+ return error;
+}
/*
* Set various file attributes.
@@ -769,28 +796,6 @@ nfsd_close(struct file *filp)
}
/*
- * Sync a directory to disk.
- *
- * We can't just call vfs_fsync because our requirements are slightly odd:
- *
- * a) we do not have a file struct available
- * b) we expect to have i_mutex already held by the caller
- */
-int
-nfsd_sync_dir(struct dentry *dentry)
-{
- struct inode *inode = dentry->d_inode;
- int error;
-
- WARN_ON(!mutex_is_locked(&inode->i_mutex));
-
- error = filemap_write_and_wait(inode->i_mapping);
- if (!error && inode->i_fop->fsync)
- error = inode->i_fop->fsync(NULL, dentry, 0);
- return error;
-}
-
-/*
* Obtain the readahead parameters for the file
* specified by (dev, ino).
*/
@@ -1199,7 +1204,7 @@ nfsd_create_setattr(struct svc_rqst *rqstp, struct svc_fh *resfhp,
if (current_fsuid() != 0)
iap->ia_valid &= ~(ATTR_UID|ATTR_GID);
if (iap->ia_valid)
- return nfsd_setattr(rqstp, resfhp, iap, 0, (time_t)0);
+ return nfsd_setattr(rqstp, resfhp, iap, 0, 0);
return 0;
}
@@ -1331,13 +1336,15 @@ nfsd_create(struct svc_rqst *rqstp, struct svc_fh *fhp,
goto out_nfserr;
}
- if (EX_ISSYNC(fhp->fh_export)) {
- err = nfserrno(nfsd_sync_dir(dentry));
- write_inode_now(dchild->d_inode, 1);
- }
+ err = nfsd_create_setattr(rqstp, resfhp, iap);
- err2 = nfsd_create_setattr(rqstp, resfhp, iap);
- if (err2)
+ /*
+ * nfsd_setattr already committed the child. Transactional filesystems
+ * had a chance to commit changes for both parent and child
+ * simultaneously making the following commit_metadata a noop.
+ */
+ err2 = nfserrno(commit_metadata(fhp));
+ if (err2)
err = err2;
mnt_drop_write(fhp->fh_export->ex_path.mnt);
/*
@@ -1368,7 +1375,6 @@ nfsd_create_v3(struct svc_rqst *rqstp, struct svc_fh *fhp,
struct dentry *dentry, *dchild = NULL;
struct inode *dirp;
__be32 err;
- __be32 err2;
int host_err;
__u32 v_mtime=0, v_atime=0;
@@ -1463,11 +1469,6 @@ nfsd_create_v3(struct svc_rqst *rqstp, struct svc_fh *fhp,
if (created)
*created = 1;
- if (EX_ISSYNC(fhp->fh_export)) {
- err = nfserrno(nfsd_sync_dir(dentry));
- /* setattr will sync the child (or not) */
- }
-
nfsd_check_ignore_resizing(iap);
if (createmode == NFS3_CREATE_EXCLUSIVE) {
@@ -1482,9 +1483,13 @@ nfsd_create_v3(struct svc_rqst *rqstp, struct svc_fh *fhp,
}
set_attr:
- err2 = nfsd_create_setattr(rqstp, resfhp, iap);
- if (err2)
- err = err2;
+ err = nfsd_create_setattr(rqstp, resfhp, iap);
+
+ /*
+ * nfsd_setattr already committed the child (and possibly also the parent).
+ */
+ if (!err)
+ err = nfserrno(commit_metadata(fhp));
mnt_drop_write(fhp->fh_export->ex_path.mnt);
/*
@@ -1599,12 +1604,9 @@ nfsd_symlink(struct svc_rqst *rqstp, struct svc_fh *fhp,
}
} else
host_err = vfs_symlink(dentry->d_inode, dnew, path);
-
- if (!host_err) {
- if (EX_ISSYNC(fhp->fh_export))
- host_err = nfsd_sync_dir(dentry);
- }
err = nfserrno(host_err);
+ if (!err)
+ err = nfserrno(commit_metadata(fhp));
fh_unlock(fhp);
mnt_drop_write(fhp->fh_export->ex_path.mnt);
@@ -1666,11 +1668,9 @@ nfsd_link(struct svc_rqst *rqstp, struct svc_fh *ffhp,
}
host_err = vfs_link(dold, dirp, dnew);
if (!host_err) {
- if (EX_ISSYNC(ffhp->fh_export)) {
- err = nfserrno(nfsd_sync_dir(ddir));
- write_inode_now(dest, 1);
- }
- err = 0;
+ err = nfserrno(commit_metadata(ffhp));
+ if (!err)
+ err = nfserrno(commit_metadata(tfhp));
} else {
if (host_err == -EXDEV && rqstp->rq_vers == 2)
err = nfserr_acces;
@@ -1766,10 +1766,10 @@ nfsd_rename(struct svc_rqst *rqstp, struct svc_fh *ffhp, char *fname, int flen,
goto out_dput_new;
host_err = vfs_rename(fdir, odentry, tdir, ndentry);
- if (!host_err && EX_ISSYNC(tfhp->fh_export)) {
- host_err = nfsd_sync_dir(tdentry);
+ if (!host_err) {
+ host_err = commit_metadata(tfhp);
if (!host_err)
- host_err = nfsd_sync_dir(fdentry);
+ host_err = commit_metadata(ffhp);
}
mnt_drop_write(ffhp->fh_export->ex_path.mnt);
@@ -1850,12 +1850,9 @@ nfsd_unlink(struct svc_rqst *rqstp, struct svc_fh *fhp, int type,
dput(rdentry);
- if (host_err)
- goto out_drop;
- if (EX_ISSYNC(fhp->fh_export))
- host_err = nfsd_sync_dir(dentry);
+ if (!host_err)
+ host_err = commit_metadata(fhp);
-out_drop:
mnt_drop_write(fhp->fh_export->ex_path.mnt);
out_nfserr:
err = nfserrno(host_err);
diff --git a/include/linux/exportfs.h b/include/linux/exportfs.h
index dc12f41..a9cd507 100644
--- a/include/linux/exportfs.h
+++ b/include/linux/exportfs.h
@@ -96,6 +96,7 @@ struct fid {
* @fh_to_parent: find the implied object's parent and get a dentry for it
* @get_name: find the name for a given inode in a given directory
* @get_parent: find the parent of a given directory
+ * @commit_metadata: commit metadata changes to stable storage
*
* See Documentation/filesystems/nfs/Exporting for details on how to use
* this interface correctly.
@@ -137,6 +138,9 @@ struct fid {
* is also a directory. In the event that it cannot be found, or storage
* space cannot be allocated, a %ERR_PTR should be returned.
*
+ * commit_metadata:
+ * @commit_metadata should commit metadata changes to stable storage.
+ *
* Locking rules:
* get_parent is called with child->d_inode->i_mutex down
* get_name is not (which is possibly inconsistent)
@@ -152,6 +156,7 @@ struct export_operations {
int (*get_name)(struct dentry *parent, char *name,
struct dentry *child);
struct dentry * (*get_parent)(struct dentry *child);
+ int (*commit_metadata)(struct inode *inode);
};
extern int exportfs_encode_fh(struct dentry *dentry, struct fid *fid,
This is the commit_metadata export operation for XFS.
- Takes one inode to be committed.
- Forces the log up to the lsn of the inode.
- Doesn't force the log if the inode doesn't have a pincount.
Signed-off-by: Ben Myers <[email protected]>
---
fs/xfs/linux-2.6/xfs_export.c | 24 ++++++++++++++++++++++++
1 files changed, 24 insertions(+), 0 deletions(-)
diff --git a/fs/xfs/linux-2.6/xfs_export.c b/fs/xfs/linux-2.6/xfs_export.c
index 87b8cbd..47a8d1f 100644
--- a/fs/xfs/linux-2.6/xfs_export.c
+++ b/fs/xfs/linux-2.6/xfs_export.c
@@ -29,6 +29,7 @@
#include "xfs_vnodeops.h"
#include "xfs_bmap_btree.h"
#include "xfs_inode.h"
+#include "xfs_inode_item.h"
/*
* Note that we only accept fileids which are long enough rather than allow
@@ -215,9 +216,32 @@ xfs_fs_get_parent(
return d_obtain_alias(VFS_I(cip));
}
+STATIC int
+xfs_fs_nfs_commit_metadata(
+ struct inode *inode)
+{
+ struct xfs_inode *ip = XFS_I(inode);
+ struct xfs_mount *mp = ip->i_mount;
+ xfs_lsn_t force_lsn = NULLCOMMITLSN;
+ int error = 0;
+
+ xfs_ilock(ip, XFS_ILOCK_SHARED);
+ if (xfs_ipincount(ip)) {
+ force_lsn = ip->i_itemp->ili_last_lsn;
+ }
+ if (force_lsn != NULLCOMMITLSN) {
+ error = _xfs_log_force(mp, force_lsn,
+ XFS_LOG_FORCE | XFS_LOG_SYNC, NULL);
+ }
+ xfs_iunlock(ip, XFS_ILOCK_SHARED);
+
+ return error;
+}
+
const struct export_operations xfs_export_operations = {
.encode_fh = xfs_fs_encode_fh,
.fh_to_dentry = xfs_fs_fh_to_dentry,
.fh_to_parent = xfs_fs_fh_to_parent,
.get_parent = xfs_fs_get_parent,
+ .commit_metadata = xfs_fs_nfs_commit_metadata,
};
This looks very good to me. A couple of tiny nitpicks below:
> +/*
> + * Commit metadata changes to stable storage. You pay pass NULL for dchild.
> + */
The dchild argument is gone in this version.
> + struct writeback_control wbc = {
> + .sync_mode = WB_SYNC_ALL,
> + .nr_to_write = 0, /* metadata only */
> + };
> + int error = 0;
> +
> + if (!EX_ISSYNC(fhp->fh_export))
> + return 0;
> +
> + if (export_ops->commit_metadata) {
> + error = export_ops->commit_metadata(inode);
> + } else {
> + error = sync_inode(inode, &wbc);
> + }
Maybe move the wbc declaration into the else branch here to keep
variables in the smallest possible scope.
> @@ -1199,7 +1204,7 @@ nfsd_create_setattr(struct svc_rqst *rqstp, struct svc_fh *resfhp,
> if (current_fsuid() != 0)
> iap->ia_valid &= ~(ATTR_UID|ATTR_GID);
> if (iap->ia_valid)
> - return nfsd_setattr(rqstp, resfhp, iap, 0, (time_t)0);
> + return nfsd_setattr(rqstp, resfhp, iap, 0, 0);
While this is a worthwhile cleanup I'd not put it into a patch that is
now entirely unrelated.
> + err = nfsd_create_setattr(rqstp, resfhp, iap);
>
> + /*
> + * nfsd_setattr already committed the child. Transactional filesystems
> + * had a chance to commit changes for both parent and child
> + * simultaneously making the following commit_metadata a noop.
> + */
> + err2 = nfserrno(commit_metadata(fhp));
> + if (err2)
> err = err2;
The if statement above seems rather minindented, possibly due to the
partial use of spaces instead of tabs.
Looks good,
Reviewed-by: Christoph Hellwig <[email protected]>
On Tue, Feb 16, 2010 at 03:04:18PM -0600, Ben Myers wrote:
> This is the commit_metadata export operation for XFS.
>
> - Takes one inode to be committed.
>
> - Forces the log up to the lsn of the inode.
>
> - Doesn't force the log if the inode doesn't have a pincount.
>
> Signed-off-by: Ben Myers <[email protected]>
> ---
> fs/xfs/linux-2.6/xfs_export.c | 24 ++++++++++++++++++++++++
> 1 files changed, 24 insertions(+), 0 deletions(-)
>
> diff --git a/fs/xfs/linux-2.6/xfs_export.c b/fs/xfs/linux-2.6/xfs_export.c
> index 87b8cbd..47a8d1f 100644
> --- a/fs/xfs/linux-2.6/xfs_export.c
> +++ b/fs/xfs/linux-2.6/xfs_export.c
> @@ -29,6 +29,7 @@
> #include "xfs_vnodeops.h"
> #include "xfs_bmap_btree.h"
> #include "xfs_inode.h"
> +#include "xfs_inode_item.h"
>
> /*
> * Note that we only accept fileids which are long enough rather than allow
> @@ -215,9 +216,32 @@ xfs_fs_get_parent(
> return d_obtain_alias(VFS_I(cip));
> }
>
> +STATIC int
> +xfs_fs_nfs_commit_metadata(
> + struct inode *inode)
> +{
> + struct xfs_inode *ip = XFS_I(inode);
> + struct xfs_mount *mp = ip->i_mount;
> + xfs_lsn_t force_lsn = NULLCOMMITLSN;
> + int error = 0;
> +
> + xfs_ilock(ip, XFS_ILOCK_SHARED);
> + if (xfs_ipincount(ip)) {
> + force_lsn = ip->i_itemp->ili_last_lsn;
> + }
> + if (force_lsn != NULLCOMMITLSN) {
> + error = _xfs_log_force(mp, force_lsn,
> + XFS_LOG_FORCE | XFS_LOG_SYNC, NULL);
> + }
That could be simplified to:
if (xfs_ipincount(ip))
_xfs_log_force(mp, ip->i_itemp->ili_last_lsn
XFS_LOG_FORCE | XFS_LOG_SYNC, NULL);
Cheers,
Dave.
--
Dave Chinner
[email protected]