2021-12-17 21:57:18

by Trond Myklebust

[permalink] [raw]
Subject: [PATCH 6/9] nfsd: Add a tracepoint for errors in nfsd4_clone_file_range()

From: Trond Myklebust <[email protected]>

Since a clone error commit can cause the boot verifier to change,
we should trace those errors.

Signed-off-by: Trond Myklebust <[email protected]>
---
fs/nfsd/nfs4proc.c | 2 +-
fs/nfsd/trace.h | 50 ++++++++++++++++++++++++++++++++++++++++++++++
fs/nfsd/vfs.c | 18 +++++++++++++++--
fs/nfsd/vfs.h | 3 ++-
4 files changed, 69 insertions(+), 4 deletions(-)

diff --git a/fs/nfsd/nfs4proc.c b/fs/nfsd/nfs4proc.c
index 486c5dba4b65..53c2a8f0d627 100644
--- a/fs/nfsd/nfs4proc.c
+++ b/fs/nfsd/nfs4proc.c
@@ -1102,7 +1102,7 @@ nfsd4_clone(struct svc_rqst *rqstp, struct nfsd4_compound_state *cstate,
if (status)
goto out;

- status = nfsd4_clone_file_range(src, clone->cl_src_pos,
+ status = nfsd4_clone_file_range(rqstp, src, clone->cl_src_pos,
dst, clone->cl_dst_pos, clone->cl_count,
EX_ISSYNC(cstate->current_fh.fh_export));

diff --git a/fs/nfsd/trace.h b/fs/nfsd/trace.h
index f1e0d3c51bc2..001444d9829d 100644
--- a/fs/nfsd/trace.h
+++ b/fs/nfsd/trace.h
@@ -413,6 +413,56 @@ TRACE_EVENT(nfsd_dirent,
)
)

+DECLARE_EVENT_CLASS(nfsd_copy_err_class,
+ TP_PROTO(struct svc_rqst *rqstp,
+ struct svc_fh *src_fhp,
+ loff_t src_offset,
+ struct svc_fh *dst_fhp,
+ loff_t dst_offset,
+ u64 count,
+ int status),
+ TP_ARGS(rqstp, src_fhp, src_offset, dst_fhp, dst_offset, count, status),
+ TP_STRUCT__entry(
+ __field(u32, xid)
+ __field(u32, src_fh_hash)
+ __field(loff_t, src_offset)
+ __field(u32, dst_fh_hash)
+ __field(loff_t, dst_offset)
+ __field(u64, count)
+ __field(int, status)
+ ),
+ TP_fast_assign(
+ __entry->xid = be32_to_cpu(rqstp->rq_xid);
+ __entry->src_fh_hash = knfsd_fh_hash(&src_fhp->fh_handle);
+ __entry->src_offset = src_offset;
+ __entry->dst_fh_hash = knfsd_fh_hash(&dst_fhp->fh_handle);
+ __entry->dst_offset = dst_offset;
+ __entry->count = count;
+ __entry->status = status;
+ ),
+ TP_printk("xid=0x%08x src_fh_hash=0x%08x src_offset=%lld "
+ "dst_fh_hash=0x%08x dst_offset=%lld "
+ "count=%llu status=%d",
+ __entry->xid, __entry->src_fh_hash, __entry->src_offset,
+ __entry->dst_fh_hash, __entry->dst_offset,
+ (unsigned long long)__entry->count,
+ __entry->status)
+)
+
+#define DEFINE_NFSD_COPY_ERR_EVENT(name) \
+DEFINE_EVENT(nfsd_copy_err_class, nfsd_##name, \
+ TP_PROTO(struct svc_rqst *rqstp, \
+ struct svc_fh *src_fhp, \
+ loff_t src_offset, \
+ struct svc_fh *dst_fhp, \
+ loff_t dst_offset, \
+ u64 count, \
+ int status), \
+ TP_ARGS(rqstp, src_fhp, src_offset, dst_fhp, dst_offset, \
+ count, status))
+
+DEFINE_NFSD_COPY_ERR_EVENT(clone_file_range_err);
+
#include "state.h"
#include "filecache.h"
#include "vfs.h"
diff --git a/fs/nfsd/vfs.c b/fs/nfsd/vfs.c
index 38fdfcbb079e..e761b2eff415 100644
--- a/fs/nfsd/vfs.c
+++ b/fs/nfsd/vfs.c
@@ -40,6 +40,7 @@
#include "../internal.h"
#include "acl.h"
#include "idmap.h"
+#include "xdr4.h"
#endif /* CONFIG_NFSD_V4 */

#include "nfsd.h"
@@ -516,8 +517,15 @@ __be32 nfsd4_set_nfs4_label(struct svc_rqst *rqstp, struct svc_fh *fhp,
}
#endif

-__be32 nfsd4_clone_file_range(struct nfsd_file *nf_src, u64 src_pos,
- struct nfsd_file *nf_dst, u64 dst_pos, u64 count, bool sync)
+static struct nfsd4_compound_state *nfsd4_get_cstate(struct svc_rqst *rqstp)
+{
+ return &((struct nfsd4_compoundres *)rqstp->rq_resp)->cstate;
+}
+
+__be32 nfsd4_clone_file_range(struct svc_rqst *rqstp,
+ struct nfsd_file *nf_src, u64 src_pos,
+ struct nfsd_file *nf_dst, u64 dst_pos,
+ u64 count, bool sync)
{
struct file *src = nf_src->nf_file;
struct file *dst = nf_dst->nf_file;
@@ -541,6 +549,12 @@ __be32 nfsd4_clone_file_range(struct nfsd_file *nf_src, u64 src_pos,
if (!status)
status = commit_inode_metadata(file_inode(src));
if (status < 0) {
+ trace_nfsd_clone_file_range_err(rqstp,
+ &nfsd4_get_cstate(rqstp)->save_fh,
+ src_pos,
+ &nfsd4_get_cstate(rqstp)->current_fh,
+ dst_pos,
+ count, status);
nfsd_reset_boot_verifier(net_generic(nf_dst->nf_net,
nfsd_net_id));
ret = nfserrno(status);
diff --git a/fs/nfsd/vfs.h b/fs/nfsd/vfs.h
index 6edae1b9a96e..3dba6397d452 100644
--- a/fs/nfsd/vfs.h
+++ b/fs/nfsd/vfs.h
@@ -57,7 +57,8 @@ __be32 nfsd4_set_nfs4_label(struct svc_rqst *, struct svc_fh *,
struct xdr_netobj *);
__be32 nfsd4_vfs_fallocate(struct svc_rqst *, struct svc_fh *,
struct file *, loff_t, loff_t, int);
-__be32 nfsd4_clone_file_range(struct nfsd_file *nf_src, u64 src_pos,
+__be32 nfsd4_clone_file_range(struct svc_rqst *,
+ struct nfsd_file *nf_src, u64 src_pos,
struct nfsd_file *nf_dst, u64 dst_pos,
u64 count, bool sync);
#endif /* CONFIG_NFSD_V4 */
--
2.33.1



2021-12-17 21:57:22

by Trond Myklebust

[permalink] [raw]
Subject: [PATCH 7/9] nfsd: Replace use of rwsem with errseq_t

From: Trond Myklebust <[email protected]>

The nfsd_file nf_rwsem is currently being used to separate file write
and commit instances to ensure that we catch errors and apply them to
the correct write/commit.
We can improve scalability at the expense of a little accuracy (some
extra false positives) by replacing the nf_rwsem with more careful
use of the errseq_t mechanism to track errors across the different
operations.

Signed-off-by: Trond Myklebust <[email protected]>
---
fs/nfsd/filecache.c | 1 -
fs/nfsd/filecache.h | 1 -
fs/nfsd/nfs4proc.c | 16 +++++++++-------
fs/nfsd/vfs.c | 40 +++++++++++++++-------------------------
4 files changed, 24 insertions(+), 34 deletions(-)

diff --git a/fs/nfsd/filecache.c b/fs/nfsd/filecache.c
index 7629248fdd53..1e894ddc7ac2 100644
--- a/fs/nfsd/filecache.c
+++ b/fs/nfsd/filecache.c
@@ -194,7 +194,6 @@ nfsd_file_alloc(struct inode *inode, unsigned int may, unsigned int hashval,
__set_bit(NFSD_FILE_BREAK_READ, &nf->nf_flags);
}
nf->nf_mark = NULL;
- init_rwsem(&nf->nf_rwsem);
trace_nfsd_file_alloc(nf);
}
return nf;
diff --git a/fs/nfsd/filecache.h b/fs/nfsd/filecache.h
index 7872df5a0fe3..435ceab27897 100644
--- a/fs/nfsd/filecache.h
+++ b/fs/nfsd/filecache.h
@@ -46,7 +46,6 @@ struct nfsd_file {
refcount_t nf_ref;
unsigned char nf_may;
struct nfsd_file_mark *nf_mark;
- struct rw_semaphore nf_rwsem;
};

int nfsd_file_cache_init(void);
diff --git a/fs/nfsd/nfs4proc.c b/fs/nfsd/nfs4proc.c
index 53c2a8f0d627..9ea2d2e554ce 100644
--- a/fs/nfsd/nfs4proc.c
+++ b/fs/nfsd/nfs4proc.c
@@ -1511,6 +1511,9 @@ static void nfsd4_init_copy_res(struct nfsd4_copy *copy, bool sync)

static ssize_t _nfsd_copy_file_range(struct nfsd4_copy *copy)
{
+ struct file *dst = copy->nf_dst->nf_file;
+ struct file *src = copy->nf_src->nf_file;
+ errseq_t since;
ssize_t bytes_copied = 0;
u64 bytes_total = copy->cp_count;
u64 src_pos = copy->cp_src_pos;
@@ -1523,9 +1526,8 @@ static ssize_t _nfsd_copy_file_range(struct nfsd4_copy *copy)
do {
if (kthread_should_stop())
break;
- bytes_copied = nfsd_copy_file_range(copy->nf_src->nf_file,
- src_pos, copy->nf_dst->nf_file, dst_pos,
- bytes_total);
+ bytes_copied = nfsd_copy_file_range(src, src_pos, dst, dst_pos,
+ bytes_total);
if (bytes_copied <= 0)
break;
bytes_total -= bytes_copied;
@@ -1535,11 +1537,11 @@ static ssize_t _nfsd_copy_file_range(struct nfsd4_copy *copy)
} while (bytes_total > 0 && !copy->cp_synchronous);
/* for a non-zero asynchronous copy do a commit of data */
if (!copy->cp_synchronous && copy->cp_res.wr_bytes_written > 0) {
- down_write(&copy->nf_dst->nf_rwsem);
- status = vfs_fsync_range(copy->nf_dst->nf_file,
- copy->cp_dst_pos,
+ since = READ_ONCE(dst->f_wb_err);
+ status = vfs_fsync_range(dst, copy->cp_dst_pos,
copy->cp_res.wr_bytes_written, 0);
- up_write(&copy->nf_dst->nf_rwsem);
+ if (!status)
+ status = filemap_check_wb_err(dst->f_mapping, since);
if (!status)
copy->committed = true;
}
diff --git a/fs/nfsd/vfs.c b/fs/nfsd/vfs.c
index e761b2eff415..19215b9f768e 100644
--- a/fs/nfsd/vfs.c
+++ b/fs/nfsd/vfs.c
@@ -529,10 +529,11 @@ __be32 nfsd4_clone_file_range(struct svc_rqst *rqstp,
{
struct file *src = nf_src->nf_file;
struct file *dst = nf_dst->nf_file;
+ errseq_t since;
loff_t cloned;
__be32 ret = 0;

- down_write(&nf_dst->nf_rwsem);
+ since = READ_ONCE(dst->f_wb_err);
cloned = vfs_clone_file_range(src, src_pos, dst, dst_pos, count, 0);
if (cloned < 0) {
ret = nfserrno(cloned);
@@ -546,6 +547,8 @@ __be32 nfsd4_clone_file_range(struct svc_rqst *rqstp,
loff_t dst_end = count ? dst_pos + count - 1 : LLONG_MAX;
int status = vfs_fsync_range(dst, dst_pos, dst_end, 0);

+ if (!status)
+ status = filemap_check_wb_err(dst->f_mapping, since);
if (!status)
status = commit_inode_metadata(file_inode(src));
if (status < 0) {
@@ -561,7 +564,6 @@ __be32 nfsd4_clone_file_range(struct svc_rqst *rqstp,
}
}
out_err:
- up_write(&nf_dst->nf_rwsem);
return ret;
}

@@ -972,6 +974,7 @@ nfsd_vfs_write(struct svc_rqst *rqstp, struct svc_fh *fhp, struct nfsd_file *nf,
struct super_block *sb = file_inode(file)->i_sb;
struct svc_export *exp;
struct iov_iter iter;
+ errseq_t since;
__be32 nfserr;
int host_err;
int use_wgather;
@@ -1012,21 +1015,18 @@ nfsd_vfs_write(struct svc_rqst *rqstp, struct svc_fh *fhp, struct nfsd_file *nf,
flags |= RWF_SYNC;

iov_iter_kvec(&iter, WRITE, vec, vlen, *cnt);
+ since = READ_ONCE(file->f_wb_err);
if (flags & RWF_SYNC) {
- down_write(&nf->nf_rwsem);
host_err = vfs_iter_write(file, &iter, &pos, flags);
if (host_err < 0)
nfsd_reset_boot_verifier(net_generic(SVC_NET(rqstp),
nfsd_net_id));
- up_write(&nf->nf_rwsem);
} else {
- down_read(&nf->nf_rwsem);
if (verf)
nfsd_copy_boot_verifier(verf,
net_generic(SVC_NET(rqstp),
nfsd_net_id));
host_err = vfs_iter_write(file, &iter, &pos, flags);
- up_read(&nf->nf_rwsem);
}
if (host_err < 0) {
nfsd_reset_boot_verifier(net_generic(SVC_NET(rqstp),
@@ -1036,6 +1036,9 @@ nfsd_vfs_write(struct svc_rqst *rqstp, struct svc_fh *fhp, struct nfsd_file *nf,
*cnt = host_err;
nfsd_stats_io_write_add(exp, *cnt);
fsnotify_modify(file);
+ host_err = filemap_check_wb_err(file->f_mapping, since);
+ if (host_err < 0)
+ goto out_nfserr;

if (stable && use_wgather) {
host_err = wait_for_concurrent_writes(file);
@@ -1116,19 +1119,6 @@ nfsd_write(struct svc_rqst *rqstp, struct svc_fh *fhp, loff_t offset,
}

#ifdef CONFIG_NFSD_V3
-static int
-nfsd_filemap_write_and_wait_range(struct nfsd_file *nf, loff_t offset,
- loff_t end)
-{
- struct address_space *mapping = nf->nf_file->f_mapping;
- int ret = filemap_fdatawrite_range(mapping, offset, end);
-
- if (ret)
- return ret;
- filemap_fdatawait_range_keep_errors(mapping, offset, end);
- return 0;
-}
-
/*
* Commit all pending writes to stable storage.
*
@@ -1159,25 +1149,25 @@ nfsd_commit(struct svc_rqst *rqstp, struct svc_fh *fhp,
if (err)
goto out;
if (EX_ISSYNC(fhp->fh_export)) {
- int err2 = nfsd_filemap_write_and_wait_range(nf, offset, end);
+ errseq_t since = READ_ONCE(nf->nf_file->f_wb_err);
+ int err2;

- down_write(&nf->nf_rwsem);
- if (!err2)
- err2 = vfs_fsync_range(nf->nf_file, offset, end, 0);
+ err2 = vfs_fsync_range(nf->nf_file, offset, end, 0);
switch (err2) {
case 0:
nfsd_copy_boot_verifier(verf, net_generic(nf->nf_net,
nfsd_net_id));
+ err2 = filemap_check_wb_err(nf->nf_file->f_mapping,
+ since);
break;
case -EINVAL:
err = nfserr_notsupp;
break;
default:
- err = nfserrno(err2);
nfsd_reset_boot_verifier(net_generic(nf->nf_net,
nfsd_net_id));
}
- up_write(&nf->nf_rwsem);
+ err = nfserrno(err2);
} else
nfsd_copy_boot_verifier(verf, net_generic(nf->nf_net,
nfsd_net_id));
--
2.33.1