Currently GETATTR conflict with a write delegation is handled by
recalling the delegation before replying to the GETATTR.
This patch series add supports for CB_GETATTR callback to get the latest
change_info and size information of the file from the client that holds
the delegation to reply to the GETATTR from the second client.
NOTE: this patch series is mostly the same as the previous patches which
were backed out when un unrelated problem of NFSD server hang on reboot
was reported.
The only difference is the wait_on_bit() in nfsd4_deleg_getattr_conflict was
replaced with wait_on_bit_timeout() with 30ms timeout to avoid a potential
DOS attack by exhausting NFSD kernel threads with GETATTR conflicts.
fs/nfsd/nfs4callback.c | 97 +++++++++++++++++++++++++++++++++++-
fs/nfsd/nfs4state.c | 119 ++++++++++++++++++++++++++++++++++++++++----
fs/nfsd/nfs4xdr.c | 10 +++-
fs/nfsd/nfsd.h | 1 +
fs/nfsd/state.h | 24 ++++++++-
fs/nfsd/xdr4cb.h | 18 +++++++
6 files changed, 255 insertions(+), 14 deletions(-)
If the GETATTR request on a file that has write delegation in effect
and the request attributes include the change info and size attribute
then the request is handled as below:
Server sends CB_GETATTR to client to get the latest change info and file
size. If these values are the same as the server's cached values then
the GETATTR proceeds as normal.
If either the change info or file size is different from the server's
cached values, or the file was already marked as modified, then:
. update time_modify and time_metadata into file's metadata
with current time
. encode GETATTR as normal except the file size is encoded with
the value returned from CB_GETATTR
. mark the file as modified
The CB_GETATTR is given 30ms to complte. If the CB_GETATTR fails for
any reasons, the delegation is recalled and NFS4ERR_DELAY is returned
for the GETATTR from the second client.
Signed-off-by: Dai Ngo <[email protected]>
---
fs/nfsd/nfs4state.c | 119 ++++++++++++++++++++++++++++++++++++++++----
fs/nfsd/nfs4xdr.c | 10 +++-
fs/nfsd/nfsd.h | 1 +
fs/nfsd/state.h | 10 +++-
4 files changed, 127 insertions(+), 13 deletions(-)
diff --git a/fs/nfsd/nfs4state.c b/fs/nfsd/nfs4state.c
index d9260e77ef2d..87987515e03d 100644
--- a/fs/nfsd/nfs4state.c
+++ b/fs/nfsd/nfs4state.c
@@ -127,6 +127,7 @@ static void free_session(struct nfsd4_session *);
static const struct nfsd4_callback_ops nfsd4_cb_recall_ops;
static const struct nfsd4_callback_ops nfsd4_cb_notify_lock_ops;
+static const struct nfsd4_callback_ops nfsd4_cb_getattr_ops;
static struct workqueue_struct *laundry_wq;
@@ -1189,6 +1190,10 @@ alloc_init_deleg(struct nfs4_client *clp, struct nfs4_file *fp,
dp->dl_recalled = false;
nfsd4_init_cb(&dp->dl_recall, dp->dl_stid.sc_client,
&nfsd4_cb_recall_ops, NFSPROC4_CLNT_CB_RECALL);
+ nfsd4_init_cb(&dp->dl_cb_fattr.ncf_getattr, dp->dl_stid.sc_client,
+ &nfsd4_cb_getattr_ops, NFSPROC4_CLNT_CB_GETATTR);
+ dp->dl_cb_fattr.ncf_file_modified = false;
+ dp->dl_cb_fattr.ncf_cb_bmap[0] = FATTR4_WORD0_CHANGE | FATTR4_WORD0_SIZE;
get_nfs4_file(fp);
dp->dl_stid.sc_file = fp;
return dp;
@@ -3044,11 +3049,59 @@ nfsd4_cb_recall_any_release(struct nfsd4_callback *cb)
spin_unlock(&nn->client_lock);
}
+static int
+nfsd4_cb_getattr_done(struct nfsd4_callback *cb, struct rpc_task *task)
+{
+ struct nfs4_cb_fattr *ncf =
+ container_of(cb, struct nfs4_cb_fattr, ncf_getattr);
+
+ ncf->ncf_cb_status = task->tk_status;
+ switch (task->tk_status) {
+ case -NFS4ERR_DELAY:
+ rpc_delay(task, 2 * HZ);
+ return 0;
+ default:
+ return 1;
+ }
+}
+
+static void
+nfsd4_cb_getattr_release(struct nfsd4_callback *cb)
+{
+ struct nfs4_cb_fattr *ncf =
+ container_of(cb, struct nfs4_cb_fattr, ncf_getattr);
+ struct nfs4_delegation *dp =
+ container_of(ncf, struct nfs4_delegation, dl_cb_fattr);
+
+ nfs4_put_stid(&dp->dl_stid);
+ clear_bit(CB_GETATTR_BUSY, &ncf->ncf_cb_flags);
+ wake_up_bit(&ncf->ncf_cb_flags, CB_GETATTR_BUSY);
+}
+
static const struct nfsd4_callback_ops nfsd4_cb_recall_any_ops = {
.done = nfsd4_cb_recall_any_done,
.release = nfsd4_cb_recall_any_release,
};
+static const struct nfsd4_callback_ops nfsd4_cb_getattr_ops = {
+ .done = nfsd4_cb_getattr_done,
+ .release = nfsd4_cb_getattr_release,
+};
+
+static void nfs4_cb_getattr(struct nfs4_cb_fattr *ncf)
+{
+ struct nfs4_delegation *dp =
+ container_of(ncf, struct nfs4_delegation, dl_cb_fattr);
+
+ if (test_and_set_bit(CB_GETATTR_BUSY, &ncf->ncf_cb_flags))
+ return;
+ /* set to proper status when nfsd4_cb_getattr_done runs */
+ ncf->ncf_cb_status = NFS4ERR_IO;
+
+ refcount_inc(&dp->dl_stid.sc_count);
+ nfsd4_run_cb(&ncf->ncf_getattr);
+}
+
static struct nfs4_client *create_client(struct xdr_netobj name,
struct svc_rqst *rqstp, nfs4_verifier *verf)
{
@@ -5854,6 +5907,8 @@ nfs4_open_delegation(struct nfsd4_open *open, struct nfs4_ol_stateid *stp,
struct svc_fh *parent = NULL;
int cb_up;
int status = 0;
+ struct kstat stat;
+ struct path path;
cb_up = nfsd4_cb_channel_good(oo->oo_owner.so_client);
open->op_recall = false;
@@ -5891,6 +5946,18 @@ nfs4_open_delegation(struct nfsd4_open *open, struct nfs4_ol_stateid *stp,
if (open->op_share_access & NFS4_SHARE_ACCESS_WRITE) {
open->op_delegate_type = NFS4_OPEN_DELEGATE_WRITE;
trace_nfsd_deleg_write(&dp->dl_stid.sc_stateid);
+ path.mnt = currentfh->fh_export->ex_path.mnt;
+ path.dentry = currentfh->fh_dentry;
+ if (vfs_getattr(&path, &stat,
+ (STATX_SIZE | STATX_CTIME | STATX_CHANGE_COOKIE),
+ AT_STATX_SYNC_AS_STAT)) {
+ nfs4_put_stid(&dp->dl_stid);
+ destroy_delegation(dp);
+ goto out_no_deleg;
+ }
+ dp->dl_cb_fattr.ncf_cur_fsize = stat.size;
+ dp->dl_cb_fattr.ncf_initial_cinfo =
+ nfsd4_change_attribute(&stat, d_inode(currentfh->fh_dentry));
} else {
open->op_delegate_type = NFS4_OPEN_DELEGATE_READ;
trace_nfsd_deleg_read(&dp->dl_stid.sc_stateid);
@@ -8720,6 +8787,8 @@ nfsd4_get_writestateid(struct nfsd4_compound_state *cstate,
* nfsd4_deleg_getattr_conflict - Recall if GETATTR causes conflict
* @rqstp: RPC transaction context
* @inode: file to be checked for a conflict
+ * @modified: return true if file was modified
+ * @size: new size of file if modified is true
*
* This function is called when there is a conflict between a write
* delegation and a change/size GETATTR from another client. The server
@@ -8728,22 +8797,22 @@ nfsd4_get_writestateid(struct nfsd4_compound_state *cstate,
* delegation before replying to the GETATTR. See RFC 8881 section
* 18.7.4.
*
- * The current implementation does not support CB_GETATTR yet. However
- * this can avoid recalling the delegation could be added in follow up
- * work.
- *
* Returns 0 if there is no conflict; otherwise an nfs_stat
* code is returned.
*/
__be32
-nfsd4_deleg_getattr_conflict(struct svc_rqst *rqstp, struct inode *inode)
+nfsd4_deleg_getattr_conflict(struct svc_rqst *rqstp, struct inode *inode,
+ bool *modified, u64 *size)
{
__be32 status;
struct nfsd_net *nn = net_generic(SVC_NET(rqstp), nfsd_net_id);
struct file_lock_context *ctx;
struct file_lock *fl;
struct nfs4_delegation *dp;
+ struct iattr attrs;
+ struct nfs4_cb_fattr *ncf;
+ *modified = false;
ctx = locks_inode_context(inode);
if (!ctx)
return 0;
@@ -8768,12 +8837,42 @@ nfsd4_deleg_getattr_conflict(struct svc_rqst *rqstp, struct inode *inode)
return 0;
}
break_lease:
- spin_unlock(&ctx->flc_lock);
nfsd_stats_wdeleg_getattr_inc(nn);
- status = nfserrno(nfsd_open_break_lease(inode, NFSD_MAY_READ));
- if (status != nfserr_jukebox ||
- !nfsd_wait_for_delegreturn(rqstp, inode))
- return status;
+ dp = fl->fl_owner;
+ ncf = &dp->dl_cb_fattr;
+ nfs4_cb_getattr(&dp->dl_cb_fattr);
+ spin_unlock(&ctx->flc_lock);
+ /*
+ * allow 30 ms for the callback to complete which should
+ * take care most cases when everything works normally.
+ * Otherwise just fall back to the normal mechanisnm to
+ * recall the delegation.
+ */
+ wait_on_bit_timeout(&ncf->ncf_cb_flags, CB_GETATTR_BUSY,
+ TASK_INTERRUPTIBLE, NFSD_CB_GETATTR_TIMEOUT);
+ if (ncf->ncf_cb_status) {
+ status = nfserrno(nfsd_open_break_lease(inode, NFSD_MAY_READ));
+ if (status != nfserr_jukebox ||
+ !nfsd_wait_for_delegreturn(rqstp, inode))
+ return status;
+ }
+ if (!ncf->ncf_file_modified &&
+ (ncf->ncf_initial_cinfo != ncf->ncf_cb_change ||
+ ncf->ncf_cur_fsize != ncf->ncf_cb_fsize))
+ ncf->ncf_file_modified = true;
+ if (ncf->ncf_file_modified) {
+ /*
+ * The server would not update the file's metadata
+ * with the client's modified size.
+ */
+ attrs.ia_mtime = attrs.ia_ctime = current_time(inode);
+ attrs.ia_valid = ATTR_MTIME | ATTR_CTIME;
+ setattr_copy(&nop_mnt_idmap, inode, &attrs);
+ mark_inode_dirty(inode);
+ ncf->ncf_cur_fsize = ncf->ncf_cb_fsize;
+ *size = ncf->ncf_cur_fsize;
+ *modified = true;
+ }
return 0;
}
break;
diff --git a/fs/nfsd/nfs4xdr.c b/fs/nfsd/nfs4xdr.c
index e3f761cd5ee7..9e8f230fc96e 100644
--- a/fs/nfsd/nfs4xdr.c
+++ b/fs/nfsd/nfs4xdr.c
@@ -3507,6 +3507,8 @@ nfsd4_encode_fattr4(struct svc_rqst *rqstp, struct xdr_stream *xdr,
unsigned long mask[2];
} u;
unsigned long bit;
+ bool file_modified = false;
+ u64 size = 0;
WARN_ON_ONCE(bmval[1] & NFSD_WRITEONLY_ATTRS_WORD1);
WARN_ON_ONCE(!nfsd_attrs_supported(minorversion, bmval));
@@ -3533,7 +3535,8 @@ nfsd4_encode_fattr4(struct svc_rqst *rqstp, struct xdr_stream *xdr,
}
args.size = 0;
if (u.attrmask[0] & (FATTR4_WORD0_CHANGE | FATTR4_WORD0_SIZE)) {
- status = nfsd4_deleg_getattr_conflict(rqstp, d_inode(dentry));
+ status = nfsd4_deleg_getattr_conflict(rqstp, d_inode(dentry),
+ &file_modified, &size);
if (status)
goto out;
}
@@ -3543,7 +3546,10 @@ nfsd4_encode_fattr4(struct svc_rqst *rqstp, struct xdr_stream *xdr,
AT_STATX_SYNC_AS_STAT);
if (err)
goto out_nfserr;
- args.size = args.stat.size;
+ if (file_modified)
+ args.size = size;
+ else
+ args.size = args.stat.size;
if (!(args.stat.result_mask & STATX_BTIME))
/* underlying FS does not offer btime so we can't share it */
diff --git a/fs/nfsd/nfsd.h b/fs/nfsd/nfsd.h
index 8daf22d766c6..16c5a05f340e 100644
--- a/fs/nfsd/nfsd.h
+++ b/fs/nfsd/nfsd.h
@@ -367,6 +367,7 @@ void nfsd_lockd_shutdown(void);
#define NFSD_CLIENT_MAX_TRIM_PER_RUN 128
#define NFS4_CLIENTS_PER_GB 1024
#define NFSD_DELEGRETURN_TIMEOUT (HZ / 34) /* 30ms */
+#define NFSD_CB_GETATTR_TIMEOUT NFSD_DELEGRETURN_TIMEOUT
/*
* The following attributes are currently not supported by the NFSv4 server:
diff --git a/fs/nfsd/state.h b/fs/nfsd/state.h
index 3bf418ee6c97..01c6f3445646 100644
--- a/fs/nfsd/state.h
+++ b/fs/nfsd/state.h
@@ -142,8 +142,16 @@ struct nfs4_cb_fattr {
/* from CB_GETATTR reply */
u64 ncf_cb_change;
u64 ncf_cb_fsize;
+
+ unsigned long ncf_cb_flags;
+ bool ncf_file_modified;
+ u64 ncf_initial_cinfo;
+ u64 ncf_cur_fsize;
};
+/* bits for ncf_cb_flags */
+#define CB_GETATTR_BUSY 0
+
/*
* Represents a delegation stateid. The nfs4_client holds references to these
* and they are put when it is being destroyed or when the delegation is
@@ -773,5 +781,5 @@ static inline bool try_to_expire_client(struct nfs4_client *clp)
}
extern __be32 nfsd4_deleg_getattr_conflict(struct svc_rqst *rqstp,
- struct inode *inode);
+ struct inode *inode, bool *file_modified, u64 *size);
#endif /* NFSD4_STATE_H */
--
2.39.3
Includes:
. CB_GETATTR proc for nfs4_cb_procedures[]
. XDR encoding and decoding function for CB_GETATTR request/reply
. add nfs4_cb_fattr to nfs4_delegation for sending CB_GETATTR
and store file attributes from client's reply.
Signed-off-by: Dai Ngo <[email protected]>
---
fs/nfsd/nfs4callback.c | 97 +++++++++++++++++++++++++++++++++++++++++-
fs/nfsd/state.h | 14 ++++++
fs/nfsd/xdr4cb.h | 18 ++++++++
3 files changed, 128 insertions(+), 1 deletion(-)
diff --git a/fs/nfsd/nfs4callback.c b/fs/nfsd/nfs4callback.c
index 32dd2fbb1f30..e440f72b9d4e 100644
--- a/fs/nfsd/nfs4callback.c
+++ b/fs/nfsd/nfs4callback.c
@@ -85,7 +85,21 @@ static void encode_uint32(struct xdr_stream *xdr, u32 n)
static void encode_bitmap4(struct xdr_stream *xdr, const __u32 *bitmap,
size_t len)
{
- WARN_ON_ONCE(xdr_stream_encode_uint32_array(xdr, bitmap, len) < 0);
+ xdr_stream_encode_uint32_array(xdr, bitmap, len);
+}
+
+static int decode_cb_fattr4(struct xdr_stream *xdr, uint32_t *bitmap,
+ struct nfs4_cb_fattr *fattr)
+{
+ fattr->ncf_cb_change = 0;
+ fattr->ncf_cb_fsize = 0;
+ if (bitmap[0] & FATTR4_WORD0_CHANGE)
+ if (xdr_stream_decode_u64(xdr, &fattr->ncf_cb_change) < 0)
+ return -NFSERR_BAD_XDR;
+ if (bitmap[0] & FATTR4_WORD0_SIZE)
+ if (xdr_stream_decode_u64(xdr, &fattr->ncf_cb_fsize) < 0)
+ return -NFSERR_BAD_XDR;
+ return 0;
}
static void encode_nfs_cb_opnum4(struct xdr_stream *xdr, enum nfs_cb_opnum4 op)
@@ -333,6 +347,30 @@ encode_cb_recallany4args(struct xdr_stream *xdr,
hdr->nops++;
}
+/*
+ * CB_GETATTR4args
+ * struct CB_GETATTR4args {
+ * nfs_fh4 fh;
+ * bitmap4 attr_request;
+ * };
+ *
+ * The size and change attributes are the only one
+ * guaranteed to be serviced by the client.
+ */
+static void
+encode_cb_getattr4args(struct xdr_stream *xdr, struct nfs4_cb_compound_hdr *hdr,
+ struct nfs4_cb_fattr *fattr)
+{
+ struct nfs4_delegation *dp =
+ container_of(fattr, struct nfs4_delegation, dl_cb_fattr);
+ struct knfsd_fh *fh = &dp->dl_stid.sc_file->fi_fhandle;
+
+ encode_nfs_cb_opnum4(xdr, OP_CB_GETATTR);
+ encode_nfs_fh4(xdr, fh);
+ encode_bitmap4(xdr, fattr->ncf_cb_bmap, ARRAY_SIZE(fattr->ncf_cb_bmap));
+ hdr->nops++;
+}
+
/*
* CB_SEQUENCE4args
*
@@ -468,6 +506,26 @@ static void nfs4_xdr_enc_cb_null(struct rpc_rqst *req, struct xdr_stream *xdr,
xdr_reserve_space(xdr, 0);
}
+/*
+ * 20.1. Operation 3: CB_GETATTR - Get Attributes
+ */
+static void nfs4_xdr_enc_cb_getattr(struct rpc_rqst *req,
+ struct xdr_stream *xdr, const void *data)
+{
+ const struct nfsd4_callback *cb = data;
+ struct nfs4_cb_fattr *ncf =
+ container_of(cb, struct nfs4_cb_fattr, ncf_getattr);
+ struct nfs4_cb_compound_hdr hdr = {
+ .ident = cb->cb_clp->cl_cb_ident,
+ .minorversion = cb->cb_clp->cl_minorversion,
+ };
+
+ encode_cb_compound4args(xdr, &hdr);
+ encode_cb_sequence4args(xdr, cb, &hdr);
+ encode_cb_getattr4args(xdr, &hdr, ncf);
+ encode_cb_nops(&hdr);
+}
+
/*
* 20.2. Operation 4: CB_RECALL - Recall a Delegation
*/
@@ -523,6 +581,42 @@ static int nfs4_xdr_dec_cb_null(struct rpc_rqst *req, struct xdr_stream *xdr,
return 0;
}
+/*
+ * 20.1. Operation 3: CB_GETATTR - Get Attributes
+ */
+static int nfs4_xdr_dec_cb_getattr(struct rpc_rqst *rqstp,
+ struct xdr_stream *xdr,
+ void *data)
+{
+ struct nfsd4_callback *cb = data;
+ struct nfs4_cb_compound_hdr hdr;
+ int status;
+ u32 bitmap[3] = {0};
+ u32 attrlen;
+ struct nfs4_cb_fattr *ncf =
+ container_of(cb, struct nfs4_cb_fattr, ncf_getattr);
+
+ status = decode_cb_compound4res(xdr, &hdr);
+ if (unlikely(status))
+ return status;
+
+ status = decode_cb_sequence4res(xdr, cb);
+ if (unlikely(status || cb->cb_seq_status))
+ return status;
+
+ status = decode_cb_op_status(xdr, OP_CB_GETATTR, &cb->cb_status);
+ if (status)
+ return status;
+ if (xdr_stream_decode_uint32_array(xdr, bitmap, 3) < 0)
+ return -NFSERR_BAD_XDR;
+ if (xdr_stream_decode_u32(xdr, &attrlen) < 0)
+ return -NFSERR_BAD_XDR;
+ if (attrlen > (sizeof(ncf->ncf_cb_change) + sizeof(ncf->ncf_cb_fsize)))
+ return -NFSERR_BAD_XDR;
+ status = decode_cb_fattr4(xdr, bitmap, ncf);
+ return status;
+}
+
/*
* 20.2. Operation 4: CB_RECALL - Recall a Delegation
*/
@@ -831,6 +925,7 @@ static const struct rpc_procinfo nfs4_cb_procedures[] = {
PROC(CB_NOTIFY_LOCK, COMPOUND, cb_notify_lock, cb_notify_lock),
PROC(CB_OFFLOAD, COMPOUND, cb_offload, cb_offload),
PROC(CB_RECALL_ANY, COMPOUND, cb_recall_any, cb_recall_any),
+ PROC(CB_GETATTR, COMPOUND, cb_getattr, cb_getattr),
};
static unsigned int nfs4_cb_counts[ARRAY_SIZE(nfs4_cb_procedures)];
diff --git a/fs/nfsd/state.h b/fs/nfsd/state.h
index 0c8ec578ba7e..3bf418ee6c97 100644
--- a/fs/nfsd/state.h
+++ b/fs/nfsd/state.h
@@ -134,6 +134,16 @@ struct nfs4_cpntf_state {
time64_t cpntf_time; /* last time stateid used */
};
+struct nfs4_cb_fattr {
+ struct nfsd4_callback ncf_getattr;
+ u32 ncf_cb_status;
+ u32 ncf_cb_bmap[1];
+
+ /* from CB_GETATTR reply */
+ u64 ncf_cb_change;
+ u64 ncf_cb_fsize;
+};
+
/*
* Represents a delegation stateid. The nfs4_client holds references to these
* and they are put when it is being destroyed or when the delegation is
@@ -167,6 +177,9 @@ struct nfs4_delegation {
int dl_retries;
struct nfsd4_callback dl_recall;
bool dl_recalled;
+
+ /* for CB_GETATTR */
+ struct nfs4_cb_fattr dl_cb_fattr;
};
#define cb_to_delegation(cb) \
@@ -659,6 +672,7 @@ enum nfsd4_cb_op {
NFSPROC4_CLNT_CB_SEQUENCE,
NFSPROC4_CLNT_CB_NOTIFY_LOCK,
NFSPROC4_CLNT_CB_RECALL_ANY,
+ NFSPROC4_CLNT_CB_GETATTR,
};
/* Returns true iff a is later than b: */
diff --git a/fs/nfsd/xdr4cb.h b/fs/nfsd/xdr4cb.h
index 0d39af1b00a0..e8b00309c449 100644
--- a/fs/nfsd/xdr4cb.h
+++ b/fs/nfsd/xdr4cb.h
@@ -54,3 +54,21 @@
#define NFS4_dec_cb_recall_any_sz (cb_compound_dec_hdr_sz + \
cb_sequence_dec_sz + \
op_dec_sz)
+
+/*
+ * 1: CB_GETATTR opcode (32-bit)
+ * N: file_handle
+ * 1: number of entry in attribute array (32-bit)
+ * 1: entry 0 in attribute array (32-bit)
+ */
+#define NFS4_enc_cb_getattr_sz (cb_compound_enc_hdr_sz + \
+ cb_sequence_enc_sz + \
+ 1 + enc_nfs4_fh_sz + 1 + 1)
+/*
+ * 4: fattr_bitmap_maxsz
+ * 1: attribute array len
+ * 2: change attr (64-bit)
+ * 2: size (64-bit)
+ */
+#define NFS4_dec_cb_getattr_sz (cb_compound_dec_hdr_sz + \
+ cb_sequence_dec_sz + 4 + 1 + 2 + 2 + op_dec_sz)
--
2.39.3
On Tue, 2024-02-13 at 09:47 -0800, Dai Ngo wrote:
> Currently GETATTR conflict with a write delegation is handled by
> recalling the delegation before replying to the GETATTR.
>
> This patch series add supports for CB_GETATTR callback to get the latest
> change_info and size information of the file from the client that holds
> the delegation to reply to the GETATTR from the second client.
>
> NOTE: this patch series is mostly the same as the previous patches which
> were backed out when un unrelated problem of NFSD server hang on reboot
> was reported.
>
> The only difference is the wait_on_bit() in nfsd4_deleg_getattr_conflict was
> replaced with wait_on_bit_timeout() with 30ms timeout to avoid a potential
> DOS attack by exhausting NFSD kernel threads with GETATTR conflicts.
>
> fs/nfsd/nfs4callback.c | 97 +++++++++++++++++++++++++++++++++++-
> fs/nfsd/nfs4state.c | 119 ++++++++++++++++++++++++++++++++++++++++----
> fs/nfsd/nfs4xdr.c | 10 +++-
> fs/nfsd/nfsd.h | 1 +
> fs/nfsd/state.h | 24 ++++++++-
> fs/nfsd/xdr4cb.h | 18 +++++++
> 6 files changed, 255 insertions(+), 14 deletions(-)
Reviewed-by: Jeff Layton <[email protected]>
On Tue, Feb 13, 2024 at 09:47:27AM -0800, Dai Ngo wrote:
> If the GETATTR request on a file that has write delegation in effect
> and the request attributes include the change info and size attribute
> then the request is handled as below:
>
> Server sends CB_GETATTR to client to get the latest change info and file
> size. If these values are the same as the server's cached values then
> the GETATTR proceeds as normal.
>
> If either the change info or file size is different from the server's
> cached values, or the file was already marked as modified, then:
>
> . update time_modify and time_metadata into file's metadata
> with current time
>
> . encode GETATTR as normal except the file size is encoded with
> the value returned from CB_GETATTR
>
> . mark the file as modified
>
> The CB_GETATTR is given 30ms to complte. If the CB_GETATTR fails for
> any reasons, the delegation is recalled and NFS4ERR_DELAY is returned
> for the GETATTR from the second client.
>
> Signed-off-by: Dai Ngo <[email protected]>
> ---
> fs/nfsd/nfs4state.c | 119 ++++++++++++++++++++++++++++++++++++++++----
> fs/nfsd/nfs4xdr.c | 10 +++-
> fs/nfsd/nfsd.h | 1 +
> fs/nfsd/state.h | 10 +++-
> 4 files changed, 127 insertions(+), 13 deletions(-)
>
> diff --git a/fs/nfsd/nfs4state.c b/fs/nfsd/nfs4state.c
> index d9260e77ef2d..87987515e03d 100644
> --- a/fs/nfsd/nfs4state.c
> +++ b/fs/nfsd/nfs4state.c
> @@ -127,6 +127,7 @@ static void free_session(struct nfsd4_session *);
>
> static const struct nfsd4_callback_ops nfsd4_cb_recall_ops;
> static const struct nfsd4_callback_ops nfsd4_cb_notify_lock_ops;
> +static const struct nfsd4_callback_ops nfsd4_cb_getattr_ops;
>
> static struct workqueue_struct *laundry_wq;
>
> @@ -1189,6 +1190,10 @@ alloc_init_deleg(struct nfs4_client *clp, struct nfs4_file *fp,
> dp->dl_recalled = false;
> nfsd4_init_cb(&dp->dl_recall, dp->dl_stid.sc_client,
> &nfsd4_cb_recall_ops, NFSPROC4_CLNT_CB_RECALL);
> + nfsd4_init_cb(&dp->dl_cb_fattr.ncf_getattr, dp->dl_stid.sc_client,
> + &nfsd4_cb_getattr_ops, NFSPROC4_CLNT_CB_GETATTR);
> + dp->dl_cb_fattr.ncf_file_modified = false;
> + dp->dl_cb_fattr.ncf_cb_bmap[0] = FATTR4_WORD0_CHANGE | FATTR4_WORD0_SIZE;
> get_nfs4_file(fp);
> dp->dl_stid.sc_file = fp;
> return dp;
> @@ -3044,11 +3049,59 @@ nfsd4_cb_recall_any_release(struct nfsd4_callback *cb)
> spin_unlock(&nn->client_lock);
> }
>
> +static int
> +nfsd4_cb_getattr_done(struct nfsd4_callback *cb, struct rpc_task *task)
> +{
> + struct nfs4_cb_fattr *ncf =
> + container_of(cb, struct nfs4_cb_fattr, ncf_getattr);
> +
> + ncf->ncf_cb_status = task->tk_status;
> + switch (task->tk_status) {
> + case -NFS4ERR_DELAY:
> + rpc_delay(task, 2 * HZ);
> + return 0;
> + default:
> + return 1;
> + }
> +}
> +
> +static void
> +nfsd4_cb_getattr_release(struct nfsd4_callback *cb)
> +{
> + struct nfs4_cb_fattr *ncf =
> + container_of(cb, struct nfs4_cb_fattr, ncf_getattr);
> + struct nfs4_delegation *dp =
> + container_of(ncf, struct nfs4_delegation, dl_cb_fattr);
> +
> + nfs4_put_stid(&dp->dl_stid);
> + clear_bit(CB_GETATTR_BUSY, &ncf->ncf_cb_flags);
> + wake_up_bit(&ncf->ncf_cb_flags, CB_GETATTR_BUSY);
> +}
> +
What happens if the client responds after the GETATTR timer elapses?
Can nfsd4_cb_getattr_release over-write memory that is now being
used for something else?
> static const struct nfsd4_callback_ops nfsd4_cb_recall_any_ops = {
> .done = nfsd4_cb_recall_any_done,
> .release = nfsd4_cb_recall_any_release,
> };
>
> +static const struct nfsd4_callback_ops nfsd4_cb_getattr_ops = {
> + .done = nfsd4_cb_getattr_done,
> + .release = nfsd4_cb_getattr_release,
> +};
> +
> +static void nfs4_cb_getattr(struct nfs4_cb_fattr *ncf)
> +{
> + struct nfs4_delegation *dp =
> + container_of(ncf, struct nfs4_delegation, dl_cb_fattr);
> +
> + if (test_and_set_bit(CB_GETATTR_BUSY, &ncf->ncf_cb_flags))
> + return;
> + /* set to proper status when nfsd4_cb_getattr_done runs */
> + ncf->ncf_cb_status = NFS4ERR_IO;
> +
> + refcount_inc(&dp->dl_stid.sc_count);
> + nfsd4_run_cb(&ncf->ncf_getattr);
> +}
> +
> static struct nfs4_client *create_client(struct xdr_netobj name,
> struct svc_rqst *rqstp, nfs4_verifier *verf)
> {
> @@ -5854,6 +5907,8 @@ nfs4_open_delegation(struct nfsd4_open *open, struct nfs4_ol_stateid *stp,
> struct svc_fh *parent = NULL;
> int cb_up;
> int status = 0;
> + struct kstat stat;
> + struct path path;
>
> cb_up = nfsd4_cb_channel_good(oo->oo_owner.so_client);
> open->op_recall = false;
> @@ -5891,6 +5946,18 @@ nfs4_open_delegation(struct nfsd4_open *open, struct nfs4_ol_stateid *stp,
> if (open->op_share_access & NFS4_SHARE_ACCESS_WRITE) {
> open->op_delegate_type = NFS4_OPEN_DELEGATE_WRITE;
> trace_nfsd_deleg_write(&dp->dl_stid.sc_stateid);
> + path.mnt = currentfh->fh_export->ex_path.mnt;
> + path.dentry = currentfh->fh_dentry;
> + if (vfs_getattr(&path, &stat,
> + (STATX_SIZE | STATX_CTIME | STATX_CHANGE_COOKIE),
> + AT_STATX_SYNC_AS_STAT)) {
> + nfs4_put_stid(&dp->dl_stid);
> + destroy_delegation(dp);
> + goto out_no_deleg;
> + }
> + dp->dl_cb_fattr.ncf_cur_fsize = stat.size;
> + dp->dl_cb_fattr.ncf_initial_cinfo =
> + nfsd4_change_attribute(&stat, d_inode(currentfh->fh_dentry));
> } else {
> open->op_delegate_type = NFS4_OPEN_DELEGATE_READ;
> trace_nfsd_deleg_read(&dp->dl_stid.sc_stateid);
> @@ -8720,6 +8787,8 @@ nfsd4_get_writestateid(struct nfsd4_compound_state *cstate,
> * nfsd4_deleg_getattr_conflict - Recall if GETATTR causes conflict
> * @rqstp: RPC transaction context
> * @inode: file to be checked for a conflict
> + * @modified: return true if file was modified
> + * @size: new size of file if modified is true
> *
> * This function is called when there is a conflict between a write
> * delegation and a change/size GETATTR from another client. The server
> @@ -8728,22 +8797,22 @@ nfsd4_get_writestateid(struct nfsd4_compound_state *cstate,
> * delegation before replying to the GETATTR. See RFC 8881 section
> * 18.7.4.
> *
> - * The current implementation does not support CB_GETATTR yet. However
> - * this can avoid recalling the delegation could be added in follow up
> - * work.
> - *
> * Returns 0 if there is no conflict; otherwise an nfs_stat
> * code is returned.
> */
> __be32
> -nfsd4_deleg_getattr_conflict(struct svc_rqst *rqstp, struct inode *inode)
> +nfsd4_deleg_getattr_conflict(struct svc_rqst *rqstp, struct inode *inode,
> + bool *modified, u64 *size)
> {
> __be32 status;
> struct nfsd_net *nn = net_generic(SVC_NET(rqstp), nfsd_net_id);
> struct file_lock_context *ctx;
> struct file_lock *fl;
> struct nfs4_delegation *dp;
> + struct iattr attrs;
> + struct nfs4_cb_fattr *ncf;
>
> + *modified = false;
> ctx = locks_inode_context(inode);
> if (!ctx)
> return 0;
> @@ -8768,12 +8837,42 @@ nfsd4_deleg_getattr_conflict(struct svc_rqst *rqstp, struct inode *inode)
> return 0;
> }
> break_lease:
> - spin_unlock(&ctx->flc_lock);
> nfsd_stats_wdeleg_getattr_inc(nn);
> - status = nfserrno(nfsd_open_break_lease(inode, NFSD_MAY_READ));
> - if (status != nfserr_jukebox ||
> - !nfsd_wait_for_delegreturn(rqstp, inode))
> - return status;
> + dp = fl->fl_owner;
> + ncf = &dp->dl_cb_fattr;
> + nfs4_cb_getattr(&dp->dl_cb_fattr);
> + spin_unlock(&ctx->flc_lock);
> + /*
> + * allow 30 ms for the callback to complete which should
> + * take care most cases when everything works normally.
> + * Otherwise just fall back to the normal mechanisnm to
> + * recall the delegation.
> + */
The code already says what the comment says, and if
NFSD_CB_GETATTR_TIMEOUT is ever changed, the comment will become
stale. I suggest removing this comment and adding just a one-liner
something like below:
> + wait_on_bit_timeout(&ncf->ncf_cb_flags, CB_GETATTR_BUSY,
> + TASK_INTERRUPTIBLE, NFSD_CB_GETATTR_TIMEOUT);
> + if (ncf->ncf_cb_status) {
/* Recall delegation only if client didn't respond */
> + status = nfserrno(nfsd_open_break_lease(inode, NFSD_MAY_READ));
> + if (status != nfserr_jukebox ||
> + !nfsd_wait_for_delegreturn(rqstp, inode))
> + return status;
> + }
> + if (!ncf->ncf_file_modified &&
> + (ncf->ncf_initial_cinfo != ncf->ncf_cb_change ||
> + ncf->ncf_cur_fsize != ncf->ncf_cb_fsize))
> + ncf->ncf_file_modified = true;
> + if (ncf->ncf_file_modified) {
> + /*
> + * The server would not update the file's metadata
> + * with the client's modified size.
> + */
I don't understand "The server would not update...". Does that mean
the server is not supposed to update, or something failed? Can this
comment be clarified?
> + attrs.ia_mtime = attrs.ia_ctime = current_time(inode);
> + attrs.ia_valid = ATTR_MTIME | ATTR_CTIME;
> + setattr_copy(&nop_mnt_idmap, inode, &attrs);
> + mark_inode_dirty(inode);
> + ncf->ncf_cur_fsize = ncf->ncf_cb_fsize;
> + *size = ncf->ncf_cur_fsize;
> + *modified = true;
> + }
One of the lesser-known guidelines of coding style is that if the
indent level grows too deep, that's a sign that you should move
this code into another function. Up to you, but IMO that might be
easier to read.
> return 0;
> }
> break;
> diff --git a/fs/nfsd/nfs4xdr.c b/fs/nfsd/nfs4xdr.c
> index e3f761cd5ee7..9e8f230fc96e 100644
> --- a/fs/nfsd/nfs4xdr.c
> +++ b/fs/nfsd/nfs4xdr.c
> @@ -3507,6 +3507,8 @@ nfsd4_encode_fattr4(struct svc_rqst *rqstp, struct xdr_stream *xdr,
> unsigned long mask[2];
> } u;
> unsigned long bit;
> + bool file_modified = false;
> + u64 size = 0;
>
> WARN_ON_ONCE(bmval[1] & NFSD_WRITEONLY_ATTRS_WORD1);
> WARN_ON_ONCE(!nfsd_attrs_supported(minorversion, bmval));
> @@ -3533,7 +3535,8 @@ nfsd4_encode_fattr4(struct svc_rqst *rqstp, struct xdr_stream *xdr,
> }
> args.size = 0;
> if (u.attrmask[0] & (FATTR4_WORD0_CHANGE | FATTR4_WORD0_SIZE)) {
> - status = nfsd4_deleg_getattr_conflict(rqstp, d_inode(dentry));
> + status = nfsd4_deleg_getattr_conflict(rqstp, d_inode(dentry),
> + &file_modified, &size);
> if (status)
> goto out;
> }
> @@ -3543,7 +3546,10 @@ nfsd4_encode_fattr4(struct svc_rqst *rqstp, struct xdr_stream *xdr,
> AT_STATX_SYNC_AS_STAT);
> if (err)
> goto out_nfserr;
> - args.size = args.stat.size;
> + if (file_modified)
> + args.size = size;
> + else
> + args.size = args.stat.size;
>
> if (!(args.stat.result_mask & STATX_BTIME))
> /* underlying FS does not offer btime so we can't share it */
> diff --git a/fs/nfsd/nfsd.h b/fs/nfsd/nfsd.h
> index 8daf22d766c6..16c5a05f340e 100644
> --- a/fs/nfsd/nfsd.h
> +++ b/fs/nfsd/nfsd.h
> @@ -367,6 +367,7 @@ void nfsd_lockd_shutdown(void);
> #define NFSD_CLIENT_MAX_TRIM_PER_RUN 128
> #define NFS4_CLIENTS_PER_GB 1024
> #define NFSD_DELEGRETURN_TIMEOUT (HZ / 34) /* 30ms */
> +#define NFSD_CB_GETATTR_TIMEOUT NFSD_DELEGRETURN_TIMEOUT
>
> /*
> * The following attributes are currently not supported by the NFSv4 server:
> diff --git a/fs/nfsd/state.h b/fs/nfsd/state.h
> index 3bf418ee6c97..01c6f3445646 100644
> --- a/fs/nfsd/state.h
> +++ b/fs/nfsd/state.h
> @@ -142,8 +142,16 @@ struct nfs4_cb_fattr {
> /* from CB_GETATTR reply */
> u64 ncf_cb_change;
> u64 ncf_cb_fsize;
> +
> + unsigned long ncf_cb_flags;
> + bool ncf_file_modified;
> + u64 ncf_initial_cinfo;
> + u64 ncf_cur_fsize;
> };
>
> +/* bits for ncf_cb_flags */
> +#define CB_GETATTR_BUSY 0
> +
> /*
> * Represents a delegation stateid. The nfs4_client holds references to these
> * and they are put when it is being destroyed or when the delegation is
> @@ -773,5 +781,5 @@ static inline bool try_to_expire_client(struct nfs4_client *clp)
> }
>
> extern __be32 nfsd4_deleg_getattr_conflict(struct svc_rqst *rqstp,
> - struct inode *inode);
> + struct inode *inode, bool *file_modified, u64 *size);
> #endif /* NFSD4_STATE_H */
> --
> 2.39.3
>
--
Chuck Lever
On 2/14/24 6:50 AM, Chuck Lever wrote:
> On Tue, Feb 13, 2024 at 09:47:27AM -0800, Dai Ngo wrote:
>> If the GETATTR request on a file that has write delegation in effect
>> and the request attributes include the change info and size attribute
>> then the request is handled as below:
>>
>> Server sends CB_GETATTR to client to get the latest change info and file
>> size. If these values are the same as the server's cached values then
>> the GETATTR proceeds as normal.
>>
>> If either the change info or file size is different from the server's
>> cached values, or the file was already marked as modified, then:
>>
>> . update time_modify and time_metadata into file's metadata
>> with current time
>>
>> . encode GETATTR as normal except the file size is encoded with
>> the value returned from CB_GETATTR
>>
>> . mark the file as modified
>>
>> The CB_GETATTR is given 30ms to complte. If the CB_GETATTR fails for
>> any reasons, the delegation is recalled and NFS4ERR_DELAY is returned
>> for the GETATTR from the second client.
>>
>> Signed-off-by: Dai Ngo <[email protected]>
>> ---
>> fs/nfsd/nfs4state.c | 119 ++++++++++++++++++++++++++++++++++++++++----
>> fs/nfsd/nfs4xdr.c | 10 +++-
>> fs/nfsd/nfsd.h | 1 +
>> fs/nfsd/state.h | 10 +++-
>> 4 files changed, 127 insertions(+), 13 deletions(-)
>>
>> diff --git a/fs/nfsd/nfs4state.c b/fs/nfsd/nfs4state.c
>> index d9260e77ef2d..87987515e03d 100644
>> --- a/fs/nfsd/nfs4state.c
>> +++ b/fs/nfsd/nfs4state.c
>> @@ -127,6 +127,7 @@ static void free_session(struct nfsd4_session *);
>>
>> static const struct nfsd4_callback_ops nfsd4_cb_recall_ops;
>> static const struct nfsd4_callback_ops nfsd4_cb_notify_lock_ops;
>> +static const struct nfsd4_callback_ops nfsd4_cb_getattr_ops;
>>
>> static struct workqueue_struct *laundry_wq;
>>
>> @@ -1189,6 +1190,10 @@ alloc_init_deleg(struct nfs4_client *clp, struct nfs4_file *fp,
>> dp->dl_recalled = false;
>> nfsd4_init_cb(&dp->dl_recall, dp->dl_stid.sc_client,
>> &nfsd4_cb_recall_ops, NFSPROC4_CLNT_CB_RECALL);
>> + nfsd4_init_cb(&dp->dl_cb_fattr.ncf_getattr, dp->dl_stid.sc_client,
>> + &nfsd4_cb_getattr_ops, NFSPROC4_CLNT_CB_GETATTR);
>> + dp->dl_cb_fattr.ncf_file_modified = false;
>> + dp->dl_cb_fattr.ncf_cb_bmap[0] = FATTR4_WORD0_CHANGE | FATTR4_WORD0_SIZE;
>> get_nfs4_file(fp);
>> dp->dl_stid.sc_file = fp;
>> return dp;
>> @@ -3044,11 +3049,59 @@ nfsd4_cb_recall_any_release(struct nfsd4_callback *cb)
>> spin_unlock(&nn->client_lock);
>> }
>>
>> +static int
>> +nfsd4_cb_getattr_done(struct nfsd4_callback *cb, struct rpc_task *task)
>> +{
>> + struct nfs4_cb_fattr *ncf =
>> + container_of(cb, struct nfs4_cb_fattr, ncf_getattr);
>> +
>> + ncf->ncf_cb_status = task->tk_status;
>> + switch (task->tk_status) {
>> + case -NFS4ERR_DELAY:
>> + rpc_delay(task, 2 * HZ);
>> + return 0;
>> + default:
>> + return 1;
>> + }
>> +}
>> +
>> +static void
>> +nfsd4_cb_getattr_release(struct nfsd4_callback *cb)
>> +{
>> + struct nfs4_cb_fattr *ncf =
>> + container_of(cb, struct nfs4_cb_fattr, ncf_getattr);
>> + struct nfs4_delegation *dp =
>> + container_of(ncf, struct nfs4_delegation, dl_cb_fattr);
>> +
>> + nfs4_put_stid(&dp->dl_stid);
>> + clear_bit(CB_GETATTR_BUSY, &ncf->ncf_cb_flags);
>> + wake_up_bit(&ncf->ncf_cb_flags, CB_GETATTR_BUSY);
>> +}
>> +
> What happens if the client responds after the GETATTR timer elapses?
> Can nfsd4_cb_getattr_release over-write memory that is now being
> used for something else?
The refcount added in nfs4_cb_getattr keeps the delegation state valid
until it's released here by nfs4_put_stid.
>
>
>> static const struct nfsd4_callback_ops nfsd4_cb_recall_any_ops = {
>> .done = nfsd4_cb_recall_any_done,
>> .release = nfsd4_cb_recall_any_release,
>> };
>>
>> +static const struct nfsd4_callback_ops nfsd4_cb_getattr_ops = {
>> + .done = nfsd4_cb_getattr_done,
>> + .release = nfsd4_cb_getattr_release,
>> +};
>> +
>> +static void nfs4_cb_getattr(struct nfs4_cb_fattr *ncf)
>> +{
>> + struct nfs4_delegation *dp =
>> + container_of(ncf, struct nfs4_delegation, dl_cb_fattr);
>> +
>> + if (test_and_set_bit(CB_GETATTR_BUSY, &ncf->ncf_cb_flags))
>> + return;
>> + /* set to proper status when nfsd4_cb_getattr_done runs */
>> + ncf->ncf_cb_status = NFS4ERR_IO;
>> +
>> + refcount_inc(&dp->dl_stid.sc_count);
>> + nfsd4_run_cb(&ncf->ncf_getattr);
>> +}
>> +
>> static struct nfs4_client *create_client(struct xdr_netobj name,
>> struct svc_rqst *rqstp, nfs4_verifier *verf)
>> {
>> @@ -5854,6 +5907,8 @@ nfs4_open_delegation(struct nfsd4_open *open, struct nfs4_ol_stateid *stp,
>> struct svc_fh *parent = NULL;
>> int cb_up;
>> int status = 0;
>> + struct kstat stat;
>> + struct path path;
>>
>> cb_up = nfsd4_cb_channel_good(oo->oo_owner.so_client);
>> open->op_recall = false;
>> @@ -5891,6 +5946,18 @@ nfs4_open_delegation(struct nfsd4_open *open, struct nfs4_ol_stateid *stp,
>> if (open->op_share_access & NFS4_SHARE_ACCESS_WRITE) {
>> open->op_delegate_type = NFS4_OPEN_DELEGATE_WRITE;
>> trace_nfsd_deleg_write(&dp->dl_stid.sc_stateid);
>> + path.mnt = currentfh->fh_export->ex_path.mnt;
>> + path.dentry = currentfh->fh_dentry;
>> + if (vfs_getattr(&path, &stat,
>> + (STATX_SIZE | STATX_CTIME | STATX_CHANGE_COOKIE),
>> + AT_STATX_SYNC_AS_STAT)) {
>> + nfs4_put_stid(&dp->dl_stid);
>> + destroy_delegation(dp);
>> + goto out_no_deleg;
>> + }
>> + dp->dl_cb_fattr.ncf_cur_fsize = stat.size;
>> + dp->dl_cb_fattr.ncf_initial_cinfo =
>> + nfsd4_change_attribute(&stat, d_inode(currentfh->fh_dentry));
>> } else {
>> open->op_delegate_type = NFS4_OPEN_DELEGATE_READ;
>> trace_nfsd_deleg_read(&dp->dl_stid.sc_stateid);
>> @@ -8720,6 +8787,8 @@ nfsd4_get_writestateid(struct nfsd4_compound_state *cstate,
>> * nfsd4_deleg_getattr_conflict - Recall if GETATTR causes conflict
>> * @rqstp: RPC transaction context
>> * @inode: file to be checked for a conflict
>> + * @modified: return true if file was modified
>> + * @size: new size of file if modified is true
>> *
>> * This function is called when there is a conflict between a write
>> * delegation and a change/size GETATTR from another client. The server
>> @@ -8728,22 +8797,22 @@ nfsd4_get_writestateid(struct nfsd4_compound_state *cstate,
>> * delegation before replying to the GETATTR. See RFC 8881 section
>> * 18.7.4.
>> *
>> - * The current implementation does not support CB_GETATTR yet. However
>> - * this can avoid recalling the delegation could be added in follow up
>> - * work.
>> - *
>> * Returns 0 if there is no conflict; otherwise an nfs_stat
>> * code is returned.
>> */
>> __be32
>> -nfsd4_deleg_getattr_conflict(struct svc_rqst *rqstp, struct inode *inode)
>> +nfsd4_deleg_getattr_conflict(struct svc_rqst *rqstp, struct inode *inode,
>> + bool *modified, u64 *size)
>> {
>> __be32 status;
>> struct nfsd_net *nn = net_generic(SVC_NET(rqstp), nfsd_net_id);
>> struct file_lock_context *ctx;
>> struct file_lock *fl;
>> struct nfs4_delegation *dp;
>> + struct iattr attrs;
>> + struct nfs4_cb_fattr *ncf;
>>
>> + *modified = false;
>> ctx = locks_inode_context(inode);
>> if (!ctx)
>> return 0;
>> @@ -8768,12 +8837,42 @@ nfsd4_deleg_getattr_conflict(struct svc_rqst *rqstp, struct inode *inode)
>> return 0;
>> }
>> break_lease:
>> - spin_unlock(&ctx->flc_lock);
>> nfsd_stats_wdeleg_getattr_inc(nn);
>> - status = nfserrno(nfsd_open_break_lease(inode, NFSD_MAY_READ));
>> - if (status != nfserr_jukebox ||
>> - !nfsd_wait_for_delegreturn(rqstp, inode))
>> - return status;
>> + dp = fl->fl_owner;
>> + ncf = &dp->dl_cb_fattr;
>> + nfs4_cb_getattr(&dp->dl_cb_fattr);
>> + spin_unlock(&ctx->flc_lock);
>> + /*
>> + * allow 30 ms for the callback to complete which should
>> + * take care most cases when everything works normally.
>> + * Otherwise just fall back to the normal mechanisnm to
>> + * recall the delegation.
>> + */
> The code already says what the comment says, and if
> NFSD_CB_GETATTR_TIMEOUT is ever changed, the comment will become
> stale. I suggest removing this comment and adding just a one-liner
> something like below:
ok.
>
>
>> + wait_on_bit_timeout(&ncf->ncf_cb_flags, CB_GETATTR_BUSY,
>> + TASK_INTERRUPTIBLE, NFSD_CB_GETATTR_TIMEOUT);
>> + if (ncf->ncf_cb_status) {
> /* Recall delegation only if client didn't respond */
>
>> + status = nfserrno(nfsd_open_break_lease(inode, NFSD_MAY_READ));
>> + if (status != nfserr_jukebox ||
>> + !nfsd_wait_for_delegreturn(rqstp, inode))
>> + return status;
>> + }
>> + if (!ncf->ncf_file_modified &&
>> + (ncf->ncf_initial_cinfo != ncf->ncf_cb_change ||
>> + ncf->ncf_cur_fsize != ncf->ncf_cb_fsize))
>> + ncf->ncf_file_modified = true;
>> + if (ncf->ncf_file_modified) {
>> + /*
>> + * The server would not update the file's metadata
>> + * with the client's modified size.
>> + */
> I don't understand "The server would not update...". Does that mean
> the server is not supposed to update, or something failed? Can this
> comment be clarified?
This is the recommendation from section 10.4.3 Handling of CB_GETATTR
in RFC 8881. I'll add this info to the comment in v2.
>
>
>> + attrs.ia_mtime = attrs.ia_ctime = current_time(inode);
>> + attrs.ia_valid = ATTR_MTIME | ATTR_CTIME;
>> + setattr_copy(&nop_mnt_idmap, inode, &attrs);
>> + mark_inode_dirty(inode);
>> + ncf->ncf_cur_fsize = ncf->ncf_cb_fsize;
>> + *size = ncf->ncf_cur_fsize;
>> + *modified = true;
>> + }
> One of the lesser-known guidelines of coding style is that if the
> indent level grows too deep, that's a sign that you should move
> this code into another function. Up to you, but IMO that might be
> easier to read.
Thanks, I'll keep this in mind for next time.
-Dai
>
>
>> return 0;
>> }
>> break;
>> diff --git a/fs/nfsd/nfs4xdr.c b/fs/nfsd/nfs4xdr.c
>> index e3f761cd5ee7..9e8f230fc96e 100644
>> --- a/fs/nfsd/nfs4xdr.c
>> +++ b/fs/nfsd/nfs4xdr.c
>> @@ -3507,6 +3507,8 @@ nfsd4_encode_fattr4(struct svc_rqst *rqstp, struct xdr_stream *xdr,
>> unsigned long mask[2];
>> } u;
>> unsigned long bit;
>> + bool file_modified = false;
>> + u64 size = 0;
>>
>> WARN_ON_ONCE(bmval[1] & NFSD_WRITEONLY_ATTRS_WORD1);
>> WARN_ON_ONCE(!nfsd_attrs_supported(minorversion, bmval));
>> @@ -3533,7 +3535,8 @@ nfsd4_encode_fattr4(struct svc_rqst *rqstp, struct xdr_stream *xdr,
>> }
>> args.size = 0;
>> if (u.attrmask[0] & (FATTR4_WORD0_CHANGE | FATTR4_WORD0_SIZE)) {
>> - status = nfsd4_deleg_getattr_conflict(rqstp, d_inode(dentry));
>> + status = nfsd4_deleg_getattr_conflict(rqstp, d_inode(dentry),
>> + &file_modified, &size);
>> if (status)
>> goto out;
>> }
>> @@ -3543,7 +3546,10 @@ nfsd4_encode_fattr4(struct svc_rqst *rqstp, struct xdr_stream *xdr,
>> AT_STATX_SYNC_AS_STAT);
>> if (err)
>> goto out_nfserr;
>> - args.size = args.stat.size;
>> + if (file_modified)
>> + args.size = size;
>> + else
>> + args.size = args.stat.size;
>>
>> if (!(args.stat.result_mask & STATX_BTIME))
>> /* underlying FS does not offer btime so we can't share it */
>> diff --git a/fs/nfsd/nfsd.h b/fs/nfsd/nfsd.h
>> index 8daf22d766c6..16c5a05f340e 100644
>> --- a/fs/nfsd/nfsd.h
>> +++ b/fs/nfsd/nfsd.h
>> @@ -367,6 +367,7 @@ void nfsd_lockd_shutdown(void);
>> #define NFSD_CLIENT_MAX_TRIM_PER_RUN 128
>> #define NFS4_CLIENTS_PER_GB 1024
>> #define NFSD_DELEGRETURN_TIMEOUT (HZ / 34) /* 30ms */
>> +#define NFSD_CB_GETATTR_TIMEOUT NFSD_DELEGRETURN_TIMEOUT
>>
>> /*
>> * The following attributes are currently not supported by the NFSv4 server:
>> diff --git a/fs/nfsd/state.h b/fs/nfsd/state.h
>> index 3bf418ee6c97..01c6f3445646 100644
>> --- a/fs/nfsd/state.h
>> +++ b/fs/nfsd/state.h
>> @@ -142,8 +142,16 @@ struct nfs4_cb_fattr {
>> /* from CB_GETATTR reply */
>> u64 ncf_cb_change;
>> u64 ncf_cb_fsize;
>> +
>> + unsigned long ncf_cb_flags;
>> + bool ncf_file_modified;
>> + u64 ncf_initial_cinfo;
>> + u64 ncf_cur_fsize;
>> };
>>
>> +/* bits for ncf_cb_flags */
>> +#define CB_GETATTR_BUSY 0
>> +
>> /*
>> * Represents a delegation stateid. The nfs4_client holds references to these
>> * and they are put when it is being destroyed or when the delegation is
>> @@ -773,5 +781,5 @@ static inline bool try_to_expire_client(struct nfs4_client *clp)
>> }
>>
>> extern __be32 nfsd4_deleg_getattr_conflict(struct svc_rqst *rqstp,
>> - struct inode *inode);
>> + struct inode *inode, bool *file_modified, u64 *size);
>> #endif /* NFSD4_STATE_H */
>> --
>> 2.39.3
>>
On Wed, Feb 14, 2024 at 10:10:50AM -0800, [email protected] wrote:
>
> On 2/14/24 6:50 AM, Chuck Lever wrote:
> > On Tue, Feb 13, 2024 at 09:47:27AM -0800, Dai Ngo wrote:
> > > If the GETATTR request on a file that has write delegation in effect
> > > and the request attributes include the change info and size attribute
> > > then the request is handled as below:
> > >
> > > Server sends CB_GETATTR to client to get the latest change info and file
> > > size. If these values are the same as the server's cached values then
> > > the GETATTR proceeds as normal.
> > >
> > > If either the change info or file size is different from the server's
> > > cached values, or the file was already marked as modified, then:
> > >
> > > . update time_modify and time_metadata into file's metadata
> > > with current time
> > >
> > > . encode GETATTR as normal except the file size is encoded with
> > > the value returned from CB_GETATTR
> > >
> > > . mark the file as modified
> > >
> > > The CB_GETATTR is given 30ms to complte. If the CB_GETATTR fails for
> > > any reasons, the delegation is recalled and NFS4ERR_DELAY is returned
> > > for the GETATTR from the second client.
> > >
> > > Signed-off-by: Dai Ngo <[email protected]>
> > > ---
> > > fs/nfsd/nfs4state.c | 119 ++++++++++++++++++++++++++++++++++++++++----
> > > fs/nfsd/nfs4xdr.c | 10 +++-
> > > fs/nfsd/nfsd.h | 1 +
> > > fs/nfsd/state.h | 10 +++-
> > > 4 files changed, 127 insertions(+), 13 deletions(-)
> > >
> > > diff --git a/fs/nfsd/nfs4state.c b/fs/nfsd/nfs4state.c
> > > index d9260e77ef2d..87987515e03d 100644
> > > --- a/fs/nfsd/nfs4state.c
> > > +++ b/fs/nfsd/nfs4state.c
> > > @@ -127,6 +127,7 @@ static void free_session(struct nfsd4_session *);
> > > static const struct nfsd4_callback_ops nfsd4_cb_recall_ops;
> > > static const struct nfsd4_callback_ops nfsd4_cb_notify_lock_ops;
> > > +static const struct nfsd4_callback_ops nfsd4_cb_getattr_ops;
> > > static struct workqueue_struct *laundry_wq;
> > > @@ -1189,6 +1190,10 @@ alloc_init_deleg(struct nfs4_client *clp, struct nfs4_file *fp,
> > > dp->dl_recalled = false;
> > > nfsd4_init_cb(&dp->dl_recall, dp->dl_stid.sc_client,
> > > &nfsd4_cb_recall_ops, NFSPROC4_CLNT_CB_RECALL);
> > > + nfsd4_init_cb(&dp->dl_cb_fattr.ncf_getattr, dp->dl_stid.sc_client,
> > > + &nfsd4_cb_getattr_ops, NFSPROC4_CLNT_CB_GETATTR);
> > > + dp->dl_cb_fattr.ncf_file_modified = false;
> > > + dp->dl_cb_fattr.ncf_cb_bmap[0] = FATTR4_WORD0_CHANGE | FATTR4_WORD0_SIZE;
> > > get_nfs4_file(fp);
> > > dp->dl_stid.sc_file = fp;
> > > return dp;
> > > @@ -3044,11 +3049,59 @@ nfsd4_cb_recall_any_release(struct nfsd4_callback *cb)
> > > spin_unlock(&nn->client_lock);
> > > }
> > > +static int
> > > +nfsd4_cb_getattr_done(struct nfsd4_callback *cb, struct rpc_task *task)
> > > +{
> > > + struct nfs4_cb_fattr *ncf =
> > > + container_of(cb, struct nfs4_cb_fattr, ncf_getattr);
> > > +
> > > + ncf->ncf_cb_status = task->tk_status;
> > > + switch (task->tk_status) {
> > > + case -NFS4ERR_DELAY:
> > > + rpc_delay(task, 2 * HZ);
> > > + return 0;
> > > + default:
> > > + return 1;
> > > + }
> > > +}
> > > +
> > > +static void
> > > +nfsd4_cb_getattr_release(struct nfsd4_callback *cb)
> > > +{
> > > + struct nfs4_cb_fattr *ncf =
> > > + container_of(cb, struct nfs4_cb_fattr, ncf_getattr);
> > > + struct nfs4_delegation *dp =
> > > + container_of(ncf, struct nfs4_delegation, dl_cb_fattr);
> > > +
> > > + nfs4_put_stid(&dp->dl_stid);
> > > + clear_bit(CB_GETATTR_BUSY, &ncf->ncf_cb_flags);
> > > + wake_up_bit(&ncf->ncf_cb_flags, CB_GETATTR_BUSY);
> > > +}
> > > +
> > What happens if the client responds after the GETATTR timer elapses?
> > Can nfsd4_cb_getattr_release over-write memory that is now being
> > used for something else?
>
> The refcount added in nfs4_cb_getattr keeps the delegation state valid
> until it's released here by nfs4_put_stid.
If the client never replies, does that pin the stateid?
--
Chuck Lever
On 2/14/24 10:14 AM, Chuck Lever wrote:
> On Wed, Feb 14, 2024 at 10:10:50AM -0800, [email protected] wrote:
>> On 2/14/24 6:50 AM, Chuck Lever wrote:
>>> On Tue, Feb 13, 2024 at 09:47:27AM -0800, Dai Ngo wrote:
>>>> If the GETATTR request on a file that has write delegation in effect
>>>> and the request attributes include the change info and size attribute
>>>> then the request is handled as below:
>>>>
>>>> Server sends CB_GETATTR to client to get the latest change info and file
>>>> size. If these values are the same as the server's cached values then
>>>> the GETATTR proceeds as normal.
>>>>
>>>> If either the change info or file size is different from the server's
>>>> cached values, or the file was already marked as modified, then:
>>>>
>>>> . update time_modify and time_metadata into file's metadata
>>>> with current time
>>>>
>>>> . encode GETATTR as normal except the file size is encoded with
>>>> the value returned from CB_GETATTR
>>>>
>>>> . mark the file as modified
>>>>
>>>> The CB_GETATTR is given 30ms to complte. If the CB_GETATTR fails for
>>>> any reasons, the delegation is recalled and NFS4ERR_DELAY is returned
>>>> for the GETATTR from the second client.
>>>>
>>>> Signed-off-by: Dai Ngo <[email protected]>
>>>> ---
>>>> fs/nfsd/nfs4state.c | 119 ++++++++++++++++++++++++++++++++++++++++----
>>>> fs/nfsd/nfs4xdr.c | 10 +++-
>>>> fs/nfsd/nfsd.h | 1 +
>>>> fs/nfsd/state.h | 10 +++-
>>>> 4 files changed, 127 insertions(+), 13 deletions(-)
>>>>
>>>> diff --git a/fs/nfsd/nfs4state.c b/fs/nfsd/nfs4state.c
>>>> index d9260e77ef2d..87987515e03d 100644
>>>> --- a/fs/nfsd/nfs4state.c
>>>> +++ b/fs/nfsd/nfs4state.c
>>>> @@ -127,6 +127,7 @@ static void free_session(struct nfsd4_session *);
>>>> static const struct nfsd4_callback_ops nfsd4_cb_recall_ops;
>>>> static const struct nfsd4_callback_ops nfsd4_cb_notify_lock_ops;
>>>> +static const struct nfsd4_callback_ops nfsd4_cb_getattr_ops;
>>>> static struct workqueue_struct *laundry_wq;
>>>> @@ -1189,6 +1190,10 @@ alloc_init_deleg(struct nfs4_client *clp, struct nfs4_file *fp,
>>>> dp->dl_recalled = false;
>>>> nfsd4_init_cb(&dp->dl_recall, dp->dl_stid.sc_client,
>>>> &nfsd4_cb_recall_ops, NFSPROC4_CLNT_CB_RECALL);
>>>> + nfsd4_init_cb(&dp->dl_cb_fattr.ncf_getattr, dp->dl_stid.sc_client,
>>>> + &nfsd4_cb_getattr_ops, NFSPROC4_CLNT_CB_GETATTR);
>>>> + dp->dl_cb_fattr.ncf_file_modified = false;
>>>> + dp->dl_cb_fattr.ncf_cb_bmap[0] = FATTR4_WORD0_CHANGE | FATTR4_WORD0_SIZE;
>>>> get_nfs4_file(fp);
>>>> dp->dl_stid.sc_file = fp;
>>>> return dp;
>>>> @@ -3044,11 +3049,59 @@ nfsd4_cb_recall_any_release(struct nfsd4_callback *cb)
>>>> spin_unlock(&nn->client_lock);
>>>> }
>>>> +static int
>>>> +nfsd4_cb_getattr_done(struct nfsd4_callback *cb, struct rpc_task *task)
>>>> +{
>>>> + struct nfs4_cb_fattr *ncf =
>>>> + container_of(cb, struct nfs4_cb_fattr, ncf_getattr);
>>>> +
>>>> + ncf->ncf_cb_status = task->tk_status;
>>>> + switch (task->tk_status) {
>>>> + case -NFS4ERR_DELAY:
>>>> + rpc_delay(task, 2 * HZ);
>>>> + return 0;
>>>> + default:
>>>> + return 1;
>>>> + }
>>>> +}
>>>> +
>>>> +static void
>>>> +nfsd4_cb_getattr_release(struct nfsd4_callback *cb)
>>>> +{
>>>> + struct nfs4_cb_fattr *ncf =
>>>> + container_of(cb, struct nfs4_cb_fattr, ncf_getattr);
>>>> + struct nfs4_delegation *dp =
>>>> + container_of(ncf, struct nfs4_delegation, dl_cb_fattr);
>>>> +
>>>> + nfs4_put_stid(&dp->dl_stid);
>>>> + clear_bit(CB_GETATTR_BUSY, &ncf->ncf_cb_flags);
>>>> + wake_up_bit(&ncf->ncf_cb_flags, CB_GETATTR_BUSY);
>>>> +}
>>>> +
>>> What happens if the client responds after the GETATTR timer elapses?
>>> Can nfsd4_cb_getattr_release over-write memory that is now being
>>> used for something else?
>> The refcount added in nfs4_cb_getattr keeps the delegation state valid
>> until it's released here by nfs4_put_stid.
> If the client never replies, does that pin the stateid?
Won't RPC timeout on waiting for reply?
This is the same behavior as when recalling a delegation,
nfsd_break_deleg_cb and nfsd4_cb_recall_release.
-Dai
>
>
On Wed, Feb 14, 2024 at 10:22:05AM -0800, [email protected] wrote:
>
> On 2/14/24 10:14 AM, Chuck Lever wrote:
> > On Wed, Feb 14, 2024 at 10:10:50AM -0800, [email protected] wrote:
> > > On 2/14/24 6:50 AM, Chuck Lever wrote:
> > > > On Tue, Feb 13, 2024 at 09:47:27AM -0800, Dai Ngo wrote:
> > > > > If the GETATTR request on a file that has write delegation in effect
> > > > > and the request attributes include the change info and size attribute
> > > > > then the request is handled as below:
> > > > >
> > > > > Server sends CB_GETATTR to client to get the latest change info and file
> > > > > size. If these values are the same as the server's cached values then
> > > > > the GETATTR proceeds as normal.
> > > > >
> > > > > If either the change info or file size is different from the server's
> > > > > cached values, or the file was already marked as modified, then:
> > > > >
> > > > > . update time_modify and time_metadata into file's metadata
> > > > > with current time
> > > > >
> > > > > . encode GETATTR as normal except the file size is encoded with
> > > > > the value returned from CB_GETATTR
> > > > >
> > > > > . mark the file as modified
> > > > >
> > > > > The CB_GETATTR is given 30ms to complte. If the CB_GETATTR fails for
> > > > > any reasons, the delegation is recalled and NFS4ERR_DELAY is returned
> > > > > for the GETATTR from the second client.
> > > > >
> > > > > Signed-off-by: Dai Ngo <[email protected]>
> > > > > ---
> > > > > fs/nfsd/nfs4state.c | 119 ++++++++++++++++++++++++++++++++++++++++----
> > > > > fs/nfsd/nfs4xdr.c | 10 +++-
> > > > > fs/nfsd/nfsd.h | 1 +
> > > > > fs/nfsd/state.h | 10 +++-
> > > > > 4 files changed, 127 insertions(+), 13 deletions(-)
> > > > >
> > > > > diff --git a/fs/nfsd/nfs4state.c b/fs/nfsd/nfs4state.c
> > > > > index d9260e77ef2d..87987515e03d 100644
> > > > > --- a/fs/nfsd/nfs4state.c
> > > > > +++ b/fs/nfsd/nfs4state.c
> > > > > @@ -127,6 +127,7 @@ static void free_session(struct nfsd4_session *);
> > > > > static const struct nfsd4_callback_ops nfsd4_cb_recall_ops;
> > > > > static const struct nfsd4_callback_ops nfsd4_cb_notify_lock_ops;
> > > > > +static const struct nfsd4_callback_ops nfsd4_cb_getattr_ops;
> > > > > static struct workqueue_struct *laundry_wq;
> > > > > @@ -1189,6 +1190,10 @@ alloc_init_deleg(struct nfs4_client *clp, struct nfs4_file *fp,
> > > > > dp->dl_recalled = false;
> > > > > nfsd4_init_cb(&dp->dl_recall, dp->dl_stid.sc_client,
> > > > > &nfsd4_cb_recall_ops, NFSPROC4_CLNT_CB_RECALL);
> > > > > + nfsd4_init_cb(&dp->dl_cb_fattr.ncf_getattr, dp->dl_stid.sc_client,
> > > > > + &nfsd4_cb_getattr_ops, NFSPROC4_CLNT_CB_GETATTR);
> > > > > + dp->dl_cb_fattr.ncf_file_modified = false;
> > > > > + dp->dl_cb_fattr.ncf_cb_bmap[0] = FATTR4_WORD0_CHANGE | FATTR4_WORD0_SIZE;
> > > > > get_nfs4_file(fp);
> > > > > dp->dl_stid.sc_file = fp;
> > > > > return dp;
> > > > > @@ -3044,11 +3049,59 @@ nfsd4_cb_recall_any_release(struct nfsd4_callback *cb)
> > > > > spin_unlock(&nn->client_lock);
> > > > > }
> > > > > +static int
> > > > > +nfsd4_cb_getattr_done(struct nfsd4_callback *cb, struct rpc_task *task)
> > > > > +{
> > > > > + struct nfs4_cb_fattr *ncf =
> > > > > + container_of(cb, struct nfs4_cb_fattr, ncf_getattr);
> > > > > +
> > > > > + ncf->ncf_cb_status = task->tk_status;
> > > > > + switch (task->tk_status) {
> > > > > + case -NFS4ERR_DELAY:
> > > > > + rpc_delay(task, 2 * HZ);
> > > > > + return 0;
> > > > > + default:
> > > > > + return 1;
> > > > > + }
> > > > > +}
> > > > > +
> > > > > +static void
> > > > > +nfsd4_cb_getattr_release(struct nfsd4_callback *cb)
> > > > > +{
> > > > > + struct nfs4_cb_fattr *ncf =
> > > > > + container_of(cb, struct nfs4_cb_fattr, ncf_getattr);
> > > > > + struct nfs4_delegation *dp =
> > > > > + container_of(ncf, struct nfs4_delegation, dl_cb_fattr);
> > > > > +
> > > > > + nfs4_put_stid(&dp->dl_stid);
> > > > > + clear_bit(CB_GETATTR_BUSY, &ncf->ncf_cb_flags);
> > > > > + wake_up_bit(&ncf->ncf_cb_flags, CB_GETATTR_BUSY);
> > > > > +}
> > > > > +
> > > > What happens if the client responds after the GETATTR timer elapses?
> > > > Can nfsd4_cb_getattr_release over-write memory that is now being
> > > > used for something else?
> > > The refcount added in nfs4_cb_getattr keeps the delegation state valid
> > > until it's released here by nfs4_put_stid.
> > If the client never replies, does that pin the stateid?
>
> Won't RPC timeout on waiting for reply?
> This is the same behavior as when recalling a delegation,
> nfsd_break_deleg_cb and nfsd4_cb_recall_release.
Fair enough. Looking forward to v2.
--
Chuck Lever