2023-05-15 00:56:09

by Dai Ngo

[permalink] [raw]
Subject: [PATCH v2 4/4] NFSD: handle GETATTR conflict with write delegation

If the GETATTR request on a file that has write delegation in effect
and the request attributes include the change info and size attribute
then the request is handled as below:

Server sends CB_GETATTR to client to get the latest change info and file
size. If these values are the same as the server's cached values then
the GETATTR proceeds as normal.

If either the change info or file size is different from the server's
cached values, or the file was already marked as modified, then:

. update time_modify and time_metadata into file's metadata
with current time

. encode GETATTR as normal except the file size is encoded with
the value returned from CB_GETATTR

. mark the file as modified

If the CB_GETATTR fails for any reasons, the delegation is recalled
and NFS4ERR_DELAY is returned for the GETATTR.

Signed-off-by: Dai Ngo <[email protected]>
---
fs/nfsd/nfs4state.c | 58 ++++++++++++++++++++++++++++++++++++
fs/nfsd/nfs4xdr.c | 84 ++++++++++++++++++++++++++++++++++++++++++++++++++++-
fs/nfsd/state.h | 7 +++++
3 files changed, 148 insertions(+), 1 deletion(-)

diff --git a/fs/nfsd/nfs4state.c b/fs/nfsd/nfs4state.c
index 09a9e16407f9..fb305b28a090 100644
--- a/fs/nfsd/nfs4state.c
+++ b/fs/nfsd/nfs4state.c
@@ -127,6 +127,7 @@ static void free_session(struct nfsd4_session *);

static const struct nfsd4_callback_ops nfsd4_cb_recall_ops;
static const struct nfsd4_callback_ops nfsd4_cb_notify_lock_ops;
+static const struct nfsd4_callback_ops nfsd4_cb_getattr_ops;

static struct workqueue_struct *laundry_wq;

@@ -1175,6 +1176,10 @@ alloc_init_deleg(struct nfs4_client *clp, struct nfs4_file *fp,
dp->dl_recalled = false;
nfsd4_init_cb(&dp->dl_recall, dp->dl_stid.sc_client,
&nfsd4_cb_recall_ops, NFSPROC4_CLNT_CB_RECALL);
+ nfsd4_init_cb(&dp->dl_cb_fattr.ncf_getattr, dp->dl_stid.sc_client,
+ &nfsd4_cb_getattr_ops, NFSPROC4_CLNT_CB_GETATTR);
+ dp->dl_cb_fattr.ncf_file_modified = false;
+ dp->dl_cb_fattr.ncf_cb_bmap[0] = FATTR4_WORD0_CHANGE | FATTR4_WORD0_SIZE;
get_nfs4_file(fp);
dp->dl_stid.sc_file = fp;
return dp;
@@ -2882,11 +2887,49 @@ nfsd4_cb_recall_any_release(struct nfsd4_callback *cb)
spin_unlock(&nn->client_lock);
}

+static int
+nfsd4_cb_getattr_done(struct nfsd4_callback *cb, struct rpc_task *task)
+{
+ struct nfs4_cb_fattr *ncf =
+ container_of(cb, struct nfs4_cb_fattr, ncf_getattr);
+
+ ncf->ncf_cb_status = task->tk_status;
+ switch (task->tk_status) {
+ case -NFS4ERR_DELAY:
+ rpc_delay(task, 2 * HZ);
+ return 0;
+ default:
+ return 1;
+ }
+}
+
+static void
+nfsd4_cb_getattr_release(struct nfsd4_callback *cb)
+{
+ struct nfs4_cb_fattr *ncf =
+ container_of(cb, struct nfs4_cb_fattr, ncf_getattr);
+
+ clear_bit(CB_GETATTR_BUSY, &ncf->ncf_cb_flags);
+ wake_up_bit(&ncf->ncf_cb_flags, CB_GETATTR_BUSY);
+}
+
static const struct nfsd4_callback_ops nfsd4_cb_recall_any_ops = {
.done = nfsd4_cb_recall_any_done,
.release = nfsd4_cb_recall_any_release,
};

+static const struct nfsd4_callback_ops nfsd4_cb_getattr_ops = {
+ .done = nfsd4_cb_getattr_done,
+ .release = nfsd4_cb_getattr_release,
+};
+
+void nfs4_cb_getattr(struct nfs4_cb_fattr *ncf)
+{
+ if (test_and_set_bit(CB_GETATTR_BUSY, &ncf->ncf_cb_flags))
+ return;
+ nfsd4_run_cb(&ncf->ncf_getattr);
+}
+
static struct nfs4_client *create_client(struct xdr_netobj name,
struct svc_rqst *rqstp, nfs4_verifier *verf)
{
@@ -5591,6 +5634,8 @@ nfs4_open_delegation(struct nfsd4_open *open, struct nfs4_ol_stateid *stp,
int cb_up;
int status = 0;
u32 wdeleg = false;
+ struct kstat stat;
+ struct path path;

cb_up = nfsd4_cb_channel_good(oo->oo_owner.so_client);
open->op_recall = 0;
@@ -5626,6 +5671,19 @@ nfs4_open_delegation(struct nfsd4_open *open, struct nfs4_ol_stateid *stp,
wdeleg = open->op_share_access & NFS4_SHARE_ACCESS_WRITE;
open->op_delegate_type = wdeleg ?
NFS4_OPEN_DELEGATE_WRITE : NFS4_OPEN_DELEGATE_READ;
+ if (wdeleg) {
+ path.mnt = currentfh->fh_export->ex_path.mnt;
+ path.dentry = currentfh->fh_dentry;
+ if (vfs_getattr(&path, &stat, STATX_BASIC_STATS,
+ AT_STATX_SYNC_AS_STAT)) {
+ nfs4_put_stid(&dp->dl_stid);
+ destroy_delegation(dp);
+ goto out_no_deleg;
+ }
+ dp->dl_cb_fattr.ncf_cur_fsize = stat.size;
+ dp->dl_cb_fattr.ncf_initial_cinfo = nfsd4_change_attribute(&stat,
+ d_inode(currentfh->fh_dentry));
+ }
nfs4_put_stid(&dp->dl_stid);
return;
out_no_deleg:
diff --git a/fs/nfsd/nfs4xdr.c b/fs/nfsd/nfs4xdr.c
index 76db2fe29624..5d7e11db8ccf 100644
--- a/fs/nfsd/nfs4xdr.c
+++ b/fs/nfsd/nfs4xdr.c
@@ -2920,6 +2920,77 @@ nfsd4_encode_bitmap(struct xdr_stream *xdr, u32 bmval0, u32 bmval1, u32 bmval2)
return nfserr_resource;
}

+static struct file_lock *
+nfs4_wrdeleg_filelock(struct svc_rqst *rqstp, struct inode *inode)
+{
+ struct file_lock_context *ctx;
+ struct file_lock *fl;
+
+ ctx = locks_inode_context(inode);
+ if (!ctx)
+ return NULL;
+ spin_lock(&ctx->flc_lock);
+ list_for_each_entry(fl, &ctx->flc_lease, fl_list) {
+ if (fl->fl_type == F_WRLCK) {
+ spin_unlock(&ctx->flc_lock);
+ return fl;
+ }
+ }
+ spin_unlock(&ctx->flc_lock);
+ return NULL;
+}
+
+static __be32
+nfs4_handle_wrdeleg_conflict(struct svc_rqst *rqstp, struct inode *inode,
+ bool *modified, u64 *size)
+{
+ __be32 status;
+ struct file_lock *fl;
+ struct nfs4_delegation *dp;
+ struct nfs4_cb_fattr *ncf;
+ struct iattr attrs;
+
+ *modified = false;
+ fl = nfs4_wrdeleg_filelock(rqstp, inode);
+ if (!fl)
+ return 0;
+ dp = fl->fl_owner;
+ ncf = &dp->dl_cb_fattr;
+ if (dp->dl_recall.cb_clp == *(rqstp->rq_lease_breaker))
+ return 0;
+
+ refcount_inc(&dp->dl_stid.sc_count);
+ nfs4_cb_getattr(&dp->dl_cb_fattr);
+ wait_on_bit(&ncf->ncf_cb_flags, CB_GETATTR_BUSY, TASK_INTERRUPTIBLE);
+ if (ncf->ncf_cb_status) {
+ status = nfserrno(nfsd_open_break_lease(inode, NFSD_MAY_READ));
+ nfs4_put_stid(&dp->dl_stid);
+ return status;
+ }
+ ncf->ncf_cur_fsize = ncf->ncf_cb_fsize;
+ if (!ncf->ncf_file_modified &&
+ (ncf->ncf_initial_cinfo != ncf->ncf_cb_change ||
+ ncf->ncf_cur_fsize != ncf->ncf_cb_fsize)) {
+ ncf->ncf_file_modified = true;
+ }
+
+ if (ncf->ncf_file_modified) {
+ /*
+ * The server would not update the file's metadata
+ * with the client's modified size.
+ * nfsd4 change attribute is constructed from ctime.
+ */
+ attrs.ia_mtime = attrs.ia_ctime = current_time(inode);
+ attrs.ia_valid = ATTR_MTIME | ATTR_CTIME;
+ setattr_copy(&nop_mnt_idmap, inode, &attrs);
+ mark_inode_dirty(inode);
+ *size = ncf->ncf_cur_fsize;
+ *modified = true;
+ }
+ nfs4_put_stid(&dp->dl_stid);
+ return 0;
+}
+
/*
* Note: @fhp can be NULL; in this case, we might have to compose the filehandle
* ourselves.
@@ -2957,6 +3028,8 @@ nfsd4_encode_fattr(struct xdr_stream *xdr, struct svc_fh *fhp,
.dentry = dentry,
};
struct nfsd_net *nn = net_generic(SVC_NET(rqstp), nfsd_net_id);
+ bool file_modified;
+ u64 size = 0;

BUG_ON(bmval1 & NFSD_WRITEONLY_ATTRS_WORD1);
BUG_ON(!nfsd_attrs_supported(minorversion, bmval));
@@ -2966,6 +3039,12 @@ nfsd4_encode_fattr(struct xdr_stream *xdr, struct svc_fh *fhp,
if (status)
goto out;
}
+ if (bmval0 & (FATTR4_WORD0_CHANGE | FATTR4_WORD0_SIZE)) {
+ status = nfs4_handle_wrdeleg_conflict(rqstp, d_inode(dentry),
+ &file_modified, &size);
+ if (status)
+ goto out;
+ }

err = vfs_getattr(&path, &stat,
STATX_BASIC_STATS | STATX_BTIME | STATX_CHANGE_COOKIE,
@@ -3089,7 +3168,10 @@ nfsd4_encode_fattr(struct xdr_stream *xdr, struct svc_fh *fhp,
p = xdr_reserve_space(xdr, 8);
if (!p)
goto out_resource;
- p = xdr_encode_hyper(p, stat.size);
+ if (file_modified)
+ p = xdr_encode_hyper(p, size);
+ else
+ p = xdr_encode_hyper(p, stat.size);
}
if (bmval0 & FATTR4_WORD0_LINK_SUPPORT) {
p = xdr_reserve_space(xdr, 4);
diff --git a/fs/nfsd/state.h b/fs/nfsd/state.h
index 9fb69ed8ae80..b20b65fe89b4 100644
--- a/fs/nfsd/state.h
+++ b/fs/nfsd/state.h
@@ -121,6 +121,10 @@ struct nfs4_cb_fattr {
struct nfsd4_callback ncf_getattr;
u32 ncf_cb_status;
u32 ncf_cb_bmap[1];
+ unsigned long ncf_cb_flags;
+ bool ncf_file_modified;
+ u64 ncf_initial_cinfo;
+ u64 ncf_cur_fsize;

/* from CB_GETATTR reply */
u64 ncf_cb_change;
@@ -744,6 +748,9 @@ extern void nfsd4_client_record_remove(struct nfs4_client *clp);
extern int nfsd4_client_record_check(struct nfs4_client *clp);
extern void nfsd4_record_grace_done(struct nfsd_net *nn);

+/* CB_GETTTAR */
+extern void nfs4_cb_getattr(struct nfs4_cb_fattr *ncf);
+
static inline bool try_to_expire_client(struct nfs4_client *clp)
{
cmpxchg(&clp->cl_state, NFSD4_COURTESY, NFSD4_EXPIRABLE);
--
2.9.5



2023-05-15 11:58:12

by Jeffrey Layton

[permalink] [raw]
Subject: Re: [PATCH v2 4/4] NFSD: handle GETATTR conflict with write delegation

On Sun, 2023-05-14 at 17:20 -0700, Dai Ngo wrote:
> If the GETATTR request on a file that has write delegation in effect
> and the request attributes include the change info and size attribute
> then the request is handled as below:
>
> Server sends CB_GETATTR to client to get the latest change info and file
> size. If these values are the same as the server's cached values then
> the GETATTR proceeds as normal.
>
> If either the change info or file size is different from the server's
> cached values, or the file was already marked as modified, then:
>
> . update time_modify and time_metadata into file's metadata
> with current time
>
> . encode GETATTR as normal except the file size is encoded with
> the value returned from CB_GETATTR
>
> . mark the file as modified
>
> If the CB_GETATTR fails for any reasons, the delegation is recalled
> and NFS4ERR_DELAY is returned for the GETATTR.
>
> Signed-off-by: Dai Ngo <[email protected]>
> ---
> fs/nfsd/nfs4state.c | 58 ++++++++++++++++++++++++++++++++++++
> fs/nfsd/nfs4xdr.c | 84 ++++++++++++++++++++++++++++++++++++++++++++++++++++-
> fs/nfsd/state.h | 7 +++++
> 3 files changed, 148 insertions(+), 1 deletion(-)
>
> diff --git a/fs/nfsd/nfs4state.c b/fs/nfsd/nfs4state.c
> index 09a9e16407f9..fb305b28a090 100644
> --- a/fs/nfsd/nfs4state.c
> +++ b/fs/nfsd/nfs4state.c
> @@ -127,6 +127,7 @@ static void free_session(struct nfsd4_session *);
>
> static const struct nfsd4_callback_ops nfsd4_cb_recall_ops;
> static const struct nfsd4_callback_ops nfsd4_cb_notify_lock_ops;
> +static const struct nfsd4_callback_ops nfsd4_cb_getattr_ops;
>
> static struct workqueue_struct *laundry_wq;
>
> @@ -1175,6 +1176,10 @@ alloc_init_deleg(struct nfs4_client *clp, struct nfs4_file *fp,
> dp->dl_recalled = false;
> nfsd4_init_cb(&dp->dl_recall, dp->dl_stid.sc_client,
> &nfsd4_cb_recall_ops, NFSPROC4_CLNT_CB_RECALL);
> + nfsd4_init_cb(&dp->dl_cb_fattr.ncf_getattr, dp->dl_stid.sc_client,
> + &nfsd4_cb_getattr_ops, NFSPROC4_CLNT_CB_GETATTR);
> + dp->dl_cb_fattr.ncf_file_modified = false;
> + dp->dl_cb_fattr.ncf_cb_bmap[0] = FATTR4_WORD0_CHANGE | FATTR4_WORD0_SIZE;
> get_nfs4_file(fp);
> dp->dl_stid.sc_file = fp;
> return dp;
> @@ -2882,11 +2887,49 @@ nfsd4_cb_recall_any_release(struct nfsd4_callback *cb)
> spin_unlock(&nn->client_lock);
> }
>
> +static int
> +nfsd4_cb_getattr_done(struct nfsd4_callback *cb, struct rpc_task *task)
> +{
> + struct nfs4_cb_fattr *ncf =
> + container_of(cb, struct nfs4_cb_fattr, ncf_getattr);
> +
> + ncf->ncf_cb_status = task->tk_status;
> + switch (task->tk_status) {
> + case -NFS4ERR_DELAY:
> + rpc_delay(task, 2 * HZ);
> + return 0;
> + default:
> + return 1;
> + }
> +}
> +
> +static void
> +nfsd4_cb_getattr_release(struct nfsd4_callback *cb)
> +{
> + struct nfs4_cb_fattr *ncf =
> + container_of(cb, struct nfs4_cb_fattr, ncf_getattr);
> +
> + clear_bit(CB_GETATTR_BUSY, &ncf->ncf_cb_flags);
> + wake_up_bit(&ncf->ncf_cb_flags, CB_GETATTR_BUSY);
> +}
> +
> static const struct nfsd4_callback_ops nfsd4_cb_recall_any_ops = {
> .done = nfsd4_cb_recall_any_done,
> .release = nfsd4_cb_recall_any_release,
> };
>
> +static const struct nfsd4_callback_ops nfsd4_cb_getattr_ops = {
> + .done = nfsd4_cb_getattr_done,
> + .release = nfsd4_cb_getattr_release,
> +};
> +
> +void nfs4_cb_getattr(struct nfs4_cb_fattr *ncf)
> +{
> + if (test_and_set_bit(CB_GETATTR_BUSY, &ncf->ncf_cb_flags))
> + return;
> + nfsd4_run_cb(&ncf->ncf_getattr);
> +}
> +
> static struct nfs4_client *create_client(struct xdr_netobj name,
> struct svc_rqst *rqstp, nfs4_verifier *verf)
> {
> @@ -5591,6 +5634,8 @@ nfs4_open_delegation(struct nfsd4_open *open, struct nfs4_ol_stateid *stp,
> int cb_up;
> int status = 0;
> u32 wdeleg = false;
> + struct kstat stat;
> + struct path path;
>
> cb_up = nfsd4_cb_channel_good(oo->oo_owner.so_client);
> open->op_recall = 0;
> @@ -5626,6 +5671,19 @@ nfs4_open_delegation(struct nfsd4_open *open, struct nfs4_ol_stateid *stp,
> wdeleg = open->op_share_access & NFS4_SHARE_ACCESS_WRITE;
> open->op_delegate_type = wdeleg ?
> NFS4_OPEN_DELEGATE_WRITE : NFS4_OPEN_DELEGATE_READ;
> + if (wdeleg) {
> + path.mnt = currentfh->fh_export->ex_path.mnt;
> + path.dentry = currentfh->fh_dentry;
> + if (vfs_getattr(&path, &stat, STATX_BASIC_STATS,

I think you want (STATX_SIZE|STATX_CTIME|STATX_CHANGE_COOKIE) here
instead of BASIC_STATS. You might not get the change cookie otherwise,
even when it's supported.

> + AT_STATX_SYNC_AS_STAT)) {
> + nfs4_put_stid(&dp->dl_stid);
> + destroy_delegation(dp);
> + goto out_no_deleg;
> + }
> + dp->dl_cb_fattr.ncf_cur_fsize = stat.size;
> + dp->dl_cb_fattr.ncf_initial_cinfo = nfsd4_change_attribute(&stat,
> + d_inode(currentfh->fh_dentry));
> + }
> nfs4_put_stid(&dp->dl_stid);
> return;
> out_no_deleg:
> diff --git a/fs/nfsd/nfs4xdr.c b/fs/nfsd/nfs4xdr.c
> index 76db2fe29624..5d7e11db8ccf 100644
> --- a/fs/nfsd/nfs4xdr.c
> +++ b/fs/nfsd/nfs4xdr.c
> @@ -2920,6 +2920,77 @@ nfsd4_encode_bitmap(struct xdr_stream *xdr, u32 bmval0, u32 bmval1, u32 bmval2)
> return nfserr_resource;
> }
>
> +static struct file_lock *
> +nfs4_wrdeleg_filelock(struct svc_rqst *rqstp, struct inode *inode)
> +{
> + struct file_lock_context *ctx;
> + struct file_lock *fl;
> +
> + ctx = locks_inode_context(inode);
> + if (!ctx)
> + return NULL;
> + spin_lock(&ctx->flc_lock);
> + list_for_each_entry(fl, &ctx->flc_lease, fl_list) {
> + if (fl->fl_type == F_WRLCK) {
> + spin_unlock(&ctx->flc_lock);
> + return fl;
> + }
> + }

When there is a write lease, then there cannot be any read leases, so
you don't need to walk the entire list here. Just check the first
element and see whether it's a write lease.

> + spin_unlock(&ctx->flc_lock);
> + return NULL;
> +}
> +
> +static __be32
> +nfs4_handle_wrdeleg_conflict(struct svc_rqst *rqstp, struct inode *inode,
> + bool *modified, u64 *size)
> +{
> + __be32 status;
> + struct file_lock *fl;
> + struct nfs4_delegation *dp;
> + struct nfs4_cb_fattr *ncf;
> + struct iattr attrs;
> +
> + *modified = false;
> + fl = nfs4_wrdeleg_filelock(rqstp, inode);
> + if (!fl)
> + return 0;
> + dp = fl->fl_owner;
> + ncf = &dp->dl_cb_fattr;
> + if (dp->dl_recall.cb_clp == *(rqstp->rq_lease_breaker))
> + return 0;
> +
> + refcount_inc(&dp->dl_stid.sc_count);
> + nfs4_cb_getattr(&dp->dl_cb_fattr);
> + wait_on_bit(&ncf->ncf_cb_flags, CB_GETATTR_BUSY, TASK_INTERRUPTIBLE);
> + if (ncf->ncf_cb_status) {
> + status = nfserrno(nfsd_open_break_lease(inode, NFSD_MAY_READ));
> + nfs4_put_stid(&dp->dl_stid);
> + return status;
> + }
> + ncf->ncf_cur_fsize = ncf->ncf_cb_fsize;
> + if (!ncf->ncf_file_modified &&
> + (ncf->ncf_initial_cinfo != ncf->ncf_cb_change ||
> + ncf->ncf_cur_fsize != ncf->ncf_cb_fsize)) {
> + ncf->ncf_file_modified = true;
> + }
> +
> + if (ncf->ncf_file_modified) {
> + /*
> + * The server would not update the file's metadata
> + * with the client's modified size.
> + * nfsd4 change attribute is constructed from ctime.
> + */
> + attrs.ia_mtime = attrs.ia_ctime = current_time(inode);
> + attrs.ia_valid = ATTR_MTIME | ATTR_CTIME;
> + setattr_copy(&nop_mnt_idmap, inode, &attrs);
> + mark_inode_dirty(inode);
> + *size = ncf->ncf_cur_fsize;
> + *modified = true;
> + }
> + nfs4_put_stid(&dp->dl_stid);
> + return 0;
> +}
> +
> /*
> * Note: @fhp can be NULL; in this case, we might have to compose the filehandle
> * ourselves.
> @@ -2957,6 +3028,8 @@ nfsd4_encode_fattr(struct xdr_stream *xdr, struct svc_fh *fhp,
> .dentry = dentry,
> };
> struct nfsd_net *nn = net_generic(SVC_NET(rqstp), nfsd_net_id);
> + bool file_modified;
> + u64 size = 0;
>
> BUG_ON(bmval1 & NFSD_WRITEONLY_ATTRS_WORD1);
> BUG_ON(!nfsd_attrs_supported(minorversion, bmval));
> @@ -2966,6 +3039,12 @@ nfsd4_encode_fattr(struct xdr_stream *xdr, struct svc_fh *fhp,
> if (status)
> goto out;
> }
> + if (bmval0 & (FATTR4_WORD0_CHANGE | FATTR4_WORD0_SIZE)) {
> + status = nfs4_handle_wrdeleg_conflict(rqstp, d_inode(dentry),
> + &file_modified, &size);
> + if (status)
> + goto out;
> + }
>
> err = vfs_getattr(&path, &stat,
> STATX_BASIC_STATS | STATX_BTIME | STATX_CHANGE_COOKIE,
> @@ -3089,7 +3168,10 @@ nfsd4_encode_fattr(struct xdr_stream *xdr, struct svc_fh *fhp,
> p = xdr_reserve_space(xdr, 8);
> if (!p)
> goto out_resource;
> - p = xdr_encode_hyper(p, stat.size);
> + if (file_modified)
> + p = xdr_encode_hyper(p, size);
> + else
> + p = xdr_encode_hyper(p, stat.size);
> }
> if (bmval0 & FATTR4_WORD0_LINK_SUPPORT) {
> p = xdr_reserve_space(xdr, 4);
> diff --git a/fs/nfsd/state.h b/fs/nfsd/state.h
> index 9fb69ed8ae80..b20b65fe89b4 100644
> --- a/fs/nfsd/state.h
> +++ b/fs/nfsd/state.h
> @@ -121,6 +121,10 @@ struct nfs4_cb_fattr {
> struct nfsd4_callback ncf_getattr;
> u32 ncf_cb_status;
> u32 ncf_cb_bmap[1];
> + unsigned long ncf_cb_flags;
> + bool ncf_file_modified;
> + u64 ncf_initial_cinfo;
> + u64 ncf_cur_fsize;
>
> /* from CB_GETATTR reply */
> u64 ncf_cb_change;
> @@ -744,6 +748,9 @@ extern void nfsd4_client_record_remove(struct nfs4_client *clp);
> extern int nfsd4_client_record_check(struct nfs4_client *clp);
> extern void nfsd4_record_grace_done(struct nfsd_net *nn);
>
> +/* CB_GETTTAR */
> +extern void nfs4_cb_getattr(struct nfs4_cb_fattr *ncf);
> +
> static inline bool try_to_expire_client(struct nfs4_client *clp)
> {
> cmpxchg(&clp->cl_state, NFSD4_COURTESY, NFSD4_EXPIRABLE);

--
Jeff Layton <[email protected]>

2023-05-15 18:04:31

by Dai Ngo

[permalink] [raw]
Subject: Re: [PATCH v2 4/4] NFSD: handle GETATTR conflict with write delegation


On 5/15/23 4:51 AM, Jeff Layton wrote:
> On Sun, 2023-05-14 at 17:20 -0700, Dai Ngo wrote:
>> If the GETATTR request on a file that has write delegation in effect
>> and the request attributes include the change info and size attribute
>> then the request is handled as below:
>>
>> Server sends CB_GETATTR to client to get the latest change info and file
>> size. If these values are the same as the server's cached values then
>> the GETATTR proceeds as normal.
>>
>> If either the change info or file size is different from the server's
>> cached values, or the file was already marked as modified, then:
>>
>> . update time_modify and time_metadata into file's metadata
>> with current time
>>
>> . encode GETATTR as normal except the file size is encoded with
>> the value returned from CB_GETATTR
>>
>> . mark the file as modified
>>
>> If the CB_GETATTR fails for any reasons, the delegation is recalled
>> and NFS4ERR_DELAY is returned for the GETATTR.
>>
>> Signed-off-by: Dai Ngo <[email protected]>
>> ---
>> fs/nfsd/nfs4state.c | 58 ++++++++++++++++++++++++++++++++++++
>> fs/nfsd/nfs4xdr.c | 84 ++++++++++++++++++++++++++++++++++++++++++++++++++++-
>> fs/nfsd/state.h | 7 +++++
>> 3 files changed, 148 insertions(+), 1 deletion(-)
>>
>> diff --git a/fs/nfsd/nfs4state.c b/fs/nfsd/nfs4state.c
>> index 09a9e16407f9..fb305b28a090 100644
>> --- a/fs/nfsd/nfs4state.c
>> +++ b/fs/nfsd/nfs4state.c
>> @@ -127,6 +127,7 @@ static void free_session(struct nfsd4_session *);
>>
>> static const struct nfsd4_callback_ops nfsd4_cb_recall_ops;
>> static const struct nfsd4_callback_ops nfsd4_cb_notify_lock_ops;
>> +static const struct nfsd4_callback_ops nfsd4_cb_getattr_ops;
>>
>> static struct workqueue_struct *laundry_wq;
>>
>> @@ -1175,6 +1176,10 @@ alloc_init_deleg(struct nfs4_client *clp, struct nfs4_file *fp,
>> dp->dl_recalled = false;
>> nfsd4_init_cb(&dp->dl_recall, dp->dl_stid.sc_client,
>> &nfsd4_cb_recall_ops, NFSPROC4_CLNT_CB_RECALL);
>> + nfsd4_init_cb(&dp->dl_cb_fattr.ncf_getattr, dp->dl_stid.sc_client,
>> + &nfsd4_cb_getattr_ops, NFSPROC4_CLNT_CB_GETATTR);
>> + dp->dl_cb_fattr.ncf_file_modified = false;
>> + dp->dl_cb_fattr.ncf_cb_bmap[0] = FATTR4_WORD0_CHANGE | FATTR4_WORD0_SIZE;
>> get_nfs4_file(fp);
>> dp->dl_stid.sc_file = fp;
>> return dp;
>> @@ -2882,11 +2887,49 @@ nfsd4_cb_recall_any_release(struct nfsd4_callback *cb)
>> spin_unlock(&nn->client_lock);
>> }
>>
>> +static int
>> +nfsd4_cb_getattr_done(struct nfsd4_callback *cb, struct rpc_task *task)
>> +{
>> + struct nfs4_cb_fattr *ncf =
>> + container_of(cb, struct nfs4_cb_fattr, ncf_getattr);
>> +
>> + ncf->ncf_cb_status = task->tk_status;
>> + switch (task->tk_status) {
>> + case -NFS4ERR_DELAY:
>> + rpc_delay(task, 2 * HZ);
>> + return 0;
>> + default:
>> + return 1;
>> + }
>> +}
>> +
>> +static void
>> +nfsd4_cb_getattr_release(struct nfsd4_callback *cb)
>> +{
>> + struct nfs4_cb_fattr *ncf =
>> + container_of(cb, struct nfs4_cb_fattr, ncf_getattr);
>> +
>> + clear_bit(CB_GETATTR_BUSY, &ncf->ncf_cb_flags);
>> + wake_up_bit(&ncf->ncf_cb_flags, CB_GETATTR_BUSY);
>> +}
>> +
>> static const struct nfsd4_callback_ops nfsd4_cb_recall_any_ops = {
>> .done = nfsd4_cb_recall_any_done,
>> .release = nfsd4_cb_recall_any_release,
>> };
>>
>> +static const struct nfsd4_callback_ops nfsd4_cb_getattr_ops = {
>> + .done = nfsd4_cb_getattr_done,
>> + .release = nfsd4_cb_getattr_release,
>> +};
>> +
>> +void nfs4_cb_getattr(struct nfs4_cb_fattr *ncf)
>> +{
>> + if (test_and_set_bit(CB_GETATTR_BUSY, &ncf->ncf_cb_flags))
>> + return;
>> + nfsd4_run_cb(&ncf->ncf_getattr);
>> +}
>> +
>> static struct nfs4_client *create_client(struct xdr_netobj name,
>> struct svc_rqst *rqstp, nfs4_verifier *verf)
>> {
>> @@ -5591,6 +5634,8 @@ nfs4_open_delegation(struct nfsd4_open *open, struct nfs4_ol_stateid *stp,
>> int cb_up;
>> int status = 0;
>> u32 wdeleg = false;
>> + struct kstat stat;
>> + struct path path;
>>
>> cb_up = nfsd4_cb_channel_good(oo->oo_owner.so_client);
>> open->op_recall = 0;
>> @@ -5626,6 +5671,19 @@ nfs4_open_delegation(struct nfsd4_open *open, struct nfs4_ol_stateid *stp,
>> wdeleg = open->op_share_access & NFS4_SHARE_ACCESS_WRITE;
>> open->op_delegate_type = wdeleg ?
>> NFS4_OPEN_DELEGATE_WRITE : NFS4_OPEN_DELEGATE_READ;
>> + if (wdeleg) {
>> + path.mnt = currentfh->fh_export->ex_path.mnt;
>> + path.dentry = currentfh->fh_dentry;
>> + if (vfs_getattr(&path, &stat, STATX_BASIC_STATS,
> I think you want (STATX_SIZE|STATX_CTIME|STATX_CHANGE_COOKIE) here
> instead of BASIC_STATS. You might not get the change cookie otherwise,
> even when it's supported.

Fix in v3.

>
>> + AT_STATX_SYNC_AS_STAT)) {
>> + nfs4_put_stid(&dp->dl_stid);
>> + destroy_delegation(dp);
>> + goto out_no_deleg;
>> + }
>> + dp->dl_cb_fattr.ncf_cur_fsize = stat.size;
>> + dp->dl_cb_fattr.ncf_initial_cinfo = nfsd4_change_attribute(&stat,
>> + d_inode(currentfh->fh_dentry));
>> + }
>> nfs4_put_stid(&dp->dl_stid);
>> return;
>> out_no_deleg:
>> diff --git a/fs/nfsd/nfs4xdr.c b/fs/nfsd/nfs4xdr.c
>> index 76db2fe29624..5d7e11db8ccf 100644
>> --- a/fs/nfsd/nfs4xdr.c
>> +++ b/fs/nfsd/nfs4xdr.c
>> @@ -2920,6 +2920,77 @@ nfsd4_encode_bitmap(struct xdr_stream *xdr, u32 bmval0, u32 bmval1, u32 bmval2)
>> return nfserr_resource;
>> }
>>
>> +static struct file_lock *
>> +nfs4_wrdeleg_filelock(struct svc_rqst *rqstp, struct inode *inode)
>> +{
>> + struct file_lock_context *ctx;
>> + struct file_lock *fl;
>> +
>> + ctx = locks_inode_context(inode);
>> + if (!ctx)
>> + return NULL;
>> + spin_lock(&ctx->flc_lock);
>> + list_for_each_entry(fl, &ctx->flc_lease, fl_list) {
>> + if (fl->fl_type == F_WRLCK) {
>> + spin_unlock(&ctx->flc_lock);
>> + return fl;
>> + }
>> + }
> When there is a write lease, then there cannot be any read leases, so
> you don't need to walk the entire list here. Just check the first
> element and see whether it's a write lease.

Right, fix in v3.

Thanks,
-Dai

>
>> + spin_unlock(&ctx->flc_lock);
>> + return NULL;
>> +}
>> +
>> +static __be32
>> +nfs4_handle_wrdeleg_conflict(struct svc_rqst *rqstp, struct inode *inode,
>> + bool *modified, u64 *size)
>> +{
>> + __be32 status;
>> + struct file_lock *fl;
>> + struct nfs4_delegation *dp;
>> + struct nfs4_cb_fattr *ncf;
>> + struct iattr attrs;
>> +
>> + *modified = false;
>> + fl = nfs4_wrdeleg_filelock(rqstp, inode);
>> + if (!fl)
>> + return 0;
>> + dp = fl->fl_owner;
>> + ncf = &dp->dl_cb_fattr;
>> + if (dp->dl_recall.cb_clp == *(rqstp->rq_lease_breaker))
>> + return 0;
>> +
>> + refcount_inc(&dp->dl_stid.sc_count);
>> + nfs4_cb_getattr(&dp->dl_cb_fattr);
>> + wait_on_bit(&ncf->ncf_cb_flags, CB_GETATTR_BUSY, TASK_INTERRUPTIBLE);
>> + if (ncf->ncf_cb_status) {
>> + status = nfserrno(nfsd_open_break_lease(inode, NFSD_MAY_READ));
>> + nfs4_put_stid(&dp->dl_stid);
>> + return status;
>> + }
>> + ncf->ncf_cur_fsize = ncf->ncf_cb_fsize;
>> + if (!ncf->ncf_file_modified &&
>> + (ncf->ncf_initial_cinfo != ncf->ncf_cb_change ||
>> + ncf->ncf_cur_fsize != ncf->ncf_cb_fsize)) {
>> + ncf->ncf_file_modified = true;
>> + }
>> +
>> + if (ncf->ncf_file_modified) {
>> + /*
>> + * The server would not update the file's metadata
>> + * with the client's modified size.
>> + * nfsd4 change attribute is constructed from ctime.
>> + */
>> + attrs.ia_mtime = attrs.ia_ctime = current_time(inode);
>> + attrs.ia_valid = ATTR_MTIME | ATTR_CTIME;
>> + setattr_copy(&nop_mnt_idmap, inode, &attrs);
>> + mark_inode_dirty(inode);
>> + *size = ncf->ncf_cur_fsize;
>> + *modified = true;
>> + }
>> + nfs4_put_stid(&dp->dl_stid);
>> + return 0;
>> +}
>> +
>> /*
>> * Note: @fhp can be NULL; in this case, we might have to compose the filehandle
>> * ourselves.
>> @@ -2957,6 +3028,8 @@ nfsd4_encode_fattr(struct xdr_stream *xdr, struct svc_fh *fhp,
>> .dentry = dentry,
>> };
>> struct nfsd_net *nn = net_generic(SVC_NET(rqstp), nfsd_net_id);
>> + bool file_modified;
>> + u64 size = 0;
>>
>> BUG_ON(bmval1 & NFSD_WRITEONLY_ATTRS_WORD1);
>> BUG_ON(!nfsd_attrs_supported(minorversion, bmval));
>> @@ -2966,6 +3039,12 @@ nfsd4_encode_fattr(struct xdr_stream *xdr, struct svc_fh *fhp,
>> if (status)
>> goto out;
>> }
>> + if (bmval0 & (FATTR4_WORD0_CHANGE | FATTR4_WORD0_SIZE)) {
>> + status = nfs4_handle_wrdeleg_conflict(rqstp, d_inode(dentry),
>> + &file_modified, &size);
>> + if (status)
>> + goto out;
>> + }
>>
>> err = vfs_getattr(&path, &stat,
>> STATX_BASIC_STATS | STATX_BTIME | STATX_CHANGE_COOKIE,
>> @@ -3089,7 +3168,10 @@ nfsd4_encode_fattr(struct xdr_stream *xdr, struct svc_fh *fhp,
>> p = xdr_reserve_space(xdr, 8);
>> if (!p)
>> goto out_resource;
>> - p = xdr_encode_hyper(p, stat.size);
>> + if (file_modified)
>> + p = xdr_encode_hyper(p, size);
>> + else
>> + p = xdr_encode_hyper(p, stat.size);
>> }
>> if (bmval0 & FATTR4_WORD0_LINK_SUPPORT) {
>> p = xdr_reserve_space(xdr, 4);
>> diff --git a/fs/nfsd/state.h b/fs/nfsd/state.h
>> index 9fb69ed8ae80..b20b65fe89b4 100644
>> --- a/fs/nfsd/state.h
>> +++ b/fs/nfsd/state.h
>> @@ -121,6 +121,10 @@ struct nfs4_cb_fattr {
>> struct nfsd4_callback ncf_getattr;
>> u32 ncf_cb_status;
>> u32 ncf_cb_bmap[1];
>> + unsigned long ncf_cb_flags;
>> + bool ncf_file_modified;
>> + u64 ncf_initial_cinfo;
>> + u64 ncf_cur_fsize;
>>
>> /* from CB_GETATTR reply */
>> u64 ncf_cb_change;
>> @@ -744,6 +748,9 @@ extern void nfsd4_client_record_remove(struct nfs4_client *clp);
>> extern int nfsd4_client_record_check(struct nfs4_client *clp);
>> extern void nfsd4_record_grace_done(struct nfsd_net *nn);
>>
>> +/* CB_GETTTAR */
>> +extern void nfs4_cb_getattr(struct nfs4_cb_fattr *ncf);
>> +
>> static inline bool try_to_expire_client(struct nfs4_client *clp)
>> {
>> cmpxchg(&clp->cl_state, NFSD4_COURTESY, NFSD4_EXPIRABLE);

2023-05-15 18:21:06

by Olga Kornievskaia

[permalink] [raw]
Subject: Re: [PATCH v2 4/4] NFSD: handle GETATTR conflict with write delegation

On Sun, May 14, 2023 at 8:56 PM Dai Ngo <[email protected]> wrote:
>
> If the GETATTR request on a file that has write delegation in effect
> and the request attributes include the change info and size attribute
> then the request is handled as below:
>
> Server sends CB_GETATTR to client to get the latest change info and file
> size. If these values are the same as the server's cached values then
> the GETATTR proceeds as normal.
>
> If either the change info or file size is different from the server's
> cached values, or the file was already marked as modified, then:
>
> . update time_modify and time_metadata into file's metadata
> with current time
>
> . encode GETATTR as normal except the file size is encoded with
> the value returned from CB_GETATTR
>
> . mark the file as modified
>
> If the CB_GETATTR fails for any reasons, the delegation is recalled
> and NFS4ERR_DELAY is returned for the GETATTR.

Hi Dai,

I'm curious what does the server gain by implementing handling of
GETATTR with delegations? As far as I can tell it is not strictly
required by the RFC(s). When the file is being written any attempt at
querying its attribute is immediately stale.

>
> Signed-off-by: Dai Ngo <[email protected]>
> ---
> fs/nfsd/nfs4state.c | 58 ++++++++++++++++++++++++++++++++++++
> fs/nfsd/nfs4xdr.c | 84 ++++++++++++++++++++++++++++++++++++++++++++++++++++-
> fs/nfsd/state.h | 7 +++++
> 3 files changed, 148 insertions(+), 1 deletion(-)
>
> diff --git a/fs/nfsd/nfs4state.c b/fs/nfsd/nfs4state.c
> index 09a9e16407f9..fb305b28a090 100644
> --- a/fs/nfsd/nfs4state.c
> +++ b/fs/nfsd/nfs4state.c
> @@ -127,6 +127,7 @@ static void free_session(struct nfsd4_session *);
>
> static const struct nfsd4_callback_ops nfsd4_cb_recall_ops;
> static const struct nfsd4_callback_ops nfsd4_cb_notify_lock_ops;
> +static const struct nfsd4_callback_ops nfsd4_cb_getattr_ops;
>
> static struct workqueue_struct *laundry_wq;
>
> @@ -1175,6 +1176,10 @@ alloc_init_deleg(struct nfs4_client *clp, struct nfs4_file *fp,
> dp->dl_recalled = false;
> nfsd4_init_cb(&dp->dl_recall, dp->dl_stid.sc_client,
> &nfsd4_cb_recall_ops, NFSPROC4_CLNT_CB_RECALL);
> + nfsd4_init_cb(&dp->dl_cb_fattr.ncf_getattr, dp->dl_stid.sc_client,
> + &nfsd4_cb_getattr_ops, NFSPROC4_CLNT_CB_GETATTR);
> + dp->dl_cb_fattr.ncf_file_modified = false;
> + dp->dl_cb_fattr.ncf_cb_bmap[0] = FATTR4_WORD0_CHANGE | FATTR4_WORD0_SIZE;
> get_nfs4_file(fp);
> dp->dl_stid.sc_file = fp;
> return dp;
> @@ -2882,11 +2887,49 @@ nfsd4_cb_recall_any_release(struct nfsd4_callback *cb)
> spin_unlock(&nn->client_lock);
> }
>
> +static int
> +nfsd4_cb_getattr_done(struct nfsd4_callback *cb, struct rpc_task *task)
> +{
> + struct nfs4_cb_fattr *ncf =
> + container_of(cb, struct nfs4_cb_fattr, ncf_getattr);
> +
> + ncf->ncf_cb_status = task->tk_status;
> + switch (task->tk_status) {
> + case -NFS4ERR_DELAY:
> + rpc_delay(task, 2 * HZ);
> + return 0;
> + default:
> + return 1;
> + }
> +}
> +
> +static void
> +nfsd4_cb_getattr_release(struct nfsd4_callback *cb)
> +{
> + struct nfs4_cb_fattr *ncf =
> + container_of(cb, struct nfs4_cb_fattr, ncf_getattr);
> +
> + clear_bit(CB_GETATTR_BUSY, &ncf->ncf_cb_flags);
> + wake_up_bit(&ncf->ncf_cb_flags, CB_GETATTR_BUSY);
> +}
> +
> static const struct nfsd4_callback_ops nfsd4_cb_recall_any_ops = {
> .done = nfsd4_cb_recall_any_done,
> .release = nfsd4_cb_recall_any_release,
> };
>
> +static const struct nfsd4_callback_ops nfsd4_cb_getattr_ops = {
> + .done = nfsd4_cb_getattr_done,
> + .release = nfsd4_cb_getattr_release,
> +};
> +
> +void nfs4_cb_getattr(struct nfs4_cb_fattr *ncf)
> +{
> + if (test_and_set_bit(CB_GETATTR_BUSY, &ncf->ncf_cb_flags))
> + return;
> + nfsd4_run_cb(&ncf->ncf_getattr);
> +}
> +
> static struct nfs4_client *create_client(struct xdr_netobj name,
> struct svc_rqst *rqstp, nfs4_verifier *verf)
> {
> @@ -5591,6 +5634,8 @@ nfs4_open_delegation(struct nfsd4_open *open, struct nfs4_ol_stateid *stp,
> int cb_up;
> int status = 0;
> u32 wdeleg = false;
> + struct kstat stat;
> + struct path path;
>
> cb_up = nfsd4_cb_channel_good(oo->oo_owner.so_client);
> open->op_recall = 0;
> @@ -5626,6 +5671,19 @@ nfs4_open_delegation(struct nfsd4_open *open, struct nfs4_ol_stateid *stp,
> wdeleg = open->op_share_access & NFS4_SHARE_ACCESS_WRITE;
> open->op_delegate_type = wdeleg ?
> NFS4_OPEN_DELEGATE_WRITE : NFS4_OPEN_DELEGATE_READ;
> + if (wdeleg) {
> + path.mnt = currentfh->fh_export->ex_path.mnt;
> + path.dentry = currentfh->fh_dentry;
> + if (vfs_getattr(&path, &stat, STATX_BASIC_STATS,
> + AT_STATX_SYNC_AS_STAT)) {
> + nfs4_put_stid(&dp->dl_stid);
> + destroy_delegation(dp);
> + goto out_no_deleg;
> + }
> + dp->dl_cb_fattr.ncf_cur_fsize = stat.size;
> + dp->dl_cb_fattr.ncf_initial_cinfo = nfsd4_change_attribute(&stat,
> + d_inode(currentfh->fh_dentry));
> + }
> nfs4_put_stid(&dp->dl_stid);
> return;
> out_no_deleg:
> diff --git a/fs/nfsd/nfs4xdr.c b/fs/nfsd/nfs4xdr.c
> index 76db2fe29624..5d7e11db8ccf 100644
> --- a/fs/nfsd/nfs4xdr.c
> +++ b/fs/nfsd/nfs4xdr.c
> @@ -2920,6 +2920,77 @@ nfsd4_encode_bitmap(struct xdr_stream *xdr, u32 bmval0, u32 bmval1, u32 bmval2)
> return nfserr_resource;
> }
>
> +static struct file_lock *
> +nfs4_wrdeleg_filelock(struct svc_rqst *rqstp, struct inode *inode)
> +{
> + struct file_lock_context *ctx;
> + struct file_lock *fl;
> +
> + ctx = locks_inode_context(inode);
> + if (!ctx)
> + return NULL;
> + spin_lock(&ctx->flc_lock);
> + list_for_each_entry(fl, &ctx->flc_lease, fl_list) {
> + if (fl->fl_type == F_WRLCK) {
> + spin_unlock(&ctx->flc_lock);
> + return fl;
> + }
> + }
> + spin_unlock(&ctx->flc_lock);
> + return NULL;
> +}
> +
> +static __be32
> +nfs4_handle_wrdeleg_conflict(struct svc_rqst *rqstp, struct inode *inode,
> + bool *modified, u64 *size)
> +{
> + __be32 status;
> + struct file_lock *fl;
> + struct nfs4_delegation *dp;
> + struct nfs4_cb_fattr *ncf;
> + struct iattr attrs;
> +
> + *modified = false;
> + fl = nfs4_wrdeleg_filelock(rqstp, inode);
> + if (!fl)
> + return 0;
> + dp = fl->fl_owner;
> + ncf = &dp->dl_cb_fattr;
> + if (dp->dl_recall.cb_clp == *(rqstp->rq_lease_breaker))
> + return 0;
> +
> + refcount_inc(&dp->dl_stid.sc_count);
> + nfs4_cb_getattr(&dp->dl_cb_fattr);
> + wait_on_bit(&ncf->ncf_cb_flags, CB_GETATTR_BUSY, TASK_INTERRUPTIBLE);
> + if (ncf->ncf_cb_status) {
> + status = nfserrno(nfsd_open_break_lease(inode, NFSD_MAY_READ));
> + nfs4_put_stid(&dp->dl_stid);
> + return status;
> + }
> + ncf->ncf_cur_fsize = ncf->ncf_cb_fsize;
> + if (!ncf->ncf_file_modified &&
> + (ncf->ncf_initial_cinfo != ncf->ncf_cb_change ||
> + ncf->ncf_cur_fsize != ncf->ncf_cb_fsize)) {
> + ncf->ncf_file_modified = true;
> + }
> +
> + if (ncf->ncf_file_modified) {
> + /*
> + * The server would not update the file's metadata
> + * with the client's modified size.
> + * nfsd4 change attribute is constructed from ctime.
> + */
> + attrs.ia_mtime = attrs.ia_ctime = current_time(inode);
> + attrs.ia_valid = ATTR_MTIME | ATTR_CTIME;
> + setattr_copy(&nop_mnt_idmap, inode, &attrs);
> + mark_inode_dirty(inode);
> + *size = ncf->ncf_cur_fsize;
> + *modified = true;
> + }
> + nfs4_put_stid(&dp->dl_stid);
> + return 0;
> +}
> +
> /*
> * Note: @fhp can be NULL; in this case, we might have to compose the filehandle
> * ourselves.
> @@ -2957,6 +3028,8 @@ nfsd4_encode_fattr(struct xdr_stream *xdr, struct svc_fh *fhp,
> .dentry = dentry,
> };
> struct nfsd_net *nn = net_generic(SVC_NET(rqstp), nfsd_net_id);
> + bool file_modified;
> + u64 size = 0;
>
> BUG_ON(bmval1 & NFSD_WRITEONLY_ATTRS_WORD1);
> BUG_ON(!nfsd_attrs_supported(minorversion, bmval));
> @@ -2966,6 +3039,12 @@ nfsd4_encode_fattr(struct xdr_stream *xdr, struct svc_fh *fhp,
> if (status)
> goto out;
> }
> + if (bmval0 & (FATTR4_WORD0_CHANGE | FATTR4_WORD0_SIZE)) {
> + status = nfs4_handle_wrdeleg_conflict(rqstp, d_inode(dentry),
> + &file_modified, &size);
> + if (status)
> + goto out;
> + }
>
> err = vfs_getattr(&path, &stat,
> STATX_BASIC_STATS | STATX_BTIME | STATX_CHANGE_COOKIE,
> @@ -3089,7 +3168,10 @@ nfsd4_encode_fattr(struct xdr_stream *xdr, struct svc_fh *fhp,
> p = xdr_reserve_space(xdr, 8);
> if (!p)
> goto out_resource;
> - p = xdr_encode_hyper(p, stat.size);
> + if (file_modified)
> + p = xdr_encode_hyper(p, size);
> + else
> + p = xdr_encode_hyper(p, stat.size);
> }
> if (bmval0 & FATTR4_WORD0_LINK_SUPPORT) {
> p = xdr_reserve_space(xdr, 4);
> diff --git a/fs/nfsd/state.h b/fs/nfsd/state.h
> index 9fb69ed8ae80..b20b65fe89b4 100644
> --- a/fs/nfsd/state.h
> +++ b/fs/nfsd/state.h
> @@ -121,6 +121,10 @@ struct nfs4_cb_fattr {
> struct nfsd4_callback ncf_getattr;
> u32 ncf_cb_status;
> u32 ncf_cb_bmap[1];
> + unsigned long ncf_cb_flags;
> + bool ncf_file_modified;
> + u64 ncf_initial_cinfo;
> + u64 ncf_cur_fsize;
>
> /* from CB_GETATTR reply */
> u64 ncf_cb_change;
> @@ -744,6 +748,9 @@ extern void nfsd4_client_record_remove(struct nfs4_client *clp);
> extern int nfsd4_client_record_check(struct nfs4_client *clp);
> extern void nfsd4_record_grace_done(struct nfsd_net *nn);
>
> +/* CB_GETTTAR */
> +extern void nfs4_cb_getattr(struct nfs4_cb_fattr *ncf);
> +
> static inline bool try_to_expire_client(struct nfs4_client *clp)
> {
> cmpxchg(&clp->cl_state, NFSD4_COURTESY, NFSD4_EXPIRABLE);
> --
> 2.9.5
>

2023-05-15 18:32:53

by Dai Ngo

[permalink] [raw]
Subject: Re: [PATCH v2 4/4] NFSD: handle GETATTR conflict with write delegation


On 5/15/23 11:14 AM, Olga Kornievskaia wrote:
> On Sun, May 14, 2023 at 8:56 PM Dai Ngo <[email protected]> wrote:
>> If the GETATTR request on a file that has write delegation in effect
>> and the request attributes include the change info and size attribute
>> then the request is handled as below:
>>
>> Server sends CB_GETATTR to client to get the latest change info and file
>> size. If these values are the same as the server's cached values then
>> the GETATTR proceeds as normal.
>>
>> If either the change info or file size is different from the server's
>> cached values, or the file was already marked as modified, then:
>>
>> . update time_modify and time_metadata into file's metadata
>> with current time
>>
>> . encode GETATTR as normal except the file size is encoded with
>> the value returned from CB_GETATTR
>>
>> . mark the file as modified
>>
>> If the CB_GETATTR fails for any reasons, the delegation is recalled
>> and NFS4ERR_DELAY is returned for the GETATTR.
> Hi Dai,
>
> I'm curious what does the server gain by implementing handling of
> GETATTR with delegations? As far as I can tell it is not strictly
> required by the RFC(s). When the file is being written any attempt at
> querying its attribute is immediately stale.

Yes, you're right that handling of GETATTR with delegations is not
required by the spec. The only benefit I see is that the server
provides a more accurate state of the file as whether the file has
been changed/updated since the client's last GETATTR. This allows
the app on the client to take appropriate action (whatever that
might be) when sharing files among multiple clients.

-Dai

>
>> Signed-off-by: Dai Ngo <[email protected]>
>> ---
>> fs/nfsd/nfs4state.c | 58 ++++++++++++++++++++++++++++++++++++
>> fs/nfsd/nfs4xdr.c | 84 ++++++++++++++++++++++++++++++++++++++++++++++++++++-
>> fs/nfsd/state.h | 7 +++++
>> 3 files changed, 148 insertions(+), 1 deletion(-)
>>
>> diff --git a/fs/nfsd/nfs4state.c b/fs/nfsd/nfs4state.c
>> index 09a9e16407f9..fb305b28a090 100644
>> --- a/fs/nfsd/nfs4state.c
>> +++ b/fs/nfsd/nfs4state.c
>> @@ -127,6 +127,7 @@ static void free_session(struct nfsd4_session *);
>>
>> static const struct nfsd4_callback_ops nfsd4_cb_recall_ops;
>> static const struct nfsd4_callback_ops nfsd4_cb_notify_lock_ops;
>> +static const struct nfsd4_callback_ops nfsd4_cb_getattr_ops;
>>
>> static struct workqueue_struct *laundry_wq;
>>
>> @@ -1175,6 +1176,10 @@ alloc_init_deleg(struct nfs4_client *clp, struct nfs4_file *fp,
>> dp->dl_recalled = false;
>> nfsd4_init_cb(&dp->dl_recall, dp->dl_stid.sc_client,
>> &nfsd4_cb_recall_ops, NFSPROC4_CLNT_CB_RECALL);
>> + nfsd4_init_cb(&dp->dl_cb_fattr.ncf_getattr, dp->dl_stid.sc_client,
>> + &nfsd4_cb_getattr_ops, NFSPROC4_CLNT_CB_GETATTR);
>> + dp->dl_cb_fattr.ncf_file_modified = false;
>> + dp->dl_cb_fattr.ncf_cb_bmap[0] = FATTR4_WORD0_CHANGE | FATTR4_WORD0_SIZE;
>> get_nfs4_file(fp);
>> dp->dl_stid.sc_file = fp;
>> return dp;
>> @@ -2882,11 +2887,49 @@ nfsd4_cb_recall_any_release(struct nfsd4_callback *cb)
>> spin_unlock(&nn->client_lock);
>> }
>>
>> +static int
>> +nfsd4_cb_getattr_done(struct nfsd4_callback *cb, struct rpc_task *task)
>> +{
>> + struct nfs4_cb_fattr *ncf =
>> + container_of(cb, struct nfs4_cb_fattr, ncf_getattr);
>> +
>> + ncf->ncf_cb_status = task->tk_status;
>> + switch (task->tk_status) {
>> + case -NFS4ERR_DELAY:
>> + rpc_delay(task, 2 * HZ);
>> + return 0;
>> + default:
>> + return 1;
>> + }
>> +}
>> +
>> +static void
>> +nfsd4_cb_getattr_release(struct nfsd4_callback *cb)
>> +{
>> + struct nfs4_cb_fattr *ncf =
>> + container_of(cb, struct nfs4_cb_fattr, ncf_getattr);
>> +
>> + clear_bit(CB_GETATTR_BUSY, &ncf->ncf_cb_flags);
>> + wake_up_bit(&ncf->ncf_cb_flags, CB_GETATTR_BUSY);
>> +}
>> +
>> static const struct nfsd4_callback_ops nfsd4_cb_recall_any_ops = {
>> .done = nfsd4_cb_recall_any_done,
>> .release = nfsd4_cb_recall_any_release,
>> };
>>
>> +static const struct nfsd4_callback_ops nfsd4_cb_getattr_ops = {
>> + .done = nfsd4_cb_getattr_done,
>> + .release = nfsd4_cb_getattr_release,
>> +};
>> +
>> +void nfs4_cb_getattr(struct nfs4_cb_fattr *ncf)
>> +{
>> + if (test_and_set_bit(CB_GETATTR_BUSY, &ncf->ncf_cb_flags))
>> + return;
>> + nfsd4_run_cb(&ncf->ncf_getattr);
>> +}
>> +
>> static struct nfs4_client *create_client(struct xdr_netobj name,
>> struct svc_rqst *rqstp, nfs4_verifier *verf)
>> {
>> @@ -5591,6 +5634,8 @@ nfs4_open_delegation(struct nfsd4_open *open, struct nfs4_ol_stateid *stp,
>> int cb_up;
>> int status = 0;
>> u32 wdeleg = false;
>> + struct kstat stat;
>> + struct path path;
>>
>> cb_up = nfsd4_cb_channel_good(oo->oo_owner.so_client);
>> open->op_recall = 0;
>> @@ -5626,6 +5671,19 @@ nfs4_open_delegation(struct nfsd4_open *open, struct nfs4_ol_stateid *stp,
>> wdeleg = open->op_share_access & NFS4_SHARE_ACCESS_WRITE;
>> open->op_delegate_type = wdeleg ?
>> NFS4_OPEN_DELEGATE_WRITE : NFS4_OPEN_DELEGATE_READ;
>> + if (wdeleg) {
>> + path.mnt = currentfh->fh_export->ex_path.mnt;
>> + path.dentry = currentfh->fh_dentry;
>> + if (vfs_getattr(&path, &stat, STATX_BASIC_STATS,
>> + AT_STATX_SYNC_AS_STAT)) {
>> + nfs4_put_stid(&dp->dl_stid);
>> + destroy_delegation(dp);
>> + goto out_no_deleg;
>> + }
>> + dp->dl_cb_fattr.ncf_cur_fsize = stat.size;
>> + dp->dl_cb_fattr.ncf_initial_cinfo = nfsd4_change_attribute(&stat,
>> + d_inode(currentfh->fh_dentry));
>> + }
>> nfs4_put_stid(&dp->dl_stid);
>> return;
>> out_no_deleg:
>> diff --git a/fs/nfsd/nfs4xdr.c b/fs/nfsd/nfs4xdr.c
>> index 76db2fe29624..5d7e11db8ccf 100644
>> --- a/fs/nfsd/nfs4xdr.c
>> +++ b/fs/nfsd/nfs4xdr.c
>> @@ -2920,6 +2920,77 @@ nfsd4_encode_bitmap(struct xdr_stream *xdr, u32 bmval0, u32 bmval1, u32 bmval2)
>> return nfserr_resource;
>> }
>>
>> +static struct file_lock *
>> +nfs4_wrdeleg_filelock(struct svc_rqst *rqstp, struct inode *inode)
>> +{
>> + struct file_lock_context *ctx;
>> + struct file_lock *fl;
>> +
>> + ctx = locks_inode_context(inode);
>> + if (!ctx)
>> + return NULL;
>> + spin_lock(&ctx->flc_lock);
>> + list_for_each_entry(fl, &ctx->flc_lease, fl_list) {
>> + if (fl->fl_type == F_WRLCK) {
>> + spin_unlock(&ctx->flc_lock);
>> + return fl;
>> + }
>> + }
>> + spin_unlock(&ctx->flc_lock);
>> + return NULL;
>> +}
>> +
>> +static __be32
>> +nfs4_handle_wrdeleg_conflict(struct svc_rqst *rqstp, struct inode *inode,
>> + bool *modified, u64 *size)
>> +{
>> + __be32 status;
>> + struct file_lock *fl;
>> + struct nfs4_delegation *dp;
>> + struct nfs4_cb_fattr *ncf;
>> + struct iattr attrs;
>> +
>> + *modified = false;
>> + fl = nfs4_wrdeleg_filelock(rqstp, inode);
>> + if (!fl)
>> + return 0;
>> + dp = fl->fl_owner;
>> + ncf = &dp->dl_cb_fattr;
>> + if (dp->dl_recall.cb_clp == *(rqstp->rq_lease_breaker))
>> + return 0;
>> +
>> + refcount_inc(&dp->dl_stid.sc_count);
>> + nfs4_cb_getattr(&dp->dl_cb_fattr);
>> + wait_on_bit(&ncf->ncf_cb_flags, CB_GETATTR_BUSY, TASK_INTERRUPTIBLE);
>> + if (ncf->ncf_cb_status) {
>> + status = nfserrno(nfsd_open_break_lease(inode, NFSD_MAY_READ));
>> + nfs4_put_stid(&dp->dl_stid);
>> + return status;
>> + }
>> + ncf->ncf_cur_fsize = ncf->ncf_cb_fsize;
>> + if (!ncf->ncf_file_modified &&
>> + (ncf->ncf_initial_cinfo != ncf->ncf_cb_change ||
>> + ncf->ncf_cur_fsize != ncf->ncf_cb_fsize)) {
>> + ncf->ncf_file_modified = true;
>> + }
>> +
>> + if (ncf->ncf_file_modified) {
>> + /*
>> + * The server would not update the file's metadata
>> + * with the client's modified size.
>> + * nfsd4 change attribute is constructed from ctime.
>> + */
>> + attrs.ia_mtime = attrs.ia_ctime = current_time(inode);
>> + attrs.ia_valid = ATTR_MTIME | ATTR_CTIME;
>> + setattr_copy(&nop_mnt_idmap, inode, &attrs);
>> + mark_inode_dirty(inode);
>> + *size = ncf->ncf_cur_fsize;
>> + *modified = true;
>> + }
>> + nfs4_put_stid(&dp->dl_stid);
>> + return 0;
>> +}
>> +
>> /*
>> * Note: @fhp can be NULL; in this case, we might have to compose the filehandle
>> * ourselves.
>> @@ -2957,6 +3028,8 @@ nfsd4_encode_fattr(struct xdr_stream *xdr, struct svc_fh *fhp,
>> .dentry = dentry,
>> };
>> struct nfsd_net *nn = net_generic(SVC_NET(rqstp), nfsd_net_id);
>> + bool file_modified;
>> + u64 size = 0;
>>
>> BUG_ON(bmval1 & NFSD_WRITEONLY_ATTRS_WORD1);
>> BUG_ON(!nfsd_attrs_supported(minorversion, bmval));
>> @@ -2966,6 +3039,12 @@ nfsd4_encode_fattr(struct xdr_stream *xdr, struct svc_fh *fhp,
>> if (status)
>> goto out;
>> }
>> + if (bmval0 & (FATTR4_WORD0_CHANGE | FATTR4_WORD0_SIZE)) {
>> + status = nfs4_handle_wrdeleg_conflict(rqstp, d_inode(dentry),
>> + &file_modified, &size);
>> + if (status)
>> + goto out;
>> + }
>>
>> err = vfs_getattr(&path, &stat,
>> STATX_BASIC_STATS | STATX_BTIME | STATX_CHANGE_COOKIE,
>> @@ -3089,7 +3168,10 @@ nfsd4_encode_fattr(struct xdr_stream *xdr, struct svc_fh *fhp,
>> p = xdr_reserve_space(xdr, 8);
>> if (!p)
>> goto out_resource;
>> - p = xdr_encode_hyper(p, stat.size);
>> + if (file_modified)
>> + p = xdr_encode_hyper(p, size);
>> + else
>> + p = xdr_encode_hyper(p, stat.size);
>> }
>> if (bmval0 & FATTR4_WORD0_LINK_SUPPORT) {
>> p = xdr_reserve_space(xdr, 4);
>> diff --git a/fs/nfsd/state.h b/fs/nfsd/state.h
>> index 9fb69ed8ae80..b20b65fe89b4 100644
>> --- a/fs/nfsd/state.h
>> +++ b/fs/nfsd/state.h
>> @@ -121,6 +121,10 @@ struct nfs4_cb_fattr {
>> struct nfsd4_callback ncf_getattr;
>> u32 ncf_cb_status;
>> u32 ncf_cb_bmap[1];
>> + unsigned long ncf_cb_flags;
>> + bool ncf_file_modified;
>> + u64 ncf_initial_cinfo;
>> + u64 ncf_cur_fsize;
>>
>> /* from CB_GETATTR reply */
>> u64 ncf_cb_change;
>> @@ -744,6 +748,9 @@ extern void nfsd4_client_record_remove(struct nfs4_client *clp);
>> extern int nfsd4_client_record_check(struct nfs4_client *clp);
>> extern void nfsd4_record_grace_done(struct nfsd_net *nn);
>>
>> +/* CB_GETTTAR */
>> +extern void nfs4_cb_getattr(struct nfs4_cb_fattr *ncf);
>> +
>> static inline bool try_to_expire_client(struct nfs4_client *clp)
>> {
>> cmpxchg(&clp->cl_state, NFSD4_COURTESY, NFSD4_EXPIRABLE);
>> --
>> 2.9.5
>>

2023-05-15 19:04:50

by Jeffrey Layton

[permalink] [raw]
Subject: Re: [PATCH v2 4/4] NFSD: handle GETATTR conflict with write delegation

On Mon, 2023-05-15 at 11:26 -0700, [email protected] wrote:
> On 5/15/23 11:14 AM, Olga Kornievskaia wrote:
> > On Sun, May 14, 2023 at 8:56 PM Dai Ngo <[email protected]> wrote:
> > > If the GETATTR request on a file that has write delegation in effect
> > > and the request attributes include the change info and size attribute
> > > then the request is handled as below:
> > >
> > > Server sends CB_GETATTR to client to get the latest change info and file
> > > size. If these values are the same as the server's cached values then
> > > the GETATTR proceeds as normal.
> > >
> > > If either the change info or file size is different from the server's
> > > cached values, or the file was already marked as modified, then:
> > >
> > > . update time_modify and time_metadata into file's metadata
> > > with current time
> > >
> > > . encode GETATTR as normal except the file size is encoded with
> > > the value returned from CB_GETATTR
> > >
> > > . mark the file as modified
> > >
> > > If the CB_GETATTR fails for any reasons, the delegation is recalled
> > > and NFS4ERR_DELAY is returned for the GETATTR.
> > Hi Dai,
> >
> > I'm curious what does the server gain by implementing handling of
> > GETATTR with delegations? As far as I can tell it is not strictly
> > required by the RFC(s). When the file is being written any attempt at
> > querying its attribute is immediately stale.
>
> Yes, you're right that handling of GETATTR with delegations is not
> required by the spec. The only benefit I see is that the server
> provides a more accurate state of the file as whether the file has
> been changed/updated since the client's last GETATTR. This allows
> the app on the client to take appropriate action (whatever that
> might be) when sharing files among multiple clients.
>



From RFC 8881 10.4.3:

"It should be noted that the server is under no obligation to use
CB_GETATTR, and therefore the server MAY simply recall the delegation to
avoid its use."

As I see it, the main benefit is that you avoid having to recall a write
delegation when someone does a drive-by stat() on the file (e.g. due to
a "ls -l" in its parent directory).

> >
> > > Signed-off-by: Dai Ngo <[email protected]>
> > > ---
> > > fs/nfsd/nfs4state.c | 58 ++++++++++++++++++++++++++++++++++++
> > > fs/nfsd/nfs4xdr.c | 84 ++++++++++++++++++++++++++++++++++++++++++++++++++++-
> > > fs/nfsd/state.h | 7 +++++
> > > 3 files changed, 148 insertions(+), 1 deletion(-)
> > >
> > > diff --git a/fs/nfsd/nfs4state.c b/fs/nfsd/nfs4state.c
> > > index 09a9e16407f9..fb305b28a090 100644
> > > --- a/fs/nfsd/nfs4state.c
> > > +++ b/fs/nfsd/nfs4state.c
> > > @@ -127,6 +127,7 @@ static void free_session(struct nfsd4_session *);
> > >
> > > static const struct nfsd4_callback_ops nfsd4_cb_recall_ops;
> > > static const struct nfsd4_callback_ops nfsd4_cb_notify_lock_ops;
> > > +static const struct nfsd4_callback_ops nfsd4_cb_getattr_ops;
> > >
> > > static struct workqueue_struct *laundry_wq;
> > >
> > > @@ -1175,6 +1176,10 @@ alloc_init_deleg(struct nfs4_client *clp, struct nfs4_file *fp,
> > > dp->dl_recalled = false;
> > > nfsd4_init_cb(&dp->dl_recall, dp->dl_stid.sc_client,
> > > &nfsd4_cb_recall_ops, NFSPROC4_CLNT_CB_RECALL);
> > > + nfsd4_init_cb(&dp->dl_cb_fattr.ncf_getattr, dp->dl_stid.sc_client,
> > > + &nfsd4_cb_getattr_ops, NFSPROC4_CLNT_CB_GETATTR);
> > > + dp->dl_cb_fattr.ncf_file_modified = false;
> > > + dp->dl_cb_fattr.ncf_cb_bmap[0] = FATTR4_WORD0_CHANGE | FATTR4_WORD0_SIZE;
> > > get_nfs4_file(fp);
> > > dp->dl_stid.sc_file = fp;
> > > return dp;
> > > @@ -2882,11 +2887,49 @@ nfsd4_cb_recall_any_release(struct nfsd4_callback *cb)
> > > spin_unlock(&nn->client_lock);
> > > }
> > >
> > > +static int
> > > +nfsd4_cb_getattr_done(struct nfsd4_callback *cb, struct rpc_task *task)
> > > +{
> > > + struct nfs4_cb_fattr *ncf =
> > > + container_of(cb, struct nfs4_cb_fattr, ncf_getattr);
> > > +
> > > + ncf->ncf_cb_status = task->tk_status;
> > > + switch (task->tk_status) {
> > > + case -NFS4ERR_DELAY:
> > > + rpc_delay(task, 2 * HZ);
> > > + return 0;
> > > + default:
> > > + return 1;
> > > + }
> > > +}
> > > +
> > > +static void
> > > +nfsd4_cb_getattr_release(struct nfsd4_callback *cb)
> > > +{
> > > + struct nfs4_cb_fattr *ncf =
> > > + container_of(cb, struct nfs4_cb_fattr, ncf_getattr);
> > > +
> > > + clear_bit(CB_GETATTR_BUSY, &ncf->ncf_cb_flags);
> > > + wake_up_bit(&ncf->ncf_cb_flags, CB_GETATTR_BUSY);
> > > +}
> > > +
> > > static const struct nfsd4_callback_ops nfsd4_cb_recall_any_ops = {
> > > .done = nfsd4_cb_recall_any_done,
> > > .release = nfsd4_cb_recall_any_release,
> > > };
> > >
> > > +static const struct nfsd4_callback_ops nfsd4_cb_getattr_ops = {
> > > + .done = nfsd4_cb_getattr_done,
> > > + .release = nfsd4_cb_getattr_release,
> > > +};
> > > +
> > > +void nfs4_cb_getattr(struct nfs4_cb_fattr *ncf)
> > > +{
> > > + if (test_and_set_bit(CB_GETATTR_BUSY, &ncf->ncf_cb_flags))
> > > + return;
> > > + nfsd4_run_cb(&ncf->ncf_getattr);
> > > +}
> > > +
> > > static struct nfs4_client *create_client(struct xdr_netobj name,
> > > struct svc_rqst *rqstp, nfs4_verifier *verf)
> > > {
> > > @@ -5591,6 +5634,8 @@ nfs4_open_delegation(struct nfsd4_open *open, struct nfs4_ol_stateid *stp,
> > > int cb_up;
> > > int status = 0;
> > > u32 wdeleg = false;
> > > + struct kstat stat;
> > > + struct path path;
> > >
> > > cb_up = nfsd4_cb_channel_good(oo->oo_owner.so_client);
> > > open->op_recall = 0;
> > > @@ -5626,6 +5671,19 @@ nfs4_open_delegation(struct nfsd4_open *open, struct nfs4_ol_stateid *stp,
> > > wdeleg = open->op_share_access & NFS4_SHARE_ACCESS_WRITE;
> > > open->op_delegate_type = wdeleg ?
> > > NFS4_OPEN_DELEGATE_WRITE : NFS4_OPEN_DELEGATE_READ;
> > > + if (wdeleg) {
> > > + path.mnt = currentfh->fh_export->ex_path.mnt;
> > > + path.dentry = currentfh->fh_dentry;
> > > + if (vfs_getattr(&path, &stat, STATX_BASIC_STATS,
> > > + AT_STATX_SYNC_AS_STAT)) {
> > > + nfs4_put_stid(&dp->dl_stid);
> > > + destroy_delegation(dp);
> > > + goto out_no_deleg;
> > > + }
> > > + dp->dl_cb_fattr.ncf_cur_fsize = stat.size;
> > > + dp->dl_cb_fattr.ncf_initial_cinfo = nfsd4_change_attribute(&stat,
> > > + d_inode(currentfh->fh_dentry));
> > > + }
> > > nfs4_put_stid(&dp->dl_stid);
> > > return;
> > > out_no_deleg:
> > > diff --git a/fs/nfsd/nfs4xdr.c b/fs/nfsd/nfs4xdr.c
> > > index 76db2fe29624..5d7e11db8ccf 100644
> > > --- a/fs/nfsd/nfs4xdr.c
> > > +++ b/fs/nfsd/nfs4xdr.c
> > > @@ -2920,6 +2920,77 @@ nfsd4_encode_bitmap(struct xdr_stream *xdr, u32 bmval0, u32 bmval1, u32 bmval2)
> > > return nfserr_resource;
> > > }
> > >
> > > +static struct file_lock *
> > > +nfs4_wrdeleg_filelock(struct svc_rqst *rqstp, struct inode *inode)
> > > +{
> > > + struct file_lock_context *ctx;
> > > + struct file_lock *fl;
> > > +
> > > + ctx = locks_inode_context(inode);
> > > + if (!ctx)
> > > + return NULL;
> > > + spin_lock(&ctx->flc_lock);
> > > + list_for_each_entry(fl, &ctx->flc_lease, fl_list) {
> > > + if (fl->fl_type == F_WRLCK) {
> > > + spin_unlock(&ctx->flc_lock);
> > > + return fl;
> > > + }
> > > + }
> > > + spin_unlock(&ctx->flc_lock);
> > > + return NULL;
> > > +}
> > > +
> > > +static __be32
> > > +nfs4_handle_wrdeleg_conflict(struct svc_rqst *rqstp, struct inode *inode,
> > > + bool *modified, u64 *size)
> > > +{
> > > + __be32 status;
> > > + struct file_lock *fl;
> > > + struct nfs4_delegation *dp;
> > > + struct nfs4_cb_fattr *ncf;
> > > + struct iattr attrs;
> > > +
> > > + *modified = false;
> > > + fl = nfs4_wrdeleg_filelock(rqstp, inode);
> > > + if (!fl)
> > > + return 0;
> > > + dp = fl->fl_owner;
> > > + ncf = &dp->dl_cb_fattr;
> > > + if (dp->dl_recall.cb_clp == *(rqstp->rq_lease_breaker))
> > > + return 0;
> > > +
> > > + refcount_inc(&dp->dl_stid.sc_count);
> > > + nfs4_cb_getattr(&dp->dl_cb_fattr);
> > > + wait_on_bit(&ncf->ncf_cb_flags, CB_GETATTR_BUSY, TASK_INTERRUPTIBLE);
> > > + if (ncf->ncf_cb_status) {
> > > + status = nfserrno(nfsd_open_break_lease(inode, NFSD_MAY_READ));
> > > + nfs4_put_stid(&dp->dl_stid);
> > > + return status;
> > > + }
> > > + ncf->ncf_cur_fsize = ncf->ncf_cb_fsize;
> > > + if (!ncf->ncf_file_modified &&
> > > + (ncf->ncf_initial_cinfo != ncf->ncf_cb_change ||
> > > + ncf->ncf_cur_fsize != ncf->ncf_cb_fsize)) {
> > > + ncf->ncf_file_modified = true;
> > > + }
> > > +
> > > + if (ncf->ncf_file_modified) {
> > > + /*
> > > + * The server would not update the file's metadata
> > > + * with the client's modified size.
> > > + * nfsd4 change attribute is constructed from ctime.
> > > + */
> > > + attrs.ia_mtime = attrs.ia_ctime = current_time(inode);
> > > + attrs.ia_valid = ATTR_MTIME | ATTR_CTIME;
> > > + setattr_copy(&nop_mnt_idmap, inode, &attrs);
> > > + mark_inode_dirty(inode);
> > > + *size = ncf->ncf_cur_fsize;
> > > + *modified = true;
> > > + }
> > > + nfs4_put_stid(&dp->dl_stid);
> > > + return 0;
> > > +}
> > > +
> > > /*
> > > * Note: @fhp can be NULL; in this case, we might have to compose the filehandle
> > > * ourselves.
> > > @@ -2957,6 +3028,8 @@ nfsd4_encode_fattr(struct xdr_stream *xdr, struct svc_fh *fhp,
> > > .dentry = dentry,
> > > };
> > > struct nfsd_net *nn = net_generic(SVC_NET(rqstp), nfsd_net_id);
> > > + bool file_modified;
> > > + u64 size = 0;
> > >
> > > BUG_ON(bmval1 & NFSD_WRITEONLY_ATTRS_WORD1);
> > > BUG_ON(!nfsd_attrs_supported(minorversion, bmval));
> > > @@ -2966,6 +3039,12 @@ nfsd4_encode_fattr(struct xdr_stream *xdr, struct svc_fh *fhp,
> > > if (status)
> > > goto out;
> > > }
> > > + if (bmval0 & (FATTR4_WORD0_CHANGE | FATTR4_WORD0_SIZE)) {
> > > + status = nfs4_handle_wrdeleg_conflict(rqstp, d_inode(dentry),
> > > + &file_modified, &size);
> > > + if (status)
> > > + goto out;
> > > + }
> > >
> > > err = vfs_getattr(&path, &stat,
> > > STATX_BASIC_STATS | STATX_BTIME | STATX_CHANGE_COOKIE,
> > > @@ -3089,7 +3168,10 @@ nfsd4_encode_fattr(struct xdr_stream *xdr, struct svc_fh *fhp,
> > > p = xdr_reserve_space(xdr, 8);
> > > if (!p)
> > > goto out_resource;
> > > - p = xdr_encode_hyper(p, stat.size);
> > > + if (file_modified)
> > > + p = xdr_encode_hyper(p, size);
> > > + else
> > > + p = xdr_encode_hyper(p, stat.size);
> > > }
> > > if (bmval0 & FATTR4_WORD0_LINK_SUPPORT) {
> > > p = xdr_reserve_space(xdr, 4);
> > > diff --git a/fs/nfsd/state.h b/fs/nfsd/state.h
> > > index 9fb69ed8ae80..b20b65fe89b4 100644
> > > --- a/fs/nfsd/state.h
> > > +++ b/fs/nfsd/state.h
> > > @@ -121,6 +121,10 @@ struct nfs4_cb_fattr {
> > > struct nfsd4_callback ncf_getattr;
> > > u32 ncf_cb_status;
> > > u32 ncf_cb_bmap[1];
> > > + unsigned long ncf_cb_flags;
> > > + bool ncf_file_modified;
> > > + u64 ncf_initial_cinfo;
> > > + u64 ncf_cur_fsize;
> > >
> > > /* from CB_GETATTR reply */
> > > u64 ncf_cb_change;
> > > @@ -744,6 +748,9 @@ extern void nfsd4_client_record_remove(struct nfs4_client *clp);
> > > extern int nfsd4_client_record_check(struct nfs4_client *clp);
> > > extern void nfsd4_record_grace_done(struct nfsd_net *nn);
> > >
> > > +/* CB_GETTTAR */
> > > +extern void nfs4_cb_getattr(struct nfs4_cb_fattr *ncf);
> > > +
> > > static inline bool try_to_expire_client(struct nfs4_client *clp)
> > > {
> > > cmpxchg(&clp->cl_state, NFSD4_COURTESY, NFSD4_EXPIRABLE);
> > > --
> > > 2.9.5
> > >

--
Jeff Layton <[email protected]>

2023-05-15 20:11:36

by Olga Kornievskaia

[permalink] [raw]
Subject: Re: [PATCH v2 4/4] NFSD: handle GETATTR conflict with write delegation

On Mon, May 15, 2023 at 2:58 PM Jeff Layton <[email protected]> wrote:
>
> On Mon, 2023-05-15 at 11:26 -0700, [email protected] wrote:
> > On 5/15/23 11:14 AM, Olga Kornievskaia wrote:
> > > On Sun, May 14, 2023 at 8:56 PM Dai Ngo <[email protected]> wrote:
> > > > If the GETATTR request on a file that has write delegation in effect
> > > > and the request attributes include the change info and size attribute
> > > > then the request is handled as below:
> > > >
> > > > Server sends CB_GETATTR to client to get the latest change info and file
> > > > size. If these values are the same as the server's cached values then
> > > > the GETATTR proceeds as normal.
> > > >
> > > > If either the change info or file size is different from the server's
> > > > cached values, or the file was already marked as modified, then:
> > > >
> > > > . update time_modify and time_metadata into file's metadata
> > > > with current time
> > > >
> > > > . encode GETATTR as normal except the file size is encoded with
> > > > the value returned from CB_GETATTR
> > > >
> > > > . mark the file as modified
> > > >
> > > > If the CB_GETATTR fails for any reasons, the delegation is recalled
> > > > and NFS4ERR_DELAY is returned for the GETATTR.
> > > Hi Dai,
> > >
> > > I'm curious what does the server gain by implementing handling of
> > > GETATTR with delegations? As far as I can tell it is not strictly
> > > required by the RFC(s). When the file is being written any attempt at
> > > querying its attribute is immediately stale.
> >
> > Yes, you're right that handling of GETATTR with delegations is not
> > required by the spec. The only benefit I see is that the server
> > provides a more accurate state of the file as whether the file has
> > been changed/updated since the client's last GETATTR. This allows
> > the app on the client to take appropriate action (whatever that
> > might be) when sharing files among multiple clients.
> >
>
>
>
> From RFC 8881 10.4.3:
>
> "It should be noted that the server is under no obligation to use
> CB_GETATTR, and therefore the server MAY simply recall the delegation to
> avoid its use."

This is a "MAY" which means the server can choose to not to and just
return the info it currently has without recalling a delegation.

> As I see it, the main benefit is that you avoid having to recall a write
> delegation when someone does a drive-by stat() on the file (e.g. due to
> a "ls -l" in its parent directory).
>
> > >
> > > > Signed-off-by: Dai Ngo <[email protected]>
> > > > ---
> > > > fs/nfsd/nfs4state.c | 58 ++++++++++++++++++++++++++++++++++++
> > > > fs/nfsd/nfs4xdr.c | 84 ++++++++++++++++++++++++++++++++++++++++++++++++++++-
> > > > fs/nfsd/state.h | 7 +++++
> > > > 3 files changed, 148 insertions(+), 1 deletion(-)
> > > >
> > > > diff --git a/fs/nfsd/nfs4state.c b/fs/nfsd/nfs4state.c
> > > > index 09a9e16407f9..fb305b28a090 100644
> > > > --- a/fs/nfsd/nfs4state.c
> > > > +++ b/fs/nfsd/nfs4state.c
> > > > @@ -127,6 +127,7 @@ static void free_session(struct nfsd4_session *);
> > > >
> > > > static const struct nfsd4_callback_ops nfsd4_cb_recall_ops;
> > > > static const struct nfsd4_callback_ops nfsd4_cb_notify_lock_ops;
> > > > +static const struct nfsd4_callback_ops nfsd4_cb_getattr_ops;
> > > >
> > > > static struct workqueue_struct *laundry_wq;
> > > >
> > > > @@ -1175,6 +1176,10 @@ alloc_init_deleg(struct nfs4_client *clp, struct nfs4_file *fp,
> > > > dp->dl_recalled = false;
> > > > nfsd4_init_cb(&dp->dl_recall, dp->dl_stid.sc_client,
> > > > &nfsd4_cb_recall_ops, NFSPROC4_CLNT_CB_RECALL);
> > > > + nfsd4_init_cb(&dp->dl_cb_fattr.ncf_getattr, dp->dl_stid.sc_client,
> > > > + &nfsd4_cb_getattr_ops, NFSPROC4_CLNT_CB_GETATTR);
> > > > + dp->dl_cb_fattr.ncf_file_modified = false;
> > > > + dp->dl_cb_fattr.ncf_cb_bmap[0] = FATTR4_WORD0_CHANGE | FATTR4_WORD0_SIZE;
> > > > get_nfs4_file(fp);
> > > > dp->dl_stid.sc_file = fp;
> > > > return dp;
> > > > @@ -2882,11 +2887,49 @@ nfsd4_cb_recall_any_release(struct nfsd4_callback *cb)
> > > > spin_unlock(&nn->client_lock);
> > > > }
> > > >
> > > > +static int
> > > > +nfsd4_cb_getattr_done(struct nfsd4_callback *cb, struct rpc_task *task)
> > > > +{
> > > > + struct nfs4_cb_fattr *ncf =
> > > > + container_of(cb, struct nfs4_cb_fattr, ncf_getattr);
> > > > +
> > > > + ncf->ncf_cb_status = task->tk_status;
> > > > + switch (task->tk_status) {
> > > > + case -NFS4ERR_DELAY:
> > > > + rpc_delay(task, 2 * HZ);
> > > > + return 0;
> > > > + default:
> > > > + return 1;
> > > > + }
> > > > +}
> > > > +
> > > > +static void
> > > > +nfsd4_cb_getattr_release(struct nfsd4_callback *cb)
> > > > +{
> > > > + struct nfs4_cb_fattr *ncf =
> > > > + container_of(cb, struct nfs4_cb_fattr, ncf_getattr);
> > > > +
> > > > + clear_bit(CB_GETATTR_BUSY, &ncf->ncf_cb_flags);
> > > > + wake_up_bit(&ncf->ncf_cb_flags, CB_GETATTR_BUSY);
> > > > +}
> > > > +
> > > > static const struct nfsd4_callback_ops nfsd4_cb_recall_any_ops = {
> > > > .done = nfsd4_cb_recall_any_done,
> > > > .release = nfsd4_cb_recall_any_release,
> > > > };
> > > >
> > > > +static const struct nfsd4_callback_ops nfsd4_cb_getattr_ops = {
> > > > + .done = nfsd4_cb_getattr_done,
> > > > + .release = nfsd4_cb_getattr_release,
> > > > +};
> > > > +
> > > > +void nfs4_cb_getattr(struct nfs4_cb_fattr *ncf)
> > > > +{
> > > > + if (test_and_set_bit(CB_GETATTR_BUSY, &ncf->ncf_cb_flags))
> > > > + return;
> > > > + nfsd4_run_cb(&ncf->ncf_getattr);
> > > > +}
> > > > +
> > > > static struct nfs4_client *create_client(struct xdr_netobj name,
> > > > struct svc_rqst *rqstp, nfs4_verifier *verf)
> > > > {
> > > > @@ -5591,6 +5634,8 @@ nfs4_open_delegation(struct nfsd4_open *open, struct nfs4_ol_stateid *stp,
> > > > int cb_up;
> > > > int status = 0;
> > > > u32 wdeleg = false;
> > > > + struct kstat stat;
> > > > + struct path path;
> > > >
> > > > cb_up = nfsd4_cb_channel_good(oo->oo_owner.so_client);
> > > > open->op_recall = 0;
> > > > @@ -5626,6 +5671,19 @@ nfs4_open_delegation(struct nfsd4_open *open, struct nfs4_ol_stateid *stp,
> > > > wdeleg = open->op_share_access & NFS4_SHARE_ACCESS_WRITE;
> > > > open->op_delegate_type = wdeleg ?
> > > > NFS4_OPEN_DELEGATE_WRITE : NFS4_OPEN_DELEGATE_READ;
> > > > + if (wdeleg) {
> > > > + path.mnt = currentfh->fh_export->ex_path.mnt;
> > > > + path.dentry = currentfh->fh_dentry;
> > > > + if (vfs_getattr(&path, &stat, STATX_BASIC_STATS,
> > > > + AT_STATX_SYNC_AS_STAT)) {
> > > > + nfs4_put_stid(&dp->dl_stid);
> > > > + destroy_delegation(dp);
> > > > + goto out_no_deleg;
> > > > + }
> > > > + dp->dl_cb_fattr.ncf_cur_fsize = stat.size;
> > > > + dp->dl_cb_fattr.ncf_initial_cinfo = nfsd4_change_attribute(&stat,
> > > > + d_inode(currentfh->fh_dentry));
> > > > + }
> > > > nfs4_put_stid(&dp->dl_stid);
> > > > return;
> > > > out_no_deleg:
> > > > diff --git a/fs/nfsd/nfs4xdr.c b/fs/nfsd/nfs4xdr.c
> > > > index 76db2fe29624..5d7e11db8ccf 100644
> > > > --- a/fs/nfsd/nfs4xdr.c
> > > > +++ b/fs/nfsd/nfs4xdr.c
> > > > @@ -2920,6 +2920,77 @@ nfsd4_encode_bitmap(struct xdr_stream *xdr, u32 bmval0, u32 bmval1, u32 bmval2)
> > > > return nfserr_resource;
> > > > }
> > > >
> > > > +static struct file_lock *
> > > > +nfs4_wrdeleg_filelock(struct svc_rqst *rqstp, struct inode *inode)
> > > > +{
> > > > + struct file_lock_context *ctx;
> > > > + struct file_lock *fl;
> > > > +
> > > > + ctx = locks_inode_context(inode);
> > > > + if (!ctx)
> > > > + return NULL;
> > > > + spin_lock(&ctx->flc_lock);
> > > > + list_for_each_entry(fl, &ctx->flc_lease, fl_list) {
> > > > + if (fl->fl_type == F_WRLCK) {
> > > > + spin_unlock(&ctx->flc_lock);
> > > > + return fl;
> > > > + }
> > > > + }
> > > > + spin_unlock(&ctx->flc_lock);
> > > > + return NULL;
> > > > +}
> > > > +
> > > > +static __be32
> > > > +nfs4_handle_wrdeleg_conflict(struct svc_rqst *rqstp, struct inode *inode,
> > > > + bool *modified, u64 *size)
> > > > +{
> > > > + __be32 status;
> > > > + struct file_lock *fl;
> > > > + struct nfs4_delegation *dp;
> > > > + struct nfs4_cb_fattr *ncf;
> > > > + struct iattr attrs;
> > > > +
> > > > + *modified = false;
> > > > + fl = nfs4_wrdeleg_filelock(rqstp, inode);
> > > > + if (!fl)
> > > > + return 0;
> > > > + dp = fl->fl_owner;
> > > > + ncf = &dp->dl_cb_fattr;
> > > > + if (dp->dl_recall.cb_clp == *(rqstp->rq_lease_breaker))
> > > > + return 0;
> > > > +
> > > > + refcount_inc(&dp->dl_stid.sc_count);
> > > > + nfs4_cb_getattr(&dp->dl_cb_fattr);
> > > > + wait_on_bit(&ncf->ncf_cb_flags, CB_GETATTR_BUSY, TASK_INTERRUPTIBLE);
> > > > + if (ncf->ncf_cb_status) {
> > > > + status = nfserrno(nfsd_open_break_lease(inode, NFSD_MAY_READ));
> > > > + nfs4_put_stid(&dp->dl_stid);
> > > > + return status;
> > > > + }
> > > > + ncf->ncf_cur_fsize = ncf->ncf_cb_fsize;
> > > > + if (!ncf->ncf_file_modified &&
> > > > + (ncf->ncf_initial_cinfo != ncf->ncf_cb_change ||
> > > > + ncf->ncf_cur_fsize != ncf->ncf_cb_fsize)) {
> > > > + ncf->ncf_file_modified = true;
> > > > + }
> > > > +
> > > > + if (ncf->ncf_file_modified) {
> > > > + /*
> > > > + * The server would not update the file's metadata
> > > > + * with the client's modified size.
> > > > + * nfsd4 change attribute is constructed from ctime.
> > > > + */
> > > > + attrs.ia_mtime = attrs.ia_ctime = current_time(inode);
> > > > + attrs.ia_valid = ATTR_MTIME | ATTR_CTIME;
> > > > + setattr_copy(&nop_mnt_idmap, inode, &attrs);
> > > > + mark_inode_dirty(inode);
> > > > + *size = ncf->ncf_cur_fsize;
> > > > + *modified = true;
> > > > + }
> > > > + nfs4_put_stid(&dp->dl_stid);
> > > > + return 0;
> > > > +}
> > > > +
> > > > /*
> > > > * Note: @fhp can be NULL; in this case, we might have to compose the filehandle
> > > > * ourselves.
> > > > @@ -2957,6 +3028,8 @@ nfsd4_encode_fattr(struct xdr_stream *xdr, struct svc_fh *fhp,
> > > > .dentry = dentry,
> > > > };
> > > > struct nfsd_net *nn = net_generic(SVC_NET(rqstp), nfsd_net_id);
> > > > + bool file_modified;
> > > > + u64 size = 0;
> > > >
> > > > BUG_ON(bmval1 & NFSD_WRITEONLY_ATTRS_WORD1);
> > > > BUG_ON(!nfsd_attrs_supported(minorversion, bmval));
> > > > @@ -2966,6 +3039,12 @@ nfsd4_encode_fattr(struct xdr_stream *xdr, struct svc_fh *fhp,
> > > > if (status)
> > > > goto out;
> > > > }
> > > > + if (bmval0 & (FATTR4_WORD0_CHANGE | FATTR4_WORD0_SIZE)) {
> > > > + status = nfs4_handle_wrdeleg_conflict(rqstp, d_inode(dentry),
> > > > + &file_modified, &size);
> > > > + if (status)
> > > > + goto out;
> > > > + }
> > > >
> > > > err = vfs_getattr(&path, &stat,
> > > > STATX_BASIC_STATS | STATX_BTIME | STATX_CHANGE_COOKIE,
> > > > @@ -3089,7 +3168,10 @@ nfsd4_encode_fattr(struct xdr_stream *xdr, struct svc_fh *fhp,
> > > > p = xdr_reserve_space(xdr, 8);
> > > > if (!p)
> > > > goto out_resource;
> > > > - p = xdr_encode_hyper(p, stat.size);
> > > > + if (file_modified)
> > > > + p = xdr_encode_hyper(p, size);
> > > > + else
> > > > + p = xdr_encode_hyper(p, stat.size);
> > > > }
> > > > if (bmval0 & FATTR4_WORD0_LINK_SUPPORT) {
> > > > p = xdr_reserve_space(xdr, 4);
> > > > diff --git a/fs/nfsd/state.h b/fs/nfsd/state.h
> > > > index 9fb69ed8ae80..b20b65fe89b4 100644
> > > > --- a/fs/nfsd/state.h
> > > > +++ b/fs/nfsd/state.h
> > > > @@ -121,6 +121,10 @@ struct nfs4_cb_fattr {
> > > > struct nfsd4_callback ncf_getattr;
> > > > u32 ncf_cb_status;
> > > > u32 ncf_cb_bmap[1];
> > > > + unsigned long ncf_cb_flags;
> > > > + bool ncf_file_modified;
> > > > + u64 ncf_initial_cinfo;
> > > > + u64 ncf_cur_fsize;
> > > >
> > > > /* from CB_GETATTR reply */
> > > > u64 ncf_cb_change;
> > > > @@ -744,6 +748,9 @@ extern void nfsd4_client_record_remove(struct nfs4_client *clp);
> > > > extern int nfsd4_client_record_check(struct nfs4_client *clp);
> > > > extern void nfsd4_record_grace_done(struct nfsd_net *nn);
> > > >
> > > > +/* CB_GETTTAR */
> > > > +extern void nfs4_cb_getattr(struct nfs4_cb_fattr *ncf);
> > > > +
> > > > static inline bool try_to_expire_client(struct nfs4_client *clp)
> > > > {
> > > > cmpxchg(&clp->cl_state, NFSD4_COURTESY, NFSD4_EXPIRABLE);
> > > > --
> > > > 2.9.5
> > > >
>
> --
> Jeff Layton <[email protected]>

2023-05-15 20:16:28

by Dai Ngo

[permalink] [raw]
Subject: Re: [PATCH v2 4/4] NFSD: handle GETATTR conflict with write delegation


On 5/15/23 11:58 AM, Jeff Layton wrote:
> On Mon, 2023-05-15 at 11:26 -0700, [email protected] wrote:
>> On 5/15/23 11:14 AM, Olga Kornievskaia wrote:
>>> On Sun, May 14, 2023 at 8:56 PM Dai Ngo <[email protected]> wrote:
>>>> If the GETATTR request on a file that has write delegation in effect
>>>> and the request attributes include the change info and size attribute
>>>> then the request is handled as below:
>>>>
>>>> Server sends CB_GETATTR to client to get the latest change info and file
>>>> size. If these values are the same as the server's cached values then
>>>> the GETATTR proceeds as normal.
>>>>
>>>> If either the change info or file size is different from the server's
>>>> cached values, or the file was already marked as modified, then:
>>>>
>>>> . update time_modify and time_metadata into file's metadata
>>>> with current time
>>>>
>>>> . encode GETATTR as normal except the file size is encoded with
>>>> the value returned from CB_GETATTR
>>>>
>>>> . mark the file as modified
>>>>
>>>> If the CB_GETATTR fails for any reasons, the delegation is recalled
>>>> and NFS4ERR_DELAY is returned for the GETATTR.
>>> Hi Dai,
>>>
>>> I'm curious what does the server gain by implementing handling of
>>> GETATTR with delegations? As far as I can tell it is not strictly
>>> required by the RFC(s). When the file is being written any attempt at
>>> querying its attribute is immediately stale.
>> Yes, you're right that handling of GETATTR with delegations is not
>> required by the spec. The only benefit I see is that the server
>> provides a more accurate state of the file as whether the file has
>> been changed/updated since the client's last GETATTR. This allows
>> the app on the client to take appropriate action (whatever that
>> might be) when sharing files among multiple clients.
>>
>
>
> From RFC 8881 10.4.3:
>
> "It should be noted that the server is under no obligation to use
> CB_GETATTR, and therefore the server MAY simply recall the delegation to
> avoid its use."
>
> As I see it, the main benefit is that you avoid having to recall a write
> delegation when someone does a drive-by stat() on the file (e.g. due to
> a "ls -l" in its parent directory).

Yes, that's right I forgot to mention this. If we don't want to recall the
write delegation then we have to support CB_GETATTR.

Thanks,
-Dai

>
>>>> Signed-off-by: Dai Ngo <[email protected]>
>>>> ---
>>>> fs/nfsd/nfs4state.c | 58 ++++++++++++++++++++++++++++++++++++
>>>> fs/nfsd/nfs4xdr.c | 84 ++++++++++++++++++++++++++++++++++++++++++++++++++++-
>>>> fs/nfsd/state.h | 7 +++++
>>>> 3 files changed, 148 insertions(+), 1 deletion(-)
>>>>
>>>> diff --git a/fs/nfsd/nfs4state.c b/fs/nfsd/nfs4state.c
>>>> index 09a9e16407f9..fb305b28a090 100644
>>>> --- a/fs/nfsd/nfs4state.c
>>>> +++ b/fs/nfsd/nfs4state.c
>>>> @@ -127,6 +127,7 @@ static void free_session(struct nfsd4_session *);
>>>>
>>>> static const struct nfsd4_callback_ops nfsd4_cb_recall_ops;
>>>> static const struct nfsd4_callback_ops nfsd4_cb_notify_lock_ops;
>>>> +static const struct nfsd4_callback_ops nfsd4_cb_getattr_ops;
>>>>
>>>> static struct workqueue_struct *laundry_wq;
>>>>
>>>> @@ -1175,6 +1176,10 @@ alloc_init_deleg(struct nfs4_client *clp, struct nfs4_file *fp,
>>>> dp->dl_recalled = false;
>>>> nfsd4_init_cb(&dp->dl_recall, dp->dl_stid.sc_client,
>>>> &nfsd4_cb_recall_ops, NFSPROC4_CLNT_CB_RECALL);
>>>> + nfsd4_init_cb(&dp->dl_cb_fattr.ncf_getattr, dp->dl_stid.sc_client,
>>>> + &nfsd4_cb_getattr_ops, NFSPROC4_CLNT_CB_GETATTR);
>>>> + dp->dl_cb_fattr.ncf_file_modified = false;
>>>> + dp->dl_cb_fattr.ncf_cb_bmap[0] = FATTR4_WORD0_CHANGE | FATTR4_WORD0_SIZE;
>>>> get_nfs4_file(fp);
>>>> dp->dl_stid.sc_file = fp;
>>>> return dp;
>>>> @@ -2882,11 +2887,49 @@ nfsd4_cb_recall_any_release(struct nfsd4_callback *cb)
>>>> spin_unlock(&nn->client_lock);
>>>> }
>>>>
>>>> +static int
>>>> +nfsd4_cb_getattr_done(struct nfsd4_callback *cb, struct rpc_task *task)
>>>> +{
>>>> + struct nfs4_cb_fattr *ncf =
>>>> + container_of(cb, struct nfs4_cb_fattr, ncf_getattr);
>>>> +
>>>> + ncf->ncf_cb_status = task->tk_status;
>>>> + switch (task->tk_status) {
>>>> + case -NFS4ERR_DELAY:
>>>> + rpc_delay(task, 2 * HZ);
>>>> + return 0;
>>>> + default:
>>>> + return 1;
>>>> + }
>>>> +}
>>>> +
>>>> +static void
>>>> +nfsd4_cb_getattr_release(struct nfsd4_callback *cb)
>>>> +{
>>>> + struct nfs4_cb_fattr *ncf =
>>>> + container_of(cb, struct nfs4_cb_fattr, ncf_getattr);
>>>> +
>>>> + clear_bit(CB_GETATTR_BUSY, &ncf->ncf_cb_flags);
>>>> + wake_up_bit(&ncf->ncf_cb_flags, CB_GETATTR_BUSY);
>>>> +}
>>>> +
>>>> static const struct nfsd4_callback_ops nfsd4_cb_recall_any_ops = {
>>>> .done = nfsd4_cb_recall_any_done,
>>>> .release = nfsd4_cb_recall_any_release,
>>>> };
>>>>
>>>> +static const struct nfsd4_callback_ops nfsd4_cb_getattr_ops = {
>>>> + .done = nfsd4_cb_getattr_done,
>>>> + .release = nfsd4_cb_getattr_release,
>>>> +};
>>>> +
>>>> +void nfs4_cb_getattr(struct nfs4_cb_fattr *ncf)
>>>> +{
>>>> + if (test_and_set_bit(CB_GETATTR_BUSY, &ncf->ncf_cb_flags))
>>>> + return;
>>>> + nfsd4_run_cb(&ncf->ncf_getattr);
>>>> +}
>>>> +
>>>> static struct nfs4_client *create_client(struct xdr_netobj name,
>>>> struct svc_rqst *rqstp, nfs4_verifier *verf)
>>>> {
>>>> @@ -5591,6 +5634,8 @@ nfs4_open_delegation(struct nfsd4_open *open, struct nfs4_ol_stateid *stp,
>>>> int cb_up;
>>>> int status = 0;
>>>> u32 wdeleg = false;
>>>> + struct kstat stat;
>>>> + struct path path;
>>>>
>>>> cb_up = nfsd4_cb_channel_good(oo->oo_owner.so_client);
>>>> open->op_recall = 0;
>>>> @@ -5626,6 +5671,19 @@ nfs4_open_delegation(struct nfsd4_open *open, struct nfs4_ol_stateid *stp,
>>>> wdeleg = open->op_share_access & NFS4_SHARE_ACCESS_WRITE;
>>>> open->op_delegate_type = wdeleg ?
>>>> NFS4_OPEN_DELEGATE_WRITE : NFS4_OPEN_DELEGATE_READ;
>>>> + if (wdeleg) {
>>>> + path.mnt = currentfh->fh_export->ex_path.mnt;
>>>> + path.dentry = currentfh->fh_dentry;
>>>> + if (vfs_getattr(&path, &stat, STATX_BASIC_STATS,
>>>> + AT_STATX_SYNC_AS_STAT)) {
>>>> + nfs4_put_stid(&dp->dl_stid);
>>>> + destroy_delegation(dp);
>>>> + goto out_no_deleg;
>>>> + }
>>>> + dp->dl_cb_fattr.ncf_cur_fsize = stat.size;
>>>> + dp->dl_cb_fattr.ncf_initial_cinfo = nfsd4_change_attribute(&stat,
>>>> + d_inode(currentfh->fh_dentry));
>>>> + }
>>>> nfs4_put_stid(&dp->dl_stid);
>>>> return;
>>>> out_no_deleg:
>>>> diff --git a/fs/nfsd/nfs4xdr.c b/fs/nfsd/nfs4xdr.c
>>>> index 76db2fe29624..5d7e11db8ccf 100644
>>>> --- a/fs/nfsd/nfs4xdr.c
>>>> +++ b/fs/nfsd/nfs4xdr.c
>>>> @@ -2920,6 +2920,77 @@ nfsd4_encode_bitmap(struct xdr_stream *xdr, u32 bmval0, u32 bmval1, u32 bmval2)
>>>> return nfserr_resource;
>>>> }
>>>>
>>>> +static struct file_lock *
>>>> +nfs4_wrdeleg_filelock(struct svc_rqst *rqstp, struct inode *inode)
>>>> +{
>>>> + struct file_lock_context *ctx;
>>>> + struct file_lock *fl;
>>>> +
>>>> + ctx = locks_inode_context(inode);
>>>> + if (!ctx)
>>>> + return NULL;
>>>> + spin_lock(&ctx->flc_lock);
>>>> + list_for_each_entry(fl, &ctx->flc_lease, fl_list) {
>>>> + if (fl->fl_type == F_WRLCK) {
>>>> + spin_unlock(&ctx->flc_lock);
>>>> + return fl;
>>>> + }
>>>> + }
>>>> + spin_unlock(&ctx->flc_lock);
>>>> + return NULL;
>>>> +}
>>>> +
>>>> +static __be32
>>>> +nfs4_handle_wrdeleg_conflict(struct svc_rqst *rqstp, struct inode *inode,
>>>> + bool *modified, u64 *size)
>>>> +{
>>>> + __be32 status;
>>>> + struct file_lock *fl;
>>>> + struct nfs4_delegation *dp;
>>>> + struct nfs4_cb_fattr *ncf;
>>>> + struct iattr attrs;
>>>> +
>>>> + *modified = false;
>>>> + fl = nfs4_wrdeleg_filelock(rqstp, inode);
>>>> + if (!fl)
>>>> + return 0;
>>>> + dp = fl->fl_owner;
>>>> + ncf = &dp->dl_cb_fattr;
>>>> + if (dp->dl_recall.cb_clp == *(rqstp->rq_lease_breaker))
>>>> + return 0;
>>>> +
>>>> + refcount_inc(&dp->dl_stid.sc_count);
>>>> + nfs4_cb_getattr(&dp->dl_cb_fattr);
>>>> + wait_on_bit(&ncf->ncf_cb_flags, CB_GETATTR_BUSY, TASK_INTERRUPTIBLE);
>>>> + if (ncf->ncf_cb_status) {
>>>> + status = nfserrno(nfsd_open_break_lease(inode, NFSD_MAY_READ));
>>>> + nfs4_put_stid(&dp->dl_stid);
>>>> + return status;
>>>> + }
>>>> + ncf->ncf_cur_fsize = ncf->ncf_cb_fsize;
>>>> + if (!ncf->ncf_file_modified &&
>>>> + (ncf->ncf_initial_cinfo != ncf->ncf_cb_change ||
>>>> + ncf->ncf_cur_fsize != ncf->ncf_cb_fsize)) {
>>>> + ncf->ncf_file_modified = true;
>>>> + }
>>>> +
>>>> + if (ncf->ncf_file_modified) {
>>>> + /*
>>>> + * The server would not update the file's metadata
>>>> + * with the client's modified size.
>>>> + * nfsd4 change attribute is constructed from ctime.
>>>> + */
>>>> + attrs.ia_mtime = attrs.ia_ctime = current_time(inode);
>>>> + attrs.ia_valid = ATTR_MTIME | ATTR_CTIME;
>>>> + setattr_copy(&nop_mnt_idmap, inode, &attrs);
>>>> + mark_inode_dirty(inode);
>>>> + *size = ncf->ncf_cur_fsize;
>>>> + *modified = true;
>>>> + }
>>>> + nfs4_put_stid(&dp->dl_stid);
>>>> + return 0;
>>>> +}
>>>> +
>>>> /*
>>>> * Note: @fhp can be NULL; in this case, we might have to compose the filehandle
>>>> * ourselves.
>>>> @@ -2957,6 +3028,8 @@ nfsd4_encode_fattr(struct xdr_stream *xdr, struct svc_fh *fhp,
>>>> .dentry = dentry,
>>>> };
>>>> struct nfsd_net *nn = net_generic(SVC_NET(rqstp), nfsd_net_id);
>>>> + bool file_modified;
>>>> + u64 size = 0;
>>>>
>>>> BUG_ON(bmval1 & NFSD_WRITEONLY_ATTRS_WORD1);
>>>> BUG_ON(!nfsd_attrs_supported(minorversion, bmval));
>>>> @@ -2966,6 +3039,12 @@ nfsd4_encode_fattr(struct xdr_stream *xdr, struct svc_fh *fhp,
>>>> if (status)
>>>> goto out;
>>>> }
>>>> + if (bmval0 & (FATTR4_WORD0_CHANGE | FATTR4_WORD0_SIZE)) {
>>>> + status = nfs4_handle_wrdeleg_conflict(rqstp, d_inode(dentry),
>>>> + &file_modified, &size);
>>>> + if (status)
>>>> + goto out;
>>>> + }
>>>>
>>>> err = vfs_getattr(&path, &stat,
>>>> STATX_BASIC_STATS | STATX_BTIME | STATX_CHANGE_COOKIE,
>>>> @@ -3089,7 +3168,10 @@ nfsd4_encode_fattr(struct xdr_stream *xdr, struct svc_fh *fhp,
>>>> p = xdr_reserve_space(xdr, 8);
>>>> if (!p)
>>>> goto out_resource;
>>>> - p = xdr_encode_hyper(p, stat.size);
>>>> + if (file_modified)
>>>> + p = xdr_encode_hyper(p, size);
>>>> + else
>>>> + p = xdr_encode_hyper(p, stat.size);
>>>> }
>>>> if (bmval0 & FATTR4_WORD0_LINK_SUPPORT) {
>>>> p = xdr_reserve_space(xdr, 4);
>>>> diff --git a/fs/nfsd/state.h b/fs/nfsd/state.h
>>>> index 9fb69ed8ae80..b20b65fe89b4 100644
>>>> --- a/fs/nfsd/state.h
>>>> +++ b/fs/nfsd/state.h
>>>> @@ -121,6 +121,10 @@ struct nfs4_cb_fattr {
>>>> struct nfsd4_callback ncf_getattr;
>>>> u32 ncf_cb_status;
>>>> u32 ncf_cb_bmap[1];
>>>> + unsigned long ncf_cb_flags;
>>>> + bool ncf_file_modified;
>>>> + u64 ncf_initial_cinfo;
>>>> + u64 ncf_cur_fsize;
>>>>
>>>> /* from CB_GETATTR reply */
>>>> u64 ncf_cb_change;
>>>> @@ -744,6 +748,9 @@ extern void nfsd4_client_record_remove(struct nfs4_client *clp);
>>>> extern int nfsd4_client_record_check(struct nfs4_client *clp);
>>>> extern void nfsd4_record_grace_done(struct nfsd_net *nn);
>>>>
>>>> +/* CB_GETTTAR */
>>>> +extern void nfs4_cb_getattr(struct nfs4_cb_fattr *ncf);
>>>> +
>>>> static inline bool try_to_expire_client(struct nfs4_client *clp)
>>>> {
>>>> cmpxchg(&clp->cl_state, NFSD4_COURTESY, NFSD4_EXPIRABLE);
>>>> --
>>>> 2.9.5
>>>>

2023-05-15 20:31:27

by Jeffrey Layton

[permalink] [raw]
Subject: Re: [PATCH v2 4/4] NFSD: handle GETATTR conflict with write delegation

On Mon, 2023-05-15 at 16:10 -0400, Olga Kornievskaia wrote:
> On Mon, May 15, 2023 at 2:58 PM Jeff Layton <[email protected]> wrote:
> >
> > On Mon, 2023-05-15 at 11:26 -0700, [email protected] wrote:
> > > On 5/15/23 11:14 AM, Olga Kornievskaia wrote:
> > > > On Sun, May 14, 2023 at 8:56 PM Dai Ngo <[email protected]> wrote:
> > > > > If the GETATTR request on a file that has write delegation in effect
> > > > > and the request attributes include the change info and size attribute
> > > > > then the request is handled as below:
> > > > >
> > > > > Server sends CB_GETATTR to client to get the latest change info and file
> > > > > size. If these values are the same as the server's cached values then
> > > > > the GETATTR proceeds as normal.
> > > > >
> > > > > If either the change info or file size is different from the server's
> > > > > cached values, or the file was already marked as modified, then:
> > > > >
> > > > > . update time_modify and time_metadata into file's metadata
> > > > > with current time
> > > > >
> > > > > . encode GETATTR as normal except the file size is encoded with
> > > > > the value returned from CB_GETATTR
> > > > >
> > > > > . mark the file as modified
> > > > >
> > > > > If the CB_GETATTR fails for any reasons, the delegation is recalled
> > > > > and NFS4ERR_DELAY is returned for the GETATTR.
> > > > Hi Dai,
> > > >
> > > > I'm curious what does the server gain by implementing handling of
> > > > GETATTR with delegations? As far as I can tell it is not strictly
> > > > required by the RFC(s). When the file is being written any attempt at
> > > > querying its attribute is immediately stale.
> > >
> > > Yes, you're right that handling of GETATTR with delegations is not
> > > required by the spec. The only benefit I see is that the server
> > > provides a more accurate state of the file as whether the file has
> > > been changed/updated since the client's last GETATTR. This allows
> > > the app on the client to take appropriate action (whatever that
> > > might be) when sharing files among multiple clients.
> > >
> >
> >
> >
> > From RFC 8881 10.4.3:
> >
> > "It should be noted that the server is under no obligation to use
> > CB_GETATTR, and therefore the server MAY simply recall the delegation to
> > avoid its use."
>
> This is a "MAY" which means the server can choose to not to and just
> return the info it currently has without recalling a delegation.
>
>

That's not at all how I read that. To me, it sounds like it's saying
that the only alternative to implementing CB_GETATTR is to recall the
delegation. If that's not the case, then we should clarify that in the
spec.

--
Jeff Layton <[email protected]>

2023-05-15 21:46:30

by Chuck Lever

[permalink] [raw]
Subject: Re: [PATCH v2 4/4] NFSD: handle GETATTR conflict with write delegation



> On May 15, 2023, at 4:21 PM, Jeff Layton <[email protected]> wrote:
>
> On Mon, 2023-05-15 at 16:10 -0400, Olga Kornievskaia wrote:
>> On Mon, May 15, 2023 at 2:58 PM Jeff Layton <[email protected]> wrote:
>>>
>>> On Mon, 2023-05-15 at 11:26 -0700, [email protected] wrote:
>>>> On 5/15/23 11:14 AM, Olga Kornievskaia wrote:
>>>>> On Sun, May 14, 2023 at 8:56 PM Dai Ngo <[email protected]> wrote:
>>>>>> If the GETATTR request on a file that has write delegation in effect
>>>>>> and the request attributes include the change info and size attribute
>>>>>> then the request is handled as below:
>>>>>>
>>>>>> Server sends CB_GETATTR to client to get the latest change info and file
>>>>>> size. If these values are the same as the server's cached values then
>>>>>> the GETATTR proceeds as normal.
>>>>>>
>>>>>> If either the change info or file size is different from the server's
>>>>>> cached values, or the file was already marked as modified, then:
>>>>>>
>>>>>> . update time_modify and time_metadata into file's metadata
>>>>>> with current time
>>>>>>
>>>>>> . encode GETATTR as normal except the file size is encoded with
>>>>>> the value returned from CB_GETATTR
>>>>>>
>>>>>> . mark the file as modified
>>>>>>
>>>>>> If the CB_GETATTR fails for any reasons, the delegation is recalled
>>>>>> and NFS4ERR_DELAY is returned for the GETATTR.
>>>>> Hi Dai,
>>>>>
>>>>> I'm curious what does the server gain by implementing handling of
>>>>> GETATTR with delegations? As far as I can tell it is not strictly
>>>>> required by the RFC(s). When the file is being written any attempt at
>>>>> querying its attribute is immediately stale.
>>>>
>>>> Yes, you're right that handling of GETATTR with delegations is not
>>>> required by the spec. The only benefit I see is that the server
>>>> provides a more accurate state of the file as whether the file has
>>>> been changed/updated since the client's last GETATTR. This allows
>>>> the app on the client to take appropriate action (whatever that
>>>> might be) when sharing files among multiple clients.
>>>>
>>>
>>>
>>>
>>> From RFC 8881 10.4.3:
>>>
>>> "It should be noted that the server is under no obligation to use
>>> CB_GETATTR, and therefore the server MAY simply recall the delegation to
>>> avoid its use."
>>
>> This is a "MAY" which means the server can choose to not to and just
>> return the info it currently has without recalling a delegation.
>>
>>
>
> That's not at all how I read that. To me, it sounds like it's saying
> that the only alternative to implementing CB_GETATTR is to recall the
> delegation. If that's not the case, then we should clarify that in the
> spec.

The meaning of MAY is spelled out in RFC 2119. MAY does not mean
"the only alternative". I read this statement as alerting client
implementers that a compliant server is permitted to skip
CB_GETATTR, simply by recalling the delegation. Technically
speaking this compliance statement does not otherwise restrict
server behavior, though the author might have had something else
in mind.

I'm leery of the complexity that CB_GETATTR adds to NFSD and
the protocol. In addition, section 10.4 is riddled with errors,
albeit minor ones; that suggests this part of the protocol is
not well-reviewed.

It's not apparent how much gain is provided by CB_GETATTR.
IIRC NFSD can recall a delegation on the same nfsd thread as an
incoming request, so the turnaround for a recall from a local
client is going to be quick.

It would be good to know how many other server implementations
support CB_GETATTR.

I'm rather leaning towards postponing 3/4 and 4/4 and instead
taking a more incremental approach. Let's get the basic Write
delegation support in, and possibly add a counter or two to
find out how often a GETATTR on a write-delegated file results
in a delegation recall.

We can then take some time to disambiguate the spec language and
look at other implementations to see if this extra protocol is
really of value.

I think it would be good to understand whether Write delegation
without CB_GETATTR can result in a performance regression (say,
because the server is recalling delegations more often for a
given workload).


--
Chuck Lever


2023-05-15 23:04:22

by Jeffrey Layton

[permalink] [raw]
Subject: Re: [PATCH v2 4/4] NFSD: handle GETATTR conflict with write delegation

On Mon, 2023-05-15 at 21:37 +0000, Chuck Lever III wrote:
>
> > On May 15, 2023, at 4:21 PM, Jeff Layton <[email protected]> wrote:
> >
> > On Mon, 2023-05-15 at 16:10 -0400, Olga Kornievskaia wrote:
> > > On Mon, May 15, 2023 at 2:58 PM Jeff Layton <[email protected]> wrote:
> > > >
> > > > On Mon, 2023-05-15 at 11:26 -0700, [email protected] wrote:
> > > > > On 5/15/23 11:14 AM, Olga Kornievskaia wrote:
> > > > > > On Sun, May 14, 2023 at 8:56 PM Dai Ngo <[email protected]> wrote:
> > > > > > > If the GETATTR request on a file that has write delegation in effect
> > > > > > > and the request attributes include the change info and size attribute
> > > > > > > then the request is handled as below:
> > > > > > >
> > > > > > > Server sends CB_GETATTR to client to get the latest change info and file
> > > > > > > size. If these values are the same as the server's cached values then
> > > > > > > the GETATTR proceeds as normal.
> > > > > > >
> > > > > > > If either the change info or file size is different from the server's
> > > > > > > cached values, or the file was already marked as modified, then:
> > > > > > >
> > > > > > > . update time_modify and time_metadata into file's metadata
> > > > > > > with current time
> > > > > > >
> > > > > > > . encode GETATTR as normal except the file size is encoded with
> > > > > > > the value returned from CB_GETATTR
> > > > > > >
> > > > > > > . mark the file as modified
> > > > > > >
> > > > > > > If the CB_GETATTR fails for any reasons, the delegation is recalled
> > > > > > > and NFS4ERR_DELAY is returned for the GETATTR.
> > > > > > Hi Dai,
> > > > > >
> > > > > > I'm curious what does the server gain by implementing handling of
> > > > > > GETATTR with delegations? As far as I can tell it is not strictly
> > > > > > required by the RFC(s). When the file is being written any attempt at
> > > > > > querying its attribute is immediately stale.
> > > > >
> > > > > Yes, you're right that handling of GETATTR with delegations is not
> > > > > required by the spec. The only benefit I see is that the server
> > > > > provides a more accurate state of the file as whether the file has
> > > > > been changed/updated since the client's last GETATTR. This allows
> > > > > the app on the client to take appropriate action (whatever that
> > > > > might be) when sharing files among multiple clients.
> > > > >
> > > >
> > > >
> > > >
> > > > From RFC 8881 10.4.3:
> > > >
> > > > "It should be noted that the server is under no obligation to use
> > > > CB_GETATTR, and therefore the server MAY simply recall the delegation to
> > > > avoid its use."
> > >
> > > This is a "MAY" which means the server can choose to not to and just
> > > return the info it currently has without recalling a delegation.
> > >
> > >
> >
> > That's not at all how I read that. To me, it sounds like it's saying
> > that the only alternative to implementing CB_GETATTR is to recall the
> > delegation. If that's not the case, then we should clarify that in the
> > spec.
>
> The meaning of MAY is spelled out in RFC 2119. MAY does not mean
> "the only alternative". I read this statement as alerting client
> implementers that a compliant server is permitted to skip
> CB_GETATTR, simply by recalling the delegation. Technically
> speaking this compliance statement does not otherwise restrict
> server behavior, though the author might have had something else
> in mind.
>
> I'm leery of the complexity that CB_GETATTR adds to NFSD and
> the protocol. In addition, section 10.4 is riddled with errors,
> albeit minor ones; that suggests this part of the protocol is
> not well-reviewed.
>
> It's not apparent how much gain is provided by CB_GETATTR.
> IIRC NFSD can recall a delegation on the same nfsd thread as an
> incoming request, so the turnaround for a recall from a local
> client is going to be quick.
>
> It would be good to know how many other server implementations
> support CB_GETATTR.

> I'm rather leaning towards postponing 3/4 and 4/4 and instead
> taking a more incremental approach. Let's get the basic Write
> delegation support in, and possibly add a counter or two to
> find out how often a GETATTR on a write-delegated file results
> in a delegation recall.
>
> We can then take some time to disambiguate the spec language and
> look at other implementations to see if this extra protocol is
> really of value.
>
> I think it would be good to understand whether Write delegation
> without CB_GETATTR can result in a performance regression (say,
> because the server is recalling delegations more often for a
> given workload).
>


Ganesha has had write delegation and CB_GETATTR support for years.

Isn't CB_GETATTR the main benefit of a write delegation in the first
place? A write deleg doesn't really give any benefit otherwise, as you
can buffer writes anyway without one.

AIUI, the point of a write delegation is to allow other clients (and
potentially the server) to get up to date information on file sizes and
change attr when there is a single, active writer.

Without CB_GETATTR, your first stat() against the file will give you
fairly up to date info (since you'll have to recall the delegation), but
then you'll be back to the server just reporting the size and change
attr that it has at the time.

--
Jeff Layton <[email protected]>

2023-05-16 00:16:48

by Chuck Lever

[permalink] [raw]
Subject: Re: [PATCH v2 4/4] NFSD: handle GETATTR conflict with write delegation



> On May 15, 2023, at 6:53 PM, Jeff Layton <[email protected]> wrote:
>
> On Mon, 2023-05-15 at 21:37 +0000, Chuck Lever III wrote:
>>
>>> On May 15, 2023, at 4:21 PM, Jeff Layton <[email protected]> wrote:
>>>
>>> On Mon, 2023-05-15 at 16:10 -0400, Olga Kornievskaia wrote:
>>>> On Mon, May 15, 2023 at 2:58 PM Jeff Layton <[email protected]> wrote:
>>>>>
>>>>> On Mon, 2023-05-15 at 11:26 -0700, [email protected] wrote:
>>>>>> On 5/15/23 11:14 AM, Olga Kornievskaia wrote:
>>>>>>> On Sun, May 14, 2023 at 8:56 PM Dai Ngo <[email protected]> wrote:
>>>>>>>> If the GETATTR request on a file that has write delegation in effect
>>>>>>>> and the request attributes include the change info and size attribute
>>>>>>>> then the request is handled as below:
>>>>>>>>
>>>>>>>> Server sends CB_GETATTR to client to get the latest change info and file
>>>>>>>> size. If these values are the same as the server's cached values then
>>>>>>>> the GETATTR proceeds as normal.
>>>>>>>>
>>>>>>>> If either the change info or file size is different from the server's
>>>>>>>> cached values, or the file was already marked as modified, then:
>>>>>>>>
>>>>>>>> . update time_modify and time_metadata into file's metadata
>>>>>>>> with current time
>>>>>>>>
>>>>>>>> . encode GETATTR as normal except the file size is encoded with
>>>>>>>> the value returned from CB_GETATTR
>>>>>>>>
>>>>>>>> . mark the file as modified
>>>>>>>>
>>>>>>>> If the CB_GETATTR fails for any reasons, the delegation is recalled
>>>>>>>> and NFS4ERR_DELAY is returned for the GETATTR.
>>>>>>> Hi Dai,
>>>>>>>
>>>>>>> I'm curious what does the server gain by implementing handling of
>>>>>>> GETATTR with delegations? As far as I can tell it is not strictly
>>>>>>> required by the RFC(s). When the file is being written any attempt at
>>>>>>> querying its attribute is immediately stale.
>>>>>>
>>>>>> Yes, you're right that handling of GETATTR with delegations is not
>>>>>> required by the spec. The only benefit I see is that the server
>>>>>> provides a more accurate state of the file as whether the file has
>>>>>> been changed/updated since the client's last GETATTR. This allows
>>>>>> the app on the client to take appropriate action (whatever that
>>>>>> might be) when sharing files among multiple clients.
>>>>>>
>>>>>
>>>>>
>>>>>
>>>>> From RFC 8881 10.4.3:
>>>>>
>>>>> "It should be noted that the server is under no obligation to use
>>>>> CB_GETATTR, and therefore the server MAY simply recall the delegation to
>>>>> avoid its use."
>>>>
>>>> This is a "MAY" which means the server can choose to not to and just
>>>> return the info it currently has without recalling a delegation.
>>>>
>>>>
>>>
>>> That's not at all how I read that. To me, it sounds like it's saying
>>> that the only alternative to implementing CB_GETATTR is to recall the
>>> delegation. If that's not the case, then we should clarify that in the
>>> spec.
>>
>> The meaning of MAY is spelled out in RFC 2119. MAY does not mean
>> "the only alternative". I read this statement as alerting client
>> implementers that a compliant server is permitted to skip
>> CB_GETATTR, simply by recalling the delegation. Technically
>> speaking this compliance statement does not otherwise restrict
>> server behavior, though the author might have had something else
>> in mind.
>>
>> I'm leery of the complexity that CB_GETATTR adds to NFSD and
>> the protocol. In addition, section 10.4 is riddled with errors,
>> albeit minor ones; that suggests this part of the protocol is
>> not well-reviewed.
>>
>> It's not apparent how much gain is provided by CB_GETATTR.
>> IIRC NFSD can recall a delegation on the same nfsd thread as an
>> incoming request, so the turnaround for a recall from a local
>> client is going to be quick.
>>
>> It would be good to know how many other server implementations
>> support CB_GETATTR.
>
>> I'm rather leaning towards postponing 3/4 and 4/4 and instead
>> taking a more incremental approach. Let's get the basic Write
>> delegation support in, and possibly add a counter or two to
>> find out how often a GETATTR on a write-delegated file results
>> in a delegation recall.
>>
>> We can then take some time to disambiguate the spec language and
>> look at other implementations to see if this extra protocol is
>> really of value.
>>
>> I think it would be good to understand whether Write delegation
>> without CB_GETATTR can result in a performance regression (say,
>> because the server is recalling delegations more often for a
>> given workload).
>
> Ganesha has had write delegation and CB_GETATTR support for years.

Does OnTAP support write delegation? I heard a rumor NetApp
disabled it because of the volume of customer calls involving
delegation with the Linux client, but that could be old news.

How about Solaris? My close contact with the Solaris NFS team
as the Linux NFS client implementation matured has colored my
experience with write delegation. It is complex and subtle.


> Isn't CB_GETATTR the main benefit of a write delegation in the first
> place? A write deleg doesn't really give any benefit otherwise, as you
> can buffer writes anyway without one.
>
> AIUI, the point of a write delegation is to allow other clients (and
> potentially the server) to get up to date information on file sizes and
> change attr when there is a single, active writer.

The benefits of write delegation depend on the client implementation
and the workload. A client may use a write delegation to indicate
that it can handle locking requests for a file locally, for example.


> Without CB_GETATTR, your first stat() against the file will give you
> fairly up to date info (since you'll have to recall the delegation), but
> then you'll be back to the server just reporting the size and change
> attr that it has at the time.

Which is the current behavior, yes? As long as behavior is not
regressing, I don't foresee a problem with doing CB_GETATTR in 6.6
or later.


--
Chuck Lever


2023-05-16 00:54:33

by Dai Ngo

[permalink] [raw]
Subject: Re: [PATCH v2 4/4] NFSD: handle GETATTR conflict with write delegation


On 5/15/23 5:06 PM, Chuck Lever III wrote:
>
>> On May 15, 2023, at 6:53 PM, Jeff Layton <[email protected]> wrote:
>>
>> On Mon, 2023-05-15 at 21:37 +0000, Chuck Lever III wrote:
>>>> On May 15, 2023, at 4:21 PM, Jeff Layton <[email protected]> wrote:
>>>>
>>>> On Mon, 2023-05-15 at 16:10 -0400, Olga Kornievskaia wrote:
>>>>> On Mon, May 15, 2023 at 2:58 PM Jeff Layton <[email protected]> wrote:
>>>>>> On Mon, 2023-05-15 at 11:26 -0700, [email protected] wrote:
>>>>>>> On 5/15/23 11:14 AM, Olga Kornievskaia wrote:
>>>>>>>> On Sun, May 14, 2023 at 8:56 PM Dai Ngo <[email protected]> wrote:
>>>>>>>>> If the GETATTR request on a file that has write delegation in effect
>>>>>>>>> and the request attributes include the change info and size attribute
>>>>>>>>> then the request is handled as below:
>>>>>>>>>
>>>>>>>>> Server sends CB_GETATTR to client to get the latest change info and file
>>>>>>>>> size. If these values are the same as the server's cached values then
>>>>>>>>> the GETATTR proceeds as normal.
>>>>>>>>>
>>>>>>>>> If either the change info or file size is different from the server's
>>>>>>>>> cached values, or the file was already marked as modified, then:
>>>>>>>>>
>>>>>>>>> . update time_modify and time_metadata into file's metadata
>>>>>>>>> with current time
>>>>>>>>>
>>>>>>>>> . encode GETATTR as normal except the file size is encoded with
>>>>>>>>> the value returned from CB_GETATTR
>>>>>>>>>
>>>>>>>>> . mark the file as modified
>>>>>>>>>
>>>>>>>>> If the CB_GETATTR fails for any reasons, the delegation is recalled
>>>>>>>>> and NFS4ERR_DELAY is returned for the GETATTR.
>>>>>>>> Hi Dai,
>>>>>>>>
>>>>>>>> I'm curious what does the server gain by implementing handling of
>>>>>>>> GETATTR with delegations? As far as I can tell it is not strictly
>>>>>>>> required by the RFC(s). When the file is being written any attempt at
>>>>>>>> querying its attribute is immediately stale.
>>>>>>> Yes, you're right that handling of GETATTR with delegations is not
>>>>>>> required by the spec. The only benefit I see is that the server
>>>>>>> provides a more accurate state of the file as whether the file has
>>>>>>> been changed/updated since the client's last GETATTR. This allows
>>>>>>> the app on the client to take appropriate action (whatever that
>>>>>>> might be) when sharing files among multiple clients.
>>>>>>>
>>>>>>
>>>>>>
>>>>>> From RFC 8881 10.4.3:
>>>>>>
>>>>>> "It should be noted that the server is under no obligation to use
>>>>>> CB_GETATTR, and therefore the server MAY simply recall the delegation to
>>>>>> avoid its use."
>>>>> This is a "MAY" which means the server can choose to not to and just
>>>>> return the info it currently has without recalling a delegation.
>>>>>
>>>>>
>>>> That's not at all how I read that. To me, it sounds like it's saying
>>>> that the only alternative to implementing CB_GETATTR is to recall the
>>>> delegation. If that's not the case, then we should clarify that in the
>>>> spec.
>>> The meaning of MAY is spelled out in RFC 2119. MAY does not mean
>>> "the only alternative". I read this statement as alerting client
>>> implementers that a compliant server is permitted to skip
>>> CB_GETATTR, simply by recalling the delegation. Technically
>>> speaking this compliance statement does not otherwise restrict
>>> server behavior, though the author might have had something else
>>> in mind.
>>>
>>> I'm leery of the complexity that CB_GETATTR adds to NFSD and
>>> the protocol. In addition, section 10.4 is riddled with errors,
>>> albeit minor ones; that suggests this part of the protocol is
>>> not well-reviewed.
>>>
>>> It's not apparent how much gain is provided by CB_GETATTR.
>>> IIRC NFSD can recall a delegation on the same nfsd thread as an
>>> incoming request, so the turnaround for a recall from a local
>>> client is going to be quick.
>>>
>>> It would be good to know how many other server implementations
>>> support CB_GETATTR.
>>> I'm rather leaning towards postponing 3/4 and 4/4 and instead
>>> taking a more incremental approach. Let's get the basic Write
>>> delegation support in, and possibly add a counter or two to
>>> find out how often a GETATTR on a write-delegated file results
>>> in a delegation recall.
>>>
>>> We can then take some time to disambiguate the spec language and
>>> look at other implementations to see if this extra protocol is
>>> really of value.
>>>
>>> I think it would be good to understand whether Write delegation
>>> without CB_GETATTR can result in a performance regression (say,
>>> because the server is recalling delegations more often for a
>>> given workload).
>> Ganesha has had write delegation and CB_GETATTR support for years.
> Does OnTAP support write delegation? I heard a rumor NetApp
> disabled it because of the volume of customer calls involving
> delegation with the Linux client, but that could be old news.

So far I have not run into any problem of NFS client using write
delegation. My testing so far, besides manual tests, were the delegation
tests in pynfs for 4.0 and 4.1 and nfstest_delegation test.

The nfstest_delegation result is:
1785 tests (1783 passed, 2 failed)

The 2 failed test is regarding whether the NFSv4 server should allow
the client to use write delegation state to do READ.

>
> How about Solaris? My close contact with the Solaris NFS team
> as the Linux NFS client implementation matured has colored my
> experience with write delegation. It is complex and subtle.

Solaris has write delegation support for years.

>
>
>> Isn't CB_GETATTR the main benefit of a write delegation in the first
>> place? A write deleg doesn't really give any benefit otherwise, as you
>> can buffer writes anyway without one.
>>
>> AIUI, the point of a write delegation is to allow other clients (and
>> potentially the server) to get up to date information on file sizes and
>> change attr when there is a single, active writer.
> The benefits of write delegation depend on the client implementation
> and the workload. A client may use a write delegation to indicate
> that it can handle locking requests for a file locally, for example.

This is what the Linux NFS client does, handling locking requests for a
file locally if the file has write delegation.

>
>
>> Without CB_GETATTR, your first stat() against the file will give you
>> fairly up to date info (since you'll have to recall the delegation), but
>> then you'll be back to the server just reporting the size and change
>> attr that it has at the time.
> Which is the current behavior, yes? As long as behavior is not
> regressing, I don't foresee a problem with doing CB_GETATTR in 6.6
> or later.

I think defer the handling of GETATTR with CB_GETATTR until 6.6 is okay
so we can get some runtime with write delegation to have some confident
with its stability.

For now we just return the (potentially stale) file attribute available
at the server. It should not cause significant problem since there won't
be any real application depends on this feature yet, at least on the Linux
NFS server.

-Dai

>
>
> --
> Chuck Lever
>
>

2023-05-16 01:12:08

by Olga Kornievskaia

[permalink] [raw]
Subject: Re: [PATCH v2 4/4] NFSD: handle GETATTR conflict with write delegation

On Mon, May 15, 2023 at 8:06 PM Chuck Lever III <[email protected]> wrote:
>
>
>
> > On May 15, 2023, at 6:53 PM, Jeff Layton <[email protected]> wrote:
> >
> > On Mon, 2023-05-15 at 21:37 +0000, Chuck Lever III wrote:
> >>
> >>> On May 15, 2023, at 4:21 PM, Jeff Layton <[email protected]> wrote:
> >>>
> >>> On Mon, 2023-05-15 at 16:10 -0400, Olga Kornievskaia wrote:
> >>>> On Mon, May 15, 2023 at 2:58 PM Jeff Layton <[email protected]> wrote:
> >>>>>
> >>>>> On Mon, 2023-05-15 at 11:26 -0700, [email protected] wrote:
> >>>>>> On 5/15/23 11:14 AM, Olga Kornievskaia wrote:
> >>>>>>> On Sun, May 14, 2023 at 8:56 PM Dai Ngo <[email protected]> wrote:
> >>>>>>>> If the GETATTR request on a file that has write delegation in effect
> >>>>>>>> and the request attributes include the change info and size attribute
> >>>>>>>> then the request is handled as below:
> >>>>>>>>
> >>>>>>>> Server sends CB_GETATTR to client to get the latest change info and file
> >>>>>>>> size. If these values are the same as the server's cached values then
> >>>>>>>> the GETATTR proceeds as normal.
> >>>>>>>>
> >>>>>>>> If either the change info or file size is different from the server's
> >>>>>>>> cached values, or the file was already marked as modified, then:
> >>>>>>>>
> >>>>>>>> . update time_modify and time_metadata into file's metadata
> >>>>>>>> with current time
> >>>>>>>>
> >>>>>>>> . encode GETATTR as normal except the file size is encoded with
> >>>>>>>> the value returned from CB_GETATTR
> >>>>>>>>
> >>>>>>>> . mark the file as modified
> >>>>>>>>
> >>>>>>>> If the CB_GETATTR fails for any reasons, the delegation is recalled
> >>>>>>>> and NFS4ERR_DELAY is returned for the GETATTR.
> >>>>>>> Hi Dai,
> >>>>>>>
> >>>>>>> I'm curious what does the server gain by implementing handling of
> >>>>>>> GETATTR with delegations? As far as I can tell it is not strictly
> >>>>>>> required by the RFC(s). When the file is being written any attempt at
> >>>>>>> querying its attribute is immediately stale.
> >>>>>>
> >>>>>> Yes, you're right that handling of GETATTR with delegations is not
> >>>>>> required by the spec. The only benefit I see is that the server
> >>>>>> provides a more accurate state of the file as whether the file has
> >>>>>> been changed/updated since the client's last GETATTR. This allows
> >>>>>> the app on the client to take appropriate action (whatever that
> >>>>>> might be) when sharing files among multiple clients.
> >>>>>>
> >>>>>
> >>>>>
> >>>>>
> >>>>> From RFC 8881 10.4.3:
> >>>>>
> >>>>> "It should be noted that the server is under no obligation to use
> >>>>> CB_GETATTR, and therefore the server MAY simply recall the delegation to
> >>>>> avoid its use."
> >>>>
> >>>> This is a "MAY" which means the server can choose to not to and just
> >>>> return the info it currently has without recalling a delegation.
> >>>>
> >>>>
> >>>
> >>> That's not at all how I read that. To me, it sounds like it's saying
> >>> that the only alternative to implementing CB_GETATTR is to recall the
> >>> delegation. If that's not the case, then we should clarify that in the
> >>> spec.
> >>
> >> The meaning of MAY is spelled out in RFC 2119. MAY does not mean
> >> "the only alternative". I read this statement as alerting client
> >> implementers that a compliant server is permitted to skip
> >> CB_GETATTR, simply by recalling the delegation. Technically
> >> speaking this compliance statement does not otherwise restrict
> >> server behavior, though the author might have had something else
> >> in mind.
> >>
> >> I'm leery of the complexity that CB_GETATTR adds to NFSD and
> >> the protocol. In addition, section 10.4 is riddled with errors,
> >> albeit minor ones; that suggests this part of the protocol is
> >> not well-reviewed.
> >>
> >> It's not apparent how much gain is provided by CB_GETATTR.
> >> IIRC NFSD can recall a delegation on the same nfsd thread as an
> >> incoming request, so the turnaround for a recall from a local
> >> client is going to be quick.
> >>
> >> It would be good to know how many other server implementations
> >> support CB_GETATTR.
> >
> >> I'm rather leaning towards postponing 3/4 and 4/4 and instead
> >> taking a more incremental approach. Let's get the basic Write
> >> delegation support in, and possibly add a counter or two to
> >> find out how often a GETATTR on a write-delegated file results
> >> in a delegation recall.
> >>
> >> We can then take some time to disambiguate the spec language and
> >> look at other implementations to see if this extra protocol is
> >> really of value.
> >>
> >> I think it would be good to understand whether Write delegation
> >> without CB_GETATTR can result in a performance regression (say,
> >> because the server is recalling delegations more often for a
> >> given workload).
> >
> > Ganesha has had write delegation and CB_GETATTR support for years.
>
> Does OnTAP support write delegation? I heard a rumor NetApp
> disabled it because of the volume of customer calls involving
> delegation with the Linux client, but that could be old news.

ONTAP supports delegations (though i'm not sure if the default is on.
So I'm not sure if the current recommendation is not to enable it due
to known issues). ONTAP does not send any CB_GETATTR. Whether or not
it makes it a non-spec compliant I'm not sure as the spec isn't too
clear (at least to me). We as a linux team did raise the issue with
ONTAP about them not implementing CB_GETATTR. But so far no complaints
have been made about lacking CB_GETATTR.

I raised the question because I was really curious what real
usefulness really comes from it? or perhaps what are the expectations
that the server can guarantee or what conclusions can the 2nd client
can make? As I mentioned in my initial posting, the information that
the server (and then the 2nd client) gets is (possibly) stale as soon
as it gets it because the client with delegation keeps writing to the
file after getting the CB_GETATTR. The complexity that's being added
to the server (and to the client) doesn't seem to be worth it.

> How about Solaris? My close contact with the Solaris NFS team
> as the Linux NFS client implementation matured has colored my
> experience with write delegation. It is complex and subtle.
>
>
> > Isn't CB_GETATTR the main benefit of a write delegation in the first
> > place? A write deleg doesn't really give any benefit otherwise, as you
> > can buffer writes anyway without one.
> >
> > AIUI, the point of a write delegation is to allow other clients (and
> > potentially the server) to get up to date information on file sizes and
> > change attr when there is a single, active writer.
>
> The benefits of write delegation depend on the client implementation
> and the workload. A client may use a write delegation to indicate
> that it can handle locking requests for a file locally, for example.

I thought the benefits of delegations have always been the savings
gotten from doing local opens and locking. Open is an expensive server
operation.

Because the (linux) client is flushing the writes exactly the same way
whether or not it's holding a delegation so the server's view is the
same (with or without delegation). Now of course I'm speaking about
the linux client so one can speculate what if there is a client
implementation that uses delegation to do more caching? Sure... But I
still don't really see how the 2nd client can expect to rely/make
critical decisions based on the information it gets from a GETATTR.

> > Without CB_GETATTR, your first stat() against the file will give you
> > fairly up to date info (since you'll have to recall the delegation), but
> > then you'll be back to the server just reporting the size and change
> > attr that it has at the time.
>
> Which is the current behavior, yes? As long as behavior is not
> regressing, I don't foresee a problem with doing CB_GETATTR in 6.6
> or later.
>
>
> --
> Chuck Lever
>
>

2023-05-16 01:52:19

by Trond Myklebust

[permalink] [raw]
Subject: Re: [PATCH v2 4/4] NFSD: handle GETATTR conflict with write delegation

On Mon, 2023-05-15 at 18:53 -0400, Jeff Layton wrote:
>
> Isn't CB_GETATTR the main benefit of a write delegation in the first
> place? A write deleg doesn't really give any benefit otherwise, as
> you
> can buffer writes anyway without one.
>
> AIUI, the point of a write delegation is to allow other clients (and
> potentially the server) to get up to date information on file sizes
> and
> change attr when there is a single, active writer.
>
> Without CB_GETATTR, your first stat() against the file will give you
> fairly up to date info (since you'll have to recall the delegation),
> but
> then you'll be back to the server just reporting the size and change
> attr that it has at the time.
>

The only advantage of CB_GETATTR is that it allows you to determine
whether or not the client holding the delegation is also holding cached
writes. Since we pretty much always rely on close-to-open semantics
anyway, the benefit of implementing it is pretty marginal.

Personally, I see CB_GETATTR as being more useful once servers start
implementing the timestamp/attribute delegations as per
https://datatracker.ietf.org/doc/draft-ietf-nfsv4-delstid/ 
Since those delegations allow the client to be authoritative for the
atime and mtime timestamps, and to also cache those, then CB_GETATTR
becomes a necessity in order to correctly timestamp writes that have
already been committed to the file.

--
Trond Myklebust
Linux NFS client maintainer, Hammerspace
[email protected]