nfsd_setattr() can kick off a CB_RECALL (via
notify_change() -> break_lease()) if a delegation is present. Before
returning NFS4ERR_DELAY, give the client holding that delegation a
chance to return it, and then retry nfsd_setattr() once.
Signed-off-by: Chuck Lever <[email protected]>
---
fs/nfsd/nfs4proc.c | 18 +++++++++++++++---
fs/nfsd/nfs4state.c | 17 +++++++++++++++++
fs/nfsd/nfsd.h | 1 +
fs/nfsd/trace.h | 19 +++++++++++++++++++
fs/nfsd/xdr4.h | 2 ++
5 files changed, 54 insertions(+), 3 deletions(-)
diff --git a/fs/nfsd/nfs4proc.c b/fs/nfsd/nfs4proc.c
index 42bfe0d769ec..62a267bb2ce5 100644
--- a/fs/nfsd/nfs4proc.c
+++ b/fs/nfsd/nfs4proc.c
@@ -1142,7 +1142,7 @@ nfsd4_setattr(struct svc_rqst *rqstp, struct nfsd4_compound_state *cstate,
{
struct nfsd4_setattr *setattr = &u->setattr;
__be32 status = nfs_ok;
- int err;
+ int err, retries;
if (setattr->sa_iattr.ia_valid & ATTR_SIZE) {
status = nfs4_preprocess_stateid_op(rqstp, cstate,
@@ -1173,8 +1173,20 @@ nfsd4_setattr(struct svc_rqst *rqstp, struct nfsd4_compound_state *cstate,
&setattr->sa_label);
if (status)
goto out;
- status = nfsd_setattr(rqstp, &cstate->current_fh, &setattr->sa_iattr,
- 0, (time64_t)0);
+
+ retries = 1;
+ do {
+ status = nfsd_setattr(rqstp, &cstate->current_fh,
+ &setattr->sa_iattr, 0, (time64_t)0);
+ if (status != nfserr_jukebox)
+ break;
+ if (!retries--)
+ break;
+
+ fh_clear_pre_post_attrs(&cstate->current_fh);
+ nfsd4_wait_for_delegreturn(rqstp, &cstate->current_fh);
+ } while (1);
+
out:
fh_drop_write(&cstate->current_fh);
return status;
diff --git a/fs/nfsd/nfs4state.c b/fs/nfsd/nfs4state.c
index 0cf5a4bb36df..e3ac89d4a859 100644
--- a/fs/nfsd/nfs4state.c
+++ b/fs/nfsd/nfs4state.c
@@ -4689,6 +4689,23 @@ nfs4_share_conflict(struct svc_fh *current_fh, unsigned int deny_type)
return ret;
}
+/**
+ * nfsd4_wait_for_delegreturn - wait for delegations to be returned
+ * @rqstp: the RPC transaction being executed
+ * @fhp: filehandle of file being waited for
+ *
+ * A better approach would wait for the DELEGRETURN operation, and
+ * retry just as soon as it was done.
+ *
+ * The timeout prevents deadlock if all nfsd threads happen to be
+ * tied up waiting for returning delegations.
+ */
+void nfsd4_wait_for_delegreturn(struct svc_rqst *rqstp, struct svc_fh *fhp)
+{
+ trace_nfsd_delegreturn_wait(rqstp, fhp);
+ msleep(NFSD_DELEGRETURN_TIMEOUT);
+}
+
static void nfsd4_cb_recall_prepare(struct nfsd4_callback *cb)
{
struct nfs4_delegation *dp = cb_to_delegation(cb);
diff --git a/fs/nfsd/nfsd.h b/fs/nfsd/nfsd.h
index 9a8b09afc173..0b800a154828 100644
--- a/fs/nfsd/nfsd.h
+++ b/fs/nfsd/nfsd.h
@@ -341,6 +341,7 @@ void nfsd_lockd_shutdown(void);
#define NFSD_LAUNDROMAT_MINTIMEOUT 1 /* seconds */
#define NFSD_COURTESY_CLIENT_TIMEOUT (24 * 60 * 60) /* seconds */
+#define NFSD_DELEGRETURN_TIMEOUT (30) /* milliseconds */
/*
* The following attributes are currently not supported by the NFSv4 server:
diff --git a/fs/nfsd/trace.h b/fs/nfsd/trace.h
index 8c3d5f88072f..dd2654cac132 100644
--- a/fs/nfsd/trace.h
+++ b/fs/nfsd/trace.h
@@ -443,6 +443,25 @@ DEFINE_NFSD_COPY_ERR_EVENT(clone_file_range_err);
#include "filecache.h"
#include "vfs.h"
+TRACE_EVENT(nfsd_delegreturn_wait,
+ TP_PROTO(
+ const struct svc_rqst *rqstp,
+ const struct svc_fh *fhp
+ ),
+ TP_ARGS(rqstp, fhp),
+ TP_STRUCT__entry(
+ __field(u32, xid)
+ __field(u32, fh_hash)
+ ),
+ TP_fast_assign(
+ __entry->xid = be32_to_cpu(rqstp->rq_xid);
+ __entry->fh_hash = knfsd_fh_hash(&fhp->fh_handle);
+ ),
+ TP_printk("xid=0x%08x fh_hash=0x%08x",
+ __entry->xid, __entry->fh_hash
+ )
+);
+
DECLARE_EVENT_CLASS(nfsd_stateid_class,
TP_PROTO(stateid_t *stp),
TP_ARGS(stp),
diff --git a/fs/nfsd/xdr4.h b/fs/nfsd/xdr4.h
index 7b744011f2d3..5b9213076e95 100644
--- a/fs/nfsd/xdr4.h
+++ b/fs/nfsd/xdr4.h
@@ -788,6 +788,8 @@ extern __be32 nfsd4_destroy_clientid(struct svc_rqst *, struct nfsd4_compound_st
union nfsd4_op_u *u);
__be32 nfsd4_reclaim_complete(struct svc_rqst *, struct nfsd4_compound_state *,
union nfsd4_op_u *u);
+extern void nfsd4_wait_for_delegreturn(struct svc_rqst *rqstp,
+ struct svc_fh *fhp);
extern __be32 nfsd4_process_open1(struct nfsd4_compound_state *,
struct nfsd4_open *open, struct nfsd_net *nn);
extern __be32 nfsd4_process_open2(struct svc_rqst *rqstp,
On Wed, 2022-08-03 at 10:37 -0400, Chuck Lever wrote:
> nfsd_setattr() can kick off a CB_RECALL (via
> notify_change() -> break_lease()) if a delegation is present. Before
> returning NFS4ERR_DELAY, give the client holding that delegation a
> chance to return it and then retry the nfsd_setattr() again, once.
>
> Signed-off-by: Chuck Lever <[email protected]>
> ---
> fs/nfsd/nfs4proc.c | 18 +++++++++++++++---
> fs/nfsd/nfs4state.c | 17 +++++++++++++++++
> fs/nfsd/nfsd.h | 1 +
> fs/nfsd/trace.h | 19 +++++++++++++++++++
> fs/nfsd/xdr4.h | 2 ++
> 5 files changed, 54 insertions(+), 3 deletions(-)
>
> diff --git a/fs/nfsd/nfs4proc.c b/fs/nfsd/nfs4proc.c
> index 42bfe0d769ec..62a267bb2ce5 100644
> --- a/fs/nfsd/nfs4proc.c
> +++ b/fs/nfsd/nfs4proc.c
> @@ -1142,7 +1142,7 @@ nfsd4_setattr(struct svc_rqst *rqstp, struct nfsd4_compound_state *cstate,
> {
> struct nfsd4_setattr *setattr = &u->setattr;
> __be32 status = nfs_ok;
> - int err;
> + int err, retries;
>
> if (setattr->sa_iattr.ia_valid & ATTR_SIZE) {
> status = nfs4_preprocess_stateid_op(rqstp, cstate,
> @@ -1173,8 +1173,20 @@ nfsd4_setattr(struct svc_rqst *rqstp, struct nfsd4_compound_state *cstate,
> &setattr->sa_label);
> if (status)
> goto out;
> - status = nfsd_setattr(rqstp, &cstate->current_fh, &setattr->sa_iattr,
> - 0, (time64_t)0);
> +
> + retries = 1;
> + do {
> + status = nfsd_setattr(rqstp, &cstate->current_fh,
> + &setattr->sa_iattr, 0, (time64_t)0);
> + if (status != nfserr_jukebox)
> + break;
> + if (!retries--)
> + break;
> +
> + fh_clear_pre_post_attrs(&cstate->current_fh);
> + nfsd4_wait_for_delegreturn(rqstp, &cstate->current_fh);
> + } while (1);
> +
> out:
> fh_drop_write(&cstate->current_fh);
> return status;
> diff --git a/fs/nfsd/nfs4state.c b/fs/nfsd/nfs4state.c
> index 0cf5a4bb36df..e3ac89d4a859 100644
> --- a/fs/nfsd/nfs4state.c
> +++ b/fs/nfsd/nfs4state.c
> @@ -4689,6 +4689,23 @@ nfs4_share_conflict(struct svc_fh *current_fh, unsigned int deny_type)
> return ret;
> }
>
> +/**
> + * nfsd4_wait_for_delegreturn - wait for delegations to be returned
> + * @rqstp: the RPC transaction being executed
> + * @fhp: filehandle of file being waited for
> + *
> + * A better approach would wait for the DELEGRETURN operation, and
> + * retry just as soon as it was done.
> + *
> + * The timeout prevents deadlock if all nfsd threads happen to be
> + * tied up waiting for returning delegations.
> + */
> +void nfsd4_wait_for_delegreturn(struct svc_rqst *rqstp, struct svc_fh *fhp)
> +{
> + trace_nfsd_delegreturn_wait(rqstp, fhp);
> + msleep(NFSD_DELEGRETURN_TIMEOUT);
Like you mentioned in the cover letter, this is pretty nasty.
You could use wait_var_event_timeout here on the inode, paired with a
wake_up_var when a delegation is returned.
For the condition, you could use something like this:
!inode->i_flctx || list_empty(&inode->i_flctx->flc_lease)
Maybe even a lockless check similar to the one in break_deleg()?
> +}
> +
> static void nfsd4_cb_recall_prepare(struct nfsd4_callback *cb)
> {
> struct nfs4_delegation *dp = cb_to_delegation(cb);
> diff --git a/fs/nfsd/nfsd.h b/fs/nfsd/nfsd.h
> index 9a8b09afc173..0b800a154828 100644
> --- a/fs/nfsd/nfsd.h
> +++ b/fs/nfsd/nfsd.h
> @@ -341,6 +341,7 @@ void nfsd_lockd_shutdown(void);
>
> #define NFSD_LAUNDROMAT_MINTIMEOUT 1 /* seconds */
> #define NFSD_COURTESY_CLIENT_TIMEOUT (24 * 60 * 60) /* seconds */
> +#define NFSD_DELEGRETURN_TIMEOUT (30) /* milliseconds */
>
> /*
> * The following attributes are currently not supported by the NFSv4 server:
> diff --git a/fs/nfsd/trace.h b/fs/nfsd/trace.h
> index 8c3d5f88072f..dd2654cac132 100644
> --- a/fs/nfsd/trace.h
> +++ b/fs/nfsd/trace.h
> @@ -443,6 +443,25 @@ DEFINE_NFSD_COPY_ERR_EVENT(clone_file_range_err);
> #include "filecache.h"
> #include "vfs.h"
>
> +TRACE_EVENT(nfsd_delegreturn_wait,
> + TP_PROTO(
> + const struct svc_rqst *rqstp,
> + const struct svc_fh *fhp
> + ),
> + TP_ARGS(rqstp, fhp),
> + TP_STRUCT__entry(
> + __field(u32, xid)
> + __field(u32, fh_hash)
> + ),
> + TP_fast_assign(
> + __entry->xid = be32_to_cpu(rqstp->rq_xid);
> + __entry->fh_hash = knfsd_fh_hash(&fhp->fh_handle);
> + ),
> + TP_printk("xid=0x%08x fh_hash=0x%08x",
> + __entry->xid, __entry->fh_hash
> + )
> +);
> +
> DECLARE_EVENT_CLASS(nfsd_stateid_class,
> TP_PROTO(stateid_t *stp),
> TP_ARGS(stp),
> diff --git a/fs/nfsd/xdr4.h b/fs/nfsd/xdr4.h
> index 7b744011f2d3..5b9213076e95 100644
> --- a/fs/nfsd/xdr4.h
> +++ b/fs/nfsd/xdr4.h
> @@ -788,6 +788,8 @@ extern __be32 nfsd4_destroy_clientid(struct svc_rqst *, struct nfsd4_compound_st
> union nfsd4_op_u *u);
> __be32 nfsd4_reclaim_complete(struct svc_rqst *, struct nfsd4_compound_state *,
> union nfsd4_op_u *u);
> +extern void nfsd4_wait_for_delegreturn(struct svc_rqst *rqstp,
> + struct svc_fh *fhp);
> extern __be32 nfsd4_process_open1(struct nfsd4_compound_state *,
> struct nfsd4_open *open, struct nfsd_net *nn);
> extern __be32 nfsd4_process_open2(struct svc_rqst *rqstp,
>
>
--
Jeff Layton <[email protected]>
> On Aug 3, 2022, at 3:47 PM, Jeff Layton <[email protected]> wrote:
>
> On Wed, 2022-08-03 at 10:37 -0400, Chuck Lever wrote:
>> nfsd_setattr() can kick off a CB_RECALL (via
>> notify_change() -> break_lease()) if a delegation is present. Before
>> returning NFS4ERR_DELAY, give the client holding that delegation a
>> chance to return it and then retry the nfsd_setattr() again, once.
>>
>> Signed-off-by: Chuck Lever <[email protected]>
>> ---
>> fs/nfsd/nfs4proc.c | 18 +++++++++++++++---
>> fs/nfsd/nfs4state.c | 17 +++++++++++++++++
>> fs/nfsd/nfsd.h | 1 +
>> fs/nfsd/trace.h | 19 +++++++++++++++++++
>> fs/nfsd/xdr4.h | 2 ++
>> 5 files changed, 54 insertions(+), 3 deletions(-)
>>
>> diff --git a/fs/nfsd/nfs4proc.c b/fs/nfsd/nfs4proc.c
>> index 42bfe0d769ec..62a267bb2ce5 100644
>> --- a/fs/nfsd/nfs4proc.c
>> +++ b/fs/nfsd/nfs4proc.c
>> @@ -1142,7 +1142,7 @@ nfsd4_setattr(struct svc_rqst *rqstp, struct nfsd4_compound_state *cstate,
>> {
>> struct nfsd4_setattr *setattr = &u->setattr;
>> __be32 status = nfs_ok;
>> - int err;
>> + int err, retries;
>>
>> if (setattr->sa_iattr.ia_valid & ATTR_SIZE) {
>> status = nfs4_preprocess_stateid_op(rqstp, cstate,
>> @@ -1173,8 +1173,20 @@ nfsd4_setattr(struct svc_rqst *rqstp, struct nfsd4_compound_state *cstate,
>> &setattr->sa_label);
>> if (status)
>> goto out;
>> - status = nfsd_setattr(rqstp, &cstate->current_fh, &setattr->sa_iattr,
>> - 0, (time64_t)0);
>> +
>> + retries = 1;
>> + do {
>> + status = nfsd_setattr(rqstp, &cstate->current_fh,
>> + &setattr->sa_iattr, 0, (time64_t)0);
>> + if (status != nfserr_jukebox)
>> + break;
>> + if (!retries--)
>> + break;
>> +
>> + fh_clear_pre_post_attrs(&cstate->current_fh);
>> + nfsd4_wait_for_delegreturn(rqstp, &cstate->current_fh);
>> + } while (1);
>> +
>> out:
>> fh_drop_write(&cstate->current_fh);
>> return status;
>> diff --git a/fs/nfsd/nfs4state.c b/fs/nfsd/nfs4state.c
>> index 0cf5a4bb36df..e3ac89d4a859 100644
>> --- a/fs/nfsd/nfs4state.c
>> +++ b/fs/nfsd/nfs4state.c
>> @@ -4689,6 +4689,23 @@ nfs4_share_conflict(struct svc_fh *current_fh, unsigned int deny_type)
>> return ret;
>> }
>>
>> +/**
>> + * nfsd4_wait_for_delegreturn - wait for delegations to be returned
>> + * @rqstp: the RPC transaction being executed
>> + * @fhp: filehandle of file being waited for
>> + *
>> + * A better approach would wait for the DELEGRETURN operation, and
>> + * retry just as soon as it was done.
>> + *
>> + * The timeout prevents deadlock if all nfsd threads happen to be
>> + * tied up waiting for returning delegations.
>> + */
>> +void nfsd4_wait_for_delegreturn(struct svc_rqst *rqstp, struct svc_fh *fhp)
>> +{
>> + trace_nfsd_delegreturn_wait(rqstp, fhp);
>> + msleep(NFSD_DELEGRETURN_TIMEOUT);
>
> Like you mentioned in the cover letter, this is pretty nasty.
Right, it's proof-of-concept stuff.
> You could use wait_var_event_timeout here on the inode, paired with a
> wake_up_var when a delegation is returned.
I was looking for an NFSD-specific data structure to add a
completion to, but yeah, I guess the inode itself could work.
I'll have a look at that for the next version of this series.
Thanks for the suggestion!
> For the condition, you could use something like this:
>
> !inode->i_flctx || list_empty(&inode->i_flctx->flc_lease)
>
> Maybe even a similar lockless check as the one in break_deleg?
>
>> +}
>> +
>> static void nfsd4_cb_recall_prepare(struct nfsd4_callback *cb)
>> {
>> struct nfs4_delegation *dp = cb_to_delegation(cb);
>> diff --git a/fs/nfsd/nfsd.h b/fs/nfsd/nfsd.h
>> index 9a8b09afc173..0b800a154828 100644
>> --- a/fs/nfsd/nfsd.h
>> +++ b/fs/nfsd/nfsd.h
>> @@ -341,6 +341,7 @@ void nfsd_lockd_shutdown(void);
>>
>> #define NFSD_LAUNDROMAT_MINTIMEOUT 1 /* seconds */
>> #define NFSD_COURTESY_CLIENT_TIMEOUT (24 * 60 * 60) /* seconds */
>> +#define NFSD_DELEGRETURN_TIMEOUT (30) /* milliseconds */
>>
>> /*
>> * The following attributes are currently not supported by the NFSv4 server:
>> diff --git a/fs/nfsd/trace.h b/fs/nfsd/trace.h
>> index 8c3d5f88072f..dd2654cac132 100644
>> --- a/fs/nfsd/trace.h
>> +++ b/fs/nfsd/trace.h
>> @@ -443,6 +443,25 @@ DEFINE_NFSD_COPY_ERR_EVENT(clone_file_range_err);
>> #include "filecache.h"
>> #include "vfs.h"
>>
>> +TRACE_EVENT(nfsd_delegreturn_wait,
>> + TP_PROTO(
>> + const struct svc_rqst *rqstp,
>> + const struct svc_fh *fhp
>> + ),
>> + TP_ARGS(rqstp, fhp),
>> + TP_STRUCT__entry(
>> + __field(u32, xid)
>> + __field(u32, fh_hash)
>> + ),
>> + TP_fast_assign(
>> + __entry->xid = be32_to_cpu(rqstp->rq_xid);
>> + __entry->fh_hash = knfsd_fh_hash(&fhp->fh_handle);
>> + ),
>> + TP_printk("xid=0x%08x fh_hash=0x%08x",
>> + __entry->xid, __entry->fh_hash
>> + )
>> +);
>> +
>> DECLARE_EVENT_CLASS(nfsd_stateid_class,
>> TP_PROTO(stateid_t *stp),
>> TP_ARGS(stp),
>> diff --git a/fs/nfsd/xdr4.h b/fs/nfsd/xdr4.h
>> index 7b744011f2d3..5b9213076e95 100644
>> --- a/fs/nfsd/xdr4.h
>> +++ b/fs/nfsd/xdr4.h
>> @@ -788,6 +788,8 @@ extern __be32 nfsd4_destroy_clientid(struct svc_rqst *, struct nfsd4_compound_st
>> union nfsd4_op_u *u);
>> __be32 nfsd4_reclaim_complete(struct svc_rqst *, struct nfsd4_compound_state *,
>> union nfsd4_op_u *u);
>> +extern void nfsd4_wait_for_delegreturn(struct svc_rqst *rqstp,
>> + struct svc_fh *fhp);
>> extern __be32 nfsd4_process_open1(struct nfsd4_compound_state *,
>> struct nfsd4_open *open, struct nfsd_net *nn);
>> extern __be32 nfsd4_process_open2(struct svc_rqst *rqstp,
>>
>>
>
> --
> Jeff Layton <[email protected]>
--
Chuck Lever