From: Anna Schumaker <[email protected]>
The scenario is as follows:
- The client attempts to remove a file on the server, but the remove is
interrupted AFTER the server receives it.
- At the same time, another thread removes the same file on the server
before NFSD has a chance to remove it
- The client then attempts another NFS operation with the same slot.
Because another thread removed the file the vfs returns -ENOENT to NFSD,
which causes NFSD to reply to the next operation on the same slot with
the result of the REMOVE (even if we asked for an OPEN). The client
detects the mismatched operations during decoding, and returns
-EREMOTEIO to the application.
The timing is tricky to get right on this, so I added a 3-second sleep
to nfsd4_remove() before calling nfsd_unlink():
diff --git a/fs/nfsd/nfs4proc.c b/fs/nfsd/nfs4proc.c
index a09c35f0f6f0..bd93be50eaa8 100644
--- a/fs/nfsd/nfs4proc.c
+++ b/fs/nfsd/nfs4proc.c
@@ -851,6 +851,8 @@ nfsd4_remove(struct svc_rqst *rqstp, struct nfsd4_compound_state *cstate,
if (opens_in_grace(SVC_NET(rqstp)))
return nfserr_grace;
+
+ ssleep(3);
status = nfsd_unlink(rqstp, &cstate->current_fh, 0,
remove->rm_name, remove->rm_namelen);
if (!status) {
I'm able to hit this every time using the following script combined with
the artifical delay on the server:
#!/bin/bash
SERVER=192.168.111.200
SERVER_DIR=/srv/test
CLIENT_DIR=/mnt/test
ssh $SERVER "echo test > $SERVER_DIR/test1"
rm -v $CLIENT_DIR/test1 &
sleep 1
killall -9 rm
ssh $SERVER "rm $SERVER_DIR/test1"
echo "test2" > $CLIENT_DIR/test2
I was able to solve the issue by sending a SEQUENCE using the same slot.
The server replies to this with NFS4ERR_SEQ_FALSE_RETRY instead of an
operation from the reply cache, and we are able to recover from here.
Thoughts?
Anna
Anna Schumaker (1):
NFS: Fix interrupted slots by sending a solo SEQUENCE operation
fs/nfs/nfs4proc.c | 17 +++++++++++++++--
1 file changed, 15 insertions(+), 2 deletions(-)
--
2.27.0
From: Anna Schumaker <[email protected]>
We used to do this before 3453d5708b33, but this was changed to better
handle the NFS4ERR_SEQ_MISORDERED error code. This commit fixed the slot
re-use case when the server doesn't receive the interrupted operation,
but if the server does receive the operation then it could still end up
replying to the client with mis-matched operations from the reply cache.
We can fix this by sending a SEQUENCE to the server while recovering from
a SEQ_MISORDERED error when we detect that we are in an interrupted slot
situation.
Fixes: 3453d5708b33 (NFSv4.1: Avoid false retries when RPC calls are interrupted)
Signed-off-by: Anna Schumaker <[email protected]>
---
fs/nfs/nfs4proc.c | 17 +++++++++++++++--
1 file changed, 15 insertions(+), 2 deletions(-)
diff --git a/fs/nfs/nfs4proc.c b/fs/nfs/nfs4proc.c
index e32717fd1169..5de41a5772f0 100644
--- a/fs/nfs/nfs4proc.c
+++ b/fs/nfs/nfs4proc.c
@@ -774,6 +774,14 @@ static void nfs4_slot_sequence_acked(struct nfs4_slot *slot,
slot->seq_nr_last_acked = seqnr;
}
+static void nfs4_probe_sequence(struct nfs_client *client, const struct cred *cred,
+ struct nfs4_slot *slot)
+{
+ struct rpc_task *task = _nfs41_proc_sequence(client, cred, slot, true);
+ if (!IS_ERR(task))
+ rpc_wait_for_completion_task(task);
+}
+
static int nfs41_sequence_process(struct rpc_task *task,
struct nfs4_sequence_res *res)
{
@@ -790,6 +798,7 @@ static int nfs41_sequence_process(struct rpc_task *task,
goto out;
session = slot->table->session;
+ clp = session->clp;
trace_nfs4_sequence_done(session, res);
@@ -804,7 +813,6 @@ static int nfs41_sequence_process(struct rpc_task *task,
nfs4_slot_sequence_acked(slot, slot->seq_nr);
/* Update the slot's sequence and clientid lease timer */
slot->seq_done = 1;
- clp = session->clp;
do_renew_lease(clp, res->sr_timestamp);
/* Check sequence flags */
nfs41_handle_sequence_flag_errors(clp, res->sr_status_flags,
@@ -852,10 +860,15 @@ static int nfs41_sequence_process(struct rpc_task *task,
/*
* Were one or more calls using this slot interrupted?
* If the server never received the request, then our
- * transmitted slot sequence number may be too high.
+ * transmitted slot sequence number may be too high. However,
+ * if the server did receive the request then it might
+ * accidentally give us a reply with a mismatched operation.
+ * We can sort this out by sending a lone sequence operation
+ * to the server on the same slot.
*/
if ((s32)(slot->seq_nr - slot->seq_nr_last_acked) > 1) {
slot->seq_nr--;
+ nfs4_probe_sequence(clp, task->tk_msg.rpc_cred, slot);
goto retry_nowait;
}
/*
--
2.27.0
On Wed, 2020-07-08 at 11:50 -0400, [email protected] wrote:
> From: Anna Schumaker <[email protected]>
>
> We used to do this before 3453d5708b33, but this was changed to
> better
> handle the NFS4ERR_SEQ_MISORDERED error code. This commit fixed the
> slot
> re-use case when the server doesn't receive the interrupted
> operation,
> but if the server does receive the operation then it could still end
> up
> replying to the client with mis-matched operations from the reply
> cache.
>
> We can fix this by sending a SEQUENCE to the server while recovering
> from
> a SEQ_MISORDERED error when we detect that we are in an interrupted
> slot
> situation.
>
> Fixes: 3453d5708b33 (NFSv4.1: Avoid false retries when RPC calls are
> interrupted)
> Signed-off-by: Anna Schumaker <[email protected]>
> ---
> fs/nfs/nfs4proc.c | 17 +++++++++++++++--
> 1 file changed, 15 insertions(+), 2 deletions(-)
>
> diff --git a/fs/nfs/nfs4proc.c b/fs/nfs/nfs4proc.c
> index e32717fd1169..5de41a5772f0 100644
> --- a/fs/nfs/nfs4proc.c
> +++ b/fs/nfs/nfs4proc.c
> @@ -774,6 +774,14 @@ static void nfs4_slot_sequence_acked(struct
> nfs4_slot *slot,
> slot->seq_nr_last_acked = seqnr;
> }
>
> +static void nfs4_probe_sequence(struct nfs_client *client, const
> struct cred *cred,
> + struct nfs4_slot *slot)
> +{
> + struct rpc_task *task = _nfs41_proc_sequence(client, cred,
> slot, true);
> + if (!IS_ERR(task))
> + rpc_wait_for_completion_task(task);
Hmm... I am a little concerned about the wait here, since we don't know
what kind of thread this is.
Any chance we could kick off a _nfs41_proc_sequence asynchronously, and
then perhaps requeue the original task to wait for the next free slot?
I suppose one issue there would be if the 'original task is an earlier
call to _nfs41_proc_sequence, but perhaps that can be worked around?
> +}
> +
> static int nfs41_sequence_process(struct rpc_task *task,
> struct nfs4_sequence_res *res)
> {
> @@ -790,6 +798,7 @@ static int nfs41_sequence_process(struct rpc_task
> *task,
> goto out;
>
> session = slot->table->session;
> + clp = session->clp;
>
> trace_nfs4_sequence_done(session, res);
>
> @@ -804,7 +813,6 @@ static int nfs41_sequence_process(struct rpc_task
> *task,
> nfs4_slot_sequence_acked(slot, slot->seq_nr);
> /* Update the slot's sequence and clientid lease timer
> */
> slot->seq_done = 1;
> - clp = session->clp;
> do_renew_lease(clp, res->sr_timestamp);
> /* Check sequence flags */
> nfs41_handle_sequence_flag_errors(clp, res-
> >sr_status_flags,
> @@ -852,10 +860,15 @@ static int nfs41_sequence_process(struct
> rpc_task *task,
> /*
> * Were one or more calls using this slot interrupted?
> * If the server never received the request, then our
> - * transmitted slot sequence number may be too high.
> + * transmitted slot sequence number may be too high.
> However,
> + * if the server did receive the request then it might
> + * accidentally give us a reply with a mismatched
> operation.
> + * We can sort this out by sending a lone sequence
> operation
> + * to the server on the same slot.
> */
> if ((s32)(slot->seq_nr - slot->seq_nr_last_acked) > 1)
> {
> slot->seq_nr--;
> + nfs4_probe_sequence(clp, task->tk_msg.rpc_cred,
> slot);
> goto retry_nowait;
> }
> /*
--
Trond Myklebust
CTO, Hammerspace Inc
4984 El Camino Real, Suite 208
Los Altos, CA 94022
http://www.hammer.space
On Wed, Jul 8, 2020 at 12:00 PM Trond Myklebust <[email protected]> wrote:
>
> On Wed, 2020-07-08 at 11:50 -0400, [email protected] wrote:
> > From: Anna Schumaker <[email protected]>
> >
> > We used to do this before 3453d5708b33, but this was changed to
> > better
> > handle the NFS4ERR_SEQ_MISORDERED error code. This commit fixed the
> > slot
> > re-use case when the server doesn't receive the interrupted
> > operation,
> > but if the server does receive the operation then it could still end
> > up
> > replying to the client with mis-matched operations from the reply
> > cache.
> >
> > We can fix this by sending a SEQUENCE to the server while recovering
> > from
> > a SEQ_MISORDERED error when we detect that we are in an interrupted
> > slot
> > situation.
> >
> > Fixes: 3453d5708b33 (NFSv4.1: Avoid false retries when RPC calls are
> > interrupted)
> > Signed-off-by: Anna Schumaker <[email protected]>
> > ---
> > fs/nfs/nfs4proc.c | 17 +++++++++++++++--
> > 1 file changed, 15 insertions(+), 2 deletions(-)
> >
> > diff --git a/fs/nfs/nfs4proc.c b/fs/nfs/nfs4proc.c
> > index e32717fd1169..5de41a5772f0 100644
> > --- a/fs/nfs/nfs4proc.c
> > +++ b/fs/nfs/nfs4proc.c
> > @@ -774,6 +774,14 @@ static void nfs4_slot_sequence_acked(struct
> > nfs4_slot *slot,
> > slot->seq_nr_last_acked = seqnr;
> > }
> >
> > +static void nfs4_probe_sequence(struct nfs_client *client, const
> > struct cred *cred,
> > + struct nfs4_slot *slot)
> > +{
> > + struct rpc_task *task = _nfs41_proc_sequence(client, cred,
> > slot, true);
> > + if (!IS_ERR(task))
> > + rpc_wait_for_completion_task(task);
>
> Hmm... I am a little concerned about the wait here, since we don't know
> what kind of thread this is.
>
> Any chance we could kick off a _nfs41_proc_sequence asynchronously, and
> then perhaps requeue the original task to wait for the next free slot?
> I suppose one issue there would be if the 'original task is an earlier
> call to _nfs41_proc_sequence, but perhaps that can be worked around?
I'll try it and see what happens. Thanks for the feedback!
Anna
>
> > +}
> > +
> > static int nfs41_sequence_process(struct rpc_task *task,
> > struct nfs4_sequence_res *res)
> > {
> > @@ -790,6 +798,7 @@ static int nfs41_sequence_process(struct rpc_task
> > *task,
> > goto out;
> >
> > session = slot->table->session;
> > + clp = session->clp;
> >
> > trace_nfs4_sequence_done(session, res);
> >
> > @@ -804,7 +813,6 @@ static int nfs41_sequence_process(struct rpc_task
> > *task,
> > nfs4_slot_sequence_acked(slot, slot->seq_nr);
> > /* Update the slot's sequence and clientid lease timer
> > */
> > slot->seq_done = 1;
> > - clp = session->clp;
> > do_renew_lease(clp, res->sr_timestamp);
> > /* Check sequence flags */
> > nfs41_handle_sequence_flag_errors(clp, res-
> > >sr_status_flags,
> > @@ -852,10 +860,15 @@ static int nfs41_sequence_process(struct
> > rpc_task *task,
> > /*
> > * Were one or more calls using this slot interrupted?
> > * If the server never received the request, then our
> > - * transmitted slot sequence number may be too high.
> > + * transmitted slot sequence number may be too high.
> > However,
> > + * if the server did receive the request then it might
> > + * accidentally give us a reply with a mismatched
> > operation.
> > + * We can sort this out by sending a lone sequence
> > operation
> > + * to the server on the same slot.
> > */
> > if ((s32)(slot->seq_nr - slot->seq_nr_last_acked) > 1)
> > {
> > slot->seq_nr--;
> > + nfs4_probe_sequence(clp, task->tk_msg.rpc_cred,
> > slot);
> > goto retry_nowait;
> > }
> > /*
> --
> Trond Myklebust
> CTO, Hammerspace Inc
> 4984 El Camino Real, Suite 208
> Los Altos, CA 94022
> http://www.hammer.space
>
>
On Wed, Jul 8, 2020 at 12:08 PM Anna Schumaker <[email protected]> wrote:
>
> On Wed, Jul 8, 2020 at 12:00 PM Trond Myklebust <[email protected]> wrote:
> >
> > On Wed, 2020-07-08 at 11:50 -0400, [email protected] wrote:
> > > From: Anna Schumaker <[email protected]>
> > >
> > > We used to do this before 3453d5708b33, but this was changed to
> > > better
> > > handle the NFS4ERR_SEQ_MISORDERED error code. This commit fixed the
> > > slot
> > > re-use case when the server doesn't receive the interrupted
> > > operation,
> > > but if the server does receive the operation then it could still end
> > > up
> > > replying to the client with mis-matched operations from the reply
> > > cache.
> > >
> > > We can fix this by sending a SEQUENCE to the server while recovering
> > > from
> > > a SEQ_MISORDERED error when we detect that we are in an interrupted
> > > slot
> > > situation.
> > >
> > > Fixes: 3453d5708b33 (NFSv4.1: Avoid false retries when RPC calls are
> > > interrupted)
> > > Signed-off-by: Anna Schumaker <[email protected]>
> > > ---
> > > fs/nfs/nfs4proc.c | 17 +++++++++++++++--
> > > 1 file changed, 15 insertions(+), 2 deletions(-)
> > >
> > > diff --git a/fs/nfs/nfs4proc.c b/fs/nfs/nfs4proc.c
> > > index e32717fd1169..5de41a5772f0 100644
> > > --- a/fs/nfs/nfs4proc.c
> > > +++ b/fs/nfs/nfs4proc.c
> > > @@ -774,6 +774,14 @@ static void nfs4_slot_sequence_acked(struct
> > > nfs4_slot *slot,
> > > slot->seq_nr_last_acked = seqnr;
> > > }
> > >
> > > +static void nfs4_probe_sequence(struct nfs_client *client, const
> > > struct cred *cred,
> > > + struct nfs4_slot *slot)
> > > +{
> > > + struct rpc_task *task = _nfs41_proc_sequence(client, cred,
> > > slot, true);
> > > + if (!IS_ERR(task))
> > > + rpc_wait_for_completion_task(task);
> >
> > Hmm... I am a little concerned about the wait here, since we don't know
> > what kind of thread this is.
I've been playing with this all afternoon.
> >
> > Any chance we could kick off a _nfs41_proc_sequence asynchronously, and
> > then perhaps requeue the original task to wait for the next free slot?
I haven't had much luck getting this to work. The asynchronous task is
easy enough, but I haven't been able to get the original onto a new
slot yet. Is there a good way to do this without a new call to
nfs4_setup_sequence()? nfs41_sequence_process() only has the
sequence_res available, and there are enough call sites that adding in
sequence_args creates a lot of churn.
> > I suppose one issue there would be if the 'original task is an earlier
> > call to _nfs41_proc_sequence, but perhaps that can be worked around?
I could use the rpc task to see if it's sending a sequence, and only
do this if it's not. I don't know if there is a cleaner way to do
this.
Do you have any suggestions?
Anna
>
> I'll try it and see what happens. Thanks for the feedback!
> Anna
>
> >
> > > +}
> > > +
> > > static int nfs41_sequence_process(struct rpc_task *task,
> > > struct nfs4_sequence_res *res)
> > > {
> > > @@ -790,6 +798,7 @@ static int nfs41_sequence_process(struct rpc_task
> > > *task,
> > > goto out;
> > >
> > > session = slot->table->session;
> > > + clp = session->clp;
> > >
> > > trace_nfs4_sequence_done(session, res);
> > >
> > > @@ -804,7 +813,6 @@ static int nfs41_sequence_process(struct rpc_task
> > > *task,
> > > nfs4_slot_sequence_acked(slot, slot->seq_nr);
> > > /* Update the slot's sequence and clientid lease timer
> > > */
> > > slot->seq_done = 1;
> > > - clp = session->clp;
> > > do_renew_lease(clp, res->sr_timestamp);
> > > /* Check sequence flags */
> > > nfs41_handle_sequence_flag_errors(clp, res-
> > > >sr_status_flags,
> > > @@ -852,10 +860,15 @@ static int nfs41_sequence_process(struct
> > > rpc_task *task,
> > > /*
> > > * Were one or more calls using this slot interrupted?
> > > * If the server never received the request, then our
> > > - * transmitted slot sequence number may be too high.
> > > + * transmitted slot sequence number may be too high.
> > > However,
> > > + * if the server did receive the request then it might
> > > + * accidentally give us a reply with a mismatched
> > > operation.
> > > + * We can sort this out by sending a lone sequence
> > > operation
> > > + * to the server on the same slot.
> > > */
> > > if ((s32)(slot->seq_nr - slot->seq_nr_last_acked) > 1)
> > > {
> > > slot->seq_nr--;
> > > + nfs4_probe_sequence(clp, task->tk_msg.rpc_cred,
> > > slot);
> > > goto retry_nowait;
> > > }
> > > /*
> > --
> > Trond Myklebust
> > CTO, Hammerspace Inc
> > 4984 El Camino Real, Suite 208
> > Los Altos, CA 94022
> > http://www.hammer.space
> >
> >
On Wed, 2020-07-08 at 16:19 -0400, Anna Schumaker wrote:
> On Wed, Jul 8, 2020 at 12:08 PM Anna Schumaker <
> [email protected]> wrote:
> > On Wed, Jul 8, 2020 at 12:00 PM Trond Myklebust <
> > [email protected]> wrote:
> > > On Wed, 2020-07-08 at 11:50 -0400, [email protected]
> > > wrote:
> > > > From: Anna Schumaker <[email protected]>
> > > >
> > > > We used to do this before 3453d5708b33, but this was changed to
> > > > better
> > > > handle the NFS4ERR_SEQ_MISORDERED error code. This commit fixed
> > > > the
> > > > slot
> > > > re-use case when the server doesn't receive the interrupted
> > > > operation,
> > > > but if the server does receive the operation then it could
> > > > still end
> > > > up
> > > > replying to the client with mis-matched operations from the
> > > > reply
> > > > cache.
> > > >
> > > > We can fix this by sending a SEQUENCE to the server while
> > > > recovering
> > > > from
> > > > a SEQ_MISORDERED error when we detect that we are in an
> > > > interrupted
> > > > slot
> > > > situation.
> > > >
> > > > Fixes: 3453d5708b33 (NFSv4.1: Avoid false retries when RPC
> > > > calls are
> > > > interrupted)
> > > > Signed-off-by: Anna Schumaker <[email protected]>
> > > > ---
> > > > fs/nfs/nfs4proc.c | 17 +++++++++++++++--
> > > > 1 file changed, 15 insertions(+), 2 deletions(-)
> > > >
> > > > diff --git a/fs/nfs/nfs4proc.c b/fs/nfs/nfs4proc.c
> > > > index e32717fd1169..5de41a5772f0 100644
> > > > --- a/fs/nfs/nfs4proc.c
> > > > +++ b/fs/nfs/nfs4proc.c
> > > > @@ -774,6 +774,14 @@ static void
> > > > nfs4_slot_sequence_acked(struct
> > > > nfs4_slot *slot,
> > > > slot->seq_nr_last_acked = seqnr;
> > > > }
> > > >
> > > > +static void nfs4_probe_sequence(struct nfs_client *client,
> > > > const
> > > > struct cred *cred,
> > > > + struct nfs4_slot *slot)
> > > > +{
> > > > + struct rpc_task *task = _nfs41_proc_sequence(client,
> > > > cred,
> > > > slot, true);
> > > > + if (!IS_ERR(task))
> > > > + rpc_wait_for_completion_task(task);
> > >
> > > Hmm... I am a little concerned about the wait here, since we
> > > don't know
> > > what kind of thread this is.
>
> I've been playing with this all afternoon.
> > > Any chance we could kick off a _nfs41_proc_sequence
> > > asynchronously, and
> > > then perhaps requeue the original task to wait for the next free
> > > slot?
>
> I haven't had much luck getting this to work. The asynchronous task
> is
> easy enough, but I haven't been able to get the original onto a new
> slot yet. Is there a good way to do this without a new call to
> nfs4_setup_sequence()? nfs41_sequence_process() only has the
> sequence_res available, and there are enough call sites that adding
> in
> sequence_args creates a lot of churn.
No, you really have to let it restart the request. So perhaps just
reset res->sr_slot to NULL (since the new sequence task now owns the
old slot) and then run a retry.
>
> > > I suppose one issue there would be if the 'original task is an
> > > earlier
> > > call to _nfs41_proc_sequence, but perhaps that can be worked
> > > around?
>
> I could use the rpc task to see if it's sending a sequence, and only
> do this if it's not. I don't know if there is a cleaner way to do
> this.
>
> Do you have any suggestions?
> Anna
>
> > I'll try it and see what happens. Thanks for the feedback!
> > Anna
> >
> > > > +}
> > > > +
> > > > static int nfs41_sequence_process(struct rpc_task *task,
> > > > struct nfs4_sequence_res *res)
> > > > {
> > > > @@ -790,6 +798,7 @@ static int nfs41_sequence_process(struct
> > > > rpc_task
> > > > *task,
> > > > goto out;
> > > >
> > > > session = slot->table->session;
> > > > + clp = session->clp;
> > > >
> > > > trace_nfs4_sequence_done(session, res);
> > > >
> > > > @@ -804,7 +813,6 @@ static int nfs41_sequence_process(struct
> > > > rpc_task
> > > > *task,
> > > > nfs4_slot_sequence_acked(slot, slot->seq_nr);
> > > > /* Update the slot's sequence and clientid lease
> > > > timer
> > > > */
> > > > slot->seq_done = 1;
> > > > - clp = session->clp;
> > > > do_renew_lease(clp, res->sr_timestamp);
> > > > /* Check sequence flags */
> > > > nfs41_handle_sequence_flag_errors(clp, res-
> > > > > sr_status_flags,
> > > > @@ -852,10 +860,15 @@ static int nfs41_sequence_process(struct
> > > > rpc_task *task,
> > > > /*
> > > > * Were one or more calls using this slot
> > > > interrupted?
> > > > * If the server never received the request, then
> > > > our
> > > > - * transmitted slot sequence number may be too
> > > > high.
> > > > + * transmitted slot sequence number may be too
> > > > high.
> > > > However,
> > > > + * if the server did receive the request then it
> > > > might
> > > > + * accidentally give us a reply with a mismatched
> > > > operation.
> > > > + * We can sort this out by sending a lone
> > > > sequence
> > > > operation
> > > > + * to the server on the same slot.
> > > > */
> > > > if ((s32)(slot->seq_nr - slot->seq_nr_last_acked)
> > > > > 1)
> > > > {
> > > > slot->seq_nr--;
> > > > + nfs4_probe_sequence(clp, task-
> > > > >tk_msg.rpc_cred,
> > > > slot);
> > > > goto retry_nowait;
> > > > }
> > > > /*
> > > --
> > > Trond Myklebust
> > > CTO, Hammerspace Inc
> > > 4984 El Camino Real, Suite 208
> > > Los Altos, CA 94022
> > > http://www.hammer.space
> > >
> > >
--
Trond Myklebust
CTO, Hammerspace Inc
4984 El Camino Real, Suite 208
Los Altos, CA 94022
http://www.hammer.space