2019-10-24 10:18:01

by Trond Myklebust

[permalink] [raw]
Subject: [PATCH v2] nfsd: Fix races between nfsd4_cb_release() and nfsd4_shutdown_callback()

When we're destroying the client lease, and we call
nfsd4_shutdown_callback(), we must ensure that we do not return
before all outstanding callbacks have terminated and have
released their payloads.

Signed-off-by: Trond Myklebust <[email protected]>
---
v2 - Fix a leak of clp->cl_cb_inflight when running null probes

fs/nfsd/nfs4callback.c | 97 +++++++++++++++++++++++++++++++++---------
fs/nfsd/state.h | 1 +
2 files changed, 79 insertions(+), 19 deletions(-)

diff --git a/fs/nfsd/nfs4callback.c b/fs/nfsd/nfs4callback.c
index 524111420b48..a3c9517bcc64 100644
--- a/fs/nfsd/nfs4callback.c
+++ b/fs/nfsd/nfs4callback.c
@@ -826,6 +826,31 @@ static int max_cb_time(struct net *net)
return max(nn->nfsd4_lease/10, (time_t)1) * HZ;
}

+static struct workqueue_struct *callback_wq;
+
+static bool nfsd4_queue_cb(struct nfsd4_callback *cb)
+{
+ return queue_work(callback_wq, &cb->cb_work);
+}
+
+static void nfsd41_cb_inflight_begin(struct nfs4_client *clp)
+{
+ atomic_inc(&clp->cl_cb_inflight);
+}
+
+static void nfsd41_cb_inflight_end(struct nfs4_client *clp)
+{
+
+ if (atomic_dec_and_test(&clp->cl_cb_inflight))
+ wake_up_var(&clp->cl_cb_inflight);
+}
+
+static void nfsd41_cb_inflight_wait_complete(struct nfs4_client *clp)
+{
+ wait_var_event(&clp->cl_cb_inflight,
+ !atomic_read(&clp->cl_cb_inflight));
+}
+
static const struct cred *get_backchannel_cred(struct nfs4_client *clp, struct rpc_clnt *client, struct nfsd4_session *ses)
{
if (clp->cl_minorversion == 0) {
@@ -937,14 +962,21 @@ static void nfsd4_cb_probe_done(struct rpc_task *task, void *calldata)
clp->cl_cb_state = NFSD4_CB_UP;
}

+static void nfsd4_cb_probe_release(void *calldata)
+{
+ struct nfs4_client *clp = container_of(calldata, struct nfs4_client, cl_cb_null);
+
+ nfsd41_cb_inflight_end(clp);
+
+}
+
static const struct rpc_call_ops nfsd4_cb_probe_ops = {
/* XXX: release method to ensure we set the cb channel down if
* necessary on early failure? */
.rpc_call_done = nfsd4_cb_probe_done,
+ .rpc_release = nfsd4_cb_probe_release,
};

-static struct workqueue_struct *callback_wq;
-
/*
* Poke the callback thread to process any updates to the callback
* parameters, and send a null probe.
@@ -975,9 +1007,12 @@ void nfsd4_change_callback(struct nfs4_client *clp, struct nfs4_cb_conn *conn)
* If the slot is available, then mark it busy. Otherwise, set the
* thread for sleeping on the callback RPC wait queue.
*/
-static bool nfsd41_cb_get_slot(struct nfs4_client *clp, struct rpc_task *task)
+static bool nfsd41_cb_get_slot(struct nfsd4_callback *cb, struct rpc_task *task)
{
- if (test_and_set_bit(0, &clp->cl_cb_slot_busy) != 0) {
+ struct nfs4_client *clp = cb->cb_clp;
+
+ if (!cb->cb_holds_slot &&
+ test_and_set_bit(0, &clp->cl_cb_slot_busy) != 0) {
rpc_sleep_on(&clp->cl_cb_waitq, task, NULL);
/* Race breaker */
if (test_and_set_bit(0, &clp->cl_cb_slot_busy) != 0) {
@@ -985,10 +1020,32 @@ static bool nfsd41_cb_get_slot(struct nfs4_client *clp, struct rpc_task *task)
return false;
}
rpc_wake_up_queued_task(&clp->cl_cb_waitq, task);
+ cb->cb_holds_slot = true;
}
return true;
}

+static void nfsd41_cb_release_slot(struct nfsd4_callback *cb)
+{
+ struct nfs4_client *clp = cb->cb_clp;
+
+ if (cb->cb_holds_slot) {
+ cb->cb_holds_slot = false;
+ clear_bit(0, &clp->cl_cb_slot_busy);
+ rpc_wake_up_next(&clp->cl_cb_waitq);
+ }
+}
+
+static void nfsd41_destroy_cb(struct nfsd4_callback *cb)
+{
+ struct nfs4_client *clp = cb->cb_clp;
+
+ nfsd41_cb_release_slot(cb);
+ if (cb->cb_ops && cb->cb_ops->release)
+ cb->cb_ops->release(cb);
+ nfsd41_cb_inflight_end(clp);
+}
+
/*
* TODO: cb_sequence should support referring call lists, cachethis, multiple
* slots, and mark callback channel down on communication errors.
@@ -1005,11 +1062,8 @@ static void nfsd4_cb_prepare(struct rpc_task *task, void *calldata)
*/
cb->cb_seq_status = 1;
cb->cb_status = 0;
- if (minorversion) {
- if (!cb->cb_holds_slot && !nfsd41_cb_get_slot(clp, task))
- return;
- cb->cb_holds_slot = true;
- }
+ if (minorversion && !nfsd41_cb_get_slot(cb, task))
+ return;
rpc_call_start(task);
}

@@ -1076,9 +1130,7 @@ static bool nfsd4_cb_sequence_done(struct rpc_task *task, struct nfsd4_callback
cb->cb_seq_status);
}

- cb->cb_holds_slot = false;
- clear_bit(0, &clp->cl_cb_slot_busy);
- rpc_wake_up_next(&clp->cl_cb_waitq);
+ nfsd41_cb_release_slot(cb);
dprintk("%s: freed slot, new seqid=%d\n", __func__,
clp->cl_cb_session->se_cb_seq_nr);

@@ -1091,8 +1143,10 @@ static bool nfsd4_cb_sequence_done(struct rpc_task *task, struct nfsd4_callback
ret = false;
goto out;
need_restart:
- task->tk_status = 0;
- cb->cb_need_restart = true;
+ if (!test_bit(NFSD4_CLIENT_CB_KILL, &clp->cl_flags)) {
+ task->tk_status = 0;
+ cb->cb_need_restart = true;
+ }
return false;
}

@@ -1134,9 +1188,9 @@ static void nfsd4_cb_release(void *calldata)
struct nfsd4_callback *cb = calldata;

if (cb->cb_need_restart)
- nfsd4_run_cb(cb);
+ nfsd4_queue_cb(cb);
else
- cb->cb_ops->release(cb);
+ nfsd41_destroy_cb(cb);

}

@@ -1170,6 +1224,7 @@ void nfsd4_shutdown_callback(struct nfs4_client *clp)
*/
nfsd4_run_cb(&clp->cl_cb_null);
flush_workqueue(callback_wq);
+ nfsd41_cb_inflight_wait_complete(clp);
}

/* requires cl_lock: */
@@ -1255,8 +1310,7 @@ nfsd4_run_cb_work(struct work_struct *work)
clnt = clp->cl_cb_client;
if (!clnt) {
/* Callback channel broken, or client killed; give up: */
- if (cb->cb_ops && cb->cb_ops->release)
- cb->cb_ops->release(cb);
+ nfsd41_destroy_cb(cb);
return;
}

@@ -1265,6 +1319,7 @@ nfsd4_run_cb_work(struct work_struct *work)
*/
if (!cb->cb_ops && clp->cl_minorversion) {
clp->cl_cb_state = NFSD4_CB_UP;
+ nfsd41_destroy_cb(cb);
return;
}

@@ -1290,5 +1345,9 @@ void nfsd4_init_cb(struct nfsd4_callback *cb, struct nfs4_client *clp,

void nfsd4_run_cb(struct nfsd4_callback *cb)
{
- queue_work(callback_wq, &cb->cb_work);
+ struct nfs4_client *clp = cb->cb_clp;
+
+ nfsd41_cb_inflight_begin(clp);
+ if (!nfsd4_queue_cb(cb))
+ nfsd41_cb_inflight_end(clp);
}
diff --git a/fs/nfsd/state.h b/fs/nfsd/state.h
index 46f56afb6cb8..d61b83b9654c 100644
--- a/fs/nfsd/state.h
+++ b/fs/nfsd/state.h
@@ -367,6 +367,7 @@ struct nfs4_client {
struct net *net;
struct list_head async_copies; /* list of async copies */
spinlock_t async_lock; /* lock for async copies */
+ atomic_t cl_cb_inflight; /* Outstanding callbacks */
};

/* struct nfs4_client_reset
--
2.21.0


2019-10-25 20:38:17

by Trond Myklebust

[permalink] [raw]
Subject: Re: [PATCH v2] nfsd: Fix races between nfsd4_cb_release() and nfsd4_shutdown_callback()

On Fri, 2019-10-25 at 10:51 -0400, J. Bruce Fields wrote:
> On Wed, Oct 23, 2019 at 05:43:18PM -0400, Trond Myklebust wrote:
> > When we're destroying the client lease, and we call
> > nfsd4_shutdown_callback(), we must ensure that we do not return
> > before all outstanding callbacks have terminated and have
> > released their payloads.
>
> This is great, thanks! We've seen what I'm fairly sure is the same
> bug
> from Red Hat users. I think my blind spot was an assumption that
> rpc tasks wouldn't outlive rpc_shutdown_client().
>
> However, it's causing xfstests runs to hang, and I haven't worked out
> why yet.
>
> I'll spend some time on it this afternoon and let you know what I
> figure
> out.
>

Is that happening with v2 or with v1? With v1 there is definitely a
hang in __destroy_client() due to the refcount leak that I believe I
fixed in v2.

> --b.
>
> > Signed-off-by: Trond Myklebust <[email protected]>
> > ---
> > v2 - Fix a leak of clp->cl_cb_inflight when running null probes
> >
> > fs/nfsd/nfs4callback.c | 97 +++++++++++++++++++++++++++++++++-----
> > ----
> > fs/nfsd/state.h | 1 +
> > 2 files changed, 79 insertions(+), 19 deletions(-)
> >
> > diff --git a/fs/nfsd/nfs4callback.c b/fs/nfsd/nfs4callback.c
> > index 524111420b48..a3c9517bcc64 100644
> > --- a/fs/nfsd/nfs4callback.c
> > +++ b/fs/nfsd/nfs4callback.c
> > @@ -826,6 +826,31 @@ static int max_cb_time(struct net *net)
> > return max(nn->nfsd4_lease/10, (time_t)1) * HZ;
> > }
> >
> > +static struct workqueue_struct *callback_wq;
> > +
> > +static bool nfsd4_queue_cb(struct nfsd4_callback *cb)
> > +{
> > + return queue_work(callback_wq, &cb->cb_work);
> > +}
> > +
> > +static void nfsd41_cb_inflight_begin(struct nfs4_client *clp)
> > +{
> > + atomic_inc(&clp->cl_cb_inflight);
> > +}
> > +
> > +static void nfsd41_cb_inflight_end(struct nfs4_client *clp)
> > +{
> > +
> > + if (atomic_dec_and_test(&clp->cl_cb_inflight))
> > + wake_up_var(&clp->cl_cb_inflight);
> > +}
> > +
> > +static void nfsd41_cb_inflight_wait_complete(struct nfs4_client
> > *clp)
> > +{
> > + wait_var_event(&clp->cl_cb_inflight,
> > + !atomic_read(&clp->cl_cb_inflight));
> > +}
> > +
> > static const struct cred *get_backchannel_cred(struct nfs4_client
> > *clp, struct rpc_clnt *client, struct nfsd4_session *ses)
> > {
> > if (clp->cl_minorversion == 0) {
> > @@ -937,14 +962,21 @@ static void nfsd4_cb_probe_done(struct
> > rpc_task *task, void *calldata)
> > clp->cl_cb_state = NFSD4_CB_UP;
> > }
> >
> > +static void nfsd4_cb_probe_release(void *calldata)
> > +{
> > + struct nfs4_client *clp = container_of(calldata, struct
> > nfs4_client, cl_cb_null);
> > +
> > + nfsd41_cb_inflight_end(clp);
> > +
> > +}
> > +
> > static const struct rpc_call_ops nfsd4_cb_probe_ops = {
> > /* XXX: release method to ensure we set the cb channel down if
> > * necessary on early failure? */
> > .rpc_call_done = nfsd4_cb_probe_done,
> > + .rpc_release = nfsd4_cb_probe_release,
> > };
> >
> > -static struct workqueue_struct *callback_wq;
> > -
> > /*
> > * Poke the callback thread to process any updates to the callback
> > * parameters, and send a null probe.
> > @@ -975,9 +1007,12 @@ void nfsd4_change_callback(struct nfs4_client
> > *clp, struct nfs4_cb_conn *conn)
> > * If the slot is available, then mark it busy. Otherwise, set
> > the
> > * thread for sleeping on the callback RPC wait queue.
> > */
> > -static bool nfsd41_cb_get_slot(struct nfs4_client *clp, struct
> > rpc_task *task)
> > +static bool nfsd41_cb_get_slot(struct nfsd4_callback *cb, struct
> > rpc_task *task)
> > {
> > - if (test_and_set_bit(0, &clp->cl_cb_slot_busy) != 0) {
> > + struct nfs4_client *clp = cb->cb_clp;
> > +
> > + if (!cb->cb_holds_slot &&
> > + test_and_set_bit(0, &clp->cl_cb_slot_busy) != 0) {
> > rpc_sleep_on(&clp->cl_cb_waitq, task, NULL);
> > /* Race breaker */
> > if (test_and_set_bit(0, &clp->cl_cb_slot_busy) != 0) {
> > @@ -985,10 +1020,32 @@ static bool nfsd41_cb_get_slot(struct
> > nfs4_client *clp, struct rpc_task *task)
> > return false;
> > }
> > rpc_wake_up_queued_task(&clp->cl_cb_waitq, task);
> > + cb->cb_holds_slot = true;
> > }
> > return true;
> > }
> >
> > +static void nfsd41_cb_release_slot(struct nfsd4_callback *cb)
> > +{
> > + struct nfs4_client *clp = cb->cb_clp;
> > +
> > + if (cb->cb_holds_slot) {
> > + cb->cb_holds_slot = false;
> > + clear_bit(0, &clp->cl_cb_slot_busy);
> > + rpc_wake_up_next(&clp->cl_cb_waitq);
> > + }
> > +}
> > +
> > +static void nfsd41_destroy_cb(struct nfsd4_callback *cb)
> > +{
> > + struct nfs4_client *clp = cb->cb_clp;
> > +
> > + nfsd41_cb_release_slot(cb);
> > + if (cb->cb_ops && cb->cb_ops->release)
> > + cb->cb_ops->release(cb);
> > + nfsd41_cb_inflight_end(clp);
> > +}
> > +
> > /*
> > * TODO: cb_sequence should support referring call lists,
> > cachethis, multiple
> > * slots, and mark callback channel down on communication errors.
> > @@ -1005,11 +1062,8 @@ static void nfsd4_cb_prepare(struct rpc_task
> > *task, void *calldata)
> > */
> > cb->cb_seq_status = 1;
> > cb->cb_status = 0;
> > - if (minorversion) {
> > - if (!cb->cb_holds_slot && !nfsd41_cb_get_slot(clp,
> > task))
> > - return;
> > - cb->cb_holds_slot = true;
> > - }
> > + if (minorversion && !nfsd41_cb_get_slot(cb, task))
> > + return;
> > rpc_call_start(task);
> > }
> >
> > @@ -1076,9 +1130,7 @@ static bool nfsd4_cb_sequence_done(struct
> > rpc_task *task, struct nfsd4_callback
> > cb->cb_seq_status);
> > }
> >
> > - cb->cb_holds_slot = false;
> > - clear_bit(0, &clp->cl_cb_slot_busy);
> > - rpc_wake_up_next(&clp->cl_cb_waitq);
> > + nfsd41_cb_release_slot(cb);
> > dprintk("%s: freed slot, new seqid=%d\n", __func__,
> > clp->cl_cb_session->se_cb_seq_nr);
> >
> > @@ -1091,8 +1143,10 @@ static bool nfsd4_cb_sequence_done(struct
> > rpc_task *task, struct nfsd4_callback
> > ret = false;
> > goto out;
> > need_restart:
> > - task->tk_status = 0;
> > - cb->cb_need_restart = true;
> > + if (!test_bit(NFSD4_CLIENT_CB_KILL, &clp->cl_flags)) {
> > + task->tk_status = 0;
> > + cb->cb_need_restart = true;
> > + }
> > return false;
> > }
> >
> > @@ -1134,9 +1188,9 @@ static void nfsd4_cb_release(void *calldata)
> > struct nfsd4_callback *cb = calldata;
> >
> > if (cb->cb_need_restart)
> > - nfsd4_run_cb(cb);
> > + nfsd4_queue_cb(cb);
> > else
> > - cb->cb_ops->release(cb);
> > + nfsd41_destroy_cb(cb);
> >
> > }
> >
> > @@ -1170,6 +1224,7 @@ void nfsd4_shutdown_callback(struct
> > nfs4_client *clp)
> > */
> > nfsd4_run_cb(&clp->cl_cb_null);
> > flush_workqueue(callback_wq);
> > + nfsd41_cb_inflight_wait_complete(clp);
> > }
> >
> > /* requires cl_lock: */
> > @@ -1255,8 +1310,7 @@ nfsd4_run_cb_work(struct work_struct *work)
> > clnt = clp->cl_cb_client;
> > if (!clnt) {
> > /* Callback channel broken, or client killed; give up:
> > */
> > - if (cb->cb_ops && cb->cb_ops->release)
> > - cb->cb_ops->release(cb);
> > + nfsd41_destroy_cb(cb);
> > return;
> > }
> >
> > @@ -1265,6 +1319,7 @@ nfsd4_run_cb_work(struct work_struct *work)
> > */
> > if (!cb->cb_ops && clp->cl_minorversion) {
> > clp->cl_cb_state = NFSD4_CB_UP;
> > + nfsd41_destroy_cb(cb);
> > return;
> > }
> >
> > @@ -1290,5 +1345,9 @@ void nfsd4_init_cb(struct nfsd4_callback *cb,
> > struct nfs4_client *clp,
> >
> > void nfsd4_run_cb(struct nfsd4_callback *cb)
> > {
> > - queue_work(callback_wq, &cb->cb_work);
> > + struct nfs4_client *clp = cb->cb_clp;
> > +
> > + nfsd41_cb_inflight_begin(clp);
> > + if (!nfsd4_queue_cb(cb))
> > + nfsd41_cb_inflight_end(clp);
> > }
> > diff --git a/fs/nfsd/state.h b/fs/nfsd/state.h
> > index 46f56afb6cb8..d61b83b9654c 100644
> > --- a/fs/nfsd/state.h
> > +++ b/fs/nfsd/state.h
> > @@ -367,6 +367,7 @@ struct nfs4_client {
> > struct net *net;
> > struct list_head async_copies; /* list of async copies */
> > spinlock_t async_lock; /* lock for async copies */
> > + atomic_t cl_cb_inflight; /* Outstanding callbacks */
> > };
> >
> > /* struct nfs4_client_reset
> > --
> > 2.21.0
> >
--
Trond Myklebust
Linux NFS client maintainer, Hammerspace
[email protected]


2019-10-25 20:39:20

by J. Bruce Fields

[permalink] [raw]
Subject: Re: [PATCH v2] nfsd: Fix races between nfsd4_cb_release() and nfsd4_shutdown_callback()

On Wed, Oct 23, 2019 at 05:43:18PM -0400, Trond Myklebust wrote:
> When we're destroying the client lease, and we call
> nfsd4_shutdown_callback(), we must ensure that we do not return
> before all outstanding callbacks have terminated and have
> released their payloads.

This is great, thanks! We've seen what I'm fairly sure is the same bug
from Red Hat users. I think my blind spot was an assumption that
rpc tasks wouldn't outlive rpc_shutdown_client().

However, it's causing xfstests runs to hang, and I haven't worked out
why yet.

I'll spend some time on it this afternoon and let you know what I figure
out.

--b.

> Signed-off-by: Trond Myklebust <[email protected]>
> ---
> v2 - Fix a leak of clp->cl_cb_inflight when running null probes
>
> fs/nfsd/nfs4callback.c | 97 +++++++++++++++++++++++++++++++++---------
> fs/nfsd/state.h | 1 +
> 2 files changed, 79 insertions(+), 19 deletions(-)
>
> diff --git a/fs/nfsd/nfs4callback.c b/fs/nfsd/nfs4callback.c
> index 524111420b48..a3c9517bcc64 100644
> --- a/fs/nfsd/nfs4callback.c
> +++ b/fs/nfsd/nfs4callback.c
> @@ -826,6 +826,31 @@ static int max_cb_time(struct net *net)
> return max(nn->nfsd4_lease/10, (time_t)1) * HZ;
> }
>
> +static struct workqueue_struct *callback_wq;
> +
> +static bool nfsd4_queue_cb(struct nfsd4_callback *cb)
> +{
> + return queue_work(callback_wq, &cb->cb_work);
> +}
> +
> +static void nfsd41_cb_inflight_begin(struct nfs4_client *clp)
> +{
> + atomic_inc(&clp->cl_cb_inflight);
> +}
> +
> +static void nfsd41_cb_inflight_end(struct nfs4_client *clp)
> +{
> +
> + if (atomic_dec_and_test(&clp->cl_cb_inflight))
> + wake_up_var(&clp->cl_cb_inflight);
> +}
> +
> +static void nfsd41_cb_inflight_wait_complete(struct nfs4_client *clp)
> +{
> + wait_var_event(&clp->cl_cb_inflight,
> + !atomic_read(&clp->cl_cb_inflight));
> +}
> +
> static const struct cred *get_backchannel_cred(struct nfs4_client *clp, struct rpc_clnt *client, struct nfsd4_session *ses)
> {
> if (clp->cl_minorversion == 0) {
> @@ -937,14 +962,21 @@ static void nfsd4_cb_probe_done(struct rpc_task *task, void *calldata)
> clp->cl_cb_state = NFSD4_CB_UP;
> }
>
> +static void nfsd4_cb_probe_release(void *calldata)
> +{
> + struct nfs4_client *clp = container_of(calldata, struct nfs4_client, cl_cb_null);
> +
> + nfsd41_cb_inflight_end(clp);
> +
> +}
> +
> static const struct rpc_call_ops nfsd4_cb_probe_ops = {
> /* XXX: release method to ensure we set the cb channel down if
> * necessary on early failure? */
> .rpc_call_done = nfsd4_cb_probe_done,
> + .rpc_release = nfsd4_cb_probe_release,
> };
>
> -static struct workqueue_struct *callback_wq;
> -
> /*
> * Poke the callback thread to process any updates to the callback
> * parameters, and send a null probe.
> @@ -975,9 +1007,12 @@ void nfsd4_change_callback(struct nfs4_client *clp, struct nfs4_cb_conn *conn)
> * If the slot is available, then mark it busy. Otherwise, set the
> * thread for sleeping on the callback RPC wait queue.
> */
> -static bool nfsd41_cb_get_slot(struct nfs4_client *clp, struct rpc_task *task)
> +static bool nfsd41_cb_get_slot(struct nfsd4_callback *cb, struct rpc_task *task)
> {
> - if (test_and_set_bit(0, &clp->cl_cb_slot_busy) != 0) {
> + struct nfs4_client *clp = cb->cb_clp;
> +
> + if (!cb->cb_holds_slot &&
> + test_and_set_bit(0, &clp->cl_cb_slot_busy) != 0) {
> rpc_sleep_on(&clp->cl_cb_waitq, task, NULL);
> /* Race breaker */
> if (test_and_set_bit(0, &clp->cl_cb_slot_busy) != 0) {
> @@ -985,10 +1020,32 @@ static bool nfsd41_cb_get_slot(struct nfs4_client *clp, struct rpc_task *task)
> return false;
> }
> rpc_wake_up_queued_task(&clp->cl_cb_waitq, task);
> + cb->cb_holds_slot = true;
> }
> return true;
> }
>
> +static void nfsd41_cb_release_slot(struct nfsd4_callback *cb)
> +{
> + struct nfs4_client *clp = cb->cb_clp;
> +
> + if (cb->cb_holds_slot) {
> + cb->cb_holds_slot = false;
> + clear_bit(0, &clp->cl_cb_slot_busy);
> + rpc_wake_up_next(&clp->cl_cb_waitq);
> + }
> +}
> +
> +static void nfsd41_destroy_cb(struct nfsd4_callback *cb)
> +{
> + struct nfs4_client *clp = cb->cb_clp;
> +
> + nfsd41_cb_release_slot(cb);
> + if (cb->cb_ops && cb->cb_ops->release)
> + cb->cb_ops->release(cb);
> + nfsd41_cb_inflight_end(clp);
> +}
> +
> /*
> * TODO: cb_sequence should support referring call lists, cachethis, multiple
> * slots, and mark callback channel down on communication errors.
> @@ -1005,11 +1062,8 @@ static void nfsd4_cb_prepare(struct rpc_task *task, void *calldata)
> */
> cb->cb_seq_status = 1;
> cb->cb_status = 0;
> - if (minorversion) {
> - if (!cb->cb_holds_slot && !nfsd41_cb_get_slot(clp, task))
> - return;
> - cb->cb_holds_slot = true;
> - }
> + if (minorversion && !nfsd41_cb_get_slot(cb, task))
> + return;
> rpc_call_start(task);
> }
>
> @@ -1076,9 +1130,7 @@ static bool nfsd4_cb_sequence_done(struct rpc_task *task, struct nfsd4_callback
> cb->cb_seq_status);
> }
>
> - cb->cb_holds_slot = false;
> - clear_bit(0, &clp->cl_cb_slot_busy);
> - rpc_wake_up_next(&clp->cl_cb_waitq);
> + nfsd41_cb_release_slot(cb);
> dprintk("%s: freed slot, new seqid=%d\n", __func__,
> clp->cl_cb_session->se_cb_seq_nr);
>
> @@ -1091,8 +1143,10 @@ static bool nfsd4_cb_sequence_done(struct rpc_task *task, struct nfsd4_callback
> ret = false;
> goto out;
> need_restart:
> - task->tk_status = 0;
> - cb->cb_need_restart = true;
> + if (!test_bit(NFSD4_CLIENT_CB_KILL, &clp->cl_flags)) {
> + task->tk_status = 0;
> + cb->cb_need_restart = true;
> + }
> return false;
> }
>
> @@ -1134,9 +1188,9 @@ static void nfsd4_cb_release(void *calldata)
> struct nfsd4_callback *cb = calldata;
>
> if (cb->cb_need_restart)
> - nfsd4_run_cb(cb);
> + nfsd4_queue_cb(cb);
> else
> - cb->cb_ops->release(cb);
> + nfsd41_destroy_cb(cb);
>
> }
>
> @@ -1170,6 +1224,7 @@ void nfsd4_shutdown_callback(struct nfs4_client *clp)
> */
> nfsd4_run_cb(&clp->cl_cb_null);
> flush_workqueue(callback_wq);
> + nfsd41_cb_inflight_wait_complete(clp);
> }
>
> /* requires cl_lock: */
> @@ -1255,8 +1310,7 @@ nfsd4_run_cb_work(struct work_struct *work)
> clnt = clp->cl_cb_client;
> if (!clnt) {
> /* Callback channel broken, or client killed; give up: */
> - if (cb->cb_ops && cb->cb_ops->release)
> - cb->cb_ops->release(cb);
> + nfsd41_destroy_cb(cb);
> return;
> }
>
> @@ -1265,6 +1319,7 @@ nfsd4_run_cb_work(struct work_struct *work)
> */
> if (!cb->cb_ops && clp->cl_minorversion) {
> clp->cl_cb_state = NFSD4_CB_UP;
> + nfsd41_destroy_cb(cb);
> return;
> }
>
> @@ -1290,5 +1345,9 @@ void nfsd4_init_cb(struct nfsd4_callback *cb, struct nfs4_client *clp,
>
> void nfsd4_run_cb(struct nfsd4_callback *cb)
> {
> - queue_work(callback_wq, &cb->cb_work);
> + struct nfs4_client *clp = cb->cb_clp;
> +
> + nfsd41_cb_inflight_begin(clp);
> + if (!nfsd4_queue_cb(cb))
> + nfsd41_cb_inflight_end(clp);
> }
> diff --git a/fs/nfsd/state.h b/fs/nfsd/state.h
> index 46f56afb6cb8..d61b83b9654c 100644
> --- a/fs/nfsd/state.h
> +++ b/fs/nfsd/state.h
> @@ -367,6 +367,7 @@ struct nfs4_client {
> struct net *net;
> struct list_head async_copies; /* list of async copies */
> spinlock_t async_lock; /* lock for async copies */
> + atomic_t cl_cb_inflight; /* Outstanding callbacks */
> };
>
> /* struct nfs4_client_reset
> --
> 2.21.0
>

2019-10-25 20:39:53

by J. Bruce Fields

[permalink] [raw]
Subject: Re: [PATCH v2] nfsd: Fix races between nfsd4_cb_release() and nfsd4_shutdown_callback()

On Fri, Oct 25, 2019 at 02:55:45PM +0000, Trond Myklebust wrote:
> On Fri, 2019-10-25 at 10:51 -0400, J. Bruce Fields wrote:
> > On Wed, Oct 23, 2019 at 05:43:18PM -0400, Trond Myklebust wrote:
> > > When we're destroying the client lease, and we call
> > > nfsd4_shutdown_callback(), we must ensure that we do not return
> > > before all outstanding callbacks have terminated and have
> > > released their payloads.
> >
> > This is great, thanks! We've seen what I'm fairly sure is the same
> > bug
> > from Red Hat users. I think my blind spot was an assumption that
> > rpc tasks wouldn't outlive rpc_shutdown_client().
> >
> > However, it's causing xfstests runs to hang, and I haven't worked out
> > why yet.
> >
> > I'll spend some time on it this afternoon and let you know what I
> > figure
> > out.
> >
>
> Is that happening with v2 or with v1? With v1 there is definitely a
> hang in __destroy_client() due to the refcount leak that I believe I
> fixed in v2.

I thought I was running v2, let me double-check....

--b.

>
> > --b.
> >
> > > Signed-off-by: Trond Myklebust <[email protected]>
> > > ---
> > > v2 - Fix a leak of clp->cl_cb_inflight when running null probes
> > >
> > > fs/nfsd/nfs4callback.c | 97 +++++++++++++++++++++++++++++++++-----
> > > ----
> > > fs/nfsd/state.h | 1 +
> > > 2 files changed, 79 insertions(+), 19 deletions(-)
> > >
> > > diff --git a/fs/nfsd/nfs4callback.c b/fs/nfsd/nfs4callback.c
> > > index 524111420b48..a3c9517bcc64 100644
> > > --- a/fs/nfsd/nfs4callback.c
> > > +++ b/fs/nfsd/nfs4callback.c
> > > @@ -826,6 +826,31 @@ static int max_cb_time(struct net *net)
> > > return max(nn->nfsd4_lease/10, (time_t)1) * HZ;
> > > }
> > >
> > > +static struct workqueue_struct *callback_wq;
> > > +
> > > +static bool nfsd4_queue_cb(struct nfsd4_callback *cb)
> > > +{
> > > + return queue_work(callback_wq, &cb->cb_work);
> > > +}
> > > +
> > > +static void nfsd41_cb_inflight_begin(struct nfs4_client *clp)
> > > +{
> > > + atomic_inc(&clp->cl_cb_inflight);
> > > +}
> > > +
> > > +static void nfsd41_cb_inflight_end(struct nfs4_client *clp)
> > > +{
> > > +
> > > + if (atomic_dec_and_test(&clp->cl_cb_inflight))
> > > + wake_up_var(&clp->cl_cb_inflight);
> > > +}
> > > +
> > > +static void nfsd41_cb_inflight_wait_complete(struct nfs4_client
> > > *clp)
> > > +{
> > > + wait_var_event(&clp->cl_cb_inflight,
> > > + !atomic_read(&clp->cl_cb_inflight));
> > > +}
> > > +
> > > static const struct cred *get_backchannel_cred(struct nfs4_client
> > > *clp, struct rpc_clnt *client, struct nfsd4_session *ses)
> > > {
> > > if (clp->cl_minorversion == 0) {
> > > @@ -937,14 +962,21 @@ static void nfsd4_cb_probe_done(struct
> > > rpc_task *task, void *calldata)
> > > clp->cl_cb_state = NFSD4_CB_UP;
> > > }
> > >
> > > +static void nfsd4_cb_probe_release(void *calldata)
> > > +{
> > > + struct nfs4_client *clp = container_of(calldata, struct
> > > nfs4_client, cl_cb_null);
> > > +
> > > + nfsd41_cb_inflight_end(clp);
> > > +
> > > +}
> > > +
> > > static const struct rpc_call_ops nfsd4_cb_probe_ops = {
> > > /* XXX: release method to ensure we set the cb channel down if
> > > * necessary on early failure? */
> > > .rpc_call_done = nfsd4_cb_probe_done,
> > > + .rpc_release = nfsd4_cb_probe_release,
> > > };
> > >
> > > -static struct workqueue_struct *callback_wq;
> > > -
> > > /*
> > > * Poke the callback thread to process any updates to the callback
> > > * parameters, and send a null probe.
> > > @@ -975,9 +1007,12 @@ void nfsd4_change_callback(struct nfs4_client
> > > *clp, struct nfs4_cb_conn *conn)
> > > * If the slot is available, then mark it busy. Otherwise, set
> > > the
> > > * thread for sleeping on the callback RPC wait queue.
> > > */
> > > -static bool nfsd41_cb_get_slot(struct nfs4_client *clp, struct
> > > rpc_task *task)
> > > +static bool nfsd41_cb_get_slot(struct nfsd4_callback *cb, struct
> > > rpc_task *task)
> > > {
> > > - if (test_and_set_bit(0, &clp->cl_cb_slot_busy) != 0) {
> > > + struct nfs4_client *clp = cb->cb_clp;
> > > +
> > > + if (!cb->cb_holds_slot &&
> > > + test_and_set_bit(0, &clp->cl_cb_slot_busy) != 0) {
> > > rpc_sleep_on(&clp->cl_cb_waitq, task, NULL);
> > > /* Race breaker */
> > > if (test_and_set_bit(0, &clp->cl_cb_slot_busy) != 0) {
> > > @@ -985,10 +1020,32 @@ static bool nfsd41_cb_get_slot(struct
> > > nfs4_client *clp, struct rpc_task *task)
> > > return false;
> > > }
> > > rpc_wake_up_queued_task(&clp->cl_cb_waitq, task);
> > > + cb->cb_holds_slot = true;
> > > }
> > > return true;
> > > }
> > >
> > > +static void nfsd41_cb_release_slot(struct nfsd4_callback *cb)
> > > +{
> > > + struct nfs4_client *clp = cb->cb_clp;
> > > +
> > > + if (cb->cb_holds_slot) {
> > > + cb->cb_holds_slot = false;
> > > + clear_bit(0, &clp->cl_cb_slot_busy);
> > > + rpc_wake_up_next(&clp->cl_cb_waitq);
> > > + }
> > > +}
> > > +
> > > +static void nfsd41_destroy_cb(struct nfsd4_callback *cb)
> > > +{
> > > + struct nfs4_client *clp = cb->cb_clp;
> > > +
> > > + nfsd41_cb_release_slot(cb);
> > > + if (cb->cb_ops && cb->cb_ops->release)
> > > + cb->cb_ops->release(cb);
> > > + nfsd41_cb_inflight_end(clp);
> > > +}
> > > +
> > > /*
> > > * TODO: cb_sequence should support referring call lists,
> > > cachethis, multiple
> > > * slots, and mark callback channel down on communication errors.
> > > @@ -1005,11 +1062,8 @@ static void nfsd4_cb_prepare(struct rpc_task
> > > *task, void *calldata)
> > > */
> > > cb->cb_seq_status = 1;
> > > cb->cb_status = 0;
> > > - if (minorversion) {
> > > - if (!cb->cb_holds_slot && !nfsd41_cb_get_slot(clp,
> > > task))
> > > - return;
> > > - cb->cb_holds_slot = true;
> > > - }
> > > + if (minorversion && !nfsd41_cb_get_slot(cb, task))
> > > + return;
> > > rpc_call_start(task);
> > > }
> > >
> > > @@ -1076,9 +1130,7 @@ static bool nfsd4_cb_sequence_done(struct
> > > rpc_task *task, struct nfsd4_callback
> > > cb->cb_seq_status);
> > > }
> > >
> > > - cb->cb_holds_slot = false;
> > > - clear_bit(0, &clp->cl_cb_slot_busy);
> > > - rpc_wake_up_next(&clp->cl_cb_waitq);
> > > + nfsd41_cb_release_slot(cb);
> > > dprintk("%s: freed slot, new seqid=%d\n", __func__,
> > > clp->cl_cb_session->se_cb_seq_nr);
> > >
> > > @@ -1091,8 +1143,10 @@ static bool nfsd4_cb_sequence_done(struct
> > > rpc_task *task, struct nfsd4_callback
> > > ret = false;
> > > goto out;
> > > need_restart:
> > > - task->tk_status = 0;
> > > - cb->cb_need_restart = true;
> > > + if (!test_bit(NFSD4_CLIENT_CB_KILL, &clp->cl_flags)) {
> > > + task->tk_status = 0;
> > > + cb->cb_need_restart = true;
> > > + }
> > > return false;
> > > }
> > >
> > > @@ -1134,9 +1188,9 @@ static void nfsd4_cb_release(void *calldata)
> > > struct nfsd4_callback *cb = calldata;
> > >
> > > if (cb->cb_need_restart)
> > > - nfsd4_run_cb(cb);
> > > + nfsd4_queue_cb(cb);
> > > else
> > > - cb->cb_ops->release(cb);
> > > + nfsd41_destroy_cb(cb);
> > >
> > > }
> > >
> > > @@ -1170,6 +1224,7 @@ void nfsd4_shutdown_callback(struct
> > > nfs4_client *clp)
> > > */
> > > nfsd4_run_cb(&clp->cl_cb_null);
> > > flush_workqueue(callback_wq);
> > > + nfsd41_cb_inflight_wait_complete(clp);
> > > }
> > >
> > > /* requires cl_lock: */
> > > @@ -1255,8 +1310,7 @@ nfsd4_run_cb_work(struct work_struct *work)
> > > clnt = clp->cl_cb_client;
> > > if (!clnt) {
> > > /* Callback channel broken, or client killed; give up:
> > > */
> > > - if (cb->cb_ops && cb->cb_ops->release)
> > > - cb->cb_ops->release(cb);
> > > + nfsd41_destroy_cb(cb);
> > > return;
> > > }
> > >
> > > @@ -1265,6 +1319,7 @@ nfsd4_run_cb_work(struct work_struct *work)
> > > */
> > > if (!cb->cb_ops && clp->cl_minorversion) {
> > > clp->cl_cb_state = NFSD4_CB_UP;
> > > + nfsd41_destroy_cb(cb);
> > > return;
> > > }
> > >
> > > @@ -1290,5 +1345,9 @@ void nfsd4_init_cb(struct nfsd4_callback *cb,
> > > struct nfs4_client *clp,
> > >
> > > void nfsd4_run_cb(struct nfsd4_callback *cb)
> > > {
> > > - queue_work(callback_wq, &cb->cb_work);
> > > + struct nfs4_client *clp = cb->cb_clp;
> > > +
> > > + nfsd41_cb_inflight_begin(clp);
> > > + if (!nfsd4_queue_cb(cb))
> > > + nfsd41_cb_inflight_end(clp);
> > > }
> > > diff --git a/fs/nfsd/state.h b/fs/nfsd/state.h
> > > index 46f56afb6cb8..d61b83b9654c 100644
> > > --- a/fs/nfsd/state.h
> > > +++ b/fs/nfsd/state.h
> > > @@ -367,6 +367,7 @@ struct nfs4_client {
> > > struct net *net;
> > > struct list_head async_copies; /* list of async copies */
> > > spinlock_t async_lock; /* lock for async copies */
> > > + atomic_t cl_cb_inflight; /* Outstanding callbacks */
> > > };
> > >
> > > /* struct nfs4_client_reset
> > > --
> > > 2.21.0
> > >
> --
> Trond Myklebust
> Linux NFS client maintainer, Hammerspace
> [email protected]
>
>

2019-10-25 20:40:59

by J. Bruce Fields

[permalink] [raw]
Subject: Re: [PATCH v2] nfsd: Fix races between nfsd4_cb_release() and nfsd4_shutdown_callback()

On Fri, Oct 25, 2019 at 11:21:19AM -0400, J. Bruce Fields wrote:
> On Fri, Oct 25, 2019 at 02:55:45PM +0000, Trond Myklebust wrote:
> > On Fri, 2019-10-25 at 10:51 -0400, J. Bruce Fields wrote:
> > > On Wed, Oct 23, 2019 at 05:43:18PM -0400, Trond Myklebust wrote:
> > > > When we're destroying the client lease, and we call
> > > > nfsd4_shutdown_callback(), we must ensure that we do not return
> > > > before all outstanding callbacks have terminated and have
> > > > released their payloads.
> > >
> > > This is great, thanks! We've seen what I'm fairly sure is the same
> > > bug
> > > from Red Hat users. I think my blind spot was an assumption that
> > > rpc tasks wouldn't outlive rpc_shutdown_client().
> > >
> > > However, it's causing xfstests runs to hang, and I haven't worked out
> > > why yet.
> > >
> > > I'll spend some time on it this afternoon and let you know what I
> > > figure
> > > out.
> > >
> >
> > Is that happening with v2 or with v1? With v1 there is definitely a
> > hang in __destroy_client() due to the refcount leak that I believe I
> > fixed in v2.
>
> I thought I was running v2, let me double-check....

Yes, with v2 I'm getting a hang on generic/013.

I checked quickly and didn't see anything interesting in the logs,
otherwise I haven't done any digging.

--b.

2019-10-29 21:49:44

by J. Bruce Fields

[permalink] [raw]
Subject: Re: [PATCH v2] nfsd: Fix races between nfsd4_cb_release() and nfsd4_shutdown_callback()

On Fri, Oct 25, 2019 at 11:33:36AM -0400, bfields wrote:
> On Fri, Oct 25, 2019 at 11:21:19AM -0400, J. Bruce Fields wrote:
> > I thought I was running v2, let me double-check....
>
> Yes, with v2 I'm getting a hang on generic/013.
>
> I checked quickly and didn't see anything interesting in the logs,
> otherwise I haven't done any digging.

Reproducible just with ./check -nfs generic/013. The last thing I see
in wireshark is an asynchronous COPY call and reply. Which means it's
probably trying to do a CB_OFFLOAD. Hm.

--b.

2019-11-07 22:27:53

by J. Bruce Fields

[permalink] [raw]
Subject: Re: [PATCH v2] nfsd: Fix races between nfsd4_cb_release() and nfsd4_shutdown_callback()

On Tue, Oct 29, 2019 at 05:47:05PM -0400, J. Bruce Fields wrote:
> On Fri, Oct 25, 2019 at 11:33:36AM -0400, bfields wrote:
> > On Fri, Oct 25, 2019 at 11:21:19AM -0400, J. Bruce Fields wrote:
> > > I thought I was running v2, let me double-check....
> >
> > Yes, with v2 I'm getting a hang on generic/013.
> >
> > I checked quickly and didn't see anything interesting in the logs,
> > otherwise I haven't done any digging.
>
> Reproducible just with ./check -nfs generic/013. The last thing I see
> in wireshark is an asynchronous COPY call and reply. Which means it's
> probably trying to do a CB_OFFLOAD. Hm.

Oh, I think it just needs the following.

--b.

diff --git a/fs/nfsd/nfs4callback.c b/fs/nfsd/nfs4callback.c
index fb71e7f9d0d9..e49604701a71 100644
--- a/fs/nfsd/nfs4callback.c
+++ b/fs/nfsd/nfs4callback.c
@@ -1026,8 +1026,8 @@ static bool nfsd41_cb_get_slot(struct nfsd4_callback *cb, struct rpc_task *task)
return false;
}
rpc_wake_up_queued_task(&clp->cl_cb_waitq, task);
- cb->cb_holds_slot = true;
}
+ cb->cb_holds_slot = true;
return true;
}

2019-11-08 17:51:57

by J. Bruce Fields

[permalink] [raw]
Subject: Re: [PATCH v2] nfsd: Fix races between nfsd4_cb_release() and nfsd4_shutdown_callback()

On Thu, Nov 07, 2019 at 05:27:12PM -0500, J. Bruce Fields wrote:
> On Tue, Oct 29, 2019 at 05:47:05PM -0400, J. Bruce Fields wrote:
> > On Fri, Oct 25, 2019 at 11:33:36AM -0400, bfields wrote:
> > > On Fri, Oct 25, 2019 at 11:21:19AM -0400, J. Bruce Fields wrote:
> > > > I thought I was running v2, let me double-check....
> > >
> > > Yes, with v2 I'm getting a hang on generic/013.
> > >
> > > I checked quickly and didn't see anything interesting in the logs,
> > > otherwise I haven't done any digging.
> >
> > Reproducible just with ./check -nfs generic/013. The last thing I see
> > in wireshark is an asynchronous COPY call and reply. Which means it's
> > probably trying to do a CB_OFFLOAD. Hm.
>
> Oh, I think it just needs the following.

Applying as follows, with part of the change split out into a separate
patch (since that's how I noticed the bug).

--b.

2019-11-08 17:52:57

by J. Bruce Fields

[permalink] [raw]
Subject: Re: [PATCH v2] nfsd: Fix races between nfsd4_cb_release() and nfsd4_shutdown_callback()

On Fri, Nov 08, 2019 at 12:51:09PM -0500, J. Bruce Fields wrote:
> On Thu, Nov 07, 2019 at 05:27:12PM -0500, J. Bruce Fields wrote:
> > On Tue, Oct 29, 2019 at 05:47:05PM -0400, J. Bruce Fields wrote:
> > > On Fri, Oct 25, 2019 at 11:33:36AM -0400, bfields wrote:
> > > > On Fri, Oct 25, 2019 at 11:21:19AM -0400, J. Bruce Fields wrote:
> > > > > I thought I was running v2, let me double-check....
> > > >
> > > > Yes, with v2 I'm getting a hang on generic/013.
> > > >
> > > > I checked quickly and didn't see anything interesting in the logs,
> > > > otherwise I haven't done any digging.
> > >
> > > Reproducible just with ./check -nfs generic/013. The last thing I see
> > > in wireshark is an asynchronous COPY call and reply. Which means it's
> > > probably trying to do a CB_OFFLOAD. Hm.
> >
> > Oh, I think it just needs the following.
>
> Applying as follows, with part of the change split out into a separate
> patch (since that's how I noticed the bug).
>
> --b.

Oops, remembered to append the patches this time. --b.

commit 2bbfed98a4d8
Author: Trond Myklebust <[email protected]>
Date: Wed Oct 23 17:43:18 2019 -0400

nfsd: Fix races between nfsd4_cb_release() and nfsd4_shutdown_callback()

When we're destroying the client lease, and we call
nfsd4_shutdown_callback(), we must ensure that we do not return
before all outstanding callbacks have terminated and have
released their payloads.

Signed-off-by: Trond Myklebust <[email protected]>
Signed-off-by: J. Bruce Fields <[email protected]>

diff --git a/fs/nfsd/nfs4callback.c b/fs/nfsd/nfs4callback.c
index 1542e1d6dd1a..67d24a536082 100644
--- a/fs/nfsd/nfs4callback.c
+++ b/fs/nfsd/nfs4callback.c
@@ -826,6 +826,31 @@ static int max_cb_time(struct net *net)
return max(nn->nfsd4_lease/10, (time_t)1) * HZ;
}

+static struct workqueue_struct *callback_wq;
+
+static bool nfsd4_queue_cb(struct nfsd4_callback *cb)
+{
+ return queue_work(callback_wq, &cb->cb_work);
+}
+
+static void nfsd41_cb_inflight_begin(struct nfs4_client *clp)
+{
+ atomic_inc(&clp->cl_cb_inflight);
+}
+
+static void nfsd41_cb_inflight_end(struct nfs4_client *clp)
+{
+
+ if (atomic_dec_and_test(&clp->cl_cb_inflight))
+ wake_up_var(&clp->cl_cb_inflight);
+}
+
+static void nfsd41_cb_inflight_wait_complete(struct nfs4_client *clp)
+{
+ wait_var_event(&clp->cl_cb_inflight,
+ !atomic_read(&clp->cl_cb_inflight));
+}
+
static const struct cred *get_backchannel_cred(struct nfs4_client *clp, struct rpc_clnt *client, struct nfsd4_session *ses)
{
if (clp->cl_minorversion == 0) {
@@ -937,14 +962,21 @@ static void nfsd4_cb_probe_done(struct rpc_task *task, void *calldata)
clp->cl_cb_state = NFSD4_CB_UP;
}

+static void nfsd4_cb_probe_release(void *calldata)
+{
+ struct nfs4_client *clp = container_of(calldata, struct nfs4_client, cl_cb_null);
+
+ nfsd41_cb_inflight_end(clp);
+
+}
+
static const struct rpc_call_ops nfsd4_cb_probe_ops = {
/* XXX: release method to ensure we set the cb channel down if
* necessary on early failure? */
.rpc_call_done = nfsd4_cb_probe_done,
+ .rpc_release = nfsd4_cb_probe_release,
};

-static struct workqueue_struct *callback_wq;
-
/*
* Poke the callback thread to process any updates to the callback
* parameters, and send a null probe.
@@ -1004,6 +1036,16 @@ static void nfsd41_cb_release_slot(struct nfsd4_callback *cb)
}
}

+static void nfsd41_destroy_cb(struct nfsd4_callback *cb)
+{
+ struct nfs4_client *clp = cb->cb_clp;
+
+ nfsd41_cb_release_slot(cb);
+ if (cb->cb_ops && cb->cb_ops->release)
+ cb->cb_ops->release(cb);
+ nfsd41_cb_inflight_end(clp);
+}
+
/*
* TODO: cb_sequence should support referring call lists, cachethis, multiple
* slots, and mark callback channel down on communication errors.
@@ -1101,8 +1143,10 @@ static bool nfsd4_cb_sequence_done(struct rpc_task *task, struct nfsd4_callback
ret = false;
goto out;
need_restart:
- task->tk_status = 0;
- cb->cb_need_restart = true;
+ if (!test_bit(NFSD4_CLIENT_CB_KILL, &clp->cl_flags)) {
+ task->tk_status = 0;
+ cb->cb_need_restart = true;
+ }
return false;
}

@@ -1144,9 +1188,9 @@ static void nfsd4_cb_release(void *calldata)
struct nfsd4_callback *cb = calldata;

if (cb->cb_need_restart)
- nfsd4_run_cb(cb);
+ nfsd4_queue_cb(cb);
else
- cb->cb_ops->release(cb);
+ nfsd41_destroy_cb(cb);

}

@@ -1180,6 +1224,7 @@ void nfsd4_shutdown_callback(struct nfs4_client *clp)
*/
nfsd4_run_cb(&clp->cl_cb_null);
flush_workqueue(callback_wq);
+ nfsd41_cb_inflight_wait_complete(clp);
}

/* requires cl_lock: */
@@ -1265,8 +1310,7 @@ nfsd4_run_cb_work(struct work_struct *work)
clnt = clp->cl_cb_client;
if (!clnt) {
/* Callback channel broken, or client killed; give up: */
- if (cb->cb_ops && cb->cb_ops->release)
- cb->cb_ops->release(cb);
+ nfsd41_destroy_cb(cb);
return;
}

@@ -1275,6 +1319,7 @@ nfsd4_run_cb_work(struct work_struct *work)
*/
if (!cb->cb_ops && clp->cl_minorversion) {
clp->cl_cb_state = NFSD4_CB_UP;
+ nfsd41_destroy_cb(cb);
return;
}

@@ -1300,5 +1345,9 @@ void nfsd4_init_cb(struct nfsd4_callback *cb, struct nfs4_client *clp,

void nfsd4_run_cb(struct nfsd4_callback *cb)
{
- queue_work(callback_wq, &cb->cb_work);
+ struct nfs4_client *clp = cb->cb_clp;
+
+ nfsd41_cb_inflight_begin(clp);
+ if (!nfsd4_queue_cb(cb))
+ nfsd41_cb_inflight_end(clp);
}
diff --git a/fs/nfsd/state.h b/fs/nfsd/state.h
index 46f56afb6cb8..d61b83b9654c 100644
--- a/fs/nfsd/state.h
+++ b/fs/nfsd/state.h
@@ -367,6 +367,7 @@ struct nfs4_client {
struct net *net;
struct list_head async_copies; /* list of async copies */
spinlock_t async_lock; /* lock for async copies */
+ atomic_t cl_cb_inflight; /* Outstanding callbacks */
};

/* struct nfs4_client_reset

commit 12357f1b2c8e
Author: Trond Myklebust <[email protected]>
Date: Thu Nov 7 17:11:57 2019 -0500

nfsd: minor 4.1 callback cleanup

Move all the cb_holds_slot management into helper functions. No change
in behavior.

Signed-off-by: Trond Myklebust <[email protected]>
Signed-off-by: J. Bruce Fields <[email protected]>

diff --git a/fs/nfsd/nfs4callback.c b/fs/nfsd/nfs4callback.c
index 524111420b48..1542e1d6dd1a 100644
--- a/fs/nfsd/nfs4callback.c
+++ b/fs/nfsd/nfs4callback.c
@@ -975,9 +975,12 @@ void nfsd4_change_callback(struct nfs4_client *clp, struct nfs4_cb_conn *conn)
* If the slot is available, then mark it busy. Otherwise, set the
* thread for sleeping on the callback RPC wait queue.
*/
-static bool nfsd41_cb_get_slot(struct nfs4_client *clp, struct rpc_task *task)
+static bool nfsd41_cb_get_slot(struct nfsd4_callback *cb, struct rpc_task *task)
{
- if (test_and_set_bit(0, &clp->cl_cb_slot_busy) != 0) {
+ struct nfs4_client *clp = cb->cb_clp;
+
+ if (!cb->cb_holds_slot &&
+ test_and_set_bit(0, &clp->cl_cb_slot_busy) != 0) {
rpc_sleep_on(&clp->cl_cb_waitq, task, NULL);
/* Race breaker */
if (test_and_set_bit(0, &clp->cl_cb_slot_busy) != 0) {
@@ -986,9 +989,21 @@ static bool nfsd41_cb_get_slot(struct nfs4_client *clp, struct rpc_task *task)
}
rpc_wake_up_queued_task(&clp->cl_cb_waitq, task);
}
+ cb->cb_holds_slot = true;
return true;
}

+static void nfsd41_cb_release_slot(struct nfsd4_callback *cb)
+{
+ struct nfs4_client *clp = cb->cb_clp;
+
+ if (cb->cb_holds_slot) {
+ cb->cb_holds_slot = false;
+ clear_bit(0, &clp->cl_cb_slot_busy);
+ rpc_wake_up_next(&clp->cl_cb_waitq);
+ }
+}
+
/*
* TODO: cb_sequence should support referring call lists, cachethis, multiple
* slots, and mark callback channel down on communication errors.
@@ -1005,11 +1020,8 @@ static void nfsd4_cb_prepare(struct rpc_task *task, void *calldata)
*/
cb->cb_seq_status = 1;
cb->cb_status = 0;
- if (minorversion) {
- if (!cb->cb_holds_slot && !nfsd41_cb_get_slot(clp, task))
- return;
- cb->cb_holds_slot = true;
- }
+ if (minorversion && !nfsd41_cb_get_slot(cb, task))
+ return;
rpc_call_start(task);
}

@@ -1076,9 +1088,7 @@ static bool nfsd4_cb_sequence_done(struct rpc_task *task, struct nfsd4_callback
cb->cb_seq_status);
}

- cb->cb_holds_slot = false;
- clear_bit(0, &clp->cl_cb_slot_busy);
- rpc_wake_up_next(&clp->cl_cb_waitq);
+ nfsd41_cb_release_slot(cb);
dprintk("%s: freed slot, new seqid=%d\n", __func__,
clp->cl_cb_session->se_cb_seq_nr);

2019-11-08 17:54:43

by J. Bruce Fields

[permalink] [raw]
Subject: [PATCH] nfsd: document callback_wq serialization of callback code

From: "J. Bruce Fields" <[email protected]>
Date: Tue, 29 Oct 2019 16:02:18 -0400

The callback code relies on the fact that much of it is only ever called
from the ordered workqueue callback_wq, and this is worth documenting.

Reported-by: Trond Myklebust <[email protected]>
Signed-off-by: J. Bruce Fields <[email protected]>
---
fs/nfsd/nfs4callback.c | 6 ++++++
1 file changed, 6 insertions(+)

Also adding a comment, since I know this was a source of confusion when
investigating these races.

diff --git a/fs/nfsd/nfs4callback.c b/fs/nfsd/nfs4callback.c
index c94768b096a3..24534db87e86 100644
--- a/fs/nfsd/nfs4callback.c
+++ b/fs/nfsd/nfs4callback.c
@@ -1243,6 +1243,12 @@ static struct nfsd4_conn * __nfsd4_find_backchannel(struct nfs4_client *clp)
return NULL;
}

+/*
+ * Note there isn't a lot of locking in this code; instead we depend on
+ * the fact that it is run from the callback_wq, which won't run two
+ * work items at once. So, for example, callback_wq handles all access
+ * of cl_cb_client and all calls to rpc_create or rpc_shutdown_client.
+ */
static void nfsd4_process_cb_update(struct nfsd4_callback *cb)
{
struct nfs4_cb_conn conn;
--
2.23.0

2019-11-08 17:59:22

by J. Bruce Fields

[permalink] [raw]
Subject: [PATCH] nfsd: mark cb path down on unknown errors

From: "J. Bruce Fields" <[email protected]>
Date: Tue, 22 Oct 2019 12:29:37 -0400

An unexpected error is probably a sign that something is wrong with the
callback path.

Signed-off-by: J. Bruce Fields <[email protected]>
---
fs/nfsd/nfs4callback.c | 1 +
1 file changed, 1 insertion(+)

Also, while we're here....--b.

diff --git a/fs/nfsd/nfs4callback.c b/fs/nfsd/nfs4callback.c
index 67d24a536082..c94768b096a3 100644
--- a/fs/nfsd/nfs4callback.c
+++ b/fs/nfsd/nfs4callback.c
@@ -1126,6 +1126,7 @@ static bool nfsd4_cb_sequence_done(struct rpc_task *task, struct nfsd4_callback
}
break;
default:
+ nfsd4_mark_cb_fault(cb->cb_clp, cb->cb_seq_status);
dprintk("%s: unprocessed error %d\n", __func__,
cb->cb_seq_status);
}
--
2.23.0

2019-11-08 18:45:45

by Trond Myklebust

[permalink] [raw]
Subject: Re: [PATCH v2] nfsd: Fix races between nfsd4_cb_release() and nfsd4_shutdown_callback()

On Thu, 2019-11-07 at 17:27 -0500, J. Bruce Fields wrote:
> On Tue, Oct 29, 2019 at 05:47:05PM -0400, J. Bruce Fields wrote:
> > On Fri, Oct 25, 2019 at 11:33:36AM -0400, bfields wrote:
> > > On Fri, Oct 25, 2019 at 11:21:19AM -0400, J. Bruce Fields wrote:
> > > > I thought I was running v2, let me double-check....
> > >
> > > Yes, with v2 I'm getting a hang on generic/013.
> > >
> > > I checked quickly and didn't see anything interesting in the
> > > logs,
> > > otherwise I haven't done any digging.
> >
> > Reproducible just with ./check -nfs generic/013. The last thing I
> > see
> > in wireshark is an asynchronous COPY call and reply. Which means
> > it's
> > probably trying to do a CB_OFFLOAD. Hm.
>
> Oh, I think it just needs the following.
>
> --b.
>
> diff --git a/fs/nfsd/nfs4callback.c b/fs/nfsd/nfs4callback.c
> index fb71e7f9d0d9..e49604701a71 100644
> --- a/fs/nfsd/nfs4callback.c
> +++ b/fs/nfsd/nfs4callback.c
> @@ -1026,8 +1026,8 @@ static bool nfsd41_cb_get_slot(struct
> nfsd4_callback *cb, struct rpc_task *task)
> return false;
> }
> rpc_wake_up_queued_task(&clp->cl_cb_waitq, task);
> - cb->cb_holds_slot = true;
> }
> + cb->cb_holds_slot = true;
> return true;
> }
>

Doh! Thanks for spotting that.

--
Trond Myklebust
Linux NFS client maintainer, Hammerspace
[email protected]