Fix session reset deadlocks. These patches apply to tronds nfs-2.6 bugfix
branch (2.6.32-rc6 on top of these two already posted patches:
nfs41-fix-multiple-free-slot-calls.patch
nfs41-fix-race-on-session-reset.patch
Fix two problems reported by Trond:
0001-nfs41-remove-nfs4_recover_session.patch
0002-nfs41-fix-state-manager-deadlock-in-session-reset.patch
Testing:
CONFIG_NFS_V4_1
v41 mount: Connectathon tests passed. PyNFS testclient.py SESSIONRESET tests
and the new INJECT_ERROR_CTHON test where NFS4ERR_BADSESSION was returned
every 50th SEQUENCE operation durring a Connectathon basic test run. This passed
with the session being reset 390 times from a variety of client compounds
(READ/WRITE/COMMIT/OPEN/CLOSE/ to name a few).
v4 mount: Connectathon tests passed.
no CONFIG_NFS_V4_1
v4 mount: Connectathon tests passed.
-->Andy
From: Andy Adamson <[email protected]>
If the session is reset during state recovery, the state manager thread can
sleep on the slot_tbl_waitq causing a deadlock.
Add a completion framework to the session. Have the state manager thread set
a new session state (NFS4CLNT_SESSION_DRAINING) and wait for the session slot
table to drain.
Signal the state manager thread in nfs41_sequence_free_slot when the
NFS4CLNT_SESSION_DRAINING bit is set and the session is drained.
Reported-by: Trond Myklebust <[email protected]>
Signed-off-by: Andy Adamson <[email protected]>
---
fs/nfs/nfs4_fs.h | 1 +
fs/nfs/nfs4proc.c | 25 ++++++++++++++++---------
fs/nfs/nfs4state.c | 15 +++++++++++++++
include/linux/nfs_fs_sb.h | 1 +
4 files changed, 33 insertions(+), 9 deletions(-)
diff --git a/fs/nfs/nfs4_fs.h b/fs/nfs/nfs4_fs.h
index 6ea07a3..e8604af 100644
--- a/fs/nfs/nfs4_fs.h
+++ b/fs/nfs/nfs4_fs.h
@@ -45,6 +45,7 @@ enum nfs4_client_state {
NFS4CLNT_RECLAIM_NOGRACE,
NFS4CLNT_DELEGRETURN,
NFS4CLNT_SESSION_SETUP,
+ NFS4CLNT_SESSION_DRAINING,
};
/*
diff --git a/fs/nfs/nfs4proc.c b/fs/nfs/nfs4proc.c
index 44d1c14..ebde948 100644
--- a/fs/nfs/nfs4proc.c
+++ b/fs/nfs/nfs4proc.c
@@ -355,6 +355,16 @@ void nfs41_sequence_free_slot(const struct nfs_client *clp,
}
nfs4_free_slot(tbl, res->sr_slotid);
res->sr_slotid = NFS4_MAX_SLOT_TABLE;
+
+ /* Signal state manager thread if session is drained */
+ if (test_bit(NFS4CLNT_SESSION_DRAINING, &clp->cl_state)) {
+ spin_lock(&tbl->slot_tbl_lock);
+ if (tbl->highest_used_slotid == -1) {
+ dprintk("%s COMPLETE: Session Drained\n", __func__);
+ complete(&clp->cl_session->complete);
+ }
+ spin_unlock(&tbl->slot_tbl_lock);
+ }
}
static void nfs41_sequence_done(struct nfs_client *clp,
@@ -448,15 +458,11 @@ static int nfs41_setup_sequence(struct nfs4_session *session,
spin_lock(&tbl->slot_tbl_lock);
if (test_bit(NFS4CLNT_SESSION_SETUP, &session->clp->cl_state)) {
- if (tbl->highest_used_slotid != -1) {
- rpc_sleep_on(&tbl->slot_tbl_waitq, task, NULL);
- spin_unlock(&tbl->slot_tbl_lock);
- dprintk("<-- %s: Session reset: draining\n", __func__);
- return -EAGAIN;
- }
-
- /* The slot table is empty; start the reset thread */
- dprintk("%s Session Reset\n", __func__);
+ /*
+ * The state manager will wait until the slot table is empty.
+ * Schedule the reset thread
+ */
+ dprintk("%s Schedule Session Reset\n", __func__);
rpc_sleep_on(&tbl->slot_tbl_waitq, task, NULL);
nfs4_schedule_state_manager(session->clp);
spin_unlock(&tbl->slot_tbl_lock);
@@ -4583,6 +4589,7 @@ struct nfs4_session *nfs4_alloc_session(struct nfs_client *clp)
* nfs_client struct
*/
clp->cl_cons_state = NFS_CS_SESSION_INITING;
+ init_completion(&session->complete);
tbl = &session->fc_slot_table;
spin_lock_init(&tbl->slot_tbl_lock);
diff --git a/fs/nfs/nfs4state.c b/fs/nfs/nfs4state.c
index 50a1b04..f08abe8 100644
--- a/fs/nfs/nfs4state.c
+++ b/fs/nfs/nfs4state.c
@@ -1162,8 +1162,23 @@ static void nfs4_session_recovery_handle_error(struct nfs_client *clp, int err)
static int nfs4_reset_session(struct nfs_client *clp)
{
+ struct nfs4_session *ses = clp->cl_session;
+ struct nfs4_slot_table *tbl = &ses->fc_slot_table;
int status;
+ spin_lock(&tbl->slot_tbl_lock);
+ if (tbl->highest_used_slotid != -1) {
+ set_bit(NFS4CLNT_SESSION_DRAINING, &clp->cl_state);
+ spin_unlock(&tbl->slot_tbl_lock);
+ status = wait_for_completion_interruptible(&ses->complete);
+ clear_bit(NFS4CLNT_SESSION_DRAINING, &clp->cl_state);
+ if (status) /* -ERESTARTSYS */
+ goto out;
+ INIT_COMPLETION(ses->complete);
+ } else {
+ spin_unlock(&tbl->slot_tbl_lock);
+ }
+
status = nfs4_proc_destroy_session(clp->cl_session);
if (status && status != -NFS4ERR_BADSESSION &&
status != -NFS4ERR_DEADSESSION) {
diff --git a/include/linux/nfs_fs_sb.h b/include/linux/nfs_fs_sb.h
index 320569e..34fc6be 100644
--- a/include/linux/nfs_fs_sb.h
+++ b/include/linux/nfs_fs_sb.h
@@ -209,6 +209,7 @@ struct nfs4_session {
unsigned long session_state;
u32 hash_alg;
u32 ssv_len;
+ struct completion complete;
/* The fore and back channel */
struct nfs4_channel_attrs fc_attrs;
--
1.6.0.6
From: Andy Adamson <[email protected]>
nfs4_recover_session can put rpciod to sleep. Just use nfs4_schedule_recovery.
Reported-by: Trond Myklebust <[email protected]>
Signed-off-by: Andy Adamson <[email protected]>
---
fs/nfs/nfs4proc.c | 26 +++-----------------------
1 files changed, 3 insertions(+), 23 deletions(-)
diff --git a/fs/nfs/nfs4proc.c b/fs/nfs/nfs4proc.c
index 0f1afde..44d1c14 100644
--- a/fs/nfs/nfs4proc.c
+++ b/fs/nfs/nfs4proc.c
@@ -427,24 +427,6 @@ out:
return ret_id;
}
-static int nfs4_recover_session(struct nfs4_session *session)
-{
- struct nfs_client *clp = session->clp;
- unsigned int loop;
- int ret;
-
- for (loop = NFS4_MAX_LOOP_ON_RECOVER; loop != 0; loop--) {
- ret = nfs4_wait_clnt_recover(clp);
- if (ret != 0)
- break;
- if (!test_bit(NFS4CLNT_SESSION_SETUP, &clp->cl_state))
- break;
- nfs4_schedule_state_manager(clp);
- ret = -EIO;
- }
- return ret;
-}
-
static int nfs41_setup_sequence(struct nfs4_session *session,
struct nfs4_sequence_args *args,
struct nfs4_sequence_res *res,
@@ -453,7 +435,6 @@ static int nfs41_setup_sequence(struct nfs4_session *session,
{
struct nfs4_slot *slot;
struct nfs4_slot_table *tbl;
- int status = 0;
u8 slotid;
dprintk("--> %s\n", __func__);
@@ -476,11 +457,10 @@ static int nfs41_setup_sequence(struct nfs4_session *session,
/* The slot table is empty; start the reset thread */
dprintk("%s Session Reset\n", __func__);
+ rpc_sleep_on(&tbl->slot_tbl_waitq, task, NULL);
+ nfs4_schedule_state_manager(session->clp);
spin_unlock(&tbl->slot_tbl_lock);
- status = nfs4_recover_session(session);
- if (status)
- return status;
- spin_lock(&tbl->slot_tbl_lock);
+ return -EAGAIN;
}
slotid = nfs4_find_slot(tbl, task);
--
1.6.0.6
On Wed, Nov 11, 2009 at 2:20 PM, <[email protected]> wrote:
> From: Andy Adamson <[email protected]>
>
> If the session is reset during state recovery, the state manager thre=
ad can
> sleep on the slot_tbl_waitq causing a deadlock.
>
> Add a completion framework to the session. =A0Have the state manager =
thread set
> a new session state (NFS4CLNT_SESSION_DRAINING) and wait for the sess=
ion slot
> table to drain.
>
> Signal the state manager thread in nfs41_sequence_free_slot when the
> NFS4CLNT_SESSION_DRAINING bit is set and the session is drained.
>
> Reported-by: Trond Myklebust <[email protected]>
> Signed-off-by: Andy Adamson <[email protected]>
> ---
> =A0fs/nfs/nfs4_fs.h =A0 =A0 =A0 =A0 =A0| =A0 =A01 +
> =A0fs/nfs/nfs4proc.c =A0 =A0 =A0 =A0 | =A0 25 ++++++++++++++++-------=
--
> =A0fs/nfs/nfs4state.c =A0 =A0 =A0 =A0| =A0 15 +++++++++++++++
> =A0include/linux/nfs_fs_sb.h | =A0 =A01 +
> =A04 files changed, 33 insertions(+), 9 deletions(-)
>
> diff --git a/fs/nfs/nfs4_fs.h b/fs/nfs/nfs4_fs.h
> index 6ea07a3..e8604af 100644
> --- a/fs/nfs/nfs4_fs.h
> +++ b/fs/nfs/nfs4_fs.h
> @@ -45,6 +45,7 @@ enum nfs4_client_state {
> =A0 =A0 =A0 =A0NFS4CLNT_RECLAIM_NOGRACE,
> =A0 =A0 =A0 =A0NFS4CLNT_DELEGRETURN,
> =A0 =A0 =A0 =A0NFS4CLNT_SESSION_SETUP,
> + =A0 =A0 =A0 NFS4CLNT_SESSION_DRAINING,
> =A0};
>
> =A0/*
> diff --git a/fs/nfs/nfs4proc.c b/fs/nfs/nfs4proc.c
> index 44d1c14..ebde948 100644
> --- a/fs/nfs/nfs4proc.c
> +++ b/fs/nfs/nfs4proc.c
> @@ -355,6 +355,16 @@ void nfs41_sequence_free_slot(const struct nfs_c=
lient *clp,
> =A0 =A0 =A0 =A0}
> =A0 =A0 =A0 =A0nfs4_free_slot(tbl, res->sr_slotid);
> =A0 =A0 =A0 =A0res->sr_slotid =3D NFS4_MAX_SLOT_TABLE;
> +
> + =A0 =A0 =A0 /* Signal state manager thread if session is drained */
> + =A0 =A0 =A0 if (test_bit(NFS4CLNT_SESSION_DRAINING, &clp->cl_state)=
) {
> + =A0 =A0 =A0 =A0 =A0 =A0 =A0 spin_lock(&tbl->slot_tbl_lock);
> + =A0 =A0 =A0 =A0 =A0 =A0 =A0 if (tbl->highest_used_slotid =3D=3D -1)=
{
> + =A0 =A0 =A0 =A0 =A0 =A0 =A0 =A0 =A0 =A0 =A0 dprintk("%s COMPLETE: S=
ession Drained\n", __func__);
> + =A0 =A0 =A0 =A0 =A0 =A0 =A0 =A0 =A0 =A0 =A0 complete(&clp->cl_sessi=
on->complete);
> + =A0 =A0 =A0 =A0 =A0 =A0 =A0 }
> + =A0 =A0 =A0 =A0 =A0 =A0 =A0 spin_unlock(&tbl->slot_tbl_lock);
> + =A0 =A0 =A0 }
> =A0}
>
> =A0static void nfs41_sequence_done(struct nfs_client *clp,
> @@ -448,15 +458,11 @@ static int nfs41_setup_sequence(struct nfs4_ses=
sion *session,
>
> =A0 =A0 =A0 =A0spin_lock(&tbl->slot_tbl_lock);
> =A0 =A0 =A0 =A0if (test_bit(NFS4CLNT_SESSION_SETUP, &session->clp->cl=
_state)) {
> - =A0 =A0 =A0 =A0 =A0 =A0 =A0 if (tbl->highest_used_slotid !=3D -1) {
> - =A0 =A0 =A0 =A0 =A0 =A0 =A0 =A0 =A0 =A0 =A0 rpc_sleep_on(&tbl->slot=
_tbl_waitq, task, NULL);
> - =A0 =A0 =A0 =A0 =A0 =A0 =A0 =A0 =A0 =A0 =A0 spin_unlock(&tbl->slot_=
tbl_lock);
> - =A0 =A0 =A0 =A0 =A0 =A0 =A0 =A0 =A0 =A0 =A0 dprintk("<-- %s: Sessio=
n reset: draining\n", __func__);
> - =A0 =A0 =A0 =A0 =A0 =A0 =A0 =A0 =A0 =A0 =A0 return -EAGAIN;
> - =A0 =A0 =A0 =A0 =A0 =A0 =A0 }
> -
> - =A0 =A0 =A0 =A0 =A0 =A0 =A0 /* The slot table is empty; start the r=
eset thread */
> - =A0 =A0 =A0 =A0 =A0 =A0 =A0 dprintk("%s Session Reset\n", __func__)=
;
> + =A0 =A0 =A0 =A0 =A0 =A0 =A0 /*
> + =A0 =A0 =A0 =A0 =A0 =A0 =A0 =A0* The state manager will wait until =
the slot table is empty.
> + =A0 =A0 =A0 =A0 =A0 =A0 =A0 =A0* Schedule the reset thread
> + =A0 =A0 =A0 =A0 =A0 =A0 =A0 =A0*/
> + =A0 =A0 =A0 =A0 =A0 =A0 =A0 dprintk("%s Schedule Session Reset\n", =
__func__);
> =A0 =A0 =A0 =A0 =A0 =A0 =A0 =A0rpc_sleep_on(&tbl->slot_tbl_waitq, tas=
k, NULL);
> =A0 =A0 =A0 =A0 =A0 =A0 =A0 =A0nfs4_schedule_state_manager(session->c=
lp);
> =A0 =A0 =A0 =A0 =A0 =A0 =A0 =A0spin_unlock(&tbl->slot_tbl_lock);
> @@ -4583,6 +4589,7 @@ struct nfs4_session *nfs4_alloc_session(struct =
nfs_client *clp)
> =A0 =A0 =A0 =A0 * nfs_client struct
> =A0 =A0 =A0 =A0 */
> =A0 =A0 =A0 =A0clp->cl_cons_state =3D NFS_CS_SESSION_INITING;
> + =A0 =A0 =A0 init_completion(&session->complete);
>
> =A0 =A0 =A0 =A0tbl =3D &session->fc_slot_table;
> =A0 =A0 =A0 =A0spin_lock_init(&tbl->slot_tbl_lock);
> diff --git a/fs/nfs/nfs4state.c b/fs/nfs/nfs4state.c
> index 50a1b04..f08abe8 100644
> --- a/fs/nfs/nfs4state.c
> +++ b/fs/nfs/nfs4state.c
> @@ -1162,8 +1162,23 @@ static void nfs4_session_recovery_handle_error=
(struct nfs_client *clp, int err)
>
> =A0static int nfs4_reset_session(struct nfs_client *clp)
> =A0{
> + =A0 =A0 =A0 struct nfs4_session *ses =3D clp->cl_session;
> + =A0 =A0 =A0 struct nfs4_slot_table *tbl =3D &ses->fc_slot_table;
> =A0 =A0 =A0 =A0int status;
>
> + =A0 =A0 =A0 spin_lock(&tbl->slot_tbl_lock);
> + =A0 =A0 =A0 if (tbl->highest_used_slotid !=3D -1) {
> + =A0 =A0 =A0 =A0 =A0 =A0 =A0 set_bit(NFS4CLNT_SESSION_DRAINING, &clp=
->cl_state);
> + =A0 =A0 =A0 =A0 =A0 =A0 =A0 spin_unlock(&tbl->slot_tbl_lock);
> + =A0 =A0 =A0 =A0 =A0 =A0 =A0 status =3D wait_for_completion_interrup=
tible(&ses->complete);
> + =A0 =A0 =A0 =A0 =A0 =A0 =A0 clear_bit(NFS4CLNT_SESSION_DRAINING, &c=
lp->cl_state);
> + =A0 =A0 =A0 =A0 =A0 =A0 =A0 if (status) /* -ERESTARTSYS */
> + =A0 =A0 =A0 =A0 =A0 =A0 =A0 =A0 =A0 =A0 =A0 goto out;
> + =A0 =A0 =A0 =A0 =A0 =A0 =A0 INIT_COMPLETION(ses->complete);
Looking at my own code - I need to move the INIT_COMPLETION after the
wait_for_completion_interruptible and before the status check so that
it gets called even on -ERESTARTSYS.
I'll wait for other comments before sending a fix.
-->Andy
> + =A0 =A0 =A0 } else {
> + =A0 =A0 =A0 =A0 =A0 =A0 =A0 spin_unlock(&tbl->slot_tbl_lock);
> + =A0 =A0 =A0 }
> +
> =A0 =A0 =A0 =A0status =3D nfs4_proc_destroy_session(clp->cl_session);
> =A0 =A0 =A0 =A0if (status && status !=3D -NFS4ERR_BADSESSION &&
> =A0 =A0 =A0 =A0 =A0 =A0status !=3D -NFS4ERR_DEADSESSION) {
> diff --git a/include/linux/nfs_fs_sb.h b/include/linux/nfs_fs_sb.h
> index 320569e..34fc6be 100644
> --- a/include/linux/nfs_fs_sb.h
> +++ b/include/linux/nfs_fs_sb.h
> @@ -209,6 +209,7 @@ struct nfs4_session {
> =A0 =A0 =A0 =A0unsigned long =A0 =A0 =A0 =A0 =A0 =A0 =A0 =A0 =A0 sess=
ion_state;
> =A0 =A0 =A0 =A0u32 =A0 =A0 =A0 =A0 =A0 =A0 =A0 =A0 =A0 =A0 =A0 =A0 =A0=
=A0 hash_alg;
> =A0 =A0 =A0 =A0u32 =A0 =A0 =A0 =A0 =A0 =A0 =A0 =A0 =A0 =A0 =A0 =A0 =A0=
=A0 ssv_len;
> + =A0 =A0 =A0 struct completion =A0 =A0 =A0 =A0 =A0 =A0 =A0 complete;
>
> =A0 =A0 =A0 =A0/* The fore and back channel */
> =A0 =A0 =A0 =A0struct nfs4_channel_attrs =A0 =A0 =A0 fc_attrs;
> --
> 1.6.0.6
>
> --
> To unsubscribe from this list: send the line "unsubscribe linux-nfs" =
in
> the body of a message to [email protected]
> More majordomo info at =A0http://vger.kernel.org/majordomo-info.html
>