2017-06-23 14:36:30

by Olga Kornievskaia

[permalink] [raw]
Subject: [PATCH 1/1] PNFS fix EACCESS on commit to DS handling

Commit fabbbee0eb0f "PNFS fix fallback to MDS if got error on
commit to DS" moved the pnfs_set_lo_fail() to unhandled errors
which was not correct and lead to a kernel oops on umount.

Instead, fix the original EACCESS on commit to DS error by
getting the new layout and re-doing the IO.

Fixes: fabbbee0eb0f ("PNFS fix fallback to MDS if got error on commit to DS")
Signed-off-by: Olga Kornievskaia <[email protected]>
---
fs/nfs/filelayout/filelayout.c | 3 ++-
1 file changed, 2 insertions(+), 1 deletion(-)

diff --git a/fs/nfs/filelayout/filelayout.c b/fs/nfs/filelayout/filelayout.c
index 61dcb4b..8075efe 100644
--- a/fs/nfs/filelayout/filelayout.c
+++ b/fs/nfs/filelayout/filelayout.c
@@ -153,6 +153,7 @@ static int filelayout_async_handle_error(struct rpc_task *task,
case -NFS4ERR_RETRY_UNCACHED_REP:
break;
/* Invalidate Layout errors */
+ case -NFS4ERR_ACCESS:
case -NFS4ERR_PNFS_NO_LAYOUT:
case -ESTALE: /* mapped NFS4ERR_STALE */
case -EBADHANDLE: /* mapped NFS4ERR_BADHANDLE */
@@ -183,10 +184,10 @@ static int filelayout_async_handle_error(struct rpc_task *task,
task->tk_status);
nfs4_mark_deviceid_unavailable(devid);
pnfs_error_mark_layout_for_return(inode, lseg);
+ pnfs_set_lo_fail(lseg);
rpc_wake_up(&tbl->slot_tbl_waitq);
/* fall through */
default:
- pnfs_set_lo_fail(lseg);
reset:
dprintk("%s Retry through MDS. Error %d\n", __func__,
task->tk_status);
--
1.8.3.1



2017-06-23 14:27:04

by Olga Kornievskaia

[permalink] [raw]
Subject: [PATCH 1/1] PNFS for stateid errors retry against MDS first

Upon receiving a stateid error such as BAD_STATEID, the client
should retry the operation against the MDS before deciding to
do stateid recovery.

Previously, the code would initiate state recovery and it could
lead to a race in a state manager that could chose an incorrect
recovery method which would lead to the EIO failure for the
application.

Signed-off-by: Olga Kornievskaia <[email protected]>
---
fs/nfs/filelayout/filelayout.c | 28 ----------------------------
fs/nfs/flexfilelayout/flexfilelayout.c | 33 ---------------------------------
2 files changed, 61 deletions(-)

diff --git a/fs/nfs/filelayout/filelayout.c b/fs/nfs/filelayout/filelayout.c
index ce82da1..61dcb4b 100644
--- a/fs/nfs/filelayout/filelayout.c
+++ b/fs/nfs/filelayout/filelayout.c
@@ -126,32 +126,13 @@ static int filelayout_async_handle_error(struct rpc_task *task,
{
struct pnfs_layout_hdr *lo = lseg->pls_layout;
struct inode *inode = lo->plh_inode;
- struct nfs_server *mds_server = NFS_SERVER(inode);
struct nfs4_deviceid_node *devid = FILELAYOUT_DEVID_NODE(lseg);
- struct nfs_client *mds_client = mds_server->nfs_client;
struct nfs4_slot_table *tbl = &clp->cl_session->fc_slot_table;

if (task->tk_status >= 0)
return 0;

switch (task->tk_status) {
- /* MDS state errors */
- case -NFS4ERR_DELEG_REVOKED:
- case -NFS4ERR_ADMIN_REVOKED:
- case -NFS4ERR_BAD_STATEID:
- case -NFS4ERR_OPENMODE:
- if (state == NULL)
- break;
- if (nfs4_schedule_stateid_recovery(mds_server, state) < 0)
- goto out_bad_stateid;
- goto wait_on_recovery;
- case -NFS4ERR_EXPIRED:
- if (state != NULL) {
- if (nfs4_schedule_stateid_recovery(mds_server, state) < 0)
- goto out_bad_stateid;
- }
- nfs4_schedule_lease_recovery(mds_client);
- goto wait_on_recovery;
/* DS session errors */
case -NFS4ERR_BADSESSION:
case -NFS4ERR_BADSLOT:
@@ -211,17 +192,8 @@ static int filelayout_async_handle_error(struct rpc_task *task,
task->tk_status);
return -NFS4ERR_RESET_TO_MDS;
}
-out:
task->tk_status = 0;
return -EAGAIN;
-out_bad_stateid:
- task->tk_status = -EIO;
- return 0;
-wait_on_recovery:
- rpc_sleep_on(&mds_client->cl_rpcwaitq, task, NULL);
- if (test_bit(NFS4CLNT_MANAGER_RUNNING, &mds_client->cl_state) == 0)
- rpc_wake_up_queued_task(&mds_client->cl_rpcwaitq, task);
- goto out;
}

/* NFS_PROTO call done callback routines */
diff --git a/fs/nfs/flexfilelayout/flexfilelayout.c b/fs/nfs/flexfilelayout/flexfilelayout.c
index 23542dc..1f2ac3d 100644
--- a/fs/nfs/flexfilelayout/flexfilelayout.c
+++ b/fs/nfs/flexfilelayout/flexfilelayout.c
@@ -1050,34 +1050,10 @@ static int ff_layout_async_handle_error_v4(struct rpc_task *task,
{
struct pnfs_layout_hdr *lo = lseg->pls_layout;
struct inode *inode = lo->plh_inode;
- struct nfs_server *mds_server = NFS_SERVER(inode);
-
struct nfs4_deviceid_node *devid = FF_LAYOUT_DEVID_NODE(lseg, idx);
- struct nfs_client *mds_client = mds_server->nfs_client;
struct nfs4_slot_table *tbl = &clp->cl_session->fc_slot_table;

switch (task->tk_status) {
- /* MDS state errors */
- case -NFS4ERR_DELEG_REVOKED:
- case -NFS4ERR_ADMIN_REVOKED:
- case -NFS4ERR_BAD_STATEID:
- if (state == NULL)
- break;
- nfs_remove_bad_delegation(state->inode, NULL);
- case -NFS4ERR_OPENMODE:
- if (state == NULL)
- break;
- if (nfs4_schedule_stateid_recovery(mds_server, state) < 0)
- goto out_bad_stateid;
- goto wait_on_recovery;
- case -NFS4ERR_EXPIRED:
- if (state != NULL) {
- if (nfs4_schedule_stateid_recovery(mds_server, state) < 0)
- goto out_bad_stateid;
- }
- nfs4_schedule_lease_recovery(mds_client);
- goto wait_on_recovery;
- /* DS session errors */
case -NFS4ERR_BADSESSION:
case -NFS4ERR_BADSLOT:
case -NFS4ERR_BAD_HIGH_SLOT:
@@ -1137,17 +1113,8 @@ static int ff_layout_async_handle_error_v4(struct rpc_task *task,
task->tk_status);
return -NFS4ERR_RESET_TO_MDS;
}
-out:
task->tk_status = 0;
return -EAGAIN;
-out_bad_stateid:
- task->tk_status = -EIO;
- return 0;
-wait_on_recovery:
- rpc_sleep_on(&mds_client->cl_rpcwaitq, task, NULL);
- if (test_bit(NFS4CLNT_MANAGER_RUNNING, &mds_client->cl_state) == 0)
- rpc_wake_up_queued_task(&mds_client->cl_rpcwaitq, task);
- goto out;
}

/* Retry all errors through either pNFS or MDS except for -EJUKEBOX */
--
1.8.3.1


2017-06-23 14:33:27

by Olga Kornievskaia

[permalink] [raw]
Subject: Re: [PATCH 1/1] PNFS for stateid errors retry against MDS first

On Fri, Jun 23, 2017 at 10:26 AM, Olga Kornievskaia <[email protected]> wrote:
> Upon receiving a stateid error such as BAD_STATEID, the client
> should retry the operation against the MDS before deciding to
> do stateid recovery.
>
> Previously, the code would initiate state recovery and it could
> lead to a race in a state manager that could chose an incorrect
> recovery method which would lead to the EIO failure for the
> application.
>
> Signed-off-by: Olga Kornievskaia <[email protected]>
> ---
> fs/nfs/filelayout/filelayout.c | 28 ----------------------------
> fs/nfs/flexfilelayout/flexfilelayout.c | 33 ---------------------------------
> 2 files changed, 61 deletions(-)
>
> diff --git a/fs/nfs/filelayout/filelayout.c b/fs/nfs/filelayout/filelayout.c
> index ce82da1..61dcb4b 100644
> --- a/fs/nfs/filelayout/filelayout.c
> +++ b/fs/nfs/filelayout/filelayout.c
> @@ -126,32 +126,13 @@ static int filelayout_async_handle_error(struct rpc_task *task,
> {
> struct pnfs_layout_hdr *lo = lseg->pls_layout;
> struct inode *inode = lo->plh_inode;
> - struct nfs_server *mds_server = NFS_SERVER(inode);
> struct nfs4_deviceid_node *devid = FILELAYOUT_DEVID_NODE(lseg);
> - struct nfs_client *mds_client = mds_server->nfs_client;
> struct nfs4_slot_table *tbl = &clp->cl_session->fc_slot_table;
>
> if (task->tk_status >= 0)
> return 0;
>
> switch (task->tk_status) {
> - /* MDS state errors */
> - case -NFS4ERR_DELEG_REVOKED:
> - case -NFS4ERR_ADMIN_REVOKED:
> - case -NFS4ERR_BAD_STATEID:
> - case -NFS4ERR_OPENMODE:
> - if (state == NULL)
> - break;
> - if (nfs4_schedule_stateid_recovery(mds_server, state) < 0)
> - goto out_bad_stateid;
> - goto wait_on_recovery;
> - case -NFS4ERR_EXPIRED:
> - if (state != NULL) {
> - if (nfs4_schedule_stateid_recovery(mds_server, state) < 0)
> - goto out_bad_stateid;
> - }
> - nfs4_schedule_lease_recovery(mds_client);
> - goto wait_on_recovery;
> /* DS session errors */
> case -NFS4ERR_BADSESSION:
> case -NFS4ERR_BADSLOT:
> @@ -211,17 +192,8 @@ static int filelayout_async_handle_error(struct rpc_task *task,
> task->tk_status);
> return -NFS4ERR_RESET_TO_MDS;
> }
> -out:
> task->tk_status = 0;
> return -EAGAIN;
> -out_bad_stateid:
> - task->tk_status = -EIO;
> - return 0;
> -wait_on_recovery:
> - rpc_sleep_on(&mds_client->cl_rpcwaitq, task, NULL);
> - if (test_bit(NFS4CLNT_MANAGER_RUNNING, &mds_client->cl_state) == 0)
> - rpc_wake_up_queued_task(&mds_client->cl_rpcwaitq, task);
> - goto out;
> }
>
> /* NFS_PROTO call done callback routines */
> diff --git a/fs/nfs/flexfilelayout/flexfilelayout.c b/fs/nfs/flexfilelayout/flexfilelayout.c
> index 23542dc..1f2ac3d 100644
> --- a/fs/nfs/flexfilelayout/flexfilelayout.c
> +++ b/fs/nfs/flexfilelayout/flexfilelayout.c
> @@ -1050,34 +1050,10 @@ static int ff_layout_async_handle_error_v4(struct rpc_task *task,
> {
> struct pnfs_layout_hdr *lo = lseg->pls_layout;
> struct inode *inode = lo->plh_inode;
> - struct nfs_server *mds_server = NFS_SERVER(inode);
> -
> struct nfs4_deviceid_node *devid = FF_LAYOUT_DEVID_NODE(lseg, idx);
> - struct nfs_client *mds_client = mds_server->nfs_client;
> struct nfs4_slot_table *tbl = &clp->cl_session->fc_slot_table;
>
> switch (task->tk_status) {
> - /* MDS state errors */
> - case -NFS4ERR_DELEG_REVOKED:
> - case -NFS4ERR_ADMIN_REVOKED:
> - case -NFS4ERR_BAD_STATEID:
> - if (state == NULL)
> - break;
> - nfs_remove_bad_delegation(state->inode, NULL);
> - case -NFS4ERR_OPENMODE:
> - if (state == NULL)
> - break;
> - if (nfs4_schedule_stateid_recovery(mds_server, state) < 0)
> - goto out_bad_stateid;
> - goto wait_on_recovery;
> - case -NFS4ERR_EXPIRED:
> - if (state != NULL) {
> - if (nfs4_schedule_stateid_recovery(mds_server, state) < 0)
> - goto out_bad_stateid;
> - }
> - nfs4_schedule_lease_recovery(mds_client);
> - goto wait_on_recovery;
> - /* DS session errors */
> case -NFS4ERR_BADSESSION:
> case -NFS4ERR_BADSLOT:
> case -NFS4ERR_BAD_HIGH_SLOT:
> @@ -1137,17 +1113,8 @@ static int ff_layout_async_handle_error_v4(struct rpc_task *task,
> task->tk_status);
> return -NFS4ERR_RESET_TO_MDS;
> }
> -out:
> task->tk_status = 0;
> return -EAGAIN;
> -out_bad_stateid:
> - task->tk_status = -EIO;
> - return 0;
> -wait_on_recovery:
> - rpc_sleep_on(&mds_client->cl_rpcwaitq, task, NULL);
> - if (test_bit(NFS4CLNT_MANAGER_RUNNING, &mds_client->cl_state) == 0)
> - rpc_wake_up_queued_task(&mds_client->cl_rpcwaitq, task);
> - goto out;
> }
>
> /* Retry all errors through either pNFS or MDS except for -EJUKEBOX */
> --
> 1.8.3.1
>

Trond, I pre-emptively added the same correction to the flexlayout as
I think it needs to do the same for the error but I haven't tested it.
Also this patch depends, on the PNFS fix EACCESS patch otherwise, it
leads to the same kernel oops on the umount.

> --
> To unsubscribe from this list: send the line "unsubscribe linux-nfs" in
> the body of a message to [email protected]
> More majordomo info at http://vger.kernel.org/majordomo-info.html

2017-06-30 18:44:03

by Anna Schumaker

[permalink] [raw]
Subject: Re: [PATCH 1/1] PNFS fix EACCESS on commit to DS handling

Hi Olga,

On 06/23/2017 10:26 AM, Olga Kornievskaia wrote:
> Commit fabbbee0eb0f "PNFS fix fallback to MDS if got error on
> commit to DS" moved the pnfs_set_lo_fail() to unhandled errors
> which was not correct and lead to a kernel oops on umount.
>
> Instead, fix the original EACCESS on commit to DS error by
> getting the new layout and re-doing the IO.
>
> Fixes: fabbbee0eb0f ("PNFS fix fallback to MDS if got error on commit to DS")
> Signed-off-by: Olga Kornievskaia <[email protected]>
> ---
> fs/nfs/filelayout/filelayout.c | 3 ++-
> 1 file changed, 2 insertions(+), 1 deletion(-)
>
> diff --git a/fs/nfs/filelayout/filelayout.c b/fs/nfs/filelayout/filelayout.c
> index 61dcb4b..8075efe 100644
> --- a/fs/nfs/filelayout/filelayout.c
> +++ b/fs/nfs/filelayout/filelayout.c
> @@ -153,6 +153,7 @@ static int filelayout_async_handle_error(struct rpc_task *task,
> case -NFS4ERR_RETRY_UNCACHED_REP:
> break;
> /* Invalidate Layout errors */
> + case -NFS4ERR_ACCESS:

Should this be -EACCES instead? The fields right below this are using mapped errors based on the nfs_errtbl in nfs4xdr.c, so NFS4ERR_ACCESS might not ever get passed to this function.

Thanks,
Anna

> case -NFS4ERR_PNFS_NO_LAYOUT:
> case -ESTALE: /* mapped NFS4ERR_STALE */
> case -EBADHANDLE: /* mapped NFS4ERR_BADHANDLE */
> @@ -183,10 +184,10 @@ static int filelayout_async_handle_error(struct rpc_task *task,
> task->tk_status);
> nfs4_mark_deviceid_unavailable(devid);
> pnfs_error_mark_layout_for_return(inode, lseg);
> + pnfs_set_lo_fail(lseg);
> rpc_wake_up(&tbl->slot_tbl_waitq);
> /* fall through */
> default:
> - pnfs_set_lo_fail(lseg);
> reset:
> dprintk("%s Retry through MDS. Error %d\n", __func__,
> task->tk_status);
>

2017-06-30 18:48:01

by Olga Kornievskaia

[permalink] [raw]
Subject: Re: [PATCH 1/1] PNFS fix EACCESS on commit to DS handling


> On Jun 30, 2017, at 2:43 PM, Anna Schumaker =
<[email protected]> wrote:
>=20
> Hi Olga,
>=20
> On 06/23/2017 10:26 AM, Olga Kornievskaia wrote:
>> Commit fabbbee0eb0f "PNFS fix fallback to MDS if got error on
>> commit to DS" moved the pnfs_set_lo_fail() to unhandled errors
>> which was not correct and lead to a kernel oops on umount.
>>=20
>> Instead, fix the original EACCESS on commit to DS error by
>> getting the new layout and re-doing the IO.
>>=20
>> Fixes: fabbbee0eb0f ("PNFS fix fallback to MDS if got error on commit =
to DS")
>> Signed-off-by: Olga Kornievskaia <[email protected]>
>> ---
>> fs/nfs/filelayout/filelayout.c | 3 ++-
>> 1 file changed, 2 insertions(+), 1 deletion(-)
>>=20
>> diff --git a/fs/nfs/filelayout/filelayout.c =
b/fs/nfs/filelayout/filelayout.c
>> index 61dcb4b..8075efe 100644
>> --- a/fs/nfs/filelayout/filelayout.c
>> +++ b/fs/nfs/filelayout/filelayout.c
>> @@ -153,6 +153,7 @@ static int filelayout_async_handle_error(struct =
rpc_task *task,
>> case -NFS4ERR_RETRY_UNCACHED_REP:
>> break;
>> /* Invalidate Layout errors */
>> + case -NFS4ERR_ACCESS:
>=20
> Should this be -EACCES instead? The fields right below this are using =
mapped errors based on the nfs_errtbl in nfs4xdr.c, so NFS4ERR_ACCESS =
might not ever get passed to this function.

NFS4ERR_ACCESS and EACCES are numerically the same: 13.

>=20
> Thanks,
> Anna
>=20
>> case -NFS4ERR_PNFS_NO_LAYOUT:
>> case -ESTALE: /* mapped NFS4ERR_STALE */
>> case -EBADHANDLE: /* mapped NFS4ERR_BADHANDLE */
>> @@ -183,10 +184,10 @@ static int filelayout_async_handle_error(struct =
rpc_task *task,
>> task->tk_status);
>> nfs4_mark_deviceid_unavailable(devid);
>> pnfs_error_mark_layout_for_return(inode, lseg);
>> + pnfs_set_lo_fail(lseg);
>> rpc_wake_up(&tbl->slot_tbl_waitq);
>> /* fall through */
>> default:
>> - pnfs_set_lo_fail(lseg);
>> reset:
>> dprintk("%s Retry through MDS. Error %d\n", __func__,
>> task->tk_status);
>>=20


2017-06-30 18:57:05

by Anna Schumaker

[permalink] [raw]
Subject: Re: [PATCH 1/1] PNFS fix EACCESS on commit to DS handling



On 06/30/2017 02:47 PM, Olga Kornievskaia wrote:
>
>> On Jun 30, 2017, at 2:43 PM, Anna Schumaker <[email protected]> wrote:
>>
>> Hi Olga,
>>
>> On 06/23/2017 10:26 AM, Olga Kornievskaia wrote:
>>> Commit fabbbee0eb0f "PNFS fix fallback to MDS if got error on
>>> commit to DS" moved the pnfs_set_lo_fail() to unhandled errors
>>> which was not correct and lead to a kernel oops on umount.
>>>
>>> Instead, fix the original EACCESS on commit to DS error by
>>> getting the new layout and re-doing the IO.
>>>
>>> Fixes: fabbbee0eb0f ("PNFS fix fallback to MDS if got error on commit to DS")
>>> Signed-off-by: Olga Kornievskaia <[email protected]>
>>> ---
>>> fs/nfs/filelayout/filelayout.c | 3 ++-
>>> 1 file changed, 2 insertions(+), 1 deletion(-)
>>>
>>> diff --git a/fs/nfs/filelayout/filelayout.c b/fs/nfs/filelayout/filelayout.c
>>> index 61dcb4b..8075efe 100644
>>> --- a/fs/nfs/filelayout/filelayout.c
>>> +++ b/fs/nfs/filelayout/filelayout.c
>>> @@ -153,6 +153,7 @@ static int filelayout_async_handle_error(struct rpc_task *task,
>>> case -NFS4ERR_RETRY_UNCACHED_REP:
>>> break;
>>> /* Invalidate Layout errors */
>>> + case -NFS4ERR_ACCESS:
>>
>> Should this be -EACCES instead? The fields right below this are using mapped errors based on the nfs_errtbl in nfs4xdr.c, so NFS4ERR_ACCESS might not ever get passed to this function.
>
> NFS4ERR_ACCESS and EACCES are numerically the same: 13.

You're right, and I should have double checked that. Thanks for pointing that out to me!

Anna

>
>>
>> Thanks,
>> Anna
>>
>>> case -NFS4ERR_PNFS_NO_LAYOUT:
>>> case -ESTALE: /* mapped NFS4ERR_STALE */
>>> case -EBADHANDLE: /* mapped NFS4ERR_BADHANDLE */
>>> @@ -183,10 +184,10 @@ static int filelayout_async_handle_error(struct rpc_task *task,
>>> task->tk_status);
>>> nfs4_mark_deviceid_unavailable(devid);
>>> pnfs_error_mark_layout_for_return(inode, lseg);
>>> + pnfs_set_lo_fail(lseg);
>>> rpc_wake_up(&tbl->slot_tbl_waitq);
>>> /* fall through */
>>> default:
>>> - pnfs_set_lo_fail(lseg);
>>> reset:
>>> dprintk("%s Retry through MDS. Error %d\n", __func__,
>>> task->tk_status);
>>>
>