The non-retry error path is currently broken and ends up releasing the
reference to the layout twice. It also can end up clearing the
NFS_LAYOUT_FIRST_LAYOUTGET flag twice, causing a race.
In addition, the retry path will fail to decrement the plh_outstanding
counter.
Fixes: 183d9e7b112aa ("pnfs: rework LAYOUTGET retry handling")
Cc: [email protected] # 4.7
Signed-off-by: Trond Myklebust <[email protected]>
---
fs/nfs/pnfs.c | 20 ++++++++++----------
1 file changed, 10 insertions(+), 10 deletions(-)
diff --git a/fs/nfs/pnfs.c b/fs/nfs/pnfs.c
index 0fbe734cc38c..73b0dc90265a 100644
--- a/fs/nfs/pnfs.c
+++ b/fs/nfs/pnfs.c
@@ -1645,6 +1645,7 @@ lookup_again:
lseg = send_layoutget(lo, ctx, &stateid, &arg, &timeout, gfp_flags);
trace_pnfs_update_layout(ino, pos, count, iomode, lo, lseg,
PNFS_UPDATE_LAYOUT_SEND_LAYOUTGET);
+ atomic_dec(&lo->plh_outstanding);
if (IS_ERR(lseg)) {
switch(PTR_ERR(lseg)) {
case -ERECALLCONFLICT:
@@ -1652,26 +1653,25 @@ lookup_again:
lseg = NULL;
/* Fallthrough */
case -EAGAIN:
- pnfs_put_layout_hdr(lo);
- if (first)
- pnfs_clear_first_layoutget(lo);
- if (lseg) {
- trace_pnfs_update_layout(ino, pos, count,
- iomode, lo, lseg, PNFS_UPDATE_LAYOUT_RETRY);
- goto lookup_again;
- }
- /* Fallthrough */
+ break;
default:
if (!nfs_error_is_fatal(PTR_ERR(lseg))) {
pnfs_layout_clear_fail_bit(lo, pnfs_iomode_to_fail_bit(iomode));
lseg = NULL;
}
}
+ if (lseg) {
+ pnfs_put_layout_hdr(lo);
+ if (first)
+ pnfs_clear_first_layoutget(lo);
+ trace_pnfs_update_layout(ino, pos, count,
+ iomode, lo, lseg, PNFS_UPDATE_LAYOUT_RETRY);
+ goto lookup_again;
+ }
} else {
pnfs_layout_clear_fail_bit(lo, pnfs_iomode_to_fail_bit(iomode));
}
- atomic_dec(&lo->plh_outstanding);
out_put_layout_hdr:
if (first)
pnfs_clear_first_layoutget(lo);
--
2.7.4
They are not the same error, and need to be handled differently.
Fixes: 183d9e7b112aa ("pnfs: rework LAYOUTGET retry handling")
Cc: [email protected] # 4.7
Signed-off-by: Trond Myklebust <[email protected]>
---
fs/nfs/nfs4proc.c | 23 ++++++++++++++---------
fs/nfs/pnfs.c | 1 +
2 files changed, 15 insertions(+), 9 deletions(-)
diff --git a/fs/nfs/nfs4proc.c b/fs/nfs/nfs4proc.c
index 519368b98762..ee8efe0a5202 100644
--- a/fs/nfs/nfs4proc.c
+++ b/fs/nfs/nfs4proc.c
@@ -437,6 +437,7 @@ static int nfs4_do_handle_exception(struct nfs_server *server,
case -NFS4ERR_DELAY:
nfs_inc_server_stats(server, NFSIOS_DELAY);
case -NFS4ERR_GRACE:
+ case -NFS4ERR_LAYOUTTRYLATER:
case -NFS4ERR_RECALLCONFLICT:
exception->delay = 1;
return 0;
@@ -7883,11 +7884,12 @@ nfs4_layoutget_handle_exception(struct rpc_task *task,
struct inode *inode = lgp->args.inode;
struct nfs_server *server = NFS_SERVER(inode);
struct pnfs_layout_hdr *lo;
- int status = task->tk_status;
+ int nfs4err = task->tk_status;
+ int err, status = 0;
dprintk("--> %s tk_status => %d\n", __func__, -task->tk_status);
- switch (status) {
+ switch (nfs4err) {
case 0:
goto out;
@@ -7919,12 +7921,11 @@ nfs4_layoutget_handle_exception(struct rpc_task *task,
status = -EOVERFLOW;
goto out;
}
- /* Fallthrough */
+ status = -EBUSY;
+ break;
case -NFS4ERR_RECALLCONFLICT:
- nfs4_handle_exception(server, -NFS4ERR_RECALLCONFLICT,
- exception);
status = -ERECALLCONFLICT;
- goto out;
+ break;
case -NFS4ERR_EXPIRED:
case -NFS4ERR_BAD_STATEID:
exception->timeout = 0;
@@ -7955,9 +7956,13 @@ nfs4_layoutget_handle_exception(struct rpc_task *task,
spin_unlock(&inode->i_lock);
}
- status = nfs4_handle_exception(server, status, exception);
- if (exception->retry)
- status = -EAGAIN;
+ err = nfs4_handle_exception(server, nfs4err, exception);
+ if (!status) {
+ if (exception->retry)
+ status = -EAGAIN;
+ else
+ status = err;
+ }
out:
dprintk("<-- %s\n", __func__);
return status;
diff --git a/fs/nfs/pnfs.c b/fs/nfs/pnfs.c
index 73b0dc90265a..b5c1306f63c1 100644
--- a/fs/nfs/pnfs.c
+++ b/fs/nfs/pnfs.c
@@ -1648,6 +1648,7 @@ lookup_again:
atomic_dec(&lo->plh_outstanding);
if (IS_ERR(lseg)) {
switch(PTR_ERR(lseg)) {
+ case -EBUSY:
case -ERECALLCONFLICT:
if (time_after(jiffies, giveup))
lseg = NULL;
--
2.7.4
We want to recover the open stateid if there is no layout stateid
and/or the stateid argument matches an open stateid.
Otherwise throw out the existing layout and recover from scratch, as
the layout stateid is bad.
Fixes: 183d9e7b112aa ("pnfs: rework LAYOUTGET retry handling")
Cc: [email protected] # 4.7
Signed-off-by: Trond Myklebust <[email protected]>
---
fs/nfs/nfs4proc.c | 34 +++++++++++++++-------------------
1 file changed, 15 insertions(+), 19 deletions(-)
diff --git a/fs/nfs/nfs4proc.c b/fs/nfs/nfs4proc.c
index ee8efe0a5202..a1a3b4c9a563 100644
--- a/fs/nfs/nfs4proc.c
+++ b/fs/nfs/nfs4proc.c
@@ -7886,6 +7886,7 @@ nfs4_layoutget_handle_exception(struct rpc_task *task,
struct pnfs_layout_hdr *lo;
int nfs4err = task->tk_status;
int err, status = 0;
+ LIST_HEAD(head);
dprintk("--> %s tk_status => %d\n", __func__, -task->tk_status);
@@ -7930,30 +7931,25 @@ nfs4_layoutget_handle_exception(struct rpc_task *task,
case -NFS4ERR_BAD_STATEID:
exception->timeout = 0;
spin_lock(&inode->i_lock);
- if (nfs4_stateid_match(&lgp->args.stateid,
+ lo = NFS_I(inode)->layout;
+ /* If the open stateid was bad, then recover it. */
+ if (!lo || test_bit(NFS_LAYOUT_INVALID_STID, &lo->plh_flags) ||
+ nfs4_stateid_match_other(&lgp->args.stateid,
&lgp->args.ctx->state->stateid)) {
spin_unlock(&inode->i_lock);
- /* If the open stateid was bad, then recover it. */
exception->state = lgp->args.ctx->state;
break;
}
- lo = NFS_I(inode)->layout;
- if (lo && !test_bit(NFS_LAYOUT_INVALID_STID, &lo->plh_flags) &&
- nfs4_stateid_match_other(&lgp->args.stateid, &lo->plh_stateid)) {
- LIST_HEAD(head);
-
- /*
- * Mark the bad layout state as invalid, then retry
- * with the current stateid.
- */
- set_bit(NFS_LAYOUT_INVALID_STID, &lo->plh_flags);
- pnfs_mark_matching_lsegs_invalid(lo, &head, NULL, 0);
- spin_unlock(&inode->i_lock);
- pnfs_free_lseg_list(&head);
- status = -EAGAIN;
- goto out;
- } else
- spin_unlock(&inode->i_lock);
+
+ /*
+ * Mark the bad layout state as invalid, then retry
+ */
+ set_bit(NFS_LAYOUT_INVALID_STID, &lo->plh_flags);
+ pnfs_mark_matching_lsegs_invalid(lo, &head, NULL, 0);
+ spin_unlock(&inode->i_lock);
+ pnfs_free_lseg_list(&head);
+ status = -EAGAIN;
+ goto out;
}
err = nfs4_handle_exception(server, nfs4err, exception);
--
2.7.4
Instead of giving up altogether and falling back to doing I/O
through the MDS, which may make the situation worse, wait for
2 lease periods for the callback to resolve itself, and then
try destroying the existing layout.
Only if this was an attempt at getting a first layout, do we
give up altogether, as the server is clearly crazy.
Fixes: 183d9e7b112aa ("pnfs: rework LAYOUTGET retry handling")
Cc: [email protected] # 4.7
Signed-off-by: Trond Myklebust <[email protected]>
---
fs/nfs/pnfs.c | 13 +++++++++++--
1 file changed, 11 insertions(+), 2 deletions(-)
diff --git a/fs/nfs/pnfs.c b/fs/nfs/pnfs.c
index b5c1306f63c1..0da9353f0441 100644
--- a/fs/nfs/pnfs.c
+++ b/fs/nfs/pnfs.c
@@ -1505,7 +1505,7 @@ pnfs_update_layout(struct inode *ino,
struct pnfs_layout_segment *lseg = NULL;
nfs4_stateid stateid;
long timeout = 0;
- unsigned long giveup = jiffies + rpc_get_timeout(server->client);
+ unsigned long giveup = jiffies + (clp->cl_lease_time << 1);
bool first;
if (!pnfs_enabled_sb(NFS_SERVER(ino))) {
@@ -1649,9 +1649,18 @@ lookup_again:
if (IS_ERR(lseg)) {
switch(PTR_ERR(lseg)) {
case -EBUSY:
- case -ERECALLCONFLICT:
if (time_after(jiffies, giveup))
lseg = NULL;
+ break;
+ case -ERECALLCONFLICT:
+ /* Huh? We hold no layouts, how is there a recall? */
+ if (first) {
+ lseg = NULL;
+ break;
+ }
+ /* Destroy the existing layout and start over */
+ if (time_after(jiffies, giveup))
+ pnfs_destroy_layout(NFS_I(ino));
/* Fallthrough */
case -EAGAIN:
break;
--
2.7.4
> On Jul 14, 2016, at 18:58, Trond Myklebust <[email protected]> wrote:
>
> The non-retry error path is currently broken and ends up releasing the
> reference to the layout twice. It also can end up clearing the
> NFS_LAYOUT_FIRST_LAYOUTGET flag twice, causing a race.
>
> In addition, the retry path will fail to decrement the plh_outstanding
> counter.
>
> Fixes: 183d9e7b112aa ("pnfs: rework LAYOUTGET retry handling")
> Cc: [email protected] # 4.7
> Signed-off-by: Trond Myklebust <[email protected]>
> ---
> fs/nfs/pnfs.c | 20 ++++++++++----------
> 1 file changed, 10 insertions(+), 10 deletions(-)
>
> diff --git a/fs/nfs/pnfs.c b/fs/nfs/pnfs.c
> index 0fbe734cc38c..73b0dc90265a 100644
> --- a/fs/nfs/pnfs.c
> +++ b/fs/nfs/pnfs.c
> @@ -1645,6 +1645,7 @@ lookup_again:
> lseg = send_layoutget(lo, ctx, &stateid, &arg, &timeout, gfp_flags);
> trace_pnfs_update_layout(ino, pos, count, iomode, lo, lseg,
> PNFS_UPDATE_LAYOUT_SEND_LAYOUTGET);
> + atomic_dec(&lo->plh_outstanding);
> if (IS_ERR(lseg)) {
> switch(PTR_ERR(lseg)) {
> case -ERECALLCONFLICT:
> @@ -1652,26 +1653,25 @@ lookup_again:
> lseg = NULL;
> /* Fallthrough */
> case -EAGAIN:
> - pnfs_put_layout_hdr(lo);
> - if (first)
> - pnfs_clear_first_layoutget(lo);
> - if (lseg) {
> - trace_pnfs_update_layout(ino, pos, count,
> - iomode, lo, lseg, PNFS_UPDATE_LAYOUT_RETRY);
> - goto lookup_again;
> - }
> - /* Fallthrough */
> + break;
> default:
> if (!nfs_error_is_fatal(PTR_ERR(lseg))) {
> pnfs_layout_clear_fail_bit(lo, pnfs_iomode_to_fail_bit(iomode));
> lseg = NULL;
> }
> }
> + if (lseg) {
> + pnfs_put_layout_hdr(lo);
Yes, I?m aware I need to move this to after the trace_pnfs_update_layout() below (and before ?goto lookup_again?). I managed to send out an earlier version of the patch that is being committed to linux-next.
> + if (first)
> + pnfs_clear_first_layoutget(lo);
> + trace_pnfs_update_layout(ino, pos, count,
> + iomode, lo, lseg, PNFS_UPDATE_LAYOUT_RETRY);
> + goto lookup_again;
> + }
> } else {
> pnfs_layout_clear_fail_bit(lo, pnfs_iomode_to_fail_bit(iomode));
> }
>
> - atomic_dec(&lo->plh_outstanding);
> out_put_layout_hdr:
> if (first)
> pnfs_clear_first_layoutget(lo);
> --
> 2.7.4
>
On Thu, 2016-07-14 at 18:58 -0400, Trond Myklebust wrote:
> They are not the same error, and need to be handled differently.
>
> Fixes: 183d9e7b112aa ("pnfs: rework LAYOUTGET retry handling")
> Cc: [email protected] # 4.7
> Signed-off-by: Trond Myklebust <[email protected]>
> ---
> fs/nfs/nfs4proc.c | 23 ++++++++++++++---------
> fs/nfs/pnfs.c | 1 +
> 2 files changed, 15 insertions(+), 9 deletions(-)
>
> diff --git a/fs/nfs/nfs4proc.c b/fs/nfs/nfs4proc.c
> index 519368b98762..ee8efe0a5202 100644
> --- a/fs/nfs/nfs4proc.c
> +++ b/fs/nfs/nfs4proc.c
> @@ -437,6 +437,7 @@ static int nfs4_do_handle_exception(struct
> nfs_server *server,
> case -NFS4ERR_DELAY:
> nfs_inc_server_stats(server, NFSIOS_DELAY);
> case -NFS4ERR_GRACE:
> + case -NFS4ERR_LAYOUTTRYLATER:
> case -NFS4ERR_RECALLCONFLICT:
> exception->delay = 1;
> return 0;
> @@ -7883,11 +7884,12 @@ nfs4_layoutget_handle_exception(struct
> rpc_task *task,
> struct inode *inode = lgp->args.inode;
> struct nfs_server *server = NFS_SERVER(inode);
> struct pnfs_layout_hdr *lo;
> - int status = task->tk_status;
> + int nfs4err = task->tk_status;
> + int err, status = 0;
>
> dprintk("--> %s tk_status => %d\n", __func__, -task-
> >tk_status);
>
> - switch (status) {
> + switch (nfs4err) {
> case 0:
> goto out;
>
> @@ -7919,12 +7921,11 @@ nfs4_layoutget_handle_exception(struct
> rpc_task *task,
> status = -EOVERFLOW;
> goto out;
> }
> - /* Fallthrough */
> + status = -EBUSY;
> + break;
May also want to update the comment above this stanza. It says that we
treat the two cases identically, but I'm not sure that that is valid
now.
> case -NFS4ERR_RECALLCONFLICT:
> - nfs4_handle_exception(server,
> -NFS4ERR_RECALLCONFLICT,
> - exception);
> status = -ERECALLCONFLICT;
> - goto out;
> + break;
> case -NFS4ERR_EXPIRED:
> case -NFS4ERR_BAD_STATEID:
> exception->timeout = 0;
> @@ -7955,9 +7956,13 @@ nfs4_layoutget_handle_exception(struct
> rpc_task *task,
> spin_unlock(&inode->i_lock);
> }
>
> - status = nfs4_handle_exception(server, status, exception);
> - if (exception->retry)
> - status = -EAGAIN;
> + err = nfs4_handle_exception(server, nfs4err, exception);
> + if (!status) {
> + if (exception->retry)
> + status = -EAGAIN;
> + else
> + status = err;
> + }
> out:
> dprintk("<-- %s\n", __func__);
> return status;
> diff --git a/fs/nfs/pnfs.c b/fs/nfs/pnfs.c
> index 73b0dc90265a..b5c1306f63c1 100644
> --- a/fs/nfs/pnfs.c
> +++ b/fs/nfs/pnfs.c
> @@ -1648,6 +1648,7 @@ lookup_again:
> atomic_dec(&lo->plh_outstanding);
> if (IS_ERR(lseg)) {
> switch(PTR_ERR(lseg)) {
> + case -EBUSY:
> case -ERECALLCONFLICT:
> if (time_after(jiffies, giveup))
> lseg = NULL;
--
Jeff Layton <[email protected]>
On Thu, 2016-07-14 at 18:58 -0400, Trond Myklebust wrote:
> The non-retry error path is currently broken and ends up releasing the
> reference to the layout twice. It also can end up clearing the
> NFS_LAYOUT_FIRST_LAYOUTGET flag twice, causing a race.
>
> In addition, the retry path will fail to decrement the plh_outstanding
> counter.
>
> Fixes: 183d9e7b112aa ("pnfs: rework LAYOUTGET retry handling")
> Cc: [email protected] # 4.7
> Signed-off-by: Trond Myklebust <[email protected]>
> ---
> fs/nfs/pnfs.c | 20 ++++++++++----------
> 1 file changed, 10 insertions(+), 10 deletions(-)
>
> diff --git a/fs/nfs/pnfs.c b/fs/nfs/pnfs.c
> index 0fbe734cc38c..73b0dc90265a 100644
> --- a/fs/nfs/pnfs.c
> +++ b/fs/nfs/pnfs.c
> @@ -1645,6 +1645,7 @@ lookup_again:
> lseg = send_layoutget(lo, ctx, &stateid, &arg, &timeout, gfp_flags);
> trace_pnfs_update_layout(ino, pos, count, iomode, lo, lseg,
> PNFS_UPDATE_LAYOUT_SEND_LAYOUTGET);
> + atomic_dec(&lo->plh_outstanding);
> if (IS_ERR(lseg)) {
> switch(PTR_ERR(lseg)) {
> case -ERECALLCONFLICT:
> @@ -1652,26 +1653,25 @@ lookup_again:
> lseg = NULL;
> /* Fallthrough */
> case -EAGAIN:
> - pnfs_put_layout_hdr(lo);
> - if (first)
> - pnfs_clear_first_layoutget(lo);
> - if (lseg) {
> - trace_pnfs_update_layout(ino, pos, count,
> - iomode, lo, lseg, PNFS_UPDATE_LAYOUT_RETRY);
> - goto lookup_again;
> - }
> - /* Fallthrough */
> + break;
> default:
> if (!nfs_error_is_fatal(PTR_ERR(lseg))) {
> pnfs_layout_clear_fail_bit(lo, pnfs_iomode_to_fail_bit(iomode));
> lseg = NULL;
> }
> }
> + if (lseg) {
> + pnfs_put_layout_hdr(lo);
> + if (first)
> + pnfs_clear_first_layoutget(lo);
> + trace_pnfs_update_layout(ino, pos, count,
> + iomode, lo, lseg, PNFS_UPDATE_LAYOUT_RETRY);
> + goto lookup_again;
> + }
> } else {
> pnfs_layout_clear_fail_bit(lo, pnfs_iomode_to_fail_bit(iomode));
> }
>
> - atomic_dec(&lo->plh_outstanding);
> out_put_layout_hdr:
> if (first)
> pnfs_clear_first_layoutget(lo);
The whole set looks good to me. Nice catches all around!
Reviewed-by: Jeff Layton <[email protected]>