From: Trond Myklebust <[email protected]>
If we have multiple outstanding layoutget requests, the current code to
update the layout barrier assumes that the outstanding layout stateids
are updated in order. That's not necessarily the case.

Instead of using the value of lo->plh_outstanding as a guesstimate for
the window of seqid values we need to accept, just wait to update the
barrier until we're processing the last outstanding request. The
intention here is simply to ensure that we never process 2^31 seqid
updates without also updating the barrier.
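
For context, the seqid comparison used here is wraparound-safe serial
number arithmetic: a seqid only counts as newer while it is less than
2^31 ahead of the value it is compared against, which is why the barrier
has to keep advancing. A minimal standalone sketch (userspace demo with
a hypothetical helper name, modelled on pnfs_seqid_is_newer(), not the
in-tree code):

#include <stdbool.h>
#include <stdint.h>
#include <stdio.h>

/* Newer iff s1 is ahead of s2 by less than 2^31, modulo 2^32. */
static bool seqid_is_newer(uint32_t s1, uint32_t s2)
{
	return (int32_t)(s1 - s2) > 0;
}

int main(void)
{
	printf("%d\n", seqid_is_newer(2, 1));               /* 1: newer          */
	printf("%d\n", seqid_is_newer(1, UINT32_MAX));      /* 1: wrapped, newer */
	printf("%d\n", seqid_is_newer(1, 1 + 0x80000000u)); /* 0: 2^31 apart     */
	return 0;
}
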
Fixes: 1bcf34fdac5f ("pNFS/NFSv4: Update the layout barrier when we schedule a layoutreturn")
Signed-off-by: Trond Myklebust <[email protected]>
---
fs/nfs/pnfs.c | 30 +++++++++++++++---------------
1 file changed, 15 insertions(+), 15 deletions(-)
diff --git a/fs/nfs/pnfs.c b/fs/nfs/pnfs.c
index 2c01ee805306..ffe02e43f8c0 100644
--- a/fs/nfs/pnfs.c
+++ b/fs/nfs/pnfs.c
@@ -966,10 +966,8 @@ void
pnfs_set_layout_stateid(struct pnfs_layout_hdr *lo, const nfs4_stateid *new,
const struct cred *cred, bool update_barrier)
{
- u32 oldseq, newseq, new_barrier = 0;
-
- oldseq = be32_to_cpu(lo->plh_stateid.seqid);
- newseq = be32_to_cpu(new->seqid);
+ u32 oldseq = be32_to_cpu(lo->plh_stateid.seqid);
+ u32 newseq = be32_to_cpu(new->seqid);
if (!pnfs_layout_is_valid(lo)) {
pnfs_set_layout_cred(lo, cred);
@@ -979,19 +977,21 @@ pnfs_set_layout_stateid(struct pnfs_layout_hdr *lo, const nfs4_stateid *new,
clear_bit(NFS_LAYOUT_INVALID_STID, &lo->plh_flags);
return;
}
- if (pnfs_seqid_is_newer(newseq, oldseq)) {
+
+ if (pnfs_seqid_is_newer(newseq, oldseq))
nfs4_stateid_copy(&lo->plh_stateid, new);
- /*
- * Because of wraparound, we want to keep the barrier
- * "close" to the current seqids.
- */
- new_barrier = newseq - atomic_read(&lo->plh_outstanding);
- }
- if (update_barrier)
- new_barrier = be32_to_cpu(new->seqid);
- else if (new_barrier == 0)
+
+ if (update_barrier) {
+ pnfs_barrier_update(lo, newseq);
return;
- pnfs_barrier_update(lo, new_barrier);
+ }
+ /*
+ * Because of wraparound, we want to keep the barrier
+ * "close" to the current seqids. We really only want to
+ * get here from a layoutget call.
+ */
+ if (atomic_read(&lo->plh_outstanding) == 1)
+ pnfs_barrier_update(lo, be32_to_cpu(lo->plh_stateid.seqid));
}
static bool
--
2.31.1
From: Trond Myklebust <[email protected]>
If the layout gets invalidated, we should wait for any outstanding
layoutget requests for that layout to complete, and we should resend
them only after re-establishing the layout stateid.
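
The waiting condition itself is small; here is a standalone restatement
(hypothetical struct, not kernel code) of the check this patch widens in
pnfs_update_layout(), where we now also wait when the layout has been
invalidated rather than only when the segment list is empty:

#include <stdbool.h>
#include <stdio.h>

struct layout_state {
	bool valid;        /* pnfs_layout_is_valid(lo)          */
	bool has_segs;     /* !list_empty(&lo->plh_segs)        */
	int  outstanding;  /* atomic_read(&lo->plh_outstanding) */
};

static bool wait_for_outstanding_layoutgets(const struct layout_state *s)
{
	return (!s->has_segs || !s->valid) && s->outstanding != 0;
}

int main(void)
{
	/* Invalidated layout, segments still cached, one layoutget in
	 * flight: previously we did not wait here, now we do. */
	struct layout_state s = { .valid = false, .has_segs = true,
				  .outstanding = 1 };
	printf("wait: %d\n", wait_for_outstanding_layoutgets(&s));
	return 0;
}
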
Fixes: d29b468da4f9 ("pNFS/NFSv4: Improve rejection of out-of-order layouts")
Signed-off-by: Trond Myklebust <[email protected]>
---
fs/nfs/pnfs.c | 10 +++++-----
1 file changed, 5 insertions(+), 5 deletions(-)
diff --git a/fs/nfs/pnfs.c b/fs/nfs/pnfs.c
index ffe02e43f8c0..be960e47d7f6 100644
--- a/fs/nfs/pnfs.c
+++ b/fs/nfs/pnfs.c
@@ -2014,7 +2014,7 @@ pnfs_update_layout(struct inode *ino,
* If the layout segment list is empty, but there are outstanding
* layoutget calls, then they might be subject to a layoutrecall.
*/
- if (list_empty(&lo->plh_segs) &&
+ if ((list_empty(&lo->plh_segs) || !pnfs_layout_is_valid(lo)) &&
atomic_read(&lo->plh_outstanding) != 0) {
spin_unlock(&ino->i_lock);
lseg = ERR_PTR(wait_var_event_killable(&lo->plh_outstanding,
@@ -2390,11 +2390,13 @@ pnfs_layout_process(struct nfs4_layoutget *lgp)
goto out_forget;
}
+ if (!pnfs_layout_is_valid(lo) && !pnfs_is_first_layoutget(lo))
+ goto out_forget;
+
if (nfs4_stateid_match_other(&lo->plh_stateid, &res->stateid)) {
/* existing state ID, make sure the sequence number matches. */
if (pnfs_layout_stateid_blocked(lo, &res->stateid)) {
- if (!pnfs_layout_is_valid(lo) &&
- pnfs_is_first_layoutget(lo))
+ if (!pnfs_layout_is_valid(lo))
lo->plh_barrier = 0;
dprintk("%s forget reply due to sequence\n", __func__);
goto out_forget;
@@ -2413,8 +2415,6 @@ pnfs_layout_process(struct nfs4_layoutget *lgp)
goto out_forget;
} else {
/* We have a completely new layout */
- if (!pnfs_is_first_layoutget(lo))
- goto out_forget;
pnfs_set_layout_stateid(lo, &res->stateid, lgp->cred, true);
}
--
2.31.1
From: Trond Myklebust <[email protected]>
Cache the layout header in the layoutget arguments so that we don't have
to keep looking it up from the inode.
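
The ownership rule behind this is simple; a minimal sketch with
hypothetical, simplified types (not the kernel structures): whoever
stores the pointer in the request takes a reference, and the request's
free path drops it, so later code can use lgp->lo directly.

#include <stdio.h>

struct layout_hdr { int refcount; };
struct layoutget  { struct layout_hdr *lo; };

static void get_layout_hdr(struct layout_hdr *lo) { lo->refcount++; }
static void put_layout_hdr(struct layout_hdr *lo) { lo->refcount--; }

static void layoutget_attach(struct layoutget *lgp, struct layout_hdr *lo)
{
	lgp->lo = lo;        /* cache the pointer in the arguments...    */
	get_layout_hdr(lo);  /* ...and pin it for the request's lifetime */
}

static void layoutget_free(struct layoutget *lgp)
{
	if (lgp->lo)
		put_layout_hdr(lgp->lo);
}

int main(void)
{
	struct layout_hdr lo = { .refcount = 1 };
	struct layoutget lgp = { 0 };

	layoutget_attach(&lgp, &lo);
	layoutget_free(&lgp);
	printf("refcount back to %d\n", lo.refcount);
	return 0;
}
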
Signed-off-by: Trond Myklebust <[email protected]>
---
fs/nfs/nfs4proc.c | 6 +-----
fs/nfs/pnfs.c | 28 ++++++++++++++++------------
include/linux/nfs_xdr.h | 1 +
3 files changed, 18 insertions(+), 17 deletions(-)
diff --git a/fs/nfs/nfs4proc.c b/fs/nfs/nfs4proc.c
index 9fbe04cca07e..e1214bb6b7ee 100644
--- a/fs/nfs/nfs4proc.c
+++ b/fs/nfs/nfs4proc.c
@@ -9444,7 +9444,7 @@ nfs4_layoutget_handle_exception(struct rpc_task *task,
{
struct inode *inode = lgp->args.inode;
struct nfs_server *server = NFS_SERVER(inode);
- struct pnfs_layout_hdr *lo;
+ struct pnfs_layout_hdr *lo = lgp->lo;
int nfs4err = task->tk_status;
int err, status = 0;
LIST_HEAD(head);
@@ -9496,7 +9496,6 @@ nfs4_layoutget_handle_exception(struct rpc_task *task,
case -NFS4ERR_BAD_STATEID:
exception->timeout = 0;
spin_lock(&inode->i_lock);
- lo = NFS_I(inode)->layout;
/* If the open stateid was bad, then recover it. */
if (!lo || test_bit(NFS_LAYOUT_INVALID_STID, &lo->plh_flags) ||
!nfs4_stateid_match_other(&lgp->args.stateid, &lo->plh_stateid)) {
@@ -9580,9 +9579,6 @@ nfs4_proc_layoutget(struct nfs4_layoutget *lgp, long *timeout)
dprintk("--> %s\n", __func__);
- /* nfs4_layoutget_release calls pnfs_put_layout_hdr */
- pnfs_get_layout_hdr(NFS_I(inode)->layout);
-
nfs4_init_sequence(&lgp->args.seq_args, &lgp->res.seq_res, 0, 0);
task = rpc_run_task(&task_setup_data);
diff --git a/fs/nfs/pnfs.c b/fs/nfs/pnfs.c
index be960e47d7f6..ef14ea0b6ab8 100644
--- a/fs/nfs/pnfs.c
+++ b/fs/nfs/pnfs.c
@@ -1128,8 +1128,7 @@ void pnfs_layoutget_free(struct nfs4_layoutget *lgp)
size_t max_pages = lgp->args.layout.pglen / PAGE_SIZE;
nfs4_free_pages(lgp->args.layout.pages, max_pages);
- if (lgp->args.inode)
- pnfs_put_layout_hdr(NFS_I(lgp->args.inode)->layout);
+ pnfs_put_layout_hdr(lgp->lo);
put_nfs_open_context(lgp->args.ctx);
kfree(lgp);
}
@@ -2124,6 +2123,9 @@ pnfs_update_layout(struct inode *ino,
goto out_put_layout_hdr;
}
+ lgp->lo = lo;
+ pnfs_get_layout_hdr(lo);
+
lseg = nfs4_proc_layoutget(lgp, &timeout);
trace_pnfs_update_layout(ino, pos, count, iomode, lo, lseg,
PNFS_UPDATE_LAYOUT_SEND_LAYOUTGET);
@@ -2255,6 +2257,7 @@ static void _lgopen_prepare_attached(struct nfs4_opendata *data,
pnfs_put_layout_hdr(lo);
return;
}
+ lgp->lo = lo;
data->lgp = lgp;
data->o_arg.lg_args = &lgp->args;
data->o_res.lg_res = &lgp->res;
@@ -2263,6 +2266,7 @@ static void _lgopen_prepare_attached(struct nfs4_opendata *data,
static void _lgopen_prepare_floating(struct nfs4_opendata *data,
struct nfs_open_context *ctx)
{
+ struct inode *ino = data->dentry->d_inode;
struct pnfs_layout_range rng = {
.iomode = (data->o_arg.fmode & FMODE_WRITE) ?
IOMODE_RW: IOMODE_READ,
@@ -2271,7 +2275,7 @@ static void _lgopen_prepare_floating(struct nfs4_opendata *data,
};
struct nfs4_layoutget *lgp;
- lgp = pnfs_alloc_init_layoutget_args(NULL, ctx, &current_stateid,
+ lgp = pnfs_alloc_init_layoutget_args(ino, ctx, &current_stateid,
&rng, GFP_KERNEL);
if (!lgp)
return;
@@ -2291,6 +2295,8 @@ void pnfs_lgopen_prepare(struct nfs4_opendata *data,
/* Could check on max_ops, but currently hardcoded high enough */
if (!nfs_server_capable(data->dir->d_inode, NFS_CAP_LGOPEN))
return;
+ if (data->lgp)
+ return;
if (data->state)
_lgopen_prepare_attached(data, ctx);
else
@@ -2330,13 +2336,13 @@ void pnfs_parse_lgopen(struct inode *ino, struct nfs4_layoutget *lgp,
}
return;
}
- if (!lgp->args.inode) {
+ if (!lgp->lo) {
lo = _pnfs_grab_empty_layout(ino, ctx);
if (!lo)
return;
- lgp->args.inode = ino;
+ lgp->lo = lo;
} else
- lo = NFS_I(lgp->args.inode)->layout;
+ lo = lgp->lo;
lseg = pnfs_layout_process(lgp);
if (!IS_ERR(lseg)) {
@@ -2349,11 +2355,9 @@ void pnfs_parse_lgopen(struct inode *ino, struct nfs4_layoutget *lgp,
void nfs4_lgopen_release(struct nfs4_layoutget *lgp)
{
if (lgp != NULL) {
- struct inode *inode = lgp->args.inode;
- if (inode) {
- struct pnfs_layout_hdr *lo = NFS_I(inode)->layout;
- pnfs_clear_first_layoutget(lo);
- nfs_layoutget_end(lo);
+ if (lgp->lo) {
+ pnfs_clear_first_layoutget(lgp->lo);
+ nfs_layoutget_end(lgp->lo);
}
pnfs_layoutget_free(lgp);
}
@@ -2362,7 +2366,7 @@ void nfs4_lgopen_release(struct nfs4_layoutget *lgp)
struct pnfs_layout_segment *
pnfs_layout_process(struct nfs4_layoutget *lgp)
{
- struct pnfs_layout_hdr *lo = NFS_I(lgp->args.inode)->layout;
+ struct pnfs_layout_hdr *lo = lgp->lo;
struct nfs4_layoutget_res *res = &lgp->res;
struct pnfs_layout_segment *lseg;
struct inode *ino = lo->plh_inode;
diff --git a/include/linux/nfs_xdr.h b/include/linux/nfs_xdr.h
index 717ecc87c9e7..e9698b6278a5 100644
--- a/include/linux/nfs_xdr.h
+++ b/include/linux/nfs_xdr.h
@@ -277,6 +277,7 @@ struct nfs4_layoutget {
struct nfs4_layoutget_args args;
struct nfs4_layoutget_res res;
const struct cred *cred;
+ struct pnfs_layout_hdr *lo;
gfp_t gfp_flags;
};
--
2.31.1
From: Trond Myklebust <[email protected]>
After we grab the NFS4DS_CONNECTING bit lock in nfs4_pnfs_ds_connect(),
there is no check for whether ds->ds_clp has already been initialised,
so we can end up adding the same transports multiple times.
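
The fixed ordering is easiest to see in isolation. A standalone sketch
(userspace C11 atomics, hypothetical names; a busy-wait stands in for
the killable wait_on_bit()) of the flow established below: wait while
someone else holds the CONNECTING flag, and re-check "already connected"
both before and after winning it, so a racing caller never performs the
connect twice:

#include <sched.h>
#include <stdatomic.h>
#include <stdbool.h>
#include <stdio.h>

struct ds {
	atomic_bool connecting;    /* stands in for NFS4DS_CONNECTING  */
	atomic_bool connected;     /* stands in for ds->ds_clp != NULL */
	atomic_int  connect_calls; /* how often the expensive path ran */
};

static void ds_connect(struct ds *ds)
{
	do {
		while (atomic_load(&ds->connecting))
			sched_yield();            /* "wait_on_bit"       */
		if (atomic_load(&ds->connected))
			return;                   /* someone finished it */
	} while (atomic_exchange(&ds->connecting, true));

	if (!atomic_load(&ds->connected)) {       /* re-check after winning */
		atomic_fetch_add(&ds->connect_calls, 1);
		atomic_store(&ds->connected, true);
	}
	atomic_store(&ds->connecting, false);     /* clear and "wake up"    */
}

int main(void)
{
	struct ds ds = { false, false, 0 };

	ds_connect(&ds);
	ds_connect(&ds); /* sees connected, does not connect again */
	printf("connect ran %d time(s)\n", atomic_load(&ds.connect_calls));
	return 0;
}
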
Fixes: fc821d59209d ("pnfs/NFSv4.1: Add multipath capabilities to pNFS flexfiles servers over NFSv3")
Signed-off-by: Trond Myklebust <[email protected]>
---
fs/nfs/pnfs_nfs.c | 52 +++++++++++++++++++++++------------------------
1 file changed, 26 insertions(+), 26 deletions(-)
diff --git a/fs/nfs/pnfs_nfs.c b/fs/nfs/pnfs_nfs.c
index 49d3389bd813..1c2c0d08614e 100644
--- a/fs/nfs/pnfs_nfs.c
+++ b/fs/nfs/pnfs_nfs.c
@@ -805,19 +805,16 @@ nfs4_pnfs_ds_add(struct list_head *dsaddrs, gfp_t gfp_flags)
}
EXPORT_SYMBOL_GPL(nfs4_pnfs_ds_add);
-static void nfs4_wait_ds_connect(struct nfs4_pnfs_ds *ds)
+static int nfs4_wait_ds_connect(struct nfs4_pnfs_ds *ds)
{
might_sleep();
- wait_on_bit(&ds->ds_state, NFS4DS_CONNECTING,
- TASK_KILLABLE);
+ return wait_on_bit(&ds->ds_state, NFS4DS_CONNECTING, TASK_KILLABLE);
}
static void nfs4_clear_ds_conn_bit(struct nfs4_pnfs_ds *ds)
{
smp_mb__before_atomic();
- clear_bit(NFS4DS_CONNECTING, &ds->ds_state);
- smp_mb__after_atomic();
- wake_up_bit(&ds->ds_state, NFS4DS_CONNECTING);
+ clear_and_wake_up_bit(NFS4DS_CONNECTING, &ds->ds_state);
}
static struct nfs_client *(*get_v3_ds_connect)(
@@ -993,30 +990,33 @@ int nfs4_pnfs_ds_connect(struct nfs_server *mds_srv, struct nfs4_pnfs_ds *ds,
{
int err;
-again:
- err = 0;
- if (test_and_set_bit(NFS4DS_CONNECTING, &ds->ds_state) == 0) {
- if (version == 3) {
- err = _nfs4_pnfs_v3_ds_connect(mds_srv, ds, timeo,
- retrans);
- } else if (version == 4) {
- err = _nfs4_pnfs_v4_ds_connect(mds_srv, ds, timeo,
- retrans, minor_version);
- } else {
- dprintk("%s: unsupported DS version %d\n", __func__,
- version);
- err = -EPROTONOSUPPORT;
- }
+ do {
+ err = nfs4_wait_ds_connect(ds);
+ if (err || ds->ds_clp)
+ goto out;
+ if (nfs4_test_deviceid_unavailable(devid))
+ return -ENODEV;
+ } while (test_and_set_bit(NFS4DS_CONNECTING, &ds->ds_state) != 0);
- nfs4_clear_ds_conn_bit(ds);
- } else {
- nfs4_wait_ds_connect(ds);
+ if (ds->ds_clp)
+ goto connect_done;
- /* what was waited on didn't connect AND didn't mark unavail */
- if (!ds->ds_clp && !nfs4_test_deviceid_unavailable(devid))
- goto again;
+ switch (version) {
+ case 3:
+ err = _nfs4_pnfs_v3_ds_connect(mds_srv, ds, timeo, retrans);
+ break;
+ case 4:
+ err = _nfs4_pnfs_v4_ds_connect(mds_srv, ds, timeo, retrans,
+ minor_version);
+ break;
+ default:
+ dprintk("%s: unsupported DS version %d\n", __func__, version);
+ err = -EPROTONOSUPPORT;
}
+connect_done:
+ nfs4_clear_ds_conn_bit(ds);
+out:
/*
* At this point the ds->ds_clp should be ready, but it might have
* hit an error.
--
2.31.1