2010-06-15 14:29:06

by Fred Isaman

[permalink] [raw]
Subject: [PATCH 00/10] layout refcounting changes

The following are more of code style changes, as opposed to functionality
changes.

patches 1-3 change refcounting on the layout

patches 4-10 change the layout to be a pointer to a "cache_head".
Layout drivers are expected to embed the cache_head in their private
structures.

Fred



2010-06-15 14:29:06

by Fred Isaman

[permalink] [raw]
Subject: [PATCH 06/10] pnfs-submit: Add state flag for layoutcommit_needed

This avoids locking and existance of layout issues in upcoming patches

Signed-off-by: Fred Isaman <[email protected]>
---
fs/nfs/pnfs.c | 2 ++
include/linux/nfs4_pnfs.h | 2 +-
include/linux/nfs_fs.h | 1 +
3 files changed, 4 insertions(+), 1 deletions(-)

diff --git a/fs/nfs/pnfs.c b/fs/nfs/pnfs.c
index e667208..6def09c 100644
--- a/fs/nfs/pnfs.c
+++ b/fs/nfs/pnfs.c
@@ -155,6 +155,7 @@ pnfs_need_layoutcommit(struct nfs_inode *nfsi, struct nfs_open_context *ctx)
spin_lock(&nfsi->lo_lock);
if (has_layout(nfsi) && !layoutcommit_needed(nfsi)) {
nfsi->layout.lo_cred = get_rpccred(ctx->state->owner->so_cred);
+ __set_bit(NFS_INO_LAYOUTCOMMIT, &nfsi->pnfs_layout_state);
nfsi->change_attr++;
spin_unlock(&nfsi->lo_lock);
dprintk("%s: Set layoutcommit\n", __func__);
@@ -1686,6 +1687,7 @@ pnfs_layoutcommit_inode(struct inode *inode, int sync)
nfsi->layout.pnfs_write_begin_pos = 0;
nfsi->layout.pnfs_write_end_pos = 0;
nfsi->layout.lo_cred = NULL;
+ __clear_bit(NFS_INO_LAYOUTCOMMIT, &nfsi->pnfs_layout_state);
pnfs_get_layout_stateid(&data->args.stateid, &nfsi->layout);

spin_unlock(&nfsi->lo_lock);
diff --git a/include/linux/nfs4_pnfs.h b/include/linux/nfs4_pnfs.h
index 9dac941..0eb9b16 100644
--- a/include/linux/nfs4_pnfs.h
+++ b/include/linux/nfs4_pnfs.h
@@ -83,7 +83,7 @@ has_layout(struct nfs_inode *nfsi)
static inline bool
layoutcommit_needed(struct nfs_inode *nfsi)
{
- return nfsi->layout.lo_cred != NULL;
+ return test_bit(NFS_INO_LAYOUTCOMMIT, &nfsi->pnfs_layout_state);
}

#else /* CONFIG_NFS_V4_1 */
diff --git a/include/linux/nfs_fs.h b/include/linux/nfs_fs.h
index 0b3419d..71bbc4c 100644
--- a/include/linux/nfs_fs.h
+++ b/include/linux/nfs_fs.h
@@ -208,6 +208,7 @@ struct nfs_inode {
#define NFS_INO_RO_LAYOUT_FAILED 0 /* get ro layout failed stop trying */
#define NFS_INO_RW_LAYOUT_FAILED 1 /* get rw layout failed stop trying */
#define NFS_INO_LAYOUT_ALLOC 2 /* bit lock for layout allocation */
+ #define NFS_INO_LAYOUTCOMMIT 3 /* LAYOUTCOMMIT needed */
time_t pnfs_layout_suspend;
#endif /* CONFIG_NFS_V4_1 */
#endif /* CONFIG_NFS_V4*/
--
1.6.6.1


2010-06-15 14:29:06

by Fred Isaman

[permalink] [raw]
Subject: [PATCH 07/10] pnfs-submit: avoid race handling return on close

This prepares for the next patch.

NOTE this doesn't really fix any current race, since
layout going to NULL is OK. But layout changing from NULL to nonNULL
is a real race that is not fixed

Signed-off-by: Fred Isaman <[email protected]>
---
fs/nfs/nfs4state.c | 5 +++--
fs/nfs/pnfs.c | 11 +++++++++++
include/linux/nfs4_pnfs.h | 2 ++
3 files changed, 16 insertions(+), 2 deletions(-)

diff --git a/fs/nfs/nfs4state.c b/fs/nfs/nfs4state.c
index d5144bd..8a7a64c 100644
--- a/fs/nfs/nfs4state.c
+++ b/fs/nfs/nfs4state.c
@@ -594,11 +594,12 @@ static void __nfs4_close(struct path *path, struct nfs4_state *state,
} else {
#ifdef CONFIG_NFS_V4_1
struct nfs_inode *nfsi = NFS_I(state->inode);
+ int roc = nfs4_roc_iomode(nfsi);

- if (has_layout(nfsi) && nfsi->layout.roc_iomode) {
+ if (roc) {
struct nfs4_pnfs_layout_segment range;

- range.iomode = nfsi->layout.roc_iomode;
+ range.iomode = roc;
range.offset = 0;
range.length = NFS4_MAX_UINT64;
pnfs_return_layout(state->inode, &range, NULL,
diff --git a/fs/nfs/pnfs.c b/fs/nfs/pnfs.c
index 6def09c..bd11ec7 100644
--- a/fs/nfs/pnfs.c
+++ b/fs/nfs/pnfs.c
@@ -321,6 +321,17 @@ pnfs_unregister_layoutdriver(struct pnfs_layoutdriver_type *ld_type)
#define BUG_ON_UNLOCKED_LO(lo) do {} while (0)
#endif /* CONFIG_SMP */

+int nfs4_roc_iomode(struct nfs_inode *nfsi)
+{
+ int rv = 0;
+
+ spin_lock(&pnfs_spinlock);
+ if (has_layout(nfsi))
+ rv = nfsi->layout.roc_iomode;
+ spin_unlock(&pnfs_spinlock);
+ return rv;
+}
+
static inline void
get_layout(struct pnfs_layout_type *lo)
{
diff --git a/include/linux/nfs4_pnfs.h b/include/linux/nfs4_pnfs.h
index 0eb9b16..2ea131f 100644
--- a/include/linux/nfs4_pnfs.h
+++ b/include/linux/nfs4_pnfs.h
@@ -86,6 +86,8 @@ layoutcommit_needed(struct nfs_inode *nfsi)
return test_bit(NFS_INO_LAYOUTCOMMIT, &nfsi->pnfs_layout_state);
}

+int nfs4_roc_iomode(struct nfs_inode *nfs);
+
#else /* CONFIG_NFS_V4_1 */

static inline bool
--
1.6.6.1


2010-06-15 14:29:06

by Fred Isaman

[permalink] [raw]
Subject: [PATCH 10/10] pnfs-submit: filelayout: adjust to new alloc_layout API

From: Andy Adamson <[email protected]>

Signed-off-by: Andy Adamson <[email protected]>
Signed-off-by: Fred Isaman <[email protected]>
---
fs/nfs/nfs4filelayout.c | 11 +++++++----
fs/nfs/nfs4filelayout.h | 7 +++++++
2 files changed, 14 insertions(+), 4 deletions(-)

diff --git a/fs/nfs/nfs4filelayout.c b/fs/nfs/nfs4filelayout.c
index 8a83c0d..f863549 100644
--- a/fs/nfs/nfs4filelayout.c
+++ b/fs/nfs/nfs4filelayout.c
@@ -308,11 +308,14 @@ filelayout_write_pagelist(struct pnfs_layout_type *layoutid,
* will use the pnfs_layout_type type to refer to the layout for this
* inode from now on.
*/
-static void *
+static struct pnfs_layout_type *
filelayout_alloc_layout(struct inode *inode)
{
+ struct nfs4_filelayout *flp;
+
dprintk("NFS_FILELAYOUT: allocating layout\n");
- return kzalloc(sizeof(struct nfs4_filelayout), GFP_KERNEL);
+ flp = kzalloc(sizeof(struct nfs4_filelayout), GFP_KERNEL);
+ return flp ? &flp->fl_cache : NULL;
}

/* Free a filelayout layout structure
@@ -478,7 +481,7 @@ static struct pnfs_layout_segment *
filelayout_alloc_lseg(struct pnfs_layout_type *layoutid,
struct nfs4_pnfs_layoutget_res *lgr)
{
- struct nfs4_filelayout *flo = PNFS_LD_DATA(layoutid);
+ struct nfs4_filelayout *flo = FILE_LO_CACHE(layoutid);
struct pnfs_layout_segment *lseg;
int rc;

@@ -697,7 +700,7 @@ filelayout_commit(struct pnfs_layout_type *layoutid, int sync,
ssize_t
filelayout_get_stripesize(struct pnfs_layout_type *layoutid)
{
- struct nfs4_filelayout *flo = PNFS_LD_DATA(layoutid);
+ struct nfs4_filelayout *flo = FILE_LO_CACHE(layoutid);

return flo->stripe_unit;
}
diff --git a/fs/nfs/nfs4filelayout.h b/fs/nfs/nfs4filelayout.h
index 3697926..060bf9a 100644
--- a/fs/nfs/nfs4filelayout.h
+++ b/fs/nfs/nfs4filelayout.h
@@ -70,6 +70,7 @@ struct nfs4_filelayout_segment {
};

struct nfs4_filelayout {
+ struct pnfs_layout_type fl_cache;
int uncommitted_write;
loff_t last_commit_size;
u64 layout_id;
@@ -90,6 +91,12 @@ nfs4_fl_select_ds_fh(struct nfs4_filelayout_segment *flseg, u32 idx)
return &flseg->fh_array[idx];
}

+static inline struct nfs4_filelayout *
+FILE_LO_CACHE(struct pnfs_layout_type *lo_cache)
+{
+ return container_of(lo_cache, struct nfs4_filelayout, fl_cache);
+}
+
extern struct pnfs_client_operations *pnfs_callback_ops;

extern void nfs4_fl_free_deviceid_callback(struct kref *);
--
1.6.6.1


2010-06-15 14:29:05

by Fred Isaman

[permalink] [raw]
Subject: [PATCH 01/10] pnfs-submit: separate locking from get and put of layout

This is needed by next patch that changes refcounting

Signed-off-by: Fred Isaman <[email protected]>
---
fs/nfs/pnfs.c | 63 +++++++++++++++++++++++++++++---------------------------
1 files changed, 33 insertions(+), 30 deletions(-)

diff --git a/fs/nfs/pnfs.c b/fs/nfs/pnfs.c
index 3f7f50a..937b84a 100644
--- a/fs/nfs/pnfs.c
+++ b/fs/nfs/pnfs.c
@@ -320,30 +320,20 @@ pnfs_unregister_layoutdriver(struct pnfs_layoutdriver_type *ld_type)
#define BUG_ON_UNLOCKED_LO(lo) do {} while (0)
#endif /* CONFIG_SMP */

-/*
- * get and lock nfsi->layout
- */
static inline struct pnfs_layout_type *
-get_lock_current_layout(struct nfs_inode *nfsi)
+get_current_layout(struct nfs_inode *nfsi)
{
- struct pnfs_layout_type *lo;
+ struct pnfs_layout_type *lo = &nfsi->layout;

- lo = &nfsi->layout;
- spin_lock(&nfsi->lo_lock);
- if (!lo->ld_data) {
- spin_unlock(&nfsi->lo_lock);
+ BUG_ON_UNLOCKED_LO(lo);
+ if (!lo->ld_data)
return NULL;
- }
-
lo->refcount++;
return lo;
}

-/*
- * put and unlock nfs->layout
- */
static inline void
-put_unlock_current_layout(struct pnfs_layout_type *lo)
+put_current_layout(struct pnfs_layout_type *lo)
{
struct nfs_inode *nfsi = PNFS_NFS_INODE(lo);
struct nfs_client *clp;
@@ -365,7 +355,6 @@ put_unlock_current_layout(struct pnfs_layout_type *lo)
list_del_init(&lo->lo_layouts);
spin_unlock(&clp->cl_lock);
}
- spin_unlock(&nfsi->lo_lock);
}

void
@@ -377,7 +366,8 @@ pnfs_layout_release(struct pnfs_layout_type *lo,
spin_lock(&nfsi->lo_lock);
if (range)
pnfs_free_layout(lo, range);
- put_unlock_current_layout(lo);
+ put_current_layout(lo);
+ spin_unlock(&nfsi->lo_lock);
wake_up_all(&nfsi->lo_waitq);
}

@@ -391,9 +381,13 @@ pnfs_destroy_layout(struct nfs_inode *nfsi)
.length = NFS4_MAX_UINT64,
};

- lo = get_lock_current_layout(nfsi);
- pnfs_free_layout(lo, &range);
- put_unlock_current_layout(lo);
+ spin_lock(&nfsi->lo_lock);
+ lo = get_current_layout(nfsi);
+ if (lo) {
+ pnfs_free_layout(lo, &range);
+ put_current_layout(lo);
+ }
+ spin_unlock(&nfsi->lo_lock);
}

static inline void
@@ -760,12 +754,14 @@ _pnfs_return_layout(struct inode *ino, struct nfs4_pnfs_layout_segment *range,
arg.length = NFS4_MAX_UINT64;

if (type == RETURN_FILE) {
- lo = get_lock_current_layout(nfsi);
+ spin_lock(&nfsi->lo_lock);
+ lo = get_current_layout(nfsi);
if (lo && !has_layout_to_return(lo, &arg)) {
- put_unlock_current_layout(lo);
+ put_current_layout(lo);
lo = NULL;
}
if (!lo) {
+ spin_unlock(&nfsi->lo_lock);
dprintk("%s: no layout segments to return\n", __func__);
goto out;
}
@@ -779,7 +775,8 @@ _pnfs_return_layout(struct inode *ino, struct nfs4_pnfs_layout_segment *range,
if (stateid) { /* callback */
status = -EAGAIN;
spin_lock(&nfsi->lo_lock);
- put_unlock_current_layout(lo);
+ put_current_layout(lo);
+ spin_unlock(&nfsi->lo_lock);
goto out;
}
dprintk("%s: waiting\n", __func__);
@@ -924,8 +921,7 @@ static int pnfs_wait_schedule(void *word)
* get, possibly allocate, and lock current_layout
*
* Note: If successful, nfsi->lo_lock is taken and the caller
- * must put and unlock current_layout by using put_unlock_current_layout()
- * when the returned layout is released.
+ * must put and unlock current_layout when the returned layout is released.
*/
static struct pnfs_layout_type *
get_lock_alloc_layout(struct inode *ino)
@@ -936,7 +932,9 @@ get_lock_alloc_layout(struct inode *ino)

dprintk("%s Begin\n", __func__);

- while ((lo = get_lock_current_layout(nfsi)) == NULL) {
+ spin_lock(&nfsi->lo_lock);
+ while ((lo = get_current_layout(nfsi)) == NULL) {
+ spin_unlock(&nfsi->lo_lock);
/* Compete against other threads on who's doing the allocation,
* wait until bit is cleared if we lost this race.
*/
@@ -952,8 +950,10 @@ get_lock_alloc_layout(struct inode *ino)
/* Was current_layout already allocated while we slept?
* If so, retry get_lock'ing it. Otherwise, allocate it.
*/
- if (nfsi->layout.ld_data)
+ if (nfsi->layout.ld_data) {
+ spin_lock(&nfsi->lo_lock);
continue;
+ }

lo = alloc_init_layout(ino);
if (lo) {
@@ -1122,7 +1122,8 @@ out:
out_put:
if (lsegpp)
*lsegpp = lseg;
- put_unlock_current_layout(lo);
+ put_current_layout(lo);
+ spin_unlock(&nfsi->lo_lock);
goto out;
}

@@ -1338,11 +1339,13 @@ pnfs_getboundary(struct inode *inode)
goto out;

nfsi = NFS_I(inode);
- lo = get_lock_current_layout(nfsi);;
+ spin_lock(&nfsi->lo_lock);
+ lo = get_current_layout(nfsi);;
if (lo) {
stripe_size = policy_ops->get_stripesize(lo);
- put_unlock_current_layout(lo);
+ put_current_layout(lo);
}
+ spin_unlock(&nfsi->lo_lock);
out:
return stripe_size;
}
--
1.6.6.1


2010-06-15 14:29:06

by Fred Isaman

[permalink] [raw]
Subject: [PATCH 02/10] pnfs-submit: split get_layout and grab_current_layout

Split get_layout (inc refcount) and grab_current_layout (find layout and inc refcount)
functionality and rename appropriately.

Signed-off-by: Fred Isaman <[email protected]>
---
fs/nfs/pnfs.c | 37 ++++++++++++++++++++++---------------
1 files changed, 22 insertions(+), 15 deletions(-)

diff --git a/fs/nfs/pnfs.c b/fs/nfs/pnfs.c
index 937b84a..924e6fd 100644
--- a/fs/nfs/pnfs.c
+++ b/fs/nfs/pnfs.c
@@ -320,20 +320,27 @@ pnfs_unregister_layoutdriver(struct pnfs_layoutdriver_type *ld_type)
#define BUG_ON_UNLOCKED_LO(lo) do {} while (0)
#endif /* CONFIG_SMP */

+static inline void
+get_layout(struct pnfs_layout_type *lo)
+{
+ BUG_ON_UNLOCKED_LO(lo);
+ lo->refcount++;
+}
+
static inline struct pnfs_layout_type *
-get_current_layout(struct nfs_inode *nfsi)
+grab_current_layout(struct nfs_inode *nfsi)
{
struct pnfs_layout_type *lo = &nfsi->layout;

BUG_ON_UNLOCKED_LO(lo);
if (!lo->ld_data)
return NULL;
- lo->refcount++;
+ get_layout(lo);
return lo;
}

static inline void
-put_current_layout(struct pnfs_layout_type *lo)
+put_layout(struct pnfs_layout_type *lo)
{
struct nfs_inode *nfsi = PNFS_NFS_INODE(lo);
struct nfs_client *clp;
@@ -366,7 +373,7 @@ pnfs_layout_release(struct pnfs_layout_type *lo,
spin_lock(&nfsi->lo_lock);
if (range)
pnfs_free_layout(lo, range);
- put_current_layout(lo);
+ put_layout(lo);
spin_unlock(&nfsi->lo_lock);
wake_up_all(&nfsi->lo_waitq);
}
@@ -382,10 +389,10 @@ pnfs_destroy_layout(struct nfs_inode *nfsi)
};

spin_lock(&nfsi->lo_lock);
- lo = get_current_layout(nfsi);
+ lo = grab_current_layout(nfsi);
if (lo) {
pnfs_free_layout(lo, &range);
- put_current_layout(lo);
+ put_layout(lo);
}
spin_unlock(&nfsi->lo_lock);
}
@@ -555,7 +562,7 @@ pnfs_layout_from_open_stateid(nfs4_stateid *dst, struct nfs4_state *state)
* arg->length: all ones
*/
static int
-get_layout(struct inode *ino,
+send_layoutget(struct inode *ino,
struct nfs_open_context *ctx,
struct nfs4_pnfs_layout_segment *range,
struct pnfs_layout_segment **lsegpp,
@@ -755,9 +762,9 @@ _pnfs_return_layout(struct inode *ino, struct nfs4_pnfs_layout_segment *range,

if (type == RETURN_FILE) {
spin_lock(&nfsi->lo_lock);
- lo = get_current_layout(nfsi);
+ lo = grab_current_layout(nfsi);
if (lo && !has_layout_to_return(lo, &arg)) {
- put_current_layout(lo);
+ put_layout(lo);
lo = NULL;
}
if (!lo) {
@@ -775,7 +782,7 @@ _pnfs_return_layout(struct inode *ino, struct nfs4_pnfs_layout_segment *range,
if (stateid) { /* callback */
status = -EAGAIN;
spin_lock(&nfsi->lo_lock);
- put_current_layout(lo);
+ put_layout(lo);
spin_unlock(&nfsi->lo_lock);
goto out;
}
@@ -933,7 +940,7 @@ get_lock_alloc_layout(struct inode *ino)
dprintk("%s Begin\n", __func__);

spin_lock(&nfsi->lo_lock);
- while ((lo = get_current_layout(nfsi)) == NULL) {
+ while ((lo = grab_current_layout(nfsi)) == NULL) {
spin_unlock(&nfsi->lo_lock);
/* Compete against other threads on who's doing the allocation,
* wait until bit is cleared if we lost this race.
@@ -1114,7 +1121,7 @@ _pnfs_update_layout(struct inode *ino,
/* Lose lock, but not reference, match this with pnfs_layout_release */
spin_unlock(&nfsi->lo_lock);

- get_layout(ino, ctx, &arg, lsegpp, lo);
+ send_layoutget(ino, ctx, &arg, lsegpp, lo);
out:
dprintk("%s end, state 0x%lx lseg %p\n", __func__,
nfsi->layout.pnfs_layout_state, lseg);
@@ -1122,7 +1129,7 @@ out:
out_put:
if (lsegpp)
*lsegpp = lseg;
- put_current_layout(lo);
+ put_layout(lo);
spin_unlock(&nfsi->lo_lock);
goto out;
}
@@ -1340,10 +1347,10 @@ pnfs_getboundary(struct inode *inode)

nfsi = NFS_I(inode);
spin_lock(&nfsi->lo_lock);
- lo = get_current_layout(nfsi);;
+ lo = grab_current_layout(nfsi);;
if (lo) {
stripe_size = policy_ops->get_stripesize(lo);
- put_current_layout(lo);
+ put_layout(lo);
}
spin_unlock(&nfsi->lo_lock);
out:
--
1.6.6.1


2010-06-15 14:29:04

by Fred Isaman

[permalink] [raw]
Subject: [PATCH 05/10] pnfs-submit: Move pnfs_layout_state and pnfs_layout_suspend back to nfs_inode

Preparing to make layout a pointer, need state flags accessible
before allocation. pnfs_layout_suspend needs to be pulled out too,
since a temp error on first LAYOUTGET erases the layout.

Signed-off-by: Fred Isaman <[email protected]>
---
fs/nfs/inode.c | 2 +-
fs/nfs/pnfs.c | 24 ++++++++++++------------
include/linux/nfs_fs.h | 10 +++++-----
3 files changed, 18 insertions(+), 18 deletions(-)

diff --git a/fs/nfs/inode.c b/fs/nfs/inode.c
index be25ffc..1a03f9a 100644
--- a/fs/nfs/inode.c
+++ b/fs/nfs/inode.c
@@ -1364,7 +1364,7 @@ void nfs4_clear_inode(struct inode *inode)
static void pnfs_alloc_init_inode(struct nfs_inode *nfsi)
{
#ifdef CONFIG_NFS_V4_1
- nfsi->layout.pnfs_layout_state = 0;
+ nfsi->pnfs_layout_state = 0;
memset(&nfsi->layout.stateid, 0, NFS4_STATEID_SIZE);
nfsi->layout.roc_iomode = 0;
nfsi->layout.lo_cred = NULL;
diff --git a/fs/nfs/pnfs.c b/fs/nfs/pnfs.c
index 21192b6..e667208 100644
--- a/fs/nfs/pnfs.c
+++ b/fs/nfs/pnfs.c
@@ -959,7 +959,7 @@ get_lock_alloc_layout(struct inode *ino)
* wait until bit is cleared if we lost this race.
*/
res = wait_on_bit_lock(
- &nfsi->layout.pnfs_layout_state,
+ &nfsi->pnfs_layout_state,
NFS_INO_LAYOUT_ALLOC,
pnfs_wait_schedule, TASK_KILLABLE);
if (res) {
@@ -993,8 +993,8 @@ get_lock_alloc_layout(struct inode *ino)

/* release the NFS_INO_LAYOUT_ALLOC bit and wake up waiters */
clear_bit_unlock(NFS_INO_LAYOUT_ALLOC,
- &nfsi->layout.pnfs_layout_state);
- wake_up_bit(&nfsi->layout.pnfs_layout_state,
+ &nfsi->pnfs_layout_state);
+ wake_up_bit(&nfsi->pnfs_layout_state,
NFS_INO_LAYOUT_ALLOC);
break;
}
@@ -1122,13 +1122,13 @@ _pnfs_update_layout(struct inode *ino,
}

/* if get layout already failed once goto out */
- if (test_bit(lo_fail_bit(iomode), &nfsi->layout.pnfs_layout_state)) {
- if (unlikely(nfsi->layout.pnfs_layout_suspend &&
- get_seconds() >= nfsi->layout.pnfs_layout_suspend)) {
+ if (test_bit(lo_fail_bit(iomode), &nfsi->pnfs_layout_state)) {
+ if (unlikely(nfsi->pnfs_layout_suspend &&
+ get_seconds() >= nfsi->pnfs_layout_suspend)) {
dprintk("%s: layout_get resumed\n", __func__);
clear_bit(lo_fail_bit(iomode),
- &nfsi->layout.pnfs_layout_state);
- nfsi->layout.pnfs_layout_suspend = 0;
+ &nfsi->pnfs_layout_state);
+ nfsi->pnfs_layout_suspend = 0;
} else
goto out_put;
}
@@ -1139,7 +1139,7 @@ _pnfs_update_layout(struct inode *ino,
send_layoutget(ino, ctx, &arg, lsegpp, lo);
out:
dprintk("%s end, state 0x%lx lseg %p\n", __func__,
- nfsi->layout.pnfs_layout_state, lseg);
+ nfsi->pnfs_layout_state, lseg);
return;
out_put:
if (lsegpp)
@@ -1242,14 +1242,14 @@ pnfs_get_layout_done(struct nfs4_pnfs_layoutget *lgp, int rpc_status)
}

/* remember that get layout failed and suspend trying */
- nfsi->layout.pnfs_layout_suspend = suspend;
+ nfsi->pnfs_layout_suspend = suspend;
set_bit(lo_fail_bit(lgp->args.lseg.iomode),
- &nfsi->layout.pnfs_layout_state);
+ &nfsi->pnfs_layout_state);
dprintk("%s: layout_get suspended until %ld\n",
__func__, suspend);
out:
dprintk("%s end (err:%d) state 0x%lx lseg %p\n",
- __func__, lgp->status, nfsi->layout.pnfs_layout_state, lseg);
+ __func__, lgp->status, nfsi->pnfs_layout_state, lseg);
return;
}

diff --git a/include/linux/nfs_fs.h b/include/linux/nfs_fs.h
index 95d8d53..0b3419d 100644
--- a/include/linux/nfs_fs.h
+++ b/include/linux/nfs_fs.h
@@ -105,11 +105,6 @@ struct pnfs_layout_type {
seqlock_t seqlock; /* Protects the stateid */
nfs4_stateid stateid;
void *ld_data; /* layout driver private data */
- unsigned long pnfs_layout_state;
- #define NFS_INO_RO_LAYOUT_FAILED 0 /* get ro layout failed stop trying */
- #define NFS_INO_RW_LAYOUT_FAILED 1 /* get rw layout failed stop trying */
- #define NFS_INO_LAYOUT_ALLOC 2 /* bit lock for layout allocation */
- time_t pnfs_layout_suspend;
struct rpc_cred *lo_cred; /* layoutcommit credential */
/* DH: These vars keep track of the maximum write range
* so the values can be used for layoutcommit.
@@ -209,6 +204,11 @@ struct nfs_inode {
wait_queue_head_t lo_waitq;
spinlock_t lo_lock;
struct pnfs_layout_type layout;
+ unsigned long pnfs_layout_state;
+ #define NFS_INO_RO_LAYOUT_FAILED 0 /* get ro layout failed stop trying */
+ #define NFS_INO_RW_LAYOUT_FAILED 1 /* get rw layout failed stop trying */
+ #define NFS_INO_LAYOUT_ALLOC 2 /* bit lock for layout allocation */
+ time_t pnfs_layout_suspend;
#endif /* CONFIG_NFS_V4_1 */
#endif /* CONFIG_NFS_V4*/
#ifdef CONFIG_NFS_FSCACHE
--
1.6.6.1


2010-06-15 14:29:04

by Fred Isaman

[permalink] [raw]
Subject: [PATCH 08/10] pnfs-submit: change nfsi->layout to a pointer

From: Andy Adamson <[email protected]>

Preparing for the lo_cache_head patch

Signed-off-by: Fred Isaman <[email protected]>
---
fs/nfs/callback_proc.c | 2 +-
fs/nfs/inode.c | 20 +--------
fs/nfs/nfs4proc.c | 4 +-
fs/nfs/nfs4xdr.c | 4 +-
fs/nfs/pnfs.c | 93 ++++++++++++++++++++++++---------------------
include/linux/nfs4_pnfs.h | 2 +-
include/linux/nfs_fs.h | 2 +-
7 files changed, 60 insertions(+), 67 deletions(-)

diff --git a/fs/nfs/callback_proc.c b/fs/nfs/callback_proc.c
index a598b5a..f5c4859 100644
--- a/fs/nfs/callback_proc.c
+++ b/fs/nfs/callback_proc.c
@@ -243,7 +243,7 @@ static int pnfs_recall_layout(void *data)
rl.cbl_seg.length = NFS4_MAX_UINT64;

if (rl.cbl_recall_type == RETURN_FILE) {
- if (pnfs_is_next_layout_stateid(&NFS_I(inode)->layout,
+ if (pnfs_is_next_layout_stateid(NFS_I(inode)->layout,
rl.cbl_stateid))
status = pnfs_return_layout(inode, &rl.cbl_seg,
&rl.cbl_stateid, RETURN_FILE,
diff --git a/fs/nfs/inode.c b/fs/nfs/inode.c
index 1a03f9a..5802787 100644
--- a/fs/nfs/inode.c
+++ b/fs/nfs/inode.c
@@ -1364,12 +1364,8 @@ void nfs4_clear_inode(struct inode *inode)
static void pnfs_alloc_init_inode(struct nfs_inode *nfsi)
{
#ifdef CONFIG_NFS_V4_1
+ nfsi->layout = NULL;
nfsi->pnfs_layout_state = 0;
- memset(&nfsi->layout.stateid, 0, NFS4_STATEID_SIZE);
- nfsi->layout.roc_iomode = 0;
- nfsi->layout.lo_cred = NULL;
- nfsi->layout.pnfs_write_begin_pos = 0;
- nfsi->layout.pnfs_write_end_pos = 0;
#endif /* CONFIG_NFS_V4_1 */
}

@@ -1395,13 +1391,9 @@ struct inode *nfs_alloc_inode(struct super_block *sb)
static void pnfs_destroy_inode(struct nfs_inode *nfsi)
{
#ifdef CONFIG_NFS_V4_1
- if (!list_empty(&nfsi->layout.segs))
+ if (nfsi->layout)
pnfs_destroy_layout(nfsi);
-
- BUG_ON(!list_empty(&nfsi->layout.lo_layouts));
- BUG_ON(!list_empty(&nfsi->layout.segs));
- BUG_ON(nfsi->layout.refcount);
- BUG_ON(nfsi->layout.ld_data);
+ BUG_ON(nfsi->layout);
#endif /* CONFIG_NFS_V4_1 */
}

@@ -1418,12 +1410,6 @@ static void pnfs_init_once(struct nfs_inode *nfsi)
#ifdef CONFIG_NFS_V4_1
init_waitqueue_head(&nfsi->lo_waitq);
spin_lock_init(&nfsi->lo_lock);
- seqlock_init(&nfsi->layout.seqlock);
- INIT_LIST_HEAD(&nfsi->layout.lo_layouts);
- INIT_LIST_HEAD(&nfsi->layout.segs);
- nfsi->layout.refcount = 0;
- nfsi->layout.ld_data = NULL;
- nfsi->layout.lo_inode = &nfsi->vfs_inode;
#endif /* CONFIG_NFS_V4_1 */
}

diff --git a/fs/nfs/nfs4proc.c b/fs/nfs/nfs4proc.c
index c7d68f5..e9dca4c 100644
--- a/fs/nfs/nfs4proc.c
+++ b/fs/nfs/nfs4proc.c
@@ -1089,7 +1089,7 @@ static void pnfs4_layout_reclaim(struct nfs4_state *state)
*/
pnfs_return_layout(state->inode, NULL, &state->open_stateid,
RETURN_FILE, true);
- pnfs_set_layout_stateid(&NFS_I(state->inode)->layout, &zero_stateid);
+ pnfs_set_layout_stateid(NFS_I(state->inode)->layout, &zero_stateid);
#endif /* CONFIG_NFS_V4_1 */
}

@@ -2392,7 +2392,7 @@ pnfs4_proc_setattr(struct dentry *dentry, struct nfs_fattr *fattr,
struct nfs_server *server = NFS_SERVER(inode);
struct nfs_inode *nfsi = NFS_I(inode);

- if (pnfs_enabled_sb(server) && has_layout(nfsi) &&
+ if (has_layout(nfsi) &&
pnfs_ld_layoutret_on_setattr(server->pnfs_curr_ld))
pnfs_return_layout(inode, NULL, NULL, RETURN_FILE, true);
return nfs4_proc_setattr(dentry, fattr, sattr);
diff --git a/fs/nfs/nfs4xdr.c b/fs/nfs/nfs4xdr.c
index 301ae14..ada46d6 100644
--- a/fs/nfs/nfs4xdr.c
+++ b/fs/nfs/nfs4xdr.c
@@ -1885,7 +1885,7 @@ encode_layoutcommit(struct xdr_stream *xdr,

if (ld_io_ops->encode_layoutcommit)
ld_io_ops->encode_layoutcommit(
- &NFS_I(args->inode)->layout, xdr, args);
+ NFS_I(args->inode)->layout, xdr, args);
else {
p = reserve_space(xdr, 4);
xdr_encode_opaque(p, NULL, 0);
@@ -1923,7 +1923,7 @@ encode_layoutreturn(struct xdr_stream *xdr,
ld_io_ops->encode_layoutreturn);
if (ld_io_ops->encode_layoutreturn) {
ld_io_ops->encode_layoutreturn(
- &NFS_I(args->inode)->layout, xdr, args);
+ NFS_I(args->inode)->layout, xdr, args);
} else {
p = reserve_space(xdr, 4);
*p = cpu_to_be32(0);
diff --git a/fs/nfs/pnfs.c b/fs/nfs/pnfs.c
index bd11ec7..943519b 100644
--- a/fs/nfs/pnfs.c
+++ b/fs/nfs/pnfs.c
@@ -154,7 +154,7 @@ pnfs_need_layoutcommit(struct nfs_inode *nfsi, struct nfs_open_context *ctx)
dprintk("%s: has_layout=%d ctx=%p\n", __func__, has_layout(nfsi), ctx);
spin_lock(&nfsi->lo_lock);
if (has_layout(nfsi) && !layoutcommit_needed(nfsi)) {
- nfsi->layout.lo_cred = get_rpccred(ctx->state->owner->so_cred);
+ nfsi->layout->lo_cred = get_rpccred(ctx->state->owner->so_cred);
__set_bit(NFS_INO_LAYOUTCOMMIT, &nfsi->pnfs_layout_state);
nfsi->change_attr++;
spin_unlock(&nfsi->lo_lock);
@@ -175,17 +175,20 @@ pnfs_update_last_write(struct nfs_inode *nfsi, loff_t offset, size_t extent)
loff_t end_pos;

spin_lock(&nfsi->lo_lock);
- if (offset < nfsi->layout.pnfs_write_begin_pos)
- nfsi->layout.pnfs_write_begin_pos = offset;
+ if (!has_layout(nfsi))
+ goto out_unlock;
+ if (offset < nfsi->layout->pnfs_write_begin_pos)
+ nfsi->layout->pnfs_write_begin_pos = offset;
end_pos = offset + extent - 1; /* I'm being inclusive */
- if (end_pos > nfsi->layout.pnfs_write_end_pos)
- nfsi->layout.pnfs_write_end_pos = end_pos;
+ if (end_pos > nfsi->layout->pnfs_write_end_pos)
+ nfsi->layout->pnfs_write_end_pos = end_pos;
dprintk("%s: Wrote %lu@%lu bpos %lu, epos: %lu\n",
__func__,
(unsigned long) extent,
(unsigned long) offset ,
- (unsigned long) nfsi->layout.pnfs_write_begin_pos,
- (unsigned long) nfsi->layout.pnfs_write_end_pos);
+ (unsigned long) nfsi->layout->pnfs_write_begin_pos,
+ (unsigned long) nfsi->layout->pnfs_write_end_pos);
+ out_unlock:
spin_unlock(&nfsi->lo_lock);
}

@@ -315,8 +318,8 @@ pnfs_unregister_layoutdriver(struct pnfs_layoutdriver_type *ld_type)
* pNFS client layout cache
*/
#if defined(CONFIG_SMP)
-#define BUG_ON_UNLOCKED_LO(lo) \
- BUG_ON(!spin_is_locked(&PNFS_NFS_INODE(lo)->lo_lock))
+#define BUG_ON_UNLOCKED_LO(nfsi) \
+ BUG_ON(!spin_is_locked(&nfsi->lo_lock))
#else /* CONFIG_SMP */
#define BUG_ON_UNLOCKED_LO(lo) do {} while (0)
#endif /* CONFIG_SMP */
@@ -327,7 +330,7 @@ int nfs4_roc_iomode(struct nfs_inode *nfsi)

spin_lock(&pnfs_spinlock);
if (has_layout(nfsi))
- rv = nfsi->layout.roc_iomode;
+ rv = nfsi->layout->roc_iomode;
spin_unlock(&pnfs_spinlock);
return rv;
}
@@ -335,19 +338,18 @@ int nfs4_roc_iomode(struct nfs_inode *nfsi)
static inline void
get_layout(struct pnfs_layout_type *lo)
{
- BUG_ON_UNLOCKED_LO(lo);
+ BUG_ON_UNLOCKED_LO(PNFS_NFS_INODE(lo));
lo->refcount++;
}

static inline struct pnfs_layout_type *
grab_current_layout(struct nfs_inode *nfsi)
{
- struct pnfs_layout_type *lo = &nfsi->layout;
+ struct pnfs_layout_type *lo = nfsi->layout;

- BUG_ON_UNLOCKED_LO(lo);
- if (!lo->ld_data)
- return NULL;
- get_layout(lo);
+ BUG_ON_UNLOCKED_LO(nfsi);
+ if (lo)
+ get_layout(lo);
return lo;
}

@@ -357,7 +359,7 @@ put_layout(struct pnfs_layout_type *lo)
struct nfs_inode *nfsi = PNFS_NFS_INODE(lo);
struct nfs_client *clp;

- BUG_ON_UNLOCKED_LO(lo);
+ BUG_ON_UNLOCKED_LO(nfsi);
BUG_ON(lo->refcount <= 0);

lo->refcount--;
@@ -380,7 +382,8 @@ put_layout(struct pnfs_layout_type *lo)

dprintk("%s: freeing layout %p\n", __func__, lo->ld_data);
io_ops->free_layout(lo->ld_data);
- lo->ld_data = NULL;
+ nfsi->layout = NULL;
+ kfree(lo);
}
}

@@ -661,7 +664,7 @@ has_layout_to_return(struct pnfs_layout_type *lo,
dprintk("%s:Begin lo %p offset %llu length %llu iomode %d\n",
__func__, lo, range->offset, range->length, range->iomode);

- BUG_ON_UNLOCKED_LO(lo);
+ BUG_ON_UNLOCKED_LO(PNFS_NFS_INODE(lo));
list_for_each_entry (lseg, &lo->segs, fi_list)
if (should_free_lseg(lseg, range)) {
out = lseg;
@@ -687,7 +690,7 @@ pnfs_free_layout(struct pnfs_layout_type *lo,
dprintk("%s:Begin lo %p offset %llu length %llu iomode %d\n",
__func__, lo, range->offset, range->length, range->iomode);

- BUG_ON_UNLOCKED_LO(lo);
+ BUG_ON_UNLOCKED_LO(PNFS_NFS_INODE(lo));
list_for_each_entry_safe (lseg, next, &lo->segs, fi_list) {
if (!should_free_lseg(lseg, range) ||
!_pnfs_can_return_lseg(lseg))
@@ -711,7 +714,7 @@ pnfs_return_layout_barrier(struct nfs_inode *nfsi,
bool ret = false;

spin_lock(&nfsi->lo_lock);
- list_for_each_entry (lseg, &nfsi->layout.segs, fi_list) {
+ list_for_each_entry(lseg, &nfsi->layout->segs, fi_list) {
if (!should_free_lseg(lseg, range))
continue;
lseg->valid = false;
@@ -887,7 +890,7 @@ pnfs_insert_layout(struct pnfs_layout_type *lo,

dprintk("%s:Begin\n", __func__);

- BUG_ON_UNLOCKED_LO(lo);
+ BUG_ON_UNLOCKED_LO(PNFS_NFS_INODE(lo));
list_for_each_entry (lp, &lo->segs, fi_list) {
if (cmp_layout(&lp->range, &lseg->range) > 0)
continue;
@@ -919,25 +922,28 @@ alloc_init_layout(struct inode *ino)
void *ld_data;
struct pnfs_layout_type *lo;
struct layoutdriver_io_operations *io_ops;
- struct nfs_inode *nfsi = NFS_I(ino);

+ lo = kzalloc(sizeof(struct pnfs_layout_type), GFP_KERNEL);
+ if (!lo)
+ return NULL;
io_ops = NFS_SERVER(ino)->pnfs_curr_ld->ld_io_ops;
- lo = &nfsi->layout;
ld_data = io_ops->alloc_layout(ino);
if (!ld_data) {
printk(KERN_ERR
"%s: out of memory: io_ops->alloc_layout failed\n",
__func__);
+ kfree(lo);
return NULL;
}

- spin_lock(&nfsi->lo_lock);
- BUG_ON(lo->ld_data != NULL);
lo->ld_data = ld_data;
memset(&lo->stateid, 0, NFS4_STATEID_SIZE);
lo->refcount = 1;
lo->roc_iomode = 0;
- spin_unlock(&nfsi->lo_lock);
+ lo->lo_inode = ino;
+ INIT_LIST_HEAD(&lo->lo_layouts);
+ INIT_LIST_HEAD(&lo->segs);
+ seqlock_init(&lo->seqlock);
return lo;
}

@@ -982,8 +988,9 @@ get_lock_alloc_layout(struct inode *ino)
/* Was current_layout already allocated while we slept?
* If so, retry get_lock'ing it. Otherwise, allocate it.
*/
- if (nfsi->layout.ld_data) {
+ if (nfsi->layout) {
spin_lock(&nfsi->lo_lock);
+ /* FIXME XXX deadlock here, need to relese ALLOC lock */
continue;
}

@@ -993,7 +1000,7 @@ get_lock_alloc_layout(struct inode *ino)

/* must grab the layout lock before the client lock */
spin_lock(&nfsi->lo_lock);
-
+ nfsi->layout = lo;
spin_lock(&clp->cl_lock);
if (list_empty(&lo->lo_layouts)) {
list_add_tail(&lo->lo_layouts, &clp->cl_layouts);
@@ -1060,7 +1067,7 @@ pnfs_has_layout(struct pnfs_layout_type *lo,

dprintk("%s:Begin\n", __func__);

- BUG_ON_UNLOCKED_LO(lo);
+ BUG_ON_UNLOCKED_LO(PNFS_NFS_INODE(lo));
list_for_each_entry (lseg, &lo->segs, fi_list) {
if (has_matching_lseg(lseg, range) &&
(lseg->valid || !only_valid)) {
@@ -1342,7 +1349,7 @@ pnfs_set_pg_test(struct inode *inode, struct nfs_pageio_descriptor *pgio)

pgio->pg_test = NULL;

- laytype = &NFS_I(inode)->layout;
+ laytype = NFS_I(inode)->layout;
ld = NFS_SERVER(inode)->pnfs_curr_ld;
if (!pnfs_enabled_sb(NFS_SERVER(inode)) || !laytype)
return;
@@ -1475,7 +1482,7 @@ pnfs_writepages(struct nfs_write_data *wdata, int how)
numpages);
wdata->pdata.lseg = lseg;
trypnfs = nfss->pnfs_curr_ld->ld_io_ops->write_pagelist(
- &nfsi->layout,
+ nfsi->layout,
args->pages,
args->pgbase,
numpages,
@@ -1528,7 +1535,7 @@ pnfs_readpages(struct nfs_read_data *rdata)
__func__, numpages);
rdata->pdata.lseg = lseg;
trypnfs = nfss->pnfs_curr_ld->ld_io_ops->read_pagelist(
- &nfsi->layout,
+ nfsi->layout,
args->pages,
args->pgbase,
numpages,
@@ -1589,7 +1596,7 @@ pnfs_commit(struct nfs_write_data *data, int sync)
* This will be done by passing the buck to the layout driver.
*/
data->pdata.lseg = NULL;
- trypnfs = nfss->pnfs_curr_ld->ld_io_ops->commit(&nfsi->layout,
+ trypnfs = nfss->pnfs_curr_ld->ld_io_ops->commit(nfsi->layout,
sync, data);
dprintk("%s End (trypnfs:%d)\n", __func__, trypnfs);
return trypnfs;
@@ -1613,7 +1620,7 @@ pnfs_layoutcommit_done(struct pnfs_layoutcommit_data *data)
*/
if (nfss->pnfs_curr_ld->ld_io_ops->cleanup_layoutcommit)
nfss->pnfs_curr_ld->ld_io_ops->cleanup_layoutcommit(
- &nfsi->layout,
+ nfsi->layout,
&data->args,
data->status);
put_rpccred(data->cred);
@@ -1657,7 +1664,7 @@ pnfs_layoutcommit_setup(struct inode *inode,
*/
if (nfss->pnfs_curr_ld->ld_io_ops->setup_layoutcommit)
result = nfss->pnfs_curr_ld->ld_io_ops->setup_layoutcommit(
- &nfsi->layout, &data->args);
+ nfsi->layout, &data->args);

dprintk("%s End Status %d\n", __func__, result);
return result;
@@ -1692,14 +1699,14 @@ pnfs_layoutcommit_inode(struct inode *inode, int sync)
/* Clear layoutcommit properties in the inode so
* new lc info can be generated
*/
- write_begin_pos = nfsi->layout.pnfs_write_begin_pos;
- write_end_pos = nfsi->layout.pnfs_write_end_pos;
- data->cred = nfsi->layout.lo_cred;
- nfsi->layout.pnfs_write_begin_pos = 0;
- nfsi->layout.pnfs_write_end_pos = 0;
- nfsi->layout.lo_cred = NULL;
+ write_begin_pos = nfsi->layout->pnfs_write_begin_pos;
+ write_end_pos = nfsi->layout->pnfs_write_end_pos;
+ data->cred = nfsi->layout->lo_cred;
+ nfsi->layout->pnfs_write_begin_pos = 0;
+ nfsi->layout->pnfs_write_end_pos = 0;
+ nfsi->layout->lo_cred = NULL;
__clear_bit(NFS_INO_LAYOUTCOMMIT, &nfsi->pnfs_layout_state);
- pnfs_get_layout_stateid(&data->args.stateid, &nfsi->layout);
+ pnfs_get_layout_stateid(&data->args.stateid, nfsi->layout);

spin_unlock(&nfsi->lo_lock);

diff --git a/include/linux/nfs4_pnfs.h b/include/linux/nfs4_pnfs.h
index 2ea131f..5885352 100644
--- a/include/linux/nfs4_pnfs.h
+++ b/include/linux/nfs4_pnfs.h
@@ -77,7 +77,7 @@ PNFS_LD_POLICY_OPS(struct pnfs_layout_type *lo)
static inline bool
has_layout(struct nfs_inode *nfsi)
{
- return nfsi->layout.ld_data != NULL;
+ return nfsi->layout != NULL;
}

static inline bool
diff --git a/include/linux/nfs_fs.h b/include/linux/nfs_fs.h
index 71bbc4c..6b62a53 100644
--- a/include/linux/nfs_fs.h
+++ b/include/linux/nfs_fs.h
@@ -203,7 +203,7 @@ struct nfs_inode {
#if defined(CONFIG_NFS_V4_1)
wait_queue_head_t lo_waitq;
spinlock_t lo_lock;
- struct pnfs_layout_type layout;
+ struct pnfs_layout_type *layout;
unsigned long pnfs_layout_state;
#define NFS_INO_RO_LAYOUT_FAILED 0 /* get ro layout failed stop trying */
#define NFS_INO_RW_LAYOUT_FAILED 1 /* get rw layout failed stop trying */
--
1.6.6.1


2010-06-15 14:29:04

by Fred Isaman

[permalink] [raw]
Subject: [PATCH 09/10] pnfs-submit: API change: alloc_layout returns layout cache head

From: Andy Adamson <[email protected]>

pnfs_layout_type (to be renamed later) is the head of the inode layout
segment cache. Embed the layout cache head struct in the per layout cache
head struct and allocate them both in the alloc_layout
layoutdriver_io_operation.

This requires layout drivers to change their alloc/free_layout routines.

Signed-off-by: Andy Adamson <[email protected]>
Signed-off-by: Fred Isaman <[email protected]>
---
fs/nfs/pnfs.c | 21 +++++----------------
include/linux/nfs4_pnfs.h | 8 +-------
include/linux/nfs_fs.h | 1 -
3 files changed, 6 insertions(+), 24 deletions(-)

diff --git a/fs/nfs/pnfs.c b/fs/nfs/pnfs.c
index 943519b..edffee3 100644
--- a/fs/nfs/pnfs.c
+++ b/fs/nfs/pnfs.c
@@ -380,10 +380,9 @@ put_layout(struct pnfs_layout_type *lo)
struct layoutdriver_io_operations *io_ops =
PNFS_LD_IO_OPS(lo);

- dprintk("%s: freeing layout %p\n", __func__, lo->ld_data);
- io_ops->free_layout(lo->ld_data);
+ dprintk("%s: freeing layout %p\n", __func__, lo);
+ io_ops->free_layout(lo);
nfsi->layout = NULL;
- kfree(lo);
}
}

@@ -919,27 +918,17 @@ pnfs_insert_layout(struct pnfs_layout_type *lo,
static struct pnfs_layout_type *
alloc_init_layout(struct inode *ino)
{
- void *ld_data;
struct pnfs_layout_type *lo;
- struct layoutdriver_io_operations *io_ops;

- lo = kzalloc(sizeof(struct pnfs_layout_type), GFP_KERNEL);
- if (!lo)
- return NULL;
- io_ops = NFS_SERVER(ino)->pnfs_curr_ld->ld_io_ops;
- ld_data = io_ops->alloc_layout(ino);
- if (!ld_data) {
+ /* Layoutdriver must zero (kzalloc) lo_cache fields */
+ lo = NFS_SERVER(ino)->pnfs_curr_ld->ld_io_ops->alloc_layout(ino);
+ if (!lo) {
printk(KERN_ERR
"%s: out of memory: io_ops->alloc_layout failed\n",
__func__);
- kfree(lo);
return NULL;
}
-
- lo->ld_data = ld_data;
- memset(&lo->stateid, 0, NFS4_STATEID_SIZE);
lo->refcount = 1;
- lo->roc_iomode = 0;
lo->lo_inode = ino;
INIT_LIST_HEAD(&lo->lo_layouts);
INIT_LIST_HEAD(&lo->segs);
diff --git a/include/linux/nfs4_pnfs.h b/include/linux/nfs4_pnfs.h
index 5885352..dd11022 100644
--- a/include/linux/nfs4_pnfs.h
+++ b/include/linux/nfs4_pnfs.h
@@ -50,12 +50,6 @@ PNFS_NFS_SERVER(struct pnfs_layout_type *lo)
return NFS_SERVER(PNFS_INODE(lo));
}

-static inline void *
-PNFS_LD_DATA(struct pnfs_layout_type *lo)
-{
- return lo->ld_data;
-}
-
static inline struct pnfs_layoutdriver_type *
PNFS_LD(struct pnfs_layout_type *lo)
{
@@ -151,7 +145,7 @@ struct layoutdriver_io_operations {
/* Layout information. For each inode, alloc_layout is executed once to retrieve an
* inode specific layout structure. Each subsequent layoutget operation results in
* a set_layout call to set the opaque layout in the layout driver.*/
- void * (*alloc_layout) (struct inode *inode);
+ struct pnfs_layout_type * (*alloc_layout) (struct inode *inode);
void (*free_layout) (void *layoutid);
struct pnfs_layout_segment * (*alloc_lseg) (struct pnfs_layout_type *layoutid, struct nfs4_pnfs_layoutget_res *lgr);
void (*free_lseg) (struct pnfs_layout_segment *lseg);
diff --git a/include/linux/nfs_fs.h b/include/linux/nfs_fs.h
index 6b62a53..149051c 100644
--- a/include/linux/nfs_fs.h
+++ b/include/linux/nfs_fs.h
@@ -104,7 +104,6 @@ struct pnfs_layout_type {
int roc_iomode; /* iomode to return on close, 0=none */
seqlock_t seqlock; /* Protects the stateid */
nfs4_stateid stateid;
- void *ld_data; /* layout driver private data */
struct rpc_cred *lo_cred; /* layoutcommit credential */
/* DH: These vars keep track of the maximum write range
* so the values can be used for layoutcommit.
--
1.6.6.1


2010-06-15 14:29:04

by Fred Isaman

[permalink] [raw]
Subject: [PATCH 04/10] pnfs-submit: add backpointer to pnfs_layout_type

From: Andy Adamson <[email protected]>

Note: This should be avoided by passing struct inode instead of struct
pnfs_layout_type in all layoutdriver_io_operaitons.(which i suggest we do)

In preparation to changing the nfs_inode->layout to be a pointer to
struct pnfs_layout_type and allocate it in the alloc_layout layoutdriver io
operation.

-->Andy Adamson <[email protected]>

Signed-off-by: Fred Isaman <[email protected]>
---
fs/nfs/inode.c | 1 +
include/linux/nfs4_pnfs.h | 4 ++--
include/linux/nfs_fs.h | 1 +
3 files changed, 4 insertions(+), 2 deletions(-)

diff --git a/fs/nfs/inode.c b/fs/nfs/inode.c
index cdec539..be25ffc 100644
--- a/fs/nfs/inode.c
+++ b/fs/nfs/inode.c
@@ -1423,6 +1423,7 @@ static void pnfs_init_once(struct nfs_inode *nfsi)
INIT_LIST_HEAD(&nfsi->layout.segs);
nfsi->layout.refcount = 0;
nfsi->layout.ld_data = NULL;
+ nfsi->layout.lo_inode = &nfsi->vfs_inode;
#endif /* CONFIG_NFS_V4_1 */
}

diff --git a/include/linux/nfs4_pnfs.h b/include/linux/nfs4_pnfs.h
index a88cd69..9dac941 100644
--- a/include/linux/nfs4_pnfs.h
+++ b/include/linux/nfs4_pnfs.h
@@ -35,13 +35,13 @@ struct pnfs_layoutdriver_type {
static inline struct nfs_inode *
PNFS_NFS_INODE(struct pnfs_layout_type *lo)
{
- return container_of(lo, struct nfs_inode, layout);
+ return NFS_I(lo->lo_inode);
}

static inline struct inode *
PNFS_INODE(struct pnfs_layout_type *lo)
{
- return &PNFS_NFS_INODE(lo)->vfs_inode;
+ return lo->lo_inode;
}

static inline struct nfs_server *
diff --git a/include/linux/nfs_fs.h b/include/linux/nfs_fs.h
index 41026cb..95d8d53 100644
--- a/include/linux/nfs_fs.h
+++ b/include/linux/nfs_fs.h
@@ -116,6 +116,7 @@ struct pnfs_layout_type {
*/
loff_t pnfs_write_begin_pos;
loff_t pnfs_write_end_pos;
+ struct inode *lo_inode;
};

/*
--
1.6.6.1


2010-06-15 14:29:04

by Fred Isaman

[permalink] [raw]
Subject: [PATCH 03/10] pnfs-submit: remove list_empty check from put_layout

The check on list empty before destroying a layout has always bothered me.
Get rid of it by having lsegs take a reference on their backpointer.

Signed-off-by: Fred Isaman <[email protected]>
---
fs/nfs/pnfs.c | 36 +++++++++++++++++++++++++-----------
1 files changed, 25 insertions(+), 11 deletions(-)

diff --git a/fs/nfs/pnfs.c b/fs/nfs/pnfs.c
index 924e6fd..21192b6 100644
--- a/fs/nfs/pnfs.c
+++ b/fs/nfs/pnfs.c
@@ -348,19 +348,27 @@ put_layout(struct pnfs_layout_type *lo)
BUG_ON_UNLOCKED_LO(lo);
BUG_ON(lo->refcount <= 0);

- if (--lo->refcount == 0 && list_empty(&lo->segs)) {
+ lo->refcount--;
+
+ if (lo->refcount > 1)
+ return;
+
+ /* Make sure is removed from inode list */
+ clp = NFS_SERVER(&nfsi->vfs_inode)->nfs_client;
+ spin_lock(&clp->cl_lock);
+ if (!list_empty(&lo->lo_layouts)) {
+ list_del_init(&lo->lo_layouts);
+ lo->refcount--;
+ }
+ spin_unlock(&clp->cl_lock);
+
+ if (lo->refcount == 0) {
struct layoutdriver_io_operations *io_ops =
PNFS_LD_IO_OPS(lo);

dprintk("%s: freeing layout %p\n", __func__, lo->ld_data);
io_ops->free_layout(lo->ld_data);
lo->ld_data = NULL;
-
- /* Unlist the layout. */
- clp = NFS_SERVER(&nfsi->vfs_inode)->nfs_client;
- spin_lock(&clp->cl_lock);
- list_del_init(&lo->lo_layouts);
- spin_unlock(&clp->cl_lock);
}
}

@@ -404,6 +412,7 @@ init_lseg(struct pnfs_layout_type *lo, struct pnfs_layout_segment *lseg)
kref_init(&lseg->kref);
lseg->valid = true;
lseg->layout = lo;
+ get_layout(lo);
}

static void
@@ -413,6 +422,7 @@ destroy_lseg(struct kref *kref)
container_of(kref, struct pnfs_layout_segment, kref);

dprintk("--> %s\n", __func__);
+ put_layout(lseg->layout);
PNFS_LD_IO_OPS(lseg->layout)->free_lseg(lseg);
}

@@ -897,9 +907,10 @@ alloc_init_layout(struct inode *ino)
void *ld_data;
struct pnfs_layout_type *lo;
struct layoutdriver_io_operations *io_ops;
+ struct nfs_inode *nfsi = NFS_I(ino);

io_ops = NFS_SERVER(ino)->pnfs_curr_ld->ld_io_ops;
- lo = &NFS_I(ino)->layout;
+ lo = &nfsi->layout;
ld_data = io_ops->alloc_layout(ino);
if (!ld_data) {
printk(KERN_ERR
@@ -908,11 +919,13 @@ alloc_init_layout(struct inode *ino)
return NULL;
}

+ spin_lock(&nfsi->lo_lock);
BUG_ON(lo->ld_data != NULL);
lo->ld_data = ld_data;
memset(&lo->stateid, 0, NFS4_STATEID_SIZE);
lo->refcount = 1;
lo->roc_iomode = 0;
+ spin_unlock(&nfsi->lo_lock);
return lo;
}

@@ -970,8 +983,10 @@ get_lock_alloc_layout(struct inode *ino)
spin_lock(&nfsi->lo_lock);

spin_lock(&clp->cl_lock);
- if (list_empty(&lo->lo_layouts))
+ if (list_empty(&lo->lo_layouts)) {
list_add_tail(&lo->lo_layouts, &clp->cl_layouts);
+ get_layout(lo);
+ }
spin_unlock(&clp->cl_lock);
} else
lo = ERR_PTR(-ENOMEM);
@@ -1259,14 +1274,13 @@ pnfs_layout_process(struct nfs4_pnfs_layoutget *lgp)
goto out;
}

+ spin_lock(&nfsi->lo_lock);
init_lseg(lo, lseg);
lseg->range = res->lseg;
if (lgp->lsegpp) {
get_lseg(lseg);
*lgp->lsegpp = lseg;
}
-
- spin_lock(&nfsi->lo_lock);
pnfs_insert_layout(lo, lseg);

if (res->return_on_close) {
--
1.6.6.1


2010-06-15 16:57:02

by Benny Halevy

[permalink] [raw]
Subject: Re: [PATCH 03/10] pnfs-submit: remove list_empty check from put_layout

On Jun. 14, 2010, 21:46 -0400, Fred Isaman <[email protected]> wrote:
> The check on list empty before destroying a layout has always bothered me.
> Get rid of it by having lsegs take a reference on their backpointer.

OK, that should work and be somewhat cleaner than what we have today.
Please see notes below...

>
> Signed-off-by: Fred Isaman <[email protected]>
> ---
> fs/nfs/pnfs.c | 36 +++++++++++++++++++++++++-----------
> 1 files changed, 25 insertions(+), 11 deletions(-)
>
> diff --git a/fs/nfs/pnfs.c b/fs/nfs/pnfs.c
> index 924e6fd..21192b6 100644
> --- a/fs/nfs/pnfs.c
> +++ b/fs/nfs/pnfs.c
> @@ -348,19 +348,27 @@ put_layout(struct pnfs_layout_type *lo)
> BUG_ON_UNLOCKED_LO(lo);
> BUG_ON(lo->refcount <= 0);
>
> - if (--lo->refcount == 0 && list_empty(&lo->segs)) {
> + lo->refcount--;
> +
> + if (lo->refcount > 1)
> + return;
> +
> + /* Make sure is removed from inode list */
> + clp = NFS_SERVER(&nfsi->vfs_inode)->nfs_client;
> + spin_lock(&clp->cl_lock);
> + if (!list_empty(&lo->lo_layouts)) {
> + list_del_init(&lo->lo_layouts);
> + lo->refcount--;
> + }
> + spin_unlock(&clp->cl_lock);

This is awkward.
If we're already doing this overhaul the right thing to do
is separate out listing/unlisting of the layout on the clp list
so that the lo is added to the clp list when its lo->segs list
goes from empty to not empty (and in this case get_layout can be called
as you did as there's a reference from the clp to the lo)
and when its segs list empties it should be unlisted and
put_layout(), then if its refcount will go back to 0
it can be destroyed.

> +
> + if (lo->refcount == 0) {

In this case I'd like to add a
WARN_ON(!list_empty(&lo->segs))
to make sure that the refcounting agrees with the list manipulation.

Thanks!

Benny

> struct layoutdriver_io_operations *io_ops =
> PNFS_LD_IO_OPS(lo);
>
> dprintk("%s: freeing layout %p\n", __func__, lo->ld_data);
> io_ops->free_layout(lo->ld_data);
> lo->ld_data = NULL;
> -
> - /* Unlist the layout. */
> - clp = NFS_SERVER(&nfsi->vfs_inode)->nfs_client;
> - spin_lock(&clp->cl_lock);
> - list_del_init(&lo->lo_layouts);
> - spin_unlock(&clp->cl_lock);
> }
> }
>
> @@ -404,6 +412,7 @@ init_lseg(struct pnfs_layout_type *lo, struct pnfs_layout_segment *lseg)
> kref_init(&lseg->kref);
> lseg->valid = true;
> lseg->layout = lo;
> + get_layout(lo);
> }
>
> static void
> @@ -413,6 +422,7 @@ destroy_lseg(struct kref *kref)
> container_of(kref, struct pnfs_layout_segment, kref);
>
> dprintk("--> %s\n", __func__);
> + put_layout(lseg->layout);
> PNFS_LD_IO_OPS(lseg->layout)->free_lseg(lseg);
> }
>
> @@ -897,9 +907,10 @@ alloc_init_layout(struct inode *ino)
> void *ld_data;
> struct pnfs_layout_type *lo;
> struct layoutdriver_io_operations *io_ops;
> + struct nfs_inode *nfsi = NFS_I(ino);
>
> io_ops = NFS_SERVER(ino)->pnfs_curr_ld->ld_io_ops;
> - lo = &NFS_I(ino)->layout;
> + lo = &nfsi->layout;
> ld_data = io_ops->alloc_layout(ino);
> if (!ld_data) {
> printk(KERN_ERR
> @@ -908,11 +919,13 @@ alloc_init_layout(struct inode *ino)
> return NULL;
> }
>
> + spin_lock(&nfsi->lo_lock);
> BUG_ON(lo->ld_data != NULL);
> lo->ld_data = ld_data;
> memset(&lo->stateid, 0, NFS4_STATEID_SIZE);
> lo->refcount = 1;
> lo->roc_iomode = 0;
> + spin_unlock(&nfsi->lo_lock);
> return lo;
> }
>
> @@ -970,8 +983,10 @@ get_lock_alloc_layout(struct inode *ino)
> spin_lock(&nfsi->lo_lock);
>
> spin_lock(&clp->cl_lock);
> - if (list_empty(&lo->lo_layouts))
> + if (list_empty(&lo->lo_layouts)) {
> list_add_tail(&lo->lo_layouts, &clp->cl_layouts);
> + get_layout(lo);
> + }
> spin_unlock(&clp->cl_lock);
> } else
> lo = ERR_PTR(-ENOMEM);
> @@ -1259,14 +1274,13 @@ pnfs_layout_process(struct nfs4_pnfs_layoutget *lgp)
> goto out;
> }
>
> + spin_lock(&nfsi->lo_lock);
> init_lseg(lo, lseg);
> lseg->range = res->lseg;
> if (lgp->lsegpp) {
> get_lseg(lseg);
> *lgp->lsegpp = lseg;
> }
> -
> - spin_lock(&nfsi->lo_lock);
> pnfs_insert_layout(lo, lseg);
>
> if (res->return_on_close) {


2010-06-15 17:00:21

by Benny Halevy

[permalink] [raw]
Subject: Re: [PATCH 04/10] pnfs-submit: add backpointer to pnfs_layout_type

On Jun. 14, 2010, 21:46 -0400, Fred Isaman <[email protected]> wrote:
> From: Andy Adamson <[email protected]>
>
> Note: This should be avoided by passing struct inode instead of struct
> pnfs_layout_type in all layoutdriver_io_operaitons.(which i suggest we do)
>
> In preparation to changing the nfs_inode->layout to be a pointer to
> struct pnfs_layout_type and allocate it in the alloc_layout layoutdriver io
> operation.
>

We've already been there and decided to embed the layout in the struct nfs_inode
What has changed?

> -->Andy Adamson <[email protected]>

Should that be Andy's sign-off? :-)

Benny

>
> Signed-off-by: Fred Isaman <[email protected]>
> ---
> fs/nfs/inode.c | 1 +
> include/linux/nfs4_pnfs.h | 4 ++--
> include/linux/nfs_fs.h | 1 +
> 3 files changed, 4 insertions(+), 2 deletions(-)
>
> diff --git a/fs/nfs/inode.c b/fs/nfs/inode.c
> index cdec539..be25ffc 100644
> --- a/fs/nfs/inode.c
> +++ b/fs/nfs/inode.c
> @@ -1423,6 +1423,7 @@ static void pnfs_init_once(struct nfs_inode *nfsi)
> INIT_LIST_HEAD(&nfsi->layout.segs);
> nfsi->layout.refcount = 0;
> nfsi->layout.ld_data = NULL;
> + nfsi->layout.lo_inode = &nfsi->vfs_inode;
> #endif /* CONFIG_NFS_V4_1 */
> }
>
> diff --git a/include/linux/nfs4_pnfs.h b/include/linux/nfs4_pnfs.h
> index a88cd69..9dac941 100644
> --- a/include/linux/nfs4_pnfs.h
> +++ b/include/linux/nfs4_pnfs.h
> @@ -35,13 +35,13 @@ struct pnfs_layoutdriver_type {
> static inline struct nfs_inode *
> PNFS_NFS_INODE(struct pnfs_layout_type *lo)
> {
> - return container_of(lo, struct nfs_inode, layout);
> + return NFS_I(lo->lo_inode);
> }
>
> static inline struct inode *
> PNFS_INODE(struct pnfs_layout_type *lo)
> {
> - return &PNFS_NFS_INODE(lo)->vfs_inode;
> + return lo->lo_inode;
> }
>
> static inline struct nfs_server *
> diff --git a/include/linux/nfs_fs.h b/include/linux/nfs_fs.h
> index 41026cb..95d8d53 100644
> --- a/include/linux/nfs_fs.h
> +++ b/include/linux/nfs_fs.h
> @@ -116,6 +116,7 @@ struct pnfs_layout_type {
> */
> loff_t pnfs_write_begin_pos;
> loff_t pnfs_write_end_pos;
> + struct inode *lo_inode;
> };
>
> /*


2010-06-15 17:06:12

by Benny Halevy

[permalink] [raw]
Subject: Re: [PATCH 07/10] pnfs-submit: avoid race handling return on close

On Jun. 14, 2010, 21:46 -0400, Fred Isaman <[email protected]> wrote:
> This prepares for the next patch.
>
> NOTE this doesn't really fix any current race, since
> layout going to NULL is OK. But layout changing from NULL to nonNULL
> is a real race that is not fixed
>
> Signed-off-by: Fred Isaman <[email protected]>
> ---
> fs/nfs/nfs4state.c | 5 +++--
> fs/nfs/pnfs.c | 11 +++++++++++
> include/linux/nfs4_pnfs.h | 2 ++
> 3 files changed, 16 insertions(+), 2 deletions(-)
>
> diff --git a/fs/nfs/nfs4state.c b/fs/nfs/nfs4state.c
> index d5144bd..8a7a64c 100644
> --- a/fs/nfs/nfs4state.c
> +++ b/fs/nfs/nfs4state.c
> @@ -594,11 +594,12 @@ static void __nfs4_close(struct path *path, struct nfs4_state *state,
> } else {
> #ifdef CONFIG_NFS_V4_1
> struct nfs_inode *nfsi = NFS_I(state->inode);
> + int roc = nfs4_roc_iomode(nfsi);
>
> - if (has_layout(nfsi) && nfsi->layout.roc_iomode) {
> + if (roc) {
> struct nfs4_pnfs_layout_segment range;
>
> - range.iomode = nfsi->layout.roc_iomode;
> + range.iomode = roc;
> range.offset = 0;
> range.length = NFS4_MAX_UINT64;
> pnfs_return_layout(state->inode, &range, NULL,
> diff --git a/fs/nfs/pnfs.c b/fs/nfs/pnfs.c
> index 6def09c..bd11ec7 100644
> --- a/fs/nfs/pnfs.c
> +++ b/fs/nfs/pnfs.c
> @@ -321,6 +321,17 @@ pnfs_unregister_layoutdriver(struct pnfs_layoutdriver_type *ld_type)
> #define BUG_ON_UNLOCKED_LO(lo) do {} while (0)
> #endif /* CONFIG_SMP */
>
> +int nfs4_roc_iomode(struct nfs_inode *nfsi)
> +{
> + int rv = 0;
> +
> + spin_lock(&pnfs_spinlock);

Why take the global lock rather than nfsi->lo_lock?

Benny

> + if (has_layout(nfsi))
> + rv = nfsi->layout.roc_iomode;
> + spin_unlock(&pnfs_spinlock);
> + return rv;
> +}
> +
> static inline void
> get_layout(struct pnfs_layout_type *lo)
> {
> diff --git a/include/linux/nfs4_pnfs.h b/include/linux/nfs4_pnfs.h
> index 0eb9b16..2ea131f 100644
> --- a/include/linux/nfs4_pnfs.h
> +++ b/include/linux/nfs4_pnfs.h
> @@ -86,6 +86,8 @@ layoutcommit_needed(struct nfs_inode *nfsi)
> return test_bit(NFS_INO_LAYOUTCOMMIT, &nfsi->pnfs_layout_state);
> }
>
> +int nfs4_roc_iomode(struct nfs_inode *nfs);
> +
> #else /* CONFIG_NFS_V4_1 */
>
> static inline bool


2010-06-15 17:32:24

by Fred Isaman

[permalink] [raw]
Subject: Re: [PATCH 07/10] pnfs-submit: avoid race handling return on close

On Tue, Jun 15, 2010 at 1:06 PM, Benny Halevy <[email protected]> wro=
te:
> On Jun. 14, 2010, 21:46 -0400, Fred Isaman <[email protected]> wrote=
:
>> This prepares for the next patch.
>>
>> NOTE this doesn't really fix any current race, since
>> layout going to NULL is OK. =A0But layout changing from NULL to nonN=
ULL
>> is a real race that is not fixed
>>
>> Signed-off-by: Fred Isaman <[email protected]>
>> ---
>> =A0fs/nfs/nfs4state.c =A0 =A0 =A0 =A0| =A0 =A05 +++--
>> =A0fs/nfs/pnfs.c =A0 =A0 =A0 =A0 =A0 =A0 | =A0 11 +++++++++++
>> =A0include/linux/nfs4_pnfs.h | =A0 =A02 ++
>> =A03 files changed, 16 insertions(+), 2 deletions(-)
>>
>> diff --git a/fs/nfs/nfs4state.c b/fs/nfs/nfs4state.c
>> index d5144bd..8a7a64c 100644
>> --- a/fs/nfs/nfs4state.c
>> +++ b/fs/nfs/nfs4state.c
>> @@ -594,11 +594,12 @@ static void __nfs4_close(struct path *path, st=
ruct nfs4_state *state,
>> =A0 =A0 =A0 } else {
>> =A0#ifdef CONFIG_NFS_V4_1
>> =A0 =A0 =A0 =A0 =A0 =A0 =A0 struct nfs_inode *nfsi =3D NFS_I(state->=
inode);
>> + =A0 =A0 =A0 =A0 =A0 =A0 int roc =3D nfs4_roc_iomode(nfsi);
>>
>> - =A0 =A0 =A0 =A0 =A0 =A0 if (has_layout(nfsi) && nfsi->layout.roc_i=
omode) {
>> + =A0 =A0 =A0 =A0 =A0 =A0 if (roc) {
>> =A0 =A0 =A0 =A0 =A0 =A0 =A0 =A0 =A0 =A0 =A0 struct nfs4_pnfs_layout_=
segment range;
>>
>> - =A0 =A0 =A0 =A0 =A0 =A0 =A0 =A0 =A0 =A0 range.iomode =3D nfsi->lay=
out.roc_iomode;
>> + =A0 =A0 =A0 =A0 =A0 =A0 =A0 =A0 =A0 =A0 range.iomode =3D roc;
>> =A0 =A0 =A0 =A0 =A0 =A0 =A0 =A0 =A0 =A0 =A0 range.offset =3D 0;
>> =A0 =A0 =A0 =A0 =A0 =A0 =A0 =A0 =A0 =A0 =A0 range.length =3D NFS4_MA=
X_UINT64;
>> =A0 =A0 =A0 =A0 =A0 =A0 =A0 =A0 =A0 =A0 =A0 pnfs_return_layout(state=
->inode, &range, NULL,
>> diff --git a/fs/nfs/pnfs.c b/fs/nfs/pnfs.c
>> index 6def09c..bd11ec7 100644
>> --- a/fs/nfs/pnfs.c
>> +++ b/fs/nfs/pnfs.c
>> @@ -321,6 +321,17 @@ pnfs_unregister_layoutdriver(struct pnfs_layout=
driver_type *ld_type)
>> =A0#define BUG_ON_UNLOCKED_LO(lo) do {} while (0)
>> =A0#endif /* CONFIG_SMP */
>>
>> +int nfs4_roc_iomode(struct nfs_inode *nfsi)
>> +{
>> + =A0 =A0 int rv =3D 0;
>> +
>> + =A0 =A0 spin_lock(&pnfs_spinlock);
>
> Why take the global lock rather than nfsi->lo_lock?
>
> Benny

You are right. That would be a copy-paste error.

=46red

>
>> + =A0 =A0 if (has_layout(nfsi))
>> + =A0 =A0 =A0 =A0 =A0 =A0 rv =3D nfsi->layout.roc_iomode;
>> + =A0 =A0 spin_unlock(&pnfs_spinlock);
>> + =A0 =A0 return rv;
>> +}
>> +
>> =A0static inline void
>> =A0get_layout(struct pnfs_layout_type *lo)
>> =A0{
>> diff --git a/include/linux/nfs4_pnfs.h b/include/linux/nfs4_pnfs.h
>> index 0eb9b16..2ea131f 100644
>> --- a/include/linux/nfs4_pnfs.h
>> +++ b/include/linux/nfs4_pnfs.h
>> @@ -86,6 +86,8 @@ layoutcommit_needed(struct nfs_inode *nfsi)
>> =A0 =A0 =A0 return test_bit(NFS_INO_LAYOUTCOMMIT, &nfsi->pnfs_layout=
_state);
>> =A0}
>>
>> +int nfs4_roc_iomode(struct nfs_inode *nfs);
>> +
>> =A0#else /* CONFIG_NFS_V4_1 */
>>
>> =A0static inline bool
>
> --
> To unsubscribe from this list: send the line "unsubscribe linux-nfs" =
in
> the body of a message to [email protected]
> More majordomo info at =A0http://vger.kernel.org/majordomo-info.html
>

2010-06-15 17:34:01

by Trond Myklebust

[permalink] [raw]
Subject: Re: [PATCH 07/10] pnfs-submit: avoid race handling return on close

On Tue, 2010-06-15 at 13:32 -0400, Fred Isaman wrote:
> On Tue, Jun 15, 2010 at 1:06 PM, Benny Halevy <[email protected]> wrote:
> > On Jun. 14, 2010, 21:46 -0400, Fred Isaman <[email protected]> wrote:
> >> This prepares for the next patch.
> >>
> >> NOTE this doesn't really fix any current race, since
> >> layout going to NULL is OK. But layout changing from NULL to nonNULL
> >> is a real race that is not fixed
> >>
> >> Signed-off-by: Fred Isaman <[email protected]>
> >> ---
> >> fs/nfs/nfs4state.c | 5 +++--
> >> fs/nfs/pnfs.c | 11 +++++++++++
> >> include/linux/nfs4_pnfs.h | 2 ++
> >> 3 files changed, 16 insertions(+), 2 deletions(-)
> >>
> >> diff --git a/fs/nfs/nfs4state.c b/fs/nfs/nfs4state.c
> >> index d5144bd..8a7a64c 100644
> >> --- a/fs/nfs/nfs4state.c
> >> +++ b/fs/nfs/nfs4state.c
> >> @@ -594,11 +594,12 @@ static void __nfs4_close(struct path *path, struct nfs4_state *state,
> >> } else {
> >> #ifdef CONFIG_NFS_V4_1
> >> struct nfs_inode *nfsi = NFS_I(state->inode);
> >> + int roc = nfs4_roc_iomode(nfsi);
> >>
> >> - if (has_layout(nfsi) && nfsi->layout.roc_iomode) {
> >> + if (roc) {
> >> struct nfs4_pnfs_layout_segment range;
> >>
> >> - range.iomode = nfsi->layout.roc_iomode;
> >> + range.iomode = roc;
> >> range.offset = 0;
> >> range.length = NFS4_MAX_UINT64;
> >> pnfs_return_layout(state->inode, &range, NULL,
> >> diff --git a/fs/nfs/pnfs.c b/fs/nfs/pnfs.c
> >> index 6def09c..bd11ec7 100644
> >> --- a/fs/nfs/pnfs.c
> >> +++ b/fs/nfs/pnfs.c
> >> @@ -321,6 +321,17 @@ pnfs_unregister_layoutdriver(struct pnfs_layoutdriver_type *ld_type)
> >> #define BUG_ON_UNLOCKED_LO(lo) do {} while (0)
> >> #endif /* CONFIG_SMP */
> >>
> >> +int nfs4_roc_iomode(struct nfs_inode *nfsi)
> >> +{
> >> + int rv = 0;
> >> +
> >> + spin_lock(&pnfs_spinlock);
> >
> > Why take the global lock rather than nfsi->lo_lock?
> >
> > Benny
>
> You are right. That would be a copy-paste error.

What's an nfsi->lo_lock, and why do we need one?

Trond


2010-06-15 17:52:50

by Fred Isaman

[permalink] [raw]
Subject: Re: [PATCH 07/10] pnfs-submit: avoid race handling return on close

On Tue, Jun 15, 2010 at 1:33 PM, Trond Myklebust
<[email protected]> wrote:
> On Tue, 2010-06-15 at 13:32 -0400, Fred Isaman wrote:
>> On Tue, Jun 15, 2010 at 1:06 PM, Benny Halevy <[email protected]> =
wrote:
>> > On Jun. 14, 2010, 21:46 -0400, Fred Isaman <[email protected]> wr=
ote:
>> >> This prepares for the next patch.
>> >>
>> >> NOTE this doesn't really fix any current race, since
>> >> layout going to NULL is OK. =A0But layout changing from NULL to n=
onNULL
>> >> is a real race that is not fixed
>> >>
>> >> Signed-off-by: Fred Isaman <[email protected]>
>> >> ---
>> >> =A0fs/nfs/nfs4state.c =A0 =A0 =A0 =A0| =A0 =A05 +++--
>> >> =A0fs/nfs/pnfs.c =A0 =A0 =A0 =A0 =A0 =A0 | =A0 11 +++++++++++
>> >> =A0include/linux/nfs4_pnfs.h | =A0 =A02 ++
>> >> =A03 files changed, 16 insertions(+), 2 deletions(-)
>> >>
>> >> diff --git a/fs/nfs/nfs4state.c b/fs/nfs/nfs4state.c
>> >> index d5144bd..8a7a64c 100644
>> >> --- a/fs/nfs/nfs4state.c
>> >> +++ b/fs/nfs/nfs4state.c
>> >> @@ -594,11 +594,12 @@ static void __nfs4_close(struct path *path,=
struct nfs4_state *state,
>> >> =A0 =A0 =A0 } else {
>> >> =A0#ifdef CONFIG_NFS_V4_1
>> >> =A0 =A0 =A0 =A0 =A0 =A0 =A0 struct nfs_inode *nfsi =3D NFS_I(stat=
e->inode);
>> >> + =A0 =A0 =A0 =A0 =A0 =A0 int roc =3D nfs4_roc_iomode(nfsi);
>> >>
>> >> - =A0 =A0 =A0 =A0 =A0 =A0 if (has_layout(nfsi) && nfsi->layout.ro=
c_iomode) {
>> >> + =A0 =A0 =A0 =A0 =A0 =A0 if (roc) {
>> >> =A0 =A0 =A0 =A0 =A0 =A0 =A0 =A0 =A0 =A0 =A0 struct nfs4_pnfs_layo=
ut_segment range;
>> >>
>> >> - =A0 =A0 =A0 =A0 =A0 =A0 =A0 =A0 =A0 =A0 range.iomode =3D nfsi->=
layout.roc_iomode;
>> >> + =A0 =A0 =A0 =A0 =A0 =A0 =A0 =A0 =A0 =A0 range.iomode =3D roc;
>> >> =A0 =A0 =A0 =A0 =A0 =A0 =A0 =A0 =A0 =A0 =A0 range.offset =3D 0;
>> >> =A0 =A0 =A0 =A0 =A0 =A0 =A0 =A0 =A0 =A0 =A0 range.length =3D NFS4=
_MAX_UINT64;
>> >> =A0 =A0 =A0 =A0 =A0 =A0 =A0 =A0 =A0 =A0 =A0 pnfs_return_layout(st=
ate->inode, &range, NULL,
>> >> diff --git a/fs/nfs/pnfs.c b/fs/nfs/pnfs.c
>> >> index 6def09c..bd11ec7 100644
>> >> --- a/fs/nfs/pnfs.c
>> >> +++ b/fs/nfs/pnfs.c
>> >> @@ -321,6 +321,17 @@ pnfs_unregister_layoutdriver(struct pnfs_lay=
outdriver_type *ld_type)
>> >> =A0#define BUG_ON_UNLOCKED_LO(lo) do {} while (0)
>> >> =A0#endif /* CONFIG_SMP */
>> >>
>> >> +int nfs4_roc_iomode(struct nfs_inode *nfsi)
>> >> +{
>> >> + =A0 =A0 int rv =3D 0;
>> >> +
>> >> + =A0 =A0 spin_lock(&pnfs_spinlock);
>> >
>> > Why take the global lock rather than nfsi->lo_lock?
>> >
>> > Benny
>>
>> You are right. That would be a copy-paste error.
>
> What's an nfsi->lo_lock, and why do we need one?
>
> Trond
>
>

It protects nfsi->layout and its contents.

=46red

2010-06-15 18:20:01

by Trond Myklebust

[permalink] [raw]
Subject: Re: [PATCH 07/10] pnfs-submit: avoid race handling return on close

On Tue, 2010-06-15 at 13:52 -0400, Fred Isaman wrote:
> On Tue, Jun 15, 2010 at 1:33 PM, Trond Myklebust
> <[email protected]> wrote:
> > On Tue, 2010-06-15 at 13:32 -0400, Fred Isaman wrote:
> >> On Tue, Jun 15, 2010 at 1:06 PM, Benny Halevy <[email protected]> wrote:
> >> > On Jun. 14, 2010, 21:46 -0400, Fred Isaman <[email protected]> wrote:
> >> >> This prepares for the next patch.
> >> >>
> >> >> NOTE this doesn't really fix any current race, since
> >> >> layout going to NULL is OK. But layout changing from NULL to nonNULL
> >> >> is a real race that is not fixed
> >> >>
> >> >> Signed-off-by: Fred Isaman <[email protected]>
> >> >> ---
> >> >> fs/nfs/nfs4state.c | 5 +++--
> >> >> fs/nfs/pnfs.c | 11 +++++++++++
> >> >> include/linux/nfs4_pnfs.h | 2 ++
> >> >> 3 files changed, 16 insertions(+), 2 deletions(-)
> >> >>
> >> >> diff --git a/fs/nfs/nfs4state.c b/fs/nfs/nfs4state.c
> >> >> index d5144bd..8a7a64c 100644
> >> >> --- a/fs/nfs/nfs4state.c
> >> >> +++ b/fs/nfs/nfs4state.c
> >> >> @@ -594,11 +594,12 @@ static void __nfs4_close(struct path *path, struct nfs4_state *state,
> >> >> } else {
> >> >> #ifdef CONFIG_NFS_V4_1
> >> >> struct nfs_inode *nfsi = NFS_I(state->inode);
> >> >> + int roc = nfs4_roc_iomode(nfsi);
> >> >>
> >> >> - if (has_layout(nfsi) && nfsi->layout.roc_iomode) {
> >> >> + if (roc) {
> >> >> struct nfs4_pnfs_layout_segment range;
> >> >>
> >> >> - range.iomode = nfsi->layout.roc_iomode;
> >> >> + range.iomode = roc;
> >> >> range.offset = 0;
> >> >> range.length = NFS4_MAX_UINT64;
> >> >> pnfs_return_layout(state->inode, &range, NULL,
> >> >> diff --git a/fs/nfs/pnfs.c b/fs/nfs/pnfs.c
> >> >> index 6def09c..bd11ec7 100644
> >> >> --- a/fs/nfs/pnfs.c
> >> >> +++ b/fs/nfs/pnfs.c
> >> >> @@ -321,6 +321,17 @@ pnfs_unregister_layoutdriver(struct pnfs_layoutdriver_type *ld_type)
> >> >> #define BUG_ON_UNLOCKED_LO(lo) do {} while (0)
> >> >> #endif /* CONFIG_SMP */
> >> >>
> >> >> +int nfs4_roc_iomode(struct nfs_inode *nfsi)
> >> >> +{
> >> >> + int rv = 0;
> >> >> +
> >> >> + spin_lock(&pnfs_spinlock);
> >> >
> >> > Why take the global lock rather than nfsi->lo_lock?
> >> >
> >> > Benny
> >>
> >> You are right. That would be a copy-paste error.
> >
> > What's an nfsi->lo_lock, and why do we need one?
> >
> > Trond
> >
> >
>
> It protects nfsi->layout and its contents.
>
> Fred

Yes, but why do we need an extra spinlock? We already have
inode->i_lock. Why can't you just reuse that?


2010-06-15 18:47:28

by Benny Halevy

[permalink] [raw]
Subject: Re: [PATCH 07/10] pnfs-submit: avoid race handling return on close

On Jun. 15, 2010, 14:19 -0400, Trond Myklebust <[email protected]> wrote:
> On Tue, 2010-06-15 at 13:52 -0400, Fred Isaman wrote:
>> On Tue, Jun 15, 2010 at 1:33 PM, Trond Myklebust
>> <[email protected]> wrote:
>>> On Tue, 2010-06-15 at 13:32 -0400, Fred Isaman wrote:
>>>> On Tue, Jun 15, 2010 at 1:06 PM, Benny Halevy <[email protected]> wrote:
>>>>> On Jun. 14, 2010, 21:46 -0400, Fred Isaman <[email protected]> wrote:
>>>>>> This prepares for the next patch.
>>>>>>
>>>>>> NOTE this doesn't really fix any current race, since
>>>>>> layout going to NULL is OK. But layout changing from NULL to nonNULL
>>>>>> is a real race that is not fixed
>>>>>>
>>>>>> Signed-off-by: Fred Isaman <[email protected]>
>>>>>> ---
>>>>>> fs/nfs/nfs4state.c | 5 +++--
>>>>>> fs/nfs/pnfs.c | 11 +++++++++++
>>>>>> include/linux/nfs4_pnfs.h | 2 ++
>>>>>> 3 files changed, 16 insertions(+), 2 deletions(-)
>>>>>>
>>>>>> diff --git a/fs/nfs/nfs4state.c b/fs/nfs/nfs4state.c
>>>>>> index d5144bd..8a7a64c 100644
>>>>>> --- a/fs/nfs/nfs4state.c
>>>>>> +++ b/fs/nfs/nfs4state.c
>>>>>> @@ -594,11 +594,12 @@ static void __nfs4_close(struct path *path, struct nfs4_state *state,
>>>>>> } else {
>>>>>> #ifdef CONFIG_NFS_V4_1
>>>>>> struct nfs_inode *nfsi = NFS_I(state->inode);
>>>>>> + int roc = nfs4_roc_iomode(nfsi);
>>>>>>
>>>>>> - if (has_layout(nfsi) && nfsi->layout.roc_iomode) {
>>>>>> + if (roc) {
>>>>>> struct nfs4_pnfs_layout_segment range;
>>>>>>
>>>>>> - range.iomode = nfsi->layout.roc_iomode;
>>>>>> + range.iomode = roc;
>>>>>> range.offset = 0;
>>>>>> range.length = NFS4_MAX_UINT64;
>>>>>> pnfs_return_layout(state->inode, &range, NULL,
>>>>>> diff --git a/fs/nfs/pnfs.c b/fs/nfs/pnfs.c
>>>>>> index 6def09c..bd11ec7 100644
>>>>>> --- a/fs/nfs/pnfs.c
>>>>>> +++ b/fs/nfs/pnfs.c
>>>>>> @@ -321,6 +321,17 @@ pnfs_unregister_layoutdriver(struct pnfs_layoutdriver_type *ld_type)
>>>>>> #define BUG_ON_UNLOCKED_LO(lo) do {} while (0)
>>>>>> #endif /* CONFIG_SMP */
>>>>>>
>>>>>> +int nfs4_roc_iomode(struct nfs_inode *nfsi)
>>>>>> +{
>>>>>> + int rv = 0;
>>>>>> +
>>>>>> + spin_lock(&pnfs_spinlock);
>>>>>
>>>>> Why take the global lock rather than nfsi->lo_lock?
>>>>>
>>>>> Benny
>>>>
>>>> You are right. That would be a copy-paste error.
>>>
>>> What's an nfsi->lo_lock, and why do we need one?
>>>
>>> Trond
>>>
>>>
>>
>> It protects nfsi->layout and its contents.
>>
>> Fred
>
> Yes, but why do we need an extra spinlock? We already have
> inode->i_lock. Why can't you just reuse that?
>

I agree. We can and should reuse i_lock.

Benny