2013-03-21 14:13:06

by Myklebust, Trond

[permalink] [raw]
Subject: [PATCH v2 1/3] NFSv4.1: Fix a race in pNFS layoutcommit

We need to clear the NFS_LSEG_LAYOUTCOMMIT bits atomically with the
NFS_INO_LAYOUTCOMMIT bit, otherwise we may end up with situations
where the two are out of sync.
The first half of the problem is to ensure that pnfs_layoutcommit_inode
clears the NFS_LSEG_LAYOUTCOMMIT bit through pnfs_list_write_lseg.
We still need to keep the reference to those segments until the RPC call
is finished, so in order to make it clear _where_ those references come
from, we add a helper pnfs_list_write_lseg_done() that cleans up after
pnfs_list_write_lseg.

Signed-off-by: Trond Myklebust <[email protected]>
Acked-by: Benny Halevy <[email protected]>
Cc: [email protected]
---
fs/nfs/nfs4proc.c | 14 --------------
fs/nfs/pnfs.c | 19 ++++++++++++++++++-
2 files changed, 18 insertions(+), 15 deletions(-)

diff --git a/fs/nfs/nfs4proc.c b/fs/nfs/nfs4proc.c
index b2671cb..6ccdd4f 100644
--- a/fs/nfs/nfs4proc.c
+++ b/fs/nfs/nfs4proc.c
@@ -6416,22 +6416,8 @@ nfs4_layoutcommit_done(struct rpc_task *task, void *calldata)
static void nfs4_layoutcommit_release(void *calldata)
{
struct nfs4_layoutcommit_data *data = calldata;
- struct pnfs_layout_segment *lseg, *tmp;
- unsigned long *bitlock = &NFS_I(data->args.inode)->flags;

pnfs_cleanup_layoutcommit(data);
- /* Matched by references in pnfs_set_layoutcommit */
- list_for_each_entry_safe(lseg, tmp, &data->lseg_list, pls_lc_list) {
- list_del_init(&lseg->pls_lc_list);
- if (test_and_clear_bit(NFS_LSEG_LAYOUTCOMMIT,
- &lseg->pls_flags))
- pnfs_put_lseg(lseg);
- }
-
- clear_bit_unlock(NFS_INO_LAYOUTCOMMITTING, bitlock);
- smp_mb__after_clear_bit();
- wake_up_bit(bitlock, NFS_INO_LAYOUTCOMMITTING);
-
put_rpccred(data->cred);
kfree(data);
}
diff --git a/fs/nfs/pnfs.c b/fs/nfs/pnfs.c
index 48ac5aa..c6bbb53 100644
--- a/fs/nfs/pnfs.c
+++ b/fs/nfs/pnfs.c
@@ -1746,11 +1746,27 @@ static void pnfs_list_write_lseg(struct inode *inode, struct list_head *listp)

list_for_each_entry(lseg, &NFS_I(inode)->layout->plh_segs, pls_list) {
if (lseg->pls_range.iomode == IOMODE_RW &&
- test_bit(NFS_LSEG_LAYOUTCOMMIT, &lseg->pls_flags))
+ test_and_clear_bit(NFS_LSEG_LAYOUTCOMMIT, &lseg->pls_flags))
list_add(&lseg->pls_lc_list, listp);
}
}

+void pnfs_list_write_lseg_done(struct inode *inode, struct list_head *listp)
+{
+ struct pnfs_layout_segment *lseg, *tmp;
+ unsigned long *bitlock = &NFS_I(inode)->flags;
+
+ /* Matched by references in pnfs_set_layoutcommit */
+ list_for_each_entry_safe(lseg, tmp, listp, pls_lc_list) {
+ list_del_init(&lseg->pls_lc_list);
+ pnfs_put_lseg(lseg);
+ }
+
+ clear_bit_unlock(NFS_INO_LAYOUTCOMMITTING, bitlock);
+ smp_mb__after_clear_bit();
+ wake_up_bit(bitlock, NFS_INO_LAYOUTCOMMITTING);
+}
+
void pnfs_set_lo_fail(struct pnfs_layout_segment *lseg)
{
pnfs_layout_io_set_failed(lseg->pls_layout, lseg->pls_range.iomode);
@@ -1795,6 +1811,7 @@ void pnfs_cleanup_layoutcommit(struct nfs4_layoutcommit_data *data)

if (nfss->pnfs_curr_ld->cleanup_layoutcommit)
nfss->pnfs_curr_ld->cleanup_layoutcommit(data);
+ pnfs_list_write_lseg_done(data->args.inode, &data->lseg_list);
}

/*
--
1.8.1.4



2013-03-21 14:32:35

by Myklebust, Trond

[permalink] [raw]
Subject: Re: [PATCH v2 1/3] NFSv4.1: Fix a race in pNFS layoutcommit

On Thu, 2013-03-21 at 10:12 -0400, Trond Myklebust wrote:
> We need to clear the NFS_LSEG_LAYOUTCOMMIT bits atomically with the
> NFS_INO_LAYOUTCOMMIT bit, otherwise we may end up with situations
> where the two are out of sync.
> The first half of the problem is to ensure that pnfs_layoutcommit_inode
> clears the NFS_LSEG_LAYOUTCOMMIT bit through pnfs_list_write_lseg.
> We still need to keep the reference to those segments until the RPC call
> is finished, so in order to make it clear _where_ those references come
> from, we add a helper pnfs_list_write_lseg_done() that cleans up after
> pnfs_list_write_lseg.
>
> Signed-off-by: Trond Myklebust <[email protected]>
> Acked-by: Benny Halevy <[email protected]>
> Cc: [email protected]
> ---
> fs/nfs/nfs4proc.c | 14 --------------
> fs/nfs/pnfs.c | 19 ++++++++++++++++++-
> 2 files changed, 18 insertions(+), 15 deletions(-)
>
> diff --git a/fs/nfs/nfs4proc.c b/fs/nfs/nfs4proc.c
> index b2671cb..6ccdd4f 100644
> --- a/fs/nfs/nfs4proc.c
> +++ b/fs/nfs/nfs4proc.c
> @@ -6416,22 +6416,8 @@ nfs4_layoutcommit_done(struct rpc_task *task, void *calldata)
> static void nfs4_layoutcommit_release(void *calldata)
> {
> struct nfs4_layoutcommit_data *data = calldata;
> - struct pnfs_layout_segment *lseg, *tmp;
> - unsigned long *bitlock = &NFS_I(data->args.inode)->flags;
>
> pnfs_cleanup_layoutcommit(data);
> - /* Matched by references in pnfs_set_layoutcommit */
> - list_for_each_entry_safe(lseg, tmp, &data->lseg_list, pls_lc_list) {
> - list_del_init(&lseg->pls_lc_list);
> - if (test_and_clear_bit(NFS_LSEG_LAYOUTCOMMIT,
> - &lseg->pls_flags))
> - pnfs_put_lseg(lseg);
> - }
> -
> - clear_bit_unlock(NFS_INO_LAYOUTCOMMITTING, bitlock);
> - smp_mb__after_clear_bit();
> - wake_up_bit(bitlock, NFS_INO_LAYOUTCOMMITTING);
> -
> put_rpccred(data->cred);
> kfree(data);
> }
> diff --git a/fs/nfs/pnfs.c b/fs/nfs/pnfs.c
> index 48ac5aa..c6bbb53 100644
> --- a/fs/nfs/pnfs.c
> +++ b/fs/nfs/pnfs.c
> @@ -1746,11 +1746,27 @@ static void pnfs_list_write_lseg(struct inode *inode, struct list_head *listp)
>
> list_for_each_entry(lseg, &NFS_I(inode)->layout->plh_segs, pls_list) {
> if (lseg->pls_range.iomode == IOMODE_RW &&
> - test_bit(NFS_LSEG_LAYOUTCOMMIT, &lseg->pls_flags))
> + test_and_clear_bit(NFS_LSEG_LAYOUTCOMMIT, &lseg->pls_flags))
> list_add(&lseg->pls_lc_list, listp);
> }
> }
>
> +void pnfs_list_write_lseg_done(struct inode *inode, struct list_head *listp)

Doh! This needs to be declared static... Will fix.

> +{
> + struct pnfs_layout_segment *lseg, *tmp;
> + unsigned long *bitlock = &NFS_I(inode)->flags;
> +
> + /* Matched by references in pnfs_set_layoutcommit */
> + list_for_each_entry_safe(lseg, tmp, listp, pls_lc_list) {
> + list_del_init(&lseg->pls_lc_list);
> + pnfs_put_lseg(lseg);
> + }
> +
> + clear_bit_unlock(NFS_INO_LAYOUTCOMMITTING, bitlock);
> + smp_mb__after_clear_bit();
> + wake_up_bit(bitlock, NFS_INO_LAYOUTCOMMITTING);
> +}
> +
> void pnfs_set_lo_fail(struct pnfs_layout_segment *lseg)
> {
> pnfs_layout_io_set_failed(lseg->pls_layout, lseg->pls_range.iomode);
> @@ -1795,6 +1811,7 @@ void pnfs_cleanup_layoutcommit(struct nfs4_layoutcommit_data *data)
>
> if (nfss->pnfs_curr_ld->cleanup_layoutcommit)
> nfss->pnfs_curr_ld->cleanup_layoutcommit(data);
> + pnfs_list_write_lseg_done(data->args.inode, &data->lseg_list);
> }
>
> /*


--
Trond Myklebust
Linux NFS client maintainer

NetApp
[email protected]
http://www.netapp.com

2013-03-21 21:59:41

by Boaz Harrosh

[permalink] [raw]
Subject: Re: [PATCH v2 2/3] NFSv4.1: Always clear the NFS_INO_LAYOUTCOMMIT in layoutreturn

On 03/21/2013 07:06 PM, Peng Tao wrote:
> On Thu, Mar 21, 2013 at 10:13:00AM -0400, Trond Myklebust wrote:
> <snip>
>> @@ -1458,7 +1479,6 @@ static void pnfs_ld_handle_write_error(struct nfs_write_data *data)
>> dprintk("pnfs write error = %d\n", hdr->pnfs_error);
>> if (NFS_SERVER(hdr->inode)->pnfs_curr_ld->flags &
>> PNFS_LAYOUTRET_ON_ERROR) {
>> - clear_bit(NFS_INO_LAYOUTCOMMIT, &NFS_I(hdr->inode)->flags);
> Hi Trond and Boaz,
>
> If object layout requires layout being committed before returned (as fixed in
> the 3/3 patch), is it a potential problem to directly return layout here as
> well? e.g., if one lseg is successfully written and pending layoutcommit,
> then another lseg of the same file failed read/write, then layout will be
> returned w/o layoutcommit. For blocklayout, it is a potential data corruption
> and that's why block layout doesn't set PNFS_LAYOUTRET_ON_ERROR bit. So
> I'm wondering if object will suffer from the same issue?
>

Hi Tao

No, not at all. The objects layout has error reported as part of the
layout_return OPT. With exact devices that failed and why. In fact the
data should not be "committed" per ce, but a recovery process must be
preformed because we know that not all data of a stripe was committed
including parity, and the raid5 check-some is surly wrong. This is why
there is an error bit in layout_commit OPT to denote that this is not
a true commit and that there is an Error report on the way, for those
clients that must always lo_commit before lo_return even on Error.

(I know that 4.2 has plans for error-report RETURNs for other layout
types as well, this is part of why)

> Thanks,
> Tao
>

Cheers
Boaz


2013-03-21 14:33:53

by Myklebust, Trond

[permalink] [raw]
Subject: Re: [PATCH v2 2/3] NFSv4.1: Always clear the NFS_INO_LAYOUTCOMMIT in layoutreturn

On Thu, 2013-03-21 at 10:13 -0400, Trond Myklebust wrote:
> Note that clearing NFS_INO_LAYOUTCOMMIT is tricky, since it requires
> you to also clear the NFS_LSEG_LAYOUTCOMMIT bits from the layout
> segments.
> The only two sites that need to do this are the ones that call
> pnfs_return_layout() without first doing a layout commit.
>
> Signed-off-by: Trond Myklebust <[email protected]>
> Acked-by: Benny Halevy <[email protected]>
> Cc: [email protected]
> ---
> fs/nfs/nfs4filelayout.c | 1 -
> fs/nfs/pnfs.c | 35 +++++++++++++++++++++++++++--------
> 2 files changed, 27 insertions(+), 9 deletions(-)
>
> diff --git a/fs/nfs/nfs4filelayout.c b/fs/nfs/nfs4filelayout.c
> index 49eeb04..4fb234d 100644
> --- a/fs/nfs/nfs4filelayout.c
> +++ b/fs/nfs/nfs4filelayout.c
> @@ -129,7 +129,6 @@ static void filelayout_fenceme(struct inode *inode, struct pnfs_layout_hdr *lo)
> {
> if (!test_and_clear_bit(NFS_LAYOUT_RETURN, &lo->plh_flags))
> return;
> - clear_bit(NFS_INO_LAYOUTCOMMIT, &NFS_I(inode)->flags);
> pnfs_return_layout(inode);
> }
>
> diff --git a/fs/nfs/pnfs.c b/fs/nfs/pnfs.c
> index c6bbb53..b9d0682 100644
> --- a/fs/nfs/pnfs.c
> +++ b/fs/nfs/pnfs.c
> @@ -417,6 +417,16 @@ should_free_lseg(struct pnfs_layout_range *lseg_range,
> lo_seg_intersecting(lseg_range, recall_range);
> }
>
> +static bool lseg_dec_and_remove_zero(struct pnfs_layout_segment *lseg,

I'm adding a 'pnfs' prefix for looks...

--
Trond Myklebust
Linux NFS client maintainer

NetApp
[email protected]
http://www.netapp.com

2013-03-21 14:13:06

by Myklebust, Trond

[permalink] [raw]
Subject: [PATCH v2 2/3] NFSv4.1: Always clear the NFS_INO_LAYOUTCOMMIT in layoutreturn

Note that clearing NFS_INO_LAYOUTCOMMIT is tricky, since it requires
you to also clear the NFS_LSEG_LAYOUTCOMMIT bits from the layout
segments.
The only two sites that need to do this are the ones that call
pnfs_return_layout() without first doing a layout commit.

Signed-off-by: Trond Myklebust <[email protected]>
Acked-by: Benny Halevy <[email protected]>
Cc: [email protected]
---
fs/nfs/nfs4filelayout.c | 1 -
fs/nfs/pnfs.c | 35 +++++++++++++++++++++++++++--------
2 files changed, 27 insertions(+), 9 deletions(-)

diff --git a/fs/nfs/nfs4filelayout.c b/fs/nfs/nfs4filelayout.c
index 49eeb04..4fb234d 100644
--- a/fs/nfs/nfs4filelayout.c
+++ b/fs/nfs/nfs4filelayout.c
@@ -129,7 +129,6 @@ static void filelayout_fenceme(struct inode *inode, struct pnfs_layout_hdr *lo)
{
if (!test_and_clear_bit(NFS_LAYOUT_RETURN, &lo->plh_flags))
return;
- clear_bit(NFS_INO_LAYOUTCOMMIT, &NFS_I(inode)->flags);
pnfs_return_layout(inode);
}

diff --git a/fs/nfs/pnfs.c b/fs/nfs/pnfs.c
index c6bbb53..b9d0682 100644
--- a/fs/nfs/pnfs.c
+++ b/fs/nfs/pnfs.c
@@ -417,6 +417,16 @@ should_free_lseg(struct pnfs_layout_range *lseg_range,
lo_seg_intersecting(lseg_range, recall_range);
}

+static bool lseg_dec_and_remove_zero(struct pnfs_layout_segment *lseg,
+ struct list_head *tmp_list)
+{
+ if (!atomic_dec_and_test(&lseg->pls_refcount))
+ return false;
+ pnfs_layout_remove_lseg(lseg->pls_layout, lseg);
+ list_add(&lseg->pls_list, tmp_list);
+ return true;
+}
+
/* Returns 1 if lseg is removed from list, 0 otherwise */
static int mark_lseg_invalid(struct pnfs_layout_segment *lseg,
struct list_head *tmp_list)
@@ -430,11 +440,8 @@ static int mark_lseg_invalid(struct pnfs_layout_segment *lseg,
*/
dprintk("%s: lseg %p ref %d\n", __func__, lseg,
atomic_read(&lseg->pls_refcount));
- if (atomic_dec_and_test(&lseg->pls_refcount)) {
- pnfs_layout_remove_lseg(lseg->pls_layout, lseg);
- list_add(&lseg->pls_list, tmp_list);
+ if (lseg_dec_and_remove_zero(lseg, tmp_list))
rv = 1;
- }
}
return rv;
}
@@ -777,6 +784,21 @@ send_layoutget(struct pnfs_layout_hdr *lo,
return lseg;
}

+static void pnfs_clear_layoutcommit(struct inode *inode,
+ struct list_head *head)
+{
+ struct nfs_inode *nfsi = NFS_I(inode);
+ struct pnfs_layout_segment *lseg, *tmp;
+
+ if (!test_and_clear_bit(NFS_INO_LAYOUTCOMMIT, &nfsi->flags))
+ return;
+ list_for_each_entry_safe(lseg, tmp, &nfsi->layout->plh_segs, pls_list) {
+ if (!test_and_clear_bit(NFS_LSEG_LAYOUTCOMMIT, &lseg->pls_flags))
+ continue;
+ lseg_dec_and_remove_zero(lseg, head);
+ }
+}
+
/*
* Initiates a LAYOUTRETURN(FILE), and removes the pnfs_layout_hdr
* when the layout segment list is empty.
@@ -808,6 +830,7 @@ _pnfs_return_layout(struct inode *ino)
/* Reference matched in nfs4_layoutreturn_release */
pnfs_get_layout_hdr(lo);
empty = list_empty(&lo->plh_segs);
+ pnfs_clear_layoutcommit(ino, &tmp_list);
pnfs_mark_matching_lsegs_invalid(lo, &tmp_list, NULL);
/* Don't send a LAYOUTRETURN if list was initially empty */
if (empty) {
@@ -820,8 +843,6 @@ _pnfs_return_layout(struct inode *ino)
spin_unlock(&ino->i_lock);
pnfs_free_lseg_list(&tmp_list);

- WARN_ON(test_bit(NFS_INO_LAYOUTCOMMIT, &nfsi->flags));
-
lrp = kzalloc(sizeof(*lrp), GFP_KERNEL);
if (unlikely(lrp == NULL)) {
status = -ENOMEM;
@@ -1458,7 +1479,6 @@ static void pnfs_ld_handle_write_error(struct nfs_write_data *data)
dprintk("pnfs write error = %d\n", hdr->pnfs_error);
if (NFS_SERVER(hdr->inode)->pnfs_curr_ld->flags &
PNFS_LAYOUTRET_ON_ERROR) {
- clear_bit(NFS_INO_LAYOUTCOMMIT, &NFS_I(hdr->inode)->flags);
pnfs_return_layout(hdr->inode);
}
if (!test_and_set_bit(NFS_IOHDR_REDO, &hdr->flags))
@@ -1613,7 +1633,6 @@ static void pnfs_ld_handle_read_error(struct nfs_read_data *data)
dprintk("pnfs read error = %d\n", hdr->pnfs_error);
if (NFS_SERVER(hdr->inode)->pnfs_curr_ld->flags &
PNFS_LAYOUTRET_ON_ERROR) {
- clear_bit(NFS_INO_LAYOUTCOMMIT, &NFS_I(hdr->inode)->flags);
pnfs_return_layout(hdr->inode);
}
if (!test_and_set_bit(NFS_IOHDR_REDO, &hdr->flags))
--
1.8.1.4


2013-03-22 01:28:40

by Peng Tao

[permalink] [raw]
Subject: Re: [PATCH v2 2/3] NFSv4.1: Always clear the NFS_INO_LAYOUTCOMMIT in layoutreturn

On Fri, Mar 22, 2013 at 5:59 AM, Boaz Harrosh <[email protected]> wrote:
> On 03/21/2013 07:06 PM, Peng Tao wrote:
>> On Thu, Mar 21, 2013 at 10:13:00AM -0400, Trond Myklebust wrote:
>> <snip>
>>> @@ -1458,7 +1479,6 @@ static void pnfs_ld_handle_write_error(struct nfs_write_data *data)
>>> dprintk("pnfs write error = %d\n", hdr->pnfs_error);
>>> if (NFS_SERVER(hdr->inode)->pnfs_curr_ld->flags &
>>> PNFS_LAYOUTRET_ON_ERROR) {
>>> - clear_bit(NFS_INO_LAYOUTCOMMIT, &NFS_I(hdr->inode)->flags);
>> Hi Trond and Boaz,
>>
>> If object layout requires layout being committed before returned (as fixed in
>> the 3/3 patch), is it a potential problem to directly return layout here as
>> well? e.g., if one lseg is successfully written and pending layoutcommit,
>> then another lseg of the same file failed read/write, then layout will be
>> returned w/o layoutcommit. For blocklayout, it is a potential data corruption
>> and that's why block layout doesn't set PNFS_LAYOUTRET_ON_ERROR bit. So
>> I'm wondering if object will suffer from the same issue?
>>
>
> Hi Tao
>
> No, not at all. The objects layout has error reported as part of the
> layout_return OPT. With exact devices that failed and why. In fact the
> data should not be "committed" per ce, but a recovery process must be
> preformed because we know that not all data of a stripe was committed
> including parity, and the raid5 check-some is surly wrong. This is why
> there is an error bit in layout_commit OPT to denote that this is not
> a true commit and that there is an Error report on the way, for those
> clients that must always lo_commit before lo_return even on Error.
>
Thanks for explaining. I learned more about objects :)

Cheers,
Tao

2013-03-21 17:19:09

by Peng Tao

[permalink] [raw]
Subject: Re: [PATCH v2 1/3] NFSv4.1: Fix a race in pNFS layoutcommit

On Thu, Mar 21, 2013 at 10:12:59AM -0400, Trond Myklebust wrote:
> We need to clear the NFS_LSEG_LAYOUTCOMMIT bits atomically with the
> NFS_INO_LAYOUTCOMMIT bit, otherwise we may end up with situations
> where the two are out of sync.
> The first half of the problem is to ensure that pnfs_layoutcommit_inode
> clears the NFS_LSEG_LAYOUTCOMMIT bit through pnfs_list_write_lseg.
> We still need to keep the reference to those segments until the RPC call
> is finished, so in order to make it clear _where_ those references come
> from, we add a helper pnfs_list_write_lseg_done() that cleans up after
> pnfs_list_write_lseg.
>
I don't see any harm to block layout client in all the three patches.
Thanks for sorting it out.

Best,
Tao

2013-03-21 14:13:07

by Myklebust, Trond

[permalink] [raw]
Subject: [PATCH v2 3/3] NFSv4.1: Add a helper pnfs_commit_and_return_layout

In order to be able to safely return the layout in nfs4_proc_setattr,
we need to block new uses of the layout, wait for all outstanding
users of the layout to complete, commit the layout and then return it.

This patch adds a helper in order to do all this safely.

Signed-off-by: Trond Myklebust <[email protected]>
Cc: Boaz Harrosh <[email protected]>
---
fs/nfs/nfs4proc.c | 2 +-
fs/nfs/pnfs.c | 27 +++++++++++++++++++++++++++
fs/nfs/pnfs.h | 6 ++++++
3 files changed, 34 insertions(+), 1 deletion(-)

diff --git a/fs/nfs/nfs4proc.c b/fs/nfs/nfs4proc.c
index 6ccdd4f..26431cf 100644
--- a/fs/nfs/nfs4proc.c
+++ b/fs/nfs/nfs4proc.c
@@ -2632,7 +2632,7 @@ nfs4_proc_setattr(struct dentry *dentry, struct nfs_fattr *fattr,
int status;

if (pnfs_ld_layoutret_on_setattr(inode))
- pnfs_return_layout(inode);
+ pnfs_commit_and_return_layout(inode);

nfs_fattr_init(fattr);

diff --git a/fs/nfs/pnfs.c b/fs/nfs/pnfs.c
index b9d0682..2b5d98e 100644
--- a/fs/nfs/pnfs.c
+++ b/fs/nfs/pnfs.c
@@ -866,6 +866,33 @@ out:
}
EXPORT_SYMBOL_GPL(_pnfs_return_layout);

+int
+pnfs_commit_and_return_layout(struct inode *inode)
+{
+ struct pnfs_layout_hdr *lo;
+ int ret;
+
+ spin_lock(&inode->i_lock);
+ lo = NFS_I(inode)->layout;
+ if (lo == NULL) {
+ spin_unlock(&inode->i_lock);
+ return 0;
+ }
+ pnfs_get_layout_hdr(lo);
+ /* Block new layoutgets and read/write to ds */
+ lo->plh_block_lgets++;
+ spin_unlock(&inode->i_lock);
+ filemap_fdatawait(inode->i_mapping);
+ ret = pnfs_layoutcommit_inode(inode, true);
+ if (ret == 0)
+ ret = _pnfs_return_layout(inode);
+ spin_lock(&inode->i_lock);
+ lo->plh_block_lgets--;
+ spin_unlock(&inode->i_lock);
+ pnfs_put_layout_hdr(lo);
+ return ret;
+}
+
bool pnfs_roc(struct inode *ino)
{
struct pnfs_layout_hdr *lo;
diff --git a/fs/nfs/pnfs.h b/fs/nfs/pnfs.h
index 94ba804..f5f8a47 100644
--- a/fs/nfs/pnfs.h
+++ b/fs/nfs/pnfs.h
@@ -219,6 +219,7 @@ void pnfs_set_layoutcommit(struct nfs_write_data *wdata);
void pnfs_cleanup_layoutcommit(struct nfs4_layoutcommit_data *data);
int pnfs_layoutcommit_inode(struct inode *inode, bool sync);
int _pnfs_return_layout(struct inode *);
+int pnfs_commit_and_return_layout(struct inode *);
void pnfs_ld_write_done(struct nfs_write_data *);
void pnfs_ld_read_done(struct nfs_read_data *);
struct pnfs_layout_segment *pnfs_update_layout(struct inode *ino,
@@ -407,6 +408,11 @@ static inline int pnfs_return_layout(struct inode *ino)
return 0;
}

+static inline int pnfs_commit_and_return_layout(struct inode *inode)
+{
+ return 0;
+}
+
static inline bool
pnfs_ld_layoutret_on_setattr(struct inode *inode)
{
--
1.8.1.4


2013-03-21 17:13:22

by Peng Tao

[permalink] [raw]
Subject: Re: [PATCH v2 2/3] NFSv4.1: Always clear the NFS_INO_LAYOUTCOMMIT in layoutreturn

On Thu, Mar 21, 2013 at 10:13:00AM -0400, Trond Myklebust wrote:
<snip>
> @@ -1458,7 +1479,6 @@ static void pnfs_ld_handle_write_error(struct nfs_write_data *data)
> dprintk("pnfs write error = %d\n", hdr->pnfs_error);
> if (NFS_SERVER(hdr->inode)->pnfs_curr_ld->flags &
> PNFS_LAYOUTRET_ON_ERROR) {
> - clear_bit(NFS_INO_LAYOUTCOMMIT, &NFS_I(hdr->inode)->flags);
Hi Trond and Boaz,

If object layout requires layout being committed before returned (as fixed in
the 3/3 patch), is it a potential problem to directly return layout here as
well? e.g., if one lseg is successfully written and pending layoutcommit,
then another lseg of the same file failed read/write, then layout will be
returned w/o layoutcommit. For blocklayout, it is a potential data corruption
and that's why block layout doesn't set PNFS_LAYOUTRET_ON_ERROR bit. So
I'm wondering if object will suffer from the same issue?

Thanks,
Tao

--
Here lieth one whos name was writ on water.