2011-11-29 16:03:42

by Peng Tao

[permalink] [raw]
Subject: [PATCH 0/9] pnfsblock patches

Hi, Trond,

Following are some bug fixes and improvements for the blocklayout driver. All have been
sent to the list before and have been tested for several weeks.

Peng Tao (9):
pnfsblock: cleanup bl_mark_sectors_init
pnfsblock: acquire im_lock in _preload_range
pnfsblock: move find lock page logic out of bl_write_pagelist
pnfsblock: set read/write tk_status to pnfs_error
pnfsblock: remove rpc_call_ops from struct parallel_io
pnfsblock: clean up _add_entry
pnfsblock: alloc short extent before submit bio
pnfsblock: don't spinlock when freeing block_dev
pnfsblock: limit bio page count

fs/nfs/blocklayout/blocklayout.c | 202 ++++++++++++++++++++++++-------------
fs/nfs/blocklayout/blocklayout.h | 12 ++-
fs/nfs/blocklayout/extents.c | 176 ++++++++++++++-------------------
3 files changed, 215 insertions(+), 175 deletions(-)



2011-11-29 16:04:22

by Peng Tao

[permalink] [raw]
Subject: [PATCH 4/9] pnfsblock: set read/write tk_status to pnfs_error

This passes the IO status to the upper layer.

Signed-off-by: Peng Tao <[email protected]>
---
fs/nfs/blocklayout/blocklayout.c | 3 ++-
1 files changed, 2 insertions(+), 1 deletions(-)

diff --git a/fs/nfs/blocklayout/blocklayout.c b/fs/nfs/blocklayout/blocklayout.c
index c83ad70..883bb27 100644
--- a/fs/nfs/blocklayout/blocklayout.c
+++ b/fs/nfs/blocklayout/blocklayout.c
@@ -216,6 +216,7 @@ bl_end_par_io_read(void *data)
{
struct nfs_read_data *rdata = data;

+ rdata->task.tk_status = rdata->pnfs_error;
INIT_WORK(&rdata->task.u.tk_work, bl_read_cleanup);
schedule_work(&rdata->task.u.tk_work);
}
@@ -405,7 +406,7 @@ static void bl_end_par_io_write(void *data)
{
struct nfs_write_data *wdata = data;

- wdata->task.tk_status = 0;
+ wdata->task.tk_status = wdata->pnfs_error;
wdata->verf.committed = NFS_FILE_SYNC;
INIT_WORK(&wdata->task.u.tk_work, bl_write_cleanup);
schedule_work(&wdata->task.u.tk_work);
--
1.7.1.262.g5ef3d


2011-11-29 16:04:06

by Peng Tao

[permalink] [raw]
Subject: [PATCH 1/9] pnfsblock: cleanup bl_mark_sectors_init

It does not need to manipulate partially initialized blocks.
Writeback code takes care of them.

Signed-off-by: Peng Tao <[email protected]>
---
fs/nfs/blocklayout/blocklayout.c | 6 +--
fs/nfs/blocklayout/blocklayout.h | 3 +-
fs/nfs/blocklayout/extents.c | 76 ++-----------------------------------
3 files changed, 8 insertions(+), 77 deletions(-)

diff --git a/fs/nfs/blocklayout/blocklayout.c b/fs/nfs/blocklayout/blocklayout.c
index 281ae95..4ced0b0 100644
--- a/fs/nfs/blocklayout/blocklayout.c
+++ b/fs/nfs/blocklayout/blocklayout.c
@@ -571,8 +571,7 @@ fill_invalid_ext:
unlock_page(page);

ret = bl_mark_sectors_init(be->be_inval, isect,
- PAGE_CACHE_SECTORS,
- NULL);
+ PAGE_CACHE_SECTORS);
if (unlikely(ret)) {
dprintk("%s bl_mark_sectors_init fail %d\n",
__func__, ret);
@@ -621,8 +620,7 @@ next_page:
}
if (be->be_state == PNFS_BLOCK_INVALID_DATA) {
ret = bl_mark_sectors_init(be->be_inval, isect,
- PAGE_CACHE_SECTORS,
- NULL);
+ PAGE_CACHE_SECTORS);
if (unlikely(ret)) {
dprintk("%s bl_mark_sectors_init fail %d\n",
__func__, ret);
diff --git a/fs/nfs/blocklayout/blocklayout.h b/fs/nfs/blocklayout/blocklayout.h
index 42acf7e..60728ac 100644
--- a/fs/nfs/blocklayout/blocklayout.h
+++ b/fs/nfs/blocklayout/blocklayout.h
@@ -186,8 +186,7 @@ struct pnfs_block_extent *
bl_find_get_extent(struct pnfs_block_layout *bl, sector_t isect,
struct pnfs_block_extent **cow_read);
int bl_mark_sectors_init(struct pnfs_inval_markings *marks,
- sector_t offset, sector_t length,
- sector_t **pages);
+ sector_t offset, sector_t length);
void bl_put_extent(struct pnfs_block_extent *be);
struct pnfs_block_extent *bl_alloc_extent(void);
int bl_is_sector_init(struct pnfs_inval_markings *marks, sector_t isect);
diff --git a/fs/nfs/blocklayout/extents.c b/fs/nfs/blocklayout/extents.c
index 19fa7b0..0fc1321 100644
--- a/fs/nfs/blocklayout/extents.c
+++ b/fs/nfs/blocklayout/extents.c
@@ -179,33 +179,6 @@ static int _preload_range(struct my_tree *tree, u64 offset, u64 length)
return status;
}

-static void set_needs_init(sector_t *array, sector_t offset)
-{
- sector_t *p = array;
-
- dprintk("%s enter\n", __func__);
- if (!p)
- return;
- while (*p < offset)
- p++;
- if (*p == offset)
- return;
- else if (*p == ~0) {
- *p++ = offset;
- *p = ~0;
- return;
- } else {
- sector_t *save = p;
- dprintk("%s Adding %llu\n", __func__, (u64)offset);
- while (*p != ~0)
- p++;
- p++;
- memmove(save + 1, save, (char *)p - (char *)save);
- *save = offset;
- return;
- }
-}
-
/* We are relying on page lock to serialize this */
int bl_is_sector_init(struct pnfs_inval_markings *marks, sector_t isect)
{
@@ -261,28 +234,15 @@ static int is_range_written(struct pnfs_inval_markings *marks,

/* Marks sectors in [offest, offset_length) as having been initialized.
* All lengths are step-aligned, where step is min(pagesize, blocksize).
- * Notes where partial block is initialized, and helps prepare it for
- * complete initialization later.
+ * Currently assumes offset is page-aligned
*/
-/* Currently assumes offset is page-aligned */
int bl_mark_sectors_init(struct pnfs_inval_markings *marks,
- sector_t offset, sector_t length,
- sector_t **pages)
+ sector_t offset, sector_t length)
{
- sector_t s, start, end;
- sector_t *array = NULL; /* Pages to mark */
+ sector_t start, end;

dprintk("%s(offset=%llu,len=%llu) enter\n",
__func__, (u64)offset, (u64)length);
- s = max((sector_t) 3,
- 2 * (marks->im_block_size / (PAGE_CACHE_SECTORS)));
- dprintk("%s set max=%llu\n", __func__, (u64)s);
- if (pages) {
- array = kmalloc(s * sizeof(sector_t), GFP_NOFS);
- if (!array)
- goto outerr;
- array[0] = ~0;
- }

start = normalize(offset, marks->im_block_size);
end = normalize_up(offset + length, marks->im_block_size);
@@ -290,41 +250,15 @@ int bl_mark_sectors_init(struct pnfs_inval_markings *marks,
goto outerr;

spin_lock(&marks->im_lock);
-
- for (s = normalize_up(start, PAGE_CACHE_SECTORS);
- s < offset; s += PAGE_CACHE_SECTORS) {
- dprintk("%s pre-area pages\n", __func__);
- /* Portion of used block is not initialized */
- if (!_has_tag(&marks->im_tree, s, EXTENT_INITIALIZED))
- set_needs_init(array, s);
- }
if (_set_range(&marks->im_tree, EXTENT_INITIALIZED, offset, length))
goto out_unlock;
- for (s = normalize_up(offset + length, PAGE_CACHE_SECTORS);
- s < end; s += PAGE_CACHE_SECTORS) {
- dprintk("%s post-area pages\n", __func__);
- if (!_has_tag(&marks->im_tree, s, EXTENT_INITIALIZED))
- set_needs_init(array, s);
- }
-
spin_unlock(&marks->im_lock);

- if (pages) {
- if (array[0] == ~0) {
- kfree(array);
- *pages = NULL;
- } else
- *pages = array;
- }
return 0;

- out_unlock:
+out_unlock:
spin_unlock(&marks->im_lock);
- outerr:
- if (pages) {
- kfree(array);
- *pages = NULL;
- }
+outerr:
return -ENOMEM;
}

--
1.7.1.262.g5ef3d


2011-11-29 16:04:17

by Peng Tao

[permalink] [raw]
Subject: [PATCH 3/9] pnfsblock: move find lock page logic out of bl_write_pagelist

Also avoid unnecessary lock_page if page is handled by others.

Signed-off-by: Peng Tao <[email protected]>
---
fs/nfs/blocklayout/blocklayout.c | 78 ++++++++++++++++++++++++++------------
1 files changed, 54 insertions(+), 24 deletions(-)

diff --git a/fs/nfs/blocklayout/blocklayout.c b/fs/nfs/blocklayout/blocklayout.c
index 4ced0b0..c83ad70 100644
--- a/fs/nfs/blocklayout/blocklayout.c
+++ b/fs/nfs/blocklayout/blocklayout.c
@@ -484,6 +484,55 @@ cleanup:
return ret;
}

+/* Find or create a zeroing page marked being writeback.
+ * Return ERR_PTR on error, NULL to indicate skip this page and page itself
+ * to indicate write out.
+ */
+static struct page *
+bl_find_get_zeroing_page(struct inode *inode, pgoff_t index,
+ struct pnfs_block_extent *cow_read)
+{
+ struct page *page;
+ int locked = 0;
+ page = find_get_page(inode->i_mapping, index);
+ if (page)
+ goto check_page;
+
+ page = find_or_create_page(inode->i_mapping, index, GFP_NOFS);
+ if (unlikely(!page)) {
+ dprintk("%s oom\n", __func__);
+ return ERR_PTR(-ENOMEM);
+ }
+ locked = 1;
+
+check_page:
+ /* PageDirty: Other will write this out
+ * PageWriteback: Other is writing this out
+ * PageUptodate: It was read before
+ */
+ if (PageDirty(page) || PageWriteback(page)) {
+ print_page(page);
+ if (locked)
+ unlock_page(page);
+ page_cache_release(page);
+ return NULL;
+ }
+
+ if (!locked) {
+ lock_page(page);
+ locked = 1;
+ goto check_page;
+ }
+ if (!PageUptodate(page)) {
+ /* New page, readin or zero it */
+ init_page_for_write(page, cow_read);
+ }
+ set_page_writeback(page);
+ unlock_page(page);
+
+ return page;
+}
+
static enum pnfs_try_status
bl_write_pagelist(struct nfs_write_data *wdata, int sync)
{
@@ -543,32 +592,13 @@ fill_invalid_ext:
dprintk("%s zero %dth page: index %lu isect %llu\n",
__func__, npg_zero, index,
(unsigned long long)isect);
- page =
- find_or_create_page(wdata->inode->i_mapping, index,
- GFP_NOFS);
- if (!page) {
- dprintk("%s oom\n", __func__);
- wdata->pnfs_error = -ENOMEM;
+ page = bl_find_get_zeroing_page(wdata->inode, index,
+ cow_read);
+ if (unlikely(IS_ERR(page))) {
+ wdata->pnfs_error = PTR_ERR(page);
goto out;
- }
-
- /* PageDirty: Other will write this out
- * PageWriteback: Other is writing this out
- * PageUptodate: It was read before
- * sector_initialized: already written out
- */
- if (PageDirty(page) || PageWriteback(page)) {
- print_page(page);
- unlock_page(page);
- page_cache_release(page);
+ } else if (page == NULL)
goto next_page;
- }
- if (!PageUptodate(page)) {
- /* New page, readin or zero it */
- init_page_for_write(page, cow_read);
- }
- set_page_writeback(page);
- unlock_page(page);

ret = bl_mark_sectors_init(be->be_inval, isect,
PAGE_CACHE_SECTORS);
--
1.7.1.262.g5ef3d


2011-11-29 16:04:31

by Peng Tao

[permalink] [raw]
Subject: [PATCH 6/9] pnfsblock: clean up _add_entry

It is wrong to kmalloc in _add_entry() as it is called inside a
spinlock. Memory should already be allocated before _add_entry() is called.

Signed-off-by: Peng Tao <[email protected]>
---
fs/nfs/blocklayout/extents.c | 8 +-------
1 files changed, 1 insertions(+), 7 deletions(-)

diff --git a/fs/nfs/blocklayout/extents.c b/fs/nfs/blocklayout/extents.c
index f383524..d0f52ed 100644
--- a/fs/nfs/blocklayout/extents.c
+++ b/fs/nfs/blocklayout/extents.c
@@ -110,13 +110,7 @@ static int _add_entry(struct my_tree *tree, u64 s, int32_t tag,
return 0;
} else {
struct pnfs_inval_tracking *new;
- if (storage)
- new = storage;
- else {
- new = kmalloc(sizeof(*new), GFP_NOFS);
- if (!new)
- return -ENOMEM;
- }
+ new = storage;
new->it_sector = s;
new->it_tags = (1 << tag);
list_add(&new->it_link, &pos->it_link);
--
1.7.1.262.g5ef3d


2011-11-29 16:04:26

by Peng Tao

[permalink] [raw]
Subject: [PATCH 5/9] pnfsblock: remove rpc_call_ops from struct parallel_io

block layout can just make use of generic read/write_done.

Signed-off-by: Peng Tao <[email protected]>
---
fs/nfs/blocklayout/blocklayout.c | 13 -------------
1 files changed, 0 insertions(+), 13 deletions(-)

diff --git a/fs/nfs/blocklayout/blocklayout.c b/fs/nfs/blocklayout/blocklayout.c
index 883bb27..7fc69c9 100644
--- a/fs/nfs/blocklayout/blocklayout.c
+++ b/fs/nfs/blocklayout/blocklayout.c
@@ -90,7 +90,6 @@ static int is_writable(struct pnfs_block_extent *be, sector_t isect)
*/
struct parallel_io {
struct kref refcnt;
- struct rpc_call_ops call_ops;
void (*pnfs_callback) (void *data);
void *data;
};
@@ -221,14 +220,6 @@ bl_end_par_io_read(void *data)
schedule_work(&rdata->task.u.tk_work);
}

-/* We don't want normal .rpc_call_done callback used, so we replace it
- * with this stub.
- */
-static void bl_rpc_do_nothing(struct rpc_task *task, void *calldata)
-{
- return;
-}
-
static enum pnfs_try_status
bl_read_pagelist(struct nfs_read_data *rdata)
{
@@ -248,8 +239,6 @@ bl_read_pagelist(struct nfs_read_data *rdata)
par = alloc_parallel(rdata);
if (!par)
goto use_mds;
- par->call_ops = *rdata->mds_ops;
- par->call_ops.rpc_call_done = bl_rpc_do_nothing;
par->pnfs_callback = bl_end_par_io_read;
/* At this point, we can no longer jump to use_mds */

@@ -559,8 +548,6 @@ bl_write_pagelist(struct nfs_write_data *wdata, int sync)
par = alloc_parallel(wdata);
if (!par)
return PNFS_NOT_ATTEMPTED;
- par->call_ops = *wdata->mds_ops;
- par->call_ops.rpc_call_done = bl_rpc_do_nothing;
par->pnfs_callback = bl_end_par_io_write;
/* At this point, have to be more careful with error handling */

--
1.7.1.262.g5ef3d


2011-11-30 13:05:14

by Benny Halevy

[permalink] [raw]
Subject: Re: [PATCH 0/9] pnfsblock patches

I merged patches 8 and 9 into my tree

Benny

On 2011-11-30 04:37, [email protected] wrote:
>> -----Original Message-----
>> From: [email protected] [mailto:[email protected]] On Behalf Of Jim Rees
>> Sent: Wednesday, November 30, 2011 2:44 AM
>> To: Peng Tao
>> Cc: [email protected]; [email protected]
>> Subject: Re: [PATCH 0/9] pnfsblock patches
>>
>> Peng Tao wrote:
>>
>> Hi, Trond,
>>
>> Following are some bugfix and improvements for blocklayout driver. All have been
>> sent to the list before and have been tested for several weeks.
>>
>> Patches 2/9 and 8/9 are missing again. I wonder why these particular
>> patches are getting filtered out?
> I have received these patches in both my gmail box and emc mail box. So I guess vger dropped them, again... Somehow it thinks I'm a spammer?
> I attached 2/9, 8/9, 9/9 here...
>
> Thanks,
> Tao
>> --
>> To unsubscribe from this list: send the line "unsubscribe linux-nfs" in
>> the body of a message to [email protected]
>> More majordomo info at http://vger.kernel.org/majordomo-info.html
>

2011-11-30 02:37:46

by Peng, Tao

[permalink] [raw]
Subject: RE: [PATCH 0/9] pnfsblock patches

> -----Original Message-----
> From: [email protected] [mailto:[email protected]] On Behalf Of Jim Rees
> Sent: Wednesday, November 30, 2011 2:44 AM
> To: Peng Tao
> Cc: [email protected]; [email protected]
> Subject: Re: [PATCH 0/9] pnfsblock patches
>
> Peng Tao wrote:
>
> Hi, Trond,
>
> Following are some bugfix and improvements for blocklayout driver. All have been
> sent to the list before and have been tested for several weeks.
>
> Patches 2/9 and 8/9 are missing again. I wonder why these particular
> patches are getting filtered out?
I have received these patches in both my gmail box and emc mail box. So I guess vger dropped them, again... Somehow it thinks I'm a spammer?
I attached 2/9, 8/9, 9/9 here...

Thanks,
Tao
> --
> To unsubscribe from this list: send the line "unsubscribe linux-nfs" in
> the body of a message to [email protected]
> More majordomo info at http://vger.kernel.org/majordomo-info.html


Attachments:
0002-pnfsblock-acquire-im_lock-in-_preload_range.patch (1.96 kB)
0002-pnfsblock-acquire-im_lock-in-_preload_range.patch
0008-pnfsblock-don-t-spinlock-when-freeing-block_dev.patch (1.38 kB)
0008-pnfsblock-don-t-spinlock-when-freeing-block_dev.patch
0009-pnfsblock-limit-bio-page-count.patch (1.36 kB)
0009-pnfsblock-limit-bio-page-count.patch
Download all attachments

2011-11-29 21:38:49

by Boaz Harrosh

[permalink] [raw]
Subject: Re: [PATCH 0/9] pnfsblock patches

On 12/02/2011 08:46 PM, Peng Tao wrote:
> Hi, Trond,
>
> Following are some bugfix and improvements for blocklayout driver. All have been
> sent to the list before and have been tested for several weeks.
>

I never got 9 patches in the mail.

I only have 1,3,4,5,6,7

Do you have them on a gitweb, or re-post 2,8,9

Thanks
Boaz

> Peng Tao (9):
> pnfsblock: cleanup bl_mark_sectors_init
> pnfsblock: acquire im_lock in _preload_range
> pnfsblock: move find lock page logic out of bl_write_pagelist
> pnfsblock: set read/write tk_status to pnfs_error
> pnfsblock: remove rpc_call_ops from struct parallel_io
> pnfsblock: clean up _add_entry
> pnfsblock: alloc short extent before submit bio
> pnfsblock: don't spinlock when freeing block_dev
> pnfsblock: limit bio page count
>
> fs/nfs/blocklayout/blocklayout.c | 202 ++++++++++++++++++++++++-------------
> fs/nfs/blocklayout/blocklayout.h | 12 ++-
> fs/nfs/blocklayout/extents.c | 176 ++++++++++++++-------------------
> 3 files changed, 215 insertions(+), 175 deletions(-)
>
> --
> To unsubscribe from this list: send the line "unsubscribe linux-nfs" in
> the body of a message to [email protected]
> More majordomo info at http://vger.kernel.org/majordomo-info.html


2011-11-30 13:26:20

by Peng Tao

[permalink] [raw]
Subject: Re: [PATCH 0/9] pnfsblock patches

On Wed, Nov 30, 2011 at 9:05 PM, Benny Halevy <[email protected]> wrote:
> I merged patches 8 and 9 into my tree
>
Thanks!

> Benny
>
> On 2011-11-30 04:37, [email protected] wrote:
>>> -----Original Message-----
>>> From: [email protected] [mailto:[email protected]] On Behalf Of Jim Rees
>>> Sent: Wednesday, November 30, 2011 2:44 AM
>>> To: Peng Tao
>>> Cc: [email protected]; [email protected]
>>> Subject: Re: [PATCH 0/9] pnfsblock patches
>>>
>>> Peng Tao wrote:
>>>
>>>   Hi, Trond,
>>>
>>>   Following are some bugfix and improvements for blocklayout driver. All have been
>>>   sent to the list before and have been tested for several weeks.
>>>
>>> Patches 2/9 and 8/9 are missing again.  I wonder why these particular
>>> patches are getting filtered out?
>> I have received these patches in both my gmail box and emc mail box. So I guess vger dropped them, again... Somehow it thinks I'm a spammer?
>> I attached 2/9, 8/9, 9/9 here...
>>
>> Thanks,
>> Tao
>>> --
>>> To unsubscribe from this list: send the line "unsubscribe linux-nfs" in
>>> the body of a message to [email protected]
>>> More majordomo info at  http://vger.kernel.org/majordomo-info.html
>>

2011-11-29 16:04:36

by Peng Tao

[permalink] [raw]
Subject: [PATCH 7/9] pnfsblock: alloc short extent before submit bio

As discussed earlier, it is better for the block client to allocate memory for
tracking extent state before submitting a bio. So this patch does that by allocating
a short_extent for every INVALID extent touched by write pagelist and for
every zeroing page we create, saving them in the layout header. Then in end_io we
can just use them to create commit list items and avoid memory allocation there.

Signed-off-by: Peng Tao <[email protected]>
---
fs/nfs/blocklayout/blocklayout.c | 74 ++++++++++++++++++++++++++-------
fs/nfs/blocklayout/blocklayout.h | 9 ++++-
fs/nfs/blocklayout/extents.c | 85 +++++++++++++++++++++++++++++---------
3 files changed, 131 insertions(+), 37 deletions(-)

diff --git a/fs/nfs/blocklayout/blocklayout.c b/fs/nfs/blocklayout/blocklayout.c
index 7fc69c9..1dd2983 100644
--- a/fs/nfs/blocklayout/blocklayout.c
+++ b/fs/nfs/blocklayout/blocklayout.c
@@ -90,8 +90,9 @@ static int is_writable(struct pnfs_block_extent *be, sector_t isect)
*/
struct parallel_io {
struct kref refcnt;
- void (*pnfs_callback) (void *data);
+ void (*pnfs_callback) (void *data, int num_se);
void *data;
+ int bse_count;
};

static inline struct parallel_io *alloc_parallel(void *data)
@@ -102,6 +103,7 @@ static inline struct parallel_io *alloc_parallel(void *data)
if (rv) {
rv->data = data;
kref_init(&rv->refcnt);
+ rv->bse_count = 0;
}
return rv;
}
@@ -116,7 +118,7 @@ static void destroy_parallel(struct kref *kref)
struct parallel_io *p = container_of(kref, struct parallel_io, refcnt);

dprintk("%s enter\n", __func__);
- p->pnfs_callback(p->data);
+ p->pnfs_callback(p->data, p->bse_count);
kfree(p);
}

@@ -211,7 +213,7 @@ static void bl_read_cleanup(struct work_struct *work)
}

static void
-bl_end_par_io_read(void *data)
+bl_end_par_io_read(void *data, int unused)
{
struct nfs_read_data *rdata = data;

@@ -312,6 +314,7 @@ static void mark_extents_written(struct pnfs_block_layout *bl,
{
sector_t isect, end;
struct pnfs_block_extent *be;
+ struct pnfs_block_short_extent *se;

dprintk("%s(%llu, %u)\n", __func__, offset, count);
if (count == 0)
@@ -324,8 +327,11 @@ static void mark_extents_written(struct pnfs_block_layout *bl,
be = bl_find_get_extent(bl, isect, NULL);
BUG_ON(!be); /* FIXME */
len = min(end, be->be_f_offset + be->be_length) - isect;
- if (be->be_state == PNFS_BLOCK_INVALID_DATA)
- bl_mark_for_commit(be, isect, len); /* What if fails? */
+ if (be->be_state == PNFS_BLOCK_INVALID_DATA) {
+ se = bl_pop_one_short_extent(be->be_inval);
+ BUG_ON(!se);
+ bl_mark_for_commit(be, isect, len, se);
+ }
isect += len;
bl_put_extent(be);
}
@@ -347,7 +353,8 @@ static void bl_end_io_write_zero(struct bio *bio, int err)
end_page_writeback(page);
page_cache_release(page);
} while (bvec >= bio->bi_io_vec);
- if (!uptodate) {
+
+ if (unlikely(!uptodate)) {
if (!wdata->pnfs_error)
wdata->pnfs_error = -EIO;
pnfs_set_lo_fail(wdata->lseg);
@@ -356,7 +363,6 @@ static void bl_end_io_write_zero(struct bio *bio, int err)
put_parallel(par);
}

-/* This is basically copied from mpage_end_io_read */
static void bl_end_io_write(struct bio *bio, int err)
{
struct parallel_io *par = bio->bi_private;
@@ -382,7 +388,7 @@ static void bl_write_cleanup(struct work_struct *work)
dprintk("%s enter\n", __func__);
task = container_of(work, struct rpc_task, u.tk_work);
wdata = container_of(task, struct nfs_write_data, task);
- if (!wdata->pnfs_error) {
+ if (likely(!wdata->pnfs_error)) {
/* Marks for LAYOUTCOMMIT */
mark_extents_written(BLK_LSEG2EXT(wdata->lseg),
wdata->args.offset, wdata->args.count);
@@ -391,10 +397,15 @@ static void bl_write_cleanup(struct work_struct *work)
}

/* Called when last of bios associated with a bl_write_pagelist call finishes */
-static void bl_end_par_io_write(void *data)
+static void bl_end_par_io_write(void *data, int num_se)
{
struct nfs_write_data *wdata = data;

+ if (unlikely(wdata->pnfs_error)) {
+ bl_free_short_extents(&BLK_LSEG2EXT(wdata->lseg)->bl_inval,
+ num_se);
+ }
+
wdata->task.tk_status = wdata->pnfs_error;
wdata->verf.committed = NFS_FILE_SYNC;
INIT_WORK(&wdata->task.u.tk_work, bl_write_cleanup);
@@ -547,7 +558,7 @@ bl_write_pagelist(struct nfs_write_data *wdata, int sync)
*/
par = alloc_parallel(wdata);
if (!par)
- return PNFS_NOT_ATTEMPTED;
+ goto out_mds;
par->pnfs_callback = bl_end_par_io_write;
/* At this point, have to be more careful with error handling */

@@ -555,12 +566,15 @@ bl_write_pagelist(struct nfs_write_data *wdata, int sync)
be = bl_find_get_extent(BLK_LSEG2EXT(wdata->lseg), isect, &cow_read);
if (!be || !is_writable(be, isect)) {
dprintk("%s no matching extents!\n", __func__);
- wdata->pnfs_error = -EINVAL;
- goto out;
+ goto out_mds;
}

/* First page inside INVALID extent */
if (be->be_state == PNFS_BLOCK_INVALID_DATA) {
+ if (likely(!bl_push_one_short_extent(be->be_inval)))
+ par->bse_count++;
+ else
+ goto out_mds;
temp = offset >> PAGE_CACHE_SHIFT;
npg_zero = do_div(temp, npg_per_block);
isect = (sector_t) (((offset - npg_zero * PAGE_CACHE_SIZE) &
@@ -598,6 +612,19 @@ fill_invalid_ext:
wdata->pnfs_error = ret;
goto out;
}
+ if (likely(!bl_push_one_short_extent(be->be_inval)))
+ par->bse_count++;
+ else {
+ end_page_writeback(page);
+ page_cache_release(page);
+ wdata->pnfs_error = -ENOMEM;
+ goto out;
+ }
+ /* FIXME: This should be done in bi_end_io */
+ mark_extents_written(BLK_LSEG2EXT(wdata->lseg),
+ page->index << PAGE_CACHE_SHIFT,
+ PAGE_CACHE_SIZE);
+
bio = bl_add_page_to_bio(bio, npg_zero, WRITE,
isect, page, be,
bl_end_io_write_zero, par);
@@ -606,10 +633,6 @@ fill_invalid_ext:
bio = NULL;
goto out;
}
- /* FIXME: This should be done in bi_end_io */
- mark_extents_written(BLK_LSEG2EXT(wdata->lseg),
- page->index << PAGE_CACHE_SHIFT,
- PAGE_CACHE_SIZE);
next_page:
isect += PAGE_CACHE_SECTORS;
extent_length -= PAGE_CACHE_SECTORS;
@@ -633,6 +656,15 @@ next_page:
wdata->pnfs_error = -EINVAL;
goto out;
}
+ if (be->be_state == PNFS_BLOCK_INVALID_DATA) {
+ if (likely(!bl_push_one_short_extent(
+ be->be_inval)))
+ par->bse_count++;
+ else {
+ wdata->pnfs_error = -ENOMEM;
+ goto out;
+ }
+ }
extent_length = be->be_length -
(isect - be->be_f_offset);
}
@@ -680,6 +712,10 @@ out:
bl_submit_bio(WRITE, bio);
put_parallel(par);
return PNFS_ATTEMPTED;
+out_mds:
+ bl_put_extent(be);
+ kfree(par);
+ return PNFS_NOT_ATTEMPTED;
}

/* FIXME - range ignored */
@@ -706,11 +742,17 @@ static void
release_inval_marks(struct pnfs_inval_markings *marks)
{
struct pnfs_inval_tracking *pos, *temp;
+ struct pnfs_block_short_extent *se, *stemp;

list_for_each_entry_safe(pos, temp, &marks->im_tree.mtt_stub, it_link) {
list_del(&pos->it_link);
kfree(pos);
}
+
+ list_for_each_entry_safe(se, stemp, &marks->im_extents, bse_node) {
+ list_del(&se->bse_node);
+ kfree(se);
+ }
return;
}

diff --git a/fs/nfs/blocklayout/blocklayout.h b/fs/nfs/blocklayout/blocklayout.h
index 60728ac..e31a2df 100644
--- a/fs/nfs/blocklayout/blocklayout.h
+++ b/fs/nfs/blocklayout/blocklayout.h
@@ -70,6 +70,7 @@ struct pnfs_inval_markings {
spinlock_t im_lock;
struct my_tree im_tree; /* Sectors that need LAYOUTCOMMIT */
sector_t im_block_size; /* Server blocksize in sectors */
+ struct list_head im_extents; /* Short extents for INVAL->RW conversion */
};

struct pnfs_inval_tracking {
@@ -105,6 +106,7 @@ BL_INIT_INVAL_MARKS(struct pnfs_inval_markings *marks, sector_t blocksize)
{
spin_lock_init(&marks->im_lock);
INIT_LIST_HEAD(&marks->im_tree.mtt_stub);
+ INIT_LIST_HEAD(&marks->im_extents);
marks->im_block_size = blocksize;
marks->im_tree.mtt_step_size = min((sector_t)PAGE_CACHE_SECTORS,
blocksize);
@@ -199,6 +201,11 @@ void clean_pnfs_block_layoutupdate(struct pnfs_block_layout *bl,
int bl_add_merge_extent(struct pnfs_block_layout *bl,
struct pnfs_block_extent *new);
int bl_mark_for_commit(struct pnfs_block_extent *be,
- sector_t offset, sector_t length);
+ sector_t offset, sector_t length,
+ struct pnfs_block_short_extent *new);
+int bl_push_one_short_extent(struct pnfs_inval_markings *marks);
+struct pnfs_block_short_extent *
+bl_pop_one_short_extent(struct pnfs_inval_markings *marks);
+void bl_free_short_extents(struct pnfs_inval_markings *marks, int num_to_free);

#endif /* FS_NFS_NFS4BLOCKLAYOUT_H */
diff --git a/fs/nfs/blocklayout/extents.c b/fs/nfs/blocklayout/extents.c
index d0f52ed..1abac09 100644
--- a/fs/nfs/blocklayout/extents.c
+++ b/fs/nfs/blocklayout/extents.c
@@ -157,10 +157,10 @@ static int _preload_range(struct pnfs_inval_markings *marks,
goto out_cleanup;
}

- spin_lock(&marks->im_lock);
+ spin_lock_bh(&marks->im_lock);
for (s = start; s < end; s += tree->mtt_step_size)
used += _add_entry(tree, s, INTERNAL_EXISTS, storage[used]);
- spin_unlock(&marks->im_lock);
+ spin_unlock_bh(&marks->im_lock);

status = 0;

@@ -179,9 +179,9 @@ int bl_is_sector_init(struct pnfs_inval_markings *marks, sector_t isect)
{
int rv;

- spin_lock(&marks->im_lock);
+ spin_lock_bh(&marks->im_lock);
rv = _has_tag(&marks->im_tree, isect, EXTENT_INITIALIZED);
- spin_unlock(&marks->im_lock);
+ spin_unlock_bh(&marks->im_lock);
return rv;
}

@@ -221,9 +221,9 @@ static int is_range_written(struct pnfs_inval_markings *marks,
{
int rv;

- spin_lock(&marks->im_lock);
+ spin_lock_bh(&marks->im_lock);
rv = _range_has_tag(&marks->im_tree, start, end, EXTENT_WRITTEN);
- spin_unlock(&marks->im_lock);
+ spin_unlock_bh(&marks->im_lock);
return rv;
}

@@ -244,15 +244,15 @@ int bl_mark_sectors_init(struct pnfs_inval_markings *marks,
if (_preload_range(marks, start, end - start))
goto outerr;

- spin_lock(&marks->im_lock);
+ spin_lock_bh(&marks->im_lock);
if (_set_range(&marks->im_tree, EXTENT_INITIALIZED, offset, length))
goto out_unlock;
- spin_unlock(&marks->im_lock);
+ spin_unlock_bh(&marks->im_lock);

return 0;

out_unlock:
- spin_unlock(&marks->im_lock);
+ spin_unlock_bh(&marks->im_lock);
outerr:
return -ENOMEM;
}
@@ -267,9 +267,9 @@ static int mark_written_sectors(struct pnfs_inval_markings *marks,

dprintk("%s(offset=%llu,len=%llu) enter\n", __func__,
(u64)offset, (u64)length);
- spin_lock(&marks->im_lock);
+ spin_lock_bh(&marks->im_lock);
status = _set_range(&marks->im_tree, EXTENT_WRITTEN, offset, length);
- spin_unlock(&marks->im_lock);
+ spin_unlock_bh(&marks->im_lock);
return status;
}

@@ -369,20 +369,18 @@ static void add_to_commitlist(struct pnfs_block_layout *bl,

/* Note the range described by offset, length is guaranteed to be contained
* within be.
+ * new will be freed, either by this function or add_to_commitlist if they
+ * decide not to use it, or after LAYOUTCOMMIT uses it in the commitlist.
*/
int bl_mark_for_commit(struct pnfs_block_extent *be,
- sector_t offset, sector_t length)
+ sector_t offset, sector_t length,
+ struct pnfs_block_short_extent *new)
{
sector_t new_end, end = offset + length;
- struct pnfs_block_short_extent *new;
struct pnfs_block_layout *bl = container_of(be->be_inval,
struct pnfs_block_layout,
bl_inval);

- new = kmalloc(sizeof(*new), GFP_NOFS);
- if (!new)
- return -ENOMEM;
-
mark_written_sectors(be->be_inval, offset, length);
/* We want to add the range to commit list, but it must be
* block-normalized, and verified that the normalized range has
@@ -412,9 +410,6 @@ int bl_mark_for_commit(struct pnfs_block_extent *be,
new->bse_mdev = be->be_mdev;

spin_lock(&bl->bl_ext_lock);
- /* new will be freed, either by add_to_commitlist if it decides not
- * to use it, or after LAYOUTCOMMIT uses it in the commitlist.
- */
add_to_commitlist(bl, new);
spin_unlock(&bl->bl_ext_lock);
return 0;
@@ -862,3 +857,53 @@ clean_pnfs_block_layoutupdate(struct pnfs_block_layout *bl,
}
}
}
+
+int bl_push_one_short_extent(struct pnfs_inval_markings *marks)
+{
+ struct pnfs_block_short_extent *new;
+
+ new = kmalloc(sizeof(*new), GFP_NOFS);
+ if (unlikely(!new))
+ return -ENOMEM;
+
+ spin_lock_bh(&marks->im_lock);
+ list_add(&new->bse_node, &marks->im_extents);
+ spin_unlock_bh(&marks->im_lock);
+
+ return 0;
+}
+
+struct pnfs_block_short_extent *
+bl_pop_one_short_extent(struct pnfs_inval_markings *marks)
+{
+ struct pnfs_block_short_extent *rv = NULL;
+
+ spin_lock_bh(&marks->im_lock);
+ if (!list_empty(&marks->im_extents)) {
+ rv = list_entry((&marks->im_extents)->next,
+ struct pnfs_block_short_extent, bse_node);
+ list_del_init(&rv->bse_node);
+ }
+ spin_unlock_bh(&marks->im_lock);
+
+ return rv;
+}
+
+void bl_free_short_extents(struct pnfs_inval_markings *marks, int num_to_free)
+{
+ struct pnfs_block_short_extent *se = NULL, *tmp;
+
+ if (num_to_free <= 0)
+ return;
+
+ spin_lock(&marks->im_lock);
+ list_for_each_entry_safe(se, tmp, &marks->im_extents, bse_node) {
+ list_del(&se->bse_node);
+ kfree(se);
+ if (--num_to_free == 0)
+ break;
+ }
+ spin_unlock(&marks->im_lock);
+
+ BUG_ON(num_to_free > 0);
+}
--
1.7.1.262.g5ef3d


2011-11-29 18:44:04

by Jim Rees

[permalink] [raw]
Subject: Re: [PATCH 0/9] pnfsblock patches

Peng Tao wrote:

Hi, Trond,

Following are some bugfix and improvements for blocklayout driver. All have been
sent to the list before and have been tested for several weeks.

Patches 2/9 and 8/9 are missing again. I wonder why these particular
patches are getting filtered out?

2011-11-29 22:59:01

by Jim Rees

[permalink] [raw]
Subject: Re: [PATCH 0/9] pnfsblock patches

Boaz Harrosh wrote:

On 12/02/2011 08:46 PM, Peng Tao wrote:
> Hi, Trond,
>
> Following are some bugfix and improvements for blocklayout driver. All have been
> sent to the list before and have been tested for several weeks.
>

I never got 9 patches in the mail.

I only have 1,3,4,5,6,7

Do you have them on a gitweb, or re-post 2,8,9

1-7 are in benny's tree:

% git log --oneline -7 benny/pnfs-block
ec11faf pnfsblock: alloc short extent before submit bio
7c93c37 pnfsblock: clean up _add_entry
85e8c84 pnfsblock: remove rpc_call_ops from struct parallel_io
ab0c07d pnfsblock: set read/write tk_status to pnfs_error
60d5135 pnfsblock: move find lock page logic out of bl_write_pagelist
657c2f4 pnfsblock: acquire im_lock in _preload_range
e5b35df pnfsblock: cleanup bl_mark_sectors_init

I never saw 8,9 and can't find them in the list archive.

2011-11-29 23:39:57

by Boaz Harrosh

[permalink] [raw]
Subject: Re: [PATCH 0/9] pnfsblock patches

On 11/29/2011 02:58 PM, Jim Rees wrote:
> Boaz Harrosh wrote:
>
> On 12/02/2011 08:46 PM, Peng Tao wrote:
> > Hi, Trond,
> >
> > Following are some bugfix and improvements for blocklayout driver. All have been
> > sent to the list before and have been tested for several weeks.
> >
>
> I never got 9 patches in the mail.
>
> I only have 1,3,4,5,6,7
>
> Do you have them on a gitweb, or re-post 2,8,9
>
> 1-7 are in benny's tree:
>
> % git log --oneline -7 benny/pnfs-block
> ec11faf pnfsblock: alloc short extent before submit bio
> 7c93c37 pnfsblock: clean up _add_entry
> 85e8c84 pnfsblock: remove rpc_call_ops from struct parallel_io
> ab0c07d pnfsblock: set read/write tk_status to pnfs_error
> 60d5135 pnfsblock: move find lock page logic out of bl_write_pagelist
> 657c2f4 pnfsblock: acquire im_lock in _preload_range
> e5b35df pnfsblock: cleanup bl_mark_sectors_init
>
> I never saw 8,9 and can't find them in the list archive.

Actually I wanted 9. I'm old and senile but I did not remember
I ever saw it so I was wondering.

We know their names from [0/9]:
pnfsblock: don't spinlock when freeing block_dev
pnfsblock: limit bio page count

Thanks
Heart

2011-11-30 02:44:28

by Peng, Tao

[permalink] [raw]
Subject: RE: [PATCH 0/9] pnfsblock patches

> -----Original Message-----
> From: [email protected] [mailto:[email protected]] On Behalf Of Boaz
> Harrosh
> Sent: Wednesday, November 30, 2011 7:40 AM
> To: Jim Rees
> Cc: Peng Tao; [email protected]; [email protected]
> Subject: Re: [PATCH 0/9] pnfsblock patches
>
> On 11/29/2011 02:58 PM, Jim Rees wrote:
> > Boaz Harrosh wrote:
> >
> >   On 12/02/2011 08:46 PM, Peng Tao wrote:
> >   > Hi, Trond,
> >   >
> >   > Following are some bugfix and improvements for blocklayout driver. All have been
> >   > sent to the list before and have been tested for several weeks.
> >   >
> >
> >   I never got 9 patches in the mail.
> >
> >   I only have 1,3,4,5,6,7
> >
> >   Do you have them on a gitweb, or re-post 2,8,9
> >
> > 1-7 are in benny's tree:
> >
> > % git log --oneline -7 benny/pnfs-block
> > ec11faf pnfsblock: alloc short extent before submit bio
> > 7c93c37 pnfsblock: clean up _add_entry
> > 85e8c84 pnfsblock: remove rpc_call_ops from struct parallel_io
> > ab0c07d pnfsblock: set read/write tk_status to pnfs_error
> > 60d5135 pnfsblock: move find lock page logic out of bl_write_pagelist
> > 657c2f4 pnfsblock: acquire im_lock in _preload_range
> > e5b35df pnfsblock: cleanup bl_mark_sectors_init
> >
> > I never saw 8,9 and can't find them in the list archive.
>
> Actually I wanted 9. I'm old and senile but I did not remember
> I ever saw it so I was wondering.
>
> We know their names from [0/9]:
>   pnfsblock: don't spinlock when freeing block_dev
>   pnfsblock: limit bio page count
You can find both the 9 patches and the 4 layout size patches at https://github.com/bergwolf/linux/tree/pnfsblock-for-trond

Looks like I need to find another way/machine/mail account to send patches...

Thanks,
Tao
>
> Thanks
> Heart
> --
> To unsubscribe from this list: send the line "unsubscribe linux-nfs" in
> the body of a message to [email protected]
> More majordomo info at  http://vger.kernel.org/majordomo-info.html