Hi, Trond,
Following are some bugfix and improvements for blocklayout driver. All have been
sent to the list before and have been tested for several weeks.
Peng Tao (9):
pnfsblock: cleanup bl_mark_sectors_init
pnfsblock: acquire im_lock in _preload_range
pnfsblock: move find lock page logic out of bl_write_pagelist
pnfsblock: set read/write tk_status to pnfs_error
pnfsblock: remove rpc_call_ops from struct parallel_io
pnfsblock: clean up _add_entry
pnfsblock: alloc short extent before submit bio
pnfsblock: don't spinlock when freeing block_dev
pnfsblock: limit bio page count
fs/nfs/blocklayout/blocklayout.c | 202 ++++++++++++++++++++++++-------------
fs/nfs/blocklayout/blocklayout.h | 12 ++-
fs/nfs/blocklayout/extents.c | 176 ++++++++++++++-------------------
3 files changed, 215 insertions(+), 175 deletions(-)
To pass the IO status to the upper layer.
Signed-off-by: Peng Tao <[email protected]>
---
fs/nfs/blocklayout/blocklayout.c | 3 ++-
1 files changed, 2 insertions(+), 1 deletions(-)
diff --git a/fs/nfs/blocklayout/blocklayout.c b/fs/nfs/blocklayout/blocklayout.c
index c83ad70..883bb27 100644
--- a/fs/nfs/blocklayout/blocklayout.c
+++ b/fs/nfs/blocklayout/blocklayout.c
@@ -216,6 +216,7 @@ bl_end_par_io_read(void *data)
{
struct nfs_read_data *rdata = data;
+ rdata->task.tk_status = rdata->pnfs_error;
INIT_WORK(&rdata->task.u.tk_work, bl_read_cleanup);
schedule_work(&rdata->task.u.tk_work);
}
@@ -405,7 +406,7 @@ static void bl_end_par_io_write(void *data)
{
struct nfs_write_data *wdata = data;
- wdata->task.tk_status = 0;
+ wdata->task.tk_status = wdata->pnfs_error;
wdata->verf.committed = NFS_FILE_SYNC;
INIT_WORK(&wdata->task.u.tk_work, bl_write_cleanup);
schedule_work(&wdata->task.u.tk_work);
--
1.7.1.262.g5ef3d
There is no need to manipulate partially initialized blocks here.
The writeback code takes care of them.
Signed-off-by: Peng Tao <[email protected]>
---
fs/nfs/blocklayout/blocklayout.c | 6 +--
fs/nfs/blocklayout/blocklayout.h | 3 +-
fs/nfs/blocklayout/extents.c | 76 ++-----------------------------------
3 files changed, 8 insertions(+), 77 deletions(-)
diff --git a/fs/nfs/blocklayout/blocklayout.c b/fs/nfs/blocklayout/blocklayout.c
index 281ae95..4ced0b0 100644
--- a/fs/nfs/blocklayout/blocklayout.c
+++ b/fs/nfs/blocklayout/blocklayout.c
@@ -571,8 +571,7 @@ fill_invalid_ext:
unlock_page(page);
ret = bl_mark_sectors_init(be->be_inval, isect,
- PAGE_CACHE_SECTORS,
- NULL);
+ PAGE_CACHE_SECTORS);
if (unlikely(ret)) {
dprintk("%s bl_mark_sectors_init fail %d\n",
__func__, ret);
@@ -621,8 +620,7 @@ next_page:
}
if (be->be_state == PNFS_BLOCK_INVALID_DATA) {
ret = bl_mark_sectors_init(be->be_inval, isect,
- PAGE_CACHE_SECTORS,
- NULL);
+ PAGE_CACHE_SECTORS);
if (unlikely(ret)) {
dprintk("%s bl_mark_sectors_init fail %d\n",
__func__, ret);
diff --git a/fs/nfs/blocklayout/blocklayout.h b/fs/nfs/blocklayout/blocklayout.h
index 42acf7e..60728ac 100644
--- a/fs/nfs/blocklayout/blocklayout.h
+++ b/fs/nfs/blocklayout/blocklayout.h
@@ -186,8 +186,7 @@ struct pnfs_block_extent *
bl_find_get_extent(struct pnfs_block_layout *bl, sector_t isect,
struct pnfs_block_extent **cow_read);
int bl_mark_sectors_init(struct pnfs_inval_markings *marks,
- sector_t offset, sector_t length,
- sector_t **pages);
+ sector_t offset, sector_t length);
void bl_put_extent(struct pnfs_block_extent *be);
struct pnfs_block_extent *bl_alloc_extent(void);
int bl_is_sector_init(struct pnfs_inval_markings *marks, sector_t isect);
diff --git a/fs/nfs/blocklayout/extents.c b/fs/nfs/blocklayout/extents.c
index 19fa7b0..0fc1321 100644
--- a/fs/nfs/blocklayout/extents.c
+++ b/fs/nfs/blocklayout/extents.c
@@ -179,33 +179,6 @@ static int _preload_range(struct my_tree *tree, u64 offset, u64 length)
return status;
}
-static void set_needs_init(sector_t *array, sector_t offset)
-{
- sector_t *p = array;
-
- dprintk("%s enter\n", __func__);
- if (!p)
- return;
- while (*p < offset)
- p++;
- if (*p == offset)
- return;
- else if (*p == ~0) {
- *p++ = offset;
- *p = ~0;
- return;
- } else {
- sector_t *save = p;
- dprintk("%s Adding %llu\n", __func__, (u64)offset);
- while (*p != ~0)
- p++;
- p++;
- memmove(save + 1, save, (char *)p - (char *)save);
- *save = offset;
- return;
- }
-}
-
/* We are relying on page lock to serialize this */
int bl_is_sector_init(struct pnfs_inval_markings *marks, sector_t isect)
{
@@ -261,28 +234,15 @@ static int is_range_written(struct pnfs_inval_markings *marks,
/* Marks sectors in [offest, offset_length) as having been initialized.
* All lengths are step-aligned, where step is min(pagesize, blocksize).
- * Notes where partial block is initialized, and helps prepare it for
- * complete initialization later.
+ * Currently assumes offset is page-aligned
*/
-/* Currently assumes offset is page-aligned */
int bl_mark_sectors_init(struct pnfs_inval_markings *marks,
- sector_t offset, sector_t length,
- sector_t **pages)
+ sector_t offset, sector_t length)
{
- sector_t s, start, end;
- sector_t *array = NULL; /* Pages to mark */
+ sector_t start, end;
dprintk("%s(offset=%llu,len=%llu) enter\n",
__func__, (u64)offset, (u64)length);
- s = max((sector_t) 3,
- 2 * (marks->im_block_size / (PAGE_CACHE_SECTORS)));
- dprintk("%s set max=%llu\n", __func__, (u64)s);
- if (pages) {
- array = kmalloc(s * sizeof(sector_t), GFP_NOFS);
- if (!array)
- goto outerr;
- array[0] = ~0;
- }
start = normalize(offset, marks->im_block_size);
end = normalize_up(offset + length, marks->im_block_size);
@@ -290,41 +250,15 @@ int bl_mark_sectors_init(struct pnfs_inval_markings *marks,
goto outerr;
spin_lock(&marks->im_lock);
-
- for (s = normalize_up(start, PAGE_CACHE_SECTORS);
- s < offset; s += PAGE_CACHE_SECTORS) {
- dprintk("%s pre-area pages\n", __func__);
- /* Portion of used block is not initialized */
- if (!_has_tag(&marks->im_tree, s, EXTENT_INITIALIZED))
- set_needs_init(array, s);
- }
if (_set_range(&marks->im_tree, EXTENT_INITIALIZED, offset, length))
goto out_unlock;
- for (s = normalize_up(offset + length, PAGE_CACHE_SECTORS);
- s < end; s += PAGE_CACHE_SECTORS) {
- dprintk("%s post-area pages\n", __func__);
- if (!_has_tag(&marks->im_tree, s, EXTENT_INITIALIZED))
- set_needs_init(array, s);
- }
-
spin_unlock(&marks->im_lock);
- if (pages) {
- if (array[0] == ~0) {
- kfree(array);
- *pages = NULL;
- } else
- *pages = array;
- }
return 0;
- out_unlock:
+out_unlock:
spin_unlock(&marks->im_lock);
- outerr:
- if (pages) {
- kfree(array);
- *pages = NULL;
- }
+outerr:
return -ENOMEM;
}
--
1.7.1.262.g5ef3d
Also avoid an unnecessary lock_page if the page is being handled by others.
Signed-off-by: Peng Tao <[email protected]>
---
fs/nfs/blocklayout/blocklayout.c | 78 ++++++++++++++++++++++++++------------
1 files changed, 54 insertions(+), 24 deletions(-)
diff --git a/fs/nfs/blocklayout/blocklayout.c b/fs/nfs/blocklayout/blocklayout.c
index 4ced0b0..c83ad70 100644
--- a/fs/nfs/blocklayout/blocklayout.c
+++ b/fs/nfs/blocklayout/blocklayout.c
@@ -484,6 +484,55 @@ cleanup:
return ret;
}
+/* Find or create a zeroing page marked being writeback.
+ * Return ERR_PTR on error, NULL to indicate skip this page and page itself
+ * to indicate write out.
+ */
+static struct page *
+bl_find_get_zeroing_page(struct inode *inode, pgoff_t index,
+ struct pnfs_block_extent *cow_read)
+{
+ struct page *page;
+ int locked = 0;
+ page = find_get_page(inode->i_mapping, index);
+ if (page)
+ goto check_page;
+
+ page = find_or_create_page(inode->i_mapping, index, GFP_NOFS);
+ if (unlikely(!page)) {
+ dprintk("%s oom\n", __func__);
+ return ERR_PTR(-ENOMEM);
+ }
+ locked = 1;
+
+check_page:
+ /* PageDirty: Other will write this out
+ * PageWriteback: Other is writing this out
+ * PageUptodate: It was read before
+ */
+ if (PageDirty(page) || PageWriteback(page)) {
+ print_page(page);
+ if (locked)
+ unlock_page(page);
+ page_cache_release(page);
+ return NULL;
+ }
+
+ if (!locked) {
+ lock_page(page);
+ locked = 1;
+ goto check_page;
+ }
+ if (!PageUptodate(page)) {
+ /* New page, readin or zero it */
+ init_page_for_write(page, cow_read);
+ }
+ set_page_writeback(page);
+ unlock_page(page);
+
+ return page;
+}
+
static enum pnfs_try_status
bl_write_pagelist(struct nfs_write_data *wdata, int sync)
{
@@ -543,32 +592,13 @@ fill_invalid_ext:
dprintk("%s zero %dth page: index %lu isect %llu\n",
__func__, npg_zero, index,
(unsigned long long)isect);
- page =
- find_or_create_page(wdata->inode->i_mapping, index,
- GFP_NOFS);
- if (!page) {
- dprintk("%s oom\n", __func__);
- wdata->pnfs_error = -ENOMEM;
+ page = bl_find_get_zeroing_page(wdata->inode, index,
+ cow_read);
+ if (unlikely(IS_ERR(page))) {
+ wdata->pnfs_error = PTR_ERR(page);
goto out;
- }
-
- /* PageDirty: Other will write this out
- * PageWriteback: Other is writing this out
- * PageUptodate: It was read before
- * sector_initialized: already written out
- */
- if (PageDirty(page) || PageWriteback(page)) {
- print_page(page);
- unlock_page(page);
- page_cache_release(page);
+ } else if (page == NULL)
goto next_page;
- }
- if (!PageUptodate(page)) {
- /* New page, readin or zero it */
- init_page_for_write(page, cow_read);
- }
- set_page_writeback(page);
- unlock_page(page);
ret = bl_mark_sectors_init(be->be_inval, isect,
PAGE_CACHE_SECTORS);
--
1.7.1.262.g5ef3d
It is wrong to kmalloc in _add_entry() as it is called inside a
spinlock. Memory should already be allocated before _add_entry() is called.
Signed-off-by: Peng Tao <[email protected]>
---
fs/nfs/blocklayout/extents.c | 8 +-------
1 files changed, 1 insertions(+), 7 deletions(-)
diff --git a/fs/nfs/blocklayout/extents.c b/fs/nfs/blocklayout/extents.c
index f383524..d0f52ed 100644
--- a/fs/nfs/blocklayout/extents.c
+++ b/fs/nfs/blocklayout/extents.c
@@ -110,13 +110,7 @@ static int _add_entry(struct my_tree *tree, u64 s, int32_t tag,
return 0;
} else {
struct pnfs_inval_tracking *new;
- if (storage)
- new = storage;
- else {
- new = kmalloc(sizeof(*new), GFP_NOFS);
- if (!new)
- return -ENOMEM;
- }
+ new = storage;
new->it_sector = s;
new->it_tags = (1 << tag);
list_add(&new->it_link, &pos->it_link);
--
1.7.1.262.g5ef3d
block layout can just make use of generic read/write_done.
Signed-off-by: Peng Tao <[email protected]>
---
fs/nfs/blocklayout/blocklayout.c | 13 -------------
1 files changed, 0 insertions(+), 13 deletions(-)
diff --git a/fs/nfs/blocklayout/blocklayout.c b/fs/nfs/blocklayout/blocklayout.c
index 883bb27..7fc69c9 100644
--- a/fs/nfs/blocklayout/blocklayout.c
+++ b/fs/nfs/blocklayout/blocklayout.c
@@ -90,7 +90,6 @@ static int is_writable(struct pnfs_block_extent *be, sector_t isect)
*/
struct parallel_io {
struct kref refcnt;
- struct rpc_call_ops call_ops;
void (*pnfs_callback) (void *data);
void *data;
};
@@ -221,14 +220,6 @@ bl_end_par_io_read(void *data)
schedule_work(&rdata->task.u.tk_work);
}
-/* We don't want normal .rpc_call_done callback used, so we replace it
- * with this stub.
- */
-static void bl_rpc_do_nothing(struct rpc_task *task, void *calldata)
-{
- return;
-}
-
static enum pnfs_try_status
bl_read_pagelist(struct nfs_read_data *rdata)
{
@@ -248,8 +239,6 @@ bl_read_pagelist(struct nfs_read_data *rdata)
par = alloc_parallel(rdata);
if (!par)
goto use_mds;
- par->call_ops = *rdata->mds_ops;
- par->call_ops.rpc_call_done = bl_rpc_do_nothing;
par->pnfs_callback = bl_end_par_io_read;
/* At this point, we can no longer jump to use_mds */
@@ -559,8 +548,6 @@ bl_write_pagelist(struct nfs_write_data *wdata, int sync)
par = alloc_parallel(wdata);
if (!par)
return PNFS_NOT_ATTEMPTED;
- par->call_ops = *wdata->mds_ops;
- par->call_ops.rpc_call_done = bl_rpc_do_nothing;
par->pnfs_callback = bl_end_par_io_write;
/* At this point, have to be more careful with error handling */
--
1.7.1.262.g5ef3d
I merged patches 8 and 9 into my tree
Benny
On 2011-11-30 04:37, [email protected] wrote:
>> -----Original Message-----
>> From: [email protected] [mailto:[email protected]] On Behalf Of Jim Rees
>> Sent: Wednesday, November 30, 2011 2:44 AM
>> To: Peng Tao
>> Cc: [email protected]; [email protected]
>> Subject: Re: [PATCH 0/9] pnfsblock patches
>>
>> Peng Tao wrote:
>>
>> Hi, Trond,
>>
>> Following are some bugfix and improvements for blocklayout driver. All have been
>> sent to the list before and have been tested for several weeks.
>>
>> Patches 2/9 and 8/9 are missing again. I wonder why these particular
>> patches are getting filtered out?
> I have received these patches in both my gmail box and emc mail box. So I guess vger dropped them, again... Somehow it thinks I'm a spammer?
> I attached 2/9, 8/9, 9/9 here...
>
> Thanks,
> Tao
>> --
>> To unsubscribe from this list: send the line "unsubscribe linux-nfs" in
>> the body of a message to [email protected]
>> More majordomo info at http://vger.kernel.org/majordomo-info.html
>
> -----Original Message-----
> From: [email protected] [mailto:[email protected]] On Behalf Of Jim Rees
> Sent: Wednesday, November 30, 2011 2:44 AM
> To: Peng Tao
> Cc: [email protected]; [email protected]
> Subject: Re: [PATCH 0/9] pnfsblock patches
>
> Peng Tao wrote:
>
> Hi, Trond,
>
> Following are some bugfix and improvements for blocklayout driver. All have been
> sent to the list before and have been tested for several weeks.
>
> Patches 2/9 and 8/9 are missing again. I wonder why these particular
> patches are getting filtered out?
I have received these patches in both my gmail box and emc mail box. So I guess vger dropped them, again... Somehow it thinks I'm a spammer?
I attached 2/9, 8/9, 9/9 here...
Thanks,
Tao
> --
> To unsubscribe from this list: send the line "unsubscribe linux-nfs" in
> the body of a message to [email protected]
> More majordomo info at http://vger.kernel.org/majordomo-info.html
On 12/02/2011 08:46 PM, Peng Tao wrote:
> Hi, Trond,
>
> Following are some bugfix and improvements for blocklayout driver. All have been
> sent to the list before and have been tested for several weeks.
>
I never got 9 patches in the mail.
I only have 1,3,4,5,6,7
Do you have them on a gitweb, or re-post 2,8,9
Thanks
Boaz
> Peng Tao (9):
> pnfsblock: cleanup bl_mark_sectors_init
> pnfsblock: acquire im_lock in _preload_range
> pnfsblock: move find lock page logic out of bl_write_pagelist
> pnfsblock: set read/write tk_status to pnfs_error
> pnfsblock: remove rpc_call_ops from struct parallel_io
> pnfsblock: clean up _add_entry
> pnfsblock: alloc short extent before submit bio
> pnfsblock: don't spinlock when freeing block_dev
> pnfsblock: limit bio page count
>
> fs/nfs/blocklayout/blocklayout.c | 202 ++++++++++++++++++++++++-------------
> fs/nfs/blocklayout/blocklayout.h | 12 ++-
> fs/nfs/blocklayout/extents.c | 176 ++++++++++++++-------------------
> 3 files changed, 215 insertions(+), 175 deletions(-)
>
> --
> To unsubscribe from this list: send the line "unsubscribe linux-nfs" in
> the body of a message to [email protected]
> More majordomo info at http://vger.kernel.org/majordomo-info.html
On Wed, Nov 30, 2011 at 9:05 PM, Benny Halevy <[email protected]> wrote:
> I merged patches 8 and 9 into my tree
>
Thanks!
> Benny
>
> On 2011-11-30 04:37, [email protected] wrote:
>>> -----Original Message-----
>>> From: [email protected] [mailto:[email protected]] On Behalf Of Jim Rees
>>> Sent: Wednesday, November 30, 2011 2:44 AM
>>> To: Peng Tao
>>> Cc: [email protected]; [email protected]
>>> Subject: Re: [PATCH 0/9] pnfsblock patches
>>>
>>> Peng Tao wrote:
>>>
>>> Hi, Trond,
>>>
>>> Following are some bugfix and improvements for blocklayout driver. All have been
>>> sent to the list before and have been tested for several weeks.
>>>
>>> Patches 2/9 and 8/9 are missing again. I wonder why these particular
>>> patches are getting filtered out?
>> I have received these patches in both my gmail box and emc mail box. So I guess vger dropped them, again... Somehow it thinks I'm a spammer?
>> I attached 2/9, 8/9, 9/9 here...
>>
>> Thanks,
>> Tao
>>> --
>>> To unsubscribe from this list: send the line "unsubscribe linux-nfs" in
>>> the body of a message to [email protected]
>>> More majordomo info at http://vger.kernel.org/majordomo-info.html
>>
As discussed earlier, it is better for block client to allocate memory for
tracking extents state before submitting bio. So the patch does it by allocating
a short_extent for every INVALID extent touched by write pagelist and for
every zeroing page we created, saving them in layout header. Then in end_io we
can just use them to create commit list items and avoid memory allocation there.
Signed-off-by: Peng Tao <[email protected]>
---
fs/nfs/blocklayout/blocklayout.c | 74 ++++++++++++++++++++++++++-------
fs/nfs/blocklayout/blocklayout.h | 9 ++++-
fs/nfs/blocklayout/extents.c | 85 +++++++++++++++++++++++++++++---------
3 files changed, 131 insertions(+), 37 deletions(-)
diff --git a/fs/nfs/blocklayout/blocklayout.c b/fs/nfs/blocklayout/blocklayout.c
index 7fc69c9..1dd2983 100644
--- a/fs/nfs/blocklayout/blocklayout.c
+++ b/fs/nfs/blocklayout/blocklayout.c
@@ -90,8 +90,9 @@ static int is_writable(struct pnfs_block_extent *be, sector_t isect)
*/
struct parallel_io {
struct kref refcnt;
- void (*pnfs_callback) (void *data);
+ void (*pnfs_callback) (void *data, int num_se);
void *data;
+ int bse_count;
};
static inline struct parallel_io *alloc_parallel(void *data)
@@ -102,6 +103,7 @@ static inline struct parallel_io *alloc_parallel(void *data)
if (rv) {
rv->data = data;
kref_init(&rv->refcnt);
+ rv->bse_count = 0;
}
return rv;
}
@@ -116,7 +118,7 @@ static void destroy_parallel(struct kref *kref)
struct parallel_io *p = container_of(kref, struct parallel_io, refcnt);
dprintk("%s enter\n", __func__);
- p->pnfs_callback(p->data);
+ p->pnfs_callback(p->data, p->bse_count);
kfree(p);
}
@@ -211,7 +213,7 @@ static void bl_read_cleanup(struct work_struct *work)
}
static void
-bl_end_par_io_read(void *data)
+bl_end_par_io_read(void *data, int unused)
{
struct nfs_read_data *rdata = data;
@@ -312,6 +314,7 @@ static void mark_extents_written(struct pnfs_block_layout *bl,
{
sector_t isect, end;
struct pnfs_block_extent *be;
+ struct pnfs_block_short_extent *se;
dprintk("%s(%llu, %u)\n", __func__, offset, count);
if (count == 0)
@@ -324,8 +327,11 @@ static void mark_extents_written(struct pnfs_block_layout *bl,
be = bl_find_get_extent(bl, isect, NULL);
BUG_ON(!be); /* FIXME */
len = min(end, be->be_f_offset + be->be_length) - isect;
- if (be->be_state == PNFS_BLOCK_INVALID_DATA)
- bl_mark_for_commit(be, isect, len); /* What if fails? */
+ if (be->be_state == PNFS_BLOCK_INVALID_DATA) {
+ se = bl_pop_one_short_extent(be->be_inval);
+ BUG_ON(!se);
+ bl_mark_for_commit(be, isect, len, se);
+ }
isect += len;
bl_put_extent(be);
}
@@ -347,7 +353,8 @@ static void bl_end_io_write_zero(struct bio *bio, int err)
end_page_writeback(page);
page_cache_release(page);
} while (bvec >= bio->bi_io_vec);
- if (!uptodate) {
+
+ if (unlikely(!uptodate)) {
if (!wdata->pnfs_error)
wdata->pnfs_error = -EIO;
pnfs_set_lo_fail(wdata->lseg);
@@ -356,7 +363,6 @@ static void bl_end_io_write_zero(struct bio *bio, int err)
put_parallel(par);
}
-/* This is basically copied from mpage_end_io_read */
static void bl_end_io_write(struct bio *bio, int err)
{
struct parallel_io *par = bio->bi_private;
@@ -382,7 +388,7 @@ static void bl_write_cleanup(struct work_struct *work)
dprintk("%s enter\n", __func__);
task = container_of(work, struct rpc_task, u.tk_work);
wdata = container_of(task, struct nfs_write_data, task);
- if (!wdata->pnfs_error) {
+ if (likely(!wdata->pnfs_error)) {
/* Marks for LAYOUTCOMMIT */
mark_extents_written(BLK_LSEG2EXT(wdata->lseg),
wdata->args.offset, wdata->args.count);
@@ -391,10 +397,15 @@ static void bl_write_cleanup(struct work_struct *work)
}
/* Called when last of bios associated with a bl_write_pagelist call finishes */
-static void bl_end_par_io_write(void *data)
+static void bl_end_par_io_write(void *data, int num_se)
{
struct nfs_write_data *wdata = data;
+ if (unlikely(wdata->pnfs_error)) {
+ bl_free_short_extents(&BLK_LSEG2EXT(wdata->lseg)->bl_inval,
+ num_se);
+ }
+
wdata->task.tk_status = wdata->pnfs_error;
wdata->verf.committed = NFS_FILE_SYNC;
INIT_WORK(&wdata->task.u.tk_work, bl_write_cleanup);
@@ -547,7 +558,7 @@ bl_write_pagelist(struct nfs_write_data *wdata, int sync)
*/
par = alloc_parallel(wdata);
if (!par)
- return PNFS_NOT_ATTEMPTED;
+ goto out_mds;
par->pnfs_callback = bl_end_par_io_write;
/* At this point, have to be more careful with error handling */
@@ -555,12 +566,15 @@ bl_write_pagelist(struct nfs_write_data *wdata, int sync)
be = bl_find_get_extent(BLK_LSEG2EXT(wdata->lseg), isect, &cow_read);
if (!be || !is_writable(be, isect)) {
dprintk("%s no matching extents!\n", __func__);
- wdata->pnfs_error = -EINVAL;
- goto out;
+ goto out_mds;
}
/* First page inside INVALID extent */
if (be->be_state == PNFS_BLOCK_INVALID_DATA) {
+ if (likely(!bl_push_one_short_extent(be->be_inval)))
+ par->bse_count++;
+ else
+ goto out_mds;
temp = offset >> PAGE_CACHE_SHIFT;
npg_zero = do_div(temp, npg_per_block);
isect = (sector_t) (((offset - npg_zero * PAGE_CACHE_SIZE) &
@@ -598,6 +612,19 @@ fill_invalid_ext:
wdata->pnfs_error = ret;
goto out;
}
+ if (likely(!bl_push_one_short_extent(be->be_inval)))
+ par->bse_count++;
+ else {
+ end_page_writeback(page);
+ page_cache_release(page);
+ wdata->pnfs_error = -ENOMEM;
+ goto out;
+ }
+ /* FIXME: This should be done in bi_end_io */
+ mark_extents_written(BLK_LSEG2EXT(wdata->lseg),
+ page->index << PAGE_CACHE_SHIFT,
+ PAGE_CACHE_SIZE);
+
bio = bl_add_page_to_bio(bio, npg_zero, WRITE,
isect, page, be,
bl_end_io_write_zero, par);
@@ -606,10 +633,6 @@ fill_invalid_ext:
bio = NULL;
goto out;
}
- /* FIXME: This should be done in bi_end_io */
- mark_extents_written(BLK_LSEG2EXT(wdata->lseg),
- page->index << PAGE_CACHE_SHIFT,
- PAGE_CACHE_SIZE);
next_page:
isect += PAGE_CACHE_SECTORS;
extent_length -= PAGE_CACHE_SECTORS;
@@ -633,6 +656,15 @@ next_page:
wdata->pnfs_error = -EINVAL;
goto out;
}
+ if (be->be_state == PNFS_BLOCK_INVALID_DATA) {
+ if (likely(!bl_push_one_short_extent(
+ be->be_inval)))
+ par->bse_count++;
+ else {
+ wdata->pnfs_error = -ENOMEM;
+ goto out;
+ }
+ }
extent_length = be->be_length -
(isect - be->be_f_offset);
}
@@ -680,6 +712,10 @@ out:
bl_submit_bio(WRITE, bio);
put_parallel(par);
return PNFS_ATTEMPTED;
+out_mds:
+ bl_put_extent(be);
+ kfree(par);
+ return PNFS_NOT_ATTEMPTED;
}
/* FIXME - range ignored */
@@ -706,11 +742,17 @@ static void
release_inval_marks(struct pnfs_inval_markings *marks)
{
struct pnfs_inval_tracking *pos, *temp;
+ struct pnfs_block_short_extent *se, *stemp;
list_for_each_entry_safe(pos, temp, &marks->im_tree.mtt_stub, it_link) {
list_del(&pos->it_link);
kfree(pos);
}
+
+ list_for_each_entry_safe(se, stemp, &marks->im_extents, bse_node) {
+ list_del(&se->bse_node);
+ kfree(se);
+ }
return;
}
diff --git a/fs/nfs/blocklayout/blocklayout.h b/fs/nfs/blocklayout/blocklayout.h
index 60728ac..e31a2df 100644
--- a/fs/nfs/blocklayout/blocklayout.h
+++ b/fs/nfs/blocklayout/blocklayout.h
@@ -70,6 +70,7 @@ struct pnfs_inval_markings {
spinlock_t im_lock;
struct my_tree im_tree; /* Sectors that need LAYOUTCOMMIT */
sector_t im_block_size; /* Server blocksize in sectors */
+ struct list_head im_extents; /* Short extents for INVAL->RW conversion */
};
struct pnfs_inval_tracking {
@@ -105,6 +106,7 @@ BL_INIT_INVAL_MARKS(struct pnfs_inval_markings *marks, sector_t blocksize)
{
spin_lock_init(&marks->im_lock);
INIT_LIST_HEAD(&marks->im_tree.mtt_stub);
+ INIT_LIST_HEAD(&marks->im_extents);
marks->im_block_size = blocksize;
marks->im_tree.mtt_step_size = min((sector_t)PAGE_CACHE_SECTORS,
blocksize);
@@ -199,6 +201,11 @@ void clean_pnfs_block_layoutupdate(struct pnfs_block_layout *bl,
int bl_add_merge_extent(struct pnfs_block_layout *bl,
struct pnfs_block_extent *new);
int bl_mark_for_commit(struct pnfs_block_extent *be,
- sector_t offset, sector_t length);
+ sector_t offset, sector_t length,
+ struct pnfs_block_short_extent *new);
+int bl_push_one_short_extent(struct pnfs_inval_markings *marks);
+struct pnfs_block_short_extent *
+bl_pop_one_short_extent(struct pnfs_inval_markings *marks);
+void bl_free_short_extents(struct pnfs_inval_markings *marks, int num_to_free);
#endif /* FS_NFS_NFS4BLOCKLAYOUT_H */
diff --git a/fs/nfs/blocklayout/extents.c b/fs/nfs/blocklayout/extents.c
index d0f52ed..1abac09 100644
--- a/fs/nfs/blocklayout/extents.c
+++ b/fs/nfs/blocklayout/extents.c
@@ -157,10 +157,10 @@ static int _preload_range(struct pnfs_inval_markings *marks,
goto out_cleanup;
}
- spin_lock(&marks->im_lock);
+ spin_lock_bh(&marks->im_lock);
for (s = start; s < end; s += tree->mtt_step_size)
used += _add_entry(tree, s, INTERNAL_EXISTS, storage[used]);
- spin_unlock(&marks->im_lock);
+ spin_unlock_bh(&marks->im_lock);
status = 0;
@@ -179,9 +179,9 @@ int bl_is_sector_init(struct pnfs_inval_markings *marks, sector_t isect)
{
int rv;
- spin_lock(&marks->im_lock);
+ spin_lock_bh(&marks->im_lock);
rv = _has_tag(&marks->im_tree, isect, EXTENT_INITIALIZED);
- spin_unlock(&marks->im_lock);
+ spin_unlock_bh(&marks->im_lock);
return rv;
}
@@ -221,9 +221,9 @@ static int is_range_written(struct pnfs_inval_markings *marks,
{
int rv;
- spin_lock(&marks->im_lock);
+ spin_lock_bh(&marks->im_lock);
rv = _range_has_tag(&marks->im_tree, start, end, EXTENT_WRITTEN);
- spin_unlock(&marks->im_lock);
+ spin_unlock_bh(&marks->im_lock);
return rv;
}
@@ -244,15 +244,15 @@ int bl_mark_sectors_init(struct pnfs_inval_markings *marks,
if (_preload_range(marks, start, end - start))
goto outerr;
- spin_lock(&marks->im_lock);
+ spin_lock_bh(&marks->im_lock);
if (_set_range(&marks->im_tree, EXTENT_INITIALIZED, offset, length))
goto out_unlock;
- spin_unlock(&marks->im_lock);
+ spin_unlock_bh(&marks->im_lock);
return 0;
out_unlock:
- spin_unlock(&marks->im_lock);
+ spin_unlock_bh(&marks->im_lock);
outerr:
return -ENOMEM;
}
@@ -267,9 +267,9 @@ static int mark_written_sectors(struct pnfs_inval_markings *marks,
dprintk("%s(offset=%llu,len=%llu) enter\n", __func__,
(u64)offset, (u64)length);
- spin_lock(&marks->im_lock);
+ spin_lock_bh(&marks->im_lock);
status = _set_range(&marks->im_tree, EXTENT_WRITTEN, offset, length);
- spin_unlock(&marks->im_lock);
+ spin_unlock_bh(&marks->im_lock);
return status;
}
@@ -369,20 +369,18 @@ static void add_to_commitlist(struct pnfs_block_layout *bl,
/* Note the range described by offset, length is guaranteed to be contained
* within be.
+ * new will be freed, either by this function or add_to_commitlist if they
+ * decide not to use it, or after LAYOUTCOMMIT uses it in the commitlist.
*/
int bl_mark_for_commit(struct pnfs_block_extent *be,
- sector_t offset, sector_t length)
+ sector_t offset, sector_t length,
+ struct pnfs_block_short_extent *new)
{
sector_t new_end, end = offset + length;
- struct pnfs_block_short_extent *new;
struct pnfs_block_layout *bl = container_of(be->be_inval,
struct pnfs_block_layout,
bl_inval);
- new = kmalloc(sizeof(*new), GFP_NOFS);
- if (!new)
- return -ENOMEM;
-
mark_written_sectors(be->be_inval, offset, length);
/* We want to add the range to commit list, but it must be
* block-normalized, and verified that the normalized range has
@@ -412,9 +410,6 @@ int bl_mark_for_commit(struct pnfs_block_extent *be,
new->bse_mdev = be->be_mdev;
spin_lock(&bl->bl_ext_lock);
- /* new will be freed, either by add_to_commitlist if it decides not
- * to use it, or after LAYOUTCOMMIT uses it in the commitlist.
- */
add_to_commitlist(bl, new);
spin_unlock(&bl->bl_ext_lock);
return 0;
@@ -862,3 +857,53 @@ clean_pnfs_block_layoutupdate(struct pnfs_block_layout *bl,
}
}
}
+
+int bl_push_one_short_extent(struct pnfs_inval_markings *marks)
+{
+ struct pnfs_block_short_extent *new;
+
+ new = kmalloc(sizeof(*new), GFP_NOFS);
+ if (unlikely(!new))
+ return -ENOMEM;
+
+ spin_lock_bh(&marks->im_lock);
+ list_add(&new->bse_node, &marks->im_extents);
+ spin_unlock_bh(&marks->im_lock);
+
+ return 0;
+}
+
+struct pnfs_block_short_extent *
+bl_pop_one_short_extent(struct pnfs_inval_markings *marks)
+{
+ struct pnfs_block_short_extent *rv = NULL;
+
+ spin_lock_bh(&marks->im_lock);
+ if (!list_empty(&marks->im_extents)) {
+ rv = list_entry((&marks->im_extents)->next,
+ struct pnfs_block_short_extent, bse_node);
+ list_del_init(&rv->bse_node);
+ }
+ spin_unlock_bh(&marks->im_lock);
+
+ return rv;
+}
+
+void bl_free_short_extents(struct pnfs_inval_markings *marks, int num_to_free)
+{
+ struct pnfs_block_short_extent *se = NULL, *tmp;
+
+ if (num_to_free <= 0)
+ return;
+
+ spin_lock(&marks->im_lock);
+ list_for_each_entry_safe(se, tmp, &marks->im_extents, bse_node) {
+ list_del(&se->bse_node);
+ kfree(se);
+ if (--num_to_free == 0)
+ break;
+ }
+ spin_unlock(&marks->im_lock);
+
+ BUG_ON(num_to_free > 0);
+}
--
1.7.1.262.g5ef3d
Peng Tao wrote:
Hi, Trond,
Following are some bugfix and improvements for blocklayout driver. All have been
sent to the list before and have been tested for several weeks.
Patches 2/9 and 8/9 are missing again. I wonder why these particular
patches are getting filtered out?
Boaz Harrosh wrote:
On 12/02/2011 08:46 PM, Peng Tao wrote:
> Hi, Trond,
>
> Following are some bugfix and improvements for blocklayout driver. All have been
> sent to the list before and have been tested for several weeks.
>
I never got 9 patches in the mail.
I only have 1,3,4,5,6,7
Do you have them on a gitweb, or re-post 2,8,9
1-7 are in benny's tree:
% git log --oneline -7 benny/pnfs-block
ec11faf pnfsblock: alloc short extent before submit bio
7c93c37 pnfsblock: clean up _add_entry
85e8c84 pnfsblock: remove rpc_call_ops from struct parallel_io
ab0c07d pnfsblock: set read/write tk_status to pnfs_error
60d5135 pnfsblock: move find lock page logic out of bl_write_pagelist
657c2f4 pnfsblock: acquire im_lock in _preload_range
e5b35df pnfsblock: cleanup bl_mark_sectors_init
I never saw 8,9 and can't find them in the list archive.
On 11/29/2011 02:58 PM, Jim Rees wrote:
> Boaz Harrosh wrote:
>
> On 12/02/2011 08:46 PM, Peng Tao wrote:
> > Hi, Trond,
> >
> > Following are some bugfix and improvements for blocklayout driver. All have been
> > sent to the list before and have been tested for several weeks.
> >
>
> I never got 9 patches in the mail.
>
> I only have 1,3,4,5,6,7
>
> Do you have them on a gitweb, or re-post 2,8,9
>
> 1-7 are in benny's tree:
>
> % git log --oneline -7 benny/pnfs-block
> ec11faf pnfsblock: alloc short extent before submit bio
> 7c93c37 pnfsblock: clean up _add_entry
> 85e8c84 pnfsblock: remove rpc_call_ops from struct parallel_io
> ab0c07d pnfsblock: set read/write tk_status to pnfs_error
> 60d5135 pnfsblock: move find lock page logic out of bl_write_pagelist
> 657c2f4 pnfsblock: acquire im_lock in _preload_range
> e5b35df pnfsblock: cleanup bl_mark_sectors_init
>
> I never saw 8,9 and can't find them in the list archive.
Actually I wanted 9. I'm old and senile but I did not remember
I ever saw it so I was wondering.
We know their names from [0/9]:
pnfsblock: don't spinlock when freeing block_dev
pnfsblock: limit bio page count
Thanks
Heart
PiAtLS0tLU9yaWdpbmFsIE1lc3NhZ2UtLS0tLQ0KPiBGcm9tOiBsaW51eC1uZnMtb3duZXJAdmdl
ci5rZXJuZWwub3JnIFttYWlsdG86bGludXgtbmZzLW93bmVyQHZnZXIua2VybmVsLm9yZ10gT24g
QmVoYWxmIE9mIEJvYXoNCj4gSGFycm9zaA0KPiBTZW50OiBXZWRuZXNkYXksIE5vdmVtYmVyIDMw
LCAyMDExIDc6NDAgQU0NCj4gVG86IEppbSBSZWVzDQo+IENjOiBQZW5nIFRhbzsgVHJvbmQuTXlr
bGVidXN0QG5ldGFwcC5jb207IGxpbnV4LW5mc0B2Z2VyLmtlcm5lbC5vcmcNCj4gU3ViamVjdDog
UmU6IFtQQVRDSCAwLzldIHBuZnNibG9jayBwYXRjaGVzDQo+IA0KPiBPbiAxMS8yOS8yMDExIDAy
OjU4IFBNLCBKaW0gUmVlcyB3cm90ZToNCj4gPiBCb2F6IEhhcnJvc2ggd3JvdGU6DQo+ID4NCj4g
PiAgIE9uIDEyLzAyLzIwMTEgMDg6NDYgUE0sIFBlbmcgVGFvIHdyb3RlOg0KPiA+ICAgPiBIaSwg
VHJvbmQsDQo+ID4gICA+DQo+ID4gICA+IEZvbGxvd2luZyBhcmUgc29tZSBidWdmaXggYW5kIGlt
cHJvdmVtZW50cyBmb3IgYmxvY2tsYXlvdXQgZHJpdmVyLiBBbGwgaGF2ZSBiZWVuDQo+ID4gICA+
IHNlbnQgdG8gdGhlIGxpc3QgYmVmb3JlIGFuZCBoYXZlIGJlZW4gdGVzdGVkIGZvciBzZXZlcmFs
IHdlZWtzLg0KPiA+ICAgPg0KPiA+DQo+ID4gICBJIG5ldmVyIGdvdCA5IHBhdGNoZXMgaW4gdGhl
IG1haWwuDQo+ID4NCj4gPiAgIEkgb25seSBoYXZlIDEsMyw0LDUsNiw3DQo+ID4NCj4gPiAgIERv
IHlvdSBoYXZlIHRoZW0gb24gYSBnaXR3ZWIsIG9yIHJlLXBvc3QgMiw4LDkNCj4gPg0KPiA+IDEt
NyBhcmUgaW4gYmVubnkncyB0cmVlOg0KPiA+DQo+ID4gJSBnaXQgbG9nIC0tb25lbGluZSAtNyBi
ZW5ueS9wbmZzLWJsb2NrDQo+ID4gZWMxMWZhZiBwbmZzYmxvY2s6IGFsbG9jIHNob3J0IGV4dGVu
dCBiZWZvcmUgc3VibWl0IGJpbw0KPiA+IDdjOTNjMzcgcG5mc2Jsb2NrOiBjbGVhbiB1cCBfYWRk
X2VudHJ5DQo+ID4gODVlOGM4NCBwbmZzYmxvY2s6IHJlbW92ZSBycGNfY2FsbF9vcHMgZnJvbSBz
dHJ1Y3QgcGFyYWxsZWxfaW8NCj4gPiBhYjBjMDdkIHBuZnNibG9jazogc2V0IHJlYWQvd3JpdGUg
dGtfc3RhdHVzIHRvIHBuZnNfZXJyb3INCj4gPiA2MGQ1MTM1IHBuZnNibG9jazogbW92ZSBmaW5k
IGxvY2sgcGFnZSBsb2dpYyBvdXQgb2YgYmxfd3JpdGVfcGFnZWxpc3QNCj4gPiA2NTdjMmY0IHBu
ZnNibG9jazogYWNxdWlyZSBpbV9sb2NrIGluIF9wcmVsb2FkX3JhbmdlDQo+ID4gZTViMzVkZiBw
bmZzYmxvY2s6IGNsZWFudXAgYmxfbWFya19zZWN0b3JzX2luaXQNCj4gPg0KPiA+IEkgbmV2ZXIg
c2F3IDgsOSBhbmQgY2FuJ3QgZmluZCB0aGVtIGluIHRoZSBsaXN0IGFyY2hpdmUuDQo+IA0KPiBB
Y3R1YWxseSBJIHdhbnRlZCA5LiBJJ20gb2xkIGFuZCBzZW5pbGUgYnV0IEkgZGlkIG5vdCByZW1l
bWJlcg0KPiBJIGV2ZXIgc2F3IGl0IHNvIEkgd2FzIHdvbmRlcmluZy4NCj4gDQo+IFdlIGtub3cg
dGhlaXIgbmFtZXMgZnJvbSBbMC85XToNCj4gICBwbmZzYmxvY2s6IGRvbid0IHNwaW5sb2NrIHdo
ZW4gZnJlZWluZyBibG9ja19kZXYNCj4gICBwbmZzYmxvY2s6IGxpbWl0IGJpbyBwYWdlIGNvdW50
DQpZb3UgY2FuIGZpbmQgYm90aCB0aGUgOSBwYXRjaGVzIGFuZCB0aGUgNCBsYXlvdXQgc2l6ZSBw
YXRjaGVzIGF0IGh0dHBzOi8vZ2l0aHViLmNvbS9iZXJnd29sZi9saW51eC90cmVlL3BuZnNibG9j
ay1mb3ItdHJvbmQNCg0KTG9va3MgbGlrZSBJIG5lZWQgdG8gZmluZCBhbm90aGVyIHdheS9tYWNo
aW5lL21haWwgYWNjb3VudCB0byBzZW5kIHBhdGNoZXMuLi4NCg0KVGhhbmtzLA0KVGFvDQo+IA0K
PiBUaGFua3MNCj4gSGVhcnQNCj4gLS0NCj4gVG8gdW5zdWJzY3JpYmUgZnJvbSB0aGlzIGxpc3Q6
IHNlbmQgdGhlIGxpbmUgInVuc3Vic2NyaWJlIGxpbnV4LW5mcyIgaW4NCj4gdGhlIGJvZHkgb2Yg
YSBtZXNzYWdlIHRvIG1ham9yZG9tb0B2Z2VyLmtlcm5lbC5vcmcNCj4gTW9yZSBtYWpvcmRvbW8g
aW5mbyBhdCAgaHR0cDovL3ZnZXIua2VybmVsLm9yZy9tYWpvcmRvbW8taW5mby5odG1sDQoNCg==