2012-08-23 16:28:30

by Peng Tao

Subject: [PATCH-resend 0/6] NFS41 and pnfsblock bugfixes

Trond, hi,

These 6 patches were all sent before. They are all bugfixes, and 4 of
them are needed in stable as well. Please see if they are OK to be
merged in one of the rc pushes.

Thanks,
Tao

Peng Tao (6):
NFSv41: fix DIO write_io calculation
NFS41: fix error of setting blocklayoutdriver
Revert "pnfsblock: bail out partial page IO"
pnfsblock: fix partial page buffer write
pnfsblock: fix non-aligned DIO read
pnfsblock: fix non-aligned DIO write

fs/nfs/blocklayout/blocklayout.c | 270 ++++++++++++++++++++++++++++++++-----
fs/nfs/blocklayout/blocklayout.h | 1 +
fs/nfs/client.c | 1 -
fs/nfs/direct.c | 4 +-
fs/nfs/nfs4proc.c | 5 +-
5 files changed, 240 insertions(+), 41 deletions(-)



2012-08-23 16:28:36

by Peng Tao

Subject: [PATCH-resend 2/6] NFS41: fix error of setting blocklayoutdriver

After commit e38eb650 (NFS: set_pnfs_layoutdriver() from
nfs4_proc_fsinfo()), set_pnfs_layoutdriver() is called inside
nfs4_proc_fsinfo(), but pnfs_blksize has not been set yet at that
point. As a result, setting the blocklayoutdriver fails, and the
pnfsblock mount fails with it.
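
To make the ordering concrete, here is a minimal user-space sketch (not
the kernel code; the struct and function names are simplified stand-ins)
of why the mount fails when pnfs_blksize is copied too late. It assumes
the block layout driver rejects a zero pnfs_blksize, as the
"block layout checks this!" comment in the patch indicates:

/* Sketch: the layout driver's registration hook validates pnfs_blksize,
 * so the value must be copied from fsinfo before the hook runs. */
#include <stdio.h>

struct fsinfo { unsigned int blksize; };
struct server { unsigned int pnfs_blksize; };

/* stand-in for the block layout driver's set_layoutdriver hook */
static int bl_set_layoutdriver(struct server *s)
{
	if (s->pnfs_blksize == 0)
		return -1;	/* driver rejects the mount */
	return 0;
}

int main(void)
{
	struct fsinfo fi = { .blksize = 4096 };
	struct server srv = { .pnfs_blksize = 0 };

	/* before the fix: the hook ran while pnfs_blksize was still 0 */
	printf("old order: %d\n", bl_set_layoutdriver(&srv));

	/* after the fix: copy blksize first, then run the hook */
	srv.pnfs_blksize = fi.blksize;
	printf("new order: %d\n", bl_set_layoutdriver(&srv));
	return 0;
}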

Cc: stable <[email protected]> [since v3.5]
Signed-off-by: Peng Tao <[email protected]>
---
fs/nfs/client.c | 1 -
fs/nfs/nfs4proc.c | 5 ++++-
2 files changed, 4 insertions(+), 2 deletions(-)

diff --git a/fs/nfs/client.c b/fs/nfs/client.c
index 9969444..0e7cd89 100644
--- a/fs/nfs/client.c
+++ b/fs/nfs/client.c
@@ -855,7 +855,6 @@ static void nfs_server_set_fsinfo(struct nfs_server *server,
if (server->wsize > NFS_MAX_FILE_IO_SIZE)
server->wsize = NFS_MAX_FILE_IO_SIZE;
server->wpages = (server->wsize + PAGE_CACHE_SIZE - 1) >> PAGE_CACHE_SHIFT;
- server->pnfs_blksize = fsinfo->blksize;

server->wtmult = nfs_block_bits(fsinfo->wtmult, NULL);

diff --git a/fs/nfs/nfs4proc.c b/fs/nfs/nfs4proc.c
index 6352741..1653091 100644
--- a/fs/nfs/nfs4proc.c
+++ b/fs/nfs/nfs4proc.c
@@ -3362,8 +3362,11 @@ static int nfs4_proc_fsinfo(struct nfs_server *server, struct nfs_fh *fhandle, s

nfs_fattr_init(fsinfo->fattr);
error = nfs4_do_fsinfo(server, fhandle, fsinfo);
- if (error == 0)
+ if (error == 0) {
+ /* block layout checks this! */
+ server->pnfs_blksize = fsinfo->blksize;
set_pnfs_layoutdriver(server, fhandle, fsinfo->layouttype);
+ }

return error;
}
--
1.7.1.262.g5ef3d


2012-08-23 16:28:40

by Peng Tao

Subject: [PATCH-resend 3/6] Revert "pnfsblock: bail out partial page IO"

This reverts commit 159e0561e322dd8008fff59e36efff8d2bdd0b0e, in favor
of a more complete fix to the alignment issue.

Signed-off-by: Peng Tao <[email protected]>
---
fs/nfs/blocklayout/blocklayout.c | 39 ++-----------------------------------
1 files changed, 3 insertions(+), 36 deletions(-)

diff --git a/fs/nfs/blocklayout/blocklayout.c b/fs/nfs/blocklayout/blocklayout.c
index dd392ed..7ae8a60 100644
--- a/fs/nfs/blocklayout/blocklayout.c
+++ b/fs/nfs/blocklayout/blocklayout.c
@@ -228,14 +228,6 @@ bl_end_par_io_read(void *data, int unused)
schedule_work(&rdata->task.u.tk_work);
}

-static bool
-bl_check_alignment(u64 offset, u32 len, unsigned long blkmask)
-{
- if ((offset & blkmask) || (len & blkmask))
- return false;
- return true;
-}
-
static enum pnfs_try_status
bl_read_pagelist(struct nfs_read_data *rdata)
{
@@ -252,9 +244,6 @@ bl_read_pagelist(struct nfs_read_data *rdata)
dprintk("%s enter nr_pages %u offset %lld count %u\n", __func__,
rdata->pages.npages, f_offset, (unsigned int)rdata->args.count);

- if (!bl_check_alignment(f_offset, rdata->args.count, PAGE_CACHE_MASK))
- goto use_mds;
-
par = alloc_parallel(rdata);
if (!par)
goto use_mds;
@@ -563,7 +552,7 @@ bl_write_pagelist(struct nfs_write_data *wdata, int sync)
struct bio *bio = NULL;
struct pnfs_block_extent *be = NULL, *cow_read = NULL;
sector_t isect, last_isect = 0, extent_length = 0;
- struct parallel_io *par = NULL;
+ struct parallel_io *par;
loff_t offset = wdata->args.offset;
size_t count = wdata->args.count;
struct page **pages = wdata->args.pages;
@@ -574,10 +563,6 @@ bl_write_pagelist(struct nfs_write_data *wdata, int sync)
NFS_SERVER(header->inode)->pnfs_blksize >> PAGE_CACHE_SHIFT;

dprintk("%s enter, %Zu@%lld\n", __func__, count, offset);
- /* Check for alignment first */
- if (!bl_check_alignment(offset, count, PAGE_CACHE_MASK))
- goto out_mds;
-
/* At this point, wdata->pages is a (sequential) list of nfs_pages.
* We want to write each, and if there is an error set pnfs_error
* to have it redone using nfs.
@@ -1011,32 +996,14 @@ bl_clear_layoutdriver(struct nfs_server *server)
return 0;
}

-static void
-bl_pg_init_read(struct nfs_pageio_descriptor *pgio, struct nfs_page *req)
-{
- if (!bl_check_alignment(req->wb_offset, req->wb_bytes, PAGE_CACHE_MASK))
- nfs_pageio_reset_read_mds(pgio);
- else
- pnfs_generic_pg_init_read(pgio, req);
-}
-
-static void
-bl_pg_init_write(struct nfs_pageio_descriptor *pgio, struct nfs_page *req)
-{
- if (!bl_check_alignment(req->wb_offset, req->wb_bytes, PAGE_CACHE_MASK))
- nfs_pageio_reset_write_mds(pgio);
- else
- pnfs_generic_pg_init_write(pgio, req);
-}
-
static const struct nfs_pageio_ops bl_pg_read_ops = {
- .pg_init = bl_pg_init_read,
+ .pg_init = pnfs_generic_pg_init_read,
.pg_test = pnfs_generic_pg_test,
.pg_doio = pnfs_generic_pg_readpages,
};

static const struct nfs_pageio_ops bl_pg_write_ops = {
- .pg_init = bl_pg_init_write,
+ .pg_init = pnfs_generic_pg_init_write,
.pg_test = pnfs_generic_pg_test,
.pg_doio = pnfs_generic_pg_writepages,
};
--
1.7.1.262.g5ef3d


2012-08-23 16:28:51

by Peng Tao

Subject: [PATCH-resend 6/6] pnfsblock: fix non-aligned DIO write

For DIO writes that are not block-size aligned, we would need to do
internal serialization, which may slow down writers anyway. So we
just bail out and resend them through the MDS.
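
As a rough illustration, here is a user-space sketch of the alignment
test the patch adds; IS_ALIGNED is redefined here to match the kernel
macro for power-of-two alignments, and the 64 KB block size is just an
example value:

#include <stdio.h>

#define IS_ALIGNED(x, a)	(((x) & ((a) - 1)) == 0)

int main(void)
{
	unsigned int blksize = 65536;			/* example pnfs_blksize */
	unsigned long long offset = 65536, count = 4096;

	/* mirrors the check added to bl_write_pagelist() below */
	if (!IS_ALIGNED(offset, blksize) || !IS_ALIGNED(count, blksize))
		printf("non-aligned DIO write: resend through MDS\n");
	else
		printf("aligned: use the block layout\n");
	return 0;
}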

Cc: stable <[email protected]> [since v3.4]
Signed-off-by: Peng Tao <[email protected]>
---
fs/nfs/blocklayout/blocklayout.c | 34 +++++++++++++++++++++++++++++++---
1 files changed, 31 insertions(+), 3 deletions(-)

diff --git a/fs/nfs/blocklayout/blocklayout.c b/fs/nfs/blocklayout/blocklayout.c
index e5dfef5..1093968 100644
--- a/fs/nfs/blocklayout/blocklayout.c
+++ b/fs/nfs/blocklayout/blocklayout.c
@@ -685,7 +685,7 @@ bl_write_pagelist(struct nfs_write_data *wdata, int sync)
struct bio *bio = NULL;
struct pnfs_block_extent *be = NULL, *cow_read = NULL;
sector_t isect, last_isect = 0, extent_length = 0;
- struct parallel_io *par;
+ struct parallel_io *par = NULL;
loff_t offset = wdata->args.offset;
size_t count = wdata->args.count;
unsigned int pg_offset, pg_len, saved_len;
@@ -697,6 +697,13 @@ bl_write_pagelist(struct nfs_write_data *wdata, int sync)
NFS_SERVER(header->inode)->pnfs_blksize >> PAGE_CACHE_SHIFT;

dprintk("%s enter, %Zu@%lld\n", __func__, count, offset);
+
+ if (header->dreq != NULL &&
+ (!IS_ALIGNED(offset, NFS_SERVER(header->inode)->pnfs_blksize) ||
+ !IS_ALIGNED(count, NFS_SERVER(header->inode)->pnfs_blksize))) {
+ dprintk("pnfsblock nonblock aligned DIO writes. Resend MDS\n");
+ goto out_mds;
+ }
/* At this point, wdata->pages is a (sequential) list of nfs_pages.
* We want to write each, and if there is an error set pnfs_error
* to have it redone using nfs.
@@ -1197,6 +1204,27 @@ bl_pg_test_read(struct nfs_pageio_descriptor *pgio, struct nfs_page *prev,
return pnfs_generic_pg_test(pgio, prev, req);
}

+void
+bl_pg_init_write(struct nfs_pageio_descriptor *pgio, struct nfs_page *req)
+{
+ if (pgio->pg_dreq != NULL &&
+ !is_aligned_req(req, PAGE_CACHE_SIZE))
+ nfs_pageio_reset_write_mds(pgio);
+ else
+ pnfs_generic_pg_init_write(pgio, req);
+}
+
+static bool
+bl_pg_test_write(struct nfs_pageio_descriptor *pgio, struct nfs_page *prev,
+ struct nfs_page *req)
+{
+ if (pgio->pg_dreq != NULL &&
+ !is_aligned_req(req, PAGE_CACHE_SIZE))
+ return false;
+
+ return pnfs_generic_pg_test(pgio, prev, req);
+}
+
static const struct nfs_pageio_ops bl_pg_read_ops = {
.pg_init = bl_pg_init_read,
.pg_test = bl_pg_test_read,
@@ -1204,8 +1232,8 @@ static const struct nfs_pageio_ops bl_pg_read_ops = {
};

static const struct nfs_pageio_ops bl_pg_write_ops = {
- .pg_init = pnfs_generic_pg_init_write,
- .pg_test = pnfs_generic_pg_test,
+ .pg_init = bl_pg_init_write,
+ .pg_test = bl_pg_test_write,
.pg_doio = pnfs_generic_pg_writepages,
};

--
1.7.1.262.g5ef3d


2012-08-23 16:28:43

by Peng Tao

Subject: [PATCH-resend 4/6] pnfsblock: fix partial page buffer write

If applications use flock to protect their write ranges, generic NFS
will not do a read-modify-write cycle at the page cache level. Therefore
the layout driver (LD) must know how to handle non-sector-aligned
writes. Otherwise there will be data corruption.
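
For illustration, a small user-space sketch of the range arithmetic this
kind of sector-level read-modify-write needs, assuming 512-byte sectors
and round_down/round_up equivalents of the kernel macros for
power-of-two alignments:

#include <stdio.h>

#define SECTOR_SIZE		512u
#define round_down(x, y)	((x) & ~((y) - 1))
#define round_up(x, y)		round_down((x) + (y) - 1, (y))

int main(void)
{
	unsigned int dirty_offset = 700, dirty_len = 100;

	/* widen the dirty range to whole sectors, as
	 * bl_read_partial_page_sync() below does */
	unsigned int start = round_down(dirty_offset, SECTOR_SIZE);
	unsigned int end = round_up(dirty_offset + dirty_len, SECTOR_SIZE);

	/* dirty [700, 800) -> sector-aligned window [512, 1024) */
	printf("pre-read [%u, %u) and [%u, %u), then write [%u, %u)\n",
	       start, dirty_offset, dirty_offset + dirty_len, end,
	       start, end);
	return 0;
}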

Cc: stable <[email protected]>
Signed-off-by: Peng Tao <[email protected]>
---
fs/nfs/blocklayout/blocklayout.c | 177 +++++++++++++++++++++++++++++++++++---
fs/nfs/blocklayout/blocklayout.h | 1 +
2 files changed, 166 insertions(+), 12 deletions(-)

diff --git a/fs/nfs/blocklayout/blocklayout.c b/fs/nfs/blocklayout/blocklayout.c
index 7ae8a60..39fa002 100644
--- a/fs/nfs/blocklayout/blocklayout.c
+++ b/fs/nfs/blocklayout/blocklayout.c
@@ -162,25 +162,39 @@ static struct bio *bl_alloc_init_bio(int npg, sector_t isect,
return bio;
}

-static struct bio *bl_add_page_to_bio(struct bio *bio, int npg, int rw,
+static struct bio *do_add_page_to_bio(struct bio *bio, int npg, int rw,
sector_t isect, struct page *page,
struct pnfs_block_extent *be,
void (*end_io)(struct bio *, int err),
- struct parallel_io *par)
+ struct parallel_io *par,
+ unsigned int offset, int len)
{
+ isect = isect + (offset >> SECTOR_SHIFT);
+ dprintk("%s: npg %d rw %d isect %llu offset %u len %d\n", __func__,
+ npg, rw, (unsigned long long)isect, offset, len);
retry:
if (!bio) {
bio = bl_alloc_init_bio(npg, isect, be, end_io, par);
if (!bio)
return ERR_PTR(-ENOMEM);
}
- if (bio_add_page(bio, page, PAGE_CACHE_SIZE, 0) < PAGE_CACHE_SIZE) {
+ if (bio_add_page(bio, page, len, offset) < len) {
bio = bl_submit_bio(rw, bio);
goto retry;
}
return bio;
}

+static struct bio *bl_add_page_to_bio(struct bio *bio, int npg, int rw,
+ sector_t isect, struct page *page,
+ struct pnfs_block_extent *be,
+ void (*end_io)(struct bio *, int err),
+ struct parallel_io *par)
+{
+ return do_add_page_to_bio(bio, npg, rw, isect, page, be,
+ end_io, par, 0, PAGE_CACHE_SIZE);
+}
+
/* This is basically copied from mpage_end_io_read */
static void bl_end_io_read(struct bio *bio, int err)
{
@@ -450,6 +464,106 @@ map_block(struct buffer_head *bh, sector_t isect, struct pnfs_block_extent *be)
return;
}

+static void
+bl_read_single_end_io(struct bio *bio, int error)
+{
+ struct bio_vec *bvec = bio->bi_io_vec + bio->bi_vcnt - 1;
+ struct page *page = bvec->bv_page;
+
+ /* Only one page in bvec */
+ unlock_page(page);
+}
+
+static int
+bl_do_readpage_sync(struct page *page, struct pnfs_block_extent *be,
+ unsigned int offset, unsigned int len)
+{
+ struct bio *bio;
+ struct page *shadow_page;
+ sector_t isect;
+ char *kaddr, *kshadow_addr;
+ int ret = 0;
+
+ dprintk("%s: offset %u len %u\n", __func__, offset, len);
+
+ shadow_page = alloc_page(GFP_NOFS | __GFP_HIGHMEM);
+ if (shadow_page == NULL)
+ return -ENOMEM;
+
+ bio = bio_alloc(GFP_NOIO, 1);
+ if (bio == NULL)
+ return -ENOMEM;
+
+ isect = (page->index << PAGE_CACHE_SECTOR_SHIFT) +
+ (offset / SECTOR_SIZE);
+
+ bio->bi_sector = isect - be->be_f_offset + be->be_v_offset;
+ bio->bi_bdev = be->be_mdev;
+ bio->bi_end_io = bl_read_single_end_io;
+
+ lock_page(shadow_page);
+ if (bio_add_page(bio, shadow_page,
+ SECTOR_SIZE, round_down(offset, SECTOR_SIZE)) == 0) {
+ unlock_page(shadow_page);
+ bio_put(bio);
+ return -EIO;
+ }
+
+ submit_bio(READ, bio);
+ wait_on_page_locked(shadow_page);
+ if (unlikely(!test_bit(BIO_UPTODATE, &bio->bi_flags))) {
+ ret = -EIO;
+ } else {
+ kaddr = kmap_atomic(page);
+ kshadow_addr = kmap_atomic(shadow_page);
+ memcpy(kaddr + offset, kshadow_addr + offset, len);
+ kunmap_atomic(kshadow_addr);
+ kunmap_atomic(kaddr);
+ }
+ __free_page(shadow_page);
+ bio_put(bio);
+
+ return ret;
+}
+
+static int
+bl_read_partial_page_sync(struct page *page, struct pnfs_block_extent *be,
+ unsigned int dirty_offset, unsigned int dirty_len,
+ bool full_page)
+{
+ int ret = 0;
+ unsigned int start, end;
+
+ if (full_page) {
+ start = 0;
+ end = PAGE_CACHE_SIZE;
+ } else {
+ start = round_down(dirty_offset, SECTOR_SIZE);
+ end = round_up(dirty_offset + dirty_len, SECTOR_SIZE);
+ }
+
+ dprintk("%s: offset %u len %d\n", __func__, dirty_offset, dirty_len);
+ if (!be) {
+ zero_user_segments(page, start, dirty_offset,
+ dirty_offset + dirty_len, end);
+ if (start == 0 && end == PAGE_CACHE_SIZE &&
+ trylock_page(page)) {
+ SetPageUptodate(page);
+ unlock_page(page);
+ }
+ return ret;
+ }
+
+ if (start != dirty_offset)
+ ret = bl_do_readpage_sync(page, be, start, dirty_offset - start);
+
+ if (!ret && (dirty_offset + dirty_len < end))
+ ret = bl_do_readpage_sync(page, be, dirty_offset + dirty_len,
+ end - dirty_offset - dirty_len);
+
+ return ret;
+}
+
/* Given an unmapped page, zero it or read in page for COW, page is locked
* by caller.
*/
@@ -483,7 +597,6 @@ init_page_for_write(struct page *page, struct pnfs_block_extent *cow_read)
SetPageUptodate(page);

cleanup:
- bl_put_extent(cow_read);
if (bh)
free_buffer_head(bh);
if (ret) {
@@ -555,6 +668,7 @@ bl_write_pagelist(struct nfs_write_data *wdata, int sync)
struct parallel_io *par;
loff_t offset = wdata->args.offset;
size_t count = wdata->args.count;
+ unsigned int pg_offset, pg_len, saved_len;
struct page **pages = wdata->args.pages;
struct page *page;
pgoff_t index;
@@ -659,10 +773,11 @@ next_page:
if (!extent_length) {
/* We've used up the previous extent */
bl_put_extent(be);
+ bl_put_extent(cow_read);
bio = bl_submit_bio(WRITE, bio);
/* Get the next one */
be = bl_find_get_extent(BLK_LSEG2EXT(header->lseg),
- isect, NULL);
+ isect, &cow_read);
if (!be || !is_writable(be, isect)) {
header->pnfs_error = -EINVAL;
goto out;
@@ -679,7 +794,26 @@ next_page:
extent_length = be->be_length -
(isect - be->be_f_offset);
}
- if (be->be_state == PNFS_BLOCK_INVALID_DATA) {
+
+ dprintk("%s offset %lld count %Zu\n", __func__, offset, count);
+ pg_offset = offset & ~PAGE_CACHE_MASK;
+ if (pg_offset + count > PAGE_CACHE_SIZE)
+ pg_len = PAGE_CACHE_SIZE - pg_offset;
+ else
+ pg_len = count;
+
+ saved_len = pg_len;
+ if (be->be_state == PNFS_BLOCK_INVALID_DATA &&
+ !bl_is_sector_init(be->be_inval, isect)) {
+ ret = bl_read_partial_page_sync(pages[i], cow_read,
+ pg_offset, pg_len, true);
+ if (ret) {
+ dprintk("%s bl_read_partial_page_sync fail %d\n",
+ __func__, ret);
+ header->pnfs_error = ret;
+ goto out;
+ }
+
ret = bl_mark_sectors_init(be->be_inval, isect,
PAGE_CACHE_SECTORS);
if (unlikely(ret)) {
@@ -688,15 +822,35 @@ next_page:
header->pnfs_error = ret;
goto out;
}
+
+ /* Expand to full page write */
+ pg_offset = 0;
+ pg_len = PAGE_CACHE_SIZE;
+ } else if ((pg_offset & (SECTOR_SIZE - 1)) ||
+ (pg_len & (SECTOR_SIZE - 1))){
+ /* ahh, nasty case. We have to do sync full sector
+ * read-modify-write cycles.
+ */
+ unsigned int saved_offset = pg_offset;
+ ret = bl_read_partial_page_sync(pages[i], be, pg_offset,
+ pg_len, false);
+ pg_offset = round_down(pg_offset, SECTOR_SIZE);
+ pg_len = round_up(saved_offset + pg_len, SECTOR_SIZE)
+ - pg_offset;
}
- bio = bl_add_page_to_bio(bio, wdata->pages.npages - i, WRITE,
+
+
+ bio = do_add_page_to_bio(bio, wdata->pages.npages - i, WRITE,
isect, pages[i], be,
- bl_end_io_write, par);
+ bl_end_io_write, par,
+ pg_offset, pg_len);
if (IS_ERR(bio)) {
header->pnfs_error = PTR_ERR(bio);
bio = NULL;
goto out;
}
+ offset += saved_len;
+ count -= saved_len;
isect += PAGE_CACHE_SECTORS;
last_isect = isect;
extent_length -= PAGE_CACHE_SECTORS;
@@ -714,17 +868,16 @@ next_page:
}

write_done:
- wdata->res.count = (last_isect << SECTOR_SHIFT) - (offset);
- if (count < wdata->res.count) {
- wdata->res.count = count;
- }
+ wdata->res.count = wdata->args.count;
out:
bl_put_extent(be);
+ bl_put_extent(cow_read);
bl_submit_bio(WRITE, bio);
put_parallel(par);
return PNFS_ATTEMPTED;
out_mds:
bl_put_extent(be);
+ bl_put_extent(cow_read);
kfree(par);
return PNFS_NOT_ATTEMPTED;
}
diff --git a/fs/nfs/blocklayout/blocklayout.h b/fs/nfs/blocklayout/blocklayout.h
index 0335069..39bb51a 100644
--- a/fs/nfs/blocklayout/blocklayout.h
+++ b/fs/nfs/blocklayout/blocklayout.h
@@ -41,6 +41,7 @@

#define PAGE_CACHE_SECTORS (PAGE_CACHE_SIZE >> SECTOR_SHIFT)
#define PAGE_CACHE_SECTOR_SHIFT (PAGE_CACHE_SHIFT - SECTOR_SHIFT)
+#define SECTOR_SIZE (1 << SECTOR_SHIFT)

struct block_mount_id {
spinlock_t bm_lock; /* protects list */
--
1.7.1.262.g5ef3d


2012-08-23 16:28:32

by Peng Tao

Subject: [PATCH-resend 1/6] NFSv41: fix DIO write_io calculation

pnfs_within_mdsthreshold() is called inside pg_init, so we need to set
read_io/write_io before that. Otherwise pnfs_within_mdsthreshold() fails
and IO goes to the MDS.
A simple test case:
dd if=foo of=/mnt/pnfs/bar bs=10M count=1 oflag=direct
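
For reference, a user-space sketch of the idea behind the fix:
accumulate the request size up front (via an iov_length equivalent,
mirroring the kernel helper of that name) before the IO is scheduled,
rather than from the completed byte count afterwards:

#include <stdio.h>
#include <sys/uio.h>

static size_t iov_length(const struct iovec *iov, unsigned long nr_segs)
{
	size_t len = 0;
	unsigned long seg;

	for (seg = 0; seg < nr_segs; seg++)
		len += iov[seg].iov_len;
	return len;
}

int main(void)
{
	struct iovec iov[2] = {
		{ .iov_base = NULL, .iov_len = 4096 },
		{ .iov_base = NULL, .iov_len = 8192 },
	};

	/* accumulate before issuing IO, as the patch below does */
	printf("write_io += %zu\n", iov_length(iov, 2));
	return 0;
}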

Signed-off-by: Peng Tao <[email protected]>
---
fs/nfs/direct.c | 4 ++--
1 files changed, 2 insertions(+), 2 deletions(-)

diff --git a/fs/nfs/direct.c b/fs/nfs/direct.c
index 1ba385b..34a6282 100644
--- a/fs/nfs/direct.c
+++ b/fs/nfs/direct.c
@@ -463,10 +463,10 @@ static ssize_t nfs_direct_read(struct kiocb *iocb, const struct iovec *iov,
if (!is_sync_kiocb(iocb))
dreq->iocb = iocb;

+ NFS_I(inode)->read_io += iov_length(iov, nr_segs);
result = nfs_direct_read_schedule_iovec(dreq, iov, nr_segs, pos, uio);
if (!result)
result = nfs_direct_wait(dreq);
- NFS_I(inode)->read_io += result;
out_release:
nfs_direct_req_release(dreq);
out:
@@ -814,6 +814,7 @@ static ssize_t nfs_direct_write_schedule_iovec(struct nfs_direct_req *dreq,
get_dreq(dreq);
atomic_inc(&inode->i_dio_count);

+ NFS_I(dreq->inode)->write_io += iov_length(iov, nr_segs);
for (seg = 0; seg < nr_segs; seg++) {
const struct iovec *vec = &iov[seg];
result = nfs_direct_write_schedule_segment(&desc, vec, pos, uio);
@@ -825,7 +826,6 @@ static ssize_t nfs_direct_write_schedule_iovec(struct nfs_direct_req *dreq,
pos += vec->iov_len;
}
nfs_pageio_complete(&desc);
- NFS_I(dreq->inode)->write_io += desc.pg_bytes_written;

/*
* If no bytes were started, return the error, and let the
--
1.7.1.262.g5ef3d


2012-08-23 16:28:47

by Peng Tao

Subject: [PATCH-resend 5/6] pnfsblock: fix non-aligned DIO read

For a DIO read that is not sector aligned, we should reject it
and resend it via the MDS. Otherwise there might be data corruption.
Also teach bl_read_pagelist to handle partial-page reads for DIO.
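
To illustrate, a user-space sketch of the per-page split that a
sector-aligned but non-page-aligned DIO read requires (PAGE_CACHE_SIZE
is assumed to be 4096 here; the values are examples):

#include <stdio.h>

#define PAGE_CACHE_SIZE	4096u
#define PAGE_CACHE_MASK	(~(PAGE_CACHE_SIZE - 1))

int main(void)
{
	unsigned long long f_offset = 512;	/* sector aligned, not page aligned */
	unsigned int bytes_left = 6144;		/* 12 sectors */

	/* mirrors the pg_offset/pg_len computation in bl_read_pagelist():
	 * the first page starts mid-page, the last one may be short */
	while (bytes_left > 0) {
		unsigned int pg_offset = f_offset & ~PAGE_CACHE_MASK;
		unsigned int pg_len = PAGE_CACHE_SIZE - pg_offset;

		if (pg_len > bytes_left)
			pg_len = bytes_left;
		printf("page IO: pg_offset %u pg_len %u\n", pg_offset, pg_len);
		f_offset += pg_len;
		bytes_left -= pg_len;
	}
	return 0;
}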

Cc: stable <[email protected]> [since v3.4]
Signed-off-by: Peng Tao <[email protected]>
---
fs/nfs/blocklayout/blocklayout.c | 64 +++++++++++++++++++++++++++++++++-----
1 files changed, 56 insertions(+), 8 deletions(-)

diff --git a/fs/nfs/blocklayout/blocklayout.c b/fs/nfs/blocklayout/blocklayout.c
index 39fa002..e5dfef5 100644
--- a/fs/nfs/blocklayout/blocklayout.c
+++ b/fs/nfs/blocklayout/blocklayout.c
@@ -252,8 +252,11 @@ bl_read_pagelist(struct nfs_read_data *rdata)
sector_t isect, extent_length = 0;
struct parallel_io *par;
loff_t f_offset = rdata->args.offset;
+ size_t bytes_left = rdata->args.count;
+ unsigned int pg_offset, pg_len;
struct page **pages = rdata->args.pages;
int pg_index = rdata->args.pgbase >> PAGE_CACHE_SHIFT;
+ const bool is_dio = (header->dreq != NULL);

dprintk("%s enter nr_pages %u offset %lld count %u\n", __func__,
rdata->pages.npages, f_offset, (unsigned int)rdata->args.count);
@@ -287,36 +290,53 @@ bl_read_pagelist(struct nfs_read_data *rdata)
extent_length = min(extent_length, cow_length);
}
}
+
+ if (is_dio) {
+ pg_offset = f_offset & ~PAGE_CACHE_MASK;
+ if (pg_offset + bytes_left > PAGE_CACHE_SIZE)
+ pg_len = PAGE_CACHE_SIZE - pg_offset;
+ else
+ pg_len = bytes_left;
+
+ f_offset += pg_len;
+ bytes_left -= pg_len;
+ isect += (pg_offset >> SECTOR_SHIFT);
+ } else {
+ pg_offset = 0;
+ pg_len = PAGE_CACHE_SIZE;
+ }
+
hole = is_hole(be, isect);
if (hole && !cow_read) {
bio = bl_submit_bio(READ, bio);
/* Fill hole w/ zeroes w/o accessing device */
dprintk("%s Zeroing page for hole\n", __func__);
- zero_user_segment(pages[i], 0, PAGE_CACHE_SIZE);
+ zero_user_segment(pages[i], pg_offset, pg_len);
print_page(pages[i]);
SetPageUptodate(pages[i]);
} else {
struct pnfs_block_extent *be_read;

be_read = (hole && cow_read) ? cow_read : be;
- bio = bl_add_page_to_bio(bio, rdata->pages.npages - i,
+ bio = do_add_page_to_bio(bio, rdata->pages.npages - i,
READ,
isect, pages[i], be_read,
- bl_end_io_read, par);
+ bl_end_io_read, par,
+ pg_offset, pg_len);
if (IS_ERR(bio)) {
header->pnfs_error = PTR_ERR(bio);
bio = NULL;
goto out;
}
}
- isect += PAGE_CACHE_SECTORS;
+ isect += (pg_len >> SECTOR_SHIFT);
extent_length -= PAGE_CACHE_SECTORS;
}
if ((isect << SECTOR_SHIFT) >= header->inode->i_size) {
rdata->res.eof = 1;
- rdata->res.count = header->inode->i_size - f_offset;
+ rdata->res.count = header->inode->i_size - rdata->args.offset;
} else {
- rdata->res.count = (isect << SECTOR_SHIFT) - f_offset;
+ rdata->res.count = (isect << SECTOR_SHIFT) - rdata->args.offset;
}
out:
bl_put_extent(be);
@@ -1149,9 +1169,37 @@ bl_clear_layoutdriver(struct nfs_server *server)
return 0;
}

+static bool
+is_aligned_req(struct nfs_page *req, unsigned int alignment)
+{
+ return IS_ALIGNED(req->wb_offset, alignment) &&
+ IS_ALIGNED(req->wb_bytes, alignment);
+}
+
+static void
+bl_pg_init_read(struct nfs_pageio_descriptor *pgio, struct nfs_page *req)
+{
+ if (pgio->pg_dreq != NULL &&
+ !is_aligned_req(req, SECTOR_SIZE))
+ nfs_pageio_reset_read_mds(pgio);
+ else
+ pnfs_generic_pg_init_read(pgio, req);
+}
+
+static bool
+bl_pg_test_read(struct nfs_pageio_descriptor *pgio, struct nfs_page *prev,
+ struct nfs_page *req)
+{
+ if (pgio->pg_dreq != NULL &&
+ !is_aligned_req(req, SECTOR_SIZE))
+ return false;
+
+ return pnfs_generic_pg_test(pgio, prev, req);
+}
+
static const struct nfs_pageio_ops bl_pg_read_ops = {
- .pg_init = pnfs_generic_pg_init_read,
- .pg_test = pnfs_generic_pg_test,
+ .pg_init = bl_pg_init_read,
+ .pg_test = bl_pg_test_read,
.pg_doio = pnfs_generic_pg_readpages,
};

--
1.7.1.262.g5ef3d


2012-09-20 02:20:47

by Peng, Tao

Subject: RE: [PATCH-resend 0/6] NFS41 and pnfsblock bugfixes

Hi Trond,

Any comments on below patches? I still don't see them in your tree and 3.6 is reaching last RC...

Thanks,
Tao