Only treat a write that goes up to the inode size as an aligned request,
because a write always writes PAGE_CACHE_SIZE, but a read has a dynamic size.
Signed-off-by: Kinglong Mee <[email protected]>
---
fs/nfs/blocklayout/blocklayout.c | 13 +++++++------
1 file changed, 7 insertions(+), 6 deletions(-)
diff --git a/fs/nfs/blocklayout/blocklayout.c b/fs/nfs/blocklayout/blocklayout.c
index ddd0138..8bc870e 100644
--- a/fs/nfs/blocklayout/blocklayout.c
+++ b/fs/nfs/blocklayout/blocklayout.c
@@ -743,7 +743,7 @@ bl_set_layoutdriver(struct nfs_server *server, const struct nfs_fh *fh)
static bool
is_aligned_req(struct nfs_pageio_descriptor *pgio,
- struct nfs_page *req, unsigned int alignment)
+ struct nfs_page *req, unsigned int alignment, bool is_write)
{
/*
* Always accept buffered writes, higher layers take care of the
@@ -758,7 +758,8 @@ is_aligned_req(struct nfs_pageio_descriptor *pgio,
if (IS_ALIGNED(req->wb_bytes, alignment))
return true;
- if (req_offset(req) + req->wb_bytes == i_size_read(pgio->pg_inode)) {
+ if (is_write &&
+ (req_offset(req) + req->wb_bytes == i_size_read(pgio->pg_inode))) {
/*
* If the write goes up to the inode size, just write
* the full page. Data past the inode size is
@@ -775,7 +776,7 @@ is_aligned_req(struct nfs_pageio_descriptor *pgio,
static void
bl_pg_init_read(struct nfs_pageio_descriptor *pgio, struct nfs_page *req)
{
- if (!is_aligned_req(pgio, req, SECTOR_SIZE)) {
+ if (!is_aligned_req(pgio, req, SECTOR_SIZE, false)) {
nfs_pageio_reset_read_mds(pgio);
return;
}
@@ -791,7 +792,7 @@ static size_t
bl_pg_test_read(struct nfs_pageio_descriptor *pgio, struct nfs_page *prev,
struct nfs_page *req)
{
- if (!is_aligned_req(pgio, req, SECTOR_SIZE))
+ if (!is_aligned_req(pgio, req, SECTOR_SIZE, false))
return 0;
return pnfs_generic_pg_test(pgio, prev, req);
}
@@ -824,7 +825,7 @@ bl_pg_init_write(struct nfs_pageio_descriptor *pgio, struct nfs_page *req)
{
u64 wb_size;
- if (!is_aligned_req(pgio, req, PAGE_SIZE)) {
+ if (!is_aligned_req(pgio, req, PAGE_SIZE, true)) {
nfs_pageio_reset_write_mds(pgio);
return;
}
@@ -846,7 +847,7 @@ static size_t
bl_pg_test_write(struct nfs_pageio_descriptor *pgio, struct nfs_page *prev,
struct nfs_page *req)
{
- if (!is_aligned_req(pgio, req, PAGE_SIZE))
+ if (!is_aligned_req(pgio, req, PAGE_SIZE, true))
return 0;
return pnfs_generic_pg_test(pgio, prev, req);
}
--
2.5.0
On Sat, Feb 13, 2016 at 09:51:31PM +0800, Kinglong Mee wrote:
> Only treat write goes up to the inode size as aligned request,
> because it always write PAGE_CACHE_SIZE, but read a dynamic size.
Can you explain what the point is? We'll never use data past the block size
in the page cache, but per the block size requirement in the spec we must
be able to read it. This patch means we can't direct storage reads where
we previously could, without any obvious upside.
On 2/29/2016 17:57, Christoph Hellwig wrote:
> On Sat, Feb 13, 2016 at 09:51:31PM +0800, Kinglong Mee wrote:
>> Only treat write goes up to the inode size as aligned request,
>> because it always write PAGE_CACHE_SIZE, but read a dynamic size.
>
> Can you explain what the point is?
I ran the LTP tests and read02 hangs.
There seems to be a loop in the block code.
It is caused by passing an unaligned read to a bio.
So this patch was sent to make sure read requests are aligned.
> We'll never use data pas the block size
> in the page cache, but per the block size requirement in the spec we must
> be able to read it. This patch means we can't direct storage reads where
> we previously could, without any obvious upside.
bl_pg_init_read/bl_pg_test_read check alignment based on SECTOR_SIZE.
bl_pg_init_write/bl_pg_test_write check alignment based on PAGE_SIZE.
According to the code, reading data per block size is okay.
But, there is a comment in bl_read_pagelist() as,
250 isect = (sector_t) (f_offset >> SECTOR_SHIFT);
251 /* Code assumes extents are page-aligned */
252 for (i = pg_index; i < header->page_array.npages; i++) {
253 if (extent_length <= 0) {
I don't know the meaning of "extents are page-aligned",
extent's start offset is aligned to page size?
or extent's start offset is aligned to page size and length
is equal to PAGE_SIZE too ?
thanks,
Kinglong Mee
On Mon, Feb 29, 2016 at 08:00:21PM +0800, Kinglong Mee wrote:
> On 2/29/2016 17:57, Christoph Hellwig wrote:
> > On Sat, Feb 13, 2016 at 09:51:31PM +0800, Kinglong Mee wrote:
> >> Only treat write goes up to the inode size as aligned request,
> >> because it always write PAGE_CACHE_SIZE, but read a dynamic size.
> >
> > Can you explain what the point is?
>
> I run ltp tests with read02 hang.
> There seams a loop in block codes.
> It is caused by passing an unaligned read to bio.
> So this patch is out as making a aligned read request.
Do you have any additional details?
> > We'll never use data pas the block size
> > in the page cache, but per the block size requirement in the spec we must
> > be able to read it. This patch means we can't direct storage reads where
> > we previously could, without any obvious upside.
>
> bl_pg_init_read/bl_pg_test_read checks aligned base on SECTOR_SIZE.
> bl_pg_init_write/bl_pg_test_write checks aligned base on PAGE_SIZE.
>
> If according the codes, reads data per block size is okay.
>
> But, there is a comment in bl_read_pagelist() as,
>
> 250 isect = (sector_t) (f_offset >> SECTOR_SHIFT);
> 251 /* Code assumes extents are page-aligned */
> 252 for (i = pg_index; i < header->page_array.npages; i++) {
> 253 if (extent_length <= 0) {
>
> I don't known the meaning of "extents are page-aligned",
> extent's start offset is aligned to page size?
> or extent's start offset is aligned to page size and length
> is equal to PAGE_SIZE too ?
All of them should start aligned to PAGE_SIZE, and also have a length
that is a multiple of PAGE_SIZE.
Cc Peng Tao,
On 2/29/2016 21:34, Christoph Hellwig wrote:
> On Mon, Feb 29, 2016 at 08:00:21PM +0800, Kinglong Mee wrote:
>> On 2/29/2016 17:57, Christoph Hellwig wrote:
>>> On Sat, Feb 13, 2016 at 09:51:31PM +0800, Kinglong Mee wrote:
>>>> Only treat write goes up to the inode size as aligned request,
>>>> because it always write PAGE_CACHE_SIZE, but read a dynamic size.
>>>
>>> Can you explain what the point is?
>>
>> I run ltp tests with read02 hang.
>> There seams a loop in block codes.
>> It is caused by passing an unaligned read to bio.
>> So this patch is out as making a aligned read request.
>
> Do you have any additional details?
See the following comments.
>
>>> We'll never use data pas the block size
>>> in the page cache, but per the block size requirement in the spec we must
>>> be able to read it. This patch means we can't direct storage reads where
>>> we previously could, without any obvious upside.
>>
>> bl_pg_init_read/bl_pg_test_read checks aligned base on SECTOR_SIZE.
>> bl_pg_init_write/bl_pg_test_write checks aligned base on PAGE_SIZE.
>>
>> If according the codes, reads data per block size is okay.
>>
>> But, there is a comment in bl_read_pagelist() as,
>>
>> 250 isect = (sector_t) (f_offset >> SECTOR_SHIFT);
>> 251 /* Code assumes extents are page-aligned */
>> 252 for (i = pg_index; i < header->page_array.npages; i++) {
>> 253 if (extent_length <= 0) {
>>
>> I don't known the meaning of "extents are page-aligned",
>> extent's start offset is aligned to page size?
>> or extent's start offset is aligned to page size and length
>> is equal to PAGE_SIZE too ?
>
> All of them should start aligned to PAGE_SIZE, and also have a length
> that is a multiple of PAGE_SIZE.
Commit f742dc4a3258 ("pnfsblock: fix non-aligned DIO read")
changed the alignment size to SECTOR_SIZE, but the request was still aligned.
+static bool
+is_aligned_req(struct nfs_page *req, unsigned int alignment)
+{
+ return IS_ALIGNED(req->wb_offset, alignment) &&
+ IS_ALIGNED(req->wb_bytes, alignment);
+}
Commit 3a6fd1f004fc ("pnfs/blocklayout: remove read-modify-write
handling in bl_write_pagelist") adds reading up to the inode size.
+ if (req_offset(req) + req->wb_bytes == i_size_read(pgio->pg_inode)) {
+ /*
+ * If the write goes up to the inode size, just write
+ * the full page. Data past the inode size is
+ * guaranteed to be zeroed by the higher level client
+ * code, and this behaviour is mandated by RFC 5663
+ * section 2.3.2.
+ */
+ return true;
+ }
But the comment is only about write requests; it says nothing about reads.
After that patch, a read request can be unaligned.
thanks,
Kinglong Mee
On Tue, Mar 1, 2016 at 7:35 AM, Kinglong Mee <[email protected]> wrote:
> Cc Peng Tao,
>
> On 2/29/2016 21:34, Christoph Hellwig wrote:
>> On Mon, Feb 29, 2016 at 08:00:21PM +0800, Kinglong Mee wrote:
>>> On 2/29/2016 17:57, Christoph Hellwig wrote:
>>>> On Sat, Feb 13, 2016 at 09:51:31PM +0800, Kinglong Mee wrote:
>>>>> Only treat write goes up to the inode size as aligned request,
>>>>> because it always write PAGE_CACHE_SIZE, but read a dynamic size.
>>>>
>>>> Can you explain what the point is?
>>>
>>> I run ltp tests with read02 hang.
>>> There seams a loop in block codes.
>>> It is caused by passing an unaligned read to bio.
>>> So this patch is out as making a aligned read request.
>>
>> Do you have any additional details?
>
> See the following comments.
>
>>
>>>> We'll never use data pas the block size
>>>> in the page cache, but per the block size requirement in the spec we must
>>>> be able to read it. This patch means we can't direct storage reads where
>>>> we previously could, without any obvious upside.
>>>
>>> bl_pg_init_read/bl_pg_test_read checks aligned base on SECTOR_SIZE.
>>> bl_pg_init_write/bl_pg_test_write checks aligned base on PAGE_SIZE.
>>>
>>> If according the codes, reads data per block size is okay.
>>>
>>> But, there is a comment in bl_read_pagelist() as,
>>>
>>> 250 isect = (sector_t) (f_offset >> SECTOR_SHIFT);
>>> 251 /* Code assumes extents are page-aligned */
>>> 252 for (i = pg_index; i < header->page_array.npages; i++) {
>>> 253 if (extent_length <= 0) {
>>>
>>> I don't known the meaning of "extents are page-aligned",
>>> extent's start offset is aligned to page size?
>>> or extent's start offset is aligned to page size and length
>>> is equal to PAGE_SIZE too ?
>>
>> All of them should start aligned to PAGE_SIZE, and also have a length
>> that is a multiple of PAGE_SIZE.
>
> Commit f742dc4a3258 ("pnfsblock: fix non-aligned DIO read") has
> change the aligned size to SECTOR_SIZE but the request is aligned.
>
> +static bool
> +is_aligned_req(struct nfs_page *req, unsigned int alignment)
> +{
> + return IS_ALIGNED(req->wb_offset, alignment) &&
> + IS_ALIGNED(req->wb_bytes, alignment);
> +}
>
> Commit 3a6fd1f004fc ("pnfs/blocklayout: remove read-modify-write
> handling in bl_write_pagelist") adds reading up to the inode size.
>
> + if (req_offset(req) + req->wb_bytes == i_size_read(pgio->pg_inode)) {
> + /*
> + * If the write goes up to the inode size, just write
> + * the full page. Data past the inode size is
> + * guaranteed to be zeroed by the higher level client
> + * code, and this behaviour is mandated by RFC 5663
> + * section 2.3.2.
> + */
> + return true;
> + }
>
> But the comments are about write request, without any read.
> After that patch, the read request can be unaligned.
>
Christoph, does Kinglong's explanation satisfy your concerns, or are
there still unresolved differences that should prevent me from taking
this patch?
Cheers
Trond
On Wed, Mar 16, 2016 at 03:41:06PM -0400, Trond Myklebust wrote:
> Christoph, does Kinglong's explanation satisfy your concerns, or are
> there still unresolved differences that should prevent me from taking
> this patch?
I still don't think it's the right thing to do, but I don't have
time to debug the issue fully myself. Let's take the patch for now
as it should be less harmful than not having it.
Hello Trond,
If this patch fixes a read hang, is it a good candidate for stable?
Regards,
--
William