2022-05-23 08:19:38

by Hsin-Yi Wang

[permalink] [raw]
Subject: [PATCH v3 0/3] Implement readahead for squashfs

Commit c1f6925e1091("mm: put readahead pages in cache earlier") requires
filesystems to implement the readahead callback. Otherwise there will be a
performance regression.

Commit 9eec1d897139("squashfs: provide backing_dev_info in order to
disable read-ahead") mitigates the performance drop issue for squashfs
by disabling readahead for it.

This series implements the readahead callback for squashfs. The previous
discussions are in [1] and [2].

[1] https://lore.kernel.org/all/CAJMQK-g9G6KQmH-V=BRGX0swZji9Wxe_2c7ht-MMAapdFy2pXw@mail.gmail.com/T/
[2] https://lore.kernel.org/linux-mm/[email protected]/t/#m4af4473b94f98a4996cb11756b633a07e5e059d1

Hsin-Yi Wang (2):
Revert "squashfs: provide backing_dev_info in order to disable
read-ahead"
squashfs: implement readahead

Phillip Lougher (1):
squashfs: always build "file direct" version of page actor

fs/squashfs/Makefile | 4 +-
fs/squashfs/file.c | 91 +++++++++++++++++++++++++++++++++++++++-
fs/squashfs/page_actor.h | 41 ------------------
fs/squashfs/super.c | 33 ---------------
4 files changed, 92 insertions(+), 77 deletions(-)

--
2.36.1.124.g0e6072fb45-goog



2022-05-23 08:19:56

by Hsin-Yi Wang

[permalink] [raw]
Subject: [PATCH v3 3/3] squashfs: implement readahead

Implement the readahead callback for squashfs. It reads the datablocks
which cover the pages in the readahead request. In a few cases it will
not mark pages as uptodate, including:
- file end is 0 (the file is smaller than one datablock).
- zero filled blocks.
- the current batch of pages isn't within a single datablock, or doesn't
cover a whole datablock.
- decompressor error.
Otherwise pages will be marked as uptodate. The unhandled pages will be
read later by ->read_folio.

Suggested-by: Matthew Wilcox <[email protected]>
Signed-off-by: Hsin-Yi Wang <[email protected]>
Reported-by: Matthew Wilcox <[email protected]>
Reported-by: Phillip Lougher <[email protected]>
Reported-by: Xiongwei Song <[email protected]>
---
v2->v3:
- Add a check on the decompressed block size.
- Zero-fill the rest of the last page if it is not a full page.

v2: https://lore.kernel.org/lkml/[email protected]/
v1: https://lore.kernel.org/lkml/[email protected]/
---
fs/squashfs/file.c | 91 +++++++++++++++++++++++++++++++++++++++++++++-
1 file changed, 90 insertions(+), 1 deletion(-)

diff --git a/fs/squashfs/file.c b/fs/squashfs/file.c
index a8e495d8eb86..c311fc685fe4 100644
--- a/fs/squashfs/file.c
+++ b/fs/squashfs/file.c
@@ -39,6 +39,7 @@
#include "squashfs_fs_sb.h"
#include "squashfs_fs_i.h"
#include "squashfs.h"
+#include "page_actor.h"

/*
* Locate cache slot in range [offset, index] for specified inode. If
@@ -495,7 +496,95 @@ static int squashfs_read_folio(struct file *file, struct folio *folio)
return 0;
}

+static void squashfs_readahead(struct readahead_control *ractl)
+{
+ struct inode *inode = ractl->mapping->host;
+ struct squashfs_sb_info *msblk = inode->i_sb->s_fs_info;
+ size_t mask = (1UL << msblk->block_log) - 1;
+ size_t shift = msblk->block_log - PAGE_SHIFT;
+ loff_t start = readahead_pos(ractl) &~ mask;
+ size_t len = readahead_length(ractl) + readahead_pos(ractl) - start;
+ struct squashfs_page_actor *actor;
+ unsigned int nr_pages = 0;
+ struct page **pages;
+ u64 block = 0;
+ int bsize, res, i, index, bytes, expected;
+ int file_end = i_size_read(inode) >> msblk->block_log;
+ unsigned int max_pages = 1UL << shift;
+ void *pageaddr;
+
+ readahead_expand(ractl, start, (len | mask) + 1);
+
+ if (file_end == 0)
+ return;
+
+ pages = kmalloc_array(max_pages, sizeof(void *), GFP_KERNEL);
+ if (!pages)
+ return;
+
+ actor = squashfs_page_actor_init_special(pages, max_pages, 0);
+ if (!actor)
+ goto out;
+
+ for (;;) {
+ nr_pages = __readahead_batch(ractl, pages, max_pages);
+ if (!nr_pages)
+ break;
+
+ if (readahead_pos(ractl) >= i_size_read(inode) ||
+ nr_pages < max_pages)
+ goto skip_pages;
+
+ index = pages[0]->index >> shift;
+ if ((pages[nr_pages - 1]->index >> shift) != index)
+ goto skip_pages;
+
+ expected = index == file_end ?
+ (i_size_read(inode) & (msblk->block_size - 1)) :
+ msblk->block_size;
+
+ bsize = read_blocklist(inode, index, &block);
+ if (bsize == 0)
+ goto skip_pages;
+
+ res = squashfs_read_data(inode->i_sb, block, bsize, NULL,
+ actor);
+
+ if (res == expected) {
+ /* Last page may have trailing bytes not filled */
+ bytes = res % PAGE_SIZE;
+ if (bytes) {
+ pageaddr = kmap_atomic(pages[nr_pages - 1]);
+ memset(pageaddr + bytes, 0, PAGE_SIZE - bytes);
+ kunmap_atomic(pageaddr);
+ }
+
+ for (i = 0; i < nr_pages; i++)
+ SetPageUptodate(pages[i]);
+ }
+
+ for (i = 0; i < nr_pages; i++) {
+ unlock_page(pages[i]);
+ put_page(pages[i]);
+ }
+ }
+
+ kfree(actor);
+ kfree(pages);
+ return;
+
+skip_pages:
+ for (i = 0; i < nr_pages; i++) {
+ unlock_page(pages[i]);
+ put_page(pages[i]);
+ }
+
+ kfree(actor);
+out:
+ kfree(pages);
+}

const struct address_space_operations squashfs_aops = {
- .read_folio = squashfs_read_folio
+ .read_folio = squashfs_read_folio,
+ .readahead = squashfs_readahead
};
--
2.36.1.124.g0e6072fb45-goog


2022-05-23 08:20:39

by Hsin-Yi Wang

[permalink] [raw]
Subject: [PATCH v3 2/3] squashfs: always build "file direct" version of page actor

From: Phillip Lougher <[email protected]>

squashfs_readahead() uses the "file direct" version of the page
actor, so build it unconditionally.

Reported-by: kernel test robot <[email protected]>
Signed-off-by: Phillip Lougher <[email protected]>
Signed-off-by: Hsin-Yi Wang <[email protected]>
---
fs/squashfs/Makefile | 4 ++--
fs/squashfs/page_actor.h | 41 ----------------------------------------
2 files changed, 2 insertions(+), 43 deletions(-)

diff --git a/fs/squashfs/Makefile b/fs/squashfs/Makefile
index 7bd9b8b856d0..477c89a519ee 100644
--- a/fs/squashfs/Makefile
+++ b/fs/squashfs/Makefile
@@ -5,9 +5,9 @@

obj-$(CONFIG_SQUASHFS) += squashfs.o
squashfs-y += block.o cache.o dir.o export.o file.o fragment.o id.o inode.o
-squashfs-y += namei.o super.o symlink.o decompressor.o
+squashfs-y += namei.o super.o symlink.o decompressor.o page_actor.o
squashfs-$(CONFIG_SQUASHFS_FILE_CACHE) += file_cache.o
-squashfs-$(CONFIG_SQUASHFS_FILE_DIRECT) += file_direct.o page_actor.o
+squashfs-$(CONFIG_SQUASHFS_FILE_DIRECT) += file_direct.o
squashfs-$(CONFIG_SQUASHFS_DECOMP_SINGLE) += decompressor_single.o
squashfs-$(CONFIG_SQUASHFS_DECOMP_MULTI) += decompressor_multi.o
squashfs-$(CONFIG_SQUASHFS_DECOMP_MULTI_PERCPU) += decompressor_multi_percpu.o
diff --git a/fs/squashfs/page_actor.h b/fs/squashfs/page_actor.h
index 2e3073ace009..26e07373af8a 100644
--- a/fs/squashfs/page_actor.h
+++ b/fs/squashfs/page_actor.h
@@ -6,46 +6,6 @@
* Phillip Lougher <[email protected]>
*/

-#ifndef CONFIG_SQUASHFS_FILE_DIRECT
-struct squashfs_page_actor {
- void **page;
- int pages;
- int length;
- int next_page;
-};
-
-static inline struct squashfs_page_actor *squashfs_page_actor_init(void **page,
- int pages, int length)
-{
- struct squashfs_page_actor *actor = kmalloc(sizeof(*actor), GFP_KERNEL);
-
- if (actor == NULL)
- return NULL;
-
- actor->length = length ? : pages * PAGE_SIZE;
- actor->page = page;
- actor->pages = pages;
- actor->next_page = 0;
- return actor;
-}
-
-static inline void *squashfs_first_page(struct squashfs_page_actor *actor)
-{
- actor->next_page = 1;
- return actor->page[0];
-}
-
-static inline void *squashfs_next_page(struct squashfs_page_actor *actor)
-{
- return actor->next_page == actor->pages ? NULL :
- actor->page[actor->next_page++];
-}
-
-static inline void squashfs_finish_page(struct squashfs_page_actor *actor)
-{
- /* empty */
-}
-#else
struct squashfs_page_actor {
union {
void **buffer;
@@ -76,4 +36,3 @@ static inline void squashfs_finish_page(struct squashfs_page_actor *actor)
actor->squashfs_finish_page(actor);
}
#endif
-#endif
--
2.36.1.124.g0e6072fb45-goog


2022-05-23 08:20:40

by Hsin-Yi Wang

[permalink] [raw]
Subject: [PATCH v3 1/3] Revert "squashfs: provide backing_dev_info in order to disable read-ahead"

This reverts commit 9eec1d897139e5de287af5d559a02b811b844d82.

Stop disabling readahead for squashfs, since this series implements the
readahead callback for squashfs.

Suggested-by: Xiongwei Song <[email protected]>
Signed-off-by: Hsin-Yi Wang <[email protected]>
---
fs/squashfs/super.c | 33 ---------------------------------
1 file changed, 33 deletions(-)

diff --git a/fs/squashfs/super.c b/fs/squashfs/super.c
index 6d594ba2ed28..32565dafa7f3 100644
--- a/fs/squashfs/super.c
+++ b/fs/squashfs/super.c
@@ -29,7 +29,6 @@
#include <linux/module.h>
#include <linux/magic.h>
#include <linux/xattr.h>
-#include <linux/backing-dev.h>

#include "squashfs_fs.h"
#include "squashfs_fs_sb.h"
@@ -113,24 +112,6 @@ static const struct squashfs_decompressor *supported_squashfs_filesystem(
return decompressor;
}

-static int squashfs_bdi_init(struct super_block *sb)
-{
- int err;
- unsigned int major = MAJOR(sb->s_dev);
- unsigned int minor = MINOR(sb->s_dev);
-
- bdi_put(sb->s_bdi);
- sb->s_bdi = &noop_backing_dev_info;
-
- err = super_setup_bdi_name(sb, "squashfs_%u_%u", major, minor);
- if (err)
- return err;
-
- sb->s_bdi->ra_pages = 0;
- sb->s_bdi->io_pages = 0;
-
- return 0;
-}

static int squashfs_fill_super(struct super_block *sb, struct fs_context *fc)
{
@@ -146,20 +127,6 @@ static int squashfs_fill_super(struct super_block *sb, struct fs_context *fc)

TRACE("Entered squashfs_fill_superblock\n");

- /*
- * squashfs provides 'backing_dev_info' in order to disable read-ahead. For
- * squashfs, I/O is not deferred, it is done immediately in read_folio,
- * which means the user would always have to wait their own I/O. So the effect
- * of readahead is very weak for squashfs. squashfs_bdi_init will set
- * sb->s_bdi->ra_pages and sb->s_bdi->io_pages to 0 and close readahead for
- * squashfs.
- */
- err = squashfs_bdi_init(sb);
- if (err) {
- errorf(fc, "squashfs init bdi failed");
- return err;
- }
-
sb->s_fs_info = kzalloc(sizeof(*msblk), GFP_KERNEL);
if (sb->s_fs_info == NULL) {
ERROR("Failed to allocate squashfs_sb_info\n");
--
2.36.1.124.g0e6072fb45-goog


2022-06-01 05:44:21

by Andrew Morton

[permalink] [raw]
Subject: Re: [PATCH v3 3/3] squashfs: implement readahead

On Tue, 31 May 2022 17:51:11 +0800 Hsin-Yi Wang <[email protected]> wrote:

> On Mon, May 23, 2022 at 3:00 PM Hsin-Yi Wang <[email protected]> wrote:
> >
> > Implement readahead callback for squashfs. It will read datablocks
> > which cover pages in readahead request. For a few cases it will
> > not mark page as uptodate, including:
> > - file end is 0.
> > - zero filled blocks.
> > - current batch of pages isn't in the same datablock or not enough in a
> > datablock.
> > - decompressor error.
> > Otherwise pages will be marked as uptodate. The unhandled pages will be
> > updated by readpage later.
> >
> > Suggested-by: Matthew Wilcox <[email protected]>
> > Signed-off-by: Hsin-Yi Wang <[email protected]>
> > Reported-by: Matthew Wilcox <[email protected]>
> > Reported-by: Phillip Lougher <[email protected]>
> > Reported-by: Xiongwei Song <[email protected]>
> > ---
>
> Kindly ping on the thread. Conversations on v2:
> https://patchwork.kernel.org/project/linux-mm/patch/[email protected]/#24869037
> This version mainly addressed the error handling.

Yes, some reviewer input would be helpful please.

2022-06-01 12:31:57

by Phillip Lougher

[permalink] [raw]
Subject: Re: [PATCH v3 3/3] squashfs: implement readahead

On 31/05/2022 21:47, Andrew Morton wrote:
> On Mon, 23 May 2022 14:59:13 +0800 Hsin-Yi Wang <[email protected]> wrote:
>
>> Implement readahead callback for squashfs. It will read datablocks
>> which cover pages in readahead request. For a few cases it will
>> not mark page as uptodate, including:
>> - file end is 0.
>> - zero filled blocks.
>> - current batch of pages isn't in the same datablock or not enough in a
>> datablock.
>> - decompressor error.
>> Otherwise pages will be marked as uptodate. The unhandled pages will be
>> updated by readpage later.
>>
>> ...
>>
>
> The choice of types seems somewhat confused.
>
>> @@ -495,7 +496,95 @@ static int squashfs_read_folio(struct file *file, struct folio *folio)
>> return 0;
>> }
>>
>> +static void squashfs_readahead(struct readahead_control *ractl)
>> +{
>> + struct inode *inode = ractl->mapping->host;
>> + struct squashfs_sb_info *msblk = inode->i_sb->s_fs_info;
>> + size_t mask = (1UL << msblk->block_log) - 1;
>> + size_t shift = msblk->block_log - PAGE_SHIFT;
>
> block_log is unsigned short. Why size_t?
>
>> + loff_t start = readahead_pos(ractl) &~ mask;
>> + size_t len = readahead_length(ractl) + readahead_pos(ractl) - start;
>> + struct squashfs_page_actor *actor;
>> + unsigned int nr_pages = 0;
>
> OK.
>
>> + struct page **pages;
>> + u64 block = 0;
>> + int bsize, res, i, index, bytes, expected;
>
> `res' could be local to the inner loop.
>
> `i' is used in situations where an unsigned type would be more
> appropriate. If it is made unsigned then `i' is no longer a suitable
> identifier. Doesn't matter much.
>
> `index' is from page.index, which is pgoff_t.
>
> `bytes' could be local to the innermost loop.
>
> `expected' is inappropriately a signed type and could be local to the
> inner loop.
>
>> + int file_end = i_size_read(inode) >> msblk->block_log;
>> + unsigned int max_pages = 1UL << shift;
>> + void *pageaddr;
>> +

pageaddr could be made local to the innermost scope.

Apart from that, the patch and the updated error handling look
good.

Phillip

>> + readahead_expand(ractl, start, (len | mask) + 1);
>> +
>> + if (file_end == 0)
>> + return;
>> +
>> + pages = kmalloc_array(max_pages, sizeof(void *), GFP_KERNEL);
>> + if (!pages)
>> + return;
>> +
>> + actor = squashfs_page_actor_init_special(pages, max_pages, 0);
>> + if (!actor)
>> + goto out;
>> +
>> + for (;;) {
>> + nr_pages = __readahead_batch(ractl, pages, max_pages);
>> + if (!nr_pages)
>> + break;
>> +
>> + if (readahead_pos(ractl) >= i_size_read(inode) ||
>> + nr_pages < max_pages)
>> + goto skip_pages;
>> +
>> + index = pages[0]->index >> shift;
>> + if ((pages[nr_pages - 1]->index >> shift) != index)
>> + goto skip_pages;
>> +
>> + expected = index == file_end ?
>> + (i_size_read(inode) & (msblk->block_size - 1)) :
>> + msblk->block_size;
>> +
>> + bsize = read_blocklist(inode, index, &block);
>> + if (bsize == 0)
>> + goto skip_pages;
>> +
>> + res = squashfs_read_data(inode->i_sb, block, bsize, NULL,
>> + actor);
>> +
>> + if (res == expected) {
>> + /* Last page may have trailing bytes not filled */
>> + bytes = res % PAGE_SIZE;
>> + if (bytes) {
>> + pageaddr = kmap_atomic(pages[nr_pages - 1]);
>> + memset(pageaddr + bytes, 0, PAGE_SIZE - bytes);
>> + kunmap_atomic(pageaddr);
>> + }
>> +
>> + for (i = 0; i < nr_pages; i++)
>> + SetPageUptodate(pages[i]);
>> + }
>
> res == -EIO is unhandled?
>
>> + for (i = 0; i < nr_pages; i++) {
>> + unlock_page(pages[i]);
>> + put_page(pages[i]);
>> + }
>> + }
>> +
>> + kfree(actor);
>> + kfree(pages);
>> + return;
>> +
>> +skip_pages:
>> + for (i = 0; i < nr_pages; i++) {
>> + unlock_page(pages[i]);
>> + put_page(pages[i]);
>> + }
>> +
>> + kfree(actor);
>> +out:
>> + kfree(pages);
>> +}
>>
>> const struct address_space_operations squashfs_aops = {
>> - .read_folio = squashfs_read_folio
>> + .read_folio = squashfs_read_folio,
>> + .readahead = squashfs_readahead
>> };
>


2022-06-01 19:07:53

by Hsin-Yi Wang

[permalink] [raw]
Subject: Re: [PATCH v3 3/3] squashfs: implement readahead

On Wed, Jun 1, 2022 at 9:08 AM Phillip Lougher <[email protected]> wrote:
>
> On 31/05/2022 21:47, Andrew Morton wrote:
> > On Mon, 23 May 2022 14:59:13 +0800 Hsin-Yi Wang <[email protected]> wrote:
> >
> >> Implement readahead callback for squashfs. It will read datablocks
> >> which cover pages in readahead request. For a few cases it will
> >> not mark page as uptodate, including:
> >> - file end is 0.
> >> - zero filled blocks.
> >> - current batch of pages isn't in the same datablock or not enough in a
> >> datablock.
> >> - decompressor error.
> >> Otherwise pages will be marked as uptodate. The unhandled pages will be
> >> updated by readpage later.
> >>
> >> ...
> >>
> >
> > The choice of types seems somewhat confused.
> >
> >> @@ -495,7 +496,95 @@ static int squashfs_read_folio(struct file *file, struct folio *folio)
> >> return 0;
> >> }
> >>
> >> +static void squashfs_readahead(struct readahead_control *ractl)
> >> +{
> >> + struct inode *inode = ractl->mapping->host;
> >> + struct squashfs_sb_info *msblk = inode->i_sb->s_fs_info;
> >> + size_t mask = (1UL << msblk->block_log) - 1;
> >> + size_t shift = msblk->block_log - PAGE_SHIFT;
> >
> > block_log is unsigned short. Why size_t?

Will update in the next version.

> >
> >> + loff_t start = readahead_pos(ractl) &~ mask;
> >> + size_t len = readahead_length(ractl) + readahead_pos(ractl) - start;
> >> + struct squashfs_page_actor *actor;
> >> + unsigned int nr_pages = 0;
> >
> > OK.
> >
> >> + struct page **pages;
> >> + u64 block = 0;
> >> + int bsize, res, i, index, bytes, expected;
> >
> > `res' could be local to the inner loop.
> >
> > `i' is used in situations where an unsigned type would be more
> > appropriate. If it is made unsigned then `i' is no longer a suitable
> > identifier. Doesn't matter much.
> >
> > `index' is from page.index, which is pgoff_t.
> >
> > `bytes' could be local to the innermost loop.
> >
> > `expected' is inappropriately a signed type and could be local to the
> > inner loop.

Will update them in the next version.
> >
> >> + int file_end = i_size_read(inode) >> msblk->block_log;
> >> + unsigned int max_pages = 1UL << shift;
> >> + void *pageaddr;
> >> +
>
> pageaddr could be made local to the innermost scope.
>
Will update them in the next version.

Thanks for your comments.

> Apart from that the patch and updated error handling looks
> good.
>
> Phillip
>
> >> + readahead_expand(ractl, start, (len | mask) + 1);
> >> +
> >> + if (file_end == 0)
> >> + return;
> >> +
> >> + pages = kmalloc_array(max_pages, sizeof(void *), GFP_KERNEL);
> >> + if (!pages)
> >> + return;
> >> +
> >> + actor = squashfs_page_actor_init_special(pages, max_pages, 0);
> >> + if (!actor)
> >> + goto out;
> >> +
> >> + for (;;) {
> >> + nr_pages = __readahead_batch(ractl, pages, max_pages);
> >> + if (!nr_pages)
> >> + break;
> >> +
> >> + if (readahead_pos(ractl) >= i_size_read(inode) ||
> >> + nr_pages < max_pages)
> >> + goto skip_pages;
> >> +
> >> + index = pages[0]->index >> shift;
> >> + if ((pages[nr_pages - 1]->index >> shift) != index)
> >> + goto skip_pages;
> >> +
> >> + expected = index == file_end ?
> >> + (i_size_read(inode) & (msblk->block_size - 1)) :
> >> + msblk->block_size;
> >> +
> >> + bsize = read_blocklist(inode, index, &block);
> >> + if (bsize == 0)
> >> + goto skip_pages;
> >> +
> >> + res = squashfs_read_data(inode->i_sb, block, bsize, NULL,
> >> + actor);
> >> +
> >> + if (res == expected) {
> >> + /* Last page may have trailing bytes not filled */
> >> + bytes = res % PAGE_SIZE;
> >> + if (bytes) {
> >> + pageaddr = kmap_atomic(pages[nr_pages - 1]);
> >> + memset(pageaddr + bytes, 0, PAGE_SIZE - bytes);
> >> + kunmap_atomic(pageaddr);
> >> + }
> >> +
> >> + for (i = 0; i < nr_pages; i++)
> >> + SetPageUptodate(pages[i]);
> >> + }
> >
> > res == -EIO is unhandled?
> >
> >> + for (i = 0; i < nr_pages; i++) {
> >> + unlock_page(pages[i]);
> >> + put_page(pages[i]);
> >> + }
> >> + }
> >> +
> >> + kfree(actor);
> >> + kfree(pages);
> >> + return;
> >> +
> >> +skip_pages:
> >> + for (i = 0; i < nr_pages; i++) {
> >> + unlock_page(pages[i]);
> >> + put_page(pages[i]);
> >> + }
> >> +
> >> + kfree(actor);
> >> +out:
> >> + kfree(pages);
> >> +}
> >>
> >> const struct address_space_operations squashfs_aops = {
> >> - .read_folio = squashfs_read_folio
> >> + .read_folio = squashfs_read_folio,
> >> + .readahead = squashfs_readahead
> >> };
> >
>

2022-06-01 20:30:56

by Matthew Wilcox

[permalink] [raw]
Subject: Re: [PATCH v3 3/3] squashfs: implement readahead

On Tue, May 31, 2022 at 01:47:40PM -0700, Andrew Morton wrote:
> > + for (;;) {
> > + nr_pages = __readahead_batch(ractl, pages, max_pages);
> > + if (!nr_pages)
> > + break;
> > +
> > + if (readahead_pos(ractl) >= i_size_read(inode) ||
> > + nr_pages < max_pages)
> > + goto skip_pages;
> > +
> > + index = pages[0]->index >> shift;
> > + if ((pages[nr_pages - 1]->index >> shift) != index)
> > + goto skip_pages;
> > +
> > + expected = index == file_end ?
> > + (i_size_read(inode) & (msblk->block_size - 1)) :
> > + msblk->block_size;
> > +
> > + bsize = read_blocklist(inode, index, &block);
> > + if (bsize == 0)
> > + goto skip_pages;
> > +
> > + res = squashfs_read_data(inode->i_sb, block, bsize, NULL,
> > + actor);
> > +
> > + if (res == expected) {
> > + /* Last page may have trailing bytes not filled */
> > + bytes = res % PAGE_SIZE;
> > + if (bytes) {
> > + pageaddr = kmap_atomic(pages[nr_pages - 1]);
> > + memset(pageaddr + bytes, 0, PAGE_SIZE - bytes);
> > + kunmap_atomic(pageaddr);
> > + }
> > +
> > + for (i = 0; i < nr_pages; i++)
> > + SetPageUptodate(pages[i]);
> > + }
>
> res == -EIO is unhandled?

No it isn't ... this is readahead, which means there's nobody to care
about the error. The pages are left !Uptodate, which means that they'll
be retried with a call to ->read_folio later. At that point, somebody
actually wants the data in those pages, and they'll see the error.


2022-06-01 21:25:24

by Andrew Morton

[permalink] [raw]
Subject: Re: [PATCH v3 3/3] squashfs: implement readahead

On Mon, 23 May 2022 14:59:13 +0800 Hsin-Yi Wang <[email protected]> wrote:

> Implement readahead callback for squashfs. It will read datablocks
> which cover pages in readahead request. For a few cases it will
> not mark page as uptodate, including:
> - file end is 0.
> - zero filled blocks.
> - current batch of pages isn't in the same datablock or not enough in a
> datablock.
> - decompressor error.
> Otherwise pages will be marked as uptodate. The unhandled pages will be
> updated by readpage later.
>
> ...
>

The choice of types seems somewhat confused.

> @@ -495,7 +496,95 @@ static int squashfs_read_folio(struct file *file, struct folio *folio)
> return 0;
> }
>
> +static void squashfs_readahead(struct readahead_control *ractl)
> +{
> + struct inode *inode = ractl->mapping->host;
> + struct squashfs_sb_info *msblk = inode->i_sb->s_fs_info;
> + size_t mask = (1UL << msblk->block_log) - 1;
> + size_t shift = msblk->block_log - PAGE_SHIFT;

block_log is unsigned short. Why size_t?

> + loff_t start = readahead_pos(ractl) &~ mask;
> + size_t len = readahead_length(ractl) + readahead_pos(ractl) - start;
> + struct squashfs_page_actor *actor;
> + unsigned int nr_pages = 0;

OK.

> + struct page **pages;
> + u64 block = 0;
> + int bsize, res, i, index, bytes, expected;

`res' could be local to the inner loop.

`i' is used in situations where an unsigned type would be more
appropriate. If it is made unsigned then `i' is no longer a suitable
identifier. Doesn't matter much.

`index' is from page.index, which is pgoff_t.

`bytes' could be local to the innermost loop.

`expected' is inappropriately a signed type and could be local to the
inner loop.

> + int file_end = i_size_read(inode) >> msblk->block_log;
> + unsigned int max_pages = 1UL << shift;
> + void *pageaddr;
> +
> + readahead_expand(ractl, start, (len | mask) + 1);
> +
> + if (file_end == 0)
> + return;
> +
> + pages = kmalloc_array(max_pages, sizeof(void *), GFP_KERNEL);
> + if (!pages)
> + return;
> +
> + actor = squashfs_page_actor_init_special(pages, max_pages, 0);
> + if (!actor)
> + goto out;
> +
> + for (;;) {
> + nr_pages = __readahead_batch(ractl, pages, max_pages);
> + if (!nr_pages)
> + break;
> +
> + if (readahead_pos(ractl) >= i_size_read(inode) ||
> + nr_pages < max_pages)
> + goto skip_pages;
> +
> + index = pages[0]->index >> shift;
> + if ((pages[nr_pages - 1]->index >> shift) != index)
> + goto skip_pages;
> +
> + expected = index == file_end ?
> + (i_size_read(inode) & (msblk->block_size - 1)) :
> + msblk->block_size;
> +
> + bsize = read_blocklist(inode, index, &block);
> + if (bsize == 0)
> + goto skip_pages;
> +
> + res = squashfs_read_data(inode->i_sb, block, bsize, NULL,
> + actor);
> +
> + if (res == expected) {
> + /* Last page may have trailing bytes not filled */
> + bytes = res % PAGE_SIZE;
> + if (bytes) {
> + pageaddr = kmap_atomic(pages[nr_pages - 1]);
> + memset(pageaddr + bytes, 0, PAGE_SIZE - bytes);
> + kunmap_atomic(pageaddr);
> + }
> +
> + for (i = 0; i < nr_pages; i++)
> + SetPageUptodate(pages[i]);
> + }

res == -EIO is unhandled?

> + for (i = 0; i < nr_pages; i++) {
> + unlock_page(pages[i]);
> + put_page(pages[i]);
> + }
> + }
> +
> + kfree(actor);
> + kfree(pages);
> + return;
> +
> +skip_pages:
> + for (i = 0; i < nr_pages; i++) {
> + unlock_page(pages[i]);
> + put_page(pages[i]);
> + }
> +
> + kfree(actor);
> +out:
> + kfree(pages);
> +}
>
> const struct address_space_operations squashfs_aops = {
> - .read_folio = squashfs_read_folio
> + .read_folio = squashfs_read_folio,
> + .readahead = squashfs_readahead
> };


2022-06-01 21:36:39

by Hsin-Yi Wang

[permalink] [raw]
Subject: Re: [PATCH v3 3/3] squashfs: implement readahead

On Mon, May 23, 2022 at 3:00 PM Hsin-Yi Wang <[email protected]> wrote:
>
> Implement readahead callback for squashfs. It will read datablocks
> which cover pages in readahead request. For a few cases it will
> not mark page as uptodate, including:
> - file end is 0.
> - zero filled blocks.
> - current batch of pages isn't in the same datablock or not enough in a
> datablock.
> - decompressor error.
> Otherwise pages will be marked as uptodate. The unhandled pages will be
> updated by readpage later.
>
> Suggested-by: Matthew Wilcox <[email protected]>
> Signed-off-by: Hsin-Yi Wang <[email protected]>
> Reported-by: Matthew Wilcox <[email protected]>
> Reported-by: Phillip Lougher <[email protected]>
> Reported-by: Xiongwei Song <[email protected]>
> ---

Kindly ping on the thread. Conversations on v2:
https://patchwork.kernel.org/project/linux-mm/patch/[email protected]/#24869037
This version mainly addressed the error handling.

Thanks

> v2->v3: Add checks on
> - decompressed block size.
> - fill zeros if the last page is not a full page.
>
> v2: https://lore.kernel.org/lkml/[email protected]/
> v1: https://lore.kernel.org/lkml/[email protected]/
> ---
> fs/squashfs/file.c | 91 +++++++++++++++++++++++++++++++++++++++++++++-
> 1 file changed, 90 insertions(+), 1 deletion(-)
>
> diff --git a/fs/squashfs/file.c b/fs/squashfs/file.c
> index a8e495d8eb86..c311fc685fe4 100644
> --- a/fs/squashfs/file.c
> +++ b/fs/squashfs/file.c
> @@ -39,6 +39,7 @@
> #include "squashfs_fs_sb.h"
> #include "squashfs_fs_i.h"
> #include "squashfs.h"
> +#include "page_actor.h"
>
> /*
> * Locate cache slot in range [offset, index] for specified inode. If
> @@ -495,7 +496,95 @@ static int squashfs_read_folio(struct file *file, struct folio *folio)
> return 0;
> }
>
> +static void squashfs_readahead(struct readahead_control *ractl)
> +{
> + struct inode *inode = ractl->mapping->host;
> + struct squashfs_sb_info *msblk = inode->i_sb->s_fs_info;
> + size_t mask = (1UL << msblk->block_log) - 1;
> + size_t shift = msblk->block_log - PAGE_SHIFT;
> + loff_t start = readahead_pos(ractl) &~ mask;
> + size_t len = readahead_length(ractl) + readahead_pos(ractl) - start;
> + struct squashfs_page_actor *actor;
> + unsigned int nr_pages = 0;
> + struct page **pages;
> + u64 block = 0;
> + int bsize, res, i, index, bytes, expected;
> + int file_end = i_size_read(inode) >> msblk->block_log;
> + unsigned int max_pages = 1UL << shift;
> + void *pageaddr;
> +
> + readahead_expand(ractl, start, (len | mask) + 1);
> +
> + if (file_end == 0)
> + return;
> +
> + pages = kmalloc_array(max_pages, sizeof(void *), GFP_KERNEL);
> + if (!pages)
> + return;
> +
> + actor = squashfs_page_actor_init_special(pages, max_pages, 0);
> + if (!actor)
> + goto out;
> +
> + for (;;) {
> + nr_pages = __readahead_batch(ractl, pages, max_pages);
> + if (!nr_pages)
> + break;
> +
> + if (readahead_pos(ractl) >= i_size_read(inode) ||
> + nr_pages < max_pages)
> + goto skip_pages;
> +
> + index = pages[0]->index >> shift;
> + if ((pages[nr_pages - 1]->index >> shift) != index)
> + goto skip_pages;
> +
> + expected = index == file_end ?
> + (i_size_read(inode) & (msblk->block_size - 1)) :
> + msblk->block_size;
> +
> + bsize = read_blocklist(inode, index, &block);
> + if (bsize == 0)
> + goto skip_pages;
> +
> + res = squashfs_read_data(inode->i_sb, block, bsize, NULL,
> + actor);
> +
> + if (res == expected) {
> + /* Last page may have trailing bytes not filled */
> + bytes = res % PAGE_SIZE;
> + if (bytes) {
> + pageaddr = kmap_atomic(pages[nr_pages - 1]);
> + memset(pageaddr + bytes, 0, PAGE_SIZE - bytes);
> + kunmap_atomic(pageaddr);
> + }
> +
> + for (i = 0; i < nr_pages; i++)
> + SetPageUptodate(pages[i]);
> + }
> +
> + for (i = 0; i < nr_pages; i++) {
> + unlock_page(pages[i]);
> + put_page(pages[i]);
> + }
> + }
> +
> + kfree(actor);
> + kfree(pages);
> + return;
> +
> +skip_pages:
> + for (i = 0; i < nr_pages; i++) {
> + unlock_page(pages[i]);
> + put_page(pages[i]);
> + }
> +
> + kfree(actor);
> +out:
> + kfree(pages);
> +}
>
> const struct address_space_operations squashfs_aops = {
> - .read_folio = squashfs_read_folio
> + .read_folio = squashfs_read_folio,
> + .readahead = squashfs_readahead
> };
> --
> 2.36.1.124.g0e6072fb45-goog
>