2022-06-13 08:39:40

by Hsin-Yi Wang

[permalink] [raw]
Subject: [PATCH v6 3/3] squashfs: implement readahead

Implement readahead callback for squashfs. It will read datablocks
which cover pages in readahead request. For a few cases it will
not mark page as uptodate, including:
- file end is 0.
- zero filled blocks.
- current batch of pages isn't in the same datablock.
- decompressor error.
Otherwise pages will be marked as uptodate. The unhandled pages will be
updated by readpage later.

Suggested-by: Matthew Wilcox <[email protected]>
Signed-off-by: Hsin-Yi Wang <[email protected]>
Reported-by: Matthew Wilcox <[email protected]>
Reported-by: Phillip Lougher <[email protected]>
Reported-by: Xiongwei Song <[email protected]>
Reported-by: Andrew Morton <[email protected]>
---
v5->v6:
- use the new squashfs_page_actor_init_special() to handle short file
cases as well.
- use memzero_page().

v5:
https://lore.kernel.org/lkml/[email protected]/
v4:
https://lore.kernel.org/lkml/[email protected]/
v3:
https://lore.kernel.org/lkml/[email protected]/
v2:
https://lore.kernel.org/lkml/[email protected]/
v1:
https://lore.kernel.org/lkml/[email protected]/
---
fs/squashfs/file.c | 92 +++++++++++++++++++++++++++++++++++++++++++++-
1 file changed, 91 insertions(+), 1 deletion(-)

diff --git a/fs/squashfs/file.c b/fs/squashfs/file.c
index 7f0904b203294..f0c64ee272d5d 100644
--- a/fs/squashfs/file.c
+++ b/fs/squashfs/file.c
@@ -39,6 +39,7 @@
#include "squashfs_fs_sb.h"
#include "squashfs_fs_i.h"
#include "squashfs.h"
+#include "page_actor.h"

/*
* Locate cache slot in range [offset, index] for specified inode. If
@@ -496,7 +497,96 @@ static int squashfs_read_folio(struct file *file, struct folio *folio)
return res;
}

+static void squashfs_readahead(struct readahead_control *ractl)
+{
+ struct inode *inode = ractl->mapping->host;
+ struct squashfs_sb_info *msblk = inode->i_sb->s_fs_info;
+ size_t mask = (1UL << msblk->block_log) - 1;
+ unsigned short shift = msblk->block_log - PAGE_SHIFT;
+ loff_t start = readahead_pos(ractl) & ~mask;
+ size_t len = readahead_length(ractl) + readahead_pos(ractl) - start;
+ struct squashfs_page_actor *actor;
+ unsigned int nr_pages = 0;
+ struct page **pages;
+ int i, file_end = i_size_read(inode) >> msblk->block_log;
+ unsigned int max_pages = 1UL << shift;
+
+ readahead_expand(ractl, start, (len | mask) + 1);
+
+ if (file_end == 0)
+ return;
+
+ pages = kmalloc_array(max_pages, sizeof(void *), GFP_KERNEL);
+ if (!pages)
+ return;
+
+ for (;;) {
+ pgoff_t index;
+ int res, bsize;
+ u64 block = 0;
+ unsigned int expected;
+
+ nr_pages = __readahead_batch(ractl, pages, max_pages);
+ if (!nr_pages)
+ break;
+
+ if (readahead_pos(ractl) >= i_size_read(inode))
+ goto skip_pages;
+
+ index = pages[0]->index >> shift;
+ if ((pages[nr_pages - 1]->index >> shift) != index)
+ goto skip_pages;
+
+ expected = index == file_end ?
+ (i_size_read(inode) & (msblk->block_size - 1)) :
+ msblk->block_size;
+
+ bsize = read_blocklist(inode, index, &block);
+ if (bsize == 0)
+ goto skip_pages;
+
+ actor = squashfs_page_actor_init_special(msblk, pages, nr_pages,
+ expected);
+ if (!actor)
+ goto skip_pages;
+
+ res = squashfs_read_data(inode->i_sb, block, bsize, NULL, actor);
+
+ kfree(actor);
+
+ if (res == expected) {
+ int bytes;
+
+ /* Last page (if present) may have trailing bytes not filled */
+ bytes = res % PAGE_SIZE;
+ if (pages[nr_pages - 1]->index == file_end && bytes)
+ memzero_page(pages[nr_pages - 1], bytes,
+ PAGE_SIZE - bytes);
+
+ for (i = 0; i < nr_pages; i++) {
+ flush_dcache_page(pages[i]);
+ SetPageUptodate(pages[i]);
+ }
+ }
+
+ for (i = 0; i < nr_pages; i++) {
+ unlock_page(pages[i]);
+ put_page(pages[i]);
+ }
+ }
+
+ kfree(pages);
+ return;
+
+skip_pages:
+ for (i = 0; i < nr_pages; i++) {
+ unlock_page(pages[i]);
+ put_page(pages[i]);
+ }
+ kfree(pages);
+}

const struct address_space_operations squashfs_aops = {
- .read_folio = squashfs_read_folio
+ .read_folio = squashfs_read_folio,
+ .readahead = squashfs_readahead
};
--
2.36.1.476.g0c4daa206d-goog


2022-06-17 03:06:03

by Phillip Lougher

[permalink] [raw]
Subject: [PATCH 4/3] squashfs: support reading fragments in readahead call

This patch adds a function which can be used to read fragments in the
readahead call.

This function is necessary because filesystems built with the -tailends
(or -always-use-fragments) option may have fragments present which
cannot be currently handled.

Signed-off-by: Phillip Lougher <[email protected]>
---
fs/squashfs/file.c | 35 +++++++++++++++++++++++++++++++++++
1 file changed, 35 insertions(+)

diff --git a/fs/squashfs/file.c b/fs/squashfs/file.c
index f0c64ee272d5..3a4cce16d7da 100644
--- a/fs/squashfs/file.c
+++ b/fs/squashfs/file.c
@@ -497,6 +497,41 @@ static int squashfs_read_folio(struct file *file, struct folio *folio)
return res;
}

+static unsigned int squashfs_readahead_fragment(struct page **page,
+ unsigned int pages, unsigned int expected)
+{
+ struct inode *inode = page[0]->mapping->host;
+ struct squashfs_cache_entry *buffer = squashfs_get_fragment(inode->i_sb,
+ squashfs_i(inode)->fragment_block,
+ squashfs_i(inode)->fragment_size);
+ struct squashfs_sb_info *msblk = inode->i_sb->s_fs_info;
+ unsigned int n, mask = (1 << (msblk->block_log - PAGE_SHIFT)) - 1;
+
+ if (buffer->error)
+ goto out;
+
+ expected += squashfs_i(inode)->fragment_offset;
+
+ for (n = 0; n < pages; n++) {
+ unsigned int base = (page[n]->index & mask) << PAGE_SHIFT;
+ unsigned int offset = base + squashfs_i(inode)->fragment_offset;
+
+ if (expected > offset) {
+ unsigned int avail = min_t(unsigned int, expected -
+ offset, PAGE_SIZE);
+
+ squashfs_fill_page(page[n], buffer, offset, avail);
+ }
+
+ unlock_page(page[n]);
+ put_page(page[n]);
+ }
+
+out:
+ squashfs_cache_put(buffer);
+ return buffer->error;
+}
+
static void squashfs_readahead(struct readahead_control *ractl)
{
struct inode *inode = ractl->mapping->host;
--
2.34.1

2022-06-17 03:19:14

by Phillip Lougher

[permalink] [raw]
Subject: Re: [PATCH v6 3/3] squashfs: implement readahead

On 13/06/2022 09:28, Hsin-Yi Wang wrote:
> Implement readahead callback for squashfs. It will read datablocks
> which cover pages in readahead request. For a few cases it will
> not mark page as uptodate, including:
> - file end is 0.
> - zero filled blocks.
> - current batch of pages isn't in the same datablock.
> - decompressor error.
> Otherwise pages will be marked as uptodate. The unhandled pages will be
> updated by readpage later.
>
> Suggested-by: Matthew Wilcox <[email protected]>
> Signed-off-by: Hsin-Yi Wang <[email protected]>
> Reported-by: Matthew Wilcox <[email protected]>
> Reported-by: Phillip Lougher <[email protected]>
> Reported-by: Xiongwei Song <[email protected]>
> Reported-by: Andrew Morton <[email protected]>
> ---
> v5->v6:
> - use the new squashfs_page_actor_init_special() to handle short file
> cases as well.

Hi Hsin-Yi,

Thanks for adding the improved page actor support to your patch.

There is currently another problem with the patch, which is it
doesn't handle fragments.

Normally a file (using Mksquashfs defaults) will consist of either:

1. A fragment, stored inside a fragment block, if the file is less
than the block size, OR

2. One or more datablocks, if the file is greater than the block size.

Your readahead patch handles datablocks, and ignores any file less than
block size by the lines

> + if (file_end == 0)
> + return;

So in theory the readahead function doesn't have to handle fragments
stored in fragment blocks.

But you can tell Mksquashfs to pack the file tailend into a fragment
block, using the -tailends (or -always-use-fragments) option.

Here, you will get a file which has one or more datablocks, and the last
datablock will be stored in a fragment block.

The readahead code has to handle this, and it is easy to show the code
doesn't by building a filesystem with that option.

I have written a function which can be called to do the work. This I
have posted here as a patch.

https://lore.kernel.org/all/[email protected]/

Obviously, now that fragment blocks are supported, readahead can be
supported for files less than the block size too.

The diff to update the readahead code to use the new function is

diff --git a/fs/squashfs/file.c b/fs/squashfs/file.c
index 1e28fcc22640..aae76de72e2d 100644
--- a/fs/squashfs/file.c
+++ b/fs/squashfs/file.c
@@ -548,9 +548,6 @@ static void squashfs_readahead(struct
readahead_control *ractl)

readahead_expand(ractl, start, (len | mask) + 1);

- if (file_end == 0)
- return;
-
pages = kmalloc_array(max_pages, sizeof(void *), GFP_KERNEL);
if (!pages)
return;
@@ -576,6 +573,14 @@ static void squashfs_readahead(struct
readahead_control *ractl)
(i_size_read(inode) & (msblk->block_size - 1)) :
msblk->block_size;

+ if (index == file_end &&
squashfs_i(inode)->fragment_block !=
+ SQUASHFS_INVALID_BLK) {
+ res = squashfs_readahead_fragment(pages,
nr_pages, expected);
+ if (res)
+ goto skip_pages;
+ continue;
+ }
+
bsize = read_blocklist(inode, index, &block);
if (bsize == 0)
goto skip_pages;
--
2.34.1



> - use memzero_page().
>
> v5:
> https://lore.kernel.org/lkml/[email protected]/
> v4:
> https://lore.kernel.org/lkml/[email protected]/
> v3:
> https://lore.kernel.org/lkml/[email protected]/
> v2:
> https://lore.kernel.org/lkml/[email protected]/
> v1:
> https://lore.kernel.org/lkml/[email protected]/
> ---
> fs/squashfs/file.c | 92 +++++++++++++++++++++++++++++++++++++++++++++-
> 1 file changed, 91 insertions(+), 1 deletion(-)
>
> diff --git a/fs/squashfs/file.c b/fs/squashfs/file.c
> index 7f0904b203294..f0c64ee272d5d 100644
> --- a/fs/squashfs/file.c
> +++ b/fs/squashfs/file.c
> @@ -39,6 +39,7 @@
> #include "squashfs_fs_sb.h"
> #include "squashfs_fs_i.h"
> #include "squashfs.h"
> +#include "page_actor.h"
>
> /*
> * Locate cache slot in range [offset, index] for specified inode. If
> @@ -496,7 +497,96 @@ static int squashfs_read_folio(struct file *file, struct folio *folio)
> return res;
> }
>
> +static void squashfs_readahead(struct readahead_control *ractl)
> +{
> + struct inode *inode = ractl->mapping->host;
> + struct squashfs_sb_info *msblk = inode->i_sb->s_fs_info;
> + size_t mask = (1UL << msblk->block_log) - 1;
> + unsigned short shift = msblk->block_log - PAGE_SHIFT;
> + loff_t start = readahead_pos(ractl) & ~mask;
> + size_t len = readahead_length(ractl) + readahead_pos(ractl) - start;
> + struct squashfs_page_actor *actor;
> + unsigned int nr_pages = 0;
> + struct page **pages;
> + int i, file_end = i_size_read(inode) >> msblk->block_log;
> + unsigned int max_pages = 1UL << shift;
> +
> + readahead_expand(ractl, start, (len | mask) + 1);
> +
> + if (file_end == 0)
> + return;
> +
> + pages = kmalloc_array(max_pages, sizeof(void *), GFP_KERNEL);
> + if (!pages)
> + return;
> +
> + for (;;) {
> + pgoff_t index;
> + int res, bsize;
> + u64 block = 0;
> + unsigned int expected;
> +
> + nr_pages = __readahead_batch(ractl, pages, max_pages);
> + if (!nr_pages)
> + break;
> +
> + if (readahead_pos(ractl) >= i_size_read(inode))
> + goto skip_pages;
> +
> + index = pages[0]->index >> shift;
> + if ((pages[nr_pages - 1]->index >> shift) != index)
> + goto skip_pages;
> +
> + expected = index == file_end ?
> + (i_size_read(inode) & (msblk->block_size - 1)) :
> + msblk->block_size;
> +
> + bsize = read_blocklist(inode, index, &block);
> + if (bsize == 0)
> + goto skip_pages;
> +
> + actor = squashfs_page_actor_init_special(msblk, pages, nr_pages,
> + expected);
> + if (!actor)
> + goto skip_pages;
> +
> + res = squashfs_read_data(inode->i_sb, block, bsize, NULL, actor);
> +
> + kfree(actor);
> +
> + if (res == expected) {
> + int bytes;
> +
> + /* Last page (if present) may have trailing bytes not filled */
> + bytes = res % PAGE_SIZE;
> + if (pages[nr_pages - 1]->index == file_end && bytes)
> + memzero_page(pages[nr_pages - 1], bytes,
> + PAGE_SIZE - bytes);
> +
> + for (i = 0; i < nr_pages; i++) {
> + flush_dcache_page(pages[i]);
> + SetPageUptodate(pages[i]);
> + }
> + }
> +
> + for (i = 0; i < nr_pages; i++) {
> + unlock_page(pages[i]);
> + put_page(pages[i]);
> + }
> + }
> +
> + kfree(pages);
> + return;
> +
> +skip_pages:
> + for (i = 0; i < nr_pages; i++) {
> + unlock_page(pages[i]);
> + put_page(pages[i]);
> + }
> + kfree(pages);
> +}
>
> const struct address_space_operations squashfs_aops = {
> - .read_folio = squashfs_read_folio
> + .read_folio = squashfs_read_folio,
> + .readahead = squashfs_readahead
> };

2022-06-17 08:39:26

by Hsin-Yi Wang

[permalink] [raw]
Subject: Re: [PATCH 4/3] squashfs: support reading fragments in readahead call

On Fri, Jun 17, 2022 at 11:04 AM Phillip Lougher
<[email protected]> wrote:
>
> This patch adds a function which can be used to read fragments in the
> readahead call.
>
> This function is necessary because filesystems built with the -tailends
> (or -always-use-fragments) option may have fragments present which
> cannot be currently handled.
>
> Signed-off-by: Phillip Lougher <[email protected]>
> ---
> fs/squashfs/file.c | 35 +++++++++++++++++++++++++++++++++++
> 1 file changed, 35 insertions(+)
>
> diff --git a/fs/squashfs/file.c b/fs/squashfs/file.c
> index f0c64ee272d5..3a4cce16d7da 100644
> --- a/fs/squashfs/file.c
> +++ b/fs/squashfs/file.c
> @@ -497,6 +497,41 @@ static int squashfs_read_folio(struct file *file, struct folio *folio)
> return res;
> }
>
> +static unsigned int squashfs_readahead_fragment(struct page **page,
hi Phillip,

Since the buffer->error is int, and also the variable to receive the
value is res, I modified it when sending it with the series, though it
won't affect the result.

> + unsigned int pages, unsigned int expected)
> +{
> + struct inode *inode = page[0]->mapping->host;
> + struct squashfs_cache_entry *buffer = squashfs_get_fragment(inode->i_sb,
> + squashfs_i(inode)->fragment_block,
> + squashfs_i(inode)->fragment_size);
> + struct squashfs_sb_info *msblk = inode->i_sb->s_fs_info;
> + unsigned int n, mask = (1 << (msblk->block_log - PAGE_SHIFT)) - 1;
> +
> + if (buffer->error)
> + goto out;
> +
> + expected += squashfs_i(inode)->fragment_offset;
> +
> + for (n = 0; n < pages; n++) {
> + unsigned int base = (page[n]->index & mask) << PAGE_SHIFT;
> + unsigned int offset = base + squashfs_i(inode)->fragment_offset;
> +
> + if (expected > offset) {
> + unsigned int avail = min_t(unsigned int, expected -
> + offset, PAGE_SIZE);
> +
> + squashfs_fill_page(page[n], buffer, offset, avail);
> + }
> +
> + unlock_page(page[n]);
> + put_page(page[n]);
> + }
> +
> +out:
> + squashfs_cache_put(buffer);
> + return buffer->error;
> +}
> +
> static void squashfs_readahead(struct readahead_control *ractl)
> {
> struct inode *inode = ractl->mapping->host;
> --
> 2.34.1
>