2024-03-05 09:15:25

by Gao Xiang

[permalink] [raw]
Subject: [PATCH 1/6] erofs: convert z_erofs_onlinepage_.* to folios

Online folios are locked file-backed folios which will eventually
keep decoded (e.g. decompressed) data of each inode for end users to
utilize. Each may belong to a few pclusters and contain other data (e.g.
compressed data for inplace I/Os) temporarily in a time-sharing manner
to reduce memory footprints for low-end storage devices with high
latencies under heavy I/O pressure.

Apart from the folio_end_read() usage, it's a straightforward conversion.

Signed-off-by: Gao Xiang <[email protected]>
---
Some trivial folio conversions for compressed inodes aiming for v6.9.

fs/erofs/zdata.c | 50 +++++++++++++++++++++---------------------------
1 file changed, 22 insertions(+), 28 deletions(-)

diff --git a/fs/erofs/zdata.c b/fs/erofs/zdata.c
index ff0aa72b0db3..5013fcd4965a 100644
--- a/fs/erofs/zdata.c
+++ b/fs/erofs/zdata.c
@@ -117,46 +117,39 @@ static inline unsigned int z_erofs_pclusterpages(struct z_erofs_pcluster *pcl)
}

/*
- * bit 30: I/O error occurred on this page
- * bit 0 - 29: remaining parts to complete this page
+ * bit 30: I/O error occurred on this folio
+ * bit 0 - 29: remaining parts to complete this folio
*/
-#define Z_EROFS_PAGE_EIO (1 << 30)
+#define Z_EROFS_FOLIO_EIO (1 << 30)

-static inline void z_erofs_onlinepage_init(struct page *page)
+static void z_erofs_onlinefolio_init(struct folio *folio)
{
union {
atomic_t o;
- unsigned long v;
+ void *v;
} u = { .o = ATOMIC_INIT(1) };

- set_page_private(page, u.v);
- smp_wmb();
- SetPagePrivate(page);
+ folio->private = u.v; /* valid only if file-backed folio is locked */
}

-static inline void z_erofs_onlinepage_split(struct page *page)
+static void z_erofs_onlinefolio_split(struct folio *folio)
{
- atomic_inc((atomic_t *)&page->private);
+ atomic_inc((atomic_t *)&folio->private);
}

-static void z_erofs_onlinepage_endio(struct page *page, int err)
+static void z_erofs_onlinefolio_end(struct folio *folio, int err)
{
int orig, v;

- DBG_BUGON(!PagePrivate(page));
-
do {
- orig = atomic_read((atomic_t *)&page->private);
- v = (orig - 1) | (err ? Z_EROFS_PAGE_EIO : 0);
- } while (atomic_cmpxchg((atomic_t *)&page->private, orig, v) != orig);
+ orig = atomic_read((atomic_t *)&folio->private);
+ v = (orig - 1) | (err ? Z_EROFS_FOLIO_EIO : 0);
+ } while (atomic_cmpxchg((atomic_t *)&folio->private, orig, v) != orig);

- if (!(v & ~Z_EROFS_PAGE_EIO)) {
- set_page_private(page, 0);
- ClearPagePrivate(page);
- if (!(v & Z_EROFS_PAGE_EIO))
- SetPageUptodate(page);
- unlock_page(page);
- }
+ if (v & ~Z_EROFS_FOLIO_EIO)
+ return;
+ folio->private = 0;
+ folio_end_read(folio, !(v & Z_EROFS_FOLIO_EIO));
}

#define Z_EROFS_ONSTACK_PAGES 32
@@ -965,6 +958,7 @@ static int z_erofs_read_fragment(struct super_block *sb, struct page *page,
static int z_erofs_do_read_page(struct z_erofs_decompress_frontend *fe,
struct page *page, bool ra)
{
+ struct folio *folio = page_folio(page);
struct inode *const inode = fe->inode;
struct erofs_map_blocks *const map = &fe->map;
const loff_t offset = page_offset(page);
@@ -973,7 +967,7 @@ static int z_erofs_do_read_page(struct z_erofs_decompress_frontend *fe,
unsigned int cur, end, len, split;
int err = 0;

- z_erofs_onlinepage_init(page);
+ z_erofs_onlinefolio_init(folio);
split = 0;
end = PAGE_SIZE;
repeat:
@@ -1035,7 +1029,7 @@ static int z_erofs_do_read_page(struct z_erofs_decompress_frontend *fe,
if (err)
goto out;

- z_erofs_onlinepage_split(page);
+ z_erofs_onlinefolio_split(folio);
if (fe->pcl->pageofs_out != (map->m_la & ~PAGE_MASK))
fe->pcl->multibases = true;
if (fe->pcl->length < offset + end - map->m_la) {
@@ -1056,7 +1050,7 @@ static int z_erofs_do_read_page(struct z_erofs_decompress_frontend *fe,
goto repeat;

out:
- z_erofs_onlinepage_endio(page, err);
+ z_erofs_onlinefolio_end(folio, err);
return err;
}

@@ -1159,7 +1153,7 @@ static void z_erofs_fill_other_copies(struct z_erofs_decompress_backend *be,
cur += len;
}
kunmap_local(dst);
- z_erofs_onlinepage_endio(bvi->bvec.page, err);
+ z_erofs_onlinefolio_end(page_folio(bvi->bvec.page), err);
list_del(p);
kfree(bvi);
}
@@ -1316,7 +1310,7 @@ static int z_erofs_decompress_pcluster(struct z_erofs_decompress_backend *be,
/* recycle all individual short-lived pages */
if (z_erofs_put_shortlivedpage(be->pagepool, page))
continue;
- z_erofs_onlinepage_endio(page, err);
+ z_erofs_onlinefolio_end(page_folio(page), err);
}

if (be->decompressed_pages != be->onstack_pages)
--
2.39.3



2024-03-05 09:15:50

by Gao Xiang

[permalink] [raw]
Subject: [PATCH 2/6] erofs: convert z_erofs_do_read_page() to folios

It is a straightforward conversion. Besides, the function is renamed
to z_erofs_scan_folio().

Signed-off-by: Gao Xiang <[email protected]>
---
fs/erofs/zdata.c | 31 +++++++++++++++----------------
1 file changed, 15 insertions(+), 16 deletions(-)

diff --git a/fs/erofs/zdata.c b/fs/erofs/zdata.c
index 5013fcd4965a..c25074657708 100644
--- a/fs/erofs/zdata.c
+++ b/fs/erofs/zdata.c
@@ -955,21 +955,20 @@ static int z_erofs_read_fragment(struct super_block *sb, struct page *page,
return 0;
}

-static int z_erofs_do_read_page(struct z_erofs_decompress_frontend *fe,
- struct page *page, bool ra)
+static int z_erofs_scan_folio(struct z_erofs_decompress_frontend *fe,
+ struct folio *folio, bool ra)
{
- struct folio *folio = page_folio(page);
struct inode *const inode = fe->inode;
struct erofs_map_blocks *const map = &fe->map;
- const loff_t offset = page_offset(page);
- const unsigned int bs = i_blocksize(inode);
+ const loff_t offset = folio_pos(folio);
+ const unsigned int bs = i_blocksize(inode), fs = folio_size(folio);
bool tight = true, exclusive;
unsigned int cur, end, len, split;
int err = 0;

z_erofs_onlinefolio_init(folio);
split = 0;
- end = PAGE_SIZE;
+ end = fs;
repeat:
if (offset + end - 1 < map->m_la ||
offset + end - 1 >= map->m_la + map->m_llen) {
@@ -986,7 +985,7 @@ static int z_erofs_do_read_page(struct z_erofs_decompress_frontend *fe,
++split;

if (!(map->m_flags & EROFS_MAP_MAPPED)) {
- zero_user_segment(page, cur, end);
+ folio_zero_segment(folio, cur, end);
tight = false;
goto next_part;
}
@@ -995,8 +994,8 @@ static int z_erofs_do_read_page(struct z_erofs_decompress_frontend *fe,
erofs_off_t fpos = offset + cur - map->m_la;

len = min_t(unsigned int, map->m_llen - fpos, end - cur);
- err = z_erofs_read_fragment(inode->i_sb, page, cur, cur + len,
- EROFS_I(inode)->z_fragmentoff + fpos);
+ err = z_erofs_read_fragment(inode->i_sb, &folio->page, cur,
+ cur + len, EROFS_I(inode)->z_fragmentoff + fpos);
if (err)
goto out;
tight = false;
@@ -1011,18 +1010,18 @@ static int z_erofs_do_read_page(struct z_erofs_decompress_frontend *fe,
}

/*
- * Ensure the current partial page belongs to this submit chain rather
+ * Ensure the current partial folio belongs to this submit chain rather
* than other concurrent submit chains or the noio(bypass) chain since
- * those chains are handled asynchronously thus the page cannot be used
+ * those chains are handled asynchronously thus the folio cannot be used
* for inplace I/O or bvpage (should be processed in a strict order.)
*/
tight &= (fe->mode > Z_EROFS_PCLUSTER_FOLLOWED_NOINPLACE);
- exclusive = (!cur && ((split <= 1) || (tight && bs == PAGE_SIZE)));
+ exclusive = (!cur && ((split <= 1) || (tight && bs == fs)));
if (cur)
tight &= (fe->mode >= Z_EROFS_PCLUSTER_FOLLOWED);

err = z_erofs_attach_page(fe, &((struct z_erofs_bvec) {
- .page = page,
+ .page = &folio->page,
.offset = offset - map->m_la,
.end = end,
}), exclusive);
@@ -1789,7 +1788,7 @@ static void z_erofs_pcluster_readmore(struct z_erofs_decompress_frontend *f,
if (PageUptodate(page))
unlock_page(page);
else
- (void)z_erofs_do_read_page(f, page, !!rac);
+ z_erofs_scan_folio(f, page_folio(page), !!rac);
put_page(page);
}

@@ -1810,7 +1809,7 @@ static int z_erofs_read_folio(struct file *file, struct folio *folio)
f.headoffset = (erofs_off_t)folio->index << PAGE_SHIFT;

z_erofs_pcluster_readmore(&f, NULL, true);
- err = z_erofs_do_read_page(&f, &folio->page, false);
+ err = z_erofs_scan_folio(&f, folio, false);
z_erofs_pcluster_readmore(&f, NULL, false);
z_erofs_pcluster_end(&f);

@@ -1851,7 +1850,7 @@ static void z_erofs_readahead(struct readahead_control *rac)
folio = head;
head = folio_get_private(folio);

- err = z_erofs_do_read_page(&f, &folio->page, true);
+ err = z_erofs_scan_folio(&f, folio, true);
if (err && err != -EINTR)
erofs_err(inode->i_sb, "readahead error at folio %lu @ nid %llu",
folio->index, EROFS_I(inode)->nid);
--
2.39.3


2024-03-05 09:16:11

by Gao Xiang

[permalink] [raw]
Subject: [PATCH 5/6] erofs: convert z_erofs_submissionqueue_endio() to folios

Use bio_for_each_folio_all() to iterate over each folio in the bio;
there are no large folios for now.

Signed-off-by: Gao Xiang <[email protected]>
---
fs/erofs/zdata.c | 22 +++++++++++-----------
1 file changed, 11 insertions(+), 11 deletions(-)

diff --git a/fs/erofs/zdata.c b/fs/erofs/zdata.c
index d78cc54a96f5..63990c8192f2 100644
--- a/fs/erofs/zdata.c
+++ b/fs/erofs/zdata.c
@@ -1566,19 +1566,19 @@ static void z_erofs_submissionqueue_endio(struct bio *bio)
{
struct z_erofs_decompressqueue *q = bio->bi_private;
blk_status_t err = bio->bi_status;
- struct bio_vec *bvec;
- struct bvec_iter_all iter_all;
+ struct folio_iter fi;

- bio_for_each_segment_all(bvec, bio, iter_all) {
- struct page *page = bvec->bv_page;
+ bio_for_each_folio_all(fi, bio) {
+ struct folio *folio = fi.folio;

- DBG_BUGON(PageUptodate(page));
- DBG_BUGON(z_erofs_page_is_invalidated(page));
- if (erofs_page_is_managed(EROFS_SB(q->sb), page)) {
- if (!err)
- SetPageUptodate(page);
- unlock_page(page);
- }
+ DBG_BUGON(folio_test_uptodate(folio));
+ DBG_BUGON(z_erofs_page_is_invalidated(&folio->page));
+ if (!erofs_page_is_managed(EROFS_SB(q->sb), &folio->page))
+ continue;
+
+ if (!err)
+ folio_mark_uptodate(folio);
+ folio_unlock(folio);
}
if (err)
q->eio = true;
--
2.39.3


2024-03-05 09:30:10

by Gao Xiang

[permalink] [raw]
Subject: [PATCH 6/6] erofs: refine managed cache operations to folios

Convert erofs_try_to_free_all_cached_pages() and
z_erofs_cache_release_folio().

Besides, erofs_page_is_managed() is moved to zdata.c and renamed
as erofs_folio_is_managed().

Signed-off-by: Gao Xiang <[email protected]>
---
fs/erofs/compress.h | 7 ----
fs/erofs/decompressor_deflate.c | 3 --
fs/erofs/decompressor_lzma.c | 3 --
fs/erofs/internal.h | 4 +--
fs/erofs/utils.c | 2 +-
fs/erofs/zdata.c | 63 ++++++++++++++++-----------------
6 files changed, 34 insertions(+), 48 deletions(-)

diff --git a/fs/erofs/compress.h b/fs/erofs/compress.h
index 7cc5841577b2..333587ba6183 100644
--- a/fs/erofs/compress.h
+++ b/fs/erofs/compress.h
@@ -81,13 +81,6 @@ static inline bool z_erofs_put_shortlivedpage(struct page **pagepool,
return true;
}

-#define MNGD_MAPPING(sbi) ((sbi)->managed_cache->i_mapping)
-static inline bool erofs_page_is_managed(const struct erofs_sb_info *sbi,
- struct page *page)
-{
- return page->mapping == MNGD_MAPPING(sbi);
-}
-
int z_erofs_fixup_insize(struct z_erofs_decompress_req *rq, const char *padbuf,
unsigned int padbufsize);
extern const struct z_erofs_decompressor erofs_decompressors[];
diff --git a/fs/erofs/decompressor_deflate.c b/fs/erofs/decompressor_deflate.c
index b98872058abe..81e65c453ef0 100644
--- a/fs/erofs/decompressor_deflate.c
+++ b/fs/erofs/decompressor_deflate.c
@@ -212,9 +212,6 @@ int z_erofs_deflate_decompress(struct z_erofs_decompress_req *rq,

if (rq->out[no] != rq->in[j])
continue;
-
- DBG_BUGON(erofs_page_is_managed(EROFS_SB(sb),
- rq->in[j]));
tmppage = erofs_allocpage(pgpl, rq->gfp);
if (!tmppage) {
err = -ENOMEM;
diff --git a/fs/erofs/decompressor_lzma.c b/fs/erofs/decompressor_lzma.c
index 6ca357d83cfa..4b28dc130c9f 100644
--- a/fs/erofs/decompressor_lzma.c
+++ b/fs/erofs/decompressor_lzma.c
@@ -258,9 +258,6 @@ int z_erofs_lzma_decompress(struct z_erofs_decompress_req *rq,

if (rq->out[no] != rq->in[j])
continue;
-
- DBG_BUGON(erofs_page_is_managed(EROFS_SB(rq->sb),
- rq->in[j]));
tmppage = erofs_allocpage(pgpl, rq->gfp);
if (!tmppage) {
err = -ENOMEM;
diff --git a/fs/erofs/internal.h b/fs/erofs/internal.h
index b0409badb017..65db0250f146 100644
--- a/fs/erofs/internal.h
+++ b/fs/erofs/internal.h
@@ -467,8 +467,8 @@ int __init erofs_init_shrinker(void);
void erofs_exit_shrinker(void);
int __init z_erofs_init_zip_subsystem(void);
void z_erofs_exit_zip_subsystem(void);
-int erofs_try_to_free_all_cached_pages(struct erofs_sb_info *sbi,
- struct erofs_workgroup *egrp);
+int erofs_try_to_free_all_cached_folios(struct erofs_sb_info *sbi,
+ struct erofs_workgroup *egrp);
int z_erofs_map_blocks_iter(struct inode *inode, struct erofs_map_blocks *map,
int flags);
void *erofs_get_pcpubuf(unsigned int requiredpages);
diff --git a/fs/erofs/utils.c b/fs/erofs/utils.c
index e146d09151af..518bdd69c823 100644
--- a/fs/erofs/utils.c
+++ b/fs/erofs/utils.c
@@ -129,7 +129,7 @@ static bool erofs_try_to_release_workgroup(struct erofs_sb_info *sbi,
* the XArray. Otherwise some cached pages could be still attached to
* the orphan old workgroup when the new one is available in the tree.
*/
- if (erofs_try_to_free_all_cached_pages(sbi, grp))
+ if (erofs_try_to_free_all_cached_folios(sbi, grp))
goto out;

/*
diff --git a/fs/erofs/zdata.c b/fs/erofs/zdata.c
index 63990c8192f2..c1bd4d8392eb 100644
--- a/fs/erofs/zdata.c
+++ b/fs/erofs/zdata.c
@@ -119,6 +119,12 @@ static inline unsigned int z_erofs_pclusterpages(struct z_erofs_pcluster *pcl)
return PAGE_ALIGN(pcl->pclustersize) >> PAGE_SHIFT;
}

+#define MNGD_MAPPING(sbi) ((sbi)->managed_cache->i_mapping)
+static bool erofs_folio_is_managed(struct erofs_sb_info *sbi, struct folio *fo)
+{
+ return fo->mapping == MNGD_MAPPING(sbi);
+}
+
/*
* bit 30: I/O error occurred on this folio
* bit 0 - 29: remaining parts to complete this folio
@@ -611,9 +617,9 @@ static void z_erofs_bind_cache(struct z_erofs_decompress_frontend *fe)
fe->mode = Z_EROFS_PCLUSTER_FOLLOWED_NOINPLACE;
}

-/* called by erofs_shrinker to get rid of all compressed_pages */
-int erofs_try_to_free_all_cached_pages(struct erofs_sb_info *sbi,
- struct erofs_workgroup *grp)
+/* called by erofs_shrinker to get rid of all cached compressed bvecs */
+int erofs_try_to_free_all_cached_folios(struct erofs_sb_info *sbi,
+ struct erofs_workgroup *grp)
{
struct z_erofs_pcluster *const pcl =
container_of(grp, struct z_erofs_pcluster, obj);
@@ -621,27 +627,22 @@ int erofs_try_to_free_all_cached_pages(struct erofs_sb_info *sbi,
int i;

DBG_BUGON(z_erofs_is_inline_pcluster(pcl));
- /*
- * refcount of workgroup is now freezed as 0,
- * therefore no need to worry about available decompression users.
- */
+ /* There is no active user since the pcluster is now frozen */
for (i = 0; i < pclusterpages; ++i) {
- struct page *page = pcl->compressed_bvecs[i].page;
+ struct folio *folio = pcl->compressed_bvecs[i].folio;

- if (!page)
+ if (!folio)
continue;

- /* block other users from reclaiming or migrating the page */
- if (!trylock_page(page))
+ /* Avoid reclaiming or migrating this folio */
+ if (!folio_trylock(folio))
return -EBUSY;

- if (!erofs_page_is_managed(sbi, page))
+ if (!erofs_folio_is_managed(sbi, folio))
continue;
-
- /* barrier is implied in the following 'unlock_page' */
- WRITE_ONCE(pcl->compressed_bvecs[i].page, NULL);
- detach_page_private(page);
- unlock_page(page);
+ pcl->compressed_bvecs[i].folio = NULL;
+ folio_detach_private(folio);
+ folio_unlock(folio);
}
return 0;
}
@@ -658,20 +659,17 @@ static bool z_erofs_cache_release_folio(struct folio *folio, gfp_t gfp)

ret = false;
spin_lock(&pcl->obj.lockref.lock);
- if (pcl->obj.lockref.count > 0)
- goto out;
-
- DBG_BUGON(z_erofs_is_inline_pcluster(pcl));
- for (i = 0; i < pclusterpages; ++i) {
- if (pcl->compressed_bvecs[i].page == &folio->page) {
- WRITE_ONCE(pcl->compressed_bvecs[i].page, NULL);
- ret = true;
- break;
+ if (pcl->obj.lockref.count <= 0) {
+ DBG_BUGON(z_erofs_is_inline_pcluster(pcl));
+ for (i = 0; i < pclusterpages; ++i) {
+ if (pcl->compressed_bvecs[i].folio == folio) {
+ pcl->compressed_bvecs[i].folio = NULL;
+ folio_detach_private(folio);
+ ret = true;
+ break;
+ }
}
}
- if (ret)
- folio_detach_private(folio);
-out:
spin_unlock(&pcl->obj.lockref.lock);
return ret;
}
@@ -1201,7 +1199,7 @@ static int z_erofs_parse_in_bvecs(struct z_erofs_decompress_backend *be,
be->compressed_pages[i] = page;

if (z_erofs_is_inline_pcluster(pcl) ||
- erofs_page_is_managed(EROFS_SB(be->sb), page)) {
+ erofs_folio_is_managed(EROFS_SB(be->sb), page_folio(page))) {
if (!PageUptodate(page))
err = -EIO;
continue;
@@ -1286,7 +1284,8 @@ static int z_erofs_decompress_pcluster(struct z_erofs_decompress_backend *be,
/* consider shortlived pages added when decompressing */
page = be->compressed_pages[i];

- if (!page || erofs_page_is_managed(sbi, page))
+ if (!page ||
+ erofs_folio_is_managed(sbi, page_folio(page)))
continue;
(void)z_erofs_put_shortlivedpage(be->pagepool, page);
WRITE_ONCE(pcl->compressed_bvecs[i].page, NULL);
@@ -1573,7 +1572,7 @@ static void z_erofs_submissionqueue_endio(struct bio *bio)

DBG_BUGON(folio_test_uptodate(folio));
DBG_BUGON(z_erofs_page_is_invalidated(&folio->page));
- if (!erofs_page_is_managed(EROFS_SB(q->sb), &folio->page))
+ if (!erofs_folio_is_managed(EROFS_SB(q->sb), folio))
continue;

if (!err)
--
2.39.3


2024-03-10 01:03:53

by Chao Yu

[permalink] [raw]
Subject: Re: [PATCH 1/6] erofs: convert z_erofs_onlinepage_.* to folios

On 2024/3/5 17:14, Gao Xiang wrote:
> Online folios are locked file-backed folios which will eventually
> keep decoded (e.g. decompressed) data of each inode for end users to
> utilize. It may belong to a few pclusters and contain other data (e.g.
> compressed data for inplace I/Os) temporarily in a time-sharing manner
> to reduce memory footprints for low-ended storage devices with high
> latencies under heavy I/O pressure.
>
> Apart from folio_end_read() usage, it's a straight-forward conversion.
>
> Signed-off-by: Gao Xiang <[email protected]>

Reviewed-by: Chao Yu <[email protected]>

Thanks,

2024-03-10 01:04:13

by Chao Yu

[permalink] [raw]
Subject: Re: [PATCH 2/6] erofs: convert z_erofs_do_read_page() to folios

On 2024/3/5 17:14, Gao Xiang wrote:
> It is a straight-forward conversion. Besides, it's renamed as
> z_erofs_scan_folio().
>
> Signed-off-by: Gao Xiang <[email protected]>

Reviewed-by: Chao Yu <[email protected]>

Thanks,

2024-03-10 01:07:58

by Chao Yu

[permalink] [raw]
Subject: Re: [PATCH 5/6] erofs: convert z_erofs_submissionqueue_endio() to folios

On 2024/3/5 17:14, Gao Xiang wrote:
> Use bio_for_each_folio() to iterate over each folio in the bio and
> there is no large folios for now.
>
> Signed-off-by: Gao Xiang <[email protected]>

Reviewed-by: Chao Yu <[email protected]>

Thanks,

2024-03-10 01:09:23

by Chao Yu

[permalink] [raw]
Subject: Re: [PATCH 6/6] erofs: refine managed cache operations to folios

On 2024/3/5 17:14, Gao Xiang wrote:
> Convert erofs_try_to_free_all_cached_pages() and
> z_erofs_cache_release_folio().
>
> Besides, erofs_page_is_managed() is moved to zdata.c and renamed
> as erofs_folio_is_managed().
>
> Signed-off-by: Gao Xiang <[email protected]>

Reviewed-by: Chao Yu <[email protected]>

Thanks,