2020-12-07 01:27:53

by Gao Xiang

[permalink] [raw]
Subject: [PATCH v2 1/3] erofs: get rid of magical Z_EROFS_MAPPING_STAGING

Previously, we played around with a magical page->mapping value for
short-lived temporary pages, since we need to identify different types of
pages in the same pcluster, but both invalidated and short-lived temporary
pages can have page->mapping == NULL. It was considered safe because
temporary pages are all non-LRU / non-movable pages.

This patch uses a specific page->private value to identify short-lived
pages instead, so it no longer relies on page->mapping. Details are
described in "compress.h" as well.

Signed-off-by: Gao Xiang <[email protected]>
---
tested with ro_fsstress for a whole night.

The old "[PATCH 4/4] erofs: complete a missing case for inplace I/O" is
temporarily dropped since ro_fsstress failed with that modification;
I will look into it later.

fs/erofs/compress.h | 50 ++++++++++++++++++++++++++++++-----------
fs/erofs/decompressor.c | 2 +-
fs/erofs/zdata.c | 42 +++++++++++++++++++++-------------
fs/erofs/zdata.h | 1 +
4 files changed, 65 insertions(+), 30 deletions(-)

diff --git a/fs/erofs/compress.h b/fs/erofs/compress.h
index 3d452443c545..2bbf47f353ef 100644
--- a/fs/erofs/compress.h
+++ b/fs/erofs/compress.h
@@ -26,30 +26,54 @@ struct z_erofs_decompress_req {
bool inplace_io, partial_decoding;
};

+#define Z_EROFS_SHORTLIVED_PAGE (-1UL << 2)
+
/*
- * - 0x5A110C8D ('sallocated', Z_EROFS_MAPPING_STAGING) -
- * used to mark temporary allocated pages from other
- * file/cached pages and NULL mapping pages.
+ * For all pages in a pcluster, page->private should be one of
+ * Type Last 2bits page->private
+ * short-lived page 00 Z_EROFS_SHORTLIVED_PAGE
+ * cached/managed page 00 pointer to z_erofs_pcluster
+ * online page (file-backed, 01/10/11 sub-index << 2 | count
+ * some pages can be used for inplace I/O)
+ *
+ * page->mapping should be one of
+ * Type page->mapping
+ * short-lived page NULL
+ * cached/managed page non-NULL or NULL (invalidated/truncated page)
+ * online page non-NULL
+ *
+ * For all managed pages, PG_private should be set with 1 extra refcount,
+ * which is used for page reclaim / migration.
*/
-#define Z_EROFS_MAPPING_STAGING ((void *)0x5A110C8D)

-/* check if a page is marked as staging */
-static inline bool z_erofs_page_is_staging(struct page *page)
+/*
+ * short-lived pages are pages directly from buddy system with specific
+ * page->private (no need to set PagePrivate since these are non-LRU /
+ * non-movable pages and bypass reclaim / migration code).
+ */
+static inline bool z_erofs_is_shortlived_page(struct page *page)
{
- return page->mapping == Z_EROFS_MAPPING_STAGING;
+ if (page->private != Z_EROFS_SHORTLIVED_PAGE)
+ return false;
+
+ DBG_BUGON(page->mapping);
+ return true;
}

-static inline bool z_erofs_put_stagingpage(struct list_head *pagepool,
- struct page *page)
+static inline bool z_erofs_put_shortlivedpage(struct list_head *pagepool,
+ struct page *page)
{
- if (!z_erofs_page_is_staging(page))
+ if (!z_erofs_is_shortlived_page(page))
return false;

- /* staging pages should not be used by others at the same time */
- if (page_ref_count(page) > 1)
+ /* short-lived pages should not be used by others at the same time */
+ if (page_ref_count(page) > 1) {
put_page(page);
- else
+ } else {
+ /* follow the pcluster rule above. */
+ set_page_private(page, 0);
list_add(&page->lru, pagepool);
+ }
return true;
}

diff --git a/fs/erofs/decompressor.c b/fs/erofs/decompressor.c
index cbadbf55c6c2..1cb1ffd10569 100644
--- a/fs/erofs/decompressor.c
+++ b/fs/erofs/decompressor.c
@@ -76,7 +76,7 @@ static int z_erofs_lz4_prepare_destpages(struct z_erofs_decompress_req *rq,
victim = erofs_allocpage(pagepool, GFP_KERNEL);
if (!victim)
return -ENOMEM;
- victim->mapping = Z_EROFS_MAPPING_STAGING;
+ set_page_private(victim, Z_EROFS_SHORTLIVED_PAGE);
}
rq->out[i] = victim;
}
diff --git a/fs/erofs/zdata.c b/fs/erofs/zdata.c
index 86fd3bf62af6..afeadf413c2c 100644
--- a/fs/erofs/zdata.c
+++ b/fs/erofs/zdata.c
@@ -255,6 +255,7 @@ int erofs_try_to_free_cached_page(struct address_space *mapping,
erofs_workgroup_unfreeze(&pcl->obj, 1);

if (ret) {
+ set_page_private(page, 0);
ClearPagePrivate(page);
put_page(page);
}
@@ -648,12 +649,12 @@ static int z_erofs_do_read_page(struct z_erofs_decompress_frontend *fe,

retry:
err = z_erofs_attach_page(clt, page, page_type);
- /* should allocate an additional staging page for pagevec */
+ /* should allocate an additional short-lived page for pagevec */
if (err == -EAGAIN) {
struct page *const newpage =
alloc_page(GFP_NOFS | __GFP_NOFAIL);

- newpage->mapping = Z_EROFS_MAPPING_STAGING;
+ set_page_private(newpage, Z_EROFS_SHORTLIVED_PAGE);
err = z_erofs_attach_page(clt, newpage,
Z_EROFS_PAGE_TYPE_EXCLUSIVE);
if (!err)
@@ -710,6 +711,11 @@ static void z_erofs_decompress_kickoff(struct z_erofs_decompressqueue *io,
queue_work(z_erofs_workqueue, &io->u.work);
}

+static bool z_erofs_page_is_invalidated(struct page *page)
+{
+ return !page->mapping && !z_erofs_is_shortlived_page(page);
+}
+
static void z_erofs_decompressqueue_endio(struct bio *bio)
{
tagptr1_t t = tagptr_init(tagptr1_t, bio->bi_private);
@@ -722,7 +728,7 @@ static void z_erofs_decompressqueue_endio(struct bio *bio)
struct page *page = bvec->bv_page;

DBG_BUGON(PageUptodate(page));
- DBG_BUGON(!page->mapping);
+ DBG_BUGON(z_erofs_page_is_invalidated(page));

if (err)
SetPageError(page);
@@ -795,9 +801,9 @@ static int z_erofs_decompress_pcluster(struct super_block *sb,

/* all pages in pagevec ought to be valid */
DBG_BUGON(!page);
- DBG_BUGON(!page->mapping);
+ DBG_BUGON(z_erofs_page_is_invalidated(page));

- if (z_erofs_put_stagingpage(pagepool, page))
+ if (z_erofs_put_shortlivedpage(pagepool, page))
continue;

if (page_type == Z_EROFS_VLE_PAGE_TYPE_HEAD)
@@ -831,9 +837,9 @@ static int z_erofs_decompress_pcluster(struct super_block *sb,

/* all compressed pages ought to be valid */
DBG_BUGON(!page);
- DBG_BUGON(!page->mapping);
+ DBG_BUGON(z_erofs_page_is_invalidated(page));

- if (!z_erofs_page_is_staging(page)) {
+ if (!z_erofs_is_shortlived_page(page)) {
if (erofs_page_is_managed(sbi, page)) {
if (!PageUptodate(page))
err = -EIO;
@@ -858,7 +864,7 @@ static int z_erofs_decompress_pcluster(struct super_block *sb,
overlapped = true;
}

- /* PG_error needs checking for inplaced and staging pages */
+ /* PG_error needs checking for all non-managed pages */
if (PageError(page)) {
DBG_BUGON(PageUptodate(page));
err = -EIO;
@@ -897,8 +903,8 @@ static int z_erofs_decompress_pcluster(struct super_block *sb,
if (erofs_page_is_managed(sbi, page))
continue;

- /* recycle all individual staging pages */
- (void)z_erofs_put_stagingpage(pagepool, page);
+ /* recycle all individual short-lived pages */
+ (void)z_erofs_put_shortlivedpage(pagepool, page);

WRITE_ONCE(compressed_pages[i], NULL);
}
@@ -908,10 +914,10 @@ static int z_erofs_decompress_pcluster(struct super_block *sb,
if (!page)
continue;

- DBG_BUGON(!page->mapping);
+ DBG_BUGON(z_erofs_page_is_invalidated(page));

- /* recycle all individual staging pages */
- if (z_erofs_put_stagingpage(pagepool, page))
+ /* recycle all individual short-lived pages */
+ if (z_erofs_put_shortlivedpage(pagepool, page))
continue;

if (err < 0)
@@ -1011,13 +1017,17 @@ static struct page *pickup_page_for_submission(struct z_erofs_pcluster *pcl,
mapping = READ_ONCE(page->mapping);

/*
- * unmanaged (file) pages are all locked solidly,
+ * file-backed online pages in pcluster are all locked steady,
* therefore it is impossible for `mapping' to be NULL.
*/
if (mapping && mapping != mc)
/* ought to be unmanaged pages */
goto out;

+ /* directly return for shortlived page as well */
+ if (z_erofs_is_shortlived_page(page))
+ goto out;
+
lock_page(page);

/* only true if page reclaim goes wrong, should never happen */
@@ -1062,8 +1072,8 @@ static struct page *pickup_page_for_submission(struct z_erofs_pcluster *pcl,
out_allocpage:
page = erofs_allocpage(pagepool, gfp | __GFP_NOFAIL);
if (!tocache || add_to_page_cache_lru(page, mc, index + nr, gfp)) {
- /* non-LRU / non-movable temporary page is needed */
- page->mapping = Z_EROFS_MAPPING_STAGING;
+ /* turn into temporary page if fails */
+ set_page_private(page, Z_EROFS_SHORTLIVED_PAGE);
tocache = false;
}

diff --git a/fs/erofs/zdata.h b/fs/erofs/zdata.h
index 68c9b29fc0ca..b503b353d4ab 100644
--- a/fs/erofs/zdata.h
+++ b/fs/erofs/zdata.h
@@ -173,6 +173,7 @@ static inline void z_erofs_onlinepage_endio(struct page *page)

v = atomic_dec_return(u.o);
if (!(v & Z_EROFS_ONLINEPAGE_COUNT_MASK)) {
+ set_page_private(page, 0);
ClearPagePrivate(page);
if (!PageError(page))
SetPageUptodate(page);
--
2.18.4


2020-12-07 01:28:18

by Gao Xiang

[permalink] [raw]
Subject: [PATCH v2 2/3] erofs: insert to managed cache after adding to pcl

Previously, there could be some concern about calling add_to_page_cache_lru()
with page->mapping == Z_EROFS_MAPPING_STAGING (!= NULL).

In contrast, page->private is used instead now, so partially revert
commit 5ddcee1f3a1c ("erofs: get rid of __stagingpage_alloc helper")
with some adaptation for simplicity.

Signed-off-by: Gao Xiang <[email protected]>
---
fs/erofs/zdata.c | 23 +++++++----------------
1 file changed, 7 insertions(+), 16 deletions(-)

diff --git a/fs/erofs/zdata.c b/fs/erofs/zdata.c
index afeadf413c2c..edd7325570e1 100644
--- a/fs/erofs/zdata.c
+++ b/fs/erofs/zdata.c
@@ -1071,28 +1071,19 @@ static struct page *pickup_page_for_submission(struct z_erofs_pcluster *pcl,
put_page(page);
out_allocpage:
page = erofs_allocpage(pagepool, gfp | __GFP_NOFAIL);
- if (!tocache || add_to_page_cache_lru(page, mc, index + nr, gfp)) {
- /* turn into temporary page if fails */
- set_page_private(page, Z_EROFS_SHORTLIVED_PAGE);
- tocache = false;
- }
-
if (oldpage != cmpxchg(&pcl->compressed_pages[nr], oldpage, page)) {
- if (tocache) {
- /* since it added to managed cache successfully */
- unlock_page(page);
- put_page(page);
- } else {
- list_add(&page->lru, pagepool);
- }
+ list_add(&page->lru, pagepool);
cond_resched();
goto repeat;
}

- if (tocache) {
- set_page_private(page, (unsigned long)pcl);
- SetPagePrivate(page);
+ if (!tocache || add_to_page_cache_lru(page, mc, index + nr, gfp)) {
+ /* turn into temporary page if fails */
+ set_page_private(page, Z_EROFS_SHORTLIVED_PAGE);
+ goto out;
}
+ set_page_private(page, (unsigned long)pcl);
+ SetPagePrivate(page);
out: /* the only exit (for tracing and debugging) */
return page;
}
--
2.18.4

2020-12-07 01:29:27

by Gao Xiang

[permalink] [raw]
Subject: [PATCH v2 3/3] erofs: simplify try_to_claim_pcluster()

Simplify try_to_claim_pcluster() by directly using cmpxchg() here
(the retry loop caused more overhead). Also, move the chain loop
detection in and rename it to z_erofs_try_to_claim_pcluster().

Signed-off-by: Gao Xiang <[email protected]>
---
fs/erofs/zdata.c | 51 +++++++++++++++++++++++-------------------------
1 file changed, 24 insertions(+), 27 deletions(-)

diff --git a/fs/erofs/zdata.c b/fs/erofs/zdata.c
index edd7325570e1..b1b6cd03046f 100644
--- a/fs/erofs/zdata.c
+++ b/fs/erofs/zdata.c
@@ -298,34 +298,33 @@ static int z_erofs_attach_page(struct z_erofs_collector *clt,
return ret ? 0 : -EAGAIN;
}

-static enum z_erofs_collectmode
-try_to_claim_pcluster(struct z_erofs_pcluster *pcl,
- z_erofs_next_pcluster_t *owned_head)
+static void z_erofs_try_to_claim_pcluster(struct z_erofs_collector *clt)
{
- /* let's claim these following types of pclusters */
-retry:
- if (pcl->next == Z_EROFS_PCLUSTER_NIL) {
- /* type 1, nil pcluster */
- if (cmpxchg(&pcl->next, Z_EROFS_PCLUSTER_NIL,
- *owned_head) != Z_EROFS_PCLUSTER_NIL)
- goto retry;
+ struct z_erofs_pcluster *pcl = clt->pcl;
+ z_erofs_next_pcluster_t *owned_head = &clt->owned_head;

+ /* type 1, nil pcluster (this pcluster doesn't belong to any chain.) */
+ if (cmpxchg(&pcl->next, Z_EROFS_PCLUSTER_NIL,
+ *owned_head) == Z_EROFS_PCLUSTER_NIL) {
*owned_head = &pcl->next;
- /* lucky, I am the followee :) */
- return COLLECT_PRIMARY_FOLLOWED;
- } else if (pcl->next == Z_EROFS_PCLUSTER_TAIL) {
- /*
- * type 2, link to the end of a existing open chain,
- * be careful that its submission itself is governed
- * by the original owned chain.
- */
- if (cmpxchg(&pcl->next, Z_EROFS_PCLUSTER_TAIL,
- *owned_head) != Z_EROFS_PCLUSTER_TAIL)
- goto retry;
+ /* so we can attach this pcluster to our submission chain. */
+ clt->mode = COLLECT_PRIMARY_FOLLOWED;
+ return;
+ }
+
+ /*
+ * type 2, link to the end of an existing open chain, be careful
+ * that its submission is controlled by the original attached chain.
+ */
+ if (cmpxchg(&pcl->next, Z_EROFS_PCLUSTER_TAIL,
+ *owned_head) == Z_EROFS_PCLUSTER_TAIL) {
*owned_head = Z_EROFS_PCLUSTER_TAIL;
- return COLLECT_PRIMARY_HOOKED;
+ clt->mode = COLLECT_PRIMARY_HOOKED;
+ clt->tailpcl = NULL;
+ return;
}
- return COLLECT_PRIMARY; /* :( better luck next time */
+ /* type 3, it belongs to a chain, but it isn't the end of the chain */
+ clt->mode = COLLECT_PRIMARY;
}

static int z_erofs_lookup_collection(struct z_erofs_collector *clt,
@@ -370,10 +369,8 @@ static int z_erofs_lookup_collection(struct z_erofs_collector *clt,
/* used to check tail merging loop due to corrupted images */
if (clt->owned_head == Z_EROFS_PCLUSTER_TAIL)
clt->tailpcl = pcl;
- clt->mode = try_to_claim_pcluster(pcl, &clt->owned_head);
- /* clean tailpcl if the current owned_head is Z_EROFS_PCLUSTER_TAIL */
- if (clt->owned_head == Z_EROFS_PCLUSTER_TAIL)
- clt->tailpcl = NULL;
+
+ z_erofs_try_to_claim_pcluster(clt);
clt->cl = cl;
return 0;
}
--
2.18.4

2020-12-08 04:11:02

by Gao Xiang

[permalink] [raw]
Subject: Re: [PATCH v2 1/3] erofs: get rid of magical Z_EROFS_MAPPING_STAGING

Hi Chao,

On Mon, Dec 07, 2020 at 09:23:44AM +0800, Gao Xiang wrote:
> Previously, we played around with magical page->mapping for short-lived
> temporary pages since we need to identify different types of pages in
> the same pcluster but both invalidated and short-lived temporary pages
> can have page->mapping == NULL. It was considered as safe because that
> temporary pages are all non-LRU / non-movable pages.
>
> This patch tends to use specific page->private to identify short-lived
> pages instead so it won't rely on page->mapping anymore. Details are
> described in "compress.h" as well.
>
> Signed-off-by: Gao Xiang <[email protected]>
> ---
> tested with ro_fsstress for a whole night.
>
> The old "[PATCH 4/4] erofs: complete a missing case for inplace I/O" is
> temporarily dropped since ro_fsstress failed with such modification,
> will look into later.
>

Do you have some extra bandwidth to review these commits?
plus a commit from Vladimir Zapolskiy:
https://git.kernel.org/pub/scm/linux/kernel/git/xiang/erofs.git/commit/?id=c8390cfaa07cb9e9ccaa946a1919b69dfb34bad1

The merge window will open next week, so I have to start preparing
the submission now.

Thanks,
Gao Xiang

2020-12-08 08:22:05

by Chao Yu

[permalink] [raw]
Subject: Re: [PATCH v2 1/3] erofs: get rid of magical Z_EROFS_MAPPING_STAGING

On 2020/12/7 9:23, Gao Xiang wrote:
> Previously, we played around with magical page->mapping for short-lived
> temporary pages since we need to identify different types of pages in
> the same pcluster but both invalidated and short-lived temporary pages
> can have page->mapping == NULL. It was considered as safe because that
> temporary pages are all non-LRU / non-movable pages.
>
> This patch tends to use specific page->private to identify short-lived
> pages instead so it won't rely on page->mapping anymore. Details are
> described in "compress.h" as well.
>
> Signed-off-by: Gao Xiang <[email protected]>
> ---
> tested with ro_fsstress for a whole night.
>
> The old "[PATCH 4/4] erofs: complete a missing case for inplace I/O" is
> temporarily dropped since ro_fsstress failed with such modification,
> will look into later.
>
> fs/erofs/compress.h | 50 ++++++++++++++++++++++++++++++-----------
> fs/erofs/decompressor.c | 2 +-
> fs/erofs/zdata.c | 42 +++++++++++++++++++++-------------
> fs/erofs/zdata.h | 1 +
> 4 files changed, 65 insertions(+), 30 deletions(-)
>
> diff --git a/fs/erofs/compress.h b/fs/erofs/compress.h
> index 3d452443c545..2bbf47f353ef 100644
> --- a/fs/erofs/compress.h
> +++ b/fs/erofs/compress.h
> @@ -26,30 +26,54 @@ struct z_erofs_decompress_req {
> bool inplace_io, partial_decoding;
> };
>
> +#define Z_EROFS_SHORTLIVED_PAGE (-1UL << 2)
> +
> /*
> - * - 0x5A110C8D ('sallocated', Z_EROFS_MAPPING_STAGING) -
> - * used to mark temporary allocated pages from other
> - * file/cached pages and NULL mapping pages.
> + * For all pages in a pcluster, page->private should be one of
> + * Type Last 2bits page->private
> + * short-lived page 00 Z_EROFS_SHORTLIVED_PAGE
> + * cached/managed page 00 pointer to z_erofs_pcluster
> + * online page (file-backed, 01/10/11 sub-index << 2 | count
> + * some pages can be used for inplace I/O)
> + *
> + * page->mapping should be one of
> + * Type page->mapping
> + * short-lived page NULL
> + * cached/managed page non-NULL or NULL (invalidated/truncated page)
> + * online page non-NULL
> + *
> + * For all managed pages, PG_private should be set with 1 extra refcount,
> + * which is used for page reclaim / migration.

FYI, there is a generic way to set/clear page_private; it combines setting the
private value and the page refcount operation in one function:

attach_page_private()
detach_page_private()

If there are use cases, let's try to use them as much as possible.

> */
> -#define Z_EROFS_MAPPING_STAGING ((void *)0x5A110C8D)
>
> -/* check if a page is marked as staging */
> -static inline bool z_erofs_page_is_staging(struct page *page)
> +/*
> + * short-lived pages are pages directly from buddy system with specific
> + * page->private (no need to set PagePrivate since these are non-LRU /
> + * non-movable pages and bypass reclaim / migration code).
> + */
> +static inline bool z_erofs_is_shortlived_page(struct page *page)
> {
> - return page->mapping == Z_EROFS_MAPPING_STAGING;
> + if (page->private != Z_EROFS_SHORTLIVED_PAGE)
> + return false;
> +
> + DBG_BUGON(page->mapping);
> + return true;
> }
>
> -static inline bool z_erofs_put_stagingpage(struct list_head *pagepool,
> - struct page *page)
> +static inline bool z_erofs_put_shortlivedpage(struct list_head *pagepool,
> + struct page *page)
> {
> - if (!z_erofs_page_is_staging(page))
> + if (!z_erofs_is_shortlived_page(page))
> return false;
>
> - /* staging pages should not be used by others at the same time */
> - if (page_ref_count(page) > 1)
> + /* short-lived pages should not be used by others at the same time */
> + if (page_ref_count(page) > 1) {

Is this a possible case?

> put_page(page);
> - else
> + } else {
> + /* follow the pcluster rule above. */
> + set_page_private(page, 0);
> list_add(&page->lru, pagepool);
> + }
> return true;
> }
>
> diff --git a/fs/erofs/decompressor.c b/fs/erofs/decompressor.c
> index cbadbf55c6c2..1cb1ffd10569 100644
> --- a/fs/erofs/decompressor.c
> +++ b/fs/erofs/decompressor.c
> @@ -76,7 +76,7 @@ static int z_erofs_lz4_prepare_destpages(struct z_erofs_decompress_req *rq,
> victim = erofs_allocpage(pagepool, GFP_KERNEL);
> if (!victim)
> return -ENOMEM;
> - victim->mapping = Z_EROFS_MAPPING_STAGING;
> + set_page_private(victim, Z_EROFS_SHORTLIVED_PAGE);
> }
> rq->out[i] = victim;
> }
> diff --git a/fs/erofs/zdata.c b/fs/erofs/zdata.c
> index 86fd3bf62af6..afeadf413c2c 100644
> --- a/fs/erofs/zdata.c
> +++ b/fs/erofs/zdata.c
> @@ -255,6 +255,7 @@ int erofs_try_to_free_cached_page(struct address_space *mapping,
> erofs_workgroup_unfreeze(&pcl->obj, 1);
>
> if (ret) {
> + set_page_private(page, 0);
> ClearPagePrivate(page);
> put_page(page);

detach_page_private()?

Thanks,

> }
> @@ -648,12 +649,12 @@ static int z_erofs_do_read_page(struct z_erofs_decompress_frontend *fe,
>
> retry:
> err = z_erofs_attach_page(clt, page, page_type);
> - /* should allocate an additional staging page for pagevec */
> + /* should allocate an additional short-lived page for pagevec */
> if (err == -EAGAIN) {
> struct page *const newpage =
> alloc_page(GFP_NOFS | __GFP_NOFAIL);
>
> - newpage->mapping = Z_EROFS_MAPPING_STAGING;
> + set_page_private(newpage, Z_EROFS_SHORTLIVED_PAGE);
> err = z_erofs_attach_page(clt, newpage,
> Z_EROFS_PAGE_TYPE_EXCLUSIVE);
> if (!err)
> @@ -710,6 +711,11 @@ static void z_erofs_decompress_kickoff(struct z_erofs_decompressqueue *io,
> queue_work(z_erofs_workqueue, &io->u.work);
> }
>
> +static bool z_erofs_page_is_invalidated(struct page *page)
> +{
> + return !page->mapping && !z_erofs_is_shortlived_page(page);
> +}
> +
> static void z_erofs_decompressqueue_endio(struct bio *bio)
> {
> tagptr1_t t = tagptr_init(tagptr1_t, bio->bi_private);
> @@ -722,7 +728,7 @@ static void z_erofs_decompressqueue_endio(struct bio *bio)
> struct page *page = bvec->bv_page;
>
> DBG_BUGON(PageUptodate(page));
> - DBG_BUGON(!page->mapping);
> + DBG_BUGON(z_erofs_page_is_invalidated(page));
>
> if (err)
> SetPageError(page);
> @@ -795,9 +801,9 @@ static int z_erofs_decompress_pcluster(struct super_block *sb,
>
> /* all pages in pagevec ought to be valid */
> DBG_BUGON(!page);
> - DBG_BUGON(!page->mapping);
> + DBG_BUGON(z_erofs_page_is_invalidated(page));
>
> - if (z_erofs_put_stagingpage(pagepool, page))
> + if (z_erofs_put_shortlivedpage(pagepool, page))
> continue;
>
> if (page_type == Z_EROFS_VLE_PAGE_TYPE_HEAD)
> @@ -831,9 +837,9 @@ static int z_erofs_decompress_pcluster(struct super_block *sb,
>
> /* all compressed pages ought to be valid */
> DBG_BUGON(!page);
> - DBG_BUGON(!page->mapping);
> + DBG_BUGON(z_erofs_page_is_invalidated(page));
>
> - if (!z_erofs_page_is_staging(page)) {
> + if (!z_erofs_is_shortlived_page(page)) {
> if (erofs_page_is_managed(sbi, page)) {
> if (!PageUptodate(page))
> err = -EIO;
> @@ -858,7 +864,7 @@ static int z_erofs_decompress_pcluster(struct super_block *sb,
> overlapped = true;
> }
>
> - /* PG_error needs checking for inplaced and staging pages */
> + /* PG_error needs checking for all non-managed pages */
> if (PageError(page)) {
> DBG_BUGON(PageUptodate(page));
> err = -EIO;
> @@ -897,8 +903,8 @@ static int z_erofs_decompress_pcluster(struct super_block *sb,
> if (erofs_page_is_managed(sbi, page))
> continue;
>
> - /* recycle all individual staging pages */
> - (void)z_erofs_put_stagingpage(pagepool, page);
> + /* recycle all individual short-lived pages */
> + (void)z_erofs_put_shortlivedpage(pagepool, page);
>
> WRITE_ONCE(compressed_pages[i], NULL);
> }
> @@ -908,10 +914,10 @@ static int z_erofs_decompress_pcluster(struct super_block *sb,
> if (!page)
> continue;
>
> - DBG_BUGON(!page->mapping);
> + DBG_BUGON(z_erofs_page_is_invalidated(page));
>
> - /* recycle all individual staging pages */
> - if (z_erofs_put_stagingpage(pagepool, page))
> + /* recycle all individual short-lived pages */
> + if (z_erofs_put_shortlivedpage(pagepool, page))
> continue;
>
> if (err < 0)
> @@ -1011,13 +1017,17 @@ static struct page *pickup_page_for_submission(struct z_erofs_pcluster *pcl,
> mapping = READ_ONCE(page->mapping);
>
> /*
> - * unmanaged (file) pages are all locked solidly,
> + * file-backed online pages in plcuster are all locked steady,
> * therefore it is impossible for `mapping' to be NULL.
> */
> if (mapping && mapping != mc)
> /* ought to be unmanaged pages */
> goto out;
>
> + /* directly return for shortlived page as well */
> + if (z_erofs_is_shortlived_page(page))
> + goto out;
> +
> lock_page(page);
>
> /* only true if page reclaim goes wrong, should never happen */
> @@ -1062,8 +1072,8 @@ static struct page *pickup_page_for_submission(struct z_erofs_pcluster *pcl,
> out_allocpage:
> page = erofs_allocpage(pagepool, gfp | __GFP_NOFAIL);
> if (!tocache || add_to_page_cache_lru(page, mc, index + nr, gfp)) {
> - /* non-LRU / non-movable temporary page is needed */
> - page->mapping = Z_EROFS_MAPPING_STAGING;
> + /* turn into temporary page if fails */
> + set_page_private(page, Z_EROFS_SHORTLIVED_PAGE);
> tocache = false;
> }
>
> diff --git a/fs/erofs/zdata.h b/fs/erofs/zdata.h
> index 68c9b29fc0ca..b503b353d4ab 100644
> --- a/fs/erofs/zdata.h
> +++ b/fs/erofs/zdata.h
> @@ -173,6 +173,7 @@ static inline void z_erofs_onlinepage_endio(struct page *page)
>
> v = atomic_dec_return(u.o);
> if (!(v & Z_EROFS_ONLINEPAGE_COUNT_MASK)) {
> + set_page_private(page, 0);
> ClearPagePrivate(page);
> if (!PageError(page))
> SetPageUptodate(page);
>

2020-12-08 08:27:42

by Gao Xiang

[permalink] [raw]
Subject: Re: [PATCH v2 1/3] erofs: get rid of magical Z_EROFS_MAPPING_STAGING

Hi Chao,

On Tue, Dec 08, 2020 at 04:15:59PM +0800, Chao Yu wrote:
> On 2020/12/7 9:23, Gao Xiang wrote:
> > Previously, we played around with magical page->mapping for short-lived
> > temporary pages since we need to identify different types of pages in
> > the same pcluster but both invalidated and short-lived temporary pages
> > can have page->mapping == NULL. It was considered as safe because that
> > temporary pages are all non-LRU / non-movable pages.
> >
> > This patch tends to use specific page->private to identify short-lived
> > pages instead so it won't rely on page->mapping anymore. Details are
> > described in "compress.h" as well.
> >
> > Signed-off-by: Gao Xiang <[email protected]>
> > ---
> > tested with ro_fsstress for a whole night.
> >
> > The old "[PATCH 4/4] erofs: complete a missing case for inplace I/O" is
> > temporarily dropped since ro_fsstress failed with such modification,
> > will look into later.
> >
> > fs/erofs/compress.h | 50 ++++++++++++++++++++++++++++++-----------
> > fs/erofs/decompressor.c | 2 +-
> > fs/erofs/zdata.c | 42 +++++++++++++++++++++-------------
> > fs/erofs/zdata.h | 1 +
> > 4 files changed, 65 insertions(+), 30 deletions(-)
> >
> > diff --git a/fs/erofs/compress.h b/fs/erofs/compress.h
> > index 3d452443c545..2bbf47f353ef 100644
> > --- a/fs/erofs/compress.h
> > +++ b/fs/erofs/compress.h
> > @@ -26,30 +26,54 @@ struct z_erofs_decompress_req {
> > bool inplace_io, partial_decoding;
> > };
> > +#define Z_EROFS_SHORTLIVED_PAGE (-1UL << 2)
> > +
> > /*
> > - * - 0x5A110C8D ('sallocated', Z_EROFS_MAPPING_STAGING) -
> > - * used to mark temporary allocated pages from other
> > - * file/cached pages and NULL mapping pages.
> > + * For all pages in a pcluster, page->private should be one of
> > + * Type Last 2bits page->private
> > + * short-lived page 00 Z_EROFS_SHORTLIVED_PAGE
> > + * cached/managed page 00 pointer to z_erofs_pcluster
> > + * online page (file-backed, 01/10/11 sub-index << 2 | count
> > + * some pages can be used for inplace I/O)
> > + *
> > + * page->mapping should be one of
> > + * Type page->mapping
> > + * short-lived page NULL
> > + * cached/managed page non-NULL or NULL (invalidated/truncated page)
> > + * online page non-NULL
> > + *
> > + * For all managed pages, PG_private should be set with 1 extra refcount,
> > + * which is used for page reclaim / migration.
>
> FYI, there is a generic way to set/clear page_private, it binds the private
> value set and page count operation in one function:
>
> attach_page_private()
> detach_page_private()
>
> If there are use cases, let's try to use them as much as possible.

I discussed this case in the original thread,
https://lore.kernel.org/r/20200519100612.GA3687@hsiangkao-HP-ZHAN-66-Pro-G1

The previous conclusion was that, for the EROFS case (see Matthew's reply), this
pair won't see much usage, since the EROFS pattern avoids the extra page
reference count operations (- and +) in some cases.

I could use attach_page_private() and detach_page_private() if possible,
but the problem is that I'm not sure whether the internal implementation of
this pair is stable (whereas the PG_private rule has been stable for decades).

>
> > */
> > -#define Z_EROFS_MAPPING_STAGING ((void *)0x5A110C8D)
> > -/* check if a page is marked as staging */
> > -static inline bool z_erofs_page_is_staging(struct page *page)
> > +/*
> > + * short-lived pages are pages directly from buddy system with specific
> > + * page->private (no need to set PagePrivate since these are non-LRU /
> > + * non-movable pages and bypass reclaim / migration code).
> > + */
> > +static inline bool z_erofs_is_shortlived_page(struct page *page)
> > {
> > - return page->mapping == Z_EROFS_MAPPING_STAGING;
> > + if (page->private != Z_EROFS_SHORTLIVED_PAGE)
> > + return false;
> > +
> > + DBG_BUGON(page->mapping);
> > + return true;
> > }
> > -static inline bool z_erofs_put_stagingpage(struct list_head *pagepool,
> > - struct page *page)
> > +static inline bool z_erofs_put_shortlivedpage(struct list_head *pagepool,
> > + struct page *page)
> > {
> > - if (!z_erofs_page_is_staging(page))
> > + if (!z_erofs_is_shortlived_page(page))
> > return false;
> > - /* staging pages should not be used by others at the same time */
> > - if (page_ref_count(page) > 1)
> > + /* short-lived pages should not be used by others at the same time */
> > + if (page_ref_count(page) > 1) {
>
> Does this be a possible case?

Yes, see decompressor.c; it will call get_page(page).

>
> > put_page(page);
> > - else
> > + } else {
> > + /* follow the pcluster rule above. */
> > + set_page_private(page, 0);
> > list_add(&page->lru, pagepool);
> > + }
> > return true;
> > }
> > diff --git a/fs/erofs/decompressor.c b/fs/erofs/decompressor.c
> > index cbadbf55c6c2..1cb1ffd10569 100644
> > --- a/fs/erofs/decompressor.c
> > +++ b/fs/erofs/decompressor.c
> > @@ -76,7 +76,7 @@ static int z_erofs_lz4_prepare_destpages(struct z_erofs_decompress_req *rq,
> > victim = erofs_allocpage(pagepool, GFP_KERNEL);
> > if (!victim)
> > return -ENOMEM;
> > - victim->mapping = Z_EROFS_MAPPING_STAGING;
> > + set_page_private(victim, Z_EROFS_SHORTLIVED_PAGE);
> > }
> > rq->out[i] = victim;
> > }
> > diff --git a/fs/erofs/zdata.c b/fs/erofs/zdata.c
> > index 86fd3bf62af6..afeadf413c2c 100644
> > --- a/fs/erofs/zdata.c
> > +++ b/fs/erofs/zdata.c
> > @@ -255,6 +255,7 @@ int erofs_try_to_free_cached_page(struct address_space *mapping,
> > erofs_workgroup_unfreeze(&pcl->obj, 1);
> > if (ret) {
> > + set_page_private(page, 0);
> > ClearPagePrivate(page);
> > put_page(page);
>
> detach_page_private()?

The same as the above.

Thanks,
Gao Xiang

>
> Thanks,
>
> > }
> > @@ -648,12 +649,12 @@ static int z_erofs_do_read_page(struct z_erofs_decompress_frontend *fe,
> > retry:
> > err = z_erofs_attach_page(clt, page, page_type);
> > - /* should allocate an additional staging page for pagevec */
> > + /* should allocate an additional short-lived page for pagevec */
> > if (err == -EAGAIN) {
> > struct page *const newpage =
> > alloc_page(GFP_NOFS | __GFP_NOFAIL);
> > - newpage->mapping = Z_EROFS_MAPPING_STAGING;
> > + set_page_private(newpage, Z_EROFS_SHORTLIVED_PAGE);
> > err = z_erofs_attach_page(clt, newpage,
> > Z_EROFS_PAGE_TYPE_EXCLUSIVE);
> > if (!err)
> > @@ -710,6 +711,11 @@ static void z_erofs_decompress_kickoff(struct z_erofs_decompressqueue *io,
> > queue_work(z_erofs_workqueue, &io->u.work);
> > }
> > +static bool z_erofs_page_is_invalidated(struct page *page)
> > +{
> > + return !page->mapping && !z_erofs_is_shortlived_page(page);
> > +}
> > +
> > static void z_erofs_decompressqueue_endio(struct bio *bio)
> > {
> > tagptr1_t t = tagptr_init(tagptr1_t, bio->bi_private);
> > @@ -722,7 +728,7 @@ static void z_erofs_decompressqueue_endio(struct bio *bio)
> > struct page *page = bvec->bv_page;
> > DBG_BUGON(PageUptodate(page));
> > - DBG_BUGON(!page->mapping);
> > + DBG_BUGON(z_erofs_page_is_invalidated(page));
> > if (err)
> > SetPageError(page);
> > @@ -795,9 +801,9 @@ static int z_erofs_decompress_pcluster(struct super_block *sb,
> > /* all pages in pagevec ought to be valid */
> > DBG_BUGON(!page);
> > - DBG_BUGON(!page->mapping);
> > + DBG_BUGON(z_erofs_page_is_invalidated(page));
> > - if (z_erofs_put_stagingpage(pagepool, page))
> > + if (z_erofs_put_shortlivedpage(pagepool, page))
> > continue;
> > if (page_type == Z_EROFS_VLE_PAGE_TYPE_HEAD)
> > @@ -831,9 +837,9 @@ static int z_erofs_decompress_pcluster(struct super_block *sb,
> > /* all compressed pages ought to be valid */
> > DBG_BUGON(!page);
> > - DBG_BUGON(!page->mapping);
> > + DBG_BUGON(z_erofs_page_is_invalidated(page));
> > - if (!z_erofs_page_is_staging(page)) {
> > + if (!z_erofs_is_shortlived_page(page)) {
> > if (erofs_page_is_managed(sbi, page)) {
> > if (!PageUptodate(page))
> > err = -EIO;
> > @@ -858,7 +864,7 @@ static int z_erofs_decompress_pcluster(struct super_block *sb,
> > overlapped = true;
> > }
> > - /* PG_error needs checking for inplaced and staging pages */
> > + /* PG_error needs checking for all non-managed pages */
> > if (PageError(page)) {
> > DBG_BUGON(PageUptodate(page));
> > err = -EIO;
> > @@ -897,8 +903,8 @@ static int z_erofs_decompress_pcluster(struct super_block *sb,
> > if (erofs_page_is_managed(sbi, page))
> > continue;
> > - /* recycle all individual staging pages */
> > - (void)z_erofs_put_stagingpage(pagepool, page);
> > + /* recycle all individual short-lived pages */
> > + (void)z_erofs_put_shortlivedpage(pagepool, page);
> > WRITE_ONCE(compressed_pages[i], NULL);
> > }
> > @@ -908,10 +914,10 @@ static int z_erofs_decompress_pcluster(struct super_block *sb,
> > if (!page)
> > continue;
> > - DBG_BUGON(!page->mapping);
> > + DBG_BUGON(z_erofs_page_is_invalidated(page));
> > - /* recycle all individual staging pages */
> > - if (z_erofs_put_stagingpage(pagepool, page))
> > + /* recycle all individual short-lived pages */
> > + if (z_erofs_put_shortlivedpage(pagepool, page))
> > continue;
> > if (err < 0)
> > @@ -1011,13 +1017,17 @@ static struct page *pickup_page_for_submission(struct z_erofs_pcluster *pcl,
> > mapping = READ_ONCE(page->mapping);
> > /*
> > - * unmanaged (file) pages are all locked solidly,
> > + * file-backed online pages in plcuster are all locked steady,
> > * therefore it is impossible for `mapping' to be NULL.
> > */
> > if (mapping && mapping != mc)
> > /* ought to be unmanaged pages */
> > goto out;
> > + /* directly return for shortlived page as well */
> > + if (z_erofs_is_shortlived_page(page))
> > + goto out;
> > +
> > lock_page(page);
> > /* only true if page reclaim goes wrong, should never happen */
> > @@ -1062,8 +1072,8 @@ static struct page *pickup_page_for_submission(struct z_erofs_pcluster *pcl,
> > out_allocpage:
> > page = erofs_allocpage(pagepool, gfp | __GFP_NOFAIL);
> > if (!tocache || add_to_page_cache_lru(page, mc, index + nr, gfp)) {
> > - /* non-LRU / non-movable temporary page is needed */
> > - page->mapping = Z_EROFS_MAPPING_STAGING;
> > + /* turn into temporary page if fails */
> > + set_page_private(page, Z_EROFS_SHORTLIVED_PAGE);
> > tocache = false;
> > }
> > diff --git a/fs/erofs/zdata.h b/fs/erofs/zdata.h
> > index 68c9b29fc0ca..b503b353d4ab 100644
> > --- a/fs/erofs/zdata.h
> > +++ b/fs/erofs/zdata.h
> > @@ -173,6 +173,7 @@ static inline void z_erofs_onlinepage_endio(struct page *page)
> > v = atomic_dec_return(u.o);
> > if (!(v & Z_EROFS_ONLINEPAGE_COUNT_MASK)) {
> > + set_page_private(page, 0);
> > ClearPagePrivate(page);
> > if (!PageError(page))
> > SetPageUptodate(page);
> >
>

2020-12-08 08:33:14

by Gao Xiang

[permalink] [raw]
Subject: Re: [PATCH v2 1/3] erofs: get rid of magical Z_EROFS_MAPPING_STAGING

On Tue, Dec 08, 2020 at 04:15:59PM +0800, Chao Yu wrote:
> On 2020/12/7 9:23, Gao Xiang wrote:

...


> > }
> > -static inline bool z_erofs_put_stagingpage(struct list_head *pagepool,
> > - struct page *page)
> > +static inline bool z_erofs_put_shortlivedpage(struct list_head *pagepool,
> > + struct page *page)
> > {
> > - if (!z_erofs_page_is_staging(page))
> > + if (!z_erofs_is_shortlived_page(page))
> > return false;
> > - /* staging pages should not be used by others at the same time */
> > - if (page_ref_count(page) > 1)
> > + /* short-lived pages should not be used by others at the same time */
> > + if (page_ref_count(page) > 1) {
>
> Does this be a possible case?

Let me add more words about this... EROFS uses rolling decompression, which
means the sliding window is limited (e.g. 64k, although some vendors adjust it
to 12k, for example) even when the uncompressed size is much larger (e.g. 128k).
So the same short-lived page can be used more than once by means of get_page()
and vmap(), with z_erofs_put_shortlivedpage() freeing each such usage.
Since short-lived pages aren't shared with other parallel threads, it's safe to
simply decrease the page count like this (the count reflects how many shared
get_page() calls were made before) and to recycle the page to the pagepool on
the last reference for later use.

Thanks,
Gao Xiang

2020-12-08 08:49:32

by Chao Yu

[permalink] [raw]
Subject: Re: [PATCH v2 1/3] erofs: get rid of magical Z_EROFS_MAPPING_STAGING

Hi Xiang,

On 2020/12/8 16:23, Gao Xiang wrote:
> Hi Chao,
>
> On Tue, Dec 08, 2020 at 04:15:59PM +0800, Chao Yu wrote:
>> On 2020/12/7 9:23, Gao Xiang wrote:
>>> Previously, we played around with magical page->mapping for short-lived
>>> temporary pages since we need to identify different types of pages in
>>> the same pcluster but both invalidated and short-lived temporary pages
>>> can have page->mapping == NULL. It was considered as safe because that
>>> temporary pages are all non-LRU / non-movable pages.
>>>
>>> This patch tends to use specific page->private to identify short-lived
>>> pages instead so it won't rely on page->mapping anymore. Details are
>>> described in "compress.h" as well.
>>>
>>> Signed-off-by: Gao Xiang <[email protected]>
>>> ---
>>> tested with ro_fsstress for a whole night.
>>>
>>> The old "[PATCH 4/4] erofs: complete a missing case for inplace I/O" is
>>> temporarily dropped since ro_fsstress failed with such modification,
>>> will look into later.
>>>
>>> fs/erofs/compress.h | 50 ++++++++++++++++++++++++++++++-----------
>>> fs/erofs/decompressor.c | 2 +-
>>> fs/erofs/zdata.c | 42 +++++++++++++++++++++-------------
>>> fs/erofs/zdata.h | 1 +
>>> 4 files changed, 65 insertions(+), 30 deletions(-)
>>>
>>> diff --git a/fs/erofs/compress.h b/fs/erofs/compress.h
>>> index 3d452443c545..2bbf47f353ef 100644
>>> --- a/fs/erofs/compress.h
>>> +++ b/fs/erofs/compress.h
>>> @@ -26,30 +26,54 @@ struct z_erofs_decompress_req {
>>> bool inplace_io, partial_decoding;
>>> };
>>> +#define Z_EROFS_SHORTLIVED_PAGE (-1UL << 2)
>>> +
>>> /*
>>> - * - 0x5A110C8D ('sallocated', Z_EROFS_MAPPING_STAGING) -
>>> - * used to mark temporary allocated pages from other
>>> - * file/cached pages and NULL mapping pages.
>>> + * For all pages in a pcluster, page->private should be one of
>>> + * Type Last 2bits page->private
>>> + * short-lived page 00 Z_EROFS_SHORTLIVED_PAGE
>>> + * cached/managed page 00 pointer to z_erofs_pcluster
>>> + * online page (file-backed, 01/10/11 sub-index << 2 | count
>>> + * some pages can be used for inplace I/O)
>>> + *
>>> + * page->mapping should be one of
>>> + * Type page->mapping
>>> + * short-lived page NULL
>>> + * cached/managed page non-NULL or NULL (invalidated/truncated page)
>>> + * online page non-NULL
>>> + *
>>> + * For all managed pages, PG_private should be set with 1 extra refcount,
>>> + * which is used for page reclaim / migration.
>>
>> FYI, there is a generic way to set/clear page_private, it binds the private
>> value set and page count operation in one function:
>>
>> attach_page_private()
>> detach_page_private()
>>
>> If there are use cases, let's try to use them as much as possible.
>
> I discussed this case in the original thread,
> https://lore.kernel.org/r/20200519100612.GA3687@hsiangkao-HP-ZHAN-66-Pro-G1
>
> The previous conclusion is that for EROFS case (see Matthew's reply) this
> pair won't have too much usage. since EROFS pattern saves extra page
> reference count (- and +) by cases.

Alright, I see.

>
> I could use attach_page_private() and detach_page_private() if possible,
> but the problem is I'm not not its such pair internal implementation is
> stable (but the PG_Private rule is stable for decades);
>
>>
>>> */
>>> -#define Z_EROFS_MAPPING_STAGING ((void *)0x5A110C8D)
>>> -/* check if a page is marked as staging */
>>> -static inline bool z_erofs_page_is_staging(struct page *page)
>>> +/*
>>> + * short-lived pages are pages directly from buddy system with specific
>>> + * page->private (no need to set PagePrivate since these are non-LRU /
>>> + * non-movable pages and bypass reclaim / migration code).
>>> + */
>>> +static inline bool z_erofs_is_shortlived_page(struct page *page)
>>> {
>>> - return page->mapping == Z_EROFS_MAPPING_STAGING;
>>> + if (page->private != Z_EROFS_SHORTLIVED_PAGE)
>>> + return false;
>>> +
>>> + DBG_BUGON(page->mapping);
>>> + return true;
>>> }
>>> -static inline bool z_erofs_put_stagingpage(struct list_head *pagepool,
>>> - struct page *page)
>>> +static inline bool z_erofs_put_shortlivedpage(struct list_head *pagepool,
>>> + struct page *page)
>>> {
>>> - if (!z_erofs_page_is_staging(page))
>>> + if (!z_erofs_is_shortlived_page(page))
>>> return false;
>>> - /* staging pages should not be used by others at the same time */
>>> - if (page_ref_count(page) > 1)
>>> + /* short-lived pages should not be used by others at the same time */
>>> + if (page_ref_count(page) > 1) {
>>
>> Does this be a possible case?
>
> Yes, see decompress.c, if will get_page(page);

Yup,

>
>>
>>> put_page(page);
>>> - else
>>> + } else {
>>> + /* follow the pcluster rule above. */
>>> + set_page_private(page, 0);
>>> list_add(&page->lru, pagepool);
>>> + }
>>> return true;
>>> }
>>> diff --git a/fs/erofs/decompressor.c b/fs/erofs/decompressor.c
>>> index cbadbf55c6c2..1cb1ffd10569 100644
>>> --- a/fs/erofs/decompressor.c
>>> +++ b/fs/erofs/decompressor.c
>>> @@ -76,7 +76,7 @@ static int z_erofs_lz4_prepare_destpages(struct z_erofs_decompress_req *rq,
>>> victim = erofs_allocpage(pagepool, GFP_KERNEL);
>>> if (!victim)
>>> return -ENOMEM;
>>> - victim->mapping = Z_EROFS_MAPPING_STAGING;
>>> + set_page_private(victim, Z_EROFS_SHORTLIVED_PAGE);
>>> }
>>> rq->out[i] = victim;
>>> }
>>> diff --git a/fs/erofs/zdata.c b/fs/erofs/zdata.c
>>> index 86fd3bf62af6..afeadf413c2c 100644
>>> --- a/fs/erofs/zdata.c
>>> +++ b/fs/erofs/zdata.c
>>> @@ -255,6 +255,7 @@ int erofs_try_to_free_cached_page(struct address_space *mapping,
>>> erofs_workgroup_unfreeze(&pcl->obj, 1);
>>> if (ret) {
>>> + set_page_private(page, 0);
>>> ClearPagePrivate(page);
>>> put_page(page);
>>
>> detach_page_private()?
>
> The same as the above.

Reviewed-by: Chao Yu <[email protected]>

Thanks,

>
> Thanks,
> Gao Xiang
>
>>
>> Thanks,
>>
>>> }
>>> @@ -648,12 +649,12 @@ static int z_erofs_do_read_page(struct z_erofs_decompress_frontend *fe,
>>> retry:
>>> err = z_erofs_attach_page(clt, page, page_type);
>>> - /* should allocate an additional staging page for pagevec */
>>> + /* should allocate an additional short-lived page for pagevec */
>>> if (err == -EAGAIN) {
>>> struct page *const newpage =
>>> alloc_page(GFP_NOFS | __GFP_NOFAIL);
>>> - newpage->mapping = Z_EROFS_MAPPING_STAGING;
>>> + set_page_private(newpage, Z_EROFS_SHORTLIVED_PAGE);
>>> err = z_erofs_attach_page(clt, newpage,
>>> Z_EROFS_PAGE_TYPE_EXCLUSIVE);
>>> if (!err)
>>> @@ -710,6 +711,11 @@ static void z_erofs_decompress_kickoff(struct z_erofs_decompressqueue *io,
>>> queue_work(z_erofs_workqueue, &io->u.work);
>>> }
>>> +static bool z_erofs_page_is_invalidated(struct page *page)
>>> +{
>>> + return !page->mapping && !z_erofs_is_shortlived_page(page);
>>> +}
>>> +
>>> static void z_erofs_decompressqueue_endio(struct bio *bio)
>>> {
>>> tagptr1_t t = tagptr_init(tagptr1_t, bio->bi_private);
>>> @@ -722,7 +728,7 @@ static void z_erofs_decompressqueue_endio(struct bio *bio)
>>> struct page *page = bvec->bv_page;
>>> DBG_BUGON(PageUptodate(page));
>>> - DBG_BUGON(!page->mapping);
>>> + DBG_BUGON(z_erofs_page_is_invalidated(page));
>>> if (err)
>>> SetPageError(page);
>>> @@ -795,9 +801,9 @@ static int z_erofs_decompress_pcluster(struct super_block *sb,
>>> /* all pages in pagevec ought to be valid */
>>> DBG_BUGON(!page);
>>> - DBG_BUGON(!page->mapping);
>>> + DBG_BUGON(z_erofs_page_is_invalidated(page));
>>> - if (z_erofs_put_stagingpage(pagepool, page))
>>> + if (z_erofs_put_shortlivedpage(pagepool, page))
>>> continue;
>>> if (page_type == Z_EROFS_VLE_PAGE_TYPE_HEAD)
>>> @@ -831,9 +837,9 @@ static int z_erofs_decompress_pcluster(struct super_block *sb,
>>> /* all compressed pages ought to be valid */
>>> DBG_BUGON(!page);
>>> - DBG_BUGON(!page->mapping);
>>> + DBG_BUGON(z_erofs_page_is_invalidated(page));
>>> - if (!z_erofs_page_is_staging(page)) {
>>> + if (!z_erofs_is_shortlived_page(page)) {
>>> if (erofs_page_is_managed(sbi, page)) {
>>> if (!PageUptodate(page))
>>> err = -EIO;
>>> @@ -858,7 +864,7 @@ static int z_erofs_decompress_pcluster(struct super_block *sb,
>>> overlapped = true;
>>> }
>>> - /* PG_error needs checking for inplaced and staging pages */
>>> + /* PG_error needs checking for all non-managed pages */
>>> if (PageError(page)) {
>>> DBG_BUGON(PageUptodate(page));
>>> err = -EIO;
>>> @@ -897,8 +903,8 @@ static int z_erofs_decompress_pcluster(struct super_block *sb,
>>> if (erofs_page_is_managed(sbi, page))
>>> continue;
>>> - /* recycle all individual staging pages */
>>> - (void)z_erofs_put_stagingpage(pagepool, page);
>>> + /* recycle all individual short-lived pages */
>>> + (void)z_erofs_put_shortlivedpage(pagepool, page);
>>> WRITE_ONCE(compressed_pages[i], NULL);
>>> }
>>> @@ -908,10 +914,10 @@ static int z_erofs_decompress_pcluster(struct super_block *sb,
>>> if (!page)
>>> continue;
>>> - DBG_BUGON(!page->mapping);
>>> + DBG_BUGON(z_erofs_page_is_invalidated(page));
>>> - /* recycle all individual staging pages */
>>> - if (z_erofs_put_stagingpage(pagepool, page))
>>> + /* recycle all individual short-lived pages */
>>> + if (z_erofs_put_shortlivedpage(pagepool, page))
>>> continue;
>>> if (err < 0)
>>> @@ -1011,13 +1017,17 @@ static struct page *pickup_page_for_submission(struct z_erofs_pcluster *pcl,
>>> mapping = READ_ONCE(page->mapping);
>>> /*
>>> - * unmanaged (file) pages are all locked solidly,
>>> + * file-backed online pages in plcuster are all locked steady,
>>> * therefore it is impossible for `mapping' to be NULL.
>>> */
>>> if (mapping && mapping != mc)
>>> /* ought to be unmanaged pages */
>>> goto out;
>>> + /* directly return for shortlived page as well */
>>> + if (z_erofs_is_shortlived_page(page))
>>> + goto out;
>>> +
>>> lock_page(page);
>>> /* only true if page reclaim goes wrong, should never happen */
>>> @@ -1062,8 +1072,8 @@ static struct page *pickup_page_for_submission(struct z_erofs_pcluster *pcl,
>>> out_allocpage:
>>> page = erofs_allocpage(pagepool, gfp | __GFP_NOFAIL);
>>> if (!tocache || add_to_page_cache_lru(page, mc, index + nr, gfp)) {
>>> - /* non-LRU / non-movable temporary page is needed */
>>> - page->mapping = Z_EROFS_MAPPING_STAGING;
>>> + /* turn into temporary page if fails */
>>> + set_page_private(page, Z_EROFS_SHORTLIVED_PAGE);
>>> tocache = false;
>>> }
>>> diff --git a/fs/erofs/zdata.h b/fs/erofs/zdata.h
>>> index 68c9b29fc0ca..b503b353d4ab 100644
>>> --- a/fs/erofs/zdata.h
>>> +++ b/fs/erofs/zdata.h
>>> @@ -173,6 +173,7 @@ static inline void z_erofs_onlinepage_endio(struct page *page)
>>> v = atomic_dec_return(u.o);
>>> if (!(v & Z_EROFS_ONLINEPAGE_COUNT_MASK)) {
>>> + set_page_private(page, 0);
>>> ClearPagePrivate(page);
>>> if (!PageError(page))
>>> SetPageUptodate(page);
>>>
>>
>
> .
>

2020-12-08 08:54:30

by Gao Xiang

[permalink] [raw]
Subject: Re: [PATCH v2 1/3] erofs: get rid of magical Z_EROFS_MAPPING_STAGING

On Tue, Dec 08, 2020 at 04:44:12PM +0800, Chao Yu wrote:
> Hi Xiang,

...

> >
> > I discussed this case in the original thread,
> > https://lore.kernel.org/r/20200519100612.GA3687@hsiangkao-HP-ZHAN-66-Pro-G1
> >
> > The previous conclusion is that for EROFS case (see Matthew's reply) this
> > pair won't have too much usage. since EROFS pattern saves extra page
> > reference count (- and +) by cases.
>
> Alright, I see.

Yeah, yet in order to avoid further confusion (or questions from others), let me
update to use this pair as much as possible in the next version :( (If someone
breaks it in the future, I may need to remind him in time though...)

>

...

>
> Reviewed-by: Chao Yu <[email protected]>

Thanks for the review!

Thanks,
Gao Xiang

>
> Thanks,
>
> >
> > Thanks,
> > Gao Xiang
> >
> > >
> > > Thanks,
> > >
> > > > }
> > > > @@ -648,12 +649,12 @@ static int z_erofs_do_read_page(struct z_erofs_decompress_frontend *fe,
> > > > retry:
> > > > err = z_erofs_attach_page(clt, page, page_type);
> > > > - /* should allocate an additional staging page for pagevec */
> > > > + /* should allocate an additional short-lived page for pagevec */
> > > > if (err == -EAGAIN) {
> > > > struct page *const newpage =
> > > > alloc_page(GFP_NOFS | __GFP_NOFAIL);
> > > > - newpage->mapping = Z_EROFS_MAPPING_STAGING;
> > > > + set_page_private(newpage, Z_EROFS_SHORTLIVED_PAGE);
> > > > err = z_erofs_attach_page(clt, newpage,
> > > > Z_EROFS_PAGE_TYPE_EXCLUSIVE);
> > > > if (!err)
> > > > @@ -710,6 +711,11 @@ static void z_erofs_decompress_kickoff(struct z_erofs_decompressqueue *io,
> > > > queue_work(z_erofs_workqueue, &io->u.work);
> > > > }
> > > > +static bool z_erofs_page_is_invalidated(struct page *page)
> > > > +{
> > > > + return !page->mapping && !z_erofs_is_shortlived_page(page);
> > > > +}
> > > > +
> > > > static void z_erofs_decompressqueue_endio(struct bio *bio)
> > > > {
> > > > tagptr1_t t = tagptr_init(tagptr1_t, bio->bi_private);
> > > > @@ -722,7 +728,7 @@ static void z_erofs_decompressqueue_endio(struct bio *bio)
> > > > struct page *page = bvec->bv_page;
> > > > DBG_BUGON(PageUptodate(page));
> > > > - DBG_BUGON(!page->mapping);
> > > > + DBG_BUGON(z_erofs_page_is_invalidated(page));
> > > > if (err)
> > > > SetPageError(page);
> > > > @@ -795,9 +801,9 @@ static int z_erofs_decompress_pcluster(struct super_block *sb,
> > > > /* all pages in pagevec ought to be valid */
> > > > DBG_BUGON(!page);
> > > > - DBG_BUGON(!page->mapping);
> > > > + DBG_BUGON(z_erofs_page_is_invalidated(page));
> > > > - if (z_erofs_put_stagingpage(pagepool, page))
> > > > + if (z_erofs_put_shortlivedpage(pagepool, page))
> > > > continue;
> > > > if (page_type == Z_EROFS_VLE_PAGE_TYPE_HEAD)
> > > > @@ -831,9 +837,9 @@ static int z_erofs_decompress_pcluster(struct super_block *sb,
> > > > /* all compressed pages ought to be valid */
> > > > DBG_BUGON(!page);
> > > > - DBG_BUGON(!page->mapping);
> > > > + DBG_BUGON(z_erofs_page_is_invalidated(page));
> > > > - if (!z_erofs_page_is_staging(page)) {
> > > > + if (!z_erofs_is_shortlived_page(page)) {
> > > > if (erofs_page_is_managed(sbi, page)) {
> > > > if (!PageUptodate(page))
> > > > err = -EIO;
> > > > @@ -858,7 +864,7 @@ static int z_erofs_decompress_pcluster(struct super_block *sb,
> > > > overlapped = true;
> > > > }
> > > > - /* PG_error needs checking for inplaced and staging pages */
> > > > + /* PG_error needs checking for all non-managed pages */
> > > > if (PageError(page)) {
> > > > DBG_BUGON(PageUptodate(page));
> > > > err = -EIO;
> > > > @@ -897,8 +903,8 @@ static int z_erofs_decompress_pcluster(struct super_block *sb,
> > > > if (erofs_page_is_managed(sbi, page))
> > > > continue;
> > > > - /* recycle all individual staging pages */
> > > > - (void)z_erofs_put_stagingpage(pagepool, page);
> > > > + /* recycle all individual short-lived pages */
> > > > + (void)z_erofs_put_shortlivedpage(pagepool, page);
> > > > WRITE_ONCE(compressed_pages[i], NULL);
> > > > }
> > > > @@ -908,10 +914,10 @@ static int z_erofs_decompress_pcluster(struct super_block *sb,
> > > > if (!page)
> > > > continue;
> > > > - DBG_BUGON(!page->mapping);
> > > > + DBG_BUGON(z_erofs_page_is_invalidated(page));
> > > > - /* recycle all individual staging pages */
> > > > - if (z_erofs_put_stagingpage(pagepool, page))
> > > > + /* recycle all individual short-lived pages */
> > > > + if (z_erofs_put_shortlivedpage(pagepool, page))
> > > > continue;
> > > > if (err < 0)
> > > > @@ -1011,13 +1017,17 @@ static struct page *pickup_page_for_submission(struct z_erofs_pcluster *pcl,
> > > > mapping = READ_ONCE(page->mapping);
> > > > /*
> > > > - * unmanaged (file) pages are all locked solidly,
> > > > + * file-backed online pages in plcuster are all locked steady,
> > > > * therefore it is impossible for `mapping' to be NULL.
> > > > */
> > > > if (mapping && mapping != mc)
> > > > /* ought to be unmanaged pages */
> > > > goto out;
> > > > + /* directly return for shortlived page as well */
> > > > + if (z_erofs_is_shortlived_page(page))
> > > > + goto out;
> > > > +
> > > > lock_page(page);
> > > > /* only true if page reclaim goes wrong, should never happen */
> > > > @@ -1062,8 +1072,8 @@ static struct page *pickup_page_for_submission(struct z_erofs_pcluster *pcl,
> > > > out_allocpage:
> > > > page = erofs_allocpage(pagepool, gfp | __GFP_NOFAIL);
> > > > if (!tocache || add_to_page_cache_lru(page, mc, index + nr, gfp)) {
> > > > - /* non-LRU / non-movable temporary page is needed */
> > > > - page->mapping = Z_EROFS_MAPPING_STAGING;
> > > > + /* turn into temporary page if fails */
> > > > + set_page_private(page, Z_EROFS_SHORTLIVED_PAGE);
> > > > tocache = false;
> > > > }
> > > > diff --git a/fs/erofs/zdata.h b/fs/erofs/zdata.h
> > > > index 68c9b29fc0ca..b503b353d4ab 100644
> > > > --- a/fs/erofs/zdata.h
> > > > +++ b/fs/erofs/zdata.h
> > > > @@ -173,6 +173,7 @@ static inline void z_erofs_onlinepage_endio(struct page *page)
> > > > v = atomic_dec_return(u.o);
> > > > if (!(v & Z_EROFS_ONLINEPAGE_COUNT_MASK)) {
> > > > + set_page_private(page, 0);
> > > > ClearPagePrivate(page);
> > > > if (!PageError(page))
> > > > SetPageUptodate(page);
> > > >
> > >
> >
> > .
> >
>

2020-12-08 08:55:11

by Chao Yu

[permalink] [raw]
Subject: Re: [PATCH v2 2/3] erofs: insert to managed cache after adding to pcl

On 2020/12/7 9:23, Gao Xiang wrote:
> Previously, it could be some concern to call add_to_page_cache_lru()
> with page->mapping == Z_EROFS_MAPPING_STAGING (!= NULL).
>
> In contrast, page->private is used instead now, so partially revert
> commit 5ddcee1f3a1c ("erofs: get rid of __stagingpage_alloc helper")
> with some adaption for simplicity.
>
> Signed-off-by: Gao Xiang <[email protected]>

Reviewed-by: Chao Yu <[email protected]>

Thanks,

2020-12-08 09:32:05

by Chao Yu

[permalink] [raw]
Subject: Re: [PATCH v2 3/3] erofs: simplify try_to_claim_pcluster()

On 2020/12/7 9:23, Gao Xiang wrote:
> simplify try_to_claim_pcluster() by directly using cmpxchg() here
> (the retry loop caused more overhead.) Also, move the chain loop
> detection in and rename it to z_erofs_try_to_claim_pcluster().

Looks cleaner.

>
> Signed-off-by: Gao Xiang <[email protected]>

Reviewed-by: Chao Yu <[email protected]>

Thanks,