2021-12-25 07:06:40

by Gao Xiang

Subject: [PATCH v3 0/5] erofs: support tail-packing inline compressed data

Hi folks,

v2: https://lore.kernel.org/r/[email protected]
mkfs v8: https://lore.kernel.org/r/[email protected]

This is the 3rd version of the tail-packing inline compressed data
feature. It inlines the tail pcluster right after the inode metadata to
save data I/O and storage space.

Take Linux 5.10.87 source code as an example:
linux-5.10.87 (erofs, uncompressed) 972570624

linux-5.10.87 (erofs, lz4hc,9 4k tailpacking) 391696384
linux-5.10.87 (erofs, lz4hc,9 8k tailpacking) 368807936
linux-5.10.87 (erofs, lz4hc,9 16k tailpacking) 345649152

linux-5.10.87 (erofs, lz4hc,9 4k vanilla) 416079872
linux-5.10.87 (erofs, lz4hc,9 8k vanilla) 395493376
linux-5.10.87 (erofs, lz4hc,9 16k vanilla) 383213568
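
Compared with the corresponding vanilla images above, that is roughly
a 5.9% (4k), 6.7% (8k) and 9.8% (16k) reduction in image size.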

Thanks,
Gao Xiang

changes since v2:
- properly split patches;
- some code cleanup.

Gao Xiang (3):
erofs: tidy up z_erofs_lz4_decompress
erofs: introduce z_erofs_fixup_insize
erofs: support unaligned data decompression

Yue Hu (2):
erofs: support inline data decompression
erofs: add on-disk compressed tail-packing inline support

fs/erofs/compress.h | 4 +-
fs/erofs/decompressor.c | 129 ++++++++++++++++++++---------------
fs/erofs/decompressor_lzma.c | 19 +++---
fs/erofs/erofs_fs.h | 10 ++-
fs/erofs/internal.h | 6 ++
fs/erofs/super.c | 3 +
fs/erofs/sysfs.c | 2 +
fs/erofs/zdata.c | 128 +++++++++++++++++++++++-----------
fs/erofs/zdata.h | 24 ++++++-
fs/erofs/zmap.c | 116 +++++++++++++++++++++++--------
10 files changed, 299 insertions(+), 142 deletions(-)

--
2.24.4



2021-12-25 07:06:40

by Gao Xiang

Subject: [PATCH v3 2/5] erofs: introduce z_erofs_fixup_insize

To prepare for the upcoming ztailpacking feature, introduce
z_erofs_fixup_insize() and pageofs_in to wrap up the process of
getting the exact compressed size via zero padding.
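
For illustration, below is a minimal user-space sketch of the same idea
(the names and the trivial scan loop are mine; the kernel helper uses
memchr_inv() instead): with the zero_padding feature, compressed data is
right-aligned inside its block, so the run of leading zero bytes is
exactly what has to be trimmed from inputsize and added to pageofs_in.

#include <stddef.h>

/* return a pointer to the first non-zero byte in [s, s + n), or NULL */
static const unsigned char *first_nonzero(const unsigned char *s, size_t n)
{
	size_t i;

	for (i = 0; i < n; i++)
		if (s[i])
			return s + i;
	return NULL;
}

/*
 * Return the number of leading padding bytes, or -1 if the whole buffer
 * is zero (treated as corruption, like -EFSCORRUPTED in the patch).
 */
static long fixup_insize_sketch(const unsigned char *padbuf,
				size_t padbufsize)
{
	const unsigned char *padend = first_nonzero(padbuf, padbufsize);

	if (!padend)
		return -1;
	return padend - padbuf;
}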

Signed-off-by: Gao Xiang <[email protected]>
---
fs/erofs/compress.h | 4 +++-
fs/erofs/decompressor.c | 34 +++++++++++++++++++++++++---------
fs/erofs/decompressor_lzma.c | 19 ++++++++-----------
3 files changed, 36 insertions(+), 21 deletions(-)

diff --git a/fs/erofs/compress.h b/fs/erofs/compress.h
index 579406504919..19e6c56a9f47 100644
--- a/fs/erofs/compress.h
+++ b/fs/erofs/compress.h
@@ -12,7 +12,7 @@ struct z_erofs_decompress_req {
struct super_block *sb;
struct page **in, **out;

- unsigned short pageofs_out;
+ unsigned short pageofs_in, pageofs_out;
unsigned int inputsize, outputsize;

/* indicate the algorithm will be used for decompression */
@@ -87,6 +87,8 @@ static inline bool erofs_page_is_managed(const struct erofs_sb_info *sbi,
return page->mapping == MNGD_MAPPING(sbi);
}

+int z_erofs_fixup_insize(struct z_erofs_decompress_req *rq, const char *padbuf,
+ unsigned int padbufsize);
int z_erofs_decompress(struct z_erofs_decompress_req *rq,
struct page **pagepool);

diff --git a/fs/erofs/decompressor.c b/fs/erofs/decompressor.c
index d1282a8b709e..4dd2ca61465c 100644
--- a/fs/erofs/decompressor.c
+++ b/fs/erofs/decompressor.c
@@ -181,6 +181,24 @@ static void *z_erofs_lz4_handle_overlap(struct z_erofs_lz4_decompress_ctx *ctx,
return src;
}

+/*
+ * Get the exact inputsize with zero_padding feature.
+ * - For LZ4, it should work if zero_padding feature is on (5.3+);
+ * - For MicroLZMA, it'd be enabled all the time.
+ */
+int z_erofs_fixup_insize(struct z_erofs_decompress_req *rq, const char *padbuf,
+ unsigned int padbufsize)
+{
+ const char *padend;
+
+ padend = memchr_inv(padbuf, 0, padbufsize);
+ if (!padend)
+ return -EFSCORRUPTED;
+ rq->inputsize -= padend - padbuf;
+ rq->pageofs_in += padend - padbuf;
+ return 0;
+}
+
static int z_erofs_lz4_decompress_mem(struct z_erofs_lz4_decompress_ctx *ctx,
u8 *out)
{
@@ -195,21 +213,19 @@ static int z_erofs_lz4_decompress_mem(struct z_erofs_lz4_decompress_ctx *ctx,
inputmargin = 0;
support_0padding = false;

- /* decompression inplace is only safe when zero_padding is enabled */
+ /* LZ4 decompression inplace is only safe if zero_padding is enabled */
if (erofs_sb_has_zero_padding(EROFS_SB(rq->sb))) {
support_0padding = true;
-
- while (!headpage[inputmargin & ~PAGE_MASK])
- if (!(++inputmargin & ~PAGE_MASK))
- break;
-
- if (inputmargin >= rq->inputsize) {
+ ret = z_erofs_fixup_insize(rq, headpage + rq->pageofs_in,
+ min_t(unsigned int, rq->inputsize,
+ EROFS_BLKSIZ - rq->pageofs_in));
+ if (ret) {
kunmap_atomic(headpage);
- return -EIO;
+ return ret;
}
}

- rq->inputsize -= inputmargin;
+ inputmargin = rq->pageofs_in;
src = z_erofs_lz4_handle_overlap(ctx, headpage, &inputmargin,
&maptype, support_0padding);
if (IS_ERR(src))
diff --git a/fs/erofs/decompressor_lzma.c b/fs/erofs/decompressor_lzma.c
index 50045510a1f4..05a3063cf2bc 100644
--- a/fs/erofs/decompressor_lzma.c
+++ b/fs/erofs/decompressor_lzma.c
@@ -156,7 +156,7 @@ int z_erofs_lzma_decompress(struct z_erofs_decompress_req *rq,
PAGE_ALIGN(rq->pageofs_out + rq->outputsize) >> PAGE_SHIFT;
const unsigned int nrpages_in =
PAGE_ALIGN(rq->inputsize) >> PAGE_SHIFT;
- unsigned int inputmargin, inlen, outlen, pageofs;
+ unsigned int inlen, outlen, pageofs;
struct z_erofs_lzma *strm;
u8 *kin;
bool bounced = false;
@@ -164,16 +164,13 @@ int z_erofs_lzma_decompress(struct z_erofs_decompress_req *rq,

/* 1. get the exact LZMA compressed size */
kin = kmap(*rq->in);
- inputmargin = 0;
- while (!kin[inputmargin & ~PAGE_MASK])
- if (!(++inputmargin & ~PAGE_MASK))
- break;
-
- if (inputmargin >= PAGE_SIZE) {
+ err = z_erofs_fixup_insize(rq, kin + rq->pageofs_in,
+ min_t(unsigned int, rq->inputsize,
+ EROFS_BLKSIZ - rq->pageofs_in));
+ if (err) {
kunmap(*rq->in);
- return -EFSCORRUPTED;
+ return err;
}
- rq->inputsize -= inputmargin;

/* 2. get an available lzma context */
again:
@@ -193,9 +190,9 @@ int z_erofs_lzma_decompress(struct z_erofs_decompress_req *rq,
xz_dec_microlzma_reset(strm->state, inlen, outlen,
!rq->partial_decoding);
pageofs = rq->pageofs_out;
- strm->buf.in = kin + inputmargin;
+ strm->buf.in = kin + rq->pageofs_in;
strm->buf.in_pos = 0;
- strm->buf.in_size = min_t(u32, inlen, PAGE_SIZE - inputmargin);
+ strm->buf.in_size = min_t(u32, inlen, PAGE_SIZE - rq->pageofs_in);
inlen -= strm->buf.in_size;
strm->buf.out = NULL;
strm->buf.out_pos = 0;
--
2.24.4


2021-12-25 07:06:55

by Gao Xiang

Subject: [PATCH v3 3/5] erofs: support unaligned data decompression

Previously, compressed data was assumed to be block-aligned. This no
longer holds with in-block tail-packing inline data, so support
decompressing from an arbitrary in-block offset.
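
As a worked example (my own numbers, assuming EROFS_BLKSIZ == 4096):
with pageofs_in = 3000 and inputsize = 1096, the compressed data ends
exactly on a block boundary (3000 + 1096 = 4096), so LZ4 in-place
decompression may still be attempted; with inputsize = 1000 it ends
mid-block (3000 + 1000 = 4000), and the decompressor must copy the
input instead, which is what the new may_inplace check below encodes.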

Signed-off-by: Yue Hu <[email protected]>
Signed-off-by: Gao Xiang <[email protected]>
---
fs/erofs/decompressor.c | 17 +++++++++--------
1 file changed, 9 insertions(+), 8 deletions(-)

diff --git a/fs/erofs/decompressor.c b/fs/erofs/decompressor.c
index 4dd2ca61465c..3a067127a22d 100644
--- a/fs/erofs/decompressor.c
+++ b/fs/erofs/decompressor.c
@@ -118,7 +118,7 @@ static int z_erofs_lz4_prepare_dstpages(struct z_erofs_lz4_decompress_ctx *ctx,

static void *z_erofs_lz4_handle_overlap(struct z_erofs_lz4_decompress_ctx *ctx,
void *inpage, unsigned int *inputmargin, int *maptype,
- bool support_0padding)
+ bool may_inplace)
{
struct z_erofs_decompress_req *rq = ctx->rq;
unsigned int omargin, total, i, j;
@@ -127,7 +127,7 @@ static void *z_erofs_lz4_handle_overlap(struct z_erofs_lz4_decompress_ctx *ctx,

if (rq->inplace_io) {
omargin = PAGE_ALIGN(ctx->oend) - ctx->oend;
- if (rq->partial_decoding || !support_0padding ||
+ if (rq->partial_decoding || !may_inplace ||
omargin < LZ4_DECOMPRESS_INPLACE_MARGIN(rq->inputsize))
goto docopy;

@@ -203,15 +203,13 @@ static int z_erofs_lz4_decompress_mem(struct z_erofs_lz4_decompress_ctx *ctx,
u8 *out)
{
struct z_erofs_decompress_req *rq = ctx->rq;
+ bool support_0padding = false, may_inplace = false;
unsigned int inputmargin;
u8 *headpage, *src;
- bool support_0padding;
int ret, maptype;

DBG_BUGON(*rq->in == NULL);
headpage = kmap_atomic(*rq->in);
- inputmargin = 0;
- support_0padding = false;

/* LZ4 decompression inplace is only safe if zero_padding is enabled */
if (erofs_sb_has_zero_padding(EROFS_SB(rq->sb))) {
@@ -223,11 +221,13 @@ static int z_erofs_lz4_decompress_mem(struct z_erofs_lz4_decompress_ctx *ctx,
kunmap_atomic(headpage);
return ret;
}
+ may_inplace = !((rq->pageofs_in + rq->inputsize) &
+ (EROFS_BLKSIZ - 1));
}

inputmargin = rq->pageofs_in;
src = z_erofs_lz4_handle_overlap(ctx, headpage, &inputmargin,
- &maptype, support_0padding);
+ &maptype, may_inplace);
if (IS_ERR(src))
return PTR_ERR(src);

@@ -317,7 +317,8 @@ static int z_erofs_shifted_transform(struct z_erofs_decompress_req *rq,
{
const unsigned int nrpages_out =
PAGE_ALIGN(rq->pageofs_out + rq->outputsize) >> PAGE_SHIFT;
- const unsigned int righthalf = PAGE_SIZE - rq->pageofs_out;
+ const unsigned int righthalf = min_t(unsigned int, rq->outputsize,
+ PAGE_SIZE - rq->pageofs_out);
unsigned char *src, *dst;

if (nrpages_out > 2) {
@@ -330,7 +331,7 @@ static int z_erofs_shifted_transform(struct z_erofs_decompress_req *rq,
return 0;
}

- src = kmap_atomic(*rq->in);
+ src = kmap_atomic(*rq->in) + rq->pageofs_in;
if (rq->out[0]) {
dst = kmap_atomic(rq->out[0]);
memcpy(dst + rq->pageofs_out, src, righthalf);
--
2.24.4


2021-12-25 07:06:55

by Gao Xiang

Subject: [PATCH v3 4/5] erofs: support inline data decompression

From: Yue Hu <[email protected]>

Currently, tail-packing inline is already supported for uncompressed
files; let's also implement it for compressed files to save I/O and
storage space.

Unlike normal pclusters, the compressed data of an inline pcluster is
already available thanks to earlier metadata I/O. Therefore, such
pclusters move directly into the bypass queue without extra I/O
submission.

It's the last compression feature before folio/subpage support.
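
As a rough illustration of the marker used here, a self-contained
sketch follows (simplified types, not the real kernel structures): a
physical cluster index of 0 never occurs for a regular on-disk
pcluster, so it is reused to tag a tail-packing inline pcluster, which
always owns exactly one (meta) page.

#include <stdbool.h>

struct pcluster_sketch {
	unsigned long index;	/* 0 means tail-packing inline */
	union {
		unsigned short pclusterpages;	 /* normal: size in pages */
		unsigned short tailpacking_size; /* inline: size in bytes */
	};
};

static bool is_inline_pcluster(const struct pcluster_sketch *pcl)
{
	return !pcl->index;
}

static unsigned int nr_compressed_pages(const struct pcluster_sketch *pcl)
{
	return is_inline_pcluster(pcl) ? 1 : pcl->pclusterpages;
}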

Signed-off-by: Yue Hu <[email protected]>
Signed-off-by: Gao Xiang <[email protected]>
---
fs/erofs/zdata.c | 128 ++++++++++++++++++++++++++++++++---------------
fs/erofs/zdata.h | 24 ++++++++-
2 files changed, 109 insertions(+), 43 deletions(-)

diff --git a/fs/erofs/zdata.c b/fs/erofs/zdata.c
index bc765d8a6dc2..e6ef02634e08 100644
--- a/fs/erofs/zdata.c
+++ b/fs/erofs/zdata.c
@@ -82,12 +82,13 @@ static struct z_erofs_pcluster *z_erofs_alloc_pcluster(unsigned int nrpages)

static void z_erofs_free_pcluster(struct z_erofs_pcluster *pcl)
{
+ unsigned int pclusterpages = z_erofs_pclusterpages(pcl);
int i;

for (i = 0; i < ARRAY_SIZE(pcluster_pool); ++i) {
struct z_erofs_pcluster_slab *pcs = pcluster_pool + i;

- if (pcl->pclusterpages > pcs->maxpages)
+ if (pclusterpages > pcs->maxpages)
continue;

kmem_cache_free(pcs->slab, pcl);
@@ -298,6 +299,7 @@ int erofs_try_to_free_all_cached_pages(struct erofs_sb_info *sbi,
container_of(grp, struct z_erofs_pcluster, obj);
int i;

+ DBG_BUGON(z_erofs_is_inline_pcluster(pcl));
/*
* refcount of workgroup is now freezed as 1,
* therefore no need to worry about available decompression users.
@@ -331,6 +333,7 @@ int erofs_try_to_free_cached_page(struct page *page)
if (erofs_workgroup_try_to_freeze(&pcl->obj, 1)) {
unsigned int i;

+ DBG_BUGON(z_erofs_is_inline_pcluster(pcl));
for (i = 0; i < pcl->pclusterpages; ++i) {
if (pcl->compressed_pages[i] == page) {
WRITE_ONCE(pcl->compressed_pages[i], NULL);
@@ -458,6 +461,7 @@ static int z_erofs_register_collection(struct z_erofs_collector *clt,
struct inode *inode,
struct erofs_map_blocks *map)
{
+ bool ztailpacking = map->m_flags & EROFS_MAP_META;
struct z_erofs_pcluster *pcl;
struct z_erofs_collection *cl;
struct erofs_workgroup *grp;
@@ -469,12 +473,12 @@ static int z_erofs_register_collection(struct z_erofs_collector *clt,
}

/* no available pcluster, let's allocate one */
- pcl = z_erofs_alloc_pcluster(map->m_plen >> PAGE_SHIFT);
+ pcl = z_erofs_alloc_pcluster(ztailpacking ? 1 :
+ map->m_plen >> PAGE_SHIFT);
if (IS_ERR(pcl))
return PTR_ERR(pcl);

atomic_set(&pcl->obj.refcount, 1);
- pcl->obj.index = map->m_pa >> PAGE_SHIFT;
pcl->algorithmformat = map->m_algorithmformat;
pcl->length = (map->m_llen << Z_EROFS_PCLUSTER_LENGTH_BIT) |
(map->m_flags & EROFS_MAP_FULL_MAPPED ?
@@ -494,16 +498,25 @@ static int z_erofs_register_collection(struct z_erofs_collector *clt,
mutex_init(&cl->lock);
DBG_BUGON(!mutex_trylock(&cl->lock));

- grp = erofs_insert_workgroup(inode->i_sb, &pcl->obj);
- if (IS_ERR(grp)) {
- err = PTR_ERR(grp);
- goto err_out;
- }
+ if (ztailpacking) {
+ pcl->obj.index = 0; /* which indicates ztailpacking */
+ pcl->pageofs_in = erofs_blkoff(map->m_pa);
+ pcl->tailpacking_size = map->m_plen;
+ } else {
+ pcl->obj.index = map->m_pa >> PAGE_SHIFT;

- if (grp != &pcl->obj) {
- clt->pcl = container_of(grp, struct z_erofs_pcluster, obj);
- err = -EEXIST;
- goto err_out;
+ grp = erofs_insert_workgroup(inode->i_sb, &pcl->obj);
+ if (IS_ERR(grp)) {
+ err = PTR_ERR(grp);
+ goto err_out;
+ }
+
+ if (grp != &pcl->obj) {
+ clt->pcl = container_of(grp,
+ struct z_erofs_pcluster, obj);
+ err = -EEXIST;
+ goto err_out;
+ }
}
/* used to check tail merging loop due to corrupted images */
if (clt->owned_head == Z_EROFS_PCLUSTER_TAIL)
@@ -532,17 +545,20 @@ static int z_erofs_collector_begin(struct z_erofs_collector *clt,
DBG_BUGON(clt->owned_head == Z_EROFS_PCLUSTER_NIL);
DBG_BUGON(clt->owned_head == Z_EROFS_PCLUSTER_TAIL_CLOSED);

- if (!PAGE_ALIGNED(map->m_pa)) {
- DBG_BUGON(1);
- return -EINVAL;
+ if (map->m_flags & EROFS_MAP_META) {
+ if ((map->m_pa & ~PAGE_MASK) + map->m_plen > PAGE_SIZE) {
+ DBG_BUGON(1);
+ return -EFSCORRUPTED;
+ }
+ goto tailpacking;
}

grp = erofs_find_workgroup(inode->i_sb, map->m_pa >> PAGE_SHIFT);
if (grp) {
clt->pcl = container_of(grp, struct z_erofs_pcluster, obj);
} else {
+tailpacking:
ret = z_erofs_register_collection(clt, inode, map);
-
if (!ret)
goto out;
if (ret != -EEXIST)
@@ -558,9 +574,9 @@ static int z_erofs_collector_begin(struct z_erofs_collector *clt,
out:
z_erofs_pagevec_ctor_init(&clt->vector, Z_EROFS_NR_INLINE_PAGEVECS,
clt->cl->pagevec, clt->cl->vcnt);
-
/* since file-backed online pages are traversed in reverse order */
- clt->icpage_ptr = clt->pcl->compressed_pages + clt->pcl->pclusterpages;
+ clt->icpage_ptr = clt->pcl->compressed_pages +
+ z_erofs_pclusterpages(clt->pcl);
return 0;
}

@@ -687,8 +703,26 @@ static int z_erofs_do_read_page(struct z_erofs_decompress_frontend *fe,
else
cache_strategy = DONTALLOC;

- preload_compressed_pages(clt, MNGD_MAPPING(sbi),
- cache_strategy, pagepool);
+ if (z_erofs_is_inline_pcluster(clt->pcl)) {
+ struct page *mpage;
+
+ mpage = erofs_get_meta_page(inode->i_sb,
+ erofs_blknr(map->m_pa));
+ if (IS_ERR(mpage)) {
+ err = PTR_ERR(mpage);
+ erofs_err(inode->i_sb,
+ "failed to get inline page, err %d", err);
+ goto err_out;
+ }
+ /* TODO: new subpage feature will get rid of it */
+ unlock_page(mpage);
+
+ WRITE_ONCE(clt->pcl->compressed_pages[0], mpage);
+ clt->mode = COLLECT_PRIMARY_FOLLOWED_NOINPLACE;
+ } else {
+ preload_compressed_pages(clt, MNGD_MAPPING(sbi),
+ cache_strategy, pagepool);
+ }

hitted:
/*
@@ -844,6 +878,7 @@ static int z_erofs_decompress_pcluster(struct super_block *sb,
struct page **pagepool)
{
struct erofs_sb_info *const sbi = EROFS_SB(sb);
+ unsigned int pclusterpages = z_erofs_pclusterpages(pcl);
struct z_erofs_pagevec_ctor ctor;
unsigned int i, inputsize, outputsize, llen, nr_pages;
struct page *pages_onstack[Z_EROFS_VMAP_ONSTACK_PAGES];
@@ -925,16 +960,15 @@ static int z_erofs_decompress_pcluster(struct super_block *sb,
overlapped = false;
compressed_pages = pcl->compressed_pages;

- for (i = 0; i < pcl->pclusterpages; ++i) {
+ for (i = 0; i < pclusterpages; ++i) {
unsigned int pagenr;

page = compressed_pages[i];
-
/* all compressed pages ought to be valid */
DBG_BUGON(!page);
- DBG_BUGON(z_erofs_page_is_invalidated(page));

- if (!z_erofs_is_shortlived_page(page)) {
+ if (!z_erofs_is_inline_pcluster(pcl) &&
+ !z_erofs_is_shortlived_page(page)) {
if (erofs_page_is_managed(sbi, page)) {
if (!PageUptodate(page))
err = -EIO;
@@ -960,10 +994,8 @@ static int z_erofs_decompress_pcluster(struct super_block *sb,
}

/* PG_error needs checking for all non-managed pages */
- if (PageError(page)) {
- DBG_BUGON(PageUptodate(page));
+ if (!PageUptodate(page))
err = -EIO;
- }
}

if (err)
@@ -978,11 +1010,16 @@ static int z_erofs_decompress_pcluster(struct super_block *sb,
partial = true;
}

- inputsize = pcl->pclusterpages * PAGE_SIZE;
+ if (z_erofs_is_inline_pcluster(pcl))
+ inputsize = pcl->tailpacking_size;
+ else
+ inputsize = pclusterpages * PAGE_SIZE;
+
err = z_erofs_decompress(&(struct z_erofs_decompress_req) {
.sb = sb,
.in = compressed_pages,
.out = pages,
+ .pageofs_in = pcl->pageofs_in,
.pageofs_out = cl->pageofs,
.inputsize = inputsize,
.outputsize = outputsize,
@@ -992,17 +1029,22 @@ static int z_erofs_decompress_pcluster(struct super_block *sb,
}, pagepool);

out:
- /* must handle all compressed pages before ending pages */
- for (i = 0; i < pcl->pclusterpages; ++i) {
- page = compressed_pages[i];
-
- if (erofs_page_is_managed(sbi, page))
- continue;
+ /* must handle all compressed pages before actual file pages */
+ if (z_erofs_is_inline_pcluster(pcl)) {
+ page = compressed_pages[0];
+ WRITE_ONCE(compressed_pages[0], NULL);
+ put_page(page);
+ } else {
+ for (i = 0; i < pclusterpages; ++i) {
+ page = compressed_pages[i];

- /* recycle all individual short-lived pages */
- (void)z_erofs_put_shortlivedpage(pagepool, page);
+ if (erofs_page_is_managed(sbi, page))
+ continue;

- WRITE_ONCE(compressed_pages[i], NULL);
+ /* recycle all individual short-lived pages */
+ (void)z_erofs_put_shortlivedpage(pagepool, page);
+ WRITE_ONCE(compressed_pages[i], NULL);
+ }
}

for (i = 0; i < nr_pages; ++i) {
@@ -1288,6 +1330,14 @@ static void z_erofs_submit_queue(struct super_block *sb,

pcl = container_of(owned_head, struct z_erofs_pcluster, next);

+ /* close the main owned chain at first */
+ owned_head = cmpxchg(&pcl->next, Z_EROFS_PCLUSTER_TAIL,
+ Z_EROFS_PCLUSTER_TAIL_CLOSED);
+ if (z_erofs_is_inline_pcluster(pcl)) {
+ move_to_bypass_jobqueue(pcl, qtail, owned_head);
+ continue;
+ }
+
/* no device id here, thus it will always succeed */
mdev = (struct erofs_map_dev) {
.m_pa = blknr_to_addr(pcl->obj.index),
@@ -1297,10 +1347,6 @@ static void z_erofs_submit_queue(struct super_block *sb,
cur = erofs_blknr(mdev.m_pa);
end = cur + pcl->pclusterpages;

- /* close the main owned chain at first */
- owned_head = cmpxchg(&pcl->next, Z_EROFS_PCLUSTER_TAIL,
- Z_EROFS_PCLUSTER_TAIL_CLOSED);
-
do {
struct page *page;

diff --git a/fs/erofs/zdata.h b/fs/erofs/zdata.h
index 4a69515dea75..e043216b545f 100644
--- a/fs/erofs/zdata.h
+++ b/fs/erofs/zdata.h
@@ -62,8 +62,16 @@ struct z_erofs_pcluster {
/* A: lower limit of decompressed length and if full length or not */
unsigned int length;

- /* I: physical cluster size in pages */
- unsigned short pclusterpages;
+ /* I: page offset of inline compressed data */
+ unsigned short pageofs_in;
+
+ union {
+ /* I: physical cluster size in pages */
+ unsigned short pclusterpages;
+
+ /* I: tailpacking inline compressed size */
+ unsigned short tailpacking_size;
+ };

/* I: compression algorithm format */
unsigned char algorithmformat;
@@ -94,6 +102,18 @@ struct z_erofs_decompressqueue {
} u;
};

+static inline bool z_erofs_is_inline_pcluster(struct z_erofs_pcluster *pcl)
+{
+ return !pcl->obj.index;
+}
+
+static inline unsigned int z_erofs_pclusterpages(struct z_erofs_pcluster *pcl)
+{
+ if (z_erofs_is_inline_pcluster(pcl))
+ return 1;
+ return pcl->pclusterpages;
+}
+
#define Z_EROFS_ONLINEPAGE_COUNT_BITS 2
#define Z_EROFS_ONLINEPAGE_COUNT_MASK ((1 << Z_EROFS_ONLINEPAGE_COUNT_BITS) - 1)
#define Z_EROFS_ONLINEPAGE_INDEX_SHIFT (Z_EROFS_ONLINEPAGE_COUNT_BITS)
--
2.24.4


2021-12-25 07:06:55

by Gao Xiang

Subject: [PATCH v3 1/5] erofs: tidy up z_erofs_lz4_decompress

To prepare for the upcoming ztailpacking feature and further
cleanups, introduce a dedicated z_erofs_lz4_decompress_ctx to keep
the context, including inpages, outpages and oend, which are
frequently used by the LZ4 decompressor.

No logic changes.
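
As a worked example (my own numbers, assuming PAGE_SIZE == 4096): for a
request with pageofs_out = 100, outputsize = 8000 and inputsize = 4096,
the context is filled as oend = 100 + 8000 = 8100, outpages =
PAGE_ALIGN(8100) >> PAGE_SHIFT = 3 and inpages = PAGE_ALIGN(4096) >>
PAGE_SHIFT = 1, matching the initialization in z_erofs_lz4_decompress()
below.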

Signed-off-by: Gao Xiang <[email protected]>
---
fs/erofs/decompressor.c | 80 +++++++++++++++++++++--------------------
1 file changed, 41 insertions(+), 39 deletions(-)

diff --git a/fs/erofs/decompressor.c b/fs/erofs/decompressor.c
index c373a199c407..d1282a8b709e 100644
--- a/fs/erofs/decompressor.c
+++ b/fs/erofs/decompressor.c
@@ -16,6 +16,11 @@
#define LZ4_DECOMPRESS_INPLACE_MARGIN(srcsize) (((srcsize) >> 8) + 32)
#endif

+struct z_erofs_lz4_decompress_ctx {
+ struct z_erofs_decompress_req *rq;
+ unsigned int inpages, outpages, oend;
+};
+
int z_erofs_load_lz4_config(struct super_block *sb,
struct erofs_super_block *dsb,
struct z_erofs_lz4_cfgs *lz4, int size)
@@ -56,11 +61,10 @@ int z_erofs_load_lz4_config(struct super_block *sb,
* Fill all gaps with bounce pages if it's a sparse page list. Also check if
* all physical pages are consecutive, which can be seen for moderate CR.
*/
-static int z_erofs_lz4_prepare_dstpages(struct z_erofs_decompress_req *rq,
+static int z_erofs_lz4_prepare_dstpages(struct z_erofs_lz4_decompress_ctx *ctx,
struct page **pagepool)
{
- const unsigned int nr =
- PAGE_ALIGN(rq->pageofs_out + rq->outputsize) >> PAGE_SHIFT;
+ struct z_erofs_decompress_req *rq = ctx->rq;
struct page *availables[LZ4_MAX_DISTANCE_PAGES] = { NULL };
unsigned long bounced[DIV_ROUND_UP(LZ4_MAX_DISTANCE_PAGES,
BITS_PER_LONG)] = { 0 };
@@ -70,7 +74,7 @@ static int z_erofs_lz4_prepare_dstpages(struct z_erofs_decompress_req *rq,
unsigned int i, j, top;

top = 0;
- for (i = j = 0; i < nr; ++i, ++j) {
+ for (i = j = 0; i < ctx->outpages; ++i, ++j) {
struct page *const page = rq->out[i];
struct page *victim;

@@ -112,41 +116,36 @@ static int z_erofs_lz4_prepare_dstpages(struct z_erofs_decompress_req *rq,
return kaddr ? 1 : 0;
}

-static void *z_erofs_lz4_handle_inplace_io(struct z_erofs_decompress_req *rq,
+static void *z_erofs_lz4_handle_overlap(struct z_erofs_lz4_decompress_ctx *ctx,
void *inpage, unsigned int *inputmargin, int *maptype,
bool support_0padding)
{
- unsigned int nrpages_in, nrpages_out;
- unsigned int ofull, oend, inputsize, total, i, j;
+ struct z_erofs_decompress_req *rq = ctx->rq;
+ unsigned int omargin, total, i, j;
struct page **in;
void *src, *tmp;

- inputsize = rq->inputsize;
- nrpages_in = PAGE_ALIGN(inputsize) >> PAGE_SHIFT;
- oend = rq->pageofs_out + rq->outputsize;
- ofull = PAGE_ALIGN(oend);
- nrpages_out = ofull >> PAGE_SHIFT;
-
if (rq->inplace_io) {
+ omargin = PAGE_ALIGN(ctx->oend) - ctx->oend;
if (rq->partial_decoding || !support_0padding ||
- ofull - oend < LZ4_DECOMPRESS_INPLACE_MARGIN(inputsize))
+ omargin < LZ4_DECOMPRESS_INPLACE_MARGIN(rq->inputsize))
goto docopy;

- for (i = 0; i < nrpages_in; ++i) {
+ for (i = 0; i < ctx->inpages; ++i) {
DBG_BUGON(rq->in[i] == NULL);
- for (j = 0; j < nrpages_out - nrpages_in + i; ++j)
+ for (j = 0; j < ctx->outpages - ctx->inpages + i; ++j)
if (rq->out[j] == rq->in[i])
goto docopy;
}
}

- if (nrpages_in <= 1) {
+ if (ctx->inpages <= 1) {
*maptype = 0;
return inpage;
}
kunmap_atomic(inpage);
might_sleep();
- src = erofs_vm_map_ram(rq->in, nrpages_in);
+ src = erofs_vm_map_ram(rq->in, ctx->inpages);
if (!src)
return ERR_PTR(-ENOMEM);
*maptype = 1;
@@ -155,7 +154,7 @@ static void *z_erofs_lz4_handle_inplace_io(struct z_erofs_decompress_req *rq,
docopy:
/* Or copy compressed data which can be overlapped to per-CPU buffer */
in = rq->in;
- src = erofs_get_pcpubuf(nrpages_in);
+ src = erofs_get_pcpubuf(ctx->inpages);
if (!src) {
DBG_BUGON(1);
kunmap_atomic(inpage);
@@ -182,9 +181,10 @@ static void *z_erofs_lz4_handle_inplace_io(struct z_erofs_decompress_req *rq,
return src;
}

-static int z_erofs_lz4_decompress_mem(struct z_erofs_decompress_req *rq,
+static int z_erofs_lz4_decompress_mem(struct z_erofs_lz4_decompress_ctx *ctx,
u8 *out)
{
+ struct z_erofs_decompress_req *rq = ctx->rq;
unsigned int inputmargin;
u8 *headpage, *src;
bool support_0padding;
@@ -210,8 +210,8 @@ static int z_erofs_lz4_decompress_mem(struct z_erofs_decompress_req *rq,
}

rq->inputsize -= inputmargin;
- src = z_erofs_lz4_handle_inplace_io(rq, headpage, &inputmargin,
- &maptype, support_0padding);
+ src = z_erofs_lz4_handle_overlap(ctx, headpage, &inputmargin,
+ &maptype, support_0padding);
if (IS_ERR(src))
return PTR_ERR(src);

@@ -240,9 +240,9 @@ static int z_erofs_lz4_decompress_mem(struct z_erofs_decompress_req *rq,
}

if (maptype == 0) {
- kunmap_atomic(src);
+ kunmap_atomic(headpage);
} else if (maptype == 1) {
- vm_unmap_ram(src, PAGE_ALIGN(rq->inputsize) >> PAGE_SHIFT);
+ vm_unmap_ram(src, ctx->inpages);
} else if (maptype == 2) {
erofs_put_pcpubuf(src);
} else {
@@ -255,14 +255,18 @@ static int z_erofs_lz4_decompress_mem(struct z_erofs_decompress_req *rq,
static int z_erofs_lz4_decompress(struct z_erofs_decompress_req *rq,
struct page **pagepool)
{
- const unsigned int nrpages_out =
- PAGE_ALIGN(rq->pageofs_out + rq->outputsize) >> PAGE_SHIFT;
+ struct z_erofs_lz4_decompress_ctx ctx;
unsigned int dst_maptype;
void *dst;
int ret;

+ ctx.rq = rq;
+ ctx.oend = rq->pageofs_out + rq->outputsize;
+ ctx.outpages = PAGE_ALIGN(ctx.oend) >> PAGE_SHIFT;
+ ctx.inpages = PAGE_ALIGN(rq->inputsize) >> PAGE_SHIFT;
+
/* one optimized fast path only for non bigpcluster cases yet */
- if (rq->inputsize <= PAGE_SIZE && nrpages_out == 1 && !rq->inplace_io) {
+ if (ctx.inpages == 1 && ctx.outpages == 1 && !rq->inplace_io) {
DBG_BUGON(!*rq->out);
dst = kmap_atomic(*rq->out);
dst_maptype = 0;
@@ -270,27 +274,25 @@ static int z_erofs_lz4_decompress(struct z_erofs_decompress_req *rq,
}

/* general decoding path which can be used for all cases */
- ret = z_erofs_lz4_prepare_dstpages(rq, pagepool);
- if (ret < 0)
+ ret = z_erofs_lz4_prepare_dstpages(&ctx, pagepool);
+ if (ret < 0) {
return ret;
- if (ret) {
+ } else if (ret > 0) {
dst = page_address(*rq->out);
dst_maptype = 1;
- goto dstmap_out;
+ } else {
+ dst = erofs_vm_map_ram(rq->out, ctx.outpages);
+ if (!dst)
+ return -ENOMEM;
+ dst_maptype = 2;
}

- dst = erofs_vm_map_ram(rq->out, nrpages_out);
- if (!dst)
- return -ENOMEM;
- dst_maptype = 2;
-
dstmap_out:
- ret = z_erofs_lz4_decompress_mem(rq, dst + rq->pageofs_out);
-
+ ret = z_erofs_lz4_decompress_mem(&ctx, dst + rq->pageofs_out);
if (!dst_maptype)
kunmap_atomic(dst);
else if (dst_maptype == 2)
- vm_unmap_ram(dst, nrpages_out);
+ vm_unmap_ram(dst, ctx.outpages);
return ret;
}

--
2.24.4


2021-12-25 07:06:55

by Gao Xiang

Subject: [PATCH v3 5/5] erofs: add on-disk compressed tail-packing inline support

From: Yue Hu <[email protected]>

Introduce EROFS compressed tail-packing inline support.

This approach adds a new field called `h_idata_size' in the
per-file compression header to indicate the encoded size of
each tail-packing pcluster.

At runtime, the start logical offset of the tail pcluster is found
when initializing the per-inode zmap, and such extent information
(headlcn, idataoff) is recorded in the in-memory inode. Therefore,
follow-on requests can directly recognize whether a pcluster is a
tail-packing inline pcluster or not.
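
To illustrate the new on-disk bit and field, here is a minimal
user-space sketch (fixed-width host-order types instead of the kernel's
__le16; endianness conversion and the surrounding I/O are omitted) of
how a reader could recognize a tail-packing inline pcluster from the
per-file map header changed below:

#include <stdbool.h>
#include <stdint.h>

#define Z_EROFS_ADVISE_INLINE_PCLUSTER	0x0008

/* first 6 bytes of the per-file map header after this patch */
struct map_header_sketch {
	uint16_t h_reserved1;
	uint16_t h_idata_size;	/* encoded size of the tail-packing data */
	uint16_t h_advise;	/* bit 3: inline pcluster on/off */
};

static bool has_inline_tail(const struct map_header_sketch *h,
			    uint16_t *idata_size)
{
	if (!(h->h_advise & Z_EROFS_ADVISE_INLINE_PCLUSTER))
		return false;
	*idata_size = h->h_idata_size;
	return true;
}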

Signed-off-by: Yue Hu <[email protected]>
Signed-off-by: Gao Xiang <[email protected]>
---
fs/erofs/erofs_fs.h | 10 +++-
fs/erofs/internal.h | 6 +++
fs/erofs/super.c | 3 ++
fs/erofs/sysfs.c | 2 +
fs/erofs/zmap.c | 116 ++++++++++++++++++++++++++++++++------------
5 files changed, 105 insertions(+), 32 deletions(-)

diff --git a/fs/erofs/erofs_fs.h b/fs/erofs/erofs_fs.h
index dda79afb901d..3ea62c6fb00a 100644
--- a/fs/erofs/erofs_fs.h
+++ b/fs/erofs/erofs_fs.h
@@ -23,13 +23,15 @@
#define EROFS_FEATURE_INCOMPAT_CHUNKED_FILE 0x00000004
#define EROFS_FEATURE_INCOMPAT_DEVICE_TABLE 0x00000008
#define EROFS_FEATURE_INCOMPAT_COMPR_HEAD2 0x00000008
+#define EROFS_FEATURE_INCOMPAT_ZTAILPACKING 0x00000010
#define EROFS_ALL_FEATURE_INCOMPAT \
(EROFS_FEATURE_INCOMPAT_ZERO_PADDING | \
EROFS_FEATURE_INCOMPAT_COMPR_CFGS | \
EROFS_FEATURE_INCOMPAT_BIG_PCLUSTER | \
EROFS_FEATURE_INCOMPAT_CHUNKED_FILE | \
EROFS_FEATURE_INCOMPAT_DEVICE_TABLE | \
- EROFS_FEATURE_INCOMPAT_COMPR_HEAD2)
+ EROFS_FEATURE_INCOMPAT_COMPR_HEAD2 | \
+ EROFS_FEATURE_INCOMPAT_ZTAILPACKING)

#define EROFS_SB_EXTSLOT_SIZE 16

@@ -292,13 +294,17 @@ struct z_erofs_lzma_cfgs {
* (4B) + 2B + (4B) if compacted 2B is on.
* bit 1 : HEAD1 big pcluster (0 - off; 1 - on)
* bit 2 : HEAD2 big pcluster (0 - off; 1 - on)
+ * bit 3 : tailpacking inline pcluster (0 - off; 1 - on)
*/
#define Z_EROFS_ADVISE_COMPACTED_2B 0x0001
#define Z_EROFS_ADVISE_BIG_PCLUSTER_1 0x0002
#define Z_EROFS_ADVISE_BIG_PCLUSTER_2 0x0004
+#define Z_EROFS_ADVISE_INLINE_PCLUSTER 0x0008

struct z_erofs_map_header {
- __le32 h_reserved1;
+ __le16 h_reserved1;
+ /* indicates the encoded size of tailpacking data */
+ __le16 h_idata_size;
__le16 h_advise;
/*
* bit 0-3 : algorithm type of head 1 (logical cluster type 01);
diff --git a/fs/erofs/internal.h b/fs/erofs/internal.h
index 8e70435629e5..ebb9ffa08dd2 100644
--- a/fs/erofs/internal.h
+++ b/fs/erofs/internal.h
@@ -274,6 +274,7 @@ EROFS_FEATURE_FUNCS(big_pcluster, incompat, INCOMPAT_BIG_PCLUSTER)
EROFS_FEATURE_FUNCS(chunked_file, incompat, INCOMPAT_CHUNKED_FILE)
EROFS_FEATURE_FUNCS(device_table, incompat, INCOMPAT_DEVICE_TABLE)
EROFS_FEATURE_FUNCS(compr_head2, incompat, INCOMPAT_COMPR_HEAD2)
+EROFS_FEATURE_FUNCS(ztailpacking, incompat, INCOMPAT_ZTAILPACKING)
EROFS_FEATURE_FUNCS(sb_chksum, compat, COMPAT_SB_CHKSUM)

/* atomic flag definitions */
@@ -308,6 +309,9 @@ struct erofs_inode {
unsigned short z_advise;
unsigned char z_algorithmtype[2];
unsigned char z_logical_clusterbits;
+ unsigned long z_idata_headlcn;
+ unsigned int z_idataoff;
+ unsigned short z_idata_size;
};
#endif /* CONFIG_EROFS_FS_ZIP */
};
@@ -421,6 +425,8 @@ struct erofs_map_blocks {
#define EROFS_GET_BLOCKS_FIEMAP 0x0002
/* Used to map the whole extent if non-negligible data is requested for LZMA */
#define EROFS_GET_BLOCKS_READMORE 0x0004
+/* Used to map tail extent for tailpacking inline pcluster */
+#define EROFS_GET_BLOCKS_FINDTAIL 0x0008

enum {
Z_EROFS_COMPRESSION_SHIFTED = Z_EROFS_COMPRESSION_MAX,
diff --git a/fs/erofs/super.c b/fs/erofs/super.c
index 58f381f80205..0724ad5fd6cf 100644
--- a/fs/erofs/super.c
+++ b/fs/erofs/super.c
@@ -411,6 +411,9 @@ static int erofs_read_superblock(struct super_block *sb)

/* handle multiple devices */
ret = erofs_init_devices(sb, dsb);
+
+ if (erofs_sb_has_ztailpacking(sbi))
+ erofs_info(sb, "EXPERIMENTAL compressed inline data feature in use. Use at your own risk!");
out:
kunmap(page);
put_page(page);
diff --git a/fs/erofs/sysfs.c b/fs/erofs/sysfs.c
index 666693432107..dac252bc9228 100644
--- a/fs/erofs/sysfs.c
+++ b/fs/erofs/sysfs.c
@@ -75,6 +75,7 @@ EROFS_ATTR_FEATURE(chunked_file);
EROFS_ATTR_FEATURE(device_table);
EROFS_ATTR_FEATURE(compr_head2);
EROFS_ATTR_FEATURE(sb_chksum);
+EROFS_ATTR_FEATURE(ztailpacking);

static struct attribute *erofs_feat_attrs[] = {
ATTR_LIST(zero_padding),
@@ -84,6 +85,7 @@ static struct attribute *erofs_feat_attrs[] = {
ATTR_LIST(device_table),
ATTR_LIST(compr_head2),
ATTR_LIST(sb_chksum),
+ ATTR_LIST(ztailpacking),
NULL,
};
ATTRIBUTE_GROUPS(erofs_feat);
diff --git a/fs/erofs/zmap.c b/fs/erofs/zmap.c
index 660489a7fb64..52b5369f9b42 100644
--- a/fs/erofs/zmap.c
+++ b/fs/erofs/zmap.c
@@ -7,12 +7,17 @@
#include <asm/unaligned.h>
#include <trace/events/erofs.h>

+static int z_erofs_do_map_blocks(struct inode *inode,
+ struct erofs_map_blocks *map,
+ int flags);
+
int z_erofs_fill_inode(struct inode *inode)
{
struct erofs_inode *const vi = EROFS_I(inode);
struct erofs_sb_info *sbi = EROFS_SB(inode->i_sb);

if (!erofs_sb_has_big_pcluster(sbi) &&
+ !erofs_sb_has_ztailpacking(sbi) &&
vi->datalayout == EROFS_INODE_FLAT_COMPRESSION_LEGACY) {
vi->z_advise = 0;
vi->z_algorithmtype[0] = 0;
@@ -51,6 +56,7 @@ static int z_erofs_fill_inode_lazy(struct inode *inode)
goto out_unlock;

DBG_BUGON(!erofs_sb_has_big_pcluster(EROFS_SB(sb)) &&
+ !erofs_sb_has_ztailpacking(EROFS_SB(sb)) &&
vi->datalayout == EROFS_INODE_FLAT_COMPRESSION_LEGACY);

pos = ALIGN(iloc(EROFS_SB(sb), vi->nid) + vi->inode_isize +
@@ -65,6 +71,7 @@ static int z_erofs_fill_inode_lazy(struct inode *inode)

h = kaddr + erofs_blkoff(pos);
vi->z_advise = le16_to_cpu(h->h_advise);
+ vi->z_idata_size = le16_to_cpu(h->h_idata_size);
vi->z_algorithmtype[0] = h->h_algorithmtype & 15;
vi->z_algorithmtype[1] = h->h_algorithmtype >> 4;

@@ -94,13 +101,34 @@ static int z_erofs_fill_inode_lazy(struct inode *inode)
err = -EFSCORRUPTED;
goto unmap_done;
}
- /* paired with smp_mb() at the beginning of the function */
- smp_mb();
- set_bit(EROFS_I_Z_INITED_BIT, &vi->flags);
unmap_done:
kunmap_atomic(kaddr);
unlock_page(page);
put_page(page);
+ if (err)
+ goto out_unlock;
+
+ if (vi->z_advise & Z_EROFS_ADVISE_INLINE_PCLUSTER) {
+ struct erofs_map_blocks map = { .mpage = NULL };
+
+ vi->z_idata_size = le16_to_cpu(h->h_idata_size);
+ err = z_erofs_do_map_blocks(inode, &map,
+ EROFS_GET_BLOCKS_FINDTAIL);
+ if (map.mpage)
+ put_page(map.mpage);
+
+ if (!map.m_plen ||
+ erofs_blkoff(map.m_pa) + map.m_plen > EROFS_BLKSIZ) {
+ erofs_err(sb, "invalid tail-packing pclustersize %llu",
+ map.m_plen);
+ err = -EFSCORRUPTED;
+ }
+ if (err < 0)
+ goto out_unlock;
+ }
+ /* paired with smp_mb() at the beginning of the function */
+ smp_mb();
+ set_bit(EROFS_I_Z_INITED_BIT, &vi->flags);
out_unlock:
clear_and_wake_up_bit(EROFS_I_BL_Z_BIT, &vi->flags);
return err;
@@ -117,6 +145,7 @@ struct z_erofs_maprecorder {
u16 clusterofs;
u16 delta[2];
erofs_blk_t pblk, compressedlcs;
+ erofs_off_t nextpackoff;
};

static int z_erofs_reload_indexes(struct z_erofs_maprecorder *m,
@@ -169,6 +198,7 @@ static int legacy_load_cluster_from_disk(struct z_erofs_maprecorder *m,
if (err)
return err;

+ m->nextpackoff = pos + sizeof(struct z_erofs_vle_decompressed_index);
m->lcn = lcn;
di = m->kaddr + erofs_blkoff(pos);

@@ -243,12 +273,12 @@ static int get_compacted_la_distance(unsigned int lclusterbits,

static int unpack_compacted_index(struct z_erofs_maprecorder *m,
unsigned int amortizedshift,
- unsigned int eofs, bool lookahead)
+ erofs_off_t pos, bool lookahead)
{
struct erofs_inode *const vi = EROFS_I(m->inode);
const unsigned int lclusterbits = vi->z_logical_clusterbits;
const unsigned int lomask = (1 << lclusterbits) - 1;
- unsigned int vcnt, base, lo, encodebits, nblk;
+ unsigned int vcnt, base, lo, encodebits, nblk, eofs;
int i;
u8 *in, type;
bool big_pcluster;
@@ -260,8 +290,12 @@ static int unpack_compacted_index(struct z_erofs_maprecorder *m,
else
return -EOPNOTSUPP;

+ /* it doesn't equal to roundup(..) */
+ m->nextpackoff = rounddown(pos, vcnt << amortizedshift) +
+ (vcnt << amortizedshift);
big_pcluster = vi->z_advise & Z_EROFS_ADVISE_BIG_PCLUSTER_1;
encodebits = ((vcnt << amortizedshift) - sizeof(__le32)) * 8 / vcnt;
+ eofs = erofs_blkoff(pos);
base = round_down(eofs, vcnt << amortizedshift);
in = m->kaddr + base;

@@ -399,8 +433,7 @@ static int compacted_load_cluster_from_disk(struct z_erofs_maprecorder *m,
err = z_erofs_reload_indexes(m, erofs_blknr(pos));
if (err)
return err;
- return unpack_compacted_index(m, amortizedshift, erofs_blkoff(pos),
- lookahead);
+ return unpack_compacted_index(m, amortizedshift, pos, lookahead);
}

static int z_erofs_load_cluster_from_disk(struct z_erofs_maprecorder *m,
@@ -583,11 +616,12 @@ static int z_erofs_get_extent_decompressedlen(struct z_erofs_maprecorder *m)
return 0;
}

-int z_erofs_map_blocks_iter(struct inode *inode,
- struct erofs_map_blocks *map,
- int flags)
+static int z_erofs_do_map_blocks(struct inode *inode,
+ struct erofs_map_blocks *map,
+ int flags)
{
struct erofs_inode *const vi = EROFS_I(inode);
+ bool ztailpacking = vi->z_advise & Z_EROFS_ADVISE_INLINE_PCLUSTER;
struct z_erofs_maprecorder m = {
.inode = inode,
.map = map,
@@ -597,22 +631,8 @@ int z_erofs_map_blocks_iter(struct inode *inode,
unsigned long initial_lcn;
unsigned long long ofs, end;

- trace_z_erofs_map_blocks_iter_enter(inode, map, flags);
-
- /* when trying to read beyond EOF, leave it unmapped */
- if (map->m_la >= inode->i_size) {
- map->m_llen = map->m_la + 1 - inode->i_size;
- map->m_la = inode->i_size;
- map->m_flags = 0;
- goto out;
- }
-
- err = z_erofs_fill_inode_lazy(inode);
- if (err)
- goto out;
-
lclusterbits = vi->z_logical_clusterbits;
- ofs = map->m_la;
+ ofs = flags & EROFS_GET_BLOCKS_FINDTAIL ? inode->i_size - 1 : map->m_la;
initial_lcn = ofs >> lclusterbits;
endoff = ofs & ((1 << lclusterbits) - 1);

@@ -620,6 +640,9 @@ int z_erofs_map_blocks_iter(struct inode *inode,
if (err)
goto unmap_out;

+ if (ztailpacking && (flags & EROFS_GET_BLOCKS_FINDTAIL))
+ vi->z_idataoff = m.nextpackoff;
+
map->m_flags = EROFS_MAP_MAPPED | EROFS_MAP_ENCODED;
end = (m.lcn + 1ULL) << lclusterbits;

@@ -658,12 +681,20 @@ int z_erofs_map_blocks_iter(struct inode *inode,
goto unmap_out;
}

- map->m_llen = end - map->m_la;
- map->m_pa = blknr_to_addr(m.pblk);
+ if (flags & EROFS_GET_BLOCKS_FINDTAIL)
+ vi->z_idata_headlcn = m.lcn;

- err = z_erofs_get_extent_compressedlen(&m, initial_lcn);
- if (err)
- goto out;
+ map->m_llen = end - map->m_la;
+ if (ztailpacking && m.lcn == vi->z_idata_headlcn) {
+ map->m_flags |= EROFS_MAP_META;
+ map->m_pa = vi->z_idataoff;
+ map->m_plen = vi->z_idata_size;
+ } else {
+ map->m_pa = blknr_to_addr(m.pblk);
+ err = z_erofs_get_extent_compressedlen(&m, initial_lcn);
+ if (err)
+ goto out;
+ }

if (m.headtype == Z_EROFS_VLE_CLUSTER_TYPE_PLAIN)
map->m_algorithmformat = Z_EROFS_COMPRESSION_SHIFTED;
@@ -689,6 +720,31 @@ int z_erofs_map_blocks_iter(struct inode *inode,
__func__, map->m_la, map->m_pa,
map->m_llen, map->m_plen, map->m_flags);

+ return err;
+}
+
+int z_erofs_map_blocks_iter(struct inode *inode,
+ struct erofs_map_blocks *map,
+ int flags)
+{
+ int err = 0;
+
+ trace_z_erofs_map_blocks_iter_enter(inode, map, flags);
+
+ /* when trying to read beyond EOF, leave it unmapped */
+ if (map->m_la >= inode->i_size) {
+ map->m_llen = map->m_la + 1 - inode->i_size;
+ map->m_la = inode->i_size;
+ map->m_flags = 0;
+ goto out;
+ }
+
+ err = z_erofs_fill_inode_lazy(inode);
+ if (err)
+ goto out;
+
+ err = z_erofs_do_map_blocks(inode, map, flags);
+out:
trace_z_erofs_map_blocks_iter_exit(inode, map, flags, err);

/* aggressively BUG_ON iff CONFIG_EROFS_FS_DEBUG is on */
--
2.24.4


2021-12-27 02:09:18

by Chao Yu

Subject: Re: [PATCH v3 1/5] erofs: tidy up z_erofs_lz4_decompress

On 2021/12/25 15:06, Gao Xiang wrote:
> To prepare for the upcoming ztailpacking feature and further
> cleanups, introduce a unique z_erofs_lz4_decompress_ctx to keep
> the context, including inpages, outpages and oend, which are
> frequently used by the lz4 decompressor.
>
> No logic changes.
>
> Signed-off-by: Gao Xiang <[email protected]>
> ---
> fs/erofs/decompressor.c | 80 +++++++++++++++++++++--------------------
> 1 file changed, 41 insertions(+), 39 deletions(-)
>
> diff --git a/fs/erofs/decompressor.c b/fs/erofs/decompressor.c
> index c373a199c407..d1282a8b709e 100644
> --- a/fs/erofs/decompressor.c
> +++ b/fs/erofs/decompressor.c
> @@ -16,6 +16,11 @@
> #define LZ4_DECOMPRESS_INPLACE_MARGIN(srcsize) (((srcsize) >> 8) + 32)
> #endif
>
> +struct z_erofs_lz4_decompress_ctx {
> + struct z_erofs_decompress_req *rq;
> + unsigned int inpages, outpages, oend;
> +};
> +

Could you please comment a bit about fields of structure
z_erofs_lz4_decompress_ctx?

Otherwise, the patch looks good to me.

Reviewed-by: Chao Yu <[email protected]>

Thanks,

2021-12-27 02:25:53

by Gao Xiang

Subject: Re: [PATCH v3 1/5] erofs: tidy up z_erofs_lz4_decompress

Hi Chao,

On Mon, Dec 27, 2021 at 10:08:54AM +0800, Chao Yu wrote:
> On 2021/12/25 15:06, Gao Xiang wrote:
> > To prepare for the upcoming ztailpacking feature and further
> > cleanups, introduce a unique z_erofs_lz4_decompress_ctx to keep
> > the context, including inpages, outpages and oend, which are
> > frequently used by the lz4 decompressor.
> >
> > No logic changes.
> >
> > Signed-off-by: Gao Xiang <[email protected]>
> > ---
> > fs/erofs/decompressor.c | 80 +++++++++++++++++++++--------------------
> > 1 file changed, 41 insertions(+), 39 deletions(-)
> >
> > diff --git a/fs/erofs/decompressor.c b/fs/erofs/decompressor.c
> > index c373a199c407..d1282a8b709e 100644
> > --- a/fs/erofs/decompressor.c
> > +++ b/fs/erofs/decompressor.c
> > @@ -16,6 +16,11 @@
> > #define LZ4_DECOMPRESS_INPLACE_MARGIN(srcsize) (((srcsize) >> 8) + 32)
> > #endif
> > +struct z_erofs_lz4_decompress_ctx {
> > + struct z_erofs_decompress_req *rq;
> > + unsigned int inpages, outpages, oend;
> > +};
> > +
>
> Could you please comment a bit about fields of structure
> z_erofs_lz4_decompress_ctx?
>

Many thanks for the comment! I will revise it in the next version.

Thanks,
Gao Xiang

> Otherwise, the patch looks good to me.
>
> Reviewed-by: Chao Yu <[email protected]>
>
> Thanks,

2021-12-27 02:27:00

by Chao Yu

Subject: Re: [PATCH v3 2/5] erofs: introduce z_erofs_fixup_insize

On 2021/12/25 15:06, Gao Xiang wrote:
> To prepare for the upcoming ztailpacking feature, introduce
> z_erofs_fixup_insize() and pageofs_in to wrap up the process
> to get the exact compressed size via zero padding.
>
> Signed-off-by: Gao Xiang <[email protected]>

Reviewed-by: Chao Yu <[email protected]>

Thanks,

2021-12-27 07:26:18

by Yue Hu

Subject: Re: [PATCH v3 5/5] erofs: add on-disk compressed tail-packing inline support

Hi Xiang,

On Sat, 25 Dec 2021 15:06:26 +0800
Gao Xiang <[email protected]> wrote:

> From: Yue Hu <[email protected]>
>
> Introduces erofs compressed tail-packing inline support.
>
> This approach adds a new field called `h_idata_size' in the
> per-file compression header to indicate the encoded size of
> each tail-packing pcluster.
>
> At runtime, it will find the start logical offset of the tail
> pcluster when initializing per-inode zmap and record such
> extent (headlcn, idataoff) information to the in-memory inode.
> Therefore, follow-on requests can directly recognize if one
> pcluster is a tail-packing inline pcluster or not.
>
> Signed-off-by: Yue Hu <[email protected]>
> Signed-off-by: Gao Xiang <[email protected]>
> ---
> fs/erofs/erofs_fs.h | 10 +++-
> fs/erofs/internal.h | 6 +++
> fs/erofs/super.c | 3 ++
> fs/erofs/sysfs.c | 2 +
> fs/erofs/zmap.c | 116 ++++++++++++++++++++++++++++++++------------
> 5 files changed, 105 insertions(+), 32 deletions(-)
>
> diff --git a/fs/erofs/erofs_fs.h b/fs/erofs/erofs_fs.h
> index dda79afb901d..3ea62c6fb00a 100644
> --- a/fs/erofs/erofs_fs.h
> +++ b/fs/erofs/erofs_fs.h
> @@ -23,13 +23,15 @@
> #define EROFS_FEATURE_INCOMPAT_CHUNKED_FILE 0x00000004
> #define EROFS_FEATURE_INCOMPAT_DEVICE_TABLE 0x00000008
> #define EROFS_FEATURE_INCOMPAT_COMPR_HEAD2 0x00000008
> +#define EROFS_FEATURE_INCOMPAT_ZTAILPACKING 0x00000010
> #define EROFS_ALL_FEATURE_INCOMPAT \
> (EROFS_FEATURE_INCOMPAT_ZERO_PADDING | \
> EROFS_FEATURE_INCOMPAT_COMPR_CFGS | \
> EROFS_FEATURE_INCOMPAT_BIG_PCLUSTER | \
> EROFS_FEATURE_INCOMPAT_CHUNKED_FILE | \
> EROFS_FEATURE_INCOMPAT_DEVICE_TABLE | \
> - EROFS_FEATURE_INCOMPAT_COMPR_HEAD2)
> + EROFS_FEATURE_INCOMPAT_COMPR_HEAD2 | \
> + EROFS_FEATURE_INCOMPAT_ZTAILPACKING)
>
> #define EROFS_SB_EXTSLOT_SIZE 16
>
> @@ -292,13 +294,17 @@ struct z_erofs_lzma_cfgs {
> * (4B) + 2B + (4B) if compacted 2B is on.
> * bit 1 : HEAD1 big pcluster (0 - off; 1 - on)
> * bit 2 : HEAD2 big pcluster (0 - off; 1 - on)
> + * bit 3 : tailpacking inline pcluster (0 - off; 1 - on)
> */
> #define Z_EROFS_ADVISE_COMPACTED_2B 0x0001
> #define Z_EROFS_ADVISE_BIG_PCLUSTER_1 0x0002
> #define Z_EROFS_ADVISE_BIG_PCLUSTER_2 0x0004
> +#define Z_EROFS_ADVISE_INLINE_PCLUSTER 0x0008
>
> struct z_erofs_map_header {
> - __le32 h_reserved1;
> + __le16 h_reserved1;
> + /* indicates the encoded size of tailpacking data */
> + __le16 h_idata_size;
> __le16 h_advise;
> /*
> * bit 0-3 : algorithm type of head 1 (logical cluster type 01);
> diff --git a/fs/erofs/internal.h b/fs/erofs/internal.h
> index 8e70435629e5..ebb9ffa08dd2 100644
> --- a/fs/erofs/internal.h
> +++ b/fs/erofs/internal.h
> @@ -274,6 +274,7 @@ EROFS_FEATURE_FUNCS(big_pcluster, incompat, INCOMPAT_BIG_PCLUSTER)
> EROFS_FEATURE_FUNCS(chunked_file, incompat, INCOMPAT_CHUNKED_FILE)
> EROFS_FEATURE_FUNCS(device_table, incompat, INCOMPAT_DEVICE_TABLE)
> EROFS_FEATURE_FUNCS(compr_head2, incompat, INCOMPAT_COMPR_HEAD2)
> +EROFS_FEATURE_FUNCS(ztailpacking, incompat, INCOMPAT_ZTAILPACKING)
> EROFS_FEATURE_FUNCS(sb_chksum, compat, COMPAT_SB_CHKSUM)
>
> /* atomic flag definitions */
> @@ -308,6 +309,9 @@ struct erofs_inode {
> unsigned short z_advise;
> unsigned char z_algorithmtype[2];
> unsigned char z_logical_clusterbits;
> + unsigned long z_idata_headlcn;
> + unsigned int z_idataoff;

need a space?

> + unsigned short z_idata_size;
> };
> #endif /* CONFIG_EROFS_FS_ZIP */
> };
> @@ -421,6 +425,8 @@ struct erofs_map_blocks {
> #define EROFS_GET_BLOCKS_FIEMAP 0x0002
> /* Used to map the whole extent if non-negligible data is requested for LZMA */
> #define EROFS_GET_BLOCKS_READMORE 0x0004
> +/* Used to map tail extent for tailpacking inline pcluster */
> +#define EROFS_GET_BLOCKS_FINDTAIL 0x0008
>
> enum {
> Z_EROFS_COMPRESSION_SHIFTED = Z_EROFS_COMPRESSION_MAX,
> diff --git a/fs/erofs/super.c b/fs/erofs/super.c
> index 58f381f80205..0724ad5fd6cf 100644
> --- a/fs/erofs/super.c
> +++ b/fs/erofs/super.c
> @@ -411,6 +411,9 @@ static int erofs_read_superblock(struct super_block *sb)
>
> /* handle multiple devices */
> ret = erofs_init_devices(sb, dsb);
> +
> + if (erofs_sb_has_ztailpacking(sbi))
> + erofs_info(sb, "EXPERIMENTAL compressed inline data feature in use. Use at your own risk!");
> out:
> kunmap(page);
> put_page(page);
> diff --git a/fs/erofs/sysfs.c b/fs/erofs/sysfs.c
> index 666693432107..dac252bc9228 100644
> --- a/fs/erofs/sysfs.c
> +++ b/fs/erofs/sysfs.c
> @@ -75,6 +75,7 @@ EROFS_ATTR_FEATURE(chunked_file);
> EROFS_ATTR_FEATURE(device_table);
> EROFS_ATTR_FEATURE(compr_head2);
> EROFS_ATTR_FEATURE(sb_chksum);
> +EROFS_ATTR_FEATURE(ztailpacking);
>
> static struct attribute *erofs_feat_attrs[] = {
> ATTR_LIST(zero_padding),
> @@ -84,6 +85,7 @@ static struct attribute *erofs_feat_attrs[] = {
> ATTR_LIST(device_table),
> ATTR_LIST(compr_head2),
> ATTR_LIST(sb_chksum),
> + ATTR_LIST(ztailpacking),
> NULL,
> };
> ATTRIBUTE_GROUPS(erofs_feat);
> diff --git a/fs/erofs/zmap.c b/fs/erofs/zmap.c
> index 660489a7fb64..52b5369f9b42 100644
> --- a/fs/erofs/zmap.c
> +++ b/fs/erofs/zmap.c
> @@ -7,12 +7,17 @@
> #include <asm/unaligned.h>
> #include <trace/events/erofs.h>
>
> +static int z_erofs_do_map_blocks(struct inode *inode,
> + struct erofs_map_blocks *map,
> + int flags);
> +
> int z_erofs_fill_inode(struct inode *inode)
> {
> struct erofs_inode *const vi = EROFS_I(inode);
> struct erofs_sb_info *sbi = EROFS_SB(inode->i_sb);
>
> if (!erofs_sb_has_big_pcluster(sbi) &&
> + !erofs_sb_has_ztailpacking(sbi) &&
> vi->datalayout == EROFS_INODE_FLAT_COMPRESSION_LEGACY) {
> vi->z_advise = 0;
> vi->z_algorithmtype[0] = 0;
> @@ -51,6 +56,7 @@ static int z_erofs_fill_inode_lazy(struct inode *inode)
> goto out_unlock;
>
> DBG_BUGON(!erofs_sb_has_big_pcluster(EROFS_SB(sb)) &&
> + !erofs_sb_has_ztailpacking(EROFS_SB(sb)) &&
> vi->datalayout == EROFS_INODE_FLAT_COMPRESSION_LEGACY);
>
> pos = ALIGN(iloc(EROFS_SB(sb), vi->nid) + vi->inode_isize +
> @@ -65,6 +71,7 @@ static int z_erofs_fill_inode_lazy(struct inode *inode)
>
> h = kaddr + erofs_blkoff(pos);
> vi->z_advise = le16_to_cpu(h->h_advise);
> + vi->z_idata_size = le16_to_cpu(h->h_idata_size);

duplicated code?

> vi->z_algorithmtype[0] = h->h_algorithmtype & 15;
> vi->z_algorithmtype[1] = h->h_algorithmtype >> 4;
>
> @@ -94,13 +101,34 @@ static int z_erofs_fill_inode_lazy(struct inode *inode)
> err = -EFSCORRUPTED;
> goto unmap_done;
> }
> - /* paired with smp_mb() at the beginning of the function */
> - smp_mb();
> - set_bit(EROFS_I_Z_INITED_BIT, &vi->flags);
> unmap_done:
> kunmap_atomic(kaddr);
> unlock_page(page);
> put_page(page);
> + if (err)
> + goto out_unlock;
> +
> + if (vi->z_advise & Z_EROFS_ADVISE_INLINE_PCLUSTER) {
> + struct erofs_map_blocks map = { .mpage = NULL };
> +
> + vi->z_idata_size = le16_to_cpu(h->h_idata_size);
> + err = z_erofs_do_map_blocks(inode, &map,
> + EROFS_GET_BLOCKS_FINDTAIL);
> + if (map.mpage)
> + put_page(map.mpage);
> +
> + if (!map.m_plen ||
> + erofs_blkoff(map.m_pa) + map.m_plen > EROFS_BLKSIZ) {
> + erofs_err(sb, "invalid tail-packing pclustersize %llu",
> + map.m_plen);
> + err = -EFSCORRUPTED;
> + }
> + if (err < 0)
> + goto out_unlock;
> + }
> + /* paired with smp_mb() at the beginning of the function */
> + smp_mb();
> + set_bit(EROFS_I_Z_INITED_BIT, &vi->flags);
> out_unlock:
> clear_and_wake_up_bit(EROFS_I_BL_Z_BIT, &vi->flags);
> return err;
> @@ -117,6 +145,7 @@ struct z_erofs_maprecorder {
> u16 clusterofs;
> u16 delta[2];
> erofs_blk_t pblk, compressedlcs;
> + erofs_off_t nextpackoff;
> };
>
> static int z_erofs_reload_indexes(struct z_erofs_maprecorder *m,
> @@ -169,6 +198,7 @@ static int legacy_load_cluster_from_disk(struct z_erofs_maprecorder *m,
> if (err)
> return err;
>
> + m->nextpackoff = pos + sizeof(struct z_erofs_vle_decompressed_index);
> m->lcn = lcn;
> di = m->kaddr + erofs_blkoff(pos);
>
> @@ -243,12 +273,12 @@ static int get_compacted_la_distance(unsigned int lclusterbits,
>
> static int unpack_compacted_index(struct z_erofs_maprecorder *m,
> unsigned int amortizedshift,
> - unsigned int eofs, bool lookahead)
> + erofs_off_t pos, bool lookahead)
> {
> struct erofs_inode *const vi = EROFS_I(m->inode);
> const unsigned int lclusterbits = vi->z_logical_clusterbits;
> const unsigned int lomask = (1 << lclusterbits) - 1;
> - unsigned int vcnt, base, lo, encodebits, nblk;
> + unsigned int vcnt, base, lo, encodebits, nblk, eofs;
> int i;
> u8 *in, type;
> bool big_pcluster;
> @@ -260,8 +290,12 @@ static int unpack_compacted_index(struct z_erofs_maprecorder *m,
> else
> return -EOPNOTSUPP;
>
> + /* it doesn't equal to roundup(..) */
> + m->nextpackoff = rounddown(pos, vcnt << amortizedshift) +
> + (vcnt << amortizedshift);
> big_pcluster = vi->z_advise & Z_EROFS_ADVISE_BIG_PCLUSTER_1;
> encodebits = ((vcnt << amortizedshift) - sizeof(__le32)) * 8 / vcnt;
> + eofs = erofs_blkoff(pos);
> base = round_down(eofs, vcnt << amortizedshift);
> in = m->kaddr + base;
>
> @@ -399,8 +433,7 @@ static int compacted_load_cluster_from_disk(struct z_erofs_maprecorder *m,
> err = z_erofs_reload_indexes(m, erofs_blknr(pos));
> if (err)
> return err;
> - return unpack_compacted_index(m, amortizedshift, erofs_blkoff(pos),
> - lookahead);
> + return unpack_compacted_index(m, amortizedshift, pos, lookahead);
> }
>
> static int z_erofs_load_cluster_from_disk(struct z_erofs_maprecorder *m,
> @@ -583,11 +616,12 @@ static int z_erofs_get_extent_decompressedlen(struct z_erofs_maprecorder *m)
> return 0;
> }
>
> -int z_erofs_map_blocks_iter(struct inode *inode,
> - struct erofs_map_blocks *map,
> - int flags)
> +static int z_erofs_do_map_blocks(struct inode *inode,
> + struct erofs_map_blocks *map,
> + int flags)
> {
> struct erofs_inode *const vi = EROFS_I(inode);
> + bool ztailpacking = vi->z_advise & Z_EROFS_ADVISE_INLINE_PCLUSTER;
> struct z_erofs_maprecorder m = {
> .inode = inode,
> .map = map,
> @@ -597,22 +631,8 @@ int z_erofs_map_blocks_iter(struct inode *inode,
> unsigned long initial_lcn;
> unsigned long long ofs, end;
>
> - trace_z_erofs_map_blocks_iter_enter(inode, map, flags);
> -
> - /* when trying to read beyond EOF, leave it unmapped */
> - if (map->m_la >= inode->i_size) {
> - map->m_llen = map->m_la + 1 - inode->i_size;
> - map->m_la = inode->i_size;
> - map->m_flags = 0;
> - goto out;
> - }
> -
> - err = z_erofs_fill_inode_lazy(inode);
> - if (err)
> - goto out;
> -
> lclusterbits = vi->z_logical_clusterbits;
> - ofs = map->m_la;
> + ofs = flags & EROFS_GET_BLOCKS_FINDTAIL ? inode->i_size - 1 : map->m_la;
> initial_lcn = ofs >> lclusterbits;
> endoff = ofs & ((1 << lclusterbits) - 1);
>
> @@ -620,6 +640,9 @@ int z_erofs_map_blocks_iter(struct inode *inode,
> if (err)
> goto unmap_out;
>
> + if (ztailpacking && (flags & EROFS_GET_BLOCKS_FINDTAIL))
> + vi->z_idataoff = m.nextpackoff;
> +
> map->m_flags = EROFS_MAP_MAPPED | EROFS_MAP_ENCODED;
> end = (m.lcn + 1ULL) << lclusterbits;
>
> @@ -658,12 +681,20 @@ int z_erofs_map_blocks_iter(struct inode *inode,
> goto unmap_out;
> }
>
> - map->m_llen = end - map->m_la;
> - map->m_pa = blknr_to_addr(m.pblk);
> + if (flags & EROFS_GET_BLOCKS_FINDTAIL)
> + vi->z_idata_headlcn = m.lcn;

The name of "z_idata_headlcn" includes "idata" which looks like ztailpacking related?
If it is, add ztailpacking check here? If not, update it such as "z_tailextent_headlcn"?

>
> - err = z_erofs_get_extent_compressedlen(&m, initial_lcn);
> - if (err)
> - goto out;
> + map->m_llen = end - map->m_la;
> + if (ztailpacking && m.lcn == vi->z_idata_headlcn) {
> + map->m_flags |= EROFS_MAP_META;
> + map->m_pa = vi->z_idataoff;
> + map->m_plen = vi->z_idata_size;
> + } else {
> + map->m_pa = blknr_to_addr(m.pblk);
> + err = z_erofs_get_extent_compressedlen(&m, initial_lcn);
> + if (err)
> + goto out;
> + }
>
> if (m.headtype == Z_EROFS_VLE_CLUSTER_TYPE_PLAIN)
> map->m_algorithmformat = Z_EROFS_COMPRESSION_SHIFTED;
> @@ -689,6 +720,31 @@ int z_erofs_map_blocks_iter(struct inode *inode,
> __func__, map->m_la, map->m_pa,
> map->m_llen, map->m_plen, map->m_flags);
>
> + return err;
> +}
> +
> +int z_erofs_map_blocks_iter(struct inode *inode,
> + struct erofs_map_blocks *map,
> + int flags)
> +{
> + int err = 0;
> +
> + trace_z_erofs_map_blocks_iter_enter(inode, map, flags);
> +
> + /* when trying to read beyond EOF, leave it unmapped */
> + if (map->m_la >= inode->i_size) {
> + map->m_llen = map->m_la + 1 - inode->i_size;
> + map->m_la = inode->i_size;
> + map->m_flags = 0;
> + goto out;
> + }
> +
> + err = z_erofs_fill_inode_lazy(inode);
> + if (err)
> + goto out;
> +
> + err = z_erofs_do_map_blocks(inode, map, flags);
> +out:
> trace_z_erofs_map_blocks_iter_exit(inode, map, flags, err);
>
> /* aggressively BUG_ON iff CONFIG_EROFS_FS_DEBUG is on */


2021-12-27 08:21:10

by Yue Hu

Subject: Re: [PATCH v3 4/5] erofs: support inline data decompression

Hi Xiang,

On Sat, 25 Dec 2021 15:06:25 +0800
Gao Xiang <[email protected]> wrote:

> From: Yue Hu <[email protected]>
>
> Currently, we have already support tail-packing inline for
> uncompressed file, let's also implement this for compressed
> files to save I/Os and storage space.
>
> Different from normal pclusters, compressed data is available
> in advance because of other metadata I/Os. Therefore, they
> directly move into the bypass queue without extra I/O submission.
>
> It's the last compression feature before folio/subpage support.
>
> Signed-off-by: Yue Hu <[email protected]>
> Signed-off-by: Gao Xiang <[email protected]>
> ---
> fs/erofs/zdata.c | 128 ++++++++++++++++++++++++++++++++---------------
> fs/erofs/zdata.h | 24 ++++++++-
> 2 files changed, 109 insertions(+), 43 deletions(-)
>
> diff --git a/fs/erofs/zdata.c b/fs/erofs/zdata.c
> index bc765d8a6dc2..e6ef02634e08 100644
> --- a/fs/erofs/zdata.c
> +++ b/fs/erofs/zdata.c
> @@ -82,12 +82,13 @@ static struct z_erofs_pcluster *z_erofs_alloc_pcluster(unsigned int nrpages)
>
> static void z_erofs_free_pcluster(struct z_erofs_pcluster *pcl)
> {
> + unsigned int pclusterpages = z_erofs_pclusterpages(pcl);
> int i;
>
> for (i = 0; i < ARRAY_SIZE(pcluster_pool); ++i) {
> struct z_erofs_pcluster_slab *pcs = pcluster_pool + i;
>
> - if (pcl->pclusterpages > pcs->maxpages)
> + if (pclusterpages > pcs->maxpages)
> continue;
>
> kmem_cache_free(pcs->slab, pcl);
> @@ -298,6 +299,7 @@ int erofs_try_to_free_all_cached_pages(struct erofs_sb_info *sbi,
> container_of(grp, struct z_erofs_pcluster, obj);
> int i;
>
> + DBG_BUGON(z_erofs_is_inline_pcluster(pcl));
> /*
> * refcount of workgroup is now freezed as 1,
> * therefore no need to worry about available decompression users.
> @@ -331,6 +333,7 @@ int erofs_try_to_free_cached_page(struct page *page)
> if (erofs_workgroup_try_to_freeze(&pcl->obj, 1)) {
> unsigned int i;
>
> + DBG_BUGON(z_erofs_is_inline_pcluster(pcl));
> for (i = 0; i < pcl->pclusterpages; ++i) {
> if (pcl->compressed_pages[i] == page) {
> WRITE_ONCE(pcl->compressed_pages[i], NULL);
> @@ -458,6 +461,7 @@ static int z_erofs_register_collection(struct z_erofs_collector *clt,
> struct inode *inode,
> struct erofs_map_blocks *map)
> {
> + bool ztailpacking = map->m_flags & EROFS_MAP_META;
> struct z_erofs_pcluster *pcl;
> struct z_erofs_collection *cl;
> struct erofs_workgroup *grp;
> @@ -469,12 +473,12 @@ static int z_erofs_register_collection(struct z_erofs_collector *clt,
> }
>
> /* no available pcluster, let's allocate one */
> - pcl = z_erofs_alloc_pcluster(map->m_plen >> PAGE_SHIFT);
> + pcl = z_erofs_alloc_pcluster(ztailpacking ? 1 :
> + map->m_plen >> PAGE_SHIFT);
> if (IS_ERR(pcl))
> return PTR_ERR(pcl);
>
> atomic_set(&pcl->obj.refcount, 1);
> - pcl->obj.index = map->m_pa >> PAGE_SHIFT;
> pcl->algorithmformat = map->m_algorithmformat;
> pcl->length = (map->m_llen << Z_EROFS_PCLUSTER_LENGTH_BIT) |
> (map->m_flags & EROFS_MAP_FULL_MAPPED ?
> @@ -494,16 +498,25 @@ static int z_erofs_register_collection(struct z_erofs_collector *clt,
> mutex_init(&cl->lock);
> DBG_BUGON(!mutex_trylock(&cl->lock));
>
> - grp = erofs_insert_workgroup(inode->i_sb, &pcl->obj);
> - if (IS_ERR(grp)) {
> - err = PTR_ERR(grp);
> - goto err_out;
> - }
> + if (ztailpacking) {
> + pcl->obj.index = 0; /* which indicates ztailpacking */
> + pcl->pageofs_in = erofs_blkoff(map->m_pa);
> + pcl->tailpacking_size = map->m_plen;
> + } else {
> + pcl->obj.index = map->m_pa >> PAGE_SHIFT;
>
> - if (grp != &pcl->obj) {
> - clt->pcl = container_of(grp, struct z_erofs_pcluster, obj);
> - err = -EEXIST;
> - goto err_out;
> + grp = erofs_insert_workgroup(inode->i_sb, &pcl->obj);
> + if (IS_ERR(grp)) {
> + err = PTR_ERR(grp);
> + goto err_out;
> + }
> +
> + if (grp != &pcl->obj) {
> + clt->pcl = container_of(grp,
> + struct z_erofs_pcluster, obj);
> + err = -EEXIST;
> + goto err_out;
> + }
> }
> /* used to check tail merging loop due to corrupted images */
> if (clt->owned_head == Z_EROFS_PCLUSTER_TAIL)
> @@ -532,17 +545,20 @@ static int z_erofs_collector_begin(struct z_erofs_collector *clt,
> DBG_BUGON(clt->owned_head == Z_EROFS_PCLUSTER_NIL);
> DBG_BUGON(clt->owned_head == Z_EROFS_PCLUSTER_TAIL_CLOSED);
>
> - if (!PAGE_ALIGNED(map->m_pa)) {
> - DBG_BUGON(1);
> - return -EINVAL;
> + if (map->m_flags & EROFS_MAP_META) {
> + if ((map->m_pa & ~PAGE_MASK) + map->m_plen > PAGE_SIZE) {
> + DBG_BUGON(1);
> + return -EFSCORRUPTED;
> + }
> + goto tailpacking;
> }
>
> grp = erofs_find_workgroup(inode->i_sb, map->m_pa >> PAGE_SHIFT);
> if (grp) {
> clt->pcl = container_of(grp, struct z_erofs_pcluster, obj);
> } else {
> +tailpacking:
> ret = z_erofs_register_collection(clt, inode, map);
> -
> if (!ret)
> goto out;
> if (ret != -EEXIST)
> @@ -558,9 +574,9 @@ static int z_erofs_collector_begin(struct z_erofs_collector *clt,
> out:
> z_erofs_pagevec_ctor_init(&clt->vector, Z_EROFS_NR_INLINE_PAGEVECS,
> clt->cl->pagevec, clt->cl->vcnt);
> -
> /* since file-backed online pages are traversed in reverse order */
> - clt->icpage_ptr = clt->pcl->compressed_pages + clt->pcl->pclusterpages;
> + clt->icpage_ptr = clt->pcl->compressed_pages +
> + z_erofs_pclusterpages(clt->pcl);
> return 0;
> }
>
> @@ -687,8 +703,26 @@ static int z_erofs_do_read_page(struct z_erofs_decompress_frontend *fe,
> else
> cache_strategy = DONTALLOC;
>
> - preload_compressed_pages(clt, MNGD_MAPPING(sbi),
> - cache_strategy, pagepool);
> + if (z_erofs_is_inline_pcluster(clt->pcl)) {

The current cache_strategy is only used by preload_compressed_pages(), so computing
cache_strategy is needless in the inline branch.

> + struct page *mpage;
> +
> + mpage = erofs_get_meta_page(inode->i_sb,
> + erofs_blknr(map->m_pa));

could we just use map->mpage directly if it's already the page we want (which is the
common case in testing), and only call erofs_get_meta_page() otherwise?

> + if (IS_ERR(mpage)) {
> + err = PTR_ERR(mpage);
> + erofs_err(inode->i_sb,
> + "failed to get inline page, err %d", err);
> + goto err_out;
> + }
> + /* TODO: new subpage feature will get rid of it */
> + unlock_page(mpage);
> +
> + WRITE_ONCE(clt->pcl->compressed_pages[0], mpage);
> + clt->mode = COLLECT_PRIMARY_FOLLOWED_NOINPLACE;
> + } else {
> + preload_compressed_pages(clt, MNGD_MAPPING(sbi),
> + cache_strategy, pagepool);
> + }
>
> hitted:
> /*
> @@ -844,6 +878,7 @@ static int z_erofs_decompress_pcluster(struct super_block *sb,
> struct page **pagepool)
> {
> struct erofs_sb_info *const sbi = EROFS_SB(sb);
> + unsigned int pclusterpages = z_erofs_pclusterpages(pcl);
> struct z_erofs_pagevec_ctor ctor;
> unsigned int i, inputsize, outputsize, llen, nr_pages;
> struct page *pages_onstack[Z_EROFS_VMAP_ONSTACK_PAGES];
> @@ -925,16 +960,15 @@ static int z_erofs_decompress_pcluster(struct super_block *sb,
> overlapped = false;
> compressed_pages = pcl->compressed_pages;
>
> - for (i = 0; i < pcl->pclusterpages; ++i) {
> + for (i = 0; i < pclusterpages; ++i) {
> unsigned int pagenr;
>
> page = compressed_pages[i];
> -
> /* all compressed pages ought to be valid */
> DBG_BUGON(!page);
> - DBG_BUGON(z_erofs_page_is_invalidated(page));
>
> - if (!z_erofs_is_shortlived_page(page)) {
> + if (!z_erofs_is_inline_pcluster(pcl) &&

Some inline checks may still run for the non-inline case if it's a big pcluster. And I
understand the behavior of a ztailpacking page differs from that of a normal page. So
would it be better to branch them, e.g. by moving the inline check outside the for loop?

> + !z_erofs_is_shortlived_page(page)) {
> if (erofs_page_is_managed(sbi, page)) {
> if (!PageUptodate(page))
> err = -EIO;
> @@ -960,10 +994,8 @@ static int z_erofs_decompress_pcluster(struct super_block *sb,
> }
>
> /* PG_error needs checking for all non-managed pages */
> - if (PageError(page)) {
> - DBG_BUGON(PageUptodate(page));
> + if (!PageUptodate(page))
> err = -EIO;
> - }
> }
>
> if (err)
> @@ -978,11 +1010,16 @@ static int z_erofs_decompress_pcluster(struct super_block *sb,
> partial = true;
> }
>
> - inputsize = pcl->pclusterpages * PAGE_SIZE;
> + if (z_erofs_is_inline_pcluster(pcl))
> + inputsize = pcl->tailpacking_size;
> + else
> + inputsize = pclusterpages * PAGE_SIZE;
> +
> err = z_erofs_decompress(&(struct z_erofs_decompress_req) {
> .sb = sb,
> .in = compressed_pages,
> .out = pages,
> + .pageofs_in = pcl->pageofs_in,
> .pageofs_out = cl->pageofs,
> .inputsize = inputsize,
> .outputsize = outputsize,
> @@ -992,17 +1029,22 @@ static int z_erofs_decompress_pcluster(struct super_block *sb,
> }, pagepool);
>
> out:
> - /* must handle all compressed pages before ending pages */
> - for (i = 0; i < pcl->pclusterpages; ++i) {
> - page = compressed_pages[i];
> -
> - if (erofs_page_is_managed(sbi, page))
> - continue;
> + /* must handle all compressed pages before actual file pages */
> + if (z_erofs_is_inline_pcluster(pcl)) {
> + page = compressed_pages[0];
> + WRITE_ONCE(compressed_pages[0], NULL);
> + put_page(page);
> + } else {
> + for (i = 0; i < pclusterpages; ++i) {
> + page = compressed_pages[i];
>
> - /* recycle all individual short-lived pages */
> - (void)z_erofs_put_shortlivedpage(pagepool, page);
> + if (erofs_page_is_managed(sbi, page))
> + continue;
>
> - WRITE_ONCE(compressed_pages[i], NULL);
> + /* recycle all individual short-lived pages */
> + (void)z_erofs_put_shortlivedpage(pagepool, page);
> + WRITE_ONCE(compressed_pages[i], NULL);
> + }
> }
>
> for (i = 0; i < nr_pages; ++i) {
> @@ -1288,6 +1330,14 @@ static void z_erofs_submit_queue(struct super_block *sb,
>
> pcl = container_of(owned_head, struct z_erofs_pcluster, next);
>
> + /* close the main owned chain at first */
> + owned_head = cmpxchg(&pcl->next, Z_EROFS_PCLUSTER_TAIL,
> + Z_EROFS_PCLUSTER_TAIL_CLOSED);
> + if (z_erofs_is_inline_pcluster(pcl)) {
> + move_to_bypass_jobqueue(pcl, qtail, owned_head);
> + continue;
> + }
> +
> /* no device id here, thus it will always succeed */
> mdev = (struct erofs_map_dev) {
> .m_pa = blknr_to_addr(pcl->obj.index),
> @@ -1297,10 +1347,6 @@ static void z_erofs_submit_queue(struct super_block *sb,
> cur = erofs_blknr(mdev.m_pa);
> end = cur + pcl->pclusterpages;
>
> - /* close the main owned chain at first */
> - owned_head = cmpxchg(&pcl->next, Z_EROFS_PCLUSTER_TAIL,
> - Z_EROFS_PCLUSTER_TAIL_CLOSED);
> -
> do {
> struct page *page;
>
> diff --git a/fs/erofs/zdata.h b/fs/erofs/zdata.h
> index 4a69515dea75..e043216b545f 100644
> --- a/fs/erofs/zdata.h
> +++ b/fs/erofs/zdata.h
> @@ -62,8 +62,16 @@ struct z_erofs_pcluster {
> /* A: lower limit of decompressed length and if full length or not */
> unsigned int length;
>
> - /* I: physical cluster size in pages */
> - unsigned short pclusterpages;
> + /* I: page offset of inline compressed data */
> + unsigned short pageofs_in;
> +
> + union {
> + /* I: physical cluster size in pages */
> + unsigned short pclusterpages;
> +
> + /* I: tailpacking inline compressed size */
> + unsigned short tailpacking_size;
> + };
>
> /* I: compression algorithm format */
> unsigned char algorithmformat;
> @@ -94,6 +102,18 @@ struct z_erofs_decompressqueue {
> } u;
> };
>
> +static inline bool z_erofs_is_inline_pcluster(struct z_erofs_pcluster *pcl)
> +{
> + return !pcl->obj.index;
> +}
> +
> +static inline unsigned int z_erofs_pclusterpages(struct z_erofs_pcluster *pcl)
> +{
> + if (z_erofs_is_inline_pcluster(pcl))
> + return 1;
> + return pcl->pclusterpages;
> +}
> +
> #define Z_EROFS_ONLINEPAGE_COUNT_BITS 2
> #define Z_EROFS_ONLINEPAGE_COUNT_MASK ((1 << Z_EROFS_ONLINEPAGE_COUNT_BITS) - 1)
> #define Z_EROFS_ONLINEPAGE_INDEX_SHIFT (Z_EROFS_ONLINEPAGE_COUNT_BITS)
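
To make the convention introduced above concrete: obj.index == 0 marks a
tail-packing inline pcluster, and the pclusterpages/tailpacking_size union
then selects how the decompression input size is derived. Below is a minimal,
self-contained sketch of that logic; the simplified struct, helpers and values
are illustrative only, not the fs/erofs definitions:

/* Illustrative sketch of the inline-pcluster convention (not kernel code):
 * index == 0 means ztailpacking, and the union member picks the meaning of
 * the size field.
 */
#include <stdbool.h>
#include <stdio.h>

#define PAGE_SIZE 4096u

struct pcluster {
        unsigned long index;            /* 0 == inline (ztailpacking) */
        unsigned short pageofs_in;      /* offset of inline data in the meta page */
        union {
                unsigned short pclusterpages;    /* normal: pcluster size in pages */
                unsigned short tailpacking_size; /* inline: compressed tail size in bytes */
        };
};

static bool is_inline(const struct pcluster *pcl)
{
        return !pcl->index;
}

static unsigned int input_size(const struct pcluster *pcl)
{
        /* an inline pcluster always occupies a single (meta) page */
        return is_inline(pcl) ? pcl->tailpacking_size
                              : pcl->pclusterpages * PAGE_SIZE;
}

int main(void)
{
        struct pcluster inline_pcl = { .index = 0, .pageofs_in = 128,
                                       .tailpacking_size = 300 };
        struct pcluster normal_pcl = { .index = 42, .pclusterpages = 2 };

        printf("inline: %u bytes\n", input_size(&inline_pcl));  /* 300 */
        printf("normal: %u bytes\n", input_size(&normal_pcl));  /* 8192 */
        return 0;
}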


2021-12-27 09:20:50

by Gao Xiang

[permalink] [raw]
Subject: Re: [PATCH v3 5/5] erofs: add on-disk compressed tail-packing inline support

Hi Yue,

On Mon, Dec 27, 2021 at 03:23:26PM +0800, Yue Hu wrote:
> Hi Xiang,
>
> On Sat, 25 Dec 2021 15:06:26 +0800
> Gao Xiang <[email protected]> wrote:
>
> > From: Yue Hu <[email protected]>
> >
> > Introduces erofs compressed tail-packing inline support.
> >
> > This approach adds a new field called `h_idata_size' in the
> > per-file compression header to indicate the encoded size of
> > each tail-packing pcluster.
> >
> > At runtime, it finds the start logical offset of the tail
> > pcluster when initializing the per-inode zmap and records the
> > resulting extent (headlcn, idataoff) in the in-memory inode.
> > Therefore, follow-on requests can directly recognize whether a
> > pcluster is a tail-packing inline pcluster or not.
> >
> > Signed-off-by: Yue Hu <[email protected]>
> > Signed-off-by: Gao Xiang <[email protected]>
> > ---
> > fs/erofs/erofs_fs.h | 10 +++-
> > fs/erofs/internal.h | 6 +++
> > fs/erofs/super.c | 3 ++
> > fs/erofs/sysfs.c | 2 +
> > fs/erofs/zmap.c | 116 ++++++++++++++++++++++++++++++++------------
> > 5 files changed, 105 insertions(+), 32 deletions(-)
> >
> > diff --git a/fs/erofs/erofs_fs.h b/fs/erofs/erofs_fs.h
> > index dda79afb901d..3ea62c6fb00a 100644
> > --- a/fs/erofs/erofs_fs.h
> > +++ b/fs/erofs/erofs_fs.h
> > @@ -23,13 +23,15 @@
> > #define EROFS_FEATURE_INCOMPAT_CHUNKED_FILE 0x00000004
> > #define EROFS_FEATURE_INCOMPAT_DEVICE_TABLE 0x00000008
> > #define EROFS_FEATURE_INCOMPAT_COMPR_HEAD2 0x00000008
> > +#define EROFS_FEATURE_INCOMPAT_ZTAILPACKING 0x00000010
> > #define EROFS_ALL_FEATURE_INCOMPAT \
> > (EROFS_FEATURE_INCOMPAT_ZERO_PADDING | \
> > EROFS_FEATURE_INCOMPAT_COMPR_CFGS | \
> > EROFS_FEATURE_INCOMPAT_BIG_PCLUSTER | \
> > EROFS_FEATURE_INCOMPAT_CHUNKED_FILE | \
> > EROFS_FEATURE_INCOMPAT_DEVICE_TABLE | \
> > - EROFS_FEATURE_INCOMPAT_COMPR_HEAD2)
> > + EROFS_FEATURE_INCOMPAT_COMPR_HEAD2 | \
> > + EROFS_FEATURE_INCOMPAT_ZTAILPACKING)
> >
> > #define EROFS_SB_EXTSLOT_SIZE 16
> >
> > @@ -292,13 +294,17 @@ struct z_erofs_lzma_cfgs {
> > * (4B) + 2B + (4B) if compacted 2B is on.
> > * bit 1 : HEAD1 big pcluster (0 - off; 1 - on)
> > * bit 2 : HEAD2 big pcluster (0 - off; 1 - on)
> > + * bit 3 : tailpacking inline pcluster (0 - off; 1 - on)
> > */
> > #define Z_EROFS_ADVISE_COMPACTED_2B 0x0001
> > #define Z_EROFS_ADVISE_BIG_PCLUSTER_1 0x0002
> > #define Z_EROFS_ADVISE_BIG_PCLUSTER_2 0x0004
> > +#define Z_EROFS_ADVISE_INLINE_PCLUSTER 0x0008
> >
> > struct z_erofs_map_header {
> > - __le32 h_reserved1;
> > + __le16 h_reserved1;
> > + /* indicates the encoded size of tailpacking data */
> > + __le16 h_idata_size;
> > __le16 h_advise;
> > /*
> > * bit 0-3 : algorithm type of head 1 (logical cluster type 01);
> > diff --git a/fs/erofs/internal.h b/fs/erofs/internal.h
> > index 8e70435629e5..ebb9ffa08dd2 100644
> > --- a/fs/erofs/internal.h
> > +++ b/fs/erofs/internal.h
> > @@ -274,6 +274,7 @@ EROFS_FEATURE_FUNCS(big_pcluster, incompat, INCOMPAT_BIG_PCLUSTER)
> > EROFS_FEATURE_FUNCS(chunked_file, incompat, INCOMPAT_CHUNKED_FILE)
> > EROFS_FEATURE_FUNCS(device_table, incompat, INCOMPAT_DEVICE_TABLE)
> > EROFS_FEATURE_FUNCS(compr_head2, incompat, INCOMPAT_COMPR_HEAD2)
> > +EROFS_FEATURE_FUNCS(ztailpacking, incompat, INCOMPAT_ZTAILPACKING)
> > EROFS_FEATURE_FUNCS(sb_chksum, compat, COMPAT_SB_CHKSUM)
> >
> > /* atomic flag definitions */
> > @@ -308,6 +309,9 @@ struct erofs_inode {
> > unsigned short z_advise;
> > unsigned char z_algorithmtype[2];
> > unsigned char z_logical_clusterbits;
> > + unsigned long z_idata_headlcn;
> > + unsigned int z_idataoff;
>
> need a space?

Will fix.

>
> > + unsigned short z_idata_size;
> > };
> > #endif /* CONFIG_EROFS_FS_ZIP */
> > };
> > @@ -421,6 +425,8 @@ struct erofs_map_blocks {
> > #define EROFS_GET_BLOCKS_FIEMAP 0x0002
> > /* Used to map the whole extent if non-negligible data is requested for LZMA */
> > #define EROFS_GET_BLOCKS_READMORE 0x0004
> > +/* Used to map tail extent for tailpacking inline pcluster */
> > +#define EROFS_GET_BLOCKS_FINDTAIL 0x0008
> >
> > enum {
> > Z_EROFS_COMPRESSION_SHIFTED = Z_EROFS_COMPRESSION_MAX,
> > diff --git a/fs/erofs/super.c b/fs/erofs/super.c
> > index 58f381f80205..0724ad5fd6cf 100644
> > --- a/fs/erofs/super.c
> > +++ b/fs/erofs/super.c
> > @@ -411,6 +411,9 @@ static int erofs_read_superblock(struct super_block *sb)
> >
> > /* handle multiple devices */
> > ret = erofs_init_devices(sb, dsb);
> > +
> > + if (erofs_sb_has_ztailpacking(sbi))
> > + erofs_info(sb, "EXPERIMENTAL compressed inline data feature in use. Use at your own risk!");
> > out:
> > kunmap(page);
> > put_page(page);
> > diff --git a/fs/erofs/sysfs.c b/fs/erofs/sysfs.c
> > index 666693432107..dac252bc9228 100644
> > --- a/fs/erofs/sysfs.c
> > +++ b/fs/erofs/sysfs.c
> > @@ -75,6 +75,7 @@ EROFS_ATTR_FEATURE(chunked_file);
> > EROFS_ATTR_FEATURE(device_table);
> > EROFS_ATTR_FEATURE(compr_head2);
> > EROFS_ATTR_FEATURE(sb_chksum);
> > +EROFS_ATTR_FEATURE(ztailpacking);
> >
> > static struct attribute *erofs_feat_attrs[] = {
> > ATTR_LIST(zero_padding),
> > @@ -84,6 +85,7 @@ static struct attribute *erofs_feat_attrs[] = {
> > ATTR_LIST(device_table),
> > ATTR_LIST(compr_head2),
> > ATTR_LIST(sb_chksum),
> > + ATTR_LIST(ztailpacking),
> > NULL,
> > };
> > ATTRIBUTE_GROUPS(erofs_feat);
> > diff --git a/fs/erofs/zmap.c b/fs/erofs/zmap.c
> > index 660489a7fb64..52b5369f9b42 100644
> > --- a/fs/erofs/zmap.c
> > +++ b/fs/erofs/zmap.c
> > @@ -7,12 +7,17 @@
> > #include <asm/unaligned.h>
> > #include <trace/events/erofs.h>
> >
> > +static int z_erofs_do_map_blocks(struct inode *inode,
> > + struct erofs_map_blocks *map,
> > + int flags);
> > +
> > int z_erofs_fill_inode(struct inode *inode)
> > {
> > struct erofs_inode *const vi = EROFS_I(inode);
> > struct erofs_sb_info *sbi = EROFS_SB(inode->i_sb);
> >
> > if (!erofs_sb_has_big_pcluster(sbi) &&
> > + !erofs_sb_has_ztailpacking(sbi) &&
> > vi->datalayout == EROFS_INODE_FLAT_COMPRESSION_LEGACY) {
> > vi->z_advise = 0;
> > vi->z_algorithmtype[0] = 0;
> > @@ -51,6 +56,7 @@ static int z_erofs_fill_inode_lazy(struct inode *inode)
> > goto out_unlock;
> >
> > DBG_BUGON(!erofs_sb_has_big_pcluster(EROFS_SB(sb)) &&
> > + !erofs_sb_has_ztailpacking(EROFS_SB(sb)) &&
> > vi->datalayout == EROFS_INODE_FLAT_COMPRESSION_LEGACY);
> >
> > pos = ALIGN(iloc(EROFS_SB(sb), vi->nid) + vi->inode_isize +
> > @@ -65,6 +71,7 @@ static int z_erofs_fill_inode_lazy(struct inode *inode)
> >
> > h = kaddr + erofs_blkoff(pos);
> > vi->z_advise = le16_to_cpu(h->h_advise);
> > + vi->z_idata_size = le16_to_cpu(h->h_idata_size);
>
> duplicated code?

Yeah, good catch, I should keep only the latter one.

>
> > vi->z_algorithmtype[0] = h->h_algorithmtype & 15;
> > vi->z_algorithmtype[1] = h->h_algorithmtype >> 4;
> >
> > @@ -94,13 +101,34 @@ static int z_erofs_fill_inode_lazy(struct inode *inode)
> > err = -EFSCORRUPTED;
> > goto unmap_done;
> > }
> > - /* paired with smp_mb() at the beginning of the function */
> > - smp_mb();
> > - set_bit(EROFS_I_Z_INITED_BIT, &vi->flags);
> > unmap_done:
> > kunmap_atomic(kaddr);
> > unlock_page(page);
> > put_page(page);
> > + if (err)
> > + goto out_unlock;
> > +
> > + if (vi->z_advise & Z_EROFS_ADVISE_INLINE_PCLUSTER) {
> > + struct erofs_map_blocks map = { .mpage = NULL };
> > +
> > + vi->z_idata_size = le16_to_cpu(h->h_idata_size);
> > + err = z_erofs_do_map_blocks(inode, &map,
> > + EROFS_GET_BLOCKS_FINDTAIL);
> > + if (map.mpage)
> > + put_page(map.mpage);
> > +
> > + if (!map.m_plen ||
> > + erofs_blkoff(map.m_pa) + map.m_plen > EROFS_BLKSIZ) {
> > + erofs_err(sb, "invalid tail-packing pclustersize %llu",
> > + map.m_plen);
> > + err = -EFSCORRUPTED;
> > + }
> > + if (err < 0)
> > + goto out_unlock;
> > + }
> > + /* paired with smp_mb() at the beginning of the function */
> > + smp_mb();
> > + set_bit(EROFS_I_Z_INITED_BIT, &vi->flags);
> > out_unlock:
> > clear_and_wake_up_bit(EROFS_I_BL_Z_BIT, &vi->flags);
> > return err;
> > @@ -117,6 +145,7 @@ struct z_erofs_maprecorder {
> > u16 clusterofs;
> > u16 delta[2];
> > erofs_blk_t pblk, compressedlcs;
> > + erofs_off_t nextpackoff;
> > };
> >
> > static int z_erofs_reload_indexes(struct z_erofs_maprecorder *m,
> > @@ -169,6 +198,7 @@ static int legacy_load_cluster_from_disk(struct z_erofs_maprecorder *m,
> > if (err)
> > return err;
> >
> > + m->nextpackoff = pos + sizeof(struct z_erofs_vle_decompressed_index);
> > m->lcn = lcn;
> > di = m->kaddr + erofs_blkoff(pos);
> >
> > @@ -243,12 +273,12 @@ static int get_compacted_la_distance(unsigned int lclusterbits,
> >
> > static int unpack_compacted_index(struct z_erofs_maprecorder *m,
> > unsigned int amortizedshift,
> > - unsigned int eofs, bool lookahead)
> > + erofs_off_t pos, bool lookahead)
> > {
> > struct erofs_inode *const vi = EROFS_I(m->inode);
> > const unsigned int lclusterbits = vi->z_logical_clusterbits;
> > const unsigned int lomask = (1 << lclusterbits) - 1;
> > - unsigned int vcnt, base, lo, encodebits, nblk;
> > + unsigned int vcnt, base, lo, encodebits, nblk, eofs;
> > int i;
> > u8 *in, type;
> > bool big_pcluster;
> > @@ -260,8 +290,12 @@ static int unpack_compacted_index(struct z_erofs_maprecorder *m,
> > else
> > return -EOPNOTSUPP;
> >
> > + /* it doesn't equal to roundup(..) */
> > + m->nextpackoff = rounddown(pos, vcnt << amortizedshift) +
> > + (vcnt << amortizedshift);
> > big_pcluster = vi->z_advise & Z_EROFS_ADVISE_BIG_PCLUSTER_1;
> > encodebits = ((vcnt << amortizedshift) - sizeof(__le32)) * 8 / vcnt;
> > + eofs = erofs_blkoff(pos);
> > base = round_down(eofs, vcnt << amortizedshift);
> > in = m->kaddr + base;
> >
> > @@ -399,8 +433,7 @@ static int compacted_load_cluster_from_disk(struct z_erofs_maprecorder *m,
> > err = z_erofs_reload_indexes(m, erofs_blknr(pos));
> > if (err)
> > return err;
> > - return unpack_compacted_index(m, amortizedshift, erofs_blkoff(pos),
> > - lookahead);
> > + return unpack_compacted_index(m, amortizedshift, pos, lookahead);
> > }
> >
> > static int z_erofs_load_cluster_from_disk(struct z_erofs_maprecorder *m,
> > @@ -583,11 +616,12 @@ static int z_erofs_get_extent_decompressedlen(struct z_erofs_maprecorder *m)
> > return 0;
> > }
> >
> > -int z_erofs_map_blocks_iter(struct inode *inode,
> > - struct erofs_map_blocks *map,
> > - int flags)
> > +static int z_erofs_do_map_blocks(struct inode *inode,
> > + struct erofs_map_blocks *map,
> > + int flags)
> > {
> > struct erofs_inode *const vi = EROFS_I(inode);
> > + bool ztailpacking = vi->z_advise & Z_EROFS_ADVISE_INLINE_PCLUSTER;
> > struct z_erofs_maprecorder m = {
> > .inode = inode,
> > .map = map,
> > @@ -597,22 +631,8 @@ int z_erofs_map_blocks_iter(struct inode *inode,
> > unsigned long initial_lcn;
> > unsigned long long ofs, end;
> >
> > - trace_z_erofs_map_blocks_iter_enter(inode, map, flags);
> > -
> > - /* when trying to read beyond EOF, leave it unmapped */
> > - if (map->m_la >= inode->i_size) {
> > - map->m_llen = map->m_la + 1 - inode->i_size;
> > - map->m_la = inode->i_size;
> > - map->m_flags = 0;
> > - goto out;
> > - }
> > -
> > - err = z_erofs_fill_inode_lazy(inode);
> > - if (err)
> > - goto out;
> > -
> > lclusterbits = vi->z_logical_clusterbits;
> > - ofs = map->m_la;
> > + ofs = flags & EROFS_GET_BLOCKS_FINDTAIL ? inode->i_size - 1 : map->m_la;
> > initial_lcn = ofs >> lclusterbits;
> > endoff = ofs & ((1 << lclusterbits) - 1);
> >
> > @@ -620,6 +640,9 @@ int z_erofs_map_blocks_iter(struct inode *inode,
> > if (err)
> > goto unmap_out;
> >
> > + if (ztailpacking && (flags & EROFS_GET_BLOCKS_FINDTAIL))
> > + vi->z_idataoff = m.nextpackoff;
> > +
> > map->m_flags = EROFS_MAP_MAPPED | EROFS_MAP_ENCODED;
> > end = (m.lcn + 1ULL) << lclusterbits;
> >
> > @@ -658,12 +681,20 @@ int z_erofs_map_blocks_iter(struct inode *inode,
> > goto unmap_out;
> > }
> >
> > - map->m_llen = end - map->m_la;
> > - map->m_pa = blknr_to_addr(m.pblk);
> > + if (flags & EROFS_GET_BLOCKS_FINDTAIL)
> > + vi->z_idata_headlcn = m.lcn;
>
> The name of "z_idata_headlcn" includes "idata" which looks like ztailpacking related?
> If it is, add ztailpacking check here? If not, update it such as "z_tailextent_headlcn"?

Yeah, z_tailextent_headlcn is a better name, will update soon.

Thanks,
Gao Xiang
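
One more note on the nextpackoff computation quoted above: rounddown(pos,
step) + step is deliberately not the same as roundup(pos, step), since the two
diverge exactly when pos is already aligned (hence the "doesn't equal to
roundup" comment). A tiny standalone check, illustrative only, using local
macro definitions rather than the kernel's:

/* Why rounddown(pos, step) + step != roundup(pos, step) once pos is
 * already a multiple of step.
 */
#include <assert.h>

#define rounddown(x, y) ((x) - ((x) % (y)))
#define roundup(x, y)   ((((x) + (y) - 1) / (y)) * (y))

int main(void)
{
        unsigned long step = 4096;

        /* unaligned position: both expressions agree */
        assert(rounddown(5000, step) + step == 8192);
        assert(roundup(5000, step) == 8192);

        /* aligned position: rounddown + step advances to the next pack,
         * while roundup stays put
         */
        assert(rounddown(8192, step) + step == 12288);
        assert(roundup(8192, step) == 8192);
        return 0;
}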

2021-12-27 09:26:32

by Gao Xiang

[permalink] [raw]
Subject: Re: [PATCH v3 4/5] erofs: support inline data decompression

On Mon, Dec 27, 2021 at 04:18:19PM +0800, Yue Hu wrote:
> Hi Xiang,
>
> On Sat, 25 Dec 2021 15:06:25 +0800
> Gao Xiang <[email protected]> wrote:
>
> > From: Yue Hu <[email protected]>
> >
> > Tail-packing inline is already supported for uncompressed files;
> > let's also implement it for compressed files to save I/O and
> > storage space.
> >
> > Unlike normal pclusters, the compressed data of an inline pcluster
> > is already available thanks to other metadata I/Os, so such
> > pclusters move directly into the bypass queue without extra I/O
> > submission.
> >
> > It's the last compression feature before folio/subpage support.
> >
> > Signed-off-by: Yue Hu <[email protected]>
> > Signed-off-by: Gao Xiang <[email protected]>
> > ---
> > fs/erofs/zdata.c | 128 ++++++++++++++++++++++++++++++++---------------
> > fs/erofs/zdata.h | 24 ++++++++-
> > 2 files changed, 109 insertions(+), 43 deletions(-)
> >
> > diff --git a/fs/erofs/zdata.c b/fs/erofs/zdata.c
> > index bc765d8a6dc2..e6ef02634e08 100644
> > --- a/fs/erofs/zdata.c
> > +++ b/fs/erofs/zdata.c
> > @@ -82,12 +82,13 @@ static struct z_erofs_pcluster *z_erofs_alloc_pcluster(unsigned int nrpages)
> >
> > static void z_erofs_free_pcluster(struct z_erofs_pcluster *pcl)
> > {
> > + unsigned int pclusterpages = z_erofs_pclusterpages(pcl);
> > int i;
> >
> > for (i = 0; i < ARRAY_SIZE(pcluster_pool); ++i) {
> > struct z_erofs_pcluster_slab *pcs = pcluster_pool + i;
> >
> > - if (pcl->pclusterpages > pcs->maxpages)
> > + if (pclusterpages > pcs->maxpages)
> > continue;
> >
> > kmem_cache_free(pcs->slab, pcl);
> > @@ -298,6 +299,7 @@ int erofs_try_to_free_all_cached_pages(struct erofs_sb_info *sbi,
> > container_of(grp, struct z_erofs_pcluster, obj);
> > int i;
> >
> > + DBG_BUGON(z_erofs_is_inline_pcluster(pcl));
> > /*
> > * refcount of workgroup is now freezed as 1,
> > * therefore no need to worry about available decompression users.
> > @@ -331,6 +333,7 @@ int erofs_try_to_free_cached_page(struct page *page)
> > if (erofs_workgroup_try_to_freeze(&pcl->obj, 1)) {
> > unsigned int i;
> >
> > + DBG_BUGON(z_erofs_is_inline_pcluster(pcl));
> > for (i = 0; i < pcl->pclusterpages; ++i) {
> > if (pcl->compressed_pages[i] == page) {
> > WRITE_ONCE(pcl->compressed_pages[i], NULL);
> > @@ -458,6 +461,7 @@ static int z_erofs_register_collection(struct z_erofs_collector *clt,
> > struct inode *inode,
> > struct erofs_map_blocks *map)
> > {
> > + bool ztailpacking = map->m_flags & EROFS_MAP_META;
> > struct z_erofs_pcluster *pcl;
> > struct z_erofs_collection *cl;
> > struct erofs_workgroup *grp;
> > @@ -469,12 +473,12 @@ static int z_erofs_register_collection(struct z_erofs_collector *clt,
> > }
> >
> > /* no available pcluster, let's allocate one */
> > - pcl = z_erofs_alloc_pcluster(map->m_plen >> PAGE_SHIFT);
> > + pcl = z_erofs_alloc_pcluster(ztailpacking ? 1 :
> > + map->m_plen >> PAGE_SHIFT);
> > if (IS_ERR(pcl))
> > return PTR_ERR(pcl);
> >
> > atomic_set(&pcl->obj.refcount, 1);
> > - pcl->obj.index = map->m_pa >> PAGE_SHIFT;
> > pcl->algorithmformat = map->m_algorithmformat;
> > pcl->length = (map->m_llen << Z_EROFS_PCLUSTER_LENGTH_BIT) |
> > (map->m_flags & EROFS_MAP_FULL_MAPPED ?
> > @@ -494,16 +498,25 @@ static int z_erofs_register_collection(struct z_erofs_collector *clt,
> > mutex_init(&cl->lock);
> > DBG_BUGON(!mutex_trylock(&cl->lock));
> >
> > - grp = erofs_insert_workgroup(inode->i_sb, &pcl->obj);
> > - if (IS_ERR(grp)) {
> > - err = PTR_ERR(grp);
> > - goto err_out;
> > - }
> > + if (ztailpacking) {
> > + pcl->obj.index = 0; /* which indicates ztailpacking */
> > + pcl->pageofs_in = erofs_blkoff(map->m_pa);
> > + pcl->tailpacking_size = map->m_plen;
> > + } else {
> > + pcl->obj.index = map->m_pa >> PAGE_SHIFT;
> >
> > - if (grp != &pcl->obj) {
> > - clt->pcl = container_of(grp, struct z_erofs_pcluster, obj);
> > - err = -EEXIST;
> > - goto err_out;
> > + grp = erofs_insert_workgroup(inode->i_sb, &pcl->obj);
> > + if (IS_ERR(grp)) {
> > + err = PTR_ERR(grp);
> > + goto err_out;
> > + }
> > +
> > + if (grp != &pcl->obj) {
> > + clt->pcl = container_of(grp,
> > + struct z_erofs_pcluster, obj);
> > + err = -EEXIST;
> > + goto err_out;
> > + }
> > }
> > /* used to check tail merging loop due to corrupted images */
> > if (clt->owned_head == Z_EROFS_PCLUSTER_TAIL)
> > @@ -532,17 +545,20 @@ static int z_erofs_collector_begin(struct z_erofs_collector *clt,
> > DBG_BUGON(clt->owned_head == Z_EROFS_PCLUSTER_NIL);
> > DBG_BUGON(clt->owned_head == Z_EROFS_PCLUSTER_TAIL_CLOSED);
> >
> > - if (!PAGE_ALIGNED(map->m_pa)) {
> > - DBG_BUGON(1);
> > - return -EINVAL;
> > + if (map->m_flags & EROFS_MAP_META) {
> > + if ((map->m_pa & ~PAGE_MASK) + map->m_plen > PAGE_SIZE) {
> > + DBG_BUGON(1);
> > + return -EFSCORRUPTED;
> > + }
> > + goto tailpacking;
> > }
> >
> > grp = erofs_find_workgroup(inode->i_sb, map->m_pa >> PAGE_SHIFT);
> > if (grp) {
> > clt->pcl = container_of(grp, struct z_erofs_pcluster, obj);
> > } else {
> > +tailpacking:
> > ret = z_erofs_register_collection(clt, inode, map);
> > -
> > if (!ret)
> > goto out;
> > if (ret != -EEXIST)
> > @@ -558,9 +574,9 @@ static int z_erofs_collector_begin(struct z_erofs_collector *clt,
> > out:
> > z_erofs_pagevec_ctor_init(&clt->vector, Z_EROFS_NR_INLINE_PAGEVECS,
> > clt->cl->pagevec, clt->cl->vcnt);
> > -
> > /* since file-backed online pages are traversed in reverse order */
> > - clt->icpage_ptr = clt->pcl->compressed_pages + clt->pcl->pclusterpages;
> > + clt->icpage_ptr = clt->pcl->compressed_pages +
> > + z_erofs_pclusterpages(clt->pcl);
> > return 0;
> > }
> >
> > @@ -687,8 +703,26 @@ static int z_erofs_do_read_page(struct z_erofs_decompress_frontend *fe,
> > else
> > cache_strategy = DONTALLOC;
> >
> > - preload_compressed_pages(clt, MNGD_MAPPING(sbi),
> > - cache_strategy, pagepool);
> > + if (z_erofs_is_inline_pcluster(clt->pcl)) {
>
> The current cache_strategy is only used by preload_compressed_pages(), so computing
> cache_strategy is needless in the inline branch.
>

Ok, you are right. Will update.

> > + struct page *mpage;
> > +
> > + mpage = erofs_get_meta_page(inode->i_sb,
> > + erofs_blknr(map->m_pa));
>
> could we just use map->mpage directly if it's already the page we want (which is the
> common case in testing), and only call erofs_get_meta_page() otherwise?

Nope, I tend to avoid this since I will introduce a new subpage
feature to clean up all erofs_get_meta_page() usage.

Not that we cannot do it this way; it's just to avoid coupling and messiness.

>
> > + if (IS_ERR(mpage)) {
> > + err = PTR_ERR(mpage);
> > + erofs_err(inode->i_sb,
> > + "failed to get inline page, err %d", err);
> > + goto err_out;
> > + }
> > + /* TODO: new subpage feature will get rid of it */
> > + unlock_page(mpage);
> > +
> > + WRITE_ONCE(clt->pcl->compressed_pages[0], mpage);
> > + clt->mode = COLLECT_PRIMARY_FOLLOWED_NOINPLACE;
> > + } else {
> > + preload_compressed_pages(clt, MNGD_MAPPING(sbi),
> > + cache_strategy, pagepool);
> > + }
> >
> > hitted:
> > /*
> > @@ -844,6 +878,7 @@ static int z_erofs_decompress_pcluster(struct super_block *sb,
> > struct page **pagepool)
> > {
> > struct erofs_sb_info *const sbi = EROFS_SB(sb);
> > + unsigned int pclusterpages = z_erofs_pclusterpages(pcl);
> > struct z_erofs_pagevec_ctor ctor;
> > unsigned int i, inputsize, outputsize, llen, nr_pages;
> > struct page *pages_onstack[Z_EROFS_VMAP_ONSTACK_PAGES];
> > @@ -925,16 +960,15 @@ static int z_erofs_decompress_pcluster(struct super_block *sb,
> > overlapped = false;
> > compressed_pages = pcl->compressed_pages;
> >
> > - for (i = 0; i < pcl->pclusterpages; ++i) {
> > + for (i = 0; i < pclusterpages; ++i) {
> > unsigned int pagenr;
> >
> > page = compressed_pages[i];
> > -
> > /* all compressed pages ought to be valid */
> > DBG_BUGON(!page);
> > - DBG_BUGON(z_erofs_page_is_invalidated(page));
> >
> > - if (!z_erofs_is_shortlived_page(page)) {
> > + if (!z_erofs_is_inline_pcluster(pcl) &&
>
> Some inline checks may still run for the non-inline case if it's a big pcluster. And I
> understand the behavior of a ztailpacking page differs from that of a normal page. So
> would it be better to branch them, e.g. by moving the inline check outside the for loop?

The truth is that I really don't want to add any complexity to this code.
Also, it's my next cleanup target. But I would never separate the
ztailpacking cases out on their own....

Thanks,
Gao Xiang
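
Relatedly, the -EFSCORRUPTED check quoted earlier in the thread enforces that
an inline compressed tail must sit entirely within a single page: the in-page
offset of m_pa plus m_plen may not exceed PAGE_SIZE. A minimal sketch of that
validation, assuming 4KiB pages and illustrative values only:

/* Single-page constraint on an inline pcluster, mirroring the
 * (m_pa & ~PAGE_MASK) + m_plen > PAGE_SIZE check quoted above.
 */
#include <assert.h>
#include <stdbool.h>

#define PAGE_SIZE 4096ul
#define PAGE_MASK (~(PAGE_SIZE - 1))

static bool inline_extent_valid(unsigned long long m_pa, unsigned long m_plen)
{
        return (m_pa & ~PAGE_MASK) + m_plen <= PAGE_SIZE;
}

int main(void)
{
        assert(inline_extent_valid(4096 + 128, 300));   /* fits in one page */
        assert(!inline_extent_valid(4096 + 3900, 300)); /* would cross a page boundary */
        return 0;
}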

2021-12-27 09:39:44

by Yue Hu

[permalink] [raw]
Subject: Re: [PATCH v3 4/5] erofs: support inline data decompression

On Mon, 27 Dec 2021 17:26:25 +0800
Gao Xiang <[email protected]> wrote:

> On Mon, Dec 27, 2021 at 04:18:19PM +0800, Yue Hu wrote:
> > Hi Xiang,
> >
> > On Sat, 25 Dec 2021 15:06:25 +0800
> > Gao Xiang <[email protected]> wrote:
> >
> > > From: Yue Hu <[email protected]>
> > >
> > > Tail-packing inline is already supported for uncompressed files;
> > > let's also implement it for compressed files to save I/O and
> > > storage space.
> > >
> > > Unlike normal pclusters, the compressed data of an inline pcluster
> > > is already available thanks to other metadata I/Os, so such
> > > pclusters move directly into the bypass queue without extra I/O
> > > submission.
> > >
> > > It's the last compression feature before folio/subpage support.
> > >
> > > Signed-off-by: Yue Hu <[email protected]>
> > > Signed-off-by: Gao Xiang <[email protected]>
> > > ---
> > > fs/erofs/zdata.c | 128 ++++++++++++++++++++++++++++++++---------------
> > > fs/erofs/zdata.h | 24 ++++++++-
> > > 2 files changed, 109 insertions(+), 43 deletions(-)
> > >
> > > diff --git a/fs/erofs/zdata.c b/fs/erofs/zdata.c
> > > index bc765d8a6dc2..e6ef02634e08 100644
> > > --- a/fs/erofs/zdata.c
> > > +++ b/fs/erofs/zdata.c
> > > @@ -82,12 +82,13 @@ static struct z_erofs_pcluster *z_erofs_alloc_pcluster(unsigned int nrpages)
> > >
> > > static void z_erofs_free_pcluster(struct z_erofs_pcluster *pcl)
> > > {
> > > + unsigned int pclusterpages = z_erofs_pclusterpages(pcl);
> > > int i;
> > >
> > > for (i = 0; i < ARRAY_SIZE(pcluster_pool); ++i) {
> > > struct z_erofs_pcluster_slab *pcs = pcluster_pool + i;
> > >
> > > - if (pcl->pclusterpages > pcs->maxpages)
> > > + if (pclusterpages > pcs->maxpages)
> > > continue;
> > >
> > > kmem_cache_free(pcs->slab, pcl);
> > > @@ -298,6 +299,7 @@ int erofs_try_to_free_all_cached_pages(struct erofs_sb_info *sbi,
> > > container_of(grp, struct z_erofs_pcluster, obj);
> > > int i;
> > >
> > > + DBG_BUGON(z_erofs_is_inline_pcluster(pcl));
> > > /*
> > > * refcount of workgroup is now freezed as 1,
> > > * therefore no need to worry about available decompression users.
> > > @@ -331,6 +333,7 @@ int erofs_try_to_free_cached_page(struct page *page)
> > > if (erofs_workgroup_try_to_freeze(&pcl->obj, 1)) {
> > > unsigned int i;
> > >
> > > + DBG_BUGON(z_erofs_is_inline_pcluster(pcl));
> > > for (i = 0; i < pcl->pclusterpages; ++i) {
> > > if (pcl->compressed_pages[i] == page) {
> > > WRITE_ONCE(pcl->compressed_pages[i], NULL);
> > > @@ -458,6 +461,7 @@ static int z_erofs_register_collection(struct z_erofs_collector *clt,
> > > struct inode *inode,
> > > struct erofs_map_blocks *map)
> > > {
> > > + bool ztailpacking = map->m_flags & EROFS_MAP_META;
> > > struct z_erofs_pcluster *pcl;
> > > struct z_erofs_collection *cl;
> > > struct erofs_workgroup *grp;
> > > @@ -469,12 +473,12 @@ static int z_erofs_register_collection(struct z_erofs_collector *clt,
> > > }
> > >
> > > /* no available pcluster, let's allocate one */
> > > - pcl = z_erofs_alloc_pcluster(map->m_plen >> PAGE_SHIFT);
> > > + pcl = z_erofs_alloc_pcluster(ztailpacking ? 1 :
> > > + map->m_plen >> PAGE_SHIFT);
> > > if (IS_ERR(pcl))
> > > return PTR_ERR(pcl);
> > >
> > > atomic_set(&pcl->obj.refcount, 1);
> > > - pcl->obj.index = map->m_pa >> PAGE_SHIFT;
> > > pcl->algorithmformat = map->m_algorithmformat;
> > > pcl->length = (map->m_llen << Z_EROFS_PCLUSTER_LENGTH_BIT) |
> > > (map->m_flags & EROFS_MAP_FULL_MAPPED ?
> > > @@ -494,16 +498,25 @@ static int z_erofs_register_collection(struct z_erofs_collector *clt,
> > > mutex_init(&cl->lock);
> > > DBG_BUGON(!mutex_trylock(&cl->lock));
> > >
> > > - grp = erofs_insert_workgroup(inode->i_sb, &pcl->obj);
> > > - if (IS_ERR(grp)) {
> > > - err = PTR_ERR(grp);
> > > - goto err_out;
> > > - }
> > > + if (ztailpacking) {
> > > + pcl->obj.index = 0; /* which indicates ztailpacking */
> > > + pcl->pageofs_in = erofs_blkoff(map->m_pa);
> > > + pcl->tailpacking_size = map->m_plen;
> > > + } else {
> > > + pcl->obj.index = map->m_pa >> PAGE_SHIFT;
> > >
> > > - if (grp != &pcl->obj) {
> > > - clt->pcl = container_of(grp, struct z_erofs_pcluster, obj);
> > > - err = -EEXIST;
> > > - goto err_out;
> > > + grp = erofs_insert_workgroup(inode->i_sb, &pcl->obj);
> > > + if (IS_ERR(grp)) {
> > > + err = PTR_ERR(grp);
> > > + goto err_out;
> > > + }
> > > +
> > > + if (grp != &pcl->obj) {
> > > + clt->pcl = container_of(grp,
> > > + struct z_erofs_pcluster, obj);
> > > + err = -EEXIST;
> > > + goto err_out;
> > > + }
> > > }
> > > /* used to check tail merging loop due to corrupted images */
> > > if (clt->owned_head == Z_EROFS_PCLUSTER_TAIL)
> > > @@ -532,17 +545,20 @@ static int z_erofs_collector_begin(struct z_erofs_collector *clt,
> > > DBG_BUGON(clt->owned_head == Z_EROFS_PCLUSTER_NIL);
> > > DBG_BUGON(clt->owned_head == Z_EROFS_PCLUSTER_TAIL_CLOSED);
> > >
> > > - if (!PAGE_ALIGNED(map->m_pa)) {
> > > - DBG_BUGON(1);
> > > - return -EINVAL;
> > > + if (map->m_flags & EROFS_MAP_META) {
> > > + if ((map->m_pa & ~PAGE_MASK) + map->m_plen > PAGE_SIZE) {
> > > + DBG_BUGON(1);
> > > + return -EFSCORRUPTED;
> > > + }
> > > + goto tailpacking;
> > > }
> > >
> > > grp = erofs_find_workgroup(inode->i_sb, map->m_pa >> PAGE_SHIFT);
> > > if (grp) {
> > > clt->pcl = container_of(grp, struct z_erofs_pcluster, obj);
> > > } else {
> > > +tailpacking:
> > > ret = z_erofs_register_collection(clt, inode, map);
> > > -
> > > if (!ret)
> > > goto out;
> > > if (ret != -EEXIST)
> > > @@ -558,9 +574,9 @@ static int z_erofs_collector_begin(struct z_erofs_collector *clt,
> > > out:
> > > z_erofs_pagevec_ctor_init(&clt->vector, Z_EROFS_NR_INLINE_PAGEVECS,
> > > clt->cl->pagevec, clt->cl->vcnt);
> > > -
> > > /* since file-backed online pages are traversed in reverse order */
> > > - clt->icpage_ptr = clt->pcl->compressed_pages + clt->pcl->pclusterpages;
> > > + clt->icpage_ptr = clt->pcl->compressed_pages +
> > > + z_erofs_pclusterpages(clt->pcl);
> > > return 0;
> > > }
> > >
> > > @@ -687,8 +703,26 @@ static int z_erofs_do_read_page(struct z_erofs_decompress_frontend *fe,
> > > else
> > > cache_strategy = DONTALLOC;
> > >
> > > - preload_compressed_pages(clt, MNGD_MAPPING(sbi),
> > > - cache_strategy, pagepool);
> > > + if (z_erofs_is_inline_pcluster(clt->pcl)) {
> >
> > The current cache_strategy is only used by preload_compressed_pages(), so computing
> > cache_strategy is needless in the inline branch.
> >
>
> Ok, you are right. Will update.
>
> > > + struct page *mpage;
> > > +
> > > + mpage = erofs_get_meta_page(inode->i_sb,
> > > + erofs_blknr(map->m_pa));
> >
> > could we just use map->mpage directly if it's already the page we want (which is the
> > common case in testing), and only call erofs_get_meta_page() otherwise?
>
> Nope, I tend to avoid this since I will introduce a new subpage
> feature to clean up all erofs_get_meta_page() usage.
>
> Not that we cannot do it this way; it's just to avoid coupling and messiness.

got it.

>
> >
> > > + if (IS_ERR(mpage)) {
> > > + err = PTR_ERR(mpage);
> > > + erofs_err(inode->i_sb,
> > > + "failed to get inline page, err %d", err);
> > > + goto err_out;
> > > + }
> > > + /* TODO: new subpage feature will get rid of it */
> > > + unlock_page(mpage);
> > > +
> > > + WRITE_ONCE(clt->pcl->compressed_pages[0], mpage);
> > > + clt->mode = COLLECT_PRIMARY_FOLLOWED_NOINPLACE;
> > > + } else {
> > > + preload_compressed_pages(clt, MNGD_MAPPING(sbi),
> > > + cache_strategy, pagepool);
> > > + }
> > >
> > > hitted:
> > > /*
> > > @@ -844,6 +878,7 @@ static int z_erofs_decompress_pcluster(struct super_block *sb,
> > > struct page **pagepool)
> > > {
> > > struct erofs_sb_info *const sbi = EROFS_SB(sb);
> > > + unsigned int pclusterpages = z_erofs_pclusterpages(pcl);
> > > struct z_erofs_pagevec_ctor ctor;
> > > unsigned int i, inputsize, outputsize, llen, nr_pages;
> > > struct page *pages_onstack[Z_EROFS_VMAP_ONSTACK_PAGES];
> > > @@ -925,16 +960,15 @@ static int z_erofs_decompress_pcluster(struct super_block *sb,
> > > overlapped = false;
> > > compressed_pages = pcl->compressed_pages;
> > >
> > > - for (i = 0; i < pcl->pclusterpages; ++i) {
> > > + for (i = 0; i < pclusterpages; ++i) {
> > > unsigned int pagenr;
> > >
> > > page = compressed_pages[i];
> > > -
> > > /* all compressed pages ought to be valid */
> > > DBG_BUGON(!page);
> > > - DBG_BUGON(z_erofs_page_is_invalidated(page));
> > >
> > > - if (!z_erofs_is_shortlived_page(page)) {
> > > + if (!z_erofs_is_inline_pcluster(pcl) &&
> >
> > Some inline checks may still run for the non-inline case if it's a big pcluster. And I
> > understand the behavior of a ztailpacking page differs from that of a normal page. So
> > would it be better to branch them, e.g. by moving the inline check outside the for loop?
>
> The truth is that I really don't want to add any complexity to this code.
> Also, it's my next cleanup target. But I would never separate the
> ztailpacking cases out on their own....

got it.

>
> Thanks,
> Gao Xiang