2024-05-17 12:51:40

by Zhang Yi

[permalink] [raw]
Subject: [PATCH v5 00/10] ext4: support adding multi-delalloc blocks

Changes since v4:
- In patch 3, switch to check EXT4_ERROR_FS instead of
ext4_forced_shutdown() to prevent warning on errors=continue mode as
Jan suggested.
- In patch 8, rename ext4_da_check_clu_allocated() to
ext4_clu_alloc_state() and change the return value according to the
cluster allocation state as Jan suggested.
- In patch 9, do some appropriate logic changes since
the ext4_clu_alloc_state() has been changed in patch 8, so I remove
the reviewed-by tag from Jan, please take a look again.

Changes since v3:
- Fix two commit message grammatical issues in patch 2 and 4.

Changes since v2:
- Improve the commit message in patch 2,4,6 as Ritesh and Jan
suggested, makes the changes more clear.
- Add patch 3, add a warning if the delalloc counters are still not
zero on inactive.
- In patch 6, add a WARN in ext4_es_insert_delayed_extent(), strictly
requires the end_allocated parameter to be set to false if the
inserting extent belongs to one cluster.
- In patch 9, modify the reserve blocks math formula as Jan suggested,
prevent the count going to be negative.
- In patch 10, update the stale ext4_da_map_blocks() function comments.

Hello!

This patch series is the part 2 prepartory changes of the buffered IO
iomap conversion, I picked them out from my buffered IO iomap conversion
RFC series v3[1], add a fix for an issue found in current ext4 code, and
also add bigalloc feature support. Please look the following patches for
details.

The first 3 patches fix an incorrect delalloc reserved blocks count
issue and add a warning to make it easy to detect, the second 6 patches
make ext4_insert_delayed_block() call path support inserting
multi-delalloc blocks once a time, and the last patch makes
ext4_da_map_blocks() buffer_head unaware, prepared for iomap.

This patch set has been passed 'kvm-xfstests -g auto' tests, I hope it
could be reviewed and merged first.

[1] https://lore.kernel.org/linux-ext4/[email protected]/

Thanks,
Yi.

---
v2: https://lore.kernel.org/linux-ext4/[email protected]/
v3: https://lore.kernel.org/linux-ext4/[email protected]/

Zhang Yi (10):
ext4: factor out a common helper to query extent map
ext4: check the extent status again before inserting delalloc block
ext4: warn if delalloc counters are not zero on inactive
ext4: trim delalloc extent
ext4: drop iblock parameter
ext4: make ext4_es_insert_delayed_block() insert multi-blocks
ext4: make ext4_da_reserve_space() reserve multi-clusters
ext4: factor out a helper to check the cluster allocation state
ext4: make ext4_insert_delayed_block() insert multi-blocks
ext4: make ext4_da_map_blocks() buffer_head unaware

fs/ext4/extents_status.c | 70 +++++++---
fs/ext4/extents_status.h | 5 +-
fs/ext4/inode.c | 250 +++++++++++++++++++++++-------------
fs/ext4/super.c | 6 +-
include/trace/events/ext4.h | 26 ++--
5 files changed, 234 insertions(+), 123 deletions(-)

--
2.39.2



2024-05-17 12:51:46

by Zhang Yi

[permalink] [raw]
Subject: [PATCH v5 05/10] ext4: drop iblock parameter

From: Zhang Yi <[email protected]>

The start block of the delalloc extent to be inserted is equal to
map->m_lblk, just drop the duplicate iblock input parameter.

Signed-off-by: Zhang Yi <[email protected]>
Reviewed-by: Jan Kara <[email protected]>
Reviewed-by: Ritesh Harjani (IBM) <[email protected]>
---
fs/ext4/inode.c | 15 +++++++--------
1 file changed, 7 insertions(+), 8 deletions(-)

diff --git a/fs/ext4/inode.c b/fs/ext4/inode.c
index fb76016e2ab7..de157aebc306 100644
--- a/fs/ext4/inode.c
+++ b/fs/ext4/inode.c
@@ -1712,8 +1712,7 @@ static int ext4_insert_delayed_block(struct inode *inode, ext4_lblk_t lblk)
* time. This function looks up the requested blocks and sets the
* buffer delay bit under the protection of i_data_sem.
*/
-static int ext4_da_map_blocks(struct inode *inode, sector_t iblock,
- struct ext4_map_blocks *map,
+static int ext4_da_map_blocks(struct inode *inode, struct ext4_map_blocks *map,
struct buffer_head *bh)
{
struct extent_status es;
@@ -1733,8 +1732,8 @@ static int ext4_da_map_blocks(struct inode *inode, sector_t iblock,
(unsigned long) map->m_lblk);

/* Lookup extent status tree firstly */
- if (ext4_es_lookup_extent(inode, iblock, NULL, &es)) {
- retval = es.es_len - (iblock - es.es_lblk);
+ if (ext4_es_lookup_extent(inode, map->m_lblk, NULL, &es)) {
+ retval = es.es_len - (map->m_lblk - es.es_lblk);
if (retval > map->m_len)
retval = map->m_len;
map->m_len = retval;
@@ -1754,7 +1753,7 @@ static int ext4_da_map_blocks(struct inode *inode, sector_t iblock,
return 0;
}

- map->m_pblk = ext4_es_pblock(&es) + iblock - es.es_lblk;
+ map->m_pblk = ext4_es_pblock(&es) + map->m_lblk - es.es_lblk;
if (ext4_es_is_written(&es))
map->m_flags |= EXT4_MAP_MAPPED;
else if (ext4_es_is_unwritten(&es))
@@ -1790,8 +1789,8 @@ static int ext4_da_map_blocks(struct inode *inode, sector_t iblock,
* is held in write mode, before inserting a new da entry in
* the extent status tree.
*/
- if (ext4_es_lookup_extent(inode, iblock, NULL, &es)) {
- retval = es.es_len - (iblock - es.es_lblk);
+ if (ext4_es_lookup_extent(inode, map->m_lblk, NULL, &es)) {
+ retval = es.es_len - (map->m_lblk - es.es_lblk);
if (retval > map->m_len)
retval = map->m_len;
map->m_len = retval;
@@ -1848,7 +1847,7 @@ int ext4_da_get_block_prep(struct inode *inode, sector_t iblock,
* preallocated blocks are unmapped but should treated
* the same as allocated blocks.
*/
- ret = ext4_da_map_blocks(inode, iblock, &map, bh);
+ ret = ext4_da_map_blocks(inode, &map, bh);
if (ret <= 0)
return ret;

--
2.39.2


2024-05-17 12:52:44

by Zhang Yi

[permalink] [raw]
Subject: [PATCH v5 04/10] ext4: trim delalloc extent

From: Zhang Yi <[email protected]>

In ext4_da_map_blocks(), we could find four kind of extents in the
extent status tree: hole, unwritten, written and delayed extent. Now we
only trim the map len if we found an unwritten extent or a written
extent. This is okay now since map->m_len is always set to one and we
always insert one delayed block at a time. But this will become isn't
okay for other two cases if ext4_insert_delayed_block() and
ext4_da_map_blocks() support inserting multiple map->len blocks later.

1. If we found a hole in the extent status tree which es->es_len is
shorter than the length we want to write, we should trim the
map->m_len to prevent adding extra delay more blocks than we
expected. For example, assume we write data [A, C) to a file that
contains a hole extent [A, B) and a written extent [B, D) in the
cache.

A B C D
before da write: ...hhhhhh|wwwwww....

Then we will get extent [A, B), we should trim map->m_len to B-A
before inserting new delalloc blocks, if not, the range [B, C) will
be duplicated.

2. If we found a delayed extent in the extent status tree which
es->es_len is shorter than the length we want to write, we should
trim the map->m_len to es->es_len and return directly since the front
part of this map has been delayed, we can't insert the delalloc
extent that contains the latter part in this round, we should return
the delayed length and the caller should increase the position and
call ext4_da_map_blocks() again. For example, assume we write data
[A, C) to a file that contains a delayed extent [A, B) in the cache.

A B C
before da write: ...dddddd|hhh....

Then we will get delayed extent [A, B), we should also trim map->m_len
to B-A and return, if not, we will incorrectly assume that the write
is complete and won't insert [B, C).

So we need to always trim the map->m_len if the found es->es_len in the
extent status tree is shorter than the map->m_len, prearing for
inserting a extent with multiple delalloc blocks. This patch only does a
pre-fix, the handle is crude and ext4_da_map_blocks() deserve a cleanup,
we will do that later.

Signed-off-by: Zhang Yi <[email protected]>
Reviewed-by: Jan Kara <[email protected]>
---
fs/ext4/inode.c | 14 ++++++++++----
1 file changed, 10 insertions(+), 4 deletions(-)

diff --git a/fs/ext4/inode.c b/fs/ext4/inode.c
index 6114ca79f464..fb76016e2ab7 100644
--- a/fs/ext4/inode.c
+++ b/fs/ext4/inode.c
@@ -1734,6 +1734,11 @@ static int ext4_da_map_blocks(struct inode *inode, sector_t iblock,

/* Lookup extent status tree firstly */
if (ext4_es_lookup_extent(inode, iblock, NULL, &es)) {
+ retval = es.es_len - (iblock - es.es_lblk);
+ if (retval > map->m_len)
+ retval = map->m_len;
+ map->m_len = retval;
+
if (ext4_es_is_hole(&es))
goto add_delayed;

@@ -1750,10 +1755,6 @@ static int ext4_da_map_blocks(struct inode *inode, sector_t iblock,
}

map->m_pblk = ext4_es_pblock(&es) + iblock - es.es_lblk;
- retval = es.es_len - (iblock - es.es_lblk);
- if (retval > map->m_len)
- retval = map->m_len;
- map->m_len = retval;
if (ext4_es_is_written(&es))
map->m_flags |= EXT4_MAP_MAPPED;
else if (ext4_es_is_unwritten(&es))
@@ -1790,6 +1791,11 @@ static int ext4_da_map_blocks(struct inode *inode, sector_t iblock,
* the extent status tree.
*/
if (ext4_es_lookup_extent(inode, iblock, NULL, &es)) {
+ retval = es.es_len - (iblock - es.es_lblk);
+ if (retval > map->m_len)
+ retval = map->m_len;
+ map->m_len = retval;
+
if (!ext4_es_is_hole(&es)) {
up_write(&EXT4_I(inode)->i_data_sem);
goto found;
--
2.39.2


2024-05-17 12:52:49

by Zhang Yi

[permalink] [raw]
Subject: [PATCH v5 06/10] ext4: make ext4_es_insert_delayed_block() insert multi-blocks

From: Zhang Yi <[email protected]>

Rename ext4_es_insert_delayed_block() to ext4_es_insert_delayed_extent()
and pass length parameter to make it insert multiple delalloc blocks at
a time. For the case of bigalloc, split the allocated parameter to
lclu_allocated and end_allocated. lclu_allocated indicates the
allocation state of the cluster which is containing the lblk,
end_allocated indicates the allocation state of the extent end, clusters
in the middle of delay allocated extent must be unallocated.

Signed-off-by: Zhang Yi <[email protected]>
Reviewed-by: Jan Kara <[email protected]>
---
fs/ext4/extents_status.c | 70 ++++++++++++++++++++++++++-----------
fs/ext4/extents_status.h | 5 +--
fs/ext4/inode.c | 2 +-
include/trace/events/ext4.h | 16 +++++----
4 files changed, 62 insertions(+), 31 deletions(-)

diff --git a/fs/ext4/extents_status.c b/fs/ext4/extents_status.c
index 4a00e2f019d9..23caf1f028b0 100644
--- a/fs/ext4/extents_status.c
+++ b/fs/ext4/extents_status.c
@@ -2052,34 +2052,49 @@ bool ext4_is_pending(struct inode *inode, ext4_lblk_t lblk)
}

/*
- * ext4_es_insert_delayed_block - adds a delayed block to the extents status
- * tree, adding a pending reservation where
- * needed
+ * ext4_es_insert_delayed_extent - adds some delayed blocks to the extents
+ * status tree, adding a pending reservation
+ * where needed
*
* @inode - file containing the newly added block
- * @lblk - logical block to be added
- * @allocated - indicates whether a physical cluster has been allocated for
- * the logical cluster that contains the block
+ * @lblk - start logical block to be added
+ * @len - length of blocks to be added
+ * @lclu_allocated/end_allocated - indicates whether a physical cluster has
+ * been allocated for the logical cluster
+ * that contains the start/end block. Note that
+ * end_allocated should always be set to false
+ * if the start and the end block are in the
+ * same cluster
*/
-void ext4_es_insert_delayed_block(struct inode *inode, ext4_lblk_t lblk,
- bool allocated)
+void ext4_es_insert_delayed_extent(struct inode *inode, ext4_lblk_t lblk,
+ ext4_lblk_t len, bool lclu_allocated,
+ bool end_allocated)
{
+ struct ext4_sb_info *sbi = EXT4_SB(inode->i_sb);
struct extent_status newes;
+ ext4_lblk_t end = lblk + len - 1;
int err1 = 0, err2 = 0, err3 = 0;
struct extent_status *es1 = NULL;
struct extent_status *es2 = NULL;
- struct pending_reservation *pr = NULL;
+ struct pending_reservation *pr1 = NULL;
+ struct pending_reservation *pr2 = NULL;

if (EXT4_SB(inode->i_sb)->s_mount_state & EXT4_FC_REPLAY)
return;

- es_debug("add [%u/1) delayed to extent status tree of inode %lu\n",
- lblk, inode->i_ino);
+ es_debug("add [%u/%u) delayed to extent status tree of inode %lu\n",
+ lblk, len, inode->i_ino);
+ if (!len)
+ return;
+
+ WARN_ON_ONCE((EXT4_B2C(sbi, lblk) == EXT4_B2C(sbi, end)) &&
+ end_allocated);

newes.es_lblk = lblk;
- newes.es_len = 1;
+ newes.es_len = len;
ext4_es_store_pblock_status(&newes, ~0, EXTENT_STATUS_DELAYED);
- trace_ext4_es_insert_delayed_block(inode, &newes, allocated);
+ trace_ext4_es_insert_delayed_extent(inode, &newes, lclu_allocated,
+ end_allocated);

ext4_es_insert_extent_check(inode, &newes);

@@ -2088,11 +2103,15 @@ void ext4_es_insert_delayed_block(struct inode *inode, ext4_lblk_t lblk,
es1 = __es_alloc_extent(true);
if ((err1 || err2) && !es2)
es2 = __es_alloc_extent(true);
- if ((err1 || err2 || err3) && allocated && !pr)
- pr = __alloc_pending(true);
+ if (err1 || err2 || err3) {
+ if (lclu_allocated && !pr1)
+ pr1 = __alloc_pending(true);
+ if (end_allocated && !pr2)
+ pr2 = __alloc_pending(true);
+ }
write_lock(&EXT4_I(inode)->i_es_lock);

- err1 = __es_remove_extent(inode, lblk, lblk, NULL, es1);
+ err1 = __es_remove_extent(inode, lblk, end, NULL, es1);
if (err1 != 0)
goto error;
/* Free preallocated extent if it didn't get used. */
@@ -2112,13 +2131,22 @@ void ext4_es_insert_delayed_block(struct inode *inode, ext4_lblk_t lblk,
es2 = NULL;
}

- if (allocated) {
- err3 = __insert_pending(inode, lblk, &pr);
+ if (lclu_allocated) {
+ err3 = __insert_pending(inode, lblk, &pr1);
if (err3 != 0)
goto error;
- if (pr) {
- __free_pending(pr);
- pr = NULL;
+ if (pr1) {
+ __free_pending(pr1);
+ pr1 = NULL;
+ }
+ }
+ if (end_allocated) {
+ err3 = __insert_pending(inode, end, &pr2);
+ if (err3 != 0)
+ goto error;
+ if (pr2) {
+ __free_pending(pr2);
+ pr2 = NULL;
}
}
error:
diff --git a/fs/ext4/extents_status.h b/fs/ext4/extents_status.h
index d9847a4a25db..3c8e2edee5d5 100644
--- a/fs/ext4/extents_status.h
+++ b/fs/ext4/extents_status.h
@@ -249,8 +249,9 @@ extern void ext4_exit_pending(void);
extern void ext4_init_pending_tree(struct ext4_pending_tree *tree);
extern void ext4_remove_pending(struct inode *inode, ext4_lblk_t lblk);
extern bool ext4_is_pending(struct inode *inode, ext4_lblk_t lblk);
-extern void ext4_es_insert_delayed_block(struct inode *inode, ext4_lblk_t lblk,
- bool allocated);
+extern void ext4_es_insert_delayed_extent(struct inode *inode, ext4_lblk_t lblk,
+ ext4_lblk_t len, bool lclu_allocated,
+ bool end_allocated);
extern unsigned int ext4_es_delayed_clu(struct inode *inode, ext4_lblk_t lblk,
ext4_lblk_t len);
extern void ext4_clear_inode_es(struct inode *inode);
diff --git a/fs/ext4/inode.c b/fs/ext4/inode.c
index de157aebc306..f64fe8b873ce 100644
--- a/fs/ext4/inode.c
+++ b/fs/ext4/inode.c
@@ -1702,7 +1702,7 @@ static int ext4_insert_delayed_block(struct inode *inode, ext4_lblk_t lblk)
}
}

- ext4_es_insert_delayed_block(inode, lblk, allocated);
+ ext4_es_insert_delayed_extent(inode, lblk, 1, allocated, false);
return 0;
}

diff --git a/include/trace/events/ext4.h b/include/trace/events/ext4.h
index a697f4b77162..6b41ac61310f 100644
--- a/include/trace/events/ext4.h
+++ b/include/trace/events/ext4.h
@@ -2478,11 +2478,11 @@ TRACE_EVENT(ext4_es_shrink,
__entry->scan_time, __entry->nr_skipped, __entry->retried)
);

-TRACE_EVENT(ext4_es_insert_delayed_block,
+TRACE_EVENT(ext4_es_insert_delayed_extent,
TP_PROTO(struct inode *inode, struct extent_status *es,
- bool allocated),
+ bool lclu_allocated, bool end_allocated),

- TP_ARGS(inode, es, allocated),
+ TP_ARGS(inode, es, lclu_allocated, end_allocated),

TP_STRUCT__entry(
__field( dev_t, dev )
@@ -2491,7 +2491,8 @@ TRACE_EVENT(ext4_es_insert_delayed_block,
__field( ext4_lblk_t, len )
__field( ext4_fsblk_t, pblk )
__field( char, status )
- __field( bool, allocated )
+ __field( bool, lclu_allocated )
+ __field( bool, end_allocated )
),

TP_fast_assign(
@@ -2501,16 +2502,17 @@ TRACE_EVENT(ext4_es_insert_delayed_block,
__entry->len = es->es_len;
__entry->pblk = ext4_es_show_pblock(es);
__entry->status = ext4_es_status(es);
- __entry->allocated = allocated;
+ __entry->lclu_allocated = lclu_allocated;
+ __entry->end_allocated = end_allocated;
),

TP_printk("dev %d,%d ino %lu es [%u/%u) mapped %llu status %s "
- "allocated %d",
+ "allocated %d %d",
MAJOR(__entry->dev), MINOR(__entry->dev),
(unsigned long) __entry->ino,
__entry->lblk, __entry->len,
__entry->pblk, show_extent_status(__entry->status),
- __entry->allocated)
+ __entry->lclu_allocated, __entry->end_allocated)
);

/* fsmap traces */
--
2.39.2


2024-05-17 12:53:25

by Zhang Yi

[permalink] [raw]
Subject: [PATCH v5 09/10] ext4: make ext4_insert_delayed_block() insert multi-blocks

From: Zhang Yi <[email protected]>

Rename ext4_insert_delayed_block() to ext4_insert_delayed_blocks(),
pass length parameter to make it insert multiple delalloc blocks at a
time. For non-bigalloc case, just reserve len blocks and insert delalloc
extent. For bigalloc case, we can ensure that the clusters in the middle
of a extent must be unallocated, we only need to check whether the start
and end clusters are delayed/allocated. We should subtract the space for
the start and/or end block(s) if they are allocated.

Signed-off-by: Zhang Yi <[email protected]>
---
fs/ext4/inode.c | 51 ++++++++++++++++++++++++++++++++++---------------
1 file changed, 36 insertions(+), 15 deletions(-)

diff --git a/fs/ext4/inode.c b/fs/ext4/inode.c
index eefedb7264c7..4febee4c833f 100644
--- a/fs/ext4/inode.c
+++ b/fs/ext4/inode.c
@@ -1679,24 +1679,29 @@ static int ext4_clu_alloc_state(struct inode *inode, ext4_lblk_t lblk)
}

/*
- * ext4_insert_delayed_block - adds a delayed block to the extents status
- * tree, incrementing the reserved cluster/block
- * count or making a pending reservation
- * where needed
+ * ext4_insert_delayed_blocks - adds a multiple delayed blocks to the extents
+ * status tree, incrementing the reserved
+ * cluster/block count or making pending
+ * reservations where needed
*
* @inode - file containing the newly added block
- * @lblk - logical block to be added
+ * @lblk - start logical block to be added
+ * @len - length of blocks to be added
*
* Returns 0 on success, negative error code on failure.
*/
-static int ext4_insert_delayed_block(struct inode *inode, ext4_lblk_t lblk)
+static int ext4_insert_delayed_blocks(struct inode *inode, ext4_lblk_t lblk,
+ ext4_lblk_t len)
{
struct ext4_sb_info *sbi = EXT4_SB(inode->i_sb);
int ret;
- bool allocated = false;
+ bool lclu_allocated = false;
+ bool end_allocated = false;
+ ext4_lblk_t resv_clu;
+ ext4_lblk_t end = lblk + len - 1;

/*
- * If the cluster containing lblk is shared with a delayed,
+ * If the cluster containing lblk or end is shared with a delayed,
* written, or unwritten extent in a bigalloc file system, it's
* already been accounted for and does not need to be reserved.
* A pending reservation must be made for the cluster if it's
@@ -1707,23 +1712,39 @@ static int ext4_insert_delayed_block(struct inode *inode, ext4_lblk_t lblk)
* extents status tree doesn't get a match.
*/
if (sbi->s_cluster_ratio == 1) {
- ret = ext4_da_reserve_space(inode, 1);
+ ret = ext4_da_reserve_space(inode, len);
if (ret != 0) /* ENOSPC */
return ret;
} else { /* bigalloc */
+ resv_clu = EXT4_B2C(sbi, end) - EXT4_B2C(sbi, lblk) + 1;
+
ret = ext4_clu_alloc_state(inode, lblk);
if (ret < 0)
return ret;
- if (ret == 2)
- allocated = true;
- if (ret == 0) {
- ret = ext4_da_reserve_space(inode, 1);
+ if (ret > 0) {
+ resv_clu--;
+ lclu_allocated = (ret == 2);
+ }
+
+ if (EXT4_B2C(sbi, lblk) != EXT4_B2C(sbi, end)) {
+ ret = ext4_clu_alloc_state(inode, end);
+ if (ret < 0)
+ return ret;
+ if (ret > 0) {
+ resv_clu--;
+ end_allocated = (ret == 2);
+ }
+ }
+
+ if (resv_clu) {
+ ret = ext4_da_reserve_space(inode, resv_clu);
if (ret != 0) /* ENOSPC */
return ret;
}
}

- ext4_es_insert_delayed_extent(inode, lblk, 1, allocated, false);
+ ext4_es_insert_delayed_extent(inode, lblk, len, lclu_allocated,
+ end_allocated);
return 0;
}

@@ -1828,7 +1849,7 @@ static int ext4_da_map_blocks(struct inode *inode, struct ext4_map_blocks *map,
}
}

- retval = ext4_insert_delayed_block(inode, map->m_lblk);
+ retval = ext4_insert_delayed_blocks(inode, map->m_lblk, map->m_len);
up_write(&EXT4_I(inode)->i_data_sem);
if (retval)
return retval;
--
2.39.2


2024-05-17 12:53:58

by Zhang Yi

[permalink] [raw]
Subject: [PATCH v5 10/10] ext4: make ext4_da_map_blocks() buffer_head unaware

From: Zhang Yi <[email protected]>

After calling the ext4_da_map_blocks(), a delalloc extent state could
be identified through the EXT4_MAP_DELAYED flag in map. So factor out
buffer_head related handles in ext4_da_map_blocks(), make this function
buffer_head unaware and becomes a common helper, and also update the
stale function commtents, preparing for the iomap da write path in the
future.

Signed-off-by: Zhang Yi <[email protected]>
Reviewed-by: Jan Kara <[email protected]>
---
fs/ext4/inode.c | 63 ++++++++++++++++++++++++-------------------------
1 file changed, 31 insertions(+), 32 deletions(-)

diff --git a/fs/ext4/inode.c b/fs/ext4/inode.c
index 4febee4c833f..2afa3f83ddfb 100644
--- a/fs/ext4/inode.c
+++ b/fs/ext4/inode.c
@@ -1749,36 +1749,32 @@ static int ext4_insert_delayed_blocks(struct inode *inode, ext4_lblk_t lblk,
}

/*
- * This function is grabs code from the very beginning of
- * ext4_map_blocks, but assumes that the caller is from delayed write
- * time. This function looks up the requested blocks and sets the
- * buffer delay bit under the protection of i_data_sem.
+ * Looks up the requested blocks and sets the delalloc extent map.
+ * First try to look up for the extent entry that contains the requested
+ * blocks in the extent status tree without i_data_sem, then try to look
+ * up for the ondisk extent mapping with i_data_sem in read mode,
+ * finally hold i_data_sem in write mode, looks up again and add a
+ * delalloc extent entry if it still couldn't find any extent. Pass out
+ * the mapped extent through @map and return 0 on success.
*/
-static int ext4_da_map_blocks(struct inode *inode, struct ext4_map_blocks *map,
- struct buffer_head *bh)
+static int ext4_da_map_blocks(struct inode *inode, struct ext4_map_blocks *map)
{
struct extent_status es;
int retval;
- sector_t invalid_block = ~((sector_t) 0xffff);
#ifdef ES_AGGRESSIVE_TEST
struct ext4_map_blocks orig_map;

memcpy(&orig_map, map, sizeof(*map));
#endif

- if (invalid_block < ext4_blocks_count(EXT4_SB(inode->i_sb)->s_es))
- invalid_block = ~0;
-
map->m_flags = 0;
ext_debug(inode, "max_blocks %u, logical block %lu\n", map->m_len,
(unsigned long) map->m_lblk);

/* Lookup extent status tree firstly */
if (ext4_es_lookup_extent(inode, map->m_lblk, NULL, &es)) {
- retval = es.es_len - (map->m_lblk - es.es_lblk);
- if (retval > map->m_len)
- retval = map->m_len;
- map->m_len = retval;
+ map->m_len = min_t(unsigned int, map->m_len,
+ es.es_len - (map->m_lblk - es.es_lblk));

if (ext4_es_is_hole(&es))
goto add_delayed;
@@ -1788,10 +1784,8 @@ static int ext4_da_map_blocks(struct inode *inode, struct ext4_map_blocks *map,
* Delayed extent could be allocated by fallocate.
* So we need to check it.
*/
- if (ext4_es_is_delayed(&es) && !ext4_es_is_unwritten(&es)) {
- map_bh(bh, inode->i_sb, invalid_block);
- set_buffer_new(bh);
- set_buffer_delay(bh);
+ if (ext4_es_is_delonly(&es)) {
+ map->m_flags |= EXT4_MAP_DELAYED;
return 0;
}

@@ -1806,7 +1800,7 @@ static int ext4_da_map_blocks(struct inode *inode, struct ext4_map_blocks *map,
#ifdef ES_AGGRESSIVE_TEST
ext4_map_blocks_es_recheck(NULL, inode, map, &orig_map, 0);
#endif
- return retval;
+ return 0;
}

/*
@@ -1820,7 +1814,7 @@ static int ext4_da_map_blocks(struct inode *inode, struct ext4_map_blocks *map,
retval = ext4_map_query_blocks(NULL, inode, map);
up_read(&EXT4_I(inode)->i_data_sem);
if (retval)
- return retval;
+ return retval < 0 ? retval : 0;

add_delayed:
down_write(&EXT4_I(inode)->i_data_sem);
@@ -1832,10 +1826,8 @@ static int ext4_da_map_blocks(struct inode *inode, struct ext4_map_blocks *map,
* the extent status tree.
*/
if (ext4_es_lookup_extent(inode, map->m_lblk, NULL, &es)) {
- retval = es.es_len - (map->m_lblk - es.es_lblk);
- if (retval > map->m_len)
- retval = map->m_len;
- map->m_len = retval;
+ map->m_len = min_t(unsigned int, map->m_len,
+ es.es_len - (map->m_lblk - es.es_lblk));

if (!ext4_es_is_hole(&es)) {
up_write(&EXT4_I(inode)->i_data_sem);
@@ -1845,18 +1837,14 @@ static int ext4_da_map_blocks(struct inode *inode, struct ext4_map_blocks *map,
retval = ext4_map_query_blocks(NULL, inode, map);
if (retval) {
up_write(&EXT4_I(inode)->i_data_sem);
- return retval;
+ return retval < 0 ? retval : 0;
}
}

+ map->m_flags |= EXT4_MAP_DELAYED;
retval = ext4_insert_delayed_blocks(inode, map->m_lblk, map->m_len);
up_write(&EXT4_I(inode)->i_data_sem);
- if (retval)
- return retval;

- map_bh(bh, inode->i_sb, invalid_block);
- set_buffer_new(bh);
- set_buffer_delay(bh);
return retval;
}

@@ -1876,11 +1864,15 @@ int ext4_da_get_block_prep(struct inode *inode, sector_t iblock,
struct buffer_head *bh, int create)
{
struct ext4_map_blocks map;
+ sector_t invalid_block = ~((sector_t) 0xffff);
int ret = 0;

BUG_ON(create == 0);
BUG_ON(bh->b_size != inode->i_sb->s_blocksize);

+ if (invalid_block < ext4_blocks_count(EXT4_SB(inode->i_sb)->s_es))
+ invalid_block = ~0;
+
map.m_lblk = iblock;
map.m_len = 1;

@@ -1889,10 +1881,17 @@ int ext4_da_get_block_prep(struct inode *inode, sector_t iblock,
* preallocated blocks are unmapped but should treated
* the same as allocated blocks.
*/
- ret = ext4_da_map_blocks(inode, &map, bh);
- if (ret <= 0)
+ ret = ext4_da_map_blocks(inode, &map);
+ if (ret < 0)
return ret;

+ if (map.m_flags & EXT4_MAP_DELAYED) {
+ map_bh(bh, inode->i_sb, invalid_block);
+ set_buffer_new(bh);
+ set_buffer_delay(bh);
+ return 0;
+ }
+
map_bh(bh, inode->i_sb, map.m_pblk);
ext4_update_bh_state(bh, map.m_flags);

--
2.39.2


2024-05-17 12:54:13

by Zhang Yi

[permalink] [raw]
Subject: [PATCH v5 08/10] ext4: factor out a helper to check the cluster allocation state

From: Zhang Yi <[email protected]>

Factor out a common helper ext4_clu_alloc_state(), check whether the
cluster containing a delalloc block to be added has been allocated or
has delalloc reservation, no logic changes.

Signed-off-by: Zhang Yi <[email protected]>
---
fs/ext4/inode.c | 55 ++++++++++++++++++++++++++++++++++---------------
1 file changed, 38 insertions(+), 17 deletions(-)

diff --git a/fs/ext4/inode.c b/fs/ext4/inode.c
index 0c52969654ac..eefedb7264c7 100644
--- a/fs/ext4/inode.c
+++ b/fs/ext4/inode.c
@@ -1649,6 +1649,35 @@ static void ext4_print_free_blocks(struct inode *inode)
return;
}

+/*
+ * Check whether the cluster containing lblk has been allocated or has
+ * delalloc reservation.
+ *
+ * Returns 0 if the cluster doesn't have either, 1 if it has delalloc
+ * reservation, 2 if it's already been allocated, negative error code on
+ * failure.
+ */
+static int ext4_clu_alloc_state(struct inode *inode, ext4_lblk_t lblk)
+{
+ struct ext4_sb_info *sbi = EXT4_SB(inode->i_sb);
+ int ret;
+
+ /* Has delalloc reservation? */
+ if (ext4_es_scan_clu(inode, &ext4_es_is_delonly, lblk))
+ return 1;
+
+ /* Already been allocated? */
+ if (ext4_es_scan_clu(inode, &ext4_es_is_mapped, lblk))
+ return 2;
+ ret = ext4_clu_mapped(inode, EXT4_B2C(sbi, lblk));
+ if (ret < 0)
+ return ret;
+ if (ret > 0)
+ return 2;
+
+ return 0;
+}
+
/*
* ext4_insert_delayed_block - adds a delayed block to the extents status
* tree, incrementing the reserved cluster/block
@@ -1682,23 +1711,15 @@ static int ext4_insert_delayed_block(struct inode *inode, ext4_lblk_t lblk)
if (ret != 0) /* ENOSPC */
return ret;
} else { /* bigalloc */
- if (!ext4_es_scan_clu(inode, &ext4_es_is_delonly, lblk)) {
- if (!ext4_es_scan_clu(inode,
- &ext4_es_is_mapped, lblk)) {
- ret = ext4_clu_mapped(inode,
- EXT4_B2C(sbi, lblk));
- if (ret < 0)
- return ret;
- if (ret == 0) {
- ret = ext4_da_reserve_space(inode, 1);
- if (ret != 0) /* ENOSPC */
- return ret;
- } else {
- allocated = true;
- }
- } else {
- allocated = true;
- }
+ ret = ext4_clu_alloc_state(inode, lblk);
+ if (ret < 0)
+ return ret;
+ if (ret == 2)
+ allocated = true;
+ if (ret == 0) {
+ ret = ext4_da_reserve_space(inode, 1);
+ if (ret != 0) /* ENOSPC */
+ return ret;
}
}

--
2.39.2


2024-05-20 09:37:32

by Jan Kara

[permalink] [raw]
Subject: Re: [PATCH v5 08/10] ext4: factor out a helper to check the cluster allocation state

On Fri 17-05-24 20:40:03, Zhang Yi wrote:
> From: Zhang Yi <[email protected]>
>
> Factor out a common helper ext4_clu_alloc_state(), check whether the
> cluster containing a delalloc block to be added has been allocated or
> has delalloc reservation, no logic changes.
>
> Signed-off-by: Zhang Yi <[email protected]>

Looks good. Feel free to add:

Reviewed-by: Jan Kara <[email protected]>

Honza

> ---
> fs/ext4/inode.c | 55 ++++++++++++++++++++++++++++++++++---------------
> 1 file changed, 38 insertions(+), 17 deletions(-)
>
> diff --git a/fs/ext4/inode.c b/fs/ext4/inode.c
> index 0c52969654ac..eefedb7264c7 100644
> --- a/fs/ext4/inode.c
> +++ b/fs/ext4/inode.c
> @@ -1649,6 +1649,35 @@ static void ext4_print_free_blocks(struct inode *inode)
> return;
> }
>
> +/*
> + * Check whether the cluster containing lblk has been allocated or has
> + * delalloc reservation.
> + *
> + * Returns 0 if the cluster doesn't have either, 1 if it has delalloc
> + * reservation, 2 if it's already been allocated, negative error code on
> + * failure.
> + */
> +static int ext4_clu_alloc_state(struct inode *inode, ext4_lblk_t lblk)
> +{
> + struct ext4_sb_info *sbi = EXT4_SB(inode->i_sb);
> + int ret;
> +
> + /* Has delalloc reservation? */
> + if (ext4_es_scan_clu(inode, &ext4_es_is_delonly, lblk))
> + return 1;
> +
> + /* Already been allocated? */
> + if (ext4_es_scan_clu(inode, &ext4_es_is_mapped, lblk))
> + return 2;
> + ret = ext4_clu_mapped(inode, EXT4_B2C(sbi, lblk));
> + if (ret < 0)
> + return ret;
> + if (ret > 0)
> + return 2;
> +
> + return 0;
> +}
> +
> /*
> * ext4_insert_delayed_block - adds a delayed block to the extents status
> * tree, incrementing the reserved cluster/block
> @@ -1682,23 +1711,15 @@ static int ext4_insert_delayed_block(struct inode *inode, ext4_lblk_t lblk)
> if (ret != 0) /* ENOSPC */
> return ret;
> } else { /* bigalloc */
> - if (!ext4_es_scan_clu(inode, &ext4_es_is_delonly, lblk)) {
> - if (!ext4_es_scan_clu(inode,
> - &ext4_es_is_mapped, lblk)) {
> - ret = ext4_clu_mapped(inode,
> - EXT4_B2C(sbi, lblk));
> - if (ret < 0)
> - return ret;
> - if (ret == 0) {
> - ret = ext4_da_reserve_space(inode, 1);
> - if (ret != 0) /* ENOSPC */
> - return ret;
> - } else {
> - allocated = true;
> - }
> - } else {
> - allocated = true;
> - }
> + ret = ext4_clu_alloc_state(inode, lblk);
> + if (ret < 0)
> + return ret;
> + if (ret == 2)
> + allocated = true;
> + if (ret == 0) {
> + ret = ext4_da_reserve_space(inode, 1);
> + if (ret != 0) /* ENOSPC */
> + return ret;
> }
> }
>
> --
> 2.39.2
>
--
Jan Kara <[email protected]>
SUSE Labs, CR

2024-05-20 09:40:02

by Jan Kara

[permalink] [raw]
Subject: Re: [PATCH v5 09/10] ext4: make ext4_insert_delayed_block() insert multi-blocks

On Fri 17-05-24 20:40:04, Zhang Yi wrote:
> From: Zhang Yi <[email protected]>
>
> Rename ext4_insert_delayed_block() to ext4_insert_delayed_blocks(),
> pass length parameter to make it insert multiple delalloc blocks at a
> time. For non-bigalloc case, just reserve len blocks and insert delalloc
> extent. For bigalloc case, we can ensure that the clusters in the middle
> of a extent must be unallocated, we only need to check whether the start
> and end clusters are delayed/allocated. We should subtract the space for
> the start and/or end block(s) if they are allocated.
>
> Signed-off-by: Zhang Yi <[email protected]>

Looks good. Feel free to add:

Reviewed-by: Jan Kara <[email protected]>

Honza

> ---
> fs/ext4/inode.c | 51 ++++++++++++++++++++++++++++++++++---------------
> 1 file changed, 36 insertions(+), 15 deletions(-)
>
> diff --git a/fs/ext4/inode.c b/fs/ext4/inode.c
> index eefedb7264c7..4febee4c833f 100644
> --- a/fs/ext4/inode.c
> +++ b/fs/ext4/inode.c
> @@ -1679,24 +1679,29 @@ static int ext4_clu_alloc_state(struct inode *inode, ext4_lblk_t lblk)
> }
>
> /*
> - * ext4_insert_delayed_block - adds a delayed block to the extents status
> - * tree, incrementing the reserved cluster/block
> - * count or making a pending reservation
> - * where needed
> + * ext4_insert_delayed_blocks - adds a multiple delayed blocks to the extents
> + * status tree, incrementing the reserved
> + * cluster/block count or making pending
> + * reservations where needed
> *
> * @inode - file containing the newly added block
> - * @lblk - logical block to be added
> + * @lblk - start logical block to be added
> + * @len - length of blocks to be added
> *
> * Returns 0 on success, negative error code on failure.
> */
> -static int ext4_insert_delayed_block(struct inode *inode, ext4_lblk_t lblk)
> +static int ext4_insert_delayed_blocks(struct inode *inode, ext4_lblk_t lblk,
> + ext4_lblk_t len)
> {
> struct ext4_sb_info *sbi = EXT4_SB(inode->i_sb);
> int ret;
> - bool allocated = false;
> + bool lclu_allocated = false;
> + bool end_allocated = false;
> + ext4_lblk_t resv_clu;
> + ext4_lblk_t end = lblk + len - 1;
>
> /*
> - * If the cluster containing lblk is shared with a delayed,
> + * If the cluster containing lblk or end is shared with a delayed,
> * written, or unwritten extent in a bigalloc file system, it's
> * already been accounted for and does not need to be reserved.
> * A pending reservation must be made for the cluster if it's
> @@ -1707,23 +1712,39 @@ static int ext4_insert_delayed_block(struct inode *inode, ext4_lblk_t lblk)
> * extents status tree doesn't get a match.
> */
> if (sbi->s_cluster_ratio == 1) {
> - ret = ext4_da_reserve_space(inode, 1);
> + ret = ext4_da_reserve_space(inode, len);
> if (ret != 0) /* ENOSPC */
> return ret;
> } else { /* bigalloc */
> + resv_clu = EXT4_B2C(sbi, end) - EXT4_B2C(sbi, lblk) + 1;
> +
> ret = ext4_clu_alloc_state(inode, lblk);
> if (ret < 0)
> return ret;
> - if (ret == 2)
> - allocated = true;
> - if (ret == 0) {
> - ret = ext4_da_reserve_space(inode, 1);
> + if (ret > 0) {
> + resv_clu--;
> + lclu_allocated = (ret == 2);
> + }
> +
> + if (EXT4_B2C(sbi, lblk) != EXT4_B2C(sbi, end)) {
> + ret = ext4_clu_alloc_state(inode, end);
> + if (ret < 0)
> + return ret;
> + if (ret > 0) {
> + resv_clu--;
> + end_allocated = (ret == 2);
> + }
> + }
> +
> + if (resv_clu) {
> + ret = ext4_da_reserve_space(inode, resv_clu);
> if (ret != 0) /* ENOSPC */
> return ret;
> }
> }
>
> - ext4_es_insert_delayed_extent(inode, lblk, 1, allocated, false);
> + ext4_es_insert_delayed_extent(inode, lblk, len, lclu_allocated,
> + end_allocated);
> return 0;
> }
>
> @@ -1828,7 +1849,7 @@ static int ext4_da_map_blocks(struct inode *inode, struct ext4_map_blocks *map,
> }
> }
>
> - retval = ext4_insert_delayed_block(inode, map->m_lblk);
> + retval = ext4_insert_delayed_blocks(inode, map->m_lblk, map->m_len);
> up_write(&EXT4_I(inode)->i_data_sem);
> if (retval)
> return retval;
> --
> 2.39.2
>
--
Jan Kara <[email protected]>
SUSE Labs, CR