From: Zhang Yi <[email protected]>
Changes since v3:
- Fix two commit message grammatical issues in patch 2 and 4.
Changes since v2:
- Improve the commit message in patch 2,4,6 as Ritesh and Jan
suggested, makes the changes more clear.
- Add patch 3, add a warning if the delalloc counters are still not
zero on inactive.
- In patch 6, add a WARN in ext4_es_insert_delayed_extent(), strictly
requires the end_allocated parameter to be set to false if the
inserting extent belongs to one cluster.
- In patch 9, modify the reserve blocks math formula as Jan suggested,
prevent the count going to be negative.
- In patch 10, update the stale ext4_da_map_blocks() function comments.
Hello!
This patch series is the part 2 prepartory changes of the buffered IO
iomap conversion, I picked them out from my buffered IO iomap conversion
RFC series v3[1], add a fix for an issue found in current ext4 code, and
also add bigalloc feature support. Please look the following patches for
details.
The first 3 patches fix an incorrect delalloc reserved blocks count
issue and add a warning to make it easy to detect, the second 6 patches
make ext4_insert_delayed_block() call path support inserting
multi-delalloc blocks once a time, and the last patch makes
ext4_da_map_blocks() buffer_head unaware, prepared for iomap.
This patch set has been passed 'kvm-xfstests -g auto' tests, I hope it
could be reviewed and merged first.
[1] https://lore.kernel.org/linux-ext4/[email protected]/
Thanks,
Yi.
---
v2: https://lore.kernel.org/linux-ext4/[email protected]/
v3: https://lore.kernel.org/linux-ext4/[email protected]/
Zhang Yi (10):
ext4: factor out a common helper to query extent map
ext4: check the extent status again before inserting delalloc block
ext4: warn if delalloc counters are not zero on inactive
ext4: trim delalloc extent
ext4: drop iblock parameter
ext4: make ext4_es_insert_delayed_block() insert multi-blocks
ext4: make ext4_da_reserve_space() reserve multi-clusters
ext4: factor out check for whether a cluster is allocated
ext4: make ext4_insert_delayed_block() insert multi-blocks
ext4: make ext4_da_map_blocks() buffer_head unaware
fs/ext4/extents_status.c | 70 +++++++---
fs/ext4/extents_status.h | 5 +-
fs/ext4/inode.c | 248 +++++++++++++++++++++++-------------
fs/ext4/super.c | 6 +-
include/trace/events/ext4.h | 26 ++--
5 files changed, 231 insertions(+), 124 deletions(-)
--
2.39.2
From: Zhang Yi <[email protected]>
The start block of the delalloc extent to be inserted is equal to
map->m_lblk, just drop the duplicate iblock input parameter.
Signed-off-by: Zhang Yi <[email protected]>
Reviewed-by: Jan Kara <[email protected]>
Reviewed-by: Ritesh Harjani (IBM) <[email protected]>
---
fs/ext4/inode.c | 15 +++++++--------
1 file changed, 7 insertions(+), 8 deletions(-)
diff --git a/fs/ext4/inode.c b/fs/ext4/inode.c
index fb76016e2ab7..de157aebc306 100644
--- a/fs/ext4/inode.c
+++ b/fs/ext4/inode.c
@@ -1712,8 +1712,7 @@ static int ext4_insert_delayed_block(struct inode *inode, ext4_lblk_t lblk)
* time. This function looks up the requested blocks and sets the
* buffer delay bit under the protection of i_data_sem.
*/
-static int ext4_da_map_blocks(struct inode *inode, sector_t iblock,
- struct ext4_map_blocks *map,
+static int ext4_da_map_blocks(struct inode *inode, struct ext4_map_blocks *map,
struct buffer_head *bh)
{
struct extent_status es;
@@ -1733,8 +1732,8 @@ static int ext4_da_map_blocks(struct inode *inode, sector_t iblock,
(unsigned long) map->m_lblk);
/* Lookup extent status tree firstly */
- if (ext4_es_lookup_extent(inode, iblock, NULL, &es)) {
- retval = es.es_len - (iblock - es.es_lblk);
+ if (ext4_es_lookup_extent(inode, map->m_lblk, NULL, &es)) {
+ retval = es.es_len - (map->m_lblk - es.es_lblk);
if (retval > map->m_len)
retval = map->m_len;
map->m_len = retval;
@@ -1754,7 +1753,7 @@ static int ext4_da_map_blocks(struct inode *inode, sector_t iblock,
return 0;
}
- map->m_pblk = ext4_es_pblock(&es) + iblock - es.es_lblk;
+ map->m_pblk = ext4_es_pblock(&es) + map->m_lblk - es.es_lblk;
if (ext4_es_is_written(&es))
map->m_flags |= EXT4_MAP_MAPPED;
else if (ext4_es_is_unwritten(&es))
@@ -1790,8 +1789,8 @@ static int ext4_da_map_blocks(struct inode *inode, sector_t iblock,
* is held in write mode, before inserting a new da entry in
* the extent status tree.
*/
- if (ext4_es_lookup_extent(inode, iblock, NULL, &es)) {
- retval = es.es_len - (iblock - es.es_lblk);
+ if (ext4_es_lookup_extent(inode, map->m_lblk, NULL, &es)) {
+ retval = es.es_len - (map->m_lblk - es.es_lblk);
if (retval > map->m_len)
retval = map->m_len;
map->m_len = retval;
@@ -1848,7 +1847,7 @@ int ext4_da_get_block_prep(struct inode *inode, sector_t iblock,
* preallocated blocks are unmapped but should treated
* the same as allocated blocks.
*/
- ret = ext4_da_map_blocks(inode, iblock, &map, bh);
+ ret = ext4_da_map_blocks(inode, &map, bh);
if (ret <= 0)
return ret;
--
2.39.2
From: Zhang Yi <[email protected]>
Factor out a common helper ext4_da_check_clu_allocated(), check whether
the cluster containing a delalloc block to be added has been delayed or
allocated, no logic changes.
Signed-off-by: Zhang Yi <[email protected]>
---
fs/ext4/inode.c | 52 +++++++++++++++++++++++++++++++++----------------
1 file changed, 35 insertions(+), 17 deletions(-)
diff --git a/fs/ext4/inode.c b/fs/ext4/inode.c
index 0c52969654ac..6e418d3f8e87 100644
--- a/fs/ext4/inode.c
+++ b/fs/ext4/inode.c
@@ -1649,6 +1649,34 @@ static void ext4_print_free_blocks(struct inode *inode)
return;
}
+/*
+ * Check whether the cluster containing lblk has been delayed or allocated,
+ * if not, it means we should reserve a cluster when add delalloc, return 1,
+ * otherwise return 0 or error code.
+ */
+static int ext4_da_check_clu_allocated(struct inode *inode, ext4_lblk_t lblk,
+ bool *allocated)
+{
+ struct ext4_sb_info *sbi = EXT4_SB(inode->i_sb);
+ int ret;
+
+ *allocated = false;
+ if (ext4_es_scan_clu(inode, &ext4_es_is_delonly, lblk))
+ return 0;
+
+ if (ext4_es_scan_clu(inode, &ext4_es_is_mapped, lblk))
+ goto allocated;
+
+ ret = ext4_clu_mapped(inode, EXT4_B2C(sbi, lblk));
+ if (ret < 0)
+ return ret;
+ if (ret == 0)
+ return 1;
+allocated:
+ *allocated = true;
+ return 0;
+}
+
/*
* ext4_insert_delayed_block - adds a delayed block to the extents status
* tree, incrementing the reserved cluster/block
@@ -1682,23 +1710,13 @@ static int ext4_insert_delayed_block(struct inode *inode, ext4_lblk_t lblk)
if (ret != 0) /* ENOSPC */
return ret;
} else { /* bigalloc */
- if (!ext4_es_scan_clu(inode, &ext4_es_is_delonly, lblk)) {
- if (!ext4_es_scan_clu(inode,
- &ext4_es_is_mapped, lblk)) {
- ret = ext4_clu_mapped(inode,
- EXT4_B2C(sbi, lblk));
- if (ret < 0)
- return ret;
- if (ret == 0) {
- ret = ext4_da_reserve_space(inode, 1);
- if (ret != 0) /* ENOSPC */
- return ret;
- } else {
- allocated = true;
- }
- } else {
- allocated = true;
- }
+ ret = ext4_da_check_clu_allocated(inode, lblk, &allocated);
+ if (ret < 0)
+ return ret;
+ if (ret > 0) {
+ ret = ext4_da_reserve_space(inode, 1);
+ if (ret != 0) /* ENOSPC */
+ return ret;
}
}
--
2.39.2
From: Zhang Yi <[email protected]>
After calling the ext4_da_map_blocks(), a delalloc extent state could
be identified through the EXT4_MAP_DELAYED flag in map. So factor out
buffer_head related handles in ext4_da_map_blocks(), make this function
buffer_head unaware and becomes a common helper, and also update the
stale function commtents, preparing for the iomap da write path in the
future.
Signed-off-by: Zhang Yi <[email protected]>
---
fs/ext4/inode.c | 63 ++++++++++++++++++++++++-------------------------
1 file changed, 31 insertions(+), 32 deletions(-)
diff --git a/fs/ext4/inode.c b/fs/ext4/inode.c
index c56386d1b10d..1dba5337382a 100644
--- a/fs/ext4/inode.c
+++ b/fs/ext4/inode.c
@@ -1745,36 +1745,32 @@ static int ext4_insert_delayed_blocks(struct inode *inode, ext4_lblk_t lblk,
}
/*
- * This function is grabs code from the very beginning of
- * ext4_map_blocks, but assumes that the caller is from delayed write
- * time. This function looks up the requested blocks and sets the
- * buffer delay bit under the protection of i_data_sem.
+ * Looks up the requested blocks and sets the delalloc extent map.
+ * First try to look up for the extent entry that contains the requested
+ * blocks in the extent status tree without i_data_sem, then try to look
+ * up for the ondisk extent mapping with i_data_sem in read mode,
+ * finally hold i_data_sem in write mode, looks up again and add a
+ * delalloc extent entry if it still couldn't find any extent. Pass out
+ * the mapped extent through @map and return 0 on success.
*/
-static int ext4_da_map_blocks(struct inode *inode, struct ext4_map_blocks *map,
- struct buffer_head *bh)
+static int ext4_da_map_blocks(struct inode *inode, struct ext4_map_blocks *map)
{
struct extent_status es;
int retval;
- sector_t invalid_block = ~((sector_t) 0xffff);
#ifdef ES_AGGRESSIVE_TEST
struct ext4_map_blocks orig_map;
memcpy(&orig_map, map, sizeof(*map));
#endif
- if (invalid_block < ext4_blocks_count(EXT4_SB(inode->i_sb)->s_es))
- invalid_block = ~0;
-
map->m_flags = 0;
ext_debug(inode, "max_blocks %u, logical block %lu\n", map->m_len,
(unsigned long) map->m_lblk);
/* Lookup extent status tree firstly */
if (ext4_es_lookup_extent(inode, map->m_lblk, NULL, &es)) {
- retval = es.es_len - (map->m_lblk - es.es_lblk);
- if (retval > map->m_len)
- retval = map->m_len;
- map->m_len = retval;
+ map->m_len = min_t(unsigned int, map->m_len,
+ es.es_len - (map->m_lblk - es.es_lblk));
if (ext4_es_is_hole(&es))
goto add_delayed;
@@ -1784,10 +1780,8 @@ static int ext4_da_map_blocks(struct inode *inode, struct ext4_map_blocks *map,
* Delayed extent could be allocated by fallocate.
* So we need to check it.
*/
- if (ext4_es_is_delayed(&es) && !ext4_es_is_unwritten(&es)) {
- map_bh(bh, inode->i_sb, invalid_block);
- set_buffer_new(bh);
- set_buffer_delay(bh);
+ if (ext4_es_is_delonly(&es)) {
+ map->m_flags |= EXT4_MAP_DELAYED;
return 0;
}
@@ -1802,7 +1796,7 @@ static int ext4_da_map_blocks(struct inode *inode, struct ext4_map_blocks *map,
#ifdef ES_AGGRESSIVE_TEST
ext4_map_blocks_es_recheck(NULL, inode, map, &orig_map, 0);
#endif
- return retval;
+ return 0;
}
/*
@@ -1816,7 +1810,7 @@ static int ext4_da_map_blocks(struct inode *inode, struct ext4_map_blocks *map,
retval = ext4_map_query_blocks(NULL, inode, map);
up_read(&EXT4_I(inode)->i_data_sem);
if (retval)
- return retval;
+ return retval < 0 ? retval : 0;
add_delayed:
down_write(&EXT4_I(inode)->i_data_sem);
@@ -1828,10 +1822,8 @@ static int ext4_da_map_blocks(struct inode *inode, struct ext4_map_blocks *map,
* the extent status tree.
*/
if (ext4_es_lookup_extent(inode, map->m_lblk, NULL, &es)) {
- retval = es.es_len - (map->m_lblk - es.es_lblk);
- if (retval > map->m_len)
- retval = map->m_len;
- map->m_len = retval;
+ map->m_len = min_t(unsigned int, map->m_len,
+ es.es_len - (map->m_lblk - es.es_lblk));
if (!ext4_es_is_hole(&es)) {
up_write(&EXT4_I(inode)->i_data_sem);
@@ -1841,18 +1833,14 @@ static int ext4_da_map_blocks(struct inode *inode, struct ext4_map_blocks *map,
retval = ext4_map_query_blocks(NULL, inode, map);
if (retval) {
up_write(&EXT4_I(inode)->i_data_sem);
- return retval;
+ return retval < 0 ? retval : 0;
}
}
+ map->m_flags |= EXT4_MAP_DELAYED;
retval = ext4_insert_delayed_blocks(inode, map->m_lblk, map->m_len);
up_write(&EXT4_I(inode)->i_data_sem);
- if (retval)
- return retval;
- map_bh(bh, inode->i_sb, invalid_block);
- set_buffer_new(bh);
- set_buffer_delay(bh);
return retval;
}
@@ -1872,11 +1860,15 @@ int ext4_da_get_block_prep(struct inode *inode, sector_t iblock,
struct buffer_head *bh, int create)
{
struct ext4_map_blocks map;
+ sector_t invalid_block = ~((sector_t) 0xffff);
int ret = 0;
BUG_ON(create == 0);
BUG_ON(bh->b_size != inode->i_sb->s_blocksize);
+ if (invalid_block < ext4_blocks_count(EXT4_SB(inode->i_sb)->s_es))
+ invalid_block = ~0;
+
map.m_lblk = iblock;
map.m_len = 1;
@@ -1885,10 +1877,17 @@ int ext4_da_get_block_prep(struct inode *inode, sector_t iblock,
* preallocated blocks are unmapped but should treated
* the same as allocated blocks.
*/
- ret = ext4_da_map_blocks(inode, &map, bh);
- if (ret <= 0)
+ ret = ext4_da_map_blocks(inode, &map);
+ if (ret < 0)
return ret;
+ if (map.m_flags & EXT4_MAP_DELAYED) {
+ map_bh(bh, inode->i_sb, invalid_block);
+ set_buffer_new(bh);
+ set_buffer_delay(bh);
+ return 0;
+ }
+
map_bh(bh, inode->i_sb, map.m_pblk);
ext4_update_bh_state(bh, map.m_flags);
--
2.39.2