2023-09-13 02:13:49

by Eric Whitney

[permalink] [raw]
Subject: [PATCH 0/6] improve cluster and block removal code

This patch series cleans up and rewrites parts of the code used to free
clusters or blocks when space is removed from a file. The intent is to
improve the readability, clarity, and efficiency of that code. These
changes do not fix any known bugs.

Eric Whitney (6):
ext4: consolidate code used to free clusters
ext4: rework partial cluster definition and related tracepoints
ext4: rework partial cluster handling to use lblk more consistently
ext4: consolidate partial cluster initialization
ext4: simplify and improve efficiency of cluster removal code
ext4: remove mballoc's NOFREE flags

fs/ext4/ext4.h | 4 +-
fs/ext4/ext4_extents.h | 19 +-
fs/ext4/extents.c | 371 ++++++++++++++++++------------------
fs/ext4/mballoc.c | 25 +--
include/trace/events/ext4.h | 123 ++++++++----
5 files changed, 287 insertions(+), 255 deletions(-)

--
2.30.2


2023-09-13 02:13:57

by Eric Whitney

[permalink] [raw]
Subject: [PATCH 1/6] ext4: consolidate code used to free clusters

The code used to free clusters when removing a block range from an extent
tree belonging to a bigalloc file system is duplicated in several places.
Collect it into a single function for improved readability. Fold
ext4_rereserve_cluster into that function, as it has only one call site
after consolidation and contains a small amount of code. Improve comments
where clusters are freed and clean up the header for ext4_ext_rm_leaf().

Signed-off-by: Eric Whitney <[email protected]>
---
fs/ext4/extents.c | 146 ++++++++++++++++++++--------------------------
1 file changed, 64 insertions(+), 82 deletions(-)

diff --git a/fs/ext4/extents.c b/fs/ext4/extents.c
index 202c76996b62..9470502b886a 100644
--- a/fs/ext4/extents.c
+++ b/fs/ext4/extents.c
@@ -2420,35 +2420,46 @@ static inline int get_default_free_blocks_flags(struct inode *inode)
return 0;
}

-/*
- * ext4_rereserve_cluster - increment the reserved cluster count when
- * freeing a cluster with a pending reservation
- *
- * @inode - file containing the cluster
- * @lblk - logical block in cluster to be reserved
+/**
+ * free_partial_cluster() - frees all the allocated blocks contained in a
+ * partial cluster and rereserves space for delayed
+ * allocated blocks it contains
*
- * Increments the reserved cluster count and adjusts quota in a bigalloc
- * file system when freeing a partial cluster containing at least one
- * delayed and unwritten block. A partial cluster meeting that
- * requirement will have a pending reservation. If so, the
- * RERESERVE_CLUSTER flag is used when calling ext4_free_blocks() to
- * defer reserved and allocated space accounting to a subsequent call
- * to this function.
+ * @handle: journal handle for current transaction
+ * @inode: file containing the partial cluster
+ * @partial: partial cluster to be freed
*/
-static void ext4_rereserve_cluster(struct inode *inode, ext4_lblk_t lblk)
+static void free_partial_cluster(handle_t *handle, struct inode *inode,
+ struct partial_cluster *partial)
{
struct ext4_sb_info *sbi = EXT4_SB(inode->i_sb);
struct ext4_inode_info *ei = EXT4_I(inode);
+ int flags = get_default_free_blocks_flags(inode);
+
+ /*
+ * When the partial cluster contains at least one delayed and
+ * unwritten block (has pending reservation), the RERESERVE_CLUSTER
+ * flag forces ext4_free_blocks() to defer reserved and allocated
+ * space accounting to this function. This avoids potentially hard to
+ * handle ENOSPC conditions when the file system is near exhaustion.
+ */
+ if (ext4_is_pending(inode, partial->lblk))
+ flags |= EXT4_FREE_BLOCKS_RERESERVE_CLUSTER;
+
+ ext4_free_blocks(handle, inode, NULL, EXT4_C2B(sbi, partial->pclu),
+ sbi->s_cluster_ratio, flags);

- dquot_reclaim_block(inode, EXT4_C2B(sbi, 1));
+ if (flags & EXT4_FREE_BLOCKS_RERESERVE_CLUSTER) {
+ dquot_reclaim_block(inode, EXT4_C2B(sbi, 1));

- spin_lock(&ei->i_block_reservation_lock);
- ei->i_reserved_data_blocks++;
- percpu_counter_add(&sbi->s_dirtyclusters_counter, 1);
- spin_unlock(&ei->i_block_reservation_lock);
+ spin_lock(&ei->i_block_reservation_lock);
+ ei->i_reserved_data_blocks++;
+ percpu_counter_add(&sbi->s_dirtyclusters_counter, 1);
+ spin_unlock(&ei->i_block_reservation_lock);

- percpu_counter_add(&sbi->s_freeclusters_counter, 1);
- ext4_remove_pending(inode, lblk);
+ percpu_counter_add(&sbi->s_freeclusters_counter, 1);
+ ext4_remove_pending(inode, partial->lblk);
+ }
}

static int ext4_remove_blocks(handle_t *handle, struct inode *inode,
@@ -2491,19 +2502,10 @@ static int ext4_remove_blocks(handle_t *handle, struct inode *inode,
* cluster of the last block in the extent, we free it
*/
last_pblk = ext4_ext_pblock(ex) + ee_len - 1;
-
if (partial->state != initial &&
partial->pclu != EXT4_B2C(sbi, last_pblk)) {
- if (partial->state == tofree) {
- flags = get_default_free_blocks_flags(inode);
- if (ext4_is_pending(inode, partial->lblk))
- flags |= EXT4_FREE_BLOCKS_RERESERVE_CLUSTER;
- ext4_free_blocks(handle, inode, NULL,
- EXT4_C2B(sbi, partial->pclu),
- sbi->s_cluster_ratio, flags);
- if (flags & EXT4_FREE_BLOCKS_RERESERVE_CLUSTER)
- ext4_rereserve_cluster(inode, partial->lblk);
- }
+ if (partial->state == tofree)
+ free_partial_cluster(handle, inode, partial);
partial->state = initial;
}

@@ -2516,23 +2518,21 @@ static int ext4_remove_blocks(handle_t *handle, struct inode *inode,
* state is nofree). If a partial cluster exists here, it must be
* shared with the last block in the extent.
*/
- flags = get_default_free_blocks_flags(inode);

/* partial, left end cluster aligned, right end unaligned */
if ((EXT4_LBLK_COFF(sbi, to) != sbi->s_cluster_ratio - 1) &&
(EXT4_LBLK_CMASK(sbi, to) >= from) &&
(partial->state != nofree)) {
- if (ext4_is_pending(inode, to))
- flags |= EXT4_FREE_BLOCKS_RERESERVE_CLUSTER;
- ext4_free_blocks(handle, inode, NULL,
- EXT4_PBLK_CMASK(sbi, last_pblk),
- sbi->s_cluster_ratio, flags);
- if (flags & EXT4_FREE_BLOCKS_RERESERVE_CLUSTER)
- ext4_rereserve_cluster(inode, to);
+ if (partial->state == initial) {
+ partial->pclu = EXT4_B2C(sbi, last_pblk);
+ partial->lblk = to;
+ partial->state = tofree;
+ }
+ free_partial_cluster(handle, inode, partial);
partial->state = initial;
- flags = get_default_free_blocks_flags(inode);
}

+ flags = get_default_free_blocks_flags(inode);
flags |= EXT4_FREE_BLOCKS_NOFREE_LAST_CLUSTER;

/*
@@ -2571,20 +2571,17 @@ static int ext4_remove_blocks(handle_t *handle, struct inode *inode,
return 0;
}

-/*
- * ext4_ext_rm_leaf() Removes the extents associated with the
- * blocks appearing between "start" and "end". Both "start"
- * and "end" must appear in the same extent or EIO is returned.
+/**
+ * ext4_ext_rm_leaf() - Removes the extents associated with the blocks
+ * appearing between "start" and "end"
*
* @handle: The journal handle
- * @inode: The files inode
- * @path: The path to the leaf
- * @partial_cluster: The cluster which we'll have to free if all extents
- * has been released from it. However, if this value is
- * negative, it's a cluster just to the right of the
- * punched region and it must not be freed.
- * @start: The first block to remove
- * @end: The last block to remove
+ * @inode: The file's inode
+ * @path: The path to the leaf
+ * @partial: Information used to determine whether a cluster in a bigalloc
+ * file system should be freed as extents are removed
+ * @start: The first block to remove
+ * @end: The last block to remove
*/
static int
ext4_ext_rm_leaf(handle_t *handle, struct inode *inode,
@@ -2759,24 +2756,18 @@ ext4_ext_rm_leaf(handle_t *handle, struct inode *inode,

/*
* If there's a partial cluster and at least one extent remains in
- * the leaf, free the partial cluster if it isn't shared with the
- * current extent. If it is shared with the current extent
- * we reset the partial cluster because we've reached the start of the
- * truncated/punched region and we're done removing blocks.
+ * the leaf, free the partial cluster if it isn't shared with the next
+ * extent. Otherwise, clear it - the beginning of the space to be
+ * removed has been reached. If no extent remains in the leaf,
+ * ext4_ext_remove_space() will always read in the next leaf (if any)
+ * containing the next adjacent extent, allowing this code to handle
+ * the case where the last block in that extent is outside the space
+ * to be removed but might be shared with the partial cluster.
*/
if (partial->state == tofree && ex >= EXT_FIRST_EXTENT(eh)) {
pblk = ext4_ext_pblock(ex) + ex_ee_len - 1;
- if (partial->pclu != EXT4_B2C(sbi, pblk)) {
- int flags = get_default_free_blocks_flags(inode);
-
- if (ext4_is_pending(inode, partial->lblk))
- flags |= EXT4_FREE_BLOCKS_RERESERVE_CLUSTER;
- ext4_free_blocks(handle, inode, NULL,
- EXT4_C2B(sbi, partial->pclu),
- sbi->s_cluster_ratio, flags);
- if (flags & EXT4_FREE_BLOCKS_RERESERVE_CLUSTER)
- ext4_rereserve_cluster(inode, partial->lblk);
- }
+ if (partial->pclu != EXT4_B2C(sbi, pblk))
+ free_partial_cluster(handle, inode, partial);
partial->state = initial;
}

@@ -3032,21 +3023,12 @@ int ext4_ext_remove_space(struct inode *inode, ext4_lblk_t start,
path->p_hdr->eh_entries);

/*
- * if there's a partial cluster and we have removed the first extent
- * in the file, then we also free the partial cluster, if any
+ * if a partial cluster still remains here the extent tree has
+ * been traversed to the beginning of the file, so it is not
+ * shared with another extent
*/
- if (partial.state == tofree && err == 0) {
- int flags = get_default_free_blocks_flags(inode);
-
- if (ext4_is_pending(inode, partial.lblk))
- flags |= EXT4_FREE_BLOCKS_RERESERVE_CLUSTER;
- ext4_free_blocks(handle, inode, NULL,
- EXT4_C2B(sbi, partial.pclu),
- sbi->s_cluster_ratio, flags);
- if (flags & EXT4_FREE_BLOCKS_RERESERVE_CLUSTER)
- ext4_rereserve_cluster(inode, partial.lblk);
- partial.state = initial;
- }
+ if (partial.state == tofree && err == 0)
+ free_partial_cluster(handle, inode, &partial);

/* TODO: flexible tree reduction should be here */
if (path->p_hdr->eh_entries == 0) {
--
2.30.2

2023-09-13 02:21:43

by Eric Whitney

[permalink] [raw]
Subject: [PATCH 6/6] ext4: remove mballoc's NOFREE flags

ext4_ext_remove_space() no longer relies on the NOFREE_FIRST_CLUSTER and
NOFREE_LAST_CLUSTER flags used to condition the behavior of
ext4_free_blocks() when applied to clusters. Remove everything
related to those flags.

Signed-off-by: Eric Whitney <[email protected]>
---
fs/ext4/ext4.h | 4 +---
fs/ext4/mballoc.c | 25 ++++---------------------
include/trace/events/ext4.h | 4 +---
3 files changed, 6 insertions(+), 27 deletions(-)

diff --git a/fs/ext4/ext4.h b/fs/ext4/ext4.h
index 9418359b1d9d..32c803f7dc56 100644
--- a/fs/ext4/ext4.h
+++ b/fs/ext4/ext4.h
@@ -736,9 +736,7 @@ enum {
#define EXT4_FREE_BLOCKS_FORGET 0x0002
#define EXT4_FREE_BLOCKS_VALIDATED 0x0004
#define EXT4_FREE_BLOCKS_NO_QUOT_UPDATE 0x0008
-#define EXT4_FREE_BLOCKS_NOFREE_FIRST_CLUSTER 0x0010
-#define EXT4_FREE_BLOCKS_NOFREE_LAST_CLUSTER 0x0020
-#define EXT4_FREE_BLOCKS_RERESERVE_CLUSTER 0x0040
+#define EXT4_FREE_BLOCKS_RERESERVE_CLUSTER 0x0010

#if defined(__KERNEL__) && defined(CONFIG_COMPAT)
/*
diff --git a/fs/ext4/mballoc.c b/fs/ext4/mballoc.c
index c91db9f57524..f9096ab49bfb 100644
--- a/fs/ext4/mballoc.c
+++ b/fs/ext4/mballoc.c
@@ -6681,35 +6681,18 @@ void ext4_free_blocks(handle_t *handle, struct inode *inode,
/*
* If the extent to be freed does not begin on a cluster
* boundary, we need to deal with partial clusters at the
- * beginning and end of the extent. Normally we will free
- * blocks at the beginning or the end unless we are explicitly
- * requested to avoid doing so.
+ * beginning and end of the extent.
*/
overflow = EXT4_PBLK_COFF(sbi, block);
if (overflow) {
- if (flags & EXT4_FREE_BLOCKS_NOFREE_FIRST_CLUSTER) {
- overflow = sbi->s_cluster_ratio - overflow;
- block += overflow;
- if (count > overflow)
- count -= overflow;
- else
- return;
- } else {
- block -= overflow;
- count += overflow;
- }
+ block -= overflow;
+ count += overflow;
/* The range changed so it's no longer validated */
flags &= ~EXT4_FREE_BLOCKS_VALIDATED;
}
overflow = EXT4_LBLK_COFF(sbi, count);
if (overflow) {
- if (flags & EXT4_FREE_BLOCKS_NOFREE_LAST_CLUSTER) {
- if (count > overflow)
- count -= overflow;
- else
- return;
- } else
- count += sbi->s_cluster_ratio - overflow;
+ count += sbi->s_cluster_ratio - overflow;
/* The range changed so it's no longer validated */
flags &= ~EXT4_FREE_BLOCKS_VALIDATED;
}
diff --git a/include/trace/events/ext4.h b/include/trace/events/ext4.h
index b474ded2623d..786987154893 100644
--- a/include/trace/events/ext4.h
+++ b/include/trace/events/ext4.h
@@ -71,9 +71,7 @@ TRACE_DEFINE_ENUM(BH_Boundary);
{ EXT4_FREE_BLOCKS_METADATA, "METADATA" }, \
{ EXT4_FREE_BLOCKS_FORGET, "FORGET" }, \
{ EXT4_FREE_BLOCKS_VALIDATED, "VALIDATED" }, \
- { EXT4_FREE_BLOCKS_NO_QUOT_UPDATE, "NO_QUOTA" }, \
- { EXT4_FREE_BLOCKS_NOFREE_FIRST_CLUSTER,"1ST_CLUSTER" },\
- { EXT4_FREE_BLOCKS_NOFREE_LAST_CLUSTER, "LAST_CLUSTER" })
+ { EXT4_FREE_BLOCKS_NO_QUOT_UPDATE, "NO_QUOTA" })

TRACE_DEFINE_ENUM(ES_WRITTEN_B);
TRACE_DEFINE_ENUM(ES_UNWRITTEN_B);
--
2.30.2

2023-09-13 05:58:48

by Eric Whitney

[permalink] [raw]
Subject: [PATCH 4/6] ext4: consolidate partial cluster initialization

Pull the code used to initialize a partial cluster into a single
location to improve readability and to minimize the disturbance on
other code. Take advantage of the change to track partial clusters
in the logical space to use a more efficient means to search for a
block adjacent to the block range to be removed.

Signed-off-by: Eric Whitney <[email protected]>
---
fs/ext4/extents.c | 70 +++++++++++++++++------------------------------
1 file changed, 25 insertions(+), 45 deletions(-)

diff --git a/fs/ext4/extents.c b/fs/ext4/extents.c
index 793a9437be9f..a0c9e37ef804 100644
--- a/fs/ext4/extents.c
+++ b/fs/ext4/extents.c
@@ -2641,17 +2641,6 @@ ext4_ext_rm_leaf(handle_t *handle, struct inode *inode,

/* If this extent is beyond the end of the hole, skip it */
if (end < ex_ee_block) {
- /*
- * We're going to skip this extent and move to another,
- * so note that its first cluster is in use to avoid
- * freeing it when removing blocks. Eventually, the
- * right edge of the truncated/punched region will
- * be just to the left.
- */
- if (sbi->s_cluster_ratio > 1) {
- partial->lblk = ex_ee_block;
- partial->state = keep;
- }
ex--;
ex_ee_block = le32_to_cpu(ex->ee_block);
ex_ee_len = ext4_ext_get_actual_len(ex);
@@ -2812,10 +2801,6 @@ int ext4_ext_remove_space(struct inode *inode, ext4_lblk_t start,
handle_t *handle;
int i = 0, err = 0;

- partial.pclu = 0;
- partial.lblk = 0;
- partial.state = none;
-
ext_debug(inode, "truncate since %u to %u\n", start, end);

/* probably first extent we're gonna free will be last in block */
@@ -2825,6 +2810,13 @@ int ext4_ext_remove_space(struct inode *inode, ext4_lblk_t start,
if (IS_ERR(handle))
return PTR_ERR(handle);

+ /* state never changes for non-bigalloc file systems */
+ partial.state = none;
+ if (sbi->s_cluster_ratio > 1) {
+ partial.start_lclu = EXT4_B2C(sbi, start);
+ partial.end_lclu = EXT4_B2C(sbi, end);
+ }
+
again:
trace_ext4_ext_remove_space(inode, start, end, depth);

@@ -2838,7 +2830,6 @@ int ext4_ext_remove_space(struct inode *inode, ext4_lblk_t start,
if (end < EXT_MAX_BLOCKS - 1) {
struct ext4_extent *ex;
ext4_lblk_t ee_block, ex_end, lblk;
- ext4_fsblk_t pblk;

/* find extent for or closest extent to this block */
path = ext4_find_extent(inode, end, NULL,
@@ -2871,16 +2862,6 @@ int ext4_ext_remove_space(struct inode *inode, ext4_lblk_t start,
*/
if (end >= ee_block && end < ex_end) {

- /*
- * If we're going to split the extent, note that
- * the cluster containing the block after 'end' is
- * in use to avoid freeing it when removing blocks.
- */
- if (sbi->s_cluster_ratio > 1) {
- partial.lblk = end + 1;
- partial.state = keep;
- }
-
/*
* Split the extent in two so that 'end' is the last
* block in the first new extent. Also we should not
@@ -2891,27 +2872,26 @@ int ext4_ext_remove_space(struct inode *inode, ext4_lblk_t start,
end + 1, 1);
if (err < 0)
goto out;
+ }

- } else if (sbi->s_cluster_ratio > 1 && end >= ex_end &&
- partial.state == none) {
- /*
- * If we're punching, there's an extent to the right.
- * If the partial cluster hasn't been set, set it to
- * that extent's first cluster and its state to keep
- * so it won't be freed should it contain blocks to be
- * removed. If it's already set (free/keep), we're
- * retrying and keep the original partial cluster info
- * so a cluster marked free as a result of earlier
- * extent removal is not lost.
- */
- lblk = ex_end + 1;
- err = ext4_ext_search_right(inode, path, &lblk, &pblk,
- NULL);
- if (err < 0)
- goto out;
- if (pblk) {
- partial.lblk = lblk;
+ /*
+ * if there's a block following the space to be removed
+ * in a bigalloc file system, note that the cluster
+ * containing it must not be freed
+ */
+ if (sbi->s_cluster_ratio > 1 && partial.state == none) {
+ if (end < ee_block) {
+ partial.lblk = ee_block;
partial.state = keep;
+ } else if (end >= ee_block && end < ex_end) {
+ partial.lblk = end + 1;
+ partial.state = keep;
+ } else if (end >= ex_end) {
+ lblk = ext4_ext_next_allocated_block(path);
+ if (lblk != EXT_MAX_BLOCKS) {
+ partial.lblk = lblk;
+ partial.state = keep;
+ }
}
}
}
--
2.30.2

2023-09-13 07:05:30

by Eric Whitney

[permalink] [raw]
Subject: [PATCH 3/6] ext4: rework partial cluster handling to use lblk more consistently

Working in the logical block space where possible when manipulating
partial clusters makes the code easier to understand. It also offers
the opportunity for efficiency improvements, both in this patch and
those that follow.

Signed-off-by: Eric Whitney <[email protected]>
---
fs/ext4/extents.c | 20 +++++++++-----------
1 file changed, 9 insertions(+), 11 deletions(-)

diff --git a/fs/ext4/extents.c b/fs/ext4/extents.c
index 0c52218fb171..793a9437be9f 100644
--- a/fs/ext4/extents.c
+++ b/fs/ext4/extents.c
@@ -2505,7 +2505,7 @@ static int ext4_remove_blocks(handle_t *handle, struct inode *inode,
*/
last_pblk = ext4_ext_pblock(ex) + ee_len - 1;
if (partial->state != none &&
- partial->pclu != EXT4_B2C(sbi, last_pblk)) {
+ EXT4_B2C(sbi, partial->lblk) != EXT4_B2C(sbi, to)) {
if (partial->state == free)
free_partial_cluster(handle, inode, partial);
partial->state = none;
@@ -2547,7 +2547,8 @@ static int ext4_remove_blocks(handle_t *handle, struct inode *inode,
ext4_free_blocks(handle, inode, NULL, pblk, num, flags);

/* reset the partial cluster if we've freed past it */
- if (partial->state != none && partial->pclu != EXT4_B2C(sbi, pblk))
+ if (partial->state != none &&
+ EXT4_B2C(sbi, partial->lblk) != EXT4_B2C(sbi, from))
partial->state = none;

/*
@@ -2597,11 +2598,10 @@ ext4_ext_rm_leaf(handle_t *handle, struct inode *inode,
struct ext4_extent_header *eh;
ext4_lblk_t a, b;
unsigned num;
- ext4_lblk_t ex_ee_block;
+ ext4_lblk_t ex_ee_block, lblk;
unsigned short ex_ee_len;
unsigned unwritten = 0;
struct ext4_extent *ex;
- ext4_fsblk_t pblk;

/* the header must be checked already in ext4_ext_remove_space() */
ext_debug(inode, "truncate since %u in leaf to %u\n", start, end);
@@ -2649,8 +2649,7 @@ ext4_ext_rm_leaf(handle_t *handle, struct inode *inode,
* be just to the left.
*/
if (sbi->s_cluster_ratio > 1) {
- pblk = ext4_ext_pblock(ex);
- partial->pclu = EXT4_B2C(sbi, pblk);
+ partial->lblk = ex_ee_block;
partial->state = keep;
}
ex--;
@@ -2767,8 +2766,8 @@ ext4_ext_rm_leaf(handle_t *handle, struct inode *inode,
* to be removed but might be shared with the partial cluster.
*/
if (partial->state == free && ex >= EXT_FIRST_EXTENT(eh)) {
- pblk = ext4_ext_pblock(ex) + ex_ee_len - 1;
- if (partial->pclu != EXT4_B2C(sbi, pblk))
+ lblk = ex_ee_block + ex_ee_len - 1;
+ if (EXT4_B2C(sbi, partial->lblk) != EXT4_B2C(sbi, lblk))
free_partial_cluster(handle, inode, partial);
partial->state = none;
}
@@ -2878,8 +2877,7 @@ int ext4_ext_remove_space(struct inode *inode, ext4_lblk_t start,
* in use to avoid freeing it when removing blocks.
*/
if (sbi->s_cluster_ratio > 1) {
- pblk = ext4_ext_pblock(ex) + end - ee_block + 1;
- partial.pclu = EXT4_B2C(sbi, pblk);
+ partial.lblk = end + 1;
partial.state = keep;
}

@@ -2912,7 +2910,7 @@ int ext4_ext_remove_space(struct inode *inode, ext4_lblk_t start,
if (err < 0)
goto out;
if (pblk) {
- partial.pclu = EXT4_B2C(sbi, pblk);
+ partial.lblk = lblk;
partial.state = keep;
}
}
--
2.30.2

2023-09-13 08:16:52

by Eric Whitney

[permalink] [raw]
Subject: [PATCH 2/6] ext4: rework partial cluster definition and related tracepoints

Rework the partial cluster definition to use more obvious state values
and document the relationship between states and valid lblk and pclu
values. Add entries for the first and last clusters delimiting the
space to be removed to enable optimizations in future patches. Rework
the tracepoints containing partial clusters to produce a more readable
output format. Add a tracepoint for free_partial_cluster().

Signed-off-by: Eric Whitney <[email protected]>
---
fs/ext4/ext4_extents.h | 19 ++++--
fs/ext4/extents.c | 50 +++++++--------
include/trace/events/ext4.h | 119 ++++++++++++++++++++++++++----------
3 files changed, 125 insertions(+), 63 deletions(-)

diff --git a/fs/ext4/ext4_extents.h b/fs/ext4/ext4_extents.h
index 26435f3a3094..06c2ce31dbcd 100644
--- a/fs/ext4/ext4_extents.h
+++ b/fs/ext4/ext4_extents.h
@@ -121,15 +121,22 @@ struct ext4_ext_path {

/*
* Used to record a portion of a cluster found at the beginning or end
- * of an extent while traversing the extent tree during space removal.
- * A partial cluster may be removed if it does not contain blocks shared
- * with extents that aren't being deleted (tofree state). Otherwise,
- * it cannot be removed (nofree state).
+ * of an extent while traversing the extent tree when removing space.
+ * In the "none" state, no partial cluster is being tracked and both
+ * lblk and pclu values are invalid.
+ * In the "free" state, a partial cluster that is a possible candidate
+ * to be freed is being tracked, and both lblk and pclu values are valid.
+ * In the "keep" state, a partial cluster that must not be freed is being
+ * tracked, the lblk value is valid and the pclu value is not valid.
+ * start_lclu and end_lclu are the logical clusters at the start and end
+ * of the space to be removed.
*/
struct partial_cluster {
- ext4_fsblk_t pclu; /* physical cluster number */
+ enum {none, free, keep} state;
ext4_lblk_t lblk; /* logical block number within logical cluster */
- enum {initial, tofree, nofree} state;
+ ext4_fsblk_t pclu; /* physical cluster number */
+ ext4_lblk_t start_lclu;
+ ext4_lblk_t end_lclu;
};

/*
diff --git a/fs/ext4/extents.c b/fs/ext4/extents.c
index 9470502b886a..0c52218fb171 100644
--- a/fs/ext4/extents.c
+++ b/fs/ext4/extents.c
@@ -2436,6 +2436,8 @@ static void free_partial_cluster(handle_t *handle, struct inode *inode,
struct ext4_inode_info *ei = EXT4_I(inode);
int flags = get_default_free_blocks_flags(inode);

+ trace_free_partial_cluster(inode, partial);
+
/*
* When the partial cluster contains at least one delayed and
* unwritten block (has pending reservation), the RERESERVE_CLUSTER
@@ -2502,11 +2504,11 @@ static int ext4_remove_blocks(handle_t *handle, struct inode *inode,
* cluster of the last block in the extent, we free it
*/
last_pblk = ext4_ext_pblock(ex) + ee_len - 1;
- if (partial->state != initial &&
+ if (partial->state != none &&
partial->pclu != EXT4_B2C(sbi, last_pblk)) {
- if (partial->state == tofree)
+ if (partial->state == free)
free_partial_cluster(handle, inode, partial);
- partial->state = initial;
+ partial->state = none;
}

num = le32_to_cpu(ex->ee_block) + ee_len - from;
@@ -2515,21 +2517,21 @@ static int ext4_remove_blocks(handle_t *handle, struct inode *inode,
/*
* We free the partial cluster at the end of the extent (if any),
* unless the cluster is used by another extent (partial_cluster
- * state is nofree). If a partial cluster exists here, it must be
+ * state is keep). If a partial cluster exists here, it must be
* shared with the last block in the extent.
*/

/* partial, left end cluster aligned, right end unaligned */
if ((EXT4_LBLK_COFF(sbi, to) != sbi->s_cluster_ratio - 1) &&
(EXT4_LBLK_CMASK(sbi, to) >= from) &&
- (partial->state != nofree)) {
- if (partial->state == initial) {
+ (partial->state != keep)) {
+ if (partial->state == none) {
partial->pclu = EXT4_B2C(sbi, last_pblk);
partial->lblk = to;
- partial->state = tofree;
+ partial->state = free;
}
free_partial_cluster(handle, inode, partial);
- partial->state = initial;
+ partial->state = none;
}

flags = get_default_free_blocks_flags(inode);
@@ -2545,8 +2547,8 @@ static int ext4_remove_blocks(handle_t *handle, struct inode *inode,
ext4_free_blocks(handle, inode, NULL, pblk, num, flags);

/* reset the partial cluster if we've freed past it */
- if (partial->state != initial && partial->pclu != EXT4_B2C(sbi, pblk))
- partial->state = initial;
+ if (partial->state != none && partial->pclu != EXT4_B2C(sbi, pblk))
+ partial->state = none;

/*
* If we've freed the entire extent but the beginning is not left
@@ -2559,13 +2561,13 @@ static int ext4_remove_blocks(handle_t *handle, struct inode *inode,
* extent is left cluster aligned.
*/
if (EXT4_LBLK_COFF(sbi, from) && num == ee_len) {
- if (partial->state == initial) {
+ if (partial->state == none) {
partial->pclu = EXT4_B2C(sbi, pblk);
partial->lblk = from;
- partial->state = tofree;
+ partial->state = free;
}
} else {
- partial->state = initial;
+ partial->state = none;
}

return 0;
@@ -2649,7 +2651,7 @@ ext4_ext_rm_leaf(handle_t *handle, struct inode *inode,
if (sbi->s_cluster_ratio > 1) {
pblk = ext4_ext_pblock(ex);
partial->pclu = EXT4_B2C(sbi, pblk);
- partial->state = nofree;
+ partial->state = keep;
}
ex--;
ex_ee_block = le32_to_cpu(ex->ee_block);
@@ -2764,11 +2766,11 @@ ext4_ext_rm_leaf(handle_t *handle, struct inode *inode,
* the case where the last block in that extent is outside the space
* to be removed but might be shared with the partial cluster.
*/
- if (partial->state == tofree && ex >= EXT_FIRST_EXTENT(eh)) {
+ if (partial->state == free && ex >= EXT_FIRST_EXTENT(eh)) {
pblk = ext4_ext_pblock(ex) + ex_ee_len - 1;
if (partial->pclu != EXT4_B2C(sbi, pblk))
free_partial_cluster(handle, inode, partial);
- partial->state = initial;
+ partial->state = none;
}

/* if this leaf is free, then we should
@@ -2813,7 +2815,7 @@ int ext4_ext_remove_space(struct inode *inode, ext4_lblk_t start,

partial.pclu = 0;
partial.lblk = 0;
- partial.state = initial;
+ partial.state = none;

ext_debug(inode, "truncate since %u to %u\n", start, end);

@@ -2878,7 +2880,7 @@ int ext4_ext_remove_space(struct inode *inode, ext4_lblk_t start,
if (sbi->s_cluster_ratio > 1) {
pblk = ext4_ext_pblock(ex) + end - ee_block + 1;
partial.pclu = EXT4_B2C(sbi, pblk);
- partial.state = nofree;
+ partial.state = keep;
}

/*
@@ -2893,15 +2895,15 @@ int ext4_ext_remove_space(struct inode *inode, ext4_lblk_t start,
goto out;

} else if (sbi->s_cluster_ratio > 1 && end >= ex_end &&
- partial.state == initial) {
+ partial.state == none) {
/*
* If we're punching, there's an extent to the right.
* If the partial cluster hasn't been set, set it to
- * that extent's first cluster and its state to nofree
+ * that extent's first cluster and its state to keep
* so it won't be freed should it contain blocks to be
- * removed. If it's already set (tofree/nofree), we're
+ * removed. If it's already set (free/keep), we're
* retrying and keep the original partial cluster info
- * so a cluster marked tofree as a result of earlier
+ * so a cluster marked free as a result of earlier
* extent removal is not lost.
*/
lblk = ex_end + 1;
@@ -2911,7 +2913,7 @@ int ext4_ext_remove_space(struct inode *inode, ext4_lblk_t start,
goto out;
if (pblk) {
partial.pclu = EXT4_B2C(sbi, pblk);
- partial.state = nofree;
+ partial.state = keep;
}
}
}
@@ -3027,7 +3029,7 @@ int ext4_ext_remove_space(struct inode *inode, ext4_lblk_t start,
* been traversed to the beginning of the file, so it is not
* shared with another extent
*/
- if (partial.state == tofree && err == 0)
+ if (partial.state == free && err == 0)
free_partial_cluster(handle, inode, &partial);

/* TODO: flexible tree reduction should be here */
diff --git a/include/trace/events/ext4.h b/include/trace/events/ext4.h
index 65029dfb92fb..b474ded2623d 100644
--- a/include/trace/events/ext4.h
+++ b/include/trace/events/ext4.h
@@ -95,6 +95,16 @@ TRACE_DEFINE_ENUM(ES_REFERENCED_B);
{ FALLOC_FL_COLLAPSE_RANGE, "COLLAPSE_RANGE"}, \
{ FALLOC_FL_ZERO_RANGE, "ZERO_RANGE"})

+TRACE_DEFINE_ENUM(none);
+TRACE_DEFINE_ENUM(free);
+TRACE_DEFINE_ENUM(keep);
+
+#define show_partial_cluster_state(state) \
+ __print_symbolic(state, \
+ { none, "none"}, \
+ { free, "free"}, \
+ { keep, "keep"})
+
TRACE_DEFINE_ENUM(EXT4_FC_REASON_XATTR);
TRACE_DEFINE_ENUM(EXT4_FC_REASON_CROSS_RENAME);
TRACE_DEFINE_ENUM(EXT4_FC_REASON_JOURNAL_FLAG_CHANGE);
@@ -1984,6 +1994,42 @@ TRACE_EVENT(ext4_ext_show_extent,
(unsigned short) __entry->len)
);

+TRACE_EVENT(free_partial_cluster,
+ TP_PROTO(struct inode *inode, struct partial_cluster *pc),
+
+ TP_ARGS(inode, pc),
+
+ TP_STRUCT__entry(
+ __field( dev_t, dev )
+ __field( ino_t, ino )
+ __field( int, pc_state )
+ __field( ext4_lblk_t, pc_lblk )
+ __field( ext4_fsblk_t, pc_pclu )
+ __field( ext4_lblk_t, pc_start_lclu )
+ __field( ext4_lblk_t, pc_end_lclu )
+ ),
+
+ TP_fast_assign(
+ __entry->dev = inode->i_sb->s_dev;
+ __entry->ino = inode->i_ino;
+ __entry->pc_state = pc->state;
+ __entry->pc_lblk = pc->lblk;
+ __entry->pc_pclu = pc->pclu;
+ __entry->pc_start_lclu = pc->start_lclu;
+ __entry->pc_end_lclu = pc->end_lclu;
+ ),
+
+ TP_printk("dev %d,%d ino %lu partial "
+ "[state %s lblk %u pclu %lld start_lclu %u end_lclu %u]",
+ MAJOR(__entry->dev), MINOR(__entry->dev),
+ (unsigned long) __entry->ino,
+ show_partial_cluster_state(__entry->pc_state),
+ (unsigned int) __entry->pc_lblk,
+ (long long) __entry->pc_pclu,
+ (unsigned int) __entry->pc_start_lclu,
+ (unsigned int) __entry->pc_end_lclu)
+);
+
TRACE_EVENT(ext4_remove_blocks,
TP_PROTO(struct inode *inode, struct ext4_extent *ex,
ext4_lblk_t from, ext4_fsblk_t to,
@@ -1992,16 +2038,18 @@ TRACE_EVENT(ext4_remove_blocks,
TP_ARGS(inode, ex, from, to, pc),

TP_STRUCT__entry(
- __field( dev_t, dev )
- __field( ino_t, ino )
- __field( ext4_lblk_t, from )
- __field( ext4_lblk_t, to )
- __field( ext4_fsblk_t, ee_pblk )
- __field( ext4_lblk_t, ee_lblk )
- __field( unsigned short, ee_len )
- __field( ext4_fsblk_t, pc_pclu )
- __field( ext4_lblk_t, pc_lblk )
- __field( int, pc_state)
+ __field( dev_t, dev )
+ __field( ino_t, ino )
+ __field( ext4_lblk_t, from )
+ __field( ext4_lblk_t, to )
+ __field( ext4_fsblk_t, ee_pblk )
+ __field( ext4_lblk_t, ee_lblk )
+ __field( unsigned short, ee_len )
+ __field( int, pc_state )
+ __field( ext4_lblk_t, pc_lblk )
+ __field( ext4_fsblk_t, pc_pclu )
+ __field( ext4_lblk_t, pc_start_lclu )
+ __field( ext4_lblk_t, pc_end_lclu )
),

TP_fast_assign(
@@ -2012,13 +2060,16 @@ TRACE_EVENT(ext4_remove_blocks,
__entry->ee_pblk = ext4_ext_pblock(ex);
__entry->ee_lblk = le32_to_cpu(ex->ee_block);
__entry->ee_len = ext4_ext_get_actual_len(ex);
- __entry->pc_pclu = pc->pclu;
- __entry->pc_lblk = pc->lblk;
__entry->pc_state = pc->state;
+ __entry->pc_lblk = pc->lblk;
+ __entry->pc_pclu = pc->pclu;
+ __entry->pc_start_lclu = pc->start_lclu;
+ __entry->pc_end_lclu = pc->end_lclu;
),

- TP_printk("dev %d,%d ino %lu extent [%u(%llu), %u]"
- "from %u to %u partial [pclu %lld lblk %u state %d]",
+ TP_printk("dev %d,%d ino %lu extent [%u(%llu), %u] "
+ "from %u to %u partial "
+ "[state %s lblk %u pclu %lld start_lclu %u end_lclu %u]",
MAJOR(__entry->dev), MINOR(__entry->dev),
(unsigned long) __entry->ino,
(unsigned) __entry->ee_lblk,
@@ -2026,9 +2077,11 @@ TRACE_EVENT(ext4_remove_blocks,
(unsigned short) __entry->ee_len,
(unsigned) __entry->from,
(unsigned) __entry->to,
- (long long) __entry->pc_pclu,
+ show_partial_cluster_state(__entry->pc_state),
(unsigned int) __entry->pc_lblk,
- (int) __entry->pc_state)
+ (long long) __entry->pc_pclu,
+ (unsigned int) __entry->pc_start_lclu,
+ (unsigned int) __entry->pc_end_lclu)
);

TRACE_EVENT(ext4_ext_rm_leaf,
@@ -2045,9 +2098,9 @@ TRACE_EVENT(ext4_ext_rm_leaf,
__field( ext4_lblk_t, ee_lblk )
__field( ext4_fsblk_t, ee_pblk )
__field( short, ee_len )
- __field( ext4_fsblk_t, pc_pclu )
- __field( ext4_lblk_t, pc_lblk )
__field( int, pc_state)
+ __field( ext4_lblk_t, pc_lblk )
+ __field( ext4_fsblk_t, pc_pclu )
),

TP_fast_assign(
@@ -2057,22 +2110,22 @@ TRACE_EVENT(ext4_ext_rm_leaf,
__entry->ee_lblk = le32_to_cpu(ex->ee_block);
__entry->ee_pblk = ext4_ext_pblock(ex);
__entry->ee_len = ext4_ext_get_actual_len(ex);
- __entry->pc_pclu = pc->pclu;
- __entry->pc_lblk = pc->lblk;
__entry->pc_state = pc->state;
+ __entry->pc_lblk = pc->lblk;
+ __entry->pc_pclu = pc->pclu;
),

- TP_printk("dev %d,%d ino %lu start_lblk %u last_extent [%u(%llu), %u]"
- "partial [pclu %lld lblk %u state %d]",
+ TP_printk("dev %d,%d ino %lu start_lblk %u last_extent [%u(%llu), %u] "
+ "partial [state %s lblk %u pclu %lld]",
MAJOR(__entry->dev), MINOR(__entry->dev),
(unsigned long) __entry->ino,
(unsigned) __entry->start,
(unsigned) __entry->ee_lblk,
(unsigned long long) __entry->ee_pblk,
(unsigned short) __entry->ee_len,
- (long long) __entry->pc_pclu,
+ show_partial_cluster_state(__entry->pc_state),
(unsigned int) __entry->pc_lblk,
- (int) __entry->pc_state)
+ (long long) __entry->pc_pclu)
);

TRACE_EVENT(ext4_ext_rm_idx,
@@ -2120,7 +2173,7 @@ TRACE_EVENT(ext4_ext_remove_space,
__entry->depth = depth;
),

- TP_printk("dev %d,%d ino %lu since %u end %u depth %d",
+ TP_printk("dev %d,%d ino %lu start %u end %u depth %d",
MAJOR(__entry->dev), MINOR(__entry->dev),
(unsigned long) __entry->ino,
(unsigned) __entry->start,
@@ -2140,9 +2193,9 @@ TRACE_EVENT(ext4_ext_remove_space_done,
__field( ext4_lblk_t, start )
__field( ext4_lblk_t, end )
__field( int, depth )
- __field( ext4_fsblk_t, pc_pclu )
- __field( ext4_lblk_t, pc_lblk )
__field( int, pc_state )
+ __field( ext4_lblk_t, pc_lblk )
+ __field( ext4_fsblk_t, pc_pclu )
__field( unsigned short, eh_entries )
),

@@ -2152,23 +2205,23 @@ TRACE_EVENT(ext4_ext_remove_space_done,
__entry->start = start;
__entry->end = end;
__entry->depth = depth;
- __entry->pc_pclu = pc->pclu;
- __entry->pc_lblk = pc->lblk;
__entry->pc_state = pc->state;
+ __entry->pc_lblk = pc->lblk;
+ __entry->pc_pclu = pc->pclu;
__entry->eh_entries = le16_to_cpu(eh_entries);
),

- TP_printk("dev %d,%d ino %lu since %u end %u depth %d "
- "partial [pclu %lld lblk %u state %d] "
+ TP_printk("dev %d,%d ino %lu start %u end %u depth %d "
+ "partial [state %s lblk %u pclu %lld] "
"remaining_entries %u",
MAJOR(__entry->dev), MINOR(__entry->dev),
(unsigned long) __entry->ino,
(unsigned) __entry->start,
(unsigned) __entry->end,
__entry->depth,
- (long long) __entry->pc_pclu,
+ show_partial_cluster_state(__entry->pc_state),
(unsigned int) __entry->pc_lblk,
- (int) __entry->pc_state,
+ (long long) __entry->pc_pclu,
(unsigned short) __entry->eh_entries)
);

--
2.30.2

2023-09-13 08:26:40

by Eric Whitney

[permalink] [raw]
Subject: [PATCH 5/6] ext4: simplify and improve efficiency of cluster removal code

Rework the code in ext4_remove_blocks() to further improve readability.
Explicitly separate the code used for bigalloc and non-bigalloc file
systems, take a clearer approach to bigalloc processing, and rewrite
the comments. Take advantage of the new start_lclu and end_lclu
components in struct partial_cluster to minimize the number of checks
made for pending reservations and to maximize the number of blocks that
can be freed in a single operation when processing an extent.

Signed-off-by: Eric Whitney <[email protected]>
---
fs/ext4/extents.c | 153 ++++++++++++++++++++++++++++------------------
1 file changed, 92 insertions(+), 61 deletions(-)

diff --git a/fs/ext4/extents.c b/fs/ext4/extents.c
index a0c9e37ef804..542d25d17f65 100644
--- a/fs/ext4/extents.c
+++ b/fs/ext4/extents.c
@@ -2444,9 +2444,17 @@ static void free_partial_cluster(handle_t *handle, struct inode *inode,
* flag forces ext4_free_blocks() to defer reserved and allocated
* space accounting to this function. This avoids potential difficult
* to handle ENOSPC conditions when the file system is near exhaustion.
+ *
+ * A check for a pending reservation is only necessary if the partial
+ * cluster matches the cluster at the beginning or the end of the
+ * space to be removed. All other pending reservations are
+ * removed by ext4_es_remove_extent() before ext4_ext_remove_space()
+ * is called.
*/
- if (ext4_is_pending(inode, partial->lblk))
- flags |= EXT4_FREE_BLOCKS_RERESERVE_CLUSTER;
+ if (EXT4_B2C(sbi, partial->lblk) == partial->start_lclu ||
+ EXT4_B2C(sbi, partial->lblk) == partial->end_lclu)
+ if (ext4_is_pending(inode, partial->lblk))
+ flags |= EXT4_FREE_BLOCKS_RERESERVE_CLUSTER;

ext4_free_blocks(handle, inode, NULL, EXT4_C2B(sbi, partial->pclu),
sbi->s_cluster_ratio, flags);
@@ -2464,6 +2472,16 @@ static void free_partial_cluster(handle_t *handle, struct inode *inode,
}
}

+/**
+ * ext4_remove_blocks() - frees a range of blocks found in a specified extent
+ *
+ * @handle: journal handle for current transaction
+ * @inode: file containing block range
+ * @ex: extent containing block range
+ * @partial: partial cluster tracking info for bigalloc
+ * @from: start of block range
+ * @to: end of block range
+ */
static int ext4_remove_blocks(handle_t *handle, struct inode *inode,
struct ext4_extent *ex,
struct partial_cluster *partial,
@@ -2471,17 +2489,17 @@ static int ext4_remove_blocks(handle_t *handle, struct inode *inode,
{
struct ext4_sb_info *sbi = EXT4_SB(inode->i_sb);
unsigned short ee_len = ext4_ext_get_actual_len(ex);
- ext4_fsblk_t last_pblk, pblk;
- ext4_lblk_t num;
+ ext4_lblk_t ee_block = le32_to_cpu(ex->ee_block);
+ ext4_fsblk_t ee_pblock = ext4_ext_pblock(ex);
+ ext4_fsblk_t pblk;
+ ext4_lblk_t nclus, nblks = 0;
int flags;

/* only extent tail removal is allowed */
- if (from < le32_to_cpu(ex->ee_block) ||
- to != le32_to_cpu(ex->ee_block) + ee_len - 1) {
- ext4_error(sbi->s_sb,
- "strange request: removal(2) %u-%u from %u:%u",
- from, to, le32_to_cpu(ex->ee_block), ee_len);
- return 0;
+ if (unlikely(from < ee_block || to != ee_block + ee_len - 1)) {
+ EXT4_ERROR_INODE(inode, "extent tail required: from %u to %u ee_block %u ee_len %u",
+ from, to, ee_block, ee_len);
+ return -EFSCORRUPTED;
}

#ifdef EXTENTS_STATS
@@ -2499,76 +2517,89 @@ static int ext4_remove_blocks(handle_t *handle, struct inode *inode,

trace_ext4_remove_blocks(inode, ex, from, to, partial);

+ /* initial processing for the simple non-bigalloc case */
+ if (sbi->s_cluster_ratio == 1) {
+ pblk = ee_pblock + from - ee_block;
+ nblks = to - from + 1;
+ goto free_blocks;
+ }
+
+ /* initial bigalloc processing until free_blocks: below */
+
/*
- * if we have a partial cluster, and it's different from the
- * cluster of the last block in the extent, we free it
+ * If there's a partial cluster which differs from the last cluster
+ * in the block range, free it and/or clear it. Any partial that
+ * remains will correspond to the last cluster in the range.
*/
- last_pblk = ext4_ext_pblock(ex) + ee_len - 1;
if (partial->state != none &&
- EXT4_B2C(sbi, partial->lblk) != EXT4_B2C(sbi, to)) {
+ EXT4_B2C(sbi, partial->lblk) > EXT4_B2C(sbi, to)) {
if (partial->state == free)
free_partial_cluster(handle, inode, partial);
partial->state = none;
}

- num = le32_to_cpu(ex->ee_block) + ee_len - from;
- pblk = ext4_ext_pblock(ex) + ee_len - num;
+ /* calculate the number of clusters covering the block range */
+ nclus = EXT4_B2C(sbi, to) - EXT4_B2C(sbi, from) + 1;

/*
- * We free the partial cluster at the end of the extent (if any),
- * unless the cluster is used by another extent (partial_cluster
- * state is keep). If a partial cluster exists here, it must be
- * shared with the last block in the extent.
+ * The range does not end on a cluster boundary, but contains the
+ * first block of its last cluster. If the last cluster is also
+ * the last cluster or first cluster of the space to be removed
+ * free it and/or clear it, noting that it's been processed.
+ * Otherwise, for improved efficiency free it below along with
+ * any other clusters wholly contained within the range.
*/
-
- /* partial, left end cluster aligned, right end unaligned */
- if ((EXT4_LBLK_COFF(sbi, to) != sbi->s_cluster_ratio - 1) &&
- (EXT4_LBLK_CMASK(sbi, to) >= from) &&
- (partial->state != keep)) {
- if (partial->state == none) {
- partial->pclu = EXT4_B2C(sbi, last_pblk);
- partial->lblk = to;
- partial->state = free;
+ if (to != EXT4_LBLK_CFILL(sbi, to) &&
+ from <= EXT4_LBLK_CMASK(sbi, to)) {
+ if (EXT4_B2C(sbi, to) == partial->end_lclu ||
+ EXT4_B2C(sbi, to) == partial->start_lclu) {
+ if (partial->state == none) {
+ partial->lblk = to;
+ pblk = ee_pblock + ee_len - 1;
+ partial->pclu = EXT4_B2C(sbi, pblk);
+ partial->state = free;
+ }
+ if (partial->state == free)
+ free_partial_cluster(handle, inode, partial);
+ nclus--;
+ } else {
+ if (partial->state == keep)
+ nclus--;
}
- free_partial_cluster(handle, inode, partial);
partial->state = none;
}

- flags = get_default_free_blocks_flags(inode);
- flags |= EXT4_FREE_BLOCKS_NOFREE_LAST_CLUSTER;
-
/*
- * For bigalloc file systems, we never free a partial cluster
- * at the beginning of the extent. Instead, we check to see if we
- * need to free it on a subsequent call to ext4_remove_blocks,
- * or at the end of ext4_ext_rm_leaf or ext4_ext_remove_space.
+ * The range's first cluster (which could also be its last cluster)
+ * does not begin on a cluster boundary. If the range begins with
+ * the extent's first block, record the cluster as a partial if it
+ * hasn't already been set. Otherwise, clear the partial because
+ * the beginning of the space to be removed has been reached.
*/
- flags |= EXT4_FREE_BLOCKS_NOFREE_FIRST_CLUSTER;
- ext4_free_blocks(handle, inode, NULL, pblk, num, flags);
+ if (nclus && EXT4_LBLK_COFF(sbi, from) != 0) {
+ if (from == ee_block) {
+ if (partial->state == none) {
+ partial->lblk = from;
+ partial->pclu = EXT4_B2C(sbi, ee_pblock);
+ partial->state = free;
+ }
+ } else {
+ partial->state = none;
+ }
+ nclus--;
+ }

- /* reset the partial cluster if we've freed past it */
- if (partial->state != none &&
- EXT4_B2C(sbi, partial->lblk) != EXT4_B2C(sbi, from))
- partial->state = none;
+ /* free remaining clusters contained within the range */
+ if (nclus) {
+ pblk = ee_pblock + from - ee_block + (sbi->s_cluster_ratio - 1);
+ pblk = EXT4_PBLK_CMASK(sbi, pblk);
+ nblks = nclus << sbi->s_cluster_bits;
+ }

- /*
- * If we've freed the entire extent but the beginning is not left
- * cluster aligned and is not marked as ineligible for freeing we
- * record the partial cluster at the beginning of the extent. It
- * wasn't freed by the preceding ext4_free_blocks() call, and we
- * need to look farther to the left to determine if it's to be freed
- * (not shared with another extent). Else, reset the partial
- * cluster - we're either done freeing or the beginning of the
- * extent is left cluster aligned.
- */
- if (EXT4_LBLK_COFF(sbi, from) && num == ee_len) {
- if (partial->state == none) {
- partial->pclu = EXT4_B2C(sbi, pblk);
- partial->lblk = from;
- partial->state = free;
- }
- } else {
- partial->state = none;
+free_blocks:
+ if (nblks) {
+ flags = get_default_free_blocks_flags(inode);
+ ext4_free_blocks(handle, inode, NULL, pblk, nblks, flags);
}

return 0;
--
2.30.2