2020-06-22 13:15:24

by Wang Shilong

[permalink] [raw]
Subject: [PATCH 2/2] ext4: avoid trimming block group if only few blocks freed

From: Wang Shilong <[email protected]>

Now WAS_TRIMMED flag will be cleared if there are any blocks
freed in this block group, this might be not good idea if there
are only few blocks freed, since most of freed blocks have been
issued discard before.

So this patch tries to introduce another counter which record
how many blocks freed since last time trimmed, WAS_TRIMMED flag
will be only cleared if there are enough free blocks(default 128).

Also expose a new sys interface min_freed_blocks_to_trim to tune
default behavior.

Cc: Shuichi Ihara <[email protected]>
Cc: Andreas Dilger <[email protected]>
Cc: Wang Shilong <[email protected]>
Signed-off-by: Wang Shilong <[email protected]>
---
fs/ext4/ext4.h | 7 +++++++
fs/ext4/mballoc.c | 17 +++++++++++++++--
fs/ext4/sysfs.c | 2 ++
3 files changed, 24 insertions(+), 2 deletions(-)

diff --git a/fs/ext4/ext4.h b/fs/ext4/ext4.h
index 252754da2f1b..2da86d1ebe3f 100644
--- a/fs/ext4/ext4.h
+++ b/fs/ext4/ext4.h
@@ -1240,6 +1240,9 @@ extern void ext4_set_bits(void *bm, int cur, int len);
/* Metadata checksum algorithm codes */
#define EXT4_CRC32C_CHKSUM 1

+/* Default min freed blocks which we could clear TRIMMED flags */
+#define DEFAULT_MIN_FREED_BLOCKS_TO_TRIM 128
+
/*
* Structure of the super block
*/
@@ -1533,6 +1536,9 @@ struct ext4_sb_info {
/* the size of zero-out chunk */
unsigned int s_extent_max_zeroout_kb;

+ /* Min freed blocks per group that we could run trim on it*/
+ unsigned long s_min_freed_blocks_to_trim;
+
unsigned int s_log_groups_per_flex;
struct flex_groups * __rcu *s_flex_groups;
ext4_group_t s_flex_groups_allocated;
@@ -3125,6 +3131,7 @@ struct ext4_group_info {
struct rb_root bb_free_root;
ext4_grpblk_t bb_first_free; /* first free block */
ext4_grpblk_t bb_free; /* total free blocks */
+ ext4_grpblk_t bb_freed_last_trimmed; /* total free blocks since last trimmed*/
ext4_grpblk_t bb_fragments; /* nr of freespace fragments */
ext4_grpblk_t bb_largest_free_order;/* order of largest frag in BG */
struct list_head bb_prealloc_list;
diff --git a/fs/ext4/mballoc.c b/fs/ext4/mballoc.c
index 235a316584d0..d33ee1781b2c 100644
--- a/fs/ext4/mballoc.c
+++ b/fs/ext4/mballoc.c
@@ -2763,6 +2763,8 @@ int ext4_mb_init(struct super_block *sb)
sbi->s_mb_group_prealloc, sbi->s_stripe);
}

+ sbi->s_min_freed_blocks_to_trim = DEFAULT_MIN_FREED_BLOCKS_TO_TRIM;
+
sbi->s_locality_groups = alloc_percpu(struct ext4_locality_group);
if (sbi->s_locality_groups == NULL) {
ret = -ENOMEM;
@@ -5091,8 +5093,18 @@ void ext4_free_blocks(handle_t *handle, struct inode *inode,
* If the volume is mounted with -o discard, online discard
* is supported and the free blocks will be trimmed online.
*/
- if (!test_opt(sb, DISCARD))
- EXT4_MB_GDP_CLEAR_TRIMMED(gdp);
+ if (!test_opt(sb, DISCARD)) {
+ e4b.bd_info->bb_freed_last_trimmed += count;
+ /*
+ * Only clear the WAS_TRIMMED flag if there are
+ * several blocks freed, or if the group becomes
+ * totally 'empty'(free < num_itable_blocks + 2).
+ */
+ if (e4b.bd_info->bb_freed_last_trimmed >=
+ sbi->s_min_freed_blocks_to_trim ||
+ e4b.bd_info->bb_free < (sbi->s_itb_per_group + 2))
+ EXT4_MB_GDP_CLEAR_TRIMMED(gdp);
+ }
ext4_group_desc_csum_set(sb, block_group, gdp);
ext4_unlock_group(sb, block_group);

@@ -5425,6 +5437,7 @@ ext4_trim_all_free(struct super_block *sb, ext4_group_t group,
}
ext4_lock_group(sb, group);
EXT4_MB_GDP_SET_TRIMMED(gdp);
+ e4b.bd_info->bb_freed_last_trimmed = 0;
ext4_group_desc_csum_set(sb, group, gdp);
ext4_unlock_group(sb, group);
err = ext4_handle_dirty_metadata(handle, NULL, gdp_bh);
diff --git a/fs/ext4/sysfs.c b/fs/ext4/sysfs.c
index 6c9fc9e21c13..8ee4e7e3f125 100644
--- a/fs/ext4/sysfs.c
+++ b/fs/ext4/sysfs.c
@@ -216,6 +216,7 @@ EXT4_RW_ATTR_SBI_UI(mb_order2_req, s_mb_order2_reqs);
EXT4_RW_ATTR_SBI_UI(mb_stream_req, s_mb_stream_request);
EXT4_RW_ATTR_SBI_UI(mb_group_prealloc, s_mb_group_prealloc);
EXT4_RW_ATTR_SBI_UI(extent_max_zeroout_kb, s_extent_max_zeroout_kb);
+EXT4_RW_ATTR_SBI_UI(min_freed_blocks_to_trim, s_min_freed_blocks_to_trim);
EXT4_ATTR(trigger_fs_error, 0200, trigger_test_error);
EXT4_RW_ATTR_SBI_UI(err_ratelimit_interval_ms, s_err_ratelimit_state.interval);
EXT4_RW_ATTR_SBI_UI(err_ratelimit_burst, s_err_ratelimit_state.burst);
@@ -259,6 +260,7 @@ static struct attribute *ext4_attrs[] = {
ATTR_LIST(mb_group_prealloc),
ATTR_LIST(max_writeback_mb_bump),
ATTR_LIST(extent_max_zeroout_kb),
+ ATTR_LIST(min_freed_blocks_to_trim),
ATTR_LIST(trigger_fs_error),
ATTR_LIST(err_ratelimit_interval_ms),
ATTR_LIST(err_ratelimit_burst),
--
2.25.4


2020-06-22 14:17:49

by Wang Shilong

[permalink] [raw]
Subject: [PATCH v2 2/2] ext4: avoid trimming block group if only few blocks freed

From: Wang Shilong <[email protected]>

Now WAS_TRIMMED flag will be cleared if there are any blocks
freed in this block group, this might be not good idea if there
are only few blocks freed, since most of freed blocks have been
issued discard before.

So this patch tries to introduce another counter which record
how many blocks freed since last time trimmed, WAS_TRIMMED flag
will be only cleared if there are enough free blocks(default 128).

Also expose a new sys interface min_freed_blocks_to_trim to tune
default behavior.

Cc: Shuichi Ihara <[email protected]>
Cc: Andreas Dilger <[email protected]>
Cc: Wang Shilong <[email protected]>
Signed-off-by: Wang Shilong <[email protected]>
---
changelog v1->v2:
init bb_freed_last_trimmed to be zero during setup
---
fs/ext4/ext4.h | 7 +++++++
fs/ext4/mballoc.c | 18 ++++++++++++++++--
fs/ext4/sysfs.c | 2 ++
3 files changed, 25 insertions(+), 2 deletions(-)

diff --git a/fs/ext4/ext4.h b/fs/ext4/ext4.h
index 252754da2f1b..2da86d1ebe3f 100644
--- a/fs/ext4/ext4.h
+++ b/fs/ext4/ext4.h
@@ -1240,6 +1240,9 @@ extern void ext4_set_bits(void *bm, int cur, int len);
/* Metadata checksum algorithm codes */
#define EXT4_CRC32C_CHKSUM 1

+/* Default min freed blocks which we could clear TRIMMED flags */
+#define DEFAULT_MIN_FREED_BLOCKS_TO_TRIM 128
+
/*
* Structure of the super block
*/
@@ -1533,6 +1536,9 @@ struct ext4_sb_info {
/* the size of zero-out chunk */
unsigned int s_extent_max_zeroout_kb;

+ /* Min freed blocks per group that we could run trim on it*/
+ unsigned long s_min_freed_blocks_to_trim;
+
unsigned int s_log_groups_per_flex;
struct flex_groups * __rcu *s_flex_groups;
ext4_group_t s_flex_groups_allocated;
@@ -3125,6 +3131,7 @@ struct ext4_group_info {
struct rb_root bb_free_root;
ext4_grpblk_t bb_first_free; /* first free block */
ext4_grpblk_t bb_free; /* total free blocks */
+ ext4_grpblk_t bb_freed_last_trimmed; /* total free blocks since last trimmed*/
ext4_grpblk_t bb_fragments; /* nr of freespace fragments */
ext4_grpblk_t bb_largest_free_order;/* order of largest frag in BG */
struct list_head bb_prealloc_list;
diff --git a/fs/ext4/mballoc.c b/fs/ext4/mballoc.c
index 235a316584d0..52ab9ac5be86 100644
--- a/fs/ext4/mballoc.c
+++ b/fs/ext4/mballoc.c
@@ -2558,6 +2558,7 @@ int ext4_mb_add_groupinfo(struct super_block *sb, ext4_group_t group,
init_rwsem(&meta_group_info[i]->alloc_sem);
meta_group_info[i]->bb_free_root = RB_ROOT;
meta_group_info[i]->bb_largest_free_order = -1; /* uninit */
+ meta_group_info[i]->bb_freed_last_trimmed = 0;

mb_group_bb_bitmap_alloc(sb, meta_group_info[i], group);
return 0;
@@ -2763,6 +2764,8 @@ int ext4_mb_init(struct super_block *sb)
sbi->s_mb_group_prealloc, sbi->s_stripe);
}

+ sbi->s_min_freed_blocks_to_trim = DEFAULT_MIN_FREED_BLOCKS_TO_TRIM;
+
sbi->s_locality_groups = alloc_percpu(struct ext4_locality_group);
if (sbi->s_locality_groups == NULL) {
ret = -ENOMEM;
@@ -5091,8 +5094,18 @@ void ext4_free_blocks(handle_t *handle, struct inode *inode,
* If the volume is mounted with -o discard, online discard
* is supported and the free blocks will be trimmed online.
*/
- if (!test_opt(sb, DISCARD))
- EXT4_MB_GDP_CLEAR_TRIMMED(gdp);
+ if (!test_opt(sb, DISCARD)) {
+ e4b.bd_info->bb_freed_last_trimmed += count;
+ /*
+ * Only clear the WAS_TRIMMED flag if there are
+ * several blocks freed, or if the group becomes
+ * totally 'empty'(free < num_itable_blocks + 2).
+ */
+ if (e4b.bd_info->bb_freed_last_trimmed >=
+ sbi->s_min_freed_blocks_to_trim ||
+ e4b.bd_info->bb_free < (sbi->s_itb_per_group + 2))
+ EXT4_MB_GDP_CLEAR_TRIMMED(gdp);
+ }
ext4_group_desc_csum_set(sb, block_group, gdp);
ext4_unlock_group(sb, block_group);

@@ -5425,6 +5438,7 @@ ext4_trim_all_free(struct super_block *sb, ext4_group_t group,
}
ext4_lock_group(sb, group);
EXT4_MB_GDP_SET_TRIMMED(gdp);
+ e4b.bd_info->bb_freed_last_trimmed = 0;
ext4_group_desc_csum_set(sb, group, gdp);
ext4_unlock_group(sb, group);
err = ext4_handle_dirty_metadata(handle, NULL, gdp_bh);
diff --git a/fs/ext4/sysfs.c b/fs/ext4/sysfs.c
index 6c9fc9e21c13..8ee4e7e3f125 100644
--- a/fs/ext4/sysfs.c
+++ b/fs/ext4/sysfs.c
@@ -216,6 +216,7 @@ EXT4_RW_ATTR_SBI_UI(mb_order2_req, s_mb_order2_reqs);
EXT4_RW_ATTR_SBI_UI(mb_stream_req, s_mb_stream_request);
EXT4_RW_ATTR_SBI_UI(mb_group_prealloc, s_mb_group_prealloc);
EXT4_RW_ATTR_SBI_UI(extent_max_zeroout_kb, s_extent_max_zeroout_kb);
+EXT4_RW_ATTR_SBI_UI(min_freed_blocks_to_trim, s_min_freed_blocks_to_trim);
EXT4_ATTR(trigger_fs_error, 0200, trigger_test_error);
EXT4_RW_ATTR_SBI_UI(err_ratelimit_interval_ms, s_err_ratelimit_state.interval);
EXT4_RW_ATTR_SBI_UI(err_ratelimit_burst, s_err_ratelimit_state.burst);
@@ -259,6 +260,7 @@ static struct attribute *ext4_attrs[] = {
ATTR_LIST(mb_group_prealloc),
ATTR_LIST(max_writeback_mb_bump),
ATTR_LIST(extent_max_zeroout_kb),
+ ATTR_LIST(min_freed_blocks_to_trim),
ATTR_LIST(trigger_fs_error),
ATTR_LIST(err_ratelimit_interval_ms),
ATTR_LIST(err_ratelimit_burst),
--
2.25.4

2020-06-22 17:21:21

by Andreas Dilger

[permalink] [raw]
Subject: Re: [PATCH v2 2/2] ext4: avoid trimming block group if only few blocks freed

On Jun 22, 2020, at 8:16 AM, Wang Shilong <[email protected]> wrote:
>
> From: Wang Shilong <[email protected]>
>
> Now WAS_TRIMMED flag will be cleared if there are any blocks
> freed in this block group, this might be not good idea if there
> are only few blocks freed, since most of freed blocks have been
> issued discard before.
>
> So this patch tries to introduce another counter which record
> how many blocks freed since last time trimmed, WAS_TRIMMED flag
> will be only cleared if there are enough free blocks(default 128).
>
> Also expose a new sys interface min_freed_blocks_to_trim to tune
> default behavior.
>
> Cc: Shuichi Ihara <[email protected]>
> Cc: Andreas Dilger <[email protected]>
> Cc: Wang Shilong <[email protected]>
> Signed-off-by: Wang Shilong <[email protected]>

Reviewed-by: Andreas Dilger <[email protected]>

> ---
> changelog v1->v2:
> init bb_freed_last_trimmed to be zero during setup
> ---
> fs/ext4/ext4.h | 7 +++++++
> fs/ext4/mballoc.c | 18 ++++++++++++++++--
> fs/ext4/sysfs.c | 2 ++
> 3 files changed, 25 insertions(+), 2 deletions(-)
>
> diff --git a/fs/ext4/ext4.h b/fs/ext4/ext4.h
> index 252754da2f1b..2da86d1ebe3f 100644
> --- a/fs/ext4/ext4.h
> +++ b/fs/ext4/ext4.h
> @@ -1240,6 +1240,9 @@ extern void ext4_set_bits(void *bm, int cur, int len);
> /* Metadata checksum algorithm codes */
> #define EXT4_CRC32C_CHKSUM 1
>
> +/* Default min freed blocks which we could clear TRIMMED flags */
> +#define DEFAULT_MIN_FREED_BLOCKS_TO_TRIM 128
> +
> /*
> * Structure of the super block
> */
> @@ -1533,6 +1536,9 @@ struct ext4_sb_info {
> /* the size of zero-out chunk */
> unsigned int s_extent_max_zeroout_kb;
>
> + /* Min freed blocks per group that we could run trim on it*/
> + unsigned long s_min_freed_blocks_to_trim;
> +
> unsigned int s_log_groups_per_flex;
> struct flex_groups * __rcu *s_flex_groups;
> ext4_group_t s_flex_groups_allocated;
> @@ -3125,6 +3131,7 @@ struct ext4_group_info {
> struct rb_root bb_free_root;
> ext4_grpblk_t bb_first_free; /* first free block */
> ext4_grpblk_t bb_free; /* total free blocks */
> + ext4_grpblk_t bb_freed_last_trimmed; /* total free blocks since last trimmed*/
> ext4_grpblk_t bb_fragments; /* nr of freespace fragments */
> ext4_grpblk_t bb_largest_free_order;/* order of largest frag in BG */
> struct list_head bb_prealloc_list;
> diff --git a/fs/ext4/mballoc.c b/fs/ext4/mballoc.c
> index 235a316584d0..52ab9ac5be86 100644
> --- a/fs/ext4/mballoc.c
> +++ b/fs/ext4/mballoc.c
> @@ -2558,6 +2558,7 @@ int ext4_mb_add_groupinfo(struct super_block *sb, ext4_group_t group,
> init_rwsem(&meta_group_info[i]->alloc_sem);
> meta_group_info[i]->bb_free_root = RB_ROOT;
> meta_group_info[i]->bb_largest_free_order = -1; /* uninit */
> + meta_group_info[i]->bb_freed_last_trimmed = 0;
>
> mb_group_bb_bitmap_alloc(sb, meta_group_info[i], group);
> return 0;
> @@ -2763,6 +2764,8 @@ int ext4_mb_init(struct super_block *sb)
> sbi->s_mb_group_prealloc, sbi->s_stripe);
> }
>
> + sbi->s_min_freed_blocks_to_trim = DEFAULT_MIN_FREED_BLOCKS_TO_TRIM;
> +
> sbi->s_locality_groups = alloc_percpu(struct ext4_locality_group);
> if (sbi->s_locality_groups == NULL) {
> ret = -ENOMEM;
> @@ -5091,8 +5094,18 @@ void ext4_free_blocks(handle_t *handle, struct inode *inode,
> * If the volume is mounted with -o discard, online discard
> * is supported and the free blocks will be trimmed online.
> */
> - if (!test_opt(sb, DISCARD))
> - EXT4_MB_GDP_CLEAR_TRIMMED(gdp);
> + if (!test_opt(sb, DISCARD)) {
> + e4b.bd_info->bb_freed_last_trimmed += count;
> + /*
> + * Only clear the WAS_TRIMMED flag if there are
> + * several blocks freed, or if the group becomes
> + * totally 'empty'(free < num_itable_blocks + 2).
> + */
> + if (e4b.bd_info->bb_freed_last_trimmed >=
> + sbi->s_min_freed_blocks_to_trim ||
> + e4b.bd_info->bb_free < (sbi->s_itb_per_group + 2))
> + EXT4_MB_GDP_CLEAR_TRIMMED(gdp);
> + }
> ext4_group_desc_csum_set(sb, block_group, gdp);
> ext4_unlock_group(sb, block_group);
>
> @@ -5425,6 +5438,7 @@ ext4_trim_all_free(struct super_block *sb, ext4_group_t group,
> }
> ext4_lock_group(sb, group);
> EXT4_MB_GDP_SET_TRIMMED(gdp);
> + e4b.bd_info->bb_freed_last_trimmed = 0;
> ext4_group_desc_csum_set(sb, group, gdp);
> ext4_unlock_group(sb, group);
> err = ext4_handle_dirty_metadata(handle, NULL, gdp_bh);
> diff --git a/fs/ext4/sysfs.c b/fs/ext4/sysfs.c
> index 6c9fc9e21c13..8ee4e7e3f125 100644
> --- a/fs/ext4/sysfs.c
> +++ b/fs/ext4/sysfs.c
> @@ -216,6 +216,7 @@ EXT4_RW_ATTR_SBI_UI(mb_order2_req, s_mb_order2_reqs);
> EXT4_RW_ATTR_SBI_UI(mb_stream_req, s_mb_stream_request);
> EXT4_RW_ATTR_SBI_UI(mb_group_prealloc, s_mb_group_prealloc);
> EXT4_RW_ATTR_SBI_UI(extent_max_zeroout_kb, s_extent_max_zeroout_kb);
> +EXT4_RW_ATTR_SBI_UI(min_freed_blocks_to_trim, s_min_freed_blocks_to_trim);
> EXT4_ATTR(trigger_fs_error, 0200, trigger_test_error);
> EXT4_RW_ATTR_SBI_UI(err_ratelimit_interval_ms, s_err_ratelimit_state.interval);
> EXT4_RW_ATTR_SBI_UI(err_ratelimit_burst, s_err_ratelimit_state.burst);
> @@ -259,6 +260,7 @@ static struct attribute *ext4_attrs[] = {
> ATTR_LIST(mb_group_prealloc),
> ATTR_LIST(max_writeback_mb_bump),
> ATTR_LIST(extent_max_zeroout_kb),
> + ATTR_LIST(min_freed_blocks_to_trim),
> ATTR_LIST(trigger_fs_error),
> ATTR_LIST(err_ratelimit_interval_ms),
> ATTR_LIST(err_ratelimit_burst),
> --
> 2.25.4
>


Cheers, Andreas






Attachments:
signature.asc (890.00 B)
Message signed with OpenPGP