There is no good reason for the s_last_trim_minblks to be atomic. There is
no data integrity needed and there is no real danger in setting and
reading it in a racy manner. Change it to be unsigned long, the same type
as s_clusters_per_group which is the maximum that's allowed.
Signed-off-by: Lukas Czerner <[email protected]>
Suggested-by: Andreas Dilger <[email protected]>
---
fs/ext4/ext4.h | 2 +-
fs/ext4/mballoc.c | 4 ++--
2 files changed, 3 insertions(+), 3 deletions(-)
diff --git a/fs/ext4/ext4.h b/fs/ext4/ext4.h
index 3825195539d7..92a155401f61 100644
--- a/fs/ext4/ext4.h
+++ b/fs/ext4/ext4.h
@@ -1660,7 +1660,7 @@ struct ext4_sb_info {
struct task_struct *s_mmp_tsk;
/* record the last minlen when FITRIM is called. */
- atomic_t s_last_trim_minblks;
+ unsigned long s_last_trim_minblks;
/* Reference to checksum algorithm driver via cryptoapi */
struct crypto_shash *s_chksum_driver;
diff --git a/fs/ext4/mballoc.c b/fs/ext4/mballoc.c
index 72bfac2d6dce..eda550ec3956 100644
--- a/fs/ext4/mballoc.c
+++ b/fs/ext4/mballoc.c
@@ -6374,7 +6374,7 @@ ext4_trim_all_free(struct super_block *sb, ext4_group_t group,
ext4_lock_group(sb, group);
if (!EXT4_MB_GRP_WAS_TRIMMED(e4b.bd_info) ||
- minblocks < atomic_read(&EXT4_SB(sb)->s_last_trim_minblks)) {
+ minblocks < EXT4_SB(sb)->s_last_trim_minblks) {
ret = ext4_try_to_trim_range(sb, &e4b, start, max, minblocks);
if (ret >= 0)
EXT4_MB_GRP_SET_TRIMMED(e4b.bd_info);
@@ -6475,7 +6475,7 @@ int ext4_trim_fs(struct super_block *sb, struct fstrim_range *range)
}
if (!ret)
- atomic_set(&EXT4_SB(sb)->s_last_trim_minblks, minlen);
+ EXT4_SB(sb)->s_last_trim_minblks = minlen;
out:
range->len = EXT4_C2B(EXT4_SB(sb), trimmed) << sb->s_blocksize_bits;
--
2.31.1
Ext4 has an optimization mechanism for batched discard (FITRIM) that
should help speed up subsequent calls of FITRIM ioctl by skipping the
groups that were previously trimmed. However because the FITRIM allows
to set the minimum size of an extent to trim, ext4 stores the last
minimum extent size and only avoids trimming the group if it was
previously trimmed with minimum extent size equal to, or smaller than
the current call.
There is currently no way to bypass the optimization without a
umount/mount cycle. This becomes a problem when the file system is
live migrated to different storage, because the optimization will
prevent possibly useful discard calls to the storage.
Fix it by exporting s_last_trim_minblks via the sysfs interface, which
allows us to set the stored minimum size to a number of blocks larger
than the minimum length of a subsequent FITRIM call, effectively
bypassing the optimization.
By setting the s_last_trim_minblks to ULONG_MAX the optimization will be
effectively cleared regardless of the previous state, or file system
configuration.
For example:
getconf ULONG_MAX > /sys/fs/ext4/dm-1/last_trim_minblks
Signed-off-by: Lukas Czerner <[email protected]>
Reported-by: Laurent GUERBY <[email protected]>
---
v2: Remove unnecessary assignment
v3: s_last_trim_minblks is now unsigned long which simplifies this
fs/ext4/sysfs.c | 2 ++
1 file changed, 2 insertions(+)
diff --git a/fs/ext4/sysfs.c b/fs/ext4/sysfs.c
index 2314f7446592..95d8a996d2d8 100644
--- a/fs/ext4/sysfs.c
+++ b/fs/ext4/sysfs.c
@@ -245,6 +245,7 @@ EXT4_ATTR(last_error_time, 0444, last_error_time);
EXT4_ATTR(journal_task, 0444, journal_task);
EXT4_RW_ATTR_SBI_UI(mb_prefetch, s_mb_prefetch);
EXT4_RW_ATTR_SBI_UI(mb_prefetch_limit, s_mb_prefetch_limit);
+EXT4_RW_ATTR_SBI_UL(last_trim_minblks, s_last_trim_minblks);
static unsigned int old_bump_val = 128;
EXT4_ATTR_PTR(max_writeback_mb_bump, 0444, pointer_ui, &old_bump_val);
@@ -295,6 +296,7 @@ static struct attribute *ext4_attrs[] = {
#endif
ATTR_LIST(mb_prefetch),
ATTR_LIST(mb_prefetch_limit),
+ ATTR_LIST(last_trim_minblks),
NULL,
};
ATTRIBUTE_GROUPS(ext4);
--
2.31.1
On Nov 3, 2021, at 8:51 AM, Lukas Czerner <[email protected]> wrote:
>
> There is no good reason for the s_last_trim_minblks to be atomic. There is
> no data integrity needed and there is no real danger in setting and
> reading it in a racy manner. Change it to be unsigned long, the same type
> as s_clusters_per_group which is the maximum that's allowed.
>
> Signed-off-by: Lukas Czerner <[email protected]>
> Suggested-by: Andreas Dilger <[email protected]>
Reviewed-by: Andreas Dilger <[email protected]>
Could also add for reference:
Fixes: 3d56b8d2c74c ("ext4: Speed up FITRIM by recording flags in ext4_group_info")
> ---
> fs/ext4/ext4.h | 2 +-
> fs/ext4/mballoc.c | 4 ++--
> 2 files changed, 3 insertions(+), 3 deletions(-)
>
> diff --git a/fs/ext4/ext4.h b/fs/ext4/ext4.h
> index 3825195539d7..92a155401f61 100644
> --- a/fs/ext4/ext4.h
> +++ b/fs/ext4/ext4.h
> @@ -1660,7 +1660,7 @@ struct ext4_sb_info {
> struct task_struct *s_mmp_tsk;
>
> /* record the last minlen when FITRIM is called. */
> - atomic_t s_last_trim_minblks;
> + unsigned long s_last_trim_minblks;
>
> /* Reference to checksum algorithm driver via cryptoapi */
> struct crypto_shash *s_chksum_driver;
> diff --git a/fs/ext4/mballoc.c b/fs/ext4/mballoc.c
> index 72bfac2d6dce..eda550ec3956 100644
> --- a/fs/ext4/mballoc.c
> +++ b/fs/ext4/mballoc.c
> @@ -6374,7 +6374,7 @@ ext4_trim_all_free(struct super_block *sb, ext4_group_t group,
> ext4_lock_group(sb, group);
>
> if (!EXT4_MB_GRP_WAS_TRIMMED(e4b.bd_info) ||
> - minblocks < atomic_read(&EXT4_SB(sb)->s_last_trim_minblks)) {
> + minblocks < EXT4_SB(sb)->s_last_trim_minblks) {
> ret = ext4_try_to_trim_range(sb, &e4b, start, max, minblocks);
> if (ret >= 0)
> EXT4_MB_GRP_SET_TRIMMED(e4b.bd_info);
> @@ -6475,7 +6475,7 @@ int ext4_trim_fs(struct super_block *sb, struct fstrim_range *range)
> }
>
> if (!ret)
> - atomic_set(&EXT4_SB(sb)->s_last_trim_minblks, minlen);
> + EXT4_SB(sb)->s_last_trim_minblks = minlen;
>
> out:
> range->len = EXT4_C2B(EXT4_SB(sb), trimmed) << sb->s_blocksize_bits;
> --
> 2.31.1
>
Cheers, Andreas
On Nov 3, 2021, at 8:51 AM, Lukas Czerner <[email protected]> wrote:
>
> Ext4 has an optimization mechanism for batched discard (FITRIM) that
> should help speed up subsequent calls of FITRIM ioctl by skipping the
> groups that were previously trimmed. However because the FITRIM allows
> to set the minimum size of an extent to trim, ext4 stores the last
> minimum extent size and only avoids trimming the group if it was
> previously trimmed with minimum extent size equal to, or smaller than
> the current call.
>
> There is currently no way to bypass the optimization without
> umount/mount cycle. This becomes a problem when the file system is
> live migrated to a different storage, because the optimization will
> prevent possibly useful discard calls to the storage.
>
> Fix it by exporting the s_last_trim_minblks via sysfs interface which
> will allow us to set the minimum size to the number of blocks larger
> than subsequent FITRIM call, effectively bypassing the optimization.
>
> By setting the s_last_trim_minblks to ULONG_MAX the optimization will be
> effectively cleared regardless of the previous state, or file system
> configuration.
>
> For example:
> getconf ULONG_MAX > /sys/fs/ext4/dm-1/last_trim_minblks
>
> Signed-off-by: Lukas Czerner <[email protected]>
> Reported-by: Laurent GUERBY <[email protected]>
Reviewed-by: Andreas Dilger <[email protected]>
> ---
> v2: Remove unnecessary assignment
> v3: s_last_trim_minblks is now unsigned long which simplifies this
>
> fs/ext4/sysfs.c | 2 ++
> 1 file changed, 2 insertions(+)
>
> diff --git a/fs/ext4/sysfs.c b/fs/ext4/sysfs.c
> index 2314f7446592..95d8a996d2d8 100644
> --- a/fs/ext4/sysfs.c
> +++ b/fs/ext4/sysfs.c
> @@ -245,6 +245,7 @@ EXT4_ATTR(last_error_time, 0444, last_error_time);
> EXT4_ATTR(journal_task, 0444, journal_task);
> EXT4_RW_ATTR_SBI_UI(mb_prefetch, s_mb_prefetch);
> EXT4_RW_ATTR_SBI_UI(mb_prefetch_limit, s_mb_prefetch_limit);
> +EXT4_RW_ATTR_SBI_UL(last_trim_minblks, s_last_trim_minblks);
>
> static unsigned int old_bump_val = 128;
> EXT4_ATTR_PTR(max_writeback_mb_bump, 0444, pointer_ui, &old_bump_val);
> @@ -295,6 +296,7 @@ static struct attribute *ext4_attrs[] = {
> #endif
> ATTR_LIST(mb_prefetch),
> ATTR_LIST(mb_prefetch_limit),
> + ATTR_LIST(last_trim_minblks),
> NULL,
> };
> ATTRIBUTE_GROUPS(ext4);
> --
> 2.31.1
>
Cheers, Andreas
ping
-Lukas
On Wed, Nov 03, 2021 at 03:51:22PM +0100, Lukas Czerner wrote:
> Ext4 has an optimization mechanism for batched discard (FITRIM) that
> should help speed up subsequent calls of FITRIM ioctl by skipping the
> groups that were previously trimmed. However because the FITRIM allows
> to set the minimum size of an extent to trim, ext4 stores the last
> minimum extent size and only avoids trimming the group if it was
> previously trimmed with minimum extent size equal to, or smaller than
> the current call.
>
> There is currently no way to bypass the optimization without
> umount/mount cycle. This becomes a problem when the file system is
> live migrated to a different storage, because the optimization will
> prevent possibly useful discard calls to the storage.
>
> Fix it by exporting the s_last_trim_minblks via sysfs interface which
> will allow us to set the minimum size to the number of blocks larger
> than subsequent FITRIM call, effectively bypassing the optimization.
>
> By setting the s_last_trim_minblks to ULONG_MAX the optimization will be
> effectively cleared regardless of the previous state, or file system
> configuration.
>
> For example:
> getconf ULONG_MAX > /sys/fs/ext4/dm-1/last_trim_minblks
>
> Signed-off-by: Lukas Czerner <[email protected]>
> Reported-by: Laurent GUERBY <[email protected]>
> ---
> v2: Remove unnecessary assignment
> v3: s_last_trim_minblks is now unsigned long which simplifies this
>
> fs/ext4/sysfs.c | 2 ++
> 1 file changed, 2 insertions(+)
>
> diff --git a/fs/ext4/sysfs.c b/fs/ext4/sysfs.c
> index 2314f7446592..95d8a996d2d8 100644
> --- a/fs/ext4/sysfs.c
> +++ b/fs/ext4/sysfs.c
> @@ -245,6 +245,7 @@ EXT4_ATTR(last_error_time, 0444, last_error_time);
> EXT4_ATTR(journal_task, 0444, journal_task);
> EXT4_RW_ATTR_SBI_UI(mb_prefetch, s_mb_prefetch);
> EXT4_RW_ATTR_SBI_UI(mb_prefetch_limit, s_mb_prefetch_limit);
> +EXT4_RW_ATTR_SBI_UL(last_trim_minblks, s_last_trim_minblks);
>
> static unsigned int old_bump_val = 128;
> EXT4_ATTR_PTR(max_writeback_mb_bump, 0444, pointer_ui, &old_bump_val);
> @@ -295,6 +296,7 @@ static struct attribute *ext4_attrs[] = {
> #endif
> ATTR_LIST(mb_prefetch),
> ATTR_LIST(mb_prefetch_limit),
> + ATTR_LIST(last_trim_minblks),
> NULL,
> };
> ATTRIBUTE_GROUPS(ext4);
> --
> 2.31.1
>
On Wed, Nov 03, 2021 at 03:51:21PM +0100, Lukas Czerner wrote:
> There is no good reason for the s_last_trim_minblks to be atomic. There is
> no data integrity needed and there is no real danger in setting and
> reading it in a racy manner. Change it to be unsigned long, the same type
> as s_clusters_per_group which is the maximum that's allowed.
>
> Signed-off-by: Lukas Czerner <[email protected]>
> Suggested-by: Andreas Dilger <[email protected]>
> Reviewed-by: Andreas Dilger <[email protected]>
Thanks, applied.
- Ted
On Wed, 3 Nov 2021 15:51:22 +0100, Lukas Czerner wrote:
> Ext4 has an optimization mechanism for batched discard (FITRIM) that
> should help speed up subsequent calls of FITRIM ioctl by skipping the
> groups that were previously trimmed. However because the FITRIM allows
> to set the minimum size of an extent to trim, ext4 stores the last
> minimum extent size and only avoids trimming the group if it was
> previously trimmed with minimum extent size equal to, or smaller than
> the current call.
>
> [...]
Applied, thanks!
[2/2] ext4: Allow to change s_last_trim_minblks via sysfs
commit: db19c4cdc28a8ec1241d50656991ab1bd96f5c02
Best regards,
--
Theodore Ts'o <[email protected]>