2024-03-26 13:45:54

by Kemeng Shi

Subject: [PATCH 0/5] Minor improvements and cleanups to ext4 mballoc

This series contains some minor improvements and cleanups to ext4
mballoc. No failures were found in the "kvm-xfstests smoke" test or in
the unit tests. More details can be found in the respective patches. Thanks!

Kemeng Shi (5):
ext4: keep "prefetch_grp" and "nr" consistent
ext4: add test_mb_mark_used_cost to estimate cost of mb_mark_used
ext4: call ext4_mb_mark_free_simple in mb_mark_used to clear bits
ext4: use correct criteria name instead of stale integer number in
comment
ext4: expand next_linear_group to remove repeat check for linear scan.

fs/ext4/ext4.h | 15 +++++--
fs/ext4/mballoc-test.c | 56 ++++++++++++++++++++++++++
fs/ext4/mballoc.c | 89 +++++++++++++++++-------------------------
fs/ext4/mballoc.h | 4 +-
4 files changed, 105 insertions(+), 59 deletions(-)

--
2.30.0



2024-03-26 13:46:15

by Kemeng Shi

Subject: [PATCH 2/5] ext4: add test_mb_mark_used_cost to estimate cost of mb_mark_used

Add test_mb_mark_used_cost to estimate the runtime of mb_mark_used.

The result of the unit test is as follows:
$ ./tools/testing/kunit/kunit.py run --kunitconfig=fs/ext4/.kunitconfig --raw_output
...
# Subtest: test_mb_mark_used_cost
# test_mb_mark_used_cost: costed jiffies 311
ok 1 block_bits=10 cluster_bits=3 blocks_per_group=8192 group_count=4 desc_size=64
# test_mb_mark_used_cost: costed jiffies 304
ok 2 block_bits=12 cluster_bits=3 blocks_per_group=8192 group_count=4 desc_size=64
ok 3 block_bits=16 cluster_bits=3 blocks_per_group=8192 group_count=4 desc_size=64 # SKIP blocksize exceeds pagesize
# test_mb_mark_used_cost.speed: slow
# test_mb_mark_used_cost: pass:2 fail:0 skip:1 total:3
ok 7 test_mb_mark_used_cost
...
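
(For scale: jiffies convert to wall-clock time via CONFIG_HZ, so the
absolute numbers above depend on the kernel configuration; with HZ=1000,
for example, 311 jiffies is roughly 311 ms, while with HZ=250 it would
be about 1.2 s.)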

Signed-off-by: Kemeng Shi <[email protected]>
---
fs/ext4/mballoc-test.c | 56 ++++++++++++++++++++++++++++++++++++++++++
1 file changed, 56 insertions(+)

diff --git a/fs/ext4/mballoc-test.c b/fs/ext4/mballoc-test.c
index 044ca5238f41..cb1a551f9596 100644
--- a/fs/ext4/mballoc-test.c
+++ b/fs/ext4/mballoc-test.c
@@ -859,6 +859,56 @@ static void test_mb_free_blocks(struct kunit *test)
ext4_mb_unload_buddy(&e4b);
}

+#define COUNT_FOR_ESTIMATE 1000000
+static void test_mb_mark_used_cost(struct kunit *test)
+{
+ struct ext4_buddy e4b;
+ struct super_block *sb = (struct super_block *)test->priv;
+ struct ext4_free_extent ex;
+ int ret;
+ struct test_range ranges[TEST_RANGE_COUNT];
+ int i, j;
+ unsigned long start, end, all = 0;
+
+ /* buddy cache assumes that each page contains at least one block */
+ if (sb->s_blocksize > PAGE_SIZE)
+ kunit_skip(test, "blocksize exceeds pagesize");
+
+ ret = ext4_mb_load_buddy(sb, TEST_GOAL_GROUP, &e4b);
+ KUNIT_ASSERT_EQ(test, ret, 0);
+
+ ex.fe_group = TEST_GOAL_GROUP;
+ for (j = 0; j < COUNT_FOR_ESTIMATE; j++) {
+ mbt_generate_test_ranges(sb, ranges, TEST_RANGE_COUNT);
+ start = jiffies;
+ for (i = 0; i < TEST_RANGE_COUNT; i++) {
+ if (ranges[i].len == 0)
+ continue;
+
+ ex.fe_start = ranges[i].start;
+ ex.fe_len = ranges[i].len;
+ ext4_lock_group(sb, TEST_GOAL_GROUP);
+ mb_mark_used(&e4b, &ex);
+ ext4_unlock_group(sb, TEST_GOAL_GROUP);
+ }
+ end = jiffies;
+ all += (end - start);
+
+ for (i = 0; i < TEST_RANGE_COUNT; i++) {
+ if (ranges[i].len == 0)
+ continue;
+
+ ext4_lock_group(sb, TEST_GOAL_GROUP);
+ mb_free_blocks(NULL, &e4b, ranges[i].start,
+ ranges[i].len);
+ ext4_unlock_group(sb, TEST_GOAL_GROUP);
+ }
+ }
+
+ kunit_info(test, "costed jiffies %lu\n", all);
+ ext4_mb_unload_buddy(&e4b);
+}
+
static const struct mbt_ext4_block_layout mbt_test_layouts[] = {
{
.blocksize_bits = 10,
@@ -894,6 +944,10 @@ static void mbt_show_layout(const struct mbt_ext4_block_layout *layout,
}
KUNIT_ARRAY_PARAM(mbt_layouts, mbt_test_layouts, mbt_show_layout);

+static const struct kunit_attributes slow_attr = {
+ .speed = KUNIT_SPEED_SLOW,
+};
+
static struct kunit_case mbt_test_cases[] = {
KUNIT_CASE_PARAM(test_new_blocks_simple, mbt_layouts_gen_params),
KUNIT_CASE_PARAM(test_free_blocks_simple, mbt_layouts_gen_params),
@@ -901,6 +955,8 @@ static struct kunit_case mbt_test_cases[] = {
KUNIT_CASE_PARAM(test_mb_mark_used, mbt_layouts_gen_params),
KUNIT_CASE_PARAM(test_mb_free_blocks, mbt_layouts_gen_params),
KUNIT_CASE_PARAM(test_mark_diskspace_used, mbt_layouts_gen_params),
+ KUNIT_CASE_PARAM_ATTR(test_mb_mark_used_cost, mbt_layouts_gen_params,
+ slow_attr),
{}
};

--
2.30.0
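
Side note on the slow attribute: marking the case KUNIT_SPEED_SLOW makes it
filterable, so the expensive cost estimate does not have to run every time.
A sketch, assuming a kunit.py recent enough (v6.6+) to support attribute
filtering:

$ ./tools/testing/kunit/kunit.py run --kunitconfig=fs/ext4/.kunitconfig \
        --filter 'speed!=slow'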


2024-03-26 13:46:36

by Kemeng Shi

Subject: [PATCH 5/5] ext4: expand next_linear_group to remove repeat check for linear scan.

Expand next_linear_group to remove the repeated check for linear scan.

Signed-off-by: Kemeng Shi <[email protected]>
---
fs/ext4/mballoc.c | 37 ++++++-------------------------------
1 file changed, 6 insertions(+), 31 deletions(-)

diff --git a/fs/ext4/mballoc.c b/fs/ext4/mballoc.c
index 0f8a34513bf6..561780a274cd 100644
--- a/fs/ext4/mballoc.c
+++ b/fs/ext4/mballoc.c
@@ -1075,31 +1075,6 @@ static inline int should_optimize_scan(struct ext4_allocation_context *ac)
return 1;
}

-/*
- * Return next linear group for allocation. If linear traversal should not be
- * performed, this function just returns the same group
- */
-static ext4_group_t
-next_linear_group(struct ext4_allocation_context *ac, ext4_group_t group,
- ext4_group_t ngroups)
-{
- if (!should_optimize_scan(ac))
- goto inc_and_return;
-
- if (ac->ac_groups_linear_remaining) {
- ac->ac_groups_linear_remaining--;
- goto inc_and_return;
- }
-
- return group;
-inc_and_return:
- /*
- * Artificially restricted ngroups for non-extent
- * files makes group > ngroups possible on first loop.
- */
- return group + 1 >= ngroups ? 0 : group + 1;
-}
-
/*
* ext4_mb_choose_next_group: choose next group for allocation.
*
@@ -1118,12 +1093,12 @@ static void ext4_mb_choose_next_group(struct ext4_allocation_context *ac,
{
*new_cr = ac->ac_criteria;

- if (!should_optimize_scan(ac) || ac->ac_groups_linear_remaining) {
- *group = next_linear_group(ac, *group, ngroups);
- return;
- }
-
- if (*new_cr == CR_POWER2_ALIGNED) {
+ if (!should_optimize_scan(ac))
+ *group = *group + 1 >= ngroups ? 0 : *group + 1;
+ else if (ac->ac_groups_linear_remaining) {
+ ac->ac_groups_linear_remaining--;
+ *group = *group + 1 >= ngroups ? 0 : *group + 1;
+ } else if (*new_cr == CR_POWER2_ALIGNED) {
ext4_mb_choose_next_group_p2_aligned(ac, new_cr, group);
} else if (*new_cr == CR_GOAL_LEN_FAST) {
ext4_mb_choose_next_group_goal_fast(ac, new_cr, group);
--
2.30.0


2024-03-26 13:46:46

by Kemeng Shi

Subject: [PATCH 4/5] ext4: use correct criteria name instead of stale integer number in comment

Use correct criteria name instead of stale integer number in comment

Signed-off-by: Kemeng Shi <[email protected]>
---
fs/ext4/ext4.h | 15 ++++++++++++---
fs/ext4/mballoc.c | 14 ++++++++------
fs/ext4/mballoc.h | 4 ++--
3 files changed, 22 insertions(+), 11 deletions(-)

diff --git a/fs/ext4/ext4.h b/fs/ext4/ext4.h
index 023571f8dd1b..9b90013c59a3 100644
--- a/fs/ext4/ext4.h
+++ b/fs/ext4/ext4.h
@@ -213,11 +213,20 @@ enum criteria {
#define EXT4_MB_USE_RESERVED 0x2000
/* Do strict check for free blocks while retrying block allocation */
#define EXT4_MB_STRICT_CHECK 0x4000
-/* Large fragment size list lookup succeeded at least once for cr = 0 */
+/*
+ * Large fragment size list lookup succeeded at least once for cr =
+ * CR_POWER2_ALIGNED
+ */
#define EXT4_MB_CR_POWER2_ALIGNED_OPTIMIZED 0x8000
-/* Avg fragment size rb tree lookup succeeded at least once for cr = 1 */
+/*
+ * Avg fragment size rb tree lookup succeeded at least once for cr =
+ * CR_GOAL_LEN_FAST
+ */
#define EXT4_MB_CR_GOAL_LEN_FAST_OPTIMIZED 0x00010000
-/* Avg fragment size rb tree lookup succeeded at least once for cr = 1.5 */
+/*
+ * Avg fragment size rb tree lookup succeeded at least once for cr =
+ * CR_BEST_AVAIL_LEN
+ */
#define EXT4_MB_CR_BEST_AVAIL_LEN_OPTIMIZED 0x00020000

struct ext4_allocation_request {
diff --git a/fs/ext4/mballoc.c b/fs/ext4/mballoc.c
index 62d468379722..0f8a34513bf6 100644
--- a/fs/ext4/mballoc.c
+++ b/fs/ext4/mballoc.c
@@ -1131,8 +1131,9 @@ static void ext4_mb_choose_next_group(struct ext4_allocation_context *ac,
ext4_mb_choose_next_group_best_avail(ac, new_cr, group);
} else {
/*
- * TODO: For CR=2, we can arrange groups in an rb tree sorted by
- * bb_free. But until that happens, we should never come here.
+ * TODO: For CR=CR_GOAL_LEN_SLOW, we can arrange groups in an
+ * rb tree sorted by bb_free. But until that happens, we should
+ * never come here.
*/
WARN_ON(1);
}
@@ -3444,10 +3445,11 @@ static int ext4_mb_init_backend(struct super_block *sb)
}
if (sbi->s_mb_prefetch > ext4_get_groups_count(sb))
sbi->s_mb_prefetch = ext4_get_groups_count(sb);
- /* now many real IOs to prefetch within a single allocation at cr=0
- * given cr=0 is an CPU-related optimization we shouldn't try to
- * load too many groups, at some point we should start to use what
- * we've got in memory.
+ /*
+ * now many real IOs to prefetch within a single allocation at
+ * cr=CR_POWER2_ALIGNED. Given cr=CR_POWER2_ALIGNED is an CPU-related
+ * optimization we shouldn't try to load too many groups, at some point
+ * we should start to use what we've got in memory.
* with an average random access time 5ms, it'd take a second to get
* 200 groups (* N with flex_bg), so let's make this limit 4
*/
diff --git a/fs/ext4/mballoc.h b/fs/ext4/mballoc.h
index 56938532b4ce..042437d8860f 100644
--- a/fs/ext4/mballoc.h
+++ b/fs/ext4/mballoc.h
@@ -187,8 +187,8 @@ struct ext4_allocation_context {
struct ext4_free_extent ac_f_ex;

/*
- * goal len can change in CR1.5, so save the original len. This is
- * used while adjusting the PA window and for accounting.
+ * goal len can change in CR_BEST_AVAIL_LEN, so save the original len.
+ * This is used while adjusting the PA window and for accounting.
*/
ext4_grpblk_t ac_orig_goal_len;

--
2.30.0
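
For readers mapping the old numeric values to the named criteria, the
comment updates above imply the following correspondence (a sketch; the
authoritative enum criteria lives in fs/ext4/ext4.h and may contain
further entries not touched by this patch):

enum criteria {
	CR_POWER2_ALIGNED,	/* was cr = 0   */
	CR_GOAL_LEN_FAST,	/* was cr = 1   */
	CR_BEST_AVAIL_LEN,	/* was cr = 1.5 */
	CR_GOAL_LEN_SLOW,	/* was cr = 2   */
};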


2024-03-29 07:14:35

by Ojaswin Mujoo

Subject: Re: [PATCH 5/5] ext4: expand next_linear_group to remove repeat check for linear scan.

On Wed, Mar 27, 2024 at 05:38:23AM +0800, Kemeng Shi wrote:
> Expand next_linear_group to remove the repeated check for linear scan.
>
> Signed-off-by: Kemeng Shi <[email protected]>
> ---
> fs/ext4/mballoc.c | 37 ++++++-------------------------------
> 1 file changed, 6 insertions(+), 31 deletions(-)
>
> diff --git a/fs/ext4/mballoc.c b/fs/ext4/mballoc.c
> index 0f8a34513bf6..561780a274cd 100644
> --- a/fs/ext4/mballoc.c
> +++ b/fs/ext4/mballoc.c
> @@ -1075,31 +1075,6 @@ static inline int should_optimize_scan(struct ext4_allocation_context *ac)
> return 1;
> }
>
> -/*
> - * Return next linear group for allocation. If linear traversal should not be
> - * performed, this function just returns the same group
> - */
> -static ext4_group_t
> -next_linear_group(struct ext4_allocation_context *ac, ext4_group_t group,
> - ext4_group_t ngroups)
> -{
> - if (!should_optimize_scan(ac))
> - goto inc_and_return;
> -
> - if (ac->ac_groups_linear_remaining) {
> - ac->ac_groups_linear_remaining--;
> - goto inc_and_return;
> - }
> -
> - return group;
> -inc_and_return:
> - /*
> - * Artificially restricted ngroups for non-extent
> - * files makes group > ngroups possible on first loop.
> - */
> - return group + 1 >= ngroups ? 0 : group + 1;
> -}
> -
> /*
> * ext4_mb_choose_next_group: choose next group for allocation.
> *
> @@ -1118,12 +1093,12 @@ static void ext4_mb_choose_next_group(struct ext4_allocation_context *ac,
> {
> *new_cr = ac->ac_criteria;
>
> - if (!should_optimize_scan(ac) || ac->ac_groups_linear_remaining) {
> - *group = next_linear_group(ac, *group, ngroups);
> - return;
> - }
> -
> - if (*new_cr == CR_POWER2_ALIGNED) {
> + if (!should_optimize_scan(ac))
> + *group = *group + 1 >= ngroups ? 0 : *group + 1;
> + else if (ac->ac_groups_linear_remaining) {
> + ac->ac_groups_linear_remaining--;
> + *group = *group + 1 >= ngroups ? 0 : *group + 1;
> + } else if (*new_cr == CR_POWER2_ALIGNED) {


Hi Kemeng, thanks for the cleanups

I feel that open coding this logic and having a single if/else chain for the
linear and non-linear scan cases makes the code a bit harder to follow, and
we are losing some comments as well.

Since our main aim is to avoid the double check, maybe we can keep
next_linear_group() strictly for getting the next linear group and move the
rest of the checks outside. So something like:

static ext4_group_t
next_linear_group(ext4_group_t group, ext4_group_t ngroups)
{

/*
* Artificially restricted ngroups for non-extent
* files makes group > ngroups possible on first loop.
*/
return group + 1 >= ngroups ? 0 : group + 1;
}

static void ext4_mb_choose_next_group(...)
{
...

/*
* Fallback to linear scan when optimized scanning is disabled
*/
if (!should_optimize_scan(ac)) {
*group = next_linear_group(*group, ngroups);
return;
}

/*
* Optimized scanning can return non adjacent groups which can cause
* seek overhead for rotational disks. So try few linear groups before
* trying optimized scan.
*/
if (ac->ac_groups_linear_remaining) {
*group = next_linear_group(*group, ngroups);
ac->ac_groups_linear_remaining--;
return;
}

...
}

Let me know your thoughts.

Regards,
ojaswin

> ext4_mb_choose_next_group_p2_aligned(ac, new_cr, group);
> } else if (*new_cr == CR_GOAL_LEN_FAST) {
> ext4_mb_choose_next_group_goal_fast(ac, new_cr, group);
> --
> 2.30.0
>




2024-03-29 07:16:38

by Ojaswin Mujoo

Subject: Re: [PATCH 4/5] ext4: use correct criteria name instead of stale integer number in comment

On Wed, Mar 27, 2024 at 05:38:22AM +0800, Kemeng Shi wrote:
> Use correct criteria name instead of stale integer number in comment
>
> Signed-off-by: Kemeng Shi <[email protected]>
> ---
> fs/ext4/ext4.h | 15 ++++++++++++---
> fs/ext4/mballoc.c | 14 ++++++++------
> fs/ext4/mballoc.h | 4 ++--
> 3 files changed, 22 insertions(+), 11 deletions(-)
>

Thanks for the cleanup! Feel free to add:

Reviewed-by: Ojaswin Mujoo <[email protected]>

> diff --git a/fs/ext4/ext4.h b/fs/ext4/ext4.h
> index 023571f8dd1b..9b90013c59a3 100644
> --- a/fs/ext4/ext4.h
> +++ b/fs/ext4/ext4.h
> @@ -213,11 +213,20 @@ enum criteria {
> #define EXT4_MB_USE_RESERVED 0x2000
> /* Do strict check for free blocks while retrying block allocation */
> #define EXT4_MB_STRICT_CHECK 0x4000
> -/* Large fragment size list lookup succeeded at least once for cr = 0 */
> +/*
> + * Large fragment size list lookup succeeded at least once for cr =
> + * CR_POWER2_ALIGNED
> + */
> #define EXT4_MB_CR_POWER2_ALIGNED_OPTIMIZED 0x8000
> -/* Avg fragment size rb tree lookup succeeded at least once for cr = 1 */
> +/*
> + * Avg fragment size rb tree lookup succeeded at least once for cr =
> + * CR_GOAL_LEN_FAST
> + */
> #define EXT4_MB_CR_GOAL_LEN_FAST_OPTIMIZED 0x00010000
> -/* Avg fragment size rb tree lookup succeeded at least once for cr = 1.5 */
> +/*
> + * Avg fragment size rb tree lookup succeeded at least once for cr =
> + * CR_BEST_AVAIL_LEN
> + */
> #define EXT4_MB_CR_BEST_AVAIL_LEN_OPTIMIZED 0x00020000
>
> struct ext4_allocation_request {
> diff --git a/fs/ext4/mballoc.c b/fs/ext4/mballoc.c
> index 62d468379722..0f8a34513bf6 100644
> --- a/fs/ext4/mballoc.c
> +++ b/fs/ext4/mballoc.c
> @@ -1131,8 +1131,9 @@ static void ext4_mb_choose_next_group(struct ext4_allocation_context *ac,
> ext4_mb_choose_next_group_best_avail(ac, new_cr, group);
> } else {
> /*
> - * TODO: For CR=2, we can arrange groups in an rb tree sorted by
> - * bb_free. But until that happens, we should never come here.
> + * TODO: For CR=CR_GOAL_LEN_SLOW, we can arrange groups in an
> + * rb tree sorted by bb_free. But until that happens, we should
> + * never come here.
> */
> WARN_ON(1);
> }
> @@ -3444,10 +3445,11 @@ static int ext4_mb_init_backend(struct super_block *sb)
> }
> if (sbi->s_mb_prefetch > ext4_get_groups_count(sb))
> sbi->s_mb_prefetch = ext4_get_groups_count(sb);
> - /* now many real IOs to prefetch within a single allocation at cr=0
> - * given cr=0 is an CPU-related optimization we shouldn't try to
> - * load too many groups, at some point we should start to use what
> - * we've got in memory.
> + /*
> + * now many real IOs to prefetch within a single allocation at
> + * cr=CR_POWER2_ALIGNED. Given cr=CR_POWER2_ALIGNED is an CPU-related
> + * optimization we shouldn't try to load too many groups, at some point
> + * we should start to use what we've got in memory.
> * with an average random access time 5ms, it'd take a second to get
> * 200 groups (* N with flex_bg), so let's make this limit 4
> */
> diff --git a/fs/ext4/mballoc.h b/fs/ext4/mballoc.h
> index 56938532b4ce..042437d8860f 100644
> --- a/fs/ext4/mballoc.h
> +++ b/fs/ext4/mballoc.h
> @@ -187,8 +187,8 @@ struct ext4_allocation_context {
> struct ext4_free_extent ac_f_ex;
>
> /*
> - * goal len can change in CR1.5, so save the original len. This is
> - * used while adjusting the PA window and for accounting.
> + * goal len can change in CR_BEST_AVAIL_LEN, so save the original len.
> + * This is used while adjusting the PA window and for accounting.
> */
> ext4_grpblk_t ac_orig_goal_len;
>
> --
> 2.30.0
>

2024-03-29 07:29:26

by kernel test robot

Subject: Re: [PATCH 2/5] ext4: add test_mb_mark_used_cost to estimate cost of mb_mark_used

Hi Kemeng,

kernel test robot noticed the following build errors:

[auto build test ERROR on tytso-ext4/dev]
[also build test ERROR on linus/master v6.9-rc1 next-20240328]
[If your patch is applied to the wrong git tree, kindly drop us a note.
And when submitting patch, we suggest to use '--base' as documented in
https://git-scm.com/docs/git-format-patch#_base_tree_information]

url: https://github.com/intel-lab-lkp/linux/commits/Kemeng-Shi/ext4-keep-prefetch_grp-and-nr-consistent/20240326-214754
base: https://git.kernel.org/pub/scm/linux/kernel/git/tytso/ext4.git dev
patch link: https://lore.kernel.org/r/20240326213823.528302-3-shikemeng%40huaweicloud.com
patch subject: [PATCH 2/5] ext4: add test_mb_mark_used_cost to estimate cost of mb_mark_used
config: x86_64-randconfig-072-20240329 (https://download.01.org/0day-ci/archive/20240329/[email protected]/config)
compiler: gcc-7 (Ubuntu 7.5.0-6ubuntu2) 7.5.0
reproduce (this is a W=1 build): (https://download.01.org/0day-ci/archive/20240329/[email protected]/reproduce)

If you fix the issue in a separate patch/commit (i.e. not just a new version of
the same patch/commit), kindly add following tags
| Reported-by: kernel test robot <[email protected]>
| Closes: https://lore.kernel.org/oe-kbuild-all/[email protected]/

All errors (new ones prefixed by >>):

In file included from include/kunit/static_stub.h:18:0,
from fs/ext4/mballoc.c:21:
>> fs/ext4/mballoc-test.c:959:10: error: initializer element is not constant
slow_attr),
^
include/kunit/test.h:218:13: note: in definition of macro 'KUNIT_CASE_PARAM_ATTR'
.attr = attributes, .module_name = KBUILD_MODNAME}
^~~~~~~~~~
fs/ext4/mballoc-test.c:959:10: note: (near initialization for 'mbt_test_cases[6].attr')
slow_attr),
^
include/kunit/test.h:218:13: note: in definition of macro 'KUNIT_CASE_PARAM_ATTR'
.attr = attributes, .module_name = KBUILD_MODNAME}
^~~~~~~~~~


vim +959 fs/ext4/mballoc-test.c

950
951 static struct kunit_case mbt_test_cases[] = {
952 KUNIT_CASE_PARAM(test_new_blocks_simple, mbt_layouts_gen_params),
953 KUNIT_CASE_PARAM(test_free_blocks_simple, mbt_layouts_gen_params),
954 KUNIT_CASE_PARAM(test_mb_generate_buddy, mbt_layouts_gen_params),
955 KUNIT_CASE_PARAM(test_mb_mark_used, mbt_layouts_gen_params),
956 KUNIT_CASE_PARAM(test_mb_free_blocks, mbt_layouts_gen_params),
957 KUNIT_CASE_PARAM(test_mark_diskspace_used, mbt_layouts_gen_params),
958 KUNIT_CASE_PARAM_ATTR(test_mb_mark_used_cost, mbt_layouts_gen_params,
> 959 slow_attr),
960 {}
961 };
962

--
0-DAY CI Kernel Test Service
https://github.com/intel/lkp-tests/wiki
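
The failure itself is straightforward: slow_attr is a const-qualified
object, and in C a const object is not a compile-time constant expression,
so gcc-7 rejects it as an initializer for the static mbt_test_cases array.
A minimal sketch of one way around this, assuming the attributes argument
of KUNIT_CASE_PARAM_ATTR accepts a brace-enclosed initializer (its
expansion is `.attr = attributes`, per the log above), is to drop slow_attr
and build .attr from constant expressions directly:

static struct kunit_case mbt_test_cases[] = {
	KUNIT_CASE_PARAM(test_new_blocks_simple, mbt_layouts_gen_params),
	KUNIT_CASE_PARAM(test_free_blocks_simple, mbt_layouts_gen_params),
	KUNIT_CASE_PARAM(test_mb_generate_buddy, mbt_layouts_gen_params),
	KUNIT_CASE_PARAM(test_mb_mark_used, mbt_layouts_gen_params),
	KUNIT_CASE_PARAM(test_mb_free_blocks, mbt_layouts_gen_params),
	KUNIT_CASE_PARAM(test_mark_diskspace_used, mbt_layouts_gen_params),
	/* brace-enclosed designated initializer: no non-constant object
	 * is referenced, so this is a valid static initializer */
	KUNIT_CASE_PARAM_ATTR(test_mb_mark_used_cost, mbt_layouts_gen_params,
			      { .speed = KUNIT_SPEED_SLOW }),
	{}
};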

2024-04-03 06:59:57

by Kemeng Shi

Subject: Re: [PATCH 5/5] ext4: expand next_linear_group to remove repeat check for linear scan.



on 3/29/2024 3:14 PM, Ojaswin Mujoo wrote:
> On Wed, Mar 27, 2024 at 05:38:23AM +0800, Kemeng Shi wrote:
>> Expand next_linear_group to remove the repeated check for linear scan.
>>
>> Signed-off-by: Kemeng Shi <[email protected]>
>> ---
>> fs/ext4/mballoc.c | 37 ++++++-------------------------------
>> 1 file changed, 6 insertions(+), 31 deletions(-)
>>
>> diff --git a/fs/ext4/mballoc.c b/fs/ext4/mballoc.c
>> index 0f8a34513bf6..561780a274cd 100644
>> --- a/fs/ext4/mballoc.c
>> +++ b/fs/ext4/mballoc.c
>> @@ -1075,31 +1075,6 @@ static inline int should_optimize_scan(struct ext4_allocation_context *ac)
>> return 1;
>> }
>>
>> -/*
>> - * Return next linear group for allocation. If linear traversal should not be
>> - * performed, this function just returns the same group
>> - */
>> -static ext4_group_t
>> -next_linear_group(struct ext4_allocation_context *ac, ext4_group_t group,
>> - ext4_group_t ngroups)
>> -{
>> - if (!should_optimize_scan(ac))
>> - goto inc_and_return;
>> -
>> - if (ac->ac_groups_linear_remaining) {
>> - ac->ac_groups_linear_remaining--;
>> - goto inc_and_return;
>> - }
>> -
>> - return group;
>> -inc_and_return:
>> - /*
>> - * Artificially restricted ngroups for non-extent
>> - * files makes group > ngroups possible on first loop.
>> - */
>> - return group + 1 >= ngroups ? 0 : group + 1;
>> -}
>> -
>> /*
>> * ext4_mb_choose_next_group: choose next group for allocation.
>> *
>> @@ -1118,12 +1093,12 @@ static void ext4_mb_choose_next_group(struct ext4_allocation_context *ac,
>> {
>> *new_cr = ac->ac_criteria;
>>
>> - if (!should_optimize_scan(ac) || ac->ac_groups_linear_remaining) {
>> - *group = next_linear_group(ac, *group, ngroups);
>> - return;
>> - }
>> -
>> - if (*new_cr == CR_POWER2_ALIGNED) {
>> + if (!should_optimize_scan(ac))
>> + *group = *group + 1 >= ngroups ? 0 : *group + 1;
>> + else if (ac->ac_groups_linear_remaining) {
>> + ac->ac_groups_linear_remaining--;
>> + *group = *group + 1 >= ngroups ? 0 : *group + 1;
>> + } else if (*new_cr == CR_POWER2_ALIGNED) {
>
>
> Hi Kemeng, thanks for the cleanups
>
> I feel that open coding this logic and having a single if/else chain for the
> linear and non-linear scan cases makes the code a bit harder to follow, and
> we are losing some comments as well.
>
> Since our main aim is to avoid the double check, maybe we can keep
> next_linear_group() strictly for getting the next linear group and move the
> rest of the checks outside. So something like:
>
> static ext4_group_t
> next_linear_group(ext4_group_t group, ext4_group_t ngroups)
> {
>
> /*
> * Artificially restricted ngroups for non-extent
> * files makes group > ngroups possible on first loop.
> */
> return group + 1 >= ngroups ? 0 : group + 1;
> }
>
> static void ext4_mb_choose_next_group(...)
> {
> ...
>
> /*
> * Fallback to linear scan when optimized scanning is disabled
> */
> if (!should_optimize_scan(ac)) {
> *group = next_linear_group(*group, ngroups);
> return;
> }
>
> /*
> * Optimized scanning can return non adjacent groups which can cause
> * seek overhead for rotational disks. So try few linear groups before
> * trying optimized scan.
> */
> if (ac->ac_groups_linear_remaining) {
> *group = next_linear_group(*group, ngroups);
> ac->ac_groups_linear_remaining--;
> return;
> }
>
> ...
> }
>
> Let me know your thoughts.
This makes sense to me. I will do it in the next version. Thanks for the advice.

Kemeng
>
> Regards,
> ojaswin
>
>> ext4_mb_choose_next_group_p2_aligned(ac, new_cr, group);
>> } else if (*new_cr == CR_GOAL_LEN_FAST) {
>> ext4_mb_choose_next_group_goal_fast(ac, new_cr, group);
>> --
>> 2.30.0
>>
>
>
>


2024-04-07 03:21:13

by Kemeng Shi

Subject: Re: [PATCH 4/5] ext4: use correct criteria name instead of stale integer number in comment



on 4/4/2024 10:19 PM, Jan Kara wrote:
> On Wed 27-03-24 05:38:22, Kemeng Shi wrote:
>> Use correct criteria name instead of stale integer number in comment
>>
>> Signed-off-by: Kemeng Shi <[email protected]>
>
> Looks good. But since the symbolic names already have the CR prefix, we
> probably don't have to write e.g.:
>
> /* Large fragment size list lookup succeeded at least once for cr =
> * CR_POWER2_ALIGNED */
>
> But we can write directly:
>
> /* Large fragment size list lookup succeeded at least once for
> * CR_POWER2_ALIGNED */
Sure, will do it in the next version. Thanks.

Kemeng
>
> Honza
>
>> ---
>> fs/ext4/ext4.h | 15 ++++++++++++---
>> fs/ext4/mballoc.c | 14 ++++++++------
>> fs/ext4/mballoc.h | 4 ++--
>> 3 files changed, 22 insertions(+), 11 deletions(-)
>>
>> diff --git a/fs/ext4/ext4.h b/fs/ext4/ext4.h
>> index 023571f8dd1b..9b90013c59a3 100644
>> --- a/fs/ext4/ext4.h
>> +++ b/fs/ext4/ext4.h
>> @@ -213,11 +213,20 @@ enum criteria {
>> #define EXT4_MB_USE_RESERVED 0x2000
>> /* Do strict check for free blocks while retrying block allocation */
>> #define EXT4_MB_STRICT_CHECK 0x4000
>> -/* Large fragment size list lookup succeeded at least once for cr = 0 */
>> +/*
>> + * Large fragment size list lookup succeeded at least once for cr =
>> + * CR_POWER2_ALIGNED
>> + */
>> #define EXT4_MB_CR_POWER2_ALIGNED_OPTIMIZED 0x8000
>> -/* Avg fragment size rb tree lookup succeeded at least once for cr = 1 */
>> +/*
>> + * Avg fragment size rb tree lookup succeeded at least once for cr =
>> + * CR_GOAL_LEN_FAST
>> + */
>> #define EXT4_MB_CR_GOAL_LEN_FAST_OPTIMIZED 0x00010000
>> -/* Avg fragment size rb tree lookup succeeded at least once for cr = 1.5 */
>> +/*
>> + * Avg fragment size rb tree lookup succeeded at least once for cr =
>> + * CR_BEST_AVAIL_LEN
>> + */
>> #define EXT4_MB_CR_BEST_AVAIL_LEN_OPTIMIZED 0x00020000
>>
>> struct ext4_allocation_request {
>> diff --git a/fs/ext4/mballoc.c b/fs/ext4/mballoc.c
>> index 62d468379722..0f8a34513bf6 100644
>> --- a/fs/ext4/mballoc.c
>> +++ b/fs/ext4/mballoc.c
>> @@ -1131,8 +1131,9 @@ static void ext4_mb_choose_next_group(struct ext4_allocation_context *ac,
>> ext4_mb_choose_next_group_best_avail(ac, new_cr, group);
>> } else {
>> /*
>> - * TODO: For CR=2, we can arrange groups in an rb tree sorted by
>> - * bb_free. But until that happens, we should never come here.
>> + * TODO: For CR=CR_GOAL_LEN_SLOW, we can arrange groups in an
>> + * rb tree sorted by bb_free. But until that happens, we should
>> + * never come here.
>> */
>> WARN_ON(1);
>> }
>> @@ -3444,10 +3445,11 @@ static int ext4_mb_init_backend(struct super_block *sb)
>> }
>> if (sbi->s_mb_prefetch > ext4_get_groups_count(sb))
>> sbi->s_mb_prefetch = ext4_get_groups_count(sb);
>> - /* now many real IOs to prefetch within a single allocation at cr=0
>> - * given cr=0 is an CPU-related optimization we shouldn't try to
>> - * load too many groups, at some point we should start to use what
>> - * we've got in memory.
>> + /*
>> + * now many real IOs to prefetch within a single allocation at
>> + * cr=CR_POWER2_ALIGNED. Given cr=CR_POWER2_ALIGNED is an CPU-related
>> + * optimization we shouldn't try to load too many groups, at some point
>> + * we should start to use what we've got in memory.
>> * with an average random access time 5ms, it'd take a second to get
>> * 200 groups (* N with flex_bg), so let's make this limit 4
>> */
>> diff --git a/fs/ext4/mballoc.h b/fs/ext4/mballoc.h
>> index 56938532b4ce..042437d8860f 100644
>> --- a/fs/ext4/mballoc.h
>> +++ b/fs/ext4/mballoc.h
>> @@ -187,8 +187,8 @@ struct ext4_allocation_context {
>> struct ext4_free_extent ac_f_ex;
>>
>> /*
>> - * goal len can change in CR1.5, so save the original len. This is
>> - * used while adjusting the PA window and for accounting.
>> + * goal len can change in CR_BEST_AVAIL_LEN, so save the original len.
>> + * This is used while adjusting the PA window and for accounting.
>> */
>> ext4_grpblk_t ac_orig_goal_len;
>>
>> --
>> 2.30.0
>>