2024-02-27 09:11:10

by Baokun Li

[permalink] [raw]
Subject: [PATCH v2 4/9] ext4: fix slab-out-of-bounds in ext4_mb_find_good_group_avg_frag_lists()

We can trigger a slab-out-of-bounds with the following commands:

mkfs.ext4 -F /dev/$disk 10G
mount /dev/$disk /tmp/test
echo 2147483647 > /sys/fs/ext4/$disk/mb_group_prealloc
echo test > /tmp/test/file && sync

==================================================================
BUG: KASAN: slab-out-of-bounds in ext4_mb_find_good_group_avg_frag_lists+0x8a/0x200 [ext4]
Read of size 8 at addr ffff888121b9d0f0 by task kworker/u2:0/11
CPU: 0 PID: 11 Comm: kworker/u2:0 Tainted: GL 6.7.0-next-20240118 #521
Call Trace:
dump_stack_lvl+0x2c/0x50
kasan_report+0xb6/0xf0
ext4_mb_find_good_group_avg_frag_lists+0x8a/0x200 [ext4]
ext4_mb_regular_allocator+0x19e9/0x2370 [ext4]
ext4_mb_new_blocks+0x88a/0x1370 [ext4]
ext4_ext_map_blocks+0x14f7/0x2390 [ext4]
ext4_map_blocks+0x569/0xea0 [ext4]
ext4_do_writepages+0x10f6/0x1bc0 [ext4]
[...]
==================================================================

The flow of issue triggering is as follows:

// Set s_mb_group_prealloc to 2147483647 via sysfs
ext4_mb_new_blocks
ext4_mb_normalize_request
ext4_mb_normalize_group_request
ac->ac_g_ex.fe_len = EXT4_SB(sb)->s_mb_group_prealloc
ext4_mb_regular_allocator
ext4_mb_choose_next_group
ext4_mb_choose_next_group_best_avail
mb_avg_fragment_size_order
order = fls(len) - 2 = 29
ext4_mb_find_good_group_avg_frag_lists
frag_list = &sbi->s_mb_avg_fragment_size[order]
if (list_empty(frag_list)) // Trigger SOOB!

At 4k block size, the length of the s_mb_avg_fragment_size list is 14,
but an oversized s_mb_group_prealloc is set, causing slab-out-of-bounds
to be triggered by an attempt to access an element at index 29.

Add a new attr_id attr_clusters_in_group with values in the range
[0, sbi->s_clusters_per_group] and declare mb_group_prealloc as
that type to fix the issue. In addition avoid returning an order
from mb_avg_fragment_size_order() greater than MB_NUM_ORDERS(sb)
and reduce some useless loops.

Fixes: 7e170922f06b ("ext4: Add allocation criteria 1.5 (CR1_5)")
CC: [email protected]
Signed-off-by: Baokun Li <[email protected]>
---
fs/ext4/mballoc.c | 6 ++++++
fs/ext4/sysfs.c | 13 ++++++++++++-
2 files changed, 18 insertions(+), 1 deletion(-)

diff --git a/fs/ext4/mballoc.c b/fs/ext4/mballoc.c
index 85a91a61b761..7ad089df2408 100644
--- a/fs/ext4/mballoc.c
+++ b/fs/ext4/mballoc.c
@@ -831,6 +831,8 @@ static int mb_avg_fragment_size_order(struct super_block *sb, ext4_grpblk_t len)
return 0;
if (order == MB_NUM_ORDERS(sb))
order--;
+ if (WARN_ON_ONCE(order > MB_NUM_ORDERS(sb)))
+ order = MB_NUM_ORDERS(sb) - 1;
return order;
}

@@ -1057,6 +1059,10 @@ static void ext4_mb_choose_next_group_best_avail(struct ext4_allocation_context
ac->ac_flags |= EXT4_MB_CR_BEST_AVAIL_LEN_OPTIMIZED;
return;
}
+
+ /* Skip some unnecessary loops. */
+ if (unlikely(i > MB_NUM_ORDERS(ac->ac_sb)))
+ i = MB_NUM_ORDERS(ac->ac_sb);
}

/* Reset goal length to original goal length before falling into CR_GOAL_LEN_SLOW */
diff --git a/fs/ext4/sysfs.c b/fs/ext4/sysfs.c
index 7f455b5f22c0..ddd71673176c 100644
--- a/fs/ext4/sysfs.c
+++ b/fs/ext4/sysfs.c
@@ -29,6 +29,7 @@ typedef enum {
attr_trigger_test_error,
attr_first_error_time,
attr_last_error_time,
+ attr_clusters_in_group,
attr_feature,
attr_pointer_ui,
attr_pointer_ul,
@@ -207,13 +208,14 @@ EXT4_ATTR_FUNC(sra_exceeded_retry_limit, 0444);

EXT4_ATTR_OFFSET(inode_readahead_blks, 0644, inode_readahead,
ext4_sb_info, s_inode_readahead_blks);
+EXT4_ATTR_OFFSET(mb_group_prealloc, 0644, clusters_in_group,
+ ext4_sb_info, s_mb_group_prealloc);
EXT4_RW_ATTR_SBI_UI(inode_goal, s_inode_goal);
EXT4_RW_ATTR_SBI_UI(mb_stats, s_mb_stats);
EXT4_RW_ATTR_SBI_UI(mb_max_to_scan, s_mb_max_to_scan);
EXT4_RW_ATTR_SBI_UI(mb_min_to_scan, s_mb_min_to_scan);
EXT4_RW_ATTR_SBI_UI(mb_order2_req, s_mb_order2_reqs);
EXT4_RW_ATTR_SBI_UI(mb_stream_req, s_mb_stream_request);
-EXT4_RW_ATTR_SBI_UI(mb_group_prealloc, s_mb_group_prealloc);
EXT4_RW_ATTR_SBI_UI(mb_max_linear_groups, s_mb_max_linear_groups);
EXT4_RW_ATTR_SBI_UI(extent_max_zeroout_kb, s_extent_max_zeroout_kb);
EXT4_ATTR(trigger_fs_error, 0200, trigger_test_error);
@@ -376,6 +378,7 @@ static ssize_t ext4_generic_attr_show(struct ext4_attr *a,

switch (a->attr_id) {
case attr_inode_readahead:
+ case attr_clusters_in_group:
case attr_pointer_ui:
if (a->attr_ptr == ptr_ext4_super_block_offset)
return sysfs_emit(buf, "%u\n", le32_to_cpup(ptr));
@@ -455,6 +458,14 @@ static ssize_t ext4_generic_attr_store(struct ext4_attr *a,
else
*((unsigned int *) ptr) = t;
return len;
+ case attr_clusters_in_group:
+ ret = kstrtouint(skip_spaces(buf), 0, &t);
+ if (ret)
+ return ret;
+ if (t > sbi->s_clusters_per_group)
+ return -EINVAL;
+ *((unsigned int *) ptr) = t;
+ return len;
case attr_pointer_ul:
ret = kstrtoul(skip_spaces(buf), 0, &lt);
if (ret)
--
2.31.1



2024-03-14 10:31:11

by Jan Kara

[permalink] [raw]
Subject: Re: [PATCH v2 4/9] ext4: fix slab-out-of-bounds in ext4_mb_find_good_group_avg_frag_lists()

On Tue 27-02-24 17:11:43, Baokun Li wrote:
> We can trigger a slab-out-of-bounds with the following commands:
>
> mkfs.ext4 -F /dev/$disk 10G
> mount /dev/$disk /tmp/test
> echo 2147483647 > /sys/fs/ext4/$disk/mb_group_prealloc
> echo test > /tmp/test/file && sync
>
> ==================================================================
> BUG: KASAN: slab-out-of-bounds in ext4_mb_find_good_group_avg_frag_lists+0x8a/0x200 [ext4]
> Read of size 8 at addr ffff888121b9d0f0 by task kworker/u2:0/11
> CPU: 0 PID: 11 Comm: kworker/u2:0 Tainted: GL 6.7.0-next-20240118 #521
> Call Trace:
> dump_stack_lvl+0x2c/0x50
> kasan_report+0xb6/0xf0
> ext4_mb_find_good_group_avg_frag_lists+0x8a/0x200 [ext4]
> ext4_mb_regular_allocator+0x19e9/0x2370 [ext4]
> ext4_mb_new_blocks+0x88a/0x1370 [ext4]
> ext4_ext_map_blocks+0x14f7/0x2390 [ext4]
> ext4_map_blocks+0x569/0xea0 [ext4]
> ext4_do_writepages+0x10f6/0x1bc0 [ext4]
> [...]
> ==================================================================
>
> The flow of issue triggering is as follows:
>
> // Set s_mb_group_prealloc to 2147483647 via sysfs
> ext4_mb_new_blocks
> ext4_mb_normalize_request
> ext4_mb_normalize_group_request
> ac->ac_g_ex.fe_len = EXT4_SB(sb)->s_mb_group_prealloc
> ext4_mb_regular_allocator
> ext4_mb_choose_next_group
> ext4_mb_choose_next_group_best_avail
> mb_avg_fragment_size_order
> order = fls(len) - 2 = 29
> ext4_mb_find_good_group_avg_frag_lists
> frag_list = &sbi->s_mb_avg_fragment_size[order]
> if (list_empty(frag_list)) // Trigger SOOB!
>
> At 4k block size, the length of the s_mb_avg_fragment_size list is 14,
> but an oversized s_mb_group_prealloc is set, causing slab-out-of-bounds
> to be triggered by an attempt to access an element at index 29.
>
> Add a new attr_id attr_clusters_in_group with values in the range
> [0, sbi->s_clusters_per_group] and declare mb_group_prealloc as
> that type to fix the issue. In addition avoid returning an order
> from mb_avg_fragment_size_order() greater than MB_NUM_ORDERS(sb)
> and reduce some useless loops.
>
> Fixes: 7e170922f06b ("ext4: Add allocation criteria 1.5 (CR1_5)")
> CC: [email protected]
> Signed-off-by: Baokun Li <[email protected]>

Looks good. Just one nit below. Otherwise feel free to add:

Reviewed-by: Jan Kara <[email protected]>

> ---
> fs/ext4/mballoc.c | 6 ++++++
> fs/ext4/sysfs.c | 13 ++++++++++++-
> 2 files changed, 18 insertions(+), 1 deletion(-)
>
> diff --git a/fs/ext4/mballoc.c b/fs/ext4/mballoc.c
> index 85a91a61b761..7ad089df2408 100644
> --- a/fs/ext4/mballoc.c
> +++ b/fs/ext4/mballoc.c
> @@ -831,6 +831,8 @@ static int mb_avg_fragment_size_order(struct super_block *sb, ext4_grpblk_t len)
> return 0;
> if (order == MB_NUM_ORDERS(sb))
> order--;
> + if (WARN_ON_ONCE(order > MB_NUM_ORDERS(sb)))
> + order = MB_NUM_ORDERS(sb) - 1;
> return order;
> }
>
> @@ -1057,6 +1059,10 @@ static void ext4_mb_choose_next_group_best_avail(struct ext4_allocation_context
> ac->ac_flags |= EXT4_MB_CR_BEST_AVAIL_LEN_OPTIMIZED;
> return;
> }
> +
> + /* Skip some unnecessary loops. */
> + if (unlikely(i > MB_NUM_ORDERS(ac->ac_sb)))
> + i = MB_NUM_ORDERS(ac->ac_sb);

How can this possibly trigger now? MB_NUM_ORDERS is sb->s_blocksize_bits +
2. 'i' is starting at fls(ac->ac_g_ex.fe_len) and ac_g_ex.fe_len shouldn't
be larger than clusters per group, hence fls() should be less than
sb->s_blocksize_bits? Am I missing something? And if yes, we should rather
make sure 'order' is never absurdly big?

I suspect this code is defensive upto a point of being confusing :)

Honza
--
Jan Kara <[email protected]>
SUSE Labs, CR

2024-03-14 13:47:47

by Baokun Li

[permalink] [raw]
Subject: Re: [PATCH v2 4/9] ext4: fix slab-out-of-bounds in ext4_mb_find_good_group_avg_frag_lists()

On 2024/3/14 20:50, Jan Kara wrote:
> On Thu 14-03-24 20:37:38, Baokun Li wrote:
>> On 2024/3/14 20:00, Jan Kara wrote:
>>> On Thu 14-03-24 19:24:56, Baokun Li wrote:
>>>> Hi Jan,
>>>>
>>>> On 2024/3/14 18:30, Jan Kara wrote:
>>>>> On Tue 27-02-24 17:11:43, Baokun Li wrote:
>>>>>
>>>>>
>>>>> At 4k block size, the length of the s_mb_avg_fragment_size list is 14,
>>>>> but an oversized s_mb_group_prealloc is set, causing slab-out-of-bounds
>>>>> to be triggered by an attempt to access an element at index 29.
>>>>>
>>>>> Add a new attr_id attr_clusters_in_group with values in the range
>>>>> [0, sbi->s_clusters_per_group] and declare mb_group_prealloc as
>>>>> that type to fix the issue. In addition avoid returning an order
>>>>> from mb_avg_fragment_size_order() greater than MB_NUM_ORDERS(sb)
>>>>> and reduce some useless loops.
>>>>>
>>>>> Fixes: 7e170922f06b ("ext4: Add allocation criteria 1.5 (CR1_5)")
>>>>> CC: [email protected]
>>>>> Signed-off-by: Baokun Li <[email protected]>
>>>>> Looks good. Just one nit below. Otherwise feel free to add:
>>>>>
>>>>> Reviewed-by: Jan Kara <[email protected]>
>>>>>
>>>>>> ---
>>>>>> fs/ext4/mballoc.c | 6 ++++++
>>>>>> fs/ext4/sysfs.c | 13 ++++++++++++-
>>>>>> 2 files changed, 18 insertions(+), 1 deletion(-)
>>>>>>
>>>>>> diff --git a/fs/ext4/mballoc.c b/fs/ext4/mballoc.c
>>>>>> index 85a91a61b761..7ad089df2408 100644
>>>>>> --- a/fs/ext4/mballoc.c
>>>>>> +++ b/fs/ext4/mballoc.c
>>>>>> @@ -831,6 +831,8 @@ static int mb_avg_fragment_size_order(struct super_block *sb, ext4_grpblk_t len)
>>>>>> return 0;
>>>>>> if (order == MB_NUM_ORDERS(sb))
>>>>>> order--;
>>>>>> + if (WARN_ON_ONCE(order > MB_NUM_ORDERS(sb)))
>>>>>> + order = MB_NUM_ORDERS(sb) - 1;
>>>>>> return order;
>>>>>> }
>>>>>> @@ -1057,6 +1059,10 @@ static void ext4_mb_choose_next_group_best_avail(struct ext4_allocation_context
>>>>>> ac->ac_flags |= EXT4_MB_CR_BEST_AVAIL_LEN_OPTIMIZED;
>>>>>> return;
>>>>>> }
>>>>>> +
>>>>>> + /* Skip some unnecessary loops. */
>>>>>> + if (unlikely(i > MB_NUM_ORDERS(ac->ac_sb)))
>>>>>> + i = MB_NUM_ORDERS(ac->ac_sb);
>>>>> How can this possibly trigger now? MB_NUM_ORDERS is sb->s_blocksize_bits +
>>>>> 2. 'i' is starting at fls(ac->ac_g_ex.fe_len) and ac_g_ex.fe_len shouldn't
>>>>> be larger than clusters per group, hence fls() should be less than
>>>>> sb->s_blocksize_bits? Am I missing something? And if yes, we should rather
>>>>> make sure 'order' is never absurdly big?
>>>>>
>>>>> I suspect this code is defensive upto a point of being confusing :)
>>>>>
>>>>> Honza
>>>> Yes, this is indeed defensive code! Only walk into this branch when
>>>> WARN_ON_ONCE(order > MB_NUM_ORDERS(sb)) is triggered.
>>>> As previously mentioned by ojaswin in the following link:
>>>>
>>>> "The reason for this is that otherwise when order is large eg 29,
>>>> we would unnecessarily loop from i=29 to i=13 while always
>>>> looking at the same avg_fragment_list[13]."
>>>>
>>>> Link:https://lore.kernel.org/lkml/[email protected]/
>>>>
>>>> Thank you so much for the review! ღ( ´・ᴗ・` )
>>> Thanks for the link. So what Ojaswin has suggested has been slightly
>>> different though. He suggested to trim the order before the for loop, not
>>> after the first iteration as you do which is what was confusing me. I'd
>>> even suggest to replace your check with:
>>>
>>> /*
>>> * mb_avg_fragment_size_order() returns order in a way that makes
>>> * retrieving back the length using (1 << order) inaccurate. Hence, use
>>> * fls() instead since we need to know the actual length while modifying
>>> * goal length.
>>> */
>>> - order = fls(ac->ac_g_ex.fe_len) - 1;
>>> + order = min(fls(ac->ac_g_ex.fe_len), MB_NUM_ORDERS(ac->ac_sb)) - 1;
>>> min_order = order - sbi->s_mb_best_avail_max_trim_order;
>>> if (min_order < 0)
>>> min_order = 0;
>>>
>>> Honza
>> Yes, I changed it that way because it only happens when an exception
>> somewhere causes fe_len to be a huge value. I think in this case we
>> should report the exception via WARN_ON_ONCE(), and trimming the
>> order before the for loop will bypass WARN_ON_ONCE and not report
>> any errors.
> Fair enough. Then:
> /*
> * mb_avg_fragment_size_order() returns order in a way that makes
> * retrieving back the length using (1 << order) inaccurate. Hence, use
> * fls() instead since we need to know the actual length while modifying
> * goal length.
> */
> order = fls(ac->ac_g_ex.fe_len) - 1;
> + if (WARN_ON_ONCE(order > MB_NUM_ORDERS(ac->ac_sb) - 1))
> + order = MB_NUM_ORDERS(ac->ac_sb) - 1;
> min_order = order - sbi->s_mb_best_avail_max_trim_order;
> if (min_order < 0)
> min_order = 0;
>
> Still much less confusing...
>
> Honza
Yes this does look much better!
Let me send v3!

Thanks for the suggestion!
--
With Best Regards,
Baokun Li
.

2024-03-14 11:25:14

by Baokun Li

[permalink] [raw]
Subject: Re: [PATCH v2 4/9] ext4: fix slab-out-of-bounds in ext4_mb_find_good_group_avg_frag_lists()

Hi Jan,

On 2024/3/14 18:30, Jan Kara wrote:
> On Tue 27-02-24 17:11:43, Baokun Li wrote:
>
>
> At 4k block size, the length of the s_mb_avg_fragment_size list is 14,
> but an oversized s_mb_group_prealloc is set, causing slab-out-of-bounds
> to be triggered by an attempt to access an element at index 29.
>
> Add a new attr_id attr_clusters_in_group with values in the range
> [0, sbi->s_clusters_per_group] and declare mb_group_prealloc as
> that type to fix the issue. In addition avoid returning an order
> from mb_avg_fragment_size_order() greater than MB_NUM_ORDERS(sb)
> and reduce some useless loops.
>
> Fixes: 7e170922f06b ("ext4: Add allocation criteria 1.5 (CR1_5)")
> CC: [email protected]
> Signed-off-by: Baokun Li <[email protected]>
> Looks good. Just one nit below. Otherwise feel free to add:
>
> Reviewed-by: Jan Kara <[email protected]>
>
>> ---
>> fs/ext4/mballoc.c | 6 ++++++
>> fs/ext4/sysfs.c | 13 ++++++++++++-
>> 2 files changed, 18 insertions(+), 1 deletion(-)
>>
>> diff --git a/fs/ext4/mballoc.c b/fs/ext4/mballoc.c
>> index 85a91a61b761..7ad089df2408 100644
>> --- a/fs/ext4/mballoc.c
>> +++ b/fs/ext4/mballoc.c
>> @@ -831,6 +831,8 @@ static int mb_avg_fragment_size_order(struct super_block *sb, ext4_grpblk_t len)
>> return 0;
>> if (order == MB_NUM_ORDERS(sb))
>> order--;
>> + if (WARN_ON_ONCE(order > MB_NUM_ORDERS(sb)))
>> + order = MB_NUM_ORDERS(sb) - 1;
>> return order;
>> }
>>
>> @@ -1057,6 +1059,10 @@ static void ext4_mb_choose_next_group_best_avail(struct ext4_allocation_context
>> ac->ac_flags |= EXT4_MB_CR_BEST_AVAIL_LEN_OPTIMIZED;
>> return;
>> }
>> +
>> + /* Skip some unnecessary loops. */
>> + if (unlikely(i > MB_NUM_ORDERS(ac->ac_sb)))
>> + i = MB_NUM_ORDERS(ac->ac_sb);
> How can this possibly trigger now? MB_NUM_ORDERS is sb->s_blocksize_bits +
> 2. 'i' is starting at fls(ac->ac_g_ex.fe_len) and ac_g_ex.fe_len shouldn't
> be larger than clusters per group, hence fls() should be less than
> sb->s_blocksize_bits? Am I missing something? And if yes, we should rather
> make sure 'order' is never absurdly big?
>
> I suspect this code is defensive upto a point of being confusing :)
>
> Honza

Yes, this is indeed defensive code! Only walk into this branch when
WARN_ON_ONCE(order > MB_NUM_ORDERS(sb)) is triggered.
As previously mentioned by ojaswin in the following link:

"The reason for this is that otherwise when order is large eg 29,
we would unnecessarily loop from i=29 to i=13 while always
looking at the same avg_fragment_list[13]."

Link:https://lore.kernel.org/lkml/[email protected]/

Thank you so much for the review! ღ( ´・ᴗ・` )
--
With Best Regards,
Baokun Li
.

2024-03-14 12:38:11

by Baokun Li

[permalink] [raw]
Subject: Re: [PATCH v2 4/9] ext4: fix slab-out-of-bounds in ext4_mb_find_good_group_avg_frag_lists()

On 2024/3/14 20:00, Jan Kara wrote:
> On Thu 14-03-24 19:24:56, Baokun Li wrote:
>> Hi Jan,
>>
>> On 2024/3/14 18:30, Jan Kara wrote:
>>> On Tue 27-02-24 17:11:43, Baokun Li wrote:
>>>
>>>
>>> At 4k block size, the length of the s_mb_avg_fragment_size list is 14,
>>> but an oversized s_mb_group_prealloc is set, causing slab-out-of-bounds
>>> to be triggered by an attempt to access an element at index 29.
>>>
>>> Add a new attr_id attr_clusters_in_group with values in the range
>>> [0, sbi->s_clusters_per_group] and declare mb_group_prealloc as
>>> that type to fix the issue. In addition avoid returning an order
>>> from mb_avg_fragment_size_order() greater than MB_NUM_ORDERS(sb)
>>> and reduce some useless loops.
>>>
>>> Fixes: 7e170922f06b ("ext4: Add allocation criteria 1.5 (CR1_5)")
>>> CC: [email protected]
>>> Signed-off-by: Baokun Li <[email protected]>
>>> Looks good. Just one nit below. Otherwise feel free to add:
>>>
>>> Reviewed-by: Jan Kara <[email protected]>
>>>
>>>> ---
>>>> fs/ext4/mballoc.c | 6 ++++++
>>>> fs/ext4/sysfs.c | 13 ++++++++++++-
>>>> 2 files changed, 18 insertions(+), 1 deletion(-)
>>>>
>>>> diff --git a/fs/ext4/mballoc.c b/fs/ext4/mballoc.c
>>>> index 85a91a61b761..7ad089df2408 100644
>>>> --- a/fs/ext4/mballoc.c
>>>> +++ b/fs/ext4/mballoc.c
>>>> @@ -831,6 +831,8 @@ static int mb_avg_fragment_size_order(struct super_block *sb, ext4_grpblk_t len)
>>>> return 0;
>>>> if (order == MB_NUM_ORDERS(sb))
>>>> order--;
>>>> + if (WARN_ON_ONCE(order > MB_NUM_ORDERS(sb)))
>>>> + order = MB_NUM_ORDERS(sb) - 1;
>>>> return order;
>>>> }
>>>> @@ -1057,6 +1059,10 @@ static void ext4_mb_choose_next_group_best_avail(struct ext4_allocation_context
>>>> ac->ac_flags |= EXT4_MB_CR_BEST_AVAIL_LEN_OPTIMIZED;
>>>> return;
>>>> }
>>>> +
>>>> + /* Skip some unnecessary loops. */
>>>> + if (unlikely(i > MB_NUM_ORDERS(ac->ac_sb)))
>>>> + i = MB_NUM_ORDERS(ac->ac_sb);
>>> How can this possibly trigger now? MB_NUM_ORDERS is sb->s_blocksize_bits +
>>> 2. 'i' is starting at fls(ac->ac_g_ex.fe_len) and ac_g_ex.fe_len shouldn't
>>> be larger than clusters per group, hence fls() should be less than
>>> sb->s_blocksize_bits? Am I missing something? And if yes, we should rather
>>> make sure 'order' is never absurdly big?
>>>
>>> I suspect this code is defensive upto a point of being confusing :)
>>>
>>> Honza
>> Yes, this is indeed defensive code! Only walk into this branch when
>> WARN_ON_ONCE(order > MB_NUM_ORDERS(sb)) is triggered.
>> As previously mentioned by ojaswin in the following link:
>>
>> "The reason for this is that otherwise when order is large eg 29,
>> we would unnecessarily loop from i=29 to i=13 while always
>> looking at the same avg_fragment_list[13]."
>>
>> Link:https://lore.kernel.org/lkml/[email protected]/
>>
>> Thank you so much for the review! ღ( ´・ᴗ・` )
> Thanks for the link. So what Ojaswin has suggested has been slightly
> different though. He suggested to trim the order before the for loop, not
> after the first iteration as you do which is what was confusing me. I'd
> even suggest to replace your check with:
>
> /*
> * mb_avg_fragment_size_order() returns order in a way that makes
> * retrieving back the length using (1 << order) inaccurate. Hence, use
> * fls() instead since we need to know the actual length while modifying
> * goal length.
> */
> - order = fls(ac->ac_g_ex.fe_len) - 1;
> + order = min(fls(ac->ac_g_ex.fe_len), MB_NUM_ORDERS(ac->ac_sb)) - 1;
> min_order = order - sbi->s_mb_best_avail_max_trim_order;
> if (min_order < 0)
> min_order = 0;
>
> Honza
Yes, I changed it that way because it only happens when an exception
somewhere causes fe_len to be a huge value. I think in this case we
should report the exception via WARN_ON_ONCE(), and trimming the
order before the for loop will bypass WARN_ON_ONCE and not report
any errors.

Thanks!
--
With Best Regards,
Baokun Li
.