We need to guarantee it's initially zero. Otherwise, it'll hurt entire flag
operations.
Fixes: b763f3bedc2d ("f2fs: restructure f2fs page.private layout")
Signed-off-by: Jaegeuk Kim <[email protected]>
---
fs/f2fs/data.c | 2 ++
fs/f2fs/f2fs.h | 5 ++++-
2 files changed, 6 insertions(+), 1 deletion(-)
diff --git a/fs/f2fs/data.c b/fs/f2fs/data.c
index 3a01a1b50104..d2cf48c5a2e4 100644
--- a/fs/f2fs/data.c
+++ b/fs/f2fs/data.c
@@ -3819,6 +3819,8 @@ int f2fs_migrate_page(struct address_space *mapping,
get_page(newpage);
}
+ /* guarantee to start from no stale private field */
+ set_page_private(newpage, 0);
if (PagePrivate(page)) {
set_page_private(newpage, page_private(page));
SetPagePrivate(newpage);
diff --git a/fs/f2fs/f2fs.h b/fs/f2fs/f2fs.h
index 65befc68d88e..ee8eb33e2c25 100644
--- a/fs/f2fs/f2fs.h
+++ b/fs/f2fs/f2fs.h
@@ -1331,7 +1331,8 @@ enum {
#define PAGE_PRIVATE_GET_FUNC(name, flagname) \
static inline bool page_private_##name(struct page *page) \
{ \
- return test_bit(PAGE_PRIVATE_NOT_POINTER, &page_private(page)) && \
+ return PagePrivate(page) && \
+ test_bit(PAGE_PRIVATE_NOT_POINTER, &page_private(page)) && \
test_bit(PAGE_PRIVATE_##flagname, &page_private(page)); \
}
@@ -1341,6 +1342,7 @@ static inline void set_page_private_##name(struct page *page) \
if (!PagePrivate(page)) { \
get_page(page); \
SetPagePrivate(page); \
+ set_page_private(page, 0); \
} \
set_bit(PAGE_PRIVATE_NOT_POINTER, &page_private(page)); \
set_bit(PAGE_PRIVATE_##flagname, &page_private(page)); \
@@ -1392,6 +1394,7 @@ static inline void set_page_private_data(struct page *page, unsigned long data)
if (!PagePrivate(page)) {
get_page(page);
SetPagePrivate(page);
+ set_page_private(page, 0);
}
set_bit(PAGE_PRIVATE_NOT_POINTER, &page_private(page));
page_private(page) |= data << PAGE_PRIVATE_MAX;
--
2.32.0.93.g670b81a890-goog
On 2021/7/5 13:22, Jaegeuk Kim wrote:
> We need to guarantee it's initially zero. Otherwise, it'll hurt entire flag
> operations.
Oops, I didn't get the point; shouldn't .private be zero after the page was
just allocated by the filesystem? In what case would we encounter stale
private data left in the page?
Cc Matthew Wilcox.
Thanks,
>
> Fixes: b763f3bedc2d ("f2fs: restructure f2fs page.private layout")
> Signed-off-by: Jaegeuk Kim <[email protected]>
> ---
> fs/f2fs/data.c | 2 ++
> fs/f2fs/f2fs.h | 5 ++++-
> 2 files changed, 6 insertions(+), 1 deletion(-)
>
> diff --git a/fs/f2fs/data.c b/fs/f2fs/data.c
> index 3a01a1b50104..d2cf48c5a2e4 100644
> --- a/fs/f2fs/data.c
> +++ b/fs/f2fs/data.c
> @@ -3819,6 +3819,8 @@ int f2fs_migrate_page(struct address_space *mapping,
> get_page(newpage);
> }
>
> + /* guarantee to start from no stale private field */
> + set_page_private(newpage, 0);
> if (PagePrivate(page)) {
> set_page_private(newpage, page_private(page));
> SetPagePrivate(newpage);
> diff --git a/fs/f2fs/f2fs.h b/fs/f2fs/f2fs.h
> index 65befc68d88e..ee8eb33e2c25 100644
> --- a/fs/f2fs/f2fs.h
> +++ b/fs/f2fs/f2fs.h
> @@ -1331,7 +1331,8 @@ enum {
> #define PAGE_PRIVATE_GET_FUNC(name, flagname) \
> static inline bool page_private_##name(struct page *page) \
> { \
> - return test_bit(PAGE_PRIVATE_NOT_POINTER, &page_private(page)) && \
> + return PagePrivate(page) && \
> + test_bit(PAGE_PRIVATE_NOT_POINTER, &page_private(page)) && \
> test_bit(PAGE_PRIVATE_##flagname, &page_private(page)); \
> }
>
> @@ -1341,6 +1342,7 @@ static inline void set_page_private_##name(struct page *page) \
> if (!PagePrivate(page)) { \
> get_page(page); \
> SetPagePrivate(page); \
> + set_page_private(page, 0); \
> } \
> set_bit(PAGE_PRIVATE_NOT_POINTER, &page_private(page)); \
> set_bit(PAGE_PRIVATE_##flagname, &page_private(page)); \
> @@ -1392,6 +1394,7 @@ static inline void set_page_private_data(struct page *page, unsigned long data)
> if (!PagePrivate(page)) {
> get_page(page);
> SetPagePrivate(page);
> + set_page_private(page, 0);
> }
> set_bit(PAGE_PRIVATE_NOT_POINTER, &page_private(page));
> page_private(page) |= data << PAGE_PRIVATE_MAX;
>
On 07/05, Chao Yu wrote:
> On 2021/7/5 13:22, Jaegeuk Kim wrote:
> > We need to guarantee it's initially zero. Otherwise, it'll hurt entire flag
> > operations.
>
> Oops, I didn't get the point, shouldn't .private be zero after page was
> just allocated by filesystem? What's the case we will encounter stall
> private data left in page?
I'm seeing f2fs_migrate_page() get a newpage with some value in .private but without
the Private flag set. That causes a kernel panic later due to a wrong private flag being used in f2fs.
>
> Cc Matthew Wilcox.
>
> Thanks,
>
> >
> > Fixes: b763f3bedc2d ("f2fs: restructure f2fs page.private layout")
> > Signed-off-by: Jaegeuk Kim <[email protected]>
> > ---
> > fs/f2fs/data.c | 2 ++
> > fs/f2fs/f2fs.h | 5 ++++-
> > 2 files changed, 6 insertions(+), 1 deletion(-)
> >
> > diff --git a/fs/f2fs/data.c b/fs/f2fs/data.c
> > index 3a01a1b50104..d2cf48c5a2e4 100644
> > --- a/fs/f2fs/data.c
> > +++ b/fs/f2fs/data.c
> > @@ -3819,6 +3819,8 @@ int f2fs_migrate_page(struct address_space *mapping,
> > get_page(newpage);
> > }
> > + /* guarantee to start from no stale private field */
> > + set_page_private(newpage, 0);
> > if (PagePrivate(page)) {
> > set_page_private(newpage, page_private(page));
> > SetPagePrivate(newpage);
> > diff --git a/fs/f2fs/f2fs.h b/fs/f2fs/f2fs.h
> > index 65befc68d88e..ee8eb33e2c25 100644
> > --- a/fs/f2fs/f2fs.h
> > +++ b/fs/f2fs/f2fs.h
> > @@ -1331,7 +1331,8 @@ enum {
> > #define PAGE_PRIVATE_GET_FUNC(name, flagname) \
> > static inline bool page_private_##name(struct page *page) \
> > { \
> > - return test_bit(PAGE_PRIVATE_NOT_POINTER, &page_private(page)) && \
> > + return PagePrivate(page) && \
> > + test_bit(PAGE_PRIVATE_NOT_POINTER, &page_private(page)) && \
> > test_bit(PAGE_PRIVATE_##flagname, &page_private(page)); \
> > }
> > @@ -1341,6 +1342,7 @@ static inline void set_page_private_##name(struct page *page) \
> > if (!PagePrivate(page)) { \
> > get_page(page); \
> > SetPagePrivate(page); \
> > + set_page_private(page, 0); \
> > } \
> > set_bit(PAGE_PRIVATE_NOT_POINTER, &page_private(page)); \
> > set_bit(PAGE_PRIVATE_##flagname, &page_private(page)); \
> > @@ -1392,6 +1394,7 @@ static inline void set_page_private_data(struct page *page, unsigned long data)
> > if (!PagePrivate(page)) {
> > get_page(page);
> > SetPagePrivate(page);
> > + set_page_private(page, 0);
> > }
> > set_bit(PAGE_PRIVATE_NOT_POINTER, &page_private(page));
> > page_private(page) |= data << PAGE_PRIVATE_MAX;
> >
On 2021/7/5 16:56, Jaegeuk Kim wrote:
> On 07/05, Chao Yu wrote:
>> On 2021/7/5 13:22, Jaegeuk Kim wrote:
>>> We need to guarantee it's initially zero. Otherwise, it'll hurt entire flag
>>> operations.
>>
>> Oops, I didn't get the point, shouldn't .private be zero after page was
>> just allocated by filesystem? What's the case we will encounter stall
>> private data left in page?
>
> I'm seeing f2fs_migrate_page() has the newpage with some value without Private
> flag. That causes a kernel panic later due to wrong private flag used in f2fs.
I'm not familiar with that part of codes, so Cc mm mailing list for help.
My question is: can newpage in .migrate_page() contain a non-zero value in the .private
field w/o the PagePrivate flag being set, and is that a normal case?
Thanks,
>
>>
>> Cc Matthew Wilcox.
>>
>> Thanks,
>>
>>>
>>> Fixes: b763f3bedc2d ("f2fs: restructure f2fs page.private layout")
>>> Signed-off-by: Jaegeuk Kim <[email protected]>
>>> ---
>>> fs/f2fs/data.c | 2 ++
>>> fs/f2fs/f2fs.h | 5 ++++-
>>> 2 files changed, 6 insertions(+), 1 deletion(-)
>>>
>>> diff --git a/fs/f2fs/data.c b/fs/f2fs/data.c
>>> index 3a01a1b50104..d2cf48c5a2e4 100644
>>> --- a/fs/f2fs/data.c
>>> +++ b/fs/f2fs/data.c
>>> @@ -3819,6 +3819,8 @@ int f2fs_migrate_page(struct address_space *mapping,
>>> get_page(newpage);
>>> }
>>> + /* guarantee to start from no stale private field */
>>> + set_page_private(newpage, 0);
>>> if (PagePrivate(page)) {
>>> set_page_private(newpage, page_private(page));
>>> SetPagePrivate(newpage);
>>> diff --git a/fs/f2fs/f2fs.h b/fs/f2fs/f2fs.h
>>> index 65befc68d88e..ee8eb33e2c25 100644
>>> --- a/fs/f2fs/f2fs.h
>>> +++ b/fs/f2fs/f2fs.h
>>> @@ -1331,7 +1331,8 @@ enum {
>>> #define PAGE_PRIVATE_GET_FUNC(name, flagname) \
>>> static inline bool page_private_##name(struct page *page) \
>>> { \
>>> - return test_bit(PAGE_PRIVATE_NOT_POINTER, &page_private(page)) && \
>>> + return PagePrivate(page) && \
>>> + test_bit(PAGE_PRIVATE_NOT_POINTER, &page_private(page)) && \
>>> test_bit(PAGE_PRIVATE_##flagname, &page_private(page)); \
>>> }
>>> @@ -1341,6 +1342,7 @@ static inline void set_page_private_##name(struct page *page) \
>>> if (!PagePrivate(page)) { \
>>> get_page(page); \
>>> SetPagePrivate(page); \
>>> + set_page_private(page, 0); \
>>> } \
>>> set_bit(PAGE_PRIVATE_NOT_POINTER, &page_private(page)); \
>>> set_bit(PAGE_PRIVATE_##flagname, &page_private(page)); \
>>> @@ -1392,6 +1394,7 @@ static inline void set_page_private_data(struct page *page, unsigned long data)
>>> if (!PagePrivate(page)) {
>>> get_page(page);
>>> SetPagePrivate(page);
>>> + set_page_private(page, 0);
>>> }
>>> set_bit(PAGE_PRIVATE_NOT_POINTER, &page_private(page));
>>> page_private(page) |= data << PAGE_PRIVATE_MAX;
>>>
On Mon, Jul 05, 2021 at 07:33:35PM +0800, Chao Yu wrote:
> On 2021/7/5 16:56, Jaegeuk Kim wrote:
> > On 07/05, Chao Yu wrote:
> > > On 2021/7/5 13:22, Jaegeuk Kim wrote:
> > > > We need to guarantee it's initially zero. Otherwise, it'll hurt entire flag
> > > > operations.
> > >
> > > Oops, I didn't get the point, shouldn't .private be zero after page was
> > > just allocated by filesystem? What's the case we will encounter stall
> > > private data left in page?
> >
> > I'm seeing f2fs_migrate_page() has the newpage with some value without Private
> > flag. That causes a kernel panic later due to wrong private flag used in f2fs.
>
> I'm not familiar with that part of codes, so Cc mm mailing list for help.
>
> My question is newpage in .migrate_page() may contain non-zero value in .private
> field but w/o setting PagePrivate flag, is it a normal case?
I think freshly allocated pages have a page->private of 0. ie this
code in mm/page_alloc.c:
page = rmqueue(ac->preferred_zoneref->zone, zone, order,
gfp_mask, alloc_flags, ac->migratetype);
if (page) {
prep_new_page(page, order, gfp_mask, alloc_flags);
where prep_new_page() calls post_alloc_hook() which contains:
set_page_private(page, 0);
Now, I do see in __buffer_migrate_page() (mm/migrate.c):
attach_page_private(newpage, detach_page_private(page));
but as far as I can tell, f2fs doesn't call any of the
buffer_migrate_page() paths. So I'm not sure why you're seeing
a non-zero page->private.
On 2021/7/5 19:47, Matthew Wilcox wrote:
> On Mon, Jul 05, 2021 at 07:33:35PM +0800, Chao Yu wrote:
>> On 2021/7/5 16:56, Jaegeuk Kim wrote:
>>> On 07/05, Chao Yu wrote:
>>>> On 2021/7/5 13:22, Jaegeuk Kim wrote:
>>>>> We need to guarantee it's initially zero. Otherwise, it'll hurt entire flag
>>>>> operations.
>>>>
>>>> Oops, I didn't get the point, shouldn't .private be zero after page was
>>>> just allocated by filesystem? What's the case we will encounter stall
>>>> private data left in page?
>>>
>>> I'm seeing f2fs_migrate_page() has the newpage with some value without Private
>>> flag. That causes a kernel panic later due to wrong private flag used in f2fs.
>>
>> I'm not familiar with that part of codes, so Cc mm mailing list for help.
>>
>> My question is newpage in .migrate_page() may contain non-zero value in .private
>> field but w/o setting PagePrivate flag, is it a normal case?
>
> I think freshly allocated pages have a page->private of 0. ie this
> code in mm/page_alloc.c:
>
> page = rmqueue(ac->preferred_zoneref->zone, zone, order,
> gfp_mask, alloc_flags, ac->migratetype);
> if (page) {
> prep_new_page(page, order, gfp_mask, alloc_flags);
>
> where prep_new_page() calls post_alloc_hook() which contains:
> set_page_private(page, 0);
>
> Now, I do see in __buffer_migrate_page() (mm/migrate.c):
>
> attach_page_private(newpage, detach_page_private(page));
>
> but as far as I can tell, f2fs doesn't call any of the
> buffer_migrate_page() paths. So I'm not sure why you're seeing
> a non-zero page->private.
Well, that's strange.
Jaegeuk, if this issue is reproducible, let's add a BUG_ON in f2fs to track the
call path where newpage has a non-zero private value.
Thanks,
>
On 07/05, Matthew Wilcox wrote:
> On Mon, Jul 05, 2021 at 07:33:35PM +0800, Chao Yu wrote:
> > On 2021/7/5 16:56, Jaegeuk Kim wrote:
> > > On 07/05, Chao Yu wrote:
> > > > On 2021/7/5 13:22, Jaegeuk Kim wrote:
> > > > > We need to guarantee it's initially zero. Otherwise, it'll hurt entire flag
> > > > > operations.
> > > >
> > > > Oops, I didn't get the point, shouldn't .private be zero after page was
> > > > just allocated by filesystem? What's the case we will encounter stall
> > > > private data left in page?
> > >
> > > I'm seeing f2fs_migrate_page() has the newpage with some value without Private
> > > flag. That causes a kernel panic later due to wrong private flag used in f2fs.
> >
> > I'm not familiar with that part of codes, so Cc mm mailing list for help.
> >
> > My question is newpage in .migrate_page() may contain non-zero value in .private
> > field but w/o setting PagePrivate flag, is it a normal case?
>
> I think freshly allocated pages have a page->private of 0. ie this
> code in mm/page_alloc.c:
>
> page = rmqueue(ac->preferred_zoneref->zone, zone, order,
> gfp_mask, alloc_flags, ac->migratetype);
> if (page) {
> prep_new_page(page, order, gfp_mask, alloc_flags);
>
> where prep_new_page() calls post_alloc_hook() which contains:
> set_page_private(page, 0);
>
> Now, I do see in __buffer_migrate_page() (mm/migrate.c):
>
> attach_page_private(newpage, detach_page_private(page));
>
> but as far as I can tell, f2fs doesn't call any of the
> buffer_migrate_page() paths. So I'm not sure why you're seeing
> a non-zero page->private.
Hmm, I can see it in 4.14 and 5.10 kernel.
The trace is on:
30875 [ 1065.118750] c3 87 f2fs_migrate_page+0x354/0x45c
30876 [ 1065.123872] c3 87 move_to_new_page+0x70/0x30c
30877 [ 1065.128813] c3 87 migrate_pages+0x3a0/0x964
30878 [ 1065.133583] c3 87 compact_zone+0x608/0xb04
30879 [ 1065.138257] c3 87 kcompactd+0x378/0x4ec
30880 [ 1065.142664] c3 87 kthread+0x11c/0x12c
30881 [ 1065.146897] c3 87 ret_from_fork+0x10/0x18
It seems compaction_alloc() gets a free page which doesn't reset the fields?
On 07/06, Chao Yu wrote:
> On 2021/7/5 19:47, Matthew Wilcox wrote:
> > On Mon, Jul 05, 2021 at 07:33:35PM +0800, Chao Yu wrote:
> > > On 2021/7/5 16:56, Jaegeuk Kim wrote:
> > > > On 07/05, Chao Yu wrote:
> > > > > On 2021/7/5 13:22, Jaegeuk Kim wrote:
> > > > > > We need to guarantee it's initially zero. Otherwise, it'll hurt entire flag
> > > > > > operations.
> > > > >
> > > > > Oops, I didn't get the point, shouldn't .private be zero after page was
> > > > > just allocated by filesystem? What's the case we will encounter stall
> > > > > private data left in page?
> > > >
> > > > I'm seeing f2fs_migrate_page() has the newpage with some value without Private
> > > > flag. That causes a kernel panic later due to wrong private flag used in f2fs.
> > >
> > > I'm not familiar with that part of codes, so Cc mm mailing list for help.
> > >
> > > My question is newpage in .migrate_page() may contain non-zero value in .private
> > > field but w/o setting PagePrivate flag, is it a normal case?
> >
> > I think freshly allocated pages have a page->private of 0. ie this
> > code in mm/page_alloc.c:
> >
> > page = rmqueue(ac->preferred_zoneref->zone, zone, order,
> > gfp_mask, alloc_flags, ac->migratetype);
> > if (page) {
> > prep_new_page(page, order, gfp_mask, alloc_flags);
> >
> > where prep_new_page() calls post_alloc_hook() which contains:
> > set_page_private(page, 0);
> >
> > Now, I do see in __buffer_migrate_page() (mm/migrate.c):
> >
> > attach_page_private(newpage, detach_page_private(page));
> >
> > but as far as I can tell, f2fs doesn't call any of the
> > buffer_migrate_page() paths. So I'm not sure why you're seeing
> > a non-zero page->private.
>
> Well, that's strange.
>
> Jaegeuk, let's add a BUGON in f2fs to track the call path where newpage
> has non-zero private value? if this issue is reproducible.
We can debug anything, though; this issue is blocking production, and I'd
like to get this into this merge window. Could you please check whether the
patch has any holes?
>
> Thanks,
>
> >
On Mon, Jul 05, 2021 at 11:04:21AM -0700, Jaegeuk Kim wrote:
> On 07/05, Matthew Wilcox wrote:
> > I think freshly allocated pages have a page->private of 0. ie this
> > code in mm/page_alloc.c:
> >
> > page = rmqueue(ac->preferred_zoneref->zone, zone, order,
> > gfp_mask, alloc_flags, ac->migratetype);
> > if (page) {
> > prep_new_page(page, order, gfp_mask, alloc_flags);
> >
> > where prep_new_page() calls post_alloc_hook() which contains:
> > set_page_private(page, 0);
>
> Hmm, I can see it in 4.14 and 5.10 kernel.
>
> The trace is on:
>
> 30875 [ 1065.118750] c3 87 f2fs_migrate_page+0x354/0x45c
> 30876 [ 1065.123872] c3 87 move_to_new_page+0x70/0x30c
> 30877 [ 1065.128813] c3 87 migrate_pages+0x3a0/0x964
> 30878 [ 1065.133583] c3 87 compact_zone+0x608/0xb04
> 30879 [ 1065.138257] c3 87 kcompactd+0x378/0x4ec
> 30880 [ 1065.142664] c3 87 kthread+0x11c/0x12c
> 30881 [ 1065.146897] c3 87 ret_from_fork+0x10/0x18
>
> It seems compaction_alloc() gets a free page which doesn't reset the fields?
I'm not really familiar with the compaction code. Mel, I see a call
to post_alloc_hook() in split_map_pages(). Are there other ways of
getting the compaction code to allocate a page which don't go through
split_map_pages()?
On 2021/7/6 2:06, Jaegeuk Kim wrote:
> On 07/06, Chao Yu wrote:
>> On 2021/7/5 19:47, Matthew Wilcox wrote:
>>> On Mon, Jul 05, 2021 at 07:33:35PM +0800, Chao Yu wrote:
>>>> On 2021/7/5 16:56, Jaegeuk Kim wrote:
>>>>> On 07/05, Chao Yu wrote:
>>>>>> On 2021/7/5 13:22, Jaegeuk Kim wrote:
>>>>>>> We need to guarantee it's initially zero. Otherwise, it'll hurt entire flag
>>>>>>> operations.
>>>>>>
>>>>>> Oops, I didn't get the point, shouldn't .private be zero after page was
>>>>>> just allocated by filesystem? What's the case we will encounter stall
>>>>>> private data left in page?
>>>>>
>>>>> I'm seeing f2fs_migrate_page() has the newpage with some value without Private
>>>>> flag. That causes a kernel panic later due to wrong private flag used in f2fs.
>>>>
>>>> I'm not familiar with that part of codes, so Cc mm mailing list for help.
>>>>
>>>> My question is newpage in .migrate_page() may contain non-zero value in .private
>>>> field but w/o setting PagePrivate flag, is it a normal case?
>>>
>>> I think freshly allocated pages have a page->private of 0. ie this
>>> code in mm/page_alloc.c:
>>>
>>> page = rmqueue(ac->preferred_zoneref->zone, zone, order,
>>> gfp_mask, alloc_flags, ac->migratetype);
>>> if (page) {
>>> prep_new_page(page, order, gfp_mask, alloc_flags);
>>>
>>> where prep_new_page() calls post_alloc_hook() which contains:
>>> set_page_private(page, 0);
>>>
>>> Now, I do see in __buffer_migrate_page() (mm/migrate.c):
>>>
>>> attach_page_private(newpage, detach_page_private(page));
>>>
>>> but as far as I can tell, f2fs doesn't call any of the
>>> buffer_migrate_page() paths. So I'm not sure why you're seeing
>>> a non-zero page->private.
>>
>> Well, that's strange.
>>
>> Jaegeuk, let's add a BUGON in f2fs to track the call path where newpage
>> has non-zero private value? if this issue is reproducible.
>
> We can debug anything tho, this issue is blocking the production, and I'd
> like to get this in this merge windows. Could you please check the patch
> has any holes?
The code looks good to me,
Reviewed-by: Chao Yu <[email protected]>
Thanks,
>
>>
>> Thanks,
>>
>>>
On Mon, Jul 05, 2021 at 07:45:26PM +0100, Matthew Wilcox wrote:
> On Mon, Jul 05, 2021 at 11:04:21AM -0700, Jaegeuk Kim wrote:
> > On 07/05, Matthew Wilcox wrote:
> > > I think freshly allocated pages have a page->private of 0. ie this
> > > code in mm/page_alloc.c:
> > >
> > > page = rmqueue(ac->preferred_zoneref->zone, zone, order,
> > > gfp_mask, alloc_flags, ac->migratetype);
> > > if (page) {
> > > prep_new_page(page, order, gfp_mask, alloc_flags);
> > >
> > > where prep_new_page() calls post_alloc_hook() which contains:
> > > set_page_private(page, 0);
> >
> > Hmm, I can see it in 4.14 and 5.10 kernel.
> >
> > The trace is on:
> >
> > 30875 [ 1065.118750] c3 87 f2fs_migrate_page+0x354/0x45c
> > 30876 [ 1065.123872] c3 87 move_to_new_page+0x70/0x30c
> > 30877 [ 1065.128813] c3 87 migrate_pages+0x3a0/0x964
> > 30878 [ 1065.133583] c3 87 compact_zone+0x608/0xb04
> > 30879 [ 1065.138257] c3 87 kcompactd+0x378/0x4ec
> > 30880 [ 1065.142664] c3 87 kthread+0x11c/0x12c
> > 30881 [ 1065.146897] c3 87 ret_from_fork+0x10/0x18
> >
> > It seems compaction_alloc() gets a free page which doesn't reset the fields?
>
> I'm not really familiar with the compaction code. Mel, I see a call
> to post_alloc_hook() in split_map_pages(). Are there other ways of
> getting the compaction code to allocate a page which don't go through
> split_map_pages()?
I don't *think* so but I didn't look too hard as I had limited time
available before a meeting. compaction_alloc calls isolate_freepages
and that calls split_map_pages whether fast or slow isolating pages. The
problem *may* be in split_page because only the head page gets order set
to 0 but it's a bad fit because tail pages should be cleared of private
state by del_page_from_free_list. It might be worth adding a debugging
patch to split_page that prints a warning once if a tail page has private
state, and dump the contents of private to see if it looks like an order.
--
Mel Gorman
SUSE Labs
On 2021/7/6 17:12, Mel Gorman wrote:
> On Mon, Jul 05, 2021 at 07:45:26PM +0100, Matthew Wilcox wrote:
>> I'm not really familiar with the compaction code. Mel, I see a call
>> to post_alloc_hook() in split_map_pages(). Are there other ways of
>> getting the compaction code to allocate a page which don't go through
>> split_map_pages()?
>
> I don't *think* so but I didn't look too hard as I had limited time
> available before a meeting. compaction_alloc calls isolate_freepages
> and that calls split_map_pages whether fast or slow isolating pages. The
> problem *may* be in split_page because only the head page gets order set
> to 0 but it's a bad fit because tail pages should be cleared of private
> state by del_page_from_free_list. It might be worth adding a debugging
> patch to split_pages that prints a warning once if a tail page has private
> state and dump the contents of private to see if it looks like an order.
Thanks for your hint!
---
mm/page_alloc.c | 9 +++++++--
1 file changed, 7 insertions(+), 2 deletions(-)
diff --git a/mm/page_alloc.c b/mm/page_alloc.c
index d6e94cc8066c..be87c4be481f 100644
--- a/mm/page_alloc.c
+++ b/mm/page_alloc.c
@@ -3515,8 +3515,13 @@ void split_page(struct page *page, unsigned int order)
VM_BUG_ON_PAGE(PageCompound(page), page);
VM_BUG_ON_PAGE(!page_count(page), page);
- for (i = 1; i < (1 << order); i++)
- set_page_refcounted(page + i);
+ for (i = 1; i < (1 << order); i++) {
+ struct page *tail_page = page + i;
+
+ set_page_refcounted(tail_page);
+ if (WARN_ON_ONCE(tail_page->private))
+ pr_info("order:%x, tailpage.private:%x", order, tail_page->private);
+ }
split_page_owner(page, 1 << order);
split_page_memcg(page, 1 << order);
}
--
2.22.1
With above diff, I got this:
------------[ cut here ]------------
WARNING: CPU: 3 PID: 57 at mm/page_alloc.c:3363 split_page.cold+0x8/0x3b
CPU: 3 PID: 57 Comm: kcompactd0 Tainted: G O 5.13.0-rc1+ #67
Hardware name: innotek GmbH VirtualBox/VirtualBox, BIOS VirtualBox 12/01/2006
RIP: 0010:split_page.cold+0x8/0x3b
Code: 83 05 a9 1a 32 02 01 48 c7 05 16 5f 32 02 00 00 00 00 48 c7 05 1b 5f 32 02 01 00 00 00 e9 3c ff ff ff 48 83 05 6e 2c 32 02 01 <0f> 0b 48 83 05 6c 2c 32 02 01 48 c7 c7 38 b2 b1 82 89 ce 89 4d dc
RSP: 0018:ffffc90000227bc0 EFLAGS: 00010202
RAX: 0000000000001f80 RBX: ffffea0004942600 RCX: 0000000000000007
RDX: 00000000000d0000 RSI: 0000000000000007 RDI: ffffea0004942000
RBP: ffffc90000227be8 R08: 0000000000000081 R09: 0000000000000000
R10: 0000000000000001 R11: 0000000000125200 R12: ffffea0004944000
R13: 0000000000000080 R14: ffffea0004942000 R15: ffffc90000227c00
FS: 0000000000000000(0000) GS:ffff888217b80000(0000) knlGS:0000000000000000
CS: 0010 DS: 0000 ES: 0000 CR0: 0000000080050033
CR2: 00007f94ed9bef80 CR3: 0000000002e12001 CR4: 00000000000706e0
Call Trace:
split_map_pages+0x11d/0x190
isolate_freepages+0x355/0x3f0
? free_unref_page+0xd0/0x110
? trace_hardirqs_on+0x52/0x200
compaction_alloc+0x61/0x80
migrate_pages+0x36a/0xf30
? move_freelist_tail+0x140/0x140
? isolate_freepages+0x3f0/0x3f0
compact_zone+0x221/0xaa0
kcompactd_do_work+0x1ef/0x590
kcompactd+0x115/0x5c0
? woken_wake_function+0x40/0x40
kthread+0x17d/0x1e0
? proactive_compact_node+0xe0/0xe0
? kthread_insert_work_sanity_check+0xf0/0xf0
ret_from_fork+0x22/0x30
irq event stamp: 389337
hardirqs last enabled at (389343): [<ffffffff811755ea>] console_unlock+0x4da/0x690
hardirqs last disabled at (389348): [<ffffffff811755d0>] console_unlock+0x4c0/0x690
softirqs last enabled at (277616): [<ffffffff824005bf>] __do_softirq+0x5bf/0x6c4
softirqs last disabled at (277605): [<ffffffff810bc47b>] irq_exit_rcu+0x12b/0x1b0
---[ end trace 910306ade44b0b3d ]---
order:7, tailpage.private:d0000
order:7, tailpage.private:d0000
order:7, tailpage.private:d0000
order:7, tailpage.private:200000
order:7, tailpage.private:d0000
order:7, tailpage.private:d0000
order:7, tailpage.private:d0000
So how about adding set_page_private(page, 0) in split_page() to clear
stale data left in the tail pages' private field?
Thanks,
>
On Wed, Jul 07, 2021 at 08:48:28AM +0800, Chao Yu wrote:
> On 2021/7/6 17:12, Mel Gorman wrote:
> diff --git a/mm/page_alloc.c b/mm/page_alloc.c
> index d6e94cc8066c..be87c4be481f 100644
> --- a/mm/page_alloc.c
> +++ b/mm/page_alloc.c
> @@ -3515,8 +3515,13 @@ void split_page(struct page *page, unsigned int order)
> VM_BUG_ON_PAGE(PageCompound(page), page);
> VM_BUG_ON_PAGE(!page_count(page), page);
>
> - for (i = 1; i < (1 << order); i++)
> - set_page_refcounted(page + i);
> + for (i = 1; i < (1 << order); i++) {
> + struct page *tail_page = page + i;
> +
> + set_page_refcounted(tail_page);
> + if (WARN_ON_ONCE(tail_page->private))
> + pr_info("order:%x, tailpage.private:%x", order, tail_page->private);
> + }
> split_page_owner(page, 1 << order);
> split_page_memcg(page, 1 << order);
> }
> --
> 2.22.1
>
> With above diff, I got this:
>
> ------------[ cut here ]------------
> WARNING: CPU: 3 PID: 57 at mm/page_alloc.c:3363 split_page.cold+0x8/0x3b
> CPU: 3 PID: 57 Comm: kcompactd0 Tainted: G O 5.13.0-rc1+ #67
> <SNIP>
> order:7, tailpage.private:d0000
> order:7, tailpage.private:d0000
> order:7, tailpage.private:d0000
> order:7, tailpage.private:200000
> order:7, tailpage.private:d0000
> order:7, tailpage.private:d0000
> order:7, tailpage.private:d0000
>
> So how about adding set_page_private(page, 0) in split_page() to clear
> stall data left in tailpages' private field?
>
I think it would work but it would be preferable to find out why the
tail page has an order set in the first place. I've looked over
mm/page_alloc.c and mm/compaction.c a few times and did not spot where
set_page_private(page, 0) is missed when it should be covered by
clear_page_guard or del_page_from_free_list :(
--
Mel Gorman
SUSE Labs
On 2021/7/7 17:57, Mel Gorman wrote:
> I think it would work but it would be preferable to find out why the
> tail page has an order set in the first place. I've looked over
Agreed.
> mm/page_alloc.c and mm/compaction.c a few times and did not spot where
> set_private_page(page, 0) is missed when it should be covered by
> clear_page_guard or del_page_from_free_list :(
I didn't enable CONFIG_DEBUG_PAGEALLOC, so we expect the page private field
to be cleared by del_page_from_free_list(), but I guess it only clears the
buddy's private field rather than the original page's. So I added the below
diff and checked the dmesg; it looks like a stale private value in the
original page is commonly left behind... Let me know if I missed something.
---
mm/page_alloc.c | 5 +++++
1 file changed, 5 insertions(+)
diff --git a/mm/page_alloc.c b/mm/page_alloc.c
index a06bcfe6f786..1e7031ff548e 100644
--- a/mm/page_alloc.c
+++ b/mm/page_alloc.c
@@ -1029,6 +1029,7 @@ static inline void __free_one_page(struct page *page,
unsigned long combined_pfn;
unsigned int max_order;
struct page *buddy;
+ struct page *orig_page = page;
bool to_tail;
max_order = min_t(unsigned int, MAX_ORDER - 1, pageblock_order);
@@ -1097,6 +1098,10 @@ static inline void __free_one_page(struct page *page,
done_merging:
set_buddy_order(page, order);
+ if (orig_page != page) {
+ if (WARN_ON_ONCE(orig_page->private))
+ pr_info("2order:%x, origpage.private:%x", order, orig_page->private);
+ }
if (fpi_flags & FPI_TO_TAIL)
to_tail = true;
--
2.22.1
>
On Sat 10-07-21 16:11:38, Chao Yu wrote:
> On 2021/7/7 17:57, Mel Gorman wrote:
> > I think it would work but it would be preferable to find out why the
> > tail page has an order set in the first place. I've looked over
>
> Agreed.
>
> > mm/page_alloc.c and mm/compaction.c a few times and did not spot where
> > set_private_page(page, 0) is missed when it should be covered by
> > clear_page_guard or del_page_from_free_list :(
>
> I didn't enable CONFIG_DEBUG_PAGEALLOC, so we will expect page private
> should be cleared by del_page_from_free_list(), but I guess it only clears
> the buddy's private field rather than original page's, so I added below
> diff and check the dmesg, it looks stall private value in original page
> will be left commonly... Let me know if I missed something?
Page private should be cleared when the page is freed to the allocator.
Have a look at PAGE_FLAGS_CHECK_AT_FREE.
> ---
> mm/page_alloc.c | 5 +++++
> 1 file changed, 5 insertions(+)
>
> diff --git a/mm/page_alloc.c b/mm/page_alloc.c
> index a06bcfe6f786..1e7031ff548e 100644
> --- a/mm/page_alloc.c
> +++ b/mm/page_alloc.c
> @@ -1029,6 +1029,7 @@ static inline void __free_one_page(struct page *page,
> unsigned long combined_pfn;
> unsigned int max_order;
> struct page *buddy;
> + struct page *orig_page = page;
> bool to_tail;
>
> max_order = min_t(unsigned int, MAX_ORDER - 1, pageblock_order);
> @@ -1097,6 +1098,10 @@ static inline void __free_one_page(struct page *page,
>
> done_merging:
> set_buddy_order(page, order);
> + if (orig_page != page) {
> + if (WARN_ON_ONCE(orig_page->private))
> + pr_info("2order:%x, origpage.private:%x", order, orig_page->private);
> + }
Why is this expected? Buddy allocator uses page private to store order.
Whether we are merging to the freed page or coalesce it to a different
page is not all that important.
--
Michal Hocko
SUSE Labs
On 2021/7/12 14:53, Michal Hocko wrote:
> On Sat 10-07-21 16:11:38, Chao Yu wrote:
>> On 2021/7/7 17:57, Mel Gorman wrote:
>>> I think it would work but it would be preferable to find out why the
>>> tail page has an order set in the first place. I've looked over
>>
>> Agreed.
>>
>>> mm/page_alloc.c and mm/compaction.c a few times and did not spot where
>>> set_private_page(page, 0) is missed when it should be covered by
>>> clear_page_guard or del_page_from_free_list :(
>>
>> I didn't enable CONFIG_DEBUG_PAGEALLOC, so we will expect page private
>> should be cleared by del_page_from_free_list(), but I guess it only clears
>> the buddy's private field rather than original page's, so I added below
>> diff and check the dmesg, it looks stall private value in original page
>> will be left commonly... Let me know if I missed something?
>
> Page private should be cleared when the page is freed to the allocator.
> Have a look at PAGE_FLAGS_CHECK_AT_FREE.
Quoted from Jaegeuk's comments in [1]
"Hmm, I can see it in 4.14 and 5.10 kernel.
The trace is on:
30875 [ 1065.118750] c3 87 f2fs_migrate_page+0x354/0x45c
30876 [ 1065.123872] c3 87 move_to_new_page+0x70/0x30c
30877 [ 1065.128813] c3 87 migrate_pages+0x3a0/0x964
30878 [ 1065.133583] c3 87 compact_zone+0x608/0xb04
30879 [ 1065.138257] c3 87 kcompactd+0x378/0x4ec
30880 [ 1065.142664] c3 87 kthread+0x11c/0x12c
30881 [ 1065.146897] c3 87 ret_from_fork+0x10/0x18
It seems compaction_alloc() gets a free page which doesn't reset the fields?"
https://lore.kernel.org/linux-f2fs-devel/[email protected]/T/#m98a4a5e777f5b0e7366b367463efafd2133dd681
So the problem we hit here is: in f2fs_migrate_page(), newpage may have a stale
.private value without the PG_private flag, which may cause f2fs to treat the
page with the wrong private status.
>
>> ---
>> mm/page_alloc.c | 5 +++++
>> 1 file changed, 5 insertions(+)
>>
>> diff --git a/mm/page_alloc.c b/mm/page_alloc.c
>> index a06bcfe6f786..1e7031ff548e 100644
>> --- a/mm/page_alloc.c
>> +++ b/mm/page_alloc.c
>> @@ -1029,6 +1029,7 @@ static inline void __free_one_page(struct page *page,
>> unsigned long combined_pfn;
>> unsigned int max_order;
>> struct page *buddy;
>> + struct page *orig_page = page;
>> bool to_tail;
>>
>> max_order = min_t(unsigned int, MAX_ORDER - 1, pageblock_order);
>> @@ -1097,6 +1098,10 @@ static inline void __free_one_page(struct page *page,
>>
>> done_merging:
>> set_buddy_order(page, order);
>> + if (orig_page != page) {
>> + if (WARN_ON_ONCE(orig_page->private))
>> + pr_info("2order:%x, origpage.private:%x", order, orig_page->private);
>> + }
>
> Why is this expected? Buddy allocator uses page private to store order.
> Whether we are merging to the freed page or coalesce it to a different
The order was only set in the head page, right? It looks like __free_one_page()
tries to clear page.private for every buddy with del_page_from_free_list().
If that is true, after the done_merging label in __free_one_page(), if the
original page is a tail page, may we have missed clearing its page.private field?
Thanks,
> page is not all that important.
>