2021-04-04 15:34:23

by Xu, Yanfei

Subject: [PATCH 0/2] mm: khugepaged: cleanup and a minor tuning in THP

From: Yanfei Xu <[email protected]>



Yanfei Xu (2):
mm: khugepaged: use macro to align addresses
mm: khugepaged: check MMF_DISABLE_THP ahead of iterating over vmas

mm/khugepaged.c | 31 ++++++++++++++++---------------
1 file changed, 16 insertions(+), 15 deletions(-)

--
2.27.0


2021-04-04 15:34:28

by Xu, Yanfei

Subject: [PATCH 1/2] mm: khugepaged: use macro to align addresses

From: Yanfei Xu <[email protected]>

Use the ALIGN(), ALIGN_DOWN() and IS_ALIGNED() macros for the
addresses that need to be aligned, which improves code readability.

Signed-off-by: Yanfei Xu <[email protected]>
---
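Notes (not part of the commit message): below is a minimal userspace
sketch of how these macros behave for a 2MB HPAGE_PMD_SIZE. The macro
bodies are simplified, power-of-two-only equivalents of the kernel's
definitions in include/linux/align.h, and the addresses are made up
for illustration.

#include <stdio.h>

#define SZ_2M            (2UL << 20)  /* HPAGE_PMD_SIZE on x86-64 with 4K pages */
#define ALIGN(x, a)      (((x) + (a) - 1) & ~((a) - 1))  /* round up */
#define ALIGN_DOWN(x, a) ((x) & ~((a) - 1))              /* round down */
#define IS_ALIGNED(x, a) (((x) & ((a) - 1)) == 0)

int main(void)
{
        unsigned long vm_start = 0x7f0000001000UL;  /* hypothetical, unaligned */
        unsigned long vm_end   = 0x7f0000600000UL;  /* hypothetical, 2MB aligned */

        /* hstart rounds up into the vma, hend rounds down, as in the patch */
        printf("hstart = %#lx\n", ALIGN(vm_start, SZ_2M));          /* 0x7f0000200000 */
        printf("hend   = %#lx\n", ALIGN_DOWN(vm_end, SZ_2M));       /* 0x7f0000600000 */
        printf("start aligned? %d\n", IS_ALIGNED(vm_start, SZ_2M)); /* 0 */
        return 0;
}

The old open-coded form, (addr + ~HPAGE_PMD_MASK) & HPAGE_PMD_MASK,
computes the same round-up, since HPAGE_PMD_MASK is ~(HPAGE_PMD_SIZE - 1).
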
mm/khugepaged.c | 28 ++++++++++++++--------------
1 file changed, 14 insertions(+), 14 deletions(-)

diff --git a/mm/khugepaged.c b/mm/khugepaged.c
index a7d6cb912b05..2efe1d0c92ed 100644
--- a/mm/khugepaged.c
+++ b/mm/khugepaged.c
@@ -517,8 +517,8 @@ int khugepaged_enter_vma_merge(struct vm_area_struct *vma,
if (!hugepage_vma_check(vma, vm_flags))
return 0;

- hstart = (vma->vm_start + ~HPAGE_PMD_MASK) & HPAGE_PMD_MASK;
- hend = vma->vm_end & HPAGE_PMD_MASK;
+ hstart = ALIGN(vma->vm_start, HPAGE_PMD_SIZE);
+ hend = ALIGN_DOWN(vma->vm_end, HPAGE_PMD_SIZE);
if (hstart < hend)
return khugepaged_enter(vma, vm_flags);
return 0;
@@ -979,8 +979,8 @@ static int hugepage_vma_revalidate(struct mm_struct *mm, unsigned long address,
if (!vma)
return SCAN_VMA_NULL;

- hstart = (vma->vm_start + ~HPAGE_PMD_MASK) & HPAGE_PMD_MASK;
- hend = vma->vm_end & HPAGE_PMD_MASK;
+ hstart = ALIGN(vma->vm_start, HPAGE_PMD_SIZE);
+ hend = ALIGN_DOWN(vma->vm_end, HPAGE_PMD_SIZE);
if (address < hstart || address + HPAGE_PMD_SIZE > hend)
return SCAN_ADDRESS_RANGE;
if (!hugepage_vma_check(vma, vma->vm_flags))
@@ -1070,7 +1070,7 @@ static void collapse_huge_page(struct mm_struct *mm,
struct mmu_notifier_range range;
gfp_t gfp;

- VM_BUG_ON(address & ~HPAGE_PMD_MASK);
+ VM_BUG_ON(!IS_ALIGNED(address, HPAGE_PMD_SIZE));

/* Only allocate from the target node */
gfp = alloc_hugepage_khugepaged_gfpmask() | __GFP_THISNODE;
@@ -1235,7 +1235,7 @@ static int khugepaged_scan_pmd(struct mm_struct *mm,
int node = NUMA_NO_NODE, unmapped = 0;
bool writable = false;

- VM_BUG_ON(address & ~HPAGE_PMD_MASK);
+ VM_BUG_ON(!IS_ALIGNED(address, HPAGE_PMD_SIZE));

pmd = mm_find_pmd(mm, address);
if (!pmd) {
@@ -1414,7 +1414,7 @@ static int khugepaged_add_pte_mapped_thp(struct mm_struct *mm,
{
struct mm_slot *mm_slot;

- VM_BUG_ON(addr & ~HPAGE_PMD_MASK);
+ VM_BUG_ON(!IS_ALIGNED(addr, HPAGE_PMD_SIZE));

spin_lock(&khugepaged_mm_lock);
mm_slot = get_mm_slot(mm);
@@ -1437,7 +1437,7 @@ static int khugepaged_add_pte_mapped_thp(struct mm_struct *mm,
*/
void collapse_pte_mapped_thp(struct mm_struct *mm, unsigned long addr)
{
- unsigned long haddr = addr & HPAGE_PMD_MASK;
+ unsigned long haddr = ALIGN_DOWN(addr, HPAGE_PMD_SIZE);
struct vm_area_struct *vma = find_vma(mm, haddr);
struct page *hpage;
pte_t *start_pte, *pte;
@@ -1584,7 +1584,7 @@ static void retract_page_tables(struct address_space *mapping, pgoff_t pgoff)
if (vma->anon_vma)
continue;
addr = vma->vm_start + ((pgoff - vma->vm_pgoff) << PAGE_SHIFT);
- if (addr & ~HPAGE_PMD_MASK)
+ if (!IS_ALIGNED(addr, HPAGE_PMD_SIZE))
continue;
if (vma->vm_end < addr + HPAGE_PMD_SIZE)
continue;
@@ -2070,7 +2070,7 @@ static unsigned int khugepaged_scan_mm_slot(unsigned int pages,
{
struct mm_slot *mm_slot;
struct mm_struct *mm;
- struct vm_area_struct *vma;
+ struct vm_area_struct *vma = NULL;
int progress = 0;

VM_BUG_ON(!pages);
@@ -2092,7 +2092,6 @@ static unsigned int khugepaged_scan_mm_slot(unsigned int pages,
* Don't wait for semaphore (to avoid long wait times). Just move to
* the next mm on the list.
*/
- vma = NULL;
if (unlikely(!mmap_read_trylock(mm)))
goto breakouterloop_mmap_lock;
if (likely(!khugepaged_test_exit(mm)))
@@ -2112,15 +2111,16 @@ static unsigned int khugepaged_scan_mm_slot(unsigned int pages,
progress++;
continue;
}
- hstart = (vma->vm_start + ~HPAGE_PMD_MASK) & HPAGE_PMD_MASK;
- hend = vma->vm_end & HPAGE_PMD_MASK;
+ hstart = ALIGN(vma->vm_start, HPAGE_PMD_SIZE);
+ hend = ALIGN_DOWN(vma->vm_end, HPAGE_PMD_SIZE);
if (hstart >= hend)
goto skip;
if (khugepaged_scan.address > hend)
goto skip;
if (khugepaged_scan.address < hstart)
khugepaged_scan.address = hstart;
- VM_BUG_ON(khugepaged_scan.address & ~HPAGE_PMD_MASK);
+ VM_BUG_ON(!IS_ALIGNED(khugepaged_scan.address, HPAGE_PMD_SIZE));
+
if (shmem_file(vma->vm_file) && !shmem_huge_enabled(vma))
goto skip;

--
2.27.0

2021-04-04 15:35:36

by Xu, Yanfei

Subject: [PATCH 2/2] mm: khugepaged: check MMF_DISABLE_THP ahead of iterating over vmas

From: Yanfei Xu <[email protected]>

Check MMF_DISABLE_THP before iterating over all of the vmas.
Otherwise, if an mm_struct contains a large number of vmas, we waste
a meaningless amount of cpu cycles walking them.

BTW, drop an unnecessary cond_resched(): there is another
cond_resched() right after it, with no time-consuming work in between.

Signed-off-by: Yanfei Xu <[email protected]>
---
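Notes (not part of the commit message): MMF_DISABLE_THP is the per-mm
flag set by prctl(PR_SET_THP_DISABLE); once set, hugepage_vma_check()
returns false for every vma of that mm, so walking the vma list is pure
overhead. A minimal userspace sketch of a process that sets the flag:

#include <stdio.h>
#include <sys/prctl.h>

int main(void)
{
        /* Sets MMF_DISABLE_THP on current->mm; khugepaged gains nothing
         * from scanning this process's vmas afterwards. */
        if (prctl(PR_SET_THP_DISABLE, 1, 0, 0, 0))
                perror("prctl(PR_SET_THP_DISABLE)");
        printf("THP disabled for this mm: %ld\n",
               (long)prctl(PR_GET_THP_DISABLE, 0, 0, 0, 0));
        return 0;
}
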
mm/khugepaged.c | 3 ++-
1 file changed, 2 insertions(+), 1 deletion(-)

diff --git a/mm/khugepaged.c b/mm/khugepaged.c
index 2efe1d0c92ed..c293ec4a94ea 100644
--- a/mm/khugepaged.c
+++ b/mm/khugepaged.c
@@ -2094,6 +2094,8 @@ static unsigned int khugepaged_scan_mm_slot(unsigned int pages,
*/
if (unlikely(!mmap_read_trylock(mm)))
goto breakouterloop_mmap_lock;
+ if (test_bit(MMF_DISABLE_THP, &mm->flags))
+ goto breakouterloop_mmap_lock;
if (likely(!khugepaged_test_exit(mm)))
vma = find_vma(mm, khugepaged_scan.address);

@@ -2101,7 +2103,6 @@ static unsigned int khugepaged_scan_mm_slot(unsigned int pages,
for (; vma; vma = vma->vm_next) {
unsigned long hstart, hend;

- cond_resched();
if (unlikely(khugepaged_test_exit(mm))) {
progress++;
break;
--
2.27.0

2021-04-06 08:46:00

by Yang Shi

Subject: Re: [PATCH 2/2] mm: khugepaged: check MMF_DISABLE_THP ahead of iterating over vmas

On Sun, Apr 4, 2021 at 8:33 AM <[email protected]> wrote:
>
> From: Yanfei Xu <[email protected]>
>
> Check MMF_DISABLE_THP before iterating over all of the vmas.
> Otherwise, if an mm_struct contains a large number of vmas, we waste
> a meaningless amount of cpu cycles walking them.
>
> BTW, drop an unnecessary cond_resched(): there is another
> cond_resched() right after it, with no time-consuming work in between.
>
> Signed-off-by: Yanfei Xu <[email protected]>
> ---
> mm/khugepaged.c | 3 ++-
> 1 file changed, 2 insertions(+), 1 deletion(-)
>
> diff --git a/mm/khugepaged.c b/mm/khugepaged.c
> index 2efe1d0c92ed..c293ec4a94ea 100644
> --- a/mm/khugepaged.c
> +++ b/mm/khugepaged.c
> @@ -2094,6 +2094,8 @@ static unsigned int khugepaged_scan_mm_slot(unsigned int pages,
> */
> if (unlikely(!mmap_read_trylock(mm)))
> goto breakouterloop_mmap_lock;
> + if (test_bit(MMF_DISABLE_THP, &mm->flags))
> + goto breakouterloop_mmap_lock;

It is fine to check this flag. But mmap_lock has been acquired so you
should jump to breakouterloop.
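
I.e. something like this (untested sketch):

	if (unlikely(!mmap_read_trylock(mm)))
		goto breakouterloop_mmap_lock;
	if (test_bit(MMF_DISABLE_THP, &mm->flags))
		/* mmap_lock is held at this point; breakouterloop drops it */
		goto breakouterloop;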

> if (likely(!khugepaged_test_exit(mm)))
> vma = find_vma(mm, khugepaged_scan.address);
>
> @@ -2101,7 +2103,6 @@ static unsigned int khugepaged_scan_mm_slot(unsigned int pages,
> for (; vma; vma = vma->vm_next) {
> unsigned long hstart, hend;
>
> - cond_resched();

I don't have a strong opinion on removing this cond_resched(). But
IIUC khugepaged is a best-effort job, so there is no harm in keeping
it IMHO.

> if (unlikely(khugepaged_test_exit(mm))) {
> progress++;
> break;
> --
> 2.27.0
>
>

2021-04-06 13:01:54

by Xu, Yanfei

Subject: Re: [PATCH 2/2] mm: khugepaged: check MMF_DISABLE_THP ahead of iterating over vmas



On 4/6/21 10:51 AM, Xu, Yanfei wrote:
>
>
> On 4/6/21 2:20 AM, Yang Shi wrote:
>> On Sun, Apr 4, 2021 at 8:33 AM <[email protected]> wrote:
>>>
>>> From: Yanfei Xu <[email protected]>
>>>
>>> Check MMF_DISABLE_THP before iterating over all of the vmas.
>>> Otherwise, if an mm_struct contains a large number of vmas, we waste
>>> a meaningless amount of cpu cycles walking them.
>>>
>>> BTW, drop an unnecessary cond_resched(): there is another
>>> cond_resched() right after it, with no time-consuming work in between.
>>>
>>> Signed-off-by: Yanfei Xu <[email protected]>
>>> ---
>>>   mm/khugepaged.c | 3 ++-
>>>   1 file changed, 2 insertions(+), 1 deletion(-)
>>>
>>> diff --git a/mm/khugepaged.c b/mm/khugepaged.c
>>> index 2efe1d0c92ed..c293ec4a94ea 100644
>>> --- a/mm/khugepaged.c
>>> +++ b/mm/khugepaged.c
>>> @@ -2094,6 +2094,8 @@ static unsigned int
>>> khugepaged_scan_mm_slot(unsigned int pages,
>>>           */
>>>          if (unlikely(!mmap_read_trylock(mm)))
>>>                  goto breakouterloop_mmap_lock;
>>> +       if (test_bit(MMF_DISABLE_THP, &mm->flags))
>>> +               goto breakouterloop_mmap_lock;
>>
>> It is fine to check this flag. But mmap_lock has been acquired so you
>> should jump to breakouterloop.
>
> Oops! It's my fault. Thank you for pointing this out.
> Will fix it in v2.
>
>>
>>>          if (likely(!khugepaged_test_exit(mm)))
>>>                  vma = find_vma(mm, khugepaged_scan.address);
>>>
>>> @@ -2101,7 +2103,6 @@ static unsigned int
>>> khugepaged_scan_mm_slot(unsigned int pages,
>>>          for (; vma; vma = vma->vm_next) {
>>>                  unsigned long hstart, hend;
>>>
>>> -               cond_resched();
>>
>> I don't have a strong opinion on removing this cond_resched(). But
>> IIUC khugepaged is a best-effort job, so there is no harm in keeping
>> it IMHO.
>>
>
> Yes, keeping it does no harm. But I think we should only add it where
> it is needed. Looking at the code below, there are only some simple
> checks between these two cond_resched() calls. And we still have
> cond_resched() in khugepaged_scan_file() and khugepaged_scan_pmd(),
> which do the actual collapsing work. So I think it is unnecessary.  :)
>

BTW, the original author might have added this cond_resched() out of
worry that hugepage_vma_check() would always return false due to
MMF_DISABLE_THP, leaving the loop with no other chance to reschedule.
But now we have moved that check out of the for loop that iterates
over the vmas.

um.. That is my guess..

Thanks,
Yanfei

>         for (; vma; vma = vma->vm_next) {
>                 unsigned long hstart, hend;
>
>                 cond_resched();                 //here
>                 if (unlikely(khugepaged_test_exit(mm))) {
>                         progress++;
>                         break;
>                 }
>                 if (!hugepage_vma_check(vma, vma->vm_flags)) {
> skip:
>                         progress++;
>                         continue;
>                 }
>                 hstart = ALIGN(vma->vm_start, HPAGE_PMD_SIZE);
>                 hend = ALIGN_DOWN(vma->vm_end, HPAGE_PMD_SIZE);
>                 if (hstart >= hend)
>                         goto skip;
>                 if (khugepaged_scan.address > hend)
>                         goto skip;
>                 if (khugepaged_scan.address < hstart)
>                         khugepaged_scan.address = hstart;
>                 VM_BUG_ON(!IS_ALIGNED(khugepaged_scan.address,
> HPAGE_PMD_SIZE));
>
>                 if (shmem_file(vma->vm_file) && !shmem_huge_enabled(vma))
>                         goto skip;
>
>                 while (khugepaged_scan.address < hend) {
>                         int ret;
>                         cond_resched();        //here
>
>
>>>                  if (unlikely(khugepaged_test_exit(mm))) {
>>>                          progress++;
>>>                          break;
>>> --
>>> 2.27.0
>>>
>>>

2021-04-06 14:55:40

by Xu, Yanfei

Subject: Re: [PATCH 2/2] mm: khugepaged: check MMF_DISABLE_THP ahead of iterating over vmas



On 4/6/21 2:20 AM, Yang Shi wrote:
> On Sun, Apr 4, 2021 at 8:33 AM <[email protected]> wrote:
>>
>> From: Yanfei Xu <[email protected]>
>>
>> Check MMF_DISABLE_THP before iterating over all of the vmas.
>> Otherwise, if an mm_struct contains a large number of vmas, we waste
>> a meaningless amount of cpu cycles walking them.
>>
>> BTW, drop an unnecessary cond_resched(): there is another
>> cond_resched() right after it, with no time-consuming work in between.
>>
>> Signed-off-by: Yanfei Xu <[email protected]>
>> ---
>> mm/khugepaged.c | 3 ++-
>> 1 file changed, 2 insertions(+), 1 deletion(-)
>>
>> diff --git a/mm/khugepaged.c b/mm/khugepaged.c
>> index 2efe1d0c92ed..c293ec4a94ea 100644
>> --- a/mm/khugepaged.c
>> +++ b/mm/khugepaged.c
>> @@ -2094,6 +2094,8 @@ static unsigned int khugepaged_scan_mm_slot(unsigned int pages,
>> */
>> if (unlikely(!mmap_read_trylock(mm)))
>> goto breakouterloop_mmap_lock;
>> + if (test_bit(MMF_DISABLE_THP, &mm->flags))
>> + goto breakouterloop_mmap_lock;
>
> It is fine to check this flag. But mmap_lock has been acquired so you
> should jump to breakouterloop.

Oops! It's my fault. Thank you for pointing this out.
Will fix it in v2.

>
>> if (likely(!khugepaged_test_exit(mm)))
>> vma = find_vma(mm, khugepaged_scan.address);
>>
>> @@ -2101,7 +2103,6 @@ static unsigned int khugepaged_scan_mm_slot(unsigned int pages,
>> for (; vma; vma = vma->vm_next) {
>> unsigned long hstart, hend;
>>
>> - cond_resched();
>
> I don't have a strong opinion on removing this cond_resched(). But
> IIUC khugepaged is a best-effort job, so there is no harm in keeping
> it IMHO.
>

Yes, keeping it does no harm. But I think we should only add it where
it is needed. Looking at the code below, there are only some simple
checks between these two cond_resched() calls. And we still have
cond_resched() in khugepaged_scan_file() and khugepaged_scan_pmd(),
which do the actual collapsing work. So I think it is unnecessary. :)

for (; vma; vma = vma->vm_next) {
        unsigned long hstart, hend;

        cond_resched();                 //here
        if (unlikely(khugepaged_test_exit(mm))) {
                progress++;
                break;
        }
        if (!hugepage_vma_check(vma, vma->vm_flags)) {
skip:
                progress++;
                continue;
        }
        hstart = ALIGN(vma->vm_start, HPAGE_PMD_SIZE);
        hend = ALIGN_DOWN(vma->vm_end, HPAGE_PMD_SIZE);
        if (hstart >= hend)
                goto skip;
        if (khugepaged_scan.address > hend)
                goto skip;
        if (khugepaged_scan.address < hstart)
                khugepaged_scan.address = hstart;
        VM_BUG_ON(!IS_ALIGNED(khugepaged_scan.address, HPAGE_PMD_SIZE));

        if (shmem_file(vma->vm_file) && !shmem_huge_enabled(vma))
                goto skip;

        while (khugepaged_scan.address < hend) {
                int ret;
                cond_resched();        //here


>> if (unlikely(khugepaged_test_exit(mm))) {
>> progress++;
>> break;
>> --
>> 2.27.0
>>
>>

2021-04-07 11:19:21

by Yang Shi

Subject: Re: [PATCH 2/2] mm: khugepaged: check MMF_DISABLE_THP ahead of iterating over vmas

On Mon, Apr 5, 2021 at 8:05 PM Xu, Yanfei <[email protected]> wrote:
>
>
>
> On 4/6/21 10:51 AM, Xu, Yanfei wrote:
> >
> >
> > On 4/6/21 2:20 AM, Yang Shi wrote:
> >> On Sun, Apr 4, 2021 at 8:33 AM <[email protected]> wrote:
> >>>
> >>> From: Yanfei Xu <[email protected]>
> >>>
> >>> Check MMF_DISABLE_THP before iterating over all of the vmas.
> >>> Otherwise, if an mm_struct contains a large number of vmas, we waste
> >>> a meaningless amount of cpu cycles walking them.
> >>>
> >>> BTW, drop an unnecessary cond_resched(): there is another
> >>> cond_resched() right after it, with no time-consuming work in between.
> >>>
> >>> Signed-off-by: Yanfei Xu <[email protected]>
> >>> ---
> >>> mm/khugepaged.c | 3 ++-
> >>> 1 file changed, 2 insertions(+), 1 deletion(-)
> >>>
> >>> diff --git a/mm/khugepaged.c b/mm/khugepaged.c
> >>> index 2efe1d0c92ed..c293ec4a94ea 100644
> >>> --- a/mm/khugepaged.c
> >>> +++ b/mm/khugepaged.c
> >>> @@ -2094,6 +2094,8 @@ static unsigned int
> >>> khugepaged_scan_mm_slot(unsigned int pages,
> >>> */
> >>> if (unlikely(!mmap_read_trylock(mm)))
> >>> goto breakouterloop_mmap_lock;
> >>> + if (test_bit(MMF_DISABLE_THP, &mm->flags))
> >>> + goto breakouterloop_mmap_lock;
> >>
> >> It is fine to check this flag. But mmap_lock has been acquired so you
> >> should jump to breakouterloop.
> >
> > Oops! It's my fault. Thank you for pointing this out.
> > Will fix it in v2.
> >
> >>
> >>> if (likely(!khugepaged_test_exit(mm)))
> >>> vma = find_vma(mm, khugepaged_scan.address);
> >>>
> >>> @@ -2101,7 +2103,6 @@ static unsigned int
> >>> khugepaged_scan_mm_slot(unsigned int pages,
> >>> for (; vma; vma = vma->vm_next) {
> >>> unsigned long hstart, hend;
> >>>
> >>> - cond_resched();
> >>
> >> I don't have a strong opinion on removing this cond_resched(). But
> >> IIUC khugepaged is a best-effort job, so there is no harm in keeping
> >> it IMHO.
> >>
> >
> > Yes, keeping it does no harm. But I think we should only add it where
> > it is needed. Looking at the code below, there are only some simple
> > checks between these two cond_resched() calls. And we still have
> > cond_resched() in khugepaged_scan_file() and khugepaged_scan_pmd(),
> > which do the actual collapsing work. So I think it is unnecessary. :)
> >
>
> BTW, the original author might have added this cond_resched() out of
> worry that hugepage_vma_check() would always return false due to
> MMF_DISABLE_THP, leaving the loop with no other chance to reschedule.
> But now we have moved that check out of the for loop that iterates
> over the vmas.

A little bit of archeology showed the cond_resched() was there from
the very beginning, even before MMF_DISABLE_THP was introduced.

>
> um.. That is my guess..
>
> Thanks,
> Yanfei
>
> > for (; vma; vma = vma->vm_next) {
> >         unsigned long hstart, hend;
> >
> >         cond_resched();                 //here
> >         if (unlikely(khugepaged_test_exit(mm))) {
> >                 progress++;
> >                 break;
> >         }
> >         if (!hugepage_vma_check(vma, vma->vm_flags)) {
> > skip:
> >                 progress++;
> >                 continue;
> >         }
> >         hstart = ALIGN(vma->vm_start, HPAGE_PMD_SIZE);
> >         hend = ALIGN_DOWN(vma->vm_end, HPAGE_PMD_SIZE);
> >         if (hstart >= hend)
> >                 goto skip;
> >         if (khugepaged_scan.address > hend)
> >                 goto skip;
> >         if (khugepaged_scan.address < hstart)
> >                 khugepaged_scan.address = hstart;
> >         VM_BUG_ON(!IS_ALIGNED(khugepaged_scan.address, HPAGE_PMD_SIZE));
> >
> >         if (shmem_file(vma->vm_file) && !shmem_huge_enabled(vma))
> >                 goto skip;
> >
> >         while (khugepaged_scan.address < hend) {
> >                 int ret;
> >                 cond_resched();        //here
> >
> >
> >>> if (unlikely(khugepaged_test_exit(mm))) {
> >>> progress++;
> >>> break;
> >>> --
> >>> 2.27.0
> >>>
> >>>