2022-03-09 03:11:58

by maobibo

Subject: [PATCH] mm: reduce tlb flush range when changing vma protection

The NUMA balancing worker periodically changes VMA protection to
PROT_NONE; by default it scans 256M of VMA memory, stepping through
it in PMD-sized units. Even if only a few pages are actually changed
to PROT_NONE, the TLB flush still covers the full PMD-sized range.
This patch computes the flush range from the PTEs whose protection
actually changed, which reduces the size of the TLB flush.

Signed-off-by: Bibo Mao <[email protected]>
---
mm/mprotect.c | 39 +++++++++++++++++++++++++++++----------
1 file changed, 29 insertions(+), 10 deletions(-)
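
(Not part of the patch: below is a minimal standalone userspace sketch
of the range-tracking idea described above. The tlb_range and
add_tlb_range names mirror the diff that follows; the VMA base address,
the 256M scan window and the set of changed pages are made-up values
for illustration only.)

/*
 * Standalone userspace sketch (not kernel code): start with an
 * "inverted" empty range (start = scan end, end = scan start) and
 * widen it only for pages whose protection actually changed, so the
 * final flush span can be much smaller than the scanned span. The
 * first add_tlb_range() call snaps both bounds to the first changed
 * page.
 */
#include <stdio.h>

#define PAGE_SIZE 4096UL

typedef struct {
	unsigned long start;
	unsigned long end;
} tlb_range;

static inline void add_tlb_range(tlb_range *range, unsigned long start,
				 unsigned long end)
{
	if (start < range->start)
		range->start = start;
	if (end > range->end)
		range->end = end;
}

int main(void)
{
	unsigned long scan_start = 0x10000000UL;               /* made-up VMA start */
	unsigned long scan_end = scan_start + (256UL << 20);    /* 256M scan window  */
	tlb_range range = { .start = scan_end, .end = scan_start }; /* empty range   */

	/* Pretend only three scattered pages had their PTEs changed. */
	unsigned long changed[] = {
		scan_start + 0x2000,
		scan_start + 0x40000,
		scan_start + 0x41000,
	};
	unsigned long i;

	for (i = 0; i < sizeof(changed) / sizeof(changed[0]); i++)
		add_tlb_range(&range, changed[i], changed[i] + PAGE_SIZE);

	printf("scanned span: %lu KB\n", (scan_end - scan_start) >> 10);
	printf("flush span:   %lu KB\n", (range.end - range.start) >> 10);
	return 0;
}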

diff --git a/mm/mprotect.c b/mm/mprotect.c
index 2887644fd150..a9f51a998dc8 100644
--- a/mm/mprotect.c
+++ b/mm/mprotect.c
@@ -35,9 +35,23 @@

#include "internal.h"

+typedef struct {
+ unsigned long start;
+ unsigned long end;
+} tlb_range;
+
+static inline void add_tlb_range(tlb_range *range, unsigned long start,
+ unsigned long end)
+{
+ if (start < range->start)
+ range->start = start;
+ if (end > range->end)
+ range->end = end;
+}
+
static unsigned long change_pte_range(struct vm_area_struct *vma, pmd_t *pmd,
unsigned long addr, unsigned long end, pgprot_t newprot,
- unsigned long cp_flags)
+ unsigned long cp_flags, tlb_range *range)
{
pte_t *pte, oldpte;
spinlock_t *ptl;
@@ -138,6 +152,7 @@ static unsigned long change_pte_range(struct vm_area_struct *vma, pmd_t *pmd,
ptent = pte_mkwrite(ptent);
}
ptep_modify_prot_commit(vma, addr, pte, oldpte, ptent);
+ add_tlb_range(range, addr, addr + PAGE_SIZE);
pages++;
} else if (is_swap_pte(oldpte)) {
swp_entry_t entry = pte_to_swp_entry(oldpte);
@@ -184,6 +199,7 @@ static unsigned long change_pte_range(struct vm_area_struct *vma, pmd_t *pmd,

if (!pte_same(oldpte, newpte)) {
set_pte_at(vma->vm_mm, addr, pte, newpte);
+ add_tlb_range(range, addr, addr + PAGE_SIZE);
pages++;
}
}
@@ -221,7 +237,7 @@ static inline int pmd_none_or_clear_bad_unless_trans_huge(pmd_t *pmd)

static inline unsigned long change_pmd_range(struct vm_area_struct *vma,
pud_t *pud, unsigned long addr, unsigned long end,
- pgprot_t newprot, unsigned long cp_flags)
+ pgprot_t newprot, unsigned long cp_flags, tlb_range *tlb)
{
pmd_t *pmd;
unsigned long next;
@@ -267,6 +283,7 @@ static inline unsigned long change_pmd_range(struct vm_area_struct *vma,
if (nr_ptes) {
if (nr_ptes == HPAGE_PMD_NR) {
pages += HPAGE_PMD_NR;
+ add_tlb_range(tlb, addr, next);
nr_huge_updates++;
}

@@ -277,7 +294,7 @@ static inline unsigned long change_pmd_range(struct vm_area_struct *vma,
/* fall through, the trans huge pmd just split */
}
this_pages = change_pte_range(vma, pmd, addr, next, newprot,
- cp_flags);
+ cp_flags, tlb);
pages += this_pages;
next:
cond_resched();
@@ -293,7 +310,7 @@ static inline unsigned long change_pmd_range(struct vm_area_struct *vma,

static inline unsigned long change_pud_range(struct vm_area_struct *vma,
p4d_t *p4d, unsigned long addr, unsigned long end,
- pgprot_t newprot, unsigned long cp_flags)
+ pgprot_t newprot, unsigned long cp_flags, tlb_range *range)
{
pud_t *pud;
unsigned long next;
@@ -305,7 +322,7 @@ static inline unsigned long change_pud_range(struct vm_area_struct *vma,
if (pud_none_or_clear_bad(pud))
continue;
pages += change_pmd_range(vma, pud, addr, next, newprot,
- cp_flags);
+ cp_flags, range);
} while (pud++, addr = next, addr != end);

return pages;
@@ -313,7 +330,7 @@ static inline unsigned long change_pud_range(struct vm_area_struct *vma,

static inline unsigned long change_p4d_range(struct vm_area_struct *vma,
pgd_t *pgd, unsigned long addr, unsigned long end,
- pgprot_t newprot, unsigned long cp_flags)
+ pgprot_t newprot, unsigned long cp_flags, tlb_range *range)
{
p4d_t *p4d;
unsigned long next;
@@ -325,7 +342,7 @@ static inline unsigned long change_p4d_range(struct vm_area_struct *vma,
if (p4d_none_or_clear_bad(p4d))
continue;
pages += change_pud_range(vma, p4d, addr, next, newprot,
- cp_flags);
+ cp_flags, range);
} while (p4d++, addr = next, addr != end);

return pages;
@@ -338,24 +355,26 @@ static unsigned long change_protection_range(struct vm_area_struct *vma,
struct mm_struct *mm = vma->vm_mm;
pgd_t *pgd;
unsigned long next;
- unsigned long start = addr;
unsigned long pages = 0;
+ tlb_range range;

BUG_ON(addr >= end);
pgd = pgd_offset(mm, addr);
flush_cache_range(vma, addr, end);
+ range.start = end;
+ range.end = addr;
inc_tlb_flush_pending(mm);
do {
next = pgd_addr_end(addr, end);
if (pgd_none_or_clear_bad(pgd))
continue;
pages += change_p4d_range(vma, pgd, addr, next, newprot,
- cp_flags);
+ cp_flags, &range);
} while (pgd++, addr = next, addr != end);

/* Only flush the TLB if we actually modified any entries: */
if (pages)
- flush_tlb_range(vma, start, end);
+ flush_tlb_range(vma, range.start, range.end);
dec_tlb_flush_pending(mm);

return pages;
--
2.31.1


2022-03-09 04:14:43

by Nadav Amit

Subject: Re: [PATCH] mm: reduce tlb flush range when changing vma protection



> On Mar 8, 2022, at 6:57 PM, Bibo Mao <[email protected]> wrote:
>
> The NUMA balancing worker periodically changes VMA protection to
> PROT_NONE; by default it scans 256M of VMA memory, stepping through
> it in PMD-sized units. Even if only a few pages are actually changed
> to PROT_NONE, the TLB flush still covers the full PMD-sized range.
> This patch computes the flush range from the PTEs whose protection
> actually changed, which reduces the size of the TLB flush.
>
> Signed-off-by: Bibo Mao <[email protected]>

Hi Bibo,

I finally managed to put together v3 of a patch set, which I think does
something similar to what you are looking for (without introducing
yet another TLB batching mechanism).

Have a look at [1] and let me know if that would satisfy you.


[1] https://lore.kernel.org/linux-mm/[email protected]/T/#u

2022-03-09 05:16:46

by maobibo

Subject: Re: [PATCH] mm: reduce tlb flush range when changing vma protection

Yeah, your patch set is more general and better than mine; it solves the issue.
Please drop my patch.

regards
bibo, mao

On 03/09/2022 12:14 PM, Nadav Amit wrote:
>
>
>> On Mar 8, 2022, at 6:57 PM, Bibo Mao <[email protected]> wrote:
>>
>> The NUMA balancing worker periodically changes VMA protection to
>> PROT_NONE; by default it scans 256M of VMA memory, stepping through
>> it in PMD-sized units. Even if only a few pages are actually changed
>> to PROT_NONE, the TLB flush still covers the full PMD-sized range.
>> This patch computes the flush range from the PTEs whose protection
>> actually changed, which reduces the size of the TLB flush.
>>
>> Signed-off-by: Bibo Mao <[email protected]>
>
> Hi Bibo,
>
> I finally managed to put together v3 of a patch set, which I think does
> something similar to what you are looking for (without introducing
> yet another TLB batching mechanism).
>
> Have a look at [1] and let me know if that would satisfy you.
>
>
> [1] https://lore.kernel.org/linux-mm/[email protected]/T/#u
>