2024-01-29 14:33:19

by David Hildenbrand

[permalink] [raw]
Subject: [PATCH v1 1/9] mm/memory: factor out zapping of present pte into zap_present_pte()

Let's prepare for further changes by factoring out processing of present
PTEs.

Signed-off-by: David Hildenbrand <[email protected]>
---
mm/memory.c | 92 ++++++++++++++++++++++++++++++-----------------------
1 file changed, 52 insertions(+), 40 deletions(-)

diff --git a/mm/memory.c b/mm/memory.c
index b05fd28dbce1..50a6c79c78fc 100644
--- a/mm/memory.c
+++ b/mm/memory.c
@@ -1532,13 +1532,61 @@ zap_install_uffd_wp_if_needed(struct vm_area_struct *vma,
pte_install_uffd_wp_if_needed(vma, addr, pte, pteval);
}

+static inline void zap_present_pte(struct mmu_gather *tlb,
+ struct vm_area_struct *vma, pte_t *pte, pte_t ptent,
+ unsigned long addr, struct zap_details *details,
+ int *rss, bool *force_flush, bool *force_break)
+{
+ struct mm_struct *mm = tlb->mm;
+ bool delay_rmap = false;
+ struct folio *folio;
+ struct page *page;
+
+ page = vm_normal_page(vma, addr, ptent);
+ if (page)
+ folio = page_folio(page);
+
+ if (unlikely(!should_zap_folio(details, folio)))
+ return;
+ ptent = ptep_get_and_clear_full(mm, addr, pte, tlb->fullmm);
+ arch_check_zapped_pte(vma, ptent);
+ tlb_remove_tlb_entry(tlb, pte, addr);
+ zap_install_uffd_wp_if_needed(vma, addr, pte, details, ptent);
+ if (unlikely(!page)) {
+ ksm_might_unmap_zero_page(mm, ptent);
+ return;
+ }
+
+ if (!folio_test_anon(folio)) {
+ if (pte_dirty(ptent)) {
+ folio_mark_dirty(folio);
+ if (tlb_delay_rmap(tlb)) {
+ delay_rmap = true;
+ *force_flush = true;
+ }
+ }
+ if (pte_young(ptent) && likely(vma_has_recency(vma)))
+ folio_mark_accessed(folio);
+ }
+ rss[mm_counter(folio)]--;
+ if (!delay_rmap) {
+ folio_remove_rmap_pte(folio, page, vma);
+ if (unlikely(page_mapcount(page) < 0))
+ print_bad_pte(vma, addr, ptent, page);
+ }
+ if (unlikely(__tlb_remove_page(tlb, page, delay_rmap))) {
+ *force_flush = true;
+ *force_break = true;
+ }
+}
+
static unsigned long zap_pte_range(struct mmu_gather *tlb,
struct vm_area_struct *vma, pmd_t *pmd,
unsigned long addr, unsigned long end,
struct zap_details *details)
{
+ bool force_flush = false, force_break = false;
struct mm_struct *mm = tlb->mm;
- int force_flush = 0;
int rss[NR_MM_COUNTERS];
spinlock_t *ptl;
pte_t *start_pte;
@@ -1565,45 +1613,9 @@ static unsigned long zap_pte_range(struct mmu_gather *tlb,
break;

if (pte_present(ptent)) {
- unsigned int delay_rmap;
-
- page = vm_normal_page(vma, addr, ptent);
- if (page)
- folio = page_folio(page);
-
- if (unlikely(!should_zap_folio(details, folio)))
- continue;
- ptent = ptep_get_and_clear_full(mm, addr, pte,
- tlb->fullmm);
- arch_check_zapped_pte(vma, ptent);
- tlb_remove_tlb_entry(tlb, pte, addr);
- zap_install_uffd_wp_if_needed(vma, addr, pte, details,
- ptent);
- if (unlikely(!page)) {
- ksm_might_unmap_zero_page(mm, ptent);
- continue;
- }
-
- delay_rmap = 0;
- if (!folio_test_anon(folio)) {
- if (pte_dirty(ptent)) {
- folio_mark_dirty(folio);
- if (tlb_delay_rmap(tlb)) {
- delay_rmap = 1;
- force_flush = 1;
- }
- }
- if (pte_young(ptent) && likely(vma_has_recency(vma)))
- folio_mark_accessed(folio);
- }
- rss[mm_counter(folio)]--;
- if (!delay_rmap) {
- folio_remove_rmap_pte(folio, page, vma);
- if (unlikely(page_mapcount(page) < 0))
- print_bad_pte(vma, addr, ptent, page);
- }
- if (unlikely(__tlb_remove_page(tlb, page, delay_rmap))) {
- force_flush = 1;
+ zap_present_pte(tlb, vma, pte, ptent, addr, details,
+ rss, &force_flush, &force_break);
+ if (unlikely(force_break)) {
addr += PAGE_SIZE;
break;
}
--
2.43.0



2024-01-30 08:13:58

by Ryan Roberts

[permalink] [raw]
Subject: Re: [PATCH v1 1/9] mm/memory: factor out zapping of present pte into zap_present_pte()

On 29/01/2024 14:32, David Hildenbrand wrote:
> Let's prepare for further changes by factoring out processing of present
> PTEs.
>
> Signed-off-by: David Hildenbrand <[email protected]>
> ---
> mm/memory.c | 92 ++++++++++++++++++++++++++++++-----------------------
> 1 file changed, 52 insertions(+), 40 deletions(-)
>
> diff --git a/mm/memory.c b/mm/memory.c
> index b05fd28dbce1..50a6c79c78fc 100644
> --- a/mm/memory.c
> +++ b/mm/memory.c
> @@ -1532,13 +1532,61 @@ zap_install_uffd_wp_if_needed(struct vm_area_struct *vma,
> pte_install_uffd_wp_if_needed(vma, addr, pte, pteval);
> }
>
> +static inline void zap_present_pte(struct mmu_gather *tlb,
> + struct vm_area_struct *vma, pte_t *pte, pte_t ptent,
> + unsigned long addr, struct zap_details *details,
> + int *rss, bool *force_flush, bool *force_break)
> +{
> + struct mm_struct *mm = tlb->mm;
> + bool delay_rmap = false;
> + struct folio *folio;

You need to init this to NULL, otherwise it's a random value when calling
should_zap_folio() if vm_normal_page() returns NULL.

> + struct page *page;
> +
> + page = vm_normal_page(vma, addr, ptent);
> + if (page)
> + folio = page_folio(page);
> +
> + if (unlikely(!should_zap_folio(details, folio)))
> + return;
> + ptent = ptep_get_and_clear_full(mm, addr, pte, tlb->fullmm);
> + arch_check_zapped_pte(vma, ptent);
> + tlb_remove_tlb_entry(tlb, pte, addr);
> + zap_install_uffd_wp_if_needed(vma, addr, pte, details, ptent);
> + if (unlikely(!page)) {
> + ksm_might_unmap_zero_page(mm, ptent);
> + return;
> + }
> +
> + if (!folio_test_anon(folio)) {
> + if (pte_dirty(ptent)) {
> + folio_mark_dirty(folio);
> + if (tlb_delay_rmap(tlb)) {
> + delay_rmap = true;
> + *force_flush = true;
> + }
> + }
> + if (pte_young(ptent) && likely(vma_has_recency(vma)))
> + folio_mark_accessed(folio);
> + }
> + rss[mm_counter(folio)]--;
> + if (!delay_rmap) {
> + folio_remove_rmap_pte(folio, page, vma);
> + if (unlikely(page_mapcount(page) < 0))
> + print_bad_pte(vma, addr, ptent, page);
> + }
> + if (unlikely(__tlb_remove_page(tlb, page, delay_rmap))) {
> + *force_flush = true;
> + *force_break = true;
> + }
> +}
> +
> static unsigned long zap_pte_range(struct mmu_gather *tlb,
> struct vm_area_struct *vma, pmd_t *pmd,
> unsigned long addr, unsigned long end,
> struct zap_details *details)
> {
> + bool force_flush = false, force_break = false;
> struct mm_struct *mm = tlb->mm;
> - int force_flush = 0;
> int rss[NR_MM_COUNTERS];
> spinlock_t *ptl;
> pte_t *start_pte;
> @@ -1565,45 +1613,9 @@ static unsigned long zap_pte_range(struct mmu_gather *tlb,
> break;
>
> if (pte_present(ptent)) {
> - unsigned int delay_rmap;
> -
> - page = vm_normal_page(vma, addr, ptent);
> - if (page)
> - folio = page_folio(page);
> -
> - if (unlikely(!should_zap_folio(details, folio)))
> - continue;
> - ptent = ptep_get_and_clear_full(mm, addr, pte,
> - tlb->fullmm);
> - arch_check_zapped_pte(vma, ptent);
> - tlb_remove_tlb_entry(tlb, pte, addr);
> - zap_install_uffd_wp_if_needed(vma, addr, pte, details,
> - ptent);
> - if (unlikely(!page)) {
> - ksm_might_unmap_zero_page(mm, ptent);
> - continue;
> - }
> -
> - delay_rmap = 0;
> - if (!folio_test_anon(folio)) {
> - if (pte_dirty(ptent)) {
> - folio_mark_dirty(folio);
> - if (tlb_delay_rmap(tlb)) {
> - delay_rmap = 1;
> - force_flush = 1;
> - }
> - }
> - if (pte_young(ptent) && likely(vma_has_recency(vma)))
> - folio_mark_accessed(folio);
> - }
> - rss[mm_counter(folio)]--;
> - if (!delay_rmap) {
> - folio_remove_rmap_pte(folio, page, vma);
> - if (unlikely(page_mapcount(page) < 0))
> - print_bad_pte(vma, addr, ptent, page);
> - }
> - if (unlikely(__tlb_remove_page(tlb, page, delay_rmap))) {
> - force_flush = 1;
> + zap_present_pte(tlb, vma, pte, ptent, addr, details,
> + rss, &force_flush, &force_break);
> + if (unlikely(force_break)) {
> addr += PAGE_SIZE;
> break;
> }


2024-01-30 08:43:22

by David Hildenbrand

[permalink] [raw]
Subject: Re: [PATCH v1 1/9] mm/memory: factor out zapping of present pte into zap_present_pte()

On 30.01.24 09:13, Ryan Roberts wrote:
> On 29/01/2024 14:32, David Hildenbrand wrote:
>> Let's prepare for further changes by factoring out processing of present
>> PTEs.
>>
>> Signed-off-by: David Hildenbrand <[email protected]>
>> ---
>> mm/memory.c | 92 ++++++++++++++++++++++++++++++-----------------------
>> 1 file changed, 52 insertions(+), 40 deletions(-)
>>
>> diff --git a/mm/memory.c b/mm/memory.c
>> index b05fd28dbce1..50a6c79c78fc 100644
>> --- a/mm/memory.c
>> +++ b/mm/memory.c
>> @@ -1532,13 +1532,61 @@ zap_install_uffd_wp_if_needed(struct vm_area_struct *vma,
>> pte_install_uffd_wp_if_needed(vma, addr, pte, pteval);
>> }
>>
>> +static inline void zap_present_pte(struct mmu_gather *tlb,
>> + struct vm_area_struct *vma, pte_t *pte, pte_t ptent,
>> + unsigned long addr, struct zap_details *details,
>> + int *rss, bool *force_flush, bool *force_break)
>> +{
>> + struct mm_struct *mm = tlb->mm;
>> + bool delay_rmap = false;
>> + struct folio *folio;
>
> You need to init this to NULL, otherwise it's a random value when calling
> should_zap_folio() if vm_normal_page() returns NULL.

Right, and we can stop setting it to NULL in the original function.
Patch #2 changes these checks, which is why it's only a problem in this
patch.

Will fix, thanks!

--
Cheers,

David / dhildenb


2024-01-30 08:48:00

by Ryan Roberts

[permalink] [raw]
Subject: Re: [PATCH v1 1/9] mm/memory: factor out zapping of present pte into zap_present_pte()

On 30/01/2024 08:41, David Hildenbrand wrote:
> On 30.01.24 09:13, Ryan Roberts wrote:
>> On 29/01/2024 14:32, David Hildenbrand wrote:
>>> Let's prepare for further changes by factoring out processing of present
>>> PTEs.
>>>
>>> Signed-off-by: David Hildenbrand <[email protected]>
>>> ---
>>>   mm/memory.c | 92 ++++++++++++++++++++++++++++++-----------------------
>>>   1 file changed, 52 insertions(+), 40 deletions(-)
>>>
>>> diff --git a/mm/memory.c b/mm/memory.c
>>> index b05fd28dbce1..50a6c79c78fc 100644
>>> --- a/mm/memory.c
>>> +++ b/mm/memory.c
>>> @@ -1532,13 +1532,61 @@ zap_install_uffd_wp_if_needed(struct vm_area_struct
>>> *vma,
>>>       pte_install_uffd_wp_if_needed(vma, addr, pte, pteval);
>>>   }
>>>   +static inline void zap_present_pte(struct mmu_gather *tlb,
>>> +        struct vm_area_struct *vma, pte_t *pte, pte_t ptent,
>>> +        unsigned long addr, struct zap_details *details,
>>> +        int *rss, bool *force_flush, bool *force_break)
>>> +{
>>> +    struct mm_struct *mm = tlb->mm;
>>> +    bool delay_rmap = false;
>>> +    struct folio *folio;
>>
>> You need to init this to NULL, otherwise it's a random value when calling
>> should_zap_folio() if vm_normal_page() returns NULL.
>
> Right, and we can stop setting it to NULL in the original function. Patch #2
> changes these checks, which is why it's only a problem in this patch.

Yeah I only noticed that after sending out this reply and moving to the next
patch. Still worth fixing this intermediate state I think.

>
> Will fix, thanks!
>


2024-01-30 08:49:23

by David Hildenbrand

[permalink] [raw]
Subject: Re: [PATCH v1 1/9] mm/memory: factor out zapping of present pte into zap_present_pte()

On 30.01.24 09:46, Ryan Roberts wrote:
> On 30/01/2024 08:41, David Hildenbrand wrote:
>> On 30.01.24 09:13, Ryan Roberts wrote:
>>> On 29/01/2024 14:32, David Hildenbrand wrote:
>>>> Let's prepare for further changes by factoring out processing of present
>>>> PTEs.
>>>>
>>>> Signed-off-by: David Hildenbrand <[email protected]>
>>>> ---
>>>>   mm/memory.c | 92 ++++++++++++++++++++++++++++++-----------------------
>>>>   1 file changed, 52 insertions(+), 40 deletions(-)
>>>>
>>>> diff --git a/mm/memory.c b/mm/memory.c
>>>> index b05fd28dbce1..50a6c79c78fc 100644
>>>> --- a/mm/memory.c
>>>> +++ b/mm/memory.c
>>>> @@ -1532,13 +1532,61 @@ zap_install_uffd_wp_if_needed(struct vm_area_struct
>>>> *vma,
>>>>       pte_install_uffd_wp_if_needed(vma, addr, pte, pteval);
>>>>   }
>>>>   +static inline void zap_present_pte(struct mmu_gather *tlb,
>>>> +        struct vm_area_struct *vma, pte_t *pte, pte_t ptent,
>>>> +        unsigned long addr, struct zap_details *details,
>>>> +        int *rss, bool *force_flush, bool *force_break)
>>>> +{
>>>> +    struct mm_struct *mm = tlb->mm;
>>>> +    bool delay_rmap = false;
>>>> +    struct folio *folio;
>>>
>>> You need to init this to NULL, otherwise it's a random value when calling
>>> should_zap_folio() if vm_normal_page() returns NULL.
>>
>> Right, and we can stop setting it to NULL in the original function. Patch #2
>> changes these checks, which is why it's only a problem in this patch.
>
> Yeah I only noticed that after sending out this reply and moving to the next
> patch. Still worth fixing this intermediate state I think.

Absolutely, I didn't do patch-by-patch compilation yet (I suspect the
compiler would complain).

--
Cheers,

David / dhildenb