2023-09-18 10:47:24

by Kefeng Wang

Subject: [PATCH 0/6] mm: convert numa balancing functions to use a folio

do_numa_page() only handles non-compound pages, and only PMD-mapped THP
is handled in do_huge_pmd_numa_page(). Large, PTE-mapped folios will be
supported soon, so convert more NUMA balancing functions to use/take a
folio in preparation for that; no functional change is intended for now.
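
As a rough illustration of the conversion pattern the series applies (not
code from the patches themselves; the example_* names below are
hypothetical), a page-taking helper becomes a folio-taking one, with the
per-page accessors swapped for their folio equivalents:

/*
 * Hypothetical before/after sketch of the page -> folio conversion;
 * example_nid_of() and example_nid_of_folio() are made-up names.
 */

/* Before: takes a single page. */
static int example_nid_of(struct page *page)
{
	return page_to_nid(page);	/* per-page NUMA node lookup */
}

/* After: takes a folio, which may map one page or a large range. */
static int example_nid_of_folio(struct folio *folio)
{
	return folio_nid(folio);	/* folio-wide NUMA node lookup */
}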

Kefeng Wang (6):
sched/numa, mm: make numa migrate functions take a folio
mm: mempolicy: make mpol_misplaced() take a folio
mm: memory: make numa_migrate_prep() take a folio
mm: memory: use a folio in do_numa_page()
mm: memory: add vm_normal_pmd_folio()
mm: huge_memory: use a folio in do_huge_pmd_numa_page()

include/linux/mempolicy.h | 4 +--
include/linux/mm.h | 2 ++
include/linux/sched/numa_balancing.h | 4 +--
kernel/sched/fair.c | 12 +++----
mm/huge_memory.c | 28 ++++++++--------
mm/internal.h | 2 +-
mm/memory.c | 49 ++++++++++++++++------------
mm/mempolicy.c | 20 ++++++------
8 files changed, 65 insertions(+), 56 deletions(-)

--
2.27.0


2023-09-18 10:47:29

by Kefeng Wang

Subject: [PATCH 2/6] mm: mempolicy: make mpol_misplaced() take a folio

In preparation for large folio NUMA balancing, make mpol_misplaced()
take a folio; no functional change intended.

Signed-off-by: Kefeng Wang <[email protected]>
---
include/linux/mempolicy.h | 4 ++--
mm/memory.c | 2 +-
mm/mempolicy.c | 21 ++++++++++-----------
3 files changed, 13 insertions(+), 14 deletions(-)

diff --git a/include/linux/mempolicy.h b/include/linux/mempolicy.h
index d232de7cdc56..4a82eee20073 100644
--- a/include/linux/mempolicy.h
+++ b/include/linux/mempolicy.h
@@ -174,7 +174,7 @@ extern void mpol_to_str(char *buffer, int maxlen, struct mempolicy *pol);
/* Check if a vma is migratable */
extern bool vma_migratable(struct vm_area_struct *vma);

-extern int mpol_misplaced(struct page *, struct vm_area_struct *, unsigned long);
+extern int mpol_misplaced(struct folio *, struct vm_area_struct *, unsigned long);
extern void mpol_put_task_policy(struct task_struct *);

static inline bool mpol_is_preferred_many(struct mempolicy *pol)
@@ -278,7 +278,7 @@ static inline int mpol_parse_str(char *str, struct mempolicy **mpol)
}
#endif

-static inline int mpol_misplaced(struct page *page, struct vm_area_struct *vma,
+static inline int mpol_misplaced(struct folio *folio, struct vm_area_struct *vma,
unsigned long address)
{
return -1; /* no node preference */
diff --git a/mm/memory.c b/mm/memory.c
index 983a40f8ee62..a04c90604c73 100644
--- a/mm/memory.c
+++ b/mm/memory.c
@@ -4731,7 +4731,7 @@ int numa_migrate_prep(struct page *page, struct vm_area_struct *vma,
*flags |= TNF_FAULT_LOCAL;
}

- return mpol_misplaced(page, vma, addr);
+ return mpol_misplaced(page_folio(page), vma, addr);
}

static vm_fault_t do_numa_page(struct vm_fault *vmf)
diff --git a/mm/mempolicy.c b/mm/mempolicy.c
index 39584dc25c84..14a223b68180 100644
--- a/mm/mempolicy.c
+++ b/mm/mempolicy.c
@@ -2565,24 +2565,24 @@ static void sp_free(struct sp_node *n)
}

/**
- * mpol_misplaced - check whether current page node is valid in policy
+ * mpol_misplaced - check whether current folio node is valid in policy
*
- * @page: page to be checked
- * @vma: vm area where page mapped
- * @addr: virtual address where page mapped
+ * @folio: folio to be checked
+ * @vma: vm area where folio mapped
+ * @addr: virtual address in @vma for shared policy lookup and interleave policy
*
- * Lookup current policy node id for vma,addr and "compare to" page's
+ * Lookup current policy node id for vma,addr and "compare to" folio's
* node id. Policy determination "mimics" alloc_page_vma().
* Called from fault path where we know the vma and faulting address.
*
* Return: NUMA_NO_NODE if the page is in a node that is valid for this
- * policy, or a suitable node ID to allocate a replacement page from.
+ * policy, or a suitable node ID to allocate a replacement folio from.
*/
-int mpol_misplaced(struct page *page, struct vm_area_struct *vma, unsigned long addr)
+int mpol_misplaced(struct folio *folio, struct vm_area_struct *vma, unsigned long addr)
{
struct mempolicy *pol;
struct zoneref *z;
- int curnid = page_to_nid(page);
+ int curnid = folio_nid(folio);
unsigned long pgoff;
int thiscpu = raw_smp_processor_id();
int thisnid = cpu_to_node(thiscpu);
@@ -2638,12 +2638,11 @@ int mpol_misplaced(struct page *page, struct vm_area_struct *vma, unsigned long
BUG();
}

- /* Migrate the page towards the node whose CPU is referencing it */
+ /* Migrate the folio towards the node whose CPU is referencing it */
if (pol->flags & MPOL_F_MORON) {
polnid = thisnid;

- if (!should_numa_migrate_memory(current, page_folio(page),
- curnid, thiscpu))
+ if (!should_numa_migrate_memory(current, folio, curnid, thiscpu))
goto out;
}

--
2.27.0

2023-09-18 10:47:42

by Kefeng Wang

Subject: [PATCH 5/6] mm: memory: add vm_normal_pmd_folio()

Add a new vm_normal_pmd_folio() wrapper, similar to vm_normal_folio(),
which allows callers to replace their struct page variables entirely
with struct folio variables.

Signed-off-by: Kefeng Wang <[email protected]>
---
include/linux/mm.h | 2 ++
mm/memory.c | 10 ++++++++++
2 files changed, 12 insertions(+)

diff --git a/include/linux/mm.h b/include/linux/mm.h
index 12335de50140..7d05ec047186 100644
--- a/include/linux/mm.h
+++ b/include/linux/mm.h
@@ -2327,6 +2327,8 @@ struct folio *vm_normal_folio(struct vm_area_struct *vma, unsigned long addr,
pte_t pte);
struct page *vm_normal_page(struct vm_area_struct *vma, unsigned long addr,
pte_t pte);
+struct folio *vm_normal_pmd_folio(struct vm_area_struct *vma, unsigned long addr,
+ pmd_t pmd);
struct page *vm_normal_page_pmd(struct vm_area_struct *vma, unsigned long addr,
pmd_t pmd);

diff --git a/mm/memory.c b/mm/memory.c
index ce3efe7255d2..d4296ee72730 100644
--- a/mm/memory.c
+++ b/mm/memory.c
@@ -689,6 +689,16 @@ struct page *vm_normal_page_pmd(struct vm_area_struct *vma, unsigned long addr,
out:
return pfn_to_page(pfn);
}
+
+struct folio *vm_normal_pmd_folio(struct vm_area_struct *vma, unsigned long addr,
+ pmd_t pmd)
+{
+ struct page *page = vm_normal_page_pmd(vma, addr, pmd);
+
+ if (page)
+ return page_folio(page);
+ return NULL;
+}
#endif

static void restore_exclusive_pte(struct vm_area_struct *vma,
--
2.27.0

2023-09-18 10:59:40

by Kefeng Wang

Subject: [PATCH 6/6] mm: huge_memory: use a folio in do_huge_pmd_numa_page()

Use a folio in do_huge_pmd_numa_page(), reducing three page_folio()
calls to one; no functional change intended.

Signed-off-by: Kefeng Wang <[email protected]>
---
mm/huge_memory.c | 28 +++++++++++++---------------
1 file changed, 13 insertions(+), 15 deletions(-)

diff --git a/mm/huge_memory.c b/mm/huge_memory.c
index 3e34a48fbdd8..5c015ca40fea 100644
--- a/mm/huge_memory.c
+++ b/mm/huge_memory.c
@@ -1517,9 +1517,9 @@ vm_fault_t do_huge_pmd_numa_page(struct vm_fault *vmf)
struct vm_area_struct *vma = vmf->vma;
pmd_t oldpmd = vmf->orig_pmd;
pmd_t pmd;
- struct page *page;
+ struct folio *folio;
unsigned long haddr = vmf->address & HPAGE_PMD_MASK;
- int page_nid = NUMA_NO_NODE;
+ int nid = NUMA_NO_NODE;
int target_nid, last_cpupid = (-1 & LAST_CPUPID_MASK);
bool migrated = false, writable = false;
int flags = 0;
@@ -1541,36 +1541,35 @@ vm_fault_t do_huge_pmd_numa_page(struct vm_fault *vmf)
can_change_pmd_writable(vma, vmf->address, pmd))
writable = true;

- page = vm_normal_page_pmd(vma, haddr, pmd);
- if (!page)
+ folio = vm_normal_pmd_folio(vma, haddr, pmd);
+ if (!folio)
goto out_map;

/* See similar comment in do_numa_page for explanation */
if (!writable)
flags |= TNF_NO_GROUP;

- page_nid = page_to_nid(page);
+ nid = folio_nid(folio);
/*
* For memory tiering mode, cpupid of slow memory page is used
* to record page access time. So use default value.
*/
- if (node_is_toptier(page_nid))
- last_cpupid = page_cpupid_last(page);
- target_nid = numa_migrate_prep(page_folio(page), vma, haddr, page_nid,
- &flags);
+ if (node_is_toptier(nid))
+ last_cpupid = page_cpupid_last(&folio->page);

+ target_nid = numa_migrate_prep(folio, vma, haddr, nid, &flags);
if (target_nid == NUMA_NO_NODE) {
- put_page(page);
+ folio_put(folio);
goto out_map;
}

spin_unlock(vmf->ptl);
writable = false;

- migrated = migrate_misplaced_folio(page_folio(page), vma, target_nid);
+ migrated = migrate_misplaced_folio(folio, vma, target_nid);
if (migrated) {
flags |= TNF_MIGRATED;
- page_nid = target_nid;
+ nid = target_nid;
} else {
flags |= TNF_MIGRATE_FAIL;
vmf->ptl = pmd_lock(vma->vm_mm, vmf->pmd);
@@ -1582,9 +1581,8 @@ vm_fault_t do_huge_pmd_numa_page(struct vm_fault *vmf)
}

out:
- if (page_nid != NUMA_NO_NODE)
- task_numa_fault(last_cpupid, page_nid, HPAGE_PMD_NR,
- flags);
+ if (nid != NUMA_NO_NODE)
+ task_numa_fault(last_cpupid, nid, HPAGE_PMD_NR, flags);

return 0;

--
2.27.0

2023-09-18 11:13:25

by Kefeng Wang

Subject: [PATCH 3/6] mm: memory: make numa_migrate_prep() take a folio

In preparation for large folio NUMA balancing, make numa_migrate_prep()
take a folio; no functional change intended.

Signed-off-by: Kefeng Wang <[email protected]>
---
mm/huge_memory.c | 2 +-
mm/internal.h | 2 +-
mm/memory.c | 10 +++++-----
3 files changed, 7 insertions(+), 7 deletions(-)

diff --git a/mm/huge_memory.c b/mm/huge_memory.c
index 0da653080d8c..3e34a48fbdd8 100644
--- a/mm/huge_memory.c
+++ b/mm/huge_memory.c
@@ -1556,7 +1556,7 @@ vm_fault_t do_huge_pmd_numa_page(struct vm_fault *vmf)
*/
if (node_is_toptier(page_nid))
last_cpupid = page_cpupid_last(page);
- target_nid = numa_migrate_prep(page, vma, haddr, page_nid,
+ target_nid = numa_migrate_prep(page_folio(page), vma, haddr, page_nid,
&flags);

if (target_nid == NUMA_NO_NODE) {
diff --git a/mm/internal.h b/mm/internal.h
index 8c90e966e9f8..f30b81f365c1 100644
--- a/mm/internal.h
+++ b/mm/internal.h
@@ -926,7 +926,7 @@ void vunmap_range_noflush(unsigned long start, unsigned long end);

void __vunmap_range_noflush(unsigned long start, unsigned long end);

-int numa_migrate_prep(struct page *page, struct vm_area_struct *vma,
+int numa_migrate_prep(struct folio *folio, struct vm_area_struct *vma,
unsigned long addr, int page_nid, int *flags);

void free_zone_device_page(struct page *page);
diff --git a/mm/memory.c b/mm/memory.c
index a04c90604c73..ce7d9d9eddc4 100644
--- a/mm/memory.c
+++ b/mm/memory.c
@@ -4717,10 +4717,10 @@ static vm_fault_t do_fault(struct vm_fault *vmf)
return ret;
}

-int numa_migrate_prep(struct page *page, struct vm_area_struct *vma,
+int numa_migrate_prep(struct folio *folio, struct vm_area_struct *vma,
unsigned long addr, int page_nid, int *flags)
{
- get_page(page);
+ folio_get(folio);

/* Record the current PID acceesing VMA */
vma_set_access_pid_bit(vma);
@@ -4731,7 +4731,7 @@ int numa_migrate_prep(struct page *page, struct vm_area_struct *vma,
*flags |= TNF_FAULT_LOCAL;
}

- return mpol_misplaced(page_folio(page), vma, addr);
+ return mpol_misplaced(folio, vma, addr);
}

static vm_fault_t do_numa_page(struct vm_fault *vmf)
@@ -4805,8 +4805,8 @@ static vm_fault_t do_numa_page(struct vm_fault *vmf)
last_cpupid = (-1 & LAST_CPUPID_MASK);
else
last_cpupid = page_cpupid_last(page);
- target_nid = numa_migrate_prep(page, vma, vmf->address, page_nid,
- &flags);
+ target_nid = numa_migrate_prep(page_folio(page), vma, vmf->address,
+ page_nid, &flags);
if (target_nid == NUMA_NO_NODE) {
put_page(page);
goto out_map;
--
2.27.0

2023-09-18 17:38:18

by Matthew Wilcox

Subject: Re: [PATCH 0/6] mm: convert numa balancing functions to use a folio

On Mon, Sep 18, 2023 at 06:32:07PM +0800, Kefeng Wang wrote:
> do_numa_page() only handles non-compound pages, and only PMD-mapped THP
> is handled in do_huge_pmd_numa_page(). Large, PTE-mapped folios will be
> supported soon, so convert more NUMA balancing functions to use/take a
> folio in preparation for that; no functional change is intended for now.
>
> Kefeng Wang (6):
> sched/numa, mm: make numa migrate functions take a folio
> mm: mempolicy: make mpol_misplaced() take a folio
> mm: memory: make numa_migrate_prep() take a folio
> mm: memory: use a folio in do_numa_page()
> mm: memory: add vm_normal_pmd_folio()
> mm: huge_memory: use a folio in do_huge_pmd_numa_page()

This all seems OK. It's kind of hard to review though because you change
the same line multiple times. I think it works out better to go top-down
instead of bottom-up. That is, start with do_numa_page() and pass
&folio->page to numa_migrate_prep. Then do vm_normal_pmd_folio() followed
by do_huge_pmd_numa_page(). Fourth would have been numa_migrate_prep(),
etc. I don't want to ask you to redo the entire series, but please
keep it in mind for future patch series.

Also, it's nice to do things like remove the unnecessary 'extern' from
function declarations when you change them from page to folio. And
please try to stick to 80 columns; I know it's not always easy/possible.
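
As a minimal sketch of the transitional top-down step described above (not
code from the series; example_do_numa_page() is a hypothetical name), the
caller is converted to a folio first, and the not-yet-converted callee is
bridged with &folio->page until a later patch converts it as well:

static int example_do_numa_page(struct vm_fault *vmf)
{
	struct folio *folio = vm_normal_folio(vmf->vma, vmf->address,
					      vmf->orig_pte);
	int flags = 0;

	if (!folio)
		return NUMA_NO_NODE;

	/*
	 * numa_migrate_prep() still takes a page at this stage, so pass
	 * the folio's head page; once the callee is converted, this
	 * bridge becomes a plain folio argument.
	 */
	return numa_migrate_prep(&folio->page, vmf->vma, vmf->address,
				 folio_nid(folio), &flags);
}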

2023-09-19 00:01:42

by Kefeng Wang

Subject: Re: [PATCH 0/6] mm: convert numa balancing functions to use a folio



On 2023/9/18 20:57, Matthew Wilcox wrote:
> On Mon, Sep 18, 2023 at 06:32:07PM +0800, Kefeng Wang wrote:
>> do_numa_page() only handles non-compound pages, and only PMD-mapped THP
>> is handled in do_huge_pmd_numa_page(). Large, PTE-mapped folios will be
>> supported soon, so convert more NUMA balancing functions to use/take a
>> folio in preparation for that; no functional change is intended for now.
>>
>> Kefeng Wang (6):
>> sched/numa, mm: make numa migrate functions take a folio
>> mm: mempolicy: make mpol_misplaced() take a folio
>> mm: memory: make numa_migrate_prep() take a folio
>> mm: memory: use a folio in do_numa_page()
>> mm: memory: add vm_normal_pmd_folio()
>> mm: huge_memory: use a folio in do_huge_pmd_numa_page()
>
> This all seems OK. It's kind of hard to review though because you change
> the same line multiple times. I think it works out better to go top-down
> instead of bottom-up. That is, start with do_numa_page() and pass
> &folio->page to numa_migrate_prep. Then do vm_normal_pmd_folio() followed
> by do_huge_pmd_numa_page(). Fourth would have been numa_migrate_prep(),
> etc. I don't want to ask you to redo the entire series, but please
> keep it in mind for future patch series.
>
> Also, it's nice to do things like remove the unnecessary 'extern' from
> function declarations when you change them from page to folio. And
> please try to stick to 80 columns; I know it's not always easy/possible.
>

Thanks for your review and suggestions, I will keep them in mind when
sending new patches, thanks.

2023-09-20 08:25:48

by Kefeng Wang

Subject: Re: [PATCH 5/6] mm: memory: add vm_normal_pmd_folio()



On 2023/9/20 11:12, Huang, Ying wrote:
> Kefeng Wang <[email protected]> writes:
>
>> Add a new vm_normal_pmd_folio() wrapper, similar to vm_normal_folio(),
>> which allows callers to replace their struct page variables entirely
>> with struct folio variables.
>>
>> Signed-off-by: Kefeng Wang <[email protected]>
>> ---
>> include/linux/mm.h | 2 ++
>> mm/memory.c | 10 ++++++++++
>> 2 files changed, 12 insertions(+)
>>
>> diff --git a/include/linux/mm.h b/include/linux/mm.h
>> index 12335de50140..7d05ec047186 100644
>> --- a/include/linux/mm.h
>> +++ b/include/linux/mm.h
>> @@ -2327,6 +2327,8 @@ struct folio *vm_normal_folio(struct vm_area_struct *vma, unsigned long addr,
>> pte_t pte);
>> struct page *vm_normal_page(struct vm_area_struct *vma, unsigned long addr,
>> pte_t pte);
>> +struct folio *vm_normal_pmd_folio(struct vm_area_struct *vma, unsigned long addr,
>> + pmd_t pmd);
>> struct page *vm_normal_page_pmd(struct vm_area_struct *vma, unsigned long addr,
>> pmd_t pmd);
>
> Why not follow the naming of the page counterpart (vm_normal_page_pmd())
> and call it vm_normal_folio_pmd()?

Personally, X_pmd_folio reads as "get a folio from a pmd", while
X_folio_pmd looks like "return the PMD of a folio", but I can use
vm_normal_folio_pmd() for consistency, thanks.

2023-09-20 10:40:21

by Huang, Ying

Subject: Re: [PATCH 5/6] mm: memory: add vm_normal_pmd_folio()

Kefeng Wang <[email protected]> writes:

> Add a new vm_normal_pmd_folio() wrapper, similar to vm_normal_folio(),
> which allows callers to replace their struct page variables entirely
> with struct folio variables.
>
> Signed-off-by: Kefeng Wang <[email protected]>
> ---
> include/linux/mm.h | 2 ++
> mm/memory.c | 10 ++++++++++
> 2 files changed, 12 insertions(+)
>
> diff --git a/include/linux/mm.h b/include/linux/mm.h
> index 12335de50140..7d05ec047186 100644
> --- a/include/linux/mm.h
> +++ b/include/linux/mm.h
> @@ -2327,6 +2327,8 @@ struct folio *vm_normal_folio(struct vm_area_struct *vma, unsigned long addr,
> pte_t pte);
> struct page *vm_normal_page(struct vm_area_struct *vma, unsigned long addr,
> pte_t pte);
> +struct folio *vm_normal_pmd_folio(struct vm_area_struct *vma, unsigned long addr,
> + pmd_t pmd);
> struct page *vm_normal_page_pmd(struct vm_area_struct *vma, unsigned long addr,
> pmd_t pmd);

Why not follow the naming of the page counterpart (vm_normal_page_pmd())
and call it vm_normal_folio_pmd()?

--
Best Regards,
Huang, Ying

> diff --git a/mm/memory.c b/mm/memory.c
> index ce3efe7255d2..d4296ee72730 100644
> --- a/mm/memory.c
> +++ b/mm/memory.c
> @@ -689,6 +689,16 @@ struct page *vm_normal_page_pmd(struct vm_area_struct *vma, unsigned long addr,
> out:
> return pfn_to_page(pfn);
> }
> +
> +struct folio *vm_normal_pmd_folio(struct vm_area_struct *vma, unsigned long addr,
> + pmd_t pmd)
> +{
> + struct page *page = vm_normal_page_pmd(vma, addr, pmd);
> +
> + if (page)
> + return page_folio(page);
> + return NULL;
> +}
> #endif
>
> static void restore_exclusive_pte(struct vm_area_struct *vma,