2023-09-21 19:06:58

by Kefeng Wang

Subject: [PATCH v2 0/6] mm: convert numa balancing functions to use a folio

do_numa_page() only handles non-compound pages, and only PMD-mapped THP
is handled in do_huge_pmd_numa_page(), but large, PTE-mapped folios will
be supported soon. Let's convert more NUMA balancing functions to
use/take a folio in preparation for that; no functional change intended
for now.
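
For readers skimming the series, the conversion pattern is the same at
each call site: resolve the folio once and use folio helpers instead of
per-page ones. A condensed before/after sketch (illustrative only,
assembled from the hunks in the patches below, not a complete listing):

	/* before: struct page, compound_head() hidden in each helper */
	struct page *page = vm_normal_page(vma, addr, pte);
	if (!page || is_zone_device_page(page) || PageCompound(page))
		goto out_map;
	nid = page_to_nid(page);

	/* after: struct folio, head page resolved once */
	struct folio *folio = vm_normal_folio(vma, addr, pte);
	if (!folio || folio_is_zone_device(folio) || folio_test_large(folio))
		goto out_map;
	nid = folio_nid(folio);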

v2:
- re-order the patches, drop 'extern', and wrap to 80 columns, as
  suggested by Matthew
- rename vm_normal_pmd_folio to vm_normal_folio_pmd and fix cpupid,
  as suggested by Huang, Ying

Kefeng Wang (6):
mm: memory: add vm_normal_folio_pmd()
mm: huge_memory: use a folio in do_huge_pmd_numa_page()
mm: memory: use a folio in do_numa_page()
mm: memory: make numa_migrate_prep() to take a folio
mm: mempolicy: make mpol_misplaced() to take a folio
sched/numa, mm: make numa migrate functions to take a folio

include/linux/mempolicy.h | 5 +--
include/linux/mm.h | 2 ++
include/linux/sched/numa_balancing.h | 6 ++--
kernel/sched/fair.c | 12 +++----
mm/huge_memory.c | 29 ++++++++--------
mm/internal.h | 2 +-
mm/memory.c | 49 ++++++++++++++++------------
mm/mempolicy.c | 22 +++++++------
8 files changed, 69 insertions(+), 58 deletions(-)

--
2.27.0


2023-09-21 19:07:37

by Kefeng Wang

Subject: [PATCH v2 5/6] mm: mempolicy: make mpol_misplaced() to take a folio

In preparation for large folio NUMA balancing, make mpol_misplaced()
take a folio; no functional change intended.

Signed-off-by: Kefeng Wang <[email protected]>
---
include/linux/mempolicy.h | 5 +++--
mm/memory.c | 2 +-
mm/mempolicy.c | 22 ++++++++++++----------
3 files changed, 16 insertions(+), 13 deletions(-)

diff --git a/include/linux/mempolicy.h b/include/linux/mempolicy.h
index d232de7cdc56..6c2754d7bfed 100644
--- a/include/linux/mempolicy.h
+++ b/include/linux/mempolicy.h
@@ -174,7 +174,7 @@ extern void mpol_to_str(char *buffer, int maxlen, struct mempolicy *pol);
/* Check if a vma is migratable */
extern bool vma_migratable(struct vm_area_struct *vma);

-extern int mpol_misplaced(struct page *, struct vm_area_struct *, unsigned long);
+int mpol_misplaced(struct folio *, struct vm_area_struct *, unsigned long);
extern void mpol_put_task_policy(struct task_struct *);

static inline bool mpol_is_preferred_many(struct mempolicy *pol)
@@ -278,7 +278,8 @@ static inline int mpol_parse_str(char *str, struct mempolicy **mpol)
}
#endif

-static inline int mpol_misplaced(struct page *page, struct vm_area_struct *vma,
+static inline int mpol_misplaced(struct folio *folio,
+ struct vm_area_struct *vma,
unsigned long address)
{
return -1; /* no node preference */
diff --git a/mm/memory.c b/mm/memory.c
index 93ce8bcbe9d7..29c5618c91e5 100644
--- a/mm/memory.c
+++ b/mm/memory.c
@@ -4741,7 +4741,7 @@ int numa_migrate_prep(struct folio *folio, struct vm_area_struct *vma,
*flags |= TNF_FAULT_LOCAL;
}

- return mpol_misplaced(&folio->page, vma, addr);
+ return mpol_misplaced(folio, vma, addr);
}

static vm_fault_t do_numa_page(struct vm_fault *vmf)
diff --git a/mm/mempolicy.c b/mm/mempolicy.c
index 98fae2bfc851..ecf06ce3a5dd 100644
--- a/mm/mempolicy.c
+++ b/mm/mempolicy.c
@@ -2572,24 +2572,25 @@ static void sp_free(struct sp_node *n)
}

/**
- * mpol_misplaced - check whether current page node is valid in policy
+ * mpol_misplaced - check whether current folio node is valid in policy
*
- * @page: page to be checked
- * @vma: vm area where page mapped
- * @addr: virtual address where page mapped
+ * @folio: folio to be checked
+ * @vma: vm area where folio mapped
+ * @addr: virtual address in @vma for shared policy lookup and interleave policy
*
- * Lookup current policy node id for vma,addr and "compare to" page's
+ * Lookup current policy node id for vma,addr and "compare to" folio's
* node id. Policy determination "mimics" alloc_page_vma().
* Called from fault path where we know the vma and faulting address.
*
* Return: NUMA_NO_NODE if the page is in a node that is valid for this
- * policy, or a suitable node ID to allocate a replacement page from.
+ * policy, or a suitable node ID to allocate a replacement folio from.
*/
-int mpol_misplaced(struct page *page, struct vm_area_struct *vma, unsigned long addr)
+int mpol_misplaced(struct folio *folio, struct vm_area_struct *vma,
+ unsigned long addr)
{
struct mempolicy *pol;
struct zoneref *z;
- int curnid = page_to_nid(page);
+ int curnid = folio_nid(folio);
unsigned long pgoff;
int thiscpu = raw_smp_processor_id();
int thisnid = cpu_to_node(thiscpu);
@@ -2645,11 +2646,12 @@ int mpol_misplaced(struct page *page, struct vm_area_struct *vma, unsigned long
BUG();
}

- /* Migrate the page towards the node whose CPU is referencing it */
+ /* Migrate the folio towards the node whose CPU is referencing it */
if (pol->flags & MPOL_F_MORON) {
polnid = thisnid;

- if (!should_numa_migrate_memory(current, page, curnid, thiscpu))
+ if (!should_numa_migrate_memory(current, &folio->page, curnid,
+ thiscpu))
goto out;
}

--
2.27.0

2023-09-21 19:08:14

by Kefeng Wang

Subject: [PATCH v2 3/6] mm: memory: use a folio in do_numa_page()

NUMA balancing only tries to migrate non-compound pages in
do_numa_page(); use a folio there to save several compound_head()
calls. Note that we use folio_estimated_sharers(): checking the
estimated sharer count is enough while only normal pages are handled.
Once large folio NUMA balancing is supported, a precise sharer check
will be used instead. No functional change intended.
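
For reference, at this point in the tree folio_estimated_sharers()
only looks at the head page, roughly (sketch from my reading of
include/linux/mm.h; not part of this patch):

	static inline int folio_estimated_sharers(struct folio *folio)
	{
		/* estimate: mapcount of the first (head) page only */
		return page_mapcount(folio_page(folio, 0));
	}

For the order-0 folios handled here this is exactly page_mapcount(),
so the TNF_SHARED decision is unchanged.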

Signed-off-by: Kefeng Wang <[email protected]>
---
mm/memory.c | 34 +++++++++++++++++-----------------
1 file changed, 17 insertions(+), 17 deletions(-)

diff --git a/mm/memory.c b/mm/memory.c
index dbc7b67eca68..a05cfb6be36d 100644
--- a/mm/memory.c
+++ b/mm/memory.c
@@ -4747,8 +4747,8 @@ int numa_migrate_prep(struct page *page, struct vm_area_struct *vma,
static vm_fault_t do_numa_page(struct vm_fault *vmf)
{
struct vm_area_struct *vma = vmf->vma;
- struct page *page = NULL;
- int page_nid = NUMA_NO_NODE;
+ struct folio *folio = NULL;
+ int nid = NUMA_NO_NODE;
bool writable = false;
int last_cpupid;
int target_nid;
@@ -4779,12 +4779,12 @@ static vm_fault_t do_numa_page(struct vm_fault *vmf)
can_change_pte_writable(vma, vmf->address, pte))
writable = true;

- page = vm_normal_page(vma, vmf->address, pte);
- if (!page || is_zone_device_page(page))
+ folio = vm_normal_folio(vma, vmf->address, pte);
+ if (!folio || folio_is_zone_device(folio))
goto out_map;

/* TODO: handle PTE-mapped THP */
- if (PageCompound(page))
+ if (folio_test_large(folio))
goto out_map;

/*
@@ -4799,34 +4799,34 @@ static vm_fault_t do_numa_page(struct vm_fault *vmf)
flags |= TNF_NO_GROUP;

/*
- * Flag if the page is shared between multiple address spaces. This
+ * Flag if the folio is shared between multiple address spaces. This
* is later used when determining whether to group tasks together
*/
- if (page_mapcount(page) > 1 && (vma->vm_flags & VM_SHARED))
+ if (folio_estimated_sharers(folio) > 1 && (vma->vm_flags & VM_SHARED))
flags |= TNF_SHARED;

- page_nid = page_to_nid(page);
+ nid = folio_nid(folio);
/*
* For memory tiering mode, cpupid of slow memory page is used
* to record page access time. So use default value.
*/
if ((sysctl_numa_balancing_mode & NUMA_BALANCING_MEMORY_TIERING) &&
- !node_is_toptier(page_nid))
+ !node_is_toptier(nid))
last_cpupid = (-1 & LAST_CPUPID_MASK);
else
- last_cpupid = page_cpupid_last(page);
- target_nid = numa_migrate_prep(page, vma, vmf->address, page_nid,
- &flags);
+ last_cpupid = page_cpupid_last(&folio->page);
+ target_nid = numa_migrate_prep(&folio->page, vma, vmf->address, nid,
+ &flags);
if (target_nid == NUMA_NO_NODE) {
- put_page(page);
+ folio_put(folio);
goto out_map;
}
pte_unmap_unlock(vmf->pte, vmf->ptl);
writable = false;

/* Migrate to the requested node */
- if (migrate_misplaced_folio(page_folio(page), vma, target_nid)) {
- page_nid = target_nid;
+ if (migrate_misplaced_folio(folio, vma, target_nid)) {
+ nid = target_nid;
flags |= TNF_MIGRATED;
} else {
flags |= TNF_MIGRATE_FAIL;
@@ -4842,8 +4842,8 @@ static vm_fault_t do_numa_page(struct vm_fault *vmf)
}

out:
- if (page_nid != NUMA_NO_NODE)
- task_numa_fault(last_cpupid, page_nid, 1, flags);
+ if (nid != NUMA_NO_NODE)
+ task_numa_fault(last_cpupid, nid, 1, flags);
return 0;
out_map:
/*
--
2.27.0

2023-09-21 20:48:14

by Kefeng Wang

Subject: [PATCH v2 6/6] sched/numa, mm: make numa migrate functions to take a folio

The cpupid (or access time) is stored in the head page for THP, so it
is safe to make should_numa_migrate_memory() and
numa_hint_fault_latency() take a folio. This is in preparation for
large folio NUMA balancing.
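
To make that invariant concrete: because those bits live in the head
page, a folio-taking helper can simply operate on &folio->page. A
hypothetical wrapper like the following would work (name invented for
illustration; the patch open-codes &folio->page instead):

	static inline int folio_xchg_access_time(struct folio *folio, int time)
	{
		/* the head page holds the access time for the whole folio */
		return xchg_page_access_time(&folio->page, time);
	}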

Signed-off-by: Kefeng Wang <[email protected]>
---
include/linux/sched/numa_balancing.h | 6 +++---
kernel/sched/fair.c | 12 ++++++------
mm/mempolicy.c | 2 +-
3 files changed, 10 insertions(+), 10 deletions(-)

diff --git a/include/linux/sched/numa_balancing.h b/include/linux/sched/numa_balancing.h
index 3988762efe15..06a9d35650f0 100644
--- a/include/linux/sched/numa_balancing.h
+++ b/include/linux/sched/numa_balancing.h
@@ -20,8 +20,8 @@ extern void task_numa_fault(int last_node, int node, int pages, int flags);
extern pid_t task_numa_group_id(struct task_struct *p);
extern void set_numabalancing_state(bool enabled);
extern void task_numa_free(struct task_struct *p, bool final);
-extern bool should_numa_migrate_memory(struct task_struct *p, struct page *page,
- int src_nid, int dst_cpu);
+bool should_numa_migrate_memory(struct task_struct *p, struct folio *folio,
+ int src_nid, int dst_cpu);
#else
static inline void task_numa_fault(int last_node, int node, int pages,
int flags)
@@ -38,7 +38,7 @@ static inline void task_numa_free(struct task_struct *p, bool final)
{
}
static inline bool should_numa_migrate_memory(struct task_struct *p,
- struct page *page, int src_nid, int dst_cpu)
+ struct folio *folio, int src_nid, int dst_cpu)
{
return true;
}
diff --git a/kernel/sched/fair.c b/kernel/sched/fair.c
index a502e3255392..75c9a58632a4 100644
--- a/kernel/sched/fair.c
+++ b/kernel/sched/fair.c
@@ -1722,12 +1722,12 @@ static bool pgdat_free_space_enough(struct pglist_data *pgdat)
* The smaller the hint page fault latency, the higher the possibility
* for the page to be hot.
*/
-static int numa_hint_fault_latency(struct page *page)
+static int numa_hint_fault_latency(struct folio *folio)
{
int last_time, time;

time = jiffies_to_msecs(jiffies);
- last_time = xchg_page_access_time(page, time);
+ last_time = xchg_page_access_time(&folio->page, time);

return (time - last_time) & PAGE_ACCESS_TIME_MASK;
}
@@ -1784,7 +1784,7 @@ static void numa_promotion_adjust_threshold(struct pglist_data *pgdat,
}
}

-bool should_numa_migrate_memory(struct task_struct *p, struct page * page,
+bool should_numa_migrate_memory(struct task_struct *p, struct folio *folio,
int src_nid, int dst_cpu)
{
struct numa_group *ng = deref_curr_numa_group(p);
@@ -1814,16 +1814,16 @@ bool should_numa_migrate_memory(struct task_struct *p, struct page * page,
numa_promotion_adjust_threshold(pgdat, rate_limit, def_th);

th = pgdat->nbp_threshold ? : def_th;
- latency = numa_hint_fault_latency(page);
+ latency = numa_hint_fault_latency(folio);
if (latency >= th)
return false;

return !numa_promotion_rate_limit(pgdat, rate_limit,
- thp_nr_pages(page));
+ folio_nr_pages(folio));
}

this_cpupid = cpu_pid_to_cpupid(dst_cpu, current->pid);
- last_cpupid = page_cpupid_xchg_last(page, this_cpupid);
+ last_cpupid = page_cpupid_xchg_last(&folio->page, this_cpupid);

if (!(sysctl_numa_balancing_mode & NUMA_BALANCING_MEMORY_TIERING) &&
!node_is_toptier(src_nid) && !cpupid_valid(last_cpupid))
diff --git a/mm/mempolicy.c b/mm/mempolicy.c
index ecf06ce3a5dd..f4e76a887db8 100644
--- a/mm/mempolicy.c
+++ b/mm/mempolicy.c
@@ -2650,7 +2650,7 @@ int mpol_misplaced(struct folio *folio, struct vm_area_struct *vma,
if (pol->flags & MPOL_F_MORON) {
polnid = thisnid;

- if (!should_numa_migrate_memory(current, &folio->page, curnid,
+ if (!should_numa_migrate_memory(current, folio, curnid,
thiscpu))
goto out;
}
--
2.27.0

2023-09-21 21:48:13

by Kefeng Wang

Subject: [PATCH v2 4/6] mm: memory: make numa_migrate_prep() to take a folio

In preparation for large folio NUMA balancing, make numa_migrate_prep()
take a folio; no functional change intended.
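
One subtlety worth calling out (visible in the hunks below):
numa_migrate_prep() takes a folio reference via folio_get(), and a
caller that gets NUMA_NO_NODE back must drop that reference itself, as
do_numa_page() does:

	target_nid = numa_migrate_prep(folio, vma, vmf->address, nid, &flags);
	if (target_nid == NUMA_NO_NODE) {
		/* no migration target: drop the ref numa_migrate_prep() took */
		folio_put(folio);
		goto out_map;
	}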

Signed-off-by: Kefeng Wang <[email protected]>
---
mm/huge_memory.c | 2 +-
mm/internal.h | 2 +-
mm/memory.c | 9 ++++-----
3 files changed, 6 insertions(+), 7 deletions(-)

diff --git a/mm/huge_memory.c b/mm/huge_memory.c
index 53472e34a761..0f93a73115f7 100644
--- a/mm/huge_memory.c
+++ b/mm/huge_memory.c
@@ -1556,7 +1556,7 @@ vm_fault_t do_huge_pmd_numa_page(struct vm_fault *vmf)
*/
if (node_is_toptier(nid))
last_cpupid = page_cpupid_last(&folio->page);
- target_nid = numa_migrate_prep(&folio->page, vma, haddr, nid, &flags);
+ target_nid = numa_migrate_prep(folio, vma, haddr, nid, &flags);
if (target_nid == NUMA_NO_NODE) {
folio_put(folio);
goto out_map;
diff --git a/mm/internal.h b/mm/internal.h
index 7a961d12b088..d7916f1e9e98 100644
--- a/mm/internal.h
+++ b/mm/internal.h
@@ -984,7 +984,7 @@ void vunmap_range_noflush(unsigned long start, unsigned long end);

void __vunmap_range_noflush(unsigned long start, unsigned long end);

-int numa_migrate_prep(struct page *page, struct vm_area_struct *vma,
+int numa_migrate_prep(struct folio *folio, struct vm_area_struct *vma,
unsigned long addr, int page_nid, int *flags);

void free_zone_device_page(struct page *page);
diff --git a/mm/memory.c b/mm/memory.c
index a05cfb6be36d..93ce8bcbe9d7 100644
--- a/mm/memory.c
+++ b/mm/memory.c
@@ -4727,10 +4727,10 @@ static vm_fault_t do_fault(struct vm_fault *vmf)
return ret;
}

-int numa_migrate_prep(struct page *page, struct vm_area_struct *vma,
+int numa_migrate_prep(struct folio *folio, struct vm_area_struct *vma,
unsigned long addr, int page_nid, int *flags)
{
- get_page(page);
+ folio_get(folio);

/* Record the current PID acceesing VMA */
vma_set_access_pid_bit(vma);
@@ -4741,7 +4741,7 @@ int numa_migrate_prep(struct page *page, struct vm_area_struct *vma,
*flags |= TNF_FAULT_LOCAL;
}

- return mpol_misplaced(page, vma, addr);
+ return mpol_misplaced(&folio->page, vma, addr);
}

static vm_fault_t do_numa_page(struct vm_fault *vmf)
@@ -4815,8 +4815,7 @@ static vm_fault_t do_numa_page(struct vm_fault *vmf)
last_cpupid = (-1 & LAST_CPUPID_MASK);
else
last_cpupid = page_cpupid_last(&folio->page);
- target_nid = numa_migrate_prep(&folio->page, vma, vmf->address, nid,
- &flags);
+ target_nid = numa_migrate_prep(folio, vma, vmf->address, nid, &flags);
if (target_nid == NUMA_NO_NODE) {
folio_put(folio);
goto out_map;
--
2.27.0