2023-06-19 08:04:46

by Liam Ni

[permalink] [raw]
Subject: [PATCH V2] x86,NUMA:improve the execution efficiency of numa_meminfo_cover_memory()

Pages in memblock that have no node assigned are, by definition, not
covered by numa_meminfo. The number of such pages is therefore the number
of lost pages, so it can be counted directly instead of being derived from
the difference between e820 RAM and NUMA-covered RAM.

Signed-off-by: Liam Ni <[email protected]>
---
arch/x86/mm/numa.c | 25 ++++++-------------------
include/linux/mm.h | 2 ++
mm/mm_init.c | 26 ++++++++++++++++++++++++++
3 files changed, 34 insertions(+), 19 deletions(-)

diff --git a/arch/x86/mm/numa.c b/arch/x86/mm/numa.c
index 2aadb2019b4f..ffe3b771eac7 100644
--- a/arch/x86/mm/numa.c
+++ b/arch/x86/mm/numa.c
@@ -451,28 +451,15 @@ EXPORT_SYMBOL(__node_distance);
* Sanity check to catch more bad NUMA configurations (they are amazingly
* common). Make sure the nodes cover all memory.
*/
-static bool __init numa_meminfo_cover_memory(const struct numa_meminfo *mi)
+static bool __init numa_meminfo_cover_memory(void)
{
- u64 numaram, e820ram;
- int i;
-
- numaram = 0;
- for (i = 0; i < mi->nr_blks; i++) {
- u64 s = mi->blk[i].start >> PAGE_SHIFT;
- u64 e = mi->blk[i].end >> PAGE_SHIFT;
- numaram += e - s;
- numaram -= __absent_pages_in_range(mi->blk[i].nid, s, e);
- if ((s64)numaram < 0)
- numaram = 0;
- }
+ u64 lo_pages;

- e820ram = max_pfn - absent_pages_in_range(0, max_pfn);
+ lo_pages = without_node_pages_in_range(0, max_pfn);

/* We seem to lose 3 pages somewhere. Allow 1M of slack. */
- if ((s64)(e820ram - numaram) >= (1 << (20 - PAGE_SHIFT))) {
- printk(KERN_ERR "NUMA: nodes only cover %LuMB of your %LuMB e820 RAM. Not used.\n",
- (numaram << PAGE_SHIFT) >> 20,
- (e820ram << PAGE_SHIFT) >> 20);
+ if (lo_pages >= (1 << (20 - PAGE_SHIFT))) {
+ printk(KERN_ERR "NUMA: we lose more than 1M pages\n");
return false;
}
return true;
@@ -583,7 +570,7 @@ static int __init numa_register_memblks(struct numa_meminfo *mi)
return -EINVAL;
}
}
- if (!numa_meminfo_cover_memory(mi))
+ if (!numa_meminfo_cover_memory())
return -EINVAL;

/* Finally register nodes. */
diff --git a/include/linux/mm.h b/include/linux/mm.h
index 0daef3f2f029..99b7b49f1378 100644
--- a/include/linux/mm.h
+++ b/include/linux/mm.h
@@ -3043,6 +3043,8 @@ unsigned long __absent_pages_in_range(int nid, unsigned long start_pfn,
unsigned long end_pfn);
extern unsigned long absent_pages_in_range(unsigned long start_pfn,
unsigned long end_pfn);
+extern unsigned long without_node_pages_in_range(unsigned long start_pfn,
+ unsigned long end_pfn);
extern void get_pfn_range_for_nid(unsigned int nid,
unsigned long *start_pfn, unsigned long *end_pfn);

diff --git a/mm/mm_init.c b/mm/mm_init.c
index 3ddd18a89b66..1d5085a8f93b 100644
--- a/mm/mm_init.c
+++ b/mm/mm_init.c
@@ -1132,6 +1132,32 @@ static void __init adjust_zone_range_for_zone_movable(int nid,
}
}

+/**
+ * @start_pfn: The start PFN to start searching for holes
+ * @end_pfn: The end PFN to stop searching for holes
+ *
+ * Return: Return the number of page frames without node assigned within a range.
+ */
+unsigned long __init without_node_pages_in_range(unsigned long start_pfn,
+ unsigned long end_pfn)
+{
+ struct memblock_type *type = &memblock.memory;
+ struct memblock_region *r;
+ unsigned long num_pages;
+ int i, nid;
+
+ for (i = 0; i < type->cnt; i++) {
+ r = &type->regions[i];
+ nid = memblock_get_region_node(r);
+
+ if (PFN_UP(r->base) >= PFN_DOWN(r->base + r->size))
+ continue;
+ if (nid == NUMA_NO_NODE)
+ num_pages += PFN_DOWN(r->base + r->size) - PFN_UP(r->base);
+ }
+ return num_pages;
+}
+
/*
* Return the number of holes in a range on a node. If nid is MAX_NUMNODES,
* then all holes in the requested range will be accounted for.
--
2.25.1



2023-06-27 13:27:45

by Liam Ni

[permalink] [raw]
Subject: Re: [PATCH V2] x86,NUMA:improve the execution efficiency of numa_meminfo_cover_memory()

friendly ping

On Mon, 19 Jun 2023 at 15:53, Liam Ni <[email protected]> wrote:
>
> The number of pages in memblock that doesn't have the node
> assigned,which also means that these pages are not in numa_info.
> So these pages can represent the number of lose pages.
>
> Signed-off-by: Liam Ni <[email protected]>
> ---
> arch/x86/mm/numa.c | 25 ++++++-------------------
> include/linux/mm.h | 2 ++
> mm/mm_init.c | 26 ++++++++++++++++++++++++++
> 3 files changed, 34 insertions(+), 19 deletions(-)
>
> diff --git a/arch/x86/mm/numa.c b/arch/x86/mm/numa.c
> index 2aadb2019b4f..ffe3b771eac7 100644
> --- a/arch/x86/mm/numa.c
> +++ b/arch/x86/mm/numa.c
> @@ -451,28 +451,15 @@ EXPORT_SYMBOL(__node_distance);
> * Sanity check to catch more bad NUMA configurations (they are amazingly
> * common). Make sure the nodes cover all memory.
> */
> -static bool __init numa_meminfo_cover_memory(const struct numa_meminfo *mi)
> +static bool __init numa_meminfo_cover_memory(void)
> {
> - u64 numaram, e820ram;
> - int i;
> -
> - numaram = 0;
> - for (i = 0; i < mi->nr_blks; i++) {
> - u64 s = mi->blk[i].start >> PAGE_SHIFT;
> - u64 e = mi->blk[i].end >> PAGE_SHIFT;
> - numaram += e - s;
> - numaram -= __absent_pages_in_range(mi->blk[i].nid, s, e);
> - if ((s64)numaram < 0)
> - numaram = 0;
> - }
> + u64 lo_pages;
>
> - e820ram = max_pfn - absent_pages_in_range(0, max_pfn);
> + lo_pages = without_node_pages_in_range(0, max_pfn);
>
> /* We seem to lose 3 pages somewhere. Allow 1M of slack. */
> - if ((s64)(e820ram - numaram) >= (1 << (20 - PAGE_SHIFT))) {
> - printk(KERN_ERR "NUMA: nodes only cover %LuMB of your %LuMB e820 RAM. Not used.\n",
> - (numaram << PAGE_SHIFT) >> 20,
> - (e820ram << PAGE_SHIFT) >> 20);
> + if (lo_pages >= (1 << (20 - PAGE_SHIFT))) {
> + printk(KERN_ERR "NUMA: we lose more than 1M pages\n");
> return false;
> }
> return true;
> @@ -583,7 +570,7 @@ static int __init numa_register_memblks(struct numa_meminfo *mi)
> return -EINVAL;
> }
> }
> - if (!numa_meminfo_cover_memory(mi))
> + if (!numa_meminfo_cover_memory())
> return -EINVAL;
>
> /* Finally register nodes. */
> diff --git a/include/linux/mm.h b/include/linux/mm.h
> index 0daef3f2f029..99b7b49f1378 100644
> --- a/include/linux/mm.h
> +++ b/include/linux/mm.h
> @@ -3043,6 +3043,8 @@ unsigned long __absent_pages_in_range(int nid, unsigned long start_pfn,
> unsigned long end_pfn);
> extern unsigned long absent_pages_in_range(unsigned long start_pfn,
> unsigned long end_pfn);
> +extern unsigned long without_node_pages_in_range(unsigned long start_pfn,
> + unsigned long end_pfn);
> extern void get_pfn_range_for_nid(unsigned int nid,
> unsigned long *start_pfn, unsigned long *end_pfn);
>
> diff --git a/mm/mm_init.c b/mm/mm_init.c
> index 3ddd18a89b66..1d5085a8f93b 100644
> --- a/mm/mm_init.c
> +++ b/mm/mm_init.c
> @@ -1132,6 +1132,32 @@ static void __init adjust_zone_range_for_zone_movable(int nid,
> }
> }
>
> +/**
> + * @start_pfn: The start PFN to start searching for holes
> + * @end_pfn: The end PFN to stop searching for holes
> + *
> + * Return: Return the number of page frames without node assigned within a range.
> + */
> +unsigned long __init without_node_pages_in_range(unsigned long start_pfn,
> + unsigned long end_pfn)
> +{
> + struct memblock_type *type = &memblock.memory;
> + struct memblock_region *r;
> + unsigned long num_pages;
> + int i, nid;
> +
> + for (i = 0; i < type->cnt; i++) {
> + r = &type->regions[i];
> + nid = memblock_get_region_node(r);
> +
> + if (PFN_UP(r->base) >= PFN_DOWN(r->base + r->size))
> + continue;
> + if (nid == NUMA_NO_NODE)
> + num_pages += PFN_DOWN(r->base + r->size) - PFN_UP(r->base);
> + }
> + return num_pages;
> +}
> +
> /*
> * Return the number of holes in a range on a node. If nid is MAX_NUMNODES,
> * then all holes in the requested range will be accounted for.
> --
> 2.25.1
>

2023-06-29 09:40:29

by Mike Rapoport

[permalink] [raw]
Subject: Re: [PATCH V2] x86,NUMA:improve the execution efficiency of numa_meminfo_cover_memory()

On Mon, Jun 19, 2023 at 03:53:15PM +0800, Liam Ni wrote:
> The number of pages in memblock that doesn't have the node
> assigned,which also means that these pages are not in numa_info.
> So these pages can represent the number of lose pages.
>
> Signed-off-by: Liam Ni <[email protected]>
> ---
> arch/x86/mm/numa.c | 25 ++++++-------------------
> include/linux/mm.h | 2 ++
> mm/mm_init.c | 26 ++++++++++++++++++++++++++
> 3 files changed, 34 insertions(+), 19 deletions(-)
>
> diff --git a/arch/x86/mm/numa.c b/arch/x86/mm/numa.c
> index 2aadb2019b4f..ffe3b771eac7 100644
> --- a/arch/x86/mm/numa.c
> +++ b/arch/x86/mm/numa.c
> @@ -451,28 +451,15 @@ EXPORT_SYMBOL(__node_distance);
> * Sanity check to catch more bad NUMA configurations (they are amazingly
> * common). Make sure the nodes cover all memory.
> */
> -static bool __init numa_meminfo_cover_memory(const struct numa_meminfo *mi)
> +static bool __init numa_meminfo_cover_memory(void)
> {
> - u64 numaram, e820ram;
> - int i;
> -
> - numaram = 0;
> - for (i = 0; i < mi->nr_blks; i++) {
> - u64 s = mi->blk[i].start >> PAGE_SHIFT;
> - u64 e = mi->blk[i].end >> PAGE_SHIFT;
> - numaram += e - s;
> - numaram -= __absent_pages_in_range(mi->blk[i].nid, s, e);
> - if ((s64)numaram < 0)
> - numaram = 0;
> - }
> + u64 lo_pages;
>
> - e820ram = max_pfn - absent_pages_in_range(0, max_pfn);
> + lo_pages = without_node_pages_in_range(0, max_pfn);

The entire validation of NUMA coverage can be done in memblock, it's enough
to pass the threshold to that function and it can verify that the amount of
memory without the node id set is less than that threshold.
Then the call to numa_meminfo_cover_memory() can be replaced with, say,
memblock_validate_numa_coverage(SZ_1M).

This applies not only to x86, but to loongarch as well.

And once architecture specific code does not use *absent_pages_in_range(),
that can be made static in mm/mm_init.c

>
> /* We seem to lose 3 pages somewhere. Allow 1M of slack. */
> - if ((s64)(e820ram - numaram) >= (1 << (20 - PAGE_SHIFT))) {
> - printk(KERN_ERR "NUMA: nodes only cover %LuMB of your %LuMB e820 RAM. Not used.\n",
> - (numaram << PAGE_SHIFT) >> 20,
> - (e820ram << PAGE_SHIFT) >> 20);
> + if (lo_pages >= (1 << (20 - PAGE_SHIFT))) {
> + printk(KERN_ERR "NUMA: we lose more than 1M pages\n");
> return false;
> }
> return true;
> @@ -583,7 +570,7 @@ static int __init numa_register_memblks(struct numa_meminfo *mi)
> return -EINVAL;
> }
> }
> - if (!numa_meminfo_cover_memory(mi))
> + if (!numa_meminfo_cover_memory())
> return -EINVAL;
>
> /* Finally register nodes. */
> diff --git a/mm/mm_init.c b/mm/mm_init.c
> index 3ddd18a89b66..1d5085a8f93b 100644
> --- a/mm/mm_init.c
> +++ b/mm/mm_init.c
> @@ -1132,6 +1132,32 @@ static void __init adjust_zone_range_for_zone_movable(int nid,
> }
> }
>
> +/**
> + * @start_pfn: The start PFN to start searching for holes
> + * @end_pfn: The end PFN to stop searching for holes
> + *
> + * Return: Return the number of page frames without node assigned within a range.
> + */
> +unsigned long __init without_node_pages_in_range(unsigned long start_pfn,
> + unsigned long end_pfn)
> +{
> + struct memblock_type *type = &memblock.memory;
> + struct memblock_region *r;
> + unsigned long num_pages;
> + int i, nid;
> +
> + for (i = 0; i < type->cnt; i++) {

Please use for_each_mem_pfn_range() or for_each_mem_region() loop here.

> + r = &type->regions[i];
> + nid = memblock_get_region_node(r);
> +
> + if (PFN_UP(r->base) >= PFN_DOWN(r->base + r->size))
> + continue;
> + if (nid == NUMA_NO_NODE)
> + num_pages += PFN_DOWN(r->base + r->size) - PFN_UP(r->base);
> + }
> + return num_pages;
> +}
> +
> /*
> * Return the number of holes in a range on a node. If nid is MAX_NUMNODES,
> * then all holes in the requested range will be accounted for.
> --
> 2.25.1
>

--
Sincerely yours,
Mike.