My previous patch "x86/mm/numa: Remove numa_nodemask_from_meminfo()" hits a
problem in numa_emulation. The reason is numa_nodes_parsed is not set
correctly after emulation.
This patch set tries to fix this and also with two code refine.
Detailed discussions are in this thread:
https://lkml.org/lkml/2017/3/13/1230
and test result is posted :
https://lkml.org/lkml/2017/4/10/641
Wei Yang (3):
x86/numa_emulation: fix potential memory leak
x86/numa_emulation: assign physnode_mask directly from
numa_nodes_parsed
x86/numa_emulation: restructures numa_nodes_parsed from emulated nodes
arch/x86/mm/numa_emulation.c | 61 ++++++++++++++++++++++++--------------------
1 file changed, 33 insertions(+), 28 deletions(-)
--
2.11.0
By emulating numa, it may contains more or less nodes than physical nodes.
In numa_emulation(), numa_meminfo/numa_distance/__apicid_to_node is
restructured according to emulated nodes, except numa_nodes_parsed.
This patch restructures numa_nodes_parsed from emulated nodes.
Signed-off-by: Wei Yang <[email protected]>
---
arch/x86/mm/numa_emulation.c | 7 +++++++
1 file changed, 7 insertions(+)
diff --git a/arch/x86/mm/numa_emulation.c b/arch/x86/mm/numa_emulation.c
index a6d01931b9a1..14f075fc4cc5 100644
--- a/arch/x86/mm/numa_emulation.c
+++ b/arch/x86/mm/numa_emulation.c
@@ -391,6 +391,13 @@ void __init numa_emulation(struct numa_meminfo *numa_meminfo, int numa_dist_cnt)
/* commit */
*numa_meminfo = ei;
+ /* Make sure numa_nodes_parsed only contains emulated nodes */
+ numa_nodes_parsed = NODE_MASK_NONE;
+ for (i = 0; i < ARRAY_SIZE(ei.blk); i++)
+ if (ei.blk[i].start != ei.blk[i].end &&
+ ei.blk[i].nid != NUMA_NO_NODE)
+ node_set(ei.blk[i].nid, numa_nodes_parsed);
+
/*
* Transform __apicid_to_node table to use emulated nids by
* reverse-mapping phys_nid. The maps should always exist but fall
--
2.11.0
numa_emulation() needs to allocate a space for phys_dist[] temporarily,
while current code may miss to release this when dfl_phys_nid ==
NUMA_NO_NODE. It is observed in code review instead of in a real case.
This patch fixes this by re-order the code path.
Signed-off-by: Wei Yang <[email protected]>
---
arch/x86/mm/numa_emulation.c | 36 ++++++++++++++++++------------------
1 file changed, 18 insertions(+), 18 deletions(-)
diff --git a/arch/x86/mm/numa_emulation.c b/arch/x86/mm/numa_emulation.c
index a8f90ce3dedf..eb017c816de6 100644
--- a/arch/x86/mm/numa_emulation.c
+++ b/arch/x86/mm/numa_emulation.c
@@ -353,6 +353,24 @@ void __init numa_emulation(struct numa_meminfo *numa_meminfo, int numa_dist_cnt)
goto no_emu;
}
+ /*
+ * Determine the max emulated nid and the default phys nid to use
+ * for unmapped nodes.
+ */
+ max_emu_nid = 0;
+ dfl_phys_nid = NUMA_NO_NODE;
+ for (i = 0; i < ARRAY_SIZE(emu_nid_to_phys); i++) {
+ if (emu_nid_to_phys[i] != NUMA_NO_NODE) {
+ max_emu_nid = i;
+ if (dfl_phys_nid == NUMA_NO_NODE)
+ dfl_phys_nid = emu_nid_to_phys[i];
+ }
+ }
+ if (dfl_phys_nid == NUMA_NO_NODE) {
+ pr_warning("NUMA: Warning: can't determine default physical node, disabling emulation\n");
+ goto no_emu;
+ }
+
/* copy the physical distance table */
if (numa_dist_cnt) {
u64 phys;
@@ -372,24 +390,6 @@ void __init numa_emulation(struct numa_meminfo *numa_meminfo, int numa_dist_cnt)
node_distance(i, j);
}
- /*
- * Determine the max emulated nid and the default phys nid to use
- * for unmapped nodes.
- */
- max_emu_nid = 0;
- dfl_phys_nid = NUMA_NO_NODE;
- for (i = 0; i < ARRAY_SIZE(emu_nid_to_phys); i++) {
- if (emu_nid_to_phys[i] != NUMA_NO_NODE) {
- max_emu_nid = i;
- if (dfl_phys_nid == NUMA_NO_NODE)
- dfl_phys_nid = emu_nid_to_phys[i];
- }
- }
- if (dfl_phys_nid == NUMA_NO_NODE) {
- pr_warning("NUMA: Warning: can't determine default physical node, disabling emulation\n");
- goto no_emu;
- }
-
/* commit */
*numa_meminfo = ei;
--
2.11.0
According to current code path, numa_nodes_parsed is already setup when
numa_emucation() is called.
x86_numa_init()
numa_init()
init_func()
numa_emulation()
numa_register_memblks()
This means we can get the physnode_mask directly from numa_nodes_parsed. At
the same time, this patch correct the comment of these two functions.
Signed-off-by: Wei Yang <[email protected]>
---
arch/x86/mm/numa_emulation.c | 18 ++++++++----------
1 file changed, 8 insertions(+), 10 deletions(-)
diff --git a/arch/x86/mm/numa_emulation.c b/arch/x86/mm/numa_emulation.c
index eb017c816de6..a6d01931b9a1 100644
--- a/arch/x86/mm/numa_emulation.c
+++ b/arch/x86/mm/numa_emulation.c
@@ -75,13 +75,15 @@ static int __init emu_setup_memblk(struct numa_meminfo *ei,
/*
* Sets up nr_nodes fake nodes interleaved over physical nodes ranging from addr
- * to max_addr. The return value is the number of nodes allocated.
+ * to max_addr.
+ *
+ * Returns zero on success or negative error code.
*/
static int __init split_nodes_interleave(struct numa_meminfo *ei,
struct numa_meminfo *pi,
u64 addr, u64 max_addr, int nr_nodes)
{
- nodemask_t physnode_mask = NODE_MASK_NONE;
+ nodemask_t physnode_mask = numa_nodes_parsed;
u64 size;
int big;
int nid = 0;
@@ -116,9 +118,6 @@ static int __init split_nodes_interleave(struct numa_meminfo *ei,
return -1;
}
- for (i = 0; i < pi->nr_blks; i++)
- node_set(pi->blk[i].nid, physnode_mask);
-
/*
* Continue to fill physical nodes with fake nodes until there is no
* memory left on any of them.
@@ -200,13 +199,15 @@ static u64 __init find_end_of_node(u64 start, u64 max_addr, u64 size)
/*
* Sets up fake nodes of `size' interleaved over physical nodes ranging from
- * `addr' to `max_addr'. The return value is the number of nodes allocated.
+ * `addr' to `max_addr'.
+ *
+ * Returns zero on success or negative error code.
*/
static int __init split_nodes_size_interleave(struct numa_meminfo *ei,
struct numa_meminfo *pi,
u64 addr, u64 max_addr, u64 size)
{
- nodemask_t physnode_mask = NODE_MASK_NONE;
+ nodemask_t physnode_mask = numa_nodes_parsed;
u64 min_size;
int nid = 0;
int i, ret;
@@ -231,9 +232,6 @@ static int __init split_nodes_size_interleave(struct numa_meminfo *ei,
}
size &= FAKE_NODE_MIN_HASH_MASK;
- for (i = 0; i < pi->nr_blks; i++)
- node_set(pi->blk[i].nid, physnode_mask);
-
/*
* Fill physical nodes with fake nodes of size until there is no memory
* left on any of them.
--
2.11.0
On Tue, 11 Apr 2017, Wei Yang wrote:
> numa_emulation() needs to allocate a space for phys_dist[] temporarily,
> while current code may miss to release this when dfl_phys_nid ==
> NUMA_NO_NODE. It is observed in code review instead of in a real case.
>
> This patch fixes this by re-order the code path.
>
> Signed-off-by: Wei Yang <[email protected]>
Acked-by: David Rientjes <[email protected]>
On Tue, 11 Apr 2017, Wei Yang wrote:
> According to current code path, numa_nodes_parsed is already setup when
> numa_emucation() is called.
>
> x86_numa_init()
> numa_init()
> init_func()
>
> numa_emulation()
>
> numa_register_memblks()
>
s/numa_emucation/numa_emulation/, but I think everything above should just
be reworded to say the following since it establishes the dependency:
numa_init() has already called init_func(), which is responsible for
setting numa_nodes_parsed, so use this nodemask instead of re-finding it
when calling numa_emulation().
> This means we can get the physnode_mask directly from numa_nodes_parsed. At
> the same time, this patch correct the comment of these two functions.
>
> Signed-off-by: Wei Yang <[email protected]>
Acked-by: David Rientjes <[email protected]>
On Tue, 11 Apr 2017, Wei Yang wrote:
> By emulating numa, it may contains more or less nodes than physical nodes.
> In numa_emulation(), numa_meminfo/numa_distance/__apicid_to_node is
> restructured according to emulated nodes, except numa_nodes_parsed.
>
> This patch restructures numa_nodes_parsed from emulated nodes.
>
> Signed-off-by: Wei Yang <[email protected]>
Acked-by: David Rientjes <[email protected]>
although there's a small nit: NODE_MASK_NONE is only used for
initialization, this should be nodes_clear(numa_nodes_parsed) instead, but
that would be up to the x86 maintainers to allow it.
> ---
> arch/x86/mm/numa_emulation.c | 7 +++++++
> 1 file changed, 7 insertions(+)
>
> diff --git a/arch/x86/mm/numa_emulation.c b/arch/x86/mm/numa_emulation.c
> index a6d01931b9a1..14f075fc4cc5 100644
> --- a/arch/x86/mm/numa_emulation.c
> +++ b/arch/x86/mm/numa_emulation.c
> @@ -391,6 +391,13 @@ void __init numa_emulation(struct numa_meminfo *numa_meminfo, int numa_dist_cnt)
> /* commit */
> *numa_meminfo = ei;
>
> + /* Make sure numa_nodes_parsed only contains emulated nodes */
> + numa_nodes_parsed = NODE_MASK_NONE;
> + for (i = 0; i < ARRAY_SIZE(ei.blk); i++)
> + if (ei.blk[i].start != ei.blk[i].end &&
> + ei.blk[i].nid != NUMA_NO_NODE)
> + node_set(ei.blk[i].nid, numa_nodes_parsed);
> +
> /*
> * Transform __apicid_to_node table to use emulated nids by
> * reverse-mapping phys_nid. The maps should always exist but fall
On Mon, Apr 10, 2017 at 05:26:03PM -0700, David Rientjes wrote:
>On Tue, 11 Apr 2017, Wei Yang wrote:
>
>> According to current code path, numa_nodes_parsed is already setup when
>> numa_emucation() is called.
>>
>> x86_numa_init()
>> numa_init()
>> init_func()
>>
>> numa_emulation()
>>
>> numa_register_memblks()
>>
>
>s/numa_emucation/numa_emulation/, but I think everything above should just
>be reworded to say the following since it establishes the dependency:
>
>numa_init() has already called init_func(), which is responsible for
>setting numa_nodes_parsed, so use this nodemask instead of re-finding it
>when calling numa_emulation().
>
Yep, thanks.
Looks your change log is better :-)
>> This means we can get the physnode_mask directly from numa_nodes_parsed. At
>> the same time, this patch correct the comment of these two functions.
>>
>> Signed-off-by: Wei Yang <[email protected]>
>
>Acked-by: David Rientjes <[email protected]>
--
Wei Yang
Help you, Help me
On Mon, Apr 10, 2017 at 05:36:25PM -0700, David Rientjes wrote:
>On Tue, 11 Apr 2017, Wei Yang wrote:
>
>> By emulating numa, it may contains more or less nodes than physical nodes.
>> In numa_emulation(), numa_meminfo/numa_distance/__apicid_to_node is
>> restructured according to emulated nodes, except numa_nodes_parsed.
>>
>> This patch restructures numa_nodes_parsed from emulated nodes.
>>
>> Signed-off-by: Wei Yang <[email protected]>
>
>Acked-by: David Rientjes <[email protected]>
>
>although there's a small nit: NODE_MASK_NONE is only used for
>initialization, this should be nodes_clear(numa_nodes_parsed) instead, but
>that would be up to the x86 maintainers to allow it.
>
Thanks for pointing out :-)
I would pay attention to this next time.
>> ---
>> arch/x86/mm/numa_emulation.c | 7 +++++++
>> 1 file changed, 7 insertions(+)
>>
>> diff --git a/arch/x86/mm/numa_emulation.c b/arch/x86/mm/numa_emulation.c
>> index a6d01931b9a1..14f075fc4cc5 100644
>> --- a/arch/x86/mm/numa_emulation.c
>> +++ b/arch/x86/mm/numa_emulation.c
>> @@ -391,6 +391,13 @@ void __init numa_emulation(struct numa_meminfo *numa_meminfo, int numa_dist_cnt)
>> /* commit */
>> *numa_meminfo = ei;
>>
>> + /* Make sure numa_nodes_parsed only contains emulated nodes */
>> + numa_nodes_parsed = NODE_MASK_NONE;
>> + for (i = 0; i < ARRAY_SIZE(ei.blk); i++)
>> + if (ei.blk[i].start != ei.blk[i].end &&
>> + ei.blk[i].nid != NUMA_NO_NODE)
>> + node_set(ei.blk[i].nid, numa_nodes_parsed);
>> +
>> /*
>> * Transform __apicid_to_node table to use emulated nids by
>> * reverse-mapping phys_nid. The maps should always exist but fall
--
Wei Yang
Help you, Help me