2022-09-01 08:53:04

by Muchun Song

[permalink] [raw]
Subject: [PATCH] mm: hugetlb: eliminate memory-less nodes handling

The memory-notify-based approach aims to handle meory-less nodes, however, it just adds
the complexity of code as pointed by David in thread [1]. The handling of memory-less
nodes is introduced by commit 4faf8d950ec4 ("hugetlb: handle memory hot-plug events").
From its commit message, we cannot find any necessity of handling this case. So, we can
simply register/unregister sysfs entries in register_node/unregister_node to simlify the
code.

https://lore.kernel.org/linux-mm/[email protected]/ [1]
Suggested-by: David Hildenbrand <[email protected]>
Signed-off-by: Muchun Song <[email protected]>
---
drivers/base/node.c | 7 +++++--
include/linux/node.h | 5 +++++
mm/hugetlb.c | 37 ++++++++++---------------------------
3 files changed, 20 insertions(+), 29 deletions(-)

diff --git a/drivers/base/node.c b/drivers/base/node.c
index ed391cb09999..cf115d5a9b8a 100644
--- a/drivers/base/node.c
+++ b/drivers/base/node.c
@@ -608,10 +608,12 @@ static int register_node(struct node *node, int num)
node->dev.groups = node_dev_groups;
error = device_register(&node->dev);

- if (error)
+ if (error) {
put_device(&node->dev);
- else
+ } else {
+ hugetlb_register_node(node);
compaction_register_node(node);
+ }

return error;
}
@@ -625,6 +627,7 @@ static int register_node(struct node *node, int num)
*/
void unregister_node(struct node *node)
{
+ hugetlb_unregister_node(node);
compaction_unregister_node(node);
node_remove_accesses(node);
node_remove_caches(node);
diff --git a/include/linux/node.h b/include/linux/node.h
index 427a5975cf40..f5d41498c2bf 100644
--- a/include/linux/node.h
+++ b/include/linux/node.h
@@ -138,6 +138,11 @@ extern void unregister_memory_block_under_nodes(struct memory_block *mem_blk);
extern int register_memory_node_under_compute_node(unsigned int mem_nid,
unsigned int cpu_nid,
unsigned access);
+
+#ifdef CONFIG_HUGETLBFS
+void hugetlb_register_node(struct node *node);
+void hugetlb_unregister_node(struct node *node);
+#endif
#else
static inline void node_dev_init(void)
{
diff --git a/mm/hugetlb.c b/mm/hugetlb.c
index d0617d64d718..722e862bb6be 100644
--- a/mm/hugetlb.c
+++ b/mm/hugetlb.c
@@ -3898,6 +3898,7 @@ static void __init hugetlb_sysfs_init(void)
}

#ifdef CONFIG_NUMA
+static bool hugetlb_initialized __ro_after_init;

/*
* node_hstate/s - associate per node hstate attributes, via their kobjects,
@@ -3953,7 +3954,7 @@ static struct hstate *kobj_to_node_hstate(struct kobject *kobj, int *nidp)
* Unregister hstate attributes from a single node device.
* No-op if no hstate attributes attached.
*/
-static void hugetlb_unregister_node(struct node *node)
+void hugetlb_unregister_node(struct node *node)
{
struct hstate *h;
struct node_hstate *nhs = &node_hstates[node->dev.id];
@@ -3983,19 +3984,22 @@ static void hugetlb_unregister_node(struct node *node)
* Register hstate attributes for a single node device.
* No-op if attributes already registered.
*/
-static int hugetlb_register_node(struct node *node)
+void hugetlb_register_node(struct node *node)
{
struct hstate *h;
struct node_hstate *nhs = &node_hstates[node->dev.id];
int err;

+ if (!hugetlb_initialized)
+ return;
+
if (nhs->hugepages_kobj)
- return 0; /* already allocated */
+ return; /* already allocated */

nhs->hugepages_kobj = kobject_create_and_add("hugepages",
&node->dev.kobj);
if (!nhs->hugepages_kobj)
- return -ENOMEM;
+ return;

for_each_hstate(h) {
err = hugetlb_sysfs_add_hstate(h, nhs->hugepages_kobj,
@@ -4005,28 +4009,9 @@ static int hugetlb_register_node(struct node *node)
pr_err("HugeTLB: Unable to add hstate %s for node %d\n",
h->name, node->dev.id);
hugetlb_unregister_node(node);
- return -ENOMEM;
+ break;
}
}
- return 0;
-}
-
-static int __meminit hugetlb_memory_callback(struct notifier_block *self,
- unsigned long action, void *arg)
-{
- int ret = 0;
- struct memory_notify *mnb = arg;
- int nid = mnb->status_change_nid;
-
- if (nid == NUMA_NO_NODE)
- return NOTIFY_DONE;
-
- if (action == MEM_GOING_ONLINE)
- ret = hugetlb_register_node(node_devices[nid]);
- else if (action == MEM_CANCEL_ONLINE || action == MEM_OFFLINE)
- hugetlb_unregister_node(node_devices[nid]);
-
- return notifier_from_errno(ret);
}

/*
@@ -4038,11 +4023,9 @@ static void __init hugetlb_register_all_nodes(void)
{
int nid;

- get_online_mems();
- hotplug_memory_notifier(hugetlb_memory_callback, 0);
+ hugetlb_initialized = true;
for_each_node_state(nid, N_MEMORY)
hugetlb_register_node(node_devices[nid]);
- put_online_mems();
}
#else /* !CONFIG_NUMA */

--
2.11.0


2022-09-01 09:35:20

by Aneesh Kumar K.V

[permalink] [raw]
Subject: Re: [PATCH] mm: hugetlb: eliminate memory-less nodes handling

On 9/1/22 2:00 PM, Muchun Song wrote:
> The memory-notify-based approach aims to handle meory-less nodes, however, it just adds
> the complexity of code as pointed by David in thread [1]. The handling of memory-less
> nodes is introduced by commit 4faf8d950ec4 ("hugetlb: handle memory hot-plug events").
> From its commit message, we cannot find any necessity of handling this case. So, we can
> simply register/unregister sysfs entries in register_node/unregister_node to simlify the
> code.

Isn't that hotplug callback added because in hugetlb_register_all_nodes() we register
sysfs nodes only for N_MEMORY nodes?


>
> https://lore.kernel.org/linux-mm/[email protected]/ [1]
> Suggested-by: David Hildenbrand <[email protected]>
> Signed-off-by: Muchun Song <[email protected]>
> ---
> drivers/base/node.c | 7 +++++--
> include/linux/node.h | 5 +++++
> mm/hugetlb.c | 37 ++++++++++---------------------------
> 3 files changed, 20 insertions(+), 29 deletions(-)
>
> diff --git a/drivers/base/node.c b/drivers/base/node.c
> index ed391cb09999..cf115d5a9b8a 100644
> --- a/drivers/base/node.c
> +++ b/drivers/base/node.c
> @@ -608,10 +608,12 @@ static int register_node(struct node *node, int num)
> node->dev.groups = node_dev_groups;
> error = device_register(&node->dev);
>
> - if (error)
> + if (error) {
> put_device(&node->dev);
> - else
> + } else {
> + hugetlb_register_node(node);
> compaction_register_node(node);
> + }
>


I guess this will handle register of sysfs hugetlb files for new NUMA nodes
after hugetlb_initialized = true;

But what about N_CPU that can get memory added later. Do we need to update
hugetlb_register_all_nodes() to handle N_ONLINE?


> return error;
> }
> @@ -625,6 +627,7 @@ static int register_node(struct node *node, int num)
> */
> void unregister_node(struct node *node)
> {
> + hugetlb_unregister_node(node);
> compaction_unregister_node(node);
> node_remove_accesses(node);
> node_remove_caches(node);
> diff --git a/include/linux/node.h b/include/linux/node.h
> index 427a5975cf40..f5d41498c2bf 100644
> --- a/include/linux/node.h
> +++ b/include/linux/node.h
> @@ -138,6 +138,11 @@ extern void unregister_memory_block_under_nodes(struct memory_block *mem_blk);
> extern int register_memory_node_under_compute_node(unsigned int mem_nid,
> unsigned int cpu_nid,
> unsigned access);
> +
> +#ifdef CONFIG_HUGETLBFS
> +void hugetlb_register_node(struct node *node);
> +void hugetlb_unregister_node(struct node *node);
> +#endif
> #else
> static inline void node_dev_init(void)
> {
> diff --git a/mm/hugetlb.c b/mm/hugetlb.c
> index d0617d64d718..722e862bb6be 100644
> --- a/mm/hugetlb.c
> +++ b/mm/hugetlb.c
> @@ -3898,6 +3898,7 @@ static void __init hugetlb_sysfs_init(void)
> }
>
> #ifdef CONFIG_NUMA
> +static bool hugetlb_initialized __ro_after_init;
>
> /*
> * node_hstate/s - associate per node hstate attributes, via their kobjects,
> @@ -3953,7 +3954,7 @@ static struct hstate *kobj_to_node_hstate(struct kobject *kobj, int *nidp)
> * Unregister hstate attributes from a single node device.
> * No-op if no hstate attributes attached.
> */
> -static void hugetlb_unregister_node(struct node *node)
> +void hugetlb_unregister_node(struct node *node)
> {
> struct hstate *h;
> struct node_hstate *nhs = &node_hstates[node->dev.id];
> @@ -3983,19 +3984,22 @@ static void hugetlb_unregister_node(struct node *node)
> * Register hstate attributes for a single node device.
> * No-op if attributes already registered.
> */
> -static int hugetlb_register_node(struct node *node)
> +void hugetlb_register_node(struct node *node)
> {
> struct hstate *h;
> struct node_hstate *nhs = &node_hstates[node->dev.id];
> int err;
>
> + if (!hugetlb_initialized)
> + return;
> +
> if (nhs->hugepages_kobj)
> - return 0; /* already allocated */
> + return; /* already allocated */
>
> nhs->hugepages_kobj = kobject_create_and_add("hugepages",
> &node->dev.kobj);
> if (!nhs->hugepages_kobj)
> - return -ENOMEM;
> + return;
>
> for_each_hstate(h) {
> err = hugetlb_sysfs_add_hstate(h, nhs->hugepages_kobj,
> @@ -4005,28 +4009,9 @@ static int hugetlb_register_node(struct node *node)
> pr_err("HugeTLB: Unable to add hstate %s for node %d\n",
> h->name, node->dev.id);
> hugetlb_unregister_node(node);
> - return -ENOMEM;
> + break;
> }
> }
> - return 0;
> -}
> -
> -static int __meminit hugetlb_memory_callback(struct notifier_block *self,
> - unsigned long action, void *arg)
> -{
> - int ret = 0;
> - struct memory_notify *mnb = arg;
> - int nid = mnb->status_change_nid;
> -
> - if (nid == NUMA_NO_NODE)
> - return NOTIFY_DONE;
> -
> - if (action == MEM_GOING_ONLINE)
> - ret = hugetlb_register_node(node_devices[nid]);
> - else if (action == MEM_CANCEL_ONLINE || action == MEM_OFFLINE)
> - hugetlb_unregister_node(node_devices[nid]);
> -
> - return notifier_from_errno(ret);
> }
>
> /*
> @@ -4038,11 +4023,9 @@ static void __init hugetlb_register_all_nodes(void)
> {
> int nid;
>
> - get_online_mems();
> - hotplug_memory_notifier(hugetlb_memory_callback, 0);
> + hugetlb_initialized = true;
> for_each_node_state(nid, N_MEMORY)


Should this be for_each_online_node() ?

> hugetlb_register_node(node_devices[nid]);
> - put_online_mems();
> }
> #else /* !CONFIG_NUMA */
>

2022-09-01 09:37:34

by David Hildenbrand

[permalink] [raw]
Subject: Re: [PATCH] mm: hugetlb: eliminate memory-less nodes handling

On 01.09.22 10:30, Muchun Song wrote:
> The memory-notify-based approach aims to handle meory-less nodes, however, it just adds
> the complexity of code as pointed by David in thread [1]. The handling of memory-less
> nodes is introduced by commit 4faf8d950ec4 ("hugetlb: handle memory hot-plug events").
> From its commit message, we cannot find any necessity of handling this case. So, we can
> simply register/unregister sysfs entries in register_node/unregister_node to simlify the
> code.
>
> https://lore.kernel.org/linux-mm/[email protected]/ [1]
> Suggested-by: David Hildenbrand <[email protected]>
> Signed-off-by: Muchun Song <[email protected]>
> ---
> drivers/base/node.c | 7 +++++--
> include/linux/node.h | 5 +++++
> mm/hugetlb.c | 37 ++++++++++---------------------------
> 3 files changed, 20 insertions(+), 29 deletions(-)
>
> diff --git a/drivers/base/node.c b/drivers/base/node.c
> index ed391cb09999..cf115d5a9b8a 100644
> --- a/drivers/base/node.c
> +++ b/drivers/base/node.c
> @@ -608,10 +608,12 @@ static int register_node(struct node *node, int num)
> node->dev.groups = node_dev_groups;
> error = device_register(&node->dev);
>
> - if (error)
> + if (error) {
> put_device(&node->dev);
> - else
> + } else {
> + hugetlb_register_node(node);
> compaction_register_node(node);
> + }

Good, so this matches what other code does.

>
> return error;
> }
> @@ -625,6 +627,7 @@ static int register_node(struct node *node, int num)
> */
> void unregister_node(struct node *node)
> {
> + hugetlb_unregister_node(node);
> compaction_unregister_node(node);
> node_remove_accesses(node);
> node_remove_caches(node);
> diff --git a/include/linux/node.h b/include/linux/node.h
> index 427a5975cf40..f5d41498c2bf 100644
> --- a/include/linux/node.h
> +++ b/include/linux/node.h
> @@ -138,6 +138,11 @@ extern void unregister_memory_block_under_nodes(struct memory_block *mem_blk);
> extern int register_memory_node_under_compute_node(unsigned int mem_nid,
> unsigned int cpu_nid,
> unsigned access);
> +
> +#ifdef CONFIG_HUGETLBFS
> +void hugetlb_register_node(struct node *node);
> +void hugetlb_unregister_node(struct node *node);
> +#endif

compaction_register_node() resides in include/linux/compaction.h, so I
wonder if this should go into hugetlb.h (unless it causes trouble)

> #else
> static inline void node_dev_init(void)
> {
> diff --git a/mm/hugetlb.c b/mm/hugetlb.c
> index d0617d64d718..722e862bb6be 100644
> --- a/mm/hugetlb.c
> +++ b/mm/hugetlb.c
> @@ -3898,6 +3898,7 @@ static void __init hugetlb_sysfs_init(void)
> }
>
> #ifdef CONFIG_NUMA
> +static bool hugetlb_initialized __ro_after_init;

We set it out of hugetlb_register_all_nodes(), so it conceptually not
correct. We either need a better name here or set it from generic init code.

You could call it hugetlb_sysfs_initialized() and set that from
hugetlb_sysfs_init(), which is called just before
hugetlb_register_all_nodes().

[ shouldn't hugetlb_register_all_nodes() get called from
hugetlb_sysfs_init() ? it's all about sysfs as well ... ]

>
> /*
> * node_hstate/s - associate per node hstate attributes, via their kobjects,
> @@ -3953,7 +3954,7 @@ static struct hstate *kobj_to_node_hstate(struct kobject *kobj, int *nidp)
> * Unregister hstate attributes from a single node device.
> * No-op if no hstate attributes attached.
> */
> -static void hugetlb_unregister_node(struct node *node)
> +void hugetlb_unregister_node(struct node *node)
> {
> struct hstate *h;
> struct node_hstate *nhs = &node_hstates[node->dev.id];
> @@ -3983,19 +3984,22 @@ static void hugetlb_unregister_node(struct node *node)
> * Register hstate attributes for a single node device.
> * No-op if attributes already registered.
> */
> -static int hugetlb_register_node(struct node *node)
> +void hugetlb_register_node(struct node *node)
> {
> struct hstate *h;
> struct node_hstate *nhs = &node_hstates[node->dev.id];
> int err;
>
> + if (!hugetlb_initialized)
> + return;
> +
> if (nhs->hugepages_kobj)
> - return 0; /* already allocated */
> + return; /* already allocated */
>
> nhs->hugepages_kobj = kobject_create_and_add("hugepages",
> &node->dev.kobj);
> if (!nhs->hugepages_kobj)
> - return -ENOMEM;
> + return;
>
> for_each_hstate(h) {
> err = hugetlb_sysfs_add_hstate(h, nhs->hugepages_kobj,
> @@ -4005,28 +4009,9 @@ static int hugetlb_register_node(struct node *node)
> pr_err("HugeTLB: Unable to add hstate %s for node %d\n",
> h->name, node->dev.id);
> hugetlb_unregister_node(node);
> - return -ENOMEM;
> + break;
> }
> }
> - return 0;
> -}
> -
> -static int __meminit hugetlb_memory_callback(struct notifier_block *self,
> - unsigned long action, void *arg)
> -{
> - int ret = 0;
> - struct memory_notify *mnb = arg;
> - int nid = mnb->status_change_nid;
> -
> - if (nid == NUMA_NO_NODE)
> - return NOTIFY_DONE;
> -
> - if (action == MEM_GOING_ONLINE)
> - ret = hugetlb_register_node(node_devices[nid]);
> - else if (action == MEM_CANCEL_ONLINE || action == MEM_OFFLINE)
> - hugetlb_unregister_node(node_devices[nid]);
> -
> - return notifier_from_errno(ret);
> }
>
> /*
> @@ -4038,11 +4023,9 @@ static void __init hugetlb_register_all_nodes(void)
> {
> int nid;
>
> - get_online_mems();
> - hotplug_memory_notifier(hugetlb_memory_callback, 0);
> + hugetlb_initialized = true;
> for_each_node_state(nid, N_MEMORY)
> hugetlb_register_node(node_devices[nid]);
> - put_online_mems();
> }
> #else /* !CONFIG_NUMA */
>

Apart from the comments, looks good and clean to me. Thanks!

--
Thanks,

David / dhildenb

2022-09-01 09:39:49

by David Hildenbrand

[permalink] [raw]
Subject: Re: [PATCH] mm: hugetlb: eliminate memory-less nodes handling

On 01.09.22 11:00, Aneesh Kumar K V wrote:
> On 9/1/22 2:00 PM, Muchun Song wrote:
>> The memory-notify-based approach aims to handle meory-less nodes, however, it just adds
>> the complexity of code as pointed by David in thread [1]. The handling of memory-less
>> nodes is introduced by commit 4faf8d950ec4 ("hugetlb: handle memory hot-plug events").
>> From its commit message, we cannot find any necessity of handling this case. So, we can
>> simply register/unregister sysfs entries in register_node/unregister_node to simlify the
>> code.
>
> Isn't that hotplug callback added because in hugetlb_register_all_nodes() we register
> sysfs nodes only for N_MEMORY nodes?
>

Right, that needs adjustment as well.


--
Thanks,

David / dhildenb

2022-09-02 02:48:21

by Muchun Song

[permalink] [raw]
Subject: Re: [PATCH] mm: hugetlb: eliminate memory-less nodes handling



> On Sep 1, 2022, at 16:50, David Hildenbrand <[email protected]> wrote:
>
> On 01.09.22 10:30, Muchun Song wrote:
>> The memory-notify-based approach aims to handle meory-less nodes, however, it just adds
>> the complexity of code as pointed by David in thread [1]. The handling of memory-less
>> nodes is introduced by commit 4faf8d950ec4 ("hugetlb: handle memory hot-plug events").
>> From its commit message, we cannot find any necessity of handling this case. So, we can
>> simply register/unregister sysfs entries in register_node/unregister_node to simlify the
>> code.
>>
>> https://lore.kernel.org/linux-mm/[email protected]/ [1]
>> Suggested-by: David Hildenbrand <[email protected]>
>> Signed-off-by: Muchun Song <[email protected]>
>> ---
>> drivers/base/node.c | 7 +++++--
>> include/linux/node.h | 5 +++++
>> mm/hugetlb.c | 37 ++++++++++---------------------------
>> 3 files changed, 20 insertions(+), 29 deletions(-)
>>
>> diff --git a/drivers/base/node.c b/drivers/base/node.c
>> index ed391cb09999..cf115d5a9b8a 100644
>> --- a/drivers/base/node.c
>> +++ b/drivers/base/node.c
>> @@ -608,10 +608,12 @@ static int register_node(struct node *node, int num)
>> node->dev.groups = node_dev_groups;
>> error = device_register(&node->dev);
>>
>> - if (error)
>> + if (error) {
>> put_device(&node->dev);
>> - else
>> + } else {
>> + hugetlb_register_node(node);
>> compaction_register_node(node);
>> + }
>
> Good, so this matches what other code does.
>
>>
>> return error;
>> }
>> @@ -625,6 +627,7 @@ static int register_node(struct node *node, int num)
>> */
>> void unregister_node(struct node *node)
>> {
>> + hugetlb_unregister_node(node);
>> compaction_unregister_node(node);
>> node_remove_accesses(node);
>> node_remove_caches(node);
>> diff --git a/include/linux/node.h b/include/linux/node.h
>> index 427a5975cf40..f5d41498c2bf 100644
>> --- a/include/linux/node.h
>> +++ b/include/linux/node.h
>> @@ -138,6 +138,11 @@ extern void unregister_memory_block_under_nodes(struct memory_block *mem_blk);
>> extern int register_memory_node_under_compute_node(unsigned int mem_nid,
>> unsigned int cpu_nid,
>> unsigned access);
>> +
>> +#ifdef CONFIG_HUGETLBFS
>> +void hugetlb_register_node(struct node *node);
>> +void hugetlb_unregister_node(struct node *node);
>> +#endif
>
> compaction_register_node() resides in include/linux/compaction.h, so I
> wonder if this should go into hugetlb.h (unless it causes trouble)

I think yes. Will update in next version.

>
>> #else
>> static inline void node_dev_init(void)
>> {
>> diff --git a/mm/hugetlb.c b/mm/hugetlb.c
>> index d0617d64d718..722e862bb6be 100644
>> --- a/mm/hugetlb.c
>> +++ b/mm/hugetlb.c
>> @@ -3898,6 +3898,7 @@ static void __init hugetlb_sysfs_init(void)
>> }
>>
>> #ifdef CONFIG_NUMA
>> +static bool hugetlb_initialized __ro_after_init;
>
> We set it out of hugetlb_register_all_nodes(), so it conceptually not
> correct. We either need a better name here or set it from generic init code.
>
> You could call it hugetlb_sysfs_initialized() and set that from
> hugetlb_sysfs_init(), which is called just before
> hugetlb_register_all_nodes().

Make sense.

>
> [ shouldn't hugetlb_register_all_nodes() get called from
> hugetlb_sysfs_init() ? it's all about sysfs as well ... ]

Yep, we can call hugetlb_register_all_nodes() in hugetlb_sysfs_init().

>
>>
>> /*
>> * node_hstate/s - associate per node hstate attributes, via their kobjects,
>> @@ -3953,7 +3954,7 @@ static struct hstate *kobj_to_node_hstate(struct kobject *kobj, int *nidp)
>> * Unregister hstate attributes from a single node device.
>> * No-op if no hstate attributes attached.
>> */
>> -static void hugetlb_unregister_node(struct node *node)
>> +void hugetlb_unregister_node(struct node *node)
>> {
>> struct hstate *h;
>> struct node_hstate *nhs = &node_hstates[node->dev.id];
>> @@ -3983,19 +3984,22 @@ static void hugetlb_unregister_node(struct node *node)
>> * Register hstate attributes for a single node device.
>> * No-op if attributes already registered.
>> */
>> -static int hugetlb_register_node(struct node *node)
>> +void hugetlb_register_node(struct node *node)
>> {
>> struct hstate *h;
>> struct node_hstate *nhs = &node_hstates[node->dev.id];
>> int err;
>>
>> + if (!hugetlb_initialized)
>> + return;
>> +
>> if (nhs->hugepages_kobj)
>> - return 0; /* already allocated */
>> + return; /* already allocated */
>>
>> nhs->hugepages_kobj = kobject_create_and_add("hugepages",
>> &node->dev.kobj);
>> if (!nhs->hugepages_kobj)
>> - return -ENOMEM;
>> + return;
>>
>> for_each_hstate(h) {
>> err = hugetlb_sysfs_add_hstate(h, nhs->hugepages_kobj,
>> @@ -4005,28 +4009,9 @@ static int hugetlb_register_node(struct node *node)
>> pr_err("HugeTLB: Unable to add hstate %s for node %d\n",
>> h->name, node->dev.id);
>> hugetlb_unregister_node(node);
>> - return -ENOMEM;
>> + break;
>> }
>> }
>> - return 0;
>> -}
>> -
>> -static int __meminit hugetlb_memory_callback(struct notifier_block *self,
>> - unsigned long action, void *arg)
>> -{
>> - int ret = 0;
>> - struct memory_notify *mnb = arg;
>> - int nid = mnb->status_change_nid;
>> -
>> - if (nid == NUMA_NO_NODE)
>> - return NOTIFY_DONE;
>> -
>> - if (action == MEM_GOING_ONLINE)
>> - ret = hugetlb_register_node(node_devices[nid]);
>> - else if (action == MEM_CANCEL_ONLINE || action == MEM_OFFLINE)
>> - hugetlb_unregister_node(node_devices[nid]);
>> -
>> - return notifier_from_errno(ret);
>> }
>>
>> /*
>> @@ -4038,11 +4023,9 @@ static void __init hugetlb_register_all_nodes(void)
>> {
>> int nid;
>>
>> - get_online_mems();
>> - hotplug_memory_notifier(hugetlb_memory_callback, 0);
>> + hugetlb_initialized = true;
>> for_each_node_state(nid, N_MEMORY)
>> hugetlb_register_node(node_devices[nid]);
>> - put_online_mems();
>> }
>> #else /* !CONFIG_NUMA */
>>
>
> Apart from the comments, looks good and clean to me. Thanks!

Thanks for your suggestions and review.

Muchun

>
> --
> Thanks,
>
> David / dhildenb

2022-09-02 04:38:44

by Muchun Song

[permalink] [raw]
Subject: Re: [PATCH] mm: hugetlb: eliminate memory-less nodes handling



> On Sep 1, 2022, at 17:00, Aneesh Kumar K V <[email protected]> wrote:
>
> On 9/1/22 2:00 PM, Muchun Song wrote:
>> The memory-notify-based approach aims to handle meory-less nodes, however, it just adds
>> the complexity of code as pointed by David in thread [1]. The handling of memory-less
>> nodes is introduced by commit 4faf8d950ec4 ("hugetlb: handle memory hot-plug events").
>> From its commit message, we cannot find any necessity of handling this case. So, we can
>> simply register/unregister sysfs entries in register_node/unregister_node to simlify the
>> code.
>
> Isn't that hotplug callback added because in hugetlb_register_all_nodes() we register
> sysfs nodes only for N_MEMORY nodes?

I think you might right. I have looked at the commit 9a30523066cd which introduces the sysfs
creation. I saw it create the sysfs for every possible node.

for (nid = 0; nid < nr_node_ids; nid++)
hugetlb_register_node(node);

And then I checked the commit 9b5e5d0fdc91, which said it was a preparation for handling
memory-less nodes via memory hotplug.

>
>
>>
>> https://lore.kernel.org/linux-mm/[email protected]/ [1]
>> Suggested-by: David Hildenbrand <[email protected]>
>> Signed-off-by: Muchun Song <[email protected]>
>> ---
>> drivers/base/node.c | 7 +++++--
>> include/linux/node.h | 5 +++++
>> mm/hugetlb.c | 37 ++++++++++---------------------------
>> 3 files changed, 20 insertions(+), 29 deletions(-)
>>
>> diff --git a/drivers/base/node.c b/drivers/base/node.c
>> index ed391cb09999..cf115d5a9b8a 100644
>> --- a/drivers/base/node.c
>> +++ b/drivers/base/node.c
>> @@ -608,10 +608,12 @@ static int register_node(struct node *node, int num)
>> node->dev.groups = node_dev_groups;
>> error = device_register(&node->dev);
>>
>> - if (error)
>> + if (error) {
>> put_device(&node->dev);
>> - else
>> + } else {
>> + hugetlb_register_node(node);
>> compaction_register_node(node);
>> + }
>>
>
>
> I guess this will handle register of sysfs hugetlb files for new NUMA nodes
> after hugetlb_initialized = true;

Yes.

>
> But what about N_CPU that can get memory added later. Do we need to update
> hugetlb_register_all_nodes() to handle N_ONLINE?

I think we should.

>
>
>> return error;
>> }
>> @@ -625,6 +627,7 @@ static int register_node(struct node *node, int num)
>> */
>> void unregister_node(struct node *node)
>> {
>> + hugetlb_unregister_node(node);
>> compaction_unregister_node(node);
>> node_remove_accesses(node);
>> node_remove_caches(node);
>> diff --git a/include/linux/node.h b/include/linux/node.h
>> index 427a5975cf40..f5d41498c2bf 100644
>> --- a/include/linux/node.h
>> +++ b/include/linux/node.h
>> @@ -138,6 +138,11 @@ extern void unregister_memory_block_under_nodes(struct memory_block *mem_blk);
>> extern int register_memory_node_under_compute_node(unsigned int mem_nid,
>> unsigned int cpu_nid,
>> unsigned access);
>> +
>> +#ifdef CONFIG_HUGETLBFS
>> +void hugetlb_register_node(struct node *node);
>> +void hugetlb_unregister_node(struct node *node);
>> +#endif
>> #else
>> static inline void node_dev_init(void)
>> {
>> diff --git a/mm/hugetlb.c b/mm/hugetlb.c
>> index d0617d64d718..722e862bb6be 100644
>> --- a/mm/hugetlb.c
>> +++ b/mm/hugetlb.c
>> @@ -3898,6 +3898,7 @@ static void __init hugetlb_sysfs_init(void)
>> }
>>
>> #ifdef CONFIG_NUMA
>> +static bool hugetlb_initialized __ro_after_init;
>>
>> /*
>> * node_hstate/s - associate per node hstate attributes, via their kobjects,
>> @@ -3953,7 +3954,7 @@ static struct hstate *kobj_to_node_hstate(struct kobject *kobj, int *nidp)
>> * Unregister hstate attributes from a single node device.
>> * No-op if no hstate attributes attached.
>> */
>> -static void hugetlb_unregister_node(struct node *node)
>> +void hugetlb_unregister_node(struct node *node)
>> {
>> struct hstate *h;
>> struct node_hstate *nhs = &node_hstates[node->dev.id];
>> @@ -3983,19 +3984,22 @@ static void hugetlb_unregister_node(struct node *node)
>> * Register hstate attributes for a single node device.
>> * No-op if attributes already registered.
>> */
>> -static int hugetlb_register_node(struct node *node)
>> +void hugetlb_register_node(struct node *node)
>> {
>> struct hstate *h;
>> struct node_hstate *nhs = &node_hstates[node->dev.id];
>> int err;
>>
>> + if (!hugetlb_initialized)
>> + return;
>> +
>> if (nhs->hugepages_kobj)
>> - return 0; /* already allocated */
>> + return; /* already allocated */
>>
>> nhs->hugepages_kobj = kobject_create_and_add("hugepages",
>> &node->dev.kobj);
>> if (!nhs->hugepages_kobj)
>> - return -ENOMEM;
>> + return;
>>
>> for_each_hstate(h) {
>> err = hugetlb_sysfs_add_hstate(h, nhs->hugepages_kobj,
>> @@ -4005,28 +4009,9 @@ static int hugetlb_register_node(struct node *node)
>> pr_err("HugeTLB: Unable to add hstate %s for node %d\n",
>> h->name, node->dev.id);
>> hugetlb_unregister_node(node);
>> - return -ENOMEM;
>> + break;
>> }
>> }
>> - return 0;
>> -}
>> -
>> -static int __meminit hugetlb_memory_callback(struct notifier_block *self,
>> - unsigned long action, void *arg)
>> -{
>> - int ret = 0;
>> - struct memory_notify *mnb = arg;
>> - int nid = mnb->status_change_nid;
>> -
>> - if (nid == NUMA_NO_NODE)
>> - return NOTIFY_DONE;
>> -
>> - if (action == MEM_GOING_ONLINE)
>> - ret = hugetlb_register_node(node_devices[nid]);
>> - else if (action == MEM_CANCEL_ONLINE || action == MEM_OFFLINE)
>> - hugetlb_unregister_node(node_devices[nid]);
>> -
>> - return notifier_from_errno(ret);
>> }
>>
>> /*
>> @@ -4038,11 +4023,9 @@ static void __init hugetlb_register_all_nodes(void)
>> {
>> int nid;
>>
>> - get_online_mems();
>> - hotplug_memory_notifier(hugetlb_memory_callback, 0);
>> + hugetlb_initialized = true;
>> for_each_node_state(nid, N_MEMORY)
>
>
> Should this be for_each_online_node() ?

So, yes.

Thanks for your review.

Muchun.

>
>> hugetlb_register_node(node_devices[nid]);
>> - put_online_mems();
>> }
>> #else /* !CONFIG_NUMA */

2022-09-02 05:06:30

by kernel test robot

[permalink] [raw]
Subject: Re: [PATCH] mm: hugetlb: eliminate memory-less nodes handling

Hi Muchun,

I love your patch! Yet something to improve:

[auto build test ERROR on akpm-mm/mm-everything]

url: https://github.com/intel-lab-lkp/linux/commits/Muchun-Song/mm-hugetlb-eliminate-memory-less-nodes-handling/20220901-163132
base: https://git.kernel.org/pub/scm/linux/kernel/git/akpm/mm.git mm-everything
config: loongarch-buildonly-randconfig-r004-20220901 (https://download.01.org/0day-ci/archive/20220902/[email protected]/config)
compiler: loongarch64-linux-gcc (GCC) 12.1.0
reproduce (this is a W=1 build):
wget https://raw.githubusercontent.com/intel/lkp-tests/master/sbin/make.cross -O ~/bin/make.cross
chmod +x ~/bin/make.cross
# https://github.com/intel-lab-lkp/linux/commit/8e3d203cb06be81565d117b3f6d5e279a833174c
git remote add linux-review https://github.com/intel-lab-lkp/linux
git fetch --no-tags linux-review Muchun-Song/mm-hugetlb-eliminate-memory-less-nodes-handling/20220901-163132
git checkout 8e3d203cb06be81565d117b3f6d5e279a833174c
# save the config file
COMPILER_INSTALL_PATH=$HOME/0day COMPILER=gcc-12.1.0 make.cross W=1 ARCH=loongarch

If you fix the issue, kindly add following tag where applicable
Reported-by: kernel test robot <[email protected]>

All errors (new ones prefixed by >>):

drivers/base/node.c: In function 'register_node':
>> drivers/base/node.c:614:17: error: implicit declaration of function 'hugetlb_register_node'; did you mean 'unregister_node'? [-Werror=implicit-function-declaration]
614 | hugetlb_register_node(node);
| ^~~~~~~~~~~~~~~~~~~~~
| unregister_node
drivers/base/node.c: In function 'unregister_node':
>> drivers/base/node.c:630:9: error: implicit declaration of function 'hugetlb_unregister_node'; did you mean 'unregister_node'? [-Werror=implicit-function-declaration]
630 | hugetlb_unregister_node(node);
| ^~~~~~~~~~~~~~~~~~~~~~~
| unregister_node
cc1: some warnings being treated as errors


vim +614 drivers/base/node.c

594
595 /*
596 * register_node - Setup a sysfs device for a node.
597 * @num - Node number to use when creating the device.
598 *
599 * Initialize and register the node device.
600 */
601 static int register_node(struct node *node, int num)
602 {
603 int error;
604
605 node->dev.id = num;
606 node->dev.bus = &node_subsys;
607 node->dev.release = node_device_release;
608 node->dev.groups = node_dev_groups;
609 error = device_register(&node->dev);
610
611 if (error) {
612 put_device(&node->dev);
613 } else {
> 614 hugetlb_register_node(node);
615 compaction_register_node(node);
616 }
617
618 return error;
619 }
620
621 /**
622 * unregister_node - unregister a node device
623 * @node: node going away
624 *
625 * Unregisters a node device @node. All the devices on the node must be
626 * unregistered before calling this function.
627 */
628 void unregister_node(struct node *node)
629 {
> 630 hugetlb_unregister_node(node);
631 compaction_unregister_node(node);
632 node_remove_accesses(node);
633 node_remove_caches(node);
634 device_unregister(&node->dev);
635 }
636

--
0-DAY CI Kernel Test Service
https://01.org/lkp