From: Alastair D'Silva <[email protected]>
This series addresses some minor issues found when developing a
persistent memory driver.
As well as cleanup code, it also exports try_online_node so that
it can be called from driver modules that provide access to additional
physical memory.
Alastair D'Silva (5):
mm: Trigger bug on if a section is not found in __section_nr
mm: don't hide potentially null memmap pointer in
sparse_remove_one_section
mm: Don't manually decrement num_poisoned_pages
mm/hotplug: Avoid RCU stalls when removing large amounts of memory
mm/hotplug: export try_online_node
include/linux/memory_hotplug.h | 4 ++--
kernel/cpu.c | 2 +-
mm/memory_hotplug.c | 23 ++++++++++++++++++-----
mm/sparse.c | 28 +++++++++++++++++-----------
4 files changed, 38 insertions(+), 19 deletions(-)
--
2.21.0
From: Alastair D'Silva <[email protected]>
If a memory section comes in where the physical address is greater than
that which is managed by the kernel, this function would not trigger the
bug and instead return a bogus section number.
This patch tracks whether the section was actually found, and triggers the
bug if not.
Signed-off-by: Alastair D'Silva <[email protected]>
---
mm/sparse.c | 13 ++++++++-----
1 file changed, 8 insertions(+), 5 deletions(-)
diff --git a/mm/sparse.c b/mm/sparse.c
index fd13166949b5..104a79fedd00 100644
--- a/mm/sparse.c
+++ b/mm/sparse.c
@@ -105,20 +105,23 @@ static inline int sparse_index_init(unsigned long section_nr, int nid)
int __section_nr(struct mem_section* ms)
{
unsigned long root_nr;
- struct mem_section *root = NULL;
+ struct mem_section *found = NULL;
+ struct mem_section *root;
for (root_nr = 0; root_nr < NR_SECTION_ROOTS; root_nr++) {
root = __nr_to_section(root_nr * SECTIONS_PER_ROOT);
if (!root)
continue;
- if ((ms >= root) && (ms < (root + SECTIONS_PER_ROOT)))
- break;
+ if ((ms >= root) && (ms < (root + SECTIONS_PER_ROOT))) {
+ found = root;
+ break;
+ }
}
- VM_BUG_ON(!root);
+ VM_BUG_ON(!found);
- return (root_nr * SECTIONS_PER_ROOT) + (ms - root);
+ return (root_nr * SECTIONS_PER_ROOT) + (ms - found);
}
#else
int __section_nr(struct mem_section* ms)
--
2.21.0
From: Alastair D'Silva <[email protected]>
If an external driver module supplies physical memory and needs to expose
the memory on a specific NUMA node, it needs to be able to call
try_online_node to allocate the data structures for the node.
The previous assertion that all callers want to online the node, and that
the provided memory address starts at 0 is no longer true, so these
parameters must alse be exposed.
Signed-off-by: Alastair D'Silva <[email protected]>
---
include/linux/memory_hotplug.h | 4 ++--
kernel/cpu.c | 2 +-
mm/memory_hotplug.c | 20 +++++++++++++++-----
3 files changed, 18 insertions(+), 8 deletions(-)
diff --git a/include/linux/memory_hotplug.h b/include/linux/memory_hotplug.h
index ae892eef8b82..9272e7955541 100644
--- a/include/linux/memory_hotplug.h
+++ b/include/linux/memory_hotplug.h
@@ -109,7 +109,7 @@ extern void __online_page_set_limits(struct page *page);
extern void __online_page_increment_counters(struct page *page);
extern void __online_page_free(struct page *page);
-extern int try_online_node(int nid);
+int try_online_node(int nid, u64 start, bool set_node_online);
extern int arch_add_memory(int nid, u64 start, u64 size,
struct mhp_restrictions *restrictions);
@@ -274,7 +274,7 @@ static inline void register_page_bootmem_info_node(struct pglist_data *pgdat)
{
}
-static inline int try_online_node(int nid)
+static inline int try_online_node(int nid, u64 start, bool set_node_online)
{
return 0;
}
diff --git a/kernel/cpu.c b/kernel/cpu.c
index 077fde6fb953..ffe5f7239a5c 100644
--- a/kernel/cpu.c
+++ b/kernel/cpu.c
@@ -1167,7 +1167,7 @@ static int do_cpu_up(unsigned int cpu, enum cpuhp_state target)
return -EINVAL;
}
- err = try_online_node(cpu_to_node(cpu));
+ err = try_online_node(cpu_to_node(cpu), 0, true);
if (err)
return err;
diff --git a/mm/memory_hotplug.c b/mm/memory_hotplug.c
index 382b3a0c9333..9c2784f89e60 100644
--- a/mm/memory_hotplug.c
+++ b/mm/memory_hotplug.c
@@ -1004,7 +1004,7 @@ static void rollback_node_hotadd(int nid)
/**
- * try_online_node - online a node if offlined
+ * __try_online_node - online a node if offlined
* @nid: the node ID
* @start: start addr of the node
* @set_node_online: Whether we want to online the node
@@ -1039,18 +1039,28 @@ static int __try_online_node(int nid, u64 start, bool set_node_online)
return ret;
}
-/*
- * Users of this function always want to online/register the node
+/**
+ * try_online_node - online a node if offlined
+ * @nid: the node ID
+ * @start: start addr of the node
+ * @set_node_online: Whether we want to online the node
+ * called by cpu_up() to online a node without onlined memory.
+ *
+ * Returns:
+ * 1 -> a new node has been allocated
+ * 0 -> the node is already online
+ * -ENOMEM -> the node could not be allocated
*/
-int try_online_node(int nid)
+int try_online_node(int nid, u64 start, bool set_node_online)
{
int ret;
mem_hotplug_begin();
- ret = __try_online_node(nid, 0, true);
+ ret = __try_online_node(nid, start, set_node_online);
mem_hotplug_done();
return ret;
}
+EXPORT_SYMBOL_GPL(try_online_node);
static int check_hotplug_memory_range(u64 start, u64 size)
{
--
2.21.0
On Mon, Jun 17, 2019 at 02:36:27PM +1000, Alastair D'Silva wrote:
> From: Alastair D'Silva <[email protected]>
>
> If a memory section comes in where the physical address is greater than
> that which is managed by the kernel, this function would not trigger the
> bug and instead return a bogus section number.
>
> This patch tracks whether the section was actually found, and triggers the
> bug if not.
>
> Signed-off-by: Alastair D'Silva <[email protected]>
> ---
> mm/sparse.c | 13 ++++++++-----
> 1 file changed, 8 insertions(+), 5 deletions(-)
>
> diff --git a/mm/sparse.c b/mm/sparse.c
> index fd13166949b5..104a79fedd00 100644
> --- a/mm/sparse.c
> +++ b/mm/sparse.c
> @@ -105,20 +105,23 @@ static inline int sparse_index_init(unsigned long section_nr, int nid)
> int __section_nr(struct mem_section* ms)
> {
> unsigned long root_nr;
> - struct mem_section *root = NULL;
> + struct mem_section *found = NULL;
> + struct mem_section *root;
>
> for (root_nr = 0; root_nr < NR_SECTION_ROOTS; root_nr++) {
> root = __nr_to_section(root_nr * SECTIONS_PER_ROOT);
> if (!root)
> continue;
>
> - if ((ms >= root) && (ms < (root + SECTIONS_PER_ROOT)))
> - break;
> + if ((ms >= root) && (ms < (root + SECTIONS_PER_ROOT))) {
> + found = root;
> + break;
> + }
> }
>
> - VM_BUG_ON(!root);
> + VM_BUG_ON(!found);
Isn't it enough to check for root_nr == NR_SECTION_ROOTS?
>
> - return (root_nr * SECTIONS_PER_ROOT) + (ms - root);
> + return (root_nr * SECTIONS_PER_ROOT) + (ms - found);
It'll still return a bogus section number with CONFIG_DEBUG_VM=n
> }
> #else
> int __section_nr(struct mem_section* ms)
> --
> 2.21.0
>
--
Sincerely yours,
Mike.
On Mon, Jun 17, 2019 at 02:36:31PM +1000, Alastair D'Silva wrote:
> From: Alastair D'Silva <[email protected]>
>
> If an external driver module supplies physical memory and needs to expose
Why would you ever want to allow a module to do such a thing?
On Mon, 2019-06-17 at 08:59 +0200, Peter Zijlstra wrote:
> On Mon, Jun 17, 2019 at 02:36:31PM +1000, Alastair D'Silva wrote:
> > From: Alastair D'Silva <[email protected]>
> >
> > If an external driver module supplies physical memory and needs to
> > expose
>
> Why would you ever want to allow a module to do such a thing?
>
I'm working on a driver for Storage Class Memory, connected via an
OpenCAPI link.
The memory is only usable once the card says it's OK to access it.
--
Alastair D'Silva mob: 0423 762 819
skype: alastair_dsilva
Twitter: @EvilDeece
blog: http://alastair.d-silva.org
On Mon, Jun 17, 2019 at 05:05:30PM +1000, Alastair D'Silva wrote:
> On Mon, 2019-06-17 at 08:59 +0200, Peter Zijlstra wrote:
> > On Mon, Jun 17, 2019 at 02:36:31PM +1000, Alastair D'Silva wrote:
> > > From: Alastair D'Silva <[email protected]>
> > >
> > > If an external driver module supplies physical memory and needs to
> > > expose
> >
> > Why would you ever want to allow a module to do such a thing?
> >
>
> I'm working on a driver for Storage Class Memory, connected via an
> OpenCAPI link.
>
> The memory is only usable once the card says it's OK to access it.
And all that should go through our pmem APIs, not not directly
poke into mm internals. And if you still need core patches send them
along with the actual driver.
[Cc Jerome - email thread starts
http://lkml.kernel.org/r/[email protected]]
On Mon 17-06-19 17:05:30, Alastair D'Silva wrote:
> On Mon, 2019-06-17 at 08:59 +0200, Peter Zijlstra wrote:
> > On Mon, Jun 17, 2019 at 02:36:31PM +1000, Alastair D'Silva wrote:
> > > From: Alastair D'Silva <[email protected]>
> > >
> > > If an external driver module supplies physical memory and needs to
> > > expose
> >
> > Why would you ever want to allow a module to do such a thing?
> >
>
> I'm working on a driver for Storage Class Memory, connected via an
> OpenCAPI link.
>
> The memory is only usable once the card says it's OK to access it.
Isn't this what HMM is aiming for? Could you give a more precise
description of what the actual storage is, how it is going to be used
etc... In other words describe the usecase?
--
Michal Hocko
SUSE Labs
> -----Original Message-----
> From: Christoph Hellwig <[email protected]>
> Sent: Monday, 17 June 2019 5:15 PM
> To: Alastair D'Silva <[email protected]>
> Cc: Peter Zijlstra <[email protected]>; Andrew Morton <akpm@linux-
> foundation.org>; David Hildenbrand <[email protected]>; Oscar Salvador
> <[email protected]>; Michal Hocko <[email protected]>; Pavel Tatashin
> <[email protected]>; Wei Yang <[email protected]>;
> Arun KS <[email protected]>; Qian Cai <[email protected]>; Thomas Gleixner
> <[email protected]>; Ingo Molnar <[email protected]>; Josh Poimboeuf
> <[email protected]>; Jiri Kosina <[email protected]>; Mukesh Ojha
> <[email protected]>; Mike Rapoport <[email protected]>;
> Baoquan He <[email protected]>; Logan Gunthorpe
> <[email protected]>; [email protected]; linux-
> [email protected]; [email protected]
> Subject: Re: [PATCH 5/5] mm/hotplug: export try_online_node
>
> On Mon, Jun 17, 2019 at 05:05:30PM +1000, Alastair D'Silva wrote:
> > On Mon, 2019-06-17 at 08:59 +0200, Peter Zijlstra wrote:
> > > On Mon, Jun 17, 2019 at 02:36:31PM +1000, Alastair D'Silva wrote:
> > > > From: Alastair D'Silva <[email protected]>
> > > >
> > > > If an external driver module supplies physical memory and needs to
> > > > expose
> > >
> > > Why would you ever want to allow a module to do such a thing?
> > >
> >
> > I'm working on a driver for Storage Class Memory, connected via an
> > OpenCAPI link.
> >
> > The memory is only usable once the card says it's OK to access it.
>
> And all that should go through our pmem APIs, not not directly poke into
mm
> internals. And if you still need core patches send them along with the
actual
> driver.
I tried that, but I was getting crashes as the NUMA data structures for that
node were not initialised.
Calling this was required to prevent uninitialized accesses in the pmem
library.
--
Alastair D'Silva mob: 0423 762 819
skype: alastair_dsilva msn: [email protected]
blog: http://alastair.d-silva.org Twitter: @EvilDeece
On Mon, Jun 17, 2019 at 06:00:00PM +1000, Alastair D'Silva wrote:
> > And all that should go through our pmem APIs, not not directly poke into
> mm
> > internals. And if you still need core patches send them along with the
> actual
> > driver.
>
> I tried that, but I was getting crashes as the NUMA data structures for that
> node were not initialised.
>
> Calling this was required to prevent uninitialized accesses in the pmem
> library.
Please send your driver to the linux-nvdimm and linux-mm lists so that
it can be carefully reviewed.