2023-11-23 09:24:57

by Sumanth Korikkar

Subject: [PATCH v2 0/7] implement "memmap on memory" feature on s390

Hi All,

This patch series implements the "memmap on memory" feature on s390.

Patch 1 introduces the new mhp_flag MHP_OFFLINE_INACCESSIBLE to mark
memory as inaccessible until the memory hotplug online phase begins.
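
For illustration, a caller opting in would combine the new flag with
MHP_MEMMAP_ON_MEMORY (a hypothetical sketch, not a hunk from this
series; the real s390 call site appears in patch 6):

	/* nid, start and block_size are placeholders */
	rc = add_memory(nid, start, block_size,
			MHP_MEMMAP_ON_MEMORY | MHP_OFFLINE_INACCESSIBLE);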

Patch 2 avoids page_init_poison() on the memmap during the mhp addition
phase, when the mhp_flag MHP_OFFLINE_INACCESSIBLE is passed over from
add_memory().

Patch 3 introduces MEM_PREPARE_ONLINE/MEM_FINISH_OFFLINE memory
notifiers to prepare the transition of memory to and from a physically
accessible state. This enhancement is crucial for implementing the
"memmap on memory" feature for s390 in a subsequent patch.

Patch 4 allocates vmemmap pages from the self-contained memory range
for s390. It allocates the memory map (the struct page array) from the
hotplugged memory range, rather than from system memory, by passing an
altmap to the vmemmap functions.
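
Conceptually (a minimal sketch using the existing struct vmem_altmap
fields, not the exact code from this patch), the altmap describes spare
PFNs at the start of the hotplugged range from which the memmap is
carved:

	struct vmem_altmap altmap = {
		.base_pfn = PHYS_PFN(start),	/* first PFN of the hotplugged range */
		.free	  = nr_vmemmap_pages,	/* PFNs set aside for the memmap itself */
	};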

Patch 5 removes unhandled memory notifier types on s390.

Patch 6 implements the MEM_PREPARE_ONLINE/MEM_FINISH_OFFLINE memory
notifiers on s390. The MEM_PREPARE_ONLINE memory notifier makes the
memory block physically accessible via the sclp assign command. The
notifier ensures self-contained memory maps are accessible, thereby
enabling the "memmap on memory" feature on s390. The MEM_FINISH_OFFLINE
memory notifier shifts the memory block to an inaccessible state via
the sclp unassign command.

Patch 7 finally enables MHP_MEMMAP_ON_MEMORY on s390.
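
Judging from the diffstat below (arch/s390/Kconfig | 1 +), this
presumably boils down to a one-line Kconfig select, along the lines of:

	select ARCH_MHP_MEMMAP_ON_MEMORY_ENABLE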

These patches are rebased on top of three fixes:
mm: use vmem_altmap code without CONFIG_ZONE_DEVICE
mm/memory_hotplug: fix error handling in add_memory_resource()
mm/memory_hotplug: add missing mem_hotplug_lock

v2:
* The fixes have been integrated and are hence removed from this patch
series.
Suggestions from David:
* Add the new flag MHP_OFFLINE_INACCESSIBLE to avoid accessing memory
during the memory hotplug addition phase.
* Avoid page_init_poison() on the memmap during the mhp addition phase,
when the MHP_OFFLINE_INACCESSIBLE mhp_flag is passed to add_memory().
* Do not skip add_pages() in arch_add_memory(). Likewise, remove
similar hacks in arch_remove_memory().
* Use the MEM_PREPARE_ONLINE/MEM_FINISH_OFFLINE naming convention for
the new memory notifiers.
* Rearrange the removal of the unused s390 memory notifier.
* Necessary commit message changes.

Thank you

Sumanth Korikkar (7):
mm/memory_hotplug: introduce mhp_flag MHP_OFFLINE_INACCESSIBLE
mm/memory_hotplug: avoid poisoning memmap during mhp addition phase
mm/memory_hotplug: introduce MEM_PREPARE_ONLINE/MEM_FINISH_OFFLINE
notifiers
s390/mm: allocate vmemmap pages from self-contained memory range
s390/sclp: remove unhandled memory notifier type
s390/mm: implement MEM_PREPARE_ONLINE/MEM_FINISH_OFFLINE notifiers
s390: enable MHP_MEMMAP_ON_MEMORY

arch/s390/Kconfig | 1 +
arch/s390/mm/init.c | 3 --
arch/s390/mm/vmem.c | 62 +++++++++++++++++++---------------
drivers/base/memory.c | 23 +++++++++++--
drivers/s390/char/sclp_cmd.c | 31 ++++++++++++-----
include/linux/memory.h | 3 ++
include/linux/memory_hotplug.h | 12 ++++++-
include/linux/memremap.h | 1 +
mm/memory_hotplug.c | 30 ++++++++++++++--
mm/sparse.c | 3 +-
10 files changed, 124 insertions(+), 45 deletions(-)

--
2.39.2


2023-11-23 09:25:02

by Sumanth Korikkar

Subject: [PATCH v2 2/7] mm/memory_hotplug: avoid poisoning memmap during mhp addition phase

When a memory block is marked inaccessible, avoid page_init_poison() on
the memmap during the memory hotplug addition phase. Instead, perform it
later, when the memory is physically accessible, in the memory hotplug
online phase.

When the memory block is marked accessible (the default, when no
mhp_flags are passed), page init poisoning on the memmap is performed in
sparse_add_section().

Page init poisoning on the memmap is performed with cond_resched(). This
mirrors the functionality of commit d33695b16a9f ("mm/memory_hotplug:
poison memmap in remove_pfn_range_from_zone()").

Architectures can pass the MHP_OFFLINE_INACCESSIBLE mhp_flag to
add_memory() to mark the memory block as initially inaccessible during
the memory hotplug addition phase.
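
Patch 1 (not reproduced here) plumbs the flag through to the altmap
that backs the range; conceptually something like the following sketch
(not the literal hunk):

	/* in add_memory_resource(), when memmap_on_memory is in effect */
	if (mhp_flags & MHP_OFFLINE_INACCESSIBLE)
		mhp_altmap.inaccessible = true;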

Signed-off-by: Sumanth Korikkar <[email protected]>
---
drivers/base/memory.c | 3 ++-
include/linux/memory_hotplug.h | 2 +-
mm/memory_hotplug.c | 27 ++++++++++++++++++++++++++-
mm/sparse.c | 3 ++-
4 files changed, 31 insertions(+), 4 deletions(-)

diff --git a/drivers/base/memory.c b/drivers/base/memory.c
index 51915d5c3f88..cbff43b2ef44 100644
--- a/drivers/base/memory.c
+++ b/drivers/base/memory.c
@@ -209,7 +209,8 @@ static int memory_block_online(struct memory_block *mem)

mem_hotplug_begin();
if (nr_vmemmap_pages) {
- ret = mhp_init_memmap_on_memory(start_pfn, nr_vmemmap_pages, zone);
+ ret = mhp_init_memmap_on_memory(start_pfn, nr_vmemmap_pages,
+ zone, mem->inaccessible);
if (ret)
goto out;
}
diff --git a/include/linux/memory_hotplug.h b/include/linux/memory_hotplug.h
index 8988cd5ad55d..791bc019e992 100644
--- a/include/linux/memory_hotplug.h
+++ b/include/linux/memory_hotplug.h
@@ -164,7 +164,7 @@ extern void adjust_present_page_count(struct page *page,
long nr_pages);
/* VM interface that may be used by firmware interface */
extern int mhp_init_memmap_on_memory(unsigned long pfn, unsigned long nr_pages,
- struct zone *zone);
+ struct zone *zone, bool mhp_off_inaccessible);
extern void mhp_deinit_memmap_on_memory(unsigned long pfn, unsigned long nr_pages);
extern int online_pages(unsigned long pfn, unsigned long nr_pages,
struct zone *zone, struct memory_group *group);
diff --git a/mm/memory_hotplug.c b/mm/memory_hotplug.c
index 1e02eba166b0..ac7cfc09502d 100644
--- a/mm/memory_hotplug.c
+++ b/mm/memory_hotplug.c
@@ -1083,8 +1083,25 @@ void adjust_present_page_count(struct page *page, struct memory_group *group,
group->present_kernel_pages += nr_pages;
}

+static void page_init_poison_with_resched(unsigned long start_pfn, unsigned long nr_pages)
+{
+ const unsigned long end_pfn = start_pfn + nr_pages;
+ unsigned long pfn, cur_nr_pages;
+
+ /* Poison struct pages because they are now uninitialized again. */
+ for (pfn = start_pfn; pfn < end_pfn; pfn += cur_nr_pages) {
+ cond_resched();
+
+ /* Select all remaining pages up to the next section boundary */
+ cur_nr_pages =
+ min(end_pfn - pfn, SECTION_ALIGN_UP(pfn + 1) - pfn);
+ page_init_poison(pfn_to_page(pfn),
+ sizeof(struct page) * cur_nr_pages);
+ }
+}
+
int mhp_init_memmap_on_memory(unsigned long pfn, unsigned long nr_pages,
- struct zone *zone)
+ struct zone *zone, bool mhp_off_inaccessible)
{
unsigned long end_pfn = pfn + nr_pages;
int ret, i;
@@ -1092,6 +1109,14 @@ int mhp_init_memmap_on_memory(unsigned long pfn, unsigned long nr_pages,
ret = kasan_add_zero_shadow(__va(PFN_PHYS(pfn)), PFN_PHYS(nr_pages));
if (ret)
return ret;
+ /*
+ * The memory block is accessible at this stage, so poison the struct
+ * pages now. If the memory block was already accessible during the
+ * memory hotplug addition phase, page poisoning was performed then,
+ * in sparse_add_section().
+ */
+ if (mhp_off_inaccessible)
+ page_init_poison_with_resched(pfn, nr_pages);
move_pfn_range_to_zone(zone, pfn, nr_pages, NULL, MIGRATE_UNMOVABLE);

for (i = 0; i < nr_pages; i++)
diff --git a/mm/sparse.c b/mm/sparse.c
index 77d91e565045..3991c717b769 100644
--- a/mm/sparse.c
+++ b/mm/sparse.c
@@ -907,7 +907,8 @@ int __meminit sparse_add_section(int nid, unsigned long start_pfn,
* Poison uninitialized struct pages in order to catch invalid flags
* combinations.
*/
- page_init_poison(memmap, sizeof(struct page) * nr_pages);
+ if (!altmap || !altmap->inaccessible)
+ page_init_poison(memmap, sizeof(struct page) * nr_pages);

ms = __nr_to_section(section_nr);
set_section_nid(section_nr, nid);
--
2.39.2

2023-11-23 09:25:17

by Sumanth Korikkar

Subject: [PATCH v2 6/7] s390/mm: implement MEM_PREPARE_ONLINE/MEM_FINISH_OFFLINE notifiers

The MEM_PREPARE_ONLINE memory notifier makes the memory block physically
accessible via the sclp assign command. The notifier ensures
self-contained memory maps are accessible, thereby enabling the "memmap
on memory" feature on s390.

The MEM_FINISH_OFFLINE memory notifier shifts the memory block to an
inaccessible state via the sclp unassign command.

Implementation considerations:
* When MHP_MEMMAP_ON_MEMORY is disabled, the system retains the old
behavior. This means the memory map is allocated from default memory.
* If MACHINE_HAS_EDAT1 is unavailable, MHP_MEMMAP_ON_MEMORY is
automatically disabled. This ensures that vmemmap pagetables do not
consume additional memory from the default memory allocator.
* The MEM_GOING_ONLINE notifier has been modified to perform no
operation, as MEM_PREPARE_ONLINE already executes the sclp assign
command.
* The MEM_CANCEL_ONLINE/MEM_OFFLINE notifiers now perform no operation,
as MEM_FINISH_OFFLINE already executes the sclp unassign command.

Reviewed-by: Gerald Schaefer <[email protected]>
Signed-off-by: Sumanth Korikkar <[email protected]>
---
drivers/s390/char/sclp_cmd.c | 28 ++++++++++++++++++++++------
1 file changed, 22 insertions(+), 6 deletions(-)

diff --git a/drivers/s390/char/sclp_cmd.c b/drivers/s390/char/sclp_cmd.c
index 355e63e44e95..30b829e4c052 100644
--- a/drivers/s390/char/sclp_cmd.c
+++ b/drivers/s390/char/sclp_cmd.c
@@ -18,6 +18,7 @@
#include <linux/mm.h>
#include <linux/mmzone.h>
#include <linux/memory.h>
+#include <linux/memory_hotplug.h>
#include <linux/module.h>
#include <asm/ctlreg.h>
#include <asm/chpid.h>
@@ -26,6 +27,7 @@
#include <asm/sclp.h>
#include <asm/numa.h>
#include <asm/facility.h>
+#include <asm/page-states.h>

#include "sclp.h"

@@ -319,6 +321,7 @@ static bool contains_standby_increment(unsigned long start, unsigned long end)
static int sclp_mem_notifier(struct notifier_block *nb,
unsigned long action, void *data)
{
+ struct memory_block *memory_block;
unsigned long start, size;
struct memory_notify *arg;
unsigned char id;
@@ -340,18 +343,29 @@ static int sclp_mem_notifier(struct notifier_block *nb,
if (contains_standby_increment(start, start + size))
rc = -EPERM;
break;
- case MEM_GOING_ONLINE:
+ case MEM_PREPARE_ONLINE:
+ memory_block = find_memory_block(pfn_to_section_nr(arg->start_pfn));
+ if (!memory_block) {
+ rc = -EINVAL;
+ goto out;
+ }
rc = sclp_mem_change_state(start, size, 1);
+ if (rc || !memory_block->altmap)
+ goto out;
+ /*
+ * Set CMMA state to nodat here, since the struct page memory
+ * at the beginning of the memory block will not go through the
+ * buddy allocator later.
+ */
+ __arch_set_page_nodat((void *)__va(start), memory_block->altmap->free);
break;
- case MEM_CANCEL_ONLINE:
- sclp_mem_change_state(start, size, 0);
- break;
- case MEM_OFFLINE:
+ case MEM_FINISH_OFFLINE:
sclp_mem_change_state(start, size, 0);
break;
default:
break;
}
+out:
mutex_unlock(&sclp_mem_mutex);
return rc ? NOTIFY_BAD : NOTIFY_OK;
}
@@ -397,7 +411,9 @@ static void __init add_memory_merged(u16 rn)
if (!size)
goto skip_add;
for (addr = start; addr < start + size; addr += block_size)
- add_memory(0, addr, block_size, MHP_NONE);
+ add_memory(0, addr, block_size,
+ MACHINE_HAS_EDAT1 ?
+ MHP_MEMMAP_ON_MEMORY | MHP_OFFLINE_INACCESSIBLE : MHP_NONE);
skip_add:
first_rn = rn;
num = 1;
--
2.39.2