Memory sub-section hotplug was added to fix the issue that nvdimm could
be mapped at non-section aligned starting address. A subsection map is
added into struct mem_section_usage to implement it. However, sub-section
is only supported in VMEMMAP case. Hence there's no need to operate
subsection map in SPARSEMEM|!VMEMMAP case. In this patchset, change
codes to make sub-section map and the relevant operation only available
in VMEMMAP case.
And since sub-section hotplug added, the hot add/remove functionality
have been broken in SPARSEMEM|!VMEMMAP case. Wei Yang and I, each of us
make one patch to fix one of the failures. In this patchset, the patch
1/7 from me is used to fix the hot remove failure. Wei Yang's patch has
been merged by Andrew.
Changelog:
v1->v2:
Move the hot remove fixing patch to the front so that people can
back port it to easier. Suggested by David.
Split the old patch which invalidate the sub-section map in
!VMEMMAP case into two patches, patch 4/7, and patch 6/7. This
makes patch reviewing easier. David by David.
Take Wei Yang's fixing patch out to post alone, since it has been
reviewed and acked by people. Suggested by Andrew.
Fix a code comment mistake in the current patch 2/7. Found out by
Wei Yang during reviewing.
Baoquan He (7):
mm/hotplug: fix hot remove failure in SPARSEMEM|!VMEMMAP case
mm/sparse.c: introduce new function fill_subsection_map()
mm/sparse.c: introduce a new function clear_subsection_map()
mm/sparse.c: only use subsection map in VMEMMAP case
mm/sparse.c: add code comment about sub-section hotplug
mm/sparse.c: move subsection_map related codes together
mm/sparse.c: Use __get_free_pages() instead in
populate_section_memmap()
include/linux/mmzone.h | 2 +
mm/sparse.c | 178 +++++++++++++++++++++++++++++------------
2 files changed, 127 insertions(+), 53 deletions(-)
--
2.17.2
In section_deactivate(), pfn_to_page() doesn't work any more after
ms->section_mem_map is resetting to NULL in SPARSEMEM|!VMEMMAP case.
It caused hot remove failure:
kernel BUG at mm/page_alloc.c:4806!
invalid opcode: 0000 [#1] SMP PTI
CPU: 3 PID: 8 Comm: kworker/u16:0 Tainted: G W 5.5.0-next-20200205+ #340
Hardware name: QEMU Standard PC (i440FX + PIIX, 1996), BIOS 0.0.0 02/06/2015
Workqueue: kacpi_hotplug acpi_hotplug_work_fn
RIP: 0010:free_pages+0x85/0xa0
Call Trace:
__remove_pages+0x99/0xc0
arch_remove_memory+0x23/0x4d
try_remove_memory+0xc8/0x130
? walk_memory_blocks+0x72/0xa0
__remove_memory+0xa/0x11
acpi_memory_device_remove+0x72/0x100
acpi_bus_trim+0x55/0x90
acpi_device_hotplug+0x2eb/0x3d0
acpi_hotplug_work_fn+0x1a/0x30
process_one_work+0x1a7/0x370
worker_thread+0x30/0x380
? flush_rcu_work+0x30/0x30
kthread+0x112/0x130
? kthread_create_on_node+0x60/0x60
ret_from_fork+0x35/0x40
Let's move the ->section_mem_map resetting after depopulate_section_memmap()
to fix it.
Signed-off-by: Baoquan He <[email protected]>
---
mm/sparse.c | 4 +++-
1 file changed, 3 insertions(+), 1 deletion(-)
diff --git a/mm/sparse.c b/mm/sparse.c
index 596b2a45b100..b8e52c8fed7f 100644
--- a/mm/sparse.c
+++ b/mm/sparse.c
@@ -779,13 +779,15 @@ static void section_deactivate(unsigned long pfn, unsigned long nr_pages,
ms->usage = NULL;
}
memmap = sparse_decode_mem_map(ms->section_mem_map, section_nr);
- ms->section_mem_map = (unsigned long)NULL;
}
if (section_is_early && memmap)
free_map_bootmem(memmap);
else
depopulate_section_memmap(pfn, nr_pages, altmap);
+
+ if (bitmap_empty(subsection_map, SUBSECTIONS_PER_SECTION))
+ ms->section_mem_map = (unsigned long)NULL;
}
static struct page * __meminit section_activate(int nid, unsigned long pfn,
--
2.17.2
Wrap the codes filling subsection map from section_activate() into
fill_subsection_map(), this makes section_activate() cleaner and
easier to follow.
Signed-off-by: Baoquan He <[email protected]>
---
mm/sparse.c | 45 ++++++++++++++++++++++++++++++++++-----------
1 file changed, 34 insertions(+), 11 deletions(-)
diff --git a/mm/sparse.c b/mm/sparse.c
index b8e52c8fed7f..977b47acd38d 100644
--- a/mm/sparse.c
+++ b/mm/sparse.c
@@ -790,24 +790,28 @@ static void section_deactivate(unsigned long pfn, unsigned long nr_pages,
ms->section_mem_map = (unsigned long)NULL;
}
-static struct page * __meminit section_activate(int nid, unsigned long pfn,
- unsigned long nr_pages, struct vmem_altmap *altmap)
+/**
+ * fill_subsection_map - fill subsection map of a memory region
+ * @pfn - start pfn of the memory range
+ * @nr_pages - number of pfns to add in the region
+ *
+ * This fills the related subsection map inside one section, and only
+ * intended for hotplug.
+ *
+ * Return:
+ * * 0 - On success.
+ * * -EINVAL - Invalid memory region.
+ * * -EEXIST - Subsection map has been set.
+ */
+static int fill_subsection_map(unsigned long pfn, unsigned long nr_pages)
{
- DECLARE_BITMAP(map, SUBSECTIONS_PER_SECTION) = { 0 };
struct mem_section *ms = __pfn_to_section(pfn);
- struct mem_section_usage *usage = NULL;
+ DECLARE_BITMAP(map, SUBSECTIONS_PER_SECTION) = { 0 };
unsigned long *subsection_map;
- struct page *memmap;
int rc = 0;
subsection_mask_set(map, pfn, nr_pages);
- if (!ms->usage) {
- usage = kzalloc(mem_section_usage_size(), GFP_KERNEL);
- if (!usage)
- return ERR_PTR(-ENOMEM);
- ms->usage = usage;
- }
subsection_map = &ms->usage->subsection_map[0];
if (bitmap_empty(map, SUBSECTIONS_PER_SECTION))
@@ -818,6 +822,25 @@ static struct page * __meminit section_activate(int nid, unsigned long pfn,
bitmap_or(subsection_map, map, subsection_map,
SUBSECTIONS_PER_SECTION);
+ return rc;
+}
+
+static struct page * __meminit section_activate(int nid, unsigned long pfn,
+ unsigned long nr_pages, struct vmem_altmap *altmap)
+{
+ struct mem_section *ms = __pfn_to_section(pfn);
+ struct mem_section_usage *usage = NULL;
+ struct page *memmap;
+ int rc = 0;
+
+ if (!ms->usage) {
+ usage = kzalloc(mem_section_usage_size(), GFP_KERNEL);
+ if (!usage)
+ return ERR_PTR(-ENOMEM);
+ ms->usage = usage;
+ }
+
+ rc = fill_subsection_map(pfn, nr_pages);
if (rc) {
if (usage)
ms->usage = NULL;
--
2.17.2
Wrap the codes which clear subsection map of one memory region from
section_deactivate() into clear_subsection_map().
Signed-off-by: Baoquan He <[email protected]>
---
mm/sparse.c | 46 ++++++++++++++++++++++++++++++++++++++--------
1 file changed, 38 insertions(+), 8 deletions(-)
diff --git a/mm/sparse.c b/mm/sparse.c
index 977b47acd38d..df857ee9330c 100644
--- a/mm/sparse.c
+++ b/mm/sparse.c
@@ -726,14 +726,25 @@ static void free_map_bootmem(struct page *memmap)
}
#endif /* CONFIG_SPARSEMEM_VMEMMAP */
-static void section_deactivate(unsigned long pfn, unsigned long nr_pages,
- struct vmem_altmap *altmap)
+/**
+ * clear_subsection_map - Clear subsection map of one memory region
+ *
+ * @pfn - start pfn of the memory range
+ * @nr_pages - number of pfns to add in the region
+ *
+ * This is only intended for hotplug, and clear the related subsection
+ * map inside one section.
+ *
+ * Return:
+ * * -EINVAL - Section already deactived.
+ * * 0 - Subsection map is emptied.
+ * * 1 - Subsection map is not empty.
+ */
+static int clear_subsection_map(unsigned long pfn, unsigned long nr_pages)
{
DECLARE_BITMAP(map, SUBSECTIONS_PER_SECTION) = { 0 };
DECLARE_BITMAP(tmp, SUBSECTIONS_PER_SECTION) = { 0 };
struct mem_section *ms = __pfn_to_section(pfn);
- bool section_is_early = early_section(ms);
- struct page *memmap = NULL;
unsigned long *subsection_map = ms->usage
? &ms->usage->subsection_map[0] : NULL;
@@ -744,8 +755,28 @@ static void section_deactivate(unsigned long pfn, unsigned long nr_pages,
if (WARN(!subsection_map || !bitmap_equal(tmp, map, SUBSECTIONS_PER_SECTION),
"section already deactivated (%#lx + %ld)\n",
pfn, nr_pages))
- return;
+ return -EINVAL;
+
+ bitmap_xor(subsection_map, map, subsection_map, SUBSECTIONS_PER_SECTION);
+ if (bitmap_empty(subsection_map, SUBSECTIONS_PER_SECTION))
+ return 0;
+
+ return 1;
+}
+
+static void section_deactivate(unsigned long pfn, unsigned long nr_pages,
+ struct vmem_altmap *altmap)
+{
+ struct mem_section *ms = __pfn_to_section(pfn);
+ bool section_is_early = early_section(ms);
+ struct page *memmap = NULL;
+ int rc;
+
+
+ rc = clear_subsection_map(pfn, nr_pages);
+ if (IS_ERR_VALUE((unsigned long)rc))
+ return;
/*
* There are 3 cases to handle across two configurations
* (SPARSEMEM_VMEMMAP={y,n}):
@@ -763,8 +794,7 @@ static void section_deactivate(unsigned long pfn, unsigned long nr_pages,
*
* For 2/ and 3/ the SPARSEMEM_VMEMMAP={y,n} cases are unified
*/
- bitmap_xor(subsection_map, map, subsection_map, SUBSECTIONS_PER_SECTION);
- if (bitmap_empty(subsection_map, SUBSECTIONS_PER_SECTION)) {
+ if (!rc) {
unsigned long section_nr = pfn_to_section_nr(pfn);
/*
@@ -786,7 +816,7 @@ static void section_deactivate(unsigned long pfn, unsigned long nr_pages,
else
depopulate_section_memmap(pfn, nr_pages, altmap);
- if (bitmap_empty(subsection_map, SUBSECTIONS_PER_SECTION))
+ if (!rc)
ms->section_mem_map = (unsigned long)NULL;
}
--
2.17.2
It's helpful to note that sub-section is only supported in
SPARSEMEM_VMEMMAP case, but not in SPARSEMEM|!VMEMMAP case. Add
sentences into the code comment above sparse_add_section.
And also move the code comments from inside section_deactivate()
to be above it. The code comments are reasonable to the whole
function, and the moving makes code cleaner.
Signed-off-by: Baoquan He <[email protected]>
---
mm/sparse.c | 39 ++++++++++++++++++++++-----------------
1 file changed, 22 insertions(+), 17 deletions(-)
diff --git a/mm/sparse.c b/mm/sparse.c
index 66c497d6a229..14bff0b44e7c 100644
--- a/mm/sparse.c
+++ b/mm/sparse.c
@@ -778,6 +778,22 @@ static int clear_subsection_map(unsigned long pfn, unsigned long nr_pages)
}
#endif
+/*
+ * To deactivate a memory region, there are 3 cases to handle across
+ * two configurations (SPARSEMEM_VMEMMAP={y,n}):
+ *
+ * 1. deactivation of a partial hot-added section (only possible in
+ * the SPARSEMEM_VMEMMAP=y case).
+ * a) section was present at memory init
+ * b) section was hot-added post memory init
+ * 2. deactivation of a complete hot-added section.
+ * 3. deactivation of a complete section from memory init.
+ *
+ * For case 1, when subsection_map does not empty we will not be freeing
+ * the usage map, but still need to free the vmemmap range.
+ *
+ * For case 2 and 3, the SPARSEMEM_VMEMMAP={y,n} cases are unified.
+ */
static void section_deactivate(unsigned long pfn, unsigned long nr_pages,
struct vmem_altmap *altmap)
{
@@ -790,23 +806,7 @@ static void section_deactivate(unsigned long pfn, unsigned long nr_pages,
rc = clear_subsection_map(pfn, nr_pages);
if(IS_ERR_VALUE((unsigned long)rc))
return;
- /*
- * There are 3 cases to handle across two configurations
- * (SPARSEMEM_VMEMMAP={y,n}):
- *
- * 1/ deactivation of a partial hot-added section (only possible
- * in the SPARSEMEM_VMEMMAP=y case).
- * a/ section was present at memory init
- * b/ section was hot-added post memory init
- * 2/ deactivation of a complete hot-added section
- * 3/ deactivation of a complete section from memory init
- *
- * For 1/, when subsection_map does not empty we will not be
- * freeing the usage map, but still need to free the vmemmap
- * range.
- *
- * For 2/ and 3/ the SPARSEMEM_VMEMMAP={y,n} cases are unified
- */
+
if (!rc) {
unsigned long section_nr = pfn_to_section_nr(pfn);
@@ -926,6 +926,11 @@ static struct page * __meminit section_activate(int nid, unsigned long pfn,
*
* This is only intended for hotplug.
*
+ * Note that the added memory region is either section aligned, or
+ * sub-section aligned. The subsection hotplug is only supported in
+ * VMEMMAP case, please refer to ZONE_DEVICE part of memory-model.rst
+ * for more details.
+ *
* Return:
* * 0 - On success.
* * -EEXIST - Section has been present.
--
2.17.2
This removes the unnecessary goto, and simplify codes.
Signed-off-by: Baoquan He <[email protected]>
Reviewed-by: Wei Yang <[email protected]>
---
mm/sparse.c | 16 ++++++----------
1 file changed, 6 insertions(+), 10 deletions(-)
diff --git a/mm/sparse.c b/mm/sparse.c
index 053d6c2e5c1f..572b71bd15aa 100644
--- a/mm/sparse.c
+++ b/mm/sparse.c
@@ -754,23 +754,19 @@ static void free_map_bootmem(struct page *memmap)
struct page * __meminit populate_section_memmap(unsigned long pfn,
unsigned long nr_pages, int nid, struct vmem_altmap *altmap)
{
- struct page *page, *ret;
+ struct page *ret;
unsigned long memmap_size = sizeof(struct page) * PAGES_PER_SECTION;
- page = alloc_pages(GFP_KERNEL|__GFP_NOWARN, get_order(memmap_size));
- if (page)
- goto got_map_page;
+ ret = (void *)__get_free_pages(GFP_KERNEL|__GFP_NOWARN,
+ get_order(memmap_size));
+ if (ret)
+ return ret;
ret = vmalloc(memmap_size);
if (ret)
- goto got_map_ptr;
+ return ret;
return NULL;
-got_map_page:
- ret = (struct page *)pfn_to_kaddr(page_to_pfn(page));
-got_map_ptr:
-
- return ret;
}
static void depopulate_section_memmap(unsigned long pfn, unsigned long nr_pages,
--
2.17.2
No functional change.
Signed-off-by: Baoquan He <[email protected]>
---
mm/sparse.c | 172 +++++++++++++++++++++++++---------------------------
1 file changed, 84 insertions(+), 88 deletions(-)
diff --git a/mm/sparse.c b/mm/sparse.c
index 14bff0b44e7c..053d6c2e5c1f 100644
--- a/mm/sparse.c
+++ b/mm/sparse.c
@@ -244,10 +244,94 @@ void __init subsection_map_init(unsigned long pfn, unsigned long nr_pages)
nr_pages -= pfns;
}
}
+
+/**
+ * clear_subsection_map - Clear subsection map of one memory region
+ *
+ * @pfn - start pfn of the memory range
+ * @nr_pages - number of pfns to add in the region
+ *
+ * This is only intended for hotplug, and clear the related subsection
+ * map inside one section.
+ *
+ * Return:
+ * * -EINVAL - Section already deactived.
+ * * 0 - Subsection map is emptied.
+ * * 1 - Subsection map is not empty.
+ */
+static int clear_subsection_map(unsigned long pfn, unsigned long nr_pages)
+{
+ DECLARE_BITMAP(map, SUBSECTIONS_PER_SECTION) = { 0 };
+ DECLARE_BITMAP(tmp, SUBSECTIONS_PER_SECTION) = { 0 };
+ struct mem_section *ms = __pfn_to_section(pfn);
+ unsigned long *subsection_map = ms->usage
+ ? &ms->usage->subsection_map[0] : NULL;
+
+ subsection_mask_set(map, pfn, nr_pages);
+ if (subsection_map)
+ bitmap_and(tmp, map, subsection_map, SUBSECTIONS_PER_SECTION);
+
+ if (WARN(!subsection_map || !bitmap_equal(tmp, map, SUBSECTIONS_PER_SECTION),
+ "section already deactivated (%#lx + %ld)\n",
+ pfn, nr_pages))
+ return -EINVAL;
+
+ bitmap_xor(subsection_map, map, subsection_map, SUBSECTIONS_PER_SECTION);
+
+ if (bitmap_empty(subsection_map, SUBSECTIONS_PER_SECTION))
+ return 0;
+
+ return 1;
+}
+
+/**
+ * fill_subsection_map - fill subsection map of a memory region
+ * @pfn - start pfn of the memory range
+ * @nr_pages - number of pfns to add in the region
+ *
+ * This fills the related subsection map inside one section, and only
+ * intended for hotplug.
+ *
+ * Return:
+ * * 0 - On success.
+ * * -EINVAL - Invalid memory region.
+ * * -EEXIST - Subsection map has been set.
+ */
+static int fill_subsection_map(unsigned long pfn, unsigned long nr_pages)
+{
+ struct mem_section *ms = __pfn_to_section(pfn);
+ DECLARE_BITMAP(map, SUBSECTIONS_PER_SECTION) = { 0 };
+ unsigned long *subsection_map;
+ int rc = 0;
+
+ subsection_mask_set(map, pfn, nr_pages);
+
+ subsection_map = &ms->usage->subsection_map[0];
+
+ if (bitmap_empty(map, SUBSECTIONS_PER_SECTION))
+ rc = -EINVAL;
+ else if (bitmap_intersects(map, subsection_map, SUBSECTIONS_PER_SECTION))
+ rc = -EEXIST;
+ else
+ bitmap_or(subsection_map, map, subsection_map,
+ SUBSECTIONS_PER_SECTION);
+
+ return rc;
+}
#else
void __init subsection_map_init(unsigned long pfn, unsigned long nr_pages)
{
}
+
+static int clear_subsection_map(unsigned long pfn, unsigned long nr_pages)
+{
+ return 0;
+}
+
+static int fill_subsection_map(unsigned long pfn, unsigned long nr_pages)
+{
+ return 0;
+}
#endif
/* Record a memory area against a node. */
@@ -732,52 +816,6 @@ static void free_map_bootmem(struct page *memmap)
}
#endif /* CONFIG_SPARSEMEM_VMEMMAP */
-#ifdef CONFIG_SPARSEMEM_VMEMMAP
-/**
- * clear_subsection_map - Clear subsection map of one memory region
- *
- * @pfn - start pfn of the memory range
- * @nr_pages - number of pfns to add in the region
- *
- * This is only intended for hotplug, and clear the related subsection
- * map inside one section.
- *
- * Return:
- * * -EINVAL - Section already deactived.
- * * 0 - Subsection map is emptied.
- * * 1 - Subsection map is not empty.
- */
-static int clear_subsection_map(unsigned long pfn, unsigned long nr_pages)
-{
- DECLARE_BITMAP(map, SUBSECTIONS_PER_SECTION) = { 0 };
- DECLARE_BITMAP(tmp, SUBSECTIONS_PER_SECTION) = { 0 };
- struct mem_section *ms = __pfn_to_section(pfn);
- unsigned long *subsection_map = ms->usage
- ? &ms->usage->subsection_map[0] : NULL;
-
- subsection_mask_set(map, pfn, nr_pages);
- if (subsection_map)
- bitmap_and(tmp, map, subsection_map, SUBSECTIONS_PER_SECTION);
-
- if (WARN(!subsection_map || !bitmap_equal(tmp, map, SUBSECTIONS_PER_SECTION),
- "section already deactivated (%#lx + %ld)\n",
- pfn, nr_pages))
- return -EINVAL;
-
- bitmap_xor(subsection_map, map, subsection_map, SUBSECTIONS_PER_SECTION);
-
- if (bitmap_empty(subsection_map, SUBSECTIONS_PER_SECTION))
- return 0;
-
- return 1;
-}
-#else
-static int clear_subsection_map(unsigned long pfn, unsigned long nr_pages)
-{
- return 0;
-}
-#endif
-
/*
* To deactivate a memory region, there are 3 cases to handle across
* two configurations (SPARSEMEM_VMEMMAP={y,n}):
@@ -833,48 +871,6 @@ static void section_deactivate(unsigned long pfn, unsigned long nr_pages,
ms->section_mem_map = (unsigned long)NULL;
}
-#ifdef CONFIG_SPARSEMEM_VMEMMAP
-/**
- * fill_subsection_map - fill subsection map of a memory region
- * @pfn - start pfn of the memory range
- * @nr_pages - number of pfns to add in the region
- *
- * This fills the related subsection map inside one section, and only
- * intended for hotplug.
- *
- * Return:
- * * 0 - On success.
- * * -EINVAL - Invalid memory region.
- * * -EEXIST - Subsection map has been set.
- */
-static int fill_subsection_map(unsigned long pfn, unsigned long nr_pages)
-{
- struct mem_section *ms = __pfn_to_section(pfn);
- DECLARE_BITMAP(map, SUBSECTIONS_PER_SECTION) = { 0 };
- unsigned long *subsection_map;
- int rc = 0;
-
- subsection_mask_set(map, pfn, nr_pages);
-
- subsection_map = &ms->usage->subsection_map[0];
-
- if (bitmap_empty(map, SUBSECTIONS_PER_SECTION))
- rc = -EINVAL;
- else if (bitmap_intersects(map, subsection_map, SUBSECTIONS_PER_SECTION))
- rc = -EEXIST;
- else
- bitmap_or(subsection_map, map, subsection_map,
- SUBSECTIONS_PER_SECTION);
-
- return rc;
-}
-#else
-static int fill_subsection_map(unsigned long pfn, unsigned long nr_pages)
-{
- return 0;
-}
-#endif
-
static struct page * __meminit section_activate(int nid, unsigned long pfn,
unsigned long nr_pages, struct vmem_altmap *altmap)
{
--
2.17.2
Currently, subsection map is used when SPARSEMEM is enabled, including
VMEMMAP case and !VMEMMAP case. However, subsection hotplug is not
supported at all in SPARSEMEM|!VMEMMAP case, subsection map is unnecessary
and misleading. Let's adjust code to only allow subsection map being
used in VMEMMAP case.
Signed-off-by: Baoquan He <[email protected]>
---
include/linux/mmzone.h | 2 ++
mm/sparse.c | 20 ++++++++++++++++++++
2 files changed, 22 insertions(+)
diff --git a/include/linux/mmzone.h b/include/linux/mmzone.h
index 462f6873905a..fc0de3a9a51e 100644
--- a/include/linux/mmzone.h
+++ b/include/linux/mmzone.h
@@ -1185,7 +1185,9 @@ static inline unsigned long section_nr_to_pfn(unsigned long sec)
#define SUBSECTION_ALIGN_DOWN(pfn) ((pfn) & PAGE_SUBSECTION_MASK)
struct mem_section_usage {
+#ifdef CONFIG_SPARSEMEM_VMEMMAP
DECLARE_BITMAP(subsection_map, SUBSECTIONS_PER_SECTION);
+#endif
/* See declaration of similar field in struct zone */
unsigned long pageblock_flags[0];
};
diff --git a/mm/sparse.c b/mm/sparse.c
index df857ee9330c..66c497d6a229 100644
--- a/mm/sparse.c
+++ b/mm/sparse.c
@@ -209,6 +209,7 @@ static inline unsigned long first_present_section_nr(void)
return next_present_section_nr(-1);
}
+#ifdef CONFIG_SPARSEMEM_VMEMMAP
static void subsection_mask_set(unsigned long *map, unsigned long pfn,
unsigned long nr_pages)
{
@@ -243,6 +244,11 @@ void __init subsection_map_init(unsigned long pfn, unsigned long nr_pages)
nr_pages -= pfns;
}
}
+#else
+void __init subsection_map_init(unsigned long pfn, unsigned long nr_pages)
+{
+}
+#endif
/* Record a memory area against a node. */
void __init memory_present(int nid, unsigned long start, unsigned long end)
@@ -726,6 +732,7 @@ static void free_map_bootmem(struct page *memmap)
}
#endif /* CONFIG_SPARSEMEM_VMEMMAP */
+#ifdef CONFIG_SPARSEMEM_VMEMMAP
/**
* clear_subsection_map - Clear subsection map of one memory region
*
@@ -764,6 +771,12 @@ static int clear_subsection_map(unsigned long pfn, unsigned long nr_pages)
return 1;
}
+#else
+static int clear_subsection_map(unsigned long pfn, unsigned long nr_pages)
+{
+ return 0;
+}
+#endif
static void section_deactivate(unsigned long pfn, unsigned long nr_pages,
struct vmem_altmap *altmap)
@@ -820,6 +833,7 @@ static void section_deactivate(unsigned long pfn, unsigned long nr_pages,
ms->section_mem_map = (unsigned long)NULL;
}
+#ifdef CONFIG_SPARSEMEM_VMEMMAP
/**
* fill_subsection_map - fill subsection map of a memory region
* @pfn - start pfn of the memory range
@@ -854,6 +868,12 @@ static int fill_subsection_map(unsigned long pfn, unsigned long nr_pages)
return rc;
}
+#else
+static int fill_subsection_map(unsigned long pfn, unsigned long nr_pages)
+{
+ return 0;
+}
+#endif
static struct page * __meminit section_activate(int nid, unsigned long pfn,
unsigned long nr_pages, struct vmem_altmap *altmap)
--
2.17.2
On Thu, Feb 20, 2020 at 12:33:10PM +0800, Baoquan He wrote:
>In section_deactivate(), pfn_to_page() doesn't work any more after
>ms->section_mem_map is resetting to NULL in SPARSEMEM|!VMEMMAP case.
>It caused hot remove failure:
>
>kernel BUG at mm/page_alloc.c:4806!
>invalid opcode: 0000 [#1] SMP PTI
>CPU: 3 PID: 8 Comm: kworker/u16:0 Tainted: G W 5.5.0-next-20200205+ #340
>Hardware name: QEMU Standard PC (i440FX + PIIX, 1996), BIOS 0.0.0 02/06/2015
>Workqueue: kacpi_hotplug acpi_hotplug_work_fn
>RIP: 0010:free_pages+0x85/0xa0
>Call Trace:
> __remove_pages+0x99/0xc0
> arch_remove_memory+0x23/0x4d
> try_remove_memory+0xc8/0x130
> ? walk_memory_blocks+0x72/0xa0
> __remove_memory+0xa/0x11
> acpi_memory_device_remove+0x72/0x100
> acpi_bus_trim+0x55/0x90
> acpi_device_hotplug+0x2eb/0x3d0
> acpi_hotplug_work_fn+0x1a/0x30
> process_one_work+0x1a7/0x370
> worker_thread+0x30/0x380
> ? flush_rcu_work+0x30/0x30
> kthread+0x112/0x130
> ? kthread_create_on_node+0x60/0x60
> ret_from_fork+0x35/0x40
>
>Let's move the ->section_mem_map resetting after depopulate_section_memmap()
>to fix it.
>
>Signed-off-by: Baoquan He <[email protected]>
Reviewed-by: Wei Yang <[email protected]>
>---
> mm/sparse.c | 4 +++-
> 1 file changed, 3 insertions(+), 1 deletion(-)
>
>diff --git a/mm/sparse.c b/mm/sparse.c
>index 596b2a45b100..b8e52c8fed7f 100644
>--- a/mm/sparse.c
>+++ b/mm/sparse.c
>@@ -779,13 +779,15 @@ static void section_deactivate(unsigned long pfn, unsigned long nr_pages,
> ms->usage = NULL;
> }
> memmap = sparse_decode_mem_map(ms->section_mem_map, section_nr);
>- ms->section_mem_map = (unsigned long)NULL;
> }
>
> if (section_is_early && memmap)
> free_map_bootmem(memmap);
> else
> depopulate_section_memmap(pfn, nr_pages, altmap);
>+
>+ if (bitmap_empty(subsection_map, SUBSECTIONS_PER_SECTION))
>+ ms->section_mem_map = (unsigned long)NULL;
> }
>
> static struct page * __meminit section_activate(int nid, unsigned long pfn,
>--
>2.17.2
--
Wei Yang
Help you, Help me
On Thu, Feb 20, 2020 at 12:33:11PM +0800, Baoquan He wrote:
>Wrap the codes filling subsection map from section_activate() into
>fill_subsection_map(), this makes section_activate() cleaner and
>easier to follow.
>
>Signed-off-by: Baoquan He <[email protected]>
Reviewed-by: Wei Yang <[email protected]>
>---
> mm/sparse.c | 45 ++++++++++++++++++++++++++++++++++-----------
> 1 file changed, 34 insertions(+), 11 deletions(-)
>
>diff --git a/mm/sparse.c b/mm/sparse.c
>index b8e52c8fed7f..977b47acd38d 100644
>--- a/mm/sparse.c
>+++ b/mm/sparse.c
>@@ -790,24 +790,28 @@ static void section_deactivate(unsigned long pfn, unsigned long nr_pages,
> ms->section_mem_map = (unsigned long)NULL;
> }
>
>-static struct page * __meminit section_activate(int nid, unsigned long pfn,
>- unsigned long nr_pages, struct vmem_altmap *altmap)
>+/**
>+ * fill_subsection_map - fill subsection map of a memory region
>+ * @pfn - start pfn of the memory range
>+ * @nr_pages - number of pfns to add in the region
>+ *
>+ * This fills the related subsection map inside one section, and only
>+ * intended for hotplug.
>+ *
>+ * Return:
>+ * * 0 - On success.
>+ * * -EINVAL - Invalid memory region.
>+ * * -EEXIST - Subsection map has been set.
>+ */
>+static int fill_subsection_map(unsigned long pfn, unsigned long nr_pages)
> {
>- DECLARE_BITMAP(map, SUBSECTIONS_PER_SECTION) = { 0 };
> struct mem_section *ms = __pfn_to_section(pfn);
>- struct mem_section_usage *usage = NULL;
>+ DECLARE_BITMAP(map, SUBSECTIONS_PER_SECTION) = { 0 };
> unsigned long *subsection_map;
>- struct page *memmap;
> int rc = 0;
>
> subsection_mask_set(map, pfn, nr_pages);
>
>- if (!ms->usage) {
>- usage = kzalloc(mem_section_usage_size(), GFP_KERNEL);
>- if (!usage)
>- return ERR_PTR(-ENOMEM);
>- ms->usage = usage;
>- }
> subsection_map = &ms->usage->subsection_map[0];
>
> if (bitmap_empty(map, SUBSECTIONS_PER_SECTION))
>@@ -818,6 +822,25 @@ static struct page * __meminit section_activate(int nid, unsigned long pfn,
> bitmap_or(subsection_map, map, subsection_map,
> SUBSECTIONS_PER_SECTION);
>
>+ return rc;
>+}
>+
>+static struct page * __meminit section_activate(int nid, unsigned long pfn,
>+ unsigned long nr_pages, struct vmem_altmap *altmap)
>+{
>+ struct mem_section *ms = __pfn_to_section(pfn);
>+ struct mem_section_usage *usage = NULL;
>+ struct page *memmap;
>+ int rc = 0;
>+
>+ if (!ms->usage) {
>+ usage = kzalloc(mem_section_usage_size(), GFP_KERNEL);
>+ if (!usage)
>+ return ERR_PTR(-ENOMEM);
>+ ms->usage = usage;
>+ }
>+
>+ rc = fill_subsection_map(pfn, nr_pages);
> if (rc) {
> if (usage)
> ms->usage = NULL;
>--
>2.17.2
--
Wei Yang
Help you, Help me
On Thu, Feb 20, 2020 at 12:33:12PM +0800, Baoquan He wrote:
>Wrap the codes which clear subsection map of one memory region from
>section_deactivate() into clear_subsection_map().
>
>Signed-off-by: Baoquan He <[email protected]>
Reviewed-by: Wei Yang <[email protected]>
>---
> mm/sparse.c | 46 ++++++++++++++++++++++++++++++++++++++--------
> 1 file changed, 38 insertions(+), 8 deletions(-)
>
>diff --git a/mm/sparse.c b/mm/sparse.c
>index 977b47acd38d..df857ee9330c 100644
>--- a/mm/sparse.c
>+++ b/mm/sparse.c
>@@ -726,14 +726,25 @@ static void free_map_bootmem(struct page *memmap)
> }
> #endif /* CONFIG_SPARSEMEM_VMEMMAP */
>
>-static void section_deactivate(unsigned long pfn, unsigned long nr_pages,
>- struct vmem_altmap *altmap)
>+/**
>+ * clear_subsection_map - Clear subsection map of one memory region
>+ *
>+ * @pfn - start pfn of the memory range
>+ * @nr_pages - number of pfns to add in the region
>+ *
>+ * This is only intended for hotplug, and clear the related subsection
>+ * map inside one section.
>+ *
>+ * Return:
>+ * * -EINVAL - Section already deactived.
>+ * * 0 - Subsection map is emptied.
>+ * * 1 - Subsection map is not empty.
>+ */
>+static int clear_subsection_map(unsigned long pfn, unsigned long nr_pages)
> {
> DECLARE_BITMAP(map, SUBSECTIONS_PER_SECTION) = { 0 };
> DECLARE_BITMAP(tmp, SUBSECTIONS_PER_SECTION) = { 0 };
> struct mem_section *ms = __pfn_to_section(pfn);
>- bool section_is_early = early_section(ms);
>- struct page *memmap = NULL;
> unsigned long *subsection_map = ms->usage
> ? &ms->usage->subsection_map[0] : NULL;
>
>@@ -744,8 +755,28 @@ static void section_deactivate(unsigned long pfn, unsigned long nr_pages,
> if (WARN(!subsection_map || !bitmap_equal(tmp, map, SUBSECTIONS_PER_SECTION),
> "section already deactivated (%#lx + %ld)\n",
> pfn, nr_pages))
>- return;
>+ return -EINVAL;
>+
>+ bitmap_xor(subsection_map, map, subsection_map, SUBSECTIONS_PER_SECTION);
>
>+ if (bitmap_empty(subsection_map, SUBSECTIONS_PER_SECTION))
>+ return 0;
>+
>+ return 1;
>+}
>+
>+static void section_deactivate(unsigned long pfn, unsigned long nr_pages,
>+ struct vmem_altmap *altmap)
>+{
>+ struct mem_section *ms = __pfn_to_section(pfn);
>+ bool section_is_early = early_section(ms);
>+ struct page *memmap = NULL;
>+ int rc;
>+
>+
>+ rc = clear_subsection_map(pfn, nr_pages);
>+ if (IS_ERR_VALUE((unsigned long)rc))
>+ return;
> /*
> * There are 3 cases to handle across two configurations
> * (SPARSEMEM_VMEMMAP={y,n}):
>@@ -763,8 +794,7 @@ static void section_deactivate(unsigned long pfn, unsigned long nr_pages,
> *
> * For 2/ and 3/ the SPARSEMEM_VMEMMAP={y,n} cases are unified
> */
>- bitmap_xor(subsection_map, map, subsection_map, SUBSECTIONS_PER_SECTION);
>- if (bitmap_empty(subsection_map, SUBSECTIONS_PER_SECTION)) {
>+ if (!rc) {
> unsigned long section_nr = pfn_to_section_nr(pfn);
>
> /*
>@@ -786,7 +816,7 @@ static void section_deactivate(unsigned long pfn, unsigned long nr_pages,
> else
> depopulate_section_memmap(pfn, nr_pages, altmap);
>
>- if (bitmap_empty(subsection_map, SUBSECTIONS_PER_SECTION))
>+ if (!rc)
> ms->section_mem_map = (unsigned long)NULL;
> }
>
>--
>2.17.2
--
Wei Yang
Help you, Help me
On Thu, Feb 20, 2020 at 12:33:13PM +0800, Baoquan He wrote:
>Currently, subsection map is used when SPARSEMEM is enabled, including
>VMEMMAP case and !VMEMMAP case. However, subsection hotplug is not
>supported at all in SPARSEMEM|!VMEMMAP case, subsection map is unnecessary
>and misleading. Let's adjust code to only allow subsection map being
>used in VMEMMAP case.
>
>Signed-off-by: Baoquan He <[email protected]>
Reviewed-by: Wei Yang <[email protected]>
>---
> include/linux/mmzone.h | 2 ++
> mm/sparse.c | 20 ++++++++++++++++++++
> 2 files changed, 22 insertions(+)
>
>diff --git a/include/linux/mmzone.h b/include/linux/mmzone.h
>index 462f6873905a..fc0de3a9a51e 100644
>--- a/include/linux/mmzone.h
>+++ b/include/linux/mmzone.h
>@@ -1185,7 +1185,9 @@ static inline unsigned long section_nr_to_pfn(unsigned long sec)
> #define SUBSECTION_ALIGN_DOWN(pfn) ((pfn) & PAGE_SUBSECTION_MASK)
>
> struct mem_section_usage {
>+#ifdef CONFIG_SPARSEMEM_VMEMMAP
> DECLARE_BITMAP(subsection_map, SUBSECTIONS_PER_SECTION);
>+#endif
> /* See declaration of similar field in struct zone */
> unsigned long pageblock_flags[0];
> };
>diff --git a/mm/sparse.c b/mm/sparse.c
>index df857ee9330c..66c497d6a229 100644
>--- a/mm/sparse.c
>+++ b/mm/sparse.c
>@@ -209,6 +209,7 @@ static inline unsigned long first_present_section_nr(void)
> return next_present_section_nr(-1);
> }
>
>+#ifdef CONFIG_SPARSEMEM_VMEMMAP
> static void subsection_mask_set(unsigned long *map, unsigned long pfn,
> unsigned long nr_pages)
> {
>@@ -243,6 +244,11 @@ void __init subsection_map_init(unsigned long pfn, unsigned long nr_pages)
> nr_pages -= pfns;
> }
> }
>+#else
>+void __init subsection_map_init(unsigned long pfn, unsigned long nr_pages)
>+{
>+}
>+#endif
>
> /* Record a memory area against a node. */
> void __init memory_present(int nid, unsigned long start, unsigned long end)
>@@ -726,6 +732,7 @@ static void free_map_bootmem(struct page *memmap)
> }
> #endif /* CONFIG_SPARSEMEM_VMEMMAP */
>
>+#ifdef CONFIG_SPARSEMEM_VMEMMAP
> /**
> * clear_subsection_map - Clear subsection map of one memory region
> *
>@@ -764,6 +771,12 @@ static int clear_subsection_map(unsigned long pfn, unsigned long nr_pages)
>
> return 1;
> }
>+#else
>+static int clear_subsection_map(unsigned long pfn, unsigned long nr_pages)
>+{
>+ return 0;
>+}
>+#endif
>
> static void section_deactivate(unsigned long pfn, unsigned long nr_pages,
> struct vmem_altmap *altmap)
>@@ -820,6 +833,7 @@ static void section_deactivate(unsigned long pfn, unsigned long nr_pages,
> ms->section_mem_map = (unsigned long)NULL;
> }
>
>+#ifdef CONFIG_SPARSEMEM_VMEMMAP
> /**
> * fill_subsection_map - fill subsection map of a memory region
> * @pfn - start pfn of the memory range
>@@ -854,6 +868,12 @@ static int fill_subsection_map(unsigned long pfn, unsigned long nr_pages)
>
> return rc;
> }
>+#else
>+static int fill_subsection_map(unsigned long pfn, unsigned long nr_pages)
>+{
>+ return 0;
>+}
>+#endif
>
> static struct page * __meminit section_activate(int nid, unsigned long pfn,
> unsigned long nr_pages, struct vmem_altmap *altmap)
>--
>2.17.2
--
Wei Yang
Help you, Help me
On Thu, Feb 20, 2020 at 12:33:15PM +0800, Baoquan He wrote:
>No functional change.
>
Those functions are introduced in your previous patches.
Is it possible to define them close to each other at the very beginning?
>Signed-off-by: Baoquan He <[email protected]>
>---
> mm/sparse.c | 172 +++++++++++++++++++++++++---------------------------
> 1 file changed, 84 insertions(+), 88 deletions(-)
>
>diff --git a/mm/sparse.c b/mm/sparse.c
>index 14bff0b44e7c..053d6c2e5c1f 100644
>--- a/mm/sparse.c
>+++ b/mm/sparse.c
>@@ -244,10 +244,94 @@ void __init subsection_map_init(unsigned long pfn, unsigned long nr_pages)
> nr_pages -= pfns;
> }
> }
>+
>+/**
>+ * clear_subsection_map - Clear subsection map of one memory region
>+ *
>+ * @pfn - start pfn of the memory range
>+ * @nr_pages - number of pfns to add in the region
>+ *
>+ * This is only intended for hotplug, and clear the related subsection
>+ * map inside one section.
>+ *
>+ * Return:
>+ * * -EINVAL - Section already deactived.
>+ * * 0 - Subsection map is emptied.
>+ * * 1 - Subsection map is not empty.
>+ */
>+static int clear_subsection_map(unsigned long pfn, unsigned long nr_pages)
>+{
>+ DECLARE_BITMAP(map, SUBSECTIONS_PER_SECTION) = { 0 };
>+ DECLARE_BITMAP(tmp, SUBSECTIONS_PER_SECTION) = { 0 };
>+ struct mem_section *ms = __pfn_to_section(pfn);
>+ unsigned long *subsection_map = ms->usage
>+ ? &ms->usage->subsection_map[0] : NULL;
>+
>+ subsection_mask_set(map, pfn, nr_pages);
>+ if (subsection_map)
>+ bitmap_and(tmp, map, subsection_map, SUBSECTIONS_PER_SECTION);
>+
>+ if (WARN(!subsection_map || !bitmap_equal(tmp, map, SUBSECTIONS_PER_SECTION),
>+ "section already deactivated (%#lx + %ld)\n",
>+ pfn, nr_pages))
>+ return -EINVAL;
>+
>+ bitmap_xor(subsection_map, map, subsection_map, SUBSECTIONS_PER_SECTION);
>+
>+ if (bitmap_empty(subsection_map, SUBSECTIONS_PER_SECTION))
>+ return 0;
>+
>+ return 1;
>+}
>+
>+/**
>+ * fill_subsection_map - fill subsection map of a memory region
>+ * @pfn - start pfn of the memory range
>+ * @nr_pages - number of pfns to add in the region
>+ *
>+ * This fills the related subsection map inside one section, and only
>+ * intended for hotplug.
>+ *
>+ * Return:
>+ * * 0 - On success.
>+ * * -EINVAL - Invalid memory region.
>+ * * -EEXIST - Subsection map has been set.
>+ */
>+static int fill_subsection_map(unsigned long pfn, unsigned long nr_pages)
>+{
>+ struct mem_section *ms = __pfn_to_section(pfn);
>+ DECLARE_BITMAP(map, SUBSECTIONS_PER_SECTION) = { 0 };
>+ unsigned long *subsection_map;
>+ int rc = 0;
>+
>+ subsection_mask_set(map, pfn, nr_pages);
>+
>+ subsection_map = &ms->usage->subsection_map[0];
>+
>+ if (bitmap_empty(map, SUBSECTIONS_PER_SECTION))
>+ rc = -EINVAL;
>+ else if (bitmap_intersects(map, subsection_map, SUBSECTIONS_PER_SECTION))
>+ rc = -EEXIST;
>+ else
>+ bitmap_or(subsection_map, map, subsection_map,
>+ SUBSECTIONS_PER_SECTION);
>+
>+ return rc;
>+}
> #else
> void __init subsection_map_init(unsigned long pfn, unsigned long nr_pages)
> {
> }
>+
>+static int clear_subsection_map(unsigned long pfn, unsigned long nr_pages)
>+{
>+ return 0;
>+}
>+
>+static int fill_subsection_map(unsigned long pfn, unsigned long nr_pages)
>+{
>+ return 0;
>+}
> #endif
>
> /* Record a memory area against a node. */
>@@ -732,52 +816,6 @@ static void free_map_bootmem(struct page *memmap)
> }
> #endif /* CONFIG_SPARSEMEM_VMEMMAP */
>
>-#ifdef CONFIG_SPARSEMEM_VMEMMAP
>-/**
>- * clear_subsection_map - Clear subsection map of one memory region
>- *
>- * @pfn - start pfn of the memory range
>- * @nr_pages - number of pfns to add in the region
>- *
>- * This is only intended for hotplug, and clear the related subsection
>- * map inside one section.
>- *
>- * Return:
>- * * -EINVAL - Section already deactived.
>- * * 0 - Subsection map is emptied.
>- * * 1 - Subsection map is not empty.
>- */
>-static int clear_subsection_map(unsigned long pfn, unsigned long nr_pages)
>-{
>- DECLARE_BITMAP(map, SUBSECTIONS_PER_SECTION) = { 0 };
>- DECLARE_BITMAP(tmp, SUBSECTIONS_PER_SECTION) = { 0 };
>- struct mem_section *ms = __pfn_to_section(pfn);
>- unsigned long *subsection_map = ms->usage
>- ? &ms->usage->subsection_map[0] : NULL;
>-
>- subsection_mask_set(map, pfn, nr_pages);
>- if (subsection_map)
>- bitmap_and(tmp, map, subsection_map, SUBSECTIONS_PER_SECTION);
>-
>- if (WARN(!subsection_map || !bitmap_equal(tmp, map, SUBSECTIONS_PER_SECTION),
>- "section already deactivated (%#lx + %ld)\n",
>- pfn, nr_pages))
>- return -EINVAL;
>-
>- bitmap_xor(subsection_map, map, subsection_map, SUBSECTIONS_PER_SECTION);
>-
>- if (bitmap_empty(subsection_map, SUBSECTIONS_PER_SECTION))
>- return 0;
>-
>- return 1;
>-}
>-#else
>-static int clear_subsection_map(unsigned long pfn, unsigned long nr_pages)
>-{
>- return 0;
>-}
>-#endif
>-
> /*
> * To deactivate a memory region, there are 3 cases to handle across
> * two configurations (SPARSEMEM_VMEMMAP={y,n}):
>@@ -833,48 +871,6 @@ static void section_deactivate(unsigned long pfn, unsigned long nr_pages,
> ms->section_mem_map = (unsigned long)NULL;
> }
>
>-#ifdef CONFIG_SPARSEMEM_VMEMMAP
>-/**
>- * fill_subsection_map - fill subsection map of a memory region
>- * @pfn - start pfn of the memory range
>- * @nr_pages - number of pfns to add in the region
>- *
>- * This fills the related subsection map inside one section, and only
>- * intended for hotplug.
>- *
>- * Return:
>- * * 0 - On success.
>- * * -EINVAL - Invalid memory region.
>- * * -EEXIST - Subsection map has been set.
>- */
>-static int fill_subsection_map(unsigned long pfn, unsigned long nr_pages)
>-{
>- struct mem_section *ms = __pfn_to_section(pfn);
>- DECLARE_BITMAP(map, SUBSECTIONS_PER_SECTION) = { 0 };
>- unsigned long *subsection_map;
>- int rc = 0;
>-
>- subsection_mask_set(map, pfn, nr_pages);
>-
>- subsection_map = &ms->usage->subsection_map[0];
>-
>- if (bitmap_empty(map, SUBSECTIONS_PER_SECTION))
>- rc = -EINVAL;
>- else if (bitmap_intersects(map, subsection_map, SUBSECTIONS_PER_SECTION))
>- rc = -EEXIST;
>- else
>- bitmap_or(subsection_map, map, subsection_map,
>- SUBSECTIONS_PER_SECTION);
>-
>- return rc;
>-}
>-#else
>-static int fill_subsection_map(unsigned long pfn, unsigned long nr_pages)
>-{
>- return 0;
>-}
>-#endif
>-
> static struct page * __meminit section_activate(int nid, unsigned long pfn,
> unsigned long nr_pages, struct vmem_altmap *altmap)
> {
>--
>2.17.2
--
Wei Yang
Help you, Help me
On 02/20/20 at 02:18pm, Wei Yang wrote:
> On Thu, Feb 20, 2020 at 12:33:15PM +0800, Baoquan He wrote:
> >No functional change.
> >
>
> Those functions are introduced in your previous patches.
>
> Is it possible to define them close to each other at the very beginning?
Thanks for reviewing.
Do you mean to discard this patch and keep it as they are in the patch 4/7?
If yes, it's fine to me to drop it as you suggested. To me, I prefer to put
all subsection map handling codes together.
>
> >Signed-off-by: Baoquan He <[email protected]>
> >---
> > mm/sparse.c | 172 +++++++++++++++++++++++++---------------------------
> > 1 file changed, 84 insertions(+), 88 deletions(-)
> >
> >diff --git a/mm/sparse.c b/mm/sparse.c
> >index 14bff0b44e7c..053d6c2e5c1f 100644
> >--- a/mm/sparse.c
> >+++ b/mm/sparse.c
> >@@ -244,10 +244,94 @@ void __init subsection_map_init(unsigned long pfn, unsigned long nr_pages)
> > nr_pages -= pfns;
> > }
> > }
> >+
> >+/**
> >+ * clear_subsection_map - Clear subsection map of one memory region
> >+ *
> >+ * @pfn - start pfn of the memory range
> >+ * @nr_pages - number of pfns to add in the region
> >+ *
> >+ * This is only intended for hotplug, and clear the related subsection
> >+ * map inside one section.
> >+ *
> >+ * Return:
> >+ * * -EINVAL - Section already deactived.
> >+ * * 0 - Subsection map is emptied.
> >+ * * 1 - Subsection map is not empty.
> >+ */
> >+static int clear_subsection_map(unsigned long pfn, unsigned long nr_pages)
> >+{
> >+ DECLARE_BITMAP(map, SUBSECTIONS_PER_SECTION) = { 0 };
> >+ DECLARE_BITMAP(tmp, SUBSECTIONS_PER_SECTION) = { 0 };
> >+ struct mem_section *ms = __pfn_to_section(pfn);
> >+ unsigned long *subsection_map = ms->usage
> >+ ? &ms->usage->subsection_map[0] : NULL;
> >+
> >+ subsection_mask_set(map, pfn, nr_pages);
> >+ if (subsection_map)
> >+ bitmap_and(tmp, map, subsection_map, SUBSECTIONS_PER_SECTION);
> >+
> >+ if (WARN(!subsection_map || !bitmap_equal(tmp, map, SUBSECTIONS_PER_SECTION),
> >+ "section already deactivated (%#lx + %ld)\n",
> >+ pfn, nr_pages))
> >+ return -EINVAL;
> >+
> >+ bitmap_xor(subsection_map, map, subsection_map, SUBSECTIONS_PER_SECTION);
> >+
> >+ if (bitmap_empty(subsection_map, SUBSECTIONS_PER_SECTION))
> >+ return 0;
> >+
> >+ return 1;
> >+}
> >+
> >+/**
> >+ * fill_subsection_map - fill subsection map of a memory region
> >+ * @pfn - start pfn of the memory range
> >+ * @nr_pages - number of pfns to add in the region
> >+ *
> >+ * This fills the related subsection map inside one section, and only
> >+ * intended for hotplug.
> >+ *
> >+ * Return:
> >+ * * 0 - On success.
> >+ * * -EINVAL - Invalid memory region.
> >+ * * -EEXIST - Subsection map has been set.
> >+ */
> >+static int fill_subsection_map(unsigned long pfn, unsigned long nr_pages)
> >+{
> >+ struct mem_section *ms = __pfn_to_section(pfn);
> >+ DECLARE_BITMAP(map, SUBSECTIONS_PER_SECTION) = { 0 };
> >+ unsigned long *subsection_map;
> >+ int rc = 0;
> >+
> >+ subsection_mask_set(map, pfn, nr_pages);
> >+
> >+ subsection_map = &ms->usage->subsection_map[0];
> >+
> >+ if (bitmap_empty(map, SUBSECTIONS_PER_SECTION))
> >+ rc = -EINVAL;
> >+ else if (bitmap_intersects(map, subsection_map, SUBSECTIONS_PER_SECTION))
> >+ rc = -EEXIST;
> >+ else
> >+ bitmap_or(subsection_map, map, subsection_map,
> >+ SUBSECTIONS_PER_SECTION);
> >+
> >+ return rc;
> >+}
> > #else
> > void __init subsection_map_init(unsigned long pfn, unsigned long nr_pages)
> > {
> > }
> >+
> >+static int clear_subsection_map(unsigned long pfn, unsigned long nr_pages)
> >+{
> >+ return 0;
> >+}
> >+
> >+static int fill_subsection_map(unsigned long pfn, unsigned long nr_pages)
> >+{
> >+ return 0;
> >+}
> > #endif
> >
> > /* Record a memory area against a node. */
> >@@ -732,52 +816,6 @@ static void free_map_bootmem(struct page *memmap)
> > }
> > #endif /* CONFIG_SPARSEMEM_VMEMMAP */
> >
> >-#ifdef CONFIG_SPARSEMEM_VMEMMAP
> >-/**
> >- * clear_subsection_map - Clear subsection map of one memory region
> >- *
> >- * @pfn - start pfn of the memory range
> >- * @nr_pages - number of pfns to add in the region
> >- *
> >- * This is only intended for hotplug, and clear the related subsection
> >- * map inside one section.
> >- *
> >- * Return:
> >- * * -EINVAL - Section already deactived.
> >- * * 0 - Subsection map is emptied.
> >- * * 1 - Subsection map is not empty.
> >- */
> >-static int clear_subsection_map(unsigned long pfn, unsigned long nr_pages)
> >-{
> >- DECLARE_BITMAP(map, SUBSECTIONS_PER_SECTION) = { 0 };
> >- DECLARE_BITMAP(tmp, SUBSECTIONS_PER_SECTION) = { 0 };
> >- struct mem_section *ms = __pfn_to_section(pfn);
> >- unsigned long *subsection_map = ms->usage
> >- ? &ms->usage->subsection_map[0] : NULL;
> >-
> >- subsection_mask_set(map, pfn, nr_pages);
> >- if (subsection_map)
> >- bitmap_and(tmp, map, subsection_map, SUBSECTIONS_PER_SECTION);
> >-
> >- if (WARN(!subsection_map || !bitmap_equal(tmp, map, SUBSECTIONS_PER_SECTION),
> >- "section already deactivated (%#lx + %ld)\n",
> >- pfn, nr_pages))
> >- return -EINVAL;
> >-
> >- bitmap_xor(subsection_map, map, subsection_map, SUBSECTIONS_PER_SECTION);
> >-
> >- if (bitmap_empty(subsection_map, SUBSECTIONS_PER_SECTION))
> >- return 0;
> >-
> >- return 1;
> >-}
> >-#else
> >-static int clear_subsection_map(unsigned long pfn, unsigned long nr_pages)
> >-{
> >- return 0;
> >-}
> >-#endif
> >-
> > /*
> > * To deactivate a memory region, there are 3 cases to handle across
> > * two configurations (SPARSEMEM_VMEMMAP={y,n}):
> >@@ -833,48 +871,6 @@ static void section_deactivate(unsigned long pfn, unsigned long nr_pages,
> > ms->section_mem_map = (unsigned long)NULL;
> > }
> >
> >-#ifdef CONFIG_SPARSEMEM_VMEMMAP
> >-/**
> >- * fill_subsection_map - fill subsection map of a memory region
> >- * @pfn - start pfn of the memory range
> >- * @nr_pages - number of pfns to add in the region
> >- *
> >- * This fills the related subsection map inside one section, and only
> >- * intended for hotplug.
> >- *
> >- * Return:
> >- * * 0 - On success.
> >- * * -EINVAL - Invalid memory region.
> >- * * -EEXIST - Subsection map has been set.
> >- */
> >-static int fill_subsection_map(unsigned long pfn, unsigned long nr_pages)
> >-{
> >- struct mem_section *ms = __pfn_to_section(pfn);
> >- DECLARE_BITMAP(map, SUBSECTIONS_PER_SECTION) = { 0 };
> >- unsigned long *subsection_map;
> >- int rc = 0;
> >-
> >- subsection_mask_set(map, pfn, nr_pages);
> >-
> >- subsection_map = &ms->usage->subsection_map[0];
> >-
> >- if (bitmap_empty(map, SUBSECTIONS_PER_SECTION))
> >- rc = -EINVAL;
> >- else if (bitmap_intersects(map, subsection_map, SUBSECTIONS_PER_SECTION))
> >- rc = -EEXIST;
> >- else
> >- bitmap_or(subsection_map, map, subsection_map,
> >- SUBSECTIONS_PER_SECTION);
> >-
> >- return rc;
> >-}
> >-#else
> >-static int fill_subsection_map(unsigned long pfn, unsigned long nr_pages)
> >-{
> >- return 0;
> >-}
> >-#endif
> >-
> > static struct page * __meminit section_activate(int nid, unsigned long pfn,
> > unsigned long nr_pages, struct vmem_altmap *altmap)
> > {
> >--
> >2.17.2
>
> --
> Wei Yang
> Help you, Help me
>
On Thu, Feb 20, 2020 at 03:04:20PM +0800, Baoquan He wrote:
>On 02/20/20 at 02:18pm, Wei Yang wrote:
>> On Thu, Feb 20, 2020 at 12:33:15PM +0800, Baoquan He wrote:
>> >No functional change.
>> >
>>
>> Those functions are introduced in your previous patches.
>>
>> Is it possible to define them close to each other at the very beginning?
>
>Thanks for reviewing.
>
>Do you mean to discard this patch and keep it as they are in the patch 4/7?
>If yes, it's fine to me to drop it as you suggested. To me, I prefer to put
>all subsection map handling codes together.
>
I mean when you introduce clear_subsection_map() in patch 3, is it possible to
move close to the definition of fill_subsection_map()?
Since finally you are will to move them together.
--
Wei Yang
Help you, Help me
On 02/20/20 at 03:12pm, Wei Yang wrote:
> On Thu, Feb 20, 2020 at 03:04:20PM +0800, Baoquan He wrote:
> >On 02/20/20 at 02:18pm, Wei Yang wrote:
> >> On Thu, Feb 20, 2020 at 12:33:15PM +0800, Baoquan He wrote:
> >> >No functional change.
> >> >
> >>
> >> Those functions are introduced in your previous patches.
> >>
> >> Is it possible to define them close to each other at the very beginning?
> >
> >Thanks for reviewing.
> >
> >Do you mean to discard this patch and keep it as they are in the patch 4/7?
> >If yes, it's fine to me to drop it as you suggested. To me, I prefer to put
> >all subsection map handling codes together.
> >
>
> I mean when you introduce clear_subsection_map() in patch 3, is it possible to
> move close to the definition of fill_subsection_map()?
>
> Since finally you are will to move them together.
Oh, got it. Yeah, I just put them close to their callers separately. I
think it's also good to put them together as you suggested, but it
doesn't matter much, right? I will consider this and see if I can adjust
it if v3 is needed. Thanks.
On Thu 20-02-20 12:33:09, Baoquan He wrote:
> Memory sub-section hotplug was added to fix the issue that nvdimm could
> be mapped at non-section aligned starting address. A subsection map is
> added into struct mem_section_usage to implement it. However, sub-section
> is only supported in VMEMMAP case.
Why? Is there any fundamental reason or just a lack of implementation?
VMEMMAP should be really only an implementation detail unless I am
missing something subtle.
> Hence there's no need to operate
> subsection map in SPARSEMEM|!VMEMMAP case. In this patchset, change
> codes to make sub-section map and the relevant operation only available
> in VMEMMAP case.
>
> And since sub-section hotplug added, the hot add/remove functionality
> have been broken in SPARSEMEM|!VMEMMAP case. Wei Yang and I, each of us
> make one patch to fix one of the failures. In this patchset, the patch
> 1/7 from me is used to fix the hot remove failure. Wei Yang's patch has
> been merged by Andrew.
Not sure I understand. Are there more issues to be fixed?
> include/linux/mmzone.h | 2 +
> mm/sparse.c | 178 +++++++++++++++++++++++++++++------------
> 2 files changed, 127 insertions(+), 53 deletions(-)
Why do we need to add so much code to remove a functionality from one
memory model?
--
Michal Hocko
SUSE Labs
On 20.02.20 05:33, Baoquan He wrote:
> In section_deactivate(), pfn_to_page() doesn't work any more after
> ms->section_mem_map is resetting to NULL in SPARSEMEM|!VMEMMAP case.
> It caused hot remove failure:
>
> kernel BUG at mm/page_alloc.c:4806!
> invalid opcode: 0000 [#1] SMP PTI
> CPU: 3 PID: 8 Comm: kworker/u16:0 Tainted: G W 5.5.0-next-20200205+ #340
> Hardware name: QEMU Standard PC (i440FX + PIIX, 1996), BIOS 0.0.0 02/06/2015
> Workqueue: kacpi_hotplug acpi_hotplug_work_fn
> RIP: 0010:free_pages+0x85/0xa0
> Call Trace:
> __remove_pages+0x99/0xc0
> arch_remove_memory+0x23/0x4d
> try_remove_memory+0xc8/0x130
> ? walk_memory_blocks+0x72/0xa0
> __remove_memory+0xa/0x11
> acpi_memory_device_remove+0x72/0x100
> acpi_bus_trim+0x55/0x90
> acpi_device_hotplug+0x2eb/0x3d0
> acpi_hotplug_work_fn+0x1a/0x30
> process_one_work+0x1a7/0x370
> worker_thread+0x30/0x380
> ? flush_rcu_work+0x30/0x30
> kthread+0x112/0x130
> ? kthread_create_on_node+0x60/0x60
> ret_from_fork+0x35/0x40
>
> Let's move the ->section_mem_map resetting after depopulate_section_memmap()
> to fix it.
>
> Signed-off-by: Baoquan He <[email protected]>
Reviewed-by: David Hildenbrand <[email protected]>
--
Thanks,
David / dhildenb
On Thu, Feb 20, 2020 at 04:55:59PM +0800, Baoquan He wrote:
>On 02/20/20 at 03:12pm, Wei Yang wrote:
>> On Thu, Feb 20, 2020 at 03:04:20PM +0800, Baoquan He wrote:
>> >On 02/20/20 at 02:18pm, Wei Yang wrote:
>> >> On Thu, Feb 20, 2020 at 12:33:15PM +0800, Baoquan He wrote:
>> >> >No functional change.
>> >> >
>> >>
>> >> Those functions are introduced in your previous patches.
>> >>
>> >> Is it possible to define them close to each other at the very beginning?
>> >
>> >Thanks for reviewing.
>> >
>> >Do you mean to discard this patch and keep it as they are in the patch 4/7?
>> >If yes, it's fine to me to drop it as you suggested. To me, I prefer to put
>> >all subsection map handling codes together.
>> >
>>
>> I mean when you introduce clear_subsection_map() in patch 3, is it possible to
>> move close to the definition of fill_subsection_map()?
>>
>> Since finally you are will to move them together.
>
>Oh, got it. Yeah, I just put them close to their callers separately. I
>think it's also good to put them together as you suggested, but it
>doesn't matter much, right? I will consider this and see if I can adjust
>it if v3 is needed. Thanks.
Yes, doesn't matter much.
--
Wei Yang
Help you, Help me
On 02/20/20 at 11:38am, Michal Hocko wrote:
> On Thu 20-02-20 12:33:09, Baoquan He wrote:
> > Memory sub-section hotplug was added to fix the issue that nvdimm could
> > be mapped at non-section aligned starting address. A subsection map is
> > added into struct mem_section_usage to implement it. However, sub-section
> > is only supported in VMEMMAP case.
>
> Why? Is there any fundamental reason or just a lack of implementation?
> VMEMMAP should be really only an implementation detail unless I am
> missing something subtle.
Thanks for checking.
VMEMMAP is one of two ways to convert a PFN to the corresponding
'struct page' in SPARSE model. I mentioned them as VMEMMAP case, or
!VMEMMAP case because we called them like this previously when reviewed
patches, hope it won't cause confusion.
Currently, config ZONE_DEVICE depends on SPARSEMEM_VMEMMAP. The
subsection_map is added to struct mem_section_usage to track which sub
section is present, VMEMMAP fills those bits which corresponding
sub-sections are present, while !VMEMMAP, namely classic SPARSE, fills
the whole map always.
As we know, VMEMMAP builds page table to map a cluster of 'struct page'
into the corresponding area of 'vmemmap'. Subsection hotplug can be
supported naturally, w/o any change, just map needed region related to
sub-sections on demand. For !VMEMMAP, it allocates memmap with
alloc_pages() or vmalloc, thing is a little complicated, e.g the mixed
section, boot memory occupies the starting area, later pmem hot added to
the rear part.
About !VMEMMAP which doesn't support sub-section hotplog, Dan said
it's more because the effort and maintenance burden outweighs the
benefit. And the current 64 bit ARCHes all enable
SPARSEMEM_VMEMMAP_ENABLE by default.
So no need to keep subsection_map and its handling in SPARSE|!VMEMMAP.
>
> > Hence there's no need to operate
> > subsection map in SPARSEMEM|!VMEMMAP case. In this patchset, change
> > codes to make sub-section map and the relevant operation only available
> > in VMEMMAP case.
> >
> > And since sub-section hotplug added, the hot add/remove functionality
> > have been broken in SPARSEMEM|!VMEMMAP case. Wei Yang and I, each of us
> > make one patch to fix one of the failures. In this patchset, the patch
> > 1/7 from me is used to fix the hot remove failure. Wei Yang's patch has
> > been merged by Andrew.
>
> Not sure I understand. Are there more issues to be fixed?
Only these two. Wei Yang firstly posted the patch to fix the hot add
failure in SPARSE|!VMEMMAP. When I reviewed his patch and tested, found
hot remove failed too. So the patch 1/7 is to fix the hot remove failure
in !VMEMMAP. With these two patches, hot add/remove works well in !VMEMMAP.
Not sure if it's clear.
> > include/linux/mmzone.h | 2 +
> > mm/sparse.c | 178 +++++++++++++++++++++++++++++------------
> > 2 files changed, 127 insertions(+), 53 deletions(-)
>
> Why do we need to add so much code to remove a functionality from one
> memory model?
Hmm, Dan also asked this before.
The adding mainly happens in patch 2, 3, 4, including the two newly
added function defitions, the code comments above them, and those added
dummy functions for !VMEMMAP.
Thanks
Baoquan
>>> include/linux/mmzone.h | 2 +
>>> mm/sparse.c | 178 +++++++++++++++++++++++++++++------------
>>> 2 files changed, 127 insertions(+), 53 deletions(-)
>>
>> Why do we need to add so much code to remove a functionality from one
>> memory model?
>
> Hmm, Dan also asked this before.
>
> The adding mainly happens in patch 2, 3, 4, including the two newly
> added function defitions, the code comments above them, and those added
> dummy functions for !VMEMMAP.
AFAIKS, it's mostly a bunch of newly added comments on top of functions.
E.g., the comment for fill_subsection_map() alone spans 12 LOC in total.
I do wonder if we have to be that verbose. We are barely that verbose on
MM code (and usually I don't see much benefit unless it's a function
with many users from many different places).
--
Thanks,
David / dhildenb
On Thu 20-02-20 12:33:13, Baoquan He wrote:
> Currently, subsection map is used when SPARSEMEM is enabled, including
> VMEMMAP case and !VMEMMAP case. However, subsection hotplug is not
> supported at all in SPARSEMEM|!VMEMMAP case, subsection map is unnecessary
> and misleading. Let's adjust code to only allow subsection map being
> used in VMEMMAP case.
This really needs more explanation I believe. What exactly happens if
somebody tries to hotremove a part of the section with !VMEMMAP? I can
see that clear_subsection_map returns 0 but that is not an error code.
Besides that section_deactivate doesn't propagate the error upwards.
/me stares into the code
OK, I can see it now. It is relying on check_pfn_span to use the proper
subsection granularity. This really begs for a comment in the code
somewhere.
>
> Signed-off-by: Baoquan He <[email protected]>
> ---
> include/linux/mmzone.h | 2 ++
> mm/sparse.c | 20 ++++++++++++++++++++
> 2 files changed, 22 insertions(+)
>
> diff --git a/include/linux/mmzone.h b/include/linux/mmzone.h
> index 462f6873905a..fc0de3a9a51e 100644
> --- a/include/linux/mmzone.h
> +++ b/include/linux/mmzone.h
> @@ -1185,7 +1185,9 @@ static inline unsigned long section_nr_to_pfn(unsigned long sec)
> #define SUBSECTION_ALIGN_DOWN(pfn) ((pfn) & PAGE_SUBSECTION_MASK)
>
> struct mem_section_usage {
> +#ifdef CONFIG_SPARSEMEM_VMEMMAP
> DECLARE_BITMAP(subsection_map, SUBSECTIONS_PER_SECTION);
> +#endif
> /* See declaration of similar field in struct zone */
> unsigned long pageblock_flags[0];
> };
> diff --git a/mm/sparse.c b/mm/sparse.c
> index df857ee9330c..66c497d6a229 100644
> --- a/mm/sparse.c
> +++ b/mm/sparse.c
> @@ -209,6 +209,7 @@ static inline unsigned long first_present_section_nr(void)
> return next_present_section_nr(-1);
> }
>
> +#ifdef CONFIG_SPARSEMEM_VMEMMAP
> static void subsection_mask_set(unsigned long *map, unsigned long pfn,
> unsigned long nr_pages)
> {
> @@ -243,6 +244,11 @@ void __init subsection_map_init(unsigned long pfn, unsigned long nr_pages)
> nr_pages -= pfns;
> }
> }
> +#else
> +void __init subsection_map_init(unsigned long pfn, unsigned long nr_pages)
> +{
> +}
> +#endif
>
> /* Record a memory area against a node. */
> void __init memory_present(int nid, unsigned long start, unsigned long end)
> @@ -726,6 +732,7 @@ static void free_map_bootmem(struct page *memmap)
> }
> #endif /* CONFIG_SPARSEMEM_VMEMMAP */
>
> +#ifdef CONFIG_SPARSEMEM_VMEMMAP
> /**
> * clear_subsection_map - Clear subsection map of one memory region
> *
> @@ -764,6 +771,12 @@ static int clear_subsection_map(unsigned long pfn, unsigned long nr_pages)
>
> return 1;
> }
> +#else
> +static int clear_subsection_map(unsigned long pfn, unsigned long nr_pages)
> +{
> + return 0;
> +}
> +#endif
>
> static void section_deactivate(unsigned long pfn, unsigned long nr_pages,
> struct vmem_altmap *altmap)
> @@ -820,6 +833,7 @@ static void section_deactivate(unsigned long pfn, unsigned long nr_pages,
> ms->section_mem_map = (unsigned long)NULL;
> }
>
> +#ifdef CONFIG_SPARSEMEM_VMEMMAP
> /**
> * fill_subsection_map - fill subsection map of a memory region
> * @pfn - start pfn of the memory range
> @@ -854,6 +868,12 @@ static int fill_subsection_map(unsigned long pfn, unsigned long nr_pages)
>
> return rc;
> }
> +#else
> +static int fill_subsection_map(unsigned long pfn, unsigned long nr_pages)
> +{
> + return 0;
> +}
> +#endif
>
> static struct page * __meminit section_activate(int nid, unsigned long pfn,
> unsigned long nr_pages, struct vmem_altmap *altmap)
> --
> 2.17.2
>
--
Michal Hocko
SUSE Labs
On Tue 25-02-20 10:10:45, David Hildenbrand wrote:
> >>> include/linux/mmzone.h | 2 +
> >>> mm/sparse.c | 178 +++++++++++++++++++++++++++++------------
> >>> 2 files changed, 127 insertions(+), 53 deletions(-)
> >>
> >> Why do we need to add so much code to remove a functionality from one
> >> memory model?
> >
> > Hmm, Dan also asked this before.
> >
> > The adding mainly happens in patch 2, 3, 4, including the two newly
> > added function defitions, the code comments above them, and those added
> > dummy functions for !VMEMMAP.
>
> AFAIKS, it's mostly a bunch of newly added comments on top of functions.
> E.g., the comment for fill_subsection_map() alone spans 12 LOC in total.
> I do wonder if we have to be that verbose. We are barely that verbose on
> MM code (and usually I don't see much benefit unless it's a function
> with many users from many different places).
I would tend to agree here. Not that I am against kernel doc
documentation but these are internal functions and the comment doesn't
really give any better insight IMHO. I would be much more inclined if
this was the general pattern in the respective file but it just stands
out.
--
Michal Hocko
SUSE Labs
On Fri 21-02-20 22:28:47, Baoquan He wrote:
> On 02/20/20 at 11:38am, Michal Hocko wrote:
> > On Thu 20-02-20 12:33:09, Baoquan He wrote:
> > > Memory sub-section hotplug was added to fix the issue that nvdimm could
> > > be mapped at non-section aligned starting address. A subsection map is
> > > added into struct mem_section_usage to implement it. However, sub-section
> > > is only supported in VMEMMAP case.
> >
> > Why? Is there any fundamental reason or just a lack of implementation?
> > VMEMMAP should be really only an implementation detail unless I am
> > missing something subtle.
>
> Thanks for checking.
>
> VMEMMAP is one of two ways to convert a PFN to the corresponding
> 'struct page' in SPARSE model. I mentioned them as VMEMMAP case, or
> !VMEMMAP case because we called them like this previously when reviewed
> patches, hope it won't cause confusion.
>
> Currently, config ZONE_DEVICE depends on SPARSEMEM_VMEMMAP. The
> subsection_map is added to struct mem_section_usage to track which sub
> section is present, VMEMMAP fills those bits which corresponding
> sub-sections are present, while !VMEMMAP, namely classic SPARSE, fills
> the whole map always.
>
> As we know, VMEMMAP builds page table to map a cluster of 'struct page'
> into the corresponding area of 'vmemmap'. Subsection hotplug can be
> supported naturally, w/o any change, just map needed region related to
> sub-sections on demand. For !VMEMMAP, it allocates memmap with
> alloc_pages() or vmalloc, thing is a little complicated, e.g the mixed
> section, boot memory occupies the starting area, later pmem hot added to
> the rear part.
>
> About !VMEMMAP which doesn't support sub-section hotplog, Dan said
> it's more because the effort and maintenance burden outweighs the
> benefit. And the current 64 bit ARCHes all enable
> SPARSEMEM_VMEMMAP_ENABLE by default.
OK, if this is the primary argument then make sure to document it in the
changelog (cover letter).
--
Michal Hocko
SUSE Labs
On 02/25/20 at 11:02am, Michal Hocko wrote:
> On Tue 25-02-20 10:10:45, David Hildenbrand wrote:
> > >>> include/linux/mmzone.h | 2 +
> > >>> mm/sparse.c | 178 +++++++++++++++++++++++++++++------------
> > >>> 2 files changed, 127 insertions(+), 53 deletions(-)
> > >>
> > >> Why do we need to add so much code to remove a functionality from one
> > >> memory model?
> > >
> > > Hmm, Dan also asked this before.
> > >
> > > The adding mainly happens in patch 2, 3, 4, including the two newly
> > > added function defitions, the code comments above them, and those added
> > > dummy functions for !VMEMMAP.
> >
> > AFAIKS, it's mostly a bunch of newly added comments on top of functions.
> > E.g., the comment for fill_subsection_map() alone spans 12 LOC in total.
> > I do wonder if we have to be that verbose. We are barely that verbose on
> > MM code (and usually I don't see much benefit unless it's a function
> > with many users from many different places).
>
> I would tend to agree here. Not that I am against kernel doc
> documentation but these are internal functions and the comment doesn't
> really give any better insight IMHO. I would be much more inclined if
> this was the general pattern in the respective file but it just stands
> out.
I saw there are internal functions which have code comments, e.g
shrink_slab() in mm/vmscan.c, not only this one place, there are several
places. I personally prefer to see code comment for function if
possible, this can save time, e.g people can skip the bitmap operation
when read code if not necessary. And here I mainly want to tell there
are different returned value to note different behaviour when call them.
Anyway, it's fine to me to remove them. The two functions are internal,
and not so complicated. I will remove them since you both object.
However, I disagree with the saying that we should not add code comment
for internal function.
Thanks
Baoquan
On 02/25/20 at 11:03am, Michal Hocko wrote:
> On Fri 21-02-20 22:28:47, Baoquan He wrote:
> > On 02/20/20 at 11:38am, Michal Hocko wrote:
> > > On Thu 20-02-20 12:33:09, Baoquan He wrote:
> > > > Memory sub-section hotplug was added to fix the issue that nvdimm could
> > > > be mapped at non-section aligned starting address. A subsection map is
> > > > added into struct mem_section_usage to implement it. However, sub-section
> > > > is only supported in VMEMMAP case.
> > >
> > > Why? Is there any fundamental reason or just a lack of implementation?
> > > VMEMMAP should be really only an implementation detail unless I am
> > > missing something subtle.
> >
> > Thanks for checking.
> >
> > VMEMMAP is one of two ways to convert a PFN to the corresponding
> > 'struct page' in SPARSE model. I mentioned them as VMEMMAP case, or
> > !VMEMMAP case because we called them like this previously when reviewed
> > patches, hope it won't cause confusion.
> >
> > Currently, config ZONE_DEVICE depends on SPARSEMEM_VMEMMAP. The
> > subsection_map is added to struct mem_section_usage to track which sub
> > section is present, VMEMMAP fills those bits which corresponding
> > sub-sections are present, while !VMEMMAP, namely classic SPARSE, fills
> > the whole map always.
> >
> > As we know, VMEMMAP builds page table to map a cluster of 'struct page'
> > into the corresponding area of 'vmemmap'. Subsection hotplug can be
> > supported naturally, w/o any change, just map needed region related to
> > sub-sections on demand. For !VMEMMAP, it allocates memmap with
> > alloc_pages() or vmalloc, thing is a little complicated, e.g the mixed
> > section, boot memory occupies the starting area, later pmem hot added to
> > the rear part.
> >
> > About !VMEMMAP which doesn't support sub-section hotplog, Dan said
> > it's more because the effort and maintenance burden outweighs the
> > benefit. And the current 64 bit ARCHes all enable
> > SPARSEMEM_VMEMMAP_ENABLE by default.
>
> OK, if this is the primary argument then make sure to document it in the
> changelog (cover letter).
Will add it when repost.
On 02/25/20 at 10:57am, Michal Hocko wrote:
> On Thu 20-02-20 12:33:13, Baoquan He wrote:
> > Currently, subsection map is used when SPARSEMEM is enabled, including
> > VMEMMAP case and !VMEMMAP case. However, subsection hotplug is not
> > supported at all in SPARSEMEM|!VMEMMAP case, subsection map is unnecessary
> > and misleading. Let's adjust code to only allow subsection map being
> > used in VMEMMAP case.
>
> This really needs more explanation I believe. What exactly happens if
> somebody tries to hotremove a part of the section with !VMEMMAP? I can
> see that clear_subsection_map returns 0 but that is not an error code.
> Besides that section_deactivate doesn't propagate the error upwards.
> /me stares into the code
>
> OK, I can see it now. It is relying on check_pfn_span to use the proper
> subsection granularity. This really begs for a comment in the code
> somewhere.
Yes, check_pfn_span() guards it. People have no way to hot add/remove
on non-section aligned block with !VMEMMAP.
I have added extra comment to above section_activate() to note this,
please check patch 5/7. Let me see how to add words to reflect the
check_pfn_span() guard thing.
On Wed 26-02-20 11:53:36, Baoquan He wrote:
> On 02/25/20 at 10:57am, Michal Hocko wrote:
> > On Thu 20-02-20 12:33:13, Baoquan He wrote:
> > > Currently, subsection map is used when SPARSEMEM is enabled, including
> > > VMEMMAP case and !VMEMMAP case. However, subsection hotplug is not
> > > supported at all in SPARSEMEM|!VMEMMAP case, subsection map is unnecessary
> > > and misleading. Let's adjust code to only allow subsection map being
> > > used in VMEMMAP case.
> >
> > This really needs more explanation I believe. What exactly happens if
> > somebody tries to hotremove a part of the section with !VMEMMAP? I can
> > see that clear_subsection_map returns 0 but that is not an error code.
> > Besides that section_deactivate doesn't propagate the error upwards.
> > /me stares into the code
> >
> > OK, I can see it now. It is relying on check_pfn_span to use the proper
> > subsection granularity. This really begs for a comment in the code
> > somewhere.
>
> Yes, check_pfn_span() guards it. People have no way to hot add/remove
> on non-section aligned block with !VMEMMAP.
>
> I have added extra comment to above section_activate() to note this,
> please check patch 5/7. Let me see how to add words to reflect the
> check_pfn_span() guard thing.
An explicit note about check_pfn_span gating the proper alignement and
sizing sounds sufficient to me.
--
Michal Hocko
SUSE Labs
On Wed 26-02-20 11:42:36, Baoquan He wrote:
> On 02/25/20 at 11:02am, Michal Hocko wrote:
> > On Tue 25-02-20 10:10:45, David Hildenbrand wrote:
> > > >>> include/linux/mmzone.h | 2 +
> > > >>> mm/sparse.c | 178 +++++++++++++++++++++++++++++------------
> > > >>> 2 files changed, 127 insertions(+), 53 deletions(-)
> > > >>
> > > >> Why do we need to add so much code to remove a functionality from one
> > > >> memory model?
> > > >
> > > > Hmm, Dan also asked this before.
> > > >
> > > > The adding mainly happens in patch 2, 3, 4, including the two newly
> > > > added function defitions, the code comments above them, and those added
> > > > dummy functions for !VMEMMAP.
> > >
> > > AFAIKS, it's mostly a bunch of newly added comments on top of functions.
> > > E.g., the comment for fill_subsection_map() alone spans 12 LOC in total.
> > > I do wonder if we have to be that verbose. We are barely that verbose on
> > > MM code (and usually I don't see much benefit unless it's a function
> > > with many users from many different places).
> >
> > I would tend to agree here. Not that I am against kernel doc
> > documentation but these are internal functions and the comment doesn't
> > really give any better insight IMHO. I would be much more inclined if
> > this was the general pattern in the respective file but it just stands
> > out.
>
> I saw there are internal functions which have code comments, e.g
> shrink_slab() in mm/vmscan.c, not only this one place, there are several
> places. I personally prefer to see code comment for function if
> possible, this can save time, e.g people can skip the bitmap operation
> when read code if not necessary. And here I mainly want to tell there
> are different returned value to note different behaviour when call them.
... yet nobody really cares about different return code. It is an error
that is propagated up the call chain and that's all.
I also like when there is a kernel doc comment that helps to understand
the intented usage, context the function can be called from, potential
side effects, locking requirements and other details people need to know
when calling functions. But have a look at
/**
* clear_subsection_map - Clear subsection map of one memory region
*
* @pfn - start pfn of the memory range
* @nr_pages - number of pfns to add in the region
*
* This is only intended for hotplug, and clear the related subsection
* map inside one section.
*
* Return:
* * -EINVAL - Section already deactived.
* * 0 - Subsection map is emptied.
* * 1 - Subsection map is not empty.
*/
the only useful information in here is that this is a hotplug stuff but
I would be completely lost about the intention without already knowing
what is this whole subsection about.
--
Michal Hocko
SUSE Labs
On 02/26/20 at 10:14am, Michal Hocko wrote:
> On Wed 26-02-20 11:42:36, Baoquan He wrote:
> > On 02/25/20 at 11:02am, Michal Hocko wrote:
> > > On Tue 25-02-20 10:10:45, David Hildenbrand wrote:
> > > > >>> include/linux/mmzone.h | 2 +
> > > > >>> mm/sparse.c | 178 +++++++++++++++++++++++++++++------------
> > > > >>> 2 files changed, 127 insertions(+), 53 deletions(-)
> > > > >>
> > > > >> Why do we need to add so much code to remove a functionality from one
> > > > >> memory model?
> > > > >
> > > > > Hmm, Dan also asked this before.
> > > > >
> > > > > The adding mainly happens in patch 2, 3, 4, including the two newly
> > > > > added function defitions, the code comments above them, and those added
> > > > > dummy functions for !VMEMMAP.
> > > >
> > > > AFAIKS, it's mostly a bunch of newly added comments on top of functions.
> > > > E.g., the comment for fill_subsection_map() alone spans 12 LOC in total.
> > > > I do wonder if we have to be that verbose. We are barely that verbose on
> > > > MM code (and usually I don't see much benefit unless it's a function
> > > > with many users from many different places).
> > >
> > > I would tend to agree here. Not that I am against kernel doc
> > > documentation but these are internal functions and the comment doesn't
> > > really give any better insight IMHO. I would be much more inclined if
> > > this was the general pattern in the respective file but it just stands
> > > out.
> >
> > I saw there are internal functions which have code comments, e.g
> > shrink_slab() in mm/vmscan.c, not only this one place, there are several
> > places. I personally prefer to see code comment for function if
> > possible, this can save time, e.g people can skip the bitmap operation
> > when read code if not necessary. And here I mainly want to tell there
> > are different returned value to note different behaviour when call them.
>
> ... yet nobody really cares about different return code. It is an error
> that is propagated up the call chain and that's all.
>
> I also like when there is a kernel doc comment that helps to understand
> the intented usage, context the function can be called from, potential
> side effects, locking requirements and other details people need to know
Fair enough. As I have said, I didn't intend to stick to add kernel doc
comments for these two functions. Will remove them. Thanks for
reviewing.
> when calling functions. But have a look at
> /**
> * clear_subsection_map - Clear subsection map of one memory region
> *
> * @pfn - start pfn of the memory range
> * @nr_pages - number of pfns to add in the region
> *
> * This is only intended for hotplug, and clear the related subsection
> * map inside one section.
> *
> * Return:
> * * -EINVAL - Section already deactived.
> * * 0 - Subsection map is emptied.
> * * 1 - Subsection map is not empty.
> */
>
> the only useful information in here is that this is a hotplug stuff but
> I would be completely lost about the intention without already knowing
> what is this whole subsection about.
>
> --
> Michal Hocko
> SUSE Labs
>
On 20.02.20 05:33, Baoquan He wrote:
> In section_deactivate(), pfn_to_page() doesn't work any more after
> ms->section_mem_map is resetting to NULL in SPARSEMEM|!VMEMMAP case.
> It caused hot remove failure:
>
> kernel BUG at mm/page_alloc.c:4806!
> invalid opcode: 0000 [#1] SMP PTI
> CPU: 3 PID: 8 Comm: kworker/u16:0 Tainted: G W 5.5.0-next-20200205+ #340
> Hardware name: QEMU Standard PC (i440FX + PIIX, 1996), BIOS 0.0.0 02/06/2015
> Workqueue: kacpi_hotplug acpi_hotplug_work_fn
> RIP: 0010:free_pages+0x85/0xa0
> Call Trace:
> __remove_pages+0x99/0xc0
> arch_remove_memory+0x23/0x4d
> try_remove_memory+0xc8/0x130
> ? walk_memory_blocks+0x72/0xa0
> __remove_memory+0xa/0x11
> acpi_memory_device_remove+0x72/0x100
> acpi_bus_trim+0x55/0x90
> acpi_device_hotplug+0x2eb/0x3d0
> acpi_hotplug_work_fn+0x1a/0x30
> process_one_work+0x1a7/0x370
> worker_thread+0x30/0x380
> ? flush_rcu_work+0x30/0x30
> kthread+0x112/0x130
> ? kthread_create_on_node+0x60/0x60
> ret_from_fork+0x35/0x40
>
> Let's move the ->section_mem_map resetting after depopulate_section_memmap()
> to fix it.
>
Fixes: Tag? Stable: Tag?
> Signed-off-by: Baoquan He <[email protected]>
> ---
> mm/sparse.c | 4 +++-
> 1 file changed, 3 insertions(+), 1 deletion(-)
>
> diff --git a/mm/sparse.c b/mm/sparse.c
> index 596b2a45b100..b8e52c8fed7f 100644
> --- a/mm/sparse.c
> +++ b/mm/sparse.c
> @@ -779,13 +779,15 @@ static void section_deactivate(unsigned long pfn, unsigned long nr_pages,
> ms->usage = NULL;
> }
> memmap = sparse_decode_mem_map(ms->section_mem_map, section_nr);
> - ms->section_mem_map = (unsigned long)NULL;
> }
>
> if (section_is_early && memmap)
> free_map_bootmem(memmap);
> else
> depopulate_section_memmap(pfn, nr_pages, altmap);
> +
> + if (bitmap_empty(subsection_map, SUBSECTIONS_PER_SECTION))
> + ms->section_mem_map = (unsigned long)NULL;
> }
>
> static struct page * __meminit section_activate(int nid, unsigned long pfn,
>
Reviewed-by: David Hildenbrand <[email protected]>
--
Thanks,
David / dhildenb
On 02/26/20 at 01:31pm, David Hildenbrand wrote:
> On 20.02.20 05:33, Baoquan He wrote:
> > In section_deactivate(), pfn_to_page() doesn't work any more after
> > ms->section_mem_map is resetting to NULL in SPARSEMEM|!VMEMMAP case.
> > It caused hot remove failure:
> >
> > kernel BUG at mm/page_alloc.c:4806!
> > invalid opcode: 0000 [#1] SMP PTI
> > CPU: 3 PID: 8 Comm: kworker/u16:0 Tainted: G W 5.5.0-next-20200205+ #340
> > Hardware name: QEMU Standard PC (i440FX + PIIX, 1996), BIOS 0.0.0 02/06/2015
> > Workqueue: kacpi_hotplug acpi_hotplug_work_fn
> > RIP: 0010:free_pages+0x85/0xa0
> > Call Trace:
> > __remove_pages+0x99/0xc0
> > arch_remove_memory+0x23/0x4d
> > try_remove_memory+0xc8/0x130
> > ? walk_memory_blocks+0x72/0xa0
> > __remove_memory+0xa/0x11
> > acpi_memory_device_remove+0x72/0x100
> > acpi_bus_trim+0x55/0x90
> > acpi_device_hotplug+0x2eb/0x3d0
> > acpi_hotplug_work_fn+0x1a/0x30
> > process_one_work+0x1a7/0x370
> > worker_thread+0x30/0x380
> > ? flush_rcu_work+0x30/0x30
> > kthread+0x112/0x130
> > ? kthread_create_on_node+0x60/0x60
> > ret_from_fork+0x35/0x40
> >
> > Let's move the ->section_mem_map resetting after depopulate_section_memmap()
> > to fix it.
> >
>
> Fixes: Tag? Stable: Tag?
Right, should add these. Will add them. Thanks for noticing.
>
> > Signed-off-by: Baoquan He <[email protected]>
> > ---
> > mm/sparse.c | 4 +++-
> > 1 file changed, 3 insertions(+), 1 deletion(-)
> >
> > diff --git a/mm/sparse.c b/mm/sparse.c
> > index 596b2a45b100..b8e52c8fed7f 100644
> > --- a/mm/sparse.c
> > +++ b/mm/sparse.c
> > @@ -779,13 +779,15 @@ static void section_deactivate(unsigned long pfn, unsigned long nr_pages,
> > ms->usage = NULL;
> > }
> > memmap = sparse_decode_mem_map(ms->section_mem_map, section_nr);
> > - ms->section_mem_map = (unsigned long)NULL;
> > }
> >
> > if (section_is_early && memmap)
> > free_map_bootmem(memmap);
> > else
> > depopulate_section_memmap(pfn, nr_pages, altmap);
> > +
> > + if (bitmap_empty(subsection_map, SUBSECTIONS_PER_SECTION))
> > + ms->section_mem_map = (unsigned long)NULL;
> > }
> >
> > static struct page * __meminit section_activate(int nid, unsigned long pfn,
> >
>
> Reviewed-by: David Hildenbrand <[email protected]>
>
> --
> Thanks,
>
> David / dhildenb
On Wed, Feb 19, 2020 at 8:34 PM Baoquan He <[email protected]> wrote:
>
> In section_deactivate(), pfn_to_page() doesn't work any more after
> ms->section_mem_map is resetting to NULL in SPARSEMEM|!VMEMMAP case.
> It caused hot remove failure:
>
> kernel BUG at mm/page_alloc.c:4806!
> invalid opcode: 0000 [#1] SMP PTI
> CPU: 3 PID: 8 Comm: kworker/u16:0 Tainted: G W 5.5.0-next-20200205+ #340
> Hardware name: QEMU Standard PC (i440FX + PIIX, 1996), BIOS 0.0.0 02/06/2015
> Workqueue: kacpi_hotplug acpi_hotplug_work_fn
> RIP: 0010:free_pages+0x85/0xa0
> Call Trace:
> __remove_pages+0x99/0xc0
> arch_remove_memory+0x23/0x4d
> try_remove_memory+0xc8/0x130
> ? walk_memory_blocks+0x72/0xa0
> __remove_memory+0xa/0x11
> acpi_memory_device_remove+0x72/0x100
> acpi_bus_trim+0x55/0x90
> acpi_device_hotplug+0x2eb/0x3d0
> acpi_hotplug_work_fn+0x1a/0x30
> process_one_work+0x1a7/0x370
> worker_thread+0x30/0x380
> ? flush_rcu_work+0x30/0x30
> kthread+0x112/0x130
> ? kthread_create_on_node+0x60/0x60
> ret_from_fork+0x35/0x40
>
> Let's move the ->section_mem_map resetting after depopulate_section_memmap()
> to fix it.
>
> Signed-off-by: Baoquan He <[email protected]>
> ---
> mm/sparse.c | 4 +++-
> 1 file changed, 3 insertions(+), 1 deletion(-)
>
> diff --git a/mm/sparse.c b/mm/sparse.c
> index 596b2a45b100..b8e52c8fed7f 100644
> --- a/mm/sparse.c
> +++ b/mm/sparse.c
> @@ -779,13 +779,15 @@ static void section_deactivate(unsigned long pfn, unsigned long nr_pages,
> ms->usage = NULL;
> }
> memmap = sparse_decode_mem_map(ms->section_mem_map, section_nr);
> - ms->section_mem_map = (unsigned long)NULL;
> }
>
> if (section_is_early && memmap)
> free_map_bootmem(memmap);
> else
> depopulate_section_memmap(pfn, nr_pages, altmap);
> +
> + if (bitmap_empty(subsection_map, SUBSECTIONS_PER_SECTION))
> + ms->section_mem_map = (unsigned long)NULL;
Looks good to me:
Reviewed-by: Dan Williams <[email protected]>
On 02/26/20 at 10:10am, Michal Hocko wrote:
> On Wed 26-02-20 11:53:36, Baoquan He wrote:
> > On 02/25/20 at 10:57am, Michal Hocko wrote:
> > > On Thu 20-02-20 12:33:13, Baoquan He wrote:
> > > > Currently, subsection map is used when SPARSEMEM is enabled, including
> > > > VMEMMAP case and !VMEMMAP case. However, subsection hotplug is not
> > > > supported at all in SPARSEMEM|!VMEMMAP case, subsection map is unnecessary
> > > > and misleading. Let's adjust code to only allow subsection map being
> > > > used in VMEMMAP case.
> > >
> > > This really needs more explanation I believe. What exactly happens if
> > > somebody tries to hotremove a part of the section with !VMEMMAP? I can
> > > see that clear_subsection_map returns 0 but that is not an error code.
> > > Besides that section_deactivate doesn't propagate the error upwards.
> > > /me stares into the code
> > >
> > > OK, I can see it now. It is relying on check_pfn_span to use the proper
> > > subsection granularity. This really begs for a comment in the code
> > > somewhere.
> >
> > Yes, check_pfn_span() guards it. People have no way to hot add/remove
> > on non-section aligned block with !VMEMMAP.
> >
> > I have added extra comment to above section_activate() to note this,
> > please check patch 5/7. Let me see how to add words to reflect the
> > check_pfn_span() guard thing.
>
> An explicit note about check_pfn_span gating the proper alignement and
> sizing sounds sufficient to me.
It's fine to me, I will adjust the description.
On 20.02.20 05:33, Baoquan He wrote:
> Wrap the codes filling subsection map from section_activate() into
"Factor out the code that fills the subsection" ...
> fill_subsection_map(), this makes section_activate() cleaner and
> easier to follow.
>
> Signed-off-by: Baoquan He <[email protected]>
> ---
> mm/sparse.c | 45 ++++++++++++++++++++++++++++++++++-----------
> 1 file changed, 34 insertions(+), 11 deletions(-)
>
> diff --git a/mm/sparse.c b/mm/sparse.c
> index b8e52c8fed7f..977b47acd38d 100644
> --- a/mm/sparse.c
> +++ b/mm/sparse.c
> @@ -790,24 +790,28 @@ static void section_deactivate(unsigned long pfn, unsigned long nr_pages,
> ms->section_mem_map = (unsigned long)NULL;
> }
>
> -static struct page * __meminit section_activate(int nid, unsigned long pfn,
> - unsigned long nr_pages, struct vmem_altmap *altmap)
> +/**
> + * fill_subsection_map - fill subsection map of a memory region
> + * @pfn - start pfn of the memory range
> + * @nr_pages - number of pfns to add in the region
> + *
> + * This fills the related subsection map inside one section, and only
> + * intended for hotplug.
> + *
> + * Return:
> + * * 0 - On success.
> + * * -EINVAL - Invalid memory region.
> + * * -EEXIST - Subsection map has been set.
> + */
Without this comment (or a massively reduced one :) )
Reviewed-by: David Hildenbrand <[email protected]>
--
Thanks,
David / dhildenb
On 20.02.20 05:33, Baoquan He wrote:
> Wrap the codes which clear subsection map of one memory region from
> section_deactivate() into clear_subsection_map().
>
> Signed-off-by: Baoquan He <[email protected]>
> ---
> mm/sparse.c | 46 ++++++++++++++++++++++++++++++++++++++--------
> 1 file changed, 38 insertions(+), 8 deletions(-)
>
> diff --git a/mm/sparse.c b/mm/sparse.c
> index 977b47acd38d..df857ee9330c 100644
> --- a/mm/sparse.c
> +++ b/mm/sparse.c
> @@ -726,14 +726,25 @@ static void free_map_bootmem(struct page *memmap)
> }
> #endif /* CONFIG_SPARSEMEM_VMEMMAP */
>
> -static void section_deactivate(unsigned long pfn, unsigned long nr_pages,
> - struct vmem_altmap *altmap)
> +/**
> + * clear_subsection_map - Clear subsection map of one memory region
> + *
> + * @pfn - start pfn of the memory range
> + * @nr_pages - number of pfns to add in the region
> + *
> + * This is only intended for hotplug, and clear the related subsection
> + * map inside one section.
> + *
> + * Return:
> + * * -EINVAL - Section already deactived.
> + * * 0 - Subsection map is emptied.
> + * * 1 - Subsection map is not empty.
> + */
Less verbose please (in my preference: none and simplify return handling)
> +static int clear_subsection_map(unsigned long pfn, unsigned long nr_pages)
> {
> DECLARE_BITMAP(map, SUBSECTIONS_PER_SECTION) = { 0 };
> DECLARE_BITMAP(tmp, SUBSECTIONS_PER_SECTION) = { 0 };
> struct mem_section *ms = __pfn_to_section(pfn);
> - bool section_is_early = early_section(ms);
> - struct page *memmap = NULL;
> unsigned long *subsection_map = ms->usage
> ? &ms->usage->subsection_map[0] : NULL;
>
> @@ -744,8 +755,28 @@ static void section_deactivate(unsigned long pfn, unsigned long nr_pages,
> if (WARN(!subsection_map || !bitmap_equal(tmp, map, SUBSECTIONS_PER_SECTION),
> "section already deactivated (%#lx + %ld)\n",
> pfn, nr_pages))
> - return;
> + return -EINVAL;
> +
> + bitmap_xor(subsection_map, map, subsection_map, SUBSECTIONS_PER_SECTION);
>
> + if (bitmap_empty(subsection_map, SUBSECTIONS_PER_SECTION))
> + return 0;
> +
Can we please just have a
subsection_map_empty() instead and handle that in the caller?
(you can then always return true in the !VMEMMAP variant)
I dislike the "rc" handling in the caller.
> + return 1;
> +}
> +
> +static void section_deactivate(unsigned long pfn, unsigned long nr_pages,
> + struct vmem_altmap *altmap)
> +{
> + struct mem_section *ms = __pfn_to_section(pfn);
> + bool section_is_early = early_section(ms);
> + struct page *memmap = NULL;
> + int rc;
> +
> +
one superfluous empty line
> + rc = clear_subsection_map(pfn, nr_pages);
> + if (IS_ERR_VALUE((unsigned long)rc))
huh? "if (rc < 0)" ? or am I missing something?
> + return;
> /*
> * There are 3 cases to handle across two configurations
> * (SPARSEMEM_VMEMMAP={y,n}):
> @@ -763,8 +794,7 @@ static void section_deactivate(unsigned long pfn, unsigned long nr_pages,
> *
> * For 2/ and 3/ the SPARSEMEM_VMEMMAP={y,n} cases are unified
> */
> - bitmap_xor(subsection_map, map, subsection_map, SUBSECTIONS_PER_SECTION);
> - if (bitmap_empty(subsection_map, SUBSECTIONS_PER_SECTION)) {
> + if (!rc) {
> unsigned long section_nr = pfn_to_section_nr(pfn);
>
> /*
> @@ -786,7 +816,7 @@ static void section_deactivate(unsigned long pfn, unsigned long nr_pages,
> else
> depopulate_section_memmap(pfn, nr_pages, altmap);
>
> - if (bitmap_empty(subsection_map, SUBSECTIONS_PER_SECTION))
> + if (!rc)
I don't really like that handling.
Either
s/rc/section_empty/
or use a separate subsection_map_empty()
> ms->section_mem_map = (unsigned long)NULL;
> }
>
>
Thanks!
--
Thanks,
David / dhildenb
On 02/28/20 at 03:27pm, David Hildenbrand wrote:
> On 20.02.20 05:33, Baoquan He wrote:
> > Wrap the codes filling subsection map from section_activate() into
>
> "Factor out the code that fills the subsection" ...
Fine to me, I will replace it with this. Thanks.
>
> > fill_subsection_map(), this makes section_activate() cleaner and
> > easier to follow.
> >
> > Signed-off-by: Baoquan He <[email protected]>
> > ---
> > mm/sparse.c | 45 ++++++++++++++++++++++++++++++++++-----------
> > 1 file changed, 34 insertions(+), 11 deletions(-)
> >
> > diff --git a/mm/sparse.c b/mm/sparse.c
> > index b8e52c8fed7f..977b47acd38d 100644
> > --- a/mm/sparse.c
> > +++ b/mm/sparse.c
> > @@ -790,24 +790,28 @@ static void section_deactivate(unsigned long pfn, unsigned long nr_pages,
> > ms->section_mem_map = (unsigned long)NULL;
> > }
> >
> > -static struct page * __meminit section_activate(int nid, unsigned long pfn,
> > - unsigned long nr_pages, struct vmem_altmap *altmap)
> > +/**
> > + * fill_subsection_map - fill subsection map of a memory region
> > + * @pfn - start pfn of the memory range
> > + * @nr_pages - number of pfns to add in the region
> > + *
> > + * This fills the related subsection map inside one section, and only
> > + * intended for hotplug.
> > + *
> > + * Return:
> > + * * 0 - On success.
> > + * * -EINVAL - Invalid memory region.
> > + * * -EEXIST - Subsection map has been set.
> > + */
>
> Without this comment (or a massively reduced one :) )
Yeah, as we discussed, I will remove it.
>
> Reviewed-by: David Hildenbrand <[email protected]>
>
> --
> Thanks,
>
> David / dhildenb
On 02/28/20 at 03:36pm, David Hildenbrand wrote:
> On 20.02.20 05:33, Baoquan He wrote:
> > Wrap the codes which clear subsection map of one memory region from
> > section_deactivate() into clear_subsection_map().
> >
> > Signed-off-by: Baoquan He <[email protected]>
> > ---
> > mm/sparse.c | 46 ++++++++++++++++++++++++++++++++++++++--------
> > 1 file changed, 38 insertions(+), 8 deletions(-)
> >
> > diff --git a/mm/sparse.c b/mm/sparse.c
> > index 977b47acd38d..df857ee9330c 100644
> > --- a/mm/sparse.c
> > +++ b/mm/sparse.c
> > @@ -726,14 +726,25 @@ static void free_map_bootmem(struct page *memmap)
> > }
> > #endif /* CONFIG_SPARSEMEM_VMEMMAP */
> >
> > -static void section_deactivate(unsigned long pfn, unsigned long nr_pages,
> > - struct vmem_altmap *altmap)
> > +/**
> > + * clear_subsection_map - Clear subsection map of one memory region
> > + *
> > + * @pfn - start pfn of the memory range
> > + * @nr_pages - number of pfns to add in the region
> > + *
> > + * This is only intended for hotplug, and clear the related subsection
> > + * map inside one section.
> > + *
> > + * Return:
> > + * * -EINVAL - Section already deactived.
> > + * * 0 - Subsection map is emptied.
> > + * * 1 - Subsection map is not empty.
> > + */
>
> Less verbose please (in my preference: none and simplify return handling)
>
> > +static int clear_subsection_map(unsigned long pfn, unsigned long nr_pages)
> > {
> > DECLARE_BITMAP(map, SUBSECTIONS_PER_SECTION) = { 0 };
> > DECLARE_BITMAP(tmp, SUBSECTIONS_PER_SECTION) = { 0 };
> > struct mem_section *ms = __pfn_to_section(pfn);
> > - bool section_is_early = early_section(ms);
> > - struct page *memmap = NULL;
> > unsigned long *subsection_map = ms->usage
> > ? &ms->usage->subsection_map[0] : NULL;
> >
> > @@ -744,8 +755,28 @@ static void section_deactivate(unsigned long pfn, unsigned long nr_pages,
> > if (WARN(!subsection_map || !bitmap_equal(tmp, map, SUBSECTIONS_PER_SECTION),
> > "section already deactivated (%#lx + %ld)\n",
> > pfn, nr_pages))
> > - return;
> > + return -EINVAL;
> > +
> > + bitmap_xor(subsection_map, map, subsection_map, SUBSECTIONS_PER_SECTION);
> >
> > + if (bitmap_empty(subsection_map, SUBSECTIONS_PER_SECTION))
> > + return 0;
> > +
>
> Can we please just have a
>
> subsection_map_empty() instead and handle that in the caller?
> (you can then always return true in the !VMEMMAP variant)
I don't follow. Could you be more specific? or pseudo code please?
The old code has to handle below case in which subsection_map has been
cleared. And I introduce clear_subsection_map() to encapsulate all
subsection map realted code so that !VMEMMAP won't have to see it any
more.
if (WARN(!subsection_map || !bitmap_equal(tmp, map, SUBSECTIONS_PER_SECTION),
"section already deactivated (%#lx + %ld)\n",
pfn, nr_pages))
return;
>
> I dislike the "rc" handling in the caller.
>
> > + return 1;
> > +}
> > +
> > +static void section_deactivate(unsigned long pfn, unsigned long nr_pages,
> > + struct vmem_altmap *altmap)
> > +{
> > + struct mem_section *ms = __pfn_to_section(pfn);
> > + bool section_is_early = early_section(ms);
> > + struct page *memmap = NULL;
> > + int rc;
> > +
> > +
>
> one superfluous empty line
>
> > + rc = clear_subsection_map(pfn, nr_pages);
> > + if (IS_ERR_VALUE((unsigned long)rc))
>
> huh? "if (rc < 0)" ? or am I missing something?
Both is fine to me, I have no preference, can change like this.
>
> > + return;
> > /*
> > * There are 3 cases to handle across two configurations
> > * (SPARSEMEM_VMEMMAP={y,n}):
> > @@ -763,8 +794,7 @@ static void section_deactivate(unsigned long pfn, unsigned long nr_pages,
> > *
> > * For 2/ and 3/ the SPARSEMEM_VMEMMAP={y,n} cases are unified
> > */
> > - bitmap_xor(subsection_map, map, subsection_map, SUBSECTIONS_PER_SECTION);
> > - if (bitmap_empty(subsection_map, SUBSECTIONS_PER_SECTION)) {
> > + if (!rc) {
> > unsigned long section_nr = pfn_to_section_nr(pfn);
> >
> > /*
> > @@ -786,7 +816,7 @@ static void section_deactivate(unsigned long pfn, unsigned long nr_pages,
> > else
> > depopulate_section_memmap(pfn, nr_pages, altmap);
> >
> > - if (bitmap_empty(subsection_map, SUBSECTIONS_PER_SECTION))
> > + if (!rc)
>
> I don't really like that handling.
>
> Either
>
> s/rc/section_empty/
>
> or use a separate subsection_map_empty()
>
> > ms->section_mem_map = (unsigned long)NULL;
> > }
> >
> >
>
> Thanks!
>
> --
> Thanks,
>
> David / dhildenb
On 01.03.20 06:20, Baoquan He wrote:
> On 02/28/20 at 03:36pm, David Hildenbrand wrote:
>> On 20.02.20 05:33, Baoquan He wrote:
>>> Wrap the codes which clear subsection map of one memory region from
>>> section_deactivate() into clear_subsection_map().
>>>
>>> Signed-off-by: Baoquan He <[email protected]>
>>> ---
>>> mm/sparse.c | 46 ++++++++++++++++++++++++++++++++++++++--------
>>> 1 file changed, 38 insertions(+), 8 deletions(-)
>>>
>>> diff --git a/mm/sparse.c b/mm/sparse.c
>>> index 977b47acd38d..df857ee9330c 100644
>>> --- a/mm/sparse.c
>>> +++ b/mm/sparse.c
>>> @@ -726,14 +726,25 @@ static void free_map_bootmem(struct page *memmap)
>>> }
>>> #endif /* CONFIG_SPARSEMEM_VMEMMAP */
>>>
>>> -static void section_deactivate(unsigned long pfn, unsigned long nr_pages,
>>> - struct vmem_altmap *altmap)
>>> +/**
>>> + * clear_subsection_map - Clear subsection map of one memory region
>>> + *
>>> + * @pfn - start pfn of the memory range
>>> + * @nr_pages - number of pfns to add in the region
>>> + *
>>> + * This is only intended for hotplug, and clear the related subsection
>>> + * map inside one section.
>>> + *
>>> + * Return:
>>> + * * -EINVAL - Section already deactived.
>>> + * * 0 - Subsection map is emptied.
>>> + * * 1 - Subsection map is not empty.
>>> + */
>>
>> Less verbose please (in my preference: none and simplify return handling)
>>
>>> +static int clear_subsection_map(unsigned long pfn, unsigned long nr_pages)
>>> {
>>> DECLARE_BITMAP(map, SUBSECTIONS_PER_SECTION) = { 0 };
>>> DECLARE_BITMAP(tmp, SUBSECTIONS_PER_SECTION) = { 0 };
>>> struct mem_section *ms = __pfn_to_section(pfn);
>>> - bool section_is_early = early_section(ms);
>>> - struct page *memmap = NULL;
>>> unsigned long *subsection_map = ms->usage
>>> ? &ms->usage->subsection_map[0] : NULL;
>>>
>>> @@ -744,8 +755,28 @@ static void section_deactivate(unsigned long pfn, unsigned long nr_pages,
>>> if (WARN(!subsection_map || !bitmap_equal(tmp, map, SUBSECTIONS_PER_SECTION),
>>> "section already deactivated (%#lx + %ld)\n",
>>> pfn, nr_pages))
>>> - return;
>>> + return -EINVAL;
>>> +
>>> + bitmap_xor(subsection_map, map, subsection_map, SUBSECTIONS_PER_SECTION);
>>>
>>> + if (bitmap_empty(subsection_map, SUBSECTIONS_PER_SECTION))
>>> + return 0;
>>> +
>>
>> Can we please just have a
>>
>> subsection_map_empty() instead and handle that in the caller?
>> (you can then always return true in the !VMEMMAP variant)
>
> I don't follow. Could you be more specific? or pseudo code please?
>
> The old code has to handle below case in which subsection_map has been
> cleared. And I introduce clear_subsection_map() to encapsulate all
> subsection map realted code so that !VMEMMAP won't have to see it any
> more.
>
Something like this on top would be easier to understand IMHO
diff --git a/mm/sparse.c b/mm/sparse.c
index dc79b00ddaaa..be5c80e9cfee 100644
--- a/mm/sparse.c
+++ b/mm/sparse.c
@@ -726,20 +726,6 @@ static void free_map_bootmem(struct page *memmap)
}
#endif /* CONFIG_SPARSEMEM_VMEMMAP */
-/**
- * clear_subsection_map - Clear subsection map of one memory region
- *
- * @pfn - start pfn of the memory range
- * @nr_pages - number of pfns to add in the region
- *
- * This is only intended for hotplug, and clear the related subsection
- * map inside one section.
- *
- * Return:
- * * -EINVAL - Section already deactived.
- * * 0 - Subsection map is emptied.
- * * 1 - Subsection map is not empty.
- */
static int clear_subsection_map(unsigned long pfn, unsigned long nr_pages)
{
DECLARE_BITMAP(map, SUBSECTIONS_PER_SECTION) = { 0 };
@@ -758,11 +744,12 @@ static int clear_subsection_map(unsigned long pfn, unsigned long nr_pages)
return -EINVAL;
bitmap_xor(subsection_map, map, subsection_map, SUBSECTIONS_PER_SECTION);
+ return 0;
+}
- if (bitmap_empty(subsection_map, SUBSECTIONS_PER_SECTION))
- return 0;
-
- return 1;
+static bool is_subsection_map_empty(unsigned long pfn, unsigned long nr_pages)
+{
+ return bitmap_empty(subsection_map, SUBSECTIONS_PER_SECTION);
}
static void section_deactivate(unsigned long pfn, unsigned long nr_pages,
@@ -771,11 +758,8 @@ static void section_deactivate(unsigned long pfn, unsigned long nr_pages,
struct mem_section *ms = __pfn_to_section(pfn);
bool section_is_early = early_section(ms);
struct page *memmap = NULL;
- int rc;
-
- rc = clear_subsection_map(pfn, nr_pages);
- if (IS_ERR_VALUE((unsigned long)rc))
+ if (unlikely(clear_subsection_map(pfn, nr_pages)))
return;
/*
* There are 3 cases to handle across two configurations
@@ -794,7 +778,7 @@ static void section_deactivate(unsigned long pfn, unsigned long nr_pages,
*
* For 2/ and 3/ the SPARSEMEM_VMEMMAP={y,n} cases are unified
*/
- if (!rc) {
+ if (is_subsection_map_empty(pfn, nr_pages)) {
unsigned long section_nr = pfn_to_section_nr(pfn);
/*
@@ -816,7 +800,7 @@ static void section_deactivate(unsigned long pfn, unsigned long nr_pages,
else
depopulate_section_memmap(pfn, nr_pages, altmap);
- if (!rc)
+ if (is_subsection_map_empty(pfn, nr_pages))
ms->section_mem_map = (unsigned long)NULL;
}
--
Thanks,
David / dhildenb
On 03/02/20 at 04:43pm, David Hildenbrand wrote:
> On 01.03.20 06:20, Baoquan He wrote:
> > On 02/28/20 at 03:36pm, David Hildenbrand wrote:
> >> On 20.02.20 05:33, Baoquan He wrote:
> >>> Wrap the codes which clear subsection map of one memory region from
> >>> section_deactivate() into clear_subsection_map().
> >>>
> >>> Signed-off-by: Baoquan He <[email protected]>
> >>> ---
> >>> mm/sparse.c | 46 ++++++++++++++++++++++++++++++++++++++--------
> >>> 1 file changed, 38 insertions(+), 8 deletions(-)
> >>>
> >>> diff --git a/mm/sparse.c b/mm/sparse.c
> >>> index 977b47acd38d..df857ee9330c 100644
> >>> --- a/mm/sparse.c
> >>> +++ b/mm/sparse.c
> >>> @@ -726,14 +726,25 @@ static void free_map_bootmem(struct page *memmap)
> >>> }
> >>> #endif /* CONFIG_SPARSEMEM_VMEMMAP */
> >>>
> >>> -static void section_deactivate(unsigned long pfn, unsigned long nr_pages,
> >>> - struct vmem_altmap *altmap)
> >>> +/**
> >>> + * clear_subsection_map - Clear subsection map of one memory region
> >>> + *
> >>> + * @pfn - start pfn of the memory range
> >>> + * @nr_pages - number of pfns to add in the region
> >>> + *
> >>> + * This is only intended for hotplug, and clear the related subsection
> >>> + * map inside one section.
> >>> + *
> >>> + * Return:
> >>> + * * -EINVAL - Section already deactived.
> >>> + * * 0 - Subsection map is emptied.
> >>> + * * 1 - Subsection map is not empty.
> >>> + */
> >>
> >> Less verbose please (in my preference: none and simplify return handling)
> >>
> >>> +static int clear_subsection_map(unsigned long pfn, unsigned long nr_pages)
> >>> {
> >>> DECLARE_BITMAP(map, SUBSECTIONS_PER_SECTION) = { 0 };
> >>> DECLARE_BITMAP(tmp, SUBSECTIONS_PER_SECTION) = { 0 };
> >>> struct mem_section *ms = __pfn_to_section(pfn);
> >>> - bool section_is_early = early_section(ms);
> >>> - struct page *memmap = NULL;
> >>> unsigned long *subsection_map = ms->usage
> >>> ? &ms->usage->subsection_map[0] : NULL;
> >>>
> >>> @@ -744,8 +755,28 @@ static void section_deactivate(unsigned long pfn, unsigned long nr_pages,
> >>> if (WARN(!subsection_map || !bitmap_equal(tmp, map, SUBSECTIONS_PER_SECTION),
> >>> "section already deactivated (%#lx + %ld)\n",
> >>> pfn, nr_pages))
> >>> - return;
> >>> + return -EINVAL;
> >>> +
> >>> + bitmap_xor(subsection_map, map, subsection_map, SUBSECTIONS_PER_SECTION);
> >>>
> >>> + if (bitmap_empty(subsection_map, SUBSECTIONS_PER_SECTION))
> >>> + return 0;
> >>> +
> >>
> >> Can we please just have a
> >>
> >> subsection_map_empty() instead and handle that in the caller?
> >> (you can then always return true in the !VMEMMAP variant)
> >
> > I don't follow. Could you be more specific? or pseudo code please?
> >
> > The old code has to handle below case in which subsection_map has been
> > cleared. And I introduce clear_subsection_map() to encapsulate all
> > subsection map realted code so that !VMEMMAP won't have to see it any
> > more.
> >
>
> Something like this on top would be easier to understand IMHO
Ok, I see. Both is fine to me, I even like the old way a tiny little
bit more, but I would like to try to satisfy reviewer, will change as you
suggested. Thanks.
If no other concern, I will repost after changing and testing.
>
>
> diff --git a/mm/sparse.c b/mm/sparse.c
> index dc79b00ddaaa..be5c80e9cfee 100644
> --- a/mm/sparse.c
> +++ b/mm/sparse.c
> @@ -726,20 +726,6 @@ static void free_map_bootmem(struct page *memmap)
> }
> #endif /* CONFIG_SPARSEMEM_VMEMMAP */
>
> -/**
> - * clear_subsection_map - Clear subsection map of one memory region
> - *
> - * @pfn - start pfn of the memory range
> - * @nr_pages - number of pfns to add in the region
> - *
> - * This is only intended for hotplug, and clear the related subsection
> - * map inside one section.
> - *
> - * Return:
> - * * -EINVAL - Section already deactived.
> - * * 0 - Subsection map is emptied.
> - * * 1 - Subsection map is not empty.
> - */
> static int clear_subsection_map(unsigned long pfn, unsigned long nr_pages)
> {
> DECLARE_BITMAP(map, SUBSECTIONS_PER_SECTION) = { 0 };
> @@ -758,11 +744,12 @@ static int clear_subsection_map(unsigned long pfn, unsigned long nr_pages)
> return -EINVAL;
>
> bitmap_xor(subsection_map, map, subsection_map, SUBSECTIONS_PER_SECTION);
> + return 0;
> +}
>
> - if (bitmap_empty(subsection_map, SUBSECTIONS_PER_SECTION))
> - return 0;
> -
> - return 1;
> +static bool is_subsection_map_empty(unsigned long pfn, unsigned long nr_pages)
> +{
> + return bitmap_empty(subsection_map, SUBSECTIONS_PER_SECTION);
> }
>
> static void section_deactivate(unsigned long pfn, unsigned long nr_pages,
> @@ -771,11 +758,8 @@ static void section_deactivate(unsigned long pfn, unsigned long nr_pages,
> struct mem_section *ms = __pfn_to_section(pfn);
> bool section_is_early = early_section(ms);
> struct page *memmap = NULL;
> - int rc;
> -
>
> - rc = clear_subsection_map(pfn, nr_pages);
> - if (IS_ERR_VALUE((unsigned long)rc))
> + if (unlikely(clear_subsection_map(pfn, nr_pages)))
> return;
> /*
> * There are 3 cases to handle across two configurations
> @@ -794,7 +778,7 @@ static void section_deactivate(unsigned long pfn, unsigned long nr_pages,
> *
> * For 2/ and 3/ the SPARSEMEM_VMEMMAP={y,n} cases are unified
> */
> - if (!rc) {
> + if (is_subsection_map_empty(pfn, nr_pages)) {
> unsigned long section_nr = pfn_to_section_nr(pfn);
>
> /*
> @@ -816,7 +800,7 @@ static void section_deactivate(unsigned long pfn, unsigned long nr_pages,
> else
> depopulate_section_memmap(pfn, nr_pages, altmap);
>
> - if (!rc)
> + if (is_subsection_map_empty(pfn, nr_pages))
> ms->section_mem_map = (unsigned long)NULL;
> }
>
>
> --
> Thanks,
>
> David / dhildenb
On 03/02/20 at 04:43pm, David Hildenbrand wrote:
> On 01.03.20 06:20, Baoquan He wrote:
> > On 02/28/20 at 03:36pm, David Hildenbrand wrote:
> >> On 20.02.20 05:33, Baoquan He wrote:
> >>> Wrap the codes which clear subsection map of one memory region from
> >>> section_deactivate() into clear_subsection_map().
> >>>
> >>> Signed-off-by: Baoquan He <[email protected]>
> >>> ---
> >>> mm/sparse.c | 46 ++++++++++++++++++++++++++++++++++++++--------
> >>> 1 file changed, 38 insertions(+), 8 deletions(-)
> >>>
> >>> diff --git a/mm/sparse.c b/mm/sparse.c
> >>> index 977b47acd38d..df857ee9330c 100644
> >>> --- a/mm/sparse.c
> >>> +++ b/mm/sparse.c
> >>> @@ -726,14 +726,25 @@ static void free_map_bootmem(struct page *memmap)
> >>> }
> >>> #endif /* CONFIG_SPARSEMEM_VMEMMAP */
> >>>
> >>> -static void section_deactivate(unsigned long pfn, unsigned long nr_pages,
> >>> - struct vmem_altmap *altmap)
> >>> +/**
> >>> + * clear_subsection_map - Clear subsection map of one memory region
> >>> + *
> >>> + * @pfn - start pfn of the memory range
> >>> + * @nr_pages - number of pfns to add in the region
> >>> + *
> >>> + * This is only intended for hotplug, and clear the related subsection
> >>> + * map inside one section.
> >>> + *
> >>> + * Return:
> >>> + * * -EINVAL - Section already deactived.
> >>> + * * 0 - Subsection map is emptied.
> >>> + * * 1 - Subsection map is not empty.
> >>> + */
> >>
> >> Less verbose please (in my preference: none and simplify return handling)
> >>
> >>> +static int clear_subsection_map(unsigned long pfn, unsigned long nr_pages)
> >>> {
> >>> DECLARE_BITMAP(map, SUBSECTIONS_PER_SECTION) = { 0 };
> >>> DECLARE_BITMAP(tmp, SUBSECTIONS_PER_SECTION) = { 0 };
> >>> struct mem_section *ms = __pfn_to_section(pfn);
> >>> - bool section_is_early = early_section(ms);
> >>> - struct page *memmap = NULL;
> >>> unsigned long *subsection_map = ms->usage
> >>> ? &ms->usage->subsection_map[0] : NULL;
> >>>
> >>> @@ -744,8 +755,28 @@ static void section_deactivate(unsigned long pfn, unsigned long nr_pages,
> >>> if (WARN(!subsection_map || !bitmap_equal(tmp, map, SUBSECTIONS_PER_SECTION),
> >>> "section already deactivated (%#lx + %ld)\n",
> >>> pfn, nr_pages))
> >>> - return;
> >>> + return -EINVAL;
> >>> +
> >>> + bitmap_xor(subsection_map, map, subsection_map, SUBSECTIONS_PER_SECTION);
> >>>
> >>> + if (bitmap_empty(subsection_map, SUBSECTIONS_PER_SECTION))
> >>> + return 0;
> >>> +
> >>
> >> Can we please just have a
> >>
> >> subsection_map_empty() instead and handle that in the caller?
> >> (you can then always return true in the !VMEMMAP variant)
> >
> > I don't follow. Could you be more specific? or pseudo code please?
> >
> > The old code has to handle below case in which subsection_map has been
> > cleared. And I introduce clear_subsection_map() to encapsulate all
> > subsection map realted code so that !VMEMMAP won't have to see it any
> > more.
> >
>
> Something like this on top would be easier to understand IMHO
>
>
> diff --git a/mm/sparse.c b/mm/sparse.c
> index dc79b00ddaaa..be5c80e9cfee 100644
> --- a/mm/sparse.c
> +++ b/mm/sparse.c
> @@ -726,20 +726,6 @@ static void free_map_bootmem(struct page *memmap)
> }
> #endif /* CONFIG_SPARSEMEM_VMEMMAP */
>
> -/**
> - * clear_subsection_map - Clear subsection map of one memory region
> - *
> - * @pfn - start pfn of the memory range
> - * @nr_pages - number of pfns to add in the region
> - *
> - * This is only intended for hotplug, and clear the related subsection
> - * map inside one section.
> - *
> - * Return:
> - * * -EINVAL - Section already deactived.
> - * * 0 - Subsection map is emptied.
> - * * 1 - Subsection map is not empty.
> - */
> static int clear_subsection_map(unsigned long pfn, unsigned long nr_pages)
> {
> DECLARE_BITMAP(map, SUBSECTIONS_PER_SECTION) = { 0 };
> @@ -758,11 +744,12 @@ static int clear_subsection_map(unsigned long pfn, unsigned long nr_pages)
> return -EINVAL;
>
> bitmap_xor(subsection_map, map, subsection_map, SUBSECTIONS_PER_SECTION);
> + return 0;
> +}
>
> - if (bitmap_empty(subsection_map, SUBSECTIONS_PER_SECTION))
> - return 0;
> -
> - return 1;
> +static bool is_subsection_map_empty(unsigned long pfn, unsigned long nr_pages)
> +{
> + return bitmap_empty(subsection_map, SUBSECTIONS_PER_SECTION);
> }
>
> static void section_deactivate(unsigned long pfn, unsigned long nr_pages,
> @@ -771,11 +758,8 @@ static void section_deactivate(unsigned long pfn, unsigned long nr_pages,
> struct mem_section *ms = __pfn_to_section(pfn);
> bool section_is_early = early_section(ms);
> struct page *memmap = NULL;
> - int rc;
> -
>
> - rc = clear_subsection_map(pfn, nr_pages);
> - if (IS_ERR_VALUE((unsigned long)rc))
> + if (unlikely(clear_subsection_map(pfn, nr_pages)))
> return;
> /*
> * There are 3 cases to handle across two configurations
> @@ -794,7 +778,7 @@ static void section_deactivate(unsigned long pfn, unsigned long nr_pages,
> *
> * For 2/ and 3/ the SPARSEMEM_VMEMMAP={y,n} cases are unified
> */
> - if (!rc) {
> + if (is_subsection_map_empty(pfn, nr_pages)) {
> unsigned long section_nr = pfn_to_section_nr(pfn);
Tried this way, it's not good in this patch. Since ms->usage might be
freed in this place.
if (!PageReserved(virt_to_page(ms->usage))) {
kfree(ms->usage);
ms->usage = NULL;
}
If have to introduce is_subsection_map_empty(), the code need be
adjusted a little bit. It may need be done in another separate patch to
adjust it. If you agree, I would like to keep this patch as is, later
I can refactor this on top of this patchset, or if anyone else post
patch to adjust it, I can help review.
>
> /*
> @@ -816,7 +800,7 @@ static void section_deactivate(unsigned long pfn, unsigned long nr_pages,
> else
> depopulate_section_memmap(pfn, nr_pages, altmap);
>
> - if (!rc)
> + if (is_subsection_map_empty(pfn, nr_pages))
> ms->section_mem_map = (unsigned long)NULL;
> }
>
>
> --
> Thanks,
>
> David / dhildenb
On 20.02.20 05:33, Baoquan He wrote:
> In section_deactivate(), pfn_to_page() doesn't work any more after
> ms->section_mem_map is resetting to NULL in SPARSEMEM|!VMEMMAP case.
> It caused hot remove failure:
>
> kernel BUG at mm/page_alloc.c:4806!
> invalid opcode: 0000 [#1] SMP PTI
> CPU: 3 PID: 8 Comm: kworker/u16:0 Tainted: G W 5.5.0-next-20200205+ #340
> Hardware name: QEMU Standard PC (i440FX + PIIX, 1996), BIOS 0.0.0 02/06/2015
> Workqueue: kacpi_hotplug acpi_hotplug_work_fn
> RIP: 0010:free_pages+0x85/0xa0
> Call Trace:
> __remove_pages+0x99/0xc0
> arch_remove_memory+0x23/0x4d
> try_remove_memory+0xc8/0x130
> ? walk_memory_blocks+0x72/0xa0
> __remove_memory+0xa/0x11
> acpi_memory_device_remove+0x72/0x100
> acpi_bus_trim+0x55/0x90
> acpi_device_hotplug+0x2eb/0x3d0
> acpi_hotplug_work_fn+0x1a/0x30
> process_one_work+0x1a7/0x370
> worker_thread+0x30/0x380
> ? flush_rcu_work+0x30/0x30
> kthread+0x112/0x130
> ? kthread_create_on_node+0x60/0x60
> ret_from_fork+0x35/0x40
>
> Let's move the ->section_mem_map resetting after depopulate_section_memmap()
> to fix it.
>
> Signed-off-by: Baoquan He <[email protected]>
> ---
> mm/sparse.c | 4 +++-
> 1 file changed, 3 insertions(+), 1 deletion(-)
>
> diff --git a/mm/sparse.c b/mm/sparse.c
> index 596b2a45b100..b8e52c8fed7f 100644
> --- a/mm/sparse.c
> +++ b/mm/sparse.c
> @@ -779,13 +779,15 @@ static void section_deactivate(unsigned long pfn, unsigned long nr_pages,
> ms->usage = NULL;
> }
> memmap = sparse_decode_mem_map(ms->section_mem_map, section_nr);
> - ms->section_mem_map = (unsigned long)NULL;
> }
>
> if (section_is_early && memmap)
> free_map_bootmem(memmap);
> else
> depopulate_section_memmap(pfn, nr_pages, altmap);
> +
> + if (bitmap_empty(subsection_map, SUBSECTIONS_PER_SECTION))
> + ms->section_mem_map = (unsigned long)NULL;
> }
>
> static struct page * __meminit section_activate(int nid, unsigned long pfn,
>
As discussed, I think this is broken here already. As you explained, the
subsection_map can get freed via kfree(ms->usage) after the first
bitmap_empty(subsection_map, SUBSECTIONS_PER_SECTION) check.
--
Thanks,
David / dhildenb
On 03.03.20 09:22, Baoquan He wrote:
> On 03/02/20 at 04:43pm, David Hildenbrand wrote:
>> On 01.03.20 06:20, Baoquan He wrote:
>>> On 02/28/20 at 03:36pm, David Hildenbrand wrote:
>>>> On 20.02.20 05:33, Baoquan He wrote:
>>>>> Wrap the codes which clear subsection map of one memory region from
>>>>> section_deactivate() into clear_subsection_map().
>>>>>
>>>>> Signed-off-by: Baoquan He <[email protected]>
>>>>> ---
>>>>> mm/sparse.c | 46 ++++++++++++++++++++++++++++++++++++++--------
>>>>> 1 file changed, 38 insertions(+), 8 deletions(-)
>>>>>
>>>>> diff --git a/mm/sparse.c b/mm/sparse.c
>>>>> index 977b47acd38d..df857ee9330c 100644
>>>>> --- a/mm/sparse.c
>>>>> +++ b/mm/sparse.c
>>>>> @@ -726,14 +726,25 @@ static void free_map_bootmem(struct page *memmap)
>>>>> }
>>>>> #endif /* CONFIG_SPARSEMEM_VMEMMAP */
>>>>>
>>>>> -static void section_deactivate(unsigned long pfn, unsigned long nr_pages,
>>>>> - struct vmem_altmap *altmap)
>>>>> +/**
>>>>> + * clear_subsection_map - Clear subsection map of one memory region
>>>>> + *
>>>>> + * @pfn - start pfn of the memory range
>>>>> + * @nr_pages - number of pfns to add in the region
>>>>> + *
>>>>> + * This is only intended for hotplug, and clear the related subsection
>>>>> + * map inside one section.
>>>>> + *
>>>>> + * Return:
>>>>> + * * -EINVAL - Section already deactived.
>>>>> + * * 0 - Subsection map is emptied.
>>>>> + * * 1 - Subsection map is not empty.
>>>>> + */
>>>>
>>>> Less verbose please (in my preference: none and simplify return handling)
>>>>
>>>>> +static int clear_subsection_map(unsigned long pfn, unsigned long nr_pages)
>>>>> {
>>>>> DECLARE_BITMAP(map, SUBSECTIONS_PER_SECTION) = { 0 };
>>>>> DECLARE_BITMAP(tmp, SUBSECTIONS_PER_SECTION) = { 0 };
>>>>> struct mem_section *ms = __pfn_to_section(pfn);
>>>>> - bool section_is_early = early_section(ms);
>>>>> - struct page *memmap = NULL;
>>>>> unsigned long *subsection_map = ms->usage
>>>>> ? &ms->usage->subsection_map[0] : NULL;
>>>>>
>>>>> @@ -744,8 +755,28 @@ static void section_deactivate(unsigned long pfn, unsigned long nr_pages,
>>>>> if (WARN(!subsection_map || !bitmap_equal(tmp, map, SUBSECTIONS_PER_SECTION),
>>>>> "section already deactivated (%#lx + %ld)\n",
>>>>> pfn, nr_pages))
>>>>> - return;
>>>>> + return -EINVAL;
>>>>> +
>>>>> + bitmap_xor(subsection_map, map, subsection_map, SUBSECTIONS_PER_SECTION);
>>>>>
>>>>> + if (bitmap_empty(subsection_map, SUBSECTIONS_PER_SECTION))
>>>>> + return 0;
>>>>> +
>>>>
>>>> Can we please just have a
>>>>
>>>> subsection_map_empty() instead and handle that in the caller?
>>>> (you can then always return true in the !VMEMMAP variant)
>>>
>>> I don't follow. Could you be more specific? or pseudo code please?
>>>
>>> The old code has to handle below case in which subsection_map has been
>>> cleared. And I introduce clear_subsection_map() to encapsulate all
>>> subsection map realted code so that !VMEMMAP won't have to see it any
>>> more.
>>>
>>
>> Something like this on top would be easier to understand IMHO
>>
>>
>> diff --git a/mm/sparse.c b/mm/sparse.c
>> index dc79b00ddaaa..be5c80e9cfee 100644
>> --- a/mm/sparse.c
>> +++ b/mm/sparse.c
>> @@ -726,20 +726,6 @@ static void free_map_bootmem(struct page *memmap)
>> }
>> #endif /* CONFIG_SPARSEMEM_VMEMMAP */
>>
>> -/**
>> - * clear_subsection_map - Clear subsection map of one memory region
>> - *
>> - * @pfn - start pfn of the memory range
>> - * @nr_pages - number of pfns to add in the region
>> - *
>> - * This is only intended for hotplug, and clear the related subsection
>> - * map inside one section.
>> - *
>> - * Return:
>> - * * -EINVAL - Section already deactived.
>> - * * 0 - Subsection map is emptied.
>> - * * 1 - Subsection map is not empty.
>> - */
>> static int clear_subsection_map(unsigned long pfn, unsigned long nr_pages)
>> {
>> DECLARE_BITMAP(map, SUBSECTIONS_PER_SECTION) = { 0 };
>> @@ -758,11 +744,12 @@ static int clear_subsection_map(unsigned long pfn, unsigned long nr_pages)
>> return -EINVAL;
>>
>> bitmap_xor(subsection_map, map, subsection_map, SUBSECTIONS_PER_SECTION);
>> + return 0;
>> +}
>>
>> - if (bitmap_empty(subsection_map, SUBSECTIONS_PER_SECTION))
>> - return 0;
>> -
>> - return 1;
>> +static bool is_subsection_map_empty(unsigned long pfn, unsigned long nr_pages)
>> +{
>> + return bitmap_empty(subsection_map, SUBSECTIONS_PER_SECTION);
>> }
>>
>> static void section_deactivate(unsigned long pfn, unsigned long nr_pages,
>> @@ -771,11 +758,8 @@ static void section_deactivate(unsigned long pfn, unsigned long nr_pages,
>> struct mem_section *ms = __pfn_to_section(pfn);
>> bool section_is_early = early_section(ms);
>> struct page *memmap = NULL;
>> - int rc;
>> -
>>
>> - rc = clear_subsection_map(pfn, nr_pages);
>> - if (IS_ERR_VALUE((unsigned long)rc))
>> + if (unlikely(clear_subsection_map(pfn, nr_pages)))
>> return;
>> /*
>> * There are 3 cases to handle across two configurations
>> @@ -794,7 +778,7 @@ static void section_deactivate(unsigned long pfn, unsigned long nr_pages,
>> *
>> * For 2/ and 3/ the SPARSEMEM_VMEMMAP={y,n} cases are unified
>> */
>> - if (!rc) {
>> + if (is_subsection_map_empty(pfn, nr_pages)) {
>> unsigned long section_nr = pfn_to_section_nr(pfn);
>
> Tried this way, it's not good in this patch. Since ms->usage might be
> freed in this place.
>
> if (!PageReserved(virt_to_page(ms->usage))) {
> kfree(ms->usage);
> ms->usage = NULL;
> }
So your patch #1 is already broken. Just cache the result in patch #1.
bool empty;
...
empty = bitmap_empty(subsection_map, SUBSECTIONS_PER_SECTION);
...
if (empty) {
...
}
--
Thanks,
David / dhildenb
On 03/03/20 at 09:33am, David Hildenbrand wrote:
> On 03.03.20 09:22, Baoquan He wrote:
> > On 03/02/20 at 04:43pm, David Hildenbrand wrote:
> >> On 01.03.20 06:20, Baoquan He wrote:
> >>> On 02/28/20 at 03:36pm, David Hildenbrand wrote:
> >>>> On 20.02.20 05:33, Baoquan He wrote:
> >>>>> Wrap the codes which clear subsection map of one memory region from
> >>>>> section_deactivate() into clear_subsection_map().
> >>>>>
> >>>>> Signed-off-by: Baoquan He <[email protected]>
> >>>>> ---
> >>>>> mm/sparse.c | 46 ++++++++++++++++++++++++++++++++++++++--------
> >>>>> 1 file changed, 38 insertions(+), 8 deletions(-)
> >>>>>
> >>>>> diff --git a/mm/sparse.c b/mm/sparse.c
> >>>>> index 977b47acd38d..df857ee9330c 100644
> >>>>> --- a/mm/sparse.c
> >>>>> +++ b/mm/sparse.c
> >>>>> @@ -726,14 +726,25 @@ static void free_map_bootmem(struct page *memmap)
> >>>>> }
> >>>>> #endif /* CONFIG_SPARSEMEM_VMEMMAP */
> >>>>>
> >>>>> -static void section_deactivate(unsigned long pfn, unsigned long nr_pages,
> >>>>> - struct vmem_altmap *altmap)
> >>>>> +/**
> >>>>> + * clear_subsection_map - Clear subsection map of one memory region
> >>>>> + *
> >>>>> + * @pfn - start pfn of the memory range
> >>>>> + * @nr_pages - number of pfns to add in the region
> >>>>> + *
> >>>>> + * This is only intended for hotplug, and clear the related subsection
> >>>>> + * map inside one section.
> >>>>> + *
> >>>>> + * Return:
> >>>>> + * * -EINVAL - Section already deactived.
> >>>>> + * * 0 - Subsection map is emptied.
> >>>>> + * * 1 - Subsection map is not empty.
> >>>>> + */
> >>>>
> >>>> Less verbose please (in my preference: none and simplify return handling)
> >>>>
> >>>>> +static int clear_subsection_map(unsigned long pfn, unsigned long nr_pages)
> >>>>> {
> >>>>> DECLARE_BITMAP(map, SUBSECTIONS_PER_SECTION) = { 0 };
> >>>>> DECLARE_BITMAP(tmp, SUBSECTIONS_PER_SECTION) = { 0 };
> >>>>> struct mem_section *ms = __pfn_to_section(pfn);
> >>>>> - bool section_is_early = early_section(ms);
> >>>>> - struct page *memmap = NULL;
> >>>>> unsigned long *subsection_map = ms->usage
> >>>>> ? &ms->usage->subsection_map[0] : NULL;
> >>>>>
> >>>>> @@ -744,8 +755,28 @@ static void section_deactivate(unsigned long pfn, unsigned long nr_pages,
> >>>>> if (WARN(!subsection_map || !bitmap_equal(tmp, map, SUBSECTIONS_PER_SECTION),
> >>>>> "section already deactivated (%#lx + %ld)\n",
> >>>>> pfn, nr_pages))
> >>>>> - return;
> >>>>> + return -EINVAL;
> >>>>> +
> >>>>> + bitmap_xor(subsection_map, map, subsection_map, SUBSECTIONS_PER_SECTION);
> >>>>>
> >>>>> + if (bitmap_empty(subsection_map, SUBSECTIONS_PER_SECTION))
> >>>>> + return 0;
> >>>>> +
> >>>>
> >>>> Can we please just have a
> >>>>
> >>>> subsection_map_empty() instead and handle that in the caller?
> >>>> (you can then always return true in the !VMEMMAP variant)
> >>>
> >>> I don't follow. Could you be more specific? or pseudo code please?
> >>>
> >>> The old code has to handle below case in which subsection_map has been
> >>> cleared. And I introduce clear_subsection_map() to encapsulate all
> >>> subsection map realted code so that !VMEMMAP won't have to see it any
> >>> more.
> >>>
> >>
> >> Something like this on top would be easier to understand IMHO
> >>
> >>
> >> diff --git a/mm/sparse.c b/mm/sparse.c
> >> index dc79b00ddaaa..be5c80e9cfee 100644
> >> --- a/mm/sparse.c
> >> +++ b/mm/sparse.c
> >> @@ -726,20 +726,6 @@ static void free_map_bootmem(struct page *memmap)
> >> }
> >> #endif /* CONFIG_SPARSEMEM_VMEMMAP */
> >>
> >> -/**
> >> - * clear_subsection_map - Clear subsection map of one memory region
> >> - *
> >> - * @pfn - start pfn of the memory range
> >> - * @nr_pages - number of pfns to add in the region
> >> - *
> >> - * This is only intended for hotplug, and clear the related subsection
> >> - * map inside one section.
> >> - *
> >> - * Return:
> >> - * * -EINVAL - Section already deactived.
> >> - * * 0 - Subsection map is emptied.
> >> - * * 1 - Subsection map is not empty.
> >> - */
> >> static int clear_subsection_map(unsigned long pfn, unsigned long nr_pages)
> >> {
> >> DECLARE_BITMAP(map, SUBSECTIONS_PER_SECTION) = { 0 };
> >> @@ -758,11 +744,12 @@ static int clear_subsection_map(unsigned long pfn, unsigned long nr_pages)
> >> return -EINVAL;
> >>
> >> bitmap_xor(subsection_map, map, subsection_map, SUBSECTIONS_PER_SECTION);
> >> + return 0;
> >> +}
> >>
> >> - if (bitmap_empty(subsection_map, SUBSECTIONS_PER_SECTION))
> >> - return 0;
> >> -
> >> - return 1;
> >> +static bool is_subsection_map_empty(unsigned long pfn, unsigned long nr_pages)
> >> +{
> >> + return bitmap_empty(subsection_map, SUBSECTIONS_PER_SECTION);
> >> }
> >>
> >> static void section_deactivate(unsigned long pfn, unsigned long nr_pages,
> >> @@ -771,11 +758,8 @@ static void section_deactivate(unsigned long pfn, unsigned long nr_pages,
> >> struct mem_section *ms = __pfn_to_section(pfn);
> >> bool section_is_early = early_section(ms);
> >> struct page *memmap = NULL;
> >> - int rc;
> >> -
> >>
> >> - rc = clear_subsection_map(pfn, nr_pages);
> >> - if (IS_ERR_VALUE((unsigned long)rc))
> >> + if (unlikely(clear_subsection_map(pfn, nr_pages)))
> >> return;
> >> /*
> >> * There are 3 cases to handle across two configurations
> >> @@ -794,7 +778,7 @@ static void section_deactivate(unsigned long pfn, unsigned long nr_pages,
> >> *
> >> * For 2/ and 3/ the SPARSEMEM_VMEMMAP={y,n} cases are unified
> >> */
> >> - if (!rc) {
> >> + if (is_subsection_map_empty(pfn, nr_pages)) {
> >> unsigned long section_nr = pfn_to_section_nr(pfn);
> >
> > Tried this way, it's not good in this patch. Since ms->usage might be
> > freed in this place.
> >
> > if (!PageReserved(virt_to_page(ms->usage))) {
> > kfree(ms->usage);
> > ms->usage = NULL;
> > }
>
> So your patch #1 is already broken. Just cache the result in patch #1.
>
> bool empty;
Right, good catch. Will take this way. Thanks.
>
> ...
> empty = bitmap_empty(subsection_map, SUBSECTIONS_PER_SECTION);
> ...
> if (empty) {
> ...
> }
On 03/03/20 at 09:34am, David Hildenbrand wrote:
> On 20.02.20 05:33, Baoquan He wrote:
> > In section_deactivate(), pfn_to_page() doesn't work any more after
> > ms->section_mem_map is resetting to NULL in SPARSEMEM|!VMEMMAP case.
> > It caused hot remove failure:
> >
> > kernel BUG at mm/page_alloc.c:4806!
> > invalid opcode: 0000 [#1] SMP PTI
> > CPU: 3 PID: 8 Comm: kworker/u16:0 Tainted: G W 5.5.0-next-20200205+ #340
> > Hardware name: QEMU Standard PC (i440FX + PIIX, 1996), BIOS 0.0.0 02/06/2015
> > Workqueue: kacpi_hotplug acpi_hotplug_work_fn
> > RIP: 0010:free_pages+0x85/0xa0
> > Call Trace:
> > __remove_pages+0x99/0xc0
> > arch_remove_memory+0x23/0x4d
> > try_remove_memory+0xc8/0x130
> > ? walk_memory_blocks+0x72/0xa0
> > __remove_memory+0xa/0x11
> > acpi_memory_device_remove+0x72/0x100
> > acpi_bus_trim+0x55/0x90
> > acpi_device_hotplug+0x2eb/0x3d0
> > acpi_hotplug_work_fn+0x1a/0x30
> > process_one_work+0x1a7/0x370
> > worker_thread+0x30/0x380
> > ? flush_rcu_work+0x30/0x30
> > kthread+0x112/0x130
> > ? kthread_create_on_node+0x60/0x60
> > ret_from_fork+0x35/0x40
> >
> > Let's move the ->section_mem_map resetting after depopulate_section_memmap()
> > to fix it.
> >
> > Signed-off-by: Baoquan He <[email protected]>
> > ---
> > mm/sparse.c | 4 +++-
> > 1 file changed, 3 insertions(+), 1 deletion(-)
> >
> > diff --git a/mm/sparse.c b/mm/sparse.c
> > index 596b2a45b100..b8e52c8fed7f 100644
> > --- a/mm/sparse.c
> > +++ b/mm/sparse.c
> > @@ -779,13 +779,15 @@ static void section_deactivate(unsigned long pfn, unsigned long nr_pages,
> > ms->usage = NULL;
> > }
> > memmap = sparse_decode_mem_map(ms->section_mem_map, section_nr);
> > - ms->section_mem_map = (unsigned long)NULL;
> > }
> >
> > if (section_is_early && memmap)
> > free_map_bootmem(memmap);
> > else
> > depopulate_section_memmap(pfn, nr_pages, altmap);
> > +
> > + if (bitmap_empty(subsection_map, SUBSECTIONS_PER_SECTION))
> > + ms->section_mem_map = (unsigned long)NULL;
> > }
> >
> > static struct page * __meminit section_activate(int nid, unsigned long pfn,
> >
>
> As discussed, I think this is broken here already. As you explained, the
> subsection_map can get freed via kfree(ms->usage) after the first
> bitmap_empty(subsection_map, SUBSECTIONS_PER_SECTION) check.
Yes, I will correct it as you suggested in another reply.