Memory sub-section hotplug was added to fix the issue that nvdimm could
be mapped at non-section aligned starting address. A subsection map is
added into struct mem_section_usage implement it. However, sub-section
is only supported in VMEMMAP case, there's no need to operate subsection
map in SPARSEMEM|!VMEMMAP. The former 4 patches is for it.
And since sub-section hotplug added, the hot add/remove functionality
have been broken in SPARSEMEM|!VMEMMAP case. Wei Yang and I, each of us
make one patch to fix one issue.
Baoquan He (6):
mm/sparse.c: Introduce new function fill_subsection_map()
mm/sparse.c: Introduce a new function clear_subsection_map()
mm/sparse.c: only use subsection map in VMEMMAP case
mm/sparse.c: Use __get_free_pages() instead in
populate_section_memmap()
mm/sparse.c: update code comment about section activate/deactivate
mm/hotplug: fix hot remove failure in SPARSEMEM|!VMEMMAP case
Wei Yang (1):
mm/sparsemem: pfn_to_page is not valid yet on SPARSEMEM
include/linux/mmzone.h | 2 +
mm/sparse.c | 248 ++++++++++++++++++++++++++---------------
2 files changed, 161 insertions(+), 89 deletions(-)
--
2.17.2
This removes the unnecessary goto, and simplify codes.
Signed-off-by: Baoquan He <[email protected]>
---
mm/sparse.c | 16 ++++++----------
1 file changed, 6 insertions(+), 10 deletions(-)
diff --git a/mm/sparse.c b/mm/sparse.c
index cf55d272d0a9..36e6565ec67e 100644
--- a/mm/sparse.c
+++ b/mm/sparse.c
@@ -751,23 +751,19 @@ static void free_map_bootmem(struct page *memmap)
struct page * __meminit populate_section_memmap(unsigned long pfn,
unsigned long nr_pages, int nid, struct vmem_altmap *altmap)
{
- struct page *page, *ret;
+ struct page *ret;
unsigned long memmap_size = sizeof(struct page) * PAGES_PER_SECTION;
- page = alloc_pages(GFP_KERNEL|__GFP_NOWARN, get_order(memmap_size));
- if (page)
- goto got_map_page;
+ ret = (void*)__get_free_pages(GFP_KERNEL|__GFP_NOWARN,
+ get_order(memmap_size));
+ if (ret)
+ return ret;
ret = vmalloc(memmap_size);
if (ret)
- goto got_map_ptr;
+ return ret;
return NULL;
-got_map_page:
- ret = (struct page *)pfn_to_kaddr(page_to_pfn(page));
-got_map_ptr:
-
- return ret;
}
static void depopulate_section_memmap(unsigned long pfn, unsigned long nr_pages,
--
2.17.2
It's helpful to note that sub-section is only supported in
SPARSEMEM_VMEMMAP case, but not in SPARSEMEM|!VMEMMAP case. Add
sentences into the code comment above sparse_add_section.
And move the code comments inside section_deactivate() to be above
it, this makes code cleaner.
Signed-off-by: Baoquan He <[email protected]>
---
mm/sparse.c | 40 +++++++++++++++++++++++-----------------
1 file changed, 23 insertions(+), 17 deletions(-)
diff --git a/mm/sparse.c b/mm/sparse.c
index 36e6565ec67e..a7e78bfe0dce 100644
--- a/mm/sparse.c
+++ b/mm/sparse.c
@@ -809,6 +809,23 @@ static void free_map_bootmem(struct page *memmap)
}
#endif /* CONFIG_SPARSEMEM_VMEMMAP */
+/*
+ * To deactivate a memory region, there are 3 cases across two
+ * two configurations (SPARSEMEM_VMEMMAP={y,n}):
+ *
+ * 1/ deactivation of a partial hot-added section (only possible
+ * in the SPARSEMEM_VMEMMAP=y case).
+ * a/ section was present at memory init
+ * b/ section was hot-added post memory init
+ * 2/ deactivation of a complete hot-added section
+ * 3/ deactivation of a complete section from memory init
+ *
+ * For 1/, when subsection_map does not empty we will not be
+ * freeing the usage map, but still need to free the vmemmap
+ * range.
+ *
+ * For 2/ and 3/ the SPARSEMEM_VMEMMAP={y,n} cases are unified
+ */
static void section_deactivate(unsigned long pfn, unsigned long nr_pages,
struct vmem_altmap *altmap)
{
@@ -821,23 +838,7 @@ static void section_deactivate(unsigned long pfn, unsigned long nr_pages,
rc = clear_subsection_map(pfn, nr_pages);
if(IS_ERR_VALUE((unsigned long)rc))
return;
- /*
- * There are 3 cases to handle across two configurations
- * (SPARSEMEM_VMEMMAP={y,n}):
- *
- * 1/ deactivation of a partial hot-added section (only possible
- * in the SPARSEMEM_VMEMMAP=y case).
- * a/ section was present at memory init
- * b/ section was hot-added post memory init
- * 2/ deactivation of a complete hot-added section
- * 3/ deactivation of a complete section from memory init
- *
- * For 1/, when subsection_map does not empty we will not be
- * freeing the usage map, but still need to free the vmemmap
- * range.
- *
- * For 2/ and 3/ the SPARSEMEM_VMEMMAP={y,n} cases are unified
- */
+
if (!rc) {
unsigned long section_nr = pfn_to_section_nr(pfn);
@@ -913,6 +914,11 @@ static struct page * __meminit section_activate(int nid, unsigned long pfn,
*
* This is only intended for hotplug.
*
+ * Note that the added memory region is either section aligned, or
+ * sub-section aligned. The sub-section aligned region can only be
+ * hot added in SPARSEMEM_VMEMMAP case, please refer to ZONE_DEVICE
+ * part of memory-model.rst for more details.
+ *
* Return:
* * 0 - On success.
* * -EEXIST - Section has been present.
--
2.17.2
Currently, subsection map is used when SPARSEMEM is enabled, including
VMEMMAP case and !VMEMMAP case. However, subsection hotplug is not
supported at all in SPARSEMEM|!VMEMMAP case, subsection map is unnecessary
and misleading. Let's adjust code to only allow subsection map being
used in SPARSEMEM|VMEMMAP case.
Signed-off-by: Baoquan He <[email protected]>
---
include/linux/mmzone.h | 2 +
mm/sparse.c | 231 ++++++++++++++++++++++-------------------
2 files changed, 124 insertions(+), 109 deletions(-)
diff --git a/include/linux/mmzone.h b/include/linux/mmzone.h
index 462f6873905a..fc0de3a9a51e 100644
--- a/include/linux/mmzone.h
+++ b/include/linux/mmzone.h
@@ -1185,7 +1185,9 @@ static inline unsigned long section_nr_to_pfn(unsigned long sec)
#define SUBSECTION_ALIGN_DOWN(pfn) ((pfn) & PAGE_SUBSECTION_MASK)
struct mem_section_usage {
+#ifdef CONFIG_SPARSEMEM_VMEMMAP
DECLARE_BITMAP(subsection_map, SUBSECTIONS_PER_SECTION);
+#endif
/* See declaration of similar field in struct zone */
unsigned long pageblock_flags[0];
};
diff --git a/mm/sparse.c b/mm/sparse.c
index 696f6b9f706e..cf55d272d0a9 100644
--- a/mm/sparse.c
+++ b/mm/sparse.c
@@ -209,41 +209,6 @@ static inline unsigned long first_present_section_nr(void)
return next_present_section_nr(-1);
}
-static void subsection_mask_set(unsigned long *map, unsigned long pfn,
- unsigned long nr_pages)
-{
- int idx = subsection_map_index(pfn);
- int end = subsection_map_index(pfn + nr_pages - 1);
-
- bitmap_set(map, idx, end - idx + 1);
-}
-
-void __init subsection_map_init(unsigned long pfn, unsigned long nr_pages)
-{
- int end_sec = pfn_to_section_nr(pfn + nr_pages - 1);
- unsigned long nr, start_sec = pfn_to_section_nr(pfn);
-
- if (!nr_pages)
- return;
-
- for (nr = start_sec; nr <= end_sec; nr++) {
- struct mem_section *ms;
- unsigned long pfns;
-
- pfns = min(nr_pages, PAGES_PER_SECTION
- - (pfn & ~PAGE_SECTION_MASK));
- ms = __nr_to_section(nr);
- subsection_mask_set(ms->usage->subsection_map, pfn, pfns);
-
- pr_debug("%s: sec: %lu pfns: %lu set(%d, %d)\n", __func__, nr,
- pfns, subsection_map_index(pfn),
- subsection_map_index(pfn + pfns - 1));
-
- pfn += pfns;
- nr_pages -= pfns;
- }
-}
-
/* Record a memory area against a node. */
void __init memory_present(int nid, unsigned long start, unsigned long end)
{
@@ -432,12 +397,134 @@ static unsigned long __init section_map_size(void)
return ALIGN(sizeof(struct page) * PAGES_PER_SECTION, PMD_SIZE);
}
+static void subsection_mask_set(unsigned long *map, unsigned long pfn,
+ unsigned long nr_pages)
+{
+ int idx = subsection_map_index(pfn);
+ int end = subsection_map_index(pfn + nr_pages - 1);
+
+ bitmap_set(map, idx, end - idx + 1);
+}
+
+void __init subsection_map_init(unsigned long pfn, unsigned long nr_pages)
+{
+ int end_sec = pfn_to_section_nr(pfn + nr_pages - 1);
+ unsigned long nr, start_sec = pfn_to_section_nr(pfn);
+
+ if (!nr_pages)
+ return;
+
+ for (nr = start_sec; nr <= end_sec; nr++) {
+ struct mem_section *ms;
+ unsigned long pfns;
+
+ pfns = min(nr_pages, PAGES_PER_SECTION
+ - (pfn & ~PAGE_SECTION_MASK));
+ ms = __nr_to_section(nr);
+ subsection_mask_set(ms->usage->subsection_map, pfn, pfns);
+
+ pr_debug("%s: sec: %lu pfns: %lu set(%d, %d)\n", __func__, nr,
+ pfns, subsection_map_index(pfn),
+ subsection_map_index(pfn + pfns - 1));
+
+ pfn += pfns;
+ nr_pages -= pfns;
+ }
+}
+
+/**
+ * clear_subsection_map - Clear subsection map of one memory region
+ *
+ * @pfn - start pfn of the memory range
+ * @nr_pages - number of pfns to add in the region
+ *
+ * This is only intended for hotplug, and clear the related subsection
+ * map inside one section.
+ *
+ * Return:
+ * * -EINVAL - Section already deactived.
+ * * 0 - Subsection map is emptied.
+ * * 1 - Subsection map is not empty.
+ */
+static int clear_subsection_map(unsigned long pfn, unsigned long nr_pages)
+{
+ DECLARE_BITMAP(map, SUBSECTIONS_PER_SECTION) = { 0 };
+ DECLARE_BITMAP(tmp, SUBSECTIONS_PER_SECTION) = { 0 };
+ struct mem_section *ms = __pfn_to_section(pfn);
+ unsigned long *subsection_map = ms->usage
+ ? &ms->usage->subsection_map[0] : NULL;
+
+ subsection_mask_set(map, pfn, nr_pages);
+ if (subsection_map)
+ bitmap_and(tmp, map, subsection_map, SUBSECTIONS_PER_SECTION);
+
+ if (WARN(!subsection_map || !bitmap_equal(tmp, map, SUBSECTIONS_PER_SECTION),
+ "section already deactivated (%#lx + %ld)\n",
+ pfn, nr_pages))
+ return -EINVAL;
+
+ bitmap_xor(subsection_map, map, subsection_map, SUBSECTIONS_PER_SECTION);
+
+ if (bitmap_empty(subsection_map, SUBSECTIONS_PER_SECTION))
+ return 0;
+
+ return 1;
+}
+
+/**
+ * fill_subsection_map - fill subsection map of a memory region
+ * @pfn - start pfn of the memory range
+ * @nr_pages - number of pfns to add in the region
+ *
+ * This clears the related subsection map inside one section, and only
+ * intended for hotplug.
+ *
+ * Return:
+ * * 0 - On success.
+ * * -EINVAL - Invalid memory region.
+ * * -EEXIST - Subsection map has been set.
+ */
+static int fill_subsection_map(unsigned long pfn, unsigned long nr_pages)
+{
+ struct mem_section *ms = __pfn_to_section(pfn);
+ DECLARE_BITMAP(map, SUBSECTIONS_PER_SECTION) = { 0 };
+ unsigned long *subsection_map;
+ int rc = 0;
+
+ subsection_mask_set(map, pfn, nr_pages);
+
+ subsection_map = &ms->usage->subsection_map[0];
+
+ if (bitmap_empty(map, SUBSECTIONS_PER_SECTION))
+ rc = -EINVAL;
+ else if (bitmap_intersects(map, subsection_map, SUBSECTIONS_PER_SECTION))
+ rc = -EEXIST;
+ else
+ bitmap_or(subsection_map, map, subsection_map,
+ SUBSECTIONS_PER_SECTION);
+
+ return rc;
+}
+
#else
static unsigned long __init section_map_size(void)
{
return PAGE_ALIGN(sizeof(struct page) * PAGES_PER_SECTION);
}
+void __init subsection_map_init(unsigned long pfn, unsigned long nr_pages)
+{
+}
+
+static int clear_subsection_map(unsigned long pfn, unsigned long nr_pages)
+{
+ return 0;
+}
+static int fill_subsection_map(unsigned long pfn, unsigned long nr_pages)
+{
+ return 0;
+}
+
struct page __init *__populate_section_memmap(unsigned long pfn,
unsigned long nr_pages, int nid, struct vmem_altmap *altmap)
{
@@ -726,45 +813,6 @@ static void free_map_bootmem(struct page *memmap)
}
#endif /* CONFIG_SPARSEMEM_VMEMMAP */
-/**
- * clear_subsection_map - Clear subsection map of one memory region
- *
- * @pfn - start pfn of the memory range
- * @nr_pages - number of pfns to add in the region
- *
- * This is only intended for hotplug, and clear the related subsection
- * map inside one section.
- *
- * Return:
- * * -EINVAL - Section already deactived.
- * * 0 - Subsection map is emptied.
- * * 1 - Subsection map is not empty.
- */
-static int clear_subsection_map(unsigned long pfn, unsigned long nr_pages)
-{
- DECLARE_BITMAP(map, SUBSECTIONS_PER_SECTION) = { 0 };
- DECLARE_BITMAP(tmp, SUBSECTIONS_PER_SECTION) = { 0 };
- struct mem_section *ms = __pfn_to_section(pfn);
- unsigned long *subsection_map = ms->usage
- ? &ms->usage->subsection_map[0] : NULL;
-
- subsection_mask_set(map, pfn, nr_pages);
- if (subsection_map)
- bitmap_and(tmp, map, subsection_map, SUBSECTIONS_PER_SECTION);
-
- if (WARN(!subsection_map || !bitmap_equal(tmp, map, SUBSECTIONS_PER_SECTION),
- "section already deactivated (%#lx + %ld)\n",
- pfn, nr_pages))
- return -EINVAL;
-
- bitmap_xor(subsection_map, map, subsection_map, SUBSECTIONS_PER_SECTION);
-
- if (bitmap_empty(subsection_map, SUBSECTIONS_PER_SECTION))
- return 0;
-
- return 1;
-}
-
static void section_deactivate(unsigned long pfn, unsigned long nr_pages,
struct vmem_altmap *altmap)
{
@@ -818,41 +866,6 @@ static void section_deactivate(unsigned long pfn, unsigned long nr_pages,
depopulate_section_memmap(pfn, nr_pages, altmap);
}
-/**
- * fill_subsection_map - fill subsection map of a memory region
- * @pfn - start pfn of the memory range
- * @nr_pages - number of pfns to add in the region
- *
- * This clears the related subsection map inside one section, and only
- * intended for hotplug.
- *
- * Return:
- * * 0 - On success.
- * * -EINVAL - Invalid memory region.
- * * -EEXIST - Subsection map has been set.
- */
-static int fill_subsection_map(unsigned long pfn, unsigned long nr_pages)
-{
- struct mem_section *ms = __pfn_to_section(pfn);
- DECLARE_BITMAP(map, SUBSECTIONS_PER_SECTION) = { 0 };
- unsigned long *subsection_map;
- int rc = 0;
-
- subsection_mask_set(map, pfn, nr_pages);
-
- subsection_map = &ms->usage->subsection_map[0];
-
- if (bitmap_empty(map, SUBSECTIONS_PER_SECTION))
- rc = -EINVAL;
- else if (bitmap_intersects(map, subsection_map, SUBSECTIONS_PER_SECTION))
- rc = -EEXIST;
- else
- bitmap_or(subsection_map, map, subsection_map,
- SUBSECTIONS_PER_SECTION);
-
- return rc;
-}
-
static struct page * __meminit section_activate(int nid, unsigned long pfn,
unsigned long nr_pages, struct vmem_altmap *altmap)
{
--
2.17.2
From: Wei Yang <[email protected]>
When we use SPARSEMEM instead of SPARSEMEM_VMEMMAP, pfn_to_page()
doesn't work before sparse_init_one_section() is called. This leads to a
crash when hotplug memory.
PGD 0 P4D 0
Oops: 0002 [#1] SMP PTI
CPU: 3 PID: 8 Comm: kworker/u16:0 Tainted: G W 5.5.0-next-20200205+ #339
Hardware name: QEMU Standard PC (i440FX + PIIX, 1996), BIOS 0.0.0 02/06/2015
Workqueue: kacpi_hotplug acpi_hotplug_work_fn
RIP: 0010:__memset+0x24/0x30
Call Trace:
sparse_add_section+0x150/0x1d8
__add_pages+0xbf/0x150
add_pages+0x12/0x60
add_memory_resource+0xc8/0x210
? wake_up_q+0xa0/0xa0
__add_memory+0x62/0xb0
acpi_memory_device_add+0x13f/0x300
acpi_bus_attach+0xf6/0x200
acpi_bus_scan+0x43/0x90
acpi_device_hotplug+0x275/0x3d0
acpi_hotplug_work_fn+0x1a/0x30
process_one_work+0x1a7/0x370
worker_thread+0x30/0x380
? flush_rcu_work+0x30/0x30
kthread+0x112/0x130
? kthread_create_on_node+0x60/0x60
ret_from_fork+0x35/0x40
We should use memmap as it did.
Fixes: ba72b4c8cf60 ("mm/sparsemem: support sub-section hotplug")
Signed-off-by: Wei Yang <[email protected]>
Acked-by: David Hildenbrand <[email protected]>
Reviewed-by: Baoquan He <[email protected]>
CC: Dan Williams <[email protected]>
Signed-off-by: Baoquan He <[email protected]>
---
mm/sparse.c | 2 +-
1 file changed, 1 insertion(+), 1 deletion(-)
diff --git a/mm/sparse.c b/mm/sparse.c
index a7e78bfe0dce..623755e88255 100644
--- a/mm/sparse.c
+++ b/mm/sparse.c
@@ -944,7 +944,7 @@ int __meminit sparse_add_section(int nid, unsigned long start_pfn,
* Poison uninitialized struct pages in order to catch invalid flags
* combinations.
*/
- page_init_poison(pfn_to_page(start_pfn), sizeof(struct page) * nr_pages);
+ page_init_poison(memmap, sizeof(struct page) * nr_pages);
ms = __nr_to_section(section_nr);
set_section_nid(section_nr, nid);
--
2.17.2
In section_deactivate(), pfn_to_page() doesn't work any more after
ms->section_mem_map is resetting to NULL in SPARSEMEM|!VMEMMAP case.
It caused hot remove failure, the trace is:
kernel BUG at mm/page_alloc.c:4806!
invalid opcode: 0000 [#1] SMP PTI
CPU: 3 PID: 8 Comm: kworker/u16:0 Tainted: G W 5.5.0-next-20200205+ #340
Hardware name: QEMU Standard PC (i440FX + PIIX, 1996), BIOS 0.0.0 02/06/2015
Workqueue: kacpi_hotplug acpi_hotplug_work_fn
RIP: 0010:free_pages+0x85/0xa0
Call Trace:
__remove_pages+0x99/0xc0
arch_remove_memory+0x23/0x4d
try_remove_memory+0xc8/0x130
? walk_memory_blocks+0x72/0xa0
__remove_memory+0xa/0x11
acpi_memory_device_remove+0x72/0x100
acpi_bus_trim+0x55/0x90
acpi_device_hotplug+0x2eb/0x3d0
acpi_hotplug_work_fn+0x1a/0x30
process_one_work+0x1a7/0x370
worker_thread+0x30/0x380
? flush_rcu_work+0x30/0x30
kthread+0x112/0x130
? kthread_create_on_node+0x60/0x60
ret_from_fork+0x35/0x40
Let's defer the ->section_mem_map resetting after depopulate_section_memmap()
to fix it.
Signed-off-by: Baoquan He <[email protected]>
---
mm/sparse.c | 4 +++-
1 file changed, 3 insertions(+), 1 deletion(-)
diff --git a/mm/sparse.c b/mm/sparse.c
index 623755e88255..345d065ef6ce 100644
--- a/mm/sparse.c
+++ b/mm/sparse.c
@@ -854,13 +854,15 @@ static void section_deactivate(unsigned long pfn, unsigned long nr_pages,
ms->usage = NULL;
}
memmap = sparse_decode_mem_map(ms->section_mem_map, section_nr);
- ms->section_mem_map = (unsigned long)NULL;
}
if (section_is_early && memmap)
free_map_bootmem(memmap);
else
depopulate_section_memmap(pfn, nr_pages, altmap);
+
+ if(!rc)
+ ms->section_mem_map = (unsigned long)NULL;
}
static struct page * __meminit section_activate(int nid, unsigned long pfn,
--
2.17.2
On Sun, Feb 09, 2020 at 06:48:22PM +0800, Baoquan He wrote:
>Currently, subsection map is used when SPARSEMEM is enabled, including
>VMEMMAP case and !VMEMMAP case. However, subsection hotplug is not
>supported at all in SPARSEMEM|!VMEMMAP case, subsection map is unnecessary
>and misleading. Let's adjust code to only allow subsection map being
>used in SPARSEMEM|VMEMMAP case.
>
>Signed-off-by: Baoquan He <[email protected]>
The change looks good to me.
Reviewed-by: Wei Yang <[email protected]>
>---
> include/linux/mmzone.h | 2 +
> mm/sparse.c | 231 ++++++++++++++++++++++-------------------
> 2 files changed, 124 insertions(+), 109 deletions(-)
>
>diff --git a/include/linux/mmzone.h b/include/linux/mmzone.h
>index 462f6873905a..fc0de3a9a51e 100644
>--- a/include/linux/mmzone.h
>+++ b/include/linux/mmzone.h
>@@ -1185,7 +1185,9 @@ static inline unsigned long section_nr_to_pfn(unsigned long sec)
> #define SUBSECTION_ALIGN_DOWN(pfn) ((pfn) & PAGE_SUBSECTION_MASK)
>
> struct mem_section_usage {
>+#ifdef CONFIG_SPARSEMEM_VMEMMAP
> DECLARE_BITMAP(subsection_map, SUBSECTIONS_PER_SECTION);
>+#endif
> /* See declaration of similar field in struct zone */
> unsigned long pageblock_flags[0];
> };
>diff --git a/mm/sparse.c b/mm/sparse.c
>index 696f6b9f706e..cf55d272d0a9 100644
>--- a/mm/sparse.c
>+++ b/mm/sparse.c
>@@ -209,41 +209,6 @@ static inline unsigned long first_present_section_nr(void)
> return next_present_section_nr(-1);
> }
>
>-static void subsection_mask_set(unsigned long *map, unsigned long pfn,
>- unsigned long nr_pages)
>-{
>- int idx = subsection_map_index(pfn);
>- int end = subsection_map_index(pfn + nr_pages - 1);
>-
>- bitmap_set(map, idx, end - idx + 1);
>-}
>-
>-void __init subsection_map_init(unsigned long pfn, unsigned long nr_pages)
>-{
>- int end_sec = pfn_to_section_nr(pfn + nr_pages - 1);
>- unsigned long nr, start_sec = pfn_to_section_nr(pfn);
>-
>- if (!nr_pages)
>- return;
>-
>- for (nr = start_sec; nr <= end_sec; nr++) {
>- struct mem_section *ms;
>- unsigned long pfns;
>-
>- pfns = min(nr_pages, PAGES_PER_SECTION
>- - (pfn & ~PAGE_SECTION_MASK));
>- ms = __nr_to_section(nr);
>- subsection_mask_set(ms->usage->subsection_map, pfn, pfns);
>-
>- pr_debug("%s: sec: %lu pfns: %lu set(%d, %d)\n", __func__, nr,
>- pfns, subsection_map_index(pfn),
>- subsection_map_index(pfn + pfns - 1));
>-
>- pfn += pfns;
>- nr_pages -= pfns;
>- }
>-}
>-
> /* Record a memory area against a node. */
> void __init memory_present(int nid, unsigned long start, unsigned long end)
> {
>@@ -432,12 +397,134 @@ static unsigned long __init section_map_size(void)
> return ALIGN(sizeof(struct page) * PAGES_PER_SECTION, PMD_SIZE);
> }
>
>+static void subsection_mask_set(unsigned long *map, unsigned long pfn,
>+ unsigned long nr_pages)
>+{
>+ int idx = subsection_map_index(pfn);
>+ int end = subsection_map_index(pfn + nr_pages - 1);
>+
>+ bitmap_set(map, idx, end - idx + 1);
>+}
>+
>+void __init subsection_map_init(unsigned long pfn, unsigned long nr_pages)
>+{
>+ int end_sec = pfn_to_section_nr(pfn + nr_pages - 1);
>+ unsigned long nr, start_sec = pfn_to_section_nr(pfn);
>+
>+ if (!nr_pages)
>+ return;
>+
>+ for (nr = start_sec; nr <= end_sec; nr++) {
>+ struct mem_section *ms;
>+ unsigned long pfns;
>+
>+ pfns = min(nr_pages, PAGES_PER_SECTION
>+ - (pfn & ~PAGE_SECTION_MASK));
>+ ms = __nr_to_section(nr);
>+ subsection_mask_set(ms->usage->subsection_map, pfn, pfns);
>+
>+ pr_debug("%s: sec: %lu pfns: %lu set(%d, %d)\n", __func__, nr,
>+ pfns, subsection_map_index(pfn),
>+ subsection_map_index(pfn + pfns - 1));
>+
>+ pfn += pfns;
>+ nr_pages -= pfns;
>+ }
>+}
>+
>+/**
>+ * clear_subsection_map - Clear subsection map of one memory region
>+ *
>+ * @pfn - start pfn of the memory range
>+ * @nr_pages - number of pfns to add in the region
>+ *
>+ * This is only intended for hotplug, and clear the related subsection
>+ * map inside one section.
>+ *
>+ * Return:
>+ * * -EINVAL - Section already deactived.
>+ * * 0 - Subsection map is emptied.
>+ * * 1 - Subsection map is not empty.
>+ */
>+static int clear_subsection_map(unsigned long pfn, unsigned long nr_pages)
>+{
>+ DECLARE_BITMAP(map, SUBSECTIONS_PER_SECTION) = { 0 };
>+ DECLARE_BITMAP(tmp, SUBSECTIONS_PER_SECTION) = { 0 };
>+ struct mem_section *ms = __pfn_to_section(pfn);
>+ unsigned long *subsection_map = ms->usage
>+ ? &ms->usage->subsection_map[0] : NULL;
>+
>+ subsection_mask_set(map, pfn, nr_pages);
>+ if (subsection_map)
>+ bitmap_and(tmp, map, subsection_map, SUBSECTIONS_PER_SECTION);
>+
>+ if (WARN(!subsection_map || !bitmap_equal(tmp, map, SUBSECTIONS_PER_SECTION),
>+ "section already deactivated (%#lx + %ld)\n",
>+ pfn, nr_pages))
>+ return -EINVAL;
>+
>+ bitmap_xor(subsection_map, map, subsection_map, SUBSECTIONS_PER_SECTION);
>+
>+ if (bitmap_empty(subsection_map, SUBSECTIONS_PER_SECTION))
>+ return 0;
>+
>+ return 1;
>+}
>+
>+/**
>+ * fill_subsection_map - fill subsection map of a memory region
>+ * @pfn - start pfn of the memory range
>+ * @nr_pages - number of pfns to add in the region
>+ *
>+ * This clears the related subsection map inside one section, and only
>+ * intended for hotplug.
>+ *
>+ * Return:
>+ * * 0 - On success.
>+ * * -EINVAL - Invalid memory region.
>+ * * -EEXIST - Subsection map has been set.
>+ */
>+static int fill_subsection_map(unsigned long pfn, unsigned long nr_pages)
>+{
>+ struct mem_section *ms = __pfn_to_section(pfn);
>+ DECLARE_BITMAP(map, SUBSECTIONS_PER_SECTION) = { 0 };
>+ unsigned long *subsection_map;
>+ int rc = 0;
>+
>+ subsection_mask_set(map, pfn, nr_pages);
>+
>+ subsection_map = &ms->usage->subsection_map[0];
>+
>+ if (bitmap_empty(map, SUBSECTIONS_PER_SECTION))
>+ rc = -EINVAL;
>+ else if (bitmap_intersects(map, subsection_map, SUBSECTIONS_PER_SECTION))
>+ rc = -EEXIST;
>+ else
>+ bitmap_or(subsection_map, map, subsection_map,
>+ SUBSECTIONS_PER_SECTION);
>+
>+ return rc;
>+}
>+
> #else
> static unsigned long __init section_map_size(void)
> {
> return PAGE_ALIGN(sizeof(struct page) * PAGES_PER_SECTION);
> }
>
>+void __init subsection_map_init(unsigned long pfn, unsigned long nr_pages)
>+{
>+}
>+
>+static int clear_subsection_map(unsigned long pfn, unsigned long nr_pages)
>+{
>+ return 0;
>+}
>+static int fill_subsection_map(unsigned long pfn, unsigned long nr_pages)
>+{
>+ return 0;
>+}
>+
> struct page __init *__populate_section_memmap(unsigned long pfn,
> unsigned long nr_pages, int nid, struct vmem_altmap *altmap)
> {
>@@ -726,45 +813,6 @@ static void free_map_bootmem(struct page *memmap)
> }
> #endif /* CONFIG_SPARSEMEM_VMEMMAP */
>
>-/**
>- * clear_subsection_map - Clear subsection map of one memory region
>- *
>- * @pfn - start pfn of the memory range
>- * @nr_pages - number of pfns to add in the region
>- *
>- * This is only intended for hotplug, and clear the related subsection
>- * map inside one section.
>- *
>- * Return:
>- * * -EINVAL - Section already deactived.
>- * * 0 - Subsection map is emptied.
>- * * 1 - Subsection map is not empty.
>- */
>-static int clear_subsection_map(unsigned long pfn, unsigned long nr_pages)
>-{
>- DECLARE_BITMAP(map, SUBSECTIONS_PER_SECTION) = { 0 };
>- DECLARE_BITMAP(tmp, SUBSECTIONS_PER_SECTION) = { 0 };
>- struct mem_section *ms = __pfn_to_section(pfn);
>- unsigned long *subsection_map = ms->usage
>- ? &ms->usage->subsection_map[0] : NULL;
>-
>- subsection_mask_set(map, pfn, nr_pages);
>- if (subsection_map)
>- bitmap_and(tmp, map, subsection_map, SUBSECTIONS_PER_SECTION);
>-
>- if (WARN(!subsection_map || !bitmap_equal(tmp, map, SUBSECTIONS_PER_SECTION),
>- "section already deactivated (%#lx + %ld)\n",
>- pfn, nr_pages))
>- return -EINVAL;
>-
>- bitmap_xor(subsection_map, map, subsection_map, SUBSECTIONS_PER_SECTION);
>-
>- if (bitmap_empty(subsection_map, SUBSECTIONS_PER_SECTION))
>- return 0;
>-
>- return 1;
>-}
>-
> static void section_deactivate(unsigned long pfn, unsigned long nr_pages,
> struct vmem_altmap *altmap)
> {
>@@ -818,41 +866,6 @@ static void section_deactivate(unsigned long pfn, unsigned long nr_pages,
> depopulate_section_memmap(pfn, nr_pages, altmap);
> }
>
>-/**
>- * fill_subsection_map - fill subsection map of a memory region
>- * @pfn - start pfn of the memory range
>- * @nr_pages - number of pfns to add in the region
>- *
>- * This clears the related subsection map inside one section, and only
>- * intended for hotplug.
>- *
>- * Return:
>- * * 0 - On success.
>- * * -EINVAL - Invalid memory region.
>- * * -EEXIST - Subsection map has been set.
>- */
>-static int fill_subsection_map(unsigned long pfn, unsigned long nr_pages)
>-{
>- struct mem_section *ms = __pfn_to_section(pfn);
>- DECLARE_BITMAP(map, SUBSECTIONS_PER_SECTION) = { 0 };
>- unsigned long *subsection_map;
>- int rc = 0;
>-
>- subsection_mask_set(map, pfn, nr_pages);
>-
>- subsection_map = &ms->usage->subsection_map[0];
>-
>- if (bitmap_empty(map, SUBSECTIONS_PER_SECTION))
>- rc = -EINVAL;
>- else if (bitmap_intersects(map, subsection_map, SUBSECTIONS_PER_SECTION))
>- rc = -EEXIST;
>- else
>- bitmap_or(subsection_map, map, subsection_map,
>- SUBSECTIONS_PER_SECTION);
>-
>- return rc;
>-}
>-
> static struct page * __meminit section_activate(int nid, unsigned long pfn,
> unsigned long nr_pages, struct vmem_altmap *altmap)
> {
>--
>2.17.2
--
Wei Yang
Help you, Help me
On Sun, Feb 09, 2020 at 06:48:23PM +0800, Baoquan He wrote:
>This removes the unnecessary goto, and simplify codes.
>
>Signed-off-by: Baoquan He <[email protected]>
Reasonable.
Reviewed-by: Wei Yang <[email protected]>
>---
> mm/sparse.c | 16 ++++++----------
> 1 file changed, 6 insertions(+), 10 deletions(-)
>
>diff --git a/mm/sparse.c b/mm/sparse.c
>index cf55d272d0a9..36e6565ec67e 100644
>--- a/mm/sparse.c
>+++ b/mm/sparse.c
>@@ -751,23 +751,19 @@ static void free_map_bootmem(struct page *memmap)
> struct page * __meminit populate_section_memmap(unsigned long pfn,
> unsigned long nr_pages, int nid, struct vmem_altmap *altmap)
> {
>- struct page *page, *ret;
>+ struct page *ret;
> unsigned long memmap_size = sizeof(struct page) * PAGES_PER_SECTION;
>
>- page = alloc_pages(GFP_KERNEL|__GFP_NOWARN, get_order(memmap_size));
>- if (page)
>- goto got_map_page;
>+ ret = (void*)__get_free_pages(GFP_KERNEL|__GFP_NOWARN,
>+ get_order(memmap_size));
>+ if (ret)
>+ return ret;
>
> ret = vmalloc(memmap_size);
> if (ret)
>- goto got_map_ptr;
>+ return ret;
>
> return NULL;
>-got_map_page:
>- ret = (struct page *)pfn_to_kaddr(page_to_pfn(page));
>-got_map_ptr:
>-
>- return ret;
> }
>
> static void depopulate_section_memmap(unsigned long pfn, unsigned long nr_pages,
>--
>2.17.2
--
Wei Yang
Help you, Help me
On Sun, Feb 09, 2020 at 06:48:26PM +0800, Baoquan He wrote:
>In section_deactivate(), pfn_to_page() doesn't work any more after
>ms->section_mem_map is resetting to NULL in SPARSEMEM|!VMEMMAP case.
>It caused hot remove failure, the trace is:
>
>kernel BUG at mm/page_alloc.c:4806!
>invalid opcode: 0000 [#1] SMP PTI
>CPU: 3 PID: 8 Comm: kworker/u16:0 Tainted: G W 5.5.0-next-20200205+ #340
>Hardware name: QEMU Standard PC (i440FX + PIIX, 1996), BIOS 0.0.0 02/06/2015
>Workqueue: kacpi_hotplug acpi_hotplug_work_fn
>RIP: 0010:free_pages+0x85/0xa0
>Call Trace:
> __remove_pages+0x99/0xc0
> arch_remove_memory+0x23/0x4d
> try_remove_memory+0xc8/0x130
> ? walk_memory_blocks+0x72/0xa0
> __remove_memory+0xa/0x11
> acpi_memory_device_remove+0x72/0x100
> acpi_bus_trim+0x55/0x90
> acpi_device_hotplug+0x2eb/0x3d0
> acpi_hotplug_work_fn+0x1a/0x30
> process_one_work+0x1a7/0x370
> worker_thread+0x30/0x380
> ? flush_rcu_work+0x30/0x30
> kthread+0x112/0x130
> ? kthread_create_on_node+0x60/0x60
> ret_from_fork+0x35/0x40
>
>Let's defer the ->section_mem_map resetting after depopulate_section_memmap()
>to fix it.
>
>Signed-off-by: Baoquan He <[email protected]>
>---
> mm/sparse.c | 4 +++-
> 1 file changed, 3 insertions(+), 1 deletion(-)
>
>diff --git a/mm/sparse.c b/mm/sparse.c
>index 623755e88255..345d065ef6ce 100644
>--- a/mm/sparse.c
>+++ b/mm/sparse.c
>@@ -854,13 +854,15 @@ static void section_deactivate(unsigned long pfn, unsigned long nr_pages,
> ms->usage = NULL;
> }
> memmap = sparse_decode_mem_map(ms->section_mem_map, section_nr);
>- ms->section_mem_map = (unsigned long)NULL;
> }
>
> if (section_is_early && memmap)
> free_map_bootmem(memmap);
> else
> depopulate_section_memmap(pfn, nr_pages, altmap);
The crash happens in depopulate_section_memmap() when trying to get memmap by
pfn_to_page(). Can we pass memmap directly?
>+
>+ if(!rc)
>+ ms->section_mem_map = (unsigned long)NULL;
> }
>
> static struct page * __meminit section_activate(int nid, unsigned long pfn,
>--
>2.17.2
--
Wei Yang
Help you, Help me
On 02/10/20 at 07:52am, Wei Yang wrote:
> >---
> > mm/sparse.c | 4 +++-
> > 1 file changed, 3 insertions(+), 1 deletion(-)
> >
> >diff --git a/mm/sparse.c b/mm/sparse.c
> >index 623755e88255..345d065ef6ce 100644
> >--- a/mm/sparse.c
> >+++ b/mm/sparse.c
> >@@ -854,13 +854,15 @@ static void section_deactivate(unsigned long pfn, unsigned long nr_pages,
> > ms->usage = NULL;
> > }
> > memmap = sparse_decode_mem_map(ms->section_mem_map, section_nr);
> >- ms->section_mem_map = (unsigned long)NULL;
> > }
> >
> > if (section_is_early && memmap)
> > free_map_bootmem(memmap);
> > else
> > depopulate_section_memmap(pfn, nr_pages, altmap);
>
> The crash happens in depopulate_section_memmap() when trying to get memmap by
> pfn_to_page(). Can we pass memmap directly?
Yes, that's also a good idea. While it needs to add a parameter for
depopulate_section_memmap(), the parameter is useless for VMEMMAP
though, I personally prefer the current fix which is a little simpler.
Anyway, both is fine to me, I can update if you think passing memmap is
better.
>
> >+
> >+ if(!rc)
> >+ ms->section_mem_map = (unsigned long)NULL;
> > }
> >
> > static struct page * __meminit section_activate(int nid, unsigned long pfn,
> >--
> >2.17.2
>
> --
> Wei Yang
> Help you, Help me
>
On 02/09/20 at 06:48pm, Baoquan He wrote:
> From: Wei Yang <[email protected]>
>
> When we use SPARSEMEM instead of SPARSEMEM_VMEMMAP, pfn_to_page()
> doesn't work before sparse_init_one_section() is called. This leads to a
> crash when hotplug memory.
>
> PGD 0 P4D 0
> Oops: 0002 [#1] SMP PTI
> CPU: 3 PID: 8 Comm: kworker/u16:0 Tainted: G W 5.5.0-next-20200205+ #339
> Hardware name: QEMU Standard PC (i440FX + PIIX, 1996), BIOS 0.0.0 02/06/2015
> Workqueue: kacpi_hotplug acpi_hotplug_work_fn
> RIP: 0010:__memset+0x24/0x30
> Call Trace:
> sparse_add_section+0x150/0x1d8
> __add_pages+0xbf/0x150
> add_pages+0x12/0x60
> add_memory_resource+0xc8/0x210
> ? wake_up_q+0xa0/0xa0
> __add_memory+0x62/0xb0
> acpi_memory_device_add+0x13f/0x300
> acpi_bus_attach+0xf6/0x200
> acpi_bus_scan+0x43/0x90
> acpi_device_hotplug+0x275/0x3d0
> acpi_hotplug_work_fn+0x1a/0x30
> process_one_work+0x1a7/0x370
> worker_thread+0x30/0x380
> ? flush_rcu_work+0x30/0x30
> kthread+0x112/0x130
> ? kthread_create_on_node+0x60/0x60
> ret_from_fork+0x35/0x40
>
> We should use memmap as it did.
>
> Fixes: ba72b4c8cf60 ("mm/sparsemem: support sub-section hotplug")
> Signed-off-by: Wei Yang <[email protected]>
> Acked-by: David Hildenbrand <[email protected]>
> Reviewed-by: Baoquan He <[email protected]>
> CC: Dan Williams <[email protected]>
> Signed-off-by: Baoquan He <[email protected]>
Git format-patch added this line of Signed-off-by from me, I will
remove it if repost.
> ---
> mm/sparse.c | 2 +-
> 1 file changed, 1 insertion(+), 1 deletion(-)
>
> diff --git a/mm/sparse.c b/mm/sparse.c
> index a7e78bfe0dce..623755e88255 100644
> --- a/mm/sparse.c
> +++ b/mm/sparse.c
> @@ -944,7 +944,7 @@ int __meminit sparse_add_section(int nid, unsigned long start_pfn,
> * Poison uninitialized struct pages in order to catch invalid flags
> * combinations.
> */
> - page_init_poison(pfn_to_page(start_pfn), sizeof(struct page) * nr_pages);
> + page_init_poison(memmap, sizeof(struct page) * nr_pages);
>
> ms = __nr_to_section(section_nr);
> set_section_nid(section_nr, nid);
> --
> 2.17.2
>
On Mon, Feb 10, 2020 at 11:41:05AM +0800, Baoquan He wrote:
>On 02/10/20 at 07:52am, Wei Yang wrote:
>> >---
>> > mm/sparse.c | 4 +++-
>> > 1 file changed, 3 insertions(+), 1 deletion(-)
>> >
>> >diff --git a/mm/sparse.c b/mm/sparse.c
>> >index 623755e88255..345d065ef6ce 100644
>> >--- a/mm/sparse.c
>> >+++ b/mm/sparse.c
>> >@@ -854,13 +854,15 @@ static void section_deactivate(unsigned long pfn, unsigned long nr_pages,
>> > ms->usage = NULL;
>> > }
>> > memmap = sparse_decode_mem_map(ms->section_mem_map, section_nr);
>> >- ms->section_mem_map = (unsigned long)NULL;
>> > }
>> >
>> > if (section_is_early && memmap)
>> > free_map_bootmem(memmap);
>> > else
>> > depopulate_section_memmap(pfn, nr_pages, altmap);
>>
>> The crash happens in depopulate_section_memmap() when trying to get memmap by
>> pfn_to_page(). Can we pass memmap directly?
>
>Yes, that's also a good idea. While it needs to add a parameter for
>depopulate_section_memmap(), the parameter is useless for VMEMMAP
>though, I personally prefer the current fix which is a little simpler.
>
Not a new parameter, but replace pfn with memmap.
Not sure why the parameter is useless for VMEMMAP? memmap will be assigned to
start and finally pass to vmemmap_free().
>Anyway, both is fine to me, I can update if you think passing memmap is
>better.
>
>>
>> >+
>> >+ if(!rc)
>> >+ ms->section_mem_map = (unsigned long)NULL;
>> > }
>> >
>> > static struct page * __meminit section_activate(int nid, unsigned long pfn,
>> >--
>> >2.17.2
>>
>> --
>> Wei Yang
>> Help you, Help me
>>
--
Wei Yang
Help you, Help me
On 02/10/20 at 02:08pm, Wei Yang wrote:
> On Mon, Feb 10, 2020 at 11:41:05AM +0800, Baoquan He wrote:
> >On 02/10/20 at 07:52am, Wei Yang wrote:
> >> >---
> >> > mm/sparse.c | 4 +++-
> >> > 1 file changed, 3 insertions(+), 1 deletion(-)
> >> >
> >> >diff --git a/mm/sparse.c b/mm/sparse.c
> >> >index 623755e88255..345d065ef6ce 100644
> >> >--- a/mm/sparse.c
> >> >+++ b/mm/sparse.c
> >> >@@ -854,13 +854,15 @@ static void section_deactivate(unsigned long pfn, unsigned long nr_pages,
> >> > ms->usage = NULL;
> >> > }
> >> > memmap = sparse_decode_mem_map(ms->section_mem_map, section_nr);
> >> >- ms->section_mem_map = (unsigned long)NULL;
> >> > }
> >> >
> >> > if (section_is_early && memmap)
> >> > free_map_bootmem(memmap);
> >> > else
> >> > depopulate_section_memmap(pfn, nr_pages, altmap);
> >>
> >> The crash happens in depopulate_section_memmap() when trying to get memmap by
> >> pfn_to_page(). Can we pass memmap directly?
> >
> >Yes, that's also a good idea. While it needs to add a parameter for
> >depopulate_section_memmap(), the parameter is useless for VMEMMAP
> >though, I personally prefer the current fix which is a little simpler.
> >
>
> Not a new parameter, but replace pfn with memmap.
>
> Not sure why the parameter is useless for VMEMMAP? memmap will be assigned to
> start and finally pass to vmemmap_free().
In section_deactivate(), per the code comments from Dan, we can know
that:
/*
* section which only contains bootmem will be handled by
* free_map_bootmem(), including a complete section, or partial
* section which only has memory starting from the begining.
*/
if (section_is_early && memmap)
free_map_bootmem(memmap);
else
/*
* section which contains region mixing bootmem with hot added
* sub-section region, only sub-section region, complete
* section. And in the mxied case, if hot remove the hot added
* sub-section aligned part, no memmap is got in the current
* code. So we still need pfn to calculate it for vmemmap case.
* To me, whenever we need, it looks better that we always use
* pfn to get its own memmap.
*/
depopulate_section_memmap(pfn, nr_pages, altmap);
This is why I would like to keep the current logic as is,only one line
of code adjusting can fix the issue. Please let me know if I miss
anything.
>
> >Anyway, both is fine to me, I can update if you think passing memmap is
> >better.
> >
> >>
> >> >+
> >> >+ if(!rc)
> >> >+ ms->section_mem_map = (unsigned long)NULL;
> >> > }
> >> >
> >> > static struct page * __meminit section_activate(int nid, unsigned long pfn,
> >> >--
> >> >2.17.2
> >>
> >> --
> >> Wei Yang
> >> Help you, Help me
> >>
>
> --
> Wei Yang
> Help you, Help me
>
On Mon, Feb 10, 2020 at 03:54:06PM +0800, Baoquan He wrote:
>On 02/10/20 at 02:08pm, Wei Yang wrote:
>> On Mon, Feb 10, 2020 at 11:41:05AM +0800, Baoquan He wrote:
>> >On 02/10/20 at 07:52am, Wei Yang wrote:
>> >> >---
>> >> > mm/sparse.c | 4 +++-
>> >> > 1 file changed, 3 insertions(+), 1 deletion(-)
>> >> >
>> >> >diff --git a/mm/sparse.c b/mm/sparse.c
>> >> >index 623755e88255..345d065ef6ce 100644
>> >> >--- a/mm/sparse.c
>> >> >+++ b/mm/sparse.c
>> >> >@@ -854,13 +854,15 @@ static void section_deactivate(unsigned long pfn, unsigned long nr_pages,
>> >> > ms->usage = NULL;
>> >> > }
>> >> > memmap = sparse_decode_mem_map(ms->section_mem_map, section_nr);
>> >> >- ms->section_mem_map = (unsigned long)NULL;
>> >> > }
>> >> >
>> >> > if (section_is_early && memmap)
>> >> > free_map_bootmem(memmap);
>> >> > else
>> >> > depopulate_section_memmap(pfn, nr_pages, altmap);
>> >>
>> >> The crash happens in depopulate_section_memmap() when trying to get memmap by
>> >> pfn_to_page(). Can we pass memmap directly?
>> >
>> >Yes, that's also a good idea. While it needs to add a parameter for
>> >depopulate_section_memmap(), the parameter is useless for VMEMMAP
>> >though, I personally prefer the current fix which is a little simpler.
>> >
>>
>> Not a new parameter, but replace pfn with memmap.
>>
>> Not sure why the parameter is useless for VMEMMAP? memmap will be assigned to
>> start and finally pass to vmemmap_free().
>
>In section_deactivate(), per the code comments from Dan, we can know
>that:
>
> /*
> * section which only contains bootmem will be handled by
> * free_map_bootmem(), including a complete section, or partial
> * section which only has memory starting from the begining.
> */
> if (section_is_early && memmap)
> free_map_bootmem(memmap);
> else
> /*
> * section which contains region mixing bootmem with hot added
> * sub-section region, only sub-section region, complete
> * section. And in the mxied case, if hot remove the hot added
> * sub-section aligned part, no memmap is got in the current
> * code. So we still need pfn to calculate it for vmemmap case.
> * To me, whenever we need, it looks better that we always use
> * pfn to get its own memmap.
> */
> depopulate_section_memmap(pfn, nr_pages, altmap);
>
>This is why I would like to keep the current logic as is,only one line
>of code adjusting can fix the issue. Please let me know if I miss
>anything.
>
You are right. I missed this point.
>
>>
>> >Anyway, both is fine to me, I can update if you think passing memmap is
>> >better.
>> >
>> >>
>> >> >+
>> >> >+ if(!rc)
>> >> >+ ms->section_mem_map = (unsigned long)NULL;
>> >> > }
>> >> >
>> >> > static struct page * __meminit section_activate(int nid, unsigned long pfn,
>> >> >--
>> >> >2.17.2
>> >>
>> >> --
>> >> Wei Yang
>> >> Help you, Help me
>> >>
>>
>> --
>> Wei Yang
>> Help you, Help me
>>
--
Wei Yang
Help you, Help me
On 09.02.20 11:48, Baoquan He wrote:
> Currently, subsection map is used when SPARSEMEM is enabled, including
> VMEMMAP case and !VMEMMAP case. However, subsection hotplug is not
> supported at all in SPARSEMEM|!VMEMMAP case, subsection map is unnecessary
> and misleading. Let's adjust code to only allow subsection map being
> used in SPARSEMEM|VMEMMAP case.
>
> Signed-off-by: Baoquan He <[email protected]>
> ---
> include/linux/mmzone.h | 2 +
> mm/sparse.c | 231 ++++++++++++++++++++++-------------------
> 2 files changed, 124 insertions(+), 109 deletions(-)
>
> diff --git a/include/linux/mmzone.h b/include/linux/mmzone.h
> index 462f6873905a..fc0de3a9a51e 100644
> --- a/include/linux/mmzone.h
> +++ b/include/linux/mmzone.h
> @@ -1185,7 +1185,9 @@ static inline unsigned long section_nr_to_pfn(unsigned long sec)
> #define SUBSECTION_ALIGN_DOWN(pfn) ((pfn) & PAGE_SUBSECTION_MASK)
>
> struct mem_section_usage {
> +#ifdef CONFIG_SPARSEMEM_VMEMMAP
> DECLARE_BITMAP(subsection_map, SUBSECTIONS_PER_SECTION);
> +#endif
> /* See declaration of similar field in struct zone */
> unsigned long pageblock_flags[0];
> };
> diff --git a/mm/sparse.c b/mm/sparse.c
> index 696f6b9f706e..cf55d272d0a9 100644
> --- a/mm/sparse.c
> +++ b/mm/sparse.c
> @@ -209,41 +209,6 @@ static inline unsigned long first_present_section_nr(void)
> return next_present_section_nr(-1);
> }
>
> -static void subsection_mask_set(unsigned long *map, unsigned long pfn,
> - unsigned long nr_pages)
> -{
> - int idx = subsection_map_index(pfn);
> - int end = subsection_map_index(pfn + nr_pages - 1);
> -
> - bitmap_set(map, idx, end - idx + 1);
> -}
> -
> -void __init subsection_map_init(unsigned long pfn, unsigned long nr_pages)
> -{
> - int end_sec = pfn_to_section_nr(pfn + nr_pages - 1);
> - unsigned long nr, start_sec = pfn_to_section_nr(pfn);
> -
> - if (!nr_pages)
> - return;
> -
> - for (nr = start_sec; nr <= end_sec; nr++) {
> - struct mem_section *ms;
> - unsigned long pfns;
> -
> - pfns = min(nr_pages, PAGES_PER_SECTION
> - - (pfn & ~PAGE_SECTION_MASK));
> - ms = __nr_to_section(nr);
> - subsection_mask_set(ms->usage->subsection_map, pfn, pfns);
> -
> - pr_debug("%s: sec: %lu pfns: %lu set(%d, %d)\n", __func__, nr,
> - pfns, subsection_map_index(pfn),
> - subsection_map_index(pfn + pfns - 1));
> -
> - pfn += pfns;
> - nr_pages -= pfns;
> - }
> -}
> -
> /* Record a memory area against a node. */
> void __init memory_present(int nid, unsigned long start, unsigned long end)
> {
> @@ -432,12 +397,134 @@ static unsigned long __init section_map_size(void)
> return ALIGN(sizeof(struct page) * PAGES_PER_SECTION, PMD_SIZE);
> }
>
> +static void subsection_mask_set(unsigned long *map, unsigned long pfn,
> + unsigned long nr_pages)
> +{
> + int idx = subsection_map_index(pfn);
> + int end = subsection_map_index(pfn + nr_pages - 1);
> +
> + bitmap_set(map, idx, end - idx + 1);
> +}
> +
> +void __init subsection_map_init(unsigned long pfn, unsigned long nr_pages)
> +{
> + int end_sec = pfn_to_section_nr(pfn + nr_pages - 1);
> + unsigned long nr, start_sec = pfn_to_section_nr(pfn);
> +
> + if (!nr_pages)
> + return;
> +
> + for (nr = start_sec; nr <= end_sec; nr++) {
> + struct mem_section *ms;
> + unsigned long pfns;
> +
> + pfns = min(nr_pages, PAGES_PER_SECTION
> + - (pfn & ~PAGE_SECTION_MASK));
> + ms = __nr_to_section(nr);
> + subsection_mask_set(ms->usage->subsection_map, pfn, pfns);
> +
> + pr_debug("%s: sec: %lu pfns: %lu set(%d, %d)\n", __func__, nr,
> + pfns, subsection_map_index(pfn),
> + subsection_map_index(pfn + pfns - 1));
> +
> + pfn += pfns;
> + nr_pages -= pfns;
> + }
> +}
> +
> +/**
> + * clear_subsection_map - Clear subsection map of one memory region
> + *
> + * @pfn - start pfn of the memory range
> + * @nr_pages - number of pfns to add in the region
> + *
> + * This is only intended for hotplug, and clear the related subsection
> + * map inside one section.
> + *
> + * Return:
> + * * -EINVAL - Section already deactived.
> + * * 0 - Subsection map is emptied.
> + * * 1 - Subsection map is not empty.
> + */
> +static int clear_subsection_map(unsigned long pfn, unsigned long nr_pages)
> +{
> + DECLARE_BITMAP(map, SUBSECTIONS_PER_SECTION) = { 0 };
> + DECLARE_BITMAP(tmp, SUBSECTIONS_PER_SECTION) = { 0 };
> + struct mem_section *ms = __pfn_to_section(pfn);
> + unsigned long *subsection_map = ms->usage
> + ? &ms->usage->subsection_map[0] : NULL;
> +
> + subsection_mask_set(map, pfn, nr_pages);
> + if (subsection_map)
> + bitmap_and(tmp, map, subsection_map, SUBSECTIONS_PER_SECTION);
> +
> + if (WARN(!subsection_map || !bitmap_equal(tmp, map, SUBSECTIONS_PER_SECTION),
> + "section already deactivated (%#lx + %ld)\n",
> + pfn, nr_pages))
> + return -EINVAL;
> +
> + bitmap_xor(subsection_map, map, subsection_map, SUBSECTIONS_PER_SECTION);
> +
> + if (bitmap_empty(subsection_map, SUBSECTIONS_PER_SECTION))
> + return 0;
> +
> + return 1;
> +}
> +
> +/**
> + * fill_subsection_map - fill subsection map of a memory region
> + * @pfn - start pfn of the memory range
> + * @nr_pages - number of pfns to add in the region
> + *
> + * This clears the related subsection map inside one section, and only
> + * intended for hotplug.
> + *
> + * Return:
> + * * 0 - On success.
> + * * -EINVAL - Invalid memory region.
> + * * -EEXIST - Subsection map has been set.
> + */
> +static int fill_subsection_map(unsigned long pfn, unsigned long nr_pages)
> +{
> + struct mem_section *ms = __pfn_to_section(pfn);
> + DECLARE_BITMAP(map, SUBSECTIONS_PER_SECTION) = { 0 };
> + unsigned long *subsection_map;
> + int rc = 0;
> +
> + subsection_mask_set(map, pfn, nr_pages);
> +
> + subsection_map = &ms->usage->subsection_map[0];
> +
> + if (bitmap_empty(map, SUBSECTIONS_PER_SECTION))
> + rc = -EINVAL;
> + else if (bitmap_intersects(map, subsection_map, SUBSECTIONS_PER_SECTION))
> + rc = -EEXIST;
> + else
> + bitmap_or(subsection_map, map, subsection_map,
> + SUBSECTIONS_PER_SECTION);
> +
> + return rc;
> +}
> +
> #else
> static unsigned long __init section_map_size(void)
> {
> return PAGE_ALIGN(sizeof(struct page) * PAGES_PER_SECTION);
> }
>
> +void __init subsection_map_init(unsigned long pfn, unsigned long nr_pages)
> +{
> +}
> +
> +static int clear_subsection_map(unsigned long pfn, unsigned long nr_pages)
> +{
> + return 0;
> +}
> +static int fill_subsection_map(unsigned long pfn, unsigned long nr_pages)
> +{
> + return 0;
> +}
> +
> struct page __init *__populate_section_memmap(unsigned long pfn,
> unsigned long nr_pages, int nid, struct vmem_altmap *altmap)
> {
> @@ -726,45 +813,6 @@ static void free_map_bootmem(struct page *memmap)
> }
> #endif /* CONFIG_SPARSEMEM_VMEMMAP */
>
> -/**
> - * clear_subsection_map - Clear subsection map of one memory region
> - *
> - * @pfn - start pfn of the memory range
> - * @nr_pages - number of pfns to add in the region
> - *
> - * This is only intended for hotplug, and clear the related subsection
> - * map inside one section.
> - *
> - * Return:
> - * * -EINVAL - Section already deactived.
> - * * 0 - Subsection map is emptied.
> - * * 1 - Subsection map is not empty.
> - */
> -static int clear_subsection_map(unsigned long pfn, unsigned long nr_pages)
> -{
> - DECLARE_BITMAP(map, SUBSECTIONS_PER_SECTION) = { 0 };
> - DECLARE_BITMAP(tmp, SUBSECTIONS_PER_SECTION) = { 0 };
> - struct mem_section *ms = __pfn_to_section(pfn);
> - unsigned long *subsection_map = ms->usage
> - ? &ms->usage->subsection_map[0] : NULL;
> -
> - subsection_mask_set(map, pfn, nr_pages);
> - if (subsection_map)
> - bitmap_and(tmp, map, subsection_map, SUBSECTIONS_PER_SECTION);
> -
> - if (WARN(!subsection_map || !bitmap_equal(tmp, map, SUBSECTIONS_PER_SECTION),
> - "section already deactivated (%#lx + %ld)\n",
> - pfn, nr_pages))
> - return -EINVAL;
> -
> - bitmap_xor(subsection_map, map, subsection_map, SUBSECTIONS_PER_SECTION);
> -
> - if (bitmap_empty(subsection_map, SUBSECTIONS_PER_SECTION))
> - return 0;
> -
> - return 1;
> -}
> -
> static void section_deactivate(unsigned long pfn, unsigned long nr_pages,
> struct vmem_altmap *altmap)
> {
> @@ -818,41 +866,6 @@ static void section_deactivate(unsigned long pfn, unsigned long nr_pages,
> depopulate_section_memmap(pfn, nr_pages, altmap);
> }
>
> -/**
> - * fill_subsection_map - fill subsection map of a memory region
> - * @pfn - start pfn of the memory range
> - * @nr_pages - number of pfns to add in the region
> - *
> - * This clears the related subsection map inside one section, and only
> - * intended for hotplug.
> - *
> - * Return:
> - * * 0 - On success.
> - * * -EINVAL - Invalid memory region.
> - * * -EEXIST - Subsection map has been set.
> - */
> -static int fill_subsection_map(unsigned long pfn, unsigned long nr_pages)
> -{
> - struct mem_section *ms = __pfn_to_section(pfn);
> - DECLARE_BITMAP(map, SUBSECTIONS_PER_SECTION) = { 0 };
> - unsigned long *subsection_map;
> - int rc = 0;
> -
> - subsection_mask_set(map, pfn, nr_pages);
> -
> - subsection_map = &ms->usage->subsection_map[0];
> -
> - if (bitmap_empty(map, SUBSECTIONS_PER_SECTION))
> - rc = -EINVAL;
> - else if (bitmap_intersects(map, subsection_map, SUBSECTIONS_PER_SECTION))
> - rc = -EEXIST;
> - else
> - bitmap_or(subsection_map, map, subsection_map,
> - SUBSECTIONS_PER_SECTION);
> -
> - return rc;
> -}
> -
> static struct page * __meminit section_activate(int nid, unsigned long pfn,
> unsigned long nr_pages, struct vmem_altmap *altmap)
> {
>
I'd prefer adding more ifdefs to avoid heavy code movement. Would make
it much easier to review :)
--
Thanks,
David / dhildenb
On Sun, Feb 9, 2020 at 2:48 AM Baoquan He <[email protected]> wrote:
>
> Currently, subsection map is used when SPARSEMEM is enabled, including
> VMEMMAP case and !VMEMMAP case. However, subsection hotplug is not
> supported at all in SPARSEMEM|!VMEMMAP case, subsection map is unnecessary
> and misleading. Let's adjust code to only allow subsection map being
> used in SPARSEMEM|VMEMMAP case.
>
> Signed-off-by: Baoquan He <[email protected]>
> ---
> include/linux/mmzone.h | 2 +
> mm/sparse.c | 231 ++++++++++++++++++++++-------------------
> 2 files changed, 124 insertions(+), 109 deletions(-)
>
> diff --git a/include/linux/mmzone.h b/include/linux/mmzone.h
> index 462f6873905a..fc0de3a9a51e 100644
> --- a/include/linux/mmzone.h
> +++ b/include/linux/mmzone.h
> @@ -1185,7 +1185,9 @@ static inline unsigned long section_nr_to_pfn(unsigned long sec)
> #define SUBSECTION_ALIGN_DOWN(pfn) ((pfn) & PAGE_SUBSECTION_MASK)
>
> struct mem_section_usage {
> +#ifdef CONFIG_SPARSEMEM_VMEMMAP
> DECLARE_BITMAP(subsection_map, SUBSECTIONS_PER_SECTION);
> +#endif
This was done deliberately so that the SPARSEMEM_VMEMMAP=n case ran as
a subset of the SPARSEMEM_VMEMMAP=y case.
The diffstat does not seem to agree that this is any clearer:
124 insertions(+), 109 deletions(-)
> Am 11.02.2020 um 21:15 schrieb Dan Williams <[email protected]>:
>
> On Sun, Feb 9, 2020 at 2:48 AM Baoquan He <[email protected]> wrote:
>>
>> Currently, subsection map is used when SPARSEMEM is enabled, including
>> VMEMMAP case and !VMEMMAP case. However, subsection hotplug is not
>> supported at all in SPARSEMEM|!VMEMMAP case, subsection map is unnecessary
>> and misleading. Let's adjust code to only allow subsection map being
>> used in SPARSEMEM|VMEMMAP case.
>>
>> Signed-off-by: Baoquan He <[email protected]>
>> ---
>> include/linux/mmzone.h | 2 +
>> mm/sparse.c | 231 ++++++++++++++++++++++-------------------
>> 2 files changed, 124 insertions(+), 109 deletions(-)
>>
>> diff --git a/include/linux/mmzone.h b/include/linux/mmzone.h
>> index 462f6873905a..fc0de3a9a51e 100644
>> --- a/include/linux/mmzone.h
>> +++ b/include/linux/mmzone.h
>> @@ -1185,7 +1185,9 @@ static inline unsigned long section_nr_to_pfn(unsigned long sec)
>> #define SUBSECTION_ALIGN_DOWN(pfn) ((pfn) & PAGE_SUBSECTION_MASK)
>>
>> struct mem_section_usage {
>> +#ifdef CONFIG_SPARSEMEM_VMEMMAP
>> DECLARE_BITMAP(subsection_map, SUBSECTIONS_PER_SECTION);
>> +#endif
>
> This was done deliberately so that the SPARSEMEM_VMEMMAP=n case ran as
> a subset of the SPARSEMEM_VMEMMAP=y case.
>
> The diffstat does not seem to agree that this is any clearer:
>
> 124 insertions(+), 109 deletions(-)
>
I don‘t see a reason to work with subsections (+store them) if subsections are not supported.
I do welcome this cleanup. Diffstats don‘t tell the whole story.
On 02/12/20 at 10:39am, David Hildenbrand wrote:
>
>
> > Am 11.02.2020 um 21:15 schrieb Dan Williams <[email protected]>:
> >
> > On Sun, Feb 9, 2020 at 2:48 AM Baoquan He <[email protected]> wrote:
> >>
> >> Currently, subsection map is used when SPARSEMEM is enabled, including
> >> VMEMMAP case and !VMEMMAP case. However, subsection hotplug is not
> >> supported at all in SPARSEMEM|!VMEMMAP case, subsection map is unnecessary
> >> and misleading. Let's adjust code to only allow subsection map being
> >> used in SPARSEMEM|VMEMMAP case.
> >>
> >> Signed-off-by: Baoquan He <[email protected]>
> >> ---
> >> include/linux/mmzone.h | 2 +
> >> mm/sparse.c | 231 ++++++++++++++++++++++-------------------
> >> 2 files changed, 124 insertions(+), 109 deletions(-)
> >>
> >> diff --git a/include/linux/mmzone.h b/include/linux/mmzone.h
> >> index 462f6873905a..fc0de3a9a51e 100644
> >> --- a/include/linux/mmzone.h
> >> +++ b/include/linux/mmzone.h
> >> @@ -1185,7 +1185,9 @@ static inline unsigned long section_nr_to_pfn(unsigned long sec)
> >> #define SUBSECTION_ALIGN_DOWN(pfn) ((pfn) & PAGE_SUBSECTION_MASK)
> >>
> >> struct mem_section_usage {
> >> +#ifdef CONFIG_SPARSEMEM_VMEMMAP
> >> DECLARE_BITMAP(subsection_map, SUBSECTIONS_PER_SECTION);
> >> +#endif
> >
> > This was done deliberately so that the SPARSEMEM_VMEMMAP=n case ran as
> > a subset of the SPARSEMEM_VMEMMAP=y case.
Thanks for checking this, Dan.
Taking away the subsection part, won't affect the classic sparse being a
subset of VMEMMAP case, I would say.
> >
> > The diffstat does not seem to agree that this is any clearer:
> >
> > 124 insertions(+), 109 deletions(-)
> >
>
> I don‘t see a reason to work with subsections (+store them) if subsections are not supported.
>
> I do welcome this cleanup. Diffstats don‘t tell the whole story.
Thanks for clarifying this, David, I agree.
If applying the patch, it should be easier to observe that the code
is simpler to follow, at least won't be confusing on subsection handling
part.
On 02/11/20 at 03:43pm, David Hildenbrand wrote:
> On 09.02.20 11:48, Baoquan He wrote:
> > Currently, subsection map is used when SPARSEMEM is enabled, including
> > VMEMMAP case and !VMEMMAP case. However, subsection hotplug is not
> > supported at all in SPARSEMEM|!VMEMMAP case, subsection map is unnecessary
> > and misleading. Let's adjust code to only allow subsection map being
> > used in SPARSEMEM|VMEMMAP case.
> >
> > Signed-off-by: Baoquan He <[email protected]>
> > ---
> > include/linux/mmzone.h | 2 +
> > mm/sparse.c | 231 ++++++++++++++++++++++-------------------
> > 2 files changed, 124 insertions(+), 109 deletions(-)
> >
> > diff --git a/include/linux/mmzone.h b/include/linux/mmzone.h
> > index 462f6873905a..fc0de3a9a51e 100644
> > --- a/include/linux/mmzone.h
> > +++ b/include/linux/mmzone.h
> > @@ -1185,7 +1185,9 @@ static inline unsigned long section_nr_to_pfn(unsigned long sec)
> > #define SUBSECTION_ALIGN_DOWN(pfn) ((pfn) & PAGE_SUBSECTION_MASK)
> >
> > struct mem_section_usage {
> > +#ifdef CONFIG_SPARSEMEM_VMEMMAP
> > DECLARE_BITMAP(subsection_map, SUBSECTIONS_PER_SECTION);
> > +#endif
> > /* See declaration of similar field in struct zone */
> > unsigned long pageblock_flags[0];
> > };
> > diff --git a/mm/sparse.c b/mm/sparse.c
> > index 696f6b9f706e..cf55d272d0a9 100644
> > --- a/mm/sparse.c
> > +++ b/mm/sparse.c
> > @@ -209,41 +209,6 @@ static inline unsigned long first_present_section_nr(void)
> > return next_present_section_nr(-1);
> > }
> >
> > -static void subsection_mask_set(unsigned long *map, unsigned long pfn,
> > - unsigned long nr_pages)
> > -{
> > - int idx = subsection_map_index(pfn);
> > - int end = subsection_map_index(pfn + nr_pages - 1);
> > -
> > - bitmap_set(map, idx, end - idx + 1);
> > -}
> > -
> > -void __init subsection_map_init(unsigned long pfn, unsigned long nr_pages)
> > -{
> > - int end_sec = pfn_to_section_nr(pfn + nr_pages - 1);
> > - unsigned long nr, start_sec = pfn_to_section_nr(pfn);
> > -
> > - if (!nr_pages)
> > - return;
> > -
> > - for (nr = start_sec; nr <= end_sec; nr++) {
> > - struct mem_section *ms;
> > - unsigned long pfns;
> > -
> > - pfns = min(nr_pages, PAGES_PER_SECTION
> > - - (pfn & ~PAGE_SECTION_MASK));
> > - ms = __nr_to_section(nr);
> > - subsection_mask_set(ms->usage->subsection_map, pfn, pfns);
> > -
> > - pr_debug("%s: sec: %lu pfns: %lu set(%d, %d)\n", __func__, nr,
> > - pfns, subsection_map_index(pfn),
> > - subsection_map_index(pfn + pfns - 1));
> > -
> > - pfn += pfns;
> > - nr_pages -= pfns;
> > - }
> > -}
> > -
> > /* Record a memory area against a node. */
> > void __init memory_present(int nid, unsigned long start, unsigned long end)
> > {
> > @@ -432,12 +397,134 @@ static unsigned long __init section_map_size(void)
> > return ALIGN(sizeof(struct page) * PAGES_PER_SECTION, PMD_SIZE);
> > }
> >
> > +static void subsection_mask_set(unsigned long *map, unsigned long pfn,
> > + unsigned long nr_pages)
> > +{
> > + int idx = subsection_map_index(pfn);
> > + int end = subsection_map_index(pfn + nr_pages - 1);
> > +
> > + bitmap_set(map, idx, end - idx + 1);
> > +}
> > +
> > +void __init subsection_map_init(unsigned long pfn, unsigned long nr_pages)
> > +{
> > + int end_sec = pfn_to_section_nr(pfn + nr_pages - 1);
> > + unsigned long nr, start_sec = pfn_to_section_nr(pfn);
> > +
> > + if (!nr_pages)
> > + return;
> > +
> > + for (nr = start_sec; nr <= end_sec; nr++) {
> > + struct mem_section *ms;
> > + unsigned long pfns;
> > +
> > + pfns = min(nr_pages, PAGES_PER_SECTION
> > + - (pfn & ~PAGE_SECTION_MASK));
> > + ms = __nr_to_section(nr);
> > + subsection_mask_set(ms->usage->subsection_map, pfn, pfns);
> > +
> > + pr_debug("%s: sec: %lu pfns: %lu set(%d, %d)\n", __func__, nr,
> > + pfns, subsection_map_index(pfn),
> > + subsection_map_index(pfn + pfns - 1));
> > +
> > + pfn += pfns;
> > + nr_pages -= pfns;
> > + }
> > +}
> > +
> > +/**
> > + * clear_subsection_map - Clear subsection map of one memory region
> > + *
> > + * @pfn - start pfn of the memory range
> > + * @nr_pages - number of pfns to add in the region
> > + *
> > + * This is only intended for hotplug, and clear the related subsection
> > + * map inside one section.
> > + *
> > + * Return:
> > + * * -EINVAL - Section already deactived.
> > + * * 0 - Subsection map is emptied.
> > + * * 1 - Subsection map is not empty.
> > + */
> > +static int clear_subsection_map(unsigned long pfn, unsigned long nr_pages)
> > +{
> > + DECLARE_BITMAP(map, SUBSECTIONS_PER_SECTION) = { 0 };
> > + DECLARE_BITMAP(tmp, SUBSECTIONS_PER_SECTION) = { 0 };
> > + struct mem_section *ms = __pfn_to_section(pfn);
> > + unsigned long *subsection_map = ms->usage
> > + ? &ms->usage->subsection_map[0] : NULL;
> > +
> > + subsection_mask_set(map, pfn, nr_pages);
> > + if (subsection_map)
> > + bitmap_and(tmp, map, subsection_map, SUBSECTIONS_PER_SECTION);
> > +
> > + if (WARN(!subsection_map || !bitmap_equal(tmp, map, SUBSECTIONS_PER_SECTION),
> > + "section already deactivated (%#lx + %ld)\n",
> > + pfn, nr_pages))
> > + return -EINVAL;
> > +
> > + bitmap_xor(subsection_map, map, subsection_map, SUBSECTIONS_PER_SECTION);
> > +
> > + if (bitmap_empty(subsection_map, SUBSECTIONS_PER_SECTION))
> > + return 0;
> > +
> > + return 1;
> > +}
> > +
> > +/**
> > + * fill_subsection_map - fill subsection map of a memory region
> > + * @pfn - start pfn of the memory range
> > + * @nr_pages - number of pfns to add in the region
> > + *
> > + * This clears the related subsection map inside one section, and only
> > + * intended for hotplug.
> > + *
> > + * Return:
> > + * * 0 - On success.
> > + * * -EINVAL - Invalid memory region.
> > + * * -EEXIST - Subsection map has been set.
> > + */
> > +static int fill_subsection_map(unsigned long pfn, unsigned long nr_pages)
> > +{
> > + struct mem_section *ms = __pfn_to_section(pfn);
> > + DECLARE_BITMAP(map, SUBSECTIONS_PER_SECTION) = { 0 };
> > + unsigned long *subsection_map;
> > + int rc = 0;
> > +
> > + subsection_mask_set(map, pfn, nr_pages);
> > +
> > + subsection_map = &ms->usage->subsection_map[0];
> > +
> > + if (bitmap_empty(map, SUBSECTIONS_PER_SECTION))
> > + rc = -EINVAL;
> > + else if (bitmap_intersects(map, subsection_map, SUBSECTIONS_PER_SECTION))
> > + rc = -EEXIST;
> > + else
> > + bitmap_or(subsection_map, map, subsection_map,
> > + SUBSECTIONS_PER_SECTION);
> > +
> > + return rc;
> > +}
> > +
> > #else
> > static unsigned long __init section_map_size(void)
> > {
> > return PAGE_ALIGN(sizeof(struct page) * PAGES_PER_SECTION);
> > }
> >
> > +void __init subsection_map_init(unsigned long pfn, unsigned long nr_pages)
> > +{
> > +}
> > +
> > +static int clear_subsection_map(unsigned long pfn, unsigned long nr_pages)
> > +{
> > + return 0;
> > +}
> > +static int fill_subsection_map(unsigned long pfn, unsigned long nr_pages)
> > +{
> > + return 0;
> > +}
> > +
> > struct page __init *__populate_section_memmap(unsigned long pfn,
> > unsigned long nr_pages, int nid, struct vmem_altmap *altmap)
> > {
> > @@ -726,45 +813,6 @@ static void free_map_bootmem(struct page *memmap)
> > }
> > #endif /* CONFIG_SPARSEMEM_VMEMMAP */
> >
> > -/**
> > - * clear_subsection_map - Clear subsection map of one memory region
> > - *
> > - * @pfn - start pfn of the memory range
> > - * @nr_pages - number of pfns to add in the region
> > - *
> > - * This is only intended for hotplug, and clear the related subsection
> > - * map inside one section.
> > - *
> > - * Return:
> > - * * -EINVAL - Section already deactived.
> > - * * 0 - Subsection map is emptied.
> > - * * 1 - Subsection map is not empty.
> > - */
> > -static int clear_subsection_map(unsigned long pfn, unsigned long nr_pages)
> > -{
> > - DECLARE_BITMAP(map, SUBSECTIONS_PER_SECTION) = { 0 };
> > - DECLARE_BITMAP(tmp, SUBSECTIONS_PER_SECTION) = { 0 };
> > - struct mem_section *ms = __pfn_to_section(pfn);
> > - unsigned long *subsection_map = ms->usage
> > - ? &ms->usage->subsection_map[0] : NULL;
> > -
> > - subsection_mask_set(map, pfn, nr_pages);
> > - if (subsection_map)
> > - bitmap_and(tmp, map, subsection_map, SUBSECTIONS_PER_SECTION);
> > -
> > - if (WARN(!subsection_map || !bitmap_equal(tmp, map, SUBSECTIONS_PER_SECTION),
> > - "section already deactivated (%#lx + %ld)\n",
> > - pfn, nr_pages))
> > - return -EINVAL;
> > -
> > - bitmap_xor(subsection_map, map, subsection_map, SUBSECTIONS_PER_SECTION);
> > -
> > - if (bitmap_empty(subsection_map, SUBSECTIONS_PER_SECTION))
> > - return 0;
> > -
> > - return 1;
> > -}
> > -
> > static void section_deactivate(unsigned long pfn, unsigned long nr_pages,
> > struct vmem_altmap *altmap)
> > {
> > @@ -818,41 +866,6 @@ static void section_deactivate(unsigned long pfn, unsigned long nr_pages,
> > depopulate_section_memmap(pfn, nr_pages, altmap);
> > }
> >
> > -/**
> > - * fill_subsection_map - fill subsection map of a memory region
> > - * @pfn - start pfn of the memory range
> > - * @nr_pages - number of pfns to add in the region
> > - *
> > - * This clears the related subsection map inside one section, and only
> > - * intended for hotplug.
> > - *
> > - * Return:
> > - * * 0 - On success.
> > - * * -EINVAL - Invalid memory region.
> > - * * -EEXIST - Subsection map has been set.
> > - */
> > -static int fill_subsection_map(unsigned long pfn, unsigned long nr_pages)
> > -{
> > - struct mem_section *ms = __pfn_to_section(pfn);
> > - DECLARE_BITMAP(map, SUBSECTIONS_PER_SECTION) = { 0 };
> > - unsigned long *subsection_map;
> > - int rc = 0;
> > -
> > - subsection_mask_set(map, pfn, nr_pages);
> > -
> > - subsection_map = &ms->usage->subsection_map[0];
> > -
> > - if (bitmap_empty(map, SUBSECTIONS_PER_SECTION))
> > - rc = -EINVAL;
> > - else if (bitmap_intersects(map, subsection_map, SUBSECTIONS_PER_SECTION))
> > - rc = -EEXIST;
> > - else
> > - bitmap_or(subsection_map, map, subsection_map,
> > - SUBSECTIONS_PER_SECTION);
> > -
> > - return rc;
> > -}
> > -
> > static struct page * __meminit section_activate(int nid, unsigned long pfn,
> > unsigned long nr_pages, struct vmem_altmap *altmap)
> > {
> >
>
> I'd prefer adding more ifdefs to avoid heavy code movement. Would make
> it much easier to review :)
OK, I did it in the first place. Later I think putting all subsectin
related handling into one place makes code look better. Now I understand
it may make patch format messy. I will adjust the place to make
reviewing easier. Thanks for your great suggestion.
On Wed, Feb 12, 2020 at 1:40 AM David Hildenbrand <[email protected]> wrote:
>
>
>
> > Am 11.02.2020 um 21:15 schrieb Dan Williams <[email protected]>:
> >
> > On Sun, Feb 9, 2020 at 2:48 AM Baoquan He <[email protected]> wrote:
> >>
> >> Currently, subsection map is used when SPARSEMEM is enabled, including
> >> VMEMMAP case and !VMEMMAP case. However, subsection hotplug is not
> >> supported at all in SPARSEMEM|!VMEMMAP case, subsection map is unnecessary
> >> and misleading. Let's adjust code to only allow subsection map being
> >> used in SPARSEMEM|VMEMMAP case.
> >>
> >> Signed-off-by: Baoquan He <[email protected]>
> >> ---
> >> include/linux/mmzone.h | 2 +
> >> mm/sparse.c | 231 ++++++++++++++++++++++-------------------
> >> 2 files changed, 124 insertions(+), 109 deletions(-)
> >>
> >> diff --git a/include/linux/mmzone.h b/include/linux/mmzone.h
> >> index 462f6873905a..fc0de3a9a51e 100644
> >> --- a/include/linux/mmzone.h
> >> +++ b/include/linux/mmzone.h
> >> @@ -1185,7 +1185,9 @@ static inline unsigned long section_nr_to_pfn(unsigned long sec)
> >> #define SUBSECTION_ALIGN_DOWN(pfn) ((pfn) & PAGE_SUBSECTION_MASK)
> >>
> >> struct mem_section_usage {
> >> +#ifdef CONFIG_SPARSEMEM_VMEMMAP
> >> DECLARE_BITMAP(subsection_map, SUBSECTIONS_PER_SECTION);
> >> +#endif
> >
> > This was done deliberately so that the SPARSEMEM_VMEMMAP=n case ran as
> > a subset of the SPARSEMEM_VMEMMAP=y case.
> >
> > The diffstat does not seem to agree that this is any clearer:
> >
> > 124 insertions(+), 109 deletions(-)
> >
>
> I don‘t see a reason to work with subsections (+store them) if subsections are not supported.
>
> I do welcome this cleanup. Diffstats don‘t tell the whole story.
I'll take a look at the final result and see if my opinion changes,
but I just wanted to clarify upfront that making sparsemem run some of
the subsection logic was deliberate.