For the memory hole, sparse memory model that define SPARSEMEM_VMEMMAP
do not free the reserved memory for the page map, this patch do it.
Signed-off-by: Wei Li <[email protected]>
Signed-off-by: Chen Feng <[email protected]>
Signed-off-by: Xia Qing <[email protected]>
v2: fix the patch v1 compile errors that are not based on the latest mainline.
---
arch/arm64/mm/init.c | 81 +++++++++++++++++++++++++++++++++++++++++++++-------
1 file changed, 71 insertions(+), 10 deletions(-)
diff --git a/arch/arm64/mm/init.c b/arch/arm64/mm/init.c
index 1e93cfc7c47a..600889945cd0 100644
--- a/arch/arm64/mm/init.c
+++ b/arch/arm64/mm/init.c
@@ -441,7 +441,48 @@ void __init bootmem_init(void)
memblock_dump_all();
}
-#ifndef CONFIG_SPARSEMEM_VMEMMAP
+#ifdef CONFIG_SPARSEMEM_VMEMMAP
+#define VMEMMAP_PAGE_INUSE 0xFD
+static inline void free_memmap(unsigned long start_pfn, unsigned long end_pfn)
+{
+ unsigned long addr, end;
+ unsigned long next;
+ pmd_t *pmd;
+ void *page_addr;
+ phys_addr_t phys_addr;
+
+ addr = (unsigned long)pfn_to_page(start_pfn);
+ end = (unsigned long)pfn_to_page(end_pfn);
+
+ pmd = pmd_off_k(addr);
+ for (; addr < end; addr = next, pmd++) {
+ next = pmd_addr_end(addr, end);
+
+ if (!pmd_present(*pmd))
+ continue;
+
+ if (IS_ALIGNED(addr, PMD_SIZE) &&
+ IS_ALIGNED(next, PMD_SIZE)) {
+ phys_addr = __pfn_to_phys(pmd_pfn(*pmd));
+ memblock_free(phys_addr, PMD_SIZE);
+ pmd_clear(pmd);
+ } else {
+ /* If here, we are freeing vmemmap pages. */
+ memset((void *)addr, VMEMMAP_PAGE_INUSE, next - addr);
+ page_addr = page_address(pmd_page(*pmd));
+
+ if (!memchr_inv(page_addr, VMEMMAP_PAGE_INUSE,
+ PMD_SIZE)) {
+ phys_addr = __pfn_to_phys(pmd_pfn(*pmd));
+ memblock_free(phys_addr, PMD_SIZE);
+ pmd_clear(pmd);
+ }
+ }
+ }
+
+ flush_tlb_all();
+}
+#else
static inline void free_memmap(unsigned long start_pfn, unsigned long end_pfn)
{
struct page *start_pg, *end_pg;
@@ -468,31 +509,53 @@ static inline void free_memmap(unsigned long start_pfn, unsigned long end_pfn)
memblock_free(pg, pgend - pg);
}
+#endif
+
/*
* The mem_map array can get very big. Free the unused area of the memory map.
*/
static void __init free_unused_memmap(void)
{
- unsigned long start, prev_end = 0;
+ unsigned long start, cur_start, prev_end = 0;
struct memblock_region *reg;
for_each_memblock(memory, reg) {
- start = __phys_to_pfn(reg->base);
+ cur_start = __phys_to_pfn(reg->base);
#ifdef CONFIG_SPARSEMEM
/*
* Take care not to free memmap entries that don't exist due
* to SPARSEMEM sections which aren't present.
*/
- start = min(start, ALIGN(prev_end, PAGES_PER_SECTION));
-#endif
+ start = min(cur_start, ALIGN(prev_end, PAGES_PER_SECTION));
+
/*
- * If we had a previous bank, and there is a space between the
- * current bank and the previous, free it.
+ * Free memory in the case of:
+ * 1. if cur_start - prev_end <= PAGES_PER_SECTION,
+ * free pre_end ~ cur_start.
+ * 2. if cur_start - prev_end > PAGES_PER_SECTION,
+ * free pre_end ~ ALIGN(prev_end, PAGES_PER_SECTION).
*/
if (prev_end && prev_end < start)
free_memmap(prev_end, start);
+ /*
+ * Free memory in the case of:
+ * if cur_start - prev_end > PAGES_PER_SECTION,
+ * free ALIGN_DOWN(cur_start, PAGES_PER_SECTION) ~ cur_start.
+ */
+ if (cur_start > start &&
+ !IS_ALIGNED(cur_start, PAGES_PER_SECTION))
+ free_memmap(ALIGN_DOWN(cur_start, PAGES_PER_SECTION),
+ cur_start);
+#else
+ /*
+ * If we had a previous bank, and there is a space between the
+ * current bank and the previous, free it.
+ */
+ if (prev_end && prev_end < cur_start)
+ free_memmap(prev_end, cur_start);
+#endif
/*
* Align up here since the VM subsystem insists that the
* memmap entries are valid from the bank end aligned to
@@ -507,7 +570,6 @@ static void __init free_unused_memmap(void)
free_memmap(prev_end, ALIGN(prev_end, PAGES_PER_SECTION));
#endif
}
-#endif /* !CONFIG_SPARSEMEM_VMEMMAP */
/*
* mem_init() marks the free areas in the mem_map and tells us how much memory
@@ -524,9 +586,8 @@ void __init mem_init(void)
set_max_mapnr(max_pfn - PHYS_PFN_OFFSET);
-#ifndef CONFIG_SPARSEMEM_VMEMMAP
free_unused_memmap();
-#endif
+
/* this will put all unused low memory onto the freelists */
memblock_free_all();
--
2.15.0
On Wed, Aug 12, 2020 at 09:06:55AM +0800, Wei Li wrote:
> For the memory hole, sparse memory model that define SPARSEMEM_VMEMMAP
> do not free the reserved memory for the page map, this patch do it.
I've been thinking about it a bit more and it seems that instead of
freeing unused memory map it would be better to allocate the exact
memory map from the beginning.
In sparse_init_nid() we can replace PAGES_PER_SECTION parameter to
__populate_section_memmap() with the calculated value for architectures
that define HAVE_ARCH_PFN_VALID.
Than, I beleive it would be possible to remove free_unused_memmap() in
arm64 and probably in arm32 as well.
> Signed-off-by: Wei Li <[email protected]>
> Signed-off-by: Chen Feng <[email protected]>
> Signed-off-by: Xia Qing <[email protected]>
>
> v2: fix the patch v1 compile errors that are not based on the latest mainline.
> ---
> arch/arm64/mm/init.c | 81 +++++++++++++++++++++++++++++++++++++++++++++-------
> 1 file changed, 71 insertions(+), 10 deletions(-)
>
> diff --git a/arch/arm64/mm/init.c b/arch/arm64/mm/init.c
> index 1e93cfc7c47a..600889945cd0 100644
> --- a/arch/arm64/mm/init.c
> +++ b/arch/arm64/mm/init.c
> @@ -441,7 +441,48 @@ void __init bootmem_init(void)
> memblock_dump_all();
> }
>
> -#ifndef CONFIG_SPARSEMEM_VMEMMAP
> +#ifdef CONFIG_SPARSEMEM_VMEMMAP
> +#define VMEMMAP_PAGE_INUSE 0xFD
> +static inline void free_memmap(unsigned long start_pfn, unsigned long end_pfn)
> +{
> + unsigned long addr, end;
> + unsigned long next;
> + pmd_t *pmd;
> + void *page_addr;
> + phys_addr_t phys_addr;
> +
> + addr = (unsigned long)pfn_to_page(start_pfn);
> + end = (unsigned long)pfn_to_page(end_pfn);
> +
> + pmd = pmd_off_k(addr);
> + for (; addr < end; addr = next, pmd++) {
> + next = pmd_addr_end(addr, end);
> +
> + if (!pmd_present(*pmd))
> + continue;
> +
> + if (IS_ALIGNED(addr, PMD_SIZE) &&
> + IS_ALIGNED(next, PMD_SIZE)) {
> + phys_addr = __pfn_to_phys(pmd_pfn(*pmd));
> + memblock_free(phys_addr, PMD_SIZE);
> + pmd_clear(pmd);
> + } else {
> + /* If here, we are freeing vmemmap pages. */
> + memset((void *)addr, VMEMMAP_PAGE_INUSE, next - addr);
> + page_addr = page_address(pmd_page(*pmd));
> +
> + if (!memchr_inv(page_addr, VMEMMAP_PAGE_INUSE,
> + PMD_SIZE)) {
> + phys_addr = __pfn_to_phys(pmd_pfn(*pmd));
> + memblock_free(phys_addr, PMD_SIZE);
> + pmd_clear(pmd);
> + }
> + }
> + }
> +
> + flush_tlb_all();
> +}
> +#else
> static inline void free_memmap(unsigned long start_pfn, unsigned long end_pfn)
> {
> struct page *start_pg, *end_pg;
> @@ -468,31 +509,53 @@ static inline void free_memmap(unsigned long start_pfn, unsigned long end_pfn)
> memblock_free(pg, pgend - pg);
> }
>
> +#endif
> +
> /*
> * The mem_map array can get very big. Free the unused area of the memory map.
> */
> static void __init free_unused_memmap(void)
> {
> - unsigned long start, prev_end = 0;
> + unsigned long start, cur_start, prev_end = 0;
> struct memblock_region *reg;
>
> for_each_memblock(memory, reg) {
> - start = __phys_to_pfn(reg->base);
> + cur_start = __phys_to_pfn(reg->base);
>
> #ifdef CONFIG_SPARSEMEM
> /*
> * Take care not to free memmap entries that don't exist due
> * to SPARSEMEM sections which aren't present.
> */
> - start = min(start, ALIGN(prev_end, PAGES_PER_SECTION));
> -#endif
> + start = min(cur_start, ALIGN(prev_end, PAGES_PER_SECTION));
> +
> /*
> - * If we had a previous bank, and there is a space between the
> - * current bank and the previous, free it.
> + * Free memory in the case of:
> + * 1. if cur_start - prev_end <= PAGES_PER_SECTION,
> + * free pre_end ~ cur_start.
> + * 2. if cur_start - prev_end > PAGES_PER_SECTION,
> + * free pre_end ~ ALIGN(prev_end, PAGES_PER_SECTION).
> */
> if (prev_end && prev_end < start)
> free_memmap(prev_end, start);
>
> + /*
> + * Free memory in the case of:
> + * if cur_start - prev_end > PAGES_PER_SECTION,
> + * free ALIGN_DOWN(cur_start, PAGES_PER_SECTION) ~ cur_start.
> + */
> + if (cur_start > start &&
> + !IS_ALIGNED(cur_start, PAGES_PER_SECTION))
> + free_memmap(ALIGN_DOWN(cur_start, PAGES_PER_SECTION),
> + cur_start);
> +#else
> + /*
> + * If we had a previous bank, and there is a space between the
> + * current bank and the previous, free it.
> + */
> + if (prev_end && prev_end < cur_start)
> + free_memmap(prev_end, cur_start);
> +#endif
> /*
> * Align up here since the VM subsystem insists that the
> * memmap entries are valid from the bank end aligned to
> @@ -507,7 +570,6 @@ static void __init free_unused_memmap(void)
> free_memmap(prev_end, ALIGN(prev_end, PAGES_PER_SECTION));
> #endif
> }
> -#endif /* !CONFIG_SPARSEMEM_VMEMMAP */
>
> /*
> * mem_init() marks the free areas in the mem_map and tells us how much memory
> @@ -524,9 +586,8 @@ void __init mem_init(void)
>
> set_max_mapnr(max_pfn - PHYS_PFN_OFFSET);
>
> -#ifndef CONFIG_SPARSEMEM_VMEMMAP
> free_unused_memmap();
> -#endif
> +
> /* this will put all unused low memory onto the freelists */
> memblock_free_all();
>
> --
> 2.15.0
>
--
Sincerely yours,
Mike.
On Thu, Sep 03, 2020 at 01:05:58PM +0100, Catalin Marinas wrote:
> On Mon, Aug 17, 2020 at 11:04:05AM +0300, Mike Rapoport wrote:
> > On Wed, Aug 12, 2020 at 09:06:55AM +0800, Wei Li wrote:
> > > For the memory hole, sparse memory model that define SPARSEMEM_VMEMMAP
> > > do not free the reserved memory for the page map, this patch do it.
> >
> > I've been thinking about it a bit more and it seems that instead of
> > freeing unused memory map it would be better to allocate the exact
> > memory map from the beginning.
> >
> > In sparse_init_nid() we can replace PAGES_PER_SECTION parameter to
> > __populate_section_memmap() with the calculated value for architectures
> > that define HAVE_ARCH_PFN_VALID.
>
> Or just use a smaller PAGES_PER_SECTION and reduce the waste ;).
>
> Just to be clear, are you suggesting that we should use pfn_valid() on
> the pages within a section to calculate the actual range? The
> pfn_valid() implementation on arm64 checks for the validity of a sparse
> section, so this would be called from within the sparse_init() code
> path. I hope there's no dependency but I haven't checked. If it works,
> it's fine by me, it solves the FLATMEM mem_map freeing as well.
What I meant was that sparse_init()->__populate_section_memmap() would
not blindly presume that the entire section is valid, but would take
into account The actual DRAM banks listed in memblock.memory.
For that to work we'll need a version of pfn_valid() that does not rely
on the validity of sparse section, but uses some other means, e.g.
memblock. Apparently, I've looked at arm32 version of pfn_valid() and
missed the section validity check :)
I was thinking about doing something like this for 32-bit systems
(non-ARM) that cannot affort small sections because of the limited space
in the page->flags.
> With 4KB pages on arm64, vmemmap_populate() stops at the pmd level, so
> it always allocates PMD_SIZE. Wei's patch also only frees in PMD_SIZE
> amounts. So, with a sizeof(struct page) of 64 (2^6), a PMD_SIZE mem_map
> section would cover 2^(21-6) pages, so that's equivalent to a
> SECTION_SIZE_BITS of 21-6+12 = 27.
>
> If we reduce SECTION_SIZE_BITS to 27 or less, this patch is a no-op.
>
> --
> Catalin
--
Sincerely yours,
Mike.
On Mon, Aug 17, 2020 at 11:04:05AM +0300, Mike Rapoport wrote:
> On Wed, Aug 12, 2020 at 09:06:55AM +0800, Wei Li wrote:
> > For the memory hole, sparse memory model that define SPARSEMEM_VMEMMAP
> > do not free the reserved memory for the page map, this patch do it.
>
> I've been thinking about it a bit more and it seems that instead of
> freeing unused memory map it would be better to allocate the exact
> memory map from the beginning.
>
> In sparse_init_nid() we can replace PAGES_PER_SECTION parameter to
> __populate_section_memmap() with the calculated value for architectures
> that define HAVE_ARCH_PFN_VALID.
Or just use a smaller PAGES_PER_SECTION and reduce the waste ;).
Just to be clear, are you suggesting that we should use pfn_valid() on
the pages within a section to calculate the actual range? The
pfn_valid() implementation on arm64 checks for the validity of a sparse
section, so this would be called from within the sparse_init() code
path. I hope there's no dependency but I haven't checked. If it works,
it's fine by me, it solves the FLATMEM mem_map freeing as well.
With 4KB pages on arm64, vmemmap_populate() stops at the pmd level, so
it always allocates PMD_SIZE. Wei's patch also only frees in PMD_SIZE
amounts. So, with a sizeof(struct page) of 64 (2^6), a PMD_SIZE mem_map
section would cover 2^(21-6) pages, so that's equivalent to a
SECTION_SIZE_BITS of 21-6+12 = 27.
If we reduce SECTION_SIZE_BITS to 27 or less, this patch is a no-op.
--
Catalin
> -----Original Message-----
> From: Catalin Marinas [mailto:[email protected]]
> Sent: Friday, September 4, 2020 12:06 AM
> To: Mike Rapoport <[email protected]>
> Cc: liwei (CM) <[email protected]>; [email protected]; Xiaqing (A)
> <[email protected]>; Chenfeng (puck) <[email protected]>;
> butao <[email protected]>; fengbaopeng <[email protected]>;
> [email protected]; [email protected]; Song Bao Hua (Barry Song)
> <[email protected]>; [email protected];
> [email protected]; sujunfei <[email protected]>
> Subject: Re: [PATCH v2] arm64: mm: free unused memmap for sparse memory
> model that define VMEMMAP
>
> On Mon, Aug 17, 2020 at 11:04:05AM +0300, Mike Rapoport wrote:
> > On Wed, Aug 12, 2020 at 09:06:55AM +0800, Wei Li wrote:
> > > For the memory hole, sparse memory model that define
> SPARSEMEM_VMEMMAP
> > > do not free the reserved memory for the page map, this patch do it.
> >
> > I've been thinking about it a bit more and it seems that instead of
> > freeing unused memory map it would be better to allocate the exact
> > memory map from the beginning.
> >
> > In sparse_init_nid() we can replace PAGES_PER_SECTION parameter to
> > __populate_section_memmap() with the calculated value for architectures
> > that define HAVE_ARCH_PFN_VALID.
>
> Or just use a smaller PAGES_PER_SECTION and reduce the waste ;).
>
> Just to be clear, are you suggesting that we should use pfn_valid() on
> the pages within a section to calculate the actual range? The
> pfn_valid() implementation on arm64 checks for the validity of a sparse
> section, so this would be called from within the sparse_init() code
> path. I hope there's no dependency but I haven't checked. If it works,
> it's fine by me, it solves the FLATMEM mem_map freeing as well.
>
> With 4KB pages on arm64, vmemmap_populate() stops at the pmd level, so
> it always allocates PMD_SIZE. Wei's patch also only frees in PMD_SIZE
> amounts. So, with a sizeof(struct page) of 64 (2^6), a PMD_SIZE mem_map
> section would cover 2^(21-6) pages, so that's equivalent to a
> SECTION_SIZE_BITS of 21-6+12 = 27.
>
> If we reduce SECTION_SIZE_BITS to 27 or less, this patch is a no-op.
It would be the simplest way to fix this issue. It seems X86_64 is
also using 27.
@wei, has you ever tried to send a patch to change SECTION_SIZE_BITS
to 27 for ARM64?
>
> --
> Catalin
Thanks
Barry
Hi, Barry
I have changed SECTION_SIZE_BITS to 27 in our products, but I don't have to submit it.
-----?ʼ?ԭ??-----
??????: Song Bao Hua (Barry Song)
????ʱ??: 2020??11??16?? 16:34
?ռ???: Catalin Marinas <[email protected]>; Mike Rapoport <[email protected]>; liwei (CM) <[email protected]>
????: [email protected]; Xiaqing (A) <[email protected]>; Chenfeng (puck) <[email protected]>; butao <[email protected]>; fengbaopeng <[email protected]>; [email protected]; [email protected]; [email protected]; [email protected]; sujunfei <[email protected]>; Linuxarm <[email protected]>
????: RE: [PATCH v2] arm64: mm: free unused memmap for sparse memory model that define VMEMMAP
> -----Original Message-----
> From: Catalin Marinas [mailto:[email protected]]
> Sent: Friday, September 4, 2020 12:06 AM
> To: Mike Rapoport <[email protected]>
> Cc: liwei (CM) <[email protected]>; [email protected]; Xiaqing (A)
> <[email protected]>; Chenfeng (puck)
> <[email protected]>; butao <[email protected]>; fengbaopeng
> <[email protected]>; [email protected];
> [email protected]; Song Bao Hua (Barry Song)
> <[email protected]>; [email protected];
> [email protected]; sujunfei <[email protected]>
> Subject: Re: [PATCH v2] arm64: mm: free unused memmap for sparse
> memory model that define VMEMMAP
>
> On Mon, Aug 17, 2020 at 11:04:05AM +0300, Mike Rapoport wrote:
> > On Wed, Aug 12, 2020 at 09:06:55AM +0800, Wei Li wrote:
> > > For the memory hole, sparse memory model that define
> SPARSEMEM_VMEMMAP
> > > do not free the reserved memory for the page map, this patch do it.
> >
> > I've been thinking about it a bit more and it seems that instead of
> > freeing unused memory map it would be better to allocate the exact
> > memory map from the beginning.
> >
> > In sparse_init_nid() we can replace PAGES_PER_SECTION parameter to
> > __populate_section_memmap() with the calculated value for
> > architectures that define HAVE_ARCH_PFN_VALID.
>
> Or just use a smaller PAGES_PER_SECTION and reduce the waste ;).
>
> Just to be clear, are you suggesting that we should use pfn_valid() on
> the pages within a section to calculate the actual range? The
> pfn_valid() implementation on arm64 checks for the validity of a
> sparse section, so this would be called from within the sparse_init()
> code path. I hope there's no dependency but I haven't checked. If it
> works, it's fine by me, it solves the FLATMEM mem_map freeing as well.
>
> With 4KB pages on arm64, vmemmap_populate() stops at the pmd level, so
> it always allocates PMD_SIZE. Wei's patch also only frees in PMD_SIZE
> amounts. So, with a sizeof(struct page) of 64 (2^6), a PMD_SIZE
> mem_map section would cover 2^(21-6) pages, so that's equivalent to a
> SECTION_SIZE_BITS of 21-6+12 = 27.
>
> If we reduce SECTION_SIZE_BITS to 27 or less, this patch is a no-op.
It would be the simplest way to fix this issue. It seems X86_64 is also using 27.
@wei, has you ever tried to send a patch to change SECTION_SIZE_BITS to 27 for ARM64?
>
> --
> Catalin
Thanks
Barry