From: Joerg Roedel <[email protected]>
Hi,
here is the third version of the patches to pre-allocate P4D/PUD pages
for the vmalloc/ioremap areas on x86-64. This makes the page-table
synchronization code obsolete and so it is also removed.
Please review.
Thanks,
Joerg
Changes to v2:
- Rebased to tip/master
- Some rewording of the commit-messages
Joerg Roedel (3):
x86/mm: Pre-allocate p4d/pud pages for vmalloc area
x86/mm/64: Do not sync vmalloc/ioremap mappings
x86/mm/64: Make sync_global_pgds() static
arch/x86/include/asm/pgtable_64.h | 2 -
arch/x86/include/asm/pgtable_64_types.h | 2 -
arch/x86/mm/init_64.c | 59 ++++++++++++++++++++++---
3 files changed, 53 insertions(+), 10 deletions(-)
--
2.27.0
From: Joerg Roedel <[email protected]>
Remove the code to sync the vmalloc and ioremap ranges for x86-64. The
page-table pages are all pre-allocated now so that synchronization is
no longer necessary.
Signed-off-by: Joerg Roedel <[email protected]>
---
arch/x86/include/asm/pgtable_64_types.h | 2 --
arch/x86/mm/init_64.c | 5 -----
2 files changed, 7 deletions(-)
diff --git a/arch/x86/include/asm/pgtable_64_types.h b/arch/x86/include/asm/pgtable_64_types.h
index 8f63efb2a2cc..52e5f5f2240d 100644
--- a/arch/x86/include/asm/pgtable_64_types.h
+++ b/arch/x86/include/asm/pgtable_64_types.h
@@ -159,6 +159,4 @@ extern unsigned int ptrs_per_p4d;
#define PGD_KERNEL_START ((PAGE_SIZE / 2) / sizeof(pgd_t))
-#define ARCH_PAGE_TABLE_SYNC_MASK (pgtable_l5_enabled() ? PGTBL_PGD_MODIFIED : PGTBL_P4D_MODIFIED)
-
#endif /* _ASM_X86_PGTABLE_64_DEFS_H */
diff --git a/arch/x86/mm/init_64.c b/arch/x86/mm/init_64.c
index e76bdb001460..e0cd2dfd333d 100644
--- a/arch/x86/mm/init_64.c
+++ b/arch/x86/mm/init_64.c
@@ -217,11 +217,6 @@ void sync_global_pgds(unsigned long start, unsigned long end)
sync_global_pgds_l4(start, end);
}
-void arch_sync_kernel_mappings(unsigned long start, unsigned long end)
-{
- sync_global_pgds(start, end);
-}
-
/*
* NOTE: This function is marked __ref because it calls __init function
* (alloc_bootmem_pages). It's safe to do it ONLY when after_bootmem == 0.
--
2.27.0
From: Joerg Roedel <[email protected]>
Pre-allocate the page-table pages for the vmalloc area at the level
which needs synchronization on x86-64, which is P4D for 5-level and
PUD for 4-level paging.
Doing this at boot makes sure no synchronization of that area is
necessary at runtime. The synchronization takes the pgd_lock and
iterates over all page-tables in the system, so it can take quite long
and is better avoided.
Signed-off-by: Joerg Roedel <[email protected]>
---
arch/x86/mm/init_64.c | 52 +++++++++++++++++++++++++++++++++++++++++++
1 file changed, 52 insertions(+)
diff --git a/arch/x86/mm/init_64.c b/arch/x86/mm/init_64.c
index dbae185511cd..e76bdb001460 100644
--- a/arch/x86/mm/init_64.c
+++ b/arch/x86/mm/init_64.c
@@ -1238,6 +1238,56 @@ static void __init register_page_bootmem_info(void)
#endif
}
+/*
+ * Pre-allocates page-table pages for the vmalloc area in the kernel page-table.
+ * Only the level which needs to be synchronized between all page-tables is
+ * allocated because the synchronization can be expensive.
+ */
+static void __init preallocate_vmalloc_pages(void)
+{
+ unsigned long addr;
+ const char *lvl;
+
+ for (addr = VMALLOC_START; addr <= VMALLOC_END; addr = ALIGN(addr + 1, PGDIR_SIZE)) {
+ pgd_t *pgd = pgd_offset_k(addr);
+ p4d_t *p4d;
+ pud_t *pud;
+
+ p4d = p4d_offset(pgd, addr);
+ if (p4d_none(*p4d)) {
+ /* Can only happen with 5-level paging */
+ p4d = p4d_alloc(&init_mm, pgd, addr);
+ if (!p4d) {
+ lvl = "p4d";
+ goto failed;
+ }
+ }
+
+ if (pgtable_l5_enabled())
+ continue;
+
+ pud = pud_offset(p4d, addr);
+ if (pud_none(*pud)) {
+ /* Ends up here only with 4-level paging */
+ pud = pud_alloc(&init_mm, p4d, addr);
+ if (!pud) {
+ lvl = "pud";
+ goto failed;
+ }
+ }
+ }
+
+ return;
+
+failed:
+
+ /*
+ * The pages have to be there now or they will be missing in
+ * process page-tables later.
+ */
+ panic("Failed to pre-allocate %s pages for vmalloc area\n", lvl);
+}
+
void __init mem_init(void)
{
pci_iommu_alloc();
@@ -1261,6 +1311,8 @@ void __init mem_init(void)
if (get_gate_vma(&init_mm))
kclist_add(&kcore_vsyscall, (void *)VSYSCALL_ADDR, PAGE_SIZE, KCORE_USER);
+ preallocate_vmalloc_pages();
+
mem_init_print_info(NULL);
}
--
2.27.0
From: Joerg Roedel <[email protected]>
The function is only called from within init_64.c and can be static.
Also remove it from pgtable_64.h.
Signed-off-by: Joerg Roedel <[email protected]>
---
arch/x86/include/asm/pgtable_64.h | 2 --
arch/x86/mm/init_64.c | 2 +-
2 files changed, 1 insertion(+), 3 deletions(-)
diff --git a/arch/x86/include/asm/pgtable_64.h b/arch/x86/include/asm/pgtable_64.h
index 1b68d24dc6a0..95ac911b1a30 100644
--- a/arch/x86/include/asm/pgtable_64.h
+++ b/arch/x86/include/asm/pgtable_64.h
@@ -168,8 +168,6 @@ static inline void native_pgd_clear(pgd_t *pgd)
native_set_pgd(pgd, native_make_pgd(0));
}
-extern void sync_global_pgds(unsigned long start, unsigned long end);
-
/*
* Conversion functions: convert a page and protection to a page entry,
* and a page entry and page directory to the page they refer to.
diff --git a/arch/x86/mm/init_64.c b/arch/x86/mm/init_64.c
index e0cd2dfd333d..e65b96f381a7 100644
--- a/arch/x86/mm/init_64.c
+++ b/arch/x86/mm/init_64.c
@@ -209,7 +209,7 @@ static void sync_global_pgds_l4(unsigned long start, unsigned long end)
* When memory was added make sure all the processes MM have
* suitable PGD entries in the local PGD level page.
*/
-void sync_global_pgds(unsigned long start, unsigned long end)
+static void sync_global_pgds(unsigned long start, unsigned long end)
{
if (pgtable_l5_enabled())
sync_global_pgds_l5(start, end);
--
2.27.0
On Tue, Jul 21, 2020 at 11:59:51AM +0200, Joerg Roedel wrote:
> From: Joerg Roedel <[email protected]>
>
> Pre-allocate the page-table pages for the vmalloc area at the level
> which needs synchronization on x86-64, which is P4D for 5-level and
> PUD for 4-level paging.
>
> Doing this at boot makes sure all no synchronization of that area is
Nit: the word "all" in the quoted line above can be dropped
> necessary at runtime. The synchronization takes the pgd_lock and
> iterates over all page-tables in the system, so it can take quite long
> and is better avoided.
>
> Signed-off-by: Joerg Roedel <[email protected]>
> ---
> arch/x86/mm/init_64.c | 52 +++++++++++++++++++++++++++++++++++++++++++
> 1 file changed, 52 insertions(+)
>
> diff --git a/arch/x86/mm/init_64.c b/arch/x86/mm/init_64.c
> index dbae185511cd..e76bdb001460 100644
> --- a/arch/x86/mm/init_64.c
> +++ b/arch/x86/mm/init_64.c
> @@ -1238,6 +1238,56 @@ static void __init register_page_bootmem_info(void)
> #endif
> }
>
> +/*
> + * Pre-allocates page-table pages for the vmalloc area in the kernel page-table.
> + * Only the level which needs to be synchronized between all page-tables is
> + * allocated because the synchronization can be expensive.
> + */
> +static void __init preallocate_vmalloc_pages(void)
> +{
> + unsigned long addr;
> + const char *lvl;
> +
> + for (addr = VMALLOC_START; addr <= VMALLOC_END; addr = ALIGN(addr + 1, PGDIR_SIZE)) {
> + pgd_t *pgd = pgd_offset_k(addr);
> + p4d_t *p4d;
> + pud_t *pud;
> +
> + p4d = p4d_offset(pgd, addr);
> + if (p4d_none(*p4d)) {
> + /* Can only happen with 5-level paging */
> + p4d = p4d_alloc(&init_mm, pgd, addr);
> + if (!p4d) {
> + lvl = "p4d";
> + goto failed;
> + }
> + }
> +
> + if (pgtable_l5_enabled())
> + continue;
> +
> + pud = pud_offset(p4d, addr);
> + if (pud_none(*pud)) {
> + /* Ends up here only with 4-level paging */
> + pud = pud_alloc(&init_mm, p4d, addr);
> + if (!pud) {
> + lvl = "pud";
> + goto failed;
> + }
> + }
> + }
> +
> + return;
> +
> +failed:
> +
> + /*
> + * The pages have to be there now or they will be missing in
> + * process page-tables later.
> + */
> + panic("Failed to pre-allocate %s pages for vmalloc area\n", lvl);
> +}
> +
> void __init mem_init(void)
> {
> pci_iommu_alloc();
> @@ -1261,6 +1311,8 @@ void __init mem_init(void)
> if (get_gate_vma(&init_mm))
> kclist_add(&kcore_vsyscall, (void *)VSYSCALL_ADDR, PAGE_SIZE, KCORE_USER);
>
> + preallocate_vmalloc_pages();
> +
> mem_init_print_info(NULL);
> }
>
> --
> 2.27.0
>
--
Sincerely yours,
Mike.
On Tue, Jul 21, 2020 at 11:59:50AM +0200, Joerg Roedel wrote:
> From: Joerg Roedel <[email protected]>
>
> Hi,
>
> here is the third version of the patches to pre-allocate P4D/PUD pages
> for the vmalloc/ioremap areas on x86-64. This makes the page-table
> synchronization code obsolete and so it is also removed.
>
> Please review.
>
> Thanks,
>
> Joerg
>
> Changes to v2:
>
> - Rebased to tip/master
> - Some rewording of the commit-messages
I have a small nitpick for the commit message of the first patch,
otherwise,
Reviewed-by: Mike Rapoport <[email protected]>
> Joerg Roedel (3):
> x86/mm: Pre-allocate p4d/pud pages for vmalloc area
> x86/mm/64: Do not sync vmalloc/ioremap mappings
> x86/mm/64: Make sync_global_pgds() static
>
> arch/x86/include/asm/pgtable_64.h | 2 -
> arch/x86/include/asm/pgtable_64_types.h | 2 -
> arch/x86/mm/init_64.c | 59 ++++++++++++++++++++++---
> 3 files changed, 53 insertions(+), 10 deletions(-)
>
> --
> 2.27.0
>
--
Sincerely yours,
Mike.
* Joerg Roedel <[email protected]> wrote:
> From: Joerg Roedel <[email protected]>
>
> Hi,
>
> here is the third version of the patches to pre-allocate P4D/PUD pages
> for the vmalloc/ioremap areas on x86-64. This makes the page-table
> synchronization code obsolete and so it is also removed.
>
> Please review.
>
> Thanks,
>
> Joerg
>
> Changes to v2:
>
> - Rebased to tip/master
> - Some rewording of the commit-messages
>
> Joerg Roedel (3):
> x86/mm: Pre-allocate p4d/pud pages for vmalloc area
> x86/mm/64: Do not sync vmalloc/ioremap mappings
> x86/mm/64: Make sync_global_pgds() static
>
> arch/x86/include/asm/pgtable_64.h | 2 -
> arch/x86/include/asm/pgtable_64_types.h | 2 -
> arch/x86/mm/init_64.c | 59 ++++++++++++++++++++++---
> 3 files changed, 53 insertions(+), 10 deletions(-)
Applied to tip:x86/mm, thanks Joerg!
Ingo
The following commit has been merged into the x86/mm branch of tip:
Commit-ID: 2b32ab031e82a109e2c5b0d30ce563db0fe286b4
Gitweb: https://git.kernel.org/tip/2b32ab031e82a109e2c5b0d30ce563db0fe286b4
Author: Joerg Roedel <[email protected]>
AuthorDate: Tue, 21 Jul 2020 11:59:53 +02:00
Committer: Ingo Molnar <[email protected]>
CommitterDate: Mon, 27 Jul 2020 12:32:29 +02:00
x86/mm/64: Make sync_global_pgds() static
The function is only called from within init_64.c and can be static.
Also remove it from pgtable_64.h.
Signed-off-by: Joerg Roedel <[email protected]>
Signed-off-by: Ingo Molnar <[email protected]>
Reviewed-by: Mike Rapoport <[email protected]>
Link: https://lore.kernel.org/r/[email protected]
---
arch/x86/include/asm/pgtable_64.h | 2 --
arch/x86/mm/init_64.c | 2 +-
2 files changed, 1 insertion(+), 3 deletions(-)
diff --git a/arch/x86/include/asm/pgtable_64.h b/arch/x86/include/asm/pgtable_64.h
index 1b68d24..95ac911 100644
--- a/arch/x86/include/asm/pgtable_64.h
+++ b/arch/x86/include/asm/pgtable_64.h
@@ -168,8 +168,6 @@ static inline void native_pgd_clear(pgd_t *pgd)
native_set_pgd(pgd, native_make_pgd(0));
}
-extern void sync_global_pgds(unsigned long start, unsigned long end);
-
/*
* Conversion functions: convert a page and protection to a page entry,
* and a page entry and page directory to the page they refer to.
diff --git a/arch/x86/mm/init_64.c b/arch/x86/mm/init_64.c
index e0cd2df..e65b96f 100644
--- a/arch/x86/mm/init_64.c
+++ b/arch/x86/mm/init_64.c
@@ -209,7 +209,7 @@ static void sync_global_pgds_l4(unsigned long start, unsigned long end)
* When memory was added make sure all the processes MM have
* suitable PGD entries in the local PGD level page.
*/
-void sync_global_pgds(unsigned long start, unsigned long end)
+static void sync_global_pgds(unsigned long start, unsigned long end)
{
if (pgtable_l5_enabled())
sync_global_pgds_l5(start, end);
The following commit has been merged into the x86/mm branch of tip:
Commit-ID: 6eb82f9940267d3af260989d077a2833f588beae
Gitweb: https://git.kernel.org/tip/6eb82f9940267d3af260989d077a2833f588beae
Author: Joerg Roedel <[email protected]>
AuthorDate: Tue, 21 Jul 2020 11:59:51 +02:00
Committer: Ingo Molnar <[email protected]>
CommitterDate: Mon, 27 Jul 2020 12:32:29 +02:00
x86/mm: Pre-allocate P4D/PUD pages for vmalloc area
Pre-allocate the page-table pages for the vmalloc area at the level
which needs synchronization on x86-64, which is P4D for 5-level and
PUD for 4-level paging.
Doing this at boot makes sure no synchronization of that area is
necessary at runtime. The synchronization takes the pgd_lock and
iterates over all page-tables in the system, so it can take quite long
and is better avoided.
Signed-off-by: Joerg Roedel <[email protected]>
Signed-off-by: Ingo Molnar <[email protected]>
Reviewed-by: Mike Rapoport <[email protected]>
Link: https://lore.kernel.org/r/[email protected]
---
arch/x86/mm/init_64.c | 52 ++++++++++++++++++++++++++++++++++++++++++-
1 file changed, 52 insertions(+)
diff --git a/arch/x86/mm/init_64.c b/arch/x86/mm/init_64.c
index dbae185..e76bdb0 100644
--- a/arch/x86/mm/init_64.c
+++ b/arch/x86/mm/init_64.c
@@ -1238,6 +1238,56 @@ static void __init register_page_bootmem_info(void)
#endif
}
+/*
+ * Pre-allocates page-table pages for the vmalloc area in the kernel page-table.
+ * Only the level which needs to be synchronized between all page-tables is
+ * allocated because the synchronization can be expensive.
+ */
+static void __init preallocate_vmalloc_pages(void)
+{
+ unsigned long addr;
+ const char *lvl;
+
+ for (addr = VMALLOC_START; addr <= VMALLOC_END; addr = ALIGN(addr + 1, PGDIR_SIZE)) {
+ pgd_t *pgd = pgd_offset_k(addr);
+ p4d_t *p4d;
+ pud_t *pud;
+
+ p4d = p4d_offset(pgd, addr);
+ if (p4d_none(*p4d)) {
+ /* Can only happen with 5-level paging */
+ p4d = p4d_alloc(&init_mm, pgd, addr);
+ if (!p4d) {
+ lvl = "p4d";
+ goto failed;
+ }
+ }
+
+ if (pgtable_l5_enabled())
+ continue;
+
+ pud = pud_offset(p4d, addr);
+ if (pud_none(*pud)) {
+ /* Ends up here only with 4-level paging */
+ pud = pud_alloc(&init_mm, p4d, addr);
+ if (!pud) {
+ lvl = "pud";
+ goto failed;
+ }
+ }
+ }
+
+ return;
+
+failed:
+
+ /*
+ * The pages have to be there now or they will be missing in
+ * process page-tables later.
+ */
+ panic("Failed to pre-allocate %s pages for vmalloc area\n", lvl);
+}
+
void __init mem_init(void)
{
pci_iommu_alloc();
@@ -1261,6 +1311,8 @@ void __init mem_init(void)
if (get_gate_vma(&init_mm))
kclist_add(&kcore_vsyscall, (void *)VSYSCALL_ADDR, PAGE_SIZE, KCORE_USER);
+ preallocate_vmalloc_pages();
+
mem_init_print_info(NULL);
}
The following commit has been merged into the x86/mm branch of tip:
Commit-ID: 8bb9bf242d1fee925636353807c511d54fde8986
Gitweb: https://git.kernel.org/tip/8bb9bf242d1fee925636353807c511d54fde8986
Author: Joerg Roedel <[email protected]>
AuthorDate: Tue, 21 Jul 2020 11:59:52 +02:00
Committer: Ingo Molnar <[email protected]>
CommitterDate: Mon, 27 Jul 2020 12:32:29 +02:00
x86/mm/64: Do not sync vmalloc/ioremap mappings
Remove the code to sync the vmalloc and ioremap ranges for x86-64. The
page-table pages are all pre-allocated now so that synchronization is
no longer necessary.
Signed-off-by: Joerg Roedel <[email protected]>
Signed-off-by: Ingo Molnar <[email protected]>
Reviewed-by: Mike Rapoport <[email protected]>
Link: https://lore.kernel.org/r/[email protected]
---
arch/x86/include/asm/pgtable_64_types.h | 2 --
arch/x86/mm/init_64.c | 5 -----
2 files changed, 7 deletions(-)
diff --git a/arch/x86/include/asm/pgtable_64_types.h b/arch/x86/include/asm/pgtable_64_types.h
index 8f63efb..52e5f5f 100644
--- a/arch/x86/include/asm/pgtable_64_types.h
+++ b/arch/x86/include/asm/pgtable_64_types.h
@@ -159,6 +159,4 @@ extern unsigned int ptrs_per_p4d;
#define PGD_KERNEL_START ((PAGE_SIZE / 2) / sizeof(pgd_t))
-#define ARCH_PAGE_TABLE_SYNC_MASK (pgtable_l5_enabled() ? PGTBL_PGD_MODIFIED : PGTBL_P4D_MODIFIED)
-
#endif /* _ASM_X86_PGTABLE_64_DEFS_H */
diff --git a/arch/x86/mm/init_64.c b/arch/x86/mm/init_64.c
index e76bdb0..e0cd2df 100644
--- a/arch/x86/mm/init_64.c
+++ b/arch/x86/mm/init_64.c
@@ -217,11 +217,6 @@ void sync_global_pgds(unsigned long start, unsigned long end)
sync_global_pgds_l4(start, end);
}
-void arch_sync_kernel_mappings(unsigned long start, unsigned long end)
-{
- sync_global_pgds(start, end);
-}
-
/*
* NOTE: This function is marked __ref because it calls __init function
* (alloc_bootmem_pages). It's safe to do it ONLY when after_bootmem == 0.