2017-04-25 09:14:06

by Xunlei Pang

[permalink] [raw]
Subject: [PATCH 1/2] x86/mm/ident_map: Add PUD level 1GB page support

The current kernel_ident_mapping_init() creates the identity
mapping using 2MB pages (PMD level). This patch adds support
for 1GB pages (PUD level).

This is useful on large machines to save some reserved memory
(used for paging structures) in the kdump case, when kexec sets up
identity mappings before booting into the new kernel.

We will utilize this new support in the following patch.

Signed-off-by: Xunlei Pang <[email protected]>
---
arch/x86/boot/compressed/pagetable.c | 2 +-
arch/x86/include/asm/init.h | 3 ++-
arch/x86/kernel/machine_kexec_64.c | 2 +-
arch/x86/mm/ident_map.c | 13 ++++++++++++-
arch/x86/power/hibernate_64.c | 2 +-
5 files changed, 17 insertions(+), 5 deletions(-)

diff --git a/arch/x86/boot/compressed/pagetable.c b/arch/x86/boot/compressed/pagetable.c
index 56589d0..1d78f17 100644
--- a/arch/x86/boot/compressed/pagetable.c
+++ b/arch/x86/boot/compressed/pagetable.c
@@ -70,7 +70,7 @@ static void *alloc_pgt_page(void *context)
* Due to relocation, pointers must be assigned at run time not build time.
*/
static struct x86_mapping_info mapping_info = {
- .pmd_flag = __PAGE_KERNEL_LARGE_EXEC,
+ .page_flag = __PAGE_KERNEL_LARGE_EXEC,
};

/* Locates and clears a region for a new top level page table. */
diff --git a/arch/x86/include/asm/init.h b/arch/x86/include/asm/init.h
index 737da62..46eab1a 100644
--- a/arch/x86/include/asm/init.h
+++ b/arch/x86/include/asm/init.h
@@ -4,8 +4,9 @@
struct x86_mapping_info {
void *(*alloc_pgt_page)(void *); /* allocate buf for page table */
void *context; /* context for alloc_pgt_page */
- unsigned long pmd_flag; /* page flag for PMD entry */
+ unsigned long page_flag; /* page flag for PMD or PUD entry */
unsigned long offset; /* ident mapping offset */
+ bool use_pud_page; /* PUD level 1GB page support */
};

int kernel_ident_mapping_init(struct x86_mapping_info *info, pgd_t *pgd_page,
diff --git a/arch/x86/kernel/machine_kexec_64.c b/arch/x86/kernel/machine_kexec_64.c
index 085c3b3..1d4f2b0 100644
--- a/arch/x86/kernel/machine_kexec_64.c
+++ b/arch/x86/kernel/machine_kexec_64.c
@@ -113,7 +113,7 @@ static int init_pgtable(struct kimage *image, unsigned long start_pgtable)
struct x86_mapping_info info = {
.alloc_pgt_page = alloc_pgt_page,
.context = image,
- .pmd_flag = __PAGE_KERNEL_LARGE_EXEC,
+ .page_flag = __PAGE_KERNEL_LARGE_EXEC,
};
unsigned long mstart, mend;
pgd_t *level4p;
diff --git a/arch/x86/mm/ident_map.c b/arch/x86/mm/ident_map.c
index 04210a2..0ad0280 100644
--- a/arch/x86/mm/ident_map.c
+++ b/arch/x86/mm/ident_map.c
@@ -13,7 +13,7 @@ static void ident_pmd_init(struct x86_mapping_info *info, pmd_t *pmd_page,
if (pmd_present(*pmd))
continue;

- set_pmd(pmd, __pmd((addr - info->offset) | info->pmd_flag));
+ set_pmd(pmd, __pmd((addr - info->offset) | info->page_flag));
}
}

@@ -30,6 +30,17 @@ static int ident_pud_init(struct x86_mapping_info *info, pud_t *pud_page,
if (next > end)
next = end;

+ if (info->use_pud_page) {
+ pud_t pudval;
+
+ if (pud_present(*pud))
+ continue;
+
+ pudval = __pud((addr - info->offset) | info->page_flag);
+ set_pud(pud, pudval);
+ continue;
+ }
+
if (pud_present(*pud)) {
pmd = pmd_offset(pud, 0);
ident_pmd_init(info, pmd, addr, next);
diff --git a/arch/x86/power/hibernate_64.c b/arch/x86/power/hibernate_64.c
index 6a61194..a6e21fe 100644
--- a/arch/x86/power/hibernate_64.c
+++ b/arch/x86/power/hibernate_64.c
@@ -104,7 +104,7 @@ static int set_up_temporary_mappings(void)
{
struct x86_mapping_info info = {
.alloc_pgt_page = alloc_pgt_page,
- .pmd_flag = __PAGE_KERNEL_LARGE_EXEC,
+ .page_flag = __PAGE_KERNEL_LARGE_EXEC,
.offset = __PAGE_OFFSET,
};
unsigned long mstart, mend;
--
1.8.3.1


2017-04-25 09:14:10

by Xunlei Pang

[permalink] [raw]
Subject: [PATCH 2/2] x86_64/kexec: Use PUD level 1GB page for identity mapping if available

Kexec sets up all identity mappings before booting into the new
kernel, and this causes extra memory consumption for paging
structures, which is quite considerable on modern machines with
huge amounts of memory.

E.g. on one 32TB machine, in the kdump case, around 128MB
(around 4MB/TB) of the reserved memory could be wasted once kexec
had set up all the identity mappings using the current 2MB pages.
Together with the loaded kdump kernel, initramfs, etc., this
caused the kexec syscall to fail with -ENOMEM. As a result, we
had to enlarge the reserved memory via "crashkernel=X".

This causes some trouble for distributions that use policies
to evaluate the proper "crashkernel=X" value for users.

Machines with a large amount of memory are very likely to
support 1GB pages, and kernel_ident_mapping_init() supports
PUD level 1GB pages. So, to solve this problem, use 1GB pages
to create the identity mapping page tables for kdump when the
1GB page feature is available.

Signed-off-by: Xunlei Pang <[email protected]>
---
arch/x86/kernel/machine_kexec_64.c | 5 +++++
1 file changed, 5 insertions(+)

diff --git a/arch/x86/kernel/machine_kexec_64.c b/arch/x86/kernel/machine_kexec_64.c
index 1d4f2b0..41f1ae7 100644
--- a/arch/x86/kernel/machine_kexec_64.c
+++ b/arch/x86/kernel/machine_kexec_64.c
@@ -122,6 +122,11 @@ static int init_pgtable(struct kimage *image, unsigned long start_pgtable)

level4p = (pgd_t *)__va(start_pgtable);
clear_page(level4p);
+
+ /* Use PUD level page if available, to save crash memory for kdump */
+ if (direct_gbpages)
+ info.use_pud_page = true;
+
for (i = 0; i < nr_pfn_mapped; i++) {
mstart = pfn_mapped[i].start << PAGE_SHIFT;
mend = pfn_mapped[i].end << PAGE_SHIFT;
--
1.8.3.1

2017-04-25 19:49:54

by Yinghai Lu

[permalink] [raw]
Subject: Re: [PATCH 1/2] x86/mm/ident_map: Add PUD level 1GB page support

On Tue, Apr 25, 2017 at 2:13 AM, Xunlei Pang <[email protected]> wrote:
> The current kernel_ident_mapping_init() creates the identity
> mapping using 2MB page(PMD level), this patch adds the 1GB
> page(PUD level) support.
>
> This is useful on large machines to save some reserved memory
> (as paging structures) in the kdump case when kexec setups up
> identity mappings before booting into the new kernel.
>
> We will utilize this new support in the following patch.
>
> Signed-off-by: Xunlei Pang <[email protected]>
> ---
> arch/x86/boot/compressed/pagetable.c | 2 +-
> arch/x86/include/asm/init.h | 3 ++-
> arch/x86/kernel/machine_kexec_64.c | 2 +-
> arch/x86/mm/ident_map.c | 13 ++++++++++++-
> arch/x86/power/hibernate_64.c | 2 +-
> 5 files changed, 17 insertions(+), 5 deletions(-)
>
> diff --git a/arch/x86/boot/compressed/pagetable.c b/arch/x86/boot/compressed/pagetable.c
> index 56589d0..1d78f17 100644
> --- a/arch/x86/boot/compressed/pagetable.c
> +++ b/arch/x86/boot/compressed/pagetable.c
> @@ -70,7 +70,7 @@ static void *alloc_pgt_page(void *context)
> * Due to relocation, pointers must be assigned at run time not build time.
> */
> static struct x86_mapping_info mapping_info = {
> - .pmd_flag = __PAGE_KERNEL_LARGE_EXEC,
> + .page_flag = __PAGE_KERNEL_LARGE_EXEC,
> };
>
> /* Locates and clears a region for a new top level page table. */
> diff --git a/arch/x86/include/asm/init.h b/arch/x86/include/asm/init.h
> index 737da62..46eab1a 100644
> --- a/arch/x86/include/asm/init.h
> +++ b/arch/x86/include/asm/init.h
> @@ -4,8 +4,9 @@
> struct x86_mapping_info {
> void *(*alloc_pgt_page)(void *); /* allocate buf for page table */
> void *context; /* context for alloc_pgt_page */
> - unsigned long pmd_flag; /* page flag for PMD entry */
> + unsigned long page_flag; /* page flag for PMD or PUD entry */
> unsigned long offset; /* ident mapping offset */
> + bool use_pud_page; /* PUD level 1GB page support */

How about using direct_gbpages instead?
use_pud_page is confusing.

> };
>
> int kernel_ident_mapping_init(struct x86_mapping_info *info, pgd_t *pgd_page,
> diff --git a/arch/x86/kernel/machine_kexec_64.c b/arch/x86/kernel/machine_kexec_64.c
> index 085c3b3..1d4f2b0 100644
> --- a/arch/x86/kernel/machine_kexec_64.c
> +++ b/arch/x86/kernel/machine_kexec_64.c
> @@ -113,7 +113,7 @@ static int init_pgtable(struct kimage *image, unsigned long start_pgtable)
> struct x86_mapping_info info = {
> .alloc_pgt_page = alloc_pgt_page,
> .context = image,
> - .pmd_flag = __PAGE_KERNEL_LARGE_EXEC,
> + .page_flag = __PAGE_KERNEL_LARGE_EXEC,
> };
> unsigned long mstart, mend;
> pgd_t *level4p;
> diff --git a/arch/x86/mm/ident_map.c b/arch/x86/mm/ident_map.c
> index 04210a2..0ad0280 100644
> --- a/arch/x86/mm/ident_map.c
> +++ b/arch/x86/mm/ident_map.c
> @@ -13,7 +13,7 @@ static void ident_pmd_init(struct x86_mapping_info *info, pmd_t *pmd_page,
> if (pmd_present(*pmd))
> continue;
>
> - set_pmd(pmd, __pmd((addr - info->offset) | info->pmd_flag));
> + set_pmd(pmd, __pmd((addr - info->offset) | info->page_flag));
> }
> }
>
> @@ -30,6 +30,17 @@ static int ident_pud_init(struct x86_mapping_info *info, pud_t *pud_page,
> if (next > end)
> next = end;
>
> + if (info->use_pud_page) {
> + pud_t pudval;
> +
> + if (pud_present(*pud))
> + continue;
> +
> + pudval = __pud((addr - info->offset) | info->page_flag);
> + set_pud(pud, pudval);

should mask addr with PUD_MASK.
addr &= PUD_MASK;
set_pud(pud, __pud((addr - info->offset) | info->page_flag));



> + continue;
> + }
> +
> if (pud_present(*pud)) {
> pmd = pmd_offset(pud, 0);
> ident_pmd_init(info, pmd, addr, next);
> diff --git a/arch/x86/power/hibernate_64.c b/arch/x86/power/hibernate_64.c
> index 6a61194..a6e21fe 100644
> --- a/arch/x86/power/hibernate_64.c
> +++ b/arch/x86/power/hibernate_64.c
> @@ -104,7 +104,7 @@ static int set_up_temporary_mappings(void)
> {
> struct x86_mapping_info info = {
> .alloc_pgt_page = alloc_pgt_page,
> - .pmd_flag = __PAGE_KERNEL_LARGE_EXEC,
> + .page_flag = __PAGE_KERNEL_LARGE_EXEC,
> .offset = __PAGE_OFFSET,
> };
> unsigned long mstart, mend;
> --
> 1.8.3.1
>

2017-04-26 02:46:09

by Xunlei Pang

[permalink] [raw]
Subject: Re: [PATCH 1/2] x86/mm/ident_map: Add PUD level 1GB page support

On 04/26/2017 at 03:49 AM, Yinghai Lu wrote:
> On Tue, Apr 25, 2017 at 2:13 AM, Xunlei Pang <[email protected]> wrote:
>> The current kernel_ident_mapping_init() creates the identity
>> mapping using 2MB page(PMD level), this patch adds the 1GB
>> page(PUD level) support.
>>
>> This is useful on large machines to save some reserved memory
>> (as paging structures) in the kdump case when kexec setups up
>> identity mappings before booting into the new kernel.
>>
>> We will utilize this new support in the following patch.
>>
>> Signed-off-by: Xunlei Pang <[email protected]>
>> ---
>> arch/x86/boot/compressed/pagetable.c | 2 +-
>> arch/x86/include/asm/init.h | 3 ++-
>> arch/x86/kernel/machine_kexec_64.c | 2 +-
>> arch/x86/mm/ident_map.c | 13 ++++++++++++-
>> arch/x86/power/hibernate_64.c | 2 +-
>> 5 files changed, 17 insertions(+), 5 deletions(-)
>>
>> diff --git a/arch/x86/boot/compressed/pagetable.c b/arch/x86/boot/compressed/pagetable.c
>> index 56589d0..1d78f17 100644
>> --- a/arch/x86/boot/compressed/pagetable.c
>> +++ b/arch/x86/boot/compressed/pagetable.c
>> @@ -70,7 +70,7 @@ static void *alloc_pgt_page(void *context)
>> * Due to relocation, pointers must be assigned at run time not build time.
>> */
>> static struct x86_mapping_info mapping_info = {
>> - .pmd_flag = __PAGE_KERNEL_LARGE_EXEC,
>> + .page_flag = __PAGE_KERNEL_LARGE_EXEC,
>> };
>>
>> /* Locates and clears a region for a new top level page table. */
>> diff --git a/arch/x86/include/asm/init.h b/arch/x86/include/asm/init.h
>> index 737da62..46eab1a 100644
>> --- a/arch/x86/include/asm/init.h
>> +++ b/arch/x86/include/asm/init.h
>> @@ -4,8 +4,9 @@
>> struct x86_mapping_info {
>> void *(*alloc_pgt_page)(void *); /* allocate buf for page table */
>> void *context; /* context for alloc_pgt_page */
>> - unsigned long pmd_flag; /* page flag for PMD entry */
>> + unsigned long page_flag; /* page flag for PMD or PUD entry */
>> unsigned long offset; /* ident mapping offset */
>> + bool use_pud_page; /* PUD level 1GB page support */
> how about use direct_gbpages instead?
> use_pud_page is confusing.

ok

>
>> };
>>
>> int kernel_ident_mapping_init(struct x86_mapping_info *info, pgd_t *pgd_page,
>> diff --git a/arch/x86/kernel/machine_kexec_64.c b/arch/x86/kernel/machine_kexec_64.c
>> index 085c3b3..1d4f2b0 100644
>> --- a/arch/x86/kernel/machine_kexec_64.c
>> +++ b/arch/x86/kernel/machine_kexec_64.c
>> @@ -113,7 +113,7 @@ static int init_pgtable(struct kimage *image, unsigned long start_pgtable)
>> struct x86_mapping_info info = {
>> .alloc_pgt_page = alloc_pgt_page,
>> .context = image,
>> - .pmd_flag = __PAGE_KERNEL_LARGE_EXEC,
>> + .page_flag = __PAGE_KERNEL_LARGE_EXEC,
>> };
>> unsigned long mstart, mend;
>> pgd_t *level4p;
>> diff --git a/arch/x86/mm/ident_map.c b/arch/x86/mm/ident_map.c
>> index 04210a2..0ad0280 100644
>> --- a/arch/x86/mm/ident_map.c
>> +++ b/arch/x86/mm/ident_map.c
>> @@ -13,7 +13,7 @@ static void ident_pmd_init(struct x86_mapping_info *info, pmd_t *pmd_page,
>> if (pmd_present(*pmd))
>> continue;
>>
>> - set_pmd(pmd, __pmd((addr - info->offset) | info->pmd_flag));
>> + set_pmd(pmd, __pmd((addr - info->offset) | info->page_flag));
>> }
>> }
>>
>> @@ -30,6 +30,17 @@ static int ident_pud_init(struct x86_mapping_info *info, pud_t *pud_page,
>> if (next > end)
>> next = end;
>>
>> + if (info->use_pud_page) {
>> + pud_t pudval;
>> +
>> + if (pud_present(*pud))
>> + continue;
>> +
>> + pudval = __pud((addr - info->offset) | info->page_flag);
>> + set_pud(pud, pudval);
> should mask addr with PUD_MASK.
> addr &= PUD_MASK;
> set_pud(pud, __pmd(addr - info->offset) | info->page_flag);

Yes, will update, thanks for the catch.

Regards,
Xunlei

>
>
>> + continue;
>> + }
>> +
>> if (pud_present(*pud)) {
>> pmd = pmd_offset(pud, 0);
>> ident_pmd_init(info, pmd, addr, next);
>> diff --git a/arch/x86/power/hibernate_64.c b/arch/x86/power/hibernate_64.c
>> index 6a61194..a6e21fe 100644
>> --- a/arch/x86/power/hibernate_64.c
>> +++ b/arch/x86/power/hibernate_64.c
>> @@ -104,7 +104,7 @@ static int set_up_temporary_mappings(void)
>> {
>> struct x86_mapping_info info = {
>> .alloc_pgt_page = alloc_pgt_page,
>> - .pmd_flag = __PAGE_KERNEL_LARGE_EXEC,
>> + .page_flag = __PAGE_KERNEL_LARGE_EXEC,
>> .offset = __PAGE_OFFSET,
>> };
>> unsigned long mstart, mend;
>> --
>> 1.8.3.1
>>