2015-07-28 19:22:46

by Yinghai Lu

[permalink] [raw]
Subject: [PATCH] x86: Kill E820_RESERVED_KERN

E820_RESERVED_KERN was introduced to do early allocation for
setup_data when we were using the original early_res with the e820 map.

Now we use memblock for early resource reservation/allocation, and
setup_data is already reserved in memblock early on.

For the kexec path, kexec generates setup_data (kexec-tools now creates
SETUP_EFI and SETUP_E820_EXT) and passes a pointer to the second kernel, and
the second kernel reserves setup_data on its own without using the e820 map.

So we do not need to touch the e820 map at all, and we can kill
E820_RESERVED_KERN.

That makes the code simpler, and at the same time fixes a bug with
hibernation:
e820_mark_nosave_regions() cannot handle the case where
E820_RAM and E820_RESERVED_KERN ranges are contiguous and the
boundary between them is not page aligned.

Link: https://bugzilla.opensuse.org/show_bug.cgi?id=913885
Link: https://bugzilla.kernel.org/show_bug.cgi?id=96111
Reported-by: "Lee, Chun-Yi" <[email protected]>
Tested-by: "Lee, Chun-Yi" <[email protected]>
Reported-by: "Tian, Ye" <[email protected]>
Tested-by: "Tian, Ye" <[email protected]>
Cc: "Lee, Chun-Yi" <[email protected]>
Cc: Chen Yu <[email protected]>
Signed-off-by: Yinghai Lu <[email protected]>
Cc: Richard L Maliszewski <[email protected]>
Cc: Gang Wei <[email protected]>
Cc: Shane Wang <[email protected]>
Cc: [email protected]
Cc: [email protected]

---
arch/x86/include/uapi/asm/e820.h | 8 --------
arch/x86/kernel/e820.c | 6 ++----
arch/x86/kernel/setup.c | 25 -------------------------
arch/x86/kernel/tboot.c | 3 +--
arch/x86/mm/init_64.c | 11 ++++-------
5 files changed, 7 insertions(+), 46 deletions(-)

Index: linux-2.6/arch/x86/kernel/e820.c
===================================================================
--- linux-2.6.orig/arch/x86/kernel/e820.c
+++ linux-2.6/arch/x86/kernel/e820.c
@@ -134,7 +134,6 @@ static void __init e820_print_type(u32 t
{
switch (type) {
case E820_RAM:
- case E820_RESERVED_KERN:
printk(KERN_CONT "usable");
break;
case E820_RESERVED:
@@ -693,7 +692,7 @@ void __init e820_mark_nosave_regions(uns

pfn = PFN_DOWN(ei->addr + ei->size);

- if (ei->type != E820_RAM && ei->type != E820_RESERVED_KERN)
+ if (ei->type != E820_RAM)
register_nosave_region(PFN_UP(ei->addr), pfn);

if (pfn >= limit_pfn)
@@ -914,7 +913,6 @@ void __init finish_e820_parsing(void)
static inline const char *e820_type_to_string(int e820_type)
{
switch (e820_type) {
- case E820_RESERVED_KERN:
case E820_RAM: return "System RAM";
case E820_ACPI: return "ACPI Tables";
case E820_NVS: return "ACPI Non-volatile Storage";
@@ -1111,7 +1109,7 @@ void __init memblock_x86_fill(void)
if (end != (resource_size_t)end)
continue;

- if (ei->type != E820_RAM && ei->type != E820_RESERVED_KERN)
+ if (ei->type != E820_RAM)
continue;

memblock_add(ei->addr, ei->size);
Index: linux-2.6/arch/x86/kernel/tboot.c
===================================================================
--- linux-2.6.orig/arch/x86/kernel/tboot.c
+++ linux-2.6/arch/x86/kernel/tboot.c
@@ -195,8 +195,7 @@ static int tboot_setup_sleep(void)
tboot->num_mac_regions = 0;

for (i = 0; i < e820.nr_map; i++) {
- if ((e820.map[i].type != E820_RAM)
- && (e820.map[i].type != E820_RESERVED_KERN))
+ if (e820.map[i].type != E820_RAM)
continue;

add_mac_region(e820.map[i].addr, e820.map[i].size);
Index: linux-2.6/arch/x86/mm/init_64.c
===================================================================
--- linux-2.6.orig/arch/x86/mm/init_64.c
+++ linux-2.6/arch/x86/mm/init_64.c
@@ -412,8 +412,7 @@ phys_pte_init(pte_t *pte_page, unsigned
next = (addr & PAGE_MASK) + PAGE_SIZE;
if (addr >= end) {
if (!after_bootmem &&
- !e820_any_mapped(addr & PAGE_MASK, next, E820_RAM) &&
- !e820_any_mapped(addr & PAGE_MASK, next, E820_RESERVED_KERN))
+ !e820_any_mapped(addr & PAGE_MASK, next, E820_RAM))
set_pte(pte, __pte(0));
continue;
}
@@ -459,9 +458,8 @@ phys_pmd_init(pmd_t *pmd_page, unsigned

next = (address & PMD_MASK) + PMD_SIZE;
if (address >= end) {
- if (!after_bootmem &&
- !e820_any_mapped(address & PMD_MASK, next, E820_RAM) &&
- !e820_any_mapped(address & PMD_MASK, next, E820_RESERVED_KERN))
+ if (!after_bootmem && !e820_any_mapped(
+ address & PMD_MASK, next, E820_RAM))
set_pmd(pmd, __pmd(0));
continue;
}
@@ -534,8 +532,7 @@ phys_pud_init(pud_t *pud_page, unsigned
next = (addr & PUD_MASK) + PUD_SIZE;
if (addr >= end) {
if (!after_bootmem &&
- !e820_any_mapped(addr & PUD_MASK, next, E820_RAM) &&
- !e820_any_mapped(addr & PUD_MASK, next, E820_RESERVED_KERN))
+ !e820_any_mapped(addr & PUD_MASK, next, E820_RAM))
set_pud(pud, __pud(0));
continue;
}
Index: linux-2.6/arch/x86/include/uapi/asm/e820.h
===================================================================
--- linux-2.6.orig/arch/x86/include/uapi/asm/e820.h
+++ linux-2.6/arch/x86/include/uapi/asm/e820.h
@@ -45,14 +45,6 @@
*/
#define E820_PRAM 12

-/*
- * reserved RAM used by kernel itself
- * if CONFIG_INTEL_TXT is enabled, memory of this type will be
- * included in the S3 integrity calculation and so should not include
- * any memory that BIOS might alter over the S3 transition
- */
-#define E820_RESERVED_KERN 128
-
#ifndef __ASSEMBLY__
#include <linux/types.h>
struct e820entry {
Index: linux-2.6/arch/x86/kernel/setup.c
===================================================================
--- linux-2.6.orig/arch/x86/kernel/setup.c
+++ linux-2.6/arch/x86/kernel/setup.c
@@ -457,29 +457,6 @@ static void __init parse_setup_data(void
}
}

-static void __init e820_reserve_setup_data(void)
-{
- struct setup_data *data;
- u64 pa_data;
-
- pa_data = boot_params.hdr.setup_data;
- if (!pa_data)
- return;
-
- while (pa_data) {
- data = early_memremap(pa_data, sizeof(*data));
- e820_update_range(pa_data, sizeof(*data)+data->len,
- E820_RAM, E820_RESERVED_KERN);
- pa_data = data->next;
- early_memunmap(data, sizeof(*data));
- }
-
- sanitize_e820_map(e820.map, ARRAY_SIZE(e820.map), &e820.nr_map);
- memcpy(&e820_saved, &e820, sizeof(struct e820map));
- printk(KERN_INFO "extended physical RAM map:\n");
- e820_print_map("reserve setup_data");
-}
-
static void __init memblock_x86_reserve_range_setup_data(void)
{
struct setup_data *data;
@@ -1013,8 +990,6 @@ void __init setup_arch(char **cmdline_p)
early_dump_pci_devices();
#endif

- /* update the e820_saved too */
- e820_reserve_setup_data();
finish_e820_parsing();

if (efi_enabled(EFI_BOOT))


2015-08-19 06:33:42

by Chen Yu

[permalink] [raw]
Subject: Re: [PATCH] x86: Kill E820_RESERVED_KERN

Hi,
On 07/29/2015 03:21 AM, Yinghai Lu wrote:
> E820_RESERVED_KERN was introduced to do early allocation for
> setup_data when we were using original early_res with e820 map.
>
> Now we are using memblock to do early resource reserve/allocation, and
> setup_data is reserved in memblock early already.
>
> For kexec path, kexec generate setup_data (Now kexec-tools create SETUP_EFI
> and SETUP_E820_EXT), and pass pointer to second kernel, and
> second kernel reserve setup_data by their own without using e820 map.
>
> So we do not need to touch e820 map at all, and we can kill
> E820_RESERVED_KERN.
>
> That make the code simpler, and at same time that will fix bug with
> hibernation:
> mark_nonsave_region that can not handle that case:
> E820_RAM and E820_RESERVED_KERN ranges are continuous and
> boundary is not page aligned.
>
> Link: https://bugzilla.opensuse.org/show_bug.cgi?id=913885
> Link: https://bugzilla.kernel.org/show_bug.cgi?id=96111

I've tested hibernation on the latest 4.2-rc7 and encountered a panic when
resuming, so I guess this patch has not been merged upstream:

BUG: unable to handle kernel paging request at ffff880085894000
IP: [<ffffffff810c5dc2>] load_image_lzo+0x8c2/0xe70

With the current patch and Lee, Chun-Yi's patch applied, the panic
disappeared. Would someone please have a look at this patch?
Thanks a lot.

Best Regards,
Yu