2015-02-04 00:43:13

by Yinghai Lu

[permalink] [raw]
Subject: [PATCH] x86: Kill E820_RESERVED_KERN

We now use memblock for early resource reservation/allocation
instead of using the e820 map directly, and setup_data is reserved in
memblock early.
Also, kexec generates setup_data and passes a pointer to it to the
second kernel, so the second kernel reserves setup_data on its own.

We can kill E820_RESERVED_KERN and not touch e820 map at all.

That fixes a bug in e820_mark_nosave_regions(), which cannot handle
the case where E820_RAM and E820_RESERVED_KERN ranges are contiguous
and the boundary between them is not page aligned.

Bugzilla: https://bugzilla.opensuse.org/show_bug.cgi?id=913885
Reported-by: "Lee, Chun-Yi" <[email protected]>
Tested-by: "Lee, Chun-Yi" <[email protected]>
Signed-off-by: Yinghai Lu <[email protected]>
Cc: [email protected]

---
arch/x86/include/uapi/asm/e820.h | 9 ---------
arch/x86/kernel/e820.c | 6 ++----
arch/x86/kernel/setup.c | 26 --------------------------
arch/x86/kernel/tboot.c | 3 +--
arch/x86/mm/init_64.c | 11 ++++-------
5 files changed, 7 insertions(+), 48 deletions(-)

Index: linux-2.6/arch/x86/include/uapi/asm/e820.h
===================================================================
--- linux-2.6.orig/arch/x86/include/uapi/asm/e820.h
+++ linux-2.6/arch/x86/include/uapi/asm/e820.h
@@ -33,15 +33,6 @@
#define E820_NVS 4
#define E820_UNUSABLE 5

-
-/*
- * reserved RAM used by kernel itself
- * if CONFIG_INTEL_TXT is enabled, memory of this type will be
- * included in the S3 integrity calculation and so should not include
- * any memory that BIOS might alter over the S3 transition
- */
-#define E820_RESERVED_KERN 128
-
#ifndef __ASSEMBLY__
#include <linux/types.h>
struct e820entry {
Index: linux-2.6/arch/x86/kernel/e820.c
===================================================================
--- linux-2.6.orig/arch/x86/kernel/e820.c
+++ linux-2.6/arch/x86/kernel/e820.c
@@ -134,7 +134,6 @@ static void __init e820_print_type(u32 t
{
switch (type) {
case E820_RAM:
- case E820_RESERVED_KERN:
printk(KERN_CONT "usable");
break;
case E820_RESERVED:
@@ -688,7 +687,7 @@ void __init e820_mark_nosave_regions(uns
register_nosave_region(pfn, PFN_UP(ei->addr));

pfn = PFN_DOWN(ei->addr + ei->size);
- if (ei->type != E820_RAM && ei->type != E820_RESERVED_KERN)
+ if (ei->type != E820_RAM)
register_nosave_region(PFN_UP(ei->addr), pfn);

if (pfn >= limit_pfn)
@@ -902,7 +901,6 @@ void __init finish_e820_parsing(void)
static inline const char *e820_type_to_string(int e820_type)
{
switch (e820_type) {
- case E820_RESERVED_KERN:
case E820_RAM: return "System RAM";
case E820_ACPI: return "ACPI Tables";
case E820_NVS: return "ACPI Non-volatile Storage";
@@ -1077,7 +1075,7 @@ void __init memblock_x86_fill(void)
if (end != (resource_size_t)end)
continue;

- if (ei->type != E820_RAM && ei->type != E820_RESERVED_KERN)
+ if (ei->type != E820_RAM)
continue;

memblock_add(ei->addr, ei->size);
Index: linux-2.6/arch/x86/kernel/setup.c
===================================================================
--- linux-2.6.orig/arch/x86/kernel/setup.c
+++ linux-2.6/arch/x86/kernel/setup.c
@@ -456,30 +456,6 @@ static void __init parse_setup_data(void
}
}

-static void __init e820_reserve_setup_data(void)
-{
- struct setup_data *data;
- u64 pa_data;
- int found = 0;
-
- pa_data = boot_params.hdr.setup_data;
- while (pa_data) {
- data = early_memremap(pa_data, sizeof(*data));
- e820_update_range(pa_data, sizeof(*data)+data->len,
- E820_RAM, E820_RESERVED_KERN);
- found = 1;
- pa_data = data->next;
- early_iounmap(data, sizeof(*data));
- }
- if (!found)
- return;
-
- sanitize_e820_map(e820.map, ARRAY_SIZE(e820.map), &e820.nr_map);
- memcpy(&e820_saved, &e820, sizeof(struct e820map));
- printk(KERN_INFO "extended physical RAM map:\n");
- e820_print_map("reserve setup_data");
-}
-
static void __init memblock_x86_reserve_range_setup_data(void)
{
struct setup_data *data;
@@ -1011,8 +987,6 @@ void __init setup_arch(char **cmdline_p)
early_dump_pci_devices();
#endif

- /* update the e820_saved too */
- e820_reserve_setup_data();
finish_e820_parsing();

if (efi_enabled(EFI_BOOT))
Index: linux-2.6/arch/x86/kernel/tboot.c
===================================================================
--- linux-2.6.orig/arch/x86/kernel/tboot.c
+++ linux-2.6/arch/x86/kernel/tboot.c
@@ -195,8 +195,7 @@ static int tboot_setup_sleep(void)
tboot->num_mac_regions = 0;

for (i = 0; i < e820.nr_map; i++) {
- if ((e820.map[i].type != E820_RAM)
- && (e820.map[i].type != E820_RESERVED_KERN))
+ if (e820.map[i].type != E820_RAM)
continue;

add_mac_region(e820.map[i].addr, e820.map[i].size);
Index: linux-2.6/arch/x86/mm/init_64.c
===================================================================
--- linux-2.6.orig/arch/x86/mm/init_64.c
+++ linux-2.6/arch/x86/mm/init_64.c
@@ -426,8 +426,7 @@ phys_pte_init(pte_t *pte_page, unsigned
next = (addr & PAGE_MASK) + PAGE_SIZE;
if (addr >= end) {
if (!after_bootmem &&
- !e820_any_mapped(addr & PAGE_MASK, next, E820_RAM) &&
- !e820_any_mapped(addr & PAGE_MASK, next, E820_RESERVED_KERN))
+ !e820_any_mapped(addr & PAGE_MASK, next, E820_RAM))
set_pte(pte, __pte(0));
continue;
}
@@ -473,9 +472,8 @@ phys_pmd_init(pmd_t *pmd_page, unsigned

next = (address & PMD_MASK) + PMD_SIZE;
if (address >= end) {
- if (!after_bootmem &&
- !e820_any_mapped(address & PMD_MASK, next, E820_RAM) &&
- !e820_any_mapped(address & PMD_MASK, next, E820_RESERVED_KERN))
+ if (!after_bootmem && !e820_any_mapped(
+ address & PMD_MASK, next, E820_RAM))
set_pmd(pmd, __pmd(0));
continue;
}
@@ -548,8 +546,7 @@ phys_pud_init(pud_t *pud_page, unsigned
next = (addr & PUD_MASK) + PUD_SIZE;
if (addr >= end) {
if (!after_bootmem &&
- !e820_any_mapped(addr & PUD_MASK, next, E820_RAM) &&
- !e820_any_mapped(addr & PUD_MASK, next, E820_RESERVED_KERN))
+ !e820_any_mapped(addr & PUD_MASK, next, E820_RAM))
set_pud(pud, __pud(0));
continue;
}