Currently setup_data is reserved via both memblock and e820, and different
handlers process it in different ways, which is confusing.
1. SETUP_E820_EXT: is consumed early and is not copied or accessed again;
the reserved memory is wasted.
2. SETUP_EFI: is accessed via ioremap every time during early boot;
the reserved memory is wasted.
3. SETUP_DTB: is copied locally;
the reserved memory is wasted.
4. SETUP_PCI: is accessed via ioremap for every PCI device, even at run time.
5. SETUP_KASLR: is accessed early and is not copied or accessed again;
the reserved memory is wasted.
Also, setup_data is exported to debugfs for debugging purposes.
This series lets every handler decide how to handle its own setup_data
and no longer reserves setup_data generically, so that no memory is
wasted and the memblock/e820 ranges stay page aligned.
1. Do not touch E820 anymore.
2. SETUP_EFI: copy to an __initdata variable and access it without ioremap.
3. SETUP_DTB: reserve, copy to a local buffer, and free.
4. SETUP_PCI: reserve locally and convert to a list, to avoid repeated ioremap.
5. SETUP_KASLR: fix the kaslr_enabled access.
6. Export SETUP_PCI via sysfs.
These patches can be applied on top of tip/x86/urgent with SETUP_KASLR
support.
Yinghai Lu (8):
x86, kaslr: get kaslr_enabled back correctly
x86: Kill E820_RESERVED_KERN
x86, efi: copy SETUP_EFI data and access directly
x86, of: let add_dtb reserve by itself
x86, boot: Add add_pci handler for SETUP_PCI
x86: kill not used setup_data handling code
x86, pci: convert SETUP_PCI data to list
x86, pci: export SETUP_PCI data via sysfs
arch/x86/include/asm/efi.h | 2 +-
arch/x86/include/asm/pci.h | 2 +
arch/x86/include/asm/prom.h | 9 +-
arch/x86/include/uapi/asm/e820.h | 9 --
arch/x86/kernel/devicetree.c | 39 +++---
arch/x86/kernel/e820.c | 6 +-
arch/x86/kernel/kdebugfs.c | 142 --------------------
arch/x86/kernel/setup.c | 57 ++------
arch/x86/kernel/tboot.c | 3 +-
arch/x86/mm/init_64.c | 11 +-
arch/x86/pci/common.c | 281 ++++++++++++++++++++++++++++++++++++---
arch/x86/platform/efi/efi.c | 13 +-
arch/x86/platform/efi/efi_64.c | 13 +-
arch/x86/platform/efi/quirks.c | 23 +---
14 files changed, 336 insertions(+), 274 deletions(-)
--
1.8.4.5
We should read the variable by dereferencing its address instead of using
the physical address itself as the value.
Cc: Matt Fleming <[email protected]>
Cc: Borislav Petkov <[email protected]>
Signed-off-by: Yinghai Lu <[email protected]>
---
arch/x86/kernel/setup.c | 8 +++++++-
1 file changed, 7 insertions(+), 1 deletion(-)
diff --git a/arch/x86/kernel/setup.c b/arch/x86/kernel/setup.c
index 98dc931..05d444f 100644
--- a/arch/x86/kernel/setup.c
+++ b/arch/x86/kernel/setup.c
@@ -429,7 +429,13 @@ static void __init reserve_initrd(void)
static void __init parse_kaslr_setup(u64 pa_data, u32 data_len)
{
- kaslr_enabled = (bool)(pa_data + sizeof(struct setup_data));
+ /* kaslr_setup_data is defined in aslr.c */
+ unsigned char *data;
+ unsigned long offset = sizeof(struct setup_data);
+
+ data = early_memremap(pa_data, offset + 1);
+ kaslr_enabled = *(data + offset);
+ early_memunmap(data, offset + 1);
}
static void __init parse_setup_data(void)
--
1.8.4.5
We now use memblock for early resource reservation/allocation
instead of using the e820 map directly, and setup_data is already
reserved in memblock early on.
Also, kexec generates setup_data and passes a pointer to the second kernel,
so the second kernel reserves setup_data on its own.
We can kill E820_RESERVED_KERN and not touch the e820 map at all.
That fixes a bug in e820_mark_nosave_regions(), which cannot handle the
case where E820_RAM and E820_RESERVED_KERN ranges are contiguous and the
boundary between them is not page aligned.
Bugzilla: https://bugzilla.opensuse.org/show_bug.cgi?id=913885
Reported-by: "Lee, Chun-Yi" <[email protected]>
Tested-by: "Lee, Chun-Yi" <[email protected]>
Cc: "Lee, Chun-Yi" <[email protected]>
Signed-off-by: Yinghai Lu <[email protected]>
Cc: [email protected]
---
arch/x86/include/uapi/asm/e820.h | 9 ---------
arch/x86/kernel/e820.c | 6 ++----
arch/x86/kernel/setup.c | 26 --------------------------
arch/x86/kernel/tboot.c | 3 +--
arch/x86/mm/init_64.c | 11 ++++-------
5 files changed, 7 insertions(+), 48 deletions(-)
diff --git a/arch/x86/include/uapi/asm/e820.h b/arch/x86/include/uapi/asm/e820.h
index d993e33..edc8a71 100644
--- a/arch/x86/include/uapi/asm/e820.h
+++ b/arch/x86/include/uapi/asm/e820.h
@@ -33,15 +33,6 @@
#define E820_NVS 4
#define E820_UNUSABLE 5
-
-/*
- * reserved RAM used by kernel itself
- * if CONFIG_INTEL_TXT is enabled, memory of this type will be
- * included in the S3 integrity calculation and so should not include
- * any memory that BIOS might alter over the S3 transition
- */
-#define E820_RESERVED_KERN 128
-
#ifndef __ASSEMBLY__
#include <linux/types.h>
struct e820entry {
diff --git a/arch/x86/kernel/e820.c b/arch/x86/kernel/e820.c
index 46201de..2a6bed9 100644
--- a/arch/x86/kernel/e820.c
+++ b/arch/x86/kernel/e820.c
@@ -134,7 +134,6 @@ static void __init e820_print_type(u32 type)
{
switch (type) {
case E820_RAM:
- case E820_RESERVED_KERN:
printk(KERN_CONT "usable");
break;
case E820_RESERVED:
@@ -688,7 +687,7 @@ void __init e820_mark_nosave_regions(unsigned long limit_pfn)
register_nosave_region(pfn, PFN_UP(ei->addr));
pfn = PFN_DOWN(ei->addr + ei->size);
- if (ei->type != E820_RAM && ei->type != E820_RESERVED_KERN)
+ if (ei->type != E820_RAM)
register_nosave_region(PFN_UP(ei->addr), pfn);
if (pfn >= limit_pfn)
@@ -902,7 +901,6 @@ void __init finish_e820_parsing(void)
static inline const char *e820_type_to_string(int e820_type)
{
switch (e820_type) {
- case E820_RESERVED_KERN:
case E820_RAM: return "System RAM";
case E820_ACPI: return "ACPI Tables";
case E820_NVS: return "ACPI Non-volatile Storage";
@@ -1077,7 +1075,7 @@ void __init memblock_x86_fill(void)
if (end != (resource_size_t)end)
continue;
- if (ei->type != E820_RAM && ei->type != E820_RESERVED_KERN)
+ if (ei->type != E820_RAM)
continue;
memblock_add(ei->addr, ei->size);
diff --git a/arch/x86/kernel/setup.c b/arch/x86/kernel/setup.c
index 05d444f..c9b3e2f 100644
--- a/arch/x86/kernel/setup.c
+++ b/arch/x86/kernel/setup.c
@@ -473,30 +473,6 @@ static void __init parse_setup_data(void)
}
}
-static void __init e820_reserve_setup_data(void)
-{
- struct setup_data *data;
- u64 pa_data;
- int found = 0;
-
- pa_data = boot_params.hdr.setup_data;
- while (pa_data) {
- data = early_memremap(pa_data, sizeof(*data));
- e820_update_range(pa_data, sizeof(*data)+data->len,
- E820_RAM, E820_RESERVED_KERN);
- found = 1;
- pa_data = data->next;
- early_iounmap(data, sizeof(*data));
- }
- if (!found)
- return;
-
- sanitize_e820_map(e820.map, ARRAY_SIZE(e820.map), &e820.nr_map);
- memcpy(&e820_saved, &e820, sizeof(struct e820map));
- printk(KERN_INFO "extended physical RAM map:\n");
- e820_print_map("reserve setup_data");
-}
-
static void __init memblock_x86_reserve_range_setup_data(void)
{
struct setup_data *data;
@@ -1032,8 +1008,6 @@ void __init setup_arch(char **cmdline_p)
early_dump_pci_devices();
#endif
- /* update the e820_saved too */
- e820_reserve_setup_data();
finish_e820_parsing();
if (efi_enabled(EFI_BOOT))
diff --git a/arch/x86/kernel/tboot.c b/arch/x86/kernel/tboot.c
index 91a4496..3c2752a 100644
--- a/arch/x86/kernel/tboot.c
+++ b/arch/x86/kernel/tboot.c
@@ -195,8 +195,7 @@ static int tboot_setup_sleep(void)
tboot->num_mac_regions = 0;
for (i = 0; i < e820.nr_map; i++) {
- if ((e820.map[i].type != E820_RAM)
- && (e820.map[i].type != E820_RESERVED_KERN))
+ if (e820.map[i].type != E820_RAM)
continue;
add_mac_region(e820.map[i].addr, e820.map[i].size);
diff --git a/arch/x86/mm/init_64.c b/arch/x86/mm/init_64.c
index 30eb05a..19430d5 100644
--- a/arch/x86/mm/init_64.c
+++ b/arch/x86/mm/init_64.c
@@ -426,8 +426,7 @@ phys_pte_init(pte_t *pte_page, unsigned long addr, unsigned long end,
next = (addr & PAGE_MASK) + PAGE_SIZE;
if (addr >= end) {
if (!after_bootmem &&
- !e820_any_mapped(addr & PAGE_MASK, next, E820_RAM) &&
- !e820_any_mapped(addr & PAGE_MASK, next, E820_RESERVED_KERN))
+ !e820_any_mapped(addr & PAGE_MASK, next, E820_RAM))
set_pte(pte, __pte(0));
continue;
}
@@ -473,9 +472,8 @@ phys_pmd_init(pmd_t *pmd_page, unsigned long address, unsigned long end,
next = (address & PMD_MASK) + PMD_SIZE;
if (address >= end) {
- if (!after_bootmem &&
- !e820_any_mapped(address & PMD_MASK, next, E820_RAM) &&
- !e820_any_mapped(address & PMD_MASK, next, E820_RESERVED_KERN))
+ if (!after_bootmem && !e820_any_mapped(
+ address & PMD_MASK, next, E820_RAM))
set_pmd(pmd, __pmd(0));
continue;
}
@@ -548,8 +546,7 @@ phys_pud_init(pud_t *pud_page, unsigned long addr, unsigned long end,
next = (addr & PUD_MASK) + PUD_SIZE;
if (addr >= end) {
if (!after_bootmem &&
- !e820_any_mapped(addr & PUD_MASK, next, E820_RAM) &&
- !e820_any_mapped(addr & PUD_MASK, next, E820_RESERVED_KERN))
+ !e820_any_mapped(addr & PUD_MASK, next, E820_RAM))
set_pud(pud, __pud(0));
continue;
}
--
1.8.4.5
The copy lives in __initdata, and it is small.
We can use a pointer to access the setup_data instead of calling
early_memremap() and early_memunmap() everywhere.
Cc: Matt Fleming <[email protected]>
Cc: [email protected]
Signed-off-by: Yinghai Lu <[email protected]>
---
arch/x86/include/asm/efi.h | 2 +-
arch/x86/platform/efi/efi.c | 13 ++-----------
arch/x86/platform/efi/efi_64.c | 13 ++++++++++++-
arch/x86/platform/efi/quirks.c | 23 ++++++-----------------
4 files changed, 21 insertions(+), 30 deletions(-)
diff --git a/arch/x86/include/asm/efi.h b/arch/x86/include/asm/efi.h
index 25bce45..edbecd6 100644
--- a/arch/x86/include/asm/efi.h
+++ b/arch/x86/include/asm/efi.h
@@ -114,7 +114,7 @@ struct efi_setup_data {
u64 reserved[8];
};
-extern u64 efi_setup;
+extern struct efi_setup_data *efi_setup;
#ifdef CONFIG_EFI
diff --git a/arch/x86/platform/efi/efi.c b/arch/x86/platform/efi/efi.c
index dbc8627..1cd38e8 100644
--- a/arch/x86/platform/efi/efi.c
+++ b/arch/x86/platform/efi/efi.c
@@ -68,7 +68,7 @@ static efi_config_table_type_t arch_tables[] __initdata = {
{NULL_GUID, NULL, NULL},
};
-u64 efi_setup; /* efi setup_data physical address */
+struct efi_setup_data *efi_setup __initdata; /* cached efi setup_data pointer */
static int add_efi_memmap __initdata;
static int __init setup_add_efi_memmap(char *arg)
@@ -225,20 +225,13 @@ static int __init efi_systab_init(void *phys)
{
if (efi_enabled(EFI_64BIT)) {
efi_system_table_64_t *systab64;
- struct efi_setup_data *data = NULL;
+ struct efi_setup_data *data = efi_setup;
u64 tmp = 0;
- if (efi_setup) {
- data = early_memremap(efi_setup, sizeof(*data));
- if (!data)
- return -ENOMEM;
- }
systab64 = early_memremap((unsigned long)phys,
sizeof(*systab64));
if (systab64 == NULL) {
pr_err("Couldn't map the system table!\n");
- if (data)
- early_memunmap(data, sizeof(*data));
return -ENOMEM;
}
@@ -271,8 +264,6 @@ static int __init efi_systab_init(void *phys)
tmp |= data ? data->tables : systab64->tables;
early_memunmap(systab64, sizeof(*systab64));
- if (data)
- early_memunmap(data, sizeof(*data));
#ifdef CONFIG_X86_32
if (tmp >> 32) {
pr_err("EFI data located above 4GB, disabling EFI.\n");
diff --git a/arch/x86/platform/efi/efi_64.c b/arch/x86/platform/efi/efi_64.c
index 17e80d8..a541c6c 100644
--- a/arch/x86/platform/efi/efi_64.c
+++ b/arch/x86/platform/efi/efi_64.c
@@ -292,9 +292,20 @@ void __iomem *__init efi_ioremap(unsigned long phys_addr, unsigned long size,
return (void __iomem *)__va(phys_addr);
}
+static struct efi_setup_data efi_setup_data __initdata;
+
void __init parse_efi_setup(u64 phys_addr, u32 data_len)
{
- efi_setup = phys_addr + sizeof(struct setup_data);
+ struct efi_setup_data *data;
+
+ data = early_memremap(phys_addr + sizeof(struct setup_data),
+ sizeof(*data));
+ if (!data)
+ return;
+
+ efi_setup_data = *data;
+ early_memunmap(data, sizeof(*data));
+ efi_setup = &efi_setup_data;
}
void __init efi_runtime_mkexec(void)
diff --git a/arch/x86/platform/efi/quirks.c b/arch/x86/platform/efi/quirks.c
index 1c7380d..45fec7d 100644
--- a/arch/x86/platform/efi/quirks.c
+++ b/arch/x86/platform/efi/quirks.c
@@ -203,9 +203,8 @@ void __init efi_free_boot_services(void)
*/
int __init efi_reuse_config(u64 tables, int nr_tables)
{
- int i, sz, ret = 0;
+ int i, sz;
void *p, *tablep;
- struct efi_setup_data *data;
if (!efi_setup)
return 0;
@@ -213,22 +212,15 @@ int __init efi_reuse_config(u64 tables, int nr_tables)
if (!efi_enabled(EFI_64BIT))
return 0;
- data = early_memremap(efi_setup, sizeof(*data));
- if (!data) {
- ret = -ENOMEM;
- goto out;
- }
-
- if (!data->smbios)
- goto out_memremap;
+ if (!efi_setup->smbios)
+ return 0;
sz = sizeof(efi_config_table_64_t);
p = tablep = early_memremap(tables, nr_tables * sz);
if (!p) {
pr_err("Could not map Configuration table!\n");
- ret = -ENOMEM;
- goto out_memremap;
+ return -ENOMEM;
}
for (i = 0; i < efi.systab->nr_tables; i++) {
@@ -237,15 +229,12 @@ int __init efi_reuse_config(u64 tables, int nr_tables)
guid = ((efi_config_table_64_t *)p)->guid;
if (!efi_guidcmp(guid, SMBIOS_TABLE_GUID))
- ((efi_config_table_64_t *)p)->table = data->smbios;
+ ((efi_config_table_64_t *)p)->table = efi_setup->smbios;
p += sz;
}
early_memunmap(tablep, nr_tables * sz);
-out_memremap:
- early_memunmap(data, sizeof(*data));
-out:
- return ret;
+ return 0;
}
void __init efi_apply_memmap_quirks(void)
--
1.8.4.5
We will no longer reserve setup_data in generic code. Every handler
needs to reserve and copy its own data.
The current dtb handling already copies the data, so just add the reserve code.
Also simplify the code a bit by storing the real dtb size.
Cc: Rob Herring <[email protected]>
Cc: David Vrabel <[email protected]>
Signed-off-by: Yinghai Lu <[email protected]>
---
arch/x86/include/asm/prom.h | 9 ++++++---
arch/x86/kernel/devicetree.c | 39 +++++++++++++++++++++------------------
2 files changed, 27 insertions(+), 21 deletions(-)
diff --git a/arch/x86/include/asm/prom.h b/arch/x86/include/asm/prom.h
index 1d081ac..fb716eddc 100644
--- a/arch/x86/include/asm/prom.h
+++ b/arch/x86/include/asm/prom.h
@@ -24,17 +24,20 @@
#ifdef CONFIG_OF
extern int of_ioapic;
-extern u64 initial_dtb;
-extern void add_dtb(u64 data);
void x86_of_pci_init(void);
void x86_dtb_init(void);
#else
-static inline void add_dtb(u64 data) { }
static inline void x86_of_pci_init(void) { }
static inline void x86_dtb_init(void) { }
#define of_ioapic 0
#endif
+#ifdef CONFIG_OF_FLATTREE
+extern void add_dtb(u64 data);
+#else
+static inline void add_dtb(u64 data) { }
+#endif
+
extern char cmd_line[COMMAND_LINE_SIZE];
#endif /* __ASSEMBLY__ */
diff --git a/arch/x86/kernel/devicetree.c b/arch/x86/kernel/devicetree.c
index 3d35033..cc2fb61 100644
--- a/arch/x86/kernel/devicetree.c
+++ b/arch/x86/kernel/devicetree.c
@@ -2,6 +2,7 @@
* Architecture specific OF callbacks.
*/
#include <linux/bootmem.h>
+#include <linux/memblock.h>
#include <linux/export.h>
#include <linux/io.h>
#include <linux/irqdomain.h>
@@ -23,7 +24,6 @@
#include <asm/setup.h>
#include <asm/i8259.h>
-__initdata u64 initial_dtb;
char __initdata cmd_line[COMMAND_LINE_SIZE];
int __initdata of_ioapic;
@@ -43,11 +43,23 @@ void * __init early_init_dt_alloc_memory_arch(u64 size, u64 align)
return __alloc_bootmem(size, align, __pa(MAX_DMA_ADDRESS));
}
+#ifdef CONFIG_OF_FLATTREE
+static u64 initial_dtb __initdata;
+static u32 initial_dtb_size __initdata;
void __init add_dtb(u64 data)
{
+ u32 map_len;
+
initial_dtb = data + offsetof(struct setup_data, data);
-}
+ map_len = max(PAGE_SIZE - (initial_dtb & ~PAGE_MASK), (u64)128);
+ initial_boot_params = early_memremap(initial_dtb, map_len);
+ initial_dtb_size = of_get_flat_dt_size();
+ early_memunmap(initial_boot_params, map_len);
+ initial_boot_params = NULL;
+ memblock_reserve(initial_dtb, initial_dtb_size);
+}
+#endif
/*
* CE4100 ids. Will be moved to machine_device_initcall() once we have it.
*/
@@ -272,31 +284,22 @@ static void __init dtb_apic_setup(void)
dtb_ioapic_setup();
}
-#ifdef CONFIG_OF_FLATTREE
static void __init x86_flattree_get_config(void)
{
- u32 size, map_len;
+#ifdef CONFIG_OF_FLATTREE
void *dt;
if (!initial_dtb)
return;
- map_len = max(PAGE_SIZE - (initial_dtb & ~PAGE_MASK), (u64)128);
-
- initial_boot_params = dt = early_memremap(initial_dtb, map_len);
- size = of_get_flat_dt_size();
- if (map_len < size) {
- early_iounmap(dt, map_len);
- initial_boot_params = dt = early_memremap(initial_dtb, size);
- map_len = size;
- }
-
+ initial_boot_params = dt = early_memremap(initial_dtb,
+ initial_dtb_size);
unflatten_and_copy_device_tree();
- early_iounmap(dt, map_len);
-}
-#else
-static inline void x86_flattree_get_config(void) { }
+ early_memunmap(dt, initial_dtb_size);
+
+ memblock_free(initial_dtb, initial_dtb_size);
#endif
+}
void __init x86_dtb_init(void)
{
--
1.8.4.5
Let it reserve its setup_data and keep its own list.
Also clear hdr.setup_data, as every handler already handles or
reserves its setup_data locally.
Cc: Bjorn Helgaas <[email protected]>
Cc: Matt Fleming <[email protected]>
Cc: [email protected]
Signed-off-by: Yinghai Lu <[email protected]>
---
arch/x86/include/asm/pci.h | 2 ++
arch/x86/kernel/setup.c | 6 ++++++
arch/x86/pci/common.c | 42 ++++++++++++++++++++++++++++--------------
3 files changed, 36 insertions(+), 14 deletions(-)
diff --git a/arch/x86/include/asm/pci.h b/arch/x86/include/asm/pci.h
index 4e370a5..aa25a22 100644
--- a/arch/x86/include/asm/pci.h
+++ b/arch/x86/include/asm/pci.h
@@ -155,4 +155,6 @@ struct pci_setup_rom {
uint8_t romdata[0];
};
+void add_pci(u64 pa_data);
+
#endif /* _ASM_X86_PCI_H */
diff --git a/arch/x86/kernel/setup.c b/arch/x86/kernel/setup.c
index c9b3e2f..93c0adb 100644
--- a/arch/x86/kernel/setup.c
+++ b/arch/x86/kernel/setup.c
@@ -460,6 +460,9 @@ static void __init parse_setup_data(void)
case SETUP_DTB:
add_dtb(pa_data);
break;
+ case SETUP_PCI:
+ add_pci(pa_data);
+ break;
case SETUP_EFI:
parse_efi_setup(pa_data, data_len);
break;
@@ -467,10 +470,13 @@ static void __init parse_setup_data(void)
parse_kaslr_setup(pa_data, data_len);
break;
default:
+ pr_warn("Unknown setup_data type: %d ignored!\n",
+ data_type);
break;
}
pa_data = pa_next;
}
+ boot_params.hdr.setup_data = 0; /* all done */
}
static void __init memblock_x86_reserve_range_setup_data(void)
diff --git a/arch/x86/pci/common.c b/arch/x86/pci/common.c
index 3d2612b..4846db7 100644
--- a/arch/x86/pci/common.c
+++ b/arch/x86/pci/common.c
@@ -9,6 +9,7 @@
#include <linux/pci-acpi.h>
#include <linux/ioport.h>
#include <linux/init.h>
+#include <linux/memblock.h>
#include <linux/dmi.h>
#include <linux/slab.h>
@@ -667,31 +668,44 @@ unsigned int pcibios_assign_all_busses(void)
return (pci_probe & PCI_ASSIGN_ALL_BUSSES) ? 1 : 0;
}
+static u64 pci_setup_data;
+void __init add_pci(u64 pa_data)
+{
+ struct setup_data *data;
+
+ data = early_memremap(pa_data, sizeof(*data));
+ memblock_reserve(pa_data, sizeof(*data) + data->len);
+ data->next = pci_setup_data;
+ pci_setup_data = pa_data;
+ early_memunmap(data, sizeof(*data));
+}
+
int pcibios_add_device(struct pci_dev *dev)
{
struct setup_data *data;
struct pci_setup_rom *rom;
u64 pa_data;
- pa_data = boot_params.hdr.setup_data;
+ pa_data = pci_setup_data;
while (pa_data) {
data = ioremap(pa_data, sizeof(*rom));
if (!data)
return -ENOMEM;
- if (data->type == SETUP_PCI) {
- rom = (struct pci_setup_rom *)data;
-
- if ((pci_domain_nr(dev->bus) == rom->segment) &&
- (dev->bus->number == rom->bus) &&
- (PCI_SLOT(dev->devfn) == rom->device) &&
- (PCI_FUNC(dev->devfn) == rom->function) &&
- (dev->vendor == rom->vendor) &&
- (dev->device == rom->devid)) {
- dev->rom = pa_data +
- offsetof(struct pci_setup_rom, romdata);
- dev->romlen = rom->pcilen;
- }
+ rom = (struct pci_setup_rom *)data;
+
+ if ((pci_domain_nr(dev->bus) == rom->segment) &&
+ (dev->bus->number == rom->bus) &&
+ (PCI_SLOT(dev->devfn) == rom->device) &&
+ (PCI_FUNC(dev->devfn) == rom->function) &&
+ (dev->vendor == rom->vendor) &&
+ (dev->device == rom->devid)) {
+ dev->rom = pa_data +
+ offsetof(struct pci_setup_rom, romdata);
+ dev->romlen = rom->pcilen;
+ dev_printk(KERN_DEBUG, &dev->dev, "set rom to [%#010lx, %#010lx] via SETUP_PCI\n",
+ (unsigned long)dev->rom,
+ (unsigned long)(dev->rom + dev->romlen - 1));
}
pa_data = data->next;
iounmap(data);
--
1.8.4.5
Cc: Matt Fleming <[email protected]>
Signed-off-by: Yinghai Lu <[email protected]>
---
arch/x86/kernel/kdebugfs.c | 142 ---------------------------------------------
arch/x86/kernel/setup.c | 17 ------
2 files changed, 159 deletions(-)
diff --git a/arch/x86/kernel/kdebugfs.c b/arch/x86/kernel/kdebugfs.c
index dc1404b..c8ca86c 100644
--- a/arch/x86/kernel/kdebugfs.c
+++ b/arch/x86/kernel/kdebugfs.c
@@ -21,142 +21,6 @@ struct dentry *arch_debugfs_dir;
EXPORT_SYMBOL(arch_debugfs_dir);
#ifdef CONFIG_DEBUG_BOOT_PARAMS
-struct setup_data_node {
- u64 paddr;
- u32 type;
- u32 len;
-};
-
-static ssize_t setup_data_read(struct file *file, char __user *user_buf,
- size_t count, loff_t *ppos)
-{
- struct setup_data_node *node = file->private_data;
- unsigned long remain;
- loff_t pos = *ppos;
- struct page *pg;
- void *p;
- u64 pa;
-
- if (pos < 0)
- return -EINVAL;
-
- if (pos >= node->len)
- return 0;
-
- if (count > node->len - pos)
- count = node->len - pos;
-
- pa = node->paddr + sizeof(struct setup_data) + pos;
- pg = pfn_to_page((pa + count - 1) >> PAGE_SHIFT);
- if (PageHighMem(pg)) {
- p = ioremap_cache(pa, count);
- if (!p)
- return -ENXIO;
- } else
- p = __va(pa);
-
- remain = copy_to_user(user_buf, p, count);
-
- if (PageHighMem(pg))
- iounmap(p);
-
- if (remain)
- return -EFAULT;
-
- *ppos = pos + count;
-
- return count;
-}
-
-static const struct file_operations fops_setup_data = {
- .read = setup_data_read,
- .open = simple_open,
- .llseek = default_llseek,
-};
-
-static int __init
-create_setup_data_node(struct dentry *parent, int no,
- struct setup_data_node *node)
-{
- struct dentry *d, *type, *data;
- char buf[16];
-
- sprintf(buf, "%d", no);
- d = debugfs_create_dir(buf, parent);
- if (!d)
- return -ENOMEM;
-
- type = debugfs_create_x32("type", S_IRUGO, d, &node->type);
- if (!type)
- goto err_dir;
-
- data = debugfs_create_file("data", S_IRUGO, d, node, &fops_setup_data);
- if (!data)
- goto err_type;
-
- return 0;
-
-err_type:
- debugfs_remove(type);
-err_dir:
- debugfs_remove(d);
- return -ENOMEM;
-}
-
-static int __init create_setup_data_nodes(struct dentry *parent)
-{
- struct setup_data_node *node;
- struct setup_data *data;
- int error;
- struct dentry *d;
- struct page *pg;
- u64 pa_data;
- int no = 0;
-
- d = debugfs_create_dir("setup_data", parent);
- if (!d)
- return -ENOMEM;
-
- pa_data = boot_params.hdr.setup_data;
-
- while (pa_data) {
- node = kmalloc(sizeof(*node), GFP_KERNEL);
- if (!node) {
- error = -ENOMEM;
- goto err_dir;
- }
-
- pg = pfn_to_page((pa_data+sizeof(*data)-1) >> PAGE_SHIFT);
- if (PageHighMem(pg)) {
- data = ioremap_cache(pa_data, sizeof(*data));
- if (!data) {
- kfree(node);
- error = -ENXIO;
- goto err_dir;
- }
- } else
- data = __va(pa_data);
-
- node->paddr = pa_data;
- node->type = data->type;
- node->len = data->len;
- error = create_setup_data_node(d, no, node);
- pa_data = data->next;
-
- if (PageHighMem(pg))
- iounmap(data);
- if (error)
- goto err_dir;
- no++;
- }
-
- return 0;
-
-err_dir:
- debugfs_remove(d);
- return error;
-}
-
static struct debugfs_blob_wrapper boot_params_blob = {
.data = &boot_params,
.size = sizeof(boot_params),
@@ -181,14 +45,8 @@ static int __init boot_params_kdebugfs_init(void)
if (!data)
goto err_version;
- error = create_setup_data_nodes(dbp);
- if (error)
- goto err_data;
-
return 0;
-err_data:
- debugfs_remove(data);
err_version:
debugfs_remove(version);
err_dir:
diff --git a/arch/x86/kernel/setup.c b/arch/x86/kernel/setup.c
index 93c0adb..b9f1687 100644
--- a/arch/x86/kernel/setup.c
+++ b/arch/x86/kernel/setup.c
@@ -479,20 +479,6 @@ static void __init parse_setup_data(void)
boot_params.hdr.setup_data = 0; /* all done */
}
-static void __init memblock_x86_reserve_range_setup_data(void)
-{
- struct setup_data *data;
- u64 pa_data;
-
- pa_data = boot_params.hdr.setup_data;
- while (pa_data) {
- data = early_memremap(pa_data, sizeof(*data));
- memblock_reserve(pa_data, sizeof(*data) + data->len);
- pa_data = data->next;
- early_iounmap(data, sizeof(*data));
- }
-}
-
/*
* --------- Crashkernel reservation ------------------------------
*/
@@ -999,9 +985,6 @@ void __init setup_arch(char **cmdline_p)
x86_report_nx();
- /* after early param, so could get panic from serial */
- memblock_x86_reserve_range_setup_data();
-
if (acpi_mps_check()) {
#ifdef CONFIG_X86_LOCAL_APIC
disable_apic = 1;
--
1.8.4.5
This lets us avoid calling ioremap every time later.
Cc: Bjorn Helgaas <[email protected]>
Cc: [email protected]
Signed-off-by: Yinghai Lu <[email protected]>
---
arch/x86/pci/common.c | 77 +++++++++++++++++++++++++++++++++++++++++----------
1 file changed, 63 insertions(+), 14 deletions(-)
diff --git a/arch/x86/pci/common.c b/arch/x86/pci/common.c
index 4846db7..7b6fb94 100644
--- a/arch/x86/pci/common.c
+++ b/arch/x86/pci/common.c
@@ -680,10 +680,41 @@ void __init add_pci(u64 pa_data)
early_memunmap(data, sizeof(*data));
}
-int pcibios_add_device(struct pci_dev *dev)
+struct firmware_setup_pci_entry {
+ struct list_head list;
+ uint16_t vendor;
+ uint16_t devid;
+ uint64_t pcilen;
+ unsigned long segment;
+ unsigned long bus;
+ unsigned long device;
+ unsigned long function;
+ phys_addr_t rom;
+};
+
+static LIST_HEAD(setup_pci_entries);
+
+static void __init firmware_setup_pci_add_entry(struct pci_setup_rom *rom,
+ u64 pa_data,
+ struct firmware_setup_pci_entry *entry)
+{
+ entry->segment = rom->segment;
+ entry->bus = rom->bus;
+ entry->device = rom->device;
+ entry->function = rom->function;
+ entry->vendor = rom->vendor;
+ entry->devid = rom->devid;
+ entry->rom = pa_data + offsetof(struct pci_setup_rom, romdata);
+ entry->pcilen = rom->pcilen;
+
+ list_add(&entry->list, &setup_pci_entries);
+}
+
+static __init int fill_setup_pci_entries(void)
{
struct setup_data *data;
struct pci_setup_rom *rom;
+ struct firmware_setup_pci_entry *entry;
u64 pa_data;
pa_data = pci_setup_data;
@@ -692,24 +723,42 @@ int pcibios_add_device(struct pci_dev *dev)
if (!data)
return -ENOMEM;
- rom = (struct pci_setup_rom *)data;
-
- if ((pci_domain_nr(dev->bus) == rom->segment) &&
- (dev->bus->number == rom->bus) &&
- (PCI_SLOT(dev->devfn) == rom->device) &&
- (PCI_FUNC(dev->devfn) == rom->function) &&
- (dev->vendor == rom->vendor) &&
- (dev->device == rom->devid)) {
- dev->rom = pa_data +
- offsetof(struct pci_setup_rom, romdata);
- dev->romlen = rom->pcilen;
+ entry = kzalloc(sizeof(struct firmware_setup_pci_entry),
+ GFP_ATOMIC);
+ if (!entry) {
+ iounmap(data);
+ return -ENOMEM;
+ }
+
+ firmware_setup_pci_add_entry((struct pci_setup_rom *)data,
+ pa_data, entry);
+ pa_data = data->next;
+ iounmap(data);
+ }
+
+ return 0;
+}
+postcore_initcall(fill_setup_pci_entries);
+
+int pcibios_add_device(struct pci_dev *dev)
+{
+ struct firmware_setup_pci_entry *entry;
+
+ list_for_each_entry(entry, &setup_pci_entries, list) {
+ if ((pci_domain_nr(dev->bus) == entry->segment) &&
+ (dev->bus->number == entry->bus) &&
+ (PCI_SLOT(dev->devfn) == entry->device) &&
+ (PCI_FUNC(dev->devfn) == entry->function) &&
+ (dev->vendor == entry->vendor) &&
+ (dev->device == entry->devid)) {
+ dev->rom = entry->rom;
+ dev->romlen = entry->pcilen;
dev_printk(KERN_DEBUG, &dev->dev, "set rom to [%#010lx, %#010lx] via SETUP_PCI\n",
(unsigned long)dev->rom,
(unsigned long)(dev->rom + dev->romlen - 1));
}
- pa_data = data->next;
- iounmap(data);
}
+
return 0;
}
--
1.8.4.5
This lets kexec-tools rebuild SETUP_PCI and pass it to the
second kernel.
Currently kexec-tools can only build SETUP_EFI and SETUP_E820_EXT.
Cc: Bjorn Helgaas <[email protected]>
Cc: [email protected]
Signed-off-by: Yinghai Lu <[email protected]>
---
arch/x86/pci/common.c | 188 ++++++++++++++++++++++++++++++++++++++++++++++++++
1 file changed, 188 insertions(+)
diff --git a/arch/x86/pci/common.c b/arch/x86/pci/common.c
index 7b6fb94..bf0209a 100644
--- a/arch/x86/pci/common.c
+++ b/arch/x86/pci/common.c
@@ -682,6 +682,8 @@ void __init add_pci(u64 pa_data)
struct firmware_setup_pci_entry {
struct list_head list;
+ struct kobject kobj;
+ struct bin_attribute *rom_attr;
uint16_t vendor;
uint16_t devid;
uint64_t pcilen;
@@ -762,6 +764,192 @@ int pcibios_add_device(struct pci_dev *dev)
return 0;
}
+#ifdef CONFIG_SYSFS
+static inline struct firmware_setup_pci_entry *
+to_setup_pci_entry(struct kobject *kobj)
+{
+ return container_of(kobj, struct firmware_setup_pci_entry, kobj);
+}
+
+static ssize_t vendor_show(struct firmware_setup_pci_entry *entry, char *buf)
+{
+ return snprintf(buf, PAGE_SIZE, "0x%04llx\n",
+ (unsigned long long)entry->vendor);
+}
+
+static ssize_t devid_show(struct firmware_setup_pci_entry *entry, char *buf)
+{
+ return snprintf(buf, PAGE_SIZE, "0x%04llx\n",
+ (unsigned long long)entry->devid);
+}
+
+static ssize_t pcilen_show(struct firmware_setup_pci_entry *entry, char *buf)
+{
+ return snprintf(buf, PAGE_SIZE, "0x%llx\n",
+ (unsigned long long)entry->pcilen);
+}
+
+static ssize_t segment_show(struct firmware_setup_pci_entry *entry, char *buf)
+{
+ return snprintf(buf, PAGE_SIZE, "0x%04llx\n",
+ (unsigned long long)entry->segment);
+}
+
+static ssize_t bus_show(struct firmware_setup_pci_entry *entry, char *buf)
+{
+ return snprintf(buf, PAGE_SIZE, "0x%02llx\n",
+ (unsigned long long)entry->bus);
+}
+
+static ssize_t device_show(struct firmware_setup_pci_entry *entry, char *buf)
+{
+ return snprintf(buf, PAGE_SIZE, "0x%02llx\n",
+ (unsigned long long)entry->device);
+}
+
+static ssize_t function_show(struct firmware_setup_pci_entry *entry, char *buf)
+{
+ return snprintf(buf, PAGE_SIZE, "0x%1llx\n",
+ (unsigned long long)entry->function);
+}
+
+struct setup_pci_attribute {
+ struct attribute attr;
+ ssize_t (*show)(struct firmware_setup_pci_entry *entry, char *buf);
+};
+
+static inline struct setup_pci_attribute *to_setup_pci_attr(
+ struct attribute *attr)
+{
+ return container_of(attr, struct setup_pci_attribute, attr);
+}
+
+static ssize_t setup_pci_attr_show(struct kobject *kobj,
+ struct attribute *attr, char *buf)
+{
+ struct firmware_setup_pci_entry *entry = to_setup_pci_entry(kobj);
+ struct setup_pci_attribute *setup_pci_attr = to_setup_pci_attr(attr);
+
+ return setup_pci_attr->show(entry, buf);
+}
+
+static struct setup_pci_attribute setup_pci_vendor_attr = __ATTR_RO(vendor);
+static struct setup_pci_attribute setup_pci_devid_attr = __ATTR_RO(devid);
+static struct setup_pci_attribute setup_pci_pcilen_attr = __ATTR_RO(pcilen);
+static struct setup_pci_attribute setup_pci_segment_attr = __ATTR_RO(segment);
+static struct setup_pci_attribute setup_pci_bus_attr = __ATTR_RO(bus);
+static struct setup_pci_attribute setup_pci_device_attr = __ATTR_RO(device);
+static struct setup_pci_attribute setup_pci_function_attr = __ATTR_RO(function);
+
+/*
+ * These are default attributes that are added for every setup_pci entry.
+ */
+static struct attribute *def_attrs[] = {
+ &setup_pci_vendor_attr.attr,
+ &setup_pci_devid_attr.attr,
+ &setup_pci_pcilen_attr.attr,
+ &setup_pci_segment_attr.attr,
+ &setup_pci_bus_attr.attr,
+ &setup_pci_device_attr.attr,
+ &setup_pci_function_attr.attr,
+ NULL
+};
+
+static const struct sysfs_ops setup_pci_attr_ops = {
+ .show = setup_pci_attr_show,
+};
+
+static struct kobj_type __refdata setup_pci_ktype = {
+ .sysfs_ops = &setup_pci_attr_ops,
+ .default_attrs = def_attrs,
+};
+
+static ssize_t setup_pci_rom_read(struct file *filp, struct kobject *kobj,
+ struct bin_attribute *bin_attr, char *buf,
+ loff_t off, size_t count)
+{
+ struct firmware_setup_pci_entry *entry = to_setup_pci_entry(kobj);
+
+ if (off >= entry->pcilen)
+ count = 0;
+ else {
+ unsigned long start_pfn, end_pfn;
+ void *rom;
+
+ if (off + count > entry->pcilen)
+ count = entry->pcilen - off;
+
+ start_pfn = PFN_DOWN(entry->rom + off);
+ end_pfn = PFN_UP(entry->rom + off + count);
+ if (pfn_range_is_mapped(start_pfn, end_pfn)) {
+ rom = phys_to_virt(entry->rom);
+ memcpy(buf, rom + off, count);
+ } else {
+ rom = ioremap(entry->rom + off, count);
+ if (rom) {
+ memcpy_fromio(buf, rom, count);
+ iounmap(rom);
+ } else
+ count = 0;
+ }
+ }
+
+ return count;
+}
+
+static int __init add_sysfs_fw_setup_pci_entry(
+ struct firmware_setup_pci_entry *entry)
+{
+ int retval = 0;
+ static int setup_pci_entries_nr;
+ static struct kset *setup_pci_kset;
+ struct bin_attribute *attr;
+
+ kobject_init(&entry->kobj, &setup_pci_ktype);
+
+ if (!setup_pci_kset) {
+ setup_pci_kset = kset_create_and_add("setup_pci", NULL,
+ firmware_kobj);
+ if (!setup_pci_kset)
+ return -ENOMEM;
+ }
+
+ entry->kobj.kset = setup_pci_kset;
+ retval = kobject_add(&entry->kobj, NULL, "%d", setup_pci_entries_nr++);
+ if (retval) {
+ kobject_put(&entry->kobj);
+ return retval;
+ }
+
+ attr = kzalloc(sizeof(*attr), GFP_ATOMIC);
+ if (!attr)
+ return -ENOMEM;
+
+ sysfs_bin_attr_init(attr);
+ attr->size = entry->pcilen;
+ attr->attr.name = "rom";
+ attr->attr.mode = S_IRUSR;
+ attr->read = setup_pci_rom_read;
+ retval = sysfs_create_bin_file(&entry->kobj, attr);
+ if (retval)
+ kfree(attr);
+ entry->rom_attr = attr;
+
+ return retval;
+}
+
+static int __init firmware_setup_pci_init(void)
+{
+ struct firmware_setup_pci_entry *entry;
+
+ list_for_each_entry(entry, &setup_pci_entries, list)
+ add_sysfs_fw_setup_pci_entry(entry);
+
+ return 0;
+}
+late_initcall(firmware_setup_pci_init);
+#endif
+
int pcibios_enable_device(struct pci_dev *dev, int mask)
{
int err;
--
1.8.4.5
On Sat, Feb 28, 2015 at 6:17 PM, Yinghai Lu <[email protected]> wrote:
> We should access variable with referrence instead of using physical
> address as value.
>
> Cc: Matt Fleming <[email protected]>
> Cc: Borislav Petkov <[email protected]>
> Signed-off-by: Yinghai Lu <[email protected]>
> ---
> arch/x86/kernel/setup.c | 8 +++++++-
> 1 file changed, 7 insertions(+), 1 deletion(-)
>
> diff --git a/arch/x86/kernel/setup.c b/arch/x86/kernel/setup.c
> index 98dc931..05d444f 100644
> --- a/arch/x86/kernel/setup.c
> +++ b/arch/x86/kernel/setup.c
> @@ -429,7 +429,13 @@ static void __init reserve_initrd(void)
>
> static void __init parse_kaslr_setup(u64 pa_data, u32 data_len)
> {
> - kaslr_enabled = (bool)(pa_data + sizeof(struct setup_data));
> + /* kaslr_setup_data is defined in aslr.c */
> + unsigned char *data;
> + unsigned long offset = sizeof(struct setup_data);
> +
> + data = early_memremap(pa_data, offset + 1);
> + kaslr_enabled = *(data + offset);
> + early_memunmap(data, offset + 1);
> }
>
> static void __init parse_setup_data(void)
> --
Oh, no. The offending commit already got into Linus' tree.
commit f47233c2d34f243ecdaac179c3408a39ff9216a7
Author: Jiri Kosina <[email protected]>
Date: Fri Feb 13 16:04:55 2015 +0100
x86/mm/ASLR: Propagate base load address calculation