2008-06-26 08:14:06

by Bernhard Walle

[permalink] [raw]
Subject: x86: Add /sys/firmware/memmap

This patch series adds a new interface /sys/firmware/memmap
to export the BIOS (Firmware) provided memory map via sysfs
for usage with kexec.

While the first patch adds the generic interface, the second
patch implements that interface for E820 on x86. EFI is on the TODO
list, and other architectures can be added later.


Signed-off-by: Bernhard Walle <[email protected]>



2008-06-26 08:14:27

by Bernhard Walle

[permalink] [raw]
Subject: [PATCH 1/2] Add /sys/firmware/memmap

This patch adds /sys/firmware/memmap interface that represents the BIOS
(or Firmware) provided memory map. The tree looks like:

/sys/firmware/memmap/0/start (hex number)
end (hex number)
type (string)
... /1/start
end
type

With the following shell snippet one can print the memory map in the same form
the kernel prints itself when booting on x86 (the E820 map).

--------- 8< --------------------------
#!/bin/sh
cd /sys/firmware/memmap
for dir in * ; do
start=$(cat $dir/start)
end=$(cat $dir/end)
type=$(cat $dir/type)
printf "%016x-%016x (%s)\n" $start $[ $end +1] "$type"
done
--------- >8 --------------------------

That patch only provides the needed interface:

1. The sysfs interface.
2. The structure and enumeration definition.
3. The function firmware_map_add() and firmware_map_add_early()
that should be called from architecture code (E820/EFI, for
example) to add the contents to the interface.

If the kernel is compiled without CONFIG_FIRMWARE_MEMMAP, the interface does
nothing without cluttering the architecture-specific code with #ifdef's.

The purpose of the new interface is kexec: While /proc/iomem represents
the *used* memory map (e.g. modified via kernel parameters like 'memmap'
and 'mem'), the /sys/firmware/memmap tree represents the unmodified memory
map provided via the firmware. So kexec can:

- use the original memory map for rebooting,
- use the /proc/iomem for setting up the ELF core headers for kdump
case that should only represent the memory of the system.

The patch has been tested on i386 and x86_64.


Signed-off-by: Bernhard Walle <[email protected]>
---
drivers/firmware/Kconfig | 4 +
drivers/firmware/Makefile | 1 +
drivers/firmware/memmap.c | 164 ++++++++++++++++++++++++++++++++++++++++++
include/linux/firmware-map.h | 63 ++++++++++++++++
4 files changed, 232 insertions(+), 0 deletions(-)
create mode 100644 drivers/firmware/memmap.c
create mode 100644 include/linux/firmware-map.h

diff --git a/drivers/firmware/Kconfig b/drivers/firmware/Kconfig
index dc2cec6..1008737 100644
--- a/drivers/firmware/Kconfig
+++ b/drivers/firmware/Kconfig
@@ -26,6 +26,10 @@ config EDD_OFF
kernel. Say N if you want EDD enabled by default. EDD can be dynamically set
using the kernel parameter 'edd={on|skipmbr|off}'.

+config FIRMWARE_MEMMAP
+ def_bool y
+ depends on X86_64 || X86_32
+
config EFI_VARS
tristate "EFI Variable Support via sysfs"
depends on EFI
diff --git a/drivers/firmware/Makefile b/drivers/firmware/Makefile
index 4c91471..1c3c173 100644
--- a/drivers/firmware/Makefile
+++ b/drivers/firmware/Makefile
@@ -10,3 +10,4 @@ obj-$(CONFIG_DCDBAS) += dcdbas.o
obj-$(CONFIG_DMIID) += dmi-id.o
obj-$(CONFIG_ISCSI_IBFT_FIND) += iscsi_ibft_find.o
obj-$(CONFIG_ISCSI_IBFT) += iscsi_ibft.o
+obj-$(CONFIG_FIRMWARE_MEMMAP) += memmap.o
diff --git a/drivers/firmware/memmap.c b/drivers/firmware/memmap.c
new file mode 100644
index 0000000..998166a
--- /dev/null
+++ b/drivers/firmware/memmap.c
@@ -0,0 +1,164 @@
+/*
+ * linux/drivers/firmware/memmap.c
+ * Copyright (C) 2008 SUSE LINUX Products GmbH
+ * by Bernhard Walle <[email protected]>
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License v2.0 as published by
+ * the Free Software Foundation
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ */
+
+#include <linux/string.h>
+#include <linux/firmware-map.h>
+#include <linux/kernel.h>
+#include <linux/module.h>
+#include <linux/types.h>
+#include <linux/bootmem.h>
+
+/*
+ * Registration functions ------------------------------------------------------
+ */
+
+/*
+ * Firmware memory map entries
+ */
+LIST_HEAD(map_entries);
+
+/**
+ * Common implementation of firmware_map_add() and firmware_map_add_early()
+ * which expects a pre-allocated struct firmware_map_entry.
+ *
+ * @start: Start of the memory range.
+ * @end: End of the memory range (inclusive).
+ * @type: Type of the memory range.
+ * @entry: Pre-allocated (either kmalloc() or bootmem allocator), uninitialised
+ * entry.
+ */
+static int firmware_map_add_entry(resource_size_t start, resource_size_t end,
+ const char *type,
+ struct firmware_map_entry *entry)
+{
+ BUG_ON(start > end);
+
+ entry->start = start;
+ entry->end = end;
+ entry->type = type;
+ INIT_LIST_HEAD(&entry->list);
+
+ list_add_tail(&entry->list, &map_entries);
+
+ return 0;
+}
+
+int firmware_map_add(resource_size_t start, resource_size_t end,
+ const char *type)
+{
+ struct firmware_map_entry *entry;
+
+ entry = kmalloc(sizeof(struct firmware_map_entry), GFP_ATOMIC);
+ WARN_ON(!entry);
+ if (!entry)
+ return -ENOMEM;
+
+ return firmware_map_add_entry(start, end, type, entry);
+}
+
+int firmware_map_add_early(resource_size_t start, resource_size_t end,
+ const char *type)
+{
+ struct firmware_map_entry *entry;
+
+ entry = alloc_bootmem_low(sizeof(struct firmware_map_entry));
+ WARN_ON(!entry);
+ if (!entry)
+ return -ENOMEM;
+
+ return firmware_map_add_entry(start, end, type, entry);
+}
+
+/*
+ * Sysfs functions
+ */
+
+struct memmap_attribute {
+ struct attribute attr;
+ ssize_t (*show)(struct firmware_map_entry *entry, char *buf);
+};
+
+static ssize_t start_show(struct firmware_map_entry *entry, char *buf)
+{
+ return snprintf(buf, PAGE_SIZE, "0x%llx\n", entry->start);
+}
+
+static ssize_t end_show(struct firmware_map_entry *entry, char *buf)
+{
+ return snprintf(buf, PAGE_SIZE, "0x%llx\n", entry->end);
+}
+
+static ssize_t type_show(struct firmware_map_entry *entry, char *buf)
+{
+ return snprintf(buf, PAGE_SIZE, "%s\n", entry->type);
+}
+
+struct memmap_attribute memmap_start_attr = __ATTR_RO(start);
+struct memmap_attribute memmap_end_attr = __ATTR_RO(end);
+struct memmap_attribute memmap_type_attr = __ATTR_RO(type);
+
+/*
+ * These are default attributes that are added for every memmap entry.
+ */
+static struct attribute *def_attrs[] = {
+ &memmap_start_attr.attr,
+ &memmap_end_attr.attr,
+ &memmap_type_attr.attr,
+ NULL
+};
+
+#define to_memmap_attr(_attr) container_of(_attr, struct memmap_attribute, attr)
+#define to_memmap_entry(obj) container_of(obj, struct firmware_map_entry, kobj)
+
+static ssize_t memmap_attr_show(struct kobject *kobj,
+ struct attribute *attr, char *buf)
+{
+ struct firmware_map_entry *entry = to_memmap_entry(kobj);
+ struct memmap_attribute *memmap_attr = to_memmap_attr(attr);
+
+ return memmap_attr->show(entry, buf);
+}
+
+static struct sysfs_ops memmap_attr_ops = {
+ .show = memmap_attr_show,
+};
+
+static struct kobj_type memmap_ktype = {
+ .sysfs_ops = &memmap_attr_ops,
+ .default_attrs = def_attrs,
+};
+
+static int __init memmap_init(void)
+{
+ int i = 0;
+ struct firmware_map_entry *entry;
+ struct kset *memmap_kset;
+
+ memmap_kset = kset_create_and_add("memmap", NULL, firmware_kobj);
+ WARN_ON(!memmap_kset);
+ if (!memmap_kset)
+ return -ENOMEM;
+
+ list_for_each_entry(entry, &map_entries, list) {
+ entry->kobj.kset = memmap_kset;
+ kobject_init_and_add(&entry->kobj, &memmap_ktype,
+ NULL, "%d", i++);
+ }
+
+ return 0;
+}
+late_initcall(memmap_init);
+
diff --git a/include/linux/firmware-map.h b/include/linux/firmware-map.h
new file mode 100644
index 0000000..c6bd934
--- /dev/null
+++ b/include/linux/firmware-map.h
@@ -0,0 +1,63 @@
+/*
+ * include/linux/firmware-map.h:
+ * Copyright (C) 2008 SUSE LINUX Products GmbH
+ * by Bernhard Walle <[email protected]>
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License v2.0 as published by
+ * the Free Software Foundation
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ */
+#ifndef _LINUX_FIRMWARE_MAP_H
+#define _LINUX_FIRMWARE_MAP_H
+
+#include <linux/list.h>
+#include <linux/kobject.h>
+
+
+/*
+ * Firmware map entry. Because firmware memory maps are flat and not
+ * hierarchical, it's ok to organise them in a linked list. No parent
+ * information is necessary as for the resource tree.
+ */
+struct firmware_map_entry {
+ resource_size_t start; /* start of the memory range */
+ resource_size_t end; /* end of the memory range (incl.) */
+ const char *type; /* type of the memory range */
+ struct list_head list; /* entry for the linked list */
+ struct kobject kobj; /* kobject for each entry */
+};
+
+/**
+ * Adds a firmware mapping entry. This function uses kmalloc() for memory
+ * allocation. Use firmware_map_add_early() if you want to use the bootmem
+ * allocator.
+ *
+ * @start: Start of the memory range.
+ * @end: End of the memory range (inclusive).
+ * @type: Type of the memory range.
+ *
+ * Returns 0 on success, or -ENOMEM if no memory could be allocated.
+ */
+int firmware_map_add(resource_size_t start, resource_size_t end,
+ const char *type);
+
+/**
+ * Adds a firmware mapping entry. This function uses the bootmem allocator
+ * for memory allocation. Use firmware_map_add() if you want to use kmalloc().
+ *
+ * @start: Start of the memory range.
+ * @end: End of the memory range (inclusive).
+ * @type: Type of the memory range.
+ *
+ * Returns 0 on success, or -ENOMEM if no memory could be allocated.
+ */
+int firmware_map_add_early(resource_size_t start, resource_size_t end,
+ const char *type);
+
+#endif /* _LINUX_FIRMWARE_MAP_H */
--
1.5.6

2008-06-26 08:14:55

by Bernhard Walle

[permalink] [raw]
Subject: [PATCH 2/2] Use FIRMWARE_MEMMAP on x86/E820

This patch uses the /sys/firmware/memmap interface provided in the last patch
on the x86 architecture when E820 is used. The patch copies the E820
memory map very early, and registers the E820 map afterwards via
firmware_map_add_early().


Signed-off-by: Bernhard Walle <[email protected]>
---
arch/x86/kernel/e820.c | 42 ++++++++++++++++++++++++++++++++++++------
include/asm-x86/e820.h | 2 ++
2 files changed, 38 insertions(+), 6 deletions(-)

diff --git a/arch/x86/kernel/e820.c b/arch/x86/kernel/e820.c
index 3771386..c27863e 100644
--- a/arch/x86/kernel/e820.c
+++ b/arch/x86/kernel/e820.c
@@ -19,6 +19,7 @@
#include <linux/mm.h>
#include <linux/pfn.h>
#include <linux/suspend.h>
+#include <linux/firmware-map.h>

#include <asm/pgtable.h>
#include <asm/page.h>
@@ -27,7 +28,22 @@
#include <asm/setup.h>
#include <asm/trampoline.h>

+/*
+ * The e820 map is the map that gets modified e.g. with command line parameters
+ * and that is also registered with modifications in the kernel resource tree
+ * with the iomem_resource as parent.
+ *
+ * The e820_saved is directly saved after the BIOS-provided memory map is
+ * copied. It doesn't get modified afterwards. It's registered for the
+ * /sys/firmware/memmap interface.
+ *
+ * That memory map is not modified and is used as base for kexec. The kexec'd
+ * kernel should get the same memory map as the firmware provides. Then the
+ * user can e.g. boot the original kernel with mem=1G while still booting the
+ * next kernel with full memory.
+ */
struct e820map e820;
+struct e820map e820_saved;

/* For PCI or other memory-mapped resources */
unsigned long pci_mem_start = 0xaeedbabe;
@@ -1056,6 +1072,16 @@ void __init finish_e820_parsing(void)
}
}

+static inline const char *e820_type_to_string(int e820_type)
+{
+ switch (e820_type) {
+ case E820_RAM: return "System RAM";
+ case E820_ACPI: return "ACPI Tables";
+ case E820_NVS: return "ACPI Non-volatile Storage";
+ default: return "reserved";
+ }
+}
+
/*
* Mark e820 reserved areas as busy for the resource manager.
*/
@@ -1066,12 +1092,7 @@ void __init e820_reserve_resources(void)

res = alloc_bootmem_low(sizeof(struct resource) * e820.nr_map);
for (i = 0; i < e820.nr_map; i++) {
- switch (e820.map[i].type) {
- case E820_RAM: res->name = "System RAM"; break;
- case E820_ACPI: res->name = "ACPI Tables"; break;
- case E820_NVS: res->name = "ACPI Non-volatile Storage"; break;
- default: res->name = "reserved";
- }
+ res->name = e820_type_to_string(e820.map[i].type);
res->start = e820.map[i].addr;
res->end = res->start + e820.map[i].size - 1;
#ifndef CONFIG_RESOURCES_64BIT
@@ -1084,6 +1105,13 @@ void __init e820_reserve_resources(void)
insert_resource(&iomem_resource, res);
res++;
}
+
+ for (i = 0; i < e820_saved.nr_map; i++) {
+ struct e820entry *entry = &e820_saved.map[i];
+ firmware_map_add_early(entry->addr,
+ entry->addr + entry->size - 1,
+ e820_type_to_string(entry->type));
+ }
}

char *__init default_machine_specific_memory_setup(void)
@@ -1119,6 +1147,8 @@ char *__init default_machine_specific_memory_setup(void)
e820_add_region(HIGH_MEMORY, mem_size << 10, E820_RAM);
}

+ memcpy(&e820_saved, &e820, sizeof(struct e820map));
+
/* In case someone cares... */
return who;
}
diff --git a/include/asm-x86/e820.h b/include/asm-x86/e820.h
index 13a0a5f..607f940 100644
--- a/include/asm-x86/e820.h
+++ b/include/asm-x86/e820.h
@@ -56,7 +56,9 @@ struct e820map {
struct e820entry map[E820_X_MAX];
};

+/* see comment in arch/x86/kernel/e820.c */
extern struct e820map e820;
+extern struct e820map e820_saved;

extern int e820_any_mapped(u64 start, u64 end, unsigned type);
extern int e820_all_mapped(u64 start, u64 end, unsigned type);
--
1.5.6