Hi,
Attached is an updated patch against 2.6.0-test4 that enables Extensible Firmware Interface (EFI) awareness in ia32 Linux kernels. I've incorporated the feedback I've received since my initial posting (http://marc.theaimsgroup.com/?l=linux-kernel&m=105848983307228&w=2) including:
- reorganized initialization code to minimize indentation changes to existing code path.
- removed proc version of efivars driver - this driver has been rewritten and will be sent via a separate patch shortly.
- reserve memory for memmap using bootmem allocator to ensure that EFI call set_virtual_address_map() functions properly.
This patch adds/modifies the following files:
arch/i386/kernel/Makefile | 2
arch/i386/kernel/acpi/boot.c | 10
arch/i386/kernel/efi.c | 626 +++++++++++++++++++++++++++++++++++++++++++
arch/i386/kernel/efi_stub.S | 125 ++++++++
arch/i386/kernel/reboot.c | 12
arch/i386/kernel/setup.c | 212 +++++++++++---
arch/i386/kernel/time.c | 60 +++-
drivers/acpi/Kconfig | 11
drivers/acpi/osl.c | 1
include/asm-i386/setup.h | 28 +
include/linux/efi.h | 28 +
init/main.c | 3
12 files changed, 1066 insertions(+), 52 deletions(-)
I've been able to successfully boot kernels on EFI systems with this patch using version 3.4 of the ELILO boot loader released last week by Stephane Eranian as well as using GRUB on ia32 systems with legacy BIOS.
Special thanks to Bjorn for providing valuable feedback on the initial patch.
Please consider applying.
thanks,
matt
diff -urN linux-2.6.0-test4/arch/i386/kernel/acpi/boot.c linux-2.6.0-test4-efi/arch/i386/kernel/acpi/boot.c
--- linux-2.6.0-test4/arch/i386/kernel/acpi/boot.c 2003-08-22 16:59:02.000000000 -0700
+++ linux-2.6.0-test4-efi/arch/i386/kernel/acpi/boot.c 2003-08-28 16:05:49.000000000 -0700
@@ -26,6 +26,7 @@
#include <linux/init.h>
#include <linux/config.h>
#include <linux/acpi.h>
+#include <linux/efi.h>
#include <asm/pgalloc.h>
#include <asm/io_apic.h>
#include <asm/apic.h>
@@ -274,7 +275,14 @@
acpi_find_rsdp (void)
{
unsigned long rsdp_phys = 0;
-
+ extern int efi_enabled;
+
+ if (efi_enabled) {
+ if (efi.acpi20)
+ return __pa(efi.acpi20);
+ else if (efi.acpi)
+ return __pa(efi.acpi);
+ }
/*
* Scan memory looking for the RSDP signature. First search EBDA (low
* memory) paragraphs and then search upper memory (E0000-FFFFF).
diff -urN linux-2.6.0-test4/arch/i386/kernel/efi.c linux-2.6.0-test4-efi/arch/i386/kernel/efi.c
--- linux-2.6.0-test4/arch/i386/kernel/efi.c 1969-12-31 16:00:00.000000000 -0800
+++ linux-2.6.0-test4-efi/arch/i386/kernel/efi.c 2003-08-28 18:04:04.000000000 -0700
@@ -0,0 +1,626 @@
+/*
+ * Extensible Firmware Interface
+ *
+ * Based on Extensible Firmware Interface Specification version 1.0
+ *
+ * Copyright (C) 1999 VA Linux Systems
+ * Copyright (C) 1999 Walt Drummond <[email protected]>
+ * Copyright (C) 1999-2002 Hewlett-Packard Co.
+ * David Mosberger-Tang <[email protected]>
+ * Stephane Eranian <[email protected]>
+ *
+ * All EFI Runtime Services are not implemented yet as EFI only
+ * supports physical mode addressing on SoftSDV. This is to be fixed
+ * in a future version. --drummond 1999-07-20
+ *
+ * Implemented EFI runtime services and virtual mode calls. --davidm
+ *
+ * Goutham Rao: <[email protected]>
+ * Skip non-WB memory and ignore empty memory ranges.
+ */
+
+#include <linux/config.h>
+#include <linux/kernel.h>
+#include <linux/init.h>
+#include <linux/types.h>
+#include <linux/time.h>
+#include <linux/spinlock.h>
+#include <linux/bootmem.h>
+#include <linux/ioport.h>
+#include <linux/proc_fs.h>
+#include <linux/efi.h>
+
+#include <asm/setup.h>
+#include <asm/io.h>
+#include <asm/page.h>
+#include <asm/pgtable.h>
+#include <asm/processor.h>
+#include <asm/desc.h>
+#include <asm/pgalloc.h>
+#include <asm/tlbflush.h>
+
+#define EFI_DEBUG 0
+#define PFX "EFI: "
+
+extern efi_status_t asmlinkage efi_call_phys(void *, ...);
+
+struct efi efi;
+struct efi efi_phys __initdata;
+struct efi_memory_map memmap __initdata;
+
+static int efi_pte = 0;
+static unsigned long efi_temp_page_table[1024]
+ __attribute__ ((aligned(4096))) __initdata ;
+extern pgd_t swapper_pg_dir[1024];
+
+/*
+ * efi_dir is allocated here, but the directory isn't created
+ * here, as proc_mkdir() doesn't work this early in the bootup
+ * process. Therefore, each module, like efivars, must test for
+ * if (!efi_dir) efi_dir = proc_mkdir("efi", NULL);
+ * prior to creating their own entries under /proc/efi.
+ */
+#ifdef CONFIG_PROC_FS
+struct proc_dir_entry *efi_dir;
+#endif
+
+
+/*
+ * To call an EFI runtime service in physical addressing mode we need a
+ * prelog/epilog around the invocation to disable interrupts, to claim the
+ * EFI runtime service handler exclusively, and to duplicate the kernel
+ * mapping in low memory space, say 0 - 3G.
+ */
+
+static unsigned long efi_rt_eflags;
+static spinlock_t efi_rt_lock = SPIN_LOCK_UNLOCKED;
+static pgd_t efi_bak_pg_dir_pointer[2];
+
+static void efi_call_phys_prelog(void)
+{
+ unsigned long cr4;
+ unsigned long temp;
+
+ spin_lock(&efi_rt_lock);	/* dropped again in efi_call_phys_epilog() */
+ local_irq_save(efi_rt_eflags);	/* restored in efi_call_phys_epilog() */
+
+ /*
+ * If I don't have PSE, I should just duplicate two entries in page
+ * directory. If I have PSE, I just need to duplicate one entry in
+ * page directory.
+ */
+ __asm__ __volatile__("movl %%cr4, %0":"=r"(cr4));
+
+ if (cr4 & X86_CR4_PSE) {
+ efi_bak_pg_dir_pointer[0].pgd =
+ swapper_pg_dir[pgd_index(0)].pgd;
+ swapper_pg_dir[0].pgd =
+ swapper_pg_dir[pgd_index(PAGE_OFFSET)].pgd;
+ } else {
+ efi_bak_pg_dir_pointer[0].pgd =
+ swapper_pg_dir[pgd_index(0)].pgd;
+ efi_bak_pg_dir_pointer[1].pgd =
+ swapper_pg_dir[pgd_index(0x400000)].pgd;
+ swapper_pg_dir[pgd_index(0)].pgd =
+ swapper_pg_dir[pgd_index(PAGE_OFFSET)].pgd;
+ temp = PAGE_OFFSET + 0x400000;
+ swapper_pg_dir[pgd_index(0x400000)].pgd =
+ swapper_pg_dir[pgd_index(temp)].pgd;
+ }
+
+ /*
+ * Only one processor can reach here. After the lock is
+ * released, the original page table is restored.
+ */
+ local_flush_tlb();
+
+ cpu_gdt_descr[0].address = __pa(cpu_gdt_descr[0].address);	/* undone in epilog */
+ __asm__ __volatile__("lgdt %0":"=m"
+ (*(struct Xgt_desc_struct *) __pa(&cpu_gdt_descr[0])));
+}
+
+static void efi_call_phys_epilog(void)
+{
+ unsigned long cr4;
+
+ cpu_gdt_descr[0].address =
+ (unsigned long) __va(cpu_gdt_descr[0].address);
+ __asm__ __volatile__("lgdt %0":"=m"(cpu_gdt_descr));
+ __asm__ __volatile__("movl %%cr4, %0":"=r"(cr4));
+
+ if (cr4 & X86_CR4_PSE) {
+ swapper_pg_dir[pgd_index(0)].pgd =
+ efi_bak_pg_dir_pointer[0].pgd;
+ } else {
+ swapper_pg_dir[pgd_index(0)].pgd =
+ efi_bak_pg_dir_pointer[0].pgd;
+ swapper_pg_dir[pgd_index(0x400000)].pgd =
+ efi_bak_pg_dir_pointer[1].pgd;
+ }
+
+ /*
+ * Because only one processor can reach here, after the lock is
+ * released the original page table is restored.
+ */
+ local_flush_tlb();
+
+ local_irq_restore(efi_rt_eflags);
+ spin_unlock(&efi_rt_lock);
+}
+
+static efi_status_t
+phys_efi_set_virtual_address_map(unsigned long memory_map_size,
+ unsigned long descriptor_size,
+ u32 descriptor_version,
+ efi_memory_desc_t *virtual_map)
+{
+ efi_status_t status = EFI_NOT_FOUND;
+
+ efi_call_phys_prelog();
+ status = efi_call_phys(efi_phys.set_virtual_address_map,
+ memory_map_size, descriptor_size,
+ descriptor_version, virtual_map);
+ efi_call_phys_epilog();
+ return status;
+}
+
+efi_status_t
+phys_efi_get_time(efi_time_t *tm, efi_time_cap_t *tc)
+{
+ efi_status_t status = EFI_NOT_FOUND;
+
+ efi_call_phys_prelog();
+ status = efi_call_phys(efi_phys.get_time, tm, tc);
+ efi_call_phys_epilog();
+ return status;
+}
+
+void efi_gettimeofday(struct timespec *tv)
+{
+ efi_time_t tm;	/* wall-clock time as reported by efi.get_time() */
+
+ memset(tv, 0, sizeof(*tv));	/* clear the whole timespec, not pointer-size bytes */
+ if ((*efi.get_time) (&tm, 0) != EFI_SUCCESS)
+ return;	/* *tv stays all-zero when the firmware call fails */
+
+ tv->tv_sec = mktime(tm.year, tm.month, tm.day, tm.hour, tm.minute,
+ tm.second);
+ tv->tv_nsec = tm.nanosecond;
+}
+
+static int
+is_available_memory(efi_memory_desc_t * md)
+{
+ if (!(md->attribute & EFI_MEMORY_WB))
+ return 0;
+
+ switch (md->type) {
+ case EFI_LOADER_CODE:
+ case EFI_LOADER_DATA:
+ case EFI_BOOT_SERVICES_CODE:
+ case EFI_BOOT_SERVICES_DATA:
+ case EFI_CONVENTIONAL_MEMORY:
+ return 1;
+ }
+ return 0;
+}
+
+/*
+ * Walks the EFI memory map and calls CALLBACK once for each EFI
+ * memory descriptor that has memory that is available for kernel use.
+ */
+void efi_memmap_walk(efi_freemem_callback_t callback, void *arg)
+{
+ int prev_valid = 0;
+ struct range {
+ unsigned long start;
+ unsigned long end;
+ } prev, curr;
+ efi_memory_desc_t *md;
+ unsigned long start, end;
+ int i;
+
+ for (i = 0; i < memmap.nr_map; i++) {
+ md = &memmap.map[i];
+
+ if (md->num_pages == 0) /* no pages means nothing to do... */
+ continue;
+ if (is_available_memory(md)) {
+ curr.start = md->phys_addr;
+ curr.end = curr.start +
+ (md->num_pages << EFI_PAGE_SHIFT);
+
+ if (!prev_valid) {
+ prev = curr;
+ prev_valid = 1;
+ } else {
+ if (curr.start < prev.start)
+ printk(PFX "Unordered memory map\n");
+ if (prev.end == curr.start)
+ prev.end = curr.end;
+ else {
+ start =
+ (unsigned long) (PAGE_ALIGN(prev.start));
+ end = (unsigned long) (prev.end & PAGE_MASK);
+ if ((end > start)
+ && (*callback) (start, end, arg) < 0)
+ return;
+ prev = curr;
+ }
+ }
+ } else
+ continue;
+ }
+ if (prev_valid) {
+ start = (unsigned long) PAGE_ALIGN(prev.start);
+ end = (unsigned long) (prev.end & PAGE_MASK);
+ if (end > start)
+ (*callback) (start, end, arg);
+ }
+}
+
+/*
+ * mem_start is a physical address.
+ */
+unsigned long __init
+efi_setup_temp_page_table(unsigned long mem_start, unsigned long size)
+{
+ unsigned long region_start_addr = (mem_start & 0xfffff000);
+ unsigned long region_end_addr = mem_start + size - 1;	/* last byte, inclusive */
+ unsigned long virt_start_addr = 0;
+
+ if (region_start_addr > region_end_addr)
+ BUG();
+
+ virt_start_addr = (unsigned long) __va(MAXMEM) +
+ (efi_pte << EFI_PAGE_SHIFT) +
+ (mem_start & 0xfff);
+ /* "<=": the page holding the inclusive last byte must be mapped too */
+ while (region_start_addr <= region_end_addr) {
+ if (efi_pte == 1024) {
+ printk(PFX "EFI Page Table is full!\n");
+ BUG();	/* never write past efi_temp_page_table[1023] */
+ }
+
+ efi_temp_page_table[efi_pte] = (region_start_addr | 7);
+ if (efi_pte == 0)	/* first use: hook the table into swapper_pg_dir */
+ swapper_pg_dir[((unsigned long) ((unsigned long)
+ __va(MAXMEM) + (efi_pte << EFI_PAGE_SHIFT))) >> 22].pgd =
+ (unsigned long) (__pa(&(efi_temp_page_table[0])) | 7);
+ region_start_addr += 0x1000;
+ efi_pte++;
+ }
+ local_flush_tlb();
+ return virt_start_addr;	/* virtual alias of mem_start, page offset kept */
+}
+
+void __init efi_init(void)
+{
+ efi_config_table_t *config_tables;
+ efi_char16_t *c16;
+ char vendor[100] = "unknown";
+ int i = 0;
+
+ /*
+ * Set up the page tables for EFI system table.
+ */
+ memset(&efi, 0, sizeof(efi) );
+ memset(&efi_phys, 0, sizeof(efi_phys));
+
+ efi_phys.systab = EFI_SYSTAB;
+ memmap.phys_map = EFI_MEMMAP;
+ memmap.nr_map= EFI_MEMMAP_SIZE/EFI_MEMDESC_SIZE;
+
+ efi.systab =
+ (efi_system_table_t *)
+ efi_setup_temp_page_table((unsigned long) efi_phys.systab,
+ sizeof(efi_system_table_t));
+ /*
+ * Verify the EFI Table
+ */
+ if (efi.systab == NULL)
+ printk(PFX "Woah! Can't find EFI system table.\n");
+ if (efi.systab->hdr.signature != EFI_SYSTEM_TABLE_SIGNATURE)
+ printk(PFX "Woah! EFI system table signature incorrect\n");
+ if ((efi.systab->hdr.revision ^ EFI_SYSTEM_TABLE_REVISION) >> 16 != 0)
+ printk(PFX
+ "Warning: EFI system table major version mismatch: "
+ "got %d.%02d, expected %d.%02d\n",
+ efi.systab->hdr.revision >> 16,
+ efi.systab->hdr.revision & 0xffff,
+ EFI_SYSTEM_TABLE_REVISION >> 16,
+ EFI_SYSTEM_TABLE_REVISION & 0xffff);
+
+ /* Show what we know for posterity */
+ c16 = (efi_char16_t *) efi_setup_temp_page_table(efi.systab->fw_vendor, 2);
+ if (c16) {
+ /*
+ * Copy at most sizeof(vendor) - 1 chars so the '\0' below fits.
+ */
+ for (i = 0; i < (int) sizeof(vendor) - 1 && *c16; ++i) {
+ vendor[i] = *c16++;
+ /*
+ * Crossed a page boundary: map the page holding character i + 1.
+ */
+ if ((((unsigned long) c16) & 0xfff) == 0)
+ c16 =
+ (efi_char16_t *) efi_setup_temp_page_table(
+ ((unsigned long) (efi.systab->fw_vendor)) + (i + 1) * sizeof(efi_char16_t), 4096);
+ }
+ vendor[i] = '\0';
+ }
+
+ printk(PFX "EFI v%u.%.02u by %s \n",
+ efi.systab->hdr.revision >> 16,
+ efi.systab->hdr.revision & 0xffff, vendor);
+
+ /*
+ * Set up the page tables for config_tables.
+ */
+ config_tables = (efi_config_table_t *)
+ efi_setup_temp_page_table(efi.systab->tables,
+ efi.systab->nr_tables * sizeof (efi_config_table_t));
+
+ for (i = 0; i < efi.systab->nr_tables; i++) {
+ if (efi_guidcmp(config_tables[i].guid, MPS_TABLE_GUID) == 0) {
+ efi.mps = (void *)config_tables[i].table;
+ printk(" MPS=0x%lx ", config_tables[i].table);
+ } else
+ if (efi_guidcmp(config_tables[i].guid, ACPI_20_TABLE_GUID) == 0) {
+ efi.acpi20 = __va(config_tables[i].table);
+ printk(" ACPI 2.0=0x%lx ", config_tables[i].table);
+ } else
+ if (efi_guidcmp(config_tables[i].guid, ACPI_TABLE_GUID) == 0) {
+ efi.acpi = __va(config_tables[i].table);
+ printk(" ACPI=0x%lx ", config_tables[i].table);
+ } else
+ if (efi_guidcmp(config_tables[i].guid, SMBIOS_TABLE_GUID) == 0) {
+ efi.smbios = (void *) config_tables[i].table;
+ printk(" SMBIOS=0x%lx ", config_tables[i].table);
+ } else
+ if (efi_guidcmp(config_tables[i].guid, HCDP_TABLE_GUID) == 0) {
+ efi.hcdp = (void *)config_tables[i].table;
+ printk(" HCDP=0x%lx ", config_tables[i].table);
+ } else
+ if (efi_guidcmp(config_tables[i].guid, UGA_IO_PROTOCOL_GUID) == 0) {
+ efi.uga = (void *)config_tables[i].table;
+ printk(" UGA=0x%lx ", config_tables[i].table);
+ }
+ }
+ printk("\n");
+
+ /*
+ * Set up the page tables for runtime services. We need to map
+ * the runtime services table so that we can grab the physical
+ * address of the EFI runtime functions.
+ */
+
+ efi.systab->runtime =
+ (efi_runtime_services_t *) efi_setup_temp_page_table(
+ (unsigned long) efi.systab->runtime,
+ sizeof(efi_runtime_services_t));
+
+ /*
+ * We will only need *early* access to the following two EFI RT
+ * services before set_virtual_address_map is invoked.
+ */
+ efi_phys.get_time = (efi_get_time_t *) efi.systab->runtime->get_time;
+ efi_phys.set_virtual_address_map =
+ (efi_set_virtual_address_map_t *) efi.systab->runtime->set_virtual_address_map;
+
+ memmap.map = (efi_memory_desc_t *)
+ efi_setup_temp_page_table((unsigned long) EFI_MEMMAP,
+ EFI_MEMMAP_SIZE);
+ if (EFI_MEMDESC_SIZE != sizeof(efi_memory_desc_t)) {
+ printk(PFX "Warning! Kernel-defined memdesc doesn't "
+ "match the one from EFI!\n");
+ }
+}
+
+void __init efi_enter_virtual_mode(void)
+{
+ int i;
+ efi_memory_desc_t *md;
+ efi_status_t status;
+
+ memmap.map = ioremap((unsigned long) memmap.phys_map, EFI_MEMMAP_SIZE);
+
+ if (!memmap.map)	/* the loop below dereferences memmap.map, so stop here */
+ panic(PFX "ioremap of memmap.map failed \n");
+ /*
+ * start to set up the permanent virtual mapping.
+ */
+ efi.systab = NULL;
+
+ for (i = 0; i < memmap.nr_map; i++) {
+ md = &memmap.map[i];
+
+ if (md->attribute & EFI_MEMORY_RUNTIME) {
+ md->virt_addr =
+ (u64) ioremap((unsigned long) md->phys_addr,
+ (unsigned long) (md->num_pages
+ << EFI_PAGE_SHIFT));
+ if (!(unsigned long) md->virt_addr) {
+ printk(PFX "ioremap of md: 0x%lX failed \n",
+ (unsigned long) md->phys_addr);
+ }
+
+ if (((unsigned long)md->phys_addr <= (unsigned long)efi_phys.systab) && ((unsigned long)efi_phys.systab < md->phys_addr + ((unsigned long) md->num_pages << EFI_PAGE_SHIFT))) {
+ efi.systab = (efi_system_table_t *)
+ ((md->virt_addr - md->phys_addr) +
+ (u64)efi_phys.systab);
+ }
+ }
+ }
+
+ if (!efi.systab)	/* no runtime region contained the system table */
+ BUG();
+
+ /* hand the map, now carrying the virt_addr values set above, to EFI */
+ status = phys_efi_set_virtual_address_map(
+ EFI_MEMMAP_SIZE,
+ EFI_MEMDESC_SIZE,
+ EFI_MEMDESC_VERSION,
+ memmap.phys_map);
+
+ if (status != EFI_SUCCESS) {
+ printk ("You are screwed! "
+ "Unable to switch EFI into virtual mode "
+ "(status=%lu)\n", status);
+ panic("EFI call SetVirtualAddressMap() failed!");
+ }
+
+ /*
+ * Now that EFI is in virtual mode, update the function
+ * pointers in the runtime service table to the new virtual addresses
+ * so they may be called directly:
+ */
+
+ efi.get_time = (efi_get_time_t *) efi.systab->runtime->get_time;
+ efi.set_time = (efi_set_time_t *) efi.systab->runtime->set_time;
+ efi.get_wakeup_time = (efi_get_wakeup_time_t *)
+ efi.systab->runtime->get_wakeup_time;
+ efi.set_wakeup_time = (efi_set_wakeup_time_t *)
+ efi.systab->runtime->set_wakeup_time;
+ efi.get_variable = (efi_get_variable_t *)
+ efi.systab->runtime->get_variable;
+ efi.get_next_variable = (efi_get_next_variable_t *)
+ efi.systab->runtime->get_next_variable;
+ efi.set_variable = (efi_set_variable_t *)
+ efi.systab->runtime->set_variable;
+ efi.get_next_high_mono_count = (efi_get_next_high_mono_count_t *)
+ efi.systab->runtime->get_next_high_mono_count;
+ efi.reset_system = (efi_reset_system_t *)
+ efi.systab->runtime->reset_system;
+
+}
+
+void __init
+efi_initialize_iomem_resources(struct resource *code_resource,
+ struct resource *data_resource)
+{
+ struct resource *res;
+ efi_memory_desc_t *md;
+ int i;
+
+ for (i = 0; i < memmap.nr_map; i++) {
+ md = &memmap.map[i];
+
+ if ((md->phys_addr + (md->num_pages << EFI_PAGE_SHIFT)) >
+ 0x100000000ULL)
+ continue;
+ res = alloc_bootmem_low(sizeof(struct resource));
+ switch (md->type) {
+ case EFI_RESERVED_TYPE:
+ res->name = "Reserved Memory";
+ break;
+ case EFI_LOADER_CODE:
+ res->name = "Loader Code";
+ break;
+ case EFI_LOADER_DATA:
+ res->name = "Loader Data";
+ break;
+ case EFI_BOOT_SERVICES_DATA:
+ res->name = "BootServices Data";
+ break;
+ case EFI_BOOT_SERVICES_CODE:
+ res->name = "BootServices Code";
+ break;
+ case EFI_RUNTIME_SERVICES_CODE:
+ res->name = "Runtime Service Code";
+ break;
+ case EFI_RUNTIME_SERVICES_DATA:
+ res->name = "Runtime Service Data";
+ break;
+ case EFI_CONVENTIONAL_MEMORY:
+ res->name = "Conventional Memory";
+ break;
+ case EFI_UNUSABLE_MEMORY:
+ res->name = "Unusable Memory";
+ break;
+ case EFI_ACPI_RECLAIM_MEMORY:
+ res->name = "ACPI Reclaim";
+ break;
+ case EFI_ACPI_MEMORY_NVS:
+ res->name = "ACPI NVS";
+ break;
+ case EFI_MEMORY_MAPPED_IO:
+ res->name = "Memory Mapped IO";
+ break;
+ case EFI_MEMORY_MAPPED_IO_PORT_SPACE:
+ res->name = "Memory Mapped IO Port Space";
+ break;
+ default:
+ res->name = "Reserved";
+ break;
+ }
+ res->start = md->phys_addr;
+ res->end = res->start + ((md->num_pages << EFI_PAGE_SHIFT) - 1);
+ res->flags = IORESOURCE_MEM | IORESOURCE_BUSY;
+ if (request_resource(&iomem_resource, res))
+ printk(PFX "Failed to allocate res %s : 0x%lx-0x%lx\n",
+ res->name, res->start, res->end);
+
+ if (md->type == EFI_CONVENTIONAL_MEMORY) {
+ request_resource(res, code_resource);
+ request_resource(res, data_resource);
+ }
+ }
+}
+
+/*
+ * Reserve certain EFI related regions with the bootmem
+ * allocator - particularly the memmap.
+ */
+void __init efi_reserve_bootmem(void)
+{
+ reserve_bootmem((unsigned long)memmap.phys_map,
+ (memmap.nr_map * sizeof(efi_memory_desc_t)));
+}
+
+/*
+ * Convenience functions to obtain memory types and attributes
+ */
+
+u32 efi_mem_type(unsigned long phys_addr)
+{
+ efi_memory_desc_t *md;
+ int i;
+
+ for (i = 0; i < memmap.nr_map; i++) {
+ md = &memmap.map[i];
+ if ((md->phys_addr <= phys_addr) && (phys_addr <
+ (md->phys_addr + (md-> num_pages << EFI_PAGE_SHIFT)) ))
+ return md->type;
+ }
+ return 0;
+}
+
+u64 efi_mem_attributes(unsigned long phys_addr)
+{
+ efi_memory_desc_t *md;
+ int i;
+
+ for (i = 0; i < memmap.nr_map; i++) {
+ md = &memmap.map[i];
+ if ((md->phys_addr <= phys_addr) && (phys_addr <
+ (md->phys_addr + (md-> num_pages << EFI_PAGE_SHIFT)) ))
+ return md->attribute;
+ }
+ return 0;
+}
+
+void print_efi_memmap(void)
+{
+ efi_memory_desc_t *md;
+ int i;
+
+ for (i = 0; i < memmap.nr_map; i++) {
+ md = &memmap.map[i];
+ printk("mem%02u: type=%u, attr=0x%llx, range=[0x%016llx-0x%016llx) (%lluMB)\n",
+ i, md->type, md->attribute, md->phys_addr,
+ md->phys_addr + (md->num_pages << EFI_PAGE_SHIFT),
+ (md->num_pages >> (20 - EFI_PAGE_SHIFT)));
+ }
+}
+
diff -urN linux-2.6.0-test4/arch/i386/kernel/efi_stub.S linux-2.6.0-test4-efi/arch/i386/kernel/efi_stub.S
--- linux-2.6.0-test4/arch/i386/kernel/efi_stub.S 1969-12-31 16:00:00.000000000 -0800
+++ linux-2.6.0-test4-efi/arch/i386/kernel/efi_stub.S 2003-08-28 16:05:49.000000000 -0700
@@ -0,0 +1,125 @@
+/*
+ * EFI call stub for IA32.
+ *
+ * This stub allows us to make EFI calls in physical mode with interrupts
+ * turned off.
+ */
+
+#include <linux/config.h>
+#include <linux/linkage.h>
+#include <asm/page.h>
+#include <asm/pgtable.h>
+
+/*
+ * efi_call_phys(void *, ...) is a function with variable parameters.
+ * All the callers of this function assure that all the parameters are 4-bytes.
+ */
+
+/*
+ * In gcc calling convention, EBX, ESP, EBP, ESI and EDI are all callee save.
+ * So we'd better save all of them at the beginning of this function and restore
+ * at the end no matter how many we use, because we can not assure EFI runtime
+ * service functions will comply with gcc calling convention, too.
+ */
+
+.text
+.section .text, "a"
+ENTRY(efi_call_phys)
+ /*
+ * 0. The function can only be called in Linux kernel. So CS has been
+ * set to 0x0010, DS and SS have been set to 0x0018. In EFI, I found
+ * the values of these registers are the same. And, the corresponding
+ * GDT entries are identical. So I will do nothing about segment reg
+ * and GDT, but change GDT base register in prelog and epilog.
+ */
+
+ /*
+ * 1. Now I am running with EIP = <physical address> + PAGE_OFFSET.
+ * But to make it smoothly switch from virtual mode to flat mode.
+ * The mapping of lower virtual memory has been created in prelog and
+ * epilog.
+ */
+ movl $1f, %edx
+ subl $__PAGE_OFFSET, %edx
+ jmp *%edx
+1:
+
+ /*
+ * 2. Now on the top of stack is the return
+ * address in the caller of efi_call_phys(), then parameter 1,
+ * parameter 2, ..., param n. To make things easy, we save the return
+ * address of efi_call_phys in a global variable.
+ */
+ popl %edx
+ movl %edx, saved_return_addr
+ /* get the function pointer into ECX*/
+ popl %ecx
+ movl %ecx, efi_rt_function_ptr
+ movl $2f, %edx
+ subl $__PAGE_OFFSET, %edx
+ pushl %edx
+
+ /*
+ * 3. Clear PG bit in %CR0.
+ */
+ movl %cr0, %edx
+ andl $0x7fffffff, %edx
+ movl %edx, %cr0
+ jmp 1f
+1:
+
+ /*
+ * 4. Adjust stack pointer.
+ */
+ subl $__PAGE_OFFSET, %esp
+
+ /*
+ * 5. Call the physical function.
+ */
+ jmp *%ecx
+
+2:
+ /*
+ * 6. After EFI runtime service returns, control will return to
+ * following instruction. We'd better readjust stack pointer first.
+ */
+ addl $__PAGE_OFFSET, %esp
+
+ /*
+ * 7. Restore PG bit
+ */
+ movl %cr0, %edx
+ orl $0x80000000, %edx
+ movl %edx, %cr0
+ jmp 1f
+1:
+ /*
+ * 8. Now restore the virtual mode from flat mode by
+ * adding EIP with PAGE_OFFSET.
+ */
+ movl $1f, %edx
+ jmp *%edx
+1:
+
+ /*
+ * 9. Balance the stack. And because EAX contain the return value,
+ * we'd better not clobber it.
+ */
+ leal efi_rt_function_ptr, %edx
+ movl (%edx), %ecx
+ pushl %ecx
+
+ /*
+ * 10. Push the saved return address onto the stack and return.
+ */
+ leal saved_return_addr, %edx
+ movl (%edx), %ecx
+ pushl %ecx
+ ret
+.previous
+
+.data
+saved_return_addr:
+ .long 0
+efi_rt_function_ptr:
+ .long 0
diff -urN linux-2.6.0-test4/arch/i386/kernel/Makefile linux-2.6.0-test4-efi/arch/i386/kernel/Makefile
--- linux-2.6.0-test4/arch/i386/kernel/Makefile 2003-08-22 16:52:57.000000000 -0700
+++ linux-2.6.0-test4-efi/arch/i386/kernel/Makefile 2003-08-28 16:05:49.000000000 -0700
@@ -7,7 +7,7 @@
obj-y := process.o semaphore.o signal.o entry.o traps.o irq.o vm86.o \
ptrace.o i8259.o ioport.o ldt.o setup.o time.o sys_i386.o \
pci-dma.o i386_ksyms.o i387.o dmi_scan.o bootflag.o \
- doublefault.o
+ doublefault.o efi.o efi_stub.o
obj-y += cpu/
obj-y += timers/
diff -urN linux-2.6.0-test4/arch/i386/kernel/reboot.c linux-2.6.0-test4-efi/arch/i386/kernel/reboot.c
--- linux-2.6.0-test4/arch/i386/kernel/reboot.c 2003-08-22 16:53:47.000000000 -0700
+++ linux-2.6.0-test4-efi/arch/i386/kernel/reboot.c 2003-08-28 16:05:49.000000000 -0700
@@ -7,6 +7,7 @@
#include <linux/init.h>
#include <linux/interrupt.h>
#include <linux/mc146818rtc.h>
+#include <linux/efi.h>
#include <asm/uaccess.h>
#include <asm/apic.h>
#include "mach_reboot.h"
@@ -262,7 +263,12 @@
disable_IO_APIC();
#endif
- if(!reboot_thru_bios) {
+ if (!reboot_thru_bios) {
+ if (efi_enabled) {
+ efi.reset_system(EFI_RESET_COLD, EFI_SUCCESS, 0, 0);
+ __asm__ __volatile__("lidt %0": :"m" (no_idt));
+ __asm__ __volatile__("int3");
+ }
/* rebooting needs to touch the page at absolute addr 0 */
*((unsigned short *)__va(0x472)) = reboot_mode;
for (;;) {
@@ -272,6 +278,8 @@
__asm__ __volatile__("int3");
}
}
+ if (efi_enabled)
+ efi.reset_system(EFI_RESET_WARM, EFI_SUCCESS, 0, 0);
machine_real_restart(jump_to_bios, sizeof(jump_to_bios));
}
@@ -282,6 +290,8 @@
void machine_power_off(void)
{
+ if (efi_enabled)
+ efi.reset_system(EFI_RESET_SHUTDOWN, EFI_SUCCESS, 0, 0);
if (pm_power_off)
pm_power_off();
}
diff -urN linux-2.6.0-test4/arch/i386/kernel/setup.c linux-2.6.0-test4-efi/arch/i386/kernel/setup.c
--- linux-2.6.0-test4/arch/i386/kernel/setup.c 2003-08-22 16:55:38.000000000 -0700
+++ linux-2.6.0-test4-efi/arch/i386/kernel/setup.c 2003-08-28 16:47:40.000000000 -0700
@@ -36,6 +36,8 @@
#include <linux/root_dev.h>
#include <linux/highmem.h>
#include <linux/module.h>
+#include <linux/efi.h>
+#include <linux/init.h>
#include <video/edid.h>
#include <asm/e820.h>
#include <asm/mpspec.h>
@@ -55,6 +57,9 @@
* Machine setup..
*/
+struct ia32_boot_params efi_boot_params __initdata;
+int efi_enabled = 0;
+
/* cpu data as detected by the assembly code in head.S */
struct cpuinfo_x86 new_cpu_data __initdata = { 0, 0, 0, 0, -1, 1, 0, 0, -1 };
/* common cpu data for all cpus */
@@ -140,7 +145,22 @@
{
int i;
unsigned long long current_size = 0;
+ extern struct efi_memory_map memmap;
+ if (efi_enabled) {
+ for (i = 0; i < memmap.nr_map; i++) {
+ current_size = memmap.map[i].phys_addr +
+ (memmap.map[i].num_pages << 12);
+ if (memmap.map[i].type == EFI_CONVENTIONAL_MEMORY) {
+ if (current_size > size) {
+ memmap.map[i].num_pages -=
+ (((current_size-size) + PAGE_SIZE-1) >> PAGE_SHIFT);
+ memmap.nr_map = i + 1;
+ return;
+ }
+ }
+ }
+ }
for (i = 0; i < e820.nr_map; i++) {
if (e820.map[i].type == E820_RAM) {
current_size += e820.map[i].size;
@@ -155,17 +175,21 @@
static void __init add_memory_region(unsigned long long start,
unsigned long long size, int type)
{
- int x = e820.nr_map;
+ int x;
+
+ if (!efi_enabled) {
+ x = e820.nr_map;
- if (x == E820MAX) {
- printk(KERN_ERR "Ooops! Too many entries in the memory map!\n");
- return;
- }
+ if (x == E820MAX) {
+ printk(KERN_ERR "Ooops! Too many entries in the memory map!\n");
+ return;
+ }
- e820.map[x].addr = start;
- e820.map[x].size = size;
- e820.map[x].type = type;
- e820.nr_map++;
+ e820.map[x].addr = start;
+ e820.map[x].size = size;
+ e820.map[x].type = type;
+ e820.nr_map++;
+ }
} /* add_memory_region */
#define E820_DEBUG 1
@@ -441,8 +465,12 @@
static void __init setup_memory_region(void)
{
+ if (efi_enabled) {
+ printk(KERN_INFO "EFI-provided physical memory map:\n");
+ print_efi_memmap();
+ return;
+ }
char *who = machine_specific_memory_setup();
-
printk(KERN_INFO "BIOS-provided physical RAM map:\n");
print_memory_map(who);
} /* setup_memory_region */
@@ -575,6 +603,23 @@
}
/*
+ * Callback for efi_memmap_walk(): records the highest page frame number.
+ */
+static int __init
+efi_find_max_pfn(unsigned long start, unsigned long end, void *arg)
+{
+ unsigned long *max_pfn = arg, pfn;
+
+ if (start < end) {
+ pfn = PFN_UP(end -1);
+ if (pfn > *max_pfn)
+ *max_pfn = pfn;
+ }
+ return 0;
+}
+
+
+/*
* Find the highest page frame number we have available
*/
void __init find_max_pfn(void)
@@ -582,6 +627,11 @@
int i;
max_pfn = 0;
+ if (efi_enabled) {
+ efi_memmap_walk(efi_find_max_pfn, &max_pfn);
+ return;
+ }
+
for (i = 0; i < e820.nr_map; i++) {
unsigned long start, end;
/* RAM? */
@@ -656,6 +706,25 @@
}
#ifndef CONFIG_DISCONTIGMEM
+
+/*
+ * Free all available memory for boot time allocation. Used
+ * as a callback function by efi_memmap_walk()
+ */
+
+static int __init
+free_available_memory(unsigned long start, unsigned long end, void *arg)
+{
+ /* check max_low_pfn */
+ if (start >= ((max_low_pfn + 1) << PAGE_SHIFT))
+ return 0;
+ if (end >= ((max_low_pfn + 1) << PAGE_SHIFT))
+ end = (max_low_pfn + 1) << PAGE_SHIFT;
+ if (start < end)
+ free_bootmem(start, end - start);
+
+ return 0;
+}
/*
* Register fully available low RAM pages with the bootmem allocator.
*/
@@ -663,6 +732,10 @@
{
int i;
+ if (efi_enabled) {
+ efi_memmap_walk(free_available_memory, NULL);
+ return;
+ }
for (i = 0; i < e820.nr_map; i++) {
unsigned long curr_pfn, last_pfn, size;
/*
@@ -762,21 +835,43 @@
*/
find_smp_config();
#endif
+ /*
+ * Reserve memory for the EFI memory map so later we can
+ * switch the Runtime Services in Virtual Mode.
+ */
+ if (efi_enabled)
+ efi_reserve_bootmem();
#ifdef CONFIG_BLK_DEV_INITRD
- if (LOADER_TYPE && INITRD_START) {
- if (INITRD_START + INITRD_SIZE <= (max_low_pfn << PAGE_SHIFT)) {
- reserve_bootmem(INITRD_START, INITRD_SIZE);
- initrd_start =
- INITRD_START ? INITRD_START + PAGE_OFFSET : 0;
- initrd_end = initrd_start+INITRD_SIZE;
- }
- else {
- printk(KERN_ERR "initrd extends beyond end of memory "
- "(0x%08lx > 0x%08lx)\ndisabling initrd\n",
- INITRD_START + INITRD_SIZE,
- max_low_pfn << PAGE_SHIFT);
- initrd_start = 0;
+ if (efi_enabled) {
+ if (efi_boot_params.initrd_start) {
+ if (efi_boot_params.initrd_start + efi_boot_params.initrd_size <= (max_low_pfn << PAGE_SHIFT)) {
+ reserve_bootmem(efi_boot_params.initrd_start, efi_boot_params.initrd_size);
+ initrd_start = efi_boot_params.initrd_start + PAGE_OFFSET;
+ initrd_end = initrd_start + efi_boot_params.initrd_size;
+ } else {
+ printk(KERN_ERR "initrd extends beyond end of memory! "
+ "(0x%08lx > 0x%08lx)\n disabling initrd\n",
+ efi_boot_params.initrd_start + efi_boot_params.initrd_size,
+ max_low_pfn << PAGE_SHIFT);
+ initrd_start = 0;
+ }
+ }
+ } else {
+ if (LOADER_TYPE && INITRD_START) {
+ if (INITRD_START + INITRD_SIZE <= (max_low_pfn << PAGE_SHIFT)) {
+ reserve_bootmem(INITRD_START, INITRD_SIZE);
+ initrd_start =
+ INITRD_START ? INITRD_START + PAGE_OFFSET : 0;
+ initrd_end = initrd_start+INITRD_SIZE;
+ }
+ else {
+ printk(KERN_ERR "initrd extends beyond end of memory "
+ "(0x%08lx > 0x%08lx)\ndisabling initrd\n",
+ INITRD_START + INITRD_SIZE,
+ max_low_pfn << PAGE_SHIFT);
+ initrd_start = 0;
+ }
}
}
#endif
@@ -790,11 +885,11 @@
* Request address space for all standard RAM and ROM resources
* and also for regions reported as reserved by the e820.
*/
-static void __init register_memory(unsigned long max_low_pfn)
+static void __init
+legacy_init_iomem_resources(struct resource *code_resource, struct resource *data_resource)
{
- unsigned long low_mem_size;
int i;
-
+
probe_roms();
for (i = 0; i < e820.nr_map; i++) {
struct resource *res;
@@ -817,11 +912,26 @@
* so we try it repeatedly and let the resource manager
* test it.
*/
- request_resource(res, &code_resource);
- request_resource(res, &data_resource);
+ request_resource(res, code_resource);
+ request_resource(res, data_resource);
}
}
+}
+
+/*
+ * Request address space for all standard resources
+ */
+static void __init register_memory(unsigned long max_low_pfn)
+{
+ unsigned long low_mem_size;
+ int i;
+
+ if (efi_enabled)
+ efi_initialize_iomem_resources(&code_resource, &data_resource);
+ else
+ legacy_init_iomem_resources(&code_resource, &data_resource);
+ /* EFI systems may still have VGA */
request_graphics_resource();
/* request I/O space for devices used on all i[345]86 PCs */
@@ -949,20 +1059,31 @@
pre_setup_arch_hook();
early_cpu_init();
- ROOT_DEV = ORIG_ROOT_DEV;
- drive_info = DRIVE_INFO;
- screen_info = SCREEN_INFO;
- edid_info = EDID_INFO;
- apm_info.bios = APM_BIOS_INFO;
- saved_videomode = VIDEO_MODE;
- printk("Video mode to be used for restore is %lx\n", saved_videomode);
- if( SYS_DESC_TABLE.length != 0 ) {
- MCA_bus = SYS_DESC_TABLE.table[3] &0x2;
- machine_id = SYS_DESC_TABLE.table[0];
- machine_submodel_id = SYS_DESC_TABLE.table[1];
- BIOS_revision = SYS_DESC_TABLE.table[2];
+ memcpy(&efi_boot_params, EFI_BOOT_PARAMS, sizeof(struct ia32_boot_params));
+ efi_enabled = efi_boot_params.size;
+
+ if (efi_enabled) {
+ screen_info.orig_x = efi_boot_params.orig_x;
+ screen_info.orig_y = efi_boot_params.orig_y;
+ screen_info.orig_video_cols = efi_boot_params.num_cols;
+ screen_info.orig_video_lines = efi_boot_params.num_rows;
+ screen_info.orig_video_points = 400 / screen_info.orig_video_cols;
+ } else {
+ ROOT_DEV = ORIG_ROOT_DEV;
+ drive_info = DRIVE_INFO;
+ screen_info = SCREEN_INFO;
+ edid_info = EDID_INFO;
+ apm_info.bios = APM_BIOS_INFO;
+ saved_videomode = VIDEO_MODE;
+ printk("Video mode to be used for restore is %lx\n", saved_videomode);
+ if (SYS_DESC_TABLE.length != 0 ) {
+ MCA_bus = SYS_DESC_TABLE.table[3] &0x2;
+ machine_id = SYS_DESC_TABLE.table[0];
+ machine_submodel_id = SYS_DESC_TABLE.table[1];
+ BIOS_revision = SYS_DESC_TABLE.table[2];
+ }
+ aux_device_present = AUX_DEVICE_INFO;
}
- aux_device_present = AUX_DEVICE_INFO;
#ifdef CONFIG_BLK_DEV_RAM
rd_image_start = RAMDISK_FLAGS & RAMDISK_IMAGE_START_MASK;
@@ -970,7 +1091,11 @@
rd_doload = ((RAMDISK_FLAGS & RAMDISK_LOAD_FLAG) != 0);
#endif
ARCH_SETUP
- setup_memory_region();
+ if (efi_enabled)
+ efi_init();
+ else
+ setup_memory_region();
+
copy_edd();
if (!MOUNT_ROOT_RDONLY)
@@ -1019,7 +1144,8 @@
#ifdef CONFIG_VT
#if defined(CONFIG_VGA_CONSOLE)
- conswitchp = &vga_con;
+ if (!efi_enabled || (efi_mem_type(0xa0000) != EFI_CONVENTIONAL_MEMORY))
+ conswitchp = &vga_con;
#elif defined(CONFIG_DUMMY_CONSOLE)
conswitchp = &dummy_con;
#endif
diff -urN linux-2.6.0-test4/arch/i386/kernel/time.c linux-2.6.0-test4-efi/arch/i386/kernel/time.c
--- linux-2.6.0-test4/arch/i386/kernel/time.c 2003-08-22 16:55:44.000000000 -0700
+++ linux-2.6.0-test4-efi/arch/i386/kernel/time.c 2003-08-28 16:05:49.000000000 -0700
@@ -44,6 +44,7 @@
#include <linux/module.h>
#include <linux/sysdev.h>
#include <linux/bcd.h>
+#include <linux/efi.h>
#include <asm/io.h>
#include <asm/smp.h>
@@ -158,6 +159,37 @@
return retval;
}
+static int efi_set_rtc_mmss(unsigned long nowtime)
+{
+ int real_seconds, real_minutes;
+ unsigned long flags;
+ efi_status_t status;
+ efi_time_t eft;
+ efi_time_cap_t cap;
+
+ spin_lock_irqsave(&rtc_lock, flags);
+
+ status = efi.get_time(&eft, &cap);
+ if (status != EFI_SUCCESS)
+ panic("Ooops, efitime: can't read time!\n");
+ real_seconds = nowtime % 60;
+ real_minutes = nowtime / 60;
+
+ if (((abs(real_minutes - eft.minute) + 15)/30) & 1)
+ real_minutes += 30;
+ real_minutes %= 60;
+
+ eft.minute = real_minutes;
+ eft.second = real_seconds;
+
+ status = efi.set_time(&eft);
+ if (status != EFI_SUCCESS)
+ panic("Ooops: efitime: can't read time!\n");
+
+ spin_unlock_irqrestore(&rtc_lock, flags);
+ return 0;
+}
+
/* last time the cmos clock got updated */
static long last_rtc_update;
@@ -210,7 +242,7 @@
>= USEC_AFTER - ((unsigned) TICK_SIZE) / 2 &&
(xtime.tv_nsec / 1000)
<= USEC_BEFORE + ((unsigned) TICK_SIZE) / 2) {
- if (set_rtc_mmss(xtime.tv_sec) == 0)
+ if ((efi_enabled && (!efi_set_rtc_mmss(xtime.tv_sec) )) || (set_rtc_mmss(xtime.tv_sec) == 0))
last_rtc_update = xtime.tv_sec;
else
last_rtc_update = xtime.tv_sec - 600; /* do it again in 60 s */
@@ -275,6 +307,27 @@
set_kset_name("pit"),
};
+/*
+ * This is called before the RT mappings are in place, so we
+ * need to be able to get the time in physical mode.
+ */
+unsigned long efi_get_time(void)
+{
+ efi_status_t status;
+ unsigned long flags;
+ efi_time_t eft;
+ efi_time_cap_t cap;
+
+ spin_lock_irqsave(&rtc_lock, flags);
+ status = phys_efi_get_time(&eft, &cap);
+ if (status != EFI_SUCCESS)
+ printk("Oops: efitime: can't read time status: 0x%lx\n", status);
+
+ spin_unlock_irqrestore(&rtc_lock, flags);
+
+ return mktime(eft.year, eft.month, eft.day, eft.hour, eft.minute, eft.second);
+}
+
/* XXX this driverfs stuff should probably go elsewhere later -john */
static struct sys_device device_i8253 = {
.id = 0,
@@ -293,7 +346,10 @@
void __init time_init(void)
{
- xtime.tv_sec = get_cmos_time();
+ if (efi_enabled)
+ xtime.tv_sec = efi_get_time();
+ else
+ xtime.tv_sec = get_cmos_time();
wall_to_monotonic.tv_sec = -xtime.tv_sec;
xtime.tv_nsec = (INITIAL_JIFFIES % HZ) * (NSEC_PER_SEC / HZ);
wall_to_monotonic.tv_nsec = -xtime.tv_nsec;
diff -urN linux-2.6.0-test4/drivers/acpi/Kconfig linux-2.6.0-test4-efi/drivers/acpi/Kconfig
--- linux-2.6.0-test4/drivers/acpi/Kconfig 2003-08-22 17:02:39.000000000 -0700
+++ linux-2.6.0-test4-efi/drivers/acpi/Kconfig 2003-08-28 16:05:49.000000000 -0700
@@ -263,9 +263,14 @@
dump your ACPI DSDT table using /proc/acpi/dsdt.
config ACPI_EFI
- bool
- depends on ACPI
- depends on IA64
+ bool "Obtain RSDP from EFI Configuration Table"
+ depends on IA64 && (!IA64_HP_SIM || IA64_SGI_SN) || X86 && ACPI
+ help
+ On EFI Systems the RSDP pointer is passed to the kernel via
+ the EFI Configuration Table. On Itanium systems this is
+ standard and required. For IA-32, systems that have
+ EFI firmware should leave this enabled. Platforms with
+ traditional legacy BIOS should disable this option.
default y
endmenu
diff -urN linux-2.6.0-test4/drivers/acpi/osl.c linux-2.6.0-test4-efi/drivers/acpi/osl.c
--- linux-2.6.0-test4/drivers/acpi/osl.c 2003-08-22 16:58:47.000000000 -0700
+++ linux-2.6.0-test4-efi/drivers/acpi/osl.c 2003-08-28 16:05:49.000000000 -0700
@@ -43,7 +43,6 @@
#ifdef CONFIG_ACPI_EFI
#include <linux/efi.h>
-u64 efi_mem_attributes (u64 phys_addr);
#endif
diff -urN linux-2.6.0-test4/include/asm-i386/setup.h linux-2.6.0-test4-efi/include/asm-i386/setup.h
--- linux-2.6.0-test4/include/asm-i386/setup.h 2003-08-22 16:57:22.000000000 -0700
+++ linux-2.6.0-test4-efi/include/asm-i386/setup.h 2003-08-28 16:05:49.000000000 -0700
@@ -28,6 +28,12 @@
#define APM_BIOS_INFO (*(struct apm_bios_info *) (PARAM+0x40))
#define DRIVE_INFO (*(struct drive_info_struct *) (PARAM+0x80))
#define SYS_DESC_TABLE (*(struct sys_desc_table_struct*)(PARAM+0xa0))
+#define EFI_BOOT_PARAMS (PARAM + 0x0c00)
+#define EFI_SYSTAB ((efi_boot_params.efi_sys_tbl))
+#define EFI_MEMDESC_SIZE ( (efi_boot_params.efi_mem_desc_size))
+#define EFI_MEMDESC_VERSION ( (efi_boot_params.efi_mem_desc_version))
+#define EFI_MEMMAP ( (efi_boot_params.efi_mem_map))
+#define EFI_MEMMAP_SIZE ( (efi_boot_params.efi_mem_map_size))
#define MOUNT_ROOT_RDONLY (*(unsigned short *) (PARAM+0x1F2))
#define RAMDISK_FLAGS (*(unsigned short *) (PARAM+0x1F8))
#define VIDEO_MODE (*(unsigned short *) (PARAM+0x1FA))
@@ -43,4 +49,26 @@
#define COMMAND_LINE ((char *) (PARAM+2048))
#define COMMAND_LINE_SIZE 256
+struct ia32_boot_params {
+ unsigned long size;
+ unsigned long command_line;
+ efi_system_table_t *efi_sys_tbl;
+ efi_memory_desc_t *efi_mem_map;
+ unsigned long efi_mem_map_size;
+ unsigned long efi_mem_desc_size;
+ unsigned long efi_mem_desc_version;
+ unsigned long initrd_start;
+ unsigned long initrd_size;
+ unsigned long loader_start;
+ unsigned long loader_size;
+ unsigned long kernel_start;
+ unsigned long kenrel_size;
+ unsigned long num_cols;
+ unsigned long num_rows;
+ unsigned long orig_x;
+ unsigned long orig_y;
+};
+
+extern struct ia32_boot_params efi_boot_params;
+
#endif /* _i386_SETUP_H */
diff -urN linux-2.6.0-test4/include/linux/efi.h linux-2.6.0-test4-efi/include/linux/efi.h
--- linux-2.6.0-test4/include/linux/efi.h 2003-08-22 17:00:39.000000000 -0700
+++ linux-2.6.0-test4-efi/include/linux/efi.h 2003-08-28 16:49:01.000000000 -0700
@@ -16,6 +16,8 @@
#include <linux/time.h>
#include <linux/types.h>
#include <linux/proc_fs.h>
+#include <linux/rtc.h>
+#include <linux/ioport.h>
#include <asm/page.h>
#include <asm/system.h>
@@ -96,6 +98,9 @@
u64 virt_addr;
u64 num_pages;
u64 attribute;
+#if defined (__i386__)
+ u64 pad1;
+#endif
} efi_memory_desc_t;
typedef int efi_freemem_callback_t (unsigned long start, unsigned long end, void *arg);
@@ -132,6 +137,7 @@
*/
#define EFI_RESET_COLD 0
#define EFI_RESET_WARM 1
+#define EFI_RESET_SHUTDOWN 2
/*
* EFI Runtime Services table
@@ -169,6 +175,10 @@
typedef efi_status_t efi_get_next_high_mono_count_t (u32 *count);
typedef void efi_reset_system_t (int reset_type, efi_status_t status,
unsigned long data_size, efi_char16_t *data);
+typedef efi_status_t efi_set_virtual_address_map_t (unsigned long memory_map_size,
+ unsigned long descriptor_size,
+ u32 descriptor_version,
+ efi_memory_desc_t *virtual_map);
/*
* EFI Configuration Table and GUID definitions
@@ -194,6 +204,9 @@
#define HCDP_TABLE_GUID \
EFI_GUID( 0xf951938d, 0x620b, 0x42ef, 0x82, 0x79, 0xa8, 0x4b, 0x79, 0x61, 0x78, 0x98 )
+#define UGA_IO_PROTOCOL_GUID \
+ EFI_GUID( 0x61a4d49e, 0x6f68, 0x4f1b, 0xb9, 0x22, 0xa8, 0x6e, 0xed, 0xb, 0x7, 0xa2 )
+
typedef struct {
efi_guid_t guid;
unsigned long table;
@@ -218,6 +231,12 @@
unsigned long tables;
} efi_system_table_t;
+struct efi_memory_map {
+ efi_memory_desc_t *phys_map;
+ efi_memory_desc_t *map;
+ int nr_map;
+};
+
/*
* All runtime access to EFI goes through this structure:
*/
@@ -230,6 +249,7 @@
void *sal_systab; /* SAL system table */
void *boot_info; /* boot info table */
void *hcdp; /* HCDP table */
+ void *uga; /* UGA table */
efi_get_time_t *get_time;
efi_set_time_t *set_time;
efi_get_wakeup_time_t *get_wakeup_time;
@@ -239,6 +259,7 @@
efi_set_variable_t *set_variable;
efi_get_next_high_mono_count_t *get_next_high_mono_count;
efi_reset_system_t *reset_system;
+ efi_set_virtual_address_map_t *set_virtual_address_map;
} efi;
static inline int
@@ -266,6 +287,13 @@
extern u64 efi_get_iobase (void);
extern u32 efi_mem_type (unsigned long phys_addr);
extern u64 efi_mem_attributes (unsigned long phys_addr);
+extern void print_efi_memmap(void);
+extern void efi_reserve_bootmem(void);
+extern void efi_initialize_iomem_resources(struct resource *code_resource,
+ struct resource *data_resource);
+extern efi_status_t phys_efi_get_time(efi_time_t *tm, efi_time_cap_t *tc);
+extern struct efi_memory_map_struct efi_memory_map;
+extern int efi_enabled;
/*
* Variable Attributes
diff -urN linux-2.6.0-test4/init/main.c linux-2.6.0-test4-efi/init/main.c
--- linux-2.6.0-test4/init/main.c 2003-08-22 16:52:56.000000000 -0700
+++ linux-2.6.0-test4-efi/init/main.c 2003-08-28 16:05:49.000000000 -0700
@@ -37,6 +37,7 @@
#include <linux/moduleparam.h>
#include <linux/writeback.h>
#include <linux/cpu.h>
+#include <linux/efi.h>
#include <asm/io.h>
#include <asm/bugs.h>
@@ -436,6 +437,8 @@
pidmap_init();
pgtable_cache_init();
pte_chain_init();
+ if (efi_enabled)
+ efi_enter_virtual_mode();
fork_init(num_physpages);
proc_caches_init();
buffer_init();
Matt Tolentino <[email protected]> wrote:
>
>
> Attached is an updated patch against 2.6.0-test4 that enables Extensible Firmware
> Interface (EFI) awareness in ia32 Linux kernels.
Just for my edification: why does EFI exist?
"The EFI specification defines a new model for the interface between
operating systems and platform firmware. The interface consists of data
tables that contain platform-related information, plus boot and runtime
service calls that are available to the operating system and its loader.
Together, these provide a standard environment for booting an operating
system and running pre-boot applications.
"The EFI specification is primarily intended for the next generation
of IA-32 and Itanium Architecture-based computers, and is an outgrowth
of the "Intel Boot Initiative" (IBI) program that began in 1998."
It sounds like it's filling in some gaps in ACPI? What is its relationship to ACPI?
Well, having now learnt that this is in fact not electronic fuel injection,
let me give some feedback from the point of view of an experienced kernel
developer who wants to understand it - exactly the target audience for
those who wish to develop maintainable code, yes?
Mainly I am reduced to picking over trivia... Excuse me while I ask some
dumb questions as well.
> diff -urN linux-2.6.0-test4/arch/i386/kernel/acpi/boot.c linux-2.6.0-test4-efi/arch/i386/kernel/acpi/boot.c
> --- linux-2.6.0-test4/arch/i386/kernel/acpi/boot.c 2003-08-22 16:59:02.000000000 -0700
> +++ linux-2.6.0-test4-efi/arch/i386/kernel/acpi/boot.c 2003-08-28 16:05:49.000000000 -0700
> @@ -26,6 +26,7 @@
> #include <linux/init.h>
> #include <linux/config.h>
> #include <linux/acpi.h>
> +#include <linux/efi.h>
> #include <asm/pgalloc.h>
> #include <asm/io_apic.h>
> #include <asm/apic.h>
> @@ -274,7 +275,14 @@
> acpi_find_rsdp (void)
> {
> unsigned long rsdp_phys = 0;
> -
> + extern int efi_enabled;
> +
pleeeze never declare things in .c files. Put this declaration into efi.h
so the same declaration is visible to the users as well as the definition.
> +static int efi_pte = 0;
> +static unsigned long efi_temp_page_table[1024]
> + __attribute__ ((aligned(4096))) __initdata ;
We have two early ioremap-style functions already. Are they not suitable
for accessing the EFI tables?
> +extern pgd_t swapper_pg_dir[1024];
This should be in a header.
> +efi_status_t
> +phys_efi_get_time(efi_time_t *tm, efi_time_cap_t *tc)
> +{
> + efi_status_t status = EFI_NOT_FOUND;
Need not be initialised.
> +void efi_gettimeofday(struct timespec *tv)
> +{
> + efi_time_t tm;
> +
> + memset(tv, 0, sizeof(tv));
buglet: sizeof(*tv)
> +/*
> + * Walks the EFI memory map and calls CALLBACK once for each EFI
> + * memory descriptor that has memory that is available for kernel use.
> + */
> +void efi_memmap_walk(efi_freemem_callback_t callback, void *arg)
> +{
> + int prev_valid = 0;
> + struct range {
> + unsigned long start;
> + unsigned long end;
> + } prev, curr;
> + efi_memory_desc_t *md;
> + unsigned long start, end;
> + int i;
> +
> + for (i = 0; i < memmap.nr_map; i++) {
> + md = &memmap.map[i];
> +
> + if (md->num_pages == 0) /* no pages means nothing to do... */
> + continue;
> + if (is_available_memory(md)) {
> + curr.start = md->phys_addr;
> + curr.end = curr.start +
> + (md->num_pages << EFI_PAGE_SHIFT);
> +
> + if (!prev_valid) {
> + prev = curr;
> + prev_valid = 1;
> + } else {
> + if (curr.start < prev.start)
> + printk(PFX "Unordered memory map\n");
> + if (prev.end == curr.start)
> + prev.end = curr.end;
> + else {
> + start =
> + (unsigned long) (PAGE_ALIGN(prev.start));
> + end = (unsigned long) (prev.end & PAGE_MASK);
> + if ((end > start)
> + && (*callback) (start, end, arg) < 0)
> + return;
> + prev = curr;
> + }
> + }
> + } else
> + continue;
> + }
The final `continue' here isn't needed. Would be neater to do
if (!is_available_memory(md))
continue;
curr.start = md->phys_addr;
curr.end = curr.start + (md->num_pages << EFI_PAGE_SHIFT);
...
> +
> +/*
> + * mem_start is a physical address.
> + */
> +unsigned long __init
> +efi_setup_temp_page_table(unsigned long mem_start, unsigned long size)
> +{
Again, there's an awful lot of pagetable bashing here. We do need to work
out whether it is all really needed, or whether there are consolidation
opportunities with existing code.
Could you please describe this code's requirements?
> +
> +void __init efi_enter_virtual_mode(void)
> +{
> + int i;
> + efi_memory_desc_t *md;
> + efi_status_t status;
> +
> + memmap.map = ioremap((unsigned long) memmap.phys_map, EFI_MEMMAP_SIZE);
Now what is this function doing? I guess the reader should be familiar
with the EFI spec, but some decriptive roadmap-style commentary over key
data structures such as `struct efi_memory_map' would make this code much
more approachable by occasional readers.
> --- linux-2.6.0-test4/arch/i386/kernel/Makefile 2003-08-22 16:52:57.000000000 -0700
> +++ linux-2.6.0-test4-efi/arch/i386/kernel/Makefile 2003-08-28 16:05:49.000000000 -0700
> @@ -7,7 +7,7 @@
> obj-y := process.o semaphore.o signal.o entry.o traps.o irq.o vm86.o \
> ptrace.o i8259.o ioport.o ldt.o setup.o time.o sys_i386.o \
> pci-dma.o i386_ksyms.o i387.o dmi_scan.o bootflag.o \
> - doublefault.o
> + doublefault.o efi.o efi_stub.o
Doesn't this mean we're linking all the EFI code even if CONFIG_ACPI_EFI=n?
> #ifdef CONFIG_BLK_DEV_INITRD
> - if (LOADER_TYPE && INITRD_START) {
> - if (INITRD_START + INITRD_SIZE <= (max_low_pfn << PAGE_SHIFT)) {
> - reserve_bootmem(INITRD_START, INITRD_SIZE);
> - initrd_start =
> - INITRD_START ? INITRD_START + PAGE_OFFSET : 0;
> - initrd_end = initrd_start+INITRD_SIZE;
> - }
> - else {
> - printk(KERN_ERR "initrd extends beyond end of memory "
> - "(0x%08lx > 0x%08lx)\ndisabling initrd\n",
> - INITRD_START + INITRD_SIZE,
> - max_low_pfn << PAGE_SHIFT);
> - initrd_start = 0;
> + if (efi_enabled) {
> + if (efi_boot_params.initrd_start) {
> + if (efi_boot_params.initrd_start + efi_boot_params.initrd_size <= (max_low_pfn << PAGE_SHIFT)) {
> + reserve_bootmem(efi_boot_params.initrd_start, efi_boot_params.initrd_size);
> + initrd_start = efi_boot_params.initrd_start + PAGE_OFFSET;
> + initrd_end = initrd_start + efi_boot_params.initrd_size;
> + } else {
> + printk(KERN_ERR "initrd extends beyond end of memory! "
> + "(0x%08lx > 0x%08lx)\n disabling initrd\n",
> + efi_boot_params.initrd_start + efi_boot_params.initrd_size,
> + max_low_pfn << PAGE_SHIFT);
> + initrd_start = 0;
> + }
> + }
What is the relationship between EFI and initrd?
> @@ -817,11 +912,26 @@
> * so we try it repeatedly and let the resource manager
> * test it.
> */
> - request_resource(res, &code_resource);
> - request_resource(res, &data_resource);
> + request_resource(res, code_resource);
> + request_resource(res, data_resource);
hm, request_resource() can fail...
> +/*
> + * This is called before the RT mappings are in place, so we
> + * need to be able to get the time in physical mode.
> + */
> +unsigned long efi_get_time(void)
What is an "RT mapping"?
> config ACPI_EFI
> - bool
> - depends on ACPI
> - depends on IA64
> + bool "Obtain RSDP from EFI Configuration Table"
> + depends on IA64 && (!IA64_HP_SIM || IA64_SGI_SN) || X86 && ACPI
> + help
> + On EFI Systems the RSDP pointer is passed to the kernel via
> + the EFI Configuration Table. On Itanium systems this is
> + standard and required. For IA-32, systems that have
> + EFI firmware should leave this enabled. Platforms with
> + traditional legacy BIOS should disable this option.
Poor users ;)
Vendors will ship kernels with CONFIG_ACPI_EFI=y. I assume those kernels
will work OK on machines which have legacy BIOSes?
> +struct ia32_boot_params {
> + unsigned long size;
> + unsigned long command_line;
> + efi_system_table_t *efi_sys_tbl;
> + efi_memory_desc_t *efi_mem_map;
> + unsigned long efi_mem_map_size;
> + unsigned long efi_mem_desc_size;
> + unsigned long efi_mem_desc_version;
> + unsigned long initrd_start;
> + unsigned long initrd_size;
> + unsigned long loader_start;
> + unsigned long loader_size;
> + unsigned long kernel_start;
> + unsigned long kenrel_size;
> + unsigned long num_cols;
> + unsigned long num_rows;
> + unsigned long orig_x;
> + unsigned long orig_y;
> +};
Interesting. What's all this, and how does the user interact with it?
> diff -urN linux-2.6.0-test4/include/linux/efi.h linux-2.6.0-test4-efi/include/linux/efi.h
> --- linux-2.6.0-test4/include/linux/efi.h 2003-08-22 17:00:39.000000000 -0700
> +++ linux-2.6.0-test4-efi/include/linux/efi.h 2003-08-28 16:49:01.000000000 -0700
> @@ -16,6 +16,8 @@
> #include <linux/time.h>
> #include <linux/types.h>
> #include <linux/proc_fs.h>
> +#include <linux/rtc.h>
> +#include <linux/ioport.h>
>
> #include <asm/page.h>
> #include <asm/system.h>
> @@ -96,6 +98,9 @@
> u64 virt_addr;
> u64 num_pages;
> u64 attribute;
> +#if defined (__i386__)
> + u64 pad1;
> +#endif
> } efi_memory_desc_t;
Obscure things like this rather need a comment.
Thanks.
Andrew Morton <[email protected]> writes:
> Matt Tolentino <[email protected]> wrote:
> >
> >
> > Attached is an updated patch against 2.6.0-test4 that enables Extensible
> Firmware
>
> > Interface (EFI) awareness in ia32 Linux kernels.
>
> Just for my edification: why does EFI exist?
As I have heard the story.
The guys at Intel were having problems getting a traditional
PC style BIOS to run on the first Itaniums, realized they
had an opportunity to come up with a cleaner firmware interface
and came up with EFI. Open Firmware was considered but dropped
because it was not compatible with ACPI, and they did not want to
dilute the momentum that had built up for ACPI.
And now since Intel has something moderately portable, they intend
to back port it to x86 and start using/shipping it sometime early next
year.
What I find interesting is that I don't see it addressed how the 16-bit
BIOS calls in setup.S can be bypassed on x86. And currently, while it
works to enter at the kernel's 32-bit entry point if you know what you
are doing, it is still officially not supported.
Eric
> Just for my edification: why does EFI exist?
>
> "The EFI specification defines a new model for the interface between
> operating systems and platform firmware. The interface
> consists of data
> tables that contain platform-related information, plus
> boot and runtime
> service calls that are available to the operating system
> and its loader.
> Together, these provide a standard environment for
> booting an operating
> system and running pre-boot applications.
>
> "The EFI specification is primarily intended for the next generation
> of IA-32 and Itanium Architecture-based computers, and is
> an outgrowth
> of the "Intel Boot Initiative" (IBI) program that began in 1998."
>
> It sounds like it's filling in some gaps in ACPI? What is
> its relationship to ACPI?
Not really. EFI is a broader interface to platform firmware and the hardware that has been designed to be generic, such that it may be implemented on any architecture and/or any platform. You can think of it as an interface to the traditional BIOS. In a pure EFI environment, the device model, various defined services and protocols, and structure negate the need for traditional BIOS calls. For example, you would no longer call int10h to change the video modes - instead you would call a function of a video/console protocol for the video device. Another example: the int15h call to get the e820 memory map is no longer required - instead EFI provides a memory map of all usable memory in the system, along with attributes, ranges, types, etc.
As for its relationship to ACPI, it is complementary. The EFI specification does not rewrite or redefine accepted standards such as ACPI. Instead, it enables this type of platform configuration information to be obtained in a standard fashion.
> Well, having now learnt that this is in fact not electronic
> fuel injection,
> let me give some feedback from the point of view of an
> experienced kernel
> developer who wants to understand it - exactly the target audience for
> those who wish to develop maintainable code, yes?
Indeed. It's funny that you mention that... It's funny to see ubiquitous acronyms (slapped on so many vehicles) reused like this ;-)
> Mainly I am reduced to picking over trivia... Excuse me
> while I ask some
> dumb questions as well.
I'm just glad someone bit!
> pleeeze never declare things in .c files. Put this
> declaration into efi.h
> so the same declaration is visible to the users as well as
> the definition.
Gotcha. I'll fix this up...
> > +static int efi_pte = 0;
> > +static unsigned long efi_temp_page_table[1024]
> > + __attribute__ ((aligned(4096))) __initdata ;
>
> We have two early ioremap-style functions already. Are they
> not suitable
> for accessing the EFI tables?
I'm looking at that now...
> > +extern pgd_t swapper_pg_dir[1024];
>
> This should be in a header.
Indeed. Hopefully once converted to use the early ioremap mechanism this won't even be needed!
>
> > +void efi_gettimeofday(struct timespec *tv)
> > +{
> > + efi_time_t tm;
> > +
> > + memset(tv, 0, sizeof(tv));
>
> buglet: sizeof(*tv)
Ouch, thanks.
> > +/*
> > + * Walks the EFI memory map and calls CALLBACK once for each EFI
> > + * memory descriptor that has memory that is available for
> kernel use.
> > + */
> > +void efi_memmap_walk(efi_freemem_callback_t callback, void *arg)
> > +{
> > + int prev_valid = 0;
> > + struct range {
> > + unsigned long start;
> > + unsigned long end;
> > + } prev, curr;
> > + efi_memory_desc_t *md;
> > + unsigned long start, end;
> > + int i;
> > +
> > + for (i = 0; i < memmap.nr_map; i++) {
> > + md = &memmap.map[i];
> > +
> > + if (md->num_pages == 0) /* no pages means
> nothing to do... */
> > + continue;
> > + if (is_available_memory(md)) {
> > + curr.start = md->phys_addr;
> > + curr.end = curr.start +
> > + (md->num_pages <<
> EFI_PAGE_SHIFT);
> > +
> > + if (!prev_valid) {
> > + prev = curr;
> > + prev_valid = 1;
> > + } else {
> > + if (curr.start < prev.start)
> > + printk(PFX "Unordered
> memory map\n");
> > + if (prev.end == curr.start)
> > + prev.end = curr.end;
> > + else {
> > + start =
> > + (unsigned long)
> (PAGE_ALIGN(prev.start));
> > + end = (unsigned long)
> (prev.end & PAGE_MASK);
> > + if ((end > start)
> > + && (*callback)
> (start, end, arg) < 0)
> > + return;
> > + prev = curr;
> > + }
> > + }
> > + } else
> > + continue;
> > + }
>
> The final `continue' here isn't needed. Would be neater to do
>
> if (!is_available_memory(md))
> continue;
> curr.start = md->phys_addr;
> curr.end = curr.start + (md->num_pages << EFI_PAGE_SHIFT);
> ...
Sounds good...thanks!
> > +
> > +/*
> > + * mem_start is a physical address.
> > + */
> > +unsigned long __init
> > +efi_setup_temp_page_table(unsigned long mem_start,
> unsigned long size)
> > +{
>
> Again, there's an awful lot of pagetable bashing here. We do
> need to work
> out whether it is all really needed, or whether there are
> consolidation
> opportunities with existing code.
Indeed. Like I said above, I'm looking at nuking this...
> > +
> > +void __init efi_enter_virtual_mode(void)
> > +{
> > + int i;
> > + efi_memory_desc_t *md;
> > + efi_status_t status;
> > +
> > + memmap.map = ioremap((unsigned long) memmap.phys_map,
> EFI_MEMMAP_SIZE);
>
> Now what is this function doing? I guess the reader should
> be familiar
> with the EFI spec, but some decriptive roadmap-style
> commentary over key
> data structures such as `struct efi_memory_map' would make
> this code much
> more approachable by occasional readers.
Ok, I'll be sure to add more comments! And you're right, this is in the EFI spec, but briefly....
EFI operates in a flat, physical addressing mode. So in order to call any of the EFI runtime services (get_time, set_time, reset_system, etc.) without having to thunk back into physical mode, we can call EFI set_virtual_address_map() after ioremapping the regions in the memory map that have the runtime attribute set (indicating that the region contains something that can be called during OS runtime). This call will "fix up" the EFI runtime services such that we can now call them in virtual mode. This code was stolen from the ia64 tree...
> > --- linux-2.6.0-test4/arch/i386/kernel/Makefile
> 2003-08-22 16:52:57.000000000 -0700
> > +++ linux-2.6.0-test4-efi/arch/i386/kernel/Makefile
> 2003-08-28 16:05:49.000000000 -0700
> > @@ -7,7 +7,7 @@
> > obj-y := process.o semaphore.o signal.o entry.o
> traps.o irq.o vm86.o \
> > ptrace.o i8259.o ioport.o ldt.o setup.o time.o
> sys_i386.o \
> > pci-dma.o i386_ksyms.o i387.o dmi_scan.o bootflag.o \
> > - doublefault.o
> > + doublefault.o efi.o efi_stub.o
>
> Doesn't this mean we're linking all the EFI code even if
> CONFIG_ACPI_EFI=n?
Arrrr. The config options need some work.
> > #ifdef CONFIG_BLK_DEV_INITRD
> > - if (LOADER_TYPE && INITRD_START) {
> > - if (INITRD_START + INITRD_SIZE <= (max_low_pfn
> << PAGE_SHIFT)) {
> > - reserve_bootmem(INITRD_START, INITRD_SIZE);
> > - initrd_start =
> > - INITRD_START ? INITRD_START +
> PAGE_OFFSET : 0;
> > - initrd_end = initrd_start+INITRD_SIZE;
> > - }
> > - else {
> > - printk(KERN_ERR "initrd extends beyond
> end of memory "
> > - "(0x%08lx > 0x%08lx)\ndisabling initrd\n",
> > - INITRD_START + INITRD_SIZE,
> > - max_low_pfn << PAGE_SHIFT);
> > - initrd_start = 0;
> > + if (efi_enabled) {
> > + if (efi_boot_params.initrd_start) {
> > + if (efi_boot_params.initrd_start +
> efi_boot_params.initrd_size <= (max_low_pfn << PAGE_SHIFT)) {
> > +
> reserve_bootmem(efi_boot_params.initrd_start,
> efi_boot_params.initrd_size);
> > + initrd_start =
> efi_boot_params.initrd_start + PAGE_OFFSET;
> > + initrd_end = initrd_start +
> efi_boot_params.initrd_size;
> > + } else {
> > + printk(KERN_ERR "initrd extends
> beyond end of memory! "
> > + "(0x%08lx >
> 0x%08lx)\n disabling initrd\n",
> > +
> efi_boot_params.initrd_start + efi_boot_params.initrd_size,
> > + max_low_pfn <<
> PAGE_SHIFT);
> > + initrd_start = 0;
> > + }
> > + }
>
> What is the relationship between EFI and initrd?
I'm not sure what you mean here. Nothing really, except that the loader passes the location of the initrd to the kernel, even though the loader is currently putting it where the kernel expects it. However, in the future this may allow the initrd to be placed somewhere else.
> > @@ -817,11 +912,26 @@
> > * so we try it repeatedly and let the
> resource manager
> > * test it.
> > */
> > - request_resource(res, &code_resource);
> > - request_resource(res, &data_resource);
> > + request_resource(res, code_resource);
> > + request_resource(res, data_resource);
>
> hm, request_resource() can fail...
So, is there a reason there hasn't been a check here before?
> > +/*
> > + * This is called before the RT mappings are in place, so we
> > + * need to be able to get the time in physical mode.
> > + */
> > +unsigned long efi_get_time(void)
>
> What is an "RT mapping"?
Sorry, runtime mapping - meaning that the code to make the call in physical mode is needed before the call to EFI's set_virtual_address_map() is made.
> > config ACPI_EFI
> > - bool
> > - depends on ACPI
> > - depends on IA64
> > + bool "Obtain RSDP from EFI Configuration Table"
> > + depends on IA64 && (!IA64_HP_SIM || IA64_SGI_SN) || X86 && ACPI
> > + help
> > + On EFI Systems the RSDP pointer is passed to the kernel via
> > + the EFI Configuration Table. On Itanium systems this is
> > + standard and required. For IA-32, systems that have
> > + EFI firmware should leave this enabled. Platforms with
> > + traditional legacy BIOS should disable this option.
>
> Poor users ;)
>
> Vendors will ship kernels with CONFIG_ACPI_EFI=y. I assume
> those kernels
> will work OK on machines which have legacy BIOSes?
This is more a matter of how the RSDP is obtained. On EFI systems, this is passed to the kernel via the EFI Configuration Table. So, we can look there, instead of scanning memory. However, if the kernel doesn't detect that it was loaded from EFI (i.e. no efi boot parameters), then the code this enables won't even be executed.
> > +struct ia32_boot_params {
> > + unsigned long size;
> > + unsigned long command_line;
> > + efi_system_table_t *efi_sys_tbl;
> > + efi_memory_desc_t *efi_mem_map;
> > + unsigned long efi_mem_map_size;
> > + unsigned long efi_mem_desc_size;
> > + unsigned long efi_mem_desc_version;
> > + unsigned long initrd_start;
> > + unsigned long initrd_size;
> > + unsigned long loader_start;
> > + unsigned long loader_size;
> > + unsigned long kernel_start;
> > + unsigned long kenrel_size;
> > + unsigned long num_cols;
> > + unsigned long num_rows;
> > + unsigned long orig_x;
> > + unsigned long orig_y;
> > +};
>
> Interesting. What's all this, and how does the user interact with it?
It's the boot parameters that the EFI linux boot loader (ELILO) passes to the kernel. It's only used in the early boot process.
> > diff -urN linux-2.6.0-test4/include/linux/efi.h
> linux-2.6.0-test4-efi/include/linux/efi.h
> > --- linux-2.6.0-test4/include/linux/efi.h 2003-08-22
> 17:00:39.000000000 -0700
> > +++ linux-2.6.0-test4-efi/include/linux/efi.h
> 2003-08-28 16:49:01.000000000 -0700
> > @@ -16,6 +16,8 @@
> > #include <linux/time.h>
> > #include <linux/types.h>
> > #include <linux/proc_fs.h>
> > +#include <linux/rtc.h>
> > +#include <linux/ioport.h>
> >
> > #include <asm/page.h>
> > #include <asm/system.h>
> > @@ -96,6 +98,9 @@
> > u64 virt_addr;
> > u64 num_pages;
> > u64 attribute;
> > +#if defined (__i386__)
> > + u64 pad1;
> > +#endif
> > } efi_memory_desc_t;
>
> Obscure things like this rather need a comment.
Yes, indeed. Comment to be added shortly...
thanks,
matt
> As I have heard the story.
>
> The guys at Intel were having problems getting a traditional
> PC style BIOS to run on the first Itaniums, realized they
> had a opportunity to come up with a cleaner firmware interface
> and came up with EFI. Open Firmware was considered but dropped
> because it was not compatible with ACPI, and they did not want to
> dilute the momentum that had built up for ACPI.
Yes, Itanium has had EFI since the beginning.
> And now since Intel has something moderately portable, they intend
> to back port it to x86 and start using/shipping it sometime early next
> year.
Hmmm... It's not so much of a back port as it is the implementation of the interface on x86 boxes. In fact, the EFI sample implementation can be used on boxes with legacy BIOSes and the interface is consistent with what is currently shipped on ia64 platforms. The intention is to have an interface to the firmware that is portable and consistent. For example, much of the linux loader is shared between ia64 and x86. Assuming add-in cards have EFI compliant drivers, this also makes option ROM and even system BIOS upgrades easy with EFI utilities and without the need for DOS.
> What I find interesting is that I don't see it addressed how the 16bit
> BIOS calls in setup.S can be bypassed on x86. And currently while it
> works to enter at the kernels 32bit entry point if you know what you
> are doing it is still officially not supported.
If one can obtain the required system configuration information from EFI before booting the kernel and pass this information to the kernel so as to enable kernel initialization, then why else might we even need the 16 bit BIOS calls in setup.S that essentially perform the same function? I'm curious why it wouldn't be better to enter the kernel in 32 bit, protected mode?
thanks,
matt
"Tolentino, Matthew E" <[email protected]> writes:
> > As I have heard the story.
> >
> > The guys at Intel were having problems getting a traditional
> > PC style BIOS to run on the first Itaniums, realized they
> > had an opportunity to come up with a cleaner firmware interface
> > and came up with EFI. Open Firmware was considered but dropped
> > because it was not compatible with ACPI, and they did not want to
> > dilute the momentum that had built up for ACPI.
>
> Yes, Itanium has had EFI since the beginning.
Except EFI came very late in the game. I have talked to the Intel guys
who thought it up. And from a practical standpoint the EFI interface
is still stabilizing.
> > And now since Intel has something moderately portable, they intend
> > to back port it to x86 and start using/shipping it sometime early next
> > year.
>
> Hmmm... It's not so much of a back port as it is the implementation of the
> interface on x86 boxes. In fact, the EFI sample implementation can be used on
> boxes with legacy BIOSes and the interface is consistent with what is currently
> shipped on ia64 platforms. The intention is to have an interface to the
> firmware that is portable and consistent. For example, much of the linux loader
> is shared between ia64 and x86. Assuming add-in cards have EFI compliant
> drivers, this also makes option ROM and even system BIOS upgrades easy with EFI
> utilities and without the need for DOS.
Getting EFI drivers in a byte code format would of course be nice.
But mostly this helps the Itanium, not x86. I can already get
standard x86 option roms.
As for not using DOS, DOS by any other name.... Even if it does use GUIDs now.
As for the linux loader being able to share code that is great but
have you noticed how huge elilo is?
I have used EFI and I have ported etherboot to it. It is ok, but from a
practical standpoint very little changes. You still have to know which
apis are really supported and which ones are not etc. EFI is not tied
as closely to the hardware as the legacy PC BIOS which is a plus. The fact
you have to have an AML interpreter to get some very trivial information, is
a down side, as is the fact that it is a new interface.
The fact that EFI's home is on a very expensive and slow platform also
does not help. From the Intel side I can see how it is certainly the
coming trend and it is the thing to embrace. I am much more in the
wait and see camp.
Open source firmware where bugs could be fixed would be another
issue, but so far EFI is a buggy piece of binary firmware. I have
to tolerate it if I am going to do Itanium, but my customers don't
want it. They would rather rewrite the firmware.
> > What I find interesting is that I don't see it addressed how the 16bit
> > BIOS calls in setup.S can be bypassed on x86. And currently while it
> > works to enter at the kernels 32bit entry point if you know what you
> > are doing it is still officially not supported.
>
> If one can obtain the required system configuration information from EFI before
> booting the kernel and pass this information to the kernel so as to enable
> kernel initialization, then why else might we even need the 16 bit BIOS calls in
> setup.S that essentially perform the same function? I'm curious why it wouldn't
> be better to enter the kernel in 32 bit, protected mode?
I totally agree that it is reasonable to bypass setup.S. But to do that
reliably requires consensus that the 32bit entry point is stable. That
has not happened yet, and your patch did nothing to address that. I
know it has to happen because I know the boot process, and what has to
happen to boot with a different x86 BIOS implementation.
Entering via the 32bit entry point has not been previously discussed.
H. Peter Anvin has not been convinced it should be a stable kernel
entry point. The documentation has not been updated. A recent RedHat
kernel has even shipped with a different 32bit kernel entry point.
My hunch is that most of the EFI code should actually live in another
subarch. I think the kernel has support for compiling in multiple
subarches. If not it is simply because no one has gotten that far yet.
Eric
"Tolentino, Matthew E" <[email protected]> writes:
> Ok, I'll be sure to add more comments! And you're right, this is in the EFI
> spec, but briefly....
>
>
> EFI operates in a flat, physical addressing mode. So in order to call any of
> the EFI runtime services (get_time, set_time, reset_system, etc.) without having
> to thunk back into physical mode, we can call EFI set_virtual_address_space()
> after ioremapping the regions in the memory map that have the runtime attribute
> set (indicating that the region contains something that can be called during OS
> runtime). This call will "fix up" the EFI runtime services such that we can now
> call them in virtual mode. This code was stolen from the ia64 tree...
A problem is that set_virtual_address_space cannot be called multiple times,
and so it interacts badly with kexec. I was just about to disable it
on the ia64 tree, so I could use kexec. Besides that BIOS calls
should be quite infrequent so flipping to physical mode should
not matter. That plus not being in physical mode looks like a great
way to trip up various implementation bugs when there are multiple
implementations.
> I'm not sure what you mean here. Nothing really, except that the loader passes
> the location of the initrd to the kernel, even though the loader is currently
> putting it where the kernel expects it. However, in the future this may allow the
> initrd to be placed somewhere else.
>
> > > +struct ia32_boot_params {
> > > + unsigned long size;
> > > + unsigned long command_line;
> > > + efi_system_table_t *efi_sys_tbl;
> > > + efi_memory_desc_t *efi_mem_map;
> > > + unsigned long efi_mem_map_size;
> > > + unsigned long efi_mem_desc_size;
> > > + unsigned long efi_mem_desc_version;
> > > + unsigned long initrd_start;
> > > + unsigned long initrd_size;
> > > + unsigned long loader_start;
> > > + unsigned long loader_size;
> > > + unsigned long kernel_start;
> > > + unsigned long kenrel_size;
> > > + unsigned long num_cols;
> > > + unsigned long num_rows;
> > > + unsigned long orig_x;
> > > + unsigned long orig_y;
> > > +};
> >
> > Interesting. What's all this, and how does the user interact with it?
>
> It's the boot parameters that the EFI linux boot loader (ELILO) passes to the
> kernel. It's only used in the early boot process.
Hmm. You have added additional parameters passed to the kernel, but
have not updated the documentation. Nor have you bumped the protocol
number in setup.S.
Beyond that you have duplicated a bunch of variables that already have
perfectly valid ways of being passed to the kernel.
initrd_start, initrd_size, num_cols, num_rows, orig_x, orig_y and the
command line should be passed in their original locations. At least
barring the creation of a subarch and starting from scratch.
kernel_start, and kernel_size are not used.
loader_start, and loader_size are not used.
And are probably equally valid in other contexts.
Eric
> Getting EFI drivers in a byte code format would of course be nice.
> But mostly this helps the Itanium, not x86. I can already get
> standard x86 option roms.
It would be nice. It is especially nice for vendors because they can reuse a single driver image for multiple architectures assuming there is an interpreter and EFI support.
> I totally agree that it is reasonable to bypass setup.S. But
> to do that reliably requires consensus that the 32bit entry point is
> stable. That has not happened yet, and your patch did nothing to address that. I
> know it has to happen because I know the boot process, and what has to
> happen to boot with a different x86 BIOS implementation.
Ok, so how do we know it is stable and how might one address that? How have you addressed this with kexec?
> Entering via the 32bit entry point has not been previously discussed.
> H. Peter Anvin has not been convinced it should be a stable kernel
> entry point.
Why? I've missed this argument.
> The documentation has not been updated. A recent RedHat
> kernel has even shipped with a different 32bit kernel entry point.
I'm afraid I haven't looked at kexec. Do you employ the standard 32 bit entry point or do you actually go back to real mode or something in between?
> My hunch is that most of the EFI code should actually live in another
> subarch. I think the kernel has support for compiling in multiple
> subarches. If not it is simply because no one has gotten
> that far yet.
I can see how this could be useful and potentially consolidate the efi related code in ia64, the ia32 stuff I've posted, and any other architecture that supports efi in the future, but don't know about compiling in multiple subarchs. Comments on how this is done?
matt
On Thu, 4 Sep 2003, Tolentino, Matthew E wrote:
>
> It would be nice. It is especially nice for vendors because they can
> reuse a single driver image for multiple architectures assuming there is
> an interpreter and EFI support.
No. It would be a total nightmare.
Vendor-supplied drivers without source are going to be BUGGY.
They are going to be doubly buggy if they are run with a compiler that
has a buggy back-end.
And that back-end is going to be buggy if it's for some random bytecode
that isn't widely used except for some silly EFI thing and is tested
exclusively with just a few versions of Windows and _maybe_ occasionally
on Linux.
Face it: firmware bytecode is a total braindamage. The only thing that
works is _source_code_ that can be fixed, and lacking that, we're better
off with a well-defined ISA that people are used to and that has stable
simple compilers.
In other words: x86 object code is a better choice than some random new
bytecode. It's a "bytecode" too, after all. And it's one that is stable
and runs fast on most hardware. But as long as it's some kind of binary
(and byte code is binary, don't make any mistake about it), it's going to
always be broken.
EFI is doing all the wrong things. Trying to fix BIOSes by being "more
generic". It's going to be a total nightmare if you go down that path.
What will work is:
- standard hardware interfaces. Instead of working on bytecode
interpreters, make the f*cking hardware definition instead, and make it
SANE and PUBLIC! So that we can write drivers that work, and that come
with source so that we can fix them when somebody has buggy hardware.
DO NOT MAKE ANOTHER FRIGGING BYTECODE INTERPRETER!
Didn't Intel learn anything from past mistakes? ACPI was supposed to be
"simple". Codswallop.
PCI works, because it had standard, and documented, hardware
interfaces. The interfaces aren't well specified enough to write a PCI
disk driver, of course, but they _are_ good enough to do discovery and
a lot of things.
Intel _could_ make a "PCI disk controller interface definition", and it
will work. The way USB does actually work, and UHCI was actually a fair
standard, even if it left _way_ too much to software.
- Source code. LinuxBIOS works today, and is a lot more flexible than EFI
will _ever_ be.
- Compatibility. Make hardware that works with old drivers and old
BIOSes. This works. The fact that Intel forgot about that with ia-64 is
not an excuse to make _more_ mistakes.
Don't screw this up. EFI is not going in the right direction.
Linus
On Iau, 2003-09-04 at 19:24, Linus Torvalds wrote:
> interfaces. The interfaces aren't well specified enough to write a PCI
> disk driver, of course, but they _are_ good enough to do discovery and
> a lot of things.
To be fair - for the hardware extant at the time - they were. Our
drivers/ide/pci/generic.c is exactly that. Also beyond the PCI code the
vendors managed to create a standard that actually basically works and
is back compatible. Bits of it are rather Lovecraftian but it works.
ide/pci/generic.c will drive almost any IDE controller today in BIOS
tuned mode including basic IDE DMA.
> Intel _could_ make a "PCI disk controller interface definition", and it
> will work. The way USB does actually work, and UHCI was actually a fair
> standard, even if it left _way_ too much to software.
UHCI, OHCI, their reuse for firewire and other stuff are all great
examples. VGA is another example which alas fell apart as cards changed
over time. It's always struck me as bizarre that graphics card vendors
can create a chip that can texture a billion triangles a second but
can't manage to agree on a hardware interface where I load height,
width, depth and refresh rate and it sets it up for me.
I grant I2O proved that you can make that control layer too complicated.
Even then it wasn't the hardware interface that was the problem, it was
the glue on top. People still use the i2o hw interface for many things.
I'm hopeful now the world is effectively down to two scsi vendors
(Adaptec and LSI) we can at least begin to see a reduction in the number
of permutations of scsi insanity.
On Thu, 4 Sep 2003, Linus Torvalds wrote:
>>
>> It would be nice. It is especially nice for vendors because they can
>> reuse a single driver image for multiple architectures assuming there is
>> an interpreter and EFI support.
>
>No. It would be a total nightmare.
As one of the people responsible for the EFI Specification and our
industry enabling efforts around that spec, I'd like to offer some
background that I hope will illuminate some of the issues discussed
in this thread. This is going to be a bit long...let me apologize in
advance for that but I think there's quite a bit of context here and
sharing that may help people understand why EFI works the way it does
for Option ROMs.
In 1999 when we were first working on the EFI spec in draft form, a
number of the OEMs and IHV companies that we talked to told us that an
EFI spec without a solution for the "option ROM" problem would not be
accepted in the industry.
At that time, I tried to make the case that instead of propagating the
problem into the future we should focus on moving the industry to
"architectural hardware" that wouldn't even need option ROMs. What I
meant by that was add-in cards with common register-level hardware
interfaces to allow operating systems code to carry driver and boot
loader code that would be able to work across a range of vendors'
products. Perhaps the UNDI network card interface that Intel developed
would be a good model for a start at this approach as an example; both
in terms of how to do it and the level of traction (or lack thereof) one
can expect taking this approach.
The trouble with the "architectural hardware" argument proved to be that
PCI is already well established and there is a vibrant industry churning
out innovative PCI cards on a regular basis. The idea of a single
interface definition for all cards of each of the network, storage or
video classes is viewed as simply too limiting and the argument was made
to us that to force such a model would be to stifle innovation in
peripherals. So effectively the feedback we got on "architectural
hardware" was therefore along the lines of "good idea but not
practical..."
Faced with that and what amounts to a demand for a solution, we tried to
scope the problem. Today's IA-32 Option ROMs are typically 16-bit,
IA-32 real mode code, they must live in a magic 128k (192 on some boxes)
window below 1MB, and there are no hard and fast rules about what
resources on the machine they may or may not touch. The reason the OEM
folks asked us to look at solving this issue set in the context of EFI
is to try and improve the real nightmares that they face every day. The
kind of thing where you plug in an adapter card and suddenly the floppy
doesn't work anymore. The kind of thing where you plug in four SCSI
controllers and it's highly likely that you can't reach a perfectly good
OS install on a drive connected to one of those controllers because the
BIOS can't shadow that much ROM code. The kind of thing where ROM code
uses I/O reads in lieu of calibrated delays causing controllers to fail
on newer, faster systems.
EFI's origins come from the 64-bit side of the house. It was originally
conceived in the context of a need for a means to handle programmatic
transfer of control from the platform code (BIOS/firmware) to the OS; in
other words an abstraction for the platform to support booting
shrink-wrap OSes, installed right off the distribution CDs. However, we
also worked hard at building a C language binding for those interfaces
that would work just as well for IA-32 or even XScale or perhaps even
for non-Intel Family processors in fact. The idea being a piece of code
written to consume EFI services can compile unmodified and without
gratuitous #ifdef's for 32-bit or 64-bit system merely by choice of
compiler.
In the context of option ROMs then, this approach would say that you can
write a single chunk of C language EFI driver code, your option ROM
equivalent. This code can load anywhere in the address space of the
machine (EFI uses protected mode, virtual equals physical addressing
model), it can use the full address width of the machine for data
references and you can compile it for your target machine architecture
of choice. So far so good.
However there are some other practical deployment issues that add-in
cards bring to bear that we also had to address.
These cards have a habit of traveling from machine to machine.
Customers have a reasonable expectation that cards just work when you
move them from one system and plug them in to another. Since the
receiving system motherboard might have no knowledge of the card you
just added, how does it present devices connected to that card as
candidates for booting?? Motherboards cannot reasonably carry code for
every device a customer might choose to plug in. Addressing that
problem is what the Option ROM does for you.
We also tried to advocate for having the drivers/Op ROM images be
separately distributed. Some of the IHVs liked that: just ship a floppy
with the card, much cheaper than putting NVRAM memory on card. The OEM
folks however point out that the floppy gets lost and now the card is
useless to the customer...support calls ensue. Thus the code needs to
travel as part of the card.
If a card can travel from system to system, that also means it can cross
processor architecture boundaries too - there are Itanium Family and
IA-32 family machines with PCI slots that are electrically compatible
and the expectation is that the cards work equally well in both system
types. For the Option ROM content though that presents a dilemma - what
do you carry in the ROM?? Native compiled IA-32 code and also native
compiled Itanium family code perhaps. Well that works, the PCI spec
says a ROM container can have multiple images; we take advantage of that
now to build cards that carry a 16-bit conventional ROM and an EFI
driver together and there are also Forth images out there for SPARC and
Power systems.
As a practical matter carrying multiple instruction set versions of the
same code gets expensive in FLASH memory terms. Consider an EFI
compiled driver for IA-32 as the index, size: one unit. With code size
expansion, an Itanium compiled driver is going to be three to four times
that size. Total ROM container requirement: one unit for the legacy ROM
image plus one for an EFI IA-32 driver plus three to four units for an
Itanium compiled driver image; to make the card "just work" when you
plug it into a variety of systems is starting to require a lot of FLASH
on the card. More than the IHVs were willing to countenance in most
cases for cost reasons.
EFI Byte Code was born of this challenge. Its goals are pretty
straightforward: architecture neutral image, small foot print in the
add-in card ROM container and of course small footprint in the
motherboard which will have to carry an interpreter. We also insisted
that the C source for a driver should be the same regardless of whether
you build it for a native machine instruction set or EBC.
We did some other things with EBC's definition too; like not including
direct I/O instructions. That may sound odd for an environment
specifically designed for I/O devices but if you think about it, it's
the motherboard code that knows what is and is not "safe" to do by way
of I/O more than the device itself that could find itself in pretty much
any old machine design. This we believe will significantly improve the
reliability of ROM code...it relieves the add-in card Op ROM writer of
any attempts to guess and assume what the I/O environment is that the
card will encounter out in the field.
You may ask why we didn't just use an existing definition as opposed to
making a new one. We did actually spend quite a bit of time on that
very question. Most alternatives would have significantly swelled the
ROM container size requirement or the motherboard support overhead
requirement or had licensing, IP or other impediments to deployment into
the wider industry that we had no practical means to resolve. With
specific reference to why we chose not to use the IA-32 instruction set
for this purpose, it was all about the size of an interpreter for that
instruction set. To provide compatibility 100% for the universe of real
mode option ROM binaries out there would require a comprehensive
treatment of a very rich instruction set architecture. We could see no
practical way to persuade OEMs building systems using processors other
than IA-32 to carry along that much interpreter code in their
motherboard ROM.
Consider the model of Alpha and FX32 as an example; FX32 would be
impractical to carry on the motherboard outside the scope of a running
OS. At one point we did have an EFI draft that included a processor
binding for Alpha at Compaq's request. That material isn't in the final
spec for reasons that don't really relate to EFI. Nevertheless, making
an Option ROM solution that could plausibly work on multiple CPU
architectures (including ones from outside the IA family) and before
there is an OS on the box to support an expansive interpreter loomed
large in our thinking at the time. [By the by, we remain open to adding
other CPU bindings into the EFI spec should anyone approach us with such
a proposal in hand.]
By contrast EBC requires a small interpreter with no libraries (roughly
18k uncompressed total on IA-32 for example) and the average add-in card
ROM image size is 1.5 units relative to native IA-32 code. And keep in
mind that using byte code for this purpose is in widespread, long time
use on other CPU architectures so we felt the technique in general was
viable based on industry experience with it. Yes, it's a compromise but
the best balance point we have been able to find to date.
I agree that the compiler back end for EBC will be used for small chunks
of code and relatively few of them at that. That compiler and its back
end will by definition end up with less code-mileage on it, if you will.
I can only say that Intel is supporting the compiler as a commercial
product and we stand behind it just as much as we do the native IA-32
and Itanium compilers. Find a bug, let us know - we'll fix it. We run
the same tests on the EBC compiler as we do on the native compilers and
a few more besides that do EBC torture exercises. The compiler has been
in testing for more than a year and in release for nearly that long now.
At any rate, feedback we've received so far doesn't seem to indicate
stability problems in the compiler; if your experience varies from that
please let me know - I'd like to help fix it for you! Incidentally,
nothing prevents someone from retargeting GCC for this application.
It is with no small trepidation, given the assembled company, that I
turn to the question of Open Source as it relates to EFI and Option ROM
code. However...
There is nothing about the definition of the EFI spec or the driver
model associated that prevents vendors from making add-in card drivers
and presenting them in Open Source form to the community. In fact we've
specifically included the ability to "late bind" a driver into a system
that speaks EFI. In practice that late binding means that code that
uses EFI services and that is GPL code can be used on systems that also
include EFI code that is not open source.
The decision on whether to make any given driver Open Source or not
therefore lies with the creator of that code. In the case of ROM
content for an add-in card that will usually be the IHV that makes the
card.
Now, we observe that the high-end add-in card makers often preserve
their intellectual property behind proprietary code (via binary drivers)
and/or "object models" that they implement in the ROM. In today's
lexicon that means an INT13 service for a SCSI card or an INT10 service
for a video card. Even for Linux OS-present drivers I understand that
some open source drivers for such cards don't actually touch the metal
directly for all operations they perform - they use some abstraction
between the driver and the actual hardware, something that is carried
around in the ROM. I suspect that this paradigm is one that will
continue for some time to come. Any change in this approach will have
to be worked with the vendors who feel commercial pressure to protect
their IP with these kinds of mechanisms.
The EFI spec itself is published with a simple copyright statement and
not one of Intel's "colored" NDA covers. The sample code that you can
download from our web site is free to you and comes with what amounts
to a patent license grant so that you can implement EFI, perhaps using
our code in derivative fashion, without royalty or other concern. We
also have support tools for folks building EFI code, like Option ROM
drivers, that are distributed under the FreeBSD license.
Thanks for reading along this far and please let me know if you have any
follow up questions or comments.
Cheers,
Mark.
--
Mark Doran
Principal Engineer
Intel Corp., DuPont, WA
Doran, Mark wrote:
> The EFI spec itself is published with a simple copyright statement and
> not one of Intel's "colored" NDA covers. The sample code that you can
> download from our web site is free to you and comes with what amounts
> to a patent license grant so that you can implement EFI, perhaps using
EFI is covered by patents? What are they?
Thanks,
-- Jamie
On Friday, September 05, 2003 2:24 PM Jamie Lokier wrote:
> EFI is covered by patents? What are they?
Actually no, there are no patents that I'm aware of that read on EFI.
We made a point of not filing any on the spec. We told everyone we
talked to during the spec's development that we wouldn't file any so
that the spec would end up free of any IP considerations when
complete. This was a deliberate effort to support the goal of
minimizing any potential barriers to adoption of EFI as much as
possible.
The patent license grant is thus in some sense a double coverage
approach...you don't really need a patent license grant since there
aren't any patents that read but to reinforce that you don't need to
worry about patents we give you the grant anyway. This helped make
some corporate entities more comfortable about implementing support
for EFI.
In practice we have required that any feedback or contribution to the
EFI spec or code from third parties that is given to us also comes
without any IP encumbrances. There are a couple of things that I've
been offered that I would like to have included but couldn't in the
end because it would not have been possible to continue telling folks
using the EFI spec that they can do so without concern for IP issues.
Cheers,
Mark.
On Thu, 2003-09-04 at 08:35, Eric W. Biederman wrote:
> "Tolentino, Matthew E" <[email protected]> writes:
>
> > Ok, I'll be sure to add more comments! And you're right, this is in the EFI
> > spec, but briefly....
> >
> >
> > EFI operates in a flat, physical addressing mode. So in order to call any of
> > the EFI runtime services (get_time, set_time, reset_system, etc.) without having
> > to thunk back into physical mode, we can call EFI set_virtual_address_space()
> > after ioremapping the regions in the memory map that have the runtime attribute
> > set (indicating that the region contains something that can be called during OS
> > runtime). This call will "fix up" the EFI runtime services such that we can now
> > call them in virtual mode. This code was stolen from the ia64 tree...
>
> A problem is that set_virtual_address_space cannot be called multiple times,
> and so it interacts badly with kexec. I was just about to disable it
> on the ia64 tree, so I could use kexec. Besides that BIOS calls
> should be quite infrequent so flipping to physical mode this should
> not matter. That plus not being in physical mode looks like a great
> way to trip up various implementation bugs when there are multiple
> implementations.
Hmmm. I bet there is some other way to get around this. Perhaps use
efi_set_variable to implement a counter in NV memory or a page reserved
by the boot loader that gets initialized by elilo on the first boot up
and then have the kernel test / write to it in the startup before
calling set_virtual_address_space. That would require a tweak to the
boot loader to make it work.
>
> > I'm not sure what you mean here. Nothing really, except that the loader passes
> > the location of the initrd to the kernel, even though the loader is currently
> > putting where the kernel expects it. However, in the future this may allow the
> > initrd to be placed somewhere else.
>
> >
> > > > +struct ia32_boot_params {
> > > > + unsigned long size;
> > > > + unsigned long command_line;
> > > > + efi_system_table_t *efi_sys_tbl;
> > > > + efi_memory_desc_t *efi_mem_map;
> > > > + unsigned long efi_mem_map_size;
> > > > + unsigned long efi_mem_desc_size;
> > > > + unsigned long efi_mem_desc_version;
> > > > + unsigned long initrd_start;
> > > > + unsigned long initrd_size;
> > > > + unsigned long loader_start;
> > > > + unsigned long loader_size;
> > > > + unsigned long kernel_start;
> > > > + unsigned long kenrel_size;
> > > > + unsigned long num_cols;
> > > > + unsigned long num_rows;
> > > > + unsigned long orig_x;
> > > > + unsigned long orig_y;
> > > > +};
> > >
> > > Interesting. What's all this, and how does the user interact with it?
> >
> > It's the boot parameters that the EFI linux boot loader (ELILO) passes to the
> > kernel. It's only used in the early boot process.
>
> Hmm. You have added additional parameters passed to the kernel, but
> have not updated the documentation. Nor have you bumped the protocol
> number in setup.S.
Bumping the boot protocol in setup.S doesn't make sense as this new boot
protocol is only possible on EFI platforms. Legacy BIOS platform
boot up processing shouldn't know anything about it. It's meant to be
orthogonal to the older boot protocol.
This being said, some EFI boot protocol documentation could be cut and
pasted out of the OLS talk into a new file in the Documentation directory.
>
> Beyond that you have duplicated a bunch of variables that already have
> perfectly valid ways of being passed to the kernel.
Actually this is a step in getting away from those legacy boot
parameters scattered about the boot parameter block for the EFI boot
up processing.
The worst thing about continuing to use the legacy boot parameters is
that we then need to go hunting for holes in the existing structure
where we can put the new EFI specific values as well as ending up
carrying along baggage that dates way WAY back that doesn't get used or
make sense any more.
>
> initrd_start, initrd_size, num_cols, num_rows, orig_x, orig_y and the
> command line should be passed in their original locations. At least
> baring the creation of a subarch and starting from scratch.
>
We are hoping to avoid doing a subarch with this boot parameter design
and went for a coexistence approach that has zero impact on the current
booting up on legacy platforms.
I think this is THE key issue to get to the bottom of. EFI enabled
kernels need not be a new sub-architecture, as we can see that the EFI
start up design supports booting on legacy firmware/bios, and has zero
impact on execution flow for the legacy case. Do you think that doing a
sub architecture is really needed for this?
> kernel_start, and kernel_size are not used.
> loader_start, and loader_size are not used.
>
They are anticipated to be useful for embedded designs booting linux on
EFI firmware. They could be removed but I'd rather see them stay.
--mgross
Mark Gross <[email protected]> writes:
> On Thu, 2003-09-04 at 08:35, Eric W. Biederman wrote:
> > "Tolentino, Matthew E" <[email protected]> writes:
> > A problem is that set_virtual_address_space cannot be called multiple times,
> > and so it interacts badly with kexec. I was just about to disable it
> > on the ia64 tree, so I could use kexec. Besides that BIOS calls
> > should be quite infrequent so flipping to physical mode this should
> > not matter. That plus not being in physical mode looks like a great
> > way to trip up various implementation bugs when there are multiple
> > implementations.
>
> Hmmm. I bet there is some other way to get around this.
Why enable a slow path case that will make the system less reliable?
> Perhaps use
> efi_set_variable to implement a counter in NV memory or a page reserved
> by the boot loader that gets initialized by elilo on the first boot up
> and then have the kernel test / write to it in the startup before
> calling set_virtual_address_space. That would require a tweak to the
> boot loader to make it work.
Sure you can do things like that but then you can't call EFI in physical
address mode.
set_virtual_address_space is unnecessary, on a slow path, tricky to
test, and potentially bug prone.
Please just deprecate set_virtual_address please.
> > > I'm not sure what you mean here. Nothing really, except that the loader
> passes
>
> > > the location of the initrd to the kernel, even though the loader is
> currently
>
> > > putting where the kernel expects it. However, in the future this may allow
> the
>
> > > initrd to be placed somewhere else.
> >
> > >
> > > > > +struct ia32_boot_params {
> > > > > + unsigned long size;
> > > > > + unsigned long command_line;
> > > > > + efi_system_table_t *efi_sys_tbl;
> > > > > + efi_memory_desc_t *efi_mem_map;
> > > > > + unsigned long efi_mem_map_size;
> > > > > + unsigned long efi_mem_desc_size;
> > > > > + unsigned long efi_mem_desc_version;
> > > > > + unsigned long initrd_start;
> > > > > + unsigned long initrd_size;
> > > > > + unsigned long loader_start;
> > > > > + unsigned long loader_size;
> > > > > + unsigned long kernel_start;
> > > > > + unsigned long kenrel_size;
> > > > > + unsigned long num_cols;
> > > > > + unsigned long num_rows;
> > > > > + unsigned long orig_x;
> > > > > + unsigned long orig_y;
> > > > > +};
> > > >
> > > > Interesting. What's all this, and how does the user interact with it?
> > >
> > > It's the boot parameters that the EFI linux boot loader (ELILO) passes to
> the
>
> > > kernel. It's only used in the early boot process.
> >
> > Hmm. You have added additional parameters passed to the kernel, but
> > have not updated the documentation. Nor have you bumped the protocol
> > number in setup.S.
>
> Bumping the boot protocol in setup.S doesn't make sense as this new boot
> protocol is only possible on EFI platforms. Legacy BIOS platform
> boot up processing shouldn't know anything about it. It's meant to be
> orthogonal to the older boot protocol.
Bumping the minor revision indicates new features are present.
You added new features therefore the minor rev needs to be bumped.
> This being said, some EFI boot protocol documentation could be cut and
> pasted out of the OLS talk into a new file in the Documentation directory.
>
> >
> > Beyond that you have duplicated a bunch of variables that already have
> > perfectly valid ways of being passed to the kernel.
>
> Actually this is a step in getting away from those legacy boot
> parameters scattered about the boot parameter block for the EFI boot
> up processing.
>
> The worst thing about continuing to use the legacy boot parameters is
> that we then need to go hunting for holes in the existing structure
> where we can put the new EFI specific values as well as ending up
> carrying along baggage that dates way WAY back that doesn't get used or
> make sense any more.
The joy of x86. And no you don't need to look for holes all you need to
do is to append to the end.
> > initrd_start, initrd_size, num_cols, num_rows, orig_x, orig_y and the
> > command line should be passed in their original locations. At least
> > baring the creation of a subarch and starting from scratch.
> >
>
> We are hoping to avoid doing a subarch with this boot parameter design
> and went for a coexistence approach that has zero impact on the current
> booting up on legacy platforms.
Which is a reasonable way to go.
> I think this is THE key issue to get to the bottom of. EFI enabled
> kernels need not be a new sub-architecture, as we can see that the EFI
> start up design supports booting on legacy firmware/bios, and has zero
> impact on execution flow for the legacy case. Do you think that doing a
> sub architecture is really needed for this?
If you want a clean slate a sub arch looks necessary. If you want to
coexist with the legacy you need to put up with the issues of being
compatible.
But realize you will also want to know you started from efi even if
you were loaded in pcbios compatibility mode with lilo or grub. So
this is not a boolean kind of thing, EFI or no EFI. It is: does my BIOS
have the EFI features? At least on x86. And at that point you
probably want EFI detection in Setup.S.
> > kernel_start, and kernel_size are not used.
> > loader_start, and loader_size are not used.
> >
>
> They are anticipated to be useful for embedded designs booting linux on
> EFI firmware. They could be removed but I'd rather see them stay.
Except when you are directly loading vmlinux you don't have the information
to populate kernel_start and kernel_size properly. And at that point
you are missing other interesting parts of the kernel. Like which
boot protocol it supports.
The x86 boot protocol is crusty and has plenty of warts but it works.
Just having a length and no version number or any other way to detect
features is worse, and that is what you are proposing in the EFI case.
Things change and evolve. So far I know of two distinct versions of
EFI. The EFI that has been so nicely described by Mark Doran. And
the version I have actually used, which is quite a different animal.
Eric
"Tolentino, Matthew E" <[email protected]> writes:
> > I totally agree that it is reasonable to bypass setup.S. But
> > to do that reliably requires consensus that the 32bit entry point is
> > stable. That has not happened yet, and your patch did nothing to address that.
> I
>
> > know it has to happen because I know the boot process, and what has to
> > happen to boot with a different x86 BIOS implementation.
>
> Ok, so how do we know it is stable and how might one address that? How have you
> addressed this with kexec?
Getting consensus among conservative people is a challenge. I am slowly
working on it but I have been busy with other things.
> > Entering via the 32bit entry point has not been previously discussed.
> > H. Peter Anvin has not been convinced it should be a stable kernel
> > entry point.
>
> Why? I've missed this argument.
>
> The documentation has not been updated. A recent RedHat
> > kernel has even shipped with a different 32bit kernel entry point.
>
> I'm afraid I haven't looked at kexec. Do you employ the standard 32 bit entry
> point or do you actually go back to real mode or something in between?
I use it with the firm knowledge that kernel developers may change it if
the fancy takes them. There are getting to be fewer and fewer reasons
why someone would want to change it but...
> > My hunch is that most of the EFI code should actually live in another
> > subarch. I think the kernel has support for compiling in multiple
> > subarches. If not it is simply because no one has gotten
> > that far yet.
>
> I can see how this could be useful and potentially consolidate the efi related
> code in ia64, the ia32 stuff I've posted, and any other architecture that
> supports efi in the future, but don't know about compiling in multiple subarchs.
> Comments on how this is done?
I haven't had a chance to really look at that yet.
Eric