This patchset introduces device tree support on x86. The device tree is
passed by the bootloader via setup_data. It is used as an additional
source of information and does not replace the "traditional" x86 boot
page.
Right now we get the the following information from it:
- hpet location
- apic & ioapic location
- ioapic's interrupt routing
- legacy devices which are not initialized by bios
- devices which are behind a bus which does not support enumeration like
i2c
The series is based on the tip tree and is also available at
git://git.linutronix.de/users/bigeasy/soda.git ce_of
Sebastian Andrzej Siewior (11):
x86/kernel: remove conditional early remap in parse_e820_ext
x86: Add device tree support
x86/dtb: Add a device tree for CE4100
x86/dtb: add irq host abstraction
x86/dtb: add early parsing of APIC and IO APIC
x86/dtb: add support hpet
x86/dtb: add support for PCI devices backed by dtb nodes
x86/dtb: Add generic bus probe
x86/ioapic: Add OF bindings for IO-APIC
x86/io_apic: add simply id set
x86/ce4100: use OF for ioapic
Documentation/x86/boot_with_dtb.txt | 20 ++
arch/x86/Kconfig | 7 +
arch/x86/include/asm/bootparam.h | 1 +
arch/x86/include/asm/e820.h | 2 +-
arch/x86/include/asm/io_apic.h | 8 +
arch/x86/include/asm/irq_controller.h | 12 +
arch/x86/include/asm/prom.h | 67 ++++++
arch/x86/kernel/Makefile | 1 +
arch/x86/kernel/apic/io_apic.c | 144 +++++++++++++
arch/x86/kernel/e820.c | 8 +-
arch/x86/kernel/irqinit.c | 9 +-
arch/x86/kernel/prom.c | 370 +++++++++++++++++++++++++++++++++
arch/x86/kernel/setup.c | 15 ++-
arch/x86/platform/ce4100/ce4100.c | 16 ++-
arch/x86/platform/ce4100/ce4100.dts | 210 +++++++++++++++++++
15 files changed, 877 insertions(+), 13 deletions(-)
create mode 100644 Documentation/x86/boot_with_dtb.txt
create mode 100644 arch/x86/include/asm/irq_controller.h
create mode 100644 arch/x86/include/asm/prom.h
create mode 100644 arch/x86/kernel/prom.c
create mode 100644 arch/x86/platform/ce4100/ce4100.dts
Sebastian
The here introduced irq_host abstraction represents a generic irq_host.
The xlate callback is resposible to parse irq informations like irq type
and number and returns the hardware irq number which is reported by the
hardware as active.
Signed-off-by: Sebastian Andrzej Siewior <[email protected]>
CC: [email protected]
Cc: [email protected]
Tested-by: Dirk Brandewie <[email protected]>
---
arch/x86/include/asm/irq_controller.h | 12 ++++++++
arch/x86/include/asm/prom.h | 2 +
arch/x86/kernel/prom.c | 47 ++++++++++++++++++++++++++++++++-
3 files changed, 60 insertions(+), 1 deletions(-)
create mode 100644 arch/x86/include/asm/irq_controller.h
diff --git a/arch/x86/include/asm/irq_controller.h b/arch/x86/include/asm/irq_controller.h
new file mode 100644
index 0000000..1cbbfd0
--- /dev/null
+++ b/arch/x86/include/asm/irq_controller.h
@@ -0,0 +1,12 @@
+#ifndef __IRQ_CONTROLLER__
+#define __IRQ_CONTROLLER__
+
+struct irq_host {
+ int (*xlate)(struct irq_host *h, const u32 *intspec, u32 intsize,
+ u32 *out_hwirq, u32 *out_type);
+ void *priv;
+ struct device_node *controller;
+ struct list_head l;
+};
+
+#endif
diff --git a/arch/x86/include/asm/prom.h b/arch/x86/include/asm/prom.h
index 8fdb0d2..6c80e53 100644
--- a/arch/x86/include/asm/prom.h
+++ b/arch/x86/include/asm/prom.h
@@ -20,10 +20,12 @@
#include <asm/irq.h>
#include <asm/atomic.h>
#include <asm/setup.h>
+#include <asm/irq_controller.h>
#ifdef CONFIG_OF
extern void init_dtb(void);
extern void add_dtb(u64 data);
+void add_interrupt_host(struct irq_host *ih);
#else
static inline void init_dtb(void) { }
static inline void add_dtb(u64 data) { }
diff --git a/arch/x86/kernel/prom.c b/arch/x86/kernel/prom.c
index ba9a096..996fd05 100644
--- a/arch/x86/kernel/prom.c
+++ b/arch/x86/kernel/prom.c
@@ -3,18 +3,63 @@
*/
#include <linux/io.h>
+#include <linux/interrupt.h>
#include <linux/list.h>
#include <linux/of.h>
#include <linux/of_platform.h>
#include <linux/slab.h>
+#include <asm/irq_controller.h>
+
char __initdata cmd_line[COMMAND_LINE_SIZE];
+static LIST_HEAD(irq_hosts);
+static DEFINE_RAW_SPINLOCK(big_irq_lock);
+
+void add_interrupt_host(struct irq_host *ih)
+{
+ unsigned long flags;
+
+ raw_spin_lock_irqsave(&big_irq_lock, flags);
+ list_add(&ih->l, &irq_hosts);
+ raw_spin_unlock_irqrestore(&big_irq_lock, flags);
+}
+
+static struct irq_host *get_ih_from_node(struct device_node *controller)
+{
+ struct irq_host *ih, *found = NULL;
+ unsigned long flags;
+
+ raw_spin_lock_irqsave(&big_irq_lock, flags);
+ list_for_each_entry(ih, &irq_hosts, l) {
+ if (ih->controller == controller) {
+ found = ih;
+ break;
+ }
+ }
+ raw_spin_unlock_irqrestore(&big_irq_lock, flags);
+ return found;
+}
unsigned int irq_create_of_mapping(struct device_node *controller,
const u32 *intspec, unsigned int intsize)
{
- return intspec[0];
+ struct irq_host *ih;
+ u32 virq;
+ u32 type;
+ int ret;
+ ih = get_ih_from_node(controller);
+ if (!ih)
+ return -ENODEV;
+ ret = ih->xlate(ih, intspec, intsize, &virq, &type);
+ if (ret)
+ return ret;
+ if (type == IRQ_TYPE_NONE)
+ return virq;
+ /* set the mask if it is different from current */
+ if (type == (irq_to_desc(virq)->status & IRQF_TRIGGER_MASK))
+ set_irq_type(virq, type);
+ return virq;
}
EXPORT_SYMBOL_GPL(irq_create_of_mapping);
--
1.7.3.2
ioapic_xlate provides a translation from the information in device tree
to ioapic related informations. This includes
- obtaining hw irq which is the vector number "=> pin number + gsi"
- obtaining type (level/edge/..)
- programming this information into ioapic
ioapic_add_ofnode adds an irq_host based on informations from the device
tree. The memory address is obtained from the phys_reg property instead
of reg. On the PCI bus we use reg property for the PCI address. We can't
use the PCI functions because we need the ioapic before the PCI bus is
up. This information (irq_host) is required in order to map a device to
its proper interrupt controller.
Signed-off-by: Sebastian Andrzej Siewior <[email protected]>
CC: [email protected]
Cc: [email protected]
Signed-off-by: Dirk Brandewie <[email protected]>
---
arch/x86/include/asm/io_apic.h | 7 +++
arch/x86/kernel/apic/io_apic.c | 100 ++++++++++++++++++++++++++++++++++++++++
arch/x86/kernel/prom.c | 8 +++
3 files changed, 115 insertions(+), 0 deletions(-)
diff --git a/arch/x86/include/asm/io_apic.h b/arch/x86/include/asm/io_apic.h
index d854b90..dc1169f 100644
--- a/arch/x86/include/asm/io_apic.h
+++ b/arch/x86/include/asm/io_apic.h
@@ -175,6 +175,13 @@ struct mp_ioapic_gsi{
u32 gsi_base;
u32 gsi_end;
};
+#ifdef CONFIG_X86_OF
+struct mp_of_ioapic {
+ struct device_node *node;
+};
+extern struct mp_of_ioapic mp_of_ioapic[MAX_IO_APICS];
+void __init ioapic_add_ofnode(struct device_node *np);
+#endif
extern struct mp_ioapic_gsi mp_gsi_routing[];
extern u32 gsi_top;
int mp_find_ioapic(u32 gsi);
diff --git a/arch/x86/kernel/apic/io_apic.c b/arch/x86/kernel/apic/io_apic.c
index ea51151..27a5709 100644
--- a/arch/x86/kernel/apic/io_apic.c
+++ b/arch/x86/kernel/apic/io_apic.c
@@ -43,6 +43,7 @@
#include <linux/bootmem.h>
#include <linux/dmar.h>
#include <linux/hpet.h>
+#include <linux/of_address.h>
#include <asm/idle.h>
#include <asm/io.h>
@@ -60,6 +61,7 @@
#include <asm/irq_remapping.h>
#include <asm/hpet.h>
#include <asm/hw_irq.h>
+#include <asm/irq_controller.h>
#include <asm/apic.h>
@@ -88,6 +90,10 @@ int nr_ioapics;
/* IO APIC gsi routing info */
struct mp_ioapic_gsi mp_gsi_routing[MAX_IO_APICS];
+#ifdef CONFIG_X86_OF
+/* OF -> IO APIC lookup */
+struct mp_of_ioapic mp_of_ioapic[MAX_IO_APICS];
+#endif
/* The one past the highest gsi number used */
u32 gsi_top;
@@ -4052,6 +4058,100 @@ void __init mp_register_ioapic(int id, u32 address, u32 gsi_base)
nr_ioapics++;
}
+#ifdef CONFIG_X86_OF
+static int ioapic_xlate(struct irq_host *h, const u32 *intspec, u32 intsize,
+ u32 *out_hwirq, u32 *out_type)
+{
+ u32 line;
+ u32 idx;
+ u32 type;
+ u32 trigger;
+ u32 polarity;
+ struct irq_cfg *cfg;
+ struct irq_desc *desc;
+
+ if (intsize < 1)
+ return -EINVAL;
+
+ line = *intspec;
+ idx = (u32) h->priv;
+ *out_hwirq = line + mp_gsi_routing[idx].gsi_base;
+ if (intsize > 1) {
+ intspec++;
+ type = *intspec;
+ switch (type) {
+ case 0:
+ *out_type = IRQ_TYPE_EDGE_RISING;
+ trigger = IOAPIC_EDGE;
+ polarity = 1;
+ break;
+ case 1:
+ *out_type = IRQ_TYPE_LEVEL_LOW;
+ trigger = IOAPIC_LEVEL;
+ polarity = 0;
+ break;
+ case 2:
+ *out_type = IRQ_TYPE_LEVEL_HIGH;
+ trigger = IOAPIC_LEVEL;
+ polarity = 1;
+ break;
+ case 3:
+ *out_type = IRQ_TYPE_EDGE_FALLING;
+ trigger = IOAPIC_EDGE;
+ polarity = 0;
+ break;
+ default:
+ *out_type = IRQ_TYPE_NONE;
+ trigger = IOAPIC_AUTO;
+ polarity = 0;
+ break;
+ };
+ } else {
+ *out_type = IRQ_TYPE_NONE;
+ trigger = IOAPIC_AUTO;
+ polarity = 0;
+ }
+ /* And now tell the IO APIC to make the line ready */
+ desc = irq_to_desc_alloc_node(*out_hwirq, 0);
+ cfg = irq_cfg(*out_hwirq);
+ add_pin_to_irq_node(cfg, 0, idx, line);
+ /* make it edge by default, settype will update it */
+ setup_ioapic_irq(idx, line, *out_hwirq, cfg, trigger, polarity);
+ return 0;
+}
+
+void __init ioapic_add_ofnode(struct device_node *np)
+{
+ int i;
+ const __be32 *prop;
+ u32 addr;
+
+ prop = of_get_property(np, "phys_reg", NULL);
+ if (!prop) {
+ printk(KERN_ERR "IO APIC %s missing phys_reg property.\n",
+ np->full_name);
+ return;
+ }
+ addr = be32_to_cpup(prop);
+
+ for (i = 0; i < nr_ioapics; i++) {
+ if (addr == mp_ioapics[i].apicaddr) {
+ struct irq_host *ih;
+
+ mp_of_ioapic[i].node = np;
+ ih = kzalloc(sizeof(*ih), GFP_KERNEL);
+ BUG_ON(!ih);
+ ih->controller = np;
+ ih->xlate = ioapic_xlate;
+ ih->priv = (void *)i;
+ add_interrupt_host(ih);
+ return;
+ }
+ }
+ printk(KERN_ERR "IO APIC at 0x%08x is not registered.\n", addr);
+}
+#endif
+
/* Enable IOAPIC early just for system timer */
void __init pre_init_apic_IRQ0(void)
{
diff --git a/arch/x86/kernel/prom.c b/arch/x86/kernel/prom.c
index bbd6064..e9ff517 100644
--- a/arch/x86/kernel/prom.c
+++ b/arch/x86/kernel/prom.c
@@ -9,9 +9,11 @@
#include <linux/of_platform.h>
#include <linux/slab.h>
#include <linux/pci.h>
+#include <linux/of_fdt.h>
#include <asm/hpet.h>
#include <asm/irq_controller.h>
+#include <asm/apic.h>
#include <asm/io_apic.h>
#include <asm/pci_x86.h>
@@ -355,8 +357,14 @@ void __init x86_early_of_parse(void)
void __init init_dtb(void)
{
+ struct device_node *dp;
+
if (!initial_boot_params)
return;
unflatten_device_tree();
+ for_each_node_by_type(dp, "interrupt-controller") {
+ if (of_device_is_compatible(dp, "intel,ioapic"))
+ ioapic_add_ofnode(dp);
+ }
}
--
1.7.3.2
The apic & ioapic have to be added to system early because
native_init_IRQ() requires it.
The phys_reg preoperty is used instead of the reg property because in
case of a PCI device this property is not holding the address of the
chip. In this case we can't query the PCI bar information because the
PCI bus is not (yet) up.
Signed-off-by: Sebastian Andrzej Siewior <[email protected]>
CC: [email protected]
Cc: [email protected]
Tested-by: Dirk Brandewie <[email protected]>
---
arch/x86/include/asm/prom.h | 4 ++
arch/x86/kernel/irqinit.c | 2 +-
arch/x86/kernel/prom.c | 105 +++++++++++++++++++++++++++++++++++++++++++
3 files changed, 110 insertions(+), 1 deletions(-)
diff --git a/arch/x86/include/asm/prom.h b/arch/x86/include/asm/prom.h
index 6c80e53..b74a49f 100644
--- a/arch/x86/include/asm/prom.h
+++ b/arch/x86/include/asm/prom.h
@@ -23,12 +23,16 @@
#include <asm/irq_controller.h>
#ifdef CONFIG_OF
+extern int of_ioapic;
extern void init_dtb(void);
extern void add_dtb(u64 data);
+void x86_early_of_parse(void);
void add_interrupt_host(struct irq_host *ih);
#else
static inline void init_dtb(void) { }
static inline void add_dtb(u64 data) { }
+static inline void x86_early_of_parse(void) { }
+#define of_ioapic 0
#endif
extern char cmd_line[COMMAND_LINE_SIZE];
diff --git a/arch/x86/kernel/irqinit.c b/arch/x86/kernel/irqinit.c
index d5970e2..8030193 100644
--- a/arch/x86/kernel/irqinit.c
+++ b/arch/x86/kernel/irqinit.c
@@ -250,7 +250,7 @@ void __init native_init_IRQ(void)
set_intr_gate(i, interrupt[i-FIRST_EXTERNAL_VECTOR]);
}
- if (!acpi_ioapic)
+ if (!acpi_ioapic && !of_ioapic)
setup_irq(2, &irq2);
#ifdef CONFIG_X86_32
diff --git a/arch/x86/kernel/prom.c b/arch/x86/kernel/prom.c
index 996fd05..9551f2f 100644
--- a/arch/x86/kernel/prom.c
+++ b/arch/x86/kernel/prom.c
@@ -10,11 +10,14 @@
#include <linux/slab.h>
#include <asm/irq_controller.h>
+#include <asm/io_apic.h>
char __initdata cmd_line[COMMAND_LINE_SIZE];
static LIST_HEAD(irq_hosts);
static DEFINE_RAW_SPINLOCK(big_irq_lock);
+int __initdata of_ioapic;
+
void add_interrupt_host(struct irq_host *ih)
{
unsigned long flags;
@@ -96,6 +99,108 @@ void __init add_dtb(u64 data)
offsetof(struct setup_data, data));
}
+static void __init of_lapic_setup(void)
+{
+#ifdef CONFIG_X86_LOCAL_APIC
+ if (apic_force_enable())
+ return ;
+
+ smp_found_config = 1;
+ pic_mode = 1;
+ /* Required for ioapic registration */
+ set_fixmap_nocache(FIX_APIC_BASE, mp_lapic_addr);
+ if (boot_cpu_physical_apicid == -1U)
+ boot_cpu_physical_apicid = read_apic_id();
+
+ generic_processor_info(boot_cpu_physical_apicid,
+ GET_APIC_VERSION(apic_read(APIC_LVR)));
+#endif
+}
+
+#ifdef CONFIG_X86_IO_APIC
+static int __init early_scan_ioapic(unsigned long node, const char *uname,
+ int depth, void *data)
+{
+ unsigned long l;
+ int ret;
+ __be32 *cell;
+ int ioapic_id;
+ phys_addr_t ioapic_addr;
+
+ ret = of_flat_dt_is_compatible(node, "intel,ioapic");
+ if (!ret)
+ return 0;
+
+ cell = of_get_flat_dt_prop(node, "phys_reg", &l);
+ if (!cell)
+ return 0;
+
+ ioapic_addr = of_read_ulong(cell, l / 4);
+ cell = of_get_flat_dt_prop(node, "id", &l);
+ if (!cell)
+ return 0;
+ ioapic_id = of_read_ulong(cell, l / 4);
+
+ mp_register_ioapic(ioapic_id, ioapic_addr, gsi_top);
+ return 0;
+}
+
+static void __init of_ioapic_setup(void)
+{
+ if (!smp_found_config)
+ return;
+
+ of_scan_flat_dt(early_scan_ioapic, NULL);
+ if (nr_ioapics) {
+ of_ioapic = 1;
+ return;
+ }
+ printk(KERN_ERR "Error: No information about IO-APIC in OF.\n");
+ smp_found_config = 0;
+}
+#else
+static void __init of_ioapic_setup(void) {}
+#endif
+
+static void __init of_apic_setup(void)
+{
+ of_lapic_setup();
+ of_ioapic_setup();
+}
+
+void __init x86_early_of_parse(void)
+{
+ void *virt_dtb;
+ resource_size_t phys_dtb;
+ u32 size;
+
+ if (!initial_boot_params)
+ return;
+ /*
+ * Here we grab some early informations before we the kernel mapping
+ * covers the complete lowmem. We setup a temporary fixmap for it and
+ * once we have what we were looking for we revert the address to its
+ * initial value. It will be used during unflattering and later during
+ * string lookups etc.
+ */
+
+ virt_dtb = initial_boot_params;
+ phys_dtb = virt_to_phys(virt_dtb);
+
+ initial_boot_params = early_memremap(phys_dtb, PAGE_SIZE);
+ size = be32_to_cpu(initial_boot_params->totalsize);
+ early_iounmap(initial_boot_params, PAGE_SIZE);
+
+ initial_boot_params = early_memremap(phys_dtb, size);
+
+ /* root level address cells */
+ of_scan_flat_dt(early_init_dt_scan_root, NULL);
+ of_apic_setup();
+
+ early_iounmap(initial_boot_params, size);
+ initial_boot_params = virt_dtb;
+}
+
void __init init_dtb(void)
{
if (!initial_boot_params)
--
1.7.3.2
x86_of_pci_init() does two things:
- it provides a generic irq enable and disable function. enable queries
the device tree for the interrupt information, calls ->xlate on the
irq host and updates the pci->irq information for the device.
- it walks through PCI buss(es) in the device tree and adds its children
(devices) nodes to appropriate pci_dev nodes in kernel. So the dtb
node information is available at probe time of the PCI device.
Adding a PCI bus based on the information in the device tree is
currently not supported. Right now direct access via ioports is used.
Signed-off-by: Sebastian Andrzej Siewior <[email protected]>
CC: [email protected]
Cc: [email protected]
Tested-by: Dirk Brandewie <[email protected]>
---
arch/x86/include/asm/prom.h | 1 +
arch/x86/kernel/prom.c | 110 +++++++++++++++++++++++++++++++++++++++++++
2 files changed, 111 insertions(+), 0 deletions(-)
diff --git a/arch/x86/include/asm/prom.h b/arch/x86/include/asm/prom.h
index b74a49f..794c6a1 100644
--- a/arch/x86/include/asm/prom.h
+++ b/arch/x86/include/asm/prom.h
@@ -28,6 +28,7 @@ extern void init_dtb(void);
extern void add_dtb(u64 data);
void x86_early_of_parse(void);
void add_interrupt_host(struct irq_host *ih);
+void __cpuinit x86_of_pci_init(void);
#else
static inline void init_dtb(void) { }
static inline void add_dtb(u64 data) { }
diff --git a/arch/x86/kernel/prom.c b/arch/x86/kernel/prom.c
index f61c541..c02777d 100644
--- a/arch/x86/kernel/prom.c
+++ b/arch/x86/kernel/prom.c
@@ -8,10 +8,12 @@
#include <linux/of.h>
#include <linux/of_platform.h>
#include <linux/slab.h>
+#include <linux/pci.h>
#include <asm/hpet.h>
#include <asm/irq_controller.h>
#include <asm/io_apic.h>
+#include <asm/pci_x86.h>
char __initdata cmd_line[COMMAND_LINE_SIZE];
static LIST_HEAD(irq_hosts);
@@ -100,6 +102,114 @@ void __init add_dtb(u64 data)
offsetof(struct setup_data, data));
}
+static int of_irq_map_pci(struct pci_dev *dev, struct of_irq *oirq)
+{
+ struct device_node *node;
+ __be32 laddr[3];
+ __be32 lspec[2];
+ int ret;
+ u8 pin;
+
+ node = dev->dev.of_node;
+ if (!node) {
+ node = dev->bus->dev.of_node;
+ if (node) {
+ ret = of_irq_map_one(node, 0, oirq);
+ if (!ret)
+ return ret;
+ }
+ }
+
+ ret = pci_read_config_byte(dev, PCI_INTERRUPT_PIN, &pin);
+ if (ret)
+ return ret;
+ if (!pin)
+ return -EINVAL;
+
+ laddr[0] = cpu_to_be32((dev->bus->number << 16) | (dev->devfn << 8));
+ laddr[1] = 0;
+ laddr[2] = 0;
+
+ lspec[0] = cpu_to_be32(pin);
+ lspec[1] = cpu_to_be32(0);
+ ret = of_irq_map_raw(node, lspec, 1, laddr, oirq);
+ if (ret)
+ return ret;
+ return 0;
+}
+
+static int x86_of_pci_irq_enable(struct pci_dev *dev)
+{
+ struct of_irq oirq;
+ u32 virq;
+ int ret;
+ u8 pin;
+
+ ret = pci_read_config_byte(dev, PCI_INTERRUPT_PIN, &pin);
+ if (ret)
+ return ret;
+ if (!pin)
+ return 0;
+
+ ret = of_irq_map_pci(dev, &oirq);
+ if (ret)
+ return ret;
+
+ virq = irq_create_of_mapping(oirq.controller, oirq.specifier,
+ oirq.size);
+ if (virq == NO_IRQ)
+ return -EINVAL;
+ dev->irq = virq;
+ return 0;
+}
+
+static void x86_of_pci_irq_disable(struct pci_dev *dev)
+{
+}
+
+void __cpuinit x86_of_pci_init(void)
+{
+ struct device_node *np;
+
+ pcibios_enable_irq = x86_of_pci_irq_enable;
+ pcibios_disable_irq = x86_of_pci_irq_disable;
+
+ for_each_node_by_type(np, "pci") {
+ const void *prop;
+ struct pci_bus *bus;
+ unsigned int bus_min;
+ struct device_node *child;
+
+ prop = of_get_property(np, "bus-range", NULL);
+ if (!prop)
+ continue;
+ bus_min = be32_to_cpup(prop);
+
+ bus = pci_find_bus(0, bus_min);
+ if (!bus) {
+ printk(KERN_ERR "Can't find a node for bus %s.\n",
+ np->full_name);
+ continue;
+ }
+
+ bus->dev.of_node = np;
+ for_each_child_of_node(np, child) {
+ struct pci_dev *dev;
+ u32 devfn;
+
+ prop = of_get_property(child, "reg", NULL);
+ if (!prop)
+ continue;
+
+ devfn = (be32_to_cpup(prop) >> 8) & 0xff;
+ dev = pci_get_slot(bus, devfn);
+ if (!dev)
+ continue;
+ dev->dev.of_node = child;
+ }
+ }
+}
+
static int __init early_scan_hpet(unsigned long node, const char *uname,
int depth, void *data)
{
--
1.7.3.2
This patch adds minimal support for device tree support on x86. It will
be passed to the kernel via setup_data which requires atleast boot
protocol 2.09.
Memory size, restricted memory regions, boot arguments are gathered the
traditional way so things like cmd_line are just here to let the code
compile.
The current plan is use the device tree as an extension and to gather
informations from it which can not be enumerated and have to be
hardcoded otherwise. This includes things like
- which devices are on this I2C/ SPI bus?
- how are the interrupts wired to IO APIC?
- where could my hpet be?
Dirk is working on some patches which provide generic infrastructure for
linking the dtb into the kernel. Once this is it its final shape, we
will relocate the device tree unconditionally. This will remove the
requirement for the boot loader to locate the device tree within lowmem.
Signed-off-by: Sebastian Andrzej Siewior <[email protected]>
CC: [email protected]
Cc: [email protected]
Signed-off-by: Dirk Brandewie <[email protected]>
---
Documentation/x86/boot_with_dtb.txt | 20 +++++++++++
arch/x86/Kconfig | 7 ++++
arch/x86/include/asm/bootparam.h | 1 +
arch/x86/include/asm/prom.h | 60 +++++++++++++++++++++++++++++++++++
arch/x86/kernel/Makefile | 1 +
arch/x86/kernel/irqinit.c | 7 ++++
arch/x86/kernel/prom.c | 60 +++++++++++++++++++++++++++++++++++
arch/x86/kernel/setup.c | 4 ++
8 files changed, 160 insertions(+), 0 deletions(-)
create mode 100644 Documentation/x86/boot_with_dtb.txt
create mode 100644 arch/x86/include/asm/prom.h
create mode 100644 arch/x86/kernel/prom.c
diff --git a/Documentation/x86/boot_with_dtb.txt b/Documentation/x86/boot_with_dtb.txt
new file mode 100644
index 0000000..3ba42b4
--- /dev/null
+++ b/Documentation/x86/boot_with_dtb.txt
@@ -0,0 +1,20 @@
+ Booting x86 with device tree
+=================================
+
+1. Introduction
+~~~~~~~~~~~~~~~
+This document contains device tree informations which are specific to
+the x86 platform. Generic informations as bindings can be found in
+Documentation/powerpc/dts-bindings/
+
+2. Passing the device tree
+~~~~~~~~~~~~~~~~~~~~~~~~~~
+The pointer to the device tree block (dtb) is passed via setup_data
+(see [0]) which requires atleast boot protocol 2.09. The type filed is
+defined as
+
+#define SETUP_DTB 2
+
+The setup_data struct has to be placed in lowmem.
+
+[0] Documentation/x86/boot.txt
diff --git a/arch/x86/Kconfig b/arch/x86/Kconfig
index 6ab6310..6c9ab9e 100644
--- a/arch/x86/Kconfig
+++ b/arch/x86/Kconfig
@@ -299,6 +299,13 @@ config X86_BIGSMP
---help---
This option is needed for the systems that have more than 8 CPUs
+config X86_OF
+ bool "Support for device tree"
+ select OF
+ select OF_FLATTREE
+ ---help---
+ Device tree support on X86.
+
if X86_32
config X86_EXTENDED_PLATFORM
bool "Support for extended (non-PC) x86 platforms"
diff --git a/arch/x86/include/asm/bootparam.h b/arch/x86/include/asm/bootparam.h
index c8bfe63..e020d88 100644
--- a/arch/x86/include/asm/bootparam.h
+++ b/arch/x86/include/asm/bootparam.h
@@ -12,6 +12,7 @@
/* setup data types */
#define SETUP_NONE 0
#define SETUP_E820_EXT 1
+#define SETUP_DTB 2
/* extensible setup data list node */
struct setup_data {
diff --git a/arch/x86/include/asm/prom.h b/arch/x86/include/asm/prom.h
new file mode 100644
index 0000000..8fdb0d2
--- /dev/null
+++ b/arch/x86/include/asm/prom.h
@@ -0,0 +1,60 @@
+/*
+ * Definitions for Device tree / OpenFirmware handling on X86
+ *
+ * based on arch/powerpc/include/asm/prom.h which is
+ * Copyright (C) 1996-2005 Paul Mackerras.
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public License
+ * as published by the Free Software Foundation; either version
+ * 2 of the License, or (at your option) any later version.
+ */
+
+#ifndef _ASM_X86_PROM_H
+#define _ASM_X86_PROM_H
+#ifdef __KERNEL__
+#ifndef __ASSEMBLY__
+
+#include <linux/of.h>
+#include <linux/types.h>
+#include <asm/irq.h>
+#include <asm/atomic.h>
+#include <asm/setup.h>
+
+#ifdef CONFIG_OF
+extern void init_dtb(void);
+extern void add_dtb(u64 data);
+#else
+static inline void init_dtb(void) { }
+static inline void add_dtb(u64 data) { }
+#endif
+
+extern char cmd_line[COMMAND_LINE_SIZE];
+/* This number is used when no interrupt has been assigned */
+#define NO_IRQ (-1)
+
+/**
+ * irq_dispose_mapping - Unmap an interrupt
+ * @virq: linux virq number of the interrupt to unmap
+ *
+ * FIXME: We really should implement proper virq handling like power,
+ * but that's going to be major surgery.
+ */
+static inline void irq_dispose_mapping(unsigned int virq) { }
+
+#define HAVE_ARCH_DEVTREE_FIXUPS
+
+#endif /* __ASSEMBLY__ */
+#endif /* __KERNEL__ */
+
+/*
+ * These includes are put at the bottom because they may contain things
+ * that are overridden by this file. Ideally they shouldn't be included
+ * by this file, but there are a bunch of .c files that currently depend
+ * on it. Eventually they will be cleaned up.
+ */
+#include <linux/of_fdt.h>
+#include <linux/of_irq.h>
+#include <linux/platform_device.h>
+
+#endif
diff --git a/arch/x86/kernel/Makefile b/arch/x86/kernel/Makefile
index 9e13763..e0a00c5 100644
--- a/arch/x86/kernel/Makefile
+++ b/arch/x86/kernel/Makefile
@@ -109,6 +109,7 @@ obj-$(CONFIG_MICROCODE) += microcode.o
obj-$(CONFIG_X86_CHECK_BIOS_CORRUPTION) += check.o
obj-$(CONFIG_SWIOTLB) += pci-swiotlb.o
+obj-$(CONFIG_X86_OF) += prom.o
###
# 64 bit specific files
diff --git a/arch/x86/kernel/irqinit.c b/arch/x86/kernel/irqinit.c
index c752e97..d5970e2 100644
--- a/arch/x86/kernel/irqinit.c
+++ b/arch/x86/kernel/irqinit.c
@@ -25,6 +25,7 @@
#include <asm/setup.h>
#include <asm/i8259.h>
#include <asm/traps.h>
+#include <asm/prom.h>
/*
* ISA PIC or low IO-APIC triggered (INTA-cycle or APIC) interrupts:
@@ -118,6 +119,12 @@ void __init init_IRQ(void)
int i;
/*
+ * We probably need a better place for this, but it works for
+ * now ...
+ */
+ init_dtb();
+
+ /*
* On cpu 0, Assign IRQ0_VECTOR..IRQ15_VECTOR's to IRQ 0..15.
* If these IRQ's are handled by legacy interrupt-controllers like PIC,
* then this configuration will likely be static after the boot. If
diff --git a/arch/x86/kernel/prom.c b/arch/x86/kernel/prom.c
new file mode 100644
index 0000000..ba9a096
--- /dev/null
+++ b/arch/x86/kernel/prom.c
@@ -0,0 +1,60 @@
+/*
+ * Architecture specific OF callbacks.
+ */
+
+#include <linux/io.h>
+#include <linux/list.h>
+#include <linux/of.h>
+#include <linux/of_platform.h>
+#include <linux/slab.h>
+
+char __initdata cmd_line[COMMAND_LINE_SIZE];
+
+unsigned int irq_create_of_mapping(struct device_node *controller,
+ const u32 *intspec, unsigned int intsize)
+{
+ return intspec[0];
+
+}
+EXPORT_SYMBOL_GPL(irq_create_of_mapping);
+
+unsigned long pci_address_to_pio(phys_addr_t address)
+{
+ BUG();
+}
+EXPORT_SYMBOL_GPL(pci_address_to_pio);
+
+void __init early_init_dt_scan_chosen_arch(unsigned long node)
+{
+ BUG();
+}
+
+void __init early_init_dt_add_memory_arch(u64 base, u64 size)
+{
+ BUG();
+}
+
+u64 __init early_init_dt_alloc_memory_arch(u64 size, u64 align)
+{
+ void *mem;
+
+ mem = kmalloc(size + align, GFP_KERNEL);
+ BUG_ON(!mem);
+ mem = PTR_ALIGN(mem, align);
+ return virt_to_phys(mem);
+}
+
+void __init add_dtb(u64 data)
+{
+ initial_boot_params = (struct boot_param_header *)
+ phys_to_virt((u64) (u32) data +
+ offsetof(struct setup_data, data));
+}
+
+void __init init_dtb(void)
+{
+ if (!initial_boot_params)
+ return;
+
+ unflatten_device_tree();
+}
diff --git a/arch/x86/kernel/setup.c b/arch/x86/kernel/setup.c
index 52f10e6..b1e1bcb 100644
--- a/arch/x86/kernel/setup.c
+++ b/arch/x86/kernel/setup.c
@@ -113,6 +113,7 @@
#endif
#include <asm/mce.h>
#include <asm/alternative.h>
+#include <asm/prom.h>
/*
* end_pfn only includes RAM, while max_pfn_mapped includes all e820 entries.
@@ -441,6 +442,9 @@ static void __init parse_setup_data(void)
case SETUP_E820_EXT:
parse_e820_ext(data);
break;
+ case SETUP_DTB:
+ add_dtb(pa_data);
+ break;
default:
break;
}
--
1.7.3.2
Signed-off-by: Sebastian Andrzej Siewior <[email protected]>
CC: [email protected]
Cc: [email protected]
Signed-off-by: Dirk Brandewie <[email protected]>
---
arch/x86/platform/ce4100/ce4100.dts | 210 +++++++++++++++++++++++++++++++++++
1 files changed, 210 insertions(+), 0 deletions(-)
create mode 100644 arch/x86/platform/ce4100/ce4100.dts
diff --git a/arch/x86/platform/ce4100/ce4100.dts b/arch/x86/platform/ce4100/ce4100.dts
new file mode 100644
index 0000000..2901882
--- /dev/null
+++ b/arch/x86/platform/ce4100/ce4100.dts
@@ -0,0 +1,210 @@
+/*
+ * CE4100 on Falcon Falls
+ *
+ * (c) Copyright 2010 Intel Corporation
+ *
+ * This program is free software; you can redistribute it and/or modify it
+ * under the terms of the GNU General Public License as published by the
+ * Free Software Foundation; either version 2 of the License, or (at your
+ * option) any later version.
+ */
+/dts-v1/;
+/ {
+ model = "x86,CE4100";
+ compatible = "x86,CE4100";
+ #address-cells = <1>;
+ #size-cells = <1>;
+
+ cpus {
+ x86,Atom@0 {
+ device_type = "cpu";
+ };
+ };
+
+ atom@0 {
+ #address-cells = <1>;
+ #size-cells = <1>;
+ device_type = "soc";
+ compatible = "simple-bus";
+ ranges;
+
+ /* Local APIC */
+ lapic@fee00000 {
+ compatible = "intel,lapic";
+ reg = <0xfee00000 0x1000>;
+ phys_reg = <0xfee00000>;
+ };
+ /* Primary IO-APIC */
+ ioapic1: pic@fec00000 {
+ #interrupt-cells = <2>;
+ compatible = "intel,ioapic";
+ interrupt-controller;
+ device_type = "interrupt-controller";
+ id = <1>;
+ reg = <0xfec00000 0x1000>;
+ phys_reg = <0xfec00000>;
+ };
+
+ hpet@fed00000 {
+ compatible = "intel,hpet";
+ reg = <0xfed00000 0x200>;
+ phys_reg = <0xfed00000>;
+ };
+ };
+
+ isa@legacy {
+ device_type = "isa";
+ compatible = "simple-bus";
+ #address-cells = <2>;
+ #size-cells = <1>;
+ ranges = <0 0 0 0x400>;
+
+ rtc@legacy {
+ compatible = "motorola,mc146818";
+ interrupts = <8 3>;
+ interrupt-parent = <&ioapic1>;
+ ctrl_reg = <2>;
+ freq_reg = <0x26>;
+ reg = <1 0x70 2>;
+ };
+
+ pci@3fc {
+ #address-cells = <3>;
+ #interrupt-cells = <1>;
+ #size-cells = <1>;
+ compatible = "intel,ce4100-pci";
+ device_type = "pci";
+ bus-range = <0 0>;
+
+ /* Secondary IO-APIC */
+ ioapic2: pic@bffff000 {
+ #interrupt-cells = <2>;
+ compatible = "intel,ioapic";
+ interrupt-controller;
+ device_type = "interrupt-controller";
+ id = <2>;
+ reg = <0x100 0x0 0x0 0x0>;
+ phys_reg = <0xbffff000>;
+ };
+
+ pci@av {
+ #address-cells = <3>;
+ #interrupt-cells = <1>;
+ #size-cells = <1>;
+ compatible = "intel,ce4100-pci";
+ device_type = "pci";
+ bus-range = <1 1>;
+ interrupt-map-mask = <0xffffff 0x0 0x0 0x0>;
+ interrupt-map = <
+ /* GFX: 0x2E5B */
+ 0x11000 0x0 0x0 0x0 &ioapic2 0 0x1
+ /* ***** FIXME ****** Compositing Engine: 0x2E72 */
+ /* 0x11100 0x0 0x0 0x1 &ioapic2 0 0x1 */
+ /* MFD: 0x2E5C */
+ 0x11800 0x0 0x0 0x0 &ioapic2 2 0x1
+ /* TS Prefilter: 0x2E5D */
+ 0x12000 0x0 0x0 0x0 &ioapic2 4 0x1
+ /* TS Demux: 0x2E5E */
+ 0x12100 0x0 0x0 0x0 &ioapic2 5 0x1
+ /* ***** FIXME ***** Audio DSP: 0x2E5F */
+ /* 0x13000 0x0 0x0 0x1 &ioapic2 0 0x1 */
+ /* Audio Interfaces: 0x2E60 */
+ 0x13200 0x0 0x0 0x0 &ioapic2 8 0x1
+ /* VDC: 0x2E61 */
+ 0x14000 0x0 0x0 0x0 &ioapic2 9 0x1
+ /* DPE: 0x2E62 */
+ 0x14100 0x0 0x0 0x0 &ioapic2 10 0x1
+ /* HDMI Tx: 0x2E63 */
+ 0x14200 0x0 0x0 0x0 &ioapic2 11 0x1
+ /* SEC: 0x2E64 */
+ 0x14800 0x0 0x0 0x0 &ioapic2 12 0x1
+ /* EXP: 0x2E65 */
+ 0x15000 0x0 0x0 0x0 &ioapic2 13 0x1
+ /* UART0/1: 0x2E66 */
+ 0x15800 0x0 0x0 0x0 &ioapic2 14 0x1
+ /* GPIO: 0x2E67 */
+ 0x15900 0x0 0x0 0x0 &ioapic2 15 0x1
+ /* I2C0/1/2: 0x2E68 */
+ 0x15a00 0x0 0x0 0x0 &ioapic2 16 0x1
+ /* Smart Card 0/1: 0x2E69 */
+ 0x15b00 0x0 0x0 0x0 &ioapic2 15 0x1
+ /* SPI: 0x2E6A */
+ 0x15c00 0x0 0x0 0x0 &ioapic2 15 0x1
+ /* MSPOD: 0x2E6B */
+ 0x15d00 0x0 0x0 0x0 &ioapic2 19 0x1
+ /* IR: 0x2E6C */
+ 0x15e00 0x0 0x0 0x0 &ioapic2 16 0x1
+ /* **** FIXME ***** DFX: 0x2E6D */
+ /* 0x15f00 0x0 0x0 0x1 &ioapic2 0x0 0x1 */
+ /* Gig Ethernet: 0x2E6E */
+ 0x16000 0x0 0x0 0x0 &ioapic2 21 0x1
+ /* IEEE1588 and Clock Recovery Unit: 0x2E6F */
+ 0x16100 0x0 0x0 0x0 &ioapic2 3 0x1
+ /* USB0: 0x2E70 */
+ 0x16800 0x0 0x0 0x0 &ioapic2 22 0x3
+ /* USB1: 0x2E70 */
+ 0x16900 0x0 0x0 0x0 &ioapic2 22 0x3
+ /* SATA: 0x2E71 */
+ 0x17000 0x0 0x0 0x0 &ioapic2 23 0x3
+ >;
+
+ i2c@15a00 {
+ #address-cells = <1>;
+ #size-cells = <0>;
+ reg = <0x15a00 0x0 0x0 0x0>;
+
+
+ i2c@0 {
+ reg = <0>;
+ };
+
+ i2c@1 {
+ #address-cells = <1>;
+ #size-cells = <0>;
+ reg = <1>;
+
+ pcf8575@26 {
+ compatible = "ti,pcf8575";
+ reg = <0x26>;
+ };
+ };
+
+ i2c@2 {
+ #address-cells = <1>;
+ #size-cells = <0>;
+ reg = <2>;
+
+ pcf8575@26 {
+ compatible = "ti,pcf8575";
+ reg = <0x26>;
+ };
+ };
+ };
+
+ spi@15c00 {
+ #address-cells = <1>;
+ #size-cells = <0>;
+ reg = <0x15c00 0x0 0x0 0x0>;
+
+ pcm1755@0 {
+ compatible = "ti,pcm1755";
+ reg = <0>;
+ spi-max-frequency = <115200>;
+ };
+
+ pcm1609a@1 {
+ compatible = "ti,pcm1609a";
+ reg = <1>;
+ spi-max-frequency = <115200>;
+ };
+
+ at93c46@2 {
+ compatible = "atmel,at93c46";
+ reg = <2>;
+ spi-max-frequency = <115200>;
+ };
+ };
+ };
+ };
+ };
+};
--
1.7.3.2
Set hpet_address based on information provied form DTB
Signed-off-by: Sebastian Andrzej Siewior <[email protected]>
CC: [email protected]
Cc: [email protected]
Tested-by: Dirk Brandewie <[email protected]>
---
arch/x86/kernel/prom.c | 23 +++++++++++++++++++++++
1 files changed, 23 insertions(+), 0 deletions(-)
diff --git a/arch/x86/kernel/prom.c b/arch/x86/kernel/prom.c
index 9551f2f..f61c541 100644
--- a/arch/x86/kernel/prom.c
+++ b/arch/x86/kernel/prom.c
@@ -9,6 +9,7 @@
#include <linux/of_platform.h>
#include <linux/slab.h>
+#include <asm/hpet.h>
#include <asm/irq_controller.h>
#include <asm/io_apic.h>
@@ -99,6 +100,27 @@ void __init add_dtb(u64 data)
offsetof(struct setup_data, data));
}
+static int __init early_scan_hpet(unsigned long node, const char *uname,
+ int depth, void *data)
+{
+ unsigned long l;
+ int ret;
+ __be32 *cell;
+
+ if (depth != 2)
+ return 0;
+
+ ret = of_flat_dt_is_compatible(node, "intel,hpet");
+ if (!ret)
+ return 0;
+
+ cell = of_get_flat_dt_prop(node, "phys_reg", &l);
+ if (!cell)
+ return 0;
+ hpet_address = of_read_ulong(cell, l / 4);
+ return 1;
+}
+
static void __init of_lapic_setup(void)
{
#ifdef CONFIG_X86_LOCAL_APIC
@@ -195,6 +217,7 @@ void __init x86_early_of_parse(void)
/* root level address cells */
of_scan_flat_dt(early_init_dt_scan_root, NULL);
+ of_scan_flat_dt(early_scan_hpet, NULL);
of_apic_setup();
early_iounmap(initial_boot_params, size);
--
1.7.3.2
For now we probe these busses and we change is to board dependent probes
once we have to.
Signed-off-by: Sebastian Andrzej Siewior <[email protected]>
CC: [email protected]
Cc: [email protected]
Signed-off-by: Dirk Brandewie <[email protected]>
---
arch/x86/kernel/prom.c | 19 +++++++++++++++++++
1 files changed, 19 insertions(+), 0 deletions(-)
diff --git a/arch/x86/kernel/prom.c b/arch/x86/kernel/prom.c
index c02777d..bbd6064 100644
--- a/arch/x86/kernel/prom.c
+++ b/arch/x86/kernel/prom.c
@@ -102,6 +102,25 @@ void __init add_dtb(u64 data)
offsetof(struct setup_data, data));
}
+/*
+ * CE4100 ids. Will be moved to machine_device_initcall() once we have it.
+ */
+static struct of_device_id __initdata ce4100_ids[] = {
+ { .type = "soc", },
+ { .compatible = "soc", },
+ { .compatible = "simple-bus", },
+ {},
+};
+
+static int __init add_bus_probe(void)
+{
+ if (!initial_boot_params)
+ return 0;
+
+ return of_platform_bus_probe(NULL, ce4100_ids, NULL);
+}
+module_init(add_bus_probe);
+
static int of_irq_map_pci(struct pci_dev *dev, struct of_irq *oirq)
{
struct device_node *node;
--
1.7.3.2
and hpet and a few others things....
Signed-off-by: Sebastian Andrzej Siewior <[email protected]>
CC: [email protected]
Signed-off-by: Dirk Brandewie <[email protected]>
---
arch/x86/platform/ce4100/ce4100.c | 16 ++++++++++++++--
1 files changed, 14 insertions(+), 2 deletions(-)
diff --git a/arch/x86/platform/ce4100/ce4100.c b/arch/x86/platform/ce4100/ce4100.c
index 0ede12b..5ed25df 100644
--- a/arch/x86/platform/ce4100/ce4100.c
+++ b/arch/x86/platform/ce4100/ce4100.c
@@ -13,7 +13,11 @@
#include <linux/irq.h>
#include <linux/module.h>
+#include <asm/prom.h>
#include <asm/setup.h>
+#include <asm/io.h>
+#include <asm/i8259.h>
+#include <asm/io_apic.h>
static int ce4100_i8042_detect(void)
{
@@ -24,8 +28,11 @@ static void __init sdv_arch_setup(void)
{
}
-static void __init sdv_find_smp_config(void)
+static void __cpuinit sdv_pci_init(void)
{
+ x86_of_pci_init();
+ /* We can't set this earlier, because we need calibrate the timer */
+ legacy_pic = &null_legacy_pic;
}
/*
@@ -38,5 +45,10 @@ void __init x86_ce4100_early_setup(void)
x86_platform.i8042_detect = ce4100_i8042_detect;
x86_init.resources.probe_roms = x86_init_noop;
x86_init.mpparse.get_smp_config = x86_init_uint_noop;
- x86_init.mpparse.find_smp_config = sdv_find_smp_config;
+ x86_init.mpparse.find_smp_config = x86_early_of_parse;
+
+#ifdef CONFIG_X86_IO_APIC
+ x86_init.pci.init_irq = sdv_pci_init;
+ x86_init.mpparse.setup_ioapic_ids = setup_ioapic_ids_from_apicid;
+#endif
}
--
1.7.3.2
This one goes through the registered IO-APICs and sets the id which the
core code is using.
Signed-off-by: Sebastian Andrzej Siewior <[email protected]>
CC: [email protected]
Signed-off-by: Dirk Brandewie <[email protected]>
---
arch/x86/include/asm/io_apic.h | 1 +
arch/x86/kernel/apic/io_apic.c | 44 ++++++++++++++++++++++++++++++++++++++++
2 files changed, 45 insertions(+), 0 deletions(-)
diff --git a/arch/x86/include/asm/io_apic.h b/arch/x86/include/asm/io_apic.h
index dc1169f..c920657 100644
--- a/arch/x86/include/asm/io_apic.h
+++ b/arch/x86/include/asm/io_apic.h
@@ -170,6 +170,7 @@ extern int restore_IO_APIC_setup(struct IO_APIC_route_entry **ioapic_entries);
extern int get_nr_irqs_gsi(void);
extern void setup_ioapic_ids_from_mpc(void);
+void setup_ioapic_ids_from_apicid(void);
struct mp_ioapic_gsi{
u32 gsi_base;
diff --git a/arch/x86/kernel/apic/io_apic.c b/arch/x86/kernel/apic/io_apic.c
index 27a5709..74cfe9b 100644
--- a/arch/x86/kernel/apic/io_apic.c
+++ b/arch/x86/kernel/apic/io_apic.c
@@ -2047,6 +2047,50 @@ void __init setup_ioapic_ids_from_mpc(void)
apic_printk(APIC_VERBOSE, " ok.\n");
}
}
+/*
+ * We assume here that the ids in mp_ioapics are correct but not yet
+ * written to the ioapic. While doing so we verify that those ids are
+ * unique.
+ */
+static __initdata DECLARE_BITMAP(apic_id_mask, MAX_APICS);
+void __init setup_ioapic_ids_from_apicid(void)
+{
+ union IO_APIC_reg_00 reg_00;
+ int apic_id;
+ unsigned long flags;
+
+ for (apic_id = 0; apic_id < nr_ioapics; apic_id++) {
+
+ if (mp_ioapics[apic_id].apicid > MAX_APICS) {
+ WARN_ON(1);
+ continue;
+ }
+
+ if (test_bit(mp_ioapics[apic_id].apicid, apic_id_mask)) {
+ WARN_ON(1);
+ continue;
+ }
+
+ set_bit(mp_ioapics[apic_id].apicid, apic_id_mask);
+
+ raw_spin_lock_irqsave(&ioapic_lock, flags);
+ reg_00.raw = io_apic_read(apic_id, 0);
+ raw_spin_unlock_irqrestore(&ioapic_lock, flags);
+
+ if (reg_00.bits.ID == mp_ioapics[apic_id].apicid)
+ continue;
+
+ reg_00.bits.ID = mp_ioapics[apic_id].apicid;
+ raw_spin_lock_irqsave(&ioapic_lock, flags);
+ io_apic_write(apic_id, 0, reg_00.raw);
+ reg_00.raw = io_apic_read(apic_id, 0);
+ raw_spin_unlock_irqrestore(&ioapic_lock, flags);
+
+ if (reg_00.bits.ID != mp_ioapics[apic_id].apicid)
+ printk(KERN_ERR "Could not update id of IOAPIC %d\n",
+ mp_ioapics[apic_id].apicid);
+ }
+}
#endif
int no_timer_check __initdata;
--
1.7.3.2
parse_setup_data() uses early_memremap() for a PAGE_SIZE mapping in
order to figure out the type & size. If this mapping is not large enough
then parse_e820_ext() will remap this area again via early_ioremap()
since the first mapping is still in use.
This patch attempts to simplify the handling and parse_e820_ext() does
not need to worry about the mapping anymore.
Signed-off-by: Sebastian Andrzej Siewior <[email protected]>
CC: [email protected]
Signed-off-by: Dirk Brandewie <[email protected]>
---
arch/x86/include/asm/e820.h | 2 +-
arch/x86/kernel/e820.c | 8 +-------
arch/x86/kernel/setup.c | 11 +++++++++--
3 files changed, 11 insertions(+), 10 deletions(-)
diff --git a/arch/x86/include/asm/e820.h b/arch/x86/include/asm/e820.h
index 5be1542..e956492 100644
--- a/arch/x86/include/asm/e820.h
+++ b/arch/x86/include/asm/e820.h
@@ -93,7 +93,7 @@ extern void e820_setup_gap(void);
extern int e820_search_gap(unsigned long *gapstart, unsigned long *gapsize,
unsigned long start_addr, unsigned long long end_addr);
struct setup_data;
-extern void parse_e820_ext(struct setup_data *data, unsigned long pa_data);
+extern void parse_e820_ext(struct setup_data *data);
#if defined(CONFIG_X86_64) || \
(defined(CONFIG_X86_32) && defined(CONFIG_HIBERNATION))
diff --git a/arch/x86/kernel/e820.c b/arch/x86/kernel/e820.c
index 0c2b7ef..33f6361 100644
--- a/arch/x86/kernel/e820.c
+++ b/arch/x86/kernel/e820.c
@@ -666,21 +666,15 @@ __init void e820_setup_gap(void)
* boot_params.e820_map, others are passed via SETUP_E820_EXT node of
* linked list of struct setup_data, which is parsed here.
*/
-void __init parse_e820_ext(struct setup_data *sdata, unsigned long pa_data)
+void __init parse_e820_ext(struct setup_data *sdata)
{
- u32 map_len;
int entries;
struct e820entry *extmap;
entries = sdata->len / sizeof(struct e820entry);
- map_len = sdata->len + sizeof(struct setup_data);
- if (map_len > PAGE_SIZE)
- sdata = early_ioremap(pa_data, map_len);
extmap = (struct e820entry *)(sdata->data);
__append_e820_map(extmap, entries);
sanitize_e820_map(e820.map, ARRAY_SIZE(e820.map), &e820.nr_map);
- if (map_len > PAGE_SIZE)
- early_iounmap(sdata, map_len);
printk(KERN_INFO "extended physical RAM map:\n");
e820_print_map("extended");
}
diff --git a/arch/x86/kernel/setup.c b/arch/x86/kernel/setup.c
index 000e4a1..52f10e6 100644
--- a/arch/x86/kernel/setup.c
+++ b/arch/x86/kernel/setup.c
@@ -429,16 +429,23 @@ static void __init parse_setup_data(void)
return;
pa_data = boot_params.hdr.setup_data;
while (pa_data) {
+ u32 len;
+
data = early_memremap(pa_data, PAGE_SIZE);
+ len = data->len + sizeof(struct setup_data);
+ early_iounmap(data, PAGE_SIZE);
+
+ data = early_memremap(pa_data, len);
+
switch (data->type) {
case SETUP_E820_EXT:
- parse_e820_ext(data, pa_data);
+ parse_e820_ext(data);
break;
default:
break;
}
pa_data = data->next;
- early_iounmap(data, PAGE_SIZE);
+ early_iounmap(data, len);
}
}
--
1.7.3.2
> The here introduced irq_host abstraction represents a generic irq_host.
> The xlate callback is resposible to parse irq informations like irq type
> and number and returns the hardware irq number which is reported by the
> hardware as active.
> [...]
> diff --git a/arch/x86/include/asm/irq_controller.h b/arch/x86/include/asm/irq
> _controller.h
> new file mode 100644
> index 0000000..1cbbfd0
> --- /dev/null
> +++ b/arch/x86/include/asm/irq_controller.h
> @@ -0,0 +1,12 @@
> +#ifndef __IRQ_CONTROLLER__
> +#define __IRQ_CONTROLLER__
> +
> +struct irq_host {
> + int (*xlate)(struct irq_host *h, const u32 *intspec, u32 intsize,
> + u32 *out_hwirq, u32 *out_type);
> + void *priv;
> + struct device_node *controller;
> + struct list_head l;
> +};
I thought there was an intent and desire to rename the irq_host
as irq_domain.
jdl
On Thu, Nov 25, 2010 at 9:40 AM, Sebastian Andrzej Siewior
<[email protected]> wrote:
> This one goes through the registered IO-APICs and sets the id which the
> core code is using.
>
> Signed-off-by: Sebastian Andrzej Siewior <[email protected]>
> CC: [email protected]
> Signed-off-by: Dirk Brandewie <[email protected]>
> ---
> ?arch/x86/include/asm/io_apic.h | ? ?1 +
> ?arch/x86/kernel/apic/io_apic.c | ? 44 ++++++++++++++++++++++++++++++++++++++++
> ?2 files changed, 45 insertions(+), 0 deletions(-)
>
> diff --git a/arch/x86/include/asm/io_apic.h b/arch/x86/include/asm/io_apic.h
> index dc1169f..c920657 100644
> --- a/arch/x86/include/asm/io_apic.h
> +++ b/arch/x86/include/asm/io_apic.h
> @@ -170,6 +170,7 @@ extern int restore_IO_APIC_setup(struct IO_APIC_route_entry **ioapic_entries);
>
> ?extern int get_nr_irqs_gsi(void);
> ?extern void setup_ioapic_ids_from_mpc(void);
> +void setup_ioapic_ids_from_apicid(void);
>
> ?struct mp_ioapic_gsi{
> ? ? ? ?u32 gsi_base;
> diff --git a/arch/x86/kernel/apic/io_apic.c b/arch/x86/kernel/apic/io_apic.c
> index 27a5709..74cfe9b 100644
> --- a/arch/x86/kernel/apic/io_apic.c
> +++ b/arch/x86/kernel/apic/io_apic.c
> @@ -2047,6 +2047,50 @@ void __init setup_ioapic_ids_from_mpc(void)
> ? ? ? ? ? ? ? ? ? ? ? ?apic_printk(APIC_VERBOSE, " ok.\n");
> ? ? ? ?}
> ?}
> +/*
> + * We assume here that the ids in mp_ioapics are correct but not yet
> + * written to the ioapic. While doing so we verify that those ids are
> + * unique.
> + */
> +static __initdata DECLARE_BITMAP(apic_id_mask, MAX_APICS);
> +void __init setup_ioapic_ids_from_apicid(void)
> +{
> + ? ? ? union IO_APIC_reg_00 reg_00;
> + ? ? ? int apic_id;
> + ? ? ? unsigned long flags;
> +
> + ? ? ? for (apic_id = 0; apic_id < nr_ioapics; apic_id++) {
> +
> + ? ? ? ? ? ? ? if (mp_ioapics[apic_id].apicid > MAX_APICS) {
> + ? ? ? ? ? ? ? ? ? ? ? WARN_ON(1);
> + ? ? ? ? ? ? ? ? ? ? ? continue;
> + ? ? ? ? ? ? ? }
> +
> + ? ? ? ? ? ? ? if (test_bit(mp_ioapics[apic_id].apicid, apic_id_mask)) {
> + ? ? ? ? ? ? ? ? ? ? ? WARN_ON(1);
> + ? ? ? ? ? ? ? ? ? ? ? continue;
> + ? ? ? ? ? ? ? }
> +
> + ? ? ? ? ? ? ? set_bit(mp_ioapics[apic_id].apicid, apic_id_mask);
> +
> + ? ? ? ? ? ? ? raw_spin_lock_irqsave(&ioapic_lock, flags);
> + ? ? ? ? ? ? ? reg_00.raw = io_apic_read(apic_id, 0);
> + ? ? ? ? ? ? ? raw_spin_unlock_irqrestore(&ioapic_lock, flags);
> +
> + ? ? ? ? ? ? ? if (reg_00.bits.ID == mp_ioapics[apic_id].apicid)
> + ? ? ? ? ? ? ? ? ? ? ? continue;
> +
> + ? ? ? ? ? ? ? reg_00.bits.ID = mp_ioapics[apic_id].apicid;
> + ? ? ? ? ? ? ? raw_spin_lock_irqsave(&ioapic_lock, flags);
> + ? ? ? ? ? ? ? io_apic_write(apic_id, 0, reg_00.raw);
> + ? ? ? ? ? ? ? reg_00.raw = io_apic_read(apic_id, 0);
> + ? ? ? ? ? ? ? raw_spin_unlock_irqrestore(&ioapic_lock, flags);
> +
> + ? ? ? ? ? ? ? if (reg_00.bits.ID != mp_ioapics[apic_id].apicid)
> + ? ? ? ? ? ? ? ? ? ? ? printk(KERN_ERR "Could not update id of IOAPIC %d\n",
> + ? ? ? ? ? ? ? ? ? ? ? ? ? ? ? ? ? ? ? mp_ioapics[apic_id].apicid);
> + ? ? ? }
> +}
> ?#endif
>
> ?int no_timer_check __initdata;
can you update and split setup_ioapic_ids_from_mpc() for your using?
Thanks
Yinghai
> diff --git a/arch/x86/include/asm/prom.h b/arch/x86/include/asm/prom.h
> new file mode 100644
> index 0000000..8fdb0d2
> --- /dev/null
> +++ b/arch/x86/include/asm/prom.h
> @@ -0,0 +1,60 @@
> +/*
> + * Definitions for Device tree / OpenFirmware handling on X86
> + *
> + * based on arch/powerpc/include/asm/prom.h which is
> + * Copyright (C) 1996-2005 Paul Mackerras.
> + *
> + * This program is free software; you can redistribute it and/or
> + * modify it under the terms of the GNU General Public License
> + * as published by the Free Software Foundation; either version
> + * 2 of the License, or (at your option) any later version.
> + */
> +
> +#ifndef _ASM_X86_PROM_H
> +#define _ASM_X86_PROM_H
> +#ifdef __KERNEL__
This file is not exported to userspace - so no need to guard with __KERNEL__
Sam
Sam Ravnborg wrote:
>> +++ b/arch/x86/include/asm/prom.h
>> +#ifdef __KERNEL__
>
> This file is not exported to userspace - so no need to guard with __KERNEL__
Removed.
>
> Sam
Sebastian
Yinghai Lu wrote:
> On Thu, Nov 25, 2010 at 9:40 AM, Sebastian Andrzej Siewior
> <[email protected]> wrote:
>> This one goes through the registered IO-APICs and sets the id which the
>> core code is using.
>
> can you update and split setup_ioapic_ids_from_mpc() for your using?
Probably, let me see.
> Thanks
>
> Yinghai
Sebastian
Jon Loeliger wrote:
>> +struct irq_host {
>> + int (*xlate)(struct irq_host *h, const u32 *intspec, u32 intsize,
>> + u32 *out_hwirq, u32 *out_type);
>> + void *priv;
>> + struct device_node *controller;
>> + struct list_head l;
>> +};
>
> I thought there was an intent and desire to rename the irq_host
> as irq_domain.
>
> jdl
AFAIK Benh was thinking about renaming it. I don't know if this is still
the case or when he intends to do so. Once he does so, this can be renamed
as well.
Sebastian
I think that I need this function to write the APIC id back to the chip
as it is not initialized at all. apic_version[boot_cpu_physical_apicid]
return 0x14 and therefore it is skipped. The manual on the other hand
says that these ids should be uniqe.
Signed-off-by: Sebastian Andrzej Siewior <[email protected]>
CC: [email protected]
Signed-off-by: Dirk Brandewie <[email protected]>
---
arch/x86/include/asm/io_apic.h | 1 +
arch/x86/kernel/apic/io_apic.c | 28 ++++++++++++++++------------
2 files changed, 17 insertions(+), 12 deletions(-)
diff --git a/arch/x86/include/asm/io_apic.h b/arch/x86/include/asm/io_apic.h
index dc1169f..185d484 100644
--- a/arch/x86/include/asm/io_apic.h
+++ b/arch/x86/include/asm/io_apic.h
@@ -170,6 +170,7 @@ extern int restore_IO_APIC_setup(struct IO_APIC_route_entry **ioapic_entries);
extern int get_nr_irqs_gsi(void);
extern void setup_ioapic_ids_from_mpc(void);
+void setup_ioapic_ids_from_mpc_nocheck(void);
struct mp_ioapic_gsi{
u32 gsi_base;
diff --git a/arch/x86/kernel/apic/io_apic.c b/arch/x86/kernel/apic/io_apic.c
index 27a5709..56c45ab 100644
--- a/arch/x86/kernel/apic/io_apic.c
+++ b/arch/x86/kernel/apic/io_apic.c
@@ -1939,8 +1939,7 @@ void disable_IO_APIC(void)
*
* by Matt Domsch <[email protected]> Tue Dec 21 12:25:05 CST 1999
*/
-
-void __init setup_ioapic_ids_from_mpc(void)
+void __init setup_ioapic_ids_from_mpc_nocheck(void)
{
union IO_APIC_reg_00 reg_00;
physid_mask_t phys_id_present_map;
@@ -1949,15 +1948,6 @@ void __init setup_ioapic_ids_from_mpc(void)
unsigned char old_id;
unsigned long flags;
- if (acpi_ioapic)
- return;
- /*
- * Don't check I/O APIC IDs for xAPIC systems. They have
- * no meaning without the serial APIC bus.
- */
- if (!(boot_cpu_data.x86_vendor == X86_VENDOR_INTEL)
- || APIC_XAPIC(apic_version[boot_cpu_physical_apicid]))
- return;
/*
* This is broken; anything with a real cpu count has to
* circumvent this idiocy regardless.
@@ -2011,7 +2001,6 @@ void __init setup_ioapic_ids_from_mpc(void)
physids_or(phys_id_present_map, phys_id_present_map, tmp);
}
-
/*
* We need to adjust the IRQ routing table
* if the ID changed.
@@ -2047,6 +2036,21 @@ void __init setup_ioapic_ids_from_mpc(void)
apic_printk(APIC_VERBOSE, " ok.\n");
}
}
+
+void __init setup_ioapic_ids_from_mpc(void)
+{
+
+ if (acpi_ioapic)
+ return;
+ /*
+ * Don't check I/O APIC IDs for xAPIC systems. They have
+ * no meaning without the serial APIC bus.
+ */
+ if (!(boot_cpu_data.x86_vendor == X86_VENDOR_INTEL)
+ || APIC_XAPIC(apic_version[boot_cpu_physical_apicid]))
+ return;
+ setup_ioapic_ids_from_mpc_nocheck();
+}
#endif
int no_timer_check __initdata;
--
1.7.3.2
On Fri, 2010-11-26 at 15:19 +0100, Sebastian Andrzej Siewior wrote:
> AFAIK Benh was thinking about renaming it. I don't know if this is
> still
> the case or when he intends to do so. Once he does so, this can be
> renamed
> as well.
That and moving the powerpc code to a generic place so you don't have to
re-invent your own :-)
I think Grant has patches for that.
Cheers,
Ben.
On Thu, 2010-11-25 at 18:39 +0100, Sebastian Andrzej Siewior wrote:
> This patch adds minimal support for device tree support on x86. It will
> be passed to the kernel via setup_data which requires atleast boot
> protocol 2.09.
> Memory size, restricted memory regions, boot arguments are gathered the
> traditional way so things like cmd_line are just here to let the code
> compile.
> The current plan is use the device tree as an extension and to gather
> informations from it which can not be enumerated and have to be
> hardcoded otherwise. This includes things like
> - which devices are on this I2C/ SPI bus?
> - how are the interrupts wired to IO APIC?
> - where could my hpet be?
How do that work with platforms like OLPC that have a real OF ?
One thing we did on powerpc which among other things allow kexec to work
on such platforms when we kill OF at boot (it might stay alive on OLPC),
is to basically detect very early in asm that we are coming from OF,
have a trampoline that extract the DT and turns it into a flat dtb, then
continue to the main kernel entry using the dtb method.
That way, one can kexec using the dtb method over and there's one single
entry point for device-tree use.
Are you doing something similar for x86 ?
> Dirk is working on some patches which provide generic infrastructure for
> linking the dtb into the kernel. Once this is it its final shape, we
> will relocate the device tree unconditionally. This will remove the
> requirement for the boot loader to locate the device tree within lowmem.
Linking the dtb into the kernel is something we prefer not doing on
powerpc and I'm curious why you think that applies better on x86...
We -do- have ways to include it in the zImage wrapper. However, this is
different in subtle ways because of the way our zImage wrapper building
works. Basically, we always build all the per-platform .o's of the
wrapper that apply to supported platforms by the kernel. The
binding/linking together of the final wrapper with a kernel image, an
optional dtb and optional initrd is performed by a shell script that can
be used outside of the normal build context.
That means that it's possible for a distro for example to install a
kernel image, all the wrapper .o files and that script, and at runtime
rebuild zImage wrappers with the appropriate dtb without having the
whole built kernel tree at hand.
The direction taken by ARM (and possibly newer powerpc platforms as
well) is to have the dtb be passed by the bootloader. Typically
bootloaders like uboot provide a way to flash the dtb separately so it
can be udpated (*).
(*) That brings a separate topic we shall discuss: A consistent way for
versionning the device-tree would be really useful.
Cheers,
Ben.
> Signed-off-by: Sebastian Andrzej Siewior <[email protected]>
> CC: [email protected]
> Cc: [email protected]
> Signed-off-by: Dirk Brandewie <[email protected]>
> ---
> Documentation/x86/boot_with_dtb.txt | 20 +++++++++++
> arch/x86/Kconfig | 7 ++++
> arch/x86/include/asm/bootparam.h | 1 +
> arch/x86/include/asm/prom.h | 60 +++++++++++++++++++++++++++++++++++
> arch/x86/kernel/Makefile | 1 +
> arch/x86/kernel/irqinit.c | 7 ++++
> arch/x86/kernel/prom.c | 60 +++++++++++++++++++++++++++++++++++
> arch/x86/kernel/setup.c | 4 ++
> 8 files changed, 160 insertions(+), 0 deletions(-)
> create mode 100644 Documentation/x86/boot_with_dtb.txt
> create mode 100644 arch/x86/include/asm/prom.h
> create mode 100644 arch/x86/kernel/prom.c
>
> diff --git a/Documentation/x86/boot_with_dtb.txt b/Documentation/x86/boot_with_dtb.txt
> new file mode 100644
> index 0000000..3ba42b4
> --- /dev/null
> +++ b/Documentation/x86/boot_with_dtb.txt
> @@ -0,0 +1,20 @@
> + Booting x86 with device tree
> +=================================
> +
> +1. Introduction
> +~~~~~~~~~~~~~~~
> +This document contains device tree informations which are specific to
> +the x86 platform. Generic informations as bindings can be found in
> +Documentation/powerpc/dts-bindings/
> +
> +2. Passing the device tree
> +~~~~~~~~~~~~~~~~~~~~~~~~~~
> +The pointer to the device tree block (dtb) is passed via setup_data
> +(see [0]) which requires atleast boot protocol 2.09. The type filed is
> +defined as
> +
> +#define SETUP_DTB 2
> +
> +The setup_data struct has to be placed in lowmem.
> +
> +[0] Documentation/x86/boot.txt
> diff --git a/arch/x86/Kconfig b/arch/x86/Kconfig
> index 6ab6310..6c9ab9e 100644
> --- a/arch/x86/Kconfig
> +++ b/arch/x86/Kconfig
> @@ -299,6 +299,13 @@ config X86_BIGSMP
> ---help---
> This option is needed for the systems that have more than 8 CPUs
>
> +config X86_OF
> + bool "Support for device tree"
> + select OF
> + select OF_FLATTREE
> + ---help---
> + Device tree support on X86.
> +
> if X86_32
> config X86_EXTENDED_PLATFORM
> bool "Support for extended (non-PC) x86 platforms"
> diff --git a/arch/x86/include/asm/bootparam.h b/arch/x86/include/asm/bootparam.h
> index c8bfe63..e020d88 100644
> --- a/arch/x86/include/asm/bootparam.h
> +++ b/arch/x86/include/asm/bootparam.h
> @@ -12,6 +12,7 @@
> /* setup data types */
> #define SETUP_NONE 0
> #define SETUP_E820_EXT 1
> +#define SETUP_DTB 2
>
> /* extensible setup data list node */
> struct setup_data {
> diff --git a/arch/x86/include/asm/prom.h b/arch/x86/include/asm/prom.h
> new file mode 100644
> index 0000000..8fdb0d2
> --- /dev/null
> +++ b/arch/x86/include/asm/prom.h
> @@ -0,0 +1,60 @@
> +/*
> + * Definitions for Device tree / OpenFirmware handling on X86
> + *
> + * based on arch/powerpc/include/asm/prom.h which is
> + * Copyright (C) 1996-2005 Paul Mackerras.
> + *
> + * This program is free software; you can redistribute it and/or
> + * modify it under the terms of the GNU General Public License
> + * as published by the Free Software Foundation; either version
> + * 2 of the License, or (at your option) any later version.
> + */
> +
> +#ifndef _ASM_X86_PROM_H
> +#define _ASM_X86_PROM_H
> +#ifdef __KERNEL__
> +#ifndef __ASSEMBLY__
> +
> +#include <linux/of.h>
> +#include <linux/types.h>
> +#include <asm/irq.h>
> +#include <asm/atomic.h>
> +#include <asm/setup.h>
> +
> +#ifdef CONFIG_OF
> +extern void init_dtb(void);
> +extern void add_dtb(u64 data);
> +#else
> +static inline void init_dtb(void) { }
> +static inline void add_dtb(u64 data) { }
> +#endif
> +
> +extern char cmd_line[COMMAND_LINE_SIZE];
> +/* This number is used when no interrupt has been assigned */
> +#define NO_IRQ (-1)
> +
> +/**
> + * irq_dispose_mapping - Unmap an interrupt
> + * @virq: linux virq number of the interrupt to unmap
> + *
> + * FIXME: We really should implement proper virq handling like power,
> + * but that's going to be major surgery.
> + */
> +static inline void irq_dispose_mapping(unsigned int virq) { }
> +
> +#define HAVE_ARCH_DEVTREE_FIXUPS
> +
> +#endif /* __ASSEMBLY__ */
> +#endif /* __KERNEL__ */
> +
> +/*
> + * These includes are put at the bottom because they may contain things
> + * that are overridden by this file. Ideally they shouldn't be included
> + * by this file, but there are a bunch of .c files that currently depend
> + * on it. Eventually they will be cleaned up.
> + */
> +#include <linux/of_fdt.h>
> +#include <linux/of_irq.h>
> +#include <linux/platform_device.h>
> +
> +#endif
> diff --git a/arch/x86/kernel/Makefile b/arch/x86/kernel/Makefile
> index 9e13763..e0a00c5 100644
> --- a/arch/x86/kernel/Makefile
> +++ b/arch/x86/kernel/Makefile
> @@ -109,6 +109,7 @@ obj-$(CONFIG_MICROCODE) += microcode.o
> obj-$(CONFIG_X86_CHECK_BIOS_CORRUPTION) += check.o
>
> obj-$(CONFIG_SWIOTLB) += pci-swiotlb.o
> +obj-$(CONFIG_X86_OF) += prom.o
>
> ###
> # 64 bit specific files
> diff --git a/arch/x86/kernel/irqinit.c b/arch/x86/kernel/irqinit.c
> index c752e97..d5970e2 100644
> --- a/arch/x86/kernel/irqinit.c
> +++ b/arch/x86/kernel/irqinit.c
> @@ -25,6 +25,7 @@
> #include <asm/setup.h>
> #include <asm/i8259.h>
> #include <asm/traps.h>
> +#include <asm/prom.h>
>
> /*
> * ISA PIC or low IO-APIC triggered (INTA-cycle or APIC) interrupts:
> @@ -118,6 +119,12 @@ void __init init_IRQ(void)
> int i;
>
> /*
> + * We probably need a better place for this, but it works for
> + * now ...
> + */
> + init_dtb();
> +
> + /*
> * On cpu 0, Assign IRQ0_VECTOR..IRQ15_VECTOR's to IRQ 0..15.
> * If these IRQ's are handled by legacy interrupt-controllers like PIC,
> * then this configuration will likely be static after the boot. If
> diff --git a/arch/x86/kernel/prom.c b/arch/x86/kernel/prom.c
> new file mode 100644
> index 0000000..ba9a096
> --- /dev/null
> +++ b/arch/x86/kernel/prom.c
> @@ -0,0 +1,60 @@
> +/*
> + * Architecture specific OF callbacks.
> + */
> +
> +#include <linux/io.h>
> +#include <linux/list.h>
> +#include <linux/of.h>
> +#include <linux/of_platform.h>
> +#include <linux/slab.h>
> +
> +char __initdata cmd_line[COMMAND_LINE_SIZE];
> +
> +unsigned int irq_create_of_mapping(struct device_node *controller,
> + const u32 *intspec, unsigned int intsize)
> +{
> + return intspec[0];
> +
> +}
> +EXPORT_SYMBOL_GPL(irq_create_of_mapping);
> +
> +unsigned long pci_address_to_pio(phys_addr_t address)
> +{
> + BUG();
> +}
> +EXPORT_SYMBOL_GPL(pci_address_to_pio);
> +
> +void __init early_init_dt_scan_chosen_arch(unsigned long node)
> +{
> + BUG();
> +}
> +
> +void __init early_init_dt_add_memory_arch(u64 base, u64 size)
> +{
> + BUG();
> +}
> +
> +u64 __init early_init_dt_alloc_memory_arch(u64 size, u64 align)
> +{
> + void *mem;
> +
> + mem = kmalloc(size + align, GFP_KERNEL);
> + BUG_ON(!mem);
> + mem = PTR_ALIGN(mem, align);
> + return virt_to_phys(mem);
> +}
> +
> +void __init add_dtb(u64 data)
> +{
> + initial_boot_params = (struct boot_param_header *)
> + phys_to_virt((u64) (u32) data +
> + offsetof(struct setup_data, data));
> +}
> +
> +void __init init_dtb(void)
> +{
> + if (!initial_boot_params)
> + return;
> +
> + unflatten_device_tree();
> +}
> diff --git a/arch/x86/kernel/setup.c b/arch/x86/kernel/setup.c
> index 52f10e6..b1e1bcb 100644
> --- a/arch/x86/kernel/setup.c
> +++ b/arch/x86/kernel/setup.c
> @@ -113,6 +113,7 @@
> #endif
> #include <asm/mce.h>
> #include <asm/alternative.h>
> +#include <asm/prom.h>
>
> /*
> * end_pfn only includes RAM, while max_pfn_mapped includes all e820 entries.
> @@ -441,6 +442,9 @@ static void __init parse_setup_data(void)
> case SETUP_E820_EXT:
> parse_e820_ext(data);
> break;
> + case SETUP_DTB:
> + add_dtb(pa_data);
> + break;
> default:
> break;
> }
> + */
> +/dts-v1/;
> +/ {
> + model = "x86,CE4100";
> + compatible = "x86,CE4100";
Use a vendor name rather than "x86" here.
> + #address-cells = <1>;
> + #size-cells = <1>;
> +
> + cpus {
> + x86,Atom@0 {
"Atom" would benefit from being more precise, like adding the model
number. Also you want some properties there defining maybe the mask, the
cache characteristics, etc... There's an exising OFW binding for x86, I
suppose you could follow it. A "reg" property at least is mandatory
here.
Also how do you plan to expose threading capability ?
You probably also want some linkage from the processor to the local APIC
no ?
> + device_type = "cpu";
> + };
> + };
> +
> + atom@0 {
> + #address-cells = <1>;
> + #size-cells = <1>;
> + device_type = "soc";
> + compatible = "simple-bus";
> + ranges;
> +
> + /* Local APIC */
> + lapic@fee00000 {
> + compatible = "intel,lapic";
> + reg = <0xfee00000 0x1000>;
> + phys_reg = <0xfee00000>;
> + };
> + /* Primary IO-APIC */
> + ioapic1: pic@fec00000 {
> + #interrupt-cells = <2>;
> + compatible = "intel,ioapic";
> + interrupt-controller;
> + device_type = "interrupt-controller";
> + id = <1>;
> + reg = <0xfec00000 0x1000>;
> + phys_reg = <0xfec00000>;
> + };
> +
What are those phys-reg properties ? Also APICs have some kind od
versionning, they aren't all identical, so your compatible property
needs to be more precise at least.
> + hpet@fed00000 {
> + compatible = "intel,hpet";
> + reg = <0xfed00000 0x200>;
> + phys_reg = <0xfed00000>;
> + };
> + };
All HPETs are identical ? If not, make your compatible property more
precise or if they are generally compatible from a programmer
perspective, use two entries from more generic to more specific, for
example:
compatible = "intel,hpet","intel,hpet-atom-XXYY"
(or make up a numbering/naming scheme that makes sense for Intel gear)
> + isa@legacy {
So ISA isn't a child of "atom"... that makes "atom" a bit strange as a
node, tho not a big deal per se. I suppose it represent the on-die
peripherals but then you need at least some linkage between that and
the /cpus nodes.
> + device_type = "isa";
> + compatible = "simple-bus";
What does "simple-bus" means ? ISA has a well defined binding, you
should follow it.
> + #address-cells = <2>;
> + #size-cells = <1>;
> + ranges = <0 0 0 0x400>;
> +
> + rtc@legacy {
> + compatible = "motorola,mc146818";
> + interrupts = <8 3>;
> + interrupt-parent = <&ioapic1>;
> + ctrl_reg = <2>;
> + freq_reg = <0x26>;
> + reg = <1 0x70 2>;
> + };
I think the ISA binding mandate the use of the PNP codes in the
compatible properties, doesn't it ? At least that's the common usage
pattern I've seen so far on powerpc.
Also, "ctrl_reg" and "freq_reg" follow an existing binding ? If not,
then I'd suggest you use "-" instead of "_" which is more common in OFW
land and use more descriptive names since "reg" has a meaning of its own
and the above is a bit confusing.
> + pci@3fc {
> + #address-cells = <3>;
> + #interrupt-cells = <1>;
> + #size-cells = <1>;
> + compatible = "intel,ce4100-pci";
> + device_type = "pci";
> + bus-range = <0 0>;
> +
> + /* Secondary IO-APIC */
> + ioapic2: pic@bffff000 {
> + #interrupt-cells = <2>;
> + compatible = "intel,ioapic";
> + interrupt-controller;
> + device_type = "interrupt-controller";
> + id = <2>;
> + reg = <0x100 0x0 0x0 0x0>;
> + phys_reg = <0xbffff000>;
> + };
Here you define a PCI bus with a child device that isn't PCI from what I
can tell, tho the "reg" property content is confusing, and then there's
a unit address that doesn't match "reg" and a "phys_reg" (what the heck
is that ?) that matches the unit-address. Care to explain a bit
more ? :-) I suspect that isn't the right way to represent the secondary
APIC
Also same comments about the compatible property.
> + pci@av {
> + #address-cells = <3>;
> + #interrupt-cells = <1>;
> + #size-cells = <1>;
> + compatible = "intel,ce4100-pci";
> + device_type = "pci";
> + bus-range = <1 1>;
> + interrupt-map-mask = <0xffffff 0x0 0x0 0x0>;
I notice that the interrupt number isn't part of your mask, is that
expected ? If you decide to make it so, remember that INT_A is 1 not 0
iirc (dbl check maybe ? I think that's how it is :-)
> + interrupt-map = <
> + /* GFX: 0x2E5B */
> + 0x11000 0x0 0x0 0x0 &ioapic2 0 0x1
> + /* ***** FIXME ****** Compositing Engine: 0x2E72 */
> + /* 0x11100 0x0 0x0 0x1 &ioapic2 0 0x1 */
> + /* MFD: 0x2E5C */
> + 0x11800 0x0 0x0 0x0 &ioapic2 2 0x1
> + /* TS Prefilter: 0x2E5D */
> + 0x12000 0x0 0x0 0x0 &ioapic2 4 0x1
> + /* TS Demux: 0x2E5E */
> + 0x12100 0x0 0x0 0x0 &ioapic2 5 0x1
> + /* ***** FIXME ***** Audio DSP: 0x2E5F */
> + /* 0x13000 0x0 0x0 0x1 &ioapic2 0 0x1 */
> + /* Audio Interfaces: 0x2E60 */
> + 0x13200 0x0 0x0 0x0 &ioapic2 8 0x1
> + /* VDC: 0x2E61 */
> + 0x14000 0x0 0x0 0x0 &ioapic2 9 0x1
> + /* DPE: 0x2E62 */
> + 0x14100 0x0 0x0 0x0 &ioapic2 10 0x1
> + /* HDMI Tx: 0x2E63 */
> + 0x14200 0x0 0x0 0x0 &ioapic2 11 0x1
> + /* SEC: 0x2E64 */
> + 0x14800 0x0 0x0 0x0 &ioapic2 12 0x1
> + /* EXP: 0x2E65 */
> + 0x15000 0x0 0x0 0x0 &ioapic2 13 0x1
> + /* UART0/1: 0x2E66 */
> + 0x15800 0x0 0x0 0x0 &ioapic2 14 0x1
> + /* GPIO: 0x2E67 */
> + 0x15900 0x0 0x0 0x0 &ioapic2 15 0x1
> + /* I2C0/1/2: 0x2E68 */
> + 0x15a00 0x0 0x0 0x0 &ioapic2 16 0x1
> + /* Smart Card 0/1: 0x2E69 */
> + 0x15b00 0x0 0x0 0x0 &ioapic2 15 0x1
> + /* SPI: 0x2E6A */
> + 0x15c00 0x0 0x0 0x0 &ioapic2 15 0x1
> + /* MSPOD: 0x2E6B */
> + 0x15d00 0x0 0x0 0x0 &ioapic2 19 0x1
> + /* IR: 0x2E6C */
> + 0x15e00 0x0 0x0 0x0 &ioapic2 16 0x1
> + /* **** FIXME ***** DFX: 0x2E6D */
> + /* 0x15f00 0x0 0x0 0x1 &ioapic2 0x0 0x1 */
> + /* Gig Ethernet: 0x2E6E */
> + 0x16000 0x0 0x0 0x0 &ioapic2 21 0x1
> + /* IEEE1588 and Clock Recovery Unit: 0x2E6F */
> + 0x16100 0x0 0x0 0x0 &ioapic2 3 0x1
> + /* USB0: 0x2E70 */
> + 0x16800 0x0 0x0 0x0 &ioapic2 22 0x3
> + /* USB1: 0x2E70 */
> + 0x16900 0x0 0x0 0x0 &ioapic2 22 0x3
> + /* SATA: 0x2E71 */
> + 0x17000 0x0 0x0 0x0 &ioapic2 23 0x3
> + >;
> +
> + i2c@15a00 {
> + #address-cells = <1>;
> + #size-cells = <0>;
> + reg = <0x15a00 0x0 0x0 0x0>;
OFW PCI binding, which we follow, mandates an "assigned-addresses"
property, tho I suppose that if you haven't assigned anything yet (and
will let Linux do so) the above is kosher. Your "reg" is a bit odd but I
don't have time to dbl check vs. the binding right now.
> + i2c@0 {
> + reg = <0>;
> + };
> +
> + i2c@1 {
> + #address-cells = <1>;
> + #size-cells = <0>;
> + reg = <1>;
> +
> + pcf8575@26 {
> + compatible = "ti,pcf8575";
> + reg = <0x26>;
> + };
> + };
> +
> + i2c@2 {
> + #address-cells = <1>;
> + #size-cells = <0>;
> + reg = <2>;
> +
> + pcf8575@26 {
> + compatible = "ti,pcf8575";
> + reg = <0x26>;
> + };
> + };
> + };
> +
> + spi@15c00 {
> + #address-cells = <1>;
> + #size-cells = <0>;
> + reg = <0x15c00 0x0 0x0 0x0>;
> +
> + pcm1755@0 {
> + compatible = "ti,pcm1755";
> + reg = <0>;
> + spi-max-frequency = <115200>;
> + };
> +
> + pcm1609a@1 {
> + compatible = "ti,pcm1609a";
> + reg = <1>;
> + spi-max-frequency = <115200>;
> + };
> +
> + at93c46@2 {
> + compatible = "atmel,at93c46";
> + reg = <2>;
> + spi-max-frequency = <115200>;
> + };
> + };
> + };
> + };
> + };
> +};
Cheers,
Ben.
> Jon Loeliger wrote:
>
> >> +struct irq_host {
> >> + int (*xlate)(struct irq_host *h, const u32 *intspec, u32 intsize,
> >> + u32 *out_hwirq, u32 *out_type);
> >> + void *priv;
> >> + struct device_node *controller;
> >> + struct list_head l;
> >> +};
> >
> > I thought there was an intent and desire to rename the irq_host
> > as irq_domain.
> >
> > jdl
>
> AFAIK Benh was thinking about renaming it. I don't know if this is
> still the case or when he intends to do so.
I submitted a first version of that patch already.
I thought we were waiting on gkh's patches to go upstream
before re-submitting the next version of that patch.
If we're ready to proceed with that, we can rebase it
and re-submit it too.
> Once he does so, this can be renamed as well.
Hrm. Well, let's ask Ben what order he wants to do...?
jdl
On Thu, 2010-11-25 at 18:39 +0100, Sebastian Andrzej Siewior wrote:
> x86_of_pci_init() does two things:
> - it provides a generic irq enable and disable function. enable queries
> the device tree for the interrupt information, calls ->xlate on the
> irq host and updates the pci->irq information for the device.
>
> - it walks through PCI buss(es) in the device tree and adds its children
> (devices) nodes to appropriate pci_dev nodes in kernel. So the dtb
> node information is available at probe time of the PCI device.
>
> Adding a PCI bus based on the information in the device tree is
> currently not supported. Right now direct access via ioports is used.
That's something we need to eventually put into common code, ie matching
device nodes to PCI devices... In the meantime, your approach will do,
some nits:
> +static int of_irq_map_pci(struct pci_dev *dev, struct of_irq *oirq)
> +{
> + struct device_node *node;
> + __be32 laddr[3];
> + __be32 lspec[2];
> + int ret;
> + u8 pin;
> +
> + node = dev->dev.of_node;
> + if (!node) {
> + node = dev->bus->dev.of_node;
> + if (node) {
> + ret = of_irq_map_one(node, 0, oirq);
> + if (!ret)
> + return ret;
> + }
> + }
I don't quite get the logic in getting to the bus' interrupts if you
can't find a device own nodes here...
> + ret = pci_read_config_byte(dev, PCI_INTERRUPT_PIN, &pin);
> + if (ret)
> + return ret;
> + if (!pin)
> + return -EINVAL;
> +
> + laddr[0] = cpu_to_be32((dev->bus->number << 16) | (dev->devfn << 8));
> + laddr[1] = 0;
> + laddr[2] = 0;
> +
> + lspec[0] = cpu_to_be32(pin);
> + lspec[1] = cpu_to_be32(0);
> + ret = of_irq_map_raw(node, lspec, 1, laddr, oirq);
> + if (ret)
> + return ret;
> + return 0;
> +}
Ok so I see what you are trying to do, but I think it's not completely
correct, besides you miss the swizzling when crossing P2P bridges and
similar.
I suppose you looked at powerpc's of_irq_map_pci() so I'm not sure why
you modified it the way you did :-) You should probably either move it
to a generic place or copy it for now with a comment indicating where it
comes from so we spot it when we put it into a common code.
> +static int x86_of_pci_irq_enable(struct pci_dev *dev)
> +{
> + struct of_irq oirq;
> + u32 virq;
> + int ret;
> + u8 pin;
> +
> + ret = pci_read_config_byte(dev, PCI_INTERRUPT_PIN, &pin);
> + if (ret)
> + return ret;
> + if (!pin)
> + return 0;
> +
> + ret = of_irq_map_pci(dev, &oirq);
> + if (ret)
> + return ret;
> +
> + virq = irq_create_of_mapping(oirq.controller, oirq.specifier,
> + oirq.size);
> + if (virq == NO_IRQ)
> + return -EINVAL;
> + dev->irq = virq;
> + return 0;
> +}
> +
> +static void x86_of_pci_irq_disable(struct pci_dev *dev)
> +{
> +}
> +
> +void __cpuinit x86_of_pci_init(void)
> +{
> + struct device_node *np;
> +
> + pcibios_enable_irq = x86_of_pci_irq_enable;
> + pcibios_disable_irq = x86_of_pci_irq_disable;
> +
> + for_each_node_by_type(np, "pci") {
> + const void *prop;
> + struct pci_bus *bus;
> + unsigned int bus_min;
> + struct device_node *child;
> +
> + prop = of_get_property(np, "bus-range", NULL);
> + if (!prop)
> + continue;
> + bus_min = be32_to_cpup(prop);
> +
> + bus = pci_find_bus(0, bus_min);
> + if (!bus) {
> + printk(KERN_ERR "Can't find a node for bus %s.\n",
> + np->full_name);
> + continue;
> + }
> +
> + bus->dev.of_node = np;
> + for_each_child_of_node(np, child) {
> + struct pci_dev *dev;
> + u32 devfn;
> +
> + prop = of_get_property(child, "reg", NULL);
> + if (!prop)
> + continue;
> +
> + devfn = (be32_to_cpup(prop) >> 8) & 0xff;
> + dev = pci_get_slot(bus, devfn);
> + if (!dev)
> + continue;
> + dev->dev.of_node = child;
> + }
> + }
> +}
That too won't go down bridges, atom never have any ? (no PCIe root
complex at all ? ever will be ? even then, it should be supported as got
knows what we'll handle in the future).
Eventually we want that matching between PCI devices and OF nodes to be
in generic code, so that's not a big deal to have an "inferior" version
temporarily in there I suppose.
Also, aren't you missing a pci_dev_put() after pci_get_slot() ?
> static int __init early_scan_hpet(unsigned long node, const char *uname,
> int depth, void *data)
> {
Cheers,
Ben.
* Benjamin Herrenschmidt | 2010-11-27 08:42:16 [+1100]:
>On Thu, 2010-11-25 at 18:39 +0100, Sebastian Andrzej Siewior wrote:
>> This patch adds minimal support for device tree support on x86. It will
>> be passed to the kernel via setup_data which requires atleast boot
>> protocol 2.09.
>> Memory size, restricted memory regions, boot arguments are gathered the
>> traditional way so things like cmd_line are just here to let the code
>> compile.
>> The current plan is use the device tree as an extension and to gather
>> informations from it which can not be enumerated and have to be
>> hardcoded otherwise. This includes things like
>> - which devices are on this I2C/ SPI bus?
>> - how are the interrupts wired to IO APIC?
>> - where could my hpet be?
>
>How do that work with platforms like OLPC that have a real OF ?
OLPC's openfirmware is embedded into the bootpage where ofw_magic is set
to OLPC_OFW_SIG (0x2057464F). I don't touch this, the device tree is
passed via setup_data. So you could use both at the same time.
>One thing we did on powerpc which among other things allow kexec to work
>on such platforms when we kill OF at boot (it might stay alive on OLPC),
>is to basically detect very early in asm that we are coming from OF,
>have a trampoline that extract the DT and turns it into a flat dtb, then
>continue to the main kernel entry using the dtb method.
>
>That way, one can kexec using the dtb method over and there's one single
>entry point for device-tree use.
>
>Are you doing something similar for x86 ?
Similar. We get most critical parameters from the so called bootpage
(the traditional x86 way) which also contains a pointer to the device
tree (we don't have open firmware or something else where we call back).
We plan to relocate the device tree (before it is unflattered) so the
bootloader does not need to know about the memory layout the kernel is
having.
On kexec, the bootpage is built from scratch AFAIK. So the kexec loader
needs to suck the dtb from /proc and can add it to the bootpage.
>> Dirk is working on some patches which provide generic infrastructure for
>> linking the dtb into the kernel. Once this is it its final shape, we
>> will relocate the device tree unconditionally. This will remove the
>> requirement for the boot loader to locate the device tree within lowmem.
>
>Linking the dtb into the kernel is something we prefer not doing on
>powerpc and I'm curious why you think that applies better on x86...
This is only for the case where we do not get a dtb from the bootloader
Microblaze also links dtb into the kernel in that case.
>We -do- have ways to include it in the zImage wrapper. However, this is
>different in subtle ways because of the way our zImage wrapper building
>works. Basically, we always build all the per-platform .o's of the
>wrapper that apply to supported platforms by the kernel. The
>binding/linking together of the final wrapper with a kernel image, an
>optional dtb and optional initrd is performed by a shell script that can
>be used outside of the normal build context.
The reason why you have multiple .o wrapper files is because the specific
platform code is not simply passing the device tree but also adding /
updating nodes like MAC address, bus clocks, ... which are coming from
the (different) bd_t struct or something else. The simpleboot target is
covering the case where you just pass the embedded dtb to kernel without
changing it.
On x86 we want to have the bootloader passing us the final dtb. The
in-kernel dtb is more like a generic simpleboot target.
>That means that it's possible for a distro for example to install a
>kernel image, all the wrapper .o files and that script, and at runtime
>rebuild zImage wrappers with the appropriate dtb without having the
>whole built kernel tree at hand.
For the distro reason the in-kernel dtb supports multiple dtbs. So a
distro kernel can include all of them into .init.data section and the
user can specify on the command line which device tree he wants. x86 gets
its command line via the bootpage so it is available before we have a
device tree. Microblaze should also be able to use this mechanism.
>The direction taken by ARM (and possibly newer powerpc platforms as
>well) is to have the dtb be passed by the bootloader. Typically
>bootloaders like uboot provide a way to flash the dtb separately so it
>can be udpated (*).
Yes, we want this as well. But what about the old ARMs where the
bootloader did not have dtb support? What about minimal bootloader which
just initialize the CPU and memory and jump then into the kernel? So the
in-kernel dtb is a simple way to solve this. However I don't know what
to do about updating the MAC address where it is not yet set.
>(*) That brings a separate topic we shall discuss: A consistent way for
>versionning the device-tree would be really useful.
This isn't a problem unless you move nodes or deprecate them, right? Or
do you think about another reason to versionning the device-tree?.
>
>Cheers,
>Ben.
Sebastian
* Benjamin Herrenschmidt | 2010-11-28 09:33:51 [+1100]:
>That's something we need to eventually put into common code, ie matching
>device nodes to PCI devices... In the meantime, your approach will do,
>some nits:
>
>> +static int of_irq_map_pci(struct pci_dev *dev, struct of_irq *oirq)
>> +{
>> + struct device_node *node;
>> + __be32 laddr[3];
>> + __be32 lspec[2];
>> + int ret;
>> + u8 pin;
>> +
>> + node = dev->dev.of_node;
>> + if (!node) {
>> + node = dev->bus->dev.of_node;
>> + if (node) {
>> + ret = of_irq_map_one(node, 0, oirq);
>> + if (!ret)
>> + return ret;
>> + }
>> + }
>
>I don't quite get the logic in getting to the bus' interrupts if you
>can't find a device own nodes here...
the of_irq_map_one() is here in case the device has an interrupt node in
the device tree. If not, looks it up in the device tree based on the pin
and the device address.
>> + ret = pci_read_config_byte(dev, PCI_INTERRUPT_PIN, &pin);
>> + if (ret)
>> + return ret;
>> + if (!pin)
>> + return -EINVAL;
>> +
>> + laddr[0] = cpu_to_be32((dev->bus->number << 16) | (dev->devfn << 8));
>> + laddr[1] = 0;
>> + laddr[2] = 0;
>> +
>> + lspec[0] = cpu_to_be32(pin);
>> + lspec[1] = cpu_to_be32(0);
>> + ret = of_irq_map_raw(node, lspec, 1, laddr, oirq);
>> + if (ret)
>> + return ret;
>> + return 0;
>> +}
>
>Ok so I see what you are trying to do, but I think it's not completely
>correct, besides you miss the swizzling when crossing P2P bridges and
>similar.
It should be correct. I did not understand the P2P bridge so I left it
out.
>I suppose you looked at powerpc's of_irq_map_pci() so I'm not sure why
>you modified it the way you did :-) You should probably either move it
>to a generic place or copy it for now with a comment indicating where it
>comes from so we spot it when we put it into a common code.
Microblaze had its own copy of this code so I though there is something
specific about it. If it is okay with you, I would move it to drivers/of
and share. Then I would have the swizzle part :)
>> +void __cpuinit x86_of_pci_init(void)
>> +{
>> + struct device_node *np;
>> +
>> + pcibios_enable_irq = x86_of_pci_irq_enable;
>> + pcibios_disable_irq = x86_of_pci_irq_disable;
>> +
>> + for_each_node_by_type(np, "pci") {
>> + const void *prop;
>> + struct pci_bus *bus;
>> + unsigned int bus_min;
>> + struct device_node *child;
>> +
>> + prop = of_get_property(np, "bus-range", NULL);
>> + if (!prop)
>> + continue;
>> + bus_min = be32_to_cpup(prop);
>> +
>> + bus = pci_find_bus(0, bus_min);
>> + if (!bus) {
>> + printk(KERN_ERR "Can't find a node for bus %s.\n",
>> + np->full_name);
>> + continue;
>> + }
>> +
>> + bus->dev.of_node = np;
>> + for_each_child_of_node(np, child) {
>> + struct pci_dev *dev;
>> + u32 devfn;
>> +
>> + prop = of_get_property(child, "reg", NULL);
>> + if (!prop)
>> + continue;
>> +
>> + devfn = (be32_to_cpup(prop) >> 8) & 0xff;
>> + dev = pci_get_slot(bus, devfn);
>> + if (!dev)
>> + continue;
>> + dev->dev.of_node = child;
>> + }
>> + }
>> +}
>
>That too won't go down bridges, atom never have any ? (no PCIe root
>complex at all ? ever will be ? even then, it should be supported as got
>knows what we'll handle in the future).
The cpu I have here for testing does not have any.
>Eventually we want that matching between PCI devices and OF nodes to be
>in generic code, so that's not a big deal to have an "inferior" version
>temporarily in there I suppose.
Inferior, I see :)
>Also, aren't you missing a pci_dev_put() after pci_get_slot() ?
probably. I will check.
>Cheers,
>Ben.
Sebastian
* Benjamin Herrenschmidt | 2010-11-27 08:57:25 [+1100]:
>> + */
>> +/dts-v1/;
>> +/ {
>> + model = "x86,CE4100";
>> + compatible = "x86,CE4100";
>
>Use a vendor name rather than "x86" here.
Okay.
>> + #address-cells = <1>;
>> + #size-cells = <1>;
>> +
>> + cpus {
>> + x86,Atom@0 {
>
>"Atom" would benefit from being more precise, like adding the model
>number. Also you want some properties there defining maybe the mask, the
>cache characteristics, etc... There's an exising OFW binding for x86, I
>suppose you could follow it. A "reg" property at least is mandatory
>here.
I wasn't aware of the OFW binding for X86. I will follow it once I find
it.
>Also how do you plan to expose threading capability ?
I haven't plan because this CPU has to threading capability. If there
is, I would follow the powerpc way (unless it is allready documented how
to do so on x86).
>You probably also want some linkage from the processor to the local APIC
>no ?
Like now I walk through the device tree and look for one but that sounds
like a good idea.
>> + device_type = "cpu";
>> + };
>> + };
>> +
>> + atom@0 {
>> + #address-cells = <1>;
>> + #size-cells = <1>;
>> + device_type = "soc";
>> + compatible = "simple-bus";
>> + ranges;
>> +
>> + /* Local APIC */
>> + lapic@fee00000 {
>> + compatible = "intel,lapic";
>> + reg = <0xfee00000 0x1000>;
>> + phys_reg = <0xfee00000>;
>> + };
>> + /* Primary IO-APIC */
>> + ioapic1: pic@fec00000 {
>> + #interrupt-cells = <2>;
>> + compatible = "intel,ioapic";
>> + interrupt-controller;
>> + device_type = "interrupt-controller";
>> + id = <1>;
>> + reg = <0xfec00000 0x1000>;
>> + phys_reg = <0xfec00000>;
>> + };
>> +
>
>What are those phys-reg properties ?
The second ioapic behind the PCI bus which uses the reg property for the
devfn number so I can't use it for the chip address. I can't query the
PCI information because the PCI bus is not up yet.
The phys_reg property contains the physical address of the chip. The
boot uart code in powerpc's tree has a virtual-reg property. So I though
that this would be nice to keep things simple.
>Also APICs have some kind od
>versionning, they aren't all identical, so your compatible property
>needs to be more precise at least.
The APIC has a register where you can read the version of the chip, yes.
You want me to add this version into the compatible field?
>> + hpet@fed00000 {
>> + compatible = "intel,hpet";
>> + reg = <0xfed00000 0x200>;
>> + phys_reg = <0xfed00000>;
>> + };
>> + };
>
>All HPETs are identical ? If not, make your compatible property more
>precise or if they are generally compatible from a programmer
>perspective, use two entries from more generic to more specific, for
>example:
>
> compatible = "intel,hpet","intel,hpet-atom-XXYY"
>
>(or make up a numbering/naming scheme that makes sense for Intel gear)
All hpets should be equal AFAIK. Some behave different but this was not
intendend in the first place. This information is not even included in
the ACPI tables.
>> + isa@legacy {
>
>So ISA isn't a child of "atom"... that makes "atom" a bit strange as a
>node, tho not a big deal per se. I suppose it represent the on-die
>peripherals but then you need at least some linkage between that and
>the /cpus nodes.
Yes, it should represent the on-die peripherals. How should that look
like? The bus the same level as the cpu node and link from the cpu to
the isa bus?
>> + device_type = "isa";
>> + compatible = "simple-bus";
>
>What does "simple-bus" means ?
I added simple bus in order to get probed. But I now I rember that this
is also supported per device_type. I get rid of it.
>ISA has a well defined binding, you
>should follow it.
I do. The reg property the rtc starts with "1" where 1 means it is an
ioport and not ioremap()able adderess.
>> + #address-cells = <2>;
>> + #size-cells = <1>;
>> + ranges = <0 0 0 0x400>;
>> +
>> + rtc@legacy {
>> + compatible = "motorola,mc146818";
>> + interrupts = <8 3>;
>> + interrupt-parent = <&ioapic1>;
>> + ctrl_reg = <2>;
>> + freq_reg = <0x26>;
>> + reg = <1 0x70 2>;
>> + };
>
>I think the ISA binding mandate the use of the PNP codes in the
>compatible properties, doesn't it ?
I'm not aware of it.
> At least that's the common usage
>pattern I've seen so far on powerpc.
That is true.
>
>Also, "ctrl_reg" and "freq_reg" follow an existing binding ? If not,
>then I'd suggest you use "-" instead of "_" which is more common in OFW
>land and use more descriptive names since "reg" has a meaning of its own
>and the above is a bit confusing.
I posted a patch for this at [0]. Powerpc uses the the pnpPNP,b00 node
for the rtc. This node is handled explictly by chrp and maple. Those two
don't use the generic driver but their own.
The remaining (mpc8572, p2020) handle this via add_rtc() in
arch/powerpc/sysdev/rtc_cmos_setup.c. What they do is, they create a
platform device for the OF node. What is missing is the initialization
of the ctrl_reg register and the frequency. This is performed in a PCI
quirk in quirk_final_uli1575() which is only performed on a few powerpc
machines (is is_quirk_valid() has a list).
This looks like dirty hack to me. I need to add every machine to it
rather then a simple entry into the device tree. If you replace the uli
with something else, you need another setup of this kind for the rtc.
>> + pci@3fc {
>> + #address-cells = <3>;
>> + #interrupt-cells = <1>;
>> + #size-cells = <1>;
>> + compatible = "intel,ce4100-pci";
>> + device_type = "pci";
>> + bus-range = <0 0>;
>> +
>> + /* Secondary IO-APIC */
>> + ioapic2: pic@bffff000 {
>> + #interrupt-cells = <2>;
>> + compatible = "intel,ioapic";
>> + interrupt-controller;
>> + device_type = "interrupt-controller";
>> + id = <2>;
>> + reg = <0x100 0x0 0x0 0x0>;
>> + phys_reg = <0xbffff000>;
>> + };
>
>Here you define a PCI bus with a child device that isn't PCI from what I
>can tell, tho the "reg" property content is confusing, and then there's
>a unit address that doesn't match "reg" and a "phys_reg" (what the heck
>is that ?) that matches the unit-address. Care to explain a bit
>more ? :-)
The reg property contains the devfn number, interrupt mask, pin number.
That is what I've been seeing in PCI nodes. phys_reg is the physical
address of the chip since reg is allready taken and PCI is not yet up
(as I allready explained).
>I suspect that isn't the right way to represent the secondary
>APIC
Well the secondary APIC shows up on this PCI bus. It has devfn, vendor
and device id and config space. Where should I put it if not here?
>Also same comments about the compatible property.
>
>> + pci@av {
>> + #address-cells = <3>;
>> + #interrupt-cells = <1>;
>> + #size-cells = <1>;
>> + compatible = "intel,ce4100-pci";
>> + device_type = "pci";
>> + bus-range = <1 1>;
>> + interrupt-map-mask = <0xffffff 0x0 0x0 0x0>;
>
>I notice that the interrupt number isn't part of your mask, is that
>expected ? If you decide to make it so, remember that INT_A is 1 not 0
>iirc (dbl check maybe ? I think that's how it is :-)
Yes INT_A is 1 :) 0 means no interrupt available. I'm not using the
interrupt pin here and therefore it is not in the mask.
The ref manual contains a static mapping for every device so the
interrupt pin has no meaning. Well this not enirely true: by default the
pic is in legacy mode and all devices are on INT A. You can change this
into a different mode where INT A - D are used. For this to happen you
need to fiddle in some SoC specific registers. If you choose not to
bother with it and use the ioapic instead then the interrupt pin remains
unused. And we don't have real PCI bus where you can plug devices so it
would matter.
>> + i2c@15a00 {
>> + #address-cells = <1>;
>> + #size-cells = <0>;
>> + reg = <0x15a00 0x0 0x0 0x0>;
>
>OFW PCI binding, which we follow, mandates an "assigned-addresses"
>property, tho I suppose that if you haven't assigned anything yet (and
>will let Linux do so) the above is kosher. Your "reg" is a bit odd but I
>don't have time to dbl check vs. the binding right now.
reg is devfn. I just looked up "assigned-addresses" in the PCI BUS Spec
and it looks like what I could use instead of phys-reg property. So if
this is the case then I need to to distinguish between the first on
secondary ioapic and go either for the reg property or
assigned-addresses.
[0]
http://groups.google.com/group/rtc-linux/browse_thread/thread/cd70c4d4865e2b30
>Cheers,
>Ben.
Sebastian
On Sun, 2010-11-28 at 15:04 +0100, Sebastian Andrzej Siewior wrote:
> Microblaze had its own copy of this code so I though there is
> something
> specific about it. If it is okay with you, I would move it to
> drivers/of
> and share. Then I would have the swizzle part :)
Appart from the accessor pci_device_to_OF_node() which might or might
not be specific, I thin the code is pretty common, probably something
Grant didn't have time to tackle yet :-)
> >Eventually we want that matching between PCI devices and OF nodes to
> be
> >in generic code, so that's not a big deal to have an "inferior"
> version
> >temporarily in there I suppose.
>
> Inferior, I see :)
Hehe yeah :-) It's actually not a simple problem. For example, we can't
just move the powerpc variant over to generic code as-is bcs ... we have
2 completely different ways of doing it between ppc32 and ppc64 for
historical reasons :-) They also have different "features". This is
something I need to reconcile at some stage.
For example our ppc32 variant support bus renumbering (ie, Linux
assigning different bus numbers than what the DT encodes) while our
ppc64 doesn't, but our ppc64 variant has additional "stuff" to deal with
hotplug for example etc...
> >Also, aren't you missing a pci_dev_put() after pci_get_slot() ?
> probably. I will check.
Cheers,
Ben.
On Sun, 2010-11-28 at 14:49 +0100, Sebastian Andrzej Siewior wrote:
>
> >(*) That brings a separate topic we shall discuss: A consistent way for
> >versionning the device-tree would be really useful.
> This isn't a problem unless you move nodes or deprecate them, right? Or
> do you think about another reason to versionning the device-tree?.
Move nodes, deprecate them, yes, but also maybe fix bugs/typos etc...
For most of these, of course, fixup code can figure things out without a
version. The version has a couple of (minor) advantages, such as being
something easier to get into a bug report rather than the whole tree,
for distro who may want to manage a "pool" of these, or maybe a
"generic" way to provide dtb "overrides" ...
Ben.
On Sun, 2010-11-28 at 17:04 +0100, Sebastian Andrzej Siewior wrote:
> * Benjamin Herrenschmidt | 2010-11-27 08:57:25 [+1100]:
>
> >> + */
> >> +/dts-v1/;
> >> +/ {
> >> + model = "x86,CE4100";
> >> + compatible = "x86,CE4100";
> >
> >Use a vendor name rather than "x86" here.
> Okay.
>
> >> + #address-cells = <1>;
> >> + #size-cells = <1>;
> >> +
> >> + cpus {
> >> + x86,Atom@0 {
> >
> >"Atom" would benefit from being more precise, like adding the model
> >number. Also you want some properties there defining maybe the mask, the
> >cache characteristics, etc... There's an exising OFW binding for x86, I
> >suppose you could follow it. A "reg" property at least is mandatory
> >here.
> I wasn't aware of the OFW binding for X86. I will follow it once I find
> it.
Interesting, I though I would find it on
http://www.openfirmware.info/Bindings but it's not there...
CC'ing Mitch who might know where to find that.
> >Also how do you plan to expose threading capability ?
> I haven't plan because this CPU has to threading capability. If there
> is, I would follow the powerpc way (unless it is allready documented how
> to do so on x86).
Atoms have SMT don't they ?
The powerpc way somewhat sucks since it uses a property named
"ibm,interrupt-server#" :-) We might want to define something more
generic here but the base idea is sane.
IE. There is a node per core. Each "thread" is identified by a unique
number we call the "interrupt server number". The "reg" property of a
core node is the isn of the first thread on that core. etc..
> >You probably also want some linkage from the processor to the local APIC
> >no ?
> Like now I walk through the device tree and look for one but that sounds
> like a good idea.
The day you have multiple Atom's on a board the "looking for one" won't
work well :-) Better have explicit references whenever you can for that
type of linkage.
> >> + device_type = "cpu";
> >> + };
> >> + };
> >> +
> >> + atom@0 {
> >> + #address-cells = <1>;
> >> + #size-cells = <1>;
> >> + device_type = "soc";
> >> + compatible = "simple-bus";
> >> + ranges;
> >> +
> >> + /* Local APIC */
> >> + lapic@fee00000 {
> >> + compatible = "intel,lapic";
> >> + reg = <0xfee00000 0x1000>;
> >> + phys_reg = <0xfee00000>;
> >> + };
> >> + /* Primary IO-APIC */
> >> + ioapic1: pic@fec00000 {
> >> + #interrupt-cells = <2>;
> >> + compatible = "intel,ioapic";
> >> + interrupt-controller;
> >> + device_type = "interrupt-controller";
> >> + id = <1>;
> >> + reg = <0xfec00000 0x1000>;
> >> + phys_reg = <0xfec00000>;
> >> + };
> >> +
> >
> >What are those phys-reg properties ?
> The second ioapic behind the PCI bus which uses the reg property for the
> devfn number so I can't use it for the chip address. I can't query the
> PCI information because the PCI bus is not up yet.
For the second ioapic, see below. This one isn't on PCI and should just
have "reg".
> The phys_reg property contains the physical address of the chip. The
> boot uart code in powerpc's tree has a virtual-reg property. So I though
> that this would be nice to keep things simple.
No, virtual-reg is a "hack" which contains a virtual address mapped by
the bootloader in the MMU for use by early boot code before takeover of
the MMU. It's not a physical address.
The physical addresses shall be expressed in the device-tree using the
normal mechanisms so that all the existing code to decode them "just
works".
> >Also APICs have some kind od
> >versionning, they aren't all identical, so your compatible property
> >needs to be more precise at least.
> The APIC has a register where you can read the version of the chip, yes.
> You want me to add this version into the compatible field?
Ideally, you should add something like
intel,ioapic-atomXXX intel-ioapic-vYY intel-ioapic
IE. From the most specific to the most generic. That way if a "quirk" is
ever needed due to an errata specific to that chip model that isn't
directly covered by the "version", you get to use that too (unless that
version register also contains things like mask number etc... in which
case it's probably enough).
> >> + hpet@fed00000 {
> >> + compatible = "intel,hpet";
> >> + reg = <0xfed00000 0x200>;
> >> + phys_reg = <0xfed00000>;
> >> + };
> >> + };
> >
> >All HPETs are identical ? If not, make your compatible property more
> >precise or if they are generally compatible from a programmer
> >perspective, use two entries from more generic to more specific, for
> >example:
> >
> > compatible = "intel,hpet","intel,hpet-atom-XXYY"
And of course I'm full of caca above, it shall be from the ost specific
to the most generic, so the other way around:
compatible = "intel,hpet-atom-XXYY","intel,hpet"
>
> >(or make up a numbering/naming scheme that makes sense for Intel gear)
> All hpets should be equal AFAIK. Some behave different but this was not
> intendend in the first place. This information is not even included in
> the ACPI tables.
Well, you should probably still at least provide the "specific" part in
compatible so that difference can be quirked easily (though of course
Linux probably won't care initially).
> >> + isa@legacy {
> >
> >So ISA isn't a child of "atom"... that makes "atom" a bit strange as a
> >node, tho not a big deal per se. I suppose it represent the on-die
> >peripherals but then you need at least some linkage between that and
> >the /cpus nodes.
>
> Yes, it should represent the on-die peripherals. How should that look
> like? The bus the same level as the cpu node and link from the cpu to
> the isa bus?
I would make isa a child of atom
> >> + device_type = "isa";
> >> + compatible = "simple-bus";
> >
> >What does "simple-bus" means ?
> I added simple bus in order to get probed. But I now I rember that this
> is also supported per device_type. I get rid of it.
device_type is a nasty bugger, we are trying to get rid of Linux
reliance on it.
Things like "simple-bus" don't rock my boat either, it's adding to the
device-tree "informations" that are specific to the way Linux will
interpret it, which is not how it should be.
In this case I would have said something like "atom,isa-bridge" but
heh...
> >ISA has a well defined binding, you
> >should follow it.
> I do. The reg property the rtc starts with "1" where 1 means it is an
> ioport and not ioremap()able adderess.
Ok.
> >> + #address-cells = <2>;
> >> + #size-cells = <1>;
> >> + ranges = <0 0 0 0x400>;
> >> +
> >> + rtc@legacy {
> >> + compatible = "motorola,mc146818";
> >> + interrupts = <8 3>;
> >> + interrupt-parent = <&ioapic1>;
> >> + ctrl_reg = <2>;
> >> + freq_reg = <0x26>;
> >> + reg = <1 0x70 2>;
> >> + };
> >
> >I think the ISA binding mandate the use of the PNP codes in the
> >compatible properties, doesn't it ?
> I'm not aware of it.
>
> > At least that's the common usage
> >pattern I've seen so far on powerpc.
> That is true.
>
> >
> >Also, "ctrl_reg" and "freq_reg" follow an existing binding ? If not,
> >then I'd suggest you use "-" instead of "_" which is more common in OFW
> >land and use more descriptive names since "reg" has a meaning of its own
> >and the above is a bit confusing.
> I posted a patch for this at [0]. Powerpc uses the the pnpPNP,b00 node
> for the rtc. This node is handled explictly by chrp and maple. Those two
> don't use the generic driver but their own.
>
> The remaining (mpc8572, p2020) handle this via add_rtc() in
> arch/powerpc/sysdev/rtc_cmos_setup.c. What they do is, they create a
> platform device for the OF node. What is missing is the initialization
> of the ctrl_reg register and the frequency. This is performed in a PCI
> quirk in quirk_final_uli1575() which is only performed on a few powerpc
> machines (is is_quirk_valid() has a list).
> This looks like dirty hack to me. I need to add every machine to it
> rather then a simple entry into the device tree. If you replace the uli
> with something else, you need another setup of this kind for the rtc.
I definitely agree that it's much better to use a property in the
device-tree and I'n not arguing against it :-) I was merely asking
whether that property name was already defined somewhere and if not,
suggesting a different naming (using dashes rather than underscores)
which is more consistent with traditional usage :-)
(Oh and maybe publish a binding wherever Grant puts these nowadays while
at it so we can all do the same thing from now on)
> >> + pci@3fc {
> >> + #address-cells = <3>;
> >> + #interrupt-cells = <1>;
> >> + #size-cells = <1>;
> >> + compatible = "intel,ce4100-pci";
> >> + device_type = "pci";
> >> + bus-range = <0 0>;
> >> +
> >> + /* Secondary IO-APIC */
> >> + ioapic2: pic@bffff000 {
> >> + #interrupt-cells = <2>;
> >> + compatible = "intel,ioapic";
> >> + interrupt-controller;
> >> + device_type = "interrupt-controller";
> >> + id = <2>;
> >> + reg = <0x100 0x0 0x0 0x0>;
> >> + phys_reg = <0xbffff000>;
> >> + };
> >
> >Here you define a PCI bus with a child device that isn't PCI from what I
> >can tell, tho the "reg" property content is confusing, and then there's
> >a unit address that doesn't match "reg" and a "phys_reg" (what the heck
> >is that ?) that matches the unit-address. Care to explain a bit
> >more ? :-)
> The reg property contains the devfn number, interrupt mask, pin number.
No. The "reg" property contains among other things the devfn, but
certainly not the interrupt mask and pin number. I recommend you look at
the PCI binding for OF, it's actually not very complicated :-)
The "reg" basically contains an entry for the "config space" of the
device which basically represents the devfn only, and an entry for each
BAR which contains the size and various attributes (not the assigned
address tho, this goes into a separate assigned-addresses property).
> That is what I've been seeing in PCI nodes. phys_reg is the physical
> address of the chip since reg is allready taken and PCI is not yet up
> (as I allready explained).
Right but with the appropriate assigned-addresses property, you can
represent that using standard properties (and use existing address
resolution helpers from drivers/of) without inventing a new "phys_reg"
which btw has issues too ("reg" traditionally is a tupple addr/size,
also where is the number of cells used in phys_reg defined ?).
> >I suspect that isn't the right way to represent the secondary
> >APIC
> Well the secondary APIC shows up on this PCI bus. It has devfn, vendor
> and device id and config space. Where should I put it if not here?
Ok, I didn't know that, so if it's a PCI device, do as I wrote above,
give it the proper set of PCI properties :-)
> >Also same comments about the compatible property.
> >
> >> + pci@av {
> >> + #address-cells = <3>;
> >> + #interrupt-cells = <1>;
> >> + #size-cells = <1>;
> >> + compatible = "intel,ce4100-pci";
> >> + device_type = "pci";
> >> + bus-range = <1 1>;
> >> + interrupt-map-mask = <0xffffff 0x0 0x0 0x0>;
> >
> >I notice that the interrupt number isn't part of your mask, is that
> >expected ? If you decide to make it so, remember that INT_A is 1 not 0
> >iirc (dbl check maybe ? I think that's how it is :-)
> Yes INT_A is 1 :) 0 means no interrupt available. I'm not using the
> interrupt pin here and therefore it is not in the mask.
> The ref manual contains a static mapping for every device so the
> interrupt pin has no meaning.
Ok, fair enough.
> Well this not enirely true: by default the
> pic is in legacy mode and all devices are on INT A. You can change this
> into a different mode where INT A - D are used. For this to happen you
> need to fiddle in some SoC specific registers. If you choose not to
> bother with it and use the ioapic instead then the interrupt pin remains
> unused. And we don't have real PCI bus where you can plug devices so it
> would matter.
That's ok, the DT should represent the way the SoC was configured, I
don't think it's worth bothering trying to cover all possible SoC
configurations and pin mux in one tree :-)
>
> >> + i2c@15a00 {
> >> + #address-cells = <1>;
> >> + #size-cells = <0>;
> >> + reg = <0x15a00 0x0 0x0 0x0>;
> >
> >OFW PCI binding, which we follow, mandates an "assigned-addresses"
> >property, tho I suppose that if you haven't assigned anything yet (and
> >will let Linux do so) the above is kosher. Your "reg" is a bit odd but I
> >don't have time to dbl check vs. the binding right now.
> reg is devfn. I just looked up "assigned-addresses" in the PCI BUS Spec
> and it looks like what I could use instead of phys-reg property. So if
> this is the case then I need to to distinguish between the first on
> secondary ioapic and go either for the reg property or
> assigned-addresses.
You don't really need to. There's code that will do that for you :-)
Stuff in drivers/of/address.c shall be able to parse the addresses by
index (tho I suppose you might still want to do a special case for PCI
since the natural way to get to a PCI based address is via a BAR number
while other stuff just takes an address index).
You shouldn't ever have to look directly at "reg" or
"assigned-addresses" yourself.
Cheers,
Ben.
> [0]
> http://groups.google.com/group/rtc-linux/browse_thread/thread/cd70c4d4865e2b30
>
> >Cheers,
> >Ben.
>
> Sebastian
> --
> To unsubscribe from this list: send the line "unsubscribe linux-kernel" in
> the body of a message to [email protected]
> More majordomo info at http://vger.kernel.org/majordomo-info.html
> Please read the FAQ at http://www.tux.org/lkml/
On 11/28/2010 12:53 PM, Benjamin Herrenschmidt wrote:
>> I wasn't aware of the OFW binding for X86. I will follow it once I find
>> it.
>
> Interesting, I though I would find it on
> http://www.openfirmware.info/Bindings but it's not there...
>
> CC'ing Mitch who might know where to find that.
I can't find the x86 binding either, which is a little bit embarrassing
since I wrote it, albeit about 15 years ago...
If my memory is correct, it is not particularly useful now. It
primarily dealt with the ABI for transferring control to the OS and for
calling back into the OFW client interface. The only company that used
it was Network Appliance, back when they were building their own x86
motherboards (because most off-the-shelf mobo's of that era did not meet
their stability requirements).
There has been a fair amount of churn since then, in relevant areas like
x86 privileged architecture, compiler versions and code generation
policies, popular bootloaders, OSs, and Linux early startup code. The
net result is that the ABI that the old binding specified probably isn't
right for today.
I'd be happy to work with people to develop a new x86 binding.
The OLPC interface might be of some use as a starting point, but would
need some work. It is currently in use on AMD Geode, Via C7, and Intel
Atom based systems, but, among other issues, it conflicts with the
Physical Address Extension feature.
Mitch
On Sat, Nov 27, 2010 at 08:57:25AM +1100, Benjamin Herrenschmidt wrote:
>
> > + */
> > +/dts-v1/;
> > +/ {
> > + model = "x86,CE4100";
> > + compatible = "x86,CE4100";
>
> Use a vendor name rather than "x86" here.
>
> > + #address-cells = <1>;
> > + #size-cells = <1>;
> > +
> > + cpus {
> > + x86,Atom@0 {
>
> "Atom" would benefit from being more precise, like adding the model
> number. Also you want some properties there defining maybe the mask, the
> cache characteristics, etc... There's an exising OFW binding for x86, I
> suppose you could follow it. A "reg" property at least is mandatory
> here.
In the PowerPC flat-tree world, the newly established convention is to
extend the generic names convention to cpu nodes, so we name the nodes
just "cpu@0" etc. and move the more specific cpu type ("PowerPC,970FX"
/ "x86,Atom" / whatever) to the compatible property. I'd recommend
this convention to you, even though it's a bit of a break from earlier
standard practice, it makes device tree manipulations by bootloaders
and other intermediate things a bit easier.
> Also how do you plan to expose threading capability ?
Unless the existing x86 bindings specify something different, I'd
suggest the method we're planning to put into ePAPR 1.1 for PowerPC
chips. That is, threads sharing an MMU go in the same cpu node, with
the individual thread numbers given as multiple entries in the "reg"
property.
--
David Gibson | I'll have my music baroque, and my code
david AT gibson.dropbear.id.au | minimalist, thank you. NOT _the_ _other_
| _way_ _around_!
http://www.ozlabs.org/~dgibson
On 11/28/2010 05:34 PM, Mitch Bradley wrote:
>
> The OLPC interface might be of some use as a starting point, but would
> need some work. It is currently in use on AMD Geode, Via C7, and Intel
> Atom based systems, but, among other issues, it conflicts with the
> Physical Address Extension feature.
>
It conflicts with at least PAE, PAT and x86-64. Since NX requires PAE,
it also conflicts with that. As such, I would think it would have to be
considered obsolete.
-hpa
On Mon, 29 Nov 2010 09:53:29 +1100
Benjamin Herrenschmidt <[email protected]> wrote:
> On Sun, 2010-11-28 at 17:04 +0100, Sebastian Andrzej Siewior wrote:
> > >> + isa@legacy {
What is "@legacy"? I don't think I've seen that in a unit address
before, googling only turns up this device tree, and a quick grep
through the ISA and base OF specs turns up nothing.
> > >> + device_type = "isa";
> > >> + compatible = "simple-bus";
> > >
> > >What does "simple-bus" means ?
> > I added simple bus in order to get probed. But I now I rember that this
> > is also supported per device_type. I get rid of it.
>
> device_type is a nasty bugger, we are trying to get rid of Linux
> reliance on it.
>
> Things like "simple-bus" don't rock my boat either, it's adding to the
> device-tree "informations" that are specific to the way Linux will
> interpret it, which is not how it should be.
The motivation for simple-bus comes from Linux, but its definition is
OS-neutral. It indicates that no special bus knowledge is required to
access the devices under it.
I don't think it applies to ISA, though -- I/O space is special bus
knowledge, and the "ranges" looks weird for memory-space as well.
If we're going to get rid of device_type here, it would be nice to have
some other way to indicate that this node follows the ISA binding,
without having to recognize an implementation-specific compatible.
-Scott
Benjamin Herrenschmidt wrote:
>>> Also how do you plan to expose threading capability ?
>> I haven't plan because this CPU has to threading capability. If there
>> is, I would follow the powerpc way (unless it is allready documented how
>> to do so on x86).
>
> Atoms have SMT don't they ?
It is available on some Atom CPUs. This one does not support it. It has
just one CPU with one thread.
>>> You probably also want some linkage from the processor to the local APIC
>>> no ?
>> Like now I walk through the device tree and look for one but that sounds
>> like a good idea.
>
> The day you have multiple Atom's on a board the "looking for one" won't
> work well :-) Better have explicit references whenever you can for that
> type of linkage.
This also works with the flat tree, right?
>>> Also APICs have some kind od
>>> versionning, they aren't all identical, so your compatible property
>>> needs to be more precise at least.
>> The APIC has a register where you can read the version of the chip, yes.
>> You want me to add this version into the compatible field?
>
> Ideally, you should add something like
>
> intel,ioapic-atomXXX intel-ioapic-vYY intel-ioapic
>
> IE. From the most specific to the most generic. That way if a "quirk" is
> ever needed due to an errata specific to that chip model that isn't
> directly covered by the "version", you get to use that too (unless that
> version register also contains things like mask number etc... in which
> case it's probably enough).
Okay, so we want this for a quirk at a later point in time. Now I
understand.
>>>> + isa@legacy {
>>> So ISA isn't a child of "atom"... that makes "atom" a bit strange as a
>>> node, tho not a big deal per se. I suppose it represent the on-die
>>> peripherals but then you need at least some linkage between that and
>>> the /cpus nodes.
>> Yes, it should represent the on-die peripherals. How should that look
>> like? The bus the same level as the cpu node and link from the cpu to
>> the isa bus?
>
> I would make isa a child of atom
Okay.
>>>> + device_type = "isa";
>>>> + compatible = "simple-bus";
>>> What does "simple-bus" means ?
>> I added simple bus in order to get probed. But I now I rember that this
>> is also supported per device_type. I get rid of it.
>
> device_type is a nasty bugger, we are trying to get rid of Linux
> reliance on it.
>
> Things like "simple-bus" don't rock my boat either, it's adding to the
> device-tree "informations" that are specific to the way Linux will
> interpret it, which is not how it should be.
>
> In this case I would have said something like "atom,isa-bridge" but
> heh...
Would "isa-bridge" be acceptable? So I don't have to add a new bus to the
probe list for every new SoC.
>>>> + rtc@legacy {
>>>> + compatible = "motorola,mc146818";
>>>> + interrupts = <8 3>;
>>>> + interrupt-parent = <&ioapic1>;
>>>> + ctrl_reg = <2>;
>>>> + freq_reg = <0x26>;
>>>> + reg = <1 0x70 2>;
>>>> + };
>>> Also, "ctrl_reg" and "freq_reg" follow an existing binding ? If not,
>>> then I'd suggest you use "-" instead of "_" which is more common in OFW
>>> land and use more descriptive names since "reg" has a meaning of its own
>>> and the above is a bit confusing.
> I definitely agree that it's much better to use a property in the
> device-tree and I'n not arguing against it :-) I was merely asking
> whether that property name was already defined somewhere and if not,
> suggesting a different naming (using dashes rather than underscores)
> which is more consistent with traditional usage :-)
Okay, so I replace _ with - in ctrl_reg and freq_reg if this is your only
concern.
> (Oh and maybe publish a binding wherever Grant puts these nowadays while
> at it so we can all do the same thing from now on)
Good.
>>>> + pci@3fc {
>>>> + /* Secondary IO-APIC */
>>>> + ioapic2: pic@bffff000 {
>>>> + compatible = "intel,ioapic";
>>>> + reg = <0x100 0x0 0x0 0x0>;
>>>> + phys_reg = <0xbffff000>;
>>>> + };
>> The reg property contains the devfn number, interrupt mask, pin number.
>
> No. The "reg" property contains among other things the devfn, but
> certainly not the interrupt mask and pin number. I recommend you look at
> the PCI binding for OF, it's actually not very complicated :-)
>
> The "reg" basically contains an entry for the "config space" of the
> device which basically represents the devfn only, and an entry for each
> BAR which contains the size and various attributes (not the assigned
> address tho, this goes into a separate assigned-addresses property).
>
>> That is what I've been seeing in PCI nodes. phys_reg is the physical
>> address of the chip since reg is allready taken and PCI is not yet up
>> (as I allready explained).
>
> Right but with the appropriate assigned-addresses property, you can
> represent that using standard properties (and use existing address
> resolution helpers from drivers/of) without inventing a new "phys_reg"
> which btw has issues too ("reg" traditionally is a tupple addr/size,
> also where is the number of cells used in phys_reg defined ?).
phys_reg does not have it. And probably won't since we eliminated it.
>>>> + i2c@15a00 {
>>>> + #address-cells = <1>;
>>>> + #size-cells = <0>;
>>>> + reg = <0x15a00 0x0 0x0 0x0>;
>>> OFW PCI binding, which we follow, mandates an "assigned-addresses"
>>> property, tho I suppose that if you haven't assigned anything yet (and
>>> will let Linux do so) the above is kosher. Your "reg" is a bit odd but I
>>> don't have time to dbl check vs. the binding right now.
>> reg is devfn. I just looked up "assigned-addresses" in the PCI BUS Spec
>> and it looks like what I could use instead of phys-reg property. So if
>> this is the case then I need to to distinguish between the first on
>> secondary ioapic and go either for the reg property or
>> assigned-addresses.
>
> You don't really need to. There's code that will do that for you :-)
>
> Stuff in drivers/of/address.c shall be able to parse the addresses by
> index (tho I suppose you might still want to do a special case for PCI
> since the natural way to get to a PCI based address is via a BAR number
> while other stuff just takes an address index).
>
> You shouldn't ever have to look directly at "reg" or
> "assigned-addresses" yourself.
Yes. of_address_to_resource() will do the right thing in this case. It can
only be used after unflatten_device_tree() and I need this earlier.
Now using unflatten_device_tree() earlier isn't that easy, or is it.
I defered the ioapic init a little, so it is now called from
x86_init.mpparse.get_smp_config() so I have alloc_bootmem() working.
So unflatten_device_tree() seems to work here. The ugly part comes now:
early_init_dt_alloc_memory_arch() expects u64 which works with
phys_to_virt() and the other way around. This isn't really the case with
what __alloc_bootmem(). This looks like phys_map to me. Since the dtb code
simply uses phys_to_virt() it doesn't really matter. So it works and I
probably can use of_address_to_resource().
> Cheers,
> Ben.
Sebastian
Mitch Bradley wrote:
> On 11/28/2010 12:53 PM, Benjamin Herrenschmidt wrote:
>>> I wasn't aware of the OFW binding for X86. I will follow it once I find
>>> it.
>> Interesting, I though I would find it on
>> http://www.openfirmware.info/Bindings but it's not there...
>> CC'ing Mitch who might know where to find that.
>
> I'd be happy to work with people to develop a new x86 binding.
So for the CPU node I have so far:
cpus {
#address-cells = <1>;
#size-cells = <0>;
cpu@0 {
device_type = "cpu";
compatible = "Intel,CE4100";
reg = <0>;
lapic = <&lapic0>;
};
};
This one should match ePARP 1.0. David mentioned threads. I have just one.
No HyperThreading, nothing special. Should I just leave it as it or go
for:
cpus {
#address-cells = <1>;
#size-cells = <0>;
cpu@0 {
device_type = "cpu";
compatible = "Intel,CE4100";
reg = <0>;
lapic = <&lapic0>;
thread@0 {
reg = <0>;
};
};
};
?
Sebastian
On Mon, 2010-11-29 at 10:26 -0800, H. Peter Anvin wrote:
> > The OLPC interface might be of some use as a starting point, but would
> > need some work. It is currently in use on AMD Geode, Via C7, and Intel
> > Atom based systems, but, among other issues, it conflicts with the
> > Physical Address Extension feature.
> >
>
> It conflicts with at least PAE, PAT and x86-64. Since NX requires PAE,
> it also conflicts with that. As such, I would think it would have to be
> considered obsolete.
In any case, what is of interest to Sebastian here isn't the ABI for the
client interface, but the properties to put in the /cpus/ nodes, so we
could start with that...
Cheers,
Ben.
On Mon, 2010-11-29 at 13:07 -0600, Scott Wood wrote:
>
> The motivation for simple-bus comes from Linux, but its definition is
> OS-neutral. It indicates that no special bus knowledge is required to
> access the devices under it.
That's never 100% true. In the case of ISA it's even less true due to
the difference between IO and Memory space.
> I don't think it applies to ISA, though -- I/O space is special bus
> knowledge, and the "ranges" looks weird for memory-space as well.
Right.
> If we're going to get rid of device_type here, it would be nice to
> have some other way to indicate that this node follows the ISA
> binding, without having to recognize an implementation-specific
> compatible.
The code in drivers/of/address.c uses the name property to match isa
busses.
Cheers,
Ben.
On Mon, 2010-11-29 at 20:36 +0100, Sebastian Andrzej Siewior wrote:
>
> This also works with the flat tree, right?
Yes, of course. You use similar references in your interrupt-map :-)
> Okay, so we want this for a quirk at a later point in time. Now I
> understand.
More precisely, if something has to depend on a specific
revision/errata/feature, in the future, it would be problematic to have
to modify the device-tree.
The "rule" for compatible is to be a list going from a reasonably
precise description of the specific device to the more generic
programming interface the device implements.
> Would "isa-bridge" be acceptable? So I don't have to add a new bus to
> the probe list for every new SoC.
Just call it 'isa', as for device_type, we shouldn't need it.
The default "probe list" is crap. If you want to have platform devices
instanciated for the ISA devices from the device-tree, I'd rather you
explicitely do it from the architecture code. As Scott said, "isa"
doesn't quite qualify as a "generic" simple bus.
> Yes. of_address_to_resource() will do the right thing in this case. It
> can only be used after unflatten_device_tree() and I need this
> earlier.
This probably means you are doing the unflattening too late...
> Now using unflatten_device_tree() earlier isn't that easy, or is it.
> I defered the ioapic init a little, so it is now called from
> x86_init.mpparse.get_smp_config() so I have alloc_bootmem() working.
You can probably do the unflattening way before alloc_bootmem is
available.
The unflattening does a first pass to scan for the size, so all you need
is a way to get a single contiguous chunk of memory, I'm sure x86 has
ways to provide that sort of thing really early before bootmem is
initialized (what about memblock btw ?).
> So unflatten_device_tree() seems to work here. The ugly part comes
> now:
> early_init_dt_alloc_memory_arch() expects u64 which works with
> phys_to_virt() and the other way around. This isn't really the case
> with
> what __alloc_bootmem(). This looks like phys_map to me. Since the dtb
> code
> simply uses phys_to_virt() it doesn't really matter. So it works and
> I
> probably can use of_address_to_resource().
Yeah just __pa what alloc_bootmem returns but as I said, it should
probably be unflattened earlier than that.
Peter (CC) should be able to help finding the right spot/API there.
Cheers,
Ben.
> > Cheers,
> > Ben.
>
> Sebastian
> --
> To unsubscribe from this list: send the line "unsubscribe
> linux-kernel" in
> the body of a message to [email protected]
> More majordomo info at http://vger.kernel.org/majordomo-info.html
> Please read the FAQ at http://www.tux.org/lkml/
The usual layout is that the PCI bus is a direct child of
the root node, and the ISA bus is a child of the PCI bus.
That reflects the "Northbridge + Southbridge" wiring that
was common at the time that PCI was first introduced.
It's usually the case that faster and wider buses are closer
to the root, with speed and address width decreasing as you
go away from the root.
The fact that PCI configuration accesses are done via I/O
port 0x3fc doesn't make it a child of the ISA bus, because
I/O space is inherent in the x86 CPU architecture and thus
can be considered to be part of the root address space.
In the systems that I have worked with, the ISA bridge is a
first-class PCI device with a PCI config header, so it fits
naturally underneath the PCI bus.
Here are the properties for PCI and ISA on the OLPC XO-1.5
platform (Via C7 x86 CPU with Via VX855 IO chip):
ok dev /pci
ok .properties
interrupt-map 00000800 00000000 00000000 00000001 ff86bf34 0000000a 00000000
00006000 00000000 00000000 00000001 ff86bf34 0000000a 00000000
00008000 00000000 00000000 00000001 ff86bf34 0000000a 00000000
00008100 00000000 00000000 00000002 ff86bf34 00000009 00000000
00008200 00000000 00000000 00000003 ff86bf34 0000000b 00000000
00008400 00000000 00000000 00000004 ff86bf34 0000000a 00000000
0000a000 00000000 00000000 00000001 ff86bf34 00000009 00000000
interrupt-map-mask 0000ff00 00000000 00000000 00000007
#interrupt-cells 00000001
slot-names 00000000
slave-only 00000000
clock-frequency 01fca055
bus-range 00000000 00000000
#size-cells 00000002
#address-cells 00000003
device_type pci
name pci
ok dev /pci/isa
ok .properties
devsel-speed 00000001
class-code 00060100
subsystem-vendor-id 0000152d
subsystem-id 00000833
max-latency 00000000
min-grant 00000000
revision-id 00000000
device-id 00008409
vendor-id 00001106
interrupt-parent ff86bf34
#interrupt-cells 00000002
ranges 00000000 00000000 02000000 00000000 00000000 01000000
00000001 00000000 01000000 00000000 00000000 00010000
clock-frequency 007ea5e0
reg 00008800 00000000 00000000 00000000 00000000
#size-cells 00000001
#address-cells 00000002
device_type isa
name isa
Note that the PCI node has no reg property. On a system with multiple independent PCI buses at the top level, it would be necessary to distinguish them with reg properties reflecting their different addresses in the root address space. PC-style architectures typically (always?) have a single top-level PCI domain, so I've never never needed to do that in x86 land. It used to be pretty common on PPC "big iron".
On Mon, 2010-11-29 at 10:32 -1000, Mitch Bradley wrote:
> The usual layout is that the PCI bus is a direct child of
> the root node, and the ISA bus is a child of the PCI bus.
Right, tho we have been relaxing that on SoC for some time now, at least
on powerpc, since the PCI bus itself tend to hang off one of the SoC
internal busses (as a sibling of other busses) and that those tend to be
represented in the tree, so we make PCI be a child of that SoC bus.
This is also useful in the case where you have multiple SoCs (some are
capable of SMP interconnects) in which case you really have multiple
separate PCI busses and it's clearer to have each of them be the child
of its own SoC node.
> That reflects the "Northbridge + Southbridge" wiring that
> was common at the time that PCI was first introduced.
> It's usually the case that faster and wider buses are closer
> to the root, with speed and address width decreasing as you
> go away from the root.
>
> The fact that PCI configuration accesses are done via I/O
> port 0x3fc doesn't make it a child of the ISA bus, because
> I/O space is inherent in the x86 CPU architecture and thus
> can be considered to be part of the root address space.
>
> In the systems that I have worked with, the ISA bridge is a
> first-class PCI device with a PCI config header, so it fits
> naturally underneath the PCI bus.
This is actually the case of most systems, tho those Atoms SoC are a bit
weird as, afaik, they don't really have PCI... they just simulate some
kind of PCI config space for on-chip devices ,at least that's my
understanding.
Sebastian, do you have a block diagram of the SoC ? Following the actual
bus hierarchy of the chip might be the best approach.
Cheers,
Ben.
> Here are the properties for PCI and ISA on the OLPC XO-1.5
> platform (Via C7 x86 CPU with Via VX855 IO chip):
>
>
> ok dev /pci
> ok .properties
> interrupt-map 00000800 00000000 00000000 00000001 ff86bf34 0000000a 00000000
> 00006000 00000000 00000000 00000001 ff86bf34 0000000a 00000000
> 00008000 00000000 00000000 00000001 ff86bf34 0000000a 00000000
> 00008100 00000000 00000000 00000002 ff86bf34 00000009 00000000
> 00008200 00000000 00000000 00000003 ff86bf34 0000000b 00000000
> 00008400 00000000 00000000 00000004 ff86bf34 0000000a 00000000
> 0000a000 00000000 00000000 00000001 ff86bf34 00000009 00000000
> interrupt-map-mask 0000ff00 00000000 00000000 00000007
> #interrupt-cells 00000001
> slot-names 00000000
> slave-only 00000000
> clock-frequency 01fca055
> bus-range 00000000 00000000
> #size-cells 00000002
> #address-cells 00000003
> device_type pci
> name pci
>
> ok dev /pci/isa
> ok .properties
> devsel-speed 00000001
> class-code 00060100
> subsystem-vendor-id 0000152d
> subsystem-id 00000833
> max-latency 00000000
> min-grant 00000000
> revision-id 00000000
> device-id 00008409
> vendor-id 00001106
> interrupt-parent ff86bf34
> #interrupt-cells 00000002
> ranges 00000000 00000000 02000000 00000000 00000000 01000000
> 00000001 00000000 01000000 00000000 00000000 00010000
> clock-frequency 007ea5e0
> reg 00008800 00000000 00000000 00000000 00000000
> #size-cells 00000001
> #address-cells 00000002
> device_type isa
> name isa
>
> Note that the PCI node has no reg property. On a system with multiple independent PCI buses at the top level, it would be necessary to distinguish them with reg properties reflecting their different addresses in the root address space. PC-style architectures typically (always?) have a single top-level PCI domain, so I've never never needed to do that in x86 land. It used to be pretty common on PPC "big iron".
> --
> To unsubscribe from this list: send the line "unsubscribe linux-kernel" in
> the body of a message to [email protected]
> More majordomo info at http://vger.kernel.org/majordomo-info.html
> Please read the FAQ at http://www.tux.org/lkml/
On 11/29/2010 10:44 AM, Benjamin Herrenschmidt wrote:
> On Mon, 2010-11-29 at 10:32 -1000, Mitch Bradley wrote:
>> The usual layout is that the PCI bus is a direct child of
>> the root node, and the ISA bus is a child of the PCI bus.
>
> Right, tho we have been relaxing that on SoC for some time now, at least
> on powerpc, since the PCI bus itself tend to hang off one of the SoC
> internal busses (as a sibling of other busses) and that those tend to be
> represented in the tree, so we make PCI be a child of that SoC bus.
That seems fine to me. It's not important that PCI be directly attached
to the root bus. I was mostly concerned about the relative positions of
ISA and PCI.
>
> This is also useful in the case where you have multiple SoCs (some are
> capable of SMP interconnects) in which case you really have multiple
> separate PCI busses and it's clearer to have each of them be the child
> of its own SoC node.
>
>> That reflects the "Northbridge + Southbridge" wiring that
>> was common at the time that PCI was first introduced.
>> It's usually the case that faster and wider buses are closer
>> to the root, with speed and address width decreasing as you
>> go away from the root.
>>
>> The fact that PCI configuration accesses are done via I/O
>> port 0x3fc doesn't make it a child of the ISA bus, because
>> I/O space is inherent in the x86 CPU architecture and thus
>> can be considered to be part of the root address space.
>>
>> In the systems that I have worked with, the ISA bridge is a
>> first-class PCI device with a PCI config header, so it fits
>> naturally underneath the PCI bus.
>
> This is actually the case of most systems, tho those Atoms SoC are a bit
> weird as, afaik, they don't really have PCI... they just simulate some
> kind of PCI config space for on-chip devices ,at least that's my
> understanding.
>
> Sebastian, do you have a block diagram of the SoC ? Following the actual
> bus hierarchy of the chip might be the best approach.
>
> Cheers,
> Ben.
>
>> Here are the properties for PCI and ISA on the OLPC XO-1.5
>> platform (Via C7 x86 CPU with Via VX855 IO chip):
>>
>>
>> ok dev /pci
>> ok .properties
>> interrupt-map 00000800 00000000 00000000 00000001 ff86bf34 0000000a 00000000
>> 00006000 00000000 00000000 00000001 ff86bf34 0000000a 00000000
>> 00008000 00000000 00000000 00000001 ff86bf34 0000000a 00000000
>> 00008100 00000000 00000000 00000002 ff86bf34 00000009 00000000
>> 00008200 00000000 00000000 00000003 ff86bf34 0000000b 00000000
>> 00008400 00000000 00000000 00000004 ff86bf34 0000000a 00000000
>> 0000a000 00000000 00000000 00000001 ff86bf34 00000009 00000000
>> interrupt-map-mask 0000ff00 00000000 00000000 00000007
>> #interrupt-cells 00000001
>> slot-names 00000000
>> slave-only 00000000
>> clock-frequency 01fca055
>> bus-range 00000000 00000000
>> #size-cells 00000002
>> #address-cells 00000003
>> device_type pci
>> name pci
>>
>> ok dev /pci/isa
>> ok .properties
>> devsel-speed 00000001
>> class-code 00060100
>> subsystem-vendor-id 0000152d
>> subsystem-id 00000833
>> max-latency 00000000
>> min-grant 00000000
>> revision-id 00000000
>> device-id 00008409
>> vendor-id 00001106
>> interrupt-parent ff86bf34
>> #interrupt-cells 00000002
>> ranges 00000000 00000000 02000000 00000000 00000000 01000000
>> 00000001 00000000 01000000 00000000 00000000 00010000
>> clock-frequency 007ea5e0
>> reg 00008800 00000000 00000000 00000000 00000000
>> #size-cells 00000001
>> #address-cells 00000002
>> device_type isa
>> name isa
>>
>> Note that the PCI node has no reg property. On a system with multiple independent PCI buses at the top level, it would be necessary to distinguish them with reg properties reflecting their different addresses in the root address space. PC-style architectures typically (always?) have a single top-level PCI domain, so I've never never needed to do that in x86 land. It used to be pretty common on PPC "big iron".
>> --
>> To unsubscribe from this list: send the line "unsubscribe linux-kernel" in
>> the body of a message to [email protected]
>> More majordomo info at http://vger.kernel.org/majordomo-info.html
>> Please read the FAQ at http://www.tux.org/lkml/
>
>
> The usual layout is that the PCI bus is a direct child of
> the root node, and the ISA bus is a child of the PCI bus.
> That reflects the "Northbridge + Southbridge" wiring that
That isn't strictly true either. On many PC devices the ISA bus (or LPC
bus nowdays) has no heirarchy as such because ISA cycles get issued if
the PCI cycles don't generate a response. In addition some cycles go to
both busses on some chipsets and there are various bits of magic so the
I/O spaces and particularly the memory spaces are intertwined.
So it's not a subordinate bus really, its a bit weirder. PCMCIA is
probably a sub-bus when you've got a PCI/PCMCIA adapter but ISA in
general is a bit fuzzy.
And then there are systems like PA-RISC where there are multiple entire
PCI/ISA busses hung off the primary bus which is neither 8)
There are also various bits of "architectural" space which are on the
motherboard (traditionally 0x00-0xFF) but some of which are on the CPU in
some cases (Cyrix was 0x21/22 if I remember), and there are other
architectural spaces like the ELCR which are "magic".
The PC is alas to computer architecture what perl is to programming
languages.
Alan
> This is actually the case of most systems, tho those Atoms SoC are a bit
> weird as, afaik, they don't really have PCI... they just simulate some
> kind of PCI config space for on-chip devices ,at least that's my
> understanding.
This is true of a lot of devices on most "PCI" chipsets today. Even back
to things like the VIA K6 era chipsets with the V-Bus, or the
MediaGX/Geode where some of PCI space is a hallucination brought on by
SMM traps and BIOS upgrades have been known to add devices to the bus
which are not even neatly on their own sub-tree of any kind. Indeed some
PCI devices may be half PCI half magic.
> Sebastian, do you have a block diagram of the SoC ? Following the actual
> bus hierarchy of the chip might be the best approach.
That may not be wise. Your real bus heirarchy may not be architecturally
defined on some systems so you can't incorporate it into code, nor is it
necessarily a heirarchy - eg some of the Geodes.
Alan
On Mon, Nov 29, 2010 at 01:07:20PM -0600, Scott Wood wrote:
> On Mon, 29 Nov 2010 09:53:29 +1100
> Benjamin Herrenschmidt <[email protected]> wrote:
> > On Sun, 2010-11-28 at 17:04 +0100, Sebastian Andrzej Siewior wrote:
> > > >> + isa@legacy {
>
> What is "@legacy"? I don't think I've seen that in a unit address
> before, googling only turns up this device tree, and a quick grep
> through the ISA and base OF specs turns up nothing.
>
> > > >> + device_type = "isa";
> > > >> + compatible = "simple-bus";
> > > >
> > > >What does "simple-bus" means ?
> > > I added simple bus in order to get probed. But I now I rember that this
> > > is also supported per device_type. I get rid of it.
> >
> > device_type is a nasty bugger, we are trying to get rid of Linux
> > reliance on it.
> >
> > Things like "simple-bus" don't rock my boat either, it's adding to the
> > device-tree "informations" that are specific to the way Linux will
> > interpret it, which is not how it should be.
>
> The motivation for simple-bus comes from Linux, but its definition is
> OS-neutral. It indicates that no special bus knowledge is required to
> access the devices under it.
Well, sort of. More specifically, that plus it indicates that the bus
does not support any method of dynamic probing, so the device tree is
the *only* way to figure out what's on it.
--
David Gibson | I'll have my music baroque, and my code
david AT gibson.dropbear.id.au | minimalist, thank you. NOT _the_ _other_
| _way_ _around_!
http://www.ozlabs.org/~dgibson
On Mon, 2010-11-29 at 23:47 +0000, Alan Cox wrote:
>
> That may not be wise. Your real bus heirarchy may not be architecturally
> defined on some systems so you can't incorporate it into code, nor is it
> necessarily a heirarchy - eg some of the Geodes.
Ok, so I'd suggest doing something like:
- pci is below the corresponding atom node
- isa is a child of pci
The later is a useful representation even if it doesn't correspond to
reality. From an address representation perspective, ISA can be
considered somewhat as a substractive decoding child of PCI (again even
if that's not 100% true), which simplifies the representation in the
device-tree a bit, and allows to still have things like VGA devices on
the PCI segment that decode IO ports in the ISA range.
Cheers,
Ben.
Benjamin Herrenschmidt wrote:
> On Mon, 2010-11-29 at 23:47 +0000, Alan Cox wrote:
>> That may not be wise. Your real bus heirarchy may not be architecturally
>> defined on some systems so you can't incorporate it into code, nor is it
>> necessarily a heirarchy - eg some of the Geodes.
>
> Ok, so I'd suggest doing something like:
>
> - pci is below the corresponding atom node
> - isa is a child of pci
>
> The later is a useful representation even if it doesn't correspond to
> reality. From an address representation perspective, ISA can be
> considered somewhat as a substractive decoding child of PCI (again even
> if that's not 100% true), which simplifies the representation in the
> device-tree a bit, and allows to still have things like VGA devices on
> the PCI segment that decode IO ports in the ISA range.
okay.
> Cheers,
> Ben.
Sebastian
Mitch Bradley wrote:
> Here are the properties for PCI and ISA on the OLPC XO-1.5
> platform (Via C7 x86 CPU with Via VX855 IO chip):
>
> ok dev /pci/isa
> ok .properties
> reg 00008800 00000000 00000000 00000000 00000000
> Note that the PCI node has no reg property. On a system with multiple independent PCI buses at the top level, it would be necessary to distinguish them with reg properties reflecting their different addresses in the root address space. PC-style architectures typically (always?) have a single top-level PCI domain, so I've never never needed to do that in x86 land. It used to be pretty common on PPC "big iron".
My PCI node does not have a reg property but its child nodes.
In x86_of_pci_init() [0] I walk through the PCI child nodes, read the reg
property of each node which gives me the devfn of the device. I pass this
pci_get_slot() and get the pci_dev struct where I attach the of_node. So
it can by used by the pci driver.
How do I create the PCI device <-> OF node mapping with this? I think your
isa node has this as well. 00008800 is the devfn and I simply forgot the
four blocks of zeros.
[0] http://lkml.org/lkml/2010/11/25/294
Sebastian
On Tue, 2010-11-30 at 12:51 +0100, Sebastian Andrzej Siewior wrote:
> My PCI node does not have a reg property but its child nodes.
> In x86_of_pci_init() [0] I walk through the PCI child nodes, read the reg
> property of each node which gives me the devfn of the device. I pass this
> pci_get_slot() and get the pci_dev struct where I attach the of_node. So
> it can by used by the pci driver.
>
> How do I create the PCI device <-> OF node mapping with this? I think your
> isa node has this as well. 00008800 is the devfn and I simply forgot the
> four blocks of zeros.
I think Mitch meant the PCI host bridge usually has a reg property in
the address space of the parent, used to identify the bridge uniquely if
you have multiple of them.
Cheers,
Ben.
On 11/29/2010 03:42 PM, Alan Cox wrote:
>> The usual layout is that the PCI bus is a direct child of
>> the root node, and the ISA bus is a child of the PCI bus.
>> That reflects the "Northbridge + Southbridge" wiring that
>
> That isn't strictly true either. On many PC devices the ISA bus (or LPC
> bus nowdays) has no heirarchy as such because ISA cycles get issued if
> the PCI cycles don't generate a response. In addition some cycles go to
> both busses on some chipsets and there are various bits of magic so the
> I/O spaces and particularly the memory spaces are intertwined.
>
> So it's not a subordinate bus really, its a bit weirder. PCMCIA is
> probably a sub-bus when you've got a PCI/PCMCIA adapter but ISA in
> general is a bit fuzzy.
>
Actually, it can go both ways -- there are ISA/LPC busses which are true
childs of PCI busses -- in particular, are subject to the decoding
restrictions of the host bridge -- and there are those that aren't
logically even if they are physically. The reason for this is that
subtractive decoding can be done either at the back end (as in a classic
PCI/ISA system with a single PCI bus) or at the front end (as in
HyperTransport for example.)
-hpa
Benjamin Herrenschmidt wrote:
> On Fri, 2010-11-26 at 15:19 +0100, Sebastian Andrzej Siewior wrote:
>> AFAIK Benh was thinking about renaming it. I don't know if this is
>> still
>> the case or when he intends to do so. Once he does so, this can be
>> renamed
>> as well.
>
> That and moving the powerpc code to a generic place so you don't have to
> re-invent your own :-)
>
> I think Grant has patches for that.
I've found only patches which rename the struct, none that move it into
generic code. Should I rename mine and wait until it appears in generic
code?
>
> Cheers,
> Ben.
Sebastian
On Mon, Nov 29, 2010 at 08:44:45PM +0100, Sebastian Andrzej Siewior wrote:
> Mitch Bradley wrote:
> >On 11/28/2010 12:53 PM, Benjamin Herrenschmidt wrote:
> >>>I wasn't aware of the OFW binding for X86. I will follow it once I find
> >>>it.
> >>Interesting, I though I would find it on
> >>http://www.openfirmware.info/Bindings but it's not there...
> >>CC'ing Mitch who might know where to find that.
> >
> >I'd be happy to work with people to develop a new x86 binding.
>
> So for the CPU node I have so far:
>
> cpus {
> #address-cells = <1>;
> #size-cells = <0>;
>
> cpu@0 {
> device_type = "cpu";
> compatible = "Intel,CE4100";
> reg = <0>;
> lapic = <&lapic0>;
> };
> };
>
> This one should match ePARP 1.0. David mentioned threads. I have just one.
> No HyperThreading, nothing special. Should I just leave it as it or go
> for:
> cpus {
> #address-cells = <1>;
> #size-cells = <0>;
>
> cpu@0 {
> device_type = "cpu";
> compatible = "Intel,CE4100";
> reg = <0>;
> lapic = <&lapic0>;
>
> thread@0 {
> reg = <0>;
> };
> };
> };
> ?
Leave it as is. For hyperthreading there's a good chance you'll be
able to get away with the simple extension we're planning to use in
ePAPR 1.1, which would be:
cpu@0 {
...
reg = <0 1 2 3>;
...
};
For, e.g. a cpu with 4 threads.
If more detailed per-thread information is needed then we or you might
want sub-nodes one day. But even if we do that, we should allow them
to be omitted in the single-thread case.
--
David Gibson | I'll have my music baroque, and my code
david AT gibson.dropbear.id.au | minimalist, thank you. NOT _the_ _other_
| _way_ _around_!
http://www.ozlabs.org/~dgibson
* Benjamin Herrenschmidt | 2010-11-29 09:32:06 [+1100]:
>Hehe yeah :-) It's actually not a simple problem. For example, we can't
>just move the powerpc variant over to generic code as-is bcs ... we have
>2 completely different ways of doing it between ppc32 and ppc64 for
>historical reasons :-) They also have different "features". This is
>something I need to reconcile at some stage.
>
>For example our ppc32 variant support bus renumbering (ie, Linux
>assigning different bus numbers than what the DT encodes) while our
>ppc64 doesn't, but our ppc64 variant has additional "stuff" to deal with
>hotplug for example etc...
With this patch I can get rid of my custom of_irq_map_pci() in x86 tree.
The remaining thing would be the inferior pci_device <=> of_node match
code.
This is only x86 tested.
>From 8c7eeae45f28ea6737a8f5c5c32026a02432d5cc Mon Sep 17 00:00:00 2001
From: Sebastian Andrzej Siewior <[email protected]>
Date: Wed, 1 Dec 2010 21:55:12 +0100
Subject: [PATCH] of: move of_irq_map_pci() into generic code
There is a tiny difference between PPC32 and PPC64. Microblaze uses the
PPC32 variant.
Signed-off-by: Sebastian Andrzej Siewior <[email protected]>
---
arch/microblaze/include/asm/prom.h | 15 -----
arch/microblaze/kernel/prom_parse.c | 77 ---------------------------
arch/microblaze/pci/pci-common.c | 1 +
arch/powerpc/include/asm/prom.h | 15 -----
arch/powerpc/kernel/pci-common.c | 1 +
arch/powerpc/kernel/prom_parse.c | 84 ------------------------------
drivers/of/Makefile | 1 +
drivers/of/of_pci.c | 97 +++++++++++++++++++++++++++++++++++
include/linux/of_pci.h | 24 +++++++++
9 files changed, 124 insertions(+), 191 deletions(-)
create mode 100644 drivers/of/of_pci.c
create mode 100644 include/linux/of_pci.h
diff --git a/arch/microblaze/include/asm/prom.h b/arch/microblaze/include/asm/prom.h
index bdc3831..aa3ab12 100644
--- a/arch/microblaze/include/asm/prom.h
+++ b/arch/microblaze/include/asm/prom.h
@@ -67,21 +67,6 @@ struct device_node *of_get_cpu_node(int cpu, unsigned int *thread);
/* Get the MAC address */
extern const void *of_get_mac_address(struct device_node *np);
-/**
- * of_irq_map_pci - Resolve the interrupt for a PCI device
- * @pdev: the device whose interrupt is to be resolved
- * @out_irq: structure of_irq filled by this function
- *
- * This function resolves the PCI interrupt for a given PCI device. If a
- * device-node exists for a given pci_dev, it will use normal OF tree
- * walking. If not, it will implement standard swizzling and walk up the
- * PCI tree until an device-node is found, at which point it will finish
- * resolving using the OF tree walking.
- */
-struct pci_dev;
-struct of_irq;
-extern int of_irq_map_pci(struct pci_dev *pdev, struct of_irq *out_irq);
-
#endif /* __ASSEMBLY__ */
#endif /* __KERNEL__ */
diff --git a/arch/microblaze/kernel/prom_parse.c b/arch/microblaze/kernel/prom_parse.c
index 99d9b61..306f41d 100644
--- a/arch/microblaze/kernel/prom_parse.c
+++ b/arch/microblaze/kernel/prom_parse.c
@@ -2,88 +2,11 @@
#include <linux/kernel.h>
#include <linux/string.h>
-#include <linux/pci_regs.h>
#include <linux/module.h>
#include <linux/ioport.h>
#include <linux/etherdevice.h>
#include <linux/of_address.h>
#include <asm/prom.h>
-#include <asm/pci-bridge.h>
-
-#ifdef CONFIG_PCI
-int of_irq_map_pci(struct pci_dev *pdev, struct of_irq *out_irq)
-{
- struct device_node *dn, *ppnode;
- struct pci_dev *ppdev;
- u32 lspec;
- u32 laddr[3];
- u8 pin;
- int rc;
-
- /* Check if we have a device node, if yes, fallback to standard OF
- * parsing
- */
- dn = pci_device_to_OF_node(pdev);
- if (dn)
- return of_irq_map_one(dn, 0, out_irq);
-
- /* Ok, we don't, time to have fun. Let's start by building up an
- * interrupt spec. we assume #interrupt-cells is 1, which is standard
- * for PCI. If you do different, then don't use that routine.
- */
- rc = pci_read_config_byte(pdev, PCI_INTERRUPT_PIN, &pin);
- if (rc != 0)
- return rc;
- /* No pin, exit */
- if (pin == 0)
- return -ENODEV;
-
- /* Now we walk up the PCI tree */
- lspec = pin;
- for (;;) {
- /* Get the pci_dev of our parent */
- ppdev = pdev->bus->self;
-
- /* Ouch, it's a host bridge... */
- if (ppdev == NULL) {
- struct pci_controller *host;
- host = pci_bus_to_host(pdev->bus);
- ppnode = host ? host->dn : NULL;
- /* No node for host bridge ? give up */
- if (ppnode == NULL)
- return -EINVAL;
- } else
- /* We found a P2P bridge, check if it has a node */
- ppnode = pci_device_to_OF_node(ppdev);
-
- /* Ok, we have found a parent with a device-node, hand over to
- * the OF parsing code.
- * We build a unit address from the linux device to be used for
- * resolution. Note that we use the linux bus number which may
- * not match your firmware bus numbering.
- * Fortunately, in most cases, interrupt-map-mask doesn't
- * include the bus number as part of the matching.
- * You should still be careful about that though if you intend
- * to rely on this function (you ship a firmware that doesn't
- * create device nodes for all PCI devices).
- */
- if (ppnode)
- break;
-
- /* We can only get here if we hit a P2P bridge with no node,
- * let's do standard swizzling and try again
- */
- lspec = pci_swizzle_interrupt_pin(pdev, lspec);
- pdev = ppdev;
- }
-
- laddr[0] = (pdev->bus->number << 16)
- | (pdev->devfn << 8);
- laddr[1] = laddr[2] = 0;
- return of_irq_map_raw(ppnode, &lspec, 1, laddr, out_irq);
-}
-EXPORT_SYMBOL_GPL(of_irq_map_pci);
-#endif /* CONFIG_PCI */
void of_parse_dma_window(struct device_node *dn, const void *dma_window_prop,
unsigned long *busno, unsigned long *phys, unsigned long *size)
diff --git a/arch/microblaze/pci/pci-common.c b/arch/microblaze/pci/pci-common.c
index e363615..1e01a12 100644
--- a/arch/microblaze/pci/pci-common.c
+++ b/arch/microblaze/pci/pci-common.c
@@ -29,6 +29,7 @@
#include <linux/slab.h>
#include <linux/of.h>
#include <linux/of_address.h>
+#include <linux/of_pci.h>
#include <asm/processor.h>
#include <asm/io.h>
diff --git a/arch/powerpc/include/asm/prom.h b/arch/powerpc/include/asm/prom.h
index ae26f2e..01c3302 100644
--- a/arch/powerpc/include/asm/prom.h
+++ b/arch/powerpc/include/asm/prom.h
@@ -73,21 +73,6 @@ static inline int of_node_to_nid(struct device_node *device) { return 0; }
#endif
#define of_node_to_nid of_node_to_nid
-/**
- * of_irq_map_pci - Resolve the interrupt for a PCI device
- * @pdev: the device whose interrupt is to be resolved
- * @out_irq: structure of_irq filled by this function
- *
- * This function resolves the PCI interrupt for a given PCI device. If a
- * device-node exists for a given pci_dev, it will use normal OF tree
- * walking. If not, it will implement standard swizzling and walk up the
- * PCI tree until an device-node is found, at which point it will finish
- * resolving using the OF tree walking.
- */
-struct pci_dev;
-struct of_irq;
-extern int of_irq_map_pci(struct pci_dev *pdev, struct of_irq *out_irq);
-
extern void of_instantiate_rtc(void);
/* These includes are put at the bottom because they may contain things
diff --git a/arch/powerpc/kernel/pci-common.c b/arch/powerpc/kernel/pci-common.c
index 10a44e6..eb341be 100644
--- a/arch/powerpc/kernel/pci-common.c
+++ b/arch/powerpc/kernel/pci-common.c
@@ -22,6 +22,7 @@
#include <linux/init.h>
#include <linux/bootmem.h>
#include <linux/of_address.h>
+#include <linux/of_pci.h>
#include <linux/mm.h>
#include <linux/list.h>
#include <linux/syscalls.h>
diff --git a/arch/powerpc/kernel/prom_parse.c b/arch/powerpc/kernel/prom_parse.c
index 88334af..306f41d 100644
--- a/arch/powerpc/kernel/prom_parse.c
+++ b/arch/powerpc/kernel/prom_parse.c
@@ -2,95 +2,11 @@
#include <linux/kernel.h>
#include <linux/string.h>
-#include <linux/pci_regs.h>
#include <linux/module.h>
#include <linux/ioport.h>
#include <linux/etherdevice.h>
#include <linux/of_address.h>
#include <asm/prom.h>
-#include <asm/pci-bridge.h>
-
-#ifdef CONFIG_PCI
-int of_irq_map_pci(struct pci_dev *pdev, struct of_irq *out_irq)
-{
- struct device_node *dn, *ppnode;
- struct pci_dev *ppdev;
- u32 lspec;
- u32 laddr[3];
- u8 pin;
- int rc;
-
- /* Check if we have a device node, if yes, fallback to standard OF
- * parsing
- */
- dn = pci_device_to_OF_node(pdev);
- if (dn) {
- rc = of_irq_map_one(dn, 0, out_irq);
- if (!rc)
- return rc;
- }
-
- /* Ok, we don't, time to have fun. Let's start by building up an
- * interrupt spec. we assume #interrupt-cells is 1, which is standard
- * for PCI. If you do different, then don't use that routine.
- */
- rc = pci_read_config_byte(pdev, PCI_INTERRUPT_PIN, &pin);
- if (rc != 0)
- return rc;
- /* No pin, exit */
- if (pin == 0)
- return -ENODEV;
-
- /* Now we walk up the PCI tree */
- lspec = pin;
- for (;;) {
- /* Get the pci_dev of our parent */
- ppdev = pdev->bus->self;
-
- /* Ouch, it's a host bridge... */
- if (ppdev == NULL) {
-#ifdef CONFIG_PPC64
- ppnode = pci_bus_to_OF_node(pdev->bus);
-#else
- struct pci_controller *host;
- host = pci_bus_to_host(pdev->bus);
- ppnode = host ? host->dn : NULL;
-#endif
- /* No node for host bridge ? give up */
- if (ppnode == NULL)
- return -EINVAL;
- } else
- /* We found a P2P bridge, check if it has a node */
- ppnode = pci_device_to_OF_node(ppdev);
-
- /* Ok, we have found a parent with a device-node, hand over to
- * the OF parsing code.
- * We build a unit address from the linux device to be used for
- * resolution. Note that we use the linux bus number which may
- * not match your firmware bus numbering.
- * Fortunately, in most cases, interrupt-map-mask doesn't include
- * the bus number as part of the matching.
- * You should still be careful about that though if you intend
- * to rely on this function (you ship a firmware that doesn't
- * create device nodes for all PCI devices).
- */
- if (ppnode)
- break;
-
- /* We can only get here if we hit a P2P bridge with no node,
- * let's do standard swizzling and try again
- */
- lspec = pci_swizzle_interrupt_pin(pdev, lspec);
- pdev = ppdev;
- }
-
- laddr[0] = (pdev->bus->number << 16)
- | (pdev->devfn << 8);
- laddr[1] = laddr[2] = 0;
- return of_irq_map_raw(ppnode, &lspec, 1, laddr, out_irq);
-}
-EXPORT_SYMBOL_GPL(of_irq_map_pci);
-#endif /* CONFIG_PCI */
void of_parse_dma_window(struct device_node *dn, const void *dma_window_prop,
unsigned long *busno, unsigned long *phys, unsigned long *size)
diff --git a/drivers/of/Makefile b/drivers/of/Makefile
index 7888155..4dcb177 100644
--- a/drivers/of/Makefile
+++ b/drivers/of/Makefile
@@ -8,3 +8,4 @@ obj-$(CONFIG_OF_GPIO) += gpio.o
obj-$(CONFIG_OF_I2C) += of_i2c.o
obj-$(CONFIG_OF_SPI) += of_spi.o
obj-$(CONFIG_OF_MDIO) += of_mdio.o
+obj-$(CONFIG_PCI) += of_pci.o
diff --git a/drivers/of/of_pci.c b/drivers/of/of_pci.c
new file mode 100644
index 0000000..136fef8
--- /dev/null
+++ b/drivers/of/of_pci.c
@@ -0,0 +1,97 @@
+#include <linux/kernel.h>
+#include <linux/pci_regs.h>
+#include <asm/prom.h>
+
+#if defined(CONFIG_PPC64) || defined(CONFIG_X86)
+static struct device_node *pci_get_parent_node(struct pci_dev *pdev)
+{
+ return pci_bus_to_OF_node(pdev->bus);
+}
+#endif
+
+#if defined(CONFIG_PPC32) || defined(CONFIG_MICROBLAZE)
+static struct device_node *pci_get_parent_node(struct pci_dev *pdev)
+{
+ struct pci_controller *host;
+
+ host = pci_bus_to_host(pdev->bus);
+ ppnode = host ? host->dn : NULL;
+}
+#endif
+
+int of_irq_map_pci(struct pci_dev *pdev, struct of_irq *out_irq)
+{
+ struct device_node *dn, *ppnode;
+ struct pci_dev *ppdev;
+ u32 lspec;
+ __be32 lspec_be;
+ __be32 laddr[3];
+ u8 pin;
+ int rc;
+
+ /* Check if we have a device node, if yes, fallback to standard OF
+ * parsing
+ */
+ dn = pci_device_to_OF_node(pdev);
+ if (dn) {
+ rc = of_irq_map_one(dn, 0, out_irq);
+ if (!rc)
+ return rc;
+ }
+
+ /* Ok, we don't, time to have fun. Let's start by building up an
+ * interrupt spec. we assume #interrupt-cells is 1, which is standard
+ * for PCI. If you do different, then don't use that routine.
+ */
+ rc = pci_read_config_byte(pdev, PCI_INTERRUPT_PIN, &pin);
+ if (rc != 0)
+ return rc;
+ /* No pin, exit */
+ if (pin == 0)
+ return -ENODEV;
+
+ /* Now we walk up the PCI tree */
+ lspec = pin;
+ for (;;) {
+ /* Get the pci_dev of our parent */
+ ppdev = pdev->bus->self;
+
+ /* Ouch, it's a host bridge... */
+ if (ppdev == NULL) {
+ ppnode = pci_get_parent_node(pdev);
+
+ /* No node for host bridge ? give up */
+ if (ppnode == NULL)
+ return -EINVAL;
+ } else {
+ /* We found a P2P bridge, check if it has a node */
+ ppnode = pci_device_to_OF_node(ppdev);
+ }
+
+ /* Ok, we have found a parent with a device-node, hand over to
+ * the OF parsing code.
+ * We build a unit address from the linux device to be used for
+ * resolution. Note that we use the linux bus number which may
+ * not match your firmware bus numbering.
+ * Fortunately, in most cases, interrupt-map-mask doesn't include
+ * the bus number as part of the matching.
+ * You should still be careful about that though if you intend
+ * to rely on this function (you ship a firmware that doesn't
+ * create device nodes for all PCI devices).
+ */
+ if (ppnode)
+ break;
+
+ /* We can only get here if we hit a P2P bridge with no node,
+ * let's do standard swizzling and try again
+ */
+ lspec = pci_swizzle_interrupt_pin(pdev, lspec);
+ pdev = ppdev;
+ }
+
+ lspec_be = cpu_to_be32(lspec);
+ laddr[0] = cpu_to_be32((pdev->bus->number << 16) | (pdev->devfn << 8));
+ laddr[1] = laddr[2] = cpu_to_be32(0);
+ return of_irq_map_raw(ppnode, &lspec_be, 1, laddr, out_irq);
+}
+EXPORT_SYMBOL_GPL(of_irq_map_pci);
diff --git a/include/linux/of_pci.h b/include/linux/of_pci.h
new file mode 100644
index 0000000..36b3cfa
--- /dev/null
+++ b/include/linux/of_pci.h
@@ -0,0 +1,24 @@
+#ifndef __OF_PCI_H
+#define __OF_PCI_H
+
+#include <linux/pci.h>
+
+#ifdef CONFIG_PPC_OF
+#include <asm/pci-bridge.h>
+#endif
+
+/**
+ * of_irq_map_pci - Resolve the interrupt for a PCI device
+ * @pdev: the device whose interrupt is to be resolved
+ * @out_irq: structure of_irq filled by this function
+ *
+ * This function resolves the PCI interrupt for a given PCI device. If a
+ * device-node exists for a given pci_dev, it will use normal OF tree
+ * walking. If not, it will implement standard swizzling and walk up the
+ * PCI tree until an device-node is found, at which point it will finish
+ * resolving using the OF tree walking.
+ */
+struct pci_dev;
+struct of_irq;
+int of_irq_map_pci(struct pci_dev *pdev, struct of_irq *out_irq);
+#endif
--
1.7.3.2
Sebastian
Commit-ID: a38c5380ef9f088be9f49b6e4c5d80af8b1b5cd4
Gitweb: http://git.kernel.org/tip/a38c5380ef9f088be9f49b6e4c5d80af8b1b5cd4
Author: Sebastian Andrzej Siewior <[email protected]>
AuthorDate: Fri, 26 Nov 2010 17:50:20 +0100
Committer: Thomas Gleixner <[email protected]>
CommitDate: Mon, 6 Dec 2010 14:30:28 +0100
x86: io_apic: Split setup_ioapic_ids_from_mpc()
Sodaville needs to setup the IO_APIC ids as the boot loader leaves
them uninitialized. Split out the setter function so it can be called
unconditionally from the sodaville board code.
Signed-off-by: Sebastian Andrzej Siewior <[email protected]>
Cc: Yinghai Lu <[email protected]>
LKML-Reference: <20101126165020.GA26361@http://www.tglx.de>
Signed-off-by: Thomas Gleixner <[email protected]>
---
arch/x86/include/asm/io_apic.h | 1 +
arch/x86/kernel/apic/io_apic.c | 28 ++++++++++++++++------------
2 files changed, 17 insertions(+), 12 deletions(-)
diff --git a/arch/x86/include/asm/io_apic.h b/arch/x86/include/asm/io_apic.h
index 240a0a5..d7d46cb 100644
--- a/arch/x86/include/asm/io_apic.h
+++ b/arch/x86/include/asm/io_apic.h
@@ -169,6 +169,7 @@ extern void mask_IO_APIC_setup(struct IO_APIC_route_entry **ioapic_entries);
extern int restore_IO_APIC_setup(struct IO_APIC_route_entry **ioapic_entries);
extern void setup_ioapic_ids_from_mpc(void);
+extern void setup_ioapic_ids_from_mpc_nocheck(void);
struct mp_ioapic_gsi{
u32 gsi_base;
diff --git a/arch/x86/kernel/apic/io_apic.c b/arch/x86/kernel/apic/io_apic.c
index ce3c6fb..4f026a6 100644
--- a/arch/x86/kernel/apic/io_apic.c
+++ b/arch/x86/kernel/apic/io_apic.c
@@ -1934,8 +1934,7 @@ void disable_IO_APIC(void)
*
* by Matt Domsch <[email protected]> Tue Dec 21 12:25:05 CST 1999
*/
-
-void __init setup_ioapic_ids_from_mpc(void)
+void __init setup_ioapic_ids_from_mpc_nocheck(void)
{
union IO_APIC_reg_00 reg_00;
physid_mask_t phys_id_present_map;
@@ -1944,15 +1943,6 @@ void __init setup_ioapic_ids_from_mpc(void)
unsigned char old_id;
unsigned long flags;
- if (acpi_ioapic)
- return;
- /*
- * Don't check I/O APIC IDs for xAPIC systems. They have
- * no meaning without the serial APIC bus.
- */
- if (!(boot_cpu_data.x86_vendor == X86_VENDOR_INTEL)
- || APIC_XAPIC(apic_version[boot_cpu_physical_apicid]))
- return;
/*
* This is broken; anything with a real cpu count has to
* circumvent this idiocy regardless.
@@ -2006,7 +1996,6 @@ void __init setup_ioapic_ids_from_mpc(void)
physids_or(phys_id_present_map, phys_id_present_map, tmp);
}
-
/*
* We need to adjust the IRQ routing table
* if the ID changed.
@@ -2042,6 +2031,21 @@ void __init setup_ioapic_ids_from_mpc(void)
apic_printk(APIC_VERBOSE, " ok.\n");
}
}
+
+void __init setup_ioapic_ids_from_mpc(void)
+{
+
+ if (acpi_ioapic)
+ return;
+ /*
+ * Don't check I/O APIC IDs for xAPIC systems. They have
+ * no meaning without the serial APIC bus.
+ */
+ if (!(boot_cpu_data.x86_vendor == X86_VENDOR_INTEL)
+ || APIC_XAPIC(apic_version[boot_cpu_physical_apicid]))
+ return;
+ setup_ioapic_ids_from_mpc_nocheck();
+}
#endif
int no_timer_check __initdata;
For 32bit mptable path, setup_ids_from_mpc() always write io apic id
register, even there is no change needed.
So try to do that when they are different bewteen reading out and mptable
-v2: update to recent setup_ioapic_ids_from_mpc() split..
Signed-off-by: Yinghai Lu <[email protected]>
---
arch/x86/kernel/apic/io_apic.c | 7 +++++--
1 file changed, 5 insertions(+), 2 deletions(-)
Index: linux-2.6/arch/x86/kernel/apic/io_apic.c
===================================================================
--- linux-2.6.orig/arch/x86/kernel/apic/io_apic.c
+++ linux-2.6/arch/x86/kernel/apic/io_apic.c
@@ -2006,9 +2006,12 @@ void __init setup_ioapic_ids_from_mpc_no
= mp_ioapics[apic_id].apicid;
/*
- * Read the right value from the MPC table and
- * write it into the ID register.
+ * Update the ID register according to the right value from
+ * the MPC table if they are different.
*/
+ if (mp_ioapics[apic_id].apicid == reg_00.bits.ID)
+ continue;
+
apic_printk(APIC_VERBOSE, KERN_INFO
"...changing IO-APIC physical APIC ID to %d ...",
mp_ioapics[apic_id].apicid);
* Sebastian Andrzej Siewior | 2010-11-25 18:39:51 [+0100]:
>parse_setup_data() uses early_memremap() for a PAGE_SIZE mapping in
>order to figure out the type & size. If this mapping is not large enough
>then parse_e820_ext() will remap this area again via early_ioremap()
>since the first mapping is still in use.
>
>This patch attempts to simplify the handling and parse_e820_ext() does
>not need to worry about the mapping anymore.
>
>Signed-off-by: Sebastian Andrzej Siewior <[email protected]>
>CC: [email protected]
>Signed-off-by: Dirk Brandewie <[email protected]>
>---
Nobody commented this and haven't seen it merged. Is it good to go?
Sebastian
On Wed, 8 Dec 2010, Sebastian Andrzej Siewior wrote:
>
> Nobody commented this and haven't seen it merged. Is it good to go?
Yup. It's in my list of stuff to take care of :)
Thanks,
tglx
Commit-ID: 60d79fd99ff3b9c692b260a4d53a203f537c052a
Gitweb: http://git.kernel.org/tip/60d79fd99ff3b9c692b260a4d53a203f537c052a
Author: Yinghai Lu <[email protected]>
AuthorDate: Tue, 7 Dec 2010 00:59:49 -0800
Committer: Thomas Gleixner <[email protected]>
CommitDate: Thu, 9 Dec 2010 21:52:05 +0100
x86, ioapic: Avoid writing io_apic id if already correct
For 32bit mptable path, setup_ids_from_mpc() always writes the io_apic
id register, even there is no change needed.
Skip the write, when readout and mptable match.
Signed-off-by: Yinghai Lu <[email protected]>
Cc: Sebastian Siewior <[email protected]>
LKML-Reference: <[email protected]>
Signed-off-by: Thomas Gleixner <[email protected]>
---
arch/x86/kernel/apic/io_apic.c | 7 +++++--
1 files changed, 5 insertions(+), 2 deletions(-)
diff --git a/arch/x86/kernel/apic/io_apic.c b/arch/x86/kernel/apic/io_apic.c
index 4abf08a..8a02150 100644
--- a/arch/x86/kernel/apic/io_apic.c
+++ b/arch/x86/kernel/apic/io_apic.c
@@ -2007,9 +2007,12 @@ void __init setup_ioapic_ids_from_mpc_nocheck(void)
= mp_ioapics[apic_id].apicid;
/*
- * Read the right value from the MPC table and
- * write it into the ID register.
+ * Update the ID register according to the right value
+ * from the MPC table if they are different.
*/
+ if (mp_ioapics[apic_id].apicid == reg_00.bits.ID)
+ continue;
+
apic_printk(APIC_VERBOSE, KERN_INFO
"...changing IO-APIC physical APIC ID to %d ...",
mp_ioapics[apic_id].apicid);
On 11/25/2010 09:39 AM, Sebastian Andrzej Siewior wrote:
> parse_setup_data() uses early_memremap() for a PAGE_SIZE mapping in
> order to figure out the type & size. If this mapping is not large enough
> then parse_e820_ext() will remap this area again via early_ioremap()
> since the first mapping is still in use.
>
> This patch attempts to simplify the handling and parse_e820_ext() does
> not need to worry about the mapping anymore.
I like the fact that this puts all the mapping in the same layer, but I
also think it's unfortunate to discard the optimization of always
mapping the minimum of <header length, rest of page>; your code will
*always* map-unmap-map the code, even in the (presumably very common)
case of the data fitting on a single page.
Furthermore, your code retains a minor bug from the original code, which
is that if the header is not page-aligned, it may be needlessly map more
than one page with unknown content.
The proper way to do it is probably (this is pseudocode):
maplen = max(PAGE_SIZE - (pa_data & ~PAGE_MASK),
sizeof(struct setup_data));
data = early_memremap(pa_data, maplen);
len = data->len + sizeof(struct setup_data);
if (len > maplen) {
early_iounmap(pa_data, maplen);
data = early_memremap(pa_data, maplen);
}
/* ... */
early_iounmap(pa_data, maplen);
I also found your patch description to be needlessly hard to follow.
The key point is that it puts all the map manipulation into
parse_setup_data() where it belongs. Since you're changing an
interface, however, also do note that you have checked that there are no
other callers to parse_e820_ext().
-hpa
H. Peter Anvin wrote:
>
> I like the fact that this puts all the mapping in the same layer, but I
> also think it's unfortunate to discard the optimization of always
> mapping the minimum of <header length, rest of page>; your code will
> *always* map-unmap-map the code, even in the (presumably very common)
> case of the data fitting on a single page.
Okay, I will take this into account.
> Furthermore, your code retains a minor bug from the original code, which
> is that if the header is not page-aligned, it may be needlessly map more
> than one page with unknown content.
I just checked that early_memremap() maps the memory if it is not on a
page boundary. I take care of this.
> I also found your patch description to be needlessly hard to follow.
> The key point is that it puts all the map manipulation into
> parse_setup_data() where it belongs. Since you're changing an
> interface, however, also do note that you have checked that there are no
> other callers to parse_e820_ext().
okay.
> -hpa
Sebastian
On Sun, Nov 28, 2010 at 02:49:07PM +0100, Sebastian Andrzej Siewior wrote:
> * Benjamin Herrenschmidt | 2010-11-27 08:42:16 [+1100]:
>
> >On Thu, 2010-11-25 at 18:39 +0100, Sebastian Andrzej Siewior wrote:
> >> Dirk is working on some patches which provide generic infrastructure for
> >> linking the dtb into the kernel. Once this is it its final shape, we
> >> will relocate the device tree unconditionally. This will remove the
> >> requirement for the boot loader to locate the device tree within lowmem.
> >
> >Linking the dtb into the kernel is something we prefer not doing on
> >powerpc and I'm curious why you think that applies better on x86...
> This is only for the case where we do not get a dtb from the bootloader
> Microblaze also links dtb into the kernel in that case.
>
> >We -do- have ways to include it in the zImage wrapper. However, this is
> >different in subtle ways because of the way our zImage wrapper building
> >works. Basically, we always build all the per-platform .o's of the
> >wrapper that apply to supported platforms by the kernel. The
> >binding/linking together of the final wrapper with a kernel image, an
> >optional dtb and optional initrd is performed by a shell script that can
> >be used outside of the normal build context.
>
> The reason why you have multiple .o wrapper files is because the specific
> platform code is not simply passing the device tree but also adding /
> updating nodes like MAC address, bus clocks, ... which are coming from
> the (different) bd_t struct or something else. The simpleboot target is
> covering the case where you just pass the embedded dtb to kernel without
> changing it.
>
> On x86 we want to have the bootloader passing us the final dtb. The
> in-kernel dtb is more like a generic simpleboot target.
/me gets ready to dodge tomatoes thrown at him.
Hmmm, back up a minute...
Since Linux on x86 has pretty much always depended on a two stage boot
(firmware boots a bootloader like grub which in turn boots the
kernel), then what is the use case for pursuing an in-kernel dtb
linkage? simpleimage was used on powerpc for the use-case where there
is no 2nd stage bootloader, but instead only the kernel which is
booted from some firmware that is non-upgradeable (or at least too
risky to upgrade). Same with the cuImages. The wrapper is
effectively a 2nd stage bootloader to adapt from what older u-boot
provides and what the kernel needs.
What is the boot sequence for the embedded x86 platforms? Is there
still a bootloader? If so, what prevents always depending on the
bootloader to pass in the device tree blob? If the bootloader is
software (not firmware) then it should be something we have control
over when shipping a distribution.
BTW, don't take microblaze as the example to be emulated. Some of
the things it does for device tree support is not scalable, like
linking the .dtbs directly into the kernel.
John Bonesio has also prototyped doing a similar zImage bootwrapper on
arm which allows a dtb to be concatenated to the kernel image and
updated before passing it to the kernel. As it stands, there are no
plans to use in-kernel .dtb linking on ARM.
I know it's not very fair to bring up these issues again right before
the merge window opens. I got myself overcommitted and dropped the
ball over the last 1.5 months and I beg forgiveness. However, I do
want to make sure that the right decision is made and I'd be happier
if a consistent scheme is used for passing the .dtb on all
architectures.
>
> >That means that it's possible for a distro for example to install a
> >kernel image, all the wrapper .o files and that script, and at runtime
> >rebuild zImage wrappers with the appropriate dtb without having the
> >whole built kernel tree at hand.
> For the distro reason the in-kernel dtb supports multiple dtbs. So a
> distro kernel can include all of them into .init.data section and the
> user can specify on the command line which device tree he wants. x86 gets
> its command line via the bootpage so it is available before we have a
> device tree. Microblaze should also be able to use this
> mechanism.
Should equally be able to support this as a boot wrapper with the
added advantage that additional .dtbs could be added to the kernel
image at install time without rebuilding the kernel.
g.
On Thu, Dec 30, 2010 at 2:26 AM, Grant Likely <[email protected]> wrote:
> What is the boot sequence for the embedded x86 platforms? ?Is there
> still a bootloader?
There's no one embedded setup on any platform, but one of the few
constants of embedded development is trying to eliminate unnecessary
requirements.
Just on standard-ish PC hardware I've seen people try to stick Linux in the
BIOS flash (generally not enough room), I've seen people try to stick
it as the first stage PXE payload, there's the fun and games with
kexec of emergency kernels for crash dumps...
If the capability to skip an unnecessary bootloader was available, people
would use it.
Rob
On Thu, Dec 30, 2010 at 1:26 AM, Grant Likely <[email protected]> wrote:
> On Sun, Nov 28, 2010 at 02:49:07PM +0100, Sebastian Andrzej Siewior wrote:
>> The reason why you have multiple .o wrapper files is because the specific
>> platform code is not simply passing the device tree but also adding /
>> updating nodes like MAC address, bus clocks, ... which are coming from
>> the (different) bd_t struct or something else. The simpleboot target is
>> covering the case where you just pass the embedded dtb to kernel without
>> changing it.
>>
>> On x86 we want to have the bootloader passing us the final dtb. The
>> in-kernel dtb is more like a generic simpleboot target.
>
> /me gets ready to dodge tomatoes thrown at him.
>
> Hmmm, back up a minute...
>
> Since Linux on x86 has pretty much always depended on a two stage boot
> (firmware boots a bootloader like grub which in turn boots the
> kernel), then what is the use case for pursuing an in-kernel dtb
> linkage? ?simpleimage was used on powerpc for the use-case where there
> is no 2nd stage bootloader, but instead only the kernel which is
> booted from some firmware that is non-upgradeable (or at least too
> risky to upgrade). ?Same with the cuImages. ?The wrapper is
> effectively a 2nd stage bootloader to adapt from what older u-boot
> provides and what the kernel needs.
>
> What is the boot sequence for the embedded x86 platforms? ?Is there
> still a bootloader? ?If so, what prevents always depending on the
> bootloader to pass in the device tree blob? ?If the bootloader is
> software (not firmware) then it should be something we have control
> over when shipping a distribution.
>
> BTW, don't take microblaze as the example to be emulated. ?Some of
> the things it does for device tree support is not scalable, like
> linking the .dtbs directly into the kernel.
>
> John Bonesio has also prototyped doing a similar zImage bootwrapper on
> arm which allows a dtb to be concatenated to the kernel image and
> updated before passing it to the kernel. ?As it stands, there are no
> plans to use in-kernel .dtb linking on ARM.
Hmmm, I shouldn't be sending email at 1:30 in the morning. The above
statement is not really true. One of the use-cases on ARM is using a
device tree with existing firmware that doesn't pass a dt blob. Right
now there are two possible methods for doing this. Option one is to
link the .dtbs in the the kernel proper and point to them from the
machine struct. The dtb would be used when a matching machine id is
passed by the firmware. Option 2 is to select the correct .dtb with a
kernel boot wrapper, which is exactly the method used by the powerpc
boot wrappers and is the mechanism that John Bonesio is prototyping
(hopefully will have patches out on the list early in the new year).
Personally I prefer the boot wrapper method because it means there is
exactly one mechanism for providing the kernel proper with a .dtb and
it allows the set of dtbs to be provided at install time instead of
kernel compile time. Since the boot wrapper prototype is so-far
successful, it is the approach that I'm going to pursue on ARM (but
I'm not yet completely ruling out option 1).
Peter & Dirk, I realize that this is different from what we talked
about at Plumbers this year. I'm not saying no to linking the .dtbs
into the kernel proper on x86, and I don't pretend to know the details
of the x86 Linux boot interface, but if dtb linking is merged, then
x86 will probably be the only major architecture to use it (microblaze
doesn't count as major) :-).
g.
On Thu, Dec 30, 2010 at 1:45 AM, Rob Landley <[email protected]> wrote:
> On Thu, Dec 30, 2010 at 2:26 AM, Grant Likely <[email protected]> wrote:
>> What is the boot sequence for the embedded x86 platforms? ?Is there
>> still a bootloader?
>
> There's no one embedded setup on any platform, but one of the few
> constants of embedded development is trying to eliminate unnecessary
> requirements.
>
> Just on standard-ish PC hardware I've seen people try to stick Linux in the
> BIOS flash (generally not enough room), I've seen people try to stick
> it as the first stage PXE payload, there's the fun and games with
> kexec of emergency kernels for crash dumps...
>
> If the capability to skip an unnecessary bootloader was available, people
> would use it.
Right, but in all of those cases a boot wrapper provides the same
functionality with better flexability, such as being able to provided
the dtb image(s) at install time instead of compile time.
g.
On 12/30/2010 12:26 AM, Grant Likely wrote:
>
> Since Linux on x86 has pretty much always depended on a two stage boot
> (firmware boots a bootloader like grub which in turn boots the
> kernel), then what is the use case for pursuing an in-kernel dtb
> linkage? simpleimage was used on powerpc for the use-case where there
> is no 2nd stage bootloader, but instead only the kernel which is
> booted from some firmware that is non-upgradeable (or at least too
> risky to upgrade). Same with the cuImages. The wrapper is
> effectively a 2nd stage bootloader to adapt from what older u-boot
> provides and what the kernel needs.
>
> What is the boot sequence for the embedded x86 platforms? Is there
> still a bootloader? If so, what prevents always depending on the
> bootloader to pass in the device tree blob? If the bootloader is
> software (not firmware) then it should be something we have control
> over when shipping a distribution.
>
> BTW, don't take microblaze as the example to be emulated. Some of
> the things it does for device tree support is not scalable, like
> linking the .dtbs directly into the kernel.
>
> John Bonesio has also prototyped doing a similar zImage bootwrapper on
> arm which allows a dtb to be concatenated to the kernel image and
> updated before passing it to the kernel. As it stands, there are no
> plans to use in-kernel .dtb linking on ARM.
>
> I know it's not very fair to bring up these issues again right before
> the merge window opens. I got myself overcommitted and dropped the
> ball over the last 1.5 months and I beg forgiveness. However, I do
> want to make sure that the right decision is made and I'd be happier
> if a consistent scheme is used for passing the .dtb on all
> architectures.
>
There are a number of different boot loader solutions in use on embedded
platforms, as much as we would like to avoid it.
However, the ability to link in the dtb will provide a
architecture-neutral option of last resort. I'm not saying it's a good
option, but it's better than random ad hoc stuff, and if that means that
it will only ever be used during in-lab platform bringup, *that is still
a huge win*.
-hpa