2012-02-24 03:49:47

by Yinghai Lu

[permalink] [raw]
Subject: [PATCH 00/13] x86, irq: support ioapic device hotplug for x86

Hi, Thomas

Current x86 code does not support ioapic hotplug yet.

This patchset tries to pre-reserve an irq block in the allocated_irqs bitmap
for each hot-added ioapic controller. It also records irq_base in gsi_config, so that
it can later be used to convert gsi to irq for pci devices using that ioapic controller.

x86/irq needs to be updated to support realloc_irq after those bits are pre-reserved.

It is tested with pci remove/rescan method plus some test stubs.

Please review them.

You can get them from:

git://git.kernel.org/pub/scm/linux/kernel/git/yinghai/linux-yinghai.git for-x86-irq


Thanks

Yinghai

94738bb: PCI: Disable mem in the ioapic removing path
6a53ea9: PCI: Make sure hotplug ioapic driver get loaded early
77f4e2b: x86, acpi, irq: Enable pci device type ioapic hotplug
b495bde: x86, irq: Make ioapics loop code skip blank slot
56cb7ba: x86, irq: Add mp_unregister_ioapic could handle hotremove ioapic
bd48b79: x86, irq: Make mp_register_ioapic could handle hotadd ioapic
3b23367: x86, irq: more strict check for register ioapic
996f15b: genirq: bail out early in free_desc()
f83ad47: x86, irq: add ioapic_gsi_to_irq
e864fc4: x86, irq: pre-reserve irq range that are used by ioapic
16b8b29: x86, irq: Add realloc_irq_and_cfg_at()
660c577: genirq: Split __irq_reserve_irqs from irq_alloc_descs
1060703: x86, irq: Convert irq_2_pin list to generic list

arch/x86/include/asm/hw_irq.h | 2 +-
arch/x86/include/asm/io_apic.h | 2 +
arch/x86/include/asm/mpspec.h | 2 +
arch/x86/kernel/acpi/boot.c | 28 +---
arch/x86/kernel/apic/io_apic.c | 348 +++++++++++++++++++++++++++++++---------
drivers/pci/Kconfig | 2 +-
drivers/pci/Makefile | 5 +-
drivers/pci/ioapic.c | 19 ++-
include/linux/irq.h | 6 +
kernel/irq/irqdesc.c | 84 +++++++----
10 files changed, 366 insertions(+), 132 deletions(-)


2012-02-24 03:49:50

by Yinghai Lu

[permalink] [raw]
Subject: [PATCH 02/13] genirq: Split __irq_reserve_irqs from irq_alloc_descs

Also make irq_reserve_irqs() reuse __irq_reserve_irqs().

Signed-off-by: Yinghai Lu <[email protected]>
Cc: Thomas Gleixner <[email protected]>
Cc: David Daney <[email protected]>
---
include/linux/irq.h | 1 +
kernel/irq/irqdesc.c | 55 ++++++++++++++++++++++++-------------------------
2 files changed, 28 insertions(+), 28 deletions(-)

diff --git a/include/linux/irq.h b/include/linux/irq.h
index bff29c5..7a9a19b 100644
--- a/include/linux/irq.h
+++ b/include/linux/irq.h
@@ -582,6 +582,7 @@ int __irq_alloc_descs(int irq, unsigned int from, unsigned int cnt, int node,

void irq_free_descs(unsigned int irq, unsigned int cnt);
int irq_reserve_irqs(unsigned int from, unsigned int cnt);
+int __irq_reserve_irqs(int irq, unsigned int from, unsigned int cnt);

static inline void irq_free_desc(unsigned int irq)
{
diff --git a/kernel/irq/irqdesc.c b/kernel/irq/irqdesc.c
index d86e254..3e23bb9 100644
--- a/kernel/irq/irqdesc.c
+++ b/kernel/irq/irqdesc.c
@@ -338,19 +338,8 @@ void irq_free_descs(unsigned int from, unsigned int cnt)
}
EXPORT_SYMBOL_GPL(irq_free_descs);

-/**
- * irq_alloc_descs - allocate and initialize a range of irq descriptors
- * @irq: Allocate for specific irq number if irq >= 0
- * @from: Start the search from this irq number
- * @cnt: Number of consecutive irqs to allocate.
- * @node: Preferred node on which the irq descriptor should be allocated
- * @owner: Owning module (can be NULL)
- *
- * Returns the first irq number or error code
- */
int __ref
-__irq_alloc_descs(int irq, unsigned int from, unsigned int cnt, int node,
- struct module *owner)
+__irq_reserve_irqs(int irq, unsigned int from, unsigned int cnt)
{
int start, ret;

@@ -368,7 +357,7 @@ __irq_alloc_descs(int irq, unsigned int from, unsigned int cnt, int node,
start = bitmap_find_next_zero_area(allocated_irqs, IRQ_BITMAP_BITS,
from, cnt, 0);
ret = -EEXIST;
- if (irq >=0 && start != irq)
+ if (irq >= 0 && start != irq)
goto err;

if (start + cnt > nr_irqs) {
@@ -379,12 +368,35 @@ __irq_alloc_descs(int irq, unsigned int from, unsigned int cnt, int node,

bitmap_set(allocated_irqs, start, cnt);
mutex_unlock(&sparse_irq_lock);
- return alloc_descs(start, cnt, node, owner);
+ return start;

err:
mutex_unlock(&sparse_irq_lock);
return ret;
}
+/**
+ * irq_alloc_descs - allocate and initialize a range of irq descriptors
+ * @irq: Allocate for specific irq number if irq >= 0
+ * @from: Start the search from this irq number
+ * @cnt: Number of consecutive irqs to allocate.
+ * @node: Preferred node on which the irq descriptor should be allocated
+ * @owner: Owning module (can be NULL)
+ *
+ * Returns the first irq number or error code
+ */
+int __ref
+__irq_alloc_descs(int irq, unsigned int from, unsigned int cnt, int node,
+ struct module *owner)
+{
+ int start;
+
+ start = __irq_reserve_irqs(irq, from, cnt);
+
+ if (start < 0)
+ return start;
+
+ return alloc_descs(start, cnt, node, owner);
+}
EXPORT_SYMBOL_GPL(__irq_alloc_descs);

/**
@@ -396,20 +408,7 @@ EXPORT_SYMBOL_GPL(__irq_alloc_descs);
*/
int irq_reserve_irqs(unsigned int from, unsigned int cnt)
{
- unsigned int start;
- int ret = 0;
-
- if (!cnt || (from + cnt) > nr_irqs)
- return -EINVAL;
-
- mutex_lock(&sparse_irq_lock);
- start = bitmap_find_next_zero_area(allocated_irqs, nr_irqs, from, cnt, 0);
- if (start == from)
- bitmap_set(allocated_irqs, start, cnt);
- else
- ret = -EEXIST;
- mutex_unlock(&sparse_irq_lock);
- return ret;
+ return __irq_reserve_irqs(from, from, cnt);
}

/**
--
1.7.7

2012-02-24 03:49:57

by Yinghai Lu

[permalink] [raw]
Subject: [PATCH 04/13] x86, irq: pre-reserve irq range that are used by ioapic

realloc_irq_and_cfg_at() can already handle the pre-reserved case.

These ranges are for non-hot-added ioapics, but make them share the same code
path that will be used by hot-added ioapics.

Signed-off-by: Yinghai Lu <[email protected]>
Cc: Thomas Gleixner <[email protected]>
Cc: Ingo Molnar <[email protected]>
Cc: "H. Peter Anvin" <[email protected]>
Cc: Henrik Kretzschmar <[email protected]>
Cc: Suresh Siddha <[email protected]>
Cc: Sebastian Andrzej Siewior <[email protected]>
---
arch/x86/include/asm/io_apic.h | 1 +
arch/x86/kernel/apic/io_apic.c | 133 ++++++++++++++++++++++++++--------------
2 files changed, 88 insertions(+), 46 deletions(-)

diff --git a/arch/x86/include/asm/io_apic.h b/arch/x86/include/asm/io_apic.h
index 690d1cc..f2a83ae 100644
--- a/arch/x86/include/asm/io_apic.h
+++ b/arch/x86/include/asm/io_apic.h
@@ -164,6 +164,7 @@ extern void setup_ioapic_ids_from_mpc_nocheck(void);
struct mp_ioapic_gsi{
u32 gsi_base;
u32 gsi_end;
+ u32 irq_base;
};
extern struct mp_ioapic_gsi mp_gsi_routing[];
extern u32 gsi_top;
diff --git a/arch/x86/kernel/apic/io_apic.c b/arch/x86/kernel/apic/io_apic.c
index 4d62f0f..0702998 100644
--- a/arch/x86/kernel/apic/io_apic.c
+++ b/arch/x86/kernel/apic/io_apic.c
@@ -184,51 +184,6 @@ static struct irq_pin_list *alloc_irq_pin_list(int node)
return kzalloc_node(sizeof(struct irq_pin_list), GFP_KERNEL, node);
}

-
-/* irq_cfg is indexed by the sum of all RTEs in all I/O APICs. */
-static struct irq_cfg irq_cfgx[NR_IRQS_LEGACY];
-
-int __init arch_early_irq_init(void)
-{
- struct irq_cfg *cfg;
- int count, node, i;
-
- if (!legacy_pic->nr_legacy_irqs)
- io_apic_irqs = ~0UL;
-
- for (i = 0; i < nr_ioapics; i++) {
- ioapics[i].saved_registers =
- kzalloc(sizeof(struct IO_APIC_route_entry) *
- ioapics[i].nr_registers, GFP_KERNEL);
- if (!ioapics[i].saved_registers)
- pr_err("IOAPIC %d: suspend/resume impossible!\n", i);
- }
-
- cfg = irq_cfgx;
- count = ARRAY_SIZE(irq_cfgx);
- node = cpu_to_node(0);
-
- /* Make sure the legacy interrupts are marked in the bitmap */
- irq_reserve_irqs(0, legacy_pic->nr_legacy_irqs);
-
- for (i = 0; i < count; i++) {
- INIT_LIST_HEAD(&cfg[i].irq_2_pin);
- irq_set_chip_data(i, &cfg[i]);
- zalloc_cpumask_var_node(&cfg[i].domain, GFP_KERNEL, node);
- zalloc_cpumask_var_node(&cfg[i].old_domain, GFP_KERNEL, node);
- /*
- * For legacy IRQ's, start with assigning irq0 to irq15 to
- * IRQ0_VECTOR to IRQ15_VECTOR on cpu 0.
- */
- if (i < legacy_pic->nr_legacy_irqs) {
- cfg[i].vector = IRQ0_VECTOR + i;
- cpumask_set_cpu(0, cfg[i].domain);
- }
- }
-
- return 0;
-}
-
static struct irq_cfg *irq_cfg(unsigned int irq)
{
return irq_get_chip_data(irq);
@@ -331,6 +286,91 @@ static struct irq_cfg *realloc_irq_and_cfg_at(unsigned int at, int node)
return alloc_irq_and_cfg_at(at, node);
}

+static int reserve_ioapic_gsi_irq_base(int idx)
+{
+ int irq;
+ struct mp_ioapic_gsi *gsi_cfg = mp_ioapic_gsi_routing(idx);
+ int cnt = gsi_cfg->gsi_end - gsi_cfg->gsi_base + 1;
+
+ irq = __irq_reserve_irqs(-1, gsi_cfg->gsi_base, cnt);
+ if (irq >= 0) {
+ gsi_cfg->irq_base = irq;
+ printk(KERN_INFO
+ "IOAPIC[%d]: apic_id %d, GSI %d-%d ==> irq %d-%d reserved\n",
+ idx, mpc_ioapic_id(idx),
+ gsi_cfg->gsi_base, gsi_cfg->gsi_end,
+ irq, irq + cnt - 1);
+ } else
+ printk(KERN_INFO
+ "IOAPIC[%d]: apic_id %d, GSI %d-%d ==> irq reserve failed\n",
+ idx, mpc_ioapic_id(idx),
+ gsi_cfg->gsi_base, gsi_cfg->gsi_end);
+
+ return irq;
+}
+
+static void __init reserve_ioapic_gsi_irq_extra(void)
+{
+ int irq;
+
+ /* to prevent hot add ioapic taking those slots */
+ if (gsi_top) {
+ irq = irq_reserve_irqs(gsi_top, NR_IRQS_LEGACY);
+ if (irq >= 0)
+ printk(KERN_INFO
+ "IOAPIC[extra]: GSI %d-%d ==> irq %d-%d reserved\n",
+ gsi_top, gsi_top + NR_IRQS_LEGACY - 1,
+ irq, irq + NR_IRQS_LEGACY - 1);
+ else
+ printk(KERN_INFO
+ "IOAPIC[extra]: GSI %d-%d ==> irq reserve failed\n",
+ gsi_top, gsi_top + NR_IRQS_LEGACY - 1);
+ }
+}
+
+static void alloc_ioapic_saved_registers(int idx)
+{
+ if (ioapics[idx].saved_registers)
+ return;
+
+ ioapics[idx].saved_registers =
+ kzalloc(sizeof(struct IO_APIC_route_entry) *
+ ioapics[idx].nr_registers, GFP_KERNEL);
+
+ if (!ioapics[idx].saved_registers)
+ pr_err("IOAPIC %d: suspend/resume impossible!\n", idx);
+}
+
+int __init arch_early_irq_init(void)
+{
+ int node = cpu_to_node(0);
+ struct irq_cfg *cfg;
+ int i;
+
+ if (!legacy_pic->nr_legacy_irqs)
+ io_apic_irqs = ~0UL;
+
+ for (i = 0; i < nr_ioapics; i++)
+ alloc_ioapic_saved_registers(i);
+
+ for (i = 0; i < nr_ioapics; i++)
+ reserve_ioapic_gsi_irq_base(i);
+
+ reserve_ioapic_gsi_irq_extra();
+
+ /*
+ * For legacy IRQ's, start with assigning irq0 to irq15 to
+ * IRQ0_VECTOR to IRQ15_VECTOR on cpu 0.
+ */
+ for (i = 0; i < legacy_pic->nr_legacy_irqs; i++) {
+ cfg = realloc_irq_and_cfg_at(i, node);
+ cfg->vector = IRQ0_VECTOR + i;
+ cpumask_set_cpu(0, cfg->domain);
+ }
+
+ return 0;
+}
+
struct io_apic {
unsigned int index;
unsigned int unused[3];
@@ -3672,7 +3712,8 @@ int __init arch_probe_nr_irqs(void)
if (nr < nr_irqs)
nr_irqs = nr;

- return NR_IRQS_LEGACY;
+ /* x86 arch code will allocate irq_desc/cfg */
+ return 0;
}

int io_apic_set_pci_routing(struct device *dev, int irq,
--
1.7.7

2012-02-24 03:50:08

by Yinghai Lu

[permalink] [raw]
Subject: [PATCH 01/13] x86, irq: Convert irq_2_pin list to generic list

So we can use generic list helper function.

Also make free_irq_cfg() free irq_2_pin list.

Signed-off-by: Yinghai Lu <[email protected]>
Cc: Thomas Gleixner <[email protected]>
Cc: Ingo Molnar <[email protected]>
Cc: "H. Peter Anvin" <[email protected]>
Cc: Hidetoshi Seto <[email protected]>
Cc: Andrew Morton <[email protected]>
Cc: Mike Frysinger <[email protected]>
Cc: Suresh Siddha <[email protected]>
---
arch/x86/include/asm/hw_irq.h | 2 +-
arch/x86/kernel/apic/io_apic.c | 22 ++++++++++++----------
2 files changed, 13 insertions(+), 11 deletions(-)

diff --git a/arch/x86/include/asm/hw_irq.h b/arch/x86/include/asm/hw_irq.h
index eb92a6e..7c2c92a 100644
--- a/arch/x86/include/asm/hw_irq.h
+++ b/arch/x86/include/asm/hw_irq.h
@@ -114,7 +114,7 @@ struct irq_2_iommu {
* Most irqs are mapped 1:1 with pins.
*/
struct irq_cfg {
- struct irq_pin_list *irq_2_pin;
+ struct list_head irq_2_pin;
cpumask_var_t domain;
cpumask_var_t old_domain;
u8 vector;
diff --git a/arch/x86/kernel/apic/io_apic.c b/arch/x86/kernel/apic/io_apic.c
index fb07275..4c008ac 100644
--- a/arch/x86/kernel/apic/io_apic.c
+++ b/arch/x86/kernel/apic/io_apic.c
@@ -65,7 +65,7 @@

#define __apicdebuginit(type) static type __init
#define for_each_irq_pin(entry, head) \
- for (entry = head; entry; entry = entry->next)
+ list_for_each_entry(entry, &head, list)

/*
* Is the SiS APIC rmw bug present ?
@@ -175,8 +175,8 @@ void mp_save_irq(struct mpc_intsrc *m)
}

struct irq_pin_list {
+ struct list_head list;
int apic, pin;
- struct irq_pin_list *next;
};

static struct irq_pin_list *alloc_irq_pin_list(int node)
@@ -212,6 +212,7 @@ int __init arch_early_irq_init(void)
irq_reserve_irqs(0, legacy_pic->nr_legacy_irqs);

for (i = 0; i < count; i++) {
+ INIT_LIST_HEAD(&cfg[i].irq_2_pin);
irq_set_chip_data(i, &cfg[i]);
zalloc_cpumask_var_node(&cfg[i].domain, GFP_KERNEL, node);
zalloc_cpumask_var_node(&cfg[i].old_domain, GFP_KERNEL, node);
@@ -244,6 +245,7 @@ static struct irq_cfg *alloc_irq_cfg(unsigned int irq, int node)
goto out_cfg;
if (!zalloc_cpumask_var_node(&cfg->old_domain, GFP_KERNEL, node))
goto out_domain;
+ INIT_LIST_HEAD(&cfg->irq_2_pin);
return cfg;
out_domain:
free_cpumask_var(cfg->domain);
@@ -254,11 +256,15 @@ out_cfg:

static void free_irq_cfg(unsigned int at, struct irq_cfg *cfg)
{
+ struct irq_pin_list *entry, *tmp;
+
if (!cfg)
return;
irq_set_chip_data(at, NULL);
free_cpumask_var(cfg->domain);
free_cpumask_var(cfg->old_domain);
+ list_for_each_entry_safe(entry, tmp, &cfg->irq_2_pin, list)
+ kfree(entry);
kfree(cfg);
}

@@ -438,15 +444,12 @@ static void ioapic_mask_entry(int apic, int pin)
static int
__add_pin_to_irq_node(struct irq_cfg *cfg, int node, int apic, int pin)
{
- struct irq_pin_list **last, *entry;
+ struct irq_pin_list *entry;

/* don't allow duplicates */
- last = &cfg->irq_2_pin;
- for_each_irq_pin(entry, cfg->irq_2_pin) {
+ for_each_irq_pin(entry, cfg->irq_2_pin)
if (entry->apic == apic && entry->pin == pin)
return 0;
- last = &entry->next;
- }

entry = alloc_irq_pin_list(node);
if (!entry) {
@@ -457,7 +460,7 @@ __add_pin_to_irq_node(struct irq_cfg *cfg, int node, int apic, int pin)
entry->apic = apic;
entry->pin = pin;

- *last = entry;
+ list_add_tail(&entry->list, &cfg->irq_2_pin);
return 0;
}

@@ -1722,8 +1725,7 @@ __apicdebuginit(void) print_IO_APICs(void)
cfg = irq_get_chip_data(irq);
if (!cfg)
continue;
- entry = cfg->irq_2_pin;
- if (!entry)
+ if (list_empty(&cfg->irq_2_pin))
continue;
printk(KERN_DEBUG "IRQ%d ", irq);
for_each_irq_pin(entry, cfg->irq_2_pin)
--
1.7.7

2012-02-24 03:50:43

by Yinghai Lu

[permalink] [raw]
Subject: [PATCH 03/13] x86, irq: Add realloc_irq_and_cfg_at()

It will be used to allocate irqs that are pre-reserved.

Signed-off-by: Yinghai Lu <[email protected]>
Cc: Thomas Gleixner <[email protected]>
Cc: Ingo Molnar <[email protected]>
Cc: "H. Peter Anvin" <[email protected]>
Cc: Suresh Siddha <[email protected]>
---
arch/x86/kernel/apic/io_apic.c | 33 ++++++++++++++++++++++++++++++++-
include/linux/irq.h | 5 +++++
kernel/irq/irqdesc.c | 26 ++++++++++++++++++++++++++
3 files changed, 63 insertions(+), 1 deletions(-)

diff --git a/arch/x86/kernel/apic/io_apic.c b/arch/x86/kernel/apic/io_apic.c
index 4c008ac..4d62f0f 100644
--- a/arch/x86/kernel/apic/io_apic.c
+++ b/arch/x86/kernel/apic/io_apic.c
@@ -300,6 +300,37 @@ static void free_irq_at(unsigned int at, struct irq_cfg *cfg)
irq_free_desc(at);
}

+static struct irq_cfg *realloc_irq_and_cfg_at(unsigned int at, int node)
+{
+ struct irq_desc *desc = irq_to_desc(at);
+ struct irq_cfg *cfg;
+ int res;
+
+ if (desc) {
+ if (irq_desc_get_irq_data(desc)->node == node)
+ return alloc_irq_and_cfg_at(at, node);
+
+ cfg = irq_desc_get_chip_data(desc);
+ if (cfg) {
+ /* shared irq */
+ if (!list_empty(&cfg->irq_2_pin))
+ return cfg;
+ free_irq_cfg(at, cfg);
+ }
+ }
+
+ res = irq_realloc_desc_at(at, node);
+ if (res >= 0) {
+ cfg = alloc_irq_cfg(at, node);
+ if (cfg) {
+ irq_set_chip_data(at, cfg);
+ return cfg;
+ }
+ }
+
+ return alloc_irq_and_cfg_at(at, node);
+}
+
struct io_apic {
unsigned int index;
unsigned int unused[3];
@@ -3563,7 +3594,7 @@ int arch_setup_ht_irq(unsigned int irq, struct pci_dev *dev)
static int
io_apic_setup_irq_pin(unsigned int irq, int node, struct io_apic_irq_attr *attr)
{
- struct irq_cfg *cfg = alloc_irq_and_cfg_at(irq, node);
+ struct irq_cfg *cfg = realloc_irq_and_cfg_at(irq, node);
int ret;

if (!cfg)
diff --git a/include/linux/irq.h b/include/linux/irq.h
index 7a9a19b..e15bc35 100644
--- a/include/linux/irq.h
+++ b/include/linux/irq.h
@@ -584,6 +584,11 @@ void irq_free_descs(unsigned int irq, unsigned int cnt);
int irq_reserve_irqs(unsigned int from, unsigned int cnt);
int __irq_reserve_irqs(int irq, unsigned int from, unsigned int cnt);

+int __irq_realloc_desc(int at, int node, struct module *owner);
+/* use macros to avoid needing export.h for THIS_MODULE */
+#define irq_realloc_desc_at(at, node) \
+ __irq_realloc_desc(at, node, THIS_MODULE)
+
static inline void irq_free_desc(unsigned int irq)
{
irq_free_descs(irq, 1);
diff --git a/kernel/irq/irqdesc.c b/kernel/irq/irqdesc.c
index 3e23bb9..b1d6e51 100644
--- a/kernel/irq/irqdesc.c
+++ b/kernel/irq/irqdesc.c
@@ -99,6 +99,11 @@ EXPORT_SYMBOL_GPL(nr_irqs);
static DEFINE_MUTEX(sparse_irq_lock);
static DECLARE_BITMAP(allocated_irqs, IRQ_BITMAP_BITS);

+static bool __irq_is_reserved(int irq)
+{
+ return !!test_bit(irq, allocated_irqs);
+}
+
#ifdef CONFIG_SPARSE_IRQ

static RADIX_TREE(irq_desc_tree, GFP_KERNEL);
@@ -400,6 +405,27 @@ __irq_alloc_descs(int irq, unsigned int from, unsigned int cnt, int node,
EXPORT_SYMBOL_GPL(__irq_alloc_descs);

/**
+ * irq_realloc_desc - allocate irq descriptor for irq that is already reserved
+ * @irq: Allocate for specific irq number if irq >= 0
+ * @node: Preferred node on which the irq descriptor should be allocated
+ * @owner: Owning module (can be NULL)
+ *
+ * Returns the irq number or error code
+ */
+int __ref
+__irq_realloc_desc(int irq, int node, struct module *owner)
+{
+ if (!__irq_is_reserved(irq))
+ return -EINVAL;
+
+ if (irq_to_desc(irq))
+ free_desc(irq);
+
+ return alloc_descs(irq, 1, node, owner);
+}
+EXPORT_SYMBOL_GPL(__irq_realloc_desc);
+
+/**
* irq_reserve_irqs - mark irqs allocated
* @from: mark from irq number
* @cnt: number of irqs to mark
--
1.7.7

2012-02-24 03:50:55

by Yinghai Lu

[permalink] [raw]
Subject: [PATCH 10/13] x86, irq: Make ioapics loop code skip blank slot

When multiple ioapics get added and removed, we could have blank slots in the
ioapics array.

Skip such slots by checking nr_registers.

Signed-off-by: Yinghai Lu <[email protected]>
Cc: Thomas Gleixner <[email protected]>
Cc: Ingo Molnar <[email protected]>
Cc: "H. Peter Anvin" <[email protected]>
Cc: Suresh Siddha <[email protected]>
---
arch/x86/kernel/apic/io_apic.c | 29 +++++++++++++++++++++++------
1 files changed, 23 insertions(+), 6 deletions(-)

diff --git a/arch/x86/kernel/apic/io_apic.c b/arch/x86/kernel/apic/io_apic.c
index 4517713..da02320 100644
--- a/arch/x86/kernel/apic/io_apic.c
+++ b/arch/x86/kernel/apic/io_apic.c
@@ -819,7 +819,8 @@ int save_ioapic_entries(void)
int err = 0;

for (apic = 0; apic < nr_ioapics; apic++) {
- if (!ioapics[apic].saved_registers) {
+ if (!ioapics[apic].saved_registers &&
+ ioapics[apic].nr_registers) {
err = -ENOMEM;
continue;
}
@@ -925,9 +926,12 @@ static int __init find_isa_irq_apic(int irq, int type)
if (i < mp_irq_entries) {
int ioapic_idx;

- for (ioapic_idx = 0; ioapic_idx < nr_ioapics; ioapic_idx++)
+ for (ioapic_idx = 0; ioapic_idx < nr_ioapics; ioapic_idx++) {
+ if (!ioapics[ioapic_idx].nr_registers)
+ continue;
if (mpc_ioapic_id(ioapic_idx) == mp_irqs[i].dstapic)
return ioapic_idx;
+ }
}

return -1;
@@ -1158,10 +1162,13 @@ int IO_APIC_get_PCI_irq_vector(int bus, int slot, int pin,
for (i = 0; i < mp_irq_entries; i++) {
int lbus = mp_irqs[i].srcbus;

- for (ioapic_idx = 0; ioapic_idx < nr_ioapics; ioapic_idx++)
+ for (ioapic_idx = 0; ioapic_idx < nr_ioapics; ioapic_idx++) {
+ if (!ioapics[ioapic_idx].nr_registers)
+ continue;
if (mpc_ioapic_id(ioapic_idx) == mp_irqs[i].dstapic ||
mp_irqs[i].dstapic == MP_APIC_ALL)
break;
+ }

if (!test_bit(lbus, mp_bus_not_pci) &&
!mp_irqs[i].irqtype &&
@@ -2188,6 +2195,9 @@ void __init setup_ioapic_ids_from_mpc_nocheck(void)
* Set the IOAPIC ID to the value stored in the MPC table.
*/
for (ioapic_idx = 0; ioapic_idx < nr_ioapics; ioapic_idx++) {
+ if (!ioapics[ioapic_idx].nr_registers)
+ continue;
+
/* Read the register 0 value */
raw_spin_lock_irqsave(&ioapic_lock, flags);
reg_00.raw = io_apic_read(ioapic_idx, 0);
@@ -3153,8 +3163,12 @@ static void ioapic_resume(void)
{
int ioapic_idx;

- for (ioapic_idx = nr_ioapics - 1; ioapic_idx >= 0; ioapic_idx--)
+ for (ioapic_idx = nr_ioapics - 1; ioapic_idx >= 0; ioapic_idx--) {
+ if (!ioapics[ioapic_idx].nr_registers)
+ continue;
+
resume_ioapic_id(ioapic_idx);
+ }

restore_ioapic_entries();
}
@@ -4065,8 +4079,11 @@ int mp_find_ioapic(u32 gsi)
/* Find the IOAPIC that manages this GSI. */
for (i = 0; i < nr_ioapics; i++) {
struct mp_ioapic_gsi *gsi_cfg = mp_ioapic_gsi_routing(i);
- if ((gsi >= gsi_cfg->gsi_base)
- && (gsi <= gsi_cfg->gsi_end))
+
+ if (!ioapics[i].nr_registers)
+ continue;
+
+ if ((gsi >= gsi_cfg->gsi_base) && (gsi <= gsi_cfg->gsi_end))
return i;
}

--
1.7.7

2012-02-24 03:50:59

by Yinghai Lu

[permalink] [raw]
Subject: [PATCH 05/13] x86, irq: add ioapic_gsi_to_irq

It handles a hot-added ioapic whose irq_base is not equal to gsi_base.

Also remove irq_to_gsi(), which is causing confusion.

Signed-off-by: Yinghai Lu <[email protected]>
Cc: Thomas Gleixner <[email protected]>
Cc: Ingo Molnar <[email protected]>
Cc: "H. Peter Anvin" <[email protected]>
Cc: Len Brown <[email protected]>
Cc: Pavel Machek <[email protected]>
Cc: "Rafael J. Wysocki" <[email protected]>
Cc: Henrik Kretzschmar <[email protected]>
Cc: Suresh Siddha <[email protected]>
Cc: Sebastian Andrzej Siewior <[email protected]>
---
arch/x86/include/asm/io_apic.h | 1 +
arch/x86/kernel/acpi/boot.c | 22 +++++-----------------
arch/x86/kernel/apic/io_apic.c | 29 ++++++++++++++++++++++++++++-
3 files changed, 34 insertions(+), 18 deletions(-)

diff --git a/arch/x86/include/asm/io_apic.h b/arch/x86/include/asm/io_apic.h
index f2a83ae..b5489e8 100644
--- a/arch/x86/include/asm/io_apic.h
+++ b/arch/x86/include/asm/io_apic.h
@@ -168,6 +168,7 @@ struct mp_ioapic_gsi{
};
extern struct mp_ioapic_gsi mp_gsi_routing[];
extern u32 gsi_top;
+int ioapic_gsi_to_irq(u32 gsi);
int mp_find_ioapic(u32 gsi);
int mp_find_ioapic_pin(int ioapic, u32 gsi);
void __init mp_register_ioapic(int id, u32 address, u32 gsi_base);
diff --git a/arch/x86/kernel/acpi/boot.c b/arch/x86/kernel/acpi/boot.c
index ce664f3..fa9473b 100644
--- a/arch/x86/kernel/acpi/boot.c
+++ b/arch/x86/kernel/acpi/boot.c
@@ -112,6 +112,10 @@ static unsigned int gsi_to_irq(unsigned int gsi)
}
}

+#ifdef CONFIG_X86_IO_APIC
+ if (acpi_irq_model == ACPI_IRQ_MODEL_IOAPIC)
+ return ioapic_gsi_to_irq(gsi);
+#endif
/* Provide an identity mapping of gsi == irq
* except on truly weird platforms that have
* non isa irqs in the first 16 gsis.
@@ -124,22 +128,6 @@ static unsigned int gsi_to_irq(unsigned int gsi)
return irq;
}

-static u32 irq_to_gsi(int irq)
-{
- unsigned int gsi;
-
- if (irq < NR_IRQS_LEGACY)
- gsi = isa_irq_to_gsi[irq];
- else if (irq < gsi_top)
- gsi = irq;
- else if (irq < (gsi_top + NR_IRQS_LEGACY))
- gsi = irq - gsi_top;
- else
- gsi = 0xffffffff;
-
- return gsi;
-}
-
/*
* Temporarily use the virtual area starting from FIX_IO_APIC_BASE_END,
* to map the target physical address. The problem is that set_fixmap()
@@ -527,7 +515,7 @@ int acpi_isa_irq_to_gsi(unsigned isa_irq, u32 *gsi)
{
if (isa_irq >= 16)
return -1;
- *gsi = irq_to_gsi(isa_irq);
+ *gsi = isa_irq_to_gsi[isa_irq];
return 0;
}

diff --git a/arch/x86/kernel/apic/io_apic.c b/arch/x86/kernel/apic/io_apic.c
index 0702998..527b184 100644
--- a/arch/x86/kernel/apic/io_apic.c
+++ b/arch/x86/kernel/apic/io_apic.c
@@ -1081,13 +1081,16 @@ static int pin_2_irq(int idx, int apic, int pin)

if (test_bit(bus, mp_bus_not_pci)) {
irq = mp_irqs[idx].srcbusirq;
- } else {
+ } else if (gsi_cfg->gsi_base == gsi_cfg->irq_base) {
u32 gsi = gsi_cfg->gsi_base + pin;

if (gsi >= NR_IRQS_LEGACY)
irq = gsi;
else
irq = gsi_top + gsi;
+ } else {
+ /* hotadd ioapic */
+ irq = gsi_cfg->irq_base + pin;
}

#ifdef CONFIG_X86_32
@@ -1581,6 +1584,30 @@ static void __init setup_IO_APIC_irqs(void)
__io_apic_setup_irqs(ioapic_idx);
}

+int ioapic_gsi_to_irq(u32 gsi)
+{
+ int ioapic_idx = 0, irq = gsi;
+ struct mp_ioapic_gsi *gsi_cfg;
+
+ ioapic_idx = mp_find_ioapic(gsi);
+ if (ioapic_idx < 0)
+ return -1;
+
+ gsi_cfg = mp_ioapic_gsi_routing(ioapic_idx);
+ if (gsi_cfg->gsi_base == gsi_cfg->irq_base) {
+ if (gsi < NR_IRQS_LEGACY)
+ irq = gsi_top + gsi;
+ } else {
+ int pin = mp_find_ioapic_pin(ioapic_idx, gsi);
+
+ if (pin < 0)
+ return -1;
+ /* hotadd ioapic */
+ irq = gsi_cfg->irq_base + pin;
+ }
+
+ return irq;
+}
/*
* for the gsit that is not in first ioapic
* but could not use acpi_register_gsi()
--
1.7.7

2012-02-24 03:50:42

by Yinghai Lu

[permalink] [raw]
Subject: [PATCH 11/13] x86, acpi, irq: Enable pci device type ioapic hotplug

Fill the blank stub of acpi_register_ioapic/acpi_unregister_ioapic.

Signed-off-by: Yinghai Lu <[email protected]>
Cc: Len Brown <[email protected]>
Cc: Pavel Machek <[email protected]>
Cc: "Rafael J. Wysocki" <[email protected]>
Cc: Thomas Gleixner <[email protected]>
Cc: Ingo Molnar <[email protected]>
Cc: "H. Peter Anvin" <[email protected]>
Cc: [email protected]
---
arch/x86/kernel/acpi/boot.c | 6 ++----
1 files changed, 2 insertions(+), 4 deletions(-)

diff --git a/arch/x86/kernel/acpi/boot.c b/arch/x86/kernel/acpi/boot.c
index fa9473b..94af680 100644
--- a/arch/x86/kernel/acpi/boot.c
+++ b/arch/x86/kernel/acpi/boot.c
@@ -688,16 +688,14 @@ EXPORT_SYMBOL(acpi_unmap_lsapic);

int acpi_register_ioapic(acpi_handle handle, u64 phys_addr, u32 gsi_base)
{
- /* TBD */
- return -EINVAL;
+ return __mp_register_ioapic(0, phys_addr, gsi_base, true);
}

EXPORT_SYMBOL(acpi_register_ioapic);

int acpi_unregister_ioapic(acpi_handle handle, u32 gsi_base)
{
- /* TBD */
- return -EINVAL;
+ return mp_unregister_ioapic(gsi_base);
}

EXPORT_SYMBOL(acpi_unregister_ioapic);
--
1.7.7

2012-02-24 03:49:45

by Yinghai Lu

[permalink] [raw]
Subject: [PATCH 06/13] genirq: bail out early in free_desc()

So we can use irq_free_descs() to clear allocated_irqs bits for
pre-reserved irqs.

Signed-off-by: Yinghai Lu <[email protected]>
---
kernel/irq/irqdesc.c | 3 +++
1 files changed, 3 insertions(+), 0 deletions(-)

diff --git a/kernel/irq/irqdesc.c b/kernel/irq/irqdesc.c
index b1d6e51..da82457 100644
--- a/kernel/irq/irqdesc.c
+++ b/kernel/irq/irqdesc.c
@@ -169,6 +169,9 @@ static void free_desc(unsigned int irq)
{
struct irq_desc *desc = irq_to_desc(irq);

+ if (!desc)
+ return;
+
unregister_irq_proc(irq, desc);

mutex_lock(&sparse_irq_lock);
--
1.7.7

2012-02-24 03:52:01

by Yinghai Lu

[permalink] [raw]
Subject: [PATCH 09/13] x86, irq: Add mp_unregister_ioapic could handle hotremove ioapic

It frees the ioapic-related irq_descs and also clears the allocated_irqs bits.

Signed-off-by: Yinghai Lu <[email protected]>
Cc: Thomas Gleixner <[email protected]>
Cc: Ingo Molnar <[email protected]>
Cc: "H. Peter Anvin" <[email protected]>
Cc: Suresh Siddha <[email protected]>
---
arch/x86/include/asm/mpspec.h | 1 +
arch/x86/kernel/apic/io_apic.c | 42 ++++++++++++++++++++++++++++++++++++++++
2 files changed, 43 insertions(+), 0 deletions(-)

diff --git a/arch/x86/include/asm/mpspec.h b/arch/x86/include/asm/mpspec.h
index aca26fa..6f80e04 100644
--- a/arch/x86/include/asm/mpspec.h
+++ b/arch/x86/include/asm/mpspec.h
@@ -97,6 +97,7 @@ static inline void early_reserve_e820_mpc_new(void) { }
void __cpuinit generic_processor_info(int apicid, int version);
#ifdef CONFIG_ACPI
int __mp_register_ioapic(int id, u32 address, u32 gsi_base, bool hot);
+int mp_unregister_ioapic(u32 gsi_base);
extern void mp_register_ioapic(int id, u32 address, u32 gsi_base);
extern void mp_override_legacy_irq(u8 bus_irq, u8 polarity, u8 trigger,
u32 gsi);
diff --git a/arch/x86/kernel/apic/io_apic.c b/arch/x86/kernel/apic/io_apic.c
index 2e8497c..4517713 100644
--- a/arch/x86/kernel/apic/io_apic.c
+++ b/arch/x86/kernel/apic/io_apic.c
@@ -255,6 +255,14 @@ static void free_irq_at(unsigned int at, struct irq_cfg *cfg)
irq_free_desc(at);
}

+static void free_irqs(unsigned int from, int cnt)
+{
+ int i;
+
+ for (i = from; i < from + cnt; i++)
+ free_irq_at(i, irq_get_chip_data(i));
+}
+
static struct irq_cfg *realloc_irq_and_cfg_at(unsigned int at, int node)
{
struct irq_desc *desc = irq_to_desc(at);
@@ -328,6 +336,16 @@ static void __init reserve_ioapic_gsi_irq_extra(void)
}
}

+static void free_ioapic_gsi_irq_base(int idx)
+{
+ struct mp_ioapic_gsi *gsi_cfg = mp_ioapic_gsi_routing(idx);
+ int irq_base = gsi_cfg->irq_base;
+ int irq_cnt = gsi_cfg->gsi_end - gsi_cfg->gsi_base + 1;
+
+ if (irq_base > 0)
+ free_irqs(irq_base, irq_cnt);
+}
+
static void alloc_ioapic_saved_registers(int idx)
{
if (ioapics[idx].saved_registers)
@@ -341,6 +359,11 @@ static void alloc_ioapic_saved_registers(int idx)
pr_err("IOAPIC %d: suspend/resume impossible!\n", idx);
}

+static void free_ioapic_saved_registers(int idx)
+{
+ kfree(ioapics[idx].saved_registers);
+}
+
int __init arch_early_irq_init(void)
{
int node = cpu_to_node(0);
@@ -4166,6 +4189,25 @@ void mp_register_ioapic(int id, u32 address, u32 gsi_base)
__mp_register_ioapic(id, address, gsi_base, false);
}

+int mp_unregister_ioapic(u32 gsi_base)
+{
+ int idx;
+
+ idx = mp_find_ioapic(gsi_base);
+ if (idx < 0)
+ return -EINVAL;
+
+ free_ioapic_saved_registers(idx);
+
+ free_ioapic_gsi_irq_base(idx);
+
+ clear_fixmap(FIX_IO_APIC_BASE_0 + idx);
+ memset(&ioapics[idx], 0, sizeof(struct ioapic));
+ ioapics[idx].mp_config.apicid = 0xff;
+
+ return 0;
+}
+
/* Enable IOAPIC early just for system timer */
void __init pre_init_apic_IRQ0(void)
{
--
1.7.7

2012-02-24 03:49:44

by Yinghai Lu

[permalink] [raw]
Subject: [PATCH 12/13] PCI: Make sure hotplug ioapic driver get loaded early

at least before port service.

On some systems the MADT only has non-hotplug entries, even though those
devices are there before the OS boots.

So we need to enable those ioapics early, before real device drivers need to
set up ioapic irqs.

Signed-off-by: <[email protected]>
Cc: Jesse Barnes <[email protected]>
---
drivers/pci/Kconfig | 2 +-
drivers/pci/Makefile | 5 +++--
drivers/pci/ioapic.c | 6 ------
3 files changed, 4 insertions(+), 9 deletions(-)

diff --git a/drivers/pci/Kconfig b/drivers/pci/Kconfig
index 37856f7..c920a9d 100644
--- a/drivers/pci/Kconfig
+++ b/drivers/pci/Kconfig
@@ -98,7 +98,7 @@ config PCI_PASID
If unsure, say N.

config PCI_IOAPIC
- tristate "PCI IO-APIC hotplug support" if X86
+ bool "PCI IO-APIC hotplug support" if X86
depends on PCI
depends on ACPI
depends on HOTPLUG
diff --git a/drivers/pci/Makefile b/drivers/pci/Makefile
index 083a49f..7b4ff82 100644
--- a/drivers/pci/Makefile
+++ b/drivers/pci/Makefile
@@ -10,11 +10,12 @@ obj-$(CONFIG_SYSFS) += slot.o

obj-$(CONFIG_PCI_QUIRKS) += quirks.o

+# pre-installed hotplug ioapic need to be enabled at first
+obj-$(CONFIG_PCI_IOAPIC) += ioapic.o
+
# Build PCI Express stuff if needed
obj-$(CONFIG_PCIEPORTBUS) += pcie/

-obj-$(CONFIG_PCI_IOAPIC) += ioapic.o
-
obj-$(CONFIG_HOTPLUG) += hotplug.o

# Build the PCI Hotplug drivers if we were asked to
diff --git a/drivers/pci/ioapic.c b/drivers/pci/ioapic.c
index 205af8d..4183a5c 100644
--- a/drivers/pci/ioapic.c
+++ b/drivers/pci/ioapic.c
@@ -118,10 +118,4 @@ static int __init ioapic_init(void)
return pci_register_driver(&ioapic_driver);
}

-static void __exit ioapic_exit(void)
-{
- pci_unregister_driver(&ioapic_driver);
-}
-
module_init(ioapic_init);
-module_exit(ioapic_exit);
--
1.7.7

2012-02-24 03:52:23

by Yinghai Lu

[permalink] [raw]
Subject: [PATCH 08/13] x86, irq: Make mp_register_ioapic could handle hotadd ioapic

It reserves an irq block in the allocated_irqs bitmap,
and irq_base is used to get the right irq for an ioapic/pin or gsi.

Signed-off-by: Yinghai Lu <[email protected]>
Cc: Thomas Gleixner <[email protected]>
Cc: Ingo Molnar <[email protected]>
Cc: "H. Peter Anvin" <[email protected]>
Cc: Suresh Siddha <[email protected]>
---
arch/x86/include/asm/mpspec.h | 1 +
arch/x86/kernel/apic/io_apic.c | 53 ++++++++++++++++++++++++++++++++-------
2 files changed, 44 insertions(+), 10 deletions(-)

diff --git a/arch/x86/include/asm/mpspec.h b/arch/x86/include/asm/mpspec.h
index 9c7d95f..aca26fa 100644
--- a/arch/x86/include/asm/mpspec.h
+++ b/arch/x86/include/asm/mpspec.h
@@ -96,6 +96,7 @@ static inline void early_reserve_e820_mpc_new(void) { }

void __cpuinit generic_processor_info(int apicid, int version);
#ifdef CONFIG_ACPI
+int __mp_register_ioapic(int id, u32 address, u32 gsi_base, bool hot);
extern void mp_register_ioapic(int id, u32 address, u32 gsi_base);
extern void mp_override_legacy_irq(u8 bus_irq, u8 polarity, u8 trigger,
u32 gsi);
diff --git a/arch/x86/kernel/apic/io_apic.c b/arch/x86/kernel/apic/io_apic.c
index 57ffa86..2e8497c 100644
--- a/arch/x86/kernel/apic/io_apic.c
+++ b/arch/x86/kernel/apic/io_apic.c
@@ -4080,7 +4080,7 @@ static __init int bad_ioapic(int idx, unsigned long address)
return 0;
}

-void __init mp_register_ioapic(int id, u32 address, u32 gsi_base)
+int __mp_register_ioapic(int id, u32 address, u32 gsi_base, bool hotadd)
{
int idx;
int entries;
@@ -4088,11 +4088,19 @@ void __init mp_register_ioapic(int id, u32 address, u32 gsi_base)

idx = mp_find_ioapic(gsi_base);
if (idx >= 0)
- return;
+ return -EINVAL;

idx = nr_ioapics;
+ if (hotadd) {
+ /* find free spot */
+ for (idx = 0; idx < nr_ioapics; idx++)
+ if (!ioapics[idx].nr_registers &&
+ ioapics[idx].mp_config.apicid == 0xff)
+ break;
+ }
+
if (bad_ioapic(idx, address))
- return;
+ return -EINVAL;

ioapics[idx].mp_config.type = MP_IOAPIC;
ioapics[idx].mp_config.flags = MPC_APIC_USABLE;
@@ -4108,10 +4116,8 @@ void __init mp_register_ioapic(int id, u32 address, u32 gsi_base)
*/
entries = io_apic_get_redir_entries(idx);

- if (!entries || entries > MP_MAX_IOAPIC_PIN) {
- clear_fixmap(FIX_IO_APIC_BASE_0 + idx);
- return;
- }
+ if (!entries || entries > MP_MAX_IOAPIC_PIN)
+ goto failed;

gsi_cfg = mp_ioapic_gsi_routing(idx);
gsi_cfg->gsi_base = gsi_base;
@@ -4122,15 +4128,42 @@ void __init mp_register_ioapic(int id, u32 address, u32 gsi_base)
*/
ioapics[idx].nr_registers = entries;

- if (gsi_cfg->gsi_end >= gsi_top)
- gsi_top = gsi_cfg->gsi_end + 1;
+ if (!hotadd) {
+ /*
+ * irqs will be reserved in arch_early_irq_init()
+ * don't need to update gsi_top for hot add case
+ */
+ if (gsi_cfg->gsi_end >= gsi_top)
+ gsi_top = gsi_cfg->gsi_end + 1;
+ } else {
+ int irq = reserve_ioapic_gsi_irq_base(idx);
+
+ if (irq < 0)
+ goto failed;
+
+ alloc_ioapic_saved_registers(idx);
+ }

printk(KERN_INFO "IOAPIC[%d]: apic_id %d, version %d, address 0x%x, "
"GSI %d-%d\n", idx, mpc_ioapic_id(idx),
mpc_ioapic_ver(idx), mpc_ioapic_addr(idx),
gsi_cfg->gsi_base, gsi_cfg->gsi_end);

- nr_ioapics++;
+ if (idx == nr_ioapics)
+ nr_ioapics++;
+
+ return 0;
+
+failed:
+ clear_fixmap(FIX_IO_APIC_BASE_0 + idx);
+ memset(&ioapics[idx], 0, sizeof(struct ioapic));
+ ioapics[idx].mp_config.apicid = 0xff;
+ return -EINVAL;
+}
+
+void mp_register_ioapic(int id, u32 address, u32 gsi_base)
+{
+ __mp_register_ioapic(id, address, gsi_base, false);
}

/* Enable IOAPIC early just for system timer */
--
1.7.7

2012-02-24 03:52:44

by Yinghai Lu

[permalink] [raw]
Subject: [PATCH 07/13] x86, irq: more strict check for register ioapic

1. Check for overlapping gsi ranges.
For the hotplug ioapic case, the BIOS may have some entries in the MADT
and also on the pci bus with _GSB.

2. Make bad_ioapic() take idx instead of using nr_ioapics,
so that a hot-added ioapic can find a spare slot in the middle later.

3. Check that entries is in the valid range.

Signed-off-by: Yinghai Lu <[email protected]>
Cc: Thomas Gleixner <[email protected]>
Cc: Ingo Molnar <[email protected]>
Cc: "H. Peter Anvin" <[email protected]>
Cc: Suresh Siddha <[email protected]>
---
arch/x86/kernel/apic/io_apic.c | 19 ++++++++++++++-----
1 files changed, 14 insertions(+), 5 deletions(-)

diff --git a/arch/x86/kernel/apic/io_apic.c b/arch/x86/kernel/apic/io_apic.c
index 527b184..57ffa86 100644
--- a/arch/x86/kernel/apic/io_apic.c
+++ b/arch/x86/kernel/apic/io_apic.c
@@ -4065,11 +4065,11 @@ int mp_find_ioapic_pin(int ioapic, u32 gsi)
return gsi - gsi_cfg->gsi_base;
}

-static __init int bad_ioapic(unsigned long address)
+static __init int bad_ioapic(int idx, unsigned long address)
{
- if (nr_ioapics >= MAX_IO_APICS) {
+ if (idx >= MAX_IO_APICS) {
printk(KERN_WARNING "WARNING: Max # of I/O APICs (%d) exceeded "
- "(found %d), skipping\n", MAX_IO_APICS, nr_ioapics);
+ "(found %d), skipping\n", MAX_IO_APICS, idx);
return 1;
}
if (!address) {
@@ -4082,14 +4082,17 @@ static __init int bad_ioapic(unsigned long address)

void __init mp_register_ioapic(int id, u32 address, u32 gsi_base)
{
- int idx = 0;
+ int idx;
int entries;
struct mp_ioapic_gsi *gsi_cfg;

- if (bad_ioapic(address))
+ idx = mp_find_ioapic(gsi_base);
+ if (idx >= 0)
return;

idx = nr_ioapics;
+ if (bad_ioapic(idx, address))
+ return;

ioapics[idx].mp_config.type = MP_IOAPIC;
ioapics[idx].mp_config.flags = MPC_APIC_USABLE;
@@ -4104,6 +4107,12 @@ void __init mp_register_ioapic(int id, u32 address, u32 gsi_base)
* and to prevent reprogramming of IOAPIC pins (PCI GSIs).
*/
entries = io_apic_get_redir_entries(idx);
+
+ if (!entries || entries > MP_MAX_IOAPIC_PIN) {
+ clear_fixmap(FIX_IO_APIC_BASE_0 + idx);
+ return;
+ }
+
gsi_cfg = mp_ioapic_gsi_routing(idx);
gsi_cfg->gsi_base = gsi_base;
gsi_cfg->gsi_end = gsi_base + entries - 1;
--
1.7.7

2012-02-24 03:52:57

by Yinghai Lu

[permalink] [raw]
Subject: [PATCH 13/13] PCI: Disable mem in the ioapic removing path

Physical hotplug should be OK, but the remove/rescan path needs us
to disable memory decoding.

Otherwise, on rescan the mmio resource of the pci ioapic device will not be
sized and allocated, i.e. it is skipped.
In ioapic_probe(), pci_enable_device() will then not enable the device correctly,
and will bail out early.

So we can just disable mmio for all removing case. that will hurt real hotplug
path.

Signed-off-by: <[email protected]>
Cc: Jesse Barnes <[email protected]>
---
drivers/pci/ioapic.c | 13 +++++++++++++
1 files changed, 13 insertions(+), 0 deletions(-)

diff --git a/drivers/pci/ioapic.c b/drivers/pci/ioapic.c
index 4183a5c..0b2c210 100644
--- a/drivers/pci/ioapic.c
+++ b/drivers/pci/ioapic.c
@@ -88,6 +88,17 @@ exit_free:
return -ENODEV;
}

+static void pci_disable_device_mem(struct pci_dev *dev)
+{
+ u16 pci_command;
+
+ pci_read_config_word(dev, PCI_COMMAND, &pci_command);
+ if (pci_command & PCI_COMMAND_MEMORY) {
+ pci_command &= ~PCI_COMMAND_MEMORY;
+ pci_write_config_word(dev, PCI_COMMAND, pci_command);
+ }
+}
+
static void __devexit ioapic_remove(struct pci_dev *dev)
{
struct ioapic *ioapic = pci_get_drvdata(dev);
@@ -95,6 +106,8 @@ static void __devexit ioapic_remove(struct pci_dev *dev)
acpi_unregister_ioapic(ioapic->handle, ioapic->gsi_base);
pci_release_region(dev, 0);
pci_disable_device(dev);
+ /* need to disable it, otherwise remove/rescan will not work */
+ pci_disable_device_mem(dev);
kfree(ioapic);
}

--
1.7.7

2012-02-24 16:51:54

by Jesse Barnes

[permalink] [raw]
Subject: Re: [PATCH 00/13] x86, irq: support ioapic device hotplug for x86

On Thu, 23 Feb 2012 19:48:46 -0800
Yinghai Lu <[email protected]> wrote:

> Hi, Thomas
>
> Current x86 code does not support iapic hotplug yet.
>
> This patcheset will try to pre-reserve irq block in allocated_irqs bitmap.
> for hot add ioapic controller. also record irq_base in gsi_config, so later
> could use it to convert gsi to irq for pci device using that ioapic controller.
>
> need to update x86/irq to support realloc_irq after those bit are pre-reserved.
>
> It is tested with pci remove/rescan method plus some test stubs.
>
> Please review them.

Suresh or Matthew may be interested in this patchset as well, and it
should go via -tip.

Thanks,
--
Jesse Barnes, Intel Open Source Technology Center


Attachments:
signature.asc (836.00 B)