LinuxLists.cc - [PATCH] x86: SGU UV Fix irq affinity for hub based interrupts

2009-09-30 16:03:54

Subject: [PATCH] x86: SGU UV Fix irq affinity for hub based interrupts

This patch fixes handling of uv hub irq affinity. IRQs with ALL or
NODE affinity can be routed to cpus other than their originally assigned
cpu. Those with CPU affinity cannot be rerouted.

Signed-off-by: Dimitri Sivanich <[email protected]>

---

More specific handling for NODE affinity will be added once the patch for
limiting irq affinity is in place.

arch/x86/include/asm/uv/uv_irq.h | 15 +++-
arch/x86/kernel/apic/io_apic.c | 49 ++++++++++++-
arch/x86/kernel/uv_irq.c | 128 ++++++++++++++++++++++++++++++++---
drivers/misc/sgi-xp/xpc_uv.c | 5 -
4 files changed, 180 insertions(+), 17 deletions(-)

Index: linux/arch/x86/kernel/apic/io_apic.c
===================================================================
--- linux.orig/arch/x86/kernel/apic/io_apic.c 2009-09-29 21:23:53.000000000 -0500
+++ linux/arch/x86/kernel/apic/io_apic.c 2009-09-30 08:01:10.000000000 -0500
@@ -3748,9 +3748,10 @@ int arch_setup_ht_irq(unsigned int irq,
* on the specified blade to allow the sending of MSIs to the specified CPU.
*/
int arch_enable_uv_irq(char *irq_name, unsigned int irq, int cpu, int mmr_blade,
- unsigned long mmr_offset)
+ unsigned long mmr_offset, int restrict)
{
const struct cpumask *eligible_cpu = cpumask_of(cpu);
+ struct irq_desc *desc = irq_to_desc(irq);
struct irq_cfg *cfg;
int mmr_pnode;
unsigned long mmr_value;
@@ -3766,6 +3767,11 @@ int arch_enable_uv_irq(char *irq_name, u
if (err != 0)
return err;

+ if (restrict == UV_AFFINITY_CPU)
+ desc->status |= IRQ_NO_BALANCING;
+ else
+ desc->status |= IRQ_MOVE_PCNTXT;
+
spin_lock_irqsave(&vector_lock, flags);
set_irq_chip_and_handler_name(irq, &uv_irq_chip, handle_percpu_irq,
irq_name);
@@ -3794,11 +3800,10 @@ int arch_enable_uv_irq(char *irq_name, u
* Disable the specified MMR located on the specified blade so that MSIs are
* longer allowed to be sent.
*/
-void arch_disable_uv_irq(int mmr_blade, unsigned long mmr_offset)
+void arch_disable_uv_irq(int mmr_pnode, unsigned long mmr_offset)
{
unsigned long mmr_value;
struct uv_IO_APIC_route_entry *entry;
- int mmr_pnode;

BUILD_BUG_ON(sizeof(struct uv_IO_APIC_route_entry) != sizeof(unsigned long));

@@ -3806,9 +3811,45 @@ void arch_disable_uv_irq(int mmr_blade,
entry = (struct uv_IO_APIC_route_entry *)&mmr_value;
entry->mask = 1;

- mmr_pnode = uv_blade_to_pnode(mmr_blade);
uv_write_global_mmr64(mmr_pnode, mmr_offset, mmr_value);
}
+
+int uv_set_irq_affinity(unsigned int irq, const struct cpumask *mask)
+{
+ struct irq_desc *desc = irq_to_desc(irq);
+ struct irq_cfg *cfg = desc->chip_data;
+ unsigned int dest;
+ unsigned long mmr_value;
+ struct uv_IO_APIC_route_entry *entry;
+ unsigned long mmr_offset;
+ unsigned mmr_pnode;
+
+ dest = set_desc_affinity(desc, mask);
+ if (dest == BAD_APICID)
+ return -1;
+
+ mmr_value = 0;
+ entry = (struct uv_IO_APIC_route_entry *)&mmr_value;
+
+ entry->vector = cfg->vector;
+ entry->delivery_mode = apic->irq_delivery_mode;
+ entry->dest_mode = apic->irq_dest_mode;
+ entry->polarity = 0;
+ entry->trigger = 0;
+ entry->mask = 0;
+ entry->dest = dest;
+
+ /* Get previously stored MMR and pnode of hub sourcing interrupts */
+ if (uv_irq_2_mmr_info(irq, &mmr_offset, &mmr_pnode))
+ return -1;
+
+ uv_write_global_mmr64(mmr_pnode, mmr_offset, mmr_value);
+
+ if (cfg->move_in_progress)
+ send_cleanup_vector(cfg);
+
+ return 0;
+}
#endif /* CONFIG_X86_64 */

int __init io_apic_get_redir_entries (int ioapic)
Index: linux/drivers/misc/sgi-xp/xpc_uv.c
===================================================================
--- linux.orig/drivers/misc/sgi-xp/xpc_uv.c 2009-09-29 21:23:53.000000000 -0500
+++ linux/drivers/misc/sgi-xp/xpc_uv.c 2009-09-29 21:23:55.000000000 -0500
@@ -106,7 +106,8 @@ xpc_get_gru_mq_irq_uv(struct xpc_gru_mq_
int mmr_pnode = uv_blade_to_pnode(mq->mmr_blade);

#if defined CONFIG_X86_64
- mq->irq = uv_setup_irq(irq_name, cpu, mq->mmr_blade, mq->mmr_offset);
+ mq->irq = uv_setup_irq(irq_name, cpu, mq->mmr_blade, mq->mmr_offset,
+ UV_AFFINITY_CPU);
if (mq->irq < 0) {
dev_err(xpc_part, "uv_setup_irq() returned error=%d\n",
-mq->irq);
@@ -136,7 +137,7 @@ static void
xpc_release_gru_mq_irq_uv(struct xpc_gru_mq_uv *mq)
{
#if defined CONFIG_X86_64
- uv_teardown_irq(mq->irq, mq->mmr_blade, mq->mmr_offset);
+ uv_teardown_irq(mq->irq);

#elif defined CONFIG_IA64_GENERIC || defined CONFIG_IA64_SGI_UV
int mmr_pnode;
Index: linux/arch/x86/include/asm/uv/uv_irq.h
===================================================================
--- linux.orig/arch/x86/include/asm/uv/uv_irq.h 2009-09-29 21:23:53.000000000 -0500
+++ linux/arch/x86/include/asm/uv/uv_irq.h 2009-09-29 21:23:55.000000000 -0500
@@ -25,12 +25,21 @@ struct uv_IO_APIC_route_entry {
dest : 32;
};

+enum {
+ UV_AFFINITY_ALL,
+ UV_AFFINITY_NODE,
+ UV_AFFINITY_CPU
+};
+
extern struct irq_chip uv_irq_chip;

-extern int arch_enable_uv_irq(char *, unsigned int, int, int, unsigned long);
+extern int
+arch_enable_uv_irq(char *, unsigned int, int, int, unsigned long, int);
extern void arch_disable_uv_irq(int, unsigned long);
+extern int uv_set_irq_affinity(unsigned int, const struct cpumask *);

-extern int uv_setup_irq(char *, int, int, unsigned long);
-extern void uv_teardown_irq(unsigned int, int, unsigned long);
+extern int uv_irq_2_mmr_info(int, unsigned long *, int *);
+extern int uv_setup_irq(char *, int, int, unsigned long, int);
+extern void uv_teardown_irq(unsigned int);

#endif /* _ASM_X86_UV_UV_IRQ_H */
Index: linux/arch/x86/kernel/uv_irq.c
===================================================================
--- linux.orig/arch/x86/kernel/uv_irq.c 2009-09-29 21:23:53.000000000 -0500
+++ linux/arch/x86/kernel/uv_irq.c 2009-09-30 07:56:44.000000000 -0500
@@ -9,10 +9,22 @@
*/

#include <linux/module.h>
+#include <linux/rbtree.h>
#include <linux/irq.h>

#include <asm/apic.h>
#include <asm/uv/uv_irq.h>
+#include <asm/uv/uv_hub.h>
+
+/* MMR offset and pnode of hub sourcing interrupts for a given irq */
+struct uv_irq_2_mmr_pnode{
+ struct rb_node list;
+ unsigned long offset;
+ int pnode;
+ int irq;
+};
+static spinlock_t uv_irq_lock;
+static struct rb_root uv_irq_root;

static void uv_noop(unsigned int irq)
{
@@ -39,25 +51,106 @@ struct irq_chip uv_irq_chip = {
.unmask = uv_noop,
.eoi = uv_ack_apic,
.end = uv_noop,
+ .set_affinity = uv_set_irq_affinity,
};

/*
+ * Add offset and pnode information of the hub sourcing interrupts to the
+ * rb tree for a specific irq.
+ */
+static int uv_set_irq_2_mmr_info(int irq, unsigned long offset, unsigned blade)
+{
+ struct rb_node **link = &uv_irq_root.rb_node;
+ struct rb_node *parent = NULL;
+ struct uv_irq_2_mmr_pnode *n;
+ struct uv_irq_2_mmr_pnode *e;
+ unsigned long irqflags;
+
+ n = kmalloc_node(sizeof(struct uv_irq_2_mmr_pnode), GFP_KERNEL,
+ uv_blade_to_memory_nid(blade));
+ if (!n)
+ return -ENOMEM;
+
+ n->irq = irq;
+ n->offset = offset;
+ n->pnode = uv_blade_to_pnode(blade);
+ spin_lock_irqsave(&uv_irq_lock, irqflags);
+ /* Find the right place in the rbtree: */
+ while (*link) {
+ parent = *link;
+ e = rb_entry(parent, struct uv_irq_2_mmr_pnode, list);
+
+ if (unlikely(irq == e->irq)) {
+ /* irq entry exists */
+ e->pnode = uv_blade_to_pnode(blade);
+ e->offset = offset;
+ spin_unlock_irqrestore(&uv_irq_lock, irqflags);
+ kfree(n);
+ return 0;
+ }
+
+ if (irq < e->irq)
+ link = &(*link)->rb_left;
+ else
+ link = &(*link)->rb_right;
+ }
+
+ /* Insert the node into the rbtree. */
+ rb_link_node(&n->list, parent, link);
+ rb_insert_color(&n->list, &uv_irq_root);
+
+ spin_unlock_irqrestore(&uv_irq_lock, irqflags);
+ return 0;
+}
+
+/* Retrieve offset and pnode information from the rb tree for a specific irq */
+int uv_irq_2_mmr_info(int irq, unsigned long *offset, int *pnode)
+{
+ struct uv_irq_2_mmr_pnode *e;
+ struct rb_node *n;
+ unsigned long irqflags;
+
+ spin_lock_irqsave(&uv_irq_lock, irqflags);
+ n = uv_irq_root.rb_node;
+ while (n) {
+ e = rb_entry(n, struct uv_irq_2_mmr_pnode, list);
+
+ if (e->irq == irq) {
+ *offset = e->offset;
+ *pnode = e->pnode;
+ spin_unlock_irqrestore(&uv_irq_lock, irqflags);
+ return 0;
+ }
+
+ if (irq < e->irq)
+ n = n->rb_left;
+ else
+ n = n->rb_right;
+ }
+ spin_unlock_irqrestore(&uv_irq_lock, irqflags);
+ return -1;
+}
+
+/*
* Set up a mapping of an available irq and vector, and enable the specified
* MMR that defines the MSI that is to be sent to the specified CPU when an
* interrupt is raised.
*/
int uv_setup_irq(char *irq_name, int cpu, int mmr_blade,
- unsigned long mmr_offset)
+ unsigned long mmr_offset, int restrict)
{
- int irq;
- int ret;
+ int irq, ret;
+
+ irq = create_irq_nr(NR_IRQS_LEGACY, uv_blade_to_memory_nid(mmr_blade));

- irq = create_irq();
if (irq <= 0)
return -EBUSY;

- ret = arch_enable_uv_irq(irq_name, irq, cpu, mmr_blade, mmr_offset);
- if (ret != irq)
+ ret = arch_enable_uv_irq(irq_name, irq, cpu, mmr_blade, mmr_offset,
+ restrict);
+ if (ret == irq)
+ uv_set_irq_2_mmr_info(irq, mmr_offset, mmr_blade);
+ else
destroy_irq(irq);

return ret;
@@ -71,9 +164,28 @@ EXPORT_SYMBOL_GPL(uv_setup_irq);
*
* Set mmr_blade and mmr_offset to what was passed in on uv_setup_irq().
*/
-void uv_teardown_irq(unsigned int irq, int mmr_blade, unsigned long mmr_offset)
+void uv_teardown_irq(unsigned int irq)
{
- arch_disable_uv_irq(mmr_blade, mmr_offset);
+ struct uv_irq_2_mmr_pnode *e;
+ struct rb_node *n;
+ unsigned long irqflags;
+
+ spin_lock_irqsave(&uv_irq_lock, irqflags);
+ n = uv_irq_root.rb_node;
+ while (n) {
+ e = rb_entry(n, struct uv_irq_2_mmr_pnode, list);
+ if (e->irq == irq) {
+ arch_disable_uv_irq(e->pnode, e->offset);
+ rb_erase(n, &uv_irq_root);
+ kfree(e);
+ break;
+ }
+ if (irq < e->irq)
+ n = n->rb_left;
+ else
+ n = n->rb_right;
+ }
+ spin_unlock_irqrestore(&uv_irq_lock, irqflags);
destroy_irq(irq);
}
EXPORT_SYMBOL_GPL(uv_teardown_irq);

2009-09-30 16:10:41

by Robin Holt

[permalink] [raw]

Subject: Re: [PATCH] x86: SGU UV Fix irq affinity for hub based interrupts

On Wed, Sep 30, 2009 at 11:02:59AM -0500, Dimitri Sivanich wrote:
> This patch fixes handling of uv hub irq affinity. IRQs with ALL or
> NODE affinity can be routed to cpus other than their originally assigned
> cpu. Those with CPU affinity cannot be rerouted.
>
> Signed-off-by: Dimitri Sivanich <[email protected]>

Acked-by: Robin Holt <[email protected]>

2009-10-12 19:35:18

by Ingo Molnar

[permalink] [raw]

Subject: Re: [PATCH] x86: SGU UV Fix irq affinity for hub based interrupts

* Dimitri Sivanich <[email protected]> wrote:

> This patch fixes handling of uv hub irq affinity. IRQs with ALL or
> NODE affinity can be routed to cpus other than their originally assigned
> cpu. Those with CPU affinity cannot be rerouted.
>
> Signed-off-by: Dimitri Sivanich <[email protected]>
>
> ---
>
> More specific handling for NODE affinity will be added once the patch for
> limiting irq affinity is in place.
>
> arch/x86/include/asm/uv/uv_irq.h | 15 +++-
> arch/x86/kernel/apic/io_apic.c | 49 ++++++++++++-

Could we please move all these UV specific functions out of the generic
IO-APIC code? You can do it as a followup patch to this one, but i'd
like to see the end result first before applying these - the interfacing
is rather messy right now.

Ingo

2009-10-13 20:34:26

by Dimitri Sivanich

[permalink] [raw]

Subject: [PATCH] x86: Move SGI UV functionality out of generic IO-APIC code

Move UV specific functionality out of the generic IO-APIC code.

Signed-off-by: Dimitri Sivanich <[email protected]>

---

arch/x86/include/asm/hw_irq.h | 19 ++++
arch/x86/include/asm/uv/uv_irq.h | 7 -
arch/x86/kernel/apic/io_apic.c | 140 +----------------------------------
arch/x86/kernel/uv_irq.c | 109 +++++++++++++++++++++++++++
4 files changed, 133 insertions(+), 142 deletions(-)

Index: linux/arch/x86/include/asm/hw_irq.h
===================================================================
--- linux.orig/arch/x86/include/asm/hw_irq.h 2009-10-13 13:53:37.000000000 -0500
+++ linux/arch/x86/include/asm/hw_irq.h 2009-10-13 13:54:25.000000000 -0500
@@ -85,6 +85,25 @@ static inline void set_io_apic_irq_attr(
irq_attr->polarity = polarity;
}

+/*
+ * This is performance-critical, we want to do it O(1)
+ *
+ * Most irqs are mapped 1:1 with pins.
+ */
+struct irq_cfg {
+ struct irq_pin_list *irq_2_pin;
+ cpumask_var_t domain;
+ cpumask_var_t old_domain;
+ unsigned move_cleanup_count;
+ u8 vector;
+ u8 move_in_progress : 1;
+};
+
+extern struct irq_cfg *irq_cfg(unsigned int);
+extern int assign_irq_vector(int, struct irq_cfg *, const struct cpumask *);
+extern void send_cleanup_vector(struct irq_cfg *);
+extern unsigned int set_desc_affinity(struct irq_desc *,
+ const struct cpumask *);
extern int IO_APIC_get_PCI_irq_vector(int bus, int devfn, int pin,
struct io_apic_irq_attr *irq_attr);
extern void setup_ioapic_dest(void);
Index: linux/arch/x86/include/asm/uv/uv_irq.h
===================================================================
--- linux.orig/arch/x86/include/asm/uv/uv_irq.h 2009-10-13 13:53:47.000000000 -0500
+++ linux/arch/x86/include/asm/uv/uv_irq.h 2009-10-13 13:56:11.000000000 -0500
@@ -31,13 +31,6 @@ enum {
UV_AFFINITY_CPU
};

-extern struct irq_chip uv_irq_chip;
-
-extern int
-arch_enable_uv_irq(char *, unsigned int, int, int, unsigned long, int);
-extern void arch_disable_uv_irq(int, unsigned long);
-extern int uv_set_irq_affinity(unsigned int, const struct cpumask *);
-
extern int uv_irq_2_mmr_info(int, unsigned long *, int *);
extern int uv_setup_irq(char *, int, int, unsigned long, int);
extern void uv_teardown_irq(unsigned int);
Index: linux/arch/x86/kernel/apic/io_apic.c
===================================================================
--- linux.orig/arch/x86/kernel/apic/io_apic.c 2009-10-13 13:53:47.000000000 -0500
+++ linux/arch/x86/kernel/apic/io_apic.c 2009-10-13 15:19:04.000000000 -0500
@@ -60,8 +60,6 @@
#include <asm/irq_remapping.h>
#include <asm/hpet.h>
#include <asm/hw_irq.h>
-#include <asm/uv/uv_hub.h>
-#include <asm/uv/uv_irq.h>

#include <asm/apic.h>

@@ -140,20 +138,6 @@ static struct irq_pin_list *get_one_free
return pin;
}

-/*
- * This is performance-critical, we want to do it O(1)
- *
- * Most irqs are mapped 1:1 with pins.
- */
-struct irq_cfg {
- struct irq_pin_list *irq_2_pin;
- cpumask_var_t domain;
- cpumask_var_t old_domain;
- unsigned move_cleanup_count;
- u8 vector;
- u8 move_in_progress : 1;
-};
-
/* irq_cfg is indexed by the sum of all RTEs in all I/O APICs. */
#ifdef CONFIG_SPARSE_IRQ
static struct irq_cfg irq_cfgx[] = {
@@ -209,7 +193,7 @@ int __init arch_early_irq_init(void)
}

#ifdef CONFIG_SPARSE_IRQ
-static struct irq_cfg *irq_cfg(unsigned int irq)
+struct irq_cfg *irq_cfg(unsigned int irq)
{
struct irq_cfg *cfg = NULL;
struct irq_desc *desc;
@@ -361,7 +345,7 @@ void arch_free_chip_data(struct irq_desc
/* end for move_irq_desc */

#else
-static struct irq_cfg *irq_cfg(unsigned int irq)
+struct irq_cfg *irq_cfg(unsigned int irq)
{
return irq < nr_irqs ? irq_cfgx + irq : NULL;
}
@@ -1237,8 +1221,7 @@ next:
return err;
}

-static int
-assign_irq_vector(int irq, struct irq_cfg *cfg, const struct cpumask *mask)
+int assign_irq_vector(int irq, struct irq_cfg *cfg, const struct cpumask *mask)
{
int err;
unsigned long flags;
@@ -2228,7 +2211,7 @@ static int ioapic_retrigger_irq(unsigned
*/

#ifdef CONFIG_SMP
-static void send_cleanup_vector(struct irq_cfg *cfg)
+void send_cleanup_vector(struct irq_cfg *cfg)
{
cpumask_var_t cleanup_mask;

@@ -2272,15 +2255,12 @@ static void __target_IO_APIC_irq(unsigne
}
}

-static int
-assign_irq_vector(int irq, struct irq_cfg *cfg, const struct cpumask *mask);
-
/*
* Either sets desc->affinity to a valid value, and returns
* ->cpu_mask_to_apicid of that, or returns BAD_APICID and
* leaves desc->affinity untouched.
*/
-static unsigned int
+unsigned int
set_desc_affinity(struct irq_desc *desc, const struct cpumask *mask)
{
struct irq_cfg *cfg;
@@ -3741,116 +3721,6 @@ int arch_setup_ht_irq(unsigned int irq,
}
#endif /* CONFIG_HT_IRQ */

-#ifdef CONFIG_X86_UV
-/*
- * Re-target the irq to the specified CPU and enable the specified MMR located
- * on the specified blade to allow the sending of MSIs to the specified CPU.
- */
-int arch_enable_uv_irq(char *irq_name, unsigned int irq, int cpu, int mmr_blade,
- unsigned long mmr_offset, int restrict)
-{
- const struct cpumask *eligible_cpu = cpumask_of(cpu);
- struct irq_desc *desc = irq_to_desc(irq);
- struct irq_cfg *cfg;
- int mmr_pnode;
- unsigned long mmr_value;
- struct uv_IO_APIC_route_entry *entry;
- unsigned long flags;
- int err;
-
- BUILD_BUG_ON(sizeof(struct uv_IO_APIC_route_entry) != sizeof(unsigned long));
-
- cfg = irq_cfg(irq);
-
- err = assign_irq_vector(irq, cfg, eligible_cpu);
- if (err != 0)
- return err;
-
- if (restrict == UV_AFFINITY_CPU)
- desc->status |= IRQ_NO_BALANCING;
- else
- desc->status |= IRQ_MOVE_PCNTXT;
-
- spin_lock_irqsave(&vector_lock, flags);
- set_irq_chip_and_handler_name(irq, &uv_irq_chip, handle_percpu_irq,
- irq_name);
- spin_unlock_irqrestore(&vector_lock, flags);
-
- mmr_value = 0;
- entry = (struct uv_IO_APIC_route_entry *)&mmr_value;
- entry->vector = cfg->vector;
- entry->delivery_mode = apic->irq_delivery_mode;
- entry->dest_mode = apic->irq_dest_mode;
- entry->polarity = 0;
- entry->trigger = 0;
- entry->mask = 0;
- entry->dest = apic->cpu_mask_to_apicid(eligible_cpu);
-
- mmr_pnode = uv_blade_to_pnode(mmr_blade);
- uv_write_global_mmr64(mmr_pnode, mmr_offset, mmr_value);
-
- if (cfg->move_in_progress)
- send_cleanup_vector(cfg);
-
- return irq;
-}
-
-/*
- * Disable the specified MMR located on the specified blade so that MSIs are
- * longer allowed to be sent.
- */
-void arch_disable_uv_irq(int mmr_pnode, unsigned long mmr_offset)
-{
- unsigned long mmr_value;
- struct uv_IO_APIC_route_entry *entry;
-
- BUILD_BUG_ON(sizeof(struct uv_IO_APIC_route_entry) != sizeof(unsigned long));
-
- mmr_value = 0;
- entry = (struct uv_IO_APIC_route_entry *)&mmr_value;
- entry->mask = 1;
-
- uv_write_global_mmr64(mmr_pnode, mmr_offset, mmr_value);
-}
-
-int uv_set_irq_affinity(unsigned int irq, const struct cpumask *mask)
-{
- struct irq_desc *desc = irq_to_desc(irq);
- struct irq_cfg *cfg = desc->chip_data;
- unsigned int dest;
- unsigned long mmr_value;
- struct uv_IO_APIC_route_entry *entry;
- unsigned long mmr_offset;
- unsigned mmr_pnode;
-
- dest = set_desc_affinity(desc, mask);
- if (dest == BAD_APICID)
- return -1;
-
- mmr_value = 0;
- entry = (struct uv_IO_APIC_route_entry *)&mmr_value;
-
- entry->vector = cfg->vector;
- entry->delivery_mode = apic->irq_delivery_mode;
- entry->dest_mode = apic->irq_dest_mode;
- entry->polarity = 0;
- entry->trigger = 0;
- entry->mask = 0;
- entry->dest = dest;
-
- /* Get previously stored MMR and pnode of hub sourcing interrupts */
- if (uv_irq_2_mmr_info(irq, &mmr_offset, &mmr_pnode))
- return -1;
-
- uv_write_global_mmr64(mmr_pnode, mmr_offset, mmr_value);
-
- if (cfg->move_in_progress)
- send_cleanup_vector(cfg);
-
- return 0;
-}
-#endif /* CONFIG_X86_64 */
-
int __init io_apic_get_redir_entries (int ioapic)
{
union IO_APIC_reg_01 reg_01;
Index: linux/arch/x86/kernel/uv_irq.c
===================================================================
--- linux.orig/arch/x86/kernel/uv_irq.c 2009-10-13 13:53:47.000000000 -0500
+++ linux/arch/x86/kernel/uv_irq.c 2009-10-13 13:55:49.000000000 -0500
@@ -25,6 +25,7 @@ struct uv_irq_2_mmr_pnode{
};
static spinlock_t uv_irq_lock;
static struct rb_root uv_irq_root;
+static int uv_set_irq_affinity(unsigned int, const struct cpumask *);

static void uv_noop(unsigned int irq)
{
@@ -132,6 +133,114 @@ int uv_irq_2_mmr_info(int irq, unsigned
}

/*
+ * Re-target the irq to the specified CPU and enable the specified MMR located
+ * on the specified blade to allow the sending of MSIs to the specified CPU.
+ */
+static int
+arch_enable_uv_irq(char *irq_name, unsigned int irq, int cpu, int mmr_blade,
+ unsigned long mmr_offset, int restrict)
+{
+ const struct cpumask *eligible_cpu = cpumask_of(cpu);
+ struct irq_desc *desc = irq_to_desc(irq);
+ struct irq_cfg *cfg;
+ int mmr_pnode;
+ unsigned long mmr_value;
+ struct uv_IO_APIC_route_entry *entry;
+ int err;
+
+ BUILD_BUG_ON(sizeof(struct uv_IO_APIC_route_entry) !=
+ sizeof(unsigned long));
+
+ cfg = irq_cfg(irq);
+
+ err = assign_irq_vector(irq, cfg, eligible_cpu);
+ if (err != 0)
+ return err;
+
+ if (restrict == UV_AFFINITY_CPU)
+ desc->status |= IRQ_NO_BALANCING;
+ else
+ desc->status |= IRQ_MOVE_PCNTXT;
+
+ set_irq_chip_and_handler_name(irq, &uv_irq_chip, handle_percpu_irq,
+ irq_name);
+
+ mmr_value = 0;
+ entry = (struct uv_IO_APIC_route_entry *)&mmr_value;
+ entry->vector = cfg->vector;
+ entry->delivery_mode = apic->irq_delivery_mode;
+ entry->dest_mode = apic->irq_dest_mode;
+ entry->polarity = 0;
+ entry->trigger = 0;
+ entry->mask = 0;
+ entry->dest = apic->cpu_mask_to_apicid(eligible_cpu);
+
+ mmr_pnode = uv_blade_to_pnode(mmr_blade);
+ uv_write_global_mmr64(mmr_pnode, mmr_offset, mmr_value);
+
+ if (cfg->move_in_progress)
+ send_cleanup_vector(cfg);
+
+ return irq;
+}
+
+/*
+ * Disable the specified MMR located on the specified blade so that MSIs are
+ * longer allowed to be sent.
+ */
+static void arch_disable_uv_irq(int mmr_pnode, unsigned long mmr_offset)
+{
+ unsigned long mmr_value;
+ struct uv_IO_APIC_route_entry *entry;
+
+ BUILD_BUG_ON(sizeof(struct uv_IO_APIC_route_entry) !=
+ sizeof(unsigned long));
+
+ mmr_value = 0;
+ entry = (struct uv_IO_APIC_route_entry *)&mmr_value;
+ entry->mask = 1;
+
+ uv_write_global_mmr64(mmr_pnode, mmr_offset, mmr_value);
+}
+
+static int uv_set_irq_affinity(unsigned int irq, const struct cpumask *mask)
+{
+ struct irq_desc *desc = irq_to_desc(irq);
+ struct irq_cfg *cfg = desc->chip_data;
+ unsigned int dest;
+ unsigned long mmr_value;
+ struct uv_IO_APIC_route_entry *entry;
+ unsigned long mmr_offset;
+ unsigned mmr_pnode;
+
+ dest = set_desc_affinity(desc, mask);
+ if (dest == BAD_APICID)
+ return -1;
+
+ mmr_value = 0;
+ entry = (struct uv_IO_APIC_route_entry *)&mmr_value;
+
+ entry->vector = cfg->vector;
+ entry->delivery_mode = apic->irq_delivery_mode;
+ entry->dest_mode = apic->irq_dest_mode;
+ entry->polarity = 0;
+ entry->trigger = 0;
+ entry->mask = 0;
+ entry->dest = dest;
+
+ /* Get previously stored MMR and pnode of hub sourcing interrupts */
+ if (uv_irq_2_mmr_info(irq, &mmr_offset, &mmr_pnode))
+ return -1;
+
+ uv_write_global_mmr64(mmr_pnode, mmr_offset, mmr_value);
+
+ if (cfg->move_in_progress)
+ send_cleanup_vector(cfg);
+
+ return 0;
+}
+
+/*
* Set up a mapping of an available irq and vector, and enable the specified
* MMR that defines the MSI that is to be sent to the specified CPU when an
* interrupt is raised.

2009-10-14 08:19:08

by Dimitri Sivanich

[permalink] [raw]

Subject: [tip:x86/apic] x86: SGI UV: Fix irq affinity for hub based interrupts

Commit-ID: 6c2c502910247d2820cb630e7b28fb6bdecdbf45
Gitweb: http://git.kernel.org/tip/6c2c502910247d2820cb630e7b28fb6bdecdbf45
Author: Dimitri Sivanich <[email protected]>
AuthorDate: Wed, 30 Sep 2009 11:02:59 -0500
Committer: Ingo Molnar <[email protected]>
CommitDate: Wed, 14 Oct 2009 09:17:01 +0200

x86: SGI UV: Fix irq affinity for hub based interrupts

This patch fixes handling of uv hub irq affinity. IRQs with ALL or
NODE affinity can be routed to cpus other than their originally
assigned cpu. Those with CPU affinity cannot be rerouted.

Signed-off-by: Dimitri Sivanich <[email protected]>
LKML-Reference: <[email protected]>
Signed-off-by: Ingo Molnar <[email protected]>
---
arch/x86/include/asm/uv/uv_irq.h | 15 ++++-
arch/x86/kernel/apic/io_apic.c | 49 +++++++++++++-
arch/x86/kernel/uv_irq.c | 128 +++++++++++++++++++++++++++++++++++---
drivers/misc/sgi-xp/xpc_uv.c | 5 +-
4 files changed, 180 insertions(+), 17 deletions(-)

diff --git a/arch/x86/include/asm/uv/uv_irq.h b/arch/x86/include/asm/uv/uv_irq.h
index 9613c8c..5397e12 100644
--- a/arch/x86/include/asm/uv/uv_irq.h
+++ b/arch/x86/include/asm/uv/uv_irq.h
@@ -25,12 +25,21 @@ struct uv_IO_APIC_route_entry {
dest : 32;
};

+enum {
+ UV_AFFINITY_ALL,
+ UV_AFFINITY_NODE,
+ UV_AFFINITY_CPU
+};
+
extern struct irq_chip uv_irq_chip;

-extern int arch_enable_uv_irq(char *, unsigned int, int, int, unsigned long);
+extern int
+arch_enable_uv_irq(char *, unsigned int, int, int, unsigned long, int);
extern void arch_disable_uv_irq(int, unsigned long);
+extern int uv_set_irq_affinity(unsigned int, const struct cpumask *);

-extern int uv_setup_irq(char *, int, int, unsigned long);
-extern void uv_teardown_irq(unsigned int, int, unsigned long);
+extern int uv_irq_2_mmr_info(int, unsigned long *, int *);
+extern int uv_setup_irq(char *, int, int, unsigned long, int);
+extern void uv_teardown_irq(unsigned int);

#endif /* _ASM_X86_UV_UV_IRQ_H */
diff --git a/arch/x86/kernel/apic/io_apic.c b/arch/x86/kernel/apic/io_apic.c
index 8c718c9..bb52e7f 100644
--- a/arch/x86/kernel/apic/io_apic.c
+++ b/arch/x86/kernel/apic/io_apic.c
@@ -3731,9 +3731,10 @@ int arch_setup_ht_irq(unsigned int irq, struct pci_dev *dev)
* on the specified blade to allow the sending of MSIs to the specified CPU.
*/
int arch_enable_uv_irq(char *irq_name, unsigned int irq, int cpu, int mmr_blade,
- unsigned long mmr_offset)
+ unsigned long mmr_offset, int restrict)
{
const struct cpumask *eligible_cpu = cpumask_of(cpu);
+ struct irq_desc *desc = irq_to_desc(irq);
struct irq_cfg *cfg;
int mmr_pnode;
unsigned long mmr_value;
@@ -3749,6 +3750,11 @@ int arch_enable_uv_irq(char *irq_name, unsigned int irq, int cpu, int mmr_blade,
if (err != 0)
return err;

+ if (restrict == UV_AFFINITY_CPU)
+ desc->status |= IRQ_NO_BALANCING;
+ else
+ desc->status |= IRQ_MOVE_PCNTXT;
+
spin_lock_irqsave(&vector_lock, flags);
set_irq_chip_and_handler_name(irq, &uv_irq_chip, handle_percpu_irq,
irq_name);
@@ -3777,11 +3783,10 @@ int arch_enable_uv_irq(char *irq_name, unsigned int irq, int cpu, int mmr_blade,
* Disable the specified MMR located on the specified blade so that MSIs are
* longer allowed to be sent.
*/
-void arch_disable_uv_irq(int mmr_blade, unsigned long mmr_offset)
+void arch_disable_uv_irq(int mmr_pnode, unsigned long mmr_offset)
{
unsigned long mmr_value;
struct uv_IO_APIC_route_entry *entry;
- int mmr_pnode;

BUILD_BUG_ON(sizeof(struct uv_IO_APIC_route_entry) != sizeof(unsigned long));

@@ -3789,9 +3794,45 @@ void arch_disable_uv_irq(int mmr_blade, unsigned long mmr_offset)
entry = (struct uv_IO_APIC_route_entry *)&mmr_value;
entry->mask = 1;

- mmr_pnode = uv_blade_to_pnode(mmr_blade);
uv_write_global_mmr64(mmr_pnode, mmr_offset, mmr_value);
}
+
+int uv_set_irq_affinity(unsigned int irq, const struct cpumask *mask)
+{
+ struct irq_desc *desc = irq_to_desc(irq);
+ struct irq_cfg *cfg = desc->chip_data;
+ unsigned int dest;
+ unsigned long mmr_value;
+ struct uv_IO_APIC_route_entry *entry;
+ unsigned long mmr_offset;
+ unsigned mmr_pnode;
+
+ dest = set_desc_affinity(desc, mask);
+ if (dest == BAD_APICID)
+ return -1;
+
+ mmr_value = 0;
+ entry = (struct uv_IO_APIC_route_entry *)&mmr_value;
+
+ entry->vector = cfg->vector;
+ entry->delivery_mode = apic->irq_delivery_mode;
+ entry->dest_mode = apic->irq_dest_mode;
+ entry->polarity = 0;
+ entry->trigger = 0;
+ entry->mask = 0;
+ entry->dest = dest;
+
+ /* Get previously stored MMR and pnode of hub sourcing interrupts */
+ if (uv_irq_2_mmr_info(irq, &mmr_offset, &mmr_pnode))
+ return -1;
+
+ uv_write_global_mmr64(mmr_pnode, mmr_offset, mmr_value);
+
+ if (cfg->move_in_progress)
+ send_cleanup_vector(cfg);
+
+ return 0;
+}
#endif /* CONFIG_X86_64 */

int __init io_apic_get_redir_entries (int ioapic)
diff --git a/arch/x86/kernel/uv_irq.c b/arch/x86/kernel/uv_irq.c
index aeef529..9a83775 100644
--- a/arch/x86/kernel/uv_irq.c
+++ b/arch/x86/kernel/uv_irq.c
@@ -9,10 +9,22 @@
*/

#include <linux/module.h>
+#include <linux/rbtree.h>
#include <linux/irq.h>

#include <asm/apic.h>
#include <asm/uv/uv_irq.h>
+#include <asm/uv/uv_hub.h>
+
+/* MMR offset and pnode of hub sourcing interrupts for a given irq */
+struct uv_irq_2_mmr_pnode{
+ struct rb_node list;
+ unsigned long offset;
+ int pnode;
+ int irq;
+};
+static spinlock_t uv_irq_lock;
+static struct rb_root uv_irq_root;

static void uv_noop(unsigned int irq)
{
@@ -39,25 +51,106 @@ struct irq_chip uv_irq_chip = {
.unmask = uv_noop,
.eoi = uv_ack_apic,
.end = uv_noop,
+ .set_affinity = uv_set_irq_affinity,
};

/*
+ * Add offset and pnode information of the hub sourcing interrupts to the
+ * rb tree for a specific irq.
+ */
+static int uv_set_irq_2_mmr_info(int irq, unsigned long offset, unsigned blade)
+{
+ struct rb_node **link = &uv_irq_root.rb_node;
+ struct rb_node *parent = NULL;
+ struct uv_irq_2_mmr_pnode *n;
+ struct uv_irq_2_mmr_pnode *e;
+ unsigned long irqflags;
+
+ n = kmalloc_node(sizeof(struct uv_irq_2_mmr_pnode), GFP_KERNEL,
+ uv_blade_to_memory_nid(blade));
+ if (!n)
+ return -ENOMEM;
+
+ n->irq = irq;
+ n->offset = offset;
+ n->pnode = uv_blade_to_pnode(blade);
+ spin_lock_irqsave(&uv_irq_lock, irqflags);
+ /* Find the right place in the rbtree: */
+ while (*link) {
+ parent = *link;
+ e = rb_entry(parent, struct uv_irq_2_mmr_pnode, list);
+
+ if (unlikely(irq == e->irq)) {
+ /* irq entry exists */
+ e->pnode = uv_blade_to_pnode(blade);
+ e->offset = offset;
+ spin_unlock_irqrestore(&uv_irq_lock, irqflags);
+ kfree(n);
+ return 0;
+ }
+
+ if (irq < e->irq)
+ link = &(*link)->rb_left;
+ else
+ link = &(*link)->rb_right;
+ }
+
+ /* Insert the node into the rbtree. */
+ rb_link_node(&n->list, parent, link);
+ rb_insert_color(&n->list, &uv_irq_root);
+
+ spin_unlock_irqrestore(&uv_irq_lock, irqflags);
+ return 0;
+}
+
+/* Retrieve offset and pnode information from the rb tree for a specific irq */
+int uv_irq_2_mmr_info(int irq, unsigned long *offset, int *pnode)
+{
+ struct uv_irq_2_mmr_pnode *e;
+ struct rb_node *n;
+ unsigned long irqflags;
+
+ spin_lock_irqsave(&uv_irq_lock, irqflags);
+ n = uv_irq_root.rb_node;
+ while (n) {
+ e = rb_entry(n, struct uv_irq_2_mmr_pnode, list);
+
+ if (e->irq == irq) {
+ *offset = e->offset;
+ *pnode = e->pnode;
+ spin_unlock_irqrestore(&uv_irq_lock, irqflags);
+ return 0;
+ }
+
+ if (irq < e->irq)
+ n = n->rb_left;
+ else
+ n = n->rb_right;
+ }
+ spin_unlock_irqrestore(&uv_irq_lock, irqflags);
+ return -1;
+}
+
+/*
* Set up a mapping of an available irq and vector, and enable the specified
* MMR that defines the MSI that is to be sent to the specified CPU when an
* interrupt is raised.
*/
int uv_setup_irq(char *irq_name, int cpu, int mmr_blade,
- unsigned long mmr_offset)
+ unsigned long mmr_offset, int restrict)
{
- int irq;
- int ret;
+ int irq, ret;
+
+ irq = create_irq_nr(NR_IRQS_LEGACY, uv_blade_to_memory_nid(mmr_blade));

- irq = create_irq();
if (irq <= 0)
return -EBUSY;

- ret = arch_enable_uv_irq(irq_name, irq, cpu, mmr_blade, mmr_offset);
- if (ret != irq)
+ ret = arch_enable_uv_irq(irq_name, irq, cpu, mmr_blade, mmr_offset,
+ restrict);
+ if (ret == irq)
+ uv_set_irq_2_mmr_info(irq, mmr_offset, mmr_blade);
+ else
destroy_irq(irq);

return ret;
@@ -71,9 +164,28 @@ EXPORT_SYMBOL_GPL(uv_setup_irq);
*
* Set mmr_blade and mmr_offset to what was passed in on uv_setup_irq().
*/
-void uv_teardown_irq(unsigned int irq, int mmr_blade, unsigned long mmr_offset)
+void uv_teardown_irq(unsigned int irq)
{
- arch_disable_uv_irq(mmr_blade, mmr_offset);
+ struct uv_irq_2_mmr_pnode *e;
+ struct rb_node *n;
+ unsigned long irqflags;
+
+ spin_lock_irqsave(&uv_irq_lock, irqflags);
+ n = uv_irq_root.rb_node;
+ while (n) {
+ e = rb_entry(n, struct uv_irq_2_mmr_pnode, list);
+ if (e->irq == irq) {
+ arch_disable_uv_irq(e->pnode, e->offset);
+ rb_erase(n, &uv_irq_root);
+ kfree(e);
+ break;
+ }
+ if (irq < e->irq)
+ n = n->rb_left;
+ else
+ n = n->rb_right;
+ }
+ spin_unlock_irqrestore(&uv_irq_lock, irqflags);
destroy_irq(irq);
}
EXPORT_SYMBOL_GPL(uv_teardown_irq);
diff --git a/drivers/misc/sgi-xp/xpc_uv.c b/drivers/misc/sgi-xp/xpc_uv.c
index c76677a..b5bbe59 100644
--- a/drivers/misc/sgi-xp/xpc_uv.c
+++ b/drivers/misc/sgi-xp/xpc_uv.c
@@ -106,7 +106,8 @@ xpc_get_gru_mq_irq_uv(struct xpc_gru_mq_uv *mq, int cpu, char *irq_name)
int mmr_pnode = uv_blade_to_pnode(mq->mmr_blade);

#if defined CONFIG_X86_64
- mq->irq = uv_setup_irq(irq_name, cpu, mq->mmr_blade, mq->mmr_offset);
+ mq->irq = uv_setup_irq(irq_name, cpu, mq->mmr_blade, mq->mmr_offset,
+ UV_AFFINITY_CPU);
if (mq->irq < 0) {
dev_err(xpc_part, "uv_setup_irq() returned error=%d\n",
-mq->irq);
@@ -136,7 +137,7 @@ static void
xpc_release_gru_mq_irq_uv(struct xpc_gru_mq_uv *mq)
{
#if defined CONFIG_X86_64
- uv_teardown_irq(mq->irq, mq->mmr_blade, mq->mmr_offset);
+ uv_teardown_irq(mq->irq);

#elif defined CONFIG_IA64_GENERIC || defined CONFIG_IA64_SGI_UV
int mmr_pnode;

2009-10-14 08:19:18

by Dimitri Sivanich

[permalink] [raw]

Subject: [tip:x86/apic] x86, apic: Move SGI UV functionality out of generic IO-APIC code

Commit-ID: 9338ad6ffb70eca97f335d93c54943828c8b209e
Gitweb: http://git.kernel.org/tip/9338ad6ffb70eca97f335d93c54943828c8b209e
Author: Dimitri Sivanich <[email protected]>
AuthorDate: Tue, 13 Oct 2009 15:32:36 -0500
Committer: Ingo Molnar <[email protected]>
CommitDate: Wed, 14 Oct 2009 09:17:09 +0200

x86, apic: Move SGI UV functionality out of generic IO-APIC code

Move UV specific functionality out of the generic IO-APIC code.

Signed-off-by: Dimitri Sivanich <[email protected]>
LKML-Reference: <[email protected]>
[ Cleaned up the code some more in their new places. ]
Signed-off-by: Ingo Molnar <[email protected]>
---
arch/x86/include/asm/hw_irq.h | 29 ++++++--
arch/x86/include/asm/uv/uv_irq.h | 7 --
arch/x86/kernel/apic/io_apic.c | 140 ++------------------------------------
arch/x86/kernel/uv_irq.c | 123 +++++++++++++++++++++++++++++++--
4 files changed, 145 insertions(+), 154 deletions(-)

diff --git a/arch/x86/include/asm/hw_irq.h b/arch/x86/include/asm/hw_irq.h
index ba180d9..56f0877 100644
--- a/arch/x86/include/asm/hw_irq.h
+++ b/arch/x86/include/asm/hw_irq.h
@@ -79,14 +79,31 @@ static inline void set_io_apic_irq_attr(struct io_apic_irq_attr *irq_attr,
int ioapic, int ioapic_pin,
int trigger, int polarity)
{
- irq_attr->ioapic = ioapic;
- irq_attr->ioapic_pin = ioapic_pin;
- irq_attr->trigger = trigger;
- irq_attr->polarity = polarity;
+ irq_attr->ioapic = ioapic;
+ irq_attr->ioapic_pin = ioapic_pin;
+ irq_attr->trigger = trigger;
+ irq_attr->polarity = polarity;
}

-extern int IO_APIC_get_PCI_irq_vector(int bus, int devfn, int pin,
- struct io_apic_irq_attr *irq_attr);
+/*
+ * This is performance-critical, we want to do it O(1)
+ *
+ * Most irqs are mapped 1:1 with pins.
+ */
+struct irq_cfg {
+ struct irq_pin_list *irq_2_pin;
+ cpumask_var_t domain;
+ cpumask_var_t old_domain;
+ unsigned move_cleanup_count;
+ u8 vector;
+ u8 move_in_progress : 1;
+};
+
+extern struct irq_cfg *irq_cfg(unsigned int);
+extern int assign_irq_vector(int, struct irq_cfg *, const struct cpumask *);
+extern void send_cleanup_vector(struct irq_cfg *);
+extern unsigned int set_desc_affinity(struct irq_desc *, const struct cpumask *);
+extern int IO_APIC_get_PCI_irq_vector(int bus, int devfn, int pin, struct io_apic_irq_attr *irq_attr);
extern void setup_ioapic_dest(void);

extern void enable_IO_APIC(void);
diff --git a/arch/x86/include/asm/uv/uv_irq.h b/arch/x86/include/asm/uv/uv_irq.h
index 5397e12..d6b17c7 100644
--- a/arch/x86/include/asm/uv/uv_irq.h
+++ b/arch/x86/include/asm/uv/uv_irq.h
@@ -31,13 +31,6 @@ enum {
UV_AFFINITY_CPU
};

-extern struct irq_chip uv_irq_chip;
-
-extern int
-arch_enable_uv_irq(char *, unsigned int, int, int, unsigned long, int);
-extern void arch_disable_uv_irq(int, unsigned long);
-extern int uv_set_irq_affinity(unsigned int, const struct cpumask *);
-
extern int uv_irq_2_mmr_info(int, unsigned long *, int *);
extern int uv_setup_irq(char *, int, int, unsigned long, int);
extern void uv_teardown_irq(unsigned int);
diff --git a/arch/x86/kernel/apic/io_apic.c b/arch/x86/kernel/apic/io_apic.c
index bb52e7f..ce16b65 100644
--- a/arch/x86/kernel/apic/io_apic.c
+++ b/arch/x86/kernel/apic/io_apic.c
@@ -60,8 +60,6 @@
#include <asm/irq_remapping.h>
#include <asm/hpet.h>
#include <asm/hw_irq.h>
-#include <asm/uv/uv_hub.h>
-#include <asm/uv/uv_irq.h>

#include <asm/apic.h>

@@ -140,20 +138,6 @@ static struct irq_pin_list *get_one_free_irq_2_pin(int node)
return pin;
}

-/*
- * This is performance-critical, we want to do it O(1)
- *
- * Most irqs are mapped 1:1 with pins.
- */
-struct irq_cfg {
- struct irq_pin_list *irq_2_pin;
- cpumask_var_t domain;
- cpumask_var_t old_domain;
- unsigned move_cleanup_count;
- u8 vector;
- u8 move_in_progress : 1;
-};
-
/* irq_cfg is indexed by the sum of all RTEs in all I/O APICs. */
#ifdef CONFIG_SPARSE_IRQ
static struct irq_cfg irq_cfgx[] = {
@@ -209,7 +193,7 @@ int __init arch_early_irq_init(void)
}

#ifdef CONFIG_SPARSE_IRQ
-static struct irq_cfg *irq_cfg(unsigned int irq)
+struct irq_cfg *irq_cfg(unsigned int irq)
{
struct irq_cfg *cfg = NULL;
struct irq_desc *desc;
@@ -361,7 +345,7 @@ void arch_free_chip_data(struct irq_desc *old_desc, struct irq_desc *desc)
/* end for move_irq_desc */

#else
-static struct irq_cfg *irq_cfg(unsigned int irq)
+struct irq_cfg *irq_cfg(unsigned int irq)
{
return irq < nr_irqs ? irq_cfgx + irq : NULL;
}
@@ -1237,8 +1221,7 @@ next:
return err;
}

-static int
-assign_irq_vector(int irq, struct irq_cfg *cfg, const struct cpumask *mask)
+int assign_irq_vector(int irq, struct irq_cfg *cfg, const struct cpumask *mask)
{
int err;
unsigned long flags;
@@ -2245,7 +2228,7 @@ static int ioapic_retrigger_irq(unsigned int irq)
*/

#ifdef CONFIG_SMP
-static void send_cleanup_vector(struct irq_cfg *cfg)
+void send_cleanup_vector(struct irq_cfg *cfg)
{
cpumask_var_t cleanup_mask;

@@ -2289,15 +2272,12 @@ static void __target_IO_APIC_irq(unsigned int irq, unsigned int dest, struct irq
}
}

-static int
-assign_irq_vector(int irq, struct irq_cfg *cfg, const struct cpumask *mask);
-
/*
* Either sets desc->affinity to a valid value, and returns
* ->cpu_mask_to_apicid of that, or returns BAD_APICID and
* leaves desc->affinity untouched.
*/
-static unsigned int
+unsigned int
set_desc_affinity(struct irq_desc *desc, const struct cpumask *mask)
{
struct irq_cfg *cfg;
@@ -3725,116 +3705,6 @@ int arch_setup_ht_irq(unsigned int irq, struct pci_dev *dev)
}
#endif /* CONFIG_HT_IRQ */

-#ifdef CONFIG_X86_UV
-/*
- * Re-target the irq to the specified CPU and enable the specified MMR located
- * on the specified blade to allow the sending of MSIs to the specified CPU.
- */
-int arch_enable_uv_irq(char *irq_name, unsigned int irq, int cpu, int mmr_blade,
- unsigned long mmr_offset, int restrict)
-{
- const struct cpumask *eligible_cpu = cpumask_of(cpu);
- struct irq_desc *desc = irq_to_desc(irq);
- struct irq_cfg *cfg;
- int mmr_pnode;
- unsigned long mmr_value;
- struct uv_IO_APIC_route_entry *entry;
- unsigned long flags;
- int err;
-
- BUILD_BUG_ON(sizeof(struct uv_IO_APIC_route_entry) != sizeof(unsigned long));
-
- cfg = irq_cfg(irq);
-
- err = assign_irq_vector(irq, cfg, eligible_cpu);
- if (err != 0)
- return err;
-
- if (restrict == UV_AFFINITY_CPU)
- desc->status |= IRQ_NO_BALANCING;
- else
- desc->status |= IRQ_MOVE_PCNTXT;
-
- spin_lock_irqsave(&vector_lock, flags);
- set_irq_chip_and_handler_name(irq, &uv_irq_chip, handle_percpu_irq,
- irq_name);
- spin_unlock_irqrestore(&vector_lock, flags);
-
- mmr_value = 0;
- entry = (struct uv_IO_APIC_route_entry *)&mmr_value;
- entry->vector = cfg->vector;
- entry->delivery_mode = apic->irq_delivery_mode;
- entry->dest_mode = apic->irq_dest_mode;
- entry->polarity = 0;
- entry->trigger = 0;
- entry->mask = 0;
- entry->dest = apic->cpu_mask_to_apicid(eligible_cpu);
-
- mmr_pnode = uv_blade_to_pnode(mmr_blade);
- uv_write_global_mmr64(mmr_pnode, mmr_offset, mmr_value);
-
- if (cfg->move_in_progress)
- send_cleanup_vector(cfg);
-
- return irq;
-}
-
-/*
- * Disable the specified MMR located on the specified blade so that MSIs are
- * longer allowed to be sent.
- */
-void arch_disable_uv_irq(int mmr_pnode, unsigned long mmr_offset)
-{
- unsigned long mmr_value;
- struct uv_IO_APIC_route_entry *entry;
-
- BUILD_BUG_ON(sizeof(struct uv_IO_APIC_route_entry) != sizeof(unsigned long));
-
- mmr_value = 0;
- entry = (struct uv_IO_APIC_route_entry *)&mmr_value;
- entry->mask = 1;
-
- uv_write_global_mmr64(mmr_pnode, mmr_offset, mmr_value);
-}
-
-int uv_set_irq_affinity(unsigned int irq, const struct cpumask *mask)
-{
- struct irq_desc *desc = irq_to_desc(irq);
- struct irq_cfg *cfg = desc->chip_data;
- unsigned int dest;
- unsigned long mmr_value;
- struct uv_IO_APIC_route_entry *entry;
- unsigned long mmr_offset;
- unsigned mmr_pnode;
-
- dest = set_desc_affinity(desc, mask);
- if (dest == BAD_APICID)
- return -1;
-
- mmr_value = 0;
- entry = (struct uv_IO_APIC_route_entry *)&mmr_value;
-
- entry->vector = cfg->vector;
- entry->delivery_mode = apic->irq_delivery_mode;
- entry->dest_mode = apic->irq_dest_mode;
- entry->polarity = 0;
- entry->trigger = 0;
- entry->mask = 0;
- entry->dest = dest;
-
- /* Get previously stored MMR and pnode of hub sourcing interrupts */
- if (uv_irq_2_mmr_info(irq, &mmr_offset, &mmr_pnode))
- return -1;
-
- uv_write_global_mmr64(mmr_pnode, mmr_offset, mmr_value);
-
- if (cfg->move_in_progress)
- send_cleanup_vector(cfg);
-
- return 0;
-}
-#endif /* CONFIG_X86_64 */
-
int __init io_apic_get_redir_entries (int ioapic)
{
union IO_APIC_reg_01 reg_01;
diff --git a/arch/x86/kernel/uv_irq.c b/arch/x86/kernel/uv_irq.c
index 9a83775..61d805d 100644
--- a/arch/x86/kernel/uv_irq.c
+++ b/arch/x86/kernel/uv_irq.c
@@ -18,13 +18,16 @@

/* MMR offset and pnode of hub sourcing interrupts for a given irq */
struct uv_irq_2_mmr_pnode{
- struct rb_node list;
- unsigned long offset;
- int pnode;
- int irq;
+ struct rb_node list;
+ unsigned long offset;
+ int pnode;
+ int irq;
};
-static spinlock_t uv_irq_lock;
-static struct rb_root uv_irq_root;
+
+static spinlock_t uv_irq_lock;
+static struct rb_root uv_irq_root;
+
+static int uv_set_irq_affinity(unsigned int, const struct cpumask *);

static void uv_noop(unsigned int irq)
{
@@ -132,6 +135,114 @@ int uv_irq_2_mmr_info(int irq, unsigned long *offset, int *pnode)
}

/*
+ * Re-target the irq to the specified CPU and enable the specified MMR located
+ * on the specified blade to allow the sending of MSIs to the specified CPU.
+ */
+static int
+arch_enable_uv_irq(char *irq_name, unsigned int irq, int cpu, int mmr_blade,
+ unsigned long mmr_offset, int restrict)
+{
+ const struct cpumask *eligible_cpu = cpumask_of(cpu);
+ struct irq_desc *desc = irq_to_desc(irq);
+ struct irq_cfg *cfg;
+ int mmr_pnode;
+ unsigned long mmr_value;
+ struct uv_IO_APIC_route_entry *entry;
+ int err;
+
+ BUILD_BUG_ON(sizeof(struct uv_IO_APIC_route_entry) !=
+ sizeof(unsigned long));
+
+ cfg = irq_cfg(irq);
+
+ err = assign_irq_vector(irq, cfg, eligible_cpu);
+ if (err != 0)
+ return err;
+
+ if (restrict == UV_AFFINITY_CPU)
+ desc->status |= IRQ_NO_BALANCING;
+ else
+ desc->status |= IRQ_MOVE_PCNTXT;
+
+ set_irq_chip_and_handler_name(irq, &uv_irq_chip, handle_percpu_irq,
+ irq_name);
+
+ mmr_value = 0;
+ entry = (struct uv_IO_APIC_route_entry *)&mmr_value;
+ entry->vector = cfg->vector;
+ entry->delivery_mode = apic->irq_delivery_mode;
+ entry->dest_mode = apic->irq_dest_mode;
+ entry->polarity = 0;
+ entry->trigger = 0;
+ entry->mask = 0;
+ entry->dest = apic->cpu_mask_to_apicid(eligible_cpu);
+
+ mmr_pnode = uv_blade_to_pnode(mmr_blade);
+ uv_write_global_mmr64(mmr_pnode, mmr_offset, mmr_value);
+
+ if (cfg->move_in_progress)
+ send_cleanup_vector(cfg);
+
+ return irq;
+}
+
+/*
+ * Disable the specified MMR located on the specified blade so that MSIs are
+ * longer allowed to be sent.
+ */
+static void arch_disable_uv_irq(int mmr_pnode, unsigned long mmr_offset)
+{
+ unsigned long mmr_value;
+ struct uv_IO_APIC_route_entry *entry;
+
+ BUILD_BUG_ON(sizeof(struct uv_IO_APIC_route_entry) !=
+ sizeof(unsigned long));
+
+ mmr_value = 0;
+ entry = (struct uv_IO_APIC_route_entry *)&mmr_value;
+ entry->mask = 1;
+
+ uv_write_global_mmr64(mmr_pnode, mmr_offset, mmr_value);
+}
+
+static int uv_set_irq_affinity(unsigned int irq, const struct cpumask *mask)
+{
+ struct irq_desc *desc = irq_to_desc(irq);
+ struct irq_cfg *cfg = desc->chip_data;
+ unsigned int dest;
+ unsigned long mmr_value;
+ struct uv_IO_APIC_route_entry *entry;
+ unsigned long mmr_offset;
+ unsigned mmr_pnode;
+
+ dest = set_desc_affinity(desc, mask);
+ if (dest == BAD_APICID)
+ return -1;
+
+ mmr_value = 0;
+ entry = (struct uv_IO_APIC_route_entry *)&mmr_value;
+
+ entry->vector = cfg->vector;
+ entry->delivery_mode = apic->irq_delivery_mode;
+ entry->dest_mode = apic->irq_dest_mode;
+ entry->polarity = 0;
+ entry->trigger = 0;
+ entry->mask = 0;
+ entry->dest = dest;
+
+ /* Get previously stored MMR and pnode of hub sourcing interrupts */
+ if (uv_irq_2_mmr_info(irq, &mmr_offset, &mmr_pnode))
+ return -1;
+
+ uv_write_global_mmr64(mmr_pnode, mmr_offset, mmr_value);
+
+ if (cfg->move_in_progress)
+ send_cleanup_vector(cfg);
+
+ return 0;
+}
+
+/*
* Set up a mapping of an available irq and vector, and enable the specified
* MMR that defines the MSI that is to be sent to the specified CPU when an
* interrupt is raised.