2009-10-15 01:15:31

by Dimitri Sivanich

[permalink] [raw]
Subject: [PATCH v2] x86/apic: limit irq affinity

This patch allows for hard restrictions to irq affinity via a new cpumask and
device node value in the irq_cfg structure.

The mask forces IRQ affinity to remain within the specified cpu domain.
On some UV systems, this domain will be limited to the nodes accessible
to the given node. Currently other X86 systems will have all bits in
the cpumask set, so non-UV systems will remain unaffected at this time.

Signed-off-by: Dimitri Sivanich <[email protected]>

---

Removed UV specific code from generic IO APIC code.

arch/x86/Kconfig | 1
arch/x86/include/asm/hw_irq.h | 3
arch/x86/include/asm/uv/uv_irq.h | 1
arch/x86/include/asm/uv/uv_mmrs.h | 25 +++++
arch/x86/kernel/apic/io_apic.c | 144 ++++++++++++++++++++++++++-------
arch/x86/kernel/apic/x2apic_uv_x.c | 2
arch/x86/kernel/uv_irq.c | 77 +++++++++++++++++
7 files changed, 225 insertions(+), 28 deletions(-)

Index: linux/arch/x86/kernel/apic/io_apic.c
===================================================================
--- linux.orig/arch/x86/kernel/apic/io_apic.c 2009-10-14 12:48:50.000000000 -0500
+++ linux/arch/x86/kernel/apic/io_apic.c 2009-10-14 15:04:23.000000000 -0500
@@ -168,6 +168,19 @@ void __init io_apic_disable_legacy(void)
nr_irqs_gsi = 0;
}

+void (*set_irq_cfg_allowed)(cpumask_var_t, int) = NULL;
+/*
+ * Setup IRQ affinity restriction.
+ */
+static void set_irq_cfg_cpus_allowed(struct irq_cfg *irq_cfg)
+{
+ if (set_irq_cfg_allowed)
+ set_irq_cfg_allowed(irq_cfg->allowed, irq_cfg->node);
+ else
+ /* Default to allow anything */
+ cpumask_setall(irq_cfg->allowed);
+}
+
int __init arch_early_irq_init(void)
{
struct irq_cfg *cfg;
@@ -183,8 +196,11 @@ int __init arch_early_irq_init(void)
for (i = 0; i < count; i++) {
desc = irq_to_desc(i);
desc->chip_data = &cfg[i];
+ cfg->node = node;
zalloc_cpumask_var_node(&cfg[i].domain, GFP_NOWAIT, node);
zalloc_cpumask_var_node(&cfg[i].old_domain, GFP_NOWAIT, node);
+ zalloc_cpumask_var_node(&cfg[i].allowed, GFP_NOWAIT, node);
+ set_irq_cfg_cpus_allowed(&cfg[i]);
if (i < nr_legacy_irqs)
cpumask_setall(cfg[i].domain);
}
@@ -213,12 +229,19 @@ static struct irq_cfg *get_one_free_irq_
if (cfg) {
if (!zalloc_cpumask_var_node(&cfg->domain, GFP_ATOMIC, node)) {
kfree(cfg);
- cfg = NULL;
- } else if (!zalloc_cpumask_var_node(&cfg->old_domain,
+ return NULL;
+ }
+ if (!zalloc_cpumask_var_node(&cfg->old_domain,
GFP_ATOMIC, node)) {
free_cpumask_var(cfg->domain);
kfree(cfg);
- cfg = NULL;
+ return NULL;
+ }
+ if (!zalloc_cpumask_var_node(&cfg->allowed, GFP_ATOMIC, node)) {
+ free_cpumask_var(cfg->old_domain);
+ free_cpumask_var(cfg->domain);
+ kfree(cfg);
+ return NULL;
}
}

@@ -231,12 +254,14 @@ int arch_init_chip_data(struct irq_desc

cfg = desc->chip_data;
if (!cfg) {
- desc->chip_data = get_one_free_irq_cfg(node);
+ cfg = desc->chip_data = get_one_free_irq_cfg(node);
if (!desc->chip_data) {
printk(KERN_ERR "can not alloc irq_cfg\n");
BUG_ON(1);
}
}
+ cfg->node = node;
+ set_irq_cfg_cpus_allowed(cfg);

return 0;
}
@@ -318,6 +343,10 @@ void arch_init_copy_chip_data(struct irq

memcpy(cfg, old_cfg, sizeof(struct irq_cfg));

+ cfg->node = node;
+
+ set_irq_cfg_cpus_allowed(cfg);
+
init_copy_irq_2_pin(old_cfg, cfg, node);
}

@@ -1428,16 +1457,23 @@ static void setup_IO_APIC_irq(int apic_i
struct irq_cfg *cfg;
struct IO_APIC_route_entry entry;
unsigned int dest;
+ cpumask_var_t tmp_mask;

if (!IO_APIC_IRQ(irq))
return;

cfg = desc->chip_data;

- if (assign_irq_vector(irq, cfg, apic->target_cpus()))
+ if (!alloc_cpumask_var(&tmp_mask, GFP_ATOMIC))
return;

- dest = apic->cpu_mask_to_apicid_and(cfg->domain, apic->target_cpus());
+ if (!cpumask_and(tmp_mask, apic->target_cpus(), cfg->allowed))
+ goto error;
+
+ if (assign_irq_vector(irq, cfg, tmp_mask))
+ goto error;
+
+ dest = apic->cpu_mask_to_apicid_and(cfg->domain, tmp_mask);

apic_printk(APIC_VERBOSE,KERN_DEBUG
"IOAPIC[%d]: Set routing entry (%d-%d -> 0x%x -> "
@@ -1451,7 +1487,7 @@ static void setup_IO_APIC_irq(int apic_i
printk("Failed to setup ioapic entry for ioapic %d, pin %d\n",
mp_ioapics[apic_id].apicid, pin);
__clear_irq_vector(irq, cfg);
- return;
+ goto error;
}

ioapic_register_intr(irq, desc, trigger);
@@ -1459,6 +1495,9 @@ static void setup_IO_APIC_irq(int apic_i
disable_8259A_irq(irq);

ioapic_write_entry(apic_id, pin, entry);
+error:
+ free_cpumask_var(tmp_mask);
+ return;
}

static struct {
@@ -2265,18 +2304,32 @@ set_desc_affinity(struct irq_desc *desc,
{
struct irq_cfg *cfg;
unsigned int irq;
-
- if (!cpumask_intersects(mask, cpu_online_mask))
- return BAD_APICID;
+ cpumask_var_t tmp_mask;

irq = desc->irq;
cfg = desc->chip_data;
- if (assign_irq_vector(irq, cfg, mask))
+
+ if (!alloc_cpumask_var(&tmp_mask, GFP_ATOMIC))
return BAD_APICID;

- cpumask_copy(desc->affinity, mask);
+ if (!cpumask_and(tmp_mask, mask, cfg->allowed))
+ goto error;
+
+ if (!cpumask_intersects(tmp_mask, cpu_online_mask))
+ goto error;
+
+ if (assign_irq_vector(irq, cfg, tmp_mask))
+ goto error;
+
+ cpumask_copy(desc->affinity, tmp_mask);
+
+ free_cpumask_var(tmp_mask);

return apic->cpu_mask_to_apicid_and(desc->affinity, cfg->domain);
+
+error:
+ free_cpumask_var(tmp_mask);
+ return BAD_APICID;
}

static int
@@ -2332,22 +2385,32 @@ migrate_ioapic_irq_desc(struct irq_desc
{
struct irq_cfg *cfg;
struct irte irte;
+ cpumask_var_t tmp_mask;
unsigned int dest;
unsigned int irq;
int ret = -1;

- if (!cpumask_intersects(mask, cpu_online_mask))
+ irq = desc->irq;
+ cfg = desc->chip_data;
+
+ if (!alloc_cpumask_var(&tmp_mask, GFP_ATOMIC))
return ret;

- irq = desc->irq;
+ if (!cpumask_and(tmp_mask, mask, cfg->allowed))
+ goto error;
+
+ if (!cpumask_intersects(tmp_mask, cpu_online_mask))
+ goto error;
+
if (get_irte(irq, &irte))
- return ret;
+ goto error;

- cfg = desc->chip_data;
- if (assign_irq_vector(irq, cfg, mask))
- return ret;
+ if (assign_irq_vector(irq, cfg, tmp_mask))
+ goto error;
+
+ ret = 0;

- dest = apic->cpu_mask_to_apicid_and(cfg->domain, mask);
+ dest = apic->cpu_mask_to_apicid_and(cfg->domain, tmp_mask);

irte.vector = cfg->vector;
irte.dest_id = IRTE_DEST(dest);
@@ -2360,9 +2423,10 @@ migrate_ioapic_irq_desc(struct irq_desc
if (cfg->move_in_progress)
send_cleanup_vector(cfg);

- cpumask_copy(desc->affinity, mask);
-
- return 0;
+ cpumask_copy(desc->affinity, tmp_mask);
+error:
+ free_cpumask_var(tmp_mask);
+ return ret;
}

/*
@@ -3146,6 +3210,8 @@ unsigned int create_irq_nr(unsigned int

if (irq > 0) {
dynamic_irq_init(irq);
+ cfg_new->node = node;
+ set_irq_cfg_cpus_allowed(cfg_new);
/* restore it, in case dynamic_irq_init clear it */
if (desc_new)
desc_new->chip_data = cfg_new;
@@ -3197,16 +3263,25 @@ static int msi_compose_msg(struct pci_de
struct irq_cfg *cfg;
int err;
unsigned dest;
+ cpumask_var_t tmp_mask;

if (disable_apic)
return -ENXIO;

cfg = irq_cfg(irq);
- err = assign_irq_vector(irq, cfg, apic->target_cpus());
+ if (!alloc_cpumask_var(&tmp_mask, GFP_ATOMIC))
+ return -ENOMEM;
+
+ if (!cpumask_and(tmp_mask, apic->target_cpus(), cfg->allowed)) {
+ err = -ENOSPC;
+ goto error;
+ }
+
+ err = assign_irq_vector(irq, cfg, tmp_mask);
if (err)
- return err;
+ goto error;

- dest = apic->cpu_mask_to_apicid_and(cfg->domain, apic->target_cpus());
+ dest = apic->cpu_mask_to_apicid_and(cfg->domain, tmp_mask);

if (irq_remapped(irq)) {
struct irte irte;
@@ -3264,6 +3339,8 @@ static int msi_compose_msg(struct pci_de
MSI_DATA_DELIVERY_LOWPRI) |
MSI_DATA_VECTOR(cfg->vector);
}
+error:
+ free_cpumask_var(tmp_mask);
return err;
}

@@ -3681,19 +3758,28 @@ static struct irq_chip ht_irq_chip = {
int arch_setup_ht_irq(unsigned int irq, struct pci_dev *dev)
{
struct irq_cfg *cfg;
+ cpumask_var_t tmp_mask;
int err;

if (disable_apic)
return -ENXIO;

cfg = irq_cfg(irq);
- err = assign_irq_vector(irq, cfg, apic->target_cpus());
+
+ if (!alloc_cpumask_var(&tmp_mask, GFP_ATOMIC))
+ return -ENOMEM;
+
+ if (!cpumask_and(tmp_mask, apic->target_cpus(), cfg->allowed)) {
+ err = -ENOSPC;
+ goto error;
+ }
+
+ err = assign_irq_vector(irq, cfg, tmp_mask);
if (!err) {
struct ht_irq_msg msg;
unsigned dest;

- dest = apic->cpu_mask_to_apicid_and(cfg->domain,
- apic->target_cpus());
+ dest = apic->cpu_mask_to_apicid_and(cfg->domain, tmp_mask);

msg.address_hi = HT_IRQ_HIGH_DEST_ID(dest);

@@ -3717,6 +3803,8 @@ int arch_setup_ht_irq(unsigned int irq,

dev_printk(KERN_DEBUG, &dev->dev, "irq %d for HT\n", irq);
}
+error:
+ free_cpumask_var(tmp_mask);
return err;
}
#endif /* CONFIG_HT_IRQ */
Index: linux/arch/x86/include/asm/uv/uv_mmrs.h
===================================================================
--- linux.orig/arch/x86/include/asm/uv/uv_mmrs.h 2009-10-14 12:44:57.000000000 -0500
+++ linux/arch/x86/include/asm/uv/uv_mmrs.h 2009-10-14 12:48:50.000000000 -0500
@@ -823,6 +823,31 @@ union uvh_lb_mcast_aoerr0_rpt_enable_u {
};

/* ========================================================================= */
+/* UVH_LB_SOCKET_DESTINATION_TABLE */
+/* ========================================================================= */
+#define UVH_LB_SOCKET_DESTINATION_TABLE 0x321000UL
+#define UVH_LB_SOCKET_DESTINATION_TABLE_32 0x1800
+#define UVH_LB_SOCKET_DESTINATION_TABLE_DEPTH 128
+
+#define UVH_LB_SOCKET_DESTINATION_TABLE_NODE_ID_SHFT 1
+#define UVH_LB_SOCKET_DESTINATION_TABLE_NODE_ID_MASK 0x0000000000007ffeUL
+#define UVH_LB_SOCKET_DESTINATION_TABLE_CHIP_ID_SHFT 15
+#define UVH_LB_SOCKET_DESTINATION_TABLE_CHIP_ID_MASK 0x0000000000008000UL
+#define UVH_LB_SOCKET_DESTINATION_TABLE_PARITY_SHFT 16
+#define UVH_LB_SOCKET_DESTINATION_TABLE_PARITY_MASK 0x0000000000010000UL
+
+union uvh_lb_socket_destination_table_u {
+ unsigned long v;
+ struct uvh_lb_socket_destination_table_s {
+ unsigned long rsvd_0 : 1; /* */
+ unsigned long node_id : 14; /* RW */
+ unsigned long chip_id : 1; /* RW */
+ unsigned long parity : 1; /* RW */
+ unsigned long rsvd_17_63: 47; /* */
+ } s;
+};
+
+/* ========================================================================= */
/* UVH_LOCAL_INT0_CONFIG */
/* ========================================================================= */
#define UVH_LOCAL_INT0_CONFIG 0x61000UL
Index: linux/arch/x86/Kconfig
===================================================================
--- linux.orig/arch/x86/Kconfig 2009-10-14 12:44:57.000000000 -0500
+++ linux/arch/x86/Kconfig 2009-10-14 15:04:23.000000000 -0500
@@ -365,6 +365,7 @@ config X86_UV
depends on X86_EXTENDED_PLATFORM
depends on NUMA
depends on X86_X2APIC
+ depends on NUMA_IRQ_DESC
---help---
This option is needed in order to support SGI Ultraviolet systems.
If you don't have one of these, you should say N here.
Index: linux/arch/x86/kernel/uv_irq.c
===================================================================
--- linux.orig/arch/x86/kernel/uv_irq.c 2009-10-14 12:48:50.000000000 -0500
+++ linux/arch/x86/kernel/uv_irq.c 2009-10-14 14:27:10.000000000 -0500
@@ -241,6 +241,83 @@ static int uv_set_irq_affinity(unsigned
}

/*
+ * Setup the cpumask for IRQ restriction for a given UV node.
+ */
+static void uv_set_irq_cfg_cpus_allowed(struct cpumask *mask, int node)
+{
+#ifdef CONFIG_SPARSE_IRQ
+ unsigned long pnode_tbl[UVH_LB_SOCKET_DESTINATION_TABLE_DEPTH];
+ unsigned long *pa, *pa_end;
+ int cpu, i;
+
+ /* Assume nodes accessible from node 0 */
+ if (node < 0)
+ node = 0;
+
+ pa = uv_global_mmr64_address(uv_node_to_pnode(node),
+ UVH_LB_SOCKET_DESTINATION_TABLE);
+
+ for (i = 0; i < UVH_LB_SOCKET_DESTINATION_TABLE_DEPTH; pa++, i++)
+ pnode_tbl[i] = UV_NASID_TO_PNODE(*pa &
+ UVH_LB_SOCKET_DESTINATION_TABLE_NODE_ID_MASK);
+
+ cpumask_clear(mask);
+
+ pa = pnode_tbl;
+ pa_end = pa + UVH_LB_SOCKET_DESTINATION_TABLE_DEPTH;
+
+ /* Select the cpus on nodes accessible from our hub */
+ for_each_possible_cpu(cpu) {
+ int p = uv_cpu_to_pnode(cpu);
+
+ if (p < *pa) {
+ while (p < *pa) {
+ pa--;
+ if (pa < pnode_tbl) {
+ pa++;
+ break;
+ }
+ }
+ if (*pa == p)
+ cpumask_set_cpu(cpu, mask);
+ continue;
+ }
+
+ while (*pa < p) {
+ pa++;
+ if (pa == pa_end) {
+ pa--;
+ break;
+ }
+ }
+
+ if (*pa == p)
+ cpumask_set_cpu(cpu, mask);
+ }
+#else
+ cpumask_setall(mask);
+#endif
+}
+
+/*
+ * Setup IRQ affinity restriction for IRQ's setup prior to the availability
+ * of UV topology information.
+ */
+void arch_init_uv_cfg_cpus_allowed(void)
+{
+ struct irq_cfg *cfg;
+ int i;
+
+ set_irq_cfg_allowed = uv_set_irq_cfg_cpus_allowed;
+ /* Set allowed mask now that topology information is known */
+ for (i = 0; i < NR_IRQS; i++) {
+ cfg = irq_cfg(i);
+ if (cfg)
+ uv_set_irq_cfg_cpus_allowed(cfg->allowed, cfg->node);
+ }
+}
+
+/*
* Set up a mapping of an available irq and vector, and enable the specified
* MMR that defines the MSI that is to be sent to the specified CPU when an
* interrupt is raised.
Index: linux/arch/x86/kernel/apic/x2apic_uv_x.c
===================================================================
--- linux.orig/arch/x86/kernel/apic/x2apic_uv_x.c 2009-10-14 12:44:57.000000000 -0500
+++ linux/arch/x86/kernel/apic/x2apic_uv_x.c 2009-10-14 15:04:23.000000000 -0500
@@ -23,6 +23,7 @@

#include <asm/uv/uv_mmrs.h>
#include <asm/uv/uv_hub.h>
+#include <asm/uv/uv_irq.h>
#include <asm/current.h>
#include <asm/pgtable.h>
#include <asm/uv/bios.h>
@@ -659,5 +660,6 @@ void __init uv_system_init(void)

uv_cpu_init();
uv_scir_register_cpu_notifier();
+ arch_init_uv_cfg_cpus_allowed();
proc_mkdir("sgi_uv", NULL);
}
Index: linux/arch/x86/include/asm/hw_irq.h
===================================================================
--- linux.orig/arch/x86/include/asm/hw_irq.h 2009-10-14 12:48:50.000000000 -0500
+++ linux/arch/x86/include/asm/hw_irq.h 2009-10-14 12:48:50.000000000 -0500
@@ -94,11 +94,14 @@ struct irq_cfg {
struct irq_pin_list *irq_2_pin;
cpumask_var_t domain;
cpumask_var_t old_domain;
+ cpumask_var_t allowed;
+ int node;
unsigned move_cleanup_count;
u8 vector;
u8 move_in_progress : 1;
};

+extern void (*set_irq_cfg_allowed)(cpumask_var_t, int);
extern struct irq_cfg *irq_cfg(unsigned int);
extern int assign_irq_vector(int, struct irq_cfg *, const struct cpumask *);
extern void send_cleanup_vector(struct irq_cfg *);
Index: linux/arch/x86/include/asm/uv/uv_irq.h
===================================================================
--- linux.orig/arch/x86/include/asm/uv/uv_irq.h 2009-10-14 12:48:50.000000000 -0500
+++ linux/arch/x86/include/asm/uv/uv_irq.h 2009-10-14 12:48:50.000000000 -0500
@@ -31,6 +31,7 @@ enum {
UV_AFFINITY_CPU
};

+extern void arch_init_uv_cfg_cpus_allowed(void);
extern int uv_irq_2_mmr_info(int, unsigned long *, int *);
extern int uv_setup_irq(char *, int, int, unsigned long, int);
extern void uv_teardown_irq(unsigned int);


2009-10-15 05:31:27

by Yinghai Lu

[permalink] [raw]
Subject: Re: [PATCH v2] x86/apic: limit irq affinity

Dimitri Sivanich wrote:
> This patch allows for hard restrictions to irq affinity via a new cpumask and
> device node value in the irq_cfg structure.
>
> The mask forces IRQ affinity to remain within the specified cpu domain.
> On some UV systems, this domain will be limited to the nodes accessible
> to the given node. Currently other X86 systems will have all bits in
> the cpumask set, so non-UV systems will remain unaffected at this time.
>
> Signed-off-by: Dimitri Sivanich <[email protected]>
>
> ---
>
> Removed UV specific code from generic IO APIC code.
>
> arch/x86/Kconfig | 1
> arch/x86/include/asm/hw_irq.h | 3
> arch/x86/include/asm/uv/uv_irq.h | 1
> arch/x86/include/asm/uv/uv_mmrs.h | 25 +++++
> arch/x86/kernel/apic/io_apic.c | 144 ++++++++++++++++++++++++++-------
> arch/x86/kernel/apic/x2apic_uv_x.c | 2
> arch/x86/kernel/uv_irq.c | 77 +++++++++++++++++
> 7 files changed, 225 insertions(+), 28 deletions(-)
>
> Index: linux/arch/x86/kernel/apic/io_apic.c
> ===================================================================
> --- linux.orig/arch/x86/kernel/apic/io_apic.c 2009-10-14 12:48:50.000000000 -0500
> +++ linux/arch/x86/kernel/apic/io_apic.c 2009-10-14 15:04:23.000000000 -0500
> @@ -168,6 +168,19 @@ void __init io_apic_disable_legacy(void)
> nr_irqs_gsi = 0;
> }
>
> +void (*set_irq_cfg_allowed)(cpumask_var_t, int) = NULL;
> +/*
> + * Setup IRQ affinity restriction.
> + */
> +static void set_irq_cfg_cpus_allowed(struct irq_cfg *irq_cfg)
> +{
> + if (set_irq_cfg_allowed)
> + set_irq_cfg_allowed(irq_cfg->allowed, irq_cfg->node);
> + else
> + /* Default to allow anything */
> + cpumask_setall(irq_cfg->allowed);
> +}
> +
> int __init arch_early_irq_init(void)
> {
> struct irq_cfg *cfg;
> @@ -183,8 +196,11 @@ int __init arch_early_irq_init(void)
> for (i = 0; i < count; i++) {
> desc = irq_to_desc(i);
> desc->chip_data = &cfg[i];
> + cfg->node = node;
> zalloc_cpumask_var_node(&cfg[i].domain, GFP_NOWAIT, node);
> zalloc_cpumask_var_node(&cfg[i].old_domain, GFP_NOWAIT, node);
> + zalloc_cpumask_var_node(&cfg[i].allowed, GFP_NOWAIT, node);
> + set_irq_cfg_cpus_allowed(&cfg[i]);
> if (i < nr_legacy_irqs)
> cpumask_setall(cfg[i].domain);
> }
> @@ -213,12 +229,19 @@ static struct irq_cfg *get_one_free_irq_
> if (cfg) {
> if (!zalloc_cpumask_var_node(&cfg->domain, GFP_ATOMIC, node)) {
> kfree(cfg);
> - cfg = NULL;
> - } else if (!zalloc_cpumask_var_node(&cfg->old_domain,
> + return NULL;
> + }
> + if (!zalloc_cpumask_var_node(&cfg->old_domain,
> GFP_ATOMIC, node)) {
> free_cpumask_var(cfg->domain);
> kfree(cfg);
> - cfg = NULL;
> + return NULL;
> + }
> + if (!zalloc_cpumask_var_node(&cfg->allowed, GFP_ATOMIC, node)) {
> + free_cpumask_var(cfg->old_domain);
> + free_cpumask_var(cfg->domain);
> + kfree(cfg);
> + return NULL;
> }
> }
>
> @@ -231,12 +254,14 @@ int arch_init_chip_data(struct irq_desc
>
> cfg = desc->chip_data;
> if (!cfg) {
> - desc->chip_data = get_one_free_irq_cfg(node);
> + cfg = desc->chip_data = get_one_free_irq_cfg(node);
> if (!desc->chip_data) {
> printk(KERN_ERR "can not alloc irq_cfg\n");
> BUG_ON(1);
> }
> }
> + cfg->node = node;
> + set_irq_cfg_cpus_allowed(cfg);
>
> return 0;
> }
> @@ -318,6 +343,10 @@ void arch_init_copy_chip_data(struct irq
>
> memcpy(cfg, old_cfg, sizeof(struct irq_cfg));
>
> + cfg->node = node;
> +
> + set_irq_cfg_cpus_allowed(cfg);
> +
> init_copy_irq_2_pin(old_cfg, cfg, node);
> }
>
> @@ -1428,16 +1457,23 @@ static void setup_IO_APIC_irq(int apic_i
> struct irq_cfg *cfg;
> struct IO_APIC_route_entry entry;
> unsigned int dest;
> + cpumask_var_t tmp_mask;
>
> if (!IO_APIC_IRQ(irq))
> return;
>
> cfg = desc->chip_data;
>
> - if (assign_irq_vector(irq, cfg, apic->target_cpus()))
> + if (!alloc_cpumask_var(&tmp_mask, GFP_ATOMIC))
> return;
>
> - dest = apic->cpu_mask_to_apicid_and(cfg->domain, apic->target_cpus());
> + if (!cpumask_and(tmp_mask, apic->target_cpus(), cfg->allowed))
> + goto error;
> +
> + if (assign_irq_vector(irq, cfg, tmp_mask))
> + goto error;
> +
> + dest = apic->cpu_mask_to_apicid_and(cfg->domain, tmp_mask);

can you check if we can reuse target_cpus for this purpose?

YH

2009-10-15 14:01:55

by Dimitri Sivanich

[permalink] [raw]
Subject: Re: [PATCH v2] x86/apic: limit irq affinity

On Wed, Oct 14, 2009 at 10:30:12PM -0700, Yinghai Lu wrote:
> Dimitri Sivanich wrote:
> > This patch allows for hard restrictions to irq affinity via a new cpumask and
> > device node value in the irq_cfg structure.
> >
> > The mask forces IRQ affinity to remain within the specified cpu domain.
> > On some UV systems, this domain will be limited to the nodes accessible
> > to the given node. Currently other X86 systems will have all bits in
> > the cpumask set, so non-UV systems will remain unaffected at this time.
> >
>
> can you check if we can reuse target_cpus for this purpose?
>

Yinghai,

The 'target_cpus' mask is in struct 'apic'. It is a platform level mask
(only one mask per platform).

The 'allowed' mask that I am adding is a per irq level mask (one mask per irq).
Each irq might be coming from a device attached to a different node, and each
of those nodes might require its irqs to have a different mask.

For example, say there is a pci device attached to node 2. Node 2 might only
want (or be able) to route its interrupts to nodes 0-127. It would have
'allowed' mask allowing only processors existing on the first 128 nodes. A
pci device attached to node 150 would have a different allowed mask, since it
would route its interrupts to a different set of nodes.

Use of something like 'target_cpus' would require recalculating this every
time someone changes affinity, instead of just 'and'ing in the mask.

Dimitri

2009-10-20 12:57:59

by Dimitri Sivanich

[permalink] [raw]
Subject: Re: [PATCH v2] x86/apic: limit irq affinity

On Thu, Oct 15, 2009 at 08:50:39AM -0500, Dimitri Sivanich wrote:
> On Wed, Oct 14, 2009 at 10:30:12PM -0700, Yinghai Lu wrote:
> > Dimitri Sivanich wrote:
> > > This patch allows for hard restrictions to irq affinity via a new cpumask and
> > > device node value in the irq_cfg structure.
> > >
> > > The mask forces IRQ affinity to remain within the specified cpu domain.
> > > On some UV systems, this domain will be limited to the nodes accessible
> > > to the given node. Currently other X86 systems will have all bits in
> > > the cpumask set, so non-UV systems will remain unaffected at this time.
> > >
> >
> > can you check if we can reuse target_cpus for this purpose?
> >
>
> The 'target_cpus' mask is in struct 'apic'. It is a platform level mask
> (only one mask per platform).
>
> The 'allowed' mask that I am adding is a per irq level mask (one mask per irq).
> Each irq might be coming from a device attached to a different node, and each
> of those nodes might require its irqs to have a different mask.
>

Assuming that the real issue here is in adding any more cpumasks to irq_cfg, I've created another version of the patch that does not add the cpumask to irq_cfg. The UV specific irq code will store these cpumasks (one per node).

Will send this shortly.

2009-10-20 13:39:20

by Dimitri Sivanich

[permalink] [raw]
Subject: [PATCH v3] x86/apic: limit irq affinity

This patch allows for hard restrictions to irq affinity on x86 systems.

Affinity is masked to allow only those cpus which the subarchitecture
deems accessible by the given irq.

On some UV systems, this domain will be limited to the nodes accessible
to the irq's node. Initially other X86 systems will not mask off any cpus
so non-UV systems will remain unaffected.

Signed-off-by: Dimitri Sivanich <[email protected]>

---

Removed allowed cpumask from irq_cfg. Storing allowed cpumasks in UV
specific IRQ code.

arch/x86/Kconfig | 1
arch/x86/include/asm/hw_irq.h | 3
arch/x86/include/asm/uv/uv_irq.h | 1
arch/x86/include/asm/uv/uv_mmrs.h | 25 ++++++
arch/x86/kernel/apic/io_apic.c | 123 ++++++++++++++++++++++++++-------
arch/x86/kernel/apic/x2apic_uv_x.c | 4 -
arch/x86/kernel/uv_irq.c | 58 +++++++++++++++
7 files changed, 189 insertions(+), 26 deletions(-)

Index: linux/arch/x86/kernel/apic/io_apic.c
===================================================================
--- linux.orig/arch/x86/kernel/apic/io_apic.c 2009-10-19 15:22:52.000000000 -0500
+++ linux/arch/x86/kernel/apic/io_apic.c 2009-10-19 20:57:29.000000000 -0500
@@ -168,6 +168,17 @@ void __init io_apic_disable_legacy(void)
nr_irqs_gsi = 0;
}

+static int default_irq_allowed_and(struct irq_cfg *cfg, struct cpumask *dstp,
+ const struct cpumask *srcp)
+{
+ cpumask_copy(dstp, srcp);
+
+ return 1;
+}
+
+int (*x86_irq_allowed_and)(struct irq_cfg *, struct cpumask *,
+ const struct cpumask *) = default_irq_allowed_and;
+
int __init arch_early_irq_init(void)
{
struct irq_cfg *cfg;
@@ -183,6 +194,7 @@ int __init arch_early_irq_init(void)
for (i = 0; i < count; i++) {
desc = irq_to_desc(i);
desc->chip_data = &cfg[i];
+ cfg->node = node;
zalloc_cpumask_var_node(&cfg[i].domain, GFP_NOWAIT, node);
zalloc_cpumask_var_node(&cfg[i].old_domain, GFP_NOWAIT, node);
if (i < nr_legacy_irqs)
@@ -231,12 +243,13 @@ int arch_init_chip_data(struct irq_desc

cfg = desc->chip_data;
if (!cfg) {
- desc->chip_data = get_one_free_irq_cfg(node);
+ cfg = desc->chip_data = get_one_free_irq_cfg(node);
if (!desc->chip_data) {
printk(KERN_ERR "can not alloc irq_cfg\n");
BUG_ON(1);
}
}
+ cfg->node = node;

return 0;
}
@@ -318,6 +331,8 @@ void arch_init_copy_chip_data(struct irq

memcpy(cfg, old_cfg, sizeof(struct irq_cfg));

+ cfg->node = node;
+
init_copy_irq_2_pin(old_cfg, cfg, node);
}

@@ -1428,16 +1443,23 @@ static void setup_IO_APIC_irq(int apic_i
struct irq_cfg *cfg;
struct IO_APIC_route_entry entry;
unsigned int dest;
+ cpumask_var_t tmp_mask;

if (!IO_APIC_IRQ(irq))
return;

cfg = desc->chip_data;

- if (assign_irq_vector(irq, cfg, apic->target_cpus()))
+ if (!alloc_cpumask_var(&tmp_mask, GFP_ATOMIC))
return;

- dest = apic->cpu_mask_to_apicid_and(cfg->domain, apic->target_cpus());
+ if (!x86_irq_allowed_and(cfg, tmp_mask, apic->target_cpus()))
+ goto error;
+
+ if (assign_irq_vector(irq, cfg, tmp_mask))
+ goto error;
+
+ dest = apic->cpu_mask_to_apicid_and(cfg->domain, tmp_mask);

apic_printk(APIC_VERBOSE,KERN_DEBUG
"IOAPIC[%d]: Set routing entry (%d-%d -> 0x%x -> "
@@ -1451,7 +1473,7 @@ static void setup_IO_APIC_irq(int apic_i
printk("Failed to setup ioapic entry for ioapic %d, pin %d\n",
mp_ioapics[apic_id].apicid, pin);
__clear_irq_vector(irq, cfg);
- return;
+ goto error;
}

ioapic_register_intr(irq, desc, trigger);
@@ -1459,6 +1481,9 @@ static void setup_IO_APIC_irq(int apic_i
disable_8259A_irq(irq);

ioapic_write_entry(apic_id, pin, entry);
+error:
+ free_cpumask_var(tmp_mask);
+ return;
}

static struct {
@@ -2282,18 +2307,32 @@ set_desc_affinity(struct irq_desc *desc,
{
struct irq_cfg *cfg;
unsigned int irq;
-
- if (!cpumask_intersects(mask, cpu_online_mask))
- return BAD_APICID;
+ cpumask_var_t tmp_mask;

irq = desc->irq;
cfg = desc->chip_data;
- if (assign_irq_vector(irq, cfg, mask))
+
+ if (!alloc_cpumask_var(&tmp_mask, GFP_ATOMIC))
return BAD_APICID;

- cpumask_copy(desc->affinity, mask);
+ if (!x86_irq_allowed_and(cfg, tmp_mask, mask))
+ goto error;
+
+ if (!cpumask_intersects(tmp_mask, cpu_online_mask))
+ goto error;
+
+ if (assign_irq_vector(irq, cfg, tmp_mask))
+ goto error;
+
+ cpumask_copy(desc->affinity, tmp_mask);
+
+ free_cpumask_var(tmp_mask);

return apic->cpu_mask_to_apicid_and(desc->affinity, cfg->domain);
+
+error:
+ free_cpumask_var(tmp_mask);
+ return BAD_APICID;
}

static int
@@ -2349,22 +2388,32 @@ migrate_ioapic_irq_desc(struct irq_desc
{
struct irq_cfg *cfg;
struct irte irte;
+ cpumask_var_t tmp_mask;
unsigned int dest;
unsigned int irq;
int ret = -1;

- if (!cpumask_intersects(mask, cpu_online_mask))
+ irq = desc->irq;
+ cfg = desc->chip_data;
+
+ if (!alloc_cpumask_var(&tmp_mask, GFP_ATOMIC))
return ret;

- irq = desc->irq;
+ if (!x86_irq_allowed_and(cfg, tmp_mask, mask))
+ goto error;
+
+ if (!cpumask_intersects(tmp_mask, cpu_online_mask))
+ goto error;
+
if (get_irte(irq, &irte))
- return ret;
+ goto error;

- cfg = desc->chip_data;
- if (assign_irq_vector(irq, cfg, mask))
- return ret;
+ if (assign_irq_vector(irq, cfg, tmp_mask))
+ goto error;

- dest = apic->cpu_mask_to_apicid_and(cfg->domain, mask);
+ ret = 0;
+
+ dest = apic->cpu_mask_to_apicid_and(cfg->domain, tmp_mask);

irte.vector = cfg->vector;
irte.dest_id = IRTE_DEST(dest);
@@ -2377,9 +2426,10 @@ migrate_ioapic_irq_desc(struct irq_desc
if (cfg->move_in_progress)
send_cleanup_vector(cfg);

- cpumask_copy(desc->affinity, mask);
-
- return 0;
+ cpumask_copy(desc->affinity, tmp_mask);
+error:
+ free_cpumask_var(tmp_mask);
+ return ret;
}

/*
@@ -3163,6 +3213,7 @@ unsigned int create_irq_nr(unsigned int

if (irq > 0) {
dynamic_irq_init(irq);
+ cfg_new->node = node;
/* restore it, in case dynamic_irq_init clear it */
if (desc_new)
desc_new->chip_data = cfg_new;
@@ -3214,16 +3265,25 @@ static int msi_compose_msg(struct pci_de
struct irq_cfg *cfg;
int err;
unsigned dest;
+ cpumask_var_t tmp_mask;

if (disable_apic)
return -ENXIO;

cfg = irq_cfg(irq);
- err = assign_irq_vector(irq, cfg, apic->target_cpus());
+ if (!alloc_cpumask_var(&tmp_mask, GFP_ATOMIC))
+ return -ENOMEM;
+
+ if (!x86_irq_allowed_and(cfg, tmp_mask, apic->target_cpus())) {
+ err = -ENOSPC;
+ goto error;
+ }
+
+ err = assign_irq_vector(irq, cfg, tmp_mask);
if (err)
- return err;
+ goto error;

- dest = apic->cpu_mask_to_apicid_and(cfg->domain, apic->target_cpus());
+ dest = apic->cpu_mask_to_apicid_and(cfg->domain, tmp_mask);

if (irq_remapped(irq)) {
struct irte irte;
@@ -3281,6 +3341,8 @@ static int msi_compose_msg(struct pci_de
MSI_DATA_DELIVERY_LOWPRI) |
MSI_DATA_VECTOR(cfg->vector);
}
+error:
+ free_cpumask_var(tmp_mask);
return err;
}

@@ -3698,19 +3760,28 @@ static struct irq_chip ht_irq_chip = {
int arch_setup_ht_irq(unsigned int irq, struct pci_dev *dev)
{
struct irq_cfg *cfg;
+ cpumask_var_t tmp_mask;
int err;

if (disable_apic)
return -ENXIO;

cfg = irq_cfg(irq);
- err = assign_irq_vector(irq, cfg, apic->target_cpus());
+
+ if (!alloc_cpumask_var(&tmp_mask, GFP_ATOMIC))
+ return -ENOMEM;
+
+ if (!x86_irq_allowed_and(cfg, tmp_mask, apic->target_cpus())) {
+ err = -ENOSPC;
+ goto error;
+ }
+
+ err = assign_irq_vector(irq, cfg, tmp_mask);
if (!err) {
struct ht_irq_msg msg;
unsigned dest;

- dest = apic->cpu_mask_to_apicid_and(cfg->domain,
- apic->target_cpus());
+ dest = apic->cpu_mask_to_apicid_and(cfg->domain, tmp_mask);

msg.address_hi = HT_IRQ_HIGH_DEST_ID(dest);

@@ -3734,6 +3805,8 @@ int arch_setup_ht_irq(unsigned int irq,

dev_printk(KERN_DEBUG, &dev->dev, "irq %d for HT\n", irq);
}
+error:
+ free_cpumask_var(tmp_mask);
return err;
}
#endif /* CONFIG_HT_IRQ */
Index: linux/arch/x86/include/asm/uv/uv_mmrs.h
===================================================================
--- linux.orig/arch/x86/include/asm/uv/uv_mmrs.h 2009-10-19 13:11:47.000000000 -0500
+++ linux/arch/x86/include/asm/uv/uv_mmrs.h 2009-10-19 20:57:29.000000000 -0500
@@ -823,6 +823,31 @@ union uvh_lb_mcast_aoerr0_rpt_enable_u {
};

/* ========================================================================= */
+/* UVH_LB_SOCKET_DESTINATION_TABLE */
+/* ========================================================================= */
+#define UVH_LB_SOCKET_DESTINATION_TABLE 0x321000UL
+#define UVH_LB_SOCKET_DESTINATION_TABLE_32 0x1800
+#define UVH_LB_SOCKET_DESTINATION_TABLE_DEPTH 128
+
+#define UVH_LB_SOCKET_DESTINATION_TABLE_NODE_ID_SHFT 1
+#define UVH_LB_SOCKET_DESTINATION_TABLE_NODE_ID_MASK 0x0000000000007ffeUL
+#define UVH_LB_SOCKET_DESTINATION_TABLE_CHIP_ID_SHFT 15
+#define UVH_LB_SOCKET_DESTINATION_TABLE_CHIP_ID_MASK 0x0000000000008000UL
+#define UVH_LB_SOCKET_DESTINATION_TABLE_PARITY_SHFT 16
+#define UVH_LB_SOCKET_DESTINATION_TABLE_PARITY_MASK 0x0000000000010000UL
+
+union uvh_lb_socket_destination_table_u {
+ unsigned long v;
+ struct uvh_lb_socket_destination_table_s {
+ unsigned long rsvd_0 : 1; /* */
+ unsigned long node_id : 14; /* RW */
+ unsigned long chip_id : 1; /* RW */
+ unsigned long parity : 1; /* RW */
+ unsigned long rsvd_17_63: 47; /* */
+ } s;
+};
+
+/* ========================================================================= */
/* UVH_LOCAL_INT0_CONFIG */
/* ========================================================================= */
#define UVH_LOCAL_INT0_CONFIG 0x61000UL
Index: linux/arch/x86/Kconfig
===================================================================
--- linux.orig/arch/x86/Kconfig 2009-10-19 15:22:52.000000000 -0500
+++ linux/arch/x86/Kconfig 2009-10-19 20:57:29.000000000 -0500
@@ -365,6 +365,7 @@ config X86_UV
depends on X86_EXTENDED_PLATFORM
depends on NUMA
depends on X86_X2APIC
+ depends on NUMA_IRQ_DESC
---help---
This option is needed in order to support SGI Ultraviolet systems.
If you don't have one of these, you should say N here.
Index: linux/arch/x86/kernel/uv_irq.c
===================================================================
--- linux.orig/arch/x86/kernel/uv_irq.c 2009-10-19 13:11:47.000000000 -0500
+++ linux/arch/x86/kernel/uv_irq.c 2009-10-20 08:23:08.000000000 -0500
@@ -242,6 +242,64 @@ static int uv_set_irq_affinity(unsigned
return 0;
}

+static cpumask_var_t *uv_irq_cpus_allowed;
+
+int uv_irq_cpus_allowed_and(struct irq_cfg *cfg, struct cpumask *dstp,
+ const struct cpumask *srcp)
+{
+ int bid;
+
+ if (cfg == NULL || cfg->node < 0) {
+ cpumask_copy(dstp, srcp);
+ return 1;
+ }
+
+ bid = uv_node_to_blade_id(cfg->node);
+
+ return cpumask_and(dstp, srcp, uv_irq_cpus_allowed[bid]);
+}
+
+void arch_init_uv_cfg_cpus_allowed(void)
+{
+ int bid;
+
+ uv_irq_cpus_allowed = kzalloc(uv_num_possible_blades() *
+ sizeof(cpumask_var_t *), GFP_KERNEL);
+
+ if (uv_irq_cpus_allowed == NULL) {
+ printk(KERN_EMERG "Out of memory");
+ return;
+ }
+
+ for_each_possible_blade(bid) {
+ unsigned long *pa;
+ int i;
+
+ if (!zalloc_cpumask_var_node(&uv_irq_cpus_allowed[bid],
+ GFP_KERNEL, uv_blade_to_memory_nid(bid))) {
+ printk(KERN_EMERG "Out of memory on blade %d", bid);
+ return;
+ }
+
+ pa = uv_global_mmr64_address(uv_blade_to_pnode(bid),
+ UVH_LB_SOCKET_DESTINATION_TABLE);
+
+ for (i = 0; i < UVH_LB_SOCKET_DESTINATION_TABLE_DEPTH; pa++,
+ i++) {
+ int cpu;
+ int pnode = UV_NASID_TO_PNODE(*pa &
+ UVH_LB_SOCKET_DESTINATION_TABLE_NODE_ID_MASK);
+
+ for_each_possible_cpu(cpu)
+ if (uv_cpu_to_pnode(cpu) == pnode)
+ cpumask_set_cpu(cpu,
+ uv_irq_cpus_allowed[bid]);
+ }
+ }
+
+ x86_irq_allowed_and = uv_irq_cpus_allowed_and;
+}
+
/*
* Set up a mapping of an available irq and vector, and enable the specified
* MMR that defines the MSI that is to be sent to the specified CPU when an
Index: linux/arch/x86/kernel/apic/x2apic_uv_x.c
===================================================================
--- linux.orig/arch/x86/kernel/apic/x2apic_uv_x.c 2009-10-19 15:22:52.000000000 -0500
+++ linux/arch/x86/kernel/apic/x2apic_uv_x.c 2009-10-19 20:57:29.000000000 -0500
@@ -23,6 +23,7 @@

#include <asm/uv/uv_mmrs.h>
#include <asm/uv/uv_hub.h>
+#include <asm/uv/uv_irq.h>
#include <asm/current.h>
#include <asm/pgtable.h>
#include <asm/uv/bios.h>
@@ -96,7 +97,7 @@ EXPORT_SYMBOL(sn_rtc_cycles_per_second);

static const struct cpumask *uv_target_cpus(void)
{
- return cpumask_of(0);
+ return cpu_online_mask;
}

static void uv_vector_allocation_domain(int cpu, struct cpumask *retmask)
@@ -659,5 +660,6 @@ void __init uv_system_init(void)

uv_cpu_init();
uv_scir_register_cpu_notifier();
+ arch_init_uv_cfg_cpus_allowed();
proc_mkdir("sgi_uv", NULL);
}
Index: linux/arch/x86/include/asm/hw_irq.h
===================================================================
--- linux.orig/arch/x86/include/asm/hw_irq.h 2009-10-19 13:11:47.000000000 -0500
+++ linux/arch/x86/include/asm/hw_irq.h 2009-10-19 20:57:29.000000000 -0500
@@ -94,11 +94,14 @@ struct irq_cfg {
struct irq_pin_list *irq_2_pin;
cpumask_var_t domain;
cpumask_var_t old_domain;
+ int node;
unsigned move_cleanup_count;
u8 vector;
u8 move_in_progress : 1;
};

+extern int (*x86_irq_allowed_and)(struct irq_cfg *, struct cpumask *,
+ const struct cpumask *);
extern struct irq_cfg *irq_cfg(unsigned int);
extern int assign_irq_vector(int, struct irq_cfg *, const struct cpumask *);
extern void send_cleanup_vector(struct irq_cfg *);
Index: linux/arch/x86/include/asm/uv/uv_irq.h
===================================================================
--- linux.orig/arch/x86/include/asm/uv/uv_irq.h 2009-10-19 13:11:47.000000000 -0500
+++ linux/arch/x86/include/asm/uv/uv_irq.h 2009-10-19 20:57:29.000000000 -0500
@@ -31,6 +31,7 @@ enum {
UV_AFFINITY_CPU
};

+extern void arch_init_uv_cfg_cpus_allowed(void);
extern int uv_irq_2_mmr_info(int, unsigned long *, int *);
extern int uv_setup_irq(char *, int, int, unsigned long, int);
extern void uv_teardown_irq(unsigned int);

2009-10-20 18:59:29

by Yinghai Lu

[permalink] [raw]
Subject: Re: [PATCH v3] x86/apic: limit irq affinity

Dimitri Sivanich wrote:
> This patch allows for hard restrictions to irq affinity on x86 systems.
>
> Affinity is masked to allow only those cpus which the subarchitecture
> deems accessible by the given irq.
>
> On some UV systems, this domain will be limited to the nodes accessible
> to the irq's node. Initially other X86 systems will not mask off any cpus
> so non-UV systems will remain unaffected.
>
> Signed-off-by: Dimitri Sivanich <[email protected]>
>
> ---
>
> Removed allowed cpumask from irq_cfg. Storing allowed cpumasks in UV
> specific IRQ code.
>
> arch/x86/Kconfig | 1
> arch/x86/include/asm/hw_irq.h | 3
> arch/x86/include/asm/uv/uv_irq.h | 1
> arch/x86/include/asm/uv/uv_mmrs.h | 25 ++++++
> arch/x86/kernel/apic/io_apic.c | 123 ++++++++++++++++++++++++++-------
> arch/x86/kernel/apic/x2apic_uv_x.c | 4 -
> arch/x86/kernel/uv_irq.c | 58 +++++++++++++++
> 7 files changed, 189 insertions(+), 26 deletions(-)
>
> Index: linux/arch/x86/kernel/apic/io_apic.c
> ===================================================================
> --- linux.orig/arch/x86/kernel/apic/io_apic.c 2009-10-19 15:22:52.000000000 -0500
> +++ linux/arch/x86/kernel/apic/io_apic.c 2009-10-19 20:57:29.000000000 -0500
> @@ -168,6 +168,17 @@ void __init io_apic_disable_legacy(void)
> nr_irqs_gsi = 0;
> }
>
> +static int default_irq_allowed_and(struct irq_cfg *cfg, struct cpumask *dstp,
> + const struct cpumask *srcp)
> +{
> + cpumask_copy(dstp, srcp);
> +
> + return 1;
> +}
> +
> +int (*x86_irq_allowed_and)(struct irq_cfg *, struct cpumask *,
> + const struct cpumask *) = default_irq_allowed_and;
> +
> int __init arch_early_irq_init(void)
> {
> struct irq_cfg *cfg;
> @@ -183,6 +194,7 @@ int __init arch_early_irq_init(void)
> for (i = 0; i < count; i++) {
> desc = irq_to_desc(i);
> desc->chip_data = &cfg[i];
> + cfg->node = node;
> zalloc_cpumask_var_node(&cfg[i].domain, GFP_NOWAIT, node);
> zalloc_cpumask_var_node(&cfg[i].old_domain, GFP_NOWAIT, node);
> if (i < nr_legacy_irqs)
> @@ -231,12 +243,13 @@ int arch_init_chip_data(struct irq_desc
>
> cfg = desc->chip_data;
> if (!cfg) {
> - desc->chip_data = get_one_free_irq_cfg(node);
> + cfg = desc->chip_data = get_one_free_irq_cfg(node);
> if (!desc->chip_data) {
> printk(KERN_ERR "can not alloc irq_cfg\n");
> BUG_ON(1);
> }
> }
> + cfg->node = node;

how about desc->node ?

YH

2009-10-21 01:07:15

by Dimitri Sivanich

[permalink] [raw]
Subject: Re: [PATCH v3] x86/apic: limit irq affinity

On Tue, Oct 20, 2009 at 11:58:38AM -0700, Yinghai Lu wrote:
> Dimitri Sivanich wrote:
> > + cfg->node = node;
>
> how about desc->node ?
>

Actually, desc->node does work for most of the cases here. In the case where
it doesn't work, we have a pci device with the node information in it's device.

Sending updated patch shortly.

2009-10-21 01:13:43

by Dimitri Sivanich

[permalink] [raw]
Subject: [PATCH v4] x86/apic: limit irq affinity

This patch allows for hard numa restrictions to irq affinity on x86 systems.

Affinity is masked to allow only those cpus which the subarchitecture
deems accessible by the given irq.

On some UV systems, this domain will be limited to the nodes accessible
to the irq's node. Initially other X86 systems will not mask off any cpus
so non-UV systems will remain unaffected.

Signed-off-by: Dimitri Sivanich <[email protected]>

---

Removed allowed cpumask and node from irq_cfg. Storing allowed cpumasks in UV
specific IRQ code.

arch/x86/Kconfig | 1
arch/x86/include/asm/hw_irq.h | 2
arch/x86/include/asm/uv/uv_irq.h | 1
arch/x86/include/asm/uv/uv_mmrs.h | 25 +++++++
arch/x86/kernel/apic/io_apic.c | 117 ++++++++++++++++++++++++++-------
arch/x86/kernel/apic/x2apic_uv_x.c | 4 -
arch/x86/kernel/uv_irq.c | 58 ++++++++++++++++
7 files changed, 184 insertions(+), 24 deletions(-)

Index: linux/arch/x86/kernel/apic/io_apic.c
===================================================================
--- linux.orig/arch/x86/kernel/apic/io_apic.c 2009-10-20 19:54:36.000000000 -0500
+++ linux/arch/x86/kernel/apic/io_apic.c 2009-10-20 20:03:14.000000000 -0500
@@ -168,6 +168,17 @@ void __init io_apic_disable_legacy(void)
nr_irqs_gsi = 0;
}

+static int default_irq_allowed_and(int node, struct cpumask *dstp,
+ const struct cpumask *srcp)
+{
+ cpumask_copy(dstp, srcp);
+
+ return 1;
+}
+
+int (*x86_irq_allowed_and)(int, struct cpumask *, const struct cpumask *) =
+ default_irq_allowed_and;
+
int __init arch_early_irq_init(void)
{
struct irq_cfg *cfg;
@@ -1428,16 +1439,23 @@ static void setup_IO_APIC_irq(int apic_i
struct irq_cfg *cfg;
struct IO_APIC_route_entry entry;
unsigned int dest;
+ cpumask_var_t tmp_mask;

if (!IO_APIC_IRQ(irq))
return;

cfg = desc->chip_data;

- if (assign_irq_vector(irq, cfg, apic->target_cpus()))
+ if (!alloc_cpumask_var(&tmp_mask, GFP_ATOMIC))
return;

- dest = apic->cpu_mask_to_apicid_and(cfg->domain, apic->target_cpus());
+ if (!x86_irq_allowed_and(desc->node, tmp_mask, apic->target_cpus()))
+ goto error;
+
+ if (assign_irq_vector(irq, cfg, tmp_mask))
+ goto error;
+
+ dest = apic->cpu_mask_to_apicid_and(cfg->domain, tmp_mask);

apic_printk(APIC_VERBOSE,KERN_DEBUG
"IOAPIC[%d]: Set routing entry (%d-%d -> 0x%x -> "
@@ -1451,7 +1469,7 @@ static void setup_IO_APIC_irq(int apic_i
printk("Failed to setup ioapic entry for ioapic %d, pin %d\n",
mp_ioapics[apic_id].apicid, pin);
__clear_irq_vector(irq, cfg);
- return;
+ goto error;
}

ioapic_register_intr(irq, desc, trigger);
@@ -1459,6 +1477,9 @@ static void setup_IO_APIC_irq(int apic_i
disable_8259A_irq(irq);

ioapic_write_entry(apic_id, pin, entry);
+error:
+ free_cpumask_var(tmp_mask);
+ return;
}

static struct {
@@ -2282,18 +2303,32 @@ set_desc_affinity(struct irq_desc *desc,
{
struct irq_cfg *cfg;
unsigned int irq;
-
- if (!cpumask_intersects(mask, cpu_online_mask))
- return BAD_APICID;
+ cpumask_var_t tmp_mask;

irq = desc->irq;
cfg = desc->chip_data;
- if (assign_irq_vector(irq, cfg, mask))
+
+ if (!alloc_cpumask_var(&tmp_mask, GFP_ATOMIC))
return BAD_APICID;

- cpumask_copy(desc->affinity, mask);
+ if (!x86_irq_allowed_and(desc->node, tmp_mask, mask))
+ goto error;
+
+ if (!cpumask_intersects(tmp_mask, cpu_online_mask))
+ goto error;
+
+ if (assign_irq_vector(irq, cfg, tmp_mask))
+ goto error;
+
+ cpumask_copy(desc->affinity, tmp_mask);
+
+ free_cpumask_var(tmp_mask);

return apic->cpu_mask_to_apicid_and(desc->affinity, cfg->domain);
+
+error:
+ free_cpumask_var(tmp_mask);
+ return BAD_APICID;
}

static int
@@ -2349,22 +2384,32 @@ migrate_ioapic_irq_desc(struct irq_desc
{
struct irq_cfg *cfg;
struct irte irte;
+ cpumask_var_t tmp_mask;
unsigned int dest;
unsigned int irq;
int ret = -1;

- if (!cpumask_intersects(mask, cpu_online_mask))
+ irq = desc->irq;
+
+ if (!alloc_cpumask_var(&tmp_mask, GFP_ATOMIC))
return ret;

- irq = desc->irq;
+ if (!x86_irq_allowed_and(desc->node, tmp_mask, mask))
+ goto error;
+
+ if (!cpumask_intersects(tmp_mask, cpu_online_mask))
+ goto error;
+
if (get_irte(irq, &irte))
- return ret;
+ goto error;

cfg = desc->chip_data;
- if (assign_irq_vector(irq, cfg, mask))
- return ret;
+ if (assign_irq_vector(irq, cfg, tmp_mask))
+ goto error;
+
+ ret = 0;

- dest = apic->cpu_mask_to_apicid_and(cfg->domain, mask);
+ dest = apic->cpu_mask_to_apicid_and(cfg->domain, tmp_mask);

irte.vector = cfg->vector;
irte.dest_id = IRTE_DEST(dest);
@@ -2377,9 +2422,10 @@ migrate_ioapic_irq_desc(struct irq_desc
if (cfg->move_in_progress)
send_cleanup_vector(cfg);

- cpumask_copy(desc->affinity, mask);
-
- return 0;
+ cpumask_copy(desc->affinity, tmp_mask);
+error:
+ free_cpumask_var(tmp_mask);
+ return ret;
}

/*
@@ -3212,18 +3258,29 @@ static int msi_compose_msg(struct pci_de
struct msi_msg *msg, u8 hpet_id)
{
struct irq_cfg *cfg;
+ struct irq_desc *desc;
int err;
unsigned dest;
+ cpumask_var_t tmp_mask;

if (disable_apic)
return -ENXIO;

cfg = irq_cfg(irq);
- err = assign_irq_vector(irq, cfg, apic->target_cpus());
+ if (!alloc_cpumask_var(&tmp_mask, GFP_ATOMIC))
+ return -ENOMEM;
+
+ desc = irq_to_desc(irq);
+ if (!x86_irq_allowed_and(desc->node, tmp_mask, apic->target_cpus())) {
+ err = -ENOSPC;
+ goto error;
+ }
+
+ err = assign_irq_vector(irq, cfg, tmp_mask);
if (err)
- return err;
+ goto error;

- dest = apic->cpu_mask_to_apicid_and(cfg->domain, apic->target_cpus());
+ dest = apic->cpu_mask_to_apicid_and(cfg->domain, tmp_mask);

if (irq_remapped(irq)) {
struct irte irte;
@@ -3281,6 +3338,8 @@ static int msi_compose_msg(struct pci_de
MSI_DATA_DELIVERY_LOWPRI) |
MSI_DATA_VECTOR(cfg->vector);
}
+error:
+ free_cpumask_var(tmp_mask);
return err;
}

@@ -3698,19 +3757,29 @@ static struct irq_chip ht_irq_chip = {
int arch_setup_ht_irq(unsigned int irq, struct pci_dev *dev)
{
struct irq_cfg *cfg;
+ cpumask_var_t tmp_mask;
int err;

if (disable_apic)
return -ENXIO;

cfg = irq_cfg(irq);
- err = assign_irq_vector(irq, cfg, apic->target_cpus());
+
+ if (!alloc_cpumask_var(&tmp_mask, GFP_ATOMIC))
+ return -ENOMEM;
+
+ if (!x86_irq_allowed_and(dev_to_node(&dev->dev), tmp_mask,
+ apic->target_cpus())) {
+ err = -ENOSPC;
+ goto error;
+ }
+
+ err = assign_irq_vector(irq, cfg, tmp_mask);
if (!err) {
struct ht_irq_msg msg;
unsigned dest;

- dest = apic->cpu_mask_to_apicid_and(cfg->domain,
- apic->target_cpus());
+ dest = apic->cpu_mask_to_apicid_and(cfg->domain, tmp_mask);

msg.address_hi = HT_IRQ_HIGH_DEST_ID(dest);

@@ -3734,6 +3803,8 @@ int arch_setup_ht_irq(unsigned int irq,

dev_printk(KERN_DEBUG, &dev->dev, "irq %d for HT\n", irq);
}
+error:
+ free_cpumask_var(tmp_mask);
return err;
}
#endif /* CONFIG_HT_IRQ */
Index: linux/arch/x86/include/asm/uv/uv_mmrs.h
===================================================================
--- linux.orig/arch/x86/include/asm/uv/uv_mmrs.h 2009-10-20 19:54:36.000000000 -0500
+++ linux/arch/x86/include/asm/uv/uv_mmrs.h 2009-10-20 19:54:42.000000000 -0500
@@ -823,6 +823,31 @@ union uvh_lb_mcast_aoerr0_rpt_enable_u {
};

/* ========================================================================= */
+/* UVH_LB_SOCKET_DESTINATION_TABLE */
+/* ========================================================================= */
+#define UVH_LB_SOCKET_DESTINATION_TABLE 0x321000UL
+#define UVH_LB_SOCKET_DESTINATION_TABLE_32 0x1800
+#define UVH_LB_SOCKET_DESTINATION_TABLE_DEPTH 128
+
+#define UVH_LB_SOCKET_DESTINATION_TABLE_NODE_ID_SHFT 1
+#define UVH_LB_SOCKET_DESTINATION_TABLE_NODE_ID_MASK 0x0000000000007ffeUL
+#define UVH_LB_SOCKET_DESTINATION_TABLE_CHIP_ID_SHFT 15
+#define UVH_LB_SOCKET_DESTINATION_TABLE_CHIP_ID_MASK 0x0000000000008000UL
+#define UVH_LB_SOCKET_DESTINATION_TABLE_PARITY_SHFT 16
+#define UVH_LB_SOCKET_DESTINATION_TABLE_PARITY_MASK 0x0000000000010000UL
+
+union uvh_lb_socket_destination_table_u {
+ unsigned long v;
+ struct uvh_lb_socket_destination_table_s {
+ unsigned long rsvd_0 : 1; /* */
+ unsigned long node_id : 14; /* RW */
+ unsigned long chip_id : 1; /* RW */
+ unsigned long parity : 1; /* RW */
+ unsigned long rsvd_17_63: 47; /* */
+ } s;
+};
+
+/* ========================================================================= */
/* UVH_LOCAL_INT0_CONFIG */
/* ========================================================================= */
#define UVH_LOCAL_INT0_CONFIG 0x61000UL
Index: linux/arch/x86/Kconfig
===================================================================
--- linux.orig/arch/x86/Kconfig 2009-10-20 19:54:36.000000000 -0500
+++ linux/arch/x86/Kconfig 2009-10-20 19:54:42.000000000 -0500
@@ -365,6 +365,7 @@ config X86_UV
depends on X86_EXTENDED_PLATFORM
depends on NUMA
depends on X86_X2APIC
+ depends on NUMA_IRQ_DESC
---help---
This option is needed in order to support SGI Ultraviolet systems.
If you don't have one of these, you should say N here.
Index: linux/arch/x86/kernel/uv_irq.c
===================================================================
--- linux.orig/arch/x86/kernel/uv_irq.c 2009-10-20 19:54:36.000000000 -0500
+++ linux/arch/x86/kernel/uv_irq.c 2009-10-20 19:54:42.000000000 -0500
@@ -242,6 +242,64 @@ static int uv_set_irq_affinity(unsigned
return 0;
}

+static cpumask_var_t *uv_irq_cpus_allowed;
+
+int uv_irq_cpus_allowed_and(int node, struct cpumask *dstp,
+ const struct cpumask *srcp)
+{
+ int bid;
+
+ if (node < 0) {
+ cpumask_copy(dstp, srcp);
+ return 1;
+ }
+
+ bid = uv_node_to_blade_id(node);
+
+ return cpumask_and(dstp, srcp, uv_irq_cpus_allowed[bid]);
+}
+
+void arch_init_uv_cfg_cpus_allowed(void)
+{
+ int bid;
+
+ uv_irq_cpus_allowed = kzalloc(uv_num_possible_blades() *
+ sizeof(cpumask_var_t *), GFP_KERNEL);
+
+ if (uv_irq_cpus_allowed == NULL) {
+ printk(KERN_EMERG "Out of memory");
+ return;
+ }
+
+ for_each_possible_blade(bid) {
+ unsigned long *pa;
+ int i;
+
+ if (!zalloc_cpumask_var_node(&uv_irq_cpus_allowed[bid],
+ GFP_KERNEL, uv_blade_to_memory_nid(bid))) {
+ printk(KERN_EMERG "Out of memory on blade %d", bid);
+ return;
+ }
+
+ pa = uv_global_mmr64_address(uv_blade_to_pnode(bid),
+ UVH_LB_SOCKET_DESTINATION_TABLE);
+
+ for (i = 0; i < UVH_LB_SOCKET_DESTINATION_TABLE_DEPTH; pa++,
+ i++) {
+ int cpu;
+ int pnode = UV_NASID_TO_PNODE(*pa &
+ UVH_LB_SOCKET_DESTINATION_TABLE_NODE_ID_MASK);
+
+ for_each_possible_cpu(cpu)
+ if (uv_cpu_to_pnode(cpu) == pnode)
+ cpumask_set_cpu(cpu,
+ uv_irq_cpus_allowed[bid]);
+ }
+ }
+
+ x86_irq_allowed_and = uv_irq_cpus_allowed_and;
+}
+
/*
* Set up a mapping of an available irq and vector, and enable the specified
* MMR that defines the MSI that is to be sent to the specified CPU when an
Index: linux/arch/x86/kernel/apic/x2apic_uv_x.c
===================================================================
--- linux.orig/arch/x86/kernel/apic/x2apic_uv_x.c 2009-10-20 19:54:36.000000000 -0500
+++ linux/arch/x86/kernel/apic/x2apic_uv_x.c 2009-10-20 19:54:42.000000000 -0500
@@ -23,6 +23,7 @@

#include <asm/uv/uv_mmrs.h>
#include <asm/uv/uv_hub.h>
+#include <asm/uv/uv_irq.h>
#include <asm/current.h>
#include <asm/pgtable.h>
#include <asm/uv/bios.h>
@@ -96,7 +97,7 @@ EXPORT_SYMBOL(sn_rtc_cycles_per_second);

static const struct cpumask *uv_target_cpus(void)
{
- return cpumask_of(0);
+ return cpu_online_mask;
}

static void uv_vector_allocation_domain(int cpu, struct cpumask *retmask)
@@ -659,5 +660,6 @@ void __init uv_system_init(void)

uv_cpu_init();
uv_scir_register_cpu_notifier();
+ arch_init_uv_cfg_cpus_allowed();
proc_mkdir("sgi_uv", NULL);
}
Index: linux/arch/x86/include/asm/hw_irq.h
===================================================================
--- linux.orig/arch/x86/include/asm/hw_irq.h 2009-10-20 19:54:36.000000000 -0500
+++ linux/arch/x86/include/asm/hw_irq.h 2009-10-20 19:54:42.000000000 -0500
@@ -99,6 +99,8 @@ struct irq_cfg {
u8 move_in_progress : 1;
};

+extern int (*x86_irq_allowed_and)(int, struct cpumask *,
+ const struct cpumask *);
extern struct irq_cfg *irq_cfg(unsigned int);
extern int assign_irq_vector(int, struct irq_cfg *, const struct cpumask *);
extern void send_cleanup_vector(struct irq_cfg *);
Index: linux/arch/x86/include/asm/uv/uv_irq.h
===================================================================
--- linux.orig/arch/x86/include/asm/uv/uv_irq.h 2009-10-20 19:54:36.000000000 -0500
+++ linux/arch/x86/include/asm/uv/uv_irq.h 2009-10-20 19:54:42.000000000 -0500
@@ -31,6 +31,7 @@ enum {
UV_AFFINITY_CPU
};

+extern void arch_init_uv_cfg_cpus_allowed(void);
extern int uv_irq_2_mmr_info(int, unsigned long *, int *);
extern int uv_setup_irq(char *, int, int, unsigned long, int);
extern void uv_teardown_irq(unsigned int);

2009-11-08 13:08:43

by Dimitri Sivanich

[permalink] [raw]
Subject: [tip:x86/apic] x86/apic: Limit irq affinity

Commit-ID: 683c91f85d7a3e1092d7fa3ec5687af8cd379f02
Gitweb: http://git.kernel.org/tip/683c91f85d7a3e1092d7fa3ec5687af8cd379f02
Author: Dimitri Sivanich <[email protected]>
AuthorDate: Tue, 3 Nov 2009 12:40:37 -0600
Committer: Ingo Molnar <[email protected]>
CommitDate: Sun, 8 Nov 2009 13:30:40 +0100

x86/apic: Limit irq affinity

This patch allows for hard numa restrictions to irq affinity on
x86 systems.

Affinity is masked to allow only those cpus which the
subarchitecture deems accessible by the given irq.

On some UV systems, this domain will be limited to the nodes
accessible to the irq's node. Initially other X86 systems will
not mask off any cpus so non-UV systems will remain unaffected.

Added apic functions for getting numa irq cpumasks. Systems other
than UV now simply return the mask passed in.

Signed-off-by: Dimitri Sivanich <[email protected]>
Cc: Suresh Siddha <[email protected]>
Cc: Yinghai Lu <[email protected]>
LKML-Reference: <[email protected]>
Signed-off-by: Ingo Molnar <[email protected]>
---
arch/x86/Kconfig | 1 +
arch/x86/include/asm/apic.h | 13 +++++
arch/x86/include/asm/uv/uv_irq.h | 3 +
arch/x86/include/asm/uv/uv_mmrs.h | 25 +++++++++
arch/x86/kernel/apic/apic_flat_64.c | 4 ++
arch/x86/kernel/apic/apic_noop.c | 2 +
arch/x86/kernel/apic/bigsmp_32.c | 2 +
arch/x86/kernel/apic/es7000_32.c | 4 ++
arch/x86/kernel/apic/io_apic.c | 91 ++++++++++++++++++++++++--------
arch/x86/kernel/apic/numaq_32.c | 2 +
arch/x86/kernel/apic/probe_32.c | 2 +
arch/x86/kernel/apic/summit_32.c | 2 +
arch/x86/kernel/apic/x2apic_cluster.c | 2 +
arch/x86/kernel/apic/x2apic_phys.c | 2 +
arch/x86/kernel/apic/x2apic_uv_x.c | 6 ++-
arch/x86/kernel/uv_irq.c | 66 ++++++++++++++++++++++++
16 files changed, 203 insertions(+), 24 deletions(-)

diff --git a/arch/x86/Kconfig b/arch/x86/Kconfig
index c876bac..93decdd 100644
--- a/arch/x86/Kconfig
+++ b/arch/x86/Kconfig
@@ -363,6 +363,7 @@ config X86_UV
depends on X86_EXTENDED_PLATFORM
depends on NUMA
depends on X86_X2APIC
+ depends on NUMA_IRQ_DESC
---help---
This option is needed in order to support SGI Ultraviolet systems.
If you don't have one of these, you should say N here.
diff --git a/arch/x86/include/asm/apic.h b/arch/x86/include/asm/apic.h
index 08a5f42..b7336ac 100644
--- a/arch/x86/include/asm/apic.h
+++ b/arch/x86/include/asm/apic.h
@@ -293,6 +293,9 @@ struct apic {
u32 irq_dest_mode;

const struct cpumask *(*target_cpus)(void);
+ struct cpumask *(*get_restricted_mask)(const struct cpumask *mask,
+ int node);
+ void (*free_restricted_mask)(struct cpumask *mask);

int disable_esr;

@@ -474,6 +477,16 @@ static inline const struct cpumask *default_target_cpus(void)
#endif
}

+static inline struct cpumask *
+default_get_restricted_mask(const struct cpumask *mask, int node)
+{
+ return (struct cpumask *)mask;
+}
+
+static inline void default_free_restricted_mask(struct cpumask *mask)
+{
+}
+
DECLARE_EARLY_PER_CPU(u16, x86_bios_cpu_apicid);


diff --git a/arch/x86/include/asm/uv/uv_irq.h b/arch/x86/include/asm/uv/uv_irq.h
index d6b17c7..af1b281 100644
--- a/arch/x86/include/asm/uv/uv_irq.h
+++ b/arch/x86/include/asm/uv/uv_irq.h
@@ -31,6 +31,9 @@ enum {
UV_AFFINITY_CPU
};

+extern struct cpumask *uv_get_restricted_mask(const struct cpumask *, int);
+extern void uv_free_restricted_mask(struct cpumask *);
+extern void arch_init_uv_cfg_cpus_allowed(void);
extern int uv_irq_2_mmr_info(int, unsigned long *, int *);
extern int uv_setup_irq(char *, int, int, unsigned long, int);
extern void uv_teardown_irq(unsigned int);
diff --git a/arch/x86/include/asm/uv/uv_mmrs.h b/arch/x86/include/asm/uv/uv_mmrs.h
index 2cae46c..6b79c96 100644
--- a/arch/x86/include/asm/uv/uv_mmrs.h
+++ b/arch/x86/include/asm/uv/uv_mmrs.h
@@ -823,6 +823,31 @@ union uvh_lb_mcast_aoerr0_rpt_enable_u {
};

/* ========================================================================= */
+/* UVH_LB_SOCKET_DESTINATION_TABLE */
+/* ========================================================================= */
+#define UVH_LB_SOCKET_DESTINATION_TABLE 0x321000UL
+#define UVH_LB_SOCKET_DESTINATION_TABLE_32 0x1800
+#define UVH_LB_SOCKET_DESTINATION_TABLE_DEPTH 128
+
+#define UVH_LB_SOCKET_DESTINATION_TABLE_NODE_ID_SHFT 1
+#define UVH_LB_SOCKET_DESTINATION_TABLE_NODE_ID_MASK 0x0000000000007ffeUL
+#define UVH_LB_SOCKET_DESTINATION_TABLE_CHIP_ID_SHFT 15
+#define UVH_LB_SOCKET_DESTINATION_TABLE_CHIP_ID_MASK 0x0000000000008000UL
+#define UVH_LB_SOCKET_DESTINATION_TABLE_PARITY_SHFT 16
+#define UVH_LB_SOCKET_DESTINATION_TABLE_PARITY_MASK 0x0000000000010000UL
+
+union uvh_lb_socket_destination_table_u {
+ unsigned long v;
+ struct uvh_lb_socket_destination_table_s {
+ unsigned long rsvd_0 : 1; /* */
+ unsigned long node_id : 14; /* RW */
+ unsigned long chip_id : 1; /* RW */
+ unsigned long parity : 1; /* RW */
+ unsigned long rsvd_17_63: 47; /* */
+ } s;
+};
+
+/* ========================================================================= */
/* UVH_LOCAL_INT0_CONFIG */
/* ========================================================================= */
#define UVH_LOCAL_INT0_CONFIG 0x61000UL
diff --git a/arch/x86/kernel/apic/apic_flat_64.c b/arch/x86/kernel/apic/apic_flat_64.c
index d0c99ab..a817e7b 100644
--- a/arch/x86/kernel/apic/apic_flat_64.c
+++ b/arch/x86/kernel/apic/apic_flat_64.c
@@ -174,6 +174,8 @@ struct apic apic_flat = {
.irq_dest_mode = 1, /* logical */

.target_cpus = flat_target_cpus,
+ .get_restricted_mask = default_get_restricted_mask,
+ .free_restricted_mask = default_free_restricted_mask,
.disable_esr = 0,
.dest_logical = APIC_DEST_LOGICAL,
.check_apicid_used = NULL,
@@ -323,6 +325,8 @@ struct apic apic_physflat = {
.irq_dest_mode = 0, /* physical */

.target_cpus = physflat_target_cpus,
+ .get_restricted_mask = default_get_restricted_mask,
+ .free_restricted_mask = default_free_restricted_mask,
.disable_esr = 0,
.dest_logical = 0,
.check_apicid_used = NULL,
diff --git a/arch/x86/kernel/apic/apic_noop.c b/arch/x86/kernel/apic/apic_noop.c
index 9ab6ffb..1b8a4a1 100644
--- a/arch/x86/kernel/apic/apic_noop.c
+++ b/arch/x86/kernel/apic/apic_noop.c
@@ -147,6 +147,8 @@ struct apic apic_noop = {
.irq_dest_mode = 1,

.target_cpus = noop_target_cpus,
+ .get_restricted_mask = default_get_restricted_mask,
+ .free_restricted_mask = default_free_restricted_mask,
.disable_esr = 0,
.dest_logical = APIC_DEST_LOGICAL,
.check_apicid_used = noop_check_apicid_used,
diff --git a/arch/x86/kernel/apic/bigsmp_32.c b/arch/x86/kernel/apic/bigsmp_32.c
index 77a0641..46aaed4 100644
--- a/arch/x86/kernel/apic/bigsmp_32.c
+++ b/arch/x86/kernel/apic/bigsmp_32.c
@@ -216,6 +216,8 @@ struct apic apic_bigsmp = {
.irq_dest_mode = 0,

.target_cpus = bigsmp_target_cpus,
+ .get_restricted_mask = default_get_restricted_mask,
+ .free_restricted_mask = default_free_restricted_mask,
.disable_esr = 1,
.dest_logical = 0,
.check_apicid_used = bigsmp_check_apicid_used,
diff --git a/arch/x86/kernel/apic/es7000_32.c b/arch/x86/kernel/apic/es7000_32.c
index 89174f8..321c655 100644
--- a/arch/x86/kernel/apic/es7000_32.c
+++ b/arch/x86/kernel/apic/es7000_32.c
@@ -665,6 +665,8 @@ struct apic __refdata apic_es7000_cluster = {
.irq_dest_mode = 1,

.target_cpus = target_cpus_cluster,
+ .get_restricted_mask = default_get_restricted_mask,
+ .free_restricted_mask = default_free_restricted_mask,
.disable_esr = 1,
.dest_logical = 0,
.check_apicid_used = es7000_check_apicid_used,
@@ -730,6 +732,8 @@ struct apic __refdata apic_es7000 = {
.irq_dest_mode = 0,

.target_cpus = es7000_target_cpus,
+ .get_restricted_mask = default_get_restricted_mask,
+ .free_restricted_mask = default_free_restricted_mask,
.disable_esr = 1,
.dest_logical = 0,
.check_apicid_used = es7000_check_apicid_used,
diff --git a/arch/x86/kernel/apic/io_apic.c b/arch/x86/kernel/apic/io_apic.c
index 31e9db3..e347709 100644
--- a/arch/x86/kernel/apic/io_apic.c
+++ b/arch/x86/kernel/apic/io_apic.c
@@ -1427,6 +1427,7 @@ static void setup_IO_APIC_irq(int apic_id, int pin, unsigned int irq, struct irq
{
struct irq_cfg *cfg;
struct IO_APIC_route_entry entry;
+ struct cpumask *tmp_mask;
unsigned int dest;

if (!IO_APIC_IRQ(irq))
@@ -1434,10 +1435,14 @@ static void setup_IO_APIC_irq(int apic_id, int pin, unsigned int irq, struct irq

cfg = desc->chip_data;

- if (assign_irq_vector(irq, cfg, apic->target_cpus()))
+ tmp_mask = apic->get_restricted_mask(apic->target_cpus(), desc->node);
+ if (!tmp_mask)
return;

- dest = apic->cpu_mask_to_apicid_and(cfg->domain, apic->target_cpus());
+ if (assign_irq_vector(irq, cfg, tmp_mask))
+ goto error;
+
+ dest = apic->cpu_mask_to_apicid_and(cfg->domain, tmp_mask);

apic_printk(APIC_VERBOSE,KERN_DEBUG
"IOAPIC[%d]: Set routing entry (%d-%d -> 0x%x -> "
@@ -1451,7 +1456,7 @@ static void setup_IO_APIC_irq(int apic_id, int pin, unsigned int irq, struct irq
printk("Failed to setup ioapic entry for ioapic %d, pin %d\n",
mp_ioapics[apic_id].apicid, pin);
__clear_irq_vector(irq, cfg);
- return;
+ goto error;
}

ioapic_register_intr(irq, desc, trigger);
@@ -1459,6 +1464,8 @@ static void setup_IO_APIC_irq(int apic_id, int pin, unsigned int irq, struct irq
disable_8259A_irq(irq);

ioapic_write_entry(apic_id, pin, entry);
+error:
+ apic->free_restricted_mask(tmp_mask);
}

static struct {
@@ -2278,18 +2285,30 @@ set_desc_affinity(struct irq_desc *desc, const struct cpumask *mask)
{
struct irq_cfg *cfg;
unsigned int irq;
-
- if (!cpumask_intersects(mask, cpu_online_mask))
- return BAD_APICID;
+ struct cpumask *tmp_mask;

irq = desc->irq;
cfg = desc->chip_data;
- if (assign_irq_vector(irq, cfg, mask))
+
+ tmp_mask = apic->get_restricted_mask(mask, desc->node);
+ if (!tmp_mask)
return BAD_APICID;

- cpumask_copy(desc->affinity, mask);
+ if (!cpumask_intersects(tmp_mask, cpu_online_mask))
+ goto error;
+
+ if (assign_irq_vector(irq, cfg, tmp_mask))
+ goto error;
+
+ cpumask_copy(desc->affinity, tmp_mask);
+
+ apic->free_restricted_mask(tmp_mask);

return apic->cpu_mask_to_apicid_and(desc->affinity, cfg->domain);
+
+error:
+ apic->free_restricted_mask(tmp_mask);
+ return BAD_APICID;
}

static int
@@ -2345,22 +2364,30 @@ migrate_ioapic_irq_desc(struct irq_desc *desc, const struct cpumask *mask)
{
struct irq_cfg *cfg;
struct irte irte;
+ struct cpumask *tmp_mask;
unsigned int dest;
unsigned int irq;
int ret = -1;

- if (!cpumask_intersects(mask, cpu_online_mask))
+ irq = desc->irq;
+
+ tmp_mask = apic->get_restricted_mask(mask, desc->node);
+ if (!tmp_mask)
return ret;

- irq = desc->irq;
+ if (!cpumask_intersects(tmp_mask, cpu_online_mask))
+ goto error;
+
if (get_irte(irq, &irte))
- return ret;
+ goto error;

cfg = desc->chip_data;
- if (assign_irq_vector(irq, cfg, mask))
- return ret;
+ if (assign_irq_vector(irq, cfg, tmp_mask))
+ goto error;
+
+ ret = 0;

- dest = apic->cpu_mask_to_apicid_and(cfg->domain, mask);
+ dest = apic->cpu_mask_to_apicid_and(cfg->domain, tmp_mask);

irte.vector = cfg->vector;
irte.dest_id = IRTE_DEST(dest);
@@ -2373,9 +2400,10 @@ migrate_ioapic_irq_desc(struct irq_desc *desc, const struct cpumask *mask)
if (cfg->move_in_progress)
send_cleanup_vector(cfg);

- cpumask_copy(desc->affinity, mask);
-
- return 0;
+ cpumask_copy(desc->affinity, tmp_mask);
+error:
+ apic->free_restricted_mask(tmp_mask);
+ return ret;
}

/*
@@ -3243,18 +3271,26 @@ void destroy_irq(unsigned int irq)
static int msi_compose_msg(struct pci_dev *pdev, unsigned int irq, struct msi_msg *msg)
{
struct irq_cfg *cfg;
+ struct irq_desc *desc;
int err;
unsigned dest;
+ struct cpumask *tmp_mask;

if (disable_apic)
return -ENXIO;

cfg = irq_cfg(irq);
- err = assign_irq_vector(irq, cfg, apic->target_cpus());
+ desc = irq_to_desc(irq);
+
+ tmp_mask = apic->get_restricted_mask(apic->target_cpus(), desc->node);
+ if (!tmp_mask)
+ return -ENOMEM;
+
+ err = assign_irq_vector(irq, cfg, tmp_mask);
if (err)
- return err;
+ goto error;

- dest = apic->cpu_mask_to_apicid_and(cfg->domain, apic->target_cpus());
+ dest = apic->cpu_mask_to_apicid_and(cfg->domain, tmp_mask);

if (irq_remapped(irq)) {
struct irte irte;
@@ -3309,6 +3345,8 @@ static int msi_compose_msg(struct pci_dev *pdev, unsigned int irq, struct msi_ms
MSI_DATA_DELIVERY_LOWPRI) |
MSI_DATA_VECTOR(cfg->vector);
}
+error:
+ apic->free_restricted_mask(tmp_mask);
return err;
}

@@ -3697,19 +3735,25 @@ static struct irq_chip ht_irq_chip = {
int arch_setup_ht_irq(unsigned int irq, struct pci_dev *dev)
{
struct irq_cfg *cfg;
+ struct cpumask *tmp_mask;
int err;

if (disable_apic)
return -ENXIO;

cfg = irq_cfg(irq);
- err = assign_irq_vector(irq, cfg, apic->target_cpus());
+
+ tmp_mask = apic->get_restricted_mask(apic->target_cpus(),
+ dev_to_node(&dev->dev));
+ if (!tmp_mask)
+ return -ENOMEM;
+
+ err = assign_irq_vector(irq, cfg, tmp_mask);
if (!err) {
struct ht_irq_msg msg;
unsigned dest;

- dest = apic->cpu_mask_to_apicid_and(cfg->domain,
- apic->target_cpus());
+ dest = apic->cpu_mask_to_apicid_and(cfg->domain, tmp_mask);

msg.address_hi = HT_IRQ_HIGH_DEST_ID(dest);

@@ -3733,6 +3777,7 @@ int arch_setup_ht_irq(unsigned int irq, struct pci_dev *dev)

dev_printk(KERN_DEBUG, &dev->dev, "irq %d for HT\n", irq);
}
+ apic->free_restricted_mask(tmp_mask);
return err;
}
#endif /* CONFIG_HT_IRQ */
diff --git a/arch/x86/kernel/apic/numaq_32.c b/arch/x86/kernel/apic/numaq_32.c
index efa00e2..d46f4b5 100644
--- a/arch/x86/kernel/apic/numaq_32.c
+++ b/arch/x86/kernel/apic/numaq_32.c
@@ -501,6 +501,8 @@ struct apic __refdata apic_numaq = {
.irq_dest_mode = 0,

.target_cpus = numaq_target_cpus,
+ .get_restricted_mask = default_get_restricted_mask,
+ .free_restricted_mask = default_free_restricted_mask,
.disable_esr = 1,
.dest_logical = APIC_DEST_LOGICAL,
.check_apicid_used = numaq_check_apicid_used,
diff --git a/arch/x86/kernel/apic/probe_32.c b/arch/x86/kernel/apic/probe_32.c
index 0c0182c..22b6716 100644
--- a/arch/x86/kernel/apic/probe_32.c
+++ b/arch/x86/kernel/apic/probe_32.c
@@ -94,6 +94,8 @@ struct apic apic_default = {
.irq_dest_mode = 1,

.target_cpus = default_target_cpus,
+ .get_restricted_mask = default_get_restricted_mask,
+ .free_restricted_mask = default_free_restricted_mask,
.disable_esr = 0,
.dest_logical = APIC_DEST_LOGICAL,
.check_apicid_used = default_check_apicid_used,
diff --git a/arch/x86/kernel/apic/summit_32.c b/arch/x86/kernel/apic/summit_32.c
index 645ecc4..8d43a00 100644
--- a/arch/x86/kernel/apic/summit_32.c
+++ b/arch/x86/kernel/apic/summit_32.c
@@ -517,6 +517,8 @@ struct apic apic_summit = {
.irq_dest_mode = 1,

.target_cpus = summit_target_cpus,
+ .get_restricted_mask = default_get_restricted_mask,
+ .free_restricted_mask = default_free_restricted_mask,
.disable_esr = 1,
.dest_logical = APIC_DEST_LOGICAL,
.check_apicid_used = summit_check_apicid_used,
diff --git a/arch/x86/kernel/apic/x2apic_cluster.c b/arch/x86/kernel/apic/x2apic_cluster.c
index a5371ec..d8cab82 100644
--- a/arch/x86/kernel/apic/x2apic_cluster.c
+++ b/arch/x86/kernel/apic/x2apic_cluster.c
@@ -198,6 +198,8 @@ struct apic apic_x2apic_cluster = {
.irq_dest_mode = 1, /* logical */

.target_cpus = x2apic_target_cpus,
+ .get_restricted_mask = default_get_restricted_mask,
+ .free_restricted_mask = default_free_restricted_mask,
.disable_esr = 0,
.dest_logical = APIC_DEST_LOGICAL,
.check_apicid_used = NULL,
diff --git a/arch/x86/kernel/apic/x2apic_phys.c b/arch/x86/kernel/apic/x2apic_phys.c
index a8989aa..4e64e84 100644
--- a/arch/x86/kernel/apic/x2apic_phys.c
+++ b/arch/x86/kernel/apic/x2apic_phys.c
@@ -187,6 +187,8 @@ struct apic apic_x2apic_phys = {
.irq_dest_mode = 0, /* physical */

.target_cpus = x2apic_target_cpus,
+ .get_restricted_mask = default_get_restricted_mask,
+ .free_restricted_mask = default_free_restricted_mask,
.disable_esr = 0,
.dest_logical = 0,
.check_apicid_used = NULL,
diff --git a/arch/x86/kernel/apic/x2apic_uv_x.c b/arch/x86/kernel/apic/x2apic_uv_x.c
index f5f5886..c1da399 100644
--- a/arch/x86/kernel/apic/x2apic_uv_x.c
+++ b/arch/x86/kernel/apic/x2apic_uv_x.c
@@ -23,6 +23,7 @@

#include <asm/uv/uv_mmrs.h>
#include <asm/uv/uv_hub.h>
+#include <asm/uv/uv_irq.h>
#include <asm/current.h>
#include <asm/pgtable.h>
#include <asm/uv/bios.h>
@@ -96,7 +97,7 @@ EXPORT_SYMBOL(sn_rtc_cycles_per_second);

static const struct cpumask *uv_target_cpus(void)
{
- return cpumask_of(0);
+ return cpu_online_mask;
}

static void uv_vector_allocation_domain(int cpu, struct cpumask *retmask)
@@ -264,6 +265,8 @@ struct apic __refdata apic_x2apic_uv_x = {
.irq_dest_mode = 0, /* physical */

.target_cpus = uv_target_cpus,
+ .get_restricted_mask = uv_get_restricted_mask,
+ .free_restricted_mask = uv_free_restricted_mask,
.disable_esr = 0,
.dest_logical = APIC_DEST_LOGICAL,
.check_apicid_used = NULL,
@@ -658,5 +661,6 @@ void __init uv_system_init(void)

uv_cpu_init();
uv_scir_register_cpu_notifier();
+ arch_init_uv_cfg_cpus_allowed();
proc_mkdir("sgi_uv", NULL);
}
diff --git a/arch/x86/kernel/uv_irq.c b/arch/x86/kernel/uv_irq.c
index 61d805d..b81273c 100644
--- a/arch/x86/kernel/uv_irq.c
+++ b/arch/x86/kernel/uv_irq.c
@@ -242,6 +242,72 @@ static int uv_set_irq_affinity(unsigned int irq, const struct cpumask *mask)
return 0;
}

+static cpumask_var_t *uv_irq_cpus_allowed;
+
+struct cpumask *uv_get_restricted_mask(const struct cpumask *mask, int node)
+{
+ cpumask_var_t tmp_mask;
+ int bid;
+
+ if (!alloc_cpumask_var(&tmp_mask, GFP_ATOMIC))
+ return NULL;
+
+ if (!uv_irq_cpus_allowed || node < 0) {
+ cpumask_copy(tmp_mask, mask);
+ return tmp_mask;
+ }
+
+ bid = uv_node_to_blade_id(node);
+
+ cpumask_and(tmp_mask, mask, uv_irq_cpus_allowed[bid]);
+
+ return tmp_mask;
+}
+
+void uv_free_restricted_mask(struct cpumask *mask)
+{
+ free_cpumask_var(mask);
+}
+
+void arch_init_uv_cfg_cpus_allowed(void)
+{
+ int bid;
+
+ uv_irq_cpus_allowed = kzalloc(uv_num_possible_blades() *
+ sizeof(cpumask_var_t *), GFP_KERNEL);
+
+ if (uv_irq_cpus_allowed == NULL) {
+ printk(KERN_EMERG "Out of memory");
+ return;
+ }
+
+ for_each_possible_blade(bid) {
+ unsigned long *pa;
+ int i;
+
+ if (!zalloc_cpumask_var_node(&uv_irq_cpus_allowed[bid],
+ GFP_KERNEL, uv_blade_to_memory_nid(bid))) {
+ printk(KERN_EMERG "Out of memory on blade %d", bid);
+ return;
+ }
+
+ pa = uv_global_mmr64_address(uv_blade_to_pnode(bid),
+ UVH_LB_SOCKET_DESTINATION_TABLE);
+
+ for (i = 0; i < UVH_LB_SOCKET_DESTINATION_TABLE_DEPTH; pa++,
+ i++) {
+ int cpu;
+ int pnode = UV_NASID_TO_PNODE(*pa &
+ UVH_LB_SOCKET_DESTINATION_TABLE_NODE_ID_MASK);
+
+ for_each_possible_cpu(cpu)
+ if (uv_cpu_to_pnode(cpu) == pnode)
+ cpumask_set_cpu(cpu,
+ uv_irq_cpus_allowed[bid]);
+ }
+ }
+}
+
/*
* Set up a mapping of an available irq and vector, and enable the specified
* MMR that defines the MSI that is to be sent to the specified CPU when an

2009-11-08 14:54:12

by Ingo Molnar

[permalink] [raw]
Subject: Re: [tip:x86/apic] x86/apic: Limit irq affinity


* tip-bot for Dimitri Sivanich <[email protected]> wrote:

> Commit-ID: 683c91f85d7a3e1092d7fa3ec5687af8cd379f02
> Gitweb: http://git.kernel.org/tip/683c91f85d7a3e1092d7fa3ec5687af8cd379f02
> Author: Dimitri Sivanich <[email protected]>
> AuthorDate: Tue, 3 Nov 2009 12:40:37 -0600
> Committer: Ingo Molnar <[email protected]>
> CommitDate: Sun, 8 Nov 2009 13:30:40 +0100
>
> x86/apic: Limit irq affinity
>
> This patch allows for hard numa restrictions to irq affinity on
> x86 systems.

-tip testing found a build failure:

arch/x86/kernel/apic/io_apic.c:1438: error: ‘struct irq_desc’ has no member named ‘node’
arch/x86/kernel/apic/io_apic.c:3286: error: ‘struct irq_desc’ has no member named ‘node’

Ingo

2009-11-09 16:02:13

by Dimitri Sivanich

[permalink] [raw]
Subject: Re: [tip:x86/apic] x86/apic: Limit irq affinity

On Sun, Nov 08, 2009 at 03:53:55PM +0100, Ingo Molnar wrote:
>
> * tip-bot for Dimitri Sivanich <[email protected]> wrote:
>
> > Commit-ID: 683c91f85d7a3e1092d7fa3ec5687af8cd379f02
> > Gitweb: http://git.kernel.org/tip/683c91f85d7a3e1092d7fa3ec5687af8cd379f02
> > Author: Dimitri Sivanich <[email protected]>
> > AuthorDate: Tue, 3 Nov 2009 12:40:37 -0600
> > Committer: Ingo Molnar <[email protected]>
> > CommitDate: Sun, 8 Nov 2009 13:30:40 +0100
> >
> > x86/apic: Limit irq affinity
> >
> > This patch allows for hard numa restrictions to irq affinity on
> > x86 systems.
>
> -tip testing found a build failure:
>
> arch/x86/kernel/apic/io_apic.c:1438: error: ‘struct irq_desc’ has no member named ‘node’
> arch/x86/kernel/apic/io_apic.c:3286: error: ‘struct irq_desc’ has no member named ‘node’
>

In the interest of doing some ifdef cleanup as well as fixing the build problem,
can I suggest that we remove the 'ifdef CONFIG_SMP' from the irq_desc?

Here's my suggested patch.


Signed-off-by: Dimitri Sivanich <[email protected]>

---

include/linux/irq.h | 70 +++++++++++++++++++-----------------------------
kernel/irq/chip.c | 2 -
kernel/irq/handle.c | 4 --
3 files changed, 29 insertions(+), 47 deletions(-)

Index: linux/include/linux/irq.h
===================================================================
--- linux.orig/include/linux/irq.h 2009-11-09 09:18:32.000000000 -0600
+++ linux/include/linux/irq.h 2009-11-09 09:19:50.000000000 -0600
@@ -193,13 +193,11 @@ struct irq_desc {
unsigned long last_unhandled; /* Aging timer for unhandled count */
unsigned int irqs_unhandled;
spinlock_t lock;
-#ifdef CONFIG_SMP
cpumask_var_t affinity;
unsigned int node;
#ifdef CONFIG_GENERIC_PENDING_IRQ
cpumask_var_t pending_mask;
#endif
-#endif
atomic_t threads_active;
wait_queue_head_t wait_for_threads;
#ifdef CONFIG_PROC_FS
@@ -423,6 +421,35 @@ extern int set_irq_msi(unsigned int irq,

#endif /* !CONFIG_S390 */

+static inline void init_desc_masks(struct irq_desc *desc)
+{
+ cpumask_setall(desc->affinity);
+#ifdef CONFIG_GENERIC_PENDING_IRQ
+ cpumask_clear(desc->pending_mask);
+#endif
+}
+
+/**
+ * init_copy_desc_masks - copy cpumasks for irq_desc
+ * @old_desc: pointer to old irq_desc struct
+ * @new_desc: pointer to new irq_desc struct
+ *
+ * Insures affinity and pending_masks are copied to new irq_desc.
+ * If !CONFIG_CPUMASKS_OFFSTACK the cpumasks are embedded in the
+ * irq_desc struct so the copy is redundant.
+ */
+static inline void init_copy_desc_masks(struct irq_desc *old_desc,
+ struct irq_desc *new_desc)
+{
+#ifdef CONFIG_CPUMASK_OFFSTACK
+ cpumask_copy(new_desc->affinity, old_desc->affinity);
+
+#ifdef CONFIG_GENERIC_PENDING_IRQ
+ cpumask_copy(new_desc->pending_mask, old_desc->pending_mask);
+#endif
+#endif
+}
+
#ifdef CONFIG_SMP
/**
* alloc_desc_masks - allocate cpumasks for irq_desc
@@ -455,36 +482,6 @@ static inline bool alloc_desc_masks(stru
return true;
}

-static inline void init_desc_masks(struct irq_desc *desc)
-{
- cpumask_setall(desc->affinity);
-#ifdef CONFIG_GENERIC_PENDING_IRQ
- cpumask_clear(desc->pending_mask);
-#endif
-}
-
-/**
- * init_copy_desc_masks - copy cpumasks for irq_desc
- * @old_desc: pointer to old irq_desc struct
- * @new_desc: pointer to new irq_desc struct
- *
- * Insures affinity and pending_masks are copied to new irq_desc.
- * If !CONFIG_CPUMASKS_OFFSTACK the cpumasks are embedded in the
- * irq_desc struct so the copy is redundant.
- */
-
-static inline void init_copy_desc_masks(struct irq_desc *old_desc,
- struct irq_desc *new_desc)
-{
-#ifdef CONFIG_CPUMASK_OFFSTACK
- cpumask_copy(new_desc->affinity, old_desc->affinity);
-
-#ifdef CONFIG_GENERIC_PENDING_IRQ
- cpumask_copy(new_desc->pending_mask, old_desc->pending_mask);
-#endif
-#endif
-}
-
static inline void free_desc_masks(struct irq_desc *old_desc,
struct irq_desc *new_desc)
{
@@ -503,15 +500,6 @@ static inline bool alloc_desc_masks(stru
return true;
}

-static inline void init_desc_masks(struct irq_desc *desc)
-{
-}
-
-static inline void init_copy_desc_masks(struct irq_desc *old_desc,
- struct irq_desc *new_desc)
-{
-}
-
static inline void free_desc_masks(struct irq_desc *old_desc,
struct irq_desc *new_desc)
{
Index: linux/kernel/irq/handle.c
===================================================================
--- linux.orig/kernel/irq/handle.c 2009-11-09 09:18:32.000000000 -0600
+++ linux/kernel/irq/handle.c 2009-11-09 09:19:50.000000000 -0600
@@ -110,9 +110,7 @@ static void init_one_irq_desc(int irq, s

spin_lock_init(&desc->lock);
desc->irq = irq;
-#ifdef CONFIG_SMP
desc->node = node;
-#endif
lockdep_set_class(&desc->lock, &irq_desc_lock_class);
init_kstat_irqs(desc, node, nr_cpu_ids);
if (!desc->kstat_irqs) {
@@ -173,9 +171,7 @@ int __init early_irq_init(void)

for (i = 0; i < legacy_count; i++) {
desc[i].irq = i;
-#ifdef CONFIG_SMP
desc[i].node = node;
-#endif
desc[i].kstat_irqs = kstat_irqs_legacy + i * nr_cpu_ids;
lockdep_set_class(&desc[i].lock, &irq_desc_lock_class);
alloc_desc_masks(&desc[i], node, true);
Index: linux/kernel/irq/chip.c
===================================================================
--- linux.orig/kernel/irq/chip.c 2009-11-09 09:18:32.000000000 -0600
+++ linux/kernel/irq/chip.c 2009-11-09 09:19:50.000000000 -0600
@@ -45,12 +45,10 @@ void dynamic_irq_init(unsigned int irq)
desc->action = NULL;
desc->irq_count = 0;
desc->irqs_unhandled = 0;
-#ifdef CONFIG_SMP
cpumask_setall(desc->affinity);
#ifdef CONFIG_GENERIC_PENDING_IRQ
cpumask_clear(desc->pending_mask);
#endif
-#endif
spin_unlock_irqrestore(&desc->lock, flags);
}

2009-11-10 04:40:47

by Ingo Molnar

[permalink] [raw]
Subject: Re: [tip:x86/apic] x86/apic: Limit irq affinity


* Dimitri Sivanich <[email protected]> wrote:

> On Sun, Nov 08, 2009 at 03:53:55PM +0100, Ingo Molnar wrote:
> >
> > * tip-bot for Dimitri Sivanich <[email protected]> wrote:
> >
> > > Commit-ID: 683c91f85d7a3e1092d7fa3ec5687af8cd379f02
> > > Gitweb: http://git.kernel.org/tip/683c91f85d7a3e1092d7fa3ec5687af8cd379f02
> > > Author: Dimitri Sivanich <[email protected]>
> > > AuthorDate: Tue, 3 Nov 2009 12:40:37 -0600
> > > Committer: Ingo Molnar <[email protected]>
> > > CommitDate: Sun, 8 Nov 2009 13:30:40 +0100
> > >
> > > x86/apic: Limit irq affinity
> > >
> > > This patch allows for hard numa restrictions to irq affinity on
> > > x86 systems.
> >
> > -tip testing found a build failure:
> >
> > arch/x86/kernel/apic/io_apic.c:1438: error: ???struct irq_desc??? has no member named ???node???
> > arch/x86/kernel/apic/io_apic.c:3286: error: ???struct irq_desc??? has no member named ???node???
> >
>
> In the interest of doing some ifdef cleanup as well as fixing the
> build problem, can I suggest that we remove the 'ifdef CONFIG_SMP'
> from the irq_desc?

What's the (data and code) size effect on UP kernels?

Ingo

2009-11-10 16:31:17

by Dimitri Sivanich

[permalink] [raw]
Subject: Re: [tip:x86/apic] x86/apic: Limit irq affinity

On Tue, Nov 10, 2009 at 05:40:25AM +0100, Ingo Molnar wrote:
>
> * Dimitri Sivanich <[email protected]> wrote:
>
> > On Sun, Nov 08, 2009 at 03:53:55PM +0100, Ingo Molnar wrote:
> > >
> > > * tip-bot for Dimitri Sivanich <[email protected]> wrote:
> > >
> > > > Commit-ID: 683c91f85d7a3e1092d7fa3ec5687af8cd379f02
> > > > Gitweb: http://git.kernel.org/tip/683c91f85d7a3e1092d7fa3ec5687af8cd379f02
> > > > Author: Dimitri Sivanich <[email protected]>
> > > > AuthorDate: Tue, 3 Nov 2009 12:40:37 -0600
> > > > Committer: Ingo Molnar <[email protected]>
> > > > CommitDate: Sun, 8 Nov 2009 13:30:40 +0100
> > > >
> > > > x86/apic: Limit irq affinity
> > > >
> > > > This patch allows for hard numa restrictions to irq affinity on
> > > > x86 systems.
> > >
> > > -tip testing found a build failure:
> > >
> > > arch/x86/kernel/apic/io_apic.c:1438: error: ???struct irq_desc??? has no member named ???node???
> > > arch/x86/kernel/apic/io_apic.c:3286: error: ???struct irq_desc??? has no member named ???node???
> > >
> >
> > In the interest of doing some ifdef cleanup as well as fixing the
> > build problem, can I suggest that we remove the 'ifdef CONFIG_SMP'
> > from the irq_desc?
>
> What's the (data and code) size effect on UP kernels?
>

While I'm not fully certain what is best to report, here is output from
both 'size' and 'readelf'.

For the x86_32 case (the default configuration with SMP_CONFIG off):

Without the patch to remove '#ifdef CONFIG_SMP' from irq_desc:
$ size vmlinux
text data bss dec hex filename
6853617 674868 1333604 8862089 873989 vmlinux

With the patch to remove '#ifdef CONFIG_SMP' from irq_desc:
$ size vmlinux
size vmlinux
text data bss dec hex filename
6853621 674996 1333604 8862221 873a0d vmlinux

So it looks like we have 4 bytes more text and 128 bytes more data.

Looking at elf data, no MemSiz difference is apparent.

Without the patch:
$ readelf -l vmlinux
..
..
Program Headers:
Type Offset VirtAddr PhysAddr FileSiz MemSiz Flg Align
LOAD 0x001000 0xc1000000 0x01000000 0x651000 0x651000 R E 0x1000
LOAD 0x652000 0xc1651000 0x01651000 0xdea1e 0x225000 RWE 0x1000
NOTE 0x43bd4c 0xc143ad4c 0x0143ad4c 0x00024 0x00024 0x4

With the patch:
Program Headers:
Type Offset VirtAddr PhysAddr FileSiz MemSiz Flg Align
LOAD 0x001000 0xc1000000 0x01000000 0x651000 0x651000 R E 0x1000
LOAD 0x652000 0xc1651000 0x01651000 0xdea1e 0x225000 RWE 0x1000
NOTE 0x43bd44 0xc143ad44 0x0143ad44 0x00024 0x00024 0x4





For the x86_64 case:
Without patch on x86_64:
$ size vmlinux
text data bss dec hex filename
6879227 731284 883216 8493727 819a9f vmlinux

With patch on x86_64:
$ size vmlinux
text data bss dec hex filename
6879242 731524 883216 8493982 819b9e vmlinux

Looks like 15 bytes text and 240 bytes data.


Looking at elf data, there's a 256 byte difference in MemSiz data
(here I'm showing the output diff without/with the patch):

Program Headers:
Type Offset VirtAddr PhysAddr
FileSiz MemSiz Flags Align
LOAD 0x0000000000200000 0xffffffff81000000 0x0000000001000000
0x0000000000807000 0x0000000000807000 R E 200000
LOAD 0x0000000000c00000 0xffffffff81a00000 0x0000000001a00000
- 0x0000000000088ff8 0x0000000000088ff8 RWE 200000
+ 0x00000000000890f8 0x00000000000890f8 RWE 200000

2009-11-10 17:19:30

by Ingo Molnar

[permalink] [raw]
Subject: Re: [tip:x86/apic] x86/apic: Limit irq affinity


* Dimitri Sivanich <[email protected]> wrote:

> So it looks like we have 4 bytes more text and 128 bytes more data.

> Looks like 15 bytes text and 240 bytes data.

fair enough - and it removes a good bit of #ifdef complexity. Mind
sending it standalone, tested, properly changelogged and with the above
data included?

Thanks,

Ingo

2009-11-10 18:57:14

by Dimitri Sivanich

[permalink] [raw]
Subject: [PATCH] Remove SMP ifdef from irq_desc

Remove the CONFIG_SMP ifdef from the irq_desc structure and consolidate
initializers for the smp/non-smp cases.

For the x86_32 case, this adds 4 bytes of text and 128 bytes of data.

For the x86_64 case, this adds 15 bytes of text and 240 bytes of data.

Signed-off-by: Dimitri Sivanich <[email protected]>

---

include/linux/irq.h | 70 +++++++++++++++++++-----------------------------
kernel/irq/chip.c | 2 -
kernel/irq/handle.c | 4 --
3 files changed, 29 insertions(+), 47 deletions(-)

Index: linux/include/linux/irq.h
===================================================================
--- linux.orig/include/linux/irq.h 2009-11-10 11:54:52.000000000 -0600
+++ linux/include/linux/irq.h 2009-11-10 11:54:55.000000000 -0600
@@ -193,13 +193,11 @@ struct irq_desc {
unsigned long last_unhandled; /* Aging timer for unhandled count */
unsigned int irqs_unhandled;
spinlock_t lock;
-#ifdef CONFIG_SMP
cpumask_var_t affinity;
unsigned int node;
#ifdef CONFIG_GENERIC_PENDING_IRQ
cpumask_var_t pending_mask;
#endif
-#endif
atomic_t threads_active;
wait_queue_head_t wait_for_threads;
#ifdef CONFIG_PROC_FS
@@ -423,6 +421,35 @@ extern int set_irq_msi(unsigned int irq,

#endif /* !CONFIG_S390 */

+static inline void init_desc_masks(struct irq_desc *desc)
+{
+ cpumask_setall(desc->affinity);
+#ifdef CONFIG_GENERIC_PENDING_IRQ
+ cpumask_clear(desc->pending_mask);
+#endif
+}
+
+/**
+ * init_copy_desc_masks - copy cpumasks for irq_desc
+ * @old_desc: pointer to old irq_desc struct
+ * @new_desc: pointer to new irq_desc struct
+ *
+ * Insures affinity and pending_masks are copied to new irq_desc.
+ * If !CONFIG_CPUMASKS_OFFSTACK the cpumasks are embedded in the
+ * irq_desc struct so the copy is redundant.
+ */
+static inline void init_copy_desc_masks(struct irq_desc *old_desc,
+ struct irq_desc *new_desc)
+{
+#ifdef CONFIG_CPUMASK_OFFSTACK
+ cpumask_copy(new_desc->affinity, old_desc->affinity);
+
+#ifdef CONFIG_GENERIC_PENDING_IRQ
+ cpumask_copy(new_desc->pending_mask, old_desc->pending_mask);
+#endif
+#endif
+}
+
#ifdef CONFIG_SMP
/**
* alloc_desc_masks - allocate cpumasks for irq_desc
@@ -455,36 +482,6 @@ static inline bool alloc_desc_masks(stru
return true;
}

-static inline void init_desc_masks(struct irq_desc *desc)
-{
- cpumask_setall(desc->affinity);
-#ifdef CONFIG_GENERIC_PENDING_IRQ
- cpumask_clear(desc->pending_mask);
-#endif
-}
-
-/**
- * init_copy_desc_masks - copy cpumasks for irq_desc
- * @old_desc: pointer to old irq_desc struct
- * @new_desc: pointer to new irq_desc struct
- *
- * Insures affinity and pending_masks are copied to new irq_desc.
- * If !CONFIG_CPUMASKS_OFFSTACK the cpumasks are embedded in the
- * irq_desc struct so the copy is redundant.
- */
-
-static inline void init_copy_desc_masks(struct irq_desc *old_desc,
- struct irq_desc *new_desc)
-{
-#ifdef CONFIG_CPUMASK_OFFSTACK
- cpumask_copy(new_desc->affinity, old_desc->affinity);
-
-#ifdef CONFIG_GENERIC_PENDING_IRQ
- cpumask_copy(new_desc->pending_mask, old_desc->pending_mask);
-#endif
-#endif
-}
-
static inline void free_desc_masks(struct irq_desc *old_desc,
struct irq_desc *new_desc)
{
@@ -503,15 +500,6 @@ static inline bool alloc_desc_masks(stru
return true;
}

-static inline void init_desc_masks(struct irq_desc *desc)
-{
-}
-
-static inline void init_copy_desc_masks(struct irq_desc *old_desc,
- struct irq_desc *new_desc)
-{
-}
-
static inline void free_desc_masks(struct irq_desc *old_desc,
struct irq_desc *new_desc)
{
Index: linux/kernel/irq/handle.c
===================================================================
--- linux.orig/kernel/irq/handle.c 2009-11-10 11:54:52.000000000 -0600
+++ linux/kernel/irq/handle.c 2009-11-10 11:54:55.000000000 -0600
@@ -110,9 +110,7 @@ static void init_one_irq_desc(int irq, s

spin_lock_init(&desc->lock);
desc->irq = irq;
-#ifdef CONFIG_SMP
desc->node = node;
-#endif
lockdep_set_class(&desc->lock, &irq_desc_lock_class);
init_kstat_irqs(desc, node, nr_cpu_ids);
if (!desc->kstat_irqs) {
@@ -173,9 +171,7 @@ int __init early_irq_init(void)

for (i = 0; i < legacy_count; i++) {
desc[i].irq = i;
-#ifdef CONFIG_SMP
desc[i].node = node;
-#endif
desc[i].kstat_irqs = kstat_irqs_legacy + i * nr_cpu_ids;
lockdep_set_class(&desc[i].lock, &irq_desc_lock_class);
alloc_desc_masks(&desc[i], node, true);
Index: linux/kernel/irq/chip.c
===================================================================
--- linux.orig/kernel/irq/chip.c 2009-11-10 11:54:52.000000000 -0600
+++ linux/kernel/irq/chip.c 2009-11-10 11:54:55.000000000 -0600
@@ -45,12 +45,10 @@ void dynamic_irq_init(unsigned int irq)
desc->action = NULL;
desc->irq_count = 0;
desc->irqs_unhandled = 0;
-#ifdef CONFIG_SMP
cpumask_setall(desc->affinity);
#ifdef CONFIG_GENERIC_PENDING_IRQ
cpumask_clear(desc->pending_mask);
#endif
-#endif
spin_unlock_irqrestore(&desc->lock, flags);
}

2009-11-16 10:50:19

by Thomas Gleixner

[permalink] [raw]
Subject: Re: [PATCH] Remove SMP ifdef from irq_desc

On Tue, 10 Nov 2009, Dimitri Sivanich wrote:

> Remove the CONFIG_SMP ifdef from the irq_desc structure and consolidate
> initializers for the smp/non-smp cases.
>
> For the x86_32 case, this adds 4 bytes of text and 128 bytes of data.
> For the x86_64 case, this adds 15 bytes of text and 240 bytes of data.

Hmm, on ARM and PowerPC the increase of data is 4K and I expect it's
minimum the same on other architectures. Not sure if we should worry
about it, but it's definitely a significant number.

Thanks,

tglx