2003-06-07 04:38:36

by Anton Blanchard

[permalink] [raw]
Subject: Re: irq consolidation


Hi,

On ppc64 we can have lots of sparse irqs. At the moment we do a similar
thing to sparc64 and provide a mapping function into the NR_IRQS sized
array of irq descriptors.

At the moment we have NR_IRQS set at 512 which wastes a bit of memory on
small machines. We also risk running out of slots on larger machines.

We are hoping to kill irq_desc[NR_IRQS] completely and instead allocate
them on demand with some sort of hash to map an interrupt number to an
irq_desc. Im working on top of Andrey Panin's irq consolidation patches
in the hope that it goes in first and other architectures can benefit
from these changes (I think SGI's ia64 boxes have similar issues as
well as large x86)

It looks like we can do this with only a few changes:

1. move the proc/affinity stuff into the irq_desc.
2. create a for_each_irq() macro for iterating through irqs

The consolidation patch already creates the third piece, a macro for
mapping from an interrupt number to an irq descriptor - irq_desc(IRQ)

Here is an untested, uncompiled, written on a plane patch that gives a
general idea of what I want. Any complaints?

Anton

--- include/linux/irq.h.orig 2003-06-06 03:50:25.000000000 +1000
+++ include/linux/irq.h 2003-06-06 04:00:18.000000000 +1000
@@ -56,12 +56,20 @@
*
* Pad this out to 32 bytes for cache and indexing reasons.
*/
+struct proc_irq_dir;
+struct proc_smp_affinity_entry;
typedef struct {
unsigned int status; /* IRQ status */
hw_irq_controller *handler;
struct irqaction *action; /* IRQ action list */
unsigned int depth; /* nested irq disables */
spinlock_t lock;
+ /* seldom used things at the end */
+ struct proc_dir_entry *proc_irq_dir;
+#ifdef CONFIG_SMP
+ struct proc_dir_entry *proc_smp_affinity_entry;
+ unsigned long irq_affinity;
+#endif
} ____cacheline_aligned irq_desc_t;

extern irq_desc_t irq_desc [NR_IRQS];
--- kernel/irq.c.orig 2003-06-06 03:13:29.000000000 +1000
+++ kernel/irq.c 2003-06-07 09:34:24.000000000 +1000
@@ -61,6 +61,8 @@
}
};

+#define for_each_irq(IRQ) for (IRQ = 0; IRQ < NR_IRQS; IRQ++)
+
#endif

/*
@@ -484,7 +486,8 @@
* something may have generated an irq long ago and we want to
* flush such a longstanding irq before considering it as spurious.
*/
- for (i = NR_IRQS - 1; i > 0; i--) {
+ /* XXX this used to walk through the irqs backwards */
+ for_each_irq(i) {
desc = irq_desc(i);

spin_lock_irq(&desc->lock);
@@ -502,7 +505,8 @@
* (we must startup again here because if a longstanding irq
* happened in the previous stage, it may have masked itself)
*/
- for (i = NR_IRQS-1; i > 0; i--) {
+ /* XXX this used to walk through the irqs backwards */
+ for_each_irq(i) {
desc = irq_desc(i);

spin_lock_irq(&desc->lock);
@@ -524,7 +528,7 @@
* Now filter out any obviously spurious interrupts
*/
val = 0;
- for (i = 0; i < NR_IRQS; i++) {
+ for_each_irq(i) {
irq_desc_t *desc = irq_desc(i);
unsigned int status;

@@ -563,10 +567,11 @@
{
int i;
unsigned int mask;
+ int irq;

mask = 0;
- for (i = 0; i < NR_IRQS; i++) {
- irq_desc_t *desc = irq_desc(i);
+ for_each_irq(irq) {
+ irq_desc_t *desc = irq_desc(irq);
unsigned int status;

spin_lock_irq(&desc->lock);
@@ -609,7 +614,7 @@

nr_irqs = 0;
irq_found = 0;
- for (i = 0; i < NR_IRQS; i++) {
+ for_each_irq(i) {
irq_desc_t *desc = irq_desc(i);
unsigned int status;

@@ -678,22 +683,23 @@
#define MAX_NAMELEN 10

struct proc_dir_entry *root_irq_dir;
-static struct proc_dir_entry *irq_dir[NR_IRQS];

#ifdef CONFIG_SMP
-static struct proc_dir_entry *smp_affinity_entry[NR_IRQS];
-
-unsigned long irq_affinity[NR_IRQS] = {
- [0 ... NR_IRQS - 1] = ARCH_DEFAULT_IRQ_AFFINITY
-};

static int irq_affinity_read_proc(char *page, char **start, off_t off,
int count, int *eof, void *data)
{
+ irq_desc_t *irq_desc;
+
if (count < ARCH_AFFINITY_WIDTH + 1)
return -EINVAL;
+
+ irq_desc = irq_desc((long)data);
+ if (!irq_desc)
+ return -EINVAL;
+
return sprintf (page, "%0" __stringify(ARCH_AFFINITY_WIDTH) "lx\n",
- irq_affinity[(long)data]);
+ irq_desc->irq_affinity);
}

static int irq_affinity_write_proc(struct file *file, const char *buffer,
@@ -701,8 +707,13 @@
{
int irq = (long) data, full_count = count, err;
unsigned long new_value;
+ irq_desc_t *irq_desc;

- if (!irq_desc(irq)->handler->set_affinity)
+ irq_desc = irq_desc(irq);
+ if (!irq_desc)
+ return -EINVAL;
+
+ if (!irq_desc->handler->set_affinity)
return -EIO;

err = parse_hex_value(buffer, count, &new_value);
@@ -715,8 +726,8 @@
if (!(new_value & cpu_online_map))
return -EINVAL;

- irq_affinity[irq] = new_value;
- irq_desc(irq)->handler->set_affinity(irq, new_value);
+ irq_desc->irq_affinity = new_value;
+ irq_desc->handler->set_affinity(irq, new_value);

return full_count;
}
@@ -727,21 +738,26 @@
char name[MAX_NAMELEN];

if (!root_irq_dir || (irq_desc(irq)->handler == &no_irq_type) ||
- irq_dir[irq])
+ irq_desc(irq)->proc_irq_dir)
return;

memset(name, 0, MAX_NAMELEN);
sprintf(name, "%d", irq);

/* create /proc/irq/1234 */
- irq_dir[irq] = proc_mkdir(name, root_irq_dir);
+ irq_desc(irq)->proc_irq_dir = proc_mkdir(name, root_irq_dir);

#if CONFIG_SMP
if (irq_desc(irq)->handler->set_affinity) {
struct proc_dir_entry *entry;

+ /* give this irq a default affinity */
+ /* XXX does this clash with arch setting of affinity? */
+ irq_desc(irq)->smp_affinity = ARCH_DEFAULT_IRQ_AFFINITY;
+
/* create /proc/irq/1234/smp_affinity */
- entry = create_proc_entry("smp_affinity", 0600, irq_dir[irq]);
+ entry = create_proc_entry("smp_affinity", 0600,
+ irq_desc(irq)->proc_irq_dir);

if (entry) {
entry->nlink = 1;
@@ -750,7 +766,7 @@
entry->write_proc = irq_affinity_write_proc;
}

- smp_affinity_entry[irq] = entry;
+ irq_desc(irq)->proc_smp_affinity_entry = entry;
}
#endif
}
@@ -811,7 +827,7 @@
* Create entries for all existing IRQs.
*/
if (arch_can_create_irq_proc())
- for (i = 0; irq_valid(i); i++)
+ for_each_irq(i) {
if (irq_desc(i)->handler != &no_irq_type)
register_irq_proc(i);
}


2003-06-07 09:05:52

by Russell King

[permalink] [raw]
Subject: Re: irq consolidation

On Sat, Jun 07, 2003 at 02:48:03PM +1000, Anton Blanchard wrote:
> We are hoping to kill irq_desc[NR_IRQS] completely and instead allocate
> them on demand with some sort of hash to map an interrupt number to an
> irq_desc.

The same thought has crossed my mind as well for ARM; the hardware
interrupt controllers are becoming more inteligent, and there is
the possibility that system designers will mix inteligent interrupt
controllers with standard types.

Also on ARM, we're currently defining NR_IRQS on a per-platform class
and even a per-platform basis.

I've been wondering whether we even need to think about passing some
alternative identifier of an IRQ line around, instead of a number.

> Im working on top of Andrey Panin's irq consolidation patches
> in the hope that it goes in first and other architectures can benefit
> from these changes (I think SGI's ia64 boxes have similar issues as
> well as large x86)

I believe Andrey's IRQ consolidation provides a single flat IRQ
structure. Unfortunately, this doesn't reflect the reality that we
have on many ARM platforms - it remains the case that we need to
decode IRQs on a multi-level basis.

Given the rate which ARM has progressed and evolved during the existing
2.4 lifespan, it doesn't make much difference to us whether these
changes happen now or later. If it happens later, it will be during
the 2.6 series of kernels, so "now" is preferable. Reality is such
that ARM hardware moves a hell of a lot faster than x86 hardware.

--
Russell King ([email protected]) The developer of ARM Linux
http://www.arm.linux.org.uk/personal/aboutme.html

2003-06-07 22:47:55

by Zwane Mwaikambo

[permalink] [raw]
Subject: Re: irq consolidation

On Sat, 7 Jun 2003, Russell King wrote:

> On Sat, Jun 07, 2003 at 02:48:03PM +1000, Anton Blanchard wrote:
> > We are hoping to kill irq_desc[NR_IRQS] completely and instead allocate
> > them on demand with some sort of hash to map an interrupt number to an
> > irq_desc.

Anton don't you have an NR_IRQS sized interrupt stub you have to deal
with on PPC64?

I was considering perhaps only statically allocating the 0-15 on i386
just to get it booting and then dynamically allocate the rest.

Zwane
--
function.linuxpower.ca

2003-06-12 11:03:04

by Anton Blanchard

[permalink] [raw]
Subject: Re: irq consolidation


> Anton don't you have an NR_IRQS sized interrupt stub you have to deal
> with on PPC64?
>
> I was considering perhaps only statically allocating the 0-15 on i386
> just to get it booting and then dynamically allocate the rest.

We usually have an 8259 but its wired up to the main interrupt
controller. Even if we statically allocated 0-15, we still need a higher
irq for that cascade.

Going all the way and removing NR_IRQS also catches problems like the
random driver where we would only add randomness on 8259 (ie < 16)
interrupts with a mixed static/dynamic scheme.

Anton

2003-06-12 11:23:39

by Anton Blanchard

[permalink] [raw]
Subject: Re: irq consolidation


> I believe Andrey's IRQ consolidation provides a single flat IRQ
> structure. Unfortunately, this doesn't reflect the reality that we
> have on many ARM platforms - it remains the case that we need to
> decode IRQs on a multi-level basis.

Yes its still a flat structure. On ppc32/64 we offset the interrupts on
the main controller to provide a space for ISA interrupts to go. Not
great but it works for us.

One thing Paul suggested was to have a flag to mark an interrupt as a
cascade in the irq descriptor. If its set then we also provide a
get_irq() method (perhaps stashed away in the ->action field). That gives
us nested interrupt handling in generic code. (assuming you can
partition your irq numbers somehow)

Anton

2003-06-16 11:55:32

by Andrey Panin

[permalink] [raw]
Subject: Re: irq consolidation

On 163, 06 12, 2003 at 09:34:06PM +1000, Anton Blanchard wrote:
>
> > I believe Andrey's IRQ consolidation provides a single flat IRQ
> > structure. Unfortunately, this doesn't reflect the reality that we
> > have on many ARM platforms - it remains the case that we need to
> > decode IRQs on a multi-level basis.
>
> Yes its still a flat structure. On ppc32/64 we offset the interrupts on
> the main controller to provide a space for ISA interrupts to go. Not
> great but it works for us.

May be I missed the point, but it isn't flat.

You can define HAVE_ARCH_IRQ_DESC and provide your own irq_desc(irq)
function which will translate irq number to the corresponding
irq_desc_t structure. You are free to implement any irq mappings
behind the irq_desc(). NR_IRQS is used only as maximal irq number.
So what is the problem ?

> One thing Paul suggested was to have a flag to mark an interrupt as a
> cascade in the irq descriptor. If its set then we also provide a
> get_irq() method (perhaps stashed away in the ->action field). That gives
> us nested interrupt handling in generic code. (assuming you can
> partition your irq numbers somehow)
>
> Anton

--
Andrey Panin | Linux and UNIX system administrator
[email protected] | PGP key: wwwkeys.pgp.net


Attachments:
(No filename) (1.23 kB)
(No filename) (189.00 B)
Download all attachments

2003-06-16 12:46:35

by Anton Blanchard

[permalink] [raw]
Subject: Re: irq consolidation


> May be I missed the point, but it isn't flat.
>
> You can define HAVE_ARCH_IRQ_DESC and provide your own irq_desc(irq)
> function which will translate irq number to the corresponding
> irq_desc_t structure. You are free to implement any irq mappings
> behind the irq_desc(). NR_IRQS is used only as maximal irq number.
> So what is the problem ?

We have been discussing passing an opaque value into request_irq like
sparc64 does. It could be a pointer to the irq descriptor. rmk was
interested in it since he can have heavily nested interrupt controllers
and partitioning the NR_IRQS space is a pain in this case.

This will work on 32bit archs since we store irqs as ints everywhere, but
will break on 64bit unless we do some tricks (like sparc64 is currently
doing)

Anton