Parallel probing (e.g. due to asynchronous probing) of devices that share
interrupts can currently result in two mappings being created for the
same hardware interrupt.
Add a serialising mapping mutex so that looking for an existing mapping
before creating a new one is done atomically.
Note that serialising the lookup and creation in
irq_create_mapping_affinity() would have been enough to prevent the
duplicate mapping, but that could instead cause
irq_create_fwspec_mapping() to fail when there is a race.
Fixes: 765230b5f084 ("driver-core: add asynchronous probing support for drivers")
Fixes: b62b2cf5759b ("irqdomain: Fix handling of type settings for existing mappings")
Cc: Dmitry Torokhov <[email protected]>
Cc: Jon Hunter <[email protected]>
Signed-off-by: Johan Hovold <[email protected]>
---
kernel/irq/irqdomain.c | 46 +++++++++++++++++++++++++++++++-----------
1 file changed, 34 insertions(+), 12 deletions(-)
diff --git a/kernel/irq/irqdomain.c b/kernel/irq/irqdomain.c
index 8fe1da9614ee..d263a7dd4170 100644
--- a/kernel/irq/irqdomain.c
+++ b/kernel/irq/irqdomain.c
@@ -22,6 +22,7 @@
static LIST_HEAD(irq_domain_list);
static DEFINE_MUTEX(irq_domain_mutex);
+static DEFINE_MUTEX(irq_mapping_mutex);
static struct irq_domain *irq_default_domain;
@@ -669,7 +670,7 @@ EXPORT_SYMBOL_GPL(irq_create_direct_mapping);
#endif
/**
- * irq_create_mapping_affinity() - Map a hardware interrupt into linux irq space
+ * __irq_create_mapping_affinity() - Map a hardware interrupt into linux irq space
* @domain: domain owning this hardware interrupt or NULL for default domain
* @hwirq: hardware irq number in that domain space
* @affinity: irq affinity
@@ -679,9 +680,9 @@ EXPORT_SYMBOL_GPL(irq_create_direct_mapping);
* If the sense/trigger is to be specified, set_irq_type() should be called
* on the number returned from that call.
*/
-unsigned int irq_create_mapping_affinity(struct irq_domain *domain,
- irq_hw_number_t hwirq,
- const struct irq_affinity_desc *affinity)
+static unsigned int __irq_create_mapping_affinity(struct irq_domain *domain,
+ irq_hw_number_t hwirq,
+ const struct irq_affinity_desc *affinity)
{
struct device_node *of_node;
int virq;
@@ -724,6 +725,19 @@ unsigned int irq_create_mapping_affinity(struct irq_domain *domain,
return virq;
}
+
+unsigned int irq_create_mapping_affinity(struct irq_domain *domain,
+ irq_hw_number_t hwirq,
+ const struct irq_affinity_desc *affinity)
+{
+ unsigned int virq;
+
+ mutex_lock(&irq_mapping_mutex);
+ virq = __irq_create_mapping_affinity(domain, hwirq, affinity);
+ mutex_unlock(&irq_mapping_mutex);
+
+ return virq;
+}
EXPORT_SYMBOL_GPL(irq_create_mapping_affinity);
static int irq_domain_translate(struct irq_domain *d,
@@ -789,6 +803,8 @@ unsigned int irq_create_fwspec_mapping(struct irq_fwspec *fwspec)
if (WARN_ON(type & ~IRQ_TYPE_SENSE_MASK))
type &= IRQ_TYPE_SENSE_MASK;
+ mutex_lock(&irq_mapping_mutex);
+
/*
* If we've already configured this interrupt,
* don't do it again, or hell will break loose.
@@ -801,7 +817,7 @@ unsigned int irq_create_fwspec_mapping(struct irq_fwspec *fwspec)
* interrupt number.
*/
if (type == IRQ_TYPE_NONE || type == irq_get_trigger_type(virq))
- return virq;
+ goto out;
/*
* If the trigger type has not been set yet, then set
@@ -810,26 +826,26 @@ unsigned int irq_create_fwspec_mapping(struct irq_fwspec *fwspec)
if (irq_get_trigger_type(virq) == IRQ_TYPE_NONE) {
irq_data = irq_get_irq_data(virq);
if (!irq_data)
- return 0;
+ goto err;
irqd_set_trigger_type(irq_data, type);
- return virq;
+ goto out;
}
pr_warn("type mismatch, failed to map hwirq-%lu for %s!\n",
hwirq, of_node_full_name(to_of_node(fwspec->fwnode)));
- return 0;
+ goto err;
}
if (irq_domain_is_hierarchy(domain)) {
virq = irq_domain_alloc_irqs(domain, 1, NUMA_NO_NODE, fwspec);
if (virq <= 0)
- return 0;
+ goto err;
} else {
/* Create mapping */
- virq = irq_create_mapping(domain, hwirq);
+ virq = __irq_create_mapping_affinity(domain, hwirq, NULL);
if (!virq)
- return virq;
+ goto err;
}
irq_data = irq_get_irq_data(virq);
@@ -838,13 +854,19 @@ unsigned int irq_create_fwspec_mapping(struct irq_fwspec *fwspec)
irq_domain_free_irqs(virq, 1);
else
irq_dispose_mapping(virq);
- return 0;
+ goto err;
}
/* Store trigger type */
irqd_set_trigger_type(irq_data, type);
+out:
+ mutex_unlock(&irq_mapping_mutex);
return virq;
+err:
+ mutex_unlock(&irq_mapping_mutex);
+
+ return 0;
}
EXPORT_SYMBOL_GPL(irq_create_fwspec_mapping);
--
2.35.1
On Thu, 28 Jul 2022 10:27:10 +0100,
Johan Hovold <[email protected]> wrote:
>
> Parallel probing (e.g. due to asynchronous probing) of devices that share
> interrupts can currently result in two mappings for the same hardware
> interrupt to be created.
And I thought nobody would be using shared interrupts anymore. Turns
out people are still building braindead HW... :-/
>
> Add a serialising mapping mutex so that looking for an existing mapping
> before creating a new one is done atomically.
>
> Note that serialising the lookup and creation in
> irq_create_mapping_affinity() would have been enough to prevent the
> duplicate mapping, but that could instead cause
> irq_create_fwspec_mapping() to fail when there is a race.
>
> Fixes: 765230b5f084 ("driver-core: add asynchronous probing support for drivers")
> Fixes: b62b2cf5759b ("irqdomain: Fix handling of type settings for existing mappings")
> Cc: Dmitry Torokhov <[email protected]>
> Cc: Jon Hunter <[email protected]>
> Signed-off-by: Johan Hovold <[email protected]>
> ---
> kernel/irq/irqdomain.c | 46 +++++++++++++++++++++++++++++++-----------
> 1 file changed, 34 insertions(+), 12 deletions(-)
>
> diff --git a/kernel/irq/irqdomain.c b/kernel/irq/irqdomain.c
> index 8fe1da9614ee..d263a7dd4170 100644
> --- a/kernel/irq/irqdomain.c
> +++ b/kernel/irq/irqdomain.c
> @@ -22,6 +22,7 @@
>
> static LIST_HEAD(irq_domain_list);
> static DEFINE_MUTEX(irq_domain_mutex);
> +static DEFINE_MUTEX(irq_mapping_mutex);
I'd really like to avoid a global mutex. At the very least this should
be a per-domain mutex, otherwise this will serialise a lot more than
what is needed.
>
> static struct irq_domain *irq_default_domain;
>
> @@ -669,7 +670,7 @@ EXPORT_SYMBOL_GPL(irq_create_direct_mapping);
> #endif
>
> /**
> - * irq_create_mapping_affinity() - Map a hardware interrupt into linux irq space
> + * __irq_create_mapping_affinity() - Map a hardware interrupt into linux irq space
> * @domain: domain owning this hardware interrupt or NULL for default domain
> * @hwirq: hardware irq number in that domain space
> * @affinity: irq affinity
> @@ -679,9 +680,9 @@ EXPORT_SYMBOL_GPL(irq_create_direct_mapping);
> * If the sense/trigger is to be specified, set_irq_type() should be called
> * on the number returned from that call.
> */
This comment should be moved to the exported function, instead of
documenting something that nobody can call...
> -unsigned int irq_create_mapping_affinity(struct irq_domain *domain,
> - irq_hw_number_t hwirq,
> - const struct irq_affinity_desc *affinity)
> +static unsigned int __irq_create_mapping_affinity(struct irq_domain *domain,
> + irq_hw_number_t hwirq,
> + const struct irq_affinity_desc *affinity)
> {
> struct device_node *of_node;
> int virq;
> @@ -724,6 +725,19 @@ unsigned int irq_create_mapping_affinity(struct irq_domain *domain,
>
> return virq;
> }
> +
> +unsigned int irq_create_mapping_affinity(struct irq_domain *domain,
> + irq_hw_number_t hwirq,
> + const struct irq_affinity_desc *affinity)
> +{
> + unsigned int virq;
> +
> + mutex_lock(&irq_mapping_mutex);
> + virq = __irq_create_mapping_affinity(domain, hwirq, affinity);
> + mutex_unlock(&irq_mapping_mutex);
> +
> + return virq;
> +}
> EXPORT_SYMBOL_GPL(irq_create_mapping_affinity);
>
> static int irq_domain_translate(struct irq_domain *d,
> @@ -789,6 +803,8 @@ unsigned int irq_create_fwspec_mapping(struct irq_fwspec *fwspec)
> if (WARN_ON(type & ~IRQ_TYPE_SENSE_MASK))
> type &= IRQ_TYPE_SENSE_MASK;
>
> + mutex_lock(&irq_mapping_mutex);
> +
> /*
> * If we've already configured this interrupt,
> * don't do it again, or hell will break loose.
> @@ -801,7 +817,7 @@ unsigned int irq_create_fwspec_mapping(struct irq_fwspec *fwspec)
> * interrupt number.
> */
> if (type == IRQ_TYPE_NONE || type == irq_get_trigger_type(virq))
> - return virq;
> + goto out;
>
> /*
> * If the trigger type has not been set yet, then set
> @@ -810,26 +826,26 @@ unsigned int irq_create_fwspec_mapping(struct irq_fwspec *fwspec)
> if (irq_get_trigger_type(virq) == IRQ_TYPE_NONE) {
> irq_data = irq_get_irq_data(virq);
> if (!irq_data)
> - return 0;
> + goto err;
>
> irqd_set_trigger_type(irq_data, type);
> - return virq;
> + goto out;
> }
>
> pr_warn("type mismatch, failed to map hwirq-%lu for %s!\n",
> hwirq, of_node_full_name(to_of_node(fwspec->fwnode)));
> - return 0;
> + goto err;
> }
>
> if (irq_domain_is_hierarchy(domain)) {
> virq = irq_domain_alloc_irqs(domain, 1, NUMA_NO_NODE, fwspec);
> if (virq <= 0)
> - return 0;
> + goto err;
> } else {
> /* Create mapping */
> - virq = irq_create_mapping(domain, hwirq);
> + virq = __irq_create_mapping_affinity(domain, hwirq, NULL);
This rechecks for the existence of the mapping. Surely we can do a bit
better by rejigging this (admittedly bitrotting) code.
Thanks,
M.
--
Without deviation from the norm, progress is not possible.
On Thu, Jul 28, 2022 at 12:48:23PM +0100, Marc Zyngier wrote:
> On Thu, 28 Jul 2022 10:27:10 +0100,
> Johan Hovold <[email protected]> wrote:
> >
> > Parallel probing (e.g. due to asynchronous probing) of devices that share
> > interrupts can currently result in two mappings for the same hardware
> > interrupt to be created.
>
> And I thought nobody would be using shared interrupts anymore. Turns
> out people are still building braindead HW... :-/
>
> >
> > Add a serialising mapping mutex so that looking for an existing mapping
> > before creating a new one is done atomically.
> >
> > Note that serialising the lookup and creation in
> > irq_create_mapping_affinity() would have been enough to prevent the
> > duplicate mapping, but that could instead cause
> > irq_create_fwspec_mapping() to fail when there is a race.
> >
> > Fixes: 765230b5f084 ("driver-core: add asynchronous probing support for drivers")
> > Fixes: b62b2cf5759b ("irqdomain: Fix handling of type settings for existing mappings")
> > Cc: Dmitry Torokhov <[email protected]>
> > Cc: Jon Hunter <[email protected]>
> > Signed-off-by: Johan Hovold <[email protected]>
> > ---
> > kernel/irq/irqdomain.c | 46 +++++++++++++++++++++++++++++++-----------
> > 1 file changed, 34 insertions(+), 12 deletions(-)
> >
> > diff --git a/kernel/irq/irqdomain.c b/kernel/irq/irqdomain.c
> > index 8fe1da9614ee..d263a7dd4170 100644
> > --- a/kernel/irq/irqdomain.c
> > +++ b/kernel/irq/irqdomain.c
> > @@ -22,6 +22,7 @@
> >
> > static LIST_HEAD(irq_domain_list);
> > static DEFINE_MUTEX(irq_domain_mutex);
> > +static DEFINE_MUTEX(irq_mapping_mutex);
>
> I'd really like to avoid a global mutex. At the very least this should
> be a per-domain mutex, otherwise this will serialise a lot more than
> what is needed.
Yeah, I considered that too, but wanted to get your comments on this
first.
Also note that the likewise global irq_domain_mutex (and
sparse_irq_lock) are taken in some of these paths, so perhaps using
finer-grained locking won't actually matter that much, as this is mostly
for parallel probing.
> >
> > static struct irq_domain *irq_default_domain;
> >
> > @@ -669,7 +670,7 @@ EXPORT_SYMBOL_GPL(irq_create_direct_mapping);
> > #endif
> >
> > /**
> > - * irq_create_mapping_affinity() - Map a hardware interrupt into linux irq space
> > + * __irq_create_mapping_affinity() - Map a hardware interrupt into linux irq space
> > * @domain: domain owning this hardware interrupt or NULL for default domain
> > * @hwirq: hardware irq number in that domain space
> > * @affinity: irq affinity
> > @@ -679,9 +680,9 @@ EXPORT_SYMBOL_GPL(irq_create_direct_mapping);
> > * If the sense/trigger is to be specified, set_irq_type() should be called
> > * on the number returned from that call.
> > */
>
> This comment should be moved to the exported function, instead of
> documenting something that nobody can call...
Yes, of course. I looked at the kernel doc for another
double-underscore-prefixed function, but those are all exported.
> > -unsigned int irq_create_mapping_affinity(struct irq_domain *domain,
> > - irq_hw_number_t hwirq,
> > - const struct irq_affinity_desc *affinity)
> > +static unsigned int __irq_create_mapping_affinity(struct irq_domain *domain,
> > + irq_hw_number_t hwirq,
> > + const struct irq_affinity_desc *affinity)
> > {
> > struct device_node *of_node;
> > int virq;
> > @@ -724,6 +725,19 @@ unsigned int irq_create_mapping_affinity(struct irq_domain *domain,
> >
> > return virq;
> > }
> > +
> > +unsigned int irq_create_mapping_affinity(struct irq_domain *domain,
> > + irq_hw_number_t hwirq,
> > + const struct irq_affinity_desc *affinity)
> > +{
> > + unsigned int virq;
> > +
> > + mutex_lock(&irq_mapping_mutex);
> > + virq = __irq_create_mapping_affinity(domain, hwirq, affinity);
> > + mutex_unlock(&irq_mapping_mutex);
> > +
> > + return virq;
> > +}
> > EXPORT_SYMBOL_GPL(irq_create_mapping_affinity);
> >
> > static int irq_domain_translate(struct irq_domain *d,
> > @@ -789,6 +803,8 @@ unsigned int irq_create_fwspec_mapping(struct irq_fwspec *fwspec)
> > if (WARN_ON(type & ~IRQ_TYPE_SENSE_MASK))
> > type &= IRQ_TYPE_SENSE_MASK;
> >
> > + mutex_lock(&irq_mapping_mutex);
> > +
> > /*
> > * If we've already configured this interrupt,
> > * don't do it again, or hell will break loose.
> > @@ -801,7 +817,7 @@ unsigned int irq_create_fwspec_mapping(struct irq_fwspec *fwspec)
> > * interrupt number.
> > */
> > if (type == IRQ_TYPE_NONE || type == irq_get_trigger_type(virq))
> > - return virq;
> > + goto out;
> >
> > /*
> > * If the trigger type has not been set yet, then set
> > @@ -810,26 +826,26 @@ unsigned int irq_create_fwspec_mapping(struct irq_fwspec *fwspec)
> > if (irq_get_trigger_type(virq) == IRQ_TYPE_NONE) {
> > irq_data = irq_get_irq_data(virq);
> > if (!irq_data)
> > - return 0;
> > + goto err;
> >
> > irqd_set_trigger_type(irq_data, type);
> > - return virq;
> > + goto out;
> > }
> >
> > pr_warn("type mismatch, failed to map hwirq-%lu for %s!\n",
> > hwirq, of_node_full_name(to_of_node(fwspec->fwnode)));
> > - return 0;
> > + goto err;
> > }
> >
> > if (irq_domain_is_hierarchy(domain)) {
> > virq = irq_domain_alloc_irqs(domain, 1, NUMA_NO_NODE, fwspec);
> > if (virq <= 0)
> > - return 0;
> > + goto err;
> > } else {
> > /* Create mapping */
> > - virq = irq_create_mapping(domain, hwirq);
> > + virq = __irq_create_mapping_affinity(domain, hwirq, NULL);
>
> This rechecks for the existence of the mapping. Surely we can do a bit
> better by rejigging this (admittedly bitrotting) code.
I'm sure we can. Should I try to fix the race first with a patch like
this one that can potentially be backported, and then see what I can do
about cleaning this up?
After all, it has looked like this for the past eight years, ever since
this code was first merged.
Johan
On Thu, 28 Jul 2022 13:56:41 +0100,
Johan Hovold <[email protected]> wrote:
>
> On Thu, Jul 28, 2022 at 12:48:23PM +0100, Marc Zyngier wrote:
> > On Thu, 28 Jul 2022 10:27:10 +0100,
> > Johan Hovold <[email protected]> wrote:
> > >
> > > Parallel probing (e.g. due to asynchronous probing) of devices that share
> > > interrupts can currently result in two mappings for the same hardware
> > > interrupt to be created.
> >
> > And I thought nobody would be using shared interrupts anymore. Turns
> > out people are still building braindead HW... :-/
> >
> > >
> > > Add a serialising mapping mutex so that looking for an existing mapping
> > > before creating a new one is done atomically.
> > >
> > > Note that serialising the lookup and creation in
> > > irq_create_mapping_affinity() would have been enough to prevent the
> > > duplicate mapping, but that could instead cause
> > > irq_create_fwspec_mapping() to fail when there is a race.
> > >
> > > Fixes: 765230b5f084 ("driver-core: add asynchronous probing support for drivers")
> > > Fixes: b62b2cf5759b ("irqdomain: Fix handling of type settings for existing mappings")
> > > Cc: Dmitry Torokhov <[email protected]>
> > > Cc: Jon Hunter <[email protected]>
> > > Signed-off-by: Johan Hovold <[email protected]>
> > > ---
> > > kernel/irq/irqdomain.c | 46 +++++++++++++++++++++++++++++++-----------
> > > 1 file changed, 34 insertions(+), 12 deletions(-)
> > >
> > > diff --git a/kernel/irq/irqdomain.c b/kernel/irq/irqdomain.c
> > > index 8fe1da9614ee..d263a7dd4170 100644
> > > --- a/kernel/irq/irqdomain.c
> > > +++ b/kernel/irq/irqdomain.c
> > > @@ -22,6 +22,7 @@
> > >
> > > static LIST_HEAD(irq_domain_list);
> > > static DEFINE_MUTEX(irq_domain_mutex);
> > > +static DEFINE_MUTEX(irq_mapping_mutex);
> >
> > I'd really like to avoid a global mutex. At the very least this should
> > be a per-domain mutex, otherwise this will serialise a lot more than
> > what is needed.
>
> Yeah, I considered that too, but wanted to get your comments on this
> first.
>
> Also note that the likewise global irq_domain_mutex (and
> sparse_irq_lock) are taken in some of these paths so perhaps using finer
> locking won't actually matter that much as this is mostly for parallel
> probing.
It will be a good opportunity to make the locking suck a bit less,
like in irq_domain_associate().
> > > } else {
> > > /* Create mapping */
> > > - virq = irq_create_mapping(domain, hwirq);
> > > + virq = __irq_create_mapping_affinity(domain, hwirq, NULL);
> >
> > This rechecks for the existence of the mapping. Surely we can do a bit
> > better by rejigging this (admittedly bitrotting) code.
>
> I'm sure we can. Should I try to fix the race first with a patch like
> this one that can potentially be backported, and then see what I can do
> about cleaning this up?
>
> After all it has looked like this for the past eight years since when
> this code was first merged.
No, let's put the code in shape *first*, then work on the locking,
as it should make the patch simpler. Backports aren't my concern,
really.
Thanks,
M.
--
Without deviation from the norm, progress is not possible.