The irqdomain->revmap(_tree) entry of a shared irq can be updated racily
as follows, which results in an unexpected irq mapping:
---------------------------------------------------------------
(Two threads parse the same hwirq fwspec at the same time.)
Thread A                          Thread B
irq_create_fwspec_mapping
  irq_find_mapping                                                Thread A: virq = 0, alloc a new irq_desc
                                  irq_create_fwspec_mapping
                                    irq_find_mapping              Thread B: virq = 0, alloc a new irq_desc
  irq_domain_alloc_irqs
    irq_domain_insert_irq                                         domain->revmap[hwirq] = irq_data(virq x)
                                  irq_domain_alloc_irqs
                                    irq_domain_insert_irq         domain->revmap[hwirq] = irq_data(virq x + 1)
virq = x                          virq = x + 1
---------------------------------------------------------------
virq x cannot work because revmap[hwirq] was
overwritten by thread B.
It seems both hierarchical and non-hierarchical irq domains have the same
problem, because the code path from irq_find_mapping() to the revmap update
is not protected by a single lock.
Do you have any suggestions on how to fix it properly?
Signed-off-by: Mark-PK Tsai <[email protected]>
---
kernel/irq/irqdomain.c | 28 +++++++++++++++++++---------
1 file changed, 19 insertions(+), 9 deletions(-)
diff --git a/kernel/irq/irqdomain.c b/kernel/irq/irqdomain.c
index 8fe1da9614ee..21d317b26220 100644
--- a/kernel/irq/irqdomain.c
+++ b/kernel/irq/irqdomain.c
@@ -22,6 +22,7 @@
static LIST_HEAD(irq_domain_list);
static DEFINE_MUTEX(irq_domain_mutex);
+static DEFINE_MUTEX(irq_revmaps_mutex);
static struct irq_domain *irq_default_domain;
@@ -793,6 +794,7 @@ unsigned int irq_create_fwspec_mapping(struct irq_fwspec *fwspec)
* If we've already configured this interrupt,
* don't do it again, or hell will break loose.
*/
+ mutex_lock(&irq_revmaps_mutex);
virq = irq_find_mapping(domain, hwirq);
if (virq) {
/*
@@ -801,7 +803,7 @@ unsigned int irq_create_fwspec_mapping(struct irq_fwspec *fwspec)
* interrupt number.
*/
if (type == IRQ_TYPE_NONE || type == irq_get_trigger_type(virq))
- return virq;
+ goto unlock;
/*
* If the trigger type has not been set yet, then set
@@ -809,27 +811,32 @@ unsigned int irq_create_fwspec_mapping(struct irq_fwspec *fwspec)
*/
if (irq_get_trigger_type(virq) == IRQ_TYPE_NONE) {
irq_data = irq_get_irq_data(virq);
- if (!irq_data)
- return 0;
+ if (!irq_data) {
+ virq = 0;
+ goto unlock;
+ }
irqd_set_trigger_type(irq_data, type);
- return virq;
+ goto unlock;
}
pr_warn("type mismatch, failed to map hwirq-%lu for %s!\n",
hwirq, of_node_full_name(to_of_node(fwspec->fwnode)));
- return 0;
+ virq = 0;
+ goto unlock;
}
if (irq_domain_is_hierarchy(domain)) {
virq = irq_domain_alloc_irqs(domain, 1, NUMA_NO_NODE, fwspec);
- if (virq <= 0)
- return 0;
+ if (virq <= 0) {
+ virq = 0;
+ goto unlock;
+ }
} else {
/* Create mapping */
virq = irq_create_mapping(domain, hwirq);
if (!virq)
- return virq;
+ goto unlock;
}
irq_data = irq_get_irq_data(virq);
@@ -838,12 +845,15 @@ unsigned int irq_create_fwspec_mapping(struct irq_fwspec *fwspec)
irq_domain_free_irqs(virq, 1);
else
irq_dispose_mapping(virq);
- return 0;
+ virq = 0;
+ goto unlock;
}
/* Store trigger type */
irqd_set_trigger_type(irq_data, type);
+unlock:
+ mutex_unlock(&irq_revmaps_mutex);
return virq;
}
EXPORT_SYMBOL_GPL(irq_create_fwspec_mapping);
--
2.18.0
On Mon, Dec 19, 2022 at 09:06:15PM +0800, Mark-PK Tsai wrote:
> The irqdomain->revmap(_tree) of a shared irq can be race updated
> as following, which result in an unexpected irq mapping:
>
> ---------------------------------------------------------------
> (2 threads parse the same hwirq fwspec in the same time.)
>
> Thread A Thread B
> irq_create_fwspec_mapping
> irq_find_mapping Thread A: virq = 0, alloc a new irq_desc
> irq_create_fwspec_mapping
> irq_find_mapping Thread B: virq = 0, alloc a new irq_desc
> irq_domain_alloc_irqs
> irq_domain_insert_irq domain->revmap[hwirq] = irq_data(virq x)
> irq_domain_alloc_irqs
> irq_domain_insert_irq domain->revmap[hwirq] = irq_data(virq x + 1)
>
> virq = x virq = x + 1
> ---------------------------------------------------------------
>
> The virq x can't work because the revmap[hwirq] was
> overridden by thread B.
>
> It seems both hierarchy and non-hierarchy irq domain have the same
> problem because the code from irq_find_mapping to revmap update are
> not protected by a same lock.
>
> Do you have any suggestion about how to fix it properly?
This is being fixed here:
https://lore.kernel.org/lkml/[email protected]/
Johan