2019-09-27 15:01:28

by Alexander Sverdlin

[permalink] [raw]
Subject: [PATCH v2 0/3] Fix irq_domain vs. irq user race

From: Alexander Sverdlin <[email protected]>

Seems that the discovered race was here since adapting the PowerPC code
into genirq in 2012. I was surprized it went unnoticed all this time, but
it turns out, device registration (being it DT or ACPI) is mostly
sequential in kernel. In our case probe deferring was involved and another
independent probe(), so that the fatal sequence probably looked like
following:

CPU0 CPU1

// probe() of irq-user-device
// irq_domain not found =>
// -EPROBE_DEFER

// probe of irq_domain, // probe() of some unrelated device
// including typical in // leading to re-try of
// drivers/irqchip pattern: // irq-user-device probe(), calling
irq_domain_create() of_irq_get()
for (...) irq_create_mapping() => irq_create_mapping()
=> irq_find_mapping() => irq_find_mapping()
=> irq_domain_alloc_descs() => irq_domain_alloc_descs()
=> irq_domain_associate()
=> irq_domain_associate()

irq_domain_associate() uses irq_domain_mutex internally, this doesn't help
however, because parallel calls to irq_create_mapping() return different
virq numbers and either the irq_domain driver is not able to perform
necessary configuration or the mapping the "slave" driver has got is being
overwritten by the controller driver and the virq "slave" got is never
triggered. The test code [1] reproduces the simplified scenario, which
doesn't involve real deferred probe.

As irq_find_mapping() is used in interrupt handlers it cannot take locks.
I didn't want to change the semantics of the exported
irq_domain_associate() (to remove the mutex from it and wrap
irq_find_mapping()-irq_domain_associate() pair into this mutex). Therefore
irq_domain_associate() was modified to detect the collisions and the
callers were modified to deal with this.

The race in irq_create_fwspec_mapping() is distinct, but has the same
nature: making a decision based on the result of unprotected
irq_find_mapping() call.

Alexander Sverdlin (3):
genirq/irqdomain: Check for existing mapping in irq_domain_associate()
genirq/irqdomain: Re-check mapping after associate in
irq_create_mapping()
genirq/irqdomain: Detect type race in irq_create_fwspec_mapping()

kernel/irq/irqdomain.c | 88 +++++++++++++++++++++++++++++---------------------
1 file changed, 52 insertions(+), 36 deletions(-)

Changelog:
v2:
- EBUSY -> EEXIST
- Only try to find existing mapping if irq_domain_associate() found
one

[1] Test kernel module:

// SPDX-License-Identifier: GPL-2.0
/**********************************************************************
* Author: Alexander Sverdlin <[email protected]>
*
* Copyright (c) 2019 Nokia
*
* This module is intended to test the discovered race condition in
* irq_create_mapping() function. The typical use case is when
* irq_create_mapping() is being called from an IRQ controller driver
* and of_irq_get() is used in a device driver requesting an IRQ from
* the particular IRQ controller. But of_irq_get() is calling
* irq_create_mapping() internally and this is the genuine race.
**********************************************************************/

#include <linux/atomic.h>
#include <linux/irqdomain.h>
#include <linux/kernel.h>
#include <linux/kthread.h>
#include <linux/module.h>

static unsigned iterations = 1000;
module_param(iterations, uint, S_IRUGO);

static struct irq_domain *domain;
static atomic_t sync_begin = ATOMIC_INIT(0);
static atomic_t sync_end = ATOMIC_INIT(0);

static int icm_thread(void *data)
{
unsigned *res = data;

while (!kthread_should_stop()) {
/* Wait until main thread signals start */
if (!atomic_read(&sync_begin))
continue;
*res = irq_create_mapping(domain, 0);
smp_mb__before_atomic();
atomic_dec(&sync_begin);
/* Wait until all test threads attepted the mapping */
while (!kthread_should_stop())
if (!atomic_read(&sync_begin))
break;
atomic_dec(&sync_end);
}
return 0;
}

static int irq_create_mapping_race_init(void)
{
const static struct irq_domain_ops ops;
struct task_struct *thr[2];
unsigned irq[ARRAY_SIZE(thr)];
unsigned i;
int ret = 0;

domain = irq_domain_create_linear(NULL, 2, &ops, NULL);
if (!domain) {
pr_err("irq_domain_create_linear() failed\n");
return -EINVAL;
}

for (i = 0; i < ARRAY_SIZE(thr); i++)
thr[i] = kthread_run(icm_thread, &irq[i], "icm_thr%u", i);

for (i = 0; i < iterations; i++) {
unsigned j;

/* Signal test threads to attempt the mapping */
atomic_add(ARRAY_SIZE(thr), &sync_end);
atomic_add(ARRAY_SIZE(thr), &sync_begin);
/* Wait until all threads have made an iteration */
while (atomic_read(&sync_end))
cpu_relax();
smp_mb__after_atomic();

for (j = 0; j < ARRAY_SIZE(thr); j++)
if (!irq[j]) {
pr_err("%s: %s got no virq\n", domain->name,
thr[j]->comm);
ret = -ENOENT;
goto stop_threads;
}

for (j = 1; j < ARRAY_SIZE(thr); j++)
if (irq[0] != irq[j]) {
pr_err("%s: %s got virq %u and %s got virq %u\n",
domain->name, thr[0]->comm, irq[0],
thr[j]->comm, irq[j]);
ret = -EINVAL;
goto stop_threads;
}

irq_dispose_mapping(irq[0]);
}

stop_threads:
for (i = 0; i < ARRAY_SIZE(thr); i++)
kthread_stop(thr[i]);

irq_domain_remove(domain);

return ret;
}
module_init(irq_create_mapping_race_init);

static void __exit irq_create_mapping_race_exit(void)
{
}
module_exit(irq_create_mapping_race_exit);

MODULE_AUTHOR("Alexander Sverdlin <[email protected]>");
MODULE_DESCRIPTION("irq_create_mapping() race test module");
MODULE_LICENSE("GPL");


2019-09-27 15:01:28

by Alexander Sverdlin

[permalink] [raw]
Subject: [PATCH v2 1/3] genirq/irqdomain: Check for existing mapping in irq_domain_associate()

From: Alexander Sverdlin <[email protected]>

irq_domain_associate() is the only place where irq_find_mapping() can be
used reliably (under irq_domain_mutex) to make a decision if the mapping
shall be created or not. Other calls to irq_find_mapping() (not under
any lock) cannot be used for this purpose and lead to race conditions in
particular inside irq_create_mapping().

Give the callers of irq_domain_associate() an ability to detect existing
domain reliably by examining the return value.

Reported-by: Krzysztof Adamski <[email protected]>
Reported-by: Tomasz Bachorski <[email protected]>
Reported-by: Wojciech Kosnikowski <[email protected]>
Cc: [email protected]
Signed-off-by: Alexander Sverdlin <[email protected]>
---
kernel/irq/irqdomain.c | 9 +++++++++
1 file changed, 9 insertions(+)

diff --git a/kernel/irq/irqdomain.c b/kernel/irq/irqdomain.c
index 132672b..ccbb048 100644
--- a/kernel/irq/irqdomain.c
+++ b/kernel/irq/irqdomain.c
@@ -545,6 +545,15 @@ int irq_domain_associate(struct irq_domain *domain, unsigned int virq,
return -EINVAL;

mutex_lock(&irq_domain_mutex);
+
+ /* Check if mapping already exists */
+ if (irq_find_mapping(domain, hwirq)) {
+ mutex_unlock(&irq_domain_mutex);
+ pr_debug("%s: conflicting mapping for hwirq 0x%x\n",
+ domain->name, (int)hwirq);
+ return -EEXIST;
+ }
+
irq_data->hwirq = hwirq;
irq_data->domain = domain;
if (domain->ops->map) {
--
2.4.6

2019-09-27 15:04:22

by Alexander Sverdlin

[permalink] [raw]
Subject: [PATCH v2 2/3] genirq/irqdomain: Re-check mapping after associate in irq_create_mapping()

From: Alexander Sverdlin <[email protected]>

If two irq_create_mapping() calls perform a mapping of the same hwirq on
two CPU cores in parallel they both will get 0 from irq_find_mapping(),
both will allocate unique virq using irq_domain_alloc_descs() and both
will finally irq_domain_associate() it. Giving different virq numbers
to their callers.

In practice the first caller is usually an interrupt controller driver and
the seconds is some device requesting the interrupt providede by the above
interrupt controller.

In this case either the interrupt controller driver configures virq which
is not the one being "associated" with hwirq, or the "slave" device
requests the virq which is never being triggered.

Reported-by: Krzysztof Adamski <[email protected]>
Reported-by: Tomasz Bachorski <[email protected]>
Reported-by: Wojciech Kosnikowski <[email protected]>
Cc: [email protected]
Signed-off-by: Alexander Sverdlin <[email protected]>
---
kernel/irq/irqdomain.c | 13 +++++--------
1 file changed, 5 insertions(+), 8 deletions(-)

diff --git a/kernel/irq/irqdomain.c b/kernel/irq/irqdomain.c
index ccbb048..ad62c08 100644
--- a/kernel/irq/irqdomain.c
+++ b/kernel/irq/irqdomain.c
@@ -661,6 +661,7 @@ unsigned int irq_create_mapping(struct irq_domain *domain,
{
struct device_node *of_node;
int virq;
+ int ret;

pr_debug("irq_create_mapping(0x%p, 0x%lx)\n", domain, hwirq);

@@ -675,13 +676,6 @@ unsigned int irq_create_mapping(struct irq_domain *domain,

of_node = irq_domain_get_of_node(domain);

- /* Check if mapping already exists */
- virq = irq_find_mapping(domain, hwirq);
- if (virq) {
- pr_debug("-> existing mapping on virq %d\n", virq);
- return virq;
- }
-
/* Allocate a virtual interrupt number */
virq = irq_domain_alloc_descs(-1, 1, hwirq, of_node_to_nid(of_node), NULL);
if (virq <= 0) {
@@ -689,8 +683,11 @@ unsigned int irq_create_mapping(struct irq_domain *domain,
return 0;
}

- if (irq_domain_associate(domain, virq, hwirq)) {
+ ret = irq_domain_associate(domain, virq, hwirq);
+ if (ret) {
irq_free_desc(virq);
+ if (ret == -EEXIST)
+ return irq_find_mapping(domain, hwirq);
return 0;
}

--
2.4.6

2019-09-27 15:05:16

by Alexander Sverdlin

[permalink] [raw]
Subject: [PATCH v2 3/3] genirq/irqdomain: Detect type race in irq_create_fwspec_mapping()

From: Alexander Sverdlin <[email protected]>

irq_create_fwspec_mapping() can race with itself during IRQ trigger type
configuration. Possible scenarios include:

- Mapping exists, two irq_create_fwspec_mapping() running in parallel do
not detect type mismatch, IRQ remains configured with one of the
different trigger types randomly
- Second call to irq_create_fwspec_mapping() sees existing mapping just
created by first call, but earlier irqd_set_trigger_type() call races
with later irqd_set_trigger_type() => totally undetected, IRQ type
is being set randomly to either one or another type

Introduce helper function to detect parallel changes to IRQ type.

Cc: [email protected]
Signed-off-by: Alexander Sverdlin <[email protected]>
---
kernel/irq/irqdomain.c | 66 +++++++++++++++++++++++++++++---------------------
1 file changed, 38 insertions(+), 28 deletions(-)

diff --git a/kernel/irq/irqdomain.c b/kernel/irq/irqdomain.c
index ad62c08..4ff4073 100644
--- a/kernel/irq/irqdomain.c
+++ b/kernel/irq/irqdomain.c
@@ -764,10 +764,45 @@ static void of_phandle_args_to_fwspec(struct device_node *np, const u32 *args,
fwspec->param[i] = args[i];
}

+/* Detect races during IRQ type setting */
+static int irq_set_trigger_type_locked(unsigned int virq, unsigned int type,
+ irq_hw_number_t hwirq,
+ const struct irq_fwspec *fwspec)
+{
+ struct irq_data *irq_data;
+ int ret = 0;
+
+ mutex_lock(&irq_domain_mutex);
+ /*
+ * If the trigger type is not specified or matches the current trigger
+ * type then we are done.
+ */
+ if (type == IRQ_TYPE_NONE || type == irq_get_trigger_type(virq))
+ goto unlock;
+
+ /* If the trigger type has not been set yet, then set it now */
+ if (irq_get_trigger_type(virq) != IRQ_TYPE_NONE) {
+ pr_warn("type mismatch, failed to map hwirq-%lu for %s!\n",
+ hwirq, of_node_full_name(to_of_node(fwspec->fwnode)));
+ ret = -EINVAL;
+ goto unlock;
+ }
+
+ irq_data = irq_get_irq_data(virq);
+ if (!irq_data) {
+ ret = -ENOENT;
+ goto unlock;
+ }
+ irqd_set_trigger_type(irq_data, type);
+
+unlock:
+ mutex_unlock(&irq_domain_mutex);
+ return ret;
+}
+
unsigned int irq_create_fwspec_mapping(struct irq_fwspec *fwspec)
{
struct irq_domain *domain;
- struct irq_data *irq_data;
irq_hw_number_t hwirq;
unsigned int type = IRQ_TYPE_NONE;
int virq;
@@ -802,29 +837,8 @@ unsigned int irq_create_fwspec_mapping(struct irq_fwspec *fwspec)
*/
virq = irq_find_mapping(domain, hwirq);
if (virq) {
- /*
- * If the trigger type is not specified or matches the
- * current trigger type then we are done so return the
- * interrupt number.
- */
- if (type == IRQ_TYPE_NONE || type == irq_get_trigger_type(virq))
- return virq;
-
- /*
- * If the trigger type has not been set yet, then set
- * it now and return the interrupt number.
- */
- if (irq_get_trigger_type(virq) == IRQ_TYPE_NONE) {
- irq_data = irq_get_irq_data(virq);
- if (!irq_data)
- return 0;
-
- irqd_set_trigger_type(irq_data, type);
+ if (!irq_set_trigger_type_locked(virq, type, hwirq, fwspec))
return virq;
- }
-
- pr_warn("type mismatch, failed to map hwirq-%lu for %s!\n",
- hwirq, of_node_full_name(to_of_node(fwspec->fwnode)));
return 0;
}

@@ -839,8 +853,7 @@ unsigned int irq_create_fwspec_mapping(struct irq_fwspec *fwspec)
return virq;
}

- irq_data = irq_get_irq_data(virq);
- if (!irq_data) {
+ if (irq_set_trigger_type_locked(virq, type, hwirq, fwspec)) {
if (irq_domain_is_hierarchy(domain))
irq_domain_free_irqs(virq, 1);
else
@@ -848,9 +861,6 @@ unsigned int irq_create_fwspec_mapping(struct irq_fwspec *fwspec)
return 0;
}

- /* Store trigger type */
- irqd_set_trigger_type(irq_data, type);
-
return virq;
}
EXPORT_SYMBOL_GPL(irq_create_fwspec_mapping);