The PLIC driver does not require very early initialization so convert
it into a platform driver.
After conversion, the PLIC driver is probed after CPUs are brought up,
so set up the cpuhp state only after the context handlers of all online
CPUs are initialized; otherwise the PLIC driver crashes on platforms
with multiple PLIC instances.
Signed-off-by: Anup Patel <[email protected]>
---
drivers/irqchip/irq-sifive-plic.c | 101 ++++++++++++++++++------------
1 file changed, 61 insertions(+), 40 deletions(-)
diff --git a/drivers/irqchip/irq-sifive-plic.c b/drivers/irqchip/irq-sifive-plic.c
index 5b7bc4fd9517..7400a07fc479 100644
--- a/drivers/irqchip/irq-sifive-plic.c
+++ b/drivers/irqchip/irq-sifive-plic.c
@@ -64,6 +64,7 @@
#define PLIC_QUIRK_EDGE_INTERRUPT 0
struct plic_priv {
+ struct device *dev;
struct cpumask lmask;
struct irq_domain *irqdomain;
void __iomem *regs;
@@ -406,30 +407,50 @@ static int plic_starting_cpu(unsigned int cpu)
return 0;
}
-static int __init __plic_init(struct device_node *node,
- struct device_node *parent,
- unsigned long plic_quirks)
+static const struct of_device_id plic_match[] = {
+ { .compatible = "sifive,plic-1.0.0" },
+ { .compatible = "riscv,plic0" },
+ { .compatible = "andestech,nceplic100",
+ .data = (const void *)BIT(PLIC_QUIRK_EDGE_INTERRUPT) },
+ { .compatible = "thead,c900-plic",
+ .data = (const void *)BIT(PLIC_QUIRK_EDGE_INTERRUPT) },
+ {}
+};
+
+static int plic_probe(struct platform_device *pdev)
{
int error = 0, nr_contexts, nr_handlers = 0, i;
- u32 nr_irqs;
- struct plic_priv *priv;
+ struct device *dev = &pdev->dev;
+ unsigned long plic_quirks = 0;
struct plic_handler *handler;
+ struct plic_priv *priv;
+ bool cpuhp_setup;
unsigned int cpu;
+ u32 nr_irqs;
+
+ if (is_of_node(dev->fwnode)) {
+ const struct of_device_id *id;
+
+ id = of_match_node(plic_match, to_of_node(dev->fwnode));
+ if (id)
+ plic_quirks = (unsigned long)id->data;
+ }
priv = kzalloc(sizeof(*priv), GFP_KERNEL);
if (!priv)
return -ENOMEM;
+ priv->dev = dev;
priv->plic_quirks = plic_quirks;
- priv->regs = of_iomap(node, 0);
+ priv->regs = of_iomap(to_of_node(dev->fwnode), 0);
if (WARN_ON(!priv->regs)) {
error = -EIO;
goto out_free_priv;
}
error = -EINVAL;
- of_property_read_u32(node, "riscv,ndev", &nr_irqs);
+ of_property_read_u32(to_of_node(dev->fwnode), "riscv,ndev", &nr_irqs);
if (WARN_ON(!nr_irqs))
goto out_iounmap;
@@ -439,13 +460,13 @@ static int __init __plic_init(struct device_node *node,
if (!priv->prio_save)
goto out_free_priority_reg;
- nr_contexts = of_irq_count(node);
+ nr_contexts = of_irq_count(to_of_node(dev->fwnode));
if (WARN_ON(!nr_contexts))
goto out_free_priority_reg;
error = -ENOMEM;
- priv->irqdomain = irq_domain_add_linear(node, nr_irqs + 1,
- &plic_irqdomain_ops, priv);
+ priv->irqdomain = irq_domain_add_linear(to_of_node(dev->fwnode), nr_irqs + 1,
+ &plic_irqdomain_ops, priv);
if (WARN_ON(!priv->irqdomain))
goto out_free_priority_reg;
@@ -455,7 +476,7 @@ static int __init __plic_init(struct device_node *node,
int cpu;
unsigned long hartid;
- if (of_irq_parse_one(node, i, &parent)) {
+ if (of_irq_parse_one(to_of_node(dev->fwnode), i, &parent)) {
pr_err("failed to parse parent for context %d.\n", i);
continue;
}
@@ -491,7 +512,7 @@ static int __init __plic_init(struct device_node *node,
/* Find parent domain and register chained handler */
if (!plic_parent_irq && irq_find_host(parent.np)) {
- plic_parent_irq = irq_of_parse_and_map(node, i);
+ plic_parent_irq = irq_of_parse_and_map(to_of_node(dev->fwnode), i);
if (plic_parent_irq)
irq_set_chained_handler(plic_parent_irq,
plic_handle_irq);
@@ -533,20 +554,29 @@ static int __init __plic_init(struct device_node *node,
/*
* We can have multiple PLIC instances so setup cpuhp state
- * and register syscore operations only when context handler
- * for current/boot CPU is present.
+ * and register syscore operations only once after context
+ * handlers of all online CPUs are initialized.
*/
- handler = this_cpu_ptr(&plic_handlers);
- if (handler->present && !plic_cpuhp_setup_done) {
- cpuhp_setup_state(CPUHP_AP_IRQ_SIFIVE_PLIC_STARTING,
- "irqchip/sifive/plic:starting",
- plic_starting_cpu, plic_dying_cpu);
- register_syscore_ops(&plic_irq_syscore_ops);
- plic_cpuhp_setup_done = true;
+ if (!plic_cpuhp_setup_done) {
+ cpuhp_setup = true;
+ for_each_online_cpu(cpu) {
+ handler = per_cpu_ptr(&plic_handlers, cpu);
+ if (!handler->present) {
+ cpuhp_setup = false;
+ break;
+ }
+ }
+ if (cpuhp_setup) {
+ cpuhp_setup_state(CPUHP_AP_IRQ_SIFIVE_PLIC_STARTING,
+ "irqchip/sifive/plic:starting",
+ plic_starting_cpu, plic_dying_cpu);
+ register_syscore_ops(&plic_irq_syscore_ops);
+ plic_cpuhp_setup_done = true;
+ }
}
- pr_info("%pOFP: mapped %d interrupts with %d handlers for"
- " %d contexts.\n", node, nr_irqs, nr_handlers, nr_contexts);
+ pr_info("%pOFP: mapped %d interrupts with %d handlers for %d contexts.\n",
+ to_of_node(dev->fwnode), nr_irqs, nr_handlers, nr_contexts);
return 0;
out_free_enable_reg:
@@ -563,20 +593,11 @@ static int __init __plic_init(struct device_node *node,
return error;
}
-static int __init plic_init(struct device_node *node,
- struct device_node *parent)
-{
- return __plic_init(node, parent, 0);
-}
-
-IRQCHIP_DECLARE(sifive_plic, "sifive,plic-1.0.0", plic_init);
-IRQCHIP_DECLARE(riscv_plic0, "riscv,plic0", plic_init); /* for legacy systems */
-
-static int __init plic_edge_init(struct device_node *node,
- struct device_node *parent)
-{
- return __plic_init(node, parent, BIT(PLIC_QUIRK_EDGE_INTERRUPT));
-}
-
-IRQCHIP_DECLARE(andestech_nceplic100, "andestech,nceplic100", plic_edge_init);
-IRQCHIP_DECLARE(thead_c900_plic, "thead,c900-plic", plic_edge_init);
+static struct platform_driver plic_driver = {
+ .driver = {
+ .name = "riscv-plic",
+ .of_match_table = plic_match,
+ },
+ .probe = plic_probe,
+};
+builtin_platform_driver(plic_driver);
--
2.34.1
The following commit has been merged into the irq/msi branch of tip:
Commit-ID: 8ec99b033147ef3bb8f0a560c24eb1baec3bc0be
Gitweb: https://git.kernel.org/tip/8ec99b033147ef3bb8f0a560c24eb1baec3bc0be
Author: Anup Patel <[email protected]>
AuthorDate: Thu, 22 Feb 2024 15:09:49 +05:30
Committer: Thomas Gleixner <[email protected]>
CommitterDate: Fri, 23 Feb 2024 10:18:43 +01:00
irqchip/sifive-plic: Convert PLIC driver into a platform driver
The PLIC driver does not require very early initialization so convert
it into a platform driver.
After conversion, the PLIC driver is probed after CPUs are brought-up
so setup cpuhp state after context handler of all online CPUs are
initialized otherwise PLIC driver crashes for platforms with multiple
PLIC instances.
Signed-off-by: Anup Patel <[email protected]>
Signed-off-by: Thomas Gleixner <[email protected]>
Link: https://lore.kernel.org/r/[email protected]
---
drivers/irqchip/irq-sifive-plic.c | 101 +++++++++++++++++------------
1 file changed, 61 insertions(+), 40 deletions(-)
diff --git a/drivers/irqchip/irq-sifive-plic.c b/drivers/irqchip/irq-sifive-plic.c
index 5b7bc4f..7400a07 100644
--- a/drivers/irqchip/irq-sifive-plic.c
+++ b/drivers/irqchip/irq-sifive-plic.c
@@ -64,6 +64,7 @@
#define PLIC_QUIRK_EDGE_INTERRUPT 0
struct plic_priv {
+ struct device *dev;
struct cpumask lmask;
struct irq_domain *irqdomain;
void __iomem *regs;
@@ -406,30 +407,50 @@ static int plic_starting_cpu(unsigned int cpu)
return 0;
}
-static int __init __plic_init(struct device_node *node,
- struct device_node *parent,
- unsigned long plic_quirks)
+static const struct of_device_id plic_match[] = {
+ { .compatible = "sifive,plic-1.0.0" },
+ { .compatible = "riscv,plic0" },
+ { .compatible = "andestech,nceplic100",
+ .data = (const void *)BIT(PLIC_QUIRK_EDGE_INTERRUPT) },
+ { .compatible = "thead,c900-plic",
+ .data = (const void *)BIT(PLIC_QUIRK_EDGE_INTERRUPT) },
+ {}
+};
+
+static int plic_probe(struct platform_device *pdev)
{
int error = 0, nr_contexts, nr_handlers = 0, i;
- u32 nr_irqs;
- struct plic_priv *priv;
+ struct device *dev = &pdev->dev;
+ unsigned long plic_quirks = 0;
struct plic_handler *handler;
+ struct plic_priv *priv;
+ bool cpuhp_setup;
unsigned int cpu;
+ u32 nr_irqs;
+
+ if (is_of_node(dev->fwnode)) {
+ const struct of_device_id *id;
+
+ id = of_match_node(plic_match, to_of_node(dev->fwnode));
+ if (id)
+ plic_quirks = (unsigned long)id->data;
+ }
priv = kzalloc(sizeof(*priv), GFP_KERNEL);
if (!priv)
return -ENOMEM;
+ priv->dev = dev;
priv->plic_quirks = plic_quirks;
- priv->regs = of_iomap(node, 0);
+ priv->regs = of_iomap(to_of_node(dev->fwnode), 0);
if (WARN_ON(!priv->regs)) {
error = -EIO;
goto out_free_priv;
}
error = -EINVAL;
- of_property_read_u32(node, "riscv,ndev", &nr_irqs);
+ of_property_read_u32(to_of_node(dev->fwnode), "riscv,ndev", &nr_irqs);
if (WARN_ON(!nr_irqs))
goto out_iounmap;
@@ -439,13 +460,13 @@ static int __init __plic_init(struct device_node *node,
if (!priv->prio_save)
goto out_free_priority_reg;
- nr_contexts = of_irq_count(node);
+ nr_contexts = of_irq_count(to_of_node(dev->fwnode));
if (WARN_ON(!nr_contexts))
goto out_free_priority_reg;
error = -ENOMEM;
- priv->irqdomain = irq_domain_add_linear(node, nr_irqs + 1,
- &plic_irqdomain_ops, priv);
+ priv->irqdomain = irq_domain_add_linear(to_of_node(dev->fwnode), nr_irqs + 1,
+ &plic_irqdomain_ops, priv);
if (WARN_ON(!priv->irqdomain))
goto out_free_priority_reg;
@@ -455,7 +476,7 @@ static int __init __plic_init(struct device_node *node,
int cpu;
unsigned long hartid;
- if (of_irq_parse_one(node, i, &parent)) {
+ if (of_irq_parse_one(to_of_node(dev->fwnode), i, &parent)) {
pr_err("failed to parse parent for context %d.\n", i);
continue;
}
@@ -491,7 +512,7 @@ static int __init __plic_init(struct device_node *node,
/* Find parent domain and register chained handler */
if (!plic_parent_irq && irq_find_host(parent.np)) {
- plic_parent_irq = irq_of_parse_and_map(node, i);
+ plic_parent_irq = irq_of_parse_and_map(to_of_node(dev->fwnode), i);
if (plic_parent_irq)
irq_set_chained_handler(plic_parent_irq,
plic_handle_irq);
@@ -533,20 +554,29 @@ done:
/*
* We can have multiple PLIC instances so setup cpuhp state
- * and register syscore operations only when context handler
- * for current/boot CPU is present.
+ * and register syscore operations only once after context
+ * handlers of all online CPUs are initialized.
*/
- handler = this_cpu_ptr(&plic_handlers);
- if (handler->present && !plic_cpuhp_setup_done) {
- cpuhp_setup_state(CPUHP_AP_IRQ_SIFIVE_PLIC_STARTING,
- "irqchip/sifive/plic:starting",
- plic_starting_cpu, plic_dying_cpu);
- register_syscore_ops(&plic_irq_syscore_ops);
- plic_cpuhp_setup_done = true;
+ if (!plic_cpuhp_setup_done) {
+ cpuhp_setup = true;
+ for_each_online_cpu(cpu) {
+ handler = per_cpu_ptr(&plic_handlers, cpu);
+ if (!handler->present) {
+ cpuhp_setup = false;
+ break;
+ }
+ }
+ if (cpuhp_setup) {
+ cpuhp_setup_state(CPUHP_AP_IRQ_SIFIVE_PLIC_STARTING,
+ "irqchip/sifive/plic:starting",
+ plic_starting_cpu, plic_dying_cpu);
+ register_syscore_ops(&plic_irq_syscore_ops);
+ plic_cpuhp_setup_done = true;
+ }
}
- pr_info("%pOFP: mapped %d interrupts with %d handlers for"
- " %d contexts.\n", node, nr_irqs, nr_handlers, nr_contexts);
+ pr_info("%pOFP: mapped %d interrupts with %d handlers for %d contexts.\n",
+ to_of_node(dev->fwnode), nr_irqs, nr_handlers, nr_contexts);
return 0;
out_free_enable_reg:
@@ -563,20 +593,11 @@ out_free_priv:
return error;
}
-static int __init plic_init(struct device_node *node,
- struct device_node *parent)
-{
- return __plic_init(node, parent, 0);
-}
-
-IRQCHIP_DECLARE(sifive_plic, "sifive,plic-1.0.0", plic_init);
-IRQCHIP_DECLARE(riscv_plic0, "riscv,plic0", plic_init); /* for legacy systems */
-
-static int __init plic_edge_init(struct device_node *node,
- struct device_node *parent)
-{
- return __plic_init(node, parent, BIT(PLIC_QUIRK_EDGE_INTERRUPT));
-}
-
-IRQCHIP_DECLARE(andestech_nceplic100, "andestech,nceplic100", plic_edge_init);
-IRQCHIP_DECLARE(thead_c900_plic, "thead,c900-plic", plic_edge_init);
+static struct platform_driver plic_driver = {
+ .driver = {
+ .name = "riscv-plic",
+ .of_match_table = plic_match,
+ },
+ .probe = plic_probe,
+};
+builtin_platform_driver(plic_driver);
Hi Anup,
On Thu, Feb 22, 2024 at 9:41 AM Anup Patel <[email protected]> wrote:
>
> The PLIC driver does not require very early initialization so convert
> it into a platform driver.
>
> After conversion, the PLIC driver is probed after CPUs are brought-up
> so setup cpuhp state after context handler of all online CPUs are
> initialized otherwise PLIC driver crashes for platforms with multiple
> PLIC instances.
>
> Signed-off-by: Anup Patel <[email protected]>
> ---
> drivers/irqchip/irq-sifive-plic.c | 101 ++++++++++++++++++------------
> 1 file changed, 61 insertions(+), 40 deletions(-)
>
This patch seems to have broken things on the RZ/Five SoC; after reverting
this patch I am able to boot again on v6.9-rc2. Looks like there
is some probe order issue after switching to a platform driver?
Cheers,
Prabhakar
> diff --git a/drivers/irqchip/irq-sifive-plic.c b/drivers/irqchip/irq-sifive-plic.c
> index 5b7bc4fd9517..7400a07fc479 100644
> --- a/drivers/irqchip/irq-sifive-plic.c
> +++ b/drivers/irqchip/irq-sifive-plic.c
> @@ -64,6 +64,7 @@
> #define PLIC_QUIRK_EDGE_INTERRUPT 0
>
> struct plic_priv {
> + struct device *dev;
> struct cpumask lmask;
> struct irq_domain *irqdomain;
> void __iomem *regs;
> @@ -406,30 +407,50 @@ static int plic_starting_cpu(unsigned int cpu)
> return 0;
> }
>
> -static int __init __plic_init(struct device_node *node,
> - struct device_node *parent,
> - unsigned long plic_quirks)
> +static const struct of_device_id plic_match[] = {
> + { .compatible = "sifive,plic-1.0.0" },
> + { .compatible = "riscv,plic0" },
> + { .compatible = "andestech,nceplic100",
> + .data = (const void *)BIT(PLIC_QUIRK_EDGE_INTERRUPT) },
> + { .compatible = "thead,c900-plic",
> + .data = (const void *)BIT(PLIC_QUIRK_EDGE_INTERRUPT) },
> + {}
> +};
> +
> +static int plic_probe(struct platform_device *pdev)
> {
> int error = 0, nr_contexts, nr_handlers = 0, i;
> - u32 nr_irqs;
> - struct plic_priv *priv;
> + struct device *dev = &pdev->dev;
> + unsigned long plic_quirks = 0;
> struct plic_handler *handler;
> + struct plic_priv *priv;
> + bool cpuhp_setup;
> unsigned int cpu;
> + u32 nr_irqs;
> +
> + if (is_of_node(dev->fwnode)) {
> + const struct of_device_id *id;
> +
> + id = of_match_node(plic_match, to_of_node(dev->fwnode));
> + if (id)
> + plic_quirks = (unsigned long)id->data;
> + }
>
> priv = kzalloc(sizeof(*priv), GFP_KERNEL);
> if (!priv)
> return -ENOMEM;
>
> + priv->dev = dev;
> priv->plic_quirks = plic_quirks;
>
> - priv->regs = of_iomap(node, 0);
> + priv->regs = of_iomap(to_of_node(dev->fwnode), 0);
> if (WARN_ON(!priv->regs)) {
> error = -EIO;
> goto out_free_priv;
> }
>
> error = -EINVAL;
> - of_property_read_u32(node, "riscv,ndev", &nr_irqs);
> + of_property_read_u32(to_of_node(dev->fwnode), "riscv,ndev", &nr_irqs);
> if (WARN_ON(!nr_irqs))
> goto out_iounmap;
>
> @@ -439,13 +460,13 @@ static int __init __plic_init(struct device_node *node,
> if (!priv->prio_save)
> goto out_free_priority_reg;
>
> - nr_contexts = of_irq_count(node);
> + nr_contexts = of_irq_count(to_of_node(dev->fwnode));
> if (WARN_ON(!nr_contexts))
> goto out_free_priority_reg;
>
> error = -ENOMEM;
> - priv->irqdomain = irq_domain_add_linear(node, nr_irqs + 1,
> - &plic_irqdomain_ops, priv);
> + priv->irqdomain = irq_domain_add_linear(to_of_node(dev->fwnode), nr_irqs + 1,
> + &plic_irqdomain_ops, priv);
> if (WARN_ON(!priv->irqdomain))
> goto out_free_priority_reg;
>
> @@ -455,7 +476,7 @@ static int __init __plic_init(struct device_node *node,
> int cpu;
> unsigned long hartid;
>
> - if (of_irq_parse_one(node, i, &parent)) {
> + if (of_irq_parse_one(to_of_node(dev->fwnode), i, &parent)) {
> pr_err("failed to parse parent for context %d.\n", i);
> continue;
> }
> @@ -491,7 +512,7 @@ static int __init __plic_init(struct device_node *node,
>
> /* Find parent domain and register chained handler */
> if (!plic_parent_irq && irq_find_host(parent.np)) {
> - plic_parent_irq = irq_of_parse_and_map(node, i);
> + plic_parent_irq = irq_of_parse_and_map(to_of_node(dev->fwnode), i);
> if (plic_parent_irq)
> irq_set_chained_handler(plic_parent_irq,
> plic_handle_irq);
> @@ -533,20 +554,29 @@ static int __init __plic_init(struct device_node *node,
>
> /*
> * We can have multiple PLIC instances so setup cpuhp state
> - * and register syscore operations only when context handler
> - * for current/boot CPU is present.
> + * and register syscore operations only once after context
> + * handlers of all online CPUs are initialized.
> */
> - handler = this_cpu_ptr(&plic_handlers);
> - if (handler->present && !plic_cpuhp_setup_done) {
> - cpuhp_setup_state(CPUHP_AP_IRQ_SIFIVE_PLIC_STARTING,
> - "irqchip/sifive/plic:starting",
> - plic_starting_cpu, plic_dying_cpu);
> - register_syscore_ops(&plic_irq_syscore_ops);
> - plic_cpuhp_setup_done = true;
> + if (!plic_cpuhp_setup_done) {
> + cpuhp_setup = true;
> + for_each_online_cpu(cpu) {
> + handler = per_cpu_ptr(&plic_handlers, cpu);
> + if (!handler->present) {
> + cpuhp_setup = false;
> + break;
> + }
> + }
> + if (cpuhp_setup) {
> + cpuhp_setup_state(CPUHP_AP_IRQ_SIFIVE_PLIC_STARTING,
> + "irqchip/sifive/plic:starting",
> + plic_starting_cpu, plic_dying_cpu);
> + register_syscore_ops(&plic_irq_syscore_ops);
> + plic_cpuhp_setup_done = true;
> + }
> }
>
> - pr_info("%pOFP: mapped %d interrupts with %d handlers for"
> - " %d contexts.\n", node, nr_irqs, nr_handlers, nr_contexts);
> + pr_info("%pOFP: mapped %d interrupts with %d handlers for %d contexts.\n",
> + to_of_node(dev->fwnode), nr_irqs, nr_handlers, nr_contexts);
> return 0;
>
> out_free_enable_reg:
> @@ -563,20 +593,11 @@ static int __init __plic_init(struct device_node *node,
> return error;
> }
>
> -static int __init plic_init(struct device_node *node,
> - struct device_node *parent)
> -{
> - return __plic_init(node, parent, 0);
> -}
> -
> -IRQCHIP_DECLARE(sifive_plic, "sifive,plic-1.0.0", plic_init);
> -IRQCHIP_DECLARE(riscv_plic0, "riscv,plic0", plic_init); /* for legacy systems */
> -
> -static int __init plic_edge_init(struct device_node *node,
> - struct device_node *parent)
> -{
> - return __plic_init(node, parent, BIT(PLIC_QUIRK_EDGE_INTERRUPT));
> -}
> -
> -IRQCHIP_DECLARE(andestech_nceplic100, "andestech,nceplic100", plic_edge_init);
> -IRQCHIP_DECLARE(thead_c900_plic, "thead,c900-plic", plic_edge_init);
> +static struct platform_driver plic_driver = {
> + .driver = {
> + .name = "riscv-plic",
> + .of_match_table = plic_match,
> + },
> + .probe = plic_probe,
> +};
> +builtin_platform_driver(plic_driver);
> --
> 2.34.1
>
>
> _______________________________________________
> linux-arm-kernel mailing list
> [email protected]
> http://lists.infradead.org/mailman/listinfo/linux-arm-kernel
On Wed, Apr 3, 2024 at 2:01 PM Lad, Prabhakar
<[email protected]> wrote:
>
> Hi Anup,
>
> On Thu, Feb 22, 2024 at 9:41 AM Anup Patel <[email protected]> wrote:
> >
> > The PLIC driver does not require very early initialization so convert
> > it into a platform driver.
> >
> > After conversion, the PLIC driver is probed after CPUs are brought-up
> > so setup cpuhp state after context handler of all online CPUs are
> > initialized otherwise PLIC driver crashes for platforms with multiple
> > PLIC instances.
> >
> > Signed-off-by: Anup Patel <[email protected]>
> > ---
> > drivers/irqchip/irq-sifive-plic.c | 101 ++++++++++++++++++------------
> > 1 file changed, 61 insertions(+), 40 deletions(-)
> >
> This patch seems to have broken things on RZ/Five SoC, after reverting
> this patch I get to boot it back again on v6.9-rc2. Looks like there
> is some probe order issue after switching to platform driver?
Yes, this is most likely related to probe ordering based on your DT.
Can you share the failing boot log and DT ?
Regards,
Anup
>
> Cheers,
> Prabhakar
>
> > diff --git a/drivers/irqchip/irq-sifive-plic.c b/drivers/irqchip/irq-sifive-plic.c
> > index 5b7bc4fd9517..7400a07fc479 100644
> > --- a/drivers/irqchip/irq-sifive-plic.c
> > +++ b/drivers/irqchip/irq-sifive-plic.c
> > @@ -64,6 +64,7 @@
> > #define PLIC_QUIRK_EDGE_INTERRUPT 0
> >
> > struct plic_priv {
> > + struct device *dev;
> > struct cpumask lmask;
> > struct irq_domain *irqdomain;
> > void __iomem *regs;
> > @@ -406,30 +407,50 @@ static int plic_starting_cpu(unsigned int cpu)
> > return 0;
> > }
> >
> > -static int __init __plic_init(struct device_node *node,
> > - struct device_node *parent,
> > - unsigned long plic_quirks)
> > +static const struct of_device_id plic_match[] = {
> > + { .compatible = "sifive,plic-1.0.0" },
> > + { .compatible = "riscv,plic0" },
> > + { .compatible = "andestech,nceplic100",
> > + .data = (const void *)BIT(PLIC_QUIRK_EDGE_INTERRUPT) },
> > + { .compatible = "thead,c900-plic",
> > + .data = (const void *)BIT(PLIC_QUIRK_EDGE_INTERRUPT) },
> > + {}
> > +};
> > +
> > +static int plic_probe(struct platform_device *pdev)
> > {
> > int error = 0, nr_contexts, nr_handlers = 0, i;
> > - u32 nr_irqs;
> > - struct plic_priv *priv;
> > + struct device *dev = &pdev->dev;
> > + unsigned long plic_quirks = 0;
> > struct plic_handler *handler;
> > + struct plic_priv *priv;
> > + bool cpuhp_setup;
> > unsigned int cpu;
> > + u32 nr_irqs;
> > +
> > + if (is_of_node(dev->fwnode)) {
> > + const struct of_device_id *id;
> > +
> > + id = of_match_node(plic_match, to_of_node(dev->fwnode));
> > + if (id)
> > + plic_quirks = (unsigned long)id->data;
> > + }
> >
> > priv = kzalloc(sizeof(*priv), GFP_KERNEL);
> > if (!priv)
> > return -ENOMEM;
> >
> > + priv->dev = dev;
> > priv->plic_quirks = plic_quirks;
> >
> > - priv->regs = of_iomap(node, 0);
> > + priv->regs = of_iomap(to_of_node(dev->fwnode), 0);
> > if (WARN_ON(!priv->regs)) {
> > error = -EIO;
> > goto out_free_priv;
> > }
> >
> > error = -EINVAL;
> > - of_property_read_u32(node, "riscv,ndev", &nr_irqs);
> > + of_property_read_u32(to_of_node(dev->fwnode), "riscv,ndev", &nr_irqs);
> > if (WARN_ON(!nr_irqs))
> > goto out_iounmap;
> >
> > @@ -439,13 +460,13 @@ static int __init __plic_init(struct device_node *node,
> > if (!priv->prio_save)
> > goto out_free_priority_reg;
> >
> > - nr_contexts = of_irq_count(node);
> > + nr_contexts = of_irq_count(to_of_node(dev->fwnode));
> > if (WARN_ON(!nr_contexts))
> > goto out_free_priority_reg;
> >
> > error = -ENOMEM;
> > - priv->irqdomain = irq_domain_add_linear(node, nr_irqs + 1,
> > - &plic_irqdomain_ops, priv);
> > + priv->irqdomain = irq_domain_add_linear(to_of_node(dev->fwnode), nr_irqs + 1,
> > + &plic_irqdomain_ops, priv);
> > if (WARN_ON(!priv->irqdomain))
> > goto out_free_priority_reg;
> >
> > @@ -455,7 +476,7 @@ static int __init __plic_init(struct device_node *node,
> > int cpu;
> > unsigned long hartid;
> >
> > - if (of_irq_parse_one(node, i, &parent)) {
> > + if (of_irq_parse_one(to_of_node(dev->fwnode), i, &parent)) {
> > pr_err("failed to parse parent for context %d.\n", i);
> > continue;
> > }
> > @@ -491,7 +512,7 @@ static int __init __plic_init(struct device_node *node,
> >
> > /* Find parent domain and register chained handler */
> > if (!plic_parent_irq && irq_find_host(parent.np)) {
> > - plic_parent_irq = irq_of_parse_and_map(node, i);
> > + plic_parent_irq = irq_of_parse_and_map(to_of_node(dev->fwnode), i);
> > if (plic_parent_irq)
> > irq_set_chained_handler(plic_parent_irq,
> > plic_handle_irq);
> > @@ -533,20 +554,29 @@ static int __init __plic_init(struct device_node *node,
> >
> > /*
> > * We can have multiple PLIC instances so setup cpuhp state
> > - * and register syscore operations only when context handler
> > - * for current/boot CPU is present.
> > + * and register syscore operations only once after context
> > + * handlers of all online CPUs are initialized.
> > */
> > - handler = this_cpu_ptr(&plic_handlers);
> > - if (handler->present && !plic_cpuhp_setup_done) {
> > - cpuhp_setup_state(CPUHP_AP_IRQ_SIFIVE_PLIC_STARTING,
> > - "irqchip/sifive/plic:starting",
> > - plic_starting_cpu, plic_dying_cpu);
> > - register_syscore_ops(&plic_irq_syscore_ops);
> > - plic_cpuhp_setup_done = true;
> > + if (!plic_cpuhp_setup_done) {
> > + cpuhp_setup = true;
> > + for_each_online_cpu(cpu) {
> > + handler = per_cpu_ptr(&plic_handlers, cpu);
> > + if (!handler->present) {
> > + cpuhp_setup = false;
> > + break;
> > + }
> > + }
> > + if (cpuhp_setup) {
> > + cpuhp_setup_state(CPUHP_AP_IRQ_SIFIVE_PLIC_STARTING,
> > + "irqchip/sifive/plic:starting",
> > + plic_starting_cpu, plic_dying_cpu);
> > + register_syscore_ops(&plic_irq_syscore_ops);
> > + plic_cpuhp_setup_done = true;
> > + }
> > }
> >
> > - pr_info("%pOFP: mapped %d interrupts with %d handlers for"
> > - " %d contexts.\n", node, nr_irqs, nr_handlers, nr_contexts);
> > + pr_info("%pOFP: mapped %d interrupts with %d handlers for %d contexts.\n",
> > + to_of_node(dev->fwnode), nr_irqs, nr_handlers, nr_contexts);
> > return 0;
> >
> > out_free_enable_reg:
> > @@ -563,20 +593,11 @@ static int __init __plic_init(struct device_node *node,
> > return error;
> > }
> >
> > -static int __init plic_init(struct device_node *node,
> > - struct device_node *parent)
> > -{
> > - return __plic_init(node, parent, 0);
> > -}
> > -
> > -IRQCHIP_DECLARE(sifive_plic, "sifive,plic-1.0.0", plic_init);
> > -IRQCHIP_DECLARE(riscv_plic0, "riscv,plic0", plic_init); /* for legacy systems */
> > -
> > -static int __init plic_edge_init(struct device_node *node,
> > - struct device_node *parent)
> > -{
> > - return __plic_init(node, parent, BIT(PLIC_QUIRK_EDGE_INTERRUPT));
> > -}
> > -
> > -IRQCHIP_DECLARE(andestech_nceplic100, "andestech,nceplic100", plic_edge_init);
> > -IRQCHIP_DECLARE(thead_c900_plic, "thead,c900-plic", plic_edge_init);
> > +static struct platform_driver plic_driver = {
> > + .driver = {
> > + .name = "riscv-plic",
> > + .of_match_table = plic_match,
> > + },
> > + .probe = plic_probe,
> > +};
> > +builtin_platform_driver(plic_driver);
> > --
> > 2.34.1
> >
> >
> > _______________________________________________
> > linux-arm-kernel mailing list
> > [email protected]
> > http://lists.infradead.org/mailman/listinfo/linux-arm-kernel
On Wed, Apr 3, 2024 at 3:17 PM Anup Patel <[email protected]> wrote:
>
> On Wed, Apr 3, 2024 at 2:01 PM Lad, Prabhakar
> <[email protected]> wrote:
> >
> > Hi Anup,
> >
> > > On Thu, Feb 22, 2024 at 9:41 AM Anup Patel <[email protected]> wrote:
> > >
> > > The PLIC driver does not require very early initialization so convert
> > > it into a platform driver.
> > >
> > > After conversion, the PLIC driver is probed after CPUs are brought-up
> > > so setup cpuhp state after context handler of all online CPUs are
> > > initialized otherwise PLIC driver crashes for platforms with multiple
> > > PLIC instances.
> > >
> > > Signed-off-by: Anup Patel <[email protected]>
> > > ---
> > > drivers/irqchip/irq-sifive-plic.c | 101 ++++++++++++++++++------------
> > > 1 file changed, 61 insertions(+), 40 deletions(-)
> > >
> > This patch seems to have broken things on RZ/Five SoC, after reverting
> > this patch I get to boot it back again on v6.9-rc2. Looks like there
> > is some probe order issue after switching to platform driver?
>
> Yes, this is most likely related to probe ordering based on your DT.
>
> Can you share the failing boot log and DT ?
non working case, https://paste.debian.net/1312947/
after reverting, https://paste.debian.net/1312948/
(attached is the DTB)
Cheers,
Prabhakar
Hi Prabhakar,
On 2024-04-03 10:49 AM, Lad, Prabhakar wrote:
> On Wed, Apr 3, 2024 at 3:17 PM Anup Patel <[email protected]> wrote:
>>
>> On Wed, Apr 3, 2024 at 2:01 PM Lad, Prabhakar
>> <[email protected]> wrote:
>>>
>>> Hi Anup,
>>>
>>> On Thu, Feb 22, 2024 at 9:41 AM Anup Patel <[email protected]> wrote:
>>>>
>>>> The PLIC driver does not require very early initialization so convert
>>>> it into a platform driver.
>>>>
>>>> After conversion, the PLIC driver is probed after CPUs are brought-up
>>>> so setup cpuhp state after context handler of all online CPUs are
>>>> initialized otherwise PLIC driver crashes for platforms with multiple
>>>> PLIC instances.
>>>>
>>>> Signed-off-by: Anup Patel <[email protected]>
>>>> ---
>>>> drivers/irqchip/irq-sifive-plic.c | 101 ++++++++++++++++++------------
>>>> 1 file changed, 61 insertions(+), 40 deletions(-)
>>>>
>>> This patch seems to have broken things on RZ/Five SoC, after reverting
>>> this patch I get to boot it back again on v6.9-rc2. Looks like there
>>> is some probe order issue after switching to platform driver?
>>
>> Yes, this is most likely related to probe ordering based on your DT.
>>
>> Can you share the failing boot log and DT ?
>
> non working case, https://paste.debian.net/1312947/
Looks like you need to add "keep_bootcon" to your kernel command line to get a
full log here.
> after reverting, https://paste.debian.net/1312948/
> (attached is the DTB)
I don't see anything suspicious between the "riscv-intc" lines and the "Fixed
dependency cycle(s)" lines that looks like it would depend on the PLIC IRQ
domain. Maybe there is some driver that does not handle -EPROBE_DEFER? It's hard
to tell without the full log from the failure case.
Regards,
Samuel
On Wed, Apr 3, 2024 at 9:19 PM Lad, Prabhakar
<[email protected]> wrote:
>
> On Wed, Apr 3, 2024 at 3:17 PM Anup Patel <[email protected]> wrote:
> >
> > On Wed, Apr 3, 2024 at 2:01 PM Lad, Prabhakar
> > <[email protected]> wrote:
> > >
> > > Hi Anup,
> > >
> > > On Thu, Feb 22, 2024 at 9:41 AM Anup Patel <[email protected]> wrote:
> > > >
> > > > The PLIC driver does not require very early initialization so convert
> > > > it into a platform driver.
> > > >
> > > > After conversion, the PLIC driver is probed after CPUs are brought-up
> > > > so setup cpuhp state after context handler of all online CPUs are
> > > > initialized otherwise PLIC driver crashes for platforms with multiple
> > > > PLIC instances.
> > > >
> > > > Signed-off-by: Anup Patel <[email protected]>
> > > > ---
> > > > drivers/irqchip/irq-sifive-plic.c | 101 ++++++++++++++++++------------
> > > > 1 file changed, 61 insertions(+), 40 deletions(-)
> > > >
> > > This patch seems to have broken things on RZ/Five SoC, after reverting
> > > this patch I get to boot it back again on v6.9-rc2. Looks like there
> > > is some probe order issue after switching to platform driver?
> >
> > Yes, this is most likely related to probe ordering based on your DT.
> >
> > Can you share the failing boot log and DT ?
>
> non working case, https://paste.debian.net/1312947/
> after reverting, https://paste.debian.net/1312948/
> (attached is the DTB)
Can you add "console=ttySC0,115200" to kernel parameters and
share updated boot logs ?
Regards,
Anup
Hi Samuel and Anup,
On Wed, Apr 3, 2024 at 5:28 PM Samuel Holland <[email protected]> wrote:
>
> Hi Prabhakar,
>
> On 2024-04-03 10:49 AM, Lad, Prabhakar wrote:
> > On Wed, Apr 3, 2024 at 3:17 PM Anup Patel <[email protected]> wrote:
> >>
> >> On Wed, Apr 3, 2024 at 2:01 PM Lad, Prabhakar
> >> <[email protected]> wrote:
> >>>
> >>> Hi Anup,
> >>>
> >>> On Thu, Feb 22, 2024 at 9:41 AM Anup Patel <[email protected]> wrote:
> >>>>
> >>>> The PLIC driver does not require very early initialization so convert
> >>>> it into a platform driver.
> >>>>
> >>>> After conversion, the PLIC driver is probed after CPUs are brought-up
> >>>> so setup cpuhp state after context handler of all online CPUs are
> >>>> initialized otherwise PLIC driver crashes for platforms with multiple
> >>>> PLIC instances.
> >>>>
> >>>> Signed-off-by: Anup Patel <[email protected]>
> >>>> ---
> >>>> drivers/irqchip/irq-sifive-plic.c | 101 ++++++++++++++++++------------
> >>>> 1 file changed, 61 insertions(+), 40 deletions(-)
> >>>>
> >>> This patch seems to have broken things on RZ/Five SoC, after reverting
> >>> this patch I get to boot it back again on v6.9-rc2. Looks like there
> >>> is some probe order issue after switching to platform driver?
> >>
> >> Yes, this is most likely related to probe ordering based on your DT.
> >>
> >> Can you share the failing boot log and DT ?
> >
> > non working case, https://paste.debian.net/1312947/
>
> Looks like you need to add "keep_bootcon" to your kernel command line to get a
> full log here.
>
Thanks for the pointer, that helped me to get to the root cause.
> > after reverting, https://paste.debian.net/1312948/
> > (attached is the DTB)
>
> I don't see anything suspicious between the "riscv-intc" lines and the "Fixed
> dependency cycle(s)" lines that looks like it would depend on the PLIC IRQ
> domain. Maybe there is some driver that does not handle -EPROBE_DEFER? It's hard
> to tell without the full log from the failure case.
>
The clock required for the PLIC wasn't available during the probe of
this driver. This bug got hidden when the PLIC driver was probed
earlier in boot where it used an incorrect clock source. I've created
a patch which adds a missing clock for the PLIC.
Sorry for the noise!
Cheers,
Prabhakar
On Wed, Apr 3, 2024 at 9:19 PM Lad, Prabhakar
<[email protected]> wrote:
>
> On Wed, Apr 3, 2024 at 3:17 PM Anup Patel <[email protected]> wrote:
> >
> > On Wed, Apr 3, 2024 at 2:01 PM Lad, Prabhakar
> > <[email protected]> wrote:
> > >
> > > Hi Anup,
> > >
> > > On Thu, Feb 22, 2024 at 9:41 AM Anup Patel <[email protected]> wrote:
> > > >
> > > > The PLIC driver does not require very early initialization so convert
> > > > it into a platform driver.
> > > >
> > > > After conversion, the PLIC driver is probed after CPUs are brought-up
> > > > so setup cpuhp state after context handler of all online CPUs are
> > > > initialized otherwise PLIC driver crashes for platforms with multiple
> > > > PLIC instances.
> > > >
> > > > Signed-off-by: Anup Patel <[email protected]>
> > > > ---
> > > > drivers/irqchip/irq-sifive-plic.c | 101 ++++++++++++++++++------------
> > > > 1 file changed, 61 insertions(+), 40 deletions(-)
> > > >
> > > This patch seems to have broken things on RZ/Five SoC, after reverting
> > > this patch I get to boot it back again on v6.9-rc2. Looks like there
> > > is some probe order issue after switching to platform driver?
> >
> > Yes, this is most likely related to probe ordering based on your DT.
> >
> > Can you share the failing boot log and DT ?
>
> non working case, https://paste.debian.net/1312947/
> after reverting, https://paste.debian.net/1312948/
> (attached is the DTB)
One potential problem is that
drivers/clocksource/renesas-ostm.c is probed early
using TIMER_OF_DECLARE(), but the timer interrupt
is connected to the PLIC, which is now probed late, hence the
timer probe will fail.
We have two possible options:
1) Disable OSTM nodes
2) Improve the OSTM driver to probe like a
regular platform device on RISC-V
Regards,
Anup
Hi Anup,
On Thu, Feb 22, 2024 at 10:41 AM Anup Patel <[email protected]> wrote:
> The PLIC driver does not require very early initialization so convert
> it into a platform driver.
>
> After conversion, the PLIC driver is probed after CPUs are brought-up
> so setup cpuhp state after context handler of all online CPUs are
> initialized otherwise PLIC driver crashes for platforms with multiple
> PLIC instances.
>
> Signed-off-by: Anup Patel <[email protected]>
Thanks for your patch, which is now commit 8ec99b033147ef3b
("irqchip/sifive-plic: Convert PLIC driver into a platform
driver") in v6.9.
It looks like this conversion is causing issues on BeagleV Starlight
Beta. After updating esmil/visionfive to v6.10-rc1, the kernel usually
fails to boot. Adding "earlycon keep_bootcon" reveals these differences:
-riscv-plic c000000.interrupt-controller: mapped 133 interrupts with 2
handlers for 4 contexts.
+------------[ cut here ]------------
+WARNING: CPU: 0 PID: 1 at drivers/irqchip/irq-sifive-plic.c:373
plic_handle_irq+0xf2/0xf6
+Modules linked in:
+CPU: 0 PID: 1 Comm: swapper/0 Not tainted
6.10.0-rc1-starlight-02342-g0ba4c76ca0e8-dirty #323
+Hardware name: BeagleV Starlight Beta (DT)
+epc : plic_handle_irq+0xf2/0xf6
+ ra : generic_handle_domain_irq+0x1c/0x2a
+epc : ffffffff8033f994 ra : ffffffff8006319a sp : ffffffc800003f50
+ gp : ffffffff812d63f0 tp : ffffffd8800b8000 t0 : 0000000000000040
+ t1 : 0000000000000000 t2 : 0000000000001000 s0 : ffffffc800003fa0
+ s1 : 0000000000000009 a0 : ffffffd880183600 a1 : 0000000000000009
+ a2 : 0000000000000000 a3 : 0000000000000000 a4 : 0000000000000000
+ a5 : 0000000000000000 a6 : ffffffd880400248 a7 : ffffffd8804002b8
+ s2 : ffffffd9f8fac458 s3 : 0000000000000004 s4 : 0000000000000000
+ s5 : ffffffff81293f58 s6 : ffffffd88014ac00 s7 : 0000000000000004
+ s8 : ffffffc800013b2c s9 : ffffffc800013b34 s10: 0000000000000006
+ s11: ffffffd9f8fc1458 t3 : 0000000000000002 t4 : 0000000000000402
+ t5 : ffffffd8800610c0 t6 : ffffffd8800610e0
+status: 0000000200000100 badaddr: ffffffd9f8fac458 cause: 0000000000000003
+[<ffffffff8033f994>] plic_handle_irq+0xf2/0xf6
+[<ffffffff8006319a>] generic_handle_domain_irq+0x1c/0x2a
+[<ffffffff8033d7aa>] riscv_intc_irq+0x26/0x60
+[<ffffffff806c92ee>] handle_riscv_irq+0x4a/0x74
+[<ffffffff806d2346>] call_on_irq_stack+0x32/0x40
+---[ end trace 0000000000000000 ]---
+Unable to handle kernel NULL pointer dereference at virtual address
0000000000000004
+Oops [#1]
+Modules linked in:
+CPU: 0 PID: 1 Comm: swapper/0 Tainted: G W
6.10.0-rc1-starlight-02342-g0ba4c76ca0e8-dirty #323
+Hardware name: BeagleV Starlight Beta (DT)
+epc : plic_handle_irq+0x66/0xf6
+ ra : generic_handle_domain_irq+0x1c/0x2a
+epc : ffffffff8033f908 ra : ffffffff8006319a sp : ffffffc800003f50
+ gp : ffffffff812d63f0 tp : ffffffd8800b8000 t0 : 0000000000000040
+ t1 : 0000000000000000 t2 : 0000000000001000 s0 : ffffffc800003fa0
+ s1 : 0000000000000009 a0 : ffffffd880183600 a1 : 0000000000000009
+ a2 : 0000000000000000 a3 : 0000000000000000 a4 : 0000000000000000
+ a5 : ffffffff8033d72a a6 : ffffffd880400248 a7 : ffffffd8804002b8
+ s2 : ffffffd9f8fac458 s3 : 0000000000000004 s4 : ffffffd880183630
+ s5 : ffffffff81293f58 s6 : ffffffff812948a0 s7 : ffffffff80c4e660
+ s8 : ffffffff80d9eea0 s9 : ffffffc800013b34 s10: 0000000000000006
+ s11: ffffffd9f8fc1458 t3 : 0000000000000002 t4 : 0000000000000402
+ t5 : ffffffd8800610c0 t6 : ffffffd8800610e0
+status: 0000000200000100 badaddr: 0000000000000004 cause: 000000000000000d
+[<ffffffff8033f908>] plic_handle_irq+0x66/0xf6
+[<ffffffff8006319a>] generic_handle_domain_irq+0x1c/0x2a
+[<ffffffff8033d7aa>] riscv_intc_irq+0x26/0x60
+[<ffffffff806c92ee>] handle_riscv_irq+0x4a/0x74
+[<ffffffff806d2346>] call_on_irq_stack+0x32/0x40
+Code: 8b93 d70b 5b17 00f5 0b13 fa8b fc17 00a5 0c13 5a0c (a783) 0009
+---[ end trace 0000000000000000 ]---
+Kernel panic - not syncing: Fatal exception in interrupt
+SMP: stopping secondary CPUs
+---[ end Kernel panic - not syncing: Fatal exception in interrupt ]---
As "mapped 133 interrupts" is no longer printed, it looks like an
unexpected early interrupt comes in while still in plic_probe().
Esmil suggested reverting all of:
a7fb69ffd7ce438a irqchip/sifive-plic: Avoid explicit cpumask allocation on stack
abb7205794900503 irqchip/sifive-plic: Improve locking safety by using
irqsave/irqrestore
95652106478030f5 irqchip/sifive-plic: Parse number of interrupts and
contexts early in plic_probe()
a15587277a246c38 irqchip/sifive-plic: Cleanup PLIC contexts upon
irqdomain creation failure
6c725f33d67b53f2 irqchip/sifive-plic: Use riscv_get_intc_hwnode() to
get parent fwnode
b68d0ff529a939a1 irqchip/sifive-plic: Use devm_xyz() for managed allocation
25d862e183d4efeb irqchip/sifive-plic: Use dev_xyz() in-place of pr_xyz()
8ec99b033147ef3b irqchip/sifive-plic: Convert PLIC driver into a platform driver
After this, the PLIC is initialized earlier again, and this indeed
seems to fix the issue for me.
Before, the kernel booted fine in only ca. 1 out of 5 tries.
After the reverts, it booted 5/5.
Do you know what's going on? Is there a simpler fix?
Thanks!
Gr{oetje,eeting}s,
Geert
--
Geert Uytterhoeven -- There's lots of Linux beyond ia32 -- geert@linux-m68k.org
In personal conversations with technical people, I call myself a hacker. But
when I'm talking to journalists I just say "programmer" or something like that.
-- Linus Torvalds
Hi Geert,
On 2024-05-29 9:22 AM, Geert Uytterhoeven wrote:
> Hi Anup,
>
> On Thu, Feb 22, 2024 at 10:41 AM Anup Patel <[email protected]> wrote:
>> The PLIC driver does not require very early initialization so convert
>> it into a platform driver.
>>
>> After conversion, the PLIC driver is probed after CPUs are brought-up
>> so setup cpuhp state after context handler of all online CPUs are
>> initialized otherwise PLIC driver crashes for platforms with multiple
>> PLIC instances.
>>
>> Signed-off-by: Anup Patel <[email protected]>
>
> Thanks for your patch, which is now commit 8ec99b033147ef3b
> ("irqchip/sifive-plic: Convert PLIC driver into a platform
> driver") in v6.9.
>
> It looks like this conversion is causing issues on BeagleV Starlight
> Beta. After updating esmil/visionfive to v6.10-rc1, the kernel usually
> fails to boot. Adding "earlycon keep_bootcon" reveals these differences:
>
> -riscv-plic c000000.interrupt-controller: mapped 133 interrupts with 2
> handlers for 4 contexts.
> +------------[ cut here ]------------
> +WARNING: CPU: 0 PID: 1 at drivers/irqchip/irq-sifive-plic.c:373
> plic_handle_irq+0xf2/0xf6
> +Modules linked in:
> +CPU: 0 PID: 1 Comm: swapper/0 Not tainted
> 6.10.0-rc1-starlight-02342-g0ba4c76ca0e8-dirty #323
> +Hardware name: BeagleV Starlight Beta (DT)
> +epc : plic_handle_irq+0xf2/0xf6
> + ra : generic_handle_domain_irq+0x1c/0x2a
> +epc : ffffffff8033f994 ra : ffffffff8006319a sp : ffffffc800003f50
> + gp : ffffffff812d63f0 tp : ffffffd8800b8000 t0 : 0000000000000040
> + t1 : 0000000000000000 t2 : 0000000000001000 s0 : ffffffc800003fa0
> + s1 : 0000000000000009 a0 : ffffffd880183600 a1 : 0000000000000009
> + a2 : 0000000000000000 a3 : 0000000000000000 a4 : 0000000000000000
> + a5 : 0000000000000000 a6 : ffffffd880400248 a7 : ffffffd8804002b8
> + s2 : ffffffd9f8fac458 s3 : 0000000000000004 s4 : 0000000000000000
> + s5 : ffffffff81293f58 s6 : ffffffd88014ac00 s7 : 0000000000000004
> + s8 : ffffffc800013b2c s9 : ffffffc800013b34 s10: 0000000000000006
> + s11: ffffffd9f8fc1458 t3 : 0000000000000002 t4 : 0000000000000402
> + t5 : ffffffd8800610c0 t6 : ffffffd8800610e0
> +status: 0000000200000100 badaddr: ffffffd9f8fac458 cause: 0000000000000003
> +[<ffffffff8033f994>] plic_handle_irq+0xf2/0xf6
> +[<ffffffff8006319a>] generic_handle_domain_irq+0x1c/0x2a
> +[<ffffffff8033d7aa>] riscv_intc_irq+0x26/0x60
> +[<ffffffff806c92ee>] handle_riscv_irq+0x4a/0x74
> +[<ffffffff806d2346>] call_on_irq_stack+0x32/0x40
> +---[ end trace 0000000000000000 ]---
> +Unable to handle kernel NULL pointer dereference at virtual address
> 0000000000000004
> +Oops [#1]
> +Modules linked in:
> +CPU: 0 PID: 1 Comm: swapper/0 Tainted: G W
> 6.10.0-rc1-starlight-02342-g0ba4c76ca0e8-dirty #323
> +Hardware name: BeagleV Starlight Beta (DT)
> +epc : plic_handle_irq+0x66/0xf6
> + ra : generic_handle_domain_irq+0x1c/0x2a
> +epc : ffffffff8033f908 ra : ffffffff8006319a sp : ffffffc800003f50
> + gp : ffffffff812d63f0 tp : ffffffd8800b8000 t0 : 0000000000000040
> + t1 : 0000000000000000 t2 : 0000000000001000 s0 : ffffffc800003fa0
> + s1 : 0000000000000009 a0 : ffffffd880183600 a1 : 0000000000000009
> + a2 : 0000000000000000 a3 : 0000000000000000 a4 : 0000000000000000
> + a5 : ffffffff8033d72a a6 : ffffffd880400248 a7 : ffffffd8804002b8
> + s2 : ffffffd9f8fac458 s3 : 0000000000000004 s4 : ffffffd880183630
> + s5 : ffffffff81293f58 s6 : ffffffff812948a0 s7 : ffffffff80c4e660
> + s8 : ffffffff80d9eea0 s9 : ffffffc800013b34 s10: 0000000000000006
> + s11: ffffffd9f8fc1458 t3 : 0000000000000002 t4 : 0000000000000402
> + t5 : ffffffd8800610c0 t6 : ffffffd8800610e0
> +status: 0000000200000100 badaddr: 0000000000000004 cause: 000000000000000d
> +[<ffffffff8033f908>] plic_handle_irq+0x66/0xf6
> +[<ffffffff8006319a>] generic_handle_domain_irq+0x1c/0x2a
> +[<ffffffff8033d7aa>] riscv_intc_irq+0x26/0x60
> +[<ffffffff806c92ee>] handle_riscv_irq+0x4a/0x74
> +[<ffffffff806d2346>] call_on_irq_stack+0x32/0x40
> +Code: 8b93 d70b 5b17 00f5 0b13 fa8b fc17 00a5 0c13 5a0c (a783) 0009
> +---[ end trace 0000000000000000 ]---
> +Kernel panic - not syncing: Fatal exception in interrupt
> +SMP: stopping secondary CPUs
> +---[ end Kernel panic - not syncing: Fatal exception in interrupt ]---
>
> As "mapped 133 interrupts" is no longer printed, it looks like an
> unexpected early interrupt comes in while still in plic_probe().
>
> Esmil suggested reverting all of:
> a7fb69ffd7ce438a irqchip/sifive-plic: Avoid explicit cpumask allocation on stack
> abb7205794900503 irqchip/sifive-plic: Improve locking safety by using
> irqsave/irqrestore
> 95652106478030f5 irqchip/sifive-plic: Parse number of interrupts and
> contexts early in plic_probe()
> a15587277a246c38 irqchip/sifive-plic: Cleanup PLIC contexts upon
> irqdomain creation failure
> 6c725f33d67b53f2 irqchip/sifive-plic: Use riscv_get_intc_hwnode() to
> get parent fwnode
> b68d0ff529a939a1 irqchip/sifive-plic: Use devm_xyz() for managed allocation
> 25d862e183d4efeb irqchip/sifive-plic: Use dev_xyz() in-place of pr_xyz()
> 8ec99b033147ef3b irqchip/sifive-plic: Convert PLIC driver into a platform driver
>
> After this, the PLIC is initialized earlier again, and this indeed
> seems to fix the issue for me.
> Before, the kernel booted fine in only ca. 1 out of 5 tries.
> After the reverts, it booted 5/5.
>
> Do you know what's going on? Is there a simpler fix?
The fact that you hit the warning indicates that plic_handle_irq() was called
before handler->present was set. Previously the PLIC driver was probed very
early, so it is unlikely that some peripheral already had a pending interrupt.
Now, while platform device drivers would not yet be able to request interrupts
(because the irqdomain is not registered yet), they could have programmed the
hardware in a way that generates an interrupt. If that interrupt was enabled at
the PLIC (e.g. by the bootloader), then we could expect plic_handle_irq() to be
called as soon as irq_set_chained_handler() is called.
So the fix is to not call irq_set_chained_handler() until after the handlers are
completely set up.
I've sent a patch doing this:
https://lore.kernel.org/linux-riscv/[email protected]/
Regards,
Samuel
Hi Samuel,
On Thu, May 30, 2024 at 12:04 AM Samuel Holland
<[email protected]> wrote:
> On 2024-05-29 9:22 AM, Geert Uytterhoeven wrote:
> > On Thu, Feb 22, 2024 at 10:41 AM Anup Patel <[email protected]> wrote:
> >> The PLIC driver does not require very early initialization so convert
> >> it into a platform driver.
> >>
> >> After conversion, the PLIC driver is probed after CPUs are brought-up
> >> so setup cpuhp state after context handler of all online CPUs are
> >> initialized otherwise PLIC driver crashes for platforms with multiple
> >> PLIC instances.
> >>
> >> Signed-off-by: Anup Patel <[email protected]>
> >
> > Thanks for your patch, which is now commit 8ec99b033147ef3b
> > ("irqchip/sifive-plic: Convert PLIC driver into a platform
> > driver") in v6.9.
> >
> > It looks like this conversion is causing issues on BeagleV Starlight
> > Beta. After updating esmil/visionfive to v6.10-rc1, the kernel usually
> > fails to boot. Adding "earlycon keep_bootcon" reveals these differences:
> >
> > -riscv-plic c000000.interrupt-controller: mapped 133 interrupts with 2
> > handlers for 4 contexts.
> > +------------[ cut here ]------------
> > +WARNING: CPU: 0 PID: 1 at drivers/irqchip/irq-sifive-plic.c:373
> > +Unable to handle kernel NULL pointer dereference at virtual address
> The fact that you hit the warning indicates that plic_handle_irq() was called
> before handler->present was set. Previously the PLIC driver was probed very
> early, so it is unlikely that some peripheral already had a pending interrupt.
> Now, while platform device drivers would not yet be able to request interrupts
> (because the irqdomain is not registered yet), they could have programmed the
> hardware in a way that generates an interrupt. If that interrupt was enabled at
> the PLIC (e.g. by the bootloader), then we could expect plic_handle_irq() to be
> called as soon as irq_set_chained_handler() is called.
>
> So the fix is to not call irq_set_chained_handler() until after the handlers are
> completely set up.
>
> I've sent a patch doing this:
> https://lore.kernel.org/linux-riscv/[email protected]/
Thanks, that fixed the issue!
Gr{oetje,eeting}s,
Geert
--
Geert Uytterhoeven -- There's lots of Linux beyond ia32 -- geert@linux-m68k.org
In personal conversations with technical people, I call myself a hacker. But
when I'm talking to journalists I just say "programmer" or something like that.
-- Linus Torvalds