2022-04-12 21:56:25

by Robin Murphy

[permalink] [raw]
Subject: [RFC PATCH] irqchip/gic-v3: Claim iomem resources

As a simple quality-of-life tweak, claim our MMIO regions when mapping
them, such that the GIC shows up in /proc/iomem. No effort is spent on
trying to release them, since frankly if the GIC fails to probe then
it's never getting a second try anyway.

Signed-off-by: Robin Murphy <[email protected]>

---

I briefly looked at doing the same for GICv2, but quickly decided that
GICv2 isn't interesting enough to be worth the (greater) bother...

Lightly tested with ACPI.
---
drivers/irqchip/irq-gic-v3.c | 15 +++++++++------
1 file changed, 9 insertions(+), 6 deletions(-)

diff --git a/drivers/irqchip/irq-gic-v3.c b/drivers/irqchip/irq-gic-v3.c
index b252d5534547..9815b692a47a 100644
--- a/drivers/irqchip/irq-gic-v3.c
+++ b/drivers/irqchip/irq-gic-v3.c
@@ -1980,10 +1980,10 @@ static int __init gic_of_init(struct device_node *node, struct device_node *pare
u32 nr_redist_regions;
int err, i;

- dist_base = of_iomap(node, 0);
- if (!dist_base) {
+ dist_base = of_io_request_and_map(node, 0, "GICD");
+ if (IS_ERR(dist_base)) {
pr_err("%pOF: unable to map gic dist registers\n", node);
- return -ENXIO;
+ return PTR_ERR(dist_base);
}

err = gic_validate_dist_version(dist_base);
@@ -2007,8 +2007,8 @@ static int __init gic_of_init(struct device_node *node, struct device_node *pare
int ret;

ret = of_address_to_resource(node, 1 + i, &res);
- rdist_regs[i].redist_base = of_iomap(node, 1 + i);
- if (ret || !rdist_regs[i].redist_base) {
+ rdist_regs[i].redist_base = of_io_request_and_map(node, 1 + i, "GICR");
+ if (ret || IS_ERR(rdist_regs[i].redist_base)) {
pr_err("%pOF: couldn't map region %d\n", node, i);
err = -ENODEV;
goto out_unmap_rdist;
@@ -2034,7 +2034,7 @@ static int __init gic_of_init(struct device_node *node, struct device_node *pare

out_unmap_rdist:
for (i = 0; i < nr_redist_regions; i++)
- if (rdist_regs[i].redist_base)
+ if (rdist_regs[i].redist_base && !IS_ERR(rdist_regs[i].redist_base))
iounmap(rdist_regs[i].redist_base);
kfree(rdist_regs);
out_unmap_dist:
@@ -2081,6 +2081,7 @@ gic_acpi_parse_madt_redist(union acpi_subtable_headers *header,
pr_err("Couldn't map GICR region @%llx\n", redist->base_address);
return -ENOMEM;
}
+ request_mem_region(redist->base_address, redist->length, "GICR");

gic_acpi_register_redist(redist->base_address, redist_base);
return 0;
@@ -2103,6 +2104,7 @@ gic_acpi_parse_madt_gicc(union acpi_subtable_headers *header,
redist_base = ioremap(gicc->gicr_base_address, size);
if (!redist_base)
return -ENOMEM;
+ request_mem_region(gicc->gicr_base_address, size, "GICR");

gic_acpi_register_redist(gicc->gicr_base_address, redist_base);
return 0;
@@ -2304,6 +2306,7 @@ gic_acpi_init(union acpi_subtable_headers *header, const unsigned long end)
pr_err("Unable to map GICD registers\n");
return -ENOMEM;
}
+ request_mem_region(dist->base_address, ACPI_GICV3_DIST_MEM_SIZE, "GICD");

err = gic_validate_dist_version(acpi_data.dist_base);
if (err) {
--
2.28.0.dirty


2023-02-28 23:11:33

by Tim Harvey

[permalink] [raw]
Subject: Re: [RFC PATCH] irqchip/gic-v3: Claim iomem resources

On Tue, Apr 12, 2022 at 8:29 AM Robin Murphy <[email protected]> wrote:
>
> As a simple quality-of-life tweak, claim our MMIO regions when mapping
> them, such that the GIC shows up in /proc/iomem. No effort is spent on
> trying to release them, since frankly if the GIC fails to probe then
> it's never getting a second try anyway.
>
> Signed-off-by: Robin Murphy <[email protected]>
>
> ---
>
> I briefly looked at doing the same for GICv2, but quickly decided that
> GICv2 isn't interesting enough to be worth the (greater) bother...
>
> Lightly tested with ACPI.
> ---
> drivers/irqchip/irq-gic-v3.c | 15 +++++++++------
> 1 file changed, 9 insertions(+), 6 deletions(-)
>
> diff --git a/drivers/irqchip/irq-gic-v3.c b/drivers/irqchip/irq-gic-v3.c
> index b252d5534547..9815b692a47a 100644
> --- a/drivers/irqchip/irq-gic-v3.c
> +++ b/drivers/irqchip/irq-gic-v3.c
> @@ -1980,10 +1980,10 @@ static int __init gic_of_init(struct device_node *node, struct device_node *pare
> u32 nr_redist_regions;
> int err, i;
>
> - dist_base = of_iomap(node, 0);
> - if (!dist_base) {
> + dist_base = of_io_request_and_map(node, 0, "GICD");
> + if (IS_ERR(dist_base)) {
> pr_err("%pOF: unable to map gic dist registers\n", node);
> - return -ENXIO;
> + return PTR_ERR(dist_base);
> }
>
> err = gic_validate_dist_version(dist_base);
> @@ -2007,8 +2007,8 @@ static int __init gic_of_init(struct device_node *node, struct device_node *pare
> int ret;
>
> ret = of_address_to_resource(node, 1 + i, &res);
> - rdist_regs[i].redist_base = of_iomap(node, 1 + i);
> - if (ret || !rdist_regs[i].redist_base) {
> + rdist_regs[i].redist_base = of_io_request_and_map(node, 1 + i, "GICR");
> + if (ret || IS_ERR(rdist_regs[i].redist_base)) {
> pr_err("%pOF: couldn't map region %d\n", node, i);
> err = -ENODEV;
> goto out_unmap_rdist;
> @@ -2034,7 +2034,7 @@ static int __init gic_of_init(struct device_node *node, struct device_node *pare
>
> out_unmap_rdist:
> for (i = 0; i < nr_redist_regions; i++)
> - if (rdist_regs[i].redist_base)
> + if (rdist_regs[i].redist_base && !IS_ERR(rdist_regs[i].redist_base))
> iounmap(rdist_regs[i].redist_base);
> kfree(rdist_regs);
> out_unmap_dist:
> @@ -2081,6 +2081,7 @@ gic_acpi_parse_madt_redist(union acpi_subtable_headers *header,
> pr_err("Couldn't map GICR region @%llx\n", redist->base_address);
> return -ENOMEM;
> }
> + request_mem_region(redist->base_address, redist->length, "GICR");
>
> gic_acpi_register_redist(redist->base_address, redist_base);
> return 0;
> @@ -2103,6 +2104,7 @@ gic_acpi_parse_madt_gicc(union acpi_subtable_headers *header,
> redist_base = ioremap(gicc->gicr_base_address, size);
> if (!redist_base)
> return -ENOMEM;
> + request_mem_region(gicc->gicr_base_address, size, "GICR");
>
> gic_acpi_register_redist(gicc->gicr_base_address, redist_base);
> return 0;
> @@ -2304,6 +2306,7 @@ gic_acpi_init(union acpi_subtable_headers *header, const unsigned long end)
> pr_err("Unable to map GICD registers\n");
> return -ENOMEM;
> }
> + request_mem_region(dist->base_address, ACPI_GICV3_DIST_MEM_SIZE, "GICD");
>
> err = gic_validate_dist_version(acpi_data.dist_base);
> if (err) {
> --
> 2.28.0.dirty
>

Greetings,

I have bisected a kernel issue where octeontx (CN803x) will hang on
reboot caused by commit c0db06fd0993 ("mmc: core: Disable card detect
during shutdown"). This commit made it into 5.16 and stable kernels.
I've found that the patch here which is commit 2b2cd74a06c3 resolves
this hang but I'm not entirely clear why.

Does anyone have a good explanation of why the hang occurs in the
first place and why this resolves it? I would like to get the proper
fix into the affected stable branches.

Best Regards,

Tim

2023-02-28 23:24:21

by Marc Zyngier

[permalink] [raw]
Subject: Re: [RFC PATCH] irqchip/gic-v3: Claim iomem resources

On Tue, 28 Feb 2023 23:11:15 +0000,
Tim Harvey <[email protected]> wrote:
>
> I have bisected a kernel issue where octeontx (CN803x) will hang on
> reboot caused by commit c0db06fd0993 ("mmc: core: Disable card detect
> during shutdown"). This commit made it into 5.16 and stable kernels.
> I've found that the patch here which is commit 2b2cd74a06c3 resolves
> this hang but I'm not entirely clear why.
>
> Does anyone have a good explanation of why the hang occurs in the
> first place and why this resolves it? I would like to get the proper
> fix into the affected stable branches.

Wild guess: the reservation prevents some other driver from probing
because the firmware describes overlapping ranges, and that driver is
what is causing your above hang.

But you don't give much information that would allow this theory to be
checked. I guess you'll have to instrument your kernel and find out.

M.

--
Without deviation from the norm, progress is not possible.

2023-03-01 16:13:11

by Robin Murphy

[permalink] [raw]
Subject: Re: [RFC PATCH] irqchip/gic-v3: Claim iomem resources

On 2023-02-28 23:22, Marc Zyngier wrote:
> On Tue, 28 Feb 2023 23:11:15 +0000,
> Tim Harvey <[email protected]> wrote:
>>
>> I have bisected a kernel issue where octeontx (CN803x) will hang on
>> reboot caused by commit c0db06fd0993 ("mmc: core: Disable card detect
>> during shutdown"). This commit made it into 5.16 and stable kernels.
>> I've found that the patch here which is commit 2b2cd74a06c3 resolves
>> this hang but I'm not entirely clear why.
>>
>> Does anyone have a good explanation of why the hang occurs in the
>> first place and why this resolves it? I would like to get the proper
>> fix into the affected stable branches.
>
> Wild guess: the reservation prevents some other driver from probing
> because the firmware describes overlapping ranges, and that driver is
> what is causing your above hang.

Indeed, according to [1], the GIC appears to overlap one of the "PCIe"
windows of &ecam0, which conveniently appears to be the parent of the
MMC controller as well.

Robin.

[1]
https://github.com/Gateworks/dts-newport/blob/sdk-10.1.1.0-newport/cn81xx-linux.dtsi

2023-03-01 16:24:20

by Marc Zyngier

[permalink] [raw]
Subject: Re: [RFC PATCH] irqchip/gic-v3: Claim iomem resources

On Wed, 01 Mar 2023 16:12:58 +0000,
Robin Murphy <[email protected]> wrote:
>
> On 2023-02-28 23:22, Marc Zyngier wrote:
> > On Tue, 28 Feb 2023 23:11:15 +0000,
> > Tim Harvey <[email protected]> wrote:
> >>
> >> I have bisected a kernel issue where octeontx (CN803x) will hang on
> >> reboot caused by commit c0db06fd0993 ("mmc: core: Disable card detect
> >> during shutdown"). This commit made it into 5.16 and stable kernels.
> >> I've found that the patch here which is commit 2b2cd74a06c3 resolves
> >> this hang but I'm not entirely clear why.
> >>
> >> Does anyone have a good explanation of why the hang occurs in the
> >> first place and why this resolves it? I would like to get the proper
> >> fix into the affected stable branches.
> >
> > Wild guess: the reservation prevents some other driver from probing
> > because the firmware describes overlapping ranges, and that driver is
> > what is causing your above hang.
>
> Indeed, according to [1], the GIC appears to overlap one of the "PCIe"
> windows of &ecam0, which conveniently appears to be the parent of the
> MMC controller as well.

Huh, quality firmware. So clearly nothing to backport. Just a DT
update to distribute... :-/

M.

--
Without deviation from the norm, progress is not possible.

2023-03-01 17:29:36

by Tim Harvey

[permalink] [raw]
Subject: Re: [RFC PATCH] irqchip/gic-v3: Claim iomem resources

On Wed, Mar 1, 2023 at 8:13 AM Robin Murphy <[email protected]> wrote:
>
> On 2023-02-28 23:22, Marc Zyngier wrote:
> > On Tue, 28 Feb 2023 23:11:15 +0000,
> > Tim Harvey <[email protected]> wrote:
> >>
> >> I have bisected a kernel issue where octeontx (CN803x) will hang on
> >> reboot caused by commit c0db06fd0993 ("mmc: core: Disable card detect
> >> during shutdown"). This commit made it into 5.16 and stable kernels.
> >> I've found that the patch here which is commit 2b2cd74a06c3 resolves
> >> this hang but I'm not entirely clear why.
> >>
> >> Does anyone have a good explanation of why the hang occurs in the
> >> first place and why this resolves it? I would like to get the proper
> >> fix into the affected stable branches.
> >
> > Wild guess: the reservation prevents some other driver from probing
> > because the firmware describes overlapping ranges, and that driver is
> > what is causing your above hang.
>
> Indeed, according to [1], the GIC appears to overlap one of the "PCIe"
> windows of &ecam0, which conveniently appears to be the parent of the
> MMC controller as well.
>
> Robin.
>
> [1]
> https://github.com/Gateworks/dts-newport/blob/sdk-10.1.1.0-newport/cn81xx-linux.dtsi

Robin and Marc,

Thanks! Yes, this patch exposes an issue in my dt which ends up
killing off ecam0 thus hiding the original 'hang on reboot' issue I
was trying to find related to mmc with a quick bisect.

As you expected ecam0 fails probe:
[ 0.977514] pci-host-generic: probe of 848000000000.pci failed with error -12

Cavium/Marvell never mainlined their cn81xx device-tree and their
latest SDK appears to still have this issue. It looks to me like the
first mem range in ecam0 is just wrong and needs to be removed:
@@ -226,8 +226,7 @@ ecam0: pci@848000000000 {
u-boot,dm-pre-reloc;
dma-coherent;
reg = <0x8480 0x00000000 0 0x02000000>; /*
Configuration space
*/
- ranges = <0x03000000 0x8010 0x00000000 0x8010
0x00000000 0x080
0x00000000>, /* mem ranges */
- <0x03000000 0x8100 0x00000000 0x8100
0x00000000 0x80
0x00000000>, /* SATA */
+ ranges = <0x03000000 0x8100 0x00000000 0x8100
0x00000000 0x80
0x00000000>, /* SATA */
<0x03000000 0x8680 0x00000000 0x8680
0x00000000 0x160
0x28000000>, /* UARTs */
<0x03000000 0x87e0 0x2c000000 0x87e0
0x2c000000 0x000
0x94000000>, /* PEMs */
<0x03000000 0x8400 0x00000000 0x8400
0x00000000 0x010
0x00000000>, /* RNM */

This results in:
[ 0.911162] pci-host-generic 848000000000.pci: host bridge
/soc@0/pci@848000000000 ranges:
[ 0.919506] pci-host-generic 848000000000.pci: MEM
0x810000000000..0x817fffffffff -> 0x810000000000
[ 0.929018] pci-host-generic 848000000000.pci: MEM
0x868000000000..0x87e027ffffff -> 0x868000000000
[ 0.938531] pci-host-generic 848000000000.pci: MEM
0x87e02c000000..0x87e0bfffffff -> 0x87e02c000000
[ 0.948039] pci-host-generic 848000000000.pci: MEM
0x840000000000..0x840fffffffff -> 0x840000000000
[ 0.957543] pci-host-generic 848000000000.pci: MEM
0x843000000000..0x8431ffffffff -> 0x843000000000
[ 0.967054] pci-host-generic 848000000000.pci: MEM
0x87e0c6000000..0x87ffffffffff -> 0x87e0c6000000
[ 0.976562] pci-host-generic 848000000000.pci: Memory resource size
exceeds max for 32 bits
[ 0.984923] pci-host-generic 848000000000.pci: Memory resource size
exceeds max for 32 bits
[ 0.993284] pci-host-generic 848000000000.pci: Memory resource size
exceeds max for 32 bits
[ 1.001645] pci-host-generic 848000000000.pci: Memory resource size
exceeds max for 32 bits
[ 1.010008] pci-host-generic 848000000000.pci: Memory resource size
exceeds max for 32 bits
[ 1.018404] pci-host-generic 848000000000.pci: ECAM at [mem
0x848000000000-0x848001ffffff] for [bus 00-1f]
[ 1.028240] pci-host-generic 848000000000.pci: PCI host bridge to bus 0000:00
[ 1.035412] pci_bus 0000:00: root bus resource [bus 00-1f]
[ 1.040907] pci_bus 0000:00: root bus resource [mem
0x810000000000-0x817fffffffff]
[ 1.048488] pci_bus 0000:00: root bus resource [mem
0x868000000000-0x87e027ffffff]
[ 1.056070] pci_bus 0000:00: root bus resource [mem
0x87e02c000000-0x87e0bfffffff]
[ 1.063652] pci_bus 0000:00: root bus resource [mem
0x840000000000-0x840fffffffff]
[ 1.071232] pci_bus 0000:00: root bus resource [mem
0x843000000000-0x8431ffffffff]
[ 1.078815] pci_bus 0000:00: root bus resource [mem
0x87e0c6000000-0x87ffffffffff]

and all of the PCI devices appear to work fine. Is there an additional
issue I need to work on regarding the 'Memory resource size exceeds
max for 32 bits' warning above?

Best Regards,

Tim