Replace assignment of PCI domain from atomic_inc_return() to ida_alloc().
Use two IDAs, one for static domain allocations (those which are defined in
device tree) and second for dynamic allocations (all other).
During removal of root bus / host bridge release also allocated domain id.
So released id can be reused again, for example in situation when
dynamically loading and unloading native PCI host bridge drivers.
This change also allows to mix static device tree assignment and dynamic by
kernel as all static allocations are reserved in dynamic pool.
Signed-off-by: Pali Rohár <[email protected]>
---
Idea of this patch comes from the following discussion:
https://lore.kernel.org/linux-pci/[email protected]/t/#u
---
drivers/pci/pci.c | 102 +++++++++++++++++++++++++------------------
drivers/pci/probe.c | 5 +++
drivers/pci/remove.c | 6 +++
include/linux/pci.h | 1 +
4 files changed, 72 insertions(+), 42 deletions(-)
diff --git a/drivers/pci/pci.c b/drivers/pci/pci.c
index cfaf40a540a8..b263abfbe6d5 100644
--- a/drivers/pci/pci.c
+++ b/drivers/pci/pci.c
@@ -6762,60 +6762,71 @@ static void pci_no_domains(void)
}
#ifdef CONFIG_PCI_DOMAINS_GENERIC
-static atomic_t __domain_nr = ATOMIC_INIT(-1);
+static DEFINE_IDA(pci_domain_nr_static_ida);
+static DEFINE_IDA(pci_domain_nr_dynamic_ida);
-static int pci_get_new_domain_nr(void)
+static void of_pci_reserve_static_domain_nr(void)
{
- return atomic_inc_return(&__domain_nr);
+ struct device_node *np;
+ int domain_nr;
+
+ for_each_node_by_type(np, "pci") {
+ domain_nr = of_get_pci_domain_nr(np);
+ if (domain_nr < 0)
+ continue;
+ /*
+ * Permanently allocate domain_nr in dynamic_ida
+ * to prevent it from dynamic allocation.
+ */
+ ida_alloc_range(&pci_domain_nr_dynamic_ida,
+ domain_nr, domain_nr, GFP_KERNEL);
+ }
}
static int of_pci_bus_find_domain_nr(struct device *parent)
{
- static int use_dt_domains = -1;
- int domain = -1;
+ static bool static_domains_reserved = false;
+ int domain_nr;
- if (parent)
- domain = of_get_pci_domain_nr(parent->of_node);
+ /* On the first call scan device tree for static allocations. */
+ if (!static_domains_reserved) {
+ of_pci_reserve_static_domain_nr();
+ static_domains_reserved = true;
+ }
+
+ if (parent) {
+ /*
+ * If domain is in DT then allocate it in static IDA.
+ * This prevent duplicate static allocations in case
+ * of errors in DT.
+ */
+ domain_nr = of_get_pci_domain_nr(parent->of_node);
+ if (domain_nr >= 0)
+ return ida_alloc_range(&pci_domain_nr_static_ida,
+ domain_nr, domain_nr,
+ GFP_KERNEL);
+ }
/*
- * Check DT domain and use_dt_domains values.
- *
- * If DT domain property is valid (domain >= 0) and
- * use_dt_domains != 0, the DT assignment is valid since this means
- * we have not previously allocated a domain number by using
- * pci_get_new_domain_nr(); we should also update use_dt_domains to
- * 1, to indicate that we have just assigned a domain number from
- * DT.
- *
- * If DT domain property value is not valid (ie domain < 0), and we
- * have not previously assigned a domain number from DT
- * (use_dt_domains != 1) we should assign a domain number by
- * using the:
- *
- * pci_get_new_domain_nr()
- *
- * API and update the use_dt_domains value to keep track of method we
- * are using to assign domain numbers (use_dt_domains = 0).
- *
- * All other combinations imply we have a platform that is trying
- * to mix domain numbers obtained from DT and pci_get_new_domain_nr(),
- * which is a recipe for domain mishandling and it is prevented by
- * invalidating the domain value (domain = -1) and printing a
- * corresponding error.
+ * If domain was not specified in DT then choose free id from dynamic
+ * allocations. All domain numbers from DT are permanently in dynamic
+ * allocations to prevent assigning them to other DT nodes without
+ * static domain.
*/
- if (domain >= 0 && use_dt_domains) {
- use_dt_domains = 1;
- } else if (domain < 0 && use_dt_domains != 1) {
- use_dt_domains = 0;
- domain = pci_get_new_domain_nr();
- } else {
- if (parent)
- pr_err("Node %pOF has ", parent->of_node);
- pr_err("Inconsistent \"linux,pci-domain\" property in DT\n");
- domain = -1;
+ return ida_alloc(&pci_domain_nr_dynamic_ida, GFP_KERNEL);
}
+}
- return domain;
+static void of_pci_bus_release_domain_nr(struct pci_bus *bus, struct device *parent)
+{
+ if (bus->domain_nr < 0)
+ return;
+
+ /* Release domain from ida in which was it allocated. */
+ if (of_get_pci_domain_nr(parent->of_node) == bus->domain_nr)
+ ida_free(&pci_domain_nr_static_ida, bus->domain_nr);
+ else
+ ida_free(&pci_domain_nr_dynamic_ida, bus->domain_nr);
}
int pci_bus_find_domain_nr(struct pci_bus *bus, struct device *parent)
@@ -6823,6 +6834,13 @@ int pci_bus_find_domain_nr(struct pci_bus *bus, struct device *parent)
return acpi_disabled ? of_pci_bus_find_domain_nr(parent) :
acpi_pci_bus_find_domain_nr(bus);
}
+
+void pci_bus_release_domain_nr(struct pci_bus *bus, struct device *parent)
+{
+ if (!acpi_disabled)
+ return;
+ of_pci_bus_release_domain_nr(bus, parent);
+}
#endif
/**
diff --git a/drivers/pci/probe.c b/drivers/pci/probe.c
index 17a969942d37..12092d238403 100644
--- a/drivers/pci/probe.c
+++ b/drivers/pci/probe.c
@@ -906,6 +906,8 @@ static int pci_register_host_bridge(struct pci_host_bridge *bridge)
bus->domain_nr = pci_bus_find_domain_nr(bus, parent);
else
bus->domain_nr = bridge->domain_nr;
+ if (bus->domain_nr < 0)
+ goto free;
#endif
b = pci_find_bus(pci_domain_nr(bus), bridge->busnr);
@@ -1030,6 +1032,9 @@ static int pci_register_host_bridge(struct pci_host_bridge *bridge)
device_del(&bridge->dev);
free:
+#ifdef CONFIG_PCI_DOMAINS_GENERIC
+ pci_bus_release_domain_nr(bus, parent);
+#endif
kfree(bus);
return err;
}
diff --git a/drivers/pci/remove.c b/drivers/pci/remove.c
index 4c54c75050dc..0145aef1b930 100644
--- a/drivers/pci/remove.c
+++ b/drivers/pci/remove.c
@@ -160,6 +160,12 @@ void pci_remove_root_bus(struct pci_bus *bus)
pci_remove_bus(bus);
host_bridge->bus = NULL;
+#ifdef CONFIG_PCI_DOMAINS_GENERIC
+ /* Release domain_nr if it was dynamically allocated */
+ if (host_bridge->domain_nr == PCI_DOMAIN_NR_NOT_SET)
+ pci_bus_release_domain_nr(bus, host_bridge->dev.parent);
+#endif
+
/* remove the host bridge */
device_del(&host_bridge->dev);
}
diff --git a/include/linux/pci.h b/include/linux/pci.h
index 81a57b498f22..6c7f27e62bcc 100644
--- a/include/linux/pci.h
+++ b/include/linux/pci.h
@@ -1723,6 +1723,7 @@ static inline int acpi_pci_bus_find_domain_nr(struct pci_bus *bus)
{ return 0; }
#endif
int pci_bus_find_domain_nr(struct pci_bus *bus, struct device *parent);
+void pci_bus_release_domain_nr(struct pci_bus *bus, struct device *parent);
#endif
/* Some architectures require additional setup to direct VGA traffic */
--
2.20.1
Replace assignment of PCI domain from atomic_inc_return() to ida_alloc().
Use two IDAs, one for static domain allocations (those which are defined in
device tree) and second for dynamic allocations (all other).
During removal of root bus / host bridge release also allocated domain id.
So released id can be reused again, for example in situation when
dynamically loading and unloading native PCI host bridge drivers.
This change also allows to mix static device tree assignment and dynamic by
kernel as all static allocations are reserved in dynamic pool.
Signed-off-by: Pali Rohár <[email protected]>
---
Idea of this patch comes from the following discussion:
https://lore.kernel.org/linux-pci/[email protected]/t/#u
Changes in v2:
* Fix broken compilation
---
drivers/pci/pci.c | 103 +++++++++++++++++++++++++------------------
drivers/pci/probe.c | 5 +++
drivers/pci/remove.c | 6 +++
include/linux/pci.h | 1 +
4 files changed, 72 insertions(+), 43 deletions(-)
diff --git a/drivers/pci/pci.c b/drivers/pci/pci.c
index cfaf40a540a8..34fdcee6634a 100644
--- a/drivers/pci/pci.c
+++ b/drivers/pci/pci.c
@@ -6762,60 +6762,70 @@ static void pci_no_domains(void)
}
#ifdef CONFIG_PCI_DOMAINS_GENERIC
-static atomic_t __domain_nr = ATOMIC_INIT(-1);
+static DEFINE_IDA(pci_domain_nr_static_ida);
+static DEFINE_IDA(pci_domain_nr_dynamic_ida);
-static int pci_get_new_domain_nr(void)
+static void of_pci_reserve_static_domain_nr(void)
{
- return atomic_inc_return(&__domain_nr);
+ struct device_node *np;
+ int domain_nr;
+
+ for_each_node_by_type(np, "pci") {
+ domain_nr = of_get_pci_domain_nr(np);
+ if (domain_nr < 0)
+ continue;
+ /*
+ * Permanently allocate domain_nr in dynamic_ida
+ * to prevent it from dynamic allocation.
+ */
+ ida_alloc_range(&pci_domain_nr_dynamic_ida,
+ domain_nr, domain_nr, GFP_KERNEL);
+ }
}
static int of_pci_bus_find_domain_nr(struct device *parent)
{
- static int use_dt_domains = -1;
- int domain = -1;
+ static bool static_domains_reserved = false;
+ int domain_nr;
- if (parent)
- domain = of_get_pci_domain_nr(parent->of_node);
+ /* On the first call scan device tree for static allocations. */
+ if (!static_domains_reserved) {
+ of_pci_reserve_static_domain_nr();
+ static_domains_reserved = true;
+ }
+
+ if (parent) {
+ /*
+ * If domain is in DT then allocate it in static IDA.
+ * This prevent duplicate static allocations in case
+ * of errors in DT.
+ */
+ domain_nr = of_get_pci_domain_nr(parent->of_node);
+ if (domain_nr >= 0)
+ return ida_alloc_range(&pci_domain_nr_static_ida,
+ domain_nr, domain_nr,
+ GFP_KERNEL);
+ }
/*
- * Check DT domain and use_dt_domains values.
- *
- * If DT domain property is valid (domain >= 0) and
- * use_dt_domains != 0, the DT assignment is valid since this means
- * we have not previously allocated a domain number by using
- * pci_get_new_domain_nr(); we should also update use_dt_domains to
- * 1, to indicate that we have just assigned a domain number from
- * DT.
- *
- * If DT domain property value is not valid (ie domain < 0), and we
- * have not previously assigned a domain number from DT
- * (use_dt_domains != 1) we should assign a domain number by
- * using the:
- *
- * pci_get_new_domain_nr()
- *
- * API and update the use_dt_domains value to keep track of method we
- * are using to assign domain numbers (use_dt_domains = 0).
- *
- * All other combinations imply we have a platform that is trying
- * to mix domain numbers obtained from DT and pci_get_new_domain_nr(),
- * which is a recipe for domain mishandling and it is prevented by
- * invalidating the domain value (domain = -1) and printing a
- * corresponding error.
+ * If domain was not specified in DT then choose free id from dynamic
+ * allocations. All domain numbers from DT are permanently in dynamic
+ * allocations to prevent assigning them to other DT nodes without
+ * static domain.
*/
- if (domain >= 0 && use_dt_domains) {
- use_dt_domains = 1;
- } else if (domain < 0 && use_dt_domains != 1) {
- use_dt_domains = 0;
- domain = pci_get_new_domain_nr();
- } else {
- if (parent)
- pr_err("Node %pOF has ", parent->of_node);
- pr_err("Inconsistent \"linux,pci-domain\" property in DT\n");
- domain = -1;
- }
+ return ida_alloc(&pci_domain_nr_dynamic_ida, GFP_KERNEL);
+}
- return domain;
+static void of_pci_bus_release_domain_nr(struct pci_bus *bus, struct device *parent)
+{
+ if (bus->domain_nr < 0)
+ return;
+
+ /* Release domain from ida in which was it allocated. */
+ if (of_get_pci_domain_nr(parent->of_node) == bus->domain_nr)
+ ida_free(&pci_domain_nr_static_ida, bus->domain_nr);
+ else
+ ida_free(&pci_domain_nr_dynamic_ida, bus->domain_nr);
}
int pci_bus_find_domain_nr(struct pci_bus *bus, struct device *parent)
@@ -6823,6 +6833,13 @@ int pci_bus_find_domain_nr(struct pci_bus *bus, struct device *parent)
return acpi_disabled ? of_pci_bus_find_domain_nr(parent) :
acpi_pci_bus_find_domain_nr(bus);
}
+
+void pci_bus_release_domain_nr(struct pci_bus *bus, struct device *parent)
+{
+ if (!acpi_disabled)
+ return;
+ of_pci_bus_release_domain_nr(bus, parent);
+}
#endif
/**
diff --git a/drivers/pci/probe.c b/drivers/pci/probe.c
index 17a969942d37..12092d238403 100644
--- a/drivers/pci/probe.c
+++ b/drivers/pci/probe.c
@@ -906,6 +906,8 @@ static int pci_register_host_bridge(struct pci_host_bridge *bridge)
bus->domain_nr = pci_bus_find_domain_nr(bus, parent);
else
bus->domain_nr = bridge->domain_nr;
+ if (bus->domain_nr < 0)
+ goto free;
#endif
b = pci_find_bus(pci_domain_nr(bus), bridge->busnr);
@@ -1030,6 +1032,9 @@ static int pci_register_host_bridge(struct pci_host_bridge *bridge)
device_del(&bridge->dev);
free:
+#ifdef CONFIG_PCI_DOMAINS_GENERIC
+ pci_bus_release_domain_nr(bus, parent);
+#endif
kfree(bus);
return err;
}
diff --git a/drivers/pci/remove.c b/drivers/pci/remove.c
index 4c54c75050dc..0145aef1b930 100644
--- a/drivers/pci/remove.c
+++ b/drivers/pci/remove.c
@@ -160,6 +160,12 @@ void pci_remove_root_bus(struct pci_bus *bus)
pci_remove_bus(bus);
host_bridge->bus = NULL;
+#ifdef CONFIG_PCI_DOMAINS_GENERIC
+ /* Release domain_nr if it was dynamically allocated */
+ if (host_bridge->domain_nr == PCI_DOMAIN_NR_NOT_SET)
+ pci_bus_release_domain_nr(bus, host_bridge->dev.parent);
+#endif
+
/* remove the host bridge */
device_del(&host_bridge->dev);
}
diff --git a/include/linux/pci.h b/include/linux/pci.h
index 81a57b498f22..6c7f27e62bcc 100644
--- a/include/linux/pci.h
+++ b/include/linux/pci.h
@@ -1723,6 +1723,7 @@ static inline int acpi_pci_bus_find_domain_nr(struct pci_bus *bus)
{ return 0; }
#endif
int pci_bus_find_domain_nr(struct pci_bus *bus, struct device *parent);
+void pci_bus_release_domain_nr(struct pci_bus *bus, struct device *parent);
#endif
/* Some architectures require additional setup to direct VGA traffic */
--
2.20.1
PING?
On Thursday 14 July 2022 20:41:30 Pali Rohár wrote:
> Replace assignment of PCI domain from atomic_inc_return() to ida_alloc().
>
> Use two IDAs, one for static domain allocations (those which are defined in
> device tree) and second for dynamic allocations (all other).
>
> During removal of root bus / host bridge release also allocated domain id.
> So released id can be reused again, for example in situation when
> dynamically loading and unloading native PCI host bridge drivers.
>
> This change also allows to mix static device tree assignment and dynamic by
> kernel as all static allocations are reserved in dynamic pool.
>
> Signed-off-by: Pali Rohár <[email protected]>
> ---
> Idea of this patch comes from the following discussion:
> https://lore.kernel.org/linux-pci/[email protected]/t/#u
>
> Changes in v2:
> * Fix broken compilation
> ---
> drivers/pci/pci.c | 103 +++++++++++++++++++++++++------------------
> drivers/pci/probe.c | 5 +++
> drivers/pci/remove.c | 6 +++
> include/linux/pci.h | 1 +
> 4 files changed, 72 insertions(+), 43 deletions(-)
>
> diff --git a/drivers/pci/pci.c b/drivers/pci/pci.c
> index cfaf40a540a8..34fdcee6634a 100644
> --- a/drivers/pci/pci.c
> +++ b/drivers/pci/pci.c
> @@ -6762,60 +6762,70 @@ static void pci_no_domains(void)
> }
>
> #ifdef CONFIG_PCI_DOMAINS_GENERIC
> -static atomic_t __domain_nr = ATOMIC_INIT(-1);
> +static DEFINE_IDA(pci_domain_nr_static_ida);
> +static DEFINE_IDA(pci_domain_nr_dynamic_ida);
>
> -static int pci_get_new_domain_nr(void)
> +static void of_pci_reserve_static_domain_nr(void)
> {
> - return atomic_inc_return(&__domain_nr);
> + struct device_node *np;
> + int domain_nr;
> +
> + for_each_node_by_type(np, "pci") {
> + domain_nr = of_get_pci_domain_nr(np);
> + if (domain_nr < 0)
> + continue;
> + /*
> + * Permanently allocate domain_nr in dynamic_ida
> + * to prevent it from dynamic allocation.
> + */
> + ida_alloc_range(&pci_domain_nr_dynamic_ida,
> + domain_nr, domain_nr, GFP_KERNEL);
> + }
> }
>
> static int of_pci_bus_find_domain_nr(struct device *parent)
> {
> - static int use_dt_domains = -1;
> - int domain = -1;
> + static bool static_domains_reserved = false;
> + int domain_nr;
>
> - if (parent)
> - domain = of_get_pci_domain_nr(parent->of_node);
> + /* On the first call scan device tree for static allocations. */
> + if (!static_domains_reserved) {
> + of_pci_reserve_static_domain_nr();
> + static_domains_reserved = true;
> + }
> +
> + if (parent) {
> + /*
> + * If domain is in DT then allocate it in static IDA.
> + * This prevent duplicate static allocations in case
> + * of errors in DT.
> + */
> + domain_nr = of_get_pci_domain_nr(parent->of_node);
> + if (domain_nr >= 0)
> + return ida_alloc_range(&pci_domain_nr_static_ida,
> + domain_nr, domain_nr,
> + GFP_KERNEL);
> + }
>
> /*
> - * Check DT domain and use_dt_domains values.
> - *
> - * If DT domain property is valid (domain >= 0) and
> - * use_dt_domains != 0, the DT assignment is valid since this means
> - * we have not previously allocated a domain number by using
> - * pci_get_new_domain_nr(); we should also update use_dt_domains to
> - * 1, to indicate that we have just assigned a domain number from
> - * DT.
> - *
> - * If DT domain property value is not valid (ie domain < 0), and we
> - * have not previously assigned a domain number from DT
> - * (use_dt_domains != 1) we should assign a domain number by
> - * using the:
> - *
> - * pci_get_new_domain_nr()
> - *
> - * API and update the use_dt_domains value to keep track of method we
> - * are using to assign domain numbers (use_dt_domains = 0).
> - *
> - * All other combinations imply we have a platform that is trying
> - * to mix domain numbers obtained from DT and pci_get_new_domain_nr(),
> - * which is a recipe for domain mishandling and it is prevented by
> - * invalidating the domain value (domain = -1) and printing a
> - * corresponding error.
> + * If domain was not specified in DT then choose free id from dynamic
> + * allocations. All domain numbers from DT are permanently in dynamic
> + * allocations to prevent assigning them to other DT nodes without
> + * static domain.
> */
> - if (domain >= 0 && use_dt_domains) {
> - use_dt_domains = 1;
> - } else if (domain < 0 && use_dt_domains != 1) {
> - use_dt_domains = 0;
> - domain = pci_get_new_domain_nr();
> - } else {
> - if (parent)
> - pr_err("Node %pOF has ", parent->of_node);
> - pr_err("Inconsistent \"linux,pci-domain\" property in DT\n");
> - domain = -1;
> - }
> + return ida_alloc(&pci_domain_nr_dynamic_ida, GFP_KERNEL);
> +}
>
> - return domain;
> +static void of_pci_bus_release_domain_nr(struct pci_bus *bus, struct device *parent)
> +{
> + if (bus->domain_nr < 0)
> + return;
> +
> + /* Release domain from ida in which was it allocated. */
> + if (of_get_pci_domain_nr(parent->of_node) == bus->domain_nr)
> + ida_free(&pci_domain_nr_static_ida, bus->domain_nr);
> + else
> + ida_free(&pci_domain_nr_dynamic_ida, bus->domain_nr);
> }
>
> int pci_bus_find_domain_nr(struct pci_bus *bus, struct device *parent)
> @@ -6823,6 +6833,13 @@ int pci_bus_find_domain_nr(struct pci_bus *bus, struct device *parent)
> return acpi_disabled ? of_pci_bus_find_domain_nr(parent) :
> acpi_pci_bus_find_domain_nr(bus);
> }
> +
> +void pci_bus_release_domain_nr(struct pci_bus *bus, struct device *parent)
> +{
> + if (!acpi_disabled)
> + return;
> + of_pci_bus_release_domain_nr(bus, parent);
> +}
> #endif
>
> /**
> diff --git a/drivers/pci/probe.c b/drivers/pci/probe.c
> index 17a969942d37..12092d238403 100644
> --- a/drivers/pci/probe.c
> +++ b/drivers/pci/probe.c
> @@ -906,6 +906,8 @@ static int pci_register_host_bridge(struct pci_host_bridge *bridge)
> bus->domain_nr = pci_bus_find_domain_nr(bus, parent);
> else
> bus->domain_nr = bridge->domain_nr;
> + if (bus->domain_nr < 0)
> + goto free;
> #endif
>
> b = pci_find_bus(pci_domain_nr(bus), bridge->busnr);
> @@ -1030,6 +1032,9 @@ static int pci_register_host_bridge(struct pci_host_bridge *bridge)
> device_del(&bridge->dev);
>
> free:
> +#ifdef CONFIG_PCI_DOMAINS_GENERIC
> + pci_bus_release_domain_nr(bus, parent);
> +#endif
> kfree(bus);
> return err;
> }
> diff --git a/drivers/pci/remove.c b/drivers/pci/remove.c
> index 4c54c75050dc..0145aef1b930 100644
> --- a/drivers/pci/remove.c
> +++ b/drivers/pci/remove.c
> @@ -160,6 +160,12 @@ void pci_remove_root_bus(struct pci_bus *bus)
> pci_remove_bus(bus);
> host_bridge->bus = NULL;
>
> +#ifdef CONFIG_PCI_DOMAINS_GENERIC
> + /* Release domain_nr if it was dynamically allocated */
> + if (host_bridge->domain_nr == PCI_DOMAIN_NR_NOT_SET)
> + pci_bus_release_domain_nr(bus, host_bridge->dev.parent);
> +#endif
> +
> /* remove the host bridge */
> device_del(&host_bridge->dev);
> }
> diff --git a/include/linux/pci.h b/include/linux/pci.h
> index 81a57b498f22..6c7f27e62bcc 100644
> --- a/include/linux/pci.h
> +++ b/include/linux/pci.h
> @@ -1723,6 +1723,7 @@ static inline int acpi_pci_bus_find_domain_nr(struct pci_bus *bus)
> { return 0; }
> #endif
> int pci_bus_find_domain_nr(struct pci_bus *bus, struct device *parent);
> +void pci_bus_release_domain_nr(struct pci_bus *bus, struct device *parent);
> #endif
>
> /* Some architectures require additional setup to direct VGA traffic */
> --
> 2.20.1
>
On Thu, Aug 18, 2022 at 03:50:09PM +0200, Pali Roh?r wrote:
> PING?
Pretty much anything sent during the merge window, and just before the
merge window gets thrown away. Please rebase onto the current pci tree
and repost.
Andrew
On Thu, Aug 18, 2022 at 06:37:56PM +0200, Pali Roh?r wrote:
> On Thursday 18 August 2022 16:59:33 Andrew Lunn wrote:
> > On Thu, Aug 18, 2022 at 03:50:09PM +0200, Pali Roh?r wrote:
> > > PING?
> >
> > Pretty much anything sent during the merge window, and just before the
> > merge window gets thrown away. Please rebase onto the current pci tree
> > and repost.
>
> Please write it pretty clear that you are not interested in those
> patches, and not hiding this info behind asking me after month of
> waiting for another work of rebase with sending them at eight o'clock
> during full moon. It is pretty ridiculous how to say "go away". Thanks.
Nobody is saying "go away". I apologize that I haven't had time to
look at this yet.
It's still in patchwork [1], and if it still applies cleanly to
v6.0-rc1, you don't need to do anything. If it requires rebasing to
apply cleanly, it will expedite things if you do that.
Bjorn
[1] https://patchwork.kernel.org/project/linux-pci/patch/[email protected]/
On Thursday 18 August 2022 16:59:33 Andrew Lunn wrote:
> On Thu, Aug 18, 2022 at 03:50:09PM +0200, Pali Rohár wrote:
> > PING?
>
> Pretty much anything sent during the merge window, and just before the
> merge window gets thrown away. Please rebase onto the current pci tree
> and repost.
>
> Andrew
>
Please write it pretty clear that you are not interested in those
patches, and not hiding this info behind asking me after month of
waiting for another work of rebase with sending them at eight o'clock
during full moon. It is pretty ridiculous how to say "go away". Thanks.
On Thursday 18 August 2022 11:52:20 Bjorn Helgaas wrote:
> On Thu, Aug 18, 2022 at 06:37:56PM +0200, Pali Rohár wrote:
> > On Thursday 18 August 2022 16:59:33 Andrew Lunn wrote:
> > > On Thu, Aug 18, 2022 at 03:50:09PM +0200, Pali Rohár wrote:
> > > > PING?
> > >
> > > Pretty much anything sent during the merge window, and just before the
> > > merge window gets thrown away. Please rebase onto the current pci tree
> > > and repost.
> >
> > Please write it pretty clear that you are not interested in those
> > patches, and not hiding this info behind asking me after month of
> > waiting for another work of rebase with sending them at eight o'clock
> > during full moon. It is pretty ridiculous how to say "go away". Thanks.
>
> Nobody is saying "go away". I apologize that I haven't had time to
> look at this yet.
>
> It's still in patchwork [1], and if it still applies cleanly to
> v6.0-rc1, you don't need to do anything. If it requires rebasing to
> apply cleanly, it will expedite things if you do that.
>
> Bjorn
>
> [1] https://patchwork.kernel.org/project/linux-pci/patch/[email protected]/
It applies cleanly on v6.0-rc1.
On Sunday 09 October 2022 15:36:32 Christophe JAILLET wrote:
> Le 14/07/2022 à 20:41, Pali Rohár a écrit :
> > Replace assignment of PCI domain from atomic_inc_return() to ida_alloc().
> >
> > Use two IDAs, one for static domain allocations (those which are defined in
> > device tree) and second for dynamic allocations (all other).
> >
> > During removal of root bus / host bridge release also allocated domain id.
> > So released id can be reused again, for example in situation when
> > dynamically loading and unloading native PCI host bridge drivers.
> >
> > This change also allows to mix static device tree assignment and dynamic by
> > kernel as all static allocations are reserved in dynamic pool.
> >
>
> Hi,
>
> A few comments below related to error handling.
> Take them as a naive review from s.o. with limited knowledge of PCI ;-)
>
> CJ
Hello and thank you!
> > Signed-off-by: Pali Rohár <[email protected]>
> > ---
> > Idea of this patch comes from the following discussion:
> > https://lore.kernel.org/linux-pci/[email protected]/t/#u
> >
> > Changes in v2:
> > * Fix broken compilation
> > ---
> > drivers/pci/pci.c | 103 +++++++++++++++++++++++++------------------
> > drivers/pci/probe.c | 5 +++
> > drivers/pci/remove.c | 6 +++
> > include/linux/pci.h | 1 +
> > 4 files changed, 72 insertions(+), 43 deletions(-)
> >
> > diff --git a/drivers/pci/pci.c b/drivers/pci/pci.c
> > index cfaf40a540a8..34fdcee6634a 100644
> > --- a/drivers/pci/pci.c
> > +++ b/drivers/pci/pci.c
> > @@ -6762,60 +6762,70 @@ static void pci_no_domains(void)
> > }
> > #ifdef CONFIG_PCI_DOMAINS_GENERIC
> > -static atomic_t __domain_nr = ATOMIC_INIT(-1);
> > +static DEFINE_IDA(pci_domain_nr_static_ida);
> > +static DEFINE_IDA(pci_domain_nr_dynamic_ida);
> > -static int pci_get_new_domain_nr(void)
> > +static void of_pci_reserve_static_domain_nr(void)
> > {
> > - return atomic_inc_return(&__domain_nr);
> > + struct device_node *np;
> > + int domain_nr;
> > +
> > + for_each_node_by_type(np, "pci") {
> > + domain_nr = of_get_pci_domain_nr(np);
> > + if (domain_nr < 0)
> > + continue;
> > + /*
> > + * Permanently allocate domain_nr in dynamic_ida
> > + * to prevent it from dynamic allocation.
> > + */
> > + ida_alloc_range(&pci_domain_nr_dynamic_ida,
> > + domain_nr, domain_nr, GFP_KERNEL);
>
> IIUC, if the DT is broken, 'domain_nr' could have several times the same
> value. In such a case ida_alloc_range() would return -ENOSPC.
>
> Should this be handled here, maybe at least with a error message?
This code is gathering all domain ids which must not be used by dynamic
domain id allocator. It does it by looking for domain ids in device
tree. And in case one domain id is in device tree more times then it is
not issue for this code -- because the only purpose is to build a list
of "forbidden" ids. So duplicate domain ids here is not error and I
think it is not even warning (as id is already reserved and avoided in
dynamic allocator).
Also it is possible that in device tree is one domain id specified
multiple times, but device tree structure for specific board is written
in the way that exactly one of node with duplicate domain ids would be
registered. This can be achieved e.g. by status = "disabled" or some
other way how subnodes are bind to kernel drivers.
So I think this is not an issue and possible duplicates and error are
handled below...
> > + }
> > }
> > static int of_pci_bus_find_domain_nr(struct device *parent)
> > {
> > - static int use_dt_domains = -1;
> > - int domain = -1;
> > + static bool static_domains_reserved = false;
> > + int domain_nr;
> > - if (parent)
> > - domain = of_get_pci_domain_nr(parent->of_node);
> > + /* On the first call scan device tree for static allocations. */
> > + if (!static_domains_reserved) {
> > + of_pci_reserve_static_domain_nr();
> > + static_domains_reserved = true;
> > + }
> > +
> > + if (parent) {
> > + /*
> > + * If domain is in DT then allocate it in static IDA.
> > + * This prevent duplicate static allocations in case
> > + * of errors in DT.
> > + */
> > + domain_nr = of_get_pci_domain_nr(parent->of_node);
> > + if (domain_nr >= 0)
> > + return ida_alloc_range(&pci_domain_nr_static_ida,
> > + domain_nr, domain_nr,
> > + GFP_KERNEL);
>
> Same here. Can ida_alloc_range() fail with -ENOSPC?
> If yes, what should be done in such a case?
Yes, it can. And caller needs to handle it. If in device tree is
specified that PCI host bridge must use specific domain number (e.g.
because of static allocation in firmware, bootloader, etc...) and kernel
cannot achieve it (e.g. because of error in device tree or somewhere
else) then kernel cannot register this PCI host bridge. Hence there is
immediate return with possible -ENOSPC (or any other negative error
code) to the caller; which prevents future registration of PCI host
bridge.
> > + }
> > /*
> > - * Check DT domain and use_dt_domains values.
> > - *
> > - * If DT domain property is valid (domain >= 0) and
> > - * use_dt_domains != 0, the DT assignment is valid since this means
> > - * we have not previously allocated a domain number by using
> > - * pci_get_new_domain_nr(); we should also update use_dt_domains to
> > - * 1, to indicate that we have just assigned a domain number from
> > - * DT.
> > - *
> > - * If DT domain property value is not valid (ie domain < 0), and we
> > - * have not previously assigned a domain number from DT
> > - * (use_dt_domains != 1) we should assign a domain number by
> > - * using the:
> > - *
> > - * pci_get_new_domain_nr()
> > - *
> > - * API and update the use_dt_domains value to keep track of method we
> > - * are using to assign domain numbers (use_dt_domains = 0).
> > - *
> > - * All other combinations imply we have a platform that is trying
> > - * to mix domain numbers obtained from DT and pci_get_new_domain_nr(),
> > - * which is a recipe for domain mishandling and it is prevented by
> > - * invalidating the domain value (domain = -1) and printing a
> > - * corresponding error.
> > + * If domain was not specified in DT then choose free id from dynamic
> > + * allocations. All domain numbers from DT are permanently in dynamic
> > + * allocations to prevent assigning them to other DT nodes without
> > + * static domain.
> > */
> > - if (domain >= 0 && use_dt_domains) {
> > - use_dt_domains = 1;
> > - } else if (domain < 0 && use_dt_domains != 1) {
> > - use_dt_domains = 0;
> > - domain = pci_get_new_domain_nr();
> > - } else {
> > - if (parent)
> > - pr_err("Node %pOF has ", parent->of_node);
> > - pr_err("Inconsistent \"linux,pci-domain\" property in DT\n");
> > - domain = -1;
> > - }
> > + return ida_alloc(&pci_domain_nr_dynamic_ida, GFP_KERNEL);
>
> Same here, even if un-likelly.
Then again, caller of this function has to check return value and reject
registration of PCI host bridge if kernel cannot allocate PCI domain id
for it (for whatever reason). See below...
>
> > +}
> > - return domain;
> > +static void of_pci_bus_release_domain_nr(struct pci_bus *bus, struct device *parent)
> > +{
> > + if (bus->domain_nr < 0)
> > + return;
> > +
> > + /* Release domain from ida in which was it allocated. */
> > + if (of_get_pci_domain_nr(parent->of_node) == bus->domain_nr)
> > + ida_free(&pci_domain_nr_static_ida, bus->domain_nr);
> > + else
> > + ida_free(&pci_domain_nr_dynamic_ida, bus->domain_nr);
> > }
> > int pci_bus_find_domain_nr(struct pci_bus *bus, struct device *parent)
> > @@ -6823,6 +6833,13 @@ int pci_bus_find_domain_nr(struct pci_bus *bus, struct device *parent)
> > return acpi_disabled ? of_pci_bus_find_domain_nr(parent) :
> > acpi_pci_bus_find_domain_nr(bus);
> > }
> > +
> > +void pci_bus_release_domain_nr(struct pci_bus *bus, struct device *parent)
> > +{
> > + if (!acpi_disabled)
> > + return;
> > + of_pci_bus_release_domain_nr(bus, parent);
> > +}
> > #endif
> > /**
> > diff --git a/drivers/pci/probe.c b/drivers/pci/probe.c
> > index 17a969942d37..12092d238403 100644
> > --- a/drivers/pci/probe.c
> > +++ b/drivers/pci/probe.c
> > @@ -906,6 +906,8 @@ static int pci_register_host_bridge(struct pci_host_bridge *bridge)
> > bus->domain_nr = pci_bus_find_domain_nr(bus, parent);
> > else
> > bus->domain_nr = bridge->domain_nr;
> > + if (bus->domain_nr < 0)
> > + goto free;
And here is the place where _caller_ do this check. If allocation fails
then PCI core code stops registration.
> > #endif
> > b = pci_find_bus(pci_domain_nr(bus), bridge->busnr);
> > @@ -1030,6 +1032,9 @@ static int pci_register_host_bridge(struct pci_host_bridge *bridge)
> > device_del(&bridge->dev);
> > free:
> > +#ifdef CONFIG_PCI_DOMAINS_GENERIC
> > + pci_bus_release_domain_nr(bus, parent);
> > +#endif
> > kfree(bus);
> > return err;
> > }
> > diff --git a/drivers/pci/remove.c b/drivers/pci/remove.c
> > index 4c54c75050dc..0145aef1b930 100644
> > --- a/drivers/pci/remove.c
> > +++ b/drivers/pci/remove.c
> > @@ -160,6 +160,12 @@ void pci_remove_root_bus(struct pci_bus *bus)
> > pci_remove_bus(bus);
> > host_bridge->bus = NULL;
> > +#ifdef CONFIG_PCI_DOMAINS_GENERIC
> > + /* Release domain_nr if it was dynamically allocated */
> > + if (host_bridge->domain_nr == PCI_DOMAIN_NR_NOT_SET)
> > + pci_bus_release_domain_nr(bus, host_bridge->dev.parent);
> > +#endif
> > +
> > /* remove the host bridge */
> > device_del(&host_bridge->dev);
> > }
> > diff --git a/include/linux/pci.h b/include/linux/pci.h
> > index 81a57b498f22..6c7f27e62bcc 100644
> > --- a/include/linux/pci.h
> > +++ b/include/linux/pci.h
> > @@ -1723,6 +1723,7 @@ static inline int acpi_pci_bus_find_domain_nr(struct pci_bus *bus)
> > { return 0; }
> > #endif
> > int pci_bus_find_domain_nr(struct pci_bus *bus, struct device *parent);
> > +void pci_bus_release_domain_nr(struct pci_bus *bus, struct device *parent);
> > #endif
> > /* Some architectures require additional setup to direct VGA traffic */
>
Le 09/10/2022 à 13:29, Pali Rohár a écrit :
> On Thursday 18 August 2022 11:52:20 Bjorn Helgaas wrote:
>> On Thu, Aug 18, 2022 at 06:37:56PM +0200, Pali Rohár wrote:
>>> On Thursday 18 August 2022 16:59:33 Andrew Lunn wrote:
>>>> On Thu, Aug 18, 2022 at 03:50:09PM +0200, Pali Rohár wrote:
>>>>> PING?
>>>>
>>>> Pretty much anything sent during the merge window, and just before the
>>>> merge window gets thrown away. Please rebase onto the current pci tree
>>>> and repost.
>>>
>>> Please write it pretty clear that you are not interested in those
>>> patches, and not hiding this info behind asking me after month of
>>> waiting for another work of rebase with sending them at eight o'clock
>>> during full moon. It is pretty ridiculous how to say "go away". Thanks.
>>
>> Nobody is saying "go away". I apologize that I haven't had time to
>> look at this yet.
>>
>> It's still in patchwork [1], and if it still applies cleanly to
>> v6.0-rc1, you don't need to do anything. If it requires rebasing to
>> apply cleanly, it will expedite things if you do that.
>>
>> Bjorn
>>
>> [1] https://patchwork.kernel.org/project/linux-pci/patch/[email protected]/
>
> It applies cleanly on v6.0-rc1.
>
On linux-next, the diff applies, but with offsets on pci.c and pci.h.
CJ
Le 14/07/2022 à 20:41, Pali Rohár a écrit :
> Replace assignment of PCI domain from atomic_inc_return() to ida_alloc().
>
> Use two IDAs, one for static domain allocations (those which are defined in
> device tree) and second for dynamic allocations (all other).
>
> During removal of root bus / host bridge release also allocated domain id.
> So released id can be reused again, for example in situation when
> dynamically loading and unloading native PCI host bridge drivers.
>
> This change also allows to mix static device tree assignment and dynamic by
> kernel as all static allocations are reserved in dynamic pool.
>
Hi,
A few comments below related to error handling.
Take them as a naive review from s.o. with limited knowledge of PCI ;-)
CJ
> Signed-off-by: Pali Rohár <[email protected]>
> ---
> Idea of this patch comes from the following discussion:
> https://lore.kernel.org/linux-pci/[email protected]/t/#u
>
> Changes in v2:
> * Fix broken compilation
> ---
> drivers/pci/pci.c | 103 +++++++++++++++++++++++++------------------
> drivers/pci/probe.c | 5 +++
> drivers/pci/remove.c | 6 +++
> include/linux/pci.h | 1 +
> 4 files changed, 72 insertions(+), 43 deletions(-)
>
> diff --git a/drivers/pci/pci.c b/drivers/pci/pci.c
> index cfaf40a540a8..34fdcee6634a 100644
> --- a/drivers/pci/pci.c
> +++ b/drivers/pci/pci.c
> @@ -6762,60 +6762,70 @@ static void pci_no_domains(void)
> }
>
> #ifdef CONFIG_PCI_DOMAINS_GENERIC
> -static atomic_t __domain_nr = ATOMIC_INIT(-1);
> +static DEFINE_IDA(pci_domain_nr_static_ida);
> +static DEFINE_IDA(pci_domain_nr_dynamic_ida);
>
> -static int pci_get_new_domain_nr(void)
> +static void of_pci_reserve_static_domain_nr(void)
> {
> - return atomic_inc_return(&__domain_nr);
> + struct device_node *np;
> + int domain_nr;
> +
> + for_each_node_by_type(np, "pci") {
> + domain_nr = of_get_pci_domain_nr(np);
> + if (domain_nr < 0)
> + continue;
> + /*
> + * Permanently allocate domain_nr in dynamic_ida
> + * to prevent it from dynamic allocation.
> + */
> + ida_alloc_range(&pci_domain_nr_dynamic_ida,
> + domain_nr, domain_nr, GFP_KERNEL);
IIUC, if the DT is broken, 'domain_nr' could have several times the same
value. In such a case ida_alloc_range() would return -ENOSPC.
Should this be handled here, maybe at least with a error message?
> + }
> }
>
> static int of_pci_bus_find_domain_nr(struct device *parent)
> {
> - static int use_dt_domains = -1;
> - int domain = -1;
> + static bool static_domains_reserved = false;
> + int domain_nr;
>
> - if (parent)
> - domain = of_get_pci_domain_nr(parent->of_node);
> + /* On the first call scan device tree for static allocations. */
> + if (!static_domains_reserved) {
> + of_pci_reserve_static_domain_nr();
> + static_domains_reserved = true;
> + }
> +
> + if (parent) {
> + /*
> + * If domain is in DT then allocate it in static IDA.
> + * This prevent duplicate static allocations in case
> + * of errors in DT.
> + */
> + domain_nr = of_get_pci_domain_nr(parent->of_node);
> + if (domain_nr >= 0)
> + return ida_alloc_range(&pci_domain_nr_static_ida,
> + domain_nr, domain_nr,
> + GFP_KERNEL);
Same here. Can ida_alloc_range() fail with -ENOSPC?
If yes, what should be done in such a case?
> + }
>
> /*
> - * Check DT domain and use_dt_domains values.
> - *
> - * If DT domain property is valid (domain >= 0) and
> - * use_dt_domains != 0, the DT assignment is valid since this means
> - * we have not previously allocated a domain number by using
> - * pci_get_new_domain_nr(); we should also update use_dt_domains to
> - * 1, to indicate that we have just assigned a domain number from
> - * DT.
> - *
> - * If DT domain property value is not valid (ie domain < 0), and we
> - * have not previously assigned a domain number from DT
> - * (use_dt_domains != 1) we should assign a domain number by
> - * using the:
> - *
> - * pci_get_new_domain_nr()
> - *
> - * API and update the use_dt_domains value to keep track of method we
> - * are using to assign domain numbers (use_dt_domains = 0).
> - *
> - * All other combinations imply we have a platform that is trying
> - * to mix domain numbers obtained from DT and pci_get_new_domain_nr(),
> - * which is a recipe for domain mishandling and it is prevented by
> - * invalidating the domain value (domain = -1) and printing a
> - * corresponding error.
> + * If domain was not specified in DT then choose free id from dynamic
> + * allocations. All domain numbers from DT are permanently in dynamic
> + * allocations to prevent assigning them to other DT nodes without
> + * static domain.
> */
> - if (domain >= 0 && use_dt_domains) {
> - use_dt_domains = 1;
> - } else if (domain < 0 && use_dt_domains != 1) {
> - use_dt_domains = 0;
> - domain = pci_get_new_domain_nr();
> - } else {
> - if (parent)
> - pr_err("Node %pOF has ", parent->of_node);
> - pr_err("Inconsistent \"linux,pci-domain\" property in DT\n");
> - domain = -1;
> - }
> + return ida_alloc(&pci_domain_nr_dynamic_ida, GFP_KERNEL);
Same here, even if un-likelly.
> +}
>
> - return domain;
> +static void of_pci_bus_release_domain_nr(struct pci_bus *bus, struct device *parent)
> +{
> + if (bus->domain_nr < 0)
> + return;
> +
> + /* Release domain from ida in which was it allocated. */
> + if (of_get_pci_domain_nr(parent->of_node) == bus->domain_nr)
> + ida_free(&pci_domain_nr_static_ida, bus->domain_nr);
> + else
> + ida_free(&pci_domain_nr_dynamic_ida, bus->domain_nr);
> }
>
> int pci_bus_find_domain_nr(struct pci_bus *bus, struct device *parent)
> @@ -6823,6 +6833,13 @@ int pci_bus_find_domain_nr(struct pci_bus *bus, struct device *parent)
> return acpi_disabled ? of_pci_bus_find_domain_nr(parent) :
> acpi_pci_bus_find_domain_nr(bus);
> }
> +
> +void pci_bus_release_domain_nr(struct pci_bus *bus, struct device *parent)
> +{
> + if (!acpi_disabled)
> + return;
> + of_pci_bus_release_domain_nr(bus, parent);
> +}
> #endif
>
> /**
> diff --git a/drivers/pci/probe.c b/drivers/pci/probe.c
> index 17a969942d37..12092d238403 100644
> --- a/drivers/pci/probe.c
> +++ b/drivers/pci/probe.c
> @@ -906,6 +906,8 @@ static int pci_register_host_bridge(struct pci_host_bridge *bridge)
> bus->domain_nr = pci_bus_find_domain_nr(bus, parent);
> else
> bus->domain_nr = bridge->domain_nr;
> + if (bus->domain_nr < 0)
> + goto free;
> #endif
>
> b = pci_find_bus(pci_domain_nr(bus), bridge->busnr);
> @@ -1030,6 +1032,9 @@ static int pci_register_host_bridge(struct pci_host_bridge *bridge)
> device_del(&bridge->dev);
>
> free:
> +#ifdef CONFIG_PCI_DOMAINS_GENERIC
> + pci_bus_release_domain_nr(bus, parent);
> +#endif
> kfree(bus);
> return err;
> }
> diff --git a/drivers/pci/remove.c b/drivers/pci/remove.c
> index 4c54c75050dc..0145aef1b930 100644
> --- a/drivers/pci/remove.c
> +++ b/drivers/pci/remove.c
> @@ -160,6 +160,12 @@ void pci_remove_root_bus(struct pci_bus *bus)
> pci_remove_bus(bus);
> host_bridge->bus = NULL;
>
> +#ifdef CONFIG_PCI_DOMAINS_GENERIC
> + /* Release domain_nr if it was dynamically allocated */
> + if (host_bridge->domain_nr == PCI_DOMAIN_NR_NOT_SET)
> + pci_bus_release_domain_nr(bus, host_bridge->dev.parent);
> +#endif
> +
> /* remove the host bridge */
> device_del(&host_bridge->dev);
> }
> diff --git a/include/linux/pci.h b/include/linux/pci.h
> index 81a57b498f22..6c7f27e62bcc 100644
> --- a/include/linux/pci.h
> +++ b/include/linux/pci.h
> @@ -1723,6 +1723,7 @@ static inline int acpi_pci_bus_find_domain_nr(struct pci_bus *bus)
> { return 0; }
> #endif
> int pci_bus_find_domain_nr(struct pci_bus *bus, struct device *parent);
> +void pci_bus_release_domain_nr(struct pci_bus *bus, struct device *parent);
> #endif
>
> /* Some architectures require additional setup to direct VGA traffic */
On Sunday 09 October 2022 15:39:11 Christophe JAILLET wrote:
> Le 09/10/2022 à 13:29, Pali Rohár a écrit :
> > On Thursday 18 August 2022 11:52:20 Bjorn Helgaas wrote:
> > > On Thu, Aug 18, 2022 at 06:37:56PM +0200, Pali Rohár wrote:
> > > > On Thursday 18 August 2022 16:59:33 Andrew Lunn wrote:
> > > > > On Thu, Aug 18, 2022 at 03:50:09PM +0200, Pali Rohár wrote:
> > > > > > PING?
> > > > >
> > > > > Pretty much anything sent during the merge window, and just before the
> > > > > merge window gets thrown away. Please rebase onto the current pci tree
> > > > > and repost.
> > > >
> > > > Please write it pretty clear that you are not interested in those
> > > > patches, and not hiding this info behind asking me after month of
> > > > waiting for another work of rebase with sending them at eight o'clock
> > > > during full moon. It is pretty ridiculous how to say "go away". Thanks.
> > >
> > > Nobody is saying "go away". I apologize that I haven't had time to
> > > look at this yet.
> > >
> > > It's still in patchwork [1], and if it still applies cleanly to
> > > v6.0-rc1, you don't need to do anything. If it requires rebasing to
> > > apply cleanly, it will expedite things if you do that.
> > >
> > > Bjorn
> > >
> > > [1] https://patchwork.kernel.org/project/linux-pci/patch/[email protected]/
> >
> > It applies cleanly on v6.0-rc1.
> >
>
> On linux-next, the diff applies, but with offsets on pci.c and pci.h.
>
> CJ
This should not be an issue. Bjorn?
[+cc Tomasz, Liviu since they worked on this code long ago]
On Thu, Jul 14, 2022 at 08:41:30PM +0200, Pali Roh?r wrote:
> Replace assignment of PCI domain from atomic_inc_return() to ida_alloc().
>
> Use two IDAs, one for static domain allocations (those which are defined in
> device tree) and second for dynamic allocations (all other).
>
> During removal of root bus / host bridge release also allocated domain id.
> So released id can be reused again, for example in situation when
> dynamically loading and unloading native PCI host bridge drivers.
>
> This change also allows to mix static device tree assignment and dynamic by
> kernel as all static allocations are reserved in dynamic pool.
>
> Signed-off-by: Pali Roh?r <[email protected]>
Applied to pci/enumeration for v6.2, thanks!
Using the IDAs makes this look so much nicer than what we had before.
> ---
> Idea of this patch comes from the following discussion:
> https://lore.kernel.org/linux-pci/[email protected]/t/#u
>
> Changes in v2:
> * Fix broken compilation
> ---
> drivers/pci/pci.c | 103 +++++++++++++++++++++++++------------------
> drivers/pci/probe.c | 5 +++
> drivers/pci/remove.c | 6 +++
> include/linux/pci.h | 1 +
> 4 files changed, 72 insertions(+), 43 deletions(-)
>
> diff --git a/drivers/pci/pci.c b/drivers/pci/pci.c
> index cfaf40a540a8..34fdcee6634a 100644
> --- a/drivers/pci/pci.c
> +++ b/drivers/pci/pci.c
> @@ -6762,60 +6762,70 @@ static void pci_no_domains(void)
> }
>
> #ifdef CONFIG_PCI_DOMAINS_GENERIC
> -static atomic_t __domain_nr = ATOMIC_INIT(-1);
> +static DEFINE_IDA(pci_domain_nr_static_ida);
> +static DEFINE_IDA(pci_domain_nr_dynamic_ida);
>
> -static int pci_get_new_domain_nr(void)
> +static void of_pci_reserve_static_domain_nr(void)
> {
> - return atomic_inc_return(&__domain_nr);
> + struct device_node *np;
> + int domain_nr;
> +
> + for_each_node_by_type(np, "pci") {
> + domain_nr = of_get_pci_domain_nr(np);
> + if (domain_nr < 0)
> + continue;
> + /*
> + * Permanently allocate domain_nr in dynamic_ida
> + * to prevent it from dynamic allocation.
> + */
> + ida_alloc_range(&pci_domain_nr_dynamic_ida,
> + domain_nr, domain_nr, GFP_KERNEL);
> + }
> }
>
> static int of_pci_bus_find_domain_nr(struct device *parent)
> {
> - static int use_dt_domains = -1;
> - int domain = -1;
> + static bool static_domains_reserved = false;
> + int domain_nr;
>
> - if (parent)
> - domain = of_get_pci_domain_nr(parent->of_node);
> + /* On the first call scan device tree for static allocations. */
> + if (!static_domains_reserved) {
> + of_pci_reserve_static_domain_nr();
> + static_domains_reserved = true;
> + }
> +
> + if (parent) {
> + /*
> + * If domain is in DT then allocate it in static IDA.
> + * This prevent duplicate static allocations in case
> + * of errors in DT.
> + */
> + domain_nr = of_get_pci_domain_nr(parent->of_node);
> + if (domain_nr >= 0)
> + return ida_alloc_range(&pci_domain_nr_static_ida,
> + domain_nr, domain_nr,
> + GFP_KERNEL);
> + }
>
> /*
> - * Check DT domain and use_dt_domains values.
> - *
> - * If DT domain property is valid (domain >= 0) and
> - * use_dt_domains != 0, the DT assignment is valid since this means
> - * we have not previously allocated a domain number by using
> - * pci_get_new_domain_nr(); we should also update use_dt_domains to
> - * 1, to indicate that we have just assigned a domain number from
> - * DT.
> - *
> - * If DT domain property value is not valid (ie domain < 0), and we
> - * have not previously assigned a domain number from DT
> - * (use_dt_domains != 1) we should assign a domain number by
> - * using the:
> - *
> - * pci_get_new_domain_nr()
> - *
> - * API and update the use_dt_domains value to keep track of method we
> - * are using to assign domain numbers (use_dt_domains = 0).
> - *
> - * All other combinations imply we have a platform that is trying
> - * to mix domain numbers obtained from DT and pci_get_new_domain_nr(),
> - * which is a recipe for domain mishandling and it is prevented by
> - * invalidating the domain value (domain = -1) and printing a
> - * corresponding error.
> + * If domain was not specified in DT then choose free id from dynamic
> + * allocations. All domain numbers from DT are permanently in dynamic
> + * allocations to prevent assigning them to other DT nodes without
> + * static domain.
> */
> - if (domain >= 0 && use_dt_domains) {
> - use_dt_domains = 1;
> - } else if (domain < 0 && use_dt_domains != 1) {
> - use_dt_domains = 0;
> - domain = pci_get_new_domain_nr();
> - } else {
> - if (parent)
> - pr_err("Node %pOF has ", parent->of_node);
> - pr_err("Inconsistent \"linux,pci-domain\" property in DT\n");
> - domain = -1;
> - }
> + return ida_alloc(&pci_domain_nr_dynamic_ida, GFP_KERNEL);
> +}
>
> - return domain;
> +static void of_pci_bus_release_domain_nr(struct pci_bus *bus, struct device *parent)
> +{
> + if (bus->domain_nr < 0)
> + return;
> +
> + /* Release domain from ida in which was it allocated. */
> + if (of_get_pci_domain_nr(parent->of_node) == bus->domain_nr)
> + ida_free(&pci_domain_nr_static_ida, bus->domain_nr);
> + else
> + ida_free(&pci_domain_nr_dynamic_ida, bus->domain_nr);
> }
>
> int pci_bus_find_domain_nr(struct pci_bus *bus, struct device *parent)
> @@ -6823,6 +6833,13 @@ int pci_bus_find_domain_nr(struct pci_bus *bus, struct device *parent)
> return acpi_disabled ? of_pci_bus_find_domain_nr(parent) :
> acpi_pci_bus_find_domain_nr(bus);
> }
> +
> +void pci_bus_release_domain_nr(struct pci_bus *bus, struct device *parent)
> +{
> + if (!acpi_disabled)
> + return;
> + of_pci_bus_release_domain_nr(bus, parent);
> +}
> #endif
>
> /**
> diff --git a/drivers/pci/probe.c b/drivers/pci/probe.c
> index 17a969942d37..12092d238403 100644
> --- a/drivers/pci/probe.c
> +++ b/drivers/pci/probe.c
> @@ -906,6 +906,8 @@ static int pci_register_host_bridge(struct pci_host_bridge *bridge)
> bus->domain_nr = pci_bus_find_domain_nr(bus, parent);
> else
> bus->domain_nr = bridge->domain_nr;
> + if (bus->domain_nr < 0)
> + goto free;
> #endif
>
> b = pci_find_bus(pci_domain_nr(bus), bridge->busnr);
> @@ -1030,6 +1032,9 @@ static int pci_register_host_bridge(struct pci_host_bridge *bridge)
> device_del(&bridge->dev);
>
> free:
> +#ifdef CONFIG_PCI_DOMAINS_GENERIC
> + pci_bus_release_domain_nr(bus, parent);
> +#endif
> kfree(bus);
> return err;
> }
> diff --git a/drivers/pci/remove.c b/drivers/pci/remove.c
> index 4c54c75050dc..0145aef1b930 100644
> --- a/drivers/pci/remove.c
> +++ b/drivers/pci/remove.c
> @@ -160,6 +160,12 @@ void pci_remove_root_bus(struct pci_bus *bus)
> pci_remove_bus(bus);
> host_bridge->bus = NULL;
>
> +#ifdef CONFIG_PCI_DOMAINS_GENERIC
> + /* Release domain_nr if it was dynamically allocated */
> + if (host_bridge->domain_nr == PCI_DOMAIN_NR_NOT_SET)
> + pci_bus_release_domain_nr(bus, host_bridge->dev.parent);
> +#endif
> +
> /* remove the host bridge */
> device_del(&host_bridge->dev);
> }
> diff --git a/include/linux/pci.h b/include/linux/pci.h
> index 81a57b498f22..6c7f27e62bcc 100644
> --- a/include/linux/pci.h
> +++ b/include/linux/pci.h
> @@ -1723,6 +1723,7 @@ static inline int acpi_pci_bus_find_domain_nr(struct pci_bus *bus)
> { return 0; }
> #endif
> int pci_bus_find_domain_nr(struct pci_bus *bus, struct device *parent);
> +void pci_bus_release_domain_nr(struct pci_bus *bus, struct device *parent);
> #endif
>
> /* Some architectures require additional setup to direct VGA traffic */
> --
> 2.20.1
>
Hi Pali,
On 14/07/2022 19:41, Pali Rohár wrote:
> Replace assignment of PCI domain from atomic_inc_return() to ida_alloc().
>
> Use two IDAs, one for static domain allocations (those which are defined in
> device tree) and second for dynamic allocations (all other).
>
> During removal of root bus / host bridge release also allocated domain id.
> So released id can be reused again, for example in situation when
> dynamically loading and unloading native PCI host bridge drivers.
>
> This change also allows to mix static device tree assignment and dynamic by
> kernel as all static allocations are reserved in dynamic pool.
>
> Signed-off-by: Pali Rohár <[email protected]>
> ---
> Idea of this patch comes from the following discussion:
> https://lore.kernel.org/linux-pci/[email protected]/t/#u
>
> Changes in v2:
> * Fix broken compilation
> ---
> drivers/pci/pci.c | 103 +++++++++++++++++++++++++------------------
> drivers/pci/probe.c | 5 +++
> drivers/pci/remove.c | 6 +++
> include/linux/pci.h | 1 +
> 4 files changed, 72 insertions(+), 43 deletions(-)
>
> diff --git a/drivers/pci/pci.c b/drivers/pci/pci.c
> index cfaf40a540a8..34fdcee6634a 100644
> --- a/drivers/pci/pci.c
> +++ b/drivers/pci/pci.c
> @@ -6762,60 +6762,70 @@ static void pci_no_domains(void)
> }
>
> #ifdef CONFIG_PCI_DOMAINS_GENERIC
> -static atomic_t __domain_nr = ATOMIC_INIT(-1);
> +static DEFINE_IDA(pci_domain_nr_static_ida);
> +static DEFINE_IDA(pci_domain_nr_dynamic_ida);
>
> -static int pci_get_new_domain_nr(void)
> +static void of_pci_reserve_static_domain_nr(void)
> {
> - return atomic_inc_return(&__domain_nr);
> + struct device_node *np;
> + int domain_nr;
> +
> + for_each_node_by_type(np, "pci") {
> + domain_nr = of_get_pci_domain_nr(np);
> + if (domain_nr < 0)
> + continue;
> + /*
> + * Permanently allocate domain_nr in dynamic_ida
> + * to prevent it from dynamic allocation.
> + */
> + ida_alloc_range(&pci_domain_nr_dynamic_ida,
> + domain_nr, domain_nr, GFP_KERNEL);
> + }
> }
>
> static int of_pci_bus_find_domain_nr(struct device *parent)
> {
> - static int use_dt_domains = -1;
> - int domain = -1;
> + static bool static_domains_reserved = false;
> + int domain_nr;
>
> - if (parent)
> - domain = of_get_pci_domain_nr(parent->of_node);
> + /* On the first call scan device tree for static allocations. */
> + if (!static_domains_reserved) {
> + of_pci_reserve_static_domain_nr();
> + static_domains_reserved = true;
> + }
> +
> + if (parent) {
> + /*
> + * If domain is in DT then allocate it in static IDA.
> + * This prevent duplicate static allocations in case
> + * of errors in DT.
> + */
> + domain_nr = of_get_pci_domain_nr(parent->of_node);
> + if (domain_nr >= 0)
> + return ida_alloc_range(&pci_domain_nr_static_ida,
> + domain_nr, domain_nr,
> + GFP_KERNEL);
> + }
>
> /*
> - * Check DT domain and use_dt_domains values.
> - *
> - * If DT domain property is valid (domain >= 0) and
> - * use_dt_domains != 0, the DT assignment is valid since this means
> - * we have not previously allocated a domain number by using
> - * pci_get_new_domain_nr(); we should also update use_dt_domains to
> - * 1, to indicate that we have just assigned a domain number from
> - * DT.
> - *
> - * If DT domain property value is not valid (ie domain < 0), and we
> - * have not previously assigned a domain number from DT
> - * (use_dt_domains != 1) we should assign a domain number by
> - * using the:
> - *
> - * pci_get_new_domain_nr()
> - *
> - * API and update the use_dt_domains value to keep track of method we
> - * are using to assign domain numbers (use_dt_domains = 0).
> - *
> - * All other combinations imply we have a platform that is trying
> - * to mix domain numbers obtained from DT and pci_get_new_domain_nr(),
> - * which is a recipe for domain mishandling and it is prevented by
> - * invalidating the domain value (domain = -1) and printing a
> - * corresponding error.
> + * If domain was not specified in DT then choose free id from dynamic
> + * allocations. All domain numbers from DT are permanently in dynamic
> + * allocations to prevent assigning them to other DT nodes without
> + * static domain.
> */
> - if (domain >= 0 && use_dt_domains) {
> - use_dt_domains = 1;
> - } else if (domain < 0 && use_dt_domains != 1) {
> - use_dt_domains = 0;
> - domain = pci_get_new_domain_nr();
> - } else {
> - if (parent)
> - pr_err("Node %pOF has ", parent->of_node);
> - pr_err("Inconsistent \"linux,pci-domain\" property in DT\n");
> - domain = -1;
> - }
> + return ida_alloc(&pci_domain_nr_dynamic_ida, GFP_KERNEL);
> +}
>
> - return domain;
> +static void of_pci_bus_release_domain_nr(struct pci_bus *bus, struct device *parent)
> +{
> + if (bus->domain_nr < 0)
> + return;
> +
> + /* Release domain from ida in which was it allocated. */
> + if (of_get_pci_domain_nr(parent->of_node) == bus->domain_nr)
> + ida_free(&pci_domain_nr_static_ida, bus->domain_nr);
> + else
> + ida_free(&pci_domain_nr_dynamic_ida, bus->domain_nr);
> }
>
> int pci_bus_find_domain_nr(struct pci_bus *bus, struct device *parent)
> @@ -6823,6 +6833,13 @@ int pci_bus_find_domain_nr(struct pci_bus *bus, struct device *parent)
> return acpi_disabled ? of_pci_bus_find_domain_nr(parent) :
> acpi_pci_bus_find_domain_nr(bus);
> }
> +
> +void pci_bus_release_domain_nr(struct pci_bus *bus, struct device *parent)
> +{
> + if (!acpi_disabled)
> + return;
> + of_pci_bus_release_domain_nr(bus, parent);
> +}
> #endif
>
> /**
> diff --git a/drivers/pci/probe.c b/drivers/pci/probe.c
> index 17a969942d37..12092d238403 100644
> --- a/drivers/pci/probe.c
> +++ b/drivers/pci/probe.c
> @@ -906,6 +906,8 @@ static int pci_register_host_bridge(struct pci_host_bridge *bridge)
> bus->domain_nr = pci_bus_find_domain_nr(bus, parent);
> else
> bus->domain_nr = bridge->domain_nr;
> + if (bus->domain_nr < 0)
> + goto free;
> #endif
>
> b = pci_find_bus(pci_domain_nr(bus), bridge->busnr);
> @@ -1030,6 +1032,9 @@ static int pci_register_host_bridge(struct pci_host_bridge *bridge)
> device_del(&bridge->dev);
>
> free:
> +#ifdef CONFIG_PCI_DOMAINS_GENERIC
> + pci_bus_release_domain_nr(bus, parent);
> +#endif
> kfree(bus);
> return err;
> }
> diff --git a/drivers/pci/remove.c b/drivers/pci/remove.c
> index 4c54c75050dc..0145aef1b930 100644
> --- a/drivers/pci/remove.c
> +++ b/drivers/pci/remove.c
> @@ -160,6 +160,12 @@ void pci_remove_root_bus(struct pci_bus *bus)
> pci_remove_bus(bus);
> host_bridge->bus = NULL;
>
> +#ifdef CONFIG_PCI_DOMAINS_GENERIC
> + /* Release domain_nr if it was dynamically allocated */
> + if (host_bridge->domain_nr == PCI_DOMAIN_NR_NOT_SET)
> + pci_bus_release_domain_nr(bus, host_bridge->dev.parent);
> +#endif
> +
> /* remove the host bridge */
> device_del(&host_bridge->dev);
> }
After this change was made we are seeing the following bug
report on a Tegra234 Jetson Orin board ...
[ 17.172346] tegra194-pcie 141a0000.pcie: host bridge /pcie@141a0000 ranges:
[ 17.172470] tegra194-pcie 141a0000.pcie: MEM 0x2800000000..0x2b27ffffff -> 0x2800000000
[ 17.172519] tegra194-pcie 141a0000.pcie: MEM 0x2b28000000..0x2b2fffffff -> 0x0040000000
[ 17.172548] tegra194-pcie 141a0000.pcie: IO 0x003a100000..0x003a1fffff -> 0x003a100000
[ 17.173449] tegra194-pcie 141a0000.pcie: iATU: unroll T, 8 ob, 2 ib, align 64K, limit 32G
[ 18.279048] tegra194-pcie 141a0000.pcie: Phy link never came up
[ 19.279285] tegra194-pcie 141a0000.pcie: Phy link never came up
[ 19.279599] tegra194-pcie 141a0000.pcie: PCI host bridge to bus 0005:00
[ 19.279613] pci_bus 0005:00: root bus resource [bus 00-ff]
[ 19.279622] pci_bus 0005:00: root bus resource [mem 0x2800000000-0x2b27ffffff pref]
[ 19.279631] pci_bus 0005:00: root bus resource [mem 0x2b28000000-0x2b2fffffff] (bus address [0x40000000-0x47ffffff])
[ 19.279639] pci_bus 0005:00: root bus resource [io 0x200000-0x2fffff] (bus address [0x3a100000-0x3a1fffff])
[ 19.279687] pci 0005:00:00.0: [10de:229a] type 01 class 0x060400
[ 19.279886] pci 0005:00:00.0: PME# supported from D0 D3hot
[ 19.283256] pci 0005:00:00.0: PCI bridge to [bus 01-ff]
[ 19.283590] pcieport 0005:00:00.0: Adding to iommu group 26
[ 19.283991] pcieport 0005:00:00.0: PME: Signaling with IRQ 174
[ 19.284429] pcieport 0005:00:00.0: AER: enabled with IRQ 174
[ 19.285003] pci_bus 0005:01: busn_res: [bus 01-ff] is released
[ 19.285591] pci 0005:00:00.0: Removing from iommu group 26
[ 19.285751] pci_bus 0005:00: busn_res: [bus 00-ff] is released
[ 19.285870] ==================================================================
[ 19.293351] BUG: KFENCE: use-after-free read in pci_bus_release_domain_nr+0x10/0x70
[ 19.302817] Use-after-free read at 0x000000007f3b80eb (in kfence-#115):
[ 19.309677] pci_bus_release_domain_nr+0x10/0x70
[ 19.309691] dw_pcie_host_deinit+0x28/0x78
[ 19.309702] tegra_pcie_deinit_controller+0x1c/0x38 [pcie_tegra194]
[ 19.309734] tegra_pcie_dw_probe+0x648/0xb28 [pcie_tegra194]
[ 19.309752] platform_probe+0x90/0xd8
[ 19.309764] really_probe+0xb8/0x298
[ 19.309777] __driver_probe_device+0x78/0xd8
[ 19.309788] driver_probe_device+0x38/0x120
[ 19.309799] __device_attach_driver+0x94/0xe0
[ 19.309812] bus_for_each_drv+0x70/0xc8
[ 19.309822] __device_attach+0xfc/0x188
[ 19.309833] device_initial_probe+0x10/0x18
[ 19.309844] bus_probe_device+0x94/0xa0
[ 19.309854] deferred_probe_work_func+0x80/0xb8
[ 19.309864] process_one_work+0x1e0/0x348
[ 19.309882] worker_thread+0x48/0x410
[ 19.309891] kthread+0xf4/0x110
[ 19.309904] ret_from_fork+0x10/0x20
[ 19.311457] kfence-#115: 0x00000000063a155a-0x00000000ba698da8, size=1072, cache=kmalloc-2k
[ 19.311469] allocated by task 96 on cpu 10 at 19.279323s:
[ 19.311562] __kmem_cache_alloc_node+0x260/0x278
[ 19.311571] kmalloc_trace+0x24/0x30
[ 19.311580] pci_alloc_bus+0x24/0xa0
[ 19.311590] pci_register_host_bridge+0x48/0x4b8
[ 19.311601] pci_scan_root_bus_bridge+0xc0/0xe8
[ 19.311613] pci_host_probe+0x18/0xc0
[ 19.311623] dw_pcie_host_init+0x2c0/0x568
[ 19.311630] tegra_pcie_dw_probe+0x610/0xb28 [pcie_tegra194]
[ 19.311647] platform_probe+0x90/0xd8
[ 19.311653] really_probe+0xb8/0x298
[ 19.311663] __driver_probe_device+0x78/0xd8
[ 19.311672] driver_probe_device+0x38/0x120
[ 19.311682] __device_attach_driver+0x94/0xe0
[ 19.311694] bus_for_each_drv+0x70/0xc8
[ 19.311702] __device_attach+0xfc/0x188
[ 19.311713] device_initial_probe+0x10/0x18
[ 19.311724] bus_probe_device+0x94/0xa0
[ 19.311733] deferred_probe_work_func+0x80/0xb8
[ 19.311743] process_one_work+0x1e0/0x348
[ 19.311753] worker_thread+0x48/0x410
[ 19.311763] kthread+0xf4/0x110
[ 19.311771] ret_from_fork+0x10/0x20
[ 19.311782] freed by task 96 on cpu 10 at 19.285833s:
[ 19.311799] release_pcibus_dev+0x30/0x40
[ 19.311808] device_release+0x30/0x90
[ 19.311814] kobject_put+0xa8/0x120
[ 19.311832] device_unregister+0x20/0x30
[ 19.311839] pci_remove_bus+0x78/0x88
[ 19.311850] pci_remove_root_bus+0x5c/0x98
[ 19.311860] dw_pcie_host_deinit+0x28/0x78
[ 19.311866] tegra_pcie_deinit_controller+0x1c/0x38 [pcie_tegra194]
[ 19.311883] tegra_pcie_dw_probe+0x648/0xb28 [pcie_tegra194]
[ 19.311900] platform_probe+0x90/0xd8
[ 19.311906] really_probe+0xb8/0x298
[ 19.311916] __driver_probe_device+0x78/0xd8
[ 19.311926] driver_probe_device+0x38/0x120
[ 19.311936] __device_attach_driver+0x94/0xe0
[ 19.311947] bus_for_each_drv+0x70/0xc8
[ 19.311956] __device_attach+0xfc/0x188
[ 19.311966] device_initial_probe+0x10/0x18
[ 19.311976] bus_probe_device+0x94/0xa0
[ 19.311985] deferred_probe_work_func+0x80/0xb8
[ 19.311995] process_one_work+0x1e0/0x348
[ 19.312005] worker_thread+0x48/0x410
[ 19.312014] kthread+0xf4/0x110
[ 19.312022] ret_from_fork+0x10/0x20
[ 19.313579] CPU: 10 PID: 96 Comm: kworker/u24:2 Not tainted 6.2.0 #4
[ 19.320171] Hardware name: /, BIOS 1.0-d7fb19b 08/10/2022
[ 19.325852] Workqueue: events_unbound deferred_probe_work_func
[ 19.331919] ==================================================================
After reverting this change I no longer see this issue.
Let me know if you have any thoughts.
Thanks
Jon
--
nvpublic
Hello!
On Monday 20 March 2023 20:26:05 Jon Hunter wrote:
> Hi Pali,
>
> After this change was made we are seeing the following bug
> report on a Tegra234 Jetson Orin board ...
>
> [ 17.172346] tegra194-pcie 141a0000.pcie: host bridge /pcie@141a0000 ranges:
> [ 17.172470] tegra194-pcie 141a0000.pcie: MEM 0x2800000000..0x2b27ffffff -> 0x2800000000
> [ 17.172519] tegra194-pcie 141a0000.pcie: MEM 0x2b28000000..0x2b2fffffff -> 0x0040000000
> [ 17.172548] tegra194-pcie 141a0000.pcie: IO 0x003a100000..0x003a1fffff -> 0x003a100000
> [ 17.173449] tegra194-pcie 141a0000.pcie: iATU: unroll T, 8 ob, 2 ib, align 64K, limit 32G
> [ 18.279048] tegra194-pcie 141a0000.pcie: Phy link never came up
> [ 19.279285] tegra194-pcie 141a0000.pcie: Phy link never came up
> [ 19.279599] tegra194-pcie 141a0000.pcie: PCI host bridge to bus 0005:00
> [ 19.279613] pci_bus 0005:00: root bus resource [bus 00-ff]
> [ 19.279622] pci_bus 0005:00: root bus resource [mem 0x2800000000-0x2b27ffffff pref]
> [ 19.279631] pci_bus 0005:00: root bus resource [mem 0x2b28000000-0x2b2fffffff] (bus address [0x40000000-0x47ffffff])
> [ 19.279639] pci_bus 0005:00: root bus resource [io 0x200000-0x2fffff] (bus address [0x3a100000-0x3a1fffff])
> [ 19.279687] pci 0005:00:00.0: [10de:229a] type 01 class 0x060400
> [ 19.279886] pci 0005:00:00.0: PME# supported from D0 D3hot
> [ 19.283256] pci 0005:00:00.0: PCI bridge to [bus 01-ff]
> [ 19.283590] pcieport 0005:00:00.0: Adding to iommu group 26
> [ 19.283991] pcieport 0005:00:00.0: PME: Signaling with IRQ 174
> [ 19.284429] pcieport 0005:00:00.0: AER: enabled with IRQ 174
> [ 19.285003] pci_bus 0005:01: busn_res: [bus 01-ff] is released
> [ 19.285591] pci 0005:00:00.0: Removing from iommu group 26
> [ 19.285751] pci_bus 0005:00: busn_res: [bus 00-ff] is released
> [ 19.285870] ==================================================================
> [ 19.293351] BUG: KFENCE: use-after-free read in pci_bus_release_domain_nr+0x10/0x70
>
> [ 19.302817] Use-after-free read at 0x000000007f3b80eb (in kfence-#115):
> [ 19.309677] pci_bus_release_domain_nr+0x10/0x70
> [ 19.309691] dw_pcie_host_deinit+0x28/0x78
> [ 19.309702] tegra_pcie_deinit_controller+0x1c/0x38 [pcie_tegra194]
> [ 19.309734] tegra_pcie_dw_probe+0x648/0xb28 [pcie_tegra194]
> [ 19.309752] platform_probe+0x90/0xd8
> [ 19.309764] really_probe+0xb8/0x298
> [ 19.309777] __driver_probe_device+0x78/0xd8
> [ 19.309788] driver_probe_device+0x38/0x120
> [ 19.309799] __device_attach_driver+0x94/0xe0
> [ 19.309812] bus_for_each_drv+0x70/0xc8
> [ 19.309822] __device_attach+0xfc/0x188
> [ 19.309833] device_initial_probe+0x10/0x18
> [ 19.309844] bus_probe_device+0x94/0xa0
> [ 19.309854] deferred_probe_work_func+0x80/0xb8
> [ 19.309864] process_one_work+0x1e0/0x348
> [ 19.309882] worker_thread+0x48/0x410
> [ 19.309891] kthread+0xf4/0x110
> [ 19.309904] ret_from_fork+0x10/0x20
>
> [ 19.311457] kfence-#115: 0x00000000063a155a-0x00000000ba698da8, size=1072, cache=kmalloc-2k
>
> [ 19.311469] allocated by task 96 on cpu 10 at 19.279323s:
> [ 19.311562] __kmem_cache_alloc_node+0x260/0x278
> [ 19.311571] kmalloc_trace+0x24/0x30
> [ 19.311580] pci_alloc_bus+0x24/0xa0
> [ 19.311590] pci_register_host_bridge+0x48/0x4b8
> [ 19.311601] pci_scan_root_bus_bridge+0xc0/0xe8
> [ 19.311613] pci_host_probe+0x18/0xc0
> [ 19.311623] dw_pcie_host_init+0x2c0/0x568
> [ 19.311630] tegra_pcie_dw_probe+0x610/0xb28 [pcie_tegra194]
> [ 19.311647] platform_probe+0x90/0xd8
> [ 19.311653] really_probe+0xb8/0x298
> [ 19.311663] __driver_probe_device+0x78/0xd8
> [ 19.311672] driver_probe_device+0x38/0x120
> [ 19.311682] __device_attach_driver+0x94/0xe0
> [ 19.311694] bus_for_each_drv+0x70/0xc8
> [ 19.311702] __device_attach+0xfc/0x188
> [ 19.311713] device_initial_probe+0x10/0x18
> [ 19.311724] bus_probe_device+0x94/0xa0
> [ 19.311733] deferred_probe_work_func+0x80/0xb8
> [ 19.311743] process_one_work+0x1e0/0x348
> [ 19.311753] worker_thread+0x48/0x410
> [ 19.311763] kthread+0xf4/0x110
> [ 19.311771] ret_from_fork+0x10/0x20
>
> [ 19.311782] freed by task 96 on cpu 10 at 19.285833s:
> [ 19.311799] release_pcibus_dev+0x30/0x40
> [ 19.311808] device_release+0x30/0x90
> [ 19.311814] kobject_put+0xa8/0x120
> [ 19.311832] device_unregister+0x20/0x30
> [ 19.311839] pci_remove_bus+0x78/0x88
> [ 19.311850] pci_remove_root_bus+0x5c/0x98
> [ 19.311860] dw_pcie_host_deinit+0x28/0x78
> [ 19.311866] tegra_pcie_deinit_controller+0x1c/0x38 [pcie_tegra194]
> [ 19.311883] tegra_pcie_dw_probe+0x648/0xb28 [pcie_tegra194]
> [ 19.311900] platform_probe+0x90/0xd8
> [ 19.311906] really_probe+0xb8/0x298
> [ 19.311916] __driver_probe_device+0x78/0xd8
> [ 19.311926] driver_probe_device+0x38/0x120
> [ 19.311936] __device_attach_driver+0x94/0xe0
> [ 19.311947] bus_for_each_drv+0x70/0xc8
> [ 19.311956] __device_attach+0xfc/0x188
> [ 19.311966] device_initial_probe+0x10/0x18
> [ 19.311976] bus_probe_device+0x94/0xa0
> [ 19.311985] deferred_probe_work_func+0x80/0xb8
> [ 19.311995] process_one_work+0x1e0/0x348
> [ 19.312005] worker_thread+0x48/0x410
> [ 19.312014] kthread+0xf4/0x110
> [ 19.312022] ret_from_fork+0x10/0x20
Based on the above trace it looks like a double free of "pp->bridge"
structure.
Also, which kernel version are you using? Because from above third
trace it looks like that dw_pcie_host_deinit() calls first
pci_remove_root_bus() function and then (from first strace) it also
calls pci_bus_release_domain_nr() function.
Source code of the dw_pcie_host_deinit() is there:
https://git.kernel.org/pub/scm/linux/kernel/git/torvalds/linux.git/tree/drivers/pci/controller/dwc/pcie-designware-host.c
But I do not see where dw_pcie_host_deinit() calls
pci_bus_release_domain_nr() function as visible in the first trace.
> [ 19.313579] CPU: 10 PID: 96 Comm: kworker/u24:2 Not tainted 6.2.0 #4
> [ 19.320171] Hardware name: /, BIOS 1.0-d7fb19b 08/10/2022
> [ 19.325852] Workqueue: events_unbound deferred_probe_work_func
> [ 19.331919] ==================================================================
>
> After reverting this change I no longer see this issue.
> Let me know if you have any thoughts.
>
> Thanks
> Jon
> --
> nvpublic
On 20/03/2023 20:59, Pali Rohár wrote:
> Hello!
>
> On Monday 20 March 2023 20:26:05 Jon Hunter wrote:
>> Hi Pali,
>>
>> After this change was made we are seeing the following bug
>> report on a Tegra234 Jetson Orin board ...
>>
>> [ 17.172346] tegra194-pcie 141a0000.pcie: host bridge /pcie@141a0000 ranges:
>> [ 17.172470] tegra194-pcie 141a0000.pcie: MEM 0x2800000000..0x2b27ffffff -> 0x2800000000
>> [ 17.172519] tegra194-pcie 141a0000.pcie: MEM 0x2b28000000..0x2b2fffffff -> 0x0040000000
>> [ 17.172548] tegra194-pcie 141a0000.pcie: IO 0x003a100000..0x003a1fffff -> 0x003a100000
>> [ 17.173449] tegra194-pcie 141a0000.pcie: iATU: unroll T, 8 ob, 2 ib, align 64K, limit 32G
>> [ 18.279048] tegra194-pcie 141a0000.pcie: Phy link never came up
>> [ 19.279285] tegra194-pcie 141a0000.pcie: Phy link never came up
>> [ 19.279599] tegra194-pcie 141a0000.pcie: PCI host bridge to bus 0005:00
>> [ 19.279613] pci_bus 0005:00: root bus resource [bus 00-ff]
>> [ 19.279622] pci_bus 0005:00: root bus resource [mem 0x2800000000-0x2b27ffffff pref]
>> [ 19.279631] pci_bus 0005:00: root bus resource [mem 0x2b28000000-0x2b2fffffff] (bus address [0x40000000-0x47ffffff])
>> [ 19.279639] pci_bus 0005:00: root bus resource [io 0x200000-0x2fffff] (bus address [0x3a100000-0x3a1fffff])
>> [ 19.279687] pci 0005:00:00.0: [10de:229a] type 01 class 0x060400
>> [ 19.279886] pci 0005:00:00.0: PME# supported from D0 D3hot
>> [ 19.283256] pci 0005:00:00.0: PCI bridge to [bus 01-ff]
>> [ 19.283590] pcieport 0005:00:00.0: Adding to iommu group 26
>> [ 19.283991] pcieport 0005:00:00.0: PME: Signaling with IRQ 174
>> [ 19.284429] pcieport 0005:00:00.0: AER: enabled with IRQ 174
>> [ 19.285003] pci_bus 0005:01: busn_res: [bus 01-ff] is released
>> [ 19.285591] pci 0005:00:00.0: Removing from iommu group 26
>> [ 19.285751] pci_bus 0005:00: busn_res: [bus 00-ff] is released
>> [ 19.285870] ==================================================================
>> [ 19.293351] BUG: KFENCE: use-after-free read in pci_bus_release_domain_nr+0x10/0x70
>>
>> [ 19.302817] Use-after-free read at 0x000000007f3b80eb (in kfence-#115):
>> [ 19.309677] pci_bus_release_domain_nr+0x10/0x70
>> [ 19.309691] dw_pcie_host_deinit+0x28/0x78
>> [ 19.309702] tegra_pcie_deinit_controller+0x1c/0x38 [pcie_tegra194]
>> [ 19.309734] tegra_pcie_dw_probe+0x648/0xb28 [pcie_tegra194]
>> [ 19.309752] platform_probe+0x90/0xd8
>> [ 19.309764] really_probe+0xb8/0x298
>> [ 19.309777] __driver_probe_device+0x78/0xd8
>> [ 19.309788] driver_probe_device+0x38/0x120
>> [ 19.309799] __device_attach_driver+0x94/0xe0
>> [ 19.309812] bus_for_each_drv+0x70/0xc8
>> [ 19.309822] __device_attach+0xfc/0x188
>> [ 19.309833] device_initial_probe+0x10/0x18
>> [ 19.309844] bus_probe_device+0x94/0xa0
>> [ 19.309854] deferred_probe_work_func+0x80/0xb8
>> [ 19.309864] process_one_work+0x1e0/0x348
>> [ 19.309882] worker_thread+0x48/0x410
>> [ 19.309891] kthread+0xf4/0x110
>> [ 19.309904] ret_from_fork+0x10/0x20
>>
>> [ 19.311457] kfence-#115: 0x00000000063a155a-0x00000000ba698da8, size=1072, cache=kmalloc-2k
>>
>> [ 19.311469] allocated by task 96 on cpu 10 at 19.279323s:
>> [ 19.311562] __kmem_cache_alloc_node+0x260/0x278
>> [ 19.311571] kmalloc_trace+0x24/0x30
>> [ 19.311580] pci_alloc_bus+0x24/0xa0
>> [ 19.311590] pci_register_host_bridge+0x48/0x4b8
>> [ 19.311601] pci_scan_root_bus_bridge+0xc0/0xe8
>> [ 19.311613] pci_host_probe+0x18/0xc0
>> [ 19.311623] dw_pcie_host_init+0x2c0/0x568
>> [ 19.311630] tegra_pcie_dw_probe+0x610/0xb28 [pcie_tegra194]
>> [ 19.311647] platform_probe+0x90/0xd8
>> [ 19.311653] really_probe+0xb8/0x298
>> [ 19.311663] __driver_probe_device+0x78/0xd8
>> [ 19.311672] driver_probe_device+0x38/0x120
>> [ 19.311682] __device_attach_driver+0x94/0xe0
>> [ 19.311694] bus_for_each_drv+0x70/0xc8
>> [ 19.311702] __device_attach+0xfc/0x188
>> [ 19.311713] device_initial_probe+0x10/0x18
>> [ 19.311724] bus_probe_device+0x94/0xa0
>> [ 19.311733] deferred_probe_work_func+0x80/0xb8
>> [ 19.311743] process_one_work+0x1e0/0x348
>> [ 19.311753] worker_thread+0x48/0x410
>> [ 19.311763] kthread+0xf4/0x110
>> [ 19.311771] ret_from_fork+0x10/0x20
>>
>> [ 19.311782] freed by task 96 on cpu 10 at 19.285833s:
>> [ 19.311799] release_pcibus_dev+0x30/0x40
>> [ 19.311808] device_release+0x30/0x90
>> [ 19.311814] kobject_put+0xa8/0x120
>> [ 19.311832] device_unregister+0x20/0x30
>> [ 19.311839] pci_remove_bus+0x78/0x88
>> [ 19.311850] pci_remove_root_bus+0x5c/0x98
>> [ 19.311860] dw_pcie_host_deinit+0x28/0x78
>> [ 19.311866] tegra_pcie_deinit_controller+0x1c/0x38 [pcie_tegra194]
>> [ 19.311883] tegra_pcie_dw_probe+0x648/0xb28 [pcie_tegra194]
>> [ 19.311900] platform_probe+0x90/0xd8
>> [ 19.311906] really_probe+0xb8/0x298
>> [ 19.311916] __driver_probe_device+0x78/0xd8
>> [ 19.311926] driver_probe_device+0x38/0x120
>> [ 19.311936] __device_attach_driver+0x94/0xe0
>> [ 19.311947] bus_for_each_drv+0x70/0xc8
>> [ 19.311956] __device_attach+0xfc/0x188
>> [ 19.311966] device_initial_probe+0x10/0x18
>> [ 19.311976] bus_probe_device+0x94/0xa0
>> [ 19.311985] deferred_probe_work_func+0x80/0xb8
>> [ 19.311995] process_one_work+0x1e0/0x348
>> [ 19.312005] worker_thread+0x48/0x410
>> [ 19.312014] kthread+0xf4/0x110
>> [ 19.312022] ret_from_fork+0x10/0x20
>
> Based on the above trace it looks like a double free of "pp->bridge"
> structure.
>
> Also, which kernel version are you using? Because from above third
> trace it looks like that dw_pcie_host_deinit() calls first
> pci_remove_root_bus() function and then (from first strace) it also
> calls pci_bus_release_domain_nr() function.
I have definitely seen this with v6.2. I was doing some more testing and
now it appears that the issue is somewhat intermittent. So now I am no
longer confident that reverting this change does fix it. The backtrace
made it seem like this is a likely candidate, but I need to do more testing.
Cheers
Jon
--
nvpublic
On 21/03/2023 18:44, Jon Hunter wrote:
>
> On 20/03/2023 20:59, Pali Rohár wrote:
>> Hello!
>>
>> On Monday 20 March 2023 20:26:05 Jon Hunter wrote:
>>> Hi Pali,
>>>
>>> After this change was made we are seeing the following bug
>>> report on a Tegra234 Jetson Orin board ...
>>>
>>> [ 17.172346] tegra194-pcie 141a0000.pcie: host bridge
>>> /pcie@141a0000 ranges:
>>> [ 17.172470] tegra194-pcie 141a0000.pcie: MEM
>>> 0x2800000000..0x2b27ffffff -> 0x2800000000
>>> [ 17.172519] tegra194-pcie 141a0000.pcie: MEM
>>> 0x2b28000000..0x2b2fffffff -> 0x0040000000
>>> [ 17.172548] tegra194-pcie 141a0000.pcie: IO
>>> 0x003a100000..0x003a1fffff -> 0x003a100000
>>> [ 17.173449] tegra194-pcie 141a0000.pcie: iATU: unroll T, 8 ob, 2
>>> ib, align 64K, limit 32G
>>> [ 18.279048] tegra194-pcie 141a0000.pcie: Phy link never came up
>>> [ 19.279285] tegra194-pcie 141a0000.pcie: Phy link never came up
>>> [ 19.279599] tegra194-pcie 141a0000.pcie: PCI host bridge to bus
>>> 0005:00
>>> [ 19.279613] pci_bus 0005:00: root bus resource [bus 00-ff]
>>> [ 19.279622] pci_bus 0005:00: root bus resource [mem
>>> 0x2800000000-0x2b27ffffff pref]
>>> [ 19.279631] pci_bus 0005:00: root bus resource [mem
>>> 0x2b28000000-0x2b2fffffff] (bus address [0x40000000-0x47ffffff])
>>> [ 19.279639] pci_bus 0005:00: root bus resource [io
>>> 0x200000-0x2fffff] (bus address [0x3a100000-0x3a1fffff])
>>> [ 19.279687] pci 0005:00:00.0: [10de:229a] type 01 class 0x060400
>>> [ 19.279886] pci 0005:00:00.0: PME# supported from D0 D3hot
>>> [ 19.283256] pci 0005:00:00.0: PCI bridge to [bus 01-ff]
>>> [ 19.283590] pcieport 0005:00:00.0: Adding to iommu group 26
>>> [ 19.283991] pcieport 0005:00:00.0: PME: Signaling with IRQ 174
>>> [ 19.284429] pcieport 0005:00:00.0: AER: enabled with IRQ 174
>>> [ 19.285003] pci_bus 0005:01: busn_res: [bus 01-ff] is released
>>> [ 19.285591] pci 0005:00:00.0: Removing from iommu group 26
>>> [ 19.285751] pci_bus 0005:00: busn_res: [bus 00-ff] is released
>>> [ 19.285870]
>>> ==================================================================
>>> [ 19.293351] BUG: KFENCE: use-after-free read in
>>> pci_bus_release_domain_nr+0x10/0x70
>>>
>>> [ 19.302817] Use-after-free read at 0x000000007f3b80eb (in
>>> kfence-#115):
>>> [ 19.309677] pci_bus_release_domain_nr+0x10/0x70
>>> [ 19.309691] dw_pcie_host_deinit+0x28/0x78
>>> [ 19.309702] tegra_pcie_deinit_controller+0x1c/0x38 [pcie_tegra194]
>>> [ 19.309734] tegra_pcie_dw_probe+0x648/0xb28 [pcie_tegra194]
>>> [ 19.309752] platform_probe+0x90/0xd8
>>> [ 19.309764] really_probe+0xb8/0x298
>>> [ 19.309777] __driver_probe_device+0x78/0xd8
>>> [ 19.309788] driver_probe_device+0x38/0x120
>>> [ 19.309799] __device_attach_driver+0x94/0xe0
>>> [ 19.309812] bus_for_each_drv+0x70/0xc8
>>> [ 19.309822] __device_attach+0xfc/0x188
>>> [ 19.309833] device_initial_probe+0x10/0x18
>>> [ 19.309844] bus_probe_device+0x94/0xa0
>>> [ 19.309854] deferred_probe_work_func+0x80/0xb8
>>> [ 19.309864] process_one_work+0x1e0/0x348
>>> [ 19.309882] worker_thread+0x48/0x410
>>> [ 19.309891] kthread+0xf4/0x110
>>> [ 19.309904] ret_from_fork+0x10/0x20
>>>
>>> [ 19.311457] kfence-#115: 0x00000000063a155a-0x00000000ba698da8,
>>> size=1072, cache=kmalloc-2k
>>>
>>> [ 19.311469] allocated by task 96 on cpu 10 at 19.279323s:
>>> [ 19.311562] __kmem_cache_alloc_node+0x260/0x278
>>> [ 19.311571] kmalloc_trace+0x24/0x30
>>> [ 19.311580] pci_alloc_bus+0x24/0xa0
>>> [ 19.311590] pci_register_host_bridge+0x48/0x4b8
>>> [ 19.311601] pci_scan_root_bus_bridge+0xc0/0xe8
>>> [ 19.311613] pci_host_probe+0x18/0xc0
>>> [ 19.311623] dw_pcie_host_init+0x2c0/0x568
>>> [ 19.311630] tegra_pcie_dw_probe+0x610/0xb28 [pcie_tegra194]
>>> [ 19.311647] platform_probe+0x90/0xd8
>>> [ 19.311653] really_probe+0xb8/0x298
>>> [ 19.311663] __driver_probe_device+0x78/0xd8
>>> [ 19.311672] driver_probe_device+0x38/0x120
>>> [ 19.311682] __device_attach_driver+0x94/0xe0
>>> [ 19.311694] bus_for_each_drv+0x70/0xc8
>>> [ 19.311702] __device_attach+0xfc/0x188
>>> [ 19.311713] device_initial_probe+0x10/0x18
>>> [ 19.311724] bus_probe_device+0x94/0xa0
>>> [ 19.311733] deferred_probe_work_func+0x80/0xb8
>>> [ 19.311743] process_one_work+0x1e0/0x348
>>> [ 19.311753] worker_thread+0x48/0x410
>>> [ 19.311763] kthread+0xf4/0x110
>>> [ 19.311771] ret_from_fork+0x10/0x20
>>>
>>> [ 19.311782] freed by task 96 on cpu 10 at 19.285833s:
>>> [ 19.311799] release_pcibus_dev+0x30/0x40
>>> [ 19.311808] device_release+0x30/0x90
>>> [ 19.311814] kobject_put+0xa8/0x120
>>> [ 19.311832] device_unregister+0x20/0x30
>>> [ 19.311839] pci_remove_bus+0x78/0x88
>>> [ 19.311850] pci_remove_root_bus+0x5c/0x98
>>> [ 19.311860] dw_pcie_host_deinit+0x28/0x78
>>> [ 19.311866] tegra_pcie_deinit_controller+0x1c/0x38 [pcie_tegra194]
>>> [ 19.311883] tegra_pcie_dw_probe+0x648/0xb28 [pcie_tegra194]
>>> [ 19.311900] platform_probe+0x90/0xd8
>>> [ 19.311906] really_probe+0xb8/0x298
>>> [ 19.311916] __driver_probe_device+0x78/0xd8
>>> [ 19.311926] driver_probe_device+0x38/0x120
>>> [ 19.311936] __device_attach_driver+0x94/0xe0
>>> [ 19.311947] bus_for_each_drv+0x70/0xc8
>>> [ 19.311956] __device_attach+0xfc/0x188
>>> [ 19.311966] device_initial_probe+0x10/0x18
>>> [ 19.311976] bus_probe_device+0x94/0xa0
>>> [ 19.311985] deferred_probe_work_func+0x80/0xb8
>>> [ 19.311995] process_one_work+0x1e0/0x348
>>> [ 19.312005] worker_thread+0x48/0x410
>>> [ 19.312014] kthread+0xf4/0x110
>>> [ 19.312022] ret_from_fork+0x10/0x20
>>
>> Based on the above trace it looks like a double free of "pp->bridge"
>> structure.
>>
>> Also, which kernel version are you using? Because from above third
>> trace it looks like that dw_pcie_host_deinit() calls first
>> pci_remove_root_bus() function and then (from first strace) it also
>> calls pci_bus_release_domain_nr() function.
>
>
> I have definitely seen this with v6.2. I was doing some more testing and
> now it appears that the issue is somewhat intermittent. So now I am no
> longer confident that reverting this change does fix it. The backtrace
> made it seem like this is a likely candidate, but I need to do more
> testing.
OK, so I have done some more testing. I found that if I build the
Tegra194 PCIe driver into the kernel, then I can reproduce the above
100%. I guess by loading early there is more chance of a probe deferral.
With that I verified that the issue is seen on v6.2 (as I previously
observed) and on v6.3-rc3. Reverting this commit on top of v6.3-rc3
fixes this and I no longer see the problem.
Let me know if you have any thoughts on how to fix this.
Thanks
Jon
--
nvpublic
On Wednesday 22 March 2023 14:36:23 Jon Hunter wrote:
> On 21/03/2023 18:44, Jon Hunter wrote:
> >
> > On 20/03/2023 20:59, Pali Rohár wrote:
> > > Hello!
> > >
> > > On Monday 20 March 2023 20:26:05 Jon Hunter wrote:
> > > > Hi Pali,
> > > >
> > > > After this change was made we are seeing the following bug
> > > > report on a Tegra234 Jetson Orin board ...
> > > >
> > > > [ 17.172346] tegra194-pcie 141a0000.pcie: host bridge
> > > > /pcie@141a0000 ranges:
> > > > [ 17.172470] tegra194-pcie 141a0000.pcie: MEM
> > > > 0x2800000000..0x2b27ffffff -> 0x2800000000
> > > > [ 17.172519] tegra194-pcie 141a0000.pcie: MEM
> > > > 0x2b28000000..0x2b2fffffff -> 0x0040000000
> > > > [ 17.172548] tegra194-pcie 141a0000.pcie: IO
> > > > 0x003a100000..0x003a1fffff -> 0x003a100000
> > > > [ 17.173449] tegra194-pcie 141a0000.pcie: iATU: unroll T, 8
> > > > ob, 2 ib, align 64K, limit 32G
> > > > [ 18.279048] tegra194-pcie 141a0000.pcie: Phy link never came up
> > > > [ 19.279285] tegra194-pcie 141a0000.pcie: Phy link never came up
> > > > [ 19.279599] tegra194-pcie 141a0000.pcie: PCI host bridge to
> > > > bus 0005:00
> > > > [ 19.279613] pci_bus 0005:00: root bus resource [bus 00-ff]
> > > > [ 19.279622] pci_bus 0005:00: root bus resource [mem
> > > > 0x2800000000-0x2b27ffffff pref]
> > > > [ 19.279631] pci_bus 0005:00: root bus resource [mem
> > > > 0x2b28000000-0x2b2fffffff] (bus address [0x40000000-0x47ffffff])
> > > > [ 19.279639] pci_bus 0005:00: root bus resource [io
> > > > 0x200000-0x2fffff] (bus address [0x3a100000-0x3a1fffff])
> > > > [ 19.279687] pci 0005:00:00.0: [10de:229a] type 01 class 0x060400
> > > > [ 19.279886] pci 0005:00:00.0: PME# supported from D0 D3hot
> > > > [ 19.283256] pci 0005:00:00.0: PCI bridge to [bus 01-ff]
> > > > [ 19.283590] pcieport 0005:00:00.0: Adding to iommu group 26
> > > > [ 19.283991] pcieport 0005:00:00.0: PME: Signaling with IRQ 174
> > > > [ 19.284429] pcieport 0005:00:00.0: AER: enabled with IRQ 174
> > > > [ 19.285003] pci_bus 0005:01: busn_res: [bus 01-ff] is released
> > > > [ 19.285591] pci 0005:00:00.0: Removing from iommu group 26
> > > > [ 19.285751] pci_bus 0005:00: busn_res: [bus 00-ff] is released
> > > > [ 19.285870]
> > > > ==================================================================
> > > > [ 19.293351] BUG: KFENCE: use-after-free read in
> > > > pci_bus_release_domain_nr+0x10/0x70
> > > >
> > > > [ 19.302817] Use-after-free read at 0x000000007f3b80eb (in
> > > > kfence-#115):
> > > > [ 19.309677] pci_bus_release_domain_nr+0x10/0x70
> > > > [ 19.309691] dw_pcie_host_deinit+0x28/0x78
> > > > [ 19.309702] tegra_pcie_deinit_controller+0x1c/0x38 [pcie_tegra194]
> > > > [ 19.309734] tegra_pcie_dw_probe+0x648/0xb28 [pcie_tegra194]
> > > > [ 19.309752] platform_probe+0x90/0xd8
> > > > [ 19.309764] really_probe+0xb8/0x298
> > > > [ 19.309777] __driver_probe_device+0x78/0xd8
> > > > [ 19.309788] driver_probe_device+0x38/0x120
> > > > [ 19.309799] __device_attach_driver+0x94/0xe0
> > > > [ 19.309812] bus_for_each_drv+0x70/0xc8
> > > > [ 19.309822] __device_attach+0xfc/0x188
> > > > [ 19.309833] device_initial_probe+0x10/0x18
> > > > [ 19.309844] bus_probe_device+0x94/0xa0
> > > > [ 19.309854] deferred_probe_work_func+0x80/0xb8
> > > > [ 19.309864] process_one_work+0x1e0/0x348
> > > > [ 19.309882] worker_thread+0x48/0x410
> > > > [ 19.309891] kthread+0xf4/0x110
> > > > [ 19.309904] ret_from_fork+0x10/0x20
> > > >
> > > > [ 19.311457] kfence-#115:
> > > > 0x00000000063a155a-0x00000000ba698da8, size=1072,
> > > > cache=kmalloc-2k
> > > >
> > > > [ 19.311469] allocated by task 96 on cpu 10 at 19.279323s:
> > > > [ 19.311562] __kmem_cache_alloc_node+0x260/0x278
> > > > [ 19.311571] kmalloc_trace+0x24/0x30
> > > > [ 19.311580] pci_alloc_bus+0x24/0xa0
> > > > [ 19.311590] pci_register_host_bridge+0x48/0x4b8
> > > > [ 19.311601] pci_scan_root_bus_bridge+0xc0/0xe8
> > > > [ 19.311613] pci_host_probe+0x18/0xc0
> > > > [ 19.311623] dw_pcie_host_init+0x2c0/0x568
> > > > [ 19.311630] tegra_pcie_dw_probe+0x610/0xb28 [pcie_tegra194]
> > > > [ 19.311647] platform_probe+0x90/0xd8
> > > > [ 19.311653] really_probe+0xb8/0x298
> > > > [ 19.311663] __driver_probe_device+0x78/0xd8
> > > > [ 19.311672] driver_probe_device+0x38/0x120
> > > > [ 19.311682] __device_attach_driver+0x94/0xe0
> > > > [ 19.311694] bus_for_each_drv+0x70/0xc8
> > > > [ 19.311702] __device_attach+0xfc/0x188
> > > > [ 19.311713] device_initial_probe+0x10/0x18
> > > > [ 19.311724] bus_probe_device+0x94/0xa0
> > > > [ 19.311733] deferred_probe_work_func+0x80/0xb8
> > > > [ 19.311743] process_one_work+0x1e0/0x348
> > > > [ 19.311753] worker_thread+0x48/0x410
> > > > [ 19.311763] kthread+0xf4/0x110
> > > > [ 19.311771] ret_from_fork+0x10/0x20
> > > >
> > > > [ 19.311782] freed by task 96 on cpu 10 at 19.285833s:
> > > > [ 19.311799] release_pcibus_dev+0x30/0x40
> > > > [ 19.311808] device_release+0x30/0x90
> > > > [ 19.311814] kobject_put+0xa8/0x120
> > > > [ 19.311832] device_unregister+0x20/0x30
> > > > [ 19.311839] pci_remove_bus+0x78/0x88
> > > > [ 19.311850] pci_remove_root_bus+0x5c/0x98
> > > > [ 19.311860] dw_pcie_host_deinit+0x28/0x78
> > > > [ 19.311866] tegra_pcie_deinit_controller+0x1c/0x38 [pcie_tegra194]
> > > > [ 19.311883] tegra_pcie_dw_probe+0x648/0xb28 [pcie_tegra194]
> > > > [ 19.311900] platform_probe+0x90/0xd8
> > > > [ 19.311906] really_probe+0xb8/0x298
> > > > [ 19.311916] __driver_probe_device+0x78/0xd8
> > > > [ 19.311926] driver_probe_device+0x38/0x120
> > > > [ 19.311936] __device_attach_driver+0x94/0xe0
> > > > [ 19.311947] bus_for_each_drv+0x70/0xc8
> > > > [ 19.311956] __device_attach+0xfc/0x188
> > > > [ 19.311966] device_initial_probe+0x10/0x18
> > > > [ 19.311976] bus_probe_device+0x94/0xa0
> > > > [ 19.311985] deferred_probe_work_func+0x80/0xb8
> > > > [ 19.311995] process_one_work+0x1e0/0x348
> > > > [ 19.312005] worker_thread+0x48/0x410
> > > > [ 19.312014] kthread+0xf4/0x110
> > > > [ 19.312022] ret_from_fork+0x10/0x20
> > >
> > > Based on the above trace it looks like a double free of "pp->bridge"
> > > structure.
> > >
> > > Also, which kernel version are you using? Because from above third
> > > trace it looks like that dw_pcie_host_deinit() calls first
> > > pci_remove_root_bus() function and then (from first strace) it also
> > > calls pci_bus_release_domain_nr() function.
> >
> >
> > I have definitely seen this with v6.2. I was doing some more testing and
> > now it appears that the issue is somewhat intermittent. So now I am no
> > longer confident that reverting this change does fix it. The backtrace
> > made it seem like this is a likely candidate, but I need to do more
> > testing.
>
>
> OK, so I have done some more testing. I found that if I build the Tegra194
> PCIe driver into the kernel, then I can reproduce the above 100%. I guess by
> loading early there is more chance of a probe deferral. With that I verified
> that the issue is seen on v6.2 (as I previously observed) and on v6.3-rc3.
> Reverting this commit on top of v6.3-rc3 fixes this and I no longer see the
> problem.
>
> Let me know if you have any thoughts on how to fix this.
>
> Thanks
> Jon
>
> --
> nvpublic
Sorry, but I'm not going to investigate another issue in this area.
I spent too much time in debugging issues, prepared fixes and the only
result was simple silence, rejection and proposal to resend emails.
The only thing which I receive are bug reports from the users and I
really do not want to repeat reply, hey patch for this was sent to
mailing list... So I do not see reason why I should look at this issue
as with high probability result would be same... for which I'm not
interested.
On 22/03/2023 17:21, Pali Rohár wrote:
> On Wednesday 22 March 2023 14:36:23 Jon Hunter wrote:
>> On 21/03/2023 18:44, Jon Hunter wrote:
>>>
>>> On 20/03/2023 20:59, Pali Rohár wrote:
>>>> Hello!
>>>>
>>>> On Monday 20 March 2023 20:26:05 Jon Hunter wrote:
>>>>> Hi Pali,
>>>>>
>>>>> After this change was made we are seeing the following bug
>>>>> report on a Tegra234 Jetson Orin board ...
>>>>>
>>>>> [ 17.172346] tegra194-pcie 141a0000.pcie: host bridge
>>>>> /pcie@141a0000 ranges:
>>>>> [ 17.172470] tegra194-pcie 141a0000.pcie: MEM
>>>>> 0x2800000000..0x2b27ffffff -> 0x2800000000
>>>>> [ 17.172519] tegra194-pcie 141a0000.pcie: MEM
>>>>> 0x2b28000000..0x2b2fffffff -> 0x0040000000
>>>>> [ 17.172548] tegra194-pcie 141a0000.pcie: IO
>>>>> 0x003a100000..0x003a1fffff -> 0x003a100000
>>>>> [ 17.173449] tegra194-pcie 141a0000.pcie: iATU: unroll T, 8
>>>>> ob, 2 ib, align 64K, limit 32G
>>>>> [ 18.279048] tegra194-pcie 141a0000.pcie: Phy link never came up
>>>>> [ 19.279285] tegra194-pcie 141a0000.pcie: Phy link never came up
>>>>> [ 19.279599] tegra194-pcie 141a0000.pcie: PCI host bridge to
>>>>> bus 0005:00
>>>>> [ 19.279613] pci_bus 0005:00: root bus resource [bus 00-ff]
>>>>> [ 19.279622] pci_bus 0005:00: root bus resource [mem
>>>>> 0x2800000000-0x2b27ffffff pref]
>>>>> [ 19.279631] pci_bus 0005:00: root bus resource [mem
>>>>> 0x2b28000000-0x2b2fffffff] (bus address [0x40000000-0x47ffffff])
>>>>> [ 19.279639] pci_bus 0005:00: root bus resource [io
>>>>> 0x200000-0x2fffff] (bus address [0x3a100000-0x3a1fffff])
>>>>> [ 19.279687] pci 0005:00:00.0: [10de:229a] type 01 class 0x060400
>>>>> [ 19.279886] pci 0005:00:00.0: PME# supported from D0 D3hot
>>>>> [ 19.283256] pci 0005:00:00.0: PCI bridge to [bus 01-ff]
>>>>> [ 19.283590] pcieport 0005:00:00.0: Adding to iommu group 26
>>>>> [ 19.283991] pcieport 0005:00:00.0: PME: Signaling with IRQ 174
>>>>> [ 19.284429] pcieport 0005:00:00.0: AER: enabled with IRQ 174
>>>>> [ 19.285003] pci_bus 0005:01: busn_res: [bus 01-ff] is released
>>>>> [ 19.285591] pci 0005:00:00.0: Removing from iommu group 26
>>>>> [ 19.285751] pci_bus 0005:00: busn_res: [bus 00-ff] is released
>>>>> [ 19.285870]
>>>>> ==================================================================
>>>>> [ 19.293351] BUG: KFENCE: use-after-free read in
>>>>> pci_bus_release_domain_nr+0x10/0x70
>>>>>
>>>>> [ 19.302817] Use-after-free read at 0x000000007f3b80eb (in
>>>>> kfence-#115):
>>>>> [ 19.309677] pci_bus_release_domain_nr+0x10/0x70
>>>>> [ 19.309691] dw_pcie_host_deinit+0x28/0x78
>>>>> [ 19.309702] tegra_pcie_deinit_controller+0x1c/0x38 [pcie_tegra194]
>>>>> [ 19.309734] tegra_pcie_dw_probe+0x648/0xb28 [pcie_tegra194]
>>>>> [ 19.309752] platform_probe+0x90/0xd8
>>>>> [ 19.309764] really_probe+0xb8/0x298
>>>>> [ 19.309777] __driver_probe_device+0x78/0xd8
>>>>> [ 19.309788] driver_probe_device+0x38/0x120
>>>>> [ 19.309799] __device_attach_driver+0x94/0xe0
>>>>> [ 19.309812] bus_for_each_drv+0x70/0xc8
>>>>> [ 19.309822] __device_attach+0xfc/0x188
>>>>> [ 19.309833] device_initial_probe+0x10/0x18
>>>>> [ 19.309844] bus_probe_device+0x94/0xa0
>>>>> [ 19.309854] deferred_probe_work_func+0x80/0xb8
>>>>> [ 19.309864] process_one_work+0x1e0/0x348
>>>>> [ 19.309882] worker_thread+0x48/0x410
>>>>> [ 19.309891] kthread+0xf4/0x110
>>>>> [ 19.309904] ret_from_fork+0x10/0x20
>>>>>
>>>>> [ 19.311457] kfence-#115:
>>>>> 0x00000000063a155a-0x00000000ba698da8, size=1072,
>>>>> cache=kmalloc-2k
>>>>>
>>>>> [ 19.311469] allocated by task 96 on cpu 10 at 19.279323s:
>>>>> [ 19.311562] __kmem_cache_alloc_node+0x260/0x278
>>>>> [ 19.311571] kmalloc_trace+0x24/0x30
>>>>> [ 19.311580] pci_alloc_bus+0x24/0xa0
>>>>> [ 19.311590] pci_register_host_bridge+0x48/0x4b8
>>>>> [ 19.311601] pci_scan_root_bus_bridge+0xc0/0xe8
>>>>> [ 19.311613] pci_host_probe+0x18/0xc0
>>>>> [ 19.311623] dw_pcie_host_init+0x2c0/0x568
>>>>> [ 19.311630] tegra_pcie_dw_probe+0x610/0xb28 [pcie_tegra194]
>>>>> [ 19.311647] platform_probe+0x90/0xd8
>>>>> [ 19.311653] really_probe+0xb8/0x298
>>>>> [ 19.311663] __driver_probe_device+0x78/0xd8
>>>>> [ 19.311672] driver_probe_device+0x38/0x120
>>>>> [ 19.311682] __device_attach_driver+0x94/0xe0
>>>>> [ 19.311694] bus_for_each_drv+0x70/0xc8
>>>>> [ 19.311702] __device_attach+0xfc/0x188
>>>>> [ 19.311713] device_initial_probe+0x10/0x18
>>>>> [ 19.311724] bus_probe_device+0x94/0xa0
>>>>> [ 19.311733] deferred_probe_work_func+0x80/0xb8
>>>>> [ 19.311743] process_one_work+0x1e0/0x348
>>>>> [ 19.311753] worker_thread+0x48/0x410
>>>>> [ 19.311763] kthread+0xf4/0x110
>>>>> [ 19.311771] ret_from_fork+0x10/0x20
>>>>>
>>>>> [ 19.311782] freed by task 96 on cpu 10 at 19.285833s:
>>>>> [ 19.311799] release_pcibus_dev+0x30/0x40
>>>>> [ 19.311808] device_release+0x30/0x90
>>>>> [ 19.311814] kobject_put+0xa8/0x120
>>>>> [ 19.311832] device_unregister+0x20/0x30
>>>>> [ 19.311839] pci_remove_bus+0x78/0x88
>>>>> [ 19.311850] pci_remove_root_bus+0x5c/0x98
>>>>> [ 19.311860] dw_pcie_host_deinit+0x28/0x78
>>>>> [ 19.311866] tegra_pcie_deinit_controller+0x1c/0x38 [pcie_tegra194]
>>>>> [ 19.311883] tegra_pcie_dw_probe+0x648/0xb28 [pcie_tegra194]
>>>>> [ 19.311900] platform_probe+0x90/0xd8
>>>>> [ 19.311906] really_probe+0xb8/0x298
>>>>> [ 19.311916] __driver_probe_device+0x78/0xd8
>>>>> [ 19.311926] driver_probe_device+0x38/0x120
>>>>> [ 19.311936] __device_attach_driver+0x94/0xe0
>>>>> [ 19.311947] bus_for_each_drv+0x70/0xc8
>>>>> [ 19.311956] __device_attach+0xfc/0x188
>>>>> [ 19.311966] device_initial_probe+0x10/0x18
>>>>> [ 19.311976] bus_probe_device+0x94/0xa0
>>>>> [ 19.311985] deferred_probe_work_func+0x80/0xb8
>>>>> [ 19.311995] process_one_work+0x1e0/0x348
>>>>> [ 19.312005] worker_thread+0x48/0x410
>>>>> [ 19.312014] kthread+0xf4/0x110
>>>>> [ 19.312022] ret_from_fork+0x10/0x20
>>>>
>>>> Based on the above trace it looks like a double free of "pp->bridge"
>>>> structure.
>>>>
>>>> Also, which kernel version are you using? Because from above third
>>>> trace it looks like that dw_pcie_host_deinit() calls first
>>>> pci_remove_root_bus() function and then (from first strace) it also
>>>> calls pci_bus_release_domain_nr() function.
>>>
>>>
>>> I have definitely seen this with v6.2. I was doing some more testing and
>>> now it appears that the issue is somewhat intermittent. So now I am no
>>> longer confident that reverting this change does fix it. The backtrace
>>> made it seem like this is a likely candidate, but I need to do more
>>> testing.
>>
>>
>> OK, so I have done some more testing. I found that if I build the Tegra194
>> PCIe driver into the kernel, then I can reproduce the above 100%. I guess by
>> loading early there is more chance of a probe deferral. With that I verified
>> that the issue is seen on v6.2 (as I previously observed) and on v6.3-rc3.
>> Reverting this commit on top of v6.3-rc3 fixes this and I no longer see the
>> problem.
>>
>> Let me know if you have any thoughts on how to fix this.
>
> Sorry, but I'm not going to investigate another issue in this area.
> I spent too much time in debugging issues, prepared fixes and the only
> result was simple silence, rejection and proposal to resend emails.
> The only thing which I receive are bug reports from the users and I
> really do not want to repeat reply, hey patch for this was sent to
> mailing list... So I do not see reason why I should look at this issue
> as with high probability result would be same... for which I'm not
> interested.
OK, so I guess we should revert this then?
Cheers
Jon
--
nvpublic
On Wed, Mar 22, 2023 at 9:36 AM Jon Hunter <[email protected]> wrote:
>
>
> On 21/03/2023 18:44, Jon Hunter wrote:
> >
> > On 20/03/2023 20:59, Pali Rohár wrote:
> >> Hello!
> >>
> >> On Monday 20 March 2023 20:26:05 Jon Hunter wrote:
> >>> Hi Pali,
> >>>
> >>> After this change was made we are seeing the following bug
> >>> report on a Tegra234 Jetson Orin board ...
> >>>
> >>> [ 17.172346] tegra194-pcie 141a0000.pcie: host bridge
> >>> /pcie@141a0000 ranges:
> >>> [ 17.172470] tegra194-pcie 141a0000.pcie: MEM
> >>> 0x2800000000..0x2b27ffffff -> 0x2800000000
> >>> [ 17.172519] tegra194-pcie 141a0000.pcie: MEM
> >>> 0x2b28000000..0x2b2fffffff -> 0x0040000000
> >>> [ 17.172548] tegra194-pcie 141a0000.pcie: IO
> >>> 0x003a100000..0x003a1fffff -> 0x003a100000
> >>> [ 17.173449] tegra194-pcie 141a0000.pcie: iATU: unroll T, 8 ob, 2
> >>> ib, align 64K, limit 32G
> >>> [ 18.279048] tegra194-pcie 141a0000.pcie: Phy link never came up
> >>> [ 19.279285] tegra194-pcie 141a0000.pcie: Phy link never came up
> >>> [ 19.279599] tegra194-pcie 141a0000.pcie: PCI host bridge to bus
> >>> 0005:00
> >>> [ 19.279613] pci_bus 0005:00: root bus resource [bus 00-ff]
> >>> [ 19.279622] pci_bus 0005:00: root bus resource [mem
> >>> 0x2800000000-0x2b27ffffff pref]
> >>> [ 19.279631] pci_bus 0005:00: root bus resource [mem
> >>> 0x2b28000000-0x2b2fffffff] (bus address [0x40000000-0x47ffffff])
> >>> [ 19.279639] pci_bus 0005:00: root bus resource [io
> >>> 0x200000-0x2fffff] (bus address [0x3a100000-0x3a1fffff])
> >>> [ 19.279687] pci 0005:00:00.0: [10de:229a] type 01 class 0x060400
> >>> [ 19.279886] pci 0005:00:00.0: PME# supported from D0 D3hot
> >>> [ 19.283256] pci 0005:00:00.0: PCI bridge to [bus 01-ff]
> >>> [ 19.283590] pcieport 0005:00:00.0: Adding to iommu group 26
> >>> [ 19.283991] pcieport 0005:00:00.0: PME: Signaling with IRQ 174
> >>> [ 19.284429] pcieport 0005:00:00.0: AER: enabled with IRQ 174
> >>> [ 19.285003] pci_bus 0005:01: busn_res: [bus 01-ff] is released
> >>> [ 19.285591] pci 0005:00:00.0: Removing from iommu group 26
> >>> [ 19.285751] pci_bus 0005:00: busn_res: [bus 00-ff] is released
> >>> [ 19.285870]
> >>> ==================================================================
> >>> [ 19.293351] BUG: KFENCE: use-after-free read in
> >>> pci_bus_release_domain_nr+0x10/0x70
> >>>
> >>> [ 19.302817] Use-after-free read at 0x000000007f3b80eb (in
> >>> kfence-#115):
> >>> [ 19.309677] pci_bus_release_domain_nr+0x10/0x70
> >>> [ 19.309691] dw_pcie_host_deinit+0x28/0x78
> >>> [ 19.309702] tegra_pcie_deinit_controller+0x1c/0x38 [pcie_tegra194]
> >>> [ 19.309734] tegra_pcie_dw_probe+0x648/0xb28 [pcie_tegra194]
> >>> [ 19.309752] platform_probe+0x90/0xd8
> >>> [ 19.309764] really_probe+0xb8/0x298
> >>> [ 19.309777] __driver_probe_device+0x78/0xd8
> >>> [ 19.309788] driver_probe_device+0x38/0x120
> >>> [ 19.309799] __device_attach_driver+0x94/0xe0
> >>> [ 19.309812] bus_for_each_drv+0x70/0xc8
> >>> [ 19.309822] __device_attach+0xfc/0x188
> >>> [ 19.309833] device_initial_probe+0x10/0x18
> >>> [ 19.309844] bus_probe_device+0x94/0xa0
> >>> [ 19.309854] deferred_probe_work_func+0x80/0xb8
> >>> [ 19.309864] process_one_work+0x1e0/0x348
> >>> [ 19.309882] worker_thread+0x48/0x410
> >>> [ 19.309891] kthread+0xf4/0x110
> >>> [ 19.309904] ret_from_fork+0x10/0x20
> >>>
> >>> [ 19.311457] kfence-#115: 0x00000000063a155a-0x00000000ba698da8,
> >>> size=1072, cache=kmalloc-2k
> >>>
> >>> [ 19.311469] allocated by task 96 on cpu 10 at 19.279323s:
> >>> [ 19.311562] __kmem_cache_alloc_node+0x260/0x278
> >>> [ 19.311571] kmalloc_trace+0x24/0x30
> >>> [ 19.311580] pci_alloc_bus+0x24/0xa0
> >>> [ 19.311590] pci_register_host_bridge+0x48/0x4b8
> >>> [ 19.311601] pci_scan_root_bus_bridge+0xc0/0xe8
> >>> [ 19.311613] pci_host_probe+0x18/0xc0
> >>> [ 19.311623] dw_pcie_host_init+0x2c0/0x568
> >>> [ 19.311630] tegra_pcie_dw_probe+0x610/0xb28 [pcie_tegra194]
> >>> [ 19.311647] platform_probe+0x90/0xd8
> >>> [ 19.311653] really_probe+0xb8/0x298
> >>> [ 19.311663] __driver_probe_device+0x78/0xd8
> >>> [ 19.311672] driver_probe_device+0x38/0x120
> >>> [ 19.311682] __device_attach_driver+0x94/0xe0
> >>> [ 19.311694] bus_for_each_drv+0x70/0xc8
> >>> [ 19.311702] __device_attach+0xfc/0x188
> >>> [ 19.311713] device_initial_probe+0x10/0x18
> >>> [ 19.311724] bus_probe_device+0x94/0xa0
> >>> [ 19.311733] deferred_probe_work_func+0x80/0xb8
> >>> [ 19.311743] process_one_work+0x1e0/0x348
> >>> [ 19.311753] worker_thread+0x48/0x410
> >>> [ 19.311763] kthread+0xf4/0x110
> >>> [ 19.311771] ret_from_fork+0x10/0x20
> >>>
> >>> [ 19.311782] freed by task 96 on cpu 10 at 19.285833s:
> >>> [ 19.311799] release_pcibus_dev+0x30/0x40
> >>> [ 19.311808] device_release+0x30/0x90
> >>> [ 19.311814] kobject_put+0xa8/0x120
> >>> [ 19.311832] device_unregister+0x20/0x30
> >>> [ 19.311839] pci_remove_bus+0x78/0x88
> >>> [ 19.311850] pci_remove_root_bus+0x5c/0x98
> >>> [ 19.311860] dw_pcie_host_deinit+0x28/0x78
> >>> [ 19.311866] tegra_pcie_deinit_controller+0x1c/0x38 [pcie_tegra194]
> >>> [ 19.311883] tegra_pcie_dw_probe+0x648/0xb28 [pcie_tegra194]
> >>> [ 19.311900] platform_probe+0x90/0xd8
> >>> [ 19.311906] really_probe+0xb8/0x298
> >>> [ 19.311916] __driver_probe_device+0x78/0xd8
> >>> [ 19.311926] driver_probe_device+0x38/0x120
> >>> [ 19.311936] __device_attach_driver+0x94/0xe0
> >>> [ 19.311947] bus_for_each_drv+0x70/0xc8
> >>> [ 19.311956] __device_attach+0xfc/0x188
> >>> [ 19.311966] device_initial_probe+0x10/0x18
> >>> [ 19.311976] bus_probe_device+0x94/0xa0
> >>> [ 19.311985] deferred_probe_work_func+0x80/0xb8
> >>> [ 19.311995] process_one_work+0x1e0/0x348
> >>> [ 19.312005] worker_thread+0x48/0x410
> >>> [ 19.312014] kthread+0xf4/0x110
> >>> [ 19.312022] ret_from_fork+0x10/0x20
> >>
> >> Based on the above trace it looks like a double free of "pp->bridge"
> >> structure.
> >>
> >> Also, which kernel version are you using? Because from above third
> >> trace it looks like that dw_pcie_host_deinit() calls first
> >> pci_remove_root_bus() function and then (from first strace) it also
> >> calls pci_bus_release_domain_nr() function.
It's all the same stack trace. The struct pci_bus is a struct device
which is released first and then accessed by
of_pci_bus_release_domain_nr().
> > I have definitely seen this with v6.2. I was doing some more testing and
> > now it appears that the issue is somewhat intermittent. So now I am no
> > longer confident that reverting this change does fix it. The backtrace
> > made it seem like this is a likely candidate, but I need to do more
> > testing.
>
>
> OK, so I have done some more testing. I found that if I build the
> Tegra194 PCIe driver into the kernel, then I can reproduce the above
> 100%. I guess by loading early there is more chance of a probe deferral.
> With that I verified that the issue is seen on v6.2 (as I previously
> observed) and on v6.3-rc3. Reverting this commit on top of v6.3-rc3
> fixes this and I no longer see the problem.
>
> Let me know if you have any thoughts on how to fix this.
Does this fix it?:
diff --git a/drivers/pci/remove.c b/drivers/pci/remove.c
index 0145aef1b930..22d39e12b236 100644
--- a/drivers/pci/remove.c
+++ b/drivers/pci/remove.c
@@ -157,8 +157,6 @@ void pci_remove_root_bus(struct pci_bus *bus)
list_for_each_entry_safe(child, tmp,
&bus->devices, bus_list)
pci_remove_bus_device(child);
- pci_remove_bus(bus);
- host_bridge->bus = NULL;
#ifdef CONFIG_PCI_DOMAINS_GENERIC
/* Release domain_nr if it was dynamically allocated */
@@ -166,6 +164,9 @@ void pci_remove_root_bus(struct pci_bus *bus)
pci_bus_release_domain_nr(bus, host_bridge->dev.parent);
#endif
+ pci_remove_bus(bus);
+ host_bridge->bus = NULL;
+
/* remove the host bridge */
device_del(&host_bridge->dev);
}
On 28/03/2023 14:31, Rob Herring wrote:
...
> Does this fix it?:
>
> diff --git a/drivers/pci/remove.c b/drivers/pci/remove.c
> index 0145aef1b930..22d39e12b236 100644
> --- a/drivers/pci/remove.c
> +++ b/drivers/pci/remove.c
> @@ -157,8 +157,6 @@ void pci_remove_root_bus(struct pci_bus *bus)
> list_for_each_entry_safe(child, tmp,
> &bus->devices, bus_list)
> pci_remove_bus_device(child);
> - pci_remove_bus(bus);
> - host_bridge->bus = NULL;
>
> #ifdef CONFIG_PCI_DOMAINS_GENERIC
> /* Release domain_nr if it was dynamically allocated */
> @@ -166,6 +164,9 @@ void pci_remove_root_bus(struct pci_bus *bus)
> pci_bus_release_domain_nr(bus, host_bridge->dev.parent);
> #endif
>
> + pci_remove_bus(bus);
> + host_bridge->bus = NULL;
> +
> /* remove the host bridge */
> device_del(&host_bridge->dev);
> }
Yes that does fix it! I had been meaning to get back to this this week
to figure out what is going on and so thanks for figuring this out.
If you plan to send a fix for this, please add my ...
Tested-by: Jon Hunter <[email protected]>
Cheers!
Jon
--
nvpublic