There has been an effort to make the pci-hyperv driver support
async-probing to reduce the boot time. With async-probing, multiple
kernel threads can be running hv_pci_probe() -> create_root_hv_pci_bus() ->
pci_scan_root_bus_bridge() -> pci_bus_insert_busn_res() at the same time to
update the global list, causing list corruption.
Add a mutex to protect the list.
Signed-off-by: Dexuan Cui <[email protected]>
---
drivers/pci/probe.c | 25 ++++++++++++++++++-------
1 file changed, 18 insertions(+), 7 deletions(-)
diff --git a/drivers/pci/probe.c b/drivers/pci/probe.c
index e19b79821dd6..1327fd820b24 100644
--- a/drivers/pci/probe.c
+++ b/drivers/pci/probe.c
@@ -37,6 +37,7 @@ LIST_HEAD(pci_root_buses);
EXPORT_SYMBOL(pci_root_buses);
static LIST_HEAD(pci_domain_busn_res_list);
+static DEFINE_MUTEX(pci_domain_busn_res_list_lock);
struct pci_domain_busn_res {
struct list_head list;
@@ -47,14 +48,22 @@ struct pci_domain_busn_res {
static struct resource *get_pci_domain_busn_res(int domain_nr)
{
struct pci_domain_busn_res *r;
+ struct resource *ret;
- list_for_each_entry(r, &pci_domain_busn_res_list, list)
- if (r->domain_nr == domain_nr)
- return &r->res;
+ mutex_lock(&pci_domain_busn_res_list_lock);
+
+ list_for_each_entry(r, &pci_domain_busn_res_list, list) {
+ if (r->domain_nr == domain_nr) {
+ ret = &r->res;
+ goto out;
+ }
+ }
r = kzalloc(sizeof(*r), GFP_KERNEL);
- if (!r)
- return NULL;
+ if (!r) {
+ ret = NULL;
+ goto out;
+ }
r->domain_nr = domain_nr;
r->res.start = 0;
@@ -62,8 +71,10 @@ static struct resource *get_pci_domain_busn_res(int domain_nr)
r->res.flags = IORESOURCE_BUS | IORESOURCE_PCI_FIXED;
list_add_tail(&r->list, &pci_domain_busn_res_list);
-
- return &r->res;
+ ret = &r->res;
+out:
+ mutex_unlock(&pci_domain_busn_res_list_lock);
+ return ret;
}
/*
--
2.25.1
> -----Original Message-----
> From: Dexuan Cui <[email protected]>
> Sent: Thursday, April 18, 2024 9:53 PM
> To: [email protected]; [email protected]; KY Srinivasan
> <[email protected]>; Haiyang Zhang <[email protected]>;
> [email protected]; [email protected]
> Cc: [email protected]; [email protected]; Boqun
> Feng <[email protected]>; Sunil Muthuswamy
> <[email protected]>; Saurabh Singh Sengar <[email protected]>;
> Dexuan Cui <[email protected]>
> Subject: [PATCH] PCI: Add a mutex to protect the global list
> pci_domain_busn_res_list
>
> There has been an effort to make the pci-hyperv driver support
> async-probing to reduce the boot time. With async-probing, multiple
> kernel threads can be running hv_pci_probe() -> create_root_hv_pci_bus()
> ->
> pci_scan_root_bus_bridge() -> pci_bus_insert_busn_res() at the same time
> to
> update the global list, causing list corruption.
>
> Add a mutex to protect the list.
>
> Signed-off-by: Dexuan Cui <[email protected]>
> ---
> drivers/pci/probe.c | 25 ++++++++++++++++++-------
> 1 file changed, 18 insertions(+), 7 deletions(-)
>
> diff --git a/drivers/pci/probe.c b/drivers/pci/probe.c
> index e19b79821dd6..1327fd820b24 100644
> --- a/drivers/pci/probe.c
> +++ b/drivers/pci/probe.c
> @@ -37,6 +37,7 @@ LIST_HEAD(pci_root_buses);
> EXPORT_SYMBOL(pci_root_buses);
>
> static LIST_HEAD(pci_domain_busn_res_list);
> +static DEFINE_MUTEX(pci_domain_busn_res_list_lock);
>
> struct pci_domain_busn_res {
> struct list_head list;
> @@ -47,14 +48,22 @@ struct pci_domain_busn_res {
> static struct resource *get_pci_domain_busn_res(int domain_nr)
> {
> struct pci_domain_busn_res *r;
> + struct resource *ret;
>
> - list_for_each_entry(r, &pci_domain_busn_res_list, list)
> - if (r->domain_nr == domain_nr)
> - return &r->res;
> + mutex_lock(&pci_domain_busn_res_list_lock);
> +
> + list_for_each_entry(r, &pci_domain_busn_res_list, list) {
> + if (r->domain_nr == domain_nr) {
> + ret = &r->res;
> + goto out;
> + }
> + }
>
> r = kzalloc(sizeof(*r), GFP_KERNEL);
> - if (!r)
> - return NULL;
> + if (!r) {
> + ret = NULL;
> + goto out;
> + }
>
> r->domain_nr = domain_nr;
> r->res.start = 0;
> @@ -62,8 +71,10 @@ static struct resource *get_pci_domain_busn_res(int
> domain_nr)
> r->res.flags = IORESOURCE_BUS | IORESOURCE_PCI_FIXED;
>
> list_add_tail(&r->list, &pci_domain_busn_res_list);
> -
> - return &r->res;
> + ret = &r->res;
> +out:
> + mutex_unlock(&pci_domain_busn_res_list_lock);
> + return ret;
> }
The patch is for common pci code. So, this bug has been there for a while?
Do you have a sample stack trace of the crash?
I checked pci-hyperv; it doesn't define .driver.probe_type, so
PROBE_DEFAULT_STRATEGY is in effect. driver_allows_async_probing() returns
false unless a kernel/module parameter requests async. So async probing
hasn't been practiced here.
If, in the future, we change pci-hyperv's probe_type to PROBE_PREFER_ASYNCHRONOUS,
how does that affect the probing of the underlying PCI devices of the same
device type?
For example, MANA driver doesn't set probe_type. Will pci-hyperv's async
probing cause async probing or potentially nondeterministic naming for
MANA devices?
Thanks,
- Haiyang
On Thu, Apr 18, 2024 at 06:53:02PM -0700, Dexuan Cui wrote:
> There has been an effort to make the pci-hyperv driver support
> async-probing to reduce the boot time. With async-probing, multiple
> kernel threads can be running hv_pci_probe() -> create_root_hv_pci_bus() ->
> pci_scan_root_bus_bridge() -> pci_bus_insert_busn_res() at the same time to
> update the global list, causing list corruption.
>
> Add a mutex to protect the list.
>
> Signed-off-by: Dexuan Cui <[email protected]>
> ---
> drivers/pci/probe.c | 25 ++++++++++++++++++-------
> 1 file changed, 18 insertions(+), 7 deletions(-)
>
> diff --git a/drivers/pci/probe.c b/drivers/pci/probe.c
> index e19b79821dd6..1327fd820b24 100644
> --- a/drivers/pci/probe.c
> +++ b/drivers/pci/probe.c
> @@ -37,6 +37,7 @@ LIST_HEAD(pci_root_buses);
> EXPORT_SYMBOL(pci_root_buses);
>
> static LIST_HEAD(pci_domain_busn_res_list);
> +static DEFINE_MUTEX(pci_domain_busn_res_list_lock);
>
> struct pci_domain_busn_res {
> struct list_head list;
> @@ -47,14 +48,22 @@ struct pci_domain_busn_res {
> static struct resource *get_pci_domain_busn_res(int domain_nr)
> {
> struct pci_domain_busn_res *r;
> + struct resource *ret;
>
> - list_for_each_entry(r, &pci_domain_busn_res_list, list)
> - if (r->domain_nr == domain_nr)
> - return &r->res;
> + mutex_lock(&pci_domain_busn_res_list_lock);
Using
guard(mutex)(&pci_domain_busn_res_list_lock);
would simplify the logic, especially since there are gotos.
You can then avoid "goto out" and return NULL directly.
Frank
> +
> + list_for_each_entry(r, &pci_domain_busn_res_list, list) {
> + if (r->domain_nr == domain_nr) {
> + ret = &r->res;
> + goto out;
> + }
> + }
>
> r = kzalloc(sizeof(*r), GFP_KERNEL);
> - if (!r)
> - return NULL;
> + if (!r) {
> + ret = NULL;
> + goto out;
> + }
>
> r->domain_nr = domain_nr;
> r->res.start = 0;
> @@ -62,8 +71,10 @@ static struct resource *get_pci_domain_busn_res(int domain_nr)
> r->res.flags = IORESOURCE_BUS | IORESOURCE_PCI_FIXED;
>
> list_add_tail(&r->list, &pci_domain_busn_res_list);
> -
> - return &r->res;
> + ret = &r->res;
> +out:
> + mutex_unlock(&pci_domain_busn_res_list_lock);
> + return ret;
> }
>
> /*
> --
> 2.25.1
>
On Thu, Apr 18, 2024 at 06:53:02PM -0700, Dexuan Cui wrote:
> There has been an effort to make the pci-hyperv driver support
> async-probing to reduce the boot time. With async-probing, multiple
> kernel threads can be running hv_pci_probe() -> create_root_hv_pci_bus() ->
> pci_scan_root_bus_bridge() -> pci_bus_insert_busn_res() at the same time to
> update the global list, causing list corruption.
>
> Add a mutex to protect the list.
I think it's a good idea to support probing multiple PCI root buses in
parallel.
The problem in get_pci_domain_busn_res() is the global
pci_domain_busn_res_list. I'm not even sure what that list contains,
since it's a lookup by "domain_nr". In the hv case, you probably have
one host bridge per domain, but in general there may be multiple root
buses in the same domain, e.g.,
ACPI: PCI Root Bridge [PC00] (domain 0000 [bus 00-16])
ACPI: PCI Root Bridge [PC01] (domain 0000 [bus 17-39])
ACPI: PCI Root Bridge [PC02] (domain 0000 [bus 3a-5c])
...
We only use get_pci_domain_busn_res() for root buses, and we should
know the bus number range for root buses when we set up the struct
pci_host_bridge, so it seems like we should keep the bus number
resource there instead of allocating it in this sort of random place.
Then we shouldn't need this weird pci_domain_busn_res_list at all.
> Signed-off-by: Dexuan Cui <[email protected]>
> ---
> drivers/pci/probe.c | 25 ++++++++++++++++++-------
> 1 file changed, 18 insertions(+), 7 deletions(-)
>
> diff --git a/drivers/pci/probe.c b/drivers/pci/probe.c
> index e19b79821dd6..1327fd820b24 100644
> --- a/drivers/pci/probe.c
> +++ b/drivers/pci/probe.c
> @@ -37,6 +37,7 @@ LIST_HEAD(pci_root_buses);
> EXPORT_SYMBOL(pci_root_buses);
>
> static LIST_HEAD(pci_domain_busn_res_list);
> +static DEFINE_MUTEX(pci_domain_busn_res_list_lock);
>
> struct pci_domain_busn_res {
> struct list_head list;
> @@ -47,14 +48,22 @@ struct pci_domain_busn_res {
> static struct resource *get_pci_domain_busn_res(int domain_nr)
> {
> struct pci_domain_busn_res *r;
> + struct resource *ret;
>
> - list_for_each_entry(r, &pci_domain_busn_res_list, list)
> - if (r->domain_nr == domain_nr)
> - return &r->res;
> + mutex_lock(&pci_domain_busn_res_list_lock);
> +
> + list_for_each_entry(r, &pci_domain_busn_res_list, list) {
> + if (r->domain_nr == domain_nr) {
> + ret = &r->res;
> + goto out;
> + }
> + }
>
> r = kzalloc(sizeof(*r), GFP_KERNEL);
> - if (!r)
> - return NULL;
> + if (!r) {
> + ret = NULL;
> + goto out;
> + }
>
> r->domain_nr = domain_nr;
> r->res.start = 0;
> @@ -62,8 +71,10 @@ static struct resource *get_pci_domain_busn_res(int domain_nr)
> r->res.flags = IORESOURCE_BUS | IORESOURCE_PCI_FIXED;
>
> list_add_tail(&r->list, &pci_domain_busn_res_list);
> -
> - return &r->res;
> + ret = &r->res;
> +out:
> + mutex_unlock(&pci_domain_busn_res_list_lock);
> + return ret;
> }
>
> /*
> --
> 2.25.1
>
On Thu, Apr 25, 2024 at 05:51:38PM -0500, Bjorn Helgaas wrote:
> On Thu, Apr 18, 2024 at 06:53:02PM -0700, Dexuan Cui wrote:
> > There has been an effort to make the pci-hyperv driver support
> > async-probing to reduce the boot time. With async-probing, multiple
> > kernel threads can be running hv_pci_probe() -> create_root_hv_pci_bus() ->
> > pci_scan_root_bus_bridge() -> pci_bus_insert_busn_res() at the same time to
> > update the global list, causing list corruption.
> >
> > Add a mutex to protect the list.
>
> I think it's a good idea to support probing multiple PCI root buses in
> parallel.
>
> The problem in get_pci_domain_busn_res() is the global
> pci_domain_busn_res_list. I'm not even sure what that list contains,
> since it's a lookup by "domain_nr". In the hv case, you probably have
> one host bridge per domain, but in general there may be multiple root
> buses in the same domain, e.g.,
>
> ACPI: PCI Root Bridge [PC00] (domain 0000 [bus 00-16])
> ACPI: PCI Root Bridge [PC01] (domain 0000 [bus 17-39])
> ACPI: PCI Root Bridge [PC02] (domain 0000 [bus 3a-5c])
> ...
>
> We only use get_pci_domain_busn_res() for root buses, and we should
> know the bus number range for root buses when we set up the struct
> pci_host_bridge, so it seems like we should keep the bus number
> resource there instead of allocating it in this sort of random place.
>
> Then we shouldn't need this weird pci_domain_busn_res_list at all.
Oops, sorry, I totally missed the point here. The point is that for
each domain, we get a new 00-ff range of possible bus numbers. This
is independent of the host bridges for that domain that may exist.
Then each host bridge will allocate a piece of the 00-ff range.
But I do still think get_pci_domain_busn_res() isn't really the best
place for this. It seems like it should be at a higher level,
connected somehow to domain number allocation, e.g., somewhere related
to bridge->domain_nr like the pci_bus_find_domain_nr() path.
> > Signed-off-by: Dexuan Cui <[email protected]>
> > ---
> > drivers/pci/probe.c | 25 ++++++++++++++++++-------
> > 1 file changed, 18 insertions(+), 7 deletions(-)
> >
> > diff --git a/drivers/pci/probe.c b/drivers/pci/probe.c
> > index e19b79821dd6..1327fd820b24 100644
> > --- a/drivers/pci/probe.c
> > +++ b/drivers/pci/probe.c
> > @@ -37,6 +37,7 @@ LIST_HEAD(pci_root_buses);
> > EXPORT_SYMBOL(pci_root_buses);
> >
> > static LIST_HEAD(pci_domain_busn_res_list);
> > +static DEFINE_MUTEX(pci_domain_busn_res_list_lock);
> >
> > struct pci_domain_busn_res {
> > struct list_head list;
> > @@ -47,14 +48,22 @@ struct pci_domain_busn_res {
> > static struct resource *get_pci_domain_busn_res(int domain_nr)
> > {
> > struct pci_domain_busn_res *r;
> > + struct resource *ret;
> >
> > - list_for_each_entry(r, &pci_domain_busn_res_list, list)
> > - if (r->domain_nr == domain_nr)
> > - return &r->res;
> > + mutex_lock(&pci_domain_busn_res_list_lock);
> > +
> > + list_for_each_entry(r, &pci_domain_busn_res_list, list) {
> > + if (r->domain_nr == domain_nr) {
> > + ret = &r->res;
> > + goto out;
> > + }
> > + }
> >
> > r = kzalloc(sizeof(*r), GFP_KERNEL);
> > - if (!r)
> > - return NULL;
> > + if (!r) {
> > + ret = NULL;
> > + goto out;
> > + }
> >
> > r->domain_nr = domain_nr;
> > r->res.start = 0;
> > @@ -62,8 +71,10 @@ static struct resource *get_pci_domain_busn_res(int domain_nr)
> > r->res.flags = IORESOURCE_BUS | IORESOURCE_PCI_FIXED;
> >
> > list_add_tail(&r->list, &pci_domain_busn_res_list);
> > -
> > - return &r->res;
> > + ret = &r->res;
> > +out:
> > + mutex_unlock(&pci_domain_busn_res_list_lock);
> > + return ret;
> > }
> >
> > /*
> > --
> > 2.25.1
> >