2022-11-11 14:41:30

by Thomas Gleixner

[permalink] [raw]
Subject: [patch 05/39] genirq/msi: Remove filter from msi_free_descs_free_range()

When a range of descriptors is freed then all of them are not associated to
a linux interrupt. Remove the filter and add a warning to the free function.

Signed-off-by: Thomas Gleixner <[email protected]>
---
drivers/base/platform-msi.c | 2 +-
include/linux/msi.h | 5 ++---
kernel/irq/msi.c | 19 ++++++++++---------
3 files changed, 13 insertions(+), 13 deletions(-)

--- a/drivers/base/platform-msi.c
+++ b/drivers/base/platform-msi.c
@@ -325,7 +325,7 @@ void platform_msi_device_domain_free(str

msi_lock_descs(data->dev);
irq_domain_free_irqs_common(domain, virq, nr_irqs);
- msi_free_msi_descs_range(data->dev, MSI_DESC_ALL, virq, virq + nr_irqs - 1);
+ msi_free_msi_descs_range(data->dev, virq, virq + nr_irqs - 1);
msi_unlock_descs(data->dev);
}

--- a/include/linux/msi.h
+++ b/include/linux/msi.h
@@ -247,8 +247,7 @@ static inline void pci_write_msi_msg(uns
#endif /* CONFIG_PCI_MSI */

int msi_add_msi_desc(struct device *dev, struct msi_desc *init_desc);
-void msi_free_msi_descs_range(struct device *dev, enum msi_desc_filter filter,
- unsigned int first_index, unsigned int last_index);
+void msi_free_msi_descs_range(struct device *dev, unsigned int first_index, unsigned int last_index);

/**
* msi_free_msi_descs - Free MSI descriptors of a device
@@ -256,7 +255,7 @@ void msi_free_msi_descs_range(struct dev
*/
static inline void msi_free_msi_descs(struct device *dev)
{
- msi_free_msi_descs_range(dev, MSI_DESC_ALL, 0, MSI_MAX_INDEX);
+ msi_free_msi_descs_range(dev, 0, MSI_MAX_INDEX);
}

void __pci_read_msi_msg(struct msi_desc *entry, struct msi_msg *msg);
--- a/kernel/irq/msi.c
+++ b/kernel/irq/msi.c
@@ -120,7 +120,7 @@ static int msi_add_simple_msi_descs(stru
fail_mem:
ret = -ENOMEM;
fail:
- msi_free_msi_descs_range(dev, MSI_DESC_ALL, index, last);
+ msi_free_msi_descs_range(dev, index, last);
return ret;
}

@@ -141,12 +141,11 @@ static bool msi_desc_match(struct msi_de
/**
* msi_free_msi_descs_range - Free MSI descriptors of a device
* @dev: Device to free the descriptors
- * @filter: Descriptor state filter
* @first_index: Index to start freeing from
* @last_index: Last index to be freed
*/
-void msi_free_msi_descs_range(struct device *dev, enum msi_desc_filter filter,
- unsigned int first_index, unsigned int last_index)
+void msi_free_msi_descs_range(struct device *dev, unsigned int first_index,
+ unsigned int last_index)
{
struct xarray *xa = &dev->msi.data->__store;
struct msi_desc *desc;
@@ -155,10 +154,12 @@ void msi_free_msi_descs_range(struct dev
lockdep_assert_held(&dev->msi.data->mutex);

xa_for_each_range(xa, idx, desc, first_index, last_index) {
- if (msi_desc_match(desc, filter)) {
- xa_erase(xa, idx);
- msi_free_desc(desc);
- }
+ xa_erase(xa, idx);
+
+ /* Leak the descriptor when it is still referenced */
+ if (WARN_ON_ONCE(msi_desc_match(desc, MSI_DESC_ASSOCIATED)))
+ continue;
+ msi_free_desc(desc);
}
}

@@ -739,7 +740,7 @@ int msi_domain_populate_irqs(struct irq_
fail:
for (--virq; virq >= virq_base; virq--)
irq_domain_free_irqs_common(domain, virq, 1);
- msi_free_msi_descs_range(dev, MSI_DESC_ALL, virq_base, virq_base + nvec - 1);
+ msi_free_msi_descs_range(dev, virq_base, virq_base + nvec - 1);
unlock:
msi_unlock_descs(dev);
return ret;



2022-11-16 18:03:39

by Jason Gunthorpe

[permalink] [raw]
Subject: Re: [patch 05/39] genirq/msi: Remove filter from msi_free_descs_free_range()

On Fri, Nov 11, 2022 at 02:54:22PM +0100, Thomas Gleixner wrote:
> When a range of descriptors is freed then all of them are not associated to
> a linux interrupt. Remove the filter and add a warning to the free function.
>
> Signed-off-by: Thomas Gleixner <[email protected]>
> ---
> drivers/base/platform-msi.c | 2 +-
> include/linux/msi.h | 5 ++---
> kernel/irq/msi.c | 19 ++++++++++---------
> 3 files changed, 13 insertions(+), 13 deletions(-)

Reviewed-by: Jason Gunthorpe <[email protected]>

Jason

Subject: [tip: irq/core] genirq/msi: Remove filter from msi_free_descs_free_range()

The following commit has been merged into the irq/core branch of tip:

Commit-ID: 2f2940d168236a92df524a1bd99fc7b0325918b5
Gitweb: https://git.kernel.org/tip/2f2940d168236a92df524a1bd99fc7b0325918b5
Author: Thomas Gleixner <[email protected]>
AuthorDate: Fri, 11 Nov 2022 14:54:22 +01:00
Committer: Thomas Gleixner <[email protected]>
CommitterDate: Thu, 17 Nov 2022 15:15:18 +01:00

genirq/msi: Remove filter from msi_free_descs_free_range()

When a range of descriptors is freed then all of them are not associated to
a linux interrupt. Remove the filter and add a warning to the free function.

Signed-off-by: Thomas Gleixner <[email protected]>
Reviewed-by: Ashok Raj <[email protected]>
Reviewed-by: Jason Gunthorpe <[email protected]>
Link: https://lore.kernel.org/r/[email protected]

---
drivers/base/platform-msi.c | 2 +-
include/linux/msi.h | 5 ++---
kernel/irq/msi.c | 19 ++++++++++---------
3 files changed, 13 insertions(+), 13 deletions(-)

diff --git a/drivers/base/platform-msi.c b/drivers/base/platform-msi.c
index 12b0441..dddafa1 100644
--- a/drivers/base/platform-msi.c
+++ b/drivers/base/platform-msi.c
@@ -325,7 +325,7 @@ void platform_msi_device_domain_free(struct irq_domain *domain, unsigned int vir

msi_lock_descs(data->dev);
irq_domain_free_irqs_common(domain, virq, nr_irqs);
- msi_free_msi_descs_range(data->dev, MSI_DESC_ALL, virq, virq + nr_irqs - 1);
+ msi_free_msi_descs_range(data->dev, virq, virq + nr_irqs - 1);
msi_unlock_descs(data->dev);
}

diff --git a/include/linux/msi.h b/include/linux/msi.h
index fc918a6..969ce46 100644
--- a/include/linux/msi.h
+++ b/include/linux/msi.h
@@ -247,8 +247,7 @@ static inline void pci_write_msi_msg(unsigned int irq, struct msi_msg *msg)
#endif /* CONFIG_PCI_MSI */

int msi_add_msi_desc(struct device *dev, struct msi_desc *init_desc);
-void msi_free_msi_descs_range(struct device *dev, enum msi_desc_filter filter,
- unsigned int first_index, unsigned int last_index);
+void msi_free_msi_descs_range(struct device *dev, unsigned int first_index, unsigned int last_index);

/**
* msi_free_msi_descs - Free MSI descriptors of a device
@@ -256,7 +255,7 @@ void msi_free_msi_descs_range(struct device *dev, enum msi_desc_filter filter,
*/
static inline void msi_free_msi_descs(struct device *dev)
{
- msi_free_msi_descs_range(dev, MSI_DESC_ALL, 0, MSI_MAX_INDEX);
+ msi_free_msi_descs_range(dev, 0, MSI_MAX_INDEX);
}

void __pci_read_msi_msg(struct msi_desc *entry, struct msi_msg *msg);
diff --git a/kernel/irq/msi.c b/kernel/irq/msi.c
index bba6359..1ca4846 100644
--- a/kernel/irq/msi.c
+++ b/kernel/irq/msi.c
@@ -120,7 +120,7 @@ static int msi_add_simple_msi_descs(struct device *dev, unsigned int index, unsi
fail_mem:
ret = -ENOMEM;
fail:
- msi_free_msi_descs_range(dev, MSI_DESC_ALL, index, last);
+ msi_free_msi_descs_range(dev, index, last);
return ret;
}

@@ -141,12 +141,11 @@ static bool msi_desc_match(struct msi_desc *desc, enum msi_desc_filter filter)
/**
* msi_free_msi_descs_range - Free MSI descriptors of a device
* @dev: Device to free the descriptors
- * @filter: Descriptor state filter
* @first_index: Index to start freeing from
* @last_index: Last index to be freed
*/
-void msi_free_msi_descs_range(struct device *dev, enum msi_desc_filter filter,
- unsigned int first_index, unsigned int last_index)
+void msi_free_msi_descs_range(struct device *dev, unsigned int first_index,
+ unsigned int last_index)
{
struct xarray *xa = &dev->msi.data->__store;
struct msi_desc *desc;
@@ -155,10 +154,12 @@ void msi_free_msi_descs_range(struct device *dev, enum msi_desc_filter filter,
lockdep_assert_held(&dev->msi.data->mutex);

xa_for_each_range(xa, idx, desc, first_index, last_index) {
- if (msi_desc_match(desc, filter)) {
- xa_erase(xa, idx);
- msi_free_desc(desc);
- }
+ xa_erase(xa, idx);
+
+ /* Leak the descriptor when it is still referenced */
+ if (WARN_ON_ONCE(msi_desc_match(desc, MSI_DESC_ASSOCIATED)))
+ continue;
+ msi_free_desc(desc);
}
}

@@ -739,7 +740,7 @@ int msi_domain_populate_irqs(struct irq_domain *domain, struct device *dev,
fail:
for (--virq; virq >= virq_base; virq--)
irq_domain_free_irqs_common(domain, virq, 1);
- msi_free_msi_descs_range(dev, MSI_DESC_ALL, virq_base, virq_base + nvec - 1);
+ msi_free_msi_descs_range(dev, virq_base, virq_base + nvec - 1);
unlock:
msi_unlock_descs(dev);
return ret;

2023-03-01 10:55:49

by Miquel Raynal

[permalink] [raw]
Subject: Re: [patch 05/39] genirq/msi: Remove filter from msi_free_descs_free_range()

Hi Thomas,

[email protected] wrote on Fri, 11 Nov 2022 14:54:22 +0100 (CET):

> When a range of descriptors is freed then all of them are not associated to
> a linux interrupt. Remove the filter and add a warning to the free function.
>
> Signed-off-by: Thomas Gleixner <[email protected]>
> ---

[...]

> --- a/kernel/irq/msi.c
> +++ b/kernel/irq/msi.c
> @@ -120,7 +120,7 @@ static int msi_add_simple_msi_descs(stru
> fail_mem:
> ret = -ENOMEM;
> fail:
> - msi_free_msi_descs_range(dev, MSI_DESC_ALL, index, last);
> + msi_free_msi_descs_range(dev, index, last);
> return ret;
> }
>
> @@ -141,12 +141,11 @@ static bool msi_desc_match(struct msi_de
> /**
> * msi_free_msi_descs_range - Free MSI descriptors of a device
> * @dev: Device to free the descriptors
> - * @filter: Descriptor state filter
> * @first_index: Index to start freeing from
> * @last_index: Last index to be freed
> */
> -void msi_free_msi_descs_range(struct device *dev, enum msi_desc_filter filter,
> - unsigned int first_index, unsigned int last_index)
> +void msi_free_msi_descs_range(struct device *dev, unsigned int first_index,
> + unsigned int last_index)
> {
> struct xarray *xa = &dev->msi.data->__store;
> struct msi_desc *desc;
> @@ -155,10 +154,12 @@ void msi_free_msi_descs_range(struct dev
> lockdep_assert_held(&dev->msi.data->mutex);
>
> xa_for_each_range(xa, idx, desc, first_index, last_index) {
> - if (msi_desc_match(desc, filter)) {
> - xa_erase(xa, idx);
> - msi_free_desc(desc);
> - }
> + xa_erase(xa, idx);
> +
> + /* Leak the descriptor when it is still referenced */
> + if (WARN_ON_ONCE(msi_desc_match(desc, MSI_DESC_ASSOCIATED)))
> + continue;
> + msi_free_desc(desc);
> }
> }

It looks like since this commit I am getting warnings upon EPROBE_DEFER
errors in the mvpp2 Marvell Ethernet driver. I looked a bit at the
internals to understand why this warning was shown, and it seems that
nothing "de-references" the descriptors, which would mean here:
resetting desc->irq to 0.

In my case I don't think the mvpp2_main.c driver nor the
irq_mvebu_icu.c driver behind do anything strange (as far as I
understand them). I believe any error during a ->probe() leading
to an irq_dispose_mapping() call with MSI behind will trigger that
warning.

I am wondering how useful thisd WARN_ON() is, or otherwise where the
desc->irq entry should be zeroed (if I understand that correctly), any
help will be appreciated.

Here is the splat:

[ 2.045857] ------------[ cut here ]------------
[ 2.050497] WARNING: CPU: 2 PID: 9 at kernel/irq/msi.c:197 msi_domain_free_descs+0x120/0x128
[ 2.058993] Modules linked in:
[ 2.062068] CPU: 2 PID: 9 Comm: kworker/u8:0 Not tainted 6.2.0-rc1+ #168
[ 2.068804] Hardware name: Delta TN48M (DT)
[ 2.073008] Workqueue: events_unbound deferred_probe_work_func
[ 2.078878] pstate: 00000005 (nzcv daif -PAN -UAO -TCO -DIT -SSBS BTYPE=--)
[ 2.085875] pc : msi_domain_free_descs+0x120/0x128
[ 2.090693] lr : msi_domain_free_descs+0xe0/0x128
[ 2.095423] sp : ffff80000a82b8d0
[ 2.098745] x29: ffff80000a82b8d0 x28: 00007bfdbb508ca8 x27: ffff000102e28940
[ 2.105921] x26: 0000000000000004 x25: ffff000100257660 x24: ffff800009a6b8d8
[ 2.113096] x23: ffff800008a1c868 x22: ffff000102e4b700 x21: ffff000101494bb0
[ 2.120270] x20: ffff80000a82b958 x19: ffff800009afe248 x18: 0000000000000010
[ 2.127444] x17: fffffc0000fa4008 x16: 0000000000000008 x15: 000000000000013a
[ 2.134618] x14: ffff80000a82b6a0 x13: 00000000ffffffea x12: ffff80000a82b870
[ 2.141794] x11: 0000000000000002 x10: 0000000000000000 x9 : 0000000000000001
[ 2.148967] x8 : 0000000000000000 x7 : 0000000000000238 x6 : ffff0001005f1230
[ 2.156141] x5 : 0000000000000000 x4 : 0000200000000000 x3 : 0000000000000000
[ 2.163315] x2 : b586accf01c45400 x1 : ffff0001000e0000 x0 : 000000000000002d
[ 2.170489] Call trace:
[ 2.172948] msi_domain_free_descs+0x120/0x128
[ 2.177417] msi_domain_free_msi_descs_range+0x64/0x9c
[ 2.182586] platform_msi_device_domain_free+0x88/0xb8
[ 2.187752] mvebu_icu_irq_domain_free+0x60/0x80
[ 2.192396] irq_domain_free_irqs_hierarchy.part.21+0x94/0xa8
[ 2.198173] irq_domain_free_irqs+0xec/0x150
[ 2.202466] irq_dispose_mapping+0x104/0x120
[ 2.206758] mvpp2_probe+0x2028/0x21f8
[ 2.210530] platform_probe+0x68/0xd8
[ 2.214210] really_probe+0xbc/0x2a8
[ 2.217807] __driver_probe_device+0x78/0xe0
[ 2.222102] driver_probe_device+0x3c/0x108
[ 2.226308] __device_attach_driver+0xb8/0xf8
[ 2.230690] bus_for_each_drv+0x7c/0xd0
[ 2.234547] __device_attach+0xec/0x188
[ 2.238404] device_initial_probe+0x14/0x20
[ 2.242611] bus_probe_device+0x9c/0xa8
[ 2.246468] deferred_probe_work_func+0x88/0xc0
[ 2.251024] process_one_work+0x1fc/0x348
[ 2.255056] worker_thread+0x228/0x420
[ 2.258825] kthread+0x10c/0x118
[ 2.262075] ret_from_fork+0x10/0x20
[ 2.265672] ---[ end trace 0000000000000000 ]---

Thanks,
Miquèl

2023-03-01 21:07:58

by Thomas Gleixner

[permalink] [raw]
Subject: Re: [patch 05/39] genirq/msi: Remove filter from msi_free_descs_free_range()

Miquel!

On Wed, Mar 01 2023 at 11:55, Miquel Raynal wrote:
> [email protected] wrote on Fri, 11 Nov 2022 14:54:22 +0100 (CET):
>
>> When a range of descriptors is freed then all of them are not associated to
>> a linux interrupt. Remove the filter and add a warning to the free function.
>> + /* Leak the descriptor when it is still referenced */
>> + if (WARN_ON_ONCE(msi_desc_match(desc, MSI_DESC_ASSOCIATED)))
>> + continue;
>> + msi_free_desc(desc);
>> }
>> }
>
> It looks like since this commit I am getting warnings upon EPROBE_DEFER
> errors in the mvpp2 Marvell Ethernet driver. I looked a bit at the
> internals to understand why this warning was shown, and it seems that
> nothing "de-references" the descriptors, which would mean here:
> resetting desc->irq to 0.

Correct. This platform-msi ^(*&!@&^ hack really needs to die ASAP.

Marc, where are we on that? Is this still in limbo?

> I am wondering how useful thisd WARN_ON() is, or otherwise where the

It is useful as it caught bugs already.

> desc->irq entry should be zeroed (if I understand that correctly), any
> help will be appreciated.

Untested workaround below. I hate it with a passion, but *shrug*.

Thanks,

tglx
---
drivers/base/platform-msi.c | 1 +
include/linux/msi.h | 2 ++
kernel/irq/msi.c | 23 ++++++++++++++++++++++-
3 files changed, 25 insertions(+), 1 deletion(-)

--- a/drivers/base/platform-msi.c
+++ b/drivers/base/platform-msi.c
@@ -324,6 +324,7 @@ void platform_msi_device_domain_free(str
struct platform_msi_priv_data *data = domain->host_data;

msi_lock_descs(data->dev);
+ msi_domain_depopulate_descs(data->dev, virq, nr_irqs);
irq_domain_free_irqs_common(domain, virq, nr_irqs);
msi_free_msi_descs_range(data->dev, virq, virq + nr_irqs - 1);
msi_unlock_descs(data->dev);
--- a/include/linux/msi.h
+++ b/include/linux/msi.h
@@ -631,6 +631,8 @@ int msi_domain_prepare_irqs(struct irq_d
int nvec, msi_alloc_info_t *args);
int msi_domain_populate_irqs(struct irq_domain *domain, struct device *dev,
int virq, int nvec, msi_alloc_info_t *args);
+void msi_domain_depopulate_descs(struct device *dev, int virq, int nvec);
+
struct irq_domain *
__platform_msi_create_device_domain(struct device *dev,
unsigned int nvec,
--- a/kernel/irq/msi.c
+++ b/kernel/irq/msi.c
@@ -1109,14 +1109,35 @@ int msi_domain_populate_irqs(struct irq_
return 0;

fail:
- for (--virq; virq >= virq_base; virq--)
+ for (--virq; virq >= virq_base; virq--) {
+ msi_domain_depopulate_descs(dev, virq, 1);
irq_domain_free_irqs_common(domain, virq, 1);
+ }
msi_domain_free_descs(dev, &ctrl);
unlock:
msi_unlock_descs(dev);
return ret;
}

+void msi_domain_depopulate_descs(struct device *dev, int virq_base, int nvec)
+{
+ struct msi_ctrl ctrl = {
+ .domid = MSI_DEFAULT_DOMAIN,
+ .first = virq_base,
+ .last = virq_base + nvec - 1,
+ };
+ struct msi_desc *desc;
+ struct xarray *xa;
+ unsigned long idx;
+
+ if (!msi_ctrl_valid(dev, &ctrl))
+ return;
+
+ xa = &dev->msi.data->__domains[ctrl.domid].store;
+ xa_for_each_range(xa, idx, desc, ctrl.first, ctrl.last)
+ desc->irq = 0;
+}
+
/*
* Carefully check whether the device can use reservation mode. If
* reservation mode is enabled then the early activation will assign a

2023-03-02 14:43:30

by Miquel Raynal

[permalink] [raw]
Subject: Re: [patch 05/39] genirq/msi: Remove filter from msi_free_descs_free_range()

Hi Thomas,

[email protected] wrote on Wed, 01 Mar 2023 22:07:48 +0100:

> Miquel!
>
> On Wed, Mar 01 2023 at 11:55, Miquel Raynal wrote:
> > [email protected] wrote on Fri, 11 Nov 2022 14:54:22 +0100 (CET):
> >
> >> When a range of descriptors is freed then all of them are not associated to
> >> a linux interrupt. Remove the filter and add a warning to the free function.
> >> + /* Leak the descriptor when it is still referenced */
> >> + if (WARN_ON_ONCE(msi_desc_match(desc, MSI_DESC_ASSOCIATED)))
> >> + continue;
> >> + msi_free_desc(desc);
> >> }
> >> }
> >
> > It looks like since this commit I am getting warnings upon EPROBE_DEFER
> > errors in the mvpp2 Marvell Ethernet driver. I looked a bit at the
> > internals to understand why this warning was shown, and it seems that
> > nothing "de-references" the descriptors, which would mean here:
> > resetting desc->irq to 0.
>
> Correct. This platform-msi ^(*&!@&^ hack really needs to die ASAP.

:-)

> Marc, where are we on that? Is this still in limbo?
>
> > I am wondering how useful thisd WARN_ON() is, or otherwise where the
>
> It is useful as it caught bugs already.

Sure.

> > desc->irq entry should be zeroed (if I understand that correctly), any
> > help will be appreciated.
>
> Untested workaround below.

Excellent!

> I hate it with a passion, but *shrug*.

:'D

> Thanks,
>
> tglx
> ---
> drivers/base/platform-msi.c | 1 +
> include/linux/msi.h | 2 ++
> kernel/irq/msi.c | 23 ++++++++++++++++++++++-
> 3 files changed, 25 insertions(+), 1 deletion(-)

Kudos for the diff, which indeed works perfectly in my case. I guess
you'll make a proper patch soon, if that's the case you can add my:

Tested-by: Miquel Raynal <[email protected]>

Let me know otherwise.

Thanks a lot for the very quick fix!
Miquèl

> --- a/drivers/base/platform-msi.c
> +++ b/drivers/base/platform-msi.c
> @@ -324,6 +324,7 @@ void platform_msi_device_domain_free(str
> struct platform_msi_priv_data *data = domain->host_data;
>
> msi_lock_descs(data->dev);
> + msi_domain_depopulate_descs(data->dev, virq, nr_irqs);
> irq_domain_free_irqs_common(domain, virq, nr_irqs);
> msi_free_msi_descs_range(data->dev, virq, virq + nr_irqs - 1);
> msi_unlock_descs(data->dev);
> --- a/include/linux/msi.h
> +++ b/include/linux/msi.h
> @@ -631,6 +631,8 @@ int msi_domain_prepare_irqs(struct irq_d
> int nvec, msi_alloc_info_t *args);
> int msi_domain_populate_irqs(struct irq_domain *domain, struct device *dev,
> int virq, int nvec, msi_alloc_info_t *args);
> +void msi_domain_depopulate_descs(struct device *dev, int virq, int nvec);
> +
> struct irq_domain *
> __platform_msi_create_device_domain(struct device *dev,
> unsigned int nvec,
> --- a/kernel/irq/msi.c
> +++ b/kernel/irq/msi.c
> @@ -1109,14 +1109,35 @@ int msi_domain_populate_irqs(struct irq_
> return 0;
>
> fail:
> - for (--virq; virq >= virq_base; virq--)
> + for (--virq; virq >= virq_base; virq--) {
> + msi_domain_depopulate_descs(dev, virq, 1);
> irq_domain_free_irqs_common(domain, virq, 1);
> + }
> msi_domain_free_descs(dev, &ctrl);
> unlock:
> msi_unlock_descs(dev);
> return ret;
> }
>
> +void msi_domain_depopulate_descs(struct device *dev, int virq_base, int nvec)
> +{
> + struct msi_ctrl ctrl = {
> + .domid = MSI_DEFAULT_DOMAIN,
> + .first = virq_base,
> + .last = virq_base + nvec - 1,
> + };
> + struct msi_desc *desc;
> + struct xarray *xa;
> + unsigned long idx;
> +
> + if (!msi_ctrl_valid(dev, &ctrl))
> + return;
> +
> + xa = &dev->msi.data->__domains[ctrl.domid].store;
> + xa_for_each_range(xa, idx, desc, ctrl.first, ctrl.last)
> + desc->irq = 0;
> +}
> +
> /*
> * Carefully check whether the device can use reservation mode. If
> * reservation mode is enabled then the early activation will assign a

Subject: [tip: irq/urgent] genirq/msi, platform-msi: Ensure that MSI descriptors are unreferenced

The following commit has been merged into the irq/urgent branch of tip:

Commit-ID: 0fb7fb713461e44b12e72c292bf90ee300f40710
Gitweb: https://git.kernel.org/tip/0fb7fb713461e44b12e72c292bf90ee300f40710
Author: Thomas Gleixner <[email protected]>
AuthorDate: Wed, 01 Mar 2023 22:07:48 +01:00
Committer: Thomas Gleixner <[email protected]>
CommitterDate: Thu, 02 Mar 2023 18:09:44 +01:00

genirq/msi, platform-msi: Ensure that MSI descriptors are unreferenced

Miquel reported a warning in the MSI core which is triggered when
interrupts are freed via platform_msi_device_domain_free().

This code got reworked to use core functions for freeing the MSI
descriptors, but nothing took care to clear the msi_desc->irq entry, which
then triggers the warning in msi_free_msi_desc() which uses desc->irq to
validate that the descriptor has been torn down. The same issue exists in
msi_domain_populate_irqs().

Up to the point that msi_free_msi_descs() grew a warning for this case,
this went un-noticed.

Provide the counterpart of msi_domain_populate_irqs() and invoke it in
platform_msi_device_domain_free() before freeing the interrupts and MSI
descriptors and also in the error path of msi_domain_populate_irqs().

Fixes: 2f2940d16823 ("genirq/msi: Remove filter from msi_free_descs_free_range()")
Reported-by: Miquel Raynal <[email protected]>
Signed-off-by: Thomas Gleixner <[email protected]>
Tested-by: Miquel Raynal <[email protected]>
Cc: [email protected]
Link: https://lore.kernel.org/r/87mt4wkwnv.ffs@tglx
---
drivers/base/platform-msi.c | 1 +
include/linux/msi.h | 2 ++
kernel/irq/msi.c | 23 ++++++++++++++++++++++-
3 files changed, 25 insertions(+), 1 deletion(-)

diff --git a/drivers/base/platform-msi.c b/drivers/base/platform-msi.c
index 5883e76..f37ad34 100644
--- a/drivers/base/platform-msi.c
+++ b/drivers/base/platform-msi.c
@@ -324,6 +324,7 @@ void platform_msi_device_domain_free(struct irq_domain *domain, unsigned int vir
struct platform_msi_priv_data *data = domain->host_data;

msi_lock_descs(data->dev);
+ msi_domain_depopulate_descs(data->dev, virq, nr_irqs);
irq_domain_free_irqs_common(domain, virq, nr_irqs);
msi_free_msi_descs_range(data->dev, virq, virq + nr_irqs - 1);
msi_unlock_descs(data->dev);
diff --git a/include/linux/msi.h b/include/linux/msi.h
index a112b91..15dd718 100644
--- a/include/linux/msi.h
+++ b/include/linux/msi.h
@@ -631,6 +631,8 @@ int msi_domain_prepare_irqs(struct irq_domain *domain, struct device *dev,
int nvec, msi_alloc_info_t *args);
int msi_domain_populate_irqs(struct irq_domain *domain, struct device *dev,
int virq, int nvec, msi_alloc_info_t *args);
+void msi_domain_depopulate_descs(struct device *dev, int virq, int nvec);
+
struct irq_domain *
__platform_msi_create_device_domain(struct device *dev,
unsigned int nvec,
diff --git a/kernel/irq/msi.c b/kernel/irq/msi.c
index efd21b7..d169ee0 100644
--- a/kernel/irq/msi.c
+++ b/kernel/irq/msi.c
@@ -1109,14 +1109,35 @@ int msi_domain_populate_irqs(struct irq_domain *domain, struct device *dev,
return 0;

fail:
- for (--virq; virq >= virq_base; virq--)
+ for (--virq; virq >= virq_base; virq--) {
+ msi_domain_depopulate_descs(dev, virq, 1);
irq_domain_free_irqs_common(domain, virq, 1);
+ }
msi_domain_free_descs(dev, &ctrl);
unlock:
msi_unlock_descs(dev);
return ret;
}

+void msi_domain_depopulate_descs(struct device *dev, int virq_base, int nvec)
+{
+ struct msi_ctrl ctrl = {
+ .domid = MSI_DEFAULT_DOMAIN,
+ .first = virq_base,
+ .last = virq_base + nvec - 1,
+ };
+ struct msi_desc *desc;
+ struct xarray *xa;
+ unsigned long idx;
+
+ if (!msi_ctrl_valid(dev, &ctrl))
+ return;
+
+ xa = &dev->msi.data->__domains[ctrl.domid].store;
+ xa_for_each_range(xa, idx, desc, ctrl.first, ctrl.last)
+ desc->irq = 0;
+}
+
/*
* Carefully check whether the device can use reservation mode. If
* reservation mode is enabled then the early activation will assign a