When MSI_FLAG_ACTIVATE_EARLY is set (which is the case for PCI),
we perform the activation of the interrupt (which in the case of
PCI results in the endpoint being programmed) as soon as the
interrupt is allocated.
But it appears that this is only done for the first vector,
introducing an inconsistent behaviour for PCI Multi-MSI.
Fix it by iterating over the number of vectors allocated to
each MSI descriptor. This is easily achieved by introducing
a new "for_each_msi_vector" iterator, together with a tiny
bit of refactoring.
Fixes: f3b0946d629c ("genirq/msi: Make sure PCI MSIs are activated early")
Reported-by: Shameer Kolothum <[email protected]>
Signed-off-by: Marc Zyngier <[email protected]>
Cc: [email protected]
---
include/linux/msi.h | 6 ++++++
kernel/irq/msi.c | 44 ++++++++++++++++++++------------------------
2 files changed, 26 insertions(+), 24 deletions(-)
diff --git a/include/linux/msi.h b/include/linux/msi.h
index 360a0a7e7341..aef35fd1cf11 100644
--- a/include/linux/msi.h
+++ b/include/linux/msi.h
@@ -178,6 +178,12 @@ struct msi_desc {
list_for_each_entry((desc), dev_to_msi_list((dev)), list)
#define for_each_msi_entry_safe(desc, tmp, dev) \
list_for_each_entry_safe((desc), (tmp), dev_to_msi_list((dev)), list)
+#define for_each_msi_vector(desc, __irq, dev) \
+ for_each_msi_entry((desc), (dev)) \
+ if ((desc)->irq) \
+ for (__irq = (desc)->irq; \
+ __irq < ((desc)->irq + (desc)->nvec_used); \
+ __irq++)
#ifdef CONFIG_IRQ_MSI_IOMMU
static inline const void *msi_desc_get_iommu_cookie(struct msi_desc *desc)
diff --git a/kernel/irq/msi.c b/kernel/irq/msi.c
index 2c0c4d6d0f83..d924676c8781 100644
--- a/kernel/irq/msi.c
+++ b/kernel/irq/msi.c
@@ -436,22 +436,22 @@ int __msi_domain_alloc_irqs(struct irq_domain *domain, struct device *dev,
can_reserve = msi_check_reservation_mode(domain, info, dev);
- for_each_msi_entry(desc, dev) {
- virq = desc->irq;
- if (desc->nvec_used == 1)
- dev_dbg(dev, "irq %d for MSI\n", virq);
- else
+ /*
+ * This flag is set by the PCI layer as we need to activate
+ * the MSI entries before the PCI layer enables MSI in the
+ * card. Otherwise the card latches a random msi message.
+ */
+ if (!(info->flags & MSI_FLAG_ACTIVATE_EARLY))
+ goto skip_activate;
+
+ for_each_msi_vector(desc, i, dev) {
+ if (desc->irq == i) {
+ virq = desc->irq;
dev_dbg(dev, "irq [%d-%d] for MSI\n",
virq, virq + desc->nvec_used - 1);
- /*
- * This flag is set by the PCI layer as we need to activate
- * the MSI entries before the PCI layer enables MSI in the
- * card. Otherwise the card latches a random msi message.
- */
- if (!(info->flags & MSI_FLAG_ACTIVATE_EARLY))
- continue;
+ }
- irq_data = irq_domain_get_irq_data(domain, desc->irq);
+ irq_data = irq_domain_get_irq_data(domain, i);
if (!can_reserve) {
irqd_clr_can_reserve(irq_data);
if (domain->flags & IRQ_DOMAIN_MSI_NOMASK_QUIRK)
@@ -462,28 +462,24 @@ int __msi_domain_alloc_irqs(struct irq_domain *domain, struct device *dev,
goto cleanup;
}
+skip_activate:
/*
* If these interrupts use reservation mode, clear the activated bit
* so request_irq() will assign the final vector.
*/
if (can_reserve) {
- for_each_msi_entry(desc, dev) {
- irq_data = irq_domain_get_irq_data(domain, desc->irq);
+ for_each_msi_vector(desc, i, dev) {
+ irq_data = irq_domain_get_irq_data(domain, i);
irqd_clr_activated(irq_data);
}
}
return 0;
cleanup:
- for_each_msi_entry(desc, dev) {
- struct irq_data *irqd;
-
- if (desc->irq == virq)
- break;
-
- irqd = irq_domain_get_irq_data(domain, desc->irq);
- if (irqd_is_activated(irqd))
- irq_domain_deactivate_irq(irqd);
+ for_each_msi_vector(desc, i, dev) {
+ irq_data = irq_domain_get_irq_data(domain, i);
+ if (irqd_is_activated(irq_data))
+ irq_domain_deactivate_irq(irq_data);
}
msi_domain_free_irqs(domain, dev);
return ret;
--
2.29.2
> -----Original Message-----
> From: Marc Zyngier [mailto:[email protected]]
> Sent: 23 January 2021 12:28
> To: [email protected]
> Cc: Thomas Gleixner <[email protected]>; Bjorn Helgaas
> <[email protected]>; Shameerali Kolothum Thodi
> <[email protected]>; [email protected]
> Subject: [PATCH] genirq/msi: Activate Multi-MSI early when
> MSI_FLAG_ACTIVATE_EARLY is set
>
> When MSI_FLAG_ACTIVATE_EARLY is set (which is the case for PCI),
> we perform the activation of the interrupt (which in the case of
> PCI results in the endpoint being programmed) as soon as the
> interrupt is allocated.
>
> But it appears that this is only done for the first vector,
> introducing an inconsistent behaviour for PCI Multi-MSI.
>
> Fix it by iterating over the number of vectors allocated to
> each MSI descriptor. This is easily achieved by introducing
> a new "for_each_msi_vector" iterator, together with a tiny
> bit of refactoring.
>
> Fixes: f3b0946d629c ("genirq/msi: Make sure PCI MSIs are activated early")
> Reported-by: Shameer Kolothum <[email protected]>
> Signed-off-by: Marc Zyngier <[email protected]>
> Cc: [email protected]
> ---
> include/linux/msi.h | 6 ++++++
> kernel/irq/msi.c | 44 ++++++++++++++++++++------------------------
> 2 files changed, 26 insertions(+), 24 deletions(-)
>
> diff --git a/include/linux/msi.h b/include/linux/msi.h
> index 360a0a7e7341..aef35fd1cf11 100644
> --- a/include/linux/msi.h
> +++ b/include/linux/msi.h
> @@ -178,6 +178,12 @@ struct msi_desc {
> list_for_each_entry((desc), dev_to_msi_list((dev)), list)
> #define for_each_msi_entry_safe(desc, tmp, dev) \
> list_for_each_entry_safe((desc), (tmp), dev_to_msi_list((dev)), list)
> +#define for_each_msi_vector(desc, __irq, dev) \
> + for_each_msi_entry((desc), (dev)) \
> + if ((desc)->irq) \
> + for (__irq = (desc)->irq; \
> + __irq < ((desc)->irq + (desc)->nvec_used); \
> + __irq++)
>
> #ifdef CONFIG_IRQ_MSI_IOMMU
> static inline const void *msi_desc_get_iommu_cookie(struct msi_desc *desc)
> diff --git a/kernel/irq/msi.c b/kernel/irq/msi.c
> index 2c0c4d6d0f83..d924676c8781 100644
> --- a/kernel/irq/msi.c
> +++ b/kernel/irq/msi.c
> @@ -436,22 +436,22 @@ int __msi_domain_alloc_irqs(struct irq_domain
> *domain, struct device *dev,
>
> can_reserve = msi_check_reservation_mode(domain, info, dev);
>
> - for_each_msi_entry(desc, dev) {
> - virq = desc->irq;
> - if (desc->nvec_used == 1)
> - dev_dbg(dev, "irq %d for MSI\n", virq);
> - else
> + /*
> + * This flag is set by the PCI layer as we need to activate
> + * the MSI entries before the PCI layer enables MSI in the
> + * card. Otherwise the card latches a random msi message.
> + */
> + if (!(info->flags & MSI_FLAG_ACTIVATE_EARLY))
> + goto skip_activate;
This will change the dev_dbg() print behavior. Judging from commit f3b0946d629c,
it looks like the dev_dbg() code below was also executed in the
!MSI_FLAG_ACTIVATE_EARLY case. Not sure how much this matters though.
Thanks,
Shameer
> +
> + for_each_msi_vector(desc, i, dev) {
> + if (desc->irq == i) {
> + virq = desc->irq;
> dev_dbg(dev, "irq [%d-%d] for MSI\n",
> virq, virq + desc->nvec_used - 1);
> - /*
> - * This flag is set by the PCI layer as we need to activate
> - * the MSI entries before the PCI layer enables MSI in the
> - * card. Otherwise the card latches a random msi message.
> - */
> - if (!(info->flags & MSI_FLAG_ACTIVATE_EARLY))
> - continue;
> + }
>
> - irq_data = irq_domain_get_irq_data(domain, desc->irq);
> + irq_data = irq_domain_get_irq_data(domain, i);
> if (!can_reserve) {
> irqd_clr_can_reserve(irq_data);
> if (domain->flags & IRQ_DOMAIN_MSI_NOMASK_QUIRK)
> @@ -462,28 +462,24 @@ int __msi_domain_alloc_irqs(struct irq_domain
> *domain, struct device *dev,
> goto cleanup;
> }
>
> +skip_activate:
> /*
> * If these interrupts use reservation mode, clear the activated bit
> * so request_irq() will assign the final vector.
> */
> if (can_reserve) {
> - for_each_msi_entry(desc, dev) {
> - irq_data = irq_domain_get_irq_data(domain, desc->irq);
> + for_each_msi_vector(desc, i, dev) {
> + irq_data = irq_domain_get_irq_data(domain, i);
> irqd_clr_activated(irq_data);
> }
> }
> return 0;
>
> cleanup:
> - for_each_msi_entry(desc, dev) {
> - struct irq_data *irqd;
> -
> - if (desc->irq == virq)
> - break;
> -
> - irqd = irq_domain_get_irq_data(domain, desc->irq);
> - if (irqd_is_activated(irqd))
> - irq_domain_deactivate_irq(irqd);
> + for_each_msi_vector(desc, i, dev) {
> + irq_data = irq_domain_get_irq_data(domain, i);
> + if (irqd_is_activated(irq_data))
> + irq_domain_deactivate_irq(irq_data);
> }
> msi_domain_free_irqs(domain, dev);
> return ret;
> --
> 2.29.2
On 2021-01-25 14:39, Shameerali Kolothum Thodi wrote:
>> -----Original Message-----
>> From: Marc Zyngier [mailto:[email protected]]
>> Sent: 23 January 2021 12:28
>> To: [email protected]
>> Cc: Thomas Gleixner <[email protected]>; Bjorn Helgaas
>> <[email protected]>; Shameerali Kolothum Thodi
>> <[email protected]>; [email protected]
>> Subject: [PATCH] genirq/msi: Activate Multi-MSI early when
>> MSI_FLAG_ACTIVATE_EARLY is set
>>
>> When MSI_FLAG_ACTIVATE_EARLY is set (which is the case for PCI),
>> we perform the activation of the interrupt (which in the case of
>> PCI results in the endpoint being programmed) as soon as the
>> interrupt is allocated.
>>
>> But it appears that this is only done for the first vector,
>> introducing an inconsistent behaviour for PCI Multi-MSI.
>>
>> Fix it by iterating over the number of vectors allocated to
>> each MSI descriptor. This is easily achieved by introducing
>> a new "for_each_msi_vector" iterator, together with a tiny
>> bit of refactoring.
>>
>> Fixes: f3b0946d629c ("genirq/msi: Make sure PCI MSIs are activated
>> early")
>> Reported-by: Shameer Kolothum <[email protected]>
>> Signed-off-by: Marc Zyngier <[email protected]>
>> Cc: [email protected]
>> ---
>> include/linux/msi.h | 6 ++++++
>> kernel/irq/msi.c | 44 ++++++++++++++++++++------------------------
>> 2 files changed, 26 insertions(+), 24 deletions(-)
>>
>> diff --git a/include/linux/msi.h b/include/linux/msi.h
>> index 360a0a7e7341..aef35fd1cf11 100644
>> --- a/include/linux/msi.h
>> +++ b/include/linux/msi.h
>> @@ -178,6 +178,12 @@ struct msi_desc {
>> list_for_each_entry((desc), dev_to_msi_list((dev)), list)
>> #define for_each_msi_entry_safe(desc, tmp, dev) \
>> list_for_each_entry_safe((desc), (tmp), dev_to_msi_list((dev)),
>> list)
>> +#define for_each_msi_vector(desc, __irq, dev) \
>> + for_each_msi_entry((desc), (dev)) \
>> + if ((desc)->irq) \
>> + for (__irq = (desc)->irq; \
>> + __irq < ((desc)->irq + (desc)->nvec_used); \
>> + __irq++)
>>
>> #ifdef CONFIG_IRQ_MSI_IOMMU
>> static inline const void *msi_desc_get_iommu_cookie(struct msi_desc
>> *desc)
>> diff --git a/kernel/irq/msi.c b/kernel/irq/msi.c
>> index 2c0c4d6d0f83..d924676c8781 100644
>> --- a/kernel/irq/msi.c
>> +++ b/kernel/irq/msi.c
>> @@ -436,22 +436,22 @@ int __msi_domain_alloc_irqs(struct irq_domain
>> *domain, struct device *dev,
>>
>> can_reserve = msi_check_reservation_mode(domain, info, dev);
>>
>> - for_each_msi_entry(desc, dev) {
>> - virq = desc->irq;
>> - if (desc->nvec_used == 1)
>> - dev_dbg(dev, "irq %d for MSI\n", virq);
>> - else
>> + /*
>> + * This flag is set by the PCI layer as we need to activate
>> + * the MSI entries before the PCI layer enables MSI in the
>> + * card. Otherwise the card latches a random msi message.
>> + */
>> + if (!(info->flags & MSI_FLAG_ACTIVATE_EARLY))
>> + goto skip_activate;
>
> This will change the dbg print behavior. From the commit f3b0946d629c,
> it looks like the below dev_dbg() code was there for
> !MSI_FLAG_ACTIVATE_EARLY
> case as well. Not sure how much this matters though.
I'm not sure this matters either. We may have relied on these statements
some 6/7 years ago, as the whole hierarchy stuff was brand new, but we
now have a much better debug infrastructure thanks to Thomas. I'd be
totally in favour of dropping it.
Thanks,
M.
--
Jazz is not dead. It just smells funny...
> -----Original Message-----
> From: Marc Zyngier [mailto:[email protected]]
> Sent: 25 January 2021 14:49
> To: Shameerali Kolothum Thodi <[email protected]>
> Cc: [email protected]; Thomas Gleixner <[email protected]>; Bjorn
> Helgaas <[email protected]>; [email protected]
> Subject: Re: [PATCH] genirq/msi: Activate Multi-MSI early when
> MSI_FLAG_ACTIVATE_EARLY is set
>
> On 2021-01-25 14:39, Shameerali Kolothum Thodi wrote:
> >> -----Original Message-----
> >> From: Marc Zyngier [mailto:[email protected]]
> >> Sent: 23 January 2021 12:28
> >> To: [email protected]
> >> Cc: Thomas Gleixner <[email protected]>; Bjorn Helgaas
> >> <[email protected]>; Shameerali Kolothum Thodi
> >> <[email protected]>; [email protected]
> >> Subject: [PATCH] genirq/msi: Activate Multi-MSI early when
> >> MSI_FLAG_ACTIVATE_EARLY is set
> >>
> >> When MSI_FLAG_ACTIVATE_EARLY is set (which is the case for PCI),
> >> we perform the activation of the interrupt (which in the case of
> >> PCI results in the endpoint being programmed) as soon as the
> >> interrupt is allocated.
> >>
> >> But it appears that this is only done for the first vector,
> >> introducing an inconsistent behaviour for PCI Multi-MSI.
> >>
> >> Fix it by iterating over the number of vectors allocated to
> >> each MSI descriptor. This is easily achieved by introducing
> >> a new "for_each_msi_vector" iterator, together with a tiny
> >> bit of refactoring.
> >>
> >> Fixes: f3b0946d629c ("genirq/msi: Make sure PCI MSIs are activated
> >> early")
> >> Reported-by: Shameer Kolothum
> <[email protected]>
> >> Signed-off-by: Marc Zyngier <[email protected]>
> >> Cc: [email protected]
> >> ---
> >> include/linux/msi.h | 6 ++++++
> >> kernel/irq/msi.c | 44 ++++++++++++++++++++------------------------
> >> 2 files changed, 26 insertions(+), 24 deletions(-)
> >>
> >> diff --git a/include/linux/msi.h b/include/linux/msi.h
> >> index 360a0a7e7341..aef35fd1cf11 100644
> >> --- a/include/linux/msi.h
> >> +++ b/include/linux/msi.h
> >> @@ -178,6 +178,12 @@ struct msi_desc {
> >> list_for_each_entry((desc), dev_to_msi_list((dev)), list)
> >> #define for_each_msi_entry_safe(desc, tmp, dev) \
> >> list_for_each_entry_safe((desc), (tmp), dev_to_msi_list((dev)),
> >> list)
> >> +#define for_each_msi_vector(desc, __irq, dev) \
> >> + for_each_msi_entry((desc), (dev)) \
> >> + if ((desc)->irq) \
> >> + for (__irq = (desc)->irq; \
> >> + __irq < ((desc)->irq + (desc)->nvec_used); \
> >> + __irq++)
> >>
> >> #ifdef CONFIG_IRQ_MSI_IOMMU
> >> static inline const void *msi_desc_get_iommu_cookie(struct msi_desc
> >> *desc)
> >> diff --git a/kernel/irq/msi.c b/kernel/irq/msi.c
> >> index 2c0c4d6d0f83..d924676c8781 100644
> >> --- a/kernel/irq/msi.c
> >> +++ b/kernel/irq/msi.c
> >> @@ -436,22 +436,22 @@ int __msi_domain_alloc_irqs(struct irq_domain
> >> *domain, struct device *dev,
> >>
> >> can_reserve = msi_check_reservation_mode(domain, info, dev);
> >>
> >> - for_each_msi_entry(desc, dev) {
> >> - virq = desc->irq;
> >> - if (desc->nvec_used == 1)
> >> - dev_dbg(dev, "irq %d for MSI\n", virq);
> >> - else
> >> + /*
> >> + * This flag is set by the PCI layer as we need to activate
> >> + * the MSI entries before the PCI layer enables MSI in the
> >> + * card. Otherwise the card latches a random msi message.
> >> + */
> >> + if (!(info->flags & MSI_FLAG_ACTIVATE_EARLY))
> >> + goto skip_activate;
> >
> > This will change the dbg print behavior. From the commit f3b0946d629c,
> > it looks like the below dev_dbg() code was there for
> > !MSI_FLAG_ACTIVATE_EARLY
> > case as well. Not sure how much this matters though.
>
> I'm not sure this matters either. We may have relied on these statements
> some 6/7 years ago, as the whole hierarchy stuff was brand new, but we
> now have a much better debug infrastructure thanks to Thomas. I'd be
> totally in favour of dropping it.
>
Ok.
Tested on D06 with GICv4 enabled; the guest MSI device works fine.
FWIW,
Tested-by: Shameer Kolothum <[email protected]>
Thanks,
Shameer
The following commit has been merged into the irq/urgent branch of tip:
Commit-ID: 4c457e8cb75eda91906a4f89fc39bde3f9a43922
Gitweb: https://git.kernel.org/tip/4c457e8cb75eda91906a4f89fc39bde3f9a43922
Author: Marc Zyngier <[email protected]>
AuthorDate: Sat, 23 Jan 2021 12:27:59
Committer: Thomas Gleixner <[email protected]>
CommitterDate: Sat, 30 Jan 2021 01:22:31 +01:00
genirq/msi: Activate Multi-MSI early when MSI_FLAG_ACTIVATE_EARLY is set
When MSI_FLAG_ACTIVATE_EARLY is set (which is the case for PCI),
__msi_domain_alloc_irqs() performs the activation of the interrupt (which
in the case of PCI results in the endpoint being programmed) as soon as the
interrupt is allocated.
But it appears that this is only done for the first vector, introducing an
inconsistent behaviour for PCI Multi-MSI.
Fix it by iterating over the number of vectors allocated to each MSI
descriptor. This is easily achieved by introducing a new
"for_each_msi_vector" iterator, together with a tiny bit of refactoring.
Fixes: f3b0946d629c ("genirq/msi: Make sure PCI MSIs are activated early")
Reported-by: Shameer Kolothum <[email protected]>
Signed-off-by: Marc Zyngier <[email protected]>
Signed-off-by: Thomas Gleixner <[email protected]>
Tested-by: Shameer Kolothum <[email protected]>
Cc: [email protected]
Link: https://lore.kernel.org/r/[email protected]
---
include/linux/msi.h | 6 ++++++
kernel/irq/msi.c | 44 ++++++++++++++++++++------------------------
2 files changed, 26 insertions(+), 24 deletions(-)
diff --git a/include/linux/msi.h b/include/linux/msi.h
index 360a0a7..aef35fd 100644
--- a/include/linux/msi.h
+++ b/include/linux/msi.h
@@ -178,6 +178,12 @@ struct msi_desc {
list_for_each_entry((desc), dev_to_msi_list((dev)), list)
#define for_each_msi_entry_safe(desc, tmp, dev) \
list_for_each_entry_safe((desc), (tmp), dev_to_msi_list((dev)), list)
+#define for_each_msi_vector(desc, __irq, dev) \
+ for_each_msi_entry((desc), (dev)) \
+ if ((desc)->irq) \
+ for (__irq = (desc)->irq; \
+ __irq < ((desc)->irq + (desc)->nvec_used); \
+ __irq++)
#ifdef CONFIG_IRQ_MSI_IOMMU
static inline const void *msi_desc_get_iommu_cookie(struct msi_desc *desc)
diff --git a/kernel/irq/msi.c b/kernel/irq/msi.c
index dc0e2d7..b338d62 100644
--- a/kernel/irq/msi.c
+++ b/kernel/irq/msi.c
@@ -436,22 +436,22 @@ int __msi_domain_alloc_irqs(struct irq_domain *domain, struct device *dev,
can_reserve = msi_check_reservation_mode(domain, info, dev);
- for_each_msi_entry(desc, dev) {
- virq = desc->irq;
- if (desc->nvec_used == 1)
- dev_dbg(dev, "irq %d for MSI\n", virq);
- else
+ /*
+ * This flag is set by the PCI layer as we need to activate
+ * the MSI entries before the PCI layer enables MSI in the
+ * card. Otherwise the card latches a random msi message.
+ */
+ if (!(info->flags & MSI_FLAG_ACTIVATE_EARLY))
+ goto skip_activate;
+
+ for_each_msi_vector(desc, i, dev) {
+ if (desc->irq == i) {
+ virq = desc->irq;
dev_dbg(dev, "irq [%d-%d] for MSI\n",
virq, virq + desc->nvec_used - 1);
- /*
- * This flag is set by the PCI layer as we need to activate
- * the MSI entries before the PCI layer enables MSI in the
- * card. Otherwise the card latches a random msi message.
- */
- if (!(info->flags & MSI_FLAG_ACTIVATE_EARLY))
- continue;
+ }
- irq_data = irq_domain_get_irq_data(domain, desc->irq);
+ irq_data = irq_domain_get_irq_data(domain, i);
if (!can_reserve) {
irqd_clr_can_reserve(irq_data);
if (domain->flags & IRQ_DOMAIN_MSI_NOMASK_QUIRK)
@@ -462,28 +462,24 @@ int __msi_domain_alloc_irqs(struct irq_domain *domain, struct device *dev,
goto cleanup;
}
+skip_activate:
/*
* If these interrupts use reservation mode, clear the activated bit
* so request_irq() will assign the final vector.
*/
if (can_reserve) {
- for_each_msi_entry(desc, dev) {
- irq_data = irq_domain_get_irq_data(domain, desc->irq);
+ for_each_msi_vector(desc, i, dev) {
+ irq_data = irq_domain_get_irq_data(domain, i);
irqd_clr_activated(irq_data);
}
}
return 0;
cleanup:
- for_each_msi_entry(desc, dev) {
- struct irq_data *irqd;
-
- if (desc->irq == virq)
- break;
-
- irqd = irq_domain_get_irq_data(domain, desc->irq);
- if (irqd_is_activated(irqd))
- irq_domain_deactivate_irq(irqd);
+ for_each_msi_vector(desc, i, dev) {
+ irq_data = irq_domain_get_irq_data(domain, i);
+ if (irqd_is_activated(irq_data))
+ irq_domain_deactivate_irq(irq_data);
}
msi_domain_free_irqs(domain, dev);
return ret;