2023-05-08 14:39:33

by Anup Patel

[permalink] [raw]
Subject: [PATCH v3 07/11] irqchip/riscv-imsic: Improve IOMMU DMA support

We have a separate RISC-V IMSIC MSI address for each CPU so changing
MSI (or IRQ) affinity results in re-programming of MSI address in
the PCIe (or platform) device.

Currently, iommu_dma_prepare_msi() is called only once at the
time of IRQ allocation so the IOMMU DMA domain will only have a mapping
for one MSI page. This means iommu_dma_compose_msi_msg() called
by imsic_irq_compose_msi_msg() will always use the same MSI page
irrespective of the target CPU MSI address. In other words, changing
MSI (or IRQ) affinity for a device using an IOMMU DMA domain will not
work.

To address the above issue, we do the following:
1) Map MSI pages for all CPUs in imsic_irq_domain_alloc()
using iommu_dma_prepare_msi().
2) Add a new iommu_dma_select_msi() API to select a specific
MSI page from a set of already mapped MSI pages.
3) Use iommu_dma_select_msi() to select a specific MSI page
before calling iommu_dma_compose_msi_msg() in
imsic_irq_compose_msi_msg().

Reported-by: Vincent Chen <[email protected]>
Signed-off-by: Anup Patel <[email protected]>
---
drivers/iommu/dma-iommu.c | 38 +++++++++++++++++++++++++++++++
drivers/irqchip/irq-riscv-imsic.c | 27 ++++++++++++----------
include/linux/iommu.h | 6 +++++
3 files changed, 59 insertions(+), 12 deletions(-)

diff --git a/drivers/iommu/dma-iommu.c b/drivers/iommu/dma-iommu.c
index 7a9f0b0bddbd..07782c77a6eb 100644
--- a/drivers/iommu/dma-iommu.c
+++ b/drivers/iommu/dma-iommu.c
@@ -1677,6 +1677,44 @@ int iommu_dma_prepare_msi(struct msi_desc *desc, phys_addr_t msi_addr)
return 0;
}

+/**
+ * iommu_dma_select_msi() - Select a MSI page from a set of
+ * already mapped MSI pages in the IOMMU domain.
+ *
+ * @desc: MSI descriptor prepared by iommu_dma_prepare_msi()
+ * @msi_addr: physical address of the MSI page to be selected
+ *
+ * Return: 0 on success or negative error code if the select failed.
+ */
+int iommu_dma_select_msi(struct msi_desc *desc, phys_addr_t msi_addr)
+{
+ struct device *dev = msi_desc_to_dev(desc);
+ struct iommu_domain *domain = iommu_get_domain_for_dev(dev);
+ const struct iommu_dma_msi_page *msi_page;
+ struct iommu_dma_cookie *cookie;
+
+ if (!domain || !domain->iova_cookie) {
+ desc->iommu_cookie = NULL;
+ return 0;
+ }
+
+ cookie = domain->iova_cookie;
+ msi_addr &= ~(phys_addr_t)(cookie_msi_granule(cookie) - 1);
+
+ msi_page = msi_desc_get_iommu_cookie(desc);
+ if (msi_page && msi_page->phys == msi_addr)
+ return 0;
+
+ list_for_each_entry(msi_page, &cookie->msi_page_list, list) {
+ if (msi_page->phys == msi_addr) {
+ msi_desc_set_iommu_cookie(desc, msi_page);
+ return 0;
+ }
+ }
+
+ return -ENOENT;
+}
+
/**
* iommu_dma_compose_msi_msg() - Apply translation to an MSI message
* @desc: MSI descriptor prepared by iommu_dma_prepare_msi()
diff --git a/drivers/irqchip/irq-riscv-imsic.c b/drivers/irqchip/irq-riscv-imsic.c
index 30247c84a6b0..ec61c599e0c5 100644
--- a/drivers/irqchip/irq-riscv-imsic.c
+++ b/drivers/irqchip/irq-riscv-imsic.c
@@ -446,6 +446,10 @@ static void imsic_irq_compose_msi_msg(struct irq_data *d,
if (WARN_ON(err))
return;

+ err = iommu_dma_select_msi(desc, msi_addr);
+ if (WARN_ON(err))
+ return;
+
msg->address_hi = upper_32_bits(msi_addr);
msg->address_lo = lower_32_bits(msi_addr);
msg->data = d->hwirq;
@@ -493,11 +497,18 @@ static int imsic_irq_domain_alloc(struct irq_domain *domain,
int i, hwirq, err = 0;
unsigned int cpu;

- err = imsic_get_cpu(&imsic->lmask, false, &cpu);
- if (err)
- return err;
+ /* Map MSI address of all CPUs */
+ for_each_cpu(cpu, &imsic->lmask) {
+ err = imsic_cpu_page_phys(cpu, 0, &msi_addr);
+ if (err)
+ return err;

- err = imsic_cpu_page_phys(cpu, 0, &msi_addr);
+ err = iommu_dma_prepare_msi(info->desc, msi_addr);
+ if (err)
+ return err;
+ }
+
+ err = imsic_get_cpu(&imsic->lmask, false, &cpu);
if (err)
return err;

@@ -505,10 +516,6 @@ static int imsic_irq_domain_alloc(struct irq_domain *domain,
if (hwirq < 0)
return hwirq;

- err = iommu_dma_prepare_msi(info->desc, msi_addr);
- if (err)
- goto fail;
-
for (i = 0; i < nr_irqs; i++) {
imsic_id_set_target(hwirq + i, cpu);
irq_domain_set_info(domain, virq + i, hwirq + i,
@@ -528,10 +535,6 @@ static int imsic_irq_domain_alloc(struct irq_domain *domain,
}

return 0;
-
-fail:
- imsic_ids_free(hwirq, get_count_order(nr_irqs));
- return err;
}

static void imsic_irq_domain_free(struct irq_domain *domain,
diff --git a/include/linux/iommu.h b/include/linux/iommu.h
index e8c9a7da1060..41e8613832ab 100644
--- a/include/linux/iommu.h
+++ b/include/linux/iommu.h
@@ -1117,6 +1117,7 @@ void iommu_setup_dma_ops(struct device *dev, u64 dma_base, u64 dma_limit);
int iommu_get_msi_cookie(struct iommu_domain *domain, dma_addr_t base);

int iommu_dma_prepare_msi(struct msi_desc *desc, phys_addr_t msi_addr);
+int iommu_dma_select_msi(struct msi_desc *desc, phys_addr_t msi_addr);
void iommu_dma_compose_msi_msg(struct msi_desc *desc, struct msi_msg *msg);

#else /* CONFIG_IOMMU_DMA */
@@ -1138,6 +1139,11 @@ static inline int iommu_dma_prepare_msi(struct msi_desc *desc, phys_addr_t msi_a
return 0;
}

+static inline int iommu_dma_select_msi(struct msi_desc *desc, phys_addr_t msi_addr)
+{
+ return 0;
+}
+
static inline void iommu_dma_compose_msi_msg(struct msi_desc *desc, struct msi_msg *msg)
{
}
--
2.34.1


2023-05-10 10:54:14

by Robin Murphy

[permalink] [raw]
Subject: Re: [PATCH v3 07/11] irqchip/riscv-imsic: Improve IOMMU DMA support

On 2023-05-08 15:28, Anup Patel wrote:
> We have a separate RISC-V IMSIC MSI address for each CPU so changing
> MSI (or IRQ) affinity results in re-programming of MSI address in
> the PCIe (or platform) device.
>
> Currently, the iommu_dma_prepare_msi() is called only once at the
> time of IRQ allocation so IOMMU DMA domain will only have mapping
> for one MSI page. This means iommu_dma_compose_msi_msg() called
> by imsic_irq_compose_msi_msg() will always use the same MSI page
> irrespective to target CPU MSI address. In other words, changing
> MSI (or IRQ) affinity for device using IOMMU DMA domain will not
> work.
>
> To address above issue, we do the following:
> 1) Map MSI pages for all CPUs in imsic_irq_domain_alloc()
> using iommu_dma_prepare_msi().
> 2) Add a new iommu_dma_select_msi() API to select a specific
> MSI page from a set of already mapped MSI pages.
> 3) Use iommu_dma_select_msi() to select a specific MSI page
> before calling iommu_dma_compose_msi_msg() in
> imsic_irq_compose_msi_msg().

The high-level design is that prepare ensures any necessary page
mappings exist, then compose retrieves the appropriate page for the
given message. I think it generalises well enough without needing a new
op, it just means that caching a single page in the msi_desc up-front no
longer fits, so that wants tweaking to allow compose to do a more
general lookup.

Thanks,
Robin.

> Reported-by: Vincent Chen <[email protected]>
> Signed-off-by: Anup Patel <[email protected]>
> ---
> drivers/iommu/dma-iommu.c | 38 +++++++++++++++++++++++++++++++
> drivers/irqchip/irq-riscv-imsic.c | 27 ++++++++++++----------
> include/linux/iommu.h | 6 +++++
> 3 files changed, 59 insertions(+), 12 deletions(-)
>
> diff --git a/drivers/iommu/dma-iommu.c b/drivers/iommu/dma-iommu.c
> index 7a9f0b0bddbd..07782c77a6eb 100644
> --- a/drivers/iommu/dma-iommu.c
> +++ b/drivers/iommu/dma-iommu.c
> @@ -1677,6 +1677,44 @@ int iommu_dma_prepare_msi(struct msi_desc *desc, phys_addr_t msi_addr)
> return 0;
> }
>
> +/**
> + * iommu_dma_select_msi() - Select a MSI page from a set of
> + * already mapped MSI pages in the IOMMU domain.
> + *
> + * @desc: MSI descriptor prepared by iommu_dma_prepare_msi()
> + * @msi_addr: physical address of the MSI page to be selected
> + *
> + * Return: 0 on success or negative error code if the select failed.
> + */
> +int iommu_dma_select_msi(struct msi_desc *desc, phys_addr_t msi_addr)
> +{
> + struct device *dev = msi_desc_to_dev(desc);
> + struct iommu_domain *domain = iommu_get_domain_for_dev(dev);
> + const struct iommu_dma_msi_page *msi_page;
> + struct iommu_dma_cookie *cookie;
> +
> + if (!domain || !domain->iova_cookie) {
> + desc->iommu_cookie = NULL;
> + return 0;
> + }
> +
> + cookie = domain->iova_cookie;
> + msi_addr &= ~(phys_addr_t)(cookie_msi_granule(cookie) - 1);
> +
> + msi_page = msi_desc_get_iommu_cookie(desc);
> + if (msi_page && msi_page->phys == msi_addr)
> + return 0;
> +
> + list_for_each_entry(msi_page, &cookie->msi_page_list, list) {
> + if (msi_page->phys == msi_addr) {
> + msi_desc_set_iommu_cookie(desc, msi_page);
> + return 0;
> + }
> + }
> +
> + return -ENOENT;
> +}
> +
> /**
> * iommu_dma_compose_msi_msg() - Apply translation to an MSI message
> * @desc: MSI descriptor prepared by iommu_dma_prepare_msi()
> diff --git a/drivers/irqchip/irq-riscv-imsic.c b/drivers/irqchip/irq-riscv-imsic.c
> index 30247c84a6b0..ec61c599e0c5 100644
> --- a/drivers/irqchip/irq-riscv-imsic.c
> +++ b/drivers/irqchip/irq-riscv-imsic.c
> @@ -446,6 +446,10 @@ static void imsic_irq_compose_msi_msg(struct irq_data *d,
> if (WARN_ON(err))
> return;
>
> + err = iommu_dma_select_msi(desc, msi_addr);
> + if (WARN_ON(err))
> + return;
> +
> msg->address_hi = upper_32_bits(msi_addr);
> msg->address_lo = lower_32_bits(msi_addr);
> msg->data = d->hwirq;
> @@ -493,11 +497,18 @@ static int imsic_irq_domain_alloc(struct irq_domain *domain,
> int i, hwirq, err = 0;
> unsigned int cpu;
>
> - err = imsic_get_cpu(&imsic->lmask, false, &cpu);
> - if (err)
> - return err;
> + /* Map MSI address of all CPUs */
> + for_each_cpu(cpu, &imsic->lmask) {
> + err = imsic_cpu_page_phys(cpu, 0, &msi_addr);
> + if (err)
> + return err;
>
> - err = imsic_cpu_page_phys(cpu, 0, &msi_addr);
> + err = iommu_dma_prepare_msi(info->desc, msi_addr);
> + if (err)
> + return err;
> + }
> +
> + err = imsic_get_cpu(&imsic->lmask, false, &cpu);
> if (err)
> return err;
>
> @@ -505,10 +516,6 @@ static int imsic_irq_domain_alloc(struct irq_domain *domain,
> if (hwirq < 0)
> return hwirq;
>
> - err = iommu_dma_prepare_msi(info->desc, msi_addr);
> - if (err)
> - goto fail;
> -
> for (i = 0; i < nr_irqs; i++) {
> imsic_id_set_target(hwirq + i, cpu);
> irq_domain_set_info(domain, virq + i, hwirq + i,
> @@ -528,10 +535,6 @@ static int imsic_irq_domain_alloc(struct irq_domain *domain,
> }
>
> return 0;
> -
> -fail:
> - imsic_ids_free(hwirq, get_count_order(nr_irqs));
> - return err;
> }
>
> static void imsic_irq_domain_free(struct irq_domain *domain,
> diff --git a/include/linux/iommu.h b/include/linux/iommu.h
> index e8c9a7da1060..41e8613832ab 100644
> --- a/include/linux/iommu.h
> +++ b/include/linux/iommu.h
> @@ -1117,6 +1117,7 @@ void iommu_setup_dma_ops(struct device *dev, u64 dma_base, u64 dma_limit);
> int iommu_get_msi_cookie(struct iommu_domain *domain, dma_addr_t base);
>
> int iommu_dma_prepare_msi(struct msi_desc *desc, phys_addr_t msi_addr);
> +int iommu_dma_select_msi(struct msi_desc *desc, phys_addr_t msi_addr);
> void iommu_dma_compose_msi_msg(struct msi_desc *desc, struct msi_msg *msg);
>
> #else /* CONFIG_IOMMU_DMA */
> @@ -1138,6 +1139,11 @@ static inline int iommu_dma_prepare_msi(struct msi_desc *desc, phys_addr_t msi_a
> return 0;
> }
>
> +static inline int iommu_dma_select_msi(struct msi_desc *desc, phys_addr_t msi_addr)
> +{
> + return 0;
> +}
> +
> static inline void iommu_dma_compose_msi_msg(struct msi_desc *desc, struct msi_msg *msg)
> {
> }

2023-05-10 15:18:13

by Anup Patel

[permalink] [raw]
Subject: Re: [PATCH v3 07/11] irqchip/riscv-imsic: Improve IOMMU DMA support

On Wed, May 10, 2023 at 4:18 PM Robin Murphy <[email protected]> wrote:
>
> On 2023-05-08 15:28, Anup Patel wrote:
> > We have a separate RISC-V IMSIC MSI address for each CPU so changing
> > MSI (or IRQ) affinity results in re-programming of MSI address in
> > the PCIe (or platform) device.
> >
> > Currently, the iommu_dma_prepare_msi() is called only once at the
> > time of IRQ allocation so IOMMU DMA domain will only have mapping
> > for one MSI page. This means iommu_dma_compose_msi_msg() called
> > by imsic_irq_compose_msi_msg() will always use the same MSI page
> > irrespective to target CPU MSI address. In other words, changing
> > MSI (or IRQ) affinity for device using IOMMU DMA domain will not
> > work.
> >
> > To address above issue, we do the following:
> > 1) Map MSI pages for all CPUs in imsic_irq_domain_alloc()
> > using iommu_dma_prepare_msi().
> > 2) Add a new iommu_dma_select_msi() API to select a specific
> > MSI page from a set of already mapped MSI pages.
> > 3) Use iommu_dma_select_msi() to select a specific MSI page
> > before calling iommu_dma_compose_msi_msg() in
> > imsic_irq_compose_msi_msg().
>
> The high-level design is that prepare ensures any necessary page
> mappings exist, then compose retrieves the appropriate page for the
> given message. I think it generalises well enough without needing a new
> op, it just means that caching a single page in the msi_desc up-front no
> longer fits, so that wants tweaking to allow compose to do a more
> general lookup.

Yes, usage of msi_desc_get/set_iommu_cookie() is the only problem.

To have minimal impact on other irqchip drivers, I added a separate
iommu_dma_select_msi() API, but in the next patch revision I will
tweak iommu_dma_compose_msi_msg() to look up the msi_page based
on the existing MSI address in "struct msi_msg".

Regards,
Anup

>
> Thanks,
> Robin.
>
> > Reported-by: Vincent Chen <[email protected]>
> > Signed-off-by: Anup Patel <[email protected]>
> > ---
> > drivers/iommu/dma-iommu.c | 38 +++++++++++++++++++++++++++++++
> > drivers/irqchip/irq-riscv-imsic.c | 27 ++++++++++++----------
> > include/linux/iommu.h | 6 +++++
> > 3 files changed, 59 insertions(+), 12 deletions(-)
> >
> > diff --git a/drivers/iommu/dma-iommu.c b/drivers/iommu/dma-iommu.c
> > index 7a9f0b0bddbd..07782c77a6eb 100644
> > --- a/drivers/iommu/dma-iommu.c
> > +++ b/drivers/iommu/dma-iommu.c
> > @@ -1677,6 +1677,44 @@ int iommu_dma_prepare_msi(struct msi_desc *desc, phys_addr_t msi_addr)
> > return 0;
> > }
> >
> > +/**
> > + * iommu_dma_select_msi() - Select a MSI page from a set of
> > + * already mapped MSI pages in the IOMMU domain.
> > + *
> > + * @desc: MSI descriptor prepared by iommu_dma_prepare_msi()
> > + * @msi_addr: physical address of the MSI page to be selected
> > + *
> > + * Return: 0 on success or negative error code if the select failed.
> > + */
> > +int iommu_dma_select_msi(struct msi_desc *desc, phys_addr_t msi_addr)
> > +{
> > + struct device *dev = msi_desc_to_dev(desc);
> > + struct iommu_domain *domain = iommu_get_domain_for_dev(dev);
> > + const struct iommu_dma_msi_page *msi_page;
> > + struct iommu_dma_cookie *cookie;
> > +
> > + if (!domain || !domain->iova_cookie) {
> > + desc->iommu_cookie = NULL;
> > + return 0;
> > + }
> > +
> > + cookie = domain->iova_cookie;
> > + msi_addr &= ~(phys_addr_t)(cookie_msi_granule(cookie) - 1);
> > +
> > + msi_page = msi_desc_get_iommu_cookie(desc);
> > + if (msi_page && msi_page->phys == msi_addr)
> > + return 0;
> > +
> > + list_for_each_entry(msi_page, &cookie->msi_page_list, list) {
> > + if (msi_page->phys == msi_addr) {
> > + msi_desc_set_iommu_cookie(desc, msi_page);
> > + return 0;
> > + }
> > + }
> > +
> > + return -ENOENT;
> > +}
> > +
> > /**
> > * iommu_dma_compose_msi_msg() - Apply translation to an MSI message
> > * @desc: MSI descriptor prepared by iommu_dma_prepare_msi()
> > diff --git a/drivers/irqchip/irq-riscv-imsic.c b/drivers/irqchip/irq-riscv-imsic.c
> > index 30247c84a6b0..ec61c599e0c5 100644
> > --- a/drivers/irqchip/irq-riscv-imsic.c
> > +++ b/drivers/irqchip/irq-riscv-imsic.c
> > @@ -446,6 +446,10 @@ static void imsic_irq_compose_msi_msg(struct irq_data *d,
> > if (WARN_ON(err))
> > return;
> >
> > + err = iommu_dma_select_msi(desc, msi_addr);
> > + if (WARN_ON(err))
> > + return;
> > +
> > msg->address_hi = upper_32_bits(msi_addr);
> > msg->address_lo = lower_32_bits(msi_addr);
> > msg->data = d->hwirq;
> > @@ -493,11 +497,18 @@ static int imsic_irq_domain_alloc(struct irq_domain *domain,
> > int i, hwirq, err = 0;
> > unsigned int cpu;
> >
> > - err = imsic_get_cpu(&imsic->lmask, false, &cpu);
> > - if (err)
> > - return err;
> > + /* Map MSI address of all CPUs */
> > + for_each_cpu(cpu, &imsic->lmask) {
> > + err = imsic_cpu_page_phys(cpu, 0, &msi_addr);
> > + if (err)
> > + return err;
> >
> > - err = imsic_cpu_page_phys(cpu, 0, &msi_addr);
> > + err = iommu_dma_prepare_msi(info->desc, msi_addr);
> > + if (err)
> > + return err;
> > + }
> > +
> > + err = imsic_get_cpu(&imsic->lmask, false, &cpu);
> > if (err)
> > return err;
> >
> > @@ -505,10 +516,6 @@ static int imsic_irq_domain_alloc(struct irq_domain *domain,
> > if (hwirq < 0)
> > return hwirq;
> >
> > - err = iommu_dma_prepare_msi(info->desc, msi_addr);
> > - if (err)
> > - goto fail;
> > -
> > for (i = 0; i < nr_irqs; i++) {
> > imsic_id_set_target(hwirq + i, cpu);
> > irq_domain_set_info(domain, virq + i, hwirq + i,
> > @@ -528,10 +535,6 @@ static int imsic_irq_domain_alloc(struct irq_domain *domain,
> > }
> >
> > return 0;
> > -
> > -fail:
> > - imsic_ids_free(hwirq, get_count_order(nr_irqs));
> > - return err;
> > }
> >
> > static void imsic_irq_domain_free(struct irq_domain *domain,
> > diff --git a/include/linux/iommu.h b/include/linux/iommu.h
> > index e8c9a7da1060..41e8613832ab 100644
> > --- a/include/linux/iommu.h
> > +++ b/include/linux/iommu.h
> > @@ -1117,6 +1117,7 @@ void iommu_setup_dma_ops(struct device *dev, u64 dma_base, u64 dma_limit);
> > int iommu_get_msi_cookie(struct iommu_domain *domain, dma_addr_t base);
> >
> > int iommu_dma_prepare_msi(struct msi_desc *desc, phys_addr_t msi_addr);
> > +int iommu_dma_select_msi(struct msi_desc *desc, phys_addr_t msi_addr);
> > void iommu_dma_compose_msi_msg(struct msi_desc *desc, struct msi_msg *msg);
> >
> > #else /* CONFIG_IOMMU_DMA */
> > @@ -1138,6 +1139,11 @@ static inline int iommu_dma_prepare_msi(struct msi_desc *desc, phys_addr_t msi_a
> > return 0;
> > }
> >
> > +static inline int iommu_dma_select_msi(struct msi_desc *desc, phys_addr_t msi_addr)
> > +{
> > + return 0;
> > +}
> > +
> > static inline void iommu_dma_compose_msi_msg(struct msi_desc *desc, struct msi_msg *msg)
> > {
> > }

2023-05-15 13:18:24

by Jason Gunthorpe

[permalink] [raw]
Subject: Re: [PATCH v3 07/11] irqchip/riscv-imsic: Improve IOMMU DMA support

On Mon, May 08, 2023 at 07:58:38PM +0530, Anup Patel wrote:
> We have a separate RISC-V IMSIC MSI address for each CPU so changing
> MSI (or IRQ) affinity results in re-programming of MSI address in
> the PCIe (or platform) device.
>
> Currently, the iommu_dma_prepare_msi() is called only once at the
> time of IRQ allocation so IOMMU DMA domain will only have mapping
> for one MSI page. This means iommu_dma_compose_msi_msg() called
> by imsic_irq_compose_msi_msg() will always use the same MSI page
> irrespective to target CPU MSI address. In other words, changing
> MSI (or IRQ) affinity for device using IOMMU DMA domain will not
> work.
>
> To address above issue, we do the following:
> 1) Map MSI pages for all CPUs in imsic_irq_domain_alloc()
> using iommu_dma_prepare_msi().
> 2) Add a new iommu_dma_select_msi() API to select a specific
> MSI page from a set of already mapped MSI pages.
> 3) Use iommu_dma_select_msi() to select a specific MSI page
> before calling iommu_dma_compose_msi_msg() in
> imsic_irq_compose_msi_msg().

Is there an iommu driver somewhere in all this? I don't obviously see
one?

There should be no reason to use the dma-iommu.c stuff just to make
interrupts work, that is only necessary if there is an iommu, and the
platform architecture requires the iommu to have the MSI region
programmed into IOPTEs.

And I'd be much happier if we could clean this design up before risc-v
starts using it too :\

Jason

2023-06-13 07:57:55

by Anup Patel

[permalink] [raw]
Subject: Re: [PATCH v3 07/11] irqchip/riscv-imsic: Improve IOMMU DMA support

On Mon, May 15, 2023 at 6:23 PM Jason Gunthorpe <[email protected]> wrote:
>
> On Mon, May 08, 2023 at 07:58:38PM +0530, Anup Patel wrote:
> > We have a separate RISC-V IMSIC MSI address for each CPU so changing
> > MSI (or IRQ) affinity results in re-programming of MSI address in
> > the PCIe (or platform) device.
> >
> > Currently, the iommu_dma_prepare_msi() is called only once at the
> > time of IRQ allocation so IOMMU DMA domain will only have mapping
> > for one MSI page. This means iommu_dma_compose_msi_msg() called
> > by imsic_irq_compose_msi_msg() will always use the same MSI page
> > irrespective to target CPU MSI address. In other words, changing
> > MSI (or IRQ) affinity for device using IOMMU DMA domain will not
> > work.
> >
> > To address above issue, we do the following:
> > 1) Map MSI pages for all CPUs in imsic_irq_domain_alloc()
> > using iommu_dma_prepare_msi().
> > 2) Add a new iommu_dma_select_msi() API to select a specific
> > MSI page from a set of already mapped MSI pages.
> > 3) Use iommu_dma_select_msi() to select a specific MSI page
> > before calling iommu_dma_compose_msi_msg() in
> > imsic_irq_compose_msi_msg().
>
> Is there an iommu driver somewhere in all this? I don't obviously see
> one?

Sorry for the delayed response.

The RISC-V IOMMU specification is frozen and will be ratified/released
anytime this month or next.
https://github.com/riscv-non-isa/riscv-iommu/releases/download/v1.0-rc6/riscv-iommu.pdf

The RISC-V IOMMU driver will be sent out on LKML pretty soon
https://github.com/tjeznach/linux/tree/tjeznach/riscv-iommu
which can be tested on QEMU
https://github.com/tjeznach/qemu/tree/tjeznach/riscv-iommu

>
> There should be no reason to use the dma-iommu.c stuff just to make
> interrupts work, that is only necessary if there is an iommu, and the
> platform architecture requires the iommu to have the MSI region
> programmed into IOPTEs.
>
> And I'd be much happier if we could clean this design up before risc-v
> starts using it too :\
>

Sure, I will send out v4 in the next few days.

Regards,
Anup