2023-02-07 11:16:34

by Nanyong Sun

[permalink] [raw]
Subject: [PATCH v2] vhost/vdpa: Add MSI translation tables to iommu for software-managed MSI

From: Rong Wang <[email protected]>

Once an iommu domain is enabled for a device, the MSI
translation tables have to be there for software-managed MSI.
Otherwise, a platform with software-managed MSI and without an
irq bypass function cannot get a correct memory write event
from PCIe and will not get irqs.
The solution is to obtain the MSI physical base address from the
iommu reserved region and set it as the iommu MSI cookie;
the translation tables will then be created when the irq is requested.

Change log
----------

v1->v2:
- add resv iotlb to avoid overlap mapping.

Signed-off-by: Rong Wang <[email protected]>
Signed-off-by: Nanyong Sun <[email protected]>
---
drivers/iommu/iommu.c | 1 +
drivers/vhost/vdpa.c | 59 ++++++++++++++++++++++++++++++++++++++++---
2 files changed, 57 insertions(+), 3 deletions(-)

diff --git a/drivers/iommu/iommu.c b/drivers/iommu/iommu.c
index 5f6a85aea501..af9c064ad8b2 100644
--- a/drivers/iommu/iommu.c
+++ b/drivers/iommu/iommu.c
@@ -2623,6 +2623,7 @@ void iommu_get_resv_regions(struct device *dev, struct list_head *list)
if (ops->get_resv_regions)
ops->get_resv_regions(dev, list);
}
+EXPORT_SYMBOL(iommu_get_resv_regions);

/**
* iommu_put_resv_regions - release resered regions
diff --git a/drivers/vhost/vdpa.c b/drivers/vhost/vdpa.c
index ec32f785dfde..a58979da8acd 100644
--- a/drivers/vhost/vdpa.c
+++ b/drivers/vhost/vdpa.c
@@ -49,6 +49,7 @@ struct vhost_vdpa {
struct completion completion;
struct vdpa_device *vdpa;
struct hlist_head as[VHOST_VDPA_IOTLB_BUCKETS];
+ struct vhost_iotlb resv_iotlb;
struct device dev;
struct cdev cdev;
atomic_t opened;
@@ -216,6 +217,8 @@ static int vhost_vdpa_reset(struct vhost_vdpa *v)

v->in_batch = 0;

+ vhost_iotlb_reset(&v->resv_iotlb);
+
return vdpa_reset(vdpa);
}

@@ -1013,6 +1016,10 @@ static int vhost_vdpa_process_iotlb_update(struct vhost_vdpa *v,
msg->iova + msg->size - 1 > v->range.last)
return -EINVAL;

+ if (vhost_iotlb_itree_first(&v->resv_iotlb, msg->iova,
+ msg->iova + msg->size - 1))
+ return -EINVAL;
+
if (vhost_iotlb_itree_first(iotlb, msg->iova,
msg->iova + msg->size - 1))
return -EEXIST;
@@ -1103,6 +1110,45 @@ static ssize_t vhost_vdpa_chr_write_iter(struct kiocb *iocb,
return vhost_chr_write_iter(dev, from);
}

+static int vhost_vdpa_resv_iommu_region(struct iommu_domain *domain, struct device *dma_dev,
+ struct vhost_iotlb *resv_iotlb)
+{
+ struct list_head dev_resv_regions;
+ phys_addr_t resv_msi_base = 0;
+ struct iommu_resv_region *region;
+ int ret = 0;
+ bool with_sw_msi = false;
+ bool with_hw_msi = false;
+
+ INIT_LIST_HEAD(&dev_resv_regions);
+ iommu_get_resv_regions(dma_dev, &dev_resv_regions);
+
+ list_for_each_entry(region, &dev_resv_regions, list) {
+ ret = vhost_iotlb_add_range_ctx(resv_iotlb, region->start,
+ region->start + region->length - 1,
+ 0, 0, NULL);
+ if (ret) {
+ vhost_iotlb_reset(resv_iotlb);
+ break;
+ }
+
+ if (region->type == IOMMU_RESV_MSI)
+ with_hw_msi = true;
+
+ if (region->type == IOMMU_RESV_SW_MSI) {
+ resv_msi_base = region->start;
+ with_sw_msi = true;
+ }
+ }
+
+ if (!ret && !with_hw_msi && with_sw_msi)
+ ret = iommu_get_msi_cookie(domain, resv_msi_base);
+
+ iommu_put_resv_regions(dma_dev, &dev_resv_regions);
+
+ return ret;
+}
+
static int vhost_vdpa_alloc_domain(struct vhost_vdpa *v)
{
struct vdpa_device *vdpa = v->vdpa;
@@ -1128,11 +1174,16 @@ static int vhost_vdpa_alloc_domain(struct vhost_vdpa *v)

ret = iommu_attach_device(v->domain, dma_dev);
if (ret)
- goto err_attach;
+ goto err_alloc_domain;

- return 0;
+ ret = vhost_vdpa_resv_iommu_region(v->domain, dma_dev, &v->resv_iotlb);
+ if (ret)
+ goto err_attach_device;

-err_attach:
+ return 0;
+err_attach_device:
+ iommu_detach_device(v->domain, dma_dev);
+err_alloc_domain:
iommu_domain_free(v->domain);
return ret;
}
@@ -1385,6 +1436,8 @@ static int vhost_vdpa_probe(struct vdpa_device *vdpa)
goto err;
}

+ vhost_iotlb_init(&v->resv_iotlb, 0, 0);
+
r = dev_set_name(&v->dev, "vhost-vdpa-%u", minor);
if (r)
goto err;
--
2.25.1



2023-02-13 12:23:21

by Michael S. Tsirkin

[permalink] [raw]
Subject: Re: [PATCH v2] vhost/vdpa: Add MSI translation tables to iommu for software-managed MSI

On Tue, Feb 07, 2023 at 08:08:43PM +0800, Nanyong Sun wrote:
> From: Rong Wang <[email protected]>
>
> Once enable iommu domain for one device, the MSI
> translation tables have to be there for software-managed MSI.
> Otherwise, platform with software-managed MSI without an
> irq bypass function, can not get a correct memory write event
> from pcie, will not get irqs.
> The solution is to obtain the MSI phy base address from
> iommu reserved region, and set it to iommu MSI cookie,
> then translation tables will be created while request irq.
>
> Change log
> ----------
>
> v1->v2:
> - add resv iotlb to avoid overlap mapping.
>
> Signed-off-by: Rong Wang <[email protected]>
> Signed-off-by: Nanyong Sun <[email protected]>


Could I get an ACK from IOMMU maintainers on exporting this pls?
> ---
> drivers/iommu/iommu.c | 1 +
> drivers/vhost/vdpa.c | 59 ++++++++++++++++++++++++++++++++++++++++---
> 2 files changed, 57 insertions(+), 3 deletions(-)
>
> diff --git a/drivers/iommu/iommu.c b/drivers/iommu/iommu.c
> index 5f6a85aea501..af9c064ad8b2 100644
> --- a/drivers/iommu/iommu.c
> +++ b/drivers/iommu/iommu.c
> @@ -2623,6 +2623,7 @@ void iommu_get_resv_regions(struct device *dev, struct list_head *list)
> if (ops->get_resv_regions)
> ops->get_resv_regions(dev, list);
> }
> +EXPORT_SYMBOL(iommu_get_resv_regions);
>
> /**
> * iommu_put_resv_regions - release resered regions
> diff --git a/drivers/vhost/vdpa.c b/drivers/vhost/vdpa.c
> index ec32f785dfde..a58979da8acd 100644
> --- a/drivers/vhost/vdpa.c
> +++ b/drivers/vhost/vdpa.c
> @@ -49,6 +49,7 @@ struct vhost_vdpa {
> struct completion completion;
> struct vdpa_device *vdpa;
> struct hlist_head as[VHOST_VDPA_IOTLB_BUCKETS];
> + struct vhost_iotlb resv_iotlb;
> struct device dev;
> struct cdev cdev;
> atomic_t opened;
> @@ -216,6 +217,8 @@ static int vhost_vdpa_reset(struct vhost_vdpa *v)
>
> v->in_batch = 0;
>
> + vhost_iotlb_reset(&v->resv_iotlb);
> +
> return vdpa_reset(vdpa);
> }
>
> @@ -1013,6 +1016,10 @@ static int vhost_vdpa_process_iotlb_update(struct vhost_vdpa *v,
> msg->iova + msg->size - 1 > v->range.last)
> return -EINVAL;
>
> + if (vhost_iotlb_itree_first(&v->resv_iotlb, msg->iova,
> + msg->iova + msg->size - 1))
> + return -EINVAL;
> +
> if (vhost_iotlb_itree_first(iotlb, msg->iova,
> msg->iova + msg->size - 1))
> return -EEXIST;
> @@ -1103,6 +1110,45 @@ static ssize_t vhost_vdpa_chr_write_iter(struct kiocb *iocb,
> return vhost_chr_write_iter(dev, from);
> }
>
> +static int vhost_vdpa_resv_iommu_region(struct iommu_domain *domain, struct device *dma_dev,
> + struct vhost_iotlb *resv_iotlb)
> +{
> + struct list_head dev_resv_regions;
> + phys_addr_t resv_msi_base = 0;
> + struct iommu_resv_region *region;
> + int ret = 0;
> + bool with_sw_msi = false;
> + bool with_hw_msi = false;
> +
> + INIT_LIST_HEAD(&dev_resv_regions);
> + iommu_get_resv_regions(dma_dev, &dev_resv_regions);
> +
> + list_for_each_entry(region, &dev_resv_regions, list) {
> + ret = vhost_iotlb_add_range_ctx(resv_iotlb, region->start,
> + region->start + region->length - 1,
> + 0, 0, NULL);
> + if (ret) {
> + vhost_iotlb_reset(resv_iotlb);
> + break;
> + }
> +
> + if (region->type == IOMMU_RESV_MSI)
> + with_hw_msi = true;
> +
> + if (region->type == IOMMU_RESV_SW_MSI) {
> + resv_msi_base = region->start;
> + with_sw_msi = true;
> + }
> + }
> +
> + if (!ret && !with_hw_msi && with_sw_msi)
> + ret = iommu_get_msi_cookie(domain, resv_msi_base);
> +
> + iommu_put_resv_regions(dma_dev, &dev_resv_regions);
> +
> + return ret;
> +}
> +
> static int vhost_vdpa_alloc_domain(struct vhost_vdpa *v)
> {
> struct vdpa_device *vdpa = v->vdpa;
> @@ -1128,11 +1174,16 @@ static int vhost_vdpa_alloc_domain(struct vhost_vdpa *v)
>
> ret = iommu_attach_device(v->domain, dma_dev);
> if (ret)
> - goto err_attach;
> + goto err_alloc_domain;
>
> - return 0;
> + ret = vhost_vdpa_resv_iommu_region(v->domain, dma_dev, &v->resv_iotlb);
> + if (ret)
> + goto err_attach_device;
>
> -err_attach:
> + return 0;
> +err_attach_device:
> + iommu_detach_device(v->domain, dma_dev);
> +err_alloc_domain:
> iommu_domain_free(v->domain);
> return ret;
> }
> @@ -1385,6 +1436,8 @@ static int vhost_vdpa_probe(struct vdpa_device *vdpa)
> goto err;
> }
>
> + vhost_iotlb_init(&v->resv_iotlb, 0, 0);
> +
> r = dev_set_name(&v->dev, "vhost-vdpa-%u", minor);
> if (r)
> goto err;

Jason any feedback on vdpa change here?

> --
> 2.25.1


2023-02-15 11:49:26

by Michael S. Tsirkin

[permalink] [raw]
Subject: Re: [PATCH v2] vhost/vdpa: Add MSI translation tables to iommu for software-managed MSI

On Tue, Feb 07, 2023 at 08:08:43PM +0800, Nanyong Sun wrote:
> From: Rong Wang <[email protected]>
>
> Once enable iommu domain for one device, the MSI
> translation tables have to be there for software-managed MSI.
> Otherwise, platform with software-managed MSI without an
> irq bypass function, can not get a correct memory write event
> from pcie, will not get irqs.
> The solution is to obtain the MSI phy base address from
> iommu reserved region, and set it to iommu MSI cookie,
> then translation tables will be created while request irq.
>
> Change log
> ----------
>
> v1->v2:
> - add resv iotlb to avoid overlap mapping.

Please put the changelog after the --- line.

> Signed-off-by: Rong Wang <[email protected]>
> Signed-off-by: Nanyong Sun <[email protected]>
> ---
> drivers/iommu/iommu.c | 1 +
> drivers/vhost/vdpa.c | 59 ++++++++++++++++++++++++++++++++++++++++---
> 2 files changed, 57 insertions(+), 3 deletions(-)
>
> diff --git a/drivers/iommu/iommu.c b/drivers/iommu/iommu.c
> index 5f6a85aea501..af9c064ad8b2 100644
> --- a/drivers/iommu/iommu.c
> +++ b/drivers/iommu/iommu.c
> @@ -2623,6 +2623,7 @@ void iommu_get_resv_regions(struct device *dev, struct list_head *list)
> if (ops->get_resv_regions)
> ops->get_resv_regions(dev, list);
> }
> +EXPORT_SYMBOL(iommu_get_resv_regions);
>
> /**
> * iommu_put_resv_regions - release resered regions
> diff --git a/drivers/vhost/vdpa.c b/drivers/vhost/vdpa.c
> index ec32f785dfde..a58979da8acd 100644
> --- a/drivers/vhost/vdpa.c
> +++ b/drivers/vhost/vdpa.c
> @@ -49,6 +49,7 @@ struct vhost_vdpa {
> struct completion completion;
> struct vdpa_device *vdpa;
> struct hlist_head as[VHOST_VDPA_IOTLB_BUCKETS];
> + struct vhost_iotlb resv_iotlb;
> struct device dev;
> struct cdev cdev;
> atomic_t opened;
> @@ -216,6 +217,8 @@ static int vhost_vdpa_reset(struct vhost_vdpa *v)
>
> v->in_batch = 0;
>
> + vhost_iotlb_reset(&v->resv_iotlb);
> +
> return vdpa_reset(vdpa);
> }
>
> @@ -1013,6 +1016,10 @@ static int vhost_vdpa_process_iotlb_update(struct vhost_vdpa *v,
> msg->iova + msg->size - 1 > v->range.last)
> return -EINVAL;
>
> + if (vhost_iotlb_itree_first(&v->resv_iotlb, msg->iova,
> + msg->iova + msg->size - 1))
> + return -EINVAL;
> +
> if (vhost_iotlb_itree_first(iotlb, msg->iova,
> msg->iova + msg->size - 1))
> return -EEXIST;
> @@ -1103,6 +1110,45 @@ static ssize_t vhost_vdpa_chr_write_iter(struct kiocb *iocb,
> return vhost_chr_write_iter(dev, from);
> }
>
> +static int vhost_vdpa_resv_iommu_region(struct iommu_domain *domain, struct device *dma_dev,
> + struct vhost_iotlb *resv_iotlb)
> +{
> + struct list_head dev_resv_regions;
> + phys_addr_t resv_msi_base = 0;
> + struct iommu_resv_region *region;
> + int ret = 0;
> + bool with_sw_msi = false;
> + bool with_hw_msi = false;
> +
> + INIT_LIST_HEAD(&dev_resv_regions);
> + iommu_get_resv_regions(dma_dev, &dev_resv_regions);
> +
> + list_for_each_entry(region, &dev_resv_regions, list) {
> + ret = vhost_iotlb_add_range_ctx(resv_iotlb, region->start,
> + region->start + region->length - 1,
> + 0, 0, NULL);
> + if (ret) {
> + vhost_iotlb_reset(resv_iotlb);
> + break;
> + }
> +
> + if (region->type == IOMMU_RESV_MSI)
> + with_hw_msi = true;
> +
> + if (region->type == IOMMU_RESV_SW_MSI) {
> + resv_msi_base = region->start;
> + with_sw_msi = true;
> + }
> + }
> +
> + if (!ret && !with_hw_msi && with_sw_msi)
> + ret = iommu_get_msi_cookie(domain, resv_msi_base);
> +
> + iommu_put_resv_regions(dma_dev, &dev_resv_regions);
> +
> + return ret;
> +}
> +
> static int vhost_vdpa_alloc_domain(struct vhost_vdpa *v)
> {
> struct vdpa_device *vdpa = v->vdpa;
> @@ -1128,11 +1174,16 @@ static int vhost_vdpa_alloc_domain(struct vhost_vdpa *v)
>
> ret = iommu_attach_device(v->domain, dma_dev);
> if (ret)
> - goto err_attach;
> + goto err_alloc_domain;
>
> - return 0;
> + ret = vhost_vdpa_resv_iommu_region(v->domain, dma_dev, &v->resv_iotlb);
> + if (ret)
> + goto err_attach_device;
>
> -err_attach:
> + return 0;
> +err_attach_device:
> + iommu_detach_device(v->domain, dma_dev);
> +err_alloc_domain:
> iommu_domain_free(v->domain);
> return ret;
> }
> @@ -1385,6 +1436,8 @@ static int vhost_vdpa_probe(struct vdpa_device *vdpa)
> goto err;
> }
>
> + vhost_iotlb_init(&v->resv_iotlb, 0, 0);
> +
> r = dev_set_name(&v->dev, "vhost-vdpa-%u", minor);
> if (r)
> goto err;
> --
> 2.25.1


2023-02-16 04:44:02

by Jason Wang

[permalink] [raw]
Subject: Re: [PATCH v2] vhost/vdpa: Add MSI translation tables to iommu for software-managed MSI


在 2023/2/7 20:08, Nanyong Sun 写道:
> From: Rong Wang <[email protected]>
>
> Once enable iommu domain for one device, the MSI
> translation tables have to be there for software-managed MSI.
> Otherwise, platform with software-managed MSI without an
> irq bypass function, can not get a correct memory write event
> from pcie, will not get irqs.
> The solution is to obtain the MSI phy base address from
> iommu reserved region, and set it to iommu MSI cookie,
> then translation tables will be created while request irq.
>
> Change log
> ----------
>
> v1->v2:
> - add resv iotlb to avoid overlap mapping.
>
> Signed-off-by: Rong Wang <[email protected]>
> Signed-off-by: Nanyong Sun <[email protected]>
> ---
> drivers/iommu/iommu.c | 1 +
> drivers/vhost/vdpa.c | 59 ++++++++++++++++++++++++++++++++++++++++---
> 2 files changed, 57 insertions(+), 3 deletions(-)
>
> diff --git a/drivers/iommu/iommu.c b/drivers/iommu/iommu.c
> index 5f6a85aea501..af9c064ad8b2 100644
> --- a/drivers/iommu/iommu.c
> +++ b/drivers/iommu/iommu.c
> @@ -2623,6 +2623,7 @@ void iommu_get_resv_regions(struct device *dev, struct list_head *list)
> if (ops->get_resv_regions)
> ops->get_resv_regions(dev, list);
> }
> +EXPORT_SYMBOL(iommu_get_resv_regions);
>
> /**
> * iommu_put_resv_regions - release resered regions
> diff --git a/drivers/vhost/vdpa.c b/drivers/vhost/vdpa.c
> index ec32f785dfde..a58979da8acd 100644
> --- a/drivers/vhost/vdpa.c
> +++ b/drivers/vhost/vdpa.c
> @@ -49,6 +49,7 @@ struct vhost_vdpa {
> struct completion completion;
> struct vdpa_device *vdpa;
> struct hlist_head as[VHOST_VDPA_IOTLB_BUCKETS];
> + struct vhost_iotlb resv_iotlb;


Nit: it might be better to rename this as resv_regions.


> struct device dev;
> struct cdev cdev;
> atomic_t opened;
> @@ -216,6 +217,8 @@ static int vhost_vdpa_reset(struct vhost_vdpa *v)
>
> v->in_batch = 0;
>
> + vhost_iotlb_reset(&v->resv_iotlb);
> +
> return vdpa_reset(vdpa);
> }
>
> @@ -1013,6 +1016,10 @@ static int vhost_vdpa_process_iotlb_update(struct vhost_vdpa *v,
> msg->iova + msg->size - 1 > v->range.last)
> return -EINVAL;
>
> + if (vhost_iotlb_itree_first(&v->resv_iotlb, msg->iova,
> + msg->iova + msg->size - 1))
> + return -EINVAL;
> +
> if (vhost_iotlb_itree_first(iotlb, msg->iova,
> msg->iova + msg->size - 1))
> return -EEXIST;
> @@ -1103,6 +1110,45 @@ static ssize_t vhost_vdpa_chr_write_iter(struct kiocb *iocb,
> return vhost_chr_write_iter(dev, from);
> }
>
> +static int vhost_vdpa_resv_iommu_region(struct iommu_domain *domain, struct device *dma_dev,
> + struct vhost_iotlb *resv_iotlb)
> +{
> + struct list_head dev_resv_regions;
> + phys_addr_t resv_msi_base = 0;
> + struct iommu_resv_region *region;
> + int ret = 0;
> + bool with_sw_msi = false;
> + bool with_hw_msi = false;
> +
> + INIT_LIST_HEAD(&dev_resv_regions);
> + iommu_get_resv_regions(dma_dev, &dev_resv_regions);
> +
> + list_for_each_entry(region, &dev_resv_regions, list) {
> + ret = vhost_iotlb_add_range_ctx(resv_iotlb, region->start,
> + region->start + region->length - 1,
> + 0, 0, NULL);
> + if (ret) {
> + vhost_iotlb_reset(resv_iotlb);
> + break;
> + }
> +
> + if (region->type == IOMMU_RESV_MSI)
> + with_hw_msi = true;
> +
> + if (region->type == IOMMU_RESV_SW_MSI) {
> + resv_msi_base = region->start;
> + with_sw_msi = true;
> + }
> + }
> +
> + if (!ret && !with_hw_msi && with_sw_msi)
> + ret = iommu_get_msi_cookie(domain, resv_msi_base);
> +
> + iommu_put_resv_regions(dma_dev, &dev_resv_regions);
> +
> + return ret;
> +}


As discussed in v1, I still prefer to factor out the common logic and
move it to iommu.c. It helps to simplify future bug fixing and
enhancement.


> +
> static int vhost_vdpa_alloc_domain(struct vhost_vdpa *v)
> {
> struct vdpa_device *vdpa = v->vdpa;
> @@ -1128,11 +1174,16 @@ static int vhost_vdpa_alloc_domain(struct vhost_vdpa *v)
>
> ret = iommu_attach_device(v->domain, dma_dev);
> if (ret)
> - goto err_attach;
> + goto err_alloc_domain;
>
> - return 0;
> + ret = vhost_vdpa_resv_iommu_region(v->domain, dma_dev, &v->resv_iotlb);
> + if (ret)
> + goto err_attach_device;
>
> -err_attach:
> + return 0;
> +err_attach_device:
> + iommu_detach_device(v->domain, dma_dev);
> +err_alloc_domain:
> iommu_domain_free(v->domain);
> return ret;
> }
> @@ -1385,6 +1436,8 @@ static int vhost_vdpa_probe(struct vdpa_device *vdpa)
> goto err;
> }
>
> + vhost_iotlb_init(&v->resv_iotlb, 0, 0);
> +
> r = dev_set_name(&v->dev, "vhost-vdpa-%u", minor);
> if (r)
> goto err;


We need to clean up resv_iotlb during release().

Other looks good.

Thanks


2023-02-16 12:10:44

by Nanyong Sun

[permalink] [raw]
Subject: Re: [PATCH v2] vhost/vdpa: Add MSI translation tables to iommu for software-managed MSI

On 2023/2/16 12:43, Jason Wang wrote:

>
> 在 2023/2/7 20:08, Nanyong Sun 写道:
>> From: Rong Wang <[email protected]>
>>
>> Once enable iommu domain for one device, the MSI
>> translation tables have to be there for software-managed MSI.
>> Otherwise, platform with software-managed MSI without an
>> irq bypass function, can not get a correct memory write event
>> from pcie, will not get irqs.
>> The solution is to obtain the MSI phy base address from
>> iommu reserved region, and set it to iommu MSI cookie,
>> then translation tables will be created while request irq.
>>
>> Change log
>> ----------
>>
>> v1->v2:
>> - add resv iotlb to avoid overlap mapping.
>>
>> Signed-off-by: Rong Wang <[email protected]>
>> Signed-off-by: Nanyong Sun <[email protected]>
>> ---
>>   drivers/iommu/iommu.c |  1 +
>>   drivers/vhost/vdpa.c  | 59 ++++++++++++++++++++++++++++++++++++++++---
>>   2 files changed, 57 insertions(+), 3 deletions(-)
>>
>> diff --git a/drivers/iommu/iommu.c b/drivers/iommu/iommu.c
>> index 5f6a85aea501..af9c064ad8b2 100644
>> --- a/drivers/iommu/iommu.c
>> +++ b/drivers/iommu/iommu.c
>> @@ -2623,6 +2623,7 @@ void iommu_get_resv_regions(struct device *dev,
>> struct list_head *list)
>>       if (ops->get_resv_regions)
>>           ops->get_resv_regions(dev, list);
>>   }
>> +EXPORT_SYMBOL(iommu_get_resv_regions);
>>     /**
>>    * iommu_put_resv_regions - release resered regions
>> diff --git a/drivers/vhost/vdpa.c b/drivers/vhost/vdpa.c
>> index ec32f785dfde..a58979da8acd 100644
>> --- a/drivers/vhost/vdpa.c
>> +++ b/drivers/vhost/vdpa.c
>> @@ -49,6 +49,7 @@ struct vhost_vdpa {
>>       struct completion completion;
>>       struct vdpa_device *vdpa;
>>       struct hlist_head as[VHOST_VDPA_IOTLB_BUCKETS];
>> +    struct vhost_iotlb resv_iotlb;
>
>
> Nit: it might be better to rename this as resv_regions.
>

Agree, and will do that in version3

>
>>       struct device dev;
>>       struct cdev cdev;
>>       atomic_t opened;
>> @@ -216,6 +217,8 @@ static int vhost_vdpa_reset(struct vhost_vdpa *v)
>>         v->in_batch = 0;
>>   +    vhost_iotlb_reset(&v->resv_iotlb);
>> +
>>       return vdpa_reset(vdpa);
>>   }
>>   @@ -1013,6 +1016,10 @@ static int
>> vhost_vdpa_process_iotlb_update(struct vhost_vdpa *v,
>>           msg->iova + msg->size - 1 > v->range.last)
>>           return -EINVAL;
>>   +    if (vhost_iotlb_itree_first(&v->resv_iotlb, msg->iova,
>> +                    msg->iova + msg->size - 1))
>> +        return -EINVAL;
>> +
>>       if (vhost_iotlb_itree_first(iotlb, msg->iova,
>>                       msg->iova + msg->size - 1))
>>           return -EEXIST;
>> @@ -1103,6 +1110,45 @@ static ssize_t
>> vhost_vdpa_chr_write_iter(struct kiocb *iocb,
>>       return vhost_chr_write_iter(dev, from);
>>   }
>>   +static int vhost_vdpa_resv_iommu_region(struct iommu_domain
>> *domain, struct device *dma_dev,
>> +    struct vhost_iotlb *resv_iotlb)
>> +{
>> +    struct list_head dev_resv_regions;
>> +    phys_addr_t resv_msi_base = 0;
>> +    struct iommu_resv_region *region;
>> +    int ret = 0;
>> +    bool with_sw_msi = false;
>> +    bool with_hw_msi = false;
>> +
>> +    INIT_LIST_HEAD(&dev_resv_regions);
>> +    iommu_get_resv_regions(dma_dev, &dev_resv_regions);
>> +
>> +    list_for_each_entry(region, &dev_resv_regions, list) {
>> +        ret = vhost_iotlb_add_range_ctx(resv_iotlb, region->start,
>> +                region->start + region->length - 1,
>> +                0, 0, NULL);
>> +        if (ret) {
>> +            vhost_iotlb_reset(resv_iotlb);
>> +            break;
>> +        }
>> +
>> +        if (region->type == IOMMU_RESV_MSI)
>> +            with_hw_msi = true;
>> +
>> +        if (region->type == IOMMU_RESV_SW_MSI) {
>> +            resv_msi_base = region->start;
>> +            with_sw_msi = true;
>> +        }
>> +    }
>> +
>> +    if (!ret && !with_hw_msi && with_sw_msi)
>> +        ret = iommu_get_msi_cookie(domain, resv_msi_base);
>> +
>> +    iommu_put_resv_regions(dma_dev, &dev_resv_regions);
>> +
>> +    return ret;
>> +}
>
>
> As discussed in v1, I still prefer to factor out the common logic and
> move them to iommu.c. It helps to simplify the future bug fixing and
> enhancement.

Ok, will do that in version3

>
>
>> +
>>   static int vhost_vdpa_alloc_domain(struct vhost_vdpa *v)
>>   {
>>       struct vdpa_device *vdpa = v->vdpa;
>> @@ -1128,11 +1174,16 @@ static int vhost_vdpa_alloc_domain(struct
>> vhost_vdpa *v)
>>         ret = iommu_attach_device(v->domain, dma_dev);
>>       if (ret)
>> -        goto err_attach;
>> +        goto err_alloc_domain;
>>   -    return 0;
>> +    ret = vhost_vdpa_resv_iommu_region(v->domain, dma_dev,
>> &v->resv_iotlb);
>> +    if (ret)
>> +        goto err_attach_device;
>>   -err_attach:
>> +    return 0;
>> +err_attach_device:
>> +    iommu_detach_device(v->domain, dma_dev);
>> +err_alloc_domain:
>>       iommu_domain_free(v->domain);
>>       return ret;
>>   }
>> @@ -1385,6 +1436,8 @@ static int vhost_vdpa_probe(struct vdpa_device
>> *vdpa)
>>           goto err;
>>       }
>>   +    vhost_iotlb_init(&v->resv_iotlb, 0, 0);
>> +
>>       r = dev_set_name(&v->dev, "vhost-vdpa-%u", minor);
>>       if (r)
>>           goto err;
>
>
> We need clean resv_iotlb during release().

I added vhost_iotlb_reset() to vhost_vdpa_reset(), so resv_iotlb will be cleaned when vhost_vdpa_release() and vhost_vdpa_open() are called.

>
> Other looks good.
>
> Thanks
>
> .

2023-02-16 12:11:18

by Nanyong Sun

[permalink] [raw]
Subject: Re: [PATCH v2] vhost/vdpa: Add MSI translation tables to iommu for software-managed MSI


On 2023/2/15 19:48, Michael S. Tsirkin wrote:
> On Tue, Feb 07, 2023 at 08:08:43PM +0800, Nanyong Sun wrote:
>> From: Rong Wang <[email protected]>
>>
>> Once enable iommu domain for one device, the MSI
>> translation tables have to be there for software-managed MSI.
>> Otherwise, platform with software-managed MSI without an
>> irq bypass function, can not get a correct memory write event
>> from pcie, will not get irqs.
>> The solution is to obtain the MSI phy base address from
>> iommu reserved region, and set it to iommu MSI cookie,
>> then translation tables will be created while request irq.
>>
>> Change log
>> ----------
>>
>> v1->v2:
>> - add resv iotlb to avoid overlap mapping.
> put changelog after --- pls

Ok, will do that in version3

>
>> Signed-off-by: Rong Wang <[email protected]>
>> Signed-off-by: Nanyong Sun <[email protected]>
>> ---
>> drivers/iommu/iommu.c | 1 +
>> drivers/vhost/vdpa.c | 59 ++++++++++++++++++++++++++++++++++++++++---
>> 2 files changed, 57 insertions(+), 3 deletions(-)
>>
>> diff --git a/drivers/iommu/iommu.c b/drivers/iommu/iommu.c
>> index 5f6a85aea501..af9c064ad8b2 100644
>> --- a/drivers/iommu/iommu.c
>> +++ b/drivers/iommu/iommu.c
>> @@ -2623,6 +2623,7 @@ void iommu_get_resv_regions(struct device *dev, struct list_head *list)
>> if (ops->get_resv_regions)
>> ops->get_resv_regions(dev, list);
>> }
>> +EXPORT_SYMBOL(iommu_get_resv_regions);
>>
>> /**
>> * iommu_put_resv_regions - release resered regions
>> diff --git a/drivers/vhost/vdpa.c b/drivers/vhost/vdpa.c
>> index ec32f785dfde..a58979da8acd 100644
>> --- a/drivers/vhost/vdpa.c
>> +++ b/drivers/vhost/vdpa.c
>> @@ -49,6 +49,7 @@ struct vhost_vdpa {
>> struct completion completion;
>> struct vdpa_device *vdpa;
>> struct hlist_head as[VHOST_VDPA_IOTLB_BUCKETS];
>> + struct vhost_iotlb resv_iotlb;
>> struct device dev;
>> struct cdev cdev;
>> atomic_t opened;
>> @@ -216,6 +217,8 @@ static int vhost_vdpa_reset(struct vhost_vdpa *v)
>>
>> v->in_batch = 0;
>>
>> + vhost_iotlb_reset(&v->resv_iotlb);
>> +
>> return vdpa_reset(vdpa);
>> }
>>
>> @@ -1013,6 +1016,10 @@ static int vhost_vdpa_process_iotlb_update(struct vhost_vdpa *v,
>> msg->iova + msg->size - 1 > v->range.last)
>> return -EINVAL;
>>
>> + if (vhost_iotlb_itree_first(&v->resv_iotlb, msg->iova,
>> + msg->iova + msg->size - 1))
>> + return -EINVAL;
>> +
>> if (vhost_iotlb_itree_first(iotlb, msg->iova,
>> msg->iova + msg->size - 1))
>> return -EEXIST;
>> @@ -1103,6 +1110,45 @@ static ssize_t vhost_vdpa_chr_write_iter(struct kiocb *iocb,
>> return vhost_chr_write_iter(dev, from);
>> }
>>
>> +static int vhost_vdpa_resv_iommu_region(struct iommu_domain *domain, struct device *dma_dev,
>> + struct vhost_iotlb *resv_iotlb)
>> +{
>> + struct list_head dev_resv_regions;
>> + phys_addr_t resv_msi_base = 0;
>> + struct iommu_resv_region *region;
>> + int ret = 0;
>> + bool with_sw_msi = false;
>> + bool with_hw_msi = false;
>> +
>> + INIT_LIST_HEAD(&dev_resv_regions);
>> + iommu_get_resv_regions(dma_dev, &dev_resv_regions);
>> +
>> + list_for_each_entry(region, &dev_resv_regions, list) {
>> + ret = vhost_iotlb_add_range_ctx(resv_iotlb, region->start,
>> + region->start + region->length - 1,
>> + 0, 0, NULL);
>> + if (ret) {
>> + vhost_iotlb_reset(resv_iotlb);
>> + break;
>> + }
>> +
>> + if (region->type == IOMMU_RESV_MSI)
>> + with_hw_msi = true;
>> +
>> + if (region->type == IOMMU_RESV_SW_MSI) {
>> + resv_msi_base = region->start;
>> + with_sw_msi = true;
>> + }
>> + }
>> +
>> + if (!ret && !with_hw_msi && with_sw_msi)
>> + ret = iommu_get_msi_cookie(domain, resv_msi_base);
>> +
>> + iommu_put_resv_regions(dma_dev, &dev_resv_regions);
>> +
>> + return ret;
>> +}
>> +
>> static int vhost_vdpa_alloc_domain(struct vhost_vdpa *v)
>> {
>> struct vdpa_device *vdpa = v->vdpa;
>> @@ -1128,11 +1174,16 @@ static int vhost_vdpa_alloc_domain(struct vhost_vdpa *v)
>>
>> ret = iommu_attach_device(v->domain, dma_dev);
>> if (ret)
>> - goto err_attach;
>> + goto err_alloc_domain;
>>
>> - return 0;
>> + ret = vhost_vdpa_resv_iommu_region(v->domain, dma_dev, &v->resv_iotlb);
>> + if (ret)
>> + goto err_attach_device;
>>
>> -err_attach:
>> + return 0;
>> +err_attach_device:
>> + iommu_detach_device(v->domain, dma_dev);
>> +err_alloc_domain:
>> iommu_domain_free(v->domain);
>> return ret;
>> }
>> @@ -1385,6 +1436,8 @@ static int vhost_vdpa_probe(struct vdpa_device *vdpa)
>> goto err;
>> }
>>
>> + vhost_iotlb_init(&v->resv_iotlb, 0, 0);
>> +
>> r = dev_set_name(&v->dev, "vhost-vdpa-%u", minor);
>> if (r)
>> goto err;
>> --
>> 2.25.1
> .

2023-02-17 00:14:59

by Jason Gunthorpe

[permalink] [raw]
Subject: Re: [PATCH v2] vhost/vdpa: Add MSI translation tables to iommu for software-managed MSI

On Tue, Feb 07, 2023 at 08:08:43PM +0800, Nanyong Sun wrote:
> From: Rong Wang <[email protected]>
>
> Once enable iommu domain for one device, the MSI
> translation tables have to be there for software-managed MSI.
> Otherwise, platform with software-managed MSI without an
> irq bypass function, can not get a correct memory write event
> from pcie, will not get irqs.
> The solution is to obtain the MSI phy base address from
> iommu reserved region, and set it to iommu MSI cookie,
> then translation tables will be created while request irq.

Probably not what anyone wants to hear, but I would prefer we not add
more uses of this stuff. It looks like we have to get rid of
iommu_get_msi_cookie() :\

I'd like it if vdpa could move to iommufd rather than keep copying stuff
from it.

Also the iommu_group_has_isolated_msi() check is missing on the vdpa
path, and it is missing the iommu ownership mechanism.

Also which in-tree VDPA driver that uses the iommu runs on ARM? Please
don't propose core changes for unmerged drivers. :(

Jason

2023-02-17 05:37:10

by Jason Wang

[permalink] [raw]
Subject: Re: [PATCH v2] vhost/vdpa: Add MSI translation tables to iommu for software-managed MSI

On Fri, Feb 17, 2023 at 8:15 AM Jason Gunthorpe <[email protected]> wrote:
>
> On Tue, Feb 07, 2023 at 08:08:43PM +0800, Nanyong Sun wrote:
> > From: Rong Wang <[email protected]>
> >
> > Once enable iommu domain for one device, the MSI
> > translation tables have to be there for software-managed MSI.
> > Otherwise, platform with software-managed MSI without an
> > irq bypass function, can not get a correct memory write event
> > from pcie, will not get irqs.
> > The solution is to obtain the MSI phy base address from
> > iommu reserved region, and set it to iommu MSI cookie,
> > then translation tables will be created while request irq.
>
> Probably not what anyone wants to hear, but I would prefer we not add
> more uses of this stuff. It looks like we have to get rid of
> iommu_get_msi_cookie() :\
>
> I'd like it if vdpa could move to iommufd not keep copying stuff from
> it..

Yes, but we probably need a patch for -stable.

>
> Also the iommu_group_has_isolated_msi() check is missing on the vdpa
> path, and it is missing the iommu ownership mechanism.

Ok.

>
> Also which in-tree VDPA driver that uses the iommu runs on ARM? Please

ifcvf and vp_vdpa are two drivers that use platform IOMMU.

Thanks

> don't propose core changes for unmerged drivers. :(
>
> Jason
>


2023-02-17 10:12:39

by Michael S. Tsirkin

[permalink] [raw]
Subject: Re: [PATCH v2] vhost/vdpa: Add MSI translation tables to iommu for software-managed MSI

On Fri, Feb 17, 2023 at 01:35:59PM +0800, Jason Wang wrote:
> On Fri, Feb 17, 2023 at 8:15 AM Jason Gunthorpe <[email protected]> wrote:
> >
> > On Tue, Feb 07, 2023 at 08:08:43PM +0800, Nanyong Sun wrote:
> > > From: Rong Wang <[email protected]>
> > >
> > > Once enable iommu domain for one device, the MSI
> > > translation tables have to be there for software-managed MSI.
> > > Otherwise, platform with software-managed MSI without an
> > > irq bypass function, can not get a correct memory write event
> > > from pcie, will not get irqs.
> > > The solution is to obtain the MSI phy base address from
> > > iommu reserved region, and set it to iommu MSI cookie,
> > > then translation tables will be created while request irq.
> >
> > Probably not what anyone wants to hear, but I would prefer we not add
> > more uses of this stuff. It looks like we have to get rid of
> > iommu_get_msi_cookie() :\
> >
> > I'd like it if vdpa could move to iommufd not keep copying stuff from
> > it..
>
> Yes, but we probably need a patch for -stable.

Hmm, do we? This looks like it's enabling new platforms, not a bugfix...

> >
> > Also the iommu_group_has_isolated_msi() check is missing on the vdpa
> > path, and it is missing the iommu ownership mechanism.
>
> Ok.
>
> >
> > Also which in-tree VDPA driver that uses the iommu runs on ARM? Please
>
> ifcvf and vp_vdpa are two drivers that use platform IOMMU.
>
> Thanks
>
> > don't propose core changes for unmerged drivers. :(
> >
> > Jason
> >


2023-02-17 10:13:28

by Michael S. Tsirkin

[permalink] [raw]
Subject: Re: [PATCH v2] vhost/vdpa: Add MSI translation tables to iommu for software-managed MSI

On Thu, Feb 16, 2023 at 08:14:50PM -0400, Jason Gunthorpe wrote:
> On Tue, Feb 07, 2023 at 08:08:43PM +0800, Nanyong Sun wrote:
> > From: Rong Wang <[email protected]>
> >
> > Once enable iommu domain for one device, the MSI
> > translation tables have to be there for software-managed MSI.
> > Otherwise, platform with software-managed MSI without an
> > irq bypass function, can not get a correct memory write event
> > from pcie, will not get irqs.
> > The solution is to obtain the MSI phy base address from
> > iommu reserved region, and set it to iommu MSI cookie,
> > then translation tables will be created while request irq.
>
> Probably not what anyone wants to hear, but I would prefer we not add
> more uses of this stuff. It looks like we have to get rid of
> iommu_get_msi_cookie() :\
>
> I'd like it if vdpa could move to iommufd not keep copying stuff from
> it..

Absolutely but when is that happening?

> Also the iommu_group_has_isolated_msi() check is missing on the vdpa
> path, and it is missing the iommu ownership mechanism.
>
> Also which in-tree VDPA driver that uses the iommu runs on ARM? Please
> don't propose core changes for unmerged drivers. :(
>
> Jason


2023-02-17 12:43:52

by Jason Gunthorpe

[permalink] [raw]
Subject: Re: [PATCH v2] vhost/vdpa: Add MSI translation tables to iommu for software-managed MSI

On Fri, Feb 17, 2023 at 05:12:29AM -0500, Michael S. Tsirkin wrote:
> On Thu, Feb 16, 2023 at 08:14:50PM -0400, Jason Gunthorpe wrote:
> > On Tue, Feb 07, 2023 at 08:08:43PM +0800, Nanyong Sun wrote:
> > > From: Rong Wang <[email protected]>
> > >
> > > Once enable iommu domain for one device, the MSI
> > > translation tables have to be there for software-managed MSI.
> > > Otherwise, platform with software-managed MSI without an
> > > irq bypass function, can not get a correct memory write event
> > > from pcie, will not get irqs.
> > > The solution is to obtain the MSI phy base address from
> > > iommu reserved region, and set it to iommu MSI cookie,
> > > then translation tables will be created while request irq.
> >
> > Probably not what anyone wants to hear, but I would prefer we not add
> > more uses of this stuff. It looks like we have to get rid of
> > iommu_get_msi_cookie() :\
> >
> > I'd like it if vdpa could move to iommufd not keep copying stuff from
> > it..
>
> Absolutely but when is that happening?

Don't know, I think it has to come from the VDPA maintainers, Nicolin
made some drafts but wasn't able to get it beyond that.

Please have people who need more iommu platform enablement to pick it
up instead of merging hacks like this..

We are very close to having nested translation on ARM so anyone who is
serious about VDPA on ARM is going to need iommufd anyhow.

Jason

2023-02-20 02:37:28

by Jason Wang

[permalink] [raw]
Subject: Re: [PATCH v2] vhost/vdpa: Add MSI translation tables to iommu for software-managed MSI

On Fri, Feb 17, 2023 at 6:11 PM Michael S. Tsirkin <[email protected]> wrote:
>
> On Fri, Feb 17, 2023 at 01:35:59PM +0800, Jason Wang wrote:
> > On Fri, Feb 17, 2023 at 8:15 AM Jason Gunthorpe <[email protected]> wrote:
> > >
> > > On Tue, Feb 07, 2023 at 08:08:43PM +0800, Nanyong Sun wrote:
> > > > From: Rong Wang <[email protected]>
> > > >
> > > > Once enable iommu domain for one device, the MSI
> > > > translation tables have to be there for software-managed MSI.
> > > > Otherwise, platform with software-managed MSI without an
> > > > irq bypass function, can not get a correct memory write event
> > > > from pcie, will not get irqs.
> > > > The solution is to obtain the MSI phy base address from
> > > > iommu reserved region, and set it to iommu MSI cookie,
> > > > then translation tables will be created while request irq.
> > >
> > > Probably not what anyone wants to hear, but I would prefer we not add
> > > more uses of this stuff. It looks like we have to get rid of
> > > iommu_get_msi_cookie() :\
> > >
> > > I'd like it if vdpa could move to iommufd not keep copying stuff from
> > > it..
> >
> > Yes, but we probably need a patch for -stable.
>
> Hmm do we? this looks like it's enabling new platforms is not a bugfix...

I think we haven't limited vDPA to any specific arch in the past?

Thanks

>
> > >
> > > Also the iommu_group_has_isolated_msi() check is missing on the vdpa
> > > path, and it is missing the iommu ownership mechanism.
> >
> > Ok.
> >
> > >
> > > Also which in-tree VDPA driver that uses the iommu runs on ARM? Please
> >
> > ifcvf and vp_vdpa are two drivers that use platform IOMMU.
> >
> > Thanks
> >
> > > don't propose core changes for unmerged drivers. :(
> > >
> > > Jason
> > >
>


2023-02-20 02:38:23

by Jason Wang

[permalink] [raw]
Subject: Re: [PATCH v2] vhost/vdpa: Add MSI translation tables to iommu for software-managed MSI

On Fri, Feb 17, 2023 at 8:43 PM Jason Gunthorpe <[email protected]> wrote:
>
> On Fri, Feb 17, 2023 at 05:12:29AM -0500, Michael S. Tsirkin wrote:
> > On Thu, Feb 16, 2023 at 08:14:50PM -0400, Jason Gunthorpe wrote:
> > > On Tue, Feb 07, 2023 at 08:08:43PM +0800, Nanyong Sun wrote:
> > > > From: Rong Wang <[email protected]>
> > > >
> > > > Once enable iommu domain for one device, the MSI
> > > > translation tables have to be there for software-managed MSI.
> > > > Otherwise, platform with software-managed MSI without an
> > > > irq bypass function, can not get a correct memory write event
> > > > from pcie, will not get irqs.
> > > > The solution is to obtain the MSI phy base address from
> > > > iommu reserved region, and set it to iommu MSI cookie,
> > > > then translation tables will be created while request irq.
> > >
> > > Probably not what anyone wants to hear, but I would prefer we not add
> > > more uses of this stuff. It looks like we have to get rid of
> > > iommu_get_msi_cookie() :\
> > >
> > > I'd like it if vdpa could move to iommufd not keep copying stuff from
> > > it..
> >
> > Absolutely but when is that happening?
>
> Don't know, I think it has to come from the VDPA maintainers, Nicolin
> made some drafts but wasn't able to get it beyond that.

Cindy (cced) will carry on the work.

Thanks

>
> Please have people who need more iommu platform enablement to pick it
> up instead of merging hacks like this..
>
> We are very close to having nested translation on ARM so anyone who is
> serious about VDPA on ARM is going to need iommufd anyhow.
>
> Jason
>


2023-02-20 15:26:01

by Michael S. Tsirkin

[permalink] [raw]
Subject: Re: [PATCH v2] vhost/vdpa: Add MSI translation tables to iommu for software-managed MSI

On Mon, Feb 20, 2023 at 10:36:27AM +0800, Jason Wang wrote:
> On Fri, Feb 17, 2023 at 6:11 PM Michael S. Tsirkin <[email protected]> wrote:
> >
> > On Fri, Feb 17, 2023 at 01:35:59PM +0800, Jason Wang wrote:
> > > On Fri, Feb 17, 2023 at 8:15 AM Jason Gunthorpe <[email protected]> wrote:
> > > >
> > > > On Tue, Feb 07, 2023 at 08:08:43PM +0800, Nanyong Sun wrote:
> > > > > From: Rong Wang <[email protected]>
> > > > >
> > > > > Once enable iommu domain for one device, the MSI
> > > > > translation tables have to be there for software-managed MSI.
> > > > > Otherwise, platform with software-managed MSI without an
> > > > > irq bypass function, can not get a correct memory write event
> > > > > from pcie, will not get irqs.
> > > > > The solution is to obtain the MSI phy base address from
> > > > > iommu reserved region, and set it to iommu MSI cookie,
> > > > > then translation tables will be created while request irq.
> > > >
> > > > Probably not what anyone wants to hear, but I would prefer we not add
> > > > more uses of this stuff. It looks like we have to get rid of
> > > > iommu_get_msi_cookie() :\
> > > >
> > > > I'd like it if vdpa could move to iommufd not keep copying stuff from
> > > > it..
> > >
> > > Yes, but we probably need a patch for -stable.
> >
> > Hmm do we? this looks like it's enabling new platforms is not a bugfix...
>
> I think we haven't limited vDPA to any specific arch in the past?
>
> Thanks

No, but it still fails gracefully right?

Anyway, this will need iommu maintainer's ack. We'll see.


> >
> > > >
> > > > Also the iommu_group_has_isolated_msi() check is missing on the vdpa
> > > > path, and it is missing the iommu ownership mechanism.
> > >
> > > Ok.
> > >
> > > >
> > > > Also which in-tree VDPA driver that uses the iommu runs on ARM? Please
> > >
> > > ifcvf and vp_vdpa are two drivers that use platform IOMMU.
> > >
> > > Thanks
> > >
> > > > don't propose core changes for unmerged drivers. :(
> > > >
> > > > Jason
> > > >
> >


2023-03-10 08:45:23

by Michael S. Tsirkin

[permalink] [raw]
Subject: Re: [PATCH v2] vhost/vdpa: Add MSI translation tables to iommu for software-managed MSI

On Mon, Feb 20, 2023 at 10:37:18AM +0800, Jason Wang wrote:
> On Fri, Feb 17, 2023 at 8:43 PM Jason Gunthorpe <[email protected]> wrote:
> >
> > On Fri, Feb 17, 2023 at 05:12:29AM -0500, Michael S. Tsirkin wrote:
> > > On Thu, Feb 16, 2023 at 08:14:50PM -0400, Jason Gunthorpe wrote:
> > > > On Tue, Feb 07, 2023 at 08:08:43PM +0800, Nanyong Sun wrote:
> > > > > From: Rong Wang <[email protected]>
> > > > >
> > > > > Once enable iommu domain for one device, the MSI
> > > > > translation tables have to be there for software-managed MSI.
> > > > > Otherwise, platform with software-managed MSI without an
> > > > > irq bypass function, can not get a correct memory write event
> > > > > from pcie, will not get irqs.
> > > > > The solution is to obtain the MSI phy base address from
> > > > > iommu reserved region, and set it to iommu MSI cookie,
> > > > > then translation tables will be created while request irq.
> > > >
> > > > Probably not what anyone wants to hear, but I would prefer we not add
> > > > more uses of this stuff. It looks like we have to get rid of
> > > > iommu_get_msi_cookie() :\
> > > >
> > > > I'd like it if vdpa could move to iommufd not keep copying stuff from
> > > > it..
> > >
> > > Absolutely but when is that happening?
> >
> > Don't know, I think it has to come from the VDPA maintainers, Nicolin
> > made some drafts but wasn't able to get it beyond that.
>
> Cindy (cced) will carry on the work.
>
> Thanks

Hmm didn't see anything yet. Nanyong Sun maybe you can take a look?

> >
> > Please have people who need more iommu platform enablement to pick it
> > up instead of merging hacks like this..
> >
> > We are very close to having nested translation on ARM so anyone who is
> > serious about VDPA on ARM is going to need iommufd anyhow.
> >
> > Jason
> >


2023-03-10 09:47:11

by Jason Wang

[permalink] [raw]
Subject: Re: [PATCH v2] vhost/vdpa: Add MSI translation tables to iommu for software-managed MSI

On Fri, Mar 10, 2023 at 4:41 PM Michael S. Tsirkin <[email protected]> wrote:
>
> On Mon, Feb 20, 2023 at 10:37:18AM +0800, Jason Wang wrote:
> > On Fri, Feb 17, 2023 at 8:43 PM Jason Gunthorpe <[email protected]> wrote:
> > >
> > > On Fri, Feb 17, 2023 at 05:12:29AM -0500, Michael S. Tsirkin wrote:
> > > > On Thu, Feb 16, 2023 at 08:14:50PM -0400, Jason Gunthorpe wrote:
> > > > > On Tue, Feb 07, 2023 at 08:08:43PM +0800, Nanyong Sun wrote:
> > > > > > From: Rong Wang <[email protected]>
> > > > > >
> > > > > > Once enable iommu domain for one device, the MSI
> > > > > > translation tables have to be there for software-managed MSI.
> > > > > > Otherwise, platform with software-managed MSI without an
> > > > > > irq bypass function, can not get a correct memory write event
> > > > > > from pcie, will not get irqs.
> > > > > > The solution is to obtain the MSI phy base address from
> > > > > > iommu reserved region, and set it to iommu MSI cookie,
> > > > > > then translation tables will be created while request irq.
> > > > >
> > > > > Probably not what anyone wants to hear, but I would prefer we not add
> > > > > more uses of this stuff. It looks like we have to get rid of
> > > > > iommu_get_msi_cookie() :\
> > > > >
> > > > > I'd like it if vdpa could move to iommufd not keep copying stuff from
> > > > > it..
> > > >
> > > > Absolutely but when is that happening?
> > >
> > > Don't know, I think it has to come from the VDPA maintainers, Nicolin
> > > made some drafts but wasn't able to get it beyond that.
> >
> > Cindy (cced) will carry on the work.
> >
> > Thanks
>
> Hmm didn't see anything yet. Nanyong Sun maybe you can take a look?

Just to clarify, Cindy will work on the iommufd conversion for
vhost-vDPA, the changes are non-trivial and may take time. Before we
are able to achieve that, I think we still need something like this
patch to make vDPA work on software managed MSI platforms.

Maybe Nanyong can post a new version that addresses the comment so far?

Thanks

>
> > >
> > > Please have people who need more iommu platform enablement to pick it
> > > up instead of merging hacks like this..
> > >
> > > We are very close to having nested translation on ARM so anyone who is
> > > serious about VDPA on ARM is going to need iommufd anyhow.
> > >
> > > Jason
> > >
>


2023-03-10 09:55:11

by Michael S. Tsirkin

[permalink] [raw]
Subject: Re: [PATCH v2] vhost/vdpa: Add MSI translation tables to iommu for software-managed MSI

On Fri, Mar 10, 2023 at 05:45:46PM +0800, Jason Wang wrote:
> On Fri, Mar 10, 2023 at 4:41 PM Michael S. Tsirkin <[email protected]> wrote:
> >
> > On Mon, Feb 20, 2023 at 10:37:18AM +0800, Jason Wang wrote:
> > > On Fri, Feb 17, 2023 at 8:43 PM Jason Gunthorpe <[email protected]> wrote:
> > > >
> > > > On Fri, Feb 17, 2023 at 05:12:29AM -0500, Michael S. Tsirkin wrote:
> > > > > On Thu, Feb 16, 2023 at 08:14:50PM -0400, Jason Gunthorpe wrote:
> > > > > > On Tue, Feb 07, 2023 at 08:08:43PM +0800, Nanyong Sun wrote:
> > > > > > > From: Rong Wang <[email protected]>
> > > > > > >
> > > > > > > Once enable iommu domain for one device, the MSI
> > > > > > > translation tables have to be there for software-managed MSI.
> > > > > > > Otherwise, platform with software-managed MSI without an
> > > > > > > irq bypass function, can not get a correct memory write event
> > > > > > > from pcie, will not get irqs.
> > > > > > > The solution is to obtain the MSI phy base address from
> > > > > > > iommu reserved region, and set it to iommu MSI cookie,
> > > > > > > then translation tables will be created while request irq.
> > > > > >
> > > > > > Probably not what anyone wants to hear, but I would prefer we not add
> > > > > > more uses of this stuff. It looks like we have to get rid of
> > > > > > iommu_get_msi_cookie() :\
> > > > > >
> > > > > > I'd like it if vdpa could move to iommufd not keep copying stuff from
> > > > > > it..
> > > > >
> > > > > Absolutely but when is that happening?
> > > >
> > > > Don't know, I think it has to come from the VDPA maintainers, Nicolin
> > > > made some drafts but wasn't able to get it beyond that.
> > >
> > > Cindy (cced) will carry on the work.
> > >
> > > Thanks
> >
> > Hmm didn't see anything yet. Nanyong Sun maybe you can take a look?
>
> Just to clarify, Cindy will work on the iommufd conversion for
> vhost-vDPA, the changes are non-trivial and may take time. Before we
> are able to achieve that, I think we still need something like this
> patch to make vDPA work on software managed MSI platforms.
>
> Maybe Nanyong can post a new version that addresses the comment so far?
>
> Thanks

Maybe but an ack from iommu maintainers will be needed anyway. Let's see
that version, maybe split the export to a patch by itself to make the
need for that ack clear.


> >
> > > >
> > > > Please have people who need more iommu platform enablement to pick it
> > > > up instead of merging hacks like this..
> > > >
> > > > We are very close to having nested translation on ARM so anyone who is
> > > > serious about VDPA on ARM is going to need iommufd anyhow.
> > > >
> > > > Jason
> > > >
> >


2023-03-10 12:36:58

by Jason Gunthorpe

[permalink] [raw]
Subject: Re: [PATCH v2] vhost/vdpa: Add MSI translation tables to iommu for software-managed MSI

On Fri, Mar 10, 2023 at 04:53:42AM -0500, Michael S. Tsirkin wrote:
> On Fri, Mar 10, 2023 at 05:45:46PM +0800, Jason Wang wrote:
> > On Fri, Mar 10, 2023 at 4:41 PM Michael S. Tsirkin <[email protected]> wrote:
> > >
> > > On Mon, Feb 20, 2023 at 10:37:18AM +0800, Jason Wang wrote:
> > > > On Fri, Feb 17, 2023 at 8:43 PM Jason Gunthorpe <[email protected]> wrote:
> > > > >
> > > > > On Fri, Feb 17, 2023 at 05:12:29AM -0500, Michael S. Tsirkin wrote:
> > > > > > On Thu, Feb 16, 2023 at 08:14:50PM -0400, Jason Gunthorpe wrote:
> > > > > > > On Tue, Feb 07, 2023 at 08:08:43PM +0800, Nanyong Sun wrote:
> > > > > > > > From: Rong Wang <[email protected]>
> > > > > > > >
> > > > > > > > Once enable iommu domain for one device, the MSI
> > > > > > > > translation tables have to be there for software-managed MSI.
> > > > > > > > Otherwise, platform with software-managed MSI without an
> > > > > > > > irq bypass function, can not get a correct memory write event
> > > > > > > > from pcie, will not get irqs.
> > > > > > > > The solution is to obtain the MSI phy base address from
> > > > > > > > iommu reserved region, and set it to iommu MSI cookie,
> > > > > > > > then translation tables will be created while request irq.
> > > > > > >
> > > > > > > Probably not what anyone wants to hear, but I would prefer we not add
> > > > > > > more uses of this stuff. It looks like we have to get rid of
> > > > > > > iommu_get_msi_cookie() :\
> > > > > > >
> > > > > > > I'd like it if vdpa could move to iommufd not keep copying stuff from
> > > > > > > it..
> > > > > >
> > > > > > Absolutely but when is that happening?
> > > > >
> > > > > Don't know, I think it has to come from the VDPA maintainers, Nicolin
> > > > > made some drafts but wasn't able to get it beyond that.
> > > >
> > > > Cindy (cced) will carry on the work.
> > > >
> > > > Thanks
> > >
> > > Hmm didn't see anything yet. Nanyong Sun maybe you can take a look?
> >
> > Just to clarify, Cindy will work on the iommufd conversion for
> > vhost-vDPA, the changes are non-trivial and may take time. Before we
> > are able to achieve that, I think we still need something like this
> > patch to make vDPA work on software managed MSI platforms.
> >
> > Maybe Nanyong can post a new version that addresses the comment so far?
> >
> > Thanks
>
> Maybe but an ack from iommu maintainers will be needed anyway. Let's see
> that version, maybe split the export to a patch by itself to make the
> need for that ack clear.

A patch to export that function is already posted:

https://lore.kernel.org/linux-iommu/BN9PR11MB52760E9705F2985EACCD5C4A8CBA9@BN9PR11MB5276.namprd11.prod.outlook.com/T/#u

But I do not want VDPA to mis-use it unless it also implements all the
ownership stuff properly.

Jason

2023-03-23 09:27:35

by Nanyong Sun

[permalink] [raw]
Subject: Re: [PATCH v2] vhost/vdpa: Add MSI translation tables to iommu for software-managed MSI


On 2023/3/10 20:36, Jason Gunthorpe wrote:
> On Fri, Mar 10, 2023 at 04:53:42AM -0500, Michael S. Tsirkin wrote:
>> On Fri, Mar 10, 2023 at 05:45:46PM +0800, Jason Wang wrote:
>>> On Fri, Mar 10, 2023 at 4:41 PM Michael S. Tsirkin <[email protected]> wrote:
>>>> On Mon, Feb 20, 2023 at 10:37:18AM +0800, Jason Wang wrote:
>>>>> On Fri, Feb 17, 2023 at 8:43 PM Jason Gunthorpe <[email protected]> wrote:
>>>>>> On Fri, Feb 17, 2023 at 05:12:29AM -0500, Michael S. Tsirkin wrote:
>>>>>>> On Thu, Feb 16, 2023 at 08:14:50PM -0400, Jason Gunthorpe wrote:
>>>>>>>> On Tue, Feb 07, 2023 at 08:08:43PM +0800, Nanyong Sun wrote:
>>>>>>>>> From: Rong Wang <[email protected]>
>>>>>>>>>
>>>>>>>>> Once enable iommu domain for one device, the MSI
>>>>>>>>> translation tables have to be there for software-managed MSI.
>>>>>>>>> Otherwise, platform with software-managed MSI without an
>>>>>>>>> irq bypass function, can not get a correct memory write event
>>>>>>>>> from pcie, will not get irqs.
>>>>>>>>> The solution is to obtain the MSI phy base address from
>>>>>>>>> iommu reserved region, and set it to iommu MSI cookie,
>>>>>>>>> then translation tables will be created while request irq.
>>>>>>>> Probably not what anyone wants to hear, but I would prefer we not add
>>>>>>>> more uses of this stuff. It looks like we have to get rid of
>>>>>>>> iommu_get_msi_cookie() :\
>>>>>>>>
>>>>>>>> I'd like it if vdpa could move to iommufd not keep copying stuff from
>>>>>>>> it..
>>>>>>> Absolutely but when is that happening?
>>>>>> Don't know, I think it has to come from the VDPA maintainers, Nicolin
>>>>>> made some drafts but wasn't able to get it beyond that.
>>>>> Cindy (cced) will carry on the work.
>>>>>
>>>>> Thanks
>>>> Hmm didn't see anything yet. Nanyong Sun maybe you can take a look?
>>> Just to clarify, Cindy will work on the iommufd conversion for
>>> vhost-vDPA, the changes are non-trivial and may take time. Before we
>>> are able to achieve that, I think we still need something like this
>>> patch to make vDPA work on software managed MSI platforms.
>>>
>>> Maybe Nanyong can post a new version that addresses the comment so far?
>>>
>>> Thanks
>> Maybe but an ack from iommu maintainers will be needed anyway. Let's see
>> that version, maybe split the export to a patch by itself to make the
>> need for that ack clear.
> A patch to export that function is already posted:
>
> https://lore.kernel.org/linux-iommu/BN9PR11MB52760E9705F2985EACCD5C4A8CBA9@BN9PR11MB5276.namprd11.prod.outlook.com/T/#u
>
> But I do not want VDPA to mis-use it unless it also implements all the
> ownership stuff properly.
>
> Jason
> .
I want to confirm: by "all the ownership stuff", do you mean that we need
to introduce iommu group logic to vdpa?

2023-03-23 11:39:26

by Jason Gunthorpe

[permalink] [raw]
Subject: Re: [PATCH v2] vhost/vdpa: Add MSI translation tables to iommu for software-managed MSI

On Thu, Mar 23, 2023 at 05:22:36PM +0800, Nanyong Sun wrote:
> > A patch to export that function is already posted:
> >
> > https://lore.kernel.org/linux-iommu/BN9PR11MB52760E9705F2985EACCD5C4A8CBA9@BN9PR11MB5276.namprd11.prod.outlook.com/T/#u
> >
> > But I do not want VDPA to mis-use it unless it also implements all the
> > ownership stuff properly.
> >

> I want to confirm if we need to introduce iommu group logic to vdpa, as "all
> the ownership stuff" ?

You have to call iommu_device_claim_dma_owner()

But again, this is all pointless, iommufd takes care of all of this and
VDPA should switch to it instead of more hacking.

Jason

2023-03-23 12:29:44

by Jason Gunthorpe

[permalink] [raw]
Subject: Re: [PATCH v2] vhost/vdpa: Add MSI translation tables to iommu for software-managed MSI

On Thu, Mar 23, 2023 at 08:15:44PM +0800, Nanyong Sun wrote:
> On 2023/3/23 19:31, Jason Gunthorpe wrote:
>
> > On Thu, Mar 23, 2023 at 05:22:36PM +0800, Nanyong Sun wrote:
> > > > A patch to export that function is already posted:
> > > >
> > > > https://lore.kernel.org/linux-iommu/BN9PR11MB52760E9705F2985EACCD5C4A8CBA9@BN9PR11MB5276.namprd11.prod.outlook.com/T/#u
> > > >
> > > > But I do not want VDPA to mis-use it unless it also implements all the
> > > > ownership stuff properly.
> > > >
> > > I want to confirm if we need to introduce iommu group logic to vdpa, as "all
> > > the ownership stuff" ?
> > You have to call iommu_device_claim_dma_owner()
> >
> > But again, this is all pointless, iommufd takes care of all of this and
> > VDPA should switch to it instead of more hacking.
> >
> > Jason
> > .
> Yeah,  thanks for your suggestion,but as Michael and Jason Wang said,
> before iommufd is ready, we may need to make vDPA work well on software
> managed MSI platforms.
> To achieve that, basically we have two ways:
>
> 1. export iommu_get_resv_regions, and get regions device by device.
> 2. introduce iommu group, get regions by iommu_get_group_resv_regions, which
> already exported.

I do not think you should dig the hole deeper. If proper iommu
support is important to you then you should invest in iommufd
conversion.

Jason

2023-03-23 12:36:27

by Nanyong Sun

[permalink] [raw]
Subject: Re: [PATCH v2] vhost/vdpa: Add MSI translation tables to iommu for software-managed MSI

On 2023/3/23 19:31, Jason Gunthorpe wrote:

> On Thu, Mar 23, 2023 at 05:22:36PM +0800, Nanyong Sun wrote:
>>> A patch to export that function is already posted:
>>>
>>> https://lore.kernel.org/linux-iommu/BN9PR11MB52760E9705F2985EACCD5C4A8CBA9@BN9PR11MB5276.namprd11.prod.outlook.com/T/#u
>>>
>>> But I do not want VDPA to mis-use it unless it also implements all the
>>> ownership stuff properly.
>>>
>> I want to confirm if we need to introduce iommu group logic to vdpa, as "all
>> the ownership stuff" ?
> You have to call iommu_device_claim_dma_owner()
>
> But again, this is all pointless, iommufd takes care of all of this and
> VDPA should switch to it instead of more hacking.
>
> Jason
> .
Yeah, thanks for your suggestion, but as Michael and Jason Wang said,
before iommufd is ready, we may need to make vDPA work well on software
managed MSI platforms.
To achieve that, basically we have two ways:

1. export iommu_get_resv_regions, and get regions device by device.
2. introduce iommu group, get regions by iommu_get_group_resv_regions,
which is already exported.

2023-06-02 12:18:11

by Michael S. Tsirkin

[permalink] [raw]
Subject: Re: [PATCH v2] vhost/vdpa: Add MSI translation tables to iommu for software-managed MSI

On Tue, Feb 07, 2023 at 08:08:43PM +0800, Nanyong Sun wrote:
> From: Rong Wang <[email protected]>
>
> Once enable iommu domain for one device, the MSI
> translation tables have to be there for software-managed MSI.
> Otherwise, platform with software-managed MSI without an
> irq bypass function, can not get a correct memory write event
> from pcie, will not get irqs.
> The solution is to obtain the MSI phy base address from
> iommu reserved region, and set it to iommu MSI cookie,
> then translation tables will be created while request irq.


OK, this one seems to be going nowhere, so I untagged it.

> Change log
> ----------
>
> v1->v2:
> - add resv iotlb to avoid overlap mapping.
>
> Signed-off-by: Rong Wang <[email protected]>
> Signed-off-by: Nanyong Sun <[email protected]>
> ---
> drivers/iommu/iommu.c | 1 +
> drivers/vhost/vdpa.c | 59 ++++++++++++++++++++++++++++++++++++++++---
> 2 files changed, 57 insertions(+), 3 deletions(-)
>
> diff --git a/drivers/iommu/iommu.c b/drivers/iommu/iommu.c
> index 5f6a85aea501..af9c064ad8b2 100644
> --- a/drivers/iommu/iommu.c
> +++ b/drivers/iommu/iommu.c
> @@ -2623,6 +2623,7 @@ void iommu_get_resv_regions(struct device *dev, struct list_head *list)
> if (ops->get_resv_regions)
> ops->get_resv_regions(dev, list);
> }
> +EXPORT_SYMBOL(iommu_get_resv_regions);
>
> /**
> * iommu_put_resv_regions - release resered regions
> diff --git a/drivers/vhost/vdpa.c b/drivers/vhost/vdpa.c
> index ec32f785dfde..a58979da8acd 100644
> --- a/drivers/vhost/vdpa.c
> +++ b/drivers/vhost/vdpa.c
> @@ -49,6 +49,7 @@ struct vhost_vdpa {
> struct completion completion;
> struct vdpa_device *vdpa;
> struct hlist_head as[VHOST_VDPA_IOTLB_BUCKETS];
> + struct vhost_iotlb resv_iotlb;
> struct device dev;
> struct cdev cdev;
> atomic_t opened;
> @@ -216,6 +217,8 @@ static int vhost_vdpa_reset(struct vhost_vdpa *v)
>
> v->in_batch = 0;
>
> + vhost_iotlb_reset(&v->resv_iotlb);
> +
> return vdpa_reset(vdpa);
> }
>
> @@ -1013,6 +1016,10 @@ static int vhost_vdpa_process_iotlb_update(struct vhost_vdpa *v,
> msg->iova + msg->size - 1 > v->range.last)
> return -EINVAL;
>
> + if (vhost_iotlb_itree_first(&v->resv_iotlb, msg->iova,
> + msg->iova + msg->size - 1))
> + return -EINVAL;
> +
> if (vhost_iotlb_itree_first(iotlb, msg->iova,
> msg->iova + msg->size - 1))
> return -EEXIST;
> @@ -1103,6 +1110,45 @@ static ssize_t vhost_vdpa_chr_write_iter(struct kiocb *iocb,
> return vhost_chr_write_iter(dev, from);
> }
>
> +static int vhost_vdpa_resv_iommu_region(struct iommu_domain *domain, struct device *dma_dev,
> + struct vhost_iotlb *resv_iotlb)
> +{
> + struct list_head dev_resv_regions;
> + phys_addr_t resv_msi_base = 0;
> + struct iommu_resv_region *region;
> + int ret = 0;
> + bool with_sw_msi = false;
> + bool with_hw_msi = false;
> +
> + INIT_LIST_HEAD(&dev_resv_regions);
> + iommu_get_resv_regions(dma_dev, &dev_resv_regions);
> +
> + list_for_each_entry(region, &dev_resv_regions, list) {
> + ret = vhost_iotlb_add_range_ctx(resv_iotlb, region->start,
> + region->start + region->length - 1,
> + 0, 0, NULL);
> + if (ret) {
> + vhost_iotlb_reset(resv_iotlb);
> + break;
> + }
> +
> + if (region->type == IOMMU_RESV_MSI)
> + with_hw_msi = true;
> +
> + if (region->type == IOMMU_RESV_SW_MSI) {
> + resv_msi_base = region->start;
> + with_sw_msi = true;
> + }
> + }
> +
> + if (!ret && !with_hw_msi && with_sw_msi)
> + ret = iommu_get_msi_cookie(domain, resv_msi_base);
> +
> + iommu_put_resv_regions(dma_dev, &dev_resv_regions);
> +
> + return ret;
> +}
> +
> static int vhost_vdpa_alloc_domain(struct vhost_vdpa *v)
> {
> struct vdpa_device *vdpa = v->vdpa;
> @@ -1128,11 +1174,16 @@ static int vhost_vdpa_alloc_domain(struct vhost_vdpa *v)
>
> ret = iommu_attach_device(v->domain, dma_dev);
> if (ret)
> - goto err_attach;
> + goto err_alloc_domain;
>
> - return 0;
> + ret = vhost_vdpa_resv_iommu_region(v->domain, dma_dev, &v->resv_iotlb);
> + if (ret)
> + goto err_attach_device;
>
> -err_attach:
> + return 0;
> +err_attach_device:
> + iommu_detach_device(v->domain, dma_dev);
> +err_alloc_domain:
> iommu_domain_free(v->domain);
> return ret;
> }
> @@ -1385,6 +1436,8 @@ static int vhost_vdpa_probe(struct vdpa_device *vdpa)
> goto err;
> }
>
> + vhost_iotlb_init(&v->resv_iotlb, 0, 0);
> +
> r = dev_set_name(&v->dev, "vhost-vdpa-%u", minor);
> if (r)
> goto err;
> --
> 2.25.1