Different endpoint can support different page size, probe
endpoint if it supports specific page size otherwise use
global page sizes.
Signed-off-by: Bharat Bhushan <[email protected]>
---
v4->v5:
- Rebase to Linux v5.7-rc4
v3->v4:
- Fix whitespace error
v2->v3:
- Fixed error return for incompatible endpoint
- __u64 changed to __le64 in header file
drivers/iommu/virtio-iommu.c | 48 ++++++++++++++++++++++++++++---
include/uapi/linux/virtio_iommu.h | 7 +++++
2 files changed, 51 insertions(+), 4 deletions(-)
diff --git a/drivers/iommu/virtio-iommu.c b/drivers/iommu/virtio-iommu.c
index d5cac4f46ca5..9513d2ab819e 100644
--- a/drivers/iommu/virtio-iommu.c
+++ b/drivers/iommu/virtio-iommu.c
@@ -78,6 +78,7 @@ struct viommu_endpoint {
struct viommu_dev *viommu;
struct viommu_domain *vdomain;
struct list_head resv_regions;
+ u64 pgsize_bitmap;
};
struct viommu_request {
@@ -415,6 +416,19 @@ static int viommu_replay_mappings(struct viommu_domain *vdomain)
return ret;
}
+static int viommu_set_pgsize_bitmap(struct viommu_endpoint *vdev,
+ struct virtio_iommu_probe_pgsize_mask *mask,
+ size_t len)
+{
+ u64 pgsize_bitmap = le64_to_cpu(mask->pgsize_bitmap);
+
+ if (len < sizeof(*mask))
+ return -EINVAL;
+
+ vdev->pgsize_bitmap = pgsize_bitmap;
+ return 0;
+}
+
static int viommu_add_resv_mem(struct viommu_endpoint *vdev,
struct virtio_iommu_probe_resv_mem *mem,
size_t len)
@@ -499,6 +513,9 @@ static int viommu_probe_endpoint(struct viommu_dev *viommu, struct device *dev)
case VIRTIO_IOMMU_PROBE_T_RESV_MEM:
ret = viommu_add_resv_mem(vdev, (void *)prop, len);
break;
+ case VIRTIO_IOMMU_PROBE_T_PAGE_SIZE_MASK:
+ ret = viommu_set_pgsize_bitmap(vdev, (void *)prop, len);
+ break;
default:
dev_err(dev, "unknown viommu prop 0x%x\n", type);
}
@@ -630,7 +647,7 @@ static int viommu_domain_finalise(struct viommu_endpoint *vdev,
vdomain->id = (unsigned int)ret;
- domain->pgsize_bitmap = viommu->pgsize_bitmap;
+ domain->pgsize_bitmap = vdev->pgsize_bitmap;
domain->geometry = viommu->geometry;
vdomain->map_flags = viommu->map_flags;
@@ -654,6 +671,29 @@ static void viommu_domain_free(struct iommu_domain *domain)
kfree(vdomain);
}
+/*
+ * Check whether the endpoint's capabilities are compatible with other
+ * endpoints in the domain. Report any inconsistency.
+ */
+static bool viommu_endpoint_is_compatible(struct viommu_endpoint *vdev,
+ struct viommu_domain *vdomain)
+{
+ struct device *dev = vdev->dev;
+
+ if (vdomain->viommu != vdev->viommu) {
+ dev_err(dev, "cannot attach to foreign vIOMMU\n");
+ return false;
+ }
+
+ if (vdomain->domain.pgsize_bitmap != vdev->pgsize_bitmap) {
+ dev_err(dev, "incompatible domain bitmap 0x%lx != 0x%llx\n",
+ vdomain->domain.pgsize_bitmap, vdev->pgsize_bitmap);
+ return false;
+ }
+
+ return true;
+}
+
static int viommu_attach_dev(struct iommu_domain *domain, struct device *dev)
{
int i;
@@ -670,9 +710,8 @@ static int viommu_attach_dev(struct iommu_domain *domain, struct device *dev)
* owns it.
*/
ret = viommu_domain_finalise(vdev, domain);
- } else if (vdomain->viommu != vdev->viommu) {
- dev_err(dev, "cannot attach to foreign vIOMMU\n");
- ret = -EXDEV;
+ } else if (!viommu_endpoint_is_compatible(vdev, vdomain)) {
+ ret = -EINVAL;
}
mutex_unlock(&vdomain->mutex);
@@ -886,6 +925,7 @@ static int viommu_add_device(struct device *dev)
vdev->dev = dev;
vdev->viommu = viommu;
+ vdev->pgsize_bitmap = viommu->pgsize_bitmap;
INIT_LIST_HEAD(&vdev->resv_regions);
dev_iommu_priv_set(dev, vdev);
diff --git a/include/uapi/linux/virtio_iommu.h b/include/uapi/linux/virtio_iommu.h
index 48e3c29223b5..2cced7accc99 100644
--- a/include/uapi/linux/virtio_iommu.h
+++ b/include/uapi/linux/virtio_iommu.h
@@ -111,6 +111,7 @@ struct virtio_iommu_req_unmap {
#define VIRTIO_IOMMU_PROBE_T_NONE 0
#define VIRTIO_IOMMU_PROBE_T_RESV_MEM 1
+#define VIRTIO_IOMMU_PROBE_T_PAGE_SIZE_MASK 2
#define VIRTIO_IOMMU_PROBE_T_MASK 0xfff
@@ -119,6 +120,12 @@ struct virtio_iommu_probe_property {
__le16 length;
};
+struct virtio_iommu_probe_pgsize_mask {
+ struct virtio_iommu_probe_property head;
+ __u8 reserved[4];
+ __le64 pgsize_bitmap;
+};
+
#define VIRTIO_IOMMU_RESV_MEM_T_RESERVED 0
#define VIRTIO_IOMMU_RESV_MEM_T_MSI 1
--
2.17.1
On Tue, May 05, 2020 at 03:00:04PM +0530, Bharat Bhushan wrote:
> Different endpoint can support different page size, probe
> endpoint if it supports specific page size otherwise use
> global page sizes.
>
> Signed-off-by: Bharat Bhushan <[email protected]>
> ---
> v4->v5:
> - Rebase to Linux v5.7-rc4
>
> v3->v4:
> - Fix whitespace error
>
> v2->v3:
> - Fixed error return for incompatible endpoint
> - __u64 changed to __le64 in header file
>
> drivers/iommu/virtio-iommu.c | 48 ++++++++++++++++++++++++++++---
> include/uapi/linux/virtio_iommu.h | 7 +++++
> 2 files changed, 51 insertions(+), 4 deletions(-)
>
> diff --git a/drivers/iommu/virtio-iommu.c b/drivers/iommu/virtio-iommu.c
> index d5cac4f46ca5..9513d2ab819e 100644
> --- a/drivers/iommu/virtio-iommu.c
> +++ b/drivers/iommu/virtio-iommu.c
> @@ -78,6 +78,7 @@ struct viommu_endpoint {
> struct viommu_dev *viommu;
> struct viommu_domain *vdomain;
> struct list_head resv_regions;
> + u64 pgsize_bitmap;
> };
>
> struct viommu_request {
> @@ -415,6 +416,19 @@ static int viommu_replay_mappings(struct viommu_domain *vdomain)
> return ret;
> }
>
> +static int viommu_set_pgsize_bitmap(struct viommu_endpoint *vdev,
> + struct virtio_iommu_probe_pgsize_mask *mask,
> + size_t len)
> +{
> + u64 pgsize_bitmap = le64_to_cpu(mask->pgsize_bitmap);
> +
> + if (len < sizeof(*mask))
This is too late to validate length, you have dereferenced it already.
do it before the read pls.
> + return -EINVAL;
OK but note that guest will then just proceed to ignore the
property. Is that really OK? Wouldn't host want to know?
> +
> + vdev->pgsize_bitmap = pgsize_bitmap;
what if bitmap is 0? Is that a valid size? I see a bunch of
BUG_ON with that value ...
I also see a bunch of code like e.g. this:
pg_size = 1UL << __ffs(pgsize_bitmap);
which probably won't DTRT on a 32 bit guest if the bitmap has bits
set in the high word.
> + return 0;
> +}
> +
> static int viommu_add_resv_mem(struct viommu_endpoint *vdev,
> struct virtio_iommu_probe_resv_mem *mem,
> size_t len)
> @@ -499,6 +513,9 @@ static int viommu_probe_endpoint(struct viommu_dev *viommu, struct device *dev)
> case VIRTIO_IOMMU_PROBE_T_RESV_MEM:
> ret = viommu_add_resv_mem(vdev, (void *)prop, len);
> break;
> + case VIRTIO_IOMMU_PROBE_T_PAGE_SIZE_MASK:
> + ret = viommu_set_pgsize_bitmap(vdev, (void *)prop, len);
> + break;
> default:
> dev_err(dev, "unknown viommu prop 0x%x\n", type);
> }
> @@ -630,7 +647,7 @@ static int viommu_domain_finalise(struct viommu_endpoint *vdev,
>
> vdomain->id = (unsigned int)ret;
>
> - domain->pgsize_bitmap = viommu->pgsize_bitmap;
> + domain->pgsize_bitmap = vdev->pgsize_bitmap;
> domain->geometry = viommu->geometry;
>
> vdomain->map_flags = viommu->map_flags;
> @@ -654,6 +671,29 @@ static void viommu_domain_free(struct iommu_domain *domain)
> kfree(vdomain);
> }
>
> +/*
> + * Check whether the endpoint's capabilities are compatible with other
> + * endpoints in the domain. Report any inconsistency.
> + */
> +static bool viommu_endpoint_is_compatible(struct viommu_endpoint *vdev,
> + struct viommu_domain *vdomain)
> +{
> + struct device *dev = vdev->dev;
> +
> + if (vdomain->viommu != vdev->viommu) {
> + dev_err(dev, "cannot attach to foreign vIOMMU\n");
> + return false;
> + }
> +
> + if (vdomain->domain.pgsize_bitmap != vdev->pgsize_bitmap) {
> + dev_err(dev, "incompatible domain bitmap 0x%lx != 0x%llx\n",
> + vdomain->domain.pgsize_bitmap, vdev->pgsize_bitmap);
> + return false;
> + }
I'm confused by this. So let's assume host supports pages sizes
of 4k, 2M, 1G. It signals this in the properties. Nice.
Now domain supports 4k, 2M and that's all. Why is that a problem?
Just don't use 1G ...
> +
> + return true;
> +}
> +
> static int viommu_attach_dev(struct iommu_domain *domain, struct device *dev)
> {
> int i;
> @@ -670,9 +710,8 @@ static int viommu_attach_dev(struct iommu_domain *domain, struct device *dev)
> * owns it.
> */
> ret = viommu_domain_finalise(vdev, domain);
> - } else if (vdomain->viommu != vdev->viommu) {
> - dev_err(dev, "cannot attach to foreign vIOMMU\n");
> - ret = -EXDEV;
> + } else if (!viommu_endpoint_is_compatible(vdev, vdomain)) {
> + ret = -EINVAL;
> }
> mutex_unlock(&vdomain->mutex);
>
> @@ -886,6 +925,7 @@ static int viommu_add_device(struct device *dev)
>
> vdev->dev = dev;
> vdev->viommu = viommu;
> + vdev->pgsize_bitmap = viommu->pgsize_bitmap;
> INIT_LIST_HEAD(&vdev->resv_regions);
> dev_iommu_priv_set(dev, vdev);
>
> diff --git a/include/uapi/linux/virtio_iommu.h b/include/uapi/linux/virtio_iommu.h
> index 48e3c29223b5..2cced7accc99 100644
> --- a/include/uapi/linux/virtio_iommu.h
> +++ b/include/uapi/linux/virtio_iommu.h
As any virtio UAPI change, you need to copy virtio TC at some point
before this is merged ...
> @@ -111,6 +111,7 @@ struct virtio_iommu_req_unmap {
>
> #define VIRTIO_IOMMU_PROBE_T_NONE 0
> #define VIRTIO_IOMMU_PROBE_T_RESV_MEM 1
> +#define VIRTIO_IOMMU_PROBE_T_PAGE_SIZE_MASK 2
>
> #define VIRTIO_IOMMU_PROBE_T_MASK 0xfff
>
Does host need to know that guest will ignore the page size mask?
Maybe we need a feature bit.
> @@ -119,6 +120,12 @@ struct virtio_iommu_probe_property {
> __le16 length;
> };
>
> +struct virtio_iommu_probe_pgsize_mask {
> + struct virtio_iommu_probe_property head;
> + __u8 reserved[4];
> + __le64 pgsize_bitmap;
> +};
> +
This is UAPI. Document the format of pgsize_bitmap please.
> #define VIRTIO_IOMMU_RESV_MEM_T_RESERVED 0
> #define VIRTIO_IOMMU_RESV_MEM_T_MSI 1
>
> --
> 2.17.1
> -----Original Message-----
> From: Michael S. Tsirkin <[email protected]>
> Sent: Wednesday, May 6, 2020 5:53 AM
> To: Bharat Bhushan <[email protected]>
> Cc: [email protected]; [email protected]; [email protected];
> [email protected]; [email protected];
> [email protected]; [email protected]; [email protected]
> Subject: [EXT] Re: [PATCH v5] iommu/virtio: Use page size bitmap supported by
> endpoint
>
> External Email
>
> ----------------------------------------------------------------------
> On Tue, May 05, 2020 at 03:00:04PM +0530, Bharat Bhushan wrote:
> > Different endpoint can support different page size, probe endpoint if
> > it supports specific page size otherwise use global page sizes.
> >
> > Signed-off-by: Bharat Bhushan <[email protected]>
> > ---
> > v4->v5:
> > - Rebase to Linux v5.7-rc4
> >
> > v3->v4:
> > - Fix whitespace error
> >
> > v2->v3:
> > - Fixed error return for incompatible endpoint
> > - __u64 changed to __le64 in header file
> >
> > drivers/iommu/virtio-iommu.c | 48 ++++++++++++++++++++++++++++---
> > include/uapi/linux/virtio_iommu.h | 7 +++++
> > 2 files changed, 51 insertions(+), 4 deletions(-)
> >
> > diff --git a/drivers/iommu/virtio-iommu.c
> > b/drivers/iommu/virtio-iommu.c index d5cac4f46ca5..9513d2ab819e 100644
> > --- a/drivers/iommu/virtio-iommu.c
> > +++ b/drivers/iommu/virtio-iommu.c
> > @@ -78,6 +78,7 @@ struct viommu_endpoint {
> > struct viommu_dev *viommu;
> > struct viommu_domain *vdomain;
> > struct list_head resv_regions;
> > + u64 pgsize_bitmap;
> > };
> >
> > struct viommu_request {
> > @@ -415,6 +416,19 @@ static int viommu_replay_mappings(struct
> viommu_domain *vdomain)
> > return ret;
> > }
> >
> > +static int viommu_set_pgsize_bitmap(struct viommu_endpoint *vdev,
> > + struct virtio_iommu_probe_pgsize_mask *mask,
> > + size_t len)
> > +{
> > + u64 pgsize_bitmap = le64_to_cpu(mask->pgsize_bitmap);
> > +
> > + if (len < sizeof(*mask))
>
> This is too late to validate length, you have dereferenced it already.
> do it before the read pls.
Yes, Will change here and other places as well
>
> > + return -EINVAL;
>
> OK but note that guest will then just proceed to ignore the property. Is that really
> OK? Wouldn't host want to know?
Guest need to be in sync with device, so yes seems like guest need to tell device which page-size-mask it is using.
Corresponding spec change patch (https://www.mail-archive.com/[email protected]/msg06214.html)
Would like Jean/Eric to comment here as well.
>
>
> > +
> > + vdev->pgsize_bitmap = pgsize_bitmap;
>
> what if bitmap is 0? Is that a valid size? I see a bunch of BUG_ON with that value ...
As per spec proposed device is supposed to set at-least one bit.
Will add a bug_on her.
Should we add bug_on or switch to global config page-size mask if this is zero (notify device which page-size-mask it is using).
>
> I also see a bunch of code like e.g. this:
>
> pg_size = 1UL << __ffs(pgsize_bitmap);
>
> which probably won't DTRT on a 32 bit guest if the bitmap has bits set in the high
> word.
>
My thought is that in that case viommu_domain_finalise() will fail, do not proceed.
>
>
> > + return 0;
> > +}
> > +
> > static int viommu_add_resv_mem(struct viommu_endpoint *vdev,
> > struct virtio_iommu_probe_resv_mem *mem,
> > size_t len)
> > @@ -499,6 +513,9 @@ static int viommu_probe_endpoint(struct viommu_dev
> *viommu, struct device *dev)
> > case VIRTIO_IOMMU_PROBE_T_RESV_MEM:
> > ret = viommu_add_resv_mem(vdev, (void *)prop, len);
> > break;
> > + case VIRTIO_IOMMU_PROBE_T_PAGE_SIZE_MASK:
> > + ret = viommu_set_pgsize_bitmap(vdev, (void *)prop, len);
> > + break;
> > default:
> > dev_err(dev, "unknown viommu prop 0x%x\n", type);
> > }
> > @@ -630,7 +647,7 @@ static int viommu_domain_finalise(struct
> > viommu_endpoint *vdev,
> >
> > vdomain->id = (unsigned int)ret;
> >
> > - domain->pgsize_bitmap = viommu->pgsize_bitmap;
> > + domain->pgsize_bitmap = vdev->pgsize_bitmap;
> > domain->geometry = viommu->geometry;
> >
> > vdomain->map_flags = viommu->map_flags;
> > @@ -654,6 +671,29 @@ static void viommu_domain_free(struct iommu_domain
> *domain)
> > kfree(vdomain);
> > }
> >
> > +/*
> > + * Check whether the endpoint's capabilities are compatible with
> > +other
> > + * endpoints in the domain. Report any inconsistency.
> > + */
> > +static bool viommu_endpoint_is_compatible(struct viommu_endpoint *vdev,
> > + struct viommu_domain *vdomain) {
> > + struct device *dev = vdev->dev;
> > +
> > + if (vdomain->viommu != vdev->viommu) {
> > + dev_err(dev, "cannot attach to foreign vIOMMU\n");
> > + return false;
> > + }
> > +
> > + if (vdomain->domain.pgsize_bitmap != vdev->pgsize_bitmap) {
> > + dev_err(dev, "incompatible domain bitmap 0x%lx != 0x%llx\n",
> > + vdomain->domain.pgsize_bitmap, vdev->pgsize_bitmap);
> > + return false;
> > + }
>
> I'm confused by this. So let's assume host supports pages sizes of 4k, 2M, 1G. It
> signals this in the properties. Nice.
> Now domain supports 4k, 2M and that's all. Why is that a problem?
> Just don't use 1G ...
Is not it too to change the existing domain properties, for devices already attached to domain? New devices must match to domain page-size.
>
>
> > +
> > + return true;
> > +}
> > +
> > static int viommu_attach_dev(struct iommu_domain *domain, struct
> > device *dev) {
> > int i;
> > @@ -670,9 +710,8 @@ static int viommu_attach_dev(struct iommu_domain
> *domain, struct device *dev)
> > * owns it.
> > */
> > ret = viommu_domain_finalise(vdev, domain);
> > - } else if (vdomain->viommu != vdev->viommu) {
> > - dev_err(dev, "cannot attach to foreign vIOMMU\n");
> > - ret = -EXDEV;
> > + } else if (!viommu_endpoint_is_compatible(vdev, vdomain)) {
> > + ret = -EINVAL;
> > }
> > mutex_unlock(&vdomain->mutex);
> >
> > @@ -886,6 +925,7 @@ static int viommu_add_device(struct device *dev)
> >
> > vdev->dev = dev;
> > vdev->viommu = viommu;
> > + vdev->pgsize_bitmap = viommu->pgsize_bitmap;
> > INIT_LIST_HEAD(&vdev->resv_regions);
> > dev_iommu_priv_set(dev, vdev);
> >
> > diff --git a/include/uapi/linux/virtio_iommu.h
> > b/include/uapi/linux/virtio_iommu.h
> > index 48e3c29223b5..2cced7accc99 100644
> > --- a/include/uapi/linux/virtio_iommu.h
> > +++ b/include/uapi/linux/virtio_iommu.h
>
> As any virtio UAPI change, you need to copy virtio TC at some point before this is
> merged ...
Jean already send patch for same
https://www.mail-archive.com/[email protected]/msg06214.html
Do we need to do anything additional?
>
> > @@ -111,6 +111,7 @@ struct virtio_iommu_req_unmap {
> >
> > #define VIRTIO_IOMMU_PROBE_T_NONE 0
> > #define VIRTIO_IOMMU_PROBE_T_RESV_MEM 1
> > +#define VIRTIO_IOMMU_PROBE_T_PAGE_SIZE_MASK 2
> >
> > #define VIRTIO_IOMMU_PROBE_T_MASK 0xfff
> >
>
> Does host need to know that guest will ignore the page size mask?
> Maybe we need a feature bit.
>
> > @@ -119,6 +120,12 @@ struct virtio_iommu_probe_property {
> > __le16 length;
> > };
> >
> > +struct virtio_iommu_probe_pgsize_mask {
> > + struct virtio_iommu_probe_property head;
> > + __u8 reserved[4];
> > + __le64 pgsize_bitmap;
> > +};
> > +
>
> This is UAPI. Document the format of pgsize_bitmap please.
Ok,
Thanks
-Bharat
>
>
> > #define VIRTIO_IOMMU_RESV_MEM_T_RESERVED 0
> > #define VIRTIO_IOMMU_RESV_MEM_T_MSI 1
> >
> > --
> > 2.17.1
On Thu, May 07, 2020 at 11:24:29AM +0000, Bharat Bhushan wrote:
>
>
> > -----Original Message-----
> > From: Michael S. Tsirkin <[email protected]>
> > Sent: Wednesday, May 6, 2020 5:53 AM
> > To: Bharat Bhushan <[email protected]>
> > Cc: [email protected]; [email protected]; [email protected];
> > [email protected]; [email protected];
> > [email protected]; [email protected]; [email protected]
> > Subject: [EXT] Re: [PATCH v5] iommu/virtio: Use page size bitmap supported by
> > endpoint
> >
> > External Email
> >
> > ----------------------------------------------------------------------
> > On Tue, May 05, 2020 at 03:00:04PM +0530, Bharat Bhushan wrote:
> > > Different endpoint can support different page size, probe endpoint if
> > > it supports specific page size otherwise use global page sizes.
> > >
> > > Signed-off-by: Bharat Bhushan <[email protected]>
> > > ---
> > > v4->v5:
> > > - Rebase to Linux v5.7-rc4
> > >
> > > v3->v4:
> > > - Fix whitespace error
> > >
> > > v2->v3:
> > > - Fixed error return for incompatible endpoint
> > > - __u64 changed to __le64 in header file
> > >
> > > drivers/iommu/virtio-iommu.c | 48 ++++++++++++++++++++++++++++---
> > > include/uapi/linux/virtio_iommu.h | 7 +++++
> > > 2 files changed, 51 insertions(+), 4 deletions(-)
> > >
> > > diff --git a/drivers/iommu/virtio-iommu.c
> > > b/drivers/iommu/virtio-iommu.c index d5cac4f46ca5..9513d2ab819e 100644
> > > --- a/drivers/iommu/virtio-iommu.c
> > > +++ b/drivers/iommu/virtio-iommu.c
> > > @@ -78,6 +78,7 @@ struct viommu_endpoint {
> > > struct viommu_dev *viommu;
> > > struct viommu_domain *vdomain;
> > > struct list_head resv_regions;
> > > + u64 pgsize_bitmap;
> > > };
> > >
> > > struct viommu_request {
> > > @@ -415,6 +416,19 @@ static int viommu_replay_mappings(struct
> > viommu_domain *vdomain)
> > > return ret;
> > > }
> > >
> > > +static int viommu_set_pgsize_bitmap(struct viommu_endpoint *vdev,
> > > + struct virtio_iommu_probe_pgsize_mask *mask,
> > > + size_t len)
> > > +{
> > > + u64 pgsize_bitmap = le64_to_cpu(mask->pgsize_bitmap);
> > > +
> > > + if (len < sizeof(*mask))
> >
> > This is too late to validate length, you have dereferenced it already.
> > do it before the read pls.
>
> Yes, Will change here and other places as well
>
> >
> > > + return -EINVAL;
> >
> > OK but note that guest will then just proceed to ignore the property. Is that really
> > OK? Wouldn't host want to know?
>
>
> Guest need to be in sync with device, so yes seems like guest need to tell device which page-size-mask it is using.
>
> Corresponding spec change patch (https://www.mail-archive.com/[email protected]/msg06214.html)
>
> Would like Jean/Eric to comment here as well.
>
> >
> >
> > > +
> > > + vdev->pgsize_bitmap = pgsize_bitmap;
> >
> > what if bitmap is 0? Is that a valid size? I see a bunch of BUG_ON with that value ...
>
> As per spec proposed device is supposed to set at-least one bit.
> Will add a bug_on her.
Or better fail probe ...
> Should we add bug_on or switch to global config page-size mask if this is zero (notify device which page-size-mask it is using).
It's a spec violation, I wouldn't try to use the device.
> >
> > I also see a bunch of code like e.g. this:
> >
> > pg_size = 1UL << __ffs(pgsize_bitmap);
> >
> > which probably won't DTRT on a 32 bit guest if the bitmap has bits set in the high
> > word.
> >
>
> My thought is that in that case viommu_domain_finalise() will fail, do not proceed.
That's undefined behaviour in C. You need to make sure this condition
is never reached. And spec does not make this illegal at all
so it looks like we actually need to handle this gracefully.
> >
> >
> > > + return 0;
> > > +}
> > > +
> > > static int viommu_add_resv_mem(struct viommu_endpoint *vdev,
> > > struct virtio_iommu_probe_resv_mem *mem,
> > > size_t len)
> > > @@ -499,6 +513,9 @@ static int viommu_probe_endpoint(struct viommu_dev
> > *viommu, struct device *dev)
> > > case VIRTIO_IOMMU_PROBE_T_RESV_MEM:
> > > ret = viommu_add_resv_mem(vdev, (void *)prop, len);
> > > break;
> > > + case VIRTIO_IOMMU_PROBE_T_PAGE_SIZE_MASK:
> > > + ret = viommu_set_pgsize_bitmap(vdev, (void *)prop, len);
> > > + break;
> > > default:
> > > dev_err(dev, "unknown viommu prop 0x%x\n", type);
> > > }
> > > @@ -630,7 +647,7 @@ static int viommu_domain_finalise(struct
> > > viommu_endpoint *vdev,
> > >
> > > vdomain->id = (unsigned int)ret;
> > >
> > > - domain->pgsize_bitmap = viommu->pgsize_bitmap;
> > > + domain->pgsize_bitmap = vdev->pgsize_bitmap;
> > > domain->geometry = viommu->geometry;
> > >
> > > vdomain->map_flags = viommu->map_flags;
> > > @@ -654,6 +671,29 @@ static void viommu_domain_free(struct iommu_domain
> > *domain)
> > > kfree(vdomain);
> > > }
> > >
> > > +/*
> > > + * Check whether the endpoint's capabilities are compatible with
> > > +other
> > > + * endpoints in the domain. Report any inconsistency.
> > > + */
> > > +static bool viommu_endpoint_is_compatible(struct viommu_endpoint *vdev,
> > > + struct viommu_domain *vdomain) {
> > > + struct device *dev = vdev->dev;
> > > +
> > > + if (vdomain->viommu != vdev->viommu) {
> > > + dev_err(dev, "cannot attach to foreign vIOMMU\n");
> > > + return false;
> > > + }
> > > +
> > > + if (vdomain->domain.pgsize_bitmap != vdev->pgsize_bitmap) {
> > > + dev_err(dev, "incompatible domain bitmap 0x%lx != 0x%llx\n",
> > > + vdomain->domain.pgsize_bitmap, vdev->pgsize_bitmap);
> > > + return false;
> > > + }
> >
> > I'm confused by this. So let's assume host supports pages sizes of 4k, 2M, 1G. It
> > signals this in the properties. Nice.
> > Now domain supports 4k, 2M and that's all. Why is that a problem?
> > Just don't use 1G ...
>
> Is not it too to change the existing domain properties, for devices already attached to domain? New devices must match to domain page-size.
Again if IOMMU supports more page sizes than domain uses, why is
that a problem? Just don't utilize the bits domain does not use.
> >
> >
> > > +
> > > + return true;
> > > +}
> > > +
> > > static int viommu_attach_dev(struct iommu_domain *domain, struct
> > > device *dev) {
> > > int i;
> > > @@ -670,9 +710,8 @@ static int viommu_attach_dev(struct iommu_domain
> > *domain, struct device *dev)
> > > * owns it.
> > > */
> > > ret = viommu_domain_finalise(vdev, domain);
> > > - } else if (vdomain->viommu != vdev->viommu) {
> > > - dev_err(dev, "cannot attach to foreign vIOMMU\n");
> > > - ret = -EXDEV;
> > > + } else if (!viommu_endpoint_is_compatible(vdev, vdomain)) {
> > > + ret = -EINVAL;
> > > }
> > > mutex_unlock(&vdomain->mutex);
> > >
> > > @@ -886,6 +925,7 @@ static int viommu_add_device(struct device *dev)
> > >
> > > vdev->dev = dev;
> > > vdev->viommu = viommu;
> > > + vdev->pgsize_bitmap = viommu->pgsize_bitmap;
> > > INIT_LIST_HEAD(&vdev->resv_regions);
> > > dev_iommu_priv_set(dev, vdev);
> > >
> > > diff --git a/include/uapi/linux/virtio_iommu.h
> > > b/include/uapi/linux/virtio_iommu.h
> > > index 48e3c29223b5..2cced7accc99 100644
> > > --- a/include/uapi/linux/virtio_iommu.h
> > > +++ b/include/uapi/linux/virtio_iommu.h
> >
> > As any virtio UAPI change, you need to copy virtio TC at some point before this is
> > merged ...
>
> Jean already send patch for same
> https://www.mail-archive.com/[email protected]/msg06214.html
>
> Do we need to do anything additional?
Yes, that is spec patch. you need to see the UAPI patch to virtio-dev.
> >
> > > @@ -111,6 +111,7 @@ struct virtio_iommu_req_unmap {
> > >
> > > #define VIRTIO_IOMMU_PROBE_T_NONE 0
> > > #define VIRTIO_IOMMU_PROBE_T_RESV_MEM 1
> > > +#define VIRTIO_IOMMU_PROBE_T_PAGE_SIZE_MASK 2
> > >
> > > #define VIRTIO_IOMMU_PROBE_T_MASK 0xfff
> > >
> >
> > Does host need to know that guest will ignore the page size mask?
> > Maybe we need a feature bit.
> >
> > > @@ -119,6 +120,12 @@ struct virtio_iommu_probe_property {
> > > __le16 length;
> > > };
> > >
> > > +struct virtio_iommu_probe_pgsize_mask {
> > > + struct virtio_iommu_probe_property head;
> > > + __u8 reserved[4];
> > > + __le64 pgsize_bitmap;
> > > +};
> > > +
> >
> > This is UAPI. Document the format of pgsize_bitmap please.
>
> Ok,
>
> Thanks
> -Bharat
>
> >
> >
> > > #define VIRTIO_IOMMU_RESV_MEM_T_RESERVED 0
> > > #define VIRTIO_IOMMU_RESV_MEM_T_MSI 1
> > >
> > > --
> > > 2.17.1
Hi Bharat,
On 5/7/20 1:24 PM, Bharat Bhushan wrote:
>
>
>> -----Original Message-----
>> From: Michael S. Tsirkin <[email protected]>
>> Sent: Wednesday, May 6, 2020 5:53 AM
>> To: Bharat Bhushan <[email protected]>
>> Cc: [email protected]; [email protected]; [email protected];
>> [email protected]; [email protected];
>> [email protected]; [email protected]; [email protected]
>> Subject: [EXT] Re: [PATCH v5] iommu/virtio: Use page size bitmap supported by
>> endpoint
>>
>> External Email
>>
>> ----------------------------------------------------------------------
>> On Tue, May 05, 2020 at 03:00:04PM +0530, Bharat Bhushan wrote:
>>> Different endpoint can support different page size, probe endpoint if
>>> it supports specific page size otherwise use global page sizes.
>>>
>>> Signed-off-by: Bharat Bhushan <[email protected]>
>>> ---
>>> v4->v5:
>>> - Rebase to Linux v5.7-rc4
>>>
>>> v3->v4:
>>> - Fix whitespace error
>>>
>>> v2->v3:
>>> - Fixed error return for incompatible endpoint
>>> - __u64 changed to __le64 in header file
>>>
>>> drivers/iommu/virtio-iommu.c | 48 ++++++++++++++++++++++++++++---
>>> include/uapi/linux/virtio_iommu.h | 7 +++++
>>> 2 files changed, 51 insertions(+), 4 deletions(-)
>>>
>>> diff --git a/drivers/iommu/virtio-iommu.c
>>> b/drivers/iommu/virtio-iommu.c index d5cac4f46ca5..9513d2ab819e 100644
>>> --- a/drivers/iommu/virtio-iommu.c
>>> +++ b/drivers/iommu/virtio-iommu.c
>>> @@ -78,6 +78,7 @@ struct viommu_endpoint {
>>> struct viommu_dev *viommu;
>>> struct viommu_domain *vdomain;
>>> struct list_head resv_regions;
>>> + u64 pgsize_bitmap;
>>> };
>>>
>>> struct viommu_request {
>>> @@ -415,6 +416,19 @@ static int viommu_replay_mappings(struct
>> viommu_domain *vdomain)
>>> return ret;
>>> }
>>>
>>> +static int viommu_set_pgsize_bitmap(struct viommu_endpoint *vdev,
>>> + struct virtio_iommu_probe_pgsize_mask *mask,
>>> + size_t len)
>>> +{
>>> + u64 pgsize_bitmap = le64_to_cpu(mask->pgsize_bitmap);
>>> +
>>> + if (len < sizeof(*mask))
>>
>> This is too late to validate length, you have dereferenced it already.
>> do it before the read pls.
>
> Yes, Will change here and other places as well
>
>>
>>> + return -EINVAL;
>>
>> OK but note that guest will then just proceed to ignore the property. Is that really
>> OK? Wouldn't host want to know?
>
>
> Guest need to be in sync with device, so yes seems like guest need to tell device which page-size-mask it is using.
>
> Corresponding spec change patch (https://www.mail-archive.com/[email protected]/msg06214.html)
>
> Would like Jean/Eric to comment here as well.
why can't we fail the probe request in that case? This is a misbehaving
device that reports malformed property, right?
Thanks
Eric
>
>>
>>
>>> +
>>> + vdev->pgsize_bitmap = pgsize_bitmap;
>>
>> what if bitmap is 0? Is that a valid size? I see a bunch of BUG_ON with that value ...
>
> As per spec proposed device is supposed to set at-least one bit.
> Will add a bug_on her.
> Should we add bug_on or switch to global config page-size mask if this is zero (notify device which page-size-mask it is using).
>
>>
>> I also see a bunch of code like e.g. this:
>>
>> pg_size = 1UL << __ffs(pgsize_bitmap);
>>
>> which probably won't DTRT on a 32 bit guest if the bitmap has bits set in the high
>> word.
>>
>
> My thought is that in that case viommu_domain_finalise() will fail, do not proceed.
>
>>
>>
>>> + return 0;
>>> +}
>>> +
>>> static int viommu_add_resv_mem(struct viommu_endpoint *vdev,
>>> struct virtio_iommu_probe_resv_mem *mem,
>>> size_t len)
>>> @@ -499,6 +513,9 @@ static int viommu_probe_endpoint(struct viommu_dev
>> *viommu, struct device *dev)
>>> case VIRTIO_IOMMU_PROBE_T_RESV_MEM:
>>> ret = viommu_add_resv_mem(vdev, (void *)prop, len);
>>> break;
>>> + case VIRTIO_IOMMU_PROBE_T_PAGE_SIZE_MASK:
>>> + ret = viommu_set_pgsize_bitmap(vdev, (void *)prop, len);
>>> + break;
>>> default:
>>> dev_err(dev, "unknown viommu prop 0x%x\n", type);
>>> }
>>> @@ -630,7 +647,7 @@ static int viommu_domain_finalise(struct
>>> viommu_endpoint *vdev,
>>>
>>> vdomain->id = (unsigned int)ret;
>>>
>>> - domain->pgsize_bitmap = viommu->pgsize_bitmap;
>>> + domain->pgsize_bitmap = vdev->pgsize_bitmap;
>>> domain->geometry = viommu->geometry;
>>>
>>> vdomain->map_flags = viommu->map_flags;
>>> @@ -654,6 +671,29 @@ static void viommu_domain_free(struct iommu_domain
>> *domain)
>>> kfree(vdomain);
>>> }
>>>
>>> +/*
>>> + * Check whether the endpoint's capabilities are compatible with
>>> +other
>>> + * endpoints in the domain. Report any inconsistency.
>>> + */
>>> +static bool viommu_endpoint_is_compatible(struct viommu_endpoint *vdev,
>>> + struct viommu_domain *vdomain) {
>>> + struct device *dev = vdev->dev;
>>> +
>>> + if (vdomain->viommu != vdev->viommu) {
>>> + dev_err(dev, "cannot attach to foreign vIOMMU\n");
>>> + return false;
>>> + }
>>> +
>>> + if (vdomain->domain.pgsize_bitmap != vdev->pgsize_bitmap) {
>>> + dev_err(dev, "incompatible domain bitmap 0x%lx != 0x%llx\n",
>>> + vdomain->domain.pgsize_bitmap, vdev->pgsize_bitmap);
>>> + return false;
>>> + }
>>
>> I'm confused by this. So let's assume host supports pages sizes of 4k, 2M, 1G. It
>> signals this in the properties. Nice.
>> Now domain supports 4k, 2M and that's all. Why is that a problem?
>> Just don't use 1G ...
>
> Is not it too to change the existing domain properties, for devices already attached to domain? New devices must match to domain page-size.
>
>>
>>
>>> +
>>> + return true;
>>> +}
>>> +
>>> static int viommu_attach_dev(struct iommu_domain *domain, struct
>>> device *dev) {
>>> int i;
>>> @@ -670,9 +710,8 @@ static int viommu_attach_dev(struct iommu_domain
>> *domain, struct device *dev)
>>> * owns it.
>>> */
>>> ret = viommu_domain_finalise(vdev, domain);
>>> - } else if (vdomain->viommu != vdev->viommu) {
>>> - dev_err(dev, "cannot attach to foreign vIOMMU\n");
>>> - ret = -EXDEV;
>>> + } else if (!viommu_endpoint_is_compatible(vdev, vdomain)) {
>>> + ret = -EINVAL;
>>> }
>>> mutex_unlock(&vdomain->mutex);
>>>
>>> @@ -886,6 +925,7 @@ static int viommu_add_device(struct device *dev)
>>>
>>> vdev->dev = dev;
>>> vdev->viommu = viommu;
>>> + vdev->pgsize_bitmap = viommu->pgsize_bitmap;
>>> INIT_LIST_HEAD(&vdev->resv_regions);
>>> dev_iommu_priv_set(dev, vdev);
>>>
>>> diff --git a/include/uapi/linux/virtio_iommu.h
>>> b/include/uapi/linux/virtio_iommu.h
>>> index 48e3c29223b5..2cced7accc99 100644
>>> --- a/include/uapi/linux/virtio_iommu.h
>>> +++ b/include/uapi/linux/virtio_iommu.h
>>
>> As any virtio UAPI change, you need to copy virtio TC at some point before this is
>> merged ...
>
> Jean already send patch for same
> https://www.mail-archive.com/[email protected]/msg06214.html
>
> Do we need to do anything additional?
>
>>
>>> @@ -111,6 +111,7 @@ struct virtio_iommu_req_unmap {
>>>
>>> #define VIRTIO_IOMMU_PROBE_T_NONE 0
>>> #define VIRTIO_IOMMU_PROBE_T_RESV_MEM 1
>>> +#define VIRTIO_IOMMU_PROBE_T_PAGE_SIZE_MASK 2
>>>
>>> #define VIRTIO_IOMMU_PROBE_T_MASK 0xfff
>>>
>>
>> Does host need to know that guest will ignore the page size mask?
>> Maybe we need a feature bit.
>>
>>> @@ -119,6 +120,12 @@ struct virtio_iommu_probe_property {
>>> __le16 length;
>>> };
>>>
>>> +struct virtio_iommu_probe_pgsize_mask {
>>> + struct virtio_iommu_probe_property head;
>>> + __u8 reserved[4];
>>> + __le64 pgsize_bitmap;
>>> +};
>>> +
>>
>> This is UAPI. Document the format of pgsize_bitmap please.
>
> Ok,
>
> Thanks
> -Bharat
>
>>
>>
>>> #define VIRTIO_IOMMU_RESV_MEM_T_RESERVED 0
>>> #define VIRTIO_IOMMU_RESV_MEM_T_MSI 1
>>>
>>> --
>>> 2.17.1
>
Hi,
On 5/7/20 1:32 PM, Michael S. Tsirkin wrote:
> On Thu, May 07, 2020 at 11:24:29AM +0000, Bharat Bhushan wrote:
>>
>>
>>> -----Original Message-----
>>> From: Michael S. Tsirkin <[email protected]>
>>> Sent: Wednesday, May 6, 2020 5:53 AM
>>> To: Bharat Bhushan <[email protected]>
>>> Cc: [email protected]; [email protected]; [email protected];
>>> [email protected]; [email protected];
>>> [email protected]; [email protected]; [email protected]
>>> Subject: [EXT] Re: [PATCH v5] iommu/virtio: Use page size bitmap supported by
>>> endpoint
>>>
>>> External Email
>>>
>>> ----------------------------------------------------------------------
>>> On Tue, May 05, 2020 at 03:00:04PM +0530, Bharat Bhushan wrote:
>>>> Different endpoint can support different page size, probe endpoint if
>>>> it supports specific page size otherwise use global page sizes.
>>>>
>>>> Signed-off-by: Bharat Bhushan <[email protected]>
>>>> ---
>>>> v4->v5:
>>>> - Rebase to Linux v5.7-rc4
>>>>
>>>> v3->v4:
>>>> - Fix whitespace error
>>>>
>>>> v2->v3:
>>>> - Fixed error return for incompatible endpoint
>>>> - __u64 changed to __le64 in header file
>>>>
>>>> drivers/iommu/virtio-iommu.c | 48 ++++++++++++++++++++++++++++---
>>>> include/uapi/linux/virtio_iommu.h | 7 +++++
>>>> 2 files changed, 51 insertions(+), 4 deletions(-)
>>>>
>>>> diff --git a/drivers/iommu/virtio-iommu.c
>>>> b/drivers/iommu/virtio-iommu.c index d5cac4f46ca5..9513d2ab819e 100644
>>>> --- a/drivers/iommu/virtio-iommu.c
>>>> +++ b/drivers/iommu/virtio-iommu.c
>>>> @@ -78,6 +78,7 @@ struct viommu_endpoint {
>>>> struct viommu_dev *viommu;
>>>> struct viommu_domain *vdomain;
>>>> struct list_head resv_regions;
>>>> + u64 pgsize_bitmap;
>>>> };
>>>>
>>>> struct viommu_request {
>>>> @@ -415,6 +416,19 @@ static int viommu_replay_mappings(struct
>>> viommu_domain *vdomain)
>>>> return ret;
>>>> }
>>>>
>>>> +static int viommu_set_pgsize_bitmap(struct viommu_endpoint *vdev,
>>>> + struct virtio_iommu_probe_pgsize_mask *mask,
>>>> + size_t len)
>>>> +{
>>>> + u64 pgsize_bitmap = le64_to_cpu(mask->pgsize_bitmap);
>>>> +
>>>> + if (len < sizeof(*mask))
>>>
>>> This is too late to validate length, you have dereferenced it already.
>>> do it before the read pls.
>>
>> Yes, Will change here and other places as well
>>
>>>
>>>> + return -EINVAL;
>>>
>>> OK but note that guest will then just proceed to ignore the property. Is that really
>>> OK? Wouldn't host want to know?
>>
>>
>> Guest need to be in sync with device, so yes seems like guest need to tell device which page-size-mask it is using.
>>
>> Corresponding spec change patch (https://www.mail-archive.com/[email protected]/msg06214.html)
>>
>> Would like Jean/Eric to comment here as well.
>>
>>>
>>>
>>>> +
>>>> + vdev->pgsize_bitmap = pgsize_bitmap;
>>>
>>> what if bitmap is 0? Is that a valid size? I see a bunch of BUG_ON with that value ...
>>
>> As per spec proposed device is supposed to set at-least one bit.
>> Will add a bug_on her.
>
> Or better fail probe ...
Yes I agree I would rather fail the probe.
>
>> Should we add bug_on or switch to global config page-size mask if this is zero (notify device which page-size-mask it is using).
>
> It's a spec violation, I wouldn't try to use the device.
>
>>>
>>> I also see a bunch of code like e.g. this:
>>>
>>> pg_size = 1UL << __ffs(pgsize_bitmap);
>>>
>>> which probably won't DTRT on a 32 bit guest if the bitmap has bits set in the high
>>> word.
>>>
>>
>> My thought is that in that case viommu_domain_finalise() will fail, do not proceed.
>
> That's undefined behaviour in C. You need to make sure this condition
> is never reached. And spec does not make this illegal at all
> so it looks like we actually need to handle this gracefully.
>
>
>>>
>>>
>>>> + return 0;
>>>> +}
>>>> +
>>>> static int viommu_add_resv_mem(struct viommu_endpoint *vdev,
>>>> struct virtio_iommu_probe_resv_mem *mem,
>>>> size_t len)
>>>> @@ -499,6 +513,9 @@ static int viommu_probe_endpoint(struct viommu_dev
>>> *viommu, struct device *dev)
>>>> case VIRTIO_IOMMU_PROBE_T_RESV_MEM:
>>>> ret = viommu_add_resv_mem(vdev, (void *)prop, len);
>>>> break;
>>>> + case VIRTIO_IOMMU_PROBE_T_PAGE_SIZE_MASK:
>>>> + ret = viommu_set_pgsize_bitmap(vdev, (void *)prop, len);
>>>> + break;
>>>> default:
>>>> dev_err(dev, "unknown viommu prop 0x%x\n", type);
>>>> }
>>>> @@ -630,7 +647,7 @@ static int viommu_domain_finalise(struct
>>>> viommu_endpoint *vdev,
>>>>
>>>> vdomain->id = (unsigned int)ret;
>>>>
>>>> - domain->pgsize_bitmap = viommu->pgsize_bitmap;
>>>> + domain->pgsize_bitmap = vdev->pgsize_bitmap;
>>>> domain->geometry = viommu->geometry;
>>>>
>>>> vdomain->map_flags = viommu->map_flags;
>>>> @@ -654,6 +671,29 @@ static void viommu_domain_free(struct iommu_domain
>>> *domain)
>>>> kfree(vdomain);
>>>> }
>>>>
>>>> +/*
>>>> + * Check whether the endpoint's capabilities are compatible with
>>>> +other
>>>> + * endpoints in the domain. Report any inconsistency.
>>>> + */
>>>> +static bool viommu_endpoint_is_compatible(struct viommu_endpoint *vdev,
>>>> + struct viommu_domain *vdomain) {
>>>> + struct device *dev = vdev->dev;
>>>> +
>>>> + if (vdomain->viommu != vdev->viommu) {
>>>> + dev_err(dev, "cannot attach to foreign vIOMMU\n");
>>>> + return false;
>>>> + }
>>>> +
>>>> + if (vdomain->domain.pgsize_bitmap != vdev->pgsize_bitmap) {
>>>> + dev_err(dev, "incompatible domain bitmap 0x%lx != 0x%llx\n",
>>>> + vdomain->domain.pgsize_bitmap, vdev->pgsize_bitmap);
>>>> + return false;
>>>> + }
>>>
>>> I'm confused by this. So let's assume host supports pages sizes of 4k, 2M, 1G. It
>>> signals this in the properties. Nice.
>>> Now domain supports 4k, 2M and that's all. Why is that a problem?
>>> Just don't use 1G ...
>>
>> Is not it too to change the existing domain properties, for devices already attached to domain? New devices must match to domain page-size.
>
> Again if IOMMU supports more page sizes than domain uses, why is
> that a problem? Just don't utilize the bits domain does not use.
I think I agree with you in that case. However it is a problem in the
opposite, ie. when a new device is added and this latter has less
options than the existing domain, right?
Thanks
Eric
>
>
>>>
>>>
>>>> +
>>>> + return true;
>>>> +}
>>>> +
>>>> static int viommu_attach_dev(struct iommu_domain *domain, struct
>>>> device *dev) {
>>>> int i;
>>>> @@ -670,9 +710,8 @@ static int viommu_attach_dev(struct iommu_domain
>>> *domain, struct device *dev)
>>>> * owns it.
>>>> */
>>>> ret = viommu_domain_finalise(vdev, domain);
>>>> - } else if (vdomain->viommu != vdev->viommu) {
>>>> - dev_err(dev, "cannot attach to foreign vIOMMU\n");
>>>> - ret = -EXDEV;
>>>> + } else if (!viommu_endpoint_is_compatible(vdev, vdomain)) {
>>>> + ret = -EINVAL;
>>>> }
>>>> mutex_unlock(&vdomain->mutex);
>>>>
>>>> @@ -886,6 +925,7 @@ static int viommu_add_device(struct device *dev)
>>>>
>>>> vdev->dev = dev;
>>>> vdev->viommu = viommu;
>>>> + vdev->pgsize_bitmap = viommu->pgsize_bitmap;
>>>> INIT_LIST_HEAD(&vdev->resv_regions);
>>>> dev_iommu_priv_set(dev, vdev);
>>>>
>>>> diff --git a/include/uapi/linux/virtio_iommu.h
>>>> b/include/uapi/linux/virtio_iommu.h
>>>> index 48e3c29223b5..2cced7accc99 100644
>>>> --- a/include/uapi/linux/virtio_iommu.h
>>>> +++ b/include/uapi/linux/virtio_iommu.h
>>>
>>> As any virtio UAPI change, you need to copy virtio TC at some point before this is
>>> merged ...
>>
>> Jean already send patch for same
>> https://www.mail-archive.com/[email protected]/msg06214.html
>>
>> Do we need to do anything additional?
>
>
> Yes, that is spec patch. you need to see the UAPI patch to virtio-dev.
>
>>>
>>>> @@ -111,6 +111,7 @@ struct virtio_iommu_req_unmap {
>>>>
>>>> #define VIRTIO_IOMMU_PROBE_T_NONE 0
>>>> #define VIRTIO_IOMMU_PROBE_T_RESV_MEM 1
>>>> +#define VIRTIO_IOMMU_PROBE_T_PAGE_SIZE_MASK 2
>>>>
>>>> #define VIRTIO_IOMMU_PROBE_T_MASK 0xfff
>>>>
>>>
>>> Does host need to know that guest will ignore the page size mask?
>>> Maybe we need a feature bit.
>>>
>>>> @@ -119,6 +120,12 @@ struct virtio_iommu_probe_property {
>>>> __le16 length;
>>>> };
>>>>
>>>> +struct virtio_iommu_probe_pgsize_mask {
>>>> + struct virtio_iommu_probe_property head;
>>>> + __u8 reserved[4];
>>>> + __le64 pgsize_bitmap;
>>>> +};
>>>> +
>>>
>>> This is UAPI. Document the format of pgsize_bitmap please.
>>
>> Ok,
>>
>> Thanks
>> -Bharat
>>
>>>
>>>
>>>> #define VIRTIO_IOMMU_RESV_MEM_T_RESERVED 0
>>>> #define VIRTIO_IOMMU_RESV_MEM_T_MSI 1
>>>>
>>>> --
>>>> 2.17.1
>
Hi,
On 5/6/20 2:22 AM, Michael S. Tsirkin wrote:
> On Tue, May 05, 2020 at 03:00:04PM +0530, Bharat Bhushan wrote:
>> Different endpoint can support different page size, probe
>> endpoint if it supports specific page size otherwise use
>> global page sizes.
>>
>> Signed-off-by: Bharat Bhushan <[email protected]>
>> ---
>> v4->v5:
>> - Rebase to Linux v5.7-rc4
>>
>> v3->v4:
>> - Fix whitespace error
>>
>> v2->v3:
>> - Fixed error return for incompatible endpoint
>> - __u64 changed to __le64 in header file
>>
>> drivers/iommu/virtio-iommu.c | 48 ++++++++++++++++++++++++++++---
>> include/uapi/linux/virtio_iommu.h | 7 +++++
>> 2 files changed, 51 insertions(+), 4 deletions(-)
>>
>> diff --git a/drivers/iommu/virtio-iommu.c b/drivers/iommu/virtio-iommu.c
>> index d5cac4f46ca5..9513d2ab819e 100644
>> --- a/drivers/iommu/virtio-iommu.c
>> +++ b/drivers/iommu/virtio-iommu.c
>> @@ -78,6 +78,7 @@ struct viommu_endpoint {
>> struct viommu_dev *viommu;
>> struct viommu_domain *vdomain;
>> struct list_head resv_regions;
>> + u64 pgsize_bitmap;
>> };
>>
>> struct viommu_request {
>> @@ -415,6 +416,19 @@ static int viommu_replay_mappings(struct viommu_domain *vdomain)
>> return ret;
>> }
>>
>> +static int viommu_set_pgsize_bitmap(struct viommu_endpoint *vdev,
>> + struct virtio_iommu_probe_pgsize_mask *mask,
>> + size_t len)
>> +{
>> + u64 pgsize_bitmap = le64_to_cpu(mask->pgsize_bitmap);
>> +
>> + if (len < sizeof(*mask))
>
> This is too late to validate length, you have dereferenced it already.
> do it before the read pls.
>
>> + return -EINVAL;
>
> OK but note that guest will then just proceed to ignore the
> property. Is that really OK? Wouldn't host want to know?
>
>
>> +
>> + vdev->pgsize_bitmap = pgsize_bitmap;
>
> what if bitmap is 0? Is that a valid size? I see a bunch of
> BUG_ON with that value ...
Indeed [PATCH v2] virtio-iommu: Add PAGE_SIZE_MASK property states
"The device MUST set at least one bit in page_size_mask, describing
the page granularity".
atm if the device returns a null mask the guest hits such BUG_ON()
[ 1.841434] kernel BUG at drivers/iommu/iommu.c:653!
[ 1.842868] Internal error: Oops - BUG: 0 [#1] SMP
[ 1.844261] Modules linked in:
[ 1.845161] CPU: 6 PID: 325 Comm: kworker/6:1 Not tainted
5.7.0-rc4-aarch64-guest-bharat-v5+ #196
[ 1.847474] Hardware name: linux,dummy-virt (DT)
[ 1.848329] Workqueue: events deferred_probe_work_func
[ 1.849229] pstate: 60400005 (nZCv daif +PAN -UAO)
[ 1.850116] pc : iommu_group_create_direct_mappings.isra.24+0x210/0x228
[ 1.851351] lr : iommu_group_add_device+0x108/0x2d0
[ 1.852270] sp : ffff800015bef880
[ 1.852901] x29: ffff800015bef880 x28: ffff0003cc3dab98
[ 1.853897] x27: 0000000000000000 x26: ffff0003cc3dab80
[ 1.854894] x25: ffff800011033480 x24: ffff0003cc080948
[ 1.855891] x23: ffff8000113f49c8 x22: 0000000000000000
[ 1.856887] x21: ffff0003cc3dac00 x20: ffff0003cc080a00
[ 1.858440] x19: 0000000000000000 x18: 0000000000000010
[ 1.860029] x17: 000000009a468e4c x16: 0000000000300604
[ 1.861616] x15: ffffffffffffffff x14: 0720072007200720
[ 1.863200] x13: 0720072007200720 x12: 0000000000000020
[ 1.864787] x11: 0101010101010101 x10: 7f7f7f7f7f7f7f7f
[ 1.866373] x9 : ffff8000107de0d8 x8 : 7f7f7f7f7f7f7f7f
[ 1.868048] x7 : 39322f392f2f2f2f x6 : 0000000080808080
[ 1.869634] x5 : ffff0003cc171000 x4 : ffff0003cc171000
[ 1.871218] x3 : ffff8000114aff28 x2 : 0000000000000000
[ 1.872802] x1 : ffff0003cb3c60b0 x0 : 0000000000000003
[ 1.874383] Call trace:
[ 1.875133] iommu_group_create_direct_mappings.isra.24+0x210/0x228
[ 1.876996] iommu_group_add_device+0x108/0x2d0
[ 1.878359] iommu_group_get_for_dev+0xa0/0x210
[ 1.879714] viommu_add_device+0x128/0x480
[ 1.880942] iommu_probe_device+0x5c/0xd8
[ 1.882144] of_iommu_configure+0x160/0x208
[ 1.883535] of_dma_configure+0xec/0x2b8
[ 1.884732] pci_dma_configure+0x48/0xd0
[ 1.885911] really_probe+0xa0/0x448
[ 1.886985] driver_probe_device+0xe8/0x140
[ 1.888253] __device_attach_driver+0x94/0x120
[ 1.889581] bus_for_each_drv+0x84/0xd8
[ 1.890730] __device_attach+0xe4/0x168
[ 1.891879] device_initial_probe+0x1c/0x28
[ 1.893164] bus_probe_device+0xa4/0xb0
[ 1.894311] deferred_probe_work_func+0xa0/0xf0
[ 1.895663] process_one_work+0x1bc/0x458
[ 1.896864] worker_thread+0x150/0x4f8
[ 1.898003] kthread+0x114/0x118
[ 1.898977] ret_from_fork+0x10/0x18
[ 1.900056] Code: d63f0020 b9406be2 17ffffe4 a90573fb (d4210000)
[ 1.901872] ---[ end trace 47e5fb5111a3e69b ]---
[ 1.903251] Kernel panic - not syncing: Fatal exception
[ 1.904809] SMP: stopping secondary CPUs
[ 1.906024] Kernel Offset: disabled
[ 1.907079] CPU features: 0x084002,22000438
[ 1.908332] Memory Limit: none
[ 1.909255] ---[ end Kernel panic - not syncing: Fatal exception ]---
Connection closed by foreign host.
Thanks
Eric
>
> I also see a bunch of code like e.g. this:
>
> pg_size = 1UL << __ffs(pgsize_bitmap);
>
> which probably won't DTRT on a 32 bit guest if the bitmap has bits
> set in the high word.
>
>
>
>> + return 0;
>> +}
>> +
>> static int viommu_add_resv_mem(struct viommu_endpoint *vdev,
>> struct virtio_iommu_probe_resv_mem *mem,
>> size_t len)
>> @@ -499,6 +513,9 @@ static int viommu_probe_endpoint(struct viommu_dev *viommu, struct device *dev)
>> case VIRTIO_IOMMU_PROBE_T_RESV_MEM:
>> ret = viommu_add_resv_mem(vdev, (void *)prop, len);
>> break;
>> + case VIRTIO_IOMMU_PROBE_T_PAGE_SIZE_MASK:
>> + ret = viommu_set_pgsize_bitmap(vdev, (void *)prop, len);
>> + break;
>> default:
>> dev_err(dev, "unknown viommu prop 0x%x\n", type);
>> }
>> @@ -630,7 +647,7 @@ static int viommu_domain_finalise(struct viommu_endpoint *vdev,
>>
>> vdomain->id = (unsigned int)ret;
>>
>> - domain->pgsize_bitmap = viommu->pgsize_bitmap;
>> + domain->pgsize_bitmap = vdev->pgsize_bitmap;
>> domain->geometry = viommu->geometry;
>>
>> vdomain->map_flags = viommu->map_flags;
>> @@ -654,6 +671,29 @@ static void viommu_domain_free(struct iommu_domain *domain)
>> kfree(vdomain);
>> }
>>
>> +/*
>> + * Check whether the endpoint's capabilities are compatible with other
>> + * endpoints in the domain. Report any inconsistency.
>> + */
>> +static bool viommu_endpoint_is_compatible(struct viommu_endpoint *vdev,
>> + struct viommu_domain *vdomain)
>> +{
>> + struct device *dev = vdev->dev;
>> +
>> + if (vdomain->viommu != vdev->viommu) {
>> + dev_err(dev, "cannot attach to foreign vIOMMU\n");
>> + return false;
>> + }
>> +
>> + if (vdomain->domain.pgsize_bitmap != vdev->pgsize_bitmap) {
>> + dev_err(dev, "incompatible domain bitmap 0x%lx != 0x%llx\n",
>> + vdomain->domain.pgsize_bitmap, vdev->pgsize_bitmap);
>> + return false;
>> + }
>
> I'm confused by this. So let's assume host supports pages sizes
> of 4k, 2M, 1G. It signals this in the properties. Nice.
> Now domain supports 4k, 2M and that's all. Why is that a problem?
> Just don't use 1G ..>
>
>> +
>> + return true;
>> +}
>> +
>> static int viommu_attach_dev(struct iommu_domain *domain, struct device *dev)
>> {
>> int i;
>> @@ -670,9 +710,8 @@ static int viommu_attach_dev(struct iommu_domain *domain, struct device *dev)
>> * owns it.
>> */
>> ret = viommu_domain_finalise(vdev, domain);
>> - } else if (vdomain->viommu != vdev->viommu) {
>> - dev_err(dev, "cannot attach to foreign vIOMMU\n");
>> - ret = -EXDEV;
>> + } else if (!viommu_endpoint_is_compatible(vdev, vdomain)) {
>> + ret = -EINVAL;
>> }
>> mutex_unlock(&vdomain->mutex);
>>
>> @@ -886,6 +925,7 @@ static int viommu_add_device(struct device *dev)
>>
>> vdev->dev = dev;
>> vdev->viommu = viommu;
>> + vdev->pgsize_bitmap = viommu->pgsize_bitmap;
>> INIT_LIST_HEAD(&vdev->resv_regions);
>> dev_iommu_priv_set(dev, vdev);
>>
>> diff --git a/include/uapi/linux/virtio_iommu.h b/include/uapi/linux/virtio_iommu.h
>> index 48e3c29223b5..2cced7accc99 100644
>> --- a/include/uapi/linux/virtio_iommu.h
>> +++ b/include/uapi/linux/virtio_iommu.h
>
> As any virtio UAPI change, you need to copy virtio TC at some point
> before this is merged ...
>
>> @@ -111,6 +111,7 @@ struct virtio_iommu_req_unmap {
>>
>> #define VIRTIO_IOMMU_PROBE_T_NONE 0
>> #define VIRTIO_IOMMU_PROBE_T_RESV_MEM 1
>> +#define VIRTIO_IOMMU_PROBE_T_PAGE_SIZE_MASK 2
>>
>> #define VIRTIO_IOMMU_PROBE_T_MASK 0xfff
>>
>
> Does host need to know that guest will ignore the page size mask?
> Maybe we need a feature bit.
>
>> @@ -119,6 +120,12 @@ struct virtio_iommu_probe_property {
>> __le16 length;
>> };
>>
>> +struct virtio_iommu_probe_pgsize_mask {
>> + struct virtio_iommu_probe_property head;
>> + __u8 reserved[4];
>> + __le64 pgsize_bitmap;
>> +};
>> +
>
> This is UAPI. Document the format of pgsize_bitmap please.
>
>
>> #define VIRTIO_IOMMU_RESV_MEM_T_RESERVED 0
>> #define VIRTIO_IOMMU_RESV_MEM_T_MSI 1
>>
>> --
>> 2.17.1
>
On Thu, May 07, 2020 at 02:51:32PM +0200, Auger Eric wrote:
> Hi,
>
> On 5/7/20 1:32 PM, Michael S. Tsirkin wrote:
> > On Thu, May 07, 2020 at 11:24:29AM +0000, Bharat Bhushan wrote:
> >>
> >>
> >>> -----Original Message-----
> >>> From: Michael S. Tsirkin <[email protected]>
> >>> Sent: Wednesday, May 6, 2020 5:53 AM
> >>> To: Bharat Bhushan <[email protected]>
> >>> Cc: [email protected]; [email protected]; [email protected];
> >>> [email protected]; [email protected];
> >>> [email protected]; [email protected]; [email protected]
> >>> Subject: [EXT] Re: [PATCH v5] iommu/virtio: Use page size bitmap supported by
> >>> endpoint
> >>>
> >>> External Email
> >>>
> >>> ----------------------------------------------------------------------
> >>> On Tue, May 05, 2020 at 03:00:04PM +0530, Bharat Bhushan wrote:
> >>>> Different endpoint can support different page size, probe endpoint if
> >>>> it supports specific page size otherwise use global page sizes.
> >>>>
> >>>> Signed-off-by: Bharat Bhushan <[email protected]>
> >>>> ---
> >>>> v4->v5:
> >>>> - Rebase to Linux v5.7-rc4
> >>>>
> >>>> v3->v4:
> >>>> - Fix whitespace error
> >>>>
> >>>> v2->v3:
> >>>> - Fixed error return for incompatible endpoint
> >>>> - __u64 changed to __le64 in header file
> >>>>
> >>>> drivers/iommu/virtio-iommu.c | 48 ++++++++++++++++++++++++++++---
> >>>> include/uapi/linux/virtio_iommu.h | 7 +++++
> >>>> 2 files changed, 51 insertions(+), 4 deletions(-)
> >>>>
> >>>> diff --git a/drivers/iommu/virtio-iommu.c
> >>>> b/drivers/iommu/virtio-iommu.c index d5cac4f46ca5..9513d2ab819e 100644
> >>>> --- a/drivers/iommu/virtio-iommu.c
> >>>> +++ b/drivers/iommu/virtio-iommu.c
> >>>> @@ -78,6 +78,7 @@ struct viommu_endpoint {
> >>>> struct viommu_dev *viommu;
> >>>> struct viommu_domain *vdomain;
> >>>> struct list_head resv_regions;
> >>>> + u64 pgsize_bitmap;
> >>>> };
> >>>>
> >>>> struct viommu_request {
> >>>> @@ -415,6 +416,19 @@ static int viommu_replay_mappings(struct
> >>> viommu_domain *vdomain)
> >>>> return ret;
> >>>> }
> >>>>
> >>>> +static int viommu_set_pgsize_bitmap(struct viommu_endpoint *vdev,
> >>>> + struct virtio_iommu_probe_pgsize_mask *mask,
> >>>> + size_t len)
> >>>> +{
> >>>> + u64 pgsize_bitmap = le64_to_cpu(mask->pgsize_bitmap);
> >>>> +
> >>>> + if (len < sizeof(*mask))
> >>>
> >>> This is too late to validate length, you have dereferenced it already.
> >>> do it before the read pls.
> >>
> >> Yes, Will change here and other places as well
> >>
> >>>
> >>>> + return -EINVAL;
> >>>
> >>> OK but note that guest will then just proceed to ignore the property. Is that really
> >>> OK? Wouldn't host want to know?
> >>
> >>
> >> Guest need to be in sync with device, so yes seems like guest need to tell device which page-size-mask it is using.
> >>
> >> Corresponding spec change patch (https://www.mail-archive.com/[email protected]/msg06214.html)
> >>
> >> Would like Jean/Eric to comment here as well.
> >>
> >>>
> >>>
> >>>> +
> >>>> + vdev->pgsize_bitmap = pgsize_bitmap;
> >>>
> >>> what if bitmap is 0? Is that a valid size? I see a bunch of BUG_ON with that value ...
> >>
> >> As per spec proposed device is supposed to set at-least one bit.
> >> Will add a bug_on her.
> >
> > Or better fail probe ...
> Yes I agree I would rather fail the probe.
> >
> >> Should we add bug_on or switch to global config page-size mask if this is zero (notify device which page-size-mask it is using).
> >
> > It's a spec violation, I wouldn't try to use the device.
> >
> >>>
> >>> I also see a bunch of code like e.g. this:
> >>>
> >>> pg_size = 1UL << __ffs(pgsize_bitmap);
> >>>
> >>> which probably won't DTRT on a 32 bit guest if the bitmap has bits set in the high
> >>> word.
> >>>
> >>
> >> My thought is that in that case viommu_domain_finalise() will fail, do not proceed.
> >
> > That's undefined behaviour in C. You need to make sure this condition
> > is never reached. And spec does not make this illegal at all
> > so it looks like we actually need to handle this gracefully.
> >
> >
> >>>
> >>>
> >>>> + return 0;
> >>>> +}
> >>>> +
> >>>> static int viommu_add_resv_mem(struct viommu_endpoint *vdev,
> >>>> struct virtio_iommu_probe_resv_mem *mem,
> >>>> size_t len)
> >>>> @@ -499,6 +513,9 @@ static int viommu_probe_endpoint(struct viommu_dev
> >>> *viommu, struct device *dev)
> >>>> case VIRTIO_IOMMU_PROBE_T_RESV_MEM:
> >>>> ret = viommu_add_resv_mem(vdev, (void *)prop, len);
> >>>> break;
> >>>> + case VIRTIO_IOMMU_PROBE_T_PAGE_SIZE_MASK:
> >>>> + ret = viommu_set_pgsize_bitmap(vdev, (void *)prop, len);
> >>>> + break;
> >>>> default:
> >>>> dev_err(dev, "unknown viommu prop 0x%x\n", type);
> >>>> }
> >>>> @@ -630,7 +647,7 @@ static int viommu_domain_finalise(struct
> >>>> viommu_endpoint *vdev,
> >>>>
> >>>> vdomain->id = (unsigned int)ret;
> >>>>
> >>>> - domain->pgsize_bitmap = viommu->pgsize_bitmap;
> >>>> + domain->pgsize_bitmap = vdev->pgsize_bitmap;
> >>>> domain->geometry = viommu->geometry;
> >>>>
> >>>> vdomain->map_flags = viommu->map_flags;
> >>>> @@ -654,6 +671,29 @@ static void viommu_domain_free(struct iommu_domain
> >>> *domain)
> >>>> kfree(vdomain);
> >>>> }
> >>>>
> >>>> +/*
> >>>> + * Check whether the endpoint's capabilities are compatible with
> >>>> +other
> >>>> + * endpoints in the domain. Report any inconsistency.
> >>>> + */
> >>>> +static bool viommu_endpoint_is_compatible(struct viommu_endpoint *vdev,
> >>>> + struct viommu_domain *vdomain) {
> >>>> + struct device *dev = vdev->dev;
> >>>> +
> >>>> + if (vdomain->viommu != vdev->viommu) {
> >>>> + dev_err(dev, "cannot attach to foreign vIOMMU\n");
> >>>> + return false;
> >>>> + }
> >>>> +
> >>>> + if (vdomain->domain.pgsize_bitmap != vdev->pgsize_bitmap) {
> >>>> + dev_err(dev, "incompatible domain bitmap 0x%lx != 0x%llx\n",
> >>>> + vdomain->domain.pgsize_bitmap, vdev->pgsize_bitmap);
> >>>> + return false;
> >>>> + }
> >>>
> >>> I'm confused by this. So let's assume host supports pages sizes of 4k, 2M, 1G. It
> >>> signals this in the properties. Nice.
> >>> Now domain supports 4k, 2M and that's all. Why is that a problem?
> >>> Just don't use 1G ...
> >>
> >> Is not it too to change the existing domain properties, for devices already attached to domain? New devices must match to domain page-size.
> >
> > Again if IOMMU supports more page sizes than domain uses, why is
> > that a problem? Just don't utilize the bits domain does not use.
>
> I think I agree with you in that case. However it is a problem in the
> opposite, ie. when a new device is added and this latter has less
> options than the existing domain, right?
>
> Thanks
>
> Eric
Well device initialization order is up to Linux really,
so it's annoying to set limits based on this.
Ideally we'd just use domain&device.
But if the limit is going only one way then I guess
it's workable. Requiring the exact match is probably too
onerous.
> >
> >
> >>>
> >>>
> >>>> +
> >>>> + return true;
> >>>> +}
> >>>> +
> >>>> static int viommu_attach_dev(struct iommu_domain *domain, struct
> >>>> device *dev) {
> >>>> int i;
> >>>> @@ -670,9 +710,8 @@ static int viommu_attach_dev(struct iommu_domain
> >>> *domain, struct device *dev)
> >>>> * owns it.
> >>>> */
> >>>> ret = viommu_domain_finalise(vdev, domain);
> >>>> - } else if (vdomain->viommu != vdev->viommu) {
> >>>> - dev_err(dev, "cannot attach to foreign vIOMMU\n");
> >>>> - ret = -EXDEV;
> >>>> + } else if (!viommu_endpoint_is_compatible(vdev, vdomain)) {
> >>>> + ret = -EINVAL;
> >>>> }
> >>>> mutex_unlock(&vdomain->mutex);
> >>>>
> >>>> @@ -886,6 +925,7 @@ static int viommu_add_device(struct device *dev)
> >>>>
> >>>> vdev->dev = dev;
> >>>> vdev->viommu = viommu;
> >>>> + vdev->pgsize_bitmap = viommu->pgsize_bitmap;
> >>>> INIT_LIST_HEAD(&vdev->resv_regions);
> >>>> dev_iommu_priv_set(dev, vdev);
> >>>>
> >>>> diff --git a/include/uapi/linux/virtio_iommu.h
> >>>> b/include/uapi/linux/virtio_iommu.h
> >>>> index 48e3c29223b5..2cced7accc99 100644
> >>>> --- a/include/uapi/linux/virtio_iommu.h
> >>>> +++ b/include/uapi/linux/virtio_iommu.h
> >>>
> >>> As any virtio UAPI change, you need to copy virtio TC at some point before this is
> >>> merged ...
> >>
> >> Jean already send patch for same
> >> https://www.mail-archive.com/[email protected]/msg06214.html
> >>
> >> Do we need to do anything additional?
> >
> >
> > Yes, that is spec patch. you need to see the UAPI patch to virtio-dev.
> >
> >>>
> >>>> @@ -111,6 +111,7 @@ struct virtio_iommu_req_unmap {
> >>>>
> >>>> #define VIRTIO_IOMMU_PROBE_T_NONE 0
> >>>> #define VIRTIO_IOMMU_PROBE_T_RESV_MEM 1
> >>>> +#define VIRTIO_IOMMU_PROBE_T_PAGE_SIZE_MASK 2
> >>>>
> >>>> #define VIRTIO_IOMMU_PROBE_T_MASK 0xfff
> >>>>
> >>>
> >>> Does host need to know that guest will ignore the page size mask?
> >>> Maybe we need a feature bit.
> >>>
> >>>> @@ -119,6 +120,12 @@ struct virtio_iommu_probe_property {
> >>>> __le16 length;
> >>>> };
> >>>>
> >>>> +struct virtio_iommu_probe_pgsize_mask {
> >>>> + struct virtio_iommu_probe_property head;
> >>>> + __u8 reserved[4];
> >>>> + __le64 pgsize_bitmap;
> >>>> +};
> >>>> +
> >>>
> >>> This is UAPI. Document the format of pgsize_bitmap please.
> >>
> >> Ok,
> >>
> >> Thanks
> >> -Bharat
> >>
> >>>
> >>>
> >>>> #define VIRTIO_IOMMU_RESV_MEM_T_RESERVED 0
> >>>> #define VIRTIO_IOMMU_RESV_MEM_T_MSI 1
> >>>>
> >>>> --
> >>>> 2.17.1
> >
On Tue, May 05, 2020 at 03:00:04PM +0530, Bharat Bhushan wrote:
> Different endpoint can support different page size, probe
> endpoint if it supports specific page size otherwise use
> global page sizes.
>
> Signed-off-by: Bharat Bhushan <[email protected]>
> ---
> v4->v5:
> - Rebase to Linux v5.7-rc4
>
> v3->v4:
> - Fix whitespace error
>
> v2->v3:
> - Fixed error return for incompatible endpoint
> - __u64 changed to __le64 in header file
>
> drivers/iommu/virtio-iommu.c | 48 ++++++++++++++++++++++++++++---
> include/uapi/linux/virtio_iommu.h | 7 +++++
> 2 files changed, 51 insertions(+), 4 deletions(-)
>
> diff --git a/drivers/iommu/virtio-iommu.c b/drivers/iommu/virtio-iommu.c
> index d5cac4f46ca5..9513d2ab819e 100644
> --- a/drivers/iommu/virtio-iommu.c
> +++ b/drivers/iommu/virtio-iommu.c
> @@ -78,6 +78,7 @@ struct viommu_endpoint {
> struct viommu_dev *viommu;
> struct viommu_domain *vdomain;
> struct list_head resv_regions;
> + u64 pgsize_bitmap;
> };
>
> struct viommu_request {
> @@ -415,6 +416,19 @@ static int viommu_replay_mappings(struct viommu_domain *vdomain)
> return ret;
> }
>
> +static int viommu_set_pgsize_bitmap(struct viommu_endpoint *vdev,
> + struct virtio_iommu_probe_pgsize_mask *mask,
> + size_t len)
> +{
> + u64 pgsize_bitmap = le64_to_cpu(mask->pgsize_bitmap);
> +
> + if (len < sizeof(*mask))
> + return -EINVAL;
> +
> + vdev->pgsize_bitmap = pgsize_bitmap;
> + return 0;
> +}
> +
> static int viommu_add_resv_mem(struct viommu_endpoint *vdev,
> struct virtio_iommu_probe_resv_mem *mem,
> size_t len)
> @@ -499,6 +513,9 @@ static int viommu_probe_endpoint(struct viommu_dev *viommu, struct device *dev)
> case VIRTIO_IOMMU_PROBE_T_RESV_MEM:
> ret = viommu_add_resv_mem(vdev, (void *)prop, len);
> break;
> + case VIRTIO_IOMMU_PROBE_T_PAGE_SIZE_MASK:
> + ret = viommu_set_pgsize_bitmap(vdev, (void *)prop, len);
> + break;
> default:
> dev_err(dev, "unknown viommu prop 0x%x\n", type);
> }
So given this is necessary early in boot, how about we
add this in the config space? And maybe ACPI too ...
> @@ -630,7 +647,7 @@ static int viommu_domain_finalise(struct viommu_endpoint *vdev,
>
> vdomain->id = (unsigned int)ret;
>
> - domain->pgsize_bitmap = viommu->pgsize_bitmap;
> + domain->pgsize_bitmap = vdev->pgsize_bitmap;
> domain->geometry = viommu->geometry;
>
> vdomain->map_flags = viommu->map_flags;
> @@ -654,6 +671,29 @@ static void viommu_domain_free(struct iommu_domain *domain)
> kfree(vdomain);
> }
>
> +/*
> + * Check whether the endpoint's capabilities are compatible with other
> + * endpoints in the domain. Report any inconsistency.
> + */
> +static bool viommu_endpoint_is_compatible(struct viommu_endpoint *vdev,
> + struct viommu_domain *vdomain)
> +{
> + struct device *dev = vdev->dev;
> +
> + if (vdomain->viommu != vdev->viommu) {
> + dev_err(dev, "cannot attach to foreign vIOMMU\n");
> + return false;
> + }
> +
> + if (vdomain->domain.pgsize_bitmap != vdev->pgsize_bitmap) {
> + dev_err(dev, "incompatible domain bitmap 0x%lx != 0x%llx\n",
> + vdomain->domain.pgsize_bitmap, vdev->pgsize_bitmap);
> + return false;
> + }
> +
> + return true;
> +}
> +
> static int viommu_attach_dev(struct iommu_domain *domain, struct device *dev)
> {
> int i;
> @@ -670,9 +710,8 @@ static int viommu_attach_dev(struct iommu_domain *domain, struct device *dev)
> * owns it.
> */
> ret = viommu_domain_finalise(vdev, domain);
> - } else if (vdomain->viommu != vdev->viommu) {
> - dev_err(dev, "cannot attach to foreign vIOMMU\n");
> - ret = -EXDEV;
> + } else if (!viommu_endpoint_is_compatible(vdev, vdomain)) {
> + ret = -EINVAL;
> }
> mutex_unlock(&vdomain->mutex);
>
> @@ -886,6 +925,7 @@ static int viommu_add_device(struct device *dev)
>
> vdev->dev = dev;
> vdev->viommu = viommu;
> + vdev->pgsize_bitmap = viommu->pgsize_bitmap;
> INIT_LIST_HEAD(&vdev->resv_regions);
> dev_iommu_priv_set(dev, vdev);
>
> diff --git a/include/uapi/linux/virtio_iommu.h b/include/uapi/linux/virtio_iommu.h
> index 48e3c29223b5..2cced7accc99 100644
> --- a/include/uapi/linux/virtio_iommu.h
> +++ b/include/uapi/linux/virtio_iommu.h
> @@ -111,6 +111,7 @@ struct virtio_iommu_req_unmap {
>
> #define VIRTIO_IOMMU_PROBE_T_NONE 0
> #define VIRTIO_IOMMU_PROBE_T_RESV_MEM 1
> +#define VIRTIO_IOMMU_PROBE_T_PAGE_SIZE_MASK 2
>
> #define VIRTIO_IOMMU_PROBE_T_MASK 0xfff
>
> @@ -119,6 +120,12 @@ struct virtio_iommu_probe_property {
> __le16 length;
> };
>
> +struct virtio_iommu_probe_pgsize_mask {
> + struct virtio_iommu_probe_property head;
> + __u8 reserved[4];
> + __le64 pgsize_bitmap;
> +};
> +
> #define VIRTIO_IOMMU_RESV_MEM_T_RESERVED 0
> #define VIRTIO_IOMMU_RESV_MEM_T_MSI 1
>
> --
> 2.17.1
On Tue, May 12, 2020 at 10:53:39AM -0400, Michael S. Tsirkin wrote:
> > static int viommu_add_resv_mem(struct viommu_endpoint *vdev,
> > struct virtio_iommu_probe_resv_mem *mem,
> > size_t len)
> > @@ -499,6 +513,9 @@ static int viommu_probe_endpoint(struct viommu_dev *viommu, struct device *dev)
> > case VIRTIO_IOMMU_PROBE_T_RESV_MEM:
> > ret = viommu_add_resv_mem(vdev, (void *)prop, len);
> > break;
> > + case VIRTIO_IOMMU_PROBE_T_PAGE_SIZE_MASK:
> > + ret = viommu_set_pgsize_bitmap(vdev, (void *)prop, len);
> > + break;
> > default:
> > dev_err(dev, "unknown viommu prop 0x%x\n", type);
> > }
>
> So given this is necessary early in boot, how about we
> add this in the config space? And maybe ACPI too ...
A page_size_mask field is already in the config space and applies to all
endpoints. Here we add a property to override the global value for
individual endpoints. It can be necessary when mixing physical (pass-
through) and virtual endpoints under the same virtio-iommu device.
Thanks,
Jean
Hi Jean,
> -----Original Message-----
> From: Michael S. Tsirkin <[email protected]>
> Sent: Wednesday, May 6, 2020 5:53 AM
> To: Bharat Bhushan <[email protected]>
> Cc: [email protected]; [email protected]; [email protected];
> [email protected]; [email protected];
> [email protected]; [email protected]; [email protected]
> Subject: [EXT] Re: [PATCH v5] iommu/virtio: Use page size bitmap supported by
> endpoint
>
> External Email
>
> ----------------------------------------------------------------------
> On Tue, May 05, 2020 at 03:00:04PM +0530, Bharat Bhushan wrote:
> > Different endpoint can support different page size, probe endpoint if
> > it supports specific page size otherwise use global page sizes.
> >
> > Signed-off-by: Bharat Bhushan <[email protected]>
> > ---
> > v4->v5:
> > - Rebase to Linux v5.7-rc4
> >
> > v3->v4:
> > - Fix whitespace error
> >
> > v2->v3:
> > - Fixed error return for incompatible endpoint
> > - __u64 changed to __le64 in header file
> >
> > drivers/iommu/virtio-iommu.c | 48 ++++++++++++++++++++++++++++---
> > include/uapi/linux/virtio_iommu.h | 7 +++++
> > 2 files changed, 51 insertions(+), 4 deletions(-)
> >
> > diff --git a/drivers/iommu/virtio-iommu.c
> > b/drivers/iommu/virtio-iommu.c index d5cac4f46ca5..9513d2ab819e 100644
> > --- a/drivers/iommu/virtio-iommu.c
> > +++ b/drivers/iommu/virtio-iommu.c
> > @@ -78,6 +78,7 @@ struct viommu_endpoint {
> > struct viommu_dev *viommu;
> > struct viommu_domain *vdomain;
> > struct list_head resv_regions;
> > + u64 pgsize_bitmap;
> > };
> >
> > struct viommu_request {
> > @@ -415,6 +416,19 @@ static int viommu_replay_mappings(struct
> viommu_domain *vdomain)
> > return ret;
> > }
> >
> > +static int viommu_set_pgsize_bitmap(struct viommu_endpoint *vdev,
> > + struct virtio_iommu_probe_pgsize_mask *mask,
> > + size_t len)
> > +{
> > + u64 pgsize_bitmap = le64_to_cpu(mask->pgsize_bitmap);
> > +
> > + if (len < sizeof(*mask))
>
> This is too late to validate length, you have dereferenced it already.
> do it before the read pls.
>
> > + return -EINVAL;
>
> OK but note that guest will then just proceed to ignore the property. Is that really
> OK? Wouldn't host want to know?
>
>
> > +
> > + vdev->pgsize_bitmap = pgsize_bitmap;
>
> what if bitmap is 0? Is that a valid size? I see a bunch of BUG_ON with that value ...
>
> I also see a bunch of code like e.g. this:
>
> pg_size = 1UL << __ffs(pgsize_bitmap);
>
> which probably won't DTRT on a 32 bit guest if the bitmap has bits set in the high
> word.
>
>
>
> > + return 0;
> > +}
> > +
> > static int viommu_add_resv_mem(struct viommu_endpoint *vdev,
> > struct virtio_iommu_probe_resv_mem *mem,
> > size_t len)
> > @@ -499,6 +513,9 @@ static int viommu_probe_endpoint(struct viommu_dev
> *viommu, struct device *dev)
> > case VIRTIO_IOMMU_PROBE_T_RESV_MEM:
> > ret = viommu_add_resv_mem(vdev, (void *)prop, len);
> > break;
> > + case VIRTIO_IOMMU_PROBE_T_PAGE_SIZE_MASK:
> > + ret = viommu_set_pgsize_bitmap(vdev, (void *)prop, len);
> > + break;
> > default:
> > dev_err(dev, "unknown viommu prop 0x%x\n", type);
> > }
> > @@ -630,7 +647,7 @@ static int viommu_domain_finalise(struct
> > viommu_endpoint *vdev,
> >
> > vdomain->id = (unsigned int)ret;
> >
> > - domain->pgsize_bitmap = viommu->pgsize_bitmap;
> > + domain->pgsize_bitmap = vdev->pgsize_bitmap;
> > domain->geometry = viommu->geometry;
> >
> > vdomain->map_flags = viommu->map_flags;
> > @@ -654,6 +671,29 @@ static void viommu_domain_free(struct iommu_domain
> *domain)
> > kfree(vdomain);
> > }
> >
> > +/*
> > + * Check whether the endpoint's capabilities are compatible with
> > +other
> > + * endpoints in the domain. Report any inconsistency.
> > + */
> > +static bool viommu_endpoint_is_compatible(struct viommu_endpoint *vdev,
> > + struct viommu_domain *vdomain) {
> > + struct device *dev = vdev->dev;
> > +
> > + if (vdomain->viommu != vdev->viommu) {
> > + dev_err(dev, "cannot attach to foreign vIOMMU\n");
> > + return false;
> > + }
> > +
> > + if (vdomain->domain.pgsize_bitmap != vdev->pgsize_bitmap) {
> > + dev_err(dev, "incompatible domain bitmap 0x%lx != 0x%llx\n",
> > + vdomain->domain.pgsize_bitmap, vdev->pgsize_bitmap);
> > + return false;
> > + }
>
> I'm confused by this. So let's assume host supports pages sizes of 4k, 2M, 1G. It
> signals this in the properties. Nice.
> Now domain supports 4k, 2M and that's all. Why is that a problem?
> Just don't use 1G ...
>
>
> > +
> > + return true;
> > +}
> > +
> > static int viommu_attach_dev(struct iommu_domain *domain, struct
> > device *dev) {
> > int i;
> > @@ -670,9 +710,8 @@ static int viommu_attach_dev(struct iommu_domain
> *domain, struct device *dev)
> > * owns it.
> > */
> > ret = viommu_domain_finalise(vdev, domain);
> > - } else if (vdomain->viommu != vdev->viommu) {
> > - dev_err(dev, "cannot attach to foreign vIOMMU\n");
> > - ret = -EXDEV;
> > + } else if (!viommu_endpoint_is_compatible(vdev, vdomain)) {
> > + ret = -EINVAL;
> > }
> > mutex_unlock(&vdomain->mutex);
> >
> > @@ -886,6 +925,7 @@ static int viommu_add_device(struct device *dev)
> >
> > vdev->dev = dev;
> > vdev->viommu = viommu;
> > + vdev->pgsize_bitmap = viommu->pgsize_bitmap;
> > INIT_LIST_HEAD(&vdev->resv_regions);
> > dev_iommu_priv_set(dev, vdev);
> >
> > diff --git a/include/uapi/linux/virtio_iommu.h
> > b/include/uapi/linux/virtio_iommu.h
> > index 48e3c29223b5..2cced7accc99 100644
> > --- a/include/uapi/linux/virtio_iommu.h
> > +++ b/include/uapi/linux/virtio_iommu.h
>
> As any virtio UAPI change, you need to copy virtio TC at some point before this is
> merged ...
>
> > @@ -111,6 +111,7 @@ struct virtio_iommu_req_unmap {
> >
> > #define VIRTIO_IOMMU_PROBE_T_NONE 0
> > #define VIRTIO_IOMMU_PROBE_T_RESV_MEM 1
> > +#define VIRTIO_IOMMU_PROBE_T_PAGE_SIZE_MASK 2
> >
> > #define VIRTIO_IOMMU_PROBE_T_MASK 0xfff
> >
>
> Does host need to know that guest will ignore the page size mask?
> Maybe we need a feature bit.
>
> > @@ -119,6 +120,12 @@ struct virtio_iommu_probe_property {
> > __le16 length;
> > };
> >
> > +struct virtio_iommu_probe_pgsize_mask {
> > + struct virtio_iommu_probe_property head;
> > + __u8 reserved[4];
> > + __le64 pgsize_bitmap;
> > +};
> > +
>
> This is UAPI. Document the format of pgsize_bitmap please.
I do not see uapi documentation in "Documentation" folder of other data struct in this file, am I missing something?
Thanks
-Bharat
>
>
> > #define VIRTIO_IOMMU_RESV_MEM_T_RESERVED 0
> > #define VIRTIO_IOMMU_RESV_MEM_T_MSI 1
> >
> > --
> > 2.17.1
On Wed, May 13, 2020 at 09:15:22AM +0000, Bharat Bhushan wrote:
> Hi Jean,
>
> > -----Original Message-----
> > From: Michael S. Tsirkin <[email protected]>
> > Sent: Wednesday, May 6, 2020 5:53 AM
> > To: Bharat Bhushan <[email protected]>
> > Cc: [email protected]; [email protected]; [email protected];
> > [email protected]; [email protected];
> > [email protected]; [email protected]; [email protected]
> > Subject: [EXT] Re: [PATCH v5] iommu/virtio: Use page size bitmap supported by
> > endpoint
[...]
> > > +struct virtio_iommu_probe_pgsize_mask {
> > > + struct virtio_iommu_probe_property head;
> > > + __u8 reserved[4];
> > > + __le64 pgsize_bitmap;
> > > +};
> > > +
> >
> > This is UAPI. Document the format of pgsize_bitmap please.
>
> I do not see uapi documentation in "Documentation" folder of other data struct in this file, am I missing something?
I'm not the one requesting this change, but I'm guessing you should add a
comment in this header, above pgsize_bitmap (which should actually be
called page_size_mask to follow the spec). I guess I'd add:
/* Same format as virtio_iommu_config::page_size_mask */
for this field, and then maybe change the comment of virtio_iommu_config,
currently "/* Supported page sizes */", to something more precise such as:
/*
* Bitmap of supported page sizes. The least significant bit indicates the
* smallest granularity and the other bits are hints indicating optimal
* block sizes.
*/
But I don't know how much should be spelled out here, since those details
are available in the spec.
Thanks,
Jean