2020-07-04 11:20:42

by Yi Liu

Subject: [PATCH v4 04/15] vfio/type1: Report iommu nesting info to userspace

This patch exports iommu nesting capability info to user space through
VFIO. User space is expected to check this info for supported uAPIs (e.g.
PASID alloc/free, bind page table, and cache invalidation) and the vendor
specific format information for first level/stage page table that will be
bound to.

The nesting info is available only after the nesting iommu type is set
for a container. Current implementation imposes one limitation - one
nesting container should include at most one group. The philosophy of
vfio container is having all groups/devices within the container share
the same IOMMU context. When vSVA is enabled, one IOMMU context could
include one 2nd-level address space and multiple 1st-level address spaces.
While the 2nd-leve address space is reasonably sharable by multiple groups
, blindly sharing 1st-level address spaces across all groups within the
container might instead break the guest expectation. In the future sub/
super container concept might be introduced to allow partial address space
sharing within an IOMMU context. But for now let's go with this restriction
by requiring singleton container for using nesting iommu features. Below
link has the related discussion about this decision.

https://lkml.org/lkml/2020/5/15/1028

Cc: Kevin Tian <[email protected]>
CC: Jacob Pan <[email protected]>
Cc: Alex Williamson <[email protected]>
Cc: Eric Auger <[email protected]>
Cc: Jean-Philippe Brucker <[email protected]>
Cc: Joerg Roedel <[email protected]>
Cc: Lu Baolu <[email protected]>
Signed-off-by: Liu Yi L <[email protected]>
---
v3 -> v4:
*) address comments against v3.

v1 -> v2:
*) added in v2
---

drivers/vfio/vfio_iommu_type1.c | 105 +++++++++++++++++++++++++++++++++++-----
include/uapi/linux/vfio.h | 16 ++++++
2 files changed, 109 insertions(+), 12 deletions(-)

diff --git a/drivers/vfio/vfio_iommu_type1.c b/drivers/vfio/vfio_iommu_type1.c
index 7accb59..80623b8 100644
--- a/drivers/vfio/vfio_iommu_type1.c
+++ b/drivers/vfio/vfio_iommu_type1.c
@@ -62,18 +62,20 @@ MODULE_PARM_DESC(dma_entry_limit,
"Maximum number of user DMA mappings per container (65535).");

struct vfio_iommu {
- struct list_head domain_list;
- struct list_head iova_list;
- struct vfio_domain *external_domain; /* domain for external user */
- struct mutex lock;
- struct rb_root dma_list;
- struct blocking_notifier_head notifier;
- unsigned int dma_avail;
- uint64_t pgsize_bitmap;
- bool v2;
- bool nesting;
- bool dirty_page_tracking;
- bool pinned_page_dirty_scope;
+ struct list_head domain_list;
+ struct list_head iova_list;
+ struct vfio_domain *external_domain; /* domain for
+ external user */
+ struct mutex lock;
+ struct rb_root dma_list;
+ struct blocking_notifier_head notifier;
+ unsigned int dma_avail;
+ uint64_t pgsize_bitmap;
+ bool v2;
+ bool nesting;
+ bool dirty_page_tracking;
+ bool pinned_page_dirty_scope;
+ struct iommu_nesting_info *nesting_info;
};

struct vfio_domain {
@@ -130,6 +132,9 @@ struct vfio_regions {
#define IS_IOMMU_CAP_DOMAIN_IN_CONTAINER(iommu) \
(!list_empty(&iommu->domain_list))

+#define IS_DOMAIN_IN_CONTAINER(iommu) ((iommu->external_domain) || \
+ (!list_empty(&iommu->domain_list)))
+
#define DIRTY_BITMAP_BYTES(n) (ALIGN(n, BITS_PER_TYPE(u64)) / BITS_PER_BYTE)

/*
@@ -1929,6 +1934,13 @@ static void vfio_iommu_iova_insert_copy(struct vfio_iommu *iommu,

list_splice_tail(iova_copy, iova);
}
+
+static void vfio_iommu_release_nesting_info(struct vfio_iommu *iommu)
+{
+ kfree(iommu->nesting_info);
+ iommu->nesting_info = NULL;
+}
+
static int vfio_iommu_type1_attach_group(void *iommu_data,
struct iommu_group *iommu_group)
{
@@ -1959,6 +1971,12 @@ static int vfio_iommu_type1_attach_group(void *iommu_data,
}
}

+ /* Nesting type container can include only one group */
+ if (iommu->nesting && IS_DOMAIN_IN_CONTAINER(iommu)) {
+ mutex_unlock(&iommu->lock);
+ return -EINVAL;
+ }
+
group = kzalloc(sizeof(*group), GFP_KERNEL);
domain = kzalloc(sizeof(*domain), GFP_KERNEL);
if (!group || !domain) {
@@ -2029,6 +2047,36 @@ static int vfio_iommu_type1_attach_group(void *iommu_data,
if (ret)
goto out_domain;

+ /* Nesting cap info is available only after attaching */
+ if (iommu->nesting) {
+ struct iommu_nesting_info tmp;
+ struct iommu_nesting_info *info;
+
+ /* First get the size of vendor specific nesting info */
+ ret = iommu_domain_get_attr(domain->domain,
+ DOMAIN_ATTR_NESTING,
+ &tmp);
+ if (ret)
+ goto out_detach;
+
+ info = kzalloc(tmp.size, GFP_KERNEL);
+ if (!info) {
+ ret = -ENOMEM;
+ goto out_detach;
+ }
+
+ /* Now get the nesting info */
+ info->size = tmp.size;
+ ret = iommu_domain_get_attr(domain->domain,
+ DOMAIN_ATTR_NESTING,
+ info);
+ if (ret) {
+ kfree(info);
+ goto out_detach;
+ }
+ iommu->nesting_info = info;
+ }
+
/* Get aperture info */
iommu_domain_get_attr(domain->domain, DOMAIN_ATTR_GEOMETRY, &geo);

@@ -2138,6 +2186,7 @@ static int vfio_iommu_type1_attach_group(void *iommu_data,
return 0;

out_detach:
+ vfio_iommu_release_nesting_info(iommu);
vfio_iommu_detach_group(domain, group);
out_domain:
iommu_domain_free(domain->domain);
@@ -2338,6 +2387,8 @@ static void vfio_iommu_type1_detach_group(void *iommu_data,
vfio_iommu_unmap_unpin_all(iommu);
else
vfio_iommu_unmap_unpin_reaccount(iommu);
+
+ vfio_iommu_release_nesting_info(iommu);
}
iommu_domain_free(domain->domain);
list_del(&domain->next);
@@ -2546,6 +2597,30 @@ static int vfio_iommu_migration_build_caps(struct vfio_iommu *iommu,
return vfio_info_add_capability(caps, &cap_mig.header, sizeof(cap_mig));
}

+static int vfio_iommu_info_add_nesting_cap(struct vfio_iommu *iommu,
+ struct vfio_info_cap *caps)
+{
+ struct vfio_info_cap_header *header;
+ struct vfio_iommu_type1_info_cap_nesting *nesting_cap;
+ size_t size;
+
+ size = sizeof(*nesting_cap) + iommu->nesting_info->size;
+
+ header = vfio_info_cap_add(caps, size,
+ VFIO_IOMMU_TYPE1_INFO_CAP_NESTING, 1);
+ if (IS_ERR(header))
+ return PTR_ERR(header);
+
+ nesting_cap = container_of(header,
+ struct vfio_iommu_type1_info_cap_nesting,
+ header);
+
+ memcpy(&nesting_cap->info, iommu->nesting_info,
+ iommu->nesting_info->size);
+
+ return 0;
+}
+
static int vfio_iommu_type1_get_info(struct vfio_iommu *iommu,
unsigned long arg)
{
@@ -2586,6 +2661,12 @@ static int vfio_iommu_type1_get_info(struct vfio_iommu *iommu,
if (ret)
return ret;

+ if (iommu->nesting_info) {
+ ret = vfio_iommu_info_add_nesting_cap(iommu, &caps);
+ if (ret)
+ return ret;
+ }
+
if (caps.size) {
info.flags |= VFIO_IOMMU_INFO_CAPS;

diff --git a/include/uapi/linux/vfio.h b/include/uapi/linux/vfio.h
index 9204705..3e3de9c 100644
--- a/include/uapi/linux/vfio.h
+++ b/include/uapi/linux/vfio.h
@@ -1039,6 +1039,22 @@ struct vfio_iommu_type1_info_cap_migration {
__u64 max_dirty_bitmap_size; /* in bytes */
};

+#define VFIO_IOMMU_TYPE1_INFO_CAP_NESTING 3
+
+/*
+ * Reporting nesting info to user space.
+ *
+ * @info: the nesting info provided by IOMMU driver. Today
+ * it is expected to be a struct iommu_nesting_info
+ * data.
+ */
+struct vfio_iommu_type1_info_cap_nesting {
+ struct vfio_info_cap_header header;
+ __u32 flags;
+ __u32 padding;
+ __u8 info[];
+};
+
#define VFIO_IOMMU_GET_INFO _IO(VFIO_TYPE, VFIO_BASE + 12)

/**
--
2.7.4
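
Userspace consumes the capability added above through the standard
VFIO capability chain of VFIO_IOMMU_GET_INFO. A minimal sketch of a
consumer follows (not part of the patch; the helper name is made up
and error handling is trimmed):

#include <stdlib.h>
#include <string.h>
#include <sys/ioctl.h>
#include <linux/vfio.h>

static struct vfio_info_cap_header *
vfio_find_nesting_cap(int container, struct vfio_iommu_type1_info **outp)
{
	struct vfio_iommu_type1_info *info;
	struct vfio_info_cap_header *hdr;
	__u32 argsz = sizeof(*info);

	/* first call only reports the required buffer size in argsz */
	info = calloc(1, argsz);
	info->argsz = argsz;
	ioctl(container, VFIO_IOMMU_GET_INFO, info);

	argsz = info->argsz;
	info = realloc(info, argsz);
	memset(info, 0, argsz);
	info->argsz = argsz;
	ioctl(container, VFIO_IOMMU_GET_INFO, info);

	if (!(info->flags & VFIO_IOMMU_INFO_CAPS) || !info->cap_offset)
		goto out_free;

	/* capabilities are chained via byte offsets from &info */
	for (hdr = (void *)info + info->cap_offset; ;
	     hdr = (void *)info + hdr->next) {
		if (hdr->id == VFIO_IOMMU_TYPE1_INFO_CAP_NESTING) {
			*outp = info;	/* caller frees */
			return hdr;
		}
		if (!hdr->next)
			break;
	}
out_free:
	free(info);
	return NULL;
}

On success the returned header can be cast to
struct vfio_iommu_type1_info_cap_nesting, and its info[] payload
interpreted as a struct iommu_nesting_info.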


2020-07-06 10:38:27

by Eric Auger

Subject: Re: [PATCH v4 04/15] vfio/type1: Report iommu nesting info to userspace

Yi,

On 7/4/20 1:26 PM, Liu Yi L wrote:
> This patch exports iommu nesting capability info to user space through
> VFIO. User space is expected to check this info for supported uAPIs (e.g.
> PASID alloc/free, bind page table, and cache invalidation) and the vendor
> specific format information for first level/stage page table that will be
> bound to.
>
> The nesting info is available only after the nesting iommu type is set
> for a container. Current implementation imposes one limitation - one
> nesting container should include at most one group. The philosophy of
> vfio container is having all groups/devices within the container share
> the same IOMMU context. When vSVA is enabled, one IOMMU context could
> include one 2nd-level address space and multiple 1st-level address spaces.
> While the 2nd-leve address space is reasonably sharable by multiple groups
level
> , blindly sharing 1st-level address spaces across all groups within the
> container might instead break the guest expectation. In the future sub/
> super container concept might be introduced to allow partial address space
> sharing within an IOMMU context. But for now let's go with this restriction
> by requiring singleton container for using nesting iommu features. Below
> link has the related discussion about this decision.
>
> https://lkml.org/lkml/2020/5/15/1028
>
> Cc: Kevin Tian <[email protected]>
> CC: Jacob Pan <[email protected]>
> Cc: Alex Williamson <[email protected]>
> Cc: Eric Auger <[email protected]>
> Cc: Jean-Philippe Brucker <[email protected]>
> Cc: Joerg Roedel <[email protected]>
> Cc: Lu Baolu <[email protected]>
> Signed-off-by: Liu Yi L <[email protected]>
> ---
> v3 -> v4:
> *) address comments against v3.
>
> v1 -> v2:
> *) added in v2
> ---
>
> drivers/vfio/vfio_iommu_type1.c | 105 +++++++++++++++++++++++++++++++++++-----
> include/uapi/linux/vfio.h | 16 ++++++
> 2 files changed, 109 insertions(+), 12 deletions(-)
>
> diff --git a/drivers/vfio/vfio_iommu_type1.c b/drivers/vfio/vfio_iommu_type1.c
> index 7accb59..80623b8 100644
> --- a/drivers/vfio/vfio_iommu_type1.c
> +++ b/drivers/vfio/vfio_iommu_type1.c
> @@ -62,18 +62,20 @@ MODULE_PARM_DESC(dma_entry_limit,
> "Maximum number of user DMA mappings per container (65535).");
>
> struct vfio_iommu {
> - struct list_head domain_list;
> - struct list_head iova_list;
> - struct vfio_domain *external_domain; /* domain for external user */
> - struct mutex lock;
> - struct rb_root dma_list;
> - struct blocking_notifier_head notifier;
> - unsigned int dma_avail;
> - uint64_t pgsize_bitmap;
> - bool v2;
> - bool nesting;
> - bool dirty_page_tracking;
> - bool pinned_page_dirty_scope;
> + struct list_head domain_list;
> + struct list_head iova_list;
> + struct vfio_domain *external_domain; /* domain for
> + external user */
nit: put the comment before the field?
> + struct mutex lock;
> + struct rb_root dma_list;
> + struct blocking_notifier_head notifier;
> + unsigned int dma_avail;
> + uint64_t pgsize_bitmap;
> + bool v2;
> + bool nesting;
> + bool dirty_page_tracking;
> + bool pinned_page_dirty_scope;
> + struct iommu_nesting_info *nesting_info;
> };
>
> struct vfio_domain {
> @@ -130,6 +132,9 @@ struct vfio_regions {
> #define IS_IOMMU_CAP_DOMAIN_IN_CONTAINER(iommu) \
> (!list_empty(&iommu->domain_list))
>
> +#define IS_DOMAIN_IN_CONTAINER(iommu) ((iommu->external_domain) || \
> + (!list_empty(&iommu->domain_list)))
rename into something like CONTAINER_HAS_DOMAIN()?
> +
> #define DIRTY_BITMAP_BYTES(n) (ALIGN(n, BITS_PER_TYPE(u64)) / BITS_PER_BYTE)
>
> /*
> @@ -1929,6 +1934,13 @@ static void vfio_iommu_iova_insert_copy(struct vfio_iommu *iommu,
>
> list_splice_tail(iova_copy, iova);
> }
> +
> +static void vfio_iommu_release_nesting_info(struct vfio_iommu *iommu)
> +{
> + kfree(iommu->nesting_info);
> + iommu->nesting_info = NULL;
> +}
> +
> static int vfio_iommu_type1_attach_group(void *iommu_data,
> struct iommu_group *iommu_group)
> {
> @@ -1959,6 +1971,12 @@ static int vfio_iommu_type1_attach_group(void *iommu_data,
> }
> }
>
> + /* Nesting type container can include only one group */
> + if (iommu->nesting && IS_DOMAIN_IN_CONTAINER(iommu)) {
> + mutex_unlock(&iommu->lock);
> + return -EINVAL;
> + }
> +
> group = kzalloc(sizeof(*group), GFP_KERNEL);
> domain = kzalloc(sizeof(*domain), GFP_KERNEL);
> if (!group || !domain) {
> @@ -2029,6 +2047,36 @@ static int vfio_iommu_type1_attach_group(void *iommu_data,
> if (ret)
> goto out_domain;
>
> + /* Nesting cap info is available only after attaching */
> + if (iommu->nesting) {
> + struct iommu_nesting_info tmp;
> + struct iommu_nesting_info *info;
> +
> + /* First get the size of vendor specific nesting info */
> + ret = iommu_domain_get_attr(domain->domain,
> + DOMAIN_ATTR_NESTING,
> + &tmp);
> + if (ret)
> + goto out_detach;
> +
> + info = kzalloc(tmp.size, GFP_KERNEL);
nit: you may directly use iommu->nesting_info
> + if (!info) {
> + ret = -ENOMEM;
> + goto out_detach;
> + }
> +
> + /* Now get the nesting info */
> + info->size = tmp.size;
> + ret = iommu_domain_get_attr(domain->domain,
> + DOMAIN_ATTR_NESTING,
> + info);
> + if (ret) {
> + kfree(info);
... and set it back to NULL here if it fails
> + goto out_detach;
> + }
> + iommu->nesting_info = info;
> + }
> +
> /* Get aperture info */
> iommu_domain_get_attr(domain->domain, DOMAIN_ATTR_GEOMETRY, &geo);
>
> @@ -2138,6 +2186,7 @@ static int vfio_iommu_type1_attach_group(void *iommu_data,
> return 0;
>
> out_detach:
> + vfio_iommu_release_nesting_info(iommu);
> vfio_iommu_detach_group(domain, group);
> out_domain:
> iommu_domain_free(domain->domain);
> @@ -2338,6 +2387,8 @@ static void vfio_iommu_type1_detach_group(void *iommu_data,
> vfio_iommu_unmap_unpin_all(iommu);
> else
> vfio_iommu_unmap_unpin_reaccount(iommu);
> +
> + vfio_iommu_release_nesting_info(iommu);
> }
> iommu_domain_free(domain->domain);
> list_del(&domain->next);
> @@ -2546,6 +2597,30 @@ static int vfio_iommu_migration_build_caps(struct vfio_iommu *iommu,
> return vfio_info_add_capability(caps, &cap_mig.header, sizeof(cap_mig));
> }
>
> +static int vfio_iommu_info_add_nesting_cap(struct vfio_iommu *iommu,
> + struct vfio_info_cap *caps)
> +{
> + struct vfio_info_cap_header *header;
> + struct vfio_iommu_type1_info_cap_nesting *nesting_cap;
> + size_t size;
> +
> + size = sizeof(*nesting_cap) + iommu->nesting_info->size;
> +
> + header = vfio_info_cap_add(caps, size,
> + VFIO_IOMMU_TYPE1_INFO_CAP_NESTING, 1);
> + if (IS_ERR(header))
> + return PTR_ERR(header);
> +
> + nesting_cap = container_of(header,
> + struct vfio_iommu_type1_info_cap_nesting,
> + header);
> +
> + memcpy(&nesting_cap->info, iommu->nesting_info,
> + iommu->nesting_info->size);
> +
> + return 0;
> +}
> +
> static int vfio_iommu_type1_get_info(struct vfio_iommu *iommu,
> unsigned long arg)
> {
> @@ -2586,6 +2661,12 @@ static int vfio_iommu_type1_get_info(struct vfio_iommu *iommu,
> if (ret)
> return ret;
>
> + if (iommu->nesting_info) {
> + ret = vfio_iommu_info_add_nesting_cap(iommu, &caps);
> + if (ret)
> + return ret;
> + }
> +
> if (caps.size) {
> info.flags |= VFIO_IOMMU_INFO_CAPS;
>
> diff --git a/include/uapi/linux/vfio.h b/include/uapi/linux/vfio.h
> index 9204705..3e3de9c 100644
> --- a/include/uapi/linux/vfio.h
> +++ b/include/uapi/linux/vfio.h
> @@ -1039,6 +1039,22 @@ struct vfio_iommu_type1_info_cap_migration {
> __u64 max_dirty_bitmap_size; /* in bytes */
> };
>
> +#define VFIO_IOMMU_TYPE1_INFO_CAP_NESTING 3

You may improve the documentation by taking examples from the above caps.
> +
> +/*
> + * Reporting nesting info to user space.
> + *
> + * @info: the nesting info provided by IOMMU driver. Today
> + * it is expected to be a struct iommu_nesting_info
> + * data.
Is it expected to change?
> + */
> +struct vfio_iommu_type1_info_cap_nesting {
> + struct vfio_info_cap_header header;
> + __u32 flags;
You may document flags.
> + __u32 padding;
> + __u8 info[];
> +};
> +
> #define VFIO_IOMMU_GET_INFO _IO(VFIO_TYPE, VFIO_BASE + 12)
>
> /**
>
Thanks

Eric
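
A sketch of the rename suggested above (an assumption about the
follow-up, not posted code; the macro argument is also parenthesized
here, which the original omits):

#define CONTAINER_HAS_DOMAIN(iommu)	((iommu)->external_domain || \
					 !list_empty(&(iommu)->domain_list))

	/* usage at the attach-time singleton check */
	if (iommu->nesting && CONTAINER_HAS_DOMAIN(iommu)) {
		mutex_unlock(&iommu->lock);
		return -EINVAL;
	}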

2020-07-06 13:13:37

by Yi Liu

Subject: RE: [PATCH v4 04/15] vfio/type1: Report iommu nesting info to userspace

Hi Eric,

> From: Auger Eric <[email protected]>
> Sent: Monday, July 6, 2020 6:37 PM
>
> Yi,
>
> On 7/4/20 1:26 PM, Liu Yi L wrote:
> > This patch exports iommu nesting capability info to user space through
> > VFIO. User space is expected to check this info for supported uAPIs (e.g.
> > PASID alloc/free, bind page table, and cache invalidation) and the vendor
> > specific format information for first level/stage page table that will be
> > bound to.
> >
> > The nesting info is available only after the nesting iommu type is set
> > for a container. Current implementation imposes one limitation - one
> > nesting container should include at most one group. The philosophy of
> > vfio container is having all groups/devices within the container share
> > the same IOMMU context. When vSVA is enabled, one IOMMU context could
> > include one 2nd-level address space and multiple 1st-level address spaces.
> > While the 2nd-leve address space is reasonably sharable by multiple groups
> level

oh, yes.

> > , blindly sharing 1st-level address spaces across all groups within the
> > container might instead break the guest expectation. In the future sub/
> > super container concept might be introduced to allow partial address space
> > sharing within an IOMMU context. But for now let's go with this restriction
> > by requiring singleton container for using nesting iommu features. Below
> > link has the related discussion about this decision.
> >
> > https://lkml.org/lkml/2020/5/15/1028
> >
> > Cc: Kevin Tian <[email protected]>
> > CC: Jacob Pan <[email protected]>
> > Cc: Alex Williamson <[email protected]>
> > Cc: Eric Auger <[email protected]>
> > Cc: Jean-Philippe Brucker <[email protected]>
> > Cc: Joerg Roedel <[email protected]>
> > Cc: Lu Baolu <[email protected]>
> > Signed-off-by: Liu Yi L <[email protected]>
> > ---
> > v3 -> v4:
> > *) address comments against v3.
> >
> > v1 -> v2:
> > *) added in v2
> > ---
> >
> > drivers/vfio/vfio_iommu_type1.c | 105 +++++++++++++++++++++++++++++++++++-----
> > include/uapi/linux/vfio.h | 16 ++++++
> > 2 files changed, 109 insertions(+), 12 deletions(-)
> >
> > diff --git a/drivers/vfio/vfio_iommu_type1.c b/drivers/vfio/vfio_iommu_type1.c
> > index 7accb59..80623b8 100644
> > --- a/drivers/vfio/vfio_iommu_type1.c
> > +++ b/drivers/vfio/vfio_iommu_type1.c
> > @@ -62,18 +62,20 @@ MODULE_PARM_DESC(dma_entry_limit,
> > "Maximum number of user DMA mappings per container (65535).");
> >
> > struct vfio_iommu {
> > - struct list_head domain_list;
> > - struct list_head iova_list;
> > - struct vfio_domain *external_domain; /* domain for external user */
> > - struct mutex lock;
> > - struct rb_root dma_list;
> > - struct blocking_notifier_head notifier;
> > - unsigned int dma_avail;
> > - uint64_t pgsize_bitmap;
> > - bool v2;
> > - bool nesting;
> > - bool dirty_page_tracking;
> > - bool pinned_page_dirty_scope;
> > + struct list_head domain_list;
> > + struct list_head iova_list;
> > + struct vfio_domain *external_domain; /* domain for
> > + external user */
> nit: put the comment before the field?

do you mean below?

+ /* domain for external user */
+ struct vfio_domain *external_domain;

> > + struct mutex lock;
> > + struct rb_root dma_list;
> > + struct blocking_notifier_head notifier;
> > + unsigned int dma_avail;
> > + uint64_t pgsize_bitmap;
> > + bool v2;
> > + bool nesting;
> > + bool dirty_page_tracking;
> > + bool pinned_page_dirty_scope;
> > + struct iommu_nesting_info *nesting_info;
> > };
> >
> > struct vfio_domain {
> > @@ -130,6 +132,9 @@ struct vfio_regions {
> > #define IS_IOMMU_CAP_DOMAIN_IN_CONTAINER(iommu) \
> > (!list_empty(&iommu->domain_list))
> >
> > +#define IS_DOMAIN_IN_CONTAINER(iommu) ((iommu->external_domain) || \
> > + (!list_empty(&iommu->domain_list)))
> rename into something like CONTAINER_HAS_DOMAIN()?

got it.

> > +
> > #define DIRTY_BITMAP_BYTES(n) (ALIGN(n, BITS_PER_TYPE(u64)) / BITS_PER_BYTE)
> >
> > /*
> > @@ -1929,6 +1934,13 @@ static void vfio_iommu_iova_insert_copy(struct vfio_iommu *iommu,
> >
> > list_splice_tail(iova_copy, iova);
> > }
> > +
> > +static void vfio_iommu_release_nesting_info(struct vfio_iommu *iommu)
> > +{
> > + kfree(iommu->nesting_info);
> > + iommu->nesting_info = NULL;
> > +}
> > +
> > static int vfio_iommu_type1_attach_group(void *iommu_data,
> > struct iommu_group *iommu_group)
> > {
> > @@ -1959,6 +1971,12 @@ static int vfio_iommu_type1_attach_group(void *iommu_data,
> > }
> > }
> >
> > + /* Nesting type container can include only one group */
> > + if (iommu->nesting && IS_DOMAIN_IN_CONTAINER(iommu)) {
> > + mutex_unlock(&iommu->lock);
> > + return -EINVAL;
> > + }
> > +
> > group = kzalloc(sizeof(*group), GFP_KERNEL);
> > domain = kzalloc(sizeof(*domain), GFP_KERNEL);
> > if (!group || !domain) {
> > @@ -2029,6 +2047,36 @@ static int vfio_iommu_type1_attach_group(void *iommu_data,
> > if (ret)
> > goto out_domain;
> >
> > + /* Nesting cap info is available only after attaching */
> > + if (iommu->nesting) {
> > + struct iommu_nesting_info tmp;
> > + struct iommu_nesting_info *info;
> > +
> > + /* First get the size of vendor specific nesting info */
> > + ret = iommu_domain_get_attr(domain->domain,
> > + DOMAIN_ATTR_NESTING,
> > + &tmp);
> > + if (ret)
> > + goto out_detach;
> > +
> > + info = kzalloc(tmp.size, GFP_KERNEL);
> nit: you may directly use iommu->nesting_info

got you.

> > + if (!info) {
> > + ret = -ENOMEM;
> > + goto out_detach;
> > + }
> > +
> > + /* Now get the nesting info */
> > + info->size = tmp.size;
> > + ret = iommu_domain_get_attr(domain->domain,
> > + DOMAIN_ATTR_NESTING,
> > + info);
> > + if (ret) {
> > + kfree(info);
> ... and set it back to NULL here if it fails

and maybe no need to free it here as vfio_iommu_release_nesting_info()
will free the nesting_info.

> > + goto out_detach;
> > + }
> > + iommu->nesting_info = info;
> > + }
> > +
> > /* Get aperture info */
> > iommu_domain_get_attr(domain->domain, DOMAIN_ATTR_GEOMETRY, &geo);
> >
> > @@ -2138,6 +2186,7 @@ static int vfio_iommu_type1_attach_group(void *iommu_data,
> > return 0;
> >
> > out_detach:
> > + vfio_iommu_release_nesting_info(iommu);
> > vfio_iommu_detach_group(domain, group);
> > out_domain:
> > iommu_domain_free(domain->domain);
> > @@ -2338,6 +2387,8 @@ static void vfio_iommu_type1_detach_group(void *iommu_data,
> > vfio_iommu_unmap_unpin_all(iommu);
> > else
> > vfio_iommu_unmap_unpin_reaccount(iommu);
> > +
> > + vfio_iommu_release_nesting_info(iommu);
> > }
> > iommu_domain_free(domain->domain);
> > list_del(&domain->next);
> > @@ -2546,6 +2597,30 @@ static int vfio_iommu_migration_build_caps(struct vfio_iommu *iommu,
> > return vfio_info_add_capability(caps, &cap_mig.header, sizeof(cap_mig));
> > }
> >
> > +static int vfio_iommu_info_add_nesting_cap(struct vfio_iommu *iommu,
> > + struct vfio_info_cap *caps)
> > +{
> > + struct vfio_info_cap_header *header;
> > + struct vfio_iommu_type1_info_cap_nesting *nesting_cap;
> > + size_t size;
> > +
> > + size = sizeof(*nesting_cap) + iommu->nesting_info->size;
> > +
> > + header = vfio_info_cap_add(caps, size,
> > + VFIO_IOMMU_TYPE1_INFO_CAP_NESTING, 1);
> > + if (IS_ERR(header))
> > + return PTR_ERR(header);
> > +
> > + nesting_cap = container_of(header,
> > + struct vfio_iommu_type1_info_cap_nesting,
> > + header);
> > +
> > + memcpy(&nesting_cap->info, iommu->nesting_info,
> > + iommu->nesting_info->size);
> > +
> > + return 0;
> > +}
> > +
> > static int vfio_iommu_type1_get_info(struct vfio_iommu *iommu,
> > unsigned long arg)
> > {
> > @@ -2586,6 +2661,12 @@ static int vfio_iommu_type1_get_info(struct vfio_iommu *iommu,
> > if (ret)
> > return ret;
> >
> > + if (iommu->nesting_info) {
> > + ret = vfio_iommu_info_add_nesting_cap(iommu, &caps);
> > + if (ret)
> > + return ret;
> > + }
> > +
> > if (caps.size) {
> > info.flags |= VFIO_IOMMU_INFO_CAPS;
> >
> > diff --git a/include/uapi/linux/vfio.h b/include/uapi/linux/vfio.h
> > index 9204705..3e3de9c 100644
> > --- a/include/uapi/linux/vfio.h
> > +++ b/include/uapi/linux/vfio.h
> > @@ -1039,6 +1039,22 @@ struct vfio_iommu_type1_info_cap_migration {
> > __u64 max_dirty_bitmap_size; /* in bytes */
> > };
> >
> > +#define VFIO_IOMMU_TYPE1_INFO_CAP_NESTING 3
>
> You may improve the documentation by taking examples from the above caps.

yes, it is. I somehow broke the style. how about below?



/*
* The nesting capability allows to report the related capability
* and info for nesting iommu type.
*
* The structures below define version 1 of this capability.
*
* User space should check this cap for setup nesting iommu type.
*
* @info: the nesting info provided by IOMMU driver. Today
* it is expected to be a struct iommu_nesting_info
* data.
#define VFIO_IOMMU_TYPE1_INFO_CAP_NESTING 3

struct vfio_iommu_type1_info_cap_nesting {
...
};

> > +
> > +/*
> > + * Reporting nesting info to user space.
> > + *
> > + * @info: the nesting info provided by IOMMU driver. Today
> > + * it is expected to be a struct iommu_nesting_info
> > + * data.
> Is it expected to change?

honestly, I'm not quite sure about it. I did consider embedding
struct iommu_nesting_info here instead of using info[], but I
hesitated as info[] may leave more flexibility in this struct.
what's your opinion? perhaps it's fine to embed struct
iommu_nesting_info here as long as VFIO sets up nesting based
on the IOMMU UAPI.

> > + */
> > +struct vfio_iommu_type1_info_cap_nesting {
> > + struct vfio_info_cap_header header;
> > + __u32 flags;
> You may document flags.

sure. it's reserved for future use.

Regards,
Yi Liu

> > + __u32 padding;
> > + __u8 info[];
> > +};
> > +
> > #define VFIO_IOMMU_GET_INFO _IO(VFIO_TYPE, VFIO_BASE + 12)
> >
> > /**
> >
> Thanks
>
> Eric
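
Folding together the two points above (allocating straight into
iommu->nesting_info and relying on vfio_iommu_release_nesting_info()
in the out_detach error path), the attach-time block might become the
following (a sketch of the discussed change, not the posted v4 code):

	if (iommu->nesting) {
		struct iommu_nesting_info tmp;

		/* first query reports only the needed size */
		ret = iommu_domain_get_attr(domain->domain,
					    DOMAIN_ATTR_NESTING, &tmp);
		if (ret)
			goto out_detach;

		iommu->nesting_info = kzalloc(tmp.size, GFP_KERNEL);
		if (!iommu->nesting_info) {
			ret = -ENOMEM;
			goto out_detach;
		}

		/* second query fills the vendor specific data */
		iommu->nesting_info->size = tmp.size;
		ret = iommu_domain_get_attr(domain->domain,
					    DOMAIN_ATTR_NESTING,
					    iommu->nesting_info);
		if (ret)
			goto out_detach; /* release helper frees and NULLs it */
	}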

2020-07-06 13:46:42

by Eric Auger

Subject: Re: [PATCH v4 04/15] vfio/type1: Report iommu nesting info to userspace

Hi Yi,

On 7/6/20 3:10 PM, Liu, Yi L wrote:
> Hi Eric,
>
>> From: Auger Eric <[email protected]>
>> Sent: Monday, July 6, 2020 6:37 PM
>>
>> Yi,
>>
>> On 7/4/20 1:26 PM, Liu Yi L wrote:
>>> This patch exports iommu nesting capability info to user space through
>>> VFIO. User space is expected to check this info for supported uAPIs (e.g.
>>> PASID alloc/free, bind page table, and cache invalidation) and the vendor
>>> specific format information for first level/stage page table that will be
>>> bound to.
>>>
>>> The nesting info is available only after the nesting iommu type is set
>>> for a container. Current implementation imposes one limitation - one
>>> nesting container should include at most one group. The philosophy of
>>> vfio container is having all groups/devices within the container share
>>> the same IOMMU context. When vSVA is enabled, one IOMMU context could
>>> include one 2nd-level address space and multiple 1st-level address spaces.
>>> While the 2nd-leve address space is reasonably sharable by multiple groups
>> level
>
> oh, yes.
>
>>> , blindly sharing 1st-level address spaces across all groups within the
>>> container might instead break the guest expectation. In the future sub/
>>> super container concept might be introduced to allow partial address space
>>> sharing within an IOMMU context. But for now let's go with this restriction
>>> by requiring singleton container for using nesting iommu features. Below
>>> link has the related discussion about this decision.
>>>
>>> https://lkml.org/lkml/2020/5/15/1028
>>>
>>> Cc: Kevin Tian <[email protected]>
>>> CC: Jacob Pan <[email protected]>
>>> Cc: Alex Williamson <[email protected]>
>>> Cc: Eric Auger <[email protected]>
>>> Cc: Jean-Philippe Brucker <[email protected]>
>>> Cc: Joerg Roedel <[email protected]>
>>> Cc: Lu Baolu <[email protected]>
>>> Signed-off-by: Liu Yi L <[email protected]>
>>> ---
>>> v3 -> v4:
>>> *) address comments against v3.
>>>
>>> v1 -> v2:
>>> *) added in v2
>>> ---
>>>
>>> drivers/vfio/vfio_iommu_type1.c | 105 +++++++++++++++++++++++++++++++++++-----
>>> include/uapi/linux/vfio.h | 16 ++++++
>>> 2 files changed, 109 insertions(+), 12 deletions(-)
>>>
>>> diff --git a/drivers/vfio/vfio_iommu_type1.c b/drivers/vfio/vfio_iommu_type1.c
>>> index 7accb59..80623b8 100644
>>> --- a/drivers/vfio/vfio_iommu_type1.c
>>> +++ b/drivers/vfio/vfio_iommu_type1.c
>>> @@ -62,18 +62,20 @@ MODULE_PARM_DESC(dma_entry_limit,
>>> "Maximum number of user DMA mappings per container (65535).");
>>>
>>> struct vfio_iommu {
>>> - struct list_head domain_list;
>>> - struct list_head iova_list;
>>> - struct vfio_domain *external_domain; /* domain for external user */
>>> - struct mutex lock;
>>> - struct rb_root dma_list;
>>> - struct blocking_notifier_head notifier;
>>> - unsigned int dma_avail;
>>> - uint64_t pgsize_bitmap;
>>> - bool v2;
>>> - bool nesting;
>>> - bool dirty_page_tracking;
>>> - bool pinned_page_dirty_scope;
>>> + struct list_head domain_list;
>>> + struct list_head iova_list;
>>> + struct vfio_domain *external_domain; /* domain for
>>> + external user */
>> nit: put the comment before the field?
>
> do you mean below?
>
> + /* domain for external user */
> + struct vfio_domain *external_domain;
yes that's what I meant
>
>>> + struct mutex lock;
>>> + struct rb_root dma_list;
>>> + struct blocking_notifier_head notifier;
>>> + unsigned int dma_avail;
>>> + uint64_t pgsize_bitmap;
>>> + bool v2;
>>> + bool nesting;
>>> + bool dirty_page_tracking;
>>> + bool pinned_page_dirty_scope;
>>> + struct iommu_nesting_info *nesting_info;
>>> };
>>>
>>> struct vfio_domain {
>>> @@ -130,6 +132,9 @@ struct vfio_regions {
>>> #define IS_IOMMU_CAP_DOMAIN_IN_CONTAINER(iommu) \
>>> (!list_empty(&iommu->domain_list))
>>>
>>> +#define IS_DOMAIN_IN_CONTAINER(iommu) ((iommu->external_domain) || \
>>> + (!list_empty(&iommu->domain_list)))
>> rename into something like CONTAINER_HAS_DOMAIN()?
>
> got it.
>
>>> +
>>> #define DIRTY_BITMAP_BYTES(n) (ALIGN(n, BITS_PER_TYPE(u64)) / BITS_PER_BYTE)
>>>
>>> /*
>>> @@ -1929,6 +1934,13 @@ static void vfio_iommu_iova_insert_copy(struct vfio_iommu *iommu,
>>>
>>> list_splice_tail(iova_copy, iova);
>>> }
>>> +
>>> +static void vfio_iommu_release_nesting_info(struct vfio_iommu *iommu)
>>> +{
>>> + kfree(iommu->nesting_info);
>>> + iommu->nesting_info = NULL;
>>> +}
>>> +
>>> static int vfio_iommu_type1_attach_group(void *iommu_data,
>>> struct iommu_group *iommu_group)
>>> {
>>> @@ -1959,6 +1971,12 @@ static int vfio_iommu_type1_attach_group(void *iommu_data,
>>> }
>>> }
>>>
>>> + /* Nesting type container can include only one group */
>>> + if (iommu->nesting && IS_DOMAIN_IN_CONTAINER(iommu)) {
>>> + mutex_unlock(&iommu->lock);
>>> + return -EINVAL;
>>> + }
>>> +
>>> group = kzalloc(sizeof(*group), GFP_KERNEL);
>>> domain = kzalloc(sizeof(*domain), GFP_KERNEL);
>>> if (!group || !domain) {
>>> @@ -2029,6 +2047,36 @@ static int vfio_iommu_type1_attach_group(void *iommu_data,
>>> if (ret)
>>> goto out_domain;
>>>
>>> + /* Nesting cap info is available only after attaching */
>>> + if (iommu->nesting) {
>>> + struct iommu_nesting_info tmp;
>>> + struct iommu_nesting_info *info;
>>> +
>>> + /* First get the size of vendor specific nesting info */
>>> + ret = iommu_domain_get_attr(domain->domain,
>>> + DOMAIN_ATTR_NESTING,
>>> + &tmp);
>>> + if (ret)
>>> + goto out_detach;
>>> +
>>> + info = kzalloc(tmp.size, GFP_KERNEL);
>> nit: you may directly use iommu->nesting_info
>
> got you.
>
>>> + if (!info) {
>>> + ret = -ENOMEM;
>>> + goto out_detach;
>>> + }
>>> +
>>> + /* Now get the nesting info */
>>> + info->size = tmp.size;
>>> + ret = iommu_domain_get_attr(domain->domain,
>>> + DOMAIN_ATTR_NESTING,
>>> + info);
>>> + if (ret) {
>>> + kfree(info);
>> ... and set it back to NULL here if it fails
>
> and maybe no need to free it here as vfio_iommu_release_nesting_info()
> will free the nesting_info.
>
>>> + goto out_detach;
>>> + }
>>> + iommu->nesting_info = info;
>>> + }
>>> +
>>> /* Get aperture info */
>>> iommu_domain_get_attr(domain->domain, DOMAIN_ATTR_GEOMETRY, &geo);
>>>
>>> @@ -2138,6 +2186,7 @@ static int vfio_iommu_type1_attach_group(void *iommu_data,
>>> return 0;
>>>
>>> out_detach:
>>> + vfio_iommu_release_nesting_info(iommu);
>>> vfio_iommu_detach_group(domain, group);
>>> out_domain:
>>> iommu_domain_free(domain->domain);
>>> @@ -2338,6 +2387,8 @@ static void vfio_iommu_type1_detach_group(void *iommu_data,
>>> vfio_iommu_unmap_unpin_all(iommu);
>>> else
>>> vfio_iommu_unmap_unpin_reaccount(iommu);
>>> +
>>> + vfio_iommu_release_nesting_info(iommu);
>>> }
>>> iommu_domain_free(domain->domain);
>>> list_del(&domain->next);
>>> @@ -2546,6 +2597,30 @@ static int vfio_iommu_migration_build_caps(struct vfio_iommu *iommu,
>>> return vfio_info_add_capability(caps, &cap_mig.header, sizeof(cap_mig));
>>> }
>>>
>>> +static int vfio_iommu_info_add_nesting_cap(struct vfio_iommu *iommu,
>>> + struct vfio_info_cap *caps)
>>> +{
>>> + struct vfio_info_cap_header *header;
>>> + struct vfio_iommu_type1_info_cap_nesting *nesting_cap;
>>> + size_t size;
>>> +
>>> + size = sizeof(*nesting_cap) + iommu->nesting_info->size;
>>> +
>>> + header = vfio_info_cap_add(caps, size,
>>> + VFIO_IOMMU_TYPE1_INFO_CAP_NESTING, 1);
>>> + if (IS_ERR(header))
>>> + return PTR_ERR(header);
>>> +
>>> + nesting_cap = container_of(header,
>>> + struct vfio_iommu_type1_info_cap_nesting,
>>> + header);
>>> +
>>> + memcpy(&nesting_cap->info, iommu->nesting_info,
>>> + iommu->nesting_info->size);
>>> +
>>> + return 0;
>>> +}
>>> +
>>> static int vfio_iommu_type1_get_info(struct vfio_iommu *iommu,
>>> unsigned long arg)
>>> {
>>> @@ -2586,6 +2661,12 @@ static int vfio_iommu_type1_get_info(struct vfio_iommu *iommu,
>>> if (ret)
>>> return ret;
>>>
>>> + if (iommu->nesting_info) {
>>> + ret = vfio_iommu_info_add_nesting_cap(iommu, &caps);
>>> + if (ret)
>>> + return ret;
>>> + }
>>> +
>>> if (caps.size) {
>>> info.flags |= VFIO_IOMMU_INFO_CAPS;
>>>
>>> diff --git a/include/uapi/linux/vfio.h b/include/uapi/linux/vfio.h
>>> index 9204705..3e3de9c 100644
>>> --- a/include/uapi/linux/vfio.h
>>> +++ b/include/uapi/linux/vfio.h
>>> @@ -1039,6 +1039,22 @@ struct vfio_iommu_type1_info_cap_migration {
>>> __u64 max_dirty_bitmap_size; /* in bytes */
>>> };
>>>
>>> +#define VFIO_IOMMU_TYPE1_INFO_CAP_NESTING 3
>>
>> You may improve the documentation by taking examples from the above caps.
>
> yes, it is. I somehow broke the style. how about below?
>
>
>
> /*
> * The nesting capability allows to report the related capability
> * and info for nesting iommu type.
> *
> * The structures below define version 1 of this capability.
> *
> * User space should check this cap for setup nesting iommu type.
before setting up stage 1 information? The wording above sounds a bit
confusing to me as it can be interpreted as before choosing
VFIO_TYPE1_NESTING_IOMMU.

You also need to document it returns the capability only after a group
is attached - which looks strange by the way -.

Thanks

Eric
> *
> * @info: the nesting info provided by IOMMU driver. Today
> * it is expected to be a struct iommu_nesting_info
> * data.
> #define VFIO_IOMMU_TYPE1_INFO_CAP_NESTING 3
>
> struct vfio_iommu_type1_info_cap_nesting {
> ...
> };
>
>>> +
>>> +/*
>>> + * Reporting nesting info to user space.
>>> + *
>>> + * @info: the nesting info provided by IOMMU driver. Today
>>> + * it is expected to be a struct iommu_nesting_info
>>> + * data.
>> Is it expected to change?
>
> honestly, I'm not quite sure about it. I did consider embedding
> struct iommu_nesting_info here instead of using info[], but I
> hesitated as info[] may leave more flexibility in this struct.
> what's your opinion? perhaps it's fine to embed struct
> iommu_nesting_info here as long as VFIO sets up nesting based
> on the IOMMU UAPI.
>
>>> + */
>>> +struct vfio_iommu_type1_info_cap_nesting {
>>> + struct vfio_info_cap_header header;
>>> + __u32 flags;
>> You may document flags.
>
> sure. it's reserved for future use.
>
> Regards,
> Yi Liu
>
>>> + __u32 padding;
>>> + __u8 info[];
>>> +};
>>> +
>>> #define VFIO_IOMMU_GET_INFO _IO(VFIO_TYPE, VFIO_BASE + 12)
>>>
>>> /**
>>>
>> Thanks
>>
>> Eric
>
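
Folding the feedback above into the uAPI header, the documented
capability might read as follows (a sketch of the discussed wording,
not the final text):

/*
 * The nesting capability allows reporting of the nesting related
 * features and the first level/stage page table formats supported
 * by the IOMMU driver. It is only exposed on containers for which
 * VFIO_TYPE1_NESTING_IOMMU has been selected and a group has been
 * attached.
 *
 * The structures below define version 1 of this capability.
 *
 * @flags:	reserved for future use, must be 0.
 * @info:	the nesting info provided by the IOMMU driver, laid
 *		out as a struct iommu_nesting_info.
 */
#define VFIO_IOMMU_TYPE1_INFO_CAP_NESTING  3

struct vfio_iommu_type1_info_cap_nesting {
	struct vfio_info_cap_header header;
	__u32	flags;
	__u32	padding;
	__u8	info[];
};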

2020-07-06 14:09:46

by Eric Auger

Subject: Re: [PATCH v4 04/15] vfio/type1: Report iommu nesting info to userspace

Hi Yi,
On 7/4/20 1:26 PM, Liu Yi L wrote:
> This patch exports iommu nesting capability info to user space through
> VFIO. User space is expected to check this info for supported uAPIs (e.g.
> PASID alloc/free, bind page table, and cache invalidation) and the vendor
> specific format information for first level/stage page table that will be
> bound to.
>
> The nesting info is available only after the nesting iommu type is set
> for a container. Current implementation imposes one limitation - one
> nesting container should include at most one group. The philosophy of
> vfio container is having all groups/devices within the container share
> the same IOMMU context. When vSVA is enabled, one IOMMU context could
> include one 2nd-level address space and multiple 1st-level address spaces.
> While the 2nd-leve address space is reasonably sharable by multiple groups
> , blindly sharing 1st-level address spaces across all groups within the
> container might instead break the guest expectation. In the future sub/
> super container concept might be introduced to allow partial address space
> sharing within an IOMMU context. But for now let's go with this restriction
> by requiring singleton container for using nesting iommu features. Below
> link has the related discussion about this decision.
>
> https://lkml.org/lkml/2020/5/15/1028
>
> Cc: Kevin Tian <[email protected]>
> CC: Jacob Pan <[email protected]>
> Cc: Alex Williamson <[email protected]>
> Cc: Eric Auger <[email protected]>
> Cc: Jean-Philippe Brucker <[email protected]>
> Cc: Joerg Roedel <[email protected]>
> Cc: Lu Baolu <[email protected]>
> Signed-off-by: Liu Yi L <[email protected]>
> ---
> v3 -> v4:
> *) address comments against v3.
>
> v1 -> v2:
> *) added in v2
> ---
>
> drivers/vfio/vfio_iommu_type1.c | 105 +++++++++++++++++++++++++++++++++++-----
> include/uapi/linux/vfio.h | 16 ++++++
> 2 files changed, 109 insertions(+), 12 deletions(-)
>
> diff --git a/drivers/vfio/vfio_iommu_type1.c b/drivers/vfio/vfio_iommu_type1.c
> index 7accb59..80623b8 100644
> --- a/drivers/vfio/vfio_iommu_type1.c
> +++ b/drivers/vfio/vfio_iommu_type1.c
> @@ -62,18 +62,20 @@ MODULE_PARM_DESC(dma_entry_limit,
> "Maximum number of user DMA mappings per container (65535).");
>
> struct vfio_iommu {
> - struct list_head domain_list;
> - struct list_head iova_list;
> - struct vfio_domain *external_domain; /* domain for external user */
> - struct mutex lock;
> - struct rb_root dma_list;
> - struct blocking_notifier_head notifier;
> - unsigned int dma_avail;
> - uint64_t pgsize_bitmap;
> - bool v2;
> - bool nesting;
> - bool dirty_page_tracking;
> - bool pinned_page_dirty_scope;
> + struct list_head domain_list;
> + struct list_head iova_list;
> + struct vfio_domain *external_domain; /* domain for
> + external user */
> + struct mutex lock;
> + struct rb_root dma_list;
> + struct blocking_notifier_head notifier;
> + unsigned int dma_avail;
> + uint64_t pgsize_bitmap;
> + bool v2;
> + bool nesting;
> + bool dirty_page_tracking;
> + bool pinned_page_dirty_scope;
> + struct iommu_nesting_info *nesting_info;
> };
>
> struct vfio_domain {
> @@ -130,6 +132,9 @@ struct vfio_regions {
> #define IS_IOMMU_CAP_DOMAIN_IN_CONTAINER(iommu) \
> (!list_empty(&iommu->domain_list))
>
> +#define IS_DOMAIN_IN_CONTAINER(iommu) ((iommu->external_domain) || \
> + (!list_empty(&iommu->domain_list)))
> +
> #define DIRTY_BITMAP_BYTES(n) (ALIGN(n, BITS_PER_TYPE(u64)) / BITS_PER_BYTE)
>
> /*
> @@ -1929,6 +1934,13 @@ static void vfio_iommu_iova_insert_copy(struct vfio_iommu *iommu,
>
> list_splice_tail(iova_copy, iova);
> }
> +
> +static void vfio_iommu_release_nesting_info(struct vfio_iommu *iommu)
> +{
> + kfree(iommu->nesting_info);
> + iommu->nesting_info = NULL;
> +}
> +
> static int vfio_iommu_type1_attach_group(void *iommu_data,
> struct iommu_group *iommu_group)
> {
> @@ -1959,6 +1971,12 @@ static int vfio_iommu_type1_attach_group(void *iommu_data,
> }
> }
>
> + /* Nesting type container can include only one group */
> + if (iommu->nesting && IS_DOMAIN_IN_CONTAINER(iommu)) {
> + mutex_unlock(&iommu->lock);
> + return -EINVAL;
> + }
> +
> group = kzalloc(sizeof(*group), GFP_KERNEL);
> domain = kzalloc(sizeof(*domain), GFP_KERNEL);
> if (!group || !domain) {
> @@ -2029,6 +2047,36 @@ static int vfio_iommu_type1_attach_group(void *iommu_data,
> if (ret)
> goto out_domain;
>
> + /* Nesting cap info is available only after attaching */
> + if (iommu->nesting) {
> + struct iommu_nesting_info tmp;
> + struct iommu_nesting_info *info;
> +
> + /* First get the size of vendor specific nesting info */
> + ret = iommu_domain_get_attr(domain->domain,
> + DOMAIN_ATTR_NESTING,
> + &tmp);
> + if (ret)
> + goto out_detach;
> +
> + info = kzalloc(tmp.size, GFP_KERNEL);
> + if (!info) {
> + ret = -ENOMEM;
> + goto out_detach;
> + }
> +
> + /* Now get the nesting info */
> + info->size = tmp.size;
> + ret = iommu_domain_get_attr(domain->domain,
> + DOMAIN_ATTR_NESTING,
> + info);
> + if (ret) {
> + kfree(info);
> + goto out_detach;
> + }
> + iommu->nesting_info = info;
> + }
> +
> /* Get aperture info */
> iommu_domain_get_attr(domain->domain, DOMAIN_ATTR_GEOMETRY, &geo);
>
> @@ -2138,6 +2186,7 @@ static int vfio_iommu_type1_attach_group(void *iommu_data,
> return 0;
>
> out_detach:
> + vfio_iommu_release_nesting_info(iommu);
> vfio_iommu_detach_group(domain, group);
> out_domain:
> iommu_domain_free(domain->domain);
> @@ -2338,6 +2387,8 @@ static void vfio_iommu_type1_detach_group(void *iommu_data,
> vfio_iommu_unmap_unpin_all(iommu);
> else
> vfio_iommu_unmap_unpin_reaccount(iommu);
> +
> + vfio_iommu_release_nesting_info(iommu);
> }
> iommu_domain_free(domain->domain);
> list_del(&domain->next);
> @@ -2546,6 +2597,30 @@ static int vfio_iommu_migration_build_caps(struct vfio_iommu *iommu,
> return vfio_info_add_capability(caps, &cap_mig.header, sizeof(cap_mig));
> }
>
> +static int vfio_iommu_info_add_nesting_cap(struct vfio_iommu *iommu,
> + struct vfio_info_cap *caps)
> +{
> + struct vfio_info_cap_header *header;
> + struct vfio_iommu_type1_info_cap_nesting *nesting_cap;
> + size_t size;
> +
> + size = sizeof(*nesting_cap) + iommu->nesting_info->size;
> +
> + header = vfio_info_cap_add(caps, size,
> + VFIO_IOMMU_TYPE1_INFO_CAP_NESTING, 1);
> + if (IS_ERR(header))
> + return PTR_ERR(header);
> +
> + nesting_cap = container_of(header,
> + struct vfio_iommu_type1_info_cap_nesting,
> + header);
> +
> + memcpy(&nesting_cap->info, iommu->nesting_info,
> + iommu->nesting_info->size);
> +
> + return 0;
> +}
> +
> static int vfio_iommu_type1_get_info(struct vfio_iommu *iommu,
> unsigned long arg)
> {
> @@ -2586,6 +2661,12 @@ static int vfio_iommu_type1_get_info(struct vfio_iommu *iommu,
> if (ret)
> return ret;
>
> + if (iommu->nesting_info) {
> + ret = vfio_iommu_info_add_nesting_cap(iommu, &caps);
I think this should happen while holding the &iommu->lock because
nothing prevents the group from being detached in-between

Thanks

Eric
> + if (ret)
> + return ret;
> + }
> +
> if (caps.size) {
> info.flags |= VFIO_IOMMU_INFO_CAPS;
>
> diff --git a/include/uapi/linux/vfio.h b/include/uapi/linux/vfio.h
> index 9204705..3e3de9c 100644
> --- a/include/uapi/linux/vfio.h
> +++ b/include/uapi/linux/vfio.h
> @@ -1039,6 +1039,22 @@ struct vfio_iommu_type1_info_cap_migration {
> __u64 max_dirty_bitmap_size; /* in bytes */
> };
>
> +#define VFIO_IOMMU_TYPE1_INFO_CAP_NESTING 3
> +
> +/*
> + * Reporting nesting info to user space.
> + *
> + * @info: the nesting info provided by IOMMU driver. Today
> + * it is expected to be a struct iommu_nesting_info
> + * data.
> + */
> +struct vfio_iommu_type1_info_cap_nesting {
> + struct vfio_info_cap_header header;
> + __u32 flags;
> + __u32 padding;
> + __u8 info[];
> +};
> +
> #define VFIO_IOMMU_GET_INFO _IO(VFIO_TYPE, VFIO_BASE + 12)
>
> /**
>
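
A sketch of the fix for the race noted above (an assumption about the
eventual change, not the posted patch): build the nesting cap under
iommu->lock so a concurrent group detach cannot free nesting_info
while it is being copied.

	mutex_lock(&iommu->lock);
	if (iommu->nesting_info)
		ret = vfio_iommu_info_add_nesting_cap(iommu, &caps);
	mutex_unlock(&iommu->lock);
	if (ret)
		return ret;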

2020-07-07 09:33:29

by Yi Liu

Subject: RE: [PATCH v4 04/15] vfio/type1: Report iommu nesting info to userspace

Hi Eric,

> From: Auger Eric <[email protected]>
> Sent: Monday, July 6, 2020 9:45 PM
>
> Hi Yi,
>
> On 7/6/20 3:10 PM, Liu, Yi L wrote:
> > Hi Eric,
> >
> >> From: Auger Eric <[email protected]>
> >> Sent: Monday, July 6, 2020 6:37 PM
> >>
> >> Yi,
> >>
> >> On 7/4/20 1:26 PM, Liu Yi L wrote:
> >>> This patch exports iommu nesting capability info to user space through
> >>> VFIO. User space is expected to check this info for supported uAPIs (e.g.
> >>> PASID alloc/free, bind page table, and cache invalidation) and the vendor
> >>> specific format information for first level/stage page table that will be
> >>> bound to.
> >>>
> >>> The nesting info is available only after the nesting iommu type is set
> >>> for a container. Current implementation imposes one limitation - one
> >>> nesting container should include at most one group. The philosophy of
> >>> vfio container is having all groups/devices within the container share
> >>> the same IOMMU context. When vSVA is enabled, one IOMMU context could
> >>> include one 2nd-level address space and multiple 1st-level address spaces.
> >>> While the 2nd-leve address space is reasonably sharable by multiple groups
> >> level
> >
> > oh, yes.
> >
> >>> , blindly sharing 1st-level address spaces across all groups within the
> >>> container might instead break the guest expectation. In the future sub/
> >>> super container concept might be introduced to allow partial address space
> >>> sharing within an IOMMU context. But for now let's go with this restriction
> >>> by requiring singleton container for using nesting iommu features. Below
> >>> link has the related discussion about this decision.
> >>>
> >>> https://lkml.org/lkml/2020/5/15/1028
> >>>
> >>> Cc: Kevin Tian <[email protected]>
> >>> CC: Jacob Pan <[email protected]>
> >>> Cc: Alex Williamson <[email protected]>
> >>> Cc: Eric Auger <[email protected]>
> >>> Cc: Jean-Philippe Brucker <[email protected]>
> >>> Cc: Joerg Roedel <[email protected]>
> >>> Cc: Lu Baolu <[email protected]>
> >>> Signed-off-by: Liu Yi L <[email protected]>
> >>> ---
> >>> v3 -> v4:
> >>> *) address comments against v3.
> >>>
> >>> v1 -> v2:
> >>> *) added in v2
> >>> ---
> >>>
> >>> drivers/vfio/vfio_iommu_type1.c | 105 +++++++++++++++++++++++++++++++++++-----
> >>> include/uapi/linux/vfio.h | 16 ++++++
> >>> 2 files changed, 109 insertions(+), 12 deletions(-)
> >>>
> >>> diff --git a/drivers/vfio/vfio_iommu_type1.c b/drivers/vfio/vfio_iommu_type1.c
> >>> index 7accb59..80623b8 100644
> >>> --- a/drivers/vfio/vfio_iommu_type1.c
> >>> +++ b/drivers/vfio/vfio_iommu_type1.c
> >>> @@ -62,18 +62,20 @@ MODULE_PARM_DESC(dma_entry_limit,
> >>> "Maximum number of user DMA mappings per container (65535).");
> >>>
> >>> struct vfio_iommu {
> >>> - struct list_head domain_list;
> >>> - struct list_head iova_list;
> >>> - struct vfio_domain *external_domain; /* domain for external user */
> >>> - struct mutex lock;
> >>> - struct rb_root dma_list;
> >>> - struct blocking_notifier_head notifier;
> >>> - unsigned int dma_avail;
> >>> - uint64_t pgsize_bitmap;
> >>> - bool v2;
> >>> - bool nesting;
> >>> - bool dirty_page_tracking;
> >>> - bool pinned_page_dirty_scope;
> >>> + struct list_head domain_list;
> >>> + struct list_head iova_list;
> >>> + struct vfio_domain *external_domain; /* domain for
> >>> + external user */
> >> nit: put the comment before the field?
> >
> > do you mean below?
> >
> > + /* domain for external user */
> > + struct vfio_domain *external_domain;
> yes that's what I meant

got you. :-)

> >
> >>> + struct mutex lock;
> >>> + struct rb_root dma_list;
> >>> + struct blocking_notifier_head notifier;
> >>> + unsigned int dma_avail;
> >>> + uint64_t pgsize_bitmap;
> >>> + bool v2;
> >>> + bool nesting;
> >>> + bool dirty_page_tracking;
> >>> + bool pinned_page_dirty_scope;
> >>> + struct iommu_nesting_info *nesting_info;
> >>> };
> >>>
> >>> struct vfio_domain {
> >>> @@ -130,6 +132,9 @@ struct vfio_regions {
> >>> #define IS_IOMMU_CAP_DOMAIN_IN_CONTAINER(iommu) \
> >>> (!list_empty(&iommu->domain_list))
> >>>
> >>> +#define IS_DOMAIN_IN_CONTAINER(iommu) ((iommu->external_domain) || \
> >>> + (!list_empty(&iommu->domain_list)))
> >> rename into something like CONTAINER_HAS_DOMAIN()?
> >
> > got it.
> >
> >>> +
> >>> #define DIRTY_BITMAP_BYTES(n) (ALIGN(n, BITS_PER_TYPE(u64)) / BITS_PER_BYTE)
> >>>
> >>> /*
> >>> @@ -1929,6 +1934,13 @@ static void vfio_iommu_iova_insert_copy(struct vfio_iommu *iommu,
> >>>
> >>> list_splice_tail(iova_copy, iova);
> >>> }
> >>> +
> >>> +static void vfio_iommu_release_nesting_info(struct vfio_iommu *iommu)
> >>> +{
> >>> + kfree(iommu->nesting_info);
> >>> + iommu->nesting_info = NULL;
> >>> +}
> >>> +
> >>> static int vfio_iommu_type1_attach_group(void *iommu_data,
> >>> struct iommu_group *iommu_group)
> >>> {
> >>> @@ -1959,6 +1971,12 @@ static int vfio_iommu_type1_attach_group(void *iommu_data,
> >>> }
> >>> }
> >>>
> >>> + /* Nesting type container can include only one group */
> >>> + if (iommu->nesting && IS_DOMAIN_IN_CONTAINER(iommu)) {
> >>> + mutex_unlock(&iommu->lock);
> >>> + return -EINVAL;
> >>> + }
> >>> +
> >>> group = kzalloc(sizeof(*group), GFP_KERNEL);
> >>> domain = kzalloc(sizeof(*domain), GFP_KERNEL);
> >>> if (!group || !domain) {
> >>> @@ -2029,6 +2047,36 @@ static int vfio_iommu_type1_attach_group(void *iommu_data,
> >>> if (ret)
> >>> goto out_domain;
> >>>
> >>> + /* Nesting cap info is available only after attaching */
> >>> + if (iommu->nesting) {
> >>> + struct iommu_nesting_info tmp;
> >>> + struct iommu_nesting_info *info;
> >>> +
> >>> + /* First get the size of vendor specific nesting info */
> >>> + ret = iommu_domain_get_attr(domain->domain,
> >>> + DOMAIN_ATTR_NESTING,
> >>> + &tmp);
> >>> + if (ret)
> >>> + goto out_detach;
> >>> +
> >>> + info = kzalloc(tmp.size, GFP_KERNEL);
> >> nit: you may directly use iommu->nesting_info
> >
> > got you.
> >
> >>> + if (!info) {
> >>> + ret = -ENOMEM;
> >>> + goto out_detach;
> >>> + }
> >>> +
> >>> + /* Now get the nesting info */
> >>> + info->size = tmp.size;
> >>> + ret = iommu_domain_get_attr(domain->domain,
> >>> + DOMAIN_ATTR_NESTING,
> >>> + info);
> >>> + if (ret) {
> >>> + kfree(info);
> >> ... and set it back to NULL here if it fails
> >
> > and maybe no need to free it here as vfio_iommu_release_nesting_info()
> > will free the nesting_info.
> >
> >>> + goto out_detach;
> >>> + }
> >>> + iommu->nesting_info = info;
> >>> + }
> >>> +
> >>> /* Get aperture info */
> >>> iommu_domain_get_attr(domain->domain, DOMAIN_ATTR_GEOMETRY, &geo);
> >>>
> >>> @@ -2138,6 +2186,7 @@ static int vfio_iommu_type1_attach_group(void *iommu_data,
> >>> return 0;
> >>>
> >>> out_detach:
> >>> + vfio_iommu_release_nesting_info(iommu);
> >>> vfio_iommu_detach_group(domain, group);
> >>> out_domain:
> >>> iommu_domain_free(domain->domain);
> >>> @@ -2338,6 +2387,8 @@ static void vfio_iommu_type1_detach_group(void *iommu_data,
> >>> vfio_iommu_unmap_unpin_all(iommu);
> >>> else
> >>> vfio_iommu_unmap_unpin_reaccount(iommu);
> >>> +
> >>> + vfio_iommu_release_nesting_info(iommu);
> >>> }
> >>> iommu_domain_free(domain->domain);
> >>> list_del(&domain->next);
> >>> @@ -2546,6 +2597,30 @@ static int vfio_iommu_migration_build_caps(struct vfio_iommu *iommu,
> >>> return vfio_info_add_capability(caps, &cap_mig.header, sizeof(cap_mig));
> >>> }
> >>>
> >>> +static int vfio_iommu_info_add_nesting_cap(struct vfio_iommu *iommu,
> >>> + struct vfio_info_cap *caps)
> >>> +{
> >>> + struct vfio_info_cap_header *header;
> >>> + struct vfio_iommu_type1_info_cap_nesting *nesting_cap;
> >>> + size_t size;
> >>> +
> >>> + size = sizeof(*nesting_cap) + iommu->nesting_info->size;
> >>> +
> >>> + header = vfio_info_cap_add(caps, size,
> >>> + VFIO_IOMMU_TYPE1_INFO_CAP_NESTING, 1);
> >>> + if (IS_ERR(header))
> >>> + return PTR_ERR(header);
> >>> +
> >>> + nesting_cap = container_of(header,
> >>> + struct vfio_iommu_type1_info_cap_nesting,
> >>> + header);
> >>> +
> >>> + memcpy(&nesting_cap->info, iommu->nesting_info,
> >>> + iommu->nesting_info->size);
> >>> +
> >>> + return 0;
> >>> +}
> >>> +
> >>> static int vfio_iommu_type1_get_info(struct vfio_iommu *iommu,
> >>> unsigned long arg)
> >>> {
> >>> @@ -2586,6 +2661,12 @@ static int vfio_iommu_type1_get_info(struct vfio_iommu *iommu,
> >>> if (ret)
> >>> return ret;
> >>>
> >>> + if (iommu->nesting_info) {
> >>> + ret = vfio_iommu_info_add_nesting_cap(iommu, &caps);
> >>> + if (ret)
> >>> + return ret;
> >>> + }
> >>> +
> >>> if (caps.size) {
> >>> info.flags |= VFIO_IOMMU_INFO_CAPS;
> >>>
> >>> diff --git a/include/uapi/linux/vfio.h b/include/uapi/linux/vfio.h
> >>> index 9204705..3e3de9c 100644
> >>> --- a/include/uapi/linux/vfio.h
> >>> +++ b/include/uapi/linux/vfio.h
> >>> @@ -1039,6 +1039,22 @@ struct vfio_iommu_type1_info_cap_migration {
> >>> __u64 max_dirty_bitmap_size; /* in bytes */
> >>> };
> >>>
> >>> +#define VFIO_IOMMU_TYPE1_INFO_CAP_NESTING 3
> >>
> >> You may improve the documentation by taking examples from the above caps.
> >
> > yes, it is. I somehow broke the style. how about below?
> >
> >
> >
> > /*
> > * The nesting capability allows to report the related capability
> > * and info for nesting iommu type.
> > *
> > * The structures below define version 1 of this capability.
> > *
> > * User space should check this cap for setup nesting iommu type.
> before setting up stage 1 information? The wording above sounds a bit
> confusing to me as it can be interpreted as before choosing
> VFIO_TYPE1_NESTING_IOMMU.
>

oh, yep. this cap is available only for the nesting type iommu, a.k.a.
when VFIO_TYPE1_NESTING_IOMMU is selected.

> You also need to document it returns the capability only after a group
> is attached - which looks strange by the way -.

I think this should be aligned with VFIO_IOMMU_GET_INFO usage.
GET_INFO is meaningful after VFIO_SET_IOMMU, which includes
group attaching.

Regards,
Yi Liu

> Thanks
>
> Eric
> > *
> > * @info: the nesting info provided by IOMMU driver. Today
> > * it is expected to be a struct iommu_nesting_info
> > * data.
> > #define VFIO_IOMMU_TYPE1_INFO_CAP_NESTING 3
> >
> > struct vfio_iommu_type1_info_cap_nesting {
> > ...
> > };
> >
> >>> +
> >>> +/*
> >>> + * Reporting nesting info to user space.
> >>> + *
> >>> + * @info: the nesting info provided by IOMMU driver. Today
> >>> + * it is expected to be a struct iommu_nesting_info
> >>> + * data.
> >> Is it expected to change?
> >
> > honestly, I'm not quite sure about it. I did consider embedding
> > struct iommu_nesting_info here instead of using info[], but I
> > hesitated as info[] may leave more flexibility in this struct.
> > what's your opinion? perhaps it's fine to embed struct
> > iommu_nesting_info here as long as VFIO sets up nesting based
> > on the IOMMU UAPI.
> >
> >>> + */
> >>> +struct vfio_iommu_type1_info_cap_nesting {
> >>> + struct vfio_info_cap_header header;
> >>> + __u32 flags;
> >> You may document flags.
> >
> > sure. it's reserved for future use.
> >
> > Regards,
> > Yi Liu
> >
> >>> + __u32 padding;
> >>> + __u8 info[];
> >>> +};
> >>> +
> >>> #define VFIO_IOMMU_GET_INFO _IO(VFIO_TYPE, VFIO_BASE + 12)
> >>>
> >>> /**
> >>>
> >> Thanks
> >>
> >> Eric
> >
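
The ordering Yi describes matches the usual VFIO container bring-up.
As a sketch (standard VFIO calls; the group number is illustrative and
error handling is trimmed):

#include <fcntl.h>
#include <sys/ioctl.h>
#include <linux/vfio.h>

static int open_nesting_container(void)
{
	int container = open("/dev/vfio/vfio", O_RDWR);
	int group = open("/dev/vfio/26", O_RDWR);

	ioctl(group, VFIO_GROUP_SET_CONTAINER, &container);
	ioctl(container, VFIO_SET_IOMMU, VFIO_TYPE1_NESTING_IOMMU);

	/* only now does VFIO_IOMMU_GET_INFO report the nesting cap */
	return container;
}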

2020-07-07 09:38:14

by Yi Liu

Subject: RE: [PATCH v4 04/15] vfio/type1: Report iommu nesting info to userspace

Hi Eric,

> From: Auger Eric <[email protected]>
> Sent: Monday, July 6, 2020 10:07 PM
>
> Hi Yi,
> On 7/4/20 1:26 PM, Liu Yi L wrote:
> > This patch exports iommu nesting capability info to user space through
> > VFIO. User space is expected to check this info for supported uAPIs (e.g.
> > PASID alloc/free, bind page table, and cache invalidation) and the vendor
> > specific format information for first level/stage page table that will be
> > bound to.
> >
> > The nesting info is available only after the nesting iommu type is set
> > for a container. Current implementation imposes one limitation - one
> > nesting container should include at most one group. The philosophy of
> > vfio container is having all groups/devices within the container share
> > the same IOMMU context. When vSVA is enabled, one IOMMU context could
> > include one 2nd-level address space and multiple 1st-level address spaces.
> > While the 2nd-leve address space is reasonably sharable by multiple groups
> > , blindly sharing 1st-level address spaces across all groups within the
> > container might instead break the guest expectation. In the future sub/
> > super container concept might be introduced to allow partial address space
> > sharing within an IOMMU context. But for now let's go with this restriction
> > by requiring singleton container for using nesting iommu features. Below
> > link has the related discussion about this decision.
> >
> > https://lkml.org/lkml/2020/5/15/1028
> >
> > Cc: Kevin Tian <[email protected]>
> > CC: Jacob Pan <[email protected]>
> > Cc: Alex Williamson <[email protected]>
> > Cc: Eric Auger <[email protected]>
> > Cc: Jean-Philippe Brucker <[email protected]>
> > Cc: Joerg Roedel <[email protected]>
> > Cc: Lu Baolu <[email protected]>
> > Signed-off-by: Liu Yi L <[email protected]>
> > ---
> > v3 -> v4:
> > *) address comments against v3.
> >
> > v1 -> v2:
> > *) added in v2
> > ---
> >
> > drivers/vfio/vfio_iommu_type1.c | 105 +++++++++++++++++++++++++++++++++++-----
> > include/uapi/linux/vfio.h | 16 ++++++
> > 2 files changed, 109 insertions(+), 12 deletions(-)
> >
> > diff --git a/drivers/vfio/vfio_iommu_type1.c b/drivers/vfio/vfio_iommu_type1.c
> > index 7accb59..80623b8 100644
> > --- a/drivers/vfio/vfio_iommu_type1.c
> > +++ b/drivers/vfio/vfio_iommu_type1.c
> > @@ -62,18 +62,20 @@ MODULE_PARM_DESC(dma_entry_limit,
> > "Maximum number of user DMA mappings per container (65535).");
> >
> > struct vfio_iommu {
> > - struct list_head domain_list;
> > - struct list_head iova_list;
> > - struct vfio_domain *external_domain; /* domain for external user */
> > - struct mutex lock;
> > - struct rb_root dma_list;
> > - struct blocking_notifier_head notifier;
> > - unsigned int dma_avail;
> > - uint64_t pgsize_bitmap;
> > - bool v2;
> > - bool nesting;
> > - bool dirty_page_tracking;
> > - bool pinned_page_dirty_scope;
> > + struct list_head domain_list;
> > + struct list_head iova_list;
> > + struct vfio_domain *external_domain; /* domain for
> > + external user */
> > + struct mutex lock;
> > + struct rb_root dma_list;
> > + struct blocking_notifier_head notifier;
> > + unsigned int dma_avail;
> > + uint64_t pgsize_bitmap;
> > + bool v2;
> > + bool nesting;
> > + bool dirty_page_tracking;
> > + bool pinned_page_dirty_scope;
> > + struct iommu_nesting_info *nesting_info;
> > };
> >
> > struct vfio_domain {
> > @@ -130,6 +132,9 @@ struct vfio_regions {
> > #define IS_IOMMU_CAP_DOMAIN_IN_CONTAINER(iommu) \
> > (!list_empty(&iommu->domain_list))
> >
> > +#define IS_DOMAIN_IN_CONTAINER(iommu) ((iommu->external_domain) || \
> > + (!list_empty(&iommu->domain_list)))
> > +
> > #define DIRTY_BITMAP_BYTES(n) (ALIGN(n, BITS_PER_TYPE(u64)) / BITS_PER_BYTE)
> >
> > /*
> > @@ -1929,6 +1934,13 @@ static void vfio_iommu_iova_insert_copy(struct vfio_iommu *iommu,
> >
> > list_splice_tail(iova_copy, iova);
> > }
> > +
> > +static void vfio_iommu_release_nesting_info(struct vfio_iommu *iommu)
> > +{
> > + kfree(iommu->nesting_info);
> > + iommu->nesting_info = NULL;
> > +}
> > +
> > static int vfio_iommu_type1_attach_group(void *iommu_data,
> > struct iommu_group *iommu_group)
> > {
> > @@ -1959,6 +1971,12 @@ static int vfio_iommu_type1_attach_group(void *iommu_data,
> > }
> > }
> >
> > + /* Nesting type container can include only one group */
> > + if (iommu->nesting && IS_DOMAIN_IN_CONTAINER(iommu)) {
> > + mutex_unlock(&iommu->lock);
> > + return -EINVAL;
> > + }
> > +
> > group = kzalloc(sizeof(*group), GFP_KERNEL);
> > domain = kzalloc(sizeof(*domain), GFP_KERNEL);
> > if (!group || !domain) {
> > @@ -2029,6 +2047,36 @@ static int vfio_iommu_type1_attach_group(void *iommu_data,
> > if (ret)
> > goto out_domain;
> >
> > + /* Nesting cap info is available only after attaching */
> > + if (iommu->nesting) {
> > + struct iommu_nesting_info tmp;
> > + struct iommu_nesting_info *info;
> > +
> > + /* First get the size of vendor specific nesting info */
> > + ret = iommu_domain_get_attr(domain->domain,
> > + DOMAIN_ATTR_NESTING,
> > + &tmp);
> > + if (ret)
> > + goto out_detach;
> > +
> > + info = kzalloc(tmp.size, GFP_KERNEL);
> > + if (!info) {
> > + ret = -ENOMEM;
> > + goto out_detach;
> > + }
> > +
> > + /* Now get the nesting info */
> > + info->size = tmp.size;
> > + ret = iommu_domain_get_attr(domain->domain,
> > + DOMAIN_ATTR_NESTING,
> > + info);
> > + if (ret) {
> > + kfree(info);
> > + goto out_detach;
> > + }
> > + iommu->nesting_info = info;
> > + }
> > +
> > /* Get aperture info */
> > iommu_domain_get_attr(domain->domain, DOMAIN_ATTR_GEOMETRY, &geo);
> >
> > @@ -2138,6 +2186,7 @@ static int vfio_iommu_type1_attach_group(void *iommu_data,
> > return 0;
> >
> > out_detach:
> > + vfio_iommu_release_nesting_info(iommu);
> > vfio_iommu_detach_group(domain, group);
> > out_domain:
> > iommu_domain_free(domain->domain);
> > @@ -2338,6 +2387,8 @@ static void vfio_iommu_type1_detach_group(void *iommu_data,
> > vfio_iommu_unmap_unpin_all(iommu);
> > else
> > vfio_iommu_unmap_unpin_reaccount(iommu);
> > +
> > + vfio_iommu_release_nesting_info(iommu);
> > }
> > iommu_domain_free(domain->domain);
> > list_del(&domain->next);
> > @@ -2546,6 +2597,30 @@ static int vfio_iommu_migration_build_caps(struct vfio_iommu *iommu,
> > return vfio_info_add_capability(caps, &cap_mig.header, sizeof(cap_mig));
> > }
> >
> > +static int vfio_iommu_info_add_nesting_cap(struct vfio_iommu *iommu,
> > + struct vfio_info_cap *caps)
> > +{
> > + struct vfio_info_cap_header *header;
> > + struct vfio_iommu_type1_info_cap_nesting *nesting_cap;
> > + size_t size;
> > +
> > + size = sizeof(*nesting_cap) + iommu->nesting_info->size;
> > +
> > + header = vfio_info_cap_add(caps, size,
> > + VFIO_IOMMU_TYPE1_INFO_CAP_NESTING, 1);
> > + if (IS_ERR(header))
> > + return PTR_ERR(header);
> > +
> > + nesting_cap = container_of(header,
> > + struct vfio_iommu_type1_info_cap_nesting,
> > + header);
> > +
> > + memcpy(&nesting_cap->info, iommu->nesting_info,
> > + iommu->nesting_info->size);
> > +
> > + return 0;
> > +}
> > +
> > static int vfio_iommu_type1_get_info(struct vfio_iommu *iommu,
> > unsigned long arg)
> > {
> > @@ -2586,6 +2661,12 @@ static int vfio_iommu_type1_get_info(struct vfio_iommu *iommu,
> > if (ret)
> > return ret;
> >
> > + if (iommu->nesting_info) {
> > + ret = vfio_iommu_info_add_nesting_cap(iommu, &caps);
> I think this should happen while holding the &iommu->lock because
> nothing prevents the group from being detached in-between

yes, you're right. will correct it.
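perhaps something like below (a sketch only, not yet tested; ret is 0
when we get here in the current code):

	/*
	 * take iommu->lock so that a concurrent group detach cannot
	 * free nesting_info between the check and the copy
	 */
	mutex_lock(&iommu->lock);
	if (iommu->nesting_info)
		ret = vfio_iommu_info_add_nesting_cap(iommu, &caps);
	mutex_unlock(&iommu->lock);
	if (ret)
		return ret;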

Regards,
Yi Liu

> Thanks
>
> Eric
> > + if (ret)
> > + return ret;
> > + }
> > +
> > if (caps.size) {
> > info.flags |= VFIO_IOMMU_INFO_CAPS;
> >
> > diff --git a/include/uapi/linux/vfio.h b/include/uapi/linux/vfio.h
> > index 9204705..3e3de9c 100644
> > --- a/include/uapi/linux/vfio.h
> > +++ b/include/uapi/linux/vfio.h
> > @@ -1039,6 +1039,22 @@ struct vfio_iommu_type1_info_cap_migration {
> > __u64 max_dirty_bitmap_size; /* in bytes */
> > };
> >
> > +#define VFIO_IOMMU_TYPE1_INFO_CAP_NESTING 3
> > +
> > +/*
> > + * Reporting nesting info to user space.
> > + *
> > + * @info: the nesting info provided by IOMMU driver. Today
> > + * it is expected to be a struct iommu_nesting_info
> > + * data.
> > + */
> > +struct vfio_iommu_type1_info_cap_nesting {
> > + struct vfio_info_cap_header header;
> > + __u32 flags;
> > + __u32 padding;
> > + __u8 info[];
> > +};
> > +
> > #define VFIO_IOMMU_GET_INFO _IO(VFIO_TYPE, VFIO_BASE + 12)
> >
> > /**
> >

2020-07-08 08:26:56

by Yi Liu

[permalink] [raw]
Subject: RE: [PATCH v4 04/15] vfio/type1: Report iommu nesting info to userspace

Hi Alex,

Eric asked if we will ever have a data struct other than struct iommu_nesting_info
in the struct vfio_iommu_type1_info_cap_nesting @info[] field. I'm not
quite sure about it. I guess the answer is no, as VFIO's nesting support should
be based on the IOMMU UAPI. what is your opinion?

+#define VFIO_IOMMU_TYPE1_INFO_CAP_NESTING 3
+
+/*
+ * Reporting nesting info to user space.
+ *
+ * @info: the nesting info provided by IOMMU driver. Today
+ * it is expected to be a struct iommu_nesting_info
+ * data.
+ */
+struct vfio_iommu_type1_info_cap_nesting {
+ struct vfio_info_cap_header header;
+ __u32 flags;
+ __u32 padding;
+ __u8 info[];
+};

https://lore.kernel.org/linux-iommu/DM5PR11MB1435290B6CD561EC61027892C3690@DM5PR11MB1435.namprd11.prod.outlook.com/

Regards,
Yi Liu

> From: Liu, Yi L
> Sent: Tuesday, July 7, 2020 5:32 PM
>
[...]
> > >
> > >>> +
> > >>> +/*
> > >>> + * Reporting nesting info to user space.
> > >>> + *
> > >>> + * @info: the nesting info provided by IOMMU driver. Today
> > >>> + * it is expected to be a struct iommu_nesting_info
> > >>> + * data.
> > >> Is it expected to change?
> > >
> > > honestly, I'm not quite sure on it. I did consider embedding struct
> > > iommu_nesting_info here instead of using info[]. but I hesitated as
> > > using info[] may leave more flexibility in this struct. what is
> > > your opinion? perhaps it's fine to embed the struct
> > > iommu_nesting_info here as long as VFIO sets up nesting based on
> > > the IOMMU UAPI.
> > >
> > >>> + */
> > >>> +struct vfio_iommu_type1_info_cap_nesting {
> > >>> + struct vfio_info_cap_header header;
> > >>> + __u32 flags;
> > >> You may document flags.
> > >
> > > sure. it's reserved for future use.
> > >
> > > Regards,
> > > Yi Liu
> > >
> > >>> + __u32 padding;
> > >>> + __u8 info[];
> > >>> +};
> > >>> +
> > >>> #define VFIO_IOMMU_GET_INFO _IO(VFIO_TYPE, VFIO_BASE + 12)
> > >>>
> > >>> /**
> > >>>
> > >> Thanks
> > >>
> > >> Eric
> > >

2020-07-08 19:31:21

by Alex Williamson

[permalink] [raw]
Subject: Re: [PATCH v4 04/15] vfio/type1: Report iommu nesting info to userspace

On Wed, 8 Jul 2020 08:08:40 +0000
"Liu, Yi L" <[email protected]> wrote:

> Hi Alex,
>
> Eric asked if we will ever have a data struct other than struct iommu_nesting_info
> in the struct vfio_iommu_type1_info_cap_nesting @info[] field. I'm not
> quite sure about it. I guess the answer is no, as VFIO's nesting support should
> be based on the IOMMU UAPI. what is your opinion?
>
> +#define VFIO_IOMMU_TYPE1_INFO_CAP_NESTING 3
> +
> +/*
> + * Reporting nesting info to user space.
> + *
> + * @info: the nesting info provided by IOMMU driver. Today
> + * it is expected to be a struct iommu_nesting_info
> + * data.
> + */
> +struct vfio_iommu_type1_info_cap_nesting {
> + struct vfio_info_cap_header header;
> + __u32 flags;
> + __u32 padding;
> + __u8 info[];
> +};

It's not a very useful uAPI if the user can't be sure what they're
getting out of it. Info capabilities are "cheap", they don't need to
be as extensible as an ioctl. It's not clear that we really even need
the flags (and therefore the padding), just define it to return the
IOMMU uAPI structure with no extensibility. If we need to expose
something else, create a new capability. Thanks,

Alex

>
> https://lore.kernel.org/linux-iommu/DM5PR11MB1435290B6CD561EC61027892C3690@DM5PR11MB1435.namprd11.prod.outlook.com/
>
> Regards,
> Yi Liu
>
> > From: Liu, Yi L
> > Sent: Tuesday, July 7, 2020 5:32 PM
> >
> [...]
> > > >
> > > >>> +
> > > >>> +/*
> > > >>> + * Reporting nesting info to user space.
> > > >>> + *
> > > >>> + * @info: the nesting info provided by IOMMU driver. Today
> > > >>> + * it is expected to be a struct iommu_nesting_info
> > > >>> + * data.
> > > >> Is it expected to change?
> > > >
> > > > honestly, I'm not quite sure on it. I did consider embedding struct
> > > > iommu_nesting_info here instead of using info[]. but I hesitated as
> > > > using info[] may leave more flexibility in this struct. what is
> > > > your opinion? perhaps it's fine to embed the struct
> > > > iommu_nesting_info here as long as VFIO sets up nesting based on
> > > > the IOMMU UAPI.
> > > >
> > > >>> + */
> > > >>> +struct vfio_iommu_type1_info_cap_nesting {
> > > >>> + struct vfio_info_cap_header header;
> > > >>> + __u32 flags;
> > > >> You may document flags.
> > > >
> > > > sure. it's reserved for future use.
> > > >
> > > > Regards,
> > > > Yi Liu
> > > >
> > > >>> + __u32 padding;
> > > >>> + __u8 info[];
> > > >>> +};
> > > >>> +
> > > >>> #define VFIO_IOMMU_GET_INFO _IO(VFIO_TYPE, VFIO_BASE + 12)
> > > >>>
> > > >>> /**
> > > >>>
> > > >> Thanks
> > > >>
> > > >> Eric
> > > >
>

2020-07-09 00:27:09

by Yi Liu

[permalink] [raw]
Subject: RE: [PATCH v4 04/15] vfio/type1: Report iommu nesting info to userspace

Hi Alex,

> From: Alex Williamson <[email protected]>
> Sent: Thursday, July 9, 2020 3:30 AM
>
> On Wed, 8 Jul 2020 08:08:40 +0000
> "Liu, Yi L" <[email protected]> wrote:
>
> > Hi Alex,
> >
> > Eric asked if we will ever have a data struct other than struct iommu_nesting_info
> > in the struct vfio_iommu_type1_info_cap_nesting @info[] field. I'm not
> > quite sure about it. I guess the answer is no, as VFIO's nesting support should
> > be based on the IOMMU UAPI. what is your opinion?
> >
> > +#define VFIO_IOMMU_TYPE1_INFO_CAP_NESTING 3
> > +
> > +/*
> > + * Reporting nesting info to user space.
> > + *
> > + * @info: the nesting info provided by IOMMU driver. Today
> > + * it is expected to be a struct iommu_nesting_info
> > + * data.
> > + */
> > +struct vfio_iommu_type1_info_cap_nesting {
> > + struct vfio_info_cap_header header;
> > + __u32 flags;
> > + __u32 padding;
> > + __u8 info[];
> > +};
>
> It's not a very useful uAPI if the user can't be sure what they're
> getting out of it. Info capabilities are "cheap", they don't need to
> be as extensible as an ioctl. It's not clear that we really even need
> the flags (and therefore the padding), just define it to return the
> IOMMU uAPI structure with no extensibility. If we need to expose
> something else, create a new capability. Thanks,

thanks for the guidance. then I will embed the struct iommu_nesting_info
here. :-)
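
maybe something like below (just my first thought, dropping the flags
and padding per your comment; exact layout to be finalized in v5):

/*
 * Reporting nesting info to user space.
 *
 * @info:	the nesting info provided by the IOMMU driver, a
 *		struct iommu_nesting_info.
 */
struct vfio_iommu_type1_info_cap_nesting {
	struct vfio_info_cap_header header;
	struct iommu_nesting_info info;
};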

Regards,
Yi Liu

> Alex
>
> >
> > https://lore.kernel.org/linux-iommu/DM5PR11MB1435290B6CD561EC61027892C3690@DM5PR11MB1435.namprd11.prod.outlook.com/
> >
> > Regards,
> > Yi Liu
> >
> > > From: Liu, Yi L
> > > Sent: Tuesday, July 7, 2020 5:32 PM
> > >
> > [...]
> > > > >
> > > > >>> +
> > > > >>> +/*
> > > > >>> + * Reporting nesting info to user space.
> > > > >>> + *
> > > > >>> + * @info: the nesting info provided by IOMMU driver. Today
> > > > >>> + * it is expected to be a struct iommu_nesting_info
> > > > >>> + * data.
> > > > >> Is it expected to change?
> > > > >
> > > > > honestly, I'm not quite sure on it. I did consider embedding struct
> > > > > iommu_nesting_info here instead of using info[]. but I hesitated as
> > > > > using info[] may leave more flexibility in this struct. what is
> > > > > your opinion? perhaps it's fine to embed the struct
> > > > > iommu_nesting_info here as long as VFIO sets up nesting based on
> > > > > the IOMMU UAPI.
> > > > >
> > > > >>> + */
> > > > >>> +struct vfio_iommu_type1_info_cap_nesting {
> > > > >>> + struct vfio_info_cap_header header;
> > > > >>> + __u32 flags;
> > > > >> You may document flags.
> > > > >
> > > > > sure. it's reserved for future use.
> > > > >
> > > > > Regards,
> > > > > Yi Liu
> > > > >
> > > > >>> + __u32 padding;
> > > > >>> + __u8 info[];
> > > > >>> +};
> > > > >>> +
> > > > >>> #define VFIO_IOMMU_GET_INFO _IO(VFIO_TYPE, VFIO_BASE + 12)
> > > > >>>
> > > > >>> /**
> > > > >>>
> > > > >> Thanks
> > > > >>
> > > > >> Eric
> > > > >
> >