Add a new iommu domain type IOMMU_DOMAIN_SVA to represent an I/O page
table which is shared from CPU host VA. Add some helpers to get and
put an SVA domain and implement SVA domain life cycle management.
Signed-off-by: Lu Baolu <[email protected]>
---
include/linux/iommu.h | 7 +++
drivers/iommu/iommu-sva-lib.h | 10 ++++
drivers/iommu/iommu-sva-lib.c | 89 +++++++++++++++++++++++++++++++++++
3 files changed, 106 insertions(+)
diff --git a/include/linux/iommu.h b/include/linux/iommu.h
index 36f43af0af53..29c4c2edd706 100644
--- a/include/linux/iommu.h
+++ b/include/linux/iommu.h
@@ -42,6 +42,7 @@ struct notifier_block;
struct iommu_sva;
struct iommu_fault_event;
struct iommu_dma_cookie;
+struct iommu_sva_cookie;
/* iommu fault flags */
#define IOMMU_FAULT_READ 0x0
@@ -64,6 +65,9 @@ struct iommu_domain_geometry {
#define __IOMMU_DOMAIN_PT (1U << 2) /* Domain is identity mapped */
#define __IOMMU_DOMAIN_DMA_FQ (1U << 3) /* DMA-API uses flush queue */
+#define __IOMMU_DOMAIN_SHARED (1U << 4) /* Page table shared from CPU */
+#define __IOMMU_DOMAIN_HOST_VA (1U << 5) /* Host CPU virtual address */
+
/*
* This are the possible domain-types
*
@@ -86,6 +90,8 @@ struct iommu_domain_geometry {
#define IOMMU_DOMAIN_DMA_FQ (__IOMMU_DOMAIN_PAGING | \
__IOMMU_DOMAIN_DMA_API | \
__IOMMU_DOMAIN_DMA_FQ)
+#define IOMMU_DOMAIN_SVA (__IOMMU_DOMAIN_SHARED | \
+ __IOMMU_DOMAIN_HOST_VA)
struct iommu_domain {
unsigned type;
@@ -95,6 +101,7 @@ struct iommu_domain {
void *handler_token;
struct iommu_domain_geometry geometry;
struct iommu_dma_cookie *iova_cookie;
+ struct iommu_sva_cookie *sva_cookie;
};
static inline bool iommu_is_dma_domain(struct iommu_domain *domain)
diff --git a/drivers/iommu/iommu-sva-lib.h b/drivers/iommu/iommu-sva-lib.h
index 8909ea1094e3..1a71218b07f5 100644
--- a/drivers/iommu/iommu-sva-lib.h
+++ b/drivers/iommu/iommu-sva-lib.h
@@ -10,6 +10,7 @@
int iommu_sva_alloc_pasid(struct mm_struct *mm, ioasid_t min, ioasid_t max);
struct mm_struct *iommu_sva_find(ioasid_t pasid);
+struct mm_struct *iommu_sva_domain_mm(struct iommu_domain *domain);
/* I/O Page fault */
struct device;
@@ -26,6 +27,8 @@ int iopf_queue_flush_dev(struct device *dev);
struct iopf_queue *iopf_queue_alloc(const char *name);
void iopf_queue_free(struct iopf_queue *queue);
int iopf_queue_discard_partial(struct iopf_queue *queue);
+bool iommu_sva_domain_get_user(struct iommu_domain *domain);
+void iommu_sva_domain_put_user(struct iommu_domain *domain);
#else /* CONFIG_IOMMU_SVA */
static inline int iommu_queue_iopf(struct iommu_fault *fault, void *cookie)
@@ -63,5 +66,12 @@ static inline int iopf_queue_discard_partial(struct iopf_queue *queue)
{
return -ENODEV;
}
+
+static inline bool iommu_sva_domain_get_user(struct iommu_domain *domain)
+{
+ return false;
+}
+
+static inline void iommu_sva_domain_put_user(struct iommu_domain *domain) { }
#endif /* CONFIG_IOMMU_SVA */
#endif /* _IOMMU_SVA_LIB_H */
diff --git a/drivers/iommu/iommu-sva-lib.c b/drivers/iommu/iommu-sva-lib.c
index 106506143896..78820be23f15 100644
--- a/drivers/iommu/iommu-sva-lib.c
+++ b/drivers/iommu/iommu-sva-lib.c
@@ -3,12 +3,21 @@
* Helpers for IOMMU drivers implementing SVA
*/
#include <linux/mutex.h>
+#include <linux/iommu.h>
+#include <linux/slab.h>
#include <linux/sched/mm.h>
#include "iommu-sva-lib.h"
static DEFINE_MUTEX(iommu_sva_lock);
static DECLARE_IOASID_SET(iommu_sva_pasid);
+static DEFINE_XARRAY_ALLOC(sva_domain_array);
+
+struct iommu_sva_cookie {
+ struct mm_struct *mm;
+ ioasid_t pasid;
+ refcount_t users;
+};
/**
* iommu_sva_alloc_pasid - Allocate a PASID for the mm
@@ -69,3 +78,83 @@ struct mm_struct *iommu_sva_find(ioasid_t pasid)
return ioasid_find(&iommu_sva_pasid, pasid, __mmget_not_zero);
}
EXPORT_SYMBOL_GPL(iommu_sva_find);
+
+static struct iommu_domain *
+iommu_sva_alloc_domain(struct device *dev, struct mm_struct *mm)
+{
+ struct bus_type *bus = dev->bus;
+ struct iommu_sva_cookie *cookie;
+ struct iommu_domain *domain;
+ void *curr;
+
+ if (!bus || !bus->iommu_ops)
+ return NULL;
+
+ cookie = kzalloc(sizeof(*cookie), GFP_KERNEL);
+ if (!cookie)
+ return NULL;
+
+ domain = bus->iommu_ops->domain_alloc(IOMMU_DOMAIN_SVA);
+ if (!domain)
+ goto err_domain_alloc;
+
+ cookie->mm = mm;
+ cookie->pasid = mm->pasid;
+ refcount_set(&cookie->users, 1);
+ domain->type = IOMMU_DOMAIN_SVA;
+ domain->sva_cookie = cookie;
+ curr = xa_store(&sva_domain_array, mm->pasid, domain, GFP_KERNEL);
+ if (xa_err(curr))
+ goto err_xa_store;
+
+ return domain;
+err_xa_store:
+ domain->ops->free(domain);
+err_domain_alloc:
+ kfree(cookie);
+ return NULL;
+}
+
+static void iommu_sva_free_domain(struct iommu_domain *domain)
+{
+ xa_erase(&sva_domain_array, domain->sva_cookie->pasid);
+ kfree(domain->sva_cookie);
+ domain->ops->free(domain);
+}
+
+bool iommu_sva_domain_get_user(struct iommu_domain *domain)
+{
+ struct iommu_sva_cookie *cookie = domain->sva_cookie;
+
+ return refcount_inc_not_zero(&cookie->users);
+}
+
+void iommu_sva_domain_put_user(struct iommu_domain *domain)
+{
+ struct iommu_sva_cookie *cookie = domain->sva_cookie;
+
+ if (refcount_dec_and_test(&cookie->users))
+ iommu_sva_free_domain(domain);
+}
+
+static __maybe_unused struct iommu_domain *
+iommu_sva_get_domain(struct device *dev, struct mm_struct *mm)
+{
+ struct iommu_domain *domain;
+ ioasid_t pasid = mm->pasid;
+
+ if (pasid == INVALID_IOASID)
+ return NULL;
+
+ domain = xa_load(&sva_domain_array, pasid);
+ if (!domain)
+ return iommu_sva_alloc_domain(dev, mm);
+ iommu_sva_domain_get_user(domain);
+
+ return domain;
+}
+
+struct mm_struct *iommu_sva_domain_mm(struct iommu_domain *domain)
+{
+ return domain->sva_cookie->mm;
+}
--
2.25.1
On 2022/3/30 5:38, Jacob Pan wrote:
>> +static struct iommu_domain *
>> +iommu_sva_alloc_domain(struct device *dev, struct mm_struct *mm)
>> +{
>> + struct bus_type *bus = dev->bus;
>> + struct iommu_sva_cookie *cookie;
>> + struct iommu_domain *domain;
>> + void *curr;
>> +
>> + if (!bus || !bus->iommu_ops)
>> + return NULL;
>> +
>> + cookie = kzalloc(sizeof(*cookie), GFP_KERNEL);
>> + if (!cookie)
>> + return NULL;
>> +
>> + domain = bus->iommu_ops->domain_alloc(IOMMU_DOMAIN_SVA);
>> + if (!domain)
>> + goto err_domain_alloc;
>> +
>> + cookie->mm = mm;
>> + cookie->pasid = mm->pasid;
> How do you manage the mm life cycle? do you require caller take mm reference?
> Or this should be limited to the current mm?
>
The existing sva_bind() interface requires the caller to take the mm
reference before calling it. So mm is safe during sva bind() and
unbind().
drivers/iommu/iommu.c:
/**
* iommu_sva_bind_device() - Bind a process address space to a device
* @dev: the device
* @mm: the mm to bind, caller must hold a reference to it
* @drvdata: opaque data pointer to pass to bind callback
*
* Create a bond between device and address space, allowing the device
to access
* the mm using the returned PASID. If a bond already exists between
@device and
* @mm, it is returned and an additional reference is taken. Caller
must call
* iommu_sva_unbind_device() to release each reference.
*
* iommu_dev_enable_feature(dev, IOMMU_DEV_FEAT_SVA) must be called
first, to
* initialize the required SVA features.
*
* On error, returns an ERR_PTR value.
*/
struct iommu_sva *
iommu_sva_bind_device(struct device *dev, struct mm_struct *mm, void
*drvdata)
Best regards,
baolu
On Tue, Mar 29, 2022 at 01:37:52PM +0800, Lu Baolu wrote:
> @@ -95,6 +101,7 @@ struct iommu_domain {
> void *handler_token;
> struct iommu_domain_geometry geometry;
> struct iommu_dma_cookie *iova_cookie;
> + struct iommu_sva_cookie *sva_cookie;
Cookie is still the wrong word to use here
> +struct iommu_sva_cookie {
> + struct mm_struct *mm;
> + ioasid_t pasid;
> + refcount_t users;
Really surprised to see a refcount buried inside the iommu_domain..
This design seems inside out, the SVA struct should 'enclose' the domain, not
be a pointer inside it.
struct iommu_sva_domain {
struct kref_t kref;
struct mm_struct *mm;
ioasid_t pasid;
/* All the domains that are linked to this */
struct xarray domain_list;
};
And then you could have a pointer to that inside the mm_struct instead
of just the naked pasid.
> +static __maybe_unused struct iommu_domain *
Why maybe unused?
> +iommu_sva_get_domain(struct device *dev, struct mm_struct *mm)
> +{
> + struct iommu_domain *domain;
> + ioasid_t pasid = mm->pasid;
> +
> + if (pasid == INVALID_IOASID)
> + return NULL;
> +
> + domain = xa_load(&sva_domain_array, pasid);
> + if (!domain)
> + return iommu_sva_alloc_domain(dev, mm);
> + iommu_sva_domain_get_user(domain);
This assumes any domain is interchangeable with any device, which is
not the iommu model. We need a domain op to check if a device is
compatiable with the domain for vfio an iommufd, this should do the
same.
It means each mm can have a list of domains associated with it and a
new domain is auto-created if the device doesn't work with any of the
existing domains.
Jason
Hi BaoLu,
On Tue, 29 Mar 2022 13:37:52 +0800, Lu Baolu <[email protected]>
wrote:
> Add a new iommu domain type IOMMU_DOMAIN_SVA to represent an I/O page
> table which is shared from CPU host VA. Add some helpers to get and
> put an SVA domain and implement SVA domain life cycle management.
>
> Signed-off-by: Lu Baolu <[email protected]>
> ---
> include/linux/iommu.h | 7 +++
> drivers/iommu/iommu-sva-lib.h | 10 ++++
> drivers/iommu/iommu-sva-lib.c | 89 +++++++++++++++++++++++++++++++++++
> 3 files changed, 106 insertions(+)
>
> diff --git a/include/linux/iommu.h b/include/linux/iommu.h
> index 36f43af0af53..29c4c2edd706 100644
> --- a/include/linux/iommu.h
> +++ b/include/linux/iommu.h
> @@ -42,6 +42,7 @@ struct notifier_block;
> struct iommu_sva;
> struct iommu_fault_event;
> struct iommu_dma_cookie;
> +struct iommu_sva_cookie;
>
> /* iommu fault flags */
> #define IOMMU_FAULT_READ 0x0
> @@ -64,6 +65,9 @@ struct iommu_domain_geometry {
> #define __IOMMU_DOMAIN_PT (1U << 2) /* Domain is identity mapped
> */ #define __IOMMU_DOMAIN_DMA_FQ (1U << 3) /* DMA-API uses
> flush queue */
> +#define __IOMMU_DOMAIN_SHARED (1U << 4) /* Page table shared
> from CPU */ +#define __IOMMU_DOMAIN_HOST_VA (1U << 5) /* Host
> CPU virtual address */ +
> /*
> * This are the possible domain-types
> *
> @@ -86,6 +90,8 @@ struct iommu_domain_geometry {
> #define IOMMU_DOMAIN_DMA_FQ (__IOMMU_DOMAIN_PAGING | \
> __IOMMU_DOMAIN_DMA_API | \
> __IOMMU_DOMAIN_DMA_FQ)
> +#define IOMMU_DOMAIN_SVA (__IOMMU_DOMAIN_SHARED | \
> + __IOMMU_DOMAIN_HOST_VA)
>
> struct iommu_domain {
> unsigned type;
> @@ -95,6 +101,7 @@ struct iommu_domain {
> void *handler_token;
> struct iommu_domain_geometry geometry;
> struct iommu_dma_cookie *iova_cookie;
> + struct iommu_sva_cookie *sva_cookie;
> };
>
> static inline bool iommu_is_dma_domain(struct iommu_domain *domain)
> diff --git a/drivers/iommu/iommu-sva-lib.h b/drivers/iommu/iommu-sva-lib.h
> index 8909ea1094e3..1a71218b07f5 100644
> --- a/drivers/iommu/iommu-sva-lib.h
> +++ b/drivers/iommu/iommu-sva-lib.h
> @@ -10,6 +10,7 @@
>
> int iommu_sva_alloc_pasid(struct mm_struct *mm, ioasid_t min, ioasid_t
> max); struct mm_struct *iommu_sva_find(ioasid_t pasid);
> +struct mm_struct *iommu_sva_domain_mm(struct iommu_domain *domain);
>
> /* I/O Page fault */
> struct device;
> @@ -26,6 +27,8 @@ int iopf_queue_flush_dev(struct device *dev);
> struct iopf_queue *iopf_queue_alloc(const char *name);
> void iopf_queue_free(struct iopf_queue *queue);
> int iopf_queue_discard_partial(struct iopf_queue *queue);
> +bool iommu_sva_domain_get_user(struct iommu_domain *domain);
> +void iommu_sva_domain_put_user(struct iommu_domain *domain);
>
> #else /* CONFIG_IOMMU_SVA */
> static inline int iommu_queue_iopf(struct iommu_fault *fault, void
> *cookie) @@ -63,5 +66,12 @@ static inline int
> iopf_queue_discard_partial(struct iopf_queue *queue) {
> return -ENODEV;
> }
> +
> +static inline bool iommu_sva_domain_get_user(struct iommu_domain *domain)
> +{
> + return false;
> +}
> +
> +static inline void iommu_sva_domain_put_user(struct iommu_domain
> *domain) { } #endif /* CONFIG_IOMMU_SVA */
> #endif /* _IOMMU_SVA_LIB_H */
> diff --git a/drivers/iommu/iommu-sva-lib.c b/drivers/iommu/iommu-sva-lib.c
> index 106506143896..78820be23f15 100644
> --- a/drivers/iommu/iommu-sva-lib.c
> +++ b/drivers/iommu/iommu-sva-lib.c
> @@ -3,12 +3,21 @@
> * Helpers for IOMMU drivers implementing SVA
> */
> #include <linux/mutex.h>
> +#include <linux/iommu.h>
> +#include <linux/slab.h>
> #include <linux/sched/mm.h>
>
> #include "iommu-sva-lib.h"
>
> static DEFINE_MUTEX(iommu_sva_lock);
> static DECLARE_IOASID_SET(iommu_sva_pasid);
> +static DEFINE_XARRAY_ALLOC(sva_domain_array);
> +
> +struct iommu_sva_cookie {
> + struct mm_struct *mm;
> + ioasid_t pasid;
> + refcount_t users;
> +};
>
> /**
> * iommu_sva_alloc_pasid - Allocate a PASID for the mm
> @@ -69,3 +78,83 @@ struct mm_struct *iommu_sva_find(ioasid_t pasid)
> return ioasid_find(&iommu_sva_pasid, pasid, __mmget_not_zero);
> }
> EXPORT_SYMBOL_GPL(iommu_sva_find);
> +
> +static struct iommu_domain *
> +iommu_sva_alloc_domain(struct device *dev, struct mm_struct *mm)
> +{
> + struct bus_type *bus = dev->bus;
> + struct iommu_sva_cookie *cookie;
> + struct iommu_domain *domain;
> + void *curr;
> +
> + if (!bus || !bus->iommu_ops)
> + return NULL;
> +
> + cookie = kzalloc(sizeof(*cookie), GFP_KERNEL);
> + if (!cookie)
> + return NULL;
> +
> + domain = bus->iommu_ops->domain_alloc(IOMMU_DOMAIN_SVA);
> + if (!domain)
> + goto err_domain_alloc;
> +
> + cookie->mm = mm;
> + cookie->pasid = mm->pasid;
How do you manage the mm life cycle? do you require caller take mm reference?
Or this should be limited to the current mm?
> + refcount_set(&cookie->users, 1);
> + domain->type = IOMMU_DOMAIN_SVA;
> + domain->sva_cookie = cookie;
> + curr = xa_store(&sva_domain_array, mm->pasid, domain,
> GFP_KERNEL);
> + if (xa_err(curr))
> + goto err_xa_store;
> +
> + return domain;
> +err_xa_store:
> + domain->ops->free(domain);
> +err_domain_alloc:
> + kfree(cookie);
> + return NULL;
> +}
> +
> +static void iommu_sva_free_domain(struct iommu_domain *domain)
> +{
> + xa_erase(&sva_domain_array, domain->sva_cookie->pasid);
> + kfree(domain->sva_cookie);
> + domain->ops->free(domain);
> +}
> +
> +bool iommu_sva_domain_get_user(struct iommu_domain *domain)
> +{
> + struct iommu_sva_cookie *cookie = domain->sva_cookie;
> +
> + return refcount_inc_not_zero(&cookie->users);
> +}
> +
> +void iommu_sva_domain_put_user(struct iommu_domain *domain)
> +{
> + struct iommu_sva_cookie *cookie = domain->sva_cookie;
> +
> + if (refcount_dec_and_test(&cookie->users))
> + iommu_sva_free_domain(domain);
> +}
> +
> +static __maybe_unused struct iommu_domain *
> +iommu_sva_get_domain(struct device *dev, struct mm_struct *mm)
> +{
> + struct iommu_domain *domain;
> + ioasid_t pasid = mm->pasid;
> +
> + if (pasid == INVALID_IOASID)
> + return NULL;
> +
> + domain = xa_load(&sva_domain_array, pasid);
> + if (!domain)
> + return iommu_sva_alloc_domain(dev, mm);
> + iommu_sva_domain_get_user(domain);
> +
> + return domain;
> +}
> +
> +struct mm_struct *iommu_sva_domain_mm(struct iommu_domain *domain)
> +{
> + return domain->sva_cookie->mm;
> +}
Thanks,
Jacob
> From: Jason Gunthorpe <[email protected]>
> Sent: Thursday, March 31, 2022 3:02 AM
>
> On Tue, Mar 29, 2022 at 01:37:52PM +0800, Lu Baolu wrote:
> > @@ -95,6 +101,7 @@ struct iommu_domain {
> > void *handler_token;
> > struct iommu_domain_geometry geometry;
> > struct iommu_dma_cookie *iova_cookie;
> > + struct iommu_sva_cookie *sva_cookie;
>
> Cookie is still the wrong word to use here
>
> > +struct iommu_sva_cookie {
> > + struct mm_struct *mm;
> > + ioasid_t pasid;
> > + refcount_t users;
>
> Really surprised to see a refcount buried inside the iommu_domain..
>
> This design seems inside out, the SVA struct should 'enclose' the domain, not
> be a pointer inside it.
>
> struct iommu_sva_domain {
> struct kref_t kref;
> struct mm_struct *mm;
> ioasid_t pasid;
>
> /* All the domains that are linked to this */
> struct xarray domain_list;
> };
>
> And then you could have a pointer to that inside the mm_struct instead
> of just the naked pasid.
>
> > +static __maybe_unused struct iommu_domain *
>
> Why maybe unused?
>
> > +iommu_sva_get_domain(struct device *dev, struct mm_struct *mm)
> > +{
> > + struct iommu_domain *domain;
> > + ioasid_t pasid = mm->pasid;
> > +
> > + if (pasid == INVALID_IOASID)
> > + return NULL;
> > +
> > + domain = xa_load(&sva_domain_array, pasid);
> > + if (!domain)
> > + return iommu_sva_alloc_domain(dev, mm);
> > + iommu_sva_domain_get_user(domain);
>
> This assumes any domain is interchangeable with any device, which is
> not the iommu model. We need a domain op to check if a device is
> compatiable with the domain for vfio an iommufd, this should do the
> same.
This suggests that mm_struct needs to include the format information
of the CPU page table so the format can be checked by the domain op?
>
> It means each mm can have a list of domains associated with it and a
> new domain is auto-created if the device doesn't work with any of the
> existing domains.
>
mm has only one page table and one format. If a device is incompatible
with an existing domain wrapping that page table, how come creating
another domain could make it compatible?
Thanks
Kevin
On Sat, Apr 02, 2022 at 08:43:16AM +0000, Tian, Kevin wrote:
> > This assumes any domain is interchangeable with any device, which is
> > not the iommu model. We need a domain op to check if a device is
> > compatiable with the domain for vfio an iommufd, this should do the
> > same.
>
> This suggests that mm_struct needs to include the format information
> of the CPU page table so the format can be checked by the domain op?
No, Linux does not support multiple formats for CPU page tables,
AFAICT, and creating the SVA domain in the first place should check
this.
> > It means each mm can have a list of domains associated with it and a
> > new domain is auto-created if the device doesn't work with any of the
> > existing domains.
>
> mm has only one page table and one format. If a device is incompatible
> with an existing domain wrapping that page table, how come creating
> another domain could make it compatible?
Because domains wrap more than just the IOPTE format, they have
additional data related to the IOMMU HW block itself. Imagine a SOC
with two IOMMU HW blocks that can both process the CPU IOPTE format,
but have different configuration.
So if device A users IOMMU A it needs an iommu_domain from driver A and
same for another device B, even if both iommu_domains are thin
wrappers around the same mm_struct.
Jason
Hi Jason and Kevin,
On 2022/4/3 7:32, Jason Gunthorpe wrote:
> On Sat, Apr 02, 2022 at 08:43:16AM +0000, Tian, Kevin wrote:
>
>>> This assumes any domain is interchangeable with any device, which is
>>> not the iommu model. We need a domain op to check if a device is
>>> compatiable with the domain for vfio an iommufd, this should do the
>>> same.
>>
>> This suggests that mm_struct needs to include the format information
>> of the CPU page table so the format can be checked by the domain op?
>
> No, Linux does not support multiple formats for CPU page tables,
> AFAICT, and creating the SVA domain in the first place should check
> this.
>
>>> It means each mm can have a list of domains associated with it and a
>>> new domain is auto-created if the device doesn't work with any of the
>>> existing domains.
>>
>> mm has only one page table and one format. If a device is incompatible
>> with an existing domain wrapping that page table, how come creating
>> another domain could make it compatible?
>
> Because domains wrap more than just the IOPTE format, they have
> additional data related to the IOMMU HW block itself. Imagine a SOC
> with two IOMMU HW blocks that can both process the CPU IOPTE format,
> but have different configuration.
>
> So if device A users IOMMU A it needs an iommu_domain from driver A and
> same for another device B, even if both iommu_domains are thin
> wrappers around the same mm_struct.
How about below data structure design?
- [New]struct iommu_sva_ioas
Represent the I/O address space shared with an application CPU address
space. This structure has a 1:1 relationship with an mm_struct. It
graps a "mm->mm_count" refcount during creation and drop it on release.
struct iommu_sva_ioas {
struct mm_struct *mm;
ioasid_t pasid;
/* Counter of domains attached to this ioas. */
refcount_t users;
/* All bindings are linked here. */
struct list_head bonds;
};
- [Enhance existing] struct iommu_domain (IOMMU_DOMAIN_SVA type)
Represent a hardware pagetable that the IOMMU hardware could use for
SVA translation. Multiple iommu domains could be bound with an SVA ioas
and each graps a refcount from ioas in order to make sure ioas could
only be freed after all domains have been unbound.
@@ -95,6 +101,7 @@ struct iommu_domain {
void *handler_token;
struct iommu_domain_geometry geometry;
struct iommu_dma_cookie *iova_cookie;
+ struct iommu_sva_ioas *sva_ioas;
};
- [Enhance existing] struct iommu_sva
Represent a bond relationship between an SVA ioas and an iommu domain.
If a bond already exists, it's reused and a reference is taken.
/**
* struct iommu_sva - handle to a device-mm bond
*/
struct iommu_sva {
struct device *dev;
struct iommu_sva_ioas *sva_ioas;
struct iommu_domain *domain;
/* Link to sva ioas's bonds list */
struct list_head node;
refcount_t users;
};
Best regards,
baolu
> From: Jason Gunthorpe <[email protected]>
> Sent: Sunday, April 3, 2022 7:32 AM
>
> On Sat, Apr 02, 2022 at 08:43:16AM +0000, Tian, Kevin wrote:
>
> > > This assumes any domain is interchangeable with any device, which is
> > > not the iommu model. We need a domain op to check if a device is
> > > compatiable with the domain for vfio an iommufd, this should do the
> > > same.
> >
> > This suggests that mm_struct needs to include the format information
> > of the CPU page table so the format can be checked by the domain op?
>
> No, Linux does not support multiple formats for CPU page tables,
> AFAICT, and creating the SVA domain in the first place should check
> this.
One interesting usage is when virtio-iommu supports vSVA one day. At
that time there needs a way to know the format of the CPU page table
and then virtio-iommu driver needs to check whether it is compatible
with what the host iommu driver supports. But possibly this can wait to
be solved until that usage comes...
>
> > > It means each mm can have a list of domains associated with it and a
> > > new domain is auto-created if the device doesn't work with any of the
> > > existing domains.
> >
> > mm has only one page table and one format. If a device is incompatible
> > with an existing domain wrapping that page table, how come creating
> > another domain could make it compatible?
>
> Because domains wrap more than just the IOPTE format, they have
> additional data related to the IOMMU HW block itself. Imagine a SOC
> with two IOMMU HW blocks that can both process the CPU IOPTE format,
> but have different configuration.
Curious. Is it hypothesis or real? If real can you help give a concrete
example?
>
> So if device A users IOMMU A it needs an iommu_domain from driver A and
> same for another device B, even if both iommu_domains are thin
> wrappers around the same mm_struct.
>
On Wed, Apr 06, 2022 at 01:00:13AM +0000, Tian, Kevin wrote:
> > Because domains wrap more than just the IOPTE format, they have
> > additional data related to the IOMMU HW block itself. Imagine a SOC
> > with two IOMMU HW blocks that can both process the CPU IOPTE format,
> > but have different configuration.
>
> Curious. Is it hypothesis or real? If real can you help give a concrete
> example?
Look at arm_smmu_attach_dev() - the domain has exactly one smmu
pointer which contains the base address for the SMMU IP block. If the
domain doesn't match the smmu pointer from the struct device it won't
allow attaching.
I know of ARM SOCs with many copies of the SMMU IP block.
So at least with current drivers ARM seems to have this limitation.
Jason
> From: Jason Gunthorpe <[email protected]>
> Sent: Wednesday, April 6, 2022 9:24 AM
>
> On Wed, Apr 06, 2022 at 01:00:13AM +0000, Tian, Kevin wrote:
>
> > > Because domains wrap more than just the IOPTE format, they have
> > > additional data related to the IOMMU HW block itself. Imagine a SOC
> > > with two IOMMU HW blocks that can both process the CPU IOPTE format,
> > > but have different configuration.
> >
> > Curious. Is it hypothesis or real? If real can you help give a concrete
> > example?
>
> Look at arm_smmu_attach_dev() - the domain has exactly one smmu
> pointer which contains the base address for the SMMU IP block. If the
> domain doesn't match the smmu pointer from the struct device it won't
> allow attaching.
>
> I know of ARM SOCs with many copies of the SMMU IP block.
>
> So at least with current drivers ARM seems to have this limitation.
>
I saw that code, but before this series it is used only for stage-2 instead
of SVA. and I didn't see similar check in the old sva related paths (though
it doesn't use domain):
arm_smmu_master_sva_enable_iopf()
arm_smmu_master_enable_sva{}
__arm_smmu_sva_bind()
If I didn't overlook some trick hiding in the call chain of those functions,
is there a bug in the existing SMMU sva logic or is it conceptually correct
to not have such check for SVA?
If the former then yes we have to take SMMU IP block into consideration
thus could have multiple domains per CPU page table. If the latter then
this is not a valid example for that configuration.
Which one is correct?
Thanks
Kevin
On 2022-04-06 06:58, Tian, Kevin wrote:
>> From: Jason Gunthorpe <[email protected]>
>> Sent: Wednesday, April 6, 2022 9:24 AM
>>
>> On Wed, Apr 06, 2022 at 01:00:13AM +0000, Tian, Kevin wrote:
>>
>>>> Because domains wrap more than just the IOPTE format, they have
>>>> additional data related to the IOMMU HW block itself. Imagine a SOC
>>>> with two IOMMU HW blocks that can both process the CPU IOPTE format,
>>>> but have different configuration.
>>>
>>> Curious. Is it hypothesis or real? If real can you help give a concrete
>>> example?
>>
>> Look at arm_smmu_attach_dev() - the domain has exactly one smmu
>> pointer which contains the base address for the SMMU IP block. If the
>> domain doesn't match the smmu pointer from the struct device it won't
>> allow attaching.
>>
>> I know of ARM SOCs with many copies of the SMMU IP block.
>>
>> So at least with current drivers ARM seems to have this limitation.
>>
>
> I saw that code, but before this series it is used only for stage-2 instead
> of SVA. and I didn't see similar check in the old sva related paths (though
> it doesn't use domain):
>
> arm_smmu_master_sva_enable_iopf()
> arm_smmu_master_enable_sva{}
> __arm_smmu_sva_bind()
>
> If I didn't overlook some trick hiding in the call chain of those functions,
> is there a bug in the existing SMMU sva logic or is it conceptually correct
> to not have such check for SVA?
The current SVA APIs are all device-based, so implicitly reflect
whichever SMMU instance serves the given device. Once domains come into
the picture, callers are going to have to be more aware that a domain
may be specific to a particular IOMMU instance, and potentially allocate
separate domains for separate devices to represent the same address
space, much like vfio_iommu_type1_attach_group() does.
It's not really worth IOMMU drivers trying to support a domain spanning
potentially-heterogeneous instances internally, since they can't
reasonably know what matters in any particular situation. That's
primarily why we've never tried to do it in the SMMU drivers. It's a lot
easier for relevant callers to look at what they get and figure out
whether any mismatch in capabilities is tolerable or not.
Robin.
> If the former then yes we have to take SMMU IP block into consideration
> thus could have multiple domains per CPU page table. If the latter then
> this is not a valid example for that configuration.
>
> Which one is correct?
>
> Thanks
> Kevin
On Wed, Apr 06, 2022 at 01:32:07PM +0100, Robin Murphy wrote:
> a particular IOMMU instance, and potentially allocate separate domains for
> separate devices to represent the same address space, much like
> vfio_iommu_type1_attach_group() does.
I think this VFIO code also needs some more work, it currently assumes
that if the domain ops are the same then the domains are compatible,
and that is not true for ARM SMMU drivers.
I've been thinking of adding a domain callback 'can device attach' and
replacing the ops compare with that instead.
> It's not really worth IOMMU drivers trying to support a domain spanning
> potentially-heterogeneous instances internally, since they can't reasonably
> know what matters in any particular situation.
In the long run I think it will be worth optimizing. If the SMMU
instances can share IOPTE memory then we get two wins - memory
reduction and reduced work to read dirty bits.
The dirty read in particular is very performance sensitive so if real
work loads have many SMMUs per VM it will become a pain point.
Jason
On 2022-04-06 14:06, Jason Gunthorpe wrote:
> On Wed, Apr 06, 2022 at 01:32:07PM +0100, Robin Murphy wrote:
>> a particular IOMMU instance, and potentially allocate separate domains for
>> separate devices to represent the same address space, much like
>> vfio_iommu_type1_attach_group() does.
>
> I think this VFIO code also needs some more work, it currently assumes
> that if the domain ops are the same then the domains are compatible,
> and that is not true for ARM SMMU drivers.
Well, strictly it uses the ops as a "negative" heuristic that the
domains are not definitely incompatible, and only then consolidates
domains if cross-attaching actually succeeds. So I don't think it's
really so bad.
> I've been thinking of adding a domain callback 'can device attach' and
> replacing the ops compare with that instead.
Previous comment notwithstanding, much as I do tend to prefer "just try
the operation and see what happens" APIs, that might be more reliable in
the long run than trying to encode specific "sorry, you'll need to
allocate a separate domain for this device" vs. "this should have worked
but something went wrong" semantics in the return value from attach.
>> It's not really worth IOMMU drivers trying to support a domain spanning
>> potentially-heterogeneous instances internally, since they can't reasonably
>> know what matters in any particular situation.
>
> In the long run I think it will be worth optimizing. If the SMMU
> instances can share IOPTE memory then we get two wins - memory
> reduction and reduced work to read dirty bits.
>
> The dirty read in particular is very performance sensitive so if real
> work loads have many SMMUs per VM it will become a pain point.
In the ideal case though, the SVA domains are only there to logically
bridge between an existing process pagetable and IOMMU instances at the
API level, right? Surely if we're shadowing physical pagetables for SVA
we've basically already lost the performance game?
Robin.
On Wed, Apr 06, 2022 at 02:37:40PM +0100, Robin Murphy wrote:
> On 2022-04-06 14:06, Jason Gunthorpe wrote:
> > On Wed, Apr 06, 2022 at 01:32:07PM +0100, Robin Murphy wrote:
> > > a particular IOMMU instance, and potentially allocate separate domains for
> > > separate devices to represent the same address space, much like
> > > vfio_iommu_type1_attach_group() does.
> >
> > I think this VFIO code also needs some more work, it currently assumes
> > that if the domain ops are the same then the domains are compatible,
> > and that is not true for ARM SMMU drivers.
>
> Well, strictly it uses the ops as a "negative" heuristic that the domains
> are not definitely incompatible, and only then consolidates domains if
> cross-attaching actually succeeds. So I don't think it's really so bad.
Oh that is sneaky, I didn't appreciate that bit of logic..
> > I've been thinking of adding a domain callback 'can device attach' and
> > replacing the ops compare with that instead.
>
> Previous comment notwithstanding, much as I do tend to prefer "just try the
> operation and see what happens" APIs, that might be more reliable in the
> long run than trying to encode specific "sorry, you'll need to allocate a
> separate domain for this device" vs. "this should have worked but something
> went wrong" semantics in the return value from attach.
Overall the way vfio is doing this has alot of overhead. Not only does
it create a domain it may not use it also does this:
iommu_detach_group(domain->domain, group->iommu_group);
if (!iommu_attach_group(d->domain,
group->iommu_group)) {
list_add(&group->next, &d->group_list);
iommu_domain_free(domain->domain);
kfree(domain);
goto done;
}
ret = iommu_attach_group(domain->domain,
group->iommu_group);
So, if we already have a compatible domain VFIO does an extra domain
alloc/dealloc and 3 extra attach/detatches per domain it tests.
It is not very elegant at least..
> > > It's not really worth IOMMU drivers trying to support a domain spanning
> > > potentially-heterogeneous instances internally, since they can't reasonably
> > > know what matters in any particular situation.
> >
> > In the long run I think it will be worth optimizing. If the SMMU
> > instances can share IOPTE memory then we get two wins - memory
> > reduction and reduced work to read dirty bits.
> >
> > The dirty read in particular is very performance sensitive so if real
> > work loads have many SMMUs per VM it will become a pain point.
>
> In the ideal case though, the SVA domains are only there to logically bridge
> between an existing process pagetable and IOMMU instances at the API level,
> right? Surely if we're shadowing physical pagetables for SVA we've basically
> already lost the performance game?
Sorry, I was not talking about SVA with that remark, speaking
generally about normal iommu_domains and sharing between SMMU
instances.
For SVA I see no issue with duplicating it per instance since it is
very small/etc - the only drawback is that the common code has to do
the 'can device attach' dance and keep a domain list per
mm-struct. Which seems OK to me.
Jason
> From: Robin Murphy <[email protected]>
> Sent: Wednesday, April 6, 2022 8:32 PM
>
> On 2022-04-06 06:58, Tian, Kevin wrote:
> >> From: Jason Gunthorpe <[email protected]>
> >> Sent: Wednesday, April 6, 2022 9:24 AM
> >>
> >> On Wed, Apr 06, 2022 at 01:00:13AM +0000, Tian, Kevin wrote:
> >>
> >>>> Because domains wrap more than just the IOPTE format, they have
> >>>> additional data related to the IOMMU HW block itself. Imagine a SOC
> >>>> with two IOMMU HW blocks that can both process the CPU IOPTE
> format,
> >>>> but have different configuration.
> >>>
> >>> Curious. Is it hypothesis or real? If real can you help give a concrete
> >>> example?
> >>
> >> Look at arm_smmu_attach_dev() - the domain has exactly one smmu
> >> pointer which contains the base address for the SMMU IP block. If the
> >> domain doesn't match the smmu pointer from the struct device it won't
> >> allow attaching.
> >>
> >> I know of ARM SOCs with many copies of the SMMU IP block.
> >>
> >> So at least with current drivers ARM seems to have this limitation.
> >>
> >
> > I saw that code, but before this series it is used only for stage-2 instead
> > of SVA. and I didn't see similar check in the old sva related paths (though
> > it doesn't use domain):
> >
> > arm_smmu_master_sva_enable_iopf()
> > arm_smmu_master_enable_sva{}
> > __arm_smmu_sva_bind()
> >
> > If I didn't overlook some trick hiding in the call chain of those functions,
> > is there a bug in the existing SMMU sva logic or is it conceptually correct
> > to not have such check for SVA?
>
> The current SVA APIs are all device-based, so implicitly reflect
> whichever SMMU instance serves the given device. Once domains come into
> the picture, callers are going to have to be more aware that a domain
> may be specific to a particular IOMMU instance, and potentially allocate
> separate domains for separate devices to represent the same address
> space, much like vfio_iommu_type1_attach_group() does.
The current definition of domain is specific to stage-2. It is interesting
whether we want to keep the same stage-2 constraints to stage-1 when
extending the domain concept to cover both. From what you explained
SVA conceptually is device-based thus SMMU instance doesn't matter.
Then allowing one domain to wrap CPU page table cross devices under
different SMMU instances is also one choice, as long as this domain is
differentiated from stage-2 domain in proper way. I get you may not
want to go this route for smmu driver as explained below, but let's
align on that it's just an implementation choice instead of a conceptual
requirement (I didn't see how IOMMU instances can have heterogenous
configurations when walking the same CPU page table). ????
>
> It's not really worth IOMMU drivers trying to support a domain spanning
> potentially-heterogeneous instances internally, since they can't
> reasonably know what matters in any particular situation. That's
> primarily why we've never tried to do it in the SMMU drivers. It's a lot
> easier for relevant callers to look at what they get and figure out
> whether any mismatch in capabilities is tolerable or not.
>
> Robin.
>
> > If the former then yes we have to take SMMU IP block into consideration
> > thus could have multiple domains per CPU page table. If the latter then
> > this is not a valid example for that configuration.
> >
> > Which one is correct?
> >
> > Thanks
> > Kevin