2024-01-22 07:45:22

by Baolu Lu

[permalink] [raw]
Subject: [PATCH v3 2/8] iommu/sva: Use iopf domain attach/detach interface

The iommu sva implementation relies on iopf handling. Allocate an
attachment cookie and use the iopf domain attach/detach interface.
The SVA domain is guaranteed to be released after all outstanding
page faults are handled.

In the fault delivering path, the attachment cookie is retrieved,
instead of the domain. This ensures that the page fault is forwarded
only if an iopf-capable domain is attached, and the domain will only
be released after all outstanding faults are handled.

Signed-off-by: Lu Baolu <[email protected]>
---
include/linux/iommu.h | 2 +-
drivers/iommu/io-pgfault.c | 59 +++++++++++++++++++-------------------
drivers/iommu/iommu-sva.c | 48 ++++++++++++++++++++++++-------
3 files changed, 68 insertions(+), 41 deletions(-)

diff --git a/include/linux/iommu.h b/include/linux/iommu.h
index 6d85be23952a..511dc7b4bdb2 100644
--- a/include/linux/iommu.h
+++ b/include/linux/iommu.h
@@ -142,9 +142,9 @@ struct iopf_group {
/* list node for iommu_fault_param::faults */
struct list_head pending_node;
struct work_struct work;
- struct iommu_domain *domain;
/* The device's fault data parameter. */
struct iommu_fault_param *fault_param;
+ struct iopf_attach_cookie *cookie;
};

/**
diff --git a/drivers/iommu/io-pgfault.c b/drivers/iommu/io-pgfault.c
index f7ce41573799..2567d8c04e46 100644
--- a/drivers/iommu/io-pgfault.c
+++ b/drivers/iommu/io-pgfault.c
@@ -40,7 +40,7 @@ static void iopf_put_dev_fault_param(struct iommu_fault_param *fault_param)
}

/* Get the domain attachment cookie for pasid of a device. */
-static struct iopf_attach_cookie __maybe_unused *
+static struct iopf_attach_cookie *
iopf_pasid_cookie_get(struct device *dev, ioasid_t pasid)
{
struct iommu_fault_param *iopf_param = iopf_get_dev_fault_param(dev);
@@ -147,6 +147,7 @@ static void __iopf_free_group(struct iopf_group *group)

/* Pair with iommu_report_device_fault(). */
iopf_put_dev_fault_param(group->fault_param);
+ iopf_pasid_cookie_put(group->cookie);
}

void iopf_free_group(struct iopf_group *group)
@@ -156,30 +157,6 @@ void iopf_free_group(struct iopf_group *group)
}
EXPORT_SYMBOL_GPL(iopf_free_group);

-static struct iommu_domain *get_domain_for_iopf(struct device *dev,
- struct iommu_fault *fault)
-{
- struct iommu_domain *domain;
-
- if (fault->prm.flags & IOMMU_FAULT_PAGE_REQUEST_PASID_VALID) {
- domain = iommu_get_domain_for_dev_pasid(dev, fault->prm.pasid, 0);
- if (IS_ERR(domain))
- domain = NULL;
- } else {
- domain = iommu_get_domain_for_dev(dev);
- }
-
- if (!domain || !domain->iopf_handler) {
- dev_warn_ratelimited(dev,
- "iopf (pasid %d) without domain attached or handler installed\n",
- fault->prm.pasid);
-
- return NULL;
- }
-
- return domain;
-}
-
/* Non-last request of a group. Postpone until the last one. */
static int report_partial_fault(struct iommu_fault_param *fault_param,
struct iommu_fault *fault)
@@ -199,10 +176,20 @@ static int report_partial_fault(struct iommu_fault_param *fault_param,
return 0;
}

+static ioasid_t fault_to_pasid(struct iommu_fault *fault)
+{
+ if (fault->prm.flags & IOMMU_FAULT_PAGE_REQUEST_PASID_VALID)
+ return fault->prm.pasid;
+
+ return IOMMU_NO_PASID;
+}
+
static struct iopf_group *iopf_group_alloc(struct iommu_fault_param *iopf_param,
struct iopf_fault *evt,
struct iopf_group *abort_group)
{
+ ioasid_t pasid = fault_to_pasid(&evt->fault);
+ struct iopf_attach_cookie *cookie;
struct iopf_fault *iopf, *next;
struct iopf_group *group;

@@ -215,7 +202,23 @@ static struct iopf_group *iopf_group_alloc(struct iommu_fault_param *iopf_param,
group = abort_group;
}

+ cookie = iopf_pasid_cookie_get(iopf_param->dev, pasid);
+ if (!cookie && pasid != IOMMU_NO_PASID)
+ cookie = iopf_pasid_cookie_get(iopf_param->dev, IOMMU_NO_PASID);
+ if (IS_ERR(cookie) || !cookie) {
+ /*
+ * The PASID of this device was not attached by an I/O-capable
+ * domain. Ask the caller to abort handling of this fault.
+ * Otherwise, the reference count will be switched to the new
+ * iopf group and will be released in iopf_free_group().
+ */
+ kfree(group);
+ group = abort_group;
+ cookie = NULL;
+ }
+
group->fault_param = iopf_param;
+ group->cookie = cookie;
group->last_fault.fault = evt->fault;
INIT_LIST_HEAD(&group->faults);
INIT_LIST_HEAD(&group->pending_node);
@@ -305,15 +308,11 @@ void iommu_report_device_fault(struct device *dev, struct iopf_fault *evt)
if (group == &abort_group)
goto err_abort;

- group->domain = get_domain_for_iopf(dev, fault);
- if (!group->domain)
- goto err_abort;
-
/*
* On success iopf_handler must call iopf_group_response() and
* iopf_free_group()
*/
- if (group->domain->iopf_handler(group))
+ if (group->cookie->domain->iopf_handler(group))
goto err_abort;

return;
diff --git a/drivers/iommu/iommu-sva.c b/drivers/iommu/iommu-sva.c
index b51995b4fe90..fff3ee1ee9ce 100644
--- a/drivers/iommu/iommu-sva.c
+++ b/drivers/iommu/iommu-sva.c
@@ -50,6 +50,39 @@ static struct iommu_mm_data *iommu_alloc_mm_data(struct mm_struct *mm, struct de
return iommu_mm;
}

+static void release_attach_cookie(struct iopf_attach_cookie *cookie)
+{
+ struct iommu_domain *domain = cookie->domain;
+
+ mutex_lock(&iommu_sva_lock);
+ if (--domain->users == 0) {
+ list_del(&domain->next);
+ iommu_domain_free(domain);
+ }
+ mutex_unlock(&iommu_sva_lock);
+
+ kfree(cookie);
+}
+
+static int sva_attach_device_pasid(struct iommu_domain *domain,
+ struct device *dev, ioasid_t pasid)
+{
+ struct iopf_attach_cookie *cookie;
+ int ret;
+
+ cookie = kzalloc(sizeof(*cookie), GFP_KERNEL);
+ if (!cookie)
+ return -ENOMEM;
+
+ cookie->release = release_attach_cookie;
+
+ ret = iopf_domain_attach(domain, dev, pasid, cookie);
+ if (ret)
+ kfree(cookie);
+
+ return ret;
+}
+
/**
* iommu_sva_bind_device() - Bind a process address space to a device
* @dev: the device
@@ -90,7 +123,7 @@ struct iommu_sva *iommu_sva_bind_device(struct device *dev, struct mm_struct *mm

/* Search for an existing domain. */
list_for_each_entry(domain, &mm->iommu_mm->sva_domains, next) {
- ret = iommu_attach_device_pasid(domain, dev, iommu_mm->pasid);
+ ret = sva_attach_device_pasid(domain, dev, iommu_mm->pasid);
if (!ret) {
domain->users++;
goto out;
@@ -104,7 +137,7 @@ struct iommu_sva *iommu_sva_bind_device(struct device *dev, struct mm_struct *mm
goto out_free_handle;
}

- ret = iommu_attach_device_pasid(domain, dev, iommu_mm->pasid);
+ ret = sva_attach_device_pasid(domain, dev, iommu_mm->pasid);
if (ret)
goto out_free_domain;
domain->users = 1;
@@ -140,13 +173,7 @@ void iommu_sva_unbind_device(struct iommu_sva *handle)
struct iommu_mm_data *iommu_mm = domain->mm->iommu_mm;
struct device *dev = handle->dev;

- mutex_lock(&iommu_sva_lock);
- iommu_detach_device_pasid(domain, dev, iommu_mm->pasid);
- if (--domain->users == 0) {
- list_del(&domain->next);
- iommu_domain_free(domain);
- }
- mutex_unlock(&iommu_sva_lock);
+ iopf_domain_detach(domain, dev, iommu_mm->pasid);
kfree(handle);
}
EXPORT_SYMBOL_GPL(iommu_sva_unbind_device);
@@ -242,7 +269,8 @@ static void iommu_sva_handle_iopf(struct work_struct *work)
if (status != IOMMU_PAGE_RESP_SUCCESS)
break;

- status = iommu_sva_handle_mm(&iopf->fault, group->domain->mm);
+ status = iommu_sva_handle_mm(&iopf->fault,
+ group->cookie->domain->mm);
}

iopf_group_response(group, status);
--
2.34.1



2024-03-08 17:46:14

by Jason Gunthorpe

[permalink] [raw]
Subject: Re: [PATCH v3 2/8] iommu/sva: Use iopf domain attach/detach interface

On Mon, Jan 22, 2024 at 03:38:57PM +0800, Lu Baolu wrote:
> @@ -215,7 +202,23 @@ static struct iopf_group *iopf_group_alloc(struct iommu_fault_param *iopf_param,
> group = abort_group;
> }
>
> + cookie = iopf_pasid_cookie_get(iopf_param->dev, pasid);
> + if (!cookie && pasid != IOMMU_NO_PASID)
> + cookie = iopf_pasid_cookie_get(iopf_param->dev, IOMMU_NO_PASID);
> + if (IS_ERR(cookie) || !cookie) {
> + /*
> + * The PASID of this device was not attached by an I/O-capable
> + * domain. Ask the caller to abort handling of this fault.
> + * Otherwise, the reference count will be switched to the new
> + * iopf group and will be released in iopf_free_group().
> + */
> + kfree(group);
> + group = abort_group;
> + cookie = NULL;
> + }

If this is the main point of the cookie mechanism - why not just have
a refcount inside the domain itself?

I'm really having a hard time making sense of this cookie thing, we
have a lifetime issue on the domain pointer, why is adding another
structure the answer?

I see we also need to stick a pointer in the domain for iommufd to get
back to the hwpt, but that doesn't seem to need such a big system to
accomplish - just add a void *. It would make sense for the domain to
have some optional pointer to a struct for all the fault related data
that becomes allocated when a PRI domain is created..

Also, I thought we'd basically said that domain detach is supposed to
flush any open PRIs before returning, what happened to that as a
solution to the lifetime problem?

Regardless I think we should split this into two series - improve the PRI
lifetime model for domains and then put iommufd on top of it..

Jason

2024-03-14 10:11:12

by Baolu Lu

[permalink] [raw]
Subject: Re: [PATCH v3 2/8] iommu/sva: Use iopf domain attach/detach interface

On 2024/3/9 1:46, Jason Gunthorpe wrote:
> On Mon, Jan 22, 2024 at 03:38:57PM +0800, Lu Baolu wrote:
>> @@ -215,7 +202,23 @@ static struct iopf_group *iopf_group_alloc(struct iommu_fault_param *iopf_param,
>> group = abort_group;
>> }
>>
>> + cookie = iopf_pasid_cookie_get(iopf_param->dev, pasid);
>> + if (!cookie && pasid != IOMMU_NO_PASID)
>> + cookie = iopf_pasid_cookie_get(iopf_param->dev, IOMMU_NO_PASID);
>> + if (IS_ERR(cookie) || !cookie) {
>> + /*
>> + * The PASID of this device was not attached by an I/O-capable
>> + * domain. Ask the caller to abort handling of this fault.
>> + * Otherwise, the reference count will be switched to the new
>> + * iopf group and will be released in iopf_free_group().
>> + */
>> + kfree(group);
>> + group = abort_group;
>> + cookie = NULL;
>> + }
>
> If this is the main point of the cookie mechanism - why not just have
> a refcount inside the domain itself?
>
> I'm really having a hard time making sense of this cookie thing, we
> have a lifetime issue on the domain pointer, why is adding another
> structure the answer?

The whole cookie mechanism aims to address two things:

- Extend the domain lifetime until all pending page faults are resolved.
- Associate information about the iommu device with each attachment of
the domain so that the iommufd device object ID could be quickly
retrieved in the fault delivering path.


> I see we also need to stick a pointer in the domain for iommufd to get
> back to the hwpt, but that doesn't seem to need such a big system to
> accomplish - just add a void *. It would make sense for the domain to
> have some optional pointer to a struct for all the fault related data
> that becomes allocated when a PRI domain is created..

It's not getting back hwpt from domain, just getting the iommufd_device
in the fault delivering path. The iommufd_device is not per-domain, but
per-domain-attachment.

>
> Also, I thought we'd basically said that domain detach is supposed to
> flush any open PRIs before returning, what happened to that as a
> solution to the lifetime problem?

If I remember it correctly, what we discussed was to flush all pending
page faults when disabling PRI. Anyway, the current driver behavior is
to flush faults during domain detachment. And if we keep this behavior,
we no longer need to worry about domain lifetime anymore.

>
> Regardless I think we should split this into two series - improve the PRI
> lifetime model for domains and then put iommufd on top of it..

Yes, agreed. Let's tackle those two points in a separate series.

Best regards,
baolu

2024-03-22 16:59:39

by Jason Gunthorpe

[permalink] [raw]
Subject: Re: [PATCH v3 2/8] iommu/sva: Use iopf domain attach/detach interface

On Thu, Mar 14, 2024 at 03:41:23PM +0800, Baolu Lu wrote:

> The whole cookie mechanism aims to address two things:
>
> - Extend the domain lifetime until all pending page faults are
> resolved.

Like you answered, I think the flush is a simpler scheme..

> - Associate information about the iommu device with each attachment of
> the domain so that the iommufd device object ID could be quickly
> retrieved in the fault delivering path.

> > I see we also need to stick a pointer in the domain for iommufd to get
> > back to the hwpt, but that doesn't seem to need such a big system to
> > accomplish - just add a void *. It would make sense for the domain to
> > have some optional pointer to a struct for all the fault related data
> > that becomes allocated when a PRI domain is created..
>
> It's not getting back hwpt from domain, just getting the iommufd_device
> in the fault delivering path. The iommufd_device is not per-domain, but
> per-domain-attachment.

It does make sense you'd need that, but I think something like this is
a more direct way to get it. Caller allocates the handle struct. The
iopf will provide the handle from the XA to the
callback. container_of() not void * is used in the caller's API.

diff --git a/drivers/iommu/iommu.c b/drivers/iommu/iommu.c
index 68e648b5576706..0d29d8f0847cd9 100644
--- a/drivers/iommu/iommu.c
+++ b/drivers/iommu/iommu.c
@@ -46,6 +46,8 @@ static unsigned int iommu_def_domain_type __read_mostly;
static bool iommu_dma_strict __read_mostly = IS_ENABLED(CONFIG_IOMMU_DEFAULT_DMA_STRICT);
static u32 iommu_cmd_line __read_mostly;

+enum { IOMMU_PASID_ARRAY_DOMAIN, IOMMU_PASID_ARRAY_HANDLE };
+
struct iommu_group {
struct kobject kobj;
struct kobject *devices_kobj;
@@ -3516,18 +3518,20 @@ static void __iommu_remove_group_pasid(struct iommu_group *group,
}

/*
- * iommu_attach_device_pasid() - Attach a domain to pasid of device
+ * __iommu_attach_device_pasid() - Attach a domain to pasid of device
* @domain: the iommu domain.
* @dev: the attached device.
* @pasid: the pasid of the device.
*
* Return: 0 on success, or an error.
*/
-int iommu_attach_device_pasid(struct iommu_domain *domain,
- struct device *dev, ioasid_t pasid)
+int __iommu_attach_device_pasid(struct iommu_domain *domain, struct device *dev,
+ ioasid_t pasid,
+ struct iommu_pasid_handle *handle)
{
/* Caller must be a probed driver on dev */
struct iommu_group *group = dev->iommu_group;
+ void *xa_entry;
void *curr;
int ret;

@@ -3541,9 +3545,14 @@ int iommu_attach_device_pasid(struct iommu_domain *domain,
return -EINVAL;

mutex_lock(&group->mutex);
- curr = xa_cmpxchg(&group->pasid_array, pasid, NULL, domain, GFP_KERNEL);
+ if (handle)
+ xa_entry = xa_tag_pointer(handle, IOMMU_PASID_ARRAY_HANDLE);
+ else
+ xa_entry = xa_tag_pointer(domain, IOMMU_PASID_ARRAY_DOMAIN);
+ curr = xa_cmpxchg(&group->pasid_array, pasid, NULL, xa_entry,
+ GFP_KERNEL);
if (curr) {
- ret = xa_err(curr) ? : -EBUSY;
+ ret = xa_err(curr) ?: -EBUSY;
goto out_unlock;
}

@@ -3556,7 +3565,7 @@ int iommu_attach_device_pasid(struct iommu_domain *domain,
mutex_unlock(&group->mutex);
return ret;
}
-EXPORT_SYMBOL_GPL(iommu_attach_device_pasid);
+EXPORT_SYMBOL_GPL(__iommu_attach_device_pasid);

/*
* iommu_detach_device_pasid() - Detach the domain from pasid of device
@@ -3600,13 +3609,23 @@ struct iommu_domain *iommu_get_domain_for_dev_pasid(struct device *dev,
{
/* Caller must be a probed driver on dev */
struct iommu_group *group = dev->iommu_group;
+ struct iommu_pasid_handle *handle;
struct iommu_domain *domain;
+ void *xa_entry;

if (!group)
return NULL;

xa_lock(&group->pasid_array);
- domain = xa_load(&group->pasid_array, pasid);
+ xa_entry = xa_load(&group->pasid_array, pasid);
+ if (xa_pointer_tag(xa_entry) == IOMMU_PASID_ARRAY_HANDLE) {
+ handle = xa_untag_pointer(xa_entry);
+ domain = handle->domain;
+ } else if (xa_pointer_tag(xa_entry) == IOMMU_PASID_ARRAY_DOMAIN) {
+ domain = xa_untag_pointer(xa_entry);
+ } else {
+ domain = NULL;
+ }
if (type && domain && domain->type != type)
domain = ERR_PTR(-EBUSY);
xa_unlock(&group->pasid_array);
diff --git a/include/linux/iommu.h b/include/linux/iommu.h
index 1ea2a820e1eb03..47c121d4e13f65 100644
--- a/include/linux/iommu.h
+++ b/include/linux/iommu.h
@@ -947,8 +947,27 @@ void iommu_device_release_dma_owner(struct device *dev);

struct iommu_domain *iommu_sva_domain_alloc(struct device *dev,
struct mm_struct *mm);
-int iommu_attach_device_pasid(struct iommu_domain *domain,
- struct device *dev, ioasid_t pasid);
+
+struct iommu_pasid_handle {
+ struct iommu_domain *domain;
+};
+
+int __iommu_attach_device_pasid(struct iommu_domain *domain, struct device *dev,
+ ioasid_t pasid,
+ struct iommu_pasid_handle *handle);
+
+static inline int iommu_attach_device_pasid(struct iommu_domain *domain,
+ struct device *dev, ioasid_t pasid)
+{
+ return __iommu_attach_device_pasid(domain, dev, pasid, NULL);
+}
+static inline int
+iommu_attach_device_pasid_handle(struct iommu_pasid_handle *handle,
+ struct device *dev, ioasid_t pasid)
+{
+ return __iommu_attach_device_pasid(handle->domain, dev, pasid, handle);
+}
+
void iommu_detach_device_pasid(struct iommu_domain *domain,
struct device *dev, ioasid_t pasid);
struct iommu_domain *

2024-03-25 12:28:39

by Baolu Lu

[permalink] [raw]
Subject: Re: [PATCH v3 2/8] iommu/sva: Use iopf domain attach/detach interface



On 3/23/24 12:59 AM, Jason Gunthorpe wrote:
> On Thu, Mar 14, 2024 at 03:41:23PM +0800, Baolu Lu wrote:
>
>> The whole cookie mechanism aims to address two things:
>>
>> - Extend the domain lifetime until all pending page faults are
>> resolved.
> Like you answered, I think the flush is a simpler scheme..

Yeah! Let me head this direction.

>
>> - Associate information about the iommu device with each attachment of
>> the domain so that the iommufd device object ID could be quickly
>> retrieved in the fault delivering path.
>
>>> I see we also need to stick a pointer in the domain for iommufd to get
>>> back to the hwpt, but that doesn't seem to need such a big system to
>>> accomplish - just add a void *. It would make sense for the domain to
>>> have some optional pointer to a struct for all the fault related data
>>> that becomes allocated when a PRI domain is created..
>> It's not getting back hwpt from domain, just getting the iommufd_device
>> in the fault delivering path. The iommufd_device is not per-domain, but
>> per-domain-attachment.
> It does make sense you'd need that, but I think something like this is
> a more direct way to get it. Caller allocates the handle struct. The
> iopf will provide the handle from the XA to the
> callback. container_of() not void * is used in the caller's API.

Your code looks attractive to me. It's much simpler. Thank you!

Best regards,
baolu