This adds mechanisms around the iommu_domain so that the I/O page
fault handling framework can route a page fault to the domain and
call the fault handler installed on it.
It includes:
- A reference counter for the iommu domain. The page fault handler runs
  in a different context from the bind()/unbind() thread, so a reference
  counter is added to struct iommu_domain to keep the domain's life
  cycle in sync between the two threads.
- A pointer to the page fault handler and its private data. The fault
  handler is called with the private data as a parameter once a page
  fault is routed to the domain. Any kernel component that owns an
  iommu domain can install a handler and its private data so that the
  page fault can be further routed or handled.
This also prepares the SVA implementation to be the first consumer of
the per-domain page fault handling model.
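As an illustration only (not part of this patch, and iopf_route_to_domain()
is just a placeholder name), the fault framework side is expected to
consume these fields roughly as follows:

static enum iommu_page_response_code
iopf_route_to_domain(struct device *dev, struct iommu_fault *fault)
{
	enum iommu_page_response_code status = IOMMU_PAGE_RESP_INVALID;
	struct iommu_domain *domain;

	/* Takes a reference on success; dropped below. */
	domain = iommu_get_domain_for_dev_pasid_async(dev, fault->prm.pasid);
	if (!domain)
		return status;

	/* Call the handler installed by the domain owner (e.g. SVA). */
	if (domain->iopf_handler)
		status = domain->iopf_handler(fault, domain->fault_data);

	iommu_domain_put_async(domain);
	return status;
}
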
Signed-off-by: Lu Baolu <[email protected]>
---
include/linux/iommu.h | 18 ++++++++++
drivers/iommu/iommu-sva-lib.c | 68 ++++++++++++++++++++++++++++++++++-
drivers/iommu/iommu.c | 29 +++++++++++++++
3 files changed, 114 insertions(+), 1 deletion(-)
diff --git a/include/linux/iommu.h b/include/linux/iommu.h
index 57650b773f3f..4278a6310a7b 100644
--- a/include/linux/iommu.h
+++ b/include/linux/iommu.h
@@ -102,6 +102,10 @@ struct iommu_domain {
struct iommu_domain_geometry geometry;
struct iommu_dma_cookie *iova_cookie;
struct iommu_sva_ioas *sva_ioas;
+ enum iommu_page_response_code (*iopf_handler)(struct iommu_fault *fault,
+ void *data);
+ void *fault_data;
+ refcount_t async_users;
};
static inline bool iommu_is_dma_domain(struct iommu_domain *domain)
@@ -680,6 +684,14 @@ int iommu_attach_device_pasid(struct iommu_domain *domain,
struct device *dev, ioasid_t pasid);
void iommu_detach_device_pasid(struct iommu_domain *domain,
struct device *dev, ioasid_t pasid);
+struct iommu_domain *
+iommu_get_domain_for_dev_pasid_async(struct device *dev, ioasid_t pasid);
+
+static inline void iommu_domain_put_async(struct iommu_domain *domain)
+{
+ if (refcount_dec_and_test(&domain->async_users))
+ iommu_domain_free(domain);
+}
#else /* CONFIG_IOMMU_API */
struct iommu_ops {};
@@ -1044,6 +1056,12 @@ static inline void iommu_detach_device_pasid(struct iommu_domain *domain,
struct device *dev, ioasid_t pasid)
{
}
+
+static inline struct iommu_domain *
+iommu_get_domain_for_dev_pasid_async(struct device *dev, ioasid_t pasid)
+{
+ return NULL;
+}
#endif /* CONFIG_IOMMU_API */
#ifdef CONFIG_IOMMU_SVA
diff --git a/drivers/iommu/iommu-sva-lib.c b/drivers/iommu/iommu-sva-lib.c
index e90d53436a22..1024c61519dc 100644
--- a/drivers/iommu/iommu-sva-lib.c
+++ b/drivers/iommu/iommu-sva-lib.c
@@ -138,6 +138,69 @@ void iommu_sva_ioas_put(struct iommu_sva_ioas *ioas)
}
}
+/*
+ * I/O page fault handler for SVA
+ *
+ * Copied from io-pgfault.c with mmget_not_zero() added before
+ * mmap_read_lock().
+ */
+static enum iommu_page_response_code
+iommu_sva_handle_iopf(struct iommu_fault *fault, void *data)
+{
+ vm_fault_t ret;
+ struct mm_struct *mm;
+ struct vm_area_struct *vma;
+ unsigned int access_flags = 0;
+ struct iommu_domain *domain = data;
+ unsigned int fault_flags = FAULT_FLAG_REMOTE;
+ struct iommu_fault_page_request *prm = &fault->prm;
+ enum iommu_page_response_code status = IOMMU_PAGE_RESP_INVALID;
+
+ if (!(prm->flags & IOMMU_FAULT_PAGE_REQUEST_PASID_VALID))
+ return status;
+
+ mm = iommu_sva_domain_mm(domain);
+ if (IS_ERR_OR_NULL(mm) || !mmget_not_zero(mm))
+ return status;
+
+ mmap_read_lock(mm);
+
+ vma = find_extend_vma(mm, prm->addr);
+ if (!vma)
+ /* Unmapped area */
+ goto out_put_mm;
+
+ if (prm->perm & IOMMU_FAULT_PERM_READ)
+ access_flags |= VM_READ;
+
+ if (prm->perm & IOMMU_FAULT_PERM_WRITE) {
+ access_flags |= VM_WRITE;
+ fault_flags |= FAULT_FLAG_WRITE;
+ }
+
+ if (prm->perm & IOMMU_FAULT_PERM_EXEC) {
+ access_flags |= VM_EXEC;
+ fault_flags |= FAULT_FLAG_INSTRUCTION;
+ }
+
+ if (!(prm->perm & IOMMU_FAULT_PERM_PRIV))
+ fault_flags |= FAULT_FLAG_USER;
+
+ if (access_flags & ~vma->vm_flags)
+ /* Access fault */
+ goto out_put_mm;
+
+ ret = handle_mm_fault(vma, prm->addr, fault_flags, NULL);
+ status = ret & VM_FAULT_ERROR ? IOMMU_PAGE_RESP_INVALID :
+ IOMMU_PAGE_RESP_SUCCESS;
+
+out_put_mm:
+ mmap_read_unlock(mm);
+ mmput(mm);
+
+ return status;
+}
+
/*
* IOMMU SVA driver-oriented interfaces
*/
@@ -157,6 +220,9 @@ iommu_sva_alloc_domain(struct device *dev, struct iommu_sva_ioas *ioas)
/* The caller must hold a reference to ioas. */
domain->sva_ioas = ioas;
domain->type = IOMMU_DOMAIN_SVA;
+ domain->iopf_handler = iommu_sva_handle_iopf;
+ domain->fault_data = domain;
+ refcount_set(&domain->async_users, 1);
return domain;
}
@@ -271,7 +337,7 @@ void iommu_sva_unbind_device(struct iommu_sva *handle)
if (refcount_dec_and_test(&handle->users)) {
list_del(&handle->node);
iommu_detach_device_pasid(domain, dev, ioas->pasid);
- iommu_domain_free(domain);
+ iommu_domain_put_async(domain);
kfree(handle);
}
mutex_unlock(&iommu_sva_lock);
diff --git a/drivers/iommu/iommu.c b/drivers/iommu/iommu.c
index 7ed0a61351a5..f12c7851d16d 100644
--- a/drivers/iommu/iommu.c
+++ b/drivers/iommu/iommu.c
@@ -3173,3 +3173,32 @@ void iommu_detach_device_pasid(struct iommu_domain *domain,
iommu_group_put(group);
}
+
+/*
+ * Get the attached domain for asynchronous usage, for example the I/O
+ * page fault handling framework. The caller gets a reference to the
+ * domain automatically on a successful return and must release it
+ * with iommu_domain_put_async() after usage.
+ */
+struct iommu_domain *
+iommu_get_domain_for_dev_pasid_async(struct device *dev, ioasid_t pasid)
+{
+ struct iommu_domain *domain;
+ struct iommu_group *group;
+
+ if (!pasid_valid(pasid))
+ return NULL;
+
+ group = iommu_group_get(dev);
+ if (!group)
+ return NULL;
+
+ mutex_lock(&group->mutex);
+ domain = xa_load(&group->pasid_array, pasid);
+ if (domain)
+ refcount_inc(&domain->async_users);
+ mutex_unlock(&group->mutex);
+ iommu_group_put(group);
+
+ return domain;
+}
--
2.25.1
Hi Baolu,
On Thu, Apr 21, 2022 at 01:21:19PM +0800, Lu Baolu wrote:
> +/*
> + * Get the attached domain for asynchronous usage, for example the I/O
> + * page fault handling framework. The caller gets a reference to the
> + * domain automatically on a successful return and must release it
> + * with iommu_domain_put_async() after usage.
> + */
> +struct iommu_domain *
> +iommu_get_domain_for_dev_pasid_async(struct device *dev, ioasid_t pasid)
> +{
> + struct iommu_domain *domain;
> + struct iommu_group *group;
> +
> + if (!pasid_valid(pasid))
> + return NULL;
> +
> + group = iommu_group_get(dev);
> + if (!group)
> + return NULL;
> +
> + mutex_lock(&group->mutex);
There is a possible deadlock between unbind() and the fault handler:
 unbind()                        iopf_handle_group()
  mutex_lock(&group->mutex)
  iommu_detach_device_pasid()
  iopf_queue_flush_dev()         iommu_get_domain_for_dev_pasid_async()
   ... waits for IOPF work        mutex_lock(&group->mutex)
I was wrong in my previous review: we do have a guarantee that the SVA
domain does not go away during IOPF handling, because unbind() waits for
pending faults with iopf_queue_flush_dev() before freeing the domain (or
for Arm stall, knows that there are no pending faults). So we can just get
rid of domain->async_users and the group->mutex in IOPF, I think?
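To make that concrete, the lookup could then reduce to something like the
sketch below (untested, and the name without the _async suffix is only a
placeholder for whatever the next version calls it):

struct iommu_domain *
iommu_get_domain_for_dev_pasid(struct device *dev, ioasid_t pasid)
{
	struct iommu_domain *domain;
	struct iommu_group *group;

	if (!pasid_valid(pasid))
		return NULL;

	group = iommu_group_get(dev);
	if (!group)
		return NULL;

	/*
	 * No group->mutex and no reference count here: unbind() flushes
	 * pending faults with iopf_queue_flush_dev() before detaching and
	 * freeing the domain, so the domain cannot go away while this
	 * fault is still being handled.
	 */
	domain = xa_load(&group->pasid_array, pasid);
	iommu_group_put(group);

	return domain;
}
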
Thanks,
Jean
> + domain = xa_load(&group->pasid_array, pasid);
> + if (domain)
> + refcount_inc(&domain->async_users);
> + mutex_unlock(&group->mutex);
> + iommu_group_put(group);
> +
> + return domain;
> +}
> --
> 2.25.1
>
Hi Jean,
On 2022/4/28 22:47, Jean-Philippe Brucker wrote:
> Hi Baolu,
>
> On Thu, Apr 21, 2022 at 01:21:19PM +0800, Lu Baolu wrote:
>> +/*
>> + * Get the attached domain for asynchronous usage, for example the I/O
>> + * page fault handling framework. The caller gets a reference to the
>> + * domain automatically on a successful return and must release it
>> + * with iommu_domain_put_async() after usage.
>> + */
>> +struct iommu_domain *
>> +iommu_get_domain_for_dev_pasid_async(struct device *dev, ioasid_t pasid)
>> +{
>> + struct iommu_domain *domain;
>> + struct iommu_group *group;
>> +
>> + if (!pasid_valid(pasid))
>> + return NULL;
>> +
>> + group = iommu_group_get(dev);
>> + if (!group)
>> + return NULL;
>> +
>> + mutex_lock(&group->mutex);
>
> There is a possible deadlock between unbind() and the fault handler:
>
>  unbind()                        iopf_handle_group()
>   mutex_lock(&group->mutex)
>   iommu_detach_device_pasid()
>   iopf_queue_flush_dev()         iommu_get_domain_for_dev_pasid_async()
>    ... waits for IOPF work        mutex_lock(&group->mutex)
>
Yes, indeed.
> I was wrong in my previous review: we do have a guarantee that the SVA
> domain does not go away during IOPF handling, because unbind() waits for
> pending faults with iopf_queue_flush_dev() before freeing the domain (or
> for Arm stall, knows that there are no pending faults). So we can just get
> rid of domain->async_users and the group->mutex in IOPF, I think?
Agreed. The Intel driver does the same thing in its unbind(). The SVA
domain's life cycle is therefore already synchronized with IOPF handling,
so there's no need for domain->async_users.
I will drop it in the next version. Thank you!
Best regards,
baolu