The existing IOPF handling framework only handles the I/O page faults for
SVA. Given that we are able to link iommu domain with each I/O page fault,
we can now make the I/O page fault handling framework more general for
more types of page faults.
Signed-off-by: Lu Baolu <[email protected]>
---
include/linux/iommu.h | 4 +++
drivers/iommu/io-pgfault.c | 67 ++++++-----------------------------
drivers/iommu/iommu-sva-lib.c | 59 ++++++++++++++++++++++++++++++
3 files changed, 73 insertions(+), 57 deletions(-)
diff --git a/include/linux/iommu.h b/include/linux/iommu.h
index 803e7b07605e..11c65a7bed88 100644
--- a/include/linux/iommu.h
+++ b/include/linux/iommu.h
@@ -50,6 +50,8 @@ struct iommu_dma_cookie;
typedef int (*iommu_fault_handler_t)(struct iommu_domain *,
struct device *, unsigned long, int, void *);
typedef int (*iommu_dev_fault_handler_t)(struct iommu_fault *, void *);
+typedef enum iommu_page_response_code (*iommu_domain_iopf_handler_t)
+ (struct iommu_fault *, void *);
struct iommu_domain_geometry {
dma_addr_t aperture_start; /* First address that can be mapped */
@@ -101,6 +103,8 @@ struct iommu_domain {
struct iommu_domain_geometry geometry;
struct iommu_dma_cookie *iova_cookie;
struct mm_struct *sva_cookie;
+ iommu_domain_iopf_handler_t fault_handler;
+ void *fault_data;
};
static inline bool iommu_is_dma_domain(struct iommu_domain *domain)
diff --git a/drivers/iommu/io-pgfault.c b/drivers/iommu/io-pgfault.c
index 1df8c1dcae77..dad0e40cd8d2 100644
--- a/drivers/iommu/io-pgfault.c
+++ b/drivers/iommu/io-pgfault.c
@@ -69,62 +69,6 @@ static int iopf_complete_group(struct device *dev, struct iopf_fault *iopf,
return iommu_page_response(dev, &resp);
}
-static enum iommu_page_response_code
-iopf_handle_single(struct iopf_fault *iopf)
-{
- vm_fault_t ret;
- struct mm_struct *mm;
- struct vm_area_struct *vma;
- unsigned int access_flags = 0;
- unsigned int fault_flags = FAULT_FLAG_REMOTE;
- struct iommu_fault_page_request *prm = &iopf->fault.prm;
- enum iommu_page_response_code status = IOMMU_PAGE_RESP_INVALID;
-
- if (!(prm->flags & IOMMU_FAULT_PAGE_REQUEST_PASID_VALID))
- return status;
-
- mm = iommu_sva_find(prm->pasid);
- if (IS_ERR_OR_NULL(mm))
- return status;
-
- mmap_read_lock(mm);
-
- vma = find_extend_vma(mm, prm->addr);
- if (!vma)
- /* Unmapped area */
- goto out_put_mm;
-
- if (prm->perm & IOMMU_FAULT_PERM_READ)
- access_flags |= VM_READ;
-
- if (prm->perm & IOMMU_FAULT_PERM_WRITE) {
- access_flags |= VM_WRITE;
- fault_flags |= FAULT_FLAG_WRITE;
- }
-
- if (prm->perm & IOMMU_FAULT_PERM_EXEC) {
- access_flags |= VM_EXEC;
- fault_flags |= FAULT_FLAG_INSTRUCTION;
- }
-
- if (!(prm->perm & IOMMU_FAULT_PERM_PRIV))
- fault_flags |= FAULT_FLAG_USER;
-
- if (access_flags & ~vma->vm_flags)
- /* Access fault */
- goto out_put_mm;
-
- ret = handle_mm_fault(vma, prm->addr, fault_flags, NULL);
- status = ret & VM_FAULT_ERROR ? IOMMU_PAGE_RESP_INVALID :
- IOMMU_PAGE_RESP_SUCCESS;
-
-out_put_mm:
- mmap_read_unlock(mm);
- mmput(mm);
-
- return status;
-}
-
static void iopf_handle_group(struct work_struct *work)
{
struct iopf_group *group;
@@ -134,12 +78,21 @@ static void iopf_handle_group(struct work_struct *work)
group = container_of(work, struct iopf_group, work);
list_for_each_entry_safe(iopf, next, &group->faults, list) {
+ struct iommu_domain *domain;
+
+ domain = iommu_get_domain_for_dev_pasid(group->dev,
+ iopf->fault.prm.pasid);
+
+ if (!domain || !domain->fault_handler)
+ status = IOMMU_PAGE_RESP_INVALID;
+
/*
* For the moment, errors are sticky: don't handle subsequent
* faults in the group if there is an error.
*/
if (status == IOMMU_PAGE_RESP_SUCCESS)
- status = iopf_handle_single(iopf);
+ status = domain->fault_handler(&iopf->fault,
+ domain->fault_data);
if (!(iopf->fault.prm.flags &
IOMMU_FAULT_PAGE_REQUEST_LAST_PAGE))
diff --git a/drivers/iommu/iommu-sva-lib.c b/drivers/iommu/iommu-sva-lib.c
index 47cf98e661ff..01fa8096bd02 100644
--- a/drivers/iommu/iommu-sva-lib.c
+++ b/drivers/iommu/iommu-sva-lib.c
@@ -87,6 +87,63 @@ static struct iommu_domain *iommu_sva_domain_alloc(struct device *dev)
return domain;
}
+static enum iommu_page_response_code
+iommu_sva_handle_iopf(struct iommu_fault *fault, void *data)
+{
+ vm_fault_t ret;
+ struct mm_struct *mm;
+ struct vm_area_struct *vma;
+ unsigned int access_flags = 0;
+ struct iommu_domain *domain = data;
+ unsigned int fault_flags = FAULT_FLAG_REMOTE;
+ struct iommu_fault_page_request *prm = &fault->prm;
+ enum iommu_page_response_code status = IOMMU_PAGE_RESP_INVALID;
+
+ if (!(prm->flags & IOMMU_FAULT_PAGE_REQUEST_PASID_VALID))
+ return status;
+
+ mm = domain->sva_cookie;
+ if (IS_ERR_OR_NULL(mm))
+ return status;
+
+ mmap_read_lock(mm);
+
+ vma = find_extend_vma(mm, prm->addr);
+ if (!vma)
+ /* Unmapped area */
+ goto out_put_mm;
+
+ if (prm->perm & IOMMU_FAULT_PERM_READ)
+ access_flags |= VM_READ;
+
+ if (prm->perm & IOMMU_FAULT_PERM_WRITE) {
+ access_flags |= VM_WRITE;
+ fault_flags |= FAULT_FLAG_WRITE;
+ }
+
+ if (prm->perm & IOMMU_FAULT_PERM_EXEC) {
+ access_flags |= VM_EXEC;
+ fault_flags |= FAULT_FLAG_INSTRUCTION;
+ }
+
+ if (!(prm->perm & IOMMU_FAULT_PERM_PRIV))
+ fault_flags |= FAULT_FLAG_USER;
+
+ if (access_flags & ~vma->vm_flags)
+ /* Access fault */
+ goto out_put_mm;
+
+ ret = handle_mm_fault(vma, prm->addr, fault_flags, NULL);
+ status = ret & VM_FAULT_ERROR ? IOMMU_PAGE_RESP_INVALID :
+ IOMMU_PAGE_RESP_SUCCESS;
+
+out_put_mm:
+ mmap_read_unlock(mm);
+ mmput(mm);
+
+ return status;
+}
+
/**
* iommu_sva_bind_device() - Bind a process address space to a device
* @dev: the device
@@ -124,6 +181,8 @@ iommu_sva_bind_device(struct device *dev, struct mm_struct *mm, void *drvdata)
goto out;
}
domain->sva_cookie = mm;
+ domain->fault_handler = iommu_sva_handle_iopf;
+ domain->fault_data = domain;
ret = iommu_attach_device_pasid(domain, dev, mm->pasid);
if (ret)
--
2.25.1
On Sun, Mar 20, 2022 at 02:40:29PM +0800, Lu Baolu wrote:
> The existing IOPF handling framework only handles the I/O page faults for
> SVA. Given that we are able to link iommu domain with each I/O page fault,
> we can now make the I/O page fault handling framework more general for
> more types of page faults.
>
> Signed-off-by: Lu Baolu <[email protected]>
> ---
> include/linux/iommu.h | 4 +++
> drivers/iommu/io-pgfault.c | 67 ++++++-----------------------------
> drivers/iommu/iommu-sva-lib.c | 59 ++++++++++++++++++++++++++++++
> 3 files changed, 73 insertions(+), 57 deletions(-)
>
> diff --git a/include/linux/iommu.h b/include/linux/iommu.h
> index 803e7b07605e..11c65a7bed88 100644
> --- a/include/linux/iommu.h
> +++ b/include/linux/iommu.h
> @@ -50,6 +50,8 @@ struct iommu_dma_cookie;
> typedef int (*iommu_fault_handler_t)(struct iommu_domain *,
> struct device *, unsigned long, int, void *);
> typedef int (*iommu_dev_fault_handler_t)(struct iommu_fault *, void *);
> +typedef enum iommu_page_response_code (*iommu_domain_iopf_handler_t)
> + (struct iommu_fault *, void *);
>
> struct iommu_domain_geometry {
> dma_addr_t aperture_start; /* First address that can be mapped */
> @@ -101,6 +103,8 @@ struct iommu_domain {
> struct iommu_domain_geometry geometry;
> struct iommu_dma_cookie *iova_cookie;
> struct mm_struct *sva_cookie;
> + iommu_domain_iopf_handler_t fault_handler;
> + void *fault_data;
> };
>
> static inline bool iommu_is_dma_domain(struct iommu_domain *domain)
> diff --git a/drivers/iommu/io-pgfault.c b/drivers/iommu/io-pgfault.c
> index 1df8c1dcae77..dad0e40cd8d2 100644
> --- a/drivers/iommu/io-pgfault.c
> +++ b/drivers/iommu/io-pgfault.c
> @@ -69,62 +69,6 @@ static int iopf_complete_group(struct device *dev, struct iopf_fault *iopf,
> return iommu_page_response(dev, &resp);
> }
>
> -static enum iommu_page_response_code
> -iopf_handle_single(struct iopf_fault *iopf)
> -{
> - vm_fault_t ret;
> - struct mm_struct *mm;
> - struct vm_area_struct *vma;
> - unsigned int access_flags = 0;
> - unsigned int fault_flags = FAULT_FLAG_REMOTE;
> - struct iommu_fault_page_request *prm = &iopf->fault.prm;
> - enum iommu_page_response_code status = IOMMU_PAGE_RESP_INVALID;
> -
> - if (!(prm->flags & IOMMU_FAULT_PAGE_REQUEST_PASID_VALID))
> - return status;
> -
> - mm = iommu_sva_find(prm->pasid);
> - if (IS_ERR_OR_NULL(mm))
> - return status;
> -
> - mmap_read_lock(mm);
> -
> - vma = find_extend_vma(mm, prm->addr);
> - if (!vma)
> - /* Unmapped area */
> - goto out_put_mm;
> -
> - if (prm->perm & IOMMU_FAULT_PERM_READ)
> - access_flags |= VM_READ;
> -
> - if (prm->perm & IOMMU_FAULT_PERM_WRITE) {
> - access_flags |= VM_WRITE;
> - fault_flags |= FAULT_FLAG_WRITE;
> - }
> -
> - if (prm->perm & IOMMU_FAULT_PERM_EXEC) {
> - access_flags |= VM_EXEC;
> - fault_flags |= FAULT_FLAG_INSTRUCTION;
> - }
> -
> - if (!(prm->perm & IOMMU_FAULT_PERM_PRIV))
> - fault_flags |= FAULT_FLAG_USER;
> -
> - if (access_flags & ~vma->vm_flags)
> - /* Access fault */
> - goto out_put_mm;
> -
> - ret = handle_mm_fault(vma, prm->addr, fault_flags, NULL);
> - status = ret & VM_FAULT_ERROR ? IOMMU_PAGE_RESP_INVALID :
> - IOMMU_PAGE_RESP_SUCCESS;
> -
> -out_put_mm:
> - mmap_read_unlock(mm);
> - mmput(mm);
> -
> - return status;
> -}
> -
> static void iopf_handle_group(struct work_struct *work)
> {
> struct iopf_group *group;
> @@ -134,12 +78,21 @@ static void iopf_handle_group(struct work_struct *work)
> group = container_of(work, struct iopf_group, work);
>
> list_for_each_entry_safe(iopf, next, &group->faults, list) {
> + struct iommu_domain *domain;
> +
> + domain = iommu_get_domain_for_dev_pasid(group->dev,
> + iopf->fault.prm.pasid);
Do we have a guarantee that the domain is not freed while we handle the
fault? We could prevent unbind() while there are pending faults on this
bond. But a refcount on SVA domains could defer freeing, and would also
help with keeping the semantics where bind() returns a single refcounted
bond for any {dev, mm}.
Given that this path is full of circular locking pitfalls, and to keep the
fault handler efficient (well, at least not make it worse), we should
probably keep a getter like iommu_sva_find() that does not require
locking.
> +
> + if (!domain || !domain->fault_handler)
> + status = IOMMU_PAGE_RESP_INVALID;
> +
> /*
> * For the moment, errors are sticky: don't handle subsequent
> * faults in the group if there is an error.
> */
> if (status == IOMMU_PAGE_RESP_SUCCESS)
> - status = iopf_handle_single(iopf);
> + status = domain->fault_handler(&iopf->fault,
> + domain->fault_data);
If we make this a direct call and only use a light getter for the
PASID->mm lookup we don't need to look at the domain at all. Or are you
planning to add external fault handlers?
>
> if (!(iopf->fault.prm.flags &
> IOMMU_FAULT_PAGE_REQUEST_LAST_PAGE))
> diff --git a/drivers/iommu/iommu-sva-lib.c b/drivers/iommu/iommu-sva-lib.c
> index 47cf98e661ff..01fa8096bd02 100644
> --- a/drivers/iommu/iommu-sva-lib.c
> +++ b/drivers/iommu/iommu-sva-lib.c
> @@ -87,6 +87,63 @@ static struct iommu_domain *iommu_sva_domain_alloc(struct device *dev)
> return domain;
> }
>
> +static enum iommu_page_response_code
> +iommu_sva_handle_iopf(struct iommu_fault *fault, void *data)
> +{
> + vm_fault_t ret;
> + struct mm_struct *mm;
> + struct vm_area_struct *vma;
> + unsigned int access_flags = 0;
> + struct iommu_domain *domain = data;
> + unsigned int fault_flags = FAULT_FLAG_REMOTE;
> + struct iommu_fault_page_request *prm = &fault->prm;
> + enum iommu_page_response_code status = IOMMU_PAGE_RESP_INVALID;
> +
> + if (!(prm->flags & IOMMU_FAULT_PAGE_REQUEST_PASID_VALID))
> + return status;
> +
> + mm = domain->sva_cookie;
> + if (IS_ERR_OR_NULL(mm))
> + return status;
> +
> + mmap_read_lock(mm);
> +
> + vma = find_extend_vma(mm, prm->addr);
> + if (!vma)
> + /* Unmapped area */
> + goto out_put_mm;
> +
> + if (prm->perm & IOMMU_FAULT_PERM_READ)
> + access_flags |= VM_READ;
> +
> + if (prm->perm & IOMMU_FAULT_PERM_WRITE) {
> + access_flags |= VM_WRITE;
> + fault_flags |= FAULT_FLAG_WRITE;
> + }
> +
> + if (prm->perm & IOMMU_FAULT_PERM_EXEC) {
> + access_flags |= VM_EXEC;
> + fault_flags |= FAULT_FLAG_INSTRUCTION;
> + }
> +
> + if (!(prm->perm & IOMMU_FAULT_PERM_PRIV))
> + fault_flags |= FAULT_FLAG_USER;
> +
> + if (access_flags & ~vma->vm_flags)
> + /* Access fault */
> + goto out_put_mm;
> +
> + ret = handle_mm_fault(vma, prm->addr, fault_flags, NULL);
> + status = ret & VM_FAULT_ERROR ? IOMMU_PAGE_RESP_INVALID :
> + IOMMU_PAGE_RESP_SUCCESS;
> +
> +out_put_mm:
> + mmap_read_unlock(mm);
> + mmput(mm);
mmget_not_zero() is missing since iommu_sva_find() is gone. I'm guessing
we still need it in case the process dies.
Thanks,
Jean
> +
> + return status;
> +}
> +
> /**
> * iommu_sva_bind_device() - Bind a process address space to a device
> * @dev: the device
> @@ -124,6 +181,8 @@ iommu_sva_bind_device(struct device *dev, struct mm_struct *mm, void *drvdata)
> goto out;
> }
> domain->sva_cookie = mm;
> + domain->fault_handler = iommu_sva_handle_iopf;
> + domain->fault_data = domain;
>
> ret = iommu_attach_device_pasid(domain, dev, mm->pasid);
> if (ret)
> --
> 2.25.1
>
On Sun, Mar 20, 2022 at 02:40:29PM +0800, Lu Baolu wrote:
> +static enum iommu_page_response_code
> +iommu_sva_handle_iopf(struct iommu_fault *fault, void *data)
> +{
> + vm_fault_t ret;
> + struct mm_struct *mm;
> + struct vm_area_struct *vma;
> + unsigned int access_flags = 0;
> + struct iommu_domain *domain = data;
Why is the iommu_domain not passed in as a fully typed object? I would
think data should be some opaque value used by non-sva cases.
What is the lifetime model here anyhow?
> + unsigned int fault_flags = FAULT_FLAG_REMOTE;
> + struct iommu_fault_page_request *prm = &fault->prm;
> + enum iommu_page_response_code status = IOMMU_PAGE_RESP_INVALID;
> +
> + if (!(prm->flags & IOMMU_FAULT_PAGE_REQUEST_PASID_VALID))
> + return status;
> +
> + mm = domain->sva_cookie;
> + if (IS_ERR_OR_NULL(mm))
Do not use this function
Do not store err pointers in structs.
> +out_put_mm:
> + mmap_read_unlock(mm);
> + mmput(mm);
mm structs are weird, they have two refcounts.
The 'sva_cookie' should hold a mmgrab/mmdrop() refcount to keep the
pointer alive but to touch the mmap lock you have to upgrade it to a
refcount that prevents destruction using mmget_not_zero() just for
this short period.
Jason
On 2022/3/21 19:39, Jean-Philippe Brucker wrote:
> On Sun, Mar 20, 2022 at 02:40:29PM +0800, Lu Baolu wrote:
>> The existing IOPF handling framework only handles the I/O page faults for
>> SVA. Given that we are able to link iommu domain with each I/O page fault,
>> we can now make the I/O page fault handling framework more general for
>> more types of page faults.
>>
>> Signed-off-by: Lu Baolu <[email protected]>
>> ---
>> include/linux/iommu.h | 4 +++
>> drivers/iommu/io-pgfault.c | 67 ++++++-----------------------------
>> drivers/iommu/iommu-sva-lib.c | 59 ++++++++++++++++++++++++++++++
>> 3 files changed, 73 insertions(+), 57 deletions(-)
>>
>> diff --git a/include/linux/iommu.h b/include/linux/iommu.h
>> index 803e7b07605e..11c65a7bed88 100644
>> --- a/include/linux/iommu.h
>> +++ b/include/linux/iommu.h
>> @@ -50,6 +50,8 @@ struct iommu_dma_cookie;
>> typedef int (*iommu_fault_handler_t)(struct iommu_domain *,
>> struct device *, unsigned long, int, void *);
>> typedef int (*iommu_dev_fault_handler_t)(struct iommu_fault *, void *);
>> +typedef enum iommu_page_response_code (*iommu_domain_iopf_handler_t)
>> + (struct iommu_fault *, void *);
>>
>> struct iommu_domain_geometry {
>> dma_addr_t aperture_start; /* First address that can be mapped */
>> @@ -101,6 +103,8 @@ struct iommu_domain {
>> struct iommu_domain_geometry geometry;
>> struct iommu_dma_cookie *iova_cookie;
>> struct mm_struct *sva_cookie;
>> + iommu_domain_iopf_handler_t fault_handler;
>> + void *fault_data;
>> };
>>
>> static inline bool iommu_is_dma_domain(struct iommu_domain *domain)
>> diff --git a/drivers/iommu/io-pgfault.c b/drivers/iommu/io-pgfault.c
>> index 1df8c1dcae77..dad0e40cd8d2 100644
>> --- a/drivers/iommu/io-pgfault.c
>> +++ b/drivers/iommu/io-pgfault.c
>> @@ -69,62 +69,6 @@ static int iopf_complete_group(struct device *dev, struct iopf_fault *iopf,
>> return iommu_page_response(dev, &resp);
>> }
>>
>> -static enum iommu_page_response_code
>> -iopf_handle_single(struct iopf_fault *iopf)
>> -{
>> - vm_fault_t ret;
>> - struct mm_struct *mm;
>> - struct vm_area_struct *vma;
>> - unsigned int access_flags = 0;
>> - unsigned int fault_flags = FAULT_FLAG_REMOTE;
>> - struct iommu_fault_page_request *prm = &iopf->fault.prm;
>> - enum iommu_page_response_code status = IOMMU_PAGE_RESP_INVALID;
>> -
>> - if (!(prm->flags & IOMMU_FAULT_PAGE_REQUEST_PASID_VALID))
>> - return status;
>> -
>> - mm = iommu_sva_find(prm->pasid);
>> - if (IS_ERR_OR_NULL(mm))
>> - return status;
>> -
>> - mmap_read_lock(mm);
>> -
>> - vma = find_extend_vma(mm, prm->addr);
>> - if (!vma)
>> - /* Unmapped area */
>> - goto out_put_mm;
>> -
>> - if (prm->perm & IOMMU_FAULT_PERM_READ)
>> - access_flags |= VM_READ;
>> -
>> - if (prm->perm & IOMMU_FAULT_PERM_WRITE) {
>> - access_flags |= VM_WRITE;
>> - fault_flags |= FAULT_FLAG_WRITE;
>> - }
>> -
>> - if (prm->perm & IOMMU_FAULT_PERM_EXEC) {
>> - access_flags |= VM_EXEC;
>> - fault_flags |= FAULT_FLAG_INSTRUCTION;
>> - }
>> -
>> - if (!(prm->perm & IOMMU_FAULT_PERM_PRIV))
>> - fault_flags |= FAULT_FLAG_USER;
>> -
>> - if (access_flags & ~vma->vm_flags)
>> - /* Access fault */
>> - goto out_put_mm;
>> -
>> - ret = handle_mm_fault(vma, prm->addr, fault_flags, NULL);
>> - status = ret & VM_FAULT_ERROR ? IOMMU_PAGE_RESP_INVALID :
>> - IOMMU_PAGE_RESP_SUCCESS;
>> -
>> -out_put_mm:
>> - mmap_read_unlock(mm);
>> - mmput(mm);
>> -
>> - return status;
>> -}
>> -
>> static void iopf_handle_group(struct work_struct *work)
>> {
>> struct iopf_group *group;
>> @@ -134,12 +78,21 @@ static void iopf_handle_group(struct work_struct *work)
>> group = container_of(work, struct iopf_group, work);
>>
>> list_for_each_entry_safe(iopf, next, &group->faults, list) {
>> + struct iommu_domain *domain;
>> +
>> + domain = iommu_get_domain_for_dev_pasid(group->dev,
>> + iopf->fault.prm.pasid);
>
> Do we have a guarantee that the domain is not freed while we handle the
> fault? We could prevent unbind() while there are pending faults on this
> bond. But a refcount on SVA domains could defer freeing, and would also
> help with keeping the semantics where bind() returns a single refcounted
> bond for any {dev, mm}.
>
> Given that this path is full of circular locking pitfalls, and to keep the
> fault handler efficient (well, at least not make it worse), we should
> probably keep a getter like iommu_sva_find() that does not require
> locking.
Agreed. We need a mechanism to ensure concurrency. I will look into it.
>
>> +
>> + if (!domain || !domain->fault_handler)
>> + status = IOMMU_PAGE_RESP_INVALID;
>> +
>> /*
>> * For the moment, errors are sticky: don't handle subsequent
>> * faults in the group if there is an error.
>> */
>> if (status == IOMMU_PAGE_RESP_SUCCESS)
>> - status = iopf_handle_single(iopf);
>> + status = domain->fault_handler(&iopf->fault,
>> + domain->fault_data);
>
> If we make this a direct call and only use a light getter for the
> PASID->mm lookup we don't need to look at the domain at all. Or are you
> planning to add external fault handlers?
Yes. I'd like the I/O page fault handling framework to work for
external domains as well, for example, the I/O page faults for user
space page table should be routed to user space.
>
>>
>> if (!(iopf->fault.prm.flags &
>> IOMMU_FAULT_PAGE_REQUEST_LAST_PAGE))
>> diff --git a/drivers/iommu/iommu-sva-lib.c b/drivers/iommu/iommu-sva-lib.c
>> index 47cf98e661ff..01fa8096bd02 100644
>> --- a/drivers/iommu/iommu-sva-lib.c
>> +++ b/drivers/iommu/iommu-sva-lib.c
>> @@ -87,6 +87,63 @@ static struct iommu_domain *iommu_sva_domain_alloc(struct device *dev)
>> return domain;
>> }
>>
>> +static enum iommu_page_response_code
>> +iommu_sva_handle_iopf(struct iommu_fault *fault, void *data)
>> +{
>> + vm_fault_t ret;
>> + struct mm_struct *mm;
>> + struct vm_area_struct *vma;
>> + unsigned int access_flags = 0;
>> + struct iommu_domain *domain = data;
>> + unsigned int fault_flags = FAULT_FLAG_REMOTE;
>> + struct iommu_fault_page_request *prm = &fault->prm;
>> + enum iommu_page_response_code status = IOMMU_PAGE_RESP_INVALID;
>> +
>> + if (!(prm->flags & IOMMU_FAULT_PAGE_REQUEST_PASID_VALID))
>> + return status;
>> +
>> + mm = domain->sva_cookie;
>> + if (IS_ERR_OR_NULL(mm))
>> + return status;
>> +
>> + mmap_read_lock(mm);
>> +
>> + vma = find_extend_vma(mm, prm->addr);
>> + if (!vma)
>> + /* Unmapped area */
>> + goto out_put_mm;
>> +
>> + if (prm->perm & IOMMU_FAULT_PERM_READ)
>> + access_flags |= VM_READ;
>> +
>> + if (prm->perm & IOMMU_FAULT_PERM_WRITE) {
>> + access_flags |= VM_WRITE;
>> + fault_flags |= FAULT_FLAG_WRITE;
>> + }
>> +
>> + if (prm->perm & IOMMU_FAULT_PERM_EXEC) {
>> + access_flags |= VM_EXEC;
>> + fault_flags |= FAULT_FLAG_INSTRUCTION;
>> + }
>> +
>> + if (!(prm->perm & IOMMU_FAULT_PERM_PRIV))
>> + fault_flags |= FAULT_FLAG_USER;
>> +
>> + if (access_flags & ~vma->vm_flags)
>> + /* Access fault */
>> + goto out_put_mm;
>> +
>> + ret = handle_mm_fault(vma, prm->addr, fault_flags, NULL);
>> + status = ret & VM_FAULT_ERROR ? IOMMU_PAGE_RESP_INVALID :
>> + IOMMU_PAGE_RESP_SUCCESS;
>> +
>> +out_put_mm:
>> + mmap_read_unlock(mm);
>> + mmput(mm);
>
> mmget_not_zero() is missing since iommu_sva_find() is gone. I'm guessing
> we still need it in case the process dies
Agreed.
>
> Thanks,
> Jean
Best regards,
baolu
On 2022/3/21 20:50, Jason Gunthorpe wrote:
> On Sun, Mar 20, 2022 at 02:40:29PM +0800, Lu Baolu wrote:
>
>> +static enum iommu_page_response_code
>> +iommu_sva_handle_iopf(struct iommu_fault *fault, void *data)
>> +{
>> + vm_fault_t ret;
>> + struct mm_struct *mm;
>> + struct vm_area_struct *vma;
>> + unsigned int access_flags = 0;
>> + struct iommu_domain *domain = data;
>
> Why is the iommu_domain not passed in as a fully typed object? I would
> think data should some opaque value used by non-sva cases.
The "data" is set together with the fault handler when the caller
installs a fault handler for an iommu domain. We will add a generic
interface to install a fault handler for an iommu domain later when a
real non-sva case comes.
>
> What is the lifetime model here anyhow?
I simply thought that the device driver should guarantee that there are
no pending faults after sva_unbind(). This is insufficient. I need to
rework this.
>
>> + unsigned int fault_flags = FAULT_FLAG_REMOTE;
>> + struct iommu_fault_page_request *prm = &fault->prm;
>> + enum iommu_page_response_code status = IOMMU_PAGE_RESP_INVALID;
>> +
>> + if (!(prm->flags & IOMMU_FAULT_PAGE_REQUEST_PASID_VALID))
>> + return status;
>> +
>> + mm = domain->sva_cookie;
>> + if (IS_ERR_OR_NULL(mm))
>
> Do not use this function
>
> Do not store err pointers in structs.
Sure.
>
>> +out_put_mm:
>> + mmap_read_unlock(mm);
>> + mmput(mm);
>
> mm structs are weird, they have two refcounts.
>
> The 'sva_cookie' should hold a mmgrab/mmdrop() refcount to keep the
> pointer alive but to touch the mmap lock you have to upgrade it to a
> refcount that prevents destruction using mmget_not_zero() just for
> this short period.
Yes. Will look into it.
Best regards,
baolu
On Tue, Mar 22, 2022 at 10:24:26AM +0000, Tian, Kevin wrote:
> > From: Jean-Philippe Brucker <[email protected]>
> > Sent: Tuesday, March 22, 2022 6:06 PM
> >
> > On Tue, Mar 22, 2022 at 01:00:08AM +0000, Tian, Kevin wrote:
> > > > From: Jean-Philippe Brucker <[email protected]>
> > > > Sent: Monday, March 21, 2022 7:42 PM
> > > >
> > > > Hi Kevin,
> > > >
> > > > On Mon, Mar 21, 2022 at 08:09:36AM +0000, Tian, Kevin wrote:
> > > > > > From: Lu Baolu <[email protected]>
> > > > > > Sent: Sunday, March 20, 2022 2:40 PM
> > > > > >
> > > > > > The existing IOPF handling framework only handles the I/O page faults
> > for
> > > > > > > SVA. Given that we are able to link iommu domain with each I/O
> > page
> > > > fault,
> > > > > > we can now make the I/O page fault handling framework more
> > general
> > > > for
> > > > > > more types of page faults.
> > > > >
> > > > > "make ... generic" in subject line is kind of confusing. Reading this patch
> > I
> > > > > think you really meant changing from per-device fault handling to per-
> > > > domain
> > > > > fault handling. This is more accurate in concept since the fault is caused
> > by
> > > > > the domain page table. ????
> > > >
> > > > I tend to disagree with that last part. The fault is caused by a specific
> > > > device accessing shared page tables. We should keep that device
> > > > information throughout the fault handling, so that we can report it to the
> > > > driver when things go wrong. A process can have multiple threads bound
> > to
> > > > different devices, they share the same mm so if the driver wanted to
> > > > signal a misbehaving thread, similarly to a SEGV on the CPU side, it would
> > > > need the device information to precisely report it to userspace.
> > > >
> > >
> > > iommu driver can include the device information in the fault data. But
> > > in concept the IOPF should be reported per domain.
> >
> > So I don't remember where we left off on that topic, what about fault
> > injection into guests? In that case device info is more than just
> > diagnostic, fault injection can't work without it. I think we talked about
> > passing a device cookie to userspace, just want to make sure.
> >
> > > and I agree with Jason that at most we can send SEGV to the entire thread
> > > group since there is no way to associate a DMA back to a thread which
> > > initiates the DMA.
> >
> > The point is providing the most accurate information to the device driver
> > for diagnostics and debugging. A process opens multiple queues to
> > different devices, then if one of the queues issues invalid DMA, the
> > driver won't even know which queue is broken if you only report the target
> > mm and not the source dev. I don't think we gain anything from discarding
> > the device information from the fault path.
> >
>
> In case I didn't make it clear, what I talked about is just about having iommu
> core to report IOPF per domain handler vs. per device handler while this
> design choice doesn't change what the fault data should include (device,
> pasid, addr, etc.). i.e. it always includes all the information provided by the
> iommu driver no matter how the fault is reported upwards.
Right thanks, I misunderstood.
Thanks,
Jean
>
> e.g. with iommufd it is iommufd to register an IOPF handler per managed
> domain and receive IOPF on those domains. If necessary, iommufd further
> forwards to userspace including device cookie according to the fault data.
>
> Thanks
> Kevin