This adds some mechanisms around the iommu_domain so that the I/O page
fault handling framework can route a page fault to the domain and
call the fault handler from it.
Add pointers to the page fault handler and its private data in struct
iommu_domain. The fault handler will be called with the private data
as a parameter once a page fault is routed to the domain. Any kernel
component which owns an iommu domain can install a handler and its
private data so that the page fault can be further routed and
handled.
A new helper, iommu_get_domain_for_dev_pasid(), which retrieves the
domain attached to a {device, PASID} pair, is added. It will be used by
the page fault handling framework, which knows the {device, PASID}
reported by the iommu driver. We have a guarantee that the SVA domain
doesn't go away during IOPF handling, because unbind() waits for pending
faults with iopf_queue_flush_dev() before freeing the domain. Hence,
there's no need to synchronize the life cycle of the iommu domains
between unbind() and the interrupt threads.
This also prepares the SVA implementation to be the first consumer of
the per-domain page fault handling model.
Signed-off-by: Lu Baolu <[email protected]>
---
include/linux/iommu.h | 12 +++++++
drivers/iommu/iommu-sva-lib.c | 65 +++++++++++++++++++++++++++++++++++
drivers/iommu/iommu.c | 21 +++++++++++
3 files changed, 98 insertions(+)
diff --git a/include/linux/iommu.h b/include/linux/iommu.h
index 19718939d9df..1164524814cb 100644
--- a/include/linux/iommu.h
+++ b/include/linux/iommu.h
@@ -102,6 +102,9 @@ struct iommu_domain {
struct iommu_domain_geometry geometry;
struct iommu_dma_cookie *iova_cookie;
struct iommu_sva_ioas *sva_ioas;
+ enum iommu_page_response_code (*iopf_handler)(struct iommu_fault *fault,
+ void *data);
+ void *fault_data;
};
static inline bool iommu_is_dma_domain(struct iommu_domain *domain)
@@ -686,6 +689,9 @@ int iommu_attach_device_pasid(struct iommu_domain *domain,
struct device *dev, ioasid_t pasid);
void iommu_detach_device_pasid(struct iommu_domain *domain,
struct device *dev, ioasid_t pasid);
+struct iommu_domain *
+iommu_get_domain_for_dev_pasid(struct device *dev, ioasid_t pasid);
+
#else /* CONFIG_IOMMU_API */
struct iommu_ops {};
@@ -1055,6 +1061,12 @@ static inline void iommu_detach_device_pasid(struct iommu_domain *domain,
struct device *dev, ioasid_t pasid)
{
}
+
+static inline struct iommu_domain *
+iommu_get_domain_for_dev_pasid(struct device *dev, ioasid_t pasid)
+{
+ return NULL;
+}
#endif /* CONFIG_IOMMU_API */
#ifdef CONFIG_IOMMU_SVA
diff --git a/drivers/iommu/iommu-sva-lib.c b/drivers/iommu/iommu-sva-lib.c
index 992388106da0..05a7d2f0e46f 100644
--- a/drivers/iommu/iommu-sva-lib.c
+++ b/drivers/iommu/iommu-sva-lib.c
@@ -135,6 +135,69 @@ static void iommu_sva_ioas_put(struct iommu_sva_ioas *ioas)
}
}
+/*
+ * I/O page fault handler for SVA
+ *
+ * Copied from io-pgfault.c with mmget_not_zero() added before
+ * mmap_read_lock().
+ */
+static enum iommu_page_response_code
+iommu_sva_handle_iopf(struct iommu_fault *fault, void *data)
+{
+ vm_fault_t ret;
+ struct mm_struct *mm;
+ struct vm_area_struct *vma;
+ unsigned int access_flags = 0;
+ struct iommu_domain *domain = data;
+ unsigned int fault_flags = FAULT_FLAG_REMOTE;
+ struct iommu_fault_page_request *prm = &fault->prm;
+ enum iommu_page_response_code status = IOMMU_PAGE_RESP_INVALID;
+
+ if (!(prm->flags & IOMMU_FAULT_PAGE_REQUEST_PASID_VALID))
+ return status;
+
+ mm = iommu_sva_domain_mm(domain);
+ if (IS_ERR_OR_NULL(mm) || !mmget_not_zero(mm))
+ return status;
+
+ mmap_read_lock(mm);
+
+ vma = find_extend_vma(mm, prm->addr);
+ if (!vma)
+ /* Unmapped area */
+ goto out_put_mm;
+
+ if (prm->perm & IOMMU_FAULT_PERM_READ)
+ access_flags |= VM_READ;
+
+ if (prm->perm & IOMMU_FAULT_PERM_WRITE) {
+ access_flags |= VM_WRITE;
+ fault_flags |= FAULT_FLAG_WRITE;
+ }
+
+ if (prm->perm & IOMMU_FAULT_PERM_EXEC) {
+ access_flags |= VM_EXEC;
+ fault_flags |= FAULT_FLAG_INSTRUCTION;
+ }
+
+ if (!(prm->perm & IOMMU_FAULT_PERM_PRIV))
+ fault_flags |= FAULT_FLAG_USER;
+
+ if (access_flags & ~vma->vm_flags)
+ /* Access fault */
+ goto out_put_mm;
+
+ ret = handle_mm_fault(vma, prm->addr, fault_flags, NULL);
+ status = ret & VM_FAULT_ERROR ? IOMMU_PAGE_RESP_INVALID :
+ IOMMU_PAGE_RESP_SUCCESS;
+
+out_put_mm:
+ mmap_read_unlock(mm);
+ mmput(mm);
+
+ return status;
+}
+
/*
* IOMMU SVA driver-oriented interfaces
*/
@@ -154,6 +217,8 @@ iommu_sva_alloc_domain(struct device *dev, struct iommu_sva_ioas *ioas)
/* The caller must hold a reference to ioas. */
domain->sva_ioas = ioas;
domain->type = IOMMU_DOMAIN_SVA;
+ domain->iopf_handler = iommu_sva_handle_iopf;
+ domain->fault_data = domain;
return domain;
}
diff --git a/drivers/iommu/iommu.c b/drivers/iommu/iommu.c
index 7cae631c1baa..33449523afbe 100644
--- a/drivers/iommu/iommu.c
+++ b/drivers/iommu/iommu.c
@@ -3174,3 +3174,24 @@ void iommu_detach_device_pasid(struct iommu_domain *domain,
iommu_group_put(group);
}
+
+struct iommu_domain *iommu_get_domain_for_dev_pasid(struct device *dev,
+ ioasid_t pasid)
+{
+ struct iommu_domain *domain;
+ struct iommu_group *group;
+
+ if (!pasid_valid(pasid))
+ return NULL;
+
+ group = iommu_group_get(dev);
+ if (!group)
+ return NULL;
+
+ mutex_lock(&group->mutex);
+ domain = xa_load(&group->pasid_array, pasid);
+ mutex_unlock(&group->mutex);
+ iommu_group_put(group);
+
+ return domain;
+}
--
2.25.1
On Mon, May 02, 2022 at 09:48:40AM +0800, Lu Baolu wrote:
> This adds some mechanisms around the iommu_domain so that the I/O page
> fault handling framework could route a page fault to the domain and
> call the fault handler from it.
>
> Add pointers to the page fault handler and its private data in struct
> iommu_domain. The fault handler will be called with the private data
> as a parameter once a page fault is routed to the domain. Any kernel
> component which owns an iommu domain could install handler and its
> private parameter so that the page fault could be further routed and
> handled.
>
> A new helper iommu_get_domain_for_dev_pasid() which retrieves attached
> domain for a {device, PASID} is added. It will be used by the page fault
> handling framework which knows {device, PASID} reported from the iommu
> driver. We have a guarantee that the SVA domain doesn't go away during
> IOPF handling, because unbind() waits for pending faults with
> iopf_queue_flush_dev() before freeing the domain. Hence, there's no need
> to synchronize life cycle of the iommu domains between the unbind() and
> the interrupt threads.
>
> This also prepares the SVA implementation to be the first consumer of
> the per-domain page fault handling model.
>
> Signed-off-by: Lu Baolu <[email protected]>
> ---
> include/linux/iommu.h | 12 +++++++
> drivers/iommu/iommu-sva-lib.c | 65 +++++++++++++++++++++++++++++++++++
> drivers/iommu/iommu.c | 21 +++++++++++
> 3 files changed, 98 insertions(+)
>
> diff --git a/include/linux/iommu.h b/include/linux/iommu.h
> index 19718939d9df..1164524814cb 100644
> --- a/include/linux/iommu.h
> +++ b/include/linux/iommu.h
> @@ -102,6 +102,9 @@ struct iommu_domain {
> struct iommu_domain_geometry geometry;
> struct iommu_dma_cookie *iova_cookie;
> struct iommu_sva_ioas *sva_ioas;
> + enum iommu_page_response_code (*iopf_handler)(struct iommu_fault *fault,
> + void *data);
> + void *fault_data;
> };
>
> static inline bool iommu_is_dma_domain(struct iommu_domain *domain)
> @@ -686,6 +689,9 @@ int iommu_attach_device_pasid(struct iommu_domain *domain,
> struct device *dev, ioasid_t pasid);
> void iommu_detach_device_pasid(struct iommu_domain *domain,
> struct device *dev, ioasid_t pasid);
> +struct iommu_domain *
> +iommu_get_domain_for_dev_pasid(struct device *dev, ioasid_t pasid);
> +
> #else /* CONFIG_IOMMU_API */
>
> struct iommu_ops {};
> @@ -1055,6 +1061,12 @@ static inline void iommu_detach_device_pasid(struct iommu_domain *domain,
> struct device *dev, ioasid_t pasid)
> {
> }
> +
> +static inline struct iommu_domain *
> +iommu_get_domain_for_dev_pasid(struct device *dev, ioasid_t pasid)
> +{
> + return NULL;
> +}
> #endif /* CONFIG_IOMMU_API */
>
> #ifdef CONFIG_IOMMU_SVA
> diff --git a/drivers/iommu/iommu-sva-lib.c b/drivers/iommu/iommu-sva-lib.c
> index 992388106da0..05a7d2f0e46f 100644
> --- a/drivers/iommu/iommu-sva-lib.c
> +++ b/drivers/iommu/iommu-sva-lib.c
> @@ -135,6 +135,69 @@ static void iommu_sva_ioas_put(struct iommu_sva_ioas *ioas)
> }
> }
>
> +/*
> + * I/O page fault handler for SVA
> + *
> + * Copied from io-pgfault.c with mmget_not_zero() added before
> + * mmap_read_lock().
> + */
> +static enum iommu_page_response_code
> +iommu_sva_handle_iopf(struct iommu_fault *fault, void *data)
> +{
> + vm_fault_t ret;
> + struct mm_struct *mm;
> + struct vm_area_struct *vma;
> + unsigned int access_flags = 0;
> + struct iommu_domain *domain = data;
> + unsigned int fault_flags = FAULT_FLAG_REMOTE;
> + struct iommu_fault_page_request *prm = &fault->prm;
> + enum iommu_page_response_code status = IOMMU_PAGE_RESP_INVALID;
> +
> + if (!(prm->flags & IOMMU_FAULT_PAGE_REQUEST_PASID_VALID))
> + return status;
> +
> + mm = iommu_sva_domain_mm(domain);
> + if (IS_ERR_OR_NULL(mm) || !mmget_not_zero(mm))
> + return status;
> +
> + mmap_read_lock(mm);
> +
> + vma = find_extend_vma(mm, prm->addr);
> + if (!vma)
> + /* Unmapped area */
> + goto out_put_mm;
> +
> + if (prm->perm & IOMMU_FAULT_PERM_READ)
> + access_flags |= VM_READ;
> +
> + if (prm->perm & IOMMU_FAULT_PERM_WRITE) {
> + access_flags |= VM_WRITE;
> + fault_flags |= FAULT_FLAG_WRITE;
> + }
> +
> + if (prm->perm & IOMMU_FAULT_PERM_EXEC) {
> + access_flags |= VM_EXEC;
> + fault_flags |= FAULT_FLAG_INSTRUCTION;
> + }
> +
> + if (!(prm->perm & IOMMU_FAULT_PERM_PRIV))
> + fault_flags |= FAULT_FLAG_USER;
> +
> + if (access_flags & ~vma->vm_flags)
> + /* Access fault */
> + goto out_put_mm;
> +
> + ret = handle_mm_fault(vma, prm->addr, fault_flags, NULL);
> + status = ret & VM_FAULT_ERROR ? IOMMU_PAGE_RESP_INVALID :
> + IOMMU_PAGE_RESP_SUCCESS;
> +
> +out_put_mm:
> + mmap_read_unlock(mm);
> + mmput(mm);
> +
> + return status;
> +}
> +
> /*
> * IOMMU SVA driver-oriented interfaces
> */
> @@ -154,6 +217,8 @@ iommu_sva_alloc_domain(struct device *dev, struct iommu_sva_ioas *ioas)
> /* The caller must hold a reference to ioas. */
> domain->sva_ioas = ioas;
> domain->type = IOMMU_DOMAIN_SVA;
> + domain->iopf_handler = iommu_sva_handle_iopf;
> + domain->fault_data = domain;
>
> return domain;
> }
> diff --git a/drivers/iommu/iommu.c b/drivers/iommu/iommu.c
> index 7cae631c1baa..33449523afbe 100644
> --- a/drivers/iommu/iommu.c
> +++ b/drivers/iommu/iommu.c
> @@ -3174,3 +3174,24 @@ void iommu_detach_device_pasid(struct iommu_domain *domain,
>
> iommu_group_put(group);
> }
> +
> +struct iommu_domain *iommu_get_domain_for_dev_pasid(struct device *dev,
> + ioasid_t pasid)
> +{
> + struct iommu_domain *domain;
> + struct iommu_group *group;
> +
> + if (!pasid_valid(pasid))
> + return NULL;
> +
> + group = iommu_group_get(dev);
> + if (!group)
> + return NULL;
> +
> + mutex_lock(&group->mutex);
Unfortunately this still causes the deadlock when unbind() flushes the
IOPF queue while holding the group mutex.
If we make this function private to IOPF, then we can get rid of this
mutex_lock(). It's OK because:
* xarray protects its internal state with RCU, so we can call
xa_load() outside the lock.
* The domain obtained from xa_load is finalized. Its content is valid
because xarray stores the domain using rcu_assign_pointer(), which has a
release memory barrier, which pairs with data dependencies in IOPF
(domain->sva_ioas etc).
We'll need to be careful about this when allowing other users to install
a fault handler. Should be fine as long as the handler and data are
installed before the domain is added to pasid_array.
* We know the domain is valid the whole time IOPF is using it, because
unbind() waits for pending faults.
We just need a comment explaining the last point, something like:
/*
* Safe to fetch outside the group mutex because:
* - xarray protects its internal state with RCU
* - the domain obtained is either NULL or fully formed
* - the IOPF work is the only caller and is flushed before the
* domain is freed.
*/
Thanks,
Jean
> + domain = xa_load(&group->pasid_array, pasid);
> + mutex_unlock(&group->mutex);
> + iommu_group_put(group);
> +
> + return domain;
> +}
> --
> 2.25.1
>
On 2022/5/5 21:38, Jean-Philippe Brucker wrote:
> Hi Baolu,
>
> On Thu, May 05, 2022 at 04:31:38PM +0800, Baolu Lu wrote:
>> On 2022/5/4 02:20, Jean-Philippe Brucker wrote:
>>>> diff --git a/drivers/iommu/iommu.c b/drivers/iommu/iommu.c
>>>> index 7cae631c1baa..33449523afbe 100644
>>>> --- a/drivers/iommu/iommu.c
>>>> +++ b/drivers/iommu/iommu.c
>>>> @@ -3174,3 +3174,24 @@ void iommu_detach_device_pasid(struct iommu_domain *domain,
>>>> iommu_group_put(group);
>>>> }
>>>> +
>>>> +struct iommu_domain *iommu_get_domain_for_dev_pasid(struct device *dev,
>>>> + ioasid_t pasid)
>>>> +{
>>>> + struct iommu_domain *domain;
>>>> + struct iommu_group *group;
>>>> +
>>>> + if (!pasid_valid(pasid))
>>>> + return NULL;
>>>> +
>>>> + group = iommu_group_get(dev);
>>>> + if (!group)
>>>> + return NULL;
>>>> +
>>>> + mutex_lock(&group->mutex);
>>> Unfortunately this still causes the deadlock when unbind() flushes the
>>> IOPF queue while holding the group mutex.
>>
>> Sorry, I didn't get your point here.
>>
>> Do you mean unbind() could hold group mutex before calling this helper?
>> The group mutex is only available in iommu.c. The unbind() has no means
>> to hold this lock. Or, I missed anything?
>
> I wasn't clear, it's iommu_detach_device_pasid() that holds the
> group->mutex:
>
> iommu_sva_unbind_device() |
> iommu_detach_device_pasid() |
> mutex_lock(&group->mutex) |
> domain->ops->detach_dev_pasid() | iopf_handle_group()
> iopf_queue_flush_dev() | iommu_get_domain_for_dev_pasid()
> ... wait for IOPF work | mutex_lock(&group->mutex)
> | ... deadlock
Ah! Yes. Thank you for the clarification.
>
> Thanks,
> Jean
>
>>
>> Best regards,
>> baolu
>>
>>>
>>> If we make this function private to IOPF, then we can get rid of this
>>> mutex_lock(). It's OK because:
>>>
>>> * xarray protects its internal state with RCU, so we can call
>>> xa_load() outside the lock.
>>>
>>> * The domain obtained from xa_load is finalized. Its content is valid
>>> because xarray stores the domain using rcu_assign_pointer(), which has a
>>> release memory barrier, which pairs with data dependencies in IOPF
>>> (domain->sva_ioas etc).
>>>
>>> We'll need to be careful about this when allowing other users to install
>>> a fault handler. Should be fine as long as the handler and data are
>>> installed before the domain is added to pasid_array.
>>>
>>> * We know the domain is valid the whole time IOPF is using it, because
>>> unbind() waits for pending faults.
>>>
>>> We just need a comment explaining the last point, something like:
>>>
>>> /*
>>> * Safe to fetch outside the group mutex because:
>>> * - xarray protects its internal state with RCU
>>> * - the domain obtained is either NULL or fully formed
>>> * - the IOPF work is the only caller and is flushed before the
>>> * domain is freed.
>>> */
Agreed. The mutex is needed only when the domain could possibly be freed
before unbind(). In that case, we would need this mutex and would have
to take a reference on the domain. As we have dropped the domain user
reference, this lock is unnecessary.
>>>
>>> Thanks,
>>> Jean
>>>
>>>> + domain = xa_load(&group->pasid_array, pasid);
>>>> + mutex_unlock(&group->mutex);
>>>> + iommu_group_put(group);
>>>> +
>>>> + return domain;
>>>> +}
>>
Best regards,
baolu
Hi Baolu,
On Thu, May 05, 2022 at 04:31:38PM +0800, Baolu Lu wrote:
> On 2022/5/4 02:20, Jean-Philippe Brucker wrote:
> > > diff --git a/drivers/iommu/iommu.c b/drivers/iommu/iommu.c
> > > index 7cae631c1baa..33449523afbe 100644
> > > --- a/drivers/iommu/iommu.c
> > > +++ b/drivers/iommu/iommu.c
> > > @@ -3174,3 +3174,24 @@ void iommu_detach_device_pasid(struct iommu_domain *domain,
> > > iommu_group_put(group);
> > > }
> > > +
> > > +struct iommu_domain *iommu_get_domain_for_dev_pasid(struct device *dev,
> > > + ioasid_t pasid)
> > > +{
> > > + struct iommu_domain *domain;
> > > + struct iommu_group *group;
> > > +
> > > + if (!pasid_valid(pasid))
> > > + return NULL;
> > > +
> > > + group = iommu_group_get(dev);
> > > + if (!group)
> > > + return NULL;
> > > +
> > > + mutex_lock(&group->mutex);
> > Unfortunately this still causes the deadlock when unbind() flushes the
> > IOPF queue while holding the group mutex.
>
> Sorry, I didn't get your point here.
>
> Do you mean unbind() could hold group mutex before calling this helper?
> The group mutex is only available in iommu.c. The unbind() has no means
> to hold this lock. Or, I missed anything?
I wasn't clear, it's iommu_detach_device_pasid() that holds the
group->mutex:
 iommu_sva_unbind_device()            |
  iommu_detach_device_pasid()         |
   mutex_lock(&group->mutex)          |
   domain->ops->detach_dev_pasid()    | iopf_handle_group()
    iopf_queue_flush_dev()            |  iommu_get_domain_for_dev_pasid()
     ... wait for IOPF work           |   mutex_lock(&group->mutex)
                                      |    ... deadlock
Thanks,
Jean
>
> Best regards,
> baolu
>
> >
> > If we make this function private to IOPF, then we can get rid of this
> > mutex_lock(). It's OK because:
> >
> > * xarray protects its internal state with RCU, so we can call
> > xa_load() outside the lock.
> >
> > * The domain obtained from xa_load is finalized. Its content is valid
> > because xarray stores the domain using rcu_assign_pointer(), which has a
> > release memory barrier, which pairs with data dependencies in IOPF
> > (domain->sva_ioas etc).
> >
> > We'll need to be careful about this when allowing other users to install
> > a fault handler. Should be fine as long as the handler and data are
> > installed before the domain is added to pasid_array.
> >
> > * We know the domain is valid the whole time IOPF is using it, because
> > unbind() waits for pending faults.
> >
> > We just need a comment explaining the last point, something like:
> >
> > /*
> > * Safe to fetch outside the group mutex because:
> > * - xarray protects its internal state with RCU
> > * - the domain obtained is either NULL or fully formed
> > * - the IOPF work is the only caller and is flushed before the
> > * domain is freed.
> > */
> >
> > Thanks,
> > Jean
> >
> > > + domain = xa_load(&group->pasid_array, pasid);
> > > + mutex_unlock(&group->mutex);
> > > + iommu_group_put(group);
> > > +
> > > + return domain;
> > > +}
>
On 2022/5/4 02:20, Jean-Philippe Brucker wrote:
>> diff --git a/drivers/iommu/iommu.c b/drivers/iommu/iommu.c
>> index 7cae631c1baa..33449523afbe 100644
>> --- a/drivers/iommu/iommu.c
>> +++ b/drivers/iommu/iommu.c
>> @@ -3174,3 +3174,24 @@ void iommu_detach_device_pasid(struct iommu_domain *domain,
>>
>> iommu_group_put(group);
>> }
>> +
>> +struct iommu_domain *iommu_get_domain_for_dev_pasid(struct device *dev,
>> + ioasid_t pasid)
>> +{
>> + struct iommu_domain *domain;
>> + struct iommu_group *group;
>> +
>> + if (!pasid_valid(pasid))
>> + return NULL;
>> +
>> + group = iommu_group_get(dev);
>> + if (!group)
>> + return NULL;
>> +
>> + mutex_lock(&group->mutex);
> Unfortunately this still causes the deadlock when unbind() flushes the
> IOPF queue while holding the group mutex.
Sorry, I didn't get your point here.
Do you mean unbind() could hold the group mutex before calling this
helper? The group mutex is only available in iommu.c, so unbind() has no
means to hold this lock. Or did I miss anything?
Best regards,
baolu
>
> If we make this function private to IOPF, then we can get rid of this
> mutex_lock(). It's OK because:
>
> * xarray protects its internal state with RCU, so we can call
> xa_load() outside the lock.
>
> * The domain obtained from xa_load is finalized. Its content is valid
> because xarray stores the domain using rcu_assign_pointer(), which has a
> release memory barrier, which pairs with data dependencies in IOPF
> (domain->sva_ioas etc).
>
> We'll need to be careful about this when allowing other users to install
> a fault handler. Should be fine as long as the handler and data are
> installed before the domain is added to pasid_array.
>
> * We know the domain is valid the whole time IOPF is using it, because
> unbind() waits for pending faults.
>
> We just need a comment explaining the last point, something like:
>
> /*
> * Safe to fetch outside the group mutex because:
> * - xarray protects its internal state with RCU
> * - the domain obtained is either NULL or fully formed
> * - the IOPF work is the only caller and is flushed before the
> * domain is freed.
> */
>
> Thanks,
> Jean
>
>> + domain = xa_load(&group->pasid_array, pasid);
>> + mutex_unlock(&group->mutex);
>> + iommu_group_put(group);
>> +
>> + return domain;
>> +}