This implements the .cache_invalidate_user() callback and sets the
.cache_invalidate_user_data_len to support iotlb flush for nested domain.
Signed-off-by: Lu Baolu <[email protected]>
Signed-off-by: Yi Liu <[email protected]>
---
drivers/iommu/intel/nested.c | 63 ++++++++++++++++++++++++++++++++++++
1 file changed, 63 insertions(+)
diff --git a/drivers/iommu/intel/nested.c b/drivers/iommu/intel/nested.c
index 98164894f22f..2739c0d7880d 100644
--- a/drivers/iommu/intel/nested.c
+++ b/drivers/iommu/intel/nested.c
@@ -69,8 +69,71 @@ static void intel_nested_domain_free(struct iommu_domain *domain)
kfree(to_dmar_domain(domain));
}
+static void intel_nested_invalidate(struct device *dev,
+ struct dmar_domain *domain,
+ u64 addr, unsigned long npages)
+{
+ struct device_domain_info *info = dev_iommu_priv_get(dev);
+ struct intel_iommu *iommu = info->iommu;
+
+ if (addr == 0 && npages == -1)
+ intel_flush_iotlb_all(&domain->domain);
+ else
+ iommu_flush_iotlb_psi(iommu, domain,
+ addr >> VTD_PAGE_SHIFT,
+ npages, 1, 0);
+}
+
+static int intel_nested_cache_invalidate_user(struct iommu_domain *domain,
+ void *user_data)
+{
+ struct iommu_hwpt_vtd_s1_invalidate_desc *req = user_data;
+ struct iommu_hwpt_vtd_s1_invalidate *inv_info = user_data;
+ struct dmar_domain *dmar_domain = to_dmar_domain(domain);
+ unsigned int entry_size = inv_info->entry_size;
+ u64 uptr = inv_info->inv_data_uptr;
+ u64 nr_uptr = inv_info->entry_nr_uptr;
+ struct device_domain_info *info;
+ u32 entry_nr, index;
+ unsigned long flags;
+ int ret = 0;
+
+ if (get_user(entry_nr, (uint32_t __user *)u64_to_user_ptr(nr_uptr)))
+ return -EFAULT;
+
+ for (index = 0; index < entry_nr; index++) {
+ ret = copy_struct_from_user(req, sizeof(*req),
+ u64_to_user_ptr(uptr + index * entry_size),
+ entry_size);
+ if (ret) {
+ pr_err_ratelimited("Failed to fetch invalidation request\n");
+ break;
+ }
+
+ if (req->__reserved || (req->flags & ~IOMMU_VTD_QI_FLAGS_LEAF) ||
+ !IS_ALIGNED(req->addr, VTD_PAGE_SIZE)) {
+ ret = -EINVAL;
+ break;
+ }
+
+ spin_lock_irqsave(&dmar_domain->lock, flags);
+ list_for_each_entry(info, &dmar_domain->devices, link)
+ intel_nested_invalidate(info->dev, dmar_domain,
+ req->addr, req->npages);
+ spin_unlock_irqrestore(&dmar_domain->lock, flags);
+ }
+
+ if (put_user(index, (uint32_t __user *)u64_to_user_ptr(nr_uptr)))
+ return -EFAULT;
+
+ return ret;
+}
+
static const struct iommu_domain_ops intel_nested_domain_ops = {
.attach_dev = intel_nested_attach_dev,
+ .cache_invalidate_user = intel_nested_cache_invalidate_user,
+ .cache_invalidate_user_data_len =
+ sizeof(struct iommu_hwpt_vtd_s1_invalidate),
.free = intel_nested_domain_free,
.enforce_cache_coherency = intel_iommu_enforce_cache_coherency,
};
--
2.34.1
> From: Liu, Yi L <[email protected]>
> Sent: Monday, July 24, 2023 7:14 PM
>
> +static int intel_nested_cache_invalidate_user(struct iommu_domain
> *domain,
> + void *user_data)
> +{
> + struct iommu_hwpt_vtd_s1_invalidate_desc *req = user_data;
> + struct iommu_hwpt_vtd_s1_invalidate *inv_info = user_data;
> + struct dmar_domain *dmar_domain = to_dmar_domain(domain);
> + unsigned int entry_size = inv_info->entry_size;
> + u64 uptr = inv_info->inv_data_uptr;
> + u64 nr_uptr = inv_info->entry_nr_uptr;
> + struct device_domain_info *info;
> + u32 entry_nr, index;
> + unsigned long flags;
> + int ret = 0;
> +
> + if (get_user(entry_nr, (uint32_t __user *)u64_to_user_ptr(nr_uptr)))
> + return -EFAULT;
> +
> + for (index = 0; index < entry_nr; index++) {
> + ret = copy_struct_from_user(req, sizeof(*req),
> + u64_to_user_ptr(uptr + index *
> entry_size),
> + entry_size);
If continuing this direction then the driver should also check minsz etc.
for struct iommu_hwpt_vtd_s1_invalidate and iommu_hwpt_vtd_s1_invalidate_desc
since they are uAPI and subject to change.
> + if (ret) {
> + pr_err_ratelimited("Failed to fetch invalidation
> request\n");
> + break;
> + }
> +
> + if (req->__reserved || (req->flags &
> ~IOMMU_VTD_QI_FLAGS_LEAF) ||
> + !IS_ALIGNED(req->addr, VTD_PAGE_SIZE)) {
> + ret = -EINVAL;
> + break;
> + }
> +
> + spin_lock_irqsave(&dmar_domain->lock, flags);
> + list_for_each_entry(info, &dmar_domain->devices, link)
> + intel_nested_invalidate(info->dev, dmar_domain,
> + req->addr, req->npages);
> + spin_unlock_irqrestore(&dmar_domain->lock, flags);
Disabling interrupt while invalidating iotlb is certainly unacceptable.
Actually there is no need to walk devices. Under dmar_domain there
is already a list of attached iommu's.
> From: Baolu Lu <[email protected]>
> Sent: Thursday, August 3, 2023 11:25 AM
>
> On 2023/8/2 15:46, Tian, Kevin wrote:
> >> From: Liu, Yi L <[email protected]>
> >> Sent: Monday, July 24, 2023 7:14 PM
> >>
> >> +
> >> + spin_lock_irqsave(&dmar_domain->lock, flags);
> >> + list_for_each_entry(info, &dmar_domain->devices, link)
> >> + intel_nested_invalidate(info->dev, dmar_domain,
> >> + req->addr, req->npages);
> >> + spin_unlock_irqrestore(&dmar_domain->lock, flags);
> >
> > Disabling interrupt while invalidating iotlb is certainly unacceptable.
> >
> > Actually there is no need to walk devices. Under dmar_domain there
> > is already a list of attached iommu's.
>
> Walking device is only necessary when invalidating device TLB. For iotlb
> invalidation, it only needs to know the iommu's.
>
even for device tlb we may think whether there is any better way
to avoid disabling interrupt. It's a slow path, especially in a guest.
On 2023/8/3 12:00, Tian, Kevin wrote:
>> From: Baolu Lu <[email protected]>
>> Sent: Thursday, August 3, 2023 11:25 AM
>>
>> On 2023/8/2 15:46, Tian, Kevin wrote:
>>>> From: Liu, Yi L <[email protected]>
>>>> Sent: Monday, July 24, 2023 7:14 PM
>>>>
>>>> +
>>>> + spin_lock_irqsave(&dmar_domain->lock, flags);
>>>> + list_for_each_entry(info, &dmar_domain->devices, link)
>>>> + intel_nested_invalidate(info->dev, dmar_domain,
>>>> + req->addr, req->npages);
>>>> + spin_unlock_irqrestore(&dmar_domain->lock, flags);
>>>
>>> Disabling interrupt while invalidating iotlb is certainly unacceptable.
>>>
>>> Actually there is no need to walk devices. Under dmar_domain there
>>> is already a list of attached iommu's.
>>
>> Walking device is only necessary when invalidating device TLB. For iotlb
>> invalidation, it only needs to know the iommu's.
>>
>
> even for device tlb we may think whether there is any better way
> to avoid disabling interrupt. It's a slow path, especially in a guest.
I ever tried this. But some device drivers call iommu_unmap() in the
interrupt critical path. :-( So we have a long way to go.
Best regards,
baolu
> From: Baolu Lu <[email protected]>
> Sent: Thursday, August 3, 2023 12:06 PM
>
> On 2023/8/3 12:00, Tian, Kevin wrote:
> >> From: Baolu Lu <[email protected]>
> >> Sent: Thursday, August 3, 2023 11:25 AM
> >>
> >> On 2023/8/2 15:46, Tian, Kevin wrote:
> >>>> From: Liu, Yi L <[email protected]>
> >>>> Sent: Monday, July 24, 2023 7:14 PM
> >>>>
> >>>> +
> >>>> + spin_lock_irqsave(&dmar_domain->lock, flags);
> >>>> + list_for_each_entry(info, &dmar_domain->devices, link)
> >>>> + intel_nested_invalidate(info->dev, dmar_domain,
> >>>> + req->addr, req->npages);
> >>>> + spin_unlock_irqrestore(&dmar_domain->lock, flags);
> >>>
> >>> Disabling interrupt while invalidating iotlb is certainly unacceptable.
> >>>
> >>> Actually there is no need to walk devices. Under dmar_domain there
> >>> is already a list of attached iommu's.
> >>
> >> Walking device is only necessary when invalidating device TLB. For iotlb
> >> invalidation, it only needs to know the iommu's.
> >>
> >
> > even for device tlb we may think whether there is any better way
> > to avoid disabling interrupt. It's a slow path, especially in a guest.
>
> I ever tried this. But some device drivers call iommu_unmap() in the
> interrupt critical path. :-( So we have a long way to go.
>
emmm... this path only comes from iommufd and the domain is
user-managed. There won't be kernel drivers to call iommu_unmap()
on such domain.
On 2023/8/2 15:46, Tian, Kevin wrote:
>> From: Liu, Yi L <[email protected]>
>> Sent: Monday, July 24, 2023 7:14 PM
>>
>> +static int intel_nested_cache_invalidate_user(struct iommu_domain
>> *domain,
>> + void *user_data)
>> +{
>> + struct iommu_hwpt_vtd_s1_invalidate_desc *req = user_data;
>> + struct iommu_hwpt_vtd_s1_invalidate *inv_info = user_data;
>> + struct dmar_domain *dmar_domain = to_dmar_domain(domain);
>> + unsigned int entry_size = inv_info->entry_size;
>> + u64 uptr = inv_info->inv_data_uptr;
>> + u64 nr_uptr = inv_info->entry_nr_uptr;
>> + struct device_domain_info *info;
>> + u32 entry_nr, index;
>> + unsigned long flags;
>> + int ret = 0;
>> +
>> + if (get_user(entry_nr, (uint32_t __user *)u64_to_user_ptr(nr_uptr)))
>> + return -EFAULT;
>> +
>> + for (index = 0; index < entry_nr; index++) {
>> + ret = copy_struct_from_user(req, sizeof(*req),
>> + u64_to_user_ptr(uptr + index *
>> entry_size),
>> + entry_size);
>
> If continuing this direction then the driver should also check minsz etc.
> for struct iommu_hwpt_vtd_s1_invalidate and iommu_hwpt_vtd_s1_invalidate_desc
> since they are uAPI and subject to change.
Agreed.
>
>> + if (ret) {
>> + pr_err_ratelimited("Failed to fetch invalidation
>> request\n");
>> + break;
>> + }
>> +
>> + if (req->__reserved || (req->flags &
>> ~IOMMU_VTD_QI_FLAGS_LEAF) ||
>> + !IS_ALIGNED(req->addr, VTD_PAGE_SIZE)) {
>> + ret = -EINVAL;
>> + break;
>> + }
>> +
>> + spin_lock_irqsave(&dmar_domain->lock, flags);
>> + list_for_each_entry(info, &dmar_domain->devices, link)
>> + intel_nested_invalidate(info->dev, dmar_domain,
>> + req->addr, req->npages);
>> + spin_unlock_irqrestore(&dmar_domain->lock, flags);
>
> Disabling interrupt while invalidating iotlb is certainly unacceptable.
>
> Actually there is no need to walk devices. Under dmar_domain there
> is already a list of attached iommu's.
Walking device is only necessary when invalidating device TLB. For iotlb
invalidation, it only needs to know the iommu's.
Best regards,
baolu
On 2023/8/3 12:13, Tian, Kevin wrote:
>> From: Baolu Lu <[email protected]>
>> Sent: Thursday, August 3, 2023 12:06 PM
>>
>> On 2023/8/3 12:00, Tian, Kevin wrote:
>>>> From: Baolu Lu <[email protected]>
>>>> Sent: Thursday, August 3, 2023 11:25 AM
>>>>
>>>> On 2023/8/2 15:46, Tian, Kevin wrote:
>>>>>> From: Liu, Yi L <[email protected]>
>>>>>> Sent: Monday, July 24, 2023 7:14 PM
>>>>>>
>>>>>> +
>>>>>> + spin_lock_irqsave(&dmar_domain->lock, flags);
>>>>>> + list_for_each_entry(info, &dmar_domain->devices, link)
>>>>>> + intel_nested_invalidate(info->dev, dmar_domain,
>>>>>> + req->addr, req->npages);
>>>>>> + spin_unlock_irqrestore(&dmar_domain->lock, flags);
>>>>>
>>>>> Disabling interrupt while invalidating iotlb is certainly unacceptable.
>>>>>
>>>>> Actually there is no need to walk devices. Under dmar_domain there
>>>>> is already a list of attached iommu's.
>>>>
>>>> Walking device is only necessary when invalidating device TLB. For iotlb
>>>> invalidation, it only needs to know the iommu's.
>>>>
>>>
>>> even for device tlb we may think whether there is any better way
>>> to avoid disabling interrupt. It's a slow path, especially in a guest.
>>
>> I ever tried this. But some device drivers call iommu_unmap() in the
>> interrupt critical path. :-( So we have a long way to go.
>>
>
> emmm... this path only comes from iommufd and the domain is
> user-managed. There won't be kernel drivers to call iommu_unmap()
> on such domain.
Probably we can use a different lock for nested domain and add a comment
around the lock with above explanation.
Best regards,
baolu
> From: Tian, Kevin <[email protected]>
> Sent: Wednesday, August 2, 2023 3:47 PM
>
> Subject: RE: [PATCH v4 09/12] iommu/vt-d: Add iotlb flush for nested domain
>
> > From: Liu, Yi L <[email protected]>
> > Sent: Monday, July 24, 2023 7:14 PM
> >
> > +static int intel_nested_cache_invalidate_user(struct iommu_domain
> > *domain,
> > + void *user_data)
> > +{
> > + struct iommu_hwpt_vtd_s1_invalidate_desc *req = user_data;
> > + struct iommu_hwpt_vtd_s1_invalidate *inv_info = user_data;
> > + struct dmar_domain *dmar_domain = to_dmar_domain(domain);
> > + unsigned int entry_size = inv_info->entry_size;
> > + u64 uptr = inv_info->inv_data_uptr;
> > + u64 nr_uptr = inv_info->entry_nr_uptr;
> > + struct device_domain_info *info;
> > + u32 entry_nr, index;
> > + unsigned long flags;
> > + int ret = 0;
> > +
> > + if (get_user(entry_nr, (uint32_t __user *)u64_to_user_ptr(nr_uptr)))
> > + return -EFAULT;
> > +
> > + for (index = 0; index < entry_nr; index++) {
> > + ret = copy_struct_from_user(req, sizeof(*req),
> > + u64_to_user_ptr(uptr + index *
> > entry_size),
> > + entry_size);
>
> If continuing this direction then the driver should also check minsz etc.
> for struct iommu_hwpt_vtd_s1_invalidate and iommu_hwpt_vtd_s1_invalidate_desc
> since they are uAPI and subject to change.
Then needs to define size in the uapi data structure, and copy size first and
check minsz before going forward. How about the structures for hwpt alloc
like struct iommu_hwpt_vtd_s1? Should check minsz for them as well?
Regards,
Yi Liu
On Mon, Aug 07, 2023 at 03:08:29PM +0000, Liu, Yi L wrote:
> > > From: Liu, Yi L <[email protected]>
> > > Sent: Monday, July 24, 2023 7:14 PM
> > >
> > > +static int intel_nested_cache_invalidate_user(struct iommu_domain
> > > *domain,
> > > + void *user_data)
> > > +{
> > > + struct iommu_hwpt_vtd_s1_invalidate_desc *req = user_data;
> > > + struct iommu_hwpt_vtd_s1_invalidate *inv_info = user_data;
> > > + struct dmar_domain *dmar_domain = to_dmar_domain(domain);
> > > + unsigned int entry_size = inv_info->entry_size;
> > > + u64 uptr = inv_info->inv_data_uptr;
> > > + u64 nr_uptr = inv_info->entry_nr_uptr;
> > > + struct device_domain_info *info;
> > > + u32 entry_nr, index;
> > > + unsigned long flags;
> > > + int ret = 0;
> > > +
> > > + if (get_user(entry_nr, (uint32_t __user *)u64_to_user_ptr(nr_uptr)))
> > > + return -EFAULT;
> > > +
> > > + for (index = 0; index < entry_nr; index++) {
> > > + ret = copy_struct_from_user(req, sizeof(*req),
> > > + u64_to_user_ptr(uptr + index *
> > > entry_size),
> > > + entry_size);
> >
> > If continuing this direction then the driver should also check minsz etc.
> > for struct iommu_hwpt_vtd_s1_invalidate and iommu_hwpt_vtd_s1_invalidate_desc
> > since they are uAPI and subject to change.
>
> Then needs to define size in the uapi data structure, and copy size first and
> check minsz before going forward. How about the structures for hwpt alloc
> like struct iommu_hwpt_vtd_s1? Should check minsz for them as well?
Assuming that every uAPI data structure needs a min_size, we can
either add a structure holding all min_sizes like iommufd main.c
or have another xx_min_len in iommu_/domain_ops.
Currently, we have the union of structures in iommu.h and also a
xx_data_len in iommu_ops. So, neither of the two places seem to
be optimal from my p.o.v... any suggestion?
Also, alloc allows data_len=0, so a min_size check will be only
applied to data_len > 0 case.
Thanks
Nic
On Mon, Aug 07, 2023 at 08:12:37PM -0700, Nicolin Chen wrote:
> On Mon, Aug 07, 2023 at 03:08:29PM +0000, Liu, Yi L wrote:
> > > > From: Liu, Yi L <[email protected]>
> > > > Sent: Monday, July 24, 2023 7:14 PM
> > > >
> > > > +static int intel_nested_cache_invalidate_user(struct iommu_domain
> > > > *domain,
> > > > + void *user_data)
> > > > +{
> > > > + struct iommu_hwpt_vtd_s1_invalidate_desc *req = user_data;
> > > > + struct iommu_hwpt_vtd_s1_invalidate *inv_info = user_data;
> > > > + struct dmar_domain *dmar_domain = to_dmar_domain(domain);
> > > > + unsigned int entry_size = inv_info->entry_size;
> > > > + u64 uptr = inv_info->inv_data_uptr;
> > > > + u64 nr_uptr = inv_info->entry_nr_uptr;
> > > > + struct device_domain_info *info;
> > > > + u32 entry_nr, index;
> > > > + unsigned long flags;
> > > > + int ret = 0;
> > > > +
> > > > + if (get_user(entry_nr, (uint32_t __user *)u64_to_user_ptr(nr_uptr)))
> > > > + return -EFAULT;
> > > > +
> > > > + for (index = 0; index < entry_nr; index++) {
> > > > + ret = copy_struct_from_user(req, sizeof(*req),
> > > > + u64_to_user_ptr(uptr + index *
> > > > entry_size),
> > > > + entry_size);
> > >
> > > If continuing this direction then the driver should also check minsz etc.
> > > for struct iommu_hwpt_vtd_s1_invalidate and iommu_hwpt_vtd_s1_invalidate_desc
> > > since they are uAPI and subject to change.
> >
> > Then needs to define size in the uapi data structure, and copy size first and
> > check minsz before going forward. How about the structures for hwpt alloc
> > like struct iommu_hwpt_vtd_s1? Should check minsz for them as well?
>
> Assuming that every uAPI data structure needs a min_size, we can
> either add a structure holding all min_sizes like iommufd main.c
> or have another xx_min_len in iommu_/domain_ops.
If driver is doing the copy it is OK that driver does the min_size
check too
Jason
On Tue, Aug 08, 2023 at 09:34:03AM -0300, Jason Gunthorpe wrote:
> On Mon, Aug 07, 2023 at 08:12:37PM -0700, Nicolin Chen wrote:
> > On Mon, Aug 07, 2023 at 03:08:29PM +0000, Liu, Yi L wrote:
> > > > > From: Liu, Yi L <[email protected]>
> > > > > Sent: Monday, July 24, 2023 7:14 PM
> > > > >
> > > > > +static int intel_nested_cache_invalidate_user(struct iommu_domain
> > > > > *domain,
> > > > > + void *user_data)
> > > > > +{
> > > > > + struct iommu_hwpt_vtd_s1_invalidate_desc *req = user_data;
> > > > > + struct iommu_hwpt_vtd_s1_invalidate *inv_info = user_data;
> > > > > + struct dmar_domain *dmar_domain = to_dmar_domain(domain);
> > > > > + unsigned int entry_size = inv_info->entry_size;
> > > > > + u64 uptr = inv_info->inv_data_uptr;
> > > > > + u64 nr_uptr = inv_info->entry_nr_uptr;
> > > > > + struct device_domain_info *info;
> > > > > + u32 entry_nr, index;
> > > > > + unsigned long flags;
> > > > > + int ret = 0;
> > > > > +
> > > > > + if (get_user(entry_nr, (uint32_t __user *)u64_to_user_ptr(nr_uptr)))
> > > > > + return -EFAULT;
> > > > > +
> > > > > + for (index = 0; index < entry_nr; index++) {
> > > > > + ret = copy_struct_from_user(req, sizeof(*req),
> > > > > + u64_to_user_ptr(uptr + index *
> > > > > entry_size),
> > > > > + entry_size);
> > > >
> > > > If continuing this direction then the driver should also check minsz etc.
> > > > for struct iommu_hwpt_vtd_s1_invalidate and iommu_hwpt_vtd_s1_invalidate_desc
> > > > since they are uAPI and subject to change.
> > >
> > > Then needs to define size in the uapi data structure, and copy size first and
> > > check minsz before going forward. How about the structures for hwpt alloc
> > > like struct iommu_hwpt_vtd_s1? Should check minsz for them as well?
> >
> > Assuming that every uAPI data structure needs a min_size, we can
> > either add a structure holding all min_sizes like iommufd main.c
> > or have another xx_min_len in iommu_/domain_ops.
>
> If driver is doing the copy it is OK that driver does the min_size
> check too
Ah, just realized my reply above was missing a context..
Yi and I are having a concern that the core iommu_hpwt_alloc()
and iommu_hwpt_cache_invalidate(), in the nesting series, copy
data without checking the min_sizes. So, we are trying to add
the missing piece into the next version but not sure which way
could be optimal.
It probably makes sense to add cache_invalidate_user_min_len
next to the existing cache_invalidate_user_data_len. For the
iommu_hwpt_alloc, we are missing a data_len, as the core just
uses sizeof(union iommu_domain_user_data) when calling the
copy_struct_from_user().
Perhaps we could add two pairs of data_len/min_len in the ops
structs:
// iommu_ops
const size_t domain_alloc_user_data_len; // for sanity&copy
const size_t domain_alloc_user_min_len; // for sanity only
// iommu_domain_ops
const size_t cache_invalidate_user_data_len; // for sanity&copy
const size_t cache_invalidate_user_min_len; // for sanity only
Thanks
Nic
> From: Nicolin Chen <[email protected]>
> Sent: Wednesday, August 9, 2023 1:42 AM
>
> On Tue, Aug 08, 2023 at 09:34:03AM -0300, Jason Gunthorpe wrote:
> > On Mon, Aug 07, 2023 at 08:12:37PM -0700, Nicolin Chen wrote:
> > > On Mon, Aug 07, 2023 at 03:08:29PM +0000, Liu, Yi L wrote:
> > > > > > From: Liu, Yi L <[email protected]>
> > > > > > Sent: Monday, July 24, 2023 7:14 PM
> > > > > >
> > > > > > +static int intel_nested_cache_invalidate_user(struct
> iommu_domain
> > > > > > *domain,
> > > > > > + void *user_data)
> > > > > > +{
> > > > > > + struct iommu_hwpt_vtd_s1_invalidate_desc *req = user_data;
> > > > > > + struct iommu_hwpt_vtd_s1_invalidate *inv_info = user_data;
> > > > > > + struct dmar_domain *dmar_domain = to_dmar_domain(domain);
> > > > > > + unsigned int entry_size = inv_info->entry_size;
> > > > > > + u64 uptr = inv_info->inv_data_uptr;
> > > > > > + u64 nr_uptr = inv_info->entry_nr_uptr;
> > > > > > + struct device_domain_info *info;
> > > > > > + u32 entry_nr, index;
> > > > > > + unsigned long flags;
> > > > > > + int ret = 0;
> > > > > > +
> > > > > > + if (get_user(entry_nr, (uint32_t __user
> *)u64_to_user_ptr(nr_uptr)))
> > > > > > + return -EFAULT;
> > > > > > +
> > > > > > + for (index = 0; index < entry_nr; index++) {
> > > > > > + ret = copy_struct_from_user(req, sizeof(*req),
> > > > > > + u64_to_user_ptr(uptr + index *
> > > > > > entry_size),
> > > > > > + entry_size);
> > > > >
> > > > > If continuing this direction then the driver should also check minsz etc.
> > > > > for struct iommu_hwpt_vtd_s1_invalidate and
> iommu_hwpt_vtd_s1_invalidate_desc
> > > > > since they are uAPI and subject to change.
> > > >
> > > > Then needs to define size in the uapi data structure, and copy size first
> and
> > > > check minsz before going forward. How about the structures for hwpt
> alloc
> > > > like struct iommu_hwpt_vtd_s1? Should check minsz for them as well?
> > >
> > > Assuming that every uAPI data structure needs a min_size, we can
> > > either add a structure holding all min_sizes like iommufd main.c
> > > or have another xx_min_len in iommu_/domain_ops.
> >
> > If driver is doing the copy it is OK that driver does the min_size
> > check too
>
> Ah, just realized my reply above was missing a context..
>
> Yi and I are having a concern that the core iommu_hpwt_alloc()
> and iommu_hwpt_cache_invalidate(), in the nesting series, copy
> data without checking the min_sizes. So, we are trying to add
> the missing piece into the next version but not sure which way
> could be optimal.
>
> It probably makes sense to add cache_invalidate_user_min_len
> next to the existing cache_invalidate_user_data_len. For the
> iommu_hwpt_alloc, we are missing a data_len, as the core just
> uses sizeof(union iommu_domain_user_data) when calling the
> copy_struct_from_user().
>
> Perhaps we could add two pairs of data_len/min_len in the ops
> structs:
> // iommu_ops
> const size_t domain_alloc_user_data_len; // for sanity&copy
> const size_t domain_alloc_user_min_len; // for sanity only
> // iommu_domain_ops
> const size_t cache_invalidate_user_data_len; // for sanity&copy
> const size_t cache_invalidate_user_min_len; // for sanity only
>
What about creating a simple array to track type specific len in
iommufd instead of adding more fields to iommu/domain_ops?
anyway it's iommufd doing the copy and all the type specific
structures are already defined in the uapi header.
and a similar example already exists in union ucmd_buffer which
includes type specific structures to avoid memory copy...
> From: Tian, Kevin <[email protected]>
> Sent: Wednesday, August 9, 2023 4:23 PM
>
> > From: Nicolin Chen <[email protected]>
> > Sent: Wednesday, August 9, 2023 1:42 AM
> >
> > On Tue, Aug 08, 2023 at 09:34:03AM -0300, Jason Gunthorpe wrote:
> > > On Mon, Aug 07, 2023 at 08:12:37PM -0700, Nicolin Chen wrote:
> > > > On Mon, Aug 07, 2023 at 03:08:29PM +0000, Liu, Yi L wrote:
> > > > > > > From: Liu, Yi L <[email protected]>
> > > > > > > Sent: Monday, July 24, 2023 7:14 PM
> > > > > > >
> > > > > > > +static int intel_nested_cache_invalidate_user(struct
> > iommu_domain
> > > > > > > *domain,
> > > > > > > + void *user_data)
> > > > > > > +{
> > > > > > > + struct iommu_hwpt_vtd_s1_invalidate_desc *req = user_data;
> > > > > > > + struct iommu_hwpt_vtd_s1_invalidate *inv_info = user_data;
> > > > > > > + struct dmar_domain *dmar_domain = to_dmar_domain(domain);
> > > > > > > + unsigned int entry_size = inv_info->entry_size;
> > > > > > > + u64 uptr = inv_info->inv_data_uptr;
> > > > > > > + u64 nr_uptr = inv_info->entry_nr_uptr;
> > > > > > > + struct device_domain_info *info;
> > > > > > > + u32 entry_nr, index;
> > > > > > > + unsigned long flags;
> > > > > > > + int ret = 0;
> > > > > > > +
> > > > > > > + if (get_user(entry_nr, (uint32_t __user
> > *)u64_to_user_ptr(nr_uptr)))
> > > > > > > + return -EFAULT;
> > > > > > > +
> > > > > > > + for (index = 0; index < entry_nr; index++) {
> > > > > > > + ret = copy_struct_from_user(req, sizeof(*req),
> > > > > > > + u64_to_user_ptr(uptr + index *
> > > > > > > entry_size),
> > > > > > > + entry_size);
> > > > > >
> > > > > > If continuing this direction then the driver should also check minsz etc.
> > > > > > for struct iommu_hwpt_vtd_s1_invalidate and
> > iommu_hwpt_vtd_s1_invalidate_desc
> > > > > > since they are uAPI and subject to change.
> > > > >
> > > > > Then needs to define size in the uapi data structure, and copy size first
> > and
> > > > > check minsz before going forward. How about the structures for hwpt
> > alloc
> > > > > like struct iommu_hwpt_vtd_s1? Should check minsz for them as well?
> > > >
> > > > Assuming that every uAPI data structure needs a min_size, we can
> > > > either add a structure holding all min_sizes like iommufd main.c
> > > > or have another xx_min_len in iommu_/domain_ops.
> > >
> > > If driver is doing the copy it is OK that driver does the min_size
> > > check too
> >
> > Ah, just realized my reply above was missing a context..
> >
> > Yi and I are having a concern that the core iommu_hpwt_alloc()
> > and iommu_hwpt_cache_invalidate(), in the nesting series, copy
> > data without checking the min_sizes. So, we are trying to add
> > the missing piece into the next version but not sure which way
> > could be optimal.
> >
> > It probably makes sense to add cache_invalidate_user_min_len
> > next to the existing cache_invalidate_user_data_len. For the
> > iommu_hwpt_alloc, we are missing a data_len, as the core just
> > uses sizeof(union iommu_domain_user_data) when calling the
> > copy_struct_from_user().
> >
> > Perhaps we could add two pairs of data_len/min_len in the ops
> > structs:
> > // iommu_ops
> > const size_t domain_alloc_user_data_len; // for sanity&copy
> > const size_t domain_alloc_user_min_len; // for sanity only
> > // iommu_domain_ops
> > const size_t cache_invalidate_user_data_len; // for sanity&copy
> > const size_t cache_invalidate_user_min_len; // for sanity only
> >
>
> What about creating a simple array to track type specific len in
> iommufd instead of adding more fields to iommu/domain_ops?
> anyway it's iommufd doing the copy and all the type specific
> structures are already defined in the uapi header.
Then index the array with type value, is it? Seems like we have defined
such array before for the length of hwpt_alloc and invalidate structures.
but finally we dropped it as the array may grow largely per new types.
>
> and a similar example already exists in union ucmd_buffer which
> includes type specific structures to avoid memory copy...
Not quite get here. ucmd_buffer is a union used to copy any user
data. But here we want to check the minsz of the the user data.
Seems not related.
Regards,
Yi Liu
> From: Tian, Kevin <[email protected]>
> Sent: Wednesday, August 9, 2023 4:58 PM
>
> > From: Liu, Yi L <[email protected]>
> > Sent: Wednesday, August 9, 2023 4:50 PM
> >
> > > From: Tian, Kevin <[email protected]>
> > > Sent: Wednesday, August 9, 2023 4:23 PM
> > >
> > > > From: Nicolin Chen <[email protected]>
> > > > Sent: Wednesday, August 9, 2023 1:42 AM
> > > >
> > > > On Tue, Aug 08, 2023 at 09:34:03AM -0300, Jason Gunthorpe wrote:
> > > > > On Mon, Aug 07, 2023 at 08:12:37PM -0700, Nicolin Chen wrote:
> > > > > > On Mon, Aug 07, 2023 at 03:08:29PM +0000, Liu, Yi L wrote:
> > > > > > > > > From: Liu, Yi L <[email protected]>
> > > > > > > > > Sent: Monday, July 24, 2023 7:14 PM
> > > > > > > > >
> > > > > > > > > +static int intel_nested_cache_invalidate_user(struct
> > > > iommu_domain
> > > > > > > > > *domain,
> > > > > > > > > + void *user_data)
> > > > > > > > > +{
> > > > > > > > > + struct iommu_hwpt_vtd_s1_invalidate_desc *req = user_data;
> > > > > > > > > + struct iommu_hwpt_vtd_s1_invalidate *inv_info = user_data;
> > > > > > > > > + struct dmar_domain *dmar_domain =
> > to_dmar_domain(domain);
> > > > > > > > > + unsigned int entry_size = inv_info->entry_size;
> > > > > > > > > + u64 uptr = inv_info->inv_data_uptr;
> > > > > > > > > + u64 nr_uptr = inv_info->entry_nr_uptr;
> > > > > > > > > + struct device_domain_info *info;
> > > > > > > > > + u32 entry_nr, index;
> > > > > > > > > + unsigned long flags;
> > > > > > > > > + int ret = 0;
> > > > > > > > > +
> > > > > > > > > + if (get_user(entry_nr, (uint32_t __user
> > > > *)u64_to_user_ptr(nr_uptr)))
> > > > > > > > > + return -EFAULT;
> > > > > > > > > +
> > > > > > > > > + for (index = 0; index < entry_nr; index++) {
> > > > > > > > > + ret = copy_struct_from_user(req, sizeof(*req),
> > > > > > > > > + u64_to_user_ptr(uptr + index *
> > > > > > > > > entry_size),
> > > > > > > > > + entry_size);
> > > > > > > >
> > > > > > > > If continuing this direction then the driver should also check minsz
> > etc.
> > > > > > > > for struct iommu_hwpt_vtd_s1_invalidate and
> > > > iommu_hwpt_vtd_s1_invalidate_desc
> > > > > > > > since they are uAPI and subject to change.
> > > > > > >
> > > > > > > Then needs to define size in the uapi data structure, and copy size
> > first
> > > > and
> > > > > > > check minsz before going forward. How about the structures for
> > hwpt
> > > > alloc
> > > > > > > like struct iommu_hwpt_vtd_s1? Should check minsz for them as
> > well?
> > > > > >
> > > > > > Assuming that every uAPI data structure needs a min_size, we can
> > > > > > either add a structure holding all min_sizes like iommufd main.c
> > > > > > or have another xx_min_len in iommu_/domain_ops.
> > > > >
> > > > > If driver is doing the copy it is OK that driver does the min_size
> > > > > check too
> > > >
> > > > Ah, just realized my reply above was missing a context..
> > > >
> > > > Yi and I are having a concern that the core iommu_hpwt_alloc()
> > > > and iommu_hwpt_cache_invalidate(), in the nesting series, copy
> > > > data without checking the min_sizes. So, we are trying to add
> > > > the missing piece into the next version but not sure which way
> > > > could be optimal.
> > > >
> > > > It probably makes sense to add cache_invalidate_user_min_len
> > > > next to the existing cache_invalidate_user_data_len. For the
> > > > iommu_hwpt_alloc, we are missing a data_len, as the core just
> > > > uses sizeof(union iommu_domain_user_data) when calling the
> > > > copy_struct_from_user().
> > > >
> > > > Perhaps we could add two pairs of data_len/min_len in the ops
> > > > structs:
> > > > // iommu_ops
> > > > const size_t domain_alloc_user_data_len; // for sanity&copy
> > > > const size_t domain_alloc_user_min_len; // for sanity only
> > > > // iommu_domain_ops
> > > > const size_t cache_invalidate_user_data_len; // for sanity&copy
> > > > const size_t cache_invalidate_user_min_len; // for sanity only
> > > >
> > >
> > > What about creating a simple array to track type specific len in
> > > iommufd instead of adding more fields to iommu/domain_ops?
> > > anyway it's iommufd doing the copy and all the type specific
> > > structures are already defined in the uapi header.
> >
> > Then index the array with type value, is it? Seems like we have defined
> > such array before for the length of hwpt_alloc and invalidate structures.
> > but finally we dropped it as the array may grow largely per new types.
>
> I'm not sure how many types iommufd will support in reality.
>
> Just my personal feeling that having information contained in iommufd
> is cleaner than expanding iommu core abstraction to assist iommufd
> user buffer copy/verification.
>
> >
> > >
> > > and a similar example already exists in union ucmd_buffer which
> > > includes type specific structures to avoid memory copy...
> >
> > Not quite get here. ucmd_buffer is a union used to copy any user
> > data. But here we want to check the minsz of the user data.
> > Seems not related.
> >
>
> that's the example for recording vendor specific structure information
> in iommufd and it will also grow along with new added types.
Yeah, adding new structures to ucmd_buffer may increase the size as
well if the new one is larger. While for an array, if there is new entry,
it is for sure to increase the size. I remember there is one tricky thing
when handling the selftest type. E.g. it is defined as 0xbadbeef, if using
it to index array, it would expire. So we have some special handling on
it. If defining the things in iommu_ops, it is simpler. Selftest may be
not so critical to determining the direction though.
Regards,
Yi Liu
> From: Liu, Yi L <[email protected]>
> Sent: Wednesday, August 9, 2023 4:50 PM
>
> > From: Tian, Kevin <[email protected]>
> > Sent: Wednesday, August 9, 2023 4:23 PM
> >
> > > From: Nicolin Chen <[email protected]>
> > > Sent: Wednesday, August 9, 2023 1:42 AM
> > >
> > > On Tue, Aug 08, 2023 at 09:34:03AM -0300, Jason Gunthorpe wrote:
> > > > On Mon, Aug 07, 2023 at 08:12:37PM -0700, Nicolin Chen wrote:
> > > > > On Mon, Aug 07, 2023 at 03:08:29PM +0000, Liu, Yi L wrote:
> > > > > > > > From: Liu, Yi L <[email protected]>
> > > > > > > > Sent: Monday, July 24, 2023 7:14 PM
> > > > > > > >
> > > > > > > > +static int intel_nested_cache_invalidate_user(struct
> > > iommu_domain
> > > > > > > > *domain,
> > > > > > > > + void *user_data)
> > > > > > > > +{
> > > > > > > > + struct iommu_hwpt_vtd_s1_invalidate_desc *req = user_data;
> > > > > > > > + struct iommu_hwpt_vtd_s1_invalidate *inv_info = user_data;
> > > > > > > > + struct dmar_domain *dmar_domain =
> to_dmar_domain(domain);
> > > > > > > > + unsigned int entry_size = inv_info->entry_size;
> > > > > > > > + u64 uptr = inv_info->inv_data_uptr;
> > > > > > > > + u64 nr_uptr = inv_info->entry_nr_uptr;
> > > > > > > > + struct device_domain_info *info;
> > > > > > > > + u32 entry_nr, index;
> > > > > > > > + unsigned long flags;
> > > > > > > > + int ret = 0;
> > > > > > > > +
> > > > > > > > + if (get_user(entry_nr, (uint32_t __user
> > > *)u64_to_user_ptr(nr_uptr)))
> > > > > > > > + return -EFAULT;
> > > > > > > > +
> > > > > > > > + for (index = 0; index < entry_nr; index++) {
> > > > > > > > + ret = copy_struct_from_user(req, sizeof(*req),
> > > > > > > > + u64_to_user_ptr(uptr + index *
> > > > > > > > entry_size),
> > > > > > > > + entry_size);
> > > > > > >
> > > > > > > If continuing this direction then the driver should also check minsz
> etc.
> > > > > > > for struct iommu_hwpt_vtd_s1_invalidate and
> > > iommu_hwpt_vtd_s1_invalidate_desc
> > > > > > > since they are uAPI and subject to change.
> > > > > >
> > > > > > Then needs to define size in the uapi data structure, and copy size
> first
> > > and
> > > > > > check minsz before going forward. How about the structures for
> hwpt
> > > alloc
> > > > > > like struct iommu_hwpt_vtd_s1? Should check minsz for them as
> well?
> > > > >
> > > > > Assuming that every uAPI data structure needs a min_size, we can
> > > > > either add a structure holding all min_sizes like iommufd main.c
> > > > > or have another xx_min_len in iommu_/domain_ops.
> > > >
> > > > If driver is doing the copy it is OK that driver does the min_size
> > > > check too
> > >
> > > Ah, just realized my reply above was missing a context..
> > >
> > > Yi and I are having a concern that the core iommu_hpwt_alloc()
> > > and iommu_hwpt_cache_invalidate(), in the nesting series, copy
> > > data without checking the min_sizes. So, we are trying to add
> > > the missing piece into the next version but not sure which way
> > > could be optimal.
> > >
> > > It probably makes sense to add cache_invalidate_user_min_len
> > > next to the existing cache_invalidate_user_data_len. For the
> > > iommu_hwpt_alloc, we are missing a data_len, as the core just
> > > uses sizeof(union iommu_domain_user_data) when calling the
> > > copy_struct_from_user().
> > >
> > > Perhaps we could add two pairs of data_len/min_len in the ops
> > > structs:
> > > // iommu_ops
> > > const size_t domain_alloc_user_data_len; // for sanity only
> > > const size_t domain_alloc_user_min_len; // for sanity only
> > > // iommu_domain_ops
> > > const size_t cache_invalidate_user_data_len; // for sanity only
> > > const size_t cache_invalidate_user_min_len; // for sanity only
> > >
> >
> > What about creating a simple array to track type specific len in
> > iommufd instead of adding more fields to iommu/domain_ops?
> > anyway it's iommufd doing the copy and all the type specific
> > structures are already defined in the uapi header.
>
> Then index the array with type value, is it? Seems like we have defined
> such array before for the length of hwpt_alloc and invalidate structures.
> but finally we dropped it as the array may grow largely per new types.
I'm not sure how many types iommufd will support in reality.
Just my personal feeling that having information contained in iommufd
is cleaner than expanding iommu core abstraction to assist iommufd
user buffer copy/verification.
>
> >
> > and a similar example already exists in union ucmd_buffer which
> > includes type specific structures to avoid memory copy...
>
> Not quite get here. ucmd_buffer is a union used to copy any user
> data. But here we want to check the minsz of the user data.
> Seems not related.
>
that's the example for recording vendor specific structure information
in iommufd and it will also grow along with new added types.
On Wed, Aug 09, 2023 at 09:30:12AM +0000, Liu, Yi L wrote:
> Yeah, adding new structures to ucmd_buffer may increase the size as
> well if the new one is larger. While for an array, if there is new entry,
> it is for sure to increase the size. I remember there is one tricky thing
> when handling the selftest type. E.g. it is defined as 0xbadbeef, if using
> it to index array, it would expire. So we have some special handling on
> it. If defining the things in iommu_ops, it is simpler. Selftest may be
> not so critical to determining the direction though.
Maybe we are trying too hard to make it "easy" on the driver.
Can't we just have the driver invoke some:
driver_iommufd_invalidate_op(??? *opaque)
{
struct driver_base_struct args;
rc = iommufd_get_args(opaque, &args, sizeof(args),
offsetof(args, last));
if (rc)
return rc;
}
The whole point of this exercise was to avoid the mistake where
drivers code the uapi checks incorrectly. We can achieve the same
outcome by providing a mandatory helper.
Similarly for managing the array of invalidation commands.
Jason
On Wed, Aug 09, 2023 at 12:12:25PM -0700, Nicolin Chen wrote:
> On Wed, Aug 09, 2023 at 01:24:56PM -0300, Jason Gunthorpe wrote:
> > On Wed, Aug 09, 2023 at 09:30:12AM +0000, Liu, Yi L wrote:
> >
> > > Yeah, adding new structures to ucmd_buffer may increase the size as
> > > well if the new one is larger. While for an array, if there is new entry,
> > > it is for sure to increase the size. I remember there is one tricky thing
> > > when handling the selftest type. E.g. it is defined as 0xbadbeef, if using
> > > it to index array, it would expire. So we have some special handling on
> > > it. If defining the things in iommu_ops, it is simpler. Selftest may be
> > > not so critical to determining the direction though.
> >
> > Maybe we are trying too hard to make it "easy" on the driver.
> >
> > Can't we just have the driver invoke some:
> >
> > driver_iommufd_invalidate_op(??? *opaque)
> > {
> > struct driver_base_struct args;
> >
> > rc = iommufd_get_args(opaque, &args, sizeof(args),
> > offsetof(args, last));
>
> OK. So, IIUIC, the opaque should be:
>
> struct iommu_user_data {
> void __user *data_uptr;
> size_t data_len;
> }user_data;
>
> And core does basic sanity of data_uptr != NULL and data_len !=0
> before passing this to driver, and then do a full sanity during
> the iommufd_get_args (or iommufd_get_user_data?) call.
Don't even need to check data_uptr and data_len, the helper should
check the size and null is caught by copy from user
> > Similarly for managing the array of invalidation commands.
>
> You mean an embedded uptr inside a driver user data struct right?
> Sure, that should go through the new helper too.
If we are committed that all drivers have to process an array then put
the array in the top level struct and pass it in the same user_data
struct and use another helper to allow the driver to iterate through
it.
Jason
On Wed, Aug 09, 2023 at 01:24:56PM -0300, Jason Gunthorpe wrote:
> On Wed, Aug 09, 2023 at 09:30:12AM +0000, Liu, Yi L wrote:
>
> > Yeah, adding new structures to ucmd_buffer may increase the size as
> > well if the new one is larger. While for an array, if there is new entry,
> > it is for sure to increase the size. I remember there is one tricky thing
> > when handling the selftest type. E.g. it is defined as 0xbadbeef, if using
> > it to index array, it would expire. So we have some special handling on
> > it. If defining the things in iommu_ops, it is simpler. Selftest may be
> > not so critical to determining the direction though.
>
> Maybe we are trying too hard to make it "easy" on the driver.
>
> Can't we just have the driver invoke some:
>
> driver_iommufd_invalidate_op(??? *opaque)
> {
> struct driver_base_struct args;
>
> rc = iommufd_get_args(opaque, &args, sizeof(args),
> offsetof(args, last));
OK. So, IIUIC, the opaque should be:
struct iommu_user_data {
void __user *data_uptr;
size_t data_len;
}user_data;
And core does basic sanity of data_uptr != NULL and data_len !=0
before passing this to driver, and then do a full sanity during
the iommufd_get_args (or iommufd_get_user_data?) call.
> The whole point of this excercise was to avoid the mistake where
> drivers code the uapi checks incorrectly. We can achieve the same
> outcome by providing a mandatory helper.
OK. I will rework this part today.
> Similarly for managing the array of invalidation commands.
You mean an embedded uptr inside a driver user data struct right?
Sure, that should go through the new helper too.
Thanks
Nicolin
On Wed, Aug 09, 2023 at 04:19:01PM -0300, Jason Gunthorpe wrote:
> On Wed, Aug 09, 2023 at 12:12:25PM -0700, Nicolin Chen wrote:
> > On Wed, Aug 09, 2023 at 01:24:56PM -0300, Jason Gunthorpe wrote:
> > > On Wed, Aug 09, 2023 at 09:30:12AM +0000, Liu, Yi L wrote:
> > >
> > > > Yeah, adding new structures to ucmd_buffer may increase the size as
> > > > well if the new one is larger. While for an array, if there is new entry,
> > > > it is for sure to increase the size. I remember there is one tricky thing
> > > > when handling the selftest type. E.g. it is defined as 0xbadbeef, if using
> > > > it to index array, it would expire. So we have some special handling on
> > > > it. If defining the things in iommu_ops, it is simpler. Selftest may be
> > > > not so critical to determining the direction though.
> > >
> > > Maybe we are trying too hard to make it "easy" on the driver.
> > >
> > > Can't we just have the driver invoke some:
> > >
> > > driver_iommufd_invalidate_op(??? *opaque)
> > > {
> > > struct driver_base_struct args;
> > >
> > > rc = iommufd_get_args(opaque, &args, sizeof(args),
> > > offsetof(args, last));
> >
> > OK. So, IIUIC, the opaque should be:
> >
> > struct iommu_user_data {
> > void __user *data_uptr;
> > size_t data_len;
> > }user_data;
> >
> > And core does basic sanity of data_uptr != NULL and data_len !=0
> > before passing this to driver, and then do a full sanity during
> > the iommufd_get_args (or iommufd_get_user_data?) call.
>
> Don't even need to check datA_uptr and data_len, the helper should
> check the size and null is caught by copy from user
I see. I was worried about the alloc path since its data input is
optional upon IOMMU_DOMAIN_UNMANAGED. But this helper should work
for that also.
In that case, we might not even need to define the union with all
structures, in iommu.h.
> > > Similarly for managing the array of invalidation commands.
> >
> > You mean an embedded uptr inside a driver user data struct right?
> > Sure, that should go through the new helper too.
>
> If we are committed that all drivers have to process an array then put
> the array in the top level struct and pass it in the same user_data
> struct and use another helper to allow the driver to iterate through
> it.
I see. Both VTD and SMMU pass uptr to the arrays of invalidation
commands/requests. The only difference is that SMMU's array is a
ring buffer other than a plain one indexing from the beginning.
But the helper could take two index inputs, which should work for
VTD case too. If another IOMMU driver only supports one request,
rather than an array of requests, we can treat that as a single-
entry array.
Then, the driver-specific data structure will be the array entry
level only.
@Yi,
This seems to be a bigger rework than the top level struct. Along
with Jason's request for fail_nth below, we'd need to bisect the
workload between us, or can just continue each other's daily work.
Let me know which one you prefer.
https://lore.kernel.org/linux-iommu/[email protected]/
Thanks!
Nic
> From: Nicolin Chen <[email protected]>
> Sent: Thursday, August 10, 2023 4:17 AM
>
> On Wed, Aug 09, 2023 at 04:19:01PM -0300, Jason Gunthorpe wrote:
> > On Wed, Aug 09, 2023 at 12:12:25PM -0700, Nicolin Chen wrote:
> > > On Wed, Aug 09, 2023 at 01:24:56PM -0300, Jason Gunthorpe wrote:
> > > > Similarly for managing the array of invalidation commands.
> > >
> > > You mean an embedded uptr inside a driver user data struct right?
> > > Sure, that should go through the new helper too.
> >
> > If we are committed that all drivers have to process an array then put
> > the array in the top level struct and pass it in the same user_data
> > struct and use another helper to allow the driver to iterate through
> > it.
>
> I see. Both VTD and SMMU pass uptr to the arrays of invalidation
> commands/requests. The only difference is that SMMU's array is a
> ring buffer other than a plain one indexing from the beginning.
> But the helper could take two index inputs, which should work for
> VTD case too. If another IOMMU driver only supports one request,
> rather than a array of requests, we can treat that as a single-
> entry array.
>
I like this approach.
> From: Nicolin Chen <[email protected]>
> Sent: Thursday, August 10, 2023 4:17 AM
>
> On Wed, Aug 09, 2023 at 04:19:01PM -0300, Jason Gunthorpe wrote:
> > On Wed, Aug 09, 2023 at 12:12:25PM -0700, Nicolin Chen wrote:
> > > On Wed, Aug 09, 2023 at 01:24:56PM -0300, Jason Gunthorpe wrote:
> > > > On Wed, Aug 09, 2023 at 09:30:12AM +0000, Liu, Yi L wrote:
> > > >
> > > > > Yeah, adding new structures to ucmd_buffer may increase the size as
> > > > > well if the new one is larger. While for an array, if there is new entry,
> > > > > it is for sure to increase the size. I remember there is one tricky thing
> > > > > when handling the selftest type. E.g. it is defined as 0xbadbeef, if using
> > > > > it to index array, it would expire. So we have some special handling on
> > > > > it. If defining the things in iommu_ops, it is simpler. Selftest may be
> > > > > not so critical to determining the direction though.
> > > >
> > > > Maybe we are trying too hard to make it "easy" on the driver.
> > > >
> > > > Can't we just have the driver invoke some:
> > > >
> > > > driver_iommufd_invalidate_op(??? *opaque)
> > > > {
> > > > struct driver_base_struct args;
> > > >
> > > > rc = iommufd_get_args(opaque, &args, sizeof(args),
> > > > offsetof(args, last));
> > >
> > > OK. So, IIUIC, the opaque should be:
> > >
> > > struct iommu_user_data {
> > > void __user *data_uptr;
> > > size_t data_len;
> > > }user_data;
> > >
> > > And core does basic sanity of data_uptr != NULL and data_len !=0
> > > before passing this to driver, and then do a full sanity during
> > > the iommufd_get_args (or iommufd_get_user_data?) call.
> >
> > Don't even need to check datA_uptr and data_len, the helper should
> > check the size and null is caught by copy from user
>
> I see. I was worried about the alloc path since its data input is
> optional upon IOMMU_DOMAIN_UNMANAGED. But this helper should work
> for that also.
>
> In that case, we might not even need to define the union with all
> structures, in iommu.h.
>
> > > > Similarly for managing the array of invalidation commands.
> > >
> > > You mean an embedded uptr inside a driver user data struct right?
> > > Sure, that should go through the new helper too.
> >
> > If we are committed that all drivers have to process an array then put
> > the array in the top level struct and pass it in the same user_data
> > struct and use another helper to allow the driver to iterate through
> > it.
>
> I see. Both VTD and SMMU pass uptr to the arrays of invalidation
> commands/requests. The only difference is that SMMU's array is a
> ring buffer other than a plain one indexing from the beginning.
> But the helper could take two index inputs, which should work for
> VTD case too. If another IOMMU driver only supports one request,
> rather than a array of requests, we can treat that as a single-
> entry array.
>
> Then, the driver-specific data structure will be the array entry
> level only.
>
> @Yi,
> This seems to be a bigger rework than the top level struct. Along
> with Jason's request for fail_nth below, we'd need to bisect the
> workload between us, or can just continue each other's daily work.
> Let me know which one you prefer.
> https://lore.kernel.org/linux-iommu/[email protected]/
Let me address the fail_nth request first. You may rework the
iommufd_get_user_data(). If I can finish the fail_nth soon,
then may help to lift the array to the top level. If not, you
may make it as well. 🙂 I guess I need some study on nth
as well.
Regards,
Yi Liu
On Thu, Aug 10, 2023 at 03:28:10AM +0000, Liu, Yi L wrote:
> > @Yi,
> > This seems to be a bigger rework than the top level struct. Along
> > with Jason's request for fail_nth below, we'd need to bisect the
> > workload between us, or can just continue each other's daily work.
> > Let me know which one you prefer.
> > https://lore.kernel.org/linux-iommu/[email protected]/
>
> Let me address the fail_nth request first. You may rework the
> iommufd_get_user_data(). If I can finish the fail_nth soon,
> then may help to lift the array to the top level. If not, you
> may make it as well. ???? I guess I need some study on nth
> as well.
Ack. Am working on the iommufd_get_user_data() already. Will
continue the ring buffer thing.
Nic
On Thu, Aug 10, 2023 at 02:49:59AM +0000, Tian, Kevin wrote:
> > From: Nicolin Chen <[email protected]>
> > Sent: Thursday, August 10, 2023 4:17 AM
> >
> > On Wed, Aug 09, 2023 at 04:19:01PM -0300, Jason Gunthorpe wrote:
> > > On Wed, Aug 09, 2023 at 12:12:25PM -0700, Nicolin Chen wrote:
> > > > On Wed, Aug 09, 2023 at 01:24:56PM -0300, Jason Gunthorpe wrote:
> > > > > Similarly for managing the array of invalidation commands.
> > > >
> > > > You mean an embedded uptr inside a driver user data struct right?
> > > > Sure, that should go through the new helper too.
> > >
> > > If we are committed that all drivers have to process an array then put
> > > the array in the top level struct and pass it in the same user_data
> > > struct and use another helper to allow the driver to iterate through
> > > it.
> >
> > I see. Both VTD and SMMU pass uptr to the arrays of invalidation
> > commands/requests. The only difference is that SMMU's array is a
> > ring buffer other than a plain one indexing from the beginning.
> > But the helper could take two index inputs, which should work for
> > VTD case too. If another IOMMU driver only supports one request,
> > rather than a array of requests, we can treat that as a single-
> > entry array.
> >
>
> I like this approach.
Do we need to worry about the ring wrap around? It is already the case
that the VMM has to scan the ring and extract the invalidation
commands, wouldn't it already just linearize them?
Is there a use case for invalidation only SW emulated rings, and do we
care about optimizing for the wrap around case?
Let's answer those questions before designing something complicated :)
Jason
On Thu, Aug 10, 2023 at 12:57:04PM -0300, Jason Gunthorpe wrote:
> On Thu, Aug 10, 2023 at 02:49:59AM +0000, Tian, Kevin wrote:
> > > From: Nicolin Chen <[email protected]>
> > > Sent: Thursday, August 10, 2023 4:17 AM
> > >
> > > On Wed, Aug 09, 2023 at 04:19:01PM -0300, Jason Gunthorpe wrote:
> > > > On Wed, Aug 09, 2023 at 12:12:25PM -0700, Nicolin Chen wrote:
> > > > > On Wed, Aug 09, 2023 at 01:24:56PM -0300, Jason Gunthorpe wrote:
> > > > > > Similarly for managing the array of invalidation commands.
> > > > >
> > > > > You mean an embedded uptr inside a driver user data struct right?
> > > > > Sure, that should go through the new helper too.
> > > >
> > > > If we are committed that all drivers have to process an array then put
> > > > the array in the top level struct and pass it in the same user_data
> > > > struct and use another helper to allow the driver to iterate through
> > > > it.
> > >
> > > I see. Both VTD and SMMU pass uptr to the arrays of invalidation
> > > commands/requests. The only difference is that SMMU's array is a
> > > ring buffer other than a plain one indexing from the beginning.
> > > But the helper could take two index inputs, which should work for
> > > VTD case too. If another IOMMU driver only supports one request,
> > > rather than a array of requests, we can treat that as a single-
> > > entry array.
> > >
> >
> > I like this approach.
>
> Do we need to worry about the ring wrap around? It is already the case
> that the VMM has to scan the ring and extract the invalidation
> commands, wouldn't it already just linearize them?
I haven't got the chance to send the latest vSMMU series but I
pass down the raw user CMDQ to the host to go through, as it'd
be easier to stall the consumer index movement when a command
in the middle fails.
> Is there a use case for invaliation only SW emulated rings, and do we
> care about optimizing for the wrap around case?
Hmm, why a SW emulated ring?
Yes for the latter question. SMMU kernel driver has something
like Q_WRP and other helpers, so it wasn't difficult to process
the user CMDQ in the same raw form. But it does complicates the
common code if we want to do it there.
Thanks
Nic
On Thu, Aug 10, 2023 at 10:14:37AM -0700, Nicolin Chen wrote:
> On Thu, Aug 10, 2023 at 12:57:04PM -0300, Jason Gunthorpe wrote:
> > On Thu, Aug 10, 2023 at 02:49:59AM +0000, Tian, Kevin wrote:
> > > > From: Nicolin Chen <[email protected]>
> > > > Sent: Thursday, August 10, 2023 4:17 AM
> > > >
> > > > On Wed, Aug 09, 2023 at 04:19:01PM -0300, Jason Gunthorpe wrote:
> > > > > On Wed, Aug 09, 2023 at 12:12:25PM -0700, Nicolin Chen wrote:
> > > > > > On Wed, Aug 09, 2023 at 01:24:56PM -0300, Jason Gunthorpe wrote:
> > > > > > > Similarly for managing the array of invalidation commands.
> > > > > >
> > > > > > You mean an embedded uptr inside a driver user data struct right?
> > > > > > Sure, that should go through the new helper too.
> > > > >
> > > > > If we are committed that all drivers have to process an array then put
> > > > > the array in the top level struct and pass it in the same user_data
> > > > > struct and use another helper to allow the driver to iterate through
> > > > > it.
> > > >
> > > > I see. Both VTD and SMMU pass uptr to the arrays of invalidation
> > > > commands/requests. The only difference is that SMMU's array is a
> > > > ring buffer other than a plain one indexing from the beginning.
> > > > But the helper could take two index inputs, which should work for
> > > > VTD case too. If another IOMMU driver only supports one request,
> > > > rather than a array of requests, we can treat that as a single-
> > > > entry array.
> > > >
> > >
> > > I like this approach.
> >
> > Do we need to worry about the ring wrap around? It is already the case
> > that the VMM has to scan the ring and extract the invalidation
> > commands, wouldn't it already just linearize them?
>
> I haven't got the chance to send the latest vSMMU series but I
> pass down the raw user CMDQ to the host to go through, as it'd
> be easier to stall the consumer index movement when a command
> in the middle fails.
Don't some commands have to be executed by the VMM?
Even so, it seems straightforward enough for the kernel to report the
number of commands it executed and the VMM can adjust the virtual
consumer index.
>
> > Is there a use case for invaliation only SW emulated rings, and do we
> > care about optimizing for the wrap around case?
>
> Hmm, why a SW emulated ring?
That is what you are building. The VMM catches the write of the
producer pointer and the VMM SW bundles it up to call into the kernel.
> Yes for the latter question. SMMU kernel driver has something
> like Q_WRP and other helpers, so it wasn't difficult to process
> the user CMDQ in the same raw form. But it does complicates the
> common code if we want to do it there.
Optimizing wrap around means when the producer/consumer pointers pass
the end of the queue memory we execute one, not two ioctls toward the
kernel. That is possibly a very minor optimization, it depends how big
the queues are and how frequent multi-entry items will be present.
Jason
On Thu, Aug 10, 2023 at 04:27:02PM -0300, Jason Gunthorpe wrote:
> > > Do we need to worry about the ring wrap around? It is already the case
> > > that the VMM has to scan the ring and extract the invalidation
> > > commands, wouldn't it already just linearize them?
> >
> > I haven't got the chance to send the latest vSMMU series but I
> > pass down the raw user CMDQ to the host to go through, as it'd
> > be easier to stall the consumer index movement when a command
> > in the middle fails.
>
> Don't some commands have to be executed by the VMM?
Well, they do. VMM would go through the queue and "execute" non-
invalidation commands, then defer the queue to the kernel to go
through the queue once more. So, the flaw could be that some of
the commands behind the failing TLB flush command got "executed",
though in a real case most of other commands would be "executed"
standalone with a CMD_SYNC, i.e. not mixing with any invalidation
command.
> Even so, it seems straightforward enough for the kernel to report the
> number of commands it executed and the VMM can adjust the virtual
> consumer index.
It is not that straightforward to revert an array index back to
a consumer index because they might not be 1:1 mapped, since in
theory there could be other commands mixing in-between, although
it is unlikely to happen.
So, another index-mapping array would be needed for this matter.
And this doesn't address the flaw that I mentioned above either.
So, I took the former solution to reduce the complication.
> > > Is there a use case for invaliation only SW emulated rings, and do we
> > > care about optimizing for the wrap around case?
> >
> > Hmm, why a SW emulated ring?
>
> That is what you are building. The VMM catches the write of the
> producer pointer and the VMM SW bundles it up to call into the kernel.
Still not fully getting it. Do you mean a ring that is prepared
by the VMM? I think the only case that we need to handle a ring
is what I did by forwarding the guest CMDQ (a ring) to the host
directly. Not sure why VMM would need another ring for those
linearized invalidation commands. Or maybe I misunderstood..
> > Yes for the latter question. SMMU kernel driver has something
> > like Q_WRP and other helpers, so it wasn't difficult to process
> > the user CMDQ in the same raw form. But it does complicates the
> > common code if we want to do it there.
>
> Optimizing wrap around means when the producer/consumer pointers pass
> the end of the queue memory we execute one, not two ioctls toward the
> kernel. That is possible a very minor optimization, it depends how big
> the queues are and how frequent multi-entry items will be present.
There could be other commands being issued by other VMs or even
the host between the two ioctls. So probably we'd need to handle
the wrapping case when doing a ring solution?
Thanks
Nicolin
> From: Nicolin Chen <[email protected]>
> Sent: Friday, August 11, 2023 5:03 AM
>
> > > > Is there a use case for invaliation only SW emulated rings, and do we
> > > > care about optimizing for the wrap around case?
> > >
> > > Hmm, why a SW emulated ring?
> >
> > That is what you are building. The VMM catches the write of the
> > producer pointer and the VMM SW bundles it up to call into the kernel.
>
> Still not fully getting it. Do you mean a ring that is prepared
> by the VMM? I think the only case that we need to handle a ring
> is what I did by forwarding the guest CMDQ (a ring) to the host
> directly. Not sure why VMM would need another ring for those
> linearized invalidation commands. Or maybe I misunderstood..
>
iiuc the point of a ring-based native format is to maximum code reuse
when later in-kernel fast invalidation path (from kvm to smmu driver)
is enabled. Then both slow (via vmm) and fast (in-kernel) path use
the same logic to handle guest invalidation queue.
But if stepping back a bit supporting an array-based non-native format
could simplify the uAPI design and allows code sharing for array among
vendor drivers. You can still keep the entry as native format then the
only difference with future in-kernel fast path is just on walking an array
vs. walking a ring. And VMM doesn't need to expose non-invalidate
cmds to the kernel and then be skipped.
Thanks
Kevin
On Fri, Aug 11, 2023 at 03:52:52AM +0000, Tian, Kevin wrote:
> > From: Nicolin Chen <[email protected]>
> > Sent: Friday, August 11, 2023 5:03 AM
> >
> > > > > Is there a use case for invaliation only SW emulated rings, and do we
> > > > > care about optimizing for the wrap around case?
> > > >
> > > > Hmm, why a SW emulated ring?
> > >
> > > That is what you are building. The VMM catches the write of the
> > > producer pointer and the VMM SW bundles it up to call into the kernel.
> >
> > Still not fully getting it. Do you mean a ring that is prepared
> > by the VMM? I think the only case that we need to handle a ring
> > is what I did by forwarding the guest CMDQ (a ring) to the host
> > directly. Not sure why VMM would need another ring for those
> > linearized invalidation commands. Or maybe I misunderstood..
> >
>
> iiuc the point of a ring-based native format is to maximum code reuse
> when later in-kernel fast invalidation path (from kvm to smmu driver)
> is enabled. Then both slow (via vmm) and fast (in-kernel) path use
> the same logic to handle guest invalidation queue.
I see. That's about the fast path topic. Thanks for the input.
> But if stepping back a bit supporting an array-based non-native format
> could simplify the uAPI design and allows code sharing for array among
> vendor drivers. You can still keep the entry as native format then the
> only difference with future in-kernel fast path is just on walking an array
> vs. walking a ring. And VMM doesn't need to expose non-invalidate
> cmds to the kernel and then be skipped.
Ah, so we might still design the uAPI to be ring based at this
moment, yet don't support a case CONS > 0 to leave that to an
upgrade in the future.
I will try estimating a bit how complicated to implement the
ring, to see if we could just start with that. Otherwise, will
just start with an array.
Thanks
Nic
On Fri, Aug 18, 2023 at 01:56:54PM -0300, Jason Gunthorpe wrote:
> On Mon, Aug 14, 2023 at 11:18:46PM -0700, Nicolin Chen wrote:
> > On Fri, Aug 11, 2023 at 09:45:21AM -0700, Nicolin Chen wrote:
> >
> > > > But if stepping back a bit supporting an array-based non-native format
> > > > could simplify the uAPI design and allows code sharing for array among
> > > > vendor drivers. You can still keep the entry as native format then the
> > > > only difference with future in-kernel fast path is just on walking an array
> > > > vs. walking a ring. And VMM doesn't need to expose non-invalidate
> > > > cmds to the kernel and then be skipped.
> > >
> > > Ah, so we might still design the uAPI to be ring based at this
> > > moment, yet don't support a case CONS > 0 to leave that to an
> > > upgrade in the future.
> > >
> > > I will try estimating a bit how complicated to implement the
> > > ring, to see if we could just start with that. Otherwise, will
> > > just start with an array.
> >
> > I drafted a uAPI structure for a ring-based SW queue. While I am
> > trying an implementation, I'd like to collect some comments at the
> > structure, to see if it overall makes sense.
>
> I don't think a ring makes alot of sense at this point. The only thing
> it optimizes is a system call if the kernel needs to wrap around the
> tail of the ring. It would possibly be better to have a small gather
> list than try to describe the ring logic..
>
> Further, the VMM already has to process it, so the vmm already knows
> what ops are going to the kernel and which are not. The VMM can just organize them
> in a linear list in one way or another. We need to copy and read this
> stuff in the VMM anyhow to protect against a hostile VM.
OK. Then a linear array it is.
> > One thing that I couldn't add to this common structure for SMMU
> > is the hardware error code, which should be encoded in the higher
> > bits of the consumer index register, following the SMMU spec:
> > ERR, bits [30:24] Error reason code.
> > - When a command execution error is detected, ERR is set to a
> > reason code and then the SMMU_GERROR.CMDQ_ERR global error
> > becomes active.
> > - The value in this field is UNKNOWN when the CMDQ_ERR global
> > error is not active. This field resets to an UNKNOWN value.
>
> The invalidate ioctl should fail in some deterministic way and report
> back the error code and the highest array index that maybe could have
> triggered it.
Yea. Having an error code in the highest bits of array_index,
"array_index != array_max" could be the deterministic way to
indicate a failure. And a kernel errno could be returned also
to the invalidate ioctl.
> The highest array index sounds generic, the error code maybe is too
We could do that and report the error code in its raw form:
__u32 out_array_index;
/* number of bits used to report error code in the returned array_index */
__u32 out_array_index_error_code_bits;
Or just:
__u32 out_array_index;
__u32 out_error_code;
Do we have to define a list of generic error code?
Thanks!
Nic
On Fri, Aug 18, 2023 at 03:20:55PM -0300, Jason Gunthorpe wrote:
> On Fri, Aug 18, 2023 at 10:56:45AM -0700, Nicolin Chen wrote:
>
> > > The highest array index sounds generic, the error code maybe is too
> >
> > We could do in its and report the error code in its raw form:
> > __u32 out_array_index;
> > /* number of bits used to report error code in the returned array_index */
> > __u32 out_array_index_error_code_bits;
> > Or just:
> > __u32 out_array_index;
> > __u32 out_error_code;
> >
> > Do we have to define a list of generic error code?
>
> out_driver_error_code may be OK
Ack. Will implement the array in that way.
Thanks!
Nic