2020-12-10 07:46:37

by Keqian Zhu

[permalink] [raw]
Subject: [PATCH 1/7] vfio: iommu_type1: Clear added dirty bit when unwind pin

Currently we do not clear added dirty bit of bitmap when unwind
pin, so if pin failed at halfway, we set unnecessary dirty bit
in bitmap. Clearing added dirty bit when unwind pin, userspace
will see less dirty page, which can save much time to handle them.

Note that we should distinguish the bits added by pin and the bits
already set before pin, so introduce bitmap_added to record this.

Signed-off-by: Keqian Zhu <[email protected]>
---
drivers/vfio/vfio_iommu_type1.c | 33 ++++++++++++++++++++++-----------
1 file changed, 22 insertions(+), 11 deletions(-)

diff --git a/drivers/vfio/vfio_iommu_type1.c b/drivers/vfio/vfio_iommu_type1.c
index 67e827638995..f129d24a6ec3 100644
--- a/drivers/vfio/vfio_iommu_type1.c
+++ b/drivers/vfio/vfio_iommu_type1.c
@@ -637,7 +637,11 @@ static int vfio_iommu_type1_pin_pages(void *iommu_data,
struct vfio_iommu *iommu = iommu_data;
struct vfio_group *group;
int i, j, ret;
+ unsigned long pgshift = __ffs(iommu->pgsize_bitmap);
unsigned long remote_vaddr;
+ unsigned long bitmap_offset;
+ unsigned long *bitmap_added;
+ dma_addr_t iova;
struct vfio_dma *dma;
bool do_accounting;

@@ -650,6 +654,12 @@ static int vfio_iommu_type1_pin_pages(void *iommu_data,

mutex_lock(&iommu->lock);

+ bitmap_added = bitmap_zalloc(npage, GFP_KERNEL);
+ if (!bitmap_added) {
+ ret = -ENOMEM;
+ goto pin_done;
+ }
+
/* Fail if notifier list is empty */
if (!iommu->notifier.head) {
ret = -EINVAL;
@@ -664,7 +674,6 @@ static int vfio_iommu_type1_pin_pages(void *iommu_data,
do_accounting = !IS_IOMMU_CAP_DOMAIN_IN_CONTAINER(iommu);

for (i = 0; i < npage; i++) {
- dma_addr_t iova;
struct vfio_pfn *vpfn;

iova = user_pfn[i] << PAGE_SHIFT;
@@ -699,14 +708,10 @@ static int vfio_iommu_type1_pin_pages(void *iommu_data,
}

if (iommu->dirty_page_tracking) {
- unsigned long pgshift = __ffs(iommu->pgsize_bitmap);
-
- /*
- * Bitmap populated with the smallest supported page
- * size
- */
- bitmap_set(dma->bitmap,
- (iova - dma->iova) >> pgshift, 1);
+ /* Populated with the smallest supported page size */
+ bitmap_offset = (iova - dma->iova) >> pgshift;
+ if (!test_and_set_bit(bitmap_offset, dma->bitmap))
+ set_bit(i, bitmap_added);
}
}
ret = i;
@@ -722,14 +727,20 @@ static int vfio_iommu_type1_pin_pages(void *iommu_data,
pin_unwind:
phys_pfn[i] = 0;
for (j = 0; j < i; j++) {
- dma_addr_t iova;
-
iova = user_pfn[j] << PAGE_SHIFT;
dma = vfio_find_dma(iommu, iova, PAGE_SIZE);
vfio_unpin_page_external(dma, iova, do_accounting);
phys_pfn[j] = 0;
+
+ if (test_bit(j, bitmap_added)) {
+ bitmap_offset = (iova - dma->iova) >> pgshift;
+ clear_bit(bitmap_offset, dma->bitmap);
+ }
}
pin_done:
+ if (bitmap_added)
+ bitmap_free(bitmap_added);
+
mutex_unlock(&iommu->lock);
return ret;
}
--
2.23.0


2020-12-11 12:05:49

by Keqian Zhu

[permalink] [raw]
Subject: Re: [PATCH 1/7] vfio: iommu_type1: Clear added dirty bit when unwind pin



On 2020/12/11 3:16, Alex Williamson wrote:
> On Thu, 10 Dec 2020 15:34:19 +0800
> Keqian Zhu <[email protected]> wrote:
>
>> Currently we do not clear added dirty bit of bitmap when unwind
>> pin, so if pin failed at halfway, we set unnecessary dirty bit
>> in bitmap. Clearing added dirty bit when unwind pin, userspace
>> will see less dirty page, which can save much time to handle them.
>>
>> Note that we should distinguish the bits added by pin and the bits
>> already set before pin, so introduce bitmap_added to record this.
>>
>> Signed-off-by: Keqian Zhu <[email protected]>
>> ---
>> drivers/vfio/vfio_iommu_type1.c | 33 ++++++++++++++++++++++-----------
>> 1 file changed, 22 insertions(+), 11 deletions(-)
>>
>> diff --git a/drivers/vfio/vfio_iommu_type1.c b/drivers/vfio/vfio_iommu_type1.c
>> index 67e827638995..f129d24a6ec3 100644
>> --- a/drivers/vfio/vfio_iommu_type1.c
>> +++ b/drivers/vfio/vfio_iommu_type1.c
>> @@ -637,7 +637,11 @@ static int vfio_iommu_type1_pin_pages(void *iommu_data,
>> struct vfio_iommu *iommu = iommu_data;
>> struct vfio_group *group;
>> int i, j, ret;
>> + unsigned long pgshift = __ffs(iommu->pgsize_bitmap);
>> unsigned long remote_vaddr;
>> + unsigned long bitmap_offset;
>> + unsigned long *bitmap_added;
>> + dma_addr_t iova;
>> struct vfio_dma *dma;
>> bool do_accounting;
>>
>> @@ -650,6 +654,12 @@ static int vfio_iommu_type1_pin_pages(void *iommu_data,
>>
>> mutex_lock(&iommu->lock);
>>
>> + bitmap_added = bitmap_zalloc(npage, GFP_KERNEL);
>> + if (!bitmap_added) {
>> + ret = -ENOMEM;
>> + goto pin_done;
>> + }
>
>
> This is allocated regardless of whether dirty tracking is enabled, so
> this adds overhead to the common case in order to reduce user overhead
> (not correctness) in the rare condition that dirty tracking is enabled,
> and the even rarer condition that there's a fault during that case.
> This is not a good trade-off. Thanks,

Hi Alex,

We can allocate the bitmap when dirty tracking is active, do you accept this?
Or we can set the dirty bitmap after all pins succeed, but this costs cpu time
to locate vfio_dma with iova.

Thanks,
Keqian

>
> Alex
>
>
>> +
>> /* Fail if notifier list is empty */
>> if (!iommu->notifier.head) {
>> ret = -EINVAL;
>> @@ -664,7 +674,6 @@ static int vfio_iommu_type1_pin_pages(void *iommu_data,
>> do_accounting = !IS_IOMMU_CAP_DOMAIN_IN_CONTAINER(iommu);
>>
>> for (i = 0; i < npage; i++) {
>> - dma_addr_t iova;
>> struct vfio_pfn *vpfn;
>>
>> iova = user_pfn[i] << PAGE_SHIFT;
>> @@ -699,14 +708,10 @@ static int vfio_iommu_type1_pin_pages(void *iommu_data,
>> }
>>
>> if (iommu->dirty_page_tracking) {
>> - unsigned long pgshift = __ffs(iommu->pgsize_bitmap);
>> -
>> - /*
>> - * Bitmap populated with the smallest supported page
>> - * size
>> - */
>> - bitmap_set(dma->bitmap,
>> - (iova - dma->iova) >> pgshift, 1);
>> + /* Populated with the smallest supported page size */
>> + bitmap_offset = (iova - dma->iova) >> pgshift;
>> + if (!test_and_set_bit(bitmap_offset, dma->bitmap))
>> + set_bit(i, bitmap_added);
>> }
>> }
>> ret = i;
>> @@ -722,14 +727,20 @@ static int vfio_iommu_type1_pin_pages(void *iommu_data,
>> pin_unwind:
>> phys_pfn[i] = 0;
>> for (j = 0; j < i; j++) {
>> - dma_addr_t iova;
>> -
>> iova = user_pfn[j] << PAGE_SHIFT;
>> dma = vfio_find_dma(iommu, iova, PAGE_SIZE);
>> vfio_unpin_page_external(dma, iova, do_accounting);
>> phys_pfn[j] = 0;
>> +
>> + if (test_bit(j, bitmap_added)) {
>> + bitmap_offset = (iova - dma->iova) >> pgshift;
>> + clear_bit(bitmap_offset, dma->bitmap);
>> + }
>> }
>> pin_done:
>> + if (bitmap_added)
>> + bitmap_free(bitmap_added);
>> +
>> mutex_unlock(&iommu->lock);
>> return ret;
>> }
>
> .
>

2020-12-11 21:16:31

by Alex Williamson

[permalink] [raw]
Subject: Re: [PATCH 1/7] vfio: iommu_type1: Clear added dirty bit when unwind pin

On Thu, 10 Dec 2020 15:34:19 +0800
Keqian Zhu <[email protected]> wrote:

> Currently we do not clear added dirty bit of bitmap when unwind
> pin, so if pin failed at halfway, we set unnecessary dirty bit
> in bitmap. Clearing added dirty bit when unwind pin, userspace
> will see less dirty page, which can save much time to handle them.
>
> Note that we should distinguish the bits added by pin and the bits
> already set before pin, so introduce bitmap_added to record this.
>
> Signed-off-by: Keqian Zhu <[email protected]>
> ---
> drivers/vfio/vfio_iommu_type1.c | 33 ++++++++++++++++++++++-----------
> 1 file changed, 22 insertions(+), 11 deletions(-)
>
> diff --git a/drivers/vfio/vfio_iommu_type1.c b/drivers/vfio/vfio_iommu_type1.c
> index 67e827638995..f129d24a6ec3 100644
> --- a/drivers/vfio/vfio_iommu_type1.c
> +++ b/drivers/vfio/vfio_iommu_type1.c
> @@ -637,7 +637,11 @@ static int vfio_iommu_type1_pin_pages(void *iommu_data,
> struct vfio_iommu *iommu = iommu_data;
> struct vfio_group *group;
> int i, j, ret;
> + unsigned long pgshift = __ffs(iommu->pgsize_bitmap);
> unsigned long remote_vaddr;
> + unsigned long bitmap_offset;
> + unsigned long *bitmap_added;
> + dma_addr_t iova;
> struct vfio_dma *dma;
> bool do_accounting;
>
> @@ -650,6 +654,12 @@ static int vfio_iommu_type1_pin_pages(void *iommu_data,
>
> mutex_lock(&iommu->lock);
>
> + bitmap_added = bitmap_zalloc(npage, GFP_KERNEL);
> + if (!bitmap_added) {
> + ret = -ENOMEM;
> + goto pin_done;
> + }


This is allocated regardless of whether dirty tracking is enabled, so
this adds overhead to the common case in order to reduce user overhead
(not correctness) in the rare condition that dirty tracking is enabled,
and the even rarer condition that there's a fault during that case.
This is not a good trade-off. Thanks,

Alex


> +
> /* Fail if notifier list is empty */
> if (!iommu->notifier.head) {
> ret = -EINVAL;
> @@ -664,7 +674,6 @@ static int vfio_iommu_type1_pin_pages(void *iommu_data,
> do_accounting = !IS_IOMMU_CAP_DOMAIN_IN_CONTAINER(iommu);
>
> for (i = 0; i < npage; i++) {
> - dma_addr_t iova;
> struct vfio_pfn *vpfn;
>
> iova = user_pfn[i] << PAGE_SHIFT;
> @@ -699,14 +708,10 @@ static int vfio_iommu_type1_pin_pages(void *iommu_data,
> }
>
> if (iommu->dirty_page_tracking) {
> - unsigned long pgshift = __ffs(iommu->pgsize_bitmap);
> -
> - /*
> - * Bitmap populated with the smallest supported page
> - * size
> - */
> - bitmap_set(dma->bitmap,
> - (iova - dma->iova) >> pgshift, 1);
> + /* Populated with the smallest supported page size */
> + bitmap_offset = (iova - dma->iova) >> pgshift;
> + if (!test_and_set_bit(bitmap_offset, dma->bitmap))
> + set_bit(i, bitmap_added);
> }
> }
> ret = i;
> @@ -722,14 +727,20 @@ static int vfio_iommu_type1_pin_pages(void *iommu_data,
> pin_unwind:
> phys_pfn[i] = 0;
> for (j = 0; j < i; j++) {
> - dma_addr_t iova;
> -
> iova = user_pfn[j] << PAGE_SHIFT;
> dma = vfio_find_dma(iommu, iova, PAGE_SIZE);
> vfio_unpin_page_external(dma, iova, do_accounting);
> phys_pfn[j] = 0;
> +
> + if (test_bit(j, bitmap_added)) {
> + bitmap_offset = (iova - dma->iova) >> pgshift;
> + clear_bit(bitmap_offset, dma->bitmap);
> + }
> }
> pin_done:
> + if (bitmap_added)
> + bitmap_free(bitmap_added);
> +
> mutex_unlock(&iommu->lock);
> return ret;
> }

2020-12-15 00:37:54

by Alex Williamson

[permalink] [raw]
Subject: Re: [PATCH 1/7] vfio: iommu_type1: Clear added dirty bit when unwind pin

On Fri, 11 Dec 2020 14:51:47 +0800
zhukeqian <[email protected]> wrote:

> On 2020/12/11 3:16, Alex Williamson wrote:
> > On Thu, 10 Dec 2020 15:34:19 +0800
> > Keqian Zhu <[email protected]> wrote:
> >
> >> Currently we do not clear added dirty bit of bitmap when unwind
> >> pin, so if pin failed at halfway, we set unnecessary dirty bit
> >> in bitmap. Clearing added dirty bit when unwind pin, userspace
> >> will see less dirty page, which can save much time to handle them.
> >>
> >> Note that we should distinguish the bits added by pin and the bits
> >> already set before pin, so introduce bitmap_added to record this.
> >>
> >> Signed-off-by: Keqian Zhu <[email protected]>
> >> ---
> >> drivers/vfio/vfio_iommu_type1.c | 33 ++++++++++++++++++++++-----------
> >> 1 file changed, 22 insertions(+), 11 deletions(-)
> >>
> >> diff --git a/drivers/vfio/vfio_iommu_type1.c b/drivers/vfio/vfio_iommu_type1.c
> >> index 67e827638995..f129d24a6ec3 100644
> >> --- a/drivers/vfio/vfio_iommu_type1.c
> >> +++ b/drivers/vfio/vfio_iommu_type1.c
> >> @@ -637,7 +637,11 @@ static int vfio_iommu_type1_pin_pages(void *iommu_data,
> >> struct vfio_iommu *iommu = iommu_data;
> >> struct vfio_group *group;
> >> int i, j, ret;
> >> + unsigned long pgshift = __ffs(iommu->pgsize_bitmap);
> >> unsigned long remote_vaddr;
> >> + unsigned long bitmap_offset;
> >> + unsigned long *bitmap_added;
> >> + dma_addr_t iova;
> >> struct vfio_dma *dma;
> >> bool do_accounting;
> >>
> >> @@ -650,6 +654,12 @@ static int vfio_iommu_type1_pin_pages(void *iommu_data,
> >>
> >> mutex_lock(&iommu->lock);
> >>
> >> + bitmap_added = bitmap_zalloc(npage, GFP_KERNEL);
> >> + if (!bitmap_added) {
> >> + ret = -ENOMEM;
> >> + goto pin_done;
> >> + }
> >
> >
> > This is allocated regardless of whether dirty tracking is enabled, so
> > this adds overhead to the common case in order to reduce user overhead
> > (not correctness) in the rare condition that dirty tracking is enabled,
> > and the even rarer condition that there's a fault during that case.
> > This is not a good trade-off. Thanks,
>
> Hi Alex,
>
> We can allocate the bitmap when dirty tracking is active, do you accept this?
> Or we can set the dirty bitmap after all pins succeed, but this costs cpu time
> to locate vfio_dma with iova.

TBH I don't see this as a terribly significant problem, in the rare
event of an error with dirty tracking enabled, the user might see some
pages marked dirty that were not successfully pinned by the mdev vendor
driver. The solution shouldn't impose more overhead than the original
issue. Thanks,

Alex

> >> +
> >> /* Fail if notifier list is empty */
> >> if (!iommu->notifier.head) {
> >> ret = -EINVAL;
> >> @@ -664,7 +674,6 @@ static int vfio_iommu_type1_pin_pages(void *iommu_data,
> >> do_accounting = !IS_IOMMU_CAP_DOMAIN_IN_CONTAINER(iommu);
> >>
> >> for (i = 0; i < npage; i++) {
> >> - dma_addr_t iova;
> >> struct vfio_pfn *vpfn;
> >>
> >> iova = user_pfn[i] << PAGE_SHIFT;
> >> @@ -699,14 +708,10 @@ static int vfio_iommu_type1_pin_pages(void *iommu_data,
> >> }
> >>
> >> if (iommu->dirty_page_tracking) {
> >> - unsigned long pgshift = __ffs(iommu->pgsize_bitmap);
> >> -
> >> - /*
> >> - * Bitmap populated with the smallest supported page
> >> - * size
> >> - */
> >> - bitmap_set(dma->bitmap,
> >> - (iova - dma->iova) >> pgshift, 1);
> >> + /* Populated with the smallest supported page size */
> >> + bitmap_offset = (iova - dma->iova) >> pgshift;
> >> + if (!test_and_set_bit(bitmap_offset, dma->bitmap))
> >> + set_bit(i, bitmap_added);
> >> }
> >> }
> >> ret = i;
> >> @@ -722,14 +727,20 @@ static int vfio_iommu_type1_pin_pages(void *iommu_data,
> >> pin_unwind:
> >> phys_pfn[i] = 0;
> >> for (j = 0; j < i; j++) {
> >> - dma_addr_t iova;
> >> -
> >> iova = user_pfn[j] << PAGE_SHIFT;
> >> dma = vfio_find_dma(iommu, iova, PAGE_SIZE);
> >> vfio_unpin_page_external(dma, iova, do_accounting);
> >> phys_pfn[j] = 0;
> >> +
> >> + if (test_bit(j, bitmap_added)) {
> >> + bitmap_offset = (iova - dma->iova) >> pgshift;
> >> + clear_bit(bitmap_offset, dma->bitmap);
> >> + }
> >> }
> >> pin_done:
> >> + if (bitmap_added)
> >> + bitmap_free(bitmap_added);
> >> +
> >> mutex_unlock(&iommu->lock);
> >> return ret;
> >> }
> >
> > .
> >
>