Hi guys,
We found that __domain_mapping() takes too long when the
memory region is very large, so we try to make it faster in this
patchset. The performance numbers can be found in PATCH 2; please
review when you are free, thanks.
Changes v2 -> v3:
- make first_pte_in_page() neater [Baolu]
- remove meaningless BUG_ON() in __domain_mapping() [Baolu]
Changes v1 -> v2:
- Fix compile warning on i386 [Baolu]
Longpeng(Mike) (2):
iommu/vt-d: convert the return type of first_pte_in_page to bool
iommu/vt-d: avoid duplicated removing in __domain_mapping
drivers/iommu/intel/iommu.c | 11 ++++++-----
include/linux/intel-iommu.h | 10 ++++++++--
2 files changed, 14 insertions(+), 7 deletions(-)
--
1.8.3.1
first_pte_in_page() returns a boolean value, so let's convert its
return type to bool. In addition, use 'IS_ALIGNED' to make the
code more readable and neater.
Signed-off-by: Longpeng(Mike) <[email protected]>
---
include/linux/intel-iommu.h | 4 ++--
1 file changed, 2 insertions(+), 2 deletions(-)
diff --git a/include/linux/intel-iommu.h b/include/linux/intel-iommu.h
index 05a65eb..9bcabc7 100644
--- a/include/linux/intel-iommu.h
+++ b/include/linux/intel-iommu.h
@@ -708,9 +708,9 @@ static inline bool dma_pte_superpage(struct dma_pte *pte)
return (pte->val & DMA_PTE_LARGE_PAGE);
}
-static inline int first_pte_in_page(struct dma_pte *pte)
+static inline bool first_pte_in_page(struct dma_pte *pte)
{
- return !((unsigned long)pte & ~VTD_PAGE_MASK);
+ return IS_ALIGNED((unsigned long)pte, VTD_PAGE_SIZE);
}
extern struct dmar_drhd_unit * dmar_find_matched_drhd_unit(struct pci_dev *dev);
--
1.8.3.1
__domain_mapping() always removes the pages in the range from
'iov_pfn' to 'end_pfn', but the 'end_pfn' is always the last pfn
of the range that the caller wants to map.
This introduces many duplicated removals and causes the map
operation to take too long, for example:
Map iova=0x100000,nr_pages=0x7d61800
iov_pfn: 0x100000, end_pfn: 0x7e617ff
iov_pfn: 0x140000, end_pfn: 0x7e617ff
iov_pfn: 0x180000, end_pfn: 0x7e617ff
iov_pfn: 0x1c0000, end_pfn: 0x7e617ff
iov_pfn: 0x200000, end_pfn: 0x7e617ff
...
it takes about 50ms in total.
We can reduce the cost by recalculating the 'end_pfn' and limiting
it to the boundary of the end of this pte page.
Map iova=0x100000,nr_pages=0x7d61800
iov_pfn: 0x100000, end_pfn: 0x13ffff
iov_pfn: 0x140000, end_pfn: 0x17ffff
iov_pfn: 0x180000, end_pfn: 0x1bffff
iov_pfn: 0x1c0000, end_pfn: 0x1fffff
iov_pfn: 0x200000, end_pfn: 0x23ffff
...
it only needs 9ms now.
Signed-off-by: Longpeng(Mike) <[email protected]>
---
drivers/iommu/intel/iommu.c | 11 ++++++-----
include/linux/intel-iommu.h | 6 ++++++
2 files changed, 12 insertions(+), 5 deletions(-)
diff --git a/drivers/iommu/intel/iommu.c b/drivers/iommu/intel/iommu.c
index d75f59a..46edae6 100644
--- a/drivers/iommu/intel/iommu.c
+++ b/drivers/iommu/intel/iommu.c
@@ -2354,12 +2354,17 @@ static void switch_to_super_page(struct dmar_domain *domain,
return -ENOMEM;
first_pte = pte;
+ lvl_pages = lvl_to_nr_pages(largepage_lvl);
+
/* It is large page*/
if (largepage_lvl > 1) {
unsigned long end_pfn;
+ unsigned long pages_to_remove;
pteval |= DMA_PTE_LARGE_PAGE;
- end_pfn = ((iov_pfn + nr_pages) & level_mask(largepage_lvl)) - 1;
+ pages_to_remove = min_t(unsigned long, nr_pages,
+ nr_pte_to_next_page(pte) * lvl_pages);
+ end_pfn = iov_pfn + pages_to_remove - 1;
switch_to_super_page(domain, iov_pfn, end_pfn, largepage_lvl);
} else {
pteval &= ~(uint64_t)DMA_PTE_LARGE_PAGE;
@@ -2381,10 +2386,6 @@ static void switch_to_super_page(struct dmar_domain *domain,
WARN_ON(1);
}
- lvl_pages = lvl_to_nr_pages(largepage_lvl);
-
- BUG_ON(nr_pages < lvl_pages);
-
nr_pages -= lvl_pages;
iov_pfn += lvl_pages;
phys_pfn += lvl_pages;
diff --git a/include/linux/intel-iommu.h b/include/linux/intel-iommu.h
index 9bcabc7..b29b2a3 100644
--- a/include/linux/intel-iommu.h
+++ b/include/linux/intel-iommu.h
@@ -713,6 +713,12 @@ static inline bool first_pte_in_page(struct dma_pte *pte)
return IS_ALIGNED((unsigned long)pte, VTD_PAGE_SIZE);
}
+static inline int nr_pte_to_next_page(struct dma_pte *pte)
+{
+ return first_pte_in_page(pte) ? BIT_ULL(VTD_STRIDE_SHIFT) :
+ (struct dma_pte *)ALIGN((unsigned long)pte, VTD_PAGE_SIZE) - pte;
+}
+
extern struct dmar_drhd_unit * dmar_find_matched_drhd_unit(struct pci_dev *dev);
extern int dmar_find_matched_atsr_unit(struct pci_dev *dev);
--
1.8.3.1
On 10/8/21 8:04 AM, Longpeng(Mike) wrote:
> __domain_mapping() always removes the pages in the range from
> 'iov_pfn' to 'end_pfn', but the 'end_pfn' is always the last pfn
> of the range that the caller wants to map.
>
> This would introduce too many duplicated removing and leads the
> map operation take too long, for example:
>
> Map iova=0x100000,nr_pages=0x7d61800
> iov_pfn: 0x100000, end_pfn: 0x7e617ff
> iov_pfn: 0x140000, end_pfn: 0x7e617ff
> iov_pfn: 0x180000, end_pfn: 0x7e617ff
> iov_pfn: 0x1c0000, end_pfn: 0x7e617ff
> iov_pfn: 0x200000, end_pfn: 0x7e617ff
> ...
> it takes about 50ms in total.
>
> We can reduce the cost by recalculate the 'end_pfn' and limit it
> to the boundary of the end of this pte page.
>
> Map iova=0x100000,nr_pages=0x7d61800
> iov_pfn: 0x100000, end_pfn: 0x13ffff
> iov_pfn: 0x140000, end_pfn: 0x17ffff
> iov_pfn: 0x180000, end_pfn: 0x1bffff
> iov_pfn: 0x1c0000, end_pfn: 0x1fffff
> iov_pfn: 0x200000, end_pfn: 0x23ffff
> ...
> it only need 9ms now.
>
> Signed-off-by: Longpeng(Mike) <[email protected]>
> ---
> drivers/iommu/intel/iommu.c | 11 ++++++-----
> include/linux/intel-iommu.h | 6 ++++++
> 2 files changed, 12 insertions(+), 5 deletions(-)
>
> diff --git a/drivers/iommu/intel/iommu.c b/drivers/iommu/intel/iommu.c
> index d75f59a..46edae6 100644
> --- a/drivers/iommu/intel/iommu.c
> +++ b/drivers/iommu/intel/iommu.c
> @@ -2354,12 +2354,17 @@ static void switch_to_super_page(struct dmar_domain *domain,
> return -ENOMEM;
> first_pte = pte;
>
> + lvl_pages = lvl_to_nr_pages(largepage_lvl);
> +
> /* It is large page*/
> if (largepage_lvl > 1) {
> unsigned long end_pfn;
> + unsigned long pages_to_remove;
>
> pteval |= DMA_PTE_LARGE_PAGE;
> - end_pfn = ((iov_pfn + nr_pages) & level_mask(largepage_lvl)) - 1;
> + pages_to_remove = min_t(unsigned long, nr_pages,
> + nr_pte_to_next_page(pte) * lvl_pages);
> + end_pfn = iov_pfn + pages_to_remove - 1;
> switch_to_super_page(domain, iov_pfn, end_pfn, largepage_lvl);
> } else {
> pteval &= ~(uint64_t)DMA_PTE_LARGE_PAGE;
> @@ -2381,10 +2386,6 @@ static void switch_to_super_page(struct dmar_domain *domain,
> WARN_ON(1);
> }
>
> - lvl_pages = lvl_to_nr_pages(largepage_lvl);
> -
> - BUG_ON(nr_pages < lvl_pages);
> -
> nr_pages -= lvl_pages;
> iov_pfn += lvl_pages;
> phys_pfn += lvl_pages;
> diff --git a/include/linux/intel-iommu.h b/include/linux/intel-iommu.h
> index 9bcabc7..b29b2a3 100644
> --- a/include/linux/intel-iommu.h
> +++ b/include/linux/intel-iommu.h
> @@ -713,6 +713,12 @@ static inline bool first_pte_in_page(struct dma_pte *pte)
> return IS_ALIGNED((unsigned long)pte, VTD_PAGE_SIZE);
> }
>
> +static inline int nr_pte_to_next_page(struct dma_pte *pte)
> +{
> + return first_pte_in_page(pte) ? BIT_ULL(VTD_STRIDE_SHIFT) :
> + (struct dma_pte *)ALIGN((unsigned long)pte, VTD_PAGE_SIZE) - pte;
We should make it like this to avoid the 0day warning:
(struct dma_pte *)(uintptr_t)VTD_PAGE_ALIGN((unsigned long)pte) - pte;
Can you please test this line of change? No need to send a new version.
I will handle it if it passes your test.
> +}
> +
> extern struct dmar_drhd_unit * dmar_find_matched_drhd_unit(struct pci_dev *dev);
> extern int dmar_find_matched_atsr_unit(struct pci_dev *dev);
>
>
Best regards,
baolu
On 10/8/21 10:07 AM, Lu Baolu wrote:
> On 10/8/21 8:04 AM, Longpeng(Mike) wrote:
>> __domain_mapping() always removes the pages in the range from
>> 'iov_pfn' to 'end_pfn', but the 'end_pfn' is always the last pfn
>> of the range that the caller wants to map.
>>
>> This would introduce too many duplicated removing and leads the
>> map operation take too long, for example:
>>
>> Map iova=0x100000,nr_pages=0x7d61800
>> iov_pfn: 0x100000, end_pfn: 0x7e617ff
>> iov_pfn: 0x140000, end_pfn: 0x7e617ff
>> iov_pfn: 0x180000, end_pfn: 0x7e617ff
>> iov_pfn: 0x1c0000, end_pfn: 0x7e617ff
>> iov_pfn: 0x200000, end_pfn: 0x7e617ff
>> ...
>> it takes about 50ms in total.
>>
>> We can reduce the cost by recalculate the 'end_pfn' and limit it
>> to the boundary of the end of this pte page.
>>
>> Map iova=0x100000,nr_pages=0x7d61800
>> iov_pfn: 0x100000, end_pfn: 0x13ffff
>> iov_pfn: 0x140000, end_pfn: 0x17ffff
>> iov_pfn: 0x180000, end_pfn: 0x1bffff
>> iov_pfn: 0x1c0000, end_pfn: 0x1fffff
>> iov_pfn: 0x200000, end_pfn: 0x23ffff
>> ...
>> it only need 9ms now.
>>
>> Signed-off-by: Longpeng(Mike) <[email protected]>
>> ---
>> drivers/iommu/intel/iommu.c | 11 ++++++-----
>> include/linux/intel-iommu.h | 6 ++++++
>> 2 files changed, 12 insertions(+), 5 deletions(-)
>>
>> diff --git a/drivers/iommu/intel/iommu.c b/drivers/iommu/intel/iommu.c
>> index d75f59a..46edae6 100644
>> --- a/drivers/iommu/intel/iommu.c
>> +++ b/drivers/iommu/intel/iommu.c
>> @@ -2354,12 +2354,17 @@ static void switch_to_super_page(struct
>> dmar_domain *domain,
>> return -ENOMEM;
>> first_pte = pte;
>> + lvl_pages = lvl_to_nr_pages(largepage_lvl);
>> +
>> /* It is large page*/
>> if (largepage_lvl > 1) {
>> unsigned long end_pfn;
>> + unsigned long pages_to_remove;
>> pteval |= DMA_PTE_LARGE_PAGE;
>> - end_pfn = ((iov_pfn + nr_pages) &
>> level_mask(largepage_lvl)) - 1;
>> + pages_to_remove = min_t(unsigned long, nr_pages,
>> + nr_pte_to_next_page(pte) * lvl_pages);
>> + end_pfn = iov_pfn + pages_to_remove - 1;
>> switch_to_super_page(domain, iov_pfn, end_pfn,
>> largepage_lvl);
>> } else {
>> pteval &= ~(uint64_t)DMA_PTE_LARGE_PAGE;
>> @@ -2381,10 +2386,6 @@ static void switch_to_super_page(struct
>> dmar_domain *domain,
>> WARN_ON(1);
>> }
>> - lvl_pages = lvl_to_nr_pages(largepage_lvl);
>> -
>> - BUG_ON(nr_pages < lvl_pages);
>> -
>> nr_pages -= lvl_pages;
>> iov_pfn += lvl_pages;
>> phys_pfn += lvl_pages;
>> diff --git a/include/linux/intel-iommu.h b/include/linux/intel-iommu.h
>> index 9bcabc7..b29b2a3 100644
>> --- a/include/linux/intel-iommu.h
>> +++ b/include/linux/intel-iommu.h
>> @@ -713,6 +713,12 @@ static inline bool first_pte_in_page(struct
>> dma_pte *pte)
>> return IS_ALIGNED((unsigned long)pte, VTD_PAGE_SIZE);
>> }
>> +static inline int nr_pte_to_next_page(struct dma_pte *pte)
>> +{
>> + return first_pte_in_page(pte) ? BIT_ULL(VTD_STRIDE_SHIFT) :
>> + (struct dma_pte *)ALIGN((unsigned long)pte, VTD_PAGE_SIZE) -
>> pte;
>
> We should make it like this to avoid the 0day warning:
>
> (struct dma_pte *)(uintptr_t)VTD_PAGE_ALIGN((unsigned long)pte) - pte;
>
> Can you please test this line of change? No need to send a new version.
> I will handle it if it passes your test.
Just realized that ALIGN() has already done the type cast. Please ignore
above comment. Sorry for the noise.
Best regards,
baolu
> -----Original Message-----
> From: Lu Baolu [mailto:[email protected]]
> Sent: Friday, October 8, 2021 10:44 AM
> To: Longpeng (Mike, Cloud Infrastructure Service Product Dept.)
> <[email protected]>; [email protected]; [email protected];
> [email protected]
> Cc: [email protected]; [email protected];
> [email protected]; Gonglei (Arei) <[email protected]>
> Subject: Re: [PATCH v3 2/2] iommu/vt-d: avoid duplicated removing in
> __domain_mapping
>
> On 10/8/21 10:07 AM, Lu Baolu wrote:
> > On 10/8/21 8:04 AM, Longpeng(Mike) wrote:
> >> __domain_mapping() always removes the pages in the range from
> >> 'iov_pfn' to 'end_pfn', but the 'end_pfn' is always the last pfn
> >> of the range that the caller wants to map.
> >>
> >> This would introduce too many duplicated removing and leads the
> >> map operation take too long, for example:
> >>
> >> Map iova=0x100000,nr_pages=0x7d61800
> >> iov_pfn: 0x100000, end_pfn: 0x7e617ff
> >> iov_pfn: 0x140000, end_pfn: 0x7e617ff
> >> iov_pfn: 0x180000, end_pfn: 0x7e617ff
> >> iov_pfn: 0x1c0000, end_pfn: 0x7e617ff
> >> iov_pfn: 0x200000, end_pfn: 0x7e617ff
> >> ...
> >> it takes about 50ms in total.
> >>
> >> We can reduce the cost by recalculate the 'end_pfn' and limit it
> >> to the boundary of the end of this pte page.
> >>
> >> Map iova=0x100000,nr_pages=0x7d61800
> >> iov_pfn: 0x100000, end_pfn: 0x13ffff
> >> iov_pfn: 0x140000, end_pfn: 0x17ffff
> >> iov_pfn: 0x180000, end_pfn: 0x1bffff
> >> iov_pfn: 0x1c0000, end_pfn: 0x1fffff
> >> iov_pfn: 0x200000, end_pfn: 0x23ffff
> >> ...
> >> it only need 9ms now.
> >>
> >> Signed-off-by: Longpeng(Mike) <[email protected]>
> >> ---
> >> drivers/iommu/intel/iommu.c | 11 ++++++-----
> >> include/linux/intel-iommu.h | 6 ++++++
> >> 2 files changed, 12 insertions(+), 5 deletions(-)
> >>
> >> diff --git a/drivers/iommu/intel/iommu.c b/drivers/iommu/intel/iommu.c
> >> index d75f59a..46edae6 100644
> >> --- a/drivers/iommu/intel/iommu.c
> >> +++ b/drivers/iommu/intel/iommu.c
> >> @@ -2354,12 +2354,17 @@ static void switch_to_super_page(struct
> >> dmar_domain *domain,
> >> return -ENOMEM;
> >> first_pte = pte;
> >> + lvl_pages = lvl_to_nr_pages(largepage_lvl);
> >> +
> >> /* It is large page*/
> >> if (largepage_lvl > 1) {
> >> unsigned long end_pfn;
> >> + unsigned long pages_to_remove;
> >> pteval |= DMA_PTE_LARGE_PAGE;
> >> - end_pfn = ((iov_pfn + nr_pages) &
> >> level_mask(largepage_lvl)) - 1;
> >> + pages_to_remove = min_t(unsigned long, nr_pages,
> >> + nr_pte_to_next_page(pte) * lvl_pages);
> >> + end_pfn = iov_pfn + pages_to_remove - 1;
> >> switch_to_super_page(domain, iov_pfn, end_pfn,
> >> largepage_lvl);
> >> } else {
> >> pteval &= ~(uint64_t)DMA_PTE_LARGE_PAGE;
> >> @@ -2381,10 +2386,6 @@ static void switch_to_super_page(struct
> >> dmar_domain *domain,
> >> WARN_ON(1);
> >> }
> >> - lvl_pages = lvl_to_nr_pages(largepage_lvl);
> >> -
> >> - BUG_ON(nr_pages < lvl_pages);
> >> -
> >> nr_pages -= lvl_pages;
> >> iov_pfn += lvl_pages;
> >> phys_pfn += lvl_pages;
> >> diff --git a/include/linux/intel-iommu.h b/include/linux/intel-iommu.h
> >> index 9bcabc7..b29b2a3 100644
> >> --- a/include/linux/intel-iommu.h
> >> +++ b/include/linux/intel-iommu.h
> >> @@ -713,6 +713,12 @@ static inline bool first_pte_in_page(struct
> >> dma_pte *pte)
> >> return IS_ALIGNED((unsigned long)pte, VTD_PAGE_SIZE);
> >> }
> >> +static inline int nr_pte_to_next_page(struct dma_pte *pte)
> >> +{
> >> + return first_pte_in_page(pte) ? BIT_ULL(VTD_STRIDE_SHIFT) :
> >> + (struct dma_pte *)ALIGN((unsigned long)pte, VTD_PAGE_SIZE) -
> >> pte;
> >
> > We should make it like this to avoid the 0day warning:
> >
> > (struct dma_pte *)(uintptr_t)VTD_PAGE_ALIGN((unsigned long)pte) - pte;
> >
> > Can you please test this line of change? No need to send a new version.
> > I will handle it if it passes your test.
>
> Just realized that ALIGN() has already done the type cast. Please ignore
> above comment. Sorry for the noise.
>
Hi Baolu,
Our testing is complete: there are no compile warnings on either the x86 64-bit
or 32-bit arch, and the system is working as expected.
Please add:
Tested-by: Liujunjie <[email protected]>
> Best regards,
> baolu