2024-04-10 02:10:34

by Baolu Lu

[permalink] [raw]
Subject: [PATCH v2 02/12] iommu/vt-d: Add cache tag invalidation helpers

Add several helpers to invalidate the caches after mappings in the
affected domain are changed.

- cache_tag_flush_range() invalidates a range of caches after mappings
within this range are changed. It uses the page-selective cache
invalidation methods.

- cache_tag_flush_all() invalidates all caches tagged by a domain ID.
It uses the domain-selective cache invalidation methods.

- cache_tag_flush_range_np() invalidates a range of caches when new
mappings are created in the domain and the corresponding page table
entries change from non-present to present.

Signed-off-by: Lu Baolu <[email protected]>
---
drivers/iommu/intel/iommu.h | 14 +++
drivers/iommu/intel/cache.c | 195 ++++++++++++++++++++++++++++++++++++
drivers/iommu/intel/iommu.c | 12 ---
3 files changed, 209 insertions(+), 12 deletions(-)

diff --git a/drivers/iommu/intel/iommu.h b/drivers/iommu/intel/iommu.h
index 6f6bffc60852..700574421b51 100644
--- a/drivers/iommu/intel/iommu.h
+++ b/drivers/iommu/intel/iommu.h
@@ -35,6 +35,8 @@
#define VTD_PAGE_MASK (((u64)-1) << VTD_PAGE_SHIFT)
#define VTD_PAGE_ALIGN(addr) (((addr) + VTD_PAGE_SIZE - 1) & VTD_PAGE_MASK)

+#define IOVA_PFN(addr) ((addr) >> PAGE_SHIFT)
+
#define VTD_STRIDE_SHIFT (9)
#define VTD_STRIDE_MASK (((u64)-1) << VTD_STRIDE_SHIFT)

@@ -1041,6 +1043,13 @@ static inline void context_set_sm_pre(struct context_entry *context)
context->lo |= BIT_ULL(4);
}

+/* Returns a number of VTD pages, but aligned to MM page size */
+static inline unsigned long aligned_nrpages(unsigned long host_addr, size_t size)
+{
+ host_addr &= ~PAGE_MASK;
+ return PAGE_ALIGN(host_addr + size) >> VTD_PAGE_SHIFT;
+}
+
/* Convert value to context PASID directory size field coding. */
#define context_pdts(pds) (((pds) & 0x7) << 9)

@@ -1116,6 +1125,11 @@ int cache_tag_assign_domain(struct dmar_domain *domain, u16 did,
struct device *dev, ioasid_t pasid);
void cache_tag_unassign_domain(struct dmar_domain *domain, u16 did,
struct device *dev, ioasid_t pasid);
+void cache_tag_flush_range(struct dmar_domain *domain, unsigned long start,
+ unsigned long end, int ih);
+void cache_tag_flush_all(struct dmar_domain *domain);
+void cache_tag_flush_range_np(struct dmar_domain *domain, unsigned long start,
+ unsigned long end);

#ifdef CONFIG_INTEL_IOMMU_SVM
void intel_svm_check(struct intel_iommu *iommu);
diff --git a/drivers/iommu/intel/cache.c b/drivers/iommu/intel/cache.c
index debbdaeff1c4..b2270dc8a765 100644
--- a/drivers/iommu/intel/cache.c
+++ b/drivers/iommu/intel/cache.c
@@ -12,6 +12,7 @@
#include <linux/dmar.h>
#include <linux/iommu.h>
#include <linux/memory.h>
+#include <linux/pci.h>
#include <linux/spinlock.h>

#include "iommu.h"
@@ -194,3 +195,197 @@ void cache_tag_unassign_domain(struct dmar_domain *domain, u16 did,
if (domain->domain.type == IOMMU_DOMAIN_NESTED)
__cache_tag_unassign_parent_domain(domain->s2_domain, did, dev, pasid);
}
+
+static unsigned long calculate_psi_aligned_address(unsigned long start,
+ unsigned long end,
+ unsigned long *_pages,
+ unsigned long *_mask)
+{
+ unsigned long pages = aligned_nrpages(start, end - start + 1);
+ unsigned long aligned_pages = __roundup_pow_of_two(pages);
+ unsigned long bitmask = aligned_pages - 1;
+ unsigned long mask = ilog2(aligned_pages);
+ unsigned long pfn = IOVA_PFN(start);
+
+ /*
+ * PSI masks the low order bits of the base address. If the
+ * address isn't aligned to the mask, then compute a mask value
+ * needed to ensure the target range is flushed.
+ */
+ if (unlikely(bitmask & pfn)) {
+ unsigned long end_pfn = pfn + pages - 1, shared_bits;
+
+ /*
+ * Since end_pfn <= pfn + bitmask, the only way bits
+ * higher than bitmask can differ in pfn and end_pfn is
+ * by carrying. This means after masking out bitmask,
+ * high bits starting with the first set bit in
+ * shared_bits are all equal in both pfn and end_pfn.
+ */
+ shared_bits = ~(pfn ^ end_pfn) & ~bitmask;
+ mask = shared_bits ? __ffs(shared_bits) : BITS_PER_LONG;
+ }
+
+ *_pages = aligned_pages;
+ *_mask = mask;
+
+ return ALIGN_DOWN(start, VTD_PAGE_SIZE);
+}
+
+/*
+ * Invalidates a range of IOVA from @start (inclusive) to @end (inclusive)
+ * when the memory mappings in the target domain have been modified.
+ */
+void cache_tag_flush_range(struct dmar_domain *domain, unsigned long start,
+ unsigned long end, int ih)
+{
+ unsigned long pages, mask, addr;
+ struct cache_tag *tag;
+ unsigned long flags;
+
+ addr = calculate_psi_aligned_address(start, end, &pages, &mask);
+
+ spin_lock_irqsave(&domain->cache_lock, flags);
+ list_for_each_entry(tag, &domain->cache_tags, node) {
+ struct intel_iommu *iommu = tag->iommu;
+ struct device_domain_info *info;
+ u16 sid;
+
+ switch (tag->type) {
+ case CACHE_TAG_IOTLB:
+ case CACHE_TAG_PARENT_IOTLB:
+ if (domain->use_first_level) {
+ qi_flush_piotlb(iommu, tag->domain_id,
+ tag->pasid, addr, pages, ih);
+ } else {
+ /*
+ * Fallback to domain selective flush if no
+ * PSI support or the size is too big.
+ */
+ if (!cap_pgsel_inv(iommu->cap) ||
+ mask > cap_max_amask_val(iommu->cap))
+ iommu->flush.flush_iotlb(iommu, tag->domain_id,
+ 0, 0, DMA_TLB_DSI_FLUSH);
+ else
+ iommu->flush.flush_iotlb(iommu, tag->domain_id,
+ addr | ih, mask,
+ DMA_TLB_PSI_FLUSH);
+ }
+ break;
+ case CACHE_TAG_PARENT_DEVTLB:
+ /*
+ * Address translation cache in device side caches the
+ * result of nested translation. There is no easy way
+ * to identify the exact set of nested translations
+ * affected by a change in S2. So just flush the entire
+ * device cache.
+ */
+ addr = 0;
+ mask = MAX_AGAW_PFN_WIDTH;
+ fallthrough;
+ case CACHE_TAG_DEVTLB:
+ info = dev_iommu_priv_get(tag->dev);
+ sid = PCI_DEVID(info->bus, info->devfn);
+
+ if (tag->pasid == IOMMU_NO_PASID)
+ qi_flush_dev_iotlb(iommu, sid, info->pfsid,
+ info->ats_qdep, addr, mask);
+ else
+ qi_flush_dev_iotlb_pasid(iommu, sid, info->pfsid,
+ tag->pasid, info->ats_qdep,
+ addr, mask);
+
+ quirk_extra_dev_tlb_flush(info, addr, mask, tag->pasid, info->ats_qdep);
+ break;
+ }
+ }
+ spin_unlock_irqrestore(&domain->cache_lock, flags);
+}
+
+/*
+ * Invalidates all ranges of IOVA when the memory mappings in the target
+ * domain have been modified.
+ */
+void cache_tag_flush_all(struct dmar_domain *domain)
+{
+ struct cache_tag *tag;
+ unsigned long flags;
+
+ spin_lock_irqsave(&domain->cache_lock, flags);
+ list_for_each_entry(tag, &domain->cache_tags, node) {
+ struct intel_iommu *iommu = tag->iommu;
+ struct device_domain_info *info;
+ u16 sid;
+
+ switch (tag->type) {
+ case CACHE_TAG_IOTLB:
+ case CACHE_TAG_PARENT_IOTLB:
+ if (domain->use_first_level)
+ qi_flush_piotlb(iommu, tag->domain_id,
+ tag->pasid, 0, -1, 0);
+ else
+ iommu->flush.flush_iotlb(iommu, tag->domain_id,
+ 0, 0, DMA_TLB_DSI_FLUSH);
+ break;
+ case CACHE_TAG_DEVTLB:
+ case CACHE_TAG_PARENT_DEVTLB:
+ info = dev_iommu_priv_get(tag->dev);
+ sid = PCI_DEVID(info->bus, info->devfn);
+
+ qi_flush_dev_iotlb(iommu, sid, info->pfsid, info->ats_qdep,
+ 0, MAX_AGAW_PFN_WIDTH);
+ quirk_extra_dev_tlb_flush(info, 0, MAX_AGAW_PFN_WIDTH,
+ IOMMU_NO_PASID, info->ats_qdep);
+ break;
+ }
+ }
+ spin_unlock_irqrestore(&domain->cache_lock, flags);
+}
+
+/*
+ * Invalidate a range of IOVA when new mappings are created in the target
+ * domain.
+ *
+ * - VT-d spec, Section 6.1 Caching Mode: When the CM field is reported as
+ * Set, any software updates to remapping structures other than first-
+ * stage mapping requires explicit invalidation of the caches.
+ * - VT-d spec, Section 6.8 Write Buffer Flushing: For hardware that requires
+ * write buffer flushing, software must explicitly perform write-buffer
+ * flushing, if cache invalidation is not required.
+ */
+void cache_tag_flush_range_np(struct dmar_domain *domain, unsigned long start,
+ unsigned long end)
+{
+ unsigned long pages, mask, addr;
+ struct cache_tag *tag;
+ unsigned long flags;
+
+ addr = calculate_psi_aligned_address(start, end, &pages, &mask);
+
+ spin_lock_irqsave(&domain->cache_lock, flags);
+ list_for_each_entry(tag, &domain->cache_tags, node) {
+ struct intel_iommu *iommu = tag->iommu;
+
+ if (!cap_caching_mode(iommu->cap) || domain->use_first_level) {
+ iommu_flush_write_buffer(iommu);
+ continue;
+ }
+
+ if (tag->type == CACHE_TAG_IOTLB ||
+ tag->type == CACHE_TAG_PARENT_IOTLB) {
+ /*
+ * Fallback to domain selective flush if no
+ * PSI support or the size is too big.
+ */
+ if (!cap_pgsel_inv(iommu->cap) ||
+ mask > cap_max_amask_val(iommu->cap))
+ iommu->flush.flush_iotlb(iommu, tag->domain_id,
+ 0, 0, DMA_TLB_DSI_FLUSH);
+ else
+ iommu->flush.flush_iotlb(iommu, tag->domain_id,
+ addr, mask,
+ DMA_TLB_PSI_FLUSH);
+ }
+ }
+ spin_unlock_irqrestore(&domain->cache_lock, flags);
+}
diff --git a/drivers/iommu/intel/iommu.c b/drivers/iommu/intel/iommu.c
index 17b8ce4b9154..4572624a275e 100644
--- a/drivers/iommu/intel/iommu.c
+++ b/drivers/iommu/intel/iommu.c
@@ -54,11 +54,6 @@
__DOMAIN_MAX_PFN(gaw), (unsigned long)-1))
#define DOMAIN_MAX_ADDR(gaw) (((uint64_t)__DOMAIN_MAX_PFN(gaw)) << VTD_PAGE_SHIFT)

-/* IO virtual address start page frame number */
-#define IOVA_START_PFN (1)
-
-#define IOVA_PFN(addr) ((addr) >> PAGE_SHIFT)
-
static void __init check_tylersburg_isoch(void);
static int rwbf_quirk;

@@ -1985,13 +1980,6 @@ domain_context_mapping(struct dmar_domain *domain, struct device *dev)
domain_context_mapping_cb, domain);
}

-/* Returns a number of VTD pages, but aligned to MM page size */
-static unsigned long aligned_nrpages(unsigned long host_addr, size_t size)
-{
- host_addr &= ~PAGE_MASK;
- return PAGE_ALIGN(host_addr + size) >> VTD_PAGE_SHIFT;
-}
-
/* Return largest possible superpage level for a given mapping */
static int hardware_largepage_caps(struct dmar_domain *domain, unsigned long iov_pfn,
unsigned long phy_pfn, unsigned long pages)
--
2.34.1



2024-04-15 04:15:14

by Zhang, Tina

[permalink] [raw]
Subject: RE: [PATCH v2 02/12] iommu/vt-d: Add cache tag invalidation helpers



> -----Original Message-----
> From: Lu Baolu <[email protected]>
> Sent: Wednesday, April 10, 2024 10:09 AM
> To: Joerg Roedel <[email protected]>; Will Deacon <[email protected]>; Robin
> Murphy <[email protected]>; Tian, Kevin <[email protected]>;
> Jason Gunthorpe <[email protected]>
> Cc: Zhang, Tina <[email protected]>; Liu, Yi L <[email protected]>;
> [email protected]; [email protected]; Lu Baolu
> <[email protected]>
> Subject: [PATCH v2 02/12] iommu/vt-d: Add cache tag invalidation helpers
>
> Add several helpers to invalidate the caches after mappings in the affected
> domain are changed.
>
> - cache_tag_flush_range() invalidates a range of caches after mappings
> within this range are changed. It uses the page-selective cache
> invalidation methods.
>
> - cache_tag_flush_all() invalidates all caches tagged by a domain ID.
> It uses the domain-selective cache invalidation methods.
>
> - cache_tag_flush_range_np() invalidates a range of caches when new
> mappings are created in the domain and the corresponding page table
> entries change from non-present to present.
>
> Signed-off-by: Lu Baolu <[email protected]>
> ---
> drivers/iommu/intel/iommu.h | 14 +++
> drivers/iommu/intel/cache.c | 195
> ++++++++++++++++++++++++++++++++++++
> drivers/iommu/intel/iommu.c | 12 ---
> 3 files changed, 209 insertions(+), 12 deletions(-)
>
> diff --git a/drivers/iommu/intel/iommu.h b/drivers/iommu/intel/iommu.h
> index 6f6bffc60852..700574421b51 100644
> --- a/drivers/iommu/intel/iommu.h
> +++ b/drivers/iommu/intel/iommu.h
> @@ -35,6 +35,8 @@
> #define VTD_PAGE_MASK (((u64)-1) << VTD_PAGE_SHIFT)
> #define VTD_PAGE_ALIGN(addr) (((addr) + VTD_PAGE_SIZE - 1) &
> VTD_PAGE_MASK)
>
> +#define IOVA_PFN(addr) ((addr) >> PAGE_SHIFT)
> +
> #define VTD_STRIDE_SHIFT (9)
> #define VTD_STRIDE_MASK (((u64)-1) << VTD_STRIDE_SHIFT)
>
> @@ -1041,6 +1043,13 @@ static inline void context_set_sm_pre(struct
> context_entry *context)
> context->lo |= BIT_ULL(4);
> }
>
> +/* Returns a number of VTD pages, but aligned to MM page size */ static
> +inline unsigned long aligned_nrpages(unsigned long host_addr, size_t
> +size) {
> + host_addr &= ~PAGE_MASK;
> + return PAGE_ALIGN(host_addr + size) >> VTD_PAGE_SHIFT; }
> +
> /* Convert value to context PASID directory size field coding. */
> #define context_pdts(pds) (((pds) & 0x7) << 9)
>
> @@ -1116,6 +1125,11 @@ int cache_tag_assign_domain(struct
> dmar_domain *domain, u16 did,
> struct device *dev, ioasid_t pasid); void
> cache_tag_unassign_domain(struct dmar_domain *domain, u16 did,
> struct device *dev, ioasid_t pasid);
> +void cache_tag_flush_range(struct dmar_domain *domain, unsigned long
> start,
> + unsigned long end, int ih);
> +void cache_tag_flush_all(struct dmar_domain *domain); void
> +cache_tag_flush_range_np(struct dmar_domain *domain, unsigned long
> start,
> + unsigned long end);
>
> #ifdef CONFIG_INTEL_IOMMU_SVM
> void intel_svm_check(struct intel_iommu *iommu); diff --git
> a/drivers/iommu/intel/cache.c b/drivers/iommu/intel/cache.c index
> debbdaeff1c4..b2270dc8a765 100644
> --- a/drivers/iommu/intel/cache.c
> +++ b/drivers/iommu/intel/cache.c
> @@ -12,6 +12,7 @@
> #include <linux/dmar.h>
> #include <linux/iommu.h>
> #include <linux/memory.h>
> +#include <linux/pci.h>
> #include <linux/spinlock.h>
>
> #include "iommu.h"
> @@ -194,3 +195,197 @@ void cache_tag_unassign_domain(struct
> dmar_domain *domain, u16 did,
> if (domain->domain.type == IOMMU_DOMAIN_NESTED)
> __cache_tag_unassign_parent_domain(domain->s2_domain,
> did, dev, pasid); }
> +
> +static unsigned long calculate_psi_aligned_address(unsigned long start,
> + unsigned long end,
> + unsigned long *_pages,
> + unsigned long *_mask)
> +{
> + unsigned long pages = aligned_nrpages(start, end - start + 1);
> + unsigned long aligned_pages = __roundup_pow_of_two(pages);
> + unsigned long bitmask = aligned_pages - 1;
> + unsigned long mask = ilog2(aligned_pages);
> + unsigned long pfn = IOVA_PFN(start);
> +
> + /*
> + * PSI masks the low order bits of the base address. If the
> + * address isn't aligned to the mask, then compute a mask value
> + * needed to ensure the target range is flushed.
> + */
> + if (unlikely(bitmask & pfn)) {
> + unsigned long end_pfn = pfn + pages - 1, shared_bits;
> +
> + /*
> + * Since end_pfn <= pfn + bitmask, the only way bits
> + * higher than bitmask can differ in pfn and end_pfn is
> + * by carrying. This means after masking out bitmask,
> + * high bits starting with the first set bit in
> + * shared_bits are all equal in both pfn and end_pfn.
> + */
> + shared_bits = ~(pfn ^ end_pfn) & ~bitmask;
> + mask = shared_bits ? __ffs(shared_bits) : BITS_PER_LONG;
> + }
> +
> + *_pages = aligned_pages;
> + *_mask = mask;
> +
> + return ALIGN_DOWN(start, VTD_PAGE_SIZE); }
It's a good idea to use the above logic to calculate the appropriate mask for non-size-aligned page selective invalidation for all domains, including sva domain. Sounds like we plan to allow non-size-aligned page selection invalidation.

However, currently there are some places in the code which have the assumption that the size of the page selective invalidation should be aligned with the start address, a.k.a. only size-aligned page selective invalidation should happen and non-size-aligned one isn't expected.

One example is in qi_flush_dev_iotlb_pasid() and there is a checking:
if (!IS_ALIGNED(addr, VTD_PAGE_SIZE << size_order)
pr_warn_ratelimited(...)
If the non-size-aligned page selective invalidation is allowed, the warning message may come out which sounds confusing and impacts performance.

Another example is in qi_flush_piotlb() and there is a WARN_ON_ONCE() to remind user when non-size-aligned page selective invalidation is being used.
If (WARN_ON_ONCE(!IS_ALIGNED(addr, align))
...

So, can we consider removing the checking as well in this patch-set?

Regards,
-Tina



2024-04-15 06:47:20

by Zhang, Tina

[permalink] [raw]
Subject: RE: [PATCH v2 02/12] iommu/vt-d: Add cache tag invalidation helpers



> -----Original Message-----
> From: Baolu Lu <[email protected]>
> Sent: Monday, April 15, 2024 1:07 PM
> To: Zhang, Tina <[email protected]>; Joerg Roedel <[email protected]>;
> Will Deacon <[email protected]>; Robin Murphy <[email protected]>;
> Tian, Kevin <[email protected]>; Jason Gunthorpe <[email protected]>
> Cc: [email protected]; Liu, Yi L <[email protected]>;
> [email protected]; [email protected]
> Subject: Re: [PATCH v2 02/12] iommu/vt-d: Add cache tag invalidation
> helpers
>
> On 4/15/24 12:15 PM, Zhang, Tina wrote:
> >
> >> -----Original Message-----
> >> From: Lu Baolu<[email protected]>
> >> Sent: Wednesday, April 10, 2024 10:09 AM
> >> To: Joerg Roedel<[email protected]>; Will Deacon<[email protected]>;
> >> Robin Murphy<[email protected]>; Tian,
> >> Kevin<[email protected]>; Jason Gunthorpe<[email protected]>
> >> Cc: Zhang, Tina<[email protected]>; Liu, Yi L<[email protected]>;
> >> [email protected];[email protected]; Lu Baolu
> >> <[email protected]>
> >> Subject: [PATCH v2 02/12] iommu/vt-d: Add cache tag invalidation
> >> helpers
> >>
> >> Add several helpers to invalidate the caches after mappings in the
> >> affected domain are changed.
> >>
> >> - cache_tag_flush_range() invalidates a range of caches after mappings
> >> within this range are changed. It uses the page-selective cache
> >> invalidation methods.
> >>
> >> - cache_tag_flush_all() invalidates all caches tagged by a domain ID.
> >> It uses the domain-selective cache invalidation methods.
> >>
> >> - cache_tag_flush_range_np() invalidates a range of caches when new
> >> mappings are created in the domain and the corresponding page table
> >> entries change from non-present to present.
> >>
> >> Signed-off-by: Lu Baolu<[email protected]>
> >> ---
> >> drivers/iommu/intel/iommu.h | 14 +++
> >> drivers/iommu/intel/cache.c | 195
> >> ++++++++++++++++++++++++++++++++++++
> >> drivers/iommu/intel/iommu.c | 12 ---
> >> 3 files changed, 209 insertions(+), 12 deletions(-)
> >>
> >> diff --git a/drivers/iommu/intel/iommu.h
> >> b/drivers/iommu/intel/iommu.h index 6f6bffc60852..700574421b51
> 100644
> >> --- a/drivers/iommu/intel/iommu.h
> >> +++ b/drivers/iommu/intel/iommu.h
> >> @@ -35,6 +35,8 @@
> >> #define VTD_PAGE_MASK (((u64)-1) << VTD_PAGE_SHIFT)
> >> #define VTD_PAGE_ALIGN(addr) (((addr) + VTD_PAGE_SIZE - 1) &
> >> VTD_PAGE_MASK)
> >>
> >> +#define IOVA_PFN(addr) ((addr) >> PAGE_SHIFT)
> >> +
> >> #define VTD_STRIDE_SHIFT (9)
> >> #define VTD_STRIDE_MASK (((u64)-1) << VTD_STRIDE_SHIFT)
> >>
> >> @@ -1041,6 +1043,13 @@ static inline void context_set_sm_pre(struct
> >> context_entry *context)
> >> context->lo |= BIT_ULL(4);
> >> }
> >>
> >> +/* Returns a number of VTD pages, but aligned to MM page size */
> >> +static inline unsigned long aligned_nrpages(unsigned long host_addr,
> >> +size_t
> >> +size) {
> >> + host_addr &= ~PAGE_MASK;
> >> + return PAGE_ALIGN(host_addr + size) >> VTD_PAGE_SHIFT; }
> >> +
> >> /* Convert value to context PASID directory size field coding. */
> >> #define context_pdts(pds) (((pds) & 0x7) << 9)
> >>
> >> @@ -1116,6 +1125,11 @@ int cache_tag_assign_domain(struct
> dmar_domain
> >> *domain, u16 did,
> >> struct device *dev, ioasid_t pasid); void
> >> cache_tag_unassign_domain(struct dmar_domain *domain, u16 did,
> >> struct device *dev, ioasid_t pasid);
> >> +void cache_tag_flush_range(struct dmar_domain *domain, unsigned long
> >> start,
> >> + unsigned long end, int ih);
> >> +void cache_tag_flush_all(struct dmar_domain *domain); void
> >> +cache_tag_flush_range_np(struct dmar_domain *domain, unsigned long
> >> start,
> >> + unsigned long end);
> >>
> >> #ifdef CONFIG_INTEL_IOMMU_SVM
> >> void intel_svm_check(struct intel_iommu *iommu); diff --git
> >> a/drivers/iommu/intel/cache.c b/drivers/iommu/intel/cache.c index
> >> debbdaeff1c4..b2270dc8a765 100644
> >> --- a/drivers/iommu/intel/cache.c
> >> +++ b/drivers/iommu/intel/cache.c
> >> @@ -12,6 +12,7 @@
> >> #include <linux/dmar.h>
> >> #include <linux/iommu.h>
> >> #include <linux/memory.h>
> >> +#include <linux/pci.h>
> >> #include <linux/spinlock.h>
> >>
> >> #include "iommu.h"
> >> @@ -194,3 +195,197 @@ void cache_tag_unassign_domain(struct
> >> dmar_domain *domain, u16 did,
> >> if (domain->domain.type == IOMMU_DOMAIN_NESTED)
> >> __cache_tag_unassign_parent_domain(domain->s2_domain,
> >> did, dev, pasid); }
> >> +
> >> +static unsigned long calculate_psi_aligned_address(unsigned long start,
> >> + unsigned long end,
> >> + unsigned long *_pages,
> >> + unsigned long *_mask)
> >> +{
> >> + unsigned long pages = aligned_nrpages(start, end - start + 1);
> >> + unsigned long aligned_pages = __roundup_pow_of_two(pages);
> >> + unsigned long bitmask = aligned_pages - 1;
> >> + unsigned long mask = ilog2(aligned_pages);
> >> + unsigned long pfn = IOVA_PFN(start);
> >> +
> >> + /*
> >> + * PSI masks the low order bits of the base address. If the
> >> + * address isn't aligned to the mask, then compute a mask value
> >> + * needed to ensure the target range is flushed.
> >> + */
> >> + if (unlikely(bitmask & pfn)) {
> >> + unsigned long end_pfn = pfn + pages - 1, shared_bits;
> >> +
> >> + /*
> >> + * Since end_pfn <= pfn + bitmask, the only way bits
> >> + * higher than bitmask can differ in pfn and end_pfn is
> >> + * by carrying. This means after masking out bitmask,
> >> + * high bits starting with the first set bit in
> >> + * shared_bits are all equal in both pfn and end_pfn.
> >> + */
> >> + shared_bits = ~(pfn ^ end_pfn) & ~bitmask;
> >> + mask = shared_bits ? __ffs(shared_bits) : BITS_PER_LONG;
> >> + }
> >> +
> >> + *_pages = aligned_pages;
> >> + *_mask = mask;
> >> +
> >> + return ALIGN_DOWN(start, VTD_PAGE_SIZE); }
> > It's a good idea to use the above logic to calculate the appropriate mask for
> non-size-aligned page selective invalidation for all domains, including sva
> domain. Sounds like we plan to allow non-size-aligned page selection
> invalidation.
> >
> > However, currently there are some places in the code which have the
> assumption that the size of the page selective invalidation should be aligned
> with the start address, a.k.a. only size-aligned page selective invalidation
> should happen and non-size-aligned one isn't expected.
> >
> > One example is in qi_flush_dev_iotlb_pasid() and there is a checking:
> > if (!IS_ALIGNED(addr, VTD_PAGE_SIZE << size_order)
> > pr_warn_ratelimited(...)
> > If the non-size-aligned page selective invalidation is allowed, the warning
> message may come out which sounds confusing and impacts performance.
> >
> > Another example is in qi_flush_piotlb() and there is a WARN_ON_ONCE() to
> remind user when non-size-aligned page selective invalidation is being used.
> > If (WARN_ON_ONCE(!IS_ALIGNED(addr, align))
> > ...
> >
> > So, can we consider removing the checking as well in this patch-set?
>
> This series doesn't intend to change the driver's behavior, so the check and
> warning you mentioned should be kept. The iommu core ensures the
> invalidation size is page-size aligned. Those checks in the iommu driver just
> make sure that the callers are doing things right.
The fact is this patch aims to allow non-size-aligned page selective invalidation which aren't expected by those warning messages.

So, either we disable use non-size-aligned page selective invalidation just like what we did previously for sva domain, or we remove those warning messages and use non-size-aligned page selective invalidation introduced in this patch for sva domain.

Regards,
-Tina
>
> Best regards,
> baolu

2024-04-15 09:20:03

by Baolu Lu

[permalink] [raw]
Subject: Re: [PATCH v2 02/12] iommu/vt-d: Add cache tag invalidation helpers

On 4/15/24 12:15 PM, Zhang, Tina wrote:
>
>> -----Original Message-----
>> From: Lu Baolu<[email protected]>
>> Sent: Wednesday, April 10, 2024 10:09 AM
>> To: Joerg Roedel<[email protected]>; Will Deacon<[email protected]>; Robin
>> Murphy<[email protected]>; Tian, Kevin<[email protected]>;
>> Jason Gunthorpe<[email protected]>
>> Cc: Zhang, Tina<[email protected]>; Liu, Yi L<[email protected]>;
>> [email protected];[email protected]; Lu Baolu
>> <[email protected]>
>> Subject: [PATCH v2 02/12] iommu/vt-d: Add cache tag invalidation helpers
>>
>> Add several helpers to invalidate the caches after mappings in the affected
>> domain are changed.
>>
>> - cache_tag_flush_range() invalidates a range of caches after mappings
>> within this range are changed. It uses the page-selective cache
>> invalidation methods.
>>
>> - cache_tag_flush_all() invalidates all caches tagged by a domain ID.
>> It uses the domain-selective cache invalidation methods.
>>
>> - cache_tag_flush_range_np() invalidates a range of caches when new
>> mappings are created in the domain and the corresponding page table
>> entries change from non-present to present.
>>
>> Signed-off-by: Lu Baolu<[email protected]>
>> ---
>> drivers/iommu/intel/iommu.h | 14 +++
>> drivers/iommu/intel/cache.c | 195
>> ++++++++++++++++++++++++++++++++++++
>> drivers/iommu/intel/iommu.c | 12 ---
>> 3 files changed, 209 insertions(+), 12 deletions(-)
>>
>> diff --git a/drivers/iommu/intel/iommu.h b/drivers/iommu/intel/iommu.h
>> index 6f6bffc60852..700574421b51 100644
>> --- a/drivers/iommu/intel/iommu.h
>> +++ b/drivers/iommu/intel/iommu.h
>> @@ -35,6 +35,8 @@
>> #define VTD_PAGE_MASK (((u64)-1) << VTD_PAGE_SHIFT)
>> #define VTD_PAGE_ALIGN(addr) (((addr) + VTD_PAGE_SIZE - 1) &
>> VTD_PAGE_MASK)
>>
>> +#define IOVA_PFN(addr) ((addr) >> PAGE_SHIFT)
>> +
>> #define VTD_STRIDE_SHIFT (9)
>> #define VTD_STRIDE_MASK (((u64)-1) << VTD_STRIDE_SHIFT)
>>
>> @@ -1041,6 +1043,13 @@ static inline void context_set_sm_pre(struct
>> context_entry *context)
>> context->lo |= BIT_ULL(4);
>> }
>>
>> +/* Returns a number of VTD pages, but aligned to MM page size */ static
>> +inline unsigned long aligned_nrpages(unsigned long host_addr, size_t
>> +size) {
>> + host_addr &= ~PAGE_MASK;
>> + return PAGE_ALIGN(host_addr + size) >> VTD_PAGE_SHIFT; }
>> +
>> /* Convert value to context PASID directory size field coding. */
>> #define context_pdts(pds) (((pds) & 0x7) << 9)
>>
>> @@ -1116,6 +1125,11 @@ int cache_tag_assign_domain(struct
>> dmar_domain *domain, u16 did,
>> struct device *dev, ioasid_t pasid); void
>> cache_tag_unassign_domain(struct dmar_domain *domain, u16 did,
>> struct device *dev, ioasid_t pasid);
>> +void cache_tag_flush_range(struct dmar_domain *domain, unsigned long
>> start,
>> + unsigned long end, int ih);
>> +void cache_tag_flush_all(struct dmar_domain *domain); void
>> +cache_tag_flush_range_np(struct dmar_domain *domain, unsigned long
>> start,
>> + unsigned long end);
>>
>> #ifdef CONFIG_INTEL_IOMMU_SVM
>> void intel_svm_check(struct intel_iommu *iommu); diff --git
>> a/drivers/iommu/intel/cache.c b/drivers/iommu/intel/cache.c index
>> debbdaeff1c4..b2270dc8a765 100644
>> --- a/drivers/iommu/intel/cache.c
>> +++ b/drivers/iommu/intel/cache.c
>> @@ -12,6 +12,7 @@
>> #include <linux/dmar.h>
>> #include <linux/iommu.h>
>> #include <linux/memory.h>
>> +#include <linux/pci.h>
>> #include <linux/spinlock.h>
>>
>> #include "iommu.h"
>> @@ -194,3 +195,197 @@ void cache_tag_unassign_domain(struct
>> dmar_domain *domain, u16 did,
>> if (domain->domain.type == IOMMU_DOMAIN_NESTED)
>> __cache_tag_unassign_parent_domain(domain->s2_domain,
>> did, dev, pasid); }
>> +
>> +static unsigned long calculate_psi_aligned_address(unsigned long start,
>> + unsigned long end,
>> + unsigned long *_pages,
>> + unsigned long *_mask)
>> +{
>> + unsigned long pages = aligned_nrpages(start, end - start + 1);
>> + unsigned long aligned_pages = __roundup_pow_of_two(pages);
>> + unsigned long bitmask = aligned_pages - 1;
>> + unsigned long mask = ilog2(aligned_pages);
>> + unsigned long pfn = IOVA_PFN(start);
>> +
>> + /*
>> + * PSI masks the low order bits of the base address. If the
>> + * address isn't aligned to the mask, then compute a mask value
>> + * needed to ensure the target range is flushed.
>> + */
>> + if (unlikely(bitmask & pfn)) {
>> + unsigned long end_pfn = pfn + pages - 1, shared_bits;
>> +
>> + /*
>> + * Since end_pfn <= pfn + bitmask, the only way bits
>> + * higher than bitmask can differ in pfn and end_pfn is
>> + * by carrying. This means after masking out bitmask,
>> + * high bits starting with the first set bit in
>> + * shared_bits are all equal in both pfn and end_pfn.
>> + */
>> + shared_bits = ~(pfn ^ end_pfn) & ~bitmask;
>> + mask = shared_bits ? __ffs(shared_bits) : BITS_PER_LONG;
>> + }
>> +
>> + *_pages = aligned_pages;
>> + *_mask = mask;
>> +
>> + return ALIGN_DOWN(start, VTD_PAGE_SIZE); }
> It's a good idea to use the above logic to calculate the appropriate mask for non-size-aligned page selective invalidation for all domains, including sva domain. Sounds like we plan to allow non-size-aligned page selection invalidation.
>
> However, currently there are some places in the code which have the assumption that the size of the page selective invalidation should be aligned with the start address, a.k.a. only size-aligned page selective invalidation should happen and non-size-aligned one isn't expected.
>
> One example is in qi_flush_dev_iotlb_pasid() and there is a checking:
> if (!IS_ALIGNED(addr, VTD_PAGE_SIZE << size_order)
> pr_warn_ratelimited(...)
> If the non-size-aligned page selective invalidation is allowed, the warning message may come out which sounds confusing and impacts performance.
>
> Another example is in qi_flush_piotlb() and there is a WARN_ON_ONCE() to remind user when non-size-aligned page selective invalidation is being used.
> If (WARN_ON_ONCE(!IS_ALIGNED(addr, align))
> ...
>
> So, can we consider removing the checking as well in this patch-set?

This series doesn't intend to change the driver's behavior, so the check
and warning you mentioned should be kept. The iommu core ensures the
invalidation size is page-size aligned. Those checks in the iommu driver
just make sure that the callers are doing things right.

Best regards,
baolu