2024-03-25 10:38:43

by Baolu Lu

Subject: [PATCH 02/12] iommu/vt-d: Add cache tag invalidation helpers

Add several helpers to invalidate the caches after mappings in the
affected domain are changed.

- cache_tag_flush_range() invalidates a range of caches after mappings
within this range are changed. It uses the page-selective cache
invalidation methods.

- cache_tag_flush_all() invalidates all caches tagged by a domain ID.
It uses the domain-selective cache invalidation methods.

- cache_tag_flush_cm_range() invalidates a range of caches if the IOMMU is
working in caching mode and second-only translation is used for the
affected domain. It is called when non-present to present mappings are
created. (An illustrative call-site sketch follows this list.)
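
As a rough illustration only (not part of this patch; the caller names
below are placeholders, not functions in the driver), the helpers are
expected to be used from the mapping paths roughly like this:

        /* Illustrative sketch; caller names are placeholders. */
        static void example_unmap_sync(struct dmar_domain *domain,
                                       unsigned long start, unsigned long end)
        {
                /* mappings changed from present to non-present */
                cache_tag_flush_range(domain, start, end, 0);
        }

        static void example_map_sync(struct dmar_domain *domain,
                                     unsigned long start, unsigned long end)
        {
                /* new mappings created (non-present to present) */
                cache_tag_flush_cm_range(domain, start, end);
        }

        static void example_domain_wide_change(struct dmar_domain *domain)
        {
                /* e.g. the whole domain's mappings are torn down */
                cache_tag_flush_all(domain);
        }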

Signed-off-by: Lu Baolu <[email protected]>
---
drivers/iommu/intel/iommu.h | 7 ++
drivers/iommu/intel/cache.c | 189 ++++++++++++++++++++++++++++++++++++
drivers/iommu/intel/iommu.c | 5 -
3 files changed, 196 insertions(+), 5 deletions(-)

diff --git a/drivers/iommu/intel/iommu.h b/drivers/iommu/intel/iommu.h
index e3723b7a0b31..d05fa0122d65 100644
--- a/drivers/iommu/intel/iommu.h
+++ b/drivers/iommu/intel/iommu.h
@@ -35,6 +35,8 @@
#define VTD_PAGE_MASK (((u64)-1) << VTD_PAGE_SHIFT)
#define VTD_PAGE_ALIGN(addr) (((addr) + VTD_PAGE_SIZE - 1) & VTD_PAGE_MASK)

+#define IOVA_PFN(addr) ((addr) >> PAGE_SHIFT)
+
#define VTD_STRIDE_SHIFT (9)
#define VTD_STRIDE_MASK (((u64)-1) << VTD_STRIDE_SHIFT)

@@ -1116,6 +1118,11 @@ int cache_tag_assign_domain(struct dmar_domain *domain, u16 did,
struct device *dev, ioasid_t pasid);
void cache_tag_unassign_domain(struct dmar_domain *domain, u16 did,
struct device *dev, ioasid_t pasid);
+void cache_tag_flush_range(struct dmar_domain *domain, unsigned long start,
+ unsigned long end, int ih);
+void cache_tag_flush_all(struct dmar_domain *domain);
+void cache_tag_flush_cm_range(struct dmar_domain *domain, unsigned long start,
+ unsigned long end);

#ifdef CONFIG_INTEL_IOMMU_SVM
void intel_svm_check(struct intel_iommu *iommu);
diff --git a/drivers/iommu/intel/cache.c b/drivers/iommu/intel/cache.c
index 5a4e12e494b6..4c245d39faf2 100644
--- a/drivers/iommu/intel/cache.c
+++ b/drivers/iommu/intel/cache.c
@@ -12,6 +12,7 @@
#include <linux/dmar.h>
#include <linux/iommu.h>
#include <linux/memory.h>
+#include <linux/pci.h>
#include <linux/spinlock.h>

#include "iommu.h"
@@ -190,3 +191,191 @@ void cache_tag_unassign_domain(struct dmar_domain *domain, u16 did,
if (domain->domain.type == IOMMU_DOMAIN_NESTED)
__cache_tag_unassign_parent_domain(domain->s2_domain, did, dev, pasid);
}
+
+static unsigned long calculate_psi_aligned_address(unsigned long start,
+ unsigned long end,
+ unsigned long *_pages,
+ unsigned long *_mask)
+{
+ unsigned long pages = (end - start + VTD_PAGE_SIZE - 1) >> VTD_PAGE_SHIFT;
+ unsigned long aligned_pages = __roundup_pow_of_two(pages);
+ unsigned long bitmask = aligned_pages - 1;
+ unsigned long mask = ilog2(aligned_pages);
+ unsigned long pfn = IOVA_PFN(start);
+
+ /*
+ * PSI masks the low order bits of the base address. If the
+ * address isn't aligned to the mask, then compute a mask value
+ * needed to ensure the target range is flushed.
+ */
+ if (unlikely(bitmask & pfn)) {
+ unsigned long end_pfn = pfn + pages - 1, shared_bits;
+
+ /*
+ * Since end_pfn <= pfn + bitmask, the only way bits
+ * higher than bitmask can differ in pfn and end_pfn is
+ * by carrying. This means after masking out bitmask,
+ * high bits starting with the first set bit in
+ * shared_bits are all equal in both pfn and end_pfn.
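+ *
+ * For example (illustrative numbers): pfn = 0x1003 with pages = 4
+ * gives aligned_pages = 4, bitmask = 0x3 and end_pfn = 0x1006.
+ * shared_bits = ~(0x1003 ^ 0x1006) & ~0x3 has bit 3 as its first
+ * set bit, so mask = 3 and the PSI covers pfns 0x1000 - 0x1007,
+ * which includes the whole requested range.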
+ */
+ shared_bits = ~(pfn ^ end_pfn) & ~bitmask;
+ mask = shared_bits ? __ffs(shared_bits) : BITS_PER_LONG;
+ }
+
+ *_pages = aligned_pages;
+ *_mask = mask;
+
+ return ALIGN_DOWN(start, VTD_PAGE_SIZE);
+}
+
+/*
+ * Invalidates a range of IOVA from @start (inclusive) to @end (inclusive)
+ * when the memory mappings in the target domain have been modified.
+ */
+void cache_tag_flush_range(struct dmar_domain *domain, unsigned long start,
+ unsigned long end, int ih)
+{
+ unsigned long pages, mask, addr;
+ struct cache_tag *tag;
+ unsigned long flags;
+
+ addr = calculate_psi_aligned_address(start, end, &pages, &mask);
+
+ spin_lock_irqsave(&domain->cache_lock, flags);
+ list_for_each_entry(tag, &domain->cache_tags, node) {
+ struct device_domain_info *info = dev_iommu_priv_get(tag->dev);
+ struct intel_iommu *iommu = tag->iommu;
+ u16 sid = PCI_DEVID(info->bus, info->devfn);
+
+ switch (tag->type) {
+ case CACHE_TAG_TYPE_IOTLB:
+ case CACHE_TAG_TYPE_PARENT_IOTLB:
+ if (domain->use_first_level) {
+ qi_flush_piotlb(iommu, tag->domain_id,
+ tag->pasid, addr, pages, ih);
+ } else {
+ /*
+ * Fallback to domain selective flush if no
+ * PSI support or the size is too big.
+ */
+ if (!cap_pgsel_inv(iommu->cap) ||
+ mask > cap_max_amask_val(iommu->cap))
+ iommu->flush.flush_iotlb(iommu, tag->domain_id,
+ 0, 0, DMA_TLB_DSI_FLUSH);
+ else
+ iommu->flush.flush_iotlb(iommu, tag->domain_id,
+ addr | ih, mask,
+ DMA_TLB_PSI_FLUSH);
+ }
+ break;
+ case CACHE_TAG_TYPE_DEVTLB:
+ if (tag->pasid == IOMMU_NO_PASID)
+ qi_flush_dev_iotlb(iommu, sid, info->pfsid,
+ info->ats_qdep, addr, mask);
+ else
+ qi_flush_dev_iotlb_pasid(iommu, sid, info->pfsid,
+ tag->pasid, info->ats_qdep,
+ addr, mask);
+
+ quirk_extra_dev_tlb_flush(info, addr, mask, tag->pasid, info->ats_qdep);
+ break;
+ case CACHE_TAG_TYPE_PARENT_DEVTLB:
+ /*
+ * Address translation cache in device side caches the
+ * result of nested translation. There is no easy way
+ * to identify the exact set of nested translations
+ * affected by a change in S2. So just flush the entire
+ * device cache.
+ */
+ qi_flush_dev_iotlb(iommu, sid, info->pfsid, info->ats_qdep,
+ 0, MAX_AGAW_PFN_WIDTH);
+ quirk_extra_dev_tlb_flush(info, 0, MAX_AGAW_PFN_WIDTH,
+ IOMMU_NO_PASID, info->ats_qdep);
+ break;
+ }
+ }
+ spin_unlock_irqrestore(&domain->cache_lock, flags);
+}
+
+/*
+ * Invalidates all ranges of IOVA when the memory mappings in the target
+ * domain have been modified.
+ */
+void cache_tag_flush_all(struct dmar_domain *domain)
+{
+ struct cache_tag *tag;
+ unsigned long flags;
+
+ spin_lock_irqsave(&domain->cache_lock, flags);
+ list_for_each_entry(tag, &domain->cache_tags, node) {
+ struct device_domain_info *info = dev_iommu_priv_get(tag->dev);
+ struct intel_iommu *iommu = tag->iommu;
+ u16 sid = PCI_DEVID(info->bus, info->devfn);
+
+ switch (tag->type) {
+ case CACHE_TAG_TYPE_IOTLB:
+ case CACHE_TAG_TYPE_PARENT_IOTLB:
+ if (domain->use_first_level)
+ qi_flush_piotlb(iommu, tag->domain_id,
+ tag->pasid, 0, -1, 0);
+ else
+ iommu->flush.flush_iotlb(iommu, tag->domain_id,
+ 0, 0, DMA_TLB_DSI_FLUSH);
+ break;
+ case CACHE_TAG_TYPE_DEVTLB:
+ case CACHE_TAG_TYPE_PARENT_DEVTLB:
+ qi_flush_dev_iotlb(iommu, sid, info->pfsid, info->ats_qdep,
+ 0, MAX_AGAW_PFN_WIDTH);
+ quirk_extra_dev_tlb_flush(info, 0, MAX_AGAW_PFN_WIDTH,
+ IOMMU_NO_PASID, info->ats_qdep);
+ break;
+ }
+ }
+ spin_unlock_irqrestore(&domain->cache_lock, flags);
+}
+
+/*
+ * Invalidate a range of IOVA when IOMMU is in caching mode and new mappings
+ * are added to the target domain.
+ */
+void cache_tag_flush_cm_range(struct dmar_domain *domain, unsigned long start,
+ unsigned long end)
+{
+ unsigned long pages, mask, addr;
+ struct cache_tag *tag;
+ unsigned long flags;
+
+ addr = calculate_psi_aligned_address(start, end, &pages, &mask);
+
+ spin_lock_irqsave(&domain->cache_lock, flags);
+ list_for_each_entry(tag, &domain->cache_tags, node) {
+ struct intel_iommu *iommu = tag->iommu;
+
+ /*
+ * When IOMMU is enabled in caching mode some non-present
+ * mappings may be cached by the IOTLB if it uses second-
+ * only translation.
+ */
+ if (!cap_caching_mode(iommu->cap) || domain->use_first_level) {
+ iommu_flush_write_buffer(iommu);
+ continue;
+ }
+
+ if (tag->type == CACHE_TAG_TYPE_IOTLB ||
+ tag->type == CACHE_TAG_TYPE_PARENT_IOTLB) {
+ /*
+ * Fallback to domain selective flush if no
+ * PSI support or the size is too big.
+ */
+ if (!cap_pgsel_inv(iommu->cap) ||
+ mask > cap_max_amask_val(iommu->cap))
+ iommu->flush.flush_iotlb(iommu, tag->domain_id,
+ 0, 0, DMA_TLB_DSI_FLUSH);
+ else
+ iommu->flush.flush_iotlb(iommu, tag->domain_id,
+ addr, mask,
+ DMA_TLB_PSI_FLUSH);
+ }
+ }
+ spin_unlock_irqrestore(&domain->cache_lock, flags);
+}
diff --git a/drivers/iommu/intel/iommu.c b/drivers/iommu/intel/iommu.c
index b4efbdedccce..93e4422c9b10 100644
--- a/drivers/iommu/intel/iommu.c
+++ b/drivers/iommu/intel/iommu.c
@@ -54,11 +54,6 @@
__DOMAIN_MAX_PFN(gaw), (unsigned long)-1))
#define DOMAIN_MAX_ADDR(gaw) (((uint64_t)__DOMAIN_MAX_PFN(gaw)) << VTD_PAGE_SHIFT)

-/* IO virtual address start page frame number */
-#define IOVA_START_PFN (1)
-
-#define IOVA_PFN(addr) ((addr) >> PAGE_SHIFT)
-
static void __init check_tylersburg_isoch(void);
static int rwbf_quirk;

--
2.34.1



2024-03-28 07:40:07

by Tian, Kevin

Subject: RE: [PATCH 02/12] iommu/vt-d: Add cache tag invalidation helpers

> From: Lu Baolu <[email protected]>
> Sent: Monday, March 25, 2024 10:17 AM
>
> +
> +static unsigned long calculate_psi_aligned_address(unsigned long start,
> + unsigned long end,
> + unsigned long *_pages,
> + unsigned long *_mask)
> +{
> + unsigned long pages = (end - start + VTD_PAGE_SIZE - 1) >> VTD_PAGE_SHIFT;

this doesn't sound correct. You'd want to follow aligned_nrpages().

> + case CACHE_TAG_TYPE_DEVTLB:
> + if (tag->pasid == IOMMU_NO_PASID)
> + qi_flush_dev_iotlb(iommu, sid, info->pfsid,
> + info->ats_qdep, addr, mask);
> + else
> + qi_flush_dev_iotlb_pasid(iommu, sid, info->pfsid,
> + tag->pasid, info->ats_qdep,
> + addr, mask);
> +
> + quirk_extra_dev_tlb_flush(info, addr, mask, tag->pasid, info->ats_qdep);
> + break;
> + case CACHE_TAG_TYPE_PARENT_DEVTLB:
> + /*
> + * Address translation cache in device side caches the
> + * result of nested translation. There is no easy way
> + * to identify the exact set of nested translations
> + * affected by a change in S2. So just flush the entire
> + * device cache.
> + */
> + qi_flush_dev_iotlb(iommu, sid, info->pfsid, info->ats_qdep,
> + 0, MAX_AGAW_PFN_WIDTH);
> + quirk_extra_dev_tlb_flush(info, 0, MAX_AGAW_PFN_WIDTH,
> + IOMMU_NO_PASID, info->ats_qdep);
> + break;

parent devtlb can have pasid too, though it's not enabled now. As core helpers
we can probably support it now, leading to simpler code:

case CACHE_TAG_TYPE_PARENT_DEVTLB:
//change addr/mask
//fallthrough
case CACHE_TAG_TYPE_DEVTLB:
//what this patch does

> +void cache_tag_flush_all(struct dmar_domain *domain)
> +{
> + struct cache_tag *tag;
> + unsigned long flags;
> +
> + spin_lock_irqsave(&domain->cache_lock, flags);
> + list_for_each_entry(tag, &domain->cache_tags, node) {
> + struct device_domain_info *info = dev_iommu_priv_get(tag->dev);
> + struct intel_iommu *iommu = tag->iommu;
> + u16 sid = PCI_DEVID(info->bus, info->devfn);
> +
> + switch (tag->type) {
> + case CACHE_TAG_TYPE_IOTLB:
> + case CACHE_TAG_TYPE_PARENT_IOTLB:
> + if (domain->use_first_level)
> + qi_flush_piotlb(iommu, tag->domain_id,
> + tag->pasid, 0, -1, 0);
> + else
> + iommu->flush.flush_iotlb(iommu, tag->domain_id,
> + 0, 0, DMA_TLB_DSI_FLUSH);
> + break;
> + case CACHE_TAG_TYPE_DEVTLB:
> + case CACHE_TAG_TYPE_PARENT_DEVTLB:
> + qi_flush_dev_iotlb(iommu, sid, info->pfsid, info->ats_qdep,
> + 0, MAX_AGAW_PFN_WIDTH);
> + quirk_extra_dev_tlb_flush(info, 0, MAX_AGAW_PFN_WIDTH,
> + IOMMU_NO_PASID, info->ats_qdep);
> + break;
> + }
> + }

could this loop be consolidated with the one in previous helper? anyway
the only difference is on addr/mask...

> +/*
> + * Invalidate a range of IOVA when IOMMU is in caching mode and new mappings
> + * are added to the target domain.
> + */
> +void cache_tag_flush_cm_range(struct dmar_domain *domain, unsigned long start,
> + unsigned long end)
> +{

I'm also not sure why this is worth a separate helper. why couldn't it
be managed by previous flush_range()?

> + unsigned long pages, mask, addr;
> + struct cache_tag *tag;
> + unsigned long flags;
> +
> + addr = calculate_psi_aligned_address(start, end, &pages, &mask);
> +
> + spin_lock_irqsave(&domain->cache_lock, flags);
> + list_for_each_entry(tag, &domain->cache_tags, node) {
> + struct intel_iommu *iommu = tag->iommu;
> +
> + /*
> + * When IOMMU is enabled in caching mode some non-present
> + * mappings may be cached by the IOTLB if it uses second-
> + * only translation.
> + */
> + if (!cap_caching_mode(iommu->cap) || domain->use_first_level) {
> + iommu_flush_write_buffer(iommu);
> + continue;
> + }

the comment really doesn't tell what this block does. from its intention
it's probably more suitable to be on the caller side, as today.

> +
> + if (tag->type == CACHE_TAG_TYPE_IOTLB ||
> + tag->type == CACHE_TAG_TYPE_PARENT_IOTLB) {
> + /*
> + * Fallback to domain selective flush if no
> + * PSI support or the size is too big.
> + */
> + if (!cap_pgsel_inv(iommu->cap) ||
> + mask > cap_max_amask_val(iommu->cap))
> + iommu->flush.flush_iotlb(iommu, tag->domain_id,
> + 0, 0, DMA_TLB_DSI_FLUSH);
> + else
> + iommu->flush.flush_iotlb(iommu, tag->domain_id,
> + addr, mask,
> + DMA_TLB_PSI_FLUSH);
> + }
> + }

You skipped devtlb invalidation. yes it's not necessary based on current
nested translation design and this part is inconsistent in different paths.

but as a semantics change you may want to first make removing devtlb
invalidation a separate patch and then do this cleanup, so bisect is easier.

2024-04-07 05:34:18

by Baolu Lu

Subject: Re: [PATCH 02/12] iommu/vt-d: Add cache tag invalidation helpers

On 3/28/24 3:39 PM, Tian, Kevin wrote:
>> From: Lu Baolu <[email protected]>
>> Sent: Monday, March 25, 2024 10:17 AM
>>
>> +
>> +static unsigned long calculate_psi_aligned_address(unsigned long start,
>> + unsigned long end,
>> + unsigned long *_pages,
>> + unsigned long *_mask)
>> +{
>> + unsigned long pages = (end - start + VTD_PAGE_SIZE - 1) >> VTD_PAGE_SHIFT;
>
> this doesn't sound correct. You'd want to follow aligned_nrpages().

Okay, I will make it like below.

unsigned long pages = aligned_nrpages(start, end - start + 1);
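
A quick worked example of the difference (illustrative numbers; this
assumes aligned_nrpages() keeps its current behavior of adding the start
address's page offset to the size before rounding up):

/*
 * start = 0x1800, end = 0x27ff  =>  size = 0x1000, but the range
 * touches two page frames (0x1000-0x1fff and 0x2000-0x2fff).
 *
 * old: (end - start + VTD_PAGE_SIZE - 1) >> VTD_PAGE_SHIFT
 *    = (0xfff + 0xfff) >> 12 = 0x1ffe >> 12 = 1 page   (one frame missed)
 *
 * new: aligned_nrpages(start, end - start + 1)
 *    = PAGE_ALIGN(0x800 + 0x1000) >> VTD_PAGE_SHIFT
 *    = 0x2000 >> 12 = 2 pages                          (both frames covered)
 */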

>
>> + case CACHE_TAG_TYPE_DEVTLB:
>> + if (tag->pasid == IOMMU_NO_PASID)
>> + qi_flush_dev_iotlb(iommu, sid, info->pfsid,
>> + info->ats_qdep, addr, mask);
>> + else
>> + qi_flush_dev_iotlb_pasid(iommu, sid, info->pfsid,
>> + tag->pasid, info->ats_qdep,
>> + addr, mask);
>> +
>> + quirk_extra_dev_tlb_flush(info, addr, mask, tag->pasid, info->ats_qdep);
>> + break;
>> + case CACHE_TAG_TYPE_PARENT_DEVTLB:
>> + /*
>> + * Address translation cache in device side caches the
>> + * result of nested translation. There is no easy way
>> + * to identify the exact set of nested translations
>> + * affected by a change in S2. So just flush the entire
>> + * device cache.
>> + */
>> + qi_flush_dev_iotlb(iommu, sid, info->pfsid, info->ats_qdep,
>> + 0, MAX_AGAW_PFN_WIDTH);
>> + quirk_extra_dev_tlb_flush(info, 0, MAX_AGAW_PFN_WIDTH,
>> + IOMMU_NO_PASID, info->ats_qdep);
>> + break;
>
> parent devtlb can have pasid too, though it's not enabled now. As core helpers
> we can probably support it now, leading to simpler code:
>
> case CACHE_TAG_TYPE_PARENT_DEVTLB:
> //change addr/mask
> //fallthrough
> case CACHE_TAG_TYPE_DEVTLB:
> //what this patch does

Yes. Yours is better. I will make it like below:

+ case CACHE_TAG_PARENT_DEVTLB:
+ /*
+ * Address translation cache in device side caches the
+ * result of nested translation. There is no easy way
+ * to identify the exact set of nested translations
+ * affected by a change in S2. So just flush the entire
+ * device cache.
+ */
+ addr = 0;
+ mask = MAX_AGAW_PFN_WIDTH;
+ fallthrough;
case CACHE_TAG_DEVTLB:


>> +void cache_tag_flush_all(struct dmar_domain *domain)
>> +{
>> + struct cache_tag *tag;
>> + unsigned long flags;
>> +
>> + spin_lock_irqsave(&domain->cache_lock, flags);
>> + list_for_each_entry(tag, &domain->cache_tags, node) {
>> + struct device_domain_info *info = dev_iommu_priv_get(tag->dev);
>> + struct intel_iommu *iommu = tag->iommu;
>> + u16 sid = PCI_DEVID(info->bus, info->devfn);
>> +
>> + switch (tag->type) {
>> + case CACHE_TAG_TYPE_IOTLB:
>> + case CACHE_TAG_TYPE_PARENT_IOTLB:
>> + if (domain->use_first_level)
>> + qi_flush_piotlb(iommu, tag->domain_id,
>> + tag->pasid, 0, -1, 0);
>> + else
>> + iommu->flush.flush_iotlb(iommu, tag->domain_id,
>> + 0, 0, DMA_TLB_DSI_FLUSH);
>> + break;
>> + case CACHE_TAG_TYPE_DEVTLB:
>> + case CACHE_TAG_TYPE_PARENT_DEVTLB:
>> + qi_flush_dev_iotlb(iommu, sid, info->pfsid, info->ats_qdep,
>> + 0, MAX_AGAW_PFN_WIDTH);
>> + quirk_extra_dev_tlb_flush(info, 0, MAX_AGAW_PFN_WIDTH,
>> + IOMMU_NO_PASID, info->ats_qdep);
>> + break;
>> + }
>> + }
>
> could this loop be consolidated with the one in previous helper? anyway
> the only difference is on addr/mask...
>
>> +/*
>> + * Invalidate a range of IOVA when IOMMU is in caching mode and new mappings
>> + * are added to the target domain.
>> + */
>> +void cache_tag_flush_cm_range(struct dmar_domain *domain, unsigned long start,
>> + unsigned long end)
>> +{
>
> I'm also not sure why this is worth a separate helper. why couldn't it
> be managed by previous flush_range()?
This is only my preference. I'd like to separate things belonging to
different paths, so that it's easier for maintenance. For example, if,
in the future, we need to add or enhance something for a specific case,
we don't need to care about other cases.

>
>> + unsigned long pages, mask, addr;
>> + struct cache_tag *tag;
>> + unsigned long flags;
>> +
>> + addr = calculate_psi_aligned_address(start, end, &pages, &mask);
>> +
>> + spin_lock_irqsave(&domain->cache_lock, flags);
>> + list_for_each_entry(tag, &domain->cache_tags, node) {
>> + struct intel_iommu *iommu = tag->iommu;
>> +
>> + /*
>> + * When IOMMU is enabled in caching mode some non-present
>> + * mappings may be cached by the IOTLB if it uses second-
>> + * only translation.
>> + */
>> + if (!cap_caching_mode(iommu->cap) || domain->use_first_level) {
>> + iommu_flush_write_buffer(iommu);
>> + continue;
>> + }
>
> the comment really doesn't tell what this block does. from its intention
> it's probably more suitable to be on the caller side, as today.

Yes, agreed. Will remove it.

>
>> +
>> + if (tag->type == CACHE_TAG_TYPE_IOTLB ||
>> + tag->type == CACHE_TAG_TYPE_PARENT_IOTLB) {
>> + /*
>> + * Fallback to domain selective flush if no
>> + * PSI support or the size is too big.
>> + */
>> + if (!cap_pgsel_inv(iommu->cap) ||
>> + mask > cap_max_amask_val(iommu->cap))
>> + iommu->flush.flush_iotlb(iommu, tag->domain_id,
>> + 0, 0, DMA_TLB_DSI_FLUSH);
>> + else
>> + iommu->flush.flush_iotlb(iommu, tag->domain_id,
>> + addr, mask,
>> + DMA_TLB_PSI_FLUSH);
>> + }
>> + }
>
> You skipped devtlb invalidation. yes it's not necessary based on current
> nested translation design and this part is inconsistent in different paths.
>
> but as a semantics change you may want to first make removing devtlb
> invalidation a separate patch and then do this cleanup, so bisect is easier.

This helper is designed for map paths when the IOMMU is in caching mode.
Caching mode doesn't impact the device TLB, so we should not invalidate
the device TLB here.

I guess the confusing thing is about the code below.


/*
* In caching mode, changes of pages from non-present to present require
* flush. However, device IOTLB doesn't need to be flushed in this case.
*/
if (!cap_caching_mode(iommu->cap) || !map)
iommu_flush_dev_iotlb(domain, addr, mask);

The real code doesn't match the comments, and I think the comments
describe the right thing. So perhaps the code should be changed to match
the comments.

if (!map)
iommu_flush_dev_iotlb(domain, addr, mask);

??

Best regards,
baolu

2024-04-08 02:34:04

by Tian, Kevin

Subject: RE: [PATCH 02/12] iommu/vt-d: Add cache tag invalidation helpers

> From: Baolu Lu <[email protected]>
> Sent: Sunday, April 7, 2024 1:33 PM
>
> On 3/28/24 3:39 PM, Tian, Kevin wrote:
> >> From: Lu Baolu <[email protected]>
> >> Sent: Monday, March 25, 2024 10:17 AM
> >>
> >> +/*
> >> + * Invalidate a range of IOVA when IOMMU is in caching mode and new mappings
> >> + * are added to the target domain.
> >> + */
> >> +void cache_tag_flush_cm_range(struct dmar_domain *domain, unsigned long start,
> >> + unsigned long end)
> >> +{
> >
> > I'm also not sure why this is worth a separate helper. why couldn't it
> > be managed by previous flush_range()?
> This is only my preference. I'd like to separate things belonging to
> different paths, so that it's easier for maintenance. For example, if,
> in the future, we need to add or enhance something for a specific case,
> we don't need to care about other cases.

IMHO caching mode is an attribute in low level iommu which can be
handled perfectly well within the helper by checking that attribute.

it sounds a bit weird for the caller to know that detail and call different
helpers when all paths just want to request to flush a specific range.

> >> +
> >> + if (tag->type == CACHE_TAG_TYPE_IOTLB ||
> >> + tag->type == CACHE_TAG_TYPE_PARENT_IOTLB) {
> >> + /*
> >> + * Fallback to domain selective flush if no
> >> + * PSI support or the size is too big.
> >> + */
> >> + if (!cap_pgsel_inv(iommu->cap) ||
> >> + mask > cap_max_amask_val(iommu->cap))
> >> + iommu->flush.flush_iotlb(iommu, tag->domain_id,
> >> + 0, 0, DMA_TLB_DSI_FLUSH);
> >> + else
> >> + iommu->flush.flush_iotlb(iommu, tag->domain_id,
> >> + addr, mask,
> >> + DMA_TLB_PSI_FLUSH);
> >> + }
> >> + }
> >
> > You skipped devtlb invalidation. yes it's not necessary based on current
> > nested translation design and this part is inconsistent in different paths.
> >
> > but as a semantics change you may want to first make removing devtlb
> > invalidation a separate patch and then do this cleanup, so bisect is easier.
>
> This helper is designed for map paths when the IOMMU is in caching mode.
> Caching mode doesn't impact the device TLB, so we should not invalidate
> the device TLB here.
>
> I guess the confusing thing is about the code below.
>
>
> /*
> * In caching mode, changes of pages from non-present to present require
> * flush. However, device IOTLB doesn't need to be flushed in this case.
> */
> if (!cap_caching_mode(iommu->cap) || !map)
> iommu_flush_dev_iotlb(domain, addr, mask);
>
> The real code doesn't match the comments, and I think the comments
> describe the right thing. So perhaps the code should be changed to match
> the comments.
>
> if (!map)
> iommu_flush_dev_iotlb(domain, addr, mask);
>
> ??
>

yes. that should be fixed separately.

2024-04-08 02:55:17

by Baolu Lu

Subject: Re: [PATCH 02/12] iommu/vt-d: Add cache tag invalidation helpers

On 4/8/24 10:33 AM, Tian, Kevin wrote:
>> From: Baolu Lu<[email protected]>
>> Sent: Sunday, April 7, 2024 1:33 PM
>>
>> On 3/28/24 3:39 PM, Tian, Kevin wrote:
>>>> From: Lu Baolu<[email protected]>
>>>> Sent: Monday, March 25, 2024 10:17 AM
>>>>
>>>> +/*
>>>> + * Invalidate a range of IOVA when IOMMU is in caching mode and new mappings
>>>> + * are added to the target domain.
>>>> + */
>>>> +void cache_tag_flush_cm_range(struct dmar_domain *domain, unsigned long start,
>>>> + unsigned long end)
>>>> +{
>>> I'm also not sure why this is worth a separate helper. why couldn't it
>>> be managed by previous flush_range()?
>> This is only my preference. I'd like to separate things belonging to
>> different paths, so that it's easier for maintenance. For example, if,
>> in the future, we need to add or enhance something for a specific case,
>> we don't need to care about other cases.
> IMHO caching mode is an attribute in low level iommu which can be
> handled perfectly well within the helper by checking that attribute.
>
> it sounds a bit weird for the caller to know that detail and call different
> helpers when all paths just want to request to flush a specific range.

I see. The helper name is a bit confusing.

Generally speaking, cache_tag_flush_range() and cache_tag_flush_all()
are designed to flush caches for mapping change from present to non-
present. While cache_tag_flush_cm_range() is designed to flush caches
for mapping change from non-present to present.

How about renaming these helpers to

cache_tag_flush_present_range()
cache_tag_flush_present_all()
cache_tag_flush_non_present_range()

?

In cache_tag_flush_non_present_range() (sketched below),

- if IOMMU is not in caching mode, flush the write buffer if necessary,
or it's a no-op.
- if IOMMU is in caching mode, flush the IOTLB caches.
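
A minimal sketch of what that could look like (the name follows the
proposal above and the flush logic simply mirrors what this patch already
does, so treat it as illustrative rather than final):

void cache_tag_flush_non_present_range(struct dmar_domain *domain,
                                       unsigned long start, unsigned long end)
{
        unsigned long pages, mask, addr;
        struct cache_tag *tag;
        unsigned long flags;

        addr = calculate_psi_aligned_address(start, end, &pages, &mask);

        spin_lock_irqsave(&domain->cache_lock, flags);
        list_for_each_entry(tag, &domain->cache_tags, node) {
                struct intel_iommu *iommu = tag->iommu;

                /*
                 * Only caching mode with second-only translation can cache
                 * non-present entries; otherwise flushing the write buffer
                 * (when required) is sufficient.
                 */
                if (!cap_caching_mode(iommu->cap) || domain->use_first_level) {
                        iommu_flush_write_buffer(iommu);
                        continue;
                }

                /* Device TLB is not affected by caching mode. */
                if (tag->type == CACHE_TAG_TYPE_IOTLB ||
                    tag->type == CACHE_TAG_TYPE_PARENT_IOTLB) {
                        if (!cap_pgsel_inv(iommu->cap) ||
                            mask > cap_max_amask_val(iommu->cap))
                                iommu->flush.flush_iotlb(iommu, tag->domain_id,
                                                         0, 0, DMA_TLB_DSI_FLUSH);
                        else
                                iommu->flush.flush_iotlb(iommu, tag->domain_id,
                                                         addr, mask,
                                                         DMA_TLB_PSI_FLUSH);
                }
        }
        spin_unlock_irqrestore(&domain->cache_lock, flags);
}

The caching-mode and write-buffer details stay inside the helper, so the
map path only needs to pass the range.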

Best regards,
baolu

2024-04-08 03:14:59

by Tian, Kevin

Subject: RE: [PATCH 02/12] iommu/vt-d: Add cache tag invalidation helpers

> From: Baolu Lu <[email protected]>
> Sent: Monday, April 8, 2024 10:54 AM
>
> On 4/8/24 10:33 AM, Tian, Kevin wrote:
> >> From: Baolu Lu<[email protected]>
> >> Sent: Sunday, April 7, 2024 1:33 PM
> >>
> >> On 3/28/24 3:39 PM, Tian, Kevin wrote:
> >>>> From: Lu Baolu<[email protected]>
> >>>> Sent: Monday, March 25, 2024 10:17 AM
> >>>>
> >>>> +/*
> >>>> + * Invalidate a range of IOVA when IOMMU is in caching mode and new mappings
> >>>> + * are added to the target domain.
> >>>> + */
> >>>> +void cache_tag_flush_cm_range(struct dmar_domain *domain, unsigned long start,
> >>>> + unsigned long end)
> >>>> +{
> >>> I'm also not sure why this is worth a separate helper. why couldn't it
> >>> be managed by previous flush_range()?
> >> This is only my preference. I'd like to separate things belonging to
> >> different paths, so that it's easier for maintenance. For example, if,
> >> in the future, we need to add or enhance something for a specific case,
> >> we don't need to care about other cases.
> > IMHO caching mode is an attribute in low level iommu which can be
> > handled perfectly well within the helper by checking that attribute.
> >
> > it sounds a bit weird for the caller to know that detail and call different
> > helpers when all paths just want to request to flush a specific range.
>
> I see. The helper name is a bit confusing.
>
> Generally speaking, cache_tag_flush_range() and cache_tag_flush_all()
> are designed to flush caches for mapping change from present to non-
> present. While cache_tag_flush_cm_range() is designed to flush caches
> for mapping change from non-present to present.
>
> How about renaming these helpers to
>
> cache_tag_flush_present_range()
> cache_tag_flush_present_all()
> cache_tag_flush_non_present_range()

I'll probably keep cache_tag_flush_range/all() as they are, because the
naming matches the general interpretation of the tlb semantics.

Then name the 3rd one cache_tag_flush_range_np() to cover the various
additional requirements on transitions from non-present entries.

>
> ?
>
> In cache_tag_flush_non_present_range(),
>
> - if IOMMU is not in caching mode, flush the write buffer if necessary,

or if using stage-1

> or it's a no-op.
> - if IOMMU is in caching mode, flush the IOTLB caches.
>
> Best regards,
> baolu