2024-05-17 00:37:57

by Zhang, Tina

Subject: [PATCH 0/2] Batch IOTLB/dev-IOTLB invalidation

IOTLB and dev-IOTLB invalidation operations are performance-critical.
The current VT-d driver implementation submits these commands
individually, so the IOMMU programming and invalidation command
processing overhead is paid for every single operation.

This patch series, based on the "Consolidate domain cache invalidation"
series[1], improves the efficiency of Queue Invalidation (QI)
operations by adding support for batch processing. Microbenchmarks
with a DSA device working in SVA mode show that batching IOTLB and
dev-IOTLB invalidations can cut the time spent in qi_submit_sync()
by more than 800 cycles.
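
Conceptually, instead of one qi_submit_sync() call (and hence one wait
descriptor and one completion poll) per invalidation, the flush paths
build a small array of descriptors and submit them together, e.g.:

        struct qi_desc descs[2] = {};

        /* fill descs[0] with an IOTLB and descs[1] with a dev-IOTLB
         * invalidation, then submit both with a single wait descriptor:
         */
        qi_submit_sync(iommu, descs, 2, 0);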

Tina Zhang (2):
iommu/vt-d: Support batching IOTLB/dev-IOTLB invalidation commands
iommu/vt-d: Batch IOTLB/dev-IOTLB invalidation commands

[1]: https://lore.kernel.org/linux-iommu/[email protected]/

drivers/iommu/intel/cache.c | 89 ++++++++++++++++++++++++++++++-------
drivers/iommu/intel/dmar.c | 67 +++++++++++++++-------------
drivers/iommu/intel/iommu.c | 27 ++++++-----
drivers/iommu/intel/iommu.h | 21 ++++++---
drivers/iommu/intel/pasid.c | 20 +++++----
5 files changed, 152 insertions(+), 72 deletions(-)

--
2.39.3



2024-05-17 00:38:04

by Zhang, Tina

Subject: [PATCH 1/2] iommu/vt-d: Support batching IOTLB/dev-IOTLB invalidation commands

Introduce a new batch_desc parameter to the QI-based IOTLB/dev-IOTLB
invalidation operations to support batching invalidation descriptors.
batch_desc points to the descriptor slot to fill in a batch command
buffer. When batch_desc is NULL, batch submission is not in use and
descriptors are composed and submitted individually, as before.
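
For example, with a hypothetical caller-provided slot (the batch buffer
management itself is added in the next patch):

        /* NULL batch_desc: compose and submit immediately, as before */
        qi_flush_iotlb(iommu, did, addr, size_order, type, NULL);

        /* non-NULL batch_desc: only fill the given descriptor slot; the
         * caller submits the accumulated descriptors later
         */
        qi_flush_iotlb(iommu, did, addr, size_order, type, &batch->desc[n]);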

Also fix an issue reported by checkpatch about "unsigned mask":
"Prefer 'unsigned int' to bare use of 'unsigned'"

Signed-off-by: Tina Zhang <[email protected]>
---
drivers/iommu/intel/cache.c | 33 +++++++++++-------
drivers/iommu/intel/dmar.c | 67 ++++++++++++++++++++-----------------
drivers/iommu/intel/iommu.c | 27 +++++++++------
drivers/iommu/intel/iommu.h | 21 ++++++++----
drivers/iommu/intel/pasid.c | 20 ++++++-----
5 files changed, 100 insertions(+), 68 deletions(-)

diff --git a/drivers/iommu/intel/cache.c b/drivers/iommu/intel/cache.c
index e8418cdd8331..dcf5e0e6af17 100644
--- a/drivers/iommu/intel/cache.c
+++ b/drivers/iommu/intel/cache.c
@@ -278,7 +278,7 @@ void cache_tag_flush_range(struct dmar_domain *domain, unsigned long start,
case CACHE_TAG_NESTING_IOTLB:
if (domain->use_first_level) {
qi_flush_piotlb(iommu, tag->domain_id,
- tag->pasid, addr, pages, ih);
+ tag->pasid, addr, pages, ih, NULL);
} else {
/*
* Fallback to domain selective flush if no
@@ -287,11 +287,13 @@ void cache_tag_flush_range(struct dmar_domain *domain, unsigned long start,
if (!cap_pgsel_inv(iommu->cap) ||
mask > cap_max_amask_val(iommu->cap))
iommu->flush.flush_iotlb(iommu, tag->domain_id,
- 0, 0, DMA_TLB_DSI_FLUSH);
+ 0, 0, DMA_TLB_DSI_FLUSH,
+ NULL);
else
iommu->flush.flush_iotlb(iommu, tag->domain_id,
addr | ih, mask,
- DMA_TLB_PSI_FLUSH);
+ DMA_TLB_PSI_FLUSH,
+ NULL);
}
break;
case CACHE_TAG_NESTING_DEVTLB:
@@ -311,13 +313,15 @@ void cache_tag_flush_range(struct dmar_domain *domain, unsigned long start,

if (tag->pasid == IOMMU_NO_PASID)
qi_flush_dev_iotlb(iommu, sid, info->pfsid,
- info->ats_qdep, addr, mask);
+ info->ats_qdep, addr, mask,
+ NULL);
else
qi_flush_dev_iotlb_pasid(iommu, sid, info->pfsid,
tag->pasid, info->ats_qdep,
- addr, mask);
+ addr, mask, NULL);

- quirk_extra_dev_tlb_flush(info, addr, mask, tag->pasid, info->ats_qdep);
+ quirk_extra_dev_tlb_flush(info, addr, mask, tag->pasid,
+ info->ats_qdep, NULL);
break;
}

@@ -346,10 +350,12 @@ void cache_tag_flush_all(struct dmar_domain *domain)
case CACHE_TAG_NESTING_IOTLB:
if (domain->use_first_level)
qi_flush_piotlb(iommu, tag->domain_id,
- tag->pasid, 0, -1, 0);
+ tag->pasid, 0, -1, 0,
+ NULL);
else
iommu->flush.flush_iotlb(iommu, tag->domain_id,
- 0, 0, DMA_TLB_DSI_FLUSH);
+ 0, 0, DMA_TLB_DSI_FLUSH,
+ NULL);
break;
case CACHE_TAG_DEVTLB:
case CACHE_TAG_NESTING_DEVTLB:
@@ -357,9 +363,10 @@ void cache_tag_flush_all(struct dmar_domain *domain)
sid = PCI_DEVID(info->bus, info->devfn);

qi_flush_dev_iotlb(iommu, sid, info->pfsid, info->ats_qdep,
- 0, MAX_AGAW_PFN_WIDTH);
+ 0, MAX_AGAW_PFN_WIDTH, NULL);
quirk_extra_dev_tlb_flush(info, 0, MAX_AGAW_PFN_WIDTH,
- IOMMU_NO_PASID, info->ats_qdep);
+ IOMMU_NO_PASID, info->ats_qdep,
+ NULL);
break;
}

@@ -406,11 +413,13 @@ void cache_tag_flush_range_np(struct dmar_domain *domain, unsigned long start,
if (!cap_pgsel_inv(iommu->cap) ||
mask > cap_max_amask_val(iommu->cap))
iommu->flush.flush_iotlb(iommu, tag->domain_id,
- 0, 0, DMA_TLB_DSI_FLUSH);
+ 0, 0, DMA_TLB_DSI_FLUSH,
+ NULL);
else
iommu->flush.flush_iotlb(iommu, tag->domain_id,
addr, mask,
- DMA_TLB_PSI_FLUSH);
+ DMA_TLB_PSI_FLUSH,
+ NULL);
}

trace_cache_tag_flush_range_np(tag, start, end, addr, pages, mask);
diff --git a/drivers/iommu/intel/dmar.c b/drivers/iommu/intel/dmar.c
index 36d7427b1202..943712b97973 100644
--- a/drivers/iommu/intel/dmar.c
+++ b/drivers/iommu/intel/dmar.c
@@ -1518,11 +1518,12 @@ void qi_flush_context(struct intel_iommu *iommu, u16 did, u16 sid, u8 fm,
}

void qi_flush_iotlb(struct intel_iommu *iommu, u16 did, u64 addr,
- unsigned int size_order, u64 type)
+ unsigned int size_order, u64 type,
+ struct qi_desc *batch_desc)
{
u8 dw = 0, dr = 0;
-
- struct qi_desc desc;
+ struct qi_desc desc = {0};
+ struct qi_desc *pdesc = batch_desc ? batch_desc : &desc;
int ih = 0;

if (cap_write_drain(iommu->cap))
@@ -1531,20 +1532,21 @@ void qi_flush_iotlb(struct intel_iommu *iommu, u16 did, u64 addr,
if (cap_read_drain(iommu->cap))
dr = 1;

- desc.qw0 = QI_IOTLB_DID(did) | QI_IOTLB_DR(dr) | QI_IOTLB_DW(dw)
+ pdesc->qw0 = QI_IOTLB_DID(did) | QI_IOTLB_DR(dr) | QI_IOTLB_DW(dw)
| QI_IOTLB_GRAN(type) | QI_IOTLB_TYPE;
- desc.qw1 = QI_IOTLB_ADDR(addr) | QI_IOTLB_IH(ih)
+ pdesc->qw1 = QI_IOTLB_ADDR(addr) | QI_IOTLB_IH(ih)
| QI_IOTLB_AM(size_order);
- desc.qw2 = 0;
- desc.qw3 = 0;

- qi_submit_sync(iommu, &desc, 1, 0);
+ if (!batch_desc)
+ qi_submit_sync(iommu, pdesc, 1, 0);
}

void qi_flush_dev_iotlb(struct intel_iommu *iommu, u16 sid, u16 pfsid,
- u16 qdep, u64 addr, unsigned mask)
+ u16 qdep, u64 addr, unsigned int mask,
+ struct qi_desc *batch_desc)
{
- struct qi_desc desc;
+ struct qi_desc desc = {0};
+ struct qi_desc *pdesc = batch_desc ? batch_desc : &desc;

/*
* VT-d spec, section 4.3:
@@ -1557,26 +1559,27 @@ void qi_flush_dev_iotlb(struct intel_iommu *iommu, u16 sid, u16 pfsid,

if (mask) {
addr |= (1ULL << (VTD_PAGE_SHIFT + mask - 1)) - 1;
- desc.qw1 = QI_DEV_IOTLB_ADDR(addr) | QI_DEV_IOTLB_SIZE;
- } else
- desc.qw1 = QI_DEV_IOTLB_ADDR(addr);
+ pdesc->qw1 |= QI_DEV_IOTLB_ADDR(addr) | QI_DEV_IOTLB_SIZE;
+ } else {
+ pdesc->qw1 |= QI_DEV_IOTLB_ADDR(addr);
+ }

if (qdep >= QI_DEV_IOTLB_MAX_INVS)
qdep = 0;

- desc.qw0 = QI_DEV_IOTLB_SID(sid) | QI_DEV_IOTLB_QDEP(qdep) |
+ pdesc->qw0 = QI_DEV_IOTLB_SID(sid) | QI_DEV_IOTLB_QDEP(qdep) |
QI_DIOTLB_TYPE | QI_DEV_IOTLB_PFSID(pfsid);
- desc.qw2 = 0;
- desc.qw3 = 0;

- qi_submit_sync(iommu, &desc, 1, 0);
+ if (!batch_desc)
+ qi_submit_sync(iommu, pdesc, 1, 0);
}

/* PASID-based IOTLB invalidation */
void qi_flush_piotlb(struct intel_iommu *iommu, u16 did, u32 pasid, u64 addr,
- unsigned long npages, bool ih)
+ unsigned long npages, bool ih, struct qi_desc *batch_desc)
{
struct qi_desc desc = {.qw2 = 0, .qw3 = 0};
+ struct qi_desc *pdesc = batch_desc ? batch_desc : &desc;

/*
* npages == -1 means a PASID-selective invalidation, otherwise,
@@ -1589,11 +1592,11 @@ void qi_flush_piotlb(struct intel_iommu *iommu, u16 did, u32 pasid, u64 addr,
}

if (npages == -1) {
- desc.qw0 = QI_EIOTLB_PASID(pasid) |
+ pdesc->qw0 = QI_EIOTLB_PASID(pasid) |
QI_EIOTLB_DID(did) |
QI_EIOTLB_GRAN(QI_GRAN_NONG_PASID) |
QI_EIOTLB_TYPE;
- desc.qw1 = 0;
+ pdesc->qw1 = 0;
} else {
int mask = ilog2(__roundup_pow_of_two(npages));
unsigned long align = (1ULL << (VTD_PAGE_SHIFT + mask));
@@ -1601,24 +1604,27 @@ void qi_flush_piotlb(struct intel_iommu *iommu, u16 did, u32 pasid, u64 addr,
if (WARN_ON_ONCE(!IS_ALIGNED(addr, align)))
addr = ALIGN_DOWN(addr, align);

- desc.qw0 = QI_EIOTLB_PASID(pasid) |
+ pdesc->qw0 = QI_EIOTLB_PASID(pasid) |
QI_EIOTLB_DID(did) |
QI_EIOTLB_GRAN(QI_GRAN_PSI_PASID) |
QI_EIOTLB_TYPE;
- desc.qw1 = QI_EIOTLB_ADDR(addr) |
+ pdesc->qw1 = QI_EIOTLB_ADDR(addr) |
QI_EIOTLB_IH(ih) |
QI_EIOTLB_AM(mask);
}

- qi_submit_sync(iommu, &desc, 1, 0);
+ if (!batch_desc)
+ qi_submit_sync(iommu, pdesc, 1, 0);
}

/* PASID-based device IOTLB Invalidate */
void qi_flush_dev_iotlb_pasid(struct intel_iommu *iommu, u16 sid, u16 pfsid,
- u32 pasid, u16 qdep, u64 addr, unsigned int size_order)
+ u32 pasid, u16 qdep, u64 addr, unsigned int size_order,
+ struct qi_desc *batch_desc)
{
unsigned long mask = 1UL << (VTD_PAGE_SHIFT + size_order - 1);
struct qi_desc desc = {.qw1 = 0, .qw2 = 0, .qw3 = 0};
+ struct qi_desc *pdesc = batch_desc ? batch_desc : &desc;

/*
* VT-d spec, section 4.3:
@@ -1629,7 +1635,7 @@ void qi_flush_dev_iotlb_pasid(struct intel_iommu *iommu, u16 sid, u16 pfsid,
if (!(iommu->gcmd & DMA_GCMD_TE))
return;

- desc.qw0 = QI_DEV_EIOTLB_PASID(pasid) | QI_DEV_EIOTLB_SID(sid) |
+ pdesc->qw0 = QI_DEV_EIOTLB_PASID(pasid) | QI_DEV_EIOTLB_SID(sid) |
QI_DEV_EIOTLB_QDEP(qdep) | QI_DEIOTLB_TYPE |
QI_DEV_IOTLB_PFSID(pfsid);

@@ -1647,7 +1653,7 @@ void qi_flush_dev_iotlb_pasid(struct intel_iommu *iommu, u16 sid, u16 pfsid,
addr, size_order);

/* Take page address */
- desc.qw1 = QI_DEV_EIOTLB_ADDR(addr);
+ pdesc->qw1 = QI_DEV_EIOTLB_ADDR(addr);

if (size_order) {
/*
@@ -1655,15 +1661,16 @@ void qi_flush_dev_iotlb_pasid(struct intel_iommu *iommu, u16 sid, u16 pfsid,
* significant bit, we must set them to 1s to avoid having
* smaller size than desired.
*/
- desc.qw1 |= GENMASK_ULL(size_order + VTD_PAGE_SHIFT - 1,
+ pdesc->qw1 |= GENMASK_ULL(size_order + VTD_PAGE_SHIFT - 1,
VTD_PAGE_SHIFT);
/* Clear size_order bit to indicate size */
- desc.qw1 &= ~mask;
+ pdesc->qw1 &= ~mask;
/* Set the S bit to indicate flushing more than 1 page */
- desc.qw1 |= QI_DEV_EIOTLB_SIZE;
+ pdesc->qw1 |= QI_DEV_EIOTLB_SIZE;
}

- qi_submit_sync(iommu, &desc, 1, 0);
+ if (!batch_desc)
+ qi_submit_sync(iommu, pdesc, 1, 0);
}

void qi_flush_pasid_cache(struct intel_iommu *iommu, u16 did,
diff --git a/drivers/iommu/intel/iommu.c b/drivers/iommu/intel/iommu.c
index bbc47b3c603c..ff7d168d5673 100644
--- a/drivers/iommu/intel/iommu.c
+++ b/drivers/iommu/intel/iommu.c
@@ -1157,7 +1157,7 @@ static void iommu_set_root_entry(struct intel_iommu *iommu)
iommu->flush.flush_context(iommu, 0, 0, 0, DMA_CCMD_GLOBAL_INVL);
if (sm_supported(iommu))
qi_flush_pasid_cache(iommu, 0, QI_PC_GLOBAL, 0);
- iommu->flush.flush_iotlb(iommu, 0, 0, 0, DMA_TLB_GLOBAL_FLUSH);
+ iommu->flush.flush_iotlb(iommu, 0, 0, 0, DMA_TLB_GLOBAL_FLUSH, NULL);
}

void iommu_flush_write_buffer(struct intel_iommu *iommu)
@@ -1216,7 +1216,8 @@ static void __iommu_flush_context(struct intel_iommu *iommu,

/* return value determine if we need a write buffer flush */
static void __iommu_flush_iotlb(struct intel_iommu *iommu, u16 did,
- u64 addr, unsigned int size_order, u64 type)
+ u64 addr, unsigned int size_order, u64 type,
+ struct qi_desc *batch_desc)
{
int tlb_offset = ecap_iotlb_offset(iommu->ecap);
u64 val = 0, val_iva = 0;
@@ -1385,8 +1386,8 @@ static void __iommu_flush_dev_iotlb(struct device_domain_info *info,
sid = info->bus << 8 | info->devfn;
qdep = info->ats_qdep;
qi_flush_dev_iotlb(info->iommu, sid, info->pfsid,
- qdep, addr, mask);
- quirk_extra_dev_tlb_flush(info, addr, mask, IOMMU_NO_PASID, qdep);
+ qdep, addr, mask, NULL);
+ quirk_extra_dev_tlb_flush(info, addr, mask, IOMMU_NO_PASID, qdep, NULL);
}

static void intel_flush_iotlb_all(struct iommu_domain *domain)
@@ -1711,7 +1712,7 @@ static int domain_context_mapping_one(struct dmar_domain *domain,
DMA_CCMD_MASK_NOBIT,
DMA_CCMD_DEVICE_INVL);
iommu->flush.flush_iotlb(iommu, did_old, 0, 0,
- DMA_TLB_DSI_FLUSH);
+ DMA_TLB_DSI_FLUSH, NULL);
}

clear_context_copied(iommu, bus, devfn);
@@ -1765,7 +1766,7 @@ static int domain_context_mapping_one(struct dmar_domain *domain,
(((u16)bus) << 8) | devfn,
DMA_CCMD_MASK_NOBIT,
DMA_CCMD_DEVICE_INVL);
- iommu->flush.flush_iotlb(iommu, did, 0, 0, DMA_TLB_DSI_FLUSH);
+ iommu->flush.flush_iotlb(iommu, did, 0, 0, DMA_TLB_DSI_FLUSH, NULL);
} else {
iommu_flush_write_buffer(iommu);
}
@@ -1998,7 +1999,8 @@ static void domain_context_clear_one(struct device_domain_info *info, u8 bus, u8
did_old,
0,
0,
- DMA_TLB_DSI_FLUSH);
+ DMA_TLB_DSI_FLUSH,
+ NULL);

__iommu_flush_dev_iotlb(info, 0, MAX_AGAW_PFN_WIDTH);
}
@@ -2655,7 +2657,8 @@ static void iommu_flush_all(void)
iommu->flush.flush_context(iommu, 0, 0, 0,
DMA_CCMD_GLOBAL_INVL);
iommu->flush.flush_iotlb(iommu, 0, 0, 0,
- DMA_TLB_GLOBAL_FLUSH);
+ DMA_TLB_GLOBAL_FLUSH,
+ NULL);
}
}

@@ -4859,7 +4862,8 @@ static void __init check_tylersburg_isoch(void)
*/
void quirk_extra_dev_tlb_flush(struct device_domain_info *info,
unsigned long address, unsigned long mask,
- u32 pasid, u16 qdep)
+ u32 pasid, u16 qdep,
+ struct qi_desc *batch_desc)
{
u16 sid;

@@ -4869,10 +4873,11 @@ void quirk_extra_dev_tlb_flush(struct device_domain_info *info,
sid = PCI_DEVID(info->bus, info->devfn);
if (pasid == IOMMU_NO_PASID) {
qi_flush_dev_iotlb(info->iommu, sid, info->pfsid,
- qdep, address, mask);
+ qdep, address, mask, batch_desc);
} else {
qi_flush_dev_iotlb_pasid(info->iommu, sid, info->pfsid,
- pasid, qdep, address, mask);
+ pasid, qdep, address, mask,
+ batch_desc);
}
}

diff --git a/drivers/iommu/intel/iommu.h b/drivers/iommu/intel/iommu.h
index e1fb94acf0be..4c5c93e22a37 100644
--- a/drivers/iommu/intel/iommu.h
+++ b/drivers/iommu/intel/iommu.h
@@ -524,7 +524,8 @@ struct iommu_flush {
void (*flush_context)(struct intel_iommu *iommu, u16 did, u16 sid,
u8 fm, u64 type);
void (*flush_iotlb)(struct intel_iommu *iommu, u16 did, u64 addr,
- unsigned int size_order, u64 type);
+ unsigned int size_order, u64 type,
+ struct qi_desc *batch_desc);
};

enum {
@@ -1074,19 +1075,27 @@ void qi_global_iec(struct intel_iommu *iommu);
void qi_flush_context(struct intel_iommu *iommu, u16 did,
u16 sid, u8 fm, u64 type);
void qi_flush_iotlb(struct intel_iommu *iommu, u16 did, u64 addr,
- unsigned int size_order, u64 type);
+ unsigned int size_order, u64 type,
+ struct qi_desc *batch_desc);
+
void qi_flush_dev_iotlb(struct intel_iommu *iommu, u16 sid, u16 pfsid,
- u16 qdep, u64 addr, unsigned mask);
+ u16 qdep, u64 addr, unsigned int mask,
+ struct qi_desc *batch_desc);

void qi_flush_piotlb(struct intel_iommu *iommu, u16 did, u32 pasid, u64 addr,
- unsigned long npages, bool ih);
+ unsigned long npages, bool ih,
+ struct qi_desc *batch_desc);

void qi_flush_dev_iotlb_pasid(struct intel_iommu *iommu, u16 sid, u16 pfsid,
u32 pasid, u16 qdep, u64 addr,
- unsigned int size_order);
+ unsigned int size_order,
+ struct qi_desc *batch_desc);
+
void quirk_extra_dev_tlb_flush(struct device_domain_info *info,
unsigned long address, unsigned long pages,
- u32 pasid, u16 qdep);
+ u32 pasid, u16 qdep,
+ struct qi_desc *batch_desc);
+
void qi_flush_pasid_cache(struct intel_iommu *iommu, u16 did, u64 granu,
u32 pasid);

diff --git a/drivers/iommu/intel/pasid.c b/drivers/iommu/intel/pasid.c
index 11f0b856d74c..4fa935bf487d 100644
--- a/drivers/iommu/intel/pasid.c
+++ b/drivers/iommu/intel/pasid.c
@@ -228,9 +228,11 @@ devtlb_invalidation_with_pasid(struct intel_iommu *iommu,
* efficient to flush devTLB specific to the PASID.
*/
if (pasid == IOMMU_NO_PASID)
- qi_flush_dev_iotlb(iommu, sid, pfsid, qdep, 0, 64 - VTD_PAGE_SHIFT);
+ qi_flush_dev_iotlb(iommu, sid, pfsid, qdep, 0,
+ 64 - VTD_PAGE_SHIFT, NULL);
else
- qi_flush_dev_iotlb_pasid(iommu, sid, pfsid, pasid, qdep, 0, 64 - VTD_PAGE_SHIFT);
+ qi_flush_dev_iotlb_pasid(iommu, sid, pfsid, pasid, qdep, 0,
+ 64 - VTD_PAGE_SHIFT, NULL);
}

void intel_pasid_tear_down_entry(struct intel_iommu *iommu, struct device *dev,
@@ -257,9 +259,9 @@ void intel_pasid_tear_down_entry(struct intel_iommu *iommu, struct device *dev,
pasid_cache_invalidation_with_pasid(iommu, did, pasid);

if (pgtt == PASID_ENTRY_PGTT_PT || pgtt == PASID_ENTRY_PGTT_FL_ONLY)
- qi_flush_piotlb(iommu, did, pasid, 0, -1, 0);
+ qi_flush_piotlb(iommu, did, pasid, 0, -1, 0, NULL);
else
- iommu->flush.flush_iotlb(iommu, did, 0, 0, DMA_TLB_DSI_FLUSH);
+ iommu->flush.flush_iotlb(iommu, did, 0, 0, DMA_TLB_DSI_FLUSH, NULL);

/* Device IOTLB doesn't need to be flushed in caching mode. */
if (!cap_caching_mode(iommu->cap))
@@ -279,7 +281,7 @@ static void pasid_flush_caches(struct intel_iommu *iommu,

if (cap_caching_mode(iommu->cap)) {
pasid_cache_invalidation_with_pasid(iommu, did, pasid);
- qi_flush_piotlb(iommu, did, pasid, 0, -1, 0);
+ qi_flush_piotlb(iommu, did, pasid, 0, -1, 0, NULL);
} else {
iommu_flush_write_buffer(iommu);
}
@@ -489,7 +491,7 @@ int intel_pasid_setup_dirty_tracking(struct intel_iommu *iommu,
*/
pasid_cache_invalidation_with_pasid(iommu, did, pasid);

- iommu->flush.flush_iotlb(iommu, did, 0, 0, DMA_TLB_DSI_FLUSH);
+ iommu->flush.flush_iotlb(iommu, did, 0, 0, DMA_TLB_DSI_FLUSH, NULL);

/* Device IOTLB doesn't need to be flushed in caching mode. */
if (!cap_caching_mode(iommu->cap))
@@ -568,7 +570,7 @@ void intel_pasid_setup_page_snoop_control(struct intel_iommu *iommu,
* Addr[63:12]=0x7FFFFFFF_FFFFF) to affected functions
*/
pasid_cache_invalidation_with_pasid(iommu, did, pasid);
- qi_flush_piotlb(iommu, did, pasid, 0, -1, 0);
+ qi_flush_piotlb(iommu, did, pasid, 0, -1, 0, NULL);

/* Device IOTLB doesn't need to be flushed in caching mode. */
if (!cap_caching_mode(iommu->cap))
@@ -816,7 +818,7 @@ static int device_pasid_table_setup(struct device *dev, u8 bus, u8 devfn)
DMA_CCMD_MASK_NOBIT,
DMA_CCMD_DEVICE_INVL);
qi_flush_pasid_cache(iommu, 0, QI_PC_GLOBAL, 0);
- iommu->flush.flush_iotlb(iommu, 0, 0, 0, DMA_TLB_GLOBAL_FLUSH);
+ iommu->flush.flush_iotlb(iommu, 0, 0, 0, DMA_TLB_GLOBAL_FLUSH, NULL);
devtlb_invalidation_with_pasid(iommu, dev, IOMMU_NO_PASID);

/*
@@ -841,7 +843,7 @@ static int device_pasid_table_setup(struct device *dev, u8 bus, u8 devfn)
PCI_DEVID(bus, devfn),
DMA_CCMD_MASK_NOBIT,
DMA_CCMD_DEVICE_INVL);
- iommu->flush.flush_iotlb(iommu, 0, 0, 0, DMA_TLB_DSI_FLUSH);
+ iommu->flush.flush_iotlb(iommu, 0, 0, 0, DMA_TLB_DSI_FLUSH, NULL);
}

return 0;
--
2.39.3


2024-05-17 00:38:17

by Zhang, Tina

Subject: [PATCH 2/2] iommu/vt-d: Batch IOTLB/dev-IOTLB invalidation commands

Utilize batch command processing in IOTLB/dev-IOTLB invalidation
operations.
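
The flush paths in cache.c fill a small on-stack batch of descriptors
and submit it when the batch becomes full, when the loop moves on to a
different IOMMU, or at the end of the cache tag walk. Roughly:

        struct qi_cmd_batch cmds = {0};

        /* per cache tag: compose into the next free slot */
        qi_flush_piotlb(iommu, tag->domain_id, tag->pasid, addr, pages, ih,
                        &cmds.desc[cmds.num]);
        cache_invalidate_cmd_batch_add(iommu, &cmds);   /* submits when full */

        /* after the loop, or before switching to another iommu */
        cache_invalidate_cmd_batch_submit(iommu, &cmds);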

Signed-off-by: Tina Zhang <[email protected]>
---
drivers/iommu/intel/cache.c | 76 ++++++++++++++++++++++++++++++-------
1 file changed, 62 insertions(+), 14 deletions(-)

diff --git a/drivers/iommu/intel/cache.c b/drivers/iommu/intel/cache.c
index dcf5e0e6af17..0a06e8565554 100644
--- a/drivers/iommu/intel/cache.c
+++ b/drivers/iommu/intel/cache.c
@@ -19,6 +19,14 @@
#include "pasid.h"
#include "trace.h"

+/* The max number of descriptors in a batch */
+#define QI_MAX_BATCH_DESC_COUNT 2
+
+struct qi_cmd_batch {
+ struct qi_desc desc[QI_MAX_BATCH_DESC_COUNT];
+ int num;
+};
+
/* Check if an existing cache tag can be reused for a new association. */
static bool cache_tage_match(struct cache_tag *tag, u16 domain_id,
struct intel_iommu *iommu, struct device *dev,
@@ -254,6 +262,26 @@ static unsigned long calculate_psi_aligned_address(unsigned long start,
return ALIGN_DOWN(start, VTD_PAGE_SIZE << mask);
}

+static inline void cache_invalidate_cmd_batch_submit(struct intel_iommu *iommu,
+ struct qi_cmd_batch *cmds)
+{
+ if (!cmds->num)
+ return;
+
+ qi_submit_sync(iommu, cmds->desc, cmds->num, 0);
+ memset(cmds, 0, sizeof(struct qi_cmd_batch));
+}
+
+static inline void cache_invalidate_cmd_batch_add(struct intel_iommu *iommu,
+ struct qi_cmd_batch *cmds)
+{
+ if (!cmds->desc[cmds->num].qw0)
+ return;
+
+ if (++cmds->num == QI_MAX_BATCH_DESC_COUNT)
+ cache_invalidate_cmd_batch_submit(iommu, cmds);
+}
+
/*
* Invalidates a range of IOVA from @start (inclusive) to @end (inclusive)
* when the memory mappings in the target domain have been modified.
@@ -264,21 +292,28 @@ void cache_tag_flush_range(struct dmar_domain *domain, unsigned long start,
unsigned long pages, mask, addr;
struct cache_tag *tag;
unsigned long flags;
+ struct intel_iommu *iommu = NULL;
+ struct qi_cmd_batch cmds = {0};

addr = calculate_psi_aligned_address(start, end, &pages, &mask);

spin_lock_irqsave(&domain->cache_lock, flags);
list_for_each_entry(tag, &domain->cache_tags, node) {
- struct intel_iommu *iommu = tag->iommu;
struct device_domain_info *info;
u16 sid;

+ if (iommu != tag->iommu) {
+ cache_invalidate_cmd_batch_submit(iommu, &cmds);
+ iommu = tag->iommu;
+ }
+
switch (tag->type) {
case CACHE_TAG_IOTLB:
case CACHE_TAG_NESTING_IOTLB:
if (domain->use_first_level) {
qi_flush_piotlb(iommu, tag->domain_id,
- tag->pasid, addr, pages, ih, NULL);
+ tag->pasid, addr, pages,
+ ih, &cmds.desc[cmds.num]);
} else {
/*
* Fallback to domain selective flush if no
@@ -288,13 +323,14 @@ void cache_tag_flush_range(struct dmar_domain *domain, unsigned long start,
mask > cap_max_amask_val(iommu->cap))
iommu->flush.flush_iotlb(iommu, tag->domain_id,
0, 0, DMA_TLB_DSI_FLUSH,
- NULL);
+ &cmds.desc[cmds.num]);
else
iommu->flush.flush_iotlb(iommu, tag->domain_id,
addr | ih, mask,
DMA_TLB_PSI_FLUSH,
- NULL);
+ &cmds.desc[cmds.num]);
}
+ cache_invalidate_cmd_batch_add(iommu, &cmds);
break;
case CACHE_TAG_NESTING_DEVTLB:
/*
@@ -310,23 +346,25 @@ void cache_tag_flush_range(struct dmar_domain *domain, unsigned long start,
case CACHE_TAG_DEVTLB:
info = dev_iommu_priv_get(tag->dev);
sid = PCI_DEVID(info->bus, info->devfn);
-
if (tag->pasid == IOMMU_NO_PASID)
qi_flush_dev_iotlb(iommu, sid, info->pfsid,
info->ats_qdep, addr, mask,
- NULL);
+ &cmds.desc[cmds.num]);
else
qi_flush_dev_iotlb_pasid(iommu, sid, info->pfsid,
tag->pasid, info->ats_qdep,
- addr, mask, NULL);
+ addr, mask, &cmds.desc[cmds.num]);
+ cache_invalidate_cmd_batch_add(iommu, &cmds);

quirk_extra_dev_tlb_flush(info, addr, mask, tag->pasid,
- info->ats_qdep, NULL);
+ info->ats_qdep, &cmds.desc[cmds.num]);
+ cache_invalidate_cmd_batch_add(iommu, &cmds);
break;
}

trace_cache_tag_flush_range(tag, start, end, addr, pages, mask);
}
+ cache_invalidate_cmd_batch_submit(iommu, &cmds);
spin_unlock_irqrestore(&domain->cache_lock, flags);
}

@@ -338,40 +376,50 @@ void cache_tag_flush_all(struct dmar_domain *domain)
{
struct cache_tag *tag;
unsigned long flags;
+ struct intel_iommu *iommu = NULL;
+ struct qi_cmd_batch cmds = {0};

spin_lock_irqsave(&domain->cache_lock, flags);
list_for_each_entry(tag, &domain->cache_tags, node) {
- struct intel_iommu *iommu = tag->iommu;
struct device_domain_info *info;
u16 sid;

+ if (iommu != tag->iommu) {
+ cache_invalidate_cmd_batch_submit(iommu, &cmds);
+ iommu = tag->iommu;
+ }
+
switch (tag->type) {
case CACHE_TAG_IOTLB:
case CACHE_TAG_NESTING_IOTLB:
if (domain->use_first_level)
qi_flush_piotlb(iommu, tag->domain_id,
tag->pasid, 0, -1, 0,
- NULL);
+ &cmds.desc[cmds.num]);
else
iommu->flush.flush_iotlb(iommu, tag->domain_id,
0, 0, DMA_TLB_DSI_FLUSH,
- NULL);
+ &cmds.desc[cmds.num]);
+ cache_invalidate_cmd_batch_add(iommu, &cmds);
break;
case CACHE_TAG_DEVTLB:
case CACHE_TAG_NESTING_DEVTLB:
info = dev_iommu_priv_get(tag->dev);
sid = PCI_DEVID(info->bus, info->devfn);
-
qi_flush_dev_iotlb(iommu, sid, info->pfsid, info->ats_qdep,
- 0, MAX_AGAW_PFN_WIDTH, NULL);
+ 0, MAX_AGAW_PFN_WIDTH, &cmds.desc[cmds.num]);
+ cache_invalidate_cmd_batch_add(iommu, &cmds);
+
quirk_extra_dev_tlb_flush(info, 0, MAX_AGAW_PFN_WIDTH,
IOMMU_NO_PASID, info->ats_qdep,
- NULL);
+ &cmds.desc[cmds.num]);
+ cache_invalidate_cmd_batch_add(iommu, &cmds);
break;
}

trace_cache_tag_flush_all(tag);
}
+ cache_invalidate_cmd_batch_submit(iommu, &cmds);
spin_unlock_irqrestore(&domain->cache_lock, flags);
}

--
2.39.3


2024-05-19 09:44:54

by Lu Baolu

Subject: Re: [PATCH 1/2] iommu/vt-d: Support batching IOTLB/dev-IOTLB invalidation commands

On 5/17/24 8:37 AM, Tina Zhang wrote:
> Introduce a new parameter batch_desc to the QI based IOTLB/dev-IOTLB
> invalidation operations to support batching invalidation descriptors.
> This batch_desc is a pointer to the descriptor entry in a batch cmds
> buffer. If the batch_desc is NULL, it indicates that batch submission
> is not being used, and descriptors will be submitted individually.
>
> Also fix an issue reported by checkpatch about "unsigned mask":
> "Prefer 'unsigned int' to bare use of 'unsigned'"
>
> Signed-off-by: Tina Zhang<[email protected]>
> ---
> drivers/iommu/intel/cache.c | 33 +++++++++++-------
> drivers/iommu/intel/dmar.c | 67 ++++++++++++++++++++-----------------
> drivers/iommu/intel/iommu.c | 27 +++++++++------
> drivers/iommu/intel/iommu.h | 21 ++++++++----
> drivers/iommu/intel/pasid.c | 20 ++++++-----
> 5 files changed, 100 insertions(+), 68 deletions(-)
>
> diff --git a/drivers/iommu/intel/cache.c b/drivers/iommu/intel/cache.c
> index e8418cdd8331..dcf5e0e6af17 100644
> --- a/drivers/iommu/intel/cache.c
> +++ b/drivers/iommu/intel/cache.c
> @@ -278,7 +278,7 @@ void cache_tag_flush_range(struct dmar_domain *domain, unsigned long start,
> case CACHE_TAG_NESTING_IOTLB:
> if (domain->use_first_level) {
> qi_flush_piotlb(iommu, tag->domain_id,
> - tag->pasid, addr, pages, ih);
> + tag->pasid, addr, pages, ih, NULL);
> } else {

I'd like to have all the batched descriptor code inside this file to
make it easier to maintain. Perhaps we can add the below infrastructure
to the dmar_domain structure, together with the cache tag.

#define MAX_BATCHED_DESC_COUNT 8

struct batched_desc {
        struct qi_desc desc[MAX_BATCHED_DESC_COUNT];
        unsigned int index;
};

diff --git a/drivers/iommu/intel/iommu.h b/drivers/iommu/intel/iommu.h
index eaf015b4353b..dd458d6ad7ec 100644
--- a/drivers/iommu/intel/iommu.h
+++ b/drivers/iommu/intel/iommu.h
@@ -610,6 +610,7 @@ struct dmar_domain {

spinlock_t cache_lock; /* Protect the cache tag list */
struct list_head cache_tags; /* Cache tag list */
+ struct batched_desc *descs; /* Batched qi descriptors */

int iommu_superpage; /* Level of superpages supported:
                        0 == 4KiB (no superpages), 1 == 2MiB,

The batched descriptor structure is allocated when the first cache tag
is assigned to this domain and freed when the last tag leaves.

Then, we have the helpers below to replace the current qi_flush_xxx() calls.

static void batched_desc_iotlb_enqueue(...)
{
... ...
}

and another one to push all queued commands to the hardware,

static void batched_desc_flush(...)
{
... ...
}
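
For example, roughly like below (untested, just to show the idea, using
the names above):

static void batched_desc_flush(struct dmar_domain *domain,
                               struct intel_iommu *iommu)
{
        struct batched_desc *batch = domain->descs;

        if (!batch->index)
                return;

        qi_submit_sync(iommu, batch->desc, batch->index, 0);
        batch->index = 0;
}

static void batched_desc_iotlb_enqueue(struct dmar_domain *domain,
                                       struct intel_iommu *iommu,
                                       u16 did, u64 addr,
                                       unsigned int size_order, u64 type)
{
        struct batched_desc *batch = domain->descs;
        struct qi_desc *desc = &batch->desc[batch->index];
        u8 dw = 0, dr = 0;
        int ih = 0;

        if (cap_write_drain(iommu->cap))
                dw = 1;
        if (cap_read_drain(iommu->cap))
                dr = 1;

        /* Same composition as qi_flush_iotlb(), just not submitted yet. */
        desc->qw0 = QI_IOTLB_DID(did) | QI_IOTLB_DR(dr) | QI_IOTLB_DW(dw)
                | QI_IOTLB_GRAN(type) | QI_IOTLB_TYPE;
        desc->qw1 = QI_IOTLB_ADDR(addr) | QI_IOTLB_IH(ih)
                | QI_IOTLB_AM(size_order);
        desc->qw2 = 0;
        desc->qw3 = 0;

        if (++batch->index == MAX_BATCHED_DESC_COUNT)
                batched_desc_flush(domain, iommu);
}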

> /*
> * Fallback to domain selective flush if no
> @@ -287,11 +287,13 @@ void cache_tag_flush_range(struct dmar_domain *domain, unsigned long start,
> if (!cap_pgsel_inv(iommu->cap) ||
> mask > cap_max_amask_val(iommu->cap))
> iommu->flush.flush_iotlb(iommu, tag->domain_id,
> - 0, 0, DMA_TLB_DSI_FLUSH);
> + 0, 0, DMA_TLB_DSI_FLUSH,
> + NULL);
> else
> iommu->flush.flush_iotlb(iommu, tag->domain_id,
> addr | ih, mask,
> - DMA_TLB_PSI_FLUSH);
> + DMA_TLB_PSI_FLUSH,
> + NULL);

Perhaps we could refactor the indirect call here:

	if (qi is supported by iommu)
		...
	else /* register-based iotlb invalidation */
		...

and batched descriptors would only be supported in the former case.

Best regards,
baolu

2024-05-20 04:35:06

by Zhang, Tina

Subject: RE: [PATCH 1/2] iommu/vt-d: Support batching IOTLB/dev-IOTLB invalidation commands


> -----Original Message-----
> From: Baolu Lu <[email protected]>
> Sent: Sunday, May 19, 2024 5:43 PM
> To: Zhang, Tina <[email protected]>; Tian, Kevin <[email protected]>
> Cc: [email protected]; [email protected]; linux-
> [email protected]
> Subject: Re: [PATCH 1/2] iommu/vt-d: Support batching IOTLB/dev-IOTLB
> invalidation commands
>
> On 5/17/24 8:37 AM, Tina Zhang wrote:
> > Introduce a new parameter batch_desc to the QI based IOTLB/dev-IOTLB
> > invalidation operations to support batching invalidation descriptors.
> > This batch_desc is a pointer to the descriptor entry in a batch cmds
> > buffer. If the batch_desc is NULL, it indicates that batch submission
> > is not being used, and descriptors will be submitted individually.
> >
> > Also fix an issue reported by checkpatch about "unsigned mask":
> > "Prefer 'unsigned int' to bare use of 'unsigned'"
> >
> > Signed-off-by: Tina Zhang<[email protected]>
> > ---
> > drivers/iommu/intel/cache.c | 33 +++++++++++-------
> > drivers/iommu/intel/dmar.c | 67 ++++++++++++++++++++-----------------
> > drivers/iommu/intel/iommu.c | 27 +++++++++------
> > drivers/iommu/intel/iommu.h | 21 ++++++++----
> > drivers/iommu/intel/pasid.c | 20 ++++++-----
> > 5 files changed, 100 insertions(+), 68 deletions(-)
> >
> > diff --git a/drivers/iommu/intel/cache.c b/drivers/iommu/intel/cache.c
> > index e8418cdd8331..dcf5e0e6af17 100644
> > --- a/drivers/iommu/intel/cache.c
> > +++ b/drivers/iommu/intel/cache.c
> > @@ -278,7 +278,7 @@ void cache_tag_flush_range(struct dmar_domain
> *domain, unsigned long start,
> > case CACHE_TAG_NESTING_IOTLB:
> > if (domain->use_first_level) {
> > qi_flush_piotlb(iommu, tag->domain_id,
> > - tag->pasid, addr, pages, ih);
> > + tag->pasid, addr, pages, ih,
> NULL);
> > } else {
>
> I'd like to have all batched descriptors code inside this file to make it easier for
> maintenance. Perhaps we can add the below infrastructure in the
> dmar_domain structure together with the cache tag.
>
> #define MAX_BATCHED_DESC_COUNT 8
>
> struct batched_desc {
> struct qi_desc desc[MAX_BATCHED_DESC_COUNT];
> unsigned int index;
> };
>
> diff --git a/drivers/iommu/intel/iommu.h b/drivers/iommu/intel/iommu.h
> index eaf015b4353b..dd458d6ad7ec 100644
> --- a/drivers/iommu/intel/iommu.h
> +++ b/drivers/iommu/intel/iommu.h
> @@ -610,6 +610,7 @@ struct dmar_domain {
>
> spinlock_t cache_lock; /* Protect the cache tag list */
> struct list_head cache_tags; /* Cache tag list */
> + struct batched_desc *descs; /* Batched qi descriptors */
>
> int iommu_superpage;/* Level of superpages supported:
> 0 == 4KiB (no superpages), 1 == 2MiB,
>
> The batched descriptor structure is allocated when the first cache tag is
> assigned to this domain and freed when the last tag leaves.
>
> Then, we have below helpers to replace the current qi_flush_xxx() calls.
>
> static void batched_desc_iotlb_enqueue(...) {
> ... ...
> }
>
> and another one to push all queued commands to the hardware,
>
> static void batched_desc_flush(...)
> {
> ... ...
> }
>
> > /*
> > * Fallback to domain selective flush if no @@
> -287,11 +287,13
> > @@ void cache_tag_flush_range(struct dmar_domain *domain, unsigned
> long start,
> > if (!cap_pgsel_inv(iommu->cap) ||
> > mask > cap_max_amask_val(iommu->cap))
> > iommu->flush.flush_iotlb(iommu, tag-
> >domain_id,
> > - 0, 0,
> DMA_TLB_DSI_FLUSH);
> > + 0, 0,
> DMA_TLB_DSI_FLUSH,
> > + NULL);
> > else
> > iommu->flush.flush_iotlb(iommu, tag-
> >domain_id,
> > addr | ih,
> mask,
> > -
> DMA_TLB_PSI_FLUSH);
> > +
> DMA_TLB_PSI_FLUSH,
> > + NULL);
>
> Perhaps we could refactor the indirect call here.
> if (qi is supported by iommu)
> ...
> else /* register based iotlb invalidation*/
> ...
>
> and only batched descriptor is only supported in the upper case.
Sure. Thanks.

Regards,
-Tina
>
> Best regards,
> baolu

2024-06-03 07:38:33

by Zhang, Tina

Subject: RE: [PATCH 1/2] iommu/vt-d: Support batching IOTLB/dev-IOTLB invalidation commands

Hi Baolu,

> -----Original Message-----
> From: Baolu Lu <[email protected]>
> Sent: Sunday, May 19, 2024 5:43 PM
> To: Zhang, Tina <[email protected]>; Tian, Kevin <[email protected]>
> Cc: [email protected]; [email protected]; linux-
> [email protected]
> Subject: Re: [PATCH 1/2] iommu/vt-d: Support batching IOTLB/dev-IOTLB
> invalidation commands
>
> On 5/17/24 8:37 AM, Tina Zhang wrote:
> > Introduce a new parameter batch_desc to the QI based IOTLB/dev-IOTLB
> > invalidation operations to support batching invalidation descriptors.
> > This batch_desc is a pointer to the descriptor entry in a batch cmds
> > buffer. If the batch_desc is NULL, it indicates that batch submission
> > is not being used, and descriptors will be submitted individually.
> >
> > Also fix an issue reported by checkpatch about "unsigned mask":
> > "Prefer 'unsigned int' to bare use of 'unsigned'"
> >
> > Signed-off-by: Tina Zhang<[email protected]>
> > ---
> > drivers/iommu/intel/cache.c | 33 +++++++++++-------
> > drivers/iommu/intel/dmar.c | 67 ++++++++++++++++++++-----------------
> > drivers/iommu/intel/iommu.c | 27 +++++++++------
> > drivers/iommu/intel/iommu.h | 21 ++++++++----
> > drivers/iommu/intel/pasid.c | 20 ++++++-----
> > 5 files changed, 100 insertions(+), 68 deletions(-)
> >
> > diff --git a/drivers/iommu/intel/cache.c b/drivers/iommu/intel/cache.c
> > index e8418cdd8331..dcf5e0e6af17 100644
> > --- a/drivers/iommu/intel/cache.c
> > +++ b/drivers/iommu/intel/cache.c
> > @@ -278,7 +278,7 @@ void cache_tag_flush_range(struct dmar_domain
> *domain, unsigned long start,
> > case CACHE_TAG_NESTING_IOTLB:
> > if (domain->use_first_level) {
> > qi_flush_piotlb(iommu, tag->domain_id,
> > - tag->pasid, addr, pages, ih);
> > + tag->pasid, addr, pages, ih,
> NULL);
> > } else {
>
> I'd like to have all batched descriptors code inside this file to make it easier for
> maintenance. Perhaps we can add the below infrastructure in the
> dmar_domain structure together with the cache tag.

Does this suggest we need to add batch versions of qi_flush_iotlb()/qi_flush_dev_iotlb()/qi_flush_piotlb()/qi_flush_dev_iotlb_pasid() in the cache.c file? That doesn't sound easy to maintain, does it?

In this patch, we reuse the current qi_flush_xxx() helpers for both batched and non-batched processing, so we don't need to duplicate the qi_flush_xxx() logic in two places, one for batched and one for non-batched submission. What do you think?

Regards,
-Tina

2024-06-04 04:21:31

by Lu Baolu

Subject: Re: [PATCH 1/2] iommu/vt-d: Support batching IOTLB/dev-IOTLB invalidation commands

On 6/3/24 3:37 PM, Zhang, Tina wrote:
>> -----Original Message-----
>> From: Baolu Lu<[email protected]>
>> Sent: Sunday, May 19, 2024 5:43 PM
>> To: Zhang, Tina<[email protected]>; Tian, Kevin<[email protected]>
>> Cc:[email protected];[email protected]; linux-
>> [email protected]
>> Subject: Re: [PATCH 1/2] iommu/vt-d: Support batching IOTLB/dev-IOTLB
>> invalidation commands
>>
>> On 5/17/24 8:37 AM, Tina Zhang wrote:
>>> Introduce a new parameter batch_desc to the QI based IOTLB/dev-IOTLB
>>> invalidation operations to support batching invalidation descriptors.
>>> This batch_desc is a pointer to the descriptor entry in a batch cmds
>>> buffer. If the batch_desc is NULL, it indicates that batch submission
>>> is not being used, and descriptors will be submitted individually.
>>>
>>> Also fix an issue reported by checkpatch about "unsigned mask":
>>> "Prefer 'unsigned int' to bare use of 'unsigned'"
>>>
>>> Signed-off-by: Tina Zhang<[email protected]>
>>> ---
>>> drivers/iommu/intel/cache.c | 33 +++++++++++-------
>>> drivers/iommu/intel/dmar.c | 67 ++++++++++++++++++++-----------------
>>> drivers/iommu/intel/iommu.c | 27 +++++++++------
>>> drivers/iommu/intel/iommu.h | 21 ++++++++----
>>> drivers/iommu/intel/pasid.c | 20 ++++++-----
>>> 5 files changed, 100 insertions(+), 68 deletions(-)
>>>
>>> diff --git a/drivers/iommu/intel/cache.c b/drivers/iommu/intel/cache.c
>>> index e8418cdd8331..dcf5e0e6af17 100644
>>> --- a/drivers/iommu/intel/cache.c
>>> +++ b/drivers/iommu/intel/cache.c
>>> @@ -278,7 +278,7 @@ void cache_tag_flush_range(struct dmar_domain
>> *domain, unsigned long start,
>>> case CACHE_TAG_NESTING_IOTLB:
>>> if (domain->use_first_level) {
>>> qi_flush_piotlb(iommu, tag->domain_id,
>>> - tag->pasid, addr, pages, ih);
>>> + tag->pasid, addr, pages, ih,
>> NULL);
>>> } else {
>> I'd like to have all batched descriptors code inside this file to make it easier for
>> maintenance. Perhaps we can add the below infrastructure in the
>> dmar_domain structure together with the cache tag.
> Does it suggest we need to add a batch version of qi_flush_iotlb/qi_flush_dev_iotlb/qi_flush_piotlb/qi_flush_dev_iotlb_pasid() in the cache.c file? It doesn't sound like an easy to maintain those functions, does it?

Yes. I don't think it's that difficult, as the helpers just compose a qi
descriptor and insert it into a local queue. This local queue is flushed
after iterating over all the cache tags, when there's no room for more
descriptors, or when the loop switches to a different iommu. Have I
missed anything?
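
For example, in cache_tag_flush_range() the loop would roughly look
like this (with the helpers sketched earlier in the thread):

        struct intel_iommu *iommu = NULL;

        list_for_each_entry(tag, &domain->cache_tags, node) {
                if (iommu != tag->iommu) {
                        /* Switching to a different iommu: push what we have. */
                        batched_desc_flush(domain, iommu);
                        iommu = tag->iommu;
                }

                /* Compose one descriptor into the local queue. */
                batched_desc_iotlb_enqueue(domain, iommu, tag->domain_id,
                                           addr, mask, DMA_TLB_PSI_FLUSH);
        }

        /* Finished iterating all cache tags. */
        batched_desc_flush(domain, iommu);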

>
> In this patch, we reuse the current qi_flush_xxx() for both batching and non-batching processing, so that we don't need to duplicate the logic of qi_flush_xxx() in two places with one for batching processing and the other one for non-batching processing. What do you think?

I don't like putting different things in a single helper. They share
some code, for example composing the descriptor's qw words, but the
rest is just different. You can move that shared code into a static
inline if you would like, but the batched and non-batched processing
should be in different helpers.

Best regards,
baolu

2024-06-04 06:02:35

by Zhang, Tina

Subject: RE: [PATCH 1/2] iommu/vt-d: Support batching IOTLB/dev-IOTLB invalidation commands

Hi Baolu,

> -----Original Message-----
> From: Baolu Lu <[email protected]>
> Sent: Tuesday, June 4, 2024 9:15 AM
> To: Zhang, Tina <[email protected]>; Tian, Kevin <[email protected]>
> Cc: [email protected]; [email protected]; linux-
> [email protected]
> Subject: Re: [PATCH 1/2] iommu/vt-d: Support batching IOTLB/dev-IOTLB
> invalidation commands
>
> On 6/3/24 3:37 PM, Zhang, Tina wrote:
> >> -----Original Message-----
> >> From: Baolu Lu<[email protected]>
> >> Sent: Sunday, May 19, 2024 5:43 PM
> >> To: Zhang, Tina<[email protected]>; Tian,
> >> Kevin<[email protected]>
> >> Cc:[email protected];[email protected]; linux-
> >> [email protected]
> >> Subject: Re: [PATCH 1/2] iommu/vt-d: Support batching IOTLB/dev-IOTLB
> >> invalidation commands
> >>
> >> On 5/17/24 8:37 AM, Tina Zhang wrote:
> >>> Introduce a new parameter batch_desc to the QI based IOTLB/dev-IOTLB
> >>> invalidation operations to support batching invalidation descriptors.
> >>> This batch_desc is a pointer to the descriptor entry in a batch cmds
> >>> buffer. If the batch_desc is NULL, it indicates that batch
> >>> submission is not being used, and descriptors will be submitted individually.
> >>>
> >>> Also fix an issue reported by checkpatch about "unsigned mask":
> >>> "Prefer 'unsigned int' to bare use of 'unsigned'"
> >>>
> >>> Signed-off-by: Tina Zhang<[email protected]>
> >>> ---
> >>> drivers/iommu/intel/cache.c | 33 +++++++++++-------
> >>> drivers/iommu/intel/dmar.c | 67 ++++++++++++++++++++-----------------
> >>> drivers/iommu/intel/iommu.c | 27 +++++++++------
> >>> drivers/iommu/intel/iommu.h | 21 ++++++++----
> >>> drivers/iommu/intel/pasid.c | 20 ++++++-----
> >>> 5 files changed, 100 insertions(+), 68 deletions(-)
> >>>
> >>> diff --git a/drivers/iommu/intel/cache.c
> >>> b/drivers/iommu/intel/cache.c index e8418cdd8331..dcf5e0e6af17
> >>> 100644
> >>> --- a/drivers/iommu/intel/cache.c
> >>> +++ b/drivers/iommu/intel/cache.c
> >>> @@ -278,7 +278,7 @@ void cache_tag_flush_range(struct dmar_domain
> >> *domain, unsigned long start,
> >>> case CACHE_TAG_NESTING_IOTLB:
> >>> if (domain->use_first_level) {
> >>> qi_flush_piotlb(iommu, tag->domain_id,
> >>> - tag->pasid, addr, pages, ih);
> >>> + tag->pasid, addr, pages, ih,
> >> NULL);
> >>> } else {
> >> I'd like to have all batched descriptors code inside this file to
> >> make it easier for maintenance. Perhaps we can add the below
> >> infrastructure in the dmar_domain structure together with the cache tag.
> > Does it suggest we need to add a batch version of
> qi_flush_iotlb/qi_flush_dev_iotlb/qi_flush_piotlb/qi_flush_dev_iotlb_pasid() in
> the cache.c file? It doesn't sound like an easy to maintain those functions, does
> it?
>
> Yes. I don't think it's that difficult as the helpers just compose a qi descriptor and
> insert it in a local queue. This local queue will be flushed after finishing iterating
> all cache tags, or there's no room for more descriptors, or switches to a different
> iommu. Have I missed anything?

In the current VT-d driver, only the qi_flush_xxx() functions know how to build IOTLB invalidation descriptors, and in those functions the descriptors are populated and submitted to hardware immediately.

To support batching commands, I think we have two choices:
1. Extend the qi_flush_xxx() functions to support batching descriptors (just like the implementation in this version).
This way, the descriptor-building logic already in qi_flush_xxx() is reused; the only additional change is collecting the descriptors into a batch buffer.

2. Introduce a new set of interfaces in cache.c that populate IOTLB descriptors and collect them into a batch buffer.
This requires copying the descriptor-building logic from the qi_flush_xxx() interfaces into the new ones. I hesitated to choose this option because it would duplicate code. Maybe we can factor the descriptor-building logic into lower-level helpers and have both the current qi_flush_xxx() functions and the new interfaces call them.

Which option do you think is better?

Regards,
-Tina

2024-06-04 08:22:55

by Lu Baolu

Subject: Re: [PATCH 1/2] iommu/vt-d: Support batching IOTLB/dev-IOTLB invalidation commands

On 6/4/24 1:59 PM, Zhang, Tina wrote:
>> -----Original Message-----
>> From: Baolu Lu<[email protected]>
>> Sent: Tuesday, June 4, 2024 9:15 AM
>> To: Zhang, Tina<[email protected]>; Tian, Kevin<[email protected]>
>> Cc:[email protected];[email protected]; linux-
>> [email protected]
>> Subject: Re: [PATCH 1/2] iommu/vt-d: Support batching IOTLB/dev-IOTLB
>> invalidation commands
>>
>> On 6/3/24 3:37 PM, Zhang, Tina wrote:
>>>> -----Original Message-----
>>>> From: Baolu Lu<[email protected]>
>>>> Sent: Sunday, May 19, 2024 5:43 PM
>>>> To: Zhang, Tina<[email protected]>; Tian,
>>>> Kevin<[email protected]>
>>>> Cc:[email protected];[email protected]; linux-
>>>> [email protected]
>>>> Subject: Re: [PATCH 1/2] iommu/vt-d: Support batching IOTLB/dev-IOTLB
>>>> invalidation commands
>>>>
>>>> On 5/17/24 8:37 AM, Tina Zhang wrote:
>>>>> Introduce a new parameter batch_desc to the QI based IOTLB/dev-IOTLB
>>>>> invalidation operations to support batching invalidation descriptors.
>>>>> This batch_desc is a pointer to the descriptor entry in a batch cmds
>>>>> buffer. If the batch_desc is NULL, it indicates that batch
>>>>> submission is not being used, and descriptors will be submitted individually.
>>>>>
>>>>> Also fix an issue reported by checkpatch about "unsigned mask":
>>>>> "Prefer 'unsigned int' to bare use of 'unsigned'"
>>>>>
>>>>> Signed-off-by: Tina Zhang<[email protected]>
>>>>> ---
>>>>> drivers/iommu/intel/cache.c | 33 +++++++++++-------
>>>>> drivers/iommu/intel/dmar.c | 67 ++++++++++++++++++++-----------------
>>>>> drivers/iommu/intel/iommu.c | 27 +++++++++------
>>>>> drivers/iommu/intel/iommu.h | 21 ++++++++----
>>>>> drivers/iommu/intel/pasid.c | 20 ++++++-----
>>>>> 5 files changed, 100 insertions(+), 68 deletions(-)
>>>>>
>>>>> diff --git a/drivers/iommu/intel/cache.c
>>>>> b/drivers/iommu/intel/cache.c index e8418cdd8331..dcf5e0e6af17
>>>>> 100644
>>>>> --- a/drivers/iommu/intel/cache.c
>>>>> +++ b/drivers/iommu/intel/cache.c
>>>>> @@ -278,7 +278,7 @@ void cache_tag_flush_range(struct dmar_domain
>>>> *domain, unsigned long start,
>>>>> case CACHE_TAG_NESTING_IOTLB:
>>>>> if (domain->use_first_level) {
>>>>> qi_flush_piotlb(iommu, tag->domain_id,
>>>>> - tag->pasid, addr, pages, ih);
>>>>> + tag->pasid, addr, pages, ih,
>>>> NULL);
>>>>> } else {
>>>> I'd like to have all batched descriptors code inside this file to
>>>> make it easier for maintenance. Perhaps we can add the below
>>>> infrastructure in the dmar_domain structure together with the cache tag.
>>> Does it suggest we need to add a batch version of
>> qi_flush_iotlb/qi_flush_dev_iotlb/qi_flush_piotlb/qi_flush_dev_iotlb_pasid() in
>> the cache.c file? It doesn't sound like an easy to maintain those functions, does
>> it?
>>
>> Yes. I don't think it's that difficult as the helpers just compose a qi descriptor and
>> insert it in a local queue. This local queue will be flushed after finishing iterating
>> all cache tags, or there's no room for more descriptors, or switches to a different
>> iommu. Have I missed anything?
> In current VT-d driver, only qi_flush_xxx() functions have the knowledge about how to make IOTLB invalidation descriptors. In qi_flush_xxx() functions, VT-d invalidation descriptors are populated and submitted to hardware immediately.
>
> To support batching command, I think we can have two choices:
> 1. Extend qi_flush_xxx() functions to support batching descriptors. (Just like the implementation in this version)
> In this way, the knowledge of populating an IOTLB invalidation descriptor in qi_flush_xxx() is reused. Additional code change is for batching the descriptor command into a buffer.
>
> 2. Introduce a new set of interfaces to populate IOTLB descriptors and batch them into a batch buffer.
> The new set of interfaces is implemented in the cache.c file and we need to copy the knowledge about how to populate IOTLB descriptors from qi_flush_xxx() interfaces into the new interfaces. I hesitated to choose this option because it would duplicate code. Maybe we can generalize the knowledge of populating IOTLB descriptors into lower level interfaces and make the current qi_flush_xxx() and the new set of interfaces call them.
>
> Which option do you think is better?

We can share the common code without changing the current helper
interfaces. Something like this,

static inline void qi_desc_iotlb(struct intel_iommu *iommu, u16 did, u64 addr,
                                 unsigned int size_order, u64 type,
                                 struct qi_desc *desc)
{
        u8 dw = 0, dr = 0;
        int ih = 0;

        if (cap_write_drain(iommu->cap))
                dw = 1;

        if (cap_read_drain(iommu->cap))
                dr = 1;

        desc->qw0 = QI_IOTLB_DID(did) | QI_IOTLB_DR(dr) | QI_IOTLB_DW(dw)
                | QI_IOTLB_GRAN(type) | QI_IOTLB_TYPE;
        desc->qw1 = QI_IOTLB_ADDR(addr) | QI_IOTLB_IH(ih)
                | QI_IOTLB_AM(size_order);
        desc->qw2 = 0;
        desc->qw3 = 0;
}

void qi_flush_iotlb(struct intel_iommu *iommu, u16 did, u64 addr,
                    unsigned int size_order, u64 type)
{
        struct qi_desc desc;

        qi_desc_iotlb(iommu, did, addr, size_order, type, &desc);
        qi_submit_sync(iommu, &desc, 1, 0);
}

The qi_desc_iotlb() can be used in both batched and non-batched paths.
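For example, the batched path in cache.c could then simply do

        qi_desc_iotlb(iommu, did, addr, size_order, type,
                      &batch->desc[batch->index++]);

and call qi_submit_sync() once for the whole batch.
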
Does this work for you?

Best regards,
baolu

2024-06-04 10:16:13

by Zhang, Tina

Subject: RE: [PATCH 1/2] iommu/vt-d: Support batching IOTLB/dev-IOTLB invalidation commands



> -----Original Message-----
> From: Baolu Lu <[email protected]>
> Sent: Tuesday, June 4, 2024 3:01 PM
> To: Zhang, Tina <[email protected]>; Tian, Kevin <[email protected]>
> Cc: [email protected]; [email protected]; linux-
> [email protected]
> Subject: Re: [PATCH 1/2] iommu/vt-d: Support batching IOTLB/dev-IOTLB
> invalidation commands
>
> On 6/4/24 1:59 PM, Zhang, Tina wrote:
> >> -----Original Message-----
> >> From: Baolu Lu<[email protected]>
> >> Sent: Tuesday, June 4, 2024 9:15 AM
> >> To: Zhang, Tina<[email protected]>; Tian,
> >> Kevin<[email protected]>
> >> Cc:[email protected];[email protected]; linux-
> >> [email protected]
> >> Subject: Re: [PATCH 1/2] iommu/vt-d: Support batching IOTLB/dev-IOTLB
> >> invalidation commands
> >>
> >> On 6/3/24 3:37 PM, Zhang, Tina wrote:
> >>>> -----Original Message-----
> >>>> From: Baolu Lu<[email protected]>
> >>>> Sent: Sunday, May 19, 2024 5:43 PM
> >>>> To: Zhang, Tina<[email protected]>; Tian,
> >>>> Kevin<[email protected]>
> >>>> Cc:[email protected];[email protected]; linux-
> >>>> [email protected]
> >>>> Subject: Re: [PATCH 1/2] iommu/vt-d: Support batching
> >>>> IOTLB/dev-IOTLB invalidation commands
> >>>>
> >>>> On 5/17/24 8:37 AM, Tina Zhang wrote:
> >>>>> Introduce a new parameter batch_desc to the QI based
> >>>>> IOTLB/dev-IOTLB invalidation operations to support batching invalidation
> descriptors.
> >>>>> This batch_desc is a pointer to the descriptor entry in a batch
> >>>>> cmds buffer. If the batch_desc is NULL, it indicates that batch
> >>>>> submission is not being used, and descriptors will be submitted
> individually.
> >>>>>
> >>>>> Also fix an issue reported by checkpatch about "unsigned mask":
> >>>>> "Prefer 'unsigned int' to bare use of 'unsigned'"
> >>>>>
> >>>>> Signed-off-by: Tina Zhang<[email protected]>
> >>>>> ---
> >>>>> drivers/iommu/intel/cache.c | 33 +++++++++++-------
> >>>>> drivers/iommu/intel/dmar.c | 67 ++++++++++++++++++++---------------
> --
> >>>>> drivers/iommu/intel/iommu.c | 27 +++++++++------
> >>>>> drivers/iommu/intel/iommu.h | 21 ++++++++----
> >>>>> drivers/iommu/intel/pasid.c | 20 ++++++-----
> >>>>> 5 files changed, 100 insertions(+), 68 deletions(-)
> >>>>>
> >>>>> diff --git a/drivers/iommu/intel/cache.c
> >>>>> b/drivers/iommu/intel/cache.c index e8418cdd8331..dcf5e0e6af17
> >>>>> 100644
> >>>>> --- a/drivers/iommu/intel/cache.c
> >>>>> +++ b/drivers/iommu/intel/cache.c
> >>>>> @@ -278,7 +278,7 @@ void cache_tag_flush_range(struct dmar_domain
> >>>> *domain, unsigned long start,
> >>>>> case CACHE_TAG_NESTING_IOTLB:
> >>>>> if (domain->use_first_level) {
> >>>>> qi_flush_piotlb(iommu, tag-
> >domain_id,
> >>>>> - tag->pasid, addr, pages, ih);
> >>>>> + tag->pasid, addr, pages, ih,
> >>>> NULL);
> >>>>> } else {
> >>>> I'd like to have all batched descriptors code inside this file to
> >>>> make it easier for maintenance. Perhaps we can add the below
> >>>> infrastructure in the dmar_domain structure together with the cache tag.
> >>> Does it suggest we need to add a batch version of
> >> qi_flush_iotlb/qi_flush_dev_iotlb/qi_flush_piotlb/qi_flush_dev_iotlb_
> >> pasid() in the cache.c file? It doesn't sound like an easy to
> >> maintain those functions, does it?
> >>
> >> Yes. I don't think it's that difficult as the helpers just compose a
> >> qi descriptor and insert it in a local queue. This local queue will
> >> be flushed after finishing iterating all cache tags, or there's no
> >> room for more descriptors, or switches to a different iommu. Have I missed
> anything?
> > In current VT-d driver, only qi_flush_xxx() functions have the knowledge about
> how to make IOTLB invalidation descriptors. In qi_flush_xxx() functions, VT-d
> invalidation descriptors are populated and submitted to hardware immediately.
> >
> > To support batching command, I think we can have two choices:
> > 1. Extend qi_flush_xxx() functions to support batching descriptors.
> > (Just like the implementation in this version) In this way, the knowledge of
> populating an IOTLB invalidation descriptor in qi_flush_xxx() is reused.
> Additional code change is for batching the descriptor command into a buffer.
> >
> > 2. Introduce a new set of interfaces to populate IOTLB descriptors and batch
> them into a batch buffer.
> > The new set of interfaces is implemented in the cache.c file and we need to
> copy the knowledge about how to populate IOTLB descriptors from
> qi_flush_xxx() interfaces into the new interfaces. I hesitated to choose this
> option because it would duplicate code. Maybe we can generalize the
> knowledge of populating IOTLB descriptors into lower level interfaces and make
> the current qi_flush_xxx() and the new set of interfaces call them.
> >
> > Which option do you think is better?
>
> We can share the common code without changing the current helper interfaces.
> Something like this,
>
> static inline void qi_desc_iotlb(struct intel_iommu *iommu, u16 did, u64 addr,
> unsigned int size_order, u64 type,
> struct qi_desc *desc) {
> u8 dw = 0, dr = 0;
> int ih = 0;
>
> if (cap_write_drain(iommu->cap))
> dw = 1;
>
> if (cap_read_drain(iommu->cap))
> dr = 1;
>
> desc->qw0 = QI_IOTLB_DID(did) | QI_IOTLB_DR(dr) | QI_IOTLB_DW(dw)
> | QI_IOTLB_GRAN(type) | QI_IOTLB_TYPE;
> desc->qw1 = QI_IOTLB_ADDR(addr) | QI_IOTLB_IH(ih)
> | QI_IOTLB_AM(size_order);
> desc->qw2 = 0;
> desc->qw3 = 0;
> }
>
> void qi_flush_iotlb(struct intel_iommu *iommu, u16 did, u64 addr,
> unsigned int size_order, u64 type) {
> struct qi_desc desc;
>
> qi_desc_iotlb(iommu, did, addr, size_order, type, &desc)
> qi_submit_sync(iommu, &desc, 1, 0); }
>
> The qi_desc_iotlb() can be used in both batched and non-batched paths.
> Does this work for you?
It works. Thanks.

Regards,
-Tina
>
> Best regards,
> baolu