2023-12-05 01:26:56

by Baolu Lu

Subject: [PATCH v2 1/6] iommu/vt-d: Setup scalable mode context entry in probe path

In contrast to legacy mode, in scalable mode the DMA translation table
is configured in the PASID table entry rather than in the context
entry. For this reason, it is more appropriate to set up the scalable
mode context entry in the device_probe callback and point it to the
appropriate PASID table.

The iommu domain attach/detach operations only affect the PASID table
entry. Therefore, there is no need to modify the context entry when
configuring the translation type and page table.

The only exception is the kdump case, where context entry setup is
postponed until the device driver invokes the first DMA interface.

Signed-off-by: Lu Baolu <[email protected]>
---
drivers/iommu/intel/pasid.h | 1 +
drivers/iommu/intel/iommu.c | 17 +++-
drivers/iommu/intel/pasid.c | 180 ++++++++++++++++++++++++++++++++++++
3 files changed, 195 insertions(+), 3 deletions(-)

diff --git a/drivers/iommu/intel/pasid.h b/drivers/iommu/intel/pasid.h
index 8d40d4c66e31..58d7049081b9 100644
--- a/drivers/iommu/intel/pasid.h
+++ b/drivers/iommu/intel/pasid.h
@@ -319,4 +319,5 @@ void intel_pasid_tear_down_entry(struct intel_iommu *iommu,
bool fault_ignore);
void intel_pasid_setup_page_snoop_control(struct intel_iommu *iommu,
struct device *dev, u32 pasid);
+int intel_pasid_setup_sm_context(struct device *dev, bool deferred);
#endif /* __INTEL_PASID_H */
diff --git a/drivers/iommu/intel/iommu.c b/drivers/iommu/intel/iommu.c
index 84b78e42a470..9dc005031dd2 100644
--- a/drivers/iommu/intel/iommu.c
+++ b/drivers/iommu/intel/iommu.c
@@ -4270,15 +4270,26 @@ static struct iommu_device *intel_iommu_probe_device(struct device *dev)
ret = intel_pasid_alloc_table(dev);
if (ret) {
dev_err(dev, "PASID table allocation failed\n");
- dev_iommu_priv_set(dev, NULL);
- kfree(info);
- return ERR_PTR(ret);
+ goto err_clear_priv;
+ }
+
+ ret = intel_pasid_setup_sm_context(dev, false);
+ if (ret) {
+ dev_err(dev, "Scalable context entry setup failed\n");
+ goto err_free_table;
}
}

intel_iommu_debugfs_create_dev(info);

return &iommu->iommu;
+err_free_table:
+ intel_pasid_free_table(dev);
+err_clear_priv:
+ dev_iommu_priv_set(dev, NULL);
+ kfree(info);
+
+ return ERR_PTR(ret);
}

static void intel_iommu_release_device(struct device *dev)
diff --git a/drivers/iommu/intel/pasid.c b/drivers/iommu/intel/pasid.c
index 3239cefa4c33..9e505060617a 100644
--- a/drivers/iommu/intel/pasid.c
+++ b/drivers/iommu/intel/pasid.c
@@ -304,6 +304,11 @@ int intel_pasid_setup_first_level(struct intel_iommu *iommu,
return -EINVAL;
}

+ if (intel_pasid_setup_sm_context(dev, true)) {
+ dev_err(dev, "Context entry is not configured\n");
+ return -ENODEV;
+ }
+
spin_lock(&iommu->lock);
pte = intel_pasid_get_entry(dev, pasid);
if (!pte) {
@@ -384,6 +389,11 @@ int intel_pasid_setup_second_level(struct intel_iommu *iommu,
return -EINVAL;
}

+ if (intel_pasid_setup_sm_context(dev, true)) {
+ dev_err(dev, "Context entry is not configured\n");
+ return -ENODEV;
+ }
+
pgd = domain->pgd;
agaw = iommu_skip_agaw(domain, iommu, &pgd);
if (agaw < 0) {
@@ -505,6 +515,11 @@ int intel_pasid_setup_pass_through(struct intel_iommu *iommu,
u16 did = FLPT_DEFAULT_DID;
struct pasid_entry *pte;

+ if (intel_pasid_setup_sm_context(dev, true)) {
+ dev_err(dev, "Context entry is not configured\n");
+ return -ENODEV;
+ }
+
spin_lock(&iommu->lock);
pte = intel_pasid_get_entry(dev, pasid);
if (!pte) {
@@ -623,6 +638,11 @@ int intel_pasid_setup_nested(struct intel_iommu *iommu, struct device *dev,
return -EINVAL;
}

+ if (intel_pasid_setup_sm_context(dev, true)) {
+ dev_err_ratelimited(dev, "Context entry is not configured\n");
+ return -ENODEV;
+ }
+
spin_lock(&iommu->lock);
pte = intel_pasid_get_entry(dev, pasid);
if (!pte) {
@@ -666,3 +686,163 @@ int intel_pasid_setup_nested(struct intel_iommu *iommu, struct device *dev,

return 0;
}
+
+/*
+ * Interface to set a pasid table to the scalable mode context table entry:
+ */
+
+/*
+ * Get the PASID directory size for scalable mode context entry.
+ * Value of X in the PDTS field of a scalable mode context entry
+ * indicates PASID directory with 2^(X + 7) entries.
+ */
+static unsigned long context_get_sm_pds(struct pasid_table *table)
+{
+ unsigned long pds, max_pde;
+
+ max_pde = table->max_pasid >> PASID_PDE_SHIFT;
+ pds = find_first_bit(&max_pde, MAX_NR_PASID_BITS);
+ if (pds < 7)
+ return 0;
+
+ return pds - 7;
+}
+
+static int context_entry_set_pasid_table(struct context_entry *context,
+ struct device *dev)
+{
+ struct device_domain_info *info = dev_iommu_priv_get(dev);
+ struct pasid_table *table = info->pasid_table;
+ struct intel_iommu *iommu = info->iommu;
+ unsigned long pds;
+
+ context_clear_entry(context);
+
+ pds = context_get_sm_pds(table);
+ context->lo = (u64)virt_to_phys(table->table) | context_pdts(pds);
+ context_set_sm_rid2pasid(context, IOMMU_NO_PASID);
+
+ if (info->ats_supported)
+ context_set_sm_dte(context);
+ if (info->pri_supported)
+ context_set_sm_pre(context);
+ if (info->pasid_supported)
+ context_set_pasid(context);
+
+ context_set_fault_enable(context);
+ context_set_present(context);
+ if (!ecap_coherent(iommu->ecap))
+ clflush_cache_range(context, sizeof(*context));
+
+ return 0;
+}
+
+static int device_pasid_table_setup(struct device *dev, u8 bus, u8 devfn)
+{
+ struct device_domain_info *info = dev_iommu_priv_get(dev);
+ struct intel_iommu *iommu = info->iommu;
+ struct context_entry *context;
+ int ret = 0;
+
+ spin_lock(&iommu->lock);
+ context = iommu_context_addr(iommu, bus, devfn, true);
+ if (!context) {
+ ret = -ENOMEM;
+ goto out_unlock;
+ }
+
+ if (context_present(context) && !context_copied(iommu, bus, devfn))
+ goto out_unlock;
+
+ /*
+ * Cache invalidation for changes to a scalable-mode context table
+ * entry.
+ *
+ * Section 6.5.3.3 of the VT-d spec:
+ * - Device-selective context-cache invalidation;
+ * - Domain-selective PASID-cache invalidation to affected domains
+ * (can be skipped if all PASID entries were not-present);
+ * - Domain-selective IOTLB invalidation to affected domains;
+ * - Global Device-TLB invalidation to affected functions.
+ *
+ * For kdump cases, old valid entries may be cached due to the
+ * in-flight DMA and copied pgtable, but there is no unmapping
+ * behaviour for them, thus we need explicit cache flushes for all
+ * affected domain IDs and PASIDs used in the copied PASID table.
+ * Given that we have no idea about which domain IDs and PASIDs were
+ * used in the copied tables, upgrade them to global PASID and IOTLB
+ * cache invalidation.
+ *
+ * For kdump case, at this point, the device is supposed to finish
+ * reset at its driver probe stage, so no in-flight DMA will exist,
+ * and we don't need to worry anymore hereafter.
+ */
+ if (context_copied(iommu, bus, devfn)) {
+ context_clear_entry(context);
+ clear_context_copied(iommu, bus, devfn);
+ iommu->flush.flush_context(iommu, 0,
+ (((u16)bus) << 8) | devfn,
+ DMA_CCMD_MASK_NOBIT,
+ DMA_CCMD_DEVICE_INVL);
+ qi_flush_pasid_cache(iommu, 0, QI_PC_GLOBAL, 0);
+ iommu->flush.flush_iotlb(iommu, 0, 0, 0, DMA_TLB_GLOBAL_FLUSH);
+ devtlb_invalidation_with_pasid(iommu, dev, IOMMU_NO_PASID);
+ }
+
+ context_entry_set_pasid_table(context, dev);
+
+ /*
+ * It's a non-present to present mapping. If hardware doesn't cache
+ * non-present entries we only need to flush the write-buffer. If it
+ * _does_ cache non-present entries, then it does so in the special
+ * domain ID #0, which we have to flush:
+ */
+ if (cap_caching_mode(iommu->cap)) {
+ iommu->flush.flush_context(iommu, 0,
+ (((u16)bus) << 8) | devfn,
+ DMA_CCMD_MASK_NOBIT,
+ DMA_CCMD_DEVICE_INVL);
+ iommu->flush.flush_iotlb(iommu, 0, 0, 0, DMA_TLB_GLOBAL_FLUSH);
+ } else {
+ iommu_flush_write_buffer(iommu);
+ }
+
+out_unlock:
+ spin_unlock(&iommu->lock);
+ return ret;
+}
+
+static int pci_pasid_table_setup(struct pci_dev *pdev, u16 alias, void *data)
+{
+ struct device *dev = data;
+
+ if (dev != &pdev->dev)
+ return 0;
+
+ return device_pasid_table_setup(dev, PCI_BUS_NUM(alias), alias & 0xff);
+}
+
+/*
+ * Set the device's PASID table to its context table entry.
+ *
+ * The PASID table is set in the context entries of both the device
+ * itself and its DMA alias requester IDs. When called from the domain
+ * attach paths, set @deferred to true; set it to false in other cases.
+ */
+int intel_pasid_setup_sm_context(struct device *dev, bool deferred)
+{
+ struct device_domain_info *info = dev_iommu_priv_get(dev);
+ struct intel_iommu *iommu = info->iommu;
+
+ /*
+ * Skip the PASID table setup if the copied state of the context
+ * entry does not match the calling context: copied entries are set
+ * up only in the deferred (attach) path, non-copied ones at probe.
+ */
+ if (deferred ^ context_copied(iommu, info->bus, info->devfn))
+ return 0;
+
+ if (!dev_is_pci(dev))
+ return device_pasid_table_setup(dev, info->bus, info->devfn);
+
+ return pci_for_each_dma_alias(to_pci_dev(dev), pci_pasid_table_setup, dev);
+}
--
2.34.1


2023-12-08 08:51:26

by Tian, Kevin

Subject: RE: [PATCH v2 1/6] iommu/vt-d: Setup scalable mode context entry in probe path

> From: Lu Baolu <[email protected]>
> Sent: Tuesday, December 5, 2023 9:22 AM
>
> @@ -304,6 +304,11 @@ int intel_pasid_setup_first_level(struct intel_iommu
> *iommu,
> return -EINVAL;
> }
>
> + if (intel_pasid_setup_sm_context(dev, true)) {
> + dev_err(dev, "Context entry is not configured\n");
> + return -ENODEV;
> + }
> +
> spin_lock(&iommu->lock);
> pte = intel_pasid_get_entry(dev, pasid);
> if (!pte) {
> @@ -384,6 +389,11 @@ int intel_pasid_setup_second_level(struct
> intel_iommu *iommu,
> return -EINVAL;
> }
>
> + if (intel_pasid_setup_sm_context(dev, true)) {
> + dev_err(dev, "Context entry is not configured\n");
> + return -ENODEV;
> + }
> +
> pgd = domain->pgd;
> agaw = iommu_skip_agaw(domain, iommu, &pgd);
> if (agaw < 0) {
> @@ -505,6 +515,11 @@ int intel_pasid_setup_pass_through(struct
> intel_iommu *iommu,
> u16 did = FLPT_DEFAULT_DID;
> struct pasid_entry *pte;
>
> + if (intel_pasid_setup_sm_context(dev, true)) {
> + dev_err(dev, "Context entry is not configured\n");
> + return -ENODEV;
> + }
> +
> spin_lock(&iommu->lock);
> pte = intel_pasid_get_entry(dev, pasid);
> if (!pte) {

Instead of replicating the invocation in all three stubs, it's simpler
to do it once in dmar_domain_attach_device() for all of them.

Then put the deferred check outside of intel_pasid_setup_sm_context()
instead of using a Boolean flag.
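
A rough sketch of that shape, purely for illustration (the
dmar_domain_attach_device() body below is hypothetical and only shows
where the single call could sit, with the Boolean flag dropped):

static int dmar_domain_attach_device(struct dmar_domain *domain,
				     struct device *dev)
{
	struct device_domain_info *info = dev_iommu_priv_get(dev);
	int ret;

	/* Deferred (kdump) context entry setup, done once here ... */
	if (context_copied(info->iommu, info->bus, info->devfn)) {
		ret = intel_pasid_setup_sm_context(dev);
		if (ret)
			return ret;
	}

	/*
	 * ... then dispatch to intel_pasid_setup_first_level(),
	 * intel_pasid_setup_second_level() or
	 * intel_pasid_setup_pass_through() as before, without each of
	 * them repeating the check.
	 */
	return 0;
}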


> @@ -623,6 +638,11 @@ int intel_pasid_setup_nested(struct intel_iommu
> *iommu, struct device *dev,
> return -EINVAL;
> }
>
> + if (intel_pasid_setup_sm_context(dev, true)) {
> + dev_err_ratelimited(dev, "Context entry is not configured\n");
> + return -ENODEV;
> + }
> +

Do we support nested in kdump?

> +
> + /*
> + * Cache invalidation for changes to a scalable-mode context table
> + * entry.
> + *
> + * Section 6.5.3.3 of the VT-d spec:
> + * - Device-selective context-cache invalidation;
> + * - Domain-selective PASID-cache invalidation to affected domains
> + * (can be skipped if all PASID entries were not-present);
> + * - Domain-selective IOTLB invalidation to affected domains;
> + * - Global Device-TLB invalidation to affected functions.
> + *
> + * For kdump cases, old valid entries may be cached due to the
> + * in-flight DMA and copied pgtable, but there is no unmapping
> + * behaviour for them, thus we need explicit cache flushes for all
> + * affected domain IDs and PASIDs used in the copied PASID table.
> + * Given that we have no idea about which domain IDs and PASIDs
> were
> + * used in the copied tables, upgrade them to global PASID and IOTLB
> + * cache invalidation.
> + *
> + * For kdump case, at this point, the device is supposed to finish
> + * reset at its driver probe stage, so no in-flight DMA will exist,
> + * and we don't need to worry anymore hereafter.
> + */
> + if (context_copied(iommu, bus, devfn)) {
> + context_clear_entry(context);
> + clear_context_copied(iommu, bus, devfn);
> + iommu->flush.flush_context(iommu, 0,
> + (((u16)bus) << 8) | devfn,
> + DMA_CCMD_MASK_NOBIT,
> + DMA_CCMD_DEVICE_INVL);
> + qi_flush_pasid_cache(iommu, 0, QI_PC_GLOBAL, 0);
> + iommu->flush.flush_iotlb(iommu, 0, 0, 0,
> DMA_TLB_GLOBAL_FLUSH);
> + devtlb_invalidation_with_pasid(iommu, dev,
> IOMMU_NO_PASID);
> + }

I don't see this logic in the existing code. If it's a bug fix then
please send it separately first.

> +
> + context_entry_set_pasid_table(context, dev);

and here is an additional change to the context entry. Why is the
context cache invalidated at the start?

> +
> +static int pci_pasid_table_setup(struct pci_dev *pdev, u16 alias, void *data)
> +{
> + struct device *dev = data;
> +
> + if (dev != &pdev->dev)
> + return 0;

What is this check for? The existing domain_context_mapping_cb() doesn't
have it, which implies a behavior change.

2023-12-09 07:58:02

by Baolu Lu

Subject: Re: [PATCH v2 1/6] iommu/vt-d: Setup scalable mode context entry in probe path

On 12/8/23 4:50 PM, Tian, Kevin wrote:
>> From: Lu Baolu <[email protected]>
>> Sent: Tuesday, December 5, 2023 9:22 AM
>>
>> @@ -304,6 +304,11 @@ int intel_pasid_setup_first_level(struct intel_iommu
>> *iommu,
>> return -EINVAL;
>> }
>>
>> + if (intel_pasid_setup_sm_context(dev, true)) {
>> + dev_err(dev, "Context entry is not configured\n");
>> + return -ENODEV;
>> + }
>> +
>> spin_lock(&iommu->lock);
>> pte = intel_pasid_get_entry(dev, pasid);
>> if (!pte) {
>> @@ -384,6 +389,11 @@ int intel_pasid_setup_second_level(struct
>> intel_iommu *iommu,
>> return -EINVAL;
>> }
>>
>> + if (intel_pasid_setup_sm_context(dev, true)) {
>> + dev_err(dev, "Context entry is not configured\n");
>> + return -ENODEV;
>> + }
>> +
>> pgd = domain->pgd;
>> agaw = iommu_skip_agaw(domain, iommu, &pgd);
>> if (agaw < 0) {
>> @@ -505,6 +515,11 @@ int intel_pasid_setup_pass_through(struct
>> intel_iommu *iommu,
>> u16 did = FLPT_DEFAULT_DID;
>> struct pasid_entry *pte;
>>
>> + if (intel_pasid_setup_sm_context(dev, true)) {
>> + dev_err(dev, "Context entry is not configured\n");
>> + return -ENODEV;
>> + }
>> +
>> spin_lock(&iommu->lock);
>> pte = intel_pasid_get_entry(dev, pasid);
>> if (!pte) {
>
> instead of replicating the invocation in all three stubs it's simpler to
> do once in dmar_domain_attach_device() for all of them.

It's not good to repeat the code. Perhaps we can add this check to
intel_pasid_get_entry()? The rule is that you can't get a PASID entry
if the context entry is copied.
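
Something along these lines, perhaps (an illustrative helper only, not
a posted patch); intel_pasid_get_entry() would then return NULL early
when this check fails:

static bool context_entry_is_ready(struct device *dev)
{
	struct device_domain_info *info = dev_iommu_priv_get(dev);

	/*
	 * A context entry still marked as copied from the previous
	 * (kdump'd) kernel means the scalable-mode PASID table has not
	 * been installed yet, so no PASID entry should be handed out.
	 */
	return !context_copied(info->iommu, info->bus, info->devfn);
}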

> Then put the deferred check outside of intel_pasid_setup_sm_context()
> instead of using a Boolean flag

Okay, that's more readable.

>> @@ -623,6 +638,11 @@ int intel_pasid_setup_nested(struct intel_iommu
>> *iommu, struct device *dev,
>> return -EINVAL;
>> }
>>
>> + if (intel_pasid_setup_sm_context(dev, true)) {
>> + dev_err_ratelimited(dev, "Context entry is not configured\n");
>> + return -ENODEV;
>> + }
>> +
>
> Do we support nested in kdump?

No.

>
>> +
>> + /*
>> + * Cache invalidation for changes to a scalable-mode context table
>> + * entry.
>> + *
>> + * Section 6.5.3.3 of the VT-d spec:
>> + * - Device-selective context-cache invalidation;
>> + * - Domain-selective PASID-cache invalidation to affected domains
>> + * (can be skipped if all PASID entries were not-present);
>> + * - Domain-selective IOTLB invalidation to affected domains;
>> + * - Global Device-TLB invalidation to affected functions.
>> + *
>> + * For kdump cases, old valid entries may be cached due to the
>> + * in-flight DMA and copied pgtable, but there is no unmapping
>> + * behaviour for them, thus we need explicit cache flushes for all
>> + * affected domain IDs and PASIDs used in the copied PASID table.
>> + * Given that we have no idea about which domain IDs and PASIDs
>> were
>> + * used in the copied tables, upgrade them to global PASID and IOTLB
>> + * cache invalidation.
>> + *
>> + * For kdump case, at this point, the device is supposed to finish
>> + * reset at its driver probe stage, so no in-flight DMA will exist,
>> + * and we don't need to worry anymore hereafter.
>> + */
>> + if (context_copied(iommu, bus, devfn)) {
>> + context_clear_entry(context);
>> + clear_context_copied(iommu, bus, devfn);
>> + iommu->flush.flush_context(iommu, 0,
>> + (((u16)bus) << 8) | devfn,
>> + DMA_CCMD_MASK_NOBIT,
>> + DMA_CCMD_DEVICE_INVL);
>> + qi_flush_pasid_cache(iommu, 0, QI_PC_GLOBAL, 0);
>> + iommu->flush.flush_iotlb(iommu, 0, 0, 0,
>> DMA_TLB_GLOBAL_FLUSH);
>> + devtlb_invalidation_with_pasid(iommu, dev,
>> IOMMU_NO_PASID);
>> + }
>
> I don't see this logic from existing code. If it's a bug fix then
> please send it separately first.

This code originates from domain_context_mapping_one(). It's not a bug
fix.

>> +
>> + context_entry_set_pasid_table(context, dev);
>
> and here is additional change to the context entry. Why is the
> context cache invalidated in the start?

The previous context entry may have been copied from a previous kernel.
Therefore, we need to tear down the entry and flush the caches before
reusing it.

>
>> +
>> +static int pci_pasid_table_setup(struct pci_dev *pdev, u16 alias, void *data)
>> +{
>> + struct device *dev = data;
>> +
>> + if (dev != &pdev->dev)
>> + return 0;
>
> what is it for? the existing domain_context_mapping_cb() doesn't have
> this check then implying a behavior change.

Emm, I should remove this check and keep it consistent with the
existing code.

Best regards,
baolu

2023-12-11 04:06:42

by Tian, Kevin

Subject: RE: [PATCH v2 1/6] iommu/vt-d: Setup scalable mode context entry in probe path

> From: Baolu Lu <[email protected]>
> Sent: Saturday, December 9, 2023 3:53 PM
>
> On 12/8/23 4:50 PM, Tian, Kevin wrote:
> >> From: Lu Baolu <[email protected]>
> >> Sent: Tuesday, December 5, 2023 9:22 AM
> >>
> >> @@ -304,6 +304,11 @@ int intel_pasid_setup_first_level(struct
> intel_iommu
> >> *iommu,
> >> return -EINVAL;
> >> }
> >>
> >> + if (intel_pasid_setup_sm_context(dev, true)) {
> >> + dev_err(dev, "Context entry is not configured\n");
> >> + return -ENODEV;
> >> + }
> >> +
> >> spin_lock(&iommu->lock);
> >> pte = intel_pasid_get_entry(dev, pasid);
> >> if (!pte) {
> >> @@ -384,6 +389,11 @@ int intel_pasid_setup_second_level(struct
> >> intel_iommu *iommu,
> >> return -EINVAL;
> >> }
> >>
> >> + if (intel_pasid_setup_sm_context(dev, true)) {
> >> + dev_err(dev, "Context entry is not configured\n");
> >> + return -ENODEV;
> >> + }
> >> +
> >> pgd = domain->pgd;
> >> agaw = iommu_skip_agaw(domain, iommu, &pgd);
> >> if (agaw < 0) {
> >> @@ -505,6 +515,11 @@ int intel_pasid_setup_pass_through(struct
> >> intel_iommu *iommu,
> >> u16 did = FLPT_DEFAULT_DID;
> >> struct pasid_entry *pte;
> >>
> >> + if (intel_pasid_setup_sm_context(dev, true)) {
> >> + dev_err(dev, "Context entry is not configured\n");
> >> + return -ENODEV;
> >> + }
> >> +
> >> spin_lock(&iommu->lock);
> >> pte = intel_pasid_get_entry(dev, pasid);
> >> if (!pte) {
> >
> > instead of replicating the invocation in all three stubs it's simpler to
> > do once in dmar_domain_attach_device() for all of them.
>
> It's not good to repeat the code. Perhaps we can add this check to
> intel_pasid_get_entry()? The rule is that you can't get the pasid entry
> if the context is copied.

You can add a check in intel_pasid_get_entry(), but it's not clean to
put the delayed setup inside it, as we don't know when that helper
might be called. It's clearer to do the delayed setup right at this
attach point.

> >
> >> +
> >> + /*
> >> + * Cache invalidation for changes to a scalable-mode context table
> >> + * entry.
> >> + *
> >> + * Section 6.5.3.3 of the VT-d spec:
> >> + * - Device-selective context-cache invalidation;
> >> + * - Domain-selective PASID-cache invalidation to affected domains
> >> + * (can be skipped if all PASID entries were not-present);
> >> + * - Domain-selective IOTLB invalidation to affected domains;
> >> + * - Global Device-TLB invalidation to affected functions.
> >> + *
> >> + * For kdump cases, old valid entries may be cached due to the
> >> + * in-flight DMA and copied pgtable, but there is no unmapping
> >> + * behaviour for them, thus we need explicit cache flushes for all
> >> + * affected domain IDs and PASIDs used in the copied PASID table.
> >> + * Given that we have no idea about which domain IDs and PASIDs
> >> were
> >> + * used in the copied tables, upgrade them to global PASID and IOTLB
> >> + * cache invalidation.
> >> + *
> >> + * For kdump case, at this point, the device is supposed to finish
> >> + * reset at its driver probe stage, so no in-flight DMA will exist,
> >> + * and we don't need to worry anymore hereafter.
> >> + */
> >> + if (context_copied(iommu, bus, devfn)) {
> >> + context_clear_entry(context);
> >> + clear_context_copied(iommu, bus, devfn);
> >> + iommu->flush.flush_context(iommu, 0,
> >> + (((u16)bus) << 8) | devfn,
> >> + DMA_CCMD_MASK_NOBIT,
> >> + DMA_CCMD_DEVICE_INVL);
> >> + qi_flush_pasid_cache(iommu, 0, QI_PC_GLOBAL, 0);
> >> + iommu->flush.flush_iotlb(iommu, 0, 0, 0,
> >> DMA_TLB_GLOBAL_FLUSH);
> >> + devtlb_invalidation_with_pasid(iommu, dev,
> >> IOMMU_NO_PASID);
> >> + }
> >
> > I don't see this logic from existing code. If it's a bug fix then
> > please send it separately first.
>
> This code originates from domain_context_mapping_one(). It's not a bug
> fix.

but it's not the same flow:

/*
* For kdump cases, old valid entries may be cached due to the
* in-flight DMA and copied pgtable, but there is no unmapping
* behaviour for them, thus we need an explicit cache flush for
* the newly-mapped device. For kdump, at this point, the device
* is supposed to finish reset at its driver probe stage, so no
* in-flight DMA will exist, and we don't need to worry anymore
* hereafter.
*/
if (context_copied(iommu, bus, devfn)) {
u16 did_old = context_domain_id(context);

if (did_old < cap_ndoms(iommu->cap)) {
iommu->flush.flush_context(iommu, did_old,
(((u16)bus) << 8) | devfn,
DMA_CCMD_MASK_NOBIT,
DMA_CCMD_DEVICE_INVL);
iommu->flush.flush_iotlb(iommu, did_old, 0, 0,
DMA_TLB_DSI_FLUSH);
}

clear_context_copied(iommu, bus, devfn);
}

>
> >> +
> >> + context_entry_set_pasid_table(context, dev);
> >
> > and here is additional change to the context entry. Why is the
> > context cache invalidated in the start?
>
> The previous context entry may be copied from a previous kernel.
> Therefore, we need to tear down the entry and flush the caches before
> reusing it.

There is no reuse before this function returns, so why not do just
one flush at the end?

2023-12-11 17:38:39

by Jason Gunthorpe

Subject: Re: [PATCH v2 1/6] iommu/vt-d: Setup scalable mode context entry in probe path

On Tue, Dec 05, 2023 at 09:21:58AM +0800, Lu Baolu wrote:
> +/*
> + * Get the PASID directory size for scalable mode context entry.
> + * Value of X in the PDTS field of a scalable mode context entry
> + * indicates PASID directory with 2^(X + 7) entries.
> + */
> +static unsigned long context_get_sm_pds(struct pasid_table *table)
> +{
> + unsigned long pds, max_pde;
> +
> + max_pde = table->max_pasid >> PASID_PDE_SHIFT;
> + pds = find_first_bit(&max_pde, MAX_NR_PASID_BITS);
> + if (pds < 7)
> + return 0;
> +
> + return pds - 7;

This seems like a convoluted way to write
max(ilog2(table->max_pasid) - 7, 0)

?

Jason

2023-12-12 05:39:13

by Baolu Lu

Subject: Re: [PATCH v2 1/6] iommu/vt-d: Setup scalable mode context entry in probe path

On 12/12/23 1:38 AM, Jason Gunthorpe wrote:
> On Tue, Dec 05, 2023 at 09:21:58AM +0800, Lu Baolu wrote:
>> +/*
>> + * Get the PASID directory size for scalable mode context entry.
>> + * Value of X in the PDTS field of a scalable mode context entry
>> + * indicates PASID directory with 2^(X + 7) entries.
>> + */
>> +static unsigned long context_get_sm_pds(struct pasid_table *table)
>> +{
>> + unsigned long pds, max_pde;
>> +
>> + max_pde = table->max_pasid >> PASID_PDE_SHIFT;
>> + pds = find_first_bit(&max_pde, MAX_NR_PASID_BITS);
>> + if (pds < 7)
>> + return 0;
>> +
>> + return pds - 7;
> This seems like a convoluted way to write
> max(ilog2(table->max_pasid) - 7, 0)
>
> ?

Yes. :-)

Something like,

max(ilog2(table->max_pasid >> PASID_PDE_SHIFT) - 7, 0)
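
Or, as a complete helper, a minimal sketch (assuming table->max_pasid
is a power of two, so that find_first_bit() and ilog2() pick the same
bit):

static unsigned long context_get_sm_pds(struct pasid_table *table)
{
	int pds = ilog2(table->max_pasid >> PASID_PDE_SHIFT);

	return pds < 7 ? 0 : pds - 7;
}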

Best regards,
baolu