v2: https://lore.kernel.org/linux-iommu/[email protected]/
Hi all,
Here's v3, now with working x86! Having finally made sense of how I
broke Intel, I've given AMD the same fix by inspection. I'm still not
100% sure about s390, but it looks like it should probably be OK since
it seems to register an IOMMU instance for each PCI device (?!) before
disappearing into PCI hotplug code, wherein I assume we should never see
a PCI device appear without its IOMMU already registered.
Otherwise, the only other updates are hooking up the new host1x context
bus (noting that it now takes all of 4 lines to support a whole new bus,
yay!), and a slight tweak to make sure we keep rejecting registration of
conflicting iommu_ops rather than needlessly change that just yet.
Thanks,
Robin.
Robin Murphy (15):
iommu/vt-d: Handle race between registration and device probe
iommu/amd: Handle race between registration and device probe
iommu: Always register bus notifiers
iommu: Move bus setup to IOMMU device registration
iommu/amd: Clean up bus_set_iommu()
iommu/arm-smmu: Clean up bus_set_iommu()
iommu/arm-smmu-v3: Clean up bus_set_iommu()
iommu/dart: Clean up bus_set_iommu()
iommu/exynos: Clean up bus_set_iommu()
iommu/ipmmu-vmsa: Clean up bus_set_iommu()
iommu/mtk: Clean up bus_set_iommu()
iommu/omap: Clean up bus_set_iommu()
iommu/tegra-smmu: Clean up bus_set_iommu()
iommu/virtio: Clean up bus_set_iommu()
iommu: Clean up bus_set_iommu()
drivers/iommu/amd/amd_iommu.h | 1 -
drivers/iommu/amd/init.c | 9 +-
drivers/iommu/amd/iommu.c | 25 +----
drivers/iommu/apple-dart.c | 30 +----
drivers/iommu/arm/arm-smmu-v3/arm-smmu-v3.c | 53 +--------
drivers/iommu/arm/arm-smmu/arm-smmu.c | 84 +-------------
drivers/iommu/arm/arm-smmu/qcom_iommu.c | 4 -
drivers/iommu/exynos-iommu.c | 9 --
drivers/iommu/fsl_pamu_domain.c | 4 -
drivers/iommu/intel/iommu.c | 4 +-
drivers/iommu/iommu.c | 117 +++++++++-----------
drivers/iommu/ipmmu-vmsa.c | 35 +-----
drivers/iommu/msm_iommu.c | 2 -
drivers/iommu/mtk_iommu.c | 24 +---
drivers/iommu/mtk_iommu_v1.c | 13 +--
drivers/iommu/omap-iommu.c | 6 -
drivers/iommu/rockchip-iommu.c | 2 -
drivers/iommu/s390-iommu.c | 6 -
drivers/iommu/sprd-iommu.c | 5 -
drivers/iommu/sun50i-iommu.c | 2 -
drivers/iommu/tegra-smmu.c | 29 +----
drivers/iommu/virtio-iommu.c | 25 -----
include/linux/iommu.h | 1 -
23 files changed, 75 insertions(+), 415 deletions(-)
--
2.36.1.dirty
The number of bus types that the IOMMU subsystem deals with is small and
manageable, so pull that list into core code as a first step towards
cleaning up all the boilerplate bus-awareness from drivers. Calling
iommu_probe_device() before bus->iommu_ops is set will simply return
-ENODEV and not break the notifier call chain, so there should be no
harm in proactively registering all our bus notifiers at init time.
Tested-by: Marek Szyprowski <[email protected]>
Signed-off-by: Robin Murphy <[email protected]>
---
v3: Add host1x_context_bus
drivers/iommu/iommu.c | 53 ++++++++++++++++++++++++++-----------------
1 file changed, 32 insertions(+), 21 deletions(-)
diff --git a/drivers/iommu/iommu.c b/drivers/iommu/iommu.c
index 847ad47a2dfd..514edc0eaa94 100644
--- a/drivers/iommu/iommu.c
+++ b/drivers/iommu/iommu.c
@@ -6,6 +6,7 @@
#define pr_fmt(fmt) "iommu: " fmt
+#include <linux/amba/bus.h>
#include <linux/device.h>
#include <linux/dma-iommu.h>
#include <linux/kernel.h>
@@ -16,11 +17,13 @@
#include <linux/export.h>
#include <linux/slab.h>
#include <linux/errno.h>
+#include <linux/host1x_context_bus.h>
#include <linux/iommu.h>
#include <linux/idr.h>
#include <linux/err.h>
#include <linux/pci.h>
#include <linux/bitops.h>
+#include <linux/platform_device.h>
#include <linux/property.h>
#include <linux/fsl/mc.h>
#include <linux/module.h>
@@ -75,6 +78,7 @@ static const char * const iommu_group_resv_type_string[] = {
#define IOMMU_CMD_LINE_DMA_API BIT(0)
#define IOMMU_CMD_LINE_STRICT BIT(1)
+static int iommu_bus_init(struct bus_type *bus);
static int iommu_alloc_default_domain(struct iommu_group *group,
struct device *dev);
static struct iommu_domain *__iommu_domain_alloc(struct bus_type *bus,
@@ -103,6 +107,22 @@ struct iommu_group_attribute iommu_group_attr_##_name = \
static LIST_HEAD(iommu_device_list);
static DEFINE_SPINLOCK(iommu_device_lock);
+static struct bus_type * const iommu_buses[] = {
+ &platform_bus_type,
+#ifdef CONFIG_PCI
+ &pci_bus_type,
+#endif
+#ifdef CONFIG_ARM_AMBA
+ &amba_bustype,
+#endif
+#ifdef CONFIG_FSL_MC_BUS
+ &fsl_mc_bus_type,
+#endif
+#ifdef CONFIG_TEGRA_HOST1X_CONTEXT_BUS
+ &host1x_context_device_bus_type,
+#endif
+};
+
/*
* Use a function instead of an array here because the domain-type is a
* bit-field, so an array would waste memory.
@@ -152,6 +172,10 @@ static int __init iommu_subsys_init(void)
(iommu_cmd_line & IOMMU_CMD_LINE_STRICT) ?
"(set via kernel command line)" : "");
+ /* If the system is so broken that this fails, it will WARN anyway */
+ for (int i = 0; i < ARRAY_SIZE(iommu_buses); i++)
+ iommu_bus_init(iommu_buses[i]);
+
return 0;
}
subsys_initcall(iommu_subsys_init);
@@ -1772,35 +1796,19 @@ int bus_iommu_probe(struct bus_type *bus)
return ret;
}
-static int iommu_bus_init(struct bus_type *bus, const struct iommu_ops *ops)
+static int iommu_bus_init(struct bus_type *bus)
{
struct notifier_block *nb;
int err;
- nb = kzalloc(sizeof(struct notifier_block), GFP_KERNEL);
+ nb = kzalloc(sizeof(*nb), GFP_KERNEL);
if (!nb)
return -ENOMEM;
nb->notifier_call = iommu_bus_notifier;
-
err = bus_register_notifier(bus, nb);
if (err)
- goto out_free;
-
- err = bus_iommu_probe(bus);
- if (err)
- goto out_err;
-
-
- return 0;
-
-out_err:
- /* Clean up */
- bus_for_each_dev(bus, NULL, NULL, remove_iommu_group);
- bus_unregister_notifier(bus, nb);
-
-out_free:
- kfree(nb);
+ kfree(nb);
return err;
}
@@ -1833,9 +1841,12 @@ int bus_set_iommu(struct bus_type *bus, const struct iommu_ops *ops)
bus->iommu_ops = ops;
/* Do IOMMU specific setup for this bus-type */
- err = iommu_bus_init(bus, ops);
- if (err)
+ err = bus_iommu_probe(bus);
+ if (err) {
+ /* Clean up */
+ bus_for_each_dev(bus, NULL, NULL, remove_iommu_group);
bus->iommu_ops = NULL;
+ }
return err;
}
--
2.36.1.dirty
Move the bus setup to iommu_device_register(). This should allow
bus_iommu_probe() to be correctly replayed for multiple IOMMU instances,
and leaves bus_set_iommu() as a glorified no-op to be cleaned up next.
At this point we can also handle cleanup better than just rolling back
the most-recently-touched bus upon failure - which may release devices
owned by other already-registered instances, and still leave devices on
other buses with dangling pointers to the failed instance. Now it's easy
to clean up the exact footprint of a given instance, no more, no less.
Tested-by: Marek Szyprowski <[email protected]>
Reviewed-By: Krishna Reddy <[email protected]>
Signed-off-by: Robin Murphy <[email protected]>
---
v3: Have iommu_device_register() return -EBUSY for conflicting ops
rather than change that behaviour just yet.
drivers/iommu/iommu.c | 54 ++++++++++++++++++++++++-------------------
1 file changed, 30 insertions(+), 24 deletions(-)
diff --git a/drivers/iommu/iommu.c b/drivers/iommu/iommu.c
index 514edc0eaa94..acb7b2ab0b79 100644
--- a/drivers/iommu/iommu.c
+++ b/drivers/iommu/iommu.c
@@ -180,6 +180,14 @@ static int __init iommu_subsys_init(void)
}
subsys_initcall(iommu_subsys_init);
+static int remove_iommu_group(struct device *dev, void *data)
+{
+ if (dev->iommu && dev->iommu->iommu_dev == data)
+ iommu_release_device(dev);
+
+ return 0;
+}
+
/**
* iommu_device_register() - Register an IOMMU hardware instance
* @iommu: IOMMU handle for the instance
@@ -202,12 +210,32 @@ int iommu_device_register(struct iommu_device *iommu,
spin_lock(&iommu_device_lock);
list_add_tail(&iommu->list, &iommu_device_list);
spin_unlock(&iommu_device_lock);
+
+ for (int i = 0; i < ARRAY_SIZE(iommu_buses); i++) {
+ struct bus_type *bus = iommu_buses[i];
+ int err;
+
+ if (bus->iommu_ops && bus->iommu_ops != ops) {
+ err = -EBUSY;
+ } else {
+ bus->iommu_ops = ops;
+ err = bus_iommu_probe(bus);
+ }
+ if (err) {
+ iommu_device_unregister(iommu);
+ return err;
+ }
+ }
+
return 0;
}
EXPORT_SYMBOL_GPL(iommu_device_register);
void iommu_device_unregister(struct iommu_device *iommu)
{
+ for (int i = 0; i < ARRAY_SIZE(iommu_buses); i++)
+ bus_for_each_dev(iommu_buses[i], NULL, iommu, remove_iommu_group);
+
spin_lock(&iommu_device_lock);
list_del(&iommu->list);
spin_unlock(&iommu_device_lock);
@@ -1633,13 +1661,6 @@ static int probe_iommu_group(struct device *dev, void *data)
return ret;
}
-static int remove_iommu_group(struct device *dev, void *data)
-{
- iommu_release_device(dev);
-
- return 0;
-}
-
static int iommu_bus_notifier(struct notifier_block *nb,
unsigned long action, void *data)
{
@@ -1828,27 +1849,12 @@ static int iommu_bus_init(struct bus_type *bus)
*/
int bus_set_iommu(struct bus_type *bus, const struct iommu_ops *ops)
{
- int err;
-
- if (ops == NULL) {
- bus->iommu_ops = NULL;
- return 0;
- }
-
- if (bus->iommu_ops != NULL)
+ if (bus->iommu_ops && ops && bus->iommu_ops != ops)
return -EBUSY;
bus->iommu_ops = ops;
- /* Do IOMMU specific setup for this bus-type */
- err = bus_iommu_probe(bus);
- if (err) {
- /* Clean up */
- bus_for_each_dev(bus, NULL, NULL, remove_iommu_group);
- bus->iommu_ops = NULL;
- }
-
- return err;
+ return 0;
}
EXPORT_SYMBOL_GPL(bus_set_iommu);
--
2.36.1.dirty
Stop calling bus_set_iommu() since it's now unnecessary, and simplify
the probe failure path accordingly.
Signed-off-by: Robin Murphy <[email protected]>
---
v3: No change
drivers/iommu/tegra-smmu.c | 29 ++++++-----------------------
1 file changed, 6 insertions(+), 23 deletions(-)
diff --git a/drivers/iommu/tegra-smmu.c b/drivers/iommu/tegra-smmu.c
index 1fea68e551f1..2e4d2e4c65bb 100644
--- a/drivers/iommu/tegra-smmu.c
+++ b/drivers/iommu/tegra-smmu.c
@@ -1086,8 +1086,8 @@ struct tegra_smmu *tegra_smmu_probe(struct device *dev,
/*
* This is a bit of a hack. Ideally we'd want to simply return this
- * value. However the IOMMU registration process will attempt to add
- * all devices to the IOMMU when bus_set_iommu() is called. In order
+ * value. However iommu_device_register() will attempt to add
+ * all devices to the IOMMU before we get that far. In order
* not to rely on global variables to track the IOMMU instance, we
* set it here so that it can be looked up from the .probe_device()
* callback via the IOMMU device's .drvdata field.
@@ -1141,32 +1141,15 @@ struct tegra_smmu *tegra_smmu_probe(struct device *dev,
return ERR_PTR(err);
err = iommu_device_register(&smmu->iommu, &tegra_smmu_ops, dev);
- if (err)
- goto remove_sysfs;
-
- err = bus_set_iommu(&platform_bus_type, &tegra_smmu_ops);
- if (err < 0)
- goto unregister;
-
-#ifdef CONFIG_PCI
- err = bus_set_iommu(&pci_bus_type, &tegra_smmu_ops);
- if (err < 0)
- goto unset_platform_bus;
-#endif
+ if (err) {
+ iommu_device_sysfs_remove(&smmu->iommu);
+ return ERR_PTR(err);
+ }
if (IS_ENABLED(CONFIG_DEBUG_FS))
tegra_smmu_debugfs_init(smmu);
return smmu;
-
-unset_platform_bus: __maybe_unused;
- bus_set_iommu(&platform_bus_type, NULL);
-unregister:
- iommu_device_unregister(&smmu->iommu);
-remove_sysfs:
- iommu_device_sysfs_remove(&smmu->iommu);
-
- return ERR_PTR(err);
}
void tegra_smmu_remove(struct tegra_smmu *smmu)
--
2.36.1.dirty
Stop calling bus_set_iommu() since it's now unnecessary, and simplify
the probe failure path accordingly.
Reviewed-by: Jean-Philippe Brucker <[email protected]>
Signed-off-by: Robin Murphy <[email protected]>
---
v3: No change
drivers/iommu/virtio-iommu.c | 25 -------------------------
1 file changed, 25 deletions(-)
diff --git a/drivers/iommu/virtio-iommu.c b/drivers/iommu/virtio-iommu.c
index 25be4b822aa0..bcbd10ec4ccb 100644
--- a/drivers/iommu/virtio-iommu.c
+++ b/drivers/iommu/virtio-iommu.c
@@ -7,7 +7,6 @@
#define pr_fmt(fmt) KBUILD_MODNAME ": " fmt
-#include <linux/amba/bus.h>
#include <linux/delay.h>
#include <linux/dma-iommu.h>
#include <linux/dma-map-ops.h>
@@ -17,7 +16,6 @@
#include <linux/module.h>
#include <linux/of_platform.h>
#include <linux/pci.h>
-#include <linux/platform_device.h>
#include <linux/virtio.h>
#include <linux/virtio_config.h>
#include <linux/virtio_ids.h>
@@ -1146,26 +1144,6 @@ static int viommu_probe(struct virtio_device *vdev)
iommu_device_register(&viommu->iommu, &viommu_ops, parent_dev);
-#ifdef CONFIG_PCI
- if (pci_bus_type.iommu_ops != &viommu_ops) {
- ret = bus_set_iommu(&pci_bus_type, &viommu_ops);
- if (ret)
- goto err_unregister;
- }
-#endif
-#ifdef CONFIG_ARM_AMBA
- if (amba_bustype.iommu_ops != &viommu_ops) {
- ret = bus_set_iommu(&amba_bustype, &viommu_ops);
- if (ret)
- goto err_unregister;
- }
-#endif
- if (platform_bus_type.iommu_ops != &viommu_ops) {
- ret = bus_set_iommu(&platform_bus_type, &viommu_ops);
- if (ret)
- goto err_unregister;
- }
-
vdev->priv = viommu;
dev_info(dev, "input address: %u bits\n",
@@ -1174,9 +1152,6 @@ static int viommu_probe(struct virtio_device *vdev)
return 0;
-err_unregister:
- iommu_device_sysfs_remove(&viommu->iommu);
- iommu_device_unregister(&viommu->iommu);
err_free_vqs:
vdev->config->del_vqs(vdev);
--
2.36.1.dirty
Clean up the remaining trivial bus_set_iommu() callsites along
with the implementation. Now drivers only have to know and care
about iommu_device instances, phew!
Signed-off-by: Robin Murphy <[email protected]>
---
v3: Also catch Intel's cheeky open-coded assignment
drivers/iommu/arm/arm-smmu/qcom_iommu.c | 4 ----
drivers/iommu/fsl_pamu_domain.c | 4 ----
drivers/iommu/intel/iommu.c | 2 --
drivers/iommu/iommu.c | 24 ------------------------
drivers/iommu/msm_iommu.c | 2 --
drivers/iommu/rockchip-iommu.c | 2 --
drivers/iommu/s390-iommu.c | 6 ------
drivers/iommu/sprd-iommu.c | 5 -----
drivers/iommu/sun50i-iommu.c | 2 --
include/linux/iommu.h | 1 -
10 files changed, 52 deletions(-)
diff --git a/drivers/iommu/arm/arm-smmu/qcom_iommu.c b/drivers/iommu/arm/arm-smmu/qcom_iommu.c
index 4c077c38fbd6..80af00f468b4 100644
--- a/drivers/iommu/arm/arm-smmu/qcom_iommu.c
+++ b/drivers/iommu/arm/arm-smmu/qcom_iommu.c
@@ -845,8 +845,6 @@ static int qcom_iommu_device_probe(struct platform_device *pdev)
goto err_pm_disable;
}
- bus_set_iommu(&platform_bus_type, &qcom_iommu_ops);
-
if (qcom_iommu->local_base) {
pm_runtime_get_sync(dev);
writel_relaxed(0xffffffff, qcom_iommu->local_base + SMMU_INTR_SEL_NS);
@@ -864,8 +862,6 @@ static int qcom_iommu_device_remove(struct platform_device *pdev)
{
struct qcom_iommu_dev *qcom_iommu = platform_get_drvdata(pdev);
- bus_set_iommu(&platform_bus_type, NULL);
-
pm_runtime_force_suspend(&pdev->dev);
platform_set_drvdata(pdev, NULL);
iommu_device_sysfs_remove(&qcom_iommu->iommu);
diff --git a/drivers/iommu/fsl_pamu_domain.c b/drivers/iommu/fsl_pamu_domain.c
index 94b4589dc67c..09aa90c27723 100644
--- a/drivers/iommu/fsl_pamu_domain.c
+++ b/drivers/iommu/fsl_pamu_domain.c
@@ -481,11 +481,7 @@ int __init pamu_domain_init(void)
if (ret) {
iommu_device_sysfs_remove(&pamu_iommu);
pr_err("Can't register iommu device\n");
- return ret;
}
- bus_set_iommu(&platform_bus_type, &fsl_pamu_ops);
- bus_set_iommu(&pci_bus_type, &fsl_pamu_ops);
-
return ret;
}
diff --git a/drivers/iommu/intel/iommu.c b/drivers/iommu/intel/iommu.c
index 3e02c08802a0..388dd0e22fb9 100644
--- a/drivers/iommu/intel/iommu.c
+++ b/drivers/iommu/intel/iommu.c
@@ -4031,7 +4031,6 @@ static int __init probe_acpi_namespace_devices(void)
continue;
}
- pn->dev->bus->iommu_ops = &intel_iommu_ops;
ret = iommu_probe_device(pn->dev);
if (ret)
break;
@@ -4153,7 +4152,6 @@ int __init intel_iommu_init(void)
}
up_read(&dmar_global_lock);
- bus_set_iommu(&pci_bus_type, &intel_iommu_ops);
if (si_domain && !hw_pass_through)
register_memory_notifier(&intel_iommu_memory_nb);
diff --git a/drivers/iommu/iommu.c b/drivers/iommu/iommu.c
index acb7b2ab0b79..c10ecb87fd9c 100644
--- a/drivers/iommu/iommu.c
+++ b/drivers/iommu/iommu.c
@@ -1834,30 +1834,6 @@ static int iommu_bus_init(struct bus_type *bus)
return err;
}
-/**
- * bus_set_iommu - set iommu-callbacks for the bus
- * @bus: bus.
- * @ops: the callbacks provided by the iommu-driver
- *
- * This function is called by an iommu driver to set the iommu methods
- * used for a particular bus. Drivers for devices on that bus can use
- * the iommu-api after these ops are registered.
- * This special function is needed because IOMMUs are usually devices on
- * the bus itself, so the iommu drivers are not initialized when the bus
- * is set up. With this function the iommu-driver can set the iommu-ops
- * afterwards.
- */
-int bus_set_iommu(struct bus_type *bus, const struct iommu_ops *ops)
-{
- if (bus->iommu_ops && ops && bus->iommu_ops != ops)
- return -EBUSY;
-
- bus->iommu_ops = ops;
-
- return 0;
-}
-EXPORT_SYMBOL_GPL(bus_set_iommu);
-
bool iommu_present(struct bus_type *bus)
{
return bus->iommu_ops != NULL;
diff --git a/drivers/iommu/msm_iommu.c b/drivers/iommu/msm_iommu.c
index f09aedfdd462..4a71989406dc 100644
--- a/drivers/iommu/msm_iommu.c
+++ b/drivers/iommu/msm_iommu.c
@@ -797,8 +797,6 @@ static int msm_iommu_probe(struct platform_device *pdev)
goto fail;
}
- bus_set_iommu(&platform_bus_type, &msm_iommu_ops);
-
pr_info("device mapped at %p, irq %d with %d ctx banks\n",
iommu->base, iommu->irq, iommu->ncb);
diff --git a/drivers/iommu/rockchip-iommu.c b/drivers/iommu/rockchip-iommu.c
index ab57c4b8fade..a3fc59b814ab 100644
--- a/drivers/iommu/rockchip-iommu.c
+++ b/drivers/iommu/rockchip-iommu.c
@@ -1300,8 +1300,6 @@ static int rk_iommu_probe(struct platform_device *pdev)
if (!dma_dev)
dma_dev = &pdev->dev;
- bus_set_iommu(&platform_bus_type, &rk_iommu_ops);
-
pm_runtime_enable(dev);
for (i = 0; i < iommu->num_irq; i++) {
diff --git a/drivers/iommu/s390-iommu.c b/drivers/iommu/s390-iommu.c
index c898bcbbce11..dd957145fb81 100644
--- a/drivers/iommu/s390-iommu.c
+++ b/drivers/iommu/s390-iommu.c
@@ -385,9 +385,3 @@ static const struct iommu_ops s390_iommu_ops = {
.free = s390_domain_free,
}
};
-
-static int __init s390_iommu_init(void)
-{
- return bus_set_iommu(&pci_bus_type, &s390_iommu_ops);
-}
-subsys_initcall(s390_iommu_init);
diff --git a/drivers/iommu/sprd-iommu.c b/drivers/iommu/sprd-iommu.c
index bd409bab6286..6770e6a72283 100644
--- a/drivers/iommu/sprd-iommu.c
+++ b/drivers/iommu/sprd-iommu.c
@@ -507,9 +507,6 @@ static int sprd_iommu_probe(struct platform_device *pdev)
if (ret)
goto remove_sysfs;
- if (!iommu_present(&platform_bus_type))
- bus_set_iommu(&platform_bus_type, &sprd_iommu_ops);
-
ret = sprd_iommu_clk_enable(sdev);
if (ret)
goto unregister_iommu;
@@ -545,8 +542,6 @@ static int sprd_iommu_remove(struct platform_device *pdev)
iommu_group_put(sdev->group);
sdev->group = NULL;
- bus_set_iommu(&platform_bus_type, NULL);
-
platform_set_drvdata(pdev, NULL);
iommu_device_sysfs_remove(&sdev->iommu);
iommu_device_unregister(&sdev->iommu);
diff --git a/drivers/iommu/sun50i-iommu.c b/drivers/iommu/sun50i-iommu.c
index c54ab477b8fd..e104543b78d9 100644
--- a/drivers/iommu/sun50i-iommu.c
+++ b/drivers/iommu/sun50i-iommu.c
@@ -968,8 +968,6 @@ static int sun50i_iommu_probe(struct platform_device *pdev)
if (ret < 0)
goto err_unregister;
- bus_set_iommu(&platform_bus_type, &sun50i_iommu_ops);
-
return 0;
err_unregister:
diff --git a/include/linux/iommu.h b/include/linux/iommu.h
index 5e1afe169549..2cb05ff89f03 100644
--- a/include/linux/iommu.h
+++ b/include/linux/iommu.h
@@ -412,7 +412,6 @@ static inline const struct iommu_ops *dev_iommu_ops(struct device *dev)
return dev->iommu->iommu_dev->ops;
}
-extern int bus_set_iommu(struct bus_type *bus, const struct iommu_ops *ops);
extern int bus_iommu_probe(struct bus_type *bus);
extern bool iommu_present(struct bus_type *bus);
extern bool device_iommu_capable(struct device *dev, enum iommu_cap cap);
--
2.36.1.dirty
Stop calling bus_set_iommu() since it's now unnecessary. This also
leaves the custom initcall effectively doing nothing but register
the driver, which no longer needs to happen early either, so convert
it to builtin_platform_driver().
Signed-off-by: Robin Murphy <[email protected]>
---
v3: No change
drivers/iommu/ipmmu-vmsa.c | 35 +----------------------------------
1 file changed, 1 insertion(+), 34 deletions(-)
diff --git a/drivers/iommu/ipmmu-vmsa.c b/drivers/iommu/ipmmu-vmsa.c
index 8fdb84b3642b..2549d32f0ddd 100644
--- a/drivers/iommu/ipmmu-vmsa.c
+++ b/drivers/iommu/ipmmu-vmsa.c
@@ -1090,11 +1090,6 @@ static int ipmmu_probe(struct platform_device *pdev)
ret = iommu_device_register(&mmu->iommu, &ipmmu_ops, &pdev->dev);
if (ret)
return ret;
-
-#if defined(CONFIG_IOMMU_DMA)
- if (!iommu_present(&platform_bus_type))
- bus_set_iommu(&platform_bus_type, &ipmmu_ops);
-#endif
}
/*
@@ -1168,32 +1163,4 @@ static struct platform_driver ipmmu_driver = {
.probe = ipmmu_probe,
.remove = ipmmu_remove,
};
-
-static int __init ipmmu_init(void)
-{
- struct device_node *np;
- static bool setup_done;
- int ret;
-
- if (setup_done)
- return 0;
-
- np = of_find_matching_node(NULL, ipmmu_of_ids);
- if (!np)
- return 0;
-
- of_node_put(np);
-
- ret = platform_driver_register(&ipmmu_driver);
- if (ret < 0)
- return ret;
-
-#if defined(CONFIG_ARM) && !defined(CONFIG_IOMMU_DMA)
- if (!iommu_present(&platform_bus_type))
- bus_set_iommu(&platform_bus_type, &ipmmu_ops);
-#endif
-
- setup_done = true;
- return 0;
-}
-subsys_initcall(ipmmu_init);
+builtin_platform_driver(ipmmu_driver);
--
2.36.1.dirty
On 2022/7/6 01:08, Robin Murphy wrote:
> /*
> * Use a function instead of an array here because the domain-type is a
> * bit-field, so an array would waste memory.
> @@ -152,6 +172,10 @@ static int __init iommu_subsys_init(void)
> (iommu_cmd_line & IOMMU_CMD_LINE_STRICT) ?
> "(set via kernel command line)" : "");
>
> + /* If the system is so broken that this fails, it will WARN anyway */
Can you please elaborate a bit on this? iommu_bus_init() still return
errors.
> + for (int i = 0; i < ARRAY_SIZE(iommu_buses); i++)
> + iommu_bus_init(iommu_buses[i]);
> +
> return 0;
Best regards,
baolu
On 2022/7/6 01:08, Robin Murphy wrote:
> @@ -202,12 +210,32 @@ int iommu_device_register(struct iommu_device *iommu,
> spin_lock(&iommu_device_lock);
> list_add_tail(&iommu->list, &iommu_device_list);
> spin_unlock(&iommu_device_lock);
> +
> + for (int i = 0; i < ARRAY_SIZE(iommu_buses); i++) {
> + struct bus_type *bus = iommu_buses[i];
> + int err;
> +
> + if (bus->iommu_ops && bus->iommu_ops != ops) {
> + err = -EBUSY;
> + } else {
> + bus->iommu_ops = ops;
> + err = bus_iommu_probe(bus);
> + }
> + if (err) {
> + iommu_device_unregister(iommu);
> + return err;
> + }
> + }
> +
> return 0;
> }
> EXPORT_SYMBOL_GPL(iommu_device_register);
With bus_set_iommu() retired, my understanding is that now we embrace
the first-come-first-serve policy for bus->iommu_ops setting. This will
lead to problem in different iommu_ops for different bus case. Did I
overlook anything?
Best regards,
baolu
On 2022-07-06 02:53, Baolu Lu wrote:
> On 2022/7/6 01:08, Robin Murphy wrote:
>> /*
>> * Use a function instead of an array here because the domain-type is a
>> * bit-field, so an array would waste memory.
>> @@ -152,6 +172,10 @@ static int __init iommu_subsys_init(void)
>> (iommu_cmd_line & IOMMU_CMD_LINE_STRICT) ?
>> "(set via kernel command line)" : "");
>> + /* If the system is so broken that this fails, it will WARN
>> anyway */
>
> Can you please elaborate a bit on this? iommu_bus_init() still return
> errors.
Indeed, it's commenting on the fact that we don't try to clean up or
propagate an error value further even if it did ever manage to return
one. I feared that if I strip the error handling out of iommu_bus_init()
itself on the same reasoning, we'll just get constant patches from the
static checker brigade trying to add it back, so it seemed like the
neatest compromise to keep that decision where it's obviously in an
early initcall, rather than in the helper function which can be viewed
out of context. However, I'm happy to either expand this comment or go
the whole way and make iommu_bus_init() return void if you think it's
worthwhile.
Cheers,
Robin.
>
>> + for (int i = 0; i < ARRAY_SIZE(iommu_buses); i++)
>> + iommu_bus_init(iommu_buses[i]);
>> +
>> return 0;
>
> Best regards,
> baolu
On 2022-07-06 03:35, Baolu Lu wrote:
> On 2022/7/6 01:08, Robin Murphy wrote:
>> @@ -202,12 +210,32 @@ int iommu_device_register(struct iommu_device
>> *iommu,
>> spin_lock(&iommu_device_lock);
>> list_add_tail(&iommu->list, &iommu_device_list);
>> spin_unlock(&iommu_device_lock);
>> +
>> + for (int i = 0; i < ARRAY_SIZE(iommu_buses); i++) {
>> + struct bus_type *bus = iommu_buses[i];
>> + int err;
>> +
>> + if (bus->iommu_ops && bus->iommu_ops != ops) {
>> + err = -EBUSY;
>> + } else {
>> + bus->iommu_ops = ops;
>> + err = bus_iommu_probe(bus);
>> + }
>> + if (err) {
>> + iommu_device_unregister(iommu);
>> + return err;
>> + }
>> + }
>> +
>> return 0;
>> }
>> EXPORT_SYMBOL_GPL(iommu_device_register);
>
> With bus_set_iommu() retired, my understanding is that now we embrace
> the first-come-first-serve policy for bus->iommu_ops setting. This will
> lead to problem in different iommu_ops for different bus case. Did I
> overlook anything?
This is just formalising the de-facto situation that we don't actually
have any combination of drivers that could load on the same system
without already attempting to claim at least one bus in common. It's
also only temporary until the bus ops are removed completely and we
fully support multiple drivers coexisting, which only actually takes a
handful more patches - I've realised I could even bring that change
*ahead* of the big job of converting iommu_domain_alloc() (I'm not
convinced that the tree-wide flag-day patch for that I currently have in
the dev branch is really viable, nor that I've actually got the correct
device at some of the callsites), although whether it's worth the
potentially-surprising behaviour that might result I'm less sure.
If we already had systems where in-tree drivers successfully coexisted
on different buses then I'd have split this up and done something a bit
more involved to keep a vestigial bus_set_iommu() around until the final
bus ops removal, but since we don't, it seemed neatest to do all the
related work in one go.
Thanks,
Robin.
On 2022/7/6 21:43, Robin Murphy wrote:
> On 2022-07-06 02:53, Baolu Lu wrote:
>> On 2022/7/6 01:08, Robin Murphy wrote:
>>> /*
>>> * Use a function instead of an array here because the domain-type
>>> is a
>>> * bit-field, so an array would waste memory.
>>> @@ -152,6 +172,10 @@ static int __init iommu_subsys_init(void)
>>> (iommu_cmd_line & IOMMU_CMD_LINE_STRICT) ?
>>> "(set via kernel command line)" : "");
>>> + /* If the system is so broken that this fails, it will WARN
>>> anyway */
>>
>> Can you please elaborate a bit on this? iommu_bus_init() still return
>> errors.
>
> Indeed, it's commenting on the fact that we don't try to clean up or
> propagate an error value further even if it did ever manage to return
> one. I feared that if I strip the error handling out of iommu_bus_init()
> itself on the same reasoning, we'll just get constant patches from the
> static checker brigade trying to add it back, so it seemed like the
> neatest compromise to keep that decision where it's obviously in an
> early initcall, rather than in the helper function which can be viewed
> out of context. However, I'm happy to either expand this comment or go
> the whole way and make iommu_bus_init() return void if you think it's
> worthwhile.
Thanks for the explanation. It would be helpful if the comment could be
expanded. In this case, after a long time, people will not consider it
an oversight. :-)
Best regards,
baolu
>
> Cheers,
> Robin.
>
>>
>>> + for (int i = 0; i < ARRAY_SIZE(iommu_buses); i++)
>>> + iommu_bus_init(iommu_buses[i]);
>>> +
>>> return 0;
>>
>> Best regards,
>> baolu
>
On 2022/7/6 22:37, Robin Murphy wrote:
> On 2022-07-06 03:35, Baolu Lu wrote:
>> On 2022/7/6 01:08, Robin Murphy wrote:
>>> @@ -202,12 +210,32 @@ int iommu_device_register(struct iommu_device
>>> *iommu,
>>> spin_lock(&iommu_device_lock);
>>> list_add_tail(&iommu->list, &iommu_device_list);
>>> spin_unlock(&iommu_device_lock);
>>> +
>>> + for (int i = 0; i < ARRAY_SIZE(iommu_buses); i++) {
>>> + struct bus_type *bus = iommu_buses[i];
>>> + int err;
>>> +
>>> + if (bus->iommu_ops && bus->iommu_ops != ops) {
>>> + err = -EBUSY;
>>> + } else {
>>> + bus->iommu_ops = ops;
>>> + err = bus_iommu_probe(bus);
>>> + }
>>> + if (err) {
>>> + iommu_device_unregister(iommu);
>>> + return err;
>>> + }
>>> + }
>>> +
>>> return 0;
>>> }
>>> EXPORT_SYMBOL_GPL(iommu_device_register);
>>
>> With bus_set_iommu() retired, my understanding is that now we embrace
>> the first-come-first-serve policy for bus->iommu_ops setting. This will
>> lead to problem in different iommu_ops for different bus case. Did I
>> overlook anything?
>
> This is just formalising the de-facto situation that we don't actually
> have any combination of drivers that could load on the same system
> without already attempting to claim at least one bus in common. It's
> also only temporary until the bus ops are removed completely and we
> fully support multiple drivers coexisting, which only actually takes a
> handful more patches - I've realised I could even bring that change
> *ahead* of the big job of converting iommu_domain_alloc() (I'm not
> convinced that the tree-wide flag-day patch for that I currently have in
> the dev branch is really viable, nor that I've actually got the correct
> device at some of the callsites), although whether it's worth the
> potentially-surprising behaviour that might result I'm less sure.
>
> If we already had systems where in-tree drivers successfully coexisted
> on different buses then I'd have split this up and done something a bit
> more involved to keep a vestigial bus_set_iommu() around until the final
> bus ops removal, but since we don't, it seemed neatest to do all the
> related work in one go.
Fair enough. I've never seen a mixed system as far. It's fine for us to
retire bus_set_iommu() for now and then formally support mixed IOMMU
drivers later.
Best regards,
baolu
> From: Robin Murphy <[email protected]>
> Sent: Wednesday, July 6, 2022 1:08 AM
>
> The number of bus types that the IOMMU subsystem deals with is small and
> manageable, so pull that list into core code as a first step towards
> cleaning up all the boilerplate bus-awareness from drivers. Calling
> iommu_probe_device() before bus->iommu_ops is set will simply return
> -ENODEV and not break the notifier call chain, so there should be no
> harm in proactively registering all our bus notifiers at init time.
>
Suppose we miss a check on iommu ops in iommu_release_device():
if (!dev->iommu) <<<<<<<
return;
iommu_device_unlink(dev->iommu->iommu_dev, dev);
ops = dev_iommu_ops(dev);
ops->release_device(dev);
following the rationale in patch01 a device could be removed when
it's associated with a known but not registered instance.
> From: Baolu Lu <[email protected]>
> Sent: Thursday, July 7, 2022 8:21 AM
>
> On 2022/7/6 21:43, Robin Murphy wrote:
> > On 2022-07-06 02:53, Baolu Lu wrote:
> >> On 2022/7/6 01:08, Robin Murphy wrote:
> >>> /*
> >>> * Use a function instead of an array here because the domain-type
> >>> is a
> >>> * bit-field, so an array would waste memory.
> >>> @@ -152,6 +172,10 @@ static int __init iommu_subsys_init(void)
> >>> (iommu_cmd_line & IOMMU_CMD_LINE_STRICT) ?
> >>> "(set via kernel command line)" : "");
> >>> + /* If the system is so broken that this fails, it will WARN
> >>> anyway */
> >>
> >> Can you please elaborate a bit on this? iommu_bus_init() still return
> >> errors.
> >
> > Indeed, it's commenting on the fact that we don't try to clean up or
> > propagate an error value further even if it did ever manage to return
> > one. I feared that if I strip the error handling out of iommu_bus_init()
> > itself on the same reasoning, we'll just get constant patches from the
> > static checker brigade trying to add it back, so it seemed like the
> > neatest compromise to keep that decision where it's obviously in an
> > early initcall, rather than in the helper function which can be viewed
> > out of context. However, I'm happy to either expand this comment or go
> > the whole way and make iommu_bus_init() return void if you think it's
> > worthwhile.
>
> Thanks for the explanation. It would be helpful if the comment could be
> expanded. In this case, after a long time, people will not consider it
> an oversight. :-)
>
I'd prefer to making iommu_bus_init() return void plus expanding
the comment otherwise the question arises that if the only caller
is not interested in the return value then why bother returning it
in the first place. ????
> From: Robin Murphy <[email protected]>
> Sent: Wednesday, July 6, 2022 1:08 AM
>
> @@ -202,12 +210,32 @@ int iommu_device_register(struct iommu_device
> *iommu,
> spin_lock(&iommu_device_lock);
> list_add_tail(&iommu->list, &iommu_device_list);
> spin_unlock(&iommu_device_lock);
> +
> + for (int i = 0; i < ARRAY_SIZE(iommu_buses); i++) {
> + struct bus_type *bus = iommu_buses[i];
> + int err;
> +
> + if (bus->iommu_ops && bus->iommu_ops != ops) {
> + err = -EBUSY;
> + } else {
> + bus->iommu_ops = ops;
> + err = bus_iommu_probe(bus);
> + }
> + if (err) {
> + iommu_device_unregister(iommu);
> + return err;
> + }
> + }
> +
Probably move above into a new function bus_iommu_probe_all():
/* probe all buses for devices associated with this iommu */
err = bus_iommu_probe_all();
if (err) {
iommu_device_unregister(iommu);
return err;
}
Just my personal preference on leaving logic in iommu_device_register()
more relevant to the iommu instance itself.
Apart from that:
Reviewed-by: Kevin Tian <[email protected]>
> From: Robin Murphy <[email protected]>
> Sent: Wednesday, July 6, 2022 1:09 AM
>
> Clean up the remaining trivial bus_set_iommu() callsites along
> with the implementation. Now drivers only have to know and care
> about iommu_device instances, phew!
>
> Signed-off-by: Robin Murphy <[email protected]>
Reviewed-by: Kevin Tian <[email protected]>
On 2022-07-07 07:34, Tian, Kevin wrote:
>> From: Baolu Lu <[email protected]>
>> Sent: Thursday, July 7, 2022 8:21 AM
>>
>> On 2022/7/6 21:43, Robin Murphy wrote:
>>> On 2022-07-06 02:53, Baolu Lu wrote:
>>>> On 2022/7/6 01:08, Robin Murphy wrote:
>>>>> /*
>>>>> * Use a function instead of an array here because the domain-type
>>>>> is a
>>>>> * bit-field, so an array would waste memory.
>>>>> @@ -152,6 +172,10 @@ static int __init iommu_subsys_init(void)
>>>>> (iommu_cmd_line & IOMMU_CMD_LINE_STRICT) ?
>>>>> "(set via kernel command line)" : "");
>>>>> + /* If the system is so broken that this fails, it will WARN
>>>>> anyway */
>>>>
>>>> Can you please elaborate a bit on this? iommu_bus_init() still return
>>>> errors.
>>>
>>> Indeed, it's commenting on the fact that we don't try to clean up or
>>> propagate an error value further even if it did ever manage to return
>>> one. I feared that if I strip the error handling out of iommu_bus_init()
>>> itself on the same reasoning, we'll just get constant patches from the
>>> static checker brigade trying to add it back, so it seemed like the
>>> neatest compromise to keep that decision where it's obviously in an
>>> early initcall, rather than in the helper function which can be viewed
>>> out of context. However, I'm happy to either expand this comment or go
>>> the whole way and make iommu_bus_init() return void if you think it's
>>> worthwhile.
>>
>> Thanks for the explanation. It would be helpful if the comment could be
>> expanded. In this case, after a long time, people will not consider it
>> an oversight. :-)
>>
>
> I'd prefer to making iommu_bus_init() return void plus expanding
> the comment otherwise the question arises that if the only caller
> is not interested in the return value then why bother returning it
> in the first place. ????
OK, that's fair enough, will do.
Thanks,
Robin.
On 2022-07-07 07:31, Tian, Kevin wrote:
>> From: Robin Murphy <[email protected]>
>> Sent: Wednesday, July 6, 2022 1:08 AM
>>
>> The number of bus types that the IOMMU subsystem deals with is small and
>> manageable, so pull that list into core code as a first step towards
>> cleaning up all the boilerplate bus-awareness from drivers. Calling
>> iommu_probe_device() before bus->iommu_ops is set will simply return
>> -ENODEV and not break the notifier call chain, so there should be no
>> harm in proactively registering all our bus notifiers at init time.
>>
>
> Suppose we miss a check on iommu ops in iommu_release_device():
>
> if (!dev->iommu) <<<<<<<
> return;
>
> iommu_device_unlink(dev->iommu->iommu_dev, dev);
>
> ops = dev_iommu_ops(dev);
> ops->release_device(dev);
>
> following the rationale in patch01 a device could be removed when
> it's associated with a known but not registered instance.
No, because at that point the instance is only known internally to the
driver. As long as it isn't erroneously returned from
->probe_device(dev), dev->iommu will remain NULL and the rest of the
core code works as expected.
Thanks,
Robin.
On 2022-07-07 07:51, Tian, Kevin wrote:
>> From: Robin Murphy <[email protected]>
>> Sent: Wednesday, July 6, 2022 1:08 AM
>>
>> @@ -202,12 +210,32 @@ int iommu_device_register(struct iommu_device
>> *iommu,
>> spin_lock(&iommu_device_lock);
>> list_add_tail(&iommu->list, &iommu_device_list);
>> spin_unlock(&iommu_device_lock);
>> +
>> + for (int i = 0; i < ARRAY_SIZE(iommu_buses); i++) {
>> + struct bus_type *bus = iommu_buses[i];
>> + int err;
>> +
>> + if (bus->iommu_ops && bus->iommu_ops != ops) {
>> + err = -EBUSY;
>> + } else {
>> + bus->iommu_ops = ops;
>> + err = bus_iommu_probe(bus);
>> + }
>> + if (err) {
>> + iommu_device_unregister(iommu);
>> + return err;
>> + }
>> + }
>> +
>
> Probably move above into a new function bus_iommu_probe_all():
>
> /* probe all buses for devices associated with this iommu */
> err = bus_iommu_probe_all();
> if (err) {
> iommu_device_unregister(iommu);
> return err;
> }
>
> Just my personal preference on leaving logic in iommu_device_register()
> more relevant to the iommu instance itself.
On reflection I think it makes sense to pull the
iommu_device_unregister() out of the loop anyway - I think that's really
a left-over from between v1 and v2 when that error case briefly jumped
to another cleanup loop, before I realised it was actually trivial for
iommu_device_unregister() to clean up for itself.
However I now see I've also missed another opportunity, and the -EBUSY
case should be hoisted out of the loop as well, since checking
iommu_buses[0] is sufficient. Then it's hopefully much clearer that once
the bus ops go away we'll be left with just a single extra line for the
loop, as in iommu_device_unregister(). Does that sound reasonable?
> Apart from that:
>
> Reviewed-by: Kevin Tian <[email protected]>
Thanks! (and for the others as well)
Robin.
On 7/5/22 1:08 PM, Robin Murphy wrote:
> Clean up the remaining trivial bus_set_iommu() callsites along
> with the implementation. Now drivers only have to know and care
> about iommu_device instances, phew!
>
> Signed-off-by: Robin Murphy <[email protected]>
> ---
>
> v3: Also catch Intel's cheeky open-coded assignment
>
...
> diff --git a/drivers/iommu/s390-iommu.c b/drivers/iommu/s390-iommu.c
> index c898bcbbce11..dd957145fb81 100644
> --- a/drivers/iommu/s390-iommu.c
> +++ b/drivers/iommu/s390-iommu.c
> @@ -385,9 +385,3 @@ static const struct iommu_ops s390_iommu_ops = {
> .free = s390_domain_free,
> }
> };
> -
> -static int __init s390_iommu_init(void)
> -{
> - return bus_set_iommu(&pci_bus_type, &s390_iommu_ops);
> -}
> -subsys_initcall(s390_iommu_init);
Previously s390_iommu_ops was only being set for pci_bus_type, but with
this series it will now also be set for platform_bus_type.
To tolerate that, this series needs a change along the lines of:
From: Matthew Rosato <[email protected]>
Date: Thu, 7 Jul 2022 08:45:44 -0400
Subject: [PATCH] iommu/s390: fail probe for non-pci device
s390-iommu only supports pci_bus_type today
Signed-off-by: Matthew Rosato <[email protected]>
---
drivers/iommu/s390-iommu.c | 7 ++++++-
1 file changed, 6 insertions(+), 1 deletion(-)
diff --git a/drivers/iommu/s390-iommu.c b/drivers/iommu/s390-iommu.c
index dd957145fb81..762f892b4ec3 100644
--- a/drivers/iommu/s390-iommu.c
+++ b/drivers/iommu/s390-iommu.c
@@ -185,7 +185,12 @@ static void s390_iommu_detach_device(struct
iommu_domain *domain,
static struct iommu_device *s390_iommu_probe_device(struct device
*dev)
{
- struct zpci_dev *zdev = to_zpci_dev(dev);
+ struct zpci_dev *zdev;
+
+ if (!dev_is_pci(dev))
+ return ERR_PTR(-ENODEV);
+
+ zdev = to_zpci_dev(dev);
return &zdev->iommu_dev;
} return &zdev->iommu_dev;
}
On 7/7/22 8:49 AM, Matthew Rosato wrote:
> On 7/5/22 1:08 PM, Robin Murphy wrote:
>> Clean up the remaining trivial bus_set_iommu() callsites along
>> with the implementation. Now drivers only have to know and care
>> about iommu_device instances, phew!
>>
>> Signed-off-by: Robin Murphy <[email protected]>
>> ---
>>
>> v3: Also catch Intel's cheeky open-coded assignment
>>
>
> ...
>
>> diff --git a/drivers/iommu/s390-iommu.c b/drivers/iommu/s390-iommu.c
>> index c898bcbbce11..dd957145fb81 100644
>> --- a/drivers/iommu/s390-iommu.c
>> +++ b/drivers/iommu/s390-iommu.c
>> @@ -385,9 +385,3 @@ static const struct iommu_ops s390_iommu_ops = {
>> .free = s390_domain_free,
>> }
>> };
>> -
>> -static int __init s390_iommu_init(void)
>> -{
>> - return bus_set_iommu(&pci_bus_type, &s390_iommu_ops);
>> -}
>> -subsys_initcall(s390_iommu_init);
>
> Previously s390_iommu_ops was only being set for pci_bus_type, but with
> this series it will now also be set for platform_bus_type.
>
> To tolerate that, this series needs a change along the lines of:
>
... Sorry, let's try that again without a mangled diff:
From: Matthew Rosato <[email protected]>
Date: Thu, 7 Jul 2022 08:45:44 -0400
Subject: [PATCH] iommu/s390: fail probe for non-pci device
s390-iommu only supports pci_bus_type today
Signed-off-by: Matthew Rosato <[email protected]>
---
drivers/iommu/s390-iommu.c | 7 ++++++-
1 file changed, 6 insertions(+), 1 deletion(-)
diff --git a/drivers/iommu/s390-iommu.c b/drivers/iommu/s390-iommu.c
index dd957145fb81..762f892b4ec3 100644
--- a/drivers/iommu/s390-iommu.c
+++ b/drivers/iommu/s390-iommu.c
@@ -185,7 +185,12 @@ static void s390_iommu_detach_device(struct
iommu_domain *domain,
static struct iommu_device *s390_iommu_probe_device(struct device
*dev)
{
- struct zpci_dev *zdev = to_zpci_dev(dev);
+ struct zpci_dev *zdev;
+
+ if (!dev_is_pci(dev))
+ return ERR_PTR(-ENODEV);
+
+ zdev = to_zpci_dev(dev);
return &zdev->iommu_dev;
}
On 2022-07-07 13:54, Matthew Rosato wrote:
> On 7/7/22 8:49 AM, Matthew Rosato wrote:
>> On 7/5/22 1:08 PM, Robin Murphy wrote:
>>> Clean up the remaining trivial bus_set_iommu() callsites along
>>> with the implementation. Now drivers only have to know and care
>>> about iommu_device instances, phew!
>>>
>>> Signed-off-by: Robin Murphy <[email protected]>
>>> ---
>>>
>>> v3: Also catch Intel's cheeky open-coded assignment
>>>
>>
>> ...
>>
>>> diff --git a/drivers/iommu/s390-iommu.c b/drivers/iommu/s390-iommu.c
>>> index c898bcbbce11..dd957145fb81 100644
>>> --- a/drivers/iommu/s390-iommu.c
>>> +++ b/drivers/iommu/s390-iommu.c
>>> @@ -385,9 +385,3 @@ static const struct iommu_ops s390_iommu_ops = {
>>> .free = s390_domain_free,
>>> }
>>> };
>>> -
>>> -static int __init s390_iommu_init(void)
>>> -{
>>> - return bus_set_iommu(&pci_bus_type, &s390_iommu_ops);
>>> -}
>>> -subsys_initcall(s390_iommu_init);
>>
>> Previously s390_iommu_ops was only being set for pci_bus_type, but
>> with this series it will now also be set for platform_bus_type.
Ah, indeed I hadn't got as far as fully appreciating that to_zpci_dev()
isn't robust enough on its own. Thanks for the patch, I've pulled it in
and will include it in v4. Do I take it that all else works OK with this
fixed?
Cheers,
Robin.
>>
>> To tolerate that, this series needs a change along the lines of:
>>
>
> ... Sorry, let's try that again without a mangled diff:
>
> From: Matthew Rosato <[email protected]>
> Date: Thu, 7 Jul 2022 08:45:44 -0400
> Subject: [PATCH] iommu/s390: fail probe for non-pci device
>
>
> s390-iommu only supports pci_bus_type today
>
>
> Signed-off-by: Matthew Rosato <[email protected]>
> ---
> drivers/iommu/s390-iommu.c | 7 ++++++-
> 1 file changed, 6 insertions(+), 1 deletion(-)
>
>
> diff --git a/drivers/iommu/s390-iommu.c b/drivers/iommu/s390-iommu.c
> index dd957145fb81..762f892b4ec3 100644
> --- a/drivers/iommu/s390-iommu.c
> +++ b/drivers/iommu/s390-iommu.c
> @@ -185,7 +185,12 @@ static void s390_iommu_detach_device(struct
> iommu_domain *domain,
>
>
> static struct iommu_device *s390_iommu_probe_device(struct device *dev)
> {
> - struct zpci_dev *zdev = to_zpci_dev(dev);
> + struct zpci_dev *zdev;
> +
> + if (!dev_is_pci(dev))
> + return ERR_PTR(-ENODEV);
> +
> + zdev = to_zpci_dev(dev);
>
>
> return &zdev->iommu_dev;
> }
>
On 7/7/22 10:58 AM, Robin Murphy wrote:
> On 2022-07-07 13:54, Matthew Rosato wrote:
>> On 7/7/22 8:49 AM, Matthew Rosato wrote:
>>> On 7/5/22 1:08 PM, Robin Murphy wrote:
>>>> Clean up the remaining trivial bus_set_iommu() callsites along
>>>> with the implementation. Now drivers only have to know and care
>>>> about iommu_device instances, phew!
>>>>
>>>> Signed-off-by: Robin Murphy <[email protected]>
>>>> ---
>>>>
>>>> v3: Also catch Intel's cheeky open-coded assignment
>>>>
>>>
>>> ...
>>>
>>>> diff --git a/drivers/iommu/s390-iommu.c b/drivers/iommu/s390-iommu.c
>>>> index c898bcbbce11..dd957145fb81 100644
>>>> --- a/drivers/iommu/s390-iommu.c
>>>> +++ b/drivers/iommu/s390-iommu.c
>>>> @@ -385,9 +385,3 @@ static const struct iommu_ops s390_iommu_ops = {
>>>> .free = s390_domain_free,
>>>> }
>>>> };
>>>> -
>>>> -static int __init s390_iommu_init(void)
>>>> -{
>>>> - return bus_set_iommu(&pci_bus_type, &s390_iommu_ops);
>>>> -}
>>>> -subsys_initcall(s390_iommu_init);
>>>
>>> Previously s390_iommu_ops was only being set for pci_bus_type, but
>>> with this series it will now also be set for platform_bus_type.
>
> Ah, indeed I hadn't got as far as fully appreciating that to_zpci_dev()
> isn't robust enough on its own. Thanks for the patch, I've pulled it in
> and will include it in v4. Do I take it that all else works OK with this
> fixed?
>
Yes, with that patch included this looks OK so for s390 you can also take a
Tested-by: Matthew Rosato <[email protected]>
> From: Robin Murphy <[email protected]>
> Sent: Thursday, July 7, 2022 5:59 PM
>
> On 2022-07-07 07:31, Tian, Kevin wrote:
> >> From: Robin Murphy <[email protected]>
> >> Sent: Wednesday, July 6, 2022 1:08 AM
> >>
> >> The number of bus types that the IOMMU subsystem deals with is small
> and
> >> manageable, so pull that list into core code as a first step towards
> >> cleaning up all the boilerplate bus-awareness from drivers. Calling
> >> iommu_probe_device() before bus->iommu_ops is set will simply return
> >> -ENODEV and not break the notifier call chain, so there should be no
> >> harm in proactively registering all our bus notifiers at init time.
> >>
> >
> > Suppose we miss a check on iommu ops in iommu_release_device():
> >
> > if (!dev->iommu) <<<<<<<
> > return;
> >
> > iommu_device_unlink(dev->iommu->iommu_dev, dev);
> >
> > ops = dev_iommu_ops(dev);
> > ops->release_device(dev);
> >
> > following the rationale in patch01 a device could be removed when
> > it's associated with a known but not registered instance.
>
> No, because at that point the instance is only known internally to the
> driver. As long as it isn't erroneously returned from
> ->probe_device(dev), dev->iommu will remain NULL and the rest of the
> core code works as expected.
>
You are correct. I overlooked dev->iommu as device_to_iommu() in
patch01. As long as the device hasn't been probed or ->probe_device
doesn't do bad thing then dev->iommu should be NULL in this case.
> From: Robin Murphy <[email protected]>
> Sent: Thursday, July 7, 2022 6:58 PM
>
> On 2022-07-07 07:51, Tian, Kevin wrote:
> >> From: Robin Murphy <[email protected]>
> >> Sent: Wednesday, July 6, 2022 1:08 AM
> >>
> >> @@ -202,12 +210,32 @@ int iommu_device_register(struct
> iommu_device
> >> *iommu,
> >> spin_lock(&iommu_device_lock);
> >> list_add_tail(&iommu->list, &iommu_device_list);
> >> spin_unlock(&iommu_device_lock);
> >> +
> >> + for (int i = 0; i < ARRAY_SIZE(iommu_buses); i++) {
> >> + struct bus_type *bus = iommu_buses[i];
> >> + int err;
> >> +
> >> + if (bus->iommu_ops && bus->iommu_ops != ops) {
> >> + err = -EBUSY;
> >> + } else {
> >> + bus->iommu_ops = ops;
> >> + err = bus_iommu_probe(bus);
> >> + }
> >> + if (err) {
> >> + iommu_device_unregister(iommu);
> >> + return err;
> >> + }
> >> + }
> >> +
> >
> > Probably move above into a new function bus_iommu_probe_all():
> >
> > /* probe all buses for devices associated with this iommu */
> > err = bus_iommu_probe_all();
> > if (err) {
> > iommu_device_unregister(iommu);
> > return err;
> > }
> >
> > Just my personal preference on leaving logic in iommu_device_register()
> > more relevant to the iommu instance itself.
>
> On reflection I think it makes sense to pull the
> iommu_device_unregister() out of the loop anyway - I think that's really
> a left-over from between v1 and v2 when that error case briefly jumped
> to another cleanup loop, before I realised it was actually trivial for
> iommu_device_unregister() to clean up for itself.
>
> However I now see I've also missed another opportunity, and the -EBUSY
> case should be hoisted out of the loop as well, since checking
> iommu_buses[0] is sufficient. Then it's hopefully much clearer that once
> the bus ops go away we'll be left with just a single extra line for the
> loop, as in iommu_device_unregister(). Does that sound reasonable?
>
Yes, sounds good to me.
On Tue, 2022-07-05 at 18:08 +0100, Robin Murphy wrote:
> v2: https://lore.kernel.org/linux-iommu/[email protected]/
>
> Hi all,
>
> Here's v3, now with working x86! Having finally made sense of how I
> broke Intel, I've given AMD the same fix by inspection. I'm still not
> 100% sure about s390, but it looks like it should probably be OK since
> it seems to register an IOMMU instance for each PCI device (?!) before
> disappearing into PCI hotplug code, wherein I assume we should never see
> a PCI device appear without its IOMMU already registered.
Yes, this is a bit unusual as our PCI architecture doesn't really have
a notion of an IOMMU device only of I/O translation tables. These are
then registered per PCI function. PCI functions may share I/O
translation tables and thus DMA address spaces but this is not done at
the moment. As Matt already mentioned we do need a small change for
this patch series. Since that was still mangled in his mail for me I
just replied with that using "git send-email". With Matt's patch
applied I can confirm that this works fine for us and does look like a
useful simplification. So feel free to add my
Tested-by: Niklas Schnelle <[email protected]>
From: Matthew Rosato <[email protected]>
s390-iommu only supports pci_bus_type today
Signed-off-by: Matthew Rosato <[email protected]>
---
drivers/iommu/s390-iommu.c | 7 ++++++-
1 file changed, 6 insertions(+), 1 deletion(-)
diff --git a/drivers/iommu/s390-iommu.c b/drivers/iommu/s390-iommu.c
index dd957145fb81..762f892b4ec3 100644
--- a/drivers/iommu/s390-iommu.c
+++ b/drivers/iommu/s390-iommu.c
@@ -185,7 +185,12 @@ static void s390_iommu_detach_device(struct iommu_domain *domain,
static struct iommu_device *s390_iommu_probe_device(struct device *dev)
{
- struct zpci_dev *zdev = to_zpci_dev(dev);
+ struct zpci_dev *zdev;
+
+ if (!dev_is_pci(dev))
+ return ERR_PTR(-ENODEV);
+
+ zdev = to_zpci_dev(dev);
return &zdev->iommu_dev;
}
--
2.34.1
On 2022-07-05 18:08, Robin Murphy wrote:
> v2: https://lore.kernel.org/linux-iommu/[email protected]/
>
> Hi all,
>
> Here's v3, now with working x86! Having finally made sense of how I
> broke Intel, I've given AMD the same fix by inspection. I'm still not
> 100% sure about s390, but it looks like it should probably be OK since
> it seems to register an IOMMU instance for each PCI device (?!) before
> disappearing into PCI hotplug code, wherein I assume we should never see
> a PCI device appear without its IOMMU already registered.
>
> Otherwise, the only other updates are hooking up the new host1x context
> bus (noting that it now takes all of 4 lines to support a whole new bus,
> yay!), and a slight tweak to make sure we keep rejecting registration of
> conflicting iommu_ops rather than needlessly change that just yet.
FWIW I've prepared v4, including Matt's s390 patch and some nice extra
cleanups thanks to Kevin's suggestions, but have now decided to wait to
rebase and send it after the upcoming merge window. If anyone's
interested in the meantime, there's a preliminary branch here:
https://gitlab.arm.com/linux-arm/linux-rm/-/commits/bus-set-iommu-v4
(temporarily including the host1x patch from -next to avoid breakage on
arm64 as well)
Thanks,
Robin.
> From: Robin Murphy <[email protected]>
> Sent: Friday, July 15, 2022 9:13 PM
>
> On 2022-07-05 18:08, Robin Murphy wrote:
> > v2: https://lore.kernel.org/linux-
> iommu/[email protected]/
> >
> > Hi all,
> >
> > Here's v3, now with working x86! Having finally made sense of how I
> > broke Intel, I've given AMD the same fix by inspection. I'm still not
> > 100% sure about s390, but it looks like it should probably be OK since
> > it seems to register an IOMMU instance for each PCI device (?!) before
> > disappearing into PCI hotplug code, wherein I assume we should never see
> > a PCI device appear without its IOMMU already registered.
> >
> > Otherwise, the only other updates are hooking up the new host1x context
> > bus (noting that it now takes all of 4 lines to support a whole new bus,
> > yay!), and a slight tweak to make sure we keep rejecting registration of
> > conflicting iommu_ops rather than needlessly change that just yet.
>
> FWIW I've prepared v4, including Matt's s390 patch and some nice extra
> cleanups thanks to Kevin's suggestions, but have now decided to wait to
> rebase and send it after the upcoming merge window. If anyone's
> interested in the meantime, there's a preliminary branch here:
>
> https://gitlab.arm.com/linux-arm/linux-rm/-/commits/bus-set-iommu-v4
>
> (temporarily including the host1x patch from -next to avoid breakage on
> arm64 as well)
>
This looks good to me.