Add a basic IOMMU driver for the s390 platform. The code is fairly simple
since on s390 each PCI device has its own virtual I/O address space,
starting at the same IOVA. Because of this, a domain can hold only
one PCI device. Also, there is no relation between PCI devices, so each
device belongs to a separate IOMMU group.
Signed-off-by: Frank Blaschka <[email protected]>
---
MAINTAINERS | 8 ++
arch/s390/include/asm/pci.h | 3 +
arch/s390/pci/pci_dma.c | 19 ++++-
drivers/iommu/Kconfig | 9 +++
drivers/iommu/Makefile | 1 +
drivers/iommu/s390-iommu.c | 179 ++++++++++++++++++++++++++++++++++++++++++++
6 files changed, 217 insertions(+), 2 deletions(-)
create mode 100644 drivers/iommu/s390-iommu.c
diff --git a/MAINTAINERS b/MAINTAINERS
index bc69ca4..a3ba11b 100644
--- a/MAINTAINERS
+++ b/MAINTAINERS
@@ -7935,6 +7935,14 @@ F: drivers/s390/net/*iucv*
F: include/net/iucv/
F: net/iucv/
+S390 IOMMU
+M: Frank Blaschka <[email protected]>
+M: [email protected]
+L: [email protected]
+W: http://www.ibm.com/developerworks/linux/linux390/
+S: Supported
+F: drivers/iommu/s390-iommu.c
+
S3C24XX SD/MMC Driver
M: Ben Dooks <[email protected]>
L: [email protected] (moderated for non-subscribers)
diff --git a/arch/s390/include/asm/pci.h b/arch/s390/include/asm/pci.h
index c030900..6790d0d 100644
--- a/arch/s390/include/asm/pci.h
+++ b/arch/s390/include/asm/pci.h
@@ -177,6 +177,9 @@ struct zpci_dev *get_zdev_by_fid(u32);
/* DMA */
int zpci_dma_init(void);
void zpci_dma_exit(void);
+int dma_update_trans(struct zpci_dev *zdev, unsigned long pa,
+ dma_addr_t dma_addr, size_t size, int flags);
+void dma_purge_rto_entries(struct zpci_dev *zdev);
/* FMB */
int zpci_fmb_enable_device(struct zpci_dev *);
diff --git a/arch/s390/pci/pci_dma.c b/arch/s390/pci/pci_dma.c
index 4cbb29a..a4db33e 100644
--- a/arch/s390/pci/pci_dma.c
+++ b/arch/s390/pci/pci_dma.c
@@ -139,8 +139,8 @@ static void dma_update_cpu_trans(struct zpci_dev *zdev, void *page_addr,
entry_clr_protected(entry);
}
-static int dma_update_trans(struct zpci_dev *zdev, unsigned long pa,
- dma_addr_t dma_addr, size_t size, int flags)
+int dma_update_trans(struct zpci_dev *zdev, unsigned long pa,
+ dma_addr_t dma_addr, size_t size, int flags)
{
unsigned int nr_pages = PAGE_ALIGN(size) >> PAGE_SHIFT;
u8 *page_addr = (u8 *) (pa & PAGE_MASK);
@@ -210,6 +210,21 @@ static void dma_cleanup_tables(struct zpci_dev *zdev)
zdev->dma_table = NULL;
}
+void dma_purge_rto_entries(struct zpci_dev *zdev)
+{
+ unsigned long *table;
+ int rtx;
+
+ if (!zdev || !zdev->dma_table)
+ return;
+ table = zdev->dma_table;
+ for (rtx = 0; rtx < ZPCI_TABLE_ENTRIES; rtx++)
+ if (reg_entry_isvalid(table[rtx])) {
+ dma_free_seg_table(table[rtx]);
+ invalidate_table_entry(&table[rtx]);
+ }
+}
+
static unsigned long __dma_alloc_iommu(struct zpci_dev *zdev,
unsigned long start, int size)
{
diff --git a/drivers/iommu/Kconfig b/drivers/iommu/Kconfig
index dd51122..545e3fd 100644
--- a/drivers/iommu/Kconfig
+++ b/drivers/iommu/Kconfig
@@ -302,4 +302,13 @@ config ARM_SMMU
Say Y here if your SoC includes an IOMMU device implementing
the ARM SMMU architecture.
+config S390_IOMMU
+ bool "s390 IOMMU Support"
+ depends on S390 && PCI
+ select IOMMU_API
+ help
+ Support for the IBM s/390 IOMMU
+
+ If unsure, say N here.
+
endif # IOMMU_SUPPORT
diff --git a/drivers/iommu/Makefile b/drivers/iommu/Makefile
index 16edef7..1278aad 100644
--- a/drivers/iommu/Makefile
+++ b/drivers/iommu/Makefile
@@ -19,3 +19,4 @@ obj-$(CONFIG_EXYNOS_IOMMU) += exynos-iommu.o
obj-$(CONFIG_SHMOBILE_IOMMU) += shmobile-iommu.o
obj-$(CONFIG_SHMOBILE_IPMMU) += shmobile-ipmmu.o
obj-$(CONFIG_FSL_PAMU) += fsl_pamu.o fsl_pamu_domain.o
+obj-$(CONFIG_S390_IOMMU) += s390-iommu.o
diff --git a/drivers/iommu/s390-iommu.c b/drivers/iommu/s390-iommu.c
new file mode 100644
index 0000000..f9f048d
--- /dev/null
+++ b/drivers/iommu/s390-iommu.c
@@ -0,0 +1,179 @@
+#include <linux/io.h>
+#include <linux/interrupt.h>
+#include <linux/platform_device.h>
+#include <linux/slab.h>
+#include <linux/pm_runtime.h>
+#include <linux/clk.h>
+#include <linux/err.h>
+#include <linux/mm.h>
+#include <linux/iommu.h>
+#include <linux/errno.h>
+#include <linux/list.h>
+#include <linux/memblock.h>
+#include <linux/export.h>
+#include <linux/pci.h>
+#include <linux/sizes.h>
+#include <asm/pci_dma.h>
+
+#define S390_IOMMU_PGSIZES SZ_4K
+
+struct s390_domain {
+ struct zpci_dev *zdev;
+};
+
+static int s390_iommu_domain_init(struct iommu_domain *domain)
+{
+ struct s390_domain *priv;
+
+ priv = kzalloc(sizeof(*priv), GFP_KERNEL);
+ if (!priv)
+ return -ENOMEM;
+
+ domain->priv = priv;
+ return 0;
+}
+
+static void s390_iommu_domain_destroy(struct iommu_domain *domain)
+{
+ kfree(domain->priv);
+ domain->priv = NULL;
+}
+
+static int s390_iommu_attach_device(struct iommu_domain *domain,
+ struct device *dev)
+{
+ struct s390_domain *priv = domain->priv;
+
+ if (priv->zdev)
+ return -EEXIST;
+
+ priv->zdev = (struct zpci_dev *)to_pci_dev(dev)->sysdata;
+ return 0;
+}
+
+static void s390_iommu_detach_device(struct iommu_domain *domain,
+ struct device *dev)
+{
+ struct s390_domain *priv = domain->priv;
+
+ dma_purge_rto_entries(priv->zdev);
+ priv->zdev = NULL;
+}
+
+static int s390_iommu_map(struct iommu_domain *domain, unsigned long iova,
+ phys_addr_t paddr, size_t size, int prot)
+{
+ struct s390_domain *priv = domain->priv;
+ int flags = 0;
+ int rc;
+
+ if (!priv->zdev)
+ return -ENODEV;
+
+ rc = dma_update_trans(priv->zdev, (unsigned long)paddr, iova, size,
+ flags);
+
+ return rc;
+}
+
+static phys_addr_t s390_iommu_iova_to_phys(struct iommu_domain *domain,
+ dma_addr_t iova)
+{
+ struct s390_domain *priv = domain->priv;
+ phys_addr_t phys = 0;
+ unsigned long *sto, *pto, *rto;
+ unsigned int rtx, sx, px;
+
+ if (!priv->zdev)
+ return -ENODEV;
+
+ rtx = calc_rtx(iova);
+ sx = calc_sx(iova);
+ px = calc_px(iova);
+ rto = priv->zdev->dma_table;
+
+ if (reg_entry_isvalid(rto[rtx])) {
+ sto = get_rt_sto(rto[rtx]);
+ if (reg_entry_isvalid(sto[sx])) {
+ pto = get_st_pto(sto[sx]);
+ if ((pto[px] & ZPCI_PTE_VALID_MASK) == ZPCI_PTE_VALID)
+ phys = pto[px] & ZPCI_PTE_ADDR_MASK;
+ }
+ }
+
+ return phys;
+}
+
+static size_t s390_iommu_unmap(struct iommu_domain *domain,
+ unsigned long iova, size_t size)
+{
+ struct s390_domain *priv = domain->priv;
+ int flags = ZPCI_PTE_INVALID;
+ phys_addr_t paddr;
+ int rc;
+
+ if (!priv->zdev)
+ goto out;
+
+ paddr = s390_iommu_iova_to_phys(domain, iova);
+ if (!paddr)
+ goto out;
+
+ rc = dma_update_trans(priv->zdev, (unsigned long)paddr, iova, size,
+ flags);
+out:
+ return size;
+}
+
+static bool s390_iommu_capable(enum iommu_cap cap)
+{
+ switch (cap) {
+ case IOMMU_CAP_CACHE_COHERENCY:
+ return true;
+ case IOMMU_CAP_INTR_REMAP:
+ return true;
+ }
+
+ return false;
+}
+
+static int s390_iommu_add_device(struct device *dev)
+{
+ struct iommu_group *group;
+ int ret;
+
+ group = iommu_group_alloc();
+ if (IS_ERR(group)) {
+ dev_err(dev, "Failed to allocate IOMMU group\n");
+ return PTR_ERR(group);
+ }
+
+ ret = iommu_group_add_device(group, dev);
+ return ret;
+}
+
+static void s390_iommu_remove_device(struct device *dev)
+{
+ iommu_group_remove_device(dev);
+}
+
+static struct iommu_ops s390_iommu_ops = {
+ .capable = s390_iommu_capable,
+ .domain_init = s390_iommu_domain_init,
+ .domain_destroy = s390_iommu_domain_destroy,
+ .attach_dev = s390_iommu_attach_device,
+ .detach_dev = s390_iommu_detach_device,
+ .map = s390_iommu_map,
+ .unmap = s390_iommu_unmap,
+ .iova_to_phys = s390_iommu_iova_to_phys,
+ .add_device = s390_iommu_add_device,
+ .remove_device = s390_iommu_remove_device,
+ .pgsize_bitmap = S390_IOMMU_PGSIZES,
+};
+
+static int __init s390_iommu_init(void)
+{
+ bus_set_iommu(&pci_bus_type, &s390_iommu_ops);
+ return 0;
+}
+subsys_initcall(s390_iommu_init);
--
1.8.5.5
Hi Frank,
On Tue, Oct 21, 2014 at 01:57:25PM +0200, Frank Blaschka wrote:
> Add a basic IOMMU driver for the s390 platform. The code is fairly simple
> since on s390 each PCI device has its own virtual I/O address space,
> starting at the same IOVA.
Are there any limitations on the IOVA address space for the devices, or
can really any system physical address be mapped, starting from 0 up to 2^64?
> Because of this, a domain can hold only one PCI device.
This bothers me, as it is not compatible with the IOMMU-API. I looked a
little bit into how the mappings are created, and it seems there is a
per-device dma_table.
Is there any reason a dma_table can't be per IOMMU domain and assigned
to multiple devices at the same time?
Otherwise the code looks quite simple and straight forward.
Joerg
On Wed, Oct 22, 2014 at 04:17:29PM +0200, Joerg Roedel wrote:
> Hi Frank,
>
> On Tue, Oct 21, 2014 at 01:57:25PM +0200, Frank Blaschka wrote:
> > Add a basic IOMMU driver for the s390 platform. The code is fairly simple
> > since on s390 each PCI device has its own virtual I/O address space,
> > starting at the same IOVA.
>
> Are there any limitations on the IOVA address space for the devices, or
> can really any system physical address be mapped, starting from 0 up to 2^64?
>
Hi Joerg,
Basically there are no limitations. Depending on the s390 machine
generation, a device starts its IOVA at a specific address (announced by
the HW). But as I already said, each device starts at the same address.
I think this prevents having multiple devices in the same IOMMU domain.
> > Because of this, a domain can hold only one PCI device.
>
> This bothers me, as it is not compatible with the IOMMU-API. I looked a
> little bit into how the mappings are created, and it seems there is a
> per-device dma_table.
>
yes, you are absolutely right. There is a per-device dma_table.
There is no general IOMMU device but each pci device has its own IOMMU
translation capability.
> Is there any reason a dma_table can't be per IOMMU domain and assigned
> to multiple devices at the same time?
Is there a possibility the IOMMU domain can support e.g. something like
VIOA 0x10000 -> pci device 1
VIOA 0x10000 -> pci device 2
>
> Otherwise the code looks quite simple and straight forward.
>
Thx for your review and help
Frank
>
> Joerg
>
>
On Wed, Oct 22, 2014 at 05:43:20PM +0200, Frank Blaschka wrote:
> Basically there are no limitations. Depending on the s390 machine
> generation, a device starts its IOVA at a specific address (announced by
> the HW). But as I already said, each device starts at the same address.
> I think this prevents having multiple devices in the same IOMMU domain.
Why, each device has its own IOVA address space, so IOVA A could map to
physical address X for one device and to Y for another, no? And if you
point multiple devices to the same dma_table they share the mappings
(and thus the address space). Or am I getting something wrong?
> yes, you are absolutely right. There is a per-device dma_table.
> There is no general IOMMU device but each pci device has its own IOMMU
> translation capability.
I see, in this way it is similar to ARM where there is often also one IOMMU
per master device.
> Is there a possibility the IOMMU domain can support e.g. something like
>
> VIOA 0x10000 -> pci device 1
> VIOA 0x10000 -> pci device 2
A domain is basically an abstraction for a DMA page table (or a
dma_table, as you call it on s390). So you can easily create similar
mappings for more than one device with it.
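For illustration, roughly like this with the generic IOMMU-API (only a
sketch; error handling is omitted and pdev1/pdev2/page are made-up
examples, not from your patch):

	struct iommu_domain *dom = iommu_domain_alloc(&pci_bus_type);

	iommu_attach_device(dom, &pdev1->dev);
	iommu_attach_device(dom, &pdev2->dev);

	/* one map call, the mapping is then valid for both devices */
	iommu_map(dom, 0x10000, page_to_phys(page), PAGE_SIZE,
		  IOMMU_READ | IOMMU_WRITE);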
Joerg
On Thu, Oct 23, 2014 at 02:41:15PM +0200, Joerg Roedel wrote:
> On Wed, Oct 22, 2014 at 05:43:20PM +0200, Frank Blaschka wrote:
> > Basically there are no limitations. Depending on the s390 machine
> > generation, a device starts its IOVA at a specific address (announced by
> > the HW). But as I already said, each device starts at the same address.
> > I think this prevents having multiple devices in the same IOMMU domain.
>
> Why, each device has its own IOVA address space, so IOVA A could map to
> physical address X for one device and to Y for another, no? And if you
> point multiple devices to the same dma_table they share the mappings
> (and thus the address space). Or am I getting something wrong?
>
> > yes, you are absolutely right. There is a per-device dma_table.
> > There is no general IOMMU device but each pci device has its own IOMMU
> > translation capability.
>
> I see, in this way it is similar to ARM where there is often also one IOMMU
> per master device.
>
> > Is there a possibility the IOMMU domain can support e.g. something like
> >
> > VIOA 0x10000 -> pci device 1
> > VIOA 0x10000 -> pci device 2
>
> A domain is basically an abstraction for a DMA page table (or a
> dma_table, as you call it on s390). So you can easily create similar
> mappings for more than one device with it.
>
OK, maybe I was too close to the existing s390 DMA implementation, or simply
wrong; maybe Sebastian or Gerald can give more background information.
Here is my understanding so far:
The clp instruction reports a start/end DMA address for the pci device.
On my system all devices report:
sdma = 0x100000000;
edma = 0x1ffffffffffffff;
DMA mappings are created for each device separately, starting from 0x100000000
and filling the VIOA space for this device (up to 0x1ffffffffffffff).
If we would like to have more than one device per domain, I think:
we would have to slice the IOVA address space (0x100000000 - 0x1ffffffffffffff)
of the domain and report only a slice to the pci device (clp).
The iommu code would have to find the device by the DMA (VIOA) address
and then program the entry into the table of that particular device (and only
this device).
Is this understanding more appropriate?
Thx
Frank
>
>
> Joerg
>
Hi Frank,
On Thu, Oct 23, 2014 at 04:04:37PM +0200, Frank Blaschka wrote:
> > A domain is basically an abstraction for a DMA page table (or a
> > dma_table, as you call it on s390). So you can easily create similar
> > mappings for more than one device with it.
> >
> The clp instruction reports a start/end DMA address for the pci device.
> on my system all devices report:
>
> sdma = 0x100000000;
> edma = 0x1ffffffffffffff;
These values need to be reported through the IOMMU-API, so that the
users know which address ranges they can map.
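A sketch of how that could look in the attach path (only an illustration,
assuming zdev->start_dma/end_dma carry the clp-reported values, as the
existing arch/s390/pci code suggests):

static int s390_iommu_attach_device(struct iommu_domain *domain,
				    struct device *dev)
{
	struct s390_domain *priv = domain->priv;
	struct zpci_dev *zdev = to_pci_dev(dev)->sysdata;

	if (priv->zdev)
		return -EEXIST;

	priv->zdev = zdev;

	/* report the usable IOVA window to IOMMU-API users */
	domain->geometry.aperture_start = zdev->start_dma;
	domain->geometry.aperture_end = zdev->end_dma;
	domain->geometry.force_aperture = true;

	return 0;
}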
> DMA mappings are created for each device separately, starting from 0x100000000
> and filling the VIOA space for this device (up to 0x1ffffffffffffff).
>
> If we would like to have more than one device per domain, I think:
>
> we would have to slice the IOVA address space (0x100000000 - 0x1ffffffffffffff)
> of the domain and report only a slice to the pci device (clp)
> The iommu code would have to find the device by the dma (VIOA) address
> and then program the entry to the table of the particular device (and only this
> device).
Why do you need to slice the address space when more than one device is
assigned to it? Does that come from the hardware?
Usually it's not problematic when devices share an address space. The
partitioning of that address space between devices is done by an address
allocator which works on small chunks of memory (io-page-size
granularity).
But such an address allocator is part of the DMA-API; the IOMMU-API, which
you implement here, only cares about the mappings themselves, not about
address allocation.
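For illustration, such an allocator is typically just a page-granular
bitmap. A rough sketch, not tied to any existing implementation:

static unsigned long iova_alloc(unsigned long *bitmap, unsigned long nr_total,
				unsigned long base, int nr_pages)
{
	unsigned long offset;

	offset = bitmap_find_next_zero_area(bitmap, nr_total, 0, nr_pages, 0);
	if (offset >= nr_total)
		return 0;	/* address space exhausted */

	bitmap_set(bitmap, offset, nr_pages);
	return base + (offset << PAGE_SHIFT);
}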
Joerg
On Thu, 23 Oct 2014 16:04:37 +0200
Frank Blaschka <[email protected]> wrote:
> On Thu, Oct 23, 2014 at 02:41:15PM +0200, Joerg Roedel wrote:
> > On Wed, Oct 22, 2014 at 05:43:20PM +0200, Frank Blaschka wrote:
> > > Basically there are no limitations. Depending on the s390 machine
> > > generation, a device starts its IOVA at a specific address
> > > (announced by the HW). But as I already said, each device starts
> > > at the same address. I think this prevents having multiple
> > > devices in the same IOMMU domain.
> >
> > Why, each device has its own IOVA address space, so IOVA A could
> > map to physical address X for one device and to Y for another, no?
> > And if you point multiple devices to the same dma_table they share
> > the mappings (and thus the address space). Or am I getting
> > something wrong?
> >
> > > yes, you are absolutely right. There is a per-device dma_table.
> > > There is no general IOMMU device but each pci device has its own
> > > IOMMU translation capability.
> >
> > I see, in this way it is similar to ARM where there is often also
> > one IOMMU per master device.
> >
> > > Is there a possibility the IOMMU domain can support e.g.
> > > something like
> > >
> > > VIOA 0x10000 -> pci device 1
> > > VIOA 0x10000 -> pci device 2
> >
> > A domain is basically an abstraction for a DMA page table (or a
> > dma_table, as you call it on s390). So you can easily create similar
> > mappings for more than one device with it.
> >
> ok, maybe I was too close to the existing s390 dma implementation or
> simply wrong, maybe Sebastian or Gerald can give more background
Not sure if I understood the concept of IOMMU domains right. But if this
is about having multiple devices in the same domain, so that iommu_ops->map
will establish the _same_ DMA mapping on _all_ registered devices, then
this should be possible.
We cannot have shared DMA tables because each device gets its own DMA table
allocated during device initialization. But we could just keep all devices
from one domain in a list and then call dma_update_trans() for all devices
during iommu_ops->map/unmap.
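Roughly along these lines (just a rough sketch, the struct and locking
here are made up, not from the posted patch):

struct s390_domain {
	struct list_head devices;	/* attached zpci_dev entries */
	spinlock_t list_lock;
};

struct s390_domain_device {
	struct list_head list;
	struct zpci_dev *zdev;
};

static int s390_iommu_map(struct iommu_domain *domain, unsigned long iova,
			  phys_addr_t paddr, size_t size, int prot)
{
	struct s390_domain *priv = domain->priv;
	struct s390_domain_device *entry;
	int flags = 0, rc = 0;

	spin_lock(&priv->list_lock);
	list_for_each_entry(entry, &priv->devices, list) {
		/* replay the same translation on every attached device */
		rc = dma_update_trans(entry->zdev, (unsigned long) paddr,
				      iova, size, flags);
		if (rc)
			break;
	}
	spin_unlock(&priv->list_lock);

	return rc;
}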
Gerald
On Mon, Oct 27, 2014 at 03:32:01PM +0100, Gerald Schaefer wrote:
> Not sure if I understood the concept of IOMMU domains right. But if this
> is about having multiple devices in the same domain, so that iommu_ops->map
> will establish the _same_ DMA mapping on _all_ registered devices, then
> this should be possible.
Yes, this is what domains are about. A domain describes a set of DMA
mappings which can be assigned to multiple devices in parallel.
> We cannot have shared DMA tables because each device gets its own DMA table
> allocated during device initialization.
Is there some hardware reason for this, or is that just an implementation
detail that can be changed? In other words, does the hardware allow using
the same DMA table for multiple devices?
> But we could just keep all devices from one domain in a list and then
> call dma_update_trans() for all devices during iommu_ops->map/unmap.
This sounds complicated. Note that a device can be assigned to a domain that
already has existing mappings. In this case you need to make sure that
the new device inherits these mappings (and destroy all old mappings for
the device that possibly exist).
I think it is much easier to use the same DMA table for all devices in a
domain, if the hardware allows that.
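For reference, a sketch of the per-domain-table idea (untested, and
assuming zpci_register_ioat() can simply be pointed at a shared table;
the struct here differs from the posted patch):

struct s390_domain {
	unsigned long *dma_table;	/* one table, shared by all devices */
};

static int s390_iommu_attach_device(struct iommu_domain *domain,
				    struct device *dev)
{
	struct s390_domain *priv = domain->priv;
	struct zpci_dev *zdev = to_pci_dev(dev)->sysdata;

	/* re-register the function to translate through the domain's table */
	return zpci_register_ioat(zdev, 0, zdev->start_dma, zdev->end_dma,
				  (u64) priv->dma_table);
}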
Joerg
On Mon, 27 Oct 2014 17:25:02 +0100
Joerg Roedel <[email protected]> wrote:
> On Mon, Oct 27, 2014 at 03:32:01PM +0100, Gerald Schaefer wrote:
> > Not sure if I understood the concept of IOMMU domains right. But if
> > this is about having multiple devices in the same domain, so that
> > iommu_ops->map will establish the _same_ DMA mapping on _all_
> > registered devices, then this should be possible.
>
> Yes, this is what domains are about. A domain describes a set of DMA
> mappings which can be assigned to multiple devices in parallel.
>
> > We cannot have shared DMA tables because each device gets its own
> > DMA table allocated during device initialization.
>
> Is there some hardware reason for this or is that just an
> implementation detail that can be changed. In other words, does the
> hardware allow to use the same DMA table for multiple devices?
Yes, the HW would allow shared DMA tables, but the implementation would
need some non-trivial changes. For example, we have a per-device spin_lock
for DMA table manipulations and the code in arch/s390/pci/pci_dma.c knows
nothing about IOMMU domains or shared DMA tables, it just implements a set
of dma_map_ops.
Of course this would also go horribly wrong if a device was already
in use (via the current dma_map_ops), but I guess using devices through
the IOMMU_API prevents using them otherwise?
>
> > But we could just keep all devices from one domain in a list and
> > then call dma_update_trans() for all devices during
> > iommu_ops->map/unmap.
>
> This sounds complicated. Note that a device can be assigned to a
> domain that already has existing mappings. In this case you need to
> make sure that the new device inherits these mappings (and destroy
> all old mappings for the device that possibly exist).
>
> I think it is much easier to use the same DMA table for all devices
> in a domain, if the hardware allows that.
Yes, in this case, having one DMA table per domain and sharing it
between all devices in that domain sounds like a good idea. However,
I can't think of any use case for this, and Frank probably had a very
special use case in mind where this scenario doesn't appear, hence the
"one device per domain" restriction.
So, if having multiple devices per domain is a must, then we probably
need a thorough rewrite of the arch/s390/pci/pci_dma.c code.
Gerald
On Mon, Oct 27, 2014 at 06:02:19PM +0100, Gerald Schaefer wrote:
> On Mon, 27 Oct 2014 17:25:02 +0100
> Joerg Roedel <[email protected]> wrote:
> > Is there some hardware reason for this or is that just an
> > implementation detail that can be changed. In other words, does the
> > hardware allow to use the same DMA table for multiple devices?
>
> Yes, the HW would allow shared DMA tables, but the implementation would
> need some non-trivial changes. For example, we have a per-device spin_lock
> for DMA table manipulations and the code in arch/s390/pci/pci_dma.c knows
> nothing about IOMMU domains or shared DMA tables, it just implements a set
> of dma_map_ops.
I think it would make sense to move the DMA table handling code and the
dma_map_ops implementation to the IOMMU driver too. This is also how
some other IOMMU drivers implement it.
The plan is to consolidate the dma_ops implementations someday and have
a common implementation that works with all IOMMU drivers across
architectures. This would benefit s390 as well and would obsolete the
driver-specific dma_ops implementation.
> Of course this would also go horribly wrong if a device was already
> in use (via the current dma_map_ops), but I guess using devices through
> the IOMMU_API prevents using them otherwise?
This is taken care of by the device drivers. A driver for a device
either uses the DMA-API or does its own management of DMA mappings using
the IOMMU-API. VFIO is an example of the latter case.
> > I think it is much easier to use the same DMA table for all devices
> > in a domain, if the hardware allows that.
>
> Yes, in this case, having one DMA table per domain and sharing it
> between all devices in that domain sounds like a good idea. However,
> I can't think of any use case for this, and Frank probably had a very
> special use case in mind where this scenario doesn't appear, hence the
> "one device per domain" restriction.
One usecase is device access from user-space via VFIO. A userspace
process might want to access multiple devices at the same time and VFIO
would implement this by assigning all of these devices to the same IOMMU
domain.
This requirement also comes from the IOMMU-API itself. The
intention of the API is to make different IOMMUs look the same through
the API, and this is violated when drivers implement a 1-1
domain->device mapping.
> So, if having multiple devices per domain is a must, then we probably
> need a thorough rewrite of the arch/s390/pci/pci_dma.c code.
Yes, this is a requirement for new IOMMU drivers. We already have
drivers implementing the same 1-1 relation and we are about to fix them.
But I don't want to add new drivers doing the same.
Joerg
On Mon, 27 Oct 2014 18:58:35 +0100
Joerg Roedel <[email protected]> wrote:
> On Mon, Oct 27, 2014 at 06:02:19PM +0100, Gerald Schaefer wrote:
> > On Mon, 27 Oct 2014 17:25:02 +0100
> > Joerg Roedel <[email protected]> wrote:
> > > Is there some hardware reason for this or is that just an
> > > implementation detail that can be changed. In other words, does
> > > the hardware allow to use the same DMA table for multiple devices?
> >
> > Yes, the HW would allow shared DMA tables, but the implementation
> > would need some non-trivial changes. For example, we have a
> > per-device spin_lock for DMA table manipulations and the code in
> > arch/s390/pci/pci_dma.c knows nothing about IOMMU domains or shared
> > DMA tables, it just implements a set of dma_map_ops.
>
> I think it would make sense to move the DMA table handling code and
> the dma_map_ops implementation to the IOMMU driver too. This is also
> how some other IOMMU drivers implement it.
Yes, I feared that this would come up, but I agree that it looks like the
best solution, at least if we really want/need the IOMMU API for s390 now.
I'll need to discuss this with Frank; he seems to be on vacation this week.
Thanks for your feedback and explanations!
> The plan is to consolidate the dma_ops implementations someday and
> have a common implementation that works with all IOMMU drivers across
> architectures. This would benefit s390 as well and would obsolete the
> driver-specific dma_ops implementation.
>
> > Of course this would also go horribly wrong if a device was already
> > in use (via the current dma_map_ops), but I guess using devices
> > through the IOMMU_API prevents using them otherwise?
>
> This is taken care of by the device drivers. A driver for a device
> either uses the DMA-API or does its own management of DMA mappings
> using the IOMMU-API. VFIO is an example of the latter case.
>
> > > I think it is much easier to use the same DMA table for all
> > > devices in a domain, if the hardware allows that.
> >
> > Yes, in this case, having one DMA table per domain and sharing it
> > between all devices in that domain sounds like a good idea. However,
> > I can't think of any use case for this, and Frank probably had a
> > very special use case in mind where this scenario doesn't appear,
> > hence the "one device per domain" restriction.
>
> One usecase is device access from user-space via VFIO. A userspace
> process might want to access multiple devices at the same time and
> VFIO would implement this by assigning all of these devices to the
> same IOMMU domain.
>
> This requirement also comes from the IOMMU-API itself. The
> intention of the API is to make different IOMMUs look the same through
> the API, and this is violated when drivers implement a 1-1
> domain->device mapping.
>
> > So, if having multiple devices per domain is a must, then we
> > probably need a thorough rewrite of the arch/s390/pci/pci_dma.c
> > code.
>
> Yes, this is a requirement for new IOMMU drivers. We already have
> drivers implementing the same 1-1 relation and we are about to fix
> them. But I don't want to add new drivers doing the same.
>
>
> Joerg
>