Hey,
here are two patches for the s390 PCI and IOMMU code. It is
based on the assumption that every pci_dev that points to
the same zpci_dev shares a single dma-table (and thus a
single address space).
If this assupmtion is true (as it looks to me from reading
the code) then the iommu-group setup code in the s390 iommu
driver needs to be updated.
These patches do this and also add support for the
iommu_device_register interface to the s390 iommu driver.
Any comments and testing appreciated.
Thanks,
Joerg
Joerg Roedel (2):
iommu/s390: Fix IOMMU groups
iommu/s390: Add support for iommu_device handling
arch/s390/include/asm/pci.h | 8 +++++
arch/s390/pci/pci.c | 10 ++++++-
drivers/iommu/s390-iommu.c | 71 ++++++++++++++++++++++++++++++++++++++-------
3 files changed, 78 insertions(+), 11 deletions(-)
--
1.9.1
From: Joerg Roedel <[email protected]>
Add support for the iommu_device_register interface to make
the s390 hardware iommus visible to the iommu core and in
sysfs.
Signed-off-by: Joerg Roedel <[email protected]>
---
arch/s390/include/asm/pci.h | 1 +
drivers/iommu/s390-iommu.c | 30 ++++++++++++++++++++++++++++++
2 files changed, 31 insertions(+)
diff --git a/arch/s390/include/asm/pci.h b/arch/s390/include/asm/pci.h
index 045665d..8c071af 100644
--- a/arch/s390/include/asm/pci.h
+++ b/arch/s390/include/asm/pci.h
@@ -124,6 +124,7 @@ struct zpci_dev {
unsigned long iommu_pages;
unsigned int next_bit;
+ struct iommu_device iommu_dev; /* IOMMU core handle */
struct iommu_group *group; /* IOMMU group for all devices behind this zdev */
char res_name[16];
diff --git a/drivers/iommu/s390-iommu.c b/drivers/iommu/s390-iommu.c
index cad3ad0..ec7f5e4 100644
--- a/drivers/iommu/s390-iommu.c
+++ b/drivers/iommu/s390-iommu.c
@@ -18,6 +18,8 @@
*/
#define S390_IOMMU_PGSIZES (~0xFFFUL)
+static struct iommu_ops s390_iommu_ops;
+
struct s390_domain {
struct iommu_domain domain;
struct list_head devices;
@@ -173,10 +175,13 @@ static struct iommu_group *s390_iommu_device_group(struct device *dev)
static int s390_iommu_add_device(struct device *dev)
{
struct iommu_group *group = iommu_group_get_for_dev(dev);
+ struct zpci_dev *zdev = to_pci_dev(dev)->sysdata;
+
if (IS_ERR(group))
return PTR_ERR(group);
iommu_group_put(group);
+ iommu_device_link(&zdev->iommu_dev, dev);
return 0;
}
@@ -203,6 +208,7 @@ static void s390_iommu_remove_device(struct device *dev)
s390_iommu_detach_device(domain, dev);
}
+ iommu_device_unlink(&zdev->iommu_dev, dev);
iommu_group_remove_device(dev);
}
@@ -342,13 +348,37 @@ int zpci_init_iommu(struct zpci_dev *zdev)
if (IS_ERR(zdev->group)) {
rc = PTR_ERR(zdev->group);
zdev->group = NULL;
+ goto out_err;
}
+ rc = iommu_device_sysfs_add(&zdev->iommu_dev, NULL, NULL,
+ "s390-iommu.%08x", zdev->fid);
+ if (rc)
+ goto out_group;
+
+ iommu_device_set_ops(&zdev->iommu_dev, &s390_iommu_ops);
+
+ rc = iommu_device_register(&zdev->iommu_dev);
+ if (rc)
+ goto out_sysfs;
+
+ return 0;
+
+out_sysfs:
+ iommu_device_sysfs_remove(&zdev->iommu_dev);
+
+out_group:
+ iommu_group_put(zdev->group);
+ zdev->group = NULL;
+
+out_err:
return rc;
}
void zpci_destroy_iommu(struct zpci_dev *zdev)
{
+ iommu_device_unregister(&zdev->iommu_dev);
+ iommu_device_sysfs_remove(&zdev->iommu_dev);
iommu_group_put(zdev->group);
zdev->group = NULL;
}
--
1.9.1
From: Joerg Roedel <[email protected]>
Currently the s390 iommu driver allocates an iommu-group for
every device that is added. But that is wrong, as there is
only one dma-table per pci-root-bus. Make all devices behind
one dma-table share one iommu-group.
Signed-off-by: Joerg Roedel <[email protected]>
---
arch/s390/include/asm/pci.h | 7 +++++++
arch/s390/pci/pci.c | 10 +++++++++-
drivers/iommu/s390-iommu.c | 43 ++++++++++++++++++++++++++++++++-----------
3 files changed, 48 insertions(+), 12 deletions(-)
diff --git a/arch/s390/include/asm/pci.h b/arch/s390/include/asm/pci.h
index 4e31866..045665d 100644
--- a/arch/s390/include/asm/pci.h
+++ b/arch/s390/include/asm/pci.h
@@ -8,6 +8,7 @@
#include <linux/pci.h>
#include <linux/mutex.h>
+#include <linux/iommu.h>
#include <asm-generic/pci.h>
#include <asm/pci_clp.h>
#include <asm/pci_debug.h>
@@ -123,6 +124,8 @@ struct zpci_dev {
unsigned long iommu_pages;
unsigned int next_bit;
+ struct iommu_group *group; /* IOMMU group for all devices behind this zdev */
+
char res_name[16];
struct zpci_bar_struct bars[PCI_BAR_COUNT];
@@ -173,6 +176,10 @@ static inline bool zdev_enabled(struct zpci_dev *zdev)
int clp_enable_fh(struct zpci_dev *, u8);
int clp_disable_fh(struct zpci_dev *);
+/* IOMMU Interface */
+int zpci_init_iommu(struct zpci_dev *zdev);
+void zpci_destroy_iommu(struct zpci_dev *zdev);
+
#ifdef CONFIG_PCI
/* Error handling and recovery */
void zpci_event_error(void *);
diff --git a/arch/s390/pci/pci.c b/arch/s390/pci/pci.c
index 364b9d8..3178e4d 100644
--- a/arch/s390/pci/pci.c
+++ b/arch/s390/pci/pci.c
@@ -754,6 +754,7 @@ void pcibios_remove_bus(struct pci_bus *bus)
zpci_exit_slot(zdev);
zpci_cleanup_bus_resources(zdev);
+ zpci_destroy_iommu(zdev);
zpci_free_domain(zdev);
spin_lock(&zpci_list_lock);
@@ -825,11 +826,15 @@ int zpci_create_device(struct zpci_dev *zdev)
if (rc)
goto out;
+ rc = zpci_init_iommu(zdev);
+ if (rc)
+ goto out_free;
+
mutex_init(&zdev->lock);
if (zdev->state == ZPCI_FN_STATE_CONFIGURED) {
rc = zpci_enable_device(zdev);
if (rc)
- goto out_free;
+ goto out_iommu;
}
rc = zpci_scan_bus(zdev);
if (rc)
@@ -846,6 +851,9 @@ int zpci_create_device(struct zpci_dev *zdev)
out_disable:
if (zdev->state == ZPCI_FN_STATE_ONLINE)
zpci_disable_device(zdev);
+out_iommu:
+ zpci_destroy_iommu(zdev);
+
out_free:
zpci_free_domain(zdev);
out:
diff --git a/drivers/iommu/s390-iommu.c b/drivers/iommu/s390-iommu.c
index 179e636..cad3ad0 100644
--- a/drivers/iommu/s390-iommu.c
+++ b/drivers/iommu/s390-iommu.c
@@ -163,22 +163,22 @@ static void s390_iommu_detach_device(struct iommu_domain *domain,
}
}
-static int s390_iommu_add_device(struct device *dev)
+static struct iommu_group *s390_iommu_device_group(struct device *dev)
{
- struct iommu_group *group;
- int rc;
+ struct zpci_dev *zdev = to_pci_dev(dev)->sysdata;
- group = iommu_group_get(dev);
- if (!group) {
- group = iommu_group_alloc();
- if (IS_ERR(group))
- return PTR_ERR(group);
- }
+ return zdev->group;
+}
+
+static int s390_iommu_add_device(struct device *dev)
+{
+ struct iommu_group *group = iommu_group_get_for_dev(dev);
+ if (IS_ERR(group))
+ return PTR_ERR(group);
- rc = iommu_group_add_device(group, dev);
iommu_group_put(group);
- return rc;
+ return 0;
}
static void s390_iommu_remove_device(struct device *dev)
@@ -333,6 +333,26 @@ static size_t s390_iommu_unmap(struct iommu_domain *domain,
return size;
}
+int zpci_init_iommu(struct zpci_dev *zdev)
+{
+ int rc = 0;
+
+ zdev->group = iommu_group_alloc();
+
+ if (IS_ERR(zdev->group)) {
+ rc = PTR_ERR(zdev->group);
+ zdev->group = NULL;
+ }
+
+ return rc;
+}
+
+void zpci_destroy_iommu(struct zpci_dev *zdev)
+{
+ iommu_group_put(zdev->group);
+ zdev->group = NULL;
+}
+
static struct iommu_ops s390_iommu_ops = {
.capable = s390_iommu_capable,
.domain_alloc = s390_domain_alloc,
@@ -344,6 +364,7 @@ static size_t s390_iommu_unmap(struct iommu_domain *domain,
.iova_to_phys = s390_iommu_iova_to_phys,
.add_device = s390_iommu_add_device,
.remove_device = s390_iommu_remove_device,
+ .device_group = s390_iommu_device_group,
.pgsize_bitmap = S390_IOMMU_PGSIZES,
};
--
1.9.1
On Thu, 27 Apr 2017 17:28:23 +0200
Joerg Roedel <[email protected]> wrote:
> Hey,
>
> here are two patches for the s390 PCI and IOMMU code. It is
> based on the assumption that every pci_dev that points to
> the same zpci_dev shares a single dma-table (and thus a
> single address space).
Well, there is a separate zpci_dev for each pci_dev on s390,
and each of those has its own separate dma-table (thus not shared).
>
> If this assupmtion is true (as it looks to me from reading
> the code) then the iommu-group setup code in the s390 iommu
> driver needs to be updated.
Given this "separate zpci_dev for each pci_dev" situation, I don't
see what this update actually changes, compared to the previous code,
see also my comments to that patch.
>
> These patches do this and also add support for the
> iommu_device_register interface to the s390 iommu driver.
>
> Any comments and testing appreciated.
>
> Thanks,
>
> Joerg
>
> Joerg Roedel (2):
> iommu/s390: Fix IOMMU groups
> iommu/s390: Add support for iommu_device handling
>
> arch/s390/include/asm/pci.h | 8 +++++
> arch/s390/pci/pci.c | 10 ++++++-
> drivers/iommu/s390-iommu.c | 71 ++++++++++++++++++++++++++++++++++++++-------
> 3 files changed, 78 insertions(+), 11 deletions(-)
>
On Thu, 27 Apr 2017 17:28:24 +0200
Joerg Roedel <[email protected]> wrote:
> From: Joerg Roedel <[email protected]>
>
> Currently the s390 iommu driver allocates an iommu-group for
> every device that is added. But that is wrong, as there is
> only one dma-table per pci-root-bus. Make all devices behind
> one dma-table share one iommu-group.
On s390, each PCI device has its own zpci_dev structure, and also its own
DMA address space. Even with this patch, you'll still allocate an
iommu_group for every device that is added, see below, so I do not really
see the benefit of this (but my knowledge got a little rusty, so I may be
missing something).
The only reason why we allow the theoretical option to attach more than
one device to the same IOMMU domain (and thus address space), is because
it was a requirement by you at that time (IIRC). We have no use-case for
this, and even in this theoretical scenario you would still have separate
zpci_dev structures for the affected devices that share the same IOMMU
domain (DMA address space), and thus also separate IOMMU groups, at least
after this patch.
Before this patch, you could in theory have different PCI devices in the
same IOMMU group, by having iommu_group_get() in s390_iommu_add_device()
return a group != NULL. With this patch, you will enforce that a new
iommu_group is allocated for every device, which would be the contrary
of what the description says.
>
> Signed-off-by: Joerg Roedel <[email protected]>
> ---
> arch/s390/include/asm/pci.h | 7 +++++++
> arch/s390/pci/pci.c | 10 +++++++++-
> drivers/iommu/s390-iommu.c | 43 ++++++++++++++++++++++++++++++++-----------
> 3 files changed, 48 insertions(+), 12 deletions(-)
>
> diff --git a/arch/s390/include/asm/pci.h b/arch/s390/include/asm/pci.h
> index 4e31866..045665d 100644
> --- a/arch/s390/include/asm/pci.h
> +++ b/arch/s390/include/asm/pci.h
> @@ -8,6 +8,7 @@
>
> #include <linux/pci.h>
> #include <linux/mutex.h>
> +#include <linux/iommu.h>
> #include <asm-generic/pci.h>
> #include <asm/pci_clp.h>
> #include <asm/pci_debug.h>
> @@ -123,6 +124,8 @@ struct zpci_dev {
> unsigned long iommu_pages;
> unsigned int next_bit;
>
> + struct iommu_group *group; /* IOMMU group for all devices behind this zdev */
There is always only one device behind a zpci_dev, so this comment makes
no sense.
> +
> char res_name[16];
> struct zpci_bar_struct bars[PCI_BAR_COUNT];
>
> @@ -173,6 +176,10 @@ static inline bool zdev_enabled(struct zpci_dev *zdev)
> int clp_enable_fh(struct zpci_dev *, u8);
> int clp_disable_fh(struct zpci_dev *);
>
> +/* IOMMU Interface */
> +int zpci_init_iommu(struct zpci_dev *zdev);
> +void zpci_destroy_iommu(struct zpci_dev *zdev);
> +
> #ifdef CONFIG_PCI
> /* Error handling and recovery */
> void zpci_event_error(void *);
> diff --git a/arch/s390/pci/pci.c b/arch/s390/pci/pci.c
> index 364b9d8..3178e4d 100644
> --- a/arch/s390/pci/pci.c
> +++ b/arch/s390/pci/pci.c
> @@ -754,6 +754,7 @@ void pcibios_remove_bus(struct pci_bus *bus)
>
> zpci_exit_slot(zdev);
> zpci_cleanup_bus_resources(zdev);
> + zpci_destroy_iommu(zdev);
> zpci_free_domain(zdev);
>
> spin_lock(&zpci_list_lock);
> @@ -825,11 +826,15 @@ int zpci_create_device(struct zpci_dev *zdev)
> if (rc)
> goto out;
>
> + rc = zpci_init_iommu(zdev);
> + if (rc)
> + goto out_free;
This will get called for every device that is added, and every device
will get its own zpci_dev, so this will still result in allocating
an IOMMU group for every device.
> +
> mutex_init(&zdev->lock);
> if (zdev->state == ZPCI_FN_STATE_CONFIGURED) {
> rc = zpci_enable_device(zdev);
> if (rc)
> - goto out_free;
> + goto out_iommu;
> }
> rc = zpci_scan_bus(zdev);
> if (rc)
> @@ -846,6 +851,9 @@ int zpci_create_device(struct zpci_dev *zdev)
> out_disable:
> if (zdev->state == ZPCI_FN_STATE_ONLINE)
> zpci_disable_device(zdev);
> +out_iommu:
> + zpci_destroy_iommu(zdev);
> +
> out_free:
> zpci_free_domain(zdev);
> out:
> diff --git a/drivers/iommu/s390-iommu.c b/drivers/iommu/s390-iommu.c
> index 179e636..cad3ad0 100644
> --- a/drivers/iommu/s390-iommu.c
> +++ b/drivers/iommu/s390-iommu.c
> @@ -163,22 +163,22 @@ static void s390_iommu_detach_device(struct iommu_domain *domain,
> }
> }
>
> -static int s390_iommu_add_device(struct device *dev)
> +static struct iommu_group *s390_iommu_device_group(struct device *dev)
> {
> - struct iommu_group *group;
> - int rc;
> + struct zpci_dev *zdev = to_pci_dev(dev)->sysdata;
>
> - group = iommu_group_get(dev);
> - if (!group) {
> - group = iommu_group_alloc();
> - if (IS_ERR(group))
> - return PTR_ERR(group);
> - }
> + return zdev->group;
> +}
> +
> +static int s390_iommu_add_device(struct device *dev)
> +{
> + struct iommu_group *group = iommu_group_get_for_dev(dev);
> + if (IS_ERR(group))
> + return PTR_ERR(group);
>
> - rc = iommu_group_add_device(group, dev);
> iommu_group_put(group);
>
> - return rc;
> + return 0;
> }
>
> static void s390_iommu_remove_device(struct device *dev)
> @@ -333,6 +333,26 @@ static size_t s390_iommu_unmap(struct iommu_domain *domain,
> return size;
> }
>
> +int zpci_init_iommu(struct zpci_dev *zdev)
> +{
> + int rc = 0;
> +
> + zdev->group = iommu_group_alloc();
> +
> + if (IS_ERR(zdev->group)) {
> + rc = PTR_ERR(zdev->group);
> + zdev->group = NULL;
> + }
> +
> + return rc;
> +}
> +
> +void zpci_destroy_iommu(struct zpci_dev *zdev)
> +{
> + iommu_group_put(zdev->group);
> + zdev->group = NULL;
> +}
While the rest of this patch doesn't seem to make much of a difference to
the current behavior, I'm wondering where this extra iommu_group_put()
comes from. It either was erroneously missing before this patch, or it
is erroneously introduced by this patch.
> +
> static struct iommu_ops s390_iommu_ops = {
> .capable = s390_iommu_capable,
> .domain_alloc = s390_domain_alloc,
> @@ -344,6 +364,7 @@ static size_t s390_iommu_unmap(struct iommu_domain *domain,
> .iova_to_phys = s390_iommu_iova_to_phys,
> .add_device = s390_iommu_add_device,
> .remove_device = s390_iommu_remove_device,
> + .device_group = s390_iommu_device_group,
> .pgsize_bitmap = S390_IOMMU_PGSIZES,
> };
>
Hi Gerald,
thanks for your reply. I have some more questions, please see below.
On Thu, Apr 27, 2017 at 08:10:18PM +0200, Gerald Schaefer wrote:
> Well, there is a separate zpci_dev for each pci_dev on s390,
> and each of those has its own separate dma-table (thus not shared).
Is that true for all functions of a PCIe card, so does every function of
a device has its own zpci_dev structure and thus its own DMA-table?
My assumption came from the fact that the zpci_dev is read from
pci_dev->sysdata, which is propagated there from the pci_bridge
through the pci_root_bus structures.
> Given this "separate zpci_dev for each pci_dev" situation, I don't
> see what this update actually changes, compared to the previous code,
> see also my comments to that patch.
The add_device call-back is invoked for every function of a pci-device,
because each function gets its own pci_dev structure. Also we usually
group all functions of a PCI-device together into one iommu-group,
because we don't trust that the device isolates its functions from each
other.
Regards,
Joerg
On Thu, Apr 27, 2017 at 08:11:42PM +0200, Gerald Schaefer wrote:
> > +void zpci_destroy_iommu(struct zpci_dev *zdev)
> > +{
> > + iommu_group_put(zdev->group);
> > + zdev->group = NULL;
> > +}
>
> While the rest of this patch doesn't seem to make much of a difference to
> the current behavior, I'm wondering where this extra iommu_group_put()
> comes from. It either was erroneously missing before this patch, or it
> is erroneously introduced by this patch.
This is the way to free an iommu-group. It was missing before probably
because it was unclear whether the add_device function allocated a group
or not. So there was no way to know if it needs to be put again in the
remove_device function.
With this patch the iommu-group is explicitly allocated when the
zpci_dev is created and destroyed again with it.
Joerg
Hi Joerg,
I guess we are a bit special on s390 (again), see below. Sebastian is more
familiar with the base s390 PCI code, he may correct me if I'm wrong.
On Thu, 27 Apr 2017 23:03:25 +0200
Joerg Roedel <[email protected]> wrote:
> > Well, there is a separate zpci_dev for each pci_dev on s390,
> > and each of those has its own separate dma-table (thus not shared).
>
> Is that true for all functions of a PCIe card, so does every function of
> a device has its own zpci_dev structure and thus its own DMA-table?
Yes, clp_add_pci_device() is called for every function, which in turn calls
zpci_create_device() with a freshly allocated zdev. zpci_enable_device()
then sets up a new DMA address space for each function.
> My assumption came from the fact that the zpci_dev is read from
> pci_dev->sysdata, which is propagated there from the pci_bridge
> through the pci_root_bus structures.
The zdev gets there via zpci_create_device() -> zpci_scan_bus() ->
pci_scan_root_bus(), which is done for every single function.
Not sure if I understand this right, but it looks like we set up a new PCI
bus for each function.
> > Given this "separate zpci_dev for each pci_dev" situation, I don't
> > see what this update actually changes, compared to the previous code,
> > see also my comments to that patch.
>
> The add_device call-back is invoked for every function of a pci-device,
> because each function gets its own pci_dev structure. Also we usually
> group all functions of a PCI-device together into one iommu-group,
> because we don't trust that the device isolates its functions from each
> other.
OK, but similar to the add_device callback, zpci_create_device() is
also invoked for every function. So, allocating a new iommu-group in
zpci_create_device() will never lead to any group sharing.
I am however a bit confused now, about how we would have allowed group
sharing with the current s390 IOMMU code, or IOW in which scenario would
iommu_group_get() in the add_device callback find a shareable iommu-group?
In the attach_dev callback, we provide the option to "force" multiple
functions using the same iommu-domain / DMA address space, by de-registering
the per-function DMA address space and registering a common space. But
such functions would only be in the same iommu "domain" and not "group",
if I get this right.
So, I guess we may have an issue with not sharing iommu-groups when
it could make sense to do so. But your patch would not fix this, as
we still would allocate separate iommu-groups for all functions.
Regards,
Gerald
On Thu, 27 Apr 2017 23:12:32 +0200
Joerg Roedel <[email protected]> wrote:
> On Thu, Apr 27, 2017 at 08:11:42PM +0200, Gerald Schaefer wrote:
> > > +void zpci_destroy_iommu(struct zpci_dev *zdev)
> > > +{
> > > + iommu_group_put(zdev->group);
> > > + zdev->group = NULL;
> > > +}
> >
> > While the rest of this patch doesn't seem to make much of a difference to
> > the current behavior, I'm wondering where this extra iommu_group_put()
> > comes from. It either was erroneously missing before this patch, or it
> > is erroneously introduced by this patch.
>
> This is the way to free an iommu-group. It was missing before probably
> because it was unclear whether the add_device function allocated a group
> or not. So there was no way to know if it needs to be put again in the
> remove_device function.
Hmm, for the reference count it should not matter whether a new group was
allocated or an existing group found with iommu_group_get(). Our add_device
callback always gets one reference either from iommu_group_get or _alloc,
and then another one from iommu_group_add_device(), after which the first
reference is put again. So there should always be one reference more after
a successful add_device.
Now I'm wondering where this one reference is put again, and I thought
that happened in the remove_device callback, where we call
iommu_group_remove_device(). Is this not correct? Just want to make sure
that we don't have a refcount issue in the current code.
Regards,
Gerald
On Fri, Apr 28, 2017 at 03:20:17PM +0200, Gerald Schaefer wrote:
> On Thu, 27 Apr 2017 23:12:32 +0200
> Joerg Roedel <[email protected]> wrote:
> > This is the way to free an iommu-group. It was missing before probably
> > because it was unclear whether the add_device function allocated a group
> > or not. So there was no way to know if it needs to be put again in the
> > remove_device function.
>
> Hmm, for the reference count it should not matter whether a new group was
> allocated or an existing group found with iommu_group_get(). Our add_device
> callback always gets one reference either from iommu_group_get or _alloc,
> and then another one from iommu_group_add_device(), after which the first
> reference is put again. So there should always be one reference more after
> a successful add_device.
Right, my statement above is wrong. The current code is fine, it gets a
reference to the group with iommu_group_get/iommu_group_alloc, attaches
the device to the group (which takes a reference to the group of its
own), and in the end it drops its local reference.
When the device->group link is broken up in the remove_device function,
that reference is also dropped. So everything is fine. The additional
iommu_group_put() in my patch is wrong.
Regards,
Joerg
Hi Gerald,
On Fri, Apr 28, 2017 at 02:46:34PM +0200, Gerald Schaefer wrote:
> On Thu, 27 Apr 2017 23:03:25 +0200
> Joerg Roedel <[email protected]> wrote:
>
> > > Well, there is a separate zpci_dev for each pci_dev on s390,
> > > and each of those has its own separate dma-table (thus not shared).
> >
> > Is that true for all functions of a PCIe card, so does every function of
> > a device has its own zpci_dev structure and thus its own DMA-table?
>
> Yes, clp_add_pci_device() is called for every function, which in turn calls
> zpci_create_device() with a freshly allocated zdev. zpci_enable_device()
> then sets up a new DMA address space for each function.
That sounds special :) So will every function of a single device end up
as a seperate device on a seperate root-bus?
> > My assumption came from the fact that the zpci_dev is read from
> > pci_dev->sysdata, which is propagated there from the pci_bridge
> > through the pci_root_bus structures.
>
> The zdev gets there via zpci_create_device() -> zpci_scan_bus() ->
> pci_scan_root_bus(), which is done for every single function.
>
> Not sure if I understand this right, but it looks like we set up a new PCI
> bus for each function.
Yeah, it sounds like this. Maybe Sebastian can confirm that?
> I am however a bit confused now, about how we would have allowed group
> sharing with the current s390 IOMMU code, or IOW in which scenario would
> iommu_group_get() in the add_device callback find a shareable iommu-group?
The usual way to do this is to use the iommu_group_get_for_dev()
function, which invokes the iommu_ops->device_group call-back of the
driver to find a matching group or allocating a new one.
There are ready-to-use functions for this call-back already:
1) generic_device_group() - which just allocates a new group for
the device. This is usually used outside of PCI
2) pci_device_group() - Which walks the PCI hierarchy to find
devices that are not isolated and uses the matching group for
its isolation domain.
A few drivers have their own versions of this call-back, but those are
IOMMU drivers supporting multiple bus-types and need to find the right
way to determine the group first.
> So, I guess we may have an issue with not sharing iommu-groups when
> it could make sense to do so. But your patch would not fix this, as
> we still would allocate separate iommu-groups for all functions.
Yes, but the above approach won't help when each function ends up on a
seperate bus because the code looks for different functions that are
enumerated as such. Anyway, some more insight into how this enumeration
works on s390 would be great :)
Regards,
Joerg
On Fri, 28 Apr 2017, Joerg Roedel wrote:
> On Fri, Apr 28, 2017 at 02:46:34PM +0200, Gerald Schaefer wrote:
> > On Thu, 27 Apr 2017 23:03:25 +0200
> > Joerg Roedel <[email protected]> wrote:
> >
> > > > Well, there is a separate zpci_dev for each pci_dev on s390,
> > > > and each of those has its own separate dma-table (thus not shared).
> > >
> > > Is that true for all functions of a PCIe card, so does every function of
> > > a device has its own zpci_dev structure and thus its own DMA-table?
> >
> > Yes, clp_add_pci_device() is called for every function, which in turn calls
> > zpci_create_device() with a freshly allocated zdev. zpci_enable_device()
> > then sets up a new DMA address space for each function.
>
> That sounds special :) So will every function of a single device end up
> as a seperate device on a seperate root-bus?
Yes. That's true even for multi-function and SRIOV.
> > > My assumption came from the fact that the zpci_dev is read from
> > > pci_dev->sysdata, which is propagated there from the pci_bridge
> > > through the pci_root_bus structures.
> >
> > The zdev gets there via zpci_create_device() -> zpci_scan_bus() ->
> > pci_scan_root_bus(), which is done for every single function.
> >
> > Not sure if I understand this right, but it looks like we set up a new PCI
> > bus for each function.
>
> Yeah, it sounds like this. Maybe Sebastian can confirm that?
Yes. Confirmed.
Regards,
Sebastian
Hi Joerg,
[auto build test ERROR on s390/features]
[also build test ERROR on v4.11-rc8 next-20170428]
[if your patch is applied to the wrong git tree, please drop us a note to help improve the system]
url: https://github.com/0day-ci/linux/commits/Joerg-Roedel/iommu-s390-Fix-iommu-groups-and-add-sysfs-support/20170429-003440
base: https://git.kernel.org/pub/scm/linux/kernel/git/s390/linux.git features
config: s390-default_defconfig (attached as .config)
compiler: s390x-linux-gnu-gcc (Debian 6.1.1-9) 6.1.1 20160705
reproduce:
wget https://raw.githubusercontent.com/01org/lkp-tests/master/sbin/make.cross -O ~/bin/make.cross
chmod +x ~/bin/make.cross
# save the attached .config to linux build tree
make.cross ARCH=s390
All errors (new ones prefixed by >>):
In file included from include/trace/define_trace.h:95:0,
from include/trace/events/iommu.h:167,
from include/linux/iommu.h:27,
from arch/s390/include/asm/pci.h:11,
from include/linux/pci.h:1627,
from include/trace/events/iommu.h:14,
from drivers/iommu/iommu-traces.c:12:
include/trace/events/iommu.h: In function 'ftrace_test_probe_add_device_to_group':
include/trace/trace_events.h:716:2: error: implicit declaration of function 'check_trace_callback_type_add_device_to_group' [-Werror=implicit-function-declaration]
check_trace_callback_type_##call(trace_event_raw_event_##template); \
^
include/trace/events/iommu.h:39:1: note: in expansion of macro 'DEFINE_EVENT'
DEFINE_EVENT(iommu_group_event, add_device_to_group,
^~~~~~~~~~~~
include/trace/events/iommu.h: In function 'ftrace_test_probe_remove_device_from_group':
include/trace/trace_events.h:716:2: error: implicit declaration of function 'check_trace_callback_type_remove_device_from_group' [-Werror=implicit-function-declaration]
check_trace_callback_type_##call(trace_event_raw_event_##template); \
^
include/trace/events/iommu.h:47:1: note: in expansion of macro 'DEFINE_EVENT'
DEFINE_EVENT(iommu_group_event, remove_device_from_group,
^~~~~~~~~~~~
include/trace/events/iommu.h: In function 'ftrace_test_probe_attach_device_to_domain':
include/trace/trace_events.h:716:2: error: implicit declaration of function 'check_trace_callback_type_attach_device_to_domain' [-Werror=implicit-function-declaration]
check_trace_callback_type_##call(trace_event_raw_event_##template); \
^
include/trace/events/iommu.h:72:1: note: in expansion of macro 'DEFINE_EVENT'
DEFINE_EVENT(iommu_device_event, attach_device_to_domain,
^~~~~~~~~~~~
include/trace/events/iommu.h: In function 'ftrace_test_probe_detach_device_from_domain':
include/trace/trace_events.h:716:2: error: implicit declaration of function 'check_trace_callback_type_detach_device_from_domain' [-Werror=implicit-function-declaration]
check_trace_callback_type_##call(trace_event_raw_event_##template); \
^
include/trace/events/iommu.h:79:1: note: in expansion of macro 'DEFINE_EVENT'
DEFINE_EVENT(iommu_device_event, detach_device_from_domain,
^~~~~~~~~~~~
include/trace/events/iommu.h: In function 'ftrace_test_probe_map':
include/trace/trace_events.h:716:2: error: implicit declaration of function 'check_trace_callback_type_map' [-Werror=implicit-function-declaration]
check_trace_callback_type_##call(trace_event_raw_event_##template); \
^
include/trace/trace_events.h:66:2: note: in expansion of macro 'DEFINE_EVENT'
DEFINE_EVENT(name, name, PARAMS(proto), PARAMS(args));
^~~~~~~~~~~~
include/trace/events/iommu.h:86:1: note: in expansion of macro 'TRACE_EVENT'
TRACE_EVENT(map,
^~~~~~~~~~~
include/trace/events/iommu.h: In function 'ftrace_test_probe_unmap':
include/trace/trace_events.h:716:2: error: implicit declaration of function 'check_trace_callback_type_unmap' [-Werror=implicit-function-declaration]
check_trace_callback_type_##call(trace_event_raw_event_##template); \
^
include/trace/trace_events.h:66:2: note: in expansion of macro 'DEFINE_EVENT'
DEFINE_EVENT(name, name, PARAMS(proto), PARAMS(args));
^~~~~~~~~~~~
include/trace/events/iommu.h:109:1: note: in expansion of macro 'TRACE_EVENT'
TRACE_EVENT(unmap,
^~~~~~~~~~~
include/trace/events/iommu.h: In function 'ftrace_test_probe_io_page_fault':
include/trace/trace_events.h:716:2: error: implicit declaration of function 'check_trace_callback_type_io_page_fault' [-Werror=implicit-function-declaration]
check_trace_callback_type_##call(trace_event_raw_event_##template); \
^
include/trace/events/iommu.h:158:1: note: in expansion of macro 'DEFINE_EVENT'
DEFINE_EVENT(iommu_error, io_page_fault,
^~~~~~~~~~~~
In file included from arch/s390/include/asm/pci.h:11:0,
from include/linux/pci.h:1627,
from include/trace/events/iommu.h:14,
from drivers/iommu/iommu-traces.c:12:
include/linux/iommu.h: In function 'report_iommu_fault':
>> include/linux/iommu.h:368:2: error: implicit declaration of function 'trace_io_page_fault' [-Werror=implicit-function-declaration]
trace_io_page_fault(dev, iova, flags);
^~~~~~~~~~~~~~~~~~~
In file included from include/trace/events/iommu.h:13:0,
from drivers/iommu/iommu-traces.c:12:
include/trace/events/iommu.h: At top level:
>> include/trace/events/iommu.h:20:11: error: expected ')' before 'int'
TP_PROTO(int group_id, struct device *dev),
^
include/linux/tracepoint.h:104:27: note: in definition of macro 'TP_PROTO'
#define TP_PROTO(args...) args
^~~~
include/trace/events/iommu.h:41:11: error: expected ')' before 'int'
TP_PROTO(int group_id, struct device *dev),
^
include/linux/tracepoint.h:104:27: note: in definition of macro 'TP_PROTO'
#define TP_PROTO(args...) args
^~~~
include/trace/events/iommu.h:49:11: error: expected ')' before 'int'
TP_PROTO(int group_id, struct device *dev),
^
include/linux/tracepoint.h:104:27: note: in definition of macro 'TP_PROTO'
#define TP_PROTO(args...) args
^~~~
>> include/trace/events/iommu.h:56:11: error: expected ')' before 'struct'
TP_PROTO(struct device *dev),
^
include/linux/tracepoint.h:104:27: note: in definition of macro 'TP_PROTO'
#define TP_PROTO(args...) args
^~~~
include/trace/events/iommu.h:74:11: error: expected ')' before 'struct'
TP_PROTO(struct device *dev),
^
include/linux/tracepoint.h:104:27: note: in definition of macro 'TP_PROTO'
#define TP_PROTO(args...) args
^~~~
include/trace/events/iommu.h:81:11: error: expected ')' before 'struct'
TP_PROTO(struct device *dev),
^
include/linux/tracepoint.h:104:27: note: in definition of macro 'TP_PROTO'
#define TP_PROTO(args...) args
^~~~
>> include/trace/events/iommu.h:88:11: error: expected ')' before 'unsigned'
TP_PROTO(unsigned long iova, phys_addr_t paddr, size_t size),
^
include/linux/tracepoint.h:104:27: note: in definition of macro 'TP_PROTO'
#define TP_PROTO(args...) args
^~~~
include/trace/events/iommu.h:111:11: error: expected ')' before 'unsigned'
TP_PROTO(unsigned long iova, size_t size, size_t unmapped_size),
^
include/linux/tracepoint.h:104:27: note: in definition of macro 'TP_PROTO'
#define TP_PROTO(args...) args
^~~~
include/trace/events/iommu.h:134:11: error: expected ')' before 'struct'
TP_PROTO(struct device *dev, unsigned long iova, int flags),
^
include/linux/tracepoint.h:104:27: note: in definition of macro 'TP_PROTO'
#define TP_PROTO(args...) args
^~~~
include/trace/events/iommu.h:160:11: error: expected ')' before 'struct'
TP_PROTO(struct device *dev, unsigned long iova, int flags),
^
include/linux/tracepoint.h:104:27: note: in definition of macro 'TP_PROTO'
#define TP_PROTO(args...) args
^~~~
>> include/trace/events/iommu.h:20:11: error: expected ')' before 'int'
TP_PROTO(int group_id, struct device *dev),
^
include/linux/tracepoint.h:104:27: note: in definition of macro 'TP_PROTO'
#define TP_PROTO(args...) args
^~~~
include/linux/tracepoint.h:233:20: error: redefinition of '__tpstrtab_add_device_to_group'
static const char __tpstrtab_##name[] \
^
include/linux/tracepoint.h:243:2: note: in expansion of macro 'DEFINE_TRACE_FN'
DEFINE_TRACE_FN(name, NULL, NULL);
^~~~~~~~~~~~~~~
include/trace/define_trace.h:50:2: note: in expansion of macro 'DEFINE_TRACE'
DEFINE_TRACE(name)
^~~~~~~~~~~~
include/trace/events/iommu.h:39:1: note: in expansion of macro 'DEFINE_EVENT'
DEFINE_EVENT(iommu_group_event, add_device_to_group,
^~~~~~~~~~~~
include/linux/tracepoint.h:233:20: note: previous definition of '__tpstrtab_add_device_to_group' was here
static const char __tpstrtab_##name[] \
^
include/linux/tracepoint.h:243:2: note: in expansion of macro 'DEFINE_TRACE_FN'
DEFINE_TRACE_FN(name, NULL, NULL);
^~~~~~~~~~~~~~~
include/trace/define_trace.h:50:2: note: in expansion of macro 'DEFINE_TRACE'
DEFINE_TRACE(name)
^~~~~~~~~~~~
include/trace/events/iommu.h:39:1: note: in expansion of macro 'DEFINE_EVENT'
DEFINE_EVENT(iommu_group_event, add_device_to_group,
^~~~~~~~~~~~
include/linux/tracepoint.h:235:20: error: redefinition of '__tracepoint_add_device_to_group'
struct tracepoint __tracepoint_##name \
^
include/linux/tracepoint.h:243:2: note: in expansion of macro 'DEFINE_TRACE_FN'
DEFINE_TRACE_FN(name, NULL, NULL);
^~~~~~~~~~~~~~~
include/trace/define_trace.h:50:2: note: in expansion of macro 'DEFINE_TRACE'
DEFINE_TRACE(name)
^~~~~~~~~~~~
include/trace/events/iommu.h:39:1: note: in expansion of macro 'DEFINE_EVENT'
DEFINE_EVENT(iommu_group_event, add_device_to_group,
^~~~~~~~~~~~
include/linux/tracepoint.h:235:20: note: previous definition of '__tracepoint_add_device_to_group' was here
struct tracepoint __tracepoint_##name \
^
include/linux/tracepoint.h:243:2: note: in expansion of macro 'DEFINE_TRACE_FN'
DEFINE_TRACE_FN(name, NULL, NULL);
^~~~~~~~~~~~~~~
include/trace/define_trace.h:50:2: note: in expansion of macro 'DEFINE_TRACE'
DEFINE_TRACE(name)
^~~~~~~~~~~~
include/trace/events/iommu.h:39:1: note: in expansion of macro 'DEFINE_EVENT'
DEFINE_EVENT(iommu_group_event, add_device_to_group,
^~~~~~~~~~~~
include/linux/tracepoint.h:238:35: error: redefinition of '__tracepoint_ptr_add_device_to_group'
static struct tracepoint * const __tracepoint_ptr_##name __used \
^
include/linux/tracepoint.h:243:2: note: in expansion of macro 'DEFINE_TRACE_FN'
DEFINE_TRACE_FN(name, NULL, NULL);
^~~~~~~~~~~~~~~
include/trace/define_trace.h:50:2: note: in expansion of macro 'DEFINE_TRACE'
DEFINE_TRACE(name)
^~~~~~~~~~~~
include/trace/events/iommu.h:39:1: note: in expansion of macro 'DEFINE_EVENT'
DEFINE_EVENT(iommu_group_event, add_device_to_group,
^~~~~~~~~~~~
include/linux/tracepoint.h:238:35: note: previous definition of '__tracepoint_ptr_add_device_to_group' was here
static struct tracepoint * const __tracepoint_ptr_##name __used \
^
include/linux/tracepoint.h:243:2: note: in expansion of macro 'DEFINE_TRACE_FN'
DEFINE_TRACE_FN(name, NULL, NULL);
^~~~~~~~~~~~~~~
include/trace/define_trace.h:50:2: note: in expansion of macro 'DEFINE_TRACE'
DEFINE_TRACE(name)
^~~~~~~~~~~~
include/trace/events/iommu.h:39:1: note: in expansion of macro 'DEFINE_EVENT'
DEFINE_EVENT(iommu_group_event, add_device_to_group,
^~~~~~~~~~~~
include/linux/tracepoint.h:233:20: error: redefinition of '__tpstrtab_remove_device_from_group'
static const char __tpstrtab_##name[] \
^
include/linux/tracepoint.h:243:2: note: in expansion of macro 'DEFINE_TRACE_FN'
DEFINE_TRACE_FN(name, NULL, NULL);
^~~~~~~~~~~~~~~
include/trace/define_trace.h:50:2: note: in expansion of macro 'DEFINE_TRACE'
DEFINE_TRACE(name)
^~~~~~~~~~~~
include/trace/events/iommu.h:47:1: note: in expansion of macro 'DEFINE_EVENT'
DEFINE_EVENT(iommu_group_event, remove_device_from_group,
^~~~~~~~~~~~
include/linux/tracepoint.h:233:20: note: previous definition of '__tpstrtab_remove_device_from_group' was here
static const char __tpstrtab_##name[] \
^
include/linux/tracepoint.h:243:2: note: in expansion of macro 'DEFINE_TRACE_FN'
DEFINE_TRACE_FN(name, NULL, NULL);
^~~~~~~~~~~~~~~
include/trace/define_trace.h:50:2: note: in expansion of macro 'DEFINE_TRACE'
DEFINE_TRACE(name)
^~~~~~~~~~~~
include/trace/events/iommu.h:47:1: note: in expansion of macro 'DEFINE_EVENT'
DEFINE_EVENT(iommu_group_event, remove_device_from_group,
^~~~~~~~~~~~
include/linux/tracepoint.h:235:20: error: redefinition of '__tracepoint_remove_device_from_group'
struct tracepoint __tracepoint_##name \
^
include/linux/tracepoint.h:243:2: note: in expansion of macro 'DEFINE_TRACE_FN'
DEFINE_TRACE_FN(name, NULL, NULL);
^~~~~~~~~~~~~~~
include/trace/define_trace.h:50:2: note: in expansion of macro 'DEFINE_TRACE'
DEFINE_TRACE(name)
^~~~~~~~~~~~
include/trace/events/iommu.h:47:1: note: in expansion of macro 'DEFINE_EVENT'
DEFINE_EVENT(iommu_group_event, remove_device_from_group,
^~~~~~~~~~~~
include/linux/tracepoint.h:235:20: note: previous definition of '__tracepoint_remove_device_from_group' was here
struct tracepoint __tracepoint_##name \
^
include/linux/tracepoint.h:243:2: note: in expansion of macro 'DEFINE_TRACE_FN'
DEFINE_TRACE_FN(name, NULL, NULL);
^~~~~~~~~~~~~~~
include/trace/define_trace.h:50:2: note: in expansion of macro 'DEFINE_TRACE'
DEFINE_TRACE(name)
^~~~~~~~~~~~
include/trace/events/iommu.h:47:1: note: in expansion of macro 'DEFINE_EVENT'
DEFINE_EVENT(iommu_group_event, remove_device_from_group,
^~~~~~~~~~~~
include/linux/tracepoint.h:238:35: error: redefinition of '__tracepoint_ptr_remove_device_from_group'
static struct tracepoint * const __tracepoint_ptr_##name __used \
^
include/linux/tracepoint.h:243:2: note: in expansion of macro 'DEFINE_TRACE_FN'
DEFINE_TRACE_FN(name, NULL, NULL);
^~~~~~~~~~~~~~~
include/trace/define_trace.h:50:2: note: in expansion of macro 'DEFINE_TRACE'
DEFINE_TRACE(name)
^~~~~~~~~~~~
include/trace/events/iommu.h:47:1: note: in expansion of macro 'DEFINE_EVENT'
DEFINE_EVENT(iommu_group_event, remove_device_from_group,
^~~~~~~~~~~~
include/linux/tracepoint.h:238:35: note: previous definition of '__tracepoint_ptr_remove_device_from_group' was here
static struct tracepoint * const __tracepoint_ptr_##name __used \
^
include/linux/tracepoint.h:243:2: note: in expansion of macro 'DEFINE_TRACE_FN'
DEFINE_TRACE_FN(name, NULL, NULL);
^~~~~~~~~~~~~~~
include/trace/define_trace.h:50:2: note: in expansion of macro 'DEFINE_TRACE'
DEFINE_TRACE(name)
^~~~~~~~~~~~
include/trace/events/iommu.h:47:1: note: in expansion of macro 'DEFINE_EVENT'
DEFINE_EVENT(iommu_group_event, remove_device_from_group,
^~~~~~~~~~~~
>> include/trace/events/iommu.h:56:11: error: expected ')' before 'struct'
TP_PROTO(struct device *dev),
^
include/linux/tracepoint.h:104:27: note: in definition of macro 'TP_PROTO'
#define TP_PROTO(args...) args
^~~~
include/linux/tracepoint.h:233:20: error: redefinition of '__tpstrtab_attach_device_to_domain'
static const char __tpstrtab_##name[] \
^
include/linux/tracepoint.h:243:2: note: in expansion of macro 'DEFINE_TRACE_FN'
DEFINE_TRACE_FN(name, NULL, NULL);
^~~~~~~~~~~~~~~
include/trace/define_trace.h:50:2: note: in expansion of macro 'DEFINE_TRACE'
DEFINE_TRACE(name)
^~~~~~~~~~~~
include/trace/events/iommu.h:72:1: note: in expansion of macro 'DEFINE_EVENT'
DEFINE_EVENT(iommu_device_event, attach_device_to_domain,
^~~~~~~~~~~~
include/linux/tracepoint.h:233:20: note: previous definition of '__tpstrtab_attach_device_to_domain' was here
static const char __tpstrtab_##name[] \
^
include/linux/tracepoint.h:243:2: note: in expansion of macro 'DEFINE_TRACE_FN'
DEFINE_TRACE_FN(name, NULL, NULL);
^~~~~~~~~~~~~~~
include/trace/define_trace.h:50:2: note: in expansion of macro 'DEFINE_TRACE'
DEFINE_TRACE(name)
^~~~~~~~~~~~
include/trace/events/iommu.h:72:1: note: in expansion of macro 'DEFINE_EVENT'
DEFINE_EVENT(iommu_device_event, attach_device_to_domain,
^~~~~~~~~~~~
include/linux/tracepoint.h:235:20: error: redefinition of '__tracepoint_attach_device_to_domain'
struct tracepoint __tracepoint_##name \
^
include/linux/tracepoint.h:243:2: note: in expansion of macro 'DEFINE_TRACE_FN'
DEFINE_TRACE_FN(name, NULL, NULL);
^~~~~~~~~~~~~~~
include/trace/define_trace.h:50:2: note: in expansion of macro 'DEFINE_TRACE'
DEFINE_TRACE(name)
^~~~~~~~~~~~
include/trace/events/iommu.h:72:1: note: in expansion of macro 'DEFINE_EVENT'
DEFINE_EVENT(iommu_device_event, attach_device_to_domain,
^~~~~~~~~~~~
include/linux/tracepoint.h:235:20: note: previous definition of '__tracepoint_attach_device_to_domain' was here
struct tracepoint __tracepoint_##name \
^
include/linux/tracepoint.h:243:2: note: in expansion of macro 'DEFINE_TRACE_FN'
DEFINE_TRACE_FN(name, NULL, NULL);
^~~~~~~~~~~~~~~
include/trace/define_trace.h:50:2: note: in expansion of macro 'DEFINE_TRACE'
DEFINE_TRACE(name)
^~~~~~~~~~~~
include/trace/events/iommu.h:72:1: note: in expansion of macro 'DEFINE_EVENT'
DEFINE_EVENT(iommu_device_event, attach_device_to_domain,
^~~~~~~~~~~~
include/linux/tracepoint.h:238:35: error: redefinition of '__tracepoint_ptr_attach_device_to_domain'
static struct tracepoint * const __tracepoint_ptr_##name __used \
^
include/linux/tracepoint.h:243:2: note: in expansion of macro 'DEFINE_TRACE_FN'
DEFINE_TRACE_FN(name, NULL, NULL);
^~~~~~~~~~~~~~~
include/trace/define_trace.h:50:2: note: in expansion of macro 'DEFINE_TRACE'
DEFINE_TRACE(name)
^~~~~~~~~~~~
include/trace/events/iommu.h:72:1: note: in expansion of macro 'DEFINE_EVENT'
DEFINE_EVENT(iommu_device_event, attach_device_to_domain,
^~~~~~~~~~~~
include/linux/tracepoint.h:238:35: note: previous definition of '__tracepoint_ptr_attach_device_to_domain' was here
static struct tracepoint * const __tracepoint_ptr_##name __used \
^
include/linux/tracepoint.h:243:2: note: in expansion of macro 'DEFINE_TRACE_FN'
DEFINE_TRACE_FN(name, NULL, NULL);
^~~~~~~~~~~~~~~
include/trace/define_trace.h:50:2: note: in expansion of macro 'DEFINE_TRACE'
DEFINE_TRACE(name)
^~~~~~~~~~~~
include/trace/events/iommu.h:72:1: note: in expansion of macro 'DEFINE_EVENT'
DEFINE_EVENT(iommu_device_event, attach_device_to_domain,
^~~~~~~~~~~~
include/linux/tracepoint.h:233:20: error: redefinition of '__tpstrtab_detach_device_from_domain'
static const char __tpstrtab_##name[] \
^
include/linux/tracepoint.h:243:2: note: in expansion of macro 'DEFINE_TRACE_FN'
DEFINE_TRACE_FN(name, NULL, NULL);
^~~~~~~~~~~~~~~
include/trace/define_trace.h:50:2: note: in expansion of macro 'DEFINE_TRACE'
DEFINE_TRACE(name)
^~~~~~~~~~~~
include/trace/events/iommu.h:79:1: note: in expansion of macro 'DEFINE_EVENT'
DEFINE_EVENT(iommu_device_event, detach_device_from_domain,
^~~~~~~~~~~~
include/linux/tracepoint.h:233:20: note: previous definition of '__tpstrtab_detach_device_from_domain' was here
static const char __tpstrtab_##name[] \
^
include/linux/tracepoint.h:243:2: note: in expansion of macro 'DEFINE_TRACE_FN'
DEFINE_TRACE_FN(name, NULL, NULL);
^~~~~~~~~~~~~~~
include/trace/define_trace.h:50:2: note: in expansion of macro 'DEFINE_TRACE'
DEFINE_TRACE(name)
^~~~~~~~~~~~
include/trace/events/iommu.h:79:1: note: in expansion of macro 'DEFINE_EVENT'
DEFINE_EVENT(iommu_device_event, detach_device_from_domain,
..
vim +/trace_io_page_fault +368 include/linux/iommu.h
4f3f8d9d Ohad Ben-Cohen 2011-09-13 362 * invoke it.
4f3f8d9d Ohad Ben-Cohen 2011-09-13 363 */
4f3f8d9d Ohad Ben-Cohen 2011-09-13 364 if (domain->handler)
77ca2332 Ohad Ben-Cohen 2012-05-21 365 ret = domain->handler(domain, dev, iova, flags,
77ca2332 Ohad Ben-Cohen 2012-05-21 366 domain->handler_token);
4a77a6cf Joerg Roedel 2008-11-26 367
56fa4849 Shuah Khan 2013-09-24 @368 trace_io_page_fault(dev, iova, flags);
4f3f8d9d Ohad Ben-Cohen 2011-09-13 369 return ret;
4a77a6cf Joerg Roedel 2008-11-26 370 }
4a77a6cf Joerg Roedel 2008-11-26 371
:::::: The code at line 368 was first introduced by commit
:::::: 56fa484969c367e3ae43a012a7b99f75bb4f3bdb iommu: Change iommu driver to call io_page_fault trace event
:::::: TO: Shuah Khan <[email protected]>
:::::: CC: Joerg Roedel <[email protected]>
---
0-DAY kernel test infrastructure Open Source Technology Center
https://lists.01.org/pipermail/kbuild-all Intel Corporation
On Fri, 28 Apr 2017 16:55:13 +0200
Joerg Roedel <[email protected]> wrote:
> > I am however a bit confused now, about how we would have allowed group
> > sharing with the current s390 IOMMU code, or IOW in which scenario would
> > iommu_group_get() in the add_device callback find a shareable iommu-group?
>
> The usual way to do this is to use the iommu_group_get_for_dev()
> function, which invokes the iommu_ops->device_group call-back of the
> driver to find a matching group or allocating a new one.
>
> There are ready-to-use functions for this call-back already:
>
> 1) generic_device_group() - which just allocates a new group for
> the device. This is usually used outside of PCI
>
> 2) pci_device_group() - Which walks the PCI hierarchy to find
> devices that are not isolated and uses the matching group for
> its isolation domain.
>
> A few drivers have their own versions of this call-back, but those are
> IOMMU drivers supporting multiple bus-types and need to find the right
> way to determine the group first.
>
> > So, I guess we may have an issue with not sharing iommu-groups when
> > it could make sense to do so. But your patch would not fix this, as
> > we still would allocate separate iommu-groups for all functions.
>
> Yes, but the above approach won't help when each function ends up on a
> seperate bus because the code looks for different functions that are
> enumerated as such. Anyway, some more insight into how this enumeration
> works on s390 would be great :)
Since Sebastian confirmed this, it looks like we do not really have any
enumeration when there is a separate bus for each function.
Also, IIRC, add_device will get called before attach_dev. Currently we
allow to attach more than one device (apparently from different buses) to
one domain (one shared DMA table) in attach_dev. But then it would be too
late to also add all devices to the same iommu-group. That would have had
to be done earlier in add_device, but there we don't know yet if a shared
DMA table would be set up later in attach_dev.
So, it looks to me that we cannot provide correct iommu-group sharing
on s390, even though we allow iommu-domain sharing, which sounds odd. Since
this "shared domain / DMA table" option in attach_dev was only added
because at that time I thought that was a hard requirement for any arch-
specific IOMMU API implementation, maybe there was some misunderstanding.
It would make the code easier (and more consistent with the s390 hardware)
if I would just remove that option from attach_dev, and allow only one
device/function per iommu-domain. What do you think, could this be removed
for s390, or is there any common code requirement for providing that option
(and is it OK that we have separate iommu-groups in this case)?
Regards,
Gerald
On Fri, Apr 28, 2017 at 05:25:20PM +0200, Sebastian Ott wrote:
> On Fri, 28 Apr 2017, Joerg Roedel wrote:
> > That sounds special :) So will every function of a single device end up
> > as a seperate device on a seperate root-bus?
>
> Yes. That's true even for multi-function and SRIOV.
Okay, so its truly special :) Thanks for your confirmation.
Joerg
Hi Gerald,
On Fri, Apr 28, 2017 at 08:06:12PM +0200, Gerald Schaefer wrote:
> On Fri, 28 Apr 2017 16:55:13 +0200
> Joerg Roedel <[email protected]> wrote:
> Also, IIRC, add_device will get called before attach_dev. Currently we
> allow to attach more than one device (apparently from different buses) to
> one domain (one shared DMA table) in attach_dev. But then it would be too
> late to also add all devices to the same iommu-group. That would have had
> to be done earlier in add_device, but there we don't know yet if a shared
> DMA table would be set up later in attach_dev.
I think there is some misunderstanding here about what iommu-groups are.
An iommu-group is a group of devices that are not isolated from each
other wrt. DMA and/or IRQs. This means that the devices can influence
each other, e.g. directly DMA to each other without IOMMU control. So
the grouping relies on how the hardware is built.
Domains on the other side are a software controled concept. A domain is
basically an abstraction of a DMA address space. Multiple devices can
share on domain/address-space, just as multiple threads can share a
cpu address space.
The point of iommu-grouping is to make sure that we don't assign
different domains to devices in the same group, as that could break or
cause security issues without proper isolation.
Regards,
Joerg
Hi Joerg,
[auto build test ERROR on s390/features]
[also build test ERROR on v4.11-rc8 next-20170428]
[if your patch is applied to the wrong git tree, please drop us a note to help improve the system]
url: https://github.com/0day-ci/linux/commits/Joerg-Roedel/iommu-s390-Fix-iommu-groups-and-add-sysfs-support/20170429-003440
base: https://git.kernel.org/pub/scm/linux/kernel/git/s390/linux.git features
config: s390-allmodconfig (attached as .config)
compiler: s390x-linux-gnu-gcc (Debian 6.1.1-9) 6.1.1 20160705
reproduce:
wget https://raw.githubusercontent.com/01org/lkp-tests/master/sbin/make.cross -O ~/bin/make.cross
chmod +x ~/bin/make.cross
# save the attached .config to linux build tree
make.cross ARCH=s390
All errors (new ones prefixed by >>):
In file included from include/linux/pci.h:1627:0,
from include/trace/events/iommu.h:14,
from include/linux/iommu.h:27,
from drivers/s390/cio/vfio_ccw_cp.c:12:
>> arch/s390/include/asm/pci.h:127:22: error: field 'iommu_dev' has incomplete type
struct iommu_device iommu_dev; /* IOMMU core handle */
^~~~~~~~~
vim +/iommu_dev +127 arch/s390/include/asm/pci.h
121 unsigned long *iommu_bitmap;
122 unsigned long *lazy_bitmap;
123 unsigned long iommu_size;
124 unsigned long iommu_pages;
125 unsigned int next_bit;
126
> 127 struct iommu_device iommu_dev; /* IOMMU core handle */
128 struct iommu_group *group; /* IOMMU group for all devices behind this zdev */
129
130 char res_name[16];
---
0-DAY kernel test infrastructure Open Source Technology Center
https://lists.01.org/pipermail/kbuild-all Intel Corporation