From: Joerg Roedel <[email protected]>
Add support for the iommu_device_register interface to make
the s390 hardware iommus visible to the iommu core and in
sysfs.
Signed-off-by: Joerg Roedel <[email protected]>
---
arch/s390/include/asm/pci.h | 7 +++++++
arch/s390/pci/pci.c | 9 ++++++++-
drivers/iommu/s390-iommu.c | 35 +++++++++++++++++++++++++++++++++++
3 files changed, 50 insertions(+), 1 deletion(-)
v2->v3:
- Rebased to v4.13-rc4
- Fixed error path in zpci_create_device() to
destroy the iommu instance too (noted by
Gerald Schaefer)
diff --git a/arch/s390/include/asm/pci.h b/arch/s390/include/asm/pci.h
index f36b4b726057..386df9adef0a 100644
--- a/arch/s390/include/asm/pci.h
+++ b/arch/s390/include/asm/pci.h
@@ -8,6 +8,7 @@
#include <linux/pci.h>
#include <linux/mutex.h>
+#include <linux/iommu.h>
#include <asm-generic/pci.h>
#include <asm/pci_clp.h>
#include <asm/pci_debug.h>
@@ -122,6 +123,8 @@ struct zpci_dev {
unsigned long iommu_pages;
unsigned int next_bit;
+ struct iommu_device iommu_dev; /* IOMMU core handle */
+
char res_name[16];
struct zpci_bar_struct bars[PCI_BAR_COUNT];
@@ -174,6 +177,10 @@ int clp_enable_fh(struct zpci_dev *, u8);
int clp_disable_fh(struct zpci_dev *);
int clp_get_state(u32 fid, enum zpci_state *state);
+/* IOMMU Interface */
+int zpci_init_iommu(struct zpci_dev *zdev);
+void zpci_destroy_iommu(struct zpci_dev *zdev);
+
#ifdef CONFIG_PCI
/* Error handling and recovery */
void zpci_event_error(void *);
diff --git a/arch/s390/pci/pci.c b/arch/s390/pci/pci.c
index 7b30af5da222..001ca80fa2fe 100644
--- a/arch/s390/pci/pci.c
+++ b/arch/s390/pci/pci.c
@@ -776,6 +776,7 @@ void pcibios_remove_bus(struct pci_bus *bus)
zpci_exit_slot(zdev);
zpci_cleanup_bus_resources(zdev);
+ zpci_destroy_iommu(zdev);
zpci_free_domain(zdev);
spin_lock(&zpci_list_lock);
@@ -848,11 +849,15 @@ int zpci_create_device(struct zpci_dev *zdev)
if (rc)
goto out;
+ rc = zpci_init_iommu(zdev);
+ if (rc)
+ goto out_free;
+
mutex_init(&zdev->lock);
if (zdev->state == ZPCI_FN_STATE_CONFIGURED) {
rc = zpci_enable_device(zdev);
if (rc)
- goto out_free;
+ goto out_destroy_iommu;
}
rc = zpci_scan_bus(zdev);
if (rc)
@@ -869,6 +874,8 @@ int zpci_create_device(struct zpci_dev *zdev)
out_disable:
if (zdev->state == ZPCI_FN_STATE_ONLINE)
zpci_disable_device(zdev);
+out_destroy_iommu:
+ zpci_destroy_iommu(zdev);
out_free:
zpci_free_domain(zdev);
out:
diff --git a/drivers/iommu/s390-iommu.c b/drivers/iommu/s390-iommu.c
index 8788640756a7..85f3bc52efc2 100644
--- a/drivers/iommu/s390-iommu.c
+++ b/drivers/iommu/s390-iommu.c
@@ -18,6 +18,8 @@
*/
#define S390_IOMMU_PGSIZES (~0xFFFUL)
+static struct iommu_ops s390_iommu_ops;
+
struct s390_domain {
struct iommu_domain domain;
struct list_head devices;
@@ -166,11 +168,13 @@ static void s390_iommu_detach_device(struct iommu_domain *domain,
static int s390_iommu_add_device(struct device *dev)
{
struct iommu_group *group = iommu_group_get_for_dev(dev);
+ struct zpci_dev *zdev = to_pci_dev(dev)->sysdata;
if (IS_ERR(group))
return PTR_ERR(group);
iommu_group_put(group);
+ iommu_device_link(&zdev->iommu_dev, dev);
return 0;
}
@@ -197,6 +201,7 @@ static void s390_iommu_remove_device(struct device *dev)
s390_iommu_detach_device(domain, dev);
}
+ iommu_device_unlink(&zdev->iommu_dev, dev);
iommu_group_remove_device(dev);
}
@@ -327,6 +332,36 @@ static size_t s390_iommu_unmap(struct iommu_domain *domain,
return size;
}
+int zpci_init_iommu(struct zpci_dev *zdev)
+{
+ int rc = 0;
+
+ rc = iommu_device_sysfs_add(&zdev->iommu_dev, NULL, NULL,
+ "s390-iommu.%08x", zdev->fid);
+ if (rc)
+ goto out_err;
+
+ iommu_device_set_ops(&zdev->iommu_dev, &s390_iommu_ops);
+
+ rc = iommu_device_register(&zdev->iommu_dev);
+ if (rc)
+ goto out_sysfs;
+
+ return 0;
+
+out_sysfs:
+ iommu_device_sysfs_remove(&zdev->iommu_dev);
+
+out_err:
+ return rc;
+}
+
+void zpci_destroy_iommu(struct zpci_dev *zdev)
+{
+ iommu_device_unregister(&zdev->iommu_dev);
+ iommu_device_sysfs_remove(&zdev->iommu_dev);
+}
+
static struct iommu_ops s390_iommu_ops = {
.capable = s390_iommu_capable,
.domain_alloc = s390_domain_alloc,
--
2.12.3
On Wed, 9 Aug 2017, Joerg Roedel wrote:
> From: Joerg Roedel <[email protected]>
>
> Add support for the iommu_device_register interface to make
> the s390 hardware iommus visible to the iommu core and in
> sysfs.
>
With this patch pci hot-unplug fails with a use after free or refcounting
issue - I'm currently trying to understand what's going on...
Sebastian
Hi Sebastian,
On Thu, Aug 10, 2017 at 09:07:06PM +0200, Sebastian Ott wrote:
> On Wed, 9 Aug 2017, Joerg Roedel wrote:
> > From: Joerg Roedel <[email protected]>
> >
> > Add support for the iommu_device_register interface to make
> > the s390 hardware iommus visible to the iommu core and in
> > sysfs.
> >
>
> With this patch pci hot-unplug fails with a use after free or refcounting
> issue - I'm currently trying to understand what's going on...
Thanks for testing and looking into the issue.
Regards,
Joerg
Hey Sebastian,
On Thu, Aug 10, 2017 at 09:07:06PM +0200, Sebastian Ott wrote:
> With this patch pci hot-unplug fails with a use after free or refcounting
> issue - I'm currently trying to understand what's going on...
Let me know if I can help with debugging the issue, do you have a
backtrace for me to look at?
Regards,
Joerg
Hello Joerg,
On Fri, 11 Aug 2017, Joerg Roedel wrote:
> Hey Sebastian,
>
> On Thu, Aug 10, 2017 at 09:07:06PM +0200, Sebastian Ott wrote:
> > With this patch pci hot-unplug fails with a use after free or refcounting
> > issue - I'm currently trying to understand what's going on...
>
> Let me know if I can help with debugging the issue, do you have a
> backtrace for me to look at?
I would have send backtraces but everyone looked different: random mem
corruptions, panic during unrelated allocations and stuff like that.
..but I found the bug, actually 2 bugs:
* That patch embedded a struct iommu_device within struct zpci_dev but
the iommu_device has a release function (via its class) - so when
the release function gets called it frees memory that was never allocated.
The fix is to not embedd struct iommu_device in zpci_dev (see below)
* iommu_release_device must not release the struct device but the
structure it is embedded in: struct iommu_device (I'll send a patch
for that)
With these fixed it works fine.
Sebastian
---
arch/s390/include/asm/pci.h | 2 +-
drivers/iommu/s390-iommu.c | 20 ++++++++++++--------
2 files changed, 13 insertions(+), 9 deletions(-)
diff --git a/arch/s390/include/asm/pci.h b/arch/s390/include/asm/pci.h
index 386df9a..de3129e 100644
--- a/arch/s390/include/asm/pci.h
+++ b/arch/s390/include/asm/pci.h
@@ -123,7 +123,7 @@ struct zpci_dev {
unsigned long iommu_pages;
unsigned int next_bit;
- struct iommu_device iommu_dev; /* IOMMU core handle */
+ struct iommu_device *iommu_dev; /* IOMMU core handle */
char res_name[16];
struct zpci_bar_struct bars[PCI_BAR_COUNT];
diff --git a/drivers/iommu/s390-iommu.c b/drivers/iommu/s390-iommu.c
index 85f3bc5..58a7414 100644
--- a/drivers/iommu/s390-iommu.c
+++ b/drivers/iommu/s390-iommu.c
@@ -174,7 +174,7 @@ static int s390_iommu_add_device(struct device *dev)
return PTR_ERR(group);
iommu_group_put(group);
- iommu_device_link(&zdev->iommu_dev, dev);
+ iommu_device_link(zdev->iommu_dev, dev);
return 0;
}
@@ -201,7 +201,7 @@ static void s390_iommu_remove_device(struct device *dev)
s390_iommu_detach_device(domain, dev);
}
- iommu_device_unlink(&zdev->iommu_dev, dev);
+ iommu_device_unlink(zdev->iommu_dev, dev);
iommu_group_remove_device(dev);
}
@@ -336,21 +336,25 @@ int zpci_init_iommu(struct zpci_dev *zdev)
{
int rc = 0;
- rc = iommu_device_sysfs_add(&zdev->iommu_dev, NULL, NULL,
+ zdev->iommu_dev = kzalloc(sizeof(*zdev->iommu_dev), GFP_KERNEL);
+ if (!zdev->iommu_dev)
+ return -ENOMEM;
+
+ rc = iommu_device_sysfs_add(zdev->iommu_dev, NULL, NULL,
"s390-iommu.%08x", zdev->fid);
if (rc)
goto out_err;
- iommu_device_set_ops(&zdev->iommu_dev, &s390_iommu_ops);
+ iommu_device_set_ops(zdev->iommu_dev, &s390_iommu_ops);
- rc = iommu_device_register(&zdev->iommu_dev);
+ rc = iommu_device_register(zdev->iommu_dev);
if (rc)
goto out_sysfs;
return 0;
out_sysfs:
- iommu_device_sysfs_remove(&zdev->iommu_dev);
+ iommu_device_sysfs_remove(zdev->iommu_dev);
out_err:
return rc;
@@ -358,8 +362,8 @@ int zpci_init_iommu(struct zpci_dev *zdev)
void zpci_destroy_iommu(struct zpci_dev *zdev)
{
- iommu_device_unregister(&zdev->iommu_dev);
- iommu_device_sysfs_remove(&zdev->iommu_dev);
+ iommu_device_unregister(zdev->iommu_dev);
+ iommu_device_sysfs_remove(zdev->iommu_dev);
}
static struct iommu_ops s390_iommu_ops = {
--
2.5.5
On Fri, 11 Aug 2017, Sebastian Ott wrote:
> * iommu_release_device must not release the struct device but the
> structure it is embedded in: struct iommu_device (I'll send a patch
> for that)
--->8
>From 2839c92e038af47b8cb569f84d571878c54d1815 Mon Sep 17 00:00:00 2001
From: Sebastian Ott <[email protected]>
Date: Fri, 11 Aug 2017 19:04:00 +0200
Subject: [PATCH] iommu: fix the release function of iommu_class
The release function of iommu_class must not only free the device
structure it gets called with but the whole structure the device
is embedded in - struct iommu_device.
Signed-off-by: Sebastian Ott <[email protected]>
---
drivers/iommu/iommu-sysfs.c | 4 +++-
1 file changed, 3 insertions(+), 1 deletion(-)
diff --git a/drivers/iommu/iommu-sysfs.c b/drivers/iommu/iommu-sysfs.c
index c58351e..b65a501 100644
--- a/drivers/iommu/iommu-sysfs.c
+++ b/drivers/iommu/iommu-sysfs.c
@@ -34,7 +34,9 @@ static const struct attribute_group *iommu_dev_groups[] = {
static void iommu_release_device(struct device *dev)
{
- kfree(dev);
+ struct iommu_device *iommu = container_of(dev, struct iommu_device, dev);
+
+ kfree(iommu);
}
static struct class iommu_class = {
--
2.5.5
On Fri, 11 Aug 2017, Sebastian Ott wrote:
> * That patch embedded a struct iommu_device within struct zpci_dev but
> the iommu_device has a release function (via its class) - so when
> the release function gets called it frees memory that was never allocated.
> The fix is to not embedd struct iommu_device in zpci_dev (see below)
While discussing the problem with Gerald he noticed that
struct iommu_device is embedded in other structs. So that
is potentially broken in other cases, too.
Sebastian
Hi Sebastian,
On Fri, Aug 11, 2017 at 07:02:36PM +0200, Sebastian Ott wrote:
> ..but I found the bug, actually 2 bugs:
>
> * That patch embedded a struct iommu_device within struct zpci_dev but
> the iommu_device has a release function (via its class) - so when
> the release function gets called it frees memory that was never allocated.
> The fix is to not embedd struct iommu_device in zpci_dev (see below)
>
> * iommu_release_device must not release the struct device but the
> structure it is embedded in: struct iommu_device (I'll send a patch
> for that)
Thanks a lot for your in-depth analysis, this bug has been around since
4.11 time :-/ Unfortunatly I can't test iommu-unplug here, so I didn't
notice it until your report.
Meanwhile I worked on a different fix that make the 'struct device' in
iommu_device a pointer again. As Gerald and you noticed already,
struct iommu_device is embedded in other structs as well, and I'd like
to keep it that way. The reason is that in the future the code in
iommu.c should call into iommu-drivers and supply a struct iommu_device,
and the driver can then just do a container_of to get its own data about
that hardware iommu.
So just making the dev member a pointer is a simpler fix for now. The
other option would have been to call back into the iommu-drivers from
the release-function.
I attach the patch I wrote to fix this, can you please test it together
with the initial patch in this thread?
Thanks,
Joerg
>From aea46317b063d04b5e445aa56e25cc278678e991 Mon Sep 17 00:00:00 2001
From: Joerg Roedel <[email protected]>
Date: Mon, 14 Aug 2017 17:19:26 +0200
Subject: [PATCH] iommu: Fix wrong freeing of iommu_device->dev
The struct iommu_device has a 'struct device' embedded into
it, not as a pointer, but the whole struct. In the
conversion of the iommu drivers to use struct iommu_device
it was forgotten that the relase function for that struct
device simply calls kfree() on the pointer.
This frees memory that was never allocated and causes memory
corruption.
To fix this issue, use a pointer to struct device instead of
embedding the whole struct. This needs some updates in the
iommu sysfs code as well as the Intel VT-d and AMD IOMMU
driver.
Reported-by: Sebastian Ott <[email protected]>
Fixes: 39ab9555c241 ('iommu: Add sysfs bindings for struct iommu_device')
Cc: [email protected] # >= v4.11
Signed-off-by: Joerg Roedel <[email protected]>
---
drivers/iommu/amd_iommu_types.h | 4 +++-
drivers/iommu/intel-iommu.c | 4 +++-
drivers/iommu/iommu-sysfs.c | 32 ++++++++++++++++++++------------
include/linux/iommu.h | 12 +++++++++++-
4 files changed, 37 insertions(+), 15 deletions(-)
diff --git a/drivers/iommu/amd_iommu_types.h b/drivers/iommu/amd_iommu_types.h
index 294a409e283b..d6b873b57054 100644
--- a/drivers/iommu/amd_iommu_types.h
+++ b/drivers/iommu/amd_iommu_types.h
@@ -574,7 +574,9 @@ struct amd_iommu {
static inline struct amd_iommu *dev_to_amd_iommu(struct device *dev)
{
- return container_of(dev, struct amd_iommu, iommu.dev);
+ struct iommu_device *iommu = dev_to_iommu_device(dev);
+
+ return container_of(iommu, struct amd_iommu, iommu);
}
#define ACPIHID_UID_LEN 256
diff --git a/drivers/iommu/intel-iommu.c b/drivers/iommu/intel-iommu.c
index d5e8b8628a1a..39d51995e777 100644
--- a/drivers/iommu/intel-iommu.c
+++ b/drivers/iommu/intel-iommu.c
@@ -4615,7 +4615,9 @@ static void intel_disable_iommus(void)
static inline struct intel_iommu *dev_to_intel_iommu(struct device *dev)
{
- return container_of(dev, struct intel_iommu, iommu.dev);
+ struct iommu_device *iommu_dev = dev_to_iommu_device(dev);
+
+ return container_of(iommu_dev, struct intel_iommu, iommu);
}
static ssize_t intel_iommu_show_version(struct device *dev,
diff --git a/drivers/iommu/iommu-sysfs.c b/drivers/iommu/iommu-sysfs.c
index c58351ed61c1..36d1a7ce7fc4 100644
--- a/drivers/iommu/iommu-sysfs.c
+++ b/drivers/iommu/iommu-sysfs.c
@@ -62,32 +62,40 @@ int iommu_device_sysfs_add(struct iommu_device *iommu,
va_list vargs;
int ret;
- device_initialize(&iommu->dev);
+ iommu->dev = kzalloc(sizeof(*iommu->dev), GFP_KERNEL);
+ if (!iommu->dev)
+ return -ENOMEM;
- iommu->dev.class = &iommu_class;
- iommu->dev.parent = parent;
- iommu->dev.groups = groups;
+ device_initialize(iommu->dev);
+
+ iommu->dev->class = &iommu_class;
+ iommu->dev->parent = parent;
+ iommu->dev->groups = groups;
va_start(vargs, fmt);
- ret = kobject_set_name_vargs(&iommu->dev.kobj, fmt, vargs);
+ ret = kobject_set_name_vargs(&iommu->dev->kobj, fmt, vargs);
va_end(vargs);
if (ret)
goto error;
- ret = device_add(&iommu->dev);
+ ret = device_add(iommu->dev);
if (ret)
goto error;
+ dev_set_drvdata(iommu->dev, iommu);
+
return 0;
error:
- put_device(&iommu->dev);
+ put_device(iommu->dev);
return ret;
}
void iommu_device_sysfs_remove(struct iommu_device *iommu)
{
- device_unregister(&iommu->dev);
+ dev_set_drvdata(iommu->dev, NULL);
+ device_unregister(iommu->dev);
+ iommu->dev = NULL;
}
/*
* IOMMU drivers can indicate a device is managed by a given IOMMU using
@@ -102,14 +110,14 @@ int iommu_device_link(struct iommu_device *iommu, struct device *link)
if (!iommu || IS_ERR(iommu))
return -ENODEV;
- ret = sysfs_add_link_to_group(&iommu->dev.kobj, "devices",
+ ret = sysfs_add_link_to_group(&iommu->dev->kobj, "devices",
&link->kobj, dev_name(link));
if (ret)
return ret;
- ret = sysfs_create_link_nowarn(&link->kobj, &iommu->dev.kobj, "iommu");
+ ret = sysfs_create_link_nowarn(&link->kobj, &iommu->dev->kobj, "iommu");
if (ret)
- sysfs_remove_link_from_group(&iommu->dev.kobj, "devices",
+ sysfs_remove_link_from_group(&iommu->dev->kobj, "devices",
dev_name(link));
return ret;
@@ -121,5 +129,5 @@ void iommu_device_unlink(struct iommu_device *iommu, struct device *link)
return;
sysfs_remove_link(&link->kobj, "iommu");
- sysfs_remove_link_from_group(&iommu->dev.kobj, "devices", dev_name(link));
+ sysfs_remove_link_from_group(&iommu->dev->kobj, "devices", dev_name(link));
}
diff --git a/include/linux/iommu.h b/include/linux/iommu.h
index 2cb54adc4a33..176f7569d874 100644
--- a/include/linux/iommu.h
+++ b/include/linux/iommu.h
@@ -240,7 +240,7 @@ struct iommu_device {
struct list_head list;
const struct iommu_ops *ops;
struct fwnode_handle *fwnode;
- struct device dev;
+ struct device *dev;
};
int iommu_device_register(struct iommu_device *iommu);
@@ -265,6 +265,11 @@ static inline void iommu_device_set_fwnode(struct iommu_device *iommu,
iommu->fwnode = fwnode;
}
+static inline struct iommu_device *dev_to_iommu_device(struct device *dev)
+{
+ return (struct iommu_device *)dev_get_drvdata(dev);
+}
+
#define IOMMU_GROUP_NOTIFY_ADD_DEVICE 1 /* Device added */
#define IOMMU_GROUP_NOTIFY_DEL_DEVICE 2 /* Pre Device removed */
#define IOMMU_GROUP_NOTIFY_BIND_DRIVER 3 /* Pre Driver bind */
@@ -589,6 +594,11 @@ static inline void iommu_device_set_fwnode(struct iommu_device *iommu,
{
}
+static inline struct iommu_device *dev_to_iommu_device(struct device *dev)
+{
+ return NULL;
+}
+
static inline void iommu_device_unregister(struct iommu_device *iommu)
{
}
--
2.12.3
On Mon, 14 Aug 2017, Joerg Roedel wrote:
> I attach the patch I wrote to fix this, can you please test it together
> with the initial patch in this thread?
I did and can confirm that everything worked as expected!
Thanks,
Sebastian
On Tue, Aug 15, 2017 at 06:02:20PM +0200, Sebastian Ott wrote:
> On Mon, 14 Aug 2017, Joerg Roedel wrote:
> > I attach the patch I wrote to fix this, can you please test it together
> > with the initial patch in this thread?
>
> I did and can confirm that everything worked as expected!
Great, thanks a lot for your testing and thanks again for the effort to
root-cause the crashes on s390x.
So, can I add an Acked-by to the initial patch and put it into the
iommu-tree?
Regards,
Joerg
On Tue, 15 Aug 2017, Joerg Roedel wrote:
> On Tue, Aug 15, 2017 at 06:02:20PM +0200, Sebastian Ott wrote:
> > On Mon, 14 Aug 2017, Joerg Roedel wrote:
> > > I attach the patch I wrote to fix this, can you please test it together
> > > with the initial patch in this thread?
> >
> > I did and can confirm that everything worked as expected!
>
> Great, thanks a lot for your testing and thanks again for the effort to
> root-cause the crashes on s390x.
>
> So, can I add an Acked-by to the initial patch and put it into the
> iommu-tree?
Yes, please do.