On s390 PCI functions may be hotplugged individually even when they
belong to a multi-function device. In particular on an SR-IOV device VFs
may be removed and later re-added.
In commit a50297cf8235 ("s390/pci: separate zbus creation from
scanning") it was missed however that struct pci_bus and struct
zpci_bus's resource list retained a reference to the PCI functions MMIO
resources even though those resources are released and freed on
hot-unplug. These stale resources may subsequently be claimed when the
PCI function re-appears resulting in use-after-free.
One idea of fixing this use-after-free in s390 specific code that was
investigated was to simply keep resources around from the moment a PCI
function first appeared until the whole virtual PCI bus created for
a multi-function device disappears. The problem with this however is
that due to the requirement of artificial MMIO addreesses (address
cookies) we will then need extra logic and tracking in struct zpci_bus
to keep these compatible for re-use. At the same time the MMIO resources
semantically belong to the PCI function so tying their lifecycle to the
function seems more logical.
Instead a simpler approach is to remove the resources of an individually
hot-unplugged PCI function from the PCI bus's resource list while
keeping the resources of other PCI functions on the PCI bus untouched.
This is done by introducing pci_bus_remove_resource() to remove an
individual resource. Similarly the resource also needs to be removed
from the struct zpci_bus's resource list. It turns out however, that
there is really no need to add the MMIO resources at all and instead we
can simply use the zpci_bar_struct's resource pointer directly.
Fixes: a50297cf8235 ("s390/pci: separate zbus creation from scanning")
Signed-off-by: Niklas Schnelle <[email protected]>
---
arch/s390/pci/pci.c | 16 ++++++++++------
arch/s390/pci/pci_bus.c | 12 +++++-------
arch/s390/pci/pci_bus.h | 3 +--
drivers/pci/bus.c | 23 +++++++++++++++++++++++
include/linux/pci.h | 1 +
5 files changed, 40 insertions(+), 15 deletions(-)
diff --git a/arch/s390/pci/pci.c b/arch/s390/pci/pci.c
index ef38b1514c77..e16afacc8fd1 100644
--- a/arch/s390/pci/pci.c
+++ b/arch/s390/pci/pci.c
@@ -544,8 +544,7 @@ static struct resource *__alloc_res(struct zpci_dev *zdev, unsigned long start,
return r;
}
-int zpci_setup_bus_resources(struct zpci_dev *zdev,
- struct list_head *resources)
+int zpci_setup_bus_resources(struct zpci_dev *zdev)
{
unsigned long addr, size, flags;
struct resource *res;
@@ -581,7 +580,6 @@ int zpci_setup_bus_resources(struct zpci_dev *zdev,
return -ENOMEM;
}
zdev->bars[i].res = res;
- pci_add_resource(resources, res);
}
zdev->has_resources = 1;
@@ -590,17 +588,23 @@ int zpci_setup_bus_resources(struct zpci_dev *zdev,
static void zpci_cleanup_bus_resources(struct zpci_dev *zdev)
{
+ struct resource *res;
int i;
+ pci_lock_rescan_remove();
for (i = 0; i < PCI_STD_NUM_BARS; i++) {
- if (!zdev->bars[i].size || !zdev->bars[i].res)
+ res = zdev->bars[i].res;
+ if (!res)
continue;
+ release_resource(res);
+ pci_bus_remove_resource(zdev->zbus->bus, res);
zpci_free_iomap(zdev, zdev->bars[i].map_idx);
- release_resource(zdev->bars[i].res);
- kfree(zdev->bars[i].res);
+ zdev->bars[i].res = NULL;
+ kfree(res);
}
zdev->has_resources = 0;
+ pci_unlock_rescan_remove();
}
int pcibios_device_add(struct pci_dev *pdev)
diff --git a/arch/s390/pci/pci_bus.c b/arch/s390/pci/pci_bus.c
index 6a8da1b742ae..a99926af2b69 100644
--- a/arch/s390/pci/pci_bus.c
+++ b/arch/s390/pci/pci_bus.c
@@ -41,9 +41,7 @@ static int zpci_nb_devices;
*/
static int zpci_bus_prepare_device(struct zpci_dev *zdev)
{
- struct resource_entry *window, *n;
- struct resource *res;
- int rc;
+ int rc, i;
if (!zdev_enabled(zdev)) {
rc = zpci_enable_device(zdev);
@@ -57,10 +55,10 @@ static int zpci_bus_prepare_device(struct zpci_dev *zdev)
}
if (!zdev->has_resources) {
- zpci_setup_bus_resources(zdev, &zdev->zbus->resources);
- resource_list_for_each_entry_safe(window, n, &zdev->zbus->resources) {
- res = window->res;
- pci_bus_add_resource(zdev->zbus->bus, res, 0);
+ zpci_setup_bus_resources(zdev);
+ for (i = 0; i < PCI_STD_NUM_BARS; i++) {
+ if (zdev->bars[i].res)
+ pci_bus_add_resource(zdev->zbus->bus, zdev->bars[i].res, 0);
}
}
diff --git a/arch/s390/pci/pci_bus.h b/arch/s390/pci/pci_bus.h
index e96c9860e064..af9f0ac79a1b 100644
--- a/arch/s390/pci/pci_bus.h
+++ b/arch/s390/pci/pci_bus.h
@@ -30,8 +30,7 @@ static inline void zpci_zdev_get(struct zpci_dev *zdev)
int zpci_alloc_domain(int domain);
void zpci_free_domain(int domain);
-int zpci_setup_bus_resources(struct zpci_dev *zdev,
- struct list_head *resources);
+int zpci_setup_bus_resources(struct zpci_dev *zdev);
static inline struct zpci_dev *zdev_from_bus(struct pci_bus *bus,
unsigned int devfn)
diff --git a/drivers/pci/bus.c b/drivers/pci/bus.c
index 83ae838ceb5f..f021f1d4af9f 100644
--- a/drivers/pci/bus.c
+++ b/drivers/pci/bus.c
@@ -76,6 +76,29 @@ struct resource *pci_bus_resource_n(const struct pci_bus *bus, int n)
}
EXPORT_SYMBOL_GPL(pci_bus_resource_n);
+void pci_bus_remove_resource(struct pci_bus *bus, struct resource *res)
+{
+ struct pci_bus_resource *bus_res, *tmp;
+ int i;
+
+ for (i = 0; i < PCI_BRIDGE_RESOURCE_NUM; i++) {
+ if (bus->resource[i] == res) {
+ bus->resource[i] = NULL;
+ return;
+ }
+ }
+
+ list_for_each_entry_safe(bus_res, tmp, &bus->resources, list) {
+ if (bus_res->res == res) {
+ list_del(&bus_res->list);
+ kfree(bus_res);
+ return;
+ }
+ }
+ return;
+
+}
+
void pci_bus_remove_resources(struct pci_bus *bus)
{
int i;
diff --git a/include/linux/pci.h b/include/linux/pci.h
index adffd65e84b4..3b1974e2ec73 100644
--- a/include/linux/pci.h
+++ b/include/linux/pci.h
@@ -1436,6 +1436,7 @@ void pci_bus_add_resource(struct pci_bus *bus, struct resource *res,
unsigned int flags);
struct resource *pci_bus_resource_n(const struct pci_bus *bus, int n);
void pci_bus_remove_resources(struct pci_bus *bus);
+void pci_bus_remove_resource(struct pci_bus *bus, struct resource *res);
int devm_request_pci_bus_resources(struct device *dev,
struct list_head *resources);
--
2.37.2
Hi Niklas,
when comparing pci_bus_remove_resource with pci_bus_remove_resources,
I find that the "single-resource" variant might be ending too early.
On Fri, 2023-02-03 at 12:48 +0100, Niklas Schnelle wrote:
> +void pci_bus_remove_resource(struct pci_bus *bus, struct resource
> *res)
> +{
> + struct pci_bus_resource *bus_res, *tmp;
> + int i;
> +
> + for (i = 0; i < PCI_BRIDGE_RESOURCE_NUM; i++) {
> + if (bus->resource[i] == res) {
> + bus->resource[i] = NULL;
> + return;
^^^^^^^
Did you mean to "break" here, rather than end the routine?
> + }
> + }
> +
> + list_for_each_entry_safe(bus_res, tmp, &bus->resources, list)
> {
> + if (bus_res->res == res) {
> + list_del(&bus_res->list);
> + kfree(bus_res);
> + return;
^^^^^^^
Here "break" and "return" have the same effect, but "break" would be
"symmetric".
> + }
> + }
> + return;
> +
> +}
While this might be a nit, I'd like to better separate the "single-
resource" variant's name. How about pci_bus_remove_one_resource - I
know it's getting long...
Thanks,
Gerd
On Tue, 2023-02-14 at 10:46 +0100, Gerd Bayer wrote:
> Hi Niklas,
>
> when comparing pci_bus_remove_resource with pci_bus_remove_resources,
> I find that the "single-resource" variant might be ending too early.
>
> On Fri, 2023-02-03 at 12:48 +0100, Niklas Schnelle wrote:
> > +void pci_bus_remove_resource(struct pci_bus *bus, struct resource
> > *res)
> > +{
> > + struct pci_bus_resource *bus_res, *tmp;
> > + int i;
> > +
> > + for (i = 0; i < PCI_BRIDGE_RESOURCE_NUM; i++) {
> > + if (bus->resource[i] == res) {
> > + bus->resource[i] = NULL;
> > + return;
> ^^^^^^^
> Did you mean to "break" here, rather than end the routine?
No the return is intended. We're looking to remove a single resource
and if I understand things correctly then that resource can either be
in bus->resource[] (x)or in bus->resources depending on whether it is a
PCI bridge resource. Either way once found and removed from the
respective array/list we're done and can thus return immediately.
> > + }
> > + }
> > +
> > + list_for_each_entry_safe(bus_res, tmp, &bus->resources, list)
> > {
> > + if (bus_res->res == res) {
> > + list_del(&bus_res->list);
> > + kfree(bus_res);
> > + return;
> ^^^^^^^
> Here "break" and "return" have the same effect, but "break" would be
> "symmetric".
> > + }
> > + }
> > + return;
> > +
> > +}
>
> While this might be a nit, I'd like to better separate the "single-
> resource" variant's name. How about pci_bus_remove_one_resource - I
> know it's getting long...
>
> Thanks,
> Gerd
I have no strong feelings on the name.