2021-05-12 08:08:15

by Long Li

[permalink] [raw]
Subject: [Patch v3 1/2] PCI: hv: Fix a race condition when removing the device

From: Long Li <[email protected]>

On removing the device, any work item (hv_pci_devices_present() or
hv_pci_eject_device()) scheduled on workqueue hbus->wq may still be running
and race with hv_pci_remove().

This can happen because the host may send PCI_EJECT or PCI_BUS_RELATIONS(2)
and decide to rescind the channel immediately after that.

Fix this by flushing/destroying the workqueue of hbus before doing hbus remove.

Signed-off-by: Long Li <[email protected]>
---
Change in v2: Remove unused bus state hv_pcibus_removed
Change in v3: Change hv_pci_bus_exit() to not use workqueue to remove PCI devices

drivers/pci/controller/pci-hyperv.c | 30 ++++++++++++++++++++++-------
1 file changed, 23 insertions(+), 7 deletions(-)

diff --git a/drivers/pci/controller/pci-hyperv.c b/drivers/pci/controller/pci-hyperv.c
index 27a17a1e4a7c..c6122a1b0c46 100644
--- a/drivers/pci/controller/pci-hyperv.c
+++ b/drivers/pci/controller/pci-hyperv.c
@@ -444,7 +444,6 @@ enum hv_pcibus_state {
hv_pcibus_probed,
hv_pcibus_installed,
hv_pcibus_removing,
- hv_pcibus_removed,
hv_pcibus_maximum
};

@@ -3247,8 +3246,9 @@ static int hv_pci_bus_exit(struct hv_device *hdev, bool keep_devs)
struct pci_packet teardown_packet;
u8 buffer[sizeof(struct pci_message)];
} pkt;
- struct hv_dr_state *dr;
struct hv_pci_compl comp_pkt;
+ struct hv_pci_dev *hpdev, *tmp;
+ unsigned long flags;
int ret;

/*
@@ -3260,9 +3260,16 @@ static int hv_pci_bus_exit(struct hv_device *hdev, bool keep_devs)

if (!keep_devs) {
/* Delete any children which might still exist. */
- dr = kzalloc(sizeof(*dr), GFP_KERNEL);
- if (dr && hv_pci_start_relations_work(hbus, dr))
- kfree(dr);
+ spin_lock_irqsave(&hbus->device_list_lock, flags);
+ list_for_each_entry_safe(hpdev, tmp, &hbus->children, list_entry) {
+ list_del(&hpdev->list_entry);
+ if (hpdev->pci_slot)
+ pci_destroy_slot(hpdev->pci_slot);
+ /* For the two refs got in new_pcichild_device() */
+ put_pcichild(hpdev);
+ put_pcichild(hpdev);
+ }
+ spin_unlock_irqrestore(&hbus->device_list_lock, flags);
}

ret = hv_send_resources_released(hdev);
@@ -3305,13 +3312,23 @@ static int hv_pci_remove(struct hv_device *hdev)

hbus = hv_get_drvdata(hdev);
if (hbus->state == hv_pcibus_installed) {
+ tasklet_disable(&hdev->channel->callback_event);
+ hbus->state = hv_pcibus_removing;
+ tasklet_enable(&hdev->channel->callback_event);
+ destroy_workqueue(hbus->wq);
+ hbus->wq = NULL;
+ /*
+ * At this point, no work is running or can be scheduled
+ * on hbus-wq. We can't race with hv_pci_devices_present()
+ * or hv_pci_eject_device(), it's safe to proceed.
+ */
+
/* Remove the bus from PCI's point of view. */
pci_lock_rescan_remove();
pci_stop_root_bus(hbus->pci_bus);
hv_pci_remove_slots(hbus);
pci_remove_root_bus(hbus->pci_bus);
pci_unlock_rescan_remove();
- hbus->state = hv_pcibus_removed;
}

ret = hv_pci_bus_exit(hdev, false);
@@ -3326,7 +3343,6 @@ static int hv_pci_remove(struct hv_device *hdev)
irq_domain_free_fwnode(hbus->sysdata.fwnode);
put_hvpcibus(hbus);
wait_for_completion(&hbus->remove_event);
- destroy_workqueue(hbus->wq);

hv_put_dom_num(hbus->sysdata.domain);

--
2.27.0


2021-05-26 20:21:48

by Michael Kelley (LINUX)

[permalink] [raw]
Subject: RE: [Patch v3 1/2] PCI: hv: Fix a race condition when removing the device

From: [email protected] <[email protected]> Sent: Wednesday, May 12, 2021 1:07 AM
>
> On removing the device, any work item (hv_pci_devices_present() or
> hv_pci_eject_device()) scheduled on workqueue hbus->wq may still be running
> and race with hv_pci_remove().
>
> This can happen because the host may send PCI_EJECT or PCI_BUS_RELATIONS(2)
> and decide to rescind the channel immediately after that.
>
> Fix this by flushing/destroying the workqueue of hbus before doing hbus remove.
>
> Signed-off-by: Long Li <[email protected]>
> ---
> Change in v2: Remove unused bus state hv_pcibus_removed
> Change in v3: Change hv_pci_bus_exit() to not use workqueue to remove PCI devices
>
> drivers/pci/controller/pci-hyperv.c | 30 ++++++++++++++++++++++-------
> 1 file changed, 23 insertions(+), 7 deletions(-)
>
> diff --git a/drivers/pci/controller/pci-hyperv.c b/drivers/pci/controller/pci-hyperv.c
> index 27a17a1e4a7c..c6122a1b0c46 100644
> --- a/drivers/pci/controller/pci-hyperv.c
> +++ b/drivers/pci/controller/pci-hyperv.c
> @@ -444,7 +444,6 @@ enum hv_pcibus_state {
> hv_pcibus_probed,
> hv_pcibus_installed,
> hv_pcibus_removing,
> - hv_pcibus_removed,
> hv_pcibus_maximum
> };
>
> @@ -3247,8 +3246,9 @@ static int hv_pci_bus_exit(struct hv_device *hdev, bool
> keep_devs)
> struct pci_packet teardown_packet;
> u8 buffer[sizeof(struct pci_message)];
> } pkt;
> - struct hv_dr_state *dr;
> struct hv_pci_compl comp_pkt;
> + struct hv_pci_dev *hpdev, *tmp;
> + unsigned long flags;
> int ret;
>
> /*
> @@ -3260,9 +3260,16 @@ static int hv_pci_bus_exit(struct hv_device *hdev, bool
> keep_devs)
>
> if (!keep_devs) {
> /* Delete any children which might still exist. */
> - dr = kzalloc(sizeof(*dr), GFP_KERNEL);
> - if (dr && hv_pci_start_relations_work(hbus, dr))
> - kfree(dr);
> + spin_lock_irqsave(&hbus->device_list_lock, flags);
> + list_for_each_entry_safe(hpdev, tmp, &hbus->children, list_entry) {
> + list_del(&hpdev->list_entry);
> + if (hpdev->pci_slot)
> + pci_destroy_slot(hpdev->pci_slot);
> + /* For the two refs got in new_pcichild_device() */
> + put_pcichild(hpdev);
> + put_pcichild(hpdev);
> + }
> + spin_unlock_irqrestore(&hbus->device_list_lock, flags);
> }
>
> ret = hv_send_resources_released(hdev);
> @@ -3305,13 +3312,23 @@ static int hv_pci_remove(struct hv_device *hdev)
>
> hbus = hv_get_drvdata(hdev);
> if (hbus->state == hv_pcibus_installed) {
> + tasklet_disable(&hdev->channel->callback_event);
> + hbus->state = hv_pcibus_removing;
> + tasklet_enable(&hdev->channel->callback_event);
> + destroy_workqueue(hbus->wq);
> + hbus->wq = NULL;
> + /*
> + * At this point, no work is running or can be scheduled
> + * on hbus-wq. We can't race with hv_pci_devices_present()
> + * or hv_pci_eject_device(), it's safe to proceed.
> + */
> +
> /* Remove the bus from PCI's point of view. */
> pci_lock_rescan_remove();
> pci_stop_root_bus(hbus->pci_bus);
> hv_pci_remove_slots(hbus);
> pci_remove_root_bus(hbus->pci_bus);
> pci_unlock_rescan_remove();
> - hbus->state = hv_pcibus_removed;
> }
>
> ret = hv_pci_bus_exit(hdev, false);
> @@ -3326,7 +3343,6 @@ static int hv_pci_remove(struct hv_device *hdev)
> irq_domain_free_fwnode(hbus->sysdata.fwnode);
> put_hvpcibus(hbus);
> wait_for_completion(&hbus->remove_event);
> - destroy_workqueue(hbus->wq);
>
> hv_put_dom_num(hbus->sysdata.domain);
>
> --
> 2.27.0

Reviewed-by: Michael Kelley <[email protected]>

2021-06-03 17:39:09

by Lorenzo Pieralisi

[permalink] [raw]
Subject: Re: [Patch v3 1/2] PCI: hv: Fix a race condition when removing the device

On Wed, May 12, 2021 at 01:06:40AM -0700, [email protected] wrote:
> From: Long Li <[email protected]>
>
> On removing the device, any work item (hv_pci_devices_present() or
> hv_pci_eject_device()) scheduled on workqueue hbus->wq may still be running
> and race with hv_pci_remove().
>
> This can happen because the host may send PCI_EJECT or PCI_BUS_RELATIONS(2)
> and decide to rescind the channel immediately after that.
>
> Fix this by flushing/destroying the workqueue of hbus before doing hbus remove.
>
> Signed-off-by: Long Li <[email protected]>
> ---
> Change in v2: Remove unused bus state hv_pcibus_removed
> Change in v3: Change hv_pci_bus_exit() to not use workqueue to remove PCI devices
>
> drivers/pci/controller/pci-hyperv.c | 30 ++++++++++++++++++++++-------
> 1 file changed, 23 insertions(+), 7 deletions(-)

Applied series to pci/hv, thanks.

Lorenzo

> diff --git a/drivers/pci/controller/pci-hyperv.c b/drivers/pci/controller/pci-hyperv.c
> index 27a17a1e4a7c..c6122a1b0c46 100644
> --- a/drivers/pci/controller/pci-hyperv.c
> +++ b/drivers/pci/controller/pci-hyperv.c
> @@ -444,7 +444,6 @@ enum hv_pcibus_state {
> hv_pcibus_probed,
> hv_pcibus_installed,
> hv_pcibus_removing,
> - hv_pcibus_removed,
> hv_pcibus_maximum
> };
>
> @@ -3247,8 +3246,9 @@ static int hv_pci_bus_exit(struct hv_device *hdev, bool keep_devs)
> struct pci_packet teardown_packet;
> u8 buffer[sizeof(struct pci_message)];
> } pkt;
> - struct hv_dr_state *dr;
> struct hv_pci_compl comp_pkt;
> + struct hv_pci_dev *hpdev, *tmp;
> + unsigned long flags;
> int ret;
>
> /*
> @@ -3260,9 +3260,16 @@ static int hv_pci_bus_exit(struct hv_device *hdev, bool keep_devs)
>
> if (!keep_devs) {
> /* Delete any children which might still exist. */
> - dr = kzalloc(sizeof(*dr), GFP_KERNEL);
> - if (dr && hv_pci_start_relations_work(hbus, dr))
> - kfree(dr);
> + spin_lock_irqsave(&hbus->device_list_lock, flags);
> + list_for_each_entry_safe(hpdev, tmp, &hbus->children, list_entry) {
> + list_del(&hpdev->list_entry);
> + if (hpdev->pci_slot)
> + pci_destroy_slot(hpdev->pci_slot);
> + /* For the two refs got in new_pcichild_device() */
> + put_pcichild(hpdev);
> + put_pcichild(hpdev);
> + }
> + spin_unlock_irqrestore(&hbus->device_list_lock, flags);
> }
>
> ret = hv_send_resources_released(hdev);
> @@ -3305,13 +3312,23 @@ static int hv_pci_remove(struct hv_device *hdev)
>
> hbus = hv_get_drvdata(hdev);
> if (hbus->state == hv_pcibus_installed) {
> + tasklet_disable(&hdev->channel->callback_event);
> + hbus->state = hv_pcibus_removing;
> + tasklet_enable(&hdev->channel->callback_event);
> + destroy_workqueue(hbus->wq);
> + hbus->wq = NULL;
> + /*
> + * At this point, no work is running or can be scheduled
> + * on hbus-wq. We can't race with hv_pci_devices_present()
> + * or hv_pci_eject_device(), it's safe to proceed.
> + */
> +
> /* Remove the bus from PCI's point of view. */
> pci_lock_rescan_remove();
> pci_stop_root_bus(hbus->pci_bus);
> hv_pci_remove_slots(hbus);
> pci_remove_root_bus(hbus->pci_bus);
> pci_unlock_rescan_remove();
> - hbus->state = hv_pcibus_removed;
> }
>
> ret = hv_pci_bus_exit(hdev, false);
> @@ -3326,7 +3343,6 @@ static int hv_pci_remove(struct hv_device *hdev)
> irq_domain_free_fwnode(hbus->sysdata.fwnode);
> put_hvpcibus(hbus);
> wait_for_completion(&hbus->remove_event);
> - destroy_workqueue(hbus->wq);
>
> hv_put_dom_num(hbus->sysdata.domain);
>
> --
> 2.27.0
>