2022-01-14 22:56:19

by Jacob Pan

[permalink] [raw]
Subject: [PATCH v2] iommu/vt-d: Fix PCI bus rescan device hot add

During PCI bus rescan, adding new devices involve two notifiers.
1. dmar_pci_bus_notifier()
2. iommu_bus_notifier()
The current code sets #1 as low priority (INT_MIN) which resulted in #2
being invoked first. The result is that struct device pointer cannot be
found in DRHD search for the new device's DMAR/IOMMU. Subsequently, the
device is put under the "catch-all" IOMMU instead of the correct one.

This could cause system hang when device TLB invalidation is sent to the
wrong IOMMU. Invalidation timeout error and hard lockup have been observed.

On the reverse direction for device removal, the order should be #2-#1
such that DMAR cleanup is done after IOMMU.

This patch fixes the issue by setting proper priorities for
dmar_pci_bus_notifier around IOMMU bus notifier. DRHD search for a new
device will find the correct IOMMU. The order with this patch is the
following:
1. dmar_pci_bus_add_dev()
2. iommu_probe_device()
3. iommu_release_device()
4. dmar_pci_bus_remove_dev()

Fixes: 59ce0515cdaf ("iommu/vt-d: Update DRHD/RMRR/ATSR device scope")
Reported-by: Zhang, Bernice <[email protected]>
Suggested-by: Lu Baolu <[email protected]>
Signed-off-by: Jacob Pan <[email protected]>
---
drivers/iommu/intel/dmar.c | 69 ++++++++++++++++++++++++++++----------
drivers/iommu/iommu.c | 1 +
include/linux/iommu.h | 1 +
3 files changed, 53 insertions(+), 18 deletions(-)

diff --git a/drivers/iommu/intel/dmar.c b/drivers/iommu/intel/dmar.c
index 915bff76fe96..5f4751ba6bb1 100644
--- a/drivers/iommu/intel/dmar.c
+++ b/drivers/iommu/intel/dmar.c
@@ -340,15 +340,19 @@ static inline void vf_inherit_msi_domain(struct pci_dev *pdev)
dev_set_msi_domain(&pdev->dev, dev_get_msi_domain(&physfn->dev));
}

-static int dmar_pci_bus_notifier(struct notifier_block *nb,
+static int dmar_pci_bus_add_notifier(struct notifier_block *nb,
unsigned long action, void *data)
{
struct pci_dev *pdev = to_pci_dev(data);
struct dmar_pci_notify_info *info;

- /* Only care about add/remove events for physical functions.
+ if (action != BUS_NOTIFY_ADD_DEVICE)
+ return NOTIFY_DONE;
+
+ /*
* For VFs we actually do the lookup based on the corresponding
- * PF in device_to_iommu() anyway. */
+ * PF in device_to_iommu() anyway.
+ */
if (pdev->is_virtfn) {
/*
* Ensure that the VF device inherits the irq domain of the
@@ -358,13 +362,34 @@ static int dmar_pci_bus_notifier(struct notifier_block *nb,
* from the PF device, but that's yet another x86'sism to
* inflict on everybody else.
*/
- if (action == BUS_NOTIFY_ADD_DEVICE)
- vf_inherit_msi_domain(pdev);
+ vf_inherit_msi_domain(pdev);
return NOTIFY_DONE;
}

- if (action != BUS_NOTIFY_ADD_DEVICE &&
- action != BUS_NOTIFY_REMOVED_DEVICE)
+ info = dmar_alloc_pci_notify_info(pdev, action);
+ if (!info)
+ return NOTIFY_DONE;
+
+ down_write(&dmar_global_lock);
+ dmar_pci_bus_add_dev(info);
+ up_write(&dmar_global_lock);
+ dmar_free_pci_notify_info(info);
+
+ return NOTIFY_OK;
+}
+
+static struct notifier_block dmar_pci_bus_add_nb = {
+ .notifier_call = dmar_pci_bus_add_notifier,
+ .priority = IOMMU_BUS_NOTIFY_PRIORITY + 1,
+};
+
+static int dmar_pci_bus_remove_notifier(struct notifier_block *nb,
+ unsigned long action, void *data)
+{
+ struct pci_dev *pdev = to_pci_dev(data);
+ struct dmar_pci_notify_info *info;
+
+ if (pdev->is_virtfn || action != BUS_NOTIFY_REMOVED_DEVICE)
return NOTIFY_DONE;

info = dmar_alloc_pci_notify_info(pdev, action);
@@ -372,10 +397,7 @@ static int dmar_pci_bus_notifier(struct notifier_block *nb,
return NOTIFY_DONE;

down_write(&dmar_global_lock);
- if (action == BUS_NOTIFY_ADD_DEVICE)
- dmar_pci_bus_add_dev(info);
- else if (action == BUS_NOTIFY_REMOVED_DEVICE)
- dmar_pci_bus_del_dev(info);
+ dmar_pci_bus_del_dev(info);
up_write(&dmar_global_lock);

dmar_free_pci_notify_info(info);
@@ -383,11 +405,10 @@ static int dmar_pci_bus_notifier(struct notifier_block *nb,
return NOTIFY_OK;
}

-static struct notifier_block dmar_pci_bus_nb = {
- .notifier_call = dmar_pci_bus_notifier,
- .priority = INT_MIN,
+static struct notifier_block dmar_pci_bus_remove_nb = {
+ .notifier_call = dmar_pci_bus_remove_notifier,
+ .priority = IOMMU_BUS_NOTIFY_PRIORITY - 1,
};
-
static struct dmar_drhd_unit *
dmar_find_dmaru(struct acpi_dmar_hardware_unit *drhd)
{
@@ -835,7 +856,17 @@ int __init dmar_dev_scope_init(void)

void __init dmar_register_bus_notifier(void)
{
- bus_register_notifier(&pci_bus_type, &dmar_pci_bus_nb);
+ /*
+ * We need two notifiers in that we need to make sure the ordering
+ * is enforced as the following:
+ * 1. dmar_pci_bus_add_dev()
+ * 2. iommu_probe_device()
+ * 3. iommu_release_device()
+ * 4. dmar_pci_bus_remove_dev()
+ * Notifier block priority is used to enforce the order
+ */
+ bus_register_notifier(&pci_bus_type, &dmar_pci_bus_add_nb);
+ bus_register_notifier(&pci_bus_type, &dmar_pci_bus_remove_nb);
}


@@ -2151,8 +2182,10 @@ static int __init dmar_free_unused_resources(void)
if (dmar_in_use())
return 0;

- if (dmar_dev_scope_status != 1 && !list_empty(&dmar_drhd_units))
- bus_unregister_notifier(&pci_bus_type, &dmar_pci_bus_nb);
+ if (dmar_dev_scope_status != 1 && !list_empty(&dmar_drhd_units)) {
+ bus_unregister_notifier(&pci_bus_type, &dmar_pci_bus_add_nb);
+ bus_unregister_notifier(&pci_bus_type, &dmar_pci_bus_remove_nb);
+ }

down_write(&dmar_global_lock);
list_for_each_entry_safe(dmaru, dmaru_n, &dmar_drhd_units, list) {
diff --git a/drivers/iommu/iommu.c b/drivers/iommu/iommu.c
index 8b86406b7162..6103bcde1f65 100644
--- a/drivers/iommu/iommu.c
+++ b/drivers/iommu/iommu.c
@@ -1841,6 +1841,7 @@ static int iommu_bus_init(struct bus_type *bus, const struct iommu_ops *ops)
return -ENOMEM;

nb->notifier_call = iommu_bus_notifier;
+ nb->priority = IOMMU_BUS_NOTIFY_PRIORITY;

err = bus_register_notifier(bus, nb);
if (err)
diff --git a/include/linux/iommu.h b/include/linux/iommu.h
index de0c57a567c8..8e13c69980be 100644
--- a/include/linux/iommu.h
+++ b/include/linux/iommu.h
@@ -403,6 +403,7 @@ static inline void iommu_iotlb_gather_init(struct iommu_iotlb_gather *gather)
};
}

+#define IOMMU_BUS_NOTIFY_PRIORITY 0
#define IOMMU_GROUP_NOTIFY_ADD_DEVICE 1 /* Device added */
#define IOMMU_GROUP_NOTIFY_DEL_DEVICE 2 /* Pre Device removed */
#define IOMMU_GROUP_NOTIFY_BIND_DRIVER 3 /* Pre Driver bind */
--
2.25.1


2022-01-26 06:35:07

by Jacob Pan

[permalink] [raw]
Subject: Re: [PATCH v2] iommu/vt-d: Fix PCI bus rescan device hot add

Hi all,

Just wondering if there are any other comments? This fixes a
regression that can cause system hang.

On Fri, 14 Jan 2022 00:21:10 -0800, Jacob Pan
<[email protected]> wrote:

> During PCI bus rescan, adding new devices involve two notifiers.
> 1. dmar_pci_bus_notifier()
> 2. iommu_bus_notifier()
> The current code sets #1 as low priority (INT_MIN) which resulted in #2
> being invoked first. The result is that struct device pointer cannot be
> found in DRHD search for the new device's DMAR/IOMMU. Subsequently, the
> device is put under the "catch-all" IOMMU instead of the correct one.
>
> This could cause system hang when device TLB invalidation is sent to the
> wrong IOMMU. Invalidation timeout error and hard lockup have been
> observed.
>
> On the reverse direction for device removal, the order should be #2-#1
> such that DMAR cleanup is done after IOMMU.
>
> This patch fixes the issue by setting proper priorities for
> dmar_pci_bus_notifier around IOMMU bus notifier. DRHD search for a new
> device will find the correct IOMMU. The order with this patch is the
> following:
> 1. dmar_pci_bus_add_dev()
> 2. iommu_probe_device()
> 3. iommu_release_device()
> 4. dmar_pci_bus_remove_dev()
>
> Fixes: 59ce0515cdaf ("iommu/vt-d: Update DRHD/RMRR/ATSR device scope")
> Reported-by: Zhang, Bernice <[email protected]>
> Suggested-by: Lu Baolu <[email protected]>
> Signed-off-by: Jacob Pan <[email protected]>
> ---
> drivers/iommu/intel/dmar.c | 69 ++++++++++++++++++++++++++++----------
> drivers/iommu/iommu.c | 1 +
> include/linux/iommu.h | 1 +
> 3 files changed, 53 insertions(+), 18 deletions(-)
>
> diff --git a/drivers/iommu/intel/dmar.c b/drivers/iommu/intel/dmar.c
> index 915bff76fe96..5f4751ba6bb1 100644
> --- a/drivers/iommu/intel/dmar.c
> +++ b/drivers/iommu/intel/dmar.c
> @@ -340,15 +340,19 @@ static inline void vf_inherit_msi_domain(struct
> pci_dev *pdev) dev_set_msi_domain(&pdev->dev,
> dev_get_msi_domain(&physfn->dev)); }
>
> -static int dmar_pci_bus_notifier(struct notifier_block *nb,
> +static int dmar_pci_bus_add_notifier(struct notifier_block *nb,
> unsigned long action, void *data)
> {
> struct pci_dev *pdev = to_pci_dev(data);
> struct dmar_pci_notify_info *info;
>
> - /* Only care about add/remove events for physical functions.
> + if (action != BUS_NOTIFY_ADD_DEVICE)
> + return NOTIFY_DONE;
> +
> + /*
> * For VFs we actually do the lookup based on the corresponding
> - * PF in device_to_iommu() anyway. */
> + * PF in device_to_iommu() anyway.
> + */
> if (pdev->is_virtfn) {
> /*
> * Ensure that the VF device inherits the irq domain of
> the @@ -358,13 +362,34 @@ static int dmar_pci_bus_notifier(struct
> notifier_block *nb,
> * from the PF device, but that's yet another x86'sism to
> * inflict on everybody else.
> */
> - if (action == BUS_NOTIFY_ADD_DEVICE)
> - vf_inherit_msi_domain(pdev);
> + vf_inherit_msi_domain(pdev);
> return NOTIFY_DONE;
> }
>
> - if (action != BUS_NOTIFY_ADD_DEVICE &&
> - action != BUS_NOTIFY_REMOVED_DEVICE)
> + info = dmar_alloc_pci_notify_info(pdev, action);
> + if (!info)
> + return NOTIFY_DONE;
> +
> + down_write(&dmar_global_lock);
> + dmar_pci_bus_add_dev(info);
> + up_write(&dmar_global_lock);
> + dmar_free_pci_notify_info(info);
> +
> + return NOTIFY_OK;
> +}
> +
> +static struct notifier_block dmar_pci_bus_add_nb = {
> + .notifier_call = dmar_pci_bus_add_notifier,
> + .priority = IOMMU_BUS_NOTIFY_PRIORITY + 1,
> +};
> +
> +static int dmar_pci_bus_remove_notifier(struct notifier_block *nb,
> + unsigned long action, void *data)
> +{
> + struct pci_dev *pdev = to_pci_dev(data);
> + struct dmar_pci_notify_info *info;
> +
> + if (pdev->is_virtfn || action != BUS_NOTIFY_REMOVED_DEVICE)
> return NOTIFY_DONE;
>
> info = dmar_alloc_pci_notify_info(pdev, action);
> @@ -372,10 +397,7 @@ static int dmar_pci_bus_notifier(struct
> notifier_block *nb, return NOTIFY_DONE;
>
> down_write(&dmar_global_lock);
> - if (action == BUS_NOTIFY_ADD_DEVICE)
> - dmar_pci_bus_add_dev(info);
> - else if (action == BUS_NOTIFY_REMOVED_DEVICE)
> - dmar_pci_bus_del_dev(info);
> + dmar_pci_bus_del_dev(info);
> up_write(&dmar_global_lock);
>
> dmar_free_pci_notify_info(info);
> @@ -383,11 +405,10 @@ static int dmar_pci_bus_notifier(struct
> notifier_block *nb, return NOTIFY_OK;
> }
>
> -static struct notifier_block dmar_pci_bus_nb = {
> - .notifier_call = dmar_pci_bus_notifier,
> - .priority = INT_MIN,
> +static struct notifier_block dmar_pci_bus_remove_nb = {
> + .notifier_call = dmar_pci_bus_remove_notifier,
> + .priority = IOMMU_BUS_NOTIFY_PRIORITY - 1,
> };
> -
> static struct dmar_drhd_unit *
> dmar_find_dmaru(struct acpi_dmar_hardware_unit *drhd)
> {
> @@ -835,7 +856,17 @@ int __init dmar_dev_scope_init(void)
>
> void __init dmar_register_bus_notifier(void)
> {
> - bus_register_notifier(&pci_bus_type, &dmar_pci_bus_nb);
> + /*
> + * We need two notifiers in that we need to make sure the
> ordering
> + * is enforced as the following:
> + * 1. dmar_pci_bus_add_dev()
> + * 2. iommu_probe_device()
> + * 3. iommu_release_device()
> + * 4. dmar_pci_bus_remove_dev()
> + * Notifier block priority is used to enforce the order
> + */
> + bus_register_notifier(&pci_bus_type, &dmar_pci_bus_add_nb);
> + bus_register_notifier(&pci_bus_type, &dmar_pci_bus_remove_nb);
> }
>
>
> @@ -2151,8 +2182,10 @@ static int __init dmar_free_unused_resources(void)
> if (dmar_in_use())
> return 0;
>
> - if (dmar_dev_scope_status != 1 && !list_empty(&dmar_drhd_units))
> - bus_unregister_notifier(&pci_bus_type, &dmar_pci_bus_nb);
> + if (dmar_dev_scope_status != 1 && !list_empty(&dmar_drhd_units))
> {
> + bus_unregister_notifier(&pci_bus_type,
> &dmar_pci_bus_add_nb);
> + bus_unregister_notifier(&pci_bus_type,
> &dmar_pci_bus_remove_nb);
> + }
>
> down_write(&dmar_global_lock);
> list_for_each_entry_safe(dmaru, dmaru_n, &dmar_drhd_units, list)
> { diff --git a/drivers/iommu/iommu.c b/drivers/iommu/iommu.c
> index 8b86406b7162..6103bcde1f65 100644
> --- a/drivers/iommu/iommu.c
> +++ b/drivers/iommu/iommu.c
> @@ -1841,6 +1841,7 @@ static int iommu_bus_init(struct bus_type *bus,
> const struct iommu_ops *ops) return -ENOMEM;
>
> nb->notifier_call = iommu_bus_notifier;
> + nb->priority = IOMMU_BUS_NOTIFY_PRIORITY;
>
> err = bus_register_notifier(bus, nb);
> if (err)
> diff --git a/include/linux/iommu.h b/include/linux/iommu.h
> index de0c57a567c8..8e13c69980be 100644
> --- a/include/linux/iommu.h
> +++ b/include/linux/iommu.h
> @@ -403,6 +403,7 @@ static inline void iommu_iotlb_gather_init(struct
> iommu_iotlb_gather *gather) };
> }
>
> +#define IOMMU_BUS_NOTIFY_PRIORITY 0
> #define IOMMU_GROUP_NOTIFY_ADD_DEVICE 1 /* Device added */
> #define IOMMU_GROUP_NOTIFY_DEL_DEVICE 2 /* Pre Device
> removed */ #define IOMMU_GROUP_NOTIFY_BIND_DRIVER 3 /* Pre
> Driver bind */


Thanks,

Jacob

2022-01-26 13:35:33

by Lu Baolu

[permalink] [raw]
Subject: Re: [PATCH v2] iommu/vt-d: Fix PCI bus rescan device hot add

Hi Jacob,

On 1/26/22 2:57 AM, Jacob Pan wrote:
> Hi all,
>
> Just wondering if there are any other comments? This fixes a
> regression that can cause system hang.

This looks fine to me. I will queue it for v5.17 if there's no further
comments.

Best regards,
baolu