2024-02-15 07:29:06

by Lu Baolu

[permalink] [raw]
Subject: [PATCH 1/2] iommu/vt-d: Use rbtree to track iommu probed devices

Use a red-black tree(rbtree) to track devices probed by the driver's
probe_device callback. These devices need to be looked up quickly by
a source ID when the hardware reports a fault, either recoverable or
unrecoverable.

Fault reporting paths are critical. Searching a list in this scenario
is inefficient, with an algorithm complexity of O(n). An rbtree is a
self-balancing binary search tree, offering an average search time
complexity of O(log(n)). This significant performance improvement
makes rbtrees a better choice.

Furthermore, rbtrees are implemented on a per-iommu basis, eliminating
the need for global searches and further enhancing efficiency in
critical fault paths. The rbtree is protected by a spin lock with
interrupts disabled to ensure thread-safe access even within interrupt
contexts.

Co-developed-by: Huang Jiaqing <[email protected]>
Signed-off-by: Huang Jiaqing <[email protected]>
Signed-off-by: Lu Baolu <[email protected]>
---
drivers/iommu/intel/iommu.h | 7 +++++
drivers/iommu/intel/dmar.c | 3 +-
drivers/iommu/intel/iommu.c | 62 +++++++++++++++++++++++++++++++++++--
3 files changed, 69 insertions(+), 3 deletions(-)

diff --git a/drivers/iommu/intel/iommu.h b/drivers/iommu/intel/iommu.h
index cf9a28c7fab8..54eeaa8e35a9 100644
--- a/drivers/iommu/intel/iommu.h
+++ b/drivers/iommu/intel/iommu.h
@@ -716,6 +716,11 @@ struct intel_iommu {
struct q_inval *qi; /* Queued invalidation info */
u32 iommu_state[MAX_SR_DMAR_REGS]; /* Store iommu states between suspend and resume.*/

+ /* rb tree for all probed devices */
+ struct rb_root device_rbtree;
+ /* protect the device_rbtree */
+ spinlock_t device_rbtree_lock;
+
#ifdef CONFIG_IRQ_REMAP
struct ir_table *ir_table; /* Interrupt remapping info */
struct irq_domain *ir_domain;
@@ -749,6 +754,8 @@ struct device_domain_info {
struct intel_iommu *iommu; /* IOMMU used by this device */
struct dmar_domain *domain; /* pointer to domain */
struct pasid_table *pasid_table; /* pasid table */
+ /* device tracking node(lookup by PCI RID) */
+ struct rb_node node;
#ifdef CONFIG_INTEL_IOMMU_DEBUGFS
struct dentry *debugfs_dentry; /* pointer to device directory dentry */
#endif
diff --git a/drivers/iommu/intel/dmar.c b/drivers/iommu/intel/dmar.c
index 23cb80d62a9a..f9b63c2875f7 100644
--- a/drivers/iommu/intel/dmar.c
+++ b/drivers/iommu/intel/dmar.c
@@ -1095,7 +1095,8 @@ static int alloc_iommu(struct dmar_drhd_unit *drhd)
iommu->agaw = agaw;
iommu->msagaw = msagaw;
iommu->segment = drhd->segment;
-
+ iommu->device_rbtree = RB_ROOT;
+ spin_lock_init(&iommu->device_rbtree_lock);
iommu->node = NUMA_NO_NODE;

ver = readl(iommu->reg + DMAR_VER_REG);
diff --git a/drivers/iommu/intel/iommu.c b/drivers/iommu/intel/iommu.c
index a81a2be9b870..09009d96e553 100644
--- a/drivers/iommu/intel/iommu.c
+++ b/drivers/iommu/intel/iommu.c
@@ -96,6 +96,55 @@ static phys_addr_t root_entry_uctp(struct root_entry *re)
return re->hi & VTD_PAGE_MASK;
}

+static int device_rid_cmp_key(const void *key, const struct rb_node *node)
+{
+ struct device_domain_info *info =
+ rb_entry(node, struct device_domain_info, node);
+ const u16 *rid_lhs = key;
+
+ if (*rid_lhs < PCI_DEVID(info->bus, info->devfn))
+ return -1;
+
+ if (*rid_lhs > PCI_DEVID(info->bus, info->devfn))
+ return 1;
+
+ return 0;
+}
+
+static int device_rid_cmp(struct rb_node *lhs, const struct rb_node *rhs)
+{
+ struct device_domain_info *info =
+ rb_entry(lhs, struct device_domain_info, node);
+ u16 key = PCI_DEVID(info->bus, info->devfn);
+
+ return device_rid_cmp_key(&key, rhs);
+}
+
+static int device_rbtree_insert(struct intel_iommu *iommu,
+ struct device_domain_info *info)
+{
+ struct rb_node *curr;
+ unsigned long flags;
+
+ spin_lock_irqsave(&iommu->device_rbtree_lock, flags);
+ curr = rb_find_add(&info->node, &iommu->device_rbtree, device_rid_cmp);
+ spin_unlock_irqrestore(&iommu->device_rbtree_lock, flags);
+ if (curr)
+ dev_warn(info->dev, "device already in rbtree\n");
+
+ return curr ? -EEXIST : 0;
+}
+
+static void device_rbtree_remove(struct device_domain_info *info)
+{
+ struct intel_iommu *iommu = info->iommu;
+ unsigned long flags;
+
+ spin_lock_irqsave(&iommu->device_rbtree_lock, flags);
+ rb_erase(&info->node, &iommu->device_rbtree);
+ spin_unlock_irqrestore(&iommu->device_rbtree_lock, flags);
+}
+
/*
* This domain is a statically identity mapping domain.
* 1. This domain creats a static 1:1 mapping to all usable memory.
@@ -4264,25 +4313,34 @@ static struct iommu_device *intel_iommu_probe_device(struct device *dev)
}

dev_iommu_priv_set(dev, info);
+ ret = device_rbtree_insert(iommu, info);
+ if (ret)
+ goto free;

if (sm_supported(iommu) && !dev_is_real_dma_subdevice(dev)) {
ret = intel_pasid_alloc_table(dev);
if (ret) {
dev_err(dev, "PASID table allocation failed\n");
- kfree(info);
- return ERR_PTR(ret);
+ goto clear_rbtree;
}
}

intel_iommu_debugfs_create_dev(info);

return &iommu->iommu;
+clear_rbtree:
+ device_rbtree_remove(info);
+free:
+ kfree(info);
+
+ return ERR_PTR(ret);
}

static void intel_iommu_release_device(struct device *dev)
{
struct device_domain_info *info = dev_iommu_priv_get(dev);

+ device_rbtree_remove(info);
dmar_remove_one_dev_info(dev);
intel_pasid_free_table(dev);
intel_iommu_debugfs_remove_dev(info);
--
2.34.1



2024-02-15 17:48:18

by Jason Gunthorpe

[permalink] [raw]
Subject: Re: [PATCH 1/2] iommu/vt-d: Use rbtree to track iommu probed devices

On Thu, Feb 15, 2024 at 03:22:48PM +0800, Lu Baolu wrote:
> Use a red-black tree(rbtree) to track devices probed by the driver's
> probe_device callback. These devices need to be looked up quickly by
> a source ID when the hardware reports a fault, either recoverable or
> unrecoverable.
>
> Fault reporting paths are critical. Searching a list in this scenario
> is inefficient, with an algorithm complexity of O(n). An rbtree is a
> self-balancing binary search tree, offering an average search time
> complexity of O(log(n)). This significant performance improvement
> makes rbtrees a better choice.
>
> Furthermore, rbtrees are implemented on a per-iommu basis, eliminating
> the need for global searches and further enhancing efficiency in
> critical fault paths. The rbtree is protected by a spin lock with
> interrupts disabled to ensure thread-safe access even within interrupt
> contexts.
>
> Co-developed-by: Huang Jiaqing <[email protected]>
> Signed-off-by: Huang Jiaqing <[email protected]>
> Signed-off-by: Lu Baolu <[email protected]>
> ---
> drivers/iommu/intel/iommu.h | 7 +++++
> drivers/iommu/intel/dmar.c | 3 +-
> drivers/iommu/intel/iommu.c | 62 +++++++++++++++++++++++++++++++++++--
> 3 files changed, 69 insertions(+), 3 deletions(-)

Reviewed-by: Jason Gunthorpe <[email protected]>

> +static int device_rbtree_insert(struct intel_iommu *iommu,
> + struct device_domain_info *info)
> +{
> + struct rb_node *curr;
> + unsigned long flags;
> +
> + spin_lock_irqsave(&iommu->device_rbtree_lock, flags);
> + curr = rb_find_add(&info->node, &iommu->device_rbtree, device_rid_cmp);
> + spin_unlock_irqrestore(&iommu->device_rbtree_lock, flags);
> + if (curr)
> + dev_warn(info->dev, "device already in rbtree\n");

I would suggest

WARN_ON(curr);

Something has gone really wonky at this point, right?

Jason

2024-02-18 04:23:15

by Lu Baolu

[permalink] [raw]
Subject: Re: [PATCH 1/2] iommu/vt-d: Use rbtree to track iommu probed devices

On 2024/2/16 1:47, Jason Gunthorpe wrote:
> On Thu, Feb 15, 2024 at 03:22:48PM +0800, Lu Baolu wrote:
>> Use a red-black tree(rbtree) to track devices probed by the driver's
>> probe_device callback. These devices need to be looked up quickly by
>> a source ID when the hardware reports a fault, either recoverable or
>> unrecoverable.
>>
>> Fault reporting paths are critical. Searching a list in this scenario
>> is inefficient, with an algorithm complexity of O(n). An rbtree is a
>> self-balancing binary search tree, offering an average search time
>> complexity of O(log(n)). This significant performance improvement
>> makes rbtrees a better choice.
>>
>> Furthermore, rbtrees are implemented on a per-iommu basis, eliminating
>> the need for global searches and further enhancing efficiency in
>> critical fault paths. The rbtree is protected by a spin lock with
>> interrupts disabled to ensure thread-safe access even within interrupt
>> contexts.
>>
>> Co-developed-by: Huang Jiaqing<[email protected]>
>> Signed-off-by: Huang Jiaqing<[email protected]>
>> Signed-off-by: Lu Baolu<[email protected]>
>> ---
>> drivers/iommu/intel/iommu.h | 7 +++++
>> drivers/iommu/intel/dmar.c | 3 +-
>> drivers/iommu/intel/iommu.c | 62 +++++++++++++++++++++++++++++++++++--
>> 3 files changed, 69 insertions(+), 3 deletions(-)
> Reviewed-by: Jason Gunthorpe<[email protected]>
>
>> +static int device_rbtree_insert(struct intel_iommu *iommu,
>> + struct device_domain_info *info)
>> +{
>> + struct rb_node *curr;
>> + unsigned long flags;
>> +
>> + spin_lock_irqsave(&iommu->device_rbtree_lock, flags);
>> + curr = rb_find_add(&info->node, &iommu->device_rbtree, device_rid_cmp);
>> + spin_unlock_irqrestore(&iommu->device_rbtree_lock, flags);
>> + if (curr)
>> + dev_warn(info->dev, "device already in rbtree\n");
> I would suggest
>
> WARN_ON(curr);
>
> Something has gone really wonky at this point, right?

Yes. This is not expected, and it is worth a WARN_ON().

Best regards,
baolu

2024-02-19 04:05:20

by Lu Baolu

[permalink] [raw]
Subject: Re: [PATCH 1/2] iommu/vt-d: Use rbtree to track iommu probed devices

On 2024/2/19 10:45, Ethan Zhao wrote:
>> @@ -4264,25 +4313,34 @@ static struct iommu_device
>> *intel_iommu_probe_device(struct device *dev)
>>       }
>>       dev_iommu_priv_set(dev, info);
>> +    ret = device_rbtree_insert(iommu, info);
>> +    if (ret)
>> +        goto free;
>>       if (sm_supported(iommu) && !dev_is_real_dma_subdevice(dev)) {
>>           ret = intel_pasid_alloc_table(dev);
>>           if (ret) {
>>               dev_err(dev, "PASID table allocation failed\n");
>> -            kfree(info);
>> -            return ERR_PTR(ret);
>> +            goto clear_rbtree;
>>           }
>>       }
>>       intel_iommu_debugfs_create_dev(info);
>>       return &iommu->iommu;
>> +clear_rbtree:
>> +    device_rbtree_remove(info);
>> +free:
>> +    kfree(info);
>> +
>> +    return ERR_PTR(ret);
>>   }
>>   static void intel_iommu_release_device(struct device *dev)
>>   {
>>       struct device_domain_info *info = dev_iommu_priv_get(dev);
>> +    device_rbtree_remove(info);
>
> Perhpas too early here to remove dev from the rbtree, if it is wanted in
> devTLB invalidation steps in intel_pasid_tear_down_entry().

Perhaps the caller of device_rbtree_find() should not depend on the
order in the release_device callback. For the device TLB invalidation
timed-out case, probably it could be checked in this way:

struct device *dev = device_rbtree_find(iommu, ite_sid);
if (!dev || !pci_device_is_present(to_pci_dev(dev)))
return -ETIMEDOUT;

Best regards,
baolu

2024-02-19 06:47:23

by Lu Baolu

[permalink] [raw]
Subject: Re: [PATCH 1/2] iommu/vt-d: Use rbtree to track iommu probed devices

On 2024/2/19 13:33, Ethan Zhao wrote:
> On 2/19/2024 12:04 PM, Baolu Lu wrote:
>> On 2024/2/19 10:45, Ethan Zhao wrote:
>>>> @@ -4264,25 +4313,34 @@ static struct iommu_device
>>>> *intel_iommu_probe_device(struct device *dev)
>>>>       }
>>>>       dev_iommu_priv_set(dev, info);
>>>> +    ret = device_rbtree_insert(iommu, info);
>>>> +    if (ret)
>>>> +        goto free;
>>>>       if (sm_supported(iommu) && !dev_is_real_dma_subdevice(dev)) {
>>>>           ret = intel_pasid_alloc_table(dev);
>>>>           if (ret) {
>>>>               dev_err(dev, "PASID table allocation failed\n");
>>>> -            kfree(info);
>>>> -            return ERR_PTR(ret);
>>>> +            goto clear_rbtree;
>>>>           }
>>>>       }
>>>>       intel_iommu_debugfs_create_dev(info);
>>>>       return &iommu->iommu;
>>>> +clear_rbtree:
>>>> +    device_rbtree_remove(info);
>>>> +free:
>>>> +    kfree(info);
>>>> +
>>>> +    return ERR_PTR(ret);
>>>>   }
>>>>   static void intel_iommu_release_device(struct device *dev)
>>>>   {
>>>>       struct device_domain_info *info = dev_iommu_priv_get(dev);
>>>> +    device_rbtree_remove(info);
>>>
>>> Perhpas too early here to remove dev from the rbtree, if it is wanted in
>>> devTLB invalidation steps in intel_pasid_tear_down_entry().
>>
>> Perhaps the caller of device_rbtree_find() should not depend on the
>
> I didn't catch up here. seems have to maintain the lifecycle as PCI
> subsystem
> does, or there would be mutli instances for the same BDF(e.g. the device is
> removed then plugged, again and again.....in the same slot) in the rbtree ?

There should not be multiple instances for a same BDF. The lifecycle of
a device is managed by the device and driver core. The iommu subsystem
registers a notification to the core and take actions on device ADD and
REMOVE events.

Best regards,
baolu

2024-02-19 06:49:47

by Ethan Zhao

[permalink] [raw]
Subject: Re: [PATCH 1/2] iommu/vt-d: Use rbtree to track iommu probed devices

On 2/19/2024 12:04 PM, Baolu Lu wrote:
> On 2024/2/19 10:45, Ethan Zhao wrote:
>>> @@ -4264,25 +4313,34 @@ static struct iommu_device
>>> *intel_iommu_probe_device(struct device *dev)
>>>       }
>>>       dev_iommu_priv_set(dev, info);
>>> +    ret = device_rbtree_insert(iommu, info);
>>> +    if (ret)
>>> +        goto free;
>>>       if (sm_supported(iommu) && !dev_is_real_dma_subdevice(dev)) {
>>>           ret = intel_pasid_alloc_table(dev);
>>>           if (ret) {
>>>               dev_err(dev, "PASID table allocation failed\n");
>>> -            kfree(info);
>>> -            return ERR_PTR(ret);
>>> +            goto clear_rbtree;
>>>           }
>>>       }
>>>       intel_iommu_debugfs_create_dev(info);
>>>       return &iommu->iommu;
>>> +clear_rbtree:
>>> +    device_rbtree_remove(info);
>>> +free:
>>> +    kfree(info);
>>> +
>>> +    return ERR_PTR(ret);
>>>   }
>>>   static void intel_iommu_release_device(struct device *dev)
>>>   {
>>>       struct device_domain_info *info = dev_iommu_priv_get(dev);
>>> +    device_rbtree_remove(info);
>>
>> Perhpas too early here to remove dev from the rbtree, if it is wanted in
>> devTLB invalidation steps in intel_pasid_tear_down_entry().
>
> Perhaps the caller of device_rbtree_find() should not depend on the

I didn't catch up here. seems have to maintain the lifecycle as PCI subsystem
does, or there would be mutli instances for the same BDF(e.g. the device is
removed then plugged, again and again.....in the same slot) in the rbtree ?

> order in the release_device callback. For the device TLB invalidation
> timed-out case, probably it could be checked in this way:
>
>     struct device *dev = device_rbtree_find(iommu, ite_sid);
>     if (!dev || !pci_device_is_present(to_pci_dev(dev)))
>         return -ETIMEDOUT;

looks much advanced with such infrastructure !

Thanks,
Ethan

>
> Best regards,
> baolu

2024-02-19 08:02:18

by Ethan Zhao

[permalink] [raw]
Subject: Re: [PATCH 1/2] iommu/vt-d: Use rbtree to track iommu probed devices


On 2/15/2024 3:22 PM, Lu Baolu wrote:
> Use a red-black tree(rbtree) to track devices probed by the driver's
> probe_device callback. These devices need to be looked up quickly by
> a source ID when the hardware reports a fault, either recoverable or
> unrecoverable.
>
> Fault reporting paths are critical. Searching a list in this scenario
> is inefficient, with an algorithm complexity of O(n). An rbtree is a
> self-balancing binary search tree, offering an average search time
> complexity of O(log(n)). This significant performance improvement
> makes rbtrees a better choice.
>
> Furthermore, rbtrees are implemented on a per-iommu basis, eliminating
> the need for global searches and further enhancing efficiency in
> critical fault paths. The rbtree is protected by a spin lock with
> interrupts disabled to ensure thread-safe access even within interrupt
> contexts.
>
> Co-developed-by: Huang Jiaqing <[email protected]>
> Signed-off-by: Huang Jiaqing <[email protected]>
> Signed-off-by: Lu Baolu <[email protected]>
> ---
> drivers/iommu/intel/iommu.h | 7 +++++
> drivers/iommu/intel/dmar.c | 3 +-
> drivers/iommu/intel/iommu.c | 62 +++++++++++++++++++++++++++++++++++--
> 3 files changed, 69 insertions(+), 3 deletions(-)
>
> diff --git a/drivers/iommu/intel/iommu.h b/drivers/iommu/intel/iommu.h
> index cf9a28c7fab8..54eeaa8e35a9 100644
> --- a/drivers/iommu/intel/iommu.h
> +++ b/drivers/iommu/intel/iommu.h
> @@ -716,6 +716,11 @@ struct intel_iommu {
> struct q_inval *qi; /* Queued invalidation info */
> u32 iommu_state[MAX_SR_DMAR_REGS]; /* Store iommu states between suspend and resume.*/
>
> + /* rb tree for all probed devices */
> + struct rb_root device_rbtree;
> + /* protect the device_rbtree */
> + spinlock_t device_rbtree_lock;
> +
> #ifdef CONFIG_IRQ_REMAP
> struct ir_table *ir_table; /* Interrupt remapping info */
> struct irq_domain *ir_domain;
> @@ -749,6 +754,8 @@ struct device_domain_info {
> struct intel_iommu *iommu; /* IOMMU used by this device */
> struct dmar_domain *domain; /* pointer to domain */
> struct pasid_table *pasid_table; /* pasid table */
> + /* device tracking node(lookup by PCI RID) */
> + struct rb_node node;
> #ifdef CONFIG_INTEL_IOMMU_DEBUGFS
> struct dentry *debugfs_dentry; /* pointer to device directory dentry */
> #endif
> diff --git a/drivers/iommu/intel/dmar.c b/drivers/iommu/intel/dmar.c
> index 23cb80d62a9a..f9b63c2875f7 100644
> --- a/drivers/iommu/intel/dmar.c
> +++ b/drivers/iommu/intel/dmar.c
> @@ -1095,7 +1095,8 @@ static int alloc_iommu(struct dmar_drhd_unit *drhd)
> iommu->agaw = agaw;
> iommu->msagaw = msagaw;
> iommu->segment = drhd->segment;
> -
> + iommu->device_rbtree = RB_ROOT;
> + spin_lock_init(&iommu->device_rbtree_lock);
> iommu->node = NUMA_NO_NODE;
>
> ver = readl(iommu->reg + DMAR_VER_REG);
> diff --git a/drivers/iommu/intel/iommu.c b/drivers/iommu/intel/iommu.c
> index a81a2be9b870..09009d96e553 100644
> --- a/drivers/iommu/intel/iommu.c
> +++ b/drivers/iommu/intel/iommu.c
> @@ -96,6 +96,55 @@ static phys_addr_t root_entry_uctp(struct root_entry *re)
> return re->hi & VTD_PAGE_MASK;
> }
>
> +static int device_rid_cmp_key(const void *key, const struct rb_node *node)
> +{
> + struct device_domain_info *info =
> + rb_entry(node, struct device_domain_info, node);
> + const u16 *rid_lhs = key;
> +
> + if (*rid_lhs < PCI_DEVID(info->bus, info->devfn))
> + return -1;
> +
> + if (*rid_lhs > PCI_DEVID(info->bus, info->devfn))
> + return 1;
> +
> + return 0;
> +}
> +
> +static int device_rid_cmp(struct rb_node *lhs, const struct rb_node *rhs)
> +{
> + struct device_domain_info *info =
> + rb_entry(lhs, struct device_domain_info, node);
> + u16 key = PCI_DEVID(info->bus, info->devfn);
> +
> + return device_rid_cmp_key(&key, rhs);
> +}
> +
> +static int device_rbtree_insert(struct intel_iommu *iommu,
> + struct device_domain_info *info)
> +{
> + struct rb_node *curr;
> + unsigned long flags;
> +
> + spin_lock_irqsave(&iommu->device_rbtree_lock, flags);
> + curr = rb_find_add(&info->node, &iommu->device_rbtree, device_rid_cmp);
> + spin_unlock_irqrestore(&iommu->device_rbtree_lock, flags);
> + if (curr)
> + dev_warn(info->dev, "device already in rbtree\n");
> +
> + return curr ? -EEXIST : 0;
> +}
> +
> +static void device_rbtree_remove(struct device_domain_info *info)
> +{
> + struct intel_iommu *iommu = info->iommu;
> + unsigned long flags;
> +
> + spin_lock_irqsave(&iommu->device_rbtree_lock, flags);
> + rb_erase(&info->node, &iommu->device_rbtree);
> + spin_unlock_irqrestore(&iommu->device_rbtree_lock, flags);
> +}
> +
> /*
> * This domain is a statically identity mapping domain.
> * 1. This domain creats a static 1:1 mapping to all usable memory.
> @@ -4264,25 +4313,34 @@ static struct iommu_device *intel_iommu_probe_device(struct device *dev)
> }
>
> dev_iommu_priv_set(dev, info);
> + ret = device_rbtree_insert(iommu, info);
> + if (ret)
> + goto free;
>
> if (sm_supported(iommu) && !dev_is_real_dma_subdevice(dev)) {
> ret = intel_pasid_alloc_table(dev);
> if (ret) {
> dev_err(dev, "PASID table allocation failed\n");
> - kfree(info);
> - return ERR_PTR(ret);
> + goto clear_rbtree;
> }
> }
>
> intel_iommu_debugfs_create_dev(info);
>
> return &iommu->iommu;
> +clear_rbtree:
> + device_rbtree_remove(info);
> +free:
> + kfree(info);
> +
> + return ERR_PTR(ret);
> }
>
> static void intel_iommu_release_device(struct device *dev)
> {
> struct device_domain_info *info = dev_iommu_priv_get(dev);
>
> + device_rbtree_remove(info);

Perhpas too early here to remove dev from the rbtree, if it is wanted in
devTLB invalidation steps in intel_pasid_tear_down_entry().

Thanks,
Ethan

> dmar_remove_one_dev_info(dev);
> intel_pasid_free_table(dev);
> intel_iommu_debugfs_remove_dev(info);

2024-02-19 11:41:23

by Ethan Zhao

[permalink] [raw]
Subject: Re: [PATCH 1/2] iommu/vt-d: Use rbtree to track iommu probed devices

On 2/19/2024 2:47 PM, Baolu Lu wrote:
> On 2024/2/19 13:33, Ethan Zhao wrote:
>> On 2/19/2024 12:04 PM, Baolu Lu wrote:
>>> On 2024/2/19 10:45, Ethan Zhao wrote:
>>>>> @@ -4264,25 +4313,34 @@ static struct iommu_device
>>>>> *intel_iommu_probe_device(struct device *dev)
>>>>>       }
>>>>>       dev_iommu_priv_set(dev, info);
>>>>> +    ret = device_rbtree_insert(iommu, info);
>>>>> +    if (ret)
>>>>> +        goto free;
>>>>>       if (sm_supported(iommu) && !dev_is_real_dma_subdevice(dev)) {
>>>>>           ret = intel_pasid_alloc_table(dev);
>>>>>           if (ret) {
>>>>>               dev_err(dev, "PASID table allocation failed\n");
>>>>> -            kfree(info);
>>>>> -            return ERR_PTR(ret);
>>>>> +            goto clear_rbtree;
>>>>>           }
>>>>>       }
>>>>>       intel_iommu_debugfs_create_dev(info);
>>>>>       return &iommu->iommu;
>>>>> +clear_rbtree:
>>>>> +    device_rbtree_remove(info);
>>>>> +free:
>>>>> +    kfree(info);
>>>>> +
>>>>> +    return ERR_PTR(ret);
>>>>>   }
>>>>>   static void intel_iommu_release_device(struct device *dev)
>>>>>   {
>>>>>       struct device_domain_info *info = dev_iommu_priv_get(dev);
>>>>> +    device_rbtree_remove(info);
>>>>
>>>> Perhpas too early here to remove dev from the rbtree, if it is
>>>> wanted in
>>>> devTLB invalidation steps in intel_pasid_tear_down_entry().
>>>
>>> Perhaps the caller of device_rbtree_find() should not depend on the
>>
>> I didn't catch up here. seems have to maintain the lifecycle as PCI
>> subsystem
>> does, or there would be mutli instances for the same BDF(e.g. the
>> device is
>> removed then plugged, again and again.....in the same slot) in the
>> rbtree ?
>
> There should not be multiple instances for a same BDF. The lifecycle of
> a device is managed by the device and driver core. The iommu subsystem
> registers a notification to the core and take actions on device ADD and
> REMOVE events.

Move device_rbtree_remove(info) to end of this REMOVED notifier callback, or such
beautiful code wouldn't work :)

struct device *dev = device_rbtree_find(iommu, ite_sid);
if (!dev || !pci_device_is_present(to_pci_dev(dev)))
  return -ETIMEDOUT;

Thanks,
Ethan



>
> Best regards,
> baolu
>