2024-04-19 19:51:20

by Kubalewski, Arkadiusz

[permalink] [raw]
Subject: [PATCH net] dpll: fix dpll_pin_registration missing refcount

In scenario where pin is registered with multiple parent pins via
dpll_pin_on_pin_register(..), belonging to the same dpll device,
and each time with the same set of ops/priv data, a reference
between a pin and dpll is created once and then refcounted, at the same
time the dpll_pin_registration is only checked for existence and created
if does not exist. This is wrong, as for the same ops/priv data a
registration shall be also refcounted, a child pin is also registered
with dpll device, until each child is unregistered the registration data
shall exist.

Add refcount and check if all registrations are dropped before releasing
dpll_pin_registration resources.

Currently, the following crash/call trace is produced when ice driver is
removed on the system with installed NIC which includes dpll device:

WARNING: CPU: 51 PID: 9155 at drivers/dpll/dpll_core.c:809 dpll_pin_ops+0x20/0x30
Call Trace:
dpll_msg_add_pin_freq+0x37/0x1d0
dpll_cmd_pin_get_one+0x1c0/0x400
? __nlmsg_put+0x63/0x80
dpll_pin_event_send+0x93/0x140
dpll_pin_on_pin_unregister+0x3f/0x100
ice_dpll_deinit_pins+0xa1/0x230 [ice]
ice_remove+0xf1/0x210 [ice]

Fixes: b446631f355e ("dpll: fix dpll_xa_ref_*_del() for multiple registrations")
Reviewed-by: Przemek Kitszel <[email protected]>
Signed-off-by: Arkadiusz Kubalewski <[email protected]>
---
drivers/dpll/dpll_core.c | 17 +++++++++++++----
1 file changed, 13 insertions(+), 4 deletions(-)

diff --git a/drivers/dpll/dpll_core.c b/drivers/dpll/dpll_core.c
index 64eaca80d736..7ababa327c0c 100644
--- a/drivers/dpll/dpll_core.c
+++ b/drivers/dpll/dpll_core.c
@@ -40,6 +40,7 @@ struct dpll_device_registration {

struct dpll_pin_registration {
struct list_head list;
+ refcount_t refcount;
const struct dpll_pin_ops *ops;
void *priv;
};
@@ -81,6 +82,7 @@ dpll_xa_ref_pin_add(struct xarray *xa_pins, struct dpll_pin *pin,
reg = dpll_pin_registration_find(ref, ops, priv);
if (reg) {
refcount_inc(&ref->refcount);
+ refcount_inc(&reg->refcount);
return 0;
}
ref_exists = true;
@@ -113,6 +115,7 @@ dpll_xa_ref_pin_add(struct xarray *xa_pins, struct dpll_pin *pin,
reg->priv = priv;
if (ref_exists)
refcount_inc(&ref->refcount);
+ refcount_set(&reg->refcount, 1);
list_add_tail(&reg->list, &ref->registration_list);

return 0;
@@ -131,8 +134,10 @@ static int dpll_xa_ref_pin_del(struct xarray *xa_pins, struct dpll_pin *pin,
reg = dpll_pin_registration_find(ref, ops, priv);
if (WARN_ON(!reg))
return -EINVAL;
- list_del(&reg->list);
- kfree(reg);
+ if (refcount_dec_and_test(&reg->refcount)) {
+ list_del(&reg->list);
+ kfree(reg);
+ }
if (refcount_dec_and_test(&ref->refcount)) {
xa_erase(xa_pins, i);
WARN_ON(!list_empty(&ref->registration_list));
@@ -160,6 +165,7 @@ dpll_xa_ref_dpll_add(struct xarray *xa_dplls, struct dpll_device *dpll,
reg = dpll_pin_registration_find(ref, ops, priv);
if (reg) {
refcount_inc(&ref->refcount);
+ refcount_inc(&reg->refcount);
return 0;
}
ref_exists = true;
@@ -192,6 +198,7 @@ dpll_xa_ref_dpll_add(struct xarray *xa_dplls, struct dpll_device *dpll,
reg->priv = priv;
if (ref_exists)
refcount_inc(&ref->refcount);
+ refcount_set(&reg->refcount, 1);
list_add_tail(&reg->list, &ref->registration_list);

return 0;
@@ -211,8 +218,10 @@ dpll_xa_ref_dpll_del(struct xarray *xa_dplls, struct dpll_device *dpll,
reg = dpll_pin_registration_find(ref, ops, priv);
if (WARN_ON(!reg))
return;
- list_del(&reg->list);
- kfree(reg);
+ if (refcount_dec_and_test(&reg->refcount)) {
+ list_del(&reg->list);
+ kfree(reg);
+ }
if (refcount_dec_and_test(&ref->refcount)) {
xa_erase(xa_dplls, i);
WARN_ON(!list_empty(&ref->registration_list));

base-commit: ac1a21db32eda8a09076bad025d7b848dd086d28
--
2.38.1



2024-04-22 13:31:56

by Jiri Pirko

[permalink] [raw]
Subject: Re: [PATCH net] dpll: fix dpll_pin_registration missing refcount

Fri, Apr 19, 2024 at 09:47:11PM CEST, [email protected] wrote:
>In scenario where pin is registered with multiple parent pins via
>dpll_pin_on_pin_register(..), belonging to the same dpll device,
>and each time with the same set of ops/priv data, a reference
>between a pin and dpll is created once and then refcounted, at the same
>time the dpll_pin_registration is only checked for existence and created
>if does not exist. This is wrong, as for the same ops/priv data a
>registration shall be also refcounted, a child pin is also registered
>with dpll device, until each child is unregistered the registration data
>shall exist.

I read this 3 time, don't undestand clearly the matter of the problem.
Could you perhaps make it somehow visual?


>
>Add refcount and check if all registrations are dropped before releasing
>dpll_pin_registration resources.
>
>Currently, the following crash/call trace is produced when ice driver is
>removed on the system with installed NIC which includes dpll device:
>
>WARNING: CPU: 51 PID: 9155 at drivers/dpll/dpll_core.c:809 dpll_pin_ops+0x20/0x30
>Call Trace:
> dpll_msg_add_pin_freq+0x37/0x1d0
> dpll_cmd_pin_get_one+0x1c0/0x400
> ? __nlmsg_put+0x63/0x80
> dpll_pin_event_send+0x93/0x140
> dpll_pin_on_pin_unregister+0x3f/0x100
> ice_dpll_deinit_pins+0xa1/0x230 [ice]
> ice_remove+0xf1/0x210 [ice]
>
>Fixes: b446631f355e ("dpll: fix dpll_xa_ref_*_del() for multiple registrations")
>Reviewed-by: Przemek Kitszel <[email protected]>
>Signed-off-by: Arkadiusz Kubalewski <[email protected]>
>---
> drivers/dpll/dpll_core.c | 17 +++++++++++++----
> 1 file changed, 13 insertions(+), 4 deletions(-)
>
>diff --git a/drivers/dpll/dpll_core.c b/drivers/dpll/dpll_core.c
>index 64eaca80d736..7ababa327c0c 100644
>--- a/drivers/dpll/dpll_core.c
>+++ b/drivers/dpll/dpll_core.c
>@@ -40,6 +40,7 @@ struct dpll_device_registration {
>
> struct dpll_pin_registration {
> struct list_head list;
>+ refcount_t refcount;
> const struct dpll_pin_ops *ops;
> void *priv;
> };
>@@ -81,6 +82,7 @@ dpll_xa_ref_pin_add(struct xarray *xa_pins, struct dpll_pin *pin,
> reg = dpll_pin_registration_find(ref, ops, priv);
> if (reg) {
> refcount_inc(&ref->refcount);
>+ refcount_inc(&reg->refcount);

I don't like this. Registration is supposed to be created for a single
registration. Not you create one for many and refcount it.

Instead of this, I suggest to extend __dpll_pin_register() for a
"void *cookie" arg. That would be NULL for dpll_pin_register() caller.
For dpll_pin_on_pin_register() caller, it would pass "parent" pointer.

Than dpll_xa_ref_pin_add() can pass this cookie value to
dpll_pin_registration_find(). The if case there would look like:
if (reg->ops == ops && reg->priv == priv && reg->cookie == cookie)

This way, we will create separate "sub-registration" for each parent.

Makes sense?

> return 0;
> }
> ref_exists = true;
>@@ -113,6 +115,7 @@ dpll_xa_ref_pin_add(struct xarray *xa_pins, struct dpll_pin *pin,
> reg->priv = priv;
> if (ref_exists)
> refcount_inc(&ref->refcount);
>+ refcount_set(&reg->refcount, 1);
> list_add_tail(&reg->list, &ref->registration_list);
>
> return 0;
>@@ -131,8 +134,10 @@ static int dpll_xa_ref_pin_del(struct xarray *xa_pins, struct dpll_pin *pin,
> reg = dpll_pin_registration_find(ref, ops, priv);
> if (WARN_ON(!reg))
> return -EINVAL;
>- list_del(&reg->list);
>- kfree(reg);
>+ if (refcount_dec_and_test(&reg->refcount)) {
>+ list_del(&reg->list);
>+ kfree(reg);
>+ }
> if (refcount_dec_and_test(&ref->refcount)) {
> xa_erase(xa_pins, i);
> WARN_ON(!list_empty(&ref->registration_list));
>@@ -160,6 +165,7 @@ dpll_xa_ref_dpll_add(struct xarray *xa_dplls, struct dpll_device *dpll,
> reg = dpll_pin_registration_find(ref, ops, priv);
> if (reg) {
> refcount_inc(&ref->refcount);
>+ refcount_inc(&reg->refcount);
> return 0;
> }
> ref_exists = true;
>@@ -192,6 +198,7 @@ dpll_xa_ref_dpll_add(struct xarray *xa_dplls, struct dpll_device *dpll,
> reg->priv = priv;
> if (ref_exists)
> refcount_inc(&ref->refcount);
>+ refcount_set(&reg->refcount, 1);
> list_add_tail(&reg->list, &ref->registration_list);
>
> return 0;
>@@ -211,8 +218,10 @@ dpll_xa_ref_dpll_del(struct xarray *xa_dplls, struct dpll_device *dpll,
> reg = dpll_pin_registration_find(ref, ops, priv);
> if (WARN_ON(!reg))
> return;
>- list_del(&reg->list);
>- kfree(reg);
>+ if (refcount_dec_and_test(&reg->refcount)) {
>+ list_del(&reg->list);
>+ kfree(reg);
>+ }
> if (refcount_dec_and_test(&ref->refcount)) {
> xa_erase(xa_dplls, i);
> WARN_ON(!list_empty(&ref->registration_list));
>
>base-commit: ac1a21db32eda8a09076bad025d7b848dd086d28
>--
>2.38.1
>

2024-04-23 11:04:41

by Kubalewski, Arkadiusz

[permalink] [raw]
Subject: RE: [PATCH net] dpll: fix dpll_pin_registration missing refcount

>From: Jiri Pirko <[email protected]>
>Sent: Monday, April 22, 2024 3:31 PM
>
>Fri, Apr 19, 2024 at 09:47:11PM CEST, [email protected] wrote:
>>In scenario where pin is registered with multiple parent pins via
>>dpll_pin_on_pin_register(..), belonging to the same dpll device,
>>and each time with the same set of ops/priv data, a reference
>>between a pin and dpll is created once and then refcounted, at the same
>>time the dpll_pin_registration is only checked for existence and created
>>if does not exist. This is wrong, as for the same ops/priv data a
>>registration shall be also refcounted, a child pin is also registered
>>with dpll device, until each child is unregistered the registration data
>>shall exist.
>
>I read this 3 time, don't undestand clearly the matter of the problem.
>Could you perhaps make it somehow visual?
>

Many thanks for all your insights on this!

Register child pin twice (via dpll_pin_on_pin_register(..)) with two different
parents but the same ops/priv. Then, a single dpll_pin_on_pin_unregister(..) will
cause below stack trace.

It was good to add a fix in b446631f355e, but the fix did not cover a multi-parent
registration case, here I am fixing it.

>
>>
>>Add refcount and check if all registrations are dropped before releasing
>>dpll_pin_registration resources.
>>
>>Currently, the following crash/call trace is produced when ice driver is
>>removed on the system with installed NIC which includes dpll device:
>>
>>WARNING: CPU: 51 PID: 9155 at drivers/dpll/dpll_core.c:809 dpll_pin_ops+0x20/0x30
>>Call Trace:
>> dpll_msg_add_pin_freq+0x37/0x1d0
>> dpll_cmd_pin_get_one+0x1c0/0x400
>> ? __nlmsg_put+0x63/0x80
>> dpll_pin_event_send+0x93/0x140
>> dpll_pin_on_pin_unregister+0x3f/0x100
>> ice_dpll_deinit_pins+0xa1/0x230 [ice]
>> ice_remove+0xf1/0x210 [ice]
>>
>>Fixes: b446631f355e ("dpll: fix dpll_xa_ref_*_del() for multiple registrations")
>>Reviewed-by: Przemek Kitszel <[email protected]>
>>Signed-off-by: Arkadiusz Kubalewski <[email protected]>
>>---
>> drivers/dpll/dpll_core.c | 17 +++++++++++++----
>> 1 file changed, 13 insertions(+), 4 deletions(-)
>>
>>diff --git a/drivers/dpll/dpll_core.c b/drivers/dpll/dpll_core.c
>>index 64eaca80d736..7ababa327c0c 100644
>>--- a/drivers/dpll/dpll_core.c
>>+++ b/drivers/dpll/dpll_core.c
>>@@ -40,6 +40,7 @@ struct dpll_device_registration {
>>
>> struct dpll_pin_registration {
>> struct list_head list;
>>+ refcount_t refcount;
>> const struct dpll_pin_ops *ops;
>> void *priv;
>> };
>>@@ -81,6 +82,7 @@ dpll_xa_ref_pin_add(struct xarray *xa_pins, struct
>>dpll_pin *pin,
>> reg = dpll_pin_registration_find(ref, ops, priv);
>> if (reg) {
>> refcount_inc(&ref->refcount);
>>+ refcount_inc(&reg->refcount);
>
>I don't like this. Registration is supposed to be created for a single
>registration. Not you create one for many and refcount it.
>

If register function is called with the same priv/ops, why to do all you
suggested below instead of just refcounting?

>Instead of this, I suggest to extend __dpll_pin_register() for a
>"void *cookie" arg. That would be NULL for dpll_pin_register() caller.
>For dpll_pin_on_pin_register() caller, it would pass "parent" pointer.
>
>Than dpll_xa_ref_pin_add() can pass this cookie value to
>dpll_pin_registration_find(). The if case there would look like:
>if (reg->ops == ops && reg->priv == priv && reg->cookie == cookie)
>
>This way, we will create separate "sub-registration" for each parent.
>
>Makes sense?
>

It would do, but only if the code would anyhow use that new parent
sub-registration explicitly for anything else later.

Creating a sub-registration with additional parent cookie just to create a
second registration with only difference parent cookie and not using the
cookie even once after, seems overshot for a fix.

What you suggest is rather a refactor, but again needed only after we would
make use of the parent cooking somewhere else.
And such refactor shall target next-tree, right?

Thank you!
Arkadiusz

>> return 0;
>> }
>> ref_exists = true;
>>@@ -113,6 +115,7 @@ dpll_xa_ref_pin_add(struct xarray *xa_pins, struct
>>dpll_pin *pin,
>> reg->priv = priv;
>> if (ref_exists)
>> refcount_inc(&ref->refcount);
>>+ refcount_set(&reg->refcount, 1);
>> list_add_tail(&reg->list, &ref->registration_list);
>>
>> return 0;
>>@@ -131,8 +134,10 @@ static int dpll_xa_ref_pin_del(struct xarray
>>*xa_pins, struct dpll_pin *pin,
>> reg = dpll_pin_registration_find(ref, ops, priv);
>> if (WARN_ON(!reg))
>> return -EINVAL;
>>- list_del(&reg->list);
>>- kfree(reg);
>>+ if (refcount_dec_and_test(&reg->refcount)) {
>>+ list_del(&reg->list);
>>+ kfree(reg);
>>+ }
>> if (refcount_dec_and_test(&ref->refcount)) {
>> xa_erase(xa_pins, i);
>> WARN_ON(!list_empty(&ref->registration_list));
>>@@ -160,6 +165,7 @@ dpll_xa_ref_dpll_add(struct xarray *xa_dplls, struct
>>dpll_device *dpll,
>> reg = dpll_pin_registration_find(ref, ops, priv);
>> if (reg) {
>> refcount_inc(&ref->refcount);
>>+ refcount_inc(&reg->refcount);
>> return 0;
>> }
>> ref_exists = true;
>>@@ -192,6 +198,7 @@ dpll_xa_ref_dpll_add(struct xarray *xa_dplls, struct
>>dpll_device *dpll,
>> reg->priv = priv;
>> if (ref_exists)
>> refcount_inc(&ref->refcount);
>>+ refcount_set(&reg->refcount, 1);
>> list_add_tail(&reg->list, &ref->registration_list);
>>
>> return 0;
>>@@ -211,8 +218,10 @@ dpll_xa_ref_dpll_del(struct xarray *xa_dplls, struct
>>dpll_device *dpll,
>> reg = dpll_pin_registration_find(ref, ops, priv);
>> if (WARN_ON(!reg))
>> return;
>>- list_del(&reg->list);
>>- kfree(reg);
>>+ if (refcount_dec_and_test(&reg->refcount)) {
>>+ list_del(&reg->list);
>>+ kfree(reg);
>>+ }
>> if (refcount_dec_and_test(&ref->refcount)) {
>> xa_erase(xa_dplls, i);
>> WARN_ON(!list_empty(&ref->registration_list));
>>
>>base-commit: ac1a21db32eda8a09076bad025d7b848dd086d28
>>--
>>2.38.1
>>

2024-04-23 11:16:50

by Jiri Pirko

[permalink] [raw]
Subject: Re: [PATCH net] dpll: fix dpll_pin_registration missing refcount

Tue, Apr 23, 2024 at 01:04:22PM CEST, [email protected] wrote:
>>From: Jiri Pirko <[email protected]>
>>Sent: Monday, April 22, 2024 3:31 PM
>>
>>Fri, Apr 19, 2024 at 09:47:11PM CEST, [email protected] wrote:
>>>In scenario where pin is registered with multiple parent pins via
>>>dpll_pin_on_pin_register(..), belonging to the same dpll device,
>>>and each time with the same set of ops/priv data, a reference
>>>between a pin and dpll is created once and then refcounted, at the same
>>>time the dpll_pin_registration is only checked for existence and created
>>>if does not exist. This is wrong, as for the same ops/priv data a
>>>registration shall be also refcounted, a child pin is also registered
>>>with dpll device, until each child is unregistered the registration data
>>>shall exist.
>>
>>I read this 3 time, don't undestand clearly the matter of the problem.
>>Could you perhaps make it somehow visual?
>>
>
>Many thanks for all your insights on this!
>
>Register child pin twice (via dpll_pin_on_pin_register(..)) with two different
>parents but the same ops/priv. Then, a single dpll_pin_on_pin_unregister(..) will
>cause below stack trace.
>
>It was good to add a fix in b446631f355e, but the fix did not cover a multi-parent
>registration case, here I am fixing it.
>
>>
>>>
>>>Add refcount and check if all registrations are dropped before releasing
>>>dpll_pin_registration resources.
>>>
>>>Currently, the following crash/call trace is produced when ice driver is
>>>removed on the system with installed NIC which includes dpll device:
>>>
>>>WARNING: CPU: 51 PID: 9155 at drivers/dpll/dpll_core.c:809 dpll_pin_ops+0x20/0x30
>>>Call Trace:
>>> dpll_msg_add_pin_freq+0x37/0x1d0
>>> dpll_cmd_pin_get_one+0x1c0/0x400
>>> ? __nlmsg_put+0x63/0x80
>>> dpll_pin_event_send+0x93/0x140
>>> dpll_pin_on_pin_unregister+0x3f/0x100
>>> ice_dpll_deinit_pins+0xa1/0x230 [ice]
>>> ice_remove+0xf1/0x210 [ice]
>>>
>>>Fixes: b446631f355e ("dpll: fix dpll_xa_ref_*_del() for multiple registrations")
>>>Reviewed-by: Przemek Kitszel <[email protected]>
>>>Signed-off-by: Arkadiusz Kubalewski <[email protected]>
>>>---
>>> drivers/dpll/dpll_core.c | 17 +++++++++++++----
>>> 1 file changed, 13 insertions(+), 4 deletions(-)
>>>
>>>diff --git a/drivers/dpll/dpll_core.c b/drivers/dpll/dpll_core.c
>>>index 64eaca80d736..7ababa327c0c 100644
>>>--- a/drivers/dpll/dpll_core.c
>>>+++ b/drivers/dpll/dpll_core.c
>>>@@ -40,6 +40,7 @@ struct dpll_device_registration {
>>>
>>> struct dpll_pin_registration {
>>> struct list_head list;
>>>+ refcount_t refcount;
>>> const struct dpll_pin_ops *ops;
>>> void *priv;
>>> };
>>>@@ -81,6 +82,7 @@ dpll_xa_ref_pin_add(struct xarray *xa_pins, struct
>>>dpll_pin *pin,
>>> reg = dpll_pin_registration_find(ref, ops, priv);
>>> if (reg) {
>>> refcount_inc(&ref->refcount);
>>>+ refcount_inc(&reg->refcount);
>>
>>I don't like this. Registration is supposed to be created for a single
>>registration. Not you create one for many and refcount it.
>>
>
>If register function is called with the same priv/ops, why to do all you
>suggested below instead of just refcounting?
>
>>Instead of this, I suggest to extend __dpll_pin_register() for a
>>"void *cookie" arg. That would be NULL for dpll_pin_register() caller.
>>For dpll_pin_on_pin_register() caller, it would pass "parent" pointer.
>>
>>Than dpll_xa_ref_pin_add() can pass this cookie value to
>>dpll_pin_registration_find(). The if case there would look like:
>>if (reg->ops == ops && reg->priv == priv && reg->cookie == cookie)
>>
>>This way, we will create separate "sub-registration" for each parent.
>>
>>Makes sense?
>>
>
>It would do, but only if the code would anyhow use that new parent
>sub-registration explicitly for anything else later.
>
>Creating a sub-registration with additional parent cookie just to create a
>second registration with only difference parent cookie and not using the
>cookie even once after, seems overshot for a fix.

Well, we have ref with multiple references and refcount, single
registration instance per registration. Now you make that multiple
references and refcounted as well, just because the parent is different.
That is why I suggested to add the parent to the registration look-up
if. Makes things a bit cleaner to read in already quite complex code.


>
>What you suggest is rather a refactor, but again needed only after we would
>make use of the parent cooking somewhere else.
>And such refactor shall target next-tree, right?

Not sure what refactor you refer to. Couple of lines, similar to your
version.


>
>Thank you!
>Arkadiusz
>
>>> return 0;
>>> }
>>> ref_exists = true;
>>>@@ -113,6 +115,7 @@ dpll_xa_ref_pin_add(struct xarray *xa_pins, struct
>>>dpll_pin *pin,
>>> reg->priv = priv;
>>> if (ref_exists)
>>> refcount_inc(&ref->refcount);
>>>+ refcount_set(&reg->refcount, 1);
>>> list_add_tail(&reg->list, &ref->registration_list);
>>>
>>> return 0;
>>>@@ -131,8 +134,10 @@ static int dpll_xa_ref_pin_del(struct xarray
>>>*xa_pins, struct dpll_pin *pin,
>>> reg = dpll_pin_registration_find(ref, ops, priv);
>>> if (WARN_ON(!reg))
>>> return -EINVAL;
>>>- list_del(&reg->list);
>>>- kfree(reg);
>>>+ if (refcount_dec_and_test(&reg->refcount)) {
>>>+ list_del(&reg->list);
>>>+ kfree(reg);
>>>+ }
>>> if (refcount_dec_and_test(&ref->refcount)) {
>>> xa_erase(xa_pins, i);
>>> WARN_ON(!list_empty(&ref->registration_list));
>>>@@ -160,6 +165,7 @@ dpll_xa_ref_dpll_add(struct xarray *xa_dplls, struct
>>>dpll_device *dpll,
>>> reg = dpll_pin_registration_find(ref, ops, priv);
>>> if (reg) {
>>> refcount_inc(&ref->refcount);
>>>+ refcount_inc(&reg->refcount);
>>> return 0;
>>> }
>>> ref_exists = true;
>>>@@ -192,6 +198,7 @@ dpll_xa_ref_dpll_add(struct xarray *xa_dplls, struct
>>>dpll_device *dpll,
>>> reg->priv = priv;
>>> if (ref_exists)
>>> refcount_inc(&ref->refcount);
>>>+ refcount_set(&reg->refcount, 1);
>>> list_add_tail(&reg->list, &ref->registration_list);
>>>
>>> return 0;
>>>@@ -211,8 +218,10 @@ dpll_xa_ref_dpll_del(struct xarray *xa_dplls, struct
>>>dpll_device *dpll,
>>> reg = dpll_pin_registration_find(ref, ops, priv);
>>> if (WARN_ON(!reg))
>>> return;
>>>- list_del(&reg->list);
>>>- kfree(reg);
>>>+ if (refcount_dec_and_test(&reg->refcount)) {
>>>+ list_del(&reg->list);
>>>+ kfree(reg);
>>>+ }
>>> if (refcount_dec_and_test(&ref->refcount)) {
>>> xa_erase(xa_dplls, i);
>>> WARN_ON(!list_empty(&ref->registration_list));
>>>
>>>base-commit: ac1a21db32eda8a09076bad025d7b848dd086d28
>>>--
>>>2.38.1
>>>

2024-04-24 10:26:40

by Kubalewski, Arkadiusz

[permalink] [raw]
Subject: RE: [PATCH net] dpll: fix dpll_pin_registration missing refcount

>From: Jiri Pirko <[email protected]>
>Sent: Tuesday, April 23, 2024 1:17 PM
>
>Tue, Apr 23, 2024 at 01:04:22PM CEST, [email protected] wrote:
>>>From: Jiri Pirko <[email protected]>
>>>Sent: Monday, April 22, 2024 3:31 PM
>>>
>>>Fri, Apr 19, 2024 at 09:47:11PM CEST, [email protected]
>>>wrote:
>>>>In scenario where pin is registered with multiple parent pins via
>>>>dpll_pin_on_pin_register(..), belonging to the same dpll device,
>>>>and each time with the same set of ops/priv data, a reference
>>>>between a pin and dpll is created once and then refcounted, at the same
>>>>time the dpll_pin_registration is only checked for existence and created
>>>>if does not exist. This is wrong, as for the same ops/priv data a
>>>>registration shall be also refcounted, a child pin is also registered
>>>>with dpll device, until each child is unregistered the registration data
>>>>shall exist.
>>>
>>>I read this 3 time, don't undestand clearly the matter of the problem.
>>>Could you perhaps make it somehow visual?
>>>
>>
>>Many thanks for all your insights on this!
>>
>>Register child pin twice (via dpll_pin_on_pin_register(..)) with two
>>different
>>parents but the same ops/priv. Then, a single
>>dpll_pin_on_pin_unregister(..) will
>>cause below stack trace.
>>
>>It was good to add a fix in b446631f355e, but the fix did not cover a
>>multi-parent
>>registration case, here I am fixing it.
>>
>>>
>>>>
>>>>Add refcount and check if all registrations are dropped before releasing
>>>>dpll_pin_registration resources.
>>>>
>>>>Currently, the following crash/call trace is produced when ice driver is
>>>>removed on the system with installed NIC which includes dpll device:
>>>>
>>>>WARNING: CPU: 51 PID: 9155 at drivers/dpll/dpll_core.c:809
>>>>dpll_pin_ops+0x20/0x30
>>>>Call Trace:
>>>> dpll_msg_add_pin_freq+0x37/0x1d0
>>>> dpll_cmd_pin_get_one+0x1c0/0x400
>>>> ? __nlmsg_put+0x63/0x80
>>>> dpll_pin_event_send+0x93/0x140
>>>> dpll_pin_on_pin_unregister+0x3f/0x100
>>>> ice_dpll_deinit_pins+0xa1/0x230 [ice]
>>>> ice_remove+0xf1/0x210 [ice]
>>>>
>>>>Fixes: b446631f355e ("dpll: fix dpll_xa_ref_*_del() for multiple
>>>>registrations")
>>>>Reviewed-by: Przemek Kitszel <[email protected]>
>>>>Signed-off-by: Arkadiusz Kubalewski <[email protected]>
>>>>---
>>>> drivers/dpll/dpll_core.c | 17 +++++++++++++----
>>>> 1 file changed, 13 insertions(+), 4 deletions(-)
>>>>
>>>>diff --git a/drivers/dpll/dpll_core.c b/drivers/dpll/dpll_core.c
>>>>index 64eaca80d736..7ababa327c0c 100644
>>>>--- a/drivers/dpll/dpll_core.c
>>>>+++ b/drivers/dpll/dpll_core.c
>>>>@@ -40,6 +40,7 @@ struct dpll_device_registration {
>>>>
>>>> struct dpll_pin_registration {
>>>> struct list_head list;
>>>>+ refcount_t refcount;
>>>> const struct dpll_pin_ops *ops;
>>>> void *priv;
>>>> };
>>>>@@ -81,6 +82,7 @@ dpll_xa_ref_pin_add(struct xarray *xa_pins, struct
>>>>dpll_pin *pin,
>>>> reg = dpll_pin_registration_find(ref, ops, priv);
>>>> if (reg) {
>>>> refcount_inc(&ref->refcount);
>>>>+ refcount_inc(&reg->refcount);
>>>
>>>I don't like this. Registration is supposed to be created for a single
>>>registration. Not you create one for many and refcount it.
>>>
>>
>>If register function is called with the same priv/ops, why to do all you
>>suggested below instead of just refcounting?
>>
>>>Instead of this, I suggest to extend __dpll_pin_register() for a
>>>"void *cookie" arg. That would be NULL for dpll_pin_register() caller.
>>>For dpll_pin_on_pin_register() caller, it would pass "parent" pointer.
>>>
>>>Than dpll_xa_ref_pin_add() can pass this cookie value to
>>>dpll_pin_registration_find(). The if case there would look like:
>>>if (reg->ops == ops && reg->priv == priv && reg->cookie == cookie)
>>>
>>>This way, we will create separate "sub-registration" for each parent.
>>>
>>>Makes sense?
>>>
>>
>>It would do, but only if the code would anyhow use that new parent
>>sub-registration explicitly for anything else later.
>>
>>Creating a sub-registration with additional parent cookie just to create a
>>second registration with only difference parent cookie and not using the
>>cookie even once after, seems overshot for a fix.
>
>Well, we have ref with multiple references and refcount, single
>registration instance per registration. Now you make that multiple
>references and refcounted as well, just because the parent is different.
>That is why I suggested to add the parent to the registration look-up
>if. Makes things a bit cleaner to read in already quite complex code.
>
>
>>
>>What you suggest is rather a refactor, but again needed only after we
>>would
>>make use of the parent cooking somewhere else.
>>And such refactor shall target next-tree, right?
>
>Not sure what refactor you refer to. Couple of lines, similar to your
>version.
>

Just sent v2 with your proposal, please take a look.

Thank you!
Arkadiusz.

>
>>
>>Thank you!
>>Arkadiusz
>>
>>>> return 0;
>>>> }
>>>> ref_exists = true;
>>>>@@ -113,6 +115,7 @@ dpll_xa_ref_pin_add(struct xarray *xa_pins, struct
>>>>dpll_pin *pin,
>>>> reg->priv = priv;
>>>> if (ref_exists)
>>>> refcount_inc(&ref->refcount);
>>>>+ refcount_set(&reg->refcount, 1);
>>>> list_add_tail(&reg->list, &ref->registration_list);
>>>>
>>>> return 0;
>>>>@@ -131,8 +134,10 @@ static int dpll_xa_ref_pin_del(struct xarray
>>>>*xa_pins, struct dpll_pin *pin,
>>>> reg = dpll_pin_registration_find(ref, ops, priv);
>>>> if (WARN_ON(!reg))
>>>> return -EINVAL;
>>>>- list_del(&reg->list);
>>>>- kfree(reg);
>>>>+ if (refcount_dec_and_test(&reg->refcount)) {
>>>>+ list_del(&reg->list);
>>>>+ kfree(reg);
>>>>+ }
>>>> if (refcount_dec_and_test(&ref->refcount)) {
>>>> xa_erase(xa_pins, i);
>>>> WARN_ON(!list_empty(&ref->registration_list));
>>>>@@ -160,6 +165,7 @@ dpll_xa_ref_dpll_add(struct xarray *xa_dplls, struct
>>>>dpll_device *dpll,
>>>> reg = dpll_pin_registration_find(ref, ops, priv);
>>>> if (reg) {
>>>> refcount_inc(&ref->refcount);
>>>>+ refcount_inc(&reg->refcount);
>>>> return 0;
>>>> }
>>>> ref_exists = true;
>>>>@@ -192,6 +198,7 @@ dpll_xa_ref_dpll_add(struct xarray *xa_dplls, struct
>>>>dpll_device *dpll,
>>>> reg->priv = priv;
>>>> if (ref_exists)
>>>> refcount_inc(&ref->refcount);
>>>>+ refcount_set(&reg->refcount, 1);
>>>> list_add_tail(&reg->list, &ref->registration_list);
>>>>
>>>> return 0;
>>>>@@ -211,8 +218,10 @@ dpll_xa_ref_dpll_del(struct xarray *xa_dplls,
>>>>struct
>>>>dpll_device *dpll,
>>>> reg = dpll_pin_registration_find(ref, ops, priv);
>>>> if (WARN_ON(!reg))
>>>> return;
>>>>- list_del(&reg->list);
>>>>- kfree(reg);
>>>>+ if (refcount_dec_and_test(&reg->refcount)) {
>>>>+ list_del(&reg->list);
>>>>+ kfree(reg);
>>>>+ }
>>>> if (refcount_dec_and_test(&ref->refcount)) {
>>>> xa_erase(xa_dplls, i);
>>>> WARN_ON(!list_empty(&ref->registration_list));
>>>>
>>>>base-commit: ac1a21db32eda8a09076bad025d7b848dd086d28
>>>>--
>>>>2.38.1
>>>>