2018-08-30 18:16:26

by Corey Minyard

[permalink] [raw]
Subject: [PATCH] ipmi: Fix I2C client removal in the SSIF driver

From: Corey Minyard <[email protected]>

The SSIF driver was removing any client that came in through the
platform interface, but it should only remove clients that it
added. On a failure in the probe function, this could result
in the following oops when the driver is removed and the
client gets unregistered twice:

CPU: 107 PID: 30266 Comm: rmmod Not tainted 4.18.0+ #80
Hardware name: Cavium Inc. Saber/Saber, BIOS Cavium reference firmware version 7.0 08/04/2018
pstate: 60400009 (nZCv daif +PAN -UAO)
pc : kernfs_find_ns+0x28/0x120
lr : kernfs_find_and_get_ns+0x40/0x60
sp : ffff00002310fb50
x29: ffff00002310fb50 x28: ffff800a8240f800
x27: 0000000000000000 x26: 0000000000000000
x25: 0000000056000000 x24: ffff000009073000
x23: ffff000008998b38 x22: 0000000000000000
x21: ffff800ed86de820 x20: 0000000000000000
x19: ffff00000913a1d8 x18: 0000000000000000
x17: 0000000000000000 x16: 0000000000000000
x15: 0000000000000000 x14: 5300737265766972
x13: 643d4d4554535953 x12: 0000000000000030
x11: 0000000000000030 x10: 0101010101010101
x9 : ffff800ea06cc3f9 x8 : 0000000000000000
x7 : 0000000000000141 x6 : ffff000009073000
x5 : ffff800adb706b00 x4 : 0000000000000000
x3 : 00000000ffffffff x2 : 0000000000000000
x1 : ffff000008998b38 x0 : ffff000008356760
Process rmmod (pid: 30266, stack limit = 0x00000000e218418d)
Call trace:
kernfs_find_ns+0x28/0x120
kernfs_find_and_get_ns+0x40/0x60
sysfs_unmerge_group+0x2c/0x6c
dpm_sysfs_remove+0x34/0x70
device_del+0x58/0x30c
device_unregister+0x30/0x7c
i2c_unregister_device+0x84/0x90 [i2c_core]
ssif_platform_remove+0x38/0x98 [ipmi_ssif]
platform_drv_remove+0x2c/0x6c
device_release_driver_internal+0x168/0x1f8
driver_detach+0x50/0xbc
bus_remove_driver+0x74/0xe8
driver_unregister+0x34/0x5c
platform_driver_unregister+0x20/0x2c
cleanup_ipmi_ssif+0x50/0xd82c [ipmi_ssif]
__arm64_sys_delete_module+0x1b4/0x220
el0_svc_handler+0x104/0x160
el0_svc+0x8/0xc
Code: aa1e03e0 aa0203f6 aa0103f7 d503201f (7940e280)
---[ end trace 09f0e34cce8e2d8c ]---
Kernel panic - not syncing: Fatal exception
SMP: stopping secondary CPUs
Kernel Offset: disabled
CPU features: 0x23800c38

So track the clients that the SSIF driver adds and only remove
those.

Reported-by: George Cherian <[email protected]>
Signed-off-by: Corey Minyard <[email protected]>
---
drivers/char/ipmi/ipmi_ssif.c | 17 ++++++-----------
1 file changed, 6 insertions(+), 11 deletions(-)

George, your fix was close, but not quite right. The following
should fix the problem, obvious in hindsight :-). Thanks for working
with me on this.

-corey

diff --git a/drivers/char/ipmi/ipmi_ssif.c b/drivers/char/ipmi/ipmi_ssif.c
index e7e8b86..ca4f7c4 100644
--- a/drivers/char/ipmi/ipmi_ssif.c
+++ b/drivers/char/ipmi/ipmi_ssif.c
@@ -182,6 +182,8 @@ struct ssif_addr_info {
struct device *dev;
struct i2c_client *client;

+ struct i2c_client *added_client;
+
struct mutex clients_mutex;
struct list_head clients;

@@ -1639,15 +1641,7 @@ static int ssif_probe(struct i2c_client *client, const struct i2c_device_id *id)

out:
if (rv) {
- /*
- * Note that if addr_info->client is assigned, we
- * leave it. The i2c client hangs around even if we
- * return a failure here, and the failure here is not
- * propagated back to the i2c code. This seems to be
- * design intent, strange as it may be. But if we
- * don't leave it, ssif_platform_remove will not remove
- * the client like it should.
- */
+ addr_info->client = NULL;
dev_err(&client->dev, "Unable to start IPMI SSIF: %d\n", rv);
kfree(ssif_info);
}
@@ -1667,7 +1661,8 @@ static int ssif_adapter_handler(struct device *adev, void *opaque)
if (adev->type != &i2c_adapter_type)
return 0;

- i2c_new_device(to_i2c_adapter(adev), &addr_info->binfo);
+ addr_info->added_client = i2c_new_device(to_i2c_adapter(adev),
+ &addr_info->binfo);

if (!addr_info->adapter_name)
return 1; /* Only try the first I2C adapter by default. */
@@ -1840,7 +1835,7 @@ static int ssif_platform_remove(struct platform_device *dev)
return 0;

mutex_lock(&ssif_infos_mutex);
- i2c_unregister_device(addr_info->client);
+ i2c_unregister_device(addr_info->added_client);

list_del(&addr_info->link);
kfree(addr_info);
--
2.7.4



2018-08-31 12:15:39

by Corey Minyard

[permalink] [raw]
Subject: Re: [PATCH] ipmi: Fix I2C client removal in the SSIF driver

On 08/31/2018 06:58 AM, George Cherian wrote:
>
>
> On 08/30/2018 11:44 PM, [email protected] wrote:
>>
>> From: Corey Minyard <[email protected]>
>>
>> The SSIF driver was removing any client that came in through the
>> platform interface, but it should only remove clients that it
>> added.  On a failure in the probe function, this could result
>> in the following oops when the driver is removed and the
>> client gets unregistered twice:
>>
>>   CPU: 107 PID: 30266 Comm: rmmod Not tainted 4.18.0+ #80
>>   Hardware name: Cavium Inc. Saber/Saber, BIOS Cavium reference
>> firmware version 7.0 08/04/2018
>>   pstate: 60400009 (nZCv daif +PAN -UAO)
>>   pc : kernfs_find_ns+0x28/0x120
>>   lr : kernfs_find_and_get_ns+0x40/0x60
>>   sp : ffff00002310fb50
>>   x29: ffff00002310fb50 x28: ffff800a8240f800
>>   x27: 0000000000000000 x26: 0000000000000000
>>   x25: 0000000056000000 x24: ffff000009073000
>>   x23: ffff000008998b38 x22: 0000000000000000
>>   x21: ffff800ed86de820 x20: 0000000000000000
>>   x19: ffff00000913a1d8 x18: 0000000000000000
>>   x17: 0000000000000000 x16: 0000000000000000
>>   x15: 0000000000000000 x14: 5300737265766972
>>   x13: 643d4d4554535953 x12: 0000000000000030
>>   x11: 0000000000000030 x10: 0101010101010101
>>   x9 : ffff800ea06cc3f9 x8 : 0000000000000000
>>   x7 : 0000000000000141 x6 : ffff000009073000
>>   x5 : ffff800adb706b00 x4 : 0000000000000000
>>   x3 : 00000000ffffffff x2 : 0000000000000000
>>   x1 : ffff000008998b38 x0 : ffff000008356760
>>   Process rmmod (pid: 30266, stack limit = 0x00000000e218418d)
>>   Call trace:
>>    kernfs_find_ns+0x28/0x120
>>    kernfs_find_and_get_ns+0x40/0x60
>>    sysfs_unmerge_group+0x2c/0x6c
>>    dpm_sysfs_remove+0x34/0x70
>>    device_del+0x58/0x30c
>>    device_unregister+0x30/0x7c
>>    i2c_unregister_device+0x84/0x90 [i2c_core]
>>    ssif_platform_remove+0x38/0x98 [ipmi_ssif]
>>    platform_drv_remove+0x2c/0x6c
>>    device_release_driver_internal+0x168/0x1f8
>>    driver_detach+0x50/0xbc
>>    bus_remove_driver+0x74/0xe8
>>    driver_unregister+0x34/0x5c
>>    platform_driver_unregister+0x20/0x2c
>>    cleanup_ipmi_ssif+0x50/0xd82c [ipmi_ssif]
>>    __arm64_sys_delete_module+0x1b4/0x220
>>    el0_svc_handler+0x104/0x160
>>    el0_svc+0x8/0xc
>>   Code: aa1e03e0 aa0203f6 aa0103f7 d503201f (7940e280)
>>   ---[ end trace 09f0e34cce8e2d8c ]---
>>   Kernel panic - not syncing: Fatal exception
>>   SMP: stopping secondary CPUs
>>   Kernel Offset: disabled
>>   CPU features: 0x23800c38
>>
>> So track the clients that the SSIF driver adds and only remove
>> those.
>>
>> Reported-by: George Cherian <[email protected]>
>> Signed-off-by: Corey Minyard <[email protected]>
>> ---
>>   drivers/char/ipmi/ipmi_ssif.c | 17 ++++++-----------
>>   1 file changed, 6 insertions(+), 11 deletions(-)
>>
>> George, your fix was close, but not quite right.  The following
>> should fix the problem, obvious in hindsight :-).  Thanks for working
>> with me on this.
>>
> Thanks Corey!!!
> This works for me now.
>

Thanks.  Can I add a Tested-by: from you?

-corey

>> -corey
>>
>> diff --git a/drivers/char/ipmi/ipmi_ssif.c
>> b/drivers/char/ipmi/ipmi_ssif.c
>> index e7e8b86..ca4f7c4 100644
>> --- a/drivers/char/ipmi/ipmi_ssif.c
>> +++ b/drivers/char/ipmi/ipmi_ssif.c
>> @@ -182,6 +182,8 @@ struct ssif_addr_info {
>>          struct device *dev;
>>          struct i2c_client *client;
>>
>> +       struct i2c_client *added_client;
>> +
>>          struct mutex clients_mutex;
>>          struct list_head clients;
>>
>> @@ -1639,15 +1641,7 @@ static int ssif_probe(struct i2c_client
>> *client, const struct i2c_device_id *id)
>>
>>    out:
>>          if (rv) {
>> -               /*
>> -                * Note that if addr_info->client is assigned, we
>> -                * leave it.  The i2c client hangs around even if we
>> -                * return a failure here, and the failure here is not
>> -                * propagated back to the i2c code.  This seems to be
>> -                * design intent, strange as it may be.  But if we
>> -                * don't leave it, ssif_platform_remove will not remove
>> -                * the client like it should.
>> -                */
>> +               addr_info->client = NULL;
>>                  dev_err(&client->dev, "Unable to start IPMI SSIF:
>> %d\n", rv);
>>                  kfree(ssif_info);
>>          }
>> @@ -1667,7 +1661,8 @@ static int ssif_adapter_handler(struct device
>> *adev, void *opaque)
>>          if (adev->type != &i2c_adapter_type)
>>                  return 0;
>>
>> -       i2c_new_device(to_i2c_adapter(adev), &addr_info->binfo);
>> +       addr_info->added_client = i2c_new_device(to_i2c_adapter(adev),
>> + &addr_info->binfo);
>>
>>          if (!addr_info->adapter_name)
>>                  return 1; /* Only try the first I2C adapter by
>> default. */
>> @@ -1840,7 +1835,7 @@ static int ssif_platform_remove(struct
>> platform_device *dev)
>>                  return 0;
>>
>>          mutex_lock(&ssif_infos_mutex);
>> -       i2c_unregister_device(addr_info->client);
>> +       i2c_unregister_device(addr_info->added_client);
>>
>>          list_del(&addr_info->link);
>>          kfree(addr_info);
>> --
>> 2.7.4
>>


2018-08-31 12:23:31

by George Cherian

[permalink] [raw]
Subject: Re: [PATCH] ipmi: Fix I2C client removal in the SSIF driver



On 08/31/2018 05:43 PM, Corey Minyard wrote:
>
> On 08/31/2018 06:58 AM, George Cherian wrote:
>>
>>
>> On 08/30/2018 11:44 PM, [email protected] wrote:
>>>
>>> From: Corey Minyard <[email protected]>
>>>
>>> The SSIF driver was removing any client that came in through the
>>> platform interface, but it should only remove clients that it
>>> added.  On a failure in the probe function, this could result
>>> in the following oops when the driver is removed and the
>>> client gets unregistered twice:
>>>
>>>   CPU: 107 PID: 30266 Comm: rmmod Not tainted 4.18.0+ #80
>>>   Hardware name: Cavium Inc. Saber/Saber, BIOS Cavium reference
>>> firmware version 7.0 08/04/2018
>>>   pstate: 60400009 (nZCv daif +PAN -UAO)
>>>   pc : kernfs_find_ns+0x28/0x120
>>>   lr : kernfs_find_and_get_ns+0x40/0x60
>>>   sp : ffff00002310fb50
>>>   x29: ffff00002310fb50 x28: ffff800a8240f800
>>>   x27: 0000000000000000 x26: 0000000000000000
>>>   x25: 0000000056000000 x24: ffff000009073000
>>>   x23: ffff000008998b38 x22: 0000000000000000
>>>   x21: ffff800ed86de820 x20: 0000000000000000
>>>   x19: ffff00000913a1d8 x18: 0000000000000000
>>>   x17: 0000000000000000 x16: 0000000000000000
>>>   x15: 0000000000000000 x14: 5300737265766972
>>>   x13: 643d4d4554535953 x12: 0000000000000030
>>>   x11: 0000000000000030 x10: 0101010101010101
>>>   x9 : ffff800ea06cc3f9 x8 : 0000000000000000
>>>   x7 : 0000000000000141 x6 : ffff000009073000
>>>   x5 : ffff800adb706b00 x4 : 0000000000000000
>>>   x3 : 00000000ffffffff x2 : 0000000000000000
>>>   x1 : ffff000008998b38 x0 : ffff000008356760
>>>   Process rmmod (pid: 30266, stack limit = 0x00000000e218418d)
>>>   Call trace:
>>>    kernfs_find_ns+0x28/0x120
>>>    kernfs_find_and_get_ns+0x40/0x60
>>>    sysfs_unmerge_group+0x2c/0x6c
>>>    dpm_sysfs_remove+0x34/0x70
>>>    device_del+0x58/0x30c
>>>    device_unregister+0x30/0x7c
>>>    i2c_unregister_device+0x84/0x90 [i2c_core]
>>>    ssif_platform_remove+0x38/0x98 [ipmi_ssif]
>>>    platform_drv_remove+0x2c/0x6c
>>>    device_release_driver_internal+0x168/0x1f8
>>>    driver_detach+0x50/0xbc
>>>    bus_remove_driver+0x74/0xe8
>>>    driver_unregister+0x34/0x5c
>>>    platform_driver_unregister+0x20/0x2c
>>>    cleanup_ipmi_ssif+0x50/0xd82c [ipmi_ssif]
>>>    __arm64_sys_delete_module+0x1b4/0x220
>>>    el0_svc_handler+0x104/0x160
>>>    el0_svc+0x8/0xc
>>>   Code: aa1e03e0 aa0203f6 aa0103f7 d503201f (7940e280)
>>>   ---[ end trace 09f0e34cce8e2d8c ]---
>>>   Kernel panic - not syncing: Fatal exception
>>>   SMP: stopping secondary CPUs
>>>   Kernel Offset: disabled
>>>   CPU features: 0x23800c38
>>>
>>> So track the clients that the SSIF driver adds and only remove
>>> those.
>>>
>>> Reported-by: George Cherian <[email protected]>
>>> Signed-off-by: Corey Minyard <[email protected]>
>>> ---
>>>   drivers/char/ipmi/ipmi_ssif.c | 17 ++++++-----------
>>>   1 file changed, 6 insertions(+), 11 deletions(-)
>>>
>>> George, your fix was close, but not quite right.  The following
>>> should fix the problem, obvious in hindsight :-).  Thanks for working
>>> with me on this.
>>>
>> Thanks Corey!!!
>> This works for me now.
>>
>
> Thanks.  Can I add a Tested-by: from you?
Yes.
Tested-by: George Cherian <[email protected]>
Can you CC this to stable too.

-George
>
> -corey
>
>>> -corey
>>>
>>> diff --git a/drivers/char/ipmi/ipmi_ssif.c
>>> b/drivers/char/ipmi/ipmi_ssif.c
>>> index e7e8b86..ca4f7c4 100644
>>> --- a/drivers/char/ipmi/ipmi_ssif.c
>>> +++ b/drivers/char/ipmi/ipmi_ssif.c
>>> @@ -182,6 +182,8 @@ struct ssif_addr_info {
>>>          struct device *dev;
>>>          struct i2c_client *client;
>>>
>>> +       struct i2c_client *added_client;
>>> +
>>>          struct mutex clients_mutex;
>>>          struct list_head clients;
>>>
>>> @@ -1639,15 +1641,7 @@ static int ssif_probe(struct i2c_client
>>> *client, const struct i2c_device_id *id)
>>>
>>>    out:
>>>          if (rv) {
>>> -               /*
>>> -                * Note that if addr_info->client is assigned, we
>>> -                * leave it.  The i2c client hangs around even if we
>>> -                * return a failure here, and the failure here is not
>>> -                * propagated back to the i2c code.  This seems to be
>>> -                * design intent, strange as it may be.  But if we
>>> -                * don't leave it, ssif_platform_remove will not remove
>>> -                * the client like it should.
>>> -                */
>>> +               addr_info->client = NULL;
>>>                  dev_err(&client->dev, "Unable to start IPMI SSIF:
>>> %d\n", rv);
>>>                  kfree(ssif_info);
>>>          }
>>> @@ -1667,7 +1661,8 @@ static int ssif_adapter_handler(struct device
>>> *adev, void *opaque)
>>>          if (adev->type != &i2c_adapter_type)
>>>                  return 0;
>>>
>>> -       i2c_new_device(to_i2c_adapter(adev), &addr_info->binfo);
>>> +       addr_info->added_client = i2c_new_device(to_i2c_adapter(adev),
>>> + &addr_info->binfo);
>>>
>>>          if (!addr_info->adapter_name)
>>>                  return 1; /* Only try the first I2C adapter by
>>> default. */
>>> @@ -1840,7 +1835,7 @@ static int ssif_platform_remove(struct
>>> platform_device *dev)
>>>                  return 0;
>>>
>>>          mutex_lock(&ssif_infos_mutex);
>>> -       i2c_unregister_device(addr_info->client);
>>> +       i2c_unregister_device(addr_info->added_client);
>>>
>>>          list_del(&addr_info->link);
>>>          kfree(addr_info);
>>> --
>>> 2.7.4
>>>
>

2018-08-31 13:28:10

by George Cherian

[permalink] [raw]
Subject: Re: [PATCH] ipmi: Fix I2C client removal in the SSIF driver



On 08/30/2018 11:44 PM, [email protected] wrote:
>
> From: Corey Minyard <[email protected]>
>
> The SSIF driver was removing any client that came in through the
> platform interface, but it should only remove clients that it
> added. On a failure in the probe function, this could result
> in the following oops when the driver is removed and the
> client gets unregistered twice:
>
> CPU: 107 PID: 30266 Comm: rmmod Not tainted 4.18.0+ #80
> Hardware name: Cavium Inc. Saber/Saber, BIOS Cavium reference firmware version 7.0 08/04/2018
> pstate: 60400009 (nZCv daif +PAN -UAO)
> pc : kernfs_find_ns+0x28/0x120
> lr : kernfs_find_and_get_ns+0x40/0x60
> sp : ffff00002310fb50
> x29: ffff00002310fb50 x28: ffff800a8240f800
> x27: 0000000000000000 x26: 0000000000000000
> x25: 0000000056000000 x24: ffff000009073000
> x23: ffff000008998b38 x22: 0000000000000000
> x21: ffff800ed86de820 x20: 0000000000000000
> x19: ffff00000913a1d8 x18: 0000000000000000
> x17: 0000000000000000 x16: 0000000000000000
> x15: 0000000000000000 x14: 5300737265766972
> x13: 643d4d4554535953 x12: 0000000000000030
> x11: 0000000000000030 x10: 0101010101010101
> x9 : ffff800ea06cc3f9 x8 : 0000000000000000
> x7 : 0000000000000141 x6 : ffff000009073000
> x5 : ffff800adb706b00 x4 : 0000000000000000
> x3 : 00000000ffffffff x2 : 0000000000000000
> x1 : ffff000008998b38 x0 : ffff000008356760
> Process rmmod (pid: 30266, stack limit = 0x00000000e218418d)
> Call trace:
> kernfs_find_ns+0x28/0x120
> kernfs_find_and_get_ns+0x40/0x60
> sysfs_unmerge_group+0x2c/0x6c
> dpm_sysfs_remove+0x34/0x70
> device_del+0x58/0x30c
> device_unregister+0x30/0x7c
> i2c_unregister_device+0x84/0x90 [i2c_core]
> ssif_platform_remove+0x38/0x98 [ipmi_ssif]
> platform_drv_remove+0x2c/0x6c
> device_release_driver_internal+0x168/0x1f8
> driver_detach+0x50/0xbc
> bus_remove_driver+0x74/0xe8
> driver_unregister+0x34/0x5c
> platform_driver_unregister+0x20/0x2c
> cleanup_ipmi_ssif+0x50/0xd82c [ipmi_ssif]
> __arm64_sys_delete_module+0x1b4/0x220
> el0_svc_handler+0x104/0x160
> el0_svc+0x8/0xc
> Code: aa1e03e0 aa0203f6 aa0103f7 d503201f (7940e280)
> ---[ end trace 09f0e34cce8e2d8c ]---
> Kernel panic - not syncing: Fatal exception
> SMP: stopping secondary CPUs
> Kernel Offset: disabled
> CPU features: 0x23800c38
>
> So track the clients that the SSIF driver adds and only remove
> those.
>
> Reported-by: George Cherian <[email protected]>
> Signed-off-by: Corey Minyard <[email protected]>
> ---
> drivers/char/ipmi/ipmi_ssif.c | 17 ++++++-----------
> 1 file changed, 6 insertions(+), 11 deletions(-)
>
> George, your fix was close, but not quite right. The following
> should fix the problem, obvious in hindsight :-). Thanks for working
> with me on this.
>
Thanks Corey!!!
This works for me now.

> -corey
>
> diff --git a/drivers/char/ipmi/ipmi_ssif.c b/drivers/char/ipmi/ipmi_ssif.c
> index e7e8b86..ca4f7c4 100644
> --- a/drivers/char/ipmi/ipmi_ssif.c
> +++ b/drivers/char/ipmi/ipmi_ssif.c
> @@ -182,6 +182,8 @@ struct ssif_addr_info {
> struct device *dev;
> struct i2c_client *client;
>
> + struct i2c_client *added_client;
> +
> struct mutex clients_mutex;
> struct list_head clients;
>
> @@ -1639,15 +1641,7 @@ static int ssif_probe(struct i2c_client *client, const struct i2c_device_id *id)
>
> out:
> if (rv) {
> - /*
> - * Note that if addr_info->client is assigned, we
> - * leave it. The i2c client hangs around even if we
> - * return a failure here, and the failure here is not
> - * propagated back to the i2c code. This seems to be
> - * design intent, strange as it may be. But if we
> - * don't leave it, ssif_platform_remove will not remove
> - * the client like it should.
> - */
> + addr_info->client = NULL;
> dev_err(&client->dev, "Unable to start IPMI SSIF: %d\n", rv);
> kfree(ssif_info);
> }
> @@ -1667,7 +1661,8 @@ static int ssif_adapter_handler(struct device *adev, void *opaque)
> if (adev->type != &i2c_adapter_type)
> return 0;
>
> - i2c_new_device(to_i2c_adapter(adev), &addr_info->binfo);
> + addr_info->added_client = i2c_new_device(to_i2c_adapter(adev),
> + &addr_info->binfo);
>
> if (!addr_info->adapter_name)
> return 1; /* Only try the first I2C adapter by default. */
> @@ -1840,7 +1835,7 @@ static int ssif_platform_remove(struct platform_device *dev)
> return 0;
>
> mutex_lock(&ssif_infos_mutex);
> - i2c_unregister_device(addr_info->client);
> + i2c_unregister_device(addr_info->added_client);
>
> list_del(&addr_info->link);
> kfree(addr_info);
> --
> 2.7.4
>