From: Corey Minyard <[email protected]>
The SSIF driver was removing any client that came in through the
platform interface, but it should only remove clients that it
added. On a failure in the probe function, this could result
in the following oops when the driver is removed and the
client gets unregistered twice:
CPU: 107 PID: 30266 Comm: rmmod Not tainted 4.18.0+ #80
Hardware name: Cavium Inc. Saber/Saber, BIOS Cavium reference firmware version 7.0 08/04/2018
pstate: 60400009 (nZCv daif +PAN -UAO)
pc : kernfs_find_ns+0x28/0x120
lr : kernfs_find_and_get_ns+0x40/0x60
sp : ffff00002310fb50
x29: ffff00002310fb50 x28: ffff800a8240f800
x27: 0000000000000000 x26: 0000000000000000
x25: 0000000056000000 x24: ffff000009073000
x23: ffff000008998b38 x22: 0000000000000000
x21: ffff800ed86de820 x20: 0000000000000000
x19: ffff00000913a1d8 x18: 0000000000000000
x17: 0000000000000000 x16: 0000000000000000
x15: 0000000000000000 x14: 5300737265766972
x13: 643d4d4554535953 x12: 0000000000000030
x11: 0000000000000030 x10: 0101010101010101
x9 : ffff800ea06cc3f9 x8 : 0000000000000000
x7 : 0000000000000141 x6 : ffff000009073000
x5 : ffff800adb706b00 x4 : 0000000000000000
x3 : 00000000ffffffff x2 : 0000000000000000
x1 : ffff000008998b38 x0 : ffff000008356760
Process rmmod (pid: 30266, stack limit = 0x00000000e218418d)
Call trace:
kernfs_find_ns+0x28/0x120
kernfs_find_and_get_ns+0x40/0x60
sysfs_unmerge_group+0x2c/0x6c
dpm_sysfs_remove+0x34/0x70
device_del+0x58/0x30c
device_unregister+0x30/0x7c
i2c_unregister_device+0x84/0x90 [i2c_core]
ssif_platform_remove+0x38/0x98 [ipmi_ssif]
platform_drv_remove+0x2c/0x6c
device_release_driver_internal+0x168/0x1f8
driver_detach+0x50/0xbc
bus_remove_driver+0x74/0xe8
driver_unregister+0x34/0x5c
platform_driver_unregister+0x20/0x2c
cleanup_ipmi_ssif+0x50/0xd82c [ipmi_ssif]
__arm64_sys_delete_module+0x1b4/0x220
el0_svc_handler+0x104/0x160
el0_svc+0x8/0xc
Code: aa1e03e0 aa0203f6 aa0103f7 d503201f (7940e280)
---[ end trace 09f0e34cce8e2d8c ]---
Kernel panic - not syncing: Fatal exception
SMP: stopping secondary CPUs
Kernel Offset: disabled
CPU features: 0x23800c38
So track the clients that the SSIF driver adds and only remove
those.
Reported-by: George Cherian <[email protected]>
Signed-off-by: Corey Minyard <[email protected]>
---
drivers/char/ipmi/ipmi_ssif.c | 17 ++++++-----------
1 file changed, 6 insertions(+), 11 deletions(-)
George, your fix was close, but not quite right. The following
should fix the problem, obvious in hindsight :-). Thanks for working
with me on this.
-corey
diff --git a/drivers/char/ipmi/ipmi_ssif.c b/drivers/char/ipmi/ipmi_ssif.c
index e7e8b86..ca4f7c4 100644
--- a/drivers/char/ipmi/ipmi_ssif.c
+++ b/drivers/char/ipmi/ipmi_ssif.c
@@ -182,6 +182,8 @@ struct ssif_addr_info {
struct device *dev;
struct i2c_client *client;
+ struct i2c_client *added_client;
+
struct mutex clients_mutex;
struct list_head clients;
@@ -1639,15 +1641,7 @@ static int ssif_probe(struct i2c_client *client, const struct i2c_device_id *id)
out:
if (rv) {
- /*
- * Note that if addr_info->client is assigned, we
- * leave it. The i2c client hangs around even if we
- * return a failure here, and the failure here is not
- * propagated back to the i2c code. This seems to be
- * design intent, strange as it may be. But if we
- * don't leave it, ssif_platform_remove will not remove
- * the client like it should.
- */
+ addr_info->client = NULL;
dev_err(&client->dev, "Unable to start IPMI SSIF: %d\n", rv);
kfree(ssif_info);
}
@@ -1667,7 +1661,8 @@ static int ssif_adapter_handler(struct device *adev, void *opaque)
if (adev->type != &i2c_adapter_type)
return 0;
- i2c_new_device(to_i2c_adapter(adev), &addr_info->binfo);
+ addr_info->added_client = i2c_new_device(to_i2c_adapter(adev),
+ &addr_info->binfo);
if (!addr_info->adapter_name)
return 1; /* Only try the first I2C adapter by default. */
@@ -1840,7 +1835,7 @@ static int ssif_platform_remove(struct platform_device *dev)
return 0;
mutex_lock(&ssif_infos_mutex);
- i2c_unregister_device(addr_info->client);
+ i2c_unregister_device(addr_info->added_client);
list_del(&addr_info->link);
kfree(addr_info);
--
2.7.4
On 08/31/2018 06:58 AM, George Cherian wrote:
>
>
> On 08/30/2018 11:44 PM, [email protected] wrote:
>>
>> From: Corey Minyard <[email protected]>
>>
>> The SSIF driver was removing any client that came in through the
>> platform interface, but it should only remove clients that it
>> added. On a failure in the probe function, this could result
>> in the following oops when the driver is removed and the
>> client gets unregistered twice:
>>
>> CPU: 107 PID: 30266 Comm: rmmod Not tainted 4.18.0+ #80
>> Hardware name: Cavium Inc. Saber/Saber, BIOS Cavium reference
>> firmware version 7.0 08/04/2018
>> pstate: 60400009 (nZCv daif +PAN -UAO)
>> pc : kernfs_find_ns+0x28/0x120
>> lr : kernfs_find_and_get_ns+0x40/0x60
>> sp : ffff00002310fb50
>> x29: ffff00002310fb50 x28: ffff800a8240f800
>> x27: 0000000000000000 x26: 0000000000000000
>> x25: 0000000056000000 x24: ffff000009073000
>> x23: ffff000008998b38 x22: 0000000000000000
>> x21: ffff800ed86de820 x20: 0000000000000000
>> x19: ffff00000913a1d8 x18: 0000000000000000
>> x17: 0000000000000000 x16: 0000000000000000
>> x15: 0000000000000000 x14: 5300737265766972
>> x13: 643d4d4554535953 x12: 0000000000000030
>> x11: 0000000000000030 x10: 0101010101010101
>> x9 : ffff800ea06cc3f9 x8 : 0000000000000000
>> x7 : 0000000000000141 x6 : ffff000009073000
>> x5 : ffff800adb706b00 x4 : 0000000000000000
>> x3 : 00000000ffffffff x2 : 0000000000000000
>> x1 : ffff000008998b38 x0 : ffff000008356760
>> Process rmmod (pid: 30266, stack limit = 0x00000000e218418d)
>> Call trace:
>> kernfs_find_ns+0x28/0x120
>> kernfs_find_and_get_ns+0x40/0x60
>> sysfs_unmerge_group+0x2c/0x6c
>> dpm_sysfs_remove+0x34/0x70
>> device_del+0x58/0x30c
>> device_unregister+0x30/0x7c
>> i2c_unregister_device+0x84/0x90 [i2c_core]
>> ssif_platform_remove+0x38/0x98 [ipmi_ssif]
>> platform_drv_remove+0x2c/0x6c
>> device_release_driver_internal+0x168/0x1f8
>> driver_detach+0x50/0xbc
>> bus_remove_driver+0x74/0xe8
>> driver_unregister+0x34/0x5c
>> platform_driver_unregister+0x20/0x2c
>> cleanup_ipmi_ssif+0x50/0xd82c [ipmi_ssif]
>> __arm64_sys_delete_module+0x1b4/0x220
>> el0_svc_handler+0x104/0x160
>> el0_svc+0x8/0xc
>> Code: aa1e03e0 aa0203f6 aa0103f7 d503201f (7940e280)
>> ---[ end trace 09f0e34cce8e2d8c ]---
>> Kernel panic - not syncing: Fatal exception
>> SMP: stopping secondary CPUs
>> Kernel Offset: disabled
>> CPU features: 0x23800c38
>>
>> So track the clients that the SSIF driver adds and only remove
>> those.
>>
>> Reported-by: George Cherian <[email protected]>
>> Signed-off-by: Corey Minyard <[email protected]>
>> ---
>> drivers/char/ipmi/ipmi_ssif.c | 17 ++++++-----------
>> 1 file changed, 6 insertions(+), 11 deletions(-)
>>
>> George, your fix was close, but not quite right. The following
>> should fix the problem, obvious in hindsight :-). Thanks for working
>> with me on this.
>>
> Thanks Corey!!!
> This works for me now.
>
Thanks. Can I add a Tested-by: from you?
-corey
>> -corey
>>
>> diff --git a/drivers/char/ipmi/ipmi_ssif.c
>> b/drivers/char/ipmi/ipmi_ssif.c
>> index e7e8b86..ca4f7c4 100644
>> --- a/drivers/char/ipmi/ipmi_ssif.c
>> +++ b/drivers/char/ipmi/ipmi_ssif.c
>> @@ -182,6 +182,8 @@ struct ssif_addr_info {
>> struct device *dev;
>> struct i2c_client *client;
>>
>> + struct i2c_client *added_client;
>> +
>> struct mutex clients_mutex;
>> struct list_head clients;
>>
>> @@ -1639,15 +1641,7 @@ static int ssif_probe(struct i2c_client
>> *client, const struct i2c_device_id *id)
>>
>> out:
>> if (rv) {
>> - /*
>> - * Note that if addr_info->client is assigned, we
>> - * leave it. The i2c client hangs around even if we
>> - * return a failure here, and the failure here is not
>> - * propagated back to the i2c code. This seems to be
>> - * design intent, strange as it may be. But if we
>> - * don't leave it, ssif_platform_remove will not remove
>> - * the client like it should.
>> - */
>> + addr_info->client = NULL;
>> dev_err(&client->dev, "Unable to start IPMI SSIF:
>> %d\n", rv);
>> kfree(ssif_info);
>> }
>> @@ -1667,7 +1661,8 @@ static int ssif_adapter_handler(struct device
>> *adev, void *opaque)
>> if (adev->type != &i2c_adapter_type)
>> return 0;
>>
>> - i2c_new_device(to_i2c_adapter(adev), &addr_info->binfo);
>> + addr_info->added_client = i2c_new_device(to_i2c_adapter(adev),
>> + &addr_info->binfo);
>>
>> if (!addr_info->adapter_name)
>> return 1; /* Only try the first I2C adapter by
>> default. */
>> @@ -1840,7 +1835,7 @@ static int ssif_platform_remove(struct
>> platform_device *dev)
>> return 0;
>>
>> mutex_lock(&ssif_infos_mutex);
>> - i2c_unregister_device(addr_info->client);
>> + i2c_unregister_device(addr_info->added_client);
>>
>> list_del(&addr_info->link);
>> kfree(addr_info);
>> --
>> 2.7.4
>>
On 08/31/2018 05:43 PM, Corey Minyard wrote:
>
> On 08/31/2018 06:58 AM, George Cherian wrote:
>>
>>
>> On 08/30/2018 11:44 PM, [email protected] wrote:
>>>
>>> From: Corey Minyard <[email protected]>
>>>
>>> The SSIF driver was removing any client that came in through the
>>> platform interface, but it should only remove clients that it
>>> added. On a failure in the probe function, this could result
>>> in the following oops when the driver is removed and the
>>> client gets unregistered twice:
>>>
>>> CPU: 107 PID: 30266 Comm: rmmod Not tainted 4.18.0+ #80
>>> Hardware name: Cavium Inc. Saber/Saber, BIOS Cavium reference
>>> firmware version 7.0 08/04/2018
>>> pstate: 60400009 (nZCv daif +PAN -UAO)
>>> pc : kernfs_find_ns+0x28/0x120
>>> lr : kernfs_find_and_get_ns+0x40/0x60
>>> sp : ffff00002310fb50
>>> x29: ffff00002310fb50 x28: ffff800a8240f800
>>> x27: 0000000000000000 x26: 0000000000000000
>>> x25: 0000000056000000 x24: ffff000009073000
>>> x23: ffff000008998b38 x22: 0000000000000000
>>> x21: ffff800ed86de820 x20: 0000000000000000
>>> x19: ffff00000913a1d8 x18: 0000000000000000
>>> x17: 0000000000000000 x16: 0000000000000000
>>> x15: 0000000000000000 x14: 5300737265766972
>>> x13: 643d4d4554535953 x12: 0000000000000030
>>> x11: 0000000000000030 x10: 0101010101010101
>>> x9 : ffff800ea06cc3f9 x8 : 0000000000000000
>>> x7 : 0000000000000141 x6 : ffff000009073000
>>> x5 : ffff800adb706b00 x4 : 0000000000000000
>>> x3 : 00000000ffffffff x2 : 0000000000000000
>>> x1 : ffff000008998b38 x0 : ffff000008356760
>>> Process rmmod (pid: 30266, stack limit = 0x00000000e218418d)
>>> Call trace:
>>> kernfs_find_ns+0x28/0x120
>>> kernfs_find_and_get_ns+0x40/0x60
>>> sysfs_unmerge_group+0x2c/0x6c
>>> dpm_sysfs_remove+0x34/0x70
>>> device_del+0x58/0x30c
>>> device_unregister+0x30/0x7c
>>> i2c_unregister_device+0x84/0x90 [i2c_core]
>>> ssif_platform_remove+0x38/0x98 [ipmi_ssif]
>>> platform_drv_remove+0x2c/0x6c
>>> device_release_driver_internal+0x168/0x1f8
>>> driver_detach+0x50/0xbc
>>> bus_remove_driver+0x74/0xe8
>>> driver_unregister+0x34/0x5c
>>> platform_driver_unregister+0x20/0x2c
>>> cleanup_ipmi_ssif+0x50/0xd82c [ipmi_ssif]
>>> __arm64_sys_delete_module+0x1b4/0x220
>>> el0_svc_handler+0x104/0x160
>>> el0_svc+0x8/0xc
>>> Code: aa1e03e0 aa0203f6 aa0103f7 d503201f (7940e280)
>>> ---[ end trace 09f0e34cce8e2d8c ]---
>>> Kernel panic - not syncing: Fatal exception
>>> SMP: stopping secondary CPUs
>>> Kernel Offset: disabled
>>> CPU features: 0x23800c38
>>>
>>> So track the clients that the SSIF driver adds and only remove
>>> those.
>>>
>>> Reported-by: George Cherian <[email protected]>
>>> Signed-off-by: Corey Minyard <[email protected]>
>>> ---
>>> drivers/char/ipmi/ipmi_ssif.c | 17 ++++++-----------
>>> 1 file changed, 6 insertions(+), 11 deletions(-)
>>>
>>> George, your fix was close, but not quite right. The following
>>> should fix the problem, obvious in hindsight :-). Thanks for working
>>> with me on this.
>>>
>> Thanks Corey!!!
>> This works for me now.
>>
>
> Thanks. Can I add a Tested-by: from you?
Yes.
Tested-by: George Cherian <[email protected]>
Can you CC this to stable too.
-George
>
> -corey
>
>>> -corey
>>>
>>> diff --git a/drivers/char/ipmi/ipmi_ssif.c
>>> b/drivers/char/ipmi/ipmi_ssif.c
>>> index e7e8b86..ca4f7c4 100644
>>> --- a/drivers/char/ipmi/ipmi_ssif.c
>>> +++ b/drivers/char/ipmi/ipmi_ssif.c
>>> @@ -182,6 +182,8 @@ struct ssif_addr_info {
>>> struct device *dev;
>>> struct i2c_client *client;
>>>
>>> + struct i2c_client *added_client;
>>> +
>>> struct mutex clients_mutex;
>>> struct list_head clients;
>>>
>>> @@ -1639,15 +1641,7 @@ static int ssif_probe(struct i2c_client
>>> *client, const struct i2c_device_id *id)
>>>
>>> out:
>>> if (rv) {
>>> - /*
>>> - * Note that if addr_info->client is assigned, we
>>> - * leave it. The i2c client hangs around even if we
>>> - * return a failure here, and the failure here is not
>>> - * propagated back to the i2c code. This seems to be
>>> - * design intent, strange as it may be. But if we
>>> - * don't leave it, ssif_platform_remove will not remove
>>> - * the client like it should.
>>> - */
>>> + addr_info->client = NULL;
>>> dev_err(&client->dev, "Unable to start IPMI SSIF:
>>> %d\n", rv);
>>> kfree(ssif_info);
>>> }
>>> @@ -1667,7 +1661,8 @@ static int ssif_adapter_handler(struct device
>>> *adev, void *opaque)
>>> if (adev->type != &i2c_adapter_type)
>>> return 0;
>>>
>>> - i2c_new_device(to_i2c_adapter(adev), &addr_info->binfo);
>>> + addr_info->added_client = i2c_new_device(to_i2c_adapter(adev),
>>> + &addr_info->binfo);
>>>
>>> if (!addr_info->adapter_name)
>>> return 1; /* Only try the first I2C adapter by
>>> default. */
>>> @@ -1840,7 +1835,7 @@ static int ssif_platform_remove(struct
>>> platform_device *dev)
>>> return 0;
>>>
>>> mutex_lock(&ssif_infos_mutex);
>>> - i2c_unregister_device(addr_info->client);
>>> + i2c_unregister_device(addr_info->added_client);
>>>
>>> list_del(&addr_info->link);
>>> kfree(addr_info);
>>> --
>>> 2.7.4
>>>
>
On 08/30/2018 11:44 PM, [email protected] wrote:
>
> From: Corey Minyard <[email protected]>
>
> The SSIF driver was removing any client that came in through the
> platform interface, but it should only remove clients that it
> added. On a failure in the probe function, this could result
> in the following oops when the driver is removed and the
> client gets unregistered twice:
>
> CPU: 107 PID: 30266 Comm: rmmod Not tainted 4.18.0+ #80
> Hardware name: Cavium Inc. Saber/Saber, BIOS Cavium reference firmware version 7.0 08/04/2018
> pstate: 60400009 (nZCv daif +PAN -UAO)
> pc : kernfs_find_ns+0x28/0x120
> lr : kernfs_find_and_get_ns+0x40/0x60
> sp : ffff00002310fb50
> x29: ffff00002310fb50 x28: ffff800a8240f800
> x27: 0000000000000000 x26: 0000000000000000
> x25: 0000000056000000 x24: ffff000009073000
> x23: ffff000008998b38 x22: 0000000000000000
> x21: ffff800ed86de820 x20: 0000000000000000
> x19: ffff00000913a1d8 x18: 0000000000000000
> x17: 0000000000000000 x16: 0000000000000000
> x15: 0000000000000000 x14: 5300737265766972
> x13: 643d4d4554535953 x12: 0000000000000030
> x11: 0000000000000030 x10: 0101010101010101
> x9 : ffff800ea06cc3f9 x8 : 0000000000000000
> x7 : 0000000000000141 x6 : ffff000009073000
> x5 : ffff800adb706b00 x4 : 0000000000000000
> x3 : 00000000ffffffff x2 : 0000000000000000
> x1 : ffff000008998b38 x0 : ffff000008356760
> Process rmmod (pid: 30266, stack limit = 0x00000000e218418d)
> Call trace:
> kernfs_find_ns+0x28/0x120
> kernfs_find_and_get_ns+0x40/0x60
> sysfs_unmerge_group+0x2c/0x6c
> dpm_sysfs_remove+0x34/0x70
> device_del+0x58/0x30c
> device_unregister+0x30/0x7c
> i2c_unregister_device+0x84/0x90 [i2c_core]
> ssif_platform_remove+0x38/0x98 [ipmi_ssif]
> platform_drv_remove+0x2c/0x6c
> device_release_driver_internal+0x168/0x1f8
> driver_detach+0x50/0xbc
> bus_remove_driver+0x74/0xe8
> driver_unregister+0x34/0x5c
> platform_driver_unregister+0x20/0x2c
> cleanup_ipmi_ssif+0x50/0xd82c [ipmi_ssif]
> __arm64_sys_delete_module+0x1b4/0x220
> el0_svc_handler+0x104/0x160
> el0_svc+0x8/0xc
> Code: aa1e03e0 aa0203f6 aa0103f7 d503201f (7940e280)
> ---[ end trace 09f0e34cce8e2d8c ]---
> Kernel panic - not syncing: Fatal exception
> SMP: stopping secondary CPUs
> Kernel Offset: disabled
> CPU features: 0x23800c38
>
> So track the clients that the SSIF driver adds and only remove
> those.
>
> Reported-by: George Cherian <[email protected]>
> Signed-off-by: Corey Minyard <[email protected]>
> ---
> drivers/char/ipmi/ipmi_ssif.c | 17 ++++++-----------
> 1 file changed, 6 insertions(+), 11 deletions(-)
>
> George, your fix was close, but not quite right. The following
> should fix the problem, obvious in hindsight :-). Thanks for working
> with me on this.
>
Thanks Corey!!!
This works for me now.
> -corey
>
> diff --git a/drivers/char/ipmi/ipmi_ssif.c b/drivers/char/ipmi/ipmi_ssif.c
> index e7e8b86..ca4f7c4 100644
> --- a/drivers/char/ipmi/ipmi_ssif.c
> +++ b/drivers/char/ipmi/ipmi_ssif.c
> @@ -182,6 +182,8 @@ struct ssif_addr_info {
> struct device *dev;
> struct i2c_client *client;
>
> + struct i2c_client *added_client;
> +
> struct mutex clients_mutex;
> struct list_head clients;
>
> @@ -1639,15 +1641,7 @@ static int ssif_probe(struct i2c_client *client, const struct i2c_device_id *id)
>
> out:
> if (rv) {
> - /*
> - * Note that if addr_info->client is assigned, we
> - * leave it. The i2c client hangs around even if we
> - * return a failure here, and the failure here is not
> - * propagated back to the i2c code. This seems to be
> - * design intent, strange as it may be. But if we
> - * don't leave it, ssif_platform_remove will not remove
> - * the client like it should.
> - */
> + addr_info->client = NULL;
> dev_err(&client->dev, "Unable to start IPMI SSIF: %d\n", rv);
> kfree(ssif_info);
> }
> @@ -1667,7 +1661,8 @@ static int ssif_adapter_handler(struct device *adev, void *opaque)
> if (adev->type != &i2c_adapter_type)
> return 0;
>
> - i2c_new_device(to_i2c_adapter(adev), &addr_info->binfo);
> + addr_info->added_client = i2c_new_device(to_i2c_adapter(adev),
> + &addr_info->binfo);
>
> if (!addr_info->adapter_name)
> return 1; /* Only try the first I2C adapter by default. */
> @@ -1840,7 +1835,7 @@ static int ssif_platform_remove(struct platform_device *dev)
> return 0;
>
> mutex_lock(&ssif_infos_mutex);
> - i2c_unregister_device(addr_info->client);
> + i2c_unregister_device(addr_info->added_client);
>
> list_del(&addr_info->link);
> kfree(addr_info);
> --
> 2.7.4
>