The cros_ec_uart_probe() function calls devm_serdev_device_open() before
it calls serdev_device_set_client_ops(). This can trigger a NULL pointer
dereference:
BUG: kernel NULL pointer dereference, address: 0000000000000000
...
CPU: 5 PID: 103 Comm: kworker/u16:3 Not tainted 6.8.4-zen1-1-zen #1 4a88f2661038c2a3bb69aa70fb41a5735338823c
Hardware name: Google Morphius/Morphius, BIOS MrChromebox-4.22.2-1-g2a93624aebf 01/22/2024
Workqueue: events_unbound flush_to_ldisc
RIP: 0010:ttyport_receive_buf+0x3f/0xf0
...
Call Trace:
<TASK>
? __die+0x10f/0x120
? page_fault_oops+0x171/0x4e0
? srso_return_thunk+0x5/0x5f
? exc_page_fault+0x7f/0x180
? asm_exc_page_fault+0x26/0x30
? ttyport_receive_buf+0x3f/0xf0
flush_to_ldisc+0x9b/0x1c0
process_one_work+0x17b/0x340
worker_thread+0x301/0x490
? __pfx_worker_thread+0x10/0x10
kthread+0xe8/0x120
? __pfx_kthread+0x10/0x10
ret_from_fork+0x34/0x50
? __pfx_kthread+0x10/0x10
ret_from_fork_asm+0x1b/0x30
</TASK>
A simplified version of crashing code is as follows:
static inline size_t serdev_controller_receive_buf(struct serdev_controller *ctrl,
const u8 *data,
size_t count)
{
struct serdev_device *serdev = ctrl->serdev;
if (!serdev || !serdev->ops->receive_buf) // CRASH!
return 0;
return serdev->ops->receive_buf(serdev, data, count);
}
static size_t ttyport_receive_buf(struct tty_port *port, const u8 *cp,
const u8 *fp, size_t count)
{
struct serdev_controller *ctrl = port->client_data;
[...]
if (!test_bit(SERPORT_ACTIVE, &serport->flags))
return 0;
ret = serdev_controller_receive_buf(ctrl, cp, count);
[...]
return ret;
}
It assumes that if SERPORT_ACTIVE is set and serdev exists, serdev->ops
will also exist. This conflicts with the existing cros_ec_uart_probe()
logic, as it first calls devm_serdev_device_open() (which sets
SERPORT_ACTIVE), and only later sets serdev->ops via
serdev_device_set_client_ops().
Commit 01f95d42b8f4 ("platform/chrome: cros_ec_uart: fix race
condition") attempted to fix a similar race condition, but while doing
so, made the window of error for this race condition to happen much
wider.
Attempt to fix the race condition again, making sure we fully setup
before calling devm_serdev_device_open().
Fixes: 01f95d42b8f4 ("platform/chrome: cros_ec_uart: fix race condition")
Cc: [email protected]
Signed-off-by: Noah Loomans <[email protected]>
---
This is my first time contributing to Linux, I hope this is a good
patch. Feedback on how to improve is welcome!
drivers/platform/chrome/cros_ec_uart.c | 28 +++++++++++++-------------
1 file changed, 14 insertions(+), 14 deletions(-)
diff --git a/drivers/platform/chrome/cros_ec_uart.c b/drivers/platform/chrome/cros_ec_uart.c
index 8ea867c2a01a..62bc24f6dcc7 100644
--- a/drivers/platform/chrome/cros_ec_uart.c
+++ b/drivers/platform/chrome/cros_ec_uart.c
@@ -263,12 +263,6 @@ static int cros_ec_uart_probe(struct serdev_device *serdev)
if (!ec_dev)
return -ENOMEM;
- ret = devm_serdev_device_open(dev, serdev);
- if (ret) {
- dev_err(dev, "Unable to open UART device");
- return ret;
- }
-
serdev_device_set_drvdata(serdev, ec_dev);
init_waitqueue_head(&ec_uart->response.wait_queue);
@@ -280,14 +274,6 @@ static int cros_ec_uart_probe(struct serdev_device *serdev)
return ret;
}
- ret = serdev_device_set_baudrate(serdev, ec_uart->baudrate);
- if (ret < 0) {
- dev_err(dev, "Failed to set up host baud rate (%d)", ret);
- return ret;
- }
-
- serdev_device_set_flow_control(serdev, ec_uart->flowcontrol);
-
/* Initialize ec_dev for cros_ec */
ec_dev->phys_name = dev_name(dev);
ec_dev->dev = dev;
@@ -301,6 +287,20 @@ static int cros_ec_uart_probe(struct serdev_device *serdev)
serdev_device_set_client_ops(serdev, &cros_ec_uart_client_ops);
+ ret = devm_serdev_device_open(dev, serdev);
+ if (ret) {
+ dev_err(dev, "Unable to open UART device");
+ return ret;
+ }
+
+ ret = serdev_device_set_baudrate(serdev, ec_uart->baudrate);
+ if (ret < 0) {
+ dev_err(dev, "Failed to set up host baud rate (%d)", ret);
+ return ret;
+ }
+
+ serdev_device_set_flow_control(serdev, ec_uart->flowcontrol);
+
return cros_ec_register(ec_dev);
}
--
2.44.0
On Wed, Apr 10, 2024 at 11:29 AM Noah Loomans <[email protected]> wrote:
>
> The cros_ec_uart_probe() function calls devm_serdev_device_open() before
> it calls serdev_device_set_client_ops(). This can trigger a NULL pointer
> dereference:
>
> BUG: kernel NULL pointer dereference, address: 0000000000000000
> ...
> CPU: 5 PID: 103 Comm: kworker/u16:3 Not tainted 6.8.4-zen1-1-zen #1 4a88f2661038c2a3bb69aa70fb41a5735338823c
> Hardware name: Google Morphius/Morphius, BIOS MrChromebox-4.22.2-1-g2a93624aebf 01/22/2024
> Workqueue: events_unbound flush_to_ldisc
> RIP: 0010:ttyport_receive_buf+0x3f/0xf0
> ...
> Call Trace:
> <TASK>
> ? __die+0x10f/0x120
> ? page_fault_oops+0x171/0x4e0
> ? srso_return_thunk+0x5/0x5f
> ? exc_page_fault+0x7f/0x180
> ? asm_exc_page_fault+0x26/0x30
> ? ttyport_receive_buf+0x3f/0xf0
> flush_to_ldisc+0x9b/0x1c0
> process_one_work+0x17b/0x340
> worker_thread+0x301/0x490
> ? __pfx_worker_thread+0x10/0x10
> kthread+0xe8/0x120
> ? __pfx_kthread+0x10/0x10
> ret_from_fork+0x34/0x50
> ? __pfx_kthread+0x10/0x10
> ret_from_fork_asm+0x1b/0x30
> </TASK>
>
> A simplified version of crashing code is as follows:
>
> static inline size_t serdev_controller_receive_buf(struct serdev_controller *ctrl,
> const u8 *data,
> size_t count)
> {
> struct serdev_device *serdev = ctrl->serdev;
>
> if (!serdev || !serdev->ops->receive_buf) // CRASH!
> return 0;
>
> return serdev->ops->receive_buf(serdev, data, count);
> }
>
> static size_t ttyport_receive_buf(struct tty_port *port, const u8 *cp,
> const u8 *fp, size_t count)
> {
> struct serdev_controller *ctrl = port->client_data;
> [...]
>
> if (!test_bit(SERPORT_ACTIVE, &serport->flags))
> return 0;
>
> ret = serdev_controller_receive_buf(ctrl, cp, count);
>
> [...]
> return ret;
> }
>
> It assumes that if SERPORT_ACTIVE is set and serdev exists, serdev->ops
> will also exist. This conflicts with the existing cros_ec_uart_probe()
> logic, as it first calls devm_serdev_device_open() (which sets
> SERPORT_ACTIVE), and only later sets serdev->ops via
> serdev_device_set_client_ops().
>
> Commit 01f95d42b8f4 ("platform/chrome: cros_ec_uart: fix race
> condition") attempted to fix a similar race condition, but while doing
> so, made the window of error for this race condition to happen much
> wider.
>
> Attempt to fix the race condition again, making sure we fully setup
> before calling devm_serdev_device_open().
>
> Fixes: 01f95d42b8f4 ("platform/chrome: cros_ec_uart: fix race condition")
> Cc: [email protected]
> Signed-off-by: Noah Loomans <[email protected]>
> ---
>
> This is my first time contributing to Linux, I hope this is a good
> patch. Feedback on how to improve is welcome!
>
The commit message is a bit long, but the patch itself looks good to me.
Reviewed-by: Guenter Roeck <[email protected]>
Guenter
> drivers/platform/chrome/cros_ec_uart.c | 28 +++++++++++++-------------
> 1 file changed, 14 insertions(+), 14 deletions(-)
>
> diff --git a/drivers/platform/chrome/cros_ec_uart.c b/drivers/platform/chrome/cros_ec_uart.c
> index 8ea867c2a01a..62bc24f6dcc7 100644
> --- a/drivers/platform/chrome/cros_ec_uart.c
> +++ b/drivers/platform/chrome/cros_ec_uart.c
> @@ -263,12 +263,6 @@ static int cros_ec_uart_probe(struct serdev_device *serdev)
> if (!ec_dev)
> return -ENOMEM;
>
> - ret = devm_serdev_device_open(dev, serdev);
> - if (ret) {
> - dev_err(dev, "Unable to open UART device");
> - return ret;
> - }
> -
> serdev_device_set_drvdata(serdev, ec_dev);
> init_waitqueue_head(&ec_uart->response.wait_queue);
>
> @@ -280,14 +274,6 @@ static int cros_ec_uart_probe(struct serdev_device *serdev)
> return ret;
> }
>
> - ret = serdev_device_set_baudrate(serdev, ec_uart->baudrate);
> - if (ret < 0) {
> - dev_err(dev, "Failed to set up host baud rate (%d)", ret);
> - return ret;
> - }
> -
> - serdev_device_set_flow_control(serdev, ec_uart->flowcontrol);
> -
> /* Initialize ec_dev for cros_ec */
> ec_dev->phys_name = dev_name(dev);
> ec_dev->dev = dev;
> @@ -301,6 +287,20 @@ static int cros_ec_uart_probe(struct serdev_device *serdev)
>
> serdev_device_set_client_ops(serdev, &cros_ec_uart_client_ops);
>
> + ret = devm_serdev_device_open(dev, serdev);
> + if (ret) {
> + dev_err(dev, "Unable to open UART device");
> + return ret;
> + }
> +
> + ret = serdev_device_set_baudrate(serdev, ec_uart->baudrate);
> + if (ret < 0) {
> + dev_err(dev, "Failed to set up host baud rate (%d)", ret);
> + return ret;
> + }
> +
> + serdev_device_set_flow_control(serdev, ec_uart->flowcontrol);
> +
> return cros_ec_register(ec_dev);
> }
>
> --
> 2.44.0
>
On 2024-04-10 at 21:48 UTC+02, Guenter Roeck wrote:
> On Wed, Apr 10, 2024 at 11:29 AM Noah Loomans <[email protected]> wrote:
> > This is my first time contributing to Linux, I hope this is a good
> > patch. Feedback on how to improve is welcome!
>
> The commit message is a bit long, but the patch itself looks good to me.
Hmm yeah it's a bit on a long side. I'm not sure what could be removed
though, it all seems relevant for understanding the bug and the fix.
> Reviewed-by: Guenter Roeck <[email protected]>
Thanks!
-Noah
On Thu, Apr 11, 2024 at 12:06:33AM +0200, Noah Loomans wrote:
> On 2024-04-10 at 21:48 UTC+02, Guenter Roeck wrote:
> > On Wed, Apr 10, 2024 at 11:29 AM Noah Loomans <[email protected]> wrote:
> > > This is my first time contributing to Linux, I hope this is a good
> > > patch. Feedback on how to improve is welcome!
> >
> > The commit message is a bit long, but the patch itself looks good to me.
>
> Hmm yeah it's a bit on a long side. I'm not sure what could be removed
> though, it all seems relevant for understanding the bug and the fix.
Applied with shortening the message slightly.
On Wed, Apr 10, 2024 at 7:18 PM Tzung-Bi Shih <[email protected]> wrote:
>
> On Thu, Apr 11, 2024 at 12:06:33AM +0200, Noah Loomans wrote:
> > On 2024-04-10 at 21:48 UTC+02, Guenter Roeck wrote:
> > > On Wed, Apr 10, 2024 at 11:29 AM Noah Loomans <[email protected]> wrote:
> > > > This is my first time contributing to Linux, I hope this is a good
> > > > patch. Feedback on how to improve is welcome!
> > >
> > > The commit message is a bit long, but the patch itself looks good to me.
> >
> > Hmm yeah it's a bit on a long side. I'm not sure what could be removed
> > though, it all seems relevant for understanding the bug and the fix.
>
> Applied with shortening the message slightly.
We might also consider applying the patch to all ChromeOS branches
directly (not waiting for upstream); we do see a number of crashes
because of it.
Guenter
On 2024-04-11 at 04:18 UTC+02, Tzung-Bi Shih wrote:
> Applied with shortening the message slightly.
Thanks! And I see you already sent a pull request to Linus. That's nice,
didn't expect that to happen so quickly.
On 2024-04-11 at 04:26 UTC+02, Guenter Roeck wrote:
> We might also consider applying the patch to all ChromeOS branches
> directly (not waiting for upstream); we do see a number of crashes
> because of it.
Heh, I did wonder how Chromebooks running Chrome OS avoided this issue.
I guess the answer is simple: they didn't :)
Hello:
This patch was applied to chrome-platform/linux.git (for-next)
by Tzung-Bi Shih <[email protected]>:
On Wed, 10 Apr 2024 20:26:19 +0200 you wrote:
> The cros_ec_uart_probe() function calls devm_serdev_device_open() before
> it calls serdev_device_set_client_ops(). This can trigger a NULL pointer
> dereference:
>
> BUG: kernel NULL pointer dereference, address: 0000000000000000
> ...
> CPU: 5 PID: 103 Comm: kworker/u16:3 Not tainted 6.8.4-zen1-1-zen #1 4a88f2661038c2a3bb69aa70fb41a5735338823c
> Hardware name: Google Morphius/Morphius, BIOS MrChromebox-4.22.2-1-g2a93624aebf 01/22/2024
> Workqueue: events_unbound flush_to_ldisc
> RIP: 0010:ttyport_receive_buf+0x3f/0xf0
> ...
> Call Trace:
> <TASK>
> ? __die+0x10f/0x120
> ? page_fault_oops+0x171/0x4e0
> ? srso_return_thunk+0x5/0x5f
> ? exc_page_fault+0x7f/0x180
> ? asm_exc_page_fault+0x26/0x30
> ? ttyport_receive_buf+0x3f/0xf0
> flush_to_ldisc+0x9b/0x1c0
> process_one_work+0x17b/0x340
> worker_thread+0x301/0x490
> ? __pfx_worker_thread+0x10/0x10
> kthread+0xe8/0x120
> ? __pfx_kthread+0x10/0x10
> ret_from_fork+0x34/0x50
> ? __pfx_kthread+0x10/0x10
> ret_from_fork_asm+0x1b/0x30
> </TASK>
>
> [...]
Here is the summary with links:
- platform/chrome: cros_ec_uart: properly fix race condition
https://git.kernel.org/chrome-platform/c/5e700b384ec1
You are awesome, thank you!
--
Deet-doot-dot, I am a bot.
https://korg.docs.kernel.org/patchwork/pwbot.html
Hello:
This patch was applied to chrome-platform/linux.git (for-kernelci)
by Tzung-Bi Shih <[email protected]>:
On Wed, 10 Apr 2024 20:26:19 +0200 you wrote:
> The cros_ec_uart_probe() function calls devm_serdev_device_open() before
> it calls serdev_device_set_client_ops(). This can trigger a NULL pointer
> dereference:
>
> BUG: kernel NULL pointer dereference, address: 0000000000000000
> ...
> CPU: 5 PID: 103 Comm: kworker/u16:3 Not tainted 6.8.4-zen1-1-zen #1 4a88f2661038c2a3bb69aa70fb41a5735338823c
> Hardware name: Google Morphius/Morphius, BIOS MrChromebox-4.22.2-1-g2a93624aebf 01/22/2024
> Workqueue: events_unbound flush_to_ldisc
> RIP: 0010:ttyport_receive_buf+0x3f/0xf0
> ...
> Call Trace:
> <TASK>
> ? __die+0x10f/0x120
> ? page_fault_oops+0x171/0x4e0
> ? srso_return_thunk+0x5/0x5f
> ? exc_page_fault+0x7f/0x180
> ? asm_exc_page_fault+0x26/0x30
> ? ttyport_receive_buf+0x3f/0xf0
> flush_to_ldisc+0x9b/0x1c0
> process_one_work+0x17b/0x340
> worker_thread+0x301/0x490
> ? __pfx_worker_thread+0x10/0x10
> kthread+0xe8/0x120
> ? __pfx_kthread+0x10/0x10
> ret_from_fork+0x34/0x50
> ? __pfx_kthread+0x10/0x10
> ret_from_fork_asm+0x1b/0x30
> </TASK>
>
> [...]
Here is the summary with links:
- platform/chrome: cros_ec_uart: properly fix race condition
https://git.kernel.org/chrome-platform/c/5e700b384ec1
You are awesome, thank you!
--
Deet-doot-dot, I am a bot.
https://korg.docs.kernel.org/patchwork/pwbot.html